[Chore] Linted Files (#150)

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#150
This commit is contained in:
2025-10-09 07:25:13 +13:00
parent 22f0bbe10b
commit afeb56622c
50 changed files with 5616 additions and 4408 deletions

View File

@ -2,12 +2,11 @@ import logging
import re
from abc import ABC
from collections import defaultdict
from typing import List, Optional, Dict
from typing import List, Optional, Dict, TYPE_CHECKING
from indigo import Indigo, IndigoException, IndigoObject
from indigo.renderer import IndigoRenderer
from rdkit import Chem
from rdkit import RDLogger
from rdkit import Chem, rdBase
from rdkit.Chem import MACCSkeys, Descriptors
from rdkit.Chem import rdChemReactions
from rdkit.Chem.Draw import rdMolDraw2D
@ -15,9 +14,11 @@ from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem.rdmolops import GetMolFrags
from rdkit.Contrib.IFG import ifg
logger = logging.getLogger(__name__)
RDLogger.DisableLog('rdApp.*')
if TYPE_CHECKING:
from epdb.models import Rule
logger = logging.getLogger(__name__)
rdBase.DisableLog("rdApp.*")
# from rdkit import rdBase
# rdBase.LogToPythonLogger()
@ -28,7 +29,6 @@ RDLogger.DisableLog('rdApp.*')
class ProductSet(object):
def __init__(self, product_set: List[str]):
self.product_set = product_set
@ -42,15 +42,18 @@ class ProductSet(object):
return iter(self.product_set)
def __eq__(self, other):
return isinstance(other, ProductSet) and sorted(self.product_set) == sorted(other.product_set)
return isinstance(other, ProductSet) and sorted(self.product_set) == sorted(
other.product_set
)
def __hash__(self):
return hash('-'.join(sorted(self.product_set)))
return hash("-".join(sorted(self.product_set)))
class PredictionResult(object):
def __init__(self, product_sets: List['ProductSet'], probability: float, rule: Optional['Rule'] = None):
def __init__(
self, product_sets: List["ProductSet"], probability: float, rule: Optional["Rule"] = None
):
self.product_sets = product_sets
self.probability = probability
self.rule = rule
@ -66,7 +69,6 @@ class PredictionResult(object):
class FormatConverter(object):
@staticmethod
def mass(smiles):
return Descriptors.MolWt(FormatConverter.from_smiles(smiles))
@ -127,7 +129,7 @@ class FormatConverter(object):
if kekulize:
try:
mol = Chem.Kekulize(mol)
except:
except Exception:
mol = Chem.Mol(mol.ToBinary())
if not mol.GetNumConformers():
@ -139,8 +141,8 @@ class FormatConverter(object):
opts.clearBackground = False
drawer.DrawMolecule(mol)
drawer.FinishDrawing()
svg = drawer.GetDrawingText().replace('svg:', '')
svg = re.sub("<\?xml.*\?>", '', svg)
svg = drawer.GetDrawingText().replace("svg:", "")
svg = re.sub("<\?xml.*\?>", "", svg)
return svg
@ -151,7 +153,7 @@ class FormatConverter(object):
if kekulize:
try:
Chem.Kekulize(mol)
except:
except Exception:
mc = Chem.Mol(mol.ToBinary())
if not mc.GetNumConformers():
@ -178,7 +180,7 @@ class FormatConverter(object):
smiles = tmp_smiles
if change is False:
print(f"nothing changed")
print("nothing changed")
return smiles
@ -198,7 +200,9 @@ class FormatConverter(object):
parent_clean_mol = rdMolStandardize.FragmentParent(clean_mol)
# try to neutralize molecule
uncharger = rdMolStandardize.Uncharger() # annoying, but necessary as no convenience method exists
uncharger = (
rdMolStandardize.Uncharger()
) # annoying, but necessary as no convenience method exists
uncharged_parent_clean_mol = uncharger.uncharge(parent_clean_mol)
# note that no attempt is made at reionization at this step
@ -239,17 +243,24 @@ class FormatConverter(object):
try:
rdChemReactions.ReactionFromSmarts(smirks)
return True
except:
except Exception:
return False
@staticmethod
def apply(smiles: str, smirks: str, preprocess_smiles: bool = True, bracketize: bool = True,
standardize: bool = True, kekulize: bool = True, remove_stereo: bool = True) -> List['ProductSet']:
logger.debug(f'Applying {smirks} on {smiles}')
def apply(
smiles: str,
smirks: str,
preprocess_smiles: bool = True,
bracketize: bool = True,
standardize: bool = True,
kekulize: bool = True,
remove_stereo: bool = True,
) -> List["ProductSet"]:
logger.debug(f"Applying {smirks} on {smiles}")
# If explicitly wanted or rule generates multiple products add brackets around products to capture all
if bracketize: # or "." in smirks:
smirks = smirks.split('>>')[0] + ">>(" + smirks.split('>>')[1] + ")"
smirks = smirks.split(">>")[0] + ">>(" + smirks.split(">>")[1] + ")"
# List of ProductSet objects
pss = set()
@ -274,7 +285,9 @@ class FormatConverter(object):
Chem.SanitizeMol(product)
product = GetMolFrags(product, asMols=True)
for p in product:
p = FormatConverter.standardize(Chem.MolToSmiles(p), remove_stereo=remove_stereo)
p = FormatConverter.standardize(
Chem.MolToSmiles(p), remove_stereo=remove_stereo
)
prods.append(p)
# if kekulize:
@ -300,9 +313,8 @@ class FormatConverter(object):
# # bond.SetIsAromatic(False)
# Chem.Kekulize(product)
except ValueError as e:
logger.error(f'Sanitizing and converting failed:\n{e}')
logger.error(f"Sanitizing and converting failed:\n{e}")
continue
if len(prods):
@ -310,7 +322,7 @@ class FormatConverter(object):
pss.add(ps)
except Exception as e:
logger.error(f'Applying {smirks} on {smiles} failed:\n{e}')
logger.error(f"Applying {smirks} on {smiles} failed:\n{e}")
return pss
@ -340,22 +352,19 @@ class FormatConverter(object):
smi_p = Chem.MolToSmiles(mol, kekuleSmiles=True)
smi_p = Chem.CanonSmiles(smi_p)
if '~' in smi_p:
smi_p1 = smi_p.replace('~', '')
if "~" in smi_p:
smi_p1 = smi_p.replace("~", "")
parsed_smiles.append(smi_p1)
else:
parsed_smiles.append(smi_p)
except Exception as e:
except Exception:
errors += 1
pass
return parsed_smiles, errors
class Standardizer(ABC):
def __init__(self, name):
self.name = name
@ -364,7 +373,6 @@ class Standardizer(ABC):
class RuleStandardizer(Standardizer):
def __init__(self, name, smirks):
super().__init__(name)
self.smirks = smirks
@ -373,8 +381,8 @@ class RuleStandardizer(Standardizer):
standardized_smiles = list(set(FormatConverter.apply(smiles, self.smirks)))
if len(standardized_smiles) > 1:
logger.warning(f'{self.smirks} generated more than 1 compound {standardized_smiles}')
print(f'{self.smirks} generated more than 1 compound {standardized_smiles}')
logger.warning(f"{self.smirks} generated more than 1 compound {standardized_smiles}")
print(f"{self.smirks} generated more than 1 compound {standardized_smiles}")
standardized_smiles = standardized_smiles[:1]
if standardized_smiles:
@ -384,7 +392,6 @@ class RuleStandardizer(Standardizer):
class RegExStandardizer(Standardizer):
def __init__(self, name, replacements: dict):
super().__init__(name)
self.replacements = replacements
@ -404,28 +411,39 @@ class RegExStandardizer(Standardizer):
return super().standardize(smi)
FLATTEN = [
RegExStandardizer("Remove Stereo", {"@": ""})
]
FLATTEN = [RegExStandardizer("Remove Stereo", {"@": ""})]
UN_CIS_TRANS = [
RegExStandardizer("Un-Cis-Trans", {"/": "", "\\": ""})
]
UN_CIS_TRANS = [RegExStandardizer("Un-Cis-Trans", {"/": "", "\\": ""})]
BASIC = [
RuleStandardizer("ammoniumstandardization", "[H][N+:1]([H])([H])[#6:2]>>[H][#7:1]([H])-[#6:2]"),
RuleStandardizer("cyanate", "[H][#8:1][C:2]#[N:3]>>[#8-:1][C:2]#[N:3]"),
RuleStandardizer("deprotonatecarboxyls", "[H][#8:1]-[#6:2]=[O:3]>>[#8-:1]-[#6:2]=[O:3]"),
RuleStandardizer("forNOOH", "[H][#8:1]-[#7+:2](-[*:3])=[O:4]>>[#8-:1]-[#7+:2](-[*:3])=[O:4]"),
RuleStandardizer("Hydroxylprotonation", "[#6;A:1][#6:2](-[#8-:3])=[#6;A:4]>>[#6:1]-[#6:2](-[#8:3][H])=[#6;A:4]"),
RuleStandardizer("phosphatedeprotonation", "[H][#8:1]-[$([#15]);!$(P([O-])):2]>>[#8-:1]-[#15:2]"),
RuleStandardizer("PicricAcid",
"[H][#8:1]-[c:2]1[c:3][c:4][c:5]([c:6][c:7]1-[#7+:8](-[#8-:9])=[O:10])-[#7+:11](-[#8-:12])=[O:13]>>[#8-:1]-[c:2]1[c:3][c:4][c:5]([c:6][c:7]1-[#7+:8](-[#8-:9])=[O:10])-[#7+:11](-[#8-:12])=[O:13]"),
RuleStandardizer("Sulfate1", "[H][#8:1][S:2]([#8:3][H])(=[O:4])=[O:5]>>[#8-:1][S:2]([#8-:3])(=[O:4])=[O:5]"),
RuleStandardizer("Sulfate2",
"[#6:1]-[#8:2][S:3]([#8:4][H])(=[O:5])=[O:6]>>[#6:1]-[#8:2][S:3]([#8-:4])(=[O:5])=[O:6]"),
RuleStandardizer("Sulfate3", "[H][#8:3][S:2]([#6:1])(=[O:4])=[O:5]>>[#6:1][S:2]([#8-:3])(=[O:4])=[O:5]"),
RuleStandardizer("Transform_c1353forSOOH", "[H][#8:1][S:2]([*:3])=[O:4]>>[#8-:1][S:2]([*:3])=[O:4]"),
RuleStandardizer(
"Hydroxylprotonation",
"[#6;A:1][#6:2](-[#8-:3])=[#6;A:4]>>[#6:1]-[#6:2](-[#8:3][H])=[#6;A:4]",
),
RuleStandardizer(
"phosphatedeprotonation", "[H][#8:1]-[$([#15]);!$(P([O-])):2]>>[#8-:1]-[#15:2]"
),
RuleStandardizer(
"PicricAcid",
"[H][#8:1]-[c:2]1[c:3][c:4][c:5]([c:6][c:7]1-[#7+:8](-[#8-:9])=[O:10])-[#7+:11](-[#8-:12])=[O:13]>>[#8-:1]-[c:2]1[c:3][c:4][c:5]([c:6][c:7]1-[#7+:8](-[#8-:9])=[O:10])-[#7+:11](-[#8-:12])=[O:13]",
),
RuleStandardizer(
"Sulfate1", "[H][#8:1][S:2]([#8:3][H])(=[O:4])=[O:5]>>[#8-:1][S:2]([#8-:3])(=[O:4])=[O:5]"
),
RuleStandardizer(
"Sulfate2",
"[#6:1]-[#8:2][S:3]([#8:4][H])(=[O:5])=[O:6]>>[#6:1]-[#8:2][S:3]([#8-:4])(=[O:5])=[O:6]",
),
RuleStandardizer(
"Sulfate3", "[H][#8:3][S:2]([#6:1])(=[O:4])=[O:5]>>[#6:1][S:2]([#8-:3])(=[O:4])=[O:5]"
),
RuleStandardizer(
"Transform_c1353forSOOH", "[H][#8:1][S:2]([*:3])=[O:4]>>[#8-:1][S:2]([*:3])=[O:4]"
),
]
ENHANCED = BASIC + [
@ -433,28 +451,30 @@ ENHANCED = BASIC + [
]
EXOTIC = ENHANCED + [
RuleStandardizer("ThioPhosphate1", "[H][S:1]-[#15:2]=[$([#16]),$([#8]):3]>>[S-:1]-[#15:2]=[$([#16]),$([#8]):3]")
RuleStandardizer(
"ThioPhosphate1",
"[H][S:1]-[#15:2]=[$([#16]),$([#8]):3]>>[S-:1]-[#15:2]=[$([#16]),$([#8]):3]",
)
]
COA_CUTTER = [
RuleStandardizer("CutCoEnzymeAOff",
"CC(C)(COP(O)(=O)OP(O)(=O)OCC1OC(C(O)C1OP(O)(O)=O)n1cnc2c(N)ncnc12)C(O)C(=O)NCCC(=O)NCCS[$(*):1]>>[O-][$(*):1]")
RuleStandardizer(
"CutCoEnzymeAOff",
"CC(C)(COP(O)(=O)OP(O)(=O)OCC1OC(C(O)C1OP(O)(O)=O)n1cnc2c(N)ncnc12)C(O)C(=O)NCCC(=O)NCCS[$(*):1]>>[O-][$(*):1]",
)
]
ENOL_KETO = [
RuleStandardizer("enol2Ketone", "[H][#8:2]-[#6:3]=[#6:1]>>[#6:1]-[#6:3]=[O:2]")
]
ENOL_KETO = [RuleStandardizer("enol2Ketone", "[H][#8:2]-[#6:3]=[#6:1]>>[#6:1]-[#6:3]=[O:2]")]
MATCH_STANDARDIZER = EXOTIC + FLATTEN + UN_CIS_TRANS + COA_CUTTER + ENOL_KETO
class IndigoUtils(object):
@staticmethod
def layout(mol_data):
i = Indigo()
try:
if mol_data.startswith('$RXN') or '>>' in mol_data:
if mol_data.startswith("$RXN") or ">>" in mol_data:
rxn = i.loadQueryReaction(mol_data)
rxn.layout()
return rxn.rxnfile()
@ -462,14 +482,14 @@ class IndigoUtils(object):
mol = i.loadQueryMolecule(mol_data)
mol.layout()
return mol.molfile()
except IndigoException as e:
except IndigoException:
try:
logger.info("layout() failed, trying loadReactionSMARTS as fallback!")
rxn = IndigoUtils.load_reaction_SMARTS(mol_data)
rxn.layout()
return rxn.molfile()
except IndigoException as e2:
logger.error(f'layout() failed due to {e2}!')
logger.error(f"layout() failed due to {e2}!")
@staticmethod
def load_reaction_SMARTS(mol):
@ -479,7 +499,7 @@ class IndigoUtils(object):
def aromatize(mol_data, is_query):
i = Indigo()
try:
if mol_data.startswith('$RXN'):
if mol_data.startswith("$RXN"):
if is_query:
rxn = i.loadQueryReaction(mol_data)
else:
@ -495,20 +515,20 @@ class IndigoUtils(object):
mol.aromatize()
return mol.molfile()
except IndigoException as e:
except IndigoException:
try:
logger.info("Aromatizing failed, trying loadReactionSMARTS as fallback!")
rxn = IndigoUtils.load_reaction_SMARTS(mol_data)
rxn.aromatize()
return rxn.molfile()
except IndigoException as e2:
logger.error(f'Aromatizing failed due to {e2}!')
logger.error(f"Aromatizing failed due to {e2}!")
@staticmethod
def dearomatize(mol_data, is_query):
i = Indigo()
try:
if mol_data.startswith('$RXN'):
if mol_data.startswith("$RXN"):
if is_query:
rxn = i.loadQueryReaction(mol_data)
else:
@ -524,14 +544,14 @@ class IndigoUtils(object):
mol.dearomatize()
return mol.molfile()
except IndigoException as e:
except IndigoException:
try:
logger.info("De-Aromatizing failed, trying loadReactionSMARTS as fallback!")
rxn = IndigoUtils.load_reaction_SMARTS(mol_data)
rxn.dearomatize()
return rxn.molfile()
except IndigoException as e2:
logger.error(f'De-Aromatizing failed due to {e2}!')
logger.error(f"De-Aromatizing failed due to {e2}!")
@staticmethod
def sanitize_functional_group(functional_group: str):
@ -543,7 +563,7 @@ class IndigoUtils(object):
# special environment handling (amines, hydroxy, esters, ethers)
# the higher substituted should not contain H env.
if functional_group == '[C]=O':
if functional_group == "[C]=O":
functional_group = "[H][C](=O)[CX4,c]"
# aldamines
@ -577,15 +597,20 @@ class IndigoUtils(object):
functional_group = "[nH1,nX2](a)a" # pyrrole (with H) or pyridine (no other connections); currently overlaps with neighboring aromatic atoms
# substituted aromatic nitrogen
functional_group = functional_group.replace("N*(R)R",
"n(a)a") # substituent will be before N*; currently overlaps with neighboring aromatic atoms
functional_group = functional_group.replace(
"N*(R)R", "n(a)a"
) # substituent will be before N*; currently overlaps with neighboring aromatic atoms
# pyridinium
if functional_group == "RN*(R)(R)(R)R":
functional_group = "[CX4,c]n(a)a" # currently overlaps with neighboring aromatic atoms
functional_group = (
"[CX4,c]n(a)a" # currently overlaps with neighboring aromatic atoms
)
# N-oxide
if functional_group == "[H]ON*(R)(R)(R)R":
functional_group = "[O-][n+](a)a" # currently overlaps with neighboring aromatic atoms
functional_group = (
"[O-][n+](a)a" # currently overlaps with neighboring aromatic atoms
)
# other aromatic hetero atoms
functional_group = functional_group.replace("C*", "c")
@ -598,7 +623,9 @@ class IndigoUtils(object):
# other replacements, to accommodate the standardization rules in enviPath
# This is not the perfect way to do it; there should be a way to replace substructure SMARTS in SMARTS?
# nitro groups are broken, due to charge handling. this SMARTS matches both forms (formal charges and hypervalent); Ertl-CDK still treats both forms separately...
functional_group = functional_group.replace("[H]O[N](=O)R", "[CX4,c][NX3](~[OX1])~[OX1]")
functional_group = functional_group.replace(
"[H]O[N](=O)R", "[CX4,c][NX3](~[OX1])~[OX1]"
)
functional_group = functional_group.replace("O=N(=O)R", "[CX4,c][NX3](~[OX1])~[OX1]")
# carboxylic acid: this SMARTS matches both neutral and anionic form; includes COOH in larger functional_groups
functional_group = functional_group.replace("[H]OC(=O)", "[OD1]C(=O)")
@ -616,7 +643,9 @@ class IndigoUtils(object):
return functional_group
@staticmethod
def _colorize(indigo: Indigo, molecule: IndigoObject, functional_groups: Dict[str, int], is_reaction: bool):
def _colorize(
indigo: Indigo, molecule: IndigoObject, functional_groups: Dict[str, int], is_reaction: bool
):
indigo.setOption("render-atom-color-property", "color")
indigo.setOption("aromaticity-model", "generic")
@ -646,7 +675,6 @@ class IndigoUtils(object):
for match in matcher.iterateMatches(query):
if match is not None:
for atom in query.iterateAtoms():
mappedAtom = match.mapAtom(atom)
if mappedAtom is None or mappedAtom.index() in environment:
@ -655,7 +683,7 @@ class IndigoUtils(object):
counts[mappedAtom.index()] = max(v, counts[mappedAtom.index()])
except IndigoException as e:
logger.debug(f'Colorizing failed due to {e}')
logger.debug(f"Colorizing failed due to {e}")
for k, v in counts.items():
if is_reaction:
@ -669,8 +697,9 @@ class IndigoUtils(object):
molecule.addDataSGroup([k], [], "color", color)
@staticmethod
def mol_to_svg(mol_data: str, width: int = 0, height: int = 0, functional_groups: Dict[str, int] = None):
def mol_to_svg(
mol_data: str, width: int = 0, height: int = 0, functional_groups: Dict[str, int] = None
):
if functional_groups is None:
functional_groups = {}
@ -682,7 +711,7 @@ class IndigoUtils(object):
i.setOption("render-image-size", width, height)
i.setOption("render-bond-line-width", 2.0)
if '~' in mol_data:
if "~" in mol_data:
mol = i.loadSmarts(mol_data)
else:
mol = i.loadMolecule(mol_data)
@ -690,11 +719,17 @@ class IndigoUtils(object):
if len(functional_groups.keys()) > 0:
IndigoUtils._colorize(i, mol, functional_groups, False)
return renderer.renderToBuffer(mol).decode('UTF-8')
return renderer.renderToBuffer(mol).decode("UTF-8")
@staticmethod
def smirks_to_svg(smirks: str, is_query_smirks, width: int = 0, height: int = 0,
educt_functional_groups: Dict[str, int] = None, product_functional_groups: Dict[str, int] = None):
def smirks_to_svg(
smirks: str,
is_query_smirks,
width: int = 0,
height: int = 0,
educt_functional_groups: Dict[str, int] = None,
product_functional_groups: Dict[str, int] = None,
):
if educt_functional_groups is None:
educt_functional_groups = {}
@ -721,18 +756,18 @@ class IndigoUtils(object):
for prod in obj.iterateProducts():
IndigoUtils._colorize(i, prod, product_functional_groups, True)
return renderer.renderToBuffer(obj).decode('UTF-8')
return renderer.renderToBuffer(obj).decode("UTF-8")
if __name__ == '__main__':
if __name__ == "__main__":
data = {
"struct": "\n Ketcher 2172510 12D 1 1.00000 0.00000 0\n\n 6 6 0 0 0 999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.5000 -0.8660 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.0000 -1.7321 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.0000 -1.7321 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.5000 -0.8660 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 2 0 0 0 0\n 2 3 1 0 0 0 0\n 3 4 2 0 0 0 0\n 4 5 1 0 0 0 0\n 5 6 2 0 0 0 0\n 6 1 1 0 0 0 0\nM END\n",
"options": {
"smart-layout": True,
"ignore-stereochemistry-errors": True,
"mass-skip-error-on-pseudoatoms": False,
"gross-formula-add-rsites": True
}
"gross-formula-add-rsites": True,
},
}
print(IndigoUtils.aromatize(data['struct'], False))
print(IndigoUtils.aromatize(data["struct"], False))