[Chore] Linted Files (#150)

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#150
2025-10-09 07:25:13 +13:00
parent 22f0bbe10b
commit afeb56622c
50 changed files with 5616 additions and 4408 deletions
--- a/utilities/chem.py
+++ b/utilities/chem.py
@ -2,12 +2,11 @@ import logging
 import re
 from abc import ABC
 from collections import defaultdict
-from typing import List, Optional, Dict
+from typing import List, Optional, Dict, TYPE_CHECKING

 from indigo import Indigo, IndigoException, IndigoObject
 from indigo.renderer import IndigoRenderer
-from rdkit import Chem
-from rdkit import RDLogger
+from rdkit import Chem, rdBase
 from rdkit.Chem import MACCSkeys, Descriptors
 from rdkit.Chem import rdChemReactions
 from rdkit.Chem.Draw import rdMolDraw2D
@ -15,9 +14,11 @@ from rdkit.Chem.MolStandardize import rdMolStandardize
 from rdkit.Chem.rdmolops import GetMolFrags
 from rdkit.Contrib.IFG import ifg

-logger = logging.getLogger(__name__)
-RDLogger.DisableLog('rdApp.*')
+if TYPE_CHECKING:
+    from epdb.models import Rule

+logger = logging.getLogger(__name__)
+rdBase.DisableLog("rdApp.*")

 # from rdkit import rdBase
 # rdBase.LogToPythonLogger()
@ -28,7 +29,6 @@ RDLogger.DisableLog('rdApp.*')


 class ProductSet(object):
-
    def __init__(self, product_set: List[str]):
        self.product_set = product_set

@ -42,15 +42,18 @@ class ProductSet(object):
        return iter(self.product_set)

    def __eq__(self, other):
-        return isinstance(other, ProductSet) and sorted(self.product_set) == sorted(other.product_set)
+        return isinstance(other, ProductSet) and sorted(self.product_set) == sorted(
+            other.product_set
+        )

    def __hash__(self):
-        return hash('-'.join(sorted(self.product_set)))
+        return hash("-".join(sorted(self.product_set)))


 class PredictionResult(object):
-
-    def __init__(self, product_sets: List['ProductSet'], probability: float, rule: Optional['Rule'] = None):
+    def __init__(
+        self, product_sets: List["ProductSet"], probability: float, rule: Optional["Rule"] = None
+    ):
        self.product_sets = product_sets
        self.probability = probability
        self.rule = rule
@ -66,7 +69,6 @@ class PredictionResult(object):


 class FormatConverter(object):
-
    @staticmethod
    def mass(smiles):
        return Descriptors.MolWt(FormatConverter.from_smiles(smiles))
@ -127,7 +129,7 @@ class FormatConverter(object):
        if kekulize:
            try:
                mol = Chem.Kekulize(mol)
-            except:
+            except Exception:
                mol = Chem.Mol(mol.ToBinary())

        if not mol.GetNumConformers():
@ -139,8 +141,8 @@ class FormatConverter(object):
        opts.clearBackground = False
        drawer.DrawMolecule(mol)
        drawer.FinishDrawing()
-        svg = drawer.GetDrawingText().replace('svg:', '')
-        svg = re.sub("<\?xml.*\?>", '', svg)
+        svg = drawer.GetDrawingText().replace("svg:", "")
+        svg = re.sub("<\?xml.*\?>", "", svg)

        return svg

@ -151,7 +153,7 @@ class FormatConverter(object):
        if kekulize:
            try:
                Chem.Kekulize(mol)
-            except:
+            except Exception:
                mc = Chem.Mol(mol.ToBinary())

        if not mc.GetNumConformers():
@ -178,7 +180,7 @@ class FormatConverter(object):
                    smiles = tmp_smiles

            if change is False:
-                print(f"nothing changed")
+                print("nothing changed")

        return smiles

@ -198,7 +200,9 @@ class FormatConverter(object):
        parent_clean_mol = rdMolStandardize.FragmentParent(clean_mol)

        # try to neutralize molecule
-        uncharger = rdMolStandardize.Uncharger()  # annoying, but necessary as no convenience method exists
+        uncharger = (
+            rdMolStandardize.Uncharger()
+        )  # annoying, but necessary as no convenience method exists
        uncharged_parent_clean_mol = uncharger.uncharge(parent_clean_mol)

        # note that no attempt is made at reionization at this step
@ -239,17 +243,24 @@ class FormatConverter(object):
        try:
            rdChemReactions.ReactionFromSmarts(smirks)
            return True
-        except:
+        except Exception:
            return False

    @staticmethod
-    def apply(smiles: str, smirks: str, preprocess_smiles: bool = True, bracketize: bool = True,
-              standardize: bool = True, kekulize: bool = True, remove_stereo: bool = True) -> List['ProductSet']:
-        logger.debug(f'Applying {smirks} on {smiles}')
+    def apply(
+        smiles: str,
+        smirks: str,
+        preprocess_smiles: bool = True,
+        bracketize: bool = True,
+        standardize: bool = True,
+        kekulize: bool = True,
+        remove_stereo: bool = True,
+    ) -> List["ProductSet"]:
+        logger.debug(f"Applying {smirks} on {smiles}")

        # If explicitly wanted or rule generates multiple products add brackets around products to capture all
        if bracketize:  # or "." in smirks:
-            smirks = smirks.split('>>')[0] + ">>(" + smirks.split('>>')[1] + ")"
+            smirks = smirks.split(">>")[0] + ">>(" + smirks.split(">>")[1] + ")"

        # List of ProductSet objects
        pss = set()
@ -274,7 +285,9 @@ class FormatConverter(object):
                            Chem.SanitizeMol(product)
                            product = GetMolFrags(product, asMols=True)
                            for p in product:
-                                p = FormatConverter.standardize(Chem.MolToSmiles(p), remove_stereo=remove_stereo)
+                                p = FormatConverter.standardize(
+                                    Chem.MolToSmiles(p), remove_stereo=remove_stereo
+                                )
                                prods.append(p)

                            # if kekulize:
@ -300,9 +313,8 @@ class FormatConverter(object):
                            #     #     bond.SetIsAromatic(False)
                            #     Chem.Kekulize(product)

-
                        except ValueError as e:
-                            logger.error(f'Sanitizing and converting failed:\n{e}')
+                            logger.error(f"Sanitizing and converting failed:\n{e}")
                            continue

                    if len(prods):
@ -310,7 +322,7 @@ class FormatConverter(object):
                        pss.add(ps)

        except Exception as e:
-            logger.error(f'Applying {smirks} on {smiles} failed:\n{e}')
+            logger.error(f"Applying {smirks} on {smiles} failed:\n{e}")

        return pss

@ -340,22 +352,19 @@ class FormatConverter(object):
                smi_p = Chem.MolToSmiles(mol, kekuleSmiles=True)
                smi_p = Chem.CanonSmiles(smi_p)

-                if '~' in smi_p:
-                    smi_p1 = smi_p.replace('~', '')
+                if "~" in smi_p:
+                    smi_p1 = smi_p.replace("~", "")
                    parsed_smiles.append(smi_p1)
                else:
                    parsed_smiles.append(smi_p)
-            except Exception as e:
+            except Exception:
                errors += 1
                pass

        return parsed_smiles, errors


-
-
 class Standardizer(ABC):
-
    def __init__(self, name):
        self.name = name

@ -364,7 +373,6 @@ class Standardizer(ABC):


 class RuleStandardizer(Standardizer):
-
    def __init__(self, name, smirks):
        super().__init__(name)
        self.smirks = smirks
@ -373,8 +381,8 @@ class RuleStandardizer(Standardizer):
        standardized_smiles = list(set(FormatConverter.apply(smiles, self.smirks)))

        if len(standardized_smiles) > 1:
-            logger.warning(f'{self.smirks} generated more than 1 compound {standardized_smiles}')
-            print(f'{self.smirks} generated more than 1 compound {standardized_smiles}')
+            logger.warning(f"{self.smirks} generated more than 1 compound {standardized_smiles}")
+            print(f"{self.smirks} generated more than 1 compound {standardized_smiles}")
            standardized_smiles = standardized_smiles[:1]

        if standardized_smiles:
@ -384,7 +392,6 @@ class RuleStandardizer(Standardizer):


 class RegExStandardizer(Standardizer):
-
    def __init__(self, name, replacements: dict):
        super().__init__(name)
        self.replacements = replacements
@ -404,28 +411,39 @@ class RegExStandardizer(Standardizer):
        return super().standardize(smi)


-FLATTEN = [
-    RegExStandardizer("Remove Stereo", {"@": ""})
-]
+FLATTEN = [RegExStandardizer("Remove Stereo", {"@": ""})]

-UN_CIS_TRANS = [
-    RegExStandardizer("Un-Cis-Trans", {"/": "", "\\": ""})
-]
+UN_CIS_TRANS = [RegExStandardizer("Un-Cis-Trans", {"/": "", "\\": ""})]

 BASIC = [
    RuleStandardizer("ammoniumstandardization", "[H][N+:1]([H])([H])[#6:2]>>[H][#7:1]([H])-[#6:2]"),
    RuleStandardizer("cyanate", "[H][#8:1][C:2]#[N:3]>>[#8-:1][C:2]#[N:3]"),
    RuleStandardizer("deprotonatecarboxyls", "[H][#8:1]-[#6:2]=[O:3]>>[#8-:1]-[#6:2]=[O:3]"),
    RuleStandardizer("forNOOH", "[H][#8:1]-[#7+:2](-[*:3])=[O:4]>>[#8-:1]-[#7+:2](-[*:3])=[O:4]"),
-    RuleStandardizer("Hydroxylprotonation", "[#6;A:1][#6:2](-[#8-:3])=[#6;A:4]>>[#6:1]-[#6:2](-[#8:3][H])=[#6;A:4]"),
-    RuleStandardizer("phosphatedeprotonation", "[H][#8:1]-[$([#15]);!$(P([O-])):2]>>[#8-:1]-[#15:2]"),
-    RuleStandardizer("PicricAcid",
-                     "[H][#8:1]-[c:2]1[c:3][c:4][c:5]([c:6][c:7]1-[#7+:8](-[#8-:9])=[O:10])-[#7+:11](-[#8-:12])=[O:13]>>[#8-:1]-[c:2]1[c:3][c:4][c:5]([c:6][c:7]1-[#7+:8](-[#8-:9])=[O:10])-[#7+:11](-[#8-:12])=[O:13]"),
-    RuleStandardizer("Sulfate1", "[H][#8:1][S:2]([#8:3][H])(=[O:4])=[O:5]>>[#8-:1][S:2]([#8-:3])(=[O:4])=[O:5]"),
-    RuleStandardizer("Sulfate2",
-                     "[#6:1]-[#8:2][S:3]([#8:4][H])(=[O:5])=[O:6]>>[#6:1]-[#8:2][S:3]([#8-:4])(=[O:5])=[O:6]"),
-    RuleStandardizer("Sulfate3", "[H][#8:3][S:2]([#6:1])(=[O:4])=[O:5]>>[#6:1][S:2]([#8-:3])(=[O:4])=[O:5]"),
-    RuleStandardizer("Transform_c1353forSOOH", "[H][#8:1][S:2]([*:3])=[O:4]>>[#8-:1][S:2]([*:3])=[O:4]"),
+    RuleStandardizer(
+        "Hydroxylprotonation",
+        "[#6;A:1][#6:2](-[#8-:3])=[#6;A:4]>>[#6:1]-[#6:2](-[#8:3][H])=[#6;A:4]",
+    ),
+    RuleStandardizer(
+        "phosphatedeprotonation", "[H][#8:1]-[$([#15]);!$(P([O-])):2]>>[#8-:1]-[#15:2]"
+    ),
+    RuleStandardizer(
+        "PicricAcid",
+        "[H][#8:1]-[c:2]1[c:3][c:4][c:5]([c:6][c:7]1-[#7+:8](-[#8-:9])=[O:10])-[#7+:11](-[#8-:12])=[O:13]>>[#8-:1]-[c:2]1[c:3][c:4][c:5]([c:6][c:7]1-[#7+:8](-[#8-:9])=[O:10])-[#7+:11](-[#8-:12])=[O:13]",
+    ),
+    RuleStandardizer(
+        "Sulfate1", "[H][#8:1][S:2]([#8:3][H])(=[O:4])=[O:5]>>[#8-:1][S:2]([#8-:3])(=[O:4])=[O:5]"
+    ),
+    RuleStandardizer(
+        "Sulfate2",
+        "[#6:1]-[#8:2][S:3]([#8:4][H])(=[O:5])=[O:6]>>[#6:1]-[#8:2][S:3]([#8-:4])(=[O:5])=[O:6]",
+    ),
+    RuleStandardizer(
+        "Sulfate3", "[H][#8:3][S:2]([#6:1])(=[O:4])=[O:5]>>[#6:1][S:2]([#8-:3])(=[O:4])=[O:5]"
+    ),
+    RuleStandardizer(
+        "Transform_c1353forSOOH", "[H][#8:1][S:2]([*:3])=[O:4]>>[#8-:1][S:2]([*:3])=[O:4]"
+    ),
 ]

 ENHANCED = BASIC + [
@ -433,28 +451,30 @@ ENHANCED = BASIC + [
 ]

 EXOTIC = ENHANCED + [
-    RuleStandardizer("ThioPhosphate1", "[H][S:1]-[#15:2]=[$([#16]),$([#8]):3]>>[S-:1]-[#15:2]=[$([#16]),$([#8]):3]")
+    RuleStandardizer(
+        "ThioPhosphate1",
+        "[H][S:1]-[#15:2]=[$([#16]),$([#8]):3]>>[S-:1]-[#15:2]=[$([#16]),$([#8]):3]",
+    )
 ]

 COA_CUTTER = [
-    RuleStandardizer("CutCoEnzymeAOff",
-                     "CC(C)(COP(O)(=O)OP(O)(=O)OCC1OC(C(O)C1OP(O)(O)=O)n1cnc2c(N)ncnc12)C(O)C(=O)NCCC(=O)NCCS[$(*):1]>>[O-][$(*):1]")
+    RuleStandardizer(
+        "CutCoEnzymeAOff",
+        "CC(C)(COP(O)(=O)OP(O)(=O)OCC1OC(C(O)C1OP(O)(O)=O)n1cnc2c(N)ncnc12)C(O)C(=O)NCCC(=O)NCCS[$(*):1]>>[O-][$(*):1]",
+    )
 ]

-ENOL_KETO = [
-    RuleStandardizer("enol2Ketone", "[H][#8:2]-[#6:3]=[#6:1]>>[#6:1]-[#6:3]=[O:2]")
-]
+ENOL_KETO = [RuleStandardizer("enol2Ketone", "[H][#8:2]-[#6:3]=[#6:1]>>[#6:1]-[#6:3]=[O:2]")]

 MATCH_STANDARDIZER = EXOTIC + FLATTEN + UN_CIS_TRANS + COA_CUTTER + ENOL_KETO


 class IndigoUtils(object):
-
    @staticmethod
    def layout(mol_data):
        i = Indigo()
        try:
-            if mol_data.startswith('$RXN') or '>>' in mol_data:
+            if mol_data.startswith("$RXN") or ">>" in mol_data:
                rxn = i.loadQueryReaction(mol_data)
                rxn.layout()
                return rxn.rxnfile()
@ -462,14 +482,14 @@ class IndigoUtils(object):
                mol = i.loadQueryMolecule(mol_data)
                mol.layout()
                return mol.molfile()
-        except IndigoException as e:
+        except IndigoException:
            try:
                logger.info("layout() failed, trying loadReactionSMARTS as fallback!")
                rxn = IndigoUtils.load_reaction_SMARTS(mol_data)
                rxn.layout()
                return rxn.molfile()
            except IndigoException as e2:
-                logger.error(f'layout() failed due to {e2}!')
+                logger.error(f"layout() failed due to {e2}!")

    @staticmethod
    def load_reaction_SMARTS(mol):
@ -479,7 +499,7 @@ class IndigoUtils(object):
    def aromatize(mol_data, is_query):
        i = Indigo()
        try:
-            if mol_data.startswith('$RXN'):
+            if mol_data.startswith("$RXN"):
                if is_query:
                    rxn = i.loadQueryReaction(mol_data)
                else:
@ -495,20 +515,20 @@ class IndigoUtils(object):

                mol.aromatize()
                return mol.molfile()
-        except IndigoException as e:
+        except IndigoException:
            try:
                logger.info("Aromatizing failed, trying loadReactionSMARTS as fallback!")
                rxn = IndigoUtils.load_reaction_SMARTS(mol_data)
                rxn.aromatize()
                return rxn.molfile()
            except IndigoException as e2:
-                logger.error(f'Aromatizing failed due to {e2}!')
+                logger.error(f"Aromatizing failed due to {e2}!")

    @staticmethod
    def dearomatize(mol_data, is_query):
        i = Indigo()
        try:
-            if mol_data.startswith('$RXN'):
+            if mol_data.startswith("$RXN"):
                if is_query:
                    rxn = i.loadQueryReaction(mol_data)
                else:
@ -524,14 +544,14 @@ class IndigoUtils(object):

                mol.dearomatize()
                return mol.molfile()
-        except IndigoException as e:
+        except IndigoException:
            try:
                logger.info("De-Aromatizing failed, trying loadReactionSMARTS as fallback!")
                rxn = IndigoUtils.load_reaction_SMARTS(mol_data)
                rxn.dearomatize()
                return rxn.molfile()
            except IndigoException as e2:
-                logger.error(f'De-Aromatizing failed due to {e2}!')
+                logger.error(f"De-Aromatizing failed due to {e2}!")

    @staticmethod
    def sanitize_functional_group(functional_group: str):
@ -543,7 +563,7 @@ class IndigoUtils(object):

            # special environment handling (amines, hydroxy, esters, ethers)
            # the higher substituted should not contain H env.
-            if functional_group == '[C]=O':
+            if functional_group == "[C]=O":
                functional_group = "[H][C](=O)[CX4,c]"

            # aldamines
@ -577,15 +597,20 @@ class IndigoUtils(object):
                functional_group = "[nH1,nX2](a)a"  # pyrrole (with H) or pyridine (no other connections); currently overlaps with neighboring aromatic atoms

            # substituted aromatic nitrogen
-            functional_group = functional_group.replace("N*(R)R",
-                                                        "n(a)a")  # substituent will be before N*; currently overlaps with neighboring aromatic atoms
+            functional_group = functional_group.replace(
+                "N*(R)R", "n(a)a"
+            )  # substituent will be before N*; currently overlaps with neighboring aromatic atoms
            # pyridinium
            if functional_group == "RN*(R)(R)(R)R":
-                functional_group = "[CX4,c]n(a)a"  # currently overlaps with neighboring aromatic atoms
+                functional_group = (
+                    "[CX4,c]n(a)a"  # currently overlaps with neighboring aromatic atoms
+                )

            # N-oxide
            if functional_group == "[H]ON*(R)(R)(R)R":
-                functional_group = "[O-][n+](a)a"  # currently overlaps with neighboring aromatic atoms
+                functional_group = (
+                    "[O-][n+](a)a"  # currently overlaps with neighboring aromatic atoms
+                )

            # other aromatic hetero atoms
            functional_group = functional_group.replace("C*", "c")
@ -598,7 +623,9 @@ class IndigoUtils(object):
            # other replacement, to accomodate for the standardization rules in enviPath
            # This is not the perfect way to do it; there should be a way to replace substructure SMARTS in SMARTS?
            # nitro groups are broken, due to charge handling. this SMARTS matches both forms (formal charges and hypervalent); Ertl-CDK still treats both forms separately...
-            functional_group = functional_group.replace("[H]O[N](=O)R", "[CX4,c][NX3](~[OX1])~[OX1]")
+            functional_group = functional_group.replace(
+                "[H]O[N](=O)R", "[CX4,c][NX3](~[OX1])~[OX1]"
+            )
            functional_group = functional_group.replace("O=N(=O)R", "[CX4,c][NX3](~[OX1])~[OX1]")
            # carboxylic acid: this SMARTS matches both neutral and anionic form; includes COOH in larger functional_groups
            functional_group = functional_group.replace("[H]OC(=O)", "[OD1]C(=O)")
@ -616,7 +643,9 @@ class IndigoUtils(object):
        return functional_group

    @staticmethod
-    def _colorize(indigo: Indigo, molecule: IndigoObject, functional_groups: Dict[str, int], is_reaction: bool):
+    def _colorize(
+        indigo: Indigo, molecule: IndigoObject, functional_groups: Dict[str, int], is_reaction: bool
+    ):
        indigo.setOption("render-atom-color-property", "color")
        indigo.setOption("aromaticity-model", "generic")

@ -646,7 +675,6 @@ class IndigoUtils(object):

                for match in matcher.iterateMatches(query):
                    if match is not None:
-
                        for atom in query.iterateAtoms():
                            mappedAtom = match.mapAtom(atom)
                            if mappedAtom is None or mappedAtom.index() in environment:
@ -655,7 +683,7 @@ class IndigoUtils(object):
                            counts[mappedAtom.index()] = max(v, counts[mappedAtom.index()])

            except IndigoException as e:
-                logger.debug(f'Colorizing failed due to {e}')
+                logger.debug(f"Colorizing failed due to {e}")

        for k, v in counts.items():
            if is_reaction:
@ -669,8 +697,9 @@ class IndigoUtils(object):
            molecule.addDataSGroup([k], [], "color", color)

    @staticmethod
-    def mol_to_svg(mol_data: str, width: int = 0, height: int = 0, functional_groups: Dict[str, int] = None):
-
+    def mol_to_svg(
+        mol_data: str, width: int = 0, height: int = 0, functional_groups: Dict[str, int] = None
+    ):
        if functional_groups is None:
            functional_groups = {}

@ -682,7 +711,7 @@ class IndigoUtils(object):
        i.setOption("render-image-size", width, height)
        i.setOption("render-bond-line-width", 2.0)

-        if '~' in mol_data:
+        if "~" in mol_data:
            mol = i.loadSmarts(mol_data)
        else:
            mol = i.loadMolecule(mol_data)
@ -690,11 +719,17 @@ class IndigoUtils(object):
        if len(functional_groups.keys()) > 0:
            IndigoUtils._colorize(i, mol, functional_groups, False)

-        return renderer.renderToBuffer(mol).decode('UTF-8')
+        return renderer.renderToBuffer(mol).decode("UTF-8")

    @staticmethod
-    def smirks_to_svg(smirks: str, is_query_smirks, width: int = 0, height: int = 0,
-                      educt_functional_groups: Dict[str, int] = None, product_functional_groups: Dict[str, int] = None):
+    def smirks_to_svg(
+        smirks: str,
+        is_query_smirks,
+        width: int = 0,
+        height: int = 0,
+        educt_functional_groups: Dict[str, int] = None,
+        product_functional_groups: Dict[str, int] = None,
+    ):
        if educt_functional_groups is None:
            educt_functional_groups = {}

@ -721,18 +756,18 @@ class IndigoUtils(object):
                for prod in obj.iterateProducts():
                    IndigoUtils._colorize(i, prod, product_functional_groups, True)

-        return renderer.renderToBuffer(obj).decode('UTF-8')
+        return renderer.renderToBuffer(obj).decode("UTF-8")


-if __name__ == '__main__':
+if __name__ == "__main__":
    data = {
        "struct": "\n  Ketcher  2172510 12D 1   1.00000     0.00000     0\n\n  6  6  0     0  0            999 V2000\n    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   -1.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   -1.5000   -0.8660    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   -1.0000   -1.7321    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    0.0000   -1.7321    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    0.5000   -0.8660    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n  1  2  2  0  0  0  0\n  2  3  1  0  0  0  0\n  3  4  2  0  0  0  0\n  4  5  1  0  0  0  0\n  5  6  2  0  0  0  0\n  6  1  1  0  0  0  0\nM  END\n",
        "options": {
            "smart-layout": True,
            "ignore-stereochemistry-errors": True,
            "mass-skip-error-on-pseudoatoms": False,
-            "gross-formula-add-rsites": True
-        }
+            "gross-formula-add-rsites": True,
+        },
    }

-    print(IndigoUtils.aromatize(data['struct'], False))
+    print(IndigoUtils.aromatize(data["struct"], False))