[Misc] Performance improvements, SMIRKS Coverage, Minor Bugfixes (#132)

- Bump Python Version to 3.12
- Make use of "epauth" optional
- Cache `srs` property of rules to speed up apply
- Adjust view names for use of `reverse()`
- Fix Views for Scenario Attachments
- Added Simply Compare View/Template to identify differences between rdkit and ambit
- Make migrations consistent with tests + compare
- Fixes #76
- Set default year for Scenario Modal
- Fix html tags for package description
- Added Tests for Pathway / Rule
- Added remove stereo for apply

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#132
2025-09-26 19:33:03 +12:00
parent b5c759d74e
commit b757a07f91
23 changed files with 671 additions and 463 deletions
--- a/utilities/chem.py
+++ b/utilities/chem.py
@@ -244,7 +244,7 @@ class FormatConverter(object):

    @staticmethod
    def apply(smiles: str, smirks: str, preprocess_smiles: bool = True, bracketize: bool = True,
-              standardize: bool = True, kekulize: bool = True) -> List['ProductSet']:
+              standardize: bool = True, kekulize: bool = True, remove_stereo: bool = True) -> List['ProductSet']:
        logger.debug(f'Applying {smirks} on {smiles}')

        # If explicitly wanted or rule generates multiple products add brackets around products to capture all
@@ -274,7 +274,7 @@ class FormatConverter(object):
                            Chem.SanitizeMol(product)
                            product = GetMolFrags(product, asMols=True)
                            for p in product:
-                                p = FormatConverter.standardize(Chem.MolToSmiles(p))
+                                p = FormatConverter.standardize(Chem.MolToSmiles(p), remove_stereo=remove_stereo)
                                prods.append(p)

                            # if kekulize:
@@ -314,30 +314,10 @@ class FormatConverter(object):

        return pss

-    # @staticmethod
-    # def apply(reaction, smiles):
-    #     rxn = AllChem.ReactionFromSmarts(reaction)
-    #     return [Chem.MolToSmiles(x, 1) for x in rxn.RunReactants((Chem.MolFromSmiles(smiles),))[0]]
-
    @staticmethod
    def MACCS(smiles):
        return MACCSkeys.GenMACCSKeys(FormatConverter.from_smiles(smiles))

-    @staticmethod
-    def neutralize_atoms(mol):
-        pattern = Chem.MolFromSmarts("[+1!h0!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")
-        at_matches = mol.GetSubstructMatches(pattern)
-        at_matches_list = [y[0] for y in at_matches]
-        if len(at_matches_list) > 0:
-            for at_idx in at_matches_list:
-                atom = mol.GetAtomWithIdx(at_idx)
-                chg = atom.GetFormalCharge()
-                hcount = atom.GetTotalNumHs()
-                atom.SetFormalCharge(0)
-                atom.SetNumExplicitHs(hcount - chg)
-                atom.UpdatePropertyCache()
-        return mol
-
    @staticmethod
    def sanitize_smiles(smiles_list: List):
        parsed_smiles = []
@@ -353,7 +333,8 @@ class FormatConverter(object):
                #     smi = smi.replace("@", "")

                mol = Chem.MolFromSmiles(smi)
-                mol = FormatConverter.neutralize_atoms(mol)
+                mol = FormatConverter.neutralize_molecule(mol)
+                Chem.RemoveStereochemistry(mol)
                mol = Chem.RemoveAllHs(mol)
                Chem.Kekulize(mol)
                smi_p = Chem.MolToSmiles(mol, kekuleSmiles=True)