Current Dev State

2025-06-23 20:13:54 +02:00
parent b4f9bb277d
commit ded50edaa2
22617 changed files with 4345095 additions and 174 deletions
--- a/utilities/chem.py
+++ b/utilities/chem.py
@ -0,0 +1,732 @@
+import logging
+import re
+from abc import ABC
+from collections import defaultdict
+from typing import List, Optional, Dict
+
+from indigo import Indigo, IndigoException, IndigoObject
+from indigo.renderer import IndigoRenderer
+from rdkit import Chem
+from rdkit import RDLogger
+from rdkit.Chem import MACCSkeys
+from rdkit.Chem import rdChemReactions
+from rdkit.Chem.Draw import rdMolDraw2D
+from rdkit.Chem.MolStandardize import rdMolStandardize
+
+logger = logging.getLogger(__name__)
+RDLogger.DisableLog('rdApp.*')
+
+
+# from rdkit import rdBase
+# rdBase.LogToPythonLogger()
+# pylog = logging.getLogger("rdkit")
+# pylog.propagate = False
+# pylog.disabled = True
+# print(pylog.disabled)
+
+
+class ProductSet(object):
+
+    def __init__(self, product_set: List[str]):
+        self.product_set = product_set
+
+    def __repr__(self):
+        return self.product_set.__repr__()
+
+    def __len__(self):
+        return len(self.product_set)
+
+    def __iter__(self):
+        return iter(self.product_set)
+
+    def __eq__(self, other):
+        return isinstance(other, ProductSet) and sorted(self.product_set) == sorted(other.product_set)
+
+    def __hash__(self):
+        return hash('-'.join(sorted(self.product_set)))
+
+
+class PredictionResult(object):
+
+    def __init__(self, product_sets: List['ProductSet'], probability: float, rule: Optional['Rule'] = None):
+        self.product_sets = product_sets
+        self.probability = probability
+        self.rule = rule
+
+    def __len__(self):
+        return len(self.product_sets)
+
+    def __iter__(self):
+        return iter(self.product_sets)
+
+    def __repr__(self):
+        return f"--{self.probability}/{self.rule}--> {self.product_sets}"
+
+
+class FormatConverter(object):
+
+    @staticmethod
+    def from_smiles(smiles):
+        return Chem.MolFromSmiles(smiles)
+
+    @staticmethod
+    def to_smiles(mol):
+        return Chem.MolToSmiles(mol)
+
+    @staticmethod
+    def InChIKey(smiles):
+        return Chem.MolToInchiKey(FormatConverter.from_smiles(smiles))
+
+    @staticmethod
+    def maccs(smiles):
+        mol = Chem.MolFromSmiles(smiles)
+        bitvec = MACCSkeys.GenMACCSKeys(mol)
+        return bitvec.ToList()
+
+    @staticmethod
+    def to_svg(smiles, mol_size=(200, 150), kekulize=True):
+        mol = FormatConverter.from_smiles(smiles)
+
+        if kekulize:
+            try:
+                mol = Chem.Kekulize(mol)
+            except:
+                mol = Chem.Mol(mol.ToBinary())
+
+        if not mol.GetNumConformers():
+            Chem.rdDepictor.Compute2DCoords(mol)
+
+        drawer = rdMolDraw2D.MolDraw2DSVG(*mol_size)
+        opts = drawer.drawOptions()
+
+        opts.clearBackground = False
+        drawer.DrawMolecule(mol)
+        drawer.FinishDrawing()
+        svg = drawer.GetDrawingText().replace('svg:', '')
+        svg = re.sub("<\?xml.*\?>", '', svg)
+
+        return svg
+
+    @staticmethod
+    def to_png(smiles, mol_size=(200, 150), kekulize=True):
+        mol = FormatConverter.from_smiles(smiles)
+
+        if kekulize:
+            try:
+                Chem.Kekulize(mol)
+            except:
+                mc = Chem.Mol(mol.ToBinary())
+
+        if not mc.GetNumConformers():
+            Chem.rdDepictor.Compute2DCoords(mc)
+
+        pass
+
+    @staticmethod
+    def normalize(smiles):
+        # TODO call to AMBIT Service
+        return smiles
+
+    @staticmethod
+    def standardize(smiles):
+        # Taken from https://bitsilla.com/blog/2021/06/standardizing-a-molecule-using-rdkit/
+        # follows the steps in
+        # https://github.com/greglandrum/RSC_OpenScience_Standardization_202104/blob/main/MolStandardize%20pieces.ipynb
+        # as described **excellently** (by Greg) in
+        # https://www.youtube.com/watch?v=eWTApNX8dJQ
+        mol = Chem.MolFromSmiles(smiles)
+
+        # removeHs, disconnect metal atoms, normalize the molecule, reionize the molecule
+        clean_mol = rdMolStandardize.Cleanup(mol)
+
+        # if many fragments, get the "parent" (the actual mol we are interested in)
+        parent_clean_mol = rdMolStandardize.FragmentParent(clean_mol)
+
+        # try to neutralize molecule
+        uncharger = rdMolStandardize.Uncharger()  # annoying, but necessary as no convenience method exists
+        uncharged_parent_clean_mol = uncharger.uncharge(parent_clean_mol)
+
+        # note that no attempt is made at reionization at this step
+        # nor at ionization at some pH (rdkit has no pKa caculator)
+        # the main aim to to represent all molecules from different sources
+        # in a (single) standard way, for use in ML, catalogue, etc.
+        # te = rdMolStandardize.TautomerEnumerator()  # idem
+        # taut_uncharged_parent_clean_mol = te.Canonicalize(uncharged_parent_clean_mol)
+
+        return Chem.MolToSmiles(uncharged_parent_clean_mol, kekuleSmiles=True)
+
+    @staticmethod
+    def neutralize_smiles(smiles):
+        mol = Chem.MolFromSmiles(smiles)
+        mol = FormatConverter.neutralize_molecule(mol)
+        return Chem.MolToSmiles(mol)
+
+    @staticmethod
+    def neutralize_molecule(mol):
+        pattern = Chem.MolFromSmarts("[+1!h0!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")
+        at_matches = mol.GetSubstructMatches(pattern)
+        at_matches_list = [y[0] for y in at_matches]
+        if len(at_matches_list) > 0:
+            for at_idx in at_matches_list:
+                atom = mol.GetAtomWithIdx(at_idx)
+                chg = atom.GetFormalCharge()
+                hcount = atom.GetTotalNumHs()
+                atom.SetFormalCharge(0)
+                atom.SetNumExplicitHs(hcount - chg)
+                atom.UpdatePropertyCache()
+        return mol
+
+    # @staticmethod
+    # def apply(smiles, smirks, preprocess_smiles=True, bracketize=False, standardize=True):
+    #     logger.debug(f'Applying {smirks} on {smiles}')
+    #
+    #     if bracketize:
+    #         smirks = smirks.split('>>')[0] + ">>(" + smirks.split('>>')[1] + ")"
+    #
+    #     res = set()
+    #     try:
+    #         rxn = rdChemReactions.ReactionFromSmarts(smirks)
+    #         mol = Chem.MolFromSmiles(smiles)
+    #
+    #         # Inplace
+    #         if preprocess_smiles:
+    #             Chem.SanitizeMol(mol)
+    #             mol = Chem.AddHs(mol)
+    #
+    #         # apply!
+    #         reacts = rxn.RunReactants((mol,))
+    #         if len(reacts):
+    #             # Sanitize mols
+    #             for product_set in reacts:
+    #                 prod_set = list()
+    #                 for product in product_set:
+    #                     # Fixes
+    #                     # [2025-01-30 23:00:50] ERROR chem - Sanitizing and converting failed:
+    #                     # non-ring atom 3 marked aromatic
+    #                     # But does not improve overall performance
+    #                     #
+    #                     # for a in product.GetAtoms():
+    #                     #     if (not a.IsInRing()) and a.GetIsAromatic():
+    #                     #         a.SetIsAromatic(False)
+    #                     # for b in product.GetBonds():
+    #                     #     if (not b.IsInRing()) and b.GetIsAromatic():
+    #                     #         b.SetIsAromatic(False)
+    #
+    #                     try:
+    #                         Chem.SanitizeMol(product)
+    #                         prod_set.append(FormatConverter.standardize(Chem.MolToSmiles(product)))
+    #                     except ValueError as e:
+    #                         logger.error(f'Sanitizing and converting failed:\n{e}')
+    #                         continue
+    #                 res.add(tuple(list(set(prod_set))))
+    #     except Exception as e:
+    #         logger.error(f'Applying {smirks} on {smiles} failed:\n{e}')
+    #
+    #     return list(res)
+
+    @staticmethod
+    def apply(smiles: str, smirks: str, preprocess_smiles: bool = True, bracketize: bool = False,
+              standardize: bool = True, kekulize: bool = True) -> List['ProductSet']:
+        logger.debug(f'Applying {smirks} on {smiles}')
+
+        # If explicitly wanted or rule generates multiple products add brackets around products to capture all
+        if bracketize:  # or "." in smirks:
+            smirks = smirks.split('>>')[0] + ">>(" + smirks.split('>>')[1] + ")"
+
+        # List of ProductSet objects
+        pss = set()
+        try:
+            rxn = rdChemReactions.ReactionFromSmarts(smirks)
+            mol = Chem.MolFromSmiles(smiles)
+
+            # Inplace
+            if preprocess_smiles:
+                Chem.SanitizeMol(mol)
+                mol = Chem.AddHs(mol)
+
+            # apply!
+            sites = rxn.RunReactants((mol,))
+            logger.debug(f"{len(sites)} products sets generated")
+            if len(sites):
+                # Sanitize mols
+                for product_set in sites:
+                    prods = []
+                    for product in product_set:
+                        try:
+                            Chem.SanitizeMol(product)
+
+                            product = FormatConverter.standardize(Chem.MolToSmiles(product))
+
+                            # if kekulize:
+                            #     # from rdkit.Chem import MolStandardize
+                            #     #
+                            #     # # Attempt re-sanitization via standardizer
+                            #     # cleaner = MolStandardize.rdMolStandardize.Cleanup()
+                            #     # mol = cleaner.cleanup(product)
+                            #     # # Fixes
+                            #     # # [2025-01-30 23:00:50] ERROR chem - Sanitizing and converting failed:
+                            #     # # non-ring atom 3 marked aromatic
+                            #     # # But does not improve overall performance
+                            #     # # for a in product.GetAtoms():
+                            #     # #     if (not a.IsInRing()) and a.GetIsAromatic():
+                            #     # #         a.SetIsAromatic(False)
+                            #     # #
+                            #     # # for b in product.GetBonds():
+                            #     # #     if (not b.IsInRing()) and b.GetIsAromatic():
+                            #     # #         b.SetIsAromatic(False)
+                            #     # for atom in product.GetAtoms():
+                            #     #     atom.SetIsAromatic(False)
+                            #     # for bond in product.GetBonds():
+                            #     #     bond.SetIsAromatic(False)
+                            #     Chem.Kekulize(product)
+
+                            prods.append(product)
+                        except ValueError as e:
+                            logger.error(f'Sanitizing and converting failed:\n{e}')
+                            continue
+
+                    # TODO doc!
+                    if len(prods) and len(prods) == len(product_set):
+                        ps = ProductSet(prods)
+                        pss.add(ps)
+
+        except Exception as e:
+            logger.error(f'Applying {smirks} on {smiles} failed:\n{e}')
+
+        return pss
+
+    # @staticmethod
+    # def apply(reaction, smiles):
+    #     rxn = AllChem.ReactionFromSmarts(reaction)
+    #     return [Chem.MolToSmiles(x, 1) for x in rxn.RunReactants((Chem.MolFromSmiles(smiles),))[0]]
+
+    @staticmethod
+    def MACCS(smiles):
+        return MACCSkeys.GenMACCSKeys(FormatConverter.from_smiles(smiles))
+
+    @staticmethod
+    def neutralize_atoms(mol):
+        pattern = Chem.MolFromSmarts("[+1!h0!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")
+        at_matches = mol.GetSubstructMatches(pattern)
+        at_matches_list = [y[0] for y in at_matches]
+        if len(at_matches_list) > 0:
+            for at_idx in at_matches_list:
+                atom = mol.GetAtomWithIdx(at_idx)
+                chg = atom.GetFormalCharge()
+                hcount = atom.GetTotalNumHs()
+                atom.SetFormalCharge(0)
+                atom.SetNumExplicitHs(hcount - chg)
+                atom.UpdatePropertyCache()
+        return mol
+
+    @staticmethod
+    def sanitize_smiles(smiles_list: List):
+        parsed_smiles = []
+        errors = 0
+        for smi in smiles_list:
+            try:
+                # # Remove Stereo and Flatten
+                # if "/" in smi:
+                #     smi = smi.replace("/", "")
+                # if "\\" in smi:
+                #     smi = smi.replace("\\", "")
+                # if "@" in smi:
+                #     smi = smi.replace("@", "")
+
+                mol = Chem.MolFromSmiles(smi)
+                mol = FormatConverter.neutralize_atoms(mol)
+                mol = Chem.RemoveAllHs(mol)
+                Chem.Kekulize(mol)
+                smi_p = Chem.MolToSmiles(mol, kekuleSmiles=True)
+                smi_p = Chem.CanonSmiles(smi_p)
+
+                if '~' in smi_p:
+                    smi_p1 = smi_p.replace('~', '')
+                    parsed_smiles.append(smi_p1)
+                else:
+                    parsed_smiles.append(smi_p)
+            except Exception as e:
+                errors += 1
+                pass
+
+        return parsed_smiles, errors
+
+
+
+
+class Standardizer(ABC):
+
+    def __init__(self, name):
+        self.name = name
+
+    def standardize(self, smiles: str) -> str:
+        return FormatConverter.normalize(smiles)
+
+
+class RuleStandardizer(Standardizer):
+
+    def __init__(self, name, smirks):
+        super().__init__(name)
+        self.smirks = smirks
+
+    def standardize(self, smiles: str) -> str:
+        standardized_smiles = list(set(FormatConverter.apply(smiles, self.smirks)))
+
+        if len(standardized_smiles) > 1:
+            logger.warning(f'{self.smirks} generated more than 1 compound {standardized_smiles}')
+            print(f'{self.smirks} generated more than 1 compound {standardized_smiles}')
+            standardized_smiles = standardized_smiles[:1]
+
+        if standardized_smiles:
+            smiles = standardized_smiles[0]
+
+        return super().standardize(smiles)
+
+
+class RegExStandardizer(Standardizer):
+
+    def __init__(self, name, replacements: dict):
+        super().__init__(name)
+        self.replacements = replacements
+
+    def standardize(self, smiles: str) -> str:
+        smi = smiles
+        mod_smi = smiles
+
+        for k, v in self.replacements.items():
+            mod_smi = smi.replace(k, v)
+
+        while mod_smi != smi:
+            mod_smi = smi
+            for k, v in self.replacements.items():
+                smi = smi.replace(k, v)
+
+        return super().standardize(smi)
+
+
+FLATTEN = [
+    RegExStandardizer("Remove Stereo", {"@": ""})
+]
+
+UN_CIS_TRANS = [
+    RegExStandardizer("Un-Cis-Trans", {"/": "", "\\": ""})
+]
+
+BASIC = [
+    RuleStandardizer("ammoniumstandardization", "[H][N+:1]([H])([H])[#6:2]>>[H][#7:1]([H])-[#6:2]"),
+    RuleStandardizer("cyanate", "[H][#8:1][C:2]#[N:3]>>[#8-:1][C:2]#[N:3]"),
+    RuleStandardizer("deprotonatecarboxyls", "[H][#8:1]-[#6:2]=[O:3]>>[#8-:1]-[#6:2]=[O:3]"),
+    RuleStandardizer("forNOOH", "[H][#8:1]-[#7+:2](-[*:3])=[O:4]>>[#8-:1]-[#7+:2](-[*:3])=[O:4]"),
+    RuleStandardizer("Hydroxylprotonation", "[#6;A:1][#6:2](-[#8-:3])=[#6;A:4]>>[#6:1]-[#6:2](-[#8:3][H])=[#6;A:4]"),
+    RuleStandardizer("phosphatedeprotonation", "[H][#8:1]-[$([#15]);!$(P([O-])):2]>>[#8-:1]-[#15:2]"),
+    RuleStandardizer("PicricAcid",
+                     "[H][#8:1]-[c:2]1[c:3][c:4][c:5]([c:6][c:7]1-[#7+:8](-[#8-:9])=[O:10])-[#7+:11](-[#8-:12])=[O:13]>>[#8-:1]-[c:2]1[c:3][c:4][c:5]([c:6][c:7]1-[#7+:8](-[#8-:9])=[O:10])-[#7+:11](-[#8-:12])=[O:13]"),
+    RuleStandardizer("Sulfate1", "[H][#8:1][S:2]([#8:3][H])(=[O:4])=[O:5]>>[#8-:1][S:2]([#8-:3])(=[O:4])=[O:5]"),
+    RuleStandardizer("Sulfate2",
+                     "[#6:1]-[#8:2][S:3]([#8:4][H])(=[O:5])=[O:6]>>[#6:1]-[#8:2][S:3]([#8-:4])(=[O:5])=[O:6]"),
+    RuleStandardizer("Sulfate3", "[H][#8:3][S:2]([#6:1])(=[O:4])=[O:5]>>[#6:1][S:2]([#8-:3])(=[O:4])=[O:5]"),
+    RuleStandardizer("Transform_c1353forSOOH", "[H][#8:1][S:2]([*:3])=[O:4]>>[#8-:1][S:2]([*:3])=[O:4]"),
+]
+
+ENHANCED = BASIC + [
+    RuleStandardizer("fullPhosphatedeprotonation", "[H][#8:1]-[#15:2]>>[#8-:1]-[#15:2]")
+]
+
+EXOTIC = ENHANCED + [
+    RuleStandardizer("ThioPhosphate1", "[H][S:1]-[#15:2]=[$([#16]),$([#8]):3]>>[S-:1]-[#15:2]=[$([#16]),$([#8]):3]")
+]
+
+COA_CUTTER = [
+    RuleStandardizer("CutCoEnzymeAOff",
+                     "CC(C)(COP(O)(=O)OP(O)(=O)OCC1OC(C(O)C1OP(O)(O)=O)n1cnc2c(N)ncnc12)C(O)C(=O)NCCC(=O)NCCS[$(*):1]>>[O-][$(*):1]")
+]
+
+ENOL_KETO = [
+    RuleStandardizer("enol2Ketone", "[H][#8:2]-[#6:3]=[#6:1]>>[#6:1]-[#6:3]=[O:2]")
+]
+
+MATCH_STANDARDIZER = EXOTIC + FLATTEN + UN_CIS_TRANS + COA_CUTTER + ENOL_KETO
+
+
+class IndigoUtils(object):
+
+    @staticmethod
+    def layout(mol_data):
+        i = Indigo()
+        try:
+            if mol_data.startswith('$RXN') or '>>' in mol_data:
+                rxn = i.loadQueryReaction(mol_data)
+                rxn.layout()
+                return rxn.rxnfile()
+            else:
+                mol = i.loadQueryMolecule(mol_data)
+                mol.layout()
+                return mol.molfile()
+        except IndigoException as e:
+            try:
+                logger.info("layout() failed, trying loadReactionSMARTS as fallback!")
+                rxn = IndigoUtils.load_reaction_SMARTS(mol_data)
+                rxn.layout()
+                return rxn.molfile()
+            except IndigoException as e2:
+                logger.error(f'layout() failed due to {e2}!')
+
+    @staticmethod
+    def load_reaction_SMARTS(mol):
+        return Indigo().loadReactionSmarts(mol)
+
+    @staticmethod
+    def aromatize(mol_data, is_query):
+        i = Indigo()
+        try:
+            if mol_data.startswith('$RXN'):
+                if is_query:
+                    rxn = i.loadQueryReaction(mol_data)
+                else:
+                    rxn = i.loadReaction(mol_data)
+
+                rxn.aromatize()
+                return rxn.rxnfile()
+            else:
+                if is_query:
+                    mol = i.loadQueryMolecule(mol_data)
+                else:
+                    mol = i.loadMolecule(mol_data)
+
+                mol.aromatize()
+                return mol.molfile()
+        except IndigoException as e:
+            try:
+                logger.info("Aromatizing failed, trying loadReactionSMARTS as fallback!")
+                rxn = IndigoUtils.load_reaction_SMARTS(mol_data)
+                rxn.aromatize()
+                return rxn.molfile()
+            except IndigoException as e2:
+                logger.error(f'Aromatizing failed due to {e2}!')
+
+    @staticmethod
+    def dearomatize(mol_data, is_query):
+        i = Indigo()
+        try:
+            if mol_data.startswith('$RXN'):
+                if is_query:
+                    rxn = i.loadQueryReaction(mol_data)
+                else:
+                    rxn = i.loadReaction(mol_data)
+
+                rxn.dearomatize()
+                return rxn.rxnfile()
+            else:
+                if is_query:
+                    mol = i.loadQueryMolecule(mol_data)
+                else:
+                    mol = i.loadMolecule(mol_data)
+
+                mol.dearomatize()
+                return mol.molfile()
+        except IndigoException as e:
+            try:
+                logger.info("De-Aromatizing failed, trying loadReactionSMARTS as fallback!")
+                rxn = IndigoUtils.load_reaction_SMARTS(mol_data)
+                rxn.dearomatize()
+                return rxn.molfile()
+            except IndigoException as e2:
+                logger.error(f'De-Aromatizing failed due to {e2}!')
+
+    @staticmethod
+    def sanitize_functional_group(functional_group: str):
+        counter = 0
+        while True:
+            counter += 1
+
+            copy = functional_group
+
+            # special environment handling (amines, hydroxy, esters, ethers)
+            # the higher substituted should not contain H env.
+            if functional_group == '[C]=O':
+                functional_group = "[H][C](=O)[CX4,c]"
+
+            # aldamines
+            if functional_group == "O=[C]N(R)R":
+                functional_group = "O=[C]([H])N(R)R"
+
+            # ether, ester, ketones, amines, thioether
+            vals = [
+                "ROR",
+                "O=C(R)OR",
+                "[H]N(R)R",
+                "O=C(R)R",
+                "RN(R)R",
+                "RSR",
+            ]
+            if functional_group in vals:
+                functional_group = functional_group.replace("R", "[CX4,c]")
+
+            # esters, ketones, amides
+            functional_group = functional_group.replace("O=C(R)", "O=C([CX4,c])")
+
+            if functional_group == "RS*R" or functional_group == "RO*R":
+                # neighboring atoms can be any aromatic atom, but they should not be highlighted
+                functional_group = functional_group.replace("R", "")
+
+            # aromatic compounds:  aromatic atoms are denoted with *
+            # single aromatic heteroatoms, not substituted
+
+            # unsubstituted aromatic nitrogen
+            if functional_group == "RN*(R)R":
+                functional_group = "[nH1,nX2](a)a"  # pyrrole (with H) or pyridine (no other connections); currently overlaps with neighboring aromatic atoms
+
+            # substituted aromatic nitrogen
+            functional_group = functional_group.replace("N*(R)R",
+                                                        "n(a)a")  # substituent will be before N*; currently overlaps with neighboring aromatic atoms
+            # pyridinium
+            if functional_group == "RN*(R)(R)(R)R":
+                functional_group = "[CX4,c]n(a)a"  # currently overlaps with neighboring aromatic atoms
+
+            # N-oxide
+            if functional_group == "[H]ON*(R)(R)(R)R":
+                functional_group = "[O-][n+](a)a"  # currently overlaps with neighboring aromatic atoms
+
+            # other aromatic hetero atoms
+            functional_group = functional_group.replace("C*", "c")
+            functional_group = functional_group.replace("N*", "n")
+            functional_group = functional_group.replace("S*", "s")
+            functional_group = functional_group.replace("O*", "o")
+            functional_group = functional_group.replace("Se*", "se")
+            functional_group = functional_group.replace("P*", "p")
+
+            # other replacement, to accomodate for the standardization rules in enviPath
+            # This is not the perfect way to do it; there should be a way to replace substructure SMARTS in SMARTS?
+            # nitro groups are broken, due to charge handling. this SMARTS matches both forms (formal charges and hypervalent); Ertl-CDK still treats both forms separately...
+            functional_group = functional_group.replace("[H]O[N](=O)R", "[CX4,c][NX3](~[OX1])~[OX1]")
+            functional_group = functional_group.replace("O=N(=O)R", "[CX4,c][NX3](~[OX1])~[OX1]")
+            # carboxylic acid: this SMARTS matches both neutral and anionic form; includes COOH in larger functional_groups
+            functional_group = functional_group.replace("[H]OC(=O)", "[OD1]C(=O)")
+            # azo
+            functional_group = functional_group.replace("N#[N]N(R)R", "[NX1]~[NX2]~N[CX4,c]")
+            functional_group = functional_group.replace("RN=[N]=NR", "[NX1]~[NX2]~N[CX4,c]")
+            # TODO: there might be more problematic groups, which we have yet to find.
+
+            # other environment atoms (can be aromatic or aliphatic Csp3, or H)
+            functional_group = functional_group.replace("R", "[H,CX4,c]")
+
+            if copy == functional_group:
+                break
+
+        return functional_group
+
+    @staticmethod
+    def _colorize(indigo: Indigo, molecule: IndigoObject, functional_groups: Dict[str, int], is_reaction: bool):
+        indigo.setOption("render-atom-color-property", "color")
+        indigo.setOption("aromaticity-model", "generic")
+
+        counts = defaultdict(lambda: 0)
+        environment = set()
+
+        matcher = indigo.substructureMatcher(molecule)
+
+        # Determine environment atoms as they will be colored black
+        for env in ["[CX4]", "c"]:
+            query = indigo.loadSmarts(env)
+            for match in matcher.iterateMatches(query):
+                if match is not None:
+                    for atom in query.iterateAtoms():
+                        mappedAtom = match.mapAtom(atom)
+
+                        if mappedAtom is None:
+                            continue
+
+                        environment.add(mappedAtom.index())
+
+        for k, v in functional_groups.items():
+
+            sanitized = IndigoUtils.sanitize_functional_group(k)
+
+            query = indigo.loadSmarts(sanitized)
+
+            for match in matcher.iterateMatches(query):
+                if match is not None:
+
+                    for atom in query.iterateAtoms():
+                        mappedAtom = match.mapAtom(atom)
+                        if mappedAtom is None or mappedAtom.index() in environment:
+                            continue
+
+                        counts[mappedAtom.index()] = max(v, counts[mappedAtom.index()])
+
+        for k, v in counts.items():
+            if is_reaction:
+                color = "128, 0, 128"
+            else:
+                if v <= 5:
+                    color = "200, 0, 0"
+                else:
+                    color = "0, 112, 0"
+
+            molecule.addDataSGroup([k], [], "color", color)
+
+    @staticmethod
+    def mol_to_svg(mol_data: str, width: int = 0, height: int = 0, functional_groups: Dict[str, int] = None):
+
+        if functional_groups is None:
+            functional_groups = {}
+
+        i = Indigo()
+        renderer = IndigoRenderer(i)
+
+        i.setOption("render-output-format", "svg")
+        i.setOption("render-coloring", not bool(len(functional_groups.keys())))
+        i.setOption("render-image-size", width, height)
+        i.setOption("render-bond-line-width", 2.0)
+
+        mol = i.loadMolecule(mol_data)
+
+        if len(functional_groups.keys()) > 0:
+            IndigoUtils._colorize(i, mol, functional_groups, False)
+
+        return renderer.renderToBuffer(mol).decode('UTF-8')
+
+    @staticmethod
+    def smirks_to_svg(smirks: str, is_query_smirks, width: int = 0, height: int = 0,
+                      educt_functional_groups: Dict[str, int] = None, product_functional_groups: Dict[str, int] = None):
+        if educt_functional_groups is None:
+            educt_functional_groups = {}
+
+        if product_functional_groups is None:
+            product_functional_groups = {}
+
+        i = Indigo()
+        renderer = IndigoRenderer(i)
+
+        i.setOption("render-output-format", "svg")
+        i.setOption("render-coloring", True)
+        i.setOption("render-image-size", width, height)
+
+        if is_query_smirks:
+            obj = i.loadReactionSmarts(smirks)
+        else:
+            obj = i.loadReaction(smirks)
+
+            if len(educt_functional_groups.keys()) > 0:
+                for react in obj.iterateReactants():
+                    IndigoUtils._colorize(i, react, educt_functional_groups, True)
+
+            if len(product_functional_groups.keys()) > 0:
+                for prod in obj.iterateProducts():
+                    IndigoUtils._colorize(i, prod, product_functional_groups, True)
+
+        return renderer.renderToBuffer(obj).decode('UTF-8')
+
+
+if __name__ == '__main__':
+    data = {
+        "struct": "\n  Ketcher  2172510 12D 1   1.00000     0.00000     0\n\n  6  6  0     0  0            999 V2000\n    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   -1.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   -1.5000   -0.8660    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   -1.0000   -1.7321    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    0.0000   -1.7321    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    0.5000   -0.8660    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n  1  2  2  0  0  0  0\n  2  3  1  0  0  0  0\n  3  4  2  0  0  0  0\n  4  5  1  0  0  0  0\n  5  6  2  0  0  0  0\n  6  1  1  0  0  0  0\nM  END\n",
+        "options": {
+            "smart-layout": True,
+            "ignore-stereochemistry-errors": True,
+            "mass-skip-error-on-pseudoatoms": False,
+            "gross-formula-add-rsites": True
+        }
+    }
+
+    print(IndigoUtils.aromatize(data['struct'], False))