forked from enviPath/enviPy
Current Dev State
This commit is contained in:
732
utilities/chem.py
Normal file
732
utilities/chem.py
Normal file
@ -0,0 +1,732 @@
|
||||
import logging
|
||||
import re
|
||||
from abc import ABC
|
||||
from collections import defaultdict
|
||||
from typing import List, Optional, Dict
|
||||
|
||||
from indigo import Indigo, IndigoException, IndigoObject
|
||||
from indigo.renderer import IndigoRenderer
|
||||
from rdkit import Chem
|
||||
from rdkit import RDLogger
|
||||
from rdkit.Chem import MACCSkeys
|
||||
from rdkit.Chem import rdChemReactions
|
||||
from rdkit.Chem.Draw import rdMolDraw2D
|
||||
from rdkit.Chem.MolStandardize import rdMolStandardize
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
RDLogger.DisableLog('rdApp.*')
|
||||
|
||||
|
||||
# from rdkit import rdBase
|
||||
# rdBase.LogToPythonLogger()
|
||||
# pylog = logging.getLogger("rdkit")
|
||||
# pylog.propagate = False
|
||||
# pylog.disabled = True
|
||||
# print(pylog.disabled)
|
||||
|
||||
|
||||
class ProductSet(object):
|
||||
|
||||
def __init__(self, product_set: List[str]):
|
||||
self.product_set = product_set
|
||||
|
||||
def __repr__(self):
|
||||
return self.product_set.__repr__()
|
||||
|
||||
def __len__(self):
|
||||
return len(self.product_set)
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.product_set)
|
||||
|
||||
def __eq__(self, other):
|
||||
return isinstance(other, ProductSet) and sorted(self.product_set) == sorted(other.product_set)
|
||||
|
||||
def __hash__(self):
|
||||
return hash('-'.join(sorted(self.product_set)))
|
||||
|
||||
|
||||
class PredictionResult(object):
|
||||
|
||||
def __init__(self, product_sets: List['ProductSet'], probability: float, rule: Optional['Rule'] = None):
|
||||
self.product_sets = product_sets
|
||||
self.probability = probability
|
||||
self.rule = rule
|
||||
|
||||
def __len__(self):
|
||||
return len(self.product_sets)
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.product_sets)
|
||||
|
||||
def __repr__(self):
|
||||
return f"--{self.probability}/{self.rule}--> {self.product_sets}"
|
||||
|
||||
|
||||
class FormatConverter(object):
|
||||
|
||||
@staticmethod
|
||||
def from_smiles(smiles):
|
||||
return Chem.MolFromSmiles(smiles)
|
||||
|
||||
@staticmethod
|
||||
def to_smiles(mol):
|
||||
return Chem.MolToSmiles(mol)
|
||||
|
||||
@staticmethod
|
||||
def InChIKey(smiles):
|
||||
return Chem.MolToInchiKey(FormatConverter.from_smiles(smiles))
|
||||
|
||||
@staticmethod
|
||||
def maccs(smiles):
|
||||
mol = Chem.MolFromSmiles(smiles)
|
||||
bitvec = MACCSkeys.GenMACCSKeys(mol)
|
||||
return bitvec.ToList()
|
||||
|
||||
@staticmethod
|
||||
def to_svg(smiles, mol_size=(200, 150), kekulize=True):
|
||||
mol = FormatConverter.from_smiles(smiles)
|
||||
|
||||
if kekulize:
|
||||
try:
|
||||
mol = Chem.Kekulize(mol)
|
||||
except:
|
||||
mol = Chem.Mol(mol.ToBinary())
|
||||
|
||||
if not mol.GetNumConformers():
|
||||
Chem.rdDepictor.Compute2DCoords(mol)
|
||||
|
||||
drawer = rdMolDraw2D.MolDraw2DSVG(*mol_size)
|
||||
opts = drawer.drawOptions()
|
||||
|
||||
opts.clearBackground = False
|
||||
drawer.DrawMolecule(mol)
|
||||
drawer.FinishDrawing()
|
||||
svg = drawer.GetDrawingText().replace('svg:', '')
|
||||
svg = re.sub("<\?xml.*\?>", '', svg)
|
||||
|
||||
return svg
|
||||
|
||||
@staticmethod
|
||||
def to_png(smiles, mol_size=(200, 150), kekulize=True):
|
||||
mol = FormatConverter.from_smiles(smiles)
|
||||
|
||||
if kekulize:
|
||||
try:
|
||||
Chem.Kekulize(mol)
|
||||
except:
|
||||
mc = Chem.Mol(mol.ToBinary())
|
||||
|
||||
if not mc.GetNumConformers():
|
||||
Chem.rdDepictor.Compute2DCoords(mc)
|
||||
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def normalize(smiles):
|
||||
# TODO call to AMBIT Service
|
||||
return smiles
|
||||
|
||||
@staticmethod
|
||||
def standardize(smiles):
|
||||
# Taken from https://bitsilla.com/blog/2021/06/standardizing-a-molecule-using-rdkit/
|
||||
# follows the steps in
|
||||
# https://github.com/greglandrum/RSC_OpenScience_Standardization_202104/blob/main/MolStandardize%20pieces.ipynb
|
||||
# as described **excellently** (by Greg) in
|
||||
# https://www.youtube.com/watch?v=eWTApNX8dJQ
|
||||
mol = Chem.MolFromSmiles(smiles)
|
||||
|
||||
# removeHs, disconnect metal atoms, normalize the molecule, reionize the molecule
|
||||
clean_mol = rdMolStandardize.Cleanup(mol)
|
||||
|
||||
# if many fragments, get the "parent" (the actual mol we are interested in)
|
||||
parent_clean_mol = rdMolStandardize.FragmentParent(clean_mol)
|
||||
|
||||
# try to neutralize molecule
|
||||
uncharger = rdMolStandardize.Uncharger() # annoying, but necessary as no convenience method exists
|
||||
uncharged_parent_clean_mol = uncharger.uncharge(parent_clean_mol)
|
||||
|
||||
# note that no attempt is made at reionization at this step
|
||||
# nor at ionization at some pH (rdkit has no pKa caculator)
|
||||
# the main aim to to represent all molecules from different sources
|
||||
# in a (single) standard way, for use in ML, catalogue, etc.
|
||||
# te = rdMolStandardize.TautomerEnumerator() # idem
|
||||
# taut_uncharged_parent_clean_mol = te.Canonicalize(uncharged_parent_clean_mol)
|
||||
|
||||
return Chem.MolToSmiles(uncharged_parent_clean_mol, kekuleSmiles=True)
|
||||
|
||||
@staticmethod
|
||||
def neutralize_smiles(smiles):
|
||||
mol = Chem.MolFromSmiles(smiles)
|
||||
mol = FormatConverter.neutralize_molecule(mol)
|
||||
return Chem.MolToSmiles(mol)
|
||||
|
||||
@staticmethod
|
||||
def neutralize_molecule(mol):
|
||||
pattern = Chem.MolFromSmarts("[+1!h0!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")
|
||||
at_matches = mol.GetSubstructMatches(pattern)
|
||||
at_matches_list = [y[0] for y in at_matches]
|
||||
if len(at_matches_list) > 0:
|
||||
for at_idx in at_matches_list:
|
||||
atom = mol.GetAtomWithIdx(at_idx)
|
||||
chg = atom.GetFormalCharge()
|
||||
hcount = atom.GetTotalNumHs()
|
||||
atom.SetFormalCharge(0)
|
||||
atom.SetNumExplicitHs(hcount - chg)
|
||||
atom.UpdatePropertyCache()
|
||||
return mol
|
||||
|
||||
# @staticmethod
|
||||
# def apply(smiles, smirks, preprocess_smiles=True, bracketize=False, standardize=True):
|
||||
# logger.debug(f'Applying {smirks} on {smiles}')
|
||||
#
|
||||
# if bracketize:
|
||||
# smirks = smirks.split('>>')[0] + ">>(" + smirks.split('>>')[1] + ")"
|
||||
#
|
||||
# res = set()
|
||||
# try:
|
||||
# rxn = rdChemReactions.ReactionFromSmarts(smirks)
|
||||
# mol = Chem.MolFromSmiles(smiles)
|
||||
#
|
||||
# # Inplace
|
||||
# if preprocess_smiles:
|
||||
# Chem.SanitizeMol(mol)
|
||||
# mol = Chem.AddHs(mol)
|
||||
#
|
||||
# # apply!
|
||||
# reacts = rxn.RunReactants((mol,))
|
||||
# if len(reacts):
|
||||
# # Sanitize mols
|
||||
# for product_set in reacts:
|
||||
# prod_set = list()
|
||||
# for product in product_set:
|
||||
# # Fixes
|
||||
# # [2025-01-30 23:00:50] ERROR chem - Sanitizing and converting failed:
|
||||
# # non-ring atom 3 marked aromatic
|
||||
# # But does not improve overall performance
|
||||
# #
|
||||
# # for a in product.GetAtoms():
|
||||
# # if (not a.IsInRing()) and a.GetIsAromatic():
|
||||
# # a.SetIsAromatic(False)
|
||||
# # for b in product.GetBonds():
|
||||
# # if (not b.IsInRing()) and b.GetIsAromatic():
|
||||
# # b.SetIsAromatic(False)
|
||||
#
|
||||
# try:
|
||||
# Chem.SanitizeMol(product)
|
||||
# prod_set.append(FormatConverter.standardize(Chem.MolToSmiles(product)))
|
||||
# except ValueError as e:
|
||||
# logger.error(f'Sanitizing and converting failed:\n{e}')
|
||||
# continue
|
||||
# res.add(tuple(list(set(prod_set))))
|
||||
# except Exception as e:
|
||||
# logger.error(f'Applying {smirks} on {smiles} failed:\n{e}')
|
||||
#
|
||||
# return list(res)
|
||||
|
||||
@staticmethod
|
||||
def apply(smiles: str, smirks: str, preprocess_smiles: bool = True, bracketize: bool = False,
|
||||
standardize: bool = True, kekulize: bool = True) -> List['ProductSet']:
|
||||
logger.debug(f'Applying {smirks} on {smiles}')
|
||||
|
||||
# If explicitly wanted or rule generates multiple products add brackets around products to capture all
|
||||
if bracketize: # or "." in smirks:
|
||||
smirks = smirks.split('>>')[0] + ">>(" + smirks.split('>>')[1] + ")"
|
||||
|
||||
# List of ProductSet objects
|
||||
pss = set()
|
||||
try:
|
||||
rxn = rdChemReactions.ReactionFromSmarts(smirks)
|
||||
mol = Chem.MolFromSmiles(smiles)
|
||||
|
||||
# Inplace
|
||||
if preprocess_smiles:
|
||||
Chem.SanitizeMol(mol)
|
||||
mol = Chem.AddHs(mol)
|
||||
|
||||
# apply!
|
||||
sites = rxn.RunReactants((mol,))
|
||||
logger.debug(f"{len(sites)} products sets generated")
|
||||
if len(sites):
|
||||
# Sanitize mols
|
||||
for product_set in sites:
|
||||
prods = []
|
||||
for product in product_set:
|
||||
try:
|
||||
Chem.SanitizeMol(product)
|
||||
|
||||
product = FormatConverter.standardize(Chem.MolToSmiles(product))
|
||||
|
||||
# if kekulize:
|
||||
# # from rdkit.Chem import MolStandardize
|
||||
# #
|
||||
# # # Attempt re-sanitization via standardizer
|
||||
# # cleaner = MolStandardize.rdMolStandardize.Cleanup()
|
||||
# # mol = cleaner.cleanup(product)
|
||||
# # # Fixes
|
||||
# # # [2025-01-30 23:00:50] ERROR chem - Sanitizing and converting failed:
|
||||
# # # non-ring atom 3 marked aromatic
|
||||
# # # But does not improve overall performance
|
||||
# # # for a in product.GetAtoms():
|
||||
# # # if (not a.IsInRing()) and a.GetIsAromatic():
|
||||
# # # a.SetIsAromatic(False)
|
||||
# # #
|
||||
# # # for b in product.GetBonds():
|
||||
# # # if (not b.IsInRing()) and b.GetIsAromatic():
|
||||
# # # b.SetIsAromatic(False)
|
||||
# # for atom in product.GetAtoms():
|
||||
# # atom.SetIsAromatic(False)
|
||||
# # for bond in product.GetBonds():
|
||||
# # bond.SetIsAromatic(False)
|
||||
# Chem.Kekulize(product)
|
||||
|
||||
prods.append(product)
|
||||
except ValueError as e:
|
||||
logger.error(f'Sanitizing and converting failed:\n{e}')
|
||||
continue
|
||||
|
||||
# TODO doc!
|
||||
if len(prods) and len(prods) == len(product_set):
|
||||
ps = ProductSet(prods)
|
||||
pss.add(ps)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f'Applying {smirks} on {smiles} failed:\n{e}')
|
||||
|
||||
return pss
|
||||
|
||||
# @staticmethod
|
||||
# def apply(reaction, smiles):
|
||||
# rxn = AllChem.ReactionFromSmarts(reaction)
|
||||
# return [Chem.MolToSmiles(x, 1) for x in rxn.RunReactants((Chem.MolFromSmiles(smiles),))[0]]
|
||||
|
||||
@staticmethod
|
||||
def MACCS(smiles):
|
||||
return MACCSkeys.GenMACCSKeys(FormatConverter.from_smiles(smiles))
|
||||
|
||||
@staticmethod
|
||||
def neutralize_atoms(mol):
|
||||
pattern = Chem.MolFromSmarts("[+1!h0!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")
|
||||
at_matches = mol.GetSubstructMatches(pattern)
|
||||
at_matches_list = [y[0] for y in at_matches]
|
||||
if len(at_matches_list) > 0:
|
||||
for at_idx in at_matches_list:
|
||||
atom = mol.GetAtomWithIdx(at_idx)
|
||||
chg = atom.GetFormalCharge()
|
||||
hcount = atom.GetTotalNumHs()
|
||||
atom.SetFormalCharge(0)
|
||||
atom.SetNumExplicitHs(hcount - chg)
|
||||
atom.UpdatePropertyCache()
|
||||
return mol
|
||||
|
||||
@staticmethod
|
||||
def sanitize_smiles(smiles_list: List):
|
||||
parsed_smiles = []
|
||||
errors = 0
|
||||
for smi in smiles_list:
|
||||
try:
|
||||
# # Remove Stereo and Flatten
|
||||
# if "/" in smi:
|
||||
# smi = smi.replace("/", "")
|
||||
# if "\\" in smi:
|
||||
# smi = smi.replace("\\", "")
|
||||
# if "@" in smi:
|
||||
# smi = smi.replace("@", "")
|
||||
|
||||
mol = Chem.MolFromSmiles(smi)
|
||||
mol = FormatConverter.neutralize_atoms(mol)
|
||||
mol = Chem.RemoveAllHs(mol)
|
||||
Chem.Kekulize(mol)
|
||||
smi_p = Chem.MolToSmiles(mol, kekuleSmiles=True)
|
||||
smi_p = Chem.CanonSmiles(smi_p)
|
||||
|
||||
if '~' in smi_p:
|
||||
smi_p1 = smi_p.replace('~', '')
|
||||
parsed_smiles.append(smi_p1)
|
||||
else:
|
||||
parsed_smiles.append(smi_p)
|
||||
except Exception as e:
|
||||
errors += 1
|
||||
pass
|
||||
|
||||
return parsed_smiles, errors
|
||||
|
||||
|
||||
|
||||
|
||||
class Standardizer(ABC):
|
||||
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
|
||||
def standardize(self, smiles: str) -> str:
|
||||
return FormatConverter.normalize(smiles)
|
||||
|
||||
|
||||
class RuleStandardizer(Standardizer):
|
||||
|
||||
def __init__(self, name, smirks):
|
||||
super().__init__(name)
|
||||
self.smirks = smirks
|
||||
|
||||
def standardize(self, smiles: str) -> str:
|
||||
standardized_smiles = list(set(FormatConverter.apply(smiles, self.smirks)))
|
||||
|
||||
if len(standardized_smiles) > 1:
|
||||
logger.warning(f'{self.smirks} generated more than 1 compound {standardized_smiles}')
|
||||
print(f'{self.smirks} generated more than 1 compound {standardized_smiles}')
|
||||
standardized_smiles = standardized_smiles[:1]
|
||||
|
||||
if standardized_smiles:
|
||||
smiles = standardized_smiles[0]
|
||||
|
||||
return super().standardize(smiles)
|
||||
|
||||
|
||||
class RegExStandardizer(Standardizer):
|
||||
|
||||
def __init__(self, name, replacements: dict):
|
||||
super().__init__(name)
|
||||
self.replacements = replacements
|
||||
|
||||
def standardize(self, smiles: str) -> str:
|
||||
smi = smiles
|
||||
mod_smi = smiles
|
||||
|
||||
for k, v in self.replacements.items():
|
||||
mod_smi = smi.replace(k, v)
|
||||
|
||||
while mod_smi != smi:
|
||||
mod_smi = smi
|
||||
for k, v in self.replacements.items():
|
||||
smi = smi.replace(k, v)
|
||||
|
||||
return super().standardize(smi)
|
||||
|
||||
|
||||
FLATTEN = [
|
||||
RegExStandardizer("Remove Stereo", {"@": ""})
|
||||
]
|
||||
|
||||
UN_CIS_TRANS = [
|
||||
RegExStandardizer("Un-Cis-Trans", {"/": "", "\\": ""})
|
||||
]
|
||||
|
||||
BASIC = [
|
||||
RuleStandardizer("ammoniumstandardization", "[H][N+:1]([H])([H])[#6:2]>>[H][#7:1]([H])-[#6:2]"),
|
||||
RuleStandardizer("cyanate", "[H][#8:1][C:2]#[N:3]>>[#8-:1][C:2]#[N:3]"),
|
||||
RuleStandardizer("deprotonatecarboxyls", "[H][#8:1]-[#6:2]=[O:3]>>[#8-:1]-[#6:2]=[O:3]"),
|
||||
RuleStandardizer("forNOOH", "[H][#8:1]-[#7+:2](-[*:3])=[O:4]>>[#8-:1]-[#7+:2](-[*:3])=[O:4]"),
|
||||
RuleStandardizer("Hydroxylprotonation", "[#6;A:1][#6:2](-[#8-:3])=[#6;A:4]>>[#6:1]-[#6:2](-[#8:3][H])=[#6;A:4]"),
|
||||
RuleStandardizer("phosphatedeprotonation", "[H][#8:1]-[$([#15]);!$(P([O-])):2]>>[#8-:1]-[#15:2]"),
|
||||
RuleStandardizer("PicricAcid",
|
||||
"[H][#8:1]-[c:2]1[c:3][c:4][c:5]([c:6][c:7]1-[#7+:8](-[#8-:9])=[O:10])-[#7+:11](-[#8-:12])=[O:13]>>[#8-:1]-[c:2]1[c:3][c:4][c:5]([c:6][c:7]1-[#7+:8](-[#8-:9])=[O:10])-[#7+:11](-[#8-:12])=[O:13]"),
|
||||
RuleStandardizer("Sulfate1", "[H][#8:1][S:2]([#8:3][H])(=[O:4])=[O:5]>>[#8-:1][S:2]([#8-:3])(=[O:4])=[O:5]"),
|
||||
RuleStandardizer("Sulfate2",
|
||||
"[#6:1]-[#8:2][S:3]([#8:4][H])(=[O:5])=[O:6]>>[#6:1]-[#8:2][S:3]([#8-:4])(=[O:5])=[O:6]"),
|
||||
RuleStandardizer("Sulfate3", "[H][#8:3][S:2]([#6:1])(=[O:4])=[O:5]>>[#6:1][S:2]([#8-:3])(=[O:4])=[O:5]"),
|
||||
RuleStandardizer("Transform_c1353forSOOH", "[H][#8:1][S:2]([*:3])=[O:4]>>[#8-:1][S:2]([*:3])=[O:4]"),
|
||||
]
|
||||
|
||||
ENHANCED = BASIC + [
|
||||
RuleStandardizer("fullPhosphatedeprotonation", "[H][#8:1]-[#15:2]>>[#8-:1]-[#15:2]")
|
||||
]
|
||||
|
||||
EXOTIC = ENHANCED + [
|
||||
RuleStandardizer("ThioPhosphate1", "[H][S:1]-[#15:2]=[$([#16]),$([#8]):3]>>[S-:1]-[#15:2]=[$([#16]),$([#8]):3]")
|
||||
]
|
||||
|
||||
COA_CUTTER = [
|
||||
RuleStandardizer("CutCoEnzymeAOff",
|
||||
"CC(C)(COP(O)(=O)OP(O)(=O)OCC1OC(C(O)C1OP(O)(O)=O)n1cnc2c(N)ncnc12)C(O)C(=O)NCCC(=O)NCCS[$(*):1]>>[O-][$(*):1]")
|
||||
]
|
||||
|
||||
ENOL_KETO = [
|
||||
RuleStandardizer("enol2Ketone", "[H][#8:2]-[#6:3]=[#6:1]>>[#6:1]-[#6:3]=[O:2]")
|
||||
]
|
||||
|
||||
MATCH_STANDARDIZER = EXOTIC + FLATTEN + UN_CIS_TRANS + COA_CUTTER + ENOL_KETO
|
||||
|
||||
|
||||
class IndigoUtils(object):
|
||||
|
||||
@staticmethod
|
||||
def layout(mol_data):
|
||||
i = Indigo()
|
||||
try:
|
||||
if mol_data.startswith('$RXN') or '>>' in mol_data:
|
||||
rxn = i.loadQueryReaction(mol_data)
|
||||
rxn.layout()
|
||||
return rxn.rxnfile()
|
||||
else:
|
||||
mol = i.loadQueryMolecule(mol_data)
|
||||
mol.layout()
|
||||
return mol.molfile()
|
||||
except IndigoException as e:
|
||||
try:
|
||||
logger.info("layout() failed, trying loadReactionSMARTS as fallback!")
|
||||
rxn = IndigoUtils.load_reaction_SMARTS(mol_data)
|
||||
rxn.layout()
|
||||
return rxn.molfile()
|
||||
except IndigoException as e2:
|
||||
logger.error(f'layout() failed due to {e2}!')
|
||||
|
||||
@staticmethod
|
||||
def load_reaction_SMARTS(mol):
|
||||
return Indigo().loadReactionSmarts(mol)
|
||||
|
||||
@staticmethod
|
||||
def aromatize(mol_data, is_query):
|
||||
i = Indigo()
|
||||
try:
|
||||
if mol_data.startswith('$RXN'):
|
||||
if is_query:
|
||||
rxn = i.loadQueryReaction(mol_data)
|
||||
else:
|
||||
rxn = i.loadReaction(mol_data)
|
||||
|
||||
rxn.aromatize()
|
||||
return rxn.rxnfile()
|
||||
else:
|
||||
if is_query:
|
||||
mol = i.loadQueryMolecule(mol_data)
|
||||
else:
|
||||
mol = i.loadMolecule(mol_data)
|
||||
|
||||
mol.aromatize()
|
||||
return mol.molfile()
|
||||
except IndigoException as e:
|
||||
try:
|
||||
logger.info("Aromatizing failed, trying loadReactionSMARTS as fallback!")
|
||||
rxn = IndigoUtils.load_reaction_SMARTS(mol_data)
|
||||
rxn.aromatize()
|
||||
return rxn.molfile()
|
||||
except IndigoException as e2:
|
||||
logger.error(f'Aromatizing failed due to {e2}!')
|
||||
|
||||
@staticmethod
|
||||
def dearomatize(mol_data, is_query):
|
||||
i = Indigo()
|
||||
try:
|
||||
if mol_data.startswith('$RXN'):
|
||||
if is_query:
|
||||
rxn = i.loadQueryReaction(mol_data)
|
||||
else:
|
||||
rxn = i.loadReaction(mol_data)
|
||||
|
||||
rxn.dearomatize()
|
||||
return rxn.rxnfile()
|
||||
else:
|
||||
if is_query:
|
||||
mol = i.loadQueryMolecule(mol_data)
|
||||
else:
|
||||
mol = i.loadMolecule(mol_data)
|
||||
|
||||
mol.dearomatize()
|
||||
return mol.molfile()
|
||||
except IndigoException as e:
|
||||
try:
|
||||
logger.info("De-Aromatizing failed, trying loadReactionSMARTS as fallback!")
|
||||
rxn = IndigoUtils.load_reaction_SMARTS(mol_data)
|
||||
rxn.dearomatize()
|
||||
return rxn.molfile()
|
||||
except IndigoException as e2:
|
||||
logger.error(f'De-Aromatizing failed due to {e2}!')
|
||||
|
||||
@staticmethod
|
||||
def sanitize_functional_group(functional_group: str):
|
||||
counter = 0
|
||||
while True:
|
||||
counter += 1
|
||||
|
||||
copy = functional_group
|
||||
|
||||
# special environment handling (amines, hydroxy, esters, ethers)
|
||||
# the higher substituted should not contain H env.
|
||||
if functional_group == '[C]=O':
|
||||
functional_group = "[H][C](=O)[CX4,c]"
|
||||
|
||||
# aldamines
|
||||
if functional_group == "O=[C]N(R)R":
|
||||
functional_group = "O=[C]([H])N(R)R"
|
||||
|
||||
# ether, ester, ketones, amines, thioether
|
||||
vals = [
|
||||
"ROR",
|
||||
"O=C(R)OR",
|
||||
"[H]N(R)R",
|
||||
"O=C(R)R",
|
||||
"RN(R)R",
|
||||
"RSR",
|
||||
]
|
||||
if functional_group in vals:
|
||||
functional_group = functional_group.replace("R", "[CX4,c]")
|
||||
|
||||
# esters, ketones, amides
|
||||
functional_group = functional_group.replace("O=C(R)", "O=C([CX4,c])")
|
||||
|
||||
if functional_group == "RS*R" or functional_group == "RO*R":
|
||||
# neighboring atoms can be any aromatic atom, but they should not be highlighted
|
||||
functional_group = functional_group.replace("R", "")
|
||||
|
||||
# aromatic compounds: aromatic atoms are denoted with *
|
||||
# single aromatic heteroatoms, not substituted
|
||||
|
||||
# unsubstituted aromatic nitrogen
|
||||
if functional_group == "RN*(R)R":
|
||||
functional_group = "[nH1,nX2](a)a" # pyrrole (with H) or pyridine (no other connections); currently overlaps with neighboring aromatic atoms
|
||||
|
||||
# substituted aromatic nitrogen
|
||||
functional_group = functional_group.replace("N*(R)R",
|
||||
"n(a)a") # substituent will be before N*; currently overlaps with neighboring aromatic atoms
|
||||
# pyridinium
|
||||
if functional_group == "RN*(R)(R)(R)R":
|
||||
functional_group = "[CX4,c]n(a)a" # currently overlaps with neighboring aromatic atoms
|
||||
|
||||
# N-oxide
|
||||
if functional_group == "[H]ON*(R)(R)(R)R":
|
||||
functional_group = "[O-][n+](a)a" # currently overlaps with neighboring aromatic atoms
|
||||
|
||||
# other aromatic hetero atoms
|
||||
functional_group = functional_group.replace("C*", "c")
|
||||
functional_group = functional_group.replace("N*", "n")
|
||||
functional_group = functional_group.replace("S*", "s")
|
||||
functional_group = functional_group.replace("O*", "o")
|
||||
functional_group = functional_group.replace("Se*", "se")
|
||||
functional_group = functional_group.replace("P*", "p")
|
||||
|
||||
# other replacement, to accomodate for the standardization rules in enviPath
|
||||
# This is not the perfect way to do it; there should be a way to replace substructure SMARTS in SMARTS?
|
||||
# nitro groups are broken, due to charge handling. this SMARTS matches both forms (formal charges and hypervalent); Ertl-CDK still treats both forms separately...
|
||||
functional_group = functional_group.replace("[H]O[N](=O)R", "[CX4,c][NX3](~[OX1])~[OX1]")
|
||||
functional_group = functional_group.replace("O=N(=O)R", "[CX4,c][NX3](~[OX1])~[OX1]")
|
||||
# carboxylic acid: this SMARTS matches both neutral and anionic form; includes COOH in larger functional_groups
|
||||
functional_group = functional_group.replace("[H]OC(=O)", "[OD1]C(=O)")
|
||||
# azo
|
||||
functional_group = functional_group.replace("N#[N]N(R)R", "[NX1]~[NX2]~N[CX4,c]")
|
||||
functional_group = functional_group.replace("RN=[N]=NR", "[NX1]~[NX2]~N[CX4,c]")
|
||||
# TODO: there might be more problematic groups, which we have yet to find.
|
||||
|
||||
# other environment atoms (can be aromatic or aliphatic Csp3, or H)
|
||||
functional_group = functional_group.replace("R", "[H,CX4,c]")
|
||||
|
||||
if copy == functional_group:
|
||||
break
|
||||
|
||||
return functional_group
|
||||
|
||||
@staticmethod
|
||||
def _colorize(indigo: Indigo, molecule: IndigoObject, functional_groups: Dict[str, int], is_reaction: bool):
|
||||
indigo.setOption("render-atom-color-property", "color")
|
||||
indigo.setOption("aromaticity-model", "generic")
|
||||
|
||||
counts = defaultdict(lambda: 0)
|
||||
environment = set()
|
||||
|
||||
matcher = indigo.substructureMatcher(molecule)
|
||||
|
||||
# Determine environment atoms as they will be colored black
|
||||
for env in ["[CX4]", "c"]:
|
||||
query = indigo.loadSmarts(env)
|
||||
for match in matcher.iterateMatches(query):
|
||||
if match is not None:
|
||||
for atom in query.iterateAtoms():
|
||||
mappedAtom = match.mapAtom(atom)
|
||||
|
||||
if mappedAtom is None:
|
||||
continue
|
||||
|
||||
environment.add(mappedAtom.index())
|
||||
|
||||
for k, v in functional_groups.items():
|
||||
|
||||
sanitized = IndigoUtils.sanitize_functional_group(k)
|
||||
|
||||
query = indigo.loadSmarts(sanitized)
|
||||
|
||||
for match in matcher.iterateMatches(query):
|
||||
if match is not None:
|
||||
|
||||
for atom in query.iterateAtoms():
|
||||
mappedAtom = match.mapAtom(atom)
|
||||
if mappedAtom is None or mappedAtom.index() in environment:
|
||||
continue
|
||||
|
||||
counts[mappedAtom.index()] = max(v, counts[mappedAtom.index()])
|
||||
|
||||
for k, v in counts.items():
|
||||
if is_reaction:
|
||||
color = "128, 0, 128"
|
||||
else:
|
||||
if v <= 5:
|
||||
color = "200, 0, 0"
|
||||
else:
|
||||
color = "0, 112, 0"
|
||||
|
||||
molecule.addDataSGroup([k], [], "color", color)
|
||||
|
||||
@staticmethod
|
||||
def mol_to_svg(mol_data: str, width: int = 0, height: int = 0, functional_groups: Dict[str, int] = None):
|
||||
|
||||
if functional_groups is None:
|
||||
functional_groups = {}
|
||||
|
||||
i = Indigo()
|
||||
renderer = IndigoRenderer(i)
|
||||
|
||||
i.setOption("render-output-format", "svg")
|
||||
i.setOption("render-coloring", not bool(len(functional_groups.keys())))
|
||||
i.setOption("render-image-size", width, height)
|
||||
i.setOption("render-bond-line-width", 2.0)
|
||||
|
||||
mol = i.loadMolecule(mol_data)
|
||||
|
||||
if len(functional_groups.keys()) > 0:
|
||||
IndigoUtils._colorize(i, mol, functional_groups, False)
|
||||
|
||||
return renderer.renderToBuffer(mol).decode('UTF-8')
|
||||
|
||||
@staticmethod
|
||||
def smirks_to_svg(smirks: str, is_query_smirks, width: int = 0, height: int = 0,
|
||||
educt_functional_groups: Dict[str, int] = None, product_functional_groups: Dict[str, int] = None):
|
||||
if educt_functional_groups is None:
|
||||
educt_functional_groups = {}
|
||||
|
||||
if product_functional_groups is None:
|
||||
product_functional_groups = {}
|
||||
|
||||
i = Indigo()
|
||||
renderer = IndigoRenderer(i)
|
||||
|
||||
i.setOption("render-output-format", "svg")
|
||||
i.setOption("render-coloring", True)
|
||||
i.setOption("render-image-size", width, height)
|
||||
|
||||
if is_query_smirks:
|
||||
obj = i.loadReactionSmarts(smirks)
|
||||
else:
|
||||
obj = i.loadReaction(smirks)
|
||||
|
||||
if len(educt_functional_groups.keys()) > 0:
|
||||
for react in obj.iterateReactants():
|
||||
IndigoUtils._colorize(i, react, educt_functional_groups, True)
|
||||
|
||||
if len(product_functional_groups.keys()) > 0:
|
||||
for prod in obj.iterateProducts():
|
||||
IndigoUtils._colorize(i, prod, product_functional_groups, True)
|
||||
|
||||
return renderer.renderToBuffer(obj).decode('UTF-8')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
data = {
|
||||
"struct": "\n Ketcher 2172510 12D 1 1.00000 0.00000 0\n\n 6 6 0 0 0 999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.5000 -0.8660 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.0000 -1.7321 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.0000 -1.7321 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.5000 -0.8660 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 2 0 0 0 0\n 2 3 1 0 0 0 0\n 3 4 2 0 0 0 0\n 4 5 1 0 0 0 0\n 5 6 2 0 0 0 0\n 6 1 1 0 0 0 0\nM END\n",
|
||||
"options": {
|
||||
"smart-layout": True,
|
||||
"ignore-stereochemistry-errors": True,
|
||||
"mass-skip-error-on-pseudoatoms": False,
|
||||
"gross-formula-add-rsites": True
|
||||
}
|
||||
}
|
||||
|
||||
print(IndigoUtils.aromatize(data['struct'], False))
|
||||
Reference in New Issue
Block a user