Fix bond breaking (#46)

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#46
This commit is contained in:
2025-08-15 09:06:07 +12:00
parent 1267ca8ace
commit 3308d47071
4 changed files with 17 additions and 18 deletions

File diff suppressed because one or more lines are too long

View File

@@ -38,10 +38,8 @@ def migration(request):
res = True res = True
for comp, ambit_prod in zip(bt_rule['compounds'], bt_rule['products']): for comp, ambit_prod in zip(bt_rule['compounds'], bt_rule['products']):
# if comp['smiles'] != 'CC1=C(C(=C(C=N1)CO)C=O)O':
# continue
products = FormatConverter.apply(comp['smiles'], smirks, preprocess_smiles=True, bracketize=False) products = FormatConverter.apply(comp['smiles'], smirks)
all_rdkit_prods = [] all_rdkit_prods = []
for ps in products: for ps in products:
@@ -130,7 +128,7 @@ def migration_detail(request, package_uuid, rule_uuid):
# if comp['smiles'] != 'CC1=C(C(=C(C=N1)CO)C=O)O': # if comp['smiles'] != 'CC1=C(C(=C(C=N1)CO)C=O)O':
# continue # continue
products = FormatConverter.apply(comp['smiles'], smirks, preprocess_smiles=True, bracketize=False) products = FormatConverter.apply(comp['smiles'], smirks)
all_rdkit_prods = [] all_rdkit_prods = []
for ps in products: for ps in products:

View File

@@ -19,9 +19,8 @@ class RuleApplicationTest(TestCase):
@classmethod @classmethod
def tearDownClass(cls): def tearDownClass(cls):
from collections import Counter print(f"\nTotal Errors across Rules {len(cls.error_smiles)}")
# print(Counter(cls.error_smiles)) # print(cls.error_smiles)
pass
def tearDown(self): def tearDown(self):
print(f"\nTotal errors {self.total_errors}") print(f"\nTotal errors {self.total_errors}")
@@ -36,7 +35,7 @@ class RuleApplicationTest(TestCase):
for comp, ambit_prod in zip(bt_rule['compounds'], bt_rule['products']): for comp, ambit_prod in zip(bt_rule['compounds'], bt_rule['products']):
smi = comp['smiles'] smi = comp['smiles']
products = FormatConverter.apply(smi, smirks, preprocess_smiles=True, bracketize=False) products = FormatConverter.apply(smi, smirks)
all_rdkit_prods = [] all_rdkit_prods = []
for ps in products: for ps in products:
@@ -53,15 +52,15 @@ class RuleApplicationTest(TestCase):
# TODO mode "intersection" # TODO mode "intersection"
# partial_res = (len(set(ambit_smiles).intersection(set(rdkit_smiles))) > 0) or (len(ambit_smiles) == 0) # partial_res = (len(set(ambit_smiles).intersection(set(rdkit_smiles))) > 0) or (len(ambit_smiles) == 0)
# FAILED (failures=42) # FAILED (failures=33)
# TODO mode = "full ambit" # TODO mode = "full ambit"
# partial_res = len(set(ambit_smiles).intersection(set(rdkit_smiles))) == len(ambit_smiles) # partial_res = len(set(ambit_smiles).intersection(set(rdkit_smiles))) == len(ambit_smiles)
# FAILED (failures=52) # FAILED (failures=44)
# TODO mode = "equality" # TODO mode = "equality"
partial_res = set(ambit_smiles) == set(rdkit_smiles) partial_res = set(ambit_smiles) == set(rdkit_smiles)
# FAILED (failures=71) # FAILED (failures=64)
if len(ambit_smiles) and not partial_res: if len(ambit_smiles) and not partial_res:
print(f""" print(f"""

View File

@@ -12,6 +12,7 @@ from rdkit.Chem import MACCSkeys
from rdkit.Chem import rdChemReactions from rdkit.Chem import rdChemReactions
from rdkit.Chem.Draw import rdMolDraw2D from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.MolStandardize import rdMolStandardize from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem.rdmolops import GetMolFrags
from rdkit.Contrib.IFG import ifg from rdkit.Contrib.IFG import ifg
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -223,7 +224,7 @@ class FormatConverter(object):
return False return False
@staticmethod @staticmethod
def apply(smiles: str, smirks: str, preprocess_smiles: bool = True, bracketize: bool = False, def apply(smiles: str, smirks: str, preprocess_smiles: bool = True, bracketize: bool = True,
standardize: bool = True, kekulize: bool = True) -> List['ProductSet']: standardize: bool = True, kekulize: bool = True) -> List['ProductSet']:
logger.debug(f'Applying {smirks} on {smiles}') logger.debug(f'Applying {smirks} on {smiles}')
@@ -252,8 +253,10 @@ class FormatConverter(object):
for product in product_set: for product in product_set:
try: try:
Chem.SanitizeMol(product) Chem.SanitizeMol(product)
product = GetMolFrags(product, asMols=True)
product = FormatConverter.standardize(Chem.MolToSmiles(product)) for p in product:
p = FormatConverter.standardize(Chem.MolToSmiles(p))
prods.append(p)
# if kekulize: # if kekulize:
# # from rdkit.Chem import MolStandardize # # from rdkit.Chem import MolStandardize
@@ -278,13 +281,12 @@ class FormatConverter(object):
# # bond.SetIsAromatic(False) # # bond.SetIsAromatic(False)
# Chem.Kekulize(product) # Chem.Kekulize(product)
prods.append(product)
except ValueError as e: except ValueError as e:
logger.error(f'Sanitizing and converting failed:\n{e}') logger.error(f'Sanitizing and converting failed:\n{e}')
continue continue
# TODO doc! if len(prods):
if len(prods) and len(prods) == len(product_set):
ps = ProductSet(prods) ps = ProductSet(prods)
pss.add(ps) pss.add(ps)