Fix bond breaking (#46)

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#46
This commit is contained in:
2025-08-15 09:06:07 +12:00
parent 1267ca8ace
commit 3308d47071
4 changed files with 17 additions and 18 deletions

File diff suppressed because one or more lines are too long

View File

@ -38,10 +38,8 @@ def migration(request):
res = True
for comp, ambit_prod in zip(bt_rule['compounds'], bt_rule['products']):
# if comp['smiles'] != 'CC1=C(C(=C(C=N1)CO)C=O)O':
# continue
products = FormatConverter.apply(comp['smiles'], smirks, preprocess_smiles=True, bracketize=False)
products = FormatConverter.apply(comp['smiles'], smirks)
all_rdkit_prods = []
for ps in products:
@ -130,7 +128,7 @@ def migration_detail(request, package_uuid, rule_uuid):
# if comp['smiles'] != 'CC1=C(C(=C(C=N1)CO)C=O)O':
# continue
products = FormatConverter.apply(comp['smiles'], smirks, preprocess_smiles=True, bracketize=False)
products = FormatConverter.apply(comp['smiles'], smirks)
all_rdkit_prods = []
for ps in products:

View File

@ -19,9 +19,8 @@ class RuleApplicationTest(TestCase):
@classmethod
def tearDownClass(cls):
from collections import Counter
# print(Counter(cls.error_smiles))
pass
print(f"\nTotal Errors across Rules {len(cls.error_smiles)}")
# print(cls.error_smiles)
def tearDown(self):
print(f"\nTotal errors {self.total_errors}")
@ -36,7 +35,7 @@ class RuleApplicationTest(TestCase):
for comp, ambit_prod in zip(bt_rule['compounds'], bt_rule['products']):
smi = comp['smiles']
products = FormatConverter.apply(smi, smirks, preprocess_smiles=True, bracketize=False)
products = FormatConverter.apply(smi, smirks)
all_rdkit_prods = []
for ps in products:
@ -53,15 +52,15 @@ class RuleApplicationTest(TestCase):
# TODO mode = "intersection"
# partial_res = (len(set(ambit_smiles).intersection(set(rdkit_smiles))) > 0) or (len(ambit_smiles) == 0)
# FAILED (failures=42)
# FAILED (failures=33)
# TODO mode = "full ambit"
# partial_res = len(set(ambit_smiles).intersection(set(rdkit_smiles))) == len(ambit_smiles)
# FAILED (failures=52)
# FAILED (failures=44)
# TODO mode = "equality"
partial_res = set(ambit_smiles) == set(rdkit_smiles)
# FAILED (failures=71)
# FAILED (failures=64)
if len(ambit_smiles) and not partial_res:
print(f"""

View File

@ -12,6 +12,7 @@ from rdkit.Chem import MACCSkeys
from rdkit.Chem import rdChemReactions
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem.rdmolops import GetMolFrags
from rdkit.Contrib.IFG import ifg
logger = logging.getLogger(__name__)
@ -223,7 +224,7 @@ class FormatConverter(object):
return False
@staticmethod
def apply(smiles: str, smirks: str, preprocess_smiles: bool = True, bracketize: bool = False,
def apply(smiles: str, smirks: str, preprocess_smiles: bool = True, bracketize: bool = True,
standardize: bool = True, kekulize: bool = True) -> List['ProductSet']:
logger.debug(f'Applying {smirks} on {smiles}')
@ -252,8 +253,10 @@ class FormatConverter(object):
for product in product_set:
try:
Chem.SanitizeMol(product)
product = FormatConverter.standardize(Chem.MolToSmiles(product))
product = GetMolFrags(product, asMols=True)
for p in product:
p = FormatConverter.standardize(Chem.MolToSmiles(p))
prods.append(p)
# if kekulize:
# # from rdkit.Chem import MolStandardize
@ -278,13 +281,12 @@ class FormatConverter(object):
# # bond.SetIsAromatic(False)
# Chem.Kekulize(product)
prods.append(product)
except ValueError as e:
logger.error(f'Sanitizing and converting failed:\n{e}')
continue
# TODO: document the condition below that decides when a ProductSet is kept
if len(prods) and len(prods) == len(product_set):
if len(prods):
ps = ProductSet(prods)
pss.add(ps)