[Misc] Performance improvements, SMIRKS Coverage, Minor Bugfixes (#132)

Bump Python Version to 3.12
Make use of "epauth" optional
Cache `srs` property of rules to speed up apply
Adjust view names for use of `reverse()`
Fix Views for Scenario Attachments
Added Simply Compare View/Template to identify differences between rdkit and ambit
Make migrations consistent with tests + compare
Fixes #76
Set default year for Scenario Modal
Fix HTML tags for package description
Added Tests for Pathway / Rule
Added stereochemistry removal for apply

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#132
This commit is contained in:
2025-09-26 19:33:03 +12:00
parent b5c759d74e
commit b757a07f91
23 changed files with 671 additions and 463 deletions

View File

@ -4,6 +4,9 @@ import json
from django.conf import settings as s
from django.test import TestCase, tag
from utilities.chem import FormatConverter
from envipy_ambit import apply
from rdkit import Chem
from rdkit.Chem.MolStandardize import rdMolStandardize
@tag("slow")
class RuleApplicationTest(TestCase):
@ -16,17 +19,56 @@ class RuleApplicationTest(TestCase):
super(RuleApplicationTest, cls).setUpClass()
cls.data = json.load(gzip.open(s.BASE_DIR / 'fixtures' / 'ambit_rules.json.gz', 'rb'))
cls.error_smiles = list()
from collections import defaultdict
cls.triggered = defaultdict(lambda: defaultdict(lambda: 0))
@classmethod
def tearDownClass(cls):
    """Run the base-class teardown, then print a summary of collected errors.

    Prints the number of entries in ``cls.error_smiles`` followed by a
    pretty-printed ``Counter`` showing how often each entry occurs.
    """
    from collections import Counter
    from pprint import pprint

    super().tearDownClass()

    print(f"\nTotal Errors across Rules {len(cls.error_smiles)}")
    error_counts = Counter(cls.error_smiles)
    pprint(error_counts)
    # Debugging aid: the nested ``cls.triggered`` counters can be dumped with
    # ``pprint(json.loads(json.dumps(cls.triggered)))``.
def tearDown(self):
    """Run the base-class teardown, then print ``self.total_errors``."""
    super().tearDown()
    summary = f"\nTotal errors {self.total_errors}"
    print(summary)
@staticmethod
def normalize_smiles(smiles):
    """Return a stereo-free, normalized canonical SMILES for ``smiles``.

    If RDKit cannot parse the input, a message is printed and the input
    string is returned unchanged.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # Unparseable input is passed through untouched rather than raising.
        print("Couldnt read smi: ", smiles)
        return smiles

    # Stereo information is stripped in place before normalization.
    Chem.RemoveStereochemistry(mol)
    # Run the molecule through RDKit's Normalizer before writing it back out.
    normalized = rdMolStandardize.Normalizer().normalize(mol)
    return Chem.MolToSmiles(normalized, canonical=True)
@staticmethod
def run_both_engines(smiles, smirks):
    """Apply ``smirks`` to ``smiles`` with both engines and normalize the results.

    The ambit result comes from ``apply(smirks, smiles)``, the RDKit result
    from ``FormatConverter.apply(smiles, smirks)``. Both product collections
    are stringified, passed through ``FormatConverter.sanitize_smiles``,
    normalized with ``normalize_smiles`` and de-duplicated.

    Returns:
        A 4-tuple ``(ambit_smiles, 0, rdkit_smiles, 0)``. The two ``0``
        entries sit in the positions callers unpack as error values.
        NOTE(review): the second element returned by ``sanitize_smiles`` is
        discarded here, so these are always 0 - confirm this is intended.
    """

    def _sanitize_and_normalize(candidates):
        # Only the first element of sanitize_smiles' return value is used.
        sanitized = FormatConverter.sanitize_smiles([str(c) for c in candidates])[0]
        return list({RuleApplicationTest.normalize_smiles(str(smi)) for smi in sanitized})

    ambit_unique = _sanitize_and_normalize(apply(smirks, smiles))

    # FormatConverter.apply yields groups of products; flatten and
    # de-duplicate them before sanitizing.
    product_groups = FormatConverter.apply(smiles, smirks)
    rdkit_distinct = list({prod for group in product_groups for prod in group})
    rdkit_unique = _sanitize_and_normalize(rdkit_distinct)

    return ambit_unique, 0, rdkit_unique, 0
def run_bt_test(self, bt_rule_name):
bt_rule = self.data[bt_rule_name]
smirks = bt_rule['smirks']
@ -34,35 +76,26 @@ class RuleApplicationTest(TestCase):
res = True
all_prods = set()
for comp, ambit_prod in zip(bt_rule['compounds'], bt_rule['products']):
for comp in bt_rule['compounds']:
smi = comp['smiles']
products = FormatConverter.apply(smi, smirks)
all_rdkit_prods = []
for ps in products:
for p in ps:
all_rdkit_prods.append(p)
all_rdkit_prods = list(set(all_rdkit_prods))
ambit_smiles, ambit_errors = FormatConverter.sanitize_smiles(ambit_prod)
rdkit_smiles, rdkit_errors = FormatConverter.sanitize_smiles(all_rdkit_prods)
ambit_smiles, ambit_errors, rdkit_smiles, rdkit_errors = self.run_both_engines(smi, smirks)
for x in ambit_smiles:
all_prods.add(x)
# TODO mode "intersection"
# partial_res = (len(set(ambit_smiles).intersection(set(rdkit_smiles))) > 0) or (len(ambit_smiles) == 0)
# FAILED (failures=33)
# FAILED (failures=18)
# TODO mode = "full ambit"
# partial_res = len(set(ambit_smiles).intersection(set(rdkit_smiles))) == len(ambit_smiles)
# FAILED (failures=44)
# partial_res = len(set(ambit_smiles).intersection(set(rdkit_smiles))) == len(set(ambit_smiles))
# FAILED (failures=22)
# TODO mode = "equality"
partial_res = set(ambit_smiles) == set(rdkit_smiles)
# FAILED (failures=64)
# FAILED (failures=30)
if len(ambit_smiles) and not partial_res:
print(f"""