forked from enviPath/enviPy
[Misc] Performance improvements, SMIRKS Coverage, Minor Bugfixes (#132)
Bump Python Version to 3.12; Make use of "epauth" optional; Cache `srs` property of rules to speed up apply; Adjust view names for use of `reverse()`; Fix Views for Scenario Attachments; Added Simply Compare View/Template to identify differences between rdkit and ambit; Make migrations consistent with tests + compare; Fixes #76; Set default year for Scenario Modal; Fix html tags for package description; Added Tests for Pathway / Rule; Added remove stereo for apply. Co-authored-by: Tim Lorsbach <tim@lorsba.ch>. Reviewed-on: enviPath/enviPy#132
This commit is contained in:
@ -4,6 +4,9 @@ import json
|
||||
from django.conf import settings as s
|
||||
from django.test import TestCase, tag
|
||||
from utilities.chem import FormatConverter
|
||||
from envipy_ambit import apply
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem.MolStandardize import rdMolStandardize
|
||||
|
||||
@tag("slow")
|
||||
class RuleApplicationTest(TestCase):
|
||||
@ -16,17 +19,56 @@ class RuleApplicationTest(TestCase):
|
||||
super(RuleApplicationTest, cls).setUpClass()
|
||||
cls.data = json.load(gzip.open(s.BASE_DIR / 'fixtures' / 'ambit_rules.json.gz', 'rb'))
|
||||
cls.error_smiles = list()
|
||||
from collections import defaultdict
|
||||
cls.triggered = defaultdict(lambda: defaultdict(lambda: 0))
|
||||
|
||||
@classmethod
def tearDownClass(cls):
    """Tear down the class, then report the errors collected across all rules.

    Prints the number of entries in ``cls.error_smiles`` followed by a
    pretty-printed count of how often each entry occurs.
    """
    from collections import Counter
    from pprint import pprint

    super().tearDownClass()

    error_count = len(cls.error_smiles)
    print(f"\nTotal Errors across Rules {error_count}")

    occurrences = Counter(cls.error_smiles)
    pprint(occurrences)
    # Debugging aid: the nested ``cls.triggered`` counters can be dumped with
    # pprint(json.loads(json.dumps(cls.triggered)))
|
||||
|
||||
def tearDown(self):
    """Run the base ``tearDown``, then print this test's error total."""
    super().tearDown()
    # NOTE(review): total_errors is assigned outside this method (not visible
    # here) -- presumably by the test run itself; confirm it is always set.
    error_total = self.total_errors
    print(f"\nTotal errors {error_total}")
|
||||
|
||||
@staticmethod
def normalize_smiles(smiles):
    """Return a stereo-free, normalized, canonical SMILES for ``smiles``.

    If RDKit cannot parse the input, a notice is printed and the input
    string is returned unchanged.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is not None:
        # Drop stereo information in place so that products differing only
        # in stereochemistry end up with the same SMILES.
        Chem.RemoveStereochemistry(mol)
        # Apply RDKit's built-in normalization transforms before writing
        # the canonical SMILES.
        standardizer = rdMolStandardize.Normalizer()
        normalized = standardizer.normalize(mol)
        return Chem.MolToSmiles(normalized, canonical=True)

    print("Couldnt read smi: ", smiles)
    return smiles
|
||||
|
||||
@staticmethod
def run_both_engines(smiles, smirks):
    """Apply ``smirks`` to ``smiles`` with the ambit and the RDKit-backed engine.

    Both product collections go through the same pipeline: stringify,
    ``FormatConverter.sanitize_smiles`` (only the first element of its result
    is used), ``normalize_smiles`` and de-duplication.

    Returns:
        A 4-tuple ``(ambit_smiles, 0, rdkit_smiles, 0)``. The two SMILES
        entries are lists of unique normalized strings; the second and fourth
        entries are always the constant ``0`` (the caller unpacks them as
        error values).
    """

    def _unique_normalized(candidates):
        # Shared post-processing: stringify -> sanitize -> normalize -> dedupe.
        sanitized = FormatConverter.sanitize_smiles([str(c) for c in candidates])[0]
        return list({RuleApplicationTest.normalize_smiles(str(item)) for item in sanitized})

    # ambit engine (note the argument order: smirks first).
    ambit_products = apply(smirks, smiles)
    ambit_unique = _unique_normalized(ambit_products)

    # RDKit-backed engine: flatten the product sets and drop duplicates
    # before post-processing.
    product_sets = FormatConverter.apply(smiles, smirks)
    rdkit_products = list({product for product_set in product_sets for product in product_set})
    rdkit_unique = _unique_normalized(rdkit_products)

    return ambit_unique, 0, rdkit_unique, 0
|
||||
|
||||
def run_bt_test(self, bt_rule_name):
|
||||
bt_rule = self.data[bt_rule_name]
|
||||
smirks = bt_rule['smirks']
|
||||
@ -34,35 +76,26 @@ class RuleApplicationTest(TestCase):
|
||||
res = True
|
||||
|
||||
all_prods = set()
|
||||
for comp, ambit_prod in zip(bt_rule['compounds'], bt_rule['products']):
|
||||
for comp in bt_rule['compounds']:
|
||||
|
||||
smi = comp['smiles']
|
||||
products = FormatConverter.apply(smi, smirks)
|
||||
|
||||
all_rdkit_prods = []
|
||||
for ps in products:
|
||||
for p in ps:
|
||||
all_rdkit_prods.append(p)
|
||||
|
||||
all_rdkit_prods = list(set(all_rdkit_prods))
|
||||
|
||||
ambit_smiles, ambit_errors = FormatConverter.sanitize_smiles(ambit_prod)
|
||||
rdkit_smiles, rdkit_errors = FormatConverter.sanitize_smiles(all_rdkit_prods)
|
||||
ambit_smiles, ambit_errors, rdkit_smiles, rdkit_errors = self.run_both_engines(smi, smirks)
|
||||
|
||||
for x in ambit_smiles:
|
||||
all_prods.add(x)
|
||||
|
||||
# TODO mode "intersection"
|
||||
# partial_res = (len(set(ambit_smiles).intersection(set(rdkit_smiles))) > 0) or (len(ambit_smiles) == 0)
|
||||
# FAILED (failures=33)
|
||||
# FAILED (failures=18)
|
||||
|
||||
# TODO mode = "full ambit"
|
||||
# partial_res = len(set(ambit_smiles).intersection(set(rdkit_smiles))) == len(ambit_smiles)
|
||||
# FAILED (failures=44)
|
||||
# partial_res = len(set(ambit_smiles).intersection(set(rdkit_smiles))) == len(set(ambit_smiles))
|
||||
# FAILED (failures=22)
|
||||
|
||||
# TODO mode = "equality"
|
||||
partial_res = set(ambit_smiles) == set(rdkit_smiles)
|
||||
# FAILED (failures=64)
|
||||
# FAILED (failures=30)
|
||||
|
||||
if len(ambit_smiles) and not partial_res:
|
||||
print(f"""
|
||||
|
||||
Reference in New Issue
Block a user