[Misc] Performance improvements, SMIRKS Coverage, Minor Bugfixes (#132)

Bump Python Version to 3.12
Make use of "epauth" optional
Cache `srs` property of rules to speed up apply
Adjust view names for use of `reverse()`
Fix Views for Scenario Attachments
Added Simply Compare View/Template to identify differences between rdkit and ambit
Make migrations consistent with tests + compare
Fixes #76
Set default year for Scenario Modal
Fix html tags for package description
Added Tests for Pathway / Rule
Added remove stereo for apply

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#132
This commit is contained in:
2025-09-26 19:33:03 +12:00
parent b5c759d74e
commit b757a07f91
23 changed files with 671 additions and 463 deletions

View File

@ -12,4 +12,5 @@ urlpatterns = [
re_path(rf'^migration/package/(?P<package_uuid>{UUID})/simple-rdkit-rule/(?P<rule_uuid>{UUID})$', v.migration_detail, name='migration detail'),
re_path(rf'^migration/package/(?P<package_uuid>{UUID})/parallel-rule/(?P<rule_uuid>{UUID})$', v.migration_detail, name='migration detail'),
re_path(rf'^migration/package/(?P<package_uuid>{UUID})/sequential-rule/(?P<rule_uuid>{UUID})$', v.migration_detail, name='migration detail'),
re_path(rf'^migration/compare$', v.compare, name='compare'),
]

View File

@ -1,15 +1,55 @@
import gzip
import json
import logging
import os.path
from datetime import datetime
from django.conf import settings as s
from django.http import HttpResponseNotAllowed
from django.shortcuts import render
from epdb.logic import PackageManager
from epdb.models import Rule
from epdb.models import Rule, SimpleAmbitRule, Package, CompoundStructure
from epdb.views import get_base_context, _anonymous_or_real
from utilities.chem import FormatConverter
from envipy_ambit import apply
from rdkit import Chem
from rdkit.Chem.MolStandardize import rdMolStandardize
logger = logging.getLogger(__name__)
def normalize_smiles(smiles):
m1 = Chem.MolFromSmiles(smiles)
if m1 is None:
print("Couldnt read smi: ", smiles)
return smiles
Chem.RemoveStereochemistry(m1)
# Normalizer takes care of charge/tautomer/resonance standardization
normalizer = rdMolStandardize.Normalizer()
return Chem.MolToSmiles(normalizer.normalize(m1), canonical=True)
def run_both_engines(SMILES, SMIRKS):
ambit_res = apply(SMIRKS, SMILES)
# ambit_res, ambit_errors = FormatConverter.sanitize_smiles([str(s) for s in ambit_res])
ambit_res = list(set([normalize_smiles(str(x)) for x in
FormatConverter.sanitize_smiles([str(s) for s in ambit_res])[0]]))
products = FormatConverter.apply(SMILES, SMIRKS)
all_rdkit_prods = []
for ps in products:
for p in ps:
all_rdkit_prods.append(p)
all_rdkit_prods = list(set(all_rdkit_prods))
# all_rdkit_res, rdkit_errors = FormatConverter.sanitize_smiles(all_rdkit_prods)
all_rdkit_res = list(set([normalize_smiles(str(x)) for x in
FormatConverter.sanitize_smiles([str(s) for s in all_rdkit_prods])[0]]))
# return ambit_res, ambit_errors, all_rdkit_res, rdkit_errors
return ambit_res, 0, all_rdkit_res, 0
def migration(request):
if request.method == 'GET':
@ -19,64 +59,35 @@ def migration(request):
"force") is None:
migration_status = json.load(open(s.BASE_DIR / 'fixtures' / 'migration_status_per_rule.json'))
else:
data = json.load(gzip.open(s.BASE_DIR / 'fixtures' / 'ambit_rules.json.gz', 'rb'))
results = []
BBD = Package.objects.get(url='http://localhost:8000/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1')
ALL_SMILES = [cs.smiles for cs in CompoundStructure.objects.filter(compound__package=BBD)]
RULES = SimpleAmbitRule.objects.filter(package=BBD)
results = list()
num_rules = len(RULES)
success = 0
error = 0
total = 0
num_keys = len(data.keys())
for i, bt_rule_name in enumerate(data.keys()):
print(f"{i + 1}/{num_keys}")
bt_rule = data[bt_rule_name]
smirks = bt_rule['smirks']
all_prods = set()
for i, r in enumerate(RULES):
logger.debug(f'\r{i + 1:03d}/{num_rules}')
res = True
for smiles in ALL_SMILES:
try:
ambit_res, _, rdkit_res, _ = run_both_engines(smiles, r.smirks)
for comp, ambit_prod in zip(bt_rule['compounds'], bt_rule['products']):
res &= set(ambit_res) == set(rdkit_res)
except Exception as e:
logger.error(e)
products = FormatConverter.apply(comp['smiles'], smirks)
all_rdkit_prods = []
for ps in products:
for p in ps:
all_rdkit_prods.append(p)
all_rdkit_prods = list(set(all_rdkit_prods))
ambit_smiles, ambit_errors = FormatConverter.sanitize_smiles(ambit_prod)
rdkit_smiles, rdkit_errors = FormatConverter.sanitize_smiles(all_rdkit_prods)
for x in ambit_smiles:
all_prods.add(x)
# TODO mode "intersection"
# partial_res = (len(set(ambit_smiles).intersection(set(rdkit_smiles))) > 0) or (len(ambit_smiles) == 0)
# FAILED (failures=37)
# TODO mode = "full ambit"
# partial_res = len(set(ambit_smiles).intersection(set(rdkit_smiles))) == len(ambit_smiles)
# FAILED (failures=46)
# TODO mode = "equality"
partial_res = set(ambit_smiles) == set(rdkit_smiles)
# FAILED (failures=69)
res &= partial_res
results.append(
{
'name': bt_rule_name,
'id': bt_rule['id'].split('/')[-1],
'url': bt_rule['id'],
'status': res,
'detail_url': s.SERVER_URL + '/migration/' + bt_rule['id'].replace('https://envipath.org/', '')
}
)
results.append({
'name': r.name,
'detail_url': s.SERVER_URL + '/migration/' + r.url.replace('https://envipath.org/', '').replace('http://localhost:8000/', ''),
'id': str(r.uuid),
'url': r.url,
'status': res,
})
if res:
success += 1
@ -84,7 +95,6 @@ def migration(request):
error += 1
total += 1
results = sorted(results, key=lambda x: (x['status'], x['name']))
migration_status = {
@ -110,69 +120,51 @@ def migration_detail(request, package_uuid, rule_uuid):
if request.method == 'GET':
context = get_base_context(request)
p = PackageManager.get_package_by_id(current_user, package_uuid)
rule = Rule.objects.get(package=p, uuid=rule_uuid)
BBD = Package.objects.get(name='EAWAG-BBD')
STRUCTURES = CompoundStructure.objects.filter(compound__package=BBD)
rule = Rule.objects.get(package=BBD, uuid=rule_uuid)
bt_rule_name = rule.name
data = json.load(gzip.open(s.BASE_DIR / 'fixtures' / 'ambit_rules.json.gz', 'rb'))
bt_rule = data[bt_rule_name]
smirks = bt_rule['smirks']
results = []
smirks = rule.smirks
res = True
results = []
all_prods = set()
for comp, ambit_prod in zip(bt_rule['compounds'], bt_rule['products']):
# if comp['smiles'] != 'CC1=C(C(=C(C=N1)CO)C=O)O':
# continue
for structure in STRUCTURES:
products = FormatConverter.apply(comp['smiles'], smirks)
all_rdkit_prods = []
for ps in products:
for p in ps:
all_rdkit_prods.append(p)
all_rdkit_prods = list(set(all_rdkit_prods))
ambit_smiles, ambit_errors = FormatConverter.sanitize_smiles(ambit_prod)
rdkit_smiles, rdkit_errors = FormatConverter.sanitize_smiles(all_rdkit_prods)
ambit_smiles, ambit_errors, rdkit_smiles, rdkit_errors = run_both_engines(structure.smiles, smirks)
for x in ambit_smiles:
all_prods.add(x)
# TODO mode "intersection"
# partial_res = (len(set(ambit_smiles).intersection(set(rdkit_smiles))) > 0) or (len(ambit_smiles) == 0)
# FAILED (failures=37)
# FAILED (failures=18)
# TODO mode = "full ambit"
# partial_res = len(set(ambit_smiles).intersection(set(rdkit_smiles))) == len(ambit_smiles)
# FAILED (failures=46)
# partial_res = len(set(ambit_smiles).intersection(set(rdkit_smiles))) == len(set(ambit_smiles))
# FAILED (failures=34)
# TODO mode = "equality"
partial_res = set(ambit_smiles) == set(rdkit_smiles)
# FAILED (failures=69)
# FAILED (failures=30)
#
if len(ambit_smiles) or len(rdkit_smiles):
temp = {
'url': comp['id'],
'id': comp['id'].split('/')[-1],
'name': comp['name'],
'initial_smiles': comp['smiles'],
'url': structure.url,
'id': str(structure.uuid),
'name': structure.name,
'initial_smiles': structure.smiles,
'ambit_smiles': sorted(list(ambit_smiles)),
'rdkit_smiles': sorted(list(rdkit_smiles)),
'status': set(ambit_smiles) == set(rdkit_smiles),
}
if set(ambit_smiles) != set(rdkit_smiles):
detail = f"""
detail = f"""
BT: {bt_rule_name}
SMIRKS: {bt_rule['smirks']}
Compound: {comp['smiles']}
Compound URL: {comp['id']}
SMIRKS: {smirks}
Compound: {structure.smiles}
Compound URL: {structure.url}
Num ambit: {len(set(ambit_smiles))}
Num rdkit: {len(set(rdkit_smiles))}
Num Intersection A: {len(set(ambit_smiles).intersection(set(rdkit_smiles)))}
@ -185,8 +177,7 @@ def migration_detail(request, package_uuid, rule_uuid):
rdkit_errors: {rdkit_errors}
"""
temp['detail'] = '\n'.join([x.strip() for x in detail.split('\n')])
# print(detail.strip())
temp['detail'] = '\n'.join([x.strip() for x in detail.split('\n')])
results.append(temp)
@ -197,3 +188,49 @@ def migration_detail(request, package_uuid, rule_uuid):
context['res'] = res
context['bt_rule_name'] = bt_rule_name
return render(request, 'migration_detail.html', context)
def compare(request):
context = get_base_context(request)
if request.method == 'GET':
context[
"smirks"] = "[#1,#6:6][#7;X3;!$(NC1CC1)!$([N][C]=O)!$([!#8]CNC=O):1]([#1,#6:7])[#6;A;X4:2][H:3]>>[#1,#6:6][#7;X3:1]([#1,#6:7])[H:3].[#6;A:2]=O"
context["smiles"] = "C(CC(=O)N[C@@H](CS[Se-])C(=O)NCC(=O)[O-])[C@@H](C(=O)[O-])N"
return render(request, 'compare.html', context)
elif request.method == 'POST':
smiles = request.POST.get("smiles")
smirks = request.POST.get("smirks")
from envipy_ambit import apply
ambit_res = apply(smirks, smiles)
ambit_res, _ = FormatConverter.sanitize_smiles([str(x) for x in ambit_res])
products = FormatConverter.apply(smiles, smirks)
all_rdkit_prods = []
for ps in products:
for p in ps:
all_rdkit_prods.append(p)
all_rdkit_prods = list(set(all_rdkit_prods))
rdkit_res, _ = FormatConverter.sanitize_smiles(all_rdkit_prods)
context["result"] = True
context['ambit_res'] = sorted(set(ambit_res))
context['rdkit_res'] = sorted(set(rdkit_res))
context['diff'] = sorted(set(ambit_res).difference(set(rdkit_res)))
context["smirks"] = smirks
context["smiles"] = smiles
r = SimpleAmbitRule.objects.filter(smirks=smirks)
if r.exists():
context["rule"] = r.first()
return render(request, 'compare.html', context)
else:
return HttpResponseNotAllowed(['GET', 'POST'])