Files
enviPy-bayer/migration/views.py
jebus b757a07f91 [Misc] Performance improvements, SMIRKS Coverage, Minor Bugfixes (#132)
Bump Python Version to 3.12
Make use of "epauth" optional
Cache `srs` property of rules to speed up apply
Adjust view names for use of `reverse()`
Fix Views for Scenario Attachments
Added Simply Compare View/Template to identify differences between rdkit and ambit
Make migrations consistent with tests + compare
Fixes #76
Set default year for Scenario Modal
Fix html tags for package description
Added Tests for Pathway / Rule
Added remove stereo for apply

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#132
2025-09-26 19:33:03 +12:00

237 lines
8.6 KiB
Python

import gzip
import json
import logging
import os.path
from datetime import datetime
from django.conf import settings as s
from django.http import HttpResponseNotAllowed
from django.shortcuts import render
from epdb.logic import PackageManager
from epdb.models import Rule, SimpleAmbitRule, Package, CompoundStructure
from epdb.views import get_base_context, _anonymous_or_real
from utilities.chem import FormatConverter
from envipy_ambit import apply
from rdkit import Chem
from rdkit.Chem.MolStandardize import rdMolStandardize
logger = logging.getLogger(__name__)
def normalize_smiles(smiles):
m1 = Chem.MolFromSmiles(smiles)
if m1 is None:
print("Couldnt read smi: ", smiles)
return smiles
Chem.RemoveStereochemistry(m1)
# Normalizer takes care of charge/tautomer/resonance standardization
normalizer = rdMolStandardize.Normalizer()
return Chem.MolToSmiles(normalizer.normalize(m1), canonical=True)
def run_both_engines(SMILES, SMIRKS):
ambit_res = apply(SMIRKS, SMILES)
# ambit_res, ambit_errors = FormatConverter.sanitize_smiles([str(s) for s in ambit_res])
ambit_res = list(set([normalize_smiles(str(x)) for x in
FormatConverter.sanitize_smiles([str(s) for s in ambit_res])[0]]))
products = FormatConverter.apply(SMILES, SMIRKS)
all_rdkit_prods = []
for ps in products:
for p in ps:
all_rdkit_prods.append(p)
all_rdkit_prods = list(set(all_rdkit_prods))
# all_rdkit_res, rdkit_errors = FormatConverter.sanitize_smiles(all_rdkit_prods)
all_rdkit_res = list(set([normalize_smiles(str(x)) for x in
FormatConverter.sanitize_smiles([str(s) for s in all_rdkit_prods])[0]]))
# return ambit_res, ambit_errors, all_rdkit_res, rdkit_errors
return ambit_res, 0, all_rdkit_res, 0
def migration(request):
if request.method == 'GET':
context = get_base_context(request)
if os.path.exists(s.BASE_DIR / 'fixtures' / 'migration_status_per_rule.json') and request.GET.get(
"force") is None:
migration_status = json.load(open(s.BASE_DIR / 'fixtures' / 'migration_status_per_rule.json'))
else:
BBD = Package.objects.get(url='http://localhost:8000/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1')
ALL_SMILES = [cs.smiles for cs in CompoundStructure.objects.filter(compound__package=BBD)]
RULES = SimpleAmbitRule.objects.filter(package=BBD)
results = list()
num_rules = len(RULES)
success = 0
error = 0
total = 0
for i, r in enumerate(RULES):
logger.debug(f'\r{i + 1:03d}/{num_rules}')
res = True
for smiles in ALL_SMILES:
try:
ambit_res, _, rdkit_res, _ = run_both_engines(smiles, r.smirks)
res &= set(ambit_res) == set(rdkit_res)
except Exception as e:
logger.error(e)
results.append({
'name': r.name,
'detail_url': s.SERVER_URL + '/migration/' + r.url.replace('https://envipath.org/', '').replace('http://localhost:8000/', ''),
'id': str(r.uuid),
'url': r.url,
'status': res,
})
if res:
success += 1
else:
error += 1
total += 1
results = sorted(results, key=lambda x: (x['status'], x['name']))
migration_status = {
'results': results,
'success': success,
'error': error,
'total': total
}
json.dump(migration_status, open(s.BASE_DIR / 'fixtures' / 'migration_status_per_rule.json', 'w'))
for r in migration_status['results']:
r['detail_url'] = r['detail_url'].replace('http://localhost:8000', s.SERVER_URL)
context.update(**migration_status)
return render(request, 'migration.html', context)
def migration_detail(request, package_uuid, rule_uuid):
current_user = _anonymous_or_real(request)
if request.method == 'GET':
context = get_base_context(request)
BBD = Package.objects.get(name='EAWAG-BBD')
STRUCTURES = CompoundStructure.objects.filter(compound__package=BBD)
rule = Rule.objects.get(package=BBD, uuid=rule_uuid)
bt_rule_name = rule.name
smirks = rule.smirks
res = True
results = []
all_prods = set()
for structure in STRUCTURES:
ambit_smiles, ambit_errors, rdkit_smiles, rdkit_errors = run_both_engines(structure.smiles, smirks)
for x in ambit_smiles:
all_prods.add(x)
# TODO mode "intersection"
# partial_res = (len(set(ambit_smiles).intersection(set(rdkit_smiles))) > 0) or (len(ambit_smiles) == 0)
# FAILED (failures=18)
# TODO mode = "full ambit"
# partial_res = len(set(ambit_smiles).intersection(set(rdkit_smiles))) == len(set(ambit_smiles))
# FAILED (failures=34)
# TODO mode = "equality"
partial_res = set(ambit_smiles) == set(rdkit_smiles)
# FAILED (failures=30)
if len(ambit_smiles) or len(rdkit_smiles):
temp = {
'url': structure.url,
'id': str(structure.uuid),
'name': structure.name,
'initial_smiles': structure.smiles,
'ambit_smiles': sorted(list(ambit_smiles)),
'rdkit_smiles': sorted(list(rdkit_smiles)),
'status': set(ambit_smiles) == set(rdkit_smiles),
}
detail = f"""
BT: {bt_rule_name}
SMIRKS: {smirks}
Compound: {structure.smiles}
Compound URL: {structure.url}
Num ambit: {len(set(ambit_smiles))}
Num rdkit: {len(set(rdkit_smiles))}
Num Intersection A: {len(set(ambit_smiles).intersection(set(rdkit_smiles)))}
Num Intersection B: {len(set(rdkit_smiles).intersection(set(ambit_smiles)))}
Difference A: {set(ambit_smiles).difference(set(rdkit_smiles))}
Difference B: {set(rdkit_smiles).difference(set(ambit_smiles))}
ambit products: {ambit_smiles}
rdkit products: {rdkit_smiles}
ambit_errors: {ambit_errors}
rdkit_errors: {rdkit_errors}
"""
temp['detail'] = '\n'.join([x.strip() for x in detail.split('\n')])
results.append(temp)
res &= partial_res
results = sorted(results, key=lambda x: x['status'])
context['results'] = results
context['res'] = res
context['bt_rule_name'] = bt_rule_name
return render(request, 'migration_detail.html', context)
def compare(request):
context = get_base_context(request)
if request.method == 'GET':
context[
"smirks"] = "[#1,#6:6][#7;X3;!$(NC1CC1)!$([N][C]=O)!$([!#8]CNC=O):1]([#1,#6:7])[#6;A;X4:2][H:3]>>[#1,#6:6][#7;X3:1]([#1,#6:7])[H:3].[#6;A:2]=O"
context["smiles"] = "C(CC(=O)N[C@@H](CS[Se-])C(=O)NCC(=O)[O-])[C@@H](C(=O)[O-])N"
return render(request, 'compare.html', context)
elif request.method == 'POST':
smiles = request.POST.get("smiles")
smirks = request.POST.get("smirks")
from envipy_ambit import apply
ambit_res = apply(smirks, smiles)
ambit_res, _ = FormatConverter.sanitize_smiles([str(x) for x in ambit_res])
products = FormatConverter.apply(smiles, smirks)
all_rdkit_prods = []
for ps in products:
for p in ps:
all_rdkit_prods.append(p)
all_rdkit_prods = list(set(all_rdkit_prods))
rdkit_res, _ = FormatConverter.sanitize_smiles(all_rdkit_prods)
context["result"] = True
context['ambit_res'] = sorted(set(ambit_res))
context['rdkit_res'] = sorted(set(rdkit_res))
context['diff'] = sorted(set(ambit_res).difference(set(rdkit_res)))
context["smirks"] = smirks
context["smiles"] = smiles
r = SimpleAmbitRule.objects.filter(smirks=smirks)
if r.exists():
context["rule"] = r.first()
return render(request, 'compare.html', context)
else:
return HttpResponseNotAllowed(['GET', 'POST'])