forked from enviPath/enviPy
[Misc] Performance improvements, SMIRKS Coverage, Minor Bugfixes (#132)
Bump Python version to 3.12. Make use of "epauth" optional. Cache the `srs` property of rules to speed up apply. Adjust view names for use with `reverse()`. Fix views for Scenario attachments. Add a simple Compare view/template to identify differences between RDKit and ambit. Make migrations consistent with tests + compare. Fixes #76. Set default year for Scenario modal. Fix HTML tags for package description. Add tests for Pathway / Rule. Add remove-stereo for apply. Co-authored-by: Tim Lorsbach <tim@lorsba.ch> Reviewed-on: enviPath/enviPy#132
This commit is contained in:
@ -1,15 +1,55 @@
|
||||
import gzip
|
||||
import json
|
||||
import logging
|
||||
import os.path
|
||||
from datetime import datetime
|
||||
|
||||
from django.conf import settings as s
|
||||
from django.http import HttpResponseNotAllowed
|
||||
from django.shortcuts import render
|
||||
|
||||
from epdb.logic import PackageManager
|
||||
from epdb.models import Rule
|
||||
from epdb.models import Rule, SimpleAmbitRule, Package, CompoundStructure
|
||||
from epdb.views import get_base_context, _anonymous_or_real
|
||||
from utilities.chem import FormatConverter
|
||||
from envipy_ambit import apply
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem.MolStandardize import rdMolStandardize
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def normalize_smiles(smiles):
    """Return a stereo-free, normalized canonical SMILES for ``smiles``.

    The input is parsed with RDKit, stripped of stereochemistry, passed
    through ``rdMolStandardize.Normalizer`` and written back as canonical
    SMILES.

    Args:
        smiles: SMILES string to normalize.

    Returns:
        The normalized canonical SMILES, or the unchanged input when RDKit
        cannot parse it.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # Best effort: hand the unparsable string back unchanged, but report
        # it through the module logger rather than printing to stdout.
        logger.warning("Couldnt read smi: %s", smiles)
        return smiles

    # Works in place on ``mol``; the return value is not used.
    Chem.RemoveStereochemistry(mol)

    # NOTE(review): the original author describes the Normalizer as handling
    # charge/tautomer/resonance standardization -- confirm against the RDKit
    # documentation before relying on that.
    normalizer = rdMolStandardize.Normalizer()
    return Chem.MolToSmiles(normalizer.normalize(mol), canonical=True)
|
||||
|
||||
|
||||
def run_both_engines(SMILES, SMIRKS):
    """Apply one SMIRKS to one SMILES with both ambit and RDKit.

    The products of each engine are stringified, sanitized via
    ``FormatConverter.sanitize_smiles``, normalized with
    ``normalize_smiles`` and de-duplicated.

    Args:
        SMILES: SMILES string of the substrate.
        SMIRKS: SMIRKS string of the rule to apply.

    Returns:
        A 4-tuple ``(ambit_res, 0, all_rdkit_res, 0)``. ``ambit_res`` and
        ``all_rdkit_res`` are lists of unique normalized product SMILES in
        no particular order. The two ``0`` entries are placeholders in the
        slots callers unpack as error values; whatever else
        ``sanitize_smiles`` returns is discarded here.
    """

    def _clean(raw_products):
        # Only the first element of the sanitize result is kept.
        sanitized = FormatConverter.sanitize_smiles(
            [str(raw) for raw in raw_products]
        )[0]
        return list({normalize_smiles(str(entry)) for entry in sanitized})

    # Note the argument order: ambit takes (SMIRKS, SMILES) ...
    ambit_res = _clean(apply(SMIRKS, SMILES))

    # ... while the RDKit-backed converter takes (SMILES, SMIRKS) and yields
    # product sets, which are flattened and de-duplicated before cleaning.
    unique_rdkit_prods = list({
        prod
        for product_set in FormatConverter.apply(SMILES, SMIRKS)
        for prod in product_set
    })
    all_rdkit_res = _clean(unique_rdkit_prods)

    return ambit_res, 0, all_rdkit_res, 0
|
||||
|
||||
def migration(request):
|
||||
if request.method == 'GET':
|
||||
@ -19,64 +59,35 @@ def migration(request):
|
||||
"force") is None:
|
||||
migration_status = json.load(open(s.BASE_DIR / 'fixtures' / 'migration_status_per_rule.json'))
|
||||
else:
|
||||
data = json.load(gzip.open(s.BASE_DIR / 'fixtures' / 'ambit_rules.json.gz', 'rb'))
|
||||
|
||||
results = []
|
||||
BBD = Package.objects.get(url='http://localhost:8000/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1')
|
||||
ALL_SMILES = [cs.smiles for cs in CompoundStructure.objects.filter(compound__package=BBD)]
|
||||
RULES = SimpleAmbitRule.objects.filter(package=BBD)
|
||||
|
||||
results = list()
|
||||
num_rules = len(RULES)
|
||||
success = 0
|
||||
error = 0
|
||||
total = 0
|
||||
|
||||
num_keys = len(data.keys())
|
||||
for i, bt_rule_name in enumerate(data.keys()):
|
||||
print(f"{i + 1}/{num_keys}")
|
||||
bt_rule = data[bt_rule_name]
|
||||
smirks = bt_rule['smirks']
|
||||
|
||||
all_prods = set()
|
||||
|
||||
for i, r in enumerate(RULES):
|
||||
logger.debug(f'\r{i + 1:03d}/{num_rules}')
|
||||
res = True
|
||||
for smiles in ALL_SMILES:
|
||||
try:
|
||||
ambit_res, _, rdkit_res, _ = run_both_engines(smiles, r.smirks)
|
||||
|
||||
for comp, ambit_prod in zip(bt_rule['compounds'], bt_rule['products']):
|
||||
res &= set(ambit_res) == set(rdkit_res)
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
|
||||
products = FormatConverter.apply(comp['smiles'], smirks)
|
||||
|
||||
all_rdkit_prods = []
|
||||
for ps in products:
|
||||
for p in ps:
|
||||
all_rdkit_prods.append(p)
|
||||
|
||||
all_rdkit_prods = list(set(all_rdkit_prods))
|
||||
|
||||
ambit_smiles, ambit_errors = FormatConverter.sanitize_smiles(ambit_prod)
|
||||
rdkit_smiles, rdkit_errors = FormatConverter.sanitize_smiles(all_rdkit_prods)
|
||||
|
||||
for x in ambit_smiles:
|
||||
all_prods.add(x)
|
||||
|
||||
# TODO mode "intersection"
|
||||
# partial_res = (len(set(ambit_smiles).intersection(set(rdkit_smiles))) > 0) or (len(ambit_smiles) == 0)
|
||||
# FAILED (failures=37)
|
||||
|
||||
# TODO mode = "full ambit"
|
||||
# partial_res = len(set(ambit_smiles).intersection(set(rdkit_smiles))) == len(ambit_smiles)
|
||||
# FAILED (failures=46)
|
||||
|
||||
# TODO mode = "equality"
|
||||
partial_res = set(ambit_smiles) == set(rdkit_smiles)
|
||||
# FAILED (failures=69)
|
||||
|
||||
res &= partial_res
|
||||
|
||||
results.append(
|
||||
{
|
||||
'name': bt_rule_name,
|
||||
'id': bt_rule['id'].split('/')[-1],
|
||||
'url': bt_rule['id'],
|
||||
'status': res,
|
||||
'detail_url': s.SERVER_URL + '/migration/' + bt_rule['id'].replace('https://envipath.org/', '')
|
||||
}
|
||||
)
|
||||
results.append({
|
||||
'name': r.name,
|
||||
'detail_url': s.SERVER_URL + '/migration/' + r.url.replace('https://envipath.org/', '').replace('http://localhost:8000/', ''),
|
||||
'id': str(r.uuid),
|
||||
'url': r.url,
|
||||
'status': res,
|
||||
})
|
||||
|
||||
if res:
|
||||
success += 1
|
||||
@ -84,7 +95,6 @@ def migration(request):
|
||||
error += 1
|
||||
|
||||
total += 1
|
||||
|
||||
results = sorted(results, key=lambda x: (x['status'], x['name']))
|
||||
|
||||
migration_status = {
|
||||
@ -110,69 +120,51 @@ def migration_detail(request, package_uuid, rule_uuid):
|
||||
if request.method == 'GET':
|
||||
context = get_base_context(request)
|
||||
|
||||
p = PackageManager.get_package_by_id(current_user, package_uuid)
|
||||
rule = Rule.objects.get(package=p, uuid=rule_uuid)
|
||||
BBD = Package.objects.get(name='EAWAG-BBD')
|
||||
STRUCTURES = CompoundStructure.objects.filter(compound__package=BBD)
|
||||
rule = Rule.objects.get(package=BBD, uuid=rule_uuid)
|
||||
|
||||
bt_rule_name = rule.name
|
||||
|
||||
data = json.load(gzip.open(s.BASE_DIR / 'fixtures' / 'ambit_rules.json.gz', 'rb'))
|
||||
|
||||
bt_rule = data[bt_rule_name]
|
||||
smirks = bt_rule['smirks']
|
||||
|
||||
results = []
|
||||
smirks = rule.smirks
|
||||
|
||||
res = True
|
||||
results = []
|
||||
|
||||
all_prods = set()
|
||||
for comp, ambit_prod in zip(bt_rule['compounds'], bt_rule['products']):
|
||||
# if comp['smiles'] != 'CC1=C(C(=C(C=N1)CO)C=O)O':
|
||||
# continue
|
||||
for structure in STRUCTURES:
|
||||
|
||||
products = FormatConverter.apply(comp['smiles'], smirks)
|
||||
|
||||
all_rdkit_prods = []
|
||||
for ps in products:
|
||||
for p in ps:
|
||||
all_rdkit_prods.append(p)
|
||||
|
||||
all_rdkit_prods = list(set(all_rdkit_prods))
|
||||
|
||||
ambit_smiles, ambit_errors = FormatConverter.sanitize_smiles(ambit_prod)
|
||||
rdkit_smiles, rdkit_errors = FormatConverter.sanitize_smiles(all_rdkit_prods)
|
||||
ambit_smiles, ambit_errors, rdkit_smiles, rdkit_errors = run_both_engines(structure.smiles, smirks)
|
||||
|
||||
for x in ambit_smiles:
|
||||
all_prods.add(x)
|
||||
|
||||
# TODO mode "intersection"
|
||||
# partial_res = (len(set(ambit_smiles).intersection(set(rdkit_smiles))) > 0) or (len(ambit_smiles) == 0)
|
||||
# FAILED (failures=37)
|
||||
# FAILED (failures=18)
|
||||
|
||||
# TODO mode = "full ambit"
|
||||
# partial_res = len(set(ambit_smiles).intersection(set(rdkit_smiles))) == len(ambit_smiles)
|
||||
# FAILED (failures=46)
|
||||
# partial_res = len(set(ambit_smiles).intersection(set(rdkit_smiles))) == len(set(ambit_smiles))
|
||||
# FAILED (failures=34)
|
||||
|
||||
# TODO mode = "equality"
|
||||
partial_res = set(ambit_smiles) == set(rdkit_smiles)
|
||||
# FAILED (failures=69)
|
||||
# FAILED (failures=30)
|
||||
|
||||
#
|
||||
if len(ambit_smiles) or len(rdkit_smiles):
|
||||
temp = {
|
||||
'url': comp['id'],
|
||||
'id': comp['id'].split('/')[-1],
|
||||
'name': comp['name'],
|
||||
'initial_smiles': comp['smiles'],
|
||||
'url': structure.url,
|
||||
'id': str(structure.uuid),
|
||||
'name': structure.name,
|
||||
'initial_smiles': structure.smiles,
|
||||
'ambit_smiles': sorted(list(ambit_smiles)),
|
||||
'rdkit_smiles': sorted(list(rdkit_smiles)),
|
||||
'status': set(ambit_smiles) == set(rdkit_smiles),
|
||||
}
|
||||
|
||||
if set(ambit_smiles) != set(rdkit_smiles):
|
||||
detail = f"""
|
||||
detail = f"""
|
||||
BT: {bt_rule_name}
|
||||
SMIRKS: {bt_rule['smirks']}
|
||||
Compound: {comp['smiles']}
|
||||
Compound URL: {comp['id']}
|
||||
SMIRKS: {smirks}
|
||||
Compound: {structure.smiles}
|
||||
Compound URL: {structure.url}
|
||||
Num ambit: {len(set(ambit_smiles))}
|
||||
Num rdkit: {len(set(rdkit_smiles))}
|
||||
Num Intersection A: {len(set(ambit_smiles).intersection(set(rdkit_smiles)))}
|
||||
@ -185,8 +177,7 @@ def migration_detail(request, package_uuid, rule_uuid):
|
||||
rdkit_errors: {rdkit_errors}
|
||||
"""
|
||||
|
||||
temp['detail'] = '\n'.join([x.strip() for x in detail.split('\n')])
|
||||
# print(detail.strip())
|
||||
temp['detail'] = '\n'.join([x.strip() for x in detail.split('\n')])
|
||||
|
||||
results.append(temp)
|
||||
|
||||
@ -197,3 +188,49 @@ def migration_detail(request, package_uuid, rule_uuid):
|
||||
context['res'] = res
|
||||
context['bt_rule_name'] = bt_rule_name
|
||||
return render(request, 'migration_detail.html', context)
|
||||
|
||||
|
||||
def compare(request):
    """Render the ambit vs. RDKit comparison page.

    GET pre-fills the form with an example SMIRKS and SMILES. POST applies
    the submitted SMIRKS to the submitted SMILES with both engines and puts
    the sanitized, de-duplicated and sorted product lists into the template
    context, together with ``diff`` (products ambit produced that RDKit did
    not) and, if one exists, a ``SimpleAmbitRule`` with the same SMIRKS.
    Any other HTTP method gets a 405 response.
    """
    context = get_base_context(request)
    method = request.method

    if method == 'GET':
        # Example inputs shown in the form on first load.
        example_smirks = "[#1,#6:6][#7;X3;!$(NC1CC1)!$([N][C]=O)!$([!#8]CNC=O):1]([#1,#6:7])[#6;A;X4:2][H:3]>>[#1,#6:6][#7;X3:1]([#1,#6:7])[H:3].[#6;A:2]=O"
        example_smiles = "C(CC(=O)N[C@@H](CS[Se-])C(=O)NCC(=O)[O-])[C@@H](C(=O)[O-])N"
        context["smirks"] = example_smirks
        context["smiles"] = example_smiles
        return render(request, 'compare.html', context)

    if method != 'POST':
        return HttpResponseNotAllowed(['GET', 'POST'])

    smiles = request.POST.get("smiles")
    smirks = request.POST.get("smirks")

    from envipy_ambit import apply

    # ambit engine: takes (smirks, smiles); products are stringified first.
    ambit_raw = [str(product) for product in apply(smirks, smiles)]
    ambit_res, _ = FormatConverter.sanitize_smiles(ambit_raw)

    # RDKit engine: takes (smiles, smirks) and yields product sets, which
    # are flattened and de-duplicated before sanitizing.
    rdkit_unique = list({
        prod
        for product_set in FormatConverter.apply(smiles, smirks)
        for prod in product_set
    })
    rdkit_res, _ = FormatConverter.sanitize_smiles(rdkit_unique)

    ambit_set = set(ambit_res)
    rdkit_set = set(rdkit_res)

    context["result"] = True
    context['ambit_res'] = sorted(ambit_set)
    context['rdkit_res'] = sorted(rdkit_set)
    # One-directional: only products ambit produced that RDKit did not.
    context['diff'] = sorted(ambit_set - rdkit_set)
    context["smirks"] = smirks
    context["smiles"] = smiles

    matching_rules = SimpleAmbitRule.objects.filter(smirks=smirks)
    if matching_rules.exists():
        context["rule"] = matching_rules.first()

    return render(request, 'compare.html', context)
|
||||
|
||||
Reference in New Issue
Block a user