Merge remote-tracking branch 'origin/develop' into enhancement/dataset

# Conflicts:
#	epdb/models.py
#	tests/test_enviformer.py
#	tests/test_model.py
This commit is contained in:
Liam Brydon
2025-11-07 08:28:03 +13:00
25 changed files with 1024 additions and 280 deletions

View File

@ -16,3 +16,5 @@ POSTGRES_PORT=
# MAIL # MAIL
EMAIL_HOST_USER= EMAIL_HOST_USER=
EMAIL_HOST_PASSWORD= EMAIL_HOST_PASSWORD=
# MATOMO
MATOMO_SITE_ID=

View File

@ -357,3 +357,6 @@ if MS_ENTRA_ENABLED:
MS_ENTRA_AUTHORITY = f"https://login.microsoftonline.com/{MS_ENTRA_TENANT_ID}" MS_ENTRA_AUTHORITY = f"https://login.microsoftonline.com/{MS_ENTRA_TENANT_ID}"
MS_ENTRA_REDIRECT_URI = os.environ["MS_REDIRECT_URI"] MS_ENTRA_REDIRECT_URI = os.environ["MS_REDIRECT_URI"]
MS_ENTRA_SCOPES = os.environ.get("MS_SCOPES", "").split(",") MS_ENTRA_SCOPES = os.environ.get("MS_SCOPES", "").split(",")
# Site ID 10 -> beta.envipath.org
MATOMO_SITE_ID = os.environ.get("MATOMO_SITE_ID", "10")

View File

@ -7,6 +7,7 @@ from .models import (
GroupPackagePermission, GroupPackagePermission,
Package, Package,
MLRelativeReasoning, MLRelativeReasoning,
EnviFormer,
Compound, Compound,
CompoundStructure, CompoundStructure,
SimpleAmbitRule, SimpleAmbitRule,
@ -19,11 +20,12 @@ from .models import (
Setting, Setting,
ExternalDatabase, ExternalDatabase,
ExternalIdentifier, ExternalIdentifier,
JobLog,
) )
class UserAdmin(admin.ModelAdmin): class UserAdmin(admin.ModelAdmin):
pass list_display = ["username", "email", "is_active"]
class UserPackagePermissionAdmin(admin.ModelAdmin): class UserPackagePermissionAdmin(admin.ModelAdmin):
@ -38,8 +40,14 @@ class GroupPackagePermissionAdmin(admin.ModelAdmin):
pass pass
class JobLogAdmin(admin.ModelAdmin):
pass
class EPAdmin(admin.ModelAdmin): class EPAdmin(admin.ModelAdmin):
search_fields = ["name", "description"] search_fields = ["name", "description"]
list_display = ["name", "url", "created"]
ordering = ["-created"]
class PackageAdmin(EPAdmin): class PackageAdmin(EPAdmin):
@ -50,6 +58,10 @@ class MLRelativeReasoningAdmin(EPAdmin):
pass pass
class EnviFormerAdmin(EPAdmin):
pass
class CompoundAdmin(EPAdmin): class CompoundAdmin(EPAdmin):
pass pass
@ -102,8 +114,10 @@ admin.site.register(User, UserAdmin)
admin.site.register(UserPackagePermission, UserPackagePermissionAdmin) admin.site.register(UserPackagePermission, UserPackagePermissionAdmin)
admin.site.register(Group, GroupAdmin) admin.site.register(Group, GroupAdmin)
admin.site.register(GroupPackagePermission, GroupPackagePermissionAdmin) admin.site.register(GroupPackagePermission, GroupPackagePermissionAdmin)
admin.site.register(JobLog, JobLogAdmin)
admin.site.register(Package, PackageAdmin) admin.site.register(Package, PackageAdmin)
admin.site.register(MLRelativeReasoning, MLRelativeReasoningAdmin) admin.site.register(MLRelativeReasoning, MLRelativeReasoningAdmin)
admin.site.register(EnviFormer, EnviFormerAdmin)
admin.site.register(Compound, CompoundAdmin) admin.site.register(Compound, CompoundAdmin)
admin.site.register(CompoundStructure, CompoundStructureAdmin) admin.site.register(CompoundStructure, CompoundStructureAdmin)
admin.site.register(SimpleAmbitRule, SimpleAmbitRuleAdmin) admin.site.register(SimpleAmbitRule, SimpleAmbitRuleAdmin)

View File

@ -7,10 +7,11 @@ from epdb.models import MLRelativeReasoning, EnviFormer, Package
class Command(BaseCommand): class Command(BaseCommand):
"""This command can be run with """This command can be run with
`python manage.py create_ml_models [model_names] -d [data_packages] OPTIONAL: -e [eval_packages]` `python manage.py create_ml_models [model_names] -d [data_packages] FOR MLRR ONLY: -r [rule_packages]
For example, to train both EnviFormer and MLRelativeReasoning on BBD and SOIL and evaluate them on SLUDGE OPTIONAL: -e [eval_packages] -t threshold`
the below command would be used: For example, to train both EnviFormer and MLRelativeReasoning on BBD and SOIL and evaluate them on SLUDGE with a
`python manage.py create_ml_models enviformer mlrr -d bbd soil -e sludge threshold of 0.6, the below command would be used:
`python manage.py create_ml_models enviformer mlrr -d bbd soil -e sludge -t 0.6
""" """
def add_arguments(self, parser): def add_arguments(self, parser):
@ -34,6 +35,13 @@ class Command(BaseCommand):
help="Rule Packages mandatory for MLRR", help="Rule Packages mandatory for MLRR",
default=[], default=[],
) )
parser.add_argument(
"-t",
"--threshold",
type=float,
help="Model prediction threshold",
default=0.5,
)
@transaction.atomic @transaction.atomic
def handle(self, *args, **options): def handle(self, *args, **options):
@ -67,7 +75,11 @@ class Command(BaseCommand):
return packages return packages
# Iteratively create models in options["model_names"] # Iteratively create models in options["model_names"]
print(f"Creating models: {options['model_names']}") print(f"Creating models: {options['model_names']}\n"
f"Data packages: {options['data_packages']}\n"
f"Rule Packages (only for MLRR): {options['rule_packages']}\n"
f"Eval Packages: {options['eval_packages']}\n"
f"Threshold: {options['threshold']:.2f}")
data_packages = decode_packages(options["data_packages"]) data_packages = decode_packages(options["data_packages"])
eval_packages = decode_packages(options["eval_packages"]) eval_packages = decode_packages(options["eval_packages"])
rule_packages = decode_packages(options["rule_packages"]) rule_packages = decode_packages(options["rule_packages"])
@ -78,9 +90,10 @@ class Command(BaseCommand):
pack, pack,
data_packages=data_packages, data_packages=data_packages,
eval_packages=eval_packages, eval_packages=eval_packages,
threshold=0.5, threshold=options['threshold'],
name="EnviFormer - T0.5", name=f"EnviFormer - {', '.join(options['data_packages'])} - T{options['threshold']:.2f}",
description="EnviFormer transformer", description=f"EnviFormer transformer trained on {options['data_packages']} "
f"evaluated on {options['eval_packages']}.",
) )
elif model_name == "mlrr": elif model_name == "mlrr":
model = MLRelativeReasoning.create( model = MLRelativeReasoning.create(
@ -88,9 +101,10 @@ class Command(BaseCommand):
rule_packages=rule_packages, rule_packages=rule_packages,
data_packages=data_packages, data_packages=data_packages,
eval_packages=eval_packages, eval_packages=eval_packages,
threshold=0.5, threshold=options['threshold'],
name="ECC - BBD - T0.5", name=f"ECC - {', '.join(options['data_packages'])} - T{options['threshold']:.2f}",
description="ML Relative Reasoning", description=f"ML Relative Reasoning trained on {options['data_packages']} with rules from "
f"{options['rule_packages']} and evaluated on {options['eval_packages']}.",
) )
else: else:
raise ValueError(f"Cannot create model of type {model_name}, unknown model type") raise ValueError(f"Cannot create model of type {model_name}, unknown model type")
@ -100,6 +114,6 @@ class Command(BaseCommand):
print(f"Training {model_name}") print(f"Training {model_name}")
model.build_model() model.build_model()
print(f"Evaluating {model_name}") print(f"Evaluating {model_name}")
model.evaluate_model() model.evaluate_model(False, eval_packages=eval_packages)
print(f"Saving {model_name}") print(f"Saving {model_name}")
model.save() model.save()

View File

@ -0,0 +1,38 @@
from datetime import date, timedelta
from django.core.management.base import BaseCommand
from django.db import transaction
from epdb.models import JobLog
class Command(BaseCommand):
    """Synchronise JobLog rows with their Celery task state.

    Polls every log still in the INITIAL state and persists any terminal
    Celery state via ``JobLog.check_for_update()``. Optionally purges logs
    older than ``--cleanup`` days beforehand, and finally prints a status
    histogram as a quick health overview.
    """

    help = "Update pending JobLog entries from Celery and optionally purge old logs."

    def add_arguments(self, parser):
        parser.add_argument(
            "--cleanup",
            type=int,
            default=None,
            help="Remove all logs older than this number of days. Default is None, which does not remove any logs.",
        )

    @transaction.atomic
    def handle(self, *args, **options):
        # Imported here (not at module level) to keep the command's import
        # surface minimal; hoisted to the top of handle() for visibility.
        from django.db.models import Count

        # Purge old logs first so we don't poll entries that are about to
        # be deleted anyway.
        if options["cleanup"] is not None:
            cleanup_dt = date.today() - timedelta(days=options["cleanup"])
            print(JobLog.objects.filter(created__lt=cleanup_dt).delete())

        logs = JobLog.objects.filter(status="INITIAL")
        print(f"Found {logs.count()} logs to update")

        # Count how many rows actually transitioned to a terminal state.
        updated = sum(1 for log in logs if log.check_for_update())
        print(f"Updated {updated} logs")

        # Status histogram: one line per distinct status with its row count.
        qs = JobLog.objects.values("status").annotate(total=Count("status"))
        for row in qs:
            print(row["status"], row["total"])

View File

@ -0,0 +1,66 @@
# Generated by Django 5.2.7 on 2025-10-27 09:39
import django.db.models.deletion
import django.utils.timezone
import model_utils.fields
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
    # Adds the JobLog table used to track Celery task executions per user.
    # Auto-generated by Django; do not hand-edit field definitions.

    dependencies = [
        ("epdb", "0008_enzymelink"),
    ]

    operations = [
        migrations.CreateModel(
            name="JobLog",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
                    ),
                ),
                (
                    "created",
                    model_utils.fields.AutoCreatedField(
                        default=django.utils.timezone.now, editable=False, verbose_name="created"
                    ),
                ),
                (
                    "modified",
                    model_utils.fields.AutoLastModifiedField(
                        default=django.utils.timezone.now, editable=False, verbose_name="modified"
                    ),
                ),
                # Celery task UUID; unique so each task maps to exactly one log row.
                ("task_id", models.UUIDField(unique=True)),
                ("job_name", models.TextField()),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("INITIAL", "Initial"),
                            ("SUCCESS", "Success"),
                            ("FAILURE", "Failure"),
                            ("REVOKED", "Revoked"),
                            ("IGNORED", "Ignored"),
                        ],
                        default="INITIAL",
                        max_length=20,
                    ),
                ),
                # done_at / task_result stay NULL until the task reaches a
                # terminal state.
                ("done_at", models.DateTimeField(blank=True, default=None, null=True)),
                ("task_result", models.TextField(blank=True, default=None, null=True)),
                (
                    "user",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
    ]

View File

@ -2226,10 +2226,18 @@ class PackageBasedModel(EPModel):
self.model_status = self.BUILT_NOT_EVALUATED self.model_status = self.BUILT_NOT_EVALUATED
self.save() self.save()
def evaluate_model(self, **kwargs): def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None, **kwargs):
if self.model_status != self.BUILT_NOT_EVALUATED: if self.model_status != self.BUILT_NOT_EVALUATED:
raise ValueError(f"Can't evaluate a model in state {self.model_status}!") raise ValueError(f"Can't evaluate a model in state {self.model_status}!")
if multigen:
self.multigen_eval = multigen
self.save()
if eval_packages is not None:
for p in eval_packages:
self.eval_packages.add(p)
self.model_status = self.EVALUATING self.model_status = self.EVALUATING
self.save() self.save()
@ -2526,7 +2534,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel):
package: "Package", package: "Package",
rule_packages: List["Package"], rule_packages: List["Package"],
data_packages: List["Package"], data_packages: List["Package"],
eval_packages: List["Package"],
threshold: float = 0.5, threshold: float = 0.5,
min_count: int = 10, min_count: int = 10,
max_count: int = 0, max_count: int = 0,
@ -2575,10 +2582,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel):
for p in rule_packages: for p in rule_packages:
rbrr.data_packages.add(p) rbrr.data_packages.add(p)
if eval_packages:
for p in eval_packages:
rbrr.eval_packages.add(p)
rbrr.save() rbrr.save()
return rbrr return rbrr
@ -2633,7 +2636,6 @@ class MLRelativeReasoning(PackageBasedModel):
package: "Package", package: "Package",
rule_packages: List["Package"], rule_packages: List["Package"],
data_packages: List["Package"], data_packages: List["Package"],
eval_packages: List["Package"],
threshold: float = 0.5, threshold: float = 0.5,
name: "str" = None, name: "str" = None,
description: str = None, description: str = None,
@ -2673,10 +2675,6 @@ class MLRelativeReasoning(PackageBasedModel):
for p in rule_packages: for p in rule_packages:
mlrr.data_packages.add(p) mlrr.data_packages.add(p)
if eval_packages:
for p in eval_packages:
mlrr.eval_packages.add(p)
if build_app_domain: if build_app_domain:
ad = ApplicabilityDomain.create( ad = ApplicabilityDomain.create(
mlrr, mlrr,
@ -2953,7 +2951,6 @@ class EnviFormer(PackageBasedModel):
def create( def create(
package: "Package", package: "Package",
data_packages: List["Package"], data_packages: List["Package"],
eval_packages: List["Package"],
threshold: float = 0.5, threshold: float = 0.5,
name: "str" = None, name: "str" = None,
description: str = None, description: str = None,
@ -2986,10 +2983,6 @@ class EnviFormer(PackageBasedModel):
for p in data_packages: for p in data_packages:
mod.data_packages.add(p) mod.data_packages.add(p)
if eval_packages:
for p in eval_packages:
mod.eval_packages.add(p)
# if build_app_domain: # if build_app_domain:
# ad = ApplicabilityDomain.create(mod, app_domain_num_neighbours, app_domain_reliability_threshold, # ad = ApplicabilityDomain.create(mod, app_domain_num_neighbours, app_domain_reliability_threshold,
# app_domain_local_compatibility_threshold) # app_domain_local_compatibility_threshold)
@ -3082,10 +3075,18 @@ class EnviFormer(PackageBasedModel):
args = {"clz": "EnviFormer"} args = {"clz": "EnviFormer"}
return args return args
def evaluate_model(self, **kwargs): def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None, **kwargs):
if self.model_status != self.BUILT_NOT_EVALUATED: if self.model_status != self.BUILT_NOT_EVALUATED:
raise ValueError(f"Can't evaluate a model in state {self.model_status}!") raise ValueError(f"Can't evaluate a model in state {self.model_status}!")
if multigen:
self.multigen_eval = multigen
self.save()
if eval_packages is not None:
for p in eval_packages:
self.eval_packages.add(p)
self.model_status = self.EVALUATING self.model_status = self.EVALUATING
self.save() self.save()
@ -3226,7 +3227,7 @@ class EnviFormer(PackageBasedModel):
ds = self.load_dataset() ds = self.load_dataset()
n_splits = kwargs.get("n_splits", 20) n_splits = kwargs.get("n_splits", 20)
shuff = ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42) shuff = ShuffleSplit(n_splits=n_splits, test_size=0.1, random_state=42)
# Single gen eval is done in one loop of train then evaluate rather than storing all n_splits trained models # Single gen eval is done in one loop of train then evaluate rather than storing all n_splits trained models
# this helps reduce the memory footprint. # this helps reduce the memory footprint.
@ -3294,7 +3295,7 @@ class EnviFormer(PackageBasedModel):
# Compute splits of the collected pathway and evaluate. Like single gen we train and evaluate in each # Compute splits of the collected pathway and evaluate. Like single gen we train and evaluate in each
# iteration instead of storing all trained models. # iteration instead of storing all trained models.
for split_id, (train, test) in enumerate( for split_id, (train, test) in enumerate(
ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42).split(pathways) ShuffleSplit(n_splits=n_splits, test_size=0.1, random_state=42).split(pathways)
): ):
train_pathways = [pathways[i] for i in train] train_pathways = [pathways[i] for i in train]
test_pathways = [pathways[i] for i in test] test_pathways = [pathways[i] for i in test]
@ -3577,3 +3578,53 @@ class Setting(EnviPathModel):
self.public = True self.public = True
self.global_default = True self.global_default = True
self.save() self.save()
class JobLogStatus(models.TextChoices):
    # Terminal Celery task states, plus INITIAL for tasks that have been
    # dispatched but not yet observed in a terminal state.
    INITIAL = "INITIAL", "Initial"
    SUCCESS = "SUCCESS", "Success"
    FAILURE = "FAILURE", "Failure"
    REVOKED = "REVOKED", "Revoked"
    IGNORED = "IGNORED", "Ignored"
class JobLog(TimeStampedModel):
    """Tracks one dispatched Celery task (or eagerly-run job) for a user.

    Rows start in status INITIAL and are moved to a terminal state by
    ``check_for_update()``, which polls the Celery result backend.
    """

    # Owning user; logs are removed together with the user (CASCADE).
    user = models.ForeignKey("epdb.User", models.CASCADE)
    # Celery task UUID (a fabricated UUID is used for eagerly-run jobs).
    task_id = models.UUIDField(unique=True)
    job_name = models.TextField(null=False, blank=False)
    status = models.CharField(
        max_length=20,
        choices=JobLogStatus.choices,
        default=JobLogStatus.INITIAL,
    )
    done_at = models.DateTimeField(null=True, blank=True, default=None)
    # Task return value; only populated when the task SUCCEEDED.
    task_result = models.TextField(null=True, blank=True, default=None)

    def check_for_update(self):
        """Poll Celery for this task's state and persist terminal transitions.

        Returns True when the row was updated and saved, False otherwise.
        """
        async_res = self.get_result()
        new_status = async_res.state
        # Kept in sync with JobLogStatus (everything except INITIAL).
        TERMINAL_STATES = [
            "SUCCESS",
            "FAILURE",
            "REVOKED",
            "IGNORED",
        ]
        if new_status != self.status and new_status in TERMINAL_STATES:
            self.status = new_status
            self.done_at = async_res.date_done
            if new_status == "SUCCESS":
                # NOTE(review): async_res.result may not be a string; confirm
                # implicit coercion into this TextField is acceptable. Failure
                # results/tracebacks are deliberately not stored here.
                self.task_result = async_res.result
            self.save()
            return True
        return False

    def get_result(self):
        # Imported lazily so the model module does not require Celery at
        # import time.
        from celery.result import AsyncResult

        return AsyncResult(str(self.task_id))

View File

@ -1,10 +1,15 @@
import csv
import io
import logging import logging
from typing import Optional from datetime import datetime
from celery.utils.functional import LRUCache from typing import Any, Callable, List, Optional
from celery import shared_task from uuid import uuid4
from epdb.models import Pathway, Node, EPModel, Setting
from epdb.logic import SPathway
from celery import shared_task
from celery.utils.functional import LRUCache
from epdb.logic import SPathway
from epdb.models import EPModel, JobLog, Node, Package, Pathway, Rule, Setting, User, Edge
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
ML_CACHE = LRUCache(3) # Cache the three most recent ML models to reduce load times. ML_CACHE = LRUCache(3) # Cache the three most recent ML models to reduce load times.
@ -16,6 +21,40 @@ def get_ml_model(model_pk: int):
return ML_CACHE[model_pk] return ML_CACHE[model_pk]
def dispatch_eager(user: "User", job: Callable, *args, **kwargs):
    """Run *job* synchronously and record a SUCCESS JobLog entry for it.

    Counterpart to :func:`dispatch` for jobs executed in-process instead of
    being queued through Celery. Since no Celery task exists, a fresh UUID
    is fabricated for the log's ``task_id``.

    Returns the job's return value. Exceptions are logged and re-raised;
    no log row is written for failed eager jobs.
    """
    try:
        x = job(*args, **kwargs)
        log = JobLog()
        log.user = user
        log.task_id = uuid4()  # no Celery task id exists for eager runs
        log.job_name = job.__name__
        log.status = "SUCCESS"
        # NOTE(review): naive local datetime; consider
        # django.utils.timezone.now() if USE_TZ is enabled — TODO confirm.
        log.done_at = datetime.now()
        # Bug fix: `str(x) if x` silently dropped falsy-but-real results
        # such as 0, "" or [] — only a genuine None is stored as NULL now.
        log.task_result = str(x) if x is not None else None
        log.save()
        return x
    except Exception as e:
        logger.exception(e)
        raise e
def dispatch(user: "User", job: Callable, *args, **kwargs):
    """Queue *job* on Celery via ``.delay`` and persist an INITIAL JobLog.

    Exceptions raised while queueing or saving are logged and re-raised.
    """
    try:
        async_result = job.delay(*args, **kwargs)
        entry = JobLog(
            user=user,
            task_id=async_result.task_id,
            job_name=job.__name__,
            status="INITIAL",
        )
        entry.save()
        # NOTE(review): the result of a just-queued task is normally still
        # empty — confirm callers don't expect the task's return value here.
        return async_result.result
    except Exception as exc:
        logger.exception(exc)
        raise exc
@shared_task(queue="background") @shared_task(queue="background")
def mul(a, b): def mul(a, b):
return a * b return a * b
@ -33,17 +72,55 @@ def send_registration_mail(user_pk: int):
pass pass
@shared_task(queue="model") @shared_task(bind=True, queue="model")
def build_model(model_pk: int): def build_model(self, model_pk: int):
mod = EPModel.objects.get(id=model_pk) mod = EPModel.objects.get(id=model_pk)
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=mod.url)
try:
mod.build_dataset() mod.build_dataset()
mod.build_model() mod.build_model()
except Exception as e:
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(
status="FAILED", task_result=mod.url
)
raise e
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=mod.url)
return mod.url
@shared_task(queue="model") @shared_task(bind=True, queue="model")
def evaluate_model(model_pk: int): def evaluate_model(self, model_pk: int, multigen: bool, package_pks: Optional[list] = None):
packages = None
if package_pks:
packages = Package.objects.filter(pk__in=package_pks)
mod = EPModel.objects.get(id=model_pk) mod = EPModel.objects.get(id=model_pk)
mod.evaluate_model() if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=mod.url)
try:
mod.evaluate_model(multigen, eval_packages=packages)
except Exception as e:
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(
status="FAILED", task_result=mod.url
)
raise e
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=mod.url)
return mod.url
@shared_task(queue="model") @shared_task(queue="model")
@ -52,9 +129,13 @@ def retrain(model_pk: int):
mod.retrain() mod.retrain()
@shared_task(queue="predict") @shared_task(bind=True, queue="predict")
def predict( def predict(
pw_pk: int, pred_setting_pk: int, limit: Optional[int] = None, node_pk: Optional[int] = None self,
pw_pk: int,
pred_setting_pk: int,
limit: Optional[int] = None,
node_pk: Optional[int] = None,
) -> Pathway: ) -> Pathway:
pw = Pathway.objects.get(id=pw_pk) pw = Pathway.objects.get(id=pw_pk)
setting = Setting.objects.get(id=pred_setting_pk) setting = Setting.objects.get(id=pred_setting_pk)
@ -65,6 +146,9 @@ def predict(
pw.kv.update(**{"status": "running"}) pw.kv.update(**{"status": "running"})
pw.save() pw.save()
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=pw.url)
try: try:
# regular prediction # regular prediction
if limit is not None: if limit is not None:
@ -89,7 +173,111 @@ def predict(
except Exception as e: except Exception as e:
pw.kv.update({"status": "failed"}) pw.kv.update({"status": "failed"})
pw.save() pw.save()
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(
status="FAILED", task_result=pw.url
)
raise e raise e
pw.kv.update(**{"status": "completed"}) pw.kv.update(**{"status": "completed"})
pw.save() pw.save()
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=pw.url)
return pw.url
@shared_task(bind=True, queue="background")
def identify_missing_rules(
    self,
    pw_pks: List[int],
    rule_package_pk: int,
):
    """Report pathway reactions not covered by a rule package, as CSV text.

    For every pathway in *pw_pks*, compares its edges against the applicable
    rules of package *rule_package_pk* (via ``PathwayUtils.find_missing_rules``)
    and emits one CSV row per edge with missing rules.

    Returns:
        The complete CSV document as a string.
    """
    from utilities.misc import PathwayUtils

    rules = Package.objects.get(pk=rule_package_pk).get_applicable_rules()

    rows: List[Any] = []
    header = [
        "Package Name",
        "Pathway Name",
        "Educt Name",
        "Educt SMILES",
        "Reaction Name",
        "Reaction SMIRKS",
        "Triggered Rules",
        "Reactant SMARTS",
        "Product SMARTS",
        "Product Names",
        "Product SMILES",
    ]
    rows.append(header)

    for pw in Pathway.objects.filter(pk__in=pw_pks):
        pu = PathwayUtils(pw)
        missing_rules = pu.find_missing_rules(rules)
        package_name = pw.package.name
        pathway_name = pw.name
        for edge_url, rule_chain in missing_rules.items():
            row: List[Any] = [package_name, pathway_name]
            edge = Edge.objects.get(url=edge_url)

            # NOTE(review): every educt appends two cells, so an edge with
            # several educts shifts later columns out of line with the
            # header — confirm edges here always have a single educt.
            educts = edge.start_nodes.all()
            for educt in educts:
                row.append(educt.default_node_label.name)
                row.append(educt.default_node_label.smiles)

            row.append(edge.edge_label.name)
            row.append(edge.edge_label.smirks())

            rule_names = []
            reactant_smarts = []
            product_smarts = []
            for chain_entry in rule_chain:
                # chain_entry[0] is the rule URL. Fixed: the original rebound
                # the loop variable to the fetched Rule, shadowing the chain
                # entry.
                rule = Rule.objects.get(url=chain_entry[0])
                rule_names.append(rule.name)
                rs = rule.reactants_smarts
                if isinstance(rs, set):
                    rs = list(rs)
                ps = rule.products_smarts
                if isinstance(ps, set):
                    ps = list(ps)
                reactant_smarts.append(rs)
                product_smarts.append(ps)
            row.append(rule_names)
            row.append(reactant_smarts)
            row.append(product_smarts)

            products = edge.end_nodes.all()
            product_names = []
            product_smiles = []
            for product in products:
                product_names.append(product.default_node_label.name)
                product_smiles.append(product.default_node_label.smiles)
            row.append(product_names)
            row.append(product_smiles)
            rows.append(row)

    buffer = io.StringIO()
    writer = csv.writer(buffer)
    writer.writerows(rows)
    buffer.seek(0)
    return buffer.getvalue()

View File

@ -1,8 +1,21 @@
from django import template from django import template
from pydantic import AnyHttpUrl, ValidationError
from pydantic.type_adapter import TypeAdapter
register = template.Library() register = template.Library()
url_adapter = TypeAdapter(AnyHttpUrl)
@register.filter @register.filter
def classname(obj): def classname(obj):
return obj.__class__.__name__ return obj.__class__.__name__
@register.filter
def is_url(value):
    """Template filter: True if *value* validates as an HTTP(S) URL."""
    try:
        url_adapter.validate_python(value)
    except ValidationError:
        return False
    return True

View File

@ -190,6 +190,7 @@ urlpatterns = [
re_path(r"^indigo/dearomatize$", v.dearomatize, name="indigo_dearomatize"), re_path(r"^indigo/dearomatize$", v.dearomatize, name="indigo_dearomatize"),
re_path(r"^indigo/layout$", v.layout, name="indigo_layout"), re_path(r"^indigo/layout$", v.layout, name="indigo_layout"),
re_path(r"^depict$", v.depict, name="depict"), re_path(r"^depict$", v.depict, name="depict"),
re_path(r"^jobs", v.jobs, name="jobs"),
# OAuth Stuff # OAuth Stuff
path("o/userinfo/", v.userinfo, name="oauth_userinfo"), path("o/userinfo/", v.userinfo, name="oauth_userinfo"),
] ]

View File

@ -47,6 +47,7 @@ from .models import (
ExternalDatabase, ExternalDatabase,
ExternalIdentifier, ExternalIdentifier,
EnzymeLink, EnzymeLink,
JobLog,
) )
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -236,6 +237,7 @@ def get_base_context(request, for_user=None) -> Dict[str, Any]:
"enabled_features": s.FLAGS, "enabled_features": s.FLAGS,
"debug": s.DEBUG, "debug": s.DEBUG,
"external_databases": ExternalDatabase.get_databases(), "external_databases": ExternalDatabase.get_databases(),
"site_id": s.MATOMO_SITE_ID,
}, },
} }
@ -754,8 +756,8 @@ def package_models(request, package_uuid):
context["unreviewed_objects"] = unreviewed_model_qs context["unreviewed_objects"] = unreviewed_model_qs
context["model_types"] = { context["model_types"] = {
"ML Relative Reasoning": "ml-relative-reasoning", "ML Relative Reasoning": "mlrr",
"Rule Based Relative Reasoning": "rule-based-relative-reasoning", "Rule Based Relative Reasoning": "rbrr",
} }
if s.FLAGS.get("ENVIFORMER", False): if s.FLAGS.get("ENVIFORMER", False):
@ -775,48 +777,40 @@ def package_models(request, package_uuid):
model_type = request.POST.get("model-type") model_type = request.POST.get("model-type")
if model_type == "enviformer":
threshold = float(request.POST.get(f"{model_type}-threshold", 0.5))
mod = EnviFormer.create(current_package, name, description, threshold)
elif model_type == "ml-relative-reasoning" or model_type == "rule-based-relative-reasoning":
# Generic fields for ML and Rule Based # Generic fields for ML and Rule Based
rule_packages = request.POST.getlist("package-based-relative-reasoning-rule-packages") rule_packages = request.POST.getlist("model-rule-packages")
data_packages = request.POST.getlist("package-based-relative-reasoning-data-packages") data_packages = request.POST.getlist("model-data-packages")
eval_packages = request.POST.getlist(
"package-based-relative-reasoning-evaluation-packages", []
)
# Generic params # Generic params
params = { params = {
"package": current_package, "package": current_package,
"name": name, "name": name,
"description": description, "description": description,
"rule_packages": [
PackageManager.get_package_by_url(current_user, p) for p in rule_packages
],
"data_packages": [ "data_packages": [
PackageManager.get_package_by_url(current_user, p) for p in data_packages PackageManager.get_package_by_url(current_user, p) for p in data_packages
], ],
"eval_packages": [
PackageManager.get_package_by_url(current_user, p) for p in eval_packages
],
} }
if model_type == "ml-relative-reasoning": if model_type == "enviformer":
threshold = float(request.POST.get("model-threshold", 0.5))
params["threshold"] = threshold
mod = EnviFormer.create(**params)
elif model_type == "mlrr":
# ML Specific # ML Specific
threshold = float(request.POST.get(f"{model_type}-threshold", 0.5)) threshold = float(request.POST.get("model-threshold", 0.5))
# TODO handle additional fingerprinter # TODO handle additional fingerprinter
# fingerprinter = request.POST.get(f"{model_type}-fingerprinter") # fingerprinter = request.POST.get("model-fingerprinter")
params["rule_packages"] = [
PackageManager.get_package_by_url(current_user, p) for p in rule_packages
]
# App Domain related parameters # App Domain related parameters
build_ad = request.POST.get("build-app-domain", False) == "on" build_ad = request.POST.get("build-app-domain", False) == "on"
num_neighbors = request.POST.get("num-neighbors", 5) num_neighbors = request.POST.get("num-neighbors", 5)
reliability_threshold = request.POST.get("reliability-threshold", 0.5) reliability_threshold = request.POST.get("reliability-threshold", 0.5)
local_compatibility_threshold = request.POST.get( local_compatibility_threshold = request.POST.get("local-compatibility-threshold", 0.5)
"local-compatibility-threshold", 0.5
)
params["threshold"] = threshold params["threshold"] = threshold
# params['fingerprinter'] = fingerprinter # params['fingerprinter'] = fingerprinter
@ -826,18 +820,24 @@ def package_models(request, package_uuid):
params["app_domain_local_compatibility_threshold"] = local_compatibility_threshold params["app_domain_local_compatibility_threshold"] = local_compatibility_threshold
mod = MLRelativeReasoning.create(**params) mod = MLRelativeReasoning.create(**params)
else: elif model_type == "rbrr":
params["rule_packages"] = [
PackageManager.get_package_by_url(current_user, p) for p in rule_packages
]
mod = RuleBasedRelativeReasoning.create(**params) mod = RuleBasedRelativeReasoning.create(**params)
elif s.FLAGS.get("PLUGINS", False) and model_type in s.CLASSIFIER_PLUGINS.values():
from .tasks import build_model pass
build_model.delay(mod.pk)
else: else:
return error( return error(
request, "Invalid model type.", f'Model type "{model_type}" is not supported."' request, "Invalid model type.", f'Model type "{model_type}" is not supported."'
) )
return redirect(mod.url)
from .tasks import dispatch, build_model
dispatch(current_user, build_model, mod.pk)
return redirect(mod.url)
else: else:
return HttpResponseNotAllowed(["GET", "POST"]) return HttpResponseNotAllowed(["GET", "POST"])
@ -865,6 +865,10 @@ def package_model(request, package_uuid, model_uuid):
return JsonResponse({"error": f'"{smiles}" is not a valid SMILES'}, status=400) return JsonResponse({"error": f'"{smiles}" is not a valid SMILES'}, status=400)
if classify: if classify:
from epdb.tasks import dispatch_eager, predict_simple
res = dispatch_eager(current_user, predict_simple, current_model.pk, stand_smiles)
pred_res = current_model.predict(stand_smiles) pred_res = current_model.predict(stand_smiles)
res = [] res = []
@ -909,9 +913,25 @@ def package_model(request, package_uuid, model_uuid):
current_model.delete() current_model.delete()
return redirect(current_package.url + "/model") return redirect(current_package.url + "/model")
elif hidden == "evaluate": elif hidden == "evaluate":
from .tasks import evaluate_model from .tasks import dispatch, evaluate_model
eval_type = request.POST.get("model-evaluation-type")
if eval_type not in ["sg", "mg"]:
return error(
request,
"Invalid evaluation type",
f'Evaluation type "{eval_type}" is not supported. Only "sg" and "mg" are supported.',
)
multigen = eval_type == "mg"
eval_packages = request.POST.getlist("model-evaluation-packages")
eval_package_ids = [
PackageManager.get_package_by_url(current_user, p).id for p in eval_packages
]
dispatch(current_user, evaluate_model, current_model.pk, multigen, eval_package_ids)
evaluate_model.delay(current_model.pk)
return redirect(current_model.url) return redirect(current_model.url)
else: else:
return HttpResponseBadRequest() return HttpResponseBadRequest()
@ -1809,9 +1829,9 @@ def package_pathways(request, package_uuid):
pw.setting = prediction_setting pw.setting = prediction_setting
pw.save() pw.save()
from .tasks import predict from .tasks import dispatch, predict
predict.delay(pw.pk, prediction_setting.pk, limit=limit) dispatch(current_user, predict, pw.pk, prediction_setting.pk, limit=limit)
return redirect(pw.url) return redirect(pw.url)
@ -1847,6 +1867,25 @@ def package_pathway(request, package_uuid, pathway_uuid):
return response return response
if (
request.GET.get("identify-missing-rules", False) == "true"
and request.GET.get("rule-package") is not None
):
from .tasks import dispatch_eager, identify_missing_rules
rule_package = PackageManager.get_package_by_url(
current_user, request.GET.get("rule-package")
)
res = dispatch_eager(
current_user, identify_missing_rules, [current_pathway.pk], rule_package.pk
)
filename = f"{current_pathway.name.replace(' ', '_')}_{current_pathway.uuid}.csv"
response = HttpResponse(res, content_type="text/csv")
response["Content-Disposition"] = f'attachment; filename="{filename}"'
return response
# Pathway d3_json() relies on a lot of related objects (Nodes, Structures, Edges, Reaction, Rules, ...) # Pathway d3_json() relies on a lot of related objects (Nodes, Structures, Edges, Reaction, Rules, ...)
# we will again fetch the current pathway identified by this url, but this time together with nearly all # we will again fetch the current pathway identified by this url, but this time together with nearly all
# related objects # related objects
@ -1930,10 +1969,16 @@ def package_pathway(request, package_uuid, pathway_uuid):
if node_url: if node_url:
n = current_pathway.get_node(node_url) n = current_pathway.get_node(node_url)
from .tasks import predict from .tasks import dispatch, predict
dispatch(
current_user,
predict,
current_pathway.pk,
current_pathway.setting.pk,
node_pk=n.pk,
)
# Dont delay?
predict(current_pathway.pk, current_pathway.setting.pk, node_pk=n.pk)
return JsonResponse({"success": current_pathway.url}) return JsonResponse({"success": current_pathway.url})
return HttpResponseBadRequest() return HttpResponseBadRequest()
@ -2705,6 +2750,24 @@ def setting(request, setting_uuid):
pass pass
def jobs(request):
    """Render the job-log overview page (collections/joblog.html).

    Superusers see every JobLog in the system; any other user only sees
    the JobLog rows attached to their own account.  Jobs are ordered
    newest-first by creation timestamp.
    """
    current_user = _anonymous_or_real(request)
    context = get_base_context(request)
    # NOTE(review): non-GET requests fall through this `if` and the view
    # returns None — presumably the URL conf only routes GET here; confirm.
    if request.method == "GET":
        # Object-type switch consumed by the shared collection templates.
        context["object_type"] = "joblog"
        context["breadcrumbs"] = [
            {"Home": s.SERVER_URL},
            {"Jobs": s.SERVER_URL + "/jobs"},
        ]
        if current_user.is_superuser:
            # Admins get a global, newest-first view of all jobs.
            context["jobs"] = JobLog.objects.all().order_by("-created")
        else:
            # Regular users are restricted to their own jobs.
            context["jobs"] = JobLog.objects.filter(user=current_user).order_by("-created")
        return render(request, "collections/joblog.html", context)
########### ###########
# KETCHER # # KETCHER #
########### ###########

View File

@ -22,6 +22,10 @@
<i class="glyphicon glyphicon-floppy-save"></i> Download Pathway as Image</a> <i class="glyphicon glyphicon-floppy-save"></i> Download Pathway as Image</a>
</li> </li>
{% if meta.can_edit %} {% if meta.can_edit %}
<li>
<a class="button" data-toggle="modal" data-target="#identify_missing_rules_modal">
<i class="glyphicon glyphicon-question-sign"></i> Identify Missing Rules</a>
</li>
<li role="separator" class="divider"></li> <li role="separator" class="divider"></li>
<li> <li>
<a class="button" data-toggle="modal" data-target="#edit_pathway_modal"> <a class="button" data-toggle="modal" data-target="#edit_pathway_modal">

View File

@ -0,0 +1,71 @@
{% extends "framework.html" %}
{% load static %}
{% load envipytags %}
{% block content %}
<div class="panel-group" id="reviewListAccordion">
<div class="panel panel-default">
<div class="panel-heading" id="headingPanel" style="font-size:2rem;height: 46px">
Jobs
</div>
<div class="panel-body">
                <p>
                    This page lists the background jobs that have been queued on your behalf,
                    together with their current status, when they were queued and finished,
                    and a link to the result where one is available.
                </p>
</div>
<div class="panel panel-default panel-heading list-group-item" style="background-color:silver">
<h4 class="panel-title">
<a id="job-accordion-link" data-toggle="collapse" data-parent="#job-accordion" href="#jobs">
Jobs
</a>
</h4>
</div>
<div id="jobs"
class="panel-collapse collapse in">
<div class="panel-body list-group-item" id="job-content">
<table class="table table-bordered table-hover">
<tr style="background-color: rgba(0, 0, 0, 0.08);">
<th scope="col">ID</th>
<th scope="col">Name</th>
<th scope="col">Status</th>
<th scope="col">Queued</th>
<th scope="col">Done</th>
<th scope="col">Result</th>
</tr>
<tbody>
{% for job in jobs %}
<tr>
<td>{{ job.task_id }}</td>
<td>{{ job.job_name }}</td>
<td>{{ job.status }}</td>
<td>{{ job.created }}</td>
<td>{{ job.done_at }}</td>
{% if job.task_result and job.task_result|is_url == True %}
<td><a href="{{ job.task_result }}">Result</a></td>
{% elif job.task_result %}
<td>{{ job.task_result|slice:"40" }}...</td>
{% else %}
<td>Empty</td>
{% endif %}
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
<!-- Unreviewable objects such as User / Group / Setting -->
<ul class='list-group'>
{% for obj in objects %}
{% if object_type == 'user' %}
<a class="list-group-item" href="{{ obj.url }}">{{ obj.username }}</a>
{% else %}
<a class="list-group-item" href="{{ obj.url }}">{{ obj.name }}</a>
{% endif %}
{% endfor %}
</ul>
</div>
</div>
{% endblock content %}

View File

@ -56,7 +56,7 @@
(function () { (function () {
var u = "//matomo.envipath.com/"; var u = "//matomo.envipath.com/";
_paq.push(['setTrackerUrl', u + 'matomo.php']); _paq.push(['setTrackerUrl', u + 'matomo.php']);
_paq.push(['setSiteId', '10']); _paq.push(['setSiteId', '{{ meta.site_id }}']);
var d = document, g = d.createElement('script'), s = d.getElementsByTagName('script')[0]; var d = document, g = d.createElement('script'), s = d.getElementsByTagName('script')[0];
g.async = true; g.async = true;
g.src = u + 'matomo.js'; g.src = u + 'matomo.js';

View File

@ -18,13 +18,19 @@
prediction. You just need to set a name and the packages prediction. You just need to set a name and the packages
you want the object to be based on. There are multiple types of models available. you want the object to be based on. There are multiple types of models available.
For additional information have a look at our For additional information have a look at our
<a target="_blank" href="https://wiki.envipath.org/index.php/relative-reasoning" role="button">wiki &gt;&gt;</a> <a target="_blank" href="https://wiki.envipath.org/index.php/relative-reasoning" role="button">wiki
&gt;&gt;</a>
</div> </div>
<!-- Name -->
<label for="model-name">Name</label> <label for="model-name">Name</label>
<input id="model-name" name="model-name" class="form-control" placeholder="Name"/> <input id="model-name" name="model-name" class="form-control" placeholder="Name"/>
<!-- Description -->
<label for="model-description">Description</label> <label for="model-description">Description</label>
<input id="model-description" name="model-description" class="form-control" <input id="model-description" name="model-description" class="form-control"
placeholder="Description"/> placeholder="Description"/>
<!-- Model Type -->
<label for="model-type">Model Type</label> <label for="model-type">Model Type</label>
<select id="model-type" name="model-type" class="form-control" data-width='100%'> <select id="model-type" name="model-type" class="form-control" data-width='100%'>
<option disabled selected>Select Model Type</option> <option disabled selected>Select Model Type</option>
@ -32,12 +38,12 @@
<option value="{{ v }}">{{ k }}</option> <option value="{{ v }}">{{ k }}</option>
{% endfor %} {% endfor %}
</select> </select>
<!-- ML and Rule Based Based Form-->
<div id="package-based-relative-reasoning-specific-form">
<!-- Rule Packages --> <!-- Rule Packages -->
<label for="package-based-relative-reasoning-rule-packages">Rule Packages</label> <div id="rule-packages" class="ep-model-param mlrr rbrr">
<select id="package-based-relative-reasoning-rule-packages" name="package-based-relative-reasoning-rule-packages" <label for="model-rule-packages">Rule Packages</label>
data-actions-box='true' class="form-control" multiple data-width='100%'> <select id="model-rule-packages" name="model-rule-packages" data-actions-box='true'
class="form-control" multiple data-width='100%'>
<option disabled>Reviewed Packages</option> <option disabled>Reviewed Packages</option>
{% for obj in meta.readable_packages %} {% for obj in meta.readable_packages %}
{% if obj.reviewed %} {% if obj.reviewed %}
@ -52,10 +58,13 @@
{% endif %} {% endif %}
{% endfor %} {% endfor %}
</select> </select>
</div>
<!-- Data Packages --> <!-- Data Packages -->
<label for="package-based-relative-reasoning-data-packages" >Data Packages</label> <div id="data-packages" class="ep-model-param mlrr rbrr enviformer">
<select id="package-based-relative-reasoning-data-packages" name="package-based-relative-reasoning-data-packages" <label for="model-data-packages">Data Packages</label>
data-actions-box='true' class="form-control" multiple data-width='100%'> <select id="model-data-packages" name="model-data-packages" data-actions-box='true'
class="form-control" multiple data-width='100%'>
<option disabled>Reviewed Packages</option> <option disabled>Reviewed Packages</option>
{% for obj in meta.readable_packages %} {% for obj in meta.readable_packages %}
{% if obj.reviewed %} {% if obj.reviewed %}
@ -70,32 +79,31 @@
{% endif %} {% endif %}
{% endfor %} {% endfor %}
</select> </select>
</div>
<div id="ml-relative-reasoning-specific-form">
<!-- Fingerprinter --> <!-- Fingerprinter -->
<label for="ml-relative-reasoning-fingerprinter">Fingerprinter</label> <div id="fingerprinter" class="ep-model-param mlrr">
<select id="ml-relative-reasoning-fingerprinter" name="ml-relative-reasoning-fingerprinter" <label for="model-fingerprinter">Fingerprinter</label>
class="form-control"> <select id="model-fingerprinter" name="model-fingerprinter" data-actions-box='true'
class="form-control" multiple data-width='100%'>
<option value="MACCS" selected>MACCS Fingerprinter</option> <option value="MACCS" selected>MACCS Fingerprinter</option>
</select>
{% if meta.enabled_features.PLUGINS and additional_descriptors %} {% if meta.enabled_features.PLUGINS and additional_descriptors %}
<!-- Property Plugins go here -->
<label for="ml-relative-reasoning-additional-fingerprinter">Additional Fingerprinter /
Descriptors</label>
<select id="ml-relative-reasoning-additional-fingerprinter"
name="ml-relative-reasoning-additional-fingerprinter" class="form-control">
<option disabled selected>Select Additional Fingerprinter / Descriptor</option> <option disabled selected>Select Additional Fingerprinter / Descriptor</option>
{% for k, v in additional_descriptors.items %} {% for k, v in additional_descriptors.items %}
<option value="{{ v }}">{{ k }}</option> <option value="{{ v }}">{{ k }}</option>
{% endfor %} {% endfor %}
</select>
{% endif %} {% endif %}
</select>
<label for="ml-relative-reasoning-threshold">Threshold</label>
<input type="number" min="0" max="1" step="0.05" value="0.5"
id="ml-relative-reasoning-threshold"
name="ml-relative-reasoning-threshold" class="form-control">
</div> </div>
<!-- Threshold -->
<div id="threshold" class="ep-model-param mlrr enviformer">
<label for="model-threshold">Threshold</label>
<input type="number" min="0" max="1" step="0.05" value="0.5" id="model-threshold"
name="model-threshold" class="form-control">
</div>
<div id="appdomain" class="ep-model-param mlrr">
{% if meta.enabled_features.APPLICABILITY_DOMAIN %} {% if meta.enabled_features.APPLICABILITY_DOMAIN %}
<!-- Build AD? --> <!-- Build AD? -->
<div class="checkbox"> <div class="checkbox">
@ -107,11 +115,13 @@
<div id="ad-params" style="display:none"> <div id="ad-params" style="display:none">
<!-- Num Neighbors --> <!-- Num Neighbors -->
<label for="num-neighbors">Number of Neighbors</label> <label for="num-neighbors">Number of Neighbors</label>
<input id="num-neighbors" name="num-neighbors" type="number" class="form-control" value="5" <input id="num-neighbors" name="num-neighbors" type="number" class="form-control"
value="5"
step="1" min="0" max="10"> step="1" min="0" max="10">
<!-- Local Compatibility --> <!-- Local Compatibility -->
<label for="local-compatibility-threshold">Local Compatibility Threshold</label> <label for="local-compatibility-threshold">Local Compatibility Threshold</label>
<input id="local-compatibility-threshold" name="local-compatibility-threshold" type="number" <input id="local-compatibility-threshold" name="local-compatibility-threshold"
type="number"
class="form-control" value="0.5" step="0.01" min="0" max="1"> class="form-control" value="0.5" step="0.01" min="0" max="1">
<!-- Reliability --> <!-- Reliability -->
<label for="reliability-threshold">Reliability Threshold</label> <label for="reliability-threshold">Reliability Threshold</label>
@ -120,12 +130,6 @@
</div> </div>
{% endif %} {% endif %}
</div> </div>
<!-- EnviFormer-->
<div id="enviformer-specific-form">
<label for="enviformer-threshold">Threshold</label>
<input type="number" min="0" max="1" step="0.05" value="0.5" id="enviformer-threshold"
name="enviformer-threshold" class="form-control">
</div>
</form> </form>
</div> </div>
<div class="modal-footer"> <div class="modal-footer">
@ -137,20 +141,23 @@
</div> </div>
<script> <script>
$(function() { $(function () {
// Built in Model Types
var nativeModelTypes = [
"mlrr",
"rbrr",
"enviformer",
]
// Initially hide all "specific" forms // Initially hide all "specific" forms
$("div[id$='-specific-form']").each( function() { $(".ep-model-param").each(function () {
$(this).hide(); $(this).hide();
}); });
$('#model-type').selectpicker(); $('#model-type').selectpicker();
$("#ml-relative-reasoning-fingerprinter").selectpicker(); $("#model-fingerprinter").selectpicker();
$("#package-based-relative-reasoning-rule-packages").selectpicker(); $("#model-rule-packages").selectpicker();
$("#package-based-relative-reasoning-data-packages").selectpicker(); $("#model-data-packages").selectpicker();
$("#package-based-relative-reasoning-evaluation-packages").selectpicker();
if ($('#ml-relative-reasoning-additional-fingerprinter').length > 0) {
$("#ml-relative-reasoning-additional-fingerprinter").selectpicker();
}
$("#build-app-domain").change(function () { $("#build-app-domain").change(function () {
if ($(this).is(":checked")) { if ($(this).is(":checked")) {
@ -161,29 +168,20 @@ $(function() {
}); });
// On change hide all and show only selected // On change hide all and show only selected
$("#model-type").change(function() { $("#model-type").change(function () {
$("div[id$='-specific-form']").each( function() { $('.ep-model-param').hide();
$(this).hide(); var modelType = $('#model-type').val();
}); if (nativeModelTypes.indexOf(modelType) !== -1) {
val = $('option:selected', this).val(); $('.' + modelType).show();
if (val === 'ml-relative-reasoning' || val === 'rule-based-relative-reasoning') {
$("#package-based-relative-reasoning-specific-form").show();
if (val === 'ml-relative-reasoning') {
$("#ml-relative-reasoning-specific-form").show();
}
} else { } else {
$("#" + val + "-specific-form").show(); // do nothing
} }
}); });
$('#new_model_modal_form_submit').on('click', function(e){ $('#new_model_modal_form_submit').on('click', function (e) {
e.preventDefault(); e.preventDefault();
$('#new_model_form').submit(); $('#new_model_form').submit();
}); });
}); });
</script> </script>

View File

@ -17,10 +17,10 @@
For evaluation, you need to select the packages you want to use. For evaluation, you need to select the packages you want to use.
While the model is evaluating, you can use the model for predictions. While the model is evaluating, you can use the model for predictions.
</div> </div>
<!-- Evaluation --> <!-- Evaluation Packages -->
<label for="relative-reasoning-evaluation-packages">Evaluation Packages</label> <label for="model-evaluation-packages">Evaluation Packages</label>
<select id="relative-reasoning-evaluation-packages" name=relative-reasoning-evaluation-packages" <select id="model-evaluation-packages" name="model-evaluation-packages" data-actions-box='true'
data-actions-box='true' class="form-control" multiple data-width='100%'> class="form-control" multiple data-width='100%'>
<option disabled>Reviewed Packages</option> <option disabled>Reviewed Packages</option>
{% for obj in meta.readable_packages %} {% for obj in meta.readable_packages %}
{% if obj.reviewed %} {% if obj.reviewed %}
@ -35,6 +35,15 @@
{% endif %} {% endif %}
{% endfor %} {% endfor %}
</select> </select>
<!-- Eval Type -->
<label for="model-evaluation-type">Evaluation Type</label>
<select id="model-evaluation-type" name="model-evaluation-type" class="form-control">
<option disabled selected>Select evaluation type</option>
<option value="sg">Single Generation</option>
<option value="mg">Multiple Generations</option>
</select>
<input type="hidden" name="hidden" value="evaluate"> <input type="hidden" name="hidden" value="evaluate">
</form> </form>
</div> </div>
@ -50,7 +59,7 @@
$(function () { $(function () {
$("#relative-reasoning-evaluation-packages").selectpicker(); $("#model-evaluation-packages").selectpicker();
$('#evaluate_model_form_submit').on('click', function (e) { $('#evaluate_model_form_submit').on('click', function (e) {
e.preventDefault(); e.preventDefault();

View File

@ -0,0 +1,54 @@
{% load static %}
<!-- Identify Missing Rules -->
<!--
  Modal for the pathway page: lets the user pick a rule package and submits a
  GET request back to the pathway URL with identify-missing-rules=true, which
  the view answers with a CSV download of reactions not covered by any rule
  (or by a two-rule chain) from the selected package.
-->
<div id="identify_missing_rules_modal" class="modal" tabindex="-1">
    <div class="modal-dialog">
        <div class="modal-content">
            <div class="modal-header">
                <h3 class="modal-title">Identify Missing Rules</h3>
                <button type="button" class="close" data-dismiss="modal" aria-label="Close">
                    <span aria-hidden="true">&times;</span>
                </button>
            </div>
            <div class="modal-body">
                By clicking on Download we'll search the Pathway for Reactions that are not backed by
                a Rule or which can be assembled by chaining two rules.
                <form id="identify-missing-rules-modal-form" accept-charset="UTF-8" action="{{ pathway.url }}"
                      data-remote="true" method="GET">
                    <label for="rule-package">Select the Rule Package</label>
                    <!-- Reviewed packages are listed first, unreviewed after a disabled separator. -->
                    <select id="rule-package" name="rule-package" data-actions-box='true' class="form-control"
                            data-width='100%'>
                        <option disabled>Reviewed Packages</option>
                        {% for obj in meta.readable_packages %}
                            {% if obj.reviewed %}
                                <option value="{{ obj.url }}">{{ obj.name }}</option>
                            {% endif %}
                        {% endfor %}
                        <option disabled>Unreviewed Packages</option>
                        {% for obj in meta.readable_packages %}
                            {% if not obj.reviewed %}
                                <option value="{{ obj.url }}">{{ obj.name }}</option>
                            {% endif %}
                        {% endfor %}
                    </select>
                    <!-- Flag read by the pathway view to trigger the CSV export. -->
                    <input type="hidden" name="identify-missing-rules" value="true"/>
                </form>
            </div>
            <div class="modal-footer">
                <button type="button" class="btn btn-secondary" data-dismiss="modal">Close</button>
                <button type="button" class="btn btn-primary" id="identify-missing-rules-modal-submit">Download</button>
            </div>
        </div>
    </div>
</div>
<script>
    $(function () {
        // Submit the GET form (starts the download) and close the modal.
        $('#identify-missing-rules-modal-submit').click(function (e) {
            e.preventDefault();
            $('#identify-missing-rules-modal-form').submit();
            $('#identify_missing_rules_modal').modal('hide');
        });
    })
</script>

View File

@ -117,7 +117,7 @@
<!-- End Predict Panel --> <!-- End Predict Panel -->
{% endif %} {% endif %}
{% if model.app_domain %} {% if model.ready_for_prediction and model.app_domain %}
<!-- App Domain --> <!-- App Domain -->
<div class="panel panel-default panel-heading list-group-item" style="background-color:silver"> <div class="panel panel-default panel-heading list-group-item" style="background-color:silver">
<h4 class="panel-title"> <h4 class="panel-title">

View File

@ -83,6 +83,7 @@
{% include "modals/objects/add_pathway_edge_modal.html" %} {% include "modals/objects/add_pathway_edge_modal.html" %}
{% include "modals/objects/download_pathway_csv_modal.html" %} {% include "modals/objects/download_pathway_csv_modal.html" %}
{% include "modals/objects/download_pathway_image_modal.html" %} {% include "modals/objects/download_pathway_image_modal.html" %}
{% include "modals/objects/identify_missing_rules_modal.html" %}
{% include "modals/objects/generic_copy_object_modal.html" %} {% include "modals/objects/generic_copy_object_modal.html" %}
{% include "modals/objects/edit_pathway_modal.html" %} {% include "modals/objects/edit_pathway_modal.html" %}
{% include "modals/objects/generic_set_aliases_modal.html" %} {% include "modals/objects/generic_set_aliases_modal.html" %}

View File

@ -3,7 +3,7 @@ from datetime import datetime
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory
from django.test import TestCase, tag from django.test import TestCase, tag
from epdb.logic import PackageManager from epdb.logic import PackageManager
from epdb.models import User, EnviFormer, Package, Setting, Pathway from epdb.models import User, EnviFormer, Package, Setting
from epdb.tasks import predict_simple, predict from epdb.tasks import predict_simple, predict
@ -48,9 +48,7 @@ class EnviFormerTest(TestCase):
mod.build_dataset() mod.build_dataset()
mod.build_model() mod.build_model()
mod.multigen_eval = True mod.evaluate_model(True, eval_packages_objs, n_splits=2)
mod.save()
mod.evaluate_model(n_splits=2)
mod.predict("CCN(CC)C(=O)C1=CC(=CC=C1)C") mod.predict("CCN(CC)C(=O)C1=CC(=CC=C1)C")

View File

@ -30,7 +30,6 @@ class ModelTest(TestCase):
self.package, self.package,
rule_package_objs, rule_package_objs,
data_package_objs, data_package_objs,
eval_packages_objs,
threshold=threshold, threshold=threshold,
name="ECC - BBD - 0.5", name="ECC - BBD - 0.5",
description="Created MLRelativeReasoning in Testcase", description="Created MLRelativeReasoning in Testcase",
@ -38,9 +37,7 @@ class ModelTest(TestCase):
mod.build_dataset() mod.build_dataset()
mod.build_model() mod.build_model()
mod.multigen_eval = True mod.evaluate_model(True, eval_packages_objs, n_splits=2)
mod.save()
mod.evaluate_model(n_splits=2)
results = mod.predict("CCN(CC)C(=O)C1=CC(=CC=C1)C") results = mod.predict("CCN(CC)C(=O)C1=CC(=CC=C1)C")

View File

@ -6,7 +6,7 @@ from epdb.logic import UserManager
from epdb.models import Package, User from epdb.models import Package, User
@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models") @override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models", CELERY_TASK_ALWAYS_EAGER=True)
class PathwayViewTest(TestCase): class PathwayViewTest(TestCase):
fixtures = ["test_fixtures_incl_model.jsonl.gz"] fixtures = ["test_fixtures_incl_model.jsonl.gz"]

View File

@ -6,7 +6,7 @@ from epdb.logic import UserManager, PackageManager
from epdb.models import Pathway, Edge from epdb.models import Pathway, Edge
@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models") @override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models", CELERY_TASK_ALWAYS_EAGER=True)
class PathwayViewTest(TestCase): class PathwayViewTest(TestCase):
fixtures = ["test_fixtures_incl_model.jsonl.gz"] fixtures = ["test_fixtures_incl_model.jsonl.gz"]

View File

@ -192,7 +192,7 @@ class FormatConverter(object):
return smiles return smiles
@staticmethod @staticmethod
def standardize(smiles, remove_stereo=False): def standardize(smiles, remove_stereo=False, canonicalize_tautomers=False):
# Taken from https://bitsilla.com/blog/2021/06/standardizing-a-molecule-using-rdkit/ # Taken from https://bitsilla.com/blog/2021/06/standardizing-a-molecule-using-rdkit/
# follows the steps in # follows the steps in
# https://github.com/greglandrum/RSC_OpenScience_Standardization_202104/blob/main/MolStandardize%20pieces.ipynb # https://github.com/greglandrum/RSC_OpenScience_Standardization_202104/blob/main/MolStandardize%20pieces.ipynb
@ -210,19 +210,21 @@ class FormatConverter(object):
uncharger = ( uncharger = (
rdMolStandardize.Uncharger() rdMolStandardize.Uncharger()
) # annoying, but necessary as no convenience method exists ) # annoying, but necessary as no convenience method exists
uncharged_parent_clean_mol = uncharger.uncharge(parent_clean_mol) res_mol = uncharger.uncharge(parent_clean_mol)
# note that no attempt is made at reionization at this step # note that no attempt is made at reionization at this step
# nor at ionization at some pH (rdkit has no pKa caculator) # nor at ionization at some pH (rdkit has no pKa caculator)
# the main aim to to represent all molecules from different sources # the main aim to to represent all molecules from different sources
# in a (single) standard way, for use in ML, catalogue, etc. # in a (single) standard way, for use in ML, catalogue, etc.
# te = rdMolStandardize.TautomerEnumerator() # idem
# taut_uncharged_parent_clean_mol = te.Canonicalize(uncharged_parent_clean_mol)
if remove_stereo: if remove_stereo:
Chem.RemoveStereochemistry(uncharged_parent_clean_mol) Chem.RemoveStereochemistry(res_mol)
return Chem.MolToSmiles(uncharged_parent_clean_mol, kekuleSmiles=True) if canonicalize_tautomers:
te = rdMolStandardize.TautomerEnumerator() # idem
res_mol = te.Canonicalize(res_mol)
return Chem.MolToSmiles(res_mol, kekuleSmiles=True)
@staticmethod @staticmethod
def neutralize_smiles(smiles): def neutralize_smiles(smiles):
@ -370,6 +372,76 @@ class FormatConverter(object):
return parsed_smiles, errors return parsed_smiles, errors
@staticmethod
def smiles_covered_by(
l_smiles: List[str],
r_smiles: List[str],
standardize: bool = True,
canonicalize_tautomers: bool = True,
) -> bool:
"""
Check if all SMILES in the left list are covered by (contained in) the right list.
This function performs a subset check to determine if every chemical structure
represented in l_smiles has a corresponding representation in r_smiles.
Args:
l_smiles (List[str]): List of SMILES strings to check for coverage.
r_smiles (List[str]): List of SMILES strings that should contain all l_smiles.
standardize (bool, optional): Whether to standardize SMILES before comparison.
Defaults to True. When True, applies FormatConverter.standardize() to
normalize representations for accurate comparison.
canonicalize_tautomers (bool, optional): Whether to canonicalize tautomers
Defaults to False. When True, applies rdMolStandardize.TautomerEnumerator().Canonicalize(res_mol)
to the compounds before comparison.
Returns:
bool: True if all SMILES in l_smiles are found in r_smiles (i.e., l_smiles
is a subset of r_smiles), False otherwise.
Note:
- Comparison treats lists as sets, ignoring duplicates and order
- Failed standardization attempts are silently ignored (original SMILES used)
- This is a one-directional check: l_smiles ⊆ r_smiles
- For bidirectional equality, both directions must be checked separately
Example:
>>> FormatConverter.smiles_covered_by(["CCO", "CC"], ["CCO", "CC", "CCC"])
True
>>> FormatConverter.smiles_covered_by(["CCO", "CCCC"], ["CCO", "CC", "CCC"])
False
"""
standardized_l_smiles = []
if standardize:
for smi in l_smiles:
try:
smi = FormatConverter.standardize(
smi, canonicalize_tautomers=canonicalize_tautomers
)
except Exception:
# :shrug:
# logger.debug(f'Standardizing SMILES failed for {smi}')
pass
standardized_l_smiles.append(smi)
else:
standardized_l_smiles = l_smiles
standardized_r_smiles = []
if standardize:
for smi in r_smiles:
try:
smi = FormatConverter.standardize(smi)
except Exception:
# :shrug:
# logger.debug(f'Standardizing SMILES failed for {smi}')
pass
standardized_r_smiles.append(smi)
else:
standardized_r_smiles = r_smiles
return len(set(standardized_l_smiles).difference(set(standardized_r_smiles))) == 0
class Standardizer(ABC): class Standardizer(ABC):
def __init__(self, name): def __init__(self, name):

View File

@ -9,36 +9,37 @@ from collections import defaultdict
from datetime import datetime from datetime import datetime
from enum import Enum from enum import Enum
from types import NoneType from types import NoneType
from typing import Dict, Any, List from typing import Any, Dict, List
from django.db import transaction from django.db import transaction
from envipy_additional_information import Interval, EnviPyModel from envipy_additional_information import NAME_MAPPING, EnviPyModel, Interval
from envipy_additional_information import NAME_MAPPING
from pydantic import BaseModel, HttpUrl from pydantic import BaseModel, HttpUrl
from epdb.models import ( from epdb.models import (
Package,
Compound, Compound,
CompoundStructure, CompoundStructure,
SimpleRule, Edge,
EnviFormer,
EPModel,
ExternalDatabase,
ExternalIdentifier,
License,
MLRelativeReasoning,
Node,
Package,
ParallelRule,
Pathway,
PluginModel,
Reaction,
Rule,
RuleBasedRelativeReasoning,
Scenario,
SequentialRule,
SimpleAmbitRule, SimpleAmbitRule,
SimpleRDKitRule, SimpleRDKitRule,
ParallelRule, SimpleRule,
SequentialRule,
Reaction,
Pathway,
Node,
Edge,
Scenario,
EPModel,
MLRelativeReasoning,
RuleBasedRelativeReasoning,
EnviFormer,
PluginModel,
ExternalIdentifier,
ExternalDatabase,
License,
) )
from utilities.chem import FormatConverter
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -48,7 +49,7 @@ class HTMLGenerator:
@staticmethod @staticmethod
def generate_html(additional_information: "EnviPyModel", prefix="") -> str: def generate_html(additional_information: "EnviPyModel", prefix="") -> str:
from typing import get_origin, get_args, Union from typing import Union, get_args, get_origin
if isinstance(additional_information, type): if isinstance(additional_information, type):
clz_name = additional_information.__name__ clz_name = additional_information.__name__
@ -1171,3 +1172,89 @@ class PackageImporter:
url=identifier_data.get("url", ""), url=identifier_data.get("url", ""),
is_primary=identifier_data.get("is_primary", False), is_primary=identifier_data.get("is_primary", False),
) )
class PathwayUtils:
    """Helpers operating on a single Pathway.

    Currently used by the "identify missing rules" feature: for each edge
    (reaction) of the pathway, determine whether the reaction's products can
    be reproduced by applying one rule, or a chain of two rules, from a given
    rule set.
    """

    def __init__(self, pathway: "Pathway"):
        # Pathway instance all helper methods operate on.
        self.pathway = pathway

    @staticmethod
    def _get_products(smiles: str, rules: List["Rule"]):
        """Apply every rule to *smiles* and collect the resulting products.

        Returns a nested mapping smiles -> rule.url -> list of product SMILES.
        Rules that do not trigger simply contribute no entry.
        """
        educt_rule_products: Dict[str, Dict[str, List[str]]] = defaultdict(
            lambda: defaultdict(list)
        )
        for r in rules:
            product_sets = r.apply(smiles)
            for product_set in product_sets:
                for product in product_set:
                    educt_rule_products[smiles][r.url].append(product)
        return educt_rule_products

    def find_missing_rules(self, rules: List["Rule"]):
        """For each pathway edge, check whether *rules* can explain the reaction.

        Returns a dict mapping edge.url -> list of (rule_url, rule_name) tuples:
        an empty list means no rule (or two-rule chain) reproduces the reaction;
        a non-empty list names a two-step rule chain that does. Edges covered by
        a single rule are omitted from the result entirely.
        """
        # NOTE(review): consider logger.info instead of print for server code.
        print(f"Processing {self.pathway.name}")

        # compute products for each node / rule combination in the pathway
        educt_rule_products = defaultdict(lambda: defaultdict(list))
        for node in self.pathway.nodes:
            # update(**d) expands the per-node mapping as keyword args; keys are
            # SMILES strings, so this works, but plain update(d) would be equivalent.
            educt_rule_products.update(**self._get_products(node.default_node_label.smiles, rules))

        # loop through edges and determine reactions that can't be constructed by
        # any of the rules or a combination of two rules in a chained fashion
        res: Dict[str, List["Rule"]] = dict()
        for edge in self.pathway.edges:
            found = False
            reaction = edge.edge_label
            educts = [cs for cs in reaction.educts.all()]
            products = [cs.smiles for cs in reaction.products.all()]
            rule_chain = []
            for educt in educts:
                educt = educt.smiles
                triggered_rules = list(educt_rule_products.get(educt, {}).keys())
                for triggered_rule in triggered_rules:
                    if rule_products := educt_rule_products[educt][triggered_rule]:
                        # check if this rule covers the reaction
                        if FormatConverter.smiles_covered_by(
                            products, rule_products, standardize=True, canonicalize_tautomers=True
                        ):
                            found = True
                        else:
                            # Check if another prediction step would cover the reaction
                            for product in rule_products:
                                # Second application: apply all rules to each
                                # first-step product and test coverage again.
                                prod_rule_products = self._get_products(product, rules)
                                prod_triggered_rules = list(
                                    prod_rule_products.get(product, {}).keys()
                                )
                                for prod_triggered_rule in prod_triggered_rules:
                                    if second_step_products := prod_rule_products[product][
                                        prod_triggered_rule
                                    ]:
                                        if FormatConverter.smiles_covered_by(
                                            products,
                                            second_step_products,
                                            standardize=True,
                                            canonicalize_tautomers=True,
                                        ):
                                            # Record the successful two-rule chain
                                            # as (url, name) pairs for the CSV output.
                                            rule_chain.append(
                                                (
                                                    triggered_rule,
                                                    Rule.objects.get(url=triggered_rule).name,
                                                )
                                            )
                                            rule_chain.append(
                                                (
                                                    prod_triggered_rule,
                                                    Rule.objects.get(url=prod_triggered_rule).name,
                                                )
                                            )
                                            # NOTE(review): res[edge.url] is overwritten on every
                                            # chain hit, and rule_chain keeps accumulating pairs
                                            # across hits for the same edge — confirm intended.
                                            res[edge.url] = rule_chain
            if not found:
                # No single rule covered this edge: report it (with any chains found,
                # or an empty list meaning "missing rule").
                res[edge.url] = rule_chain
        return res