11 Commits

Author SHA1 Message Date
62e890245e ... 2025-10-29 20:00:41 +01:00
138846d84d ... 2025-10-29 19:46:20 +01:00
13ed86a780 [Feature] Identify Missing Rules (#177)
Fixes #97
Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#177
2025-10-30 00:47:45 +13:00
f1b4c5aadb [Feature] Adding list_display to various django admin sites (#180)
Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#180
2025-10-29 22:26:28 +13:00
37e0e18a28 [Fix] Fixed Incremental Prediction Typo (#176)
Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#176
2025-10-28 23:29:08 +13:00
de44c22606 [Migration] Added missing Migration for JobLog (#175)
Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#175
2025-10-27 22:41:16 +13:00
a952c08469 [Feature] Basic logging of Jobs, Model Evaluation (#169)
Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#169
2025-10-27 22:34:05 +13:00
551cfc7768 [Enhancement] Create ML Models (#173)
## Changes

- Ability to change the threshold from a command line argument.
- Names of data packages included in model name
- Names of data, rule and eval packages included in the model description
- EnviFormer models are now viewable on the admin site
- Ignore CO2 for training and evaluating EnviFormer

Co-authored-by: Liam Brydon <62733830+MyCreativityOutlet@users.noreply.github.com>
Reviewed-on: enviPath/enviPy#173
Reviewed-by: jebus <lorsbach@envipath.com>
Co-authored-by: liambrydon <lbry121@aucklanduni.ac.nz>
Co-committed-by: liambrydon <lbry121@aucklanduni.ac.nz>
2025-10-23 06:20:22 +13:00
8fda2577ee [Feature] Dump/Restore of enviFormer Models (#170)
Dump:
`./manage.py  dump_enviformer d544303c-a1ca-439d-b036-5e3413ce4a48 --output test.tar.gz`

Restore:
`./manage.py load_enviformer test.tar.gz 1062eb09-5ec7-4bdd-a8f2-ae0252eb4b06`

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#170
2025-10-22 10:39:22 +13:00
819a94aced [Fix] Catch Exception for Adding Structures / Show PubChem Substances (#168)
Fixes #163
Fixes #165

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#168
2025-10-22 01:13:06 +13:00
376fd65785 [Feature] ML model caching for reducing prediction overhead (#156)
The caching is now finished. The cache is created in `settings.py` giving us the most flexibility for using it in the future.

The cache is currently updated/accessed by `tasks.py/get_ml_model` which can be called from whatever task needs to access ml models in this way (currently, `predict` and `predict_simple`).

This implementation currently caches all ml models including the relative reasoning. If we don't want this and only want to cache enviFormer, i can change it to that. However, I don't think there is a harm in having the other models be cached as well.

Co-authored-by: Liam Brydon <62733830+MyCreativityOutlet@users.noreply.github.com>
Reviewed-on: enviPath/enviPy#156
Co-authored-by: liambrydon <lbry121@aucklanduni.ac.nz>
Co-committed-by: liambrydon <lbry121@aucklanduni.ac.nz>
2025-10-16 08:58:36 +13:00
49 changed files with 1724 additions and 380 deletions

View File

@ -52,6 +52,28 @@ INSTALLED_APPS = [
"migration",
]
# Add the TENANT providing implementations for
# Required
# - Package
# - Compound (TODO)
# - CompoundStructure (TODO)
# Optional
# - PackageManager (TODO)
# - GroupManager (TODO)
# - SettingManager (TODO)
TENANT = os.environ.get("TENANT", "public")
INSTALLED_APPS.append(TENANT)
PACKAGE_IMPLEMENTATION = f"{TENANT}.Package"
PACKAGE_MODULE_PATH = f"{TENANT}.models.Package"
def GET_PACKAGE_MODEL():
from django.apps import apps
return apps.get_model(TENANT, "Package")
AUTHENTICATION_BACKENDS = [
"django.contrib.auth.backends.ModelBackend",
]

View File

@ -1,12 +1,13 @@
from django.contrib import admin
from django.conf import settings as s
from .models import (
User,
UserPackagePermission,
Group,
GroupPackagePermission,
Package,
MLRelativeReasoning,
EnviFormer,
Compound,
CompoundStructure,
SimpleAmbitRule,
@ -19,11 +20,15 @@ from .models import (
Setting,
ExternalDatabase,
ExternalIdentifier,
JobLog,
)
Package = s.GET_PACKAGE_MODEL()
class UserAdmin(admin.ModelAdmin):
pass
list_display = ["username", "email", "is_active"]
class UserPackagePermissionAdmin(admin.ModelAdmin):
@ -38,8 +43,14 @@ class GroupPackagePermissionAdmin(admin.ModelAdmin):
pass
class JobLogAdmin(admin.ModelAdmin):
pass
class EPAdmin(admin.ModelAdmin):
search_fields = ["name", "description"]
list_display = ["name", "url", "created"]
ordering = ["-created"]
class PackageAdmin(EPAdmin):
@ -50,6 +61,10 @@ class MLRelativeReasoningAdmin(EPAdmin):
pass
class EnviFormerAdmin(EPAdmin):
pass
class CompoundAdmin(EPAdmin):
pass
@ -102,8 +117,10 @@ admin.site.register(User, UserAdmin)
admin.site.register(UserPackagePermission, UserPackagePermissionAdmin)
admin.site.register(Group, GroupAdmin)
admin.site.register(GroupPackagePermission, GroupPackagePermissionAdmin)
admin.site.register(JobLog, JobLogAdmin)
admin.site.register(Package, PackageAdmin)
admin.site.register(MLRelativeReasoning, MLRelativeReasoningAdmin)
admin.site.register(EnviFormer, EnviFormerAdmin)
admin.site.register(Compound, CompoundAdmin)
admin.site.register(CompoundStructure, CompoundStructureAdmin)
admin.site.register(SimpleAmbitRule, SimpleAmbitRuleAdmin)

View File

@ -1,5 +1,6 @@
from typing import List, Dict, Optional, Any
from django.conf import settings as s
from django.contrib.auth import get_user_model
from django.http import HttpResponse
from django.shortcuts import redirect
@ -10,7 +11,6 @@ from .logic import PackageManager, UserManager, SettingManager
from .models import (
Compound,
CompoundStructure,
Package,
User,
UserPackagePermission,
Rule,
@ -23,6 +23,9 @@ from .models import (
)
Package = s.GET_PACKAGE_MODEL()
def _anonymous_or_real(request):
if request.user.is_authenticated and not request.user.is_anonymous:
return request.user

View File

@ -11,7 +11,6 @@ from pydantic import ValidationError
from epdb.models import (
User,
Package,
UserPackagePermission,
GroupPackagePermission,
Permission,
@ -33,6 +32,8 @@ from utilities.misc import PackageImporter, PackageExporter
logger = logging.getLogger(__name__)
Package = s.GET_PACKAGE_MODEL()
class EPDBURLParser:
UUID_PATTERN = r"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}"

View File

@ -2,15 +2,18 @@ from django.conf import settings as s
from django.core.management.base import BaseCommand
from django.db import transaction
from epdb.models import MLRelativeReasoning, EnviFormer, Package
from epdb.models import EnviFormer, MLRelativeReasoning
Package = s.GET_PACKAGE_MODEL()
class Command(BaseCommand):
"""This command can be run with
`python manage.py create_ml_models [model_names] -d [data_packages] OPTIONAL: -e [eval_packages]`
For example, to train both EnviFormer and MLRelativeReasoning on BBD and SOIL and evaluate them on SLUDGE
the below command would be used:
`python manage.py create_ml_models enviformer mlrr -d bbd soil -e sludge
`python manage.py create_ml_models [model_names] -d [data_packages] FOR MLRR ONLY: -r [rule_packages]
OPTIONAL: -e [eval_packages] -t threshold`
For example, to train both EnviFormer and MLRelativeReasoning on BBD and SOIL and evaluate them on SLUDGE with a
threshold of 0.6, the below command would be used:
`python manage.py create_ml_models enviformer mlrr -d bbd soil -e sludge -t 0.6
"""
def add_arguments(self, parser):
@ -34,6 +37,13 @@ class Command(BaseCommand):
help="Rule Packages mandatory for MLRR",
default=[],
)
parser.add_argument(
"-t",
"--threshold",
type=float,
help="Model prediction threshold",
default=0.5,
)
@transaction.atomic
def handle(self, *args, **options):
@ -67,7 +77,13 @@ class Command(BaseCommand):
return packages
# Iteratively create models in options["model_names"]
print(f"Creating models: {options['model_names']}")
print(
f"Creating models: {options['model_names']}\n"
f"Data packages: {options['data_packages']}\n"
f"Rule Packages (only for MLRR): {options['rule_packages']}\n"
f"Eval Packages: {options['eval_packages']}\n"
f"Threshold: {options['threshold']:.2f}"
)
data_packages = decode_packages(options["data_packages"])
eval_packages = decode_packages(options["eval_packages"])
rule_packages = decode_packages(options["rule_packages"])
@ -78,9 +94,10 @@ class Command(BaseCommand):
pack,
data_packages=data_packages,
eval_packages=eval_packages,
threshold=0.5,
name="EnviFormer - T0.5",
description="EnviFormer transformer",
threshold=options["threshold"],
name=f"EnviFormer - {', '.join(options['data_packages'])} - T{options['threshold']:.2f}",
description=f"EnviFormer transformer trained on {options['data_packages']} "
f"evaluated on {options['eval_packages']}.",
)
elif model_name == "mlrr":
model = MLRelativeReasoning.create(
@ -88,9 +105,10 @@ class Command(BaseCommand):
rule_packages=rule_packages,
data_packages=data_packages,
eval_packages=eval_packages,
threshold=0.5,
name="ECC - BBD - T0.5",
description="ML Relative Reasoning",
threshold=options["threshold"],
name=f"ECC - {', '.join(options['data_packages'])} - T{options['threshold']:.2f}",
description=f"ML Relative Reasoning trained on {options['data_packages']} with rules from "
f"{options['rule_packages']} and evaluated on {options['eval_packages']}.",
)
else:
raise ValueError(f"Cannot create model of type {model_name}, unknown model type")
@ -100,6 +118,6 @@ class Command(BaseCommand):
print(f"Training {model_name}")
model.build_model()
print(f"Evaluating {model_name}")
model.evaluate_model()
model.evaluate_model(False, eval_packages=eval_packages)
print(f"Saving {model_name}")
model.save()

View File

@ -0,0 +1,59 @@
import json
import os
import tarfile
from tempfile import TemporaryDirectory
from django.conf import settings as s
from django.core.management.base import BaseCommand
from django.db import transaction
from epdb.models import EnviFormer
class Command(BaseCommand):
def add_arguments(self, parser):
parser.add_argument(
"model",
type=str,
help="Model UUID of the Model to Dump",
)
parser.add_argument("--output", type=str)
def package_dict_and_folder(self, dict_data, folder_path, output_path):
with TemporaryDirectory() as tmpdir:
dict_filename = os.path.join(tmpdir, "data.json")
with open(dict_filename, "w", encoding="utf-8") as f:
json.dump(dict_data, f, indent=2)
with tarfile.open(output_path, "w:gz") as tar:
tar.add(dict_filename, arcname="data.json")
tar.add(folder_path, arcname=os.path.basename(folder_path))
os.remove(dict_filename)
@transaction.atomic
def handle(self, *args, **options):
output = options["output"]
if os.path.exists(output):
raise ValueError(f"Output file {output} already exists")
model = EnviFormer.objects.get(uuid=options["model"])
data = {
"uuid": str(model.uuid),
"name": model.name,
"description": model.description,
"kv": model.kv,
"data_packages_uuids": [str(p.uuid) for p in model.data_packages.all()],
"eval_packages_uuids": [str(p.uuid) for p in model.data_packages.all()],
"threshold": model.threshold,
"eval_results": model.eval_results,
"multigen_eval": model.multigen_eval,
"model_status": model.model_status,
}
model_folder = os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid))
self.package_dict_and_folder(data, model_folder, output)

View File

@ -0,0 +1,83 @@
import json
import os
import shutil
import tarfile
from tempfile import TemporaryDirectory
from django.conf import settings as s
from django.core.management.base import BaseCommand
from django.db import transaction
from epdb.models import EnviFormer
Package = s.GET_PACKAGE_MODEL()
class Command(BaseCommand):
def add_arguments(self, parser):
parser.add_argument(
"input",
type=str,
help=".tar.gz file containing the Model dump.",
)
parser.add_argument(
"package",
type=str,
help="Package UUID where the Model should be loaded to.",
)
def read_dict_and_folder_from_archive(self, archive_path, extract_to="extracted_folder"):
with tarfile.open(archive_path, "r:gz") as tar:
tar.extractall(extract_to)
dict_path = os.path.join(extract_to, "data.json")
if not os.path.exists(dict_path):
raise FileNotFoundError("data.json not found in the archive.")
with open(dict_path, "r", encoding="utf-8") as f:
data_dict = json.load(f)
extracted_items = os.listdir(extract_to)
folders = [item for item in extracted_items if item != "data.json"]
folder_path = os.path.join(extract_to, folders[0]) if folders else None
return data_dict, folder_path
@transaction.atomic
def handle(self, *args, **options):
if not os.path.exists(options["input"]):
raise ValueError(f"Input file {options['input']} does not exist.")
target_package = Package.objects.get(uuid=options["package"])
with TemporaryDirectory() as tmpdir:
data, folder = self.read_dict_and_folder_from_archive(options["input"], tmpdir)
model = EnviFormer()
model.package = target_package
# model.uuid = data["uuid"]
model.name = data["name"]
model.description = data["description"]
model.kv = data["kv"]
model.threshold = float(data["threshold"])
model.eval_results = data["eval_results"]
model.multigen_eval = data["multigen_eval"]
model.model_status = data["model_status"]
model.save()
for p_uuid in data["data_packages_uuids"]:
p = Package.objects.get(uuid=p_uuid)
model.data_packages.add(p)
for p_uuid in data["eval_packages_uuids"]:
p = Package.objects.get(uuid=p_uuid)
model.eval_packages.add(p)
target_folder = os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid))
shutil.copytree(folder, target_folder)
os.rename(
os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid), f"{data['uuid']}.ckpt"),
os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid), f"{model.uuid}.ckpt"),
)

View File

@ -1,8 +1,8 @@
from django.apps import apps
from django.conf import settings as s
from django.core.management.base import BaseCommand
from django.db.models import F, Value, TextField, JSONField
from django.db.models.functions import Replace, Cast
from django.db.models import F, JSONField, TextField, Value
from django.db.models.functions import Cast, Replace
from epdb.models import EnviPathModel
@ -23,10 +23,13 @@ class Command(BaseCommand):
)
def handle(self, *args, **options):
Package = s.GET_PACKAGE_MODEL()
print("Localizing urls for Package")
Package.objects.update(url=Replace(F("url"), Value(options["old"]), Value(options["new"])))
MODELS = [
"User",
"Group",
"Package",
"Compound",
"CompoundStructure",
"Pathway",

View File

@ -0,0 +1,38 @@
from datetime import date, timedelta
from django.core.management.base import BaseCommand
from django.db import transaction
from epdb.models import JobLog
class Command(BaseCommand):
def add_arguments(self, parser):
parser.add_argument(
"--cleanup",
type=int,
default=None,
help="Remove all logs older than this number of days. Default is None, which does not remove any logs.",
)
@transaction.atomic
def handle(self, *args, **options):
if options["cleanup"] is not None:
cleanup_dt = date.today() - timedelta(days=options["cleanup"])
print(JobLog.objects.filter(created__lt=cleanup_dt).delete())
logs = JobLog.objects.filter(status="INITIAL")
print(f"Found {logs.count()} logs to update")
updated = 0
for log in logs:
res = log.check_for_update()
if res:
updated += 1
print(f"Updated {updated} logs")
from django.db.models import Count
qs = JobLog.objects.values("status").annotate(total=Count("status"))
for r in qs:
print(r["status"], r["total"])

View File

@ -0,0 +1,66 @@
# Generated by Django 5.2.7 on 2025-10-27 09:39
import django.db.models.deletion
import django.utils.timezone
import model_utils.fields
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("epdb", "0008_enzymelink"),
]
operations = [
migrations.CreateModel(
name="JobLog",
fields=[
(
"id",
models.BigAutoField(
auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
),
),
(
"created",
model_utils.fields.AutoCreatedField(
default=django.utils.timezone.now, editable=False, verbose_name="created"
),
),
(
"modified",
model_utils.fields.AutoLastModifiedField(
default=django.utils.timezone.now, editable=False, verbose_name="modified"
),
),
("task_id", models.UUIDField(unique=True)),
("job_name", models.TextField()),
(
"status",
models.CharField(
choices=[
("INITIAL", "Initial"),
("SUCCESS", "Success"),
("FAILURE", "Failure"),
("REVOKED", "Revoked"),
("IGNORED", "Ignored"),
],
default="INITIAL",
max_length=20,
),
),
("done_at", models.DateTimeField(blank=True, default=None, null=True)),
("task_result", models.TextField(blank=True, default=None, null=True)),
(
"user",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL
),
),
],
options={
"abstract": False,
},
),
]

View File

@ -0,0 +1,190 @@
# Generated by Django 5.2.7 on 2025-10-29 13:32
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("epdb", "0009_joblog"),
("public", "0001_initial"),
]
operations = [
migrations.AlterField(
model_name="userpackagepermission",
name="package",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="public.package",
verbose_name="Permission on",
),
),
migrations.AlterField(
model_name="grouppackagepermission",
name="package",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="public.package",
verbose_name="Permission on",
),
),
migrations.AlterField(
model_name="epmodel",
name="package",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="public.package",
verbose_name="Package",
),
),
migrations.AlterField(
model_name="rule",
name="package",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="public.package",
verbose_name="Package",
),
),
migrations.AlterField(
model_name="compound",
name="package",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="public.package",
verbose_name="Package",
),
),
migrations.AlterField(
model_name="scenario",
name="package",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="public.package",
verbose_name="Package",
),
),
migrations.AlterField(
model_name="pathway",
name="package",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="public.package",
verbose_name="Package",
),
),
migrations.AlterField(
model_name="reaction",
name="package",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="public.package",
verbose_name="Package",
),
),
migrations.AlterField(
model_name="user",
name="default_package",
field=models.ForeignKey(
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="public.package",
verbose_name="Default Package",
),
),
migrations.AlterField(
model_name="enviformer",
name="data_packages",
field=models.ManyToManyField(
related_name="%(app_label)s_%(class)s_data_packages",
to="public.package",
verbose_name="Data Packages",
),
),
migrations.AlterField(
model_name="enviformer",
name="eval_packages",
field=models.ManyToManyField(
related_name="%(app_label)s_%(class)s_eval_packages",
to="public.package",
verbose_name="Evaluation Packages",
),
),
migrations.AlterField(
model_name="enviformer",
name="rule_packages",
field=models.ManyToManyField(
related_name="%(app_label)s_%(class)s_rule_packages",
to="public.package",
verbose_name="Rule Packages",
),
),
migrations.AlterField(
model_name="mlrelativereasoning",
name="data_packages",
field=models.ManyToManyField(
related_name="%(app_label)s_%(class)s_data_packages",
to="public.package",
verbose_name="Data Packages",
),
),
migrations.AlterField(
model_name="mlrelativereasoning",
name="eval_packages",
field=models.ManyToManyField(
related_name="%(app_label)s_%(class)s_eval_packages",
to="public.package",
verbose_name="Evaluation Packages",
),
),
migrations.AlterField(
model_name="mlrelativereasoning",
name="rule_packages",
field=models.ManyToManyField(
related_name="%(app_label)s_%(class)s_rule_packages",
to="public.package",
verbose_name="Rule Packages",
),
),
migrations.AlterField(
model_name="rulebasedrelativereasoning",
name="data_packages",
field=models.ManyToManyField(
related_name="%(app_label)s_%(class)s_data_packages",
to="public.package",
verbose_name="Data Packages",
),
),
migrations.AlterField(
model_name="rulebasedrelativereasoning",
name="eval_packages",
field=models.ManyToManyField(
related_name="%(app_label)s_%(class)s_eval_packages",
to="public.package",
verbose_name="Evaluation Packages",
),
),
migrations.AlterField(
model_name="rulebasedrelativereasoning",
name="rule_packages",
field=models.ManyToManyField(
related_name="%(app_label)s_%(class)s_rule_packages",
to="public.package",
verbose_name="Rule Packages",
),
),
migrations.AlterField(
model_name="setting",
name="rule_packages",
field=models.ManyToManyField(
blank=True,
related_name="setting_rule_packages",
to="public.package",
verbose_name="Setting Rule Packages",
),
),
migrations.DeleteModel(
name="Package",
),
]

View File

@ -7,7 +7,7 @@ import secrets
from abc import abstractmethod
from collections import defaultdict
from datetime import datetime
from typing import Union, List, Optional, Dict, Tuple, Set, Any
from typing import Union, List, Optional, Dict, Tuple, Set, Any, TYPE_CHECKING
from uuid import uuid4
import math
import joblib
@ -32,6 +32,8 @@ from utilities.ml import Dataset, ApplicabilityDomainPCA, EnsembleClassifierChai
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
Package = s.GET_PACKAGE_MODEL()
##########################
# User/Groups/Permission #
@ -45,7 +47,10 @@ class User(AbstractUser):
)
url = models.TextField(blank=False, null=True, verbose_name="URL", unique=True)
default_package = models.ForeignKey(
"epdb.Package", verbose_name="Default Package", null=True, on_delete=models.SET_NULL
s.PACKAGE_IMPLEMENTATION,
verbose_name="Default Package",
null=True,
on_delete=models.SET_NULL,
)
default_group = models.ForeignKey(
"Group",
@ -235,7 +240,7 @@ class UserPackagePermission(Permission):
)
user = models.ForeignKey("User", verbose_name="Permission to", on_delete=models.CASCADE)
package = models.ForeignKey(
"epdb.Package", verbose_name="Permission on", on_delete=models.CASCADE
s.PACKAGE_IMPLEMENTATION, verbose_name="Permission on", on_delete=models.CASCADE
)
class Meta:
@ -251,7 +256,7 @@ class GroupPackagePermission(Permission):
)
group = models.ForeignKey("Group", verbose_name="Permission to", on_delete=models.CASCADE)
package = models.ForeignKey(
"epdb.Package", verbose_name="Permission on", on_delete=models.CASCADE
s.PACKAGE_IMPLEMENTATION, verbose_name="Permission on", on_delete=models.CASCADE
)
class Meta:
@ -310,7 +315,7 @@ class ExternalDatabase(TimeStampedModel):
},
{
"database": ExternalDatabase.objects.get(name="ChEBI"),
"placeholder": "ChEBI ID without prefix e.g. 12345",
"placeholder": "ChEBI ID without prefix e.g. 10576",
},
],
"structure": [
@ -328,7 +333,7 @@ class ExternalDatabase(TimeStampedModel):
},
{
"database": ExternalDatabase.objects.get(name="ChEBI"),
"placeholder": "ChEBI ID without prefix e.g. 12345",
"placeholder": "ChEBI ID without prefix e.g. 10576",
},
],
"reaction": [
@ -342,7 +347,7 @@ class ExternalDatabase(TimeStampedModel):
},
{
"database": ExternalDatabase.objects.get(name="UniProt"),
"placeholder": "Query ID for UniPro e.g. rhea:12345",
"placeholder": "Query ID for UniProt e.g. rhea:12345",
},
],
}
@ -477,7 +482,7 @@ class ChemicalIdentifierMixin(ExternalIdentifierMixin):
return self.add_external_identifier("CAS", cas_number)
def get_pubchem_identifiers(self):
return self.get_external_identifier("PubChem Compound") or self.get_external_identifier(
return self.get_external_identifier("PubChem Compound") | self.get_external_identifier(
"PubChem Substance"
)
@ -651,7 +656,7 @@ class License(models.Model):
image_link = models.URLField(blank=False, null=False, verbose_name="Image link")
class Package(EnviPathModel):
class AbstractPackage(EnviPathModel):
reviewed = models.BooleanField(verbose_name="Reviewstatus", default=False)
license = models.ForeignKey(
"epdb.License", on_delete=models.SET_NULL, blank=True, null=True, verbose_name="License"
@ -719,10 +724,13 @@ class Package(EnviPathModel):
rules = sorted(rules, key=lambda x: x.url)
return rules
class Meta:
abstract = True
class Compound(EnviPathModel, AliasMixin, ScenarioMixin, ChemicalIdentifierMixin):
package = models.ForeignKey(
"epdb.Package", verbose_name="Package", on_delete=models.CASCADE, db_index=True
s.PACKAGE_IMPLEMENTATION, verbose_name="Package", on_delete=models.CASCADE, db_index=True
)
default_structure = models.ForeignKey(
"CompoundStructure",
@ -772,7 +780,7 @@ class Compound(EnviPathModel, AliasMixin, ScenarioMixin, ChemicalIdentifierMixin
@staticmethod
@transaction.atomic
def create(
package: Package, smiles: str, name: str = None, description: str = None, *args, **kwargs
package: "Package", smiles: str, name: str = None, description: str = None, *args, **kwargs
) -> "Compound":
if smiles is None or smiles.strip() == "":
raise ValueError("SMILES is required")
@ -1050,7 +1058,7 @@ class EnzymeLink(EnviPathModel, KEGGIdentifierMixin):
class Rule(PolymorphicModel, EnviPathModel, AliasMixin, ScenarioMixin):
package = models.ForeignKey(
"epdb.Package", verbose_name="Package", on_delete=models.CASCADE, db_index=True
s.PACKAGE_IMPLEMENTATION, verbose_name="Package", on_delete=models.CASCADE, db_index=True
)
# # https://github.com/django-polymorphic/django-polymorphic/issues/229
@ -1156,7 +1164,7 @@ class SimpleAmbitRule(SimpleRule):
@staticmethod
@transaction.atomic
def create(
package: Package,
package: "Package",
name: str = None,
description: str = None,
smirks: str = None,
@ -1222,6 +1230,7 @@ class SimpleAmbitRule(SimpleRule):
@property
def related_reactions(self):
Package = s.GET_PACKAGE_MODEL()
qs = Package.objects.filter(reviewed=True)
return self.reaction_rule.filter(package__in=qs).order_by("name")
@ -1314,7 +1323,7 @@ class SequentialRuleOrdering(models.Model):
class Reaction(EnviPathModel, AliasMixin, ScenarioMixin, ReactionIdentifierMixin):
package = models.ForeignKey(
"epdb.Package", verbose_name="Package", on_delete=models.CASCADE, db_index=True
s.PACKAGE_IMPLEMENTATION, verbose_name="Package", on_delete=models.CASCADE, db_index=True
)
educts = models.ManyToManyField(
"epdb.CompoundStructure", verbose_name="Educts", related_name="reaction_educts"
@ -1336,7 +1345,7 @@ class Reaction(EnviPathModel, AliasMixin, ScenarioMixin, ReactionIdentifierMixin
@staticmethod
@transaction.atomic
def create(
package: Package,
package: "Package",
name: str = None,
description: str = None,
educts: Union[List[str], List[CompoundStructure]] = None,
@ -1496,7 +1505,7 @@ class Reaction(EnviPathModel, AliasMixin, ScenarioMixin, ReactionIdentifierMixin
class Pathway(EnviPathModel, AliasMixin, ScenarioMixin):
package = models.ForeignKey(
"epdb.Package", verbose_name="Package", on_delete=models.CASCADE, db_index=True
s.PACKAGE_IMPLEMENTATION, verbose_name="Package", on_delete=models.CASCADE, db_index=True
)
setting = models.ForeignKey(
"epdb.Setting", verbose_name="Setting", on_delete=models.CASCADE, null=True, blank=True
@ -2052,7 +2061,7 @@ class Edge(EnviPathModel, AliasMixin, ScenarioMixin):
class EPModel(PolymorphicModel, EnviPathModel):
package = models.ForeignKey(
"epdb.Package", verbose_name="Package", on_delete=models.CASCADE, db_index=True
s.PACKAGE_IMPLEMENTATION, verbose_name="Package", on_delete=models.CASCADE, db_index=True
)
def _url(self):
@ -2061,17 +2070,17 @@ class EPModel(PolymorphicModel, EnviPathModel):
class PackageBasedModel(EPModel):
rule_packages = models.ManyToManyField(
"Package",
s.PACKAGE_IMPLEMENTATION,
verbose_name="Rule Packages",
related_name="%(app_label)s_%(class)s_rule_packages",
)
data_packages = models.ManyToManyField(
"Package",
s.PACKAGE_IMPLEMENTATION,
verbose_name="Data Packages",
related_name="%(app_label)s_%(class)s_data_packages",
)
eval_packages = models.ManyToManyField(
"Package",
s.PACKAGE_IMPLEMENTATION,
verbose_name="Evaluation Packages",
related_name="%(app_label)s_%(class)s_eval_packages",
)
@ -2225,10 +2234,18 @@ class PackageBasedModel(EPModel):
self.model_status = self.BUILT_NOT_EVALUATED
self.save()
def evaluate_model(self):
def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None):
if self.model_status != self.BUILT_NOT_EVALUATED:
raise ValueError(f"Can't evaluate a model in state {self.model_status}!")
if multigen:
self.multigen_eval = multigen
self.save()
if eval_packages is not None:
for p in eval_packages:
self.eval_packages.add(p)
self.model_status = self.EVALUATING
self.save()
@ -2525,7 +2542,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel):
package: "Package",
rule_packages: List["Package"],
data_packages: List["Package"],
eval_packages: List["Package"],
threshold: float = 0.5,
min_count: int = 10,
max_count: int = 0,
@ -2574,10 +2590,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel):
for p in rule_packages:
rbrr.data_packages.add(p)
if eval_packages:
for p in eval_packages:
rbrr.eval_packages.add(p)
rbrr.save()
return rbrr
@ -2632,7 +2644,6 @@ class MLRelativeReasoning(PackageBasedModel):
package: "Package",
rule_packages: List["Package"],
data_packages: List["Package"],
eval_packages: List["Package"],
threshold: float = 0.5,
name: "str" = None,
description: str = None,
@ -2672,10 +2683,6 @@ class MLRelativeReasoning(PackageBasedModel):
for p in rule_packages:
mlrr.data_packages.add(p)
if eval_packages:
for p in eval_packages:
mlrr.eval_packages.add(p)
if build_app_domain:
ad = ApplicabilityDomain.create(
mlrr,
@ -2995,7 +3002,6 @@ class EnviFormer(PackageBasedModel):
def create(
package: "Package",
data_packages: List["Package"],
eval_packages: List["Package"],
threshold: float = 0.5,
name: "str" = None,
description: str = None,
@ -3028,10 +3034,6 @@ class EnviFormer(PackageBasedModel):
for p in data_packages:
mod.data_packages.add(p)
if eval_packages:
for p in eval_packages:
mod.eval_packages.add(p)
# if build_app_domain:
# ad = ApplicabilityDomain.create(mod, app_domain_num_neighbours, app_domain_reliability_threshold,
# app_domain_local_compatibility_threshold)
@ -3045,7 +3047,8 @@ class EnviFormer(PackageBasedModel):
from enviformer import load
ckpt = os.path.join(s.MODEL_DIR, "enviformer", str(self.uuid), f"{self.uuid}.ckpt")
return load(device=s.ENVIFORMER_DEVICE, ckpt_path=ckpt)
mod = load(device=s.ENVIFORMER_DEVICE, ckpt_path=ckpt)
return mod
def predict(self, smiles) -> List["PredictionResult"]:
return self.predict_batch([smiles])[0]
@ -3059,8 +3062,12 @@ class EnviFormer(PackageBasedModel):
for smiles in smiles_list
]
logger.info(f"Submitting {canon_smiles} to {self.name}")
start = datetime.now()
products_list = self.model.predict_batch(canon_smiles)
logger.info(f"Got results {products_list}")
end = datetime.now()
logger.info(
f"Prediction took {(end - start).total_seconds():.2f} seconds. Got results {products_list}"
)
results = []
for products in products_list:
@ -3087,6 +3094,7 @@ class EnviFormer(PackageBasedModel):
start = datetime.now()
# Standardise reactions for the training data, EnviFormer ignores stereochemistry currently
co2 = {"C(=O)=O", "O=C=O"}
ds = []
for reaction in self._get_reactions():
educts = ".".join(
@ -3101,6 +3109,7 @@ class EnviFormer(PackageBasedModel):
for smile in reaction.products.all()
]
)
if products not in co2:
ds.append(f"{educts}>>{products}")
end = datetime.now()
@ -3137,10 +3146,18 @@ class EnviFormer(PackageBasedModel):
args = {"clz": "EnviFormer"}
return args
def evaluate_model(self):
def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None):
if self.model_status != self.BUILT_NOT_EVALUATED:
raise ValueError(f"Can't evaluate a model in state {self.model_status}!")
if multigen:
self.multigen_eval = multigen
self.save()
if eval_packages is not None:
for p in eval_packages:
self.eval_packages.add(p)
self.model_status = self.EVALUATING
self.save()
@ -3297,7 +3314,7 @@ class EnviFormer(PackageBasedModel):
ds = self.load_dataset()
n_splits = 20
shuff = ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42)
shuff = ShuffleSplit(n_splits=n_splits, test_size=0.1, random_state=42)
# Single gen eval is done in one loop of train then evaluate rather than storing all n_splits trained models
# this helps reduce the memory footprint.
@ -3365,7 +3382,7 @@ class EnviFormer(PackageBasedModel):
# Compute splits of the collected pathway and evaluate. Like single gen we train and evaluate in each
# iteration instead of storing all trained models.
for split_id, (train, test) in enumerate(
ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42).split(pathways)
ShuffleSplit(n_splits=n_splits, test_size=0.1, random_state=42).split(pathways)
):
train_pathways = [pathways[i] for i in train]
test_pathways = [pathways[i] for i in test]
@ -3431,7 +3448,7 @@ class PluginModel(EPModel):
class Scenario(EnviPathModel):
package = models.ForeignKey(
"epdb.Package", verbose_name="Package", on_delete=models.CASCADE, db_index=True
s.PACKAGE_IMPLEMENTATION, verbose_name="Package", on_delete=models.CASCADE, db_index=True
)
scenario_date = models.CharField(max_length=256, null=False, blank=False, default="No date")
scenario_type = models.CharField(
@ -3582,7 +3599,7 @@ class Setting(EnviPathModel):
)
rule_packages = models.ManyToManyField(
"Package",
s.PACKAGE_IMPLEMENTATION,
verbose_name="Setting Rule Packages",
related_name="setting_rule_packages",
blank=True,
@ -3664,3 +3681,53 @@ class Setting(EnviPathModel):
self.public = True
self.global_default = True
self.save()
class JobLogStatus(models.TextChoices):
INITIAL = "INITIAL", "Initial"
SUCCESS = "SUCCESS", "Success"
FAILURE = "FAILURE", "Failure"
REVOKED = "REVOKED", "Revoked"
IGNORED = "IGNORED", "Ignored"
class JobLog(TimeStampedModel):
user = models.ForeignKey("epdb.User", models.CASCADE)
task_id = models.UUIDField(unique=True)
job_name = models.TextField(null=False, blank=False)
status = models.CharField(
max_length=20,
choices=JobLogStatus.choices,
default=JobLogStatus.INITIAL,
)
done_at = models.DateTimeField(null=True, blank=True, default=None)
task_result = models.TextField(null=True, blank=True, default=None)
def check_for_update(self):
async_res = self.get_result()
new_status = async_res.state
TERMINAL_STATES = [
"SUCCESS",
"FAILURE",
"REVOKED",
"IGNORED",
]
if new_status != self.status and new_status in TERMINAL_STATES:
self.status = new_status
self.done_at = async_res.date_done
if new_status == "SUCCESS":
self.task_result = async_res.result
self.save()
return True
return False
def get_result(self):
from celery.result import AsyncResult
return AsyncResult(str(self.task_id))

View File

@ -1,12 +1,61 @@
import csv
import io
import logging
from typing import Optional
from datetime import datetime
from typing import Any, Callable, List, Optional
from uuid import uuid4
from celery import shared_task
from epdb.models import Pathway, Node, EPModel, Setting
from epdb.logic import SPathway
from celery.utils.functional import LRUCache
from django.conf import settings as s
from epdb.logic import SPathway
from epdb.models import Edge, EPModel, JobLog, Node, Pathway, Rule, Setting, User
Package = s.GET_PACKAGE_MODEL()
logger = logging.getLogger(__name__)
ML_CACHE = LRUCache(3) # Cache the three most recent ML models to reduce load times.
def get_ml_model(model_pk: int):
if model_pk not in ML_CACHE:
ML_CACHE[model_pk] = EPModel.objects.get(id=model_pk)
return ML_CACHE[model_pk]
def dispatch_eager(user: "User", job: Callable, *args, **kwargs):
try:
x = job(*args, **kwargs)
log = JobLog()
log.user = user
log.task_id = uuid4()
log.job_name = job.__name__
log.status = "SUCCESS"
log.done_at = datetime.now()
log.task_result = str(x) if x else None
log.save()
return x
except Exception as e:
logger.exception(e)
raise e
def dispatch(user: "User", job: Callable, *args, **kwargs):
try:
x = job.delay(*args, **kwargs)
log = JobLog()
log.user = user
log.task_id = x.task_id
log.job_name = job.__name__
log.status = "INITIAL"
log.save()
return x.result
except Exception as e:
logger.exception(e)
raise e
@shared_task(queue="background")
@ -16,7 +65,7 @@ def mul(a, b):
@shared_task(queue="predict")
def predict_simple(model_pk: int, smiles: str):
mod = EPModel.objects.get(id=model_pk)
mod = get_ml_model(model_pk)
res = mod.predict(smiles)
return res
@ -26,17 +75,55 @@ def send_registration_mail(user_pk: int):
pass
@shared_task(queue="model")
def build_model(model_pk: int):
@shared_task(bind=True, queue="model")
def build_model(self, model_pk: int):
mod = EPModel.objects.get(id=model_pk)
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=mod.url)
try:
mod.build_dataset()
mod.build_model()
except Exception as e:
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(
status="FAILED", task_result=mod.url
)
raise e
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=mod.url)
return mod.url
@shared_task(queue="model")
def evaluate_model(model_pk: int):
@shared_task(bind=True, queue="model")
def evaluate_model(self, model_pk: int, multigen: bool, package_pks: Optional[list] = None):
packages = None
if package_pks:
packages = Package.objects.filter(pk__in=package_pks)
mod = EPModel.objects.get(id=model_pk)
mod.evaluate_model()
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=mod.url)
try:
mod.evaluate_model(multigen, eval_packages=packages)
except Exception as e:
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(
status="FAILED", task_result=mod.url
)
raise e
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=mod.url)
return mod.url
@shared_task(queue="model")
@ -45,16 +132,26 @@ def retrain(model_pk: int):
mod.retrain()
@shared_task(queue="predict")
@shared_task(bind=True, queue="predict")
def predict(
pw_pk: int, pred_setting_pk: int, limit: Optional[int] = None, node_pk: Optional[int] = None
self,
pw_pk: int,
pred_setting_pk: int,
limit: Optional[int] = None,
node_pk: Optional[int] = None,
) -> Pathway:
pw = Pathway.objects.get(id=pw_pk)
setting = Setting.objects.get(id=pred_setting_pk)
# If the setting has a model add/restore it from the cache
if setting.model is not None:
setting.model = get_ml_model(setting.model.pk)
pw.kv.update(**{"status": "running"})
pw.save()
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=pw.url)
try:
# regular prediction
if limit is not None:
@ -79,7 +176,111 @@ def predict(
except Exception as e:
pw.kv.update({"status": "failed"})
pw.save()
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(
status="FAILED", task_result=pw.url
)
raise e
pw.kv.update(**{"status": "completed"})
pw.save()
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=pw.url)
return pw.url
@shared_task(bind=True, queue="background")
def identify_missing_rules(
self,
pw_pks: List[int],
rule_package_pk: int,
):
from utilities.misc import PathwayUtils
rules = Package.objects.get(pk=rule_package_pk).get_applicable_rules()
rows: List[Any] = []
header = [
"Package Name",
"Pathway Name",
"Educt Name",
"Educt SMILES",
"Reaction Name",
"Reaction SMIRKS",
"Triggered Rules",
"Reactant SMARTS",
"Product SMARTS",
"Product Names",
"Product SMILES",
]
rows.append(header)
for pw in Pathway.objects.filter(pk__in=pw_pks):
pu = PathwayUtils(pw)
missing_rules = pu.find_missing_rules(rules)
package_name = pw.package.name
pathway_name = pw.name
for edge_url, rule_chain in missing_rules.items():
row: List[Any] = [package_name, pathway_name]
edge = Edge.objects.get(url=edge_url)
educts = edge.start_nodes.all()
for educt in educts:
row.append(educt.default_node_label.name)
row.append(educt.default_node_label.smiles)
row.append(edge.edge_label.name)
row.append(edge.edge_label.smirks())
rule_names = []
reactant_smarts = []
product_smarts = []
for r in rule_chain:
r = Rule.objects.get(url=r[0])
rule_names.append(r.name)
rs = r.reactants_smarts
if isinstance(rs, set):
rs = list(rs)
ps = r.products_smarts
if isinstance(ps, set):
ps = list(ps)
reactant_smarts.append(rs)
product_smarts.append(ps)
row.append(rule_names)
row.append(reactant_smarts)
row.append(product_smarts)
products = edge.end_nodes.all()
product_names = []
product_smiles = []
for product in products:
product_names.append(product.default_node_label.name)
product_smiles.append(product.default_node_label.smiles)
row.append(product_names)
row.append(product_smiles)
rows.append(row)
buffer = io.StringIO()
writer = csv.writer(buffer)
writer.writerows(rows)
buffer.seek(0)
return buffer.getvalue()

View File

@ -1,8 +1,21 @@
from django import template
from pydantic import AnyHttpUrl, ValidationError
from pydantic.type_adapter import TypeAdapter
register = template.Library()
url_adapter = TypeAdapter(AnyHttpUrl)
@register.filter
def classname(obj):
return obj.__class__.__name__
@register.filter
def is_url(value):
try:
url_adapter.validate_python(value)
return True
except ValidationError:
return False

View File

@ -190,6 +190,7 @@ urlpatterns = [
re_path(r"^indigo/dearomatize$", v.dearomatize, name="indigo_dearomatize"),
re_path(r"^indigo/layout$", v.layout, name="indigo_layout"),
re_path(r"^depict$", v.depict, name="depict"),
re_path(r"^jobs", v.jobs, name="jobs"),
# OAuth Stuff
path("o/userinfo/", v.userinfo, name="oauth_userinfo"),
]

View File

@ -1,11 +1,11 @@
import json
import logging
from typing import List, Dict, Any
from typing import Any, Dict, List
from django.conf import settings as s
from django.contrib.auth import get_user_model
from django.http import JsonResponse, HttpResponse, HttpResponseNotAllowed, HttpResponseBadRequest
from django.shortcuts import render, redirect
from django.http import HttpResponse, HttpResponseBadRequest, HttpResponseNotAllowed, JsonResponse
from django.shortcuts import redirect, render
from django.urls import reverse
from django.views.decorators.csrf import csrf_exempt
from envipy_additional_information import NAME_MAPPING
@ -14,41 +14,43 @@ from oauth2_provider.decorators import protected_resource
from utilities.chem import FormatConverter, IndigoUtils
from utilities.decorators import package_permission_required
from utilities.misc import HTMLGenerator
from .logic import (
EPDBURLParser,
GroupManager,
PackageManager,
UserManager,
SettingManager,
SearchManager,
EPDBURLParser,
SettingManager,
UserManager,
)
from .models import (
Package,
GroupPackagePermission,
Group,
CompoundStructure,
APIToken,
Compound,
CompoundStructure,
Edge,
EnviFormer,
EnzymeLink,
EPModel,
ExternalDatabase,
ExternalIdentifier,
Group,
GroupPackagePermission,
JobLog,
License,
MLRelativeReasoning,
Node,
Pathway,
Permission,
Reaction,
Rule,
Pathway,
Node,
EPModel,
EnviFormer,
MLRelativeReasoning,
RuleBasedRelativeReasoning,
Scenario,
SimpleAmbitRule,
APIToken,
UserPackagePermission,
Permission,
License,
User,
Edge,
ExternalDatabase,
ExternalIdentifier,
EnzymeLink,
UserPackagePermission,
)
Package = s.GET_PACKAGE_MODEL()
logger = logging.getLogger(__name__)
@ -81,8 +83,7 @@ def login(request):
return render(request, "static/login.html", context)
elif request.method == "POST":
from django.contrib.auth import authenticate
from django.contrib.auth import login
from django.contrib.auth import authenticate, login
username = request.POST.get("username")
password = request.POST.get("password")
@ -754,8 +755,8 @@ def package_models(request, package_uuid):
context["unreviewed_objects"] = unreviewed_model_qs
context["model_types"] = {
"ML Relative Reasoning": "ml-relative-reasoning",
"Rule Based Relative Reasoning": "rule-based-relative-reasoning",
"ML Relative Reasoning": "mlrr",
"Rule Based Relative Reasoning": "rbrr",
}
if s.FLAGS.get("ENVIFORMER", False):
@ -775,48 +776,40 @@ def package_models(request, package_uuid):
model_type = request.POST.get("model-type")
if model_type == "enviformer":
threshold = float(request.POST.get(f"{model_type}-threshold", 0.5))
mod = EnviFormer.create(current_package, name, description, threshold)
elif model_type == "ml-relative-reasoning" or model_type == "rule-based-relative-reasoning":
# Generic fields for ML and Rule Based
rule_packages = request.POST.getlist("package-based-relative-reasoning-rule-packages")
data_packages = request.POST.getlist("package-based-relative-reasoning-data-packages")
eval_packages = request.POST.getlist(
"package-based-relative-reasoning-evaluation-packages", []
)
rule_packages = request.POST.getlist("model-rule-packages")
data_packages = request.POST.getlist("model-data-packages")
# Generic params
params = {
"package": current_package,
"name": name,
"description": description,
"rule_packages": [
PackageManager.get_package_by_url(current_user, p) for p in rule_packages
],
"data_packages": [
PackageManager.get_package_by_url(current_user, p) for p in data_packages
],
"eval_packages": [
PackageManager.get_package_by_url(current_user, p) for p in eval_packages
],
}
if model_type == "ml-relative-reasoning":
if model_type == "enviformer":
threshold = float(request.POST.get("model-threshold", 0.5))
params["threshold"] = threshold
mod = EnviFormer.create(**params)
elif model_type == "mlrr":
# ML Specific
threshold = float(request.POST.get(f"{model_type}-threshold", 0.5))
threshold = float(request.POST.get("model-threshold", 0.5))
# TODO handle additional fingerprinter
# fingerprinter = request.POST.get(f"{model_type}-fingerprinter")
# fingerprinter = request.POST.get("model-fingerprinter")
params["rule_packages"] = [
PackageManager.get_package_by_url(current_user, p) for p in rule_packages
]
# App Domain related parameters
build_ad = request.POST.get("build-app-domain", False) == "on"
num_neighbors = request.POST.get("num-neighbors", 5)
reliability_threshold = request.POST.get("reliability-threshold", 0.5)
local_compatibility_threshold = request.POST.get(
"local-compatibility-threshold", 0.5
)
local_compatibility_threshold = request.POST.get("local-compatibility-threshold", 0.5)
params["threshold"] = threshold
# params['fingerprinter'] = fingerprinter
@ -826,18 +819,24 @@ def package_models(request, package_uuid):
params["app_domain_local_compatibility_threshold"] = local_compatibility_threshold
mod = MLRelativeReasoning.create(**params)
else:
elif model_type == "rbrr":
params["rule_packages"] = [
PackageManager.get_package_by_url(current_user, p) for p in rule_packages
]
mod = RuleBasedRelativeReasoning.create(**params)
from .tasks import build_model
build_model.delay(mod.pk)
elif s.FLAGS.get("PLUGINS", False) and model_type in s.CLASSIFIER_PLUGINS.values():
pass
else:
return error(
request, "Invalid model type.", f'Model type "{model_type}" is not supported."'
)
return redirect(mod.url)
from .tasks import build_model, dispatch
dispatch(current_user, build_model, mod.pk)
return redirect(mod.url)
else:
return HttpResponseNotAllowed(["GET", "POST"])
@ -865,6 +864,10 @@ def package_model(request, package_uuid, model_uuid):
return JsonResponse({"error": f'"{smiles}" is not a valid SMILES'}, status=400)
if classify:
from epdb.tasks import dispatch_eager, predict_simple
res = dispatch_eager(current_user, predict_simple, current_model.pk, stand_smiles)
pred_res = current_model.predict(stand_smiles)
res = []
@ -909,9 +912,25 @@ def package_model(request, package_uuid, model_uuid):
current_model.delete()
return redirect(current_package.url + "/model")
elif hidden == "evaluate":
from .tasks import evaluate_model
from .tasks import dispatch, evaluate_model
eval_type = request.POST.get("model-evaluation-type")
if eval_type not in ["sg", "mg"]:
return error(
request,
"Invalid evaluation type",
f'Evaluation type "{eval_type}" is not supported. Only "sg" and "mg" are supported.',
)
multigen = eval_type == "mg"
eval_packages = request.POST.getlist("model-evaluation-packages")
eval_package_ids = [
PackageManager.get_package_by_url(current_user, p).id for p in eval_packages
]
dispatch(current_user, evaluate_model, current_model.pk, multigen, eval_package_ids)
evaluate_model.delay(current_model.pk)
return redirect(current_model.url)
else:
return HttpResponseBadRequest()
@ -1251,7 +1270,16 @@ def package_compound_structures(request, package_uuid, compound_uuid):
structure_smiles = request.POST.get("structure-smiles")
structure_description = request.POST.get("structure-description")
cs = current_compound.add_structure(structure_smiles, structure_name, structure_description)
try:
cs = current_compound.add_structure(
structure_smiles, structure_name, structure_description
)
except ValueError:
return error(
request,
"Adding structure failed!",
"The structure could not be added as normalized structures don't match!",
)
return redirect(cs.url)
@ -1800,9 +1828,9 @@ def package_pathways(request, package_uuid):
pw.setting = prediction_setting
pw.save()
from .tasks import predict
from .tasks import dispatch, predict
predict.delay(pw.pk, prediction_setting.pk, limit=limit)
dispatch(current_user, predict, pw.pk, prediction_setting.pk, limit=limit)
return redirect(pw.url)
@ -1838,6 +1866,25 @@ def package_pathway(request, package_uuid, pathway_uuid):
return response
if (
request.GET.get("identify-missing-rules", False) == "true"
and request.GET.get("rule-package") is not None
):
from .tasks import dispatch_eager, identify_missing_rules
rule_package = PackageManager.get_package_by_url(
current_user, request.GET.get("rule-package")
)
res = dispatch_eager(
current_user, identify_missing_rules, [current_pathway.pk], rule_package.pk
)
filename = f"{current_pathway.name.replace(' ', '_')}_{current_pathway.uuid}.csv"
response = HttpResponse(res, content_type="text/csv")
response["Content-Disposition"] = f'attachment; filename="{filename}"'
return response
# Pathway d3_json() relies on a lot of related objects (Nodes, Structures, Edges, Reaction, Rules, ...)
# we will again fetch the current pathway identified by this url, but this time together with nearly all
# related objects
@ -1921,10 +1968,16 @@ def package_pathway(request, package_uuid, pathway_uuid):
if node_url:
n = current_pathway.get_node(node_url)
from .tasks import predict
from .tasks import dispatch, predict
dispatch(
current_user,
predict,
current_pathway.pk,
current_pathway.setting.pk,
node_pk=n.pk,
)
# Dont delay?
predict(current_pathway.pk, current_pathway.setting.pk, node_pk=n.pk)
return JsonResponse({"success": current_pathway.url})
return HttpResponseBadRequest()
@ -2272,9 +2325,9 @@ def package_scenarios(request, package_uuid):
context["unreviewed_objects"] = unreviewed_scenario_qs
from envipy_additional_information import (
SEDIMENT_ADDITIONAL_INFORMATION,
SLUDGE_ADDITIONAL_INFORMATION,
SOIL_ADDITIONAL_INFORMATION,
SEDIMENT_ADDITIONAL_INFORMATION,
)
context["scenario_types"] = {
@ -2696,6 +2749,24 @@ def setting(request, setting_uuid):
pass
def jobs(request):
current_user = _anonymous_or_real(request)
context = get_base_context(request)
if request.method == "GET":
context["object_type"] = "joblog"
context["breadcrumbs"] = [
{"Home": s.SERVER_URL},
{"Jobs": s.SERVER_URL + "/jobs"},
]
if current_user.is_superuser:
context["jobs"] = JobLog.objects.all().order_by("-created")
else:
context["jobs"] = JobLog.objects.filter(user=current_user).order_by("-created")
return render(request, "collections/joblog.html", context)
###########
# KETCHER #
###########

Binary file not shown.

View File

@ -1,22 +1,19 @@
import gzip
import json
import logging
import os.path
from datetime import datetime
from django.conf import settings as s
from django.http import HttpResponseNotAllowed
from django.shortcuts import render
from epdb.logic import PackageManager
from epdb.models import Rule, SimpleAmbitRule, Package, CompoundStructure
from epdb.views import get_base_context, _anonymous_or_real
from utilities.chem import FormatConverter
from rdkit import Chem
from rdkit.Chem.MolStandardize import rdMolStandardize
from epdb.models import CompoundStructure, Rule, SimpleAmbitRule
from epdb.views import get_base_context
from utilities.chem import FormatConverter
Package = s.GET_PACKAGE_MODEL()
logger = logging.getLogger(__name__)
@ -59,9 +56,7 @@ def run_both_engines(SMILES, SMIRKS):
set(
[
normalize_smiles(str(x))
for x in FormatConverter.sanitize_smiles(
[str(s) for s in all_rdkit_prods]
)[0]
for x in FormatConverter.sanitize_smiles([str(s) for s in all_rdkit_prods])[0]
]
)
)
@ -85,8 +80,7 @@ def migration(request):
url="http://localhost:8000/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1"
)
ALL_SMILES = [
cs.smiles
for cs in CompoundStructure.objects.filter(compound__package=BBD)
cs.smiles for cs in CompoundStructure.objects.filter(compound__package=BBD)
]
RULES = SimpleAmbitRule.objects.filter(package=BBD)
@ -142,9 +136,7 @@ def migration(request):
)
for r in migration_status["results"]:
r["detail_url"] = r["detail_url"].replace(
"http://localhost:8000", s.SERVER_URL
)
r["detail_url"] = r["detail_url"].replace("http://localhost:8000", s.SERVER_URL)
context.update(**migration_status)
@ -152,8 +144,6 @@ def migration(request):
def migration_detail(request, package_uuid, rule_uuid):
current_user = _anonymous_or_real(request)
if request.method == "GET":
context = get_base_context(request)
@ -235,9 +225,7 @@ def compare(request):
context["smirks"] = (
"[#1,#6:6][#7;X3;!$(NC1CC1)!$([N][C]=O)!$([!#8]CNC=O):1]([#1,#6:7])[#6;A;X4:2][H:3]>>[#1,#6:6][#7;X3:1]([#1,#6:7])[H:3].[#6;A:2]=O"
)
context["smiles"] = (
"C(CC(=O)N[C@@H](CS[Se-])C(=O)NCC(=O)[O-])[C@@H](C(=O)[O-])N"
)
context["smiles"] = "C(CC(=O)N[C@@H](CS[Se-])C(=O)NCC(=O)[O-])[C@@H](C(=O)[O-])N"
return render(request, "compare.html", context)
elif request.method == "POST":

0
public/__init__.py Normal file
View File

1
public/admin.py Normal file
View File

@ -0,0 +1 @@
# Register your models here.

6
public/apps.py Normal file
View File

@ -0,0 +1,6 @@
from django.apps import AppConfig
class PublicConfig(AppConfig):
default_auto_field = "django.db.models.BigAutoField"
name = "public"

View File

@ -0,0 +1,56 @@
# Generated by Django 5.2.7 on 2025-10-29 13:32
import django.utils.timezone
import model_utils.fields
import uuid
from django.db import migrations, models
class Migration(migrations.Migration):
initial = True
dependencies = []
operations = [
migrations.CreateModel(
name="Package",
fields=[
(
"id",
models.BigAutoField(
auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
),
),
(
"created",
model_utils.fields.AutoCreatedField(
default=django.utils.timezone.now, editable=False, verbose_name="created"
),
),
(
"modified",
model_utils.fields.AutoLastModifiedField(
default=django.utils.timezone.now, editable=False, verbose_name="modified"
),
),
(
"uuid",
models.UUIDField(
default=uuid.uuid4, unique=True, verbose_name="UUID of this object"
),
),
("name", models.TextField(default="no name", verbose_name="Name")),
(
"description",
models.TextField(default="no description", verbose_name="Descriptions"),
),
("url", models.TextField(null=True, unique=True, verbose_name="URL")),
("kv", models.JSONField(blank=True, default=dict, null=True)),
("reviewed", models.BooleanField(default=False, verbose_name="Reviewstatus")),
],
options={
"db_table": "epdb_package",
"managed": False,
},
),
]

View File

@ -0,0 +1,16 @@
# Generated by Django 5.2.7 on 2025-10-29 18:39
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("public", "0001_initial"),
]
operations = [
migrations.AlterModelOptions(
name="package",
options={},
),
]

View File

@ -0,0 +1,25 @@
# Generated by Django 5.2.7 on 2025-10-29 18:40
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("epdb", "0010_alter_userpackagepermission_package_and_more"),
("public", "0002_alter_package_options"),
]
operations = [
migrations.AddField(
model_name="package",
name="license",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="epdb.license",
verbose_name="License",
),
),
]

View File

6
public/models.py Normal file
View File

@ -0,0 +1,6 @@
from epdb.models import AbstractPackage
class Package(AbstractPackage):
class Meta:
db_table = "epdb_package"

1
public/tests.py Normal file
View File

@ -0,0 +1 @@
# Create your tests here.

1
public/views.py Normal file
View File

@ -0,0 +1 @@
# Create your views here.

View File

@ -22,6 +22,10 @@
<i class="glyphicon glyphicon-floppy-save"></i> Download Pathway as Image</a>
</li>
{% if meta.can_edit %}
<li>
<a class="button" data-toggle="modal" data-target="#identify_missing_rules_modal">
<i class="glyphicon glyphicon-question-sign"></i> Identify Missing Rules</a>
</li>
<li role="separator" class="divider"></li>
<li>
<a class="button" data-toggle="modal" data-target="#edit_pathway_modal">

View File

@ -0,0 +1,71 @@
{% extends "framework.html" %}
{% load static %}
{% load envipytags %}
{% block content %}
<div class="panel-group" id="reviewListAccordion">
<div class="panel panel-default">
<div class="panel-heading" id="headingPanel" style="font-size:2rem;height: 46px">
Jobs
</div>
<div class="panel-body">
<p>
Job Logs Desc
</p>
</div>
<div class="panel panel-default panel-heading list-group-item" style="background-color:silver">
<h4 class="panel-title">
<a id="job-accordion-link" data-toggle="collapse" data-parent="#job-accordion" href="#jobs">
Jobs
</a>
</h4>
</div>
<div id="jobs"
class="panel-collapse collapse in">
<div class="panel-body list-group-item" id="job-content">
<table class="table table-bordered table-hover">
<tr style="background-color: rgba(0, 0, 0, 0.08);">
<th scope="col">ID</th>
<th scope="col">Name</th>
<th scope="col">Status</th>
<th scope="col">Queued</th>
<th scope="col">Done</th>
<th scope="col">Result</th>
</tr>
<tbody>
{% for job in jobs %}
<tr>
<td>{{ job.task_id }}</td>
<td>{{ job.job_name }}</td>
<td>{{ job.status }}</td>
<td>{{ job.created }}</td>
<td>{{ job.done_at }}</td>
{% if job.task_result and job.task_result|is_url == True %}
<td><a href="{{ job.task_result }}">Result</a></td>
{% elif job.task_result %}
<td>{{ job.task_result|slice:"40" }}...</td>
{% else %}
<td>Empty</td>
{% endif %}
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
<!-- Unreviewable objects such as User / Group / Setting -->
<ul class='list-group'>
{% for obj in objects %}
{% if object_type == 'user' %}
<a class="list-group-item" href="{{ obj.url }}">{{ obj.username }}</a>
{% else %}
<a class="list-group-item" href="{{ obj.url }}">{{ obj.name }}</a>
{% endif %}
{% endfor %}
</ul>
</div>
</div>
{% endblock content %}

View File

@ -18,13 +18,19 @@
prediction. You just need to set a name and the packages
you want the object to be based on. There are multiple types of models available.
For additional information have a look at our
<a target="_blank" href="https://wiki.envipath.org/index.php/relative-reasoning" role="button">wiki &gt;&gt;</a>
<a target="_blank" href="https://wiki.envipath.org/index.php/relative-reasoning" role="button">wiki
&gt;&gt;</a>
</div>
<!-- Name -->
<label for="model-name">Name</label>
<input id="model-name" name="model-name" class="form-control" placeholder="Name"/>
<!-- Description -->
<label for="model-description">Description</label>
<input id="model-description" name="model-description" class="form-control"
placeholder="Description"/>
<!-- Model Type -->
<label for="model-type">Model Type</label>
<select id="model-type" name="model-type" class="form-control" data-width='100%'>
<option disabled selected>Select Model Type</option>
@ -32,12 +38,12 @@
<option value="{{ v }}">{{ k }}</option>
{% endfor %}
</select>
<!-- ML and Rule Based Based Form-->
<div id="package-based-relative-reasoning-specific-form">
<!-- Rule Packages -->
<label for="package-based-relative-reasoning-rule-packages">Rule Packages</label>
<select id="package-based-relative-reasoning-rule-packages" name="package-based-relative-reasoning-rule-packages"
data-actions-box='true' class="form-control" multiple data-width='100%'>
<div id="rule-packages" class="ep-model-param mlrr rbrr">
<label for="model-rule-packages">Rule Packages</label>
<select id="model-rule-packages" name="model-rule-packages" data-actions-box='true'
class="form-control" multiple data-width='100%'>
<option disabled>Reviewed Packages</option>
{% for obj in meta.readable_packages %}
{% if obj.reviewed %}
@ -52,10 +58,13 @@
{% endif %}
{% endfor %}
</select>
</div>
<!-- Data Packages -->
<label for="package-based-relative-reasoning-data-packages" >Data Packages</label>
<select id="package-based-relative-reasoning-data-packages" name="package-based-relative-reasoning-data-packages"
data-actions-box='true' class="form-control" multiple data-width='100%'>
<div id="data-packages" class="ep-model-param mlrr rbrr enviformer">
<label for="model-data-packages">Data Packages</label>
<select id="model-data-packages" name="model-data-packages" data-actions-box='true'
class="form-control" multiple data-width='100%'>
<option disabled>Reviewed Packages</option>
{% for obj in meta.readable_packages %}
{% if obj.reviewed %}
@ -70,32 +79,31 @@
{% endif %}
{% endfor %}
</select>
</div>
<div id="ml-relative-reasoning-specific-form">
<!-- Fingerprinter -->
<label for="ml-relative-reasoning-fingerprinter">Fingerprinter</label>
<select id="ml-relative-reasoning-fingerprinter" name="ml-relative-reasoning-fingerprinter"
class="form-control">
<div id="fingerprinter" class="ep-model-param mlrr">
<label for="model-fingerprinter">Fingerprinter</label>
<select id="model-fingerprinter" name="model-fingerprinter" data-actions-box='true'
class="form-control" multiple data-width='100%'>
<option value="MACCS" selected>MACCS Fingerprinter</option>
</select>
{% if meta.enabled_features.PLUGINS and additional_descriptors %}
<!-- Property Plugins go here -->
<label for="ml-relative-reasoning-additional-fingerprinter">Additional Fingerprinter /
Descriptors</label>
<select id="ml-relative-reasoning-additional-fingerprinter"
name="ml-relative-reasoning-additional-fingerprinter" class="form-control">
<option disabled selected>Select Additional Fingerprinter / Descriptor</option>
{% for k, v in additional_descriptors.items %}
<option value="{{ v }}">{{ k }}</option>
{% endfor %}
</select>
{% endif %}
<label for="ml-relative-reasoning-threshold">Threshold</label>
<input type="number" min="0" max="1" step="0.05" value="0.5"
id="ml-relative-reasoning-threshold"
name="ml-relative-reasoning-threshold" class="form-control">
</select>
</div>
<!-- Threshold -->
<div id="threshold" class="ep-model-param mlrr enviformer">
<label for="model-threshold">Threshold</label>
<input type="number" min="0" max="1" step="0.05" value="0.5" id="model-threshold"
name="model-threshold" class="form-control">
</div>
<div id="appdomain" class="ep-model-param mlrr">
{% if meta.enabled_features.APPLICABILITY_DOMAIN %}
<!-- Build AD? -->
<div class="checkbox">
@ -107,11 +115,13 @@
<div id="ad-params" style="display:none">
<!-- Num Neighbors -->
<label for="num-neighbors">Number of Neighbors</label>
<input id="num-neighbors" name="num-neighbors" type="number" class="form-control" value="5"
<input id="num-neighbors" name="num-neighbors" type="number" class="form-control"
value="5"
step="1" min="0" max="10">
<!-- Local Compatibility -->
<label for="local-compatibility-threshold">Local Compatibility Threshold</label>
<input id="local-compatibility-threshold" name="local-compatibility-threshold" type="number"
<input id="local-compatibility-threshold" name="local-compatibility-threshold"
type="number"
class="form-control" value="0.5" step="0.01" min="0" max="1">
<!-- Reliability -->
<label for="reliability-threshold">Reliability Threshold</label>
@ -120,12 +130,6 @@
</div>
{% endif %}
</div>
<!-- EnviFormer-->
<div id="enviformer-specific-form">
<label for="enviformer-threshold">Threshold</label>
<input type="number" min="0" max="1" step="0.05" value="0.5" id="enviformer-threshold"
name="enviformer-threshold" class="form-control">
</div>
</form>
</div>
<div class="modal-footer">
@ -138,19 +142,22 @@
<script>
$(function () {
// Built in Model Types
var nativeModelTypes = [
"mlrr",
"rbrr",
"enviformer",
]
// Initially hide all "specific" forms
$("div[id$='-specific-form']").each( function() {
$(".ep-model-param").each(function () {
$(this).hide();
});
$('#model-type').selectpicker();
$("#ml-relative-reasoning-fingerprinter").selectpicker();
$("#package-based-relative-reasoning-rule-packages").selectpicker();
$("#package-based-relative-reasoning-data-packages").selectpicker();
$("#package-based-relative-reasoning-evaluation-packages").selectpicker();
if ($('#ml-relative-reasoning-additional-fingerprinter').length > 0) {
$("#ml-relative-reasoning-additional-fingerprinter").selectpicker();
}
$("#model-fingerprinter").selectpicker();
$("#model-rule-packages").selectpicker();
$("#model-data-packages").selectpicker();
$("#build-app-domain").change(function () {
if ($(this).is(":checked")) {
@ -162,18 +169,12 @@ $(function() {
// On change hide all and show only selected
$("#model-type").change(function () {
$("div[id$='-specific-form']").each( function() {
$(this).hide();
});
val = $('option:selected', this).val();
if (val === 'ml-relative-reasoning' || val === 'rule-based-relative-reasoning') {
$("#package-based-relative-reasoning-specific-form").show();
if (val === 'ml-relative-reasoning') {
$("#ml-relative-reasoning-specific-form").show();
}
$('.ep-model-param').hide();
var modelType = $('#model-type').val();
if (nativeModelTypes.indexOf(modelType) !== -1) {
$('.' + modelType).show();
} else {
$("#" + val + "-specific-form").show();
// do nothing
}
});
@ -183,7 +184,4 @@ $(function() {
});
});
</script>

View File

@ -17,10 +17,10 @@
For evaluation, you need to select the packages you want to use.
While the model is evaluating, you can use the model for predictions.
</div>
<!-- Evaluation -->
<label for="relative-reasoning-evaluation-packages">Evaluation Packages</label>
<select id="relative-reasoning-evaluation-packages" name=relative-reasoning-evaluation-packages"
data-actions-box='true' class="form-control" multiple data-width='100%'>
<!-- Evaluation Packages -->
<label for="model-evaluation-packages">Evaluation Packages</label>
<select id="model-evaluation-packages" name="model-evaluation-packages" data-actions-box='true'
class="form-control" multiple data-width='100%'>
<option disabled>Reviewed Packages</option>
{% for obj in meta.readable_packages %}
{% if obj.reviewed %}
@ -35,6 +35,15 @@
{% endif %}
{% endfor %}
</select>
<!-- Eval Type -->
<label for="model-evaluation-type">Evaluation Type</label>
<select id="model-evaluation-type" name="model-evaluation-type" class="form-control">
<option disabled selected>Select evaluation type</option>
<option value="sg">Single Generation</option>
<option value="mg">Multiple Generations</option>
</select>
<input type="hidden" name="hidden" value="evaluate">
</form>
</div>
@ -50,7 +59,7 @@
$(function () {
$("#relative-reasoning-evaluation-packages").selectpicker();
$("#model-evaluation-packages").selectpicker();
$('#evaluate_model_form_submit').on('click', function (e) {
e.preventDefault();

View File

@ -0,0 +1,54 @@
{% load static %}
<!-- Identify Missing Rules -->
<div id="identify_missing_rules_modal" class="modal" tabindex="-1">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<h3 class="modal-title">Identify Missing Rules</h3>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
By clicking on Download we'll search the Pathway for Reactions that are not backed by
a Rule or which can be assembled by chaining two rules.
<form id="identify-missing-rules-modal-form" accept-charset="UTF-8" action="{{ pathway.url }}"
data-remote="true" method="GET">
<label for="rule-package">Select the Rule Package</label>
<select id="rule-package" name="rule-package" data-actions-box='true' class="form-control"
data-width='100%'>
<option disabled>Reviewed Packages</option>
{% for obj in meta.readable_packages %}
{% if obj.reviewed %}
<option value="{{ obj.url }}">{{ obj.name }}</option>
{% endif %}
{% endfor %}
<option disabled>Unreviewed Packages</option>
{% for obj in meta.readable_packages %}
{% if not obj.reviewed %}
<option value="{{ obj.url }}">{{ obj.name }}</option>
{% endif %}
{% endfor %}
</select>
<input type="hidden" name="identify-missing-rules" value="true"/>
</form>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-secondary" data-dismiss="modal">Close</button>
<button type="button" class="btn btn-primary" id="identify-missing-rules-modal-submit">Download</button>
</div>
</div>
</div>
</div>
<script>
$(function () {
$('#identify-missing-rules-modal-submit').click(function (e) {
e.preventDefault();
$('#identify-missing-rules-modal-form').submit();
$('#identify_missing_rules_modal').modal('hide');
});
})
</script>

View File

@ -183,7 +183,7 @@
</div>
<div id="compound-external-identifier" class="panel-collapse collapse in">
<div class="panel-body list-group-item">
{% if compound.get_pubchem_identifiers %}
{% if compound.get_pubchem_compound_identifiers %}
<div class="panel panel-default panel-heading list-group-item"
style="background-color:silver">
<h4 class="panel-title">
@ -193,12 +193,28 @@
</h4>
</div>
<div id="compound-pubchem-identifier" class="panel-collapse collapse in">
{% for eid in compound.get_pubchem_identifiers %}
{% for eid in compound.get_pubchem_compound_identifiers %}
<a class="list-group-item"
href="{{ eid.external_url }}">CID{{ eid.identifier_value }}</a>
{% endfor %}
</div>
{% endif %}
{% if compound.get_pubchem_substance_identifiers %}
<div class="panel panel-default panel-heading list-group-item"
style="background-color:silver">
<h4 class="panel-title">
<a id="compound-pubchem-identifier-link" data-toggle="collapse"
data-parent="#compound-external-identifier"
href="#compound-pubchem-identifier">PubChem Substance Identifier</a>
</h4>
</div>
<div id="compound-pubchem-identifier" class="panel-collapse collapse in">
{% for eid in compound.get_pubchem_substance_identifiers %}
<a class="list-group-item"
href="{{ eid.external_url }}">SID{{ eid.identifier_value }}</a>
{% endfor %}
</div>
{% endif %}
{% if compound.get_chebi_identifiers %}
<div class="panel panel-default panel-heading list-group-item"
style="background-color:silver">

View File

@ -117,7 +117,7 @@
<!-- End Predict Panel -->
{% endif %}
{% if model.app_domain %}
{% if model.ready_for_prediction and model.app_domain %}
<!-- App Domain -->
<div class="panel panel-default panel-heading list-group-item" style="background-color:silver">
<h4 class="panel-title">

View File

@ -83,6 +83,7 @@
{% include "modals/objects/add_pathway_edge_modal.html" %}
{% include "modals/objects/download_pathway_csv_modal.html" %}
{% include "modals/objects/download_pathway_image_modal.html" %}
{% include "modals/objects/identify_missing_rules_modal.html" %}
{% include "modals/objects/generic_copy_object_modal.html" %}
{% include "modals/objects/edit_pathway_modal.html" %}
{% include "modals/objects/generic_set_aliases_modal.html" %}

View File

@ -1,7 +1,32 @@
from collections import defaultdict
from datetime import datetime
from tempfile import TemporaryDirectory
from django.conf import settings as s
from django.test import TestCase, tag
from epdb.logic import PackageManager
from epdb.models import User, EnviFormer, Package
from epdb.models import EnviFormer, Setting, User
from epdb.tasks import predict, predict_simple
Package = s.GET_PACKAGE_MODEL()
def measure_predict(mod, pathway_pk=None):
# Measure and return the prediction time
start = datetime.now()
if pathway_pk:
s = Setting()
s.model = mod
s.model_threshold = 0.2
s.max_depth = 4
s.max_nodes = 20
s.save()
pred_result = predict.delay(pathway_pk, s.pk, limit=s.max_depth)
else:
pred_result = predict_simple.delay(mod.pk, "C1=CC=C(CSCC2=CC=CC=C2)C=C1")
_ = pred_result.get()
return round((datetime.now() - start).total_seconds(), 2)
@tag("slow")
@ -28,8 +53,41 @@ class EnviFormerTest(TestCase):
mod.build_dataset()
mod.build_model()
mod.multigen_eval = True
mod.save()
mod.evaluate_model()
mod.evaluate_model(True, eval_packages_objs)
mod.predict("CCN(CC)C(=O)C1=CC(=CC=C1)C")
def test_predict_runtime(self):
with TemporaryDirectory() as tmpdir:
with self.settings(MODEL_DIR=tmpdir):
threshold = float(0.5)
data_package_objs = [self.BBD_SUBSET]
eval_packages_objs = [self.BBD_SUBSET]
mods = []
for _ in range(4):
mod = EnviFormer.create(
self.package, data_package_objs, eval_packages_objs, threshold=threshold
)
mod.build_dataset()
mod.build_model()
mods.append(mod)
# Test prediction time drops after first prediction
times = [measure_predict(mods[0]) for _ in range(5)]
print(f"First prediction took {times[0]} seconds, subsequent ones took {times[1:]}")
# Test pathway prediction
times = [measure_predict(mods[1], self.BBD_SUBSET.pathways[0].pk) for _ in range(5)]
print(
f"First pathway prediction took {times[0]} seconds, subsequent ones took {times[1:]}"
)
# Test eviction by performing three prediction with every model, twice.
times = defaultdict(list)
for _ in range(
2
): # Eviction should cause the second iteration here to have to reload the models
for mod in mods:
for _ in range(3):
times[mod.pk].append(measure_predict(mod))
print(times)

View File

@ -1,10 +1,13 @@
from tempfile import TemporaryDirectory
import numpy as np
from django.conf import settings as s
from django.test import TestCase
from epdb.logic import PackageManager
from epdb.models import User, MLRelativeReasoning, Package
from epdb.models import MLRelativeReasoning, User
Package = s.GET_PACKAGE_MODEL()
class ModelTest(TestCase):
@ -30,7 +33,6 @@ class ModelTest(TestCase):
self.package,
rule_package_objs,
data_package_objs,
eval_packages_objs,
threshold=threshold,
name="ECC - BBD - 0.5",
description="Created MLRelativeReasoning in Testcase",
@ -50,9 +52,7 @@ class ModelTest(TestCase):
mod.build_dataset()
mod.build_model()
mod.multigen_eval = True
mod.save()
mod.evaluate_model()
mod.evaluate_model(True, eval_packages_objs)
results = mod.predict("CCN(CC)C(=O)C1=CC(=CC=C1)C")

View File

@ -1,8 +1,12 @@
from django.conf import settings as s
from django.test import TestCase
from networkx.utils.misc import graphs_equal
from epdb.logic import PackageManager, SPathway
from epdb.models import Pathway, User, Package
from utilities.ml import multigen_eval, pathway_edit_eval, graph_from_pathway
from epdb.models import Pathway, User
from utilities.ml import graph_from_pathway, multigen_eval, pathway_edit_eval
Package = s.GET_PACKAGE_MODEL()
class MultiGenTest(TestCase):

View File

@ -1,9 +1,10 @@
from unittest.mock import patch, MagicMock, PropertyMock
from unittest.mock import MagicMock, PropertyMock, patch
from django.conf import settings as s
from django.test import TestCase
from epdb.logic import PackageManager
from epdb.models import User, SimpleAmbitRule
from epdb.models import SimpleAmbitRule, User
class SimpleAmbitRuleTest(TestCase):
@ -209,7 +210,7 @@ class SimpleAmbitRuleTest(TestCase):
self.assertEqual(rule.products_smarts, expected_products)
@patch("epdb.models.Package.objects")
@patch(f"{s.PACKAGE_MODULE_PATH}.objects")
def test_related_reactions_property(self, mock_package_objects):
"""Test related_reactions property returns correct queryset."""
mock_qs = MagicMock()

View File

@ -1,12 +1,14 @@
from django.conf import settings as s
from django.test import TestCase, override_settings
from django.urls import reverse
from django.conf import settings as s
from epdb.logic import UserManager
from epdb.models import Package, User
from epdb.models import User
Package = s.GET_PACKAGE_MODEL()
@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models")
@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models", CELERY_TASK_ALWAYS_EAGER=True)
class PathwayViewTest(TestCase):
fixtures = ["test_fixtures_incl_model.jsonl.gz"]

View File

@ -4,7 +4,9 @@ from django.test import TestCase, tag
from django.urls import reverse
from epdb.logic import UserManager
from epdb.models import Package, UserPackagePermission, Permission, GroupPackagePermission, Group
from epdb.models import Group, GroupPackagePermission, Permission, UserPackagePermission
Package = s.GET_PACKAGE_MODEL()
class PackageViewTest(TestCase):

View File

@ -1,12 +1,14 @@
from django.conf import settings as s
from django.test import TestCase, override_settings
from django.urls import reverse
from django.conf import settings as s
from epdb.logic import UserManager, PackageManager
from epdb.models import Pathway, Edge
from epdb.logic import PackageManager, UserManager
from epdb.models import Edge, Pathway
Package = s.GET_PACKAGE_MODEL()
@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models")
@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models", CELERY_TASK_ALWAYS_EAGER=True)
class PathwayViewTest(TestCase):
fixtures = ["test_fixtures_incl_model.jsonl.gz"]

View File

@ -1,9 +1,12 @@
from django.conf import settings as s
from django.test import TestCase
from django.urls import reverse
from envipy_additional_information import Temperature, Interval
from envipy_additional_information import Interval, Temperature
from epdb.logic import UserManager, PackageManager
from epdb.models import Reaction, Scenario, ExternalDatabase
from epdb.logic import PackageManager, UserManager
from epdb.models import ExternalDatabase, Reaction, Scenario
Package = s.GET_PACKAGE_MODEL()
class ReactionViewTest(TestCase):

View File

@ -1,8 +1,11 @@
from django.conf import settings as s
from django.test import TestCase
from django.urls import reverse
from epdb.logic import PackageManager
from epdb.models import Package, User
from django.urls import reverse
from epdb.models import User
Package = s.GET_PACKAGE_MODEL()
class UserViewTest(TestCase):

View File

@ -185,7 +185,7 @@ class FormatConverter(object):
return smiles
@staticmethod
def standardize(smiles, remove_stereo=False):
def standardize(smiles, remove_stereo=False, canonicalize_tautomers=False):
# Taken from https://bitsilla.com/blog/2021/06/standardizing-a-molecule-using-rdkit/
# follows the steps in
# https://github.com/greglandrum/RSC_OpenScience_Standardization_202104/blob/main/MolStandardize%20pieces.ipynb
@ -203,19 +203,21 @@ class FormatConverter(object):
uncharger = (
rdMolStandardize.Uncharger()
) # annoying, but necessary as no convenience method exists
uncharged_parent_clean_mol = uncharger.uncharge(parent_clean_mol)
res_mol = uncharger.uncharge(parent_clean_mol)
# note that no attempt is made at reionization at this step
# nor at ionization at some pH (rdkit has no pKa caculator)
# the main aim to to represent all molecules from different sources
# in a (single) standard way, for use in ML, catalogue, etc.
# te = rdMolStandardize.TautomerEnumerator() # idem
# taut_uncharged_parent_clean_mol = te.Canonicalize(uncharged_parent_clean_mol)
if remove_stereo:
Chem.RemoveStereochemistry(uncharged_parent_clean_mol)
Chem.RemoveStereochemistry(res_mol)
return Chem.MolToSmiles(uncharged_parent_clean_mol, kekuleSmiles=True)
if canonicalize_tautomers:
te = rdMolStandardize.TautomerEnumerator() # idem
res_mol = te.Canonicalize(res_mol)
return Chem.MolToSmiles(res_mol, kekuleSmiles=True)
@staticmethod
def neutralize_smiles(smiles):
@ -363,6 +365,76 @@ class FormatConverter(object):
return parsed_smiles, errors
@staticmethod
def smiles_covered_by(
l_smiles: List[str],
r_smiles: List[str],
standardize: bool = True,
canonicalize_tautomers: bool = True,
) -> bool:
"""
Check if all SMILES in the left list are covered by (contained in) the right list.
This function performs a subset check to determine if every chemical structure
represented in l_smiles has a corresponding representation in r_smiles.
Args:
l_smiles (List[str]): List of SMILES strings to check for coverage.
r_smiles (List[str]): List of SMILES strings that should contain all l_smiles.
standardize (bool, optional): Whether to standardize SMILES before comparison.
Defaults to True. When True, applies FormatConverter.standardize() to
normalize representations for accurate comparison.
canonicalize_tautomers (bool, optional): Whether to canonicalize tautomers
Defaults to False. When True, applies rdMolStandardize.TautomerEnumerator().Canonicalize(res_mol)
to the compounds before comparison.
Returns:
bool: True if all SMILES in l_smiles are found in r_smiles (i.e., l_smiles
is a subset of r_smiles), False otherwise.
Note:
- Comparison treats lists as sets, ignoring duplicates and order
- Failed standardization attempts are silently ignored (original SMILES used)
- This is a one-directional check: l_smiles ⊆ r_smiles
- For bidirectional equality, both directions must be checked separately
Example:
>>> FormatConverter.smiles_covered_by(["CCO", "CC"], ["CCO", "CC", "CCC"])
True
>>> FormatConverter.smiles_covered_by(["CCO", "CCCC"], ["CCO", "CC", "CCC"])
False
"""
standardized_l_smiles = []
if standardize:
for smi in l_smiles:
try:
smi = FormatConverter.standardize(
smi, canonicalize_tautomers=canonicalize_tautomers
)
except Exception:
# :shrug:
# logger.debug(f'Standardizing SMILES failed for {smi}')
pass
standardized_l_smiles.append(smi)
else:
standardized_l_smiles = l_smiles
standardized_r_smiles = []
if standardize:
for smi in r_smiles:
try:
smi = FormatConverter.standardize(smi)
except Exception:
# :shrug:
# logger.debug(f'Standardizing SMILES failed for {smi}')
pass
standardized_r_smiles.append(smi)
else:
standardized_r_smiles = r_smiles
return len(set(standardized_l_smiles).difference(set(standardized_r_smiles))) == 0
class Standardizer(ABC):
def __init__(self, name):

View File

@ -1,10 +1,12 @@
# decorators.py
from functools import wraps
from django.conf import settings as s
from django.shortcuts import get_object_or_404
from epdb.logic import PackageManager
from epdb.models import Package
Package = s.GET_PACKAGE_MODEL()
# Map HTTP methods to required permissions
DEFAULT_METHOD_PERMISSIONS = {

View File

@ -9,36 +9,39 @@ from collections import defaultdict
from datetime import datetime
from enum import Enum
from types import NoneType
from typing import Dict, Any, List
from typing import Any, Dict, List
from django.conf import settings as s
from django.db import transaction
from envipy_additional_information import Interval, EnviPyModel
from envipy_additional_information import NAME_MAPPING
from envipy_additional_information import NAME_MAPPING, EnviPyModel, Interval
from pydantic import BaseModel, HttpUrl
from epdb.models import (
Package,
Compound,
CompoundStructure,
SimpleRule,
Edge,
EnviFormer,
EPModel,
ExternalDatabase,
ExternalIdentifier,
License,
MLRelativeReasoning,
Node,
ParallelRule,
Pathway,
PluginModel,
Reaction,
Rule,
RuleBasedRelativeReasoning,
Scenario,
SequentialRule,
SimpleAmbitRule,
SimpleRDKitRule,
ParallelRule,
SequentialRule,
Reaction,
Pathway,
Node,
Edge,
Scenario,
EPModel,
MLRelativeReasoning,
RuleBasedRelativeReasoning,
EnviFormer,
PluginModel,
ExternalIdentifier,
ExternalDatabase,
License,
SimpleRule,
)
from utilities.chem import FormatConverter
Package = s.GET_PACKAGE_MODEL()
logger = logging.getLogger(__name__)
@ -48,7 +51,7 @@ class HTMLGenerator:
@staticmethod
def generate_html(additional_information: "EnviPyModel", prefix="") -> str:
from typing import get_origin, get_args, Union
from typing import Union, get_args, get_origin
if isinstance(additional_information, type):
clz_name = additional_information.__name__
@ -1171,3 +1174,89 @@ class PackageImporter:
url=identifier_data.get("url", ""),
is_primary=identifier_data.get("is_primary", False),
)
class PathwayUtils:
def __init__(self, pathway: "Pathway"):
self.pathway = pathway
@staticmethod
def _get_products(smiles: str, rules: List["Rule"]):
educt_rule_products: Dict[str, Dict[str, List[str]]] = defaultdict(
lambda: defaultdict(list)
)
for r in rules:
product_sets = r.apply(smiles)
for product_set in product_sets:
for product in product_set:
educt_rule_products[smiles][r.url].append(product)
return educt_rule_products
def find_missing_rules(self, rules: List["Rule"]):
print(f"Processing {self.pathway.name}")
# compute products for each node / rule combination in the pathway
educt_rule_products = defaultdict(lambda: defaultdict(list))
for node in self.pathway.nodes:
educt_rule_products.update(**self._get_products(node.default_node_label.smiles, rules))
# loop through edges and determine reactions that can't be constructed by
# any of the rules or a combination of two rules in a chained fashion
res: Dict[str, List["Rule"]] = dict()
for edge in self.pathway.edges:
found = False
reaction = edge.edge_label
educts = [cs for cs in reaction.educts.all()]
products = [cs.smiles for cs in reaction.products.all()]
rule_chain = []
for educt in educts:
educt = educt.smiles
triggered_rules = list(educt_rule_products.get(educt, {}).keys())
for triggered_rule in triggered_rules:
if rule_products := educt_rule_products[educt][triggered_rule]:
# check if this rule covers the reaction
if FormatConverter.smiles_covered_by(
products, rule_products, standardize=True, canonicalize_tautomers=True
):
found = True
else:
# Check if another prediction step would cover the reaction
for product in rule_products:
prod_rule_products = self._get_products(product, rules)
prod_triggered_rules = list(
prod_rule_products.get(product, {}).keys()
)
for prod_triggered_rule in prod_triggered_rules:
if second_step_products := prod_rule_products[product][
prod_triggered_rule
]:
if FormatConverter.smiles_covered_by(
products,
second_step_products,
standardize=True,
canonicalize_tautomers=True,
):
rule_chain.append(
(
triggered_rule,
Rule.objects.get(url=triggered_rule).name,
)
)
rule_chain.append(
(
prod_triggered_rule,
Rule.objects.get(url=prod_triggered_rule).name,
)
)
res[edge.url] = rule_chain
if not found:
res[edge.url] = rule_chain
return res