From 819a94aced7cb65b089d508fac7f8aba1bbab1a4 Mon Sep 17 00:00:00 2001 From: jebus Date: Wed, 22 Oct 2025 01:13:06 +1300 Subject: [PATCH 1/9] [Fix] Catch Exception for Adding Structures / Show PubChem Substances (#168) Fixes #163 Fixes #165 Co-authored-by: Tim Lorsbach Reviewed-on: https://git.envipath.com/enviPath/enviPy/pulls/168 --- epdb/models.py | 13 ++++++++----- epdb/views.py | 11 ++++++++++- templates/objects/compound.html | 20 ++++++++++++++++++-- 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/epdb/models.py b/epdb/models.py index a03fcb6d..998503e2 100644 --- a/epdb/models.py +++ b/epdb/models.py @@ -310,7 +310,7 @@ class ExternalDatabase(TimeStampedModel): }, { "database": ExternalDatabase.objects.get(name="ChEBI"), - "placeholder": "ChEBI ID without prefix e.g. 12345", + "placeholder": "ChEBI ID without prefix e.g. 10576", }, ], "structure": [ @@ -328,7 +328,7 @@ class ExternalDatabase(TimeStampedModel): }, { "database": ExternalDatabase.objects.get(name="ChEBI"), - "placeholder": "ChEBI ID without prefix e.g. 12345", + "placeholder": "ChEBI ID without prefix e.g. 10576", }, ], "reaction": [ @@ -342,7 +342,7 @@ class ExternalDatabase(TimeStampedModel): }, { "database": ExternalDatabase.objects.get(name="UniProt"), - "placeholder": "Query ID for UniPro e.g. rhea:12345", + "placeholder": "Query ID for UniProt e.g. 
rhea:12345", }, ], } @@ -477,7 +477,7 @@ class ChemicalIdentifierMixin(ExternalIdentifierMixin): return self.add_external_identifier("CAS", cas_number) def get_pubchem_identifiers(self): - return self.get_external_identifier("PubChem Compound") or self.get_external_identifier( + return self.get_external_identifier("PubChem Compound") | self.get_external_identifier( "PubChem Substance" ) @@ -3043,6 +3043,7 @@ class EnviFormer(PackageBasedModel): @cached_property def model(self): from enviformer import load + ckpt = os.path.join(s.MODEL_DIR, "enviformer", str(self.uuid), f"{self.uuid}.ckpt") mod = load(device=s.ENVIFORMER_DEVICE, ckpt_path=ckpt) return mod @@ -3062,7 +3063,9 @@ class EnviFormer(PackageBasedModel): start = datetime.now() products_list = self.model.predict_batch(canon_smiles) end = datetime.now() - logger.info(f"Prediction took {(end - start).total_seconds():.2f} seconds. Got results {products_list}") + logger.info( + f"Prediction took {(end - start).total_seconds():.2f} seconds. 
Got results {products_list}" + ) results = [] for products in products_list: diff --git a/epdb/views.py b/epdb/views.py index 4844d3be..1f73027a 100644 --- a/epdb/views.py +++ b/epdb/views.py @@ -1251,7 +1251,16 @@ def package_compound_structures(request, package_uuid, compound_uuid): structure_smiles = request.POST.get("structure-smiles") structure_description = request.POST.get("structure-description") - cs = current_compound.add_structure(structure_smiles, structure_name, structure_description) + try: + cs = current_compound.add_structure( + structure_smiles, structure_name, structure_description + ) + except ValueError: + return error( + request, + "Adding structure failed!", + "The structure could not be added as normalized structures don't match!", + ) return redirect(cs.url) diff --git a/templates/objects/compound.html b/templates/objects/compound.html index 98083ca6..90e3e1db 100644 --- a/templates/objects/compound.html +++ b/templates/objects/compound.html @@ -183,7 +183,7 @@
- {% if compound.get_pubchem_identifiers %} + {% if compound.get_pubchem_compound_identifiers %}

@@ -193,12 +193,28 @@

- {% for eid in compound.get_pubchem_identifiers %} + {% for eid in compound.get_pubchem_compound_identifiers %} CID{{ eid.identifier_value }} {% endfor %}
{% endif %} + {% if compound.get_pubchem_substance_identifiers %} + +
+ {% for eid in compound.get_pubchem_substance_identifiers %} + SID{{ eid.identifier_value }} + {% endfor %} +
+ {% endif %} {% if compound.get_chebi_identifiers %}
From 8fda2577ee1244d0cfa11795ffc529bcb2dd6aa4 Mon Sep 17 00:00:00 2001 From: jebus Date: Wed, 22 Oct 2025 10:39:22 +1300 Subject: [PATCH 2/9] [Feature] Dump/Restore of enviFormer Models (#170) Dump: `./manage.py dump_enviformer d544303c-a1ca-439d-b036-5e3413ce4a48 --output test.tar.gz` Restore: `./manage.py load_enviformer test.tar.gz 1062eb09-5ec7-4bdd-a8f2-ae0252eb4b06` Co-authored-by: Tim Lorsbach Reviewed-on: https://git.envipath.com/enviPath/enviPy/pulls/170 --- epdb/management/commands/dump_enviformer.py | 59 +++++++++++++++ epdb/management/commands/load_enviformer.py | 81 +++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 epdb/management/commands/dump_enviformer.py create mode 100644 epdb/management/commands/load_enviformer.py diff --git a/epdb/management/commands/dump_enviformer.py b/epdb/management/commands/dump_enviformer.py new file mode 100644 index 00000000..e333248a --- /dev/null +++ b/epdb/management/commands/dump_enviformer.py @@ -0,0 +1,59 @@ +import json +import os +import tarfile +from tempfile import TemporaryDirectory + +from django.conf import settings as s +from django.core.management.base import BaseCommand +from django.db import transaction + +from epdb.models import EnviFormer + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument( + "model", + type=str, + help="Model UUID of the Model to Dump", + ) + parser.add_argument("--output", type=str) + + def package_dict_and_folder(self, dict_data, folder_path, output_path): + with TemporaryDirectory() as tmpdir: + dict_filename = os.path.join(tmpdir, "data.json") + + with open(dict_filename, "w", encoding="utf-8") as f: + json.dump(dict_data, f, indent=2) + + with tarfile.open(output_path, "w:gz") as tar: + tar.add(dict_filename, arcname="data.json") + tar.add(folder_path, arcname=os.path.basename(folder_path)) + + os.remove(dict_filename) + + @transaction.atomic + def handle(self, *args, **options): + output = options["output"] + + if 
os.path.exists(output): + raise ValueError(f"Output file {output} already exists") + + model = EnviFormer.objects.get(uuid=options["model"]) + + data = { + "uuid": str(model.uuid), + "name": model.name, + "description": model.description, + "kv": model.kv, + "data_packages_uuids": [str(p.uuid) for p in model.data_packages.all()], + "eval_packages_uuids": [str(p.uuid) for p in model.eval_packages.all()], + "threshold": model.threshold, + "eval_results": model.eval_results, + "multigen_eval": model.multigen_eval, + "model_status": model.model_status, + } + + model_folder = os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid)) + + self.package_dict_and_folder(data, model_folder, output) diff --git a/epdb/management/commands/load_enviformer.py b/epdb/management/commands/load_enviformer.py new file mode 100644 index 00000000..b2f9c3e3 --- /dev/null +++ b/epdb/management/commands/load_enviformer.py @@ -0,0 +1,81 @@ +import json +import os +import shutil +import tarfile +from tempfile import TemporaryDirectory + +from django.conf import settings as s +from django.core.management.base import BaseCommand +from django.db import transaction + +from epdb.models import EnviFormer, Package + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument( + "input", + type=str, + help=".tar.gz file containing the Model dump.", + ) + parser.add_argument( + "package", + type=str, + help="Package UUID where the Model should be loaded to.", + ) + + def read_dict_and_folder_from_archive(self, archive_path, extract_to="extracted_folder"): + with tarfile.open(archive_path, "r:gz") as tar: + tar.extractall(extract_to) + + dict_path = os.path.join(extract_to, "data.json") + + if not os.path.exists(dict_path): + raise FileNotFoundError("data.json not found in the archive.") + + with open(dict_path, "r", encoding="utf-8") as f: + data_dict = json.load(f) + + extracted_items = os.listdir(extract_to) + folders = [item for item in extracted_items if item != 
"data.json"] + folder_path = os.path.join(extract_to, folders[0]) if folders else None + + return data_dict, folder_path + + @transaction.atomic + def handle(self, *args, **options): + if not os.path.exists(options["input"]): + raise ValueError(f"Input file {options['input']} does not exist.") + + target_package = Package.objects.get(uuid=options["package"]) + + with TemporaryDirectory() as tmpdir: + data, folder = self.read_dict_and_folder_from_archive(options["input"], tmpdir) + + model = EnviFormer() + model.package = target_package + # model.uuid = data["uuid"] + model.name = data["name"] + model.description = data["description"] + model.kv = data["kv"] + model.threshold = float(data["threshold"]) + model.eval_results = data["eval_results"] + model.multigen_eval = data["multigen_eval"] + model.model_status = data["model_status"] + model.save() + + for p_uuid in data["data_packages_uuids"]: + p = Package.objects.get(uuid=p_uuid) + model.data_packages.add(p) + + for p_uuid in data["eval_packages_uuids"]: + p = Package.objects.get(uuid=p_uuid) + model.eval_packages.add(p) + + target_folder = os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid)) + + shutil.copytree(folder, target_folder) + os.rename( + os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid), f"{data['uuid']}.ckpt"), + os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid), f"{model.uuid}.ckpt"), + ) From 551cfc77684b67a295e37cb243cfe555ed39ad50 Mon Sep 17 00:00:00 2001 From: liambrydon Date: Thu, 23 Oct 2025 06:20:22 +1300 Subject: [PATCH 3/9] [Enhancement] Create ML Models (#173) ## Changes - Ability to change the threshold from a command line argument. 
- Names of data packages included in model name - Names of data, rule and eval packages included in the model description - EnviFormer models are now viewable on the admin site - Ignore CO2 for training and evaluating EnviFormer Co-authored-by: Liam Brydon <62733830+MyCreativityOutlet@users.noreply.github.com> Reviewed-on: https://git.envipath.com/enviPath/enviPy/pulls/173 Reviewed-by: jebus Co-authored-by: liambrydon Co-committed-by: liambrydon --- epdb/admin.py | 6 ++++ epdb/management/commands/create_ml_models.py | 36 ++++++++++++++------ epdb/models.py | 8 +++-- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/epdb/admin.py b/epdb/admin.py index fefcdc32..1f251cc1 100644 --- a/epdb/admin.py +++ b/epdb/admin.py @@ -7,6 +7,7 @@ from .models import ( GroupPackagePermission, Package, MLRelativeReasoning, + EnviFormer, Compound, CompoundStructure, SimpleAmbitRule, @@ -50,6 +51,10 @@ class MLRelativeReasoningAdmin(EPAdmin): pass +class EnviFormerAdmin(EPAdmin): + pass + + class CompoundAdmin(EPAdmin): pass @@ -104,6 +109,7 @@ admin.site.register(Group, GroupAdmin) admin.site.register(GroupPackagePermission, GroupPackagePermissionAdmin) admin.site.register(Package, PackageAdmin) admin.site.register(MLRelativeReasoning, MLRelativeReasoningAdmin) +admin.site.register(EnviFormer, EnviFormerAdmin) admin.site.register(Compound, CompoundAdmin) admin.site.register(CompoundStructure, CompoundStructureAdmin) admin.site.register(SimpleAmbitRule, SimpleAmbitRuleAdmin) diff --git a/epdb/management/commands/create_ml_models.py b/epdb/management/commands/create_ml_models.py index 8cf3fd55..6c59141d 100644 --- a/epdb/management/commands/create_ml_models.py +++ b/epdb/management/commands/create_ml_models.py @@ -7,10 +7,11 @@ from epdb.models import MLRelativeReasoning, EnviFormer, Package class Command(BaseCommand): """This command can be run with - `python manage.py create_ml_models [model_names] -d [data_packages] OPTIONAL: -e [eval_packages]` - For example, to train 
both EnviFormer and MLRelativeReasoning on BBD and SOIL and evaluate them on SLUDGE - the below command would be used: - `python manage.py create_ml_models enviformer mlrr -d bbd soil -e sludge + `python manage.py create_ml_models [model_names] -d [data_packages] FOR MLRR ONLY: -r [rule_packages] + OPTIONAL: -e [eval_packages] -t threshold` + For example, to train both EnviFormer and MLRelativeReasoning on BBD and SOIL and evaluate them on SLUDGE with a + threshold of 0.6, the below command would be used: + `python manage.py create_ml_models enviformer mlrr -d bbd soil -e sludge -t 0.6 """ def add_arguments(self, parser): @@ -34,6 +35,13 @@ class Command(BaseCommand): help="Rule Packages mandatory for MLRR", default=[], ) + parser.add_argument( + "-t", + "--threshold", + type=float, + help="Model prediction threshold", + default=0.5, + ) @transaction.atomic def handle(self, *args, **options): @@ -67,7 +75,11 @@ class Command(BaseCommand): return packages # Iteratively create models in options["model_names"] - print(f"Creating models: {options['model_names']}") + print(f"Creating models: {options['model_names']}\n" + f"Data packages: {options['data_packages']}\n" + f"Rule Packages (only for MLRR): {options['rule_packages']}\n" + f"Eval Packages: {options['eval_packages']}\n" + f"Threshold: {options['threshold']:.2f}") data_packages = decode_packages(options["data_packages"]) eval_packages = decode_packages(options["eval_packages"]) rule_packages = decode_packages(options["rule_packages"]) @@ -78,9 +90,10 @@ class Command(BaseCommand): pack, data_packages=data_packages, eval_packages=eval_packages, - threshold=0.5, - name="EnviFormer - T0.5", - description="EnviFormer transformer", + threshold=options['threshold'], + name=f"EnviFormer - {', '.join(options['data_packages'])} - T{options['threshold']:.2f}", + description=f"EnviFormer transformer trained on {options['data_packages']} " + f"evaluated on {options['eval_packages']}.", ) elif model_name == "mlrr": model = 
MLRelativeReasoning.create( @@ -88,9 +101,10 @@ class Command(BaseCommand): rule_packages=rule_packages, data_packages=data_packages, eval_packages=eval_packages, - threshold=0.5, - name="ECC - BBD - T0.5", - description="ML Relative Reasoning", + threshold=options['threshold'], + name=f"ECC - {', '.join(options['data_packages'])} - T{options['threshold']:.2f}", + description=f"ML Relative Reasoning trained on {options['data_packages']} with rules from " + f"{options['rule_packages']} and evaluated on {options['eval_packages']}.", ) else: raise ValueError(f"Cannot create model of type {model_name}, unknown model type") diff --git a/epdb/models.py b/epdb/models.py index 998503e2..33a0b89b 100644 --- a/epdb/models.py +++ b/epdb/models.py @@ -3092,6 +3092,7 @@ class EnviFormer(PackageBasedModel): start = datetime.now() # Standardise reactions for the training data, EnviFormer ignores stereochemistry currently + co2 = {"C(=O)=O", "O=C=O"} ds = [] for reaction in self._get_reactions(): educts = ".".join( @@ -3106,7 +3107,8 @@ class EnviFormer(PackageBasedModel): for smile in reaction.products.all() ] ) - ds.append(f"{educts}>>{products}") + if products not in co2: + ds.append(f"{educts}>>{products}") end = datetime.now() logger.debug(f"build_dataset took {(end - start).total_seconds()} seconds") @@ -3302,7 +3304,7 @@ class EnviFormer(PackageBasedModel): ds = self.load_dataset() n_splits = 20 - shuff = ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42) + shuff = ShuffleSplit(n_splits=n_splits, test_size=0.1, random_state=42) # Single gen eval is done in one loop of train then evaluate rather than storing all n_splits trained models # this helps reduce the memory footprint. @@ -3370,7 +3372,7 @@ class EnviFormer(PackageBasedModel): # Compute splits of the collected pathway and evaluate. Like single gen we train and evaluate in each # iteration instead of storing all trained models. 
for split_id, (train, test) in enumerate( - ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42).split(pathways) + ShuffleSplit(n_splits=n_splits, test_size=0.1, random_state=42).split(pathways) ): train_pathways = [pathways[i] for i in train] test_pathways = [pathways[i] for i in test] From a952c08469700542db201ab37a5cd1c921369e06 Mon Sep 17 00:00:00 2001 From: jebus Date: Mon, 27 Oct 2025 22:34:05 +1300 Subject: [PATCH 4/9] [Feature] Basic logging of Jobs, Model Evaluation (#169) Co-authored-by: Tim Lorsbach Reviewed-on: https://git.envipath.com/enviPath/enviPy/pulls/169 --- epdb/management/commands/create_ml_models.py | 2 +- epdb/management/commands/update_job_logs.py | 38 +++ epdb/models.py | 85 ++++-- epdb/tasks.py | 121 ++++++++- epdb/templatetags/envipytags.py | 13 + epdb/urls.py | 1 + epdb/views.py | 163 +++++++---- templates/collections/joblog.html | 71 +++++ .../modals/collections/new_model_modal.html | 256 +++++++++--------- .../modals/objects/evaluate_model_modal.html | 21 +- templates/objects/model.html | 2 +- tests/test_enviformer.py | 14 +- tests/test_model.py | 5 +- tests/views/test_model_views.py | 2 +- tests/views/test_pathway_views.py | 2 +- 15 files changed, 556 insertions(+), 240 deletions(-) create mode 100644 epdb/management/commands/update_job_logs.py create mode 100644 templates/collections/joblog.html diff --git a/epdb/management/commands/create_ml_models.py b/epdb/management/commands/create_ml_models.py index 6c59141d..89fbc0ec 100644 --- a/epdb/management/commands/create_ml_models.py +++ b/epdb/management/commands/create_ml_models.py @@ -114,6 +114,6 @@ class Command(BaseCommand): print(f"Training {model_name}") model.build_model() print(f"Evaluating {model_name}") - model.evaluate_model() + model.evaluate_model(False, eval_packages=eval_packages) print(f"Saving {model_name}") model.save() diff --git a/epdb/management/commands/update_job_logs.py b/epdb/management/commands/update_job_logs.py new file mode 100644 index 
00000000..a5b17cfa --- /dev/null +++ b/epdb/management/commands/update_job_logs.py @@ -0,0 +1,38 @@ +from datetime import date, timedelta + +from django.core.management.base import BaseCommand +from django.db import transaction + +from epdb.models import JobLog + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument( + "--cleanup", + type=int, + default=None, + help="Remove all logs older than this number of days. Default is None, which does not remove any logs.", + ) + + @transaction.atomic + def handle(self, *args, **options): + if options["cleanup"] is not None: + cleanup_dt = date.today() - timedelta(days=options["cleanup"]) + print(JobLog.objects.filter(created__lt=cleanup_dt).delete()) + + logs = JobLog.objects.filter(status="INITIAL") + print(f"Found {logs.count()} logs to update") + updated = 0 + for log in logs: + res = log.check_for_update() + if res: + updated += 1 + + print(f"Updated {updated} logs") + + from django.db.models import Count + + qs = JobLog.objects.values("status").annotate(total=Count("status")) + for r in qs: + print(r["status"], r["total"]) diff --git a/epdb/models.py b/epdb/models.py index 33a0b89b..324fe301 100644 --- a/epdb/models.py +++ b/epdb/models.py @@ -2225,10 +2225,18 @@ class PackageBasedModel(EPModel): self.model_status = self.BUILT_NOT_EVALUATED self.save() - def evaluate_model(self): + def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None): if self.model_status != self.BUILT_NOT_EVALUATED: raise ValueError(f"Can't evaluate a model in state {self.model_status}!") + if multigen: + self.multigen_eval = multigen + self.save() + + if eval_packages is not None: + for p in eval_packages: + self.eval_packages.add(p) + self.model_status = self.EVALUATING self.save() @@ -2525,7 +2533,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel): package: "Package", rule_packages: List["Package"], data_packages: List["Package"], - eval_packages: List["Package"], threshold: float = 
0.5, min_count: int = 10, max_count: int = 0, @@ -2574,10 +2581,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel): for p in rule_packages: rbrr.data_packages.add(p) - if eval_packages: - for p in eval_packages: - rbrr.eval_packages.add(p) - rbrr.save() return rbrr @@ -2632,7 +2635,6 @@ class MLRelativeReasoning(PackageBasedModel): package: "Package", rule_packages: List["Package"], data_packages: List["Package"], - eval_packages: List["Package"], threshold: float = 0.5, name: "str" = None, description: str = None, @@ -2672,10 +2674,6 @@ class MLRelativeReasoning(PackageBasedModel): for p in rule_packages: mlrr.data_packages.add(p) - if eval_packages: - for p in eval_packages: - mlrr.eval_packages.add(p) - if build_app_domain: ad = ApplicabilityDomain.create( mlrr, @@ -2995,7 +2993,6 @@ class EnviFormer(PackageBasedModel): def create( package: "Package", data_packages: List["Package"], - eval_packages: List["Package"], threshold: float = 0.5, name: "str" = None, description: str = None, @@ -3028,10 +3025,6 @@ class EnviFormer(PackageBasedModel): for p in data_packages: mod.data_packages.add(p) - if eval_packages: - for p in eval_packages: - mod.eval_packages.add(p) - # if build_app_domain: # ad = ApplicabilityDomain.create(mod, app_domain_num_neighbours, app_domain_reliability_threshold, # app_domain_local_compatibility_threshold) @@ -3144,10 +3137,18 @@ class EnviFormer(PackageBasedModel): args = {"clz": "EnviFormer"} return args - def evaluate_model(self): + def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None): if self.model_status != self.BUILT_NOT_EVALUATED: raise ValueError(f"Can't evaluate a model in state {self.model_status}!") + if multigen: + self.multigen_eval = multigen + self.save() + + if eval_packages is not None: + for p in eval_packages: + self.eval_packages.add(p) + self.model_status = self.EVALUATING self.save() @@ -3671,3 +3672,53 @@ class Setting(EnviPathModel): self.public = True self.global_default = True 
self.save() + + +class JobLogStatus(models.TextChoices): + INITIAL = "INITIAL", "Initial" + SUCCESS = "SUCCESS", "Success" + FAILURE = "FAILURE", "Failure" + REVOKED = "REVOKED", "Revoked" + IGNORED = "IGNORED", "Ignored" + + +class JobLog(TimeStampedModel): + user = models.ForeignKey("epdb.User", models.CASCADE) + task_id = models.UUIDField(unique=True) + job_name = models.TextField(null=False, blank=False) + status = models.CharField( + max_length=20, + choices=JobLogStatus.choices, + default=JobLogStatus.INITIAL, + ) + + done_at = models.DateTimeField(null=True, blank=True, default=None) + task_result = models.TextField(null=True, blank=True, default=None) + + def check_for_update(self): + async_res = self.get_result() + new_status = async_res.state + + TERMINAL_STATES = [ + "SUCCESS", + "FAILURE", + "REVOKED", + "IGNORED", + ] + + if new_status != self.status and new_status in TERMINAL_STATES: + self.status = new_status + self.done_at = async_res.date_done + + if new_status == "SUCCESS": + self.task_result = async_res.result + + self.save() + + return True + return False + + def get_result(self): + from celery.result import AsyncResult + + return AsyncResult(str(self.task_id)) diff --git a/epdb/tasks.py b/epdb/tasks.py index b9845c86..b6f4e6b0 100644 --- a/epdb/tasks.py +++ b/epdb/tasks.py @@ -1,10 +1,13 @@ import logging -from typing import Optional -from celery.utils.functional import LRUCache -from celery import shared_task -from epdb.models import Pathway, Node, EPModel, Setting -from epdb.logic import SPathway +from datetime import datetime +from typing import Callable, Optional +from uuid import uuid4 +from celery import shared_task +from celery.utils.functional import LRUCache + +from epdb.logic import SPathway +from epdb.models import EPModel, JobLog, Node, Package, Pathway, Setting, User logger = logging.getLogger(__name__) ML_CACHE = LRUCache(3) # Cache the three most recent ML models to reduce load times. 
@@ -16,6 +19,40 @@ def get_ml_model(model_pk: int): return ML_CACHE[model_pk] +def dispatch_eager(user: "User", job: Callable, *args, **kwargs): + try: + x = job(*args, **kwargs) + log = JobLog() + log.user = user + log.task_id = uuid4() + log.job_name = job.__name__ + log.status = "SUCCESS" + log.done_at = datetime.now() + log.task_result = str(x) if x else None + log.save() + + return x + except Exception as e: + logger.exception(e) + raise e + + +def dispatch(user: "User", job: Callable, *args, **kwargs): + try: + x = job.delay(*args, **kwargs) + log = JobLog() + log.user = user + log.task_id = x.task_id + log.job_name = job.__name__ + log.status = "INITIAL" + log.save() + + return x.result + except Exception as e: + logger.exception(e) + raise e + + @shared_task(queue="background") def mul(a, b): return a * b @@ -33,17 +70,55 @@ def send_registration_mail(user_pk: int): pass -@shared_task(queue="model") -def build_model(model_pk: int): +@shared_task(bind=True, queue="model") +def build_model(self, model_pk: int): mod = EPModel.objects.get(id=model_pk) - mod.build_dataset() - mod.build_model() + + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=mod.url) + + try: + mod.build_dataset() + mod.build_model() + except Exception as e: + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update( + status="FAILED", task_result=mod.url + ) + + raise e + + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=mod.url) + + return mod.url -@shared_task(queue="model") -def evaluate_model(model_pk: int): +@shared_task(bind=True, queue="model") +def evaluate_model(self, model_pk: int, multigen: bool, package_pks: Optional[list] = None): + packages = None + + if package_pks: + packages = Package.objects.filter(pk__in=package_pks) + mod 
= EPModel.objects.get(id=model_pk) - mod.evaluate_model() + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=mod.url) + + try: + mod.evaluate_model(multigen, eval_packages=packages) + except Exception as e: + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update( + status="FAILED", task_result=mod.url + ) + + raise e + + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=mod.url) + + return mod.url @shared_task(queue="model") @@ -52,9 +127,13 @@ def retrain(model_pk: int): mod.retrain() -@shared_task(queue="predict") +@shared_task(bind=True, queue="predict") def predict( - pw_pk: int, pred_setting_pk: int, limit: Optional[int] = None, node_pk: Optional[int] = None + self, + pw_pk: int, + pred_setting_pk: int, + limit: Optional[int] = None, + node_pk: Optional[int] = None, ) -> Pathway: pw = Pathway.objects.get(id=pw_pk) setting = Setting.objects.get(id=pred_setting_pk) @@ -65,6 +144,9 @@ def predict( pw.kv.update(**{"status": "running"}) pw.save() + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=pw.url) + try: # regular prediction if limit is not None: @@ -89,7 +171,18 @@ def predict( except Exception as e: pw.kv.update({"status": "failed"}) pw.save() + + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update( + status="FAILED", task_result=pw.url + ) + raise e pw.kv.update(**{"status": "completed"}) pw.save() + + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=pw.url) + + return pw.url diff --git a/epdb/templatetags/envipytags.py 
b/epdb/templatetags/envipytags.py index c8c92fef..6c250e63 100644 --- a/epdb/templatetags/envipytags.py +++ b/epdb/templatetags/envipytags.py @@ -1,8 +1,21 @@ from django import template +from pydantic import AnyHttpUrl, ValidationError +from pydantic.type_adapter import TypeAdapter register = template.Library() +url_adapter = TypeAdapter(AnyHttpUrl) + @register.filter def classname(obj): return obj.__class__.__name__ + + +@register.filter +def is_url(value): + try: + url_adapter.validate_python(value) + return True + except ValidationError: + return False diff --git a/epdb/urls.py b/epdb/urls.py index 391a2f32..25e18680 100644 --- a/epdb/urls.py +++ b/epdb/urls.py @@ -190,6 +190,7 @@ urlpatterns = [ re_path(r"^indigo/dearomatize$", v.dearomatize, name="indigo_dearomatize"), re_path(r"^indigo/layout$", v.layout, name="indigo_layout"), re_path(r"^depict$", v.depict, name="depict"), + re_path(r"^jobs", v.jobs, name="jobs"), # OAuth Stuff path("o/userinfo/", v.userinfo, name="oauth_userinfo"), ] diff --git a/epdb/views.py b/epdb/views.py index 1f73027a..f2fd8c74 100644 --- a/epdb/views.py +++ b/epdb/views.py @@ -47,6 +47,7 @@ from .models import ( ExternalDatabase, ExternalIdentifier, EnzymeLink, + JobLog, ) logger = logging.getLogger(__name__) @@ -754,8 +755,8 @@ def package_models(request, package_uuid): context["unreviewed_objects"] = unreviewed_model_qs context["model_types"] = { - "ML Relative Reasoning": "ml-relative-reasoning", - "Rule Based Relative Reasoning": "rule-based-relative-reasoning", + "ML Relative Reasoning": "mlrr", + "Rule Based Relative Reasoning": "rbrr", } if s.FLAGS.get("ENVIFORMER", False): @@ -775,69 +776,67 @@ def package_models(request, package_uuid): model_type = request.POST.get("model-type") + # Generic fields for ML and Rule Based + rule_packages = request.POST.getlist("model-rule-packages") + data_packages = request.POST.getlist("model-data-packages") + + # Generic params + params = { + "package": current_package, + "name": name, + 
"description": description, + "data_packages": [ + PackageManager.get_package_by_url(current_user, p) for p in data_packages + ], + } + if model_type == "enviformer": - threshold = float(request.POST.get(f"{model_type}-threshold", 0.5)) + threshold = float(request.POST.get("model-threshold", 0.5)) + params["threshold"] = threshold - mod = EnviFormer.create(current_package, name, description, threshold) + mod = EnviFormer.create(**params) + elif model_type == "mlrr": + # ML Specific + threshold = float(request.POST.get("model-threshold", 0.5)) + # TODO handle additional fingerprinter + # fingerprinter = request.POST.get("model-fingerprinter") - elif model_type == "ml-relative-reasoning" or model_type == "rule-based-relative-reasoning": - # Generic fields for ML and Rule Based - rule_packages = request.POST.getlist("package-based-relative-reasoning-rule-packages") - data_packages = request.POST.getlist("package-based-relative-reasoning-data-packages") - eval_packages = request.POST.getlist( - "package-based-relative-reasoning-evaluation-packages", [] - ) + params["rule_packages"] = [ + PackageManager.get_package_by_url(current_user, p) for p in rule_packages + ] - # Generic params - params = { - "package": current_package, - "name": name, - "description": description, - "rule_packages": [ - PackageManager.get_package_by_url(current_user, p) for p in rule_packages - ], - "data_packages": [ - PackageManager.get_package_by_url(current_user, p) for p in data_packages - ], - "eval_packages": [ - PackageManager.get_package_by_url(current_user, p) for p in eval_packages - ], - } + # App Domain related parameters + build_ad = request.POST.get("build-app-domain", False) == "on" + num_neighbors = request.POST.get("num-neighbors", 5) + reliability_threshold = request.POST.get("reliability-threshold", 0.5) + local_compatibility_threshold = request.POST.get("local-compatibility-threshold", 0.5) - if model_type == "ml-relative-reasoning": - # ML Specific - threshold = 
float(request.POST.get(f"{model_type}-threshold", 0.5)) - # TODO handle additional fingerprinter - # fingerprinter = request.POST.get(f"{model_type}-fingerprinter") + params["threshold"] = threshold + # params['fingerprinter'] = fingerprinter + params["build_app_domain"] = build_ad + params["app_domain_num_neighbours"] = num_neighbors + params["app_domain_reliability_threshold"] = reliability_threshold + params["app_domain_local_compatibility_threshold"] = local_compatibility_threshold - # App Domain related parameters - build_ad = request.POST.get("build-app-domain", False) == "on" - num_neighbors = request.POST.get("num-neighbors", 5) - reliability_threshold = request.POST.get("reliability-threshold", 0.5) - local_compatibility_threshold = request.POST.get( - "local-compatibility-threshold", 0.5 - ) + mod = MLRelativeReasoning.create(**params) + elif model_type == "rbrr": + params["rule_packages"] = [ + PackageManager.get_package_by_url(current_user, p) for p in rule_packages + ] - params["threshold"] = threshold - # params['fingerprinter'] = fingerprinter - params["build_app_domain"] = build_ad - params["app_domain_num_neighbours"] = num_neighbors - params["app_domain_reliability_threshold"] = reliability_threshold - params["app_domain_local_compatibility_threshold"] = local_compatibility_threshold - - mod = MLRelativeReasoning.create(**params) - else: - mod = RuleBasedRelativeReasoning.create(**params) - - from .tasks import build_model - - build_model.delay(mod.pk) + mod = RuleBasedRelativeReasoning.create(**params) + elif s.FLAGS.get("PLUGINS", False) and model_type in s.CLASSIFIER_PLUGINS.values(): + pass else: return error( request, "Invalid model type.", f'Model type "{model_type}" is not supported."' ) - return redirect(mod.url) + from .tasks import dispatch, build_model + + dispatch(current_user, build_model, mod.pk) + + return redirect(mod.url) else: return HttpResponseNotAllowed(["GET", "POST"]) @@ -865,6 +864,10 @@ def package_model(request, 
package_uuid, model_uuid): return JsonResponse({"error": f'"{smiles}" is not a valid SMILES'}, status=400) if classify: + from epdb.tasks import dispatch_eager, predict_simple + + res = dispatch_eager(current_user, predict_simple, current_model.pk, stand_smiles) + pred_res = current_model.predict(stand_smiles) res = [] @@ -909,9 +912,25 @@ def package_model(request, package_uuid, model_uuid): current_model.delete() return redirect(current_package.url + "/model") elif hidden == "evaluate": - from .tasks import evaluate_model + from .tasks import dispatch, evaluate_model + + eval_type = request.POST.get("model-evaluation-type") + + if eval_type not in ["sg", "mg"]: + return error( + request, + "Invalid evaluation type", + f'Evaluation type "{eval_type}" is not supported. Only "sg" and "mg" are supported.', + ) + + multigen = eval_type == "mg" + + eval_packages = request.POST.getlist("model-evaluation-packages") + eval_package_ids = [ + PackageManager.get_package_by_url(current_user, p).id for p in eval_packages + ] + dispatch(current_user, evaluate_model, current_model.pk, multigen, eval_package_ids) - evaluate_model.delay(current_model.pk) return redirect(current_model.url) else: return HttpResponseBadRequest() @@ -1809,9 +1828,9 @@ def package_pathways(request, package_uuid): pw.setting = prediction_setting pw.save() - from .tasks import predict + from .tasks import dispatch, predict - predict.delay(pw.pk, prediction_setting.pk, limit=limit) + dispatch(current_user, predict, pw.pk, prediction_setting.pk, limit=limit) return redirect(pw.url) @@ -1930,10 +1949,16 @@ def package_pathway(request, package_uuid, pathway_uuid): if node_url: n = current_pathway.get_node(node_url) - from .tasks import predict + from .tasks import dispatch, predict + + dispatch( + current_user, + predict, + current_pathway.pk, + current_pathway.prediction_setting.pk, + node_pk=n.pk, + ) - # Dont delay? 
- predict(current_pathway.pk, current_pathway.setting.pk, node_pk=n.pk) return JsonResponse({"success": current_pathway.url}) return HttpResponseBadRequest() @@ -2705,6 +2730,24 @@ def setting(request, setting_uuid): pass +def jobs(request): + current_user = _anonymous_or_real(request) + context = get_base_context(request) + + if request.method == "GET": + context["object_type"] = "joblog" + context["breadcrumbs"] = [ + {"Home": s.SERVER_URL}, + {"Jobs": s.SERVER_URL + "/jobs"}, + ] + if current_user.is_superuser: + context["jobs"] = JobLog.objects.all().order_by("-created") + else: + context["jobs"] = JobLog.objects.filter(user=current_user).order_by("-created") + + return render(request, "collections/joblog.html", context) + + ########### # KETCHER # ########### diff --git a/templates/collections/joblog.html b/templates/collections/joblog.html new file mode 100644 index 00000000..7075e08e --- /dev/null +++ b/templates/collections/joblog.html @@ -0,0 +1,71 @@ +{% extends "framework.html" %} +{% load static %} +{% load envipytags %} +{% block content %} + +
+
+
+ Jobs +
+
+

+ Job Logs Desc +

+ +
+ +
+

+ + Jobs + +

+
+
+
+ + + + + + + + + + + {% for job in jobs %} + + + + + + + {% if job.task_result and job.task_result|is_url == True %} + + {% elif job.task_result %} + + {% else %} + + {% endif %} + + {% endfor %} + +
IDNameStatusQueuedDoneResult
{{ job.task_id }}{{ job.job_name }}{{ job.status }}{{ job.created }}{{ job.done_at }}Result{{ job.task_result|slice:"40" }}...Empty
+
+
+ + + +
+
+{% endblock content %} diff --git a/templates/modals/collections/new_model_modal.html b/templates/modals/collections/new_model_modal.html index b58a65ed..b5e903b6 100644 --- a/templates/modals/collections/new_model_modal.html +++ b/templates/modals/collections/new_model_modal.html @@ -18,113 +18,117 @@ prediction. You just need to set a name and the packages you want the object to be based on. There are multiple types of models available. For additional information have a look at our - wiki >> + wiki + >>
+ + + + + - -
- - - {% for obj in meta.readable_packages %} - {% if obj.reviewed %} - - {% endif %} + {% if obj.reviewed %} + + {% endif %} {% endfor %} {% for obj in meta.readable_packages %} - {% if not obj.reviewed %} - - {% endif %} + {% if not obj.reviewed %} + + {% endif %} {% endfor %} - - - - -
- - - - {% if meta.enabled_features.PLUGINS and additional_descriptors %} - - - - {% endif %} - - - -
- {% if meta.enabled_features.APPLICABILITY_DOMAIN %} - -
- -
- - {% endif %}
- -
- - + + +
+ + +
+ + +
+ + +
+ + +
+ + +
+ +
+ {% if meta.enabled_features.APPLICABILITY_DOMAIN %} + +
+ +
+ + {% endif %}
@@ -137,53 +141,47 @@
diff --git a/templates/modals/objects/evaluate_model_modal.html b/templates/modals/objects/evaluate_model_modal.html index a42c68bb..1d4b3801 100644 --- a/templates/modals/objects/evaluate_model_modal.html +++ b/templates/modals/objects/evaluate_model_modal.html @@ -17,10 +17,10 @@ For evaluation, you need to select the packages you want to use. While the model is evaluating, you can use the model for predictions.
- - - {% for obj in meta.readable_packages %} {% if obj.reviewed %} @@ -35,7 +35,16 @@ {% endif %} {% endfor %} - + + + + + +