diff --git a/envipath/settings.py b/envipath/settings.py index 257f1f30..26703b5c 100644 --- a/envipath/settings.py +++ b/envipath/settings.py @@ -135,6 +135,7 @@ USE_TZ = True DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" +EMAIL_SUBJECT_PREFIX = "[enviPath] " if DEBUG: EMAIL_BACKEND = "django.core.mail.backends.console.EmailBackend" else: @@ -144,6 +145,8 @@ else: EMAIL_HOST_USER = os.environ["EMAIL_HOST_USER"] EMAIL_HOST_PASSWORD = os.environ["EMAIL_HOST_PASSWORD"] EMAIL_PORT = 587 + DEFAULT_FROM_EMAIL = os.environ["DEFAULT_FROM_EMAIL"] + SERVER_EMAIL = os.environ["SERVER_EMAIL"] AUTH_USER_MODEL = "epdb.User" ADMIN_APPROVAL_REQUIRED = os.environ.get("ADMIN_APPROVAL_REQUIRED", "False") == "True" diff --git a/epdb/admin.py b/epdb/admin.py index fefcdc32..1f251cc1 100644 --- a/epdb/admin.py +++ b/epdb/admin.py @@ -7,6 +7,7 @@ from .models import ( GroupPackagePermission, Package, MLRelativeReasoning, + EnviFormer, Compound, CompoundStructure, SimpleAmbitRule, @@ -50,6 +51,10 @@ class MLRelativeReasoningAdmin(EPAdmin): pass +class EnviFormerAdmin(EPAdmin): + pass + + class CompoundAdmin(EPAdmin): pass @@ -104,6 +109,7 @@ admin.site.register(Group, GroupAdmin) admin.site.register(GroupPackagePermission, GroupPackagePermissionAdmin) admin.site.register(Package, PackageAdmin) admin.site.register(MLRelativeReasoning, MLRelativeReasoningAdmin) +admin.site.register(EnviFormer, EnviFormerAdmin) admin.site.register(Compound, CompoundAdmin) admin.site.register(CompoundStructure, CompoundStructureAdmin) admin.site.register(SimpleAmbitRule, SimpleAmbitRuleAdmin) diff --git a/epdb/logic.py b/epdb/logic.py index 530ebc51..19f03ae2 100644 --- a/epdb/logic.py +++ b/epdb/logic.py @@ -26,6 +26,7 @@ from epdb.models import ( Compound, Reaction, CompoundStructure, + EnzymeLink, ) from utilities.chem import FormatConverter from utilities.misc import PackageImporter, PackageExporter @@ -617,6 +618,8 @@ class PackageManager(object): parent_mapping = {} # Mapping old scen_id to old_obj_id scen_mapping = defaultdict(list) + # Enzymelink Mapping rule_id to enzymelink objects + enzyme_mapping = defaultdict(list) # Store Scenarios for scenario in data["scenarios"]: @@ -648,9 +651,7 @@ class PackageManager(object): # Broken eP Data if name == "initialmasssediment" and addinf_data == "missing data": continue - - # TODO Enzymes arent ready yet - if name == "enzyme": + if name == "columnheight" and addinf_data == "(2)-(2.5);(6)-(8)": continue try: @@ -740,6 +741,9 @@ class PackageManager(object): for scen in rule["scenarios"]: scen_mapping[scen["id"]].append(r) + for enzyme_link in rule.get("enzymeLinks", []): + enzyme_mapping[r.uuid].append(enzyme_link) + print("Par: ", len(par_rules)) print("Seq: ", len(seq_rules)) @@ -757,6 +761,9 @@ class PackageManager(object): for scen in par_rule["scenarios"]: scen_mapping[scen["id"]].append(r) + for enzyme_link in par_rule.get("enzymeLinks", []): + enzyme_mapping[r.uuid].append(enzyme_link) + for simple_rule in par_rule["simpleRules"]: if simple_rule["id"] in mapping: r.simple_rules.add(SimpleRule.objects.get(uuid=mapping[simple_rule["id"]])) @@ -777,6 +784,9 @@ class PackageManager(object): for scen in seq_rule["scenarios"]: scen_mapping[scen["id"]].append(r) + for enzyme_link in seq_rule.get("enzymeLinks", []): + enzyme_mapping[r.uuid].append(enzyme_link) + for i, simple_rule in enumerate(seq_rule["simpleRules"]): sro = SequentialRuleOrdering() sro.simple_rule = simple_rule @@ -910,6 +920,39 @@ class PackageManager(object): print("Scenarios linked...") + # Import Enzyme 
Links + for rule_uuid, enzyme_links in enzyme_mapping.items(): + r = Rule.objects.get(uuid=rule_uuid) + for enzyme in enzyme_links: + e = EnzymeLink() + e.uuid = UUID(enzyme["id"].split("/")[-1]) if keep_ids else uuid4() + e.rule = r + e.name = enzyme["name"] + e.ec_number = enzyme["ecNumber"] + e.classification_level = enzyme["classificationLevel"] + e.linking_method = enzyme["linkingMethod"] + e.save() + + for reaction in enzyme["reactionLinkEvidence"]: + reaction = Reaction.objects.get(uuid=mapping[reaction["id"]]) + e.reaction_evidence.add(reaction) + + for edge in enzyme["edgeLinkEvidence"]: + edge = Edge.objects.get(uuid=mapping[edge["id"]]) + e.reaction_evidence.add(edge) + + for evidence in enzyme["linkEvidence"]: + matches = re.findall(r">(R[0-9]+)<", evidence["evidence"]) + if not matches or len(matches) != 1: + logger.warning(f"Could not find reaction id in {evidence['evidence']}") + continue + + e.add_kegg_reaction_id(matches[0]) + + e.save() + + print("Enzyme links imported...") + print("Import statistics:") print("Package {} stored".format(pack.url)) print("Imported {} compounds".format(Compound.objects.filter(package=pack).count())) diff --git a/epdb/management/commands/create_ml_models.py b/epdb/management/commands/create_ml_models.py index 8cf3fd55..89fbc0ec 100644 --- a/epdb/management/commands/create_ml_models.py +++ b/epdb/management/commands/create_ml_models.py @@ -7,10 +7,11 @@ from epdb.models import MLRelativeReasoning, EnviFormer, Package class Command(BaseCommand): """This command can be run with - `python manage.py create_ml_models [model_names] -d [data_packages] OPTIONAL: -e [eval_packages]` - For example, to train both EnviFormer and MLRelativeReasoning on BBD and SOIL and evaluate them on SLUDGE - the below command would be used: - `python manage.py create_ml_models enviformer mlrr -d bbd soil -e sludge + `python manage.py create_ml_models [model_names] -d [data_packages] FOR MLRR ONLY: -r [rule_packages] + OPTIONAL: -e [eval_packages] -t threshold` + For example, to train both EnviFormer and MLRelativeReasoning on BBD and SOIL and evaluate them on SLUDGE with a + threshold of 0.6, the below command would be used: + `python manage.py create_ml_models enviformer mlrr -d bbd soil -e sludge -t 0.6 """ def add_arguments(self, parser): @@ -34,6 +35,13 @@ class Command(BaseCommand): help="Rule Packages mandatory for MLRR", default=[], ) + parser.add_argument( + "-t", + "--threshold", + type=float, + help="Model prediction threshold", + default=0.5, + ) @transaction.atomic def handle(self, *args, **options): @@ -67,7 +75,11 @@ class Command(BaseCommand): return packages # Iteratively create models in options["model_names"] - print(f"Creating models: {options['model_names']}") + print(f"Creating models: {options['model_names']}\n" + f"Data packages: {options['data_packages']}\n" + f"Rule Packages (only for MLRR): {options['rule_packages']}\n" + f"Eval Packages: {options['eval_packages']}\n" + f"Threshold: {options['threshold']:.2f}") data_packages = decode_packages(options["data_packages"]) eval_packages = decode_packages(options["eval_packages"]) rule_packages = decode_packages(options["rule_packages"]) @@ -78,9 +90,10 @@ class Command(BaseCommand): pack, data_packages=data_packages, eval_packages=eval_packages, - threshold=0.5, - name="EnviFormer - T0.5", - description="EnviFormer transformer", + threshold=options['threshold'], + name=f"EnviFormer - {', '.join(options['data_packages'])} - T{options['threshold']:.2f}", + description=f"EnviFormer transformer trained 
on {options['data_packages']} " + f"evaluated on {options['eval_packages']}.", ) elif model_name == "mlrr": model = MLRelativeReasoning.create( @@ -88,9 +101,10 @@ class Command(BaseCommand): rule_packages=rule_packages, data_packages=data_packages, eval_packages=eval_packages, - threshold=0.5, - name="ECC - BBD - T0.5", - description="ML Relative Reasoning", + threshold=options['threshold'], + name=f"ECC - {', '.join(options['data_packages'])} - T{options['threshold']:.2f}", + description=f"ML Relative Reasoning trained on {options['data_packages']} with rules from " + f"{options['rule_packages']} and evaluated on {options['eval_packages']}.", ) else: raise ValueError(f"Cannot create model of type {model_name}, unknown model type") @@ -100,6 +114,6 @@ class Command(BaseCommand): print(f"Training {model_name}") model.build_model() print(f"Evaluating {model_name}") - model.evaluate_model() + model.evaluate_model(False, eval_packages=eval_packages) print(f"Saving {model_name}") model.save() diff --git a/epdb/management/commands/dump_enviformer.py b/epdb/management/commands/dump_enviformer.py new file mode 100644 index 00000000..e333248a --- /dev/null +++ b/epdb/management/commands/dump_enviformer.py @@ -0,0 +1,59 @@ +import json +import os +import tarfile +from tempfile import TemporaryDirectory + +from django.conf import settings as s +from django.core.management.base import BaseCommand +from django.db import transaction + +from epdb.models import EnviFormer + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument( + "model", + type=str, + help="Model UUID of the Model to Dump", + ) + parser.add_argument("--output", type=str) + + def package_dict_and_folder(self, dict_data, folder_path, output_path): + with TemporaryDirectory() as tmpdir: + dict_filename = os.path.join(tmpdir, "data.json") + + with open(dict_filename, "w", encoding="utf-8") as f: + json.dump(dict_data, f, indent=2) + + with tarfile.open(output_path, "w:gz") as tar: + tar.add(dict_filename, arcname="data.json") + tar.add(folder_path, arcname=os.path.basename(folder_path)) + + os.remove(dict_filename) + + @transaction.atomic + def handle(self, *args, **options): + output = options["output"] + + if os.path.exists(output): + raise ValueError(f"Output file {output} already exists") + + model = EnviFormer.objects.get(uuid=options["model"]) + + data = { + "uuid": str(model.uuid), + "name": model.name, + "description": model.description, + "kv": model.kv, + "data_packages_uuids": [str(p.uuid) for p in model.data_packages.all()], + "eval_packages_uuids": [str(p.uuid) for p in model.data_packages.all()], + "threshold": model.threshold, + "eval_results": model.eval_results, + "multigen_eval": model.multigen_eval, + "model_status": model.model_status, + } + + model_folder = os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid)) + + self.package_dict_and_folder(data, model_folder, output) diff --git a/epdb/management/commands/load_enviformer.py b/epdb/management/commands/load_enviformer.py new file mode 100644 index 00000000..b2f9c3e3 --- /dev/null +++ b/epdb/management/commands/load_enviformer.py @@ -0,0 +1,81 @@ +import json +import os +import shutil +import tarfile +from tempfile import TemporaryDirectory + +from django.conf import settings as s +from django.core.management.base import BaseCommand +from django.db import transaction + +from epdb.models import EnviFormer, Package + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument( + "input", + type=str, + 
help=".tar.gz file containing the Model dump.", + ) + parser.add_argument( + "package", + type=str, + help="Package UUID where the Model should be loaded to.", + ) + + def read_dict_and_folder_from_archive(self, archive_path, extract_to="extracted_folder"): + with tarfile.open(archive_path, "r:gz") as tar: + tar.extractall(extract_to) + + dict_path = os.path.join(extract_to, "data.json") + + if not os.path.exists(dict_path): + raise FileNotFoundError("data.json not found in the archive.") + + with open(dict_path, "r", encoding="utf-8") as f: + data_dict = json.load(f) + + extracted_items = os.listdir(extract_to) + folders = [item for item in extracted_items if item != "data.json"] + folder_path = os.path.join(extract_to, folders[0]) if folders else None + + return data_dict, folder_path + + @transaction.atomic + def handle(self, *args, **options): + if not os.path.exists(options["input"]): + raise ValueError(f"Input file {options['input']} does not exist.") + + target_package = Package.objects.get(uuid=options["package"]) + + with TemporaryDirectory() as tmpdir: + data, folder = self.read_dict_and_folder_from_archive(options["input"], tmpdir) + + model = EnviFormer() + model.package = target_package + # model.uuid = data["uuid"] + model.name = data["name"] + model.description = data["description"] + model.kv = data["kv"] + model.threshold = float(data["threshold"]) + model.eval_results = data["eval_results"] + model.multigen_eval = data["multigen_eval"] + model.model_status = data["model_status"] + model.save() + + for p_uuid in data["data_packages_uuids"]: + p = Package.objects.get(uuid=p_uuid) + model.data_packages.add(p) + + for p_uuid in data["eval_packages_uuids"]: + p = Package.objects.get(uuid=p_uuid) + model.eval_packages.add(p) + + target_folder = os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid)) + + shutil.copytree(folder, target_folder) + os.rename( + os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid), f"{data['uuid']}.ckpt"), + os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid), f"{model.uuid}.ckpt"), + ) diff --git a/epdb/management/commands/localize_urls.py b/epdb/management/commands/localize_urls.py index b9f95b11..cc0a3726 100644 --- a/epdb/management/commands/localize_urls.py +++ b/epdb/management/commands/localize_urls.py @@ -1,8 +1,10 @@ from django.apps import apps from django.core.management.base import BaseCommand -from django.db.models import F, Value -from django.db.models.functions import Replace +from django.db.models import F, Value, TextField, JSONField +from django.db.models.functions import Replace, Cast + +from epdb.models import EnviPathModel class Command(BaseCommand): @@ -41,6 +43,7 @@ class Command(BaseCommand): "RuleBasedRelativeReasoning", "EnviFormer", "ApplicabilityDomain", + "EnzymeLink", ] for model in MODELS: obj_cls = apps.get_model("epdb", model) @@ -48,3 +51,14 @@ class Command(BaseCommand): obj_cls.objects.update( url=Replace(F("url"), Value(options["old"]), Value(options["new"])) ) + if issubclass(obj_cls, EnviPathModel): + obj_cls.objects.update( + kv=Cast( + Replace( + Cast(F("kv"), output_field=TextField()), + Value(options["old"]), + Value(options["new"]), + ), + output_field=JSONField(), + ) + ) diff --git a/epdb/management/commands/update_job_logs.py b/epdb/management/commands/update_job_logs.py new file mode 100644 index 00000000..a5b17cfa --- /dev/null +++ b/epdb/management/commands/update_job_logs.py @@ -0,0 +1,38 @@ +from datetime import date, timedelta + +from django.core.management.base import BaseCommand +from 
django.db import transaction + +from epdb.models import JobLog + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument( + "--cleanup", + type=int, + default=None, + help="Remove all logs older than this number of days. Default is None, which does not remove any logs.", + ) + + @transaction.atomic + def handle(self, *args, **options): + if options["cleanup"] is not None: + cleanup_dt = date.today() - timedelta(days=options["cleanup"]) + print(JobLog.objects.filter(created__lt=cleanup_dt).delete()) + + logs = JobLog.objects.filter(status="INITIAL") + print(f"Found {logs.count()} logs to update") + updated = 0 + for log in logs: + res = log.check_for_update() + if res: + updated += 1 + + print(f"Updated {updated} logs") + + from django.db.models import Count + + qs = JobLog.objects.values("status").annotate(total=Count("status")) + for r in qs: + print(r["status"], r["total"]) diff --git a/epdb/migrations/0008_enzymelink.py b/epdb/migrations/0008_enzymelink.py new file mode 100644 index 00000000..35d0a950 --- /dev/null +++ b/epdb/migrations/0008_enzymelink.py @@ -0,0 +1,64 @@ +# Generated by Django 5.2.7 on 2025-10-10 06:58 + +import django.db.models.deletion +import django.utils.timezone +import model_utils.fields +import uuid +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("epdb", "0007_alter_enviformer_options_enviformer_app_domain_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="EnzymeLink", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "created", + model_utils.fields.AutoCreatedField( + default=django.utils.timezone.now, editable=False, verbose_name="created" + ), + ), + ( + "modified", + model_utils.fields.AutoLastModifiedField( + default=django.utils.timezone.now, editable=False, verbose_name="modified" + ), + ), + ( + "uuid", + models.UUIDField( + default=uuid.uuid4, unique=True, verbose_name="UUID of this object" + ), + ), + ("name", models.TextField(default="no name", verbose_name="Name")), + ( + "description", + models.TextField(default="no description", verbose_name="Descriptions"), + ), + ("url", models.TextField(null=True, unique=True, verbose_name="URL")), + ("kv", models.JSONField(blank=True, default=dict, null=True)), + ("ec_number", models.TextField(verbose_name="EC Number")), + ("classification_level", models.IntegerField(verbose_name="Classification Level")), + ("linking_method", models.TextField(verbose_name="Linking Method")), + ("edge_evidence", models.ManyToManyField(to="epdb.edge")), + ("reaction_evidence", models.ManyToManyField(to="epdb.reaction")), + ( + "rule", + models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="epdb.rule"), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/epdb/migrations/0009_joblog.py b/epdb/migrations/0009_joblog.py new file mode 100644 index 00000000..5c731eb1 --- /dev/null +++ b/epdb/migrations/0009_joblog.py @@ -0,0 +1,66 @@ +# Generated by Django 5.2.7 on 2025-10-27 09:39 + +import django.db.models.deletion +import django.utils.timezone +import model_utils.fields +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("epdb", "0008_enzymelink"), + ] + + operations = [ + migrations.CreateModel( + name="JobLog", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, primary_key=True, serialize=False, 
verbose_name="ID" + ), + ), + ( + "created", + model_utils.fields.AutoCreatedField( + default=django.utils.timezone.now, editable=False, verbose_name="created" + ), + ), + ( + "modified", + model_utils.fields.AutoLastModifiedField( + default=django.utils.timezone.now, editable=False, verbose_name="modified" + ), + ), + ("task_id", models.UUIDField(unique=True)), + ("job_name", models.TextField()), + ( + "status", + models.CharField( + choices=[ + ("INITIAL", "Initial"), + ("SUCCESS", "Success"), + ("FAILURE", "Failure"), + ("REVOKED", "Revoked"), + ("IGNORED", "Ignored"), + ], + default="INITIAL", + max_length=20, + ), + ), + ("done_at", models.DateTimeField(blank=True, default=None, null=True)), + ("task_result", models.TextField(blank=True, default=None, null=True)), + ( + "user", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/epdb/models.py b/epdb/models.py index 83b29925..324fe301 100644 --- a/epdb/models.py +++ b/epdb/models.py @@ -310,7 +310,7 @@ class ExternalDatabase(TimeStampedModel): }, { "database": ExternalDatabase.objects.get(name="ChEBI"), - "placeholder": "ChEBI ID without prefix e.g. 12345", + "placeholder": "ChEBI ID without prefix e.g. 10576", }, ], "structure": [ @@ -328,7 +328,7 @@ class ExternalDatabase(TimeStampedModel): }, { "database": ExternalDatabase.objects.get(name="ChEBI"), - "placeholder": "ChEBI ID without prefix e.g. 12345", + "placeholder": "ChEBI ID without prefix e.g. 10576", }, ], "reaction": [ @@ -342,7 +342,7 @@ class ExternalDatabase(TimeStampedModel): }, { "database": ExternalDatabase.objects.get(name="UniProt"), - "placeholder": "Query ID for UniPro e.g. rhea:12345", + "placeholder": "Query ID for UniProt e.g. 
rhea:12345", }, ], } @@ -477,7 +477,7 @@ class ChemicalIdentifierMixin(ExternalIdentifierMixin): return self.add_external_identifier("CAS", cas_number) def get_pubchem_identifiers(self): - return self.get_external_identifier("PubChem Compound") or self.get_external_identifier( + return self.get_external_identifier("PubChem Compound") | self.get_external_identifier( "PubChem Substance" ) @@ -494,6 +494,20 @@ class ChemicalIdentifierMixin(ExternalIdentifierMixin): return self.get_external_identifier("CAS") +class KEGGIdentifierMixin(ExternalIdentifierMixin): + @property + def kegg_reaction_links(self): + return self.get_external_identifier("KEGG Reaction") + + def add_kegg_reaction_id(self, kegg_id): + return self.add_external_identifier( + "KEGG Reaction", kegg_id, f"https://www.genome.jp/entry/{kegg_id}" + ) + + class Meta: + abstract = True + + class ReactionIdentifierMixin(ExternalIdentifierMixin): class Meta: abstract = True @@ -1014,6 +1028,26 @@ class CompoundStructure(EnviPathModel, AliasMixin, ScenarioMixin, ChemicalIdenti return self.compound.default_structure == self +class EnzymeLink(EnviPathModel, KEGGIdentifierMixin): + rule = models.ForeignKey("Rule", on_delete=models.CASCADE, db_index=True) + ec_number = models.TextField(blank=False, null=False, verbose_name="EC Number") + classification_level = models.IntegerField( + blank=False, null=False, verbose_name="Classification Level" + ) + linking_method = models.TextField(blank=False, null=False, verbose_name="Linking Method") + + reaction_evidence = models.ManyToManyField("epdb.Reaction") + edge_evidence = models.ManyToManyField("epdb.Edge") + + external_identifiers = GenericRelation("ExternalIdentifier") + + def _url(self): + return "{}/enzymelink/{}".format(self.rule.url, self.uuid) + + def get_group(self) -> str: + return ".".join(self.ec_number.split(".")[:3]) + ".-" + + class Rule(PolymorphicModel, EnviPathModel, AliasMixin, ScenarioMixin): package = models.ForeignKey( "epdb.Package", verbose_name="Package", on_delete=models.CASCADE, db_index=True @@ -1095,6 +1129,18 @@ class Rule(PolymorphicModel, EnviPathModel, AliasMixin, ScenarioMixin): return new_rule + def enzymelinks(self): + return self.enzymelink_set.all() + + def get_grouped_enzymelinks(self): + res = defaultdict(list) + + for el in self.enzymelinks(): + key = ".".join(el.ec_number.split(".")[:3]) + ".-" + res[key].append(el) + + return dict(res) + class SimpleRule(Rule): pass @@ -1437,6 +1483,16 @@ class Reaction(EnviPathModel, AliasMixin, ScenarioMixin, ReactionIdentifierMixin id__in=Edge.objects.filter(edge_label=self).values("pathway_id") ).order_by("name") + def get_related_enzymes(self): + res = [] + edges = Edge.objects.filter(edge_label=self) + for e in edges: + for scen in e.scenarios.all(): + for ai in scen.additional_information.keys(): + if ai == "Enzyme": + res.extend(scen.additional_information[ai]) + return res + class Pathway(EnviPathModel, AliasMixin, ScenarioMixin): package = models.ForeignKey( @@ -2169,10 +2225,18 @@ class PackageBasedModel(EPModel): self.model_status = self.BUILT_NOT_EVALUATED self.save() - def evaluate_model(self): + def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None): if self.model_status != self.BUILT_NOT_EVALUATED: raise ValueError(f"Can't evaluate a model in state {self.model_status}!") + if multigen: + self.multigen_eval = multigen + self.save() + + if eval_packages is not None: + for p in eval_packages: + self.eval_packages.add(p) + self.model_status = self.EVALUATING self.save() @@ -2469,7 
+2533,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel): package: "Package", rule_packages: List["Package"], data_packages: List["Package"], - eval_packages: List["Package"], threshold: float = 0.5, min_count: int = 10, max_count: int = 0, @@ -2518,10 +2581,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel): for p in rule_packages: rbrr.data_packages.add(p) - if eval_packages: - for p in eval_packages: - rbrr.eval_packages.add(p) - rbrr.save() return rbrr @@ -2576,7 +2635,6 @@ class MLRelativeReasoning(PackageBasedModel): package: "Package", rule_packages: List["Package"], data_packages: List["Package"], - eval_packages: List["Package"], threshold: float = 0.5, name: "str" = None, description: str = None, @@ -2616,10 +2674,6 @@ class MLRelativeReasoning(PackageBasedModel): for p in rule_packages: mlrr.data_packages.add(p) - if eval_packages: - for p in eval_packages: - mlrr.eval_packages.add(p) - if build_app_domain: ad = ApplicabilityDomain.create( mlrr, @@ -2939,7 +2993,6 @@ class EnviFormer(PackageBasedModel): def create( package: "Package", data_packages: List["Package"], - eval_packages: List["Package"], threshold: float = 0.5, name: "str" = None, description: str = None, @@ -2972,10 +3025,6 @@ class EnviFormer(PackageBasedModel): for p in data_packages: mod.data_packages.add(p) - if eval_packages: - for p in eval_packages: - mod.eval_packages.add(p) - # if build_app_domain: # ad = ApplicabilityDomain.create(mod, app_domain_num_neighbours, app_domain_reliability_threshold, # app_domain_local_compatibility_threshold) @@ -2989,7 +3038,8 @@ class EnviFormer(PackageBasedModel): from enviformer import load ckpt = os.path.join(s.MODEL_DIR, "enviformer", str(self.uuid), f"{self.uuid}.ckpt") - return load(device=s.ENVIFORMER_DEVICE, ckpt_path=ckpt) + mod = load(device=s.ENVIFORMER_DEVICE, ckpt_path=ckpt) + return mod def predict(self, smiles) -> List["PredictionResult"]: return self.predict_batch([smiles])[0] @@ -3003,8 +3053,12 @@ class EnviFormer(PackageBasedModel): for smiles in smiles_list ] logger.info(f"Submitting {canon_smiles} to {self.name}") + start = datetime.now() products_list = self.model.predict_batch(canon_smiles) - logger.info(f"Got results {products_list}") + end = datetime.now() + logger.info( + f"Prediction took {(end - start).total_seconds():.2f} seconds. 
Got results {products_list}" + ) results = [] for products in products_list: @@ -3031,6 +3085,7 @@ class EnviFormer(PackageBasedModel): start = datetime.now() # Standardise reactions for the training data, EnviFormer ignores stereochemistry currently + co2 = {"C(=O)=O", "O=C=O"} ds = [] for reaction in self._get_reactions(): educts = ".".join( @@ -3045,7 +3100,8 @@ class EnviFormer(PackageBasedModel): for smile in reaction.products.all() ] ) - ds.append(f"{educts}>>{products}") + if products not in co2: + ds.append(f"{educts}>>{products}") end = datetime.now() logger.debug(f"build_dataset took {(end - start).total_seconds()} seconds") @@ -3081,10 +3137,18 @@ class EnviFormer(PackageBasedModel): args = {"clz": "EnviFormer"} return args - def evaluate_model(self): + def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None): if self.model_status != self.BUILT_NOT_EVALUATED: raise ValueError(f"Can't evaluate a model in state {self.model_status}!") + if multigen: + self.multigen_eval = multigen + self.save() + + if eval_packages is not None: + for p in eval_packages: + self.eval_packages.add(p) + self.model_status = self.EVALUATING self.save() @@ -3241,7 +3305,7 @@ class EnviFormer(PackageBasedModel): ds = self.load_dataset() n_splits = 20 - shuff = ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42) + shuff = ShuffleSplit(n_splits=n_splits, test_size=0.1, random_state=42) # Single gen eval is done in one loop of train then evaluate rather than storing all n_splits trained models # this helps reduce the memory footprint. @@ -3309,7 +3373,7 @@ class EnviFormer(PackageBasedModel): # Compute splits of the collected pathway and evaluate. Like single gen we train and evaluate in each # iteration instead of storing all trained models. 
for split_id, (train, test) in enumerate( - ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42).split(pathways) + ShuffleSplit(n_splits=n_splits, test_size=0.1, random_state=42).split(pathways) ): train_pathways = [pathways[i] for i in train] test_pathways = [pathways[i] for i in test] @@ -3608,3 +3672,53 @@ class Setting(EnviPathModel): self.public = True self.global_default = True self.save() + + +class JobLogStatus(models.TextChoices): + INITIAL = "INITIAL", "Initial" + SUCCESS = "SUCCESS", "Success" + FAILURE = "FAILURE", "Failure" + REVOKED = "REVOKED", "Revoked" + IGNORED = "IGNORED", "Ignored" + + +class JobLog(TimeStampedModel): + user = models.ForeignKey("epdb.User", models.CASCADE) + task_id = models.UUIDField(unique=True) + job_name = models.TextField(null=False, blank=False) + status = models.CharField( + max_length=20, + choices=JobLogStatus.choices, + default=JobLogStatus.INITIAL, + ) + + done_at = models.DateTimeField(null=True, blank=True, default=None) + task_result = models.TextField(null=True, blank=True, default=None) + + def check_for_update(self): + async_res = self.get_result() + new_status = async_res.state + + TERMINAL_STATES = [ + "SUCCESS", + "FAILURE", + "REVOKED", + "IGNORED", + ] + + if new_status != self.status and new_status in TERMINAL_STATES: + self.status = new_status + self.done_at = async_res.date_done + + if new_status == "SUCCESS": + self.task_result = async_res.result + + self.save() + + return True + return False + + def get_result(self): + from celery.result import AsyncResult + + return AsyncResult(str(self.task_id)) diff --git a/epdb/tasks.py b/epdb/tasks.py index aabaf8d1..b6f4e6b0 100644 --- a/epdb/tasks.py +++ b/epdb/tasks.py @@ -1,12 +1,56 @@ import logging -from typing import Optional +from datetime import datetime +from typing import Callable, Optional +from uuid import uuid4 from celery import shared_task -from epdb.models import Pathway, Node, EPModel, Setting -from epdb.logic import SPathway +from celery.utils.functional import LRUCache +from epdb.logic import SPathway +from epdb.models import EPModel, JobLog, Node, Package, Pathway, Setting, User logger = logging.getLogger(__name__) +ML_CACHE = LRUCache(3) # Cache the three most recent ML models to reduce load times. 
+ + +def get_ml_model(model_pk: int): + if model_pk not in ML_CACHE: + ML_CACHE[model_pk] = EPModel.objects.get(id=model_pk) + return ML_CACHE[model_pk] + + +def dispatch_eager(user: "User", job: Callable, *args, **kwargs): + try: + x = job(*args, **kwargs) + log = JobLog() + log.user = user + log.task_id = uuid4() + log.job_name = job.__name__ + log.status = "SUCCESS" + log.done_at = datetime.now() + log.task_result = str(x) if x else None + log.save() + + return x + except Exception as e: + logger.exception(e) + raise e + + +def dispatch(user: "User", job: Callable, *args, **kwargs): + try: + x = job.delay(*args, **kwargs) + log = JobLog() + log.user = user + log.task_id = x.task_id + log.job_name = job.__name__ + log.status = "INITIAL" + log.save() + + return x.result + except Exception as e: + logger.exception(e) + raise e @shared_task(queue="background") @@ -16,7 +60,7 @@ def mul(a, b): @shared_task(queue="predict") def predict_simple(model_pk: int, smiles: str): - mod = EPModel.objects.get(id=model_pk) + mod = get_ml_model(model_pk) res = mod.predict(smiles) return res @@ -26,17 +70,55 @@ def send_registration_mail(user_pk: int): pass -@shared_task(queue="model") -def build_model(model_pk: int): +@shared_task(bind=True, queue="model") +def build_model(self, model_pk: int): mod = EPModel.objects.get(id=model_pk) - mod.build_dataset() - mod.build_model() + + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=mod.url) + + try: + mod.build_dataset() + mod.build_model() + except Exception as e: + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update( + status="FAILED", task_result=mod.url + ) + + raise e + + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=mod.url) + + return mod.url -@shared_task(queue="model") -def evaluate_model(model_pk: int): +@shared_task(bind=True, queue="model") +def evaluate_model(self, model_pk: int, multigen: bool, package_pks: Optional[list] = None): + packages = None + + if package_pks: + packages = Package.objects.filter(pk__in=package_pks) + mod = EPModel.objects.get(id=model_pk) - mod.evaluate_model() + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=mod.url) + + try: + mod.evaluate_model(multigen, eval_packages=packages) + except Exception as e: + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update( + status="FAILED", task_result=mod.url + ) + + raise e + + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=mod.url) + + return mod.url @shared_task(queue="model") @@ -45,16 +127,26 @@ def retrain(model_pk: int): mod.retrain() -@shared_task(queue="predict") +@shared_task(bind=True, queue="predict") def predict( - pw_pk: int, pred_setting_pk: int, limit: Optional[int] = None, node_pk: Optional[int] = None + self, + pw_pk: int, + pred_setting_pk: int, + limit: Optional[int] = None, + node_pk: Optional[int] = None, ) -> Pathway: pw = Pathway.objects.get(id=pw_pk) setting = Setting.objects.get(id=pred_setting_pk) + # If the setting has a model add/restore it from the cache + if setting.model is not None: + setting.model = get_ml_model(setting.model.pk) 
pw.kv.update(**{"status": "running"}) pw.save() + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=pw.url) + try: # regular prediction if limit is not None: @@ -79,7 +171,18 @@ def predict( except Exception as e: pw.kv.update({"status": "failed"}) pw.save() + + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update( + status="FAILED", task_result=pw.url + ) + raise e pw.kv.update(**{"status": "completed"}) pw.save() + + if JobLog.objects.filter(task_id=self.request.id).exists(): + JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=pw.url) + + return pw.url diff --git a/epdb/templatetags/envipytags.py b/epdb/templatetags/envipytags.py index c8c92fef..6c250e63 100644 --- a/epdb/templatetags/envipytags.py +++ b/epdb/templatetags/envipytags.py @@ -1,8 +1,21 @@ from django import template +from pydantic import AnyHttpUrl, ValidationError +from pydantic.type_adapter import TypeAdapter register = template.Library() +url_adapter = TypeAdapter(AnyHttpUrl) + @register.filter def classname(obj): return obj.__class__.__name__ + + +@register.filter +def is_url(value): + try: + url_adapter.validate_python(value) + return True + except ValidationError: + return False diff --git a/epdb/urls.py b/epdb/urls.py index 0cc44aeb..41c21d11 100644 --- a/epdb/urls.py +++ b/epdb/urls.py @@ -1,5 +1,5 @@ -from django.urls import path, re_path from django.contrib.auth import views as auth_views +from django.urls import path, re_path from . import views as v @@ -88,20 +88,36 @@ urlpatterns = [ v.package_rule, name="package rule detail", ), - re_path( - rf"^package/(?P{UUID})/simple-rdkit-rule/(?P{UUID})$", - v.package_rule, - name="package rule detail", - ), + # re_path( + # rf"^package/(?P{UUID})/simple-rdkit-rule/(?P{UUID})$", + # v.package_rule, + # name="package rule detail", + # ), re_path( rf"^package/(?P{UUID})/parallel-rule/(?P{UUID})$", v.package_rule, name="package rule detail", ), + # re_path( + # rf"^package/(?P{UUID})/sequential-rule/(?P{UUID})$", + # v.package_rule, + # name="package rule detail", + # ), + # EnzymeLinks re_path( - rf"^package/(?P{UUID})/sequential-rule/(?P{UUID})$", - v.package_rule, - name="package rule detail", + rf"^package/(?P{UUID})/rule/(?P{UUID})/enzymelink/(?P{UUID})$", + v.package_rule_enzymelink, + name="package rule enzymelink detail", + ), + re_path( + rf"^package/(?P{UUID})/simple-ambit-rule/(?P{UUID})/enzymelink/(?P{UUID})$", + v.package_rule_enzymelink, + name="package rule enzymelink detail", + ), + re_path( + rf"^package/(?P{UUID})/parallel-rule/(?P{UUID})/enzymelink/(?P{UUID})$", + v.package_rule_enzymelink, + name="package rule enzymelink detail", ), # Reaction re_path( @@ -174,15 +190,16 @@ urlpatterns = [ re_path(r"^indigo/dearomatize$", v.dearomatize, name="indigo_dearomatize"), re_path(r"^indigo/layout$", v.layout, name="indigo_layout"), re_path(r"^depict$", v.depict, name="depict"), + re_path(r"^jobs", v.jobs, name="jobs"), # OAuth Stuff path("o/userinfo/", v.userinfo, name="oauth_userinfo"), # Static Pages - re_path(r"^terms$", v.terms_of_use, name="terms_of_use"), - re_path(r"^privacy$", v.privacy_policy, name="privacy_policy"), - re_path(r"^cookie-policy$", v.cookie_policy, name="cookie_policy"), - re_path(r"^about$", v.about_us, name="about_us"), - re_path(r"^contact$", v.contact_support, name="contact_support"), - re_path(r"^jobs$", v.jobs, name="jobs"), - 
re_path(r"^cite$", v.cite, name="cite"), - re_path(r"^legal$", v.legal, name="legal"), + re_path(r"^terms$", v.static_terms_of_use, name="terms_of_use"), + re_path(r"^privacy$", v.static_privacy_policy, name="privacy_policy"), + re_path(r"^cookie-policy$", v.static_cookie_policy, name="cookie_policy"), + re_path(r"^about$", v.static_about_us, name="about_us"), + re_path(r"^contact$", v.static_contact_support, name="contact_support"), + re_path(r"^jobs$", v.static_jobs, name="jobs"), + re_path(r"^cite$", v.static_cite, name="cite"), + re_path(r"^legal$", v.static_legal, name="legal"), ] diff --git a/epdb/views.py b/epdb/views.py index 96f1422d..b66aac22 100644 --- a/epdb/views.py +++ b/epdb/views.py @@ -46,6 +46,8 @@ from .models import ( Edge, ExternalDatabase, ExternalIdentifier, + EnzymeLink, + JobLog, ) logger = logging.getLogger(__name__) @@ -756,8 +758,8 @@ def package_models(request, package_uuid): context["unreviewed_objects"] = unreviewed_model_qs context["model_types"] = { - "ML Relative Reasoning": "ml-relative-reasoning", - "Rule Based Relative Reasoning": "rule-based-relative-reasoning", + "ML Relative Reasoning": "mlrr", + "Rule Based Relative Reasoning": "rbrr", } if s.FLAGS.get("ENVIFORMER", False): @@ -777,69 +779,67 @@ def package_models(request, package_uuid): model_type = request.POST.get("model-type") + # Generic fields for ML and Rule Based + rule_packages = request.POST.getlist("model-rule-packages") + data_packages = request.POST.getlist("model-data-packages") + + # Generic params + params = { + "package": current_package, + "name": name, + "description": description, + "data_packages": [ + PackageManager.get_package_by_url(current_user, p) for p in data_packages + ], + } + if model_type == "enviformer": - threshold = float(request.POST.get(f"{model_type}-threshold", 0.5)) + threshold = float(request.POST.get("model-threshold", 0.5)) + params["threshold"] = threshold - mod = EnviFormer.create(current_package, name, description, threshold) + mod = EnviFormer.create(**params) + elif model_type == "mlrr": + # ML Specific + threshold = float(request.POST.get("model-threshold", 0.5)) + # TODO handle additional fingerprinter + # fingerprinter = request.POST.get("model-fingerprinter") - elif model_type == "ml-relative-reasoning" or model_type == "rule-based-relative-reasoning": - # Generic fields for ML and Rule Based - rule_packages = request.POST.getlist("package-based-relative-reasoning-rule-packages") - data_packages = request.POST.getlist("package-based-relative-reasoning-data-packages") - eval_packages = request.POST.getlist( - "package-based-relative-reasoning-evaluation-packages", [] - ) + params["rule_packages"] = [ + PackageManager.get_package_by_url(current_user, p) for p in rule_packages + ] - # Generic params - params = { - "package": current_package, - "name": name, - "description": description, - "rule_packages": [ - PackageManager.get_package_by_url(current_user, p) for p in rule_packages - ], - "data_packages": [ - PackageManager.get_package_by_url(current_user, p) for p in data_packages - ], - "eval_packages": [ - PackageManager.get_package_by_url(current_user, p) for p in eval_packages - ], - } + # App Domain related parameters + build_ad = request.POST.get("build-app-domain", False) == "on" + num_neighbors = request.POST.get("num-neighbors", 5) + reliability_threshold = request.POST.get("reliability-threshold", 0.5) + local_compatibility_threshold = request.POST.get("local-compatibility-threshold", 0.5) - if model_type == "ml-relative-reasoning": - # ML 
Specific - threshold = float(request.POST.get(f"{model_type}-threshold", 0.5)) - # TODO handle additional fingerprinter - # fingerprinter = request.POST.get(f"{model_type}-fingerprinter") + params["threshold"] = threshold + # params['fingerprinter'] = fingerprinter + params["build_app_domain"] = build_ad + params["app_domain_num_neighbours"] = num_neighbors + params["app_domain_reliability_threshold"] = reliability_threshold + params["app_domain_local_compatibility_threshold"] = local_compatibility_threshold - # App Domain related parameters - build_ad = request.POST.get("build-app-domain", False) == "on" - num_neighbors = request.POST.get("num-neighbors", 5) - reliability_threshold = request.POST.get("reliability-threshold", 0.5) - local_compatibility_threshold = request.POST.get( - "local-compatibility-threshold", 0.5 - ) + mod = MLRelativeReasoning.create(**params) + elif model_type == "rbrr": + params["rule_packages"] = [ + PackageManager.get_package_by_url(current_user, p) for p in rule_packages + ] - params["threshold"] = threshold - # params['fingerprinter'] = fingerprinter - params["build_app_domain"] = build_ad - params["app_domain_num_neighbours"] = num_neighbors - params["app_domain_reliability_threshold"] = reliability_threshold - params["app_domain_local_compatibility_threshold"] = local_compatibility_threshold - - mod = MLRelativeReasoning.create(**params) - else: - mod = RuleBasedRelativeReasoning.create(**params) - - from .tasks import build_model - - build_model.delay(mod.pk) + mod = RuleBasedRelativeReasoning.create(**params) + elif s.FLAGS.get("PLUGINS", False) and model_type in s.CLASSIFIER_PLUGINS.values(): + pass else: return error( request, "Invalid model type.", f'Model type "{model_type}" is not supported."' ) - return redirect(mod.url) + from .tasks import dispatch, build_model + + dispatch(current_user, build_model, mod.pk) + + return redirect(mod.url) else: return HttpResponseNotAllowed(["GET", "POST"]) @@ -867,6 +867,10 @@ def package_model(request, package_uuid, model_uuid): return JsonResponse({"error": f'"{smiles}" is not a valid SMILES'}, status=400) if classify: + from epdb.tasks import dispatch_eager, predict_simple + + res = dispatch_eager(current_user, predict_simple, current_model.pk, stand_smiles) + pred_res = current_model.predict(stand_smiles) res = [] @@ -911,9 +915,25 @@ def package_model(request, package_uuid, model_uuid): current_model.delete() return redirect(current_package.url + "/model") elif hidden == "evaluate": - from .tasks import evaluate_model + from .tasks import dispatch, evaluate_model + + eval_type = request.POST.get("model-evaluation-type") + + if eval_type not in ["sg", "mg"]: + return error( + request, + "Invalid evaluation type", + f'Evaluation type "{eval_type}" is not supported. 
Only "sg" and "mg" are supported.', + ) + + multigen = eval_type == "mg" + + eval_packages = request.POST.getlist("model-evaluation-packages") + eval_package_ids = [ + PackageManager.get_package_by_url(current_user, p).id for p in eval_packages + ] + dispatch(current_user, evaluate_model, current_model.pk, multigen, eval_package_ids) - evaluate_model.delay(current_model.pk) return redirect(current_model.url) else: return HttpResponseBadRequest() @@ -1253,7 +1273,16 @@ def package_compound_structures(request, package_uuid, compound_uuid): structure_smiles = request.POST.get("structure-smiles") structure_description = request.POST.get("structure-description") - cs = current_compound.add_structure(structure_smiles, structure_name, structure_description) + try: + cs = current_compound.add_structure( + structure_smiles, structure_name, structure_description + ) + except ValueError: + return error( + request, + "Adding structure failed!", + "The structure could not be added as normalized structures don't match!", + ) return redirect(cs.url) @@ -1456,12 +1485,20 @@ def package_rule(request, package_uuid, rule_uuid): logger.info( f"Rule {current_rule.uuid} returned multiple product sets on {smiles}, picking the first one." ) - - smirks = f"{stand_smiles}>>{'.'.join(sorted(res[0]))}" + # Some Rules are touching unrelated areas which might result in ~ indicating + # any bond (-, =, #). For drawing we need a concrete bond. -> use single bond + product_smiles = [x.replace("~", "-") for x in res[0]] + smirks = f"{stand_smiles}>>{'.'.join(sorted(product_smiles))}" # Usually the functional groups are a mapping of fg -> count # As we are doing it on the fly here fake a high count to ensure that its properly highlighted - educt_functional_groups = {x: 1000 for x in current_rule.reactants_smarts} - product_functional_groups = {x: 1000 for x in current_rule.products_smarts} + + if isinstance(current_rule, SimpleAmbitRule): + educt_functional_groups = {current_rule.reactants_smarts: 1000} + product_functional_groups = {current_rule.products_smarts: 1000} + else: + educt_functional_groups = {x: 1000 for x in current_rule.reactants_smarts} + product_functional_groups = {x: 1000 for x in current_rule.products_smarts} + return HttpResponse( IndigoUtils.smirks_to_svg( smirks, @@ -1531,6 +1568,32 @@ def package_rule(request, package_uuid, rule_uuid): return HttpResponseNotAllowed(["GET", "POST"]) +@package_permission_required() +def package_rule_enzymelink(request, package_uuid, rule_uuid, enzymelink_uuid): + current_user = _anonymous_or_real(request) + current_package = PackageManager.get_package_by_id(current_user, package_uuid) + current_rule = Rule.objects.get(package=current_package, uuid=rule_uuid) + current_enzymelink = EnzymeLink.objects.get(rule=current_rule, uuid=enzymelink_uuid) + + if request.method == "GET": + context = get_base_context(request) + + context["title"] = f"enviPath - {current_package.name} - {current_rule.name}" + + context["meta"]["current_package"] = current_package + context["object_type"] = "enzyme" + context["breadcrumbs"] = breadcrumbs( + current_package, "rule", current_rule, "enzymelink", current_enzymelink + ) + + context["enzymelink"] = current_enzymelink + context["current_object"] = current_enzymelink + + return render(request, "objects/enzymelink.html", context) + + return HttpResponseNotAllowed(["GET"]) + + @package_permission_required() def package_reactions(request, package_uuid): current_user = _anonymous_or_real(request) @@ -1768,9 +1831,9 @@ def 
package_pathways(request, package_uuid): pw.setting = prediction_setting pw.save() - from .tasks import predict + from .tasks import dispatch, predict - predict.delay(pw.pk, prediction_setting.pk, limit=limit) + dispatch(current_user, predict, pw.pk, prediction_setting.pk, limit=limit) return redirect(pw.url) @@ -1889,10 +1952,16 @@ def package_pathway(request, package_uuid, pathway_uuid): if node_url: n = current_pathway.get_node(node_url) - from .tasks import predict + from .tasks import dispatch, predict + + dispatch( + current_user, + predict, + current_pathway.pk, + current_pathway.setting.pk, + node_pk=n.pk, + ) - # Dont delay? - predict(current_pathway.pk, current_pathway.setting.pk, node_pk=n.pk) return JsonResponse({"success": current_pathway.url}) return HttpResponseBadRequest() @@ -1969,9 +2038,42 @@ def package_pathway_node(request, package_uuid, pathway_uuid, node_uuid): if request.method == "GET": is_image_request = request.GET.get("image") + is_highlight_request = request.GET.get("highlight", False) + is_highlight_reactivity = request.GET.get("highlightReactivity", False) if is_image_request: if is_image_request == "svg": - svg_data = current_node.as_svg + # TODO optimize this chain + if is_highlight_request: + # User functional groups covered by the model training data + fgs = {} + if current_pathway.setting: + if current_pathway.setting.model: + if current_pathway.setting.model.app_domain: + fgs = current_pathway.setting.model.app_domain.functional_groups + + svg_data = IndigoUtils.mol_to_svg( + current_node.default_node_label.smiles, functional_groups=fgs + ) + elif is_highlight_reactivity: + # Use reactant smarts to show all reaction sites + # set a high count to obtain a strong color + ad_data = current_node.get_app_domain_assessment_data() + fgs = {} + for t in ad_data.get("assessment", {}).get("transformations", []): + r = Rule.objects.get(url=t["rule"]["url"]) + + if isinstance(r, SimpleAmbitRule): + fgs[r.reactants_smarts] = 1000 + else: + for sr in r.srs: + fgs[sr.reactants_smarts] = 1000 + + svg_data = IndigoUtils.mol_to_svg( + current_node.default_node_label.smiles, functional_groups=fgs + ) + else: + svg_data = current_node.as_svg + return HttpResponse(svg_data, content_type="image/svg+xml") context = get_base_context(request) @@ -2631,6 +2733,24 @@ def setting(request, setting_uuid): pass +def jobs(request): + current_user = _anonymous_or_real(request) + context = get_base_context(request) + + if request.method == "GET": + context["object_type"] = "joblog" + context["breadcrumbs"] = [ + {"Home": s.SERVER_URL}, + {"Jobs": s.SERVER_URL + "/jobs"}, + ] + if current_user.is_superuser: + context["jobs"] = JobLog.objects.all().order_by("-created") + else: + context["jobs"] = JobLog.objects.filter(user=current_user).order_by("-created") + + return render(request, "collections/joblog.html", context) + + ########### # KETCHER # ########### @@ -2705,49 +2825,49 @@ def userinfo(request): # Static Pages -def terms_of_use(request): +def static_terms_of_use(request): context = get_base_context(request) context["title"] = "enviPath - Terms of Use" return render(request, "static/terms_of_use.html", context) -def privacy_policy(request): +def static_privacy_policy(request): context = get_base_context(request) context["title"] = "enviPath - Privacy Policy" return render(request, "static/privacy_policy.html", context) -def cookie_policy(request): +def static_cookie_policy(request): context = get_base_context(request) context["title"] = "enviPath - Cookie Policy" return 
render(request, "static/cookie_policy.html", context) -def about_us(request): +def static_about_us(request): context = get_base_context(request) context["title"] = "enviPath - About Us" return render(request, "static/about_us.html", context) -def contact_support(request): +def static_contact_support(request): context = get_base_context(request) context["title"] = "enviPath - Contact & Support" return render(request, "static/contact.html", context) -def jobs(request): +def static_jobs(request): context = get_base_context(request) context["title"] = "enviPath - Jobs & Careers" return render(request, "static/jobs.html", context) -def cite(request): +def static_cite(request): context = get_base_context(request) context["title"] = "enviPath - How to Cite" return render(request, "static/cite.html", context) -def legal(request): +def static_legal(request): context = get_base_context(request) context["title"] = "enviPath - Legal Information" return render(request, "static/legal.html", context) diff --git a/pyproject.toml b/pyproject.toml index 346618fd..d054e6dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ dependencies = [ "django-oauth-toolkit>=3.0.1", "django-polymorphic>=4.1.0", "django-stubs>=5.2.4", - #"enviformer", + "enviformer", "envipy-additional-information", "envipy-ambit>=0.1.0", "envipy-plugins", @@ -31,9 +31,9 @@ dependencies = [ ] [tool.uv.sources] -#enviformer = { git = "ssh://git@git.envipath.com/enviPath/enviformer.git", rev = "v0.1.2" } +enviformer = { git = "ssh://git@git.envipath.com/enviPath/enviformer.git", rev = "v0.1.2" } envipy-plugins = { git = "ssh://git@git.envipath.com/enviPath/enviPy-plugins.git", rev = "v0.1.0" } -envipy-additional-information = { git = "ssh://git@git.envipath.com/enviPath/enviPy-additional-information.git", rev = "v0.1.4"} +envipy-additional-information = { git = "ssh://git@git.envipath.com/enviPath/enviPy-additional-information.git", rev = "v0.1.7"} envipy-ambit = { git = "ssh://git@git.envipath.com/enviPath/enviPy-ambit.git" } [dependency-groups] @@ -45,6 +45,8 @@ dev = [ [project.optional-dependencies] ms-login = ["msal>=1.33.0"] dev = [ + "celery-stubs==0.1.3", + "django-stubs>=5.2.4", "poethepoet>=0.37.0", "pre-commit>=4.3.0", "ruff>=0.13.3", diff --git a/static/images/UoA-Logo-Primary-RGB-Large.png b/static/images/UoA-Logo-Primary-RGB-Large.png new file mode 100644 index 00000000..29b2a10d Binary files /dev/null and b/static/images/UoA-Logo-Primary-RGB-Large.png differ diff --git a/static/images/UoA-Logo-Primary-RGB-Reversed-Large.png b/static/images/UoA-Logo-Primary-RGB-Reversed-Large.png new file mode 100644 index 00000000..7714bd63 Binary files /dev/null and b/static/images/UoA-Logo-Primary-RGB-Reversed-Large.png differ diff --git a/static/images/UoA-Logo-Primary-RGB-Reversed-Small.png b/static/images/UoA-Logo-Primary-RGB-Reversed-Small.png new file mode 100644 index 00000000..7c48c2de Binary files /dev/null and b/static/images/UoA-Logo-Primary-RGB-Reversed-Small.png differ diff --git a/static/images/UoA-Logo-Primary-RGB-Small.png b/static/images/UoA-Logo-Primary-RGB-Small.png new file mode 100644 index 00000000..8d9ff890 Binary files /dev/null and b/static/images/UoA-Logo-Primary-RGB-Small.png differ diff --git a/static/images/uzh-logo.svg b/static/images/uzh-logo.svg index 666c057b..4fa27b88 100644 --- a/static/images/uzh-logo.svg +++ b/static/images/uzh-logo.svg @@ -1,6 +1,488 @@ - + - - + + diff --git a/static/js/pps.js b/static/js/pps.js index abc92695..c0829ad1 100644 --- a/static/js/pps.js +++ 
b/static/js/pps.js @@ -646,8 +646,8 @@ function handleAssessmentResponse(depict_url, data) { var reactivityCentersImgSrc = null; if (data['assessment']['node'] !== undefined) { - functionalGroupsImgSrc = ""; - reactivityCentersImgSrc = "" + functionalGroupsImgSrc = ""; + reactivityCentersImgSrc = "" } else { functionalGroupsImgSrc = ""; reactivityCentersImgSrc = "" @@ -784,4 +784,4 @@ function handleAssessmentResponse(depict_url, data) { $("#appDomainAssessmentResultTable").append(res); -} \ No newline at end of file +} diff --git a/static/js/pw.js b/static/js/pw.js index a335f951..907f28e0 100644 --- a/static/js/pw.js +++ b/static/js/pw.js @@ -444,6 +444,13 @@ function serializeSVG(svgElement) { line.setAttribute("fill", style.fill); }); + svgElement.querySelectorAll("line.link_no_arrow").forEach(line => { + const style = getComputedStyle(line); + line.setAttribute("stroke", style.stroke); + line.setAttribute("stroke-width", style.strokeWidth); + line.setAttribute("fill", style.fill); + }); + const serializer = new XMLSerializer(); let svgString = serializer.serializeToString(svgElement); @@ -455,7 +462,26 @@ function serializeSVG(svgElement) { return svgString; } +function shrinkSVG(svgSelector) { + + const svg = d3.select(svgSelector); + const node = svg.node(); + + // Compute bounding box of everything inside the SVG + const bbox = node.getBBox(); + + const padding = 10; + svg.attr("viewBox", + `${bbox.x - padding} ${bbox.y - padding} ${bbox.width + 2 * padding} ${bbox.height + 2 * padding}` + ) + .attr("width", bbox.width + 2 * padding) + .attr("height", bbox.height + 2 * padding); + + return bbox; +} + function downloadSVG(svgElement, filename = 'chart.svg') { + shrinkSVG("#" + svgElement.id); const svgString = serializeSVG(svgElement); const blob = new Blob([svgString], {type: 'image/svg+xml;charset=utf-8'}); const url = URL.createObjectURL(blob); diff --git a/templates/collections/joblog.html b/templates/collections/joblog.html new file mode 100644 index 00000000..7075e08e --- /dev/null +++ b/templates/collections/joblog.html @@ -0,0 +1,71 @@ +{% extends "framework.html" %} +{% load static %} +{% load envipytags %} +{% block content %} + +
+
+
+ Jobs +
+
+

+ Job logs record the status and result of background tasks (model training, evaluation, and pathway prediction) dispatched by enviPath.

+ +
+ +
+

+ + Jobs + +

+
+
+
+            <table>
+                <thead>
+                <tr>
+                    <th>ID</th>
+                    <th>Name</th>
+                    <th>Status</th>
+                    <th>Queued</th>
+                    <th>Done</th>
+                    <th>Result</th>
+                </tr>
+                </thead>
+                <tbody>
+                {% for job in jobs %}
+                    <tr>
+                        <td>{{ job.task_id }}</td>
+                        <td>{{ job.job_name }}</td>
+                        <td>{{ job.status }}</td>
+                        <td>{{ job.created }}</td>
+                        <td>{{ job.done_at }}</td>
+                        {% if job.task_result and job.task_result|is_url == True %}
+                            <td><a href="{{ job.task_result }}">Result</a></td>
+                        {% elif job.task_result %}
+                            <td>{{ job.task_result|slice:"40" }}...</td>
+                        {% else %}
+                            <td>Empty</td>
+                        {% endif %}
+                    </tr>
+                {% endfor %}
+                </tbody>
+            </table>
+
+
+ + + +
+
+{% endblock content %} diff --git a/templates/framework.html b/templates/framework.html index e5fca12a..d2ace179 100644 --- a/templates/framework.html +++ b/templates/framework.html @@ -242,21 +242,23 @@
diff --git a/templates/modals/collections/new_model_modal.html b/templates/modals/collections/new_model_modal.html index b58a65ed..b5e903b6 100644 --- a/templates/modals/collections/new_model_modal.html +++ b/templates/modals/collections/new_model_modal.html @@ -18,113 +18,117 @@ prediction. You just need to set a name and the packages you want the object to be based on. There are multiple types of models available. For additional information have a look at our - wiki >> + wiki + >> + + + + + - -
- - - {% for obj in meta.readable_packages %} - {% if obj.reviewed %} - - {% endif %} + {% if obj.reviewed %} + + {% endif %} {% endfor %} {% for obj in meta.readable_packages %} - {% if not obj.reviewed %} - - {% endif %} + {% if not obj.reviewed %} + + {% endif %} {% endfor %} - - - - -
- - - - {% if meta.enabled_features.PLUGINS and additional_descriptors %} - - - - {% endif %} - - - -
- {% if meta.enabled_features.APPLICABILITY_DOMAIN %} - -
- -
- - {% endif %}
- -
- - + + +
+ + +
+ + +
+ + +
+ + +
+ + +
+ +
+ {% if meta.enabled_features.APPLICABILITY_DOMAIN %} + +
+ +
+ + {% endif %}
@@ -137,53 +141,47 @@
diff --git a/templates/modals/objects/evaluate_model_modal.html b/templates/modals/objects/evaluate_model_modal.html
index a42c68bb..1d4b3801 100644
--- a/templates/modals/objects/evaluate_model_modal.html
+++ b/templates/modals/objects/evaluate_model_modal.html
@@ -17,10 +17,10 @@
                     For evaluation, you need to select the packages you want to use. While the
                     model is evaluating, you can use the model for predictions.
-
-
-
                 {% for obj in meta.readable_packages %}
                     {% if obj.reviewed %}
@@ -35,7 +35,16 @@
                     {% endif %}
                 {% endfor %}
-
+
+
+
+
+
+
@@ -65,7 +66,7 @@
     $('#set_scenario_modal_form_submit').on('click', function (e) {
         e.preventDefault();
         if ($('#scenario-select').val().length == 0) {
-            $('#scenario-select').val([''])
+            $('#scenario-select').val("")
         }
         $('#set_scenario_modal_form').submit();
     });
diff --git a/templates/objects/composite_rule.html b/templates/objects/composite_rule.html
index 4e5aaafe..e27e8a00 100644
--- a/templates/objects/composite_rule.html
+++ b/templates/objects/composite_rule.html
@@ -29,7 +29,7 @@

-                    {{ rule.description }}
+                    {{ rule.description|safe }}

@@ -87,19 +87,41 @@ {% endif %} - -
-

- EC Numbers -

-
-
-
- + {% if rule.enzymelinks %} + +
+

+ EC Numbers +

-
- +
+
+ {% for k, v in rule.get_grouped_enzymelinks.items %} + + + {% endfor %} +
+
+ {% endif %}
{% endblock content %}
diff --git a/templates/objects/compound.html b/templates/objects/compound.html
index 98083ca6..90e3e1db 100644
--- a/templates/objects/compound.html
+++ b/templates/objects/compound.html
@@ -183,7 +183,7 @@
-                {% if compound.get_pubchem_identifiers %}
+                {% if compound.get_pubchem_compound_identifiers %}

@@ -193,12 +193,28 @@

-                    {% for eid in compound.get_pubchem_identifiers %}
+                    {% for eid in compound.get_pubchem_compound_identifiers %}
                         CID{{ eid.identifier_value }}
                     {% endfor %}
{% endif %} + {% if compound.get_pubchem_substance_identifiers %} + +
+                    {% for eid in compound.get_pubchem_substance_identifiers %}
+                        SID{{ eid.identifier_value }}
+                    {% endfor %}
+
+ {% endif %} {% if compound.get_chebi_identifiers %}
diff --git a/templates/objects/enzymelink.html b/templates/objects/enzymelink.html
new file mode 100644
index 00000000..464af8ae
--- /dev/null
+++ b/templates/objects/enzymelink.html
@@ -0,0 +1,105 @@
+{% extends "framework.html" %}
+
+{% block content %}
+
+
+
+ {{ enzymelink.ec_number }} +
+ + +
+

+ Enzyme Name +

+
+
+
+ {{ enzymelink.name }} +
+
+ + +
+

+ Linking Method +

+
+
+
+ {{ enzymelink.linking_method }}.  Learn more >> +
+
+ + {% if enzymelink.kegg_reaction_links %} + +
+
+ {% for kl in enzymelink.kegg_reaction_links %} + {{ kl.identifier_value }} + {% endfor %} +
+
+ {% endif %} + + {% if enzymelink.reaction_evidence.all %} + +
+
+ {% for r in enzymelink.reaction_evidence.all %} + {{ r.name }} ({{ r.package.name }}) + {% endfor %} +
+
+ {% endif %} + + {% if enzymelink.edge_evidence.all %} + +
+
+                        {% for e in enzymelink.edge_evidence.all %}
+                            {{ e.pathway.name }}
+                            ({{ e.pathway.package.name }})
+                        {% endfor %}
+
+
+ {% endif %} + + + + + +
+
+{% endblock content %}
diff --git a/templates/objects/model.html b/templates/objects/model.html
index b64bee39..4cf4acd2 100644
--- a/templates/objects/model.html
+++ b/templates/objects/model.html
@@ -117,7 +117,7 @@
         {% endif %}
 
-        {% if model.app_domain %}
+        {% if model.ready_for_prediction and model.app_domain %}

diff --git a/templates/objects/pathway.html b/templates/objects/pathway.html
index 40f2d807..4e4cc27d 100644
--- a/templates/objects/pathway.html
+++ b/templates/objects/pathway.html
@@ -177,9 +177,6 @@
-            {% if debug %}
-
-            {% endif %}
diff --git a/templates/objects/reaction.html b/templates/objects/reaction.html
index 2a026d22..1335a6d7 100644
--- a/templates/objects/reaction.html
+++ b/templates/objects/reaction.html
@@ -124,6 +124,23 @@
{% endif %} + {% if reaction.get_related_enzymes %} + +
+

+ EC Numbers +

+
+
+
+ {% for e in reaction.get_related_enzymes %} + {{ e.name }} + {% endfor %} +
+
+ {% endif %} + {% if reaction.related_pathways %}
diff --git a/templates/objects/simple_rule.html b/templates/objects/simple_rule.html
index cc55d856..a84751a8 100644
--- a/templates/objects/simple_rule.html
+++ b/templates/objects/simple_rule.html
@@ -201,6 +201,43 @@

{% endif %} + + {% if rule.enzymelinks %} + +
+

+ EC Numbers +

+
+
+
+ {% for k, v in rule.get_grouped_enzymelinks.items %} + + + {% endfor %} +
+
+ {% endif %}
{% endblock content %}
diff --git a/tests/test_enviformer.py b/tests/test_enviformer.py
index 1a688cb1..647433fc 100644
--- a/tests/test_enviformer.py
+++ b/tests/test_enviformer.py
@@ -1,7 +1,27 @@
+from collections import defaultdict
+from datetime import datetime
 from tempfile import TemporaryDirectory
 
 from django.test import TestCase, tag
 
 from epdb.logic import PackageManager
-from epdb.models import User, EnviFormer, Package
+from epdb.models import User, EnviFormer, Package, Setting
+from epdb.tasks import predict_simple, predict
+
+
+def measure_predict(mod, pathway_pk=None):
+    # Measure and return the prediction time
+    start = datetime.now()
+    if pathway_pk:
+        s = Setting()
+        s.model = mod
+        s.model_threshold = 0.2
+        s.max_depth = 4
+        s.max_nodes = 20
+        s.save()
+        pred_result = predict.delay(pathway_pk, s.pk, limit=s.max_depth)
+    else:
+        pred_result = predict_simple.delay(mod.pk, "C1=CC=C(CSCC2=CC=CC=C2)C=C1")
+    _ = pred_result.get()
+    return round((datetime.now() - start).total_seconds(), 2)
 
 
 @tag("slow")
@@ -28,8 +48,41 @@ class EnviFormerTest(TestCase):
 
         mod.build_dataset()
         mod.build_model()
-        mod.multigen_eval = True
-        mod.save()
-        mod.evaluate_model()
+        mod.evaluate_model(True, eval_packages_objs)
 
         mod.predict("CCN(CC)C(=O)C1=CC(=CC=C1)C")
+
+    def test_predict_runtime(self):
+        with TemporaryDirectory() as tmpdir:
+            with self.settings(MODEL_DIR=tmpdir):
+                threshold = float(0.5)
+                data_package_objs = [self.BBD_SUBSET]
+                eval_packages_objs = [self.BBD_SUBSET]
+                mods = []
+                for _ in range(4):
+                    mod = EnviFormer.create(
+                        self.package, data_package_objs, eval_packages_objs, threshold=threshold
+                    )
+                    mod.build_dataset()
+                    mod.build_model()
+                    mods.append(mod)
+
+                # Test prediction time drops after first prediction
+                times = [measure_predict(mods[0]) for _ in range(5)]
+                print(f"First prediction took {times[0]} seconds, subsequent ones took {times[1:]}")
+
+                # Test pathway prediction
+                times = [measure_predict(mods[1], self.BBD_SUBSET.pathways[0].pk) for _ in range(5)]
+                print(
+                    f"First pathway prediction took {times[0]} seconds, subsequent ones took {times[1:]}"
+                )
+
+                # Test eviction by performing three predictions with every model, twice.
+                times = defaultdict(list)
+                for _ in range(
+                    2
+                ):  # Eviction should cause the second iteration here to have to reload the models
+                    for mod in mods:
+                        for _ in range(3):
+                            times[mod.pk].append(measure_predict(mod))
+                print(times)
diff --git a/tests/test_model.py b/tests/test_model.py
index e46046ec..f0355be9 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -30,7 +30,6 @@ class ModelTest(TestCase):
             self.package,
             rule_package_objs,
             data_package_objs,
-            eval_packages_objs,
             threshold=threshold,
             name="ECC - BBD - 0.5",
             description="Created MLRelativeReasoning in Testcase",
@@ -50,9 +49,7 @@ class ModelTest(TestCase):
 
         mod.build_dataset()
         mod.build_model()
-        mod.multigen_eval = True
-        mod.save()
-        mod.evaluate_model()
+        mod.evaluate_model(True, eval_packages_objs)
 
         results = mod.predict("CCN(CC)C(=O)C1=CC(=CC=C1)C")
diff --git a/tests/views/test_model_views.py b/tests/views/test_model_views.py
index 558277f5..10cbefe2 100644
--- a/tests/views/test_model_views.py
+++ b/tests/views/test_model_views.py
@@ -6,7 +6,7 @@ from epdb.logic import UserManager
 from epdb.models import Package, User
 
 
-@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models")
+@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models", CELERY_TASK_ALWAYS_EAGER=True)
 class PathwayViewTest(TestCase):
     fixtures = ["test_fixtures_incl_model.jsonl.gz"]
diff --git a/tests/views/test_pathway_views.py b/tests/views/test_pathway_views.py
index 9e64e22f..b5fe99fd 100644
--- a/tests/views/test_pathway_views.py
+++ b/tests/views/test_pathway_views.py
@@ -6,7 +6,7 @@ from epdb.logic import UserManager, PackageManager
 from epdb.models import Pathway, Edge
 
 
-@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models")
+@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models", CELERY_TASK_ALWAYS_EAGER=True)
 class PathwayViewTest(TestCase):
     fixtures = ["test_fixtures_incl_model.jsonl.gz"]
diff --git a/utilities/chem.py b/utilities/chem.py
index 6de46147..279de26f 100644
--- a/utilities/chem.py
+++ b/utilities/chem.py
@@ -729,6 +729,7 @@ class IndigoUtils(object):
         height: int = 0,
         educt_functional_groups: Dict[str, int] = None,
         product_functional_groups: Dict[str, int] = None,
+        debug: bool = False,
     ):
         if educt_functional_groups is None:
             educt_functional_groups = {}
@@ -739,6 +740,11 @@ class IndigoUtils(object):
         i = Indigo()
         renderer = IndigoRenderer(i)
 
+        if debug:
+            i.setOption("render-atom-ids-visible", True)
+            i.setOption("render-bond-ids-visible", False)
+            i.setOption("render-atom-bond-ids-from-one", True)
+
         i.setOption("render-output-format", "svg")
         i.setOption("render-coloring", True)
         i.setOption("render-image-size", width, height)
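The new `debug` flag on the Indigo-based depiction helper in `utilities/chem.py` switches on atom-id rendering, which is useful when checking atom numbering in rule depictions. A rough, self-contained illustration of those same Indigo options outside the helper; the SMILES and output file name are arbitrary, only the `setOption` calls mirror the diff:

```python
# Standalone sketch of the debug rendering options added in utilities/chem.py;
# assumes the epam-indigo package is installed.
from indigo import Indigo
from indigo.renderer import IndigoRenderer

i = Indigo()
renderer = IndigoRenderer(i)

# Same options the helper enables when debug=True
i.setOption("render-atom-ids-visible", True)
i.setOption("render-bond-ids-visible", False)
i.setOption("render-atom-bond-ids-from-one", True)

i.setOption("render-output-format", "svg")
i.setOption("render-coloring", True)

mol = i.loadMolecule("CCN(CC)C(=O)C1=CC(=CC=C1)C")  # arbitrary example structure
renderer.renderToFile(mol, "debug_depiction.svg")
```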
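Separately, `test_predict_runtime` above expects loaded models to be evicted and reloaded once more models have been used than fit in memory. The caching and eviction logic itself is not part of this diff; a minimal sketch of the general idea, with cache size, keying, and API all assumed rather than taken from the codebase:

```python
# Illustrative LRU-style cache of loaded models keyed by primary key; the real
# eviction behaviour exercised by the test may be implemented differently.
from collections import OrderedDict


class ModelCache:
    def __init__(self, max_size=3):
        self.max_size = max_size
        self._cache = OrderedDict()

    def get(self, pk, loader):
        if pk in self._cache:
            self._cache.move_to_end(pk)  # mark as most recently used
            return self._cache[pk]
        model = loader(pk)  # expensive load, e.g. reading weights from MODEL_DIR
        self._cache[pk] = model
        if len(self._cache) > self.max_size:
            self._cache.popitem(last=False)  # evict the least recently used entry
        return model
```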