[Feature] Identify Missing Rules (#177)

Fixes #97 Co-authored-by: Tim Lorsbach <tim@lorsba.ch> Reviewed-on: enviPath/enviPy#177
2025-10-30 00:47:45 +13:00
parent f1b4c5aadb
commit 13ed86a780
7 changed files with 361 additions and 29 deletions
--- a/epdb/tasks.py
+++ b/epdb/tasks.py
@@ -1,13 +1,15 @@
+import csv
+import io
 import logging
 from datetime import datetime
-from typing import Callable, Optional
+from typing import Any, Callable, List, Optional
 from uuid import uuid4

 from celery import shared_task
 from celery.utils.functional import LRUCache

 from epdb.logic import SPathway
-from epdb.models import EPModel, JobLog, Node, Package, Pathway, Setting, User
+from epdb.models import EPModel, JobLog, Node, Package, Pathway, Rule, Setting, User, Edge

 logger = logging.getLogger(__name__)
 ML_CACHE = LRUCache(3)  # Cache the three most recent ML models to reduce load times.
@@ -186,3 +188,96 @@ def predict(
        JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=pw.url)

    return pw.url
+
+
+@shared_task(bind=True, queue="background")
+def identify_missing_rules(
+    self,
+    pw_pks: List[int],
+    rule_package_pk: int,
+):
+    """Build a CSV report of the missing rules found for the given pathways.
+
+    For each pathway in ``pw_pks`` the applicable rules of the package
+    ``rule_package_pk`` are passed to ``PathwayUtils.find_missing_rules``;
+    every edge it reports becomes one CSV row.
+
+    Args:
+        pw_pks: Primary keys of the pathways to analyse.
+        rule_package_pk: Primary key of the package providing the rules.
+
+    Returns:
+        The report as CSV text, header row first. Every row has exactly one
+        cell per header column; multi-valued cells hold a Python list. The
+        educt cells hold the plain value when an edge has exactly one start
+        node and a list otherwise.
+
+    Raises:
+        Whatever the unguarded ``.objects.get`` lookups raise when the package,
+        an edge or a rule cannot be found.
+    """
+    from utilities.misc import PathwayUtils
+
+    def as_cell(values: List[Any]) -> Any:
+        # A single educt keeps its plain value; any other count becomes one
+        # list cell so the columns that follow stay aligned with the header.
+        return values[0] if len(values) == 1 else values
+
+    def as_list(smarts: Any) -> Any:
+        # Sets are converted to lists so the cell is rendered with list syntax;
+        # any other value is passed through unchanged.
+        return list(smarts) if isinstance(smarts, set) else smarts
+
+    # Resolve the rule set once; it is reused for every pathway.
+    rules = Package.objects.get(pk=rule_package_pk).get_applicable_rules()
+
+    # The first row is the header; every later row must match it cell by cell.
+    rows: List[Any] = [
+        [
+            "Package Name",
+            "Pathway Name",
+            "Educt Name",
+            "Educt SMILES",
+            "Reaction Name",
+            "Reaction SMIRKS",
+            "Triggered Rules",
+            "Reactant SMARTS",
+            "Product SMARTS",
+            "Product Names",
+            "Product SMILES",
+        ]
+    ]
+
+    for pw in Pathway.objects.filter(pk__in=pw_pks):
+        missing_rules = PathwayUtils(pw).find_missing_rules(rules)
+        package_name = pw.package.name
+        pathway_name = pw.name
+
+        for edge_url, rule_chain in missing_rules.items():
+            edge = Edge.objects.get(url=edge_url)
+            # The default node labels carry the name and SMILES that are reported.
+            educt_labels = [node.default_node_label for node in edge.start_nodes.all()]
+            product_labels = [node.default_node_label for node in edge.end_nodes.all()]
+            # Each rule_chain entry is indexable; its first item is the rule URL.
+            chain_rules = [Rule.objects.get(url=entry[0]) for entry in rule_chain]
+
+            rows.append(
+                [
+                    package_name,
+                    pathway_name,
+                    as_cell([label.name for label in educt_labels]),
+                    as_cell([label.smiles for label in educt_labels]),
+                    edge.edge_label.name,
+                    edge.edge_label.smirks(),
+                    [rule.name for rule in chain_rules],
+                    [as_list(rule.reactants_smarts) for rule in chain_rules],
+                    [as_list(rule.products_smarts) for rule in chain_rules],
+                    [label.name for label in product_labels],
+                    [label.smiles for label in product_labels],
+                ]
+            )
+
+    buffer = io.StringIO()
+    csv.writer(buffer).writerows(rows)
+    # getvalue() returns the whole buffer regardless of the stream position.
+    return buffer.getvalue()
--- a/epdb/views.py
+++ b/epdb/views.py
@@ -1866,6 +1866,25 @@ def package_pathway(request, package_uuid, pathway_uuid):

            return response

+        if (
+            request.GET.get("identify-missing-rules", False) == "true"
+            and request.GET.get("rule-package") is not None
+        ):
+            from .tasks import dispatch_eager, identify_missing_rules
+
+            rule_package = PackageManager.get_package_by_url(
+                current_user, request.GET.get("rule-package")
+            )
+            res = dispatch_eager(
+                current_user, identify_missing_rules, [current_pathway.pk], rule_package.pk
+            )
+
+            filename = f"{current_pathway.name.replace(' ', '_')}_{current_pathway.uuid}.csv"
+            response = HttpResponse(res, content_type="text/csv")
+            response["Content-Disposition"] = f'attachment; filename="{filename}"'
+
+            return response
+
        # Pathway d3_json() relies on a lot of related objects (Nodes, Structures, Edges, Reaction, Rules, ...)
        # we will again fetch the current pathway identified by this url, but this time together with nearly all
        # related objects