App Domain Pathway Prediction (#47)

Co-authored-by: Tim Lorsbach <tim@lorsba.ch> Reviewed-on: enviPath/enviPy#47
2025-08-19 02:53:56 +12:00
parent 3308d47071
commit c3c1d4f5cf
9 changed files with 424 additions and 178 deletions
--- a/epdb/logic.py
+++ b/epdb/logic.py
@ -895,9 +895,10 @@ class SearchManager(object):

 class SNode(object):

-    def __init__(self, smiles: str, depth: int):
+    def __init__(self, smiles: str, depth: int, app_domain_assessment: dict = None):
+        """Create an in-memory pathway node.
+
+        :param smiles: SMILES string of the compound; it is also what the node hashes on.
+        :param depth: depth of the node within the pathway.
+        :param app_domain_assessment: optional applicability-domain assessment
+            for this compound; ``None`` means no assessment has been attached yet.
+        """
        self.smiles = smiles
        self.depth = depth
+        self.app_domain_assessment = app_domain_assessment

    def __hash__(self):
        return hash(self.smiles)
@ -1040,7 +1041,7 @@ class SPathway(object):
    def depth(self):
        return max([v.depth for v in self.smiles_to_node.values()])

-    def _get_nodes_for_depth(self, depth: int):
+    def _get_nodes_for_depth(self, depth: int) -> List[SNode]:
        if depth == 0:
            return self.root_nodes

@ -1051,7 +1052,7 @@ class SPathway(object):

        return sorted(res, key=lambda x: x.smiles)

-    def _get_edges_for_depth(self, depth: int):
+    def _get_edges_for_depth(self, depth: int) -> List[SEdge]:
        res = []
        for e in self.edges:
            for n in e.educts:
@ -1076,15 +1077,44 @@ class SPathway(object):
        new_tp = False
        if substrates:
            for sub in substrates:
+
+                if sub.app_domain_assessment is None:
+                    if self.prediction_setting.model:
+                        if self.prediction_setting.model.app_domain:
+                            app_domain_assessment = self.prediction_setting.model.app_domain.assess(sub.smiles)[0]
+
+                            if self.persist is not None:
+                                n = self.snode_persist_lookup[sub]
+
+                                assert n.id is not None, "Node has no id! Should have been saved already... aborting!"
+                                node_data = n.simple_json()
+                                node_data['image'] = f"{n.url}?image=svg"
+                                app_domain_assessment['assessment']['node'] = node_data
+
+                                n.kv['app_domain_assessment'] = app_domain_assessment
+                                n.save()
+
+                            sub.app_domain_assessment = app_domain_assessment
+
+
                candidates = self.prediction_setting.expand(self, sub)
+                # candidates is a List of PredictionResult. The length of the List is equal to the number of rules
                for cand_set in candidates:
                    if cand_set:
                        new_tp = True
+                        # cand_set is a PredictionResult object that can consist of multiple candidate reactions
                        for cand in cand_set:
                            cand_nodes = []
+                            # candidate reactions can have multiple fragments
                            for c in cand:
                                if c not in self.smiles_to_node:
-                                    self.smiles_to_node[c] = SNode(c, sub.depth + 1)
+                                    # For new nodes do an AppDomain Assessment if an AppDomain is attached
+                                    app_domain_assessment = None
+                                    if self.prediction_setting.model:
+                                        if self.prediction_setting.model.app_domain:
+                                            app_domain_assessment = self.prediction_setting.model.app_domain.assess(c)[0]
+
+                                    self.smiles_to_node[c] = SNode(c, sub.depth + 1, app_domain_assessment)

                                node = self.smiles_to_node[c]
                                cand_nodes.append(node)
@ -1097,18 +1127,30 @@ class SPathway(object):
        if len(substrates) == 0 or from_node is not None:
            self.done = True

-        # Check if we need to write back data to database
+        # Check if we need to write back data to the database
        if new_tp and self.persist:
            self._sync_to_pathway()
-            # call save to update internal modified field
+            # call save to update the internal modified field
            self.persist.save()

-    def _sync_to_pathway(self):
+    def _sync_to_pathway(self) -> None:
        logger.info("Updating Pathway with SPathway")

        for snode in self.smiles_to_node.values():
            if snode not in self.snode_persist_lookup:
                n = Node.create(self.persist, snode.smiles, snode.depth)
+
+                if snode.app_domain_assessment is not None:
+                    app_domain_assessment = snode.app_domain_assessment
+
+                    assert n.id is not None, "Node has no id! Should have been saved already... aborting!"
+                    node_data = n.simple_json()
+                    node_data['image'] = f"{n.url}?image=svg"
+                    app_domain_assessment['assessment']['node'] = node_data
+
+                    n.kv['app_domain_assessment'] = app_domain_assessment
+                    n.save()
+
                self.snode_persist_lookup[snode] = n

        for sedge in self.edges:
@ -1130,7 +1172,6 @@ class SPathway(object):
                self.sedge_persist_lookup[sedge] = e

        logger.info("Update done!")
-        pass

    def to_json(self):
        nodes = []
--- a/epdb/models.py
+++ b/epdb/models.py
@ -912,7 +912,8 @@ class Pathway(EnviPathModel, AliasMixin, ScenarioMixin):
                    'reaction_probability': link['reaction_probability'],
                    'scenarios': link['scenarios'],
                    'source': node_url_to_idx[link['start_node_urls'][0]],
-                    'target': pseudo_idx
+                    'target': pseudo_idx,
+                    'app_domain': link.get('app_domain', None)
                }
                adjusted_links.append(new_link)

@ -927,7 +928,8 @@ class Pathway(EnviPathModel, AliasMixin, ScenarioMixin):
                        'reaction_probability': link['reaction_probability'],
                        'scenarios': link['scenarios'],
                        'source': pseudo_idx,
-                        'target': node_url_to_idx[target]
+                        'target': node_url_to_idx[target],
+                        'app_domain': link.get('app_domain', None)
                    }
                    adjusted_links.append(new_link)

@ -1044,6 +1046,8 @@ class Node(EnviPathModel, AliasMixin, ScenarioMixin):
        return '{}/node/{}'.format(self.pathway.url, self.uuid)

    def d3_json(self):
+        app_domain_data = self.get_app_domain_assessment_data()
+
        return {
            "depth": self.depth,
            "url": self.url,
@ -1053,6 +1057,10 @@ class Node(EnviPathModel, AliasMixin, ScenarioMixin):
            "name": self.default_node_label.name,
            "smiles": self.default_node_label.smiles,
            "scenarios": [{'name': s.name, 'url': s.url} for s in self.scenarios.all()],
+            "app_domain": {
+                'inside_app_domain': app_domain_data['assessment']['inside_app_domain'] if app_domain_data else None,
+                'uncovered_functional_groups': False,
+            }
        }

    @staticmethod
@ -1078,6 +1086,32 @@ class Node(EnviPathModel, AliasMixin, ScenarioMixin):
    def as_svg(self):
        return IndigoUtils.mol_to_svg(self.default_node_label.smiles)

+    def get_app_domain_assessment_data(self):
+        """Return the stored applicability-domain assessment, annotated with predicted edges.
+
+        The assessment is read from ``self.kv['app_domain_assessment']``. Every
+        transformation whose rule is attached to an edge starting at this node
+        gets ``is_predicted = True`` and an ``edge`` entry holding that edge's
+        ``simple_json()``.
+
+        The annotations are written to a deep copy, so the value kept in
+        ``self.kv`` is never modified and a later ``save()`` cannot persist them.
+
+        :return: the annotated assessment dict, or the stored falsy value
+            (normally ``None``) when no assessment is available.
+        """
+        import copy
+
+        stored = self.kv.get('app_domain_assessment', None)
+        if not stored:
+            return stored
+
+        # Work on a copy: 'is_predicted' and 'edge' are derived at read time.
+        data = copy.deepcopy(stored)
+
+        # Map rule uuid -> edge leaving this node that carries that rule.
+        edge_by_rule_uuid = dict()
+        for e in Edge.objects.filter(start_nodes__in=[self]):
+            # edge_label is optional (Edge.d3_json / Edge.as_svg guard for it as well).
+            if e.edge_label is None:
+                continue
+            for r in e.edge_label.rules.all():
+                edge_by_rule_uuid[str(r.uuid)] = e
+
+        for t in data['assessment']['transformations']:
+            edge = edge_by_rule_uuid.get(t['rule']['uuid'])
+            if edge is not None:
+                t['is_predicted'] = True
+                t['edge'] = edge.simple_json()
+
+        return data
+
+
+    def simple_json(self, include_description=False):
+        """Return the inherited ``simple_json()`` dict with a usable node name.
+
+        When the inherited representation reports the name ``'no name'``, the
+        name of ``default_node_label`` is used instead.
+
+        NOTE(review): ``include_description`` is accepted but not forwarded to
+        ``super().simple_json()`` - confirm this is intended.
+        """
+        payload = super().simple_json()
+        if payload.get('name', None) == 'no name':
+            payload['name'] = self.default_node_label.name
+        return payload
+

 class Edge(EnviPathModel, AliasMixin, ScenarioMixin):
    pathway = models.ForeignKey('epdb.Pathway', verbose_name='belongs to', on_delete=models.CASCADE, db_index=True)
@ -1090,19 +1124,44 @@ class Edge(EnviPathModel, AliasMixin, ScenarioMixin):
        return '{}/edge/{}'.format(self.pathway.url, self.uuid)

    def d3_json(self):
+        """Return this edge as a plain dict (name, URLs, reaction, probability, node URLs, scenarios).
+
+        An ``app_domain`` entry is added only when the applicability-domain
+        assessment of one of the start nodes contains a transformation that
+        was linked to this edge; otherwise the key is absent.
+        """
-        return {
+        edge_json = {
            'name': self.name,
            'id': self.url,
            'url': self.url,
            'image': self.url + '?image=svg',
            'reaction': {'name': self.edge_label.name, 'url': self.edge_label.url } if self.edge_label else None,
            'reaction_probability': self.kv.get('probability'),
-            # TODO
            'start_node_urls': [x.url for x in self.start_nodes.all()],
            'end_node_urls': [x.url for x in self.end_nodes.all()],
            "scenarios": [{'name': s.name, 'url': s.url} for s in self.scenarios.all()],
        }

+        # Look for this edge in the assessments stored on its start nodes.
+        for n in self.start_nodes.all():
+            app_domain_data = n.get_app_domain_assessment_data()
+
+            if app_domain_data:
+                for t in app_domain_data['assessment']['transformations']:
+                    # 'edge' is only present on transformations that were matched to an edge.
+                    if 'edge' in t and t['edge']['uuid'] == str(self.uuid):
+                        # Passes only if both scores reach their respective thresholds.
+                        passes_app_domain = (
+                                t['local_compatibility'] >= app_domain_data['ad_params']['local_compatibility_threshold']
+                        ) and (
+                            t['reliability'] >= app_domain_data['ad_params']['reliability_threshold']
+                        )
+
+                        edge_json['app_domain'] = {
+                            'passes_app_domain': passes_app_domain,
+                            'local_compatibility': t['local_compatibility'],
+                            'local_compatibility_threshold': app_domain_data['ad_params']['local_compatibility_threshold'],
+                            'reliability': t['reliability'],
+                            'reliability_threshold': app_domain_data['ad_params']['reliability_threshold'],
+                            'times_triggered': t['times_triggered'],
+                        }
+
+                        # Ends the scan of this node's transformations only;
+                        # any remaining start nodes are still checked.
+                        break
+
+        return edge_json
+
+
    @staticmethod
    def create(pathway, start_nodes: List[Node], end_nodes: List[Node], rule: Optional[Rule] = None, name: Optional[str] = None,
               description: Optional[str] = None):
@ -1136,6 +1195,14 @@ class Edge(EnviPathModel, AliasMixin, ScenarioMixin):
    def as_svg(self):
        return self.edge_label.as_svg if self.edge_label else None

+    def simple_json(self, include_description=False):
+        """Return the inherited ``simple_json()`` dict with a usable edge name.
+
+        When the inherited representation reports the name ``'no name'`` and
+        the edge has an ``edge_label``, the label's name is used instead. An
+        edge without a label keeps the inherited name.
+
+        NOTE(review): ``include_description`` is accepted but not forwarded to
+        ``super().simple_json()`` - confirm this is intended.
+        """
+        res = super().simple_json()
+        # edge_label is optional (as_svg and d3_json guard for it as well),
+        # so only fall back to it when it is set.
+        if res.get('name', None) == 'no name' and self.edge_label:
+            res['name'] = self.edge_label.name
+
+        return res
+

 class EPModel(PolymorphicModel, EnviPathModel):
    package = models.ForeignKey('epdb.Package', verbose_name='Package', on_delete=models.CASCADE, db_index=True)
@ -1463,6 +1530,7 @@ class MLRelativeReasoning(EPModel):

        return res

+
 class ApplicabilityDomain(EnviPathModel):
    model = models.ForeignKey(MLRelativeReasoning, on_delete=models.CASCADE)

@ -1614,7 +1682,7 @@ class ApplicabilityDomain(EnviPathModel):
                    'model': self.model.simple_json(),
                    'num_neighbours': self.num_neighbours,
                    'reliability_threshold': self.reliability_threshold,
-                    'local_compatibilty_threshold': self.local_compatibilty_threshold,
+                    'local_compatibility_threshold': self.local_compatibilty_threshold,
                },
                'assessment': {
                    'smiles': smiles,
--- a/epdb/tasks.py
+++ b/epdb/tasks.py
@ -58,7 +58,7 @@ def predict(pw_pk: int, pred_setting_pk: int, limit: Optional[int] = None, node_
                spw.predict_step(from_depth=level)
                level += 1

-                # break in case we are in incremental model
+                # break in case we are in incremental mode
                if limit != -1:
                    if level >= limit:
                        break
--- a/epdb/views.py
+++ b/epdb/views.py
@ -1454,6 +1454,7 @@ def package_pathway_node(request, package_uuid, pathway_uuid, node_uuid):
        ]

        context['node'] = current_node
+        context['app_domain_assessment_data'] = json.dumps(current_node.get_app_domain_assessment_data())

        return render(request, 'objects/node.html', context)