[Feature] Engineer Pathway (#256)

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#256
This commit is contained in:
2025-12-10 07:35:42 +13:00
parent 46b0f1c124
commit 648ec150a9
17 changed files with 990 additions and 127 deletions

View File

@ -1815,7 +1815,7 @@ def get_model(request, package_uuid, model_uuid, c: Query[Classify]):
from epdb.tasks import dispatch_eager, predict_simple
pred_res = dispatch_eager(request.user, predict_simple, mod.pk, stand_smiles)
_, pred_res = dispatch_eager(request.user, predict_simple, mod.pk, stand_smiles)
result = []

View File

@ -1398,6 +1398,9 @@ class SEdge(object):
self.rule = rule
self.probability = probability
def product_smiles(self):
    """Return the SMILES of every product node of this edge, in product order."""
    collected = []
    for product in self.products:
        collected.append(product.smiles)
    return collected
def __hash__(self):
full_hash = 0
@ -1630,6 +1633,14 @@ class SPathway(object):
# call save to update the internal modified field
self.persist.save()
def get_edge_for_educt_smiles(self, smiles: str) -> List["SEdge"]:
    """Return the edges of this pathway that have an educt with the given SMILES.

    Args:
        smiles: SMILES string compared for exact equality against the
            ``smiles`` attribute of each educt node of each edge.

    Returns:
        The matching edges in the iteration order of ``self.edges``. Every
        edge is listed at most once, even when several of its educts carry
        the requested SMILES.
    """
    return [
        edge
        for edge in self.edges
        # any() stops at the first hit, so an edge is never reported twice.
        if any(node.smiles == smiles for node in edge.educts)
    ]
def _sync_to_pathway(self) -> None:
logger.info("Updating Pathway with SPathway")

View File

@ -754,6 +754,30 @@ class Compound(EnviPathModel, AliasMixin, ScenarioMixin, ChemicalIdentifierMixin
@property
def normalized_structure(self) -> "CompoundStructure":
    """Return this compound's normalized structure, inferring one if none is stored.

    If a CompoundStructure flagged ``normalized_structure=True`` exists for
    this compound, it is fetched and returned. Otherwise the SMILES of all
    structures are standardized; when they collapse to exactly one
    standardized SMILES, ``CompoundStructure.create`` is called with
    ``normalized_structure=True`` and its result is returned.

    Raises:
        ValueError: If the structures do not standardize to exactly one SMILES.
    """
    flagged = CompoundStructure.objects.filter(compound=self, normalized_structure=True)
    if flagged.exists():
        return CompoundStructure.objects.get(compound=self, normalized_structure=True)

    num_structs = self.structures.count()
    stand_smiles = {
        FormatConverter.standardize(structure.smiles) for structure in self.structures.all()
    }

    if len(stand_smiles) == 1:
        # All structures agree on one standardized form: use it.
        return CompoundStructure.create(
            self,
            stand_smiles.pop(),
            name="Normalized structure of {}".format(self.name),
            description="{} (in its normalized form)".format(self.description),
            normalized_structure=True,
        )

    logger.debug(f"#Structures: {num_structs} - #Standardized SMILES: {len(stand_smiles)}")
    logger.debug(f"Couldn't infer normalized structure for {self.name} - {self.url}")
    raise ValueError(
        f"Couldn't find nor infer normalized structure for {self.name} ({self.url})"
    )
def _url(self):
@ -901,57 +925,121 @@ class Compound(EnviPathModel, AliasMixin, ScenarioMixin, ChemicalIdentifierMixin
if self in mapping:
return mapping[self]
new_compound = Compound.objects.create(
package=target,
name=self.name,
description=self.description,
kv=self.kv.copy() if self.kv else {},
)
mapping[self] = new_compound
default_structure_smiles = self.default_structure.smiles
normalized_structure_smiles = self.normalized_structure.smiles
# Copy compound structures
for structure in self.structures.all():
if structure not in mapping:
new_structure = CompoundStructure.objects.create(
compound=new_compound,
smiles=structure.smiles,
canonical_smiles=structure.canonical_smiles,
inchikey=structure.inchikey,
normalized_structure=structure.normalized_structure,
name=structure.name,
description=structure.description,
kv=structure.kv.copy() if structure.kv else {},
)
mapping[structure] = new_structure
existing_compound = None
existing_normalized_compound = None
# Copy external identifiers for structure
for ext_id in structure.external_identifiers.all():
ExternalIdentifier.objects.create(
content_object=new_structure,
database=ext_id.database,
identifier_value=ext_id.identifier_value,
url=ext_id.url,
is_primary=ext_id.is_primary,
# Dedup check - Check if we find a direct match for a given SMILES
if CompoundStructure.objects.filter(
smiles=default_structure_smiles, compound__package=target
).exists():
existing_compound = CompoundStructure.objects.get(
smiles=default_structure_smiles, compound__package=target
).compound
# Check if we can find the standardized one
if CompoundStructure.objects.filter(
smiles=normalized_structure_smiles, compound__package=target
).exists():
existing_normalized_compound = CompoundStructure.objects.get(
smiles=normalized_structure_smiles, compound__package=target
).compound
if any([existing_compound, existing_normalized_compound]):
if existing_normalized_compound and existing_compound:
# We only have to set the mapping
mapping[self] = existing_compound
for structure in self.structures.all():
if structure not in mapping:
mapping[structure] = existing_compound.structures.get(
smiles=structure.smiles
)
return existing_compound
elif existing_normalized_compound:
mapping[self] = existing_normalized_compound
# Merge the structure into the existing compound
for structure in self.structures.all():
if existing_normalized_compound.structures.filter(
smiles=structure.smiles
).exists():
continue
# Create a new Structure
cs = CompoundStructure.create(
existing_normalized_compound,
structure.smiles,
name=structure.name,
description=structure.description,
normalized_structure=structure.normalized_structure,
)
if self.default_structure:
new_compound.default_structure = mapping.get(self.default_structure)
new_compound.save()
mapping[structure] = cs
for a in self.aliases:
new_compound.add_alias(a)
new_compound.save()
return existing_normalized_compound
# Copy external identifiers for compound
for ext_id in self.external_identifiers.all():
ExternalIdentifier.objects.create(
content_object=new_compound,
database=ext_id.database,
identifier_value=ext_id.identifier_value,
url=ext_id.url,
is_primary=ext_id.is_primary,
else:
raise ValueError(
f"Found a CompoundStructure for {default_structure_smiles} but not for {normalized_structure_smiles} in target package {target.name}"
)
else:
# Here we can safely use Compound.objects.create as we won't end up in a duplicate
new_compound = Compound.objects.create(
package=target,
name=self.name,
description=self.description,
kv=self.kv.copy() if self.kv else {},
)
mapping[self] = new_compound
# Copy underlying structures
for structure in self.structures.all():
if structure not in mapping:
new_structure = CompoundStructure.objects.create(
compound=new_compound,
smiles=structure.smiles,
canonical_smiles=structure.canonical_smiles,
inchikey=structure.inchikey,
normalized_structure=structure.normalized_structure,
name=structure.name,
description=structure.description,
kv=structure.kv.copy() if structure.kv else {},
)
mapping[structure] = new_structure
# Copy external identifiers for structure
for ext_id in structure.external_identifiers.all():
ExternalIdentifier.objects.create(
content_object=new_structure,
database=ext_id.database,
identifier_value=ext_id.identifier_value,
url=ext_id.url,
is_primary=ext_id.is_primary,
)
if self.default_structure:
new_compound.default_structure = mapping.get(self.default_structure)
new_compound.save()
for a in self.aliases:
new_compound.add_alias(a)
new_compound.save()
# Copy external identifiers for compound
for ext_id in self.external_identifiers.all():
ExternalIdentifier.objects.create(
content_object=new_compound,
database=ext_id.database,
identifier_value=ext_id.identifier_value,
url=ext_id.url,
is_primary=ext_id.is_primary,
)
return new_compound
class Meta:
@ -1112,34 +1200,44 @@ class Rule(PolymorphicModel, EnviPathModel, AliasMixin, ScenarioMixin):
rule_type = type(self)
if rule_type == SimpleAmbitRule:
new_rule = SimpleAmbitRule.objects.create(
new_rule = SimpleAmbitRule.create(
package=target,
name=self.name,
description=self.description,
smirks=self.smirks,
reactant_filter_smarts=self.reactant_filter_smarts,
product_filter_smarts=self.product_filter_smarts,
kv=self.kv.copy() if self.kv else {},
)
if self.kv:
new_rule.kv.update(**self.kv)
new_rule.save()
elif rule_type == SimpleRDKitRule:
new_rule = SimpleRDKitRule.objects.create(
new_rule = SimpleRDKitRule.create(
package=target,
name=self.name,
description=self.description,
reaction_smarts=self.reaction_smarts,
kv=self.kv.copy() if self.kv else {},
)
if self.kv:
new_rule.kv.update(**self.kv)
new_rule.save()
elif rule_type == ParallelRule:
new_rule = ParallelRule.objects.create(
package=target,
name=self.name,
description=self.description,
kv=self.kv.copy() if self.kv else {},
)
# Copy simple rules relationships
new_srs = []
for simple_rule in self.simple_rules.all():
copied_simple_rule = simple_rule.copy(target, mapping)
new_rule.simple_rules.add(copied_simple_rule)
new_srs.append(copied_simple_rule)
new_rule = ParallelRule.create(
package=target,
simple_rules=new_srs,
name=self.name,
description=self.description,
)
elif rule_type == SequentialRule:
raise ValueError("SequentialRule copy not implemented!")
else:
@ -1343,6 +1441,20 @@ class ParallelRule(Rule):
f"Simple rule {sr.uuid} does not belong to package {package.uuid}!"
)
# Deduplication check
query = ParallelRule.objects.annotate(
srs_count=Count("simple_rules", filter=Q(simple_rules__in=simple_rules), distinct=True)
)
existing_rule_qs = query.filter(
srs_count=len(simple_rules),
)
if existing_rule_qs.exists():
if existing_rule_qs.count() > 1:
logger.error(f"Found more than one reaction for given input! {existing_rule_qs}")
return existing_rule_qs.first()
r = ParallelRule()
r.package = package
@ -1524,31 +1636,44 @@ class Reaction(EnviPathModel, AliasMixin, ScenarioMixin, ReactionIdentifierMixin
if self in mapping:
return mapping[self]
# Create new reaction
new_reaction = Reaction.objects.create(
package=target,
name=self.name,
description=self.description,
multi_step=self.multi_step,
medline_references=self.medline_references,
kv=self.kv.copy() if self.kv else {},
)
mapping[self] = new_reaction
copied_reaction_educts = []
copied_reaction_products = []
copied_reaction_rules = []
# Copy educts (reactant compounds)
for educt in self.educts.all():
copied_educt = educt.copy(target, mapping)
new_reaction.educts.add(copied_educt)
copied_reaction_educts.append(copied_educt)
# Copy products
for product in self.products.all():
copied_product = product.copy(target, mapping)
new_reaction.products.add(copied_product)
copied_reaction_products.append(copied_product)
# Copy rules
for rule in self.rules.all():
copied_rule = rule.copy(target, mapping)
new_reaction.rules.add(copied_rule)
copied_reaction_rules.append(copied_rule)
new_reaction = Reaction.create(
package=target,
name=self.name,
description=self.description,
educts=copied_reaction_educts,
products=copied_reaction_products,
rules=copied_reaction_rules,
multi_step=self.multi_step,
)
if self.medline_references:
new_reaction.medline_references = self.medline_references
new_reaction.save()
if self.kv:
new_reaction.kv = self.kv
new_reaction.save()
mapping[self] = new_reaction
# Copy external identifiers
for ext_id in self.external_identifiers.all():
@ -1666,14 +1791,12 @@ class Pathway(EnviPathModel, AliasMixin, ScenarioMixin):
while len(queue):
current = queue.pop()
processed.add(current)
nodes.append(current.d3_json())
for e in self.edges:
if current in e.start_nodes.all():
for prod in e.end_nodes.all():
if prod not in queue and prod not in processed:
queue.append(prod)
for e in self.edges.filter(start_nodes=current).distinct():
for prod in e.end_nodes.all():
if prod not in queue and prod not in processed:
queue.append(prod)
# We shouldn't lose or make up nodes...
assert len(nodes) == len(self.nodes)
@ -1838,6 +1961,8 @@ class Pathway(EnviPathModel, AliasMixin, ScenarioMixin):
return mapping[self]
# Start copying the pathway
# It's safe to use .objects.create here as Pathways themselves aren't
# deduplicated
new_pathway = Pathway.objects.create(
package=target,
name=self.name,
@ -1975,6 +2100,7 @@ class Node(EnviPathModel, AliasMixin, ScenarioMixin):
else None,
"uncovered_functional_groups": False,
},
"is_engineered_intermediate": self.kv.get("is_engineered_intermediate", False),
}
@staticmethod
@ -3762,23 +3888,29 @@ class JobLog(TimeStampedModel):
done_at = models.DateTimeField(null=True, blank=True, default=None)
task_result = models.TextField(null=True, blank=True, default=None)
TERMINAL_STATES = [
"SUCCESS",
"FAILURE",
"REVOKED",
"IGNORED",
]
def is_in_terminal_state(self):
    """Tell whether the job's current status is listed in ``TERMINAL_STATES``."""
    current_status = self.status
    return current_status in self.TERMINAL_STATES
def check_for_update(self):
if self.is_in_terminal_state():
return
async_res = self.get_result()
new_status = async_res.state
TERMINAL_STATES = [
"SUCCESS",
"FAILURE",
"REVOKED",
"IGNORED",
]
if new_status != self.status and new_status in TERMINAL_STATES:
if new_status != self.status and new_status in self.TERMINAL_STATES:
self.status = new_status
self.done_at = async_res.date_done
if new_status == "SUCCESS":
self.task_result = async_res.result
self.task_result = str(async_res.result) if async_res.result else None
self.save()
@ -3789,3 +3921,13 @@ class JobLog(TimeStampedModel):
from celery.result import AsyncResult
return AsyncResult(str(self.task_id))
def parsed_result(self):
    """Return the stored task result in a form suited for display.

    Returns:
        ``None`` while the job is not in a terminal state or when no result
        is stored. For the ``engineer_pathways`` job the stored string is
        parsed with ``ast.literal_eval``; for every other job the stored
        string is returned unchanged.
    """
    import ast

    if self.is_in_terminal_state() and self.task_result is not None:
        if self.job_name == "engineer_pathways":
            # The result was stored via str(); turn it back into Python literals.
            return ast.literal_eval(self.task_result)
        return self.task_result

    return None

View File

@ -36,7 +36,7 @@ def dispatch_eager(user: "User", job: Callable, *args, **kwargs):
log.task_result = str(x) if x else None
log.save()
return x
return log, x
except Exception as e:
logger.exception(e)
raise e
@ -52,7 +52,7 @@ def dispatch(user: "User", job: Callable, *args, **kwargs):
log.status = "INITIAL"
log.save()
return x.result
return log
except Exception as e:
logger.exception(e)
raise e
@ -175,6 +175,7 @@ def predict(
except Exception as e:
pw.kv.update({"status": "failed"})
pw.kv.update(**{"error": str(e)})
pw.save()
if JobLog.objects.filter(task_id=self.request.id).exists():
@ -284,3 +285,71 @@ def identify_missing_rules(
buffer.seek(0)
return buffer.getvalue()
@shared_task(bind=True, queue="background")
def engineer_pathways(self, pw_pks: List[int], setting_pk: int, target_package_pk: int):
    """Engineer the given pathways and persist the findings in a target package.

    Each pathway selected by ``pw_pks`` is run through
    ``PathwayUtils.engineer``. If that yields intermediates, the pathway is
    copied into the target package, the intermediate nodes and edges are
    added to the copy (new nodes are flagged with
    ``kv["is_engineered_intermediate"]``), and the predicted pathway is
    persisted via ``PathwayUtils.spathway_to_pathway``. Pathways without
    intermediates produce no output.

    Args:
        pw_pks: Primary keys of the pathways to engineer.
        setting_pk: Primary key of the Setting passed to ``PathwayUtils.engineer``.
        target_package_pk: Primary key of the Package that receives the new pathways.

    Returns:
        A tuple ``(intermediate_pathways, predicted_pathways)`` of URL lists:
        the engineered copies and the persisted predicted pathways.
    """
    from utilities.misc import PathwayUtils

    setting = Setting.objects.get(pk=setting_pk)
    # Temporarily set model_threshold to 0.0 to keep all tps
    # (the setting is not saved in this function).
    setting.model_threshold = 0.0

    target = Package.objects.get(pk=target_package_pk)

    intermediate_pathways = []
    predicted_pathways = []

    for pw in Pathway.objects.filter(pk__in=pw_pks):
        pu = PathwayUtils(pw)
        eng_pw, _node_to_snode_mapping, intermediates = pu.engineer(setting)

        # Nothing to store for pathways without intermediates.
        if len(intermediates) == 0:
            continue

        # Work on a copy of the original pathway inside the target package.
        copy_mapping = {}
        copied_pw = pw.copy(target, copy_mapping)
        copied_pw.name = f"{copied_pw.name} (Engineered)"
        copied_pw.description = f"The original Pathway can be found here: {pw.url}"
        copied_pw.save()

        for inter in intermediates:
            # inter[0] / inter[1] are looked up in the copy mapping to obtain
            # the copied start / end; inter[2] / inter[3] are the matching
            # SNodes and inter[4] holds the intermediate edges.
            start = copy_mapping[inter[0]]
            end = copy_mapping[inter[1]]
            start_snode = inter[2]
            end_snode = inter[3]

            for intermediate_edge in inter[4]:
                smiles_to_node = {}
                involved_snodes = list(
                    set(intermediate_edge.educts + intermediate_edge.products)
                )

                for snode in involved_snodes:
                    # Start and end already exist in the copied pathway.
                    if snode == start_snode:
                        smiles_to_node[snode.smiles] = start
                        continue
                    if snode == end_snode:
                        smiles_to_node[snode.smiles] = end
                        continue

                    if snode.smiles in smiles_to_node:
                        continue

                    n = Node.create(copied_pw, smiles=snode.smiles, depth=snode.depth)
                    # Used in viz to highlight intermediates
                    n.kv.update({"is_engineered_intermediate": True})
                    n.save()
                    smiles_to_node[snode.smiles] = n

                Edge.create(
                    copied_pw,
                    [smiles_to_node[educt.smiles] for educt in intermediate_edge.educts],
                    [smiles_to_node[product.smiles] for product in intermediate_edge.products],
                    rule=intermediate_edge.rule,
                )

        # Persist the predicted pathway
        pred_pw = pu.spathway_to_pathway(target, eng_pw, name=f"{pw.name} (Predicted)")

        intermediate_pathways.append(copied_pw.url)
        predicted_pathways.append(pred_pw.url)

    return intermediate_pathways, predicted_pathways

View File

@ -196,7 +196,8 @@ urlpatterns = [
re_path(r"^indigo/dearomatize$", v.dearomatize, name="indigo_dearomatize"),
re_path(r"^indigo/layout$", v.layout, name="indigo_layout"),
re_path(r"^depict$", v.depict, name="depict"),
re_path(r"^jobs", v.jobs, name="jobs"),
path("jobs", v.jobs, name="jobs"),
path("jobs/<uuid:job_uuid>", v.job, name="job detail"),
# OAuth Stuff
path("o/userinfo/", v.userinfo, name="oauth_userinfo"),
# Static Pages

View File

@ -970,7 +970,7 @@ def package_model(request, package_uuid, model_uuid):
if classify:
from epdb.tasks import dispatch_eager, predict_simple
pred_res = dispatch_eager(
_, pred_res = dispatch_eager(
current_user, predict_simple, current_model.pk, stand_smiles
)
@ -2023,7 +2023,7 @@ def package_pathway(request, package_uuid, pathway_uuid):
rule_package = PackageManager.get_package_by_url(
current_user, request.GET.get("rule-package")
)
res = dispatch_eager(
_, res = dispatch_eager(
current_user, identify_missing_rules, [current_pathway.pk], rule_package.pk
)
@ -2927,6 +2927,75 @@ def jobs(request):
return render(request, "collections/joblog.html", context)
elif request.method == "POST":
job_name = request.POST.get("job-name")
if job_name == "engineer-pathway":
pathway_to_engineer = request.POST.get("pathway-to-engineer")
engineer_setting = request.POST.get("engineer-setting")
if not all([pathway_to_engineer, engineer_setting]):
raise BadRequest(
f"Unable to run {job_name} as it requires 'pathway-to-engineer' and 'engineer-setting' parameters."
)
pathway_package = PackageManager.get_package_by_url(current_user, pathway_to_engineer)
pathway_to_engineer = Pathway.objects.get(
url=pathway_to_engineer, package=pathway_package
)
engineer_setting = SettingManager.get_setting_by_url(current_user, engineer_setting)
target_package = PackageManager.create_package(
current_user,
f"Autogenerated Package for Pathway Engineering of {pathway_to_engineer.name}",
f"This Package was generated automatically for the engineering Task of {pathway_to_engineer.name}.",
)
from .tasks import dispatch, engineer_pathways
res = dispatch(
current_user,
engineer_pathways,
[pathway_to_engineer.pk],
engineer_setting.pk,
target_package.pk,
)
return redirect(f"{s.SERVER_URL}/jobs/{res.task_id}")
else:
raise BadRequest(f"Job {job_name} is not supported!")
else:
return HttpResponseNotAllowed(["GET", "POST"])
def job(request, job_uuid):
    """Render the detail page of a single job.

    Superusers may look up any job by its task id; every other user only
    finds jobs that belong to them. Before rendering, the job is asked to
    refresh its status via ``check_for_update()``.

    Args:
        request: The incoming HTTP request.
        job_uuid: Task id of the JobLog to show, taken from the URL.

    Returns:
        The rendered ``objects/joblog.html`` page for GET requests,
        ``HttpResponseNotAllowed`` for every other method.

    Raises:
        Http404: If no JobLog matches the task id (and, for non-superusers,
            the requesting user).
    """
    # Local import keeps this block self-contained.
    from django.shortcuts import get_object_or_404

    current_user = _anonymous_or_real(request)
    context = get_base_context(request)

    if request.method != "GET":
        return HttpResponseNotAllowed(["GET"])

    lookup = {"task_id": job_uuid}
    if not current_user.is_superuser:
        # Regular users must only see their own jobs.
        lookup["user"] = current_user

    # Respond with 404 instead of an unhandled DoesNotExist (HTTP 500) when
    # the job is unknown or not visible to the requesting user.
    job_log = get_object_or_404(JobLog, **lookup)

    # No op if status is already in a terminal state
    job_log.check_for_update()

    context["object_type"] = "joblog"
    context["breadcrumbs"] = [
        {"Home": s.SERVER_URL},
        {"Jobs": s.SERVER_URL + "/jobs"},
        {job_log.job_name: f"{s.SERVER_URL}/jobs/{job_log.task_id}"},
    ]
    context["job"] = job_log

    return render(request, "objects/joblog.html", context)
###########
# KETCHER #

View File

@ -524,7 +524,7 @@ function draw(pathway, elem) {
node.append("circle")
// make radius "invisible" for pseudo nodes
.attr("r", d => d.pseudo ? 0.01 : nodeRadius)
.style("fill", "#e8e8e8");
.style("fill", d => d.is_engineered_intermediate ? "#42eff5" : "#e8e8e8");
// Add image only for non pseudo nodes
node.filter(d => !d.pseudo).each(function (d, i) {

View File

View File

@ -41,6 +41,14 @@
<i class="glyphicon glyphicon-floppy-save"></i> Download Pathway as Image</a
>
</li>
<li>
<a
role="button"
onclick="document.getElementById('engineer_pathway_modal').showModal(); return false;"
>
<i class="glyphicon glyphicon-cog"></i> Engineer Pathway</a
>
</li>
{% if meta.can_edit %}
<li>
<a

View File

@ -20,6 +20,9 @@
<table class="table-zebra table">
<thead>
<tr>
{% if meta.user.is_superuser %}
<th>User</th>
{% endif %}
<th>ID</th>
<th>Name</th>
<th>Status</th>
@ -36,7 +39,11 @@
<a href="{{ job.user.url }}">{{ job.user.username }}</a>
</td>
{% endif %}
<td>{{ job.task_id }}</td>
<td>
<a href="{% url 'job detail' job.task_id %}"
>{{ job.task_id }}</a
>
</td>
<td>{{ job.job_name }}</td>
<td>{{ job.status }}</td>
<td>{{ job.created }}</td>

View File

@ -0,0 +1,107 @@
{% load static %}
<dialog
id="engineer_pathway_modal"
class="modal"
x-data="modalForm()"
@close="reset()"
>
<div class="modal-box max-w-2xl">
<!-- Header -->
<h3 class="font-bold text-lg">Engineer Pathway</h3>
<!-- Close button (X) -->
<form method="dialog">
<button
class="btn btn-sm btn-circle btn-ghost absolute right-2 top-2"
:disabled="isSubmitting"
>
</button>
</form>
<!-- Body -->
<div class="py-4">
<p class="mb-4">
Engineering Package is a process used to identify potential intermediate
transformation products. To achieve this, a pathway is predicted using
an existing setting. The threshold is temporarily set to zero to ensure
that even intermediates with very low probability are not filtered out.
<br /><br />
If any intermediates are found, two pathways will be saved in a
generated Package:
<br />
1. The engineered Pathway with the identified intermediates highlighted.
<br />
2. The fully predicted Pathway preserved for further analysis.
<br /><br />
Note: This is an asynchronous process and may take a few minutes to
complete. You will be redirected to a page containing details about the
task and its status.
</p>
<form
id="engineer-pathway-modal-form"
accept-charset="UTF-8"
action="{% url 'jobs' %}"
method="post"
>
{% csrf_token %}
<div class="form-control mb-3">
<label class="label" for="engineer-setting">
<span class="label-text">
Select the Setting you want to use for pathway engineering
</span>
</label>
<select
id="engineer-setting"
name="engineer-setting"
class="select select-bordered w-full"
required
>
<option value="" disabled selected>Select Setting</option>
{% for s in meta.available_settings %}
<option value="{{ s.url }}">{{ s.name|safe }}</option>
{% endfor %}
</select>
<input
type="hidden"
name="pathway-to-engineer"
value="{{ pathway.url }}"
/>
<input type="hidden" name="job-name" value="engineer-pathway" />
</div>
</form>
</div>
<!-- Footer -->
<div class="modal-action">
<button
type="button"
class="btn"
onclick="this.closest('dialog').close()"
:disabled="isSubmitting"
>
Close
</button>
<button
type="button"
class="btn btn-primary"
@click="submit('engineer-pathway-modal-form')"
:disabled="isSubmitting"
>
<span x-show="!isSubmitting">Engineer</span>
<span
x-show="isSubmitting"
class="loading loading-spinner loading-sm"
></span>
<span x-show="isSubmitting">Engineering...</span>
</button>
</div>
</div>
<!-- Backdrop -->
<form method="dialog" class="modal-backdrop">
<button :disabled="isSubmitting">close</button>
</form>
</dialog>

View File

@ -0,0 +1,123 @@
{% extends "framework_modern.html" %}
{% block content %}
{% block action_modals %}
{# {% include "modals/objects/refresh_job_log.html" %}#}
{% endblock action_modals %}
<div class="space-y-2 p-4">
<!-- Header Section -->
<div class="card bg-base-100">
<div class="card-body">
<div class="flex items-center justify-between">
<h2 class="card-title text-2xl">Job Status for {{ job.job_name }}</h2>
<div id="actionsButton" class="dropdown dropdown-end hidden">
<div tabindex="0" role="button" class="btn btn-ghost btn-sm">
<svg
xmlns="http://www.w3.org/2000/svg"
width="16"
height="16"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
class="lucide lucide-wrench"
>
<path
d="M14.7 6.3a1 1 0 0 0 0 1.4l1.6 1.6a1 1 0 0 0 1.4 0l3.77-3.77a6 6 0 0 1-7.94 7.94l-6.91 6.91a2.12 2.12 0 0 1-3-3l6.91-6.91a6 6 0 0 1 7.94-7.94l-3.76 3.76z"
/>
</svg>
Actions
</div>
<ul
tabindex="-1"
class="dropdown-content menu bg-base-100 rounded-box z-50 w-52 p-2"
>
{% block actions %}
{% include "actions/objects/joblog.html" %}
{% endblock %}
</ul>
</div>
</div>
</div>
</div>
<!-- Description -->
<div class="collapse-arrow bg-base-200 collapse">
<input type="checkbox" checked />
<div class="collapse-title text-xl font-medium">Description</div>
<div class="collapse-content">
Status page for Task {{ job.job_name }}
</div>
</div>
<!-- Job Status -->
<div class="collapse-arrow bg-base-200 collapse">
<input type="checkbox" checked />
<div class="collapse-title text-xl font-medium">Task Status</div>
<div class="collapse-content">{{ job.status }}</div>
</div>
<!-- Job ID -->
<div class="collapse-arrow bg-base-200 collapse">
<input type="checkbox" checked />
<div class="collapse-title text-xl font-medium">Task ID</div>
<div class="collapse-content">{{ job.task_id }}</div>
</div>
<!-- Job Result -->
{% if job.is_in_terminal_state %}
<div class="collapse-arrow bg-base-200 collapse">
<input type="checkbox" checked />
<div class="collapse-title text-xl font-medium">Task Result</div>
<div class="collapse-content">
{% if job.job_name == 'engineer_pathways' %}
<div class="card bg-base-100">
<div class="card-body">
<p>Engineered Pathways:</p>
<ul class="menu bg-base-200 rounded-box w-full">
{% for engineered_url in job.parsed_result.0 %}
<li>
<a href="{{ engineered_url }}" class="hover:bg-base-300"
>{{ engineered_url }}</a
>
</li>
{% endfor %}
</ul>
</div>
</div>
<div class="card bg-base-100">
<div class="card-body">
<p>Predicted Pathways:</p>
<ul class="menu bg-base-200 rounded-box w-full">
{% for engineered_url in job.parsed_result.1 %}
<li>
<a href="{{ engineered_url }}" class="hover:bg-base-300"
>{{ engineered_url }}</a
>
</li>
{% endfor %}
</ul>
</div>
</div>
{% else %}
{{ job.parsed_result }}
{% endif %}
</div>
</div>
{% endif %}
<script>
// Show actions button if there are actions
document.addEventListener("DOMContentLoaded", function () {
const actionsButton = document.getElementById("actionsButton");
const actionsList = actionsButton?.querySelector("ul");
if (actionsList && actionsList.children.length > 0) {
actionsButton?.classList.remove("hidden");
}
});
</script>
</div>
{% endblock content %}

View File

@ -81,6 +81,7 @@
{% include "modals/objects/delete_pathway_node_modal.html" %}
{% include "modals/objects/delete_pathway_edge_modal.html" %}
{% include "modals/objects/generic_delete_modal.html" %}
{% include "modals/objects/engineer_pathway_modal.html" %}
{% endblock action_modals %}
<div class="space-y-2 p-4">
@ -103,38 +104,36 @@
<div class="bg-base-100 mb-2 rounded-lg p-2">
<div class="navbar bg-base-100 rounded-lg">
<div class="flex-1">
{% if meta.can_edit %}
<div class="dropdown">
<div tabindex="0" role="button" class="btn btn-ghost btn-sm">
<svg
xmlns="http://www.w3.org/2000/svg"
width="16"
height="16"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
class="lucide lucide-edit"
>
<path
d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"
/>
<path
d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"
/>
</svg>
Edit
</div>
<ul
tabindex="0"
class="dropdown-content menu bg-base-100 rounded-box z-50 w-52 p-2"
<div class="dropdown">
<div tabindex="0" role="button" class="btn btn-ghost btn-sm">
<svg
xmlns="http://www.w3.org/2000/svg"
width="16"
height="16"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
class="lucide lucide-edit"
>
{% include "actions/objects/pathway.html" %}
</ul>
<path
d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"
/>
<path
d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"
/>
</svg>
Actions
</div>
{% endif %}
<ul
tabindex="0"
class="dropdown-content menu bg-base-100 rounded-box z-50 w-52 p-2"
>
{% include "actions/objects/pathway.html" %}
</ul>
</div>
{% if pathway.setting.model.app_domain %}
<div class="dropdown">
<div tabindex="0" role="button" class="btn btn-ghost btn-sm">

View File

@ -3,7 +3,7 @@ from django.test import TestCase, override_settings
from epdb.logic import PackageManager
from epdb.models import Compound, User, Reaction
from epdb.models import Compound, User, Reaction, Rule, SimpleAmbitRule, ParallelRule
@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models", CELERY_TASK_ALWAYS_EAGER=True)
@ -48,11 +48,38 @@ class CopyTest(TestCase):
description="Eawag BBD compound c0005",
).default_structure
cls.SIMPLE_RULE = Rule.create(
rule_type="SimpleAmbitRule",
package=cls.package,
name="bt0022-2833",
description="Dihalomethyl derivative + Halomethyl derivative > 1-Halo-1-methylalcohol derivative + 1-Methylalcohol derivative",
smirks="[H:5][C:1]([#6:6])([#1,#9,#17,#35,#53:4])[#9,#17,#35,#53]>>[H:5][C:1]([#6:6])([#8])[#1,#9,#17,#35,#53:4]",
)
cls.SIMPLE_RULE_2 = Rule.create(
rule_type="SimpleAmbitRule",
package=cls.package,
name="Crap",
description=None,
smirks="CC>>CCC",
)
cls.PARALLEL_RULE = Rule.create(
rule_type="ParallelRule",
package=cls.package,
simple_rules=[cls.SIMPLE_RULE, cls.SIMPLE_RULE_2],
name="Par Rule",
description=None,
reactant_filter_smarts=None,
product_filter_smarts=None,
)
cls.REACTION = Reaction.create(
package=cls.package,
name="Eawag BBD reaction r0001",
educts=[cls.reaction_educt],
products=[cls.reaction_product],
rules=[cls.SIMPLE_RULE],
multi_step=False,
)
@ -188,3 +215,76 @@ class CopyTest(TestCase):
self.assertEqual(copy_product.compound.package, self.target_package)
self.assertEqual(orig_product.compound.package, self.package)
self.assertEqual(orig_product.smiles, copy_product.smiles)
def test_copy_compound_deduplication(self):
    # Copying the same Compound twice (each time with a fresh mapping) must
    # hand back the same Compound and must not increase the Compound count.
    target = self.target_package

    first_copy = self.AFOXOLANER.copy(target, {})
    second_copy = self.AFOXOLANER.copy(target, {})

    self.assertEqual(target.compounds.count(), 1)
    self.assertEqual(first_copy, second_copy)

    # A Compound created from an already normalized SMILES is expected to
    # hold a single CompoundStructure only.
    c = Compound.create(
        package=target,
        smiles="O=C(O)C1=CC=C([N+](=O)[O-])C=C1",
        name="Compound with single structure",
        description="Compound with single structure",
    )
    self.assertEqual(c.structures.count(), 1)

    # Copying a Compound that shares this normalized structure but also has
    # a non-normalized one is expected to merge into `c`.
    merged_copy = self.FOUR_NITROBENZOIC_ACID.copy(target, {})

    self.assertEqual(merged_copy, c)
    self.assertEqual(c.structures.count(), 2)
def test_copy_rule_deduplication(self):
    # Copying rules repeatedly (each time with a fresh mapping) must reuse
    # the rules that already exist in the target package.
    target = self.target_package

    def assert_rule_counts():
        # 1 ParallelRule, 2 SimpleRules
        self.assertEqual(target.rules.count(), 3)
        self.assertEqual(SimpleAmbitRule.objects.filter(package=target).count(), 2)
        self.assertEqual(ParallelRule.objects.filter(package=target).count(), 1)

    # Simple rule: the second copy must not add a rule.
    first_copy = self.SIMPLE_RULE.copy(target, {})
    second_copy = self.SIMPLE_RULE.copy(target, {})

    self.assertEqual(target.rules.count(), 1)
    self.assertEqual(first_copy, second_copy)

    # Parallel rule: the first copy adds the parallel rule and the missing simple rule.
    first_par_copy = self.PARALLEL_RULE.copy(target, {})
    assert_rule_counts()

    # Copying again must leave all counts untouched...
    second_par_copy = self.PARALLEL_RULE.copy(target, {})
    assert_rule_counts()

    # ...and resolve to the same rule.
    self.assertEqual(first_par_copy, second_par_copy)
def test_copy_reaction_deduplication(self):
    # Copying the same Reaction twice (each time with a fresh mapping) must
    # resolve to one Reaction in the target package.
    target = self.target_package

    first_copy = self.REACTION.copy(target, {})
    second_copy = self.REACTION.copy(target, {})

    self.assertEqual(target.reactions.count(), 1)
    self.assertEqual(first_copy, second_copy)

95
tests/test_jobs.py Normal file
View File

@ -0,0 +1,95 @@
from django.conf import settings as s
from django.test import TestCase, override_settings
from epdb.logic import PackageManager
from epdb.models import Pathway, User
Package = s.GET_PACKAGE_MODEL()
@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models", CELERY_TASK_ALWAYS_EAGER=True)
class MultiGenTest(TestCase):
    """Tests for ``epdb.tasks.engineer_pathways`` against the bundled fixture package."""

    fixtures = ["test_fixtures_incl_model.jsonl.gz"]

    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        cls.user = User.objects.get(username="anonymous")
        cls.package = PackageManager.create_package(cls.user, "Anon Test Package", "No Desc")
        cls.BBD_SUBSET = Package.objects.get(name="Fixtures")
        # Fixture pathway for which engineering is expected to find an intermediate
        cls.PW_WITH_INTERMEDIATE_NAME = "1,1,1-Trichloroethane (an/aerobic)"
        # Fixture pathway for which engineering is expected to find nothing
        cls.PW_WITHOUT_INTERMEDIATE_NAME = "Caffeine"

    def _engineer(self, pathway_pks):
        """Run ``engineer_pathways`` with the test user's prediction setting and package."""
        from epdb.tasks import engineer_pathways

        return engineer_pathways(
            pathway_pks, self.user.prediction_settings().pk, self.package.pk
        )

    def _check_engineered_intermediates(self, pathway_urls):
        """Assert every engineered intermediate has SMILES "CCO" and return how many exist."""
        found = 0
        for url in pathway_urls:
            for node in Pathway.objects.get(url=url).nodes:
                if node.kv.get("is_engineered_intermediate"):
                    self.assertEqual(node.default_node_label.smiles, "CCO")
                    found += 1
        return found

    def test_engineer_pathway(self):
        # A single pathway with a known intermediate
        source_pw = Pathway.objects.get(name=self.PW_WITH_INTERMEDIATE_NAME)
        engineered, predicted = self._engineer([source_pw.pk])
        self.assertEqual(len(engineered), 1)
        self.assertEqual(len(predicted), 1)
        # Only the SMILES of flagged nodes is verified here, not their number
        self._check_engineered_intermediates([engineered[0]])

        # A pathway without intermediates yields no results at all
        source_pw = Pathway.objects.get(name=self.PW_WITHOUT_INTERMEDIATE_NAME)
        engineered, predicted = self._engineer([source_pw.pk])
        self.assertEqual(len(engineered), 0)
        self.assertEqual(len(predicted), 0)

        # Passing the same pathway twice must be deduplicated to a single result
        pw1 = Pathway.objects.get(name=self.PW_WITH_INTERMEDIATE_NAME)
        engineered, predicted = self._engineer([pw1.pk, pw1.pk])
        self.assertEqual(len(engineered), 1)
        self.assertEqual(len(predicted), 1)
        # The one engineered pathway contains exactly one intermediate
        self.assertEqual(self._check_engineered_intermediates(engineered), 1)

        # The fixture only ships one pathway with a potential intermediate, so a
        # copy is used to obtain a second, distinct one
        mapping = {}
        pw2 = pw1.copy(self.package, mapping=mapping)
        engineered, predicted = self._engineer([pw1.pk, pw2.pk])
        self.assertEqual(len(engineered), 2)
        self.assertEqual(len(predicted), 2)
        # Both engineered pathways contain the intermediate
        self.assertEqual(self._check_engineered_intermediates(engineered), 2)

View File

@ -2,12 +2,13 @@ import logging
import re
from abc import ABC
from collections import defaultdict
from typing import List, Optional, Dict, TYPE_CHECKING
from typing import List, Optional, Dict, TYPE_CHECKING, Union
from indigo import Indigo, IndigoException, IndigoObject
from indigo.renderer import IndigoRenderer
from rdkit import Chem, rdBase
from rdkit.Chem import MACCSkeys, Descriptors, rdFingerprintGenerator
from rdkit.Chem import rdchem
from rdkit.Chem import rdChemReactions
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.MolStandardize import rdMolStandardize
@ -94,8 +95,15 @@ class FormatConverter(object):
return Chem.MolToSmiles(mol, canonical=canonical)
@staticmethod
def InChIKey(mol_or_smiles: Union[rdchem.Mol, str]) -> Optional[str]:
    """Return the InChIKey of an RDKit molecule or of a SMILES string.

    Args:
        mol_or_smiles: Either an already parsed ``rdchem.Mol`` or a SMILES
            string. Strings are parsed with ``FormatConverter.from_smiles``.

    Returns:
        The value of ``Chem.MolToInchiKey`` for the molecule, or ``None``
        when no molecule is available (e.g. the SMILES could not be parsed).
    """
    if isinstance(mol_or_smiles, str):
        # "~" denotes an "any" bond; drop it so that an implicit single bond
        # is used when parsing the SMILES.
        mol = FormatConverter.from_smiles(mol_or_smiles.replace("~", ""))
    else:
        mol = mol_or_smiles

    if mol is None:
        return None

    return Chem.MolToInchiKey(mol)
@staticmethod
def InChI(smiles):
@ -352,7 +360,8 @@ class FormatConverter(object):
product = GetMolFrags(product, asMols=True)
for p in product:
p = FormatConverter.standardize(
Chem.MolToSmiles(p), remove_stereo=remove_stereo
Chem.MolToSmiles(p).replace("~", ""),
remove_stereo=remove_stereo,
)
if product_filter_smarts and FormatConverter.smarts_matches(

View File

@ -9,7 +9,7 @@ from collections import defaultdict
from datetime import datetime
from enum import Enum
from types import NoneType
from typing import Any, Dict, List
from typing import Any, Dict, List, TYPE_CHECKING
from django.conf import settings as s
from django.db import transaction
@ -35,6 +35,7 @@ from epdb.models import (
RuleBasedRelativeReasoning,
Scenario,
SequentialRule,
Setting,
SimpleAmbitRule,
SimpleRDKitRule,
SimpleRule,
@ -44,6 +45,9 @@ from utilities.chem import FormatConverter
logger = logging.getLogger(__name__)
Package = s.GET_PACKAGE_MODEL()
if TYPE_CHECKING:
from epdb.logic import SPathway
class HTMLGenerator:
registry = {x.__name__: x for x in NAME_MAPPING.values()}
@ -1260,3 +1264,122 @@ class PathwayUtils:
res[edge.url] = rule_chain
return res
def engineer(self, setting: "Setting"):
    """Predict this pathway in memory and look for predicted intermediates.

    A fresh copy of ``self.pathway`` is loaded and an ``SPathway`` is predicted
    from its single root compound using ``setting``. Nodes of the stored
    pathway are matched to predicted nodes by InChIKey. For every
    (start node, end node) pair of every stored edge, the shortest path between
    the two matched predicted nodes is looked up on the predicted graph; the
    predicted edges along a non-empty path are collected.

    Args:
        setting: The prediction setting handed to ``SPathway``.

    Returns:
        A 3-tuple ``(spw, reverse_mapping, intermediate_mapping)``:

        * ``spw`` -- the predicted ``SPathway``, or ``None`` if the pathway
          does not have exactly one root node.
        * ``reverse_mapping`` -- dict mapping stored nodes to their matching
          predicted nodes.
        * ``intermediate_mapping`` -- list of tuples
          ``(start, end, start_snode, end_snode, predicted_edges)``.
    """
    from epdb.logic import SPathway
    from utilities.chem import FormatConverter
    from utilities.ml import graph_from_pathway, get_shortest_path

    # get a fresh copy
    pw = Pathway.objects.get(id=self.pathway.pk)

    root_nodes = [n.default_node_label.smiles for n in pw.root_nodes]

    if len(root_nodes) != 1:
        logger.warning(f"Pathway {pw.name} has {len(root_nodes)} root nodes")
        # spw, mapping, intermediates
        return None, {}, []

    # Predict the Pathway in memory
    spw = SPathway(root_nodes[0], None, setting)
    level = 0
    while not spw.done:
        spw.predict_step(from_depth=level)
        level += 1

    # "~" denotes any bond: remove it and use an implicit single bond for the
    # comparison. The keys of the predicted nodes are computed once up front
    # instead of once per (node, snode) pair.
    predicted_keys = [
        (snode, FormatConverter.InChIKey(snode.smiles.replace("~", "")))
        for snode in spw.smiles_to_node.values()
    ]

    # Generate SNode -> Node mapping
    node_mapping = {}
    for node in pw.nodes:
        data_key = FormatConverter.InChIKey(node.default_node_label.smiles.replace("~", ""))
        if not data_key:
            # No key could be derived; never treat two structures without a
            # key as the same compound.
            continue

        for snode, pred_key in predicted_keys:
            if pred_key and pred_key == data_key:
                node_mapping[snode] = node

    reverse_mapping = {v: k for k, v in node_mapping.items()}

    graph = graph_from_pathway(spw)

    intermediate_mapping = []
    # loop through each edge and each reactant <-> product pair
    # and compute the shortest path on the predicted pathway
    for edge in pw.edges:
        for start in edge.start_nodes.all():
            start_snode = reverse_mapping.get(start)
            if start_snode is None:
                continue

            for end in edge.end_nodes.all():
                end_snode = reverse_mapping.get(end)
                if end_snode is None:
                    continue

                # A non-empty result means we've found intermediates
                intermediate_smiles = get_shortest_path(
                    graph,
                    FormatConverter.standardize(start_snode.smiles, remove_stereo=True),
                    FormatConverter.standardize(end_snode.smiles, remove_stereo=True),
                )

                if not intermediate_smiles:
                    continue

                # Walk the path and collect the predicted edges connecting
                # each consecutive pair of SMILES. The predicted edge gets its
                # own name so it cannot clobber the stored edge that the
                # surrounding loops still read from.
                intermediates = []
                prev = start_snode.smiles
                for smi in intermediate_smiles + [end_snode.smiles]:
                    for sedge in spw.get_edge_for_educt_smiles(prev):
                        if smi in sedge.product_smiles():
                            intermediates.append(sedge)
                    prev = smi

                intermediate_mapping.append(
                    (start, end, start_snode, end_snode, intermediates)
                )

    return spw, reverse_mapping, intermediate_mapping
@staticmethod
def spathway_to_pathway(
    package: "Package", spw: "SPathway", name: str = None, description: str = None
):
    """Persist an in-memory ``SPathway`` as a new predicted ``Pathway`` in ``package``.

    The pathway is created from the first root node of ``spw``; any further
    root nodes are added as depth-0 nodes. The created nodes are registered in
    ``spw.snode_persist_lookup``, ``spw.persist`` is pointed at the new pathway
    and ``spw._sync_to_pathway()`` is invoked.

    Returns:
        The newly created ``Pathway``.
    """
    roots = spw.root_nodes
    primary_root = roots[0]

    pw = Pathway.create(
        package=package,
        smiles=primary_root.smiles,
        name=name,
        description=description,
        predicted=True,
    )
    pw.setting = spw.prediction_setting
    pw.save()

    # The root node created together with the pathway belongs to the first SNode
    persisted = {primary_root: pw.root_nodes[0]}

    # Remaining roots (if any) have to be created explicitly
    for extra_root in roots[1:]:
        persisted[extra_root] = Node.create(pw, extra_root.smiles, depth=0)

    spw.snode_persist_lookup.update(persisted)

    spw.persist = pw
    spw._sync_to_pathway()

    return pw