[Feature] Engineer Pathway (#256)

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#256
This commit is contained in:
2025-12-10 07:35:42 +13:00
parent 46b0f1c124
commit 648ec150a9
17 changed files with 990 additions and 127 deletions

View File

@ -3,7 +3,7 @@ from django.test import TestCase, override_settings
from epdb.logic import PackageManager
from epdb.models import Compound, User, Reaction
from epdb.models import Compound, User, Reaction, Rule, SimpleAmbitRule, ParallelRule
@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models", CELERY_TASK_ALWAYS_EAGER=True)
@ -48,11 +48,38 @@ class CopyTest(TestCase):
description="Eawag BBD compound c0005",
).default_structure
cls.SIMPLE_RULE = Rule.create(
rule_type="SimpleAmbitRule",
package=cls.package,
name="bt0022-2833",
description="Dihalomethyl derivative + Halomethyl derivative > 1-Halo-1-methylalcohol derivative + 1-Methylalcohol derivative",
smirks="[H:5][C:1]([#6:6])([#1,#9,#17,#35,#53:4])[#9,#17,#35,#53]>>[H:5][C:1]([#6:6])([#8])[#1,#9,#17,#35,#53:4]",
)
cls.SIMPLE_RULE_2 = Rule.create(
rule_type="SimpleAmbitRule",
package=cls.package,
name="Crap",
description=None,
smirks="CC>>CCC",
)
cls.PARALLEL_RULE = Rule.create(
rule_type="ParallelRule",
package=cls.package,
simple_rules=[cls.SIMPLE_RULE, cls.SIMPLE_RULE_2],
name="Par Rule",
description=None,
reactant_filter_smarts=None,
product_filter_smarts=None,
)
cls.REACTION = Reaction.create(
package=cls.package,
name="Eawag BBD reaction r0001",
educts=[cls.reaction_educt],
products=[cls.reaction_product],
rules=[cls.SIMPLE_RULE],
multi_step=False,
)
@ -188,3 +215,76 @@ class CopyTest(TestCase):
self.assertEqual(copy_product.compound.package, self.target_package)
self.assertEqual(orig_product.compound.package, self.package)
self.assertEqual(orig_product.smiles, copy_product.smiles)
def test_copy_compound_deduplication(self):
    """Copying a compound that the target package already holds must not duplicate it."""
    destination = self.target_package

    # Two independent copies (each with its own fresh mapping) have to resolve
    # to the same object and leave exactly one Compound in the target package.
    initial = self.AFOXOLANER.copy(destination, {})
    repeated = self.AFOXOLANER.copy(destination, {})
    self.assertEqual(destination.compounds.count(), 1)
    self.assertEqual(initial, repeated)

    # A compound created from an already normalized SMILES carries a single
    # structure only.
    preexisting = Compound.create(
        package=destination,
        smiles="O=C(O)C1=CC=C([N+](=O)[O-])C=C1",
        name="Compound with single structure",
        description="Compound with single structure",
    )
    self.assertEqual(preexisting.structures.count(), 1)

    # Copy a compound that shares this normalized structure but additionally
    # has a non-normalized one: it is expected to be merged into the existing
    # compound, which then owns two structures.
    merged = self.FOUR_NITROBENZOIC_ACID.copy(destination, {})
    self.assertEqual(merged, preexisting)
    self.assertEqual(preexisting.structures.count(), 2)
def test_copy_rule_deduplication(self):
    """Copying simple and parallel rules repeatedly must not create duplicates."""
    destination = self.target_package

    def assert_rule_counts(total, simple, parallel):
        # Check the overall rule count and the per-type counts in the target package.
        self.assertEqual(destination.rules.count(), total)
        self.assertEqual(
            SimpleAmbitRule.objects.filter(package=destination).count(), simple
        )
        self.assertEqual(
            ParallelRule.objects.filter(package=destination).count(), parallel
        )

    # Copying the same simple rule twice (fresh mapping each time) has to give
    # the same object and must not increase the number of rules.
    simple_first = self.SIMPLE_RULE.copy(destination, {})
    simple_again = self.SIMPLE_RULE.copy(destination, {})
    self.assertEqual(destination.rules.count(), 1)
    self.assertEqual(simple_first, simple_again)

    # The parallel rule brings its two simple rules along:
    # 1 ParallelRule + 2 SimpleAmbitRules.
    parallel_first = self.PARALLEL_RULE.copy(destination, {})
    assert_rule_counts(total=3, simple=2, parallel=1)

    # A second copy of the parallel rule leaves every count untouched ...
    parallel_again = self.PARALLEL_RULE.copy(destination, {})
    assert_rule_counts(total=3, simple=2, parallel=1)

    # ... and returns the very same copy.
    self.assertEqual(parallel_first, parallel_again)
def test_copy_reaction_deduplication(self):
    """Copying the same reaction twice must yield one reaction in the target package."""
    # Each copy gets its own fresh mapping, so the second call cannot rely on
    # the first call's mapping to find the already copied reaction.
    copies = [self.REACTION.copy(self.target_package, {}) for _ in range(2)]

    self.assertEqual(self.target_package.reactions.count(), 1)
    self.assertEqual(copies[0], copies[1])

95
tests/test_jobs.py Normal file
View File

@ -0,0 +1,95 @@
from django.conf import settings as s
from django.test import TestCase, override_settings
from epdb.logic import PackageManager
from epdb.models import Pathway, User
# Obtain the Package model through the settings hook instead of importing it
# directly; it is used below for ORM lookups (``Package.objects``).
Package = s.GET_PACKAGE_MODEL()
@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models", CELERY_TASK_ALWAYS_EAGER=True)
class MultiGenTest(TestCase):
    """Exercise the ``engineer_pathways`` task against pathways from the fixtures."""

    fixtures = ["test_fixtures_incl_model.jsonl.gz"]

    @classmethod
    def setUpClass(cls):
        """Set up the user, the package receiving results and the pathway names."""
        super().setUpClass()
        cls.user: "User" = User.objects.get(username="anonymous")
        cls.package: "Package" = PackageManager.create_package(
            cls.user, "Anon Test Package", "No Desc"
        )
        cls.BBD_SUBSET: "Package" = Package.objects.get(name="Fixtures")
        # Names of the fixture pathways used by the test: one for which an
        # engineered pathway is expected and one for which none is expected.
        cls.PW_WITH_INTERMEDIATE_NAME = "1,1,1-Trichloroethane (an/aerobic)"
        cls.PW_WITHOUT_INTERMEDIATE_NAME = "Caffeine"

    def _check_engineered_intermediates(self, pathway_urls):
        """Assert every flagged intermediate node is "CCO" and return how many were seen.

        ``pathway_urls`` are the URLs of engineered pathways; each pathway is
        loaded and all of its nodes whose ``kv`` carries the
        ``is_engineered_intermediate`` flag are checked.
        """
        seen = 0
        for pathway_url in pathway_urls:
            for node in Pathway.objects.get(url=pathway_url).nodes:
                if node.kv.get("is_engineered_intermediate"):
                    self.assertEqual(node.default_node_label.smiles, "CCO")
                    seen += 1
        return seen

    def test_engineer_pathway(self):
        """Run pathway engineering for single, missing, duplicated and copied inputs."""
        from epdb.tasks import engineer_pathways

        def engineer(*pathways):
            # Run the task for the given pathways with the user's prediction
            # settings, writing into the test package.
            return engineer_pathways(
                [pw.pk for pw in pathways],
                self.user.prediction_settings().pk,
                self.package.pk,
            )

        # A pathway with an intermediate yields one engineered and one
        # predicted pathway.
        with_intermediate = Pathway.objects.get(name=self.PW_WITH_INTERMEDIATE_NAME)
        engineered, predicted = engineer(with_intermediate)
        self.assertEqual(len(engineered), 1)
        self.assertEqual(len(predicted), 1)
        self._check_engineered_intermediates([engineered[0]])

        # A pathway without an intermediate yields nothing.
        without_intermediate = Pathway.objects.get(name=self.PW_WITHOUT_INTERMEDIATE_NAME)
        engineered, predicted = engineer(without_intermediate)
        self.assertEqual(len(engineered), 0)
        self.assertEqual(len(predicted), 0)

        # Test pathway deduplication in the engineering process: the same
        # pathway passed twice is only handled once.
        pw1 = Pathway.objects.get(name=self.PW_WITH_INTERMEDIATE_NAME)
        engineered, predicted = engineer(pw1, pw1)
        self.assertEqual(len(engineered), 1)
        self.assertEqual(len(predicted), 1)
        # The single engineered pathway contains the intermediate exactly once.
        self.assertEqual(self._check_engineered_intermediates(engineered), 1)

        # The fixture only contains one pathway with a potential intermediate,
        # so copy it to obtain a second, distinct one.
        pw2 = pw1.copy(self.package, mapping={})
        engineered, predicted = engineer(pw1, pw2)
        self.assertEqual(len(engineered), 2)
        self.assertEqual(len(predicted), 2)
        # Both engineered pathways contain the intermediate.
        self.assertEqual(self._check_engineered_intermediates(engineered), 2)