Files
enviPy-bayer/tests/test_multigen_eval.py
Liam Brydon 901de4640c [Fix] Stereochemistry prediction handling (#228 and #238) (#250)
**This pull request will need a separate migration pull-request**

I have added an alert box in two places; it is shown when the user tries to predict with stereochemistry.

When a user predicts a pathway with stereochemistry, an alert box is shown in that node's hover.
To do this I added two new fields. Pathway now has a "predicted" BooleanField indicating whether it was predicted or not. It is set to True if the pathway mode for prediction is "predict" or "incremental" and False if it is "build". I think it is a flag that could be useful in the future, perhaps for analysing how many predicted pathways are in enviPath.
Node now has a `stereo_removed` BooleanField, which is set to True if the Node's parent Pathway has "predicted" set to True and the node's SMILES contains stereochemistry.
<img width="500" alt="{927AC9FF-DBC9-4A19-9E6E-0EDD3B08C7AC}.png" src="attachments/69ea29bc-c2d2-4cd2-8e98-aae5c5737f69">

When a user runs a prediction on a model's page, the alert is shown at the top of the list. This did not require any new fields, as the entered SMILES does not get saved anywhere.
<img width="500" alt="{BED66F12-5F07-419E-AAA6-FE1FE5B4F266}.png" src="attachments/5fcc3a9b-4d1a-4e48-acac-76b7571f6507">

I think the alert box is an alright solution but if you have a great idea for something that looks/fits better please change it or let me know.

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#250
Co-authored-by: Liam Brydon <lbry121@aucklanduni.ac.nz>
Co-committed-by: Liam Brydon <lbry121@aucklanduni.ac.nz>
2025-12-03 10:19:34 +13:00

181 lines
8.3 KiB
Python

from django.conf import settings as s
from django.test import TestCase, override_settings
from networkx.utils.misc import graphs_equal
from epdb.logic import PackageManager, SPathway
from epdb.models import Pathway, User
from utilities.ml import graph_from_pathway, multigen_eval, pathway_edit_eval
# Package model class, obtained by calling the GET_PACKAGE_MODEL setting.
Package = s.GET_PACKAGE_MODEL()
@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models", CELERY_TASK_ALWAYS_EAGER=True)
class MultiGenTest(TestCase):
    """Tests for the pathway comparison helpers in ``utilities.ml``.

    Covers ``multigen_eval`` (precision / recall between a true and a predicted
    pathway), ``pathway_edit_eval`` (edit distance between two pathways) and
    ``graph_from_pathway`` (conversion of a pathway to a networkx graph).

    The ``*_case`` helpers build small (true_pathway, pred_pathway) pairs from
    SMILES strings; they are shared by the multigen and the edit-distance tests.
    """

    fixtures = ["test_fixtures_incl_model.jsonl.gz"]

    @classmethod
    def setUpClass(cls):
        """Run the base class setup, then fetch/create the objects shared by all tests."""
        super().setUpClass()
        cls.user: "User" = User.objects.get(username="anonymous")
        # Scratch package that receives the hand-built pathways of the *_case helpers.
        cls.package: "Package" = PackageManager.create_package(
            cls.user, "Anon Test Package", "No Desc"
        )
        # Package named "Fixtures", looked up after the base class setup has run.
        cls.BBD_SUBSET: "Package" = Package.objects.get(name="Fixtures")

    def _pathways_with_edges(self):
        """Return the pathways of ``BBD_SUBSET`` that have at least one edge.

        Pathways without edges are skipped by every test that iterates over
        the fixture pathways.
        """
        return [
            pathway for pathway in self.BBD_SUBSET.pathways.all() if pathway.edge_set.exists()
        ]

    def test_equal_pathways(self):
        """Test that two identical pathways return a precision and recall of 1.0"""
        for pathway in self._pathways_with_edges():
            with self.subTest(pathway=pathway.name):
                _, precision, recall = multigen_eval(pathway, pathway)
                self.assertEqual(
                    precision,
                    1.0,
                    f"Precision should be one for identical pathways. "
                    f"Failed on pathway: {pathway.name}",
                )
                self.assertEqual(
                    recall,
                    1.0,
                    f"Recall should be one for identical pathways. "
                    f"Failed on pathway: {pathway.name}",
                )

    def test_intermediates(self):
        """Test that an intermediate can be correctly identified and the metrics are correctly adjusted"""
        _, precision, recall, intermediates = multigen_eval(
            *self.intermediate_case(), return_intermediates=True
        )
        self.assertEqual(len(intermediates), 1, "There should be 1 found intermediate")
        self.assertEqual(precision, 1, "Precision should be 1")
        self.assertEqual(recall, 1, "Recall should be 1")

    def test_fp(self):
        """Test that a false-positive (extra compound) is correctly penalised"""
        _, precision, recall = multigen_eval(*self.fp_case())
        self.assertAlmostEqual(precision, 0.75, 3, "Precision should be 0.75")
        self.assertEqual(recall, 1, "Recall should be 1")

    def test_fn(self):
        """Test that a false-negative (missed compound) is correctly penalised"""
        _, precision, recall = multigen_eval(*self.fn_case())
        self.assertEqual(precision, 1, "Precision should be 1.0")
        self.assertAlmostEqual(recall, 0.667, 3, "Recall should be 0.667")

    def test_all(self):
        """Test an intermediate, false-positive and false-negative together"""
        _, precision, recall, intermediates = multigen_eval(
            *self.all_case(), return_intermediates=True
        )
        self.assertEqual(len(intermediates), 1, "There should be 1 found intermediate")
        self.assertAlmostEqual(precision, 0.6, 3, "Precision should be 0.6")
        self.assertAlmostEqual(recall, 0.75, 3, "Recall should be 0.75")

    def test_shallow_pathway(self):
        """Test that a pathway and its SPathway counterpart convert to equal networkx graphs"""
        for pathway in self._pathways_with_edges():
            with self.subTest(pathway=pathway.name):
                # Keep `pathway` bound to the model instance so that the failure
                # message reports the pathway's name rather than the graph's.
                shallow_graph = graph_from_pathway(SPathway.from_pathway(pathway))
                pathway_graph = graph_from_pathway(pathway)
                self.assertTrue(
                    graphs_equal(shallow_graph, pathway_graph),
                    f"Networkx graph from shallow pathway not "
                    f"equal to pathway for pathway {pathway.name}"
                    f"\n\nS {shallow_graph.adj}"
                    f"\n\nPW {pathway_graph.adj}",
                )

    def test_graph_edit_eval(self):
        """Performs all the previous tests but with pathway_edit_eval

        Unlike multigen_eval, these test cases have not been hand verified"""
        for pathway in self._pathways_with_edges():
            with self.subTest(pathway=pathway.name):
                score = pathway_edit_eval(pathway, pathway)
                self.assertEqual(
                    score,
                    0.0,
                    "Pathway edit distance should be zero for identical pathways. "
                    f"Failed on pathway: {pathway.name}",
                )
        inter_score = pathway_edit_eval(*self.intermediate_case())
        self.assertAlmostEqual(
            inter_score, 1.75, 3, "Pathway edit distance failed on intermediate case"
        )
        fp_score = pathway_edit_eval(*self.fp_case())
        self.assertAlmostEqual(fp_score, 1.25, 3, "Pathway edit distance failed on fp case")
        fn_score = pathway_edit_eval(*self.fn_case())
        self.assertAlmostEqual(fn_score, 1.25, 3, "Pathway edit distance failed on fn case")
        all_score = pathway_edit_eval(*self.all_case())
        self.assertAlmostEqual(all_score, 1.0, 3, "Pathway edit distance failed on all case")

    def intermediate_case(self):
        """Create an example with an intermediate in the predicted pathway

        True:      CCO -> CC(=O)O
        Predicted: CCO -> CC=O -> CC(=O)O

        Returns the tuple (true_pathway, pred_pathway).
        """
        true_pathway = Pathway.create(self.package, "CCO")
        true_pathway.add_edge(
            [true_pathway.root_nodes.all()[0]], [true_pathway.add_node("CC(=O)O", depth=1)]
        )
        pred_pathway = Pathway.create(self.package, "CCO")
        pred_pathway.add_edge(
            [pred_pathway.root_nodes.all()[0]],
            [acetaldehyde := pred_pathway.add_node("CC=O", depth=1)],
        )
        pred_pathway.add_edge([acetaldehyde], [pred_pathway.add_node("CC(=O)O", depth=2)])
        return true_pathway, pred_pathway

    def fp_case(self):
        """Create an example with an extra compound in the predicted pathway

        True:      CCO -> CC=O -> CC(=O)O
        Predicted: CCO -> CC=O -> CC(=O)O and CC=O -> C

        Returns the tuple (true_pathway, pred_pathway).
        """
        true_pathway = Pathway.create(self.package, "CCO")
        true_pathway.add_edge(
            [true_pathway.root_nodes.all()[0]],
            [acetaldehyde := true_pathway.add_node("CC=O", depth=1)],
        )
        true_pathway.add_edge([acetaldehyde], [true_pathway.add_node("CC(=O)O", depth=2)])
        pred_pathway = Pathway.create(self.package, "CCO")
        pred_pathway.add_edge(
            [pred_pathway.root_nodes.all()[0]],
            [acetaldehyde := pred_pathway.add_node("CC=O", depth=1)],
        )
        pred_pathway.add_edge([acetaldehyde], [pred_pathway.add_node("CC(=O)O", depth=2)])
        pred_pathway.add_edge([acetaldehyde], [pred_pathway.add_node("C", depth=2)])
        return true_pathway, pred_pathway

    def fn_case(self):
        """Create an example with a missing compound in the predicted pathway

        True:      CCO -> CC=O -> CC(=O)O
        Predicted: CCO -> CC=O

        Returns the tuple (true_pathway, pred_pathway).
        """
        true_pathway = Pathway.create(self.package, "CCO")
        true_pathway.add_edge(
            [true_pathway.root_nodes.all()[0]],
            [acetaldehyde := true_pathway.add_node("CC=O", depth=1)],
        )
        true_pathway.add_edge([acetaldehyde], [true_pathway.add_node("CC(=O)O", depth=2)])
        pred_pathway = Pathway.create(self.package, "CCO")
        pred_pathway.add_edge(
            [pred_pathway.root_nodes.all()[0]], [pred_pathway.add_node("CC=O", depth=1)]
        )
        return true_pathway, pred_pathway

    def all_case(self):
        """Create an example with an intermediate, extra compound and missing compound

        True:      CCO -> CC=O, then CC=O -> C and CC=O -> CC(=O)O
        Predicted: CCO -> C, then C -> CC=O and C -> c1ccccc1

        Returns the tuple (true_pathway, pred_pathway).
        """
        true_pathway = Pathway.create(self.package, "CCO")
        true_pathway.add_edge(
            [true_pathway.root_nodes.all()[0]],
            [acetaldehyde := true_pathway.add_node("CC=O", depth=1)],
        )
        true_pathway.add_edge([acetaldehyde], [true_pathway.add_node("C", depth=2)])
        true_pathway.add_edge([acetaldehyde], [true_pathway.add_node("CC(=O)O", depth=2)])
        pred_pathway = Pathway.create(self.package, "CCO")
        pred_pathway.add_edge(
            [pred_pathway.root_nodes.all()[0]], [methane := pred_pathway.add_node("C", depth=1)]
        )
        # The predicted pathway's children must be created on pred_pathway itself;
        # creating them on true_pathway would alter the ground truth under test.
        pred_pathway.add_edge([methane], [pred_pathway.add_node("CC=O", depth=2)])
        pred_pathway.add_edge([methane], [pred_pathway.add_node("c1ccccc1", depth=2)])
        return true_pathway, pred_pathway