Files
enviPy-bayer/epiuclid/tests/test_pathway_mapper.py
Tobias O d06bd0d4fd [Feature] Minimal IUCLID export (#338)
This is an initial implementation that creates a working minimal .i6z document.
It passes schema validation and can be imported into IUCLID.

Caveat:
IUCLID files target individual compounds.
Pathway is not actually covered by the format.

It can be added under either the soil or the water-and-soil OECD endpoints.
**I currently only implemented the soil endpoint for all data.**

This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future).

Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field.
I use the XSD schema for validation, but unfortunately the IUCLID parser is not fully compliant and requires a specific order, etc.

The workflow is: find the AI (additional information) structure in the XSD schema -> make the schema validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣

New specifications get released once per year, so we will have to update accordingly.
I believe that this should be a more expensive feature, as it requires significant effort to uphold.

Currently implemented for root compound only in SOIL:

- Soil Texture 2
- Soil Texture 1
- pH value
- Half-life per soil sample / scenario (mapped to disappearance; not sure about that).
- CEC
- Organic Matter (only Carbon)
- Moisture content
- Humidity

<img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62">
<img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93">
<img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c">

Reviewed-on: enviPath/enviPy#338
Co-authored-by: Tobias O <tobias.olenyi@envipath.com>
Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
2026-04-07 19:46:12 +12:00

513 lines
19 KiB
Python

"""Tests for PathwayMapper - no DB needed, uses DTO fixtures."""
from django.test import SimpleTestCase, tag
from uuid import uuid4
from epapi.v1.interfaces.iuclid.dto import (
PathwayCompoundDTO,
PathwayEdgeDTO,
PathwayExportDTO,
PathwayNodeDTO,
PathwayScenarioDTO,
)
from epiuclid.serializers.pathway_mapper import PathwayMapper
@tag("iuclid")
class PathwayMapperTest(SimpleTestCase):
    """Exercise ``PathwayMapper.map`` against hand-built pathway DTOs.

    Every test builds a ``PathwayExportDTO`` in memory, maps it, and inspects
    the resulting bundle: substances, reference substances and endpoint study
    records (abbreviated "ESR" below). "AI" in the test docstrings stands for
    the additional-information objects attached to nodes or scenarios.
    """

    def setUp(self):
        """Create the Benzene/Phenol compound pair shared by several tests."""
        self.compounds = [
            PathwayCompoundDTO(pk=1, name="Benzene", smiles="c1ccccc1"),
            PathwayCompoundDTO(pk=2, name="Phenol", smiles="c1ccc(O)cc1"),
        ]

    # ------------------------------------------------------------------
    # Fixture helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _map_pathway(name, compounds, root_pks, **extra):
        """Build a ``PathwayExportDTO`` with a fresh UUID and return its mapped bundle.

        ``extra`` (``edges=`` and/or ``nodes=``) is forwarded only when the
        caller supplies it, so omitted fields keep the DTO's own defaults.
        """
        export = PathwayExportDTO(
            pathway_uuid=uuid4(),
            pathway_name=name,
            compounds=compounds,
            root_compound_pks=root_pks,
            **extra,
        )
        return PathwayMapper().map(export)

    def _map_root_only(self, *nodes):
        """Map pathway "P" whose only compound is "Root" (pk=1), with optional nodes."""
        # Pass ``nodes`` only when there are some, mirroring tests that omit the field.
        extra = {"nodes": list(nodes)} if nodes else {}
        return self._map_pathway(
            "P",
            [PathwayCompoundDTO(pk=1, name="Root", smiles="CC")],
            [1],
            **extra,
        )

    @staticmethod
    def _root_node(**extra):
        """Return a depth-0 node for compound pk=1 ("Root", SMILES "CC").

        ``extra`` is forwarded to ``PathwayNodeDTO`` (``additional_info=`` or
        ``scenarios=``).
        """
        return PathwayNodeDTO(
            node_uuid=uuid4(),
            compound_pk=1,
            name="Root",
            depth=0,
            smiles="CC",
            **extra,
        )

    def _first_esr(self, bundle):
        """Return the first ESR, failing with a clear message if there is none."""
        records = bundle.endpoint_study_records
        self.assertGreater(len(records), 0, "mapper produced no endpoint study records")
        return records[0]

    # ------------------------------------------------------------------
    # Substances / reference substances
    # ------------------------------------------------------------------

    def test_mapper_produces_bundle(self):
        """Two compounds with one root give 2 substances, 2 references, 1 ESR."""
        bundle = self._map_pathway("Test Pathway", self.compounds, [1])

        self.assertEqual(len(bundle.substances), 2)
        self.assertEqual(len(bundle.reference_substances), 2)
        self.assertEqual(len(bundle.endpoint_study_records), 1)

    def test_mapper_deduplicates_compounds(self):
        """A compound listed twice (same pk) is exported only once."""
        compounds_with_dup = [
            PathwayCompoundDTO(pk=1, name="Benzene", smiles="c1ccccc1"),
            PathwayCompoundDTO(pk=2, name="Phenol", smiles="c1ccc(O)cc1"),
            PathwayCompoundDTO(pk=1, name="Benzene", smiles="c1ccccc1"),
        ]
        bundle = self._map_pathway("Test Pathway", compounds_with_dup, [1])

        # 2 unique compounds -> 2 substances, 2 ref substances
        self.assertEqual(len(bundle.substances), 2)
        self.assertEqual(len(bundle.reference_substances), 2)
        # One endpoint study record per pathway
        self.assertEqual(len(bundle.endpoint_study_records), 1)

    def test_mapper_extracts_smiles(self):
        """Every reference substance carries a SMILES value."""
        bundle = self._map_pathway("Test Pathway", self.compounds, [1])

        smiles_values = [ref.smiles for ref in bundle.reference_substances]
        # Guard against a vacuous pass: ``all()`` over an empty list is True.
        self.assertEqual(len(smiles_values), 2)
        self.assertTrue(all(value is not None for value in smiles_values))

    def test_mapper_extracts_cas_when_present(self):
        """A CAS number given on a compound shows up on a reference substance."""
        compounds = [
            PathwayCompoundDTO(pk=1, name="Benzene", smiles="c1ccccc1", cas_number="71-43-2"),
            PathwayCompoundDTO(pk=2, name="Phenol", smiles="c1ccc(O)cc1"),
        ]
        bundle = self._map_pathway("Test Pathway", compounds, [1])

        cas_values = [ref.cas_number for ref in bundle.reference_substances]
        self.assertIn("71-43-2", cas_values)

    # ------------------------------------------------------------------
    # Transformation products
    # ------------------------------------------------------------------

    def test_mapper_builds_transformation_entries(self):
        """One edge gives one transformation product; its kinetic fraction stays unset."""
        edge = PathwayEdgeDTO(
            edge_uuid=uuid4(),
            start_compound_pks=[1],
            end_compound_pks=[2],
            probability=0.73,
        )
        bundle = self._map_pathway("Test Pathway", self.compounds, [1], edges=[edge])

        self.assertEqual(len(bundle.endpoint_study_records), 1)
        esr = self._first_esr(bundle)
        self.assertEqual(len(esr.transformation_products), 1)
        # The edge probability (0.73) is expected NOT to surface as a formation fraction.
        self.assertIsNone(esr.transformation_products[0].kinetic_formation_fraction)

    def test_mapper_deduplicates_transformation_entries(self):
        """Two edges with the same start/end compounds yield a single product entry."""
        edges = [
            PathwayEdgeDTO(edge_uuid=uuid4(), start_compound_pks=[1], end_compound_pks=[2]),
            PathwayEdgeDTO(edge_uuid=uuid4(), start_compound_pks=[1], end_compound_pks=[2]),
        ]
        bundle = self._map_pathway("Test Pathway", self.compounds, [1], edges=edges)

        esr = self._first_esr(bundle)
        self.assertEqual(len(esr.transformation_products), 1)

    def test_mapper_creates_endpoint_record_for_each_root_compound(self):
        """Each root compound gets its own ESR, named after pathway and compound."""
        bundle = self._map_pathway("Test Pathway", self.compounds, [1, 2])

        self.assertEqual(len(bundle.endpoint_study_records), 2)
        esr_names = {record.name for record in bundle.endpoint_study_records}
        self.assertIn("Biodegradation in soil - Test Pathway (Benzene)", esr_names)
        self.assertIn("Biodegradation in soil - Test Pathway (Phenol)", esr_names)

    def test_mapper_builds_root_specific_transformations_for_disjoint_subgraphs(self):
        """Each root's ESR lists only the products reachable from that root."""
        compounds = [
            PathwayCompoundDTO(pk=1, name="Root A", smiles="CC"),
            PathwayCompoundDTO(pk=2, name="Root B", smiles="CCC"),
            PathwayCompoundDTO(pk=3, name="A Child", smiles="CCCC"),
            PathwayCompoundDTO(pk=4, name="B Child", smiles="CCCCC"),
        ]
        edges = [
            PathwayEdgeDTO(edge_uuid=uuid4(), start_compound_pks=[1], end_compound_pks=[3]),
            PathwayEdgeDTO(edge_uuid=uuid4(), start_compound_pks=[2], end_compound_pks=[4]),
        ]
        bundle = self._map_pathway("Disjoint Pathway", compounds, [1, 2], edges=edges)

        substance_name_by_uuid = {sub.uuid: sub.name for sub in bundle.substances}
        reference_name_by_uuid = {ref.uuid: ref.name for ref in bundle.reference_substances}
        products_by_root: dict[str, set[str]] = {}
        for esr in bundle.endpoint_study_records:
            root_name = substance_name_by_uuid[esr.substance_uuid]
            products_by_root[root_name] = {
                reference_name_by_uuid[tp.product_reference_uuid]
                for tp in esr.transformation_products
            }

        self.assertEqual(products_by_root["Root A"], {"A Child"})
        self.assertEqual(products_by_root["Root B"], {"B Child"})

    def test_mapper_requires_all_edge_parents_to_be_reachable(self):
        """An edge whose co-reactant is not reachable from the root adds no product."""
        compounds = [
            PathwayCompoundDTO(pk=1, name="Root", smiles="CC"),
            PathwayCompoundDTO(pk=2, name="Co-reactant", smiles="CCC"),
            PathwayCompoundDTO(pk=3, name="Product", smiles="CCCC"),
        ]
        edges = [
            PathwayEdgeDTO(edge_uuid=uuid4(), start_compound_pks=[1, 2], end_compound_pks=[3]),
        ]
        bundle = self._map_pathway("Multi Parent Pathway", compounds, [1], edges=edges)

        esr = self._first_esr(bundle)
        self.assertEqual(len(esr.transformation_products), 0)

    def test_mapper_resolves_multi_parent_transformations_after_intermediate_is_reachable(self):
        """A multi-parent edge counts once its second parent is itself reachable."""
        compounds = [
            PathwayCompoundDTO(pk=1, name="Root", smiles="CC"),
            PathwayCompoundDTO(pk=2, name="Intermediate", smiles="CCC"),
            PathwayCompoundDTO(pk=3, name="Product", smiles="CCCC"),
        ]
        edges = [
            PathwayEdgeDTO(edge_uuid=uuid4(), start_compound_pks=[1], end_compound_pks=[2]),
            PathwayEdgeDTO(edge_uuid=uuid4(), start_compound_pks=[1, 2], end_compound_pks=[3]),
        ]
        bundle = self._map_pathway("Closure Pathway", compounds, [1], edges=edges)

        esr = self._first_esr(bundle)
        reference_name_by_uuid = {ref.uuid: ref.name for ref in bundle.reference_substances}
        product_names = {
            reference_name_by_uuid[tp.product_reference_uuid] for tp in esr.transformation_products
        }
        self.assertEqual(product_names, {"Intermediate", "Product"})

    # ------------------------------------------------------------------
    # Additional information on nodes
    # ------------------------------------------------------------------

    def test_mapper_populates_half_lives_from_root_node_ai(self):
        """HalfLife AI on root node → ESR.half_lives."""
        from envipy_additional_information.information import HalfLife, Interval

        hl = HalfLife(
            model="SFO", fit="ok", comment="", dt50=Interval(start=5.0, end=10.0), source="test"
        )
        bundle = self._map_root_only(self._root_node(additional_info=[hl]))

        esr = self._first_esr(bundle)
        self.assertEqual(len(esr.half_lives), 1)
        self.assertEqual(esr.half_lives[0].dt50_start, 5.0)
        self.assertEqual(esr.half_lives[0].dt50_end, 10.0)

    def test_mapper_populates_temperature_from_root_node_ai(self):
        """Temperature AI on root node → ESR.temperature as tuple."""
        from envipy_additional_information.information import Temperature, Interval

        temp = Temperature(interval=Interval(start=20.0, end=25.0))
        bundle = self._map_root_only(self._root_node(additional_info=[temp]))

        esr = self._first_esr(bundle)
        self.assertEqual(esr.temperature, (20.0, 25.0))

    def test_mapper_ignores_ai_on_non_root_nodes(self):
        """AI from non-root nodes (depth > 0) should not appear in ESR."""
        from envipy_additional_information.information import HalfLife, Interval

        hl = HalfLife(
            model="SFO", fit="ok", comment="", dt50=Interval(start=5.0, end=10.0), source="test"
        )
        non_root_node = PathwayNodeDTO(
            node_uuid=uuid4(),
            compound_pk=2,
            name="Product",
            depth=1,
            smiles="CCC",
            additional_info=[hl],
        )
        bundle = self._map_pathway(
            "P",
            [
                PathwayCompoundDTO(pk=1, name="Root", smiles="CC"),
                PathwayCompoundDTO(pk=2, name="Product", smiles="CCC"),
            ],
            [1],
            nodes=[non_root_node],
        )

        esr = self._first_esr(bundle)
        self.assertEqual(len(esr.half_lives), 0)

    def test_extracts_soil_texture2_from_root_node_ai(self):
        """SoilTexture2 AI on root node → ESR.soil_properties.sand/silt/clay."""
        from envipy_additional_information.information import SoilTexture2

        texture = SoilTexture2(sand=65.0, silt=25.0, clay=10.0)
        bundle = self._map_root_only(self._root_node(additional_info=[texture]))

        esr = self._first_esr(bundle)
        self.assertIsNotNone(esr.soil_properties)
        self.assertEqual(esr.soil_properties.sand, 65.0)
        self.assertEqual(esr.soil_properties.silt, 25.0)
        self.assertEqual(esr.soil_properties.clay, 10.0)

    def test_extracts_ph_from_root_node_ai(self):
        """Acidity AI on root node → ESR.soil_properties.ph_lower/ph_upper/ph_method."""
        from envipy_additional_information.information import Acidity, Interval

        acidity = Acidity(interval=Interval(start=6.5, end=7.2), method="CaCl2")
        bundle = self._map_root_only(self._root_node(additional_info=[acidity]))

        esr = self._first_esr(bundle)
        self.assertIsNotNone(esr.soil_properties)
        self.assertEqual(esr.soil_properties.ph_lower, 6.5)
        self.assertEqual(esr.soil_properties.ph_upper, 7.2)
        self.assertEqual(esr.soil_properties.ph_method, "CaCl2")

    def test_normalizes_blank_ph_method_to_none(self):
        """Blank Acidity method should not produce an empty PHMeasuredIn XML node."""
        from envipy_additional_information.information import Acidity, Interval

        acidity = Acidity(interval=Interval(start=6.5, end=7.2), method=" ")
        bundle = self._map_root_only(self._root_node(additional_info=[acidity]))

        esr = self._first_esr(bundle)
        self.assertIsNotNone(esr.soil_properties)
        self.assertIsNone(esr.soil_properties.ph_method)

    def test_extracts_cec_and_org_carbon(self):
        """CEC and OMContent AI on root node → ESR.soil_properties.cec/org_carbon."""
        from envipy_additional_information.information import CEC, OMContent

        cec = CEC(capacity=15.3)
        om = OMContent(in_oc=2.1)
        bundle = self._map_root_only(self._root_node(additional_info=[cec, om]))

        esr = self._first_esr(bundle)
        self.assertIsNotNone(esr.soil_properties)
        self.assertEqual(esr.soil_properties.cec, 15.3)
        self.assertEqual(esr.soil_properties.org_carbon, 2.1)

    def test_soil_properties_none_when_no_soil_ai(self):
        """No soil AI → soil_properties is None."""
        bundle = self._map_root_only()

        esr = self._first_esr(bundle)
        self.assertIsNone(esr.soil_properties)

    def test_ignores_soil_ai_on_non_root_nodes(self):
        """Soil AI on non-root nodes (depth > 0) is not extracted."""
        from envipy_additional_information.information import SoilTexture2

        texture = SoilTexture2(sand=60.0, silt=30.0, clay=10.0)
        non_root_node = PathwayNodeDTO(
            node_uuid=uuid4(),
            compound_pk=2,
            name="Product",
            depth=1,
            smiles="CCC",
            additional_info=[texture],
        )
        bundle = self._map_pathway(
            "P",
            [
                PathwayCompoundDTO(pk=1, name="Root", smiles="CC"),
                PathwayCompoundDTO(pk=2, name="Product", smiles="CCC"),
            ],
            [1],
            nodes=[non_root_node],
        )

        esr = self._first_esr(bundle)
        self.assertIsNone(esr.soil_properties)

    # ------------------------------------------------------------------
    # Scenarios
    # ------------------------------------------------------------------

    def test_mapper_merges_root_scenarios_into_single_esr_with_soil_numbers(self):
        """Scenario-aware root export should merge scenarios into one ESR linked by SoilNo."""
        from envipy_additional_information.information import HalfLife, Interval, SoilTexture2

        scenario_a = PathwayScenarioDTO(
            scenario_uuid=uuid4(),
            name="Scenario A",
            additional_info=[
                HalfLife(
                    model="SFO",
                    fit="ok",
                    comment="",
                    dt50=Interval(start=2.0, end=2.0),
                    source="A",
                ),
                SoilTexture2(sand=70.0, silt=20.0, clay=10.0),
            ],
        )
        scenario_b = PathwayScenarioDTO(
            scenario_uuid=uuid4(),
            name="Scenario B",
            additional_info=[
                HalfLife(
                    model="SFO",
                    fit="ok",
                    comment="",
                    dt50=Interval(start=5.0, end=5.0),
                    source="B",
                ),
                SoilTexture2(sand=40.0, silt=40.0, clay=20.0),
            ],
        )
        bundle = self._map_root_only(self._root_node(scenarios=[scenario_a, scenario_b]))

        self.assertEqual(len(bundle.endpoint_study_records), 1)
        esr = bundle.endpoint_study_records[0]
        self.assertEqual(esr.name, "Biodegradation in soil - P")
        self.assertEqual(len(esr.half_lives), 2)
        self.assertEqual(len(esr.soil_properties_entries), 2)

        # Half-lives and soil entries are paired through a shared soil_no_code.
        by_dt50 = {hl.dt50_start: hl for hl in esr.half_lives}
        self.assertEqual(by_dt50[2.0].soil_no_code, "2")
        self.assertEqual(by_dt50[5.0].soil_no_code, "4")
        self.assertIsNone(by_dt50[2.0].temperature)

        by_soil_no = {soil.soil_no_code: soil for soil in esr.soil_properties_entries}
        self.assertEqual(by_soil_no["2"].sand, 70.0)
        self.assertEqual(by_soil_no["4"].sand, 40.0)