Files
enviPy-bayer/epiuclid/tests/test_builders.py
Tobias O d06bd0d4fd [Feature] Minimal IUCLID export (#338)
This is an initial implementation that creates a working minimal .i6z document.
It passes schema validation and can be imported into IUCLID.

Caveat:
IUCLID files target individual compounds.
Pathways are not actually covered by the format.

Pathway data can be added under either the soil or the water-and-soil OECD endpoints.
**I currently only implemented the soil endpoint for all data.**

This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future).

Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field.
I use the XSD schema for validation, but unfortunately the IUCLID parser is not fully compliant and requires a specific order, etc.

The workflow is: find the AI structure in the XSD schema -> make the schema validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣

New specifications get released once per year, so we will have to update accordingly.
I believe that this should be a more expensive feature, as it requires significant effort to maintain.

Currently implemented for root compound only in SOIL:

- Soil Texture 2
- Soil Texture 1
- pH value
- Half-life per soil sample / scenario (mapped to disappearance; not sure about that).
- CEC
- Organic Matter (only Carbon)
- Moisture content
- Humidity

<img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62">
<img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93">
<img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c">

Reviewed-on: enviPath/enviPy#338
Co-authored-by: Tobias O <tobias.olenyi@envipath.com>
Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
2026-04-07 19:46:12 +12:00

522 lines
18 KiB
Python

"""Contract tests for IUCLID XML builders - no DB required."""
import xml.etree.ElementTree as ET
from uuid import uuid4
from django.test import SimpleTestCase, tag
from epiuclid.builders.base import (
NS_PLATFORM_CONTAINER,
NS_PLATFORM_FIELDS,
NS_PLATFORM_METADATA,
document_key,
)
from epiuclid.builders.endpoint_study import DOC_SUBTYPE, EndpointStudyRecordBuilder, NS_ESR_BIODEG
from epiuclid.builders.reference_substance import NS_REFERENCE_SUBSTANCE, ReferenceSubstanceBuilder
from epiuclid.builders.substance import NS_SUBSTANCE, SubstanceBuilder
from .factories import (
make_endpoint_study_record_data,
make_half_life_entry,
make_reference_substance_data,
make_soil_properties_data,
make_substance_data,
make_transformation_entry,
)
from .xml_assertions import assert_xpath_absent, assert_xpath_text
@tag("iuclid")
class SubstanceBuilderContractTest(SimpleTestCase):
    """Contract checks on the XML document produced by ``SubstanceBuilder``."""

    def test_maps_name_and_reference_key(self):
        """The chemical name and the reference-substance document key are emitted."""
        ref_substance_id = uuid4()
        payload = make_substance_data(name="Atrazine", reference_substance_uuid=ref_substance_id)
        document = ET.fromstring(SubstanceBuilder().build(payload))

        name_xpath = f".//{{{NS_SUBSTANCE}}}ChemicalName"
        reference_xpath = (
            f".//{{{NS_SUBSTANCE}}}ReferenceSubstance/{{{NS_SUBSTANCE}}}ReferenceSubstance"
        )

        assert_xpath_text(self, document, name_xpath, "Atrazine")
        assert_xpath_text(self, document, reference_xpath, document_key(ref_substance_id))

    def test_omits_reference_substance_when_missing(self):
        """No ReferenceSubstance element is rendered when the UUID is ``None``."""
        payload = make_substance_data(reference_substance_uuid=None)
        serialized = SubstanceBuilder().build(payload)
        document = ET.fromstring(serialized)

        assert_xpath_absent(self, document, f".//{{{NS_SUBSTANCE}}}ReferenceSubstance")

    def test_sets_substance_document_type(self):
        """PlatformMetadata/documentType carries the literal ``SUBSTANCE``."""
        payload = make_substance_data()
        serialized = SubstanceBuilder().build(payload)
        document = ET.fromstring(serialized)

        # Path is relative to the document root (no leading ".//").
        document_type_xpath = (
            f"{{{NS_PLATFORM_CONTAINER}}}PlatformMetadata/{{{NS_PLATFORM_METADATA}}}documentType"
        )
        assert_xpath_text(self, document, document_type_xpath, "SUBSTANCE")
@tag("iuclid")
class ReferenceSubstanceBuilderContractTest(SimpleTestCase):
    """Contract checks on the XML document produced by ``ReferenceSubstanceBuilder``."""

    def test_maps_structural_identifiers_and_mass_precision(self):
        """CAS number, InChI, InChIKey and the molecular weight text are emitted."""
        payload = make_reference_substance_data(molecular_weight=215.6)
        document = ET.fromstring(ReferenceSubstanceBuilder().build(payload))

        cas_xpath = (
            f".//{{{NS_REFERENCE_SUBSTANCE}}}Inventory/{{{NS_REFERENCE_SUBSTANCE}}}CASNumber"
        )
        # NOTE(review): the tag below is spelled "InChl" (lowercase L), unlike
        # "InChIKey" further down — presumably mirrors the target schema; confirm.
        inchi_xpath = (
            f".//{{{NS_REFERENCE_SUBSTANCE}}}MolecularStructuralInfo/{{{NS_REFERENCE_SUBSTANCE}}}InChl"
        )
        inchi_key_xpath = (
            f".//{{{NS_REFERENCE_SUBSTANCE}}}MolecularStructuralInfo/{{{NS_REFERENCE_SUBSTANCE}}}InChIKey"
        )
        weight_xpath = (
            f".//{{{NS_REFERENCE_SUBSTANCE}}}MolecularStructuralInfo"
            f"/{{{NS_REFERENCE_SUBSTANCE}}}MolecularWeightRange"
            f"/{{{NS_REFERENCE_SUBSTANCE}}}lowerValue"
        )
        expected_inchi = (
            "InChI=1S/C8H14ClN5/c1-4-10-7-12-6(9)11-8(13-7)"
            "14-5(2)3/h5H,4H2,1-3H3,(H2,10,11,12,13,14)"
        )

        assert_xpath_text(self, document, cas_xpath, "1912-24-9")
        assert_xpath_text(self, document, inchi_xpath, expected_inchi)
        assert_xpath_text(self, document, inchi_key_xpath, "MXWJVTOOROXGIU-UHFFFAOYSA-N")
        # Input is 215.6; the expected text has two decimal places.
        assert_xpath_text(self, document, weight_xpath, "215.60")

    def test_omits_inventory_and_weight_for_minimal_payload(self):
        """With only SMILES set, Inventory and MolecularWeightRange are absent."""
        payload = make_reference_substance_data(
            cas_number=None,
            molecular_formula=None,
            molecular_weight=None,
            inchi=None,
            inchi_key=None,
            smiles="CC",
        )
        document = ET.fromstring(ReferenceSubstanceBuilder().build(payload))

        inventory_xpath = f".//{{{NS_REFERENCE_SUBSTANCE}}}Inventory"
        weight_range_xpath = f".//{{{NS_REFERENCE_SUBSTANCE}}}MolecularWeightRange"
        smiles_xpath = (
            f".//{{{NS_REFERENCE_SUBSTANCE}}}MolecularStructuralInfo/{{{NS_REFERENCE_SUBSTANCE}}}SmilesNotation"
        )

        assert_xpath_absent(self, document, inventory_xpath)
        assert_xpath_absent(self, document, weight_range_xpath)
        assert_xpath_text(self, document, smiles_xpath, "CC")
@tag("iuclid")
class EndpointStudyRecordBuilderContractTest(SimpleTestCase):
    """Contract checks on the XML document produced by ``EndpointStudyRecordBuilder``."""

    def test_sets_document_metadata_and_parent_link(self):
        """Document type, sub-type, parent key and section order appear in PlatformMetadata."""
        parent_substance_id = uuid4()
        payload = make_endpoint_study_record_data(substance_uuid=parent_substance_id)
        document = ET.fromstring(EndpointStudyRecordBuilder().build(payload))

        metadata_root = f"{{{NS_PLATFORM_CONTAINER}}}PlatformMetadata"
        type_xpath = f"{metadata_root}/{{{NS_PLATFORM_METADATA}}}documentType"
        subtype_xpath = f"{metadata_root}/{{{NS_PLATFORM_METADATA}}}documentSubType"
        parent_xpath = f"{metadata_root}/{{{NS_PLATFORM_METADATA}}}parentDocumentKey"
        order_xpath = f"{metadata_root}/{{{NS_PLATFORM_METADATA}}}orderInSectionNo"

        assert_xpath_text(self, document, type_xpath, "ENDPOINT_STUDY_RECORD")
        assert_xpath_text(self, document, subtype_xpath, DOC_SUBTYPE)
        assert_xpath_text(self, document, parent_xpath, document_key(parent_substance_id))
        assert_xpath_text(self, document, order_xpath, "1")

    def test_esr_metadata_order_uses_stax_safe_layout(self):
        """PlatformMetadata children appear in one exact, fixed sequence."""
        payload = make_endpoint_study_record_data()
        document = ET.fromstring(EndpointStudyRecordBuilder().build(payload))

        platform_metadata = document.find(f"{{{NS_PLATFORM_CONTAINER}}}PlatformMetadata")
        self.assertIsNotNone(platform_metadata)
        # Redundant at runtime after assertIsNotNone; presumably kept so static
        # type checkers narrow the Optional result of find().
        assert platform_metadata is not None

        # Drop the "{namespace}" prefix so only local tag names are compared.
        local_names = []
        for child in platform_metadata:
            local_names.append(child.tag.split("}", 1)[1])

        expected_order = [
            "iuclidVersion",
            "documentKey",
            "documentType",
            "definitionVersion",
            "creationDate",
            "lastModificationDate",
            "name",
            "documentSubType",
            "parentDocumentKey",
            "orderInSectionNo",
            "i5Origin",
            "creationTool",
        ]
        self.assertEqual(local_names, expected_order)

    def test_omits_results_for_skeleton_payload(self):
        """A payload built from factory defaults yields no ResultsAndDiscussion element."""
        payload = make_endpoint_study_record_data()
        serialized = EndpointStudyRecordBuilder().build(payload)
        document = ET.fromstring(serialized)

        assert_xpath_absent(self, document, f".//{{{NS_ESR_BIODEG}}}ResultsAndDiscussion")

    def test_maps_half_life_and_temperature_ranges(self):
        """A half-life entry renders its model, DT50 bounds and temperature bounds."""
        half_life = make_half_life_entry(model="SFO", dt50_start=12.5, dt50_end=15.0)
        payload = make_endpoint_study_record_data(
            half_lives=[half_life],
            temperature=(20.0, 20.0),
        )
        document = ET.fromstring(EndpointStudyRecordBuilder().build(payload))

        base = (
            f".//{{{NS_ESR_BIODEG}}}ResultsAndDiscussion"
            f"/{{{NS_ESR_BIODEG}}}DTParentCompound"
            f"/{{{NS_ESR_BIODEG}}}entry"
        )
        kinetics_xpath = f"{base}/{{{NS_ESR_BIODEG}}}KineticParameters"
        value_low_xpath = f"{base}/{{{NS_ESR_BIODEG}}}Value/{{{NS_ESR_BIODEG}}}lowerValue"
        value_high_xpath = f"{base}/{{{NS_ESR_BIODEG}}}Value/{{{NS_ESR_BIODEG}}}upperValue"
        temp_low_xpath = f"{base}/{{{NS_ESR_BIODEG}}}Temp/{{{NS_ESR_BIODEG}}}lowerValue"
        temp_high_xpath = f"{base}/{{{NS_ESR_BIODEG}}}Temp/{{{NS_ESR_BIODEG}}}upperValue"

        assert_xpath_text(self, document, kinetics_xpath, "SFO")
        assert_xpath_text(self, document, value_low_xpath, "12.5")
        assert_xpath_text(self, document, value_high_xpath, "15.0")
        assert_xpath_text(self, document, temp_low_xpath, "20.0")
        assert_xpath_text(self, document, temp_high_xpath, "20.0")

    def test_maps_soil_no_on_dt_entries(self):
        """A half-life entry carrying its own soil number and temperature renders both."""
        half_life = make_half_life_entry(
            model="SFO",
            dt50_start=12.5,
            dt50_end=15.0,
            soil_no_code="2",
            temperature=(22.0, 22.0),
        )
        # Record-level temperature is None; only the entry-level one is supplied.
        payload = make_endpoint_study_record_data(half_lives=[half_life], temperature=None)
        document = ET.fromstring(EndpointStudyRecordBuilder().build(payload))

        base = (
            f".//{{{NS_ESR_BIODEG}}}ResultsAndDiscussion"
            f"/{{{NS_ESR_BIODEG}}}DTParentCompound"
            f"/{{{NS_ESR_BIODEG}}}entry"
        )
        soil_no_xpath = f"{base}/{{{NS_ESR_BIODEG}}}SoilNo/{{{NS_ESR_BIODEG}}}value"
        temp_low_xpath = f"{base}/{{{NS_ESR_BIODEG}}}Temp/{{{NS_ESR_BIODEG}}}lowerValue"

        assert_xpath_text(self, document, soil_no_xpath, "2")
        assert_xpath_text(self, document, temp_low_xpath, "22.0")

    def test_maps_transformation_entries_and_model_context(self):
        """Model name plus one transformation-product entry are rendered."""
        parent_ref_uuid = uuid4()
        product_ref_uuid = uuid4()
        transformation = make_transformation_entry(
            parent_reference_uuids=[parent_ref_uuid],
            product_reference_uuid=product_ref_uuid,
            kinetic_formation_fraction=0.42,
        )
        payload = make_endpoint_study_record_data(
            transformation_products=[transformation],
            model_name_and_version=["Test model 1.0"],
            software_name_and_version=["enviPath"],
            model_remarks=["Model UUID: 00000000-0000-0000-0000-000000000000"],
        )
        document = ET.fromstring(EndpointStudyRecordBuilder().build(payload))

        model_name_xpath = (
            f".//{{{NS_ESR_BIODEG}}}MaterialsAndMethods"
            f"/{{{NS_ESR_BIODEG}}}ModelAndSoftware"
            f"/{{{NS_ESR_BIODEG}}}ModelNameAndVersion"
        )
        assert_xpath_text(self, document, model_name_xpath, "Test model 1.0")

        entry_base = (
            f".//{{{NS_ESR_BIODEG}}}ResultsAndDiscussion"
            f"/{{{NS_ESR_BIODEG}}}TransformationProductsDetails"
            f"/{{{NS_ESR_BIODEG}}}entry"
        )
        identity_xpath = f"{entry_base}/{{{NS_ESR_BIODEG}}}IdentityOfCompound"
        # The parent key element lives in the platform-fields namespace, not the ESR one.
        parent_key_xpath = f"{entry_base}/{{{NS_ESR_BIODEG}}}ParentCompoundS/{{{NS_PLATFORM_FIELDS}}}key"
        fraction_xpath = f"{entry_base}/{{{NS_ESR_BIODEG}}}KineticFormationFraction"

        assert_xpath_text(self, document, identity_xpath, document_key(product_ref_uuid))
        assert_xpath_text(self, document, parent_key_xpath, document_key(parent_ref_uuid))
        assert_xpath_text(self, document, fraction_xpath, "0.42")

    def test_temperature_without_half_lives_in_xml(self):
        """A temperature alone (no half-lives) still yields a DTParentCompound entry."""
        payload = make_endpoint_study_record_data(temperature=(21.0, 21.0))
        document = ET.fromstring(EndpointStudyRecordBuilder().build(payload))

        base = (
            f".//{{{NS_ESR_BIODEG}}}ResultsAndDiscussion"
            f"/{{{NS_ESR_BIODEG}}}DTParentCompound"
            f"/{{{NS_ESR_BIODEG}}}entry"
        )
        temp_low_xpath = f"{base}/{{{NS_ESR_BIODEG}}}Temp/{{{NS_ESR_BIODEG}}}lowerValue"
        temp_high_xpath = f"{base}/{{{NS_ESR_BIODEG}}}Temp/{{{NS_ESR_BIODEG}}}upperValue"

        assert_xpath_text(self, document, temp_low_xpath, "21.0")
        assert_xpath_text(self, document, temp_high_xpath, "21.0")

    def test_temperature_interval_in_xml(self):
        """A (low, high) temperature tuple maps to Temp/lowerValue and Temp/upperValue."""
        half_life = make_half_life_entry()
        payload = make_endpoint_study_record_data(
            half_lives=[half_life],
            temperature=(20.0, 25.0),
        )
        document = ET.fromstring(EndpointStudyRecordBuilder().build(payload))

        base = (
            f".//{{{NS_ESR_BIODEG}}}ResultsAndDiscussion"
            f"/{{{NS_ESR_BIODEG}}}DTParentCompound"
            f"/{{{NS_ESR_BIODEG}}}entry"
        )
        temp_low_xpath = f"{base}/{{{NS_ESR_BIODEG}}}Temp/{{{NS_ESR_BIODEG}}}lowerValue"
        temp_high_xpath = f"{base}/{{{NS_ESR_BIODEG}}}Temp/{{{NS_ESR_BIODEG}}}upperValue"

        assert_xpath_text(self, document, temp_low_xpath, "20.0")
        assert_xpath_text(self, document, temp_high_xpath, "25.0")

    def test_esr_with_soil_properties_emits_structured_soil_by_default(self):
        """Soil properties render as structured fields with no free-text fallbacks."""
        soil = make_soil_properties_data(clay=15.0, silt=35.0, sand=50.0)
        payload = make_endpoint_study_record_data(soil_properties=soil)
        document = ET.fromstring(EndpointStudyRecordBuilder().build(payload))

        entry_path = (
            f".//{{{NS_ESR_BIODEG}}}StudyDesign"
            f"/{{{NS_ESR_BIODEG}}}SoilProperties"
            f"/{{{NS_ESR_BIODEG}}}entry"
        )
        clay_xpath = f"{entry_path}/{{{NS_ESR_BIODEG}}}Clay/{{{NS_ESR_BIODEG}}}lowerValue"
        silt_xpath = f"{entry_path}/{{{NS_ESR_BIODEG}}}Silt/{{{NS_ESR_BIODEG}}}lowerValue"
        sand_xpath = f"{entry_path}/{{{NS_ESR_BIODEG}}}Sand/{{{NS_ESR_BIODEG}}}lowerValue"
        classification_value_xpath = (
            f".//{{{NS_ESR_BIODEG}}}StudyDesign"
            f"/{{{NS_ESR_BIODEG}}}SoilClassification"
            f"/{{{NS_ESR_BIODEG}}}value"
        )
        classification_other_xpath = (
            f".//{{{NS_ESR_BIODEG}}}StudyDesign"
            f"/{{{NS_ESR_BIODEG}}}SoilClassification"
            f"/{{{NS_ESR_BIODEG}}}other"
        )
        soil_type_value_xpath = f"{entry_path}/{{{NS_ESR_BIODEG}}}SoilType/{{{NS_ESR_BIODEG}}}value"
        soil_type_other_xpath = f"{entry_path}/{{{NS_ESR_BIODEG}}}SoilType/{{{NS_ESR_BIODEG}}}other"
        free_text_xpath = (
            f".//{{{NS_ESR_BIODEG}}}AnyOtherInformationOnMaterialsAndMethodsInclTables"
        )
        soil_details_xpath = f".//{{{NS_ESR_BIODEG}}}DetailsOnSoilCharacteristics"

        # Texture fractions.
        assert_xpath_text(self, document, clay_xpath, "15.0")
        assert_xpath_text(self, document, silt_xpath, "35.0")
        assert_xpath_text(self, document, sand_xpath, "50.0")

        # Picklist codes are expected, with no "other" text alongside them.
        assert_xpath_text(self, document, classification_value_xpath, "1649")
        assert_xpath_absent(self, document, classification_other_xpath)
        assert_xpath_text(self, document, soil_type_value_xpath, "1026")
        assert_xpath_absent(self, document, soil_type_other_xpath)

        # Neither free-text element may be present.
        assert_xpath_absent(self, document, free_text_xpath)
        assert_xpath_absent(self, document, soil_details_xpath)

    def test_maps_multiple_soil_entries_with_soil_no(self):
        """Two soil entries produce two SoilProperties entries, in input order."""
        first_soil = make_soil_properties_data(soil_no_code="2", soil_type="LOAMY_SAND", sand=83.1)
        second_soil = make_soil_properties_data(soil_no_code="4", soil_type="CLAY_LOAM", sand=23.7)
        payload = make_endpoint_study_record_data(
            soil_properties_entries=[first_soil, second_soil]
        )
        document = ET.fromstring(EndpointStudyRecordBuilder().build(payload))

        soil_entries = document.findall(
            f".//{{{NS_ESR_BIODEG}}}StudyDesign"
            f"/{{{NS_ESR_BIODEG}}}SoilProperties"
            f"/{{{NS_ESR_BIODEG}}}entry"
        )
        self.assertEqual(len(soil_entries), 2)

        soil_no_xpath = f"{{{NS_ESR_BIODEG}}}SoilNo/{{{NS_ESR_BIODEG}}}value"
        rendered_soil_numbers = [soil_entry.findtext(soil_no_xpath) for soil_entry in soil_entries]
        self.assertEqual(rendered_soil_numbers, ["2", "4"])

    def test_maps_soil_type_and_soil_classification_to_structured_fields(self):
        """LOAMY_SAND / USDA inputs render as codes 1027 / 1649 with no ``other`` text."""
        soil = make_soil_properties_data(soil_type="LOAMY_SAND", soil_classification="USDA")
        payload = make_endpoint_study_record_data(soil_properties=soil)
        document = ET.fromstring(EndpointStudyRecordBuilder().build(payload))

        entry_path = (
            f".//{{{NS_ESR_BIODEG}}}StudyDesign"
            f"/{{{NS_ESR_BIODEG}}}SoilProperties"
            f"/{{{NS_ESR_BIODEG}}}entry"
        )
        soil_type_value_xpath = f"{entry_path}/{{{NS_ESR_BIODEG}}}SoilType/{{{NS_ESR_BIODEG}}}value"
        soil_type_other_xpath = f"{entry_path}/{{{NS_ESR_BIODEG}}}SoilType/{{{NS_ESR_BIODEG}}}other"
        classification_value_xpath = (
            f".//{{{NS_ESR_BIODEG}}}StudyDesign"
            f"/{{{NS_ESR_BIODEG}}}SoilClassification"
            f"/{{{NS_ESR_BIODEG}}}value"
        )
        classification_other_xpath = (
            f".//{{{NS_ESR_BIODEG}}}StudyDesign"
            f"/{{{NS_ESR_BIODEG}}}SoilClassification"
            f"/{{{NS_ESR_BIODEG}}}other"
        )

        assert_xpath_text(self, document, soil_type_value_xpath, "1027")
        assert_xpath_absent(self, document, soil_type_other_xpath)
        assert_xpath_text(self, document, classification_value_xpath, "1649")
        assert_xpath_absent(self, document, classification_other_xpath)

    def test_unknown_soil_type_and_classification_use_open_picklist(self):
        """SILTY_SAND / UK_ADAS inputs render as code 1342 plus an ``other`` text."""
        soil = make_soil_properties_data(soil_type="SILTY_SAND", soil_classification="UK_ADAS")
        payload = make_endpoint_study_record_data(soil_properties=soil)
        document = ET.fromstring(EndpointStudyRecordBuilder().build(payload))

        entry_path = (
            f".//{{{NS_ESR_BIODEG}}}StudyDesign"
            f"/{{{NS_ESR_BIODEG}}}SoilProperties"
            f"/{{{NS_ESR_BIODEG}}}entry"
        )
        soil_type_value_xpath = f"{entry_path}/{{{NS_ESR_BIODEG}}}SoilType/{{{NS_ESR_BIODEG}}}value"
        soil_type_other_xpath = f"{entry_path}/{{{NS_ESR_BIODEG}}}SoilType/{{{NS_ESR_BIODEG}}}other"
        classification_value_xpath = (
            f".//{{{NS_ESR_BIODEG}}}StudyDesign"
            f"/{{{NS_ESR_BIODEG}}}SoilClassification"
            f"/{{{NS_ESR_BIODEG}}}value"
        )
        classification_other_xpath = (
            f".//{{{NS_ESR_BIODEG}}}StudyDesign"
            f"/{{{NS_ESR_BIODEG}}}SoilClassification"
            f"/{{{NS_ESR_BIODEG}}}other"
        )

        # The expected "other" texts use spaces where the inputs had underscores.
        assert_xpath_text(self, document, soil_type_value_xpath, "1342")
        assert_xpath_text(self, document, soil_type_other_xpath, "SILTY SAND")
        assert_xpath_text(self, document, classification_value_xpath, "1342")
        assert_xpath_text(self, document, classification_other_xpath, "UK ADAS")

    def test_infers_usda_soil_classification_from_soil_type(self):
        """With no classification given, LOAMY_SAND still yields classification code 1649."""
        soil = make_soil_properties_data(soil_type="LOAMY_SAND", soil_classification=None)
        payload = make_endpoint_study_record_data(soil_properties=soil)
        document = ET.fromstring(EndpointStudyRecordBuilder().build(payload))

        classification_value_xpath = (
            f".//{{{NS_ESR_BIODEG}}}StudyDesign"
            f"/{{{NS_ESR_BIODEG}}}SoilClassification"
            f"/{{{NS_ESR_BIODEG}}}value"
        )
        classification_other_xpath = (
            f".//{{{NS_ESR_BIODEG}}}StudyDesign/{{{NS_ESR_BIODEG}}}SoilClassification/{{{NS_ESR_BIODEG}}}other"
        )

        assert_xpath_text(self, document, classification_value_xpath, "1649")
        assert_xpath_absent(self, document, classification_other_xpath)

    def test_esr_without_soil_properties_omits_study_design(self):
        """With ``soil_properties=None`` the XML has no StudyDesign element."""
        payload = make_endpoint_study_record_data(soil_properties=None)
        serialized = EndpointStudyRecordBuilder().build(payload)
        document = ET.fromstring(serialized)

        assert_xpath_absent(self, document, f".//{{{NS_ESR_BIODEG}}}StudyDesign")

    def test_omits_empty_ph_measured_in(self):
        """An empty ``ph_method`` leaves no trace of ``PHMeasuredIn`` in the output."""
        soil = make_soil_properties_data(ph_method="")
        payload = make_endpoint_study_record_data(soil_properties=soil)
        document = ET.fromstring(EndpointStudyRecordBuilder().build(payload))

        # Substring check on the re-serialized XML, so the name is rejected
        # anywhere in the document regardless of namespace or nesting.
        serialized = ET.tostring(document, encoding="unicode")
        self.assertNotIn("PHMeasuredIn", serialized)