Files
enviPy-bayer/epiuclid/tests/test_xsd_validation.py
Tobias O d06bd0d4fd [Feature] Minimal IUCLID export (#338)
This is an initial implementation that creates a working minimal .i6z document.
It passes schema validation and can be imported into IUCLID.

Caveat:
IUCLID files target individual compounds.
Pathway is not actually covered by the format.

Pathway data can be added under either the soil or the water-and-soil OECD endpoints.
**I currently only implemented the soil endpoint for all data.**

This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future).

Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field.
I use the XSD schema for validation, but unfortunately the IUCLID parser is not fully compliant with it and additionally requires a specific element order, etc.

The workflow is: find the AI structure in the XSD schema -> make the schema validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣

New specifications get released once per year, so we will have to update accordingly.
I believe that this should be a more expensive feature, as it requires significant effort to uphold.

Currently implemented for root compound only in SOIL:

- Soil Texture 2
- Soil Texture 1
- pH value
- Half-life per soil sample / scenario (mapped to disappearance; not sure about that).
- CEC
- Organic Matter (only Carbon)
- Moisture content
- Humidity

<img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62">
<img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93">
<img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c">

Reviewed-on: enviPath/enviPy#338
Co-authored-by: Tobias O <tobias.olenyi@envipath.com>
Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
2026-04-07 19:46:12 +12:00

149 lines
6.1 KiB
Python

"""XSD validation tests for IUCLID XML builders — no DB required."""
import xml.etree.ElementTree as ET
from django.test import SimpleTestCase, tag
from epiuclid.builders.base import NS_PLATFORM_CONTAINER
from epiuclid.builders.endpoint_study import EndpointStudyRecordBuilder
from epiuclid.builders.reference_substance import ReferenceSubstanceBuilder
from epiuclid.builders.substance import SubstanceBuilder
from epiuclid.schemas.loader import get_content_schema, get_document_schema
from .factories import (
make_endpoint_study_record_data,
make_half_life_entry,
make_reference_substance_data,
make_soil_properties_data,
make_substance_data,
make_transformation_entry,
)
def _content_element(xml_str: str) -> ET.Element:
    """Return the first child element of ``<Content>`` in a full i6d XML string.

    The string is parsed with ElementTree and ``Content`` is looked up as a
    direct child of the root element, qualified with ``NS_PLATFORM_CONTAINER``.

    Raises:
        AssertionError: if ``Content`` is not found or has no child elements.
    """
    document_root = ET.fromstring(xml_str)
    # ElementTree expects namespaced tags in Clark notation: "{namespace}local".
    content_tag = f"{{{NS_PLATFORM_CONTAINER}}}Content"
    content_node = document_root.find(content_tag)
    assert content_node is not None and len(content_node) > 0
    return content_node[0]
def _assert_content_valid(xml_str: str, doc_type: str, subtype: str | None = None) -> None:
    """Validate the content element of *xml_str* against its content schema.

    The schema is obtained via ``get_content_schema(doc_type, subtype)`` and
    applied to the element returned by ``_content_element(xml_str)``. Nothing
    is caught here, so any exception raised while loading the schema,
    extracting the element, or validating propagates to the calling test.
    """
    content_schema = get_content_schema(doc_type, subtype)
    first_content_child = _content_element(xml_str)
    content_schema.validate(first_content_child)
@tag("iuclid")
class SubstanceXSDValidationTest(SimpleTestCase):
    """Validate ``SubstanceBuilder`` output against the ``SUBSTANCE`` content schema."""

    def _assert_substance_valid(self, **factory_overrides) -> None:
        """Build a substance document from factory data and validate its content.

        ``factory_overrides`` are forwarded unchanged to ``make_substance_data``.
        """
        substance_data = make_substance_data(**factory_overrides)
        document_xml = SubstanceBuilder().build(substance_data)
        _assert_content_valid(document_xml, "SUBSTANCE")

    def test_substance_validates_against_xsd(self):
        """Substance built from the factory defaults validates."""
        self._assert_substance_valid()

    def test_minimal_substance_validates_against_xsd(self):
        """Substance with only a name and no reference substance UUID validates."""
        self._assert_substance_valid(name="Unknown compound", reference_substance_uuid=None)
@tag("iuclid")
class ReferenceSubstanceXSDValidationTest(SimpleTestCase):
    """Validate ``ReferenceSubstanceBuilder`` output against the ``REFERENCE_SUBSTANCE`` schema."""

    def _assert_reference_substance_valid(self, **factory_overrides) -> None:
        """Build a reference substance document from factory data and validate its content.

        ``factory_overrides`` are forwarded unchanged to
        ``make_reference_substance_data``.
        """
        reference_data = make_reference_substance_data(**factory_overrides)
        document_xml = ReferenceSubstanceBuilder().build(reference_data)
        _assert_content_valid(document_xml, "REFERENCE_SUBSTANCE")

    def test_reference_substance_validates_against_xsd(self):
        """Reference substance built from the factory defaults validates."""
        self._assert_reference_substance_valid()

    def test_reference_substance_minimal_validates_against_xsd(self):
        """Reference substance with only a name and SMILES validates.

        All other identifiers passed to the factory are explicitly ``None``.
        """
        self._assert_reference_substance_valid(
            name="Minimal compound",
            smiles="CC",
            cas_number=None,
            molecular_formula=None,
            molecular_weight=None,
            inchi=None,
            inchi_key=None,
        )
@tag("iuclid")
class EndpointStudyRecordXSDValidationTest(SimpleTestCase):
    """Validate ``EndpointStudyRecordBuilder`` output against the BiodegradationInSoil schema."""

    def _assert_soil_esr_valid(self, record_data) -> None:
        """Build an endpoint study record from *record_data* and validate its content.

        Validation uses document type ``ENDPOINT_STUDY_RECORD`` with subtype
        ``BiodegradationInSoil``.
        """
        document_xml = EndpointStudyRecordBuilder().build(record_data)
        _assert_content_valid(document_xml, "ENDPOINT_STUDY_RECORD", "BiodegradationInSoil")

    def test_endpoint_study_record_validates_against_xsd(self):
        """ESR with a half-life, temperature, a transformation product and model info validates."""
        record_data = make_endpoint_study_record_data(
            name="Biodegradation study with data",
            half_lives=[make_half_life_entry()],
            temperature=(20.0, 20.0),
            transformation_products=[make_transformation_entry()],
            model_name_and_version=["Test model 1.0"],
            software_name_and_version=["enviPath"],
            model_remarks=["Model UUID: 00000000-0000-0000-0000-000000000000"],
        )
        self._assert_soil_esr_valid(record_data)

    def test_temperature_only_esr_validates_against_xsd(self):
        """ESR that sets only a name and a temperature pair validates."""
        record_data = make_endpoint_study_record_data(
            name="Biodegradation study with temperature only",
            temperature=(21.0, 21.0),
        )
        self._assert_soil_esr_valid(record_data)

    def test_skeleton_esr_validates_against_xsd(self):
        """ESR that sets nothing but a name validates."""
        record_data = make_endpoint_study_record_data(name="Biodegradation study")
        self._assert_soil_esr_valid(record_data)

    def test_esr_with_soil_properties_validates_against_xsd(self):
        """ESR carrying factory-default soil properties validates."""
        record_data = make_endpoint_study_record_data(
            name="Biodegradation study with soil properties",
            soil_properties=make_soil_properties_data(),
        )
        self._assert_soil_esr_valid(record_data)

    def test_esr_with_multiple_soils_and_linked_dt_validates_against_xsd(self):
        """ESR with two soil entries and one half-life per soil code validates.

        Each half-life entry is given the same ``soil_no_code`` as one of the
        soil property entries ("2" and "4").
        """
        soil_entries = [
            make_soil_properties_data(soil_no_code="2", soil_type="LOAMY_SAND"),
            make_soil_properties_data(soil_no_code="4", soil_type="CLAY_LOAM"),
        ]
        half_life_entries = [
            make_half_life_entry(dt50_start=1.0, dt50_end=1.0, soil_no_code="2"),
            make_half_life_entry(dt50_start=2.0, dt50_end=2.0, soil_no_code="4"),
        ]
        record_data = make_endpoint_study_record_data(
            name="Biodegradation study with multiple soils",
            soil_properties_entries=soil_entries,
            half_lives=half_life_entries,
        )
        self._assert_soil_esr_valid(record_data)
@tag("iuclid")
class DocumentWrapperXSDValidationTest(SimpleTestCase):
    """Validate a complete i6d document against the container (document) schema."""

    def test_full_i6d_document_validates_against_container_xsd(self):
        """Validate the Document wrapper (PlatformMetadata + Content + Attachments + ModificationHistory).

        The whole document is checked with the document schema, ignoring
        errors whose reason mentions "unavailable namespace". The first child
        of Content is then validated separately against the SUBSTANCE content
        schema.
        """
        document_xml = SubstanceBuilder().build(make_substance_data())
        document_root = ET.fromstring(document_xml)

        doc_schema = get_document_schema()
        content_schema = get_content_schema("SUBSTANCE")

        # "unavailable namespace" errors are an xmlschema quirk: they occur because
        # Content has children that are not defined in the schema being applied here.
        # Those children are validated against the content schema below instead.
        wrapper_errors = []
        for error in doc_schema.iter_errors(document_root):
            if "unavailable namespace" in str(error.reason):
                continue
            wrapper_errors.append(error)
        self.assertEqual(wrapper_errors, [], msg=f"Document wrapper errors: {wrapper_errors}")

        content_schema.validate(_content_element(document_xml))