forked from enviPath/enviPy
[Feature] Minimal IUCLID export (#338)
This is an initial implementation that creates a working minimal .i6z document. It passes schema validation and can be imported into IUCLID. Caveat: IUCLID files target individual compounds. Pathways are not actually covered by the format. It can be added in either soil or water and soil OECD endpoints. **I currently only implemented the soil endpoint for all data.** This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future). Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field. I use the XSD schema for validation, but unfortunately the IUCLID parser is not fully compliant and requires a specific order, etc. The workflow is: find the AI structure from the XSD schema -> make the schema validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣 New specifications get released once per year, so we will have to update accordingly. I believe that this should be a more expensive feature, as it requires significant effort to maintain. Currently implemented for root compound only in SOIL: - Soil Texture 2 - Soil Texture 1 - pH value - Half-life per soil sample / scenario (mapped to disappearance; not sure about that). - CEC - Organic Matter (only Carbon) - Moisture content - Humidity <img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62"> <img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93"> <img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c"> Reviewed-on: enviPath/enviPy#338 Co-authored-by: Tobias O <tobias.olenyi@envipath.com> Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
This commit is contained in:
148
epiuclid/tests/test_xsd_validation.py
Normal file
148
epiuclid/tests/test_xsd_validation.py
Normal file
@ -0,0 +1,148 @@
|
||||
"""XSD validation tests for IUCLID XML builders — no DB required."""
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from django.test import SimpleTestCase, tag
|
||||
|
||||
from epiuclid.builders.base import NS_PLATFORM_CONTAINER
|
||||
from epiuclid.builders.endpoint_study import EndpointStudyRecordBuilder
|
||||
from epiuclid.builders.reference_substance import ReferenceSubstanceBuilder
|
||||
from epiuclid.builders.substance import SubstanceBuilder
|
||||
from epiuclid.schemas.loader import get_content_schema, get_document_schema
|
||||
|
||||
from .factories import (
|
||||
make_endpoint_study_record_data,
|
||||
make_half_life_entry,
|
||||
make_reference_substance_data,
|
||||
make_soil_properties_data,
|
||||
make_substance_data,
|
||||
make_transformation_entry,
|
||||
)
|
||||
|
||||
|
||||
def _content_element(xml_str: str) -> ET.Element:
    """Return the first element nested under ``<Content>`` in a full i6d XML string.

    The ``<Content>`` element is looked up as a direct child of the document
    root, in the platform container namespace.

    Raises:
        AssertionError: if ``<Content>`` is missing or has no children.
    """
    document_root = ET.fromstring(xml_str)
    wrapper = document_root.find(f"{{{NS_PLATFORM_CONTAINER}}}Content")
    # Check presence and non-emptiness explicitly; Element truthiness is
    # length-based and must not be relied upon here.
    assert wrapper is not None
    assert len(wrapper) > 0
    return wrapper[0]
|
||||
|
||||
|
||||
def _assert_content_valid(xml_str: str, doc_type: str, subtype: str | None = None) -> None:
    """Validate the ``<Content>`` payload of *xml_str* against its content schema.

    The schema is selected via ``get_content_schema(doc_type, subtype)``; any
    failure reported by the schema's ``validate`` call propagates to the caller.
    """
    validator = get_content_schema(doc_type, subtype)
    payload = _content_element(xml_str)
    validator.validate(payload)
|
||||
|
||||
|
||||
@tag("iuclid")
class SubstanceXSDValidationTest(SimpleTestCase):
    """XSD checks for documents produced by ``SubstanceBuilder``."""

    def test_substance_validates_against_xsd(self):
        # Factory defaults, no overrides.
        document = SubstanceBuilder().build(make_substance_data())
        _assert_content_valid(document, "SUBSTANCE")

    def test_minimal_substance_validates_against_xsd(self):
        # Same check, but without a linked reference substance.
        minimal = make_substance_data(name="Unknown compound", reference_substance_uuid=None)
        document = SubstanceBuilder().build(minimal)
        _assert_content_valid(document, "SUBSTANCE")
|
||||
|
||||
|
||||
@tag("iuclid")
class ReferenceSubstanceXSDValidationTest(SimpleTestCase):
    """XSD checks for documents produced by ``ReferenceSubstanceBuilder``."""

    def test_reference_substance_validates_against_xsd(self):
        # Factory defaults, no overrides.
        document = ReferenceSubstanceBuilder().build(make_reference_substance_data())
        _assert_content_valid(document, "REFERENCE_SUBSTANCE")

    def test_reference_substance_minimal_validates_against_xsd(self):
        # Only name and SMILES are supplied; every other identifier is None.
        sparse_fields = {
            "name": "Minimal compound",
            "smiles": "CC",
            "cas_number": None,
            "molecular_formula": None,
            "molecular_weight": None,
            "inchi": None,
            "inchi_key": None,
        }
        minimal = make_reference_substance_data(**sparse_fields)
        document = ReferenceSubstanceBuilder().build(minimal)
        _assert_content_valid(document, "REFERENCE_SUBSTANCE")
|
||||
|
||||
|
||||
@tag("iuclid")
class EndpointStudyRecordXSDValidationTest(SimpleTestCase):
    """XSD checks for ``EndpointStudyRecordBuilder`` against the BiodegradationInSoil schema."""

    def _build_and_validate(self, data) -> None:
        """Build an endpoint study record from *data* and validate its content."""
        document = EndpointStudyRecordBuilder().build(data)
        _assert_content_valid(document, "ENDPOINT_STUDY_RECORD", "BiodegradationInSoil")

    def test_endpoint_study_record_validates_against_xsd(self):
        # Record with half-life, temperature, transformation and model details.
        record = make_endpoint_study_record_data(
            name="Biodegradation study with data",
            half_lives=[make_half_life_entry()],
            temperature=(20.0, 20.0),
            transformation_products=[make_transformation_entry()],
            model_name_and_version=["Test model 1.0"],
            software_name_and_version=["enviPath"],
            model_remarks=["Model UUID: 00000000-0000-0000-0000-000000000000"],
        )
        self._build_and_validate(record)

    def test_temperature_only_esr_validates_against_xsd(self):
        # Temperature is the only value supplied beyond the name.
        record = make_endpoint_study_record_data(
            name="Biodegradation study with temperature only",
            temperature=(21.0, 21.0),
        )
        self._build_and_validate(record)

    def test_skeleton_esr_validates_against_xsd(self):
        # Nothing but a name.
        record = make_endpoint_study_record_data(name="Biodegradation study")
        self._build_and_validate(record)

    def test_esr_with_soil_properties_validates_against_xsd(self):
        """ESR carrying full soil properties validates against the BiodegradationInSoil XSD."""
        record = make_endpoint_study_record_data(
            name="Biodegradation study with soil properties",
            soil_properties=make_soil_properties_data(),
        )
        self._build_and_validate(record)

    def test_esr_with_multiple_soils_and_linked_dt_validates_against_xsd(self):
        # Two soil entries, each with a half-life entry sharing its soil_no_code.
        soils = [
            make_soil_properties_data(soil_no_code="2", soil_type="LOAMY_SAND"),
            make_soil_properties_data(soil_no_code="4", soil_type="CLAY_LOAM"),
        ]
        half_life_entries = [
            make_half_life_entry(dt50_start=1.0, dt50_end=1.0, soil_no_code="2"),
            make_half_life_entry(dt50_start=2.0, dt50_end=2.0, soil_no_code="4"),
        ]
        record = make_endpoint_study_record_data(
            name="Biodegradation study with multiple soils",
            soil_properties_entries=soils,
            half_lives=half_life_entries,
        )
        self._build_and_validate(record)
|
||||
|
||||
|
||||
@tag("iuclid")
class DocumentWrapperXSDValidationTest(SimpleTestCase):
    """XSD checks for the outer i6d Document wrapper."""

    def test_full_i6d_document_validates_against_container_xsd(self):
        """Validate the Document wrapper (PlatformMetadata + Content + Attachments + ModificationHistory).

        The container schema declares the xs:any inside Content with
        processContents="strict", so the content schema is loaded as well and
        the Content payload is validated against it separately.
        """
        document = SubstanceBuilder().build(make_substance_data())
        root = ET.fromstring(document)

        wrapper_schema = get_document_schema()
        payload_schema = get_content_schema("SUBSTANCE")

        # xmlschema quirk: children of the Content element are not defined in
        # the schema used here, which surfaces as "unavailable namespace"
        # errors. Those are skipped; anything else is a real failure.
        remaining = []
        for error in wrapper_schema.iter_errors(root):
            if "unavailable namespace" in str(error.reason):
                continue
            remaining.append(error)
        self.assertEqual(remaining, [], msg=f"Document wrapper errors: {remaining}")

        payload = _content_element(document)
        payload_schema.validate(payload)
|
||||
Reference in New Issue
Block a user