[Feature] Minimal IUCLID export (#338)

This is an initial implementation that creates a working minimal .i6z document.
It passes schema validation and can be imported into IUCLID.

Caveat:
IUCLID files target individual compounds.
Pathway is not actually covered by the format.

Pathway information can be added under either the soil or the water-and-soil OECD endpoints.
**I currently only implemented the soil endpoint for all data.**

This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future).

Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field.
I use the XSD schema for validation, but unfortunately the IUCLID parser is not fully compliant with it and imposes extra requirements, such as a specific order.

The workflow is: find the AI structure in the XSD schema -> make the schema validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣

New specifications get released once per year, so we will have to update accordingly.
I believe that this should be a more expensive feature, as it requires significant effort to maintain.

Currently implemented for root compound only in SOIL:

- Soil Texture 2
- Soil Texture 1
- pH value
- Half-life per soil sample / scenario (mapped to disappearance; not sure about that).
- CEC
- Organic Matter (only Carbon)
- Moisture content
- Humidity

<img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62">
<img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93">
<img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c">

Reviewed-on: enviPath/enviPy#338
Co-authored-by: Tobias O <tobias.olenyi@envipath.com>
Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
This commit is contained in:
2026-04-07 19:46:12 +12:00
committed by jebus
parent f7c45b8015
commit d06bd0d4fd
49 changed files with 66402 additions and 1014 deletions

199
epiuclid/tests/test_i6z.py Normal file
View File

@ -0,0 +1,199 @@
"""Tests for i6z archive assembly."""
import io
import xml.etree.ElementTree as ET
import zipfile
from uuid import uuid4
from django.test import SimpleTestCase, tag
from epiuclid.serializers.i6z import I6ZSerializer
from epiuclid.serializers.pathway_mapper import (
IUCLIDDocumentBundle,
IUCLIDEndpointStudyRecordData,
IUCLIDReferenceSubstanceData,
IUCLIDSubstanceData,
IUCLIDTransformationProductEntry,
)
def _make_bundle() -> IUCLIDDocumentBundle:
    """Build a minimal bundle with one document of each kind.

    The bundle holds a single "Benzene" substance, the reference substance it
    points to via ``reference_substance_uuid``, and one endpoint study record
    attached to the substance via ``substance_uuid``. Fresh UUIDs are
    generated on every call.
    """
    reference_id = uuid4()
    substance_id = uuid4()

    benzene_substance = IUCLIDSubstanceData(
        uuid=substance_id,
        name="Benzene",
        reference_substance_uuid=reference_id,
    )
    benzene_reference = IUCLIDReferenceSubstanceData(
        uuid=reference_id,
        name="Benzene",
        smiles="c1ccccc1",
        cas_number="71-43-2",
        molecular_formula="C6H6",
        molecular_weight=78.11,
    )
    study_record = IUCLIDEndpointStudyRecordData(
        uuid=uuid4(),
        substance_uuid=substance_id,
        name="Endpoint study - Benzene",
    )

    return IUCLIDDocumentBundle(
        substances=[benzene_substance],
        reference_substances=[benzene_reference],
        endpoint_study_records=[study_record],
    )
def _make_bundle_with_transformation_links() -> tuple[IUCLIDDocumentBundle, str, str]:
    """Build a bundle whose study record carries one transformation product.

    The bundle contains one "Benzene" substance, two reference substances
    (the parent "Benzene" and the product "Phenol"), and one endpoint study
    record with a single transformation product entry linking the product
    reference to its parent reference.

    Returns:
        A ``(bundle, parent_key, product_key)`` tuple. The two keys are the
        parent and product reference-substance UUIDs formatted as
        ``"<uuid>/0"``.
    """
    parent_reference_id = uuid4()
    product_reference_id = uuid4()
    substance_id = uuid4()

    benzene_substance = IUCLIDSubstanceData(
        uuid=substance_id,
        name="Benzene",
        reference_substance_uuid=parent_reference_id,
    )
    parent_reference = IUCLIDReferenceSubstanceData(
        uuid=parent_reference_id, name="Benzene", smiles="c1ccccc1"
    )
    product_reference = IUCLIDReferenceSubstanceData(
        uuid=product_reference_id, name="Phenol", smiles="c1ccc(O)cc1"
    )
    product_entry = IUCLIDTransformationProductEntry(
        uuid=uuid4(),
        product_reference_uuid=product_reference_id,
        parent_reference_uuids=[parent_reference_id],
    )
    study_record = IUCLIDEndpointStudyRecordData(
        uuid=uuid4(),
        substance_uuid=substance_id,
        name="Endpoint study - Benzene",
        transformation_products=[product_entry],
    )

    bundle = IUCLIDDocumentBundle(
        substances=[benzene_substance],
        reference_substances=[parent_reference, product_reference],
        endpoint_study_records=[study_record],
    )
    parent_key = f"{parent_reference_id}/0"
    product_key = f"{product_reference_id}/0"
    return bundle, parent_key, product_key
@tag("iuclid")
class I6ZSerializerTest(SimpleTestCase):
    """Checks on the archive bytes returned by ``I6ZSerializer().serialize``.

    The tests inspect the result as a zip archive: the presence of
    ``manifest.xml``, the number of ``.i6d`` entries, the document types and
    links listed in the manifest, and that each ``.i6d`` entry parses as XML.
    """

    # XML namespace of every element looked up inside ``manifest.xml``.
    MANIFEST_NS = "http://iuclid6.echa.europa.eu/namespaces/manifest/v1"

    @staticmethod
    def _open_archive(bundle: IUCLIDDocumentBundle) -> zipfile.ZipFile:
        """Serialize *bundle* and open the resulting bytes as an in-memory zip."""
        return zipfile.ZipFile(io.BytesIO(I6ZSerializer().serialize(bundle)))

    def _manifest_root(self, bundle: IUCLIDDocumentBundle) -> ET.Element:
        """Serialize *bundle* and return the parsed root of its ``manifest.xml``."""
        with self._open_archive(bundle) as archive:
            manifest_xml = archive.read("manifest.xml").decode("utf-8")
        return ET.fromstring(manifest_xml)

    def _q(self, tag_name: str) -> str:
        """Return *tag_name* qualified with the manifest namespace (``{ns}tag``)."""
        return f"{{{self.MANIFEST_NS}}}{tag_name}"

    def _manifest_documents(self, bundle: IUCLIDDocumentBundle) -> list[ET.Element]:
        """Return every ``document`` element found anywhere in the manifest."""
        return self._manifest_root(bundle).findall(f".//{self._q('document')}")

    def test_output_is_valid_zip(self):
        data = I6ZSerializer().serialize(_make_bundle())
        self.assertTrue(zipfile.is_zipfile(io.BytesIO(data)))

    def test_contains_manifest(self):
        with self._open_archive(_make_bundle()) as archive:
            self.assertIn("manifest.xml", archive.namelist())

    def test_contains_i6d_files(self):
        with self._open_archive(_make_bundle()) as archive:
            names = archive.namelist()
        # manifest + 1 substance + 1 ref substance + 1 ESR = 4 files
        self.assertEqual(len(names), 4)
        i6d_files = [n for n in names if n.endswith(".i6d")]
        self.assertEqual(len(i6d_files), 3)

    def test_manifest_references_all_documents(self):
        docs = self._manifest_documents(_make_bundle())
        self.assertEqual(len(docs), 3)

        types = set()
        for doc in docs:
            type_elem = doc.find(self._q("type"))
            self.assertIsNotNone(type_elem)
            assert type_elem is not None  # narrows the Optional for type checkers
            types.add(type_elem.text)
        self.assertEqual(types, {"SUBSTANCE", "REFERENCE_SUBSTANCE", "ENDPOINT_STUDY_RECORD"})

    def test_manifest_contains_expected_document_links(self):
        link_path = f"{self._q('links')}/{self._q('link')}"
        links_by_type: dict[str, set[tuple[str | None, str | None]]] = {}
        for doc in self._manifest_documents(_make_bundle()):
            doc_type = doc.findtext(self._q("type"))
            if not doc_type:
                continue
            links = {
                (link.findtext(self._q("ref-type")), link.findtext(self._q("ref-uuid")))
                for link in doc.findall(link_path)
            }
            # Merge instead of assigning so that several documents of the same
            # type do not overwrite each other's links.
            links_by_type.setdefault(doc_type, set()).update(links)

        # ``.get`` turns a missing document type into an assertion failure
        # rather than a KeyError.
        substance_ref_types = {
            ref_type for ref_type, _ in links_by_type.get("SUBSTANCE", set())
        }
        esr_ref_types = {
            ref_type for ref_type, _ in links_by_type.get("ENDPOINT_STUDY_RECORD", set())
        }
        self.assertIn("REFERENCE", substance_ref_types)
        self.assertIn("CHILD", substance_ref_types)
        self.assertIn("PARENT", esr_ref_types)

    def test_i6d_files_are_valid_xml(self):
        with self._open_archive(_make_bundle()) as archive:
            for name in archive.namelist():
                if not name.endswith(".i6d"):
                    continue
                # subTest attributes a parse error to the offending entry.
                with self.subTest(name=name):
                    content = archive.read(name).decode("utf-8")
                    # Should not raise
                    ET.fromstring(content)

    def test_manifest_links_esr_to_transformation_reference_substances(self):
        bundle, parent_ref_key, product_ref_key = _make_bundle_with_transformation_links()

        # First document whose type is ENDPOINT_STUDY_RECORD, if any.
        esr_doc = next(
            (
                doc
                for doc in self._manifest_documents(bundle)
                if doc.findtext(self._q("type")) == "ENDPOINT_STUDY_RECORD"
            ),
            None,
        )
        self.assertIsNotNone(esr_doc)
        assert esr_doc is not None  # narrows the Optional for type checkers

        reference_links = {
            link.findtext(self._q("ref-uuid"))
            for link in esr_doc.findall(f"{self._q('links')}/{self._q('link')}")
            if link.findtext(self._q("ref-type")) == "REFERENCE"
        }
        self.assertIn(parent_ref_key, reference_links)
        self.assertIn(product_ref_key, reference_links)