Files
enviPy-bayer/epiuclid/tests/test_i6z.py
Tobias O d06bd0d4fd [Feature] Minimal IUCLID export (#338)
This is an initial implementation that creates a working minimal .i6z document.
It passes schema validation and can be imported into IUCLID.

Caveat:
IUCLID files target individual compounds.
Pathway is not actually covered by the format.

Pathway data can be added to either the soil or the water-and-soil OECD endpoints.
**I currently only implemented the soil endpoint for all data.**

This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future).

Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field.
I use the XSD schema for validation, but unfortunately the IUCLID parser is not fully compliant and requires a specific order, etc.

The workflow is: find the AI structure from the XSD schema -> make the schema validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣

New specifications get released once per year, so we will have to update accordingly.
I believe that this should be a more expensive feature, as it requires significant effort to maintain.

Currently implemented for root compound only in SOIL:

- Soil Texture 2
- Soil Texture 1
- pH value
- Half-life per soil sample / scenario (mapped to disappearance; not sure about that).
- CEC
- Organic Matter (only Carbon)
- Moisture content
- Humidity

<img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62">
<img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93">
<img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c">

Reviewed-on: enviPath/enviPy#338
Co-authored-by: Tobias O <tobias.olenyi@envipath.com>
Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
2026-04-07 19:46:12 +12:00

200 lines
7.0 KiB
Python

"""Tests for i6z archive assembly."""
import io
import xml.etree.ElementTree as ET
import zipfile
from uuid import uuid4
from django.test import SimpleTestCase, tag
from epiuclid.serializers.i6z import I6ZSerializer
from epiuclid.serializers.pathway_mapper import (
IUCLIDDocumentBundle,
IUCLIDEndpointStudyRecordData,
IUCLIDReferenceSubstanceData,
IUCLIDSubstanceData,
IUCLIDTransformationProductEntry,
)
def _make_bundle() -> IUCLIDDocumentBundle:
    """Build a minimal bundle for the archive tests.

    The bundle holds one substance, the reference substance it points to,
    and one endpoint study record attached to that substance.
    """
    reference_id = uuid4()
    substance_id = uuid4()

    benzene_substance = IUCLIDSubstanceData(
        uuid=substance_id,
        name="Benzene",
        reference_substance_uuid=reference_id,
    )
    benzene_reference = IUCLIDReferenceSubstanceData(
        uuid=reference_id,
        name="Benzene",
        smiles="c1ccccc1",
        cas_number="71-43-2",
        molecular_formula="C6H6",
        molecular_weight=78.11,
    )
    benzene_study = IUCLIDEndpointStudyRecordData(
        uuid=uuid4(),
        substance_uuid=substance_id,
        name="Endpoint study - Benzene",
    )

    return IUCLIDDocumentBundle(
        substances=[benzene_substance],
        reference_substances=[benzene_reference],
        endpoint_study_records=[benzene_study],
    )
def _make_bundle_with_transformation_links() -> tuple[IUCLIDDocumentBundle, str, str]:
    """Build a bundle whose endpoint study record carries one transformation product.

    Returns:
        A 3-tuple of the bundle, the string ``"<parent reference uuid>/0"`` and
        the string ``"<product reference uuid>/0"``, in that order.
    """
    parent_id = uuid4()
    product_id = uuid4()
    substance_id = uuid4()

    substance = IUCLIDSubstanceData(
        uuid=substance_id,
        name="Benzene",
        reference_substance_uuid=parent_id,
    )
    parent_reference = IUCLIDReferenceSubstanceData(
        uuid=parent_id, name="Benzene", smiles="c1ccccc1"
    )
    product_reference = IUCLIDReferenceSubstanceData(
        uuid=product_id, name="Phenol", smiles="c1ccc(O)cc1"
    )

    # Draw the study id before the product-entry id, as in the inline construction.
    study_id = uuid4()
    product_entry = IUCLIDTransformationProductEntry(
        uuid=uuid4(),
        product_reference_uuid=product_id,
        parent_reference_uuids=[parent_id],
    )
    study = IUCLIDEndpointStudyRecordData(
        uuid=study_id,
        substance_uuid=substance_id,
        name="Endpoint study - Benzene",
        transformation_products=[product_entry],
    )

    bundle = IUCLIDDocumentBundle(
        substances=[substance],
        reference_substances=[parent_reference, product_reference],
        endpoint_study_records=[study],
    )
    parent_key = f"{parent_id}/0"
    product_key = f"{product_id}/0"
    return bundle, parent_key, product_key
@tag("iuclid")
class I6ZSerializerTest(SimpleTestCase):
    """Checks on the archive bytes returned by ``I6ZSerializer().serialize``."""

    # XML namespace used to look up elements inside manifest.xml.
    MANIFEST_NS = "http://iuclid6.echa.europa.eu/namespaces/manifest/v1"

    def _open_archive(self, bundle: IUCLIDDocumentBundle) -> zipfile.ZipFile:
        """Serialize *bundle* and return the resulting bytes opened as a zip archive."""
        data = I6ZSerializer().serialize(bundle)
        return zipfile.ZipFile(io.BytesIO(data))

    def _manifest_documents(self, bundle: IUCLIDDocumentBundle) -> list[ET.Element]:
        """Serialize *bundle* and return every ``document`` element of its manifest."""
        with self._open_archive(bundle) as zf:
            manifest_xml = zf.read("manifest.xml").decode("utf-8")
        root = ET.fromstring(manifest_xml)
        return root.findall(f".//{{{self.MANIFEST_NS}}}document")

    def test_output_is_valid_zip(self):
        """The serialized bytes are recognised as a zip file."""
        bundle = _make_bundle()
        data = I6ZSerializer().serialize(bundle)
        self.assertTrue(zipfile.is_zipfile(io.BytesIO(data)))

    def test_contains_manifest(self):
        """The archive has a ``manifest.xml`` entry."""
        with self._open_archive(_make_bundle()) as zf:
            self.assertIn("manifest.xml", zf.namelist())

    def test_contains_i6d_files(self):
        """The archive has one ``.i6d`` entry per document plus the manifest."""
        with self._open_archive(_make_bundle()) as zf:
            names = zf.namelist()
        # manifest + 1 substance + 1 ref substance + 1 ESR = 4 files
        self.assertEqual(len(names), 4)
        i6d_files = [n for n in names if n.endswith(".i6d")]
        self.assertEqual(len(i6d_files), 3)

    def test_manifest_references_all_documents(self):
        """The manifest lists three documents, one of each expected type."""
        ns = self.MANIFEST_NS
        docs = self._manifest_documents(_make_bundle())
        self.assertEqual(len(docs), 3)
        types = set()
        for doc in docs:
            type_elem = doc.find(f"{{{ns}}}type")
            self.assertIsNotNone(type_elem)
            # Plain assert narrows the Optional for static type checkers.
            assert type_elem is not None
            types.add(type_elem.text)
        self.assertEqual(types, {"SUBSTANCE", "REFERENCE_SUBSTANCE", "ENDPOINT_STUDY_RECORD"})

    def test_manifest_contains_expected_document_links(self):
        """Substance and endpoint-study documents carry the expected link ref-types."""
        ns = self.MANIFEST_NS
        links_by_type: dict[str, set[tuple[str | None, str | None]]] = {}
        for doc in self._manifest_documents(_make_bundle()):
            doc_type = doc.findtext(f"{{{ns}}}type")
            if not doc_type:
                continue
            links_by_type[doc_type] = {
                (
                    link.findtext(f"{{{ns}}}ref-type"),
                    link.findtext(f"{{{ns}}}ref-uuid"),
                )
                for link in doc.findall(f"{{{ns}}}links/{{{ns}}}link")
            }

        # Check key presence first so a missing document type is reported as a
        # test failure instead of a KeyError.
        self.assertIn("SUBSTANCE", links_by_type)
        substance_ref_types = {ref_type for ref_type, _ in links_by_type["SUBSTANCE"]}
        self.assertIn("REFERENCE", substance_ref_types)
        self.assertIn("CHILD", substance_ref_types)

        self.assertIn("ENDPOINT_STUDY_RECORD", links_by_type)
        esr_ref_types = {ref_type for ref_type, _ in links_by_type["ENDPOINT_STUDY_RECORD"]}
        self.assertIn("PARENT", esr_ref_types)

    def test_i6d_files_are_valid_xml(self):
        """Every ``.i6d`` entry in the archive parses as XML."""
        with self._open_archive(_make_bundle()) as zf:
            i6d_names = [name for name in zf.namelist() if name.endswith(".i6d")]
            # Without this the loop below would pass vacuously on an empty archive.
            self.assertTrue(i6d_names, "archive contains no .i6d files")
            for name in i6d_names:
                # subTest names the offending entry when parsing fails.
                with self.subTest(name=name):
                    content = zf.read(name).decode("utf-8")
                    # Should not raise
                    ET.fromstring(content)

    def test_manifest_links_esr_to_transformation_reference_substances(self):
        """The endpoint study document has REFERENCE links to parent and product."""
        bundle, parent_ref_key, product_ref_key = _make_bundle_with_transformation_links()
        ns = self.MANIFEST_NS
        # First manifest document typed as an endpoint study record, if any.
        esr_doc = next(
            (
                doc
                for doc in self._manifest_documents(bundle)
                if doc.findtext(f"{{{ns}}}type") == "ENDPOINT_STUDY_RECORD"
            ),
            None,
        )
        self.assertIsNotNone(esr_doc)
        # Plain assert narrows the Optional for static type checkers.
        assert esr_doc is not None
        reference_links = {
            link.findtext(f"{{{ns}}}ref-uuid")
            for link in esr_doc.findall(f"{{{ns}}}links/{{{ns}}}link")
            if link.findtext(f"{{{ns}}}ref-type") == "REFERENCE"
        }
        self.assertIn(parent_ref_key, reference_links)
        self.assertIn(product_ref_key, reference_links)