[Feature] Minimal IUCLID export (#338)

This is an initial implementation that creates a working minimal .i6z document.
It passes schema validation and can be imported into IUCLID.

Caveat:
IUCLID files target individual compounds.
Pathways are not actually covered by the format.

Pathway data can be added to either the soil or the water-and-soil OECD endpoints.
**I currently only implemented the soil endpoint for all data.**

This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future).

Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field.
I use the XSD schema for validation, but unfortunately the IUCLID parser is not fully compliant and requires a specific order, etc.

The workflow is: find the AI structure in the XSD schema -> make the schema validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣

New specifications get released once per year, so we will have to update accordingly.
I believe that this should be a more expensive feature, as it requires significant effort to uphold.

Currently implemented for root compound only in SOIL:

- Soil Texture 2
- Soil Texture 1
- pH value
- Half-life per soil sample / scenario (mapped to disappearance; not sure about that).
- CEC
- Organic Matter (only Carbon)
- Moisture content
- Humidity

<img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62">
<img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93">
<img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c">

Reviewed-on: enviPath/enviPy#338
Co-authored-by: Tobias O <tobias.olenyi@envipath.com>
Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
This commit is contained in:
2026-04-07 19:46:12 +12:00
committed by jebus
parent f7c45b8015
commit d06bd0d4fd
49 changed files with 66402 additions and 1014 deletions

118
epiuclid/serializers/i6z.py Normal file
View File

@ -0,0 +1,118 @@
import io
import xml.etree.ElementTree as ET
import zipfile
from epiuclid.builders.base import NS_PLATFORM_CONTAINER, document_key
from epiuclid.builders.endpoint_study import EndpointStudyRecordBuilder
from epiuclid.builders.reference_substance import ReferenceSubstanceBuilder
from epiuclid.builders.substance import SubstanceBuilder
from epiuclid.serializers.manifest import ManifestBuilder
from epiuclid.serializers.pathway_mapper import IUCLIDDocumentBundle
from epiuclid.schemas.loader import get_content_schema
def _i6d_filename(uuid) -> str:
    """Return the archive member name for the document identified by *uuid*.

    The name is the string form of *uuid* followed by ``_0.i6d``.
    """
    return "{}_0.i6d".format(uuid)
class I6ZSerializer:
    """Serialize an IUCLIDDocumentBundle into an in-memory ``.i6z`` archive.

    The archive is a ZIP file containing ``manifest.xml`` plus one ``.i6d``
    file per document in the bundle.
    """

    # Subtype recorded for every endpoint study record; it is written to the
    # manifest rows and used to look up the content schema during validation.
    _ESR_SUBTYPE = "BiodegradationInSoil"

    def serialize(self, bundle: IUCLIDDocumentBundle, *, validate: bool = False) -> bytes:
        """Return the bytes of the ZIP archive built from *bundle*.

        Args:
            bundle: Substances, reference substances and endpoint study
                records to export.
            validate: When true, every generated document is checked against
                its XSD schema before the archive is assembled.

        Returns:
            The complete ZIP archive as bytes.
        """
        return self._assemble(bundle, validate=validate)

    def _assemble(self, bundle: IUCLIDDocumentBundle, *, validate: bool = False) -> bytes:
        """Build all documents and the manifest, then pack them into a ZIP archive."""
        files = self._collect_documents(bundle)

        if validate:
            self._validate_documents(files)

        # Build document relationship links for manifest
        links = self._build_links(bundle)

        # Build manifest: the XML payload itself is not part of a manifest row
        manifest_docs = [
            (fname, doc_type, doc_uuid, subtype)
            for fname, _xml, doc_type, doc_uuid, subtype in files
        ]
        # The first substance's UUID is handed to the manifest builder as the
        # base UUID; an empty string is passed when the bundle has none.
        base_uuid = str(bundle.substances[0].uuid) if bundle.substances else ""
        manifest_xml = ManifestBuilder().build(manifest_docs, base_uuid, links=links)

        # Assemble ZIP
        buf = io.BytesIO()
        with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
            zf.writestr("manifest.xml", manifest_xml)
            for fname, xml, *_ in files:
                zf.writestr(fname, xml)
        return buf.getvalue()

    def _collect_documents(
        self, bundle: IUCLIDDocumentBundle
    ) -> list[tuple[str, str, str, str, str | None]]:
        """Build the XML for every document in *bundle*.

        Returns:
            A list of ``(filename, xml_string, doc_type, uuid, subtype)``
            tuples, ordered substances first, then reference substances, then
            endpoint study records.
        """
        sub_builder = SubstanceBuilder()
        ref_builder = ReferenceSubstanceBuilder()
        esr_builder = EndpointStudyRecordBuilder()

        # (filename, xml_string, doc_type, uuid, subtype)
        files: list[tuple[str, str, str, str, str | None]] = []

        for sub in bundle.substances:
            files.append(
                (_i6d_filename(sub.uuid), sub_builder.build(sub), "SUBSTANCE", str(sub.uuid), None)
            )

        for ref in bundle.reference_substances:
            files.append(
                (
                    _i6d_filename(ref.uuid),
                    ref_builder.build(ref),
                    "REFERENCE_SUBSTANCE",
                    str(ref.uuid),
                    None,
                )
            )

        for esr in bundle.endpoint_study_records:
            files.append(
                (
                    _i6d_filename(esr.uuid),
                    esr_builder.build(esr),
                    "ENDPOINT_STUDY_RECORD",
                    str(esr.uuid),
                    self._ESR_SUBTYPE,
                )
            )

        return files

    @staticmethod
    def _validate_documents(
        files: list[tuple[str, str, str, str, str | None]],
    ) -> None:
        """Validate each i6d document against its XSD schema.

        Documents that have no platform-container ``Content`` element, or
        whose ``Content`` element has no children, are skipped.

        Raises ``xmlschema.XMLSchemaValidationError`` on the first failure.
        """
        for _fname, xml, doc_type, _uuid, subtype in files:
            content = ET.fromstring(xml).find(f"{{{NS_PLATFORM_CONTAINER}}}Content")
            # Explicit len() check: truth-testing an Element is deprecated.
            if content is None or len(content) == 0:
                continue
            # Only the first child of Content is validated.
            schema = get_content_schema(doc_type, subtype)
            schema.validate(content[0])

    @staticmethod
    def _build_links(bundle: IUCLIDDocumentBundle) -> dict[str, list[tuple[str, str]]]:
        """Build manifest link relationships between documents.

        Returns a dict mapping document UUID (str) to list of
        (target_doc_key, ref_type). Each link appears once per document, in
        first-seen order.
        """
        links: dict[str, list[tuple[str, str]]] = {}
        # Tracks (source uuid, target key, ref type) triples already recorded
        # so duplicates are rejected without rescanning the per-document list.
        seen: set[tuple[str, str, str]] = set()

        def _add(uuid_str: str, target_key: str, ref_type: str) -> None:
            entry = (uuid_str, target_key, ref_type)
            if entry in seen:
                return
            seen.add(entry)
            links.setdefault(uuid_str, []).append((target_key, ref_type))

        # Substance -> REFERENCE link to its reference substance
        for sub in bundle.substances:
            if sub.reference_substance_uuid:
                _add(str(sub.uuid), document_key(sub.reference_substance_uuid), "REFERENCE")

        # ESR -> PARENT link to its substance; substance -> CHILD link to ESR
        for esr in bundle.endpoint_study_records:
            esr_uuid = str(esr.uuid)
            sub_key = document_key(esr.substance_uuid)
            esr_key = document_key(esr.uuid)
            _add(esr_uuid, sub_key, "PARENT")
            _add(str(esr.substance_uuid), esr_key, "CHILD")

            # ESR -> REFERENCE links to each transformation product and to
            # the parents of that product
            for tp in esr.transformation_products:
                _add(esr_uuid, document_key(tp.product_reference_uuid), "REFERENCE")
                for parent_ref_uuid in tp.parent_reference_uuids:
                    _add(esr_uuid, document_key(parent_ref_uuid), "REFERENCE")

        return links