[Feature] Minimal IUCLID export (#338)

This is an initial implementation that creates a working minimal .i6z document.
It passes schema validation and can be imported into IUCLID.

Caveat:
IUCLID files target individual compounds.
Pathways are not actually covered by the format.

Pathway information can be added under either the "soil" or the "water and soil" OECD endpoint.
**I currently only implemented the soil endpoint for all data.**

This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future).

Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field.
I use the XSD schema for validation, but unfortunately the IUCLID parser is not fully compliant with it and requires a specific order, etc.

The workflow is: find the AI structure in the XSD schema -> make the schema validation pass -> upload to IUCLID and get obscure error messages -> guess what could be wrong -> repeat 💣

New specifications get released once per year, so we will have to update accordingly.
I believe that this should be a more expensive feature, as it requires significant effort to maintain.

Currently implemented for root compound only in SOIL:

- Soil Texture 2
- Soil Texture 1
- pH value
- Half-life per soil sample / scenario (mapped to disappearance; not sure about that).
- CEC
- Organic Matter (only Carbon)
- Moisture content
- Humidity

<img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62">
<img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93">
<img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c">

Reviewed-on: enviPath/enviPy#338
Co-authored-by: Tobias O <tobias.olenyi@envipath.com>
Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
This commit is contained in:
2026-04-07 19:46:12 +12:00
committed by jebus
parent f7c45b8015
commit d06bd0d4fd
49 changed files with 66402 additions and 1014 deletions

View File

@ -0,0 +1,259 @@
import xml.etree.ElementTree as ET
from uuid import uuid4
from epiuclid.serializers.pathway_mapper import IUCLIDEndpointStudyRecordData, SoilPropertiesData
from .base import (
NS_PLATFORM_FIELDS,
_sub,
_tag,
build_document,
document_key,
)
# Document sub-type; also used as the suffix of the root element name.
DOC_SUBTYPE: str = "BiodegradationInSoil"

# Picklist code sent together with free text when a value has no dedicated code.
PICKLIST_OTHER_CODE: str = "1342"

# XML namespace of the BiodegradationInSoil endpoint study record content.
NS_ESR_BIODEG: str = (
    "http://iuclid6.echa.europa.eu/namespaces/ENDPOINT_STUDY_RECORD-BiodegradationInSoil/10.0"
)

# Register the namespace under the empty prefix.
# NOTE(review): ElementTree keeps a single URI per prefix, so any other module
# that also registers the empty prefix will replace this mapping — confirm the
# serializers do not depend on import order.
ET.register_namespace("", NS_ESR_BIODEG)

# Normalized soil texture name -> picklist code.
# NOTE(review): codes are presumably taken from the IUCLID picklist for this
# document type — verify against the specification when it is updated.
SOIL_TYPE_CODE_BY_KEY: dict[str, str] = {
    "CLAY": "257",
    "CLAY_LOAM": "258",
    "LOAM": "1026",
    "LOAMY_SAND": "1027",
    "SAND": "1522",
    "SANDY_CLAY_LOAM": "1523",
    "SANDY_LOAM": "1524",
    "SANDY_CLAY": "1525",
    "SILT": "1549",
    "SILT_LOAM": "1550",
    "SILTY_CLAY": "1551",
    "SILTY_CLAY_LOAM": "1552",
}

# Normalized soil classification system name -> picklist code.
SOIL_CLASSIFICATION_CODE_BY_KEY: dict[str, str] = {
    "USDA": "1649",
    "DE": "314",
    "INTERNATIONAL": "1658",
}
class EndpointStudyRecordBuilder:
def build(self, data: IUCLIDEndpointStudyRecordData) -> str:
esr = ET.Element(f"{{{NS_ESR_BIODEG}}}ENDPOINT_STUDY_RECORD.{DOC_SUBTYPE}")
soil_entries = list(data.soil_properties_entries)
if not soil_entries and data.soil_properties is not None:
soil_entries = [data.soil_properties]
has_materials = bool(
data.model_name_and_version
or data.software_name_and_version
or data.model_remarks
or soil_entries
)
if has_materials:
materials = _sub(esr, NS_ESR_BIODEG, "MaterialsAndMethods")
if soil_entries:
self._build_soil_structured_full(materials, soil_entries)
if data.model_name_and_version or data.software_name_and_version or data.model_remarks:
model_info = _sub(materials, NS_ESR_BIODEG, "ModelAndSoftware")
for model_name in data.model_name_and_version:
_sub(model_info, NS_ESR_BIODEG, "ModelNameAndVersion", model_name)
for software_name in data.software_name_and_version:
_sub(model_info, NS_ESR_BIODEG, "SoftwareNameAndVersion", software_name)
for remark in data.model_remarks:
_sub(model_info, NS_ESR_BIODEG, "Remarks", remark)
has_results = (
data.half_lives or data.transformation_products or data.temperature is not None
)
if has_results:
results = _sub(esr, NS_ESR_BIODEG, "ResultsAndDiscussion")
if data.half_lives or data.temperature is not None:
dt_parent = _sub(results, NS_ESR_BIODEG, "DTParentCompound")
if data.half_lives:
for hl in data.half_lives:
entry = ET.SubElement(dt_parent, _tag(NS_ESR_BIODEG, "entry"))
entry.set(_tag(NS_PLATFORM_FIELDS, "uuid"), str(uuid4()))
if hl.soil_no_code:
soil_no = _sub(entry, NS_ESR_BIODEG, "SoilNo")
_sub(soil_no, NS_ESR_BIODEG, "value", hl.soil_no_code)
value_range = _sub(entry, NS_ESR_BIODEG, "Value")
_sub(value_range, NS_ESR_BIODEG, "unitCode", "2329") # days
_sub(value_range, NS_ESR_BIODEG, "lowerValue", str(hl.dt50_start))
_sub(value_range, NS_ESR_BIODEG, "upperValue", str(hl.dt50_end))
temperature = (
hl.temperature if hl.temperature is not None else data.temperature
)
if temperature is not None:
temp_range = _sub(entry, NS_ESR_BIODEG, "Temp")
_sub(temp_range, NS_ESR_BIODEG, "unitCode", "2493") # degree Celsius
_sub(temp_range, NS_ESR_BIODEG, "lowerValue", str(temperature[0]))
_sub(temp_range, NS_ESR_BIODEG, "upperValue", str(temperature[1]))
_sub(entry, NS_ESR_BIODEG, "KineticParameters", hl.model)
else:
# Temperature without half-lives: single entry with only Temp
assert data.temperature is not None
entry = ET.SubElement(dt_parent, _tag(NS_ESR_BIODEG, "entry"))
entry.set(_tag(NS_PLATFORM_FIELDS, "uuid"), str(uuid4()))
temp_range = _sub(entry, NS_ESR_BIODEG, "Temp")
_sub(temp_range, NS_ESR_BIODEG, "unitCode", "2493") # degree Celsius
_sub(temp_range, NS_ESR_BIODEG, "lowerValue", str(data.temperature[0]))
_sub(temp_range, NS_ESR_BIODEG, "upperValue", str(data.temperature[1]))
if data.transformation_products:
tp_details = _sub(results, NS_ESR_BIODEG, "TransformationProductsDetails")
for tp in data.transformation_products:
entry = ET.SubElement(tp_details, _tag(NS_ESR_BIODEG, "entry"))
entry.set(_tag(NS_PLATFORM_FIELDS, "uuid"), str(tp.uuid))
_sub(
entry,
NS_ESR_BIODEG,
"IdentityOfCompound",
document_key(tp.product_reference_uuid),
)
if tp.parent_reference_uuids:
parents = _sub(entry, NS_ESR_BIODEG, "ParentCompoundS")
for parent_uuid in tp.parent_reference_uuids:
_sub(parents, NS_PLATFORM_FIELDS, "key", document_key(parent_uuid))
if tp.kinetic_formation_fraction is not None:
_sub(
entry,
NS_ESR_BIODEG,
"KineticFormationFraction",
str(tp.kinetic_formation_fraction),
)
doc_key = document_key(data.uuid)
return build_document(
document_key=doc_key,
document_type="ENDPOINT_STUDY_RECORD",
document_sub_type=DOC_SUBTYPE,
name=data.name,
content_element=esr,
parent_document_key=document_key(data.substance_uuid),
order_in_section_no=1,
)
@staticmethod
def _build_soil_structured_full(
materials: ET.Element,
props_entries: list[SoilPropertiesData],
) -> None:
study_design = _sub(materials, NS_ESR_BIODEG, "StudyDesign")
soil_classification = None
for props in props_entries:
soil_classification = EndpointStudyRecordBuilder._soil_classification(props)
if soil_classification:
break
if soil_classification:
soil_classification_el = _sub(study_design, NS_ESR_BIODEG, "SoilClassification")
value, other = EndpointStudyRecordBuilder._picklist_value_and_other(
soil_classification,
SOIL_CLASSIFICATION_CODE_BY_KEY,
)
if value:
_sub(soil_classification_el, NS_ESR_BIODEG, "value", value)
if other:
_sub(soil_classification_el, NS_ESR_BIODEG, "other", other)
soil_props = _sub(study_design, NS_ESR_BIODEG, "SoilProperties")
for props in props_entries:
entry = ET.SubElement(soil_props, _tag(NS_ESR_BIODEG, "entry"))
entry.set(_tag(NS_PLATFORM_FIELDS, "uuid"), str(uuid4()))
if props.soil_no_code:
soil_no = _sub(entry, NS_ESR_BIODEG, "SoilNo")
_sub(soil_no, NS_ESR_BIODEG, "value", props.soil_no_code)
soil_type = props.soil_type.strip() if props.soil_type else None
if soil_type:
soil_type_el = _sub(entry, NS_ESR_BIODEG, "SoilType")
value, other = EndpointStudyRecordBuilder._picklist_value_and_other(
soil_type,
SOIL_TYPE_CODE_BY_KEY,
)
if value:
_sub(soil_type_el, NS_ESR_BIODEG, "value", value)
if other:
_sub(soil_type_el, NS_ESR_BIODEG, "other", other)
if props.clay is not None:
clay_el = _sub(entry, NS_ESR_BIODEG, "Clay")
_sub(clay_el, NS_ESR_BIODEG, "lowerValue", str(props.clay))
if props.silt is not None:
silt_el = _sub(entry, NS_ESR_BIODEG, "Silt")
_sub(silt_el, NS_ESR_BIODEG, "lowerValue", str(props.silt))
if props.sand is not None:
sand_el = _sub(entry, NS_ESR_BIODEG, "Sand")
_sub(sand_el, NS_ESR_BIODEG, "lowerValue", str(props.sand))
if props.org_carbon is not None:
orgc_el = _sub(entry, NS_ESR_BIODEG, "OrgC")
_sub(orgc_el, NS_ESR_BIODEG, "lowerValue", str(props.org_carbon))
if props.ph_lower is not None or props.ph_upper is not None:
ph_el = _sub(entry, NS_ESR_BIODEG, "Ph")
if props.ph_lower is not None:
_sub(ph_el, NS_ESR_BIODEG, "lowerValue", str(props.ph_lower))
if props.ph_upper is not None:
_sub(ph_el, NS_ESR_BIODEG, "upperValue", str(props.ph_upper))
ph_method = props.ph_method.strip() if props.ph_method else None
if ph_method:
_sub(entry, NS_ESR_BIODEG, "PHMeasuredIn", ph_method)
if props.cec is not None:
cec_el = _sub(entry, NS_ESR_BIODEG, "CEC")
_sub(cec_el, NS_ESR_BIODEG, "lowerValue", str(props.cec))
if props.moisture_content is not None:
moisture_el = _sub(entry, NS_ESR_BIODEG, "MoistureContent")
_sub(moisture_el, NS_ESR_BIODEG, "lowerValue", str(props.moisture_content))
@staticmethod
def _soil_classification(props: SoilPropertiesData) -> str | None:
if props.soil_classification:
value = props.soil_classification.strip()
if value:
return value
if props.soil_type:
return "USDA"
return None
@staticmethod
def _picklist_value_and_other(
raw_value: str,
code_map: dict[str, str],
) -> tuple[str | None, str | None]:
value = raw_value.strip()
if not value:
return None, None
key = value.upper().replace("-", "_").replace(" ", "_")
code = code_map.get(key)
if code is not None:
return code, None
return PICKLIST_OTHER_CODE, value.replace("_", " ")