forked from enviPath/enviPy
[Feature] Minimal IUCLID export (#338)
This is an initial implementation that creates a working minimal .i6z document. It passes schema validation and can be imported into IUCLID. Caveat: IUCLID files target individual compounds. Pathways are not actually covered by the format. They can be added in either soil or water and soil OECD endpoints. **I currently only implemented the soil endpoint for all data.** This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future). Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field. I use the XSD schema for validation, but unfortunately the IUCLID parser is not fully compliant and requires a specific order, etc. The workflow is: find the AI structure from the XSD schema -> make the schema validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣. New specifications get released once per year, so we will have to update accordingly. I believe that this should be a more expensive feature, as it requires significant effort to maintain. Currently implemented for root compound only in SOIL: - Soil Texture 2 - Soil Texture 1 - pH value - Half-life per soil sample / scenario (mapped to disappearance; not sure about that). - CEC - Organic Matter (only Carbon) - Moisture content - Humidity <img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62"> <img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93"> <img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c"> Reviewed-on: enviPath/enviPy#338 Co-authored-by: Tobias O <tobias.olenyi@envipath.com> Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
This commit is contained in:
259
epiuclid/builders/endpoint_study.py
Normal file
259
epiuclid/builders/endpoint_study.py
Normal file
@ -0,0 +1,259 @@
|
||||
import xml.etree.ElementTree as ET
|
||||
from uuid import uuid4
|
||||
|
||||
from epiuclid.serializers.pathway_mapper import IUCLIDEndpointStudyRecordData, SoilPropertiesData
|
||||
|
||||
from .base import (
|
||||
NS_PLATFORM_FIELDS,
|
||||
_sub,
|
||||
_tag,
|
||||
build_document,
|
||||
document_key,
|
||||
)
|
||||
|
||||
# XML namespace used for every element of the BiodegradationInSoil endpoint
# study record content built in this module.
NS_ESR_BIODEG: str = (
    "http://iuclid6.echa.europa.eu/namespaces/ENDPOINT_STUDY_RECORD-BiodegradationInSoil/10.0"
)

# Serialize the namespace above as the default (prefix-less) namespace.
# NOTE: ElementTree's namespace registry is global, so this import-time call
# affects every ElementTree serialization in the process.
ET.register_namespace("", NS_ESR_BIODEG)

# Document sub-type; also the suffix of the record's root element name.
DOC_SUBTYPE: str = "BiodegradationInSoil"

# Picklist code emitted together with a free-text "other" value whenever a raw
# value has no entry in the code table it is looked up in.
PICKLIST_OTHER_CODE: str = "1342"

# Normalized soil type key (upper case, "_" separators) -> picklist code.
# Presumably IUCLID phrase codes; verify against the picklist definitions.
SOIL_TYPE_CODE_BY_KEY: dict[str, str] = {
    "CLAY": "257",
    "CLAY_LOAM": "258",
    "LOAM": "1026",
    "LOAMY_SAND": "1027",
    "SAND": "1522",
    "SANDY_CLAY_LOAM": "1523",
    "SANDY_LOAM": "1524",
    "SANDY_CLAY": "1525",
    "SILT": "1549",
    "SILT_LOAM": "1550",
    "SILTY_CLAY": "1551",
    "SILTY_CLAY_LOAM": "1552",
}

# Normalized soil classification system key -> picklist code.
SOIL_CLASSIFICATION_CODE_BY_KEY: dict[str, str] = {
    "USDA": "1649",
    "DE": "314",
    "INTERNATIONAL": "1658",
}
|
||||
|
||||
|
||||
class EndpointStudyRecordBuilder:
    """Build the ``ENDPOINT_STUDY_RECORD.BiodegradationInSoil`` document content.

    The builder turns one ``IUCLIDEndpointStudyRecordData`` into an element
    tree (materials/methods, then results) and hands it to ``build_document``.

    NOTE: child elements are appended in a fixed order on purpose. The IUCLID
    importer is reported to be order-sensitive, so do not reorder the
    ``_sub`` calls without re-testing an import.
    """

    def build(self, data: IUCLIDEndpointStudyRecordData) -> str:
        """Return the serialized endpoint study record document for ``data``.

        ``MaterialsAndMethods`` and ``ResultsAndDiscussion`` are only emitted
        when ``data`` carries at least one value that belongs in them.
        """
        esr = ET.Element(f"{{{NS_ESR_BIODEG}}}ENDPOINT_STUDY_RECORD.{DOC_SUBTYPE}")

        self._build_materials_and_methods(esr, data)
        self._build_results_and_discussion(esr, data)

        return build_document(
            document_key=document_key(data.uuid),
            document_type="ENDPOINT_STUDY_RECORD",
            document_sub_type=DOC_SUBTYPE,
            name=data.name,
            content_element=esr,
            parent_document_key=document_key(data.substance_uuid),
            order_in_section_no=1,
        )

    def _build_materials_and_methods(
        self,
        esr: ET.Element,
        data: IUCLIDEndpointStudyRecordData,
    ) -> None:
        """Append ``MaterialsAndMethods`` (soil design, model/software info) if needed."""
        # Prefer the per-sample list; fall back to the single soil_properties value.
        soil_entries = list(data.soil_properties_entries or ())
        if not soil_entries and data.soil_properties is not None:
            soil_entries = [data.soil_properties]

        has_model_info = bool(
            data.model_name_and_version or data.software_name_and_version or data.model_remarks
        )
        if not (soil_entries or has_model_info):
            return

        materials = _sub(esr, NS_ESR_BIODEG, "MaterialsAndMethods")

        if soil_entries:
            self._build_soil_structured_full(materials, soil_entries)

        if has_model_info:
            model_info = _sub(materials, NS_ESR_BIODEG, "ModelAndSoftware")
            for tag, texts in (
                ("ModelNameAndVersion", data.model_name_and_version),
                ("SoftwareNameAndVersion", data.software_name_and_version),
                ("Remarks", data.model_remarks),
            ):
                # Any of the three may be empty/unset while another is populated.
                for text in texts or ():
                    _sub(model_info, NS_ESR_BIODEG, tag, text)

    def _build_results_and_discussion(
        self,
        esr: ET.Element,
        data: IUCLIDEndpointStudyRecordData,
    ) -> None:
        """Append ``ResultsAndDiscussion`` (half-lives, temperature, products) if needed."""
        has_dt_parent = bool(data.half_lives) or data.temperature is not None
        if not (has_dt_parent or data.transformation_products):
            return

        results = _sub(esr, NS_ESR_BIODEG, "ResultsAndDiscussion")

        if has_dt_parent:
            self._build_dt_parent(results, data)

        if data.transformation_products:
            self._build_transformation_products(results, data.transformation_products)

    def _build_dt_parent(self, results: ET.Element, data: IUCLIDEndpointStudyRecordData) -> None:
        """Append ``DTParentCompound`` with one entry per half-life.

        Only called when ``data`` has half-lives or a record-level temperature.
        """
        dt_parent = _sub(results, NS_ESR_BIODEG, "DTParentCompound")

        if not data.half_lives:
            # Temperature without half-lives: single entry with only Temp.
            # data.temperature cannot be None here (see the caller's guard).
            self._append_temperature(self._new_entry(dt_parent), data.temperature)
            return

        for hl in data.half_lives:
            entry = self._new_entry(dt_parent)

            if hl.soil_no_code:
                self._append_soil_no(entry, hl.soil_no_code)

            value_range = _sub(entry, NS_ESR_BIODEG, "Value")
            _sub(value_range, NS_ESR_BIODEG, "unitCode", "2329")  # days
            _sub(value_range, NS_ESR_BIODEG, "lowerValue", str(hl.dt50_start))
            _sub(value_range, NS_ESR_BIODEG, "upperValue", str(hl.dt50_end))

            # A half-life's own temperature wins over the record-level one.
            temperature = hl.temperature if hl.temperature is not None else data.temperature
            if temperature is not None:
                self._append_temperature(entry, temperature)

            _sub(entry, NS_ESR_BIODEG, "KineticParameters", hl.model)

    def _build_transformation_products(self, results: ET.Element, products) -> None:
        """Append ``TransformationProductsDetails`` with one entry per product."""
        tp_details = _sub(results, NS_ESR_BIODEG, "TransformationProductsDetails")

        for tp in products:
            # Products carry their own uuid instead of a freshly generated one.
            entry = self._new_entry(tp_details, str(tp.uuid))

            _sub(
                entry,
                NS_ESR_BIODEG,
                "IdentityOfCompound",
                document_key(tp.product_reference_uuid),
            )

            if tp.parent_reference_uuids:
                parents = _sub(entry, NS_ESR_BIODEG, "ParentCompoundS")
                for parent_uuid in tp.parent_reference_uuids:
                    _sub(parents, NS_PLATFORM_FIELDS, "key", document_key(parent_uuid))

            if tp.kinetic_formation_fraction is not None:
                _sub(
                    entry,
                    NS_ESR_BIODEG,
                    "KineticFormationFraction",
                    str(tp.kinetic_formation_fraction),
                )

    @staticmethod
    def _new_entry(parent: ET.Element, uuid_text: str | None = None) -> ET.Element:
        """Append an ``entry`` child carrying a platform ``uuid`` attribute.

        A random UUID is generated when ``uuid_text`` is not given.
        """
        entry = ET.SubElement(parent, _tag(NS_ESR_BIODEG, "entry"))
        entry.set(
            _tag(NS_PLATFORM_FIELDS, "uuid"),
            str(uuid4()) if uuid_text is None else uuid_text,
        )
        return entry

    @staticmethod
    def _append_soil_no(entry: ET.Element, soil_no_code: str) -> None:
        """Append ``SoilNo/value`` holding ``soil_no_code``."""
        soil_no = _sub(entry, NS_ESR_BIODEG, "SoilNo")
        _sub(soil_no, NS_ESR_BIODEG, "value", soil_no_code)

    @staticmethod
    def _append_temperature(entry: ET.Element, temperature) -> None:
        """Append a ``Temp`` range from an indexable ``(lower, upper)`` pair."""
        temp_range = _sub(entry, NS_ESR_BIODEG, "Temp")
        _sub(temp_range, NS_ESR_BIODEG, "unitCode", "2493")  # degree Celsius
        _sub(temp_range, NS_ESR_BIODEG, "lowerValue", str(temperature[0]))
        _sub(temp_range, NS_ESR_BIODEG, "upperValue", str(temperature[1]))

    @staticmethod
    def _append_lower_value(entry: ET.Element, tag: str, value) -> None:
        """Append ``<tag><lowerValue>value</lowerValue></tag>``; skip when value is None."""
        if value is None:
            return
        element = _sub(entry, NS_ESR_BIODEG, tag)
        _sub(element, NS_ESR_BIODEG, "lowerValue", str(value))

    @staticmethod
    def _append_picklist(
        parent: ET.Element,
        tag: str,
        raw_value: str,
        code_map: dict[str, str],
    ) -> None:
        """Append a picklist element with its ``value`` code and optional ``other`` text."""
        element = _sub(parent, NS_ESR_BIODEG, tag)
        value, other = EndpointStudyRecordBuilder._picklist_value_and_other(raw_value, code_map)
        if value:
            _sub(element, NS_ESR_BIODEG, "value", value)
        if other:
            _sub(element, NS_ESR_BIODEG, "other", other)

    @staticmethod
    def _build_soil_structured_full(
        materials: ET.Element,
        props_entries: list[SoilPropertiesData],
    ) -> None:
        """Append ``StudyDesign`` with the soil classification and one entry per soil.

        The classification is a single study-level field, so the first entry
        that yields one decides it.
        """
        builder = EndpointStudyRecordBuilder
        study_design = _sub(materials, NS_ESR_BIODEG, "StudyDesign")

        soil_classification = next(
            filter(None, map(builder._soil_classification, props_entries)),
            None,
        )
        if soil_classification:
            builder._append_picklist(
                study_design,
                "SoilClassification",
                soil_classification,
                SOIL_CLASSIFICATION_CODE_BY_KEY,
            )

        soil_props = _sub(study_design, NS_ESR_BIODEG, "SoilProperties")

        for props in props_entries:
            entry = builder._new_entry(soil_props)

            if props.soil_no_code:
                builder._append_soil_no(entry, props.soil_no_code)

            soil_type = props.soil_type.strip() if props.soil_type else None
            if soil_type:
                builder._append_picklist(entry, "SoilType", soil_type, SOIL_TYPE_CODE_BY_KEY)

            for tag, value in (
                ("Clay", props.clay),
                ("Silt", props.silt),
                ("Sand", props.sand),
                ("OrgC", props.org_carbon),
            ):
                builder._append_lower_value(entry, tag, value)

            if props.ph_lower is not None or props.ph_upper is not None:
                ph_el = _sub(entry, NS_ESR_BIODEG, "Ph")
                if props.ph_lower is not None:
                    _sub(ph_el, NS_ESR_BIODEG, "lowerValue", str(props.ph_lower))
                if props.ph_upper is not None:
                    _sub(ph_el, NS_ESR_BIODEG, "upperValue", str(props.ph_upper))

            ph_method = props.ph_method.strip() if props.ph_method else None
            if ph_method:
                _sub(entry, NS_ESR_BIODEG, "PHMeasuredIn", ph_method)

            for tag, value in (
                ("CEC", props.cec),
                ("MoistureContent", props.moisture_content),
            ):
                builder._append_lower_value(entry, tag, value)

    @staticmethod
    def _soil_classification(props: SoilPropertiesData) -> str | None:
        """Return the classification system for ``props``, or None if unknown.

        An explicit, non-blank ``soil_classification`` wins. Otherwise "USDA"
        is assumed when a soil type is present. A whitespace-only soil type
        counts as absent, matching how the soil type itself is emitted.
        """
        classification = (props.soil_classification or "").strip()
        if classification:
            return classification
        if (props.soil_type or "").strip():
            return "USDA"
        return None

    @staticmethod
    def _picklist_value_and_other(
        raw_value: str,
        code_map: dict[str, str],
    ) -> tuple[str | None, str | None]:
        """Map ``raw_value`` to a ``(code, other_text)`` picklist pair.

        Returns ``(None, None)`` for a blank value, ``(code, None)`` when the
        normalized value is in ``code_map``, and otherwise
        ``(PICKLIST_OTHER_CODE, text)`` with underscores shown as spaces.
        """
        value = raw_value.strip()
        if not value:
            return None, None

        # Normalize "silt loam" / "silt-loam" / "Silt_Loam" to "SILT_LOAM".
        key = value.upper().replace("-", "_").replace(" ", "_")
        code = code_map.get(key)
        if code is not None:
            return code, None

        return PICKLIST_OTHER_CODE, value.replace("_", " ")
|
||||
Reference in New Issue
Block a user