forked from enviPath/enviPy
[Feature] Minimal IUCLID export (#338)
This is an initial implementation that creates a working minimal .i6z document. It passes schema validation and can be imported into IUCLID. Caveat: IUCLID files target individual compounds. Pathways are not actually covered by the format. It can be added in either soil or water and soil OECD endpoints. **I currently only implemented the soil endpoint for all data.** This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future). Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field. I use the XSD schema for validation, but unfortunately the IUCLID parser is not fully compliant and requires a specific order, etc. The workflow is: find the AI structure from the XSD schema -> make the schema validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣 New specifications get released once per year, so we will have to update accordingly. I believe that this should be a more expensive feature, as it requires significant effort to maintain. Currently implemented for root compound only in SOIL: - Soil Texture 2 - Soil Texture 1 - pH value - Half-life per soil sample / scenario (mapped to disappearance; not sure about that). - CEC - Organic Matter (only Carbon) - Moisture content - Humidity <img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62"> <img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93"> <img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c"> Reviewed-on: enviPath/enviPy#338 Co-authored-by: Tobias O <tobias.olenyi@envipath.com> Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
This commit is contained in:
120
epiuclid/serializers/manifest.py
Normal file
120
epiuclid/serializers/manifest.py
Normal file
@ -0,0 +1,120 @@
|
||||
import xml.etree.ElementTree as ET
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from epiuclid.builders.base import document_key
|
||||
|
||||
# XML namespace URIs used when writing the manifest.
NS_MANIFEST = "http://iuclid6.echa.europa.eu/namespaces/manifest/v1"
NS_XLINK = "http://www.w3.org/1999/xlink"

# ElementTree keeps a process-wide prefix registry. Registering here makes the
# serializer emit the manifest namespace as the default (prefix-less) namespace
# and xlink attributes with the "xlink:" prefix instead of generated ns0/ns1.
ET.register_namespace("xlink", NS_XLINK)
ET.register_namespace("", NS_MANIFEST)
|
||||
|
||||
|
||||
def _i6d_filename(uuid) -> str:
|
||||
"""Convert UUID to i6d filename (uuid_0.i6d for raw data)."""
|
||||
return f"{uuid}_0.i6d"
|
||||
|
||||
|
||||
def _tag(local: str) -> str:
    """Qualify *local* with the manifest namespace in ElementTree ``{uri}name`` form."""
    return "{{{}}}{}".format(NS_MANIFEST, local)
|
||||
|
||||
|
||||
def _add_link(links_elem: ET.Element, ref_uuid: str, ref_type: str) -> None:
    """Append a ``<link>`` child to *links_elem*.

    The new element gets two children, in this order: ``<ref-uuid>`` holding
    *ref_uuid* and ``<ref-type>`` holding *ref_type*.
    """
    entry = ET.SubElement(links_elem, _tag("link"))
    for local, value in (("ref-uuid", ref_uuid), ("ref-type", ref_type)):
        ET.SubElement(entry, _tag(local)).text = value
|
||||
|
||||
|
||||
class ManifestBuilder:
    """Builds the ``manifest.xml`` document of an IUCLID 6 container."""

    def build(
        self,
        documents: list[tuple[str, str, str, str | None]],
        base_document_uuid: str,
        links: dict[str, list[tuple[str, str]]] | None = None,
    ) -> str:
        """Serialise manifest.xml and return it as a string.

        The manifest is written in a fixed order: a ``general-information``
        header, the ``base-document-uuid``, then one ``document`` entry per
        item of *documents* under ``contained-documents``.

        Args:
            documents: List of (filename, doc_type, uuid, subtype) tuples.
                A falsy subtype omits the ``<subtype>`` element.
            base_document_uuid: UUID of the base document (the substance
                export started from).
            links: Optional dict mapping document UUID to list of
                (target_doc_key, ref_type) tuples.
                ref_type is one of: PARENT, CHILD, REFERENCE.

        Returns:
            The manifest XML text, requested with an XML declaration.
        """

        def add_text(parent: ET.Element, local: str, text: str) -> ET.Element:
            # Append a manifest-namespace child whose text content is *text*.
            child = ET.SubElement(parent, _tag(local))
            child.text = text
            return child

        link_map = {} if links is None else links
        manifest = ET.Element(_tag("manifest"))

        # general-information (child order is kept exactly as emitted before)
        info = ET.SubElement(manifest, _tag("general-information"))
        add_text(info, "title", "IUCLID 6 container manifest file")
        created_stamp = datetime.now(timezone.utc).strftime("%a %b %d %H:%M:%S %Z %Y")
        add_text(info, "created", created_stamp)
        add_text(info, "author", "enviPath")
        add_text(info, "application", "enviPath IUCLID Export")
        add_text(info, "submission-type", "R_INT_ONSITE")
        add_text(info, "archive-type", "RAW_DATA")

        # Single hard-coded legislation entry: id "core", version "10.0".
        legislations = ET.SubElement(info, _tag("legislations-info"))
        legislation = ET.SubElement(legislations, _tag("legislation"))
        add_text(legislation, "id", "core")
        add_text(legislation, "version", "10.0")

        # base-document-uuid
        add_text(manifest, "base-document-uuid", document_key(base_document_uuid))

        # contained-documents
        contained = ET.SubElement(manifest, _tag("contained-documents"))

        # One timestamp shared by every document's first/last modification date.
        modified_stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        xlink_type = f"{{{NS_XLINK}}}type"
        xlink_href = f"{{{NS_XLINK}}}href"

        for filename, doc_type, uuid, subtype in documents:
            doc_key = document_key(uuid)
            entry = ET.SubElement(contained, _tag("document"))
            entry.set("id", doc_key)

            add_text(entry, "type", doc_type)
            if subtype:
                add_text(entry, "subtype", subtype)

            # The file name is written both as element text and as xlink:href.
            name = add_text(entry, "name", filename)
            name.set(xlink_type, "simple")
            name.set(xlink_href, filename)

            add_text(entry, "first-modification-date", modified_stamp)
            add_text(entry, "last-modification-date", modified_stamp)
            add_text(entry, "uuid", doc_key)

            # Links are looked up by the raw uuid from the tuple, not by doc_key;
            # <links> is only emitted when there is at least one entry.
            doc_links = link_map.get(uuid)
            if doc_links:
                links_elem = ET.SubElement(entry, _tag("links"))
                for target_key, ref_type in doc_links:
                    _add_link(links_elem, target_key, ref_type)

        return ET.tostring(manifest, encoding="unicode", xml_declaration=True)
|
||||
Reference in New Issue
Block a user