forked from enviPath/enviPy
This is an initial implementation that creates a working minimal .i6z document. It passes schema validation and can be imported into IUCLID. Caveat: IUCLID files target individual compounds. Pathways are not actually covered by the format. They can be added in either the soil or the water-and-soil OECD endpoints. **I currently only implemented the soil endpoint for all data.** This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future). Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field. I use the XSD schema for validation, but unfortunately the IUCLID parser is not fully compliant and requires a specific order, etc. The workflow is: find the AI structure from the XSD schema -> make the schema validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣 New specifications get released once per year, so we will have to update accordingly. I believe that this should be a more expensive feature, as it requires significant effort to maintain. Currently implemented for root compound only in SOIL: - Soil Texture 2 - Soil Texture 1 - pH value - Half-life per soil sample / scenario (mapped to disappearance; not sure about that). - CEC - Organic Matter (only Carbon) - Moisture content - Humidity <img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62"> <img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93"> <img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c"> Reviewed-on: enviPath/enviPy#338 Co-authored-by: Tobias O <tobias.olenyi@envipath.com> Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
121 lines
4.3 KiB
Python
121 lines
4.3 KiB
Python
import xml.etree.ElementTree as ET
|
|
from datetime import datetime, timezone
|
|
|
|
from epiuclid.builders.base import document_key
|
|
|
|
# Namespace URIs used when building the manifest tree.
NS_MANIFEST = "http://iuclid6.echa.europa.eu/namespaces/manifest/v1"
NS_XLINK = "http://www.w3.org/1999/xlink"

# Module-level side effect: register serialisation prefixes with ElementTree.
# The manifest namespace gets the empty prefix (emitted as a default xmlns),
# the XLink namespace gets the "xlink" prefix.
ET.register_namespace(prefix="", uri=NS_MANIFEST)
ET.register_namespace(prefix="xlink", uri=NS_XLINK)
|
|
|
|
|
|
def _i6d_filename(uuid) -> str:
    """Return the ``.i6d`` file name for a document UUID.

    The name is the formatted value of ``uuid`` followed by the fixed
    ``_0.i6d`` suffix (the form used for raw data, per the original note).
    """
    return "{}_0.i6d".format(uuid)
|
|
|
|
|
|
def _tag(local: str) -> str:
    """Qualify ``local`` with the manifest namespace.

    Returns the name in ElementTree's ``{namespace-uri}local`` notation,
    using ``NS_MANIFEST`` as the namespace URI.
    """
    return "{{{}}}{}".format(NS_MANIFEST, local)
|
|
|
|
|
|
def _add_link(links_elem: ET.Element, ref_uuid: str, ref_type: str) -> None:
    """Append one <link> entry to ``links_elem``.

    The new <link> element receives two children, in this order:
    <ref-uuid> holding ``ref_uuid`` and <ref-type> holding ``ref_type``.
    """
    entry = ET.SubElement(links_elem, _tag("link"))
    # Order matters for the output document: ref-uuid first, then ref-type.
    for child_name, value in (("ref-uuid", ref_uuid), ("ref-type", ref_type)):
        ET.SubElement(entry, _tag(child_name)).text = value
|
|
|
|
|
|
class ManifestBuilder:
    """Builds the ``manifest.xml`` text describing the documents of an export."""

    def build(
        self,
        documents: list[tuple[str, str, str, str | None]],
        base_document_uuid: str,
        links: dict[str, list[tuple[str, str]]] | None = None,
    ) -> str:
        """Build manifest.xml.

        Args:
            documents: List of (filename, doc_type, uuid, subtype) tuples.
                One <document> entry is written per tuple, in list order.
                A falsy subtype omits the <subtype> element.
            base_document_uuid: UUID of the base document (the substance export started from).
            links: Optional dict mapping document UUID to list of (target_doc_key, ref_type) tuples.
                ref_type is one of: PARENT, CHILD, REFERENCE.
                Keys are looked up with the raw ``uuid`` from ``documents``;
                target keys are written to the output unchanged.

        Returns:
            The serialised manifest as a ``str``, including an XML declaration.
        """
        if links is None:
            links = {}

        # Read the clock once so the header's <created> value and every
        # document's modification dates describe the same instant.
        timestamp = datetime.now(timezone.utc)

        root = ET.Element(_tag("manifest"))

        # general-information
        self._append_general_information(root, timestamp)

        # base-document-uuid
        self._text_child(root, "base-document-uuid", document_key(base_document_uuid))

        # contained-documents
        contained = ET.SubElement(root, _tag("contained-documents"))
        modified = timestamp.strftime("%Y-%m-%dT%H:%M:%SZ")
        for filename, doc_type, uuid, subtype in documents:
            self._append_document(
                contained,
                filename=filename,
                doc_type=doc_type,
                uuid=uuid,
                subtype=subtype,
                modified=modified,
                doc_links=links.get(uuid, []),
            )

        return ET.tostring(root, encoding="unicode", xml_declaration=True)

    @staticmethod
    def _text_child(parent: ET.Element, local: str, text: str) -> ET.Element:
        """Append a manifest-namespace child of ``parent`` holding ``text``."""
        child = ET.SubElement(parent, _tag(local))
        child.text = text
        return child

    def _append_general_information(self, root: ET.Element, timestamp: datetime) -> None:
        """Append the fixed <general-information> header block to ``root``."""
        gi = ET.SubElement(root, _tag("general-information"))

        # Child order is kept exactly as before; the IUCLID importer is
        # reportedly order-sensitive, so do not reorder these calls.
        self._text_child(gi, "title", "IUCLID 6 container manifest file")
        # NOTE(review): %a and %b are locale-dependent in strftime; the
        # output is English only while LC_TIME is left at its default.
        self._text_child(gi, "created", timestamp.strftime("%a %b %d %H:%M:%S %Z %Y"))
        self._text_child(gi, "author", "enviPath")
        self._text_child(gi, "application", "enviPath IUCLID Export")
        self._text_child(gi, "submission-type", "R_INT_ONSITE")
        self._text_child(gi, "archive-type", "RAW_DATA")

        legislations = ET.SubElement(gi, _tag("legislations-info"))
        legislation = ET.SubElement(legislations, _tag("legislation"))
        self._text_child(legislation, "id", "core")
        self._text_child(legislation, "version", "10.0")

    def _append_document(
        self,
        contained: ET.Element,
        *,
        filename: str,
        doc_type: str,
        uuid: str,
        subtype: str | None,
        modified: str,
        doc_links: list[tuple[str, str]],
    ) -> None:
        """Append one <document> entry, with its optional links, to ``contained``."""
        doc_key = document_key(uuid)
        doc_elem = ET.SubElement(contained, _tag("document"))
        doc_elem.set("id", doc_key)

        self._text_child(doc_elem, "type", doc_type)
        if subtype:
            self._text_child(doc_elem, "subtype", subtype)

        # <name> carries the file name both as text and as an XLink href.
        name_elem = self._text_child(doc_elem, "name", filename)
        name_elem.set(f"{{{NS_XLINK}}}type", "simple")
        name_elem.set(f"{{{NS_XLINK}}}href", filename)

        # Both modification dates are set to the build timestamp.
        self._text_child(doc_elem, "first-modification-date", modified)
        self._text_child(doc_elem, "last-modification-date", modified)
        self._text_child(doc_elem, "uuid", doc_key)

        # Add links for this document if any; no <links> element otherwise.
        if doc_links:
            links_elem = ET.SubElement(doc_elem, _tag("links"))
            for target_key, ref_type in doc_links:
                _add_link(links_elem, target_key, ref_type)
|