enviPy-bayer/epiuclid/serializers/manifest.py

import xml.etree.ElementTree as ET
from datetime import datetime, timezone

from epiuclid.builders.base import document_key

# XML namespace URIs used in the generated manifest.
NS_MANIFEST = "http://iuclid6.echa.europa.eu/namespaces/manifest/v1"
NS_XLINK = "http://www.w3.org/1999/xlink"

# Prefix registration is global to ElementTree for the whole process.
# The empty prefix makes the manifest namespace serialize as the default
# namespace (unprefixed element names); xlink attributes get an "xlink:" prefix.
ET.register_namespace("", NS_MANIFEST)
ET.register_namespace("xlink", NS_XLINK)


def _i6d_filename(uuid) -> str:
    """Convert UUID to i6d filename (uuid_0.i6d for raw data)."""
    return f"{uuid}_0.i6d"


def _tag(local: str) -> str:
    """Qualify *local* with the manifest namespace in ElementTree's ``{uri}name`` notation."""
    return "{{{}}}{}".format(NS_MANIFEST, local)


def _add_link(links_elem: ET.Element, ref_uuid: str, ref_type: str) -> None:
    """Append one ``<link>`` to *links_elem* holding ``<ref-uuid>`` and ``<ref-type>`` children.

    The children are created in that order and carry *ref_uuid* and
    *ref_type* as their text.
    """
    link_node = ET.SubElement(links_elem, _tag("link"))
    for child_name, value in (("ref-uuid", ref_uuid), ("ref-type", ref_type)):
        ET.SubElement(link_node, _tag(child_name)).text = value


class ManifestBuilder:
    """Builds the ``manifest.xml`` text that lists the documents of an export."""

    # Written literally rather than requested via ``xml_declaration=True``:
    # combined with ``encoding="unicode"`` ElementTree picks the declared
    # encoding itself (locale-derived on some Python versions, a lowercase
    # default on others), so the header would vary between environments.
    _XML_DECLARATION = "<?xml version='1.0' encoding='UTF-8'?>\n"

    def build(
        self,
        documents: list[tuple[str, str, str, str | None]],
        base_document_uuid: str,
        links: dict[str, list[tuple[str, str]]] | None = None,
    ) -> str:
        """Build manifest.xml.

        Args:
            documents: List of (filename, doc_type, uuid, subtype) tuples.
            base_document_uuid: UUID of the base document (the substance export started from).
            links: Optional dict mapping document UUID to list of (target_doc_key, ref_type) tuples.
                   ref_type is one of: PARENT, CHILD, REFERENCE.

        Returns:
            The manifest as a string, starting with an XML declaration that
            names UTF-8. Callers writing it to disk or into an archive must
            encode it as UTF-8 so the bytes match the declaration.
        """
        if links is None:
            links = {}

        # Read the clock once so the "created" stamp and every document's
        # modification dates describe the same instant.
        now = datetime.now(timezone.utc)

        root = ET.Element(_tag("manifest"))
        self._add_general_information(root, now)
        self._text_child(root, "base-document-uuid", document_key(base_document_uuid))

        contained = ET.SubElement(root, _tag("contained-documents"))
        modified = now.strftime("%Y-%m-%dT%H:%M:%SZ")
        for filename, doc_type, uuid, subtype in documents:
            self._add_document(
                contained, filename, doc_type, uuid, subtype, modified, links.get(uuid, [])
            )

        return self._XML_DECLARATION + ET.tostring(root, encoding="unicode")

    @staticmethod
    def _text_child(parent: ET.Element, local: str, text: str | None) -> ET.Element:
        """Append a manifest-namespace child named *local* with *text* and return it."""
        child = ET.SubElement(parent, _tag(local))
        child.text = text
        return child

    def _add_general_information(self, root: ET.Element, now: datetime) -> None:
        """Append the ``<general-information>`` header block to *root*."""
        gi = ET.SubElement(root, _tag("general-information"))
        self._text_child(gi, "title", "IUCLID 6 container manifest file")
        # *now* is UTC-aware, so %Z renders as "UTC".
        self._text_child(gi, "created", now.strftime("%a %b %d %H:%M:%S %Z %Y"))
        self._text_child(gi, "author", "enviPath")
        self._text_child(gi, "application", "enviPath IUCLID Export")
        self._text_child(gi, "submission-type", "R_INT_ONSITE")
        self._text_child(gi, "archive-type", "RAW_DATA")

        legislations = ET.SubElement(gi, _tag("legislations-info"))
        legislation = ET.SubElement(legislations, _tag("legislation"))
        self._text_child(legislation, "id", "core")
        self._text_child(legislation, "version", "10.0")

    def _add_document(
        self,
        contained: ET.Element,
        filename: str,
        doc_type: str,
        uuid: str,
        subtype: str | None,
        modified: str,
        doc_links: list[tuple[str, str]],
    ) -> None:
        """Append one ``<document>`` entry, with its links if any, to *contained*."""
        doc_key = document_key(uuid)
        doc_elem = ET.SubElement(contained, _tag("document"))
        doc_elem.set("id", doc_key)

        self._text_child(doc_elem, "type", doc_type)
        # <subtype> is omitted entirely when the subtype is None or empty.
        if subtype:
            self._text_child(doc_elem, "subtype", subtype)

        # The file name appears both as the xlink target and as the element text.
        name_elem = self._text_child(doc_elem, "name", filename)
        name_elem.set(f"{{{NS_XLINK}}}type", "simple")
        name_elem.set(f"{{{NS_XLINK}}}href", filename)

        self._text_child(doc_elem, "first-modification-date", modified)
        self._text_child(doc_elem, "last-modification-date", modified)
        self._text_child(doc_elem, "uuid", doc_key)

        # A <links> wrapper is only emitted when there is at least one link.
        if doc_links:
            links_elem = ET.SubElement(doc_elem, _tag("links"))
            for target_key, ref_type in doc_links:
                _add_link(links_elem, target_key, ref_type)