import io
import xml.etree.ElementTree as ET
import zipfile

from epiuclid.builders.base import NS_PLATFORM_CONTAINER, document_key
from epiuclid.builders.endpoint_study import EndpointStudyRecordBuilder
from epiuclid.builders.reference_substance import ReferenceSubstanceBuilder
from epiuclid.builders.substance import SubstanceBuilder
from epiuclid.serializers.manifest import ManifestBuilder
from epiuclid.serializers.pathway_mapper import IUCLIDDocumentBundle
from epiuclid.schemas.loader import get_content_schema


def _i6d_filename(uuid) -> str:
    """Return the archive entry name for the document identified by *uuid*."""
    return f"{uuid}_0.i6d"


class I6ZSerializer:
    """Serialize an IUCLIDDocumentBundle into an in-memory ZIP archive.

    The archive contains ``manifest.xml`` plus one ``.i6d`` file per document.
    """

    def serialize(self, bundle: IUCLIDDocumentBundle, *, validate: bool = False) -> bytes:
        """Return the ZIP archive bytes for *bundle*.

        When *validate* is true, every generated document is checked by
        ``_validate_documents`` before the archive is written.
        """
        return self._assemble(bundle, validate=validate)

    def _assemble(self, bundle: IUCLIDDocumentBundle, *, validate: bool = False) -> bytes:
        """Build all documents and the manifest, then pack them into a ZIP."""
        substance_builder = SubstanceBuilder()
        reference_builder = ReferenceSubstanceBuilder()
        study_builder = EndpointStudyRecordBuilder()

        # Each entry is (filename, xml_string, doc_type, uuid, subtype).
        documents: list[tuple[str, str, str, str, str | None]] = []

        documents.extend(
            (
                _i6d_filename(substance.uuid),
                substance_builder.build(substance),
                "SUBSTANCE",
                str(substance.uuid),
                None,
            )
            for substance in bundle.substances
        )
        documents.extend(
            (
                _i6d_filename(reference.uuid),
                reference_builder.build(reference),
                "REFERENCE_SUBSTANCE",
                str(reference.uuid),
                None,
            )
            for reference in bundle.reference_substances
        )
        documents.extend(
            (
                _i6d_filename(study.uuid),
                study_builder.build(study),
                "ENDPOINT_STUDY_RECORD",
                str(study.uuid),
                "BiodegradationInSoil",
            )
            for study in bundle.endpoint_study_records
        )

        if validate:
            self._validate_documents(documents)

        # Relationship links between documents, recorded in the manifest.
        links = self._build_links(bundle)

        # The manifest lists every document without its XML payload.
        manifest_docs = [
            (filename, doc_type, uuid, subtype)
            for filename, _xml, doc_type, uuid, subtype in documents
        ]
        # The first substance, when there is one, supplies the base UUID.
        if bundle.substances:
            base_uuid = str(bundle.substances[0].uuid)
        else:
            base_uuid = ""
        manifest_xml = ManifestBuilder().build(manifest_docs, base_uuid, links=links)

        # Write the manifest first, then each document, into an in-memory ZIP.
        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as archive:
            archive.writestr("manifest.xml", manifest_xml)
            for filename, xml_text, *_ in documents:
                archive.writestr(filename, xml_text)

        return buffer.getvalue()

    @staticmethod
    def _validate_documents(
        files: list[tuple[str, str, str, str, str | None]],
    ) -> None:
        """Validate each i6d document against its XSD schema.

        Only the first child of the platform-container ``Content`` element is
        validated; documents with no ``Content`` element, or an empty one, are
        skipped.

        Raises ``xmlschema.XMLSchemaValidationError`` on the first failure.
        """
        content_tag = f"{{{NS_PLATFORM_CONTAINER}}}Content"
        for _name, xml_text, doc_type, _doc_uuid, subtype in files:
            container = ET.fromstring(xml_text).find(content_tag)
            # Use an explicit length check rather than Element truthiness.
            if container is not None and len(container) > 0:
                payload = container[0]
                schema = get_content_schema(doc_type, subtype)
                schema.validate(payload)

    @staticmethod
    def _build_links(bundle: IUCLIDDocumentBundle) -> dict[str, list[tuple[str, str]]]:
        """Build manifest link relationships between documents.

        Returns a dict mapping document UUID (str) to list of
        (target_doc_key, ref_type). Each link appears at most once per
        document, in the order it was first registered.
        """
        links: dict[str, list[tuple[str, str]]] = {}

        def _register(source_uuid: str, target_key: str, ref_type: str) -> None:
            entry = (target_key, ref_type)
            bucket = links.setdefault(source_uuid, [])
            if entry in bucket:
                return
            bucket.append(entry)

        # Substance -> REFERENCE link to its reference substance, when set.
        for substance in bundle.substances:
            if not substance.reference_substance_uuid:
                continue
            reference_key = document_key(substance.reference_substance_uuid)
            _register(str(substance.uuid), reference_key, "REFERENCE")

        for study in bundle.endpoint_study_records:
            substance_key = document_key(study.substance_uuid)
            study_key = document_key(study.uuid)
            study_uuid = str(study.uuid)

            # ESR -> PARENT link to its substance; substance -> CHILD link to ESR.
            _register(study_uuid, substance_key, "PARENT")
            _register(str(study.substance_uuid), study_key, "CHILD")

            # ESR -> REFERENCE links to each transformation product's reference
            # substance and to each of that product's parent references.
            for product in study.transformation_products:
                _register(study_uuid, document_key(product.product_reference_uuid), "REFERENCE")
                for parent_uuid in product.parent_reference_uuids:
                    _register(study_uuid, document_key(parent_uuid), "REFERENCE")

        return links