# enviPy-bayer/epiuclid/tests/test_i6z.py

"""Tests for i6z archive assembly."""

import io
import xml.etree.ElementTree as ET
import zipfile
from uuid import uuid4

from django.test import SimpleTestCase, tag

from epiuclid.serializers.i6z import I6ZSerializer
from epiuclid.serializers.pathway_mapper import (
    IUCLIDDocumentBundle,
    IUCLIDEndpointStudyRecordData,
    IUCLIDReferenceSubstanceData,
    IUCLIDSubstanceData,
    IUCLIDTransformationProductEntry,
)


def _make_bundle() -> IUCLIDDocumentBundle:
    """Return a small bundle with one substance, one reference substance and one ESR.

    The substance points at the reference substance, and the endpoint study
    record points at the substance, so all three documents are linked.
    """
    reference_id = uuid4()
    substance_id = uuid4()

    benzene = IUCLIDSubstanceData(
        uuid=substance_id,
        name="Benzene",
        reference_substance_uuid=reference_id,
    )
    benzene_reference = IUCLIDReferenceSubstanceData(
        uuid=reference_id,
        name="Benzene",
        smiles="c1ccccc1",
        cas_number="71-43-2",
        molecular_formula="C6H6",
        molecular_weight=78.11,
    )
    study_record = IUCLIDEndpointStudyRecordData(
        uuid=uuid4(),
        substance_uuid=substance_id,
        name="Endpoint study - Benzene",
    )

    return IUCLIDDocumentBundle(
        substances=[benzene],
        reference_substances=[benzene_reference],
        endpoint_study_records=[study_record],
    )


def _make_bundle_with_transformation_links() -> tuple[IUCLIDDocumentBundle, str, str]:
    """Return a bundle whose ESR carries one transformation product entry.

    The entry names Phenol as the product and Benzene as its parent.

    Returns:
        A ``(bundle, parent_key, product_key)`` tuple, where each key is the
        corresponding reference substance UUID followed by ``"/0"``.
    """
    parent_id = uuid4()
    product_id = uuid4()
    substance_id = uuid4()

    benzene = IUCLIDSubstanceData(
        uuid=substance_id,
        name="Benzene",
        reference_substance_uuid=parent_id,
    )
    parent_reference = IUCLIDReferenceSubstanceData(
        uuid=parent_id, name="Benzene", smiles="c1ccccc1"
    )
    product_reference = IUCLIDReferenceSubstanceData(
        uuid=product_id, name="Phenol", smiles="c1ccc(O)cc1"
    )

    study_id = uuid4()
    phenol_from_benzene = IUCLIDTransformationProductEntry(
        uuid=uuid4(),
        product_reference_uuid=product_id,
        parent_reference_uuids=[parent_id],
    )
    study_record = IUCLIDEndpointStudyRecordData(
        uuid=study_id,
        substance_uuid=substance_id,
        name="Endpoint study - Benzene",
        transformation_products=[phenol_from_benzene],
    )

    bundle = IUCLIDDocumentBundle(
        substances=[benzene],
        reference_substances=[parent_reference, product_reference],
        endpoint_study_records=[study_record],
    )
    parent_key = f"{parent_id}/0"
    product_key = f"{product_id}/0"
    return bundle, parent_key, product_key


@tag("iuclid")
class I6ZSerializerTest(SimpleTestCase):
    """Checks the archive and manifest produced by ``I6ZSerializer.serialize``."""

    # XML namespace of the elements these tests look up in manifest.xml.
    _MANIFEST_NS = "http://iuclid6.echa.europa.eu/namespaces/manifest/v1"

    @classmethod
    def _qname(cls, local_name: str) -> str:
        """Return *local_name* qualified with the manifest namespace (``{ns}name``)."""
        return f"{{{cls._MANIFEST_NS}}}{local_name}"

    @staticmethod
    def _open_archive(bundle: IUCLIDDocumentBundle) -> zipfile.ZipFile:
        """Serialize *bundle* and open the resulting bytes as a ZIP archive.

        The caller is responsible for closing the archive (use it in ``with``).
        """
        return zipfile.ZipFile(io.BytesIO(I6ZSerializer().serialize(bundle)))

    def _manifest_documents(self, bundle: IUCLIDDocumentBundle) -> list[ET.Element]:
        """Return every ``document`` element found in the manifest for *bundle*."""
        with self._open_archive(bundle) as zf:
            manifest_xml = zf.read("manifest.xml").decode("utf-8")
        root = ET.fromstring(manifest_xml)
        return root.findall(f".//{self._qname('document')}")

    def _link_pairs(self, doc: ET.Element) -> set[tuple[str | None, str | None]]:
        """Return the ``(ref-type, ref-uuid)`` pairs listed under *doc*'s links."""
        link_path = f"{self._qname('links')}/{self._qname('link')}"
        return {
            (
                link.findtext(self._qname("ref-type")),
                link.findtext(self._qname("ref-uuid")),
            )
            for link in doc.findall(link_path)
        }

    def test_output_is_valid_zip(self):
        data = I6ZSerializer().serialize(_make_bundle())
        self.assertTrue(zipfile.is_zipfile(io.BytesIO(data)))

    def test_contains_manifest(self):
        with self._open_archive(_make_bundle()) as zf:
            self.assertIn("manifest.xml", zf.namelist())

    def test_contains_i6d_files(self):
        with self._open_archive(_make_bundle()) as zf:
            names = zf.namelist()

        # manifest + 1 substance + 1 ref substance + 1 ESR = 4 files
        self.assertEqual(len(names), 4)
        i6d_files = [n for n in names if n.endswith(".i6d")]
        self.assertEqual(len(i6d_files), 3)

    def test_manifest_references_all_documents(self):
        docs = self._manifest_documents(_make_bundle())
        self.assertEqual(len(docs), 3)

        # A document without a <type> child contributes None to the set, so
        # the equality check below also catches missing type elements.
        types = {doc.findtext(self._qname("type")) for doc in docs}
        self.assertEqual(types, {"SUBSTANCE", "REFERENCE_SUBSTANCE", "ENDPOINT_STUDY_RECORD"})

    def test_manifest_contains_expected_document_links(self):
        docs = self._manifest_documents(_make_bundle())

        # Accumulate per type so several documents of one type do not
        # overwrite each other's links.
        links_by_type: dict[str, set[tuple[str | None, str | None]]] = {}
        for doc in docs:
            doc_type = doc.findtext(self._qname("type"))
            if doc_type:
                links_by_type.setdefault(doc_type, set()).update(self._link_pairs(doc))

        # .get() turns a missing document type into an assertion failure
        # instead of a KeyError.
        substance_ref_types = {
            ref_type for ref_type, _ in links_by_type.get("SUBSTANCE", set())
        }
        esr_ref_types = {
            ref_type for ref_type, _ in links_by_type.get("ENDPOINT_STUDY_RECORD", set())
        }

        self.assertIn("REFERENCE", substance_ref_types)
        self.assertIn("CHILD", substance_ref_types)
        self.assertIn("PARENT", esr_ref_types)

    def test_i6d_files_are_valid_xml(self):
        with self._open_archive(_make_bundle()) as zf:
            i6d_names = [n for n in zf.namelist() if n.endswith(".i6d")]
            # Without this guard the loop below would pass vacuously on an
            # archive that contains no documents at all.
            self.assertTrue(i6d_names, "archive contains no .i6d files")

            for name in i6d_names:
                with self.subTest(document=name):
                    content = zf.read(name).decode("utf-8")
                    # Should not raise
                    ET.fromstring(content)

    def test_manifest_links_esr_to_transformation_reference_substances(self):
        bundle, parent_ref_key, product_ref_key = _make_bundle_with_transformation_links()
        docs = self._manifest_documents(bundle)

        esr_doc = next(
            (
                doc
                for doc in docs
                if doc.findtext(self._qname("type")) == "ENDPOINT_STUDY_RECORD"
            ),
            None,
        )
        if esr_doc is None:
            self.fail("manifest contains no ENDPOINT_STUDY_RECORD document")

        reference_links = {
            ref_uuid
            for ref_type, ref_uuid in self._link_pairs(esr_doc)
            if ref_type == "REFERENCE"
        }
        self.assertIn(parent_ref_key, reference_links)
        self.assertIn(product_ref_key, reference_links)