"""Tests for i6z archive assembly.""" import io import xml.etree.ElementTree as ET import zipfile from uuid import uuid4 from django.test import SimpleTestCase, tag from epiuclid.serializers.i6z import I6ZSerializer from epiuclid.serializers.pathway_mapper import ( IUCLIDDocumentBundle, IUCLIDEndpointStudyRecordData, IUCLIDReferenceSubstanceData, IUCLIDSubstanceData, IUCLIDTransformationProductEntry, ) def _make_bundle() -> IUCLIDDocumentBundle: ref_uuid = uuid4() sub_uuid = uuid4() return IUCLIDDocumentBundle( substances=[ IUCLIDSubstanceData( uuid=sub_uuid, name="Benzene", reference_substance_uuid=ref_uuid, ), ], reference_substances=[ IUCLIDReferenceSubstanceData( uuid=ref_uuid, name="Benzene", smiles="c1ccccc1", cas_number="71-43-2", molecular_formula="C6H6", molecular_weight=78.11, ), ], endpoint_study_records=[ IUCLIDEndpointStudyRecordData( uuid=uuid4(), substance_uuid=sub_uuid, name="Endpoint study - Benzene", ), ], ) def _make_bundle_with_transformation_links() -> tuple[IUCLIDDocumentBundle, str, str]: parent_ref_uuid = uuid4() product_ref_uuid = uuid4() sub_uuid = uuid4() bundle = IUCLIDDocumentBundle( substances=[ IUCLIDSubstanceData( uuid=sub_uuid, name="Benzene", reference_substance_uuid=parent_ref_uuid, ), ], reference_substances=[ IUCLIDReferenceSubstanceData(uuid=parent_ref_uuid, name="Benzene", smiles="c1ccccc1"), IUCLIDReferenceSubstanceData( uuid=product_ref_uuid, name="Phenol", smiles="c1ccc(O)cc1" ), ], endpoint_study_records=[ IUCLIDEndpointStudyRecordData( uuid=uuid4(), substance_uuid=sub_uuid, name="Endpoint study - Benzene", transformation_products=[ IUCLIDTransformationProductEntry( uuid=uuid4(), product_reference_uuid=product_ref_uuid, parent_reference_uuids=[parent_ref_uuid], ) ], ), ], ) return bundle, f"{parent_ref_uuid}/0", f"{product_ref_uuid}/0" @tag("iuclid") class I6ZSerializerTest(SimpleTestCase): def test_output_is_valid_zip(self): bundle = _make_bundle() data = I6ZSerializer().serialize(bundle) self.assertTrue(zipfile.is_zipfile(io.BytesIO(data))) def test_contains_manifest(self): bundle = _make_bundle() data = I6ZSerializer().serialize(bundle) with zipfile.ZipFile(io.BytesIO(data)) as zf: self.assertIn("manifest.xml", zf.namelist()) def test_contains_i6d_files(self): bundle = _make_bundle() data = I6ZSerializer().serialize(bundle) with zipfile.ZipFile(io.BytesIO(data)) as zf: names = zf.namelist() # manifest + 1 substance + 1 ref substance + 1 ESR = 4 files self.assertEqual(len(names), 4) i6d_files = [n for n in names if n.endswith(".i6d")] self.assertEqual(len(i6d_files), 3) def test_manifest_references_all_documents(self): bundle = _make_bundle() data = I6ZSerializer().serialize(bundle) with zipfile.ZipFile(io.BytesIO(data)) as zf: manifest_xml = zf.read("manifest.xml").decode("utf-8") root = ET.fromstring(manifest_xml) ns = "http://iuclid6.echa.europa.eu/namespaces/manifest/v1" docs = root.findall(f".//{{{ns}}}document") self.assertEqual(len(docs), 3) types = set() for doc in docs: type_elem = doc.find(f"{{{ns}}}type") self.assertIsNotNone(type_elem) assert type_elem is not None types.add(type_elem.text) self.assertEqual(types, {"SUBSTANCE", "REFERENCE_SUBSTANCE", "ENDPOINT_STUDY_RECORD"}) def test_manifest_contains_expected_document_links(self): bundle = _make_bundle() data = I6ZSerializer().serialize(bundle) with zipfile.ZipFile(io.BytesIO(data)) as zf: manifest_xml = zf.read("manifest.xml").decode("utf-8") root = ET.fromstring(manifest_xml) ns = "http://iuclid6.echa.europa.eu/namespaces/manifest/v1" docs = root.findall(f".//{{{ns}}}document") links_by_type: dict[str, set[tuple[str | None, str | None]]] = {} for doc in docs: doc_type = doc.findtext(f"{{{ns}}}type") links = set() for link in doc.findall(f"{{{ns}}}links/{{{ns}}}link"): links.add( ( link.findtext(f"{{{ns}}}ref-type"), link.findtext(f"{{{ns}}}ref-uuid"), ) ) if doc_type: links_by_type[doc_type] = links self.assertIn("REFERENCE", {ref_type for ref_type, _ in links_by_type["SUBSTANCE"]}) self.assertIn("CHILD", {ref_type for ref_type, _ in links_by_type["SUBSTANCE"]}) self.assertIn( "PARENT", {ref_type for ref_type, _ in links_by_type["ENDPOINT_STUDY_RECORD"]} ) def test_i6d_files_are_valid_xml(self): bundle = _make_bundle() data = I6ZSerializer().serialize(bundle) with zipfile.ZipFile(io.BytesIO(data)) as zf: for name in zf.namelist(): if name.endswith(".i6d"): content = zf.read(name).decode("utf-8") # Should not raise ET.fromstring(content) def test_manifest_links_esr_to_transformation_reference_substances(self): bundle, parent_ref_key, product_ref_key = _make_bundle_with_transformation_links() data = I6ZSerializer().serialize(bundle) with zipfile.ZipFile(io.BytesIO(data)) as zf: manifest_xml = zf.read("manifest.xml").decode("utf-8") root = ET.fromstring(manifest_xml) ns = "http://iuclid6.echa.europa.eu/namespaces/manifest/v1" esr_doc = None for doc in root.findall(f".//{{{ns}}}document"): if doc.findtext(f"{{{ns}}}type") == "ENDPOINT_STUDY_RECORD": esr_doc = doc break self.assertIsNotNone(esr_doc) assert esr_doc is not None reference_links = { link.findtext(f"{{{ns}}}ref-uuid") for link in esr_doc.findall(f"{{{ns}}}links/{{{ns}}}link") if link.findtext(f"{{{ns}}}ref-type") == "REFERENCE" } self.assertIn(parent_ref_key, reference_links) self.assertIn(product_ref_key, reference_links)