forked from enviPath/enviPy
[Feature] Minimal IUCLID export (#338)
This is an initial implementation that creates a working minimal .i6z document. It passes schema validation and can be imported into IUCLID. Caveat: IUCLID files target individual compounds. Pathways are not actually covered by the format. It can be added in either soil or water and soil OECD endpoints. **I currently only implemented the soil endpoint for all data.** This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future). Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field. I use the XSD schema for validation, but unfortunately the IUCLID parser is not fully compliant and requires a specific order, etc. The workflow is: finding the AI structure from the XSD schema -> make the schema validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣 New specifications get released once per year, so we will have to update accordingly. I believe that this should be a more expensive feature, as it requires significant effort to maintain. Currently implemented for root compound only in SOIL: - Soil Texture 2 - Soil Texture 1 - pH value - Half-life per soil sample / scenario (mapped to disappearance; not sure about that). - CEC - Organic Matter (only Carbon) - Moisture content - Humidity <img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62"> <img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93"> <img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c"> Reviewed-on: enviPath/enviPy#338 Co-authored-by: Tobias O <tobias.olenyi@envipath.com> Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
This commit is contained in:
199
epiuclid/tests/test_i6z.py
Normal file
199
epiuclid/tests/test_i6z.py
Normal file
@ -0,0 +1,199 @@
|
||||
"""Tests for i6z archive assembly."""
|
||||
|
||||
import io
|
||||
import xml.etree.ElementTree as ET
|
||||
import zipfile
|
||||
from uuid import uuid4
|
||||
|
||||
from django.test import SimpleTestCase, tag
|
||||
|
||||
from epiuclid.serializers.i6z import I6ZSerializer
|
||||
from epiuclid.serializers.pathway_mapper import (
|
||||
IUCLIDDocumentBundle,
|
||||
IUCLIDEndpointStudyRecordData,
|
||||
IUCLIDReferenceSubstanceData,
|
||||
IUCLIDSubstanceData,
|
||||
IUCLIDTransformationProductEntry,
|
||||
)
|
||||
|
||||
|
||||
def _make_bundle() -> IUCLIDDocumentBundle:
    """Build the smallest bundle used by these tests.

    The bundle holds exactly one substance, one reference substance and one
    endpoint study record, all describing benzene.  The substance points at
    the reference substance, and the study record points at the substance.
    Fresh random UUIDs are generated on every call.
    """
    reference_id = uuid4()
    substance_id = uuid4()

    benzene_substance = IUCLIDSubstanceData(
        uuid=substance_id,
        name="Benzene",
        reference_substance_uuid=reference_id,
    )
    benzene_reference = IUCLIDReferenceSubstanceData(
        uuid=reference_id,
        name="Benzene",
        smiles="c1ccccc1",
        cas_number="71-43-2",
        molecular_formula="C6H6",
        molecular_weight=78.11,
    )
    benzene_study = IUCLIDEndpointStudyRecordData(
        uuid=uuid4(),
        substance_uuid=substance_id,
        name="Endpoint study - Benzene",
    )

    return IUCLIDDocumentBundle(
        substances=[benzene_substance],
        reference_substances=[benzene_reference],
        endpoint_study_records=[benzene_study],
    )
|
||||
|
||||
|
||||
def _make_bundle_with_transformation_links() -> tuple[IUCLIDDocumentBundle, str, str]:
    """Build a bundle whose study record carries one transformation product.

    The bundle contains one substance (benzene), two reference substances
    (benzene as parent, phenol as product) and one endpoint study record with
    a single transformation-product entry linking the parent to the product.

    Returns:
        A 3-tuple ``(bundle, parent_key, product_key)`` where each key is the
        corresponding reference-substance UUID rendered as ``"<uuid>/0"``.
    """
    parent_ref_uuid = uuid4()
    product_ref_uuid = uuid4()
    sub_uuid = uuid4()

    substance = IUCLIDSubstanceData(
        uuid=sub_uuid,
        name="Benzene",
        reference_substance_uuid=parent_ref_uuid,
    )
    parent_reference = IUCLIDReferenceSubstanceData(
        uuid=parent_ref_uuid, name="Benzene", smiles="c1ccccc1"
    )
    product_reference = IUCLIDReferenceSubstanceData(
        uuid=product_ref_uuid, name="Phenol", smiles="c1ccc(O)cc1"
    )

    # The study-record UUID is drawn before the entry UUID, mirroring the
    # evaluation order of the nested constructor call this replaces.
    study_uuid = uuid4()
    phenol_entry = IUCLIDTransformationProductEntry(
        uuid=uuid4(),
        product_reference_uuid=product_ref_uuid,
        parent_reference_uuids=[parent_ref_uuid],
    )
    study = IUCLIDEndpointStudyRecordData(
        uuid=study_uuid,
        substance_uuid=sub_uuid,
        name="Endpoint study - Benzene",
        transformation_products=[phenol_entry],
    )

    bundle = IUCLIDDocumentBundle(
        substances=[substance],
        reference_substances=[parent_reference, product_reference],
        endpoint_study_records=[study],
    )
    parent_key = f"{parent_ref_uuid}/0"
    product_key = f"{product_ref_uuid}/0"
    return bundle, parent_key, product_key
|
||||
|
||||
|
||||
@tag("iuclid")
class I6ZSerializerTest(SimpleTestCase):
    """Structural checks on the archive bytes returned by ``I6ZSerializer``.

    Every test serializes an in-memory bundle and inspects the resulting zip:
    its entry names, the ``manifest.xml`` document list, and the links each
    manifest document declares.
    """

    # XML namespace used to qualify every element looked up in manifest.xml.
    MANIFEST_NS = "http://iuclid6.echa.europa.eu/namespaces/manifest/v1"

    @classmethod
    def _qualified(cls, local_name: str) -> str:
        """Return *local_name* in ElementTree ``{namespace}name`` notation."""
        return f"{{{cls.MANIFEST_NS}}}{local_name}"

    @staticmethod
    def _open_archive(bundle: IUCLIDDocumentBundle) -> zipfile.ZipFile:
        """Serialize *bundle* and open the resulting bytes as a zip archive."""
        return zipfile.ZipFile(io.BytesIO(I6ZSerializer().serialize(bundle)))

    def _manifest_documents(self, bundle: IUCLIDDocumentBundle) -> list[ET.Element]:
        """Return every ``document`` element found in the bundle's manifest.xml."""
        with self._open_archive(bundle) as archive:
            manifest_xml = archive.read("manifest.xml").decode("utf-8")
        root = ET.fromstring(manifest_xml)
        return root.findall(f".//{self._qualified('document')}")

    def _links_of(self, doc: ET.Element) -> set[tuple[str | None, str | None]]:
        """Return the ``(ref-type, ref-uuid)`` pairs declared under ``links/link``."""
        link_path = f"{self._qualified('links')}/{self._qualified('link')}"
        return {
            (
                link.findtext(self._qualified("ref-type")),
                link.findtext(self._qualified("ref-uuid")),
            )
            for link in doc.findall(link_path)
        }

    def test_output_is_valid_zip(self):
        """The serializer output is recognised as a zip file."""
        data = I6ZSerializer().serialize(_make_bundle())
        self.assertTrue(zipfile.is_zipfile(io.BytesIO(data)))

    def test_contains_manifest(self):
        """The archive has a ``manifest.xml`` entry."""
        with self._open_archive(_make_bundle()) as archive:
            self.assertIn("manifest.xml", archive.namelist())

    def test_contains_i6d_files(self):
        """The archive holds one ``.i6d`` entry per document plus the manifest."""
        with self._open_archive(_make_bundle()) as archive:
            names = archive.namelist()

        # manifest + 1 substance + 1 ref substance + 1 ESR = 4 files
        self.assertEqual(len(names), 4)
        i6d_files = [entry for entry in names if entry.endswith(".i6d")]
        self.assertEqual(len(i6d_files), 3)

    def test_manifest_references_all_documents(self):
        """The manifest lists three documents, one of each expected type."""
        docs = self._manifest_documents(_make_bundle())
        self.assertEqual(len(docs), 3)

        types = set()
        for doc in docs:
            type_elem = doc.find(self._qualified("type"))
            self.assertIsNotNone(type_elem)
            assert type_elem is not None  # narrows the Optional for type checkers
            types.add(type_elem.text)
        self.assertEqual(types, {"SUBSTANCE", "REFERENCE_SUBSTANCE", "ENDPOINT_STUDY_RECORD"})

    def test_manifest_contains_expected_document_links(self):
        """Substance and study-record documents declare the expected link types."""
        docs = self._manifest_documents(_make_bundle())

        links_by_type: dict[str, set[tuple[str | None, str | None]]] = {}
        for doc in docs:
            doc_type = doc.findtext(self._qualified("type"))
            if doc_type:
                # Merge instead of overwrite so repeated types keep all links.
                links_by_type.setdefault(doc_type, set()).update(self._links_of(doc))

        # Assert presence first: a missing type is a failure, not a KeyError.
        self.assertIn("SUBSTANCE", links_by_type)
        self.assertIn("ENDPOINT_STUDY_RECORD", links_by_type)

        substance_ref_types = {ref_type for ref_type, _ in links_by_type["SUBSTANCE"]}
        esr_ref_types = {ref_type for ref_type, _ in links_by_type["ENDPOINT_STUDY_RECORD"]}
        self.assertIn("REFERENCE", substance_ref_types)
        self.assertIn("CHILD", substance_ref_types)
        self.assertIn("PARENT", esr_ref_types)

    def test_i6d_files_are_valid_xml(self):
        """Every ``.i6d`` entry in the archive parses as XML."""
        with self._open_archive(_make_bundle()) as archive:
            i6d_names = [entry for entry in archive.namelist() if entry.endswith(".i6d")]
            # Without this the loop below would pass vacuously on an empty archive.
            self.assertTrue(i6d_names, "archive contains no .i6d files")
            for entry in i6d_names:
                with self.subTest(i6d=entry):
                    content = archive.read(entry).decode("utf-8")
                    # Should not raise
                    ET.fromstring(content)

    def test_manifest_links_esr_to_transformation_reference_substances(self):
        """The study record links to both parent and product reference substances."""
        bundle, parent_ref_key, product_ref_key = _make_bundle_with_transformation_links()
        docs = self._manifest_documents(bundle)

        esr_doc = next(
            (
                doc
                for doc in docs
                if doc.findtext(self._qualified("type")) == "ENDPOINT_STUDY_RECORD"
            ),
            None,
        )
        self.assertIsNotNone(esr_doc)
        assert esr_doc is not None  # narrows the Optional for type checkers

        reference_links = {
            ref_uuid for ref_type, ref_uuid in self._links_of(esr_doc) if ref_type == "REFERENCE"
        }
        self.assertIn(parent_ref_key, reference_links)
        self.assertIn(product_ref_key, reference_links)
|
||||
Reference in New Issue
Block a user