[Feature] Minimal IUCLID export (#338)

This is an initial implementation that creates a working minimal .i6z document.
It passes schema validation and can be imported into IUCLID.

Caveat:
IUCLID files target individual compounds.
Pathways are not actually covered by the format.

Pathway data can be added under either the soil or the water-and-soil OECD endpoints.
**I currently only implemented the soil endpoint for all data.**

This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future).

Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field.
I use the XSD schema for validation, but unfortunately the IUCLID parser is not fully compliant and requires a specific order, etc.

The workflow is: find the AI (additional information) structure in the XSD schema -> make the schema validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣

New specifications get released once per year, so we will have to update accordingly.
I believe that this should be a more expensive feature, as it requires significant effort to maintain.

Currently implemented for root compound only in SOIL:

- Soil Texture 2
- Soil Texture 1
- pH value
- Half-life per soil sample / scenario (mapped to disappearance; not sure about that).
- CEC
- Organic Matter (only Carbon)
- Moisture content
- Humidity

<img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62">
<img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93">
<img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c">

Reviewed-on: enviPath/enviPy#338
Co-authored-by: Tobias O <tobias.olenyi@envipath.com>
Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
This commit is contained in:
2026-04-07 19:46:12 +12:00
committed by jebus
parent f7c45b8015
commit d06bd0d4fd
49 changed files with 66402 additions and 1014 deletions

View File

@ -74,7 +74,6 @@ class TestSchemaGeneration:
assert all(isinstance(g, str) for g in groups), (
f"{model_name}: all groups should be strings, got {groups}"
)
assert len(groups) > 0, f"{model_name}: should have at least one group"
@pytest.mark.parametrize("model_name,model_cls", list(registry.list_models().items()))
def test_form_data_matches_schema(self, model_name: str, model_cls: Type[BaseModel]):

View File

@ -1,10 +1,14 @@
from django.db.models import Model
from epdb.logic import PackageManager
from epdb.models import CompoundStructure, User, Package, Compound, Scenario
from uuid import UUID
from django.conf import settings as s
from django.db.models import Model
from epdb.logic import PackageManager
from epdb.models import CompoundStructure, User, Compound, Scenario
from .errors import EPAPINotFoundError, EPAPIPermissionDeniedError
Package = s.GET_PACKAGE_MODEL()
def get_compound_for_read(user, compound_uuid: UUID):
"""

View File

@ -0,0 +1,3 @@
"""
Service interfaces: each subdirectory defines the full boundary contract between enviPy and feature-flagged apps. DTOs and projections are shared concerns to avoid direct ORM access.
"""

View File

View File

@ -0,0 +1,58 @@
from dataclasses import dataclass, field
from uuid import UUID
@dataclass(frozen=True)
class PathwayCompoundDTO:
    """Read-only projection of a single compound that occurs in a pathway.

    ``frozen=True`` means attributes cannot be reassigned after construction.
    """

    pk: int  # compound primary key; nodes and edges refer to compounds by this value
    name: str
    smiles: str | None = None  # SMILES string, when one is available
    cas_number: str | None = None  # CAS identifier, when one is recorded
    ec_number: str | None = None  # EC identifier, when one is recorded
@dataclass(frozen=True)
class PathwayScenarioDTO:
    """Read-only projection of one scenario together with its additional information."""

    scenario_uuid: UUID
    name: str
    # frozen=True only prevents rebinding the attribute; the list object itself
    # stays mutable, and a list-valued field makes hash() on an instance raise.
    additional_info: list = field(default_factory=list)  # EnviPyModel instances
@dataclass(frozen=True)
class PathwayNodeDTO:
    """Read-only projection of one pathway node and the compound it represents."""

    node_uuid: UUID
    compound_pk: int  # primary key of the compound shown by this node
    name: str
    depth: int  # depth of the node within the pathway; 0 is treated as a root by the IUCLID projection
    smiles: str | None = None
    cas_number: str | None = None
    ec_number: str | None = None
    # Additional information for the node as a whole. frozen=True does not make
    # the list immutable, and list-valued fields make instances unhashable.
    additional_info: list = field(default_factory=list)  # EnviPyModel instances
    # Per-scenario breakdown; each entry carries its own additional_info list.
    scenarios: list[PathwayScenarioDTO] = field(default_factory=list)
@dataclass(frozen=True)
class PathwayEdgeDTO:
    """Read-only projection of one pathway edge expressed in terms of compound keys."""

    edge_uuid: UUID
    # Compound primary keys on the start side of the edge.
    start_compound_pks: list[int] = field(default_factory=list)
    # Compound primary keys on the end side of the edge.
    end_compound_pks: list[int] = field(default_factory=list)
    probability: float | None = None  # None when no probability is available for the edge
@dataclass(frozen=True)
class PathwayModelInfoDTO:
    """Read-only description of the model and software associated with a pathway.

    Every field is optional and defaults to ``None``.
    """

    model_name: str | None = None
    model_uuid: UUID | None = None
    software_name: str | None = None
    software_version: str | None = None
@dataclass(frozen=True)
class PathwayExportDTO:
    """Top-level read-only container bundling everything exported for one pathway."""

    pathway_uuid: UUID
    pathway_name: str
    # All list fields default to a fresh empty list per instance.
    compounds: list[PathwayCompoundDTO] = field(default_factory=list)
    nodes: list[PathwayNodeDTO] = field(default_factory=list)
    edges: list[PathwayEdgeDTO] = field(default_factory=list)
    # Primary keys of the compounds regarded as pathway roots.
    root_compound_pks: list[int] = field(default_factory=list)
    model_info: PathwayModelInfoDTO | None = None  # None when no model information is attached

View File

@ -0,0 +1,142 @@
from uuid import UUID
from epdb.logic import PackageManager
from epdb.models import Pathway
from epapi.v1.errors import EPAPINotFoundError, EPAPIPermissionDeniedError
from .dto import (
PathwayCompoundDTO,
PathwayEdgeDTO,
PathwayExportDTO,
PathwayModelInfoDTO,
PathwayNodeDTO,
PathwayScenarioDTO,
)
def get_pathway_for_iuclid_export(user, pathway_uuid: UUID) -> PathwayExportDTO:
    """Return pathway data projected into DTOs for the IUCLID export consumer.

    Args:
        user: The requesting user. Must be authenticated and able to read the
            package that owns the pathway.
        pathway_uuid: UUID of the pathway to export.

    Returns:
        A ``PathwayExportDTO`` containing the pathway's nodes (ordered by
        depth, then primary key), its edges, one compound entry per distinct
        compound, the compound keys of all depth-0 nodes, and model
        information when the pathway's setting references a model.

    Raises:
        EPAPINotFoundError: If no pathway with ``pathway_uuid`` exists.
        EPAPIPermissionDeniedError: If ``user`` is missing, anonymous, or not
            allowed to read the pathway's package.
    """
    try:
        pathway = (
            Pathway.objects.select_related("package", "setting", "setting__model")
            .prefetch_related(
                "node_set__default_node_label__compound__external_identifiers__database",
                "node_set__scenarios",
                "edge_set__start_nodes__default_node_label__compound",
                "edge_set__end_nodes__default_node_label__compound",
            )
            .get(uuid=pathway_uuid)
        )
    except Pathway.DoesNotExist as exc:
        raise EPAPINotFoundError(f"Pathway with UUID {pathway_uuid} not found") from exc

    if not user or user.is_anonymous or not PackageManager.readable(user, pathway.package):
        raise EPAPIPermissionDeniedError("Insufficient permissions to access this pathway.")

    nodes: list[PathwayNodeDTO] = []
    edges: list[PathwayEdgeDTO] = []
    compounds_by_pk: dict[int, PathwayCompoundDTO] = {}
    root_compound_pks: list[int] = []

    # Sort in Python instead of chaining .order_by(): a chained queryset method
    # issues a fresh query and bypasses the prefetch cache, which would throw
    # away the node-level prefetches requested above (N+1 queries per node).
    # The key tolerates a missing depth so sorting can never raise; such nodes
    # are placed last.
    ordered_nodes = sorted(
        pathway.node_set.all(),
        key=lambda n: (n.depth is None, n.depth or 0, n.pk),
    )

    for node in ordered_nodes:
        cs = node.default_node_label
        if cs is None:
            # A node without a default structure cannot be mapped to a compound.
            continue
        compound = cs.compound

        # Keep the first CAS and the first EC identifier found for the compound.
        cas_number = None
        ec_number = None
        for ext_id in compound.external_identifiers.all():
            db_name = ext_id.database.name if ext_id.database else None
            if db_name == "CAS" and cas_number is None:
                cas_number = ext_id.identifier_value
            elif db_name == "EC" and ec_number is None:
                ec_number = ext_id.identifier_value

        # Collect additional information both per scenario and flattened for
        # the node; scenarios are sorted in Python to keep the prefetch cache.
        ai_for_node = []
        scenario_entries: list[PathwayScenarioDTO] = []
        for scenario in sorted(node.scenarios.all(), key=lambda item: item.pk):
            ai_for_scenario = list(scenario.get_additional_information(direct_only=True))
            ai_for_node.extend(ai_for_scenario)
            scenario_entries.append(
                PathwayScenarioDTO(
                    scenario_uuid=scenario.uuid,
                    name=scenario.name,
                    additional_info=ai_for_scenario,
                )
            )

        nodes.append(
            PathwayNodeDTO(
                node_uuid=node.uuid,
                compound_pk=compound.pk,
                name=compound.name,
                depth=node.depth,
                smiles=cs.smiles,
                cas_number=cas_number,
                ec_number=ec_number,
                additional_info=ai_for_node,
                scenarios=scenario_entries,
            )
        )

        if node.depth == 0 and compound.pk not in root_compound_pks:
            root_compound_pks.append(compound.pk)

        # The first node seen for a compound defines its compound entry.
        if compound.pk not in compounds_by_pk:
            compounds_by_pk[compound.pk] = PathwayCompoundDTO(
                pk=compound.pk,
                name=compound.name,
                smiles=cs.smiles,
                cas_number=cas_number,
                ec_number=ec_number,
            )

    for edge in pathway.edge_set.all():
        # Sets de-duplicate compounds reached through several nodes.
        start_compounds = {
            n.default_node_label.compound.pk
            for n in edge.start_nodes.all()
            if n.default_node_label is not None
        }
        end_compounds = {
            n.default_node_label.compound.pk
            for n in edge.end_nodes.all()
            if n.default_node_label is not None
        }

        # A missing or non-numeric probability is reported as None.
        probability = None
        raw_probability = edge.kv.get("probability") if edge.kv else None
        if raw_probability is not None:
            try:
                probability = float(raw_probability)
            except (TypeError, ValueError):
                probability = None

        edges.append(
            PathwayEdgeDTO(
                edge_uuid=edge.uuid,
                start_compound_pks=sorted(start_compounds),
                end_compound_pks=sorted(end_compounds),
                probability=probability,
            )
        )

    model_info = None
    if pathway.setting and pathway.setting.model:
        model = pathway.setting.model
        model_info = PathwayModelInfoDTO(
            model_name=model.get_name(),
            model_uuid=model.uuid,
            software_name="enviPath",
            software_version=None,
        )

    return PathwayExportDTO(
        pathway_uuid=pathway.uuid,
        pathway_name=pathway.get_name(),
        compounds=list(compounds_by_pk.values()),
        nodes=nodes,
        edges=edges,
        root_compound_pks=root_compound_pks,
        model_info=model_info,
    )

View File

@ -14,6 +14,7 @@ from .endpoints import (
additional_information,
settings,
)
from envipath import settings as s
# Main router with authentication
router = Router(
@ -34,3 +35,8 @@ router.add_router("", models.router)
router.add_router("", structure.router)
router.add_router("", additional_information.router)
router.add_router("", settings.router)
# Feature flag: the IUCLID export routes are mounted on the main router only
# when the flag is enabled. The import sits inside the branch so this module
# does not import `epiuclid.api` while the flag is off.
# NOTE(review): `s` is the `envipath.settings` module imported directly, whereas
# other modules in this change read settings through `django.conf.settings` —
# confirm which is intended, since the two can differ under settings overrides.
if s.IUCLID_EXPORT_ENABLED:
    from epiuclid.api import router as iuclid_router
    router.add_router("", iuclid_router)