forked from enviPath/enviPy
[Feature] Minimal IUCLID export (#338)
This is an initial implementation that creates a working minimal .i6z document. It passes schema validation and can be imported into IUCLID. Caveat: IUCLID files target individual compounds. Pathways are not actually covered by the format. They can be added in either soil or water and soil OECD endpoints. **I currently only implemented the soil endpoint for all data.** This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future). Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field. I use the XSD schema for validation, but unfortunately the IUCLID parser is not fully compliant and requires a specific order, etc. The workflow is: find the AI structure from the XSD schema -> make the schema validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣 New specifications get released once per year, so we will have to update accordingly. I believe that this should be a more expensive feature, as it requires significant effort to maintain. Currently implemented for root compound only in SOIL: - Soil Texture 2 - Soil Texture 1 - pH value - Half-life per soil sample / scenario (mapped to disappearance; not sure about that). - CEC - Organic Matter (only Carbon) - Moisture content - Humidity <img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62"> <img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93"> <img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c"> Reviewed-on: enviPath/enviPy#338 Co-authored-by: Tobias O <tobias.olenyi@envipath.com> Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
This commit is contained in:
3
epapi/v1/interfaces/__init__.py
Normal file
3
epapi/v1/interfaces/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
"""
|
||||
Service interfaces: each subdirectory defines the full boundary contract between enviPy and feature-flagged apps. DTOs and projections are shared concerns to avoid direct ORM access.
|
||||
"""
|
||||
0
epapi/v1/interfaces/iuclid/__init__.py
Normal file
0
epapi/v1/interfaces/iuclid/__init__.py
Normal file
58
epapi/v1/interfaces/iuclid/dto.py
Normal file
58
epapi/v1/interfaces/iuclid/dto.py
Normal file
@ -0,0 +1,58 @@
|
||||
from dataclasses import dataclass, field
|
||||
from uuid import UUID
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PathwayCompoundDTO:
|
||||
pk: int
|
||||
name: str
|
||||
smiles: str | None = None
|
||||
cas_number: str | None = None
|
||||
ec_number: str | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PathwayScenarioDTO:
    """One scenario attached to a pathway node, with its additional information.

    ``frozen=True`` only prevents attribute reassignment; the contained list
    itself stays mutable. Because a list field takes part in the generated
    ``__hash__``, calling ``hash()`` on an instance raises ``TypeError``.
    """

    # UUID of the scenario this entry was projected from.
    scenario_uuid: UUID
    # Display name of the scenario.
    name: str
    # Additional-information objects of this scenario (EnviPyModel instances).
    # A fresh list is created per instance via ``default_factory``.
    additional_info: list = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PathwayNodeDTO:
    """One node of an exported pathway together with its compound data.

    ``frozen=True`` only prevents attribute reassignment; the list fields
    remain mutable, and because they take part in the generated ``__hash__``
    instances are not hashable.
    """

    # UUID of the pathway node.
    node_uuid: UUID
    # Primary key of the compound shown by the node.
    compound_pk: int
    # Display name of that compound.
    name: str
    # Depth of the node inside the pathway.
    depth: int
    # SMILES string of the node's structure, when available.
    smiles: str | None = None
    # CAS identifier of the compound, when registered.
    cas_number: str | None = None
    # EC identifier of the compound, when registered.
    ec_number: str | None = None
    # Additional-information objects collected for the node (EnviPyModel instances).
    additional_info: list = field(default_factory=list)
    # Scenarios attached to the node, each carrying its own additional information.
    scenarios: list[PathwayScenarioDTO] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PathwayEdgeDTO:
|
||||
edge_uuid: UUID
|
||||
start_compound_pks: list[int] = field(default_factory=list)
|
||||
end_compound_pks: list[int] = field(default_factory=list)
|
||||
probability: float | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PathwayModelInfoDTO:
|
||||
model_name: str | None = None
|
||||
model_uuid: UUID | None = None
|
||||
software_name: str | None = None
|
||||
software_version: str | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PathwayExportDTO:
    """Complete projection of one pathway as handed to the IUCLID export.

    ``frozen=True`` only prevents attribute reassignment; the list fields
    remain mutable, and because they take part in the generated ``__hash__``
    instances are not hashable.
    """

    # UUID of the exported pathway.
    pathway_uuid: UUID
    # Display name of the exported pathway.
    pathway_name: str
    # Compounds occurring in the pathway.
    compounds: list[PathwayCompoundDTO] = field(default_factory=list)
    # Nodes of the pathway.
    nodes: list[PathwayNodeDTO] = field(default_factory=list)
    # Edges of the pathway.
    edges: list[PathwayEdgeDTO] = field(default_factory=list)
    # Primary keys of the compounds that act as pathway roots.
    root_compound_pks: list[int] = field(default_factory=list)
    # Model/software description, or None when the pathway has no model.
    model_info: PathwayModelInfoDTO | None = None
|
||||
142
epapi/v1/interfaces/iuclid/projections.py
Normal file
142
epapi/v1/interfaces/iuclid/projections.py
Normal file
@ -0,0 +1,142 @@
|
||||
from uuid import UUID
|
||||
|
||||
from epdb.logic import PackageManager
|
||||
from epdb.models import Pathway
|
||||
|
||||
from epapi.v1.errors import EPAPINotFoundError, EPAPIPermissionDeniedError
|
||||
|
||||
from .dto import (
|
||||
PathwayCompoundDTO,
|
||||
PathwayEdgeDTO,
|
||||
PathwayExportDTO,
|
||||
PathwayModelInfoDTO,
|
||||
PathwayNodeDTO,
|
||||
PathwayScenarioDTO,
|
||||
)
|
||||
|
||||
|
||||
def _cas_and_ec_numbers(compound) -> tuple[str | None, str | None]:
    """Return the first CAS and the first EC identifier value found on *compound*."""
    cas_number = None
    ec_number = None
    for ext_id in compound.external_identifiers.all():
        db_name = ext_id.database.name if ext_id.database else None
        if db_name == "CAS" and cas_number is None:
            cas_number = ext_id.identifier_value
        elif db_name == "EC" and ec_number is None:
            ec_number = ext_id.identifier_value
    return cas_number, ec_number


def _labelled_compound_pks(nodes) -> list[int]:
    """Return the sorted, de-duplicated compound pks of *nodes* that have a default label."""
    return sorted(
        {
            n.default_node_label.compound.pk
            for n in nodes
            if n.default_node_label is not None
        }
    )


def _edge_probability(edge) -> float | None:
    """Return ``edge.kv["probability"]`` as a float, or None if absent or not numeric."""
    raw = edge.kv.get("probability") if edge.kv else None
    if raw is None:
        return None
    try:
        return float(raw)
    except (TypeError, ValueError):
        return None


def get_pathway_for_iuclid_export(user, pathway_uuid: UUID) -> PathwayExportDTO:
    """Return pathway data projected into DTOs for the IUCLID export consumer.

    Args:
        user: The requesting user. Must be truthy, not anonymous, and able to
            read the pathway's package according to ``PackageManager.readable``.
        pathway_uuid: UUID of the pathway to export.

    Returns:
        A ``PathwayExportDTO`` holding the pathway's nodes (ordered by depth,
        then pk), its edges, the distinct compounds in first-seen order, the
        compound pks of all depth-0 nodes, and model information when the
        pathway's setting has a model.

    Raises:
        EPAPINotFoundError: No pathway with ``pathway_uuid`` exists.
        EPAPIPermissionDeniedError: ``user`` is missing, anonymous, or may not
            read the pathway's package.
    """
    try:
        pathway = (
            Pathway.objects.select_related("package", "setting", "setting__model")
            .prefetch_related(
                "node_set__default_node_label__compound__external_identifiers__database",
                "node_set__scenarios",
                "edge_set__start_nodes__default_node_label__compound",
                "edge_set__end_nodes__default_node_label__compound",
            )
            .get(uuid=pathway_uuid)
        )
    except Pathway.DoesNotExist as exc:
        raise EPAPINotFoundError(f"Pathway with UUID {pathway_uuid} not found") from exc

    if not user or user.is_anonymous or not PackageManager.readable(user, pathway.package):
        raise EPAPIPermissionDeniedError("Insufficient permissions to access this pathway.")

    nodes: list[PathwayNodeDTO] = []
    compounds_by_pk: dict[int, PathwayCompoundDTO] = {}
    root_compound_pks: list[int] = []

    # Sort in Python rather than with ``.order_by()``: chaining a queryset
    # method onto the prefetched relation would issue a fresh query and drop
    # every ``node_set__...`` prefetch requested above, causing per-node
    # queries. ``pk`` is unique, so the order equals ORDER BY depth, pk.
    # NOTE(review): assumes ``depth`` is never NULL -- confirm against the model.
    ordered_nodes = sorted(pathway.node_set.all(), key=lambda n: (n.depth, n.pk))

    for node in ordered_nodes:
        cs = node.default_node_label
        if cs is None:
            # Nodes without a default label carry no structure to export.
            continue
        compound = cs.compound
        cas_number, ec_number = _cas_and_ec_numbers(compound)

        ai_for_node = []
        scenario_entries: list[PathwayScenarioDTO] = []
        for scenario in sorted(node.scenarios.all(), key=lambda item: item.pk):
            ai_for_scenario = list(scenario.get_additional_information(direct_only=True))
            # The node-level list is the concatenation of all scenario lists.
            ai_for_node.extend(ai_for_scenario)
            scenario_entries.append(
                PathwayScenarioDTO(
                    scenario_uuid=scenario.uuid,
                    name=scenario.name,
                    additional_info=ai_for_scenario,
                )
            )

        nodes.append(
            PathwayNodeDTO(
                node_uuid=node.uuid,
                compound_pk=compound.pk,
                name=compound.name,
                depth=node.depth,
                smiles=cs.smiles,
                cas_number=cas_number,
                ec_number=ec_number,
                additional_info=ai_for_node,
                scenarios=scenario_entries,
            )
        )

        # Depth-0 nodes are the pathway roots; keep first-seen order, no duplicates.
        if node.depth == 0 and compound.pk not in root_compound_pks:
            root_compound_pks.append(compound.pk)

        # The first node seen for a compound determines its compound-level entry.
        if compound.pk not in compounds_by_pk:
            compounds_by_pk[compound.pk] = PathwayCompoundDTO(
                pk=compound.pk,
                name=compound.name,
                smiles=cs.smiles,
                cas_number=cas_number,
                ec_number=ec_number,
            )

    edges: list[PathwayEdgeDTO] = [
        PathwayEdgeDTO(
            edge_uuid=edge.uuid,
            start_compound_pks=_labelled_compound_pks(edge.start_nodes.all()),
            end_compound_pks=_labelled_compound_pks(edge.end_nodes.all()),
            probability=_edge_probability(edge),
        )
        for edge in pathway.edge_set.all()
    ]

    model_info = None
    if pathway.setting and pathway.setting.model:
        model = pathway.setting.model
        model_info = PathwayModelInfoDTO(
            model_name=model.get_name(),
            model_uuid=model.uuid,
            software_name="enviPath",
            software_version=None,
        )

    return PathwayExportDTO(
        pathway_uuid=pathway.uuid,
        pathway_name=pathway.get_name(),
        compounds=list(compounds_by_pk.values()),
        nodes=nodes,
        edges=edges,
        root_compound_pks=root_compound_pks,
        model_info=model_info,
    )
|
||||
Reference in New Issue
Block a user