forked from enviPath/enviPy
[Feature] Minimal IUCLID export (#338)
This is an initial implementation that creates a working minimal .i6z document. It passes schema validation and can be imported into IUCLID. Caveat: IUCLID files target individual compounds. Pathway is not actually covered by the format. It can be added in either soil or water and soil OECD endpoints. **I currently only implemented the soil endpoint for all data.** This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future). Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field. I use the XSD scheme for validation, but unfortunately the IUCLID parser is not fully compliant and requires a specific order, etc. The workflow is: finding the AI structure from the XSD scheme -> make the scheme validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣 New specifications get released once per year, so we will have to update accordingly. I believe that this should be a more expensive feature, as it requires significant effort to uphold. Currently implemented for root compound only in SOIL: - Soil Texture 2 - Soil Texture 1 - pH value - Half-life per soil sample / scenario (mapped to disappearance; not sure about that). - CEC - Organic Matter (only Carbon) - Moisture content - Humidity <img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62"> <img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93"> <img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c"> Reviewed-on: enviPath/enviPy#338 Co-authored-by: Tobias O <tobias.olenyi@envipath.com> Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
This commit is contained in:
90
epiuclid/schemas/loader.py
Normal file
90
epiuclid/schemas/loader.py
Normal file
@ -0,0 +1,90 @@
|
||||
"""Load and cache IUCLID XSD schemas with cross-reference resolution.
|
||||
|
||||
The bundled XSD schemas use bare ``schemaLocation`` filenames (e.g.
|
||||
``platform-fields.xsd``, ``commonTypesDomainV10.xsd``) that don't match the
|
||||
actual directory layout. This module builds an explicit namespace → file-path
|
||||
mapping so that ``xmlschema`` can resolve every import.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
|
||||
import xmlschema
|
||||
|
||||
# Directory holding the bundled IUCLID v10 schemas; it sits next to this module.
_SCHEMA_ROOT = Path(__file__).resolve().with_name("v10")

# Prefix shared by every IUCLID 6 namespace URI registered below.
_IUCLID_NS = "http://iuclid6.echa.europa.eu/namespaces"

# Namespace → schema file (relative to _SCHEMA_ROOT).  The XSDs import each
# other by bare filename, also from subdirectories that do not contain the
# imported file, so every namespace is pinned to its real location here.
_NS_LOCATIONS: dict[str, str] = {
    f"{_IUCLID_NS}/platform-fields/v1": "platform-fields.xsd",
    f"{_IUCLID_NS}/platform-metadata/v1": "platform-metadata.xsd",
    f"{_IUCLID_NS}/platform-container/v2": "platform-container-v2.xsd",
    f"{_IUCLID_NS}/platform-attachment/v1": "platform-attachment.xsd",
    f"{_IUCLID_NS}/platform-modification-history/v1": "platform-modification-history.xsd",
    "http://www.w3.org/1999/xlink": "xlink.xsd",
    "http://www.w3.org/XML/1998/namespace": "xml.xsd",
    f"{_IUCLID_NS}/domain/v10": "domain/v10/commonTypesDomainV10.xsd",
    f"{_IUCLID_NS}/oecd/v10": "oecd/v10/commonTypesOecdV10.xsd",
}

# Document type → (subdirectory below _SCHEMA_ROOT, schema filename).
# ENDPOINT_STUDY_RECORD is not listed: its filename depends on the subtype.
_DOC_TYPE_PATHS: dict[str, tuple[str, str]] = {
    "SUBSTANCE": ("domain/v10", "SUBSTANCE-10.0.xsd"),
    "REFERENCE_SUBSTANCE": ("domain/v10", "REFERENCE_SUBSTANCE-10.0.xsd"),
}
|
||||
|
||||
|
||||
def _absolute_locations() -> list[tuple[str, str]]:
    """Pair every registered namespace with the URI of its schema file.

    Each relative entry of ``_NS_LOCATIONS`` is joined onto ``_SCHEMA_ROOT``
    and converted with :meth:`pathlib.Path.as_uri`.  Pairs are returned in
    the iteration order of ``_NS_LOCATIONS``.
    """
    pairs: list[tuple[str, str]] = []
    for namespace, relative_path in _NS_LOCATIONS.items():
        schema_file = _SCHEMA_ROOT / relative_path
        pairs.append((namespace, schema_file.as_uri()))
    return pairs
|
||||
|
||||
|
||||
def _esr_path(subtype: str) -> Path:
    """Build the location of the Endpoint Study Record schema for *subtype*.

    The path is only constructed; whether the file exists is not checked.
    """
    filename = f"ENDPOINT_STUDY_RECORD-{subtype}-10.0.xsd"
    return _SCHEMA_ROOT.joinpath("oecd", "v10", filename)
|
||||
|
||||
|
||||
def _doc_type_path(doc_type: str, subtype: str | None = None) -> Path:
    """Resolve the XSD file that describes *doc_type*.

    Parameters
    ----------
    doc_type:
        ``ENDPOINT_STUDY_RECORD`` or one of the keys of ``_DOC_TYPE_PATHS``.
    subtype:
        Only consulted for ``ENDPOINT_STUDY_RECORD``, where it must be a
        non-empty string; ignored for every other document type.

    Raises
    ------
    ValueError
        If *doc_type* is unknown, or if an ``ENDPOINT_STUDY_RECORD`` is
        requested without a subtype.
    """
    if doc_type != "ENDPOINT_STUDY_RECORD":
        # Fixed-name schemas: plain table lookup.
        if doc_type not in _DOC_TYPE_PATHS:
            raise ValueError(f"Unknown document type: {doc_type}")
        subdir, filename = _DOC_TYPE_PATHS[doc_type]
        return _SCHEMA_ROOT / subdir / filename

    # Endpoint study records: the filename is derived from the subtype.
    if subtype:
        return _esr_path(subtype)
    raise ValueError("subtype is required for ENDPOINT_STUDY_RECORD schemas")
|
||||
|
||||
|
||||
@lru_cache(maxsize=32)
def _compile_content_schema(path: Path) -> xmlschema.XMLSchema:
    """Compile the XSD at *path*; results are cached per schema file."""
    return xmlschema.XMLSchema(str(path), locations=_absolute_locations())


def get_content_schema(doc_type: str, subtype: str | None = None) -> xmlschema.XMLSchema:
    """Return a compiled XSD schema for validating content elements.

    Compiled schemas are cached by the resolved schema *file*, not by the
    way this function was called.  ``lru_cache`` treats different argument
    patterns (positional vs. keyword, omitted vs. explicit ``None``) as
    separate entries, so caching on the call itself would recompile the same
    schema for ``get_content_schema("SUBSTANCE")`` and
    ``get_content_schema("SUBSTANCE", None)``.  Keying on the path also
    shares one entry when an irrelevant *subtype* is passed for a document
    type that ignores it.

    Parameters
    ----------
    doc_type:
        IUCLID document type (``SUBSTANCE``, ``REFERENCE_SUBSTANCE``,
        ``ENDPOINT_STUDY_RECORD``).
    subtype:
        Required for ``ENDPOINT_STUDY_RECORD`` (e.g. ``BiodegradationInSoil``).

    Raises
    ------
    ValueError
        Propagated from the path lookup when *doc_type* is unknown or an
        ``ENDPOINT_STUDY_RECORD`` is requested without a subtype.
    """
    return _compile_content_schema(_doc_type_path(doc_type, subtype))


# The public function used to be the lru_cache wrapper itself; keep its
# cache-management helpers reachable under the same names.
get_content_schema.cache_clear = _compile_content_schema.cache_clear  # type: ignore[attr-defined]
get_content_schema.cache_info = _compile_content_schema.cache_info  # type: ignore[attr-defined]
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def get_document_schema() -> xmlschema.XMLSchema:
    """Return the compiled schema of the ``platform-container-v2`` envelope.

    The schema covers the complete ``<Document>`` element: PlatformMetadata,
    Content, Attachments and ModificationHistory.  The Content payload is
    matched through ``xs:any`` with ``processContents="strict"``, which only
    takes effect when the payload's namespace has been loaded.  To validate
    the content fully, use :func:`get_content_schema` separately.

    The schema is compiled on the first call and the same object is returned
    afterwards.
    """
    container_xsd = _SCHEMA_ROOT.joinpath("platform-container-v2.xsd")
    return xmlschema.XMLSchema(str(container_xsd), locations=_absolute_locations())
|
||||
Reference in New Issue
Block a user