forked from enviPath/enviPy
This is an initial implementation that creates a working minimal .i6z document. It passes schema validation and can be imported into IUCLID. Caveat: IUCLID files target individual compounds. Pathway is not actually covered by the format. It can be added in either soil or water and soil OECD endpoints. **I currently only implemented the soil endpoint for all data.** This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future). Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field. I use the XSD scheme for validation, but unfortunately the IUCLID parser is not fully compliant and requires a specific order, etc. The workflow is: finding the AI structure from the XSD scheme -> make the scheme validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣 New specifications get released once per year, so we will have to update accordingly. I believe that this should be a more expensive feature, as it requires significant effort to uphold. Currently implemented for root compound only in SOIL: - Soil Texture 2 - Soil Texture 1 - pH value - Half-life per soil sample / scenario (mapped to disappearance; not sure about that). - CEC - Organic Matter (only Carbon) - Moisture content - Humidity <img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62"> <img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93"> <img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c"> Reviewed-on: enviPath/enviPy#338 Co-authored-by: Tobias O <tobias.olenyi@envipath.com> Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
91 lines
3.7 KiB
Python
91 lines
3.7 KiB
Python
"""Load and cache IUCLID XSD schemas with cross-reference resolution.
|
|
|
|
The bundled XSD schemas use bare ``schemaLocation`` filenames (e.g.
|
|
``platform-fields.xsd``, ``commonTypesDomainV10.xsd``) that don't match the
|
|
actual directory layout. This module builds an explicit namespace → file-path
|
|
mapping so that ``xmlschema`` can resolve every import.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from functools import lru_cache
|
|
from pathlib import Path
|
|
|
|
import xmlschema
|
|
|
|
# Directory that holds the bundled v10 XSD files; it sits next to this module.
_SCHEMA_ROOT = Path(__file__).resolve().parent.joinpath("v10")
|
|
|
|
# XML namespace → schema file, given relative to _SCHEMA_ROOT. The XSDs refer
# to these files by bare filename from subdirectories that do not contain
# them, so every location is spelled out explicitly here.
_NS_LOCATIONS: dict[str, str] = {
    # Platform-level schemas, located directly in _SCHEMA_ROOT.
    "http://iuclid6.echa.europa.eu/namespaces/platform-fields/v1": "platform-fields.xsd",
    "http://iuclid6.echa.europa.eu/namespaces/platform-metadata/v1": "platform-metadata.xsd",
    "http://iuclid6.echa.europa.eu/namespaces/platform-container/v2": "platform-container-v2.xsd",
    "http://iuclid6.echa.europa.eu/namespaces/platform-attachment/v1": "platform-attachment.xsd",
    "http://iuclid6.echa.europa.eu/namespaces/platform-modification-history/v1": (
        "platform-modification-history.xsd"
    ),
    # W3C support schemas.
    "http://www.w3.org/1999/xlink": "xlink.xsd",
    "http://www.w3.org/XML/1998/namespace": "xml.xsd",
    # Shared type definitions living in versioned subdirectories.
    "http://iuclid6.echa.europa.eu/namespaces/domain/v10": "domain/v10/commonTypesDomainV10.xsd",
    "http://iuclid6.echa.europa.eu/namespaces/oecd/v10": "oecd/v10/commonTypesOecdV10.xsd",
}
|
|
|
|
# doc_type → (subdirectory below _SCHEMA_ROOT, schema filename).
# ENDPOINT_STUDY_RECORD is not listed: its filename depends on a subtype.
_DOC_TYPE_PATHS: dict[str, tuple[str, str]] = {
    "SUBSTANCE": ("domain/v10", "SUBSTANCE-10.0.xsd"),
    "REFERENCE_SUBSTANCE": ("domain/v10", "REFERENCE_SUBSTANCE-10.0.xsd"),
}
|
|
|
|
|
|
def _absolute_locations() -> list[tuple[str, str]]:
    """Pair every known namespace with the ``file://`` URI of its schema.

    Each relative entry of ``_NS_LOCATIONS`` is anchored at ``_SCHEMA_ROOT``
    and converted with ``Path.as_uri``. The pairs keep the mapping's order.
    """
    pairs: list[tuple[str, str]] = []
    for namespace, relative_path in _NS_LOCATIONS.items():
        schema_uri = (_SCHEMA_ROOT / relative_path).as_uri()
        pairs.append((namespace, schema_uri))
    return pairs
|
|
|
|
|
|
def _esr_path(subtype: str) -> Path:
    """Build the location of the Endpoint Study Record XSD for *subtype*.

    The file is named ``ENDPOINT_STUDY_RECORD-<subtype>-10.0.xsd`` and is
    placed under ``oecd/v10`` below ``_SCHEMA_ROOT``. The path is only
    constructed here; nothing checks that the file exists.
    """
    schema_name = f"ENDPOINT_STUDY_RECORD-{subtype}-10.0.xsd"
    return _SCHEMA_ROOT.joinpath("oecd", "v10", schema_name)
|
|
|
|
|
|
def _doc_type_path(doc_type: str, subtype: str | None = None) -> Path:
    """Resolve the XSD file that describes *doc_type*.

    Parameters
    ----------
    doc_type:
        ``ENDPOINT_STUDY_RECORD`` or one of the keys of ``_DOC_TYPE_PATHS``.
    subtype:
        Only consulted for ``ENDPOINT_STUDY_RECORD``, where it must be
        non-empty; it is ignored for every other document type.

    Raises
    ------
    ValueError
        If an endpoint study record is requested without a subtype, or if
        *doc_type* is not a known document type.
    """
    # Table-driven document types: a plain lookup below _SCHEMA_ROOT.
    if doc_type != "ENDPOINT_STUDY_RECORD":
        if doc_type not in _DOC_TYPE_PATHS:
            raise ValueError(f"Unknown document type: {doc_type}")
        subdir, filename = _DOC_TYPE_PATHS[doc_type]
        return _SCHEMA_ROOT / subdir / filename

    # Endpoint study records: the filename is derived from the subtype.
    if subtype:
        return _esr_path(subtype)
    raise ValueError("subtype is required for ENDPOINT_STUDY_RECORD schemas")
|
|
|
|
|
|
@lru_cache(maxsize=32)
def get_content_schema(doc_type: str, subtype: str | None = None) -> xmlschema.XMLSchema:
    """Compile the XSD used to validate the content element of a document.

    The schema file is located through ``_doc_type_path`` and compiled with
    the explicit namespace locations from ``_absolute_locations``. Results
    are memoised by ``lru_cache`` for up to 32 distinct argument combinations.

    Parameters
    ----------
    doc_type:
        IUCLID document type (``SUBSTANCE``, ``REFERENCE_SUBSTANCE``,
        ``ENDPOINT_STUDY_RECORD``).
    subtype:
        Required for ``ENDPOINT_STUDY_RECORD`` (e.g. ``BiodegradationInSoil``).

    Raises
    ------
    ValueError
        Propagated from ``_doc_type_path`` for an unknown document type or a
        missing subtype.
    """
    schema_file = str(_doc_type_path(doc_type, subtype))
    return xmlschema.XMLSchema(schema_file, locations=_absolute_locations())
|
|
|
|
|
|
@lru_cache(maxsize=1)
def get_document_schema() -> xmlschema.XMLSchema:
    """Compile the XSD for the ``platform-container-v2`` document wrapper.

    The wrapper schema covers the whole ``<Document>`` element
    (PlatformMetadata + Content + Attachments + ModificationHistory). Content
    is matched through ``xs:any`` with ``processContents="strict"``, but it is
    only validated when the content namespace has been loaded. Use
    :func:`get_content_schema` separately for full content validation.

    The compiled schema is cached, so the file is parsed once per process.
    """
    wrapper_file = str(_SCHEMA_ROOT.joinpath("platform-container-v2.xsd"))
    return xmlschema.XMLSchema(wrapper_file, locations=_absolute_locations())
|