"""Load and cache IUCLID XSD schemas with cross-reference resolution. The bundled XSD schemas use bare ``schemaLocation`` filenames (e.g. ``platform-fields.xsd``, ``commonTypesDomainV10.xsd``) that don't match the actual directory layout. This module builds an explicit namespace → file-path mapping so that ``xmlschema`` can resolve every import. """ from __future__ import annotations from functools import lru_cache from pathlib import Path import xmlschema _SCHEMA_ROOT = Path(__file__).resolve().parent / "v10" # Namespace → relative file-path (from _SCHEMA_ROOT) for schemas that are # referenced by bare filename from subdirectories that don't contain them. _NS_LOCATIONS: dict[str, str] = { "http://iuclid6.echa.europa.eu/namespaces/platform-fields/v1": "platform-fields.xsd", "http://iuclid6.echa.europa.eu/namespaces/platform-metadata/v1": "platform-metadata.xsd", "http://iuclid6.echa.europa.eu/namespaces/platform-container/v2": "platform-container-v2.xsd", "http://iuclid6.echa.europa.eu/namespaces/platform-attachment/v1": "platform-attachment.xsd", "http://iuclid6.echa.europa.eu/namespaces/platform-modification-history/v1": ( "platform-modification-history.xsd" ), "http://www.w3.org/1999/xlink": "xlink.xsd", "http://www.w3.org/XML/1998/namespace": "xml.xsd", "http://iuclid6.echa.europa.eu/namespaces/domain/v10": ("domain/v10/commonTypesDomainV10.xsd"), "http://iuclid6.echa.europa.eu/namespaces/oecd/v10": ("oecd/v10/commonTypesOecdV10.xsd"), } # doc_type → (subdir, filename-pattern) _DOC_TYPE_PATHS: dict[str, tuple[str, str]] = { "SUBSTANCE": ("domain/v10", "SUBSTANCE-10.0.xsd"), "REFERENCE_SUBSTANCE": ("domain/v10", "REFERENCE_SUBSTANCE-10.0.xsd"), } def _absolute_locations() -> list[tuple[str, str]]: """Return (namespace, absolute-file-URI) pairs for all known schemas.""" return [(ns, (_SCHEMA_ROOT / rel).as_uri()) for ns, rel in _NS_LOCATIONS.items()] def _esr_path(subtype: str) -> Path: """Return the path to an Endpoint Study Record schema.""" return _SCHEMA_ROOT / "oecd" / "v10" / f"ENDPOINT_STUDY_RECORD-{subtype}-10.0.xsd" def _doc_type_path(doc_type: str, subtype: str | None = None) -> Path: if doc_type == "ENDPOINT_STUDY_RECORD": if not subtype: raise ValueError("subtype is required for ENDPOINT_STUDY_RECORD schemas") return _esr_path(subtype) info = _DOC_TYPE_PATHS.get(doc_type) if info is None: raise ValueError(f"Unknown document type: {doc_type}") subdir, filename = info return _SCHEMA_ROOT / subdir / filename @lru_cache(maxsize=32) def get_content_schema(doc_type: str, subtype: str | None = None) -> xmlschema.XMLSchema: """Return a compiled XSD schema for validating content elements. Parameters ---------- doc_type: IUCLID document type (``SUBSTANCE``, ``REFERENCE_SUBSTANCE``, ``ENDPOINT_STUDY_RECORD``). subtype: Required for ``ENDPOINT_STUDY_RECORD`` (e.g. ``BiodegradationInSoil``). """ path = _doc_type_path(doc_type, subtype) return xmlschema.XMLSchema(str(path), locations=_absolute_locations()) @lru_cache(maxsize=1) def get_document_schema() -> xmlschema.XMLSchema: """Return a compiled XSD schema for the ``platform-container-v2`` wrapper. This validates the full ```` element (PlatformMetadata + Content + Attachments + ModificationHistory). Content is validated with ``processContents="strict"`` via ``xs:any``, but only if the content namespace has been loaded. For full content validation, use :func:`get_content_schema` separately. """ path = _SCHEMA_ROOT / "platform-container-v2.xsd" return xmlschema.XMLSchema(str(path), locations=_absolute_locations())