forked from enviPath/enviPy
This is an initial implementation that creates a working minimal .i6z document. It passes schema validation and can be imported into IUCLID. Caveat: IUCLID files target individual compounds. Pathways are not actually covered by the format. They can be added in either the soil or the water-and-soil OECD endpoints. **I currently only implemented the soil endpoint for all data.** This sort of works, and I can report all degradation products in a pathway (not a nice view, but we can report many transformation products and add a diagram attachment in the future). Adding additional information is an absolute pain, as we need to explicitly map each type of information to the relevant OECD field. I use the XSD schema for validation, but unfortunately the IUCLID parser is not fully compliant and requires a specific order, etc. The workflow is: find the AI structure from the XSD schema -> make the schema validation pass -> upload to IUCLID to get obscure error messages -> guess what could be wrong -> repeat 💣 New specifications get released once per year, so we will have to update accordingly. I believe that this should be a more expensive feature, as it requires significant effort to maintain. Currently implemented for root compound only in SOIL: - Soil Texture 2 - Soil Texture 1 - pH value - Half-life per soil sample / scenario (mapped to disappearance; not sure about that). - CEC - Organic Matter (only Carbon) - Moisture content - Humidity <img width="2123" alt="image.png" src="attachments/d29830e1-65ef-4136-8939-1825e0959c62"> <img width="2124" alt="image.png" src="attachments/ac9de2ac-bf68-4ba4-b40b-82f810a9de93"> <img width="2139" alt="image.png" src="attachments/5674c7e6-865e-420e-974a-6b825b331e6c"> Reviewed-on: enviPath/enviPy#338 Co-authored-by: Tobias O <tobias.olenyi@envipath.com> Co-committed-by: Tobias O <tobias.olenyi@envipath.com>
55 lines
2.1 KiB
Python
55 lines
2.1 KiB
Python
import xml.etree.ElementTree as ET
|
|
|
|
from epiuclid.serializers.pathway_mapper import IUCLIDReferenceSubstanceData
|
|
|
|
from .base import (
|
|
_sub,
|
|
_sub_if,
|
|
build_document,
|
|
document_key,
|
|
)
|
|
|
|
# XML namespace URI used for every element of the REFERENCE_SUBSTANCE document
# built in this module. The trailing "10.0" is part of the URI string itself.
NS_REFERENCE_SUBSTANCE = "http://iuclid6.echa.europa.eu/namespaces/REFERENCE_SUBSTANCE/10.0"
|
|
# Register the namespace under the empty prefix so ElementTree serializes it as
# the default namespace (xmlns="...") instead of an auto-generated "ns0:" prefix.
# NOTE(review): the ElementTree registry is process-wide and runs at import time;
# if other modules also register the "" prefix, the last registration replaces
# the earlier ones — confirm this is intended.
ET.register_namespace("", NS_REFERENCE_SUBSTANCE)
|
|
|
|
|
|
class ReferenceSubstanceBuilder:
    """Builds the IUCLID ``REFERENCE_SUBSTANCE`` document for one substance."""

    def build(self, data: IUCLIDReferenceSubstanceData) -> str:
        """Assemble the REFERENCE_SUBSTANCE element and wrap it in a document.

        Children are appended in a fixed order: substance name, IUPAC name,
        CAS inventory entry, then the structural-information group.

        Args:
            data: Source values. This method reads ``name``, ``iupac_name``,
                ``cas_number``, ``molecular_formula``, ``molecular_weight``,
                ``smiles``, ``inchi``, ``inchi_key`` and ``uuid``.

        Returns:
            Whatever ``build_document`` returns for the assembled element
            (annotated as ``str``).
        """
        ns = NS_REFERENCE_SUBSTANCE
        root = ET.Element(f"{{{ns}}}REFERENCE_SUBSTANCE")

        # The name is always written; the IUPAC name goes through _sub_if.
        _sub(root, ns, "ReferenceSubstanceName", data.name)
        _sub_if(root, ns, "IupacName", data.iupac_name)

        # The Inventory wrapper is only created when there is a CAS number.
        if data.cas_number:
            inventory_el = _sub(root, ns, "Inventory")
            _sub(inventory_el, ns, "CASNumber", data.cas_number)

        # A weight of 0 still counts as present, hence the explicit None test.
        structural_fields_present = any(
            (
                data.molecular_formula,
                data.molecular_weight is not None,
                data.smiles,
                data.inchi,
                data.inchi_key,
            )
        )
        if structural_fields_present:
            info_el = _sub(root, ns, "MolecularStructuralInfo")
            _sub_if(info_el, ns, "MolecularFormula", data.molecular_formula)

            if data.molecular_weight is not None:
                # A single weight is written as a degenerate range
                # (lower == upper), formatted with two decimals.
                range_el = _sub(info_el, ns, "MolecularWeightRange")
                weight_text = f"{data.molecular_weight:.2f}"
                _sub(range_el, ns, "lowerValue", weight_text)
                _sub(range_el, ns, "upperValue", weight_text)

            _sub_if(info_el, ns, "SmilesNotation", data.smiles)
            # NOTE(review): the tag is spelled "InChl" (lowercase L) — presumably
            # this mirrors the element name in the IUCLID XSD; confirm against
            # the schema before "correcting" it.
            _sub_if(info_el, ns, "InChl", data.inchi)
            _sub_if(info_el, ns, "InChIKey", data.inchi_key)

        return build_document(
            document_key=document_key(data.uuid),
            document_type="REFERENCE_SUBSTANCE",
            name=data.name,
            content_element=root,
        )
|