# enviPy-bayer/epiuclid/builders/reference_substance.py

import xml.etree.ElementTree as ET

from epiuclid.serializers.pathway_mapper import IUCLIDReferenceSubstanceData

from .base import (
    _sub,
    _sub_if,
    build_document,
    document_key,
)

# Namespace URI passed to every element-creation call in this module.
NS_REFERENCE_SUBSTANCE = "http://iuclid6.echa.europa.eu/namespaces/REFERENCE_SUBSTANCE/10.0"
# Bind the namespace to the empty prefix in ElementTree's registry, presumably
# so serialization emits it as the default namespace instead of "ns0:".
# NOTE(review): the ElementTree registry is process-global and registering a
# prefix drops any earlier mapping for that prefix, so another module that
# also registers "" for a different URI would replace this one - confirm that
# import order cannot affect the generated XML.
ET.register_namespace("", NS_REFERENCE_SUBSTANCE)


class ReferenceSubstanceBuilder:
    """Build the REFERENCE_SUBSTANCE document for one reference substance."""

    def build(self, data: IUCLIDReferenceSubstanceData) -> str:
        """Assemble the REFERENCE_SUBSTANCE element tree and wrap it in a document.

        The tree is populated in this order: name, IUPAC name, CAS inventory
        entry, then the molecular structural information section. The finished
        tree is handed to ``build_document`` together with a document key
        derived from ``data.uuid``.

        Args:
            data: Source record. The attributes read here are ``name``,
                ``iupac_name``, ``cas_number``, ``molecular_formula``,
                ``molecular_weight``, ``smiles``, ``inchi``, ``inchi_key``
                and ``uuid``.

        Returns:
            Whatever ``build_document`` returns for the assembled content
            (annotated as ``str``).
        """
        ns = NS_REFERENCE_SUBSTANCE
        root = ET.Element(f"{{{ns}}}REFERENCE_SUBSTANCE")

        _sub(root, ns, "ReferenceSubstanceName", data.name)
        # Optional values go through _sub_if (presumably a no-op for empty
        # values - see its definition in .base).
        _sub_if(root, ns, "IupacName", data.iupac_name)

        # The Inventory wrapper is only created when there is a CAS number to
        # put inside it.
        if data.cas_number:
            _sub(_sub(root, ns, "Inventory"), ns, "CASNumber", data.cas_number)

        # The structural section is emitted when at least one of its fields is
        # present. The weight is tested against None explicitly so that a
        # weight of 0 still counts as present.
        structural_fields = (
            data.molecular_formula,
            data.molecular_weight is not None,
            data.smiles,
            data.inchi,
            data.inchi_key,
        )
        if any(structural_fields):
            info = _sub(root, ns, "MolecularStructuralInfo")
            _sub_if(info, ns, "MolecularFormula", data.molecular_formula)

            if data.molecular_weight is not None:
                # A single known weight is written as a range whose lower and
                # upper bounds are the same two-decimal value.
                weight_text = f"{data.molecular_weight:.2f}"
                weight_range = _sub(info, ns, "MolecularWeightRange")
                for bound in ("lowerValue", "upperValue"):
                    _sub(weight_range, ns, bound, weight_text)

            # NOTE(review): the "InChl" tag (lowercase L) presumably mirrors
            # the IUCLID schema spelling - confirm before "correcting" it.
            optional_identifiers = (
                ("SmilesNotation", data.smiles),
                ("InChl", data.inchi),
                ("InChIKey", data.inchi_key),
            )
            for tag, value in optional_identifiers:
                _sub_if(info, ns, tag, value)

        return build_document(
            document_key=document_key(data.uuid),
            document_type="REFERENCE_SUBSTANCE",
            name=data.name,
            content_element=root,
        )