[Feature] EnzymeLink Annotations (#152)

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#152
This commit is contained in:
2025-10-15 19:35:26 +13:00
parent ef697ac5f5
commit 386098b8a6
10 changed files with 352 additions and 27 deletions

View File

@ -26,6 +26,7 @@ from epdb.models import (
Compound,
Reaction,
CompoundStructure,
EnzymeLink,
)
from utilities.chem import FormatConverter
from utilities.misc import PackageImporter, PackageExporter
@ -617,6 +618,8 @@ class PackageManager(object):
parent_mapping = {}
# Mapping old scen_id to old_obj_id
scen_mapping = defaultdict(list)
# Mapping of the new rule's uuid to the raw "enzymeLinks" entries (dicts) from the import data
enzyme_mapping = defaultdict(list)
# Store Scenarios
for scenario in data["scenarios"]:
@ -648,9 +651,7 @@ class PackageManager(object):
# Broken eP Data
if name == "initialmasssediment" and addinf_data == "missing data":
continue
# TODO Enzymes aren't ready yet
if name == "enzyme":
if name == "columnheight" and addinf_data == "(2)-(2.5);(6)-(8)":
continue
try:
@ -740,6 +741,9 @@ class PackageManager(object):
for scen in rule["scenarios"]:
scen_mapping[scen["id"]].append(r)
for enzyme_link in rule.get("enzymeLinks", []):
enzyme_mapping[r.uuid].append(enzyme_link)
print("Par: ", len(par_rules))
print("Seq: ", len(seq_rules))
@ -757,6 +761,9 @@ class PackageManager(object):
for scen in par_rule["scenarios"]:
scen_mapping[scen["id"]].append(r)
for enzyme_link in par_rule.get("enzymeLinks", []):
enzyme_mapping[r.uuid].append(enzyme_link)
for simple_rule in par_rule["simpleRules"]:
if simple_rule["id"] in mapping:
r.simple_rules.add(SimpleRule.objects.get(uuid=mapping[simple_rule["id"]]))
@ -777,6 +784,9 @@ class PackageManager(object):
for scen in seq_rule["scenarios"]:
scen_mapping[scen["id"]].append(r)
for enzyme_link in seq_rule.get("enzymeLinks", []):
enzyme_mapping[r.uuid].append(enzyme_link)
for i, simple_rule in enumerate(seq_rule["simpleRules"]):
sro = SequentialRuleOrdering()
sro.simple_rule = simple_rule
@ -910,6 +920,39 @@ class PackageManager(object):
print("Scenarios linked...")
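# Import Enzyme Links: create one EnzymeLink per collected entry and attach its evidence
# NOTE(review): Edge objects are added to `reaction_evidence` below, same as Reactions - confirm this is intended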
for rule_uuid, enzyme_links in enzyme_mapping.items():
r = Rule.objects.get(uuid=rule_uuid)
for enzyme in enzyme_links:
e = EnzymeLink()
e.uuid = UUID(enzyme["id"].split("/")[-1]) if keep_ids else uuid4()
e.rule = r
e.name = enzyme["name"]
e.ec_number = enzyme["ecNumber"]
e.classification_level = enzyme["classificationLevel"]
e.linking_method = enzyme["linkingMethod"]
e.save()
for reaction in enzyme["reactionLinkEvidence"]:
reaction = Reaction.objects.get(uuid=mapping[reaction["id"]])
e.reaction_evidence.add(reaction)
for edge in enzyme["edgeLinkEvidence"]:
edge = Edge.objects.get(uuid=mapping[edge["id"]])
e.reaction_evidence.add(edge)
for evidence in enzyme["linkEvidence"]:
matches = re.findall(r">(R[0-9]+)<", evidence["evidence"])
if not matches or len(matches) != 1:
logger.warning(f"Could not find reaction id in {evidence['evidence']}")
continue
e.add_kegg_reaction_id(matches[0])
e.save()
print("Enzyme links imported...")
print("Import statistics:")
print("Package {} stored".format(pack.url))
print("Imported {} compounds".format(Compound.objects.filter(package=pack).count()))