[Fix] bootstrap command now reflects new Scenario/AdditionalInformation structure (#346)

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#346
This commit is contained in:
2026-03-07 03:14:28 +13:00
parent c6ff97694d
commit d4295c9349
2 changed files with 74 additions and 34 deletions

View File

@ -1,4 +1,3 @@
import json
import logging import logging
import re import re
from typing import Any, Dict, List, Optional, Set, Union, Tuple from typing import Any, Dict, List, Optional, Set, Union, Tuple
@ -11,6 +10,7 @@ from django.db import transaction
from pydantic import ValidationError from pydantic import ValidationError
from epdb.models import ( from epdb.models import (
AdditionalInformation,
Compound, Compound,
CompoundStructure, CompoundStructure,
Edge, Edge,
@ -634,15 +634,30 @@ class PackageManager(object):
# Stores old_id to new_id # Stores old_id to new_id
mapping = {} mapping = {}
# Stores new_scen_id to old_parent_scen_id
parent_mapping = {}
# Mapping old scen_id to old_obj_id # Mapping old scen_id to old_obj_id
scen_mapping = defaultdict(list) scen_mapping = defaultdict(list)
# Enzymelink Mapping rule_id to enzymelink objects # Enzymelink Mapping rule_id to enzymelink objects
enzyme_mapping = defaultdict(list) enzyme_mapping = defaultdict(list)
# old_parent_id to child
postponed_scens = defaultdict(list)
# Store Scenarios # Store Scenarios
for scenario in data["scenarios"]: for scenario in data["scenarios"]:
skip_scen = False
# Check if parent exists and park this Scenario to convert it later into an
# AdditionalInformation object
for ex in scenario.get("additionalInformationCollection", {}).get(
"additionalInformation", []
):
if ex["name"] == "referringscenario":
postponed_scens[ex["data"]].append(scenario)
skip_scen = True
break
if skip_scen:
continue
scen = Scenario() scen = Scenario()
scen.package = pack scen.package = pack
scen.uuid = UUID(scenario["id"].split("/")[-1]) if keep_ids else uuid4() scen.uuid = UUID(scenario["id"].split("/")[-1]) if keep_ids else uuid4()
@ -655,19 +670,12 @@ class PackageManager(object):
mapping[scenario["id"]] = scen.uuid mapping[scenario["id"]] = scen.uuid
new_add_inf = defaultdict(list)
# TODO Store AI...
for ex in scenario.get("additionalInformationCollection", {}).get( for ex in scenario.get("additionalInformationCollection", {}).get(
"additionalInformation", [] "additionalInformation", []
): ):
name = ex["name"] name = ex["name"]
addinf_data = ex["data"] addinf_data = ex["data"]
# park the parent scen id for now and link it later
if name == "referringscenario":
parent_mapping[scen.uuid] = addinf_data
continue
# Broken eP Data # Broken eP Data
if name == "initialmasssediment" and addinf_data == "missing data": if name == "initialmasssediment" and addinf_data == "missing data":
continue continue
@ -675,17 +683,11 @@ class PackageManager(object):
continue continue
try: try:
res = AdditionalInformationConverter.convert(name, addinf_data) ai = AdditionalInformationConverter.convert(name, addinf_data)
res_cls_name = res.__class__.__name__ AdditionalInformation.create(pack, ai, scenario=scen)
ai_data = json.loads(res.model_dump_json())
ai_data["uuid"] = f"{uuid4()}"
new_add_inf[res_cls_name].append(ai_data)
except (ValidationError, ValueError): except (ValidationError, ValueError):
logger.error(f"Failed to convert {name} with {addinf_data}") logger.error(f"Failed to convert {name} with {addinf_data}")
scen.additional_information = new_add_inf
scen.save()
print("Scenarios imported...") print("Scenarios imported...")
# Store compounds and its structures # Store compounds and its structures
@ -925,14 +927,46 @@ class PackageManager(object):
print("Pathways imported...") print("Pathways imported...")
# Linking Phase for parent, children in postponed_scens.items():
for child, parent in parent_mapping.items(): for child in children:
child_obj = Scenario.objects.get(uuid=child) for ex in child.get("additionalInformationCollection", {}).get(
parent_obj = Scenario.objects.get(uuid=mapping[parent]) "additionalInformation", []
child_obj.parent = parent_obj ):
child_obj.save() child_id = child["id"]
name = ex["name"]
addinf_data = ex["data"]
if name == "referringscenario":
continue
# Broken eP Data
if name == "initialmasssediment" and addinf_data == "missing data":
continue
if name == "columnheight" and addinf_data == "(2)-(2.5);(6)-(8)":
continue
ai = AdditionalInformationConverter.convert(name, addinf_data)
if child_id not in scen_mapping:
logger.info(
f"{child_id} not found in scen_mapping. Seems like its not attached to any object"
)
print(
f"{child_id} not found in scen_mapping. Seems like its not attached to any object"
)
scen = Scenario.objects.get(uuid=mapping[parent])
mapping[child_id] = scen.uuid
for obj in scen_mapping[child_id]:
_ = AdditionalInformation.create(pack, ai, scen, content_object=obj)
for scen_id, objects in scen_mapping.items(): for scen_id, objects in scen_mapping.items():
new_id = mapping.get(scen_id)
if new_id is None:
logger.warning(f"Could not find mapping for {scen_id}")
print(f"Could not find mapping for {scen_id}")
continue
scen = Scenario.objects.get(uuid=mapping[scen_id]) scen = Scenario.objects.get(uuid=mapping[scen_id])
for o in objects: for o in objects:
o.scenarios.add(scen) o.scenarios.add(scen)
@ -965,6 +999,7 @@ class PackageManager(object):
matches = re.findall(r">(R[0-9]+)<", evidence["evidence"]) matches = re.findall(r">(R[0-9]+)<", evidence["evidence"])
if not matches or len(matches) != 1: if not matches or len(matches) != 1:
logger.warning(f"Could not find reaction id in {evidence['evidence']}") logger.warning(f"Could not find reaction id in {evidence['evidence']}")
print(f"Could not find reaction id in {evidence['evidence']}")
continue continue
e.add_kegg_reaction_id(matches[0]) e.add_kegg_reaction_id(matches[0])
@ -984,7 +1019,6 @@ class PackageManager(object):
print("Fixing Node depths...") print("Fixing Node depths...")
total_pws = Pathway.objects.filter(package=pack).count() total_pws = Pathway.objects.filter(package=pack).count()
for p, pw in enumerate(Pathway.objects.filter(package=pack)): for p, pw in enumerate(Pathway.objects.filter(package=pack)):
print(pw.url)
in_count = defaultdict(lambda: 0) in_count = defaultdict(lambda: 0)
out_count = defaultdict(lambda: 0) out_count = defaultdict(lambda: 0)
@ -1020,7 +1054,6 @@ class PackageManager(object):
if str(prod.uuid) not in seen: if str(prod.uuid) not in seen:
old_depth = prod.depth old_depth = prod.depth
if old_depth != i + 1: if old_depth != i + 1:
print(f"updating depth from {old_depth} to {i + 1}")
prod.depth = i + 1 prod.depth = i + 1
prod.save() prod.save()
@ -1031,7 +1064,7 @@ class PackageManager(object):
if new_level: if new_level:
levels.append(new_level) levels.append(new_level)
print(f"{p + 1}/{total_pws} fixed.") print(f"{p + 1}/{total_pws} fixed.", end="\r")
return pack return pack

View File

@ -1,6 +1,7 @@
import os import os
import subprocess import subprocess
from django.conf import settings
from django.core.management import call_command from django.core.management import call_command
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
@ -45,11 +46,13 @@ class Command(BaseCommand):
if not os.path.exists(dump_file): if not os.path.exists(dump_file):
raise ValueError(f"Dump file {dump_file} does not exist") raise ValueError(f"Dump file {dump_file} does not exist")
print(f"Dropping database {options['name']} y/n: ", end="") db_name = options["name"]
print(f"Dropping database {db_name} y/n: ", end="")
if input() in "yY": if input() in "yY":
result = subprocess.run( result = subprocess.run(
["dropdb", "appdb"], ["dropdb", db_name],
capture_output=True, capture_output=True,
text=True, text=True,
) )
@ -57,20 +60,24 @@ class Command(BaseCommand):
else: else:
raise ValueError("Aborted") raise ValueError("Aborted")
print(f"Creating database {options['name']}") print(f"Creating database {db_name}")
result = subprocess.run( result = subprocess.run(
["createdb", "appdb"], ["createdb", db_name],
capture_output=True, capture_output=True,
text=True, text=True,
) )
print(result.stdout) print(result.stdout)
print(f"Restoring database {options['name']} from {dump_file}") print(f"Restoring database {db_name} from {dump_file}")
result = subprocess.run( result = subprocess.run(
["pg_restore", "-d", "appdb", dump_file, "--no-owner"], ["pg_restore", "-d", db_name, dump_file, "--no-owner"],
capture_output=True, capture_output=True,
text=True, text=True,
) )
print(result.stdout) print(result.stdout)
if db_name == settings.DATABASES["default"]["NAME"]:
call_command("localize_urls", "--old", options["oldurl"], "--new", options["newurl"]) call_command("localize_urls", "--old", options["oldurl"], "--new", options["newurl"])
else:
print("Skipping localize_urls as database is not the default one.")