diff --git a/epdb/logic.py b/epdb/logic.py index 9aab6b97..2f0d6d87 100644 --- a/epdb/logic.py +++ b/epdb/logic.py @@ -1,4 +1,3 @@ -import json import logging import re from typing import Any, Dict, List, Optional, Set, Union, Tuple @@ -11,6 +10,7 @@ from django.db import transaction from pydantic import ValidationError from epdb.models import ( + AdditionalInformation, Compound, CompoundStructure, Edge, @@ -634,15 +634,30 @@ class PackageManager(object): # Stores old_id to new_id mapping = {} - # Stores new_scen_id to old_parent_scen_id - parent_mapping = {} # Mapping old scen_id to old_obj_id scen_mapping = defaultdict(list) # Enzymelink Mapping rule_id to enzymelink objects enzyme_mapping = defaultdict(list) + # old_parent_id to child + postponed_scens = defaultdict(list) + # Store Scenarios for scenario in data["scenarios"]: + skip_scen = False + # Check if parent exists and park this Scenario to convert it later into an + # AdditionalInformation object + for ex in scenario.get("additionalInformationCollection", {}).get( + "additionalInformation", [] + ): + if ex["name"] == "referringscenario": + postponed_scens[ex["data"]].append(scenario) + skip_scen = True + break + + if skip_scen: + continue + scen = Scenario() scen.package = pack scen.uuid = UUID(scenario["id"].split("/")[-1]) if keep_ids else uuid4() @@ -655,19 +670,12 @@ class PackageManager(object): mapping[scenario["id"]] = scen.uuid - new_add_inf = defaultdict(list) - # TODO Store AI... for ex in scenario.get("additionalInformationCollection", {}).get( "additionalInformation", [] ): name = ex["name"] addinf_data = ex["data"] - # park the parent scen id for now and link it later - if name == "referringscenario": - parent_mapping[scen.uuid] = addinf_data - continue - # Broken eP Data if name == "initialmasssediment" and addinf_data == "missing data": continue @@ -675,17 +683,11 @@ class PackageManager(object): continue try: - res = AdditionalInformationConverter.convert(name, addinf_data) - res_cls_name = res.__class__.__name__ - ai_data = json.loads(res.model_dump_json()) - ai_data["uuid"] = f"{uuid4()}" - new_add_inf[res_cls_name].append(ai_data) + ai = AdditionalInformationConverter.convert(name, addinf_data) + AdditionalInformation.create(pack, ai, scenario=scen) except (ValidationError, ValueError): logger.error(f"Failed to convert {name} with {addinf_data}") - scen.additional_information = new_add_inf - scen.save() - print("Scenarios imported...") # Store compounds and its structures @@ -925,14 +927,46 @@ class PackageManager(object): print("Pathways imported...") - # Linking Phase - for child, parent in parent_mapping.items(): - child_obj = Scenario.objects.get(uuid=child) - parent_obj = Scenario.objects.get(uuid=mapping[parent]) - child_obj.parent = parent_obj - child_obj.save() + for parent, children in postponed_scens.items(): + for child in children: + for ex in child.get("additionalInformationCollection", {}).get( + "additionalInformation", [] + ): + child_id = child["id"] + name = ex["name"] + addinf_data = ex["data"] + + if name == "referringscenario": + continue + # Broken eP Data + if name == "initialmasssediment" and addinf_data == "missing data": + continue + if name == "columnheight" and addinf_data == "(2)-(2.5);(6)-(8)": + continue + + ai = AdditionalInformationConverter.convert(name, addinf_data) + + if child_id not in scen_mapping: + logger.info( + f"{child_id} not found in scen_mapping. Seems like its not attached to any object" + ) + print( + f"{child_id} not found in scen_mapping. Seems like its not attached to any object" + ) + + scen = Scenario.objects.get(uuid=mapping[parent]) + mapping[child_id] = scen.uuid + for obj in scen_mapping[child_id]: + _ = AdditionalInformation.create(pack, ai, scen, content_object=obj) for scen_id, objects in scen_mapping.items(): + new_id = mapping.get(scen_id) + + if new_id is None: + logger.warning(f"Could not find mapping for {scen_id}") + print(f"Could not find mapping for {scen_id}") + continue + scen = Scenario.objects.get(uuid=mapping[scen_id]) for o in objects: o.scenarios.add(scen) @@ -965,6 +999,7 @@ class PackageManager(object): matches = re.findall(r">(R[0-9]+)<", evidence["evidence"]) if not matches or len(matches) != 1: logger.warning(f"Could not find reaction id in {evidence['evidence']}") + print(f"Could not find reaction id in {evidence['evidence']}") continue e.add_kegg_reaction_id(matches[0]) @@ -984,7 +1019,6 @@ class PackageManager(object): print("Fixing Node depths...") total_pws = Pathway.objects.filter(package=pack).count() for p, pw in enumerate(Pathway.objects.filter(package=pack)): - print(pw.url) in_count = defaultdict(lambda: 0) out_count = defaultdict(lambda: 0) @@ -1020,7 +1054,6 @@ class PackageManager(object): if str(prod.uuid) not in seen: old_depth = prod.depth if old_depth != i + 1: - print(f"updating depth from {old_depth} to {i + 1}") prod.depth = i + 1 prod.save() @@ -1031,7 +1064,7 @@ class PackageManager(object): if new_level: levels.append(new_level) - print(f"{p + 1}/{total_pws} fixed.") + print(f"{p + 1}/{total_pws} fixed.", end="\r") return pack diff --git a/epdb/management/commands/recreate_db.py b/epdb/management/commands/recreate_db.py index ee69fe65..fd70c949 100644 --- a/epdb/management/commands/recreate_db.py +++ b/epdb/management/commands/recreate_db.py @@ -1,6 +1,7 @@ import os import subprocess +from django.conf import settings from django.core.management import call_command from django.core.management.base import BaseCommand @@ -45,11 +46,13 @@ class Command(BaseCommand): if not os.path.exists(dump_file): raise ValueError(f"Dump file {dump_file} does not exist") - print(f"Dropping database {options['name']} y/n: ", end="") + db_name = options["name"] + + print(f"Dropping database {db_name} y/n: ", end="") if input() in "yY": result = subprocess.run( - ["dropdb", "appdb"], + ["dropdb", db_name], capture_output=True, text=True, ) @@ -57,20 +60,24 @@ class Command(BaseCommand): else: raise ValueError("Aborted") - print(f"Creating database {options['name']}") + print(f"Creating database {db_name}") result = subprocess.run( - ["createdb", "appdb"], + ["createdb", db_name], capture_output=True, text=True, ) print(result.stdout) - print(f"Restoring database {options['name']} from {dump_file}") + print(f"Restoring database {db_name} from {dump_file}") result = subprocess.run( - ["pg_restore", "-d", "appdb", dump_file, "--no-owner"], + ["pg_restore", "-d", db_name, dump_file, "--no-owner"], capture_output=True, text=True, ) print(result.stdout) - call_command("localize_urls", "--old", options["oldurl"], "--new", options["newurl"]) + + if db_name == settings.DATABASES["default"]["NAME"]: + call_command("localize_urls", "--old", options["oldurl"], "--new", options["newurl"]) + else: + print("Skipping localize_urls as database is not the default one.")