feature/additional_information (#30)

Fixes #12

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#30
This commit is contained in:
2025-07-19 08:10:40 +12:00
parent 4fff78541b
commit 49e02ed97d
11 changed files with 534 additions and 344 deletions

View File

@ -255,6 +255,382 @@ class PackageManager(object):
else:
_ = perm_cls.objects.update_or_create(defaults={'permission': new_perm}, **data)
@staticmethod
@transaction.atomic
def import_package(data: dict, owner: User, keep_ids=False):
from uuid import UUID, uuid4
from datetime import datetime
from collections import defaultdict
from .models import Package, Compound, CompoundStructure, SimpleRule, SimpleAmbitRule, SimpleRDKitRule, \
ParallelRule, SequentialRule, SequentialRuleOrdering, Reaction, Pathway, Node, Edge, Scenario
from envipy_additional_information import AdditionalInformationConverter
pack = Package()
pack.uuid = UUID(data['id'].split('/')[-1]) if keep_ids else uuid4()
pack.name = '{} - {}'.format(data['name'], datetime.now().strftime('%Y-%m-%d %H:%M'))
pack.reviewed = True if data['reviewStatus'] == 'reviewed' else False
pack.description = data['description']
pack.save()
up = UserPackagePermission()
up.user = owner
up.package = pack
up.permission = up.ALL[0]
up.save()
# Stores old_id to new_id
mapping = {}
# Stores new_scen_id to old_parent_scen_id
parent_mapping = {}
# Mapping old scen_id to old_obj_id
scen_mapping = defaultdict(list)
# Store Scenarios
for scenario in data['scenarios']:
scen = Scenario()
scen.package = pack
scen.uuid = UUID(scenario['id'].split('/')[-1]) if keep_ids else uuid4()
scen.name = scenario['name']
scen.description = scenario['description']
scen.scenario_type = scenario['type']
scen.scenario_date = scenario['date']
scen.additional_information = dict()
scen.save()
mapping[scenario['id']] = scen.uuid
new_add_inf = defaultdict(list)
# TODO Store AI...
for ex in scenario.get('additionalInformationCollection', {}).get('additionalInformation', []):
name = ex['name']
addinf_data = ex['data']
# park the parent scen id for now and link it later
if name == 'referringscenario':
parent_mapping[scen.uuid] = addinf_data
continue
# Broken eP Data
if name == 'initialmasssediment' and addinf_data == 'missing data':
continue
# TODO Enzymes arent ready yet
if name == 'enzyme':
continue
try:
res = AdditionalInformationConverter.convert(name, addinf_data)
except:
logger.error(f"Failed to convert {name} with {addinf_data}")
new_add_inf[name].append(res.model_dump_json())
scen.additional_information = new_add_inf
scen.save()
print('Scenarios imported...')
# Store compounds and its structures
for compound in data['compounds']:
comp = Compound()
comp.package = pack
comp.uuid = UUID(compound['id'].split('/')[-1]) if keep_ids else uuid4()
comp.name = compound['name']
comp.description = compound['description']
comp.aliases = compound['aliases']
comp.save()
mapping[compound['id']] = comp.uuid
for scen in compound['scenarios']:
scen_mapping[scen['id']].append(comp)
default_structure = None
for structure in compound['structures']:
struc = CompoundStructure()
# struc.object_url = Command.get_id(structure, keep_ids)
struc.compound = comp
struc.uuid = UUID(structure['id'].split('/')[-1]) if keep_ids else uuid4()
struc.name = structure['name']
struc.description = structure['description']
struc.smiles = structure['smiles']
struc.save()
for scen in structure['scenarios']:
scen_mapping[scen['id']].append(struc)
mapping[structure['id']] = struc.uuid
if structure['id'] == compound['defaultStructure']['id']:
default_structure = struc
struc.save()
if default_structure is None:
raise ValueError('No default structure set')
comp.default_structure = default_structure
comp.save()
print('Compounds imported...')
# Store simple and parallel-rules
par_rules = []
seq_rules = []
for rule in data['rules']:
if rule['identifier'] == 'parallel-rule':
par_rules.append(rule)
continue
if rule['identifier'] == 'sequential-rule':
seq_rules.append(rule)
continue
r = SimpleAmbitRule()
r.uuid = UUID(rule['id'].split('/')[-1]) if keep_ids else uuid4()
r.package = pack
r.name = rule['name']
r.description = rule['description']
r.smirks = rule['smirks']
r.reactant_filter_smarts = rule.get('reactantFilterSmarts', None)
r.product_filter_smarts = rule.get('productFilterSmarts', None)
r.save()
mapping[rule['id']] = r.uuid
for scen in rule['scenarios']:
scen_mapping[scen['id']].append(r)
print("Par: ", len(par_rules))
print("Seq: ", len(seq_rules))
for par_rule in par_rules:
r = ParallelRule()
r.package = pack
r.uuid = UUID(par_rule['id'].split('/')[-1]) if keep_ids else uuid4()
r.name = par_rule['name']
r.description = par_rule['description']
r.save()
mapping[par_rule['id']] = r.uuid
for scen in par_rule['scenarios']:
scen_mapping[scen['id']].append(r)
for simple_rule in par_rule['simpleRules']:
if simple_rule['id'] in mapping:
r.simple_rules.add(SimpleRule.objects.get(uuid=mapping[simple_rule['id']]))
r.save()
for seq_rule in seq_rules:
r = SequentialRule()
r.package = pack
r.uuid = UUID(seq_rule['id'].split('/')[-1]) if keep_ids else uuid4()
r.name = seq_rule['name']
r.description = seq_rule['description']
r.save()
mapping[seq_rule['id']] = r.uuid
for scen in seq_rule['scenarios']:
scen_mapping[scen['id']].append(r)
for i, simple_rule in enumerate(seq_rule['simpleRules']):
sro = SequentialRuleOrdering()
sro.simple_rule = simple_rule
sro.sequential_rule = r
sro.order_index = i
sro.save()
# r.simple_rules.add(SimpleRule.objects.get(uuid=mapping[simple_rule['id']]))
r.save()
print('Rules imported...')
for reaction in data['reactions']:
r = Reaction()
r.package = pack
r.uuid = UUID(reaction['id'].split('/')[-1]) if keep_ids else uuid4()
r.name = reaction['name']
r.description = reaction['description']
r.medlinereferences = reaction['medlinereferences'],
r.multi_step = True if reaction['multistep'] == 'true' else False
r.save()
mapping[reaction['id']] = r.uuid
for scen in reaction['scenarios']:
scen_mapping[scen['id']].append(r)
for educt in reaction['educts']:
r.educts.add(CompoundStructure.objects.get(uuid=mapping[educt['id']]))
for product in reaction['products']:
r.products.add(CompoundStructure.objects.get(uuid=mapping[product['id']]))
if 'rules' in reaction:
for rule in reaction['rules']:
try:
r.rules.add(Rule.objects.get(uuid=mapping[rule['id']]))
except Exception as e:
print(f"Rule with id {rule['id']} not found!")
print(e)
r.save()
print('Reactions imported...')
for pathway in data['pathways']:
pw = Pathway()
pw.package = pack
pw.uuid = UUID(pathway['id'].split('/')[-1]) if keep_ids else uuid4()
pw.name = pathway['name']
pw.description = pathway['description']
pw.save()
mapping[pathway['id']] = pw.uuid
for scen in pathway['scenarios']:
scen_mapping[scen['id']].append(pw)
out_nodes_mapping = defaultdict(set)
root_node = None
for node in pathway['nodes']:
n = Node()
n.uuid = UUID(node['id'].split('/')[-1]) if keep_ids else uuid4()
n.name = node['name']
n.pathway = pw
n.depth = node['depth']
n.default_node_label = CompoundStructure.objects.get(uuid=mapping[node['defaultNodeLabel']['id']])
n.save()
mapping[node['id']] = n.uuid
for scen in node['scenarios']:
scen_mapping[scen['id']].append(n)
for node_label in node['nodeLabels']:
n.node_labels.add(CompoundStructure.objects.get(uuid=mapping[node_label['id']]))
n.save()
for out_edge in node['outEdges']:
out_nodes_mapping[n.uuid].add(out_edge)
for edge in pathway['edges']:
e = Edge()
e.uuid = UUID(edge['id'].split('/')[-1]) if keep_ids else uuid4()
e.name = edge['name']
e.pathway = pw
e.description = edge['description']
e.edge_label = Reaction.objects.get(uuid=mapping[edge['edgeLabel']['id']])
e.save()
mapping[edge['id']] = e.uuid
for scen in edge['scenarios']:
scen_mapping[scen['id']].append(e)
for start_node in edge['startNodes']:
e.start_nodes.add(Node.objects.get(uuid=mapping[start_node]))
for end_node in edge['endNodes']:
e.end_nodes.add(Node.objects.get(uuid=mapping[end_node]))
e.save()
for k, v in out_nodes_mapping.items():
n = Node.objects.get(uuid=k)
for v1 in v:
n.out_edges.add(Edge.objects.get(uuid=mapping[v1]))
n.save()
print('Pathways imported...')
# Linking Phase
for child, parent in parent_mapping.items():
child_obj = Scenario.objects.get(uuid=child)
parent_obj = Scenario.objects.get(uuid=mapping[parent])
child_obj.parent = parent_obj
child_obj.save()
for scen_id, objects in scen_mapping.items():
scen = Scenario.objects.get(uuid=mapping[scen_id])
for o in objects:
o.scenarios.add(scen)
o.save()
print("Scenarios linked...")
print('Import statistics:')
print('Package {} stored'.format(pack.url))
print('Imported {} compounds'.format(Compound.objects.filter(package=pack).count()))
print('Imported {} rules'.format(Rule.objects.filter(package=pack).count()))
print('Imported {} reactions'.format(Reaction.objects.filter(package=pack).count()))
print('Imported {} pathways'.format(Pathway.objects.filter(package=pack).count()))
print('Imported {} Scenarios'.format(Scenario.objects.filter(package=pack).count()))
print("Fixing Node depths...")
total_pws = Pathway.objects.filter(package=pack).count()
for p, pw in enumerate(Pathway.objects.filter(package=pack)):
print(pw.url)
in_count = defaultdict(lambda: 0)
out_count = defaultdict(lambda: 0)
for e in pw.edges:
# TODO check if this will remain
for react in e.start_nodes.all():
out_count[str(react.uuid)] += 1
for prod in e.end_nodes.all():
in_count[str(prod.uuid)] += 1
root_nodes = []
for n in pw.nodes:
num_parents = in_count[str(n.uuid)]
if num_parents == 0:
# must be a root node or unconnected node
if n.depth != 0:
n.depth = 0
n.save()
# Only root node may have children
if out_count[str(n.uuid)] > 0:
root_nodes.append(n)
levels = [root_nodes]
seen = set()
# Do a bfs to determine depths starting with level 0 a.k.a. root nodes
for i, level_nodes in enumerate(levels):
new_level = []
for n in level_nodes:
for e in n.out_edges.all():
for prod in e.end_nodes.all():
if str(prod.uuid) not in seen:
old_depth = prod.depth
if old_depth != i + 1:
print(f'updating depth from {old_depth} to {i + 1}')
prod.depth = i + 1
prod.save()
new_level.append(prod)
seen.add(str(n.uuid))
if new_level:
levels.append(new_level)
print(f'{p + 1}/{total_pws} fixed.')
return pack
class SettingManager(object):
setting_pattern = re.compile(r".*/setting/[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$")