feature/additional_information (#30)

Fixes #12

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#30
This commit is contained in:
2025-07-19 08:10:40 +12:00
parent 4fff78541b
commit 49e02ed97d
11 changed files with 534 additions and 344 deletions

View File

@ -1,11 +1,11 @@
import json
from collections import defaultdict
from datetime import datetime
from uuid import UUID
from django.core.management.base import BaseCommand
from django.conf import settings as s
from epdb.models import *
from django.core.management.base import BaseCommand
from django.db import transaction
from epdb.logic import UserManager, GroupManager, PackageManager, SettingManager
from epdb.models import UserSettingPermission, MLRelativeReasoning, EnviFormer, Permission, User
class Command(BaseCommand):
@ -52,283 +52,7 @@ class Command(BaseCommand):
return anon, admin, g, jebus
# Import one package export (`data`, a dict with the keys 'id', 'name',
# 'reviewStatus', 'description', 'compounds', 'rules', 'reactions' and
# 'pathways') and record a permission on it for `owner`.
# NOTE(review): this span is rendered from a diff with indentation and +/-
# markers lost. The long body ending in `return pack` looks like the removed
# implementation and the final `return PackageManager.import_package(...)`
# looks like its replacement -- confirm against the repository before
# relying on either path.
def import_package(self, data, owner):
# Start import
pack = Package()
# The UUID is taken from the last path segment of the exported id.
pack.uuid = UUID(data['id'].split('/')[-1])
# The stored name is suffixed with the local import timestamp.
pack.name = '{} - {}'.format(data['name'], datetime.now().strftime('%Y-%m-%d %H:%M'))
pack.reviewed = True if data['reviewStatus'] == 'reviewed' else False
pack.description = data['description']
pack.save()
up = UserPackagePermission()
up.user = owner
up.package = pack
# presumably ALL[0] is the stored value of the "all" permission -- TODO confirm
up.permission = up.ALL[0]
up.save()
# Stores old_id to new_id
# (keys are the id strings found in `data`, values are the UUIDs of the saved objects)
mapping = {}
# Store compounds and its structures
for compound in data['compounds']:
comp = Compound()
comp.package = pack
comp.uuid = UUID(compound['id'].split('/')[-1])
comp.name = compound['name']
comp.description = compound['description']
comp.aliases = compound['aliases']
comp.save()
mapping[compound['id']] = comp.uuid
default_structure = None
for structure in compound['structures']:
struc = CompoundStructure()
# struc.object_url = Command.get_id(structure, keep_ids)
struc.compound = comp
struc.uuid = UUID(structure['id'].split('/')[-1])
struc.name = structure['name']
struc.description = structure['description']
struc.smiles = structure['smiles']
struc.save()
mapping[structure['id']] = struc.uuid
# Remember the structure whose id matches the compound's declared default.
if structure['id'] == compound['defaultStructure']['id']:
default_structure = struc
# NOTE(review): second save of the same structure; nothing changed on it
# since the save above -- looks redundant, confirm.
struc.save()
if default_structure is None:
raise ValueError('No default structure set')
comp.default_structure = default_structure
comp.save()
print('Compounds imported...')
# Store simple and parallel-rules
# Parallel and sequential rules are only collected here and created after
# the simple rules, since they refer to simple rules through `mapping`.
par_rules = []
seq_rules = []
for rule in data['rules']:
if rule['identifier'] == 'parallel-rule':
par_rules.append(rule)
continue
if rule['identifier'] == 'sequential-rule':
seq_rules.append(rule)
continue
r = SimpleAmbitRule()
r.uuid = UUID(rule['id'].split('/')[-1])
r.package = pack
r.name = rule['name']
r.description = rule['description']
r.smirks = rule['smirks']
# The filter SMARTS keys are optional in the export; missing ones become None.
r.reactant_filter_smarts = rule.get('reactantFilterSmarts', None)
r.product_filter_smarts = rule.get('productFilterSmarts', None)
r.save()
mapping[rule['id']] = r.uuid
print("Par: ", len(par_rules))
print("Seq: ", len(seq_rules))
for par_rule in par_rules:
r = ParallelRule()
r.package = pack
r.uuid = UUID(par_rule['id'].split('/')[-1])
r.name = par_rule['name']
r.description = par_rule['description']
r.save()
mapping[par_rule['id']] = r.uuid
# Simple rules that were not imported above (id absent from mapping) are skipped silently.
for simple_rule in par_rule['simpleRules']:
if simple_rule['id'] in mapping:
r.simple_rules.add(SimpleRule.objects.get(uuid=mapping[simple_rule['id']]))
r.save()
for seq_rule in seq_rules:
r = SequentialRule()
r.package = pack
r.uuid = UUID(seq_rule['id'].split('/')[-1])
r.name = seq_rule['name']
r.description = seq_rule['description']
r.save()
mapping[seq_rule['id']] = r.uuid
# m1 = Membership(
# ... person=ringo,
# ... group=beatles,
# ... date_joined=date(1962, 8, 16),
# ... invite_reason="Needed a new drummer.",
# ... )
# >>> m1.save()
# One SequentialRuleOrdering row per member rule; order_index is the position in the export.
for i, simple_rule in enumerate(seq_rule['simpleRules']):
sro = SequentialRuleOrdering()
# NOTE(review): `simple_rule` is the raw entry from seq_rule['simpleRules'];
# the parallel-rule loop above resolves such entries via mapping and
# SimpleRule.objects.get(...) first -- confirm this assignment is intended.
sro.simple_rule = simple_rule
sro.sequential_rule = r
sro.order_index = i
sro.save()
# r.simple_rules.add(SimpleRule.objects.get(uuid=mapping[simple_rule['id']]))
r.save()
print('Rules imported...')
for reaction in data['reactions']:
r = Reaction()
r.package = pack
r.uuid = UUID(reaction['id'].split('/')[-1])
r.name = reaction['name']
r.description = reaction['description']
# NOTE(review): the trailing comma makes the assigned value a one-element
# tuple wrapping reaction['medlinereferences'] -- confirm this is intended.
r.medlinereferences = reaction['medlinereferences'],
# 'multistep' is compared against the string 'true', not a boolean.
r.multi_step = True if reaction['multistep'] == 'true' else False
r.save()
mapping[reaction['id']] = r.uuid
# Educts and products are looked up as the CompoundStructure rows stored above.
for educt in reaction['educts']:
r.educts.add(CompoundStructure.objects.get(uuid=mapping[educt['id']]))
for product in reaction['products']:
r.products.add(CompoundStructure.objects.get(uuid=mapping[product['id']]))
if 'rules' in reaction:
for rule in reaction['rules']:
# Best effort: any failure while attaching a rule is printed and the import continues.
try:
r.rules.add(Rule.objects.get(uuid=mapping[rule['id']]))
except Exception as e:
print(f"Rule with id {rule['id']} not found!")
print(e)
r.save()
print('Reactions imported...')
for pathway in data['pathways']:
pw = Pathway()
pw.package = pack
pw.uuid = UUID(pathway['id'].split('/')[-1])
pw.name = pathway['name']
pw.description = pathway['description']
pw.save()
mapping[pathway['id']] = pw.uuid
# Node uuid -> set of outgoing edge references; resolved once the edges exist (see below).
out_nodes_mapping = defaultdict(set)
# NOTE(review): root_node is assigned here but not used in the visible code.
root_node = None
for node in pathway['nodes']:
n = Node()
n.uuid = UUID(node['id'].split('/')[-1])
n.name = node['name']
n.pathway = pw
n.depth = node['depth']
n.default_node_label = CompoundStructure.objects.get(uuid=mapping[node['defaultNodeLabel']['id']])
n.save()
mapping[node['id']] = n.uuid
for node_label in node['nodeLabels']:
n.node_labels.add(CompoundStructure.objects.get(uuid=mapping[node_label['id']]))
n.save()
# out_edge values are used directly as keys into `mapping` further down,
# so they are presumably plain edge id strings -- TODO confirm.
for out_edge in node['outEdges']:
out_nodes_mapping[n.uuid].add(out_edge)
for edge in pathway['edges']:
e = Edge()
e.uuid = UUID(edge['id'].split('/')[-1])
e.name = edge['name']
e.pathway = pw
e.description = edge['description']
e.edge_label = Reaction.objects.get(uuid=mapping[edge['edgeLabel']['id']])
e.save()
mapping[edge['id']] = e.uuid
# startNodes / endNodes entries are used directly as mapping keys (no ['id'] lookup).
for start_node in edge['startNodes']:
e.start_nodes.add(Node.objects.get(uuid=mapping[start_node]))
for end_node in edge['endNodes']:
e.end_nodes.add(Node.objects.get(uuid=mapping[end_node]))
e.save()
# Edges now exist, so the deferred node -> out edge links can be stored.
for k, v in out_nodes_mapping.items():
n = Node.objects.get(uuid=k)
for v1 in v:
n.out_edges.add(Edge.objects.get(uuid=mapping[v1]))
n.save()
print('Pathways imported...')
print('Import statistics:')
print('Package {} stored'.format(pack.url))
print('Imported {} compounds'.format(Compound.objects.filter(package=pack).count()))
print('Imported {} rules'.format(Rule.objects.filter(package=pack).count()))
print('Imported {} reactions'.format(Reaction.objects.filter(package=pack).count()))
print('Imported {} pathways'.format(Pathway.objects.filter(package=pack).count()))
print("Fixing Node depths...")
# Second pass over the stored pathways: recompute node depths from the edges.
total_pws = Pathway.objects.filter(package=pack).count()
for p, pw in enumerate(Pathway.objects.filter(package=pack)):
print(pw.url)
# Per-node counts of incoming / outgoing edge endpoints, keyed by str(uuid).
in_count = defaultdict(lambda: 0)
out_count = defaultdict(lambda: 0)
for e in pw.edges:
# TODO check if this will remain
for react in e.start_nodes.all():
out_count[str(react.uuid)] += 1
for prod in e.end_nodes.all():
in_count[str(prod.uuid)] += 1
root_nodes = []
for n in pw.nodes:
num_parents = in_count[str(n.uuid)]
if num_parents == 0:
# must be a root node or unconnected node
if n.depth != 0:
n.depth = 0
n.save()
# Only root node may have children
if out_count[str(n.uuid)] > 0:
root_nodes.append(n)
# `levels` grows while it is being enumerated: each non-empty new level is
# appended and is then visited by the same loop.
levels = [root_nodes]
seen = set()
# Do a bfs to determine depths starting with level 0 a.k.a. root nodes
for i, level_nodes in enumerate(levels):
new_level = []
for n in level_nodes:
for e in n.out_edges.all():
for prod in e.end_nodes.all():
if str(prod.uuid) not in seen:
old_depth = prod.depth
if old_depth != i + 1:
print(f'updating depth from {old_depth} to {i + 1}')
prod.depth = i + 1
prod.save()
new_level.append(prod)
# NOTE(review): the membership test above checks prod.uuid, but the uuid
# recorded here is that of `n` (the node being expanded); the original
# nesting of this line is not recoverable from this view -- confirm.
seen.add(str(n.uuid))
if new_level:
levels.append(new_level)
print(f'{p + 1}/{total_pws} fixed.')
return pack
# NOTE(review): unreachable if the `return pack` above is still present; in the
# diff this line appears to replace the whole body above with a delegation to
# PackageManager.import_package -- confirm.
return PackageManager.import_package(data, owner, keep_ids=True)
def create_default_setting(self, owner, packages):
s = SettingManager.create_setting(
@ -344,8 +68,6 @@ class Command(BaseCommand):
return s
@transaction.atomic
def handle(self, *args, **options):
# Create users
@ -356,12 +78,13 @@ class Command(BaseCommand):
'EAWAG-BBD.json',
'EAWAG-SOIL.json',
'EAWAG-SLUDGE.json',
'EAWAG-SEDIMENT.json',
]
mapping = {}
for p in packages:
print(f"Importing {p}...")
package_data = json.loads(open(s.BASE_DIR / 'fixtures' / p).read())
package_data = json.loads(open(s.BASE_DIR / 'fixtures' / 'packages' / '2025-07-18' / p).read())
imported_package = self.import_package(package_data, admin)
mapping[p.replace('.json', '')] = imported_package
@ -378,7 +101,8 @@ class Command(BaseCommand):
usp.save()
# Create Model Package
pack = PackageManager.create_package(admin, "Public Prediction Models", "Package to make Prediction Models publicly available")
pack = PackageManager.create_package(admin, "Public Prediction Models",
"Package to make Prediction Models publicly available")
pack.reviewed = True
pack.save()