import json
from collections import defaultdict
from datetime import datetime
from uuid import UUID

from django.core.management.base import BaseCommand
from django.conf import settings as s
from django.db import transaction

# NOTE: the star import intentionally stays *before* the explicit epdb.logic
# import so that the explicitly imported managers win on any name clash.
from epdb.models import *
from epdb.logic import UserManager, GroupManager, PackageManager, SettingManager


class Command(BaseCommand):
    """Bootstrap command: creates default users, imports fixture packages,
    creates the global default setting and builds the default models.
    """

    # ------------------------------------------------------------------ #
    # Small helpers
    # ------------------------------------------------------------------ #
    @staticmethod
    def _uuid_from_url(url):
        """Return the UUID encoded in the last '/'-separated segment of *url*."""
        return UUID(url.split('/')[-1])

    @staticmethod
    def _get_or_create_user(username, email, password, superuser=False):
        """Return the user with *email*, creating it via UserManager if absent.

        A newly created user is flagged staff/superuser when *superuser* is
        true; an already existing user is returned untouched.
        """
        if User.objects.filter(email=email).exists():
            return User.objects.get(email=email)

        user = UserManager.create_user(username, email, password)
        if superuser:
            user.is_staff = True
            user.is_superuser = True
            user.save()
        return user

    # ------------------------------------------------------------------ #
    # Users
    # ------------------------------------------------------------------ #
    def create_users(self):
        """Create (or fetch) the default users and their shared group.

        Returns:
            Tuple ``(anon, admin, group, jebus)``.
        """
        # NOTE(review): the default password is hard-coded here.
        anon = self._get_or_create_user("anonymous", "anon@lorsba.ch", "SuperSafe")
        admin = self._get_or_create_user("admin", "admin@lorsba.ch", "SuperSafe", superuser=True)

        g = GroupManager.create_group(admin, 'enviPath Users', 'All enviPath Users')
        g.user_member.add(anon)
        g.save()

        anon.default_group = g
        anon.save()

        admin.default_group = g
        admin.save()

        # Created after the group, exactly as before.
        jebus = self._get_or_create_user("jebus", "jebus@lorsba.ch", "SuperSafe", superuser=True)
        g.user_member.add(jebus)
        g.save()

        jebus.default_group = g
        jebus.save()

        return anon, admin, g, jebus

    # ------------------------------------------------------------------ #
    # Package import
    # ------------------------------------------------------------------ #
    def import_package(self, data, owner):
        """Import one package from its parsed JSON export.

        Args:
            data: Parsed JSON dict; the keys 'id', 'name', 'reviewStatus',
                'description', 'compounds', 'rules', 'reactions' and
                'pathways' are read.
            owner: User who is granted the ``ALL`` permission on the package.

        Returns:
            The stored ``Package``.

        Raises:
            ValueError: If a compound has no structure matching its
                'defaultStructure' id.
        """
        pack = Package()
        pack.uuid = self._uuid_from_url(data['id'])
        pack.name = '{} - {}'.format(data['name'], datetime.now().strftime('%Y-%m-%d %H:%M'))
        pack.reviewed = data['reviewStatus'] == 'reviewed'
        pack.description = data['description']
        pack.save()

        up = UserPackagePermission()
        up.user = owner
        up.package = pack
        up.permission = up.ALL[0]
        up.save()

        # Maps the id found in the export to the uuid of the stored object.
        mapping = {}

        self._import_compounds(data['compounds'], pack, mapping)
        print('Compounds imported...')

        self._import_rules(data['rules'], pack, mapping)
        print('Rules imported...')

        self._import_reactions(data['reactions'], pack, mapping)
        print('Reactions imported...')

        self._import_pathways(data['pathways'], pack, mapping)
        print('Pathways imported...')

        print('Import statistics:')
        print('Package {} stored'.format(pack.url))
        print('Imported {} compounds'.format(Compound.objects.filter(package=pack).count()))
        print('Imported {} rules'.format(Rule.objects.filter(package=pack).count()))
        print('Imported {} reactions'.format(Reaction.objects.filter(package=pack).count()))
        print('Imported {} pathways'.format(Pathway.objects.filter(package=pack).count()))

        print("Fixing Node depths...")
        self._fix_node_depths(pack)

        return pack

    def _import_compounds(self, compounds, pack, mapping):
        """Store every compound and its structures; record their ids in *mapping*."""
        for compound in compounds:
            comp = Compound()
            comp.package = pack
            comp.uuid = self._uuid_from_url(compound['id'])
            comp.name = compound['name']
            comp.description = compound['description']
            comp.aliases = compound['aliases']
            comp.save()
            mapping[compound['id']] = comp.uuid

            default_structure = None
            for structure in compound['structures']:
                struc = CompoundStructure()
                struc.compound = comp
                struc.uuid = self._uuid_from_url(structure['id'])
                struc.name = structure['name']
                struc.description = structure['description']
                struc.smiles = structure['smiles']
                struc.save()
                mapping[structure['id']] = struc.uuid

                if structure['id'] == compound['defaultStructure']['id']:
                    default_structure = struc

            if default_structure is None:
                raise ValueError('No default structure set')

            comp.default_structure = default_structure
            comp.save()

    def _import_rules(self, rules, pack, mapping):
        """Store simple rules first, then the composite rules that refer to them."""
        par_rules = []
        seq_rules = []

        for rule in rules:
            # Composite rules are deferred until all simple rules exist.
            if rule['identifier'] == 'parallel-rule':
                par_rules.append(rule)
                continue
            if rule['identifier'] == 'sequential-rule':
                seq_rules.append(rule)
                continue

            r = SimpleAmbitRule()
            r.uuid = self._uuid_from_url(rule['id'])
            r.package = pack
            r.name = rule['name']
            r.description = rule['description']
            r.smirks = rule['smirks']
            r.reactant_filter_smarts = rule.get('reactantFilterSmarts', None)
            r.product_filter_smarts = rule.get('productFilterSmarts', None)
            r.save()
            mapping[rule['id']] = r.uuid

        print("Par: ", len(par_rules))
        print("Seq: ", len(seq_rules))

        for par_rule in par_rules:
            r = ParallelRule()
            r.package = pack
            r.uuid = self._uuid_from_url(par_rule['id'])
            r.name = par_rule['name']
            r.description = par_rule['description']
            r.save()
            mapping[par_rule['id']] = r.uuid

            for simple_rule in par_rule['simpleRules']:
                # Simple rules that were not part of this import are skipped.
                if simple_rule['id'] in mapping:
                    r.simple_rules.add(SimpleRule.objects.get(uuid=mapping[simple_rule['id']]))
            r.save()

        for seq_rule in seq_rules:
            r = SequentialRule()
            r.package = pack
            r.uuid = self._uuid_from_url(seq_rule['id'])
            r.name = seq_rule['name']
            r.description = seq_rule['description']
            r.save()
            mapping[seq_rule['id']] = r.uuid

            # One ordering row per step keeps the position of each simple rule
            # (presumably the m2m "through" model - TODO confirm).
            for i, simple_rule in enumerate(seq_rule['simpleRules']):
                if simple_rule['id'] not in mapping:
                    print(f"Rule with id {simple_rule['id']} not found!")
                    continue
                sro = SequentialRuleOrdering()
                # Fix: link the stored SimpleRule, not the raw JSON dict.
                sro.simple_rule = SimpleRule.objects.get(uuid=mapping[simple_rule['id']])
                sro.sequential_rule = r
                sro.order_index = i
                sro.save()
            r.save()

    def _import_reactions(self, reactions, pack, mapping):
        """Store every reaction and link its educts, products and rules."""
        for reaction in reactions:
            r = Reaction()
            r.package = pack
            r.uuid = self._uuid_from_url(reaction['id'])
            r.name = reaction['name']
            r.description = reaction['description']
            # Fix: a trailing comma used to wrap this value in a 1-tuple.
            r.medlinereferences = reaction['medlinereferences']
            r.multi_step = reaction['multistep'] == 'true'
            r.save()
            mapping[reaction['id']] = r.uuid

            for educt in reaction['educts']:
                r.educts.add(CompoundStructure.objects.get(uuid=mapping[educt['id']]))

            for product in reaction['products']:
                r.products.add(CompoundStructure.objects.get(uuid=mapping[product['id']]))

            if 'rules' in reaction:
                for rule in reaction['rules']:
                    # Best effort: a missing rule is reported, not fatal.
                    try:
                        r.rules.add(Rule.objects.get(uuid=mapping[rule['id']]))
                    except Exception as e:
                        print(f"Rule with id {rule['id']} not found!")
                        print(e)

            r.save()

    def _import_pathways(self, pathways, pack, mapping):
        """Store every pathway with its nodes and edges and wire them together."""
        for pathway in pathways:
            pw = Pathway()
            pw.package = pack
            pw.uuid = self._uuid_from_url(pathway['id'])
            pw.name = pathway['name']
            pw.description = pathway['description']
            pw.save()
            mapping[pathway['id']] = pw.uuid

            # node uuid -> exported ids of its outgoing edges; resolved once
            # the edges themselves have been stored.
            out_nodes_mapping = defaultdict(set)

            for node in pathway['nodes']:
                n = Node()
                n.uuid = self._uuid_from_url(node['id'])
                n.name = node['name']
                n.pathway = pw
                n.depth = node['depth']
                n.default_node_label = CompoundStructure.objects.get(
                    uuid=mapping[node['defaultNodeLabel']['id']])
                n.save()
                mapping[node['id']] = n.uuid

                for node_label in node['nodeLabels']:
                    n.node_labels.add(CompoundStructure.objects.get(uuid=mapping[node_label['id']]))
                n.save()

                for out_edge in node['outEdges']:
                    out_nodes_mapping[n.uuid].add(out_edge)

            for edge in pathway['edges']:
                e = Edge()
                e.uuid = self._uuid_from_url(edge['id'])
                e.name = edge['name']
                e.pathway = pw
                e.description = edge['description']
                e.edge_label = Reaction.objects.get(uuid=mapping[edge['edgeLabel']['id']])
                e.save()
                mapping[edge['id']] = e.uuid

                for start_node in edge['startNodes']:
                    e.start_nodes.add(Node.objects.get(uuid=mapping[start_node]))

                for end_node in edge['endNodes']:
                    e.end_nodes.add(Node.objects.get(uuid=mapping[end_node]))
                e.save()

            for node_uuid, edge_ids in out_nodes_mapping.items():
                n = Node.objects.get(uuid=node_uuid)
                for edge_id in edge_ids:
                    n.out_edges.add(Edge.objects.get(uuid=mapping[edge_id]))
                n.save()

    def _fix_node_depths(self, pack):
        """Recompute ``Node.depth`` for every pathway of *pack*.

        Nodes without incoming edges get depth 0; the remaining depths are
        assigned level by level starting from those depth-0 nodes that have
        outgoing edges.
        """
        total_pws = Pathway.objects.filter(package=pack).count()

        for p, pw in enumerate(Pathway.objects.filter(package=pack)):
            print(pw.url)

            in_count = defaultdict(int)
            out_count = defaultdict(int)

            for e in pw.edges:
                # TODO check if this will remain
                for react in e.start_nodes.all():
                    out_count[str(react.uuid)] += 1

                for prod in e.end_nodes.all():
                    in_count[str(prod.uuid)] += 1

            root_nodes = []
            for n in pw.nodes:
                if in_count[str(n.uuid)] == 0:
                    # Must be a root node or an unconnected node.
                    if n.depth != 0:
                        n.depth = 0
                        n.save()
                    # Only nodes with children can seed the traversal.
                    if out_count[str(n.uuid)] > 0:
                        root_nodes.append(n)

            # `levels` grows while it is being iterated: every non-empty new
            # level is appended and picked up by a later iteration.
            levels = [root_nodes]
            seen = set()
            for i, level_nodes in enumerate(levels):
                new_level = []
                for n in level_nodes:
                    for e in n.out_edges.all():
                        for prod in e.end_nodes.all():
                            if str(prod.uuid) not in seen:
                                old_depth = prod.depth
                                if old_depth != i + 1:
                                    print(f'updating depth from {old_depth} to {i + 1}')
                                    prod.depth = i + 1
                                    prod.save()
                                new_level.append(prod)
                    # NOTE(review): a node is marked only once it has been
                    # expanded, not when it is first reached, so a node can be
                    # queued more than once and re-assigned a deeper level.
                    # Behaviour kept as is - confirm whether this is intended.
                    seen.add(str(n.uuid))
                if new_level:
                    levels.append(new_level)

            print(f'{p + 1}/{total_pws} fixed.')

    # ------------------------------------------------------------------ #
    # Settings
    # ------------------------------------------------------------------ #
    def create_default_setting(self, owner, packages):
        """Create and return the 'Global Default Setting' owned by *owner*.

        Args:
            owner: User owning the setting.
            packages: Packages passed on as ``rule_packages``.
        """
        # NOTE(review): the description mentions "Max Depth of 8" while
        # max_depth=5 is passed - confirm which value is intended.
        setting = SettingManager.create_setting(
            owner,
            name='Global Default Setting',
            description='Global Default Setting containing BBD Rules and Max 30 Nodes and Max Depth of 8',
            max_nodes=30,
            max_depth=5,
            rule_packages=packages,
            model=None,
            model_threshold=None
        )
        return setting

    # ------------------------------------------------------------------ #
    # Entry point
    # ------------------------------------------------------------------ #
    @transaction.atomic
    def handle(self, *args, **options):
        """Run the complete bootstrap; decorated with ``transaction.atomic``."""
        # Create users
        anon, admin, g, jebus = self.create_users()

        # Import Packages
        packages = [
            'EAWAG-BBD.json',
            'EAWAG-SOIL.json',
            'EAWAG-SLUDGE.json',
        ]

        mapping = {}
        for p in packages:
            print(f"Importing {p}...")
            # Context manager so the fixture file is closed again.
            with open(s.BASE_DIR / 'fixtures' / p, encoding="utf-8") as fixture:
                package_data = json.load(fixture)
            imported_package = self.import_package(package_data, admin)
            mapping[p.replace('.json', '')] = imported_package

        setting = self.create_default_setting(admin, [mapping['EAWAG-BBD']])
        setting.public = True
        setting.save()
        setting.make_global_default()

        for u in [anon, jebus]:
            usp = UserSettingPermission()
            usp.user = u
            usp.setting = setting
            usp.permission = Permission.READ[0]
            usp.save()

        # Create Model Package
        pack = PackageManager.create_package(admin, "Public Prediction Models",
                                             "Package to make Prediction Models publicly available")
        pack.reviewed = True
        pack.save()

        # Create RR
        ml_model = MLRelativeReasoning.create(
            pack,
            'ECC - BBD - T0.5',
            'ML Relative Reasoning',
            [mapping['EAWAG-BBD']],
            [mapping['EAWAG-BBD']],
            [],
            0.5
        )

        X, y = ml_model.build_dataset()
        ml_model.build_model(X, y)
        ml_model.evaluate_model()

        # If available create EnviFormerModel
        if s.ENVIFORMER_PRESENT:
            enviFormer_model = EnviFormer.create(pack, 'EnviFormer - T0.5', 'EnviFormer Model with Threshold 0.5', 0.5)