Files
enviPy-bayer/epdb/management/commands/bootstrap.py

206 lines
7.5 KiB
Python

import json
from django.conf import settings as s
from django.core.management.base import BaseCommand
from django.db import transaction
from epdb.logic import UserManager, GroupManager, PackageManager, SettingManager
from epdb.models import UserSettingPermission, MLRelativeReasoning, EnviFormer, Permission, User, ExternalDatabase
class Command(BaseCommand):
def create_users(self):
if not User.objects.filter(email='anon@lorsba.ch').exists():
anon = UserManager.create_user("anonymous", "anon@lorsba.ch", "SuperSafe", is_active=True,
add_to_group=False, set_setting=False)
else:
anon = User.objects.get(email='anon@lorsba.ch')
if not User.objects.filter(email='admin@lorsba.ch').exists():
admin = UserManager.create_user("admin", "admin@lorsba.ch", "SuperSafe", is_active=True, add_to_group=False,
set_setting=False)
admin.is_staff = True
admin.is_superuser = True
admin.save()
else:
admin = User.objects.get(email='admin@lorsba.ch')
g = GroupManager.create_group(admin, 'enviPath Users', 'All enviPath Users')
g.public = True
g.save()
g.user_member.add(anon)
g.save()
anon.default_group = g
anon.save()
admin.default_group = g
admin.save()
if not User.objects.filter(email='jebus@lorsba.ch').exists():
jebus = UserManager.create_user("jebus", "jebus@lorsba.ch", "SuperSafe", is_active=True, add_to_group=False,
set_setting=False)
jebus.is_staff = True
jebus.is_superuser = True
jebus.save()
else:
jebus = User.objects.get(email='jebus@lorsba.ch')
g.user_member.add(jebus)
g.save()
jebus.default_group = g
jebus.save()
return anon, admin, g, jebus
def import_package(self, data, owner):
return PackageManager.import_package(data, owner, keep_ids=True, add_import_timestamp=False)
def create_default_setting(self, owner, packages):
s = SettingManager.create_setting(
owner,
name='Global Default Setting',
description='Global Default Setting containing BBD Rules and Max 30 Nodes and Max Depth of 8',
max_nodes=30,
max_depth=5,
rule_packages=packages,
model=None,
model_threshold=None
)
return s
def populate_common_external_databases(self):
"""
Helper function to populate common external databases.
This can be called from a Django management command.
"""
databases = [
{
'name': 'PubChem Compound',
'full_name': 'PubChem Compound Database',
'description': 'Chemical database of small organic molecules',
'base_url': 'https://pubchem.ncbi.nlm.nih.gov',
'url_pattern': 'https://pubchem.ncbi.nlm.nih.gov/compound/{id}'
},
{
'name': 'PubChem Substance',
'full_name': 'PubChem Substance Database',
'description': 'Database of chemical substances',
'base_url': 'https://pubchem.ncbi.nlm.nih.gov',
'url_pattern': 'https://pubchem.ncbi.nlm.nih.gov/substance/{id}'
},
{
'name': 'ChEBI',
'full_name': 'Chemical Entities of Biological Interest',
'description': 'Dictionary of molecular entities',
'base_url': 'https://www.ebi.ac.uk/chebi',
'url_pattern': 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:{id}'
},
{
'name': 'RHEA',
'full_name': 'RHEA Reaction Database',
'description': 'Comprehensive resource of biochemical reactions',
'base_url': 'https://www.rhea-db.org',
'url_pattern': 'https://www.rhea-db.org/rhea/{id}'
},
{
'name': 'CAS',
'full_name': 'Chemical Abstracts Service Registry',
'description': 'Registry of chemical substances',
'base_url': 'https://www.cas.org',
'url_pattern': None # CAS doesn't have a free public URL pattern
},
{
'name': 'KEGG Reaction',
'full_name': 'KEGG Reaction Database',
'description': 'Database of biochemical reactions',
'base_url': 'https://www.genome.jp',
'url_pattern': 'https://www.genome.jp/entry/reaction+{id}'
},
{
'name': 'MetaCyc',
'full_name': 'MetaCyc Metabolic Pathway Database',
'description': 'Database of metabolic pathways and enzymes',
'base_url': 'https://metacyc.org',
'url_pattern': None
},
{
'name': 'UniProt',
'full_name': 'MetaCyc Metabolic Pathway Database',
'description': 'UniProt is a freely accessible database of protein sequence and functional information',
'base_url': 'https://www.uniprot.org',
'url_pattern': 'https://www.uniprot.org/uniprotkb?query="{id}"'
}
]
for db_info in databases:
ExternalDatabase.objects.get_or_create(
name=db_info['name'],
defaults=db_info
)
@transaction.atomic
def handle(self, *args, **options):
# Create users
anon, admin, g, jebus = self.create_users()
# Import Packages
packages = [
'EAWAG-BBD.json',
'EAWAG-SOIL.json',
'EAWAG-SLUDGE.json',
'EAWAG-SEDIMENT.json',
]
mapping = {}
for p in packages:
print(f"Importing {p}...")
package_data = json.loads(open(s.BASE_DIR / 'fixtures' / 'packages' / '2025-07-18' / p).read())
imported_package = self.import_package(package_data, admin)
mapping[p.replace('.json', '')] = imported_package
setting = self.create_default_setting(admin, [mapping['EAWAG-BBD']])
setting.public = True
setting.save()
setting.make_global_default()
for u in [anon, jebus]:
u.default_setting = setting
u.save()
usp = UserSettingPermission()
usp.user = u
usp.setting = setting
usp.permission = Permission.READ[0]
usp.save()
# Create Model Package
pack = PackageManager.create_package(admin, "Public Prediction Models",
"Package to make Prediction Models publicly available")
pack.reviewed = True
pack.save()
# Create RR
ml_model = MLRelativeReasoning.create(
package=pack,
rule_packages=[mapping['EAWAG-BBD']],
data_packages=[mapping['EAWAG-BBD']],
eval_packages=[],
threshold=0.5,
name='ECC - BBD - T0.5',
description='ML Relative Reasoning',
)
ml_model.build_dataset()
ml_model.build_model()
# ml_model.evaluate_model()
# If available create EnviFormerModel
if s.ENVIFORMER_PRESENT:
enviFormer_model = EnviFormer.create(pack, 'EnviFormer - T0.5', 'EnviFormer Model with Threshold 0.5', 0.5)