def populate_common_external_databases(self):
    """
    Create the commonly referenced external databases if they are missing.

    Each entry is looked up by its unique ``name``; the remaining keys are
    only used as creation defaults, so rows that already exist are left
    untouched and the helper can safely be run repeatedly.
    This can be called from a Django management command.
    """
    databases = [
        {
            'name': 'PubChem Compound',
            'full_name': 'PubChem Compound Database',
            'description': 'Chemical database of small organic molecules',
            'base_url': 'https://pubchem.ncbi.nlm.nih.gov',
            'url_pattern': 'https://pubchem.ncbi.nlm.nih.gov/compound/{id}',
        },
        {
            'name': 'PubChem Substance',
            'full_name': 'PubChem Substance Database',
            'description': 'Database of chemical substances',
            'base_url': 'https://pubchem.ncbi.nlm.nih.gov',
            'url_pattern': 'https://pubchem.ncbi.nlm.nih.gov/substance/{id}',
        },
        {
            'name': 'ChEBI',
            'full_name': 'Chemical Entities of Biological Interest',
            'description': 'Dictionary of molecular entities',
            'base_url': 'https://www.ebi.ac.uk/chebi',
            'url_pattern': 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:{id}',
        },
        {
            'name': 'RHEA',
            'full_name': 'RHEA Reaction Database',
            'description': 'Comprehensive resource of biochemical reactions',
            'base_url': 'https://www.rhea-db.org',
            'url_pattern': 'https://www.rhea-db.org/rhea/{id}',
        },
        {
            'name': 'CAS',
            'full_name': 'Chemical Abstracts Service Registry',
            'description': 'Registry of chemical substances',
            'base_url': 'https://www.cas.org',
            # CAS doesn't have a free public URL pattern. The model column is a
            # non-nullable CharField, so "no pattern" is '' rather than None.
            'url_pattern': '',
        },
        {
            'name': 'KEGG Reaction',
            'full_name': 'KEGG Reaction Database',
            'description': 'Database of biochemical reactions',
            'base_url': 'https://www.genome.jp',
            'url_pattern': 'https://www.genome.jp/entry/reaction+{id}',
        },
        {
            'name': 'MetaCyc',
            'full_name': 'MetaCyc Metabolic Pathway Database',
            'description': 'Database of metabolic pathways and enzymes',
            'base_url': 'https://metacyc.org',
            # No URL pattern configured; stored as '' (see CAS above).
            'url_pattern': '',
        },
        {
            'name': 'UniProt',
            # Previously a copy of the MetaCyc full name.
            'full_name': 'Universal Protein Resource',
            'description': 'UniProt is a freely accessible database of protein sequence and functional information',
            'base_url': 'https://www.uniprot.org',
            'url_pattern': 'https://www.uniprot.org/uniprotkb?query="{id}"',
        },
    ]

    for db_info in databases:
        # `name` is the lookup key; everything else only applies on creation.
        defaults = {key: value for key, value in db_info.items() if key != 'name'}
        ExternalDatabase.objects.get_or_create(
            name=db_info['name'],
            defaults=defaults,
        )
############################
# External IDs / Databases #
############################
class ExternalDatabase(TimeStampedModel):
    """An external resource (e.g. PubChem, ChEBI) that objects can be cross-referenced to."""
    uuid = models.UUIDField(default=uuid4, editable=False, unique=True)
    name = models.CharField(max_length=100, unique=True, verbose_name="Database Name")
    full_name = models.CharField(max_length=255, blank=True, verbose_name="Full Database Name")
    description = models.TextField(blank=True, verbose_name="Description")
    base_url = models.URLField(blank=True, null=True, verbose_name="Base URL")
    # Not nullable: "no pattern" is represented by the empty string.
    url_pattern = models.CharField(
        max_length=500,
        blank=True,
        verbose_name="URL Pattern",
        help_text="URL pattern with {id} placeholder, e.g., 'https://pubchem.ncbi.nlm.nih.gov/compound/{id}'"
    )
    is_active = models.BooleanField(default=True, verbose_name="Is Active")

    class Meta:
        db_table = 'epdb_external_database'
        verbose_name = 'External Database'
        verbose_name_plural = 'External Databases'
        ordering = ['name']

    def __str__(self):
        return self.full_name or self.name

    def get_url_for_identifier(self, identifier_value):
        """Return the URL for ``identifier_value``, or ``None`` if no usable ``{id}`` pattern is set."""
        if self.url_pattern and '{id}' in self.url_pattern:
            return self.url_pattern.format(id=identifier_value)
        return None


class ExternalIdentifier(TimeStampedModel):
    """An identifier of an arbitrary model instance within one ``ExternalDatabase``."""
    uuid = models.UUIDField(default=uuid4, editable=False, unique=True)

    # Generic foreign key to link to any model
    content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE)
    object_id = models.IntegerField()
    content_object = GenericForeignKey('content_type', 'object_id')

    database = models.ForeignKey(
        ExternalDatabase,
        on_delete=models.CASCADE,
        verbose_name="External Database"
    )
    identifier_value = models.CharField(max_length=255, verbose_name="Identifier Value")
    url = models.URLField(blank=True, null=True, verbose_name="Direct URL")
    is_primary = models.BooleanField(
        default=False,
        verbose_name="Is Primary",
        help_text="Mark this as the primary identifier for this database"
    )

    class Meta:
        db_table = 'epdb_external_identifier'
        verbose_name = 'External Identifier'
        verbose_name_plural = 'External Identifiers'
        unique_together = [('content_type', 'object_id', 'database', 'identifier_value')]
        indexes = [
            models.Index(fields=['content_type', 'object_id']),
            models.Index(fields=['database', 'identifier_value']),
        ]

    def __str__(self):
        return f"{self.database.name}: {self.identifier_value}"

    @property
    def external_url(self):
        """The stored URL if present, otherwise one derived from the database's URL pattern."""
        if self.url:
            return self.url
        return self.database.get_url_for_identifier(self.identifier_value)

    def save(self, *args, **kwargs):
        # Fill in the URL from the database pattern when none was given explicitly.
        if not self.url and self.database.url_pattern:
            self.url = self.database.get_url_for_identifier(self.identifier_value)
        super().save(*args, **kwargs)


class ExternalIdentifierMixin(models.Model):
    """Convenience API for models carrying external identifiers.

    The methods read ``self.external_identifiers``, so the concrete model
    has to provide a relation of that name to ``ExternalIdentifier``.
    """

    class Meta:
        abstract = True

    def get_external_identifiers(self):
        """Return a QuerySet of all external identifiers of this object."""
        return self.external_identifiers.all()

    def get_external_identifier(self, database_name):
        """Return a QuerySet (possibly empty) of this object's identifiers in ``database_name``."""
        return self.external_identifiers.filter(database__name=database_name)

    def _get_external_identifier_value(self, database_name):
        """Return a single identifier value for ``database_name`` or ``None``.

        ``get_external_identifier`` yields a QuerySet, which has no
        ``identifier_value`` attribute; this helper resolves it to one value,
        preferring the identifier flagged as primary.
        """
        return (
            self.get_external_identifier(database_name)
            .order_by('-is_primary', 'pk')
            .values_list('identifier_value', flat=True)
            .first()
        )

    def add_external_identifier(self, database_name, identifier_value, url=None, is_primary=False):
        """Attach an identifier to this object, creating database/identifier rows as needed.

        Returns the (new or already existing) ``ExternalIdentifier``. With
        ``is_primary=True`` every other identifier of the same database is
        demoted and the returned one is guaranteed to be flagged primary.
        """
        database, _ = ExternalDatabase.objects.get_or_create(name=database_name)

        if is_primary:
            # Demote the competing primaries only; the target row keeps its flag.
            self.external_identifiers.filter(
                database=database,
                is_primary=True
            ).exclude(
                identifier_value=identifier_value
            ).update(is_primary=False)

        external_id, created = ExternalIdentifier.objects.get_or_create(
            content_type=ContentType.objects.get_for_model(self),
            object_id=self.pk,
            database=database,
            identifier_value=identifier_value,
            defaults={
                'url': url,
                'is_primary': is_primary
            }
        )

        # `defaults` only apply on creation: promote a pre-existing row explicitly,
        # otherwise the demotion above would leave the database without a primary.
        if is_primary and not created and not external_id.is_primary:
            external_id.is_primary = True
            external_id.save()

        return external_id

    def remove_external_identifier(self, database_name, identifier_value):
        """Delete the identifier ``identifier_value`` of ``database_name`` from this object."""
        self.external_identifiers.filter(
            database__name=database_name,
            identifier_value=identifier_value
        ).delete()


class ChemicalIdentifierMixin(ExternalIdentifierMixin):
    """Shortcuts for identifiers of chemical entities (PubChem, ChEBI, CAS)."""

    class Meta:
        abstract = True

    @property
    def pubchem_compound_id(self):
        return self._get_external_identifier_value('PubChem Compound')

    @property
    def pubchem_substance_id(self):
        return self._get_external_identifier_value('PubChem Substance')

    @property
    def chebi_id(self):
        return self._get_external_identifier_value('ChEBI')

    @property
    def cas_number(self):
        return self._get_external_identifier_value('CAS')

    def add_pubchem_compound_id(self, compound_id, is_primary=True):
        return self.add_external_identifier(
            'PubChem Compound',
            compound_id,
            f'https://pubchem.ncbi.nlm.nih.gov/compound/{compound_id}',
            is_primary
        )

    def add_pubchem_substance_id(self, substance_id):
        return self.add_external_identifier(
            'PubChem Substance',
            substance_id,
            f'https://pubchem.ncbi.nlm.nih.gov/substance/{substance_id}'
        )

    def add_chebi_id(self, chebi_id, is_primary=False):
        # Accept both 'CHEBI:12345' and a bare (possibly integer) id.
        clean_id = str(chebi_id).replace('CHEBI:', '')
        return self.add_external_identifier(
            'ChEBI',
            clean_id,
            f'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:{clean_id}',
            is_primary
        )

    def add_cas_number(self, cas_number):
        return self.add_external_identifier('CAS', cas_number)

    def get_pubchem_identifiers(self):
        # NOTE: `or` makes this a fallback, not a union: the compound identifiers
        # are returned if there are any, otherwise the substance identifiers.
        return self.get_external_identifier('PubChem Compound') or self.get_external_identifier('PubChem Substance')

    def get_pubchem_compound_identifiers(self):
        return self.get_external_identifier('PubChem Compound')

    def get_pubchem_substance_identifiers(self):
        return self.get_external_identifier('PubChem Substance')

    def get_chebi_identifiers(self):
        return self.get_external_identifier('ChEBI')

    def get_cas_identifiers(self):
        return self.get_external_identifier('CAS')


class ReactionIdentifierMixin(ExternalIdentifierMixin):
    """Shortcuts for identifiers of reactions (RHEA, KEGG Reaction, MetaCyc, UniProt)."""

    class Meta:
        abstract = True

    @property
    def rhea_id(self):
        return self._get_external_identifier_value('RHEA')

    @property
    def kegg_reaction_id(self):
        return self._get_external_identifier_value('KEGG Reaction')

    @property
    def metacyc_reaction_id(self):
        return self._get_external_identifier_value('MetaCyc')

    def add_rhea_id(self, rhea_id, is_primary=True):
        return self.add_external_identifier(
            'RHEA',
            rhea_id,
            f'https://www.rhea-db.org/rhea/{rhea_id}',
            is_primary
        )

    def add_uniprot_id(self, uniprot_id, is_primary=True):
        return self.add_external_identifier(
            'UniProt',
            uniprot_id,
            f'https://www.uniprot.org/uniprotkb?query="{uniprot_id}"',
            is_primary
        )

    def add_kegg_reaction_id(self, kegg_id):
        return self.add_external_identifier(
            'KEGG Reaction',
            kegg_id,
            f'https://www.genome.jp/entry/reaction+{kegg_id}'
        )

    def add_metacyc_reaction_id(self, metacyc_id):
        return self.add_external_identifier('MetaCyc', metacyc_id)

    def get_rhea_identifiers(self):
        return self.get_external_identifier('RHEA')

    def get_uniprot_identifiers(self):
        return self.get_external_identifier('UniProt')
verbose_name='License') + license = models.ForeignKey('epdb.License', on_delete=models.SET_NULL, blank=True, null=True, + verbose_name='License') def __str__(self): return f"{self.name} (pk={self.pk})" @@ -370,12 +606,14 @@ class Package(EnviPathModel): return rules -class Compound(EnviPathModel, AliasMixin, ScenarioMixin): +class Compound(EnviPathModel, AliasMixin, ScenarioMixin, ChemicalIdentifierMixin): package = models.ForeignKey('epdb.Package', verbose_name='Package', on_delete=models.CASCADE, db_index=True) default_structure = models.ForeignKey('CompoundStructure', verbose_name='Default Structure', related_name='compound_default_structure', on_delete=models.CASCADE, null=True) + external_identifiers = GenericRelation('ExternalIdentifier') + @property def structures(self): return CompoundStructure.objects.filter(compound=self) @@ -455,7 +693,8 @@ class Compound(EnviPathModel, AliasMixin, ScenarioMixin): description='{} (in its normalized form)'.format(description), normalized_structure=True) - cs = CompoundStructure.create(c, smiles, name=name, description=description, normalized_structure=is_standardized) + cs = CompoundStructure.create(c, smiles, name=name, description=description, + normalized_structure=is_standardized) c.default_structure = cs c.save() @@ -489,7 +728,8 @@ class Compound(EnviPathModel, AliasMixin, ScenarioMixin): if CompoundStructure.objects.filter(smiles__in=smiles, compound__package=self.package).exists(): return CompoundStructure.objects.get(smiles__in=smiles, compound__package=self.package) - cs = CompoundStructure.create(self, smiles, name=name, description=description, normalized_structure=is_standardized) + cs = CompoundStructure.create(self, smiles, name=name, description=description, + normalized_structure=is_standardized) if default_structure: self.default_structure = cs @@ -501,13 +741,15 @@ class Compound(EnviPathModel, AliasMixin, ScenarioMixin): unique_together = [('uuid', 'package')] -class CompoundStructure(EnviPathModel, 
AliasMixin, ScenarioMixin): +class CompoundStructure(EnviPathModel, AliasMixin, ScenarioMixin, ChemicalIdentifierMixin): compound = models.ForeignKey('epdb.Compound', on_delete=models.CASCADE, db_index=True) smiles = models.TextField(blank=False, null=False, verbose_name='SMILES') canonical_smiles = models.TextField(blank=False, null=False, verbose_name='Canonical SMILES') inchikey = models.TextField(max_length=27, blank=False, null=False, verbose_name="InChIKey") normalized_structure = models.BooleanField(null=False, blank=False, default=False) + external_identifiers = GenericRelation('ExternalIdentifier') + def save(self, *args, **kwargs): # Compute these fields only on initial save call if self.pk is None: @@ -589,6 +831,7 @@ class Rule(PolymorphicModel, EnviPathModel, AliasMixin, ScenarioMixin): cls = Rule.cls_for_type(rule_type) return cls.create(*args, **kwargs) + # # @property # def related_pathways(self): @@ -772,7 +1015,7 @@ class SequentialRuleOrdering(models.Model): order_index = models.IntegerField(null=False, blank=False) -class Reaction(EnviPathModel, AliasMixin, ScenarioMixin): +class Reaction(EnviPathModel, AliasMixin, ScenarioMixin, ReactionIdentifierMixin): package = models.ForeignKey('epdb.Package', verbose_name='Package', on_delete=models.CASCADE, db_index=True) educts = models.ManyToManyField('epdb.CompoundStructure', verbose_name='Educts', related_name='reaction_educts') products = models.ManyToManyField('epdb.CompoundStructure', verbose_name='Products', @@ -784,6 +1027,8 @@ class Reaction(EnviPathModel, AliasMixin, ScenarioMixin): verbose_name='Medline References' ) + external_identifiers = GenericRelation('ExternalIdentifier') + @property def url(self): return '{}/reaction/{}'.format(self.package.url, self.uuid) @@ -793,7 +1038,7 @@ class Reaction(EnviPathModel, AliasMixin, ScenarioMixin): def create(package: Package, name: str = None, description: str = None, educts: Union[List[str], List[CompoundStructure]] = None, products: 
Union[List[str], List[CompoundStructure]] = None, - rules: Union[Rule|List[Rule]] = None, multi_step: bool = True): + rules: Union[Rule | List[Rule]] = None, multi_step: bool = True): _educts = [] _products = [] @@ -824,7 +1069,6 @@ class Reaction(EnviPathModel, AliasMixin, ScenarioMixin): if isinstance(rules, Rule): rules = [rules] - query = Reaction.objects.annotate( educt_count=Count('educts', filter=Q(educts__in=_educts), distinct=True), product_count=Count('products', filter=Q(products__in=_products), distinct=True), @@ -891,7 +1135,6 @@ class Reaction(EnviPathModel, AliasMixin, ScenarioMixin): id__in=Edge.objects.filter(edge_label=self).values('pathway_id')).order_by('name') - class Pathway(EnviPathModel, AliasMixin, ScenarioMixin): package = models.ForeignKey('epdb.Package', verbose_name='Package', on_delete=models.CASCADE, db_index=True) setting = models.ForeignKey('epdb.Setting', verbose_name='Setting', on_delete=models.CASCADE, null=True, blank=True) @@ -1117,6 +1360,7 @@ class Pathway(EnviPathModel, AliasMixin, ScenarioMixin): name: Optional[str] = None, description: Optional[str] = None): return Edge.create(self, start_nodes, end_nodes, rule, name=name, description=description) + class Node(EnviPathModel, AliasMixin, ScenarioMixin): pathway = models.ForeignKey('epdb.Pathway', verbose_name='belongs to', on_delete=models.CASCADE, db_index=True) default_node_label = models.ForeignKey('epdb.CompoundStructure', verbose_name='Default Node Label', @@ -1149,7 +1393,8 @@ class Node(EnviPathModel, AliasMixin, ScenarioMixin): } @staticmethod - def create(pathway: 'Pathway', smiles: str, depth: int, name: Optional[str] = None, description: Optional[str] = None): + def create(pathway: 'Pathway', smiles: str, depth: int, name: Optional[str] = None, + description: Optional[str] = None): c = Compound.create(pathway.package, smiles, name=name, description=description) if Node.objects.filter(pathway=pathway, default_node_label=c.default_structure).exists(): @@ -1187,7 
+1432,6 @@ class Node(EnviPathModel, AliasMixin, ScenarioMixin): return data - def simple_json(self, include_description=False): res = super().simple_json() name = res.get('name', None) @@ -1213,7 +1457,7 @@ class Edge(EnviPathModel, AliasMixin, ScenarioMixin): 'id': self.url, 'url': self.url, 'image': self.url + '?image=svg', - 'reaction': {'name': self.edge_label.name, 'url': self.edge_label.url } if self.edge_label else None, + 'reaction': {'name': self.edge_label.name, 'url': self.edge_label.url} if self.edge_label else None, 'reaction_probability': self.kv.get('probability'), 'start_node_urls': [x.url for x in self.start_nodes.all()], 'end_node_urls': [x.url for x in self.end_nodes.all()], @@ -1229,15 +1473,18 @@ class Edge(EnviPathModel, AliasMixin, ScenarioMixin): for e in t['edges']: if e['uuid'] == str(self.uuid): passes_app_domain = ( - t['local_compatibility'] >= app_domain_data['ad_params']['local_compatibility_threshold'] - ) and ( - t['reliability'] >= app_domain_data['ad_params']['reliability_threshold'] - ) + t['local_compatibility'] >= app_domain_data['ad_params'][ + 'local_compatibility_threshold'] + ) and ( + t['reliability'] >= app_domain_data['ad_params'][ + 'reliability_threshold'] + ) edge_json['app_domain'] = { 'passes_app_domain': passes_app_domain, 'local_compatibility': t['local_compatibility'], - 'local_compatibility_threshold': app_domain_data['ad_params']['local_compatibility_threshold'], + 'local_compatibility_threshold': app_domain_data['ad_params'][ + 'local_compatibility_threshold'], 'reliability': t['reliability'], 'reliability_threshold': app_domain_data['ad_params']['reliability_threshold'], 'times_triggered': t['times_triggered'], @@ -1247,9 +1494,9 @@ class Edge(EnviPathModel, AliasMixin, ScenarioMixin): return edge_json - @staticmethod - def create(pathway, start_nodes: List[Node], end_nodes: List[Node], rule: Optional[Rule] = None, name: Optional[str] = None, + def create(pathway, start_nodes: List[Node], end_nodes: 
List[Node], rule: Optional[Rule] = None, + name: Optional[str] = None, description: Optional[str] = None): e = Edge() e.pathway = pathway @@ -1429,7 +1676,6 @@ class MLRelativeReasoning(EPModel): pathway_qs = pathway_qs.distinct() return pathway_qs - def _get_reactions(self) -> QuerySet: return Reaction.objects.filter(package__in=self.data_packages.all()).distinct() @@ -1479,7 +1725,6 @@ class MLRelativeReasoning(EPModel): self.app_domain.build() logger.debug("Done building applicability domain.") - self.model_status = self.BUILT_NOT_EVALUATED self.save() @@ -1590,7 +1835,6 @@ class MLRelativeReasoning(EPModel): logger.info(f"Full predict took {(end - start).total_seconds()}s") return res - @staticmethod def combine_products_and_probs(rules: List['Rule'], probabilities, products): res = [] @@ -1746,7 +1990,7 @@ class ApplicabilityDomain(EnviPathModel): ) dists_with_index = list() - for ti, dist in zip(train_instances, dists): + for ti, dist in zip(train_instances, dists): dists_with_index.append((ti[0], dist[1])) # sort them in a descending way and take at most `self.num_neighbours` @@ -1754,12 +1998,14 @@ class ApplicabilityDomain(EnviPathModel): dists_with_index = dists_with_index[:self.num_neighbours] # compute average distance - rule_reliabilities[rule_idx] = sum([d[1] for d in dists_with_index]) / len(dists_with_index) if len(dists_with_index) > 0 else 0.0 + rule_reliabilities[rule_idx] = sum([d[1] for d in dists_with_index]) / len(dists_with_index) if len( + dists_with_index) > 0 else 0.0 # for local_compatibility we'll need the datasets for the indices having the highest similarity neighbour_datasets = [(d[0], ds.at(d[0])) for d in dists_with_index] local_compatibilities[rule_idx] = self._compute_compatibility(rule_idx, probs, neighbour_datasets) - neighbours_per_rule[rule_idx] = [CompoundStructure.objects.get(uuid=ds[1].structure_id()) for ds in neighbour_datasets] + neighbours_per_rule[rule_idx] = [CompoundStructure.objects.get(uuid=ds[1].structure_id()) 
for ds in + neighbour_datasets] neighbor_probs_per_rule[rule_idx] = [probs[d[0]][rule_idx] for d in dists_with_index] ad_res = { @@ -1789,7 +2035,7 @@ class ApplicabilityDomain(EnviPathModel): neighbor['image'] = f"{n.url}?image=svg" neighbor['smiles'] = n.smiles neighbor['related_pathways'] = [ - pw.simple_json() for pw in Pathway.objects.filter( + pw.simple_json() for pw in Pathway.objects.filter( node__default_node_label=n, package__in=self.model.data_packages.all() ).distinct() @@ -1949,6 +2195,7 @@ class Scenario(EnviPathModel): for v in vals: yield NAME_MAPPING[k](**json.loads(v)) + class UserSettingPermission(Permission): uuid = models.UUIDField(null=False, blank=False, verbose_name='UUID of this object', primary_key=True, default=uuid4) diff --git a/templates/objects/compound.html b/templates/objects/compound.html index 09036387..a4d16c06 100644 --- a/templates/objects/compound.html +++ b/templates/objects/compound.html @@ -134,7 +134,52 @@ {% endif %} + + {% if compound.get_external_identifiers %} +