# Forked from enviPath/enviPy (1606 lines, 55 KiB, Python).
import abc
|
|
import json
|
|
import logging
|
|
import os
|
|
from collections import defaultdict
|
|
from datetime import datetime, timedelta, date
|
|
from typing import Union, List, Optional
|
|
from uuid import uuid4
|
|
|
|
import joblib
|
|
import numpy as np
|
|
from django.conf import settings as s
|
|
from django.contrib.auth.hashers import make_password, check_password
|
|
from django.contrib.auth.models import AbstractUser
|
|
from django.contrib.postgres.fields import ArrayField
|
|
from django.db import models, transaction
|
|
from django.db.models import JSONField
|
|
from django.utils import timezone
|
|
from django.utils.functional import cached_property
|
|
from model_utils.models import TimeStampedModel
|
|
from polymorphic.models import PolymorphicModel
|
|
from sklearn.metrics import precision_score, recall_score, jaccard_score
|
|
from sklearn.model_selection import ShuffleSplit
|
|
|
|
from utilities.chem import FormatConverter, ProductSet, PredictionResult, IndigoUtils
|
|
from utilities.ml import SparseLabelECC
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
##########################
|
|
# User/Groups/Permission #
|
|
##########################
|
|
|
|
class User(AbstractUser):
    """Application user that authenticates with an e-mail address.

    Adds a public UUID plus optional default package, group and prediction
    setting on top of Django's ``AbstractUser``.
    """

    email = models.EmailField(unique=True)

    uuid = models.UUIDField(null=False, blank=False, verbose_name='UUID of this object', unique=True,
                            default=uuid4)
    default_package = models.ForeignKey('epdb.Package', verbose_name='Default Package', null=True,
                                        on_delete=models.SET_NULL)
    default_group = models.ForeignKey('Group', verbose_name='Default Group', null=True, blank=False,
                                      on_delete=models.SET_NULL, related_name='default_group')
    default_setting = models.ForeignKey('epdb.Setting', on_delete=models.SET_NULL,
                                        verbose_name='The users default settings', null=True, blank=False)
    # TODO remove
    groups = models.ManyToManyField("Group", verbose_name='groups')

    # The e-mail address is the login identifier; the username is still required on creation.
    USERNAME_FIELD = "email"
    REQUIRED_FIELDS = ['username']

    @property
    def url(self):
        """Return the absolute URL of this user, built from ``SERVER_URL`` and the UUID."""
        server = s.SERVER_URL
        return '{}/user/{}'.format(server, self.uuid)

    def prediction_settings(self):
        """Return the user's default setting.

        When none is assigned yet, the setting flagged ``global_default`` is
        stored on the user (and saved) before it is returned.
        """
        if self.default_setting is not None:
            return self.default_setting

        self.default_setting = Setting.objects.get(global_default=True)
        self.save()
        return self.default_setting
|
|
|
|
|
|
def _default_token_expiry():
    """Return the default expiry of a new token: 90 days after the call.

    Kept as a named module-level function (not a lambda) so that Django can
    reference it from migrations.
    """
    return timezone.now() + timedelta(days=90)


class APIToken(models.Model):
    """Hashed API access token that belongs to a user and may expire."""

    hashed_key = models.CharField(max_length=128, unique=True)
    user = models.ForeignKey(User, on_delete=models.CASCADE)
    created = models.DateTimeField(auto_now_add=True)
    # The default must be a callable: an expression such as ``timezone.now() + ...``
    # is evaluated once at import time and would give every token the same,
    # ever-ageing expiry date.
    expires_at = models.DateTimeField(null=True, blank=True, default=_default_token_expiry)
    name = models.CharField(max_length=100, blank=True, help_text="Optional name for the token")

    def is_valid(self):
        """Return True when the token has no expiry date or the expiry lies in the future."""
        return not self.expires_at or self.expires_at > timezone.now()

    @staticmethod
    def create_token(user, name="", valid_for=90):
        """Create and store a new token for ``user``.

        :param user: owner of the token
        :param name: optional display name
        :param valid_for: validity in days; ``None`` creates a token without expiry
        :return: tuple ``(token, raw_token)``; only the hash of ``raw_token`` is stored,
                 so the raw value cannot be recovered later
        """
        import secrets

        raw_token = secrets.token_urlsafe(32)
        hashed = make_password(raw_token)
        expires_at = None if valid_for is None else timezone.now() + timedelta(days=valid_for)
        token = APIToken.objects.create(user=user, hashed_key=hashed, name=name,
                                        expires_at=expires_at)
        return token, raw_token

    def check_token(self, raw_token):
        """Return True when ``raw_token`` matches the stored hash."""
        return check_password(raw_token, self.hashed_key)
|
|
|
|
|
|
class Group(TimeStampedModel):
    """Named group with an owning user, member users and member groups."""

    uuid = models.UUIDField(null=False, blank=False, verbose_name='UUID of this object', unique=True, default=uuid4)
    name = models.TextField(blank=False, null=False, verbose_name='Group name')
    owner = models.ForeignKey("User", verbose_name='Group Owner', on_delete=models.CASCADE)
    description = models.TextField(blank=False, null=False, verbose_name='Descriptions', default='no description')
    user_member = models.ManyToManyField("User", verbose_name='User members', related_name='users_in_group')
    group_member = models.ManyToManyField("Group", verbose_name='Group member', related_name='groups_in_group')

    def __str__(self):
        """Return the group name followed by its primary key."""
        return f"{self.name} (pk={self.pk})"

    @property
    def url(self):
        """Return the absolute URL of this group, built from ``SERVER_URL`` and the UUID."""
        server = s.SERVER_URL
        return '{}/group/{}'.format(server, self.uuid)
|
|
|
|
|
|
class Permission(TimeStampedModel):
    """Permission level ('read', 'write' or 'all') shared by the concrete permission models.

    The levels are cumulative: 'write' implies 'read', and 'all' implies both.
    """

    READ = ('read', 'Read')
    WRITE = ('write', 'Write')
    ALL = ('all', 'All')
    PERMS = [READ, WRITE, ALL]

    permission = models.CharField(max_length=32, choices=PERMS, null=False)

    def has_read(self):
        """Return True when the stored level is any of the known levels."""
        known_levels = {key for key, _label in self.PERMS}
        return self.permission in known_levels

    def has_write(self):
        """Return True when the stored level is 'write' or 'all'."""
        return self.permission == self.WRITE[0] or self.permission == self.ALL[0]

    def has_all(self):
        """Return True when the stored level is 'all'."""
        return self.ALL[0] == self.permission

    class Meta:
        # NOTE(review): this is an annotation, not an assignment, so the ``abstract``
        # option is never set and Django treats Permission as a concrete model.
        # It presumably should read ``abstract = True`` - confirm, and plan the
        # schema/data migration, before changing it.
        abstract: True
|
|
|
|
|
|
class UserPackagePermission(Permission):
    """Permission level granted to a single user on a single package."""

    uuid = models.UUIDField(null=False, blank=False, verbose_name='UUID of this object', primary_key=True,
                            default=uuid4)
    user = models.ForeignKey('User', verbose_name='Permission to', on_delete=models.CASCADE)
    package = models.ForeignKey('epdb.Package', verbose_name='Permission on', on_delete=models.CASCADE)

    class Meta:
        # At most one permission entry per (package, user) pair.
        unique_together = [('package', 'user')]

    def __str__(self):
        """Return a readable summary of who holds which permission on which package."""
        return f"User: {self.user} has Permission: {self.permission} on Package: {self.package}"
|
|
|
|
|
|
class GroupPackagePermission(Permission):
    """Permission level granted to a group on a single package."""

    uuid = models.UUIDField(null=False, blank=False, verbose_name='UUID of this object', primary_key=True,
                            default=uuid4)
    group = models.ForeignKey('Group', verbose_name='Permission to', on_delete=models.CASCADE)
    package = models.ForeignKey('epdb.Package', verbose_name='Permission on', on_delete=models.CASCADE)

    class Meta:
        # At most one permission entry per (package, group) pair.
        unique_together = [('package', 'group')]

    def __str__(self):
        """Return a readable summary of which group holds which permission on which package."""
        return f"Group: {self.group} has Permission: {self.permission} on Package: {self.package}"
|
|
|
|
|
|
##############
|
|
# EP Objects #
|
|
##############
|
|
class EnviPathModel(TimeStampedModel):
    """Abstract base of the enviPath objects: UUID, name, description and a free-form JSON store."""

    uuid = models.UUIDField(null=False, blank=False, verbose_name='UUID of this object', unique=True,
                            default=uuid4)
    name = models.TextField(blank=False, null=False, verbose_name='Name', default='no name')
    description = models.TextField(blank=False, null=False, verbose_name='Descriptions', default='no description')

    # Arbitrary key/value data attached to the object.
    kv = JSONField(null=True, blank=True, default=dict)

    @property
    @abc.abstractmethod
    def url(self):
        """Return the URL of the object; subclasses provide the actual implementation."""

    def get_v(self, k, default=None):
        """Return the value stored under ``k`` in ``kv``, or ``default`` when ``kv`` is empty or lacks ``k``."""
        return self.kv.get(k, default) if self.kv else default

    class Meta:
        abstract = True
|
|
|
|
|
|
class AliasMixin(models.Model):
    """Mixin that adds a list of alternative names to a model.

    ``add_alias`` reads and writes ``self.name``, so the mixin is meant to be
    combined with a model that defines a ``name`` field.
    """

    aliases = ArrayField(
        models.TextField(blank=False, null=False),
        verbose_name='Aliases', default=list
    )

    @transaction.atomic
    def add_alias(self, new_alias, set_as_default=False):
        """Register ``new_alias`` and save the object.

        :param new_alias: the alias to add
        :param set_as_default: when True, ``new_alias`` becomes the object's name and the
                               previous name is kept as an alias; otherwise ``new_alias``
                               is appended to the aliases unless it is already present
        """
        if set_as_default:
            previous_name = self.name
            # ``aliases`` is a plain Python list (ArrayField), so it has no ``add``.
            if previous_name != new_alias and previous_name not in self.aliases:
                self.aliases.append(previous_name)
            self.name = new_alias
            # The new name must not be listed as its own alias.
            self.aliases = [alias for alias in self.aliases if alias != new_alias]
        elif new_alias not in self.aliases:
            self.aliases.append(new_alias)

        self.save()

    class Meta:
        abstract = True
|
|
|
|
|
|
class ScenarioMixin(models.Model):
    """Mixin that lets a model reference any number of scenarios."""

    scenarios = models.ManyToManyField("epdb.Scenario", verbose_name='Attached Scenarios')

    class Meta:
        abstract = True
|
|
|
|
class License(models.Model):
    """License described by a link and the link of an image representing it."""

    link = models.URLField(blank=False, null=False, verbose_name='link')
    image_link = models.URLField(blank=False, null=False, verbose_name='Image link')
|
|
|
|
|
|
class Package(EnviPathModel):
    """Container that owns compounds, rules, reactions, pathways, scenarios and models."""

    reviewed = models.BooleanField(verbose_name='Reviewstatus', default=False)
    license = models.ForeignKey('epdb.License', on_delete=models.SET_NULL, blank=True, null=True, verbose_name='License')

    def __str__(self):
        """Return the package name followed by its primary key."""
        return f"{self.name} (pk={self.pk})"

    @property
    def compounds(self):
        """Queryset of the compounds in this package."""
        return Compound.objects.filter(package=self)

    @property
    def rules(self):
        """Queryset of the rules in this package."""
        return Rule.objects.filter(package=self)

    @property
    def reactions(self):
        """Queryset of the reactions in this package."""
        return Reaction.objects.filter(package=self)

    @property
    def pathways(self):
        """Queryset of the pathways in this package."""
        return Pathway.objects.filter(package=self)

    @property
    def scenarios(self):
        """Queryset of the scenarios in this package."""
        return Scenario.objects.filter(package=self)

    # Defined after the field declarations on purpose: inside the class body this
    # name shadows the ``django.db.models`` module from here on.
    @property
    def models(self):
        """Queryset of the models (``EPModel``) in this package."""
        return EPModel.objects.filter(package=self)

    @property
    def url(self):
        """Return the absolute URL of this package, built from ``SERVER_URL`` and the UUID."""
        server = s.SERVER_URL
        return '{}/package/{}'.format(server, self.uuid)

    def get_applicable_rules(self):
        """
        Returns a list of rules, sorted by their "url", where the following applies:
        1. All composite rules (parallel and sequential) are part of the result
        2. A simple rule is part of the result only if no composite rule of this package uses it
        """
        rule_qs = self.rules

        composite_rules = [r for r in rule_qs if isinstance(r, (ParallelRule, SequentialRule))]

        # Simple rules that are already covered by one of the composite rules
        covered_simple_rules = set()
        for composite in composite_rules:
            covered_simple_rules.update(composite.simple_rules.all())

        standalone_rules = [
            r for r in rule_qs
            if isinstance(r, (SimpleAmbitRule, SimpleRDKitRule)) and r not in covered_simple_rules
        ]

        return sorted(composite_rules + standalone_rules, key=lambda x: x.url)
|
|
|
|
|
|
class Compound(EnviPathModel, AliasMixin, ScenarioMixin):
    """Compound of a package; groups one or more structures, one of which is the default."""

    package = models.ForeignKey('epdb.Package', verbose_name='Package', on_delete=models.CASCADE, db_index=True)
    default_structure = models.ForeignKey('CompoundStructure', verbose_name='Default Structure',
                                          related_name='compound_default_structure',
                                          on_delete=models.CASCADE, null=True)

    @property
    def structures(self):
        """Queryset of all structures of this compound."""
        return CompoundStructure.objects.filter(compound=self)

    @property
    def normalized_structure(self):
        """Return the structure flagged as normalized (raises if there is none or more than one)."""
        return CompoundStructure.objects.get(compound=self, normalized_structure=True)

    @property
    def url(self):
        """Return the URL of this compound below its package URL."""
        return '{}/compound/{}'.format(self.package.url, self.uuid)

    @transaction.atomic
    def set_default_structure(self, cs: 'CompoundStructure'):
        """Make ``cs`` the default structure.

        :raises ValueError: if ``cs`` belongs to a different compound
        """
        if cs.compound != self:
            raise ValueError("Attempt to set a CompoundStructure stored in a different compound as default")

        self.default_structure = cs
        self.save()

    @property
    def related_pathways(self):
        """Pathways of this package having a node labelled with the default structure, ordered by name."""
        pathways = Node.objects.filter(node_labels__in=[self.default_structure]).values_list('pathway', flat=True)
        return Pathway.objects.filter(package=self.package, id__in=set(pathways)).order_by('name')

    @property
    def related_reactions(self):
        """Reactions of this package using the default structure as educt or product, ordered by name."""
        return (
            Reaction.objects.filter(package=self.package, educts__in=[self.default_structure])
            |
            Reaction.objects.filter(package=self.package, products__in=[self.default_structure])
            # The OR spans two many-to-many joins, which can yield the same reaction
            # several times; distinct() collapses those rows.
        ).distinct().order_by('name')

    @staticmethod
    @transaction.atomic
    def create(package: Package, smiles: str, name: str = None, description: str = None, *args, **kwargs) -> 'Compound':
        """Return the compound for ``smiles`` in ``package``, creating it when necessary.

        An existing compound is returned when the package already contains a structure
        with the given SMILES or with its standardized form. Otherwise a new compound is
        created together with a structure for ``smiles`` (the default structure) and,
        if ``smiles`` is not already standardized, a second, normalized structure.

        :raises ValueError: if ``smiles`` is missing, empty or cannot be parsed
        """
        smiles = smiles.strip() if smiles else ''
        if smiles == '':
            raise ValueError('SMILES is required')

        parsed = FormatConverter.from_smiles(smiles)
        if parsed is None:
            raise ValueError('Given SMILES is invalid')

        standardized_smiles = FormatConverter.standardize(smiles)

        # Look for a direct match first, then for the standardized SMILES.
        # first() needs a single query and cannot fail on multiple matches.
        # TODO should we add a structure when only the standardized one matches?
        for candidate in (smiles, standardized_smiles):
            existing = CompoundStructure.objects.filter(smiles=candidate, compound__package=package).first()
            if existing is not None:
                return existing.compound

        # Generate Compound
        c = Compound()
        c.package = package

        # For name and description we have defaults so only set them if they carry a real value
        if name is not None and name != '':
            c.name = name

        if description is not None and description != '':
            c.description = description

        c.save()

        is_standardized = standardized_smiles == smiles

        if not is_standardized:
            # Derive the texts from the saved compound so that the model defaults are used
            # instead of the literal "None" when no name/description was given.
            _ = CompoundStructure.create(c, standardized_smiles, name='Normalized structure of {}'.format(c.name),
                                         description='{} (in its normalized form)'.format(c.description),
                                         normalized_structure=True)

        # Empty strings are passed as None so the structure falls back to its defaults as well.
        cs = CompoundStructure.create(c, smiles, name=name or None, description=description or None,
                                      normalized_structure=is_standardized)

        c.default_structure = cs
        c.save()

        return c

    @transaction.atomic
    def add_structure(self, smiles: str, name: str = None, description: str = None, default_structure: bool = False,
                      *args, **kwargs) -> 'CompoundStructure':
        """Add a further structure to this compound.

        The standardized form of ``smiles`` must equal the SMILES of the compound's
        normalized structure. If the compound already has a structure with exactly this
        SMILES, that structure is returned unchanged.

        :param default_structure: make a newly created structure the compound's default
        :raises ValueError: if ``smiles`` is missing, empty, unparsable or standardizes
                            to something else than the compound's normalized structure
        """
        smiles = smiles.strip() if smiles else ''
        if smiles == '':
            raise ValueError('SMILES is required')

        parsed = FormatConverter.from_smiles(smiles)
        if parsed is None:
            raise ValueError('Given SMILES is invalid')

        standardized_smiles = FormatConverter.standardize(smiles)

        is_standardized = standardized_smiles == smiles

        if self.normalized_structure.smiles != standardized_smiles:
            raise ValueError('The standardized SMILES does not match the compounds standardized one!')

        # Exact match on the SMILES. The ``in`` lookup must not be used with a plain
        # string here: it would be split into its single characters.
        existing = CompoundStructure.objects.filter(smiles=smiles, compound=self).first()
        if existing is not None:
            return existing

        cs = CompoundStructure.create(self, smiles, name=name, description=description,
                                      normalized_structure=is_standardized)

        if default_structure:
            self.default_structure = cs
            self.save()

        return cs

    class Meta:
        unique_together = [('uuid', 'package')]
|
|
|
|
|
|
class CompoundStructure(EnviPathModel, AliasMixin, ScenarioMixin):
    """A single structure (SMILES) of a compound."""

    compound = models.ForeignKey('epdb.Compound', on_delete=models.CASCADE, db_index=True)
    smiles = models.TextField(blank=False, null=False, verbose_name='SMILES')
    normalized_structure = models.BooleanField(null=False, blank=False, default=False)

    @property
    def url(self):
        """Return the URL of this structure below its compound URL."""
        return '{}/structure/{}'.format(self.compound.url, self.uuid)

    # @property
    # def related_pathways(self):
    #     pathways = Node.objects.filter(node_labels__in=[self]).values_list('pathway', flat=True)
    #     return Pathway.objects.filter(package=self.compound.package, id__in=set(pathways)).order_by('name')

    # @property
    # def related_reactions(self):
    #     return (
    #         Reaction.objects.filter(package=self.compound.package, educts__in=[self])
    #         |
    #         Reaction.objects.filter(package=self.compound.package, products__in=[self])
    #     ).order_by('name')

    @staticmethod
    @transaction.atomic
    def create(compound: Compound, smiles: str, name: str = None, description: str = None, *args, **kwargs):
        """Return the structure with ``smiles`` of ``compound``, creating it when necessary.

        :param name: name of a new structure; the model default is used when None
        :param description: description of a new structure; the model default is used when None
        :param kwargs: ``normalized_structure`` (bool) marks a new structure as the normalized one
        :raises ValueError: if ``compound`` has not been saved yet
        """
        # Check persistence first: an unsaved compound cannot be used in the lookup below.
        if compound.pk is None:
            raise ValueError("Unpersisted Compound! Persist compound first!")

        existing = CompoundStructure.objects.filter(compound=compound, smiles=smiles).first()
        if existing is not None:
            return existing

        cs = CompoundStructure()
        if name is not None:
            cs.name = name

        if description is not None:
            cs.description = description

        cs.smiles = smiles
        cs.compound = compound

        if 'normalized_structure' in kwargs:
            cs.normalized_structure = kwargs['normalized_structure']

        cs.save()

        return cs

    # TODO add find method

    @property
    def InChIKey(self):
        """Return the result of ``FormatConverter.InChIKey`` for the stored SMILES."""
        return FormatConverter.InChIKey(self.smiles)

    @property
    def canonical_smiles(self):
        """Return the result of ``FormatConverter.canonicalize`` for the stored SMILES."""
        return FormatConverter.canonicalize(self.smiles)

    @property
    def as_svg(self):
        """Return the result of ``IndigoUtils.mol_to_svg`` for the stored SMILES."""
        return IndigoUtils.mol_to_svg(self.smiles)
|
|
|
|
|
|
class Rule(PolymorphicModel, EnviPathModel, AliasMixin, ScenarioMixin):
    """Polymorphic base class of all rules of a package."""

    package = models.ForeignKey('epdb.Package', verbose_name='Package', on_delete=models.CASCADE, db_index=True)

    # I think this only affects Django Admin which we are barely using
    # # https://github.com/django-polymorphic/django-polymorphic/issues/229
    _non_polymorphic = models.Manager()

    class Meta:
        base_manager_name = '_non_polymorphic'

    @abc.abstractmethod
    def apply(self, *args, **kwargs):
        """Apply the rule; implemented by the concrete rule classes."""

    @staticmethod
    def cls_for_type(rule_type: str):
        """Return the rule class named ``rule_type``.

        :raises ValueError: if ``rule_type`` is not one of the known class names
        """
        # Built at call time because the classes are defined further down in the module.
        known_types = {
            'SimpleAmbitRule': SimpleAmbitRule,
            'SimpleRDKitRule': SimpleRDKitRule,
            'ParallelRule': ParallelRule,
            'SequentialRule': SequentialRule,
        }
        try:
            return known_types[rule_type]
        except (KeyError, TypeError):
            # TypeError covers unhashable input, which is just as unknown.
            raise ValueError(f'{rule_type} is unknown!') from None

    @staticmethod
    @transaction.atomic
    def create(package: Package, rule_type: str, name: str = None, description: str = None, *args, **kwargs):
        """Create and save a rule of the class named ``rule_type``.

        :param name: rule name; the model default is kept when None or empty
        :param description: rule description; the model default is kept when None or empty
        :param kwargs: further attributes set on the rule; each key has to match the attribute name
        :raises ValueError: if ``rule_type`` is unknown
        """
        r = Rule.cls_for_type(rule_type)()
        r.package = package

        # name and description are NOT NULL fields with defaults, so only
        # overwrite the defaults with real values
        if name is not None and name != '':
            r.name = name

        if description is not None and description != '':
            r.description = description

        # As we are setting params this way the "k" has to match the property name
        for k, v in kwargs.items():
            setattr(r, k, v)

        r.save()
        return r
|
|
|
|
|
|
#
|
|
# @property
|
|
# def related_pathways(self):
|
|
# reaction_ids = self.related_reactions.values_list('id', flat=True)
|
|
# pathways = Edge.objects.filter(edge_label__in=reaction_ids).values_list('pathway', flat=True)
|
|
# return Pathway.objects.filter(package=self.package, id__in=set(pathways)).order_by('name')
|
|
#
|
|
# @property
|
|
# def related_reactions(self):
|
|
# return (
|
|
# Reaction.objects.filter(package=self.package, rules__in=[self])
|
|
# |
|
|
# Reaction.objects.filter(package=self.package, rules__in=[self])
|
|
# ).order_by('name')
|
|
#
|
|
#
|
|
class SimpleRule(Rule):
    """Common base class of the simple rules; adds no fields or behaviour of its own."""
|
|
|
|
|
|
#
|
|
#
|
|
class SimpleAmbitRule(SimpleRule):
    """Simple rule defined by a SMIRKS plus optional reactant and product filter SMARTS."""

    smirks = models.TextField(blank=False, null=False, verbose_name='SMIRKS')
    reactant_filter_smarts = models.TextField(null=True, verbose_name='Reactant Filter SMARTS')
    product_filter_smarts = models.TextField(null=True, verbose_name='Product Filter SMARTS')

    @property
    def url(self):
        """Return the URL of this rule below its package URL."""
        package_url = self.package.url
        return '{}/simple-ambit-rule/{}'.format(package_url, self.uuid)

    def apply(self, smiles):
        """Return the result of ``FormatConverter.apply`` for ``smiles`` and this rule's SMIRKS."""
        return FormatConverter.apply(smiles, self.smirks)

    @property
    def reactants_smarts(self):
        """Part of the SMIRKS before the first '>>'."""
        parts = self.smirks.split('>>')
        return parts[0]

    @property
    def products_smarts(self):
        """Part of the SMIRKS after the first '>>' (up to a second '>>', if any)."""
        parts = self.smirks.split('>>')
        return parts[1]

    @property
    def related_reactions(self):
        """Reactions of reviewed packages that reference this rule, ordered by name."""
        reviewed_packages = Package.objects.filter(reviewed=True)
        reactions = self.reaction_rule.filter(package__in=reviewed_packages)
        return reactions.order_by('name')

    @property
    def related_pathways(self):
        """Pathways having an edge labelled with one of the related reactions, ordered by name."""
        edges = Edge.objects.filter(edge_label__in=self.related_reactions)
        return Pathway.objects.filter(id__in=edges.values('pathway_id')).order_by('name')

    @property
    def as_svg(self):
        """Return the result of ``IndigoUtils.smirks_to_svg`` for this rule's SMIRKS."""
        return IndigoUtils.smirks_to_svg(self.smirks, True)
|
|
|
|
|
|
class SimpleRDKitRule(SimpleRule):
    """Simple rule defined by a single reaction SMARTS."""

    reaction_smarts = models.TextField(blank=False, null=False, verbose_name='SMIRKS')

    def apply(self, smiles):
        """Return the result of ``FormatConverter.apply`` for ``smiles`` and this rule's reaction SMARTS."""
        return FormatConverter.apply(smiles, self.reaction_smarts)

    @property
    def url(self):
        """Return the URL of this rule below its package URL."""
        package_url = self.package.url
        return '{}/simple-rdkit-rule/{}'.format(package_url, self.uuid)
|
|
|
|
|
|
#
|
|
#
|
|
class ParallelRule(Rule):
    """Composite rule that applies all of its simple rules to the same input."""

    simple_rules = models.ManyToManyField('epdb.SimpleRule', verbose_name='Simple rules')

    @property
    def url(self):
        """Return the URL of this rule below its package URL."""
        package_url = self.package.url
        return '{}/parallel-rule/{}'.format(package_url, self.uuid)

    @property
    def srs(self):
        """Queryset of the simple rules of this rule."""
        return self.simple_rules.all()

    def apply(self, structure):
        """Apply every simple rule to ``structure`` and return the distinct results as a list."""
        collected = [
            result
            for simple_rule in self.srs
            for result in simple_rule.apply(structure)
        ]
        return list(set(collected))
|
|
|
|
|
|
class SequentialRule(Rule):
    """Composite rule whose simple rules are linked through ``SequentialRuleOrdering``."""

    simple_rules = models.ManyToManyField('epdb.SimpleRule', verbose_name='Simple rules',
                                          through='SequentialRuleOrdering')

    @property
    def url(self):
        """Return the URL of this rule below its package URL."""
        # A rule belongs to a package (it has no ``compound``), like all other rule classes.
        return '{}/sequential-rule/{}'.format(self.package.url, self.uuid)

    @property
    def srs(self):
        """Queryset of the simple rules of this rule."""
        return self.simple_rules.all()

    def apply(self, structure):
        """Apply every simple rule to ``structure`` and return the distinct results as a set."""
        # TODO determine levels or see java implementation
        res = set()
        for simple_rule in self.srs:
            # update() modifies the set in place; union() returns a new set,
            # which would leave ``res`` empty when the result is ignored.
            res.update(simple_rule.apply(structure))
        return res
|
|
|
|
|
|
class SequentialRuleOrdering(models.Model):
    """Through model linking a sequential rule to a simple rule together with an order index."""

    sequential_rule = models.ForeignKey(SequentialRule, on_delete=models.CASCADE)
    simple_rule = models.ForeignKey(SimpleRule, on_delete=models.CASCADE)
    order_index = models.IntegerField(null=False, blank=False)
|
|
|
|
|
|
class Reaction(EnviPathModel, AliasMixin, ScenarioMixin):
    """Reaction of a package that turns educt structures into product structures."""

    package = models.ForeignKey('epdb.Package', verbose_name='Package', on_delete=models.CASCADE, db_index=True)
    educts = models.ManyToManyField('epdb.CompoundStructure', verbose_name='Educts', related_name='reaction_educts')
    products = models.ManyToManyField('epdb.CompoundStructure', verbose_name='Products',
                                      related_name='reaction_products')
    rules = models.ManyToManyField('epdb.Rule', verbose_name='Rule', related_name='reaction_rule')
    multi_step = models.BooleanField(verbose_name='Multistep Reaction')
    medline_references = ArrayField(
        models.TextField(blank=False, null=False), null=True,
        verbose_name='Medline References'
    )

    @property
    def url(self):
        """Return the URL of this reaction below its package URL."""
        return '{}/reaction/{}'.format(self.package.url, self.uuid)

    @staticmethod
    @transaction.atomic
    def create(package: Package, name: str = None, description: str = None,
               educts: Union[List[str], List[CompoundStructure]] = None,
               products: Union[List[str], List[CompoundStructure]] = None,
               rule: Rule = None, multi_step: bool = True):
        """Create and save a reaction.

        :param name: reaction name; the model default is kept when None or empty
        :param description: reaction description; the model default is kept when None or empty
        :param educts: SMILES strings or CompoundStructure objects; None is treated as empty
        :param products: SMILES strings or CompoundStructure objects; None is treated as empty
        :param rule: optional rule to attach to the reaction
        :raises ValueError: if educts and products mix SMILES strings and structures (or
                            contain anything else), or if a SMILES is rejected by
                            ``Compound.create``
        """
        # None (the declared default) means "no educts/products"
        educts = [] if educts is None else list(educts)
        products = [] if products is None else list(products)
        participants = educts + products

        # Determine if we receive smiles or compoundstructures
        if all(isinstance(x, str) for x in participants):
            _educts = [Compound.create(package, smiles).default_structure for smiles in educts]
            _products = [Compound.create(package, smiles).default_structure for smiles in products]
        elif all(isinstance(x, CompoundStructure) for x in participants):
            _educts = educts
            _products = products
        else:
            raise ValueError("educts and products must either all be SMILES strings "
                             "or all be CompoundStructure objects")

        r = Reaction()
        r.package = package

        # name and description are NOT NULL fields with defaults, so only
        # overwrite the defaults with real values
        if name is not None and name != '':
            r.name = name

        if description is not None and description != '':
            r.description = description

        r.multi_step = multi_step

        r.save()

        if rule:
            r.rules.add(rule)

        for educt in _educts:
            r.educts.add(educt)

        for product in _products:
            r.products.add(product)

        r.save()
        return r

    def smirks(self):
        """Return '<educts>>><products>' with the SMILES of each side joined by '.'."""
        educt_smiles = '.'.join(cs.smiles for cs in self.educts.all())
        product_smiles = '.'.join(cs.smiles for cs in self.products.all())
        return f"{educt_smiles}>>{product_smiles}"

    @property
    def as_svg(self):
        """Return the result of ``IndigoUtils.smirks_to_svg`` for this reaction's SMIRKS."""
        return IndigoUtils.smirks_to_svg(self.smirks(), False, width=800, height=400)

    @property
    def related_pathways(self):
        """Pathways having an edge labelled with this reaction, ordered by name."""
        return Pathway.objects.filter(
            id__in=Edge.objects.filter(edge_label=self).values('pathway_id')).order_by('name')
|
|
|
|
|
|
|
|
class Pathway(EnviPathModel, AliasMixin, ScenarioMixin):
    """Pathway of a package: a graph of nodes connected by edges."""

    package = models.ForeignKey('epdb.Package', verbose_name='Package', on_delete=models.CASCADE, db_index=True)

    @property
    def root_nodes(self):
        """Queryset of the nodes of this pathway with depth 0."""
        return Node.objects.filter(pathway=self, depth=0)

    @property
    def nodes(self):
        """Queryset of all nodes of this pathway."""
        return Node.objects.filter(pathway=self)

    @property
    def edges(self):
        """Queryset of all edges of this pathway."""
        return Edge.objects.filter(pathway=self)

    @property
    def url(self):
        """Return the URL of this pathway below its package URL."""
        return '{}/pathway/{}'.format(self.package.url, self.uuid)

    def d3_json(self):
        """Return the pathway as a JSON string with "nodes" and "links" for the D3 front end.

        Nodes are listed in depth-first order starting at the root nodes. An edge with
        more than one end node is routed through an additional pseudo node. Edges
        without start or end nodes are skipped.

        :raises AssertionError: if the traversal does not reach every node of the pathway
        """
        # Load the edges and their node sets once; querying them again for every
        # visited node costs (nodes x edges) database round trips.
        edges = list(self.edges)
        edge_nodes = [(list(e.start_nodes.all()), list(e.end_nodes.all())) for e in edges]

        # Ideally it would be something like this but
        # to reduce crossing in edges do a DFS
        # nodes = [n.d3_json() for n in self.nodes]
        nodes = []
        processed = set()
        queue = list(self.root_nodes)

        while queue:
            current = queue.pop()
            processed.add(current)

            nodes.append(current.d3_json())

            for start_nodes, end_nodes in edge_nodes:
                if current in start_nodes:
                    for prod in end_nodes:
                        if prod not in queue and prod not in processed:
                            queue.append(prod)

        # We shouldn't lose or make up nodes...
        # Checked explicitly (not with ``assert``) so the check survives ``python -O``.
        db_node_count = self.nodes.count()
        if len(nodes) != db_node_count:
            raise AssertionError(f"Num Nodes {len(nodes)} vs. DB Nodes {db_node_count}")
        logger.debug("Num Nodes %s vs. DB Nodes %s", len(nodes), db_node_count)

        links = [e.d3_json() for e in edges]

        # D3 links Nodes based on indices in nodes array
        node_url_to_idx = dict()
        for i, n in enumerate(nodes):
            n['id'] = i
            node_url_to_idx[n['url']] = i

        adjusted_links = []
        for link in links:
            if not link['start_node_urls'] or not link['end_node_urls']:
                # Nothing to draw for an edge that lacks a start or an end
                logger.warning("Skipping edge %s without start or end nodes", link['id'])
                continue

            # Only the first start node is used as the source of the link
            source_idx = node_url_to_idx[link['start_node_urls'][0]]

            # Check if we'll need pseudo nodes
            if len(link['end_node_urls']) > 1:
                start_depth = nodes[source_idx]['depth']
                pseudo_idx = len(nodes)
                pseudo_node = {
                    "depth": start_depth + 0.5,
                    "pseudo": True,
                    "id": pseudo_idx,
                }
                nodes.append(pseudo_node)

                # add links start -> pseudo
                new_link = {
                    'name': link['name'],
                    'id': link['id'],
                    'reaction': link['reaction'],
                    'source': source_idx,
                    'target': pseudo_idx
                }
                adjusted_links.append(new_link)

                # add n links pseudo -> end
                for target in link['end_node_urls']:
                    new_link = {
                        'name': link['name'],
                        'id': link['id'],
                        'reaction': link['reaction'],
                        'source': pseudo_idx,
                        'target': node_url_to_idx[target]
                    }
                    adjusted_links.append(new_link)

            else:
                link['source'] = source_idx
                link['target'] = node_url_to_idx[link['end_node_urls'][0]]
                adjusted_links.append(link)

        res = {
            "aliases": [],
            "completed": "true",
            "description": self.description,
            "id": self.url,
            "isIncremental": False,
            "isPredicted": False,
            "lastModified": 1447842835894,
            "pathwayName": self.name,
            "reviewStatus": "reviewed" if self.package.reviewed else 'unreviewed',
            "scenarios": [],
            "upToDate": True,
            "links": adjusted_links,
            "nodes": nodes,
            "modified": self.modified.strftime('%Y-%m-%d %H:%M:%S')
        }

        return json.dumps(res)

    @staticmethod
    @transaction.atomic
    def create(package, name, description, smiles):
        """Create and save a pathway whose root node (depth 0) is built from ``smiles``.

        :param name: pathway name; the model default is kept when None or empty
        :param description: pathway description; the model default is kept when None or empty
        """
        pw = Pathway()
        pw.package = package

        # name and description are NOT NULL fields with defaults, so only
        # overwrite the defaults with real values
        if name is not None and name != '':
            pw.name = name

        if description is not None and description != '':
            pw.description = description

        pw.save()

        # create root node
        Node.create(pw, smiles, 0)

        return pw
|
|
|
|
|
|
class Node(EnviPathModel, AliasMixin, ScenarioMixin):
    """Node of a pathway, labelled with one or more compound structures."""

    pathway = models.ForeignKey('epdb.Pathway', verbose_name='belongs to', on_delete=models.CASCADE, db_index=True)
    default_node_label = models.ForeignKey('epdb.CompoundStructure', verbose_name='Default Node Label',
                                           on_delete=models.CASCADE, related_name='default_node_structure')
    node_labels = models.ManyToManyField('epdb.CompoundStructure', verbose_name='All Node Labels',
                                         related_name='node_structures')
    out_edges = models.ManyToManyField('epdb.Edge', verbose_name='Outgoing Edges')
    depth = models.IntegerField(verbose_name='Node depth', null=False, blank=False)

    @property
    def url(self):
        """Return the URL of this node below its pathway URL."""
        return '{}/node/{}'.format(self.pathway.url, self.uuid)

    def d3_json(self):
        """Return the node as a dict for the D3 front end (see ``Pathway.d3_json``)."""
        return {
            "depth": self.depth,
            "url": self.url,
            "node_label_id": self.default_node_label.url,
            "image": self.url + '?image=svg',
            "imageSize": 490,  # TODO
            "name": self.default_node_label.name,
            "smiles": self.default_node_label.smiles,
        }

    @staticmethod
    @transaction.atomic
    def create(pathway, smiles, depth):
        """Return the node for ``smiles`` in ``pathway``, creating it when necessary.

        The compound for ``smiles`` is looked up or created in the pathway's package;
        its default structure becomes the node label. An existing node with that label
        is returned as is (its depth is not changed).
        """
        c = Compound.create(pathway.package, smiles)

        existing = Node.objects.filter(pathway=pathway, default_node_label=c.default_structure).first()
        if existing is not None:
            logger.debug("found node")
            return existing

        n = Node()
        n.pathway = pathway
        n.depth = depth

        n.default_node_label = c.default_structure
        n.save()

        n.node_labels.add(c.default_structure)
        n.save()

        return n

    @property
    def as_svg(self):
        """Return the result of ``IndigoUtils.mol_to_svg`` for the SMILES of the default node label."""
        return IndigoUtils.mol_to_svg(self.default_node_label.smiles)
|
|
|
|
|
|
class Edge(PolymorphicModel, EnviPathModel, AliasMixin, ScenarioMixin):
    """Edge of a pathway that connects start nodes to end nodes and is labelled with a reaction."""

    pathway = models.ForeignKey('epdb.Pathway', verbose_name='belongs to', on_delete=models.CASCADE, db_index=True)
    edge_label = models.ForeignKey('epdb.Reaction', verbose_name='Edge label', null=True, on_delete=models.SET_NULL)
    start_nodes = models.ManyToManyField('epdb.Node', verbose_name='Start Nodes', related_name='edge_educts')
    end_nodes = models.ManyToManyField('epdb.Node', verbose_name='End Nodes', related_name='edge_products')

    @property
    def url(self):
        """Return the URL of this edge below its pathway URL."""
        return '{}/edge/{}'.format(self.pathway.url, self.uuid)

    def d3_json(self):
        """Return the edge as a dict; ``Pathway.d3_json`` turns the node URLs into source/target indices."""
        # {
        #     "ecNumbers": [
        #         {
        #             "ecName": "DDT 2,3-dioxygenase",
        #             "ecNumber": "1.14.12.-"
        #         }
        #     ],
        #     "id": "https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/pathway/3f58e4d4-1c63-4b30-bf31-7ae4b98899fe/edge/ff193e7b-f010-43d4-acb3-45f34d938824",
        #     "idreaction": "https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/reaction/e11419cd-6b46-470b-8a06-a08d62281734",
        #     "multistep": "false",
        #     "name": "Eawag BBD reaction r0450",
        #     "pseudo": False,
        #     "scenarios": [],
        #     "source": 0,
        #     "target": 4
        # }

        return {
            'name': self.name,
            'id': self.url,
            'reaction': self.edge_label.url if self.edge_label else None,
            # TODO
            'start_node_urls': [x.url for x in self.start_nodes.all()],
            'end_node_urls': [x.url for x in self.end_nodes.all()],
        }

    @staticmethod
    @transaction.atomic
    def create(pathway, start_nodes, end_nodes, rule: Optional[Rule] = None, name: Optional[str] = None,
               description: Optional[str] = None):
        """Create an edge between ``start_nodes`` and ``end_nodes`` plus the reaction labelling it.

        Runs in one transaction so that a failure while creating the reaction
        does not leave an edge without label behind.

        :param rule: optional rule attached to the created reaction
        :param name: name of the reaction; defaults to 'Reaction <number of reactions in the package + 1>'
        :param description: description of the reaction; defaults to ``DEFAULT_VALUES['description']``
                            from the settings
        """
        e = Edge()
        e.pathway = pathway
        e.save()

        for node in start_nodes:
            e.start_nodes.add(node)

        for node in end_nodes:
            e.end_nodes.add(node)

        if name is None:
            name = f'Reaction {pathway.package.reactions.count() + 1}'

        if description is None:
            description = s.DEFAULT_VALUES['description']

        r = Reaction.create(pathway.package, name=name, description=description,
                            educts=[n.default_node_label for n in e.start_nodes.all()],
                            products=[n.default_node_label for n in e.end_nodes.all()],
                            rule=rule, multi_step=False
                            )

        e.edge_label = r
        e.save()
        return e
|
|
|
|
|
|
class EPModel(PolymorphicModel, EnviPathModel):
    """Polymorphic base class of the models stored in a package."""

    package = models.ForeignKey('epdb.Package', verbose_name='Package', on_delete=models.CASCADE, db_index=True)

    @property
    def url(self):
        """Return the URL of this model below its package URL."""
        package_url = self.package.url
        return '{}/model/{}'.format(package_url, self.uuid)
|
|
|
|
|
|
class MLRelativeReasoning(EPModel):
    """Multi-label model that assigns a probability to every applicable rule.

    The training data is derived from the pathways of ``data_packages`` and the
    rules of ``rule_packages``. The dataset (``<uuid>.json``) and the fitted
    classifier (``<uuid>.pkl``) are stored in ``settings.MODEL_DIR``.
    ``model_status`` tracks the build/evaluation progress and ``eval_results``
    holds the metrics written by :meth:`evaluate_model`.
    """

    rule_packages = models.ManyToManyField("Package", verbose_name="Rule Packages", related_name="rule_packages")
    data_packages = models.ManyToManyField("Package", verbose_name="Data Packages", related_name="data_packages")
    eval_packages = models.ManyToManyField("Package", verbose_name="Evaluation Packages", related_name="eval_packages")
    threshold = models.FloatField(null=False, blank=False, default=0.5)

    INITIAL = "INITIAL"
    INITIALIZING = "INITIALIZING"
    BUILDING = "BUILDING"
    BUILT_NOT_EVALUATED = "BUILT_NOT_EVALUATED"
    EVALUATING = "EVALUATING"
    FINISHED = "FINISHED"
    ERROR = "ERROR"
    PROGRESS_STATUS_CHOICES = {
        INITIAL: "Initial",
        INITIALIZING: "Model is initializing.",
        BUILDING: "Model is building.",
        BUILT_NOT_EVALUATED: "Model is built and can be used for predictions, Model is not evaluated yet.",
        EVALUATING: "Model is evaluating",
        FINISHED: "Model has finished building and evaluation.",
        ERROR: "Model has failed."
    }
    model_status = models.CharField(blank=False, null=False, choices=PROGRESS_STATUS_CHOICES, default=INITIAL)

    eval_results = JSONField(null=True, blank=True, default=dict)

    @staticmethod
    @transaction.atomic
    def create(package, name, description, rule_packages, data_packages, eval_packages, threshold):
        """Create and persist a new model together with its package relations.

        Args:
            package: Package that owns the model.
            name: Name of the model.
            description: Description of the model.
            rule_packages: Packages whose rules the model uses.
            data_packages: Packages providing training pathways. If empty, the
                rule packages are used as data packages as well.
            eval_packages: Optional evaluation packages (may be empty/None).
            threshold: Probability threshold stored on the model.

        Returns:
            The saved ``MLRelativeReasoning`` instance.
        """
        mlrr = MLRelativeReasoning()
        mlrr.package = package
        mlrr.name = name
        mlrr.description = description
        mlrr.threshold = threshold

        # The instance must be saved before many-to-many relations can be added.
        mlrr.save()

        mlrr.rule_packages.add(*rule_packages)

        # Without dedicated data packages the rule packages double as data source.
        mlrr.data_packages.add(*(data_packages if data_packages else rule_packages))

        if eval_packages:
            mlrr.eval_packages.add(*eval_packages)

        mlrr.save()
        return mlrr

    @cached_property
    def applicable_rules(self):
        """
        Returns a ordered set of rules where the following applies:
        1. All Composite will be added to result
        2. All SimpleRules will be added if theres no CompositeRule present using the SimpleRule
        Ordering is based on "url" field.
        """
        rule_qs = Rule.objects.none()
        for package in self.rule_packages.all():
            rule_qs |= package.rules

        rule_qs = rule_qs.distinct()

        composite_rules = []
        reflected_simple_rules = set()
        for r in rule_qs:
            if isinstance(r, (ParallelRule, SequentialRule)):
                composite_rules.append(r)
                reflected_simple_rules.update(r.simple_rules.all())

        standalone_simple_rules = [
            r for r in rule_qs
            if isinstance(r, (SimpleAmbitRule, SimpleRDKitRule)) and r not in reflected_simple_rules
        ]

        return sorted(composite_rules + standalone_simple_rules, key=lambda x: x.url)

    def _get_excludes(self):
        """Return the SMILES that must be left out of the dataset (none yet)."""
        # TODO
        return []

    def _get_pathways(self):
        """Return the distinct pathways of all data packages as one queryset."""
        pathway_qs = Pathway.objects.none()
        for p in self.data_packages.all():
            pathway_qs |= p.pathways

        return pathway_qs.distinct()

    @staticmethod
    def _standardize_or_keep(smiles):
        """Return the standardized SMILES, or the input unchanged if that fails."""
        try:
            return FormatConverter.standardize(smiles)
        except Exception:
            # Deliberately best effort: compare the raw SMILES instead.
            logger.debug(f"Could not standardize {smiles}, using it unchanged")
            return smiles

    def build_dataset(self):
        """Build the training dataset and write it to ``<MODEL_DIR>/<uuid>.json``.

        For every collected compound structure one row is produced:
        ``X`` holds the ``FormatConverter.maccs`` features followed by one
        "triggered" flag per applicable rule, ``y`` holds one "observed" flag
        per applicable rule. A rule counts as observed for a compound if a
        single-educt reaction of the data pathways starts at that compound and
        its (standardized) products equal the rule's predicted products.

        Returns:
            Tuple ``(X, y)`` of lists of lists.
        """
        self.model_status = self.INITIALIZING
        self.save()

        start = datetime.now()
        applicable_rules = self.applicable_rules
        logger.info("got rules")

        pathways = self._get_pathways()
        logger.info("got pathways")
        excludes = self._get_excludes()

        # Collect all compounds and reactions of the data pathways
        compounds = set()
        reactions = set()
        num_pathways = len(pathways)
        for i, pathway in enumerate(pathways):
            logger.debug(f"{i + 1}/{num_pathways}...")
            for n in pathway.nodes:
                cs = n.default_node_label.compound.default_structure
                # TODO too many lookups
                if cs.smiles in excludes:
                    continue

                compounds.add(cs)

            for e in pathway.edges:
                reactions.add(e.edge_label)

        logger.info(f"Collected {len(compounds)} compounds and {len(reactions)} reactions")

        # Keys of both sets have the form "<rule uuid> + <compound structure uuid>"
        triggered = set()
        observed = set()

        # compound structure -> rule -> predicted product SMILES
        predicted_products = defaultdict(lambda: defaultdict(set))

        num_compounds = len(compounds)
        for i, c in enumerate(compounds):
            logger.debug(f"{i + 1}/{num_compounds}...")
            for r in applicable_rules:
                # TODO check normalization
                product_sets = r.apply(c.smiles)

                if len(product_sets) == 0:
                    continue

                triggered.add(f"{r.uuid} + {c.uuid}")

                for ps in product_sets:
                    for p in ps:
                        predicted_products[c][r].add(p)

        for r in reactions:
            if r is None:
                logger.debug("Skipping edge without reaction")
                continue

            educts = list(r.educts.all())
            if len(educts) != 1:
                logger.debug(f"Skipping {r.url}")
                continue

            c = educts[0]
            if c not in predicted_products:
                continue

            products = list(r.products.all())
            # Standardized lazily: only needed if a rule has a matching product count.
            observed_smiles = None

            for rule, predicted_smiles in predicted_products[c].items():
                if len(predicted_smiles) == 0 or len(predicted_smiles) != len(products):
                    continue

                logger.debug(f"potential match for {c.smiles} and {r.uuid} ({r.name})")

                if observed_smiles is None:
                    observed_smiles = {self._standardize_or_keep(cs.smiles) for cs in products}

                standardized_predictions = {self._standardize_or_keep(smi) for smi in predicted_smiles}

                if observed_smiles == standardized_predictions:
                    observed.add(f"{rule.uuid} + {c.uuid}")
                    logger.debug(f"Adding observed, current count {len(observed)}")

        header = None
        X = []
        y = []
        for i, c in enumerate(compounds):
            logger.debug(f'{i + 1}/{num_compounds}...')
            # Features
            feat = FormatConverter.maccs(c.smiles)
            trig = []
            obs = []
            for rule in applicable_rules:
                key = f"{rule.uuid} + {c.uuid}"
                trig.append(1 if key in triggered else 0)
                # Labels come from the observed set; using the triggered set here
                # would make y a plain copy of the trigger columns.
                obs.append(1 if key in observed else 0)

            if header is None:
                header = [f'feature_{j}' for j in range(len(feat))] \
                         + [f'trig_{r.uuid}' for r in applicable_rules] \
                         + [f'corr_{r.uuid}' for r in applicable_rules]
            X.append(feat + trig)
            y.append(obs)

        end = datetime.now()
        logger.info(f"Duration {(end - start).total_seconds()}s")

        data = {
            'X': X,
            'y': y,
            'header': header
        }
        f = os.path.join(s.MODEL_DIR, f"{self.uuid}.json")
        with open(f, 'w') as fh:
            json.dump(data, fh)
        return X, y

    def load_dataset(self):
        """Load the dataset dict (``X``, ``y``, ``header``) written by build_dataset."""
        ds_path = os.path.join(s.MODEL_DIR, f"{self.uuid}.json")
        with open(ds_path, 'r') as fh:
            return json.load(fh)

    def build_model(self, X, y):
        """Fit the classifier on ``X``/``y`` and dump it to ``<MODEL_DIR>/<uuid>.pkl``.

        The status is set to BUILDING while fitting and to BUILT_NOT_EVALUATED
        once the model file has been written.
        """
        self.model_status = self.BUILDING
        self.save()

        mod = SparseLabelECC(
            **s.DEFAULT_MODELS_PARAMS
        )

        mod.fit(X, y)
        f = os.path.join(s.MODEL_DIR, f"{self.uuid}.pkl")
        joblib.dump(mod, f)
        self.model_status = self.BUILT_NOT_EVALUATED
        self.save()

    def rebuild(self):
        """Refit the classifier from the dataset already stored on disk."""
        data = self.load_dataset()
        self.build_model(data['X'], data['y'])

    def evaluate_model(self):
        """Evaluate the model configuration with repeated random train/test splits.

        The stored dataset is split 20 times (25% test data, fixed seed). For
        each split a fresh classifier is fitted and scored with the
        sample-averaged Jaccard score at ``self.threshold`` as well as
        sample-averaged precision and recall for the thresholds
        0.00, 0.05, ..., 1.00. The averages over all splits are stored in
        ``eval_results`` under the keys ``average_accuracy``,
        ``average_precision_per_threshold`` and ``average_recall_per_threshold``
        and the status is set to FINISHED.

        Raises:
            ValueError: If the model is not in state BUILT_NOT_EVALUATED.
        """
        if self.model_status != self.BUILT_NOT_EVALUATED:
            raise ValueError(f"Can't evaluate a model in state {self.model_status}!")

        self.model_status = self.EVALUATING
        self.save()

        data = self.load_dataset()

        X = np.array(data['X'])
        y = np.array(data['y'])

        n_splits = 20

        shuff = ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42)

        # Kept as a nested function so joblib serializes it exactly as before.
        def train_and_evaluate(X, y, train_index, test_index, threshold):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            model = SparseLabelECC(
                **s.DEFAULT_MODELS_PARAMS
            )
            model.fit(X_train, y_train)

            y_pred = model.predict_proba(X_test)
            y_thresholded = (y_pred >= threshold).astype(int)

            acc = jaccard_score(y_test, y_thresholded, average='samples', zero_division=0)

            prec, rec = dict(), dict()

            for t in np.arange(0, 1.05, 0.05):
                temp_thresholded = (y_pred >= t).astype(int)
                prec[f"{t:.2f}"] = precision_score(y_test, temp_thresholded, average='samples', zero_division=0)
                rec[f"{t:.2f}"] = recall_score(y_test, temp_thresholded, average='samples', zero_division=0)

            return acc, prec, rec

        from joblib import Parallel, delayed
        ret_vals = Parallel(n_jobs=10)(
            delayed(train_and_evaluate)(X, y, train_index, test_index, self.threshold)
            for train_index, test_index in shuff.split(X)
        )

        def compute_averages(data):
            """Average the (score, precision dict, recall dict) tuples of all splits."""
            num_items = len(data)
            avg_first_item = sum(item[0] for item in data) / num_items

            sum_dict2 = defaultdict(float)
            sum_dict3 = defaultdict(float)

            for _, dict2, dict3 in data:
                for key in dict2:
                    sum_dict2[key] += dict2[key]
                for key in dict3:
                    sum_dict3[key] += dict3[key]

            avg_dict2 = {key: val / num_items for key, val in sum_dict2.items()}
            avg_dict3 = {key: val / num_items for key, val in sum_dict3.items()}

            return {
                "average_accuracy": float(avg_first_item),
                "average_precision_per_threshold": avg_dict2,
                "average_recall_per_threshold": avg_dict3
            }

        self.eval_results = compute_averages(ret_vals)
        self.model_status = self.FINISHED
        self.save()

    @cached_property
    def model(self):
        """Load the fitted classifier from ``<MODEL_DIR>/<uuid>.pkl`` (cached)."""
        mod = joblib.load(os.path.join(s.MODEL_DIR, f'{self.uuid}.pkl'))
        mod.base_clf.n_jobs = -1
        return mod

    def predict(self, smiles) -> List['PredictionResult']:
        """Predict a probability for every applicable rule on ``smiles``.

        The feature row is built like a dataset row: the
        ``FormatConverter.maccs`` features followed by one trigger flag per
        applicable rule.

        Returns:
            One ``PredictionResult`` per applicable rule, carrying the rule's
            products (empty list if the rule did not trigger), the predicted
            probability and the rule.
        """
        start = datetime.now()
        features = FormatConverter.maccs(smiles)

        trig = []
        prods = []
        for rule in self.applicable_rules:
            products = rule.apply(smiles)

            if len(products):
                trig.append(1)
                prods.append(products)
            else:
                trig.append(0)
                prods.append([])

        end_ds_gen = datetime.now()
        logger.info(f"Gen predict dataset took {(end_ds_gen - start).total_seconds()}s")
        pred = self.model.predict_proba([features + trig])

        res = [
            PredictionResult(smis, p, rule)
            for rule, p, smis in zip(self.applicable_rules, pred[0], prods)
        ]

        end = datetime.now()
        logger.info(f"Full predict took {(end - start).total_seconds()}s")
        return res

    @property
    def pr_curve(self):
        """Return the averaged precision/recall per threshold as a list of dicts.

        Raises:
            ValueError: If the model is not in state FINISHED.
        """
        if self.model_status != self.FINISHED:
            raise ValueError(f"Expected {self.FINISHED} but model is in status {self.model_status}")

        precision = self.eval_results['average_precision_per_threshold']
        recall = self.eval_results['average_recall_per_threshold']

        return [
            {
                'precision': precision[t],
                'recall': recall[t],
                'threshold': float(t)
            }
            for t in precision
        ]
|
|
|
|
class ApplicabilityDomain(EnviPathModel):
    """Applicability-domain settings attached to an ``MLRelativeReasoning`` model."""

    model = models.ForeignKey(MLRelativeReasoning, on_delete=models.CASCADE)

    num_neighbours = models.FloatField(blank=False, null=False, default=5)
    reliability_threshold = models.FloatField(blank=False, null=False, default=0.5)
    local_compatibilty_threshold = models.FloatField(blank=False, null=False, default=0.5)

    def build_applicability_domain(self):
        """Scale the model's dataset, project it with PCA and take per-axis bounds.

        NOTE(review): the computed bounds are neither stored nor returned yet,
        so this method currently has no observable result.
        """
        features = self.model.load_dataset()['X']

        import numpy as np
        from sklearn.decomposition import PCA
        from sklearn.preprocessing import StandardScaler

        standardized = StandardScaler().fit_transform(features)
        # choose number of components
        projected = PCA(n_components=5).fit_transform(standardized)

        upper_bounds = np.max(projected, axis=0)
        lower_bounds = np.min(projected, axis=0)
|
|
|
|
|
|
|
|
|
|
class RuleBaseRelativeReasoning(EPModel):
    """Placeholder model type; it adds no fields or behavior to ``EPModel``."""
|
|
|
|
|
|
class EnviFormer(EPModel):
    """Model type that delegates predictions to ``settings.ENVIFORMER_INSTANCE``."""

    threshold = models.FloatField(null=False, blank=False, default=0.5)

    @staticmethod
    @transaction.atomic
    def create(package, name, description, threshold):
        """Create and persist an ``EnviFormer`` model in ``package``."""
        mod = EnviFormer()
        mod.package = package
        mod.name = name
        mod.description = description
        mod.threshold = threshold
        mod.save()

        return mod

    @cached_property
    def model(self):
        """Return the instance configured as ``settings.ENVIFORMER_INSTANCE`` or None."""
        mod = getattr(s, 'ENVIFORMER_INSTANCE', None)
        logger.info(f"Model from settings {hash(mod)}")
        return mod

    def predict(self, smiles) -> List['PredictionResult']:
        """Predict products for ``smiles`` with the configured instance.

        The input is kekulized before it is submitted. The instance is expected
        to return a mapping of product SMILES to probability, e.g.
        ``{'C#N': 0.46, 'C#C': 0.05}``.

        Returns:
            One ``PredictionResult`` per predicted product, each wrapping a
            single-product ``ProductSet``; the rule is always None.

        Raises:
            ValueError: If ``smiles`` cannot be parsed.
            RuntimeError: If no ``ENVIFORMER_INSTANCE`` is configured.
        """
        from rdkit import Chem

        m = Chem.MolFromSmiles(smiles)
        if m is None:
            # MolFromSmiles signals a parse failure by returning None.
            raise ValueError(f"Could not parse SMILES '{smiles}'")

        model = self.model
        if model is None:
            raise RuntimeError("No ENVIFORMER_INSTANCE configured in settings")

        Chem.Kekulize(m)
        kek = Chem.MolToSmiles(m, kekuleSmiles=True)
        logger.info(f"Submitting {kek} to {hash(model)}")
        products = model.predict(kek)
        logger.info(f"Got results {products}")

        return [
            PredictionResult([ProductSet([smi])], prob, None)
            for smi, prob in products.items()
        ]

    @cached_property
    def applicable_rules(self):
        """Return an empty list; this model type does not use rules."""
        return []
|
|
|
|
|
|
class PluginModel(EPModel):
    """Placeholder model type; it adds no fields or behavior to ``EPModel``."""
|
|
|
|
|
|
# TODO: fully implement AdditionalInformation
# TODO: consider Scenario, BaseScenario, RelatedScenario
|
|
class Scenario(EnviPathModel):
    """Scenario of a package with a date, a type and free-form additional information."""

    package = models.ForeignKey('epdb.Package', verbose_name='Package', on_delete=models.CASCADE, db_index=True)
    # This field was previously declared as a second ``type`` and therefore
    # overwritten, so the date passed to ``create`` was never persisted.
    # NOTE: adding the column requires a schema migration.
    date = models.CharField(max_length=256, null=False, blank=False, default='No date')
    type = models.CharField(max_length=256, null=False, blank=False, default='Not specified')

    additional_information = models.JSONField(verbose_name='Additional Information')

    @property
    def url(self):
        """Return the scenario URL: the package URL followed by ``/scenario/<uuid>``."""
        return '{}/scenario/{}'.format(self.package.url, self.uuid)

    @staticmethod
    @transaction.atomic
    def create(package, name, description, date, type, additional_information):
        """Create and persist a scenario.

        Args:
            package: Package that owns the scenario.
            name: Name of the scenario.
            description: Description of the scenario.
            date: Date of the scenario, stored as text.
            type: Type of the scenario, stored as text.
            additional_information: JSON-serializable additional information.

        Returns:
            The saved ``Scenario`` instance.
        """
        # Not named ``s``: that name is the module-level alias of the settings.
        scenario = Scenario()
        scenario.package = package
        scenario.name = name
        scenario.description = description
        scenario.date = date
        scenario.type = type
        scenario.additional_information = additional_information

        scenario.save()

        return scenario

    def add_additional_information(self, data):
        """Not implemented yet."""
        pass

    def remove_additional_information(self, data):
        """Not implemented yet."""
        pass

    def set_additional_information(self, data):
        """Not implemented yet."""
        pass

    # Sample scenario payload, kept for reference.
    example = {
        "additionalInformationCollection": {
            "additionalInformation": [
                {
                    "addInfoName": "referringscenario",
                    "creationDate": "2017-12-15 11:46:07.993",
                    "data": "http://localhost:8080/package/5882df9c-dae1-4d80-a40e-db4724271456/scenario/11482bc1-8a0c-44a0-ae8b-5a02ae732559",
                    "id": "http://localhost:8080/package/5882df9c-dae1-4d80-a40e-db4724271456/infocollection/0f30d0ca-b2bd-4c85-a425-ed8b22d4fed6/referringscenario/41532eac-e04a-4474-937a-df1344c3dce7",
                    "identifier": "referringscenario",
                    "lastModified": "2017-12-15 11:46:07.993",
                    "name": "referringscenario"
                },
                {
                    "addInfoName": "halflife",
                    "creationDate": "2017-12-15 11:46:07.934",
                    "data": "First Order;;reported,no further information about the model;3690.0 - 3690.0;McCorquodale, G. & Wardrope, L. (2006)",
                    "id": "http://localhost:8080/package/5882df9c-dae1-4d80-a40e-db4724271456/infocollection/0f30d0ca-b2bd-4c85-a425-ed8b22d4fed6/halflife/8f44fdd9-f453-4ab1-8509-2ee5826faad7",
                    "identifier": "halflife",
                    "lastModified": "2020-05-05 17:26:14.753",
                    "name": "halflife"
                }
            ],
            "creationDate": "2017-12-15 11:46:07.608",
            "id": "http://localhost:8080/package/5882df9c-dae1-4d80-a40e-db4724271456/infocollection/0f30d0ca-b2bd-4c85-a425-ed8b22d4fed6",
            "identifier": "infocollection",
            "lastModified": "2020-05-05 17:26:15.496",
            "name": "no name"
        },
        "aliases": [],
        "creationDate": "2017-12-15 11:46:08.221",
        "date": "no date",
        "description": "no description",
        "id": "http://localhost:8080/package/5882df9c-dae1-4d80-a40e-db4724271456/scenario/e7089e49-e07d-4a2d-8045-e144b7eb5a5e",
        "identifier": "scenario",
        "lastModified": "2020-05-05 17:26:15.065",
        "name": "McCorquodale, G. & Wardrope, L. (2006) - (00002) (Related Scenario) - (00000)",
        "reviewStatus": "reviewed",
        "scenarios": [],
        "type": "Not specified"
    }
|
|
|
|
|
|
class UserSettingPermission(Permission):
    """Permission of one user on one setting; each (setting, user) pair is unique."""

    uuid = models.UUIDField(
        null=False,
        blank=False,
        verbose_name='UUID of this object',
        primary_key=True,
        default=uuid4,
    )
    user = models.ForeignKey(
        'User',
        verbose_name='Permission to',
        on_delete=models.CASCADE,
    )
    setting = models.ForeignKey(
        'epdb.Setting',
        verbose_name='Permission on',
        on_delete=models.CASCADE,
    )

    class Meta:
        unique_together = [('setting', 'user')]

    def __str__(self):
        """Describe who holds which permission on which setting."""
        return f"User: {self.user} has Permission: {self.permission} on Setting: {self.setting}"
|
|
|
|
|
|
class Setting(EnviPathModel):
    """Prediction setting: pathway size limits, rule packages and an optional model."""

    public = models.BooleanField(null=False, blank=False, default=False)
    global_default = models.BooleanField(null=False, blank=False, default=False)

    max_depth = models.IntegerField(null=False, blank=False, verbose_name='Setting Max Depth', default=5)
    max_nodes = models.IntegerField(null=False, blank=False, verbose_name='Setting Max Number of Nodes', default=30)

    rule_packages = models.ManyToManyField("Package", verbose_name="Setting Rule Packages",
                                           related_name="setting_rule_packages")
    model = models.ForeignKey('EPModel', verbose_name='Setting EPModel', on_delete=models.SET_NULL, null=True,
                              blank=True)
    model_threshold = models.FloatField(null=True, blank=True, verbose_name='Setting Model Threshold', default=0.25)

    @cached_property
    def applicable_rules(self):
        """
        Returns a ordered set of rules where the following applies:
        1. All Composite will be added to result
        2. All SimpleRules will be added if theres no CompositeRule present using the SimpleRule
        Ordering is based on "url" field.
        """
        rule_qs = Rule.objects.none()
        for package in self.rule_packages.all():
            rule_qs |= package.rules

        rule_qs = rule_qs.distinct()

        composite_rules = []
        reflected_simple_rules = set()
        for r in rule_qs:
            if isinstance(r, (ParallelRule, SequentialRule)):
                composite_rules.append(r)
                reflected_simple_rules.update(r.simple_rules.all())

        standalone_simple_rules = [
            r for r in rule_qs
            if isinstance(r, (SimpleAmbitRule, SimpleRDKitRule)) and r not in reflected_simple_rules
        ]

        return sorted(composite_rules + standalone_simple_rules, key=lambda x: x.url)

    def expand(self, pathway, current_node):
        """Decision Method whether to expand on a certain Node or not

        Returns an empty list once the pathway has reached ``max_nodes`` or
        ``max_depth``. Otherwise the transformations of ``current_node`` are
        returned: with a model set, its predictions whose probability is at
        least ``model_threshold``; without a model, one ``PredictionResult``
        with probability 1.0 for every applicable rule that yields products.
        """
        num_nodes = pathway.num_nodes()
        if num_nodes >= self.max_nodes:
            logger.info(f"Pathway has {num_nodes} which exceeds the limit of {self.max_nodes}")
            return []

        depth = pathway.depth()
        if depth >= self.max_depth:
            logger.info(f"Pathway has reached depth {depth} which exceeds the limit of {self.max_depth}")
            return []

        transformations = []
        if self.model is not None:
            logger.debug(f"Predicting with model {self.model}")
            pred_results = self.model.predict(current_node.smiles)
            logger.debug(f"Prediction results {pred_results}")
            for pred_result in pred_results:
                if pred_result.probability >= self.model_threshold:
                    transformations.append(pred_result)
        else:
            for rule in self.applicable_rules:
                tmp_products = rule.apply(current_node.smiles)
                if tmp_products:
                    transformations.append(PredictionResult(tmp_products, 1.0, rule))

        return transformations

    @transaction.atomic
    def make_global_default(self):
        """Make this setting the only global default; it becomes public as well."""
        # Flag all others as global_default False to ensure there's only a single global_default
        Setting.objects.all().update(global_default=False)
        if not self.public:
            self.public = True
        self.global_default = True
        self.save()
|