forked from enviPath/enviPy
Merge remote-tracking branch 'origin/develop' into fix/xss
# Conflicts: # templates/modals/collections/new_model_modal.html
This commit is contained in:
166
epdb/models.py
166
epdb/models.py
@ -311,7 +311,7 @@ class ExternalDatabase(TimeStampedModel):
|
||||
},
|
||||
{
|
||||
"database": ExternalDatabase.objects.get(name="ChEBI"),
|
||||
"placeholder": "ChEBI ID without prefix e.g. 12345",
|
||||
"placeholder": "ChEBI ID without prefix e.g. 10576",
|
||||
},
|
||||
],
|
||||
"structure": [
|
||||
@ -329,7 +329,7 @@ class ExternalDatabase(TimeStampedModel):
|
||||
},
|
||||
{
|
||||
"database": ExternalDatabase.objects.get(name="ChEBI"),
|
||||
"placeholder": "ChEBI ID without prefix e.g. 12345",
|
||||
"placeholder": "ChEBI ID without prefix e.g. 10576",
|
||||
},
|
||||
],
|
||||
"reaction": [
|
||||
@ -343,7 +343,7 @@ class ExternalDatabase(TimeStampedModel):
|
||||
},
|
||||
{
|
||||
"database": ExternalDatabase.objects.get(name="UniProt"),
|
||||
"placeholder": "Query ID for UniPro e.g. rhea:12345",
|
||||
"placeholder": "Query ID for UniProt e.g. rhea:12345",
|
||||
},
|
||||
],
|
||||
}
|
||||
@ -478,7 +478,7 @@ class ChemicalIdentifierMixin(ExternalIdentifierMixin):
|
||||
return self.add_external_identifier("CAS", cas_number)
|
||||
|
||||
def get_pubchem_identifiers(self):
    """Return PubChem Compound and PubChem Substance identifiers combined.

    Both kinds are looked up through ``get_external_identifier`` and merged
    with ``|``. The previous ``or`` form returned the substance lookup only
    when the compound lookup was falsy, so entries of one kind could be
    dropped when both existed.
    """
    compound_ids = self.get_external_identifier("PubChem Compound")
    substance_ids = self.get_external_identifier("PubChem Substance")
    return compound_ids | substance_ids
|
||||
|
||||
@ -495,6 +495,20 @@ class ChemicalIdentifierMixin(ExternalIdentifierMixin):
|
||||
return self.get_external_identifier("CAS")
|
||||
|
||||
|
||||
class KEGGIdentifierMixin(ExternalIdentifierMixin):
    """Abstract mixin with helpers for "KEGG Reaction" external identifiers."""

    class Meta:
        abstract = True

    @property
    def kegg_reaction_links(self):
        """External identifiers looked up under the name "KEGG Reaction"."""
        database_name = "KEGG Reaction"
        return self.get_external_identifier(database_name)

    def add_kegg_reaction_id(self, kegg_id):
        """Register ``kegg_id`` as a "KEGG Reaction" external identifier.

        The genome.jp entry URL built from the ID is passed as the third
        argument to ``add_external_identifier``; its result is returned.
        """
        entry_url = f"https://www.genome.jp/entry/{kegg_id}"
        return self.add_external_identifier("KEGG Reaction", kegg_id, entry_url)
|
||||
|
||||
|
||||
class ReactionIdentifierMixin(ExternalIdentifierMixin):
|
||||
class Meta:
|
||||
abstract = True
|
||||
@ -1014,6 +1028,26 @@ class CompoundStructure(EnviPathModel, AliasMixin, ScenarioMixin, ChemicalIdenti
|
||||
return self.compound.default_structure == self
|
||||
|
||||
|
||||
class EnzymeLink(EnviPathModel, KEGGIdentifierMixin):
    """Associates a rule with an EC number and the evidence relations backing it."""

    # Owning rule; deleting the rule cascades to its enzyme links.
    rule = models.ForeignKey("Rule", on_delete=models.CASCADE, db_index=True)
    ec_number = models.TextField(blank=False, null=False, verbose_name="EC Number")
    classification_level = models.IntegerField(
        blank=False, null=False, verbose_name="Classification Level"
    )
    linking_method = models.TextField(blank=False, null=False, verbose_name="Linking Method")

    # Reactions and edges recorded as evidence for this link.
    reaction_evidence = models.ManyToManyField("epdb.Reaction")
    edge_evidence = models.ManyToManyField("epdb.Edge")

    external_identifiers = GenericRelation("ExternalIdentifier")

    def _url(self):
        """Build this link's URL beneath the owning rule's URL."""
        parent_url = self.rule.url
        return "{}/enzymelink/{}".format(parent_url, self.uuid)

    def get_group(self) -> str:
        """Return the first three dot-separated parts of the EC number plus ``.-``.

        For example ``"1.2.3.4"`` yields ``"1.2.3.-"``.
        """
        leading_levels = self.ec_number.split(".")[:3]
        return ".".join(leading_levels) + ".-"
|
||||
|
||||
|
||||
class Rule(PolymorphicModel, EnviPathModel, AliasMixin, ScenarioMixin):
|
||||
package = models.ForeignKey(
|
||||
"epdb.Package", verbose_name="Package", on_delete=models.CASCADE, db_index=True
|
||||
@ -1095,6 +1129,18 @@ class Rule(PolymorphicModel, EnviPathModel, AliasMixin, ScenarioMixin):
|
||||
|
||||
return new_rule
|
||||
|
||||
def enzymelinks(self):
    """Return everything reachable through this object's ``enzymelink_set``."""
    related_links = self.enzymelink_set
    return related_links.all()
|
||||
|
||||
def get_grouped_enzymelinks(self):
    """Group this object's enzyme links by their EC group key.

    The key comes from each link's ``get_group()`` (first three EC levels
    followed by ``.-``), so the grouping rule lives in one place instead of
    being re-implemented here.

    Returns:
        dict: group key -> list of links, in the order ``enzymelinks()``
        yields them. Empty when there are no links.
    """
    grouped = defaultdict(list)

    for link in self.enzymelinks():
        grouped[link.get_group()].append(link)

    # Hand back a plain dict so missing keys raise KeyError for callers
    # instead of silently creating empty groups.
    return dict(grouped)
|
||||
|
||||
|
||||
class SimpleRule(Rule):
|
||||
pass
|
||||
@ -1436,6 +1482,16 @@ class Reaction(EnviPathModel, AliasMixin, ScenarioMixin, ReactionIdentifierMixin
|
||||
id__in=Edge.objects.filter(edge_label=self).values("pathway_id")
|
||||
).order_by("name")
|
||||
|
||||
def get_related_enzymes(self):
    """Collect "Enzyme" additional information for this reaction.

    Walks every edge whose ``edge_label`` is this object, then every
    scenario attached to those edges, and concatenates the entries stored
    under the "Enzyme" key of each scenario's ``additional_information``.

    Returns:
        list: all collected entries; empty when nothing is found.
    """
    enzymes = []

    for edge in Edge.objects.filter(edge_label=self):
        for scenario in edge.scenarios.all():
            info = scenario.additional_information
            # Direct membership test instead of scanning every key for a match.
            if "Enzyme" in info:
                enzymes.extend(info["Enzyme"])

    return enzymes
|
||||
|
||||
|
||||
class Pathway(EnviPathModel, AliasMixin, ScenarioMixin):
|
||||
package = models.ForeignKey(
|
||||
@ -2172,10 +2228,18 @@ class PackageBasedModel(EPModel):
|
||||
self.model_status = self.BUILT_NOT_EVALUATED
|
||||
self.save()
|
||||
|
||||
def evaluate_model(self):
|
||||
def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None):
|
||||
if self.model_status != self.BUILT_NOT_EVALUATED:
|
||||
raise ValueError(f"Can't evaluate a model in state {self.model_status}!")
|
||||
|
||||
if multigen:
|
||||
self.multigen_eval = multigen
|
||||
self.save()
|
||||
|
||||
if eval_packages is not None:
|
||||
for p in eval_packages:
|
||||
self.eval_packages.add(p)
|
||||
|
||||
self.model_status = self.EVALUATING
|
||||
self.save()
|
||||
|
||||
@ -2472,7 +2536,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel):
|
||||
package: "Package",
|
||||
rule_packages: List["Package"],
|
||||
data_packages: List["Package"],
|
||||
eval_packages: List["Package"],
|
||||
threshold: float = 0.5,
|
||||
min_count: int = 10,
|
||||
max_count: int = 0,
|
||||
@ -2521,10 +2584,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel):
|
||||
for p in rule_packages:
|
||||
rbrr.data_packages.add(p)
|
||||
|
||||
if eval_packages:
|
||||
for p in eval_packages:
|
||||
rbrr.eval_packages.add(p)
|
||||
|
||||
rbrr.save()
|
||||
|
||||
return rbrr
|
||||
@ -2579,7 +2638,6 @@ class MLRelativeReasoning(PackageBasedModel):
|
||||
package: "Package",
|
||||
rule_packages: List["Package"],
|
||||
data_packages: List["Package"],
|
||||
eval_packages: List["Package"],
|
||||
threshold: float = 0.5,
|
||||
name: "str" = None,
|
||||
description: str = None,
|
||||
@ -2619,10 +2677,6 @@ class MLRelativeReasoning(PackageBasedModel):
|
||||
for p in rule_packages:
|
||||
mlrr.data_packages.add(p)
|
||||
|
||||
if eval_packages:
|
||||
for p in eval_packages:
|
||||
mlrr.eval_packages.add(p)
|
||||
|
||||
if build_app_domain:
|
||||
ad = ApplicabilityDomain.create(
|
||||
mlrr,
|
||||
@ -2942,7 +2996,6 @@ class EnviFormer(PackageBasedModel):
|
||||
def create(
|
||||
package: "Package",
|
||||
data_packages: List["Package"],
|
||||
eval_packages: List["Package"],
|
||||
threshold: float = 0.5,
|
||||
name: "str" = None,
|
||||
description: str = None,
|
||||
@ -2975,10 +3028,6 @@ class EnviFormer(PackageBasedModel):
|
||||
for p in data_packages:
|
||||
mod.data_packages.add(p)
|
||||
|
||||
if eval_packages:
|
||||
for p in eval_packages:
|
||||
mod.eval_packages.add(p)
|
||||
|
||||
# if build_app_domain:
|
||||
# ad = ApplicabilityDomain.create(mod, app_domain_num_neighbours, app_domain_reliability_threshold,
|
||||
# app_domain_local_compatibility_threshold)
|
||||
@ -2992,7 +3041,8 @@ class EnviFormer(PackageBasedModel):
|
||||
from enviformer import load
|
||||
|
||||
ckpt = os.path.join(s.MODEL_DIR, "enviformer", str(self.uuid), f"{self.uuid}.ckpt")
|
||||
return load(device=s.ENVIFORMER_DEVICE, ckpt_path=ckpt)
|
||||
mod = load(device=s.ENVIFORMER_DEVICE, ckpt_path=ckpt)
|
||||
return mod
|
||||
|
||||
def predict(self, smiles) -> List["PredictionResult"]:
    """Predict for one SMILES by sending a one-element batch to ``predict_batch``."""
    batch_results = self.predict_batch([smiles])
    return batch_results[0]
|
||||
@ -3006,8 +3056,12 @@ class EnviFormer(PackageBasedModel):
|
||||
for smiles in smiles_list
|
||||
]
|
||||
logger.info(f"Submitting {canon_smiles} to {self.name}")
|
||||
start = datetime.now()
|
||||
products_list = self.model.predict_batch(canon_smiles)
|
||||
logger.info(f"Got results {products_list}")
|
||||
end = datetime.now()
|
||||
logger.info(
|
||||
f"Prediction took {(end - start).total_seconds():.2f} seconds. Got results {products_list}"
|
||||
)
|
||||
|
||||
results = []
|
||||
for products in products_list:
|
||||
@ -3034,6 +3088,7 @@ class EnviFormer(PackageBasedModel):
|
||||
|
||||
start = datetime.now()
|
||||
# Standardise reactions for the training data, EnviFormer ignores stereochemistry currently
|
||||
co2 = {"C(=O)=O", "O=C=O"}
|
||||
ds = []
|
||||
for reaction in self._get_reactions():
|
||||
educts = ".".join(
|
||||
@ -3048,7 +3103,8 @@ class EnviFormer(PackageBasedModel):
|
||||
for smile in reaction.products.all()
|
||||
]
|
||||
)
|
||||
ds.append(f"{educts}>>{products}")
|
||||
if products not in co2:
|
||||
ds.append(f"{educts}>>{products}")
|
||||
|
||||
end = datetime.now()
|
||||
logger.debug(f"build_dataset took {(end - start).total_seconds()} seconds")
|
||||
@ -3084,10 +3140,18 @@ class EnviFormer(PackageBasedModel):
|
||||
args = {"clz": "EnviFormer"}
|
||||
return args
|
||||
|
||||
def evaluate_model(self):
|
||||
def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None):
|
||||
if self.model_status != self.BUILT_NOT_EVALUATED:
|
||||
raise ValueError(f"Can't evaluate a model in state {self.model_status}!")
|
||||
|
||||
if multigen:
|
||||
self.multigen_eval = multigen
|
||||
self.save()
|
||||
|
||||
if eval_packages is not None:
|
||||
for p in eval_packages:
|
||||
self.eval_packages.add(p)
|
||||
|
||||
self.model_status = self.EVALUATING
|
||||
self.save()
|
||||
|
||||
@ -3244,7 +3308,7 @@ class EnviFormer(PackageBasedModel):
|
||||
|
||||
ds = self.load_dataset()
|
||||
n_splits = 20
|
||||
shuff = ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42)
|
||||
shuff = ShuffleSplit(n_splits=n_splits, test_size=0.1, random_state=42)
|
||||
|
||||
# Single gen eval is done in one loop of train then evaluate rather than storing all n_splits trained models
|
||||
# this helps reduce the memory footprint.
|
||||
@ -3312,7 +3376,7 @@ class EnviFormer(PackageBasedModel):
|
||||
# Compute splits of the collected pathway and evaluate. Like single gen we train and evaluate in each
|
||||
# iteration instead of storing all trained models.
|
||||
for split_id, (train, test) in enumerate(
|
||||
ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42).split(pathways)
|
||||
ShuffleSplit(n_splits=n_splits, test_size=0.1, random_state=42).split(pathways)
|
||||
):
|
||||
train_pathways = [pathways[i] for i in train]
|
||||
test_pathways = [pathways[i] for i in test]
|
||||
@ -3614,3 +3678,53 @@ class Setting(EnviPathModel):
|
||||
self.public = True
|
||||
self.global_default = True
|
||||
self.save()
|
||||
|
||||
|
||||
class JobLogStatus(models.TextChoices):
    """Status values used as the choices of ``JobLog.status``."""

    # Each member is a (stored value, human-readable label) pair.
    INITIAL = ("INITIAL", "Initial")
    SUCCESS = ("SUCCESS", "Success")
    FAILURE = ("FAILURE", "Failure")
    REVOKED = ("REVOKED", "Revoked")
    IGNORED = ("IGNORED", "Ignored")
|
||||
|
||||
|
||||
class JobLog(TimeStampedModel):
    """Log entry for a Celery task associated with a user, keyed by task id."""

    # States after which an entry is treated as finished. Built from
    # JobLogStatus so the strings cannot drift from the field's choices.
    # Listed explicitly so a future non-terminal status is not picked up.
    TERMINAL_STATES = (
        JobLogStatus.SUCCESS.value,
        JobLogStatus.FAILURE.value,
        JobLogStatus.REVOKED.value,
        JobLogStatus.IGNORED.value,
    )

    user = models.ForeignKey("epdb.User", models.CASCADE)
    task_id = models.UUIDField(unique=True)
    job_name = models.TextField(null=False, blank=False)
    status = models.CharField(
        max_length=20,
        choices=JobLogStatus.choices,
        default=JobLogStatus.INITIAL,
    )

    done_at = models.DateTimeField(null=True, blank=True, default=None)
    task_result = models.TextField(null=True, blank=True, default=None)

    def check_for_update(self):
        """Sync this entry with the task's current state.

        The entry is updated and saved only when the task reports a state
        that differs from the stored one and is one of ``TERMINAL_STATES``.
        ``task_result`` is filled in only for a successful task.

        Returns:
            bool: True if the entry was changed and saved, False otherwise.
        """
        async_res = self.get_result()
        new_status = async_res.state

        if new_status == self.status or new_status not in self.TERMINAL_STATES:
            return False

        self.status = new_status
        self.done_at = async_res.date_done

        if new_status == JobLogStatus.SUCCESS.value:
            # NOTE(review): the result is assigned as-is into a TextField;
            # confirm callers expect a non-string value here before save.
            self.task_result = async_res.result

        self.save()
        return True

    def get_result(self):
        """Return the Celery ``AsyncResult`` handle for this entry's task id."""
        # Imported here rather than at module level, as in the original code.
        from celery.result import AsyncResult

        return AsyncResult(str(self.task_id))
|
||||
|
||||
Reference in New Issue
Block a user