Merge remote-tracking branch 'origin/develop' into fix/xss

# Conflicts:
#	templates/modals/collections/new_model_modal.html
This commit is contained in:
Liam Brydon
2025-11-07 08:34:33 +13:00
44 changed files with 2286 additions and 339 deletions

View File

@ -311,7 +311,7 @@ class ExternalDatabase(TimeStampedModel):
},
{
"database": ExternalDatabase.objects.get(name="ChEBI"),
"placeholder": "ChEBI ID without prefix e.g. 12345",
"placeholder": "ChEBI ID without prefix e.g. 10576",
},
],
"structure": [
@ -329,7 +329,7 @@ class ExternalDatabase(TimeStampedModel):
},
{
"database": ExternalDatabase.objects.get(name="ChEBI"),
"placeholder": "ChEBI ID without prefix e.g. 12345",
"placeholder": "ChEBI ID without prefix e.g. 10576",
},
],
"reaction": [
@ -343,7 +343,7 @@ class ExternalDatabase(TimeStampedModel):
},
{
"database": ExternalDatabase.objects.get(name="UniProt"),
"placeholder": "Query ID for UniPro e.g. rhea:12345",
"placeholder": "Query ID for UniProt e.g. rhea:12345",
},
],
}
@ -478,7 +478,7 @@ class ChemicalIdentifierMixin(ExternalIdentifierMixin):
return self.add_external_identifier("CAS", cas_number)
def get_pubchem_identifiers(self):
return self.get_external_identifier("PubChem Compound") or self.get_external_identifier(
return self.get_external_identifier("PubChem Compound") | self.get_external_identifier(
"PubChem Substance"
)
@ -495,6 +495,20 @@ class ChemicalIdentifierMixin(ExternalIdentifierMixin):
return self.get_external_identifier("CAS")
class KEGGIdentifierMixin(ExternalIdentifierMixin):
    """Abstract mixin adding "KEGG Reaction" external-identifier helpers.

    Both helpers delegate to ``get_external_identifier`` /
    ``add_external_identifier``, which this class inherits or expects from
    ``ExternalIdentifierMixin`` (defined elsewhere).
    """

    class Meta:
        # Mixin only: no concrete model/table of its own.
        abstract = True

    @property
    def kegg_reaction_links(self):
        """Return whatever ``get_external_identifier("KEGG Reaction")`` yields."""
        database_name = "KEGG Reaction"
        return self.get_external_identifier(database_name)

    def add_kegg_reaction_id(self, kegg_id):
        """Register ``kegg_id`` as a "KEGG Reaction" identifier.

        The genome.jp entry URL built from ``kegg_id`` is passed as the third
        positional argument of ``add_external_identifier`` (presumably the
        link target; confirm against that helper's signature).
        """
        entry_url = f"https://www.genome.jp/entry/{kegg_id}"
        return self.add_external_identifier("KEGG Reaction", kegg_id, entry_url)
class ReactionIdentifierMixin(ExternalIdentifierMixin):
class Meta:
abstract = True
@ -1014,6 +1028,26 @@ class CompoundStructure(EnviPathModel, AliasMixin, ScenarioMixin, ChemicalIdenti
return self.compound.default_structure == self
class EnzymeLink(EnviPathModel, KEGGIdentifierMixin):
    """Associates a ``Rule`` with an enzyme identified by its EC number.

    Deleting the rule cascades to its enzyme links.
    """

    # Owning rule; reachable from the rule via the default ``enzymelink_set``.
    rule = models.ForeignKey("Rule", on_delete=models.CASCADE, db_index=True)
    # Dot-separated EC number; ``get_group`` relies on the "." separator.
    ec_number = models.TextField(blank=False, null=False, verbose_name="EC Number")
    classification_level = models.IntegerField(
        blank=False, null=False, verbose_name="Classification Level"
    )
    linking_method = models.TextField(blank=False, null=False, verbose_name="Linking Method")
    # Many-to-many links to the Reaction / Edge objects attached to this link
    # (presumably the evidence the link was derived from; confirm with callers).
    reaction_evidence = models.ManyToManyField("epdb.Reaction")
    edge_evidence = models.ManyToManyField("epdb.Edge")
    external_identifiers = GenericRelation("ExternalIdentifier")

    def _url(self):
        """Return ``<rule url>/enzymelink/<uuid>`` for this link."""
        url_template = "{}/enzymelink/{}"
        return url_template.format(self.rule.url, self.uuid)

    def get_group(self) -> str:
        """Return the EC group: the first three EC components followed by ``.-``.

        For example ``"1.14.13.2"`` becomes ``"1.14.13.-"``.
        """
        leading_parts = self.ec_number.split(".")[:3]
        return ".".join(leading_parts) + ".-"
class Rule(PolymorphicModel, EnviPathModel, AliasMixin, ScenarioMixin):
package = models.ForeignKey(
"epdb.Package", verbose_name="Package", on_delete=models.CASCADE, db_index=True
@ -1095,6 +1129,18 @@ class Rule(PolymorphicModel, EnviPathModel, AliasMixin, ScenarioMixin):
return new_rule
def enzymelinks(self):
    """Return every enzyme link attached to this rule.

    Uses the ``enzymelink_set`` reverse accessor and returns the result of
    its ``all()`` call unchanged.
    """
    links = self.enzymelink_set.all()
    return links
def get_grouped_enzymelinks(self):
    """Group this rule's enzyme links by EC group.

    The grouping key comes from each link's own ``get_group()`` (first three
    EC components plus ``.-``), so the key format is defined in one place
    instead of being duplicated here.

    Returns:
        A plain ``dict`` mapping each group key to the list of links in that
        group, in the order ``enzymelinks()`` yields them.
    """
    grouped = defaultdict(list)
    for link in self.enzymelinks():
        grouped[link.get_group()].append(link)
    # Hand back a regular dict so missing-key lookups by callers do not
    # silently create empty groups.
    return dict(grouped)
class SimpleRule(Rule):
pass
@ -1436,6 +1482,16 @@ class Reaction(EnviPathModel, AliasMixin, ScenarioMixin, ReactionIdentifierMixin
id__in=Edge.objects.filter(edge_label=self).values("pathway_id")
).order_by("name")
def get_related_enzymes(self):
    """Collect the "Enzyme" entries from scenarios of edges labelled by this reaction.

    For every ``Edge`` whose ``edge_label`` is this reaction, each of its
    scenarios is inspected; when the scenario's ``additional_information``
    mapping has an ``"Enzyme"`` key, the items stored under it are appended
    to the result.

    Returns:
        A flat list of those items, in edge/scenario iteration order
        (duplicates are kept).
    """
    enzymes = []
    for edge in Edge.objects.filter(edge_label=self):
        for scenario in edge.scenarios.all():
            info = scenario.additional_information
            # Direct membership test instead of scanning every key.
            if "Enzyme" in info:
                enzymes.extend(info["Enzyme"])
    return enzymes
class Pathway(EnviPathModel, AliasMixin, ScenarioMixin):
package = models.ForeignKey(
@ -2172,10 +2228,18 @@ class PackageBasedModel(EPModel):
self.model_status = self.BUILT_NOT_EVALUATED
self.save()
def evaluate_model(self):
def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None):
if self.model_status != self.BUILT_NOT_EVALUATED:
raise ValueError(f"Can't evaluate a model in state {self.model_status}!")
if multigen:
self.multigen_eval = multigen
self.save()
if eval_packages is not None:
for p in eval_packages:
self.eval_packages.add(p)
self.model_status = self.EVALUATING
self.save()
@ -2472,7 +2536,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel):
package: "Package",
rule_packages: List["Package"],
data_packages: List["Package"],
eval_packages: List["Package"],
threshold: float = 0.5,
min_count: int = 10,
max_count: int = 0,
@ -2521,10 +2584,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel):
for p in rule_packages:
rbrr.data_packages.add(p)
if eval_packages:
for p in eval_packages:
rbrr.eval_packages.add(p)
rbrr.save()
return rbrr
@ -2579,7 +2638,6 @@ class MLRelativeReasoning(PackageBasedModel):
package: "Package",
rule_packages: List["Package"],
data_packages: List["Package"],
eval_packages: List["Package"],
threshold: float = 0.5,
name: "str" = None,
description: str = None,
@ -2619,10 +2677,6 @@ class MLRelativeReasoning(PackageBasedModel):
for p in rule_packages:
mlrr.data_packages.add(p)
if eval_packages:
for p in eval_packages:
mlrr.eval_packages.add(p)
if build_app_domain:
ad = ApplicabilityDomain.create(
mlrr,
@ -2942,7 +2996,6 @@ class EnviFormer(PackageBasedModel):
def create(
package: "Package",
data_packages: List["Package"],
eval_packages: List["Package"],
threshold: float = 0.5,
name: "str" = None,
description: str = None,
@ -2975,10 +3028,6 @@ class EnviFormer(PackageBasedModel):
for p in data_packages:
mod.data_packages.add(p)
if eval_packages:
for p in eval_packages:
mod.eval_packages.add(p)
# if build_app_domain:
# ad = ApplicabilityDomain.create(mod, app_domain_num_neighbours, app_domain_reliability_threshold,
# app_domain_local_compatibility_threshold)
@ -2992,7 +3041,8 @@ class EnviFormer(PackageBasedModel):
from enviformer import load
ckpt = os.path.join(s.MODEL_DIR, "enviformer", str(self.uuid), f"{self.uuid}.ckpt")
return load(device=s.ENVIFORMER_DEVICE, ckpt_path=ckpt)
mod = load(device=s.ENVIFORMER_DEVICE, ckpt_path=ckpt)
return mod
def predict(self, smiles) -> List["PredictionResult"]:
    """Predict for a single SMILES by delegating to ``predict_batch``.

    The input is wrapped in a one-element list and the first (only) entry of
    the batch result is returned.
    """
    batch_results = self.predict_batch([smiles])
    return batch_results[0]
@ -3006,8 +3056,12 @@ class EnviFormer(PackageBasedModel):
for smiles in smiles_list
]
logger.info(f"Submitting {canon_smiles} to {self.name}")
start = datetime.now()
products_list = self.model.predict_batch(canon_smiles)
logger.info(f"Got results {products_list}")
end = datetime.now()
logger.info(
f"Prediction took {(end - start).total_seconds():.2f} seconds. Got results {products_list}"
)
results = []
for products in products_list:
@ -3034,6 +3088,7 @@ class EnviFormer(PackageBasedModel):
start = datetime.now()
# Standardise reactions for the training data, EnviFormer ignores stereochemistry currently
co2 = {"C(=O)=O", "O=C=O"}
ds = []
for reaction in self._get_reactions():
educts = ".".join(
@ -3048,7 +3103,8 @@ class EnviFormer(PackageBasedModel):
for smile in reaction.products.all()
]
)
ds.append(f"{educts}>>{products}")
if products not in co2:
ds.append(f"{educts}>>{products}")
end = datetime.now()
logger.debug(f"build_dataset took {(end - start).total_seconds()} seconds")
@ -3084,10 +3140,18 @@ class EnviFormer(PackageBasedModel):
args = {"clz": "EnviFormer"}
return args
def evaluate_model(self):
def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None):
if self.model_status != self.BUILT_NOT_EVALUATED:
raise ValueError(f"Can't evaluate a model in state {self.model_status}!")
if multigen:
self.multigen_eval = multigen
self.save()
if eval_packages is not None:
for p in eval_packages:
self.eval_packages.add(p)
self.model_status = self.EVALUATING
self.save()
@ -3244,7 +3308,7 @@ class EnviFormer(PackageBasedModel):
ds = self.load_dataset()
n_splits = 20
shuff = ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42)
shuff = ShuffleSplit(n_splits=n_splits, test_size=0.1, random_state=42)
# Single gen eval is done in one loop of train then evaluate rather than storing all n_splits trained models
# this helps reduce the memory footprint.
@ -3312,7 +3376,7 @@ class EnviFormer(PackageBasedModel):
# Compute splits of the collected pathway and evaluate. Like single gen we train and evaluate in each
# iteration instead of storing all trained models.
for split_id, (train, test) in enumerate(
ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42).split(pathways)
ShuffleSplit(n_splits=n_splits, test_size=0.1, random_state=42).split(pathways)
):
train_pathways = [pathways[i] for i in train]
test_pathways = [pathways[i] for i in test]
@ -3614,3 +3678,53 @@ class Setting(EnviPathModel):
self.public = True
self.global_default = True
self.save()
class JobLogStatus(models.TextChoices):
    """Status values stored in ``JobLog.status``.

    Each member is declared as ``(stored value, human-readable label)``.
    """

    # Default status of a newly created JobLog.
    INITIAL = "INITIAL", "Initial"
    # The remaining values are the states JobLog.check_for_update treats as
    # terminal when copying the task state onto the log row.
    SUCCESS = "SUCCESS", "Success"
    FAILURE = "FAILURE", "Failure"
    REVOKED = "REVOKED", "Revoked"
    IGNORED = "IGNORED", "Ignored"
class JobLog(TimeStampedModel):
    """Database record tracking one asynchronous task started for a user.

    ``task_id`` identifies the task; ``check_for_update`` copies the task's
    terminal state (and, on success, its result) onto this row.
    """

    user = models.ForeignKey("epdb.User", models.CASCADE)
    task_id = models.UUIDField(unique=True)
    job_name = models.TextField(null=False, blank=False)
    status = models.CharField(
        max_length=20,
        choices=JobLogStatus.choices,
        default=JobLogStatus.INITIAL,
    )
    # Both stay NULL until a terminal state has been observed.
    done_at = models.DateTimeField(null=True, blank=True, default=None)
    task_result = models.TextField(null=True, blank=True, default=None)

    def get_result(self):
        """Return the Celery ``AsyncResult`` handle for ``task_id``."""
        # Imported lazily, as in the original implementation.
        from celery.result import AsyncResult

        return AsyncResult(str(self.task_id))

    def check_for_update(self):
        """Sync this row with the task's state if it reached a new terminal state.

        Returns:
            ``True`` when the row was updated and saved, ``False`` when the
            task state is unchanged or not terminal.
        """
        outcome = self.get_result()
        state = outcome.state

        is_terminal = state in ("SUCCESS", "FAILURE", "REVOKED", "IGNORED")
        if state == self.status or not is_terminal:
            return False

        self.status = state
        self.done_at = outcome.date_done
        # Only a successful task has its result stored.
        if state == "SUCCESS":
            self.task_result = outcome.result
        self.save()
        return True