[Feature] Basic logging of Jobs, Model Evaluation (#169)

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#169
2025-10-27 22:34:05 +13:00
parent 551cfc7768
commit a952c08469
15 changed files with 556 additions and 240 deletions

View File

@@ -114,6 +114,6 @@ class Command(BaseCommand):
print(f"Training {model_name}")
model.build_model()
print(f"Evaluating {model_name}")
model.evaluate_model()
model.evaluate_model(False, eval_packages=eval_packages)
print(f"Saving {model_name}")
model.save()
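The bare False in the new call binds to the multigen parameter introduced further down in this commit; a minimal keyword-argument sketch of the same call:

# Equivalent, more explicit form; multigen=False requests a single-generation evaluation.
model.evaluate_model(multigen=False, eval_packages=eval_packages)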

View File

@@ -0,0 +1,38 @@
from datetime import date, timedelta
from django.core.management.base import BaseCommand
from django.db import transaction
from epdb.models import JobLog
class Command(BaseCommand):
def add_arguments(self, parser):
parser.add_argument(
"--cleanup",
type=int,
default=None,
help="Remove all logs older than this number of days. Default is None, which does not remove any logs.",
)
@transaction.atomic
def handle(self, *args, **options):
if options["cleanup"] is not None:
cleanup_dt = date.today() - timedelta(days=options["cleanup"])
print(JobLog.objects.filter(created__lt=cleanup_dt).delete())
logs = JobLog.objects.filter(status__in=["INITIAL", "RUNNING"])
print(f"Found {logs.count()} logs to update")
updated = 0
for log in logs:
res = log.check_for_update()
if res:
updated += 1
print(f"Updated {updated} logs")
from django.db.models import Count
qs = JobLog.objects.values("status").annotate(total=Count("status"))
for r in qs:
print(r["status"], r["total"])
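A usage sketch for the new management command; its file name is not visible in this view, so update_job_logs below is a hypothetical name:

# Hypothetical invocations (the real name is the new file's module name):
#   python manage.py update_job_logs                 # reconcile open logs with Celery
#   python manage.py update_job_logs --cleanup 30    # additionally drop logs older than 30 days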

View File

@@ -2225,10 +2225,18 @@ class PackageBasedModel(EPModel):
self.model_status = self.BUILT_NOT_EVALUATED
self.save()
def evaluate_model(self):
def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None):
if self.model_status != self.BUILT_NOT_EVALUATED:
raise ValueError(f"Can't evaluate a model in state {self.model_status}!")
if multigen:
self.multigen_eval = multigen
self.save()
if eval_packages is not None:
for p in eval_packages:
self.eval_packages.add(p)
self.model_status = self.EVALUATING
self.save()
@@ -2525,7 +2533,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel):
package: "Package",
rule_packages: List["Package"],
data_packages: List["Package"],
eval_packages: List["Package"],
threshold: float = 0.5,
min_count: int = 10,
max_count: int = 0,
@@ -2574,10 +2581,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel):
for p in rule_packages:
rbrr.data_packages.add(p)
if eval_packages:
for p in eval_packages:
rbrr.eval_packages.add(p)
rbrr.save()
return rbrr
@@ -2632,7 +2635,6 @@ class MLRelativeReasoning(PackageBasedModel):
package: "Package",
rule_packages: List["Package"],
data_packages: List["Package"],
eval_packages: List["Package"],
threshold: float = 0.5,
name: "str" = None,
description: str = None,
@@ -2672,10 +2674,6 @@ class MLRelativeReasoning(PackageBasedModel):
for p in rule_packages:
mlrr.data_packages.add(p)
if eval_packages:
for p in eval_packages:
mlrr.eval_packages.add(p)
if build_app_domain:
ad = ApplicabilityDomain.create(
mlrr,
@@ -2995,7 +2993,6 @@ class EnviFormer(PackageBasedModel):
def create(
package: "Package",
data_packages: List["Package"],
eval_packages: List["Package"],
threshold: float = 0.5,
name: "str" = None,
description: str = None,
@@ -3028,10 +3025,6 @@ class EnviFormer(PackageBasedModel):
for p in data_packages:
mod.data_packages.add(p)
if eval_packages:
for p in eval_packages:
mod.eval_packages.add(p)
# if build_app_domain:
# ad = ApplicabilityDomain.create(mod, app_domain_num_neighbours, app_domain_reliability_threshold,
# app_domain_local_compatibility_threshold)
@@ -3144,10 +3137,18 @@ class EnviFormer(PackageBasedModel):
args = {"clz": "EnviFormer"}
return args
def evaluate_model(self):
def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None):
if self.model_status != self.BUILT_NOT_EVALUATED:
raise ValueError(f"Can't evaluate a model in state {self.model_status}!")
if multigen:
self.multigen_eval = multigen
self.save()
if eval_packages is not None:
for p in eval_packages:
self.eval_packages.add(p)
self.model_status = self.EVALUATING
self.save()
@@ -3671,3 +3672,53 @@ class Setting(EnviPathModel):
self.public = True
self.global_default = True
self.save()
class JobLogStatus(models.TextChoices):
INITIAL = "INITIAL", "Initial"
SUCCESS = "SUCCESS", "Success"
FAILURE = "FAILURE", "Failure"
REVOKED = "REVOKED", "Revoked"
IGNORED = "IGNORED", "Ignored"
class JobLog(TimeStampedModel):
user = models.ForeignKey("epdb.User", models.CASCADE)
task_id = models.UUIDField(unique=True)
job_name = models.TextField(null=False, blank=False)
status = models.CharField(
max_length=20,
choices=JobLogStatus.choices,
default=JobLogStatus.INITIAL,
)
done_at = models.DateTimeField(null=True, blank=True, default=None)
task_result = models.TextField(null=True, blank=True, default=None)
def check_for_update(self):
async_res = self.get_result()
new_status = async_res.state
TERMINAL_STATES = [
"SUCCESS",
"FAILURE",
"REVOKED",
"IGNORED",
]
if new_status != self.status and new_status in TERMINAL_STATES:
self.status = new_status
self.done_at = async_res.date_done
if new_status == "SUCCESS":
self.task_result = async_res.result
self.save()
return True
return False
def get_result(self):
from celery.result import AsyncResult
return AsyncResult(str(self.task_id))
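check_for_update only persists Celery's terminal states, so polling it repeatedly is safe. A minimal sketch, assuming a configured Celery result backend and a task_id saved by an earlier dispatch (hypothetical value):

log = JobLog.objects.get(task_id=task_id)  # task_id: hypothetical UUID from a dispatched job
if log.check_for_update():
    print(log.status, log.done_at, log.task_result)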

View File

@@ -1,10 +1,13 @@
import logging
from typing import Optional
from celery.utils.functional import LRUCache
from celery import shared_task
from epdb.models import Pathway, Node, EPModel, Setting
from epdb.logic import SPathway
from datetime import datetime
from typing import Callable, Optional
from uuid import uuid4
from celery import shared_task
from celery.utils.functional import LRUCache
from epdb.logic import SPathway
from epdb.models import EPModel, JobLog, Node, Package, Pathway, Setting, User
logger = logging.getLogger(__name__)
ML_CACHE = LRUCache(3) # Cache the three most recent ML models to reduce load times.
@@ -16,6 +19,40 @@ def get_ml_model(model_pk: int):
return ML_CACHE[model_pk]
def dispatch_eager(user: "User", job: Callable, *args, **kwargs):
try:
x = job(*args, **kwargs)
log = JobLog()
log.user = user
log.task_id = uuid4()
log.job_name = job.__name__
log.status = "SUCCESS"
log.done_at = datetime.now()
log.task_result = str(x) if x else None
log.save()
return x
except Exception as e:
logger.exception(e)
raise e
def dispatch(user: "User", job: Callable, *args, **kwargs):
try:
x = job.delay(*args, **kwargs)
log = JobLog()
log.user = user
log.task_id = x.task_id
log.job_name = job.__name__
log.status = "INITIAL"
log.save()
return x.result
except Exception as e:
logger.exception(e)
raise e
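A usage sketch contrasting the two helpers with the mul task defined just below; user is assumed to be an epdb User instance:

# Eager: runs in-process and writes a SUCCESS JobLog under a synthetic task_id.
value = dispatch_eager(user, mul, 6, 7)  # value == 42

# Queued: hands the task to Celery and writes an INITIAL JobLog keyed by the real
# task_id. Note that x.result of a just-queued task is typically still None, so the
# value returned by dispatch() is only meaningful once the worker has finished.
dispatch(user, mul, 6, 7)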
@shared_task(queue="background")
def mul(a, b):
return a * b
@@ -33,17 +70,55 @@ def send_registration_mail(user_pk: int):
pass
@shared_task(queue="model")
def build_model(model_pk: int):
@shared_task(bind=True, queue="model")
def build_model(self, model_pk: int):
mod = EPModel.objects.get(id=model_pk)
mod.build_dataset()
mod.build_model()
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=mod.url)
try:
mod.build_dataset()
mod.build_model()
except Exception as e:
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(
status="FAILED", task_result=mod.url
)
raise e
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=mod.url)
return mod.url
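The resulting JobLog lifecycle for a queued build, as a comment sketch (states are written via .update(), which bypasses model-level choices validation):

# dispatch():    status=INITIAL,  task_id=<Celery id>
# task start:    status=RUNNING,  task_result=mod.url
# on success:    status=SUCCESS
# on exception:  status=FAILURE, then the exception is re-raised for Celery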
@shared_task(queue="model")
def evaluate_model(model_pk: int):
@shared_task(bind=True, queue="model")
def evaluate_model(self, model_pk: int, multigen: bool, package_pks: Optional[list] = None):
packages = None
if package_pks:
packages = Package.objects.filter(pk__in=package_pks)
mod = EPModel.objects.get(id=model_pk)
mod.evaluate_model()
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=mod.url)
try:
mod.evaluate_model(multigen, eval_packages=packages)
except Exception as e:
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(
status="FAILED", task_result=mod.url
)
raise e
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=mod.url)
return mod.url
@shared_task(queue="model")
@@ -52,9 +127,13 @@ def retrain(model_pk: int):
mod.retrain()
@shared_task(queue="predict")
@shared_task(bind=True, queue="predict")
def predict(
pw_pk: int, pred_setting_pk: int, limit: Optional[int] = None, node_pk: Optional[int] = None
self,
pw_pk: int,
pred_setting_pk: int,
limit: Optional[int] = None,
node_pk: Optional[int] = None,
) -> Pathway:
pw = Pathway.objects.get(id=pw_pk)
setting = Setting.objects.get(id=pred_setting_pk)
@@ -65,6 +144,9 @@ def predict(
pw.kv.update(**{"status": "running"})
pw.save()
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=pw.url)
try:
# regular prediction
if limit is not None:
@@ -89,7 +171,18 @@ def predict(
except Exception as e:
pw.kv.update({"status": "failed"})
pw.save()
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(
status="FAILED", task_result=pw.url
)
raise e
pw.kv.update(**{"status": "completed"})
pw.save()
if JobLog.objects.filter(task_id=self.request.id).exists():
JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=pw.url)
return pw.url
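predict now tracks progress in two places: the pathway's kv store (lower-case, as before) and the JobLog. The pairing, as a comment sketch:

# pw.kv["status"]    JobLog.status
# "running"          RUNNING
# "failed"           FAILURE   (exception re-raised)
# "completed"        SUCCESS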

View File

@@ -1,8 +1,21 @@
from django import template
from pydantic import AnyHttpUrl, ValidationError
from pydantic.type_adapter import TypeAdapter
register = template.Library()
url_adapter = TypeAdapter(AnyHttpUrl)
@register.filter
def classname(obj):
return obj.__class__.__name__
@register.filter
def is_url(value):
try:
url_adapter.validate_python(value)
return True
except ValidationError:
return False
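In a template the filter is used as {{ value|is_url }}; a quick sanity check of the underlying function outside of template rendering:

is_url("https://envipath.org")    # True
is_url("not a url")               # False
is_url("ftp://example.org/f")     # False: AnyHttpUrl accepts only http(s) URLs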

View File

@@ -190,6 +190,7 @@ urlpatterns = [
re_path(r"^indigo/dearomatize$", v.dearomatize, name="indigo_dearomatize"),
re_path(r"^indigo/layout$", v.layout, name="indigo_layout"),
re_path(r"^depict$", v.depict, name="depict"),
re_path(r"^jobs", v.jobs, name="jobs"),
# OAuth Stuff
path("o/userinfo/", v.userinfo, name="oauth_userinfo"),
]

View File

@@ -47,6 +47,7 @@ from .models import (
ExternalDatabase,
ExternalIdentifier,
EnzymeLink,
JobLog,
)
logger = logging.getLogger(__name__)
@@ -754,8 +755,8 @@ def package_models(request, package_uuid):
context["unreviewed_objects"] = unreviewed_model_qs
context["model_types"] = {
"ML Relative Reasoning": "ml-relative-reasoning",
"Rule Based Relative Reasoning": "rule-based-relative-reasoning",
"ML Relative Reasoning": "mlrr",
"Rule Based Relative Reasoning": "rbrr",
}
if s.FLAGS.get("ENVIFORMER", False):
@@ -775,69 +776,67 @@ def package_models(request, package_uuid):
model_type = request.POST.get("model-type")
# Generic fields for ML and Rule Based
rule_packages = request.POST.getlist("model-rule-packages")
data_packages = request.POST.getlist("model-data-packages")
# Generic params
params = {
"package": current_package,
"name": name,
"description": description,
"data_packages": [
PackageManager.get_package_by_url(current_user, p) for p in data_packages
],
}
if model_type == "enviformer":
threshold = float(request.POST.get(f"{model_type}-threshold", 0.5))
threshold = float(request.POST.get("model-threshold", 0.5))
params["threshold"] = threshold
mod = EnviFormer.create(current_package, name, description, threshold)
mod = EnviFormer.create(**params)
elif model_type == "mlrr":
# ML Specific
threshold = float(request.POST.get("model-threshold", 0.5))
# TODO handle additional fingerprinter
# fingerprinter = request.POST.get("model-fingerprinter")
elif model_type == "ml-relative-reasoning" or model_type == "rule-based-relative-reasoning":
# Generic fields for ML and Rule Based
rule_packages = request.POST.getlist("package-based-relative-reasoning-rule-packages")
data_packages = request.POST.getlist("package-based-relative-reasoning-data-packages")
eval_packages = request.POST.getlist(
"package-based-relative-reasoning-evaluation-packages", []
)
params["rule_packages"] = [
PackageManager.get_package_by_url(current_user, p) for p in rule_packages
]
# Generic params
params = {
"package": current_package,
"name": name,
"description": description,
"rule_packages": [
PackageManager.get_package_by_url(current_user, p) for p in rule_packages
],
"data_packages": [
PackageManager.get_package_by_url(current_user, p) for p in data_packages
],
"eval_packages": [
PackageManager.get_package_by_url(current_user, p) for p in eval_packages
],
}
# App Domain related parameters
build_ad = request.POST.get("build-app-domain", False) == "on"
num_neighbors = request.POST.get("num-neighbors", 5)
reliability_threshold = request.POST.get("reliability-threshold", 0.5)
local_compatibility_threshold = request.POST.get("local-compatibility-threshold", 0.5)
if model_type == "ml-relative-reasoning":
# ML Specific
threshold = float(request.POST.get(f"{model_type}-threshold", 0.5))
# TODO handle additional fingerprinter
# fingerprinter = request.POST.get(f"{model_type}-fingerprinter")
params["threshold"] = threshold
# params['fingerprinter'] = fingerprinter
params["build_app_domain"] = build_ad
params["app_domain_num_neighbours"] = num_neighbors
params["app_domain_reliability_threshold"] = reliability_threshold
params["app_domain_local_compatibility_threshold"] = local_compatibility_threshold
# App Domain related parameters
build_ad = request.POST.get("build-app-domain", False) == "on"
num_neighbors = request.POST.get("num-neighbors", 5)
reliability_threshold = request.POST.get("reliability-threshold", 0.5)
local_compatibility_threshold = request.POST.get(
"local-compatibility-threshold", 0.5
)
mod = MLRelativeReasoning.create(**params)
elif model_type == "rbrr":
params["rule_packages"] = [
PackageManager.get_package_by_url(current_user, p) for p in rule_packages
]
params["threshold"] = threshold
# params['fingerprinter'] = fingerprinter
params["build_app_domain"] = build_ad
params["app_domain_num_neighbours"] = num_neighbors
params["app_domain_reliability_threshold"] = reliability_threshold
params["app_domain_local_compatibility_threshold"] = local_compatibility_threshold
mod = MLRelativeReasoning.create(**params)
else:
mod = RuleBasedRelativeReasoning.create(**params)
from .tasks import build_model
build_model.delay(mod.pk)
mod = RuleBasedRelativeReasoning.create(**params)
elif s.FLAGS.get("PLUGINS", False) and model_type in s.CLASSIFIER_PLUGINS.values():
pass
else:
return error(
request, "Invalid model type.", f'Model type "{model_type}" is not supported."'
)
return redirect(mod.url)
from .tasks import dispatch, build_model
dispatch(current_user, build_model, mod.pk)
return redirect(mod.url)
else:
return HttpResponseNotAllowed(["GET", "POST"])
@@ -865,6 +864,10 @@ def package_model(request, package_uuid, model_uuid):
return JsonResponse({"error": f'"{smiles}" is not a valid SMILES'}, status=400)
if classify:
from epdb.tasks import dispatch_eager, predict_simple
res = dispatch_eager(current_user, predict_simple, current_model.pk, stand_smiles)
pred_res = current_model.predict(stand_smiles)
res = []
@@ -909,9 +912,25 @@ def package_model(request, package_uuid, model_uuid):
current_model.delete()
return redirect(current_package.url + "/model")
elif hidden == "evaluate":
from .tasks import evaluate_model
from .tasks import dispatch, evaluate_model
eval_type = request.POST.get("model-evaluation-type")
if eval_type not in ["sg", "mg"]:
return error(
request,
"Invalid evaluation type",
f'Evaluation type "{eval_type}" is not supported. Only "sg" and "mg" are supported.',
)
multigen = eval_type == "mg"
eval_packages = request.POST.getlist("model-evaluation-packages")
eval_package_ids = [
PackageManager.get_package_by_url(current_user, p).id for p in eval_packages
]
dispatch(current_user, evaluate_model, current_model.pk, multigen, eval_package_ids)
evaluate_model.delay(current_model.pk)
return redirect(current_model.url)
else:
return HttpResponseBadRequest()
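A sketch of the form fields this evaluate branch consumes (field names from the code above; values hypothetical):

# model-evaluation-type:     "sg" (single-generation) or "mg" (multi-generation)
# model-evaluation-packages: package URLs, resolved to ids via PackageManager and
#                            handed to the evaluate_model task through dispatch()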
@@ -1809,9 +1828,9 @@ def package_pathways(request, package_uuid):
pw.setting = prediction_setting
pw.save()
from .tasks import predict
from .tasks import dispatch, predict
predict.delay(pw.pk, prediction_setting.pk, limit=limit)
dispatch(current_user, predict, pw.pk, prediction_setting.pk, limit=limit)
return redirect(pw.url)
@@ -1930,10 +1949,16 @@ def package_pathway(request, package_uuid, pathway_uuid):
if node_url:
n = current_pathway.get_node(node_url)
from .tasks import predict
from .tasks import dispatch, predict
dispatch(
current_user,
predict,
current_pathway.pk,
current_pathway.prediction_setting.pk,
node_pk=n.pk,
)
# Don't delay?
predict(current_pathway.pk, current_pathway.setting.pk, node_pk=n.pk)
return JsonResponse({"success": current_pathway.url})
return HttpResponseBadRequest()
@@ -2705,6 +2730,24 @@ def setting(request, setting_uuid):
pass
def jobs(request):
current_user = _anonymous_or_real(request)
context = get_base_context(request)
if request.method == "GET":
context["object_type"] = "joblog"
context["breadcrumbs"] = [
{"Home": s.SERVER_URL},
{"Jobs": s.SERVER_URL + "/jobs"},
]
if current_user.is_superuser:
context["jobs"] = JobLog.objects.all().order_by("-created")
else:
context["jobs"] = JobLog.objects.filter(user=current_user).order_by("-created")
return render(request, "collections/joblog.html", context)
###########
# KETCHER #
###########