test fixes

Merge remote-tracking branch 'origin/develop' into enhancement/dataset
2025-11-07 08:46:28 +13:00 · 2025-11-07 08:32:05 +13:00 · 2025-11-07 08:28:03 +13:00 · 2025-11-07 08:09:06 +13:00 · 2025-11-06 10:42:32 +13:00 · 2025-11-06 10:32:21 +13:00
33 changed files with 2006 additions and 785 deletions
--- a/.env.prod.example
+++ b/.env.prod.example
@ -16,3 +16,5 @@ POSTGRES_PORT=
 # MAIL
 EMAIL_HOST_USER=
 EMAIL_HOST_PASSWORD=
+# MATOMO
+MATOMO_SITE_ID
--- a/envipath/settings.py
+++ b/envipath/settings.py
@ -357,3 +357,6 @@ if MS_ENTRA_ENABLED:
    MS_ENTRA_AUTHORITY = f"https://login.microsoftonline.com/{MS_ENTRA_TENANT_ID}"
    MS_ENTRA_REDIRECT_URI = os.environ["MS_REDIRECT_URI"]
    MS_ENTRA_SCOPES = os.environ.get("MS_SCOPES", "").split(",")
+
+# Site ID 10 -> beta.envipath.org
+MATOMO_SITE_ID = os.environ.get("MATOMO_SITE_ID", "10")
--- a/epdb/admin.py
+++ b/epdb/admin.py
@ -7,6 +7,7 @@ from .models import (
    GroupPackagePermission,
    Package,
    MLRelativeReasoning,
+    EnviFormer,
    Compound,
    CompoundStructure,
    SimpleAmbitRule,
@ -19,11 +20,12 @@ from .models import (
    Setting,
    ExternalDatabase,
    ExternalIdentifier,
+    JobLog,
 )


 class UserAdmin(admin.ModelAdmin):
-    pass
+    list_display = ["username", "email", "is_active"]


 class UserPackagePermissionAdmin(admin.ModelAdmin):
@ -38,8 +40,14 @@ class GroupPackagePermissionAdmin(admin.ModelAdmin):
    pass


+class JobLogAdmin(admin.ModelAdmin):
+    pass
+
+
 class EPAdmin(admin.ModelAdmin):
    search_fields = ["name", "description"]
+    list_display = ["name", "url", "created"]
+    ordering = ["-created"]


 class PackageAdmin(EPAdmin):
@ -50,6 +58,10 @@ class MLRelativeReasoningAdmin(EPAdmin):
    pass


+class EnviFormerAdmin(EPAdmin):
+    pass
+
+
 class CompoundAdmin(EPAdmin):
    pass

@ -102,8 +114,10 @@ admin.site.register(User, UserAdmin)
 admin.site.register(UserPackagePermission, UserPackagePermissionAdmin)
 admin.site.register(Group, GroupAdmin)
 admin.site.register(GroupPackagePermission, GroupPackagePermissionAdmin)
+admin.site.register(JobLog, JobLogAdmin)
 admin.site.register(Package, PackageAdmin)
 admin.site.register(MLRelativeReasoning, MLRelativeReasoningAdmin)
+admin.site.register(EnviFormer, EnviFormerAdmin)
 admin.site.register(Compound, CompoundAdmin)
 admin.site.register(CompoundStructure, CompoundStructureAdmin)
 admin.site.register(SimpleAmbitRule, SimpleAmbitRuleAdmin)
--- a/epdb/logic.py
+++ b/epdb/logic.py
@ -1542,9 +1542,7 @@ class SPathway(object):
                if sub.app_domain_assessment is None:
                    if self.prediction_setting.model:
                        if self.prediction_setting.model.app_domain:
-                            app_domain_assessment = self.prediction_setting.model.app_domain.assess(
-                                sub.smiles
-                            )[0]
+                            app_domain_assessment = self.prediction_setting.model.app_domain.assess(sub.smiles)

                            if self.persist is not None:
                                n = self.snode_persist_lookup[sub]
@ -1576,11 +1574,7 @@ class SPathway(object):
                                    app_domain_assessment = None
                                    if self.prediction_setting.model:
                                        if self.prediction_setting.model.app_domain:
-                                            app_domain_assessment = (
-                                                self.prediction_setting.model.app_domain.assess(c)[
-                                                    0
-                                                ]
-                                            )
+                                            app_domain_assessment = (self.prediction_setting.model.app_domain.assess(c))

                                    self.smiles_to_node[c] = SNode(
                                        c, sub.depth + 1, app_domain_assessment
--- a/epdb/management/commands/create_ml_models.py
+++ b/epdb/management/commands/create_ml_models.py
@ -7,10 +7,11 @@ from epdb.models import MLRelativeReasoning, EnviFormer, Package

 class Command(BaseCommand):
    """This command can be run with
-    `python manage.py create_ml_models [model_names] -d [data_packages] OPTIONAL: -e [eval_packages]`
-    For example, to train both EnviFormer and MLRelativeReasoning on BBD and SOIL and evaluate them on SLUDGE
-    the below command would be used:
-    `python manage.py create_ml_models enviformer mlrr -d bbd soil -e sludge
+    `python manage.py create_ml_models [model_names] -d [data_packages] FOR MLRR ONLY: -r [rule_packages]
+                                        OPTIONAL: -e [eval_packages] -t threshold`
+    For example, to train both EnviFormer and MLRelativeReasoning on BBD and SOIL and evaluate them on SLUDGE with a
+    threshold of 0.6, the below command would be used:
+    `python manage.py create_ml_models enviformer mlrr -d bbd soil -e sludge -t 0.6
    """

    def add_arguments(self, parser):
@ -34,6 +35,13 @@ class Command(BaseCommand):
            help="Rule Packages mandatory for MLRR",
            default=[],
        )
+        parser.add_argument(
+            "-t",
+            "--threshold",
+            type=float,
+            help="Model prediction threshold",
+            default=0.5,
+        )

    @transaction.atomic
    def handle(self, *args, **options):
@ -67,7 +75,11 @@ class Command(BaseCommand):
            return packages

        # Iteratively create models in options["model_names"]
-        print(f"Creating models: {options['model_names']}")
+        print(f"Creating models: {options['model_names']}\n"
+              f"Data packages: {options['data_packages']}\n"
+              f"Rule Packages (only for MLRR): {options['rule_packages']}\n"
+              f"Eval Packages: {options['eval_packages']}\n"
+              f"Threshold: {options['threshold']:.2f}")
        data_packages = decode_packages(options["data_packages"])
        eval_packages = decode_packages(options["eval_packages"])
        rule_packages = decode_packages(options["rule_packages"])
@ -78,9 +90,10 @@ class Command(BaseCommand):
                    pack,
                    data_packages=data_packages,
                    eval_packages=eval_packages,
-                    threshold=0.5,
-                    name="EnviFormer - T0.5",
-                    description="EnviFormer transformer",
+                    threshold=options['threshold'],
+                    name=f"EnviFormer - {', '.join(options['data_packages'])} - T{options['threshold']:.2f}",
+                    description=f"EnviFormer transformer trained on {options['data_packages']} "
+                                f"evaluated on {options['eval_packages']}.",
                )
            elif model_name == "mlrr":
                model = MLRelativeReasoning.create(
@ -88,9 +101,10 @@ class Command(BaseCommand):
                    rule_packages=rule_packages,
                    data_packages=data_packages,
                    eval_packages=eval_packages,
-                    threshold=0.5,
-                    name="ECC - BBD - T0.5",
-                    description="ML Relative Reasoning",
+                    threshold=options['threshold'],
+                    name=f"ECC - {', '.join(options['data_packages'])} - T{options['threshold']:.2f}",
+                    description=f"ML Relative Reasoning trained on {options['data_packages']} with rules from "
+                                f"{options['rule_packages']} and evaluated on {options['eval_packages']}.",
                )
            else:
                raise ValueError(f"Cannot create model of type {model_name}, unknown model type")
@ -100,6 +114,6 @@ class Command(BaseCommand):
            print(f"Training {model_name}")
            model.build_model()
            print(f"Evaluating {model_name}")
-            model.evaluate_model()
+            model.evaluate_model(False, eval_packages=eval_packages)
            print(f"Saving {model_name}")
            model.save()
--- a/epdb/management/commands/dump_enviformer.py
+++ b/epdb/management/commands/dump_enviformer.py
@ -0,0 +1,59 @@
+import json
+import os
+import tarfile
+from tempfile import TemporaryDirectory
+
+from django.conf import settings as s
+from django.core.management.base import BaseCommand
+from django.db import transaction
+
+from epdb.models import EnviFormer
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "model",
+            type=str,
+            help="Model UUID of the Model to Dump",
+        )
+        parser.add_argument("--output", type=str)
+
+    def package_dict_and_folder(self, dict_data, folder_path, output_path):
+        with TemporaryDirectory() as tmpdir:
+            dict_filename = os.path.join(tmpdir, "data.json")
+
+            with open(dict_filename, "w", encoding="utf-8") as f:
+                json.dump(dict_data, f, indent=2)
+
+            with tarfile.open(output_path, "w:gz") as tar:
+                tar.add(dict_filename, arcname="data.json")
+                tar.add(folder_path, arcname=os.path.basename(folder_path))
+
+            os.remove(dict_filename)
+
+    @transaction.atomic
+    def handle(self, *args, **options):
+        output = options["output"]
+
+        if os.path.exists(output):
+            raise ValueError(f"Output file {output} already exists")
+
+        model = EnviFormer.objects.get(uuid=options["model"])
+
+        data = {
+            "uuid": str(model.uuid),
+            "name": model.name,
+            "description": model.description,
+            "kv": model.kv,
+            "data_packages_uuids": [str(p.uuid) for p in model.data_packages.all()],
+            "eval_packages_uuids": [str(p.uuid) for p in model.data_packages.all()],
+            "threshold": model.threshold,
+            "eval_results": model.eval_results,
+            "multigen_eval": model.multigen_eval,
+            "model_status": model.model_status,
+        }
+
+        model_folder = os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid))
+
+        self.package_dict_and_folder(data, model_folder, output)
--- a/epdb/management/commands/load_enviformer.py
+++ b/epdb/management/commands/load_enviformer.py
@ -0,0 +1,81 @@
+import json
+import os
+import shutil
+import tarfile
+from tempfile import TemporaryDirectory
+
+from django.conf import settings as s
+from django.core.management.base import BaseCommand
+from django.db import transaction
+
+from epdb.models import EnviFormer, Package
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "input",
+            type=str,
+            help=".tar.gz file containing the Model dump.",
+        )
+        parser.add_argument(
+            "package",
+            type=str,
+            help="Package UUID where the Model should be loaded to.",
+        )
+
+    def read_dict_and_folder_from_archive(self, archive_path, extract_to="extracted_folder"):
+        with tarfile.open(archive_path, "r:gz") as tar:
+            tar.extractall(extract_to)
+
+            dict_path = os.path.join(extract_to, "data.json")
+
+            if not os.path.exists(dict_path):
+                raise FileNotFoundError("data.json not found in the archive.")
+
+            with open(dict_path, "r", encoding="utf-8") as f:
+                data_dict = json.load(f)
+
+            extracted_items = os.listdir(extract_to)
+            folders = [item for item in extracted_items if item != "data.json"]
+            folder_path = os.path.join(extract_to, folders[0]) if folders else None
+
+        return data_dict, folder_path
+
+    @transaction.atomic
+    def handle(self, *args, **options):
+        if not os.path.exists(options["input"]):
+            raise ValueError(f"Input file {options['input']} does not exist.")
+
+        target_package = Package.objects.get(uuid=options["package"])
+
+        with TemporaryDirectory() as tmpdir:
+            data, folder = self.read_dict_and_folder_from_archive(options["input"], tmpdir)
+
+            model = EnviFormer()
+            model.package = target_package
+            # model.uuid = data["uuid"]
+            model.name = data["name"]
+            model.description = data["description"]
+            model.kv = data["kv"]
+            model.threshold = float(data["threshold"])
+            model.eval_results = data["eval_results"]
+            model.multigen_eval = data["multigen_eval"]
+            model.model_status = data["model_status"]
+            model.save()
+
+            for p_uuid in data["data_packages_uuids"]:
+                p = Package.objects.get(uuid=p_uuid)
+                model.data_packages.add(p)
+
+            for p_uuid in data["eval_packages_uuids"]:
+                p = Package.objects.get(uuid=p_uuid)
+                model.eval_packages.add(p)
+
+            target_folder = os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid))
+
+            shutil.copytree(folder, target_folder)
+            os.rename(
+                os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid), f"{data['uuid']}.ckpt"),
+                os.path.join(s.MODEL_DIR, "enviformer", str(model.uuid), f"{model.uuid}.ckpt"),
+            )
--- a/epdb/management/commands/update_job_logs.py
+++ b/epdb/management/commands/update_job_logs.py
@ -0,0 +1,38 @@
+from datetime import date, timedelta
+
+from django.core.management.base import BaseCommand
+from django.db import transaction
+
+from epdb.models import JobLog
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--cleanup",
+            type=int,
+            default=None,
+            help="Remove all logs older than this number of days. Default is None, which does not remove any logs.",
+        )
+
+    @transaction.atomic
+    def handle(self, *args, **options):
+        if options["cleanup"] is not None:
+            cleanup_dt = date.today() - timedelta(days=options["cleanup"])
+            print(JobLog.objects.filter(created__lt=cleanup_dt).delete())
+
+        logs = JobLog.objects.filter(status="INITIAL")
+        print(f"Found {logs.count()} logs to update")
+        updated = 0
+        for log in logs:
+            res = log.check_for_update()
+            if res:
+                updated += 1
+
+        print(f"Updated {updated} logs")
+
+        from django.db.models import Count
+
+        qs = JobLog.objects.values("status").annotate(total=Count("status"))
+        for r in qs:
+            print(r["status"], r["total"])
--- a/epdb/migrations/0009_joblog.py
+++ b/epdb/migrations/0009_joblog.py
@ -0,0 +1,66 @@
+# Generated by Django 5.2.7 on 2025-10-27 09:39
+
+import django.db.models.deletion
+import django.utils.timezone
+import model_utils.fields
+from django.conf import settings
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("epdb", "0008_enzymelink"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="JobLog",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
+                    ),
+                ),
+                (
+                    "created",
+                    model_utils.fields.AutoCreatedField(
+                        default=django.utils.timezone.now, editable=False, verbose_name="created"
+                    ),
+                ),
+                (
+                    "modified",
+                    model_utils.fields.AutoLastModifiedField(
+                        default=django.utils.timezone.now, editable=False, verbose_name="modified"
+                    ),
+                ),
+                ("task_id", models.UUIDField(unique=True)),
+                ("job_name", models.TextField()),
+                (
+                    "status",
+                    models.CharField(
+                        choices=[
+                            ("INITIAL", "Initial"),
+                            ("SUCCESS", "Success"),
+                            ("FAILURE", "Failure"),
+                            ("REVOKED", "Revoked"),
+                            ("IGNORED", "Ignored"),
+                        ],
+                        default="INITIAL",
+                        max_length=20,
+                    ),
+                ),
+                ("done_at", models.DateTimeField(blank=True, default=None, null=True)),
+                ("task_result", models.TextField(blank=True, default=None, null=True)),
+                (
+                    "user",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL
+                    ),
+                ),
+            ],
+            options={
+                "abstract": False,
+            },
+        ),
+    ]
--- a/epdb/models.py
+++ b/epdb/models.py
@ -28,7 +28,8 @@ from sklearn.metrics import precision_score, recall_score, jaccard_score
 from sklearn.model_selection import ShuffleSplit

 from utilities.chem import FormatConverter, ProductSet, PredictionResult, IndigoUtils
-from utilities.ml import Dataset, ApplicabilityDomainPCA, EnsembleClassifierChain, RelativeReasoning
+from utilities.ml import RuleBasedDataset, ApplicabilityDomainPCA, EnsembleClassifierChain, RelativeReasoning, \
+    EnviFormerDataset, Dataset

 logger = logging.getLogger(__name__)

@ -310,7 +311,7 @@ class ExternalDatabase(TimeStampedModel):
                },
                {
                    "database": ExternalDatabase.objects.get(name="ChEBI"),
-                    "placeholder": "ChEBI ID without prefix e.g. 12345",
+                    "placeholder": "ChEBI ID without prefix e.g. 10576",
                },
            ],
            "structure": [
@ -328,7 +329,7 @@ class ExternalDatabase(TimeStampedModel):
                },
                {
                    "database": ExternalDatabase.objects.get(name="ChEBI"),
-                    "placeholder": "ChEBI ID without prefix e.g. 12345",
+                    "placeholder": "ChEBI ID without prefix e.g. 10576",
                },
            ],
            "reaction": [
@ -342,7 +343,7 @@ class ExternalDatabase(TimeStampedModel):
                },
                {
                    "database": ExternalDatabase.objects.get(name="UniProt"),
-                    "placeholder": "Query ID for UniPro e.g. rhea:12345",
+                    "placeholder": "Query ID for UniProt e.g. rhea:12345",
                },
            ],
        }
@ -477,7 +478,7 @@ class ChemicalIdentifierMixin(ExternalIdentifierMixin):
        return self.add_external_identifier("CAS", cas_number)

    def get_pubchem_identifiers(self):
-        return self.get_external_identifier("PubChem Compound") or self.get_external_identifier(
+        return self.get_external_identifier("PubChem Compound") | self.get_external_identifier(
            "PubChem Substance"
        )

@ -2175,7 +2176,7 @@ class PackageBasedModel(EPModel):

        applicable_rules = self.applicable_rules
        reactions = list(self._get_reactions())
-        ds = Dataset.generate_dataset(reactions, applicable_rules, educts_only=True)
+        ds = RuleBasedDataset.generate_dataset(reactions, applicable_rules, educts_only=True)

        end = datetime.now()
        logger.debug(f"build_dataset took {(end - start).total_seconds()} seconds")
@ -2184,7 +2185,7 @@ class PackageBasedModel(EPModel):
        ds.save(f)
        return ds

-    def load_dataset(self) -> "Dataset":
+    def load_dataset(self) -> "Dataset | RuleBasedDataset | EnviFormerDataset":
        ds_path = os.path.join(s.MODEL_DIR, f"{self.uuid}_ds.pkl")
        return Dataset.load(ds_path)

@ -2225,10 +2226,18 @@ class PackageBasedModel(EPModel):
        self.model_status = self.BUILT_NOT_EVALUATED
        self.save()

-    def evaluate_model(self):
+    def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None, **kwargs):
        if self.model_status != self.BUILT_NOT_EVALUATED:
            raise ValueError(f"Can't evaluate a model in state {self.model_status}!")

+        if multigen:
+            self.multigen_eval = multigen
+            self.save()
+
+        if eval_packages is not None:
+            for p in eval_packages:
+                self.eval_packages.add(p)
+
        self.model_status = self.EVALUATING
        self.save()

@ -2335,37 +2344,37 @@ class PackageBasedModel(EPModel):
            eval_reactions = list(
                Reaction.objects.filter(package__in=self.eval_packages.all()).distinct()
            )
-            ds = Dataset.generate_dataset(eval_reactions, self.applicable_rules, educts_only=True)
+            ds = RuleBasedDataset.generate_dataset(eval_reactions, self.applicable_rules, educts_only=True)
            if isinstance(self, RuleBasedRelativeReasoning):
-                X = np.array(ds.X(exclude_id_col=False, na_replacement=None))
-                y = np.array(ds.y(na_replacement=np.nan))
+                X = ds.X(exclude_id_col=False, na_replacement=None).to_numpy()
+                y = ds.y(na_replacement=np.nan).to_numpy()
            else:
-                X = np.array(ds.X(na_replacement=np.nan))
-                y = np.array(ds.y(na_replacement=np.nan))
+                X = ds.X(na_replacement=np.nan).to_numpy()
+                y = ds.y(na_replacement=np.nan).to_numpy()
            single_gen_result = evaluate_sg(self.model, X, y, np.arange(len(X)), self.threshold)
            self.eval_results = self.compute_averages([single_gen_result])
        else:
            ds = self.load_dataset()

            if isinstance(self, RuleBasedRelativeReasoning):
-                X = np.array(ds.X(exclude_id_col=False, na_replacement=None))
-                y = np.array(ds.y(na_replacement=np.nan))
+                X = ds.X(exclude_id_col=False, na_replacement=None).to_numpy()
+                y = ds.y(na_replacement=np.nan).to_numpy()
            else:
-                X = np.array(ds.X(na_replacement=np.nan))
-                y = np.array(ds.y(na_replacement=np.nan))
+                X = ds.X(na_replacement=np.nan).to_numpy()
+                y = ds.y(na_replacement=np.nan).to_numpy()

-            n_splits = 20
+            n_splits = kwargs.get("n_splits", 20)

            shuff = ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42)
            splits = list(shuff.split(X))

            from joblib import Parallel, delayed

-            models = Parallel(n_jobs=10)(
+            models = Parallel(n_jobs=min(10, len(splits)))(
                delayed(train_func)(X, y, train_index, self._model_args())
                for train_index, _ in splits
            )
-            evaluations = Parallel(n_jobs=10)(
+            evaluations = Parallel(n_jobs=min(10, len(splits)))(
                delayed(evaluate_sg)(model, X, y, test_index, self.threshold)
                for model, (_, test_index) in zip(models, splits)
            )
@ -2525,7 +2534,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel):
        package: "Package",
        rule_packages: List["Package"],
        data_packages: List["Package"],
-        eval_packages: List["Package"],
        threshold: float = 0.5,
        min_count: int = 10,
        max_count: int = 0,
@ -2574,19 +2582,15 @@ class RuleBasedRelativeReasoning(PackageBasedModel):
            for p in rule_packages:
                rbrr.data_packages.add(p)

-        if eval_packages:
-            for p in eval_packages:
-                rbrr.eval_packages.add(p)
-
        rbrr.save()

        return rbrr

-    def _fit_model(self, ds: Dataset):
+    def _fit_model(self, ds: RuleBasedDataset):
        X, y = ds.X(exclude_id_col=False, na_replacement=None), ds.y(na_replacement=None)
        model = RelativeReasoning(
            start_index=ds.triggered()[0],
-            end_index=ds.triggered()[1],
+            end_index=ds.triggered()[-1],
        )
        model.fit(X, y)
        return model
@ -2596,7 +2600,7 @@ class RuleBasedRelativeReasoning(PackageBasedModel):
        return {
            "clz": "RuleBaseRelativeReasoning",
            "start_index": ds.triggered()[0],
-            "end_index": ds.triggered()[1],
+            "end_index": ds.triggered()[-1],
        }

    def _save_model(self, model):
@ -2632,7 +2636,6 @@ class MLRelativeReasoning(PackageBasedModel):
        package: "Package",
        rule_packages: List["Package"],
        data_packages: List["Package"],
-        eval_packages: List["Package"],
        threshold: float = 0.5,
        name: "str" = None,
        description: str = None,
@ -2672,10 +2675,6 @@ class MLRelativeReasoning(PackageBasedModel):
            for p in rule_packages:
                mlrr.data_packages.add(p)

-        if eval_packages:
-            for p in eval_packages:
-                mlrr.eval_packages.add(p)
-
        if build_app_domain:
            ad = ApplicabilityDomain.create(
                mlrr,
@ -2689,11 +2688,11 @@ class MLRelativeReasoning(PackageBasedModel):

        return mlrr

-    def _fit_model(self, ds: Dataset):
+    def _fit_model(self, ds: RuleBasedDataset):
        X, y = ds.X(na_replacement=np.nan), ds.y(na_replacement=np.nan)

        model = EnsembleClassifierChain(**s.DEFAULT_MODEL_PARAMS)
-        model.fit(X, y)
+        model.fit(X.to_numpy(), y.to_numpy())
        return model

    def _model_args(self):
@ -2716,7 +2715,7 @@ class MLRelativeReasoning(PackageBasedModel):
        start = datetime.now()
        ds = self.load_dataset()
        classify_ds, classify_prods = ds.classification_dataset([smiles], self.applicable_rules)
-        pred = self.model.predict_proba(classify_ds.X())
+        pred = self.model.predict_proba(classify_ds.X().to_numpy())

        res = MLRelativeReasoning.combine_products_and_probs(
            self.applicable_rules, pred[0], classify_prods[0]
@ -2761,7 +2760,9 @@ class ApplicabilityDomain(EnviPathModel):

    @cached_property
    def training_set_probs(self):
-        return joblib.load(os.path.join(s.MODEL_DIR, f"{self.model.uuid}_train_probs.pkl"))
+        ds = self.model.load_dataset()
+        col_ids = ds.block_indices("prob")
+        return ds[:, col_ids]

    def build(self):
        ds = self.model.load_dataset()
@ -2769,9 +2770,9 @@ class ApplicabilityDomain(EnviPathModel):
        start = datetime.now()

        # Get Trainingset probs and dump them as they're required when using the app domain
-        probs = self.model.model.predict_proba(ds.X())
-        f = os.path.join(s.MODEL_DIR, f"{self.model.uuid}_train_probs.pkl")
-        joblib.dump(probs, f)
+        probs = self.model.model.predict_proba(ds.X().to_numpy())
+        ds.add_probs(probs)
+        ds.save(os.path.join(s.MODEL_DIR, f"{self.model.uuid}_ds.pkl"))

        ad = ApplicabilityDomainPCA(num_neighbours=self.num_neighbours)
        ad.build(ds)
@ -2794,16 +2795,19 @@ class ApplicabilityDomain(EnviPathModel):
        joblib.dump(ad, f)

    def assess(self, structure: Union[str, "CompoundStructure"]):
+        return self.assess_batch([structure])[0]
+
+    def assess_batch(self, structures: List["CompoundStructure | str"]):
        ds = self.model.load_dataset()

-        if isinstance(structure, CompoundStructure):
-            smiles = structure.smiles
-        else:
-            smiles = structure
+        smiles = []
+        for struct in structures:
+            if isinstance(struct, CompoundStructure):
+                smiles.append(structures.smiles)
+            else:
+                smiles.append(structures)

-        assessment_ds, assessment_prods = ds.classification_dataset(
-            [structure], self.model.applicable_rules
-        )
+        assessment_ds, assessment_prods = ds.classification_dataset(structures, self.model.applicable_rules)

        # qualified_neighbours_per_rule is a nested dictionary structured as:
        #   {
@ -2816,82 +2820,46 @@ class ApplicabilityDomain(EnviPathModel):
        # it identifies all training structures that have the same trigger reaction activated (i.e., value 1).
        # This is used to find "qualified neighbours" — training examples that share the same triggered feature
        # with a given assessment structure under a particular rule.
-        qualified_neighbours_per_rule: Dict[int, Dict[int, List[int]]] = defaultdict(
-            lambda: defaultdict(list)
-        )
+        qualified_neighbours_per_rule: Dict = {}

-        for rule_idx, feature_index in enumerate(range(*assessment_ds.triggered())):
-            feature = ds.columns[feature_index]
-            if feature.startswith("trig_"):
-                # TODO unroll loop
-                for i, cx in enumerate(assessment_ds.X(exclude_id_col=False)):
-                    if int(cx[feature_index]) == 1:
-                        for j, tx in enumerate(ds.X(exclude_id_col=False)):
-                            if int(tx[feature_index]) == 1:
-                                qualified_neighbours_per_rule[i][rule_idx].append(j)
-
-        probs = self.training_set_probs
-        # preds = self.model.model.predict_proba(assessment_ds.X())
+        import polars as pl
+        # Select only the triggered columns
+        for i, row in enumerate(assessment_ds[:, assessment_ds.triggered()].iter_rows(named=True)):
+            # Find the rules the structure triggers. For each rule, filter the training dataset to rows that also
+            # trigger that rule.
+            train_trig = {trig_uuid.split("_")[-1]: ds.filter(pl.col(trig_uuid).eq(1))
+                          for trig_uuid, value in row.items() if value == 1}
+            qualified_neighbours_per_rule[i] = train_trig
+        rule_to_i = {str(r.uuid): i for i, r in enumerate(self.model.applicable_rules)}
        preds = self.model.combine_products_and_probs(
            self.model.applicable_rules,
-            self.model.model.predict_proba(assessment_ds.X())[0],
+            self.model.model.predict_proba(assessment_ds.X().to_numpy())[0],
            assessment_prods[0],
        )

        assessments = list()
-
        # loop through our assessment dataset
-        for i, instance in enumerate(assessment_ds):
+        for i, instance in enumerate(assessment_ds[:, assessment_ds.struct_features()]):
            rule_reliabilities = dict()
            local_compatibilities = dict()
            neighbours_per_rule = dict()
            neighbor_probs_per_rule = dict()

            # loop through rule indices together with the collected neighbours indices from train dataset
-            for rule_idx, vals in qualified_neighbours_per_rule[i].items():
-                # collect the train dataset instances and store it along with the index (a.k.a. row number) of the
-                # train dataset
-                train_instances = []
-                for v in vals:
-                    train_instances.append((v, ds.at(v)))
-
-                # sf is a tuple with start/end index of the features
-                sf = ds.struct_features()
-
-                # compute tanimoto distance for all neighbours
-                # result ist a list of tuples with train index and computed distance
-                dists = self._compute_distances(
-                    instance.X()[0][sf[0] : sf[1]],
-                    [ti[1].X()[0][sf[0] : sf[1]] for ti in train_instances],
-                )
-
-                dists_with_index = list()
-                for ti, dist in zip(train_instances, dists):
-                    dists_with_index.append((ti[0], dist[1]))
+            for rule_uuid, train_instances in qualified_neighbours_per_rule[i].items():
+                # compute tanimoto distance for all neighbours and add to dataset
+                dists = self._compute_distances(assessment_ds[i, assessment_ds.struct_features()].to_numpy()[0],
+                                                train_instances[:, train_instances.struct_features()].to_numpy())
+                train_instances = train_instances.with_columns(dist=pl.Series(dists))

                #  sort them in a descending way and take at most `self.num_neighbours`
-                dists_with_index = sorted(dists_with_index, key=lambda x: x[1], reverse=True)
-                dists_with_index = dists_with_index[: self.num_neighbours]
-
+                train_instances = train_instances.sort("dist", descending=True)[:self.num_neighbours]
                # compute average distance
-                rule_reliabilities[rule_idx] = (
-                    sum([d[1] for d in dists_with_index]) / len(dists_with_index)
-                    if len(dists_with_index) > 0
-                    else 0.0
-                )
-
+                rule_reliabilities[rule_uuid] = train_instances.select(pl.mean("dist")).fill_nan(0.0).item()
                # for local_compatibility we'll need the datasets for the indices having the highest similarity
-                neighbour_datasets = [(d[0], ds.at(d[0])) for d in dists_with_index]
-                local_compatibilities[rule_idx] = self._compute_compatibility(
-                    rule_idx, probs, neighbour_datasets
-                )
-                neighbours_per_rule[rule_idx] = [
-                    CompoundStructure.objects.get(uuid=ds[1].structure_id())
-                    for ds in neighbour_datasets
-                ]
-                neighbor_probs_per_rule[rule_idx] = [
-                    probs[d[0]][rule_idx] for d in dists_with_index
-                ]
+                local_compatibilities[rule_uuid] = self._compute_compatibility(rule_uuid, train_instances)
+                neighbours_per_rule[rule_uuid] = list(CompoundStructure.objects.filter(uuid__in=train_instances["structure_id"]))
+                neighbor_probs_per_rule[rule_uuid] = train_instances[f"prob_{rule_uuid}"].to_list()

            ad_res = {
                "ad_params": {
@ -2902,23 +2870,21 @@ class ApplicabilityDomain(EnviPathModel):
                    "local_compatibility_threshold": self.local_compatibilty_threshold,
                },
                "assessment": {
-                    "smiles": smiles,
-                    "inside_app_domain": self.pca.is_applicable(instance)[0],
+                    "smiles": smiles[i],
+                    "inside_app_domain": self.pca.is_applicable(assessment_ds[i])[0],
                },
            }

            transformations = list()
-            for rule_idx in rule_reliabilities.keys():
-                rule = Rule.objects.get(
-                    uuid=instance.columns[instance.observed()[0] + rule_idx].replace("obs_", "")
-                )
+            for rule_uuid in rule_reliabilities.keys():
+                rule = Rule.objects.get(uuid=rule_uuid)

                rule_data = rule.simple_json()
                rule_data["image"] = f"{rule.url}?image=svg"

                neighbors = []
                for n, n_prob in zip(
-                    neighbours_per_rule[rule_idx], neighbor_probs_per_rule[rule_idx]
+                    neighbours_per_rule[rule_uuid], neighbor_probs_per_rule[rule_uuid]
                ):
                    neighbor = n.simple_json()
                    neighbor["image"] = f"{n.url}?image=svg"
@ -2935,14 +2901,14 @@ class ApplicabilityDomain(EnviPathModel):

                transformation = {
                    "rule": rule_data,
-                    "reliability": rule_reliabilities[rule_idx],
+                    "reliability": rule_reliabilities[rule_uuid],
                    # We're setting it here to False, as we don't know whether "assess" is called during pathway
                    # prediction or from Model Page. For persisted Nodes this field will be overwritten at runtime
                    "is_predicted": False,
-                    "local_compatibility": local_compatibilities[rule_idx],
-                    "probability": preds[rule_idx].probability,
+                    "local_compatibility": local_compatibilities[rule_uuid],
+                    "probability": preds[rule_to_i[rule_uuid]].probability,
                    "transformation_products": [
-                        x.product_set for x in preds[rule_idx].product_sets
+                        x.product_set for x in preds[rule_to_i[rule_uuid]].product_sets
                    ],
                    "times_triggered": ds.times_triggered(str(rule.uuid)),
                    "neighbors": neighbors,
@ -2960,32 +2926,21 @@ class ApplicabilityDomain(EnviPathModel):
    def _compute_distances(classify_instance: List[int], train_instances: List[List[int]]):
        from utilities.ml import tanimoto_distance

-        distances = [
-            (i, tanimoto_distance(classify_instance, train))
-            for i, train in enumerate(train_instances)
-        ]
+        distances = [tanimoto_distance(classify_instance, train) for train in train_instances]
        return distances

-    @staticmethod
-    def _compute_compatibility(rule_idx: int, preds, neighbours: List[Tuple[int, "Dataset"]]):
-        tp, tn, fp, fn = 0.0, 0.0, 0.0, 0.0
+    def _compute_compatibility(self, rule_idx: int, neighbours: "RuleBasedDataset"):
        accuracy = 0.0
-
-        for n in neighbours:
-            obs = n[1].y()[0][rule_idx]
-            pred = preds[n[0]][rule_idx]
-            if obs and pred:
-                tp += 1
-            elif not obs and pred:
-                fp += 1
-            elif obs and not pred:
-                fn += 1
-            else:
-                tn += 1
-            # Jaccard Index
-            if tp + tn > 0.0:
-                accuracy = (tp + tn) / (tp + tn + fp + fn)
-
+        import polars as pl
+        obs_pred = neighbours.select(obs=pl.col(f"obs_{rule_idx}").cast(pl.Boolean),
+                                     pred=pl.col(f"prob_{rule_idx}") >= self.model.threshold)
+        # Compute tp, tn, fp, fn using polars expressions
+        tp = obs_pred.filter((pl.col("obs")) & (pl.col("pred"))).height
+        tn = obs_pred.filter((~pl.col("obs")) & (~pl.col("pred"))).height
+        fp = obs_pred.filter((~pl.col("obs")) & (pl.col("pred"))).height
+        fn = obs_pred.filter((pl.col("obs")) & (~pl.col("pred"))).height
+        if tp + tn > 0.0:
+            accuracy = (tp + tn) / (tp + tn + fp + fn)
        return accuracy


@ -2995,7 +2950,6 @@ class EnviFormer(PackageBasedModel):
    def create(
        package: "Package",
        data_packages: List["Package"],
-        eval_packages: List["Package"],
        threshold: float = 0.5,
        name: "str" = None,
        description: str = None,
@ -3028,10 +2982,6 @@ class EnviFormer(PackageBasedModel):
        for p in data_packages:
            mod.data_packages.add(p)

-        if eval_packages:
-            for p in eval_packages:
-                mod.eval_packages.add(p)
-
        # if build_app_domain:
        #     ad = ApplicabilityDomain.create(mod, app_domain_num_neighbours, app_domain_reliability_threshold,
        #                                     app_domain_local_compatibility_threshold)
@ -3045,7 +2995,8 @@ class EnviFormer(PackageBasedModel):
        from enviformer import load

        ckpt = os.path.join(s.MODEL_DIR, "enviformer", str(self.uuid), f"{self.uuid}.ckpt")
-        return load(device=s.ENVIFORMER_DEVICE, ckpt_path=ckpt)
+        mod = load(device=s.ENVIFORMER_DEVICE, ckpt_path=ckpt)
+        return mod

    def predict(self, smiles) -> List["PredictionResult"]:
        return self.predict_batch([smiles])[0]
@ -3059,8 +3010,12 @@ class EnviFormer(PackageBasedModel):
            for smiles in smiles_list
        ]
        logger.info(f"Submitting {canon_smiles} to {self.name}")
+        start = datetime.now()
        products_list = self.model.predict_batch(canon_smiles)
-        logger.info(f"Got results {products_list}")
+        end = datetime.now()
+        logger.info(
+            f"Prediction took {(end - start).total_seconds():.2f} seconds. Got results {products_list}"
+        )

        results = []
        for products in products_list:
@ -3086,42 +3041,24 @@ class EnviFormer(PackageBasedModel):
        self.save()

        start = datetime.now()
-        # Standardise reactions for the training data, EnviFormer ignores stereochemistry currently
-        ds = []
-        for reaction in self._get_reactions():
-            educts = ".".join(
-                [
-                    FormatConverter.standardize(smile.smiles, remove_stereo=True)
-                    for smile in reaction.educts.all()
-                ]
-            )
-            products = ".".join(
-                [
-                    FormatConverter.standardize(smile.smiles, remove_stereo=True)
-                    for smile in reaction.products.all()
-                ]
-            )
-            ds.append(f"{educts}>>{products}")
+        ds = EnviFormerDataset.generate_dataset(self._get_reactions())

        end = datetime.now()
        logger.debug(f"build_dataset took {(end - start).total_seconds()} seconds")
        f = os.path.join(s.MODEL_DIR, f"{self.uuid}_ds.json")
-        with open(f, "w") as d_file:
-            json.dump(ds, d_file)
+        ds.save(f)
        return ds

-    def load_dataset(self) -> "Dataset":
+    def load_dataset(self):
        ds_path = os.path.join(s.MODEL_DIR, f"{self.uuid}_ds.json")
-        with open(ds_path) as d_file:
-            ds = json.load(d_file)
-        return ds
+        return EnviFormerDataset.load(ds_path)

    def _fit_model(self, ds):
        # Call to enviFormer's fine_tune function and return the model
        from enviformer.finetune import fine_tune

        start = datetime.now()
-        model = fine_tune(ds, s.MODEL_DIR, str(self.uuid), device=s.ENVIFORMER_DEVICE)
+        model = fine_tune(ds.X(), ds.y(), s.MODEL_DIR, str(self.uuid), device=s.ENVIFORMER_DEVICE)
        end = datetime.now()
        logger.debug(f"EnviFormer finetuning took {(end - start).total_seconds():.2f} seconds")
        return model
@ -3137,28 +3074,35 @@ class EnviFormer(PackageBasedModel):
        args = {"clz": "EnviFormer"}
        return args

-    def evaluate_model(self):
+    def evaluate_model(self, multigen: bool, eval_packages: List["Package"] = None, **kwargs):
        if self.model_status != self.BUILT_NOT_EVALUATED:
            raise ValueError(f"Can't evaluate a model in state {self.model_status}!")

+        if multigen:
+            self.multigen_eval = multigen
+            self.save()
+
+        if eval_packages is not None:
+            for p in eval_packages:
+                self.eval_packages.add(p)
+
        self.model_status = self.EVALUATING
        self.save()

-        def evaluate_sg(test_reactions, predictions, model_thresh):
+        def evaluate_sg(test_ds, predictions, model_thresh):
            # Group the true products of reactions with the same reactant together
+            assert len(test_ds) == len(predictions)
            true_dict = {}
-            for r in test_reactions:
-                reactant, true_product_set = r.split(">>")
+            for r in test_ds:
+                reactant, true_product_set = r
                true_product_set = {p for p in true_product_set.split(".")}
                true_dict[reactant] = true_dict.setdefault(reactant, []) + [true_product_set]
-            assert len(test_reactions) == len(predictions)
-            assert sum(len(v) for v in true_dict.values()) == len(test_reactions)

            # Group the predicted products of reactions with the same reactant together
            pred_dict = {}
            for k, pred in enumerate(predictions):
                pred_smiles, pred_proba = zip(*pred.items())
-                reactant, true_product = test_reactions[k].split(">>")
+                reactant, true_product = test_ds[k, "educts"], test_ds[k, "products"]
                pred_dict.setdefault(reactant, {"predict": [], "scores": []})
                for smiles, proba in zip(pred_smiles, pred_proba):
                    smiles = set(smiles.split("."))
@ -3193,7 +3137,7 @@ class EnviFormer(PackageBasedModel):
                                break

            # Recall is TP (correct) / TP + FN (len(test_reactions))
-            rec = {f"{k:.2f}": v / len(test_reactions) for k, v in correct.items()}
+            rec = {f"{k:.2f}": v / len(test_ds) for k, v in correct.items()}
            # Precision is TP (correct) / TP + FP (predicted)
            prec = {
                f"{k:.2f}": v / predicted[k] if predicted[k] > 0 else 0 for k, v in correct.items()
@ -3272,47 +3216,32 @@ class EnviFormer(PackageBasedModel):

        # If there are eval packages perform single generation evaluation on them instead of random splits
        if self.eval_packages.count() > 0:
-            ds = []
-            for reaction in Reaction.objects.filter(
-                package__in=self.eval_packages.all()
-            ).distinct():
-                educts = ".".join(
-                    [
-                        FormatConverter.standardize(smile.smiles, remove_stereo=True)
-                        for smile in reaction.educts.all()
-                    ]
-                )
-                products = ".".join(
-                    [
-                        FormatConverter.standardize(smile.smiles, remove_stereo=True)
-                        for smile in reaction.products.all()
-                    ]
-                )
-                ds.append(f"{educts}>>{products}")
-            test_result = self.model.predict_batch([smirk.split(">>")[0] for smirk in ds])
+            ds = EnviFormerDataset.generate_dataset(Reaction.objects.filter(
+                package__in=self.eval_packages.all()).distinct())
+            test_result = self.model.predict_batch(ds.X())
            single_gen_result = evaluate_sg(ds, test_result, self.threshold)
            self.eval_results = self.compute_averages([single_gen_result])
        else:
            from enviformer.finetune import fine_tune

            ds = self.load_dataset()
-            n_splits = 20
-            shuff = ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42)
+            n_splits = kwargs.get("n_splits", 20)
+            shuff = ShuffleSplit(n_splits=n_splits, test_size=0.1, random_state=42)

            # Single gen eval is done in one loop of train then evaluate rather than storing all n_splits trained models
            # this helps reduce the memory footprint.
            single_gen_results = []
            for split_id, (train_index, test_index) in enumerate(shuff.split(ds)):
-                train = [ds[i] for i in train_index]
-                test = [ds[i] for i in test_index]
+                train = ds[train_index]
+                test = ds[test_index]
                start = datetime.now()
-                model = fine_tune(train, s.MODEL_DIR, str(split_id), device=s.ENVIFORMER_DEVICE)
+                model = fine_tune(train.X(), train.y(), s.MODEL_DIR, str(split_id), device=s.ENVIFORMER_DEVICE)
                end = datetime.now()
                logger.debug(
                    f"EnviFormer finetuning took {(end - start).total_seconds():.2f} seconds"
                )
                model.to(s.ENVIFORMER_DEVICE)
-                test_result = model.predict_batch([smirk.split(">>")[0] for smirk in test])
+                test_result = model.predict_batch(test.X())
                single_gen_results.append(evaluate_sg(test, test_result, self.threshold))

            self.eval_results = self.compute_averages(single_gen_results)
@ -3365,7 +3294,7 @@ class EnviFormer(PackageBasedModel):
                # Compute splits of the collected pathway and evaluate. Like single gen we train and evaluate in each
                # iteration instead of storing all trained models.
                for split_id, (train, test) in enumerate(
-                    ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42).split(pathways)
+                    ShuffleSplit(n_splits=n_splits, test_size=0.1, random_state=42).split(pathways)
                ):
                    train_pathways = [pathways[i] for i in train]
                    test_pathways = [pathways[i] for i in test]
@ -3383,31 +3312,15 @@ class EnviFormer(PackageBasedModel):
                    for pathway in train_pathways:
                        for reaction in pathway.edges:
                            reaction = reaction.edge_label
-                            if any(
-                                [
-                                    educt in test_educts
-                                    for educt in reaction_to_educts[str(reaction.uuid)]
-                                ]
-                            ):
+                            if any([educt in test_educts for educt in reaction_to_educts[str(reaction.uuid)]]):
                                overlap += 1
                                continue
-                            educts = ".".join(
-                                [
-                                    FormatConverter.standardize(smile.smiles, remove_stereo=True)
-                                    for smile in reaction.educts.all()
-                                ]
-                            )
-                            products = ".".join(
-                                [
-                                    FormatConverter.standardize(smile.smiles, remove_stereo=True)
-                                    for smile in reaction.products.all()
-                                ]
-                            )
-                            train_reactions.append(f"{educts}>>{products}")
+                            train_reactions.append(reaction)
+                    train_ds = EnviFormerDataset.generate_dataset(train_reactions)
                    logging.debug(
                        f"{overlap} compounds had to be removed from multigen split due to overlap within pathways"
                    )
-                    model = fine_tune(train_reactions, s.MODEL_DIR, f"mg_{split_id}")
+                    model = fine_tune(train_ds.X(), train_ds.y(), s.MODEL_DIR, f"mg_{split_id}")
                    multi_gen_results.append(evaluate_mg(model, test_pathways, self.threshold))

                self.eval_results.update(
@ -3664,3 +3577,53 @@ class Setting(EnviPathModel):
            self.public = True
        self.global_default = True
        self.save()
+
+
+class JobLogStatus(models.TextChoices):
+    INITIAL = "INITIAL", "Initial"
+    SUCCESS = "SUCCESS", "Success"
+    FAILURE = "FAILURE", "Failure"
+    REVOKED = "REVOKED", "Revoked"
+    IGNORED = "IGNORED", "Ignored"
+
+
+class JobLog(TimeStampedModel):
+    user = models.ForeignKey("epdb.User", models.CASCADE)
+    task_id = models.UUIDField(unique=True)
+    job_name = models.TextField(null=False, blank=False)
+    status = models.CharField(
+        max_length=20,
+        choices=JobLogStatus.choices,
+        default=JobLogStatus.INITIAL,
+    )
+
+    done_at = models.DateTimeField(null=True, blank=True, default=None)
+    task_result = models.TextField(null=True, blank=True, default=None)
+
+    def check_for_update(self):
+        async_res = self.get_result()
+        new_status = async_res.state
+
+        TERMINAL_STATES = [
+            "SUCCESS",
+            "FAILURE",
+            "REVOKED",
+            "IGNORED",
+        ]
+
+        if new_status != self.status and new_status in TERMINAL_STATES:
+            self.status = new_status
+            self.done_at = async_res.date_done
+
+            if new_status == "SUCCESS":
+                self.task_result = async_res.result
+
+            self.save()
+
+            return True
+        return False
+
+    def get_result(self):
+        from celery.result import AsyncResult
+
+        return AsyncResult(str(self.task_id))
--- a/epdb/tasks.py
+++ b/epdb/tasks.py
@ -1,12 +1,58 @@
+import csv
+import io
 import logging
-from typing import Optional
+from datetime import datetime
+from typing import Any, Callable, List, Optional
+from uuid import uuid4

 from celery import shared_task
-from epdb.models import Pathway, Node, EPModel, Setting
-from epdb.logic import SPathway
+from celery.utils.functional import LRUCache

+from epdb.logic import SPathway
+from epdb.models import EPModel, JobLog, Node, Package, Pathway, Rule, Setting, User, Edge

 logger = logging.getLogger(__name__)
+ML_CACHE = LRUCache(3)  # Cache the three most recent ML models to reduce load times.
+
+
+def get_ml_model(model_pk: int):
+    if model_pk not in ML_CACHE:
+        ML_CACHE[model_pk] = EPModel.objects.get(id=model_pk)
+    return ML_CACHE[model_pk]
+
+
+def dispatch_eager(user: "User", job: Callable, *args, **kwargs):
+    try:
+        x = job(*args, **kwargs)
+        log = JobLog()
+        log.user = user
+        log.task_id = uuid4()
+        log.job_name = job.__name__
+        log.status = "SUCCESS"
+        log.done_at = datetime.now()
+        log.task_result = str(x) if x else None
+        log.save()
+
+        return x
+    except Exception as e:
+        logger.exception(e)
+        raise e
+
+
+def dispatch(user: "User", job: Callable, *args, **kwargs):
+    try:
+        x = job.delay(*args, **kwargs)
+        log = JobLog()
+        log.user = user
+        log.task_id = x.task_id
+        log.job_name = job.__name__
+        log.status = "INITIAL"
+        log.save()
+
+        return x.result
+    except Exception as e:
+        logger.exception(e)
+        raise e


@shared_task(queue="background")
@ -16,7 +62,7 @@ def mul(a, b):

@shared_task(queue="predict")
 def predict_simple(model_pk: int, smiles: str):
-    mod = EPModel.objects.get(id=model_pk)
+    mod = get_ml_model(model_pk)
    res = mod.predict(smiles)
    return res

@ -26,17 +72,55 @@ def send_registration_mail(user_pk: int):
    pass


-@shared_task(queue="model")
-def build_model(model_pk: int):
+@shared_task(bind=True, queue="model")
+def build_model(self, model_pk: int):
    mod = EPModel.objects.get(id=model_pk)
-    mod.build_dataset()
-    mod.build_model()
+
+    if JobLog.objects.filter(task_id=self.request.id).exists():
+        JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=mod.url)
+
+    try:
+        mod.build_dataset()
+        mod.build_model()
+    except Exception as e:
+        if JobLog.objects.filter(task_id=self.request.id).exists():
+            JobLog.objects.filter(task_id=self.request.id).update(
+                status="FAILED", task_result=mod.url
+            )
+
+        raise e
+
+    if JobLog.objects.filter(task_id=self.request.id).exists():
+        JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=mod.url)
+
+    return mod.url


-@shared_task(queue="model")
-def evaluate_model(model_pk: int):
+@shared_task(bind=True, queue="model")
+def evaluate_model(self, model_pk: int, multigen: bool, package_pks: Optional[list] = None):
+    packages = None
+
+    if package_pks:
+        packages = Package.objects.filter(pk__in=package_pks)
+
    mod = EPModel.objects.get(id=model_pk)
-    mod.evaluate_model()
+    if JobLog.objects.filter(task_id=self.request.id).exists():
+        JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=mod.url)
+
+    try:
+        mod.evaluate_model(multigen, eval_packages=packages)
+    except Exception as e:
+        if JobLog.objects.filter(task_id=self.request.id).exists():
+            JobLog.objects.filter(task_id=self.request.id).update(
+                status="FAILED", task_result=mod.url
+            )
+
+        raise e
+
+    if JobLog.objects.filter(task_id=self.request.id).exists():
+        JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=mod.url)
+
+    return mod.url


@shared_task(queue="model")
@ -45,16 +129,26 @@ def retrain(model_pk: int):
    mod.retrain()


-@shared_task(queue="predict")
+@shared_task(bind=True, queue="predict")
 def predict(
-    pw_pk: int, pred_setting_pk: int, limit: Optional[int] = None, node_pk: Optional[int] = None
+    self,
+    pw_pk: int,
+    pred_setting_pk: int,
+    limit: Optional[int] = None,
+    node_pk: Optional[int] = None,
 ) -> Pathway:
    pw = Pathway.objects.get(id=pw_pk)
    setting = Setting.objects.get(id=pred_setting_pk)
+    # If the setting has a model add/restore it from the cache
+    if setting.model is not None:
+        setting.model = get_ml_model(setting.model.pk)

    pw.kv.update(**{"status": "running"})
    pw.save()

+    if JobLog.objects.filter(task_id=self.request.id).exists():
+        JobLog.objects.filter(task_id=self.request.id).update(status="RUNNING", task_result=pw.url)
+
    try:
        # regular prediction
        if limit is not None:
@ -79,7 +173,111 @@ def predict(
    except Exception as e:
        pw.kv.update({"status": "failed"})
        pw.save()
+
+        if JobLog.objects.filter(task_id=self.request.id).exists():
+            JobLog.objects.filter(task_id=self.request.id).update(
+                status="FAILED", task_result=pw.url
+            )
+
        raise e

    pw.kv.update(**{"status": "completed"})
    pw.save()
+
+    if JobLog.objects.filter(task_id=self.request.id).exists():
+        JobLog.objects.filter(task_id=self.request.id).update(status="SUCCESS", task_result=pw.url)
+
+    return pw.url
+
+
+@shared_task(bind=True, queue="background")
+def identify_missing_rules(
+    self,
+    pw_pks: List[int],
+    rule_package_pk: int,
+):
+    from utilities.misc import PathwayUtils
+
+    rules = Package.objects.get(pk=rule_package_pk).get_applicable_rules()
+
+    rows: List[Any] = []
+    header = [
+        "Package Name",
+        "Pathway Name",
+        "Educt Name",
+        "Educt SMILES",
+        "Reaction Name",
+        "Reaction SMIRKS",
+        "Triggered Rules",
+        "Reactant SMARTS",
+        "Product SMARTS",
+        "Product Names",
+        "Product SMILES",
+    ]
+
+    rows.append(header)
+
+    for pw in Pathway.objects.filter(pk__in=pw_pks):
+        pu = PathwayUtils(pw)
+
+        missing_rules = pu.find_missing_rules(rules)
+
+        package_name = pw.package.name
+        pathway_name = pw.name
+
+        for edge_url, rule_chain in missing_rules.items():
+            row: List[Any] = [package_name, pathway_name]
+            edge = Edge.objects.get(url=edge_url)
+            educts = edge.start_nodes.all()
+
+            for educt in educts:
+                row.append(educt.default_node_label.name)
+                row.append(educt.default_node_label.smiles)
+
+            row.append(edge.edge_label.name)
+            row.append(edge.edge_label.smirks())
+
+            rule_names = []
+            reactant_smarts = []
+            product_smarts = []
+
+            for r in rule_chain:
+                r = Rule.objects.get(url=r[0])
+                rule_names.append(r.name)
+
+                rs = r.reactants_smarts
+                if isinstance(rs, set):
+                    rs = list(rs)
+
+                ps = r.products_smarts
+                if isinstance(ps, set):
+                    ps = list(ps)
+
+                reactant_smarts.append(rs)
+                product_smarts.append(ps)
+
+            row.append(rule_names)
+            row.append(reactant_smarts)
+            row.append(product_smarts)
+
+            products = edge.end_nodes.all()
+            product_names = []
+            product_smiles = []
+
+            for product in products:
+                product_names.append(product.default_node_label.name)
+                product_smiles.append(product.default_node_label.smiles)
+
+            row.append(product_names)
+            row.append(product_smiles)
+
+            rows.append(row)
+
+    buffer = io.StringIO()
+
+    writer = csv.writer(buffer)
+    writer.writerows(rows)
+
+    buffer.seek(0)
+
+    return buffer.getvalue()
--- a/epdb/templatetags/envipytags.py
+++ b/epdb/templatetags/envipytags.py
@ -1,8 +1,21 @@
 from django import template
+from pydantic import AnyHttpUrl, ValidationError
+from pydantic.type_adapter import TypeAdapter

 register = template.Library()

+url_adapter = TypeAdapter(AnyHttpUrl)
+

@register.filter
 def classname(obj):
    return obj.__class__.__name__
+
+
+@register.filter
+def is_url(value):
+    try:
+        url_adapter.validate_python(value)
+        return True
+    except ValidationError:
+        return False
--- a/epdb/urls.py
+++ b/epdb/urls.py
@ -190,6 +190,7 @@ urlpatterns = [
    re_path(r"^indigo/dearomatize$", v.dearomatize, name="indigo_dearomatize"),
    re_path(r"^indigo/layout$", v.layout, name="indigo_layout"),
    re_path(r"^depict$", v.depict, name="depict"),
+    re_path(r"^jobs", v.jobs, name="jobs"),
    # OAuth Stuff
    path("o/userinfo/", v.userinfo, name="oauth_userinfo"),
 ]
--- a/epdb/views.py
+++ b/epdb/views.py
@ -47,6 +47,7 @@ from .models import (
    ExternalDatabase,
    ExternalIdentifier,
    EnzymeLink,
+    JobLog,
 )

 logger = logging.getLogger(__name__)
@ -236,6 +237,7 @@ def get_base_context(request, for_user=None) -> Dict[str, Any]:
            "enabled_features": s.FLAGS,
            "debug": s.DEBUG,
            "external_databases": ExternalDatabase.get_databases(),
+            "site_id": s.MATOMO_SITE_ID,
        },
    }

@ -754,8 +756,8 @@ def package_models(request, package_uuid):
        context["unreviewed_objects"] = unreviewed_model_qs

        context["model_types"] = {
-            "ML Relative Reasoning": "ml-relative-reasoning",
-            "Rule Based Relative Reasoning": "rule-based-relative-reasoning",
+            "ML Relative Reasoning": "mlrr",
+            "Rule Based Relative Reasoning": "rbrr",
        }

        if s.FLAGS.get("ENVIFORMER", False):
@ -775,69 +777,67 @@ def package_models(request, package_uuid):

        model_type = request.POST.get("model-type")

+        # Generic fields for ML and Rule Based
+        rule_packages = request.POST.getlist("model-rule-packages")
+        data_packages = request.POST.getlist("model-data-packages")
+
+        # Generic params
+        params = {
+            "package": current_package,
+            "name": name,
+            "description": description,
+            "data_packages": [
+                PackageManager.get_package_by_url(current_user, p) for p in data_packages
+            ],
+        }
+
        if model_type == "enviformer":
-            threshold = float(request.POST.get(f"{model_type}-threshold", 0.5))
+            threshold = float(request.POST.get("model-threshold", 0.5))
+            params["threshold"] = threshold

-            mod = EnviFormer.create(current_package, name, description, threshold)
+            mod = EnviFormer.create(**params)
+        elif model_type == "mlrr":
+            # ML Specific
+            threshold = float(request.POST.get("model-threshold", 0.5))
+            # TODO handle additional fingerprinter
+            # fingerprinter = request.POST.get("model-fingerprinter")

-        elif model_type == "ml-relative-reasoning" or model_type == "rule-based-relative-reasoning":
-            # Generic fields for ML and Rule Based
-            rule_packages = request.POST.getlist("package-based-relative-reasoning-rule-packages")
-            data_packages = request.POST.getlist("package-based-relative-reasoning-data-packages")
-            eval_packages = request.POST.getlist(
-                "package-based-relative-reasoning-evaluation-packages", []
-            )
+            params["rule_packages"] = [
+                PackageManager.get_package_by_url(current_user, p) for p in rule_packages
+            ]

-            # Generic params
-            params = {
-                "package": current_package,
-                "name": name,
-                "description": description,
-                "rule_packages": [
-                    PackageManager.get_package_by_url(current_user, p) for p in rule_packages
-                ],
-                "data_packages": [
-                    PackageManager.get_package_by_url(current_user, p) for p in data_packages
-                ],
-                "eval_packages": [
-                    PackageManager.get_package_by_url(current_user, p) for p in eval_packages
-                ],
-            }
+            # App Domain related parameters
+            build_ad = request.POST.get("build-app-domain", False) == "on"
+            num_neighbors = request.POST.get("num-neighbors", 5)
+            reliability_threshold = request.POST.get("reliability-threshold", 0.5)
+            local_compatibility_threshold = request.POST.get("local-compatibility-threshold", 0.5)

-            if model_type == "ml-relative-reasoning":
-                # ML Specific
-                threshold = float(request.POST.get(f"{model_type}-threshold", 0.5))
-                # TODO handle additional fingerprinter
-                # fingerprinter = request.POST.get(f"{model_type}-fingerprinter")
+            params["threshold"] = threshold
+            # params['fingerprinter'] = fingerprinter
+            params["build_app_domain"] = build_ad
+            params["app_domain_num_neighbours"] = num_neighbors
+            params["app_domain_reliability_threshold"] = reliability_threshold
+            params["app_domain_local_compatibility_threshold"] = local_compatibility_threshold

-                # App Domain related parameters
-                build_ad = request.POST.get("build-app-domain", False) == "on"
-                num_neighbors = request.POST.get("num-neighbors", 5)
-                reliability_threshold = request.POST.get("reliability-threshold", 0.5)
-                local_compatibility_threshold = request.POST.get(
-                    "local-compatibility-threshold", 0.5
-                )
+            mod = MLRelativeReasoning.create(**params)
+        elif model_type == "rbrr":
+            params["rule_packages"] = [
+                PackageManager.get_package_by_url(current_user, p) for p in rule_packages
+            ]

-                params["threshold"] = threshold
-                # params['fingerprinter'] = fingerprinter
-                params["build_app_domain"] = build_ad
-                params["app_domain_num_neighbours"] = num_neighbors
-                params["app_domain_reliability_threshold"] = reliability_threshold
-                params["app_domain_local_compatibility_threshold"] = local_compatibility_threshold
-
-                mod = MLRelativeReasoning.create(**params)
-            else:
-                mod = RuleBasedRelativeReasoning.create(**params)
-
-            from .tasks import build_model
-
-            build_model.delay(mod.pk)
+            mod = RuleBasedRelativeReasoning.create(**params)
+        elif s.FLAGS.get("PLUGINS", False) and model_type in s.CLASSIFIER_PLUGINS.values():
+            pass
        else:
            return error(
                request, "Invalid model type.", f'Model type "{model_type}" is not supported."'
            )
-        return redirect(mod.url)

+        from .tasks import dispatch, build_model
+
+        dispatch(current_user, build_model, mod.pk)
+
+        return redirect(mod.url)
    else:
        return HttpResponseNotAllowed(["GET", "POST"])

@ -865,6 +865,10 @@ def package_model(request, package_uuid, model_uuid):
                return JsonResponse({"error": f'"{smiles}" is not a valid SMILES'}, status=400)

            if classify:
+                from epdb.tasks import dispatch_eager, predict_simple
+
+                res = dispatch_eager(current_user, predict_simple, current_model.pk, stand_smiles)
+
                pred_res = current_model.predict(stand_smiles)
                res = []

@ -888,7 +892,7 @@ def package_model(request, package_uuid, model_uuid):
                return JsonResponse(res, safe=False)

            else:
-                app_domain_assessment = current_model.app_domain.assess(stand_smiles)[0]
+                app_domain_assessment = current_model.app_domain.assess(stand_smiles)
                return JsonResponse(app_domain_assessment, safe=False)

        context = get_base_context(request)
@ -909,9 +913,25 @@ def package_model(request, package_uuid, model_uuid):
                current_model.delete()
                return redirect(current_package.url + "/model")
            elif hidden == "evaluate":
-                from .tasks import evaluate_model
+                from .tasks import dispatch, evaluate_model
+
+                eval_type = request.POST.get("model-evaluation-type")
+
+                if eval_type not in ["sg", "mg"]:
+                    return error(
+                        request,
+                        "Invalid evaluation type",
+                        f'Evaluation type "{eval_type}" is not supported. Only "sg" and "mg" are supported.',
+                    )
+
+                multigen = eval_type == "mg"
+
+                eval_packages = request.POST.getlist("model-evaluation-packages")
+                eval_package_ids = [
+                    PackageManager.get_package_by_url(current_user, p).id for p in eval_packages
+                ]
+                dispatch(current_user, evaluate_model, current_model.pk, multigen, eval_package_ids)

-                evaluate_model.delay(current_model.pk)
                return redirect(current_model.url)
            else:
                return HttpResponseBadRequest()
@ -1251,7 +1271,16 @@ def package_compound_structures(request, package_uuid, compound_uuid):
        structure_smiles = request.POST.get("structure-smiles")
        structure_description = request.POST.get("structure-description")

-        cs = current_compound.add_structure(structure_smiles, structure_name, structure_description)
+        try:
+            cs = current_compound.add_structure(
+                structure_smiles, structure_name, structure_description
+            )
+        except ValueError:
+            return error(
+                request,
+                "Adding structure failed!",
+                "The structure could not be added as normalized structures don't match!",
+            )

        return redirect(cs.url)

@ -1800,9 +1829,9 @@ def package_pathways(request, package_uuid):
            pw.setting = prediction_setting
            pw.save()

-            from .tasks import predict
+            from .tasks import dispatch, predict

-            predict.delay(pw.pk, prediction_setting.pk, limit=limit)
+            dispatch(current_user, predict, pw.pk, prediction_setting.pk, limit=limit)

        return redirect(pw.url)

@ -1838,6 +1867,25 @@ def package_pathway(request, package_uuid, pathway_uuid):

            return response

+        if (
+            request.GET.get("identify-missing-rules", False) == "true"
+            and request.GET.get("rule-package") is not None
+        ):
+            from .tasks import dispatch_eager, identify_missing_rules
+
+            rule_package = PackageManager.get_package_by_url(
+                current_user, request.GET.get("rule-package")
+            )
+            res = dispatch_eager(
+                current_user, identify_missing_rules, [current_pathway.pk], rule_package.pk
+            )
+
+            filename = f"{current_pathway.name.replace(' ', '_')}_{current_pathway.uuid}.csv"
+            response = HttpResponse(res, content_type="text/csv")
+            response["Content-Disposition"] = f'attachment; filename="{filename}"'
+
+            return response
+
        # Pathway d3_json() relies on a lot of related objects (Nodes, Structures, Edges, Reaction, Rules, ...)
        # we will again fetch the current pathway identified by this url, but this time together with nearly all
        # related objects
@ -1921,10 +1969,16 @@ def package_pathway(request, package_uuid, pathway_uuid):
        if node_url:
            n = current_pathway.get_node(node_url)

-            from .tasks import predict
+            from .tasks import dispatch, predict
+
+            dispatch(
+                current_user,
+                predict,
+                current_pathway.pk,
+                current_pathway.setting.pk,
+                node_pk=n.pk,
+            )

-            # Dont delay?
-            predict(current_pathway.pk, current_pathway.setting.pk, node_pk=n.pk)
            return JsonResponse({"success": current_pathway.url})

        return HttpResponseBadRequest()
@ -2696,6 +2750,24 @@ def setting(request, setting_uuid):
    pass


+def jobs(request):
+    current_user = _anonymous_or_real(request)
+    context = get_base_context(request)
+
+    if request.method == "GET":
+        context["object_type"] = "joblog"
+        context["breadcrumbs"] = [
+            {"Home": s.SERVER_URL},
+            {"Jobs": s.SERVER_URL + "/jobs"},
+        ]
+        if current_user.is_superuser:
+            context["jobs"] = JobLog.objects.all().order_by("-created")
+        else:
+            context["jobs"] = JobLog.objects.filter(user=current_user).order_by("-created")
+
+        return render(request, "collections/joblog.html", context)
+
+
 ###########
 # KETCHER #
 ###########
--- a/pyproject.toml
+++ b/pyproject.toml
@ -27,10 +27,11 @@ dependencies = [
    "scikit-learn>=1.6.1",
    "sentry-sdk[django]>=2.32.0",
    "setuptools>=80.8.0",
+    "polars==1.35.1",
 ]

 [tool.uv.sources]
-enviformer = { git = "ssh://git@git.envipath.com/enviPath/enviformer.git", rev = "v0.1.2" }
+enviformer = { git = "ssh://git@git.envipath.com/enviPath/enviformer.git", rev = "v0.1.4" }
 envipy-plugins = { git = "ssh://git@git.envipath.com/enviPath/enviPy-plugins.git", rev = "v0.1.0" }
 envipy-additional-information = { git = "ssh://git@git.envipath.com/enviPath/enviPy-additional-information.git", rev = "v0.1.7"}
 envipy-ambit = { git = "ssh://git@git.envipath.com/enviPath/enviPy-ambit.git" }
--- a/templates/actions/objects/pathway.html
+++ b/templates/actions/objects/pathway.html
@ -22,6 +22,10 @@
            <i class="glyphicon glyphicon-floppy-save"></i> Download Pathway as Image</a>
    </li>
 {% if meta.can_edit %}
+    <li>
+        <a class="button" data-toggle="modal" data-target="#identify_missing_rules_modal">
+            <i class="glyphicon glyphicon-question-sign"></i> Identify Missing Rules</a>
+    </li>
    <li role="separator" class="divider"></li>
    <li>
        <a class="button" data-toggle="modal" data-target="#edit_pathway_modal">
--- a/templates/collections/joblog.html
+++ b/templates/collections/joblog.html
@ -0,0 +1,71 @@
+{% extends "framework.html" %}
+{% load static %}
+{% load envipytags %}
+{% block content %}
+
+    <div class="panel-group" id="reviewListAccordion">
+        <div class="panel panel-default">
+            <div class="panel-heading" id="headingPanel" style="font-size:2rem;height: 46px">
+                Jobs
+            </div>
+            <div class="panel-body">
+                <p>
+                    Job Logs Desc
+                </p>
+
+            </div>
+
+            <div class="panel panel-default panel-heading list-group-item" style="background-color:silver">
+                <h4 class="panel-title">
+                    <a id="job-accordion-link" data-toggle="collapse" data-parent="#job-accordion" href="#jobs">
+                        Jobs
+                    </a>
+                </h4>
+            </div>
+            <div id="jobs"
+                 class="panel-collapse collapse in">
+                <div class="panel-body list-group-item" id="job-content">
+                    <table class="table table-bordered table-hover">
+                        <tr style="background-color: rgba(0, 0, 0, 0.08);">
+                            <th scope="col">ID</th>
+                            <th scope="col">Name</th>
+                            <th scope="col">Status</th>
+                            <th scope="col">Queued</th>
+                            <th scope="col">Done</th>
+                            <th scope="col">Result</th>
+                        </tr>
+                        <tbody>
+                        {% for job in jobs %}
+                            <tr>
+                                <td>{{ job.task_id }}</td>
+                                <td>{{ job.job_name }}</td>
+                                <td>{{ job.status }}</td>
+                                <td>{{ job.created }}</td>
+                                <td>{{ job.done_at }}</td>
+                                {% if job.task_result and job.task_result|is_url == True %}
+                                    <td><a href="{{ job.task_result }}">Result</a></td>
+                                {% elif job.task_result %}
+                                    <td>{{ job.task_result|slice:"40" }}...</td>
+                                {% else %}
+                                    <td>Empty</td>
+                                {% endif %}
+                            </tr>
+                        {% endfor %}
+                        </tbody>
+                    </table>
+                </div>
+            </div>
+
+            <!-- Unreviewable objects such as User / Group / Setting -->
+            <ul class='list-group'>
+                {% for obj in objects %}
+                    {% if object_type == 'user' %}
+                        <a class="list-group-item" href="{{ obj.url }}">{{ obj.username }}</a>
+                    {% else %}
+                        <a class="list-group-item" href="{{ obj.url }}">{{ obj.name }}</a>
+                    {% endif %}
+                {% endfor %}
+            </ul>
+        </div>
+    </div>
+{% endblock content %}
--- a/templates/framework.html
+++ b/templates/framework.html
@ -56,7 +56,7 @@
            (function () {
                var u = "//matomo.envipath.com/";
                _paq.push(['setTrackerUrl', u + 'matomo.php']);
-                _paq.push(['setSiteId', '10']);
+                _paq.push(['setSiteId', '{{ meta.site_id }}']);
                var d = document, g = d.createElement('script'), s = d.getElementsByTagName('script')[0];
                g.async = true;
                g.src = u + 'matomo.js';
--- a/templates/modals/collections/new_model_modal.html
+++ b/templates/modals/collections/new_model_modal.html
@ -18,113 +18,117 @@
                        prediction. You just need to set a name and the packages
                        you want the object to be based on. There are multiple types of models available.
                        For additional information have a look at our
-                        <a target="_blank" href="https://wiki.envipath.org/index.php/relative-reasoning" role="button">wiki &gt;&gt;</a>
+                        <a target="_blank" href="https://wiki.envipath.org/index.php/relative-reasoning" role="button">wiki
+                            &gt;&gt;</a>
                    </div>
+                    <!-- Name -->
                    <label for="model-name">Name</label>
                    <input id="model-name" name="model-name" class="form-control" placeholder="Name"/>
+
+                    <!-- Description -->
                    <label for="model-description">Description</label>
                    <input id="model-description" name="model-description" class="form-control"
                           placeholder="Description"/>
+
+                    <!-- Model Type -->
                    <label for="model-type">Model Type</label>
                    <select id="model-type" name="model-type" class="form-control" data-width='100%'>
                        <option disabled selected>Select Model Type</option>
                        {% for k, v in model_types.items %}
-                        <option value="{{ v }}">{{ k }}</option>
+                            <option value="{{ v }}">{{ k }}</option>
                        {% endfor %}
                    </select>
-                    <!-- ML and Rule Based Based Form-->
-                    <div id="package-based-relative-reasoning-specific-form">
-                        <!-- Rule Packages -->
-                        <label for="package-based-relative-reasoning-rule-packages">Rule Packages</label>
-                        <select id="package-based-relative-reasoning-rule-packages" name="package-based-relative-reasoning-rule-packages"
-                                data-actions-box='true' class="form-control" multiple data-width='100%'>
+
+                    <!-- Rule Packages -->
+                    <div id="rule-packages" class="ep-model-param mlrr rbrr">
+                        <label for="model-rule-packages">Rule Packages</label>
+                        <select id="model-rule-packages" name="model-rule-packages" data-actions-box='true'
+                                class="form-control" multiple data-width='100%'>
                            <option disabled>Reviewed Packages</option>
                            {% for obj in meta.readable_packages %}
-                            {% if obj.reviewed %}
-                            <option value="{{ obj.url }}">{{ obj.name }}</option>
-                            {% endif %}
+                                {% if obj.reviewed %}
+                                    <option value="{{ obj.url }}">{{ obj.name }}</option>
+                                {% endif %}
                            {% endfor %}

                            <option disabled>Unreviewed Packages</option>
                            {% for obj in meta.readable_packages %}
-                            {% if not obj.reviewed %}
-                            <option value="{{ obj.url }}">{{ obj.name }}</option>
-                            {% endif %}
+                                {% if not obj.reviewed %}
+                                    <option value="{{ obj.url }}">{{ obj.name }}</option>
+                                {% endif %}
                            {% endfor %}
                        </select>
-                        <!-- Data Packages -->
-                        <label for="package-based-relative-reasoning-data-packages" >Data Packages</label>
-                        <select id="package-based-relative-reasoning-data-packages" name="package-based-relative-reasoning-data-packages"
-                                data-actions-box='true' class="form-control" multiple data-width='100%'>
-                            <option disabled>Reviewed Packages</option>
-                            {% for obj in meta.readable_packages %}
-                            {% if obj.reviewed %}
-                            <option value="{{ obj.url }}">{{ obj.name }}</option>
-                            {% endif %}
-                            {% endfor %}
-
-                            <option disabled>Unreviewed Packages</option>
-                            {% for obj in meta.readable_packages %}
-                            {% if not obj.reviewed %}
-                            <option value="{{ obj.url }}">{{ obj.name }}</option>
-                            {% endif %}
-                            {% endfor %}
-                        </select>
-
-                        <div id="ml-relative-reasoning-specific-form">
-                            <!-- Fingerprinter -->
-                            <label for="ml-relative-reasoning-fingerprinter">Fingerprinter</label>
-                            <select id="ml-relative-reasoning-fingerprinter" name="ml-relative-reasoning-fingerprinter"
-                                    class="form-control">
-                                <option value="MACCS" selected>MACCS Fingerprinter</option>
-                            </select>
-                            {% if meta.enabled_features.PLUGINS and additional_descriptors %}
-                                <!-- Property Plugins go here -->
-                                <label for="ml-relative-reasoning-additional-fingerprinter">Additional Fingerprinter /
-                                    Descriptors</label>
-                                <select id="ml-relative-reasoning-additional-fingerprinter"
-                                        name="ml-relative-reasoning-additional-fingerprinter" class="form-control">
-                                    <option disabled selected>Select Additional Fingerprinter / Descriptor</option>
-                                    {% for k, v in additional_descriptors.items %}
-                                        <option value="{{ v }}">{{ k }}</option>
-                                    {% endfor %}
-                                </select>
-                            {% endif %}
-
-                            <label for="ml-relative-reasoning-threshold">Threshold</label>
-                            <input type="number" min="0" max="1" step="0.05" value="0.5"
-                                   id="ml-relative-reasoning-threshold"
-                                   name="ml-relative-reasoning-threshold" class="form-control">
-                        </div>
-                    {% if meta.enabled_features.APPLICABILITY_DOMAIN %}
-                        <!-- Build AD? -->
-                        <div class="checkbox">
-                            <label>
-                                <input type="checkbox" id="build-app-domain" name="build-app-domain">Also build an
-                                Applicability Domain?
-                            </label>
-                        </div>
-                        <div id="ad-params" style="display:none">
-                            <!-- Num Neighbors -->
-                            <label for="num-neighbors">Number of Neighbors</label>
-                            <input id="num-neighbors" name="num-neighbors" type="number" class="form-control" value="5"
-                                   step="1" min="0" max="10">
-                            <!-- Local Compatibility -->
-                            <label for="local-compatibility-threshold">Local Compatibility Threshold</label>
-                            <input id="local-compatibility-threshold" name="local-compatibility-threshold" type="number"
-                                   class="form-control" value="0.5" step="0.01" min="0" max="1">
-                            <!-- Reliability -->
-                            <label for="reliability-threshold">Reliability Threshold</label>
-                            <input id="reliability-threshold" name="reliability-threshold" type="number"
-                                   class="form-control" value="0.5" step="0.01" min="0" max="1">
-                        </div>
-                    {% endif %}
                    </div>
-                    <!-- EnviFormer-->
-                    <div id="enviformer-specific-form">
-                        <label for="enviformer-threshold">Threshold</label>
-                        <input type="number" min="0" max="1" step="0.05" value="0.5" id="enviformer-threshold"
-                               name="enviformer-threshold" class="form-control">
+
+                    <!-- Data Packages -->
+                    <div id="data-packages" class="ep-model-param mlrr rbrr enviformer">
+                        <label for="model-data-packages">Data Packages</label>
+                        <select id="model-data-packages" name="model-data-packages" data-actions-box='true'
+                                class="form-control" multiple data-width='100%'>
+                            <option disabled>Reviewed Packages</option>
+                            {% for obj in meta.readable_packages %}
+                                {% if obj.reviewed %}
+                                    <option value="{{ obj.url }}">{{ obj.name }}</option>
+                                {% endif %}
+                            {% endfor %}
+
+                            <option disabled>Unreviewed Packages</option>
+                            {% for obj in meta.readable_packages %}
+                                {% if not obj.reviewed %}
+                                    <option value="{{ obj.url }}">{{ obj.name }}</option>
+                                {% endif %}
+                            {% endfor %}
+                        </select>
+                    </div>
+
+                    <!-- Fingerprinter -->
+                    <div id="fingerprinter" class="ep-model-param mlrr">
+                        <label for="model-fingerprinter">Fingerprinter</label>
+                        <select id="model-fingerprinter" name="model-fingerprinter" data-actions-box='true'
+                                class="form-control" multiple data-width='100%'>
+                            <option value="MACCS" selected>MACCS Fingerprinter</option>
+                            {% if meta.enabled_features.PLUGINS and additional_descriptors %}
+                                <option disabled selected>Select Additional Fingerprinter / Descriptor</option>
+                                {% for k, v in additional_descriptors.items %}
+                                    <option value="{{ v }}">{{ k }}</option>
+                                {% endfor %}
+                            {% endif %}
+                        </select>
+                    </div>
+
+                    <!-- Threshold -->
+                    <div id="threshold" class="ep-model-param mlrr enviformer">
+                        <label for="model-threshold">Threshold</label>
+                        <input type="number" min="0" max="1" step="0.05" value="0.5" id="model-threshold"
+                               name="model-threshold" class="form-control">
+                    </div>
+
+                    <div id="appdomain" class="ep-model-param mlrr">
+                        {% if meta.enabled_features.APPLICABILITY_DOMAIN %}
+                            <!-- Build AD? -->
+                            <div class="checkbox">
+                                <label>
+                                    <input type="checkbox" id="build-app-domain" name="build-app-domain">Also build an
+                                    Applicability Domain?
+                                </label>
+                            </div>
+                            <div id="ad-params" style="display:none">
+                                <!-- Num Neighbors -->
+                                <label for="num-neighbors">Number of Neighbors</label>
+                                <input id="num-neighbors" name="num-neighbors" type="number" class="form-control"
+                                       value="5"
+                                       step="1" min="0" max="10">
+                                <!-- Local Compatibility -->
+                                <label for="local-compatibility-threshold">Local Compatibility Threshold</label>
+                                <input id="local-compatibility-threshold" name="local-compatibility-threshold"
+                                       type="number"
+                                       class="form-control" value="0.5" step="0.01" min="0" max="1">
+                                <!-- Reliability -->
+                                <label for="reliability-threshold">Reliability Threshold</label>
+                                <input id="reliability-threshold" name="reliability-threshold" type="number"
+                                       class="form-control" value="0.5" step="0.01" min="0" max="1">
+                            </div>
+                        {% endif %}
                    </div>
                </form>
            </div>
@ -137,53 +141,47 @@
 </div>

 <script>
-$(function() {
-    // Initially hide all "specific" forms
-    $("div[id$='-specific-form']").each( function() {
-        $(this).hide();
-    });
+    $(function () {
+        // Built in Model Types
+        var nativeModelTypes = [
+            "mlrr",
+            "rbrr",
+            "enviformer",
+        ]

-    $('#model-type').selectpicker();
-    $("#ml-relative-reasoning-fingerprinter").selectpicker();
-    $("#package-based-relative-reasoning-rule-packages").selectpicker();
-    $("#package-based-relative-reasoning-data-packages").selectpicker();
-    $("#package-based-relative-reasoning-evaluation-packages").selectpicker();
-    if ($('#ml-relative-reasoning-additional-fingerprinter').length > 0) {
-        $("#ml-relative-reasoning-additional-fingerprinter").selectpicker();
-    }
-
-    $("#build-app-domain").change(function () {
-        if ($(this).is(":checked")) {
-            $('#ad-params').show();
-        } else {
-            $('#ad-params').hide();
-        }
-    });
-
-    // On change hide all and show only selected
-    $("#model-type").change(function() {
-        $("div[id$='-specific-form']").each( function() {
+        // Initially hide all "specific" forms
+        $(".ep-model-param").each(function () {
            $(this).hide();
        });
-        val = $('option:selected', this).val();

-        if (val === 'ml-relative-reasoning' || val === 'rule-based-relative-reasoning') {
-            $("#package-based-relative-reasoning-specific-form").show();
-            if (val === 'ml-relative-reasoning') {
-                 $("#ml-relative-reasoning-specific-form").show();
+        $('#model-type').selectpicker();
+        $("#model-fingerprinter").selectpicker();
+        $("#model-rule-packages").selectpicker();
+        $("#model-data-packages").selectpicker();
+
+        $("#build-app-domain").change(function () {
+            if ($(this).is(":checked")) {
+                $('#ad-params').show();
+            } else {
+                $('#ad-params').hide();
            }
-        } else {
-            $("#" + val + "-specific-form").show();
-        }
+        });
+
+        // On change hide all and show only selected
+        $("#model-type").change(function () {
+            $('.ep-model-param').hide();
+            var modelType = $('#model-type').val();
+            if (nativeModelTypes.indexOf(modelType) !== -1) {
+                $('.' + modelType).show();
+            } else {
+                // do nothing
+            }
+        });
+
+        $('#new_model_modal_form_submit').on('click', function (e) {
+            e.preventDefault();
+            $('#new_model_form').submit();
+        });
+
    });
-
-    $('#new_model_modal_form_submit').on('click', function(e){
-        e.preventDefault();
-        $('#new_model_form').submit();
-    });
-
-});
-
-
-
 </script>
--- a/templates/modals/objects/evaluate_model_modal.html
+++ b/templates/modals/objects/evaluate_model_modal.html
@ -17,10 +17,10 @@
                        For evaluation, you need to select the packages you want to use.
                        While the model is evaluating, you can use the model for predictions.
                    </div>
-                    <!-- Evaluation -->
-                    <label for="relative-reasoning-evaluation-packages">Evaluation Packages</label>
-                    <select id="relative-reasoning-evaluation-packages" name=relative-reasoning-evaluation-packages"
-                            data-actions-box='true' class="form-control" multiple data-width='100%'>
+                    <!-- Evaluation Packages -->
+                    <label for="model-evaluation-packages">Evaluation Packages</label>
+                    <select id="model-evaluation-packages" name="model-evaluation-packages" data-actions-box='true'
+                            class="form-control" multiple data-width='100%'>
                        <option disabled>Reviewed Packages</option>
                        {% for obj in meta.readable_packages %}
                            {% if obj.reviewed %}
@ -35,7 +35,16 @@
                            {% endif %}
                        {% endfor %}
                    </select>
-                <input type="hidden" name="hidden" value="evaluate">
+
+                    <!-- Eval Type -->
+                    <label for="model-evaluation-type">Evaluation Type</label>
+                    <select id="model-evaluation-type" name="model-evaluation-type" class="form-control">
+                        <option disabled selected>Select evaluation type</option>
+                        <option value="sg">Single Generation</option>
+                        <option value="mg">Multiple Generations</option>
+                    </select>
+
+                    <input type="hidden" name="hidden" value="evaluate">
                </form>
            </div>
            <div class="modal-footer">
@ -50,7 +59,7 @@

    $(function () {

-        $("#relative-reasoning-evaluation-packages").selectpicker();
+        $("#model-evaluation-packages").selectpicker();

        $('#evaluate_model_form_submit').on('click', function (e) {
            e.preventDefault();
--- a/templates/modals/objects/identify_missing_rules_modal.html
+++ b/templates/modals/objects/identify_missing_rules_modal.html
@ -0,0 +1,54 @@
+{% load static %}
+<!-- Identify Missing Rules -->
+<div id="identify_missing_rules_modal" class="modal" tabindex="-1">
+    <div class="modal-dialog">
+        <div class="modal-content">
+            <div class="modal-header">
+                <h3 class="modal-title">Identify Missing Rules</h3>
+                <button type="button" class="close" data-dismiss="modal" aria-label="Close">
+                    <span aria-hidden="true">&times;</span>
+                </button>
+            </div>
+            <div class="modal-body">
+                By clicking on Download we'll search the Pathway for Reactions that are not backed by
+                a Rule or which can be assembled by chaining two rules.
+                <form id="identify-missing-rules-modal-form" accept-charset="UTF-8" action="{{ pathway.url }}"
+                      data-remote="true" method="GET">
+                    <label for="rule-package">Select the Rule Package</label>
+                    <select id="rule-package" name="rule-package" data-actions-box='true' class="form-control"
+                            data-width='100%'>
+                        <option disabled>Reviewed Packages</option>
+                        {% for obj in meta.readable_packages %}
+                            {% if obj.reviewed %}
+                                <option value="{{ obj.url }}">{{ obj.name }}</option>
+                            {% endif %}
+                        {% endfor %}
+
+                        <option disabled>Unreviewed Packages</option>
+                        {% for obj in meta.readable_packages %}
+                            {% if not obj.reviewed %}
+                                <option value="{{ obj.url }}">{{ obj.name }}</option>
+                            {% endif %}
+                        {% endfor %}
+                    </select>
+                    <input type="hidden" name="identify-missing-rules" value="true"/>
+                </form>
+            </div>
+            <div class="modal-footer">
+                <button type="button" class="btn btn-secondary" data-dismiss="modal">Close</button>
+                <button type="button" class="btn btn-primary" id="identify-missing-rules-modal-submit">Download</button>
+            </div>
+        </div>
+    </div>
+</div>
+<script>
+    $(function () {
+
+        $('#identify-missing-rules-modal-submit').click(function (e) {
+            e.preventDefault();
+            $('#identify-missing-rules-modal-form').submit();
+            $('#identify_missing_rules_modal').modal('hide');
+        });
+
+    })
+</script>
--- a/templates/objects/compound.html
+++ b/templates/objects/compound.html
@ -183,7 +183,7 @@
                </div>
                <div id="compound-external-identifier" class="panel-collapse collapse in">
                    <div class="panel-body list-group-item">
-                        {% if compound.get_pubchem_identifiers %}
+                        {% if compound.get_pubchem_compound_identifiers %}
                            <div class="panel panel-default panel-heading list-group-item"
                                 style="background-color:silver">
                                <h4 class="panel-title">
@ -193,12 +193,28 @@
                                </h4>
                            </div>
                            <div id="compound-pubchem-identifier" class="panel-collapse collapse in">
-                                {% for eid in compound.get_pubchem_identifiers %}
+                                {% for eid in compound.get_pubchem_compound_identifiers %}
                                    <a class="list-group-item"
                                       href="{{ eid.external_url }}">CID{{ eid.identifier_value }}</a>
                                {% endfor %}
                            </div>
                        {% endif %}
+                        {% if compound.get_pubchem_substance_identifiers %}
+                            <div class="panel panel-default panel-heading list-group-item"
+                                 style="background-color:silver">
+                                <h4 class="panel-title">
+                                    <a id="compound-pubchem-identifier-link" data-toggle="collapse"
+                                       data-parent="#compound-external-identifier"
+                                       href="#compound-pubchem-identifier">PubChem Substance Identifier</a>
+                                </h4>
+                            </div>
+                            <div id="compound-pubchem-identifier" class="panel-collapse collapse in">
+                                {% for eid in compound.get_pubchem_substance_identifiers %}
+                                    <a class="list-group-item"
+                                       href="{{ eid.external_url }}">SID{{ eid.identifier_value }}</a>
+                                {% endfor %}
+                            </div>
+                        {% endif %}
                        {% if compound.get_chebi_identifiers %}
                            <div class="panel panel-default panel-heading list-group-item"
                                 style="background-color:silver">
--- a/templates/objects/model.html
+++ b/templates/objects/model.html
@ -117,7 +117,7 @@
                <!-- End Predict Panel       -->
            {% endif %}

-            {% if model.app_domain %}
+            {% if model.ready_for_prediction and model.app_domain %}
                <!-- App Domain -->
                <div class="panel panel-default panel-heading list-group-item" style="background-color:silver">
                    <h4 class="panel-title">
--- a/templates/objects/pathway.html
+++ b/templates/objects/pathway.html
@ -83,6 +83,7 @@
        {% include "modals/objects/add_pathway_edge_modal.html" %}
        {% include "modals/objects/download_pathway_csv_modal.html" %}
        {% include "modals/objects/download_pathway_image_modal.html" %}
+        {% include "modals/objects/identify_missing_rules_modal.html" %}
        {% include "modals/objects/generic_copy_object_modal.html" %}
        {% include "modals/objects/edit_pathway_modal.html" %}
        {% include "modals/objects/generic_set_aliases_modal.html" %}
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@ -1,8 +1,10 @@
+import os.path
+from tempfile import TemporaryDirectory
 from django.test import TestCase
-
 from epdb.logic import PackageManager
-from epdb.models import Reaction, Compound, User, Rule
-from utilities.ml import Dataset
+from epdb.models import Reaction, Compound, User, Rule, Package
+from utilities.chem import FormatConverter
+from utilities.ml import RuleBasedDataset, EnviFormerDataset


 class DatasetTest(TestCase):
@ -41,12 +43,108 @@ class DatasetTest(TestCase):
        super(DatasetTest, cls).setUpClass()
        cls.user = User.objects.get(username="anonymous")
        cls.package = PackageManager.create_package(cls.user, "Anon Test Package", "No Desc")
+        cls.BBD_SUBSET = Package.objects.get(name="Fixtures")

-    def test_smoke(self):
+    def test_generate_dataset(self):
+        """Test generating dataset does not crash"""
+        self.generate_rule_dataset()
+
+    def test_indexing(self):
+        """Test indexing a few different ways to check for crashes"""
+        ds, reactions, rules = self.generate_rule_dataset()
+        print(ds[5])
+        print(ds[2, 5])
+        print(ds[3:6, 2:8])
+        print(ds[:2, "structure_id"])
+
+    def test_add_rows(self):
+        """Test adding one row and adding multiple rows"""
+        ds, reactions, rules = self.generate_rule_dataset()
+        ds.add_row(list(ds.df.row(1)))
+        ds.add_rows([list(ds.df.row(i)) for i in range(5)])
+
+    def test_times_triggered(self):
+        """Check getting times triggered for a rule id"""
+        ds, reactions, rules = self.generate_rule_dataset()
+        print(ds.times_triggered(rules[0].uuid))
+
+    def test_block_indices(self):
+        """Test the usages of _block_indices"""
+        ds, reactions, rules = self.generate_rule_dataset()
+        print(ds.struct_features())
+        print(ds.triggered())
+        print(ds.observed())
+
+    def test_structure_id(self):
+        """Check getting a structure id from row index"""
+        ds, reactions, rules = self.generate_rule_dataset()
+        print(ds.structure_id(0))
+
+    def test_x(self):
+        """Test getting X portion of the dataframe"""
+        ds, reactions, rules = self.generate_rule_dataset()
+        print(ds.X().df.head())
+
+    def test_trig(self):
+        """Test getting the triggered portion of the dataframe"""
+        ds, reactions, rules = self.generate_rule_dataset()
+        print(ds.trig().df.head())
+
+    def test_y(self):
+        """Test getting the Y portion of the dataframe"""
+        ds, reactions, rules = self.generate_rule_dataset()
+        print(ds.y().df.head())
+
+    def test_classification_dataset(self):
+        """Test making the classification dataset"""
+        ds, reactions, rules = self.generate_rule_dataset()
+        compounds = [c.default_structure for c in Compound.objects.filter(package=self.BBD_SUBSET)]
+        class_ds, products = ds.classification_dataset(compounds, rules)
+        print(class_ds.df.head(5))
+        print(products[:5])
+
+    def test_extra_features(self):
+        reactions = [r for r in Reaction.objects.filter(package=self.BBD_SUBSET)]
+        applicable_rules = [r for r in Rule.objects.filter(package=self.BBD_SUBSET)]
+        ds = RuleBasedDataset.generate_dataset(reactions, applicable_rules, feat_funcs=[FormatConverter.maccs, FormatConverter.morgan])
+        print(ds.shape)
+
+    def test_to_arff(self):
+        """Test exporting the arff version of the dataset"""
+        ds, reactions, rules = self.generate_rule_dataset()
+        ds.to_arff("dataset_arff_test.arff")
+
+    def test_save_load(self):
+        """Test saving and loading dataset"""
+        with TemporaryDirectory() as tmpdir:
+            ds, reactions, rules = self.generate_rule_dataset()
+            ds.save(os.path.join(tmpdir, "save_dataset.pkl"))
+            ds_loaded = RuleBasedDataset.load(os.path.join(tmpdir, "save_dataset.pkl"))
+            self.assertTrue(ds.df.equals(ds_loaded.df))
+
+    def test_dataset_example(self):
+        """Test with a concrete example checking dataset size"""
        reactions = [r for r in Reaction.objects.filter(package=self.package)]
        applicable_rules = [self.rule1]

-        ds = Dataset.generate_dataset(reactions, applicable_rules)
+        ds = RuleBasedDataset.generate_dataset(reactions, applicable_rules)

        self.assertEqual(len(ds.y()), 1)
-        self.assertEqual(sum(ds.y()[0]), 1)
+        self.assertEqual(ds.y().df.item(), 1)
+
+    def test_enviformer_dataset(self):
+        ds, reactions = self.generate_enviformer_dataset()
+        print(ds.X().head())
+        print(ds.y().head())
+
+    def generate_rule_dataset(self):
+        """Generate a RuleBasedDataset from test package data"""
+        reactions = [r for r in Reaction.objects.filter(package=self.BBD_SUBSET)]
+        applicable_rules = [r for r in Rule.objects.filter(package=self.BBD_SUBSET)]
+        ds = RuleBasedDataset.generate_dataset(reactions, applicable_rules)
+        return ds, reactions, applicable_rules
+
+    def generate_enviformer_dataset(self):
+        reactions = [r for r in Reaction.objects.filter(package=self.BBD_SUBSET)]
+        ds = EnviFormerDataset.generate_dataset(reactions)
+        return ds, reactions
--- a/tests/test_enviformer.py
+++ b/tests/test_enviformer.py
@ -1,7 +1,27 @@
+from collections import defaultdict
+from datetime import datetime
 from tempfile import TemporaryDirectory
 from django.test import TestCase, tag
 from epdb.logic import PackageManager
-from epdb.models import User, EnviFormer, Package
+from epdb.models import User, EnviFormer, Package, Setting
+from epdb.tasks import predict_simple, predict
+
+
+def measure_predict(mod, pathway_pk=None):
+    # Measure and return the prediction time
+    start = datetime.now()
+    if pathway_pk:
+        s = Setting()
+        s.model = mod
+        s.model_threshold = 0.2
+        s.max_depth = 4
+        s.max_nodes = 20
+        s.save()
+        pred_result = predict.delay(pathway_pk, s.pk, limit=s.max_depth)
+    else:
+        pred_result = predict_simple.delay(mod.pk, "C1=CC=C(CSCC2=CC=CC=C2)C=C1")
+    _ = pred_result.get()
+    return round((datetime.now() - start).total_seconds(), 2)


@tag("slow")
@ -22,14 +42,38 @@ class EnviFormerTest(TestCase):
                threshold = float(0.5)
                data_package_objs = [self.BBD_SUBSET]
                eval_packages_objs = [self.BBD_SUBSET]
-                mod = EnviFormer.create(
-                    self.package, data_package_objs, eval_packages_objs, threshold=threshold
-                )
+                mod = EnviFormer.create(self.package, data_package_objs, threshold=threshold)

                mod.build_dataset()
                mod.build_model()
-                mod.multigen_eval = True
-                mod.save()
-                mod.evaluate_model()
+                mod.evaluate_model(True, eval_packages_objs, n_splits=2)

                mod.predict("CCN(CC)C(=O)C1=CC(=CC=C1)C")
+
+    def test_predict_runtime(self):
+        with TemporaryDirectory() as tmpdir:
+            with self.settings(MODEL_DIR=tmpdir):
+                threshold = float(0.5)
+                data_package_objs = [self.BBD_SUBSET]
+                mods = []
+                for _ in range(4):
+                    mod = EnviFormer.create(self.package, data_package_objs, threshold=threshold)
+                    mod.build_dataset()
+                    mod.build_model()
+                    mods.append(mod)
+
+                # Test prediction time drops after first prediction
+                times = [measure_predict(mods[0]) for _ in range(5)]
+                print(f"First prediction took {times[0]} seconds, subsequent ones took {times[1:]}")
+
+                # Test pathway prediction
+                times = [measure_predict(mods[1], self.BBD_SUBSET.pathways[0].pk) for _ in range(5)]
+                print(f"First pathway prediction took {times[0]} seconds, subsequent ones took {times[1:]}")
+
+                # Test eviction by performing three prediction with every model, twice.
+                times = defaultdict(list)
+                for _ in range(2):  # Eviction should cause the second iteration here to have to reload the models
+                    for mod in mods:
+                        for _ in range(3):
+                            times[mod.pk].append(measure_predict(mod))
+                print(times)
--- a/tests/test_model.py
+++ b/tests/test_model.py
@ -4,7 +4,7 @@ import numpy as np
 from django.test import TestCase

 from epdb.logic import PackageManager
-from epdb.models import User, MLRelativeReasoning, Package
+from epdb.models import User, MLRelativeReasoning, Package, RuleBasedRelativeReasoning


 class ModelTest(TestCase):
@ -17,7 +17,7 @@ class ModelTest(TestCase):
        cls.package = PackageManager.create_package(cls.user, "Anon Test Package", "No Desc")
        cls.BBD_SUBSET = Package.objects.get(name="Fixtures")

-    def test_smoke(self):
+    def test_mlrr(self):
        with TemporaryDirectory() as tmpdir:
            with self.settings(MODEL_DIR=tmpdir):
                threshold = float(0.5)
@ -30,29 +30,14 @@ class ModelTest(TestCase):
                    self.package,
                    rule_package_objs,
                    data_package_objs,
-                    eval_packages_objs,
                    threshold=threshold,
                    name="ECC - BBD - 0.5",
                    description="Created MLRelativeReasoning in Testcase",
                )

-                # mod = RuleBasedRelativeReasoning.create(
-                #     self.package,
-                #     rule_package_objs,
-                #     data_package_objs,
-                #     eval_packages_objs,
-                #     threshold=threshold,
-                #     min_count=5,
-                #     max_count=0,
-                #     name='ECC - BBD - 0.5',
-                #     description='Created MLRelativeReasoning in Testcase',
-                # )
-
                mod.build_dataset()
                mod.build_model()
-                mod.multigen_eval = True
-                mod.save()
-                mod.evaluate_model()
+                mod.evaluate_model(True, eval_packages_objs, n_splits=2)

                results = mod.predict("CCN(CC)C(=O)C1=CC(=CC=C1)C")

@ -73,3 +58,57 @@ class ModelTest(TestCase):

                # from pprint import pprint
                # pprint(mod.eval_results)
+
+    def test_applicability(self):
+        with TemporaryDirectory() as tmpdir:
+            with self.settings(MODEL_DIR=tmpdir):
+                threshold = float(0.5)
+
+                rule_package_objs = [self.BBD_SUBSET]
+                data_package_objs = [self.BBD_SUBSET]
+                eval_packages_objs = [self.BBD_SUBSET]
+
+                mod = MLRelativeReasoning.create(
+                    self.package,
+                    rule_package_objs,
+                    data_package_objs,
+                    threshold=threshold,
+                    name="ECC - BBD - 0.5",
+                    description="Created MLRelativeReasoning in Testcase",
+                    build_app_domain=True,  # To test the applicability domain this must be True
+                    app_domain_num_neighbours=5,
+                    app_domain_local_compatibility_threshold=0.5,
+                    app_domain_reliability_threshold=0.5,
+                )
+
+                mod.build_dataset()
+                mod.build_model()
+                mod.evaluate_model(True, eval_packages_objs, n_splits=2)
+
+                results = mod.predict("CCN(CC)C(=O)C1=CC(=CC=C1)C")
+
+    def test_rbrr(self):
+        with TemporaryDirectory() as tmpdir:
+            with self.settings(MODEL_DIR=tmpdir):
+                threshold = float(0.5)
+
+                rule_package_objs = [self.BBD_SUBSET]
+                data_package_objs = [self.BBD_SUBSET]
+                eval_packages_objs = [self.BBD_SUBSET]
+
+                mod = RuleBasedRelativeReasoning.create(
+                    self.package,
+                    rule_package_objs,
+                    data_package_objs,
+                    threshold=threshold,
+                    min_count=5,
+                    max_count=0,
+                    name='ECC - BBD - 0.5',
+                    description='Created MLRelativeReasoning in Testcase',
+                )
+
+                mod.build_dataset()
+                mod.build_model()
+                mod.evaluate_model(True, eval_packages_objs, n_splits=2)
+
+                results = mod.predict("CCN(CC)C(=O)C1=CC(=CC=C1)C")
--- a/tests/views/test_model_views.py
+++ b/tests/views/test_model_views.py
@ -6,7 +6,7 @@ from epdb.logic import UserManager
 from epdb.models import Package, User


-@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models")
+@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models", CELERY_TASK_ALWAYS_EAGER=True)
 class PathwayViewTest(TestCase):
    fixtures = ["test_fixtures_incl_model.jsonl.gz"]

--- a/tests/views/test_pathway_views.py
+++ b/tests/views/test_pathway_views.py
@ -6,7 +6,7 @@ from epdb.logic import UserManager, PackageManager
 from epdb.models import Pathway, Edge


-@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models")
+@override_settings(MODEL_DIR=s.FIXTURE_DIRS[0] / "models", CELERY_TASK_ALWAYS_EAGER=True)
 class PathwayViewTest(TestCase):
    fixtures = ["test_fixtures_incl_model.jsonl.gz"]

--- a/utilities/chem.py
+++ b/utilities/chem.py
@ -7,7 +7,7 @@ from typing import List, Optional, Dict, TYPE_CHECKING
 from indigo import Indigo, IndigoException, IndigoObject
 from indigo.renderer import IndigoRenderer
 from rdkit import Chem, rdBase
-from rdkit.Chem import MACCSkeys, Descriptors
+from rdkit.Chem import MACCSkeys, Descriptors, rdFingerprintGenerator
 from rdkit.Chem import rdChemReactions
 from rdkit.Chem.Draw import rdMolDraw2D
 from rdkit.Chem.MolStandardize import rdMolStandardize
@ -107,6 +107,13 @@ class FormatConverter(object):
        bitvec = MACCSkeys.GenMACCSKeys(mol)
        return bitvec.ToList()

+    @staticmethod
+    def morgan(smiles, radius=3, fpSize=2048):
+        finger_gen = rdFingerprintGenerator.GetMorganGenerator(radius=radius, fpSize=fpSize)
+        mol = Chem.MolFromSmiles(smiles)
+        fp = finger_gen.GetFingerprint(mol)
+        return fp.ToList()
+
    @staticmethod
    def get_functional_groups(smiles: str) -> List[str]:
        res = list()
@ -185,7 +192,7 @@ class FormatConverter(object):
        return smiles

    @staticmethod
-    def standardize(smiles, remove_stereo=False):
+    def standardize(smiles, remove_stereo=False, canonicalize_tautomers=False):
        # Taken from https://bitsilla.com/blog/2021/06/standardizing-a-molecule-using-rdkit/
        # follows the steps in
        # https://github.com/greglandrum/RSC_OpenScience_Standardization_202104/blob/main/MolStandardize%20pieces.ipynb
@ -203,19 +210,21 @@ class FormatConverter(object):
        uncharger = (
            rdMolStandardize.Uncharger()
        )  # annoying, but necessary as no convenience method exists
-        uncharged_parent_clean_mol = uncharger.uncharge(parent_clean_mol)
+        res_mol = uncharger.uncharge(parent_clean_mol)

        # note that no attempt is made at reionization at this step
        # nor at ionization at some pH (rdkit has no pKa caculator)
        # the main aim to to represent all molecules from different sources
        # in a (single) standard way, for use in ML, catalogue, etc.
-        # te = rdMolStandardize.TautomerEnumerator()  # idem
-        # taut_uncharged_parent_clean_mol = te.Canonicalize(uncharged_parent_clean_mol)

        if remove_stereo:
-            Chem.RemoveStereochemistry(uncharged_parent_clean_mol)
+            Chem.RemoveStereochemistry(res_mol)

-        return Chem.MolToSmiles(uncharged_parent_clean_mol, kekuleSmiles=True)
+        if canonicalize_tautomers:
+            te = rdMolStandardize.TautomerEnumerator()  # idem
+            res_mol = te.Canonicalize(res_mol)
+
+        return Chem.MolToSmiles(res_mol, kekuleSmiles=True)

    @staticmethod
    def neutralize_smiles(smiles):
@ -363,6 +372,76 @@ class FormatConverter(object):

        return parsed_smiles, errors

+    @staticmethod
+    def smiles_covered_by(
+        l_smiles: List[str],
+        r_smiles: List[str],
+        standardize: bool = True,
+        canonicalize_tautomers: bool = True,
+    ) -> bool:
+        """
+        Check if all SMILES in the left list are covered by (contained in) the right list.
+
+        This function performs a subset check to determine if every chemical structure
+        represented in l_smiles has a corresponding representation in r_smiles.
+
+        Args:
+            l_smiles (List[str]): List of SMILES strings to check for coverage.
+            r_smiles (List[str]): List of SMILES strings that should contain all l_smiles.
+            standardize (bool, optional): Whether to standardize SMILES before comparison.
+                Defaults to True. When True, applies FormatConverter.standardize() to
+                normalize representations for accurate comparison.
+            canonicalize_tautomers (bool, optional): Whether to canonicalize tautomers
+                Defaults to False. When True, applies rdMolStandardize.TautomerEnumerator().Canonicalize(res_mol)
+                to the compounds before comparison.
+        Returns:
+            bool: True if all SMILES in l_smiles are found in r_smiles (i.e., l_smiles
+                  is a subset of r_smiles), False otherwise.
+
+        Note:
+            - Comparison treats lists as sets, ignoring duplicates and order
+            - Failed standardization attempts are silently ignored (original SMILES used)
+            - This is a one-directional check: l_smiles ⊆ r_smiles
+            - For bidirectional equality, both directions must be checked separately
+
+        Example:
+            >>> FormatConverter.smiles_covered_by(["CCO", "CC"], ["CCO", "CC", "CCC"])
+            True
+            >>> FormatConverter.smiles_covered_by(["CCO", "CCCC"], ["CCO", "CC", "CCC"])
+            False
+        """
+
+        standardized_l_smiles = []
+
+        if standardize:
+            for smi in l_smiles:
+                try:
+                    smi = FormatConverter.standardize(
+                        smi, canonicalize_tautomers=canonicalize_tautomers
+                    )
+                except Exception:
+                    # :shrug:
+                    # logger.debug(f'Standardizing SMILES failed for {smi}')
+                    pass
+                standardized_l_smiles.append(smi)
+        else:
+            standardized_l_smiles = l_smiles
+
+        standardized_r_smiles = []
+        if standardize:
+            for smi in r_smiles:
+                try:
+                    smi = FormatConverter.standardize(smi)
+                except Exception:
+                    # :shrug:
+                    # logger.debug(f'Standardizing SMILES failed for {smi}')
+                    pass
+                standardized_r_smiles.append(smi)
+        else:
+            standardized_r_smiles = r_smiles
+
+        return len(set(standardized_l_smiles).difference(set(standardized_r_smiles))) == 0
+

 class Standardizer(ABC):
    def __init__(self, name):
--- a/utilities/misc.py
+++ b/utilities/misc.py
@ -9,36 +9,37 @@ from collections import defaultdict
 from datetime import datetime
 from enum import Enum
 from types import NoneType
-from typing import Dict, Any, List
+from typing import Any, Dict, List

 from django.db import transaction
-from envipy_additional_information import Interval, EnviPyModel
-from envipy_additional_information import NAME_MAPPING
+from envipy_additional_information import NAME_MAPPING, EnviPyModel, Interval
 from pydantic import BaseModel, HttpUrl

 from epdb.models import (
-    Package,
    Compound,
    CompoundStructure,
-    SimpleRule,
+    Edge,
+    EnviFormer,
+    EPModel,
+    ExternalDatabase,
+    ExternalIdentifier,
+    License,
+    MLRelativeReasoning,
+    Node,
+    Package,
+    ParallelRule,
+    Pathway,
+    PluginModel,
+    Reaction,
+    Rule,
+    RuleBasedRelativeReasoning,
+    Scenario,
+    SequentialRule,
    SimpleAmbitRule,
    SimpleRDKitRule,
-    ParallelRule,
-    SequentialRule,
-    Reaction,
-    Pathway,
-    Node,
-    Edge,
-    Scenario,
-    EPModel,
-    MLRelativeReasoning,
-    RuleBasedRelativeReasoning,
-    EnviFormer,
-    PluginModel,
-    ExternalIdentifier,
-    ExternalDatabase,
-    License,
+    SimpleRule,
 )
+from utilities.chem import FormatConverter

 logger = logging.getLogger(__name__)

@ -48,7 +49,7 @@ class HTMLGenerator:

    @staticmethod
    def generate_html(additional_information: "EnviPyModel", prefix="") -> str:
-        from typing import get_origin, get_args, Union
+        from typing import Union, get_args, get_origin

        if isinstance(additional_information, type):
            clz_name = additional_information.__name__
@ -1171,3 +1172,89 @@ class PackageImporter:
                url=identifier_data.get("url", ""),
                is_primary=identifier_data.get("is_primary", False),
            )
+
+
+class PathwayUtils:
+    def __init__(self, pathway: "Pathway"):
+        self.pathway = pathway
+
+    @staticmethod
+    def _get_products(smiles: str, rules: List["Rule"]):
+        educt_rule_products: Dict[str, Dict[str, List[str]]] = defaultdict(
+            lambda: defaultdict(list)
+        )
+
+        for r in rules:
+            product_sets = r.apply(smiles)
+            for product_set in product_sets:
+                for product in product_set:
+                    educt_rule_products[smiles][r.url].append(product)
+
+        return educt_rule_products
+
+    def find_missing_rules(self, rules: List["Rule"]):
+        print(f"Processing {self.pathway.name}")
+        # compute products for each node / rule combination in the pathway
+        educt_rule_products = defaultdict(lambda: defaultdict(list))
+
+        for node in self.pathway.nodes:
+            educt_rule_products.update(**self._get_products(node.default_node_label.smiles, rules))
+
+        # loop through edges and determine reactions that can't be constructed by
+        # any of the rules or a combination of two rules in a chained fashion
+
+        res: Dict[str, List["Rule"]] = dict()
+
+        for edge in self.pathway.edges:
+            found = False
+            reaction = edge.edge_label
+
+            educts = [cs for cs in reaction.educts.all()]
+            products = [cs.smiles for cs in reaction.products.all()]
+            rule_chain = []
+
+            for educt in educts:
+                educt = educt.smiles
+                triggered_rules = list(educt_rule_products.get(educt, {}).keys())
+                for triggered_rule in triggered_rules:
+                    if rule_products := educt_rule_products[educt][triggered_rule]:
+                        # check if this rule covers the reaction
+                        if FormatConverter.smiles_covered_by(
+                            products, rule_products, standardize=True, canonicalize_tautomers=True
+                        ):
+                            found = True
+                        else:
+                            # Check if another prediction step would cover the reaction
+                            for product in rule_products:
+                                prod_rule_products = self._get_products(product, rules)
+                                prod_triggered_rules = list(
+                                    prod_rule_products.get(product, {}).keys()
+                                )
+                                for prod_triggered_rule in prod_triggered_rules:
+                                    if second_step_products := prod_rule_products[product][
+                                        prod_triggered_rule
+                                    ]:
+                                        if FormatConverter.smiles_covered_by(
+                                            products,
+                                            second_step_products,
+                                            standardize=True,
+                                            canonicalize_tautomers=True,
+                                        ):
+                                            rule_chain.append(
+                                                (
+                                                    triggered_rule,
+                                                    Rule.objects.get(url=triggered_rule).name,
+                                                )
+                                            )
+                                            rule_chain.append(
+                                                (
+                                                    prod_triggered_rule,
+                                                    Rule.objects.get(url=prod_triggered_rule).name,
+                                                )
+                                            )
+                                            res[edge.url] = rule_chain
+
+            if not found:
+                res[edge.url] = rule_chain
+
+        return res
--- a/utilities/ml.py
+++ b/utilities/ml.py
@ -5,11 +5,14 @@ import logging
 from collections import defaultdict
 from datetime import datetime
 from pathlib import Path
-from typing import List, Dict, Set, Tuple, TYPE_CHECKING
+from typing import List, Dict, Set, Tuple, TYPE_CHECKING, Callable
+from abc import ABC, abstractmethod

 import networkx as nx
 import numpy as np
+from envipy_plugins import Descriptor
 from numpy.random import default_rng
+import polars as pl
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.decomposition import PCA
 from sklearn.dummy import DummyClassifier
@ -26,70 +29,281 @@ if TYPE_CHECKING:
    from epdb.models import Rule, CompoundStructure, Reaction


-class Dataset:
-    def __init__(
-        self, columns: List[str], num_labels: int, data: List[List[str | int | float]] = None
-    ):
-        self.columns: List[str] = columns
-        self.num_labels: int = num_labels
-
-        if data is None:
-            self.data: List[List[str | int | float]] = list()
+class Dataset(ABC):
+    def __init__(self, columns: List[str] = None, data: List[List[str | int | float]] | pl.DataFrame = None):
+        if isinstance(data, pl.DataFrame):  # Allows for re-creation of self in cases like indexing with __getitem__
+            self.df = data
        else:
-            self.data = data
+            # Build either an empty dataframe with columns or fill it with list of list data
+            if data is not None and len(columns) != len(data[0]):
+                raise ValueError(f"Header and Data are not aligned {len(columns)} columns vs. {len(data[0])} columns")
+            if columns is None:
+                raise ValueError("Columns can't be None if data is not already a DataFrame")
+            self.df = pl.DataFrame(data=data, schema=columns, orient="row", infer_schema_length=None)

-        self.num_features: int = len(columns) - self.num_labels
-        self._struct_features: Tuple[int, int] = self._block_indices("feature_")
-        self._triggered: Tuple[int, int] = self._block_indices("trig_")
-        self._observed: Tuple[int, int] = self._block_indices("obs_")
+    def add_rows(self, rows: List[List[str | int | float]]):
+        """Add rows to the dataset. Extends the polars dataframe stored in self"""
+        if len(self.columns) != len(rows[0]):
+            raise ValueError(f"Header and Data are not aligned {len(self.columns)} columns vs. {len(rows[0])} columns")
+        new_rows = pl.DataFrame(data=rows, schema=self.columns, orient="row", infer_schema_length=None)
+        self.df.extend(new_rows)

-    def _block_indices(self, prefix) -> Tuple[int, int]:
+    def add_row(self, row: List[str | int | float]):
+        """See add_rows"""
+        self.add_rows([row])
+
+    def block_indices(self, prefix) -> List[int]:
+        """Find the indexes in column labels that has the prefix"""
        indices: List[int] = []
        for i, feature in enumerate(self.columns):
            if feature.startswith(prefix):
                indices.append(i)
+        return indices

-        return min(indices), max(indices)
+    @property
+    def columns(self) -> List[str]:
+        """Use the polars dataframe columns"""
+        return self.df.columns

-    def structure_id(self):
-        return self.data[0][0]
+    @property
+    def shape(self):
+        return self.df.shape

-    def add_row(self, row: List[str | int | float]):
-        if len(self.columns) != len(row):
-            raise ValueError(f"Header and Data are not aligned {len(self.columns)} vs. {len(row)}")
-        self.data.append(row)
+    @abstractmethod
+    def X(self, **kwargs):
+        pass

-    def times_triggered(self, rule_uuid) -> int:
-        idx = self.columns.index(f"trig_{rule_uuid}")
+    @abstractmethod
+    def y(self, **kwargs):
+        pass

-        times_triggered = 0
-        for row in self.data:
-            if row[idx] == 1:
-                times_triggered += 1
-
-        return times_triggered
-
-    def struct_features(self) -> Tuple[int, int]:
-        return self._struct_features
-
-    def triggered(self) -> Tuple[int, int]:
-        return self._triggered
-
-    def observed(self) -> Tuple[int, int]:
-        return self._observed
-
-    def at(self, position: int) -> Dataset:
-        return Dataset(self.columns, self.num_labels, [self.data[position]])
-
-    def limit(self, limit: int) -> Dataset:
-        return Dataset(self.columns, self.num_labels, self.data[:limit])
+    @staticmethod
+    @abstractmethod
+    def generate_dataset(reactions, *args, **kwargs):
+        pass

    def __iter__(self):
-        return (self.at(i) for i, _ in enumerate(self.data))
+        """Use polars iter_rows for iterating over the dataset"""
+        return self.df.iter_rows()
+
+    def __getitem__(self, item):
+        """Item is passed to polars allowing for advanced indexing.
+        See https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.__getitem__.html#polars.DataFrame.__getitem__"""
+        res = self.df[item]
+        if isinstance(res, pl.DataFrame):  # If we get a dataframe back from indexing make new self with res dataframe
+            return self.__class__(data=res)
+        else:  # If we don't get a dataframe back (likely base type, int, str, float etc.) return the item
+            return res
+
+    def save(self, path: "Path | str"):
+        import pickle
+
+        with open(path, "wb") as fh:
+            pickle.dump(self, fh)
+
+    @staticmethod
+    def load(path: "str | Path") -> "Dataset":
+        import pickle
+
+        return pickle.load(open(path, "rb"))
+
+    def to_numpy(self):
+        return self.df.to_numpy()
+
+    def __repr__(self):
+        return (
+            f"<{self.__class__.__name__} #rows={len(self.df)} #cols={len(self.columns)}>"
+        )
+
+    def __len__(self):
+        return len(self.df)
+
+    def iter_rows(self, named=False):
+        return self.df.iter_rows(named=named)
+
+    def filter(self, *predicates, **constraints):
+        return self.__class__(data=self.df.filter(*predicates, **constraints))
+
+    def select(self, *exprs, **named_exprs):
+        return self.__class__(data=self.df.select(*exprs, **named_exprs))
+
+    def with_columns(self, *exprs, **name_exprs):
+        return self.__class__(data=self.df.with_columns(*exprs, **name_exprs))
+
+    def sort(self, by, *more_by, descending=False, nulls_last=False, multithreaded=True, maintain_order=False):
+        return self.__class__(data=self.df.sort(by, *more_by, descending=descending, nulls_last=nulls_last,
+                                                multithreaded=multithreaded, maintain_order=maintain_order))
+
+    def item(self, row=None, column=None):
+        return self.df.item(row, column)
+
+    def fill_nan(self, value):
+        return self.__class__(data=self.df.fill_nan(value))
+
+    @property
+    def height(self):
+        return self.df.height
+
+
+class RuleBasedDataset(Dataset):
+    def __init__(self, num_labels=None, columns=None, data=None):
+        super().__init__(columns, data)
+        # Calculating num_labels allows functions like getitem to be in the base Dataset as it unifies the init.
+        self.num_labels: int = num_labels if num_labels else sum([1 for c in self.columns if "obs_" in c])
+        # Pre-calculate the ids of columns for features/labels, useful later in X and y
+        self._struct_features: List[int] = self.block_indices("feature_")
+        self._triggered: List[int] = self.block_indices("trig_")
+        self._observed: List[int] = self.block_indices("obs_")
+        self.feature_cols: List[int] = self._struct_features + self._triggered
+        self.num_features: int = len(self.feature_cols)
+        self.has_probs = False
+
+    def times_triggered(self, rule_uuid) -> int:
+        """Count how many times a rule is triggered by the number of rows with one in the rules trig column"""
+        return self.df.filter(pl.col(f"trig_{rule_uuid}") == 1).height
+
+    def struct_features(self) -> List[int]:
+        return self._struct_features
+
+    def triggered(self) -> List[int]:
+        return self._triggered
+
+    def observed(self) -> List[int]:
+        return self._observed
+
+    def structure_id(self, index: int):
+        """Get the UUID of a compound"""
+        return self.item(index, "structure_id")
+
+    def X(self, exclude_id_col=True, na_replacement=0):
+        """Get all the feature and trig columns"""
+        _col_ids = self.feature_cols
+        if not exclude_id_col:
+            _col_ids = [0] + _col_ids
+        res = self[:, _col_ids]
+        if na_replacement is not None:
+            res.df = res.df.fill_null(na_replacement)
+        return res
+
+    def trig(self, na_replacement=0):
+        """Get all the trig columns"""
+        res = self[:, self._triggered]
+        if na_replacement is not None:
+            res.df = res.df.fill_null(na_replacement)
+        return res
+
+    def y(self, na_replacement=0):
+        """Get all the obs columns"""
+        res = self[:, self._observed]
+        if na_replacement is not None:
+            res.df = res.df.fill_null(na_replacement)
+        return res
+
+    @staticmethod
+    def generate_dataset(reactions, applicable_rules, educts_only=True, feat_funcs: List["Callable | Descriptor"]=None):
+        if feat_funcs is None:
+            feat_funcs = [FormatConverter.maccs]
+        _structures = set()  # Get all the structures
+        for r in reactions:
+            _structures.update(r.educts.all())
+            if not educts_only:
+                _structures.update(r.products.all())
+
+        compounds = sorted(_structures, key=lambda x: x.url)
+        triggered: Dict[str, Set[str]] = defaultdict(set)
+        observed: Set[str] = set()
+
+        # Apply rules on collected compounds and store tps
+        for i, comp in enumerate(compounds):
+            logger.debug(f"{i + 1}/{len(compounds)}...")
+
+            for rule in applicable_rules:
+                product_sets = rule.apply(comp.smiles)
+                if len(product_sets) == 0:
+                    continue
+
+                key = f"{rule.uuid} + {comp.uuid}"
+                if key in triggered:
+                    logger.info(f"{key} already present. Duplicate reaction?")
+
+                for prod_set in product_sets:
+                    for smi in prod_set:
+                        try:
+                            smi = FormatConverter.standardize(smi, remove_stereo=True)
+                        except Exception:
+                            logger.debug(f"Standardizing SMILES failed for {smi}")
+                        triggered[key].add(smi)
+
+        for i, r in enumerate(reactions):
+            logger.debug(f"{i + 1}/{len(reactions)}...")
+
+            if len(r.educts.all()) != 1:
+                logger.debug(f"Skipping {r.url} as it has {len(r.educts.all())} substrates!")
+                continue
+
+            for comp in r.educts.all():
+                for rule in applicable_rules:
+                    key = f"{rule.uuid} + {comp.uuid}"
+                    if key not in triggered:
+                        continue
+
+                    # standardize products from reactions for comparison
+                    standardized_products = []
+                    for cs in r.products.all():
+                        smi = cs.smiles
+                        try:
+                            smi = FormatConverter.standardize(smi, remove_stereo=True)
+                        except Exception as e:
+                            logger.debug(f"Standardizing SMILES failed for {smi}")
+                        standardized_products.append(smi)
+                    if len(set(standardized_products).difference(triggered[key])) == 0:
+                        observed.add(key)
+        feat_columns = []
+        for feat_func in feat_funcs:
+            if isinstance(feat_func, Descriptor):
+                feats = feat_func.get_molecule_descriptors(compounds[0].smiles)
+            else:
+                feats = feat_func(compounds[0].smiles)
+            start_i = len(feat_columns)
+            feat_columns.extend([f"feature_{start_i + i}" for i, _ in enumerate(feats)])
+        ds_columns = (["structure_id"] +
+                      feat_columns +
+                      [f"trig_{r.uuid}" for r in applicable_rules] +
+                      [f"obs_{r.uuid}" for r in applicable_rules])
+        rows = []
+
+        for i, comp in enumerate(compounds):
+            # Features
+            feats = []
+            for feat_func in feat_funcs:
+                if isinstance(feat_func, Descriptor):
+                    feat = feat_func.get_molecule_descriptors(comp.smiles)
+                else:
+                    feat = feat_func(comp.smiles)
+                feats.extend(feat)
+            trig = []
+            obs = []
+            for rule in applicable_rules:
+                key = f"{rule.uuid} + {comp.uuid}"
+                # Check triggered
+                if key in triggered:
+                    trig.append(1)
+                else:
+                    trig.append(0)
+                # Check obs
+                if key in observed:
+                    obs.append(1)
+                elif key not in triggered:
+                    obs.append(None)
+                else:
+                    obs.append(0)
+            rows.append([str(comp.uuid)] + feats + trig + obs)
+        ds = RuleBasedDataset(len(applicable_rules), ds_columns, data=rows)
+        return ds

    def classification_dataset(
        self, structures: List[str | "CompoundStructure"], applicable_rules: List["Rule"]
-    ) -> Tuple[Dataset, List[List[PredictionResult]]]:
+    ) -> Tuple[RuleBasedDataset, List[List[PredictionResult]]]:
        classify_data = []
        classify_products = []
        for struct in structures:
@ -113,186 +327,18 @@ class Dataset:
                else:
                    trig.append(0)
                    prods.append([])
-
-            classify_data.append([struct_id] + features + trig + ([-1] * len(trig)))
+            new_row = [struct_id] + features + trig + ([-1] * len(trig))
+            if self.has_probs:
+                new_row += [-1] * len(trig)
+            classify_data.append(new_row)
            classify_products.append(prods)
+        ds = RuleBasedDataset(len(applicable_rules), self.columns, data=classify_data)
+        return ds, classify_products

-        return Dataset(
-            columns=self.columns, num_labels=self.num_labels, data=classify_data
-        ), classify_products
-
-    @staticmethod
-    def generate_dataset(
-        reactions: List["Reaction"], applicable_rules: List["Rule"], educts_only: bool = True
-    ) -> Dataset:
-        _structures = set()
-
-        for r in reactions:
-            for e in r.educts.all():
-                _structures.add(e)
-
-            if not educts_only:
-                for e in r.products:
-                    _structures.add(e)
-
-        compounds = sorted(_structures, key=lambda x: x.url)
-
-        triggered: Dict[str, Set[str]] = defaultdict(set)
-        observed: Set[str] = set()
-
-        # Apply rules on collected compounds and store tps
-        for i, comp in enumerate(compounds):
-            logger.debug(f"{i + 1}/{len(compounds)}...")
-
-            for rule in applicable_rules:
-                product_sets = rule.apply(comp.smiles)
-
-                if len(product_sets) == 0:
-                    continue
-
-                key = f"{rule.uuid} + {comp.uuid}"
-
-                if key in triggered:
-                    logger.info(f"{key} already present. Duplicate reaction?")
-
-                for prod_set in product_sets:
-                    for smi in prod_set:
-                        try:
-                            smi = FormatConverter.standardize(smi, remove_stereo=True)
-                        except Exception:
-                            # :shrug:
-                            logger.debug(f"Standardizing SMILES failed for {smi}")
-                            pass
-
-                        triggered[key].add(smi)
-
-        for i, r in enumerate(reactions):
-            logger.debug(f"{i + 1}/{len(reactions)}...")
-
-            if len(r.educts.all()) != 1:
-                logger.debug(f"Skipping {r.url} as it has {len(r.educts.all())} substrates!")
-                continue
-
-            for comp in r.educts.all():
-                for rule in applicable_rules:
-                    key = f"{rule.uuid} + {comp.uuid}"
-
-                    if key not in triggered:
-                        continue
-
-                    # standardize products from reactions for comparison
-                    standardized_products = []
-                    for cs in r.products.all():
-                        smi = cs.smiles
-
-                        try:
-                            smi = FormatConverter.standardize(smi, remove_stereo=True)
-                        except Exception as e:
-                            # :shrug:
-                            logger.debug(f"Standardizing SMILES failed for {smi}")
-                            pass
-
-                        standardized_products.append(smi)
-
-                    if len(set(standardized_products).difference(triggered[key])) == 0:
-                        observed.add(key)
-                    else:
-                        pass
-
-        ds = None
-
-        for i, comp in enumerate(compounds):
-            # Features
-            feat = FormatConverter.maccs(comp.smiles)
-            trig = []
-            obs = []
-
-            for rule in applicable_rules:
-                key = f"{rule.uuid} + {comp.uuid}"
-
-                # Check triggered
-                if key in triggered:
-                    trig.append(1)
-                else:
-                    trig.append(0)
-
-                # Check obs
-                if key in observed:
-                    obs.append(1)
-                elif key not in triggered:
-                    obs.append(None)
-                else:
-                    obs.append(0)
-
-            if ds is None:
-                header = (
-                    ["structure_id"]
-                    + [f"feature_{i}" for i, _ in enumerate(feat)]
-                    + [f"trig_{r.uuid}" for r in applicable_rules]
-                    + [f"obs_{r.uuid}" for r in applicable_rules]
-                )
-                ds = Dataset(header, len(applicable_rules))
-
-            ds.add_row([str(comp.uuid)] + feat + trig + obs)
-
-        return ds
-
-    def X(self, exclude_id_col=True, na_replacement=0):
-        res = self.__getitem__(
-            (slice(None), slice(1 if exclude_id_col else 0, len(self.columns) - self.num_labels))
-        )
-        if na_replacement is not None:
-            res = [[x if x is not None else na_replacement for x in row] for row in res]
-        return res
-
-    def trig(self, na_replacement=0):
-        res = self.__getitem__((slice(None), slice(self._triggered[0], self._triggered[1])))
-        if na_replacement is not None:
-            res = [[x if x is not None else na_replacement for x in row] for row in res]
-        return res
-
-    def y(self, na_replacement=0):
-        res = self.__getitem__((slice(None), slice(len(self.columns) - self.num_labels, None)))
-        if na_replacement is not None:
-            res = [[x if x is not None else na_replacement for x in row] for row in res]
-        return res
-
-    def __getitem__(self, key):
-        if not isinstance(key, tuple):
-            raise TypeError("Dataset must be indexed with dataset[rows, columns]")
-
-        row_key, col_key = key
-
-        # Normalize rows
-        if isinstance(row_key, int):
-            rows = [self.data[row_key]]
-        else:
-            rows = self.data[row_key]
-
-        # Normalize columns
-        if isinstance(col_key, int):
-            res = [row[col_key] for row in rows]
-        else:
-            res = [
-                [row[i] for i in range(*col_key.indices(len(row)))]
-                if isinstance(col_key, slice)
-                else [row[i] for i in col_key]
-                for row in rows
-            ]
-
-        return res
-
-    def save(self, path: "Path"):
-        import pickle
-
-        with open(path, "wb") as fh:
-            pickle.dump(self, fh)
-
-    @staticmethod
-    def load(path: "Path") -> "Dataset":
-        import pickle
-
-        return pickle.load(open(path, "rb"))
+    def add_probs(self, probs):
+        col_names = [f"prob_{self.columns[r_id].split('_')[-1]}" for r_id in self._observed]
+        self.df = self.df.with_columns(*[pl.Series(name, probs[:, j]) for j, name in enumerate(col_names)])
+        self.has_probs = True

    def to_arff(self, path: "Path"):
        arff = f"@relation 'enviPy-dataset: -C {self.num_labels}'\n"
@ -304,7 +350,7 @@ class Dataset:
                arff += f"@attribute {c} {{0,1}}\n"

        arff += "\n@data\n"
-        for d in self.data:
+        for d in self:
            ys = ",".join([str(v if v is not None else "?") for v in d[-self.num_labels :]])
            xs = ",".join([str(v if v is not None else "?") for v in d[: self.num_features]])
            arff += f"{ys},{xs}\n"
@ -313,10 +359,40 @@ class Dataset:
            fh.write(arff)
            fh.flush()

-    def __repr__(self):
-        return (
-            f"<Dataset #rows={len(self.data)} #cols={len(self.columns)} #labels={self.num_labels}>"
-        )
+
+class EnviFormerDataset(Dataset):
+    def __init__(self, columns=None, data=None):
+        super().__init__(columns, data)
+
+    def X(self):
+        """Return the educts"""
+        return self["educts"]
+
+    def y(self):
+        """Return the products"""
+        return self["products"]
+
+    @staticmethod
+    def generate_dataset(reactions, *args, **kwargs):
+        # Standardise reactions for the training data
+        stereo = kwargs.get("stereo", False)
+        rows = []
+        for reaction in reactions:
+            e = ".".join(
+                [
+                    FormatConverter.standardize(smile.smiles, remove_stereo=not stereo)
+                    for smile in reaction.educts.all()
+                ]
+            )
+            p = ".".join(
+                [
+                    FormatConverter.standardize(smile.smiles, remove_stereo=not stereo)
+                    for smile in reaction.products.all()
+                ]
+            )
+            rows.append([e, p])
+        ds = EnviFormerDataset(["educts", "products"], rows)
+        return ds


 class SparseLabelECC(BaseEstimator, ClassifierMixin):
@ -498,7 +574,7 @@ class EnsembleClassifierChain:
            self.classifiers = []

        if self.num_labels is None:
-            self.num_labels = len(Y[0])
+            self.num_labels = Y.shape[1]

        for p in range(self.num_chains):
            logger.debug(f"{datetime.now()} fitting {p + 1}/{self.num_chains}")
@ -529,7 +605,7 @@ class RelativeReasoning:

    def fit(self, X, Y):
        n_instances = len(Y)
-        n_attributes = len(Y[0])
+        n_attributes = Y.shape[1]

        for i in range(n_attributes):
            for j in range(n_attributes):
@ -541,8 +617,8 @@ class RelativeReasoning:
                countboth = 0

                for k in range(n_instances):
-                    vi = Y[k][i]
-                    vj = Y[k][j]
+                    vi = Y[k, i]
+                    vj = Y[k, j]

                    if vi is None or vj is None:
                        continue
@ -598,7 +674,7 @@ class ApplicabilityDomainPCA(PCA):
        self.min_vals = None
        self.max_vals = None

-    def build(self, train_dataset: "Dataset"):
+    def build(self, train_dataset: "RuleBasedDataset"):
        # transform
        X_scaled = self.scaler.fit_transform(train_dataset.X())
        # fit pca
@ -612,7 +688,7 @@ class ApplicabilityDomainPCA(PCA):
        instances_pca = self.transform(instances_scaled)
        return instances_pca

-    def is_applicable(self, classify_instances: "Dataset"):
+    def is_applicable(self, classify_instances: "RuleBasedDataset"):
        instances_pca = self.__transform(classify_instances.X())

        is_applicable = []
--- a/uv.lock
+++ b/uv.lock
@ -1,6 +1,10 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.12"
+resolution-markers = [
+    "sys_platform == 'linux' or sys_platform == 'win32'",
+    "sys_platform != 'linux' and sys_platform != 'win32'",
+]

 [[package]]
 name = "aiohappyeyeballs"
@ -176,6 +180,19 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/c9/af/0dcccc7fdcdf170f9a1585e5e96b6fb0ba1749ef6be8c89a6202284759bd/celery-5.5.3-py3-none-any.whl", hash = "sha256:0b5761a07057acee94694464ca482416b959568904c9dfa41ce8413a7d65d525", size = 438775, upload-time = "2025-06-01T11:08:09.94Z" },
 ]

+[[package]]
+name = "celery-stubs"
+version = "0.1.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "mypy" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/98/14/b853ada8706a3a301396566b6dd405d1cbb24bff756236a12a01dbe766a4/celery-stubs-0.1.3.tar.gz", hash = "sha256:0fb5345820f8a2bd14e6ffcbef2d10181e12e40f8369f551d7acc99d8d514919", size = 46583, upload-time = "2023-02-10T02:20:11.837Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1c/7a/4ab2347d13f1f59d10a7337feb9beb002664119f286036785284c6bec150/celery_stubs-0.1.3-py3-none-any.whl", hash = "sha256:dfb9ad27614a8af028b2055bb4a4ae99ca5e9a8d871428a506646d62153218d7", size = 89085, upload-time = "2023-02-10T02:20:09.409Z" },
+]
+
 [[package]]
 name = "certifi"
 version = "2025.10.5"
@ -525,13 +542,14 @@ wheels = [
 [[package]]
 name = "enviformer"
 version = "0.1.0"
-source = { git = "ssh://git@git.envipath.com/enviPath/enviformer.git?rev=v0.1.2#3f28f60cfa1df814cf7559303b5130933efa40ae" }
+source = { git = "ssh://git@git.envipath.com/enviPath/enviformer.git?rev=v0.1.4#7094be5767748fd63d4a84a5d71f06cf02ba07f3" }
 dependencies = [
    { name = "joblib" },
    { name = "lightning" },
    { name = "pytorch-lightning" },
    { name = "scikit-learn" },
-    { name = "torch" },
+    { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "torch", version = "2.8.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]

 [[package]]
@ -546,7 +564,6 @@ dependencies = [
    { name = "django-ninja" },
    { name = "django-oauth-toolkit" },
    { name = "django-polymorphic" },
-    { name = "django-stubs" },
    { name = "enviformer" },
    { name = "envipy-additional-information" },
    { name = "envipy-ambit" },
@ -554,6 +571,7 @@ dependencies = [
    { name = "epam-indigo" },
    { name = "gunicorn" },
    { name = "networkx" },
+    { name = "polars" },
    { name = "psycopg2-binary" },
    { name = "python-dotenv" },
    { name = "rdkit" },
@ -566,6 +584,8 @@ dependencies = [

 [package.optional-dependencies]
 dev = [
+    { name = "celery-stubs" },
+    { name = "django-stubs" },
    { name = "poethepoet" },
    { name = "pre-commit" },
    { name = "ruff" },
@ -577,15 +597,16 @@ ms-login = [
 [package.metadata]
 requires-dist = [
    { name = "celery", specifier = ">=5.5.2" },
+    { name = "celery-stubs", marker = "extra == 'dev'", specifier = "==0.1.3" },
    { name = "django", specifier = ">=5.2.1" },
    { name = "django-extensions", specifier = ">=4.1" },
    { name = "django-model-utils", specifier = ">=5.0.0" },
    { name = "django-ninja", specifier = ">=1.4.1" },
    { name = "django-oauth-toolkit", specifier = ">=3.0.1" },
    { name = "django-polymorphic", specifier = ">=4.1.0" },
-    { name = "django-stubs", specifier = ">=5.2.4" },
-    { name = "enviformer", git = "ssh://git@git.envipath.com/enviPath/enviformer.git?rev=v0.1.2" },
-    { name = "envipy-additional-information", git = "ssh://git@git.envipath.com/enviPath/enviPy-additional-information.git?rev=v0.1.4" },
+    { name = "django-stubs", marker = "extra == 'dev'", specifier = ">=5.2.4" },
+    { name = "enviformer", git = "ssh://git@git.envipath.com/enviPath/enviformer.git?rev=v0.1.4" },
+    { name = "envipy-additional-information", git = "ssh://git@git.envipath.com/enviPath/enviPy-additional-information.git?rev=v0.1.7" },
    { name = "envipy-ambit", git = "ssh://git@git.envipath.com/enviPath/enviPy-ambit.git" },
    { name = "envipy-plugins", git = "ssh://git@git.envipath.com/enviPath/enviPy-plugins.git?rev=v0.1.0" },
    { name = "epam-indigo", specifier = ">=1.30.1" },
@ -593,6 +614,7 @@ requires-dist = [
    { name = "msal", marker = "extra == 'ms-login'", specifier = ">=1.33.0" },
    { name = "networkx", specifier = ">=3.4.2" },
    { name = "poethepoet", marker = "extra == 'dev'", specifier = ">=0.37.0" },
+    { name = "polars", specifier = "==1.35.1" },
    { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=4.3.0" },
    { name = "psycopg2-binary", specifier = ">=2.9.10" },
    { name = "python-dotenv", specifier = ">=1.1.0" },
@ -608,8 +630,8 @@ provides-extras = ["ms-login", "dev"]

 [[package]]
 name = "envipy-additional-information"
-version = "0.1.0"
-source = { git = "ssh://git@git.envipath.com/enviPath/enviPy-additional-information.git?rev=v0.1.4#4da604090bf7cf1f3f552d69485472dbc623030a" }
+version = "0.1.7"
+source = { git = "ssh://git@git.envipath.com/enviPath/enviPy-additional-information.git?rev=v0.1.7#d02a5d5e6a931e6565ea86127813acf7e4b33a30" }
 dependencies = [
    { name = "pydantic" },
 ]
@ -865,7 +887,8 @@ dependencies = [
    { name = "packaging" },
    { name = "pytorch-lightning" },
    { name = "pyyaml" },
-    { name = "torch" },
+    { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "torch", version = "2.8.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
    { name = "torchmetrics" },
    { name = "tqdm" },
    { name = "typing-extensions" },
@ -1074,6 +1097,47 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" },
 ]

+[[package]]
+name = "mypy"
+version = "1.18.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "mypy-extensions" },
+    { name = "pathspec" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c0/77/8f0d0001ffad290cef2f7f216f96c814866248a0b92a722365ed54648e7e/mypy-1.18.2.tar.gz", hash = "sha256:06a398102a5f203d7477b2923dda3634c36727fa5c237d8f859ef90c42a9924b", size = 3448846, upload-time = "2025-09-19T00:11:10.519Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/07/06/dfdd2bc60c66611dd8335f463818514733bc763e4760dee289dcc33df709/mypy-1.18.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:33eca32dd124b29400c31d7cf784e795b050ace0e1f91b8dc035672725617e34", size = 12908273, upload-time = "2025-09-19T00:10:58.321Z" },
+    { url = "https://files.pythonhosted.org/packages/81/14/6a9de6d13a122d5608e1a04130724caf9170333ac5a924e10f670687d3eb/mypy-1.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a3c47adf30d65e89b2dcd2fa32f3aeb5e94ca970d2c15fcb25e297871c8e4764", size = 11920910, upload-time = "2025-09-19T00:10:20.043Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/a9/b29de53e42f18e8cc547e38daa9dfa132ffdc64f7250e353f5c8cdd44bee/mypy-1.18.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d6c838e831a062f5f29d11c9057c6009f60cb294fea33a98422688181fe2893", size = 12465585, upload-time = "2025-09-19T00:10:33.005Z" },
+    { url = "https://files.pythonhosted.org/packages/77/ae/6c3d2c7c61ff21f2bee938c917616c92ebf852f015fb55917fd6e2811db2/mypy-1.18.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01199871b6110a2ce984bde85acd481232d17413868c9807e95c1b0739a58914", size = 13348562, upload-time = "2025-09-19T00:10:11.51Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/31/aec68ab3b4aebdf8f36d191b0685d99faa899ab990753ca0fee60fb99511/mypy-1.18.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a2afc0fa0b0e91b4599ddfe0f91e2c26c2b5a5ab263737e998d6817874c5f7c8", size = 13533296, upload-time = "2025-09-19T00:10:06.568Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/83/abcb3ad9478fca3ebeb6a5358bb0b22c95ea42b43b7789c7fb1297ca44f4/mypy-1.18.2-cp312-cp312-win_amd64.whl", hash = "sha256:d8068d0afe682c7c4897c0f7ce84ea77f6de953262b12d07038f4d296d547074", size = 9828828, upload-time = "2025-09-19T00:10:28.203Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/04/7f462e6fbba87a72bc8097b93f6842499c428a6ff0c81dd46948d175afe8/mypy-1.18.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:07b8b0f580ca6d289e69209ec9d3911b4a26e5abfde32228a288eb79df129fcc", size = 12898728, upload-time = "2025-09-19T00:10:01.33Z" },
+    { url = "https://files.pythonhosted.org/packages/99/5b/61ed4efb64f1871b41fd0b82d29a64640f3516078f6c7905b68ab1ad8b13/mypy-1.18.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ed4482847168439651d3feee5833ccedbf6657e964572706a2adb1f7fa4dfe2e", size = 11910758, upload-time = "2025-09-19T00:10:42.607Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/46/d297d4b683cc89a6e4108c4250a6a6b717f5fa96e1a30a7944a6da44da35/mypy-1.18.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3ad2afadd1e9fea5cf99a45a822346971ede8685cc581ed9cd4d42eaf940986", size = 12475342, upload-time = "2025-09-19T00:11:00.371Z" },
+    { url = "https://files.pythonhosted.org/packages/83/45/4798f4d00df13eae3bfdf726c9244bcb495ab5bd588c0eed93a2f2dd67f3/mypy-1.18.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a431a6f1ef14cf8c144c6b14793a23ec4eae3db28277c358136e79d7d062f62d", size = 13338709, upload-time = "2025-09-19T00:11:03.358Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/09/479f7358d9625172521a87a9271ddd2441e1dab16a09708f056e97007207/mypy-1.18.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7ab28cc197f1dd77a67e1c6f35cd1f8e8b73ed2217e4fc005f9e6a504e46e7ba", size = 13529806, upload-time = "2025-09-19T00:10:26.073Z" },
+    { url = "https://files.pythonhosted.org/packages/71/cf/ac0f2c7e9d0ea3c75cd99dff7aec1c9df4a1376537cb90e4c882267ee7e9/mypy-1.18.2-cp313-cp313-win_amd64.whl", hash = "sha256:0e2785a84b34a72ba55fb5daf079a1003a34c05b22238da94fcae2bbe46f3544", size = 9833262, upload-time = "2025-09-19T00:10:40.035Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/0c/7d5300883da16f0063ae53996358758b2a2df2a09c72a5061fa79a1f5006/mypy-1.18.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:62f0e1e988ad41c2a110edde6c398383a889d95b36b3e60bcf155f5164c4fdce", size = 12893775, upload-time = "2025-09-19T00:10:03.814Z" },
+    { url = "https://files.pythonhosted.org/packages/50/df/2cffbf25737bdb236f60c973edf62e3e7b4ee1c25b6878629e88e2cde967/mypy-1.18.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8795a039bab805ff0c1dfdb8cd3344642c2b99b8e439d057aba30850b8d3423d", size = 11936852, upload-time = "2025-09-19T00:10:51.631Z" },
+    { url = "https://files.pythonhosted.org/packages/be/50/34059de13dd269227fb4a03be1faee6e2a4b04a2051c82ac0a0b5a773c9a/mypy-1.18.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6ca1e64b24a700ab5ce10133f7ccd956a04715463d30498e64ea8715236f9c9c", size = 12480242, upload-time = "2025-09-19T00:11:07.955Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/11/040983fad5132d85914c874a2836252bbc57832065548885b5bb5b0d4359/mypy-1.18.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d924eef3795cc89fecf6bedc6ed32b33ac13e8321344f6ddbf8ee89f706c05cb", size = 13326683, upload-time = "2025-09-19T00:09:55.572Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/ba/89b2901dd77414dd7a8c8729985832a5735053be15b744c18e4586e506ef/mypy-1.18.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:20c02215a080e3a2be3aa50506c67242df1c151eaba0dcbc1e4e557922a26075", size = 13514749, upload-time = "2025-09-19T00:10:44.827Z" },
+    { url = "https://files.pythonhosted.org/packages/25/bc/cc98767cffd6b2928ba680f3e5bc969c4152bf7c2d83f92f5a504b92b0eb/mypy-1.18.2-cp314-cp314-win_amd64.whl", hash = "sha256:749b5f83198f1ca64345603118a6f01a4e99ad4bf9d103ddc5a3200cc4614adf", size = 9982959, upload-time = "2025-09-19T00:10:37.344Z" },
+    { url = "https://files.pythonhosted.org/packages/87/e3/be76d87158ebafa0309946c4a73831974d4d6ab4f4ef40c3b53a385a66fd/mypy-1.18.2-py3-none-any.whl", hash = "sha256:22a1748707dd62b58d2ae53562ffc4d7f8bcc727e8ac7cbc69c053ddc874d47e", size = 2352367, upload-time = "2025-09-19T00:10:15.489Z" },
+]
+
+[[package]]
+name = "mypy-extensions"
+version = "1.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
+]
+
 [[package]]
 name = "networkx"
 version = "3.5"
@ -1192,7 +1256,7 @@ name = "nvidia-cudnn-cu12"
 version = "9.10.2.21"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-cublas-cu12" },
+    { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" },
@ -1203,7 +1267,7 @@ name = "nvidia-cufft-cu12"
 version = "11.3.3.83"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-nvjitlink-cu12" },
+    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" },
@ -1230,9 +1294,9 @@ name = "nvidia-cusolver-cu12"
 version = "11.7.3.90"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-cublas-cu12" },
-    { name = "nvidia-cusparse-cu12" },
-    { name = "nvidia-nvjitlink-cu12" },
+    { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" },
@ -1243,7 +1307,7 @@ name = "nvidia-cusparse-cu12"
 version = "12.5.8.93"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-nvjitlink-cu12" },
+    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" },
@ -1308,6 +1372,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/aa/18/a8444036c6dd65ba3624c63b734d3ba95ba63ace513078e1580590075d21/pastel-0.2.1-py2.py3-none-any.whl", hash = "sha256:4349225fcdf6c2bb34d483e523475de5bb04a5c10ef711263452cb37d7dd4364", size = 5955, upload-time = "2020-09-16T19:21:11.409Z" },
 ]

+[[package]]
+name = "pathspec"
+version = "0.12.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" },
+]
+
 [[package]]
 name = "pillow"
 version = "11.3.0"
@ -1396,6 +1469,32 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/92/1b/5337af1a6a478d25a3e3c56b9b4b42b0a160314e02f4a0498d5322c8dac4/poethepoet-0.37.0-py3-none-any.whl", hash = "sha256:861790276315abcc8df1b4bd60e28c3d48a06db273edd3092f3c94e1a46e5e22", size = 90062, upload-time = "2025-08-11T18:00:27.595Z" },
 ]

+[[package]]
+name = "polars"
+version = "1.35.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "polars-runtime-32" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9b/5b/3caad788d93304026cbf0ab4c37f8402058b64a2f153b9c62f8b30f5d2ee/polars-1.35.1.tar.gz", hash = "sha256:06548e6d554580151d6ca7452d74bceeec4640b5b9261836889b8e68cfd7a62e", size = 694881, upload-time = "2025-10-30T12:12:52.294Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9f/4c/21a227b722534404241c2a76beceb7463469d50c775a227fc5c209eb8adc/polars-1.35.1-py3-none-any.whl", hash = "sha256:c29a933f28aa330d96a633adbd79aa5e6a6247a802a720eead9933f4613bdbf4", size = 783598, upload-time = "2025-10-30T12:11:54.668Z" },
+]
+
+[[package]]
+name = "polars-runtime-32"
+version = "1.35.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/df/3e/19c252e8eb4096300c1a36ec3e50a27e5fa9a1ccaf32d3927793c16abaee/polars_runtime_32-1.35.1.tar.gz", hash = "sha256:f6b4ec9cd58b31c87af1b8c110c9c986d82345f1d50d7f7595b5d447a19dc365", size = 2696218, upload-time = "2025-10-30T12:12:53.479Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/08/2c/da339459805a26105e9d9c2f07e43ca5b8baeee55acd5457e6881487a79a/polars_runtime_32-1.35.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:6f051a42f6ae2f26e3bc2cf1f170f2120602976e2a3ffb6cfba742eecc7cc620", size = 40525100, upload-time = "2025-10-30T12:11:58.098Z" },
+    { url = "https://files.pythonhosted.org/packages/27/70/a0733568b3533481924d2ce68b279ab3d7334e5fa6ed259f671f650b7c5e/polars_runtime_32-1.35.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:c2232f9cf05ba59efc72d940b86c033d41fd2d70bf2742e8115ed7112a766aa9", size = 36701908, upload-time = "2025-10-30T12:12:02.166Z" },
+    { url = "https://files.pythonhosted.org/packages/46/54/6c09137bef9da72fd891ba58c2962cc7c6c5cad4649c0e668d6b344a9d7b/polars_runtime_32-1.35.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42f9837348557fd674477ea40a6ac8a7e839674f6dd0a199df24be91b026024c", size = 41317692, upload-time = "2025-10-30T12:12:04.928Z" },
+    { url = "https://files.pythonhosted.org/packages/22/55/81c5b266a947c339edd7fbaa9e1d9614012d02418453f48b76cc177d3dd9/polars_runtime_32-1.35.1-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:c873aeb36fed182d5ebc35ca17c7eb193fe83ae2ea551ee8523ec34776731390", size = 37853058, upload-time = "2025-10-30T12:12:08.342Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/58/be8b034d559eac515f52408fd6537be9bea095bc0388946a4e38910d3d50/polars_runtime_32-1.35.1-cp39-abi3-win_amd64.whl", hash = "sha256:35cde9453ca7032933f0e58e9ed4388f5a1e415dd0db2dd1e442c81d815e630c", size = 41289554, upload-time = "2025-10-30T12:12:11.104Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/7f/e0111b9e2a1169ea82cde3ded9c92683e93c26dfccd72aee727996a1ac5b/polars_runtime_32-1.35.1-cp39-abi3-win_arm64.whl", hash = "sha256:fd77757a6c9eb9865c4bfb7b07e22225207c6b7da382bd0b9bd47732f637105d", size = 36958878, upload-time = "2025-10-30T12:12:15.206Z" },
+]
+
 [[package]]
 name = "pre-commit"
 version = "4.3.0"
@ -1670,7 +1769,8 @@ dependencies = [
    { name = "lightning-utilities" },
    { name = "packaging" },
    { name = "pyyaml" },
-    { name = "torch" },
+    { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "torch", version = "2.8.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
    { name = "torchmetrics" },
    { name = "tqdm" },
    { name = "typing-extensions" },
@ -1754,11 +1854,11 @@ wheels = [

 [[package]]
 name = "redis"
-version = "6.4.0"
+version = "7.0.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/0d/d6/e8b92798a5bd67d659d51a18170e91c16ac3b59738d91894651ee255ed49/redis-6.4.0.tar.gz", hash = "sha256:b01bc7282b8444e28ec36b261df5375183bb47a07eb9c603f284e89cbc5ef010", size = 4647399, upload-time = "2025-08-07T08:10:11.441Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/57/8f/f125feec0b958e8d22c8f0b492b30b1991d9499a4315dfde466cf4289edc/redis-7.0.1.tar.gz", hash = "sha256:c949df947dca995dc68fdf5a7863950bf6df24f8d6022394585acc98e81624f1", size = 4755322, upload-time = "2025-10-27T14:34:00.33Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e8/02/89e2ed7e85db6c93dfa9e8f691c5087df4e3551ab39081a4d7c6d1f90e05/redis-6.4.0-py3-none-any.whl", hash = "sha256:f0544fa9604264e9464cdf4814e7d4830f74b165d52f2a330a760a88dd248b7f", size = 279847, upload-time = "2025-08-07T08:10:09.84Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/97/9f22a33c475cda519f20aba6babb340fb2f2254a02fb947816960d1e669a/redis-7.0.1-py3-none-any.whl", hash = "sha256:4977af3c7d67f8f0eb8b6fec0dafc9605db9343142f634041fb0235f67c0588a", size = 339938, upload-time = "2025-10-27T14:33:58.553Z" },
 ]

 [[package]]
@ -1963,15 +2063,40 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" },
 ]

+[[package]]
+name = "torch"
+version = "2.8.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "sys_platform != 'linux' and sys_platform != 'win32'",
+]
+dependencies = [
+    { name = "filelock", marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "fsspec", marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "jinja2", marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "networkx", marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "setuptools", marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "sympy", marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "typing-extensions", marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/be/66/5c9a321b325aaecb92d4d1855421e3a055abd77903b7dab6575ca07796db/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:619c2869db3ada2c0105487ba21b5008defcc472d23f8b80ed91ac4a380283b0", size = 73630478, upload-time = "2025-08-06T14:53:57.144Z" },
+    { url = "https://files.pythonhosted.org/packages/de/69/8b7b13bba430f5e21d77708b616f767683629fc4f8037564a177d20f90ed/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:1a62a1ec4b0498930e2543535cf70b1bef8c777713de7ceb84cd79115f553767", size = 73915128, upload-time = "2025-08-06T14:54:34.769Z" },
+    { url = "https://files.pythonhosted.org/packages/04/6e/650bb7f28f771af0cb791b02348db8b7f5f64f40f6829ee82aa6ce99aabe/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7b677e17f5a3e69fdef7eb3b9da72622f8d322692930297e4ccb52fefc6c8211", size = 73632395, upload-time = "2025-08-06T14:55:28.645Z" },
+]
+
 [[package]]
 name = "torch"
 version = "2.8.0+cu128"
 source = { registry = "https://download.pytorch.org/whl/cu128" }
+resolution-markers = [
+    "sys_platform == 'linux' or sys_platform == 'win32'",
+]
 dependencies = [
-    { name = "filelock" },
-    { name = "fsspec" },
-    { name = "jinja2" },
-    { name = "networkx" },
+    { name = "filelock", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "fsspec", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "jinja2", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "networkx", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
    { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
    { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
    { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
@ -1986,10 +2111,10 @@ dependencies = [
    { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
    { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
    { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
-    { name = "setuptools" },
-    { name = "sympy" },
+    { name = "setuptools", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "sympy", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
    { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
-    { name = "typing-extensions" },
+    { name = "typing-extensions", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 wheels = [
    { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4354fc05bb79b208d6995a04ca1ceef6a9547b1c4334435574353d381c55087c" },
@ -2008,7 +2133,8 @@ dependencies = [
    { name = "lightning-utilities" },
    { name = "numpy" },
    { name = "packaging" },
-    { name = "torch" },
+    { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "torch", version = "2.8.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/85/2e/48a887a59ecc4a10ce9e8b35b3e3c5cef29d902c4eac143378526e7485cb/torchmetrics-1.8.2.tar.gz", hash = "sha256:cf64a901036bf107f17a524009eea7781c9c5315d130713aeca5747a686fe7a5", size = 580679, upload-time = "2025-09-03T14:00:54.077Z" }
 wheels = [
@ -2032,7 +2158,7 @@ name = "triton"
 version = "3.4.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "setuptools" },
+    { name = "setuptools", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/d0/66/b1eb52839f563623d185f0927eb3530ee4d5ffe9d377cdaf5346b306689e/triton-3.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:31c1d84a5c0ec2c0f8e8a072d7fd150cab84a9c239eaddc6706c081bfae4eb04", size = 155560068, upload-time = "2025-07-30T19:58:37.081Z" },
Author	SHA1	Message	Date
Liam Brydon	9ec5e433ea	test fixes	2025-11-07 08:46:28 +13:00
Liam Brydon	dddea79daf	test fixes	2025-11-07 08:32:05 +13:00
Liam Brydon	cfd8d7440b	Merge remote-tracking branch 'origin/develop' into enhancement/dataset # Conflicts: # epdb/models.py # tests/test_enviformer.py # tests/test_model.py	2025-11-07 08:28:03 +13:00
Liam Brydon	6a5413b492	pyproject.toml update and merge from develop	2025-11-07 08:09:06 +13:00
Liam Brydon	8282855975	add compatibility with Descriptor objects.	2025-11-06 10:42:32 +13:00
Liam Brydon	09ddd46d69	app domain assess and assess_batch. Add threshold check for compatability	2025-11-06 10:32:21 +13:00
Liam Brydon	9f0e396437	...	2025-11-05 13:30:03 +13:00
Liam Brydon	5dc4c822c4	simple implementation for other feature types #120	2025-11-05 13:11:40 +13:00
Liam Brydon	f1f7ce344c	finished app domain conversion #120	2025-11-05 12:41:33 +13:00
jebus	98d62e1d1f	[Feature] Make Matomo Site ID configurable via .env (#183 ) Co-authored-by: Tim Lorsbach <tim@lorsba.ch> Reviewed-on: enviPath/enviPy#183	2025-11-05 10:19:07 +13:00
Liam Brydon	13af49488e	starting on app domain with new dataset #120	2025-11-04 16:33:56 +13:00
Liam Brydon	ac5d370b18	new RuleBasedDataset and EnviFormer dataset working for respective models #120	2025-11-04 10:58:16 +13:00
Liam Brydon	ff51e48f90	work towards #120	2025-11-03 15:24:28 +13:00
jebus	13ed86a780	[Feature] Identify Missing Rules (#177 ) Fixes #97 Co-authored-by: Tim Lorsbach <tim@lorsba.ch> Reviewed-on: enviPath/enviPy#177	2025-10-30 00:47:45 +13:00
jebus	f1b4c5aadb	[Feature] Adding list_display to various django admin sites (#180 ) Co-authored-by: Tim Lorsbach <tim@lorsba.ch> Reviewed-on: enviPath/enviPy#180	2025-10-29 22:26:28 +13:00
jebus	37e0e18a28	[Fix] Fixed Incremental Prediction Typo (#176 ) Co-authored-by: Tim Lorsbach <tim@lorsba.ch> Reviewed-on: enviPath/enviPy#176	2025-10-28 23:29:08 +13:00
jebus	de44c22606	[Migration] Added missing Migration for JobLog (#175 ) Co-authored-by: Tim Lorsbach <tim@lorsba.ch> Reviewed-on: enviPath/enviPy#175	2025-10-27 22:41:16 +13:00
jebus	a952c08469	[Feature] Basic logging of Jobs, Model Evaluation (#169 ) Co-authored-by: Tim Lorsbach <tim@lorsba.ch> Reviewed-on: enviPath/enviPy#169	2025-10-27 22:34:05 +13:00
Liam Brydon	8166df6f39	work towards #120	2025-10-24 14:40:26 +13:00
liambrydon	551cfc7768	[Enhancement] Create ML Models (#173 ) ## Changes - Ability to change the threshold from a command line argument. - Names of data packages included in model name - Names of data, rule and eval packages included in the model description - EnviFormer models are now viewable on the admin site - Ignore CO2 for training and evaluating EnviFormer Co-authored-by: Liam Brydon <62733830+MyCreativityOutlet@users.noreply.github.com> Reviewed-on: enviPath/enviPy#173 Reviewed-by: jebus <lorsbach@envipath.com> Co-authored-by: liambrydon <lbry121@aucklanduni.ac.nz> Co-committed-by: liambrydon <lbry121@aucklanduni.ac.nz>	2025-10-23 06:20:22 +13:00
jebus	8fda2577ee	[Feature] Dump/Restore of enviFormer Models (#170 ) Dump: `./manage.py dump_enviformer d544303c-a1ca-439d-b036-5e3413ce4a48 --output test.tar.gz` Restore: `./manage.py load_enviformer test.tar.gz 1062eb09-5ec7-4bdd-a8f2-ae0252eb4b06` Co-authored-by: Tim Lorsbach <tim@lorsba.ch> Reviewed-on: enviPath/enviPy#170	2025-10-22 10:39:22 +13:00
Liam Brydon	2980a75daa	start towards #120	2025-10-22 08:22:29 +13:00
jebus	819a94aced	[Fix] Catch Exception for Adding Structures / Show PubChem Substances (#168 ) Fixes #163 Fixes #165 Co-authored-by: Tim Lorsbach <tim@lorsba.ch> Reviewed-on: enviPath/enviPy#168	2025-10-22 01:13:06 +13:00
liambrydon	376fd65785	[Feature] ML model caching for reducing prediction overhead (#156 ) The caching is now finished. The cache is created in `settings.py` giving us the most flexibility for using it in the future. The cache is currently updated/accessed by `tasks.py/get_ml_model` which can be called from whatever task needs to access ml models in this way (currently, `predict` and `predict_simple`). This implementation currently caches all ml models including the relative reasoning. If we don't want this and only want to cache enviFormer, i can change it to that. However, I don't think there is a harm in having the other models be cached as well. Co-authored-by: Liam Brydon <62733830+MyCreativityOutlet@users.noreply.github.com> Reviewed-on: enviPath/enviPy#156 Co-authored-by: liambrydon <lbry121@aucklanduni.ac.nz> Co-committed-by: liambrydon <lbry121@aucklanduni.ac.nz>	2025-10-16 08:58:36 +13:00