diff --git a/epdb/models.py b/epdb/models.py
index 4611d187..e3e66157 100644
--- a/epdb/models.py
+++ b/epdb/models.py
@@ -2886,7 +2886,7 @@ class ApplicabilityDomain(EnviPathModel):
                     rule_idx, probs, neighbour_datasets
                 )
                 neighbours_per_rule[rule_idx] = [
-                    CompoundStructure.objects.get(uuid=ds[1].structure_id())
+                    CompoundStructure.objects.get(uuid=ds[1].structure_id(0))  # NOTE(review): entries look like (index, dataset) pairs; row 0 of the dataset holds the id — confirm
                     for ds in neighbour_datasets
                 ]
                 neighbor_probs_per_rule[rule_idx] = [
diff --git a/pyproject.toml b/pyproject.toml
index 1fba9371..295f8b55 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,10 +27,11 @@ dependencies = [
     "scikit-learn>=1.6.1",
     "sentry-sdk[django]>=2.32.0",
     "setuptools>=80.8.0",
+    "polars==1.34.0",
 ]
 
 [tool.uv.sources]
-enviformer = { git = "ssh://git@git.envipath.com/enviPath/enviformer.git", rev = "v0.1.2" }
+enviformer = { git = "ssh://git@git.envipath.com/enviPath/enviformer.git", branch = "enhancement/dataset_support" }
 envipy-plugins = { git = "ssh://git@git.envipath.com/enviPath/enviPy-plugins.git", rev = "v0.1.0" }
 envipy-additional-information = { git = "ssh://git@git.envipath.com/enviPath/enviPy-additional-information.git", rev = "v0.1.7"}
 envipy-ambit = { git = "ssh://git@git.envipath.com/enviPath/enviPy-ambit.git" }
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 1cc7999d..962ce400 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -1,7 +1,7 @@
 from django.test import TestCase
 
 from epdb.logic import PackageManager
-from epdb.models import Reaction, Compound, User, Rule
+from epdb.models import Reaction, Compound, User, Rule, Package
 from utilities.ml import RuleBasedDataset
 
 
@@ -41,12 +41,88 @@ class DatasetTest(TestCase):
         super(DatasetTest, cls).setUpClass()
         cls.user = User.objects.get(username="anonymous")
         cls.package = PackageManager.create_package(cls.user, "Anon Test Package", "No Desc")
+        cls.BBD_SUBSET = Package.objects.get(name="Fixtures")
 
-    def test_smoke(self):
+    def test_generate_dataset(self):
+        """Test generating dataset does not crash"""
+        self.generate_dataset()
+
+    def test_indexing(self):
+        """Test indexing a few different ways to check for crashes"""
+        ds, reactions, rules = self.generate_dataset()
+        print(ds[5])
+        print(ds[2, 5])
+        print(ds[3:6, 2:8])
+        print(ds[:2, "structure_id"])
+
+    def test_add_rows(self):
+        """Test adding one row and adding multiple rows"""
+        ds, reactions, rules = self.generate_dataset()
+        ds.add_row(list(ds.df.row(1)))
+        ds.add_rows([list(ds.df.row(i)) for i in range(5)])
+
+    def test_times_triggered(self):
+        """Check getting times triggered for a rule id"""
+        ds, reactions, rules = self.generate_dataset()
+        print(ds.times_triggered(rules[0].uuid))
+
+    def test_block_indices(self):
+        """Test the usages of _block_indices"""
+        ds, reactions, rules = self.generate_dataset()
+        print(ds.struct_features())
+        print(ds.triggered())
+        print(ds.observed())
+
+    def test_structure_id(self):
+        """Check getting a structure id from row index"""
+        ds, reactions, rules = self.generate_dataset()
+        print(ds.structure_id(0))
+
+    def test_x(self):
+        """Test getting X portion of the dataframe"""
+        ds, reactions, rules = self.generate_dataset()
+        print(ds.X().df.head())
+
+    def test_trig(self):
+        """Test getting the triggered portion of the dataframe"""
+        ds, reactions, rules = self.generate_dataset()
+        print(ds.trig().df.head())
+
+    def test_y(self):
+        """Test getting the Y portion of the dataframe"""
+        ds, reactions, rules = self.generate_dataset()
+        print(ds.y().df.head())
+
+    def test_classification_dataset(self):
+        """Test making the classification dataset"""
+        ds, reactions, rules = self.generate_dataset()
+        compounds = [c.default_structure for c in Compound.objects.filter(package=self.BBD_SUBSET)]
+        class_ds, products = ds.classification_dataset(compounds, rules)
+        print(class_ds.df.head(5))
+        print(products[:5])
+
+    def test_to_arff(self):
+        """Test exporting the arff version of the dataset"""
+        ds, reactions, rules = self.generate_dataset()
+        ds.to_arff("dataset_arff_test.arff")
+
+    def test_save_load(self):
+        """Test saving and loading dataset"""
+        ds, reactions, rules = self.generate_dataset()
+
+    def test_dataset_example(self):
+        """Test with a concrete example checking dataset size"""
         reactions = [r for r in Reaction.objects.filter(package=self.package)]
         applicable_rules = [self.rule1]
 
         ds = RuleBasedDataset.generate_dataset(reactions, applicable_rules)
 
         self.assertEqual(len(ds.y()), 1)
-        self.assertEqual(sum(ds.y()[0]), 1)
+        self.assertEqual(ds.y().df.item(), 1)
+
+    def generate_dataset(self):
+        """Generate a RuleBasedDataset from test package data"""
+        reactions = [r for r in Reaction.objects.filter(package=self.BBD_SUBSET)]
+        applicable_rules = [r for r in Rule.objects.filter(package=self.BBD_SUBSET)]
+        ds = RuleBasedDataset.generate_dataset(reactions, applicable_rules)
+        return ds, reactions, applicable_rules
diff --git a/utilities/ml.py b/utilities/ml.py
index f75656b5..9521cc39 100644
--- a/utilities/ml.py
+++ b/utilities/ml.py
@@ -30,42 +30,47 @@ if TYPE_CHECKING:
 
 class Dataset(ABC):
     def __init__(self, columns: List[str] = None, data: List[List[str | int | float]] | pl.DataFrame = None):
-        if isinstance(data, pl.DataFrame):
+        if isinstance(data, pl.DataFrame):  # Allows for re-creation of self in cases like indexing with __getitem__
             self.df = data
         else:
-            if data is not None and len(columns) != len(data[0]):
-                raise ValueError(f"Header and Data are not aligned {len(columns)} vs. {len(data[0])}")
+            # Build either an empty dataframe with columns or fill it with list of list data (columns checked first: len(None) would raise TypeError)
             if columns is None:
                 raise ValueError("Columns can't be None if data is not already a DataFrame")
-            self.df = pl.DataFrame(data=data, schema=columns)
+            if data is not None and len(data) > 0 and len(columns) != len(data[0]):  # empty data has no first row to compare
+                raise ValueError(f"Header and Data are not aligned {len(columns)} columns vs. {len(data[0])} columns")
+            self.df = pl.DataFrame(data=data, schema=columns, orient="row", infer_schema_length=None)
 
     def add_rows(self, rows: List[List[str | int | float]]):
+        """Add rows to the dataset. Extends the polars dataframe stored in self"""
         if len(self.columns) != len(rows[0]):
-            raise ValueError(f"Header and Data are not aligned {len(self.columns)} vs. {len(rows[0])}")
-        new_rows = pl.DataFrame(data=rows, schema=self.columns)
+            raise ValueError(f"Header and Data are not aligned {len(self.columns)} columns vs. {len(rows[0])} columns")
+        new_rows = pl.DataFrame(data=rows, schema=self.columns, orient="row", infer_schema_length=None)
         self.df.extend(new_rows)
 
     def add_row(self, row: List[str | int | float]):
+        """See add_rows"""
         self.add_rows([row])
 
     def _block_indices(self, prefix) -> Tuple[int, int]:
+        """Find the start and end indexes in column labels that has the prefix"""
         indices: List[int] = []
         for i, feature in enumerate(self.columns):
             if feature.startswith(prefix):
                 indices.append(i)
 
-        return min(indices), max(indices)
+        return min(indices, default=None), max(indices, default=None)
 
     @property
     def columns(self) -> List[str]:
+        """Use the polars dataframe columns"""
         return self.df.columns
 
     @abstractmethod
-    def X(self):
+    def X(self, **kwargs):
         pass
 
     @abstractmethod
-    def y(self):
+    def y(self, **kwargs):
         pass
 
     @staticmethod
@@ -73,11 +78,26 @@ class Dataset(ABC):
     def generate_dataset(reactions, *args, **kwargs):
         pass
 
-    def at(self, position: int) -> RuleBasedDataset:
-        return RuleBasedDataset(self.columns, self.num_labels, self.df[position])
+    def at(self, position: int) -> Dataset:
+        """See __getitem__"""
+        return self[position]
+
+    def limit(self, limit: int) -> Dataset:
+        """See __getitem__"""
+        return self[:limit]
 
     def __iter__(self):
-        return (self.at(i) for i, _ in enumerate(self.data))
+        """Use polars iter_rows for iterating over the dataset"""
+        return self.df.iter_rows()
+
+    def __getitem__(self, item):
+        """Item is passed to polars allowing for advanced indexing.
+        See https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.__getitem__.html#polars.DataFrame.__getitem__"""
+        res = self.df[item]
+        if isinstance(res, pl.DataFrame):  # Wrap the frame we already computed in a new instance of the same class
+            return self.__class__(data=res)
+        else:  # Anything else polars hands back (e.g. a Series or a single cell value) is returned unchanged
+            return res
 
     def save(self, path: "Path"):
         import pickle
@@ -86,35 +106,61 @@ class Dataset(ABC):
             pickle.dump(self, fh)
 
     @staticmethod
-    def load(path: "str | Path") -> "RuleBasedDataset":
+    def load(path: "str | Path") -> "Dataset":
         import pickle
-
-        return pickle.load(open(path, "rb"))
+        with open(path, "rb") as fh:  # close the handle even if unpickling raises
+            return pickle.load(fh)
 
+    def __repr__(self):
+        return (
+            f"<{self.__class__.__name__} #rows={len(self.df)} #cols={len(self.columns)}>"
+        )
 
-class NewRuleBasedDataset(Dataset):
-    def __init__(self, num_labels, columns=None, data=None):
+    def __len__(self):
+        return len(self.df)
+
+
+class RuleBasedDataset(Dataset):
+    def __init__(self, num_labels=None, columns=None, data=None):
         super().__init__(columns, data)
-        self.num_labels: int = num_labels
+        self.num_labels: int = num_labels if num_labels else sum([1 for c in self.columns if "trig_" in c])
         self.num_features: int = len(self.columns) - self.num_labels
+        self._struct_features: Tuple[int, int] = self._block_indices("feature_")
+        self._triggered: Tuple[int, int] = self._block_indices("trig_")
+        self._observed: Tuple[int, int] = self._block_indices("obs_")
 
     def times_triggered(self, rule_uuid) -> int:
         return self.df.filter(pl.col(f"trig_{rule_uuid}") == 1).height
 
     def struct_features(self) -> Tuple[int, int]:
-        return self._block_indices("feature_")
+        return self._struct_features
 
     def triggered(self) -> Tuple[int, int]:
-        return self._block_indices("trig_")
+        return self._triggered
 
     def observed(self) -> Tuple[int, int]:
-        return self._block_indices("obs_")
+        return self._observed
 
-    def X(self):
-        pass
+    def structure_id(self, index: int):
+        return self.df.item(index, "structure_id")
 
-    def y(self):
-        pass
+    def X(self, exclude_id_col=True, na_replacement=0):
+        res = self[:, 1 if exclude_id_col else 0: len(self.columns) - self.num_labels]
+        if na_replacement is not None:
+            res.df = res.df.fill_null(na_replacement)
+        return res
+
+    def trig(self, na_replacement=0):
+        first, last = self._triggered  # inclusive (min, max) column positions from _block_indices
+        res = self[:, first: None if last is None else last + 1]  # +1 so the last trig_ column is kept
+        res.df = res.df if na_replacement is None else res.df.fill_null(na_replacement)
+        return res
+
+    def y(self, na_replacement=0):
+        res = self[:, len(self.columns) - self.num_labels:]
+        if na_replacement is not None:
+            res.df = res.df.fill_null(na_replacement)
+        return res
 
     @staticmethod
     def generate_dataset(reactions, applicable_rules, educts_only=True):
@@ -178,7 +224,6 @@ class NewRuleBasedDataset(Dataset):
                       [f"feature_{i}" for i, _ in enumerate(FormatConverter.maccs(compounds[0].smiles))] +
                       [f"trig_{r.uuid}" for r in applicable_rules] +
                       [f"obs_{r.uuid}" for r in applicable_rules])
-        ds = NewRuleBasedDataset(len(applicable_rules), ds_columns)
         rows = []
 
         for i, comp in enumerate(compounds):
@@ -201,77 +246,18 @@ class NewRuleBasedDataset(Dataset):
                 else:
                     obs.append(0)
             rows.append([str(comp.uuid)] + feat + trig + obs)
-        ds.add_rows(rows)
+        ds = RuleBasedDataset(len(applicable_rules), ds_columns, data=rows)
         return ds
 
-
-    def __getitem__(self, item):
-        pass
-
-
-class RuleBasedDataset(Dataset):
-    def __init__(
-        self, columns: List[str], num_labels: int, data: List[List[str | int | float]] = None
-    ):
-        self.columns: List[str] = columns
-        self.num_labels: int = num_labels
-
-        if data is None:
-            self.data: List[List[str | int | float]] = list()
-        else:
-            self.data = data
-
-        self.num_features: int = len(columns) - self.num_labels
-        self._struct_features: Tuple[int, int] = self._block_indices("feature_")
-        self._triggered: Tuple[int, int] = self._block_indices("trig_")
-        self._observed: Tuple[int, int] = self._block_indices("obs_")
-
-    def _block_indices(self, prefix) -> Tuple[int, int]:
-        indices: List[int] = []
-        for i, feature in enumerate(self.columns):
-            if feature.startswith(prefix):
-                indices.append(i)
-
-        return min(indices), max(indices)
-
-    def structure_id(self):
-        return self.data[0][0]
-
-    def add_row(self, row: List[str | int | float]):
-        if len(self.columns) != len(row):
-            raise ValueError(f"Header and Data are not aligned {len(self.columns)} vs. {len(row)}")
-        self.data.append(row)
-
-    def times_triggered(self, rule_uuid) -> int:
-        idx = self.columns.index(f"trig_{rule_uuid}")
-
-        times_triggered = 0
-        for row in self.data:
-            if row[idx] == 1:
-                times_triggered += 1
-
-        return times_triggered
-
-    def struct_features(self) -> Tuple[int, int]:
-        return self._struct_features
-
-    def triggered(self) -> Tuple[int, int]:
-        return self._triggered
-
-    def observed(self) -> Tuple[int, int]:
-        return self._observed
-
-    def at(self, position: int) -> RuleBasedDataset:
-        return RuleBasedDataset(self.columns, self.num_labels, [self.data[position]])
-
-    def limit(self, limit: int) -> RuleBasedDataset:
-        return RuleBasedDataset(self.columns, self.num_labels, self.data[:limit])
-
     def classification_dataset(
         self, structures: List[str | "CompoundStructure"], applicable_rules: List["Rule"]
     ) -> Tuple[RuleBasedDataset, List[List[PredictionResult]]]:
         classify_data = []
         classify_products = []
+        first_smiles = structures[0] if isinstance(structures[0], str) else structures[0].smiles  # str entries assumed to be SMILES — confirm
+        ds_columns = (["structure_id"] + [f"feature_{i}" for i, _ in enumerate(FormatConverter.maccs(first_smiles))] +
+                      [f"trig_{r.uuid}" for r in applicable_rules] +
+                      [f"obs_{r.uuid}" for r in applicable_rules])
         for struct in structures:
             if isinstance(struct, str):
                 struct_id = None
@@ -296,171 +282,8 @@ class RuleBasedDataset(Dataset):
 
             classify_data.append([struct_id] + features + trig + ([-1] * len(trig)))
             classify_products.append(prods)
-
-        return RuleBasedDataset(
-            columns=self.columns, num_labels=self.num_labels, data=classify_data
-        ), classify_products
-
-    @staticmethod
-    def generate_dataset(
-        reactions: List["Reaction"], applicable_rules: List["Rule"], educts_only: bool = True
-    ) -> RuleBasedDataset:
-        _structures = set()
-
-        for r in reactions:
-            for e in r.educts.all():
-                _structures.add(e)
-
-            if not educts_only:
-                for e in r.products:
-                    _structures.add(e)
-
-        compounds = sorted(_structures, key=lambda x: x.url)
-
-        triggered: Dict[str, Set[str]] = defaultdict(set)
-        observed: Set[str] = set()
-
-        # Apply rules on collected compounds and store tps
-        for i, comp in enumerate(compounds):
-            logger.debug(f"{i + 1}/{len(compounds)}...")
-
-            for rule in applicable_rules:
-                product_sets = rule.apply(comp.smiles)
-
-                if len(product_sets) == 0:
-                    continue
-
-                key = f"{rule.uuid} + {comp.uuid}"
-
-                if key in triggered:
-                    logger.info(f"{key} already present. Duplicate reaction?")
-
-                for prod_set in product_sets:
-                    for smi in prod_set:
-                        try:
-                            smi = FormatConverter.standardize(smi, remove_stereo=True)
-                        except Exception:
-                            # :shrug:
-                            logger.debug(f"Standardizing SMILES failed for {smi}")
-                            pass
-
-                        triggered[key].add(smi)
-
-        for i, r in enumerate(reactions):
-            logger.debug(f"{i + 1}/{len(reactions)}...")
-
-            if len(r.educts.all()) != 1:
-                logger.debug(f"Skipping {r.url} as it has {len(r.educts.all())} substrates!")
-                continue
-
-            for comp in r.educts.all():
-                for rule in applicable_rules:
-                    key = f"{rule.uuid} + {comp.uuid}"
-
-                    if key not in triggered:
-                        continue
-
-                    # standardize products from reactions for comparison
-                    standardized_products = []
-                    for cs in r.products.all():
-                        smi = cs.smiles
-
-                        try:
-                            smi = FormatConverter.standardize(smi, remove_stereo=True)
-                        except Exception as e:
-                            # :shrug:
-                            logger.debug(f"Standardizing SMILES failed for {smi}")
-                            pass
-
-                        standardized_products.append(smi)
-
-                    if len(set(standardized_products).difference(triggered[key])) == 0:
-                        observed.add(key)
-                    else:
-                        pass
-
-        ds = None
-
-        for i, comp in enumerate(compounds):
-            # Features
-            feat = FormatConverter.maccs(comp.smiles)
-            trig = []
-            obs = []
-
-            for rule in applicable_rules:
-                key = f"{rule.uuid} + {comp.uuid}"
-
-                # Check triggered
-                if key in triggered:
-                    trig.append(1)
-                else:
-                    trig.append(0)
-
-                # Check obs
-                if key in observed:
-                    obs.append(1)
-                elif key not in triggered:
-                    obs.append(None)
-                else:
-                    obs.append(0)
-
-            if ds is None:
-                header = (
-                    ["structure_id"]
-                    + [f"feature_{i}" for i, _ in enumerate(feat)]
-                    + [f"trig_{r.uuid}" for r in applicable_rules]
-                    + [f"obs_{r.uuid}" for r in applicable_rules]
-                )
-                ds = RuleBasedDataset(header, len(applicable_rules))
-
-            ds.add_row([str(comp.uuid)] + feat + trig + obs)
-
-        return ds
-
-    def X(self, exclude_id_col=True, na_replacement=0):
-        res = self.__getitem__(
-            (slice(None), slice(1 if exclude_id_col else 0, len(self.columns) - self.num_labels))
-        )
-        if na_replacement is not None:
-            res = [[x if x is not None else na_replacement for x in row] for row in res]
-        return res
-
-    def trig(self, na_replacement=0):
-        res = self.__getitem__((slice(None), slice(self._triggered[0], self._triggered[1])))
-        if na_replacement is not None:
-            res = [[x if x is not None else na_replacement for x in row] for row in res]
-        return res
-
-    def y(self, na_replacement=0):
-        res = self.__getitem__((slice(None), slice(len(self.columns) - self.num_labels, None)))
-        if na_replacement is not None:
-            res = [[x if x is not None else na_replacement for x in row] for row in res]
-        return res
-
-    def __getitem__(self, key):
-        if not isinstance(key, tuple):
-            raise TypeError("Dataset must be indexed with dataset[rows, columns]")
-
-        row_key, col_key = key
-
-        # Normalize rows
-        if isinstance(row_key, int):
-            rows = [self.data[row_key]]
-        else:
-            rows = self.data[row_key]
-
-        # Normalize columns
-        if isinstance(col_key, int):
-            res = [row[col_key] for row in rows]
-        else:
-            res = [
-                [row[i] for i in range(*col_key.indices(len(row)))]
-                if isinstance(col_key, slice)
-                else [row[i] for i in col_key]
-                for row in rows
-            ]
-
-        return res
+        ds = RuleBasedDataset(len(applicable_rules), ds_columns, data=classify_data)
+        return ds, classify_products
 
     def to_arff(self, path: "Path"):
         arff = f"@relation 'enviPy-dataset: -C {self.num_labels}'\n"
@@ -472,7 +295,7 @@ class RuleBasedDataset(Dataset):
                 arff += f"@attribute {c} {{0,1}}\n"
 
         arff += "\n@data\n"
-        for d in self.data:
+        for d in self:
             ys = ",".join([str(v if v is not None else "?") for v in d[-self.num_labels :]])
             xs = ",".join([str(v if v is not None else "?") for v in d[: self.num_features]])
             arff += f"{ys},{xs}\n"
@@ -481,14 +304,10 @@ class RuleBasedDataset(Dataset):
             fh.write(arff)
             fh.flush()
 
-    def __repr__(self):
-        return (
-            f"<Dataset #rows={len(self.data)} #cols={len(self.columns)} #labels={self.num_labels}>"
-        )
-
 
 class EnviFormerDataset(Dataset):
     def __init__(self, educts, products):
+        super().__init__()  # NOTE(review): Dataset.__init__ raises ValueError when columns is None and data is not a DataFrame, so this call cannot succeed as written — confirm intended columns/data
         assert len(educts) == len(products), "Can't have unequal length educts and products"
 
     @staticmethod
diff --git a/uv.lock b/uv.lock
index f3e6d123..ffa473be 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.12"
 
 [[package]]
@@ -176,6 +176,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c9/af/0dcccc7fdcdf170f9a1585e5e96b6fb0ba1749ef6be8c89a6202284759bd/celery-5.5.3-py3-none-any.whl", hash = "sha256:0b5761a07057acee94694464ca482416b959568904c9dfa41ce8413a7d65d525", size = 438775, upload-time = "2025-06-01T11:08:09.94Z" },
 ]
 
+[[package]]
+name = "celery-stubs"
+version = "0.1.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "mypy" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/98/14/b853ada8706a3a301396566b6dd405d1cbb24bff756236a12a01dbe766a4/celery-stubs-0.1.3.tar.gz", hash = "sha256:0fb5345820f8a2bd14e6ffcbef2d10181e12e40f8369f551d7acc99d8d514919", size = 46583, upload-time = "2023-02-10T02:20:11.837Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1c/7a/4ab2347d13f1f59d10a7337feb9beb002664119f286036785284c6bec150/celery_stubs-0.1.3-py3-none-any.whl", hash = "sha256:dfb9ad27614a8af028b2055bb4a4ae99ca5e9a8d871428a506646d62153218d7", size = 89085, upload-time = "2023-02-10T02:20:09.409Z" },
+]
+
 [[package]]
 name = "certifi"
 version = "2025.10.5"
@@ -525,7 +538,7 @@ wheels = [
 [[package]]
 name = "enviformer"
 version = "0.1.0"
-source = { git = "ssh://git@git.envipath.com/enviPath/enviformer.git?rev=v0.1.2#3f28f60cfa1df814cf7559303b5130933efa40ae" }
+source = { git = "ssh://git@git.envipath.com/enviPath/enviformer.git?branch=enhancement%2Fdataset_support#8edc3f30c46749af30c033a4faf9abe062b47970" }
 dependencies = [
     { name = "joblib" },
     { name = "lightning" },
@@ -546,7 +559,6 @@ dependencies = [
     { name = "django-ninja" },
     { name = "django-oauth-toolkit" },
     { name = "django-polymorphic" },
-    { name = "django-stubs" },
     { name = "enviformer" },
     { name = "envipy-additional-information" },
     { name = "envipy-ambit" },
@@ -554,6 +566,7 @@ dependencies = [
     { name = "epam-indigo" },
     { name = "gunicorn" },
     { name = "networkx" },
+    { name = "polars" },
     { name = "psycopg2-binary" },
     { name = "python-dotenv" },
     { name = "rdkit" },
@@ -566,6 +579,8 @@ dependencies = [
 
 [package.optional-dependencies]
 dev = [
+    { name = "celery-stubs" },
+    { name = "django-stubs" },
     { name = "poethepoet" },
     { name = "pre-commit" },
     { name = "ruff" },
@@ -577,15 +592,16 @@ ms-login = [
 [package.metadata]
 requires-dist = [
     { name = "celery", specifier = ">=5.5.2" },
+    { name = "celery-stubs", marker = "extra == 'dev'", specifier = "==0.1.3" },
     { name = "django", specifier = ">=5.2.1" },
     { name = "django-extensions", specifier = ">=4.1" },
     { name = "django-model-utils", specifier = ">=5.0.0" },
     { name = "django-ninja", specifier = ">=1.4.1" },
     { name = "django-oauth-toolkit", specifier = ">=3.0.1" },
     { name = "django-polymorphic", specifier = ">=4.1.0" },
-    { name = "django-stubs", specifier = ">=5.2.4" },
-    { name = "enviformer", git = "ssh://git@git.envipath.com/enviPath/enviformer.git?rev=v0.1.2" },
-    { name = "envipy-additional-information", git = "ssh://git@git.envipath.com/enviPath/enviPy-additional-information.git?rev=v0.1.4" },
+    { name = "django-stubs", marker = "extra == 'dev'", specifier = ">=5.2.4" },
+    { name = "enviformer", git = "ssh://git@git.envipath.com/enviPath/enviformer.git?branch=enhancement%2Fdataset_support" },
+    { name = "envipy-additional-information", git = "ssh://git@git.envipath.com/enviPath/enviPy-additional-information.git?rev=v0.1.7" },
     { name = "envipy-ambit", git = "ssh://git@git.envipath.com/enviPath/enviPy-ambit.git" },
     { name = "envipy-plugins", git = "ssh://git@git.envipath.com/enviPath/enviPy-plugins.git?rev=v0.1.0" },
     { name = "epam-indigo", specifier = ">=1.30.1" },
@@ -593,6 +609,7 @@ requires-dist = [
     { name = "msal", marker = "extra == 'ms-login'", specifier = ">=1.33.0" },
     { name = "networkx", specifier = ">=3.4.2" },
     { name = "poethepoet", marker = "extra == 'dev'", specifier = ">=0.37.0" },
+    { name = "polars", specifier = "==1.34.0" },
     { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=4.3.0" },
     { name = "psycopg2-binary", specifier = ">=2.9.10" },
     { name = "python-dotenv", specifier = ">=1.1.0" },
@@ -608,8 +625,8 @@ provides-extras = ["ms-login", "dev"]
 
 [[package]]
 name = "envipy-additional-information"
-version = "0.1.0"
-source = { git = "ssh://git@git.envipath.com/enviPath/enviPy-additional-information.git?rev=v0.1.4#4da604090bf7cf1f3f552d69485472dbc623030a" }
+version = "0.1.7"
+source = { git = "ssh://git@git.envipath.com/enviPath/enviPy-additional-information.git?rev=v0.1.7#d02a5d5e6a931e6565ea86127813acf7e4b33a30" }
 dependencies = [
     { name = "pydantic" },
 ]
@@ -1074,6 +1091,47 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" },
 ]
 
+[[package]]
+name = "mypy"
+version = "1.18.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "mypy-extensions" },
+    { name = "pathspec" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c0/77/8f0d0001ffad290cef2f7f216f96c814866248a0b92a722365ed54648e7e/mypy-1.18.2.tar.gz", hash = "sha256:06a398102a5f203d7477b2923dda3634c36727fa5c237d8f859ef90c42a9924b", size = 3448846, upload-time = "2025-09-19T00:11:10.519Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/07/06/dfdd2bc60c66611dd8335f463818514733bc763e4760dee289dcc33df709/mypy-1.18.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:33eca32dd124b29400c31d7cf784e795b050ace0e1f91b8dc035672725617e34", size = 12908273, upload-time = "2025-09-19T00:10:58.321Z" },
+    { url = "https://files.pythonhosted.org/packages/81/14/6a9de6d13a122d5608e1a04130724caf9170333ac5a924e10f670687d3eb/mypy-1.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a3c47adf30d65e89b2dcd2fa32f3aeb5e94ca970d2c15fcb25e297871c8e4764", size = 11920910, upload-time = "2025-09-19T00:10:20.043Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/a9/b29de53e42f18e8cc547e38daa9dfa132ffdc64f7250e353f5c8cdd44bee/mypy-1.18.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d6c838e831a062f5f29d11c9057c6009f60cb294fea33a98422688181fe2893", size = 12465585, upload-time = "2025-09-19T00:10:33.005Z" },
+    { url = "https://files.pythonhosted.org/packages/77/ae/6c3d2c7c61ff21f2bee938c917616c92ebf852f015fb55917fd6e2811db2/mypy-1.18.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01199871b6110a2ce984bde85acd481232d17413868c9807e95c1b0739a58914", size = 13348562, upload-time = "2025-09-19T00:10:11.51Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/31/aec68ab3b4aebdf8f36d191b0685d99faa899ab990753ca0fee60fb99511/mypy-1.18.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a2afc0fa0b0e91b4599ddfe0f91e2c26c2b5a5ab263737e998d6817874c5f7c8", size = 13533296, upload-time = "2025-09-19T00:10:06.568Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/83/abcb3ad9478fca3ebeb6a5358bb0b22c95ea42b43b7789c7fb1297ca44f4/mypy-1.18.2-cp312-cp312-win_amd64.whl", hash = "sha256:d8068d0afe682c7c4897c0f7ce84ea77f6de953262b12d07038f4d296d547074", size = 9828828, upload-time = "2025-09-19T00:10:28.203Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/04/7f462e6fbba87a72bc8097b93f6842499c428a6ff0c81dd46948d175afe8/mypy-1.18.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:07b8b0f580ca6d289e69209ec9d3911b4a26e5abfde32228a288eb79df129fcc", size = 12898728, upload-time = "2025-09-19T00:10:01.33Z" },
+    { url = "https://files.pythonhosted.org/packages/99/5b/61ed4efb64f1871b41fd0b82d29a64640f3516078f6c7905b68ab1ad8b13/mypy-1.18.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ed4482847168439651d3feee5833ccedbf6657e964572706a2adb1f7fa4dfe2e", size = 11910758, upload-time = "2025-09-19T00:10:42.607Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/46/d297d4b683cc89a6e4108c4250a6a6b717f5fa96e1a30a7944a6da44da35/mypy-1.18.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3ad2afadd1e9fea5cf99a45a822346971ede8685cc581ed9cd4d42eaf940986", size = 12475342, upload-time = "2025-09-19T00:11:00.371Z" },
+    { url = "https://files.pythonhosted.org/packages/83/45/4798f4d00df13eae3bfdf726c9244bcb495ab5bd588c0eed93a2f2dd67f3/mypy-1.18.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a431a6f1ef14cf8c144c6b14793a23ec4eae3db28277c358136e79d7d062f62d", size = 13338709, upload-time = "2025-09-19T00:11:03.358Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/09/479f7358d9625172521a87a9271ddd2441e1dab16a09708f056e97007207/mypy-1.18.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7ab28cc197f1dd77a67e1c6f35cd1f8e8b73ed2217e4fc005f9e6a504e46e7ba", size = 13529806, upload-time = "2025-09-19T00:10:26.073Z" },
+    { url = "https://files.pythonhosted.org/packages/71/cf/ac0f2c7e9d0ea3c75cd99dff7aec1c9df4a1376537cb90e4c882267ee7e9/mypy-1.18.2-cp313-cp313-win_amd64.whl", hash = "sha256:0e2785a84b34a72ba55fb5daf079a1003a34c05b22238da94fcae2bbe46f3544", size = 9833262, upload-time = "2025-09-19T00:10:40.035Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/0c/7d5300883da16f0063ae53996358758b2a2df2a09c72a5061fa79a1f5006/mypy-1.18.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:62f0e1e988ad41c2a110edde6c398383a889d95b36b3e60bcf155f5164c4fdce", size = 12893775, upload-time = "2025-09-19T00:10:03.814Z" },
+    { url = "https://files.pythonhosted.org/packages/50/df/2cffbf25737bdb236f60c973edf62e3e7b4ee1c25b6878629e88e2cde967/mypy-1.18.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8795a039bab805ff0c1dfdb8cd3344642c2b99b8e439d057aba30850b8d3423d", size = 11936852, upload-time = "2025-09-19T00:10:51.631Z" },
+    { url = "https://files.pythonhosted.org/packages/be/50/34059de13dd269227fb4a03be1faee6e2a4b04a2051c82ac0a0b5a773c9a/mypy-1.18.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6ca1e64b24a700ab5ce10133f7ccd956a04715463d30498e64ea8715236f9c9c", size = 12480242, upload-time = "2025-09-19T00:11:07.955Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/11/040983fad5132d85914c874a2836252bbc57832065548885b5bb5b0d4359/mypy-1.18.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d924eef3795cc89fecf6bedc6ed32b33ac13e8321344f6ddbf8ee89f706c05cb", size = 13326683, upload-time = "2025-09-19T00:09:55.572Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/ba/89b2901dd77414dd7a8c8729985832a5735053be15b744c18e4586e506ef/mypy-1.18.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:20c02215a080e3a2be3aa50506c67242df1c151eaba0dcbc1e4e557922a26075", size = 13514749, upload-time = "2025-09-19T00:10:44.827Z" },
+    { url = "https://files.pythonhosted.org/packages/25/bc/cc98767cffd6b2928ba680f3e5bc969c4152bf7c2d83f92f5a504b92b0eb/mypy-1.18.2-cp314-cp314-win_amd64.whl", hash = "sha256:749b5f83198f1ca64345603118a6f01a4e99ad4bf9d103ddc5a3200cc4614adf", size = 9982959, upload-time = "2025-09-19T00:10:37.344Z" },
+    { url = "https://files.pythonhosted.org/packages/87/e3/be76d87158ebafa0309946c4a73831974d4d6ab4f4ef40c3b53a385a66fd/mypy-1.18.2-py3-none-any.whl", hash = "sha256:22a1748707dd62b58d2ae53562ffc4d7f8bcc727e8ac7cbc69c053ddc874d47e", size = 2352367, upload-time = "2025-09-19T00:10:15.489Z" },
+]
+
+[[package]]
+name = "mypy-extensions"
+version = "1.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
+]
+
 [[package]]
 name = "networkx"
 version = "3.5"
@@ -1308,6 +1366,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/aa/18/a8444036c6dd65ba3624c63b734d3ba95ba63ace513078e1580590075d21/pastel-0.2.1-py2.py3-none-any.whl", hash = "sha256:4349225fcdf6c2bb34d483e523475de5bb04a5c10ef711263452cb37d7dd4364", size = 5955, upload-time = "2020-09-16T19:21:11.409Z" },
 ]
 
+[[package]]
+name = "pathspec"
+version = "0.12.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" },
+]
+
 [[package]]
 name = "pillow"
 version = "11.3.0"
@@ -1396,6 +1463,32 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/92/1b/5337af1a6a478d25a3e3c56b9b4b42b0a160314e02f4a0498d5322c8dac4/poethepoet-0.37.0-py3-none-any.whl", hash = "sha256:861790276315abcc8df1b4bd60e28c3d48a06db273edd3092f3c94e1a46e5e22", size = 90062, upload-time = "2025-08-11T18:00:27.595Z" },
 ]
 
+[[package]]
+name = "polars"
+version = "1.34.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "polars-runtime-32" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a1/3e/35fcf5bf51404371bb172b289a5065778dc97adca4416e199c294125eb05/polars-1.34.0.tar.gz", hash = "sha256:5de5f871027db4b11bcf39215a2d6b13b4a80baf8a55c5862d4ebedfd5cd4013", size = 684309, upload-time = "2025-10-02T18:31:04.396Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/6b/80/1791ac226bb989bef30fe8fde752b2021b6ec5dfd6e880262596aedf4c05/polars-1.34.0-py3-none-any.whl", hash = "sha256:40d2f357b4d9e447ad28bd2c9923e4318791a7c18eb68f31f1fbf11180f41391", size = 772686, upload-time = "2025-10-02T18:29:59.492Z" },
+]
+
+[[package]]
+name = "polars-runtime-32"
+version = "1.34.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/02/10/1189afb14cc47ed215ccf7fbd00ed21c48edfd89e51c16f8628a33ae4b1b/polars_runtime_32-1.34.0.tar.gz", hash = "sha256:ebe6f865128a0d833f53a3f6828360761ad86d1698bceb22bef9fd999500dc1c", size = 2634491, upload-time = "2025-10-02T18:31:05.502Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/97/35/bc4f1a9dcef61845e8e4e5d2318470b002b93a3564026f0643f562761ecb/polars_runtime_32-1.34.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:2878f9951e91121afe60c25433ef270b9a221e6ebf3de5f6642346b38cab3f03", size = 39655423, upload-time = "2025-10-02T18:30:02.846Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/bb/d655a103e75b7c81c47a3c2d276be0200c0c15cfb6fd47f17932ddcf7519/polars_runtime_32-1.34.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:fbc329c7d34a924228cc5dcdbbd4696d94411a3a5b15ad8bb868634c204e1951", size = 35986049, upload-time = "2025-10-02T18:30:05.848Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/ce/11ca850b7862cb43605e5d86cdf655614376e0a059871cf8305af5406554/polars_runtime_32-1.34.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93fa51d88a2d12ea996a5747aad5647d22a86cce73c80f208e61f487b10bc448", size = 40261269, upload-time = "2025-10-02T18:30:08.48Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/25/77d12018c35489e19f7650b40679714a834effafc25d61e8dcee7c4fafce/polars_runtime_32-1.34.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:79e4d696392c6d8d51f4347f0b167c52eef303c9d87093c0c68e8651198735b7", size = 37049077, upload-time = "2025-10-02T18:30:11.162Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/75/c30049d45ea1365151f86f650ed5354124ff3209f0abe588664c8eb13a31/polars_runtime_32-1.34.0-cp39-abi3-win_amd64.whl", hash = "sha256:2501d6b29d9001ea5ea2fd9b598787e10ddf45d8c4a87c2bead75159e8a15711", size = 40105782, upload-time = "2025-10-02T18:30:14.597Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/31/84efa27aa3478c8670bac1a720c8b1aee5c58c9c657c980e5e5c47fde883/polars_runtime_32-1.34.0-cp39-abi3-win_arm64.whl", hash = "sha256:f9ed1765378dfe0bcd1ac5ec570dd9eab27ea728bbc980cc9a76eebc55586559", size = 35873216, upload-time = "2025-10-02T18:30:17.439Z" },
+]
+
 [[package]]
 name = "pre-commit"
 version = "4.3.0"
@@ -1754,11 +1847,11 @@ wheels = [
 
 [[package]]
 name = "redis"
-version = "6.4.0"
+version = "7.0.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/0d/d6/e8b92798a5bd67d659d51a18170e91c16ac3b59738d91894651ee255ed49/redis-6.4.0.tar.gz", hash = "sha256:b01bc7282b8444e28ec36b261df5375183bb47a07eb9c603f284e89cbc5ef010", size = 4647399, upload-time = "2025-08-07T08:10:11.441Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/57/8f/f125feec0b958e8d22c8f0b492b30b1991d9499a4315dfde466cf4289edc/redis-7.0.1.tar.gz", hash = "sha256:c949df947dca995dc68fdf5a7863950bf6df24f8d6022394585acc98e81624f1", size = 4755322, upload-time = "2025-10-27T14:34:00.33Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e8/02/89e2ed7e85db6c93dfa9e8f691c5087df4e3551ab39081a4d7c6d1f90e05/redis-6.4.0-py3-none-any.whl", hash = "sha256:f0544fa9604264e9464cdf4814e7d4830f74b165d52f2a330a760a88dd248b7f", size = 279847, upload-time = "2025-08-07T08:10:09.84Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/97/9f22a33c475cda519f20aba6babb340fb2f2254a02fb947816960d1e669a/redis-7.0.1-py3-none-any.whl", hash = "sha256:4977af3c7d67f8f0eb8b6fec0dafc9605db9343142f634041fb0235f67c0588a", size = 339938, upload-time = "2025-10-27T14:33:58.553Z" },
 ]
 
 [[package]]