forked from enviPath/enviPy
[Feature] MultiGen Eval (Backend) (#117)
Fixes #16

Co-authored-by: Tim Lorsbach <tim@lorsba.ch>
Reviewed-on: enviPath/enviPy#117
@@ -864,7 +864,7 @@ class PackageManager(object):

     @staticmethod
     @transaction.atomic
-    def import_pacakge(data: Dict[str, Any], owner: User, preserve_uuids=False, add_import_timestamp=True,
+    def import_package(data: Dict[str, Any], owner: User, preserve_uuids=False, add_import_timestamp=True,
                        trust_reviewed=False) -> Package:

         importer = PackageImporter(data, preserve_uuids, add_import_timestamp, trust_reviewed)
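Usage sketch (editor's illustration, not part of the commit): calling the renamed importer with an already-parsed export dict. The file name, module path and user lookup are assumptions; only the import_package signature is taken from the hunk above.

    import json
    from epdb.models import User                      # assumption: User model lives in epdb.models
    from epdb.package_manager import PackageManager   # assumption: module path of PackageManager

    with open('package_export.json') as fh:           # hypothetical export file
        data = json.load(fh)

    owner = User.objects.get(username='admin')        # hypothetical user
    package = PackageManager.import_package(data, owner, preserve_uuids=False, trust_reviewed=False)
    print(package.url)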
epdb/models.py (482 changed lines)
@@ -7,7 +7,7 @@ import secrets
 from abc import abstractmethod
 from collections import defaultdict
 from datetime import datetime
-from typing import Union, List, Optional, Dict, Tuple, Set
+from typing import Union, List, Optional, Dict, Tuple, Set, Any
 from uuid import uuid4

 import joblib
@@ -588,33 +588,33 @@ class Package(EnviPathModel):
         return f"{self.name} (pk={self.pk})"

     @property
-    def compounds(self):
+    def compounds(self) -> QuerySet:
         return self.compound_set.all()

     @property
-    def rules(self):
+    def rules(self) -> QuerySet:
         return self.rule_set.all()

     @property
-    def reactions(self):
+    def reactions(self) -> QuerySet:
         return self.reaction_set.all()

     @property
-    def pathways(self) -> 'Pathway':
+    def pathways(self) -> QuerySet:
         return self.pathway_set.all()

     @property
-    def scenarios(self):
+    def scenarios(self) -> QuerySet:
         return self.scenario_set.all()

     @property
-    def models(self):
+    def models(self) -> QuerySet:
         return self.epmodel_set.all()

     def _url(self):
         return '{}/package/{}'.format(s.SERVER_URL, self.uuid)

-    def get_applicable_rules(self):
+    def get_applicable_rules(self) -> List['Rule']:
         """
         Returns a ordered set of rules where the following applies:
         1. All Composite will be added to result
@@ -650,11 +650,11 @@ class Compound(EnviPathModel, AliasMixin, ScenarioMixin, ChemicalIdentifierMixin
     external_identifiers = GenericRelation('ExternalIdentifier')

     @property
-    def structures(self):
+    def structures(self) -> QuerySet:
         return CompoundStructure.objects.filter(compound=self)

     @property
-    def normalized_structure(self):
+    def normalized_structure(self) -> 'CompoundStructure':
         return CompoundStructure.objects.get(compound=self, normalized_structure=True)

     def _url(self):
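A brief sketch (editor's illustration, not part of the commit) of the two Compound accessors annotated above:

    compound = Compound.objects.get(uuid=compound_uuid)   # compound_uuid is illustrative
    print(compound.structures.count())                    # QuerySet of all CompoundStructure rows for this compound
    print(compound.normalized_structure)                  # the single structure flagged normalized_structure=True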
@@ -1635,8 +1635,8 @@ class Pathway(EnviPathModel, AliasMixin, ScenarioMixin):
         return new_pathway

     @transaction.atomic
-    def add_node(self, smiles: str, name: Optional[str] = None, description: Optional[str] = None):
-        return Node.create(self, smiles, 0)
+    def add_node(self, smiles: str, name: Optional[str] = None, description: Optional[str] = None, depth: Optional[int] = 0):
+        return Node.create(self, smiles, depth, name=name, description=description)

     @transaction.atomic
     def add_edge(self, start_nodes: List['Node'], end_nodes: List['Node'], rule: Optional['Rule'] = None,
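A small usage sketch (not part of the commit) of the extended add_node signature; the SMILES strings are arbitrary and add_edge is called only with the arguments visible in this hunk:

    root = pathway.add_node('c1ccccc1', name='benzene', depth=0)     # root node at depth 0
    child = pathway.add_node('Oc1ccccc1', name='phenol', depth=1)    # product one generation deeper
    pathway.add_edge(start_nodes=[root], end_nodes=[child])          # rule defaults to None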
@@ -1836,6 +1836,7 @@ class PackageBasedModel(EPModel):
     eval_results = JSONField(null=True, blank=True, default=dict)
     app_domain = models.ForeignKey('epdb.ApplicabilityDomain', on_delete=models.SET_NULL, null=True, blank=True,
                                    default=None)
+    multigen_eval = models.BooleanField(null=False, blank=False, default=False)

     INITIAL = "INITIAL"
     INITIALIZING = "INITIALIZING"
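The new multigen_eval flag opts a model into the pathway-level (multi-generation) evaluation added further down. A sketch (not part of the commit) of enabling it on an existing model; model_pk is illustrative:

    model = MLRelativeReasoning.objects.get(pk=model_pk)
    model.multigen_eval = True     # evaluate_model() will then also run the pathway-level evaluation
    model.save()

If eval packages are attached to the model, evaluate_model() uses those pathways directly; otherwise it splits the model's own data packages, as shown in the large hunk below.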
@@ -1861,6 +1862,24 @@ class PackageBasedModel(EPModel):
     def ready_for_prediction(self) -> bool:
         return self.model_status in [self.BUILT_NOT_EVALUATED, self.EVALUATING, self.FINISHED]

+    @property
+    def pr_curve(self):
+        if self.model_status != self.FINISHED:
+            raise ValueError(f"Expected {self.FINISHED} but model is in status {self.model_status}")
+
+        res = []
+
+        thresholds = self.eval_results['average_precision_per_threshold'].keys()
+
+        for t in thresholds:
+            res.append({
+                'precision': self.eval_results['average_precision_per_threshold'][t],
+                'recall': self.eval_results['average_recall_per_threshold'][t],
+                'threshold': float(t)
+            })
+
+        return res
+
     @cached_property
     def applicable_rules(self) -> List['Rule']:
         """
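The pr_curve property, now shared by all PackageBasedModel subclasses, returns one precision/recall point per evaluated threshold. A minimal consumption sketch (not part of the commit), assuming `model` is a finished PackageBasedModel:

    points = sorted(model.pr_curve, key=lambda p: p['threshold'])
    for p in points:
        print(f"threshold={p['threshold']:.2f}  precision={p['precision']:.3f}  recall={p['recall']:.3f}")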
@@ -1897,14 +1916,6 @@ class PackageBasedModel(EPModel):
         # TODO
         return []

-    def _get_pathways(self):
-        pathway_qs = Pathway.objects.none()
-        for p in self.data_packages.all():
-            pathway_qs |= p.pathways
-
-        pathway_qs = pathway_qs.distinct()
-        return pathway_qs
-
     def _get_reactions(self) -> QuerySet:
         return Reaction.objects.filter(package__in=self.data_packages.all()).distinct()
@@ -1937,9 +1948,277 @@ class PackageBasedModel(EPModel):
         self.build_model()

     @abstractmethod
-    def build_model(self):
+    def _fit_model(self, ds: Dataset):
         pass

+    @abstractmethod
+    def _model_args(self) -> Dict[str, Any]:
+        pass
+
+    def build_model(self):
+        self.model_status = self.BUILDING
+        self.save()
+
+        ds = self.load_dataset()
+
+        mod = self._fit_model(ds)
+
+        f = os.path.join(s.MODEL_DIR, f"{self.uuid}_mod.pkl")
+        joblib.dump(mod, f)
+
+        if self.app_domain is not None:
+            logger.debug("Building applicability domain...")
+            self.app_domain.build()
+            logger.debug("Done building applicability domain.")
+
+        self.model_status = self.BUILT_NOT_EVALUATED
+        self.save()
+
+    def evaluate_model(self):
+
+        if self.model_status != self.BUILT_NOT_EVALUATED:
+            raise ValueError(f"Can't evaluate a model in state {self.model_status}!")
+
+        self.model_status = self.EVALUATING
+        self.save()
+
+        def train_func(X, y, train_index, model_kwargs):
+            clz = model_kwargs.pop('clz')
+            if clz == 'RuleBaseRelativeReasoning':
+                mod = RelativeReasoning(
+                    **model_kwargs
+                )
+            else:
+                mod = EnsembleClassifierChain(
+                    **model_kwargs
+                )
+
+            if train_index is not None:
+                X, y = X[train_index], y[train_index]
+
+            mod.fit(X, y)
+            return mod
+
+        def evaluate_sg(model, X, y, test_index, threshold):
+            X_test = X[test_index]
+            y_test = y[test_index]
+
+            y_pred = model.predict_proba(X_test)
+            y_thresholded = (y_pred >= threshold).astype(int)
+
+            # Flatten them to get rid of np.nan
+            y_test = np.asarray(y_test).flatten()
+            y_pred = np.asarray(y_pred).flatten()
+            y_thresholded = np.asarray(y_thresholded).flatten()
+
+            mask = ~np.isnan(y_test)
+            y_test_filtered = y_test[mask]
+            y_pred_filtered = y_pred[mask]
+            y_thresholded_filtered = y_thresholded[mask]
+
+            acc = jaccard_score(y_test_filtered, y_thresholded_filtered, zero_division=0)
+
+            prec, rec = dict(), dict()
+
+            for t in np.arange(0, 1.05, 0.05):
+                temp_thresholded = (y_pred_filtered >= t).astype(int)
+                prec[f"{t:.2f}"] = precision_score(y_test_filtered, temp_thresholded, zero_division=0)
+                rec[f"{t:.2f}"] = recall_score(y_test_filtered, temp_thresholded, zero_division=0)
+
+            return acc, prec, rec
+
+        def evaluate_mg(model, pathways: Union[QuerySet['Pathway'], List['Pathway']], threshold):
+            thresholds = np.arange(0.1, 1.1, 0.1)
+
+            precision = {f"{t:.2f}": [] for t in thresholds}
+            recall = {f"{t:.2f}": [] for t in thresholds}
+
+            # Note: only one root compound supported at this time
+            root_compounds = [[n.default_node_label.smiles for n in p.root_nodes][0] for p in pathways]
+
+            # As we need a Model Instance in our setting, get a fresh copy from db, overwrite the serialized model and
+            # pass it to the setting used in prediction
+            if isinstance(self, MLRelativeReasoning):
+                mod = MLRelativeReasoning.objects.get(pk=self.pk)
+            elif isinstance(self, RuleBasedRelativeReasoning):
+                mod = RuleBasedRelativeReasoning.objects.get(pk=self.pk)
+
+            mod.model = model
+
+            s = Setting()
+            s.model = mod
+            s.model_threshold = thresholds.min()
+            s.max_depth = 10
+            s.max_nodes = 50
+
+            from epdb.logic import SPathway
+            from utilities.ml import multigen_eval
+
+            pred_pathways = []
+            for i, root in enumerate(root_compounds):
+                logger.debug(f"Evaluating pathway {i + 1} of {len(root_compounds)}...")
+
+                spw = SPathway(root_nodes=root, prediction_setting=s)
+                level = 0
+
+                while not spw.done:
+                    spw.predict_step(from_depth=level)
+                    level += 1
+
+                pred_pathways.append(spw)
+
+            mg_acc = 0.0
+            for t in thresholds:
+                for true, pred in zip(pathways, pred_pathways):
+                    acc, pre, rec = multigen_eval(true, pred, t)
+                    if abs(t - threshold) < 0.01:
+                        mg_acc = acc
+                    precision[f"{t:.2f}"].append(pre)
+                    recall[f"{t:.2f}"].append(rec)
+
+            precision = {k: sum(v) / len(v) if len(v) > 0 else 0 for k, v in precision.items()}
+            recall = {k: sum(v) / len(v) if len(v) > 0 else 0 for k, v in recall.items()}
+            return mg_acc, precision, recall
+
+        ds = self.load_dataset()
+
+        if isinstance(self, RuleBasedRelativeReasoning):
+            X = np.array(ds.X(exclude_id_col=False, na_replacement=None))
+            y = np.array(ds.y(na_replacement=np.nan))
+        else:
+            X = np.array(ds.X(na_replacement=np.nan))
+            y = np.array(ds.y(na_replacement=np.nan))
+
+        n_splits = 20
+
+        shuff = ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42)
+        splits = list(shuff.split(X))
+
+        from joblib import Parallel, delayed
+        models = Parallel(n_jobs=10)(delayed(train_func)(X, y, train_index, self._model_args()) for train_index, _ in splits)
+        evaluations = Parallel(n_jobs=10)(delayed(evaluate_sg)(model, X, y, test_index, self.threshold)
+                                          for model, (_, test_index) in zip(models, splits))
+
+        def compute_averages(data):
+            num_items = len(data)
+            avg_first_item = sum(item[0] for item in data) / num_items
+
+            sum_dict2 = defaultdict(float)
+            sum_dict3 = defaultdict(float)
+
+            for _, dict2, dict3 in data:
+                for key in dict2:
+                    sum_dict2[key] += dict2[key]
+                for key in dict3:
+                    sum_dict3[key] += dict3[key]
+
+            avg_dict2 = {key: val / num_items for key, val in sum_dict2.items()}
+            avg_dict3 = {key: val / num_items for key, val in sum_dict3.items()}
+
+            return {
+                "average_accuracy": float(avg_first_item),
+                "average_precision_per_threshold": avg_dict2,
+                "average_recall_per_threshold": avg_dict3
+            }
+
+        self.eval_results = compute_averages(evaluations)
+
+        if self.multigen_eval:
+
+            # We have to consider 2 cases here:
+            # 1. No eval packages provided -> Split Train data X times and train and evaluate model
+            # 2. eval packages provided -> Use the already built model and do evaluation on the set provided.
+
+            if self.eval_packages.count() > 0:
+                pathway_qs = Pathway.objects.filter(package__in=self.eval_packages.all()).distinct()
+                evaluate_mg(self.model, pathway_qs, self.threshold)
+                return
+
+            pathway_qs = Pathway.objects.prefetch_related(
+                'node_set',
+                'node_set__out_edges',
+                'node_set__default_node_label',
+                'node_set__scenarios',
+                'edge_set',
+                'edge_set__start_nodes',
+                'edge_set__end_nodes',
+                'edge_set__edge_label',
+                'edge_set__scenarios'
+            ).filter(package__in=self.data_packages.all()).distinct()
+
+            pathways = []
+            for pathway in pathway_qs:
+                # There is one pathway with no root compounds, so this check is required
+                if len(pathway.root_nodes) > 0:
+                    pathways.append(pathway)
+                else:
+                    logging.warning(f"No root compound in pathway {pathway.name}, excluding from multigen evaluation")
+
+            # build lookup reaction -> {uuid1, uuid2} for overlap check
+            reaction_to_educts = defaultdict(set)
+            for pathway in pathways:
+                for reaction in pathway.edges:
+                    for e in reaction.edge_label.educts.all():
+                        reaction_to_educts[str(reaction.edge_label.uuid)].add(str(e.uuid))
+
+            # build lookup to avoid recalculation of features, labels
+            id_to_index = {uuid: i for i, uuid in enumerate(ds[:, 0])}
+
+            # Compute splits of the collected pathways
+            splits = []
+            for train, test in ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42).split(pathways):
+                train_pathways = [pathways[i] for i in train]
+                test_pathways = [pathways[i] for i in test]
+
+                # Collect structures from test pathways
+                test_educts = set()
+                for pathway in test_pathways:
+                    for reaction in pathway.edges:
+                        test_educts.update(reaction_to_educts[str(reaction.edge_label.uuid)])
+
+                split_ids = []
+                overlap = 0
+                # Collect indices of the structures contained in train pathways iff they're not present in any of
+                # the test pathways
+                for pathway in train_pathways:
+                    for reaction in pathway.edges:
+                        for educt in reaction_to_educts[str(reaction.edge_label.uuid)]:
+                            # Ensure compounds in the training set do not appear in the test set
+                            if educt not in test_educts:
+                                try:
+                                    split_ids.append(id_to_index[str(educt)])
+                                except KeyError:
+                                    logger.debug(f"Couldn't find features in X for compound {educt}")
+                            else:
+                                overlap += 1
+
+                logging.debug(
+                    f"{overlap} compounds had to be removed from multigen split due to overlap within pathways")
+
+                # Get the rows from the dataset corresponding to compounds in the training set pathways
+                split_x, split_y = X[split_ids], y[split_ids]
+                splits.append([(split_x, split_y), test_pathways])
+
+            # Build model on subsets obtained by pathway split
+            trained_models = Parallel(n_jobs=10)(
+                delayed(train_func)(split_x, split_y, np.arange(split_x.shape[0]), self._model_args()) for (split_x, split_y), _ in splits
+            )
+
+            # Parallelizing multigen evaluate would be non-trivial, potentially possible but requires a lot of work
+            multi_ret_vals = Parallel(n_jobs=1)(
+                delayed(evaluate_mg)(model, test_pathways, self.threshold) for model, (_, test_pathways) in
+                zip(trained_models, splits)
+            )
+
+            self.eval_results.update({f"multigen_{k}": v for k, v in compute_averages(multi_ret_vals).items()})
+
+        self.model_status = self.FINISHED
+        self.save()
+
     @staticmethod
     def combine_products_and_probs(rules: List['Rule'], probabilities, products):
         res = []
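After evaluate_model() finishes, eval_results holds the single-generation averages and, when multigen_eval is set, the same keys prefixed with "multigen_". A reading sketch (not part of the commit), assuming `model` is a finished PackageBasedModel; the "0.50" key exists because both threshold grids are formatted with two decimals:

    results = model.eval_results
    print(results['average_accuracy'])                                # single-generation Jaccard score at model.threshold
    print(results['average_precision_per_threshold']['0.50'])
    if model.multigen_eval:
        print(results['multigen_average_accuracy'])                   # pathway-level accuracy from evaluate_mg
        print(results['multigen_average_precision_per_threshold']['0.50'])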
@@ -2011,21 +2290,22 @@ class RuleBasedRelativeReasoning(PackageBasedModel):

         return rbrr

-    def build_model(self):
-        self.model_status = self.BUILDING
-        self.save()
-
-        ds = self.load_dataset()
-        labels = ds.y(na_replacement=None)
-
-        mod = RelativeReasoning(*ds.triggered())
-        mod.fit(ds.X(exclude_id_col=False, na_replacement=None), ds.y(na_replacement=None))
-
-        f = os.path.join(s.MODEL_DIR, f"{self.uuid}_mod.pkl")
-        joblib.dump(mod, f)
-
-        self.model_status = self.BUILT_NOT_EVALUATED
-        self.save()
+    def _fit_model(self, ds: Dataset):
+        X, y = ds.X(exclude_id_col=False, na_replacement=None), ds.y(na_replacement=None)
+        model = RelativeReasoning(
+            start_index=ds.triggered()[0],
+            end_index=ds.triggered()[1],
+        )
+        model.fit(X, y)
+        return model
+
+    def _model_args(self):
+        ds = self.load_dataset()
+        return {
+            'clz': 'RuleBaseRelativeReasoning',
+            'start_index': ds.triggered()[0],
+            'end_index': ds.triggered()[1],
+        }

     @cached_property
     def model(self) -> 'RelativeReasoning':
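The 'clz' entry returned by _model_args() is what train_func() in the base class uses to decide which estimator to construct for each evaluation split. A sketch (not part of the commit) mirroring that dispatch; RelativeReasoning and EnsembleClassifierChain come from the project's ML utilities, whose import path is assumed:

    kwargs = model._model_args()       # e.g. {'clz': 'RuleBaseRelativeReasoning', 'start_index': ..., 'end_index': ...}
    clz = kwargs.pop('clz')
    if clz == 'RuleBaseRelativeReasoning':
        estimator = RelativeReasoning(**kwargs)
    else:
        estimator = EnsembleClassifierChain(**kwargs)
    estimator.fit(X, y)                # X, y as produced by Dataset.X() / Dataset.y()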
@@ -2038,7 +2318,6 @@ class RuleBasedRelativeReasoning(PackageBasedModel):
         classify_ds, classify_prods = ds.classification_dataset([smiles], self.applicable_rules)

         mod = self.model
-
         pred = mod.predict(classify_ds.X(exclude_id_col=False, na_replacement=None))

         res = RuleBasedRelativeReasoning.combine_products_and_probs(self.applicable_rules, pred[0], classify_prods[0])
@@ -2102,118 +2381,23 @@ class MLRelativeReasoning(PackageBasedModel):

         return mlrr

-    def build_model(self):
-        self.model_status = self.BUILDING
-        self.save()
-
-        start = datetime.now()
-
-        ds = self.load_dataset()
-        X, y = ds.X(na_replacement=np.nan), ds.y(na_replacement=np.nan)
-
-        mod = EnsembleClassifierChain(
-            **s.DEFAULT_MODEL_PARAMS
-        )
-        mod.fit(X, y)
-
-        end = datetime.now()
-        logger.debug(f"fitting model took {(end - start).total_seconds()} seconds")
-
-        f = os.path.join(s.MODEL_DIR, f"{self.uuid}_mod.pkl")
-        joblib.dump(mod, f)
-
-        if self.app_domain is not None:
-            logger.debug("Building applicability domain...")
-            self.app_domain.build()
-            logger.debug("Done building applicability domain.")
-
-        self.model_status = self.BUILT_NOT_EVALUATED
-        self.save()
-
-    def evaluate_model(self):
-
-        if self.model_status != self.BUILT_NOT_EVALUATED:
-            raise ValueError(f"Can't evaluate a model in state {self.model_status}!")
-
-        self.model_status = self.EVALUATING
-        self.save()
-
-        ds = self.load_dataset()
-
-        X = np.array(ds.X(na_replacement=np.nan))
-        y = np.array(ds.y(na_replacement=np.nan))
-
-        n_splits = 20
-
-        shuff = ShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=42)
-
-        def train_and_evaluate(X, y, train_index, test_index, threshold):
-            X_train, X_test = X[train_index], X[test_index]
-            y_train, y_test = y[train_index], y[test_index]
-
-            model = EnsembleClassifierChain(
-                **s.DEFAULT_MODEL_PARAMS
-            )
-            model.fit(X_train, y_train)
-
-            y_pred = model.predict_proba(X_test)
-            y_thresholded = (y_pred >= threshold).astype(int)
-
-            # Flatten them to get rid of np.nan
-            y_test = np.asarray(y_test).flatten()
-            y_pred = np.asarray(y_pred).flatten()
-            y_thresholded = np.asarray(y_thresholded).flatten()
-
-            mask = ~np.isnan(y_test)
-            y_test_filtered = y_test[mask]
-            y_pred_filtered = y_pred[mask]
-            y_thresholded_filtered = y_thresholded[mask]
-
-            acc = jaccard_score(y_test_filtered, y_thresholded_filtered, zero_division=0)
-
-            prec, rec = dict(), dict()
-
-            for t in np.arange(0, 1.05, 0.05):
-                temp_thresholded = (y_pred_filtered >= t).astype(int)
-                prec[f"{t:.2f}"] = precision_score(y_test_filtered, temp_thresholded, zero_division=0)
-                rec[f"{t:.2f}"] = recall_score(y_test_filtered, temp_thresholded, zero_division=0)
-
-            return acc, prec, rec
-
-        from joblib import Parallel, delayed
-        ret_vals = Parallel(n_jobs=10)(
-            delayed(train_and_evaluate)(X, y, train_index, test_index, self.threshold)
-            for train_index, test_index in shuff.split(X)
-        )
-
-        def compute_averages(data):
-            num_items = len(data)
-            avg_first_item = sum(item[0] for item in data) / num_items
-
-            sum_dict2 = defaultdict(float)
-            sum_dict3 = defaultdict(float)
-
-            for _, dict2, dict3 in data:
-                for key in dict2:
-                    sum_dict2[key] += dict2[key]
-                for key in dict3:
-                    sum_dict3[key] += dict3[key]
-
-            avg_dict2 = {key: val / num_items for key, val in sum_dict2.items()}
-            avg_dict3 = {key: val / num_items for key, val in sum_dict3.items()}
-
-            return {
-                "average_accuracy": float(avg_first_item),
-                "average_precision_per_threshold": avg_dict2,
-                "average_recall_per_threshold": avg_dict3
-            }
-
-        self.eval_results = compute_averages(ret_vals)
-        self.model_status = self.FINISHED
-        self.save()
+    def _fit_model(self, ds: Dataset):
+        X, y = ds.X(na_replacement=np.nan), ds.y(na_replacement=np.nan)
+
+        model = EnsembleClassifierChain(
+            **s.DEFAULT_MODEL_PARAMS
+        )
+        model.fit(X, y)
+        return model
+
+    def _model_args(self):
+        return {
+            'clz': 'MLRelativeReasoning',
+            **s.DEFAULT_MODEL_PARAMS,
+        }

     @cached_property
-    def model(self):
+    def model(self) -> 'EnsembleClassifierChain':
         mod = joblib.load(os.path.join(s.MODEL_DIR, f'{self.uuid}_mod.pkl'))
         mod.base_clf.n_jobs = -1
         return mod
@@ -2230,24 +2414,6 @@ class MLRelativeReasoning(PackageBasedModel):
         logger.info(f"Full predict took {(end - start).total_seconds()}s")
         return res

-    @property
-    def pr_curve(self):
-        if self.model_status != self.FINISHED:
-            raise ValueError(f"Expected {self.FINISHED} but model is in status {self.model_status}")
-
-        res = []
-
-        thresholds = self.eval_results['average_precision_per_threshold'].keys()
-
-        for t in thresholds:
-            res.append({
-                'precision': self.eval_results['average_precision_per_threshold'][t],
-                'recall': self.eval_results['average_recall_per_threshold'][t],
-                'threshold': float(t)
-            })
-
-        return res
-

 class ApplicabilityDomain(EnviPathModel):
     model = models.ForeignKey(MLRelativeReasoning, on_delete=models.CASCADE)
@@ -1,6 +1,5 @@
 import logging
 import os
-import shutil

 from django.conf import settings as s
 from django.db import transaction
@@ -35,4 +34,4 @@ def delete_epmodel_files(sender, instance, **kwargs):
     for f in os.listdir(s.MODEL_DIR):
         if f.startswith(mod_uuid):
             logger.info(f"Deleting {os.path.join(s.MODEL_DIR, f)}")
-            shutil.rmtree(os.path.join(s.MODEL_DIR, f))
+            os.remove(os.path.join(s.MODEL_DIR, f))
@@ -1,12 +1,10 @@
 import logging
 from typing import Optional

-from celery.signals import worker_process_init
 from celery import shared_task
 from epdb.models import Pathway, Node, Edge, EPModel, Setting
 from epdb.logic import SPathway

 from utilities.chem import FormatConverter

 logger = logging.getLogger(__name__)
@@ -294,7 +294,7 @@ def packages(request):
                 if hidden == 'import-legacy-package-json':
                     pack = PackageManager.import_legacy_package(data, current_user)
                 else:
-                    pack = PackageManager.import_pacakge(data, current_user)
+                    pack = PackageManager.import_package(data, current_user)

                 return redirect(pack.url)
             except UnicodeDecodeError:
@@ -772,10 +772,13 @@ def package_model(request, package_uuid, model_uuid):
             if hidden == 'delete':
                 current_model.delete()
                 return redirect(current_package.url + '/model')
+            elif hidden == 'evaluate':
+                from .tasks import evaluate_model
+                evaluate_model.delay(current_model.pk)
+                return redirect(current_model.url)
             else:
                 return HttpResponseBadRequest()
         else:

             name = request.POST.get('model-name', '').strip()
             description = request.POST.get('model-description', '').strip()
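The new 'evaluate' branch queues the evaluation as a background job instead of running it in the request. A minimal sketch (not part of the commit) of the equivalent call from a shell; the task module path is assumed from the view's relative import, and the task presumably loads the model and runs its evaluate_model():

    from epdb.tasks import evaluate_model       # assumption: the Django app is named 'epdb'
    evaluate_model.delay(some_model.pk)         # some_model: a PackageBasedModel in state BUILT_NOT_EVALUATED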