from __future__ import annotations

import logging
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime
from typing import List, Dict, Set, Tuple

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.multioutput import ClassifierChain
from sklearn.preprocessing import StandardScaler

from utilities.chem import FormatConverter, PredictionResult

logger = logging.getLogger(__name__)


@dataclass
class SCompound:
    smiles: str
    uuid: str = field(default=None, compare=False, hash=False)

    def __hash__(self):
        if not hasattr(self, '_hash'):
            self._hash = hash((
                self.smiles
            ))
        return self._hash


@dataclass
class SReaction:
    educts: List[SCompound]
    products: List[SCompound]
    rule_uuid: SRule = field(default=None, compare=False, hash=False)
    reaction_uuid: str = field(default=None, compare=False, hash=False)

    def __hash__(self):
        if not hasattr(self, '_hash'):
            self._hash = hash((
                tuple(sorted(self.educts, key=lambda x: x.smiles)),
                tuple(sorted(self.products, key=lambda x: x.smiles)),
            ))
        return self._hash

    def __eq__(self, other):
        if not isinstance(other, SReaction):
            return NotImplemented
        return (
            sorted(self.educts, key=lambda x: x.smiles) == sorted(other.educts, key=lambda x: x.smiles) and
            sorted(self.products, key=lambda x: x.smiles) == sorted(other.products, key=lambda x: x.smiles)
        )


@dataclass
class SRule(ABC):

    @abstractmethod
    def apply(self):
        pass


@dataclass
class SSimpleRule:
    pass


@dataclass
class SParallelRule:
    pass


class Dataset:
    """
    Tabular container for rule-based classification data.

    Columns follow the convention 'structure_id', then a 'feature_*' block,
    a 'trig_*' block (one column per rule) and an 'obs_*' label block (one per rule).
    """

    def __init__(self, columns: List[str], num_labels: int, data: List[List[str | int | float]] = None):
        self.columns: List[str] = columns
        self.num_labels: int = num_labels

        if data is None:
            self.data: List[List[str | int | float]] = list()
        else:
            self.data = data

        self.num_features: int = len(columns) - self.num_labels
        self._struct_features: Tuple[int, int] = self._block_indices('feature_')
        self._triggered: Tuple[int, int] = self._block_indices('trig_')
        self._observed: Tuple[int, int] = self._block_indices('obs_')

    def _block_indices(self, prefix) -> Tuple[int, int]:
        # Returns the inclusive (first, last) column indices of the block with the given prefix.
        indices: List[int] = []
        for i, feature in enumerate(self.columns):
            if feature.startswith(prefix):
                indices.append(i)

        return min(indices), max(indices)

    def structure_id(self):
        return self.data[0][0]

    def add_row(self, row: List[str | int | float]):
        if len(self.columns) != len(row):
            raise ValueError(f"Header and data are not aligned: {len(self.columns)} vs. {len(row)}")
        self.data.append(row)

    def times_triggered(self, rule_uuid) -> int:
        idx = self.columns.index(f'trig_{rule_uuid}')

        times_triggered = 0
        for row in self.data:
            if row[idx] == 1:
                times_triggered += 1

        return times_triggered

    def struct_features(self) -> Tuple[int, int]:
        return self._struct_features

    def triggered(self) -> Tuple[int, int]:
        return self._triggered

    def observed(self) -> Tuple[int, int]:
        return self._observed

    def at(self, position: int) -> Dataset:
        return Dataset(self.columns, self.num_labels, [self.data[position]])

    def limit(self, limit: int) -> Dataset:
        return Dataset(self.columns, self.num_labels, self.data[:limit])

    def __iter__(self):
        return (self.at(i) for i, _ in enumerate(self.data))

    def classification_dataset(self, structures: List[str | 'CompoundStructure'], applicable_rules: List['Rule']) -> Tuple[Dataset, List[List[PredictionResult]]]:
        classify_data = []
        classify_products = []
        for struct in structures:

            if isinstance(struct, str):
                struct_id = None
                struct_smiles = struct
            else:
                struct_id = str(struct.uuid)
                struct_smiles = struct.smiles

            features = FormatConverter.maccs(struct_smiles)

            trig = []
            prods = []
            for rule in applicable_rules:
                products = rule.apply(struct_smiles)

                if len(products):
                    trig.append(1)
                    prods.append(products)
                else:
                    trig.append(0)
                    prods.append([])

            # Labels are unknown at prediction time, hence the -1 placeholders.
            classify_data.append([struct_id] + features + trig + ([-1] * len(trig)))
            classify_products.append(prods)

        return Dataset(columns=self.columns, num_labels=self.num_labels, data=classify_data), classify_products

    @staticmethod
    def generate_dataset(reactions: List['Reaction'], applicable_rules: List['Rule'], educts_only: bool = True) -> Dataset:
        _structures = set()

        for r in reactions:
            for e in r.educts.all():
                _structures.add(e)

            if not educts_only:
                for e in r.products:
                    _structures.add(e)

        compounds = sorted(_structures, key=lambda x: x.url)

        triggered: Dict[str, Set[str]] = defaultdict(set)
        observed: Set[str] = set()

        # Apply rules on collected compounds and store the generated products
        for i, comp in enumerate(compounds):
            logger.debug(f"{i + 1}/{len(compounds)}...")

            for rule in applicable_rules:
                product_sets = rule.apply(comp.smiles)

                if len(product_sets) == 0:
                    continue

                key = f"{rule.uuid} + {comp.uuid}"

                if key in triggered:
                    logger.info(f"{key} already present. Duplicate reaction?")

                for prod_set in product_sets:
                    for smi in prod_set:

                        try:
                            smi = FormatConverter.standardize(smi)
                        except Exception:
                            logger.debug(f'Standardizing SMILES failed for {smi}')

                        triggered[key].add(smi)

        for i, r in enumerate(reactions):
            logger.debug(f"{i + 1}/{len(reactions)}...")

            if len(r.educts.all()) != 1:
                logger.debug(f"Skipping {r.url} as it has {len(r.educts.all())} substrates!")
                continue

            for comp in r.educts.all():
                for rule in applicable_rules:
                    key = f"{rule.uuid} + {comp.uuid}"

                    if key not in triggered:
                        continue

                    # Standardize products from reactions for comparison
                    standardized_products = []
                    for cs in r.products.all():
                        smi = cs.smiles

                        try:
                            smi = FormatConverter.standardize(smi)
                        except Exception:
                            logger.debug(f'Standardizing SMILES failed for {smi}')

                        standardized_products.append(smi)

                    # The rule counts as observed if it reproduces every product of the reaction.
                    if len(set(standardized_products).difference(triggered[key])) == 0:
                        observed.add(key)

        ds = None

        for i, comp in enumerate(compounds):
            # Features
            feat = FormatConverter.maccs(comp.smiles)
            trig = []
            obs = []

            for rule in applicable_rules:
                key = f"{rule.uuid} + {comp.uuid}"

                # Check triggered
                if key in triggered:
                    trig.append(1)
                else:
                    trig.append(0)

                # Check observed: rules that never triggered get a missing label (None)
                if key in observed:
                    obs.append(1)
                elif key not in triggered:
                    obs.append(None)
                else:
                    obs.append(0)

            if ds is None:
                header = (
                    ['structure_id']
                    + [f'feature_{i}' for i, _ in enumerate(feat)]
                    + [f'trig_{r.uuid}' for r in applicable_rules]
                    + [f'obs_{r.uuid}' for r in applicable_rules]
                )
                ds = Dataset(header, len(applicable_rules))

            ds.add_row([str(comp.uuid)] + feat + trig + obs)

        return ds

    def X(self, exclude_id_col=True, na_replacement=0):
        res = self.__getitem__((slice(None), slice(1 if exclude_id_col else 0, len(self.columns) - self.num_labels)))
        if na_replacement is not None:
            res = [[x if x is not None else na_replacement for x in row] for row in res]
        return res

    def trig(self, na_replacement=0):
        # _block_indices returns inclusive bounds, so the end index is shifted by one for slicing.
        res = self.__getitem__((slice(None), slice(self._triggered[0], self._triggered[1] + 1)))
        if na_replacement is not None:
            res = [[x if x is not None else na_replacement for x in row] for row in res]
        return res

    def y(self, na_replacement=0):
        res = self.__getitem__((slice(None), slice(len(self.columns) - self.num_labels, None)))
        if na_replacement is not None:
            res = [[x if x is not None else na_replacement for x in row] for row in res]
        return res

    def __getitem__(self, key):
        if not isinstance(key, tuple):
            raise TypeError("Dataset must be indexed with dataset[rows, columns]")

        row_key, col_key = key

        # Normalize rows
        if isinstance(row_key, int):
            rows = [self.data[row_key]]
        else:
            rows = self.data[row_key]

        # Normalize columns
        if isinstance(col_key, int):
            res = [row[col_key] for row in rows]
        else:
            res = [[row[i] for i in range(*col_key.indices(len(row)))] if isinstance(col_key, slice)
                   else [row[i] for i in col_key] for row in rows]

        return res

    def save(self, path: 'Path'):
        import pickle
        with open(path, "wb") as fh:
            pickle.dump(self, fh)

    @staticmethod
    def load(path: 'Path') -> 'Dataset':
        import pickle
        with open(path, "rb") as fh:
            return pickle.load(fh)

    def to_arff(self, path: 'Path'):
        arff = f"@relation 'enviPy-dataset: -C {self.num_labels}'\n"
        arff += "\n"
        for c in self.columns[-self.num_labels:] + self.columns[:self.num_features]:
            if c == 'structure_id':
                arff += f"@attribute {c} string\n"
            else:
                arff += f"@attribute {c} {{0,1}}\n"

        arff += "\n@data\n"
        for d in self.data:
            ys = ','.join([str(v if v is not None else '?') for v in d[-self.num_labels:]])
            xs = ','.join([str(v if v is not None else '?') for v in d[:self.num_features]])
            arff += f'{ys},{xs}\n'

        with open(path, "w") as fh:
            fh.write(arff)
            fh.flush()

    def __repr__(self):
        return f"<Dataset #rows={len(self.data)} #cols={len(self.columns)} #labels={self.num_labels}>"


class SparseLabelECC(BaseEstimator, ClassifierMixin):
    """
    Ensemble of Classifier Chains with sparse label removal.

    Labels that are constant across all training samples are removed before the
    chains are fitted and re-inserted with their constant value at prediction time.
    """

    def __init__(self, base_clf=RandomForestClassifier(n_estimators=100, max_features='log2', random_state=42),
                 num_chains: int = 10):
        self.base_clf = base_clf
        self.num_chains = num_chains

    def fit(self, X, Y):
        y = np.array(Y)
        self.n_labels_ = y.shape[1]
        self.removed_labels_ = {}
        self.keep_columns_ = []

        for col in range(self.n_labels_):
            unique_values = np.unique(y[:, col])
            if len(unique_values) == 1:
                self.removed_labels_[col] = unique_values[0]
            else:
                self.keep_columns_.append(col)

        y_reduced = y[:, self.keep_columns_]
        self.chains_ = [ClassifierChain(self.base_clf) for _ in range(self.num_chains)]

        for i, chain in enumerate(self.chains_):
            print(f"{datetime.now()} fitting {i + 1}/{self.num_chains}")
            chain.fit(X, y_reduced)

        return self

    def predict(self, X, threshold=0.5):
        avg_preds = np.mean([chain.predict(X) for chain in self.chains_], axis=0) > threshold
        full_y = np.zeros((avg_preds.shape[0], self.n_labels_))

        for idx, col in enumerate(self.keep_columns_):
            full_y[:, col] = avg_preds[:, idx]

        for col, value in self.removed_labels_.items():
            full_y[:, col] = bool(value)

        return full_y

    def predict_proba(self, X):
        avg_proba = np.mean([chain.predict_proba(X) for chain in self.chains_], axis=0)
        full_y = np.zeros((avg_proba.shape[0], self.n_labels_))

        for idx, col in enumerate(self.keep_columns_):
            full_y[:, col] = avg_proba[:, idx]

        for col, value in self.removed_labels_.items():
            full_y[:, col] = float(value)

        return full_y

    def score(self, X, Y, sample_weight=None):
        """
        Default scoring using subset accuracy (exact match).
        """
        y_true = np.array(Y)
        y_pred = self.predict(X)
        return accuracy_score(y_true, y_pred, sample_weight=sample_weight)
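

# Illustrative usage sketch (added for documentation, not part of the original module):
# a tiny synthetic multi-label problem; the second label is constant, so SparseLabelECC
# drops it before fitting the chains and restores it at prediction time.
def _demo_sparse_label_ecc():
    rng = np.random.default_rng(0)
    X = rng.integers(0, 2, size=(20, 16))
    Y = np.column_stack((
        X[:, 0],                 # informative label
        np.ones(20, dtype=int),  # constant label, removed before chaining
        X[:, 1] ^ X[:, 2],       # second informative label
    ))
    clf = SparseLabelECC(num_chains=2)
    clf.fit(X, Y)
    return clf.predict_proba(X)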


import copy

from sklearn.dummy import DummyClassifier
from sklearn.tree import DecisionTreeClassifier


class BinaryRelevance:
    """Trains one base classifier per label, skipping instances whose label is missing."""

    def __init__(self, baseline_clf):
        self.clf = baseline_clf
        self.classifiers = None

    def fit(self, X, Y):
        # np.isnan is used to mask missing labels, so inputs are converted to numeric
        # arrays (mirroring MissingValuesClassifierChain below).
        X = np.array(X)
        Y = np.array(Y)

        if self.classifiers is None:
            self.classifiers = []

        for l in range(len(Y[0])):
            X_l = X[~np.isnan(Y[:, l])]
            Y_l = Y[~np.isnan(Y[:, l]), l]
            if len(X_l) == 0:  # all labels are nan -> predict 0
                clf = DummyClassifier(strategy='constant', constant=0)
                clf.fit([X[0]], [0])
                self.classifiers.append(clf)
                continue
            elif len(np.unique(Y_l)) == 1:  # only one class -> predict that class
                clf = DummyClassifier(strategy='most_frequent')
            else:
                clf = copy.deepcopy(self.clf)
            clf.fit(X_l, Y_l)
            self.classifiers.append(clf)

    def predict(self, X):
        labels = []
        for clf in self.classifiers:
            labels.append(clf.predict(X))
        return np.column_stack(labels)

    def predict_proba(self, X):
        labels = np.empty((len(X), 0))
        for clf in self.classifiers:
            pred = clf.predict_proba(X)
            if pred.shape[1] > 1:
                pred = pred[:, 1]
            else:
                # Single-class classifier: the positive-class probability is either 0 or 1.
                pred = pred * clf.predict([X[0]])[0]
            labels = np.column_stack((labels, pred))
        return labels
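

# Illustrative usage sketch (added for documentation, not part of the original module):
# BinaryRelevance fits one classifier per label and simply skips rows whose label is NaN.
def _demo_binary_relevance():
    rng = np.random.default_rng(1)
    X = rng.integers(0, 2, size=(30, 8)).astype(float)
    Y = np.column_stack((X[:, 0], X[:, 1]))
    Y[::5, 1] = np.nan  # every fifth row misses its second label
    br = BinaryRelevance(RandomForestClassifier(n_estimators=10, random_state=0))
    br.fit(X, Y)
    return br.predict_proba(X)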


class MissingValuesClassifierChain:
    """A single classifier chain that tolerates missing (NaN) labels during fitting."""

    def __init__(self, base_clf):
        self.base_clf = base_clf
        self.permutation = None
        self.classifiers = None

    def fit(self, X, Y):
        X = np.array(X)
        Y = np.array(Y)
        if self.permutation is None:
            self.permutation = np.random.permutation(len(Y[0]))

        Y = Y[:, self.permutation]

        if self.classifiers is None:
            self.classifiers = []

        for p in range(len(self.permutation)):
            X_p = X[~np.isnan(Y[:, p])]
            Y_p = Y[~np.isnan(Y[:, p]), p]
            if len(X_p) == 0:  # all labels are nan -> predict 0
                clf = DummyClassifier(strategy='constant', constant=0)
                self.classifiers.append(clf.fit([X[0]], [0]))
            elif len(np.unique(Y_p)) == 1:  # only one class -> predict that class
                clf = DummyClassifier(strategy='most_frequent')
                self.classifiers.append(clf.fit(X_p, Y_p))
            else:
                clf = copy.deepcopy(self.base_clf)
                self.classifiers.append(clf.fit(X_p, Y_p))
            newcol = Y[:, p]
            pred = clf.predict(X)
            newcol[np.isnan(newcol)] = pred[np.isnan(newcol)]  # fill in missing values with clf predictions
            X = np.column_stack((X, newcol))

    def predict(self, X):
        labels = np.empty((len(X), 0))
        for clf in self.classifiers:
            pred = clf.predict(np.column_stack((X, labels)))
            labels = np.column_stack((labels, pred))
        return labels[:, np.argsort(self.permutation)]

    def predict_proba(self, X):
        labels = np.empty((len(X), 0))
        for clf in self.classifiers:
            pred = clf.predict_proba(np.column_stack((X, np.round(labels))))
            if pred.shape[1] > 1:
                pred = pred[:, 1]
            else:
                pred = pred * clf.predict(np.column_stack(([X[0]], np.round([labels[0]]))))[0]
            labels = np.column_stack((labels, pred))
        return labels[:, np.argsort(self.permutation)]


class EnsembleClassifierChain:
    """Averages the predictions of several MissingValuesClassifierChain instances."""

    def __init__(self, base_clf, num_chains=10):
        self.base_clf = base_clf
        self.num_chains = num_chains
        self.num_labels = None
        self.classifiers = None

    def fit(self, X, Y):
        if self.classifiers is None:
            self.classifiers = []

        if self.num_labels is None:
            self.num_labels = len(Y[0])

        for p in range(self.num_chains):
            print(f"{datetime.now()} fitting {p + 1}/{self.num_chains}")
            clf = MissingValuesClassifierChain(self.base_clf)
            clf.fit(X, Y)
            self.classifiers.append(clf)

    def predict(self, X):
        labels = np.zeros((len(X), self.num_labels))
        for clf in self.classifiers:
            labels += clf.predict(X)
        return np.round(labels / self.num_chains)

    def predict_proba(self, X):
        labels = np.zeros((len(X), self.num_labels))
        for clf in self.classifiers:
            labels += clf.predict_proba(X)
        return labels / self.num_chains
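

# Illustrative usage sketch (added for documentation, not part of the original module):
# an ensemble of two chains over labels containing NaN; during fitting each chain imputes
# the missing entries with its own predictions before stacking them as extra features.
def _demo_ensemble_classifier_chain():
    rng = np.random.default_rng(3)
    X = rng.integers(0, 2, size=(30, 8)).astype(float)
    Y = np.column_stack((X[:, 0], 1.0 - X[:, 1]))
    Y[::4, 0] = np.nan
    ecc = EnsembleClassifierChain(RandomForestClassifier(n_estimators=10, random_state=0), num_chains=2)
    ecc.fit(X, Y)
    return ecc.predict_proba(X)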


class RelativeReasoning:
    """
    Suppresses a predicted rule when another rule that consistently "won" against it
    in the training data is triggered as well.
    """

    def __init__(self, start_index: int, end_index: int):
        self.start_index: int = start_index
        self.end_index: int = end_index
        self.winmap: Dict[int, List[int]] = defaultdict(list)
        self.min_count: int = 5
        self.max_count: int = 0

    def fit(self, X, Y):
        n_instances = len(Y)
        n_attributes = len(Y[0])

        for i in range(n_attributes):
            for j in range(n_attributes):
                if i == j:
                    continue

                countwin = 0
                countloose = 0
                countboth = 0

                for k in range(n_instances):
                    vi = Y[k][i]
                    vj = Y[k][j]

                    if vi is None or vj is None:
                        continue

                    if vi < vj:
                        countwin += 1
                    elif vi > vj:
                        countloose += 1
                    elif vi == vj and vi == 1:  # tie
                        countboth += 1

                # At least self.min_count wins, more wins than losses, at most
                # self.max_count losses (or the loss check disabled when max_count < 0),
                # and no ties.
                if (
                        countwin >= self.min_count and
                        countwin > countloose and
                        (
                                countloose <= self.max_count or
                                self.max_count < 0
                        ) and
                        countboth == 0
                ):
                    self.winmap[i].append(j)

    def predict(self, X):
        res = np.zeros((len(X), (self.end_index + 1 - self.start_index)))

        for inst_idx, inst in enumerate(X):
            for i, t in enumerate(inst[self.start_index: self.end_index + 1]):
                res[inst_idx][i] = t
                if t:
                    for i2, t2 in enumerate(inst[self.start_index: self.end_index + 1]):
                        # Suppress rule i if a dominating rule i2 is triggered as well.
                        if i != i2 and i2 in self.winmap.get(i, []) and t2:
                            res[inst_idx][i] = 0

        return res

    def predict_proba(self, X):
        return self.predict(X)
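

# Illustrative usage sketch (added for documentation, not part of the original module):
# in the training labels rule 1 is always observed while rule 0 is not, so rule 1 ends up
# "winning" against rule 0 and a triggered rule 0 is suppressed whenever rule 1 fires too.
def _demo_relative_reasoning():
    Y = [[0, 1]] * 6                 # six observations, always favouring rule 1
    rr = RelativeReasoning(start_index=0, end_index=1)
    rr.fit(None, Y)                  # X is not used during fitting
    # Expected: [[0., 1.], [1., 0.]] -- the first instance triggers both rules, so rule 0 is dropped.
    return rr.predict([[1, 1], [1, 0]])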


class ApplicabilityDomainPCA(PCA):
    """
    PCA-based applicability domain: an instance is considered applicable if its
    projection falls inside the bounding box of the training data in PCA space.
    """

    def __init__(self, num_neighbours: int = 5):
        super().__init__(n_components=num_neighbours)
        self.scaler = StandardScaler()
        self.num_neighbours = num_neighbours
        self.min_vals = None
        self.max_vals = None

    def build(self, train_dataset: 'Dataset'):
        # scale
        X_scaled = self.scaler.fit_transform(train_dataset.X())
        # fit pca
        X_pca = self.fit_transform(X_scaled)

        self.max_vals = np.max(X_pca, axis=0)
        self.min_vals = np.min(X_pca, axis=0)

    def __transform(self, instances):
        instances_scaled = self.scaler.transform(instances)
        instances_pca = self.transform(instances_scaled)
        return instances_pca

    def is_applicable(self, classify_instances: 'Dataset'):
        instances_pca = self.__transform(classify_instances.X())

        is_applicable = []
        for i, instance in enumerate(instances_pca):
            is_applicable.append(True)
            for min_v, max_v, new_v in zip(self.min_vals, self.max_vals, instance):
                if not min_v <= new_v <= max_v:
                    is_applicable[i] = False

        return is_applicable
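

# Illustrative usage sketch (added for documentation, not part of the original module):
# a hand-built Dataset (column names follow the module's 'feature_'/'trig_'/'obs_' convention)
# is used to fit the PCA bounding box, then a single query instance is checked against it.
def _demo_applicability_domain():
    rng = np.random.default_rng(2)
    columns = ['structure_id'] + [f'feature_{i}' for i in range(6)] + ['trig_r1', 'obs_r1']
    train = Dataset(columns, num_labels=1)
    for i in range(10):
        train.add_row([f'c{i}'] + list(rng.integers(0, 2, size=6)) + [1, 0])
    ad = ApplicabilityDomainPCA(num_neighbours=2)
    ad.build(train)
    query = Dataset(columns, num_labels=1, data=[['q0', 1, 1, 0, 0, 1, 0, 1, -1]])
    return ad.is_applicable(query)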


def tanimoto_distance(a: List[int], b: List[int]):
    """Tanimoto (Jaccard) distance between two binary fingerprints of equal length."""
    if len(a) != len(b):
        raise ValueError(f"Lists must be the same length {len(a)} != {len(b)}")

    sum_a = sum(a)
    sum_b = sum(b)
    sum_c = sum(v1 and v2 for v1, v2 in zip(a, b))

    if sum_a + sum_b - sum_c == 0:
        return 0.0

    return 1 - (sum_c / (sum_a + sum_b - sum_c))
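

# Worked example (added for illustration): the two fingerprints below each have three bits
# set and share two of them, so the Tanimoto similarity is 2 / (3 + 3 - 2) = 0.5 and the
# distance printed here is 1 - 0.5 = 0.5.
if __name__ == "__main__":
    print(tanimoto_distance([1, 1, 0, 1], [1, 0, 1, 1]))  # 0.5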