import pandas as pd
from sklearn.naive_bayes import GaussianNB
import numpy as np
from src.data.preprocessing import split_and_scale
from src.utils.utils import save, load, check_path_exists
import os
import json
from skopt.utils import use_named_args
from skopt import gp_minimize
from src.models.pairwise import pairwise_optimize
class Evaluation(object):
    """Train, score and record ranking models for query/passage retrieval.

    Every scoring method works on a ``results`` frame with the columns
    ``qID``, ``pID``, ``confidence`` (model score) and ``relevant``
    (graded relevance label, ``0`` meaning not relevant).

    Attributes:
        previous_results (str): Path of the file the result table is loaded
            from and saved to.
        results (pd.DataFrame): One row per saved evaluation run.

    Methods:
        __call__: Fit and evaluate a model on the test set, print MRR/nDCG.
        hyperparameter_optimization: Tune a model on the validation set with
            ``gp_minimize`` and report its test-set performance.
        feature_selection: Greedy forward feature selection driven by nDCG.
        compute_metrics: Fit a model, score the test pairs, compute all
            metrics and optionally persist them.
        calculate_ranks: Rank documents by confidence, keep the relevant ones.
        average_precision_score: Average precision of one ranking.
        mean_average_precision_score: Mean average precision over queries.
        metrics: Accuracy, precision, recall and F1 (optionally on top-k).
        normalized_discounted_cumulative_gain: nDCG of one ranking.
        mean_normalized_discounted_cumulative_gain_score: Mean nDCG over
            queries.
        mean_reciprocal_rank: Mean reciprocal rank over queries.
    """

    def __init__(self, previous_results: str = 'data/results/results.pkl'):
        """Construct the Evaluation object.

        Args:
            previous_results (str): Path to previously stored results. When
                the file exists it is loaded, otherwise an empty result
                table is started.
        """
        self.previous_results = previous_results
        if os.path.exists(previous_results):
            self.results = load(previous_results)
        else:
            # NOTE(review): check_path_exists presumably creates the target
            # directory so the later save() succeeds - confirm in src.utils.
            check_path_exists(os.path.dirname(previous_results))
            self.results = pd.DataFrame()

    def __call__(self,
                 X_y_train: pd.DataFrame,
                 X_test: pd.DataFrame,
                 qrels: pd.DataFrame,
                 k: int = 50,
                 components_pca: int = 0,
                 model=None,
                 pairwise_model=None,
                 pairwise_top_k: int = 50,
                 pairwise_train: bool = True,
                 name: str = None,
                 save_result: bool = True):
        """Evaluate a model on the test data and print MRR and nDCG.

        Args:
            X_y_train (pd.DataFrame): Training data passed to
                ``split_and_scale``.
            X_test (pd.DataFrame): Test data passed to ``split_and_scale``.
            qrels (pd.DataFrame): Relevance judgements with the columns
                ``qID``, ``pID`` and ``feedback``.
            k (int): Cut-off used for the ``@k`` classification metrics.
            components_pca (int): Forwarded to ``split_and_scale`` and stored
                with the results.
            model: Estimator offering ``fit``, ``predict_proba`` and
                ``get_params``. ``None`` (default) creates a fresh
                ``GaussianNB`` for this call.
            pairwise_model: Optional model handed to ``pairwise_optimize``.
            pairwise_top_k (int): Forwarded to ``pairwise_optimize``.
            pairwise_train (bool): Forwarded to ``pairwise_optimize``.
            name (str): Label stored with the saved result row.
            save_result (bool): Whether to append and persist the metrics.

        Returns:
            float: MRR on the test set.
        """
        # A default estimator created in the signature would be one shared,
        # repeatedly refitted object; build it per call instead.
        if model is None:
            model = GaussianNB()

        X, y, X_test, test_pair = split_and_scale(X_y_train, X_test, components_pca=components_pca)
        performance = self.compute_metrics(model,
                                           X,
                                           y,
                                           X_test,
                                           test_pair,
                                           qrels,
                                           k,
                                           components_pca,
                                           pairwise_model,
                                           pairwise_top_k,
                                           pairwise_train,
                                           name=name,
                                           save_result=save_result)
        print(f'MRR: {performance[0]}')
        print(f'nDCG: {performance[1]}')
        return performance[0]

    def hyperparameter_optimization(self, model, search_space,
                                    X_y_train: pd.DataFrame,
                                    X_test: pd.DataFrame,
                                    X_val: pd.DataFrame,
                                    qrels: pd.DataFrame,
                                    qrels_val: pd.DataFrame,
                                    k: int = 50,
                                    components_pca: int = 0,
                                    pairwise_model=None,
                                    pairwise_top_k: int = 50,
                                    pairwise_train: bool = True,
                                    trials: int = 50,
                                    name: str = None,
                                    save_result: bool = True
                                    ):
        """Tune hyperparameters on the validation set, then score the test set.

        The search maximises validation MRR (``gp_minimize`` minimises its
        negative). The best parameters are then set on ``model`` and the
        model is evaluated once on the test set.

        Args:
            model: Estimator offering ``set_params``, ``fit``,
                ``predict_proba`` and ``get_params``.
            search_space: Named skopt dimensions; each ``name`` must be a
                parameter accepted by ``model.set_params``.
            X_y_train (pd.DataFrame): Training data.
            X_test (pd.DataFrame): Test data.
            X_val (pd.DataFrame): Validation data.
            qrels (pd.DataFrame): Relevance judgements for the test set.
            qrels_val (pd.DataFrame): Relevance judgements for the
                validation set.
            k (int): Cut-off used for the ``@k`` classification metrics.
            components_pca (int): Forwarded to ``split_and_scale``.
            pairwise_model: Optional model handed to ``pairwise_optimize``.
            pairwise_top_k (int): Forwarded to ``pairwise_optimize``.
            pairwise_train (bool): Forwarded to ``pairwise_optimize``.
            trials (int): Number of ``gp_minimize`` evaluations.
            name (str): Label stored with the saved result row.
            save_result (bool): Whether the final test-set run is persisted
                (validation runs never are).

        Returns:
            float: MRR of the tuned model on the test set.
        """
        X, y, X_test, test_pair, X_val, val_pair = split_and_scale(X_y_train, X_test, X_val, components_pca)

        @use_named_args(search_space)
        def evaluate(**params):
            model.set_params(**params)
            # Negated because gp_minimize minimises its objective.
            return -1 * self.compute_metrics(model, X, y, X_val, val_pair, qrels_val, k, components_pca,
                                             pairwise_model, pairwise_top_k, pairwise_train, name=name)[0]

        best_result = gp_minimize(evaluate, search_space, n_calls=trials)
        print(f'Best MRR: {-1 * best_result.fun}')
        print(f'Best Hyperparameters: {best_result.x}')

        best_params_dict = {space.name: value for space, value in zip(search_space, best_result.x)}
        test_set_performance = self.compute_metrics(model.set_params(**best_params_dict),
                                                    X, y, X_test, test_pair, qrels,
                                                    k, components_pca,
                                                    pairwise_model, pairwise_top_k, pairwise_train,
                                                    name=name, save_result=save_result)
        print(f'MRR on test set: {test_set_performance[0]}')
        print(f'nDCG on test set: {test_set_performance[1]}')
        return test_set_performance[0]

    def feature_selection(self, model,
                          X_y_train: pd.DataFrame,
                          X_test: pd.DataFrame,
                          qrels: pd.DataFrame,
                          k: int = 50,
                          components_pca: int = 0,
                          save_results: bool = True,
                          name: str = None
                          ):
        """Greedy forward feature selection driven by test-set nDCG.

        In every round each not-yet-selected feature is tried on top of the
        current selection; the feature giving the highest nDCG is kept if it
        beats the previous round. Selection stops when no feature improves
        the score or all features are selected.

        Args:
            model: Estimator offering ``fit``, ``predict_proba`` and
                ``get_params``.
            X_y_train (pd.DataFrame): Training data.
            X_test (pd.DataFrame): Test data.
            qrels (pd.DataFrame): Relevance judgements for the test set.
            k (int): Cut-off used for the ``@k`` classification metrics.
            components_pca (int): Forwarded to ``split_and_scale``.
            save_results (bool): Whether every tried combination is
                persisted.
            name (str): Label stored with the saved result rows.

        Returns:
            tuple: ``(added_columns, performances)`` - the selected feature
            names in selection order and the nDCG reached after each
            addition.
        """
        X, y, X_test, test_pair = split_and_scale(X_y_train, X_test, components_pca=components_pca)
        features = list(X.columns)
        added_columns = []
        performances = []
        current_performance = -1

        while len(added_columns) < len(features):
            best_feature, best_score = None, 0
            for feature in features:
                if feature in added_columns:
                    continue
                candidate = added_columns + [feature]
                print(f'Testing features: {candidate}')
                # Index 1 of compute_metrics' result is nDCG.
                performance = self.compute_metrics(model,
                                                   X[candidate],
                                                   y,
                                                   X_test[candidate],
                                                   test_pair,
                                                   qrels,
                                                   k,
                                                   components_pca,
                                                   name=name,
                                                   save_result=save_results)[1]
                if performance > current_performance and performance > best_score:
                    best_feature, best_score = feature, performance

            if best_feature is None:
                # No remaining feature improved on the previous round.
                break
            current_performance = best_score
            added_columns.append(best_feature)
            performances.append(current_performance)
            print(f'Current features: {added_columns}')
            print(f'Current Performance: {current_performance}')

        print(f'Best feature combination: {added_columns}')
        # The tracked score is nDCG, so it is labelled as such.
        print(f'nDCG: {current_performance}')
        return added_columns, performances

    def compute_metrics(self, model,
                        X: pd.DataFrame,
                        y,
                        X_test,
                        test_pair,
                        qrels: pd.DataFrame,
                        k: int = 50,
                        components_pca: int = 0,
                        pairwise_model=None,
                        pairwise_top_k: int = 50,
                        pairwise_train: bool = True,
                        name: str = None,
                        save_result: bool = False):
        """Fit the model, score the test pairs and compute all metrics.

        Args:
            model: Estimator offering ``fit``, ``predict_proba`` and
                ``get_params``.
            X (pd.DataFrame): Training features.
            y: Training labels.
            X_test: Test features, row-aligned with ``test_pair``.
            test_pair: Frame with the ``qID`` and ``pID`` of every test row.
            qrels (pd.DataFrame): Relevance judgements with the columns
                ``qID``, ``pID`` and ``feedback``.
            k (int): Cut-off used for the ``@k`` classification metrics.
            components_pca (int): Only stored with the saved result row.
            pairwise_model: Optional model handed to ``pairwise_optimize``
                to post-process the results.
            pairwise_top_k (int): Forwarded to ``pairwise_optimize``.
            pairwise_train (bool): Forwarded to ``pairwise_optimize``.
            name (str): Label stored with the saved result row.
            save_result (bool): Whether to append the metrics to
                ``self.results`` and save them to ``self.previous_results``.

        Returns:
            tuple: ``(MRR, nDCG)``.
        """
        model.fit(X, y)
        # Column 1 of predict_proba is used as the ranking confidence.
        confidences = pd.DataFrame(model.predict_proba(X_test))[1]
        results = pd.DataFrame({
            'confidence': confidences,
            'qID': list(test_pair['qID']),
            'pID': list(test_pair['pID']),
            'relevant': [0] * len(confidences)
        })

        # Pairs without a judgement keep relevance 0.
        for query_id, passage_id, feedback in zip(qrels['qID'], qrels['pID'], qrels['feedback']):
            is_pair = (results['pID'] == passage_id) & (results['qID'] == query_id)
            results.loc[is_pair, 'relevant'] = feedback

        if pairwise_model is not None:
            results = pairwise_optimize(pairwise_model, results, X, y, X_test, pairwise_top_k, pairwise_train)

        mrr = self.mean_reciprocal_rank(results)
        mean_ap = self.mean_average_precision_score(results)
        ndcg = self.mean_normalized_discounted_cumulative_gain_score(results)
        overall = self.metrics(results)
        top_k = self.metrics(results, k)

        if save_result:
            run = pd.DataFrame({'name': name,
                                'model': str(model),
                                'hyperparameters': str(model.get_params()),
                                'pairwise_model': pairwise_model,
                                'pairwise_k': pairwise_top_k if pairwise_model is not None else None,
                                'features': json.dumps(list(X.columns)),
                                'sampling_training': len(X),
                                'sampling_test': len(X_test),
                                'pca': components_pca,
                                'MRR': mrr,
                                'MAP': mean_ap,
                                'nDCG': ndcg,
                                'accuracy': overall[0],
                                'precision': overall[1],
                                'recall': overall[2],
                                'f1': overall[3],
                                f'accuracy@{k}': top_k[0],
                                f'precision@{k}': top_k[1],
                                f'recall@{k}': top_k[2],
                                f'f1@{k}': top_k[3]
                                }, index=[0])
            self.results = pd.concat([self.results, run]).reset_index(drop=True)
            save(self.results, self.previous_results)
        return mrr, ndcg

    def calculate_ranks(self, results: pd.DataFrame):
        """Rank documents by confidence and keep only the relevant ones.

        Args:
            results (pd.DataFrame): Rows of a single ranking with the columns
                ``confidence`` and ``relevant``.

        Returns:
            pd.DataFrame: Rows with ``relevant >= 1`` in rank order, with an
            added ``rank`` column (1 = highest confidence among all rows)
            and an index running 1..n over the relevant rows. The input
            frame is not modified.
        """
        ranks = results.sort_values('confidence', ascending=False)
        ranks['rank'] = np.arange(1, len(ranks) + 1)
        # Copy so that re-indexing never touches the sorted parent frame.
        ranks = ranks[ranks['relevant'] >= 1].copy()
        ranks.index = np.arange(1, len(ranks) + 1)
        return ranks

    def average_precision_score(self, results: pd.DataFrame):
        """Calculate the average precision of a single ranking.

        Args:
            results (pd.DataFrame): Rows of one query with the columns
                ``confidence`` and ``relevant``.

        Returns:
            float: Mean of ``i / rank_i`` over the relevant documents, where
            ``i`` is the position among the relevant documents; ``0.0`` when
            the ranking contains no relevant document.
        """
        ranks = self.calculate_ranks(results)
        if ranks.empty:
            return 0.0
        positions = np.arange(1, len(ranks) + 1)
        precisions = positions / ranks['rank'].to_numpy(dtype=float)
        return float(precisions.mean())

    def mean_average_precision_score(self, results: pd.DataFrame):
        """Calculate the mean average precision over all queries.

        Args:
            results (pd.DataFrame): Rows with the columns ``qID``,
                ``confidence`` and ``relevant``.

        Returns:
            float: Mean of the per-query average precision; ``0.0`` when
            ``results`` contains no query.
        """
        return self._mean_over_queries(results, self.average_precision_score)

    def metrics(self, results: pd.DataFrame, k: int = None):
        """Calculate accuracy, precision, recall and F1.

        A row counts as predicted positive when ``confidence >= 0.5`` and as
        actually positive when ``relevant >= 1``.

        Args:
            results (pd.DataFrame): Rows with the columns ``qID``,
                ``confidence`` and ``relevant``.
            k (int): When given, only the ``k`` highest-confidence rows of
                every query are considered.

        Returns:
            tuple: ``(accuracy, precision, recall, f_score)``. A ratio whose
            denominator is zero is reported as ``np.nan``.
        """
        if k is not None:
            results = results.sort_values('confidence', ascending=False).groupby('qID').head(k)

        predicted_positive = results['confidence'] >= 0.5
        tp = len(results[predicted_positive & (results['relevant'] >= 1)])
        fp = len(results[predicted_positive & (results['relevant'] == 0)])
        tn = len(results[~predicted_positive & (results['relevant'] == 0)])
        fn = len(results[~predicted_positive & (results['relevant'] >= 1)])

        accuracy = self._safe_divide(tp + tn, tp + fp + tn + fn)
        precision = self._safe_divide(tp, tp + fp)
        recall = self._safe_divide(tp, tp + fn)
        # NaN precision/recall propagate into a NaN F1 through the arithmetic.
        f_score = self._safe_divide(2 * precision * recall, precision + recall)
        return accuracy, precision, recall, f_score

    def normalized_discounted_cumulative_gain(self, results: pd.DataFrame):
        """Calculate the normalized discounted cumulative gain of one ranking.

        Gains are ``2 ** relevant - 1``; the ideal ranking places the
        relevant documents at positions 1..n in descending relevance.

        Args:
            results (pd.DataFrame): Rows of one query with the columns
                ``confidence`` and ``relevant``.

        Returns:
            float: ``DCG / IDCG``; ``0.0`` when the ranking contains no
            relevant document.
        """
        ranks = self.calculate_ranks(results)
        if ranks.empty:
            return 0.0
        gains = 2.0 ** ranks['relevant'].to_numpy(dtype=float) - 1
        actual_positions = ranks['rank'].to_numpy(dtype=float)
        dcg = np.sum(gains / np.log2(actual_positions + 1))
        ideal_gains = np.sort(gains)[::-1]
        idcg = np.sum(ideal_gains / np.log2(np.arange(2, len(ideal_gains) + 2)))
        return float(dcg / idcg)

    def mean_normalized_discounted_cumulative_gain_score(self, results: pd.DataFrame):
        """Calculate the mean nDCG over all queries.

        Args:
            results (pd.DataFrame): Rows with the columns ``qID``,
                ``confidence`` and ``relevant``.

        Returns:
            float: Mean of the per-query nDCG; ``0.0`` when ``results``
            contains no query.
        """
        return self._mean_over_queries(results, self.normalized_discounted_cumulative_gain)

    def mean_reciprocal_rank(self, results: pd.DataFrame, threshold: int = 3):
        """Calculate the mean reciprocal rank over all queries.

        For every query the best-ranked document with
        ``relevant >= threshold`` is used; if the query has none, documents
        with ``relevant >= threshold - 1`` are used instead. A query with
        neither contributes ``0.0``.

        Args:
            results (pd.DataFrame): Rows with the columns ``qID``,
                ``confidence`` and ``relevant``.
            threshold (int): Minimum relevance label counted as a hit.

        Returns:
            float: Mean of the per-query reciprocal rank; ``0.0`` when
            ``results`` contains no query.
        """
        return self._mean_over_queries(
            results, lambda query_results: self._reciprocal_rank(query_results, threshold))

    def _reciprocal_rank(self, results: pd.DataFrame, threshold: int):
        """Return the reciprocal rank of the first sufficiently relevant hit."""
        ranks = self.calculate_ranks(results)
        hits = ranks[ranks['relevant'] >= threshold]
        if hits.empty:
            hits = ranks[ranks['relevant'] >= (threshold - 1)]
        if hits.empty:
            return 0.0
        return 1.0 / float(hits['rank'].min())

    def _mean_over_queries(self, results: pd.DataFrame, score_query):
        """Average ``score_query`` over the rows of every distinct ``qID``."""
        query_ids = results['qID'].unique()
        if len(query_ids) == 0:
            return 0.0
        total = 0.0
        for query_id in query_ids:
            total += score_query(results[results['qID'] == query_id])
        return total / len(query_ids)

    @staticmethod
    def _safe_divide(numerator, denominator):
        """Return ``numerator / denominator`` or ``np.nan`` for a zero denominator."""
        if denominator == 0:
            return np.nan
        return numerator / denominator