accent_classification/accent_classification/evaluation.py

40 lines
1.3 KiB
Python

import numpy as np
import scipy as sp
import scipy.stats
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
# from https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
def mean_confidence_interval(data, confidence=0.95):
    """Return the sample mean and its two-sided Student-t confidence interval.

    Args:
        data: Sequence (or array) of numeric samples.
        confidence: Confidence level of the interval, e.g. ``0.95``.

    Returns:
        A tuple ``(mean, lower, upper)`` where ``lower``/``upper`` are the
        mean minus/plus the half-width ``sem * t_ppf((1 + confidence) / 2,
        n - 1)``.
    """
    samples = np.asarray(data, dtype=float)
    n = len(samples)
    mean = np.mean(samples)
    std_err = scipy.stats.sem(samples)
    # Use the public ``ppf`` rather than the private ``_ppf``: the private
    # method skips argument validation and is not part of SciPy's stable API.
    half_width = std_err * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
    return mean, mean - half_width, mean + half_width
# Cross-validation that returns the per-fold scores (as cross_val_score does)
# plus a confusion matrix accumulated over all folds.
def cross_val_confusion_matrix(model, X, y, cv):
    """Run K-fold cross-validation, collecting scores and a summed confusion matrix.

    Args:
        model: Object exposing ``fit(X, y)`` whose return value exposes
            ``predict(X)``. ``fit`` is called on the passed object directly
            for every fold (no copy is made here).
        X: Feature data, indexed as ``X[rows, :]`` with integer index arrays.
        y: Targets, indexed as ``y[rows]`` with integer index arrays.
        cv: Number of folds, passed to ``KFold(n_splits=cv)``.

    Returns:
        A tuple ``(scores, confusion)``: ``scores`` is an array holding the
        micro-averaged F1 score of each fold, and ``confusion`` is the
        element-wise sum of the per-fold confusion matrices, with rows and
        columns ordered by ``np.unique(y)``.
    """
    splitter = KFold(n_splits=cv)
    labels = np.unique(y)
    n_labels = labels.shape[0]
    confusion_total = np.zeros((n_labels, n_labels))
    fold_scores = []

    for train_rows, test_rows in splitter.split(X):
        # Carve out this fold's train/test partitions.
        features_fit, features_eval = X[train_rows, :], X[test_rows, :]
        targets_fit, targets_eval = y[train_rows], y[test_rows]

        fitted = model.fit(features_fit, targets_fit)
        predicted = fitted.predict(features_eval)

        fold_scores.append(f1_score(targets_eval, predicted, average='micro'))
        # Fix the label order so every fold yields a matrix of the same shape,
        # even if some class is absent from the fold.
        confusion_total = confusion_total + confusion_matrix(
            targets_eval, predicted, labels=labels
        )

    return np.array(fold_scores), confusion_total