# Cross-validation helpers: confidence interval for a sample mean and
# K-fold evaluation with an accumulated confusion matrix.
import numpy as np
import scipy as sp
import scipy.stats

from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
# from https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
def mean_confidence_interval(data, confidence):
    """Confidence interval for the mean of a sample, via Student's t.

    Parameters
    ----------
    data : array-like of numbers
        The sample. Needs at least 2 elements so the standard error
        (n-1 degrees of freedom) is defined.
    confidence : float
        Confidence level in (0, 1), e.g. 0.95.

    Returns
    -------
    (m, m - h, m + h) : tuple of floats
        Sample mean and the lower/upper bounds of the interval, where
        h is the half-width se * t_{(1+confidence)/2, n-1}.
    """
    a = 1.0 * np.array(data)  # force float dtype regardless of input
    n = len(a)
    m, se = np.mean(a), sp.stats.sem(a)
    # Fix: the original called the private `t._ppf`, which skips argument
    # validation and was removed in newer SciPy releases; the public `ppf`
    # (percent-point function, i.e. inverse CDF) is the supported API.
    h = se * sp.stats.t.ppf((1 + confidence) / 2.0, n - 1)
    return m, m - h, m + h
|
|
|
|
# accumulated confusion matrix is added to cross_val_score
def cross_val_confusion_matrix(model, X, y, cv):
    """K-fold cross-validation with an accumulated confusion matrix.

    Like ``cross_val_score`` but additionally sums the per-fold confusion
    matrices into a single matrix over all test folds.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``.
    X : 2-D array, indexable by row
        Feature matrix.
    y : 1-D array
        Labels aligned with the rows of X.
    cv : int
        Number of folds for ``KFold``.

    Returns
    -------
    (scores, confusion) : tuple
        ``scores`` is an np.ndarray of per-fold micro-averaged F1 scores;
        ``confusion`` is the confusion matrix summed over all folds,
        with rows/columns ordered by ``np.unique(y)``.
    """
    labels = np.unique(y)
    n_classes = labels.shape[0]
    total_confusion = np.zeros((n_classes, n_classes))
    fold_scores = []

    for train_idx, test_idx in KFold(n_splits=cv).split(X):
        # fit on the training rows, evaluate on the held-out rows
        fitted = model.fit(X[train_idx, :], y[train_idx])
        predictions = fitted.predict(X[test_idx, :])

        fold_scores.append(f1_score(y[test_idx], predictions, average='micro'))
        # fixed `labels` keeps the matrix shape stable even if a fold
        # is missing some class
        total_confusion += confusion_matrix(y[test_idx], predictions,
                                            labels=labels)

    return np.array(fold_scores), total_confusion