import numpy as np
import scipy as sp
import scipy.stats
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix


# from https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
def mean_confidence_interval(data, confidence):
    """Return the sample mean and a Student-t confidence interval around it.

    Parameters
    ----------
    data : array-like
        Sample values. The interval uses ``n - 1`` degrees of freedom, so at
        least two samples are needed for a meaningful result.
    confidence : float
        Two-sided confidence level, e.g. ``0.95``.

    Returns
    -------
    tuple
        ``(mean, lower, upper)`` where ``lower = mean - h`` and
        ``upper = mean + h``; ``h`` is the standard error of the mean scaled
        by the t quantile at ``(1 + confidence) / 2``.
    """
    samples = np.asarray(data, dtype=float)
    n = len(samples)
    mean = np.mean(samples)
    std_err = scipy.stats.sem(samples)
    # Use the public ``ppf``; the private ``_ppf`` bypasses argument
    # validation and is not part of scipy's supported API.
    half_width = std_err * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
    return mean, mean - half_width, mean + half_width


# accumulated confusion matrix is added to cross_val_score
def cross_val_confusion_matrix(model, X, y, cv):
    """Cross-validate ``model`` and accumulate a confusion matrix over folds.

    Parameters
    ----------
    model : estimator
        Object with ``fit(X, y)`` and ``predict(X)``. ``fit`` is called on
        this same object for every fold and its return value is used for
        prediction, so ``fit`` must return the fitted estimator.
    X : array
        Feature matrix; rows are selected with ``X[indices, :]``.
    y : array
        Target labels; selected with ``y[indices]``.
    cv : int
        Number of folds passed to ``KFold(n_splits=cv)``.

    Returns
    -------
    scores : numpy.ndarray
        Micro-averaged F1 score of each fold, in fold order.
    confusion : numpy.ndarray
        Float matrix of shape ``(n_classes, n_classes)`` holding the sum of
        the per-fold confusion matrices; rows/columns follow
        ``np.unique(y)`` order.
    """
    splitter = KFold(n_splits=cv)
    class_labels = np.unique(y)
    n_classes = class_labels.shape[0]
    confusion_total = np.zeros((n_classes, n_classes))
    fold_scores = []

    for idx_train, idx_test in splitter.split(X):
        # split into train/test
        x_train, x_test = X[idx_train, :], X[idx_test, :]
        y_train, y_test = y[idx_train], y[idx_test]

        fitted = model.fit(x_train, y_train)

        # evaluation
        y_pred = fitted.predict(x_test)
        fold_scores.append(f1_score(y_test, y_pred, average='micro'))
        # Fix the label set so every fold yields a matrix of the same shape,
        # even when a fold is missing some class.
        confusion_total = confusion_total + confusion_matrix(
            y_test, y_pred, labels=class_labels
        )

    return np.array(fold_scores), confusion_total