# accent_classification/accent_classification/output_confusion_matrix.py

import os
import sys

import itertools
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix


def plot_confusion_matrix(cm, classes,
						  normalize=False,
						  title='Confusion matrix',
						  cmap=plt.cm.Blues):
	"""
	Draw a confusion matrix as an annotated heat map on the current figure.

	Adapted from the scikit-learn example:
	http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

	Args:
		cm: 2-D array-like of values; rows are placed on the y axis
			("True label") and columns on the x axis ("Predicted label").
		classes: tick labels, used for both axes.
		normalize: if True, every row is divided by its own sum before
			plotting. Rows that sum to zero are left as all zeros.
		title: accepted for backward compatibility; it is currently not
			drawn on the figure.
		cmap: colormap handed to ``plt.imshow``.

	Returns:
		None. The plot is drawn through the ``pyplot`` state machine and a
		one-line status message is printed to stdout.
	"""
	# Accept nested lists as well as arrays.
	cm = np.asarray(cm)

	if normalize:
		# A class without any true sample has a row sum of 0; dividing by it
		# would yield NaN cells and a NaN colour threshold. Divide such rows
		# by 1 instead so they simply stay at zero.
		row_totals = cm.sum(axis=1, keepdims=True).astype('float')
		row_totals[row_totals == 0] = 1.0
		cm = cm.astype('float') / row_totals
		print("Normalized confusion matrix")
	else:
		print('Confusion matrix, without normalization')

	_fontsize = 24
	plt.imshow(cm, interpolation='nearest', cmap=cmap)
	tick_marks = np.arange(len(classes))
	plt.xticks(tick_marks, classes, fontsize=_fontsize-4)
	plt.yticks(tick_marks, classes, fontsize=_fontsize-4)

	# The 'd' format code only works for integers; any float matrix
	# (normalized or not) is shown with two decimals.
	fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
	# Cells above half of the maximum get white text, the others black.
	thresh = cm.max() / 2.
	for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
		plt.text(j, i, format(cm[i, j], fmt),
				 horizontalalignment="center",
				 color="white" if cm[i, j] > thresh else "black",
				 fontsize=_fontsize)

	plt.tight_layout()
	# Extra room at the bottom for the x-axis label.
	plt.subplots_adjust(bottom=0.2)
	plt.ylabel('True label', fontsize=_fontsize-4)
	plt.xlabel('Predicted label', fontsize=_fontsize-4)


if __name__ == "__main__":
	# Hard-coded location of the dialect-identification project on the
	# author's machine; it is appended to the import path as well.
	project_dir = 'C:\\Users\\Aki\\source\\repos\\rug_VS\\dialect_identification\\dialect_identification'
	parent_of_script_dir = os.path.dirname(sys.path[0])
	sys.path.append(os.path.join(parent_of_script_dir, project_dir))

	# Label sets for the 3-region and the 2-region experiments.
	labels_3regions = ['Groningen_and_Drenthe', 'Oost_Overijsel-Gelderland', 'Limburg']
	labels_2regions = ['Groningen_and_Drenthe', 'Limburg']
	result_dir = project_dir + '\\result\\same-utterance_with_cities'

	# Column 1 is passed as the true labels and column 2 as the predictions.
	predictions = np.load(result_dir + '\\pred_per_pid_3regions.npy')
	y_true = predictions[:, 1]
	y_pred = predictions[:, 2]

	# Optional accuracy report:
	#accuracy = accuracy_score(y_true, y_pred, normalize=True, sample_weight=None)
	#print('accuracy: {}%'.format(accuracy * 100))

	# Confusion matrix of the machine predictions.
	conf_mat = confusion_matrix(y_true, y_pred, labels=labels_3regions)
	# Alternative matrices from the human perception experiment:
	#   2 regions
	#conf_mat = np.array([[39, 57], [6, 104]])
	#   3 regions
	#conf_mat = np.array([[22, 14, 52], [23, 21, 52], [5, 5, 100]])
	print(conf_mat)

	np.set_printoptions(precision=2)

	# Plot the row-normalized matrix with abbreviated region names.
	plt.figure()
	short_names = ['GD', 'OG', 'LB']
	plot_confusion_matrix(conf_mat, classes=short_names, normalize=True)
	# 2-region variant:
	#plot_confusion_matrix(conf_mat, classes=['GD', 'LB'], normalize=True)

	# Save to disk instead of showing interactively (use plt.show() to display).
	figure_path = result_dir + '\\cm_machine_3regions_normalized.png'
	plt.savefig(figure_path)