stimmenfryslan/notebooks/Predict municipality.ipynb

1815 lines
128 KiB
Plaintext
Raw Normal View History

2018-09-28 10:35:17 +02:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Predict municipality"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.5/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
" \"This module will be removed in 0.20.\", DeprecationWarning)\n"
]
}
],
"source": [
"import json\n",
"import os\n",
"import re\n",
"from collections import defaultdict\n",
"\n",
"import MySQLdb\n",
"import numpy\n",
"import pandas\n",
"import folium\n",
"from IPython.display import display\n",
"from ipy_table import make_table\n",
"from jupyter_progressbar import ProgressBar\n",
"from scipy import stats\n",
"from shapely.geometry import Polygon, MultiPolygon, shape, Point\n",
"\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import classification_report\n",
"from sklearn.metrics import confusion_matrix\n",
"# train_test_split lives in model_selection; sklearn.cross_validation was deprecated in 0.18\n",
"# and is removed in 0.20.\n",
"from sklearn.model_selection import RandomizedSearchCV, train_test_split\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.svm import SVC\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"\n",
"from confusion_matrix import plot_confusion_matrix\n",
"\n",
"%matplotlib notebook\n",
"from matplotlib import pyplot\n",
"\n",
"import autosklearn.classification\n",
"from tpot import TPOTClassifier\n",
"\n",
"# Database credentials are read from the environment so that no secret is stored in the notebook.\n",
"# The password that used to be hard-coded here was published with the notebook and should be rotated.\n",
"# NOTE(review): the environment variable names are a suggestion; adapt them to the deployment.\n",
"db = MySQLdb.connect(\n",
"    user=os.environ.get('STIMMENFRYSLAN_DB_USER', 'root'),\n",
"    passwd=os.environ['STIMMENFRYSLAN_DB_PASSWORD'],  # KeyError names the missing variable\n",
"    db='stimmenfryslan',\n",
"    charset='utf8',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Borders of Frisian municipalities\n",
"\n",
"# GeoJSON is UTF-8 by specification; state the encoding explicitly so that loading the\n",
"# file does not depend on the locale of the machine running the notebook.\n",
"with open('Friesland_AL8.GeoJson', encoding='utf-8') as geojson_file:\n",
"    gemeentes = json.load(geojson_file)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Feature names and their geometries, both in the order of the GeoJSON feature list,\n",
"# so that an index into one list is also valid for the other.\n",
"gemeente_names = [feature['properties']['name'] for feature in gemeentes['features']]\n",
"shapes = [shape(feature['geometry']) for feature in gemeentes['features']]\n",
"\n",
"\n",
"def get_gemeente(point):\n",
"    \"\"\"Return the index in `shapes` of the first geometry containing `point`, or -1 if none does.\"\"\"\n",
"    containing = (index for index, geometry in enumerate(shapes) if geometry.contains(point))\n",
"    return next(containing, -1)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Answers in which participants state how a word should be pronounced.\n",
"\n",
"answers = pandas.read_sql('''\n",
"SELECT prediction_quiz_id, user_lat, user_lng, question_text, answer_text\n",
"FROM core_surveyresult as survey\n",
"INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id\n",
"INNER JOIN core_predictionquizresultquestionanswer as answer\n",
" ON result.id = answer.prediction_quiz_id\n",
"''', db)\n",
"\n",
"# Takes approximately 2 minutes: every distinct (lng, lat) pair is resolved once,\n",
"# then each row looks its municipality index up in that table.\n",
"\n",
"gemeente_map = {\n",
"    coordinate: get_gemeente(Point(*coordinate))\n",
"    for coordinate in set(zip(answers['user_lng'], answers['user_lat']))\n",
"}\n",
"\n",
"answers['gemeente'] = [\n",
"    gemeente_map[coordinate]\n",
"    for coordinate in zip(answers['user_lng'], answers['user_lat'])\n",
"]\n",
"\n",
"# The pronunciation is the text after the first '(' of the answer, minus the last character.\n",
"answers['pronunciation'] = [\n",
"    answer_text[answer_text.find('(') + 1:-1]\n",
"    for answer_text in answers['answer_text']\n",
"]\n",
"\n",
"# The word is the question text without its parenthesised part and without double quotes.\n",
"answers['word'] = [\n",
"    re.sub(r'\\(.*\\)', '', question_text).replace('\"', '').strip()\n",
"    for question_text in answers['question_text']\n",
"]\n",
"\n",
"# Model input item: '<pronunciation>_<word>'.\n",
"answers['input'] = answers['pronunciation'] + '_' + answers['word']"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# One row per (quiz, municipality): all of its 'pronunciation_word' items joined with '+'.\n",
"# Rows with municipality index -1 (no containing shape was found) are dropped.\n",
"dataset = (\n",
"    answers[['prediction_quiz_id', 'gemeente', 'input']]\n",
"    .groupby(['prediction_quiz_id', 'gemeente'])\n",
"    .aggregate('+'.join)\n",
"    .reset_index()\n",
"    .loc[lambda frame: frame['gemeente'] >= 0]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Vocabulary of every character occurring in the inputs. It is sorted so that the list, and\n",
"# every index derived from it, is identical across kernel restarts; the iteration order of a\n",
"# set of strings changes with Python's hash randomisation.\n",
"characters = sorted({character for text in dataset['input'] for character in text})"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Fixed seed: without it every kernel run evaluates on a different split, so the reported\n",
"# scores cannot be reproduced or compared between runs.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    dataset['input'], dataset['gemeente'], random_state=7\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.5/site-packages/sklearn/model_selection/_split.py:605: Warning: The least populated class in y has only 1 members, which is too few. The minimum number of members in any class cannot be less than n_splits=3.\n",
" % (min_groups, self.n_splits)), Warning)\n"
]
},
{
"data": {
"text/plain": [
"RandomizedSearchCV(cv=None, error_score='raise',\n",
" estimator=Pipeline(memory=None,\n",
" steps=[('tfidf', TfidfVectorizer(analyzer='char', binary=False, decode_error='strict',\n",
" dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',\n",
" lowercase=True, max_df=1.0, max_features=None, min_df=1,\n",
" ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,\n",
" ...,\n",
" max_iter=-1, probability=False, random_state=None, shrinking=True,\n",
" tol=0.001, verbose=False))]),\n",
" fit_params=None, iid=True, n_iter=20, n_jobs=8,\n",
" param_distributions={'svm__cache_size': [2000], 'svm__coef0': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fef8dfbea90>, 'svm__kernel': ['linear', 'rbf'], 'tfidf__max_features': [20], 'svm__class_weight': [None], 'tfidf__ngram_range': [(1, 5), (1, 2), (1, 6), (1, 10), (1, 3)], 'svm__shr...t 0x7fef8dfbe6d8>, 'svm__C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fef8dfd5240>},\n",
" pre_dispatch='2*n_jobs', random_state=None, refit=True,\n",
" return_train_score='warn', scoring=None, verbose=0)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Character n-gram TF-IDF features fed into a support vector classifier.\n",
"tfidf = TfidfVectorizer(analyzer='char')\n",
"svc = SVC()\n",
"\n",
"model = Pipeline(steps=[('tfidf', tfidf), ('svm', svc)])\n",
"\n",
"# Search space: lists are sampled uniformly, scipy distributions are sampled via rvs().\n",
"# Single-element lists simply pin a parameter to that value.\n",
"parameters = {\n",
"    'tfidf__ngram_range': [(1,5), (1,2), (1,6), (1,10), (1,3)],\n",
"    'tfidf__max_features': [20],\n",
"    'svm__C': stats.uniform(0, 100.),\n",
"    'svm__kernel': ['linear', 'rbf'],\n",
"    # NOTE(review): degree only affects the 'poly' kernel and coef0 only 'poly'/'sigmoid';\n",
"    # neither kernel is searched here, so these two entries do not change the fitted models.\n",
"    'svm__degree': stats.randint(0, 5),\n",
"    'svm__gamma': stats.uniform(0, 10.),\n",
"    'svm__coef0': stats.uniform(0, 10.),\n",
"    'svm__shrinking': [True],\n",
"    'svm__probability': [False],\n",
"    'svm__cache_size': [2000],\n",
"    'svm__class_weight': [None],\n",
"    'svm__verbose': [False],\n",
"    'svm__max_iter': [-1],\n",
"    'svm__random_state': [None],\n",
"}\n",
"\n",
"# run randomized search\n",
"n_iter_search = 20\n",
"random_search = RandomizedSearchCV(\n",
"    model,\n",
"    param_distributions=parameters,\n",
"    n_iter=n_iter_search,\n",
"    n_jobs=8,\n",
"    random_state=7,  # seed the parameter sampling so the search is reproducible\n",
")\n",
"random_search.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.38 0.53 0.44 91\n",
" 1 0.50 0.50 0.50 2\n",
" 2 0.35 0.22 0.27 54\n",
" 3 0.32 0.18 0.23 51\n",
" 4 0.38 0.59 0.46 46\n",
" 5 0.23 0.14 0.17 22\n",
" 6 0.00 0.00 0.00 2\n",
" 7 0.33 0.19 0.24 31\n",
" 8 1.00 0.03 0.06 35\n",
" 9 0.21 0.29 0.25 68\n",
" 10 0.00 0.00 0.00 4\n",
" 11 0.28 0.14 0.18 37\n",
" 12 0.00 0.00 0.00 4\n",
" 13 0.23 0.26 0.24 35\n",
" 14 0.42 0.72 0.53 134\n",
" 16 0.36 0.25 0.30 52\n",
" 18 0.48 0.19 0.27 73\n",
" 19 0.00 0.00 0.00 1\n",
"\n",
"avg / total 0.38 0.36 0.32 742\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.5/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
" 'precision', 'predicted', average, warn_for)\n"
]
}
],
"source": [
"# Predict the held-out split with the search object and print per-class metrics.\n",
"y_pred = random_search.predict(X_test)\n",
"\n",
"report = classification_report(y_test, y_pred)\n",
"print(report)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.3557951482479784"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fraction of test samples whose predicted municipality equals the true one.\n",
"accuracy = (y_pred == y_test).mean()\n",
"accuracy"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Axis names for the confusion matrix. confusion_matrix() orders its rows and columns by the\n",
"# sorted set of labels that occur in y_test OR y_pred, so the names are built from that same\n",
"# union; using y_test alone shifts the names whenever a class is predicted that is not in y_test.\n",
"existing_gemeente_names = [\n",
"    gemeente_names[i] if i >= 0 else 'Onbekend'\n",
"    for i in sorted(set(y_test) | set(y_pred))\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [],
"source": [
"# `model` itself is never fitted: RandomizedSearchCV fits clones of it, and the refit winner\n",
"# is exposed as best_estimator_. Calling model.predict() on a fresh kernel therefore fails.\n",
"y_pred = random_search.best_estimator_.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.37870619946091644"
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fraction of test samples whose predicted municipality equals the true one.\n",
"accuracy = (y_pred == y_test).mean()\n",
"accuracy"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Confusion matrix, without normalization\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApkAAAK8CAYAAACkzqTXAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzs3XmYVMXZ9/HvDSOIsqnsA6iADKuyDKACsgguiIoCggphURHjGuVJ8iQmojG+Rk3ccAkmRo0LuEUFZREBWUT2RcWNCEYGUFAxgCAzPff7R5/BlgeY7fSZmZ7f57r6Yvp0dd1VXd1NddWpOubuiIiIiIiEqUJJF0BEREREUo86mSIiIiISOnUyRURERCR06mSKiIiISOjUyRQRERGR0KmTKSIiIiKhUydTREREREKnTqaIiIiIhE6dTBEREREJnTqZIiIiIhK6tJIugIiIiEh5ULH6se45u0u6GPjurTPc/axkx1EnU0RERCQCnrObyhkXlXQx2LPqoVpRxNF0uYiIiIiETiOZIiIiIpEwsPIzvld+aioiIiIikdFIpoiIiEgUDDAr6VJERiOZIiIiIhI6dTJFREREJHSaLhcRERGJihb+iIiIiIgUnTqZIiIiIhI6TZeLiIiIREWry0VEREREik4jmSIiIiKR0BV/RERERESKRZ1MEREREQmdpstFREREoqKFPyIiIiIiRadOpoiIiIiETtPlIiIiIlEwtLpcRERERKQ4NJIpIiIiEgnTwh8RERERkeJQJ1NEREREQqfpchEREZGoaOGPiIiIiEjRaSRTREREJCpa+CMiIiIiUnTqZIqIiIhI6DRdLiIiIhIJ08IfEREREZHiUCdTREREREKn6XIRERGRKBhaXS4iIiIiUhwayRQRERGJihb+iIiIiIgUnTqZIiIiIhI6TZeLiIiIREL7ZIqIiIiIFIs6mSIiIiISOk2Xi4iIiESlgvbJFBEREREpMo1kioiIiETB0MIfEREREZHiUCdTREREREKn6XIRERGRqJgW/oiIiIiIFJk6mSIiIiISOk2Xi4iIiERCl5UUERERESkWjWSKiIiIREULf0REREREik6dTBEREREJnabLRURERKKihT8iIiIiIkWnTqaIiIiIhE7T5SIiIiJRMNPqchERERGR4tBIpoiIiEhUtPBHRERERKTo1MkUERERkdBpulxEREQkKlr4IyIiIiJSdBrJFBEREYmEaeGPiIiIiEhxqJMpIiIiIqHTdLmIiIhIVLTwR0RERESk6NTJFBEREZHQabpcREREJAqGVpeLiIiIiBSHRjJFREREIqF9MkVEREREikWdTBEREREJnabLRURERKKifTJFRERERIpOnUwRERERCZ2my0VERESiotXlIiIiIiJFp5FMERERkaho4Y+IiIiISNGpkykiIiIiodN0uYiIiEgUTJeVFBEREREpFnUyRURERCR0mi4XERERiYpWl4uIiIiIFJ1GMkVEREQiYhrJFBEREREpOnUyRURERCR0mi4XERERiYCh6XIRERERkWJRJ1NEyiUzq2JmU8zsOzN7oRj5XGpmM8MsW0kxs+5m9nFJl0NEUoM6mSJSqpnZJWa2zMx2mtlmM5tmZt1CyHoQUBc4xt0HFzUTd3/G3c8IoTxJZWZuZs0Olcbd57t7RlRlEil3rJTcIqJOpoiUWmZ2I3AfcAfxDmFj4GHg/BCyPxb4xN1zQsirzDMznaMvIqFSJ1NESiUzqwHcBlzt7i+7+y53z3b3Ke7+P0GaymZ2n5ltCm73mVnl4LGeZrbRzG4ys6+CUdBRwWO3Ar8HhgQjpJeZ2Xgzezoh/nHB6F9acH+kmX1mZjvMbL2ZXZpwfEHC8041s6XBNPxSMzs14bG5ZvYHM1sY5DPTzGodpP555f9lQvkHmFk/M/vEzL4xs98kpO9sZovMbHuQdoKZVQoemxckWx3Ud0hC/r8ysy3AP/
KOBc9pGsToENxvYGZbzaxnsRpWpFwzzEr+FhV1MkWktDoFOBz41yHS/BY4GWgHnAR0Bm5OeLweUANIBy4DHjKzo9z9FuKjo5Pdvaq7//1QBTGzI4EHgLPdvRpwKrDqAOmOBl4P0h4D/AV43cyOSUh2CTAKqANUAsYdInQ94q9BOvFO8WPAMKAj0B34nZkdH6SNAb8AahF/7U4Hfg7g7qcFaU4K6js5If+jiY/qjkkM7O7/Bn4FPG1mRwD/AJ5097mHKK+IyD7qZIpIaXUMsC2f6exLgdvc/St33wrcCgxPeDw7eDzb3d8AdgJFPecwF2hjZlXcfbO7f3CANOcAn7r7P909x92fAz4Czk1I8w93/8TddwPPE+8gH0w28Ed3zwYmEe9A3u/uO4L4a4l3rnH35e7+bhB3A/BXoEcB6nSLu/8QlOcn3P0xYB2wGKhPvFMvIlIg6mSKSGn1NVArn3MFGwCfJ9z/PDi2L4/9OqnfA1ULWxB33wUMAcYCm83sdTNrUYDy5JUpPeH+lkKU52t3jwV/53UCv0x4fHfe882suZlNNbMtZvZf4iO1B5yKT7DV3ffkk+YxoA3woLv/kE9aEclHSU+Va7pcRAQWAT8AAw6RZhPxqd48jYNjRbELOCLhfr3EB919hrv3JT6i9xHxzld+5ckrU1YRy1QYjxAv1wnuXh34DfmvI/VDPWhmVYkvvPo7MD44HUBEpEDUyRSRUsndvyN+HuJDwYKXI8zsMDM728zuCpI9B9xsZrWDBTS/B54+WJ75WAWcZmaNLb7o6H/zHjCzumZ2fnBu5g/Ep91zD5DHG0Bzi2+7lGZmQ4BWwNQilqkwqgH/BXYGo6xX7ff4l0CTQuZ5P7DM3S8nfq7po8UupUg5V9KjmBrJFBEB3P3PwI3EF/NsBb4ArgFeCZLcDiwD1gDvASuCY0WJ9SYwOchrOT/tGFYIyrEJ+Ib4uY77d+Jw96+B/sBNxKf7fwn0d/dtRSlTIY0jvqhoB/FR1sn7PT4eeNLiq88vyi8zMzsfOIsf63kj0MGCVfUiIvkx90POloiIiIhICCoefbwfecatJV0MdkwesdzdM5MdR5vvioiIiEQkyunqkqbpchEREREJnTqZIiIiIhI6TZeLiIiIRMHIf2OxFKKRTBEREREJnUYyU1CV6kd5tTrp+ScspkY1Dk96jANtRJgMUfywjGofhyjqEtUP8SjaP6pf2jm50bwD0iqUo2GSEETVLlFs5BLVepJUeY99/vkGtm3bFmlljGj3qSwqM/sFcDnx/7reA0YRvxDFJOKX/F0ODHf3vYfKR53MFFStTjqD73o+6XH+fF6rpMfYmxNNNzOKL81U6mRUiOg/mSjav1JaNN3M777PjiROjSMOiyROqvh21yH/jwxNTiz5n/+o3sup8h7r2iXpO/iUSWaWDlwHtHL33Wb2PDAU6Afc6+6TzOxR4DLiVxo7KE2Xi4iIiEiiNKCKmaURv9zuZqA38GLw+JMc+pK/+zIRERERkQiU9ulyd88ys3uA/wC7gZnEp8e3u3tOkGwjkO95eRrJFBERESlfapnZsoTbmLwHzOwo4HzgeKABcCTxS8wWmjqZ5VRuLMbz4wby+h0/B2Djmnd5ftwgJt90IS//dhjfbf48tFgzZ0znxNYZtG7RjLvvujO0fBPt2bOHXt1Opmvn9nTp0JY7/jA+KXHGjhnNsQ3rktm+bVLyh9SqC6RW+0dRl3Wffszp3TL33Zo1PIaJDz8Qepwo6hJVnKjqMvGh++l5cjt6ndKeqy4bzp49e0LJd9y1Y2if0Yg+XTvsO7b922+45MJ+nNapNZdc2I/t278NJRZE9x6D1Gr/FLPN3TMTbhMTHusDrHf3re6eDbwMdAVqBtPnAA2BrPyCqJNZTq15/Z8cld5k3/23J95Gnxv+xJA/v0zz7uew7MW/hhInFotxw3VX8+qUaaxcs5YXJj3Hh2vXhpJ3osqVKzNl+iwWLlnJgsUrmDVzBksXvx
t6nGHDR/LKlGmh55soleqSSu0fVV2anZDBWwuW8daCZcx8ezFVqhzB2f3PDzVGVHWJIk5Uddm8KYu///Uhps1ZxJx
"text/plain": [
"<Figure size 705.6x720 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Large figure so that all municipality names fit on the axes.\n",
"pyplot.rcParams['figure.figsize'] = (9.8, 10)\n",
"\n",
"test_confusion = confusion_matrix(y_test, y_pred)\n",
"plot_confusion_matrix(test_confusion, classes=existing_gemeente_names)\n",
"pyplot.tight_layout()"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.36 0.37 0.36 84\n",
" 1 0.00 0.00 0.00 1\n",
" 2 0.24 0.18 0.21 45\n",
" 3 0.51 0.55 0.53 51\n",
" 4 0.43 0.33 0.38 57\n",
" 5 0.19 0.15 0.16 34\n",
" 6 0.00 0.00 0.00 2\n",
" 7 0.21 0.22 0.22 32\n",
" 8 0.05 0.06 0.05 33\n",
" 9 0.23 0.24 0.23 72\n",
" 10 0.00 0.00 0.00 3\n",
" 11 0.20 0.15 0.17 40\n",
" 12 0.00 0.00 0.00 3\n",
" 13 0.24 0.38 0.29 40\n",
" 14 0.53 0.48 0.50 124\n",
" 16 0.20 0.19 0.20 47\n",
" 18 0.41 0.44 0.43 72\n",
" 19 0.00 0.00 0.00 2\n",
"\n",
"avg / total 0.33 0.32 0.32 742\n",
"\n"
]
}
],
"source": [
"# Per-class precision / recall / F1 for the current predictions.\n",
"report = classification_report(y_test, y_pred)\n",
"print(report)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"import numpy\n",
"from keras.datasets import imdb\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense\n",
"from keras.layers import LSTM\n",
"from keras.layers.embeddings import Embedding\n",
"from keras.preprocessing import sequence\n",
"from keras.preprocessing import text\n",
"from keras.optimizers import Adam\n",
"\n",
"numpy.random.seed(7)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Map every vocabulary character to its position in `characters`.\n",
"char_to_num = dict(zip(characters, range(len(characters))))"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'gɔ:n_gegaan+jun_avond+hø:l_heel+dɑɪ_dag+bɛi_bij+spɾɵts_sprak+eɪx_oog+jɛɾms_armen+tsi:s_kaas+dwɑɾkə_deurtje+sɪəɾə_koken+bwɑst_borst+fisk_vis+snɵən_zaterdag+tɾɑɪn_trein+ɡɪəl_geel+tosk_tand+sɛt_gezet+blɛ:t_blad'"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Positional access: rows were filtered after reset_index(), so the label 0 is not\n",
"# guaranteed to exist any more, whereas the first remaining row always does.\n",
"dataset['input'].iloc[0]"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" 19,\n",
" ...]"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of '_' separators (one per answered word) in every input, summarised as\n",
"# value -> number of rows instead of printing one list entry per row.\n",
"dataset['input'].str.count('_').value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Encode each input string as a list of character indices, then pad all lists to equal length.\n",
"# The pad value len(char_to_num) is one past the largest index assigned to a real character.\n",
"encoded_inputs = [[char_to_num[character] for character in text] for text in dataset['input']]\n",
"X = sequence.pad_sequences(encoded_inputs, value=len(char_to_num))\n",
"\n",
"y = numpy.array(dataset['gemeente'])\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# from sklearn.preprocessing import OneHotEncoder\n",
"# n_values = max(X.ravel()) + 1\n",
"# enc = OneHotEncoder(n_values=n_values)\n",
"# X_ = enc.fit_transform(X)\n",
"# X_ = numpy.array(X_.todense()).reshape(X.shape + (n_values, ))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[31, 39, 31, ..., 37, 12, 48],\n",
" [36, 15, 27, ..., 5, 5, 31],\n",
" [26, 34, 6, ..., 5, 13, 48],\n",
" ...,\n",
" [ 3, 31, 19, ..., 39, 39, 37],\n",
" [48, 3, 36, ..., 27, 36, 41],\n",
" [ 0, 12, 41, ..., 43, 24, 26]], dtype=int32)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X[:, 20:]"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# Character-level classifier: embedding -> LSTM -> one output unit per municipality index.\n",
"lstm = Sequential()\n",
"lstm.add(Embedding(X.max() + 1, 512, input_length=X.shape[1]))\n",
"lstm.add(LSTM(16))\n",
"# softmax is required here: the model is compiled with 'sparse_categorical_crossentropy',\n",
"# which expects class probabilities. A Dense layer without activation would hand the loss\n",
"# unnormalised scores and training would not converge.\n",
"lstm.add(Dense(max(y) + 1, activation='softmax'))"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"# Adam with a small learning rate; the labels are integer class indices, hence the\n",
"# sparse variant of categorical cross-entropy.\n",
"optimizer = Adam(lr=0.0001)\n",
"\n",
"lstm.compile(\n",
"    optimizer=optimizer,\n",
"    loss='sparse_categorical_crossentropy',\n",
"    metrics=['accuracy'],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 2002 samples, validate on 223 samples\n",
"Epoch 1/100\n",
"2002/2002 [==============================] - 3s 1ms/step - loss: 2.6484 - acc: 0.1518 - val_loss: 2.7888 - val_acc: 0.1659\n",
"Epoch 2/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.6440 - acc: 0.1548 - val_loss: 2.7866 - val_acc: 0.1749\n",
"Epoch 3/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.6419 - acc: 0.1608 - val_loss: 2.7870 - val_acc: 0.1749\n",
"Epoch 4/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.6396 - acc: 0.1603 - val_loss: 2.7872 - val_acc: 0.1749\n",
"Epoch 5/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.6373 - acc: 0.1603 - val_loss: 2.7858 - val_acc: 0.1749\n",
"Epoch 6/100\n",
"2002/2002 [==============================] - 2s 789us/step - loss: 2.6356 - acc: 0.1598 - val_loss: 2.7876 - val_acc: 0.1749\n",
"Epoch 7/100\n",
"2002/2002 [==============================] - 2s 792us/step - loss: 2.6343 - acc: 0.1588 - val_loss: 2.7876 - val_acc: 0.1749\n",
"Epoch 8/100\n",
"2002/2002 [==============================] - 2s 794us/step - loss: 2.6325 - acc: 0.1573 - val_loss: 2.7874 - val_acc: 0.1794\n",
"Epoch 9/100\n",
"2002/2002 [==============================] - 2s 794us/step - loss: 2.6309 - acc: 0.1588 - val_loss: 2.7881 - val_acc: 0.1794\n",
"Epoch 10/100\n",
"2002/2002 [==============================] - 2s 798us/step - loss: 2.6293 - acc: 0.1638 - val_loss: 2.7892 - val_acc: 0.1794\n",
"Epoch 11/100\n",
"2002/2002 [==============================] - 2s 864us/step - loss: 2.6283 - acc: 0.1653 - val_loss: 2.7885 - val_acc: 0.1794\n",
"Epoch 12/100\n",
"2002/2002 [==============================] - 2s 796us/step - loss: 2.6267 - acc: 0.1643 - val_loss: 2.7879 - val_acc: 0.1794\n",
"Epoch 13/100\n",
"2002/2002 [==============================] - 2s 791us/step - loss: 2.6255 - acc: 0.1618 - val_loss: 2.7867 - val_acc: 0.1794\n",
"Epoch 14/100\n",
"2002/2002 [==============================] - 2s 789us/step - loss: 2.6241 - acc: 0.1648 - val_loss: 2.7870 - val_acc: 0.1794\n",
"Epoch 15/100\n",
"2002/2002 [==============================] - 2s 847us/step - loss: 2.6228 - acc: 0.1658 - val_loss: 2.7872 - val_acc: 0.1749\n",
"Epoch 16/100\n",
"2002/2002 [==============================] - 2s 818us/step - loss: 2.6217 - acc: 0.1648 - val_loss: 2.7877 - val_acc: 0.1749\n",
"Epoch 17/100\n",
"2002/2002 [==============================] - 2s 890us/step - loss: 2.6206 - acc: 0.1643 - val_loss: 2.7872 - val_acc: 0.1794\n",
"Epoch 18/100\n",
"2002/2002 [==============================] - 2s 824us/step - loss: 2.6197 - acc: 0.1633 - val_loss: 2.7853 - val_acc: 0.1794\n",
"Epoch 19/100\n",
"2002/2002 [==============================] - 2s 826us/step - loss: 2.6183 - acc: 0.1643 - val_loss: 2.7860 - val_acc: 0.1839\n",
"Epoch 20/100\n",
"2002/2002 [==============================] - 2s 826us/step - loss: 2.6169 - acc: 0.1663 - val_loss: 2.7876 - val_acc: 0.1839\n",
"Epoch 21/100\n",
"2002/2002 [==============================] - 2s 830us/step - loss: 2.6160 - acc: 0.1683 - val_loss: 2.7878 - val_acc: 0.1839\n",
"Epoch 22/100\n",
"2002/2002 [==============================] - 2s 854us/step - loss: 2.6148 - acc: 0.1658 - val_loss: 2.7868 - val_acc: 0.1839\n",
"Epoch 23/100\n",
"2002/2002 [==============================] - 2s 878us/step - loss: 2.6140 - acc: 0.1633 - val_loss: 2.7866 - val_acc: 0.1794\n",
"Epoch 24/100\n",
"2002/2002 [==============================] - 2s 915us/step - loss: 2.6128 - acc: 0.1633 - val_loss: 2.7901 - val_acc: 0.1839\n",
"Epoch 25/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.6115 - acc: 0.1663 - val_loss: 2.7887 - val_acc: 0.1839\n",
"Epoch 26/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.6103 - acc: 0.1653 - val_loss: 2.7846 - val_acc: 0.1839\n",
"Epoch 27/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.6091 - acc: 0.1643 - val_loss: 2.7841 - val_acc: 0.1839\n",
"Epoch 28/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.6079 - acc: 0.1653 - val_loss: 2.7829 - val_acc: 0.1928\n",
"Epoch 29/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.6067 - acc: 0.1663 - val_loss: 2.7830 - val_acc: 0.1928\n",
"Epoch 30/100\n",
"2002/2002 [==============================] - 7s 3ms/step - loss: 2.6056 - acc: 0.1673 - val_loss: 2.7854 - val_acc: 0.1928\n",
"Epoch 31/100\n",
"2002/2002 [==============================] - 5s 2ms/step - loss: 2.6048 - acc: 0.1673 - val_loss: 2.7889 - val_acc: 0.2018\n",
"Epoch 32/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.6041 - acc: 0.1658 - val_loss: 2.7906 - val_acc: 0.2018\n",
"Epoch 33/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.6027 - acc: 0.1663 - val_loss: 2.7852 - val_acc: 0.1928\n",
"Epoch 34/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.6015 - acc: 0.1648 - val_loss: 2.7860 - val_acc: 0.2018\n",
"Epoch 35/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.6004 - acc: 0.1638 - val_loss: 2.7853 - val_acc: 0.1883\n",
"Epoch 36/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5999 - acc: 0.1633 - val_loss: 2.7845 - val_acc: 0.2018\n",
"Epoch 37/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5986 - acc: 0.1633 - val_loss: 2.7846 - val_acc: 0.1973\n",
"Epoch 38/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5976 - acc: 0.1648 - val_loss: 2.7831 - val_acc: 0.1973\n",
"Epoch 39/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5964 - acc: 0.1623 - val_loss: 2.7925 - val_acc: 0.1883\n",
"Epoch 40/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5953 - acc: 0.1628 - val_loss: 2.7848 - val_acc: 0.1794\n",
"Epoch 41/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5939 - acc: 0.1628 - val_loss: 2.7826 - val_acc: 0.1704\n",
"Epoch 42/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5932 - acc: 0.1638 - val_loss: 2.7877 - val_acc: 0.1704\n",
"Epoch 43/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5922 - acc: 0.1638 - val_loss: 2.7897 - val_acc: 0.1704\n",
"Epoch 44/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5907 - acc: 0.1658 - val_loss: 2.7886 - val_acc: 0.1839\n",
"Epoch 45/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5901 - acc: 0.1673 - val_loss: 2.8038 - val_acc: 0.1794\n",
"Epoch 46/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5883 - acc: 0.1663 - val_loss: 2.8267 - val_acc: 0.1839\n",
"Epoch 47/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5874 - acc: 0.1668 - val_loss: 2.8270 - val_acc: 0.1928\n",
"Epoch 48/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5862 - acc: 0.1658 - val_loss: 2.8259 - val_acc: 0.1883\n",
"Epoch 49/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5852 - acc: 0.1633 - val_loss: 2.8346 - val_acc: 0.1928\n",
"Epoch 50/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5839 - acc: 0.1633 - val_loss: 2.8440 - val_acc: 0.1794\n",
"Epoch 51/100\n",
"2002/2002 [==============================] - 7s 4ms/step - loss: 2.5833 - acc: 0.1653 - val_loss: 2.8654 - val_acc: 0.1794\n",
"Epoch 52/100\n",
"2002/2002 [==============================] - 10s 5ms/step - loss: 2.5806 - acc: 0.1693 - val_loss: 2.8259 - val_acc: 0.1839\n",
"Epoch 53/100\n",
"2002/2002 [==============================] - 5s 3ms/step - loss: 2.5820 - acc: 0.1678 - val_loss: 2.8230 - val_acc: 0.1839\n",
"Epoch 54/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5801 - acc: 0.1623 - val_loss: 2.7956 - val_acc: 0.1839\n",
"Epoch 55/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5795 - acc: 0.1628 - val_loss: 2.8682 - val_acc: 0.1794\n",
"Epoch 56/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5792 - acc: 0.1648 - val_loss: 2.8311 - val_acc: 0.1794\n",
"Epoch 57/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5820 - acc: 0.1623 - val_loss: 2.8232 - val_acc: 0.1749\n",
"Epoch 58/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5769 - acc: 0.1633 - val_loss: 2.8238 - val_acc: 0.1794\n",
"Epoch 59/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5747 - acc: 0.1653 - val_loss: 2.8675 - val_acc: 0.1794\n",
"Epoch 60/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5739 - acc: 0.1618 - val_loss: 2.8241 - val_acc: 0.1794\n",
"Epoch 61/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5730 - acc: 0.1623 - val_loss: 2.8704 - val_acc: 0.1839\n",
"Epoch 62/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5702 - acc: 0.1613 - val_loss: 2.8851 - val_acc: 0.1794\n",
"Epoch 63/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5691 - acc: 0.1638 - val_loss: 2.8291 - val_acc: 0.1794\n",
"Epoch 64/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5704 - acc: 0.1648 - val_loss: 2.8298 - val_acc: 0.1883\n",
"Epoch 65/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5678 - acc: 0.1618 - val_loss: 2.9033 - val_acc: 0.1749\n",
"Epoch 66/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5703 - acc: 0.1648 - val_loss: 2.8294 - val_acc: 0.1794\n",
"Epoch 67/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5781 - acc: 0.1623 - val_loss: 2.8335 - val_acc: 0.1749\n",
"Epoch 68/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5768 - acc: 0.1543 - val_loss: 2.8851 - val_acc: 0.1839\n",
"Epoch 69/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5769 - acc: 0.1623 - val_loss: 2.8766 - val_acc: 0.1570\n",
"Epoch 70/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5727 - acc: 0.1633 - val_loss: 2.8750 - val_acc: 0.1749\n",
"Epoch 71/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5679 - acc: 0.1693 - val_loss: 2.8760 - val_acc: 0.1883\n",
"Epoch 72/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5668 - acc: 0.1713 - val_loss: 2.8747 - val_acc: 0.1794\n",
"Epoch 73/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5633 - acc: 0.1713 - val_loss: 2.8755 - val_acc: 0.1614\n",
"Epoch 74/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5619 - acc: 0.1713 - val_loss: 2.9167 - val_acc: 0.1749\n",
"Epoch 75/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5693 - acc: 0.1718 - val_loss: 2.9258 - val_acc: 0.1839\n",
"Epoch 76/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5667 - acc: 0.1733 - val_loss: 2.9193 - val_acc: 0.1839\n",
"Epoch 77/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5670 - acc: 0.1788 - val_loss: 2.9670 - val_acc: 0.1794\n",
"Epoch 78/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5653 - acc: 0.1723 - val_loss: 2.9759 - val_acc: 0.1794\n",
"Epoch 79/100\n",
"2002/2002 [==============================] - 3s 1ms/step - loss: 2.5642 - acc: 0.1763 - val_loss: 3.0098 - val_acc: 0.1749\n",
"Epoch 80/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5623 - acc: 0.1758 - val_loss: 3.0134 - val_acc: 0.1704\n",
"Epoch 81/100\n",
"2002/2002 [==============================] - 8s 4ms/step - loss: 2.5618 - acc: 0.1728 - val_loss: 3.0448 - val_acc: 0.1704\n",
"Epoch 82/100\n",
"2002/2002 [==============================] - 9s 4ms/step - loss: 2.5648 - acc: 0.1758 - val_loss: 3.0002 - val_acc: 0.1659\n",
"Epoch 83/100\n",
"2002/2002 [==============================] - 9s 5ms/step - loss: 2.5589 - acc: 0.1733 - val_loss: 2.9652 - val_acc: 0.1749\n",
"Epoch 84/100\n",
"2002/2002 [==============================] - 3s 1ms/step - loss: 2.5584 - acc: 0.1728 - val_loss: 3.0391 - val_acc: 0.1749\n",
"Epoch 85/100\n",
"2002/2002 [==============================] - 2s 829us/step - loss: 2.5575 - acc: 0.1678 - val_loss: 2.9984 - val_acc: 0.1480\n",
"Epoch 86/100\n",
"2002/2002 [==============================] - 2s 801us/step - loss: 2.5658 - acc: 0.1698 - val_loss: 3.0056 - val_acc: 0.1614\n",
"Epoch 87/100\n",
"2002/2002 [==============================] - 2s 800us/step - loss: 2.5643 - acc: 0.1663 - val_loss: 3.0066 - val_acc: 0.1525\n",
"Epoch 88/100\n",
"2002/2002 [==============================] - 2s 813us/step - loss: 2.5621 - acc: 0.1633 - val_loss: 2.9622 - val_acc: 0.1525\n",
"Epoch 89/100\n",
"2002/2002 [==============================] - 2s 792us/step - loss: 2.5612 - acc: 0.1653 - val_loss: 2.9627 - val_acc: 0.1570\n",
"Epoch 90/100\n",
"2002/2002 [==============================] - 2s 816us/step - loss: 2.5575 - acc: 0.1683 - val_loss: 2.9586 - val_acc: 0.1749\n",
"Epoch 91/100\n",
"2002/2002 [==============================] - 2s 796us/step - loss: 2.5545 - acc: 0.1708 - val_loss: 2.9602 - val_acc: 0.1525\n",
"Epoch 92/100\n",
"2002/2002 [==============================] - 2s 801us/step - loss: 2.5522 - acc: 0.1738 - val_loss: 2.9574 - val_acc: 0.1614\n",
"Epoch 93/100\n",
"2002/2002 [==============================] - 2s 797us/step - loss: 2.5490 - acc: 0.1733 - val_loss: 2.9680 - val_acc: 0.1794\n",
"Epoch 94/100\n",
"2002/2002 [==============================] - 2s 810us/step - loss: 2.5481 - acc: 0.1748 - val_loss: 3.0203 - val_acc: 0.1704\n",
"Epoch 95/100\n",
"2002/2002 [==============================] - 2s 928us/step - loss: 2.5457 - acc: 0.1723 - val_loss: 3.0504 - val_acc: 0.1570\n",
"Epoch 96/100\n",
"2002/2002 [==============================] - 2s 949us/step - loss: 2.5475 - acc: 0.1738 - val_loss: 3.0056 - val_acc: 0.1480\n",
"Epoch 97/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5450 - acc: 0.1728 - val_loss: 3.0211 - val_acc: 0.1614\n",
"Epoch 98/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5511 - acc: 0.1683 - val_loss: 3.0546 - val_acc: 0.1614\n",
"Epoch 99/100\n",
"2002/2002 [==============================] - 2s 1ms/step - loss: 2.5457 - acc: 0.1703 - val_loss: 3.0207 - val_acc: 0.1614\n",
"Epoch 100/100\n",
"2002/2002 [==============================] - 2s 908us/step - loss: 2.5444 - acc: 0.1718 - val_loss: 3.0648 - val_acc: 0.1614\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7feedf76a860>"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Train the LSTM for 100 epochs in batches of 512 samples; validation_split=0.1\n",
"# makes fit() hold out 10% of the training arrays to report val_loss / val_acc.\n",
"# Alternative left by the author: validate on the test split instead by passing\n",
"# validation_data=(X_test, y_test), optionally with verbose=2.\n",
"lstm.fit(\n",
"    X_train,\n",
"    y_train,\n",
"    batch_size=512,\n",
"    epochs=100,\n",
"    validation_split=0.1,\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}