cleaned up gabmap file creation

This commit is contained in:
2018-10-01 17:19:36 +02:00
parent e9ca31d8ec
commit d299069253
36 changed files with 42976 additions and 47776 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,83 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Gabmap format\n",
"\n",
"Exploration of the format of the lines in example Gabmap files Martijn had sent."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/martijn_format/Dutch613-coordinates.txt') as f:\n",
" coordinates = list(f)\n",
" \n",
"with open('../data/martijn_format/Nederlands-ipa.utxt') as f:\n",
" table = list(f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"coordinates[0].split('\\t')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"coordinates[1].split('\\t')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"table[0].split('\\t')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"table[1].split('\\t')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,458 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Geographical pronunciation tables, simple example\n",
"\n",
"Simple example to create gabmap files for two words with few pronunciations an two regions."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('..')\n",
"\n",
"import pandas\n",
"import MySQLdb\n",
"import json\n",
"import copy\n",
"\n",
"db = MySQLdb.connect(user='root', passwd='Nmmxhjgt1@', db='stimmen', charset='utf8')\n",
"\n",
"from shapely.geometry import shape, Point\n",
"\n",
"from gabmap import create_gabmap_dataframes\n",
"\n",
"from stimmen.geojson import merge_features"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/Friesland_wijken.geojson') as f:\n",
" regions = json.load(f)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load and simplify"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Answers to how participants state a word should be pronounced\n",
"\n",
"answers = pandas.read_sql('''\n",
"SELECT prediction_quiz_id, user_lat, user_lng, question_text, answer_text\n",
"FROM core_surveyresult as survey\n",
"INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id\n",
"INNER JOIN core_predictionquizresultquestionanswer as answer\n",
" ON result.id = answer.prediction_quiz_id\n",
"''', db)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"regions_simple = merge_features(copy.deepcopy(regions),\n",
" condition=lambda feature: feature['properties']['GM_NAAM'] == 'Heerenveen',\n",
")\n",
"\n",
"regions_simple = merge_features(\n",
" regions_simple,\n",
" condition=lambda feature: feature['properties']['GM_NAAM'] == 'Leeuwarden',\n",
")\n",
"regions_simple['features'] = regions_simple['features'][-2:]\n",
"\n",
"regions_simple['features'][0]['properties']['name'] = 'Heerenveen'\n",
"regions_simple['features'][1]['properties']['name'] = 'Leeuwarden'"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"answers_simple = answers[\n",
" (answers['question_text'] == '\"blad\" (aan een boom)') |\n",
" (answers['question_text'] == '\"vis\"')\n",
"].copy()\n",
"\n",
"answers_simple['question_text'] = answers_simple['question_text'].map(\n",
" lambda x: x.replace('\"', '').replace('*', ''))\n",
"\n",
"answers_simple['answer_text'] = answers_simple['answer_text'].map(\n",
" lambda x: x[x.find('('):x.find(')')][1:])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Two words, boom and vis, with each 4 and 2 pronunciations"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>answer_text</th>\n",
" </tr>\n",
" <tr>\n",
" <th>question_text</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>blad (aan een boom)</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>vis</th>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" answer_text\n",
"question_text \n",
"blad (aan een boom) 4\n",
"vis 2"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"answers_simple.groupby('question_text').agg({'answer_text': lambda x: len(set(x))})"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"centroids_example, pronunciations_example, counts_example = create_gabmap_dataframes(\n",
" regions_simple, answers_simple,\n",
" latitude_column='user_lat', longitude_column='user_lng',\n",
" word_column='question_text', pronunciation_column='answer_text',\n",
" region_name_property='name'\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Resulting tables\n",
"\n",
"Stored as tab separated files for gabmap"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" </tr>\n",
" <tr>\n",
" <th>#name</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Heerenveen</th>\n",
" <td>52.996076</td>\n",
" <td>5.977925</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Leeuwarden</th>\n",
" <td>53.169940</td>\n",
" <td>5.797613</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" latitude longitude\n",
"#name \n",
"Heerenveen 52.996076 5.977925\n",
"Leeuwarden 53.169940 5.797613"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"centroids_example"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>blad (aan een boom)</th>\n",
" <th>vis</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Heerenveen</th>\n",
" <td>blet / blɑt / blɔd / blɛ:t</td>\n",
" <td>fisk / fɪs</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Leeuwarden</th>\n",
" <td>blet / blɑt / blɔd / blɛ:t</td>\n",
" <td>fisk / fɪs</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" blad (aan een boom) vis\n",
" \n",
"Heerenveen blet / blɑt / blɔd / blɛ:t fisk / fɪs\n",
"Leeuwarden blet / blɑt / blɔd / blɛ:t fisk / fɪs"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pronunciations_example"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>blad (aan een boom): blet</th>\n",
" <th>blad (aan een boom): blɑt</th>\n",
" <th>blad (aan een boom): blɔd</th>\n",
" <th>blad (aan een boom): blɛ:t</th>\n",
" <th>vis: fisk</th>\n",
" <th>vis: fɪs</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Heerenveen</th>\n",
" <td>31.654676</td>\n",
" <td>2.158273</td>\n",
" <td>2.158273</td>\n",
" <td>64.028777</td>\n",
" <td>52.517986</td>\n",
" <td>47.482014</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Leeuwarden</th>\n",
" <td>7.865169</td>\n",
" <td>7.022472</td>\n",
" <td>8.707865</td>\n",
" <td>76.404494</td>\n",
" <td>75.000000</td>\n",
" <td>25.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" blad (aan een boom): blet blad (aan een boom): blɑt \\\n",
" \n",
"Heerenveen 31.654676 2.158273 \n",
"Leeuwarden 7.865169 7.022472 \n",
"\n",
" blad (aan een boom): blɔd blad (aan een boom): blɛ:t vis: fisk \\\n",
" \n",
"Heerenveen 2.158273 64.028777 52.517986 \n",
"Leeuwarden 8.707865 76.404494 75.000000 \n",
"\n",
" vis: fɪs \n",
" \n",
"Heerenveen 47.482014 \n",
"Leeuwarden 25.000000 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"counts_example"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"pronunciations_example.to_csv('../data/Pronunciations_example.gabmap.tsv', sep='\\t')\n",
"counts_example.to_csv('../data/Pronunciation_percentages_example.gabmap.tsv', sep='\\t')\n",
"centroids_example.to_csv('../data/Centroids_example.gabmap.tsv', sep='\\t', columns=['longitude', 'latitude'])\n",
"with open('../data/Gabmap_example.geojson', 'w') as f:\n",
" json.dump(regions_simple, f)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,157 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Geographical pronunciation tables\n",
"\n",
"Creates gabmap files with region centroids, percentages and pronunciations for wijken in Friesland."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('..')\n",
"\n",
"import pandas\n",
"import MySQLdb\n",
"import json\n",
"import copy\n",
"\n",
"db = MySQLdb.connect(user='root', passwd='Nmmxhjgt1@', db='stimmen', charset='utf8')\n",
"\n",
"from shapely.geometry import shape, Point\n",
"\n",
"from gabmap import create_gabmap_dataframes"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/Friesland_wijken.geojson') as f:\n",
" regions = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Answers to how participants state a word should be pronounced\n",
"\n",
"answers = pandas.read_sql('''\n",
"SELECT prediction_quiz_id, user_lat, user_lng, question_text, answer_text\n",
"FROM core_surveyresult as survey\n",
"INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id\n",
"INNER JOIN core_predictionquizresultquestionanswer as answer\n",
" ON result.id = answer.prediction_quiz_id\n",
"''', db)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"zero_latlng_questions = {\n",
" q\n",
" for q, row in answers.groupby('question_text').agg('std').iterrows()\n",
" if row['user_lat'] == 0 and row['user_lng'] == 0\n",
"}\n",
"answers_filtered = answers[answers['question_text'].map(lambda x: x not in zero_latlng_questions)].copy()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['gegaan', 'avond', 'heel', 'dag', 'bij (insect)', 'sprak (toe)',\n",
" 'oog', 'armen (lichaamsdeel)', 'kaas', 'deurtje', 'koken',\n",
" 'borst (lichaamsdeel)', 'vis', 'zaterdag', 'trein', 'geel', 'tand',\n",
" 'gezet', 'blad (aan een boom)'], dtype=object)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"answers_filtered['question_text'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"answers_filtered['question_text'] = answers_filtered['question_text'].map(\n",
" lambda x: x.replace('\"', '').replace('*', ''))\n",
"\n",
"answers_filtered['answer_text'] = answers_filtered['answer_text'].map(\n",
" lambda x: x[x.find('('):x.find(')')][1:])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"centroids, pronunciations, counts = create_gabmap_dataframes(\n",
" regions, answers_filtered,\n",
" latitude_column='user_lat', longitude_column='user_lng',\n",
" word_column='question_text', pronunciation_column='answer_text',\n",
" region_name_property='gemeente_en_wijk_naam'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"pronunciations.to_csv('../data/Friesland_wijken_pronunciations.gabmap.tsv', sep='\\t')\n",
"counts.to_csv('../data/Friesland_wijken_pronunciation_percentages.gabmap.tsv', sep='\\t')\n",
"centroids.to_csv('../data/Friesland_wijken_centroids.gabmap.tsv', sep='\\t', columns=['longitude', 'latitude'])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

View File

@@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -49,13 +49,13 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"with open('dialect_regions.geojson', 'r') as f:\n",
"with open('../data/fryslan_dialect_regions.geojson', 'r') as f:\n",
" geojson = json.load(f)\n",
"\n",
"dialect_regions = [region['properties']['dialect'] for region in geojson['features']]"
@@ -63,7 +63,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -97,7 +97,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -122,7 +122,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -143,13 +143,13 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "67ed3190256b447c81daf3df1f189318",
"model_id": "5825449a737b4fcab38a4f4ac2adfd87",
"version_major": 2,
"version_minor": 0
},
@@ -167,7 +167,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -183,7 +183,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -202,13 +202,13 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "201b0aed64e8494db603de15b560d919",
"model_id": "8afad9f71e544658b554b828932d7769",
"version_major": 2,
"version_minor": 0
},
@@ -226,7 +226,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [

View File

@@ -1,430 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"with open('martijn_format/Dutch613-coordinates.txt') as f:\n",
" coordinates = list(f)\n",
" \n",
"with open('martijn_format/Nederlands-ipa.utxt') as f:\n",
" table = list(f)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Aalsmeer NH', '4.76163', '52.2693\\n']"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"coordinates[1].split('\\t')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['',\n",
" 'kippen',\n",
" 'mijn',\n",
" 'vriend',\n",
" 'bloemen',\n",
" 'spinnen',\n",
" 'machines',\n",
" 'werk',\n",
" 'op',\n",
" 'schip',\n",
" 'kregen',\n",
" 'beschimmeld',\n",
" 'brood',\n",
" 'timmerman',\n",
" 'splinter',\n",
" 'vinger',\n",
" 'fabriek',\n",
" 'vier',\n",
" 'bier',\n",
" 'twee',\n",
" 'drie',\n",
" 'hij',\n",
" 'knuppel',\n",
" 'ik',\n",
" 'knie',\n",
" 'gezien',\n",
" 'ragebol',\n",
" 'pet',\n",
" 'paddestoel',\n",
" 'kerel',\n",
" 'brede',\n",
" 'stenen',\n",
" 'breder',\n",
" 'breedste',\n",
" 'standbeeld',\n",
" 'duivel',\n",
" 'gebleven',\n",
" 'meester',\n",
" 'zee',\n",
" 'graag',\n",
" 'keelpijn',\n",
" 'steel',\n",
" 'bezem',\n",
" 'neen',\n",
" 'geroepen',\n",
" 'peer',\n",
" 'rijp',\n",
" 'geld',\n",
" 'ver',\n",
" 'brengen',\n",
" 'vrouw',\n",
" 'zwemmen',\n",
" 'sterk',\n",
" 'bed',\n",
" 'optillen',\n",
" 'metselaar',\n",
" 'springen',\n",
" 'boterham',\n",
" 'vader',\n",
" 'zes',\n",
" 'jaar',\n",
" 'school',\n",
" 'laten',\n",
" 'gaan',\n",
" 'water',\n",
" 'potten',\n",
" 'zijn',\n",
" 'veel',\n",
" 'maart',\n",
" 'nog',\n",
" 'koud',\n",
" 'kaars',\n",
" 'geeft',\n",
" 'licht',\n",
" 'paard',\n",
" 'tegen',\n",
" 'zwaluwen',\n",
" 'kaas',\n",
" 'motor',\n",
" 'dag',\n",
" 'avond',\n",
" 'jongetje',\n",
" 'barst',\n",
" 'brief',\n",
" 'hart',\n",
" 'spannen',\n",
" 'nieuwe',\n",
" 'kar',\n",
" 'zoon',\n",
" 'koning',\n",
" 'ook',\n",
" 'geweest',\n",
" 'rozen',\n",
" 'lange',\n",
" 'woord',\n",
" 'kindje',\n",
" 'was',\n",
" 'dochtertje',\n",
" 'bos',\n",
" 'ladder',\n",
" 'mond',\n",
" 'droog',\n",
" 'dorst',\n",
" 'weg',\n",
" 'krom',\n",
" 'liedje',\n",
" 'goed',\n",
" 'kelder',\n",
" 'voor',\n",
" 'moest',\n",
" 'ossenbloed',\n",
" 'drinken',\n",
" 'broer',\n",
" 'moe',\n",
" 'karnemelk',\n",
" 'dun',\n",
" 'zuur',\n",
" 'put',\n",
" 'uur',\n",
" 'Italië',\n",
" 'bergen',\n",
" 'vuur',\n",
" 'spuwen',\n",
" 'duwen',\n",
" 'hebben',\n",
" 'stuk',\n",
" 'brug',\n",
" 'veulen',\n",
" 'komen',\n",
" 'deur',\n",
" 'naaien',\n",
" 'gras',\n",
" 'brouwer',\n",
" 'bakken',\n",
" 'je',\n",
" 'eieren',\n",
" 'krijgen',\n",
" 'markt',\n",
" 'waren',\n",
" 'vijf',\n",
" 'eikels',\n",
" 'hooi',\n",
" 'is',\n",
" 'groen',\n",
" 'boompje',\n",
" 'wijn',\n",
" 'huis',\n",
" 'melk',\n",
" 'spuit',\n",
" 'koe',\n",
" 'koster',\n",
" 'kruiwagen',\n",
" 'buigen',\n",
" 'Duitsers',\n",
" 'blauw',\n",
" 'geslagen',\n",
" 'saus',\n",
" 'flauw',\n",
" 'sneeuw',\n",
" 'stad',\n",
" 'doen',\n",
" 'dopen',\n",
" 'doopvont',\n",
" 'soldaten',\n",
" 'dorsen',\n",
" 'binden',\n",
" 'gebonden\\n']"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"table[0].split('\\t')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['West-Terschelling',\n",
" 'kipən',\n",
" 'miŋ',\n",
" 'kɑ̟mərɑ̟ːt',\n",
" 'blʊmə',\n",
" 'spɪnə',\n",
" 'məsinəs / məʃinəs',\n",
" 'ʋɔrə̆k',\n",
" 'ʊp̬',\n",
" 'sxɪp',\n",
" 'kreːɣə̃ / krɪɣə̃',\n",
" 'fəsxɪməlt / bəsxɪməlt',\n",
" 'bro̝ˑə̆t',\n",
" 'tɪmərmɑn',\n",
" 'splɪntər',\n",
" 'fɪŋər',\n",
" 'fəbrik',\n",
" 'fjɔŭwər',\n",
" 'biˑə̆r',\n",
" 'twɑ̟',\n",
" 'treːĭjə',\n",
" 'hɛĭ',\n",
" 'knʏpəl / ɛinhɔŭt',\n",
" 'ɪk',\n",
" 'knɪbəl',\n",
" 'siˑə̆n',\n",
" 'rɑ̟ːɣəbɔl',\n",
" 'pɛt',\n",
" 'pɑ̟dəstuˑə̆l',\n",
" 'mɑ̟n',\n",
" 'breːdə / breːjə',\n",
" 'stɪː',\n",
" 'breːdər',\n",
" 'breːstə',\n",
" 'stɔndbeːlt',\n",
" 'dyvəl',\n",
" 'blɔŭn',\n",
" 'meːstər',\n",
" 'seˑ',\n",
" 'xrɑːx',\n",
" 'ətĩˑsĩkeːl',\n",
" 'stɛːl',\n",
" 'biːzəm',\n",
" 'neː',\n",
" 'rɔft',\n",
" 'pɔˑə̆r',\n",
" 'rip',\n",
" 'jɪlt',\n",
" 'fiˑə̆r',\n",
" 'brɪŋə',\n",
" 'ʋiːf',\n",
" 'swʊmə',\n",
" 'stɛrk',\n",
" 'bɛˑə̆t',\n",
" 'ʊptɪlən',\n",
" '',\n",
" 'sprɪŋə',\n",
" '',\n",
" 'tɔ̞ˑə̆',\n",
" 'sɛks',\n",
" 'jiə̆r',\n",
" 'sxuˑəl',\n",
" 'lɪtn̩̆',\n",
" 'xeˑə̆̃',\n",
" 'ʋɛtər',\n",
" 'pɔtn̩̆',\n",
" 'bɪn',\n",
" 'fʊlə',\n",
" 'mɑˑə̆t',\n",
" 'nɔx',\n",
" 'kɔˑə̆t',\n",
" 'kɛs',\n",
" 'jʊxt',\n",
" 'jɛxt',\n",
" 'hoĭsʲ',\n",
" 'tsjɪ',\n",
" 'swɑːlywə',\n",
" 'tsiːs / tsjiːs',\n",
" 'moˑtər',\n",
" 'dɛĭ',\n",
" 'ioŋ',\n",
" 'jʊŋkjə',\n",
" 'bœ̝st',\n",
" 'briːf',\n",
" 'hɔĭtʲ',\n",
" 'spɔnə',\n",
" 'niˑjə',\n",
" 'kɑ̟rə',\n",
" 'sɪn',\n",
" 'koˑə̆nɪŋ',\n",
" 'eˑə̆k',\n",
" 'ʋɛn',\n",
" 'roˑə̆zən',\n",
" 'lɑ̟ŋə',\n",
" 'ʋɔĭtʲ',\n",
" 'bɔ̞nʲ',\n",
" 'ʋɑ̟z',\n",
" 'fɑ̟mkə / dɔxtərtsə',\n",
" 'bʊs',\n",
" 'leˑə̆rt',\n",
" 'mylə',\n",
" 'drux',\n",
" 'toˑə̆st',\n",
" 'ʋɛĭ',\n",
" '',\n",
" 'fɛsjə',\n",
" 'xuˑət',\n",
" 'kɛldər',\n",
" 'fŭɑ̟r',\n",
" 'mɔs',\n",
" 'ɔsəbluˑət̬',\n",
" 'drɪŋkə',\n",
" 'bruər',\n",
" 'muˑə̆t',\n",
" 'suˑp / kɑrnəmoˑə̆lək',\n",
" 'tɪn',\n",
" 'suːr',\n",
" 'pʏt',\n",
" 'uːr',\n",
" 'itɑ̟ːljə',\n",
" 'bɑ̟rɣən',\n",
" 'fjuːr',\n",
" 'spiˑə̆n',\n",
" 'drʏkən',\n",
" 'hɑ̟bə',\n",
" 'stɪk',\n",
" 'brʏx',\n",
" 'fɔlʲtsə',\n",
" 'kʊmə',\n",
" 'doˑə̆r',\n",
" 'nɑːĭjə',\n",
" 'xɛs',\n",
" 'brɔŭwər',\n",
" 'bɑ̟kə',\n",
" 'do̞',\n",
" 'ɑːə̆jən',\n",
" 'kriˑjə',\n",
" 'mɑ̟rək / mɑrəkt',\n",
" 'wɑːrə̃',\n",
" 'fiːf',\n",
" 'ɛikəls',\n",
" 'heˑə̆',\n",
" 'ɪz',\n",
" 'xriˑə̆n',\n",
" 'boːmkə',\n",
" 'ʋin',\n",
" 'hyːs',\n",
" 'mʊə̆lək',\n",
" 'spœĭt',\n",
" 'ku',\n",
" 'kɔstər',\n",
" 'krødʋɛin',\n",
" 'buːɣə',\n",
" 'dytsərs',\n",
" 'blɑːŭ',\n",
" 'slɛin',\n",
" 'sjy',\n",
" '',\n",
" 'sneː',\n",
" 'stɑ̟t',\n",
" 'dweˑə̆n',\n",
" 'doːpə',\n",
" 'doːpfʊnt',\n",
" 'sɔldɑːtən',\n",
" 'tɛskjə',\n",
" 'binə',\n",
" 'boŋ\\n']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"table[1].split('\\t')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,433 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pronunciation-based location prediction confusion\n",
"\n",
"Setup a pandas dataframe with in each row\n",
"\n",
" * participant provided (actual) location,\n",
" * 3 estimations made by Nanna's heuristic based in what the participant stated to be the correct pronunciation of a word\n",
" * distance between the actual and heuristic predicted location\n",
" \n",
"Averages of the distances are exported for visualisation in QGIS."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"import pandas\n",
"import MySQLdb\n",
"import numpy\n",
"import itertools\n",
"import requests\n",
"import json\n",
"from vincenty import vincenty\n",
"\n",
"db = MySQLdb.connect(user='root', passwd='Nmmxhjgt1@', db='stimmen')\n",
"\n",
"%matplotlib inline\n",
"from matplotlib import pyplot, rcParams\n",
"from jupyter_progressbar import ProgressBar\n",
"\n",
"# rcParams['font.family'] = 'Lucinda Console'\n",
"rcParams['font.size'] = '24'\n",
"rcParams['figure.figsize'] = (20, 10)\n",
"rcParams['figure.dpi'] = 100"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def simplify_area_name(x):\n",
" return ' '.join(\n",
" x.split('/') # split Dutch and Frysian name\n",
" [0] # extract Dutch name\n",
" .strip()\n",
" .split(' ') # Split area name from province, mostly 'Fr'\n",
" [:-1] # remove province\n",
" ).strip().lower() # rejoin spaces in area name"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"metadata = pandas.read_sql('''SELECT answer.* FROM core_surveyresultquestionanswer as answer''', db)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"grouped = metadata.groupby(['survey_result_id', 'question_id']).agg({\n",
" 'question_text': 'first',\n",
" 'answer_text': lambda x: x if len(x) == 1 else ', '.join(x)\n",
"})\n",
"grouped.reset_index(inplace=True)\n",
"\n",
"grouped = grouped.pivot(index='survey_result_id', columns='question_text', values='answer_text')\n",
"\n",
"grouped = grouped.rename({\n",
" 'Do you go to school?': 'school',\n",
" 'Do you go to university?': 'university',\n",
" 'What is your age bracket?': 'age_bracket',\n",
" 'What is your age?': 'age',\n",
" 'What is your gender?': 'gender',\n",
" 'Which language are you the most proficient in?': 'language',\n",
" 'Which languages do you actively use in your life?': 'active-languages'\n",
"}, axis='columns')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"predictions = pandas.read_sql('''\n",
"SELECT \n",
" sr.id as id,\n",
" sr.area_name as actual_area,\n",
" area1_name as area_prediction_1,\n",
" area2_name as area_prediction_2,\n",
" area3_name as area_prediction_3\n",
"FROM core_surveyresult as sr\n",
"INNER JOIN core_predictionquizresult as pq\n",
" ON sr.id = pq.survey_result_id\n",
"''', db)\n",
"\n",
"predicted_areas = set(map(simplify_area_name,\n",
" set(predictions['area_prediction_1']) |\n",
" set(predictions['area_prediction_2']) |\n",
" set(predictions['area_prediction_3'])\n",
"))\n",
"actual_areas = set(map(str.lower, predictions['actual_area']))\n",
"\n",
"areas = list(predicted_areas | actual_areas)\n",
"location_to_number = {l: i for i, l in enumerate(areas)}"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"simplified_predictions = pandas.DataFrame({\n",
" 'id': list(predictions['id']),\n",
" 'actual': list(map(str.lower, predictions['actual_area'])),\n",
" 'prediction_1': list(map(simplify_area_name, predictions['area_prediction_1'])),\n",
" 'prediction_2': list(map(simplify_area_name, predictions['area_prediction_2'])),\n",
" 'prediction_3': list(map(simplify_area_name, predictions['area_prediction_3'])),\n",
"})\n",
"# simplified_predictions.set_index('id')\n",
"simplified_predictions.to_excel('actual-predictions.xls')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"locations = {location for c in simplified_predictions.columns for location in simplified_predictions[c] if c != 'id'}"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"names = pandas.read_csv('plaatsen_nl.csv')\n",
"\n",
"nonominatim = {\n",
" name: [row['st_y'], row['x']]\n",
" for _, row in names.iterrows()\n",
" for column in ['bebouwdeko', 'naamoffici', 'naamnl', 'naamfries']\n",
"# for _ in [ print(row[column]) ]\n",
" if type(row[column]) == str\n",
" for name in [row[column], row[column].lower().replace('-', ' ')]\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "21c016f24e23473e807ed3e9c2d942c6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='<b>0</b>s passed', placeholder='0…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"nominatim = {\n",
" l: json.loads(\n",
" requests.get(\n",
" 'https://nominatim.openstreetmap.org/search.php?q=Netherlands%20'\n",
" '{}&polygon_geojson=1&viewbox=&format=json'.format(l)\n",
" ).text\n",
" )\n",
" for l in ProgressBar(locations)\n",
" if l not in nonominatim\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"latlons = {\n",
" l: (float(v[0]['lat']), float(v[0]['lon']))\n",
" for l, v in nominatim.items()\n",
" if len(v) > 0\n",
"}\n",
"latlons.update(nonominatim)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"for c in {'actual', 'prediction_1', 'prediction_2', 'prediction_3'}:\n",
" simplified_predictions['{}_latlon'.format(c)] = [\n",
" latlons.get(l, numpy.nan)\n",
" for l in simplified_predictions['{}'.format(c)]\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"for c in {'prediction_1_latlon', 'prediction_2_latlon', 'prediction_3_latlon'}:\n",
" simplified_predictions['{}_distance'.format(c)] = [\n",
" vincenty(x, y) if x == x and y == y else numpy.nan\n",
" for x, y in zip(simplified_predictions['actual_latlon'], simplified_predictions[c])\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"simplified_predictions = simplified_predictions[[\n",
" 'id', 'actual', 'actual_latlon', 'prediction_3_latlon_distance',\n",
" 'prediction_1_latlon_distance', 'prediction_2_latlon_distance'\n",
"]]\n",
"\n",
"simplified_predictions = simplified_predictions.rename({\n",
" 'prediction_3_latlon_distance': 'distance3',\n",
" 'prediction_1_latlon_distance': 'distance1',\n",
" 'prediction_2_latlon_distance': 'distance2'\n",
"}, axis='columns')"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"simplified_predictions = simplified_predictions.join(grouped, on='id')"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"simplified_predictions['latitude'] = simplified_predictions['actual_latlon'].map(lambda x: x[0] if x == x else None)\n",
"simplified_predictions['longitude'] = simplified_predictions['actual_latlon'].map(lambda x: x[1] if x == x else None)\n",
"simplified_predictions = simplified_predictions.drop('actual_latlon', axis='columns')"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"simplified_predictions['age_groups'] = [\n",
" {'0-10': '0-20', '11-20': '0-20',\n",
" '21-30': '21-50', '31-40': '21-50', '41-50': '21-50',\n",
" '51-60': '51-100', '61-70': '51-100', '71-80': '51-100', '81-90': '51-100', '91-100': '51-100'}.get(b, None)\n",
" for b in simplified_predictions['age_bracket']\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# age_groups = simplified_predictions.groupby(['age_groups', 'actual']).agg({\n",
"# 'distance1': ['mean', 'min', 'max', 'count', 'size'],\n",
"# 'latitude': 'first',\n",
"# 'longitude': 'first'\n",
"# })\n",
"# age_groups.index.get_level_values('age_groups')"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# gender_groups = simplified_predictions.groupby(['gender', 'actual']).agg({\n",
"# 'distance1': ['min', 'mean', 'max', 'count', 'size'],\n",
"# 'latitude': 'first',\n",
"# 'longitude': 'first'\n",
"# })\n",
"# gender_groups"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"summary = simplified_predictions[['latitude', 'longitude', 'distance1', 'distance2', 'distance3', 'actual']]"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"summary.to_csv('points.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"geojson = {\n",
" \"type\": \"FeatureCollection\",\n",
" \"features\": [\n",
" {\n",
" \"type\": \"Feature\",\n",
" \"properties\": {\n",
" \"distance 1\": row['prediction_1_latlon_distance'].mean() if row['prediction_1_latlon_distance'].isnull().sum() == 0 else -0.0001,\n",
" \"distance 2\": row['prediction_2_latlon_distance'].mean() if row['prediction_2_latlon_distance'].isnull().sum() == 0 else -0.0001,\n",
" \"distance 3\": row['prediction_3_latlon_distance'].mean() if row['prediction_3_latlon_distance'].isnull().sum() == 0 else -0.0001,\n",
" \"actual\": actual\n",
" },\n",
" \"geometry\": {\n",
" \"type\": \"Point\",\n",
" \"coordinates\": list( actual_lat_lon )[::-1]\n",
" }\n",
" }\n",
" for actual, row in simplified_predictions.groupby('actual')\n",
" if actual != ''\n",
"# for _ in [ print(row['actual_latlon']), print() ]\n",
" for actual_lat_lon in [list(row['actual_latlon'])[0]] # alias \n",
" if actual_lat_lon == actual_lat_lon\n",
" ]\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"geojson = {\n",
" \"type\": \"FeatureCollection\",\n",
" \"features\": [\n",
" {\n",
" \"type\": \"Feature\",\n",
" \"properties\": {\n",
" \"distance 1\": row['prediction_1_latlon_distance'].mean() if row['prediction_1_latlon_distance'].isnull().sum() == 0 else -0.0001,\n",
" \"distance 2\": row['prediction_2_latlon_distance'].mean() if row['prediction_2_latlon_distance'].isnull().sum() == 0 else -0.0001,\n",
" \"distance 3\": row['prediction_3_latlon_distance'].mean() if row['prediction_3_latlon_distance'].isnull().sum() == 0 else -0.0001,\n",
" \"actual\": actual\n",
" },\n",
" \"geometry\": {\n",
" \"type\": \"Point\",\n",
" \"coordinates\": list( actual_lat_lon )[::-1]\n",
" }\n",
" }\n",
" for actual, row in simplified_predictions.groupby('actual')\n",
" if actual != ''\n",
"# for _ in [ print(row['actual_latlon']), print() ]\n",
" for actual_lat_lon in [list(row['actual_latlon'])[0]] # alias \n",
" if actual_lat_lon == actual_lat_lon\n",
" ]\n",
"}"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

View File

@@ -1,293 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Geographical pronunciation statistics"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"import pandas\n",
"import MySQLdb\n",
"import numpy\n",
"import json\n",
"\n",
"db = MySQLdb.connect(user='root', passwd='Nmmxhjgt1@', db='stimmen', charset='utf8')\n",
"\n",
"%matplotlib notebook\n",
"from matplotlib import pyplot\n",
"import folium\n",
"from IPython.display import display\n",
"from shapely.geometry import Polygon, MultiPolygon, shape, Point\n",
"from jsbutton import JsButton\n",
"from jupyter_progressbar import ProgressBar\n",
"from collections import defaultdict\n",
"from ipy_table import make_table\n",
"from html import escape\n",
"\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib.colors import LogNorm\n",
"from sklearn import mixture\n",
"from skimage.measure import find_contours\n",
"from collections import Counter\n",
"from random import shuffle"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Borders of Frysian municipalities\n",
"\n",
"with open('Friesland_AL8.GeoJson') as f:\n",
" gemeentes = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"coords = [feature['geometry'] for feature in gemeentes['features']]\n",
"coords_folium = [[[[c__[::-1] for c__ in c_] for c_ in c] for c in coords_['coordinates']] for coords_ in coords]\n",
"shapes = [shape(coords_) for coords_ in coords]\n",
"gemeente_names = [feature['properties']['name'] for feature in gemeentes['features']]\n",
"\n",
"def get_gemeente(point):\n",
" for i, shape in enumerate(shapes):\n",
" if shape.contains(point):\n",
" return i\n",
" return -1"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Answers to how participants state a word should be pronounced.\n",
"\n",
"answers = pandas.read_sql('''\n",
"SELECT prediction_quiz_id, user_lat, user_lng, question_text, answer_text\n",
"FROM core_surveyresult as survey\n",
"INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id\n",
"INNER JOIN core_predictionquizresultquestionanswer as answer\n",
" ON result.id = answer.prediction_quiz_id\n",
"''', db)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"zero_latlng_questions = {\n",
" q\n",
" for q, row in answers.groupby('question_text').agg('std').iterrows()\n",
" if row['user_lat'] == 0 and row['user_lng'] == 0\n",
"}\n",
"answers_filtered = answers[answers['question_text'].map(lambda x: x not in zero_latlng_questions)]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.6/site-packages/ipykernel_launcher.py:10: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" # Remove the CWD from sys.path while we load stuff.\n"
]
}
],
"source": [
"# Takes approximately 2 minutes\n",
"\n",
"gemeente_map = {\n",
" (lng, lat): get_gemeente(Point(lng, lat))\n",
" for lng, lat in set(zip(answers_filtered['user_lng'], answers_filtered['user_lat']))\n",
"}\n",
"\n",
"answers_filtered['gemeente'] = [\n",
" gemeente_map[(lng, lat)]\n",
" for lat, lng in zip(answers_filtered['user_lat'], answers_filtered['user_lng'])\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" \n"
]
}
],
"source": [
"answers_filtered['question_text_url'] = answers_filtered['question_text'].map(\n",
" lambda x: x.replace('\"', '').replace('*', ''))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e48eb24f5c43434bad4241d4bea53074",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='<b>0</b>s passed', placeholder='0…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"cmap = pyplot.get_cmap('YlOrRd')\n",
"\n",
"for question, rows in ProgressBar(\n",
" answers_filtered.groupby('question_text_url'),\n",
" size=len(answers_filtered['question_text_url'].unique())\n",
"):\n",
" m = folium.Map((rows['user_lat'].median(), rows['user_lng'].median()), tiles=None, zoom_start=9)\n",
" pecentage_labels = folium.FeatureGroup(name='pecentages', overlay=True)\n",
" order = [a for _, a in sorted((\n",
" (r['user_lat'], answer)\n",
" for answer, r in rows.groupby('answer_text').count().iterrows()\n",
" ), reverse=True)]\n",
" gemeente_normalizer = {\n",
" gemeente: r['user_lat']\n",
" for gemeente, r in rows.groupby('gemeente').count().iterrows()\n",
" }\n",
" for answer_text in order:\n",
" rows_ = rows[rows['answer_text'] == answer_text]\n",
" if (rows_['gemeente'] >= 0).sum() <= 0:\n",
" continue\n",
"\n",
" spread = {\n",
" gemeente: r['user_lat']\n",
" for gemeente, r in rows_.groupby('gemeente').count().iterrows()\n",
" if gemeente >= 0\n",
" }\n",
" n_answers = sum(spread.values())\n",
" \n",
" name = '{} ({})'.format(answer_text, n_answers)\n",
" group = folium.FeatureGroup(name=name, overlay=False)\n",
" folium.TileLayer(tiles='stamentoner').add_to(group)\n",
" \n",
" max_value = max(value / gemeente_normalizer[gemeente] for gemeente, value in spread.items())\n",
" for gemeente, gemeente_name in enumerate(gemeente_names):\n",
" if gemeente in spread:\n",
" value = spread[gemeente]\n",
" percentage = value / gemeente_normalizer[gemeente]\n",
" color_value = percentage / max_value\n",
" color = '#%02x%02x%02x' % tuple(int(255 * c) for c in cmap(color_value)[:3])\n",
" \n",
" polygon = folium.Polygon(coords_folium[gemeente], fill_color=color, fill_opacity=0.8,\n",
" color='#555555', popup='{} ({}, {}%)'.format(gemeente_name, value, round(100*percentage)))\n",
" centroid = shapes[gemeente].centroid\n",
" centroid = (centroid.y, centroid.x)\n",
"# folium.Circle(centroid, color='green', radius=200).add_to(group)\n",
" folium.map.Marker(\n",
" [shapes[gemeente].centroid.y, shapes[gemeente].centroid.x],\n",
" icon=folium.DivIcon(\n",
" icon_size=(50, 24),\n",
" icon_anchor=(25, 12),\n",
" html='<div class=\"percentage-label\" style=\"font-size: 12pt; background-color: rgba(255,255,255,0.8); border-radius: 12px; text-align: center;\">{:d}%</div>'.format(int(100 * percentage)),\n",
" )\n",
" ).add_to(group)\n",
" else:\n",
" polygon = folium.Polygon(coords_folium[gemeente], fill_color=None, fill_opacity=0, color='#555555')\n",
" polygon.add_to(group)\n",
" group.add_to(m)\n",
" pecentage_labels.add_to(m)\n",
" folium.map.LayerControl('topright', collapsed=False).add_to(m)\n",
" JsButton(\n",
" title='<i class=\"fas fa-tags\"></i>',\n",
" function=\"\"\"\n",
" function(btn, map){\n",
" $('.percentage-label').toggle();\n",
" }\n",
" \"\"\"\n",
" ).add_to(m)\n",
"# display(m)\n",
" m.save('maps/heatmaps/{}.html'.format(question))\n",
"# break"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"import glob\n",
"with open('maps/heatmaps/index.html', 'w') as f:\n",
" f.write('<html><head></head><body>' + \n",
" '<br/>\\n'.join(\n",
" '\\t<a href=\"http://herbertkruitbosch.com/pronunciation_maps/{}\">{}<a>'.format(fn[5:], fn[14:-5].replace('_', ' '))\n",
" for fn in sorted(\n",
" glob.glob('maps/heatmaps/*.html')\n",
" )\n",
" ) + \"</body></html>\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,312 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Geographical pronunciation statistics"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {},
"outputs": [],
"source": [
"import pandas\n",
"import MySQLdb\n",
"import numpy\n",
"import json\n",
"\n",
"db = MySQLdb.connect(user='root', passwd='Nmmxhjgt1@', db='stimmen', charset='utf8')\n",
"\n",
"%matplotlib notebook\n",
"from matplotlib import pyplot\n",
"import folium\n",
"from IPython.display import display\n",
"from shapely.geometry import Polygon, MultiPolygon, shape, Point\n",
"from jsbutton import JsButton\n",
"from shapely.geometry import LineString, MultiLineString\n",
"from jupyter_progressbar import ProgressBar\n",
"from collections import defaultdict, Counter\n",
"from ipy_table import make_table\n",
"from html import escape\n",
"\n",
"import numpy as np\n",
"from random import shuffle\n",
"import pickle\n",
"from jupyter_progressbar import ProgressBar"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {},
"outputs": [],
"source": [
"with open('friesland_wijken.p3', 'rb') as f:\n",
" wijken, wijk_shapes = pickle.load(f)\n",
"\n",
"wijk_names = [wijk['properties']['GM_NAAM'] + ', ' + wijk['properties'].get('WK_NAAM', '') for wijk in wijken['features']]\n",
"\n",
"def get_wijk(point):\n",
" for i, shape in enumerate(wijk_shapes):\n",
" if shape.contains(point):\n",
" return i\n",
" return -1"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [],
"source": [
"def listify(rd_multipolygon):\n",
" if len(rd_multipolygon) == 2 and tuple(map(type, rd_multipolygon)) == (float, float):\n",
" return list(rd_multipolygon)\n",
" return [\n",
" listify(element)\n",
" for element in rd_multipolygon\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {},
"outputs": [],
"source": [
"# Answers to how participants state a word should be pronounced.\n",
"\n",
"answers = pandas.read_sql('''\n",
"SELECT prediction_quiz_id, user_lat, user_lng, question_text, answer_text\n",
"FROM core_surveyresult as survey\n",
"INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id\n",
"INNER JOIN core_predictionquizresultquestionanswer as answer\n",
" ON result.id = answer.prediction_quiz_id\n",
"''', db)"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {},
"outputs": [],
"source": [
"zero_latlng_questions = {\n",
" q\n",
" for q, row in answers.groupby('question_text').agg('std').iterrows()\n",
" if row['user_lat'] == 0 and row['user_lng'] == 0\n",
"}\n",
"answers_filtered = answers[answers['question_text'].map(lambda x: x not in zero_latlng_questions)]"
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {},
"outputs": [],
"source": [
"def reverse(rd_multipolygon):\n",
" if len(rd_multipolygon) == 2 and tuple(map(type, rd_multipolygon)) == (float, float):\n",
" return rd_multipolygon[::-1]\n",
" return [\n",
" reverse(element)\n",
" for element in rd_multipolygon\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.6/site-packages/ipykernel_launcher.py:10: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" # Remove the CWD from sys.path while we load stuff.\n"
]
}
],
"source": [
"# Takes approximately 2 minutes\n",
"points = set(zip(answers_filtered['user_lng'], answers_filtered['user_lat']))\n",
"\n",
"wijk_map = dict()\n",
"for lng, lat in points:\n",
" wijk_map[(lng, lat)] = get_wijk(Point(lng, lat))\n",
"\n",
"answers_filtered['wijk'] = [\n",
" wijk_map[(lng, lat)]\n",
" for lat, lng in zip(answers_filtered['user_lat'], answers_filtered['user_lng'])\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 135,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" \n"
]
}
],
"source": [
"answers_filtered['question_text_url'] = answers_filtered['question_text'].map(\n",
" lambda x: x.replace('\"', '').replace('*', ''))"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ea89078b81da4daba82bcd4b1ddbe8c2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='<b>0</b>s passed', placeholder='0…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"cmap = pyplot.get_cmap('YlOrRd')\n",
"\n",
"for question, rows in ProgressBar(\n",
" answers_filtered.groupby('question_text_url'),\n",
" size=len(answers_filtered['question_text_url'].unique())\n",
"):\n",
" m = folium.Map((rows['user_lat'].median(), rows['user_lng'].median()), tiles=None, zoom_start=9)\n",
" order = [a for _, a in sorted((\n",
" (r['user_lat'], answer)\n",
" for answer, r in rows.groupby('answer_text').count().iterrows()\n",
" ), reverse=True)]\n",
" wijk_normalizer = {\n",
" wijk: r['user_lat']\n",
" for wijk, r in rows.groupby('wijk').count().iterrows()\n",
" }\n",
" for answer_text in (order):\n",
" rows_ = rows[rows['answer_text'] == answer_text]\n",
" if (rows_['wijk'] >= 0).sum() <= 0:\n",
" continue\n",
"\n",
" spread = {\n",
" wijk: r['user_lat']\n",
" for wijk, r in rows_.groupby('wijk').count().iterrows()\n",
" if wijk >= 0\n",
" }\n",
" n_answers = sum(spread.values())\n",
" \n",
" name = '{} ({})'.format(answer_text, n_answers)\n",
" group = folium.FeatureGroup(name=name, overlay=False)\n",
" folium.TileLayer(tiles='stamentoner').add_to(group)\n",
" \n",
" max_value = max(value / wijk_normalizer[wijk] for wijk, value in spread.items())\n",
" \n",
" for wijk, wijk_name in enumerate(wijk_names):\n",
" coordinates = reverse(wijken['features'][wijk]['geometry']['coordinates'])\n",
" if wijk in spread:\n",
" value = spread[wijk]\n",
" percentage = value / wijk_normalizer[wijk]\n",
" color_value = percentage / max_value\n",
" color = '#%02x%02x%02x' % tuple(int(255 * c) for c in cmap(color_value)[:3])\n",
" \n",
" polygon = folium.Polygon(\n",
" coordinates, fill_color=color, fill_opacity=0.8,\n",
" color='#555555', popup='{} ({}, {: 3d}%)'.format(wijk_name, value, int(100*percentage))\n",
" \n",
" )\n",
" centroid = wijk_shapes[wijk].centroid\n",
" centroid = (centroid.y, centroid.x)\n",
" folium.map.Marker(\n",
" [wijk_shapes[wijk].centroid.y, wijk_shapes[wijk].centroid.x],\n",
" icon=folium.DivIcon(\n",
" icon_size=(30, 16),\n",
" icon_anchor=(15, 8),\n",
" html='<div class=\"percentage-label\" style=\"font-size: 8pt; background-color: rgba(255,255,255,0.8); border-radius: 4px; text-align: center;\">{:d}%</div>'.format(int(100 * percentage)),\n",
" )\n",
" ).add_to(group)\n",
" else:\n",
" polygon = folium.Polygon(coordinates, fill_color=None, fill_opacity=0, color='#555555')\n",
" polygon.add_to(group)\n",
" group.add_to(m)\n",
" JsButton(\n",
" title='<i class=\"fas fa-tags\"></i>',\n",
" function=\"\"\"\n",
" function(btn, map){\n",
" $('.percentage-label').toggle();\n",
" }\n",
" \"\"\"\n",
" ).add_to(m)\n",
" folium.map.LayerControl('topright', collapsed=False).add_to(m)\n",
"# display(m)\n",
" m.save('maps/heatmaps-wijk/{}.html'.format(question))\n",
"# break"
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {},
"outputs": [],
"source": [
"import glob\n",
"with open('maps/heatmaps-wijk/index.html', 'w') as f:\n",
" f.write('<html><head></head><body>' + \n",
" '<br/>\\n'.join(\n",
" '\\t<a href=\"{}\">{}<a>'.format(fn, fn[:-5].replace('_', ' '))\n",
" for fn in sorted(\n",
" glob.glob('maps/heatmaps-wijk/*.html')\n",
" )\n",
" for fn in [fn[len('maps/heatmaps-wijk/'):]]\n",
" ) + \"</body></html>\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,327 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Geographical pronunciation statistics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas\n",
"import MySQLdb\n",
"import numpy\n",
"import json\n",
"\n",
"db = MySQLdb.connect(user='root', passwd='Nmmxhjgt1@', db='stimmen', charset='utf8')\n",
"\n",
"%matplotlib notebook\n",
"from matplotlib import pyplot\n",
"import folium\n",
"from IPython.display import display\n",
"from shapely.geometry import Polygon, MultiPolygon, shape, Point\n",
"from jupyter_progressbar import ProgressBar\n",
"from collections import defaultdict\n",
"from ipy_table import make_table\n",
"from html import escape\n",
"\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib.colors import LogNorm\n",
"from sklearn import mixture\n",
"from skimage.measure import find_contours\n",
"from collections import Counter\n",
"from random import shuffle"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Borders of Frysian municipalities\n",
"\n",
"with open('Friesland_AL8.GeoJson') as f:\n",
" gemeentes = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"shapes = [shape(feature['geometry']) for feature in gemeentes['features']]\n",
"gemeente_names = [feature['properties']['name'] for feature in gemeentes['features']]\n",
"\n",
"def get_gemeente(point):\n",
" for i, shape in enumerate(shapes):\n",
" if shape.contains(point):\n",
" return i\n",
" return -1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Answers to how participants state a word should be pronounced.\n",
"\n",
"answers = pandas.read_sql('''\n",
"SELECT prediction_quiz_id, user_lat, user_lng, question_text, answer_text\n",
"FROM core_surveyresult as survey\n",
"INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id\n",
"INNER JOIN core_predictionquizresultquestionanswer as answer\n",
" ON result.id = answer.prediction_quiz_id\n",
"''', db)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Takes approximately 2 minutes\n",
"\n",
"gemeente_map = {\n",
" (lng, lat): get_gemeente(Point(lng, lat))\n",
" for lng, lat in set(zip(answers['user_lng'], answers['user_lat']))\n",
"}\n",
"\n",
"answers['gemeente'] = [\n",
" gemeente_map[(lng, lat)]\n",
" for lat, lng in zip(answers['user_lat'], answers['user_lng'])\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Mapping pronunciations\n",
"\n",
"The idea is to plot each pronunciation as a point of a different color, now only seems to show participation density.\n",
"\n",
"Slow, so started with the first question."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# cmap = pyplot.get_cmap('gist_rainbow')\n",
"\n",
"# std = (1.89, 1.35)\n",
"\n",
"# for _, (question, rows) in zip(range(3), answers.groupby('question_text')):\n",
"# plt.figure()\n",
"# n_answers = len(rows.groupby('answer_text').count())\n",
"# colors = cmap(range(256))[::256 // n_answers]\n",
"# for (answer, rows_), color in zip(rows.groupby('answer_text'), colors):\n",
"# if len(rows_) < 100:\n",
"# continue\n",
"# color = '#%02x%02x%02x' % tuple(int(c*255) for c in color[:3])\n",
"# X = rows_[['user_lat', 'user_lng']].as_matrix()\n",
"\n",
"# clf = mixture.GaussianMixture(n_components=5, covariance_type='full')\n",
"# clf.fit(X)\n",
"# xlim = numpy.percentile(X[:, 0], [1, 99.5])\n",
"# ylim = numpy.percentile(X[:, 1], [1, 99.5])\n",
"# xlim = [2*xlim[0] - xlim[1], 2*xlim[1] - xlim[0]]\n",
"# ylim = [2*ylim[0] - ylim[1], 2*ylim[1] - ylim[0]]\n",
" \n",
"# x = np.linspace(*xlim, 1000)\n",
"# y = np.linspace(*ylim, 1000)\n",
"# xx, yy = np.meshgrid(x, y)\n",
"# xxyy = np.array([xx.ravel(), yy.ravel()]).T\n",
"# z = np.exp(clf.score_samples(xxyy))\n",
"# z = z.reshape(xx.shape)\n",
" \n",
"# z_sorted = sorted(z.ravel(), reverse=True)\n",
"# z_sorted_cumsum = np.cumsum(z_sorted)\n",
"# split = np.where(z_sorted_cumsum > (z_sorted_cumsum[-1] * 0.5))[0][0]\n",
"# threshold = z_sorted[split]\n",
"# threshold\n",
"\n",
"# # p = list(range(0, 100, 5))\n",
"\n",
"# p = [80]\n",
"# plt.contour(xx, yy, z, levels=[threshold], colors=[color])\n",
"# plt.plot(X[:, 0], X[:, 1], '.', c=color)\n",
"# plt.xlim(*xlim)\n",
"# plt.ylim(*ylim)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"zero_latlng_questions = {\n",
" q\n",
" for q, row in answers.groupby('question_text').agg('std').iterrows()\n",
" if row['user_lat'] == 0 and row['user_lng'] == 0\n",
"}\n",
"answers_filtered = answers[answers['question_text'].map(lambda x: x not in zero_latlng_questions)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"answers_filtered['question_text_url'] = answers_filtered['question_text'].map(\n",
" lambda x: x.replace('\"', '').replace('*', ''))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_palette(n, no_black=True, no_white=True):\n",
" with open('glasbey/{}_colors.txt'.format(n + no_black + no_white)) as f:\n",
" return [\n",
" '#%02x%02x%02x' % tuple(int(c) for c in line.replace('\\n', '').split(','))\n",
" for line in f\n",
" if not no_black or line != '0,0,0\\n'\n",
" if not no_white or line != '255,255,255\\n'\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"options = [x[1] for x in sorted([\n",
" (row['user_lng'], answer_text)\n",
" for answer_text, row in rows.groupby('answer_text').agg({'user_lng': 'count'}).iterrows()\n",
"], reverse=True)]\n",
"\n",
"groups = [options[:len(options) // 2], options[len(options) // 2:]]\n",
"groups"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"80000 / 350"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import glob\n",
"with open('index.html', 'w') as f:\n",
" f.write('<html><head></head><body>' + \n",
" '<br/>\\n'.join(\n",
" '\\t<a href=\"http://herbertkruitbosch.com/pronunciation_maps/{}\">{}<a>'.format(fn, fn[:-4].replace('_', ' '))\n",
" for fn in sorted(\n",
" glob.glob('*_all.html') +\n",
" glob.glob('*_larger.html') +\n",
" glob.glob('*_smaller.html')\n",
" )\n",
" ) + \"</body></html>\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# cmap = pyplot.get_cmap('gist_rainbow')\n",
"# colors = pyplot.get_cmap('tab20')\n",
"# colors = ['#e6194b', '#3cb44b', '#ffe119', '#0082c8', '#f58231', '#911eb4', '#46f0f0', '#f032e6', '#d2f53c', '#fabebe', '#008080', '#e6beff', '#aa6e28', '#fffac8', '#800000', '#aaffc3', '#808000', '#ffd8b1', '#000080', '#808080']\n",
"\n",
"std = (1.89, 1.35)\n",
"\n",
"for question, rows in answers_filtered.groupby('question_text_url'):\n",
"# question = rows['question_text_url'][0]\n",
" n_answers = len(rows.groupby('answer_text').count())\n",
" \n",
" \n",
" options = [x[1] for x in sorted([\n",
" (row['user_lng'], answer_text)\n",
" for answer_text, row in rows.groupby('answer_text').agg({'user_lng': 'count'}).iterrows()\n",
" ], reverse=True)]\n",
" groups = [options]\n",
" if n_answers > 6:\n",
" groups.extend([options[:6], options[6:]])\n",
" \n",
" for group, group_name in zip(groups, ['all', 'larger', 'smaller']):\n",
" m = folium.Map((rows['user_lat'].median(), rows['user_lng'].median()), tiles='stamentoner', zoom_start=9)\n",
" # colors = cmap(range(256))[::256 // n_answers]\n",
" colors = get_palette(len(group))\n",
" for answer, color in zip(group, colors):\n",
" rows_ = rows[rows['answer_text'] == answer]\n",
" # color = '#%02x%02x%02x' % tuple(int(c*255) for c in color[:3])\n",
" name = '<span style=\\\\\"color:{}; \\\\\">{} ({})'.format(color, escape(answer), len(rows_))\n",
"\n",
" group = folium.FeatureGroup(name=name)\n",
" colormap[name] = color\n",
"\n",
" for point in zip(rows_['user_lat'], rows_['user_lng']):\n",
" point = tuple(p + 0.01 * s * numpy.random.randn() for p, s in zip(point, std))\n",
" folium.Circle(\n",
" point, color=None, fill_color=color,\n",
" radius=400*min(1, 100 / len(rows_)), fill_opacity=1 #1 - 0.5 * len(rows_) / len(rows)\n",
" ).add_to(group)\n",
" group.add_to(m)\n",
" folium.map.LayerControl('topright', collapsed=False).add_to(m)\n",
" \n",
" print(group_name, question)\n",
" if group_name == 'larger':\n",
" display(m)\n",
" m.save('{}_{}.html'.format(question, group_name))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,397 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Geographical pronunciation statistics"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas\n",
"import MySQLdb\n",
"import numpy\n",
"import json\n",
"\n",
"db = MySQLdb.connect(user='root', passwd='Nmmxhjgt1@', db='stimmen', charset='utf8')\n",
"\n",
"%matplotlib notebook\n",
"from matplotlib import pyplot\n",
"import folium\n",
"from IPython.display import display\n",
"from shapely.geometry import Polygon, MultiPolygon, shape, Point\n",
"from jsbutton import JsButton\n",
"from shapely.geometry import LineString, MultiLineString\n",
"from jupyter_progressbar import ProgressBar\n",
"from collections import defaultdict, Counter\n",
"from ipy_table import make_table\n",
"from html import escape\n",
"\n",
"import numpy as np\n",
"from random import shuffle\n",
"import pickle\n",
"from jupyter_progressbar import ProgressBar"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"with open('friesland_wijken_land_only.p3', 'rb') as f:\n",
" wijken, wijk_shapes = pickle.load(f)\n",
"\n",
"for x in wijken['features']:\n",
" x['type'] = 'Feature'\n",
"\n",
"with open('friesland_wijken_geojson.json', 'w') as f:\n",
" wijken['features'] = wijken['features']\n",
" json.dump(wijken, f, indent=1)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from osgeo import gdal, ogr\n",
"\n",
"srcDS = gdal.OpenEx('friesland_wijken_geojson.json')\n",
"ds = gdal.VectorTranslate('friesland_wijken_geojson.kml', srcDS, format='kml')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'k4luâ7mWBAgDSKhCVaysNdr TjeoE85JzëGúcM.,IRtp2-bLû69Un0wZF3Hv1iOfô'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"''.join({\n",
" c\n",
" for wijk in wijken['features']\n",
" for c in wijk['properties']['gemeente_en_wijk_naam']\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"with open('friesland_wijken_land_only.p3', 'rb') as f:\n",
" wijken, wijk_shapes = pickle.load(f)\n",
"\n",
"wijk_names = [wijk['properties']['gemeente_en_wijk_naam'] for wijk in wijken['features']]\n",
"\n",
"def get_wijk(point):\n",
" for i, shape in enumerate(wijk_shapes):\n",
" if shape.contains(point):\n",
" return i\n",
" return -1"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def listify(rd_multipolygon):\n",
" if len(rd_multipolygon) == 2 and tuple(map(type, rd_multipolygon)) == (float, float):\n",
" return list(rd_multipolygon)\n",
" return [\n",
" listify(element)\n",
" for element in rd_multipolygon\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Answers to how participants state a word should be pronounced.\n",
"\n",
"answers = pandas.read_sql('''\n",
"SELECT prediction_quiz_id, user_lat, user_lng, question_text, answer_text\n",
"FROM core_surveyresult as survey\n",
"INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id\n",
"INNER JOIN core_predictionquizresultquestionanswer as answer\n",
" ON result.id = answer.prediction_quiz_id\n",
"''', db)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"zero_latlng_questions = {\n",
" q\n",
" for q, row in answers.groupby('question_text').agg('std').iterrows()\n",
" if row['user_lat'] == 0 and row['user_lng'] == 0\n",
"}\n",
"answers_filtered = answers[answers['question_text'].map(lambda x: x not in zero_latlng_questions)]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def reverse(rd_multipolygon):\n",
" if len(rd_multipolygon) == 2 and tuple(map(type, rd_multipolygon)) == (float, float):\n",
" return rd_multipolygon[::-1]\n",
" return [\n",
" reverse(element)\n",
" for element in rd_multipolygon\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.6/site-packages/ipykernel_launcher.py:10: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" # Remove the CWD from sys.path while we load stuff.\n"
]
}
],
"source": [
"# Takes approximately 2 minutes\n",
"points = set(zip(answers_filtered['user_lng'], answers_filtered['user_lat']))\n",
"\n",
"wijk_map = dict()\n",
"for lng, lat in points:\n",
" wijk_map[(lng, lat)] = get_wijk(Point(lng, lat))\n",
"\n",
"answers_filtered['wijk'] = [\n",
" wijk_map[(lng, lat)]\n",
" for lat, lng in zip(answers_filtered['user_lat'], answers_filtered['user_lng'])\n",
"]"
]
},
{
 "cell_type": "code",
 "execution_count": 11,
 "metadata": {},
 "outputs": [],
 "source": [
  "# One .assign call instead of three slice assignments: avoids the three\n",
  "# SettingWithCopyWarnings; none of the new columns depends on another.\n",
  "answers_filtered = answers_filtered.assign(\n",
  "    # question text without quote/emphasis markup, safe for use in URLs\n",
  "    question_text_url=answers_filtered['question_text'].map(\n",
  "        lambda x: x.replace('\"', '').replace('*', '')),\n",
  "    wijk_name=answers_filtered['wijk'].map(\n",
  "        lambda x: wijk_names[x]),\n",
  "    # the text between the first '(' and the first ')'\n",
  "    answer_text_url=answers_filtered['answer_text'].map(\n",
  "        lambda x: x[x.find('('):x.find(')')][1:]))"
 ]
},
{
 "cell_type": "code",
 "execution_count": 12,
 "metadata": {},
 "outputs": [],
 "source": [
  "# One row per wijk: its name plus the centroid of its shape, used as the\n",
  "# representative coordinate for the Gabmap location file.\n",
  "records = [\n",
  "    {\n",
  "        '#name': name,\n",
  "        'longitude': shape.centroid.xy[0][0],\n",
  "        'latitude': shape.centroid.xy[1][0],\n",
  "    }\n",
  "    for name, shape in zip(wijk_names, wijk_shapes)\n",
  "]\n",
  "wijken = pandas.DataFrame(records).set_index('#name')"
 ]
},
{
 "cell_type": "code",
 "execution_count": 23,
 "metadata": {
  "scrolled": true
 },
 "outputs": [],
 "source": [
  "def merge_dicts(*args):\n",
  "    \"\"\"Return a new dict with all `args` merged; later keys win.\"\"\"\n",
  "    # Build a fresh dict instead of mutating args[0] in place, so callers'\n",
  "    # dict literals are never silently modified.\n",
  "    merged = {}\n",
  "    for arg in args:\n",
  "        merged.update(arg)\n",
  "    return merged\n",
  "\n",
  "\n",
  "# One row per wijk; one column per question, holding the list of\n",
  "# pronunciation variants with their counts.\n",
  "pronunciations = pandas.DataFrame([\n",
  "    merge_dicts(\n",
  "        {\n",
  "            question: answers['answer_text_url']\n",
  "            for question, answers in rows.groupby(\n",
  "                'question_text_url'\n",
  "            ).agg(\n",
  "                {\n",
  "                    'answer_text_url': lambda x: [\n",
  "                        {\n",
  "                            'pronunciation': answer_text,\n",
  "                            'count': answer_texts.count(answer_text)\n",
  "                        }\n",
  "                        for answer_texts in [list(x)]\n",
  "                        for answer_text in sorted(set(x))\n",
  "                    ]\n",
  "                }\n",
  "            ).iterrows()\n",
  "        },\n",
  "        {'wijk': wijk_names[wijk]})\n",
  "    for wijk, rows in answers_filtered.groupby('wijk')\n",
  "    if wijk >= 0  # negative wijk marks points outside every wijk shape\n",
  "])\n",
  "\n",
  "pronunciations.set_index('wijk', inplace=True)\n",
  "\n",
  "columns = list(pronunciations.columns)\n",
  "\n",
  "# Percentage of each pronunciation variant per wijk.\n",
  "counts = pandas.DataFrame([\n",
  "    merge_dicts({\n",
  "        column + \": \" + x['pronunciation']: 100 * x['count'] / total\n",
  "        for column in columns\n",
  "        for total in [sum(x['count'] for x in row[column])]\n",
  "        for x in row[column]\n",
  "    }, {'': wijk})\n",
  "    for wijk, row in pronunciations.iterrows()\n",
  "])\n",
  "\n",
  "# Collapse each variant list to a single ' / '-joined display string.\n",
  "pronunciations = pandas.DataFrame([\n",
  "    merge_dicts({\n",
  "        column: ' / '.join(str(x['pronunciation']) for x in row[column])\n",
  "        for column in columns\n",
  "    }, {'': wijk})\n",
  "    for wijk, row in pronunciations.iterrows()\n",
  "])\n",
  "\n",
  "pronunciations.set_index('', inplace=True)\n",
  "counts.set_index('', inplace=True)\n",
  "# Wijk/variant combinations that never occur are NaN; report them as 0%.\n",
  "# (Explicit fillna replaces the cryptic `counts[counts != counts] = 0`.)\n",
  "counts = counts.fillna(0)"
 ]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<function shapely.geometry.geo.shape(context)>"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
 "# NOTE(review): leftover debug cell — the recorded output shows `shape`\n",
 "# resolves to the shapely.geometry.shape function here, not a computed\n",
 "# geometry; consider deleting this cell.\n",
 "shape"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# Write the Gabmap input files: pronunciation variants per wijk, their\n",
"# percentages, and the centroid coordinates of each wijk.\n",
"pronunciations.to_csv('pronunciations_by_wijk.tsv', sep='\\t')\n",
"counts.to_csv('pronunciation_percentages_by_wijk.tsv', sep='\\t')\n",
"wijken.to_csv('wijk_centroid.tsv', sep='\\t', columns=['longitude', 'latitude'])"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# Read the just-written files back for a quick sanity check.\n",
"with open('pronunciations_by_wijk.tsv') as f:\n",
"    p = list(f)\n",
"\n",
"# Fixed filename: the percentages table is written as\n",
"# 'pronunciation_percentages_by_wijk.tsv' in the cell above; the old\n",
"# 'pronunciation_count_by_wijk.tsv' name would fail on a fresh run\n",
"# (it can only have existed as a leftover from an earlier version).\n",
"with open('pronunciation_percentages_by_wijk.tsv') as f:\n",
"    c = list(f)\n",
"\n",
"with open('wijk_centroid.tsv') as f:\n",
"    w = list(f)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,14 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Show province segmentation\n",
"\n",
"In gemeentes and wijken as calculated in `Segment Provinces in Wijken and Gemeentes.ipynb`."
]
},
{
"cell_type": "code",
"execution_count": 2,
@@ -34,7 +43,9 @@
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",

File diff suppressed because one or more lines are too long

View File

@@ -1,102 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from glob import glob\n",
"\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Collect every wave filename under data/<location>/<date>/ and make sure\n",
"# no two recordings share a name before reorganising them.\n",
"waves = []\n",
"for location in os.listdir('data'):\n",
"    for date in os.listdir(os.path.join('data', location)):\n",
"        waves.extend(os.listdir(os.path.join('data', location, date)))\n",
"assert len(waves) == len(set(waves)), \"Not all filenames are unique :(\""
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Reorganise data/<location>/<date>/<wave> into per_word/<word>/<wave>,\n",
"# where the word is the second '_'-separated token of the filename.\n",
"# NOTE(review): destructive — os.rename moves the originals out of data/.\n",
"for location in os.listdir('data'):\n",
"    for date in os.listdir(os.path.join('data', location)):\n",
"        for wave in os.listdir(os.path.join('data', location, date)):\n",
"            source = os.path.join('data', location, date, wave)\n",
"            destination = os.path.join('per_word', wave.split('_')[1])\n",
"            if not os.path.isdir(destination):\n",
"                os.mkdir(destination)\n",
"            os.rename(source, os.path.join(destination, wave))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"import pandas\n",
"\n",
"# NOTE(review): hard-coded absolute path — breaks on any other machine;\n",
"# prefer a configurable data directory.\n",
"data = pandas.read_csv('/home/herbert/picture-game-result-export.csv', delimiter=';')\n",
"# Basename of the recording path/URL in the 'Opname' column.\n",
"data['Filename'] = [x.split('/')[-1] for x in data['Opname']]"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"# Prefix recordings that are not in `relevant` with 'irrelevant_accent_'\n",
"# so they are set apart within each per_word/<word>/ directory.\n",
"# NOTE(review): `relevant` is not defined anywhere in this notebook —\n",
"# presumably built in a since-deleted cell; this cell fails with a\n",
"# NameError on a fresh Restart & Run All.\n",
"for word in os.listdir('per_word'):\n",
"    for wave in os.listdir(os.path.join('per_word', word)):\n",
"        source = os.path.join('per_word', word, wave)\n",
"        if wave not in relevant:\n",
"            destination = os.path.join('per_word', word, 'irrelevant_accent_' + wave)\n",
"            os.rename(source, destination)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"data.to_excel('/home/herbert/picture-game-result-export-filename.xlsx')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}