stimmenfryslan/notebooks/Pronunciations Table per Wi...

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Geographical pronunciation statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from getpass import getpass\n",
    "\n",
    "import pandas\n",
    "import MySQLdb\n",
    "import numpy\n",
    "import json\n",
    "\n",
    "# Database credentials are read from the environment instead of being\n",
    "# hard-coded in the notebook; when STIMMEN_DB_PASSWORD is not set the\n",
    "# password is prompted for interactively.\n",
    "# NOTE(review): the password that used to be embedded in this cell is part of\n",
    "# the repository history and should be rotated.\n",
    "db = MySQLdb.connect(\n",
    "    user=os.environ.get('STIMMEN_DB_USER', 'root'),\n",
    "    passwd=os.environ.get('STIMMEN_DB_PASSWORD') or getpass('MySQL password: '),\n",
    "    db='stimmen',\n",
    "    charset='utf8',\n",
    ")\n",
    "\n",
    "%matplotlib notebook\n",
    "from matplotlib import pyplot\n",
    "import folium\n",
    "from IPython.display import display\n",
    "from shapely.geometry import Polygon, MultiPolygon, shape, Point\n",
    "from jsbutton import JsButton\n",
    "from shapely.geometry import LineString, MultiLineString\n",
    "from jupyter_progressbar import ProgressBar\n",
    "from collections import defaultdict, Counter\n",
    "from ipy_table import make_table\n",
    "from html import escape\n",
    "\n",
    "import numpy as np\n",
    "from random import shuffle\n",
    "import pickle\n",
    "from jupyter_progressbar import ProgressBar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the wijk features and the matching shapes from the pickle.\n",
    "# NOTE: pickle.load can execute arbitrary code; only use a trusted file here.\n",
    "with open('friesland_wijken_land_only.p3', 'rb') as f:\n",
    "    wijken, wijk_shapes = pickle.load(f)\n",
    "\n",
    "# Mark every entry as a 'Feature' before writing the collection out as JSON.\n",
    "for x in wijken['features']:\n",
    "    x['type'] = 'Feature'\n",
    "\n",
    "with open('friesland_wijken_geojson.json', 'w') as f:\n",
    "    json.dump(wijken, f, indent=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from osgeo import gdal, ogr\n",
    "\n",
    "# Convert the exported JSON file to KML with GDAL.\n",
    "srcDS = gdal.OpenEx('friesland_wijken_geojson.json')\n",
    "if srcDS is None:\n",
    "    # Unless gdal.UseExceptions() is active, OpenEx reports failure by returning None.\n",
    "    raise IOError('GDAL could not open friesland_wijken_geojson.json')\n",
    "\n",
    "ds = gdal.VectorTranslate('friesland_wijken_geojson.kml', srcDS, format='kml')\n",
    "\n",
    "# GDAL flushes and closes a dataset only when its Python object is released,\n",
    "# so drop both references to make sure the KML file is completely written.\n",
    "ds = None\n",
    "srcDS = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'k4luâ7mWBAgDSKhCVaysNdr TjeoE85JzëGúcM.,IRtp2-bLû69Un0wZF3Hv1iOfô'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Every distinct character that occurs in the wijk names (in arbitrary set order).\n",
    "''.join(set(\n",
    "    char\n",
    "    for feature in wijken['features']\n",
    "    for char in feature['properties']['gemeente_en_wijk_naam']\n",
    "))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the wijk features and shapes from the pickle.\n",
    "with open('friesland_wijken_land_only.p3', 'rb') as f:\n",
    "    wijken, wijk_shapes = pickle.load(f)\n",
    "\n",
    "# Names in the same order as wijken['features'].\n",
    "wijk_names = [\n",
    "    feature['properties']['gemeente_en_wijk_naam']\n",
    "    for feature in wijken['features']\n",
    "]\n",
    "\n",
    "def get_wijk(point):\n",
    "    \"\"\"Return the index in wijk_shapes of the first shape containing point, or -1.\"\"\"\n",
    "    return next(\n",
    "        (index for index, wijk_shape in enumerate(wijk_shapes) if wijk_shape.contains(point)),\n",
    "        -1,\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def listify(rd_multipolygon):\n",
    "    \"\"\"Return a copy of a nested coordinate structure built from plain lists.\n",
    "\n",
    "    A non-empty sequence that holds only numbers is treated as one coordinate\n",
    "    and converted with list(); any other sequence is processed element by\n",
    "    element.  Accepting ints and coordinates of any length (not only pairs of\n",
    "    floats) avoids the TypeError raised by calling len() on a bare number.\n",
    "    \"\"\"\n",
    "    is_coordinate = len(rd_multipolygon) > 0 and all(\n",
    "        isinstance(value, (int, float)) for value in rd_multipolygon)\n",
    "    if is_coordinate:\n",
    "        return list(rd_multipolygon)\n",
    "    return [listify(element) for element in rd_multipolygon]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Answers to how participants state a word should be pronounced.\n",
    "# The query joins survey results to their prediction quiz results and to the\n",
    "# question answers recorded for those quiz results.\n",
    "\n",
    "answers = pandas.read_sql('''\n",
    "SELECT prediction_quiz_id, user_lat, user_lng, question_text, answer_text\n",
    "FROM       core_surveyresult as survey\n",
    "INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id\n",
    "INNER JOIN core_predictionquizresultquestionanswer as answer\n",
    "    ON result.id = answer.prediction_quiz_id\n",
    "''', db)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Questions whose answers all share one latitude and one longitude (standard\n",
    "# deviation of exactly 0 for both) are dropped.  The standard deviation is only\n",
    "# computed for the two coordinate columns, not for the text columns.\n",
    "latlng_std = answers.groupby('question_text')[['user_lat', 'user_lng']].std()\n",
    "zero_latlng_questions = set(\n",
    "    latlng_std.index[(latlng_std['user_lat'] == 0) & (latlng_std['user_lng'] == 0)]\n",
    ")\n",
    "\n",
    "# .copy() makes answers_filtered an independent frame, so adding columns to it\n",
    "# in later cells does not raise SettingWithCopyWarning.\n",
    "answers_filtered = answers[~answers['question_text'].isin(zero_latlng_questions)].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def reverse(rd_multipolygon):\n",
    "    \"\"\"Return a nested coordinate structure with every coordinate reversed.\n",
    "\n",
    "    A non-empty sequence that holds only numbers is treated as one coordinate\n",
    "    and returned reversed (same sequence type); any other sequence is turned\n",
    "    into a list of its reversed elements.  Accepting ints and coordinates of\n",
    "    any length (not only pairs of floats) avoids the TypeError raised by\n",
    "    calling len() on a bare number.\n",
    "    \"\"\"\n",
    "    is_coordinate = len(rd_multipolygon) > 0 and all(\n",
    "        isinstance(value, (int, float)) for value in rd_multipolygon)\n",
    "    if is_coordinate:\n",
    "        return rd_multipolygon[::-1]\n",
    "    return [reverse(element) for element in rd_multipolygon]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.6/site-packages/ipykernel_launcher.py:10: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  # Remove the CWD from sys.path while we load stuff.\n"
     ]
    }
   ],
   "source": [
    "# Takes approximately 2 minutes\n",
    "# Look up the wijk once per distinct (lng, lat) location, then give every\n",
    "# answer the wijk index found for its location.\n",
    "points = set(zip(answers_filtered['user_lng'], answers_filtered['user_lat']))\n",
    "wijk_map = {(lng, lat): get_wijk(Point(lng, lat)) for lng, lat in points}\n",
    "\n",
    "answers_filtered['wijk'] = [\n",
    "    wijk_map[location]\n",
    "    for location in zip(answers_filtered['user_lng'], answers_filtered['user_lat'])\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \n",
      "/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.6/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \"\"\"\n",
      "/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.6/site-packages/ipykernel_launcher.py:8: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \n"
     ]
    }
   ],
   "source": [
    "# Question text without double quotes and asterisks.\n",
    "answers_filtered['question_text_url'] = answers_filtered['question_text'].map(\n",
    "    lambda x: x.replace('\"', '').replace('*', ''))\n",
    "\n",
    "# get_wijk() returns -1 for a location outside every wijk.  Indexing wijk_names\n",
    "# with -1 would silently yield the last wijk's name, so such rows get None.\n",
    "answers_filtered['wijk_name'] = answers_filtered['wijk'].map(\n",
    "    lambda x: wijk_names[x] if x >= 0 else None)\n",
    "\n",
    "# Text between the first '(' and the first ')' of the answer.\n",
    "answers_filtered['answer_text_url'] = answers_filtered['answer_text'].map(\n",
    "    lambda x: x[x.find('('):x.find(')')][1:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per wijk, indexed by name, holding the coordinates of its centroid.\n",
    "wijken = pandas.DataFrame([\n",
    "    {\n",
    "        '#name': wijk_name,\n",
    "        'longitude': wijk_shape.centroid.xy[0][0],\n",
    "        'latitude': wijk_shape.centroid.xy[1][0],\n",
    "    }\n",
    "    for wijk_name, wijk_shape in zip(wijk_names, wijk_shapes)\n",
    "]).set_index('#name')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def merge_dicts(*args):\n",
    "    \"\"\"Update the first dict with every following dict, in order, and return it.\"\"\"\n",
    "    for arg in args[1:]:\n",
    "        args[0].update(arg)\n",
    "    return args[0]\n",
    "\n",
    "\n",
    "def tally_pronunciations(answer_texts):\n",
    "    \"\"\"Count each distinct answer and return the tallies sorted by pronunciation.\"\"\"\n",
    "    tally = Counter(list(answer_texts))\n",
    "    return [\n",
    "        {'pronunciation': pronunciation, 'count': tally[pronunciation]}\n",
    "        for pronunciation in sorted(tally)\n",
    "    ]\n",
    "\n",
    "\n",
    "# Rows: wijken, columns: questions, cells: the list of tallies for that pair.\n",
    "# Answers that fell outside every wijk (wijk == -1) are left out.\n",
    "pronunciation_tallies = pandas.DataFrame([\n",
    "    merge_dicts(\n",
    "        {\n",
    "            question: tally_pronunciations(answer_texts)\n",
    "            for question, answer_texts\n",
    "            in rows.groupby('question_text_url')['answer_text_url']\n",
    "        },\n",
    "        {'wijk': wijk_names[wijk]},\n",
    "    )\n",
    "    for wijk, rows in answers_filtered.groupby('wijk')\n",
    "    if wijk >= 0\n",
    "]).set_index('wijk')\n",
    "\n",
    "columns = list(pronunciation_tallies.columns)\n",
    "\n",
    "percentage_rows = []\n",
    "label_rows = []\n",
    "for wijk, row in pronunciation_tallies.iterrows():\n",
    "    percentages = {}\n",
    "    labels = {}\n",
    "    for column in columns:\n",
    "        # A wijk without any answer to a question holds NaN instead of a list;\n",
    "        # treat that as an empty tally rather than iterating over a float.\n",
    "        tallies = row[column] if isinstance(row[column], list) else []\n",
    "        total = sum(tally['count'] for tally in tallies)\n",
    "        for tally in tallies:\n",
    "            percentages[column + ': ' + tally['pronunciation']] = 100 * tally['count'] / total\n",
    "        labels[column] = ' / '.join(str(tally['pronunciation']) for tally in tallies)\n",
    "    percentage_rows.append(merge_dicts(percentages, {'': wijk}))\n",
    "    label_rows.append(merge_dicts(labels, {'': wijk}))\n",
    "\n",
    "# counts: per wijk, the percentage of each pronunciation within its question;\n",
    "# a pronunciation that does not occur in a wijk is stored as 0.\n",
    "counts = pandas.DataFrame(percentage_rows).set_index('').fillna(0)\n",
    "\n",
    "# pronunciations: per wijk and question, all pronunciations joined by ' / '.\n",
    "pronunciations = pandas.DataFrame(label_rows).set_index('')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<function shapely.geometry.geo.shape(context)>"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspection only: display what the name `shape` is currently bound to\n",
    "# (the recorded output shows shapely's shape() function).\n",
    "shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the three per-wijk tables as tab-separated files.\n",
    "tsv_options = {'sep': '\\t'}\n",
    "pronunciations.to_csv('pronunciations_by_wijk.tsv', **tsv_options)\n",
    "counts.to_csv('pronunciation_percentages_by_wijk.tsv', **tsv_options)\n",
    "wijken[['longitude', 'latitude']].to_csv('wijk_centroid.tsv', **tsv_options)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the exported tables back as lists of lines.  The encoding is given\n",
    "# explicitly because the wijk names contain non-ASCII characters.\n",
    "with open('pronunciations_by_wijk.tsv', encoding='utf-8') as f:\n",
    "    p = list(f)\n",
    "\n",
    "# The percentages are exported as pronunciation_percentages_by_wijk.tsv; the\n",
    "# previously used name pronunciation_count_by_wijk.tsv is not written by this\n",
    "# notebook, so reading it fails (or picks up a stale file) on a fresh run.\n",
    "with open('pronunciation_percentages_by_wijk.tsv', encoding='utf-8') as f:\n",
    "    c = list(f)\n",
    "\n",
    "with open('wijk_centroid.tsv', encoding='utf-8') as f:\n",
    "    w = list(f)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
first commit 2018-09-28 10:35:17 +02:00			`{`
			`"cells": [`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"# Geographical pronunciation statistics"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 1,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"import pandas\n",`
			`"import MySQLdb\n",`
			`"import numpy\n",`
			`"import json\n",`
			`"\n",`
			`"db = MySQLdb.connect(user='root', passwd='Nmmxhjgt1@', db='stimmen', charset='utf8')\n",`
			`"\n",`
			`"%matplotlib notebook\n",`
			`"from matplotlib import pyplot\n",`
			`"import folium\n",`
			`"from IPython.display import display\n",`
			`"from shapely.geometry import Polygon, MultiPolygon, shape, Point\n",`
			`"from jsbutton import JsButton\n",`
			`"from shapely.geometry import LineString, MultiLineString\n",`
			`"from jupyter_progressbar import ProgressBar\n",`
			`"from collections import defaultdict, Counter\n",`
			`"from ipy_table import make_table\n",`
			`"from html import escape\n",`
			`"\n",`
			`"import numpy as np\n",`
			`"from random import shuffle\n",`
			`"import pickle\n",`
			`"from jupyter_progressbar import ProgressBar"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 2,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"with open('friesland_wijken_land_only.p3', 'rb') as f:\n",`
			`" wijken, wijk_shapes = pickle.load(f)\n",`
			`"\n",`
			`"for x in wijken['features']:\n",`
			`" x['type'] = 'Feature'\n",`
			`"\n",`
			`"with open('friesland_wijken_geojson.json', 'w') as f:\n",`
			`" wijken['features'] = wijken['features']\n",`
			`" json.dump(wijken, f, indent=1)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 3,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"from osgeo import gdal, ogr\n",`
			`"\n",`
			`"srcDS = gdal.OpenEx('friesland_wijken_geojson.json')\n",`
			`"ds = gdal.VectorTranslate('friesland_wijken_geojson.kml', srcDS, format='kml')"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 4,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"'k4luâ7mWBAgDSKhCVaysNdr TjeoE85JzëGúcM.,IRtp2-bLû69Un0wZF3Hv1iOfô'"`
			`]`
			`},`
			`"execution_count": 4,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"''.join({\n",`
			`" c\n",`
			`" for wijk in wijken['features']\n",`
			`" for c in wijk['properties']['gemeente_en_wijk_naam']\n",`
			`"})"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 5,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"with open('friesland_wijken_land_only.p3', 'rb') as f:\n",`
			`" wijken, wijk_shapes = pickle.load(f)\n",`
			`"\n",`
			`"wijk_names = [wijk['properties']['gemeente_en_wijk_naam'] for wijk in wijken['features']]\n",`
			`"\n",`
			`"def get_wijk(point):\n",`
			`" for i, shape in enumerate(wijk_shapes):\n",`
			`" if shape.contains(point):\n",`
			`" return i\n",`
			`" return -1"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 6,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"def listify(rd_multipolygon):\n",`
			`" if len(rd_multipolygon) == 2 and tuple(map(type, rd_multipolygon)) == (float, float):\n",`
			`" return list(rd_multipolygon)\n",`
			`" return [\n",`
			`" listify(element)\n",`
			`" for element in rd_multipolygon\n",`
			`" ]"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 7,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"# Answers to how participants state a word should be pronounces.\n",`
			`"\n",`
			`"answers = pandas.read_sql('''\n",`
			`"SELECT prediction_quiz_id, user_lat, user_lng, question_text, answer_text\n",`
			`"FROM core_surveyresult as survey\n",`
			`"INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id\n",`
			`"INNER JOIN core_predictionquizresultquestionanswer as answer\n",`
			`" ON result.id = answer.prediction_quiz_id\n",`
			`"''', db)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 8,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"zero_latlng_questions = {\n",`
			`" q\n",`
			`" for q, row in answers.groupby('question_text').agg('std').iterrows()\n",`
			`" if row['user_lat'] == 0 and row['user_lng'] == 0\n",`
			`"}\n",`
			`"answers_filtered = answers[answers['question_text'].map(lambda x: x not in zero_latlng_questions)]"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 9,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"def reverse(rd_multipolygon):\n",`
			`" if len(rd_multipolygon) == 2 and tuple(map(type, rd_multipolygon)) == (float, float):\n",`
			`" return rd_multipolygon[::-1]\n",`
			`" return [\n",`
			`" reverse(element)\n",`
			`" for element in rd_multipolygon\n",`
			`" ]"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 10,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stderr",`
			`"output_type": "stream",`
			`"text": [`
			`"/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.6/site-packages/ipykernel_launcher.py:10: SettingWithCopyWarning: \n",`
			`"A value is trying to be set on a copy of a slice from a DataFrame.\n",`
			`"Try using .loc[row_indexer,col_indexer] = value instead\n",`
			`"\n",`
			`"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",`
			`" # Remove the CWD from sys.path while we load stuff.\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"# Takes approximately 2 minutes\n",`
			`"points = set(zip(answers_filtered['user_lng'], answers_filtered['user_lat']))\n",`
			`"\n",`
			`"wijk_map = dict()\n",`
			`"for lng, lat in points:\n",`
			`" wijk_map[(lng, lat)] = get_wijk(Point(lng, lat))\n",`
			`"\n",`
			`"answers_filtered['wijk'] = [\n",`
			`" wijk_map[(lng, lat)]\n",`
			`" for lat, lng in zip(answers_filtered['user_lat'], answers_filtered['user_lng'])\n",`
			`"]"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 11,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stderr",`
			`"output_type": "stream",`
			`"text": [`
			`"/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n",`
			`"A value is trying to be set on a copy of a slice from a DataFrame.\n",`
			`"Try using .loc[row_indexer,col_indexer] = value instead\n",`
			`"\n",`
			`"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",`
			`" \n",`
			`"/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.6/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n",`
			`"A value is trying to be set on a copy of a slice from a DataFrame.\n",`
			`"Try using .loc[row_indexer,col_indexer] = value instead\n",`
			`"\n",`
			`"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",`
			`" \"\"\"\n",`
			`"/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.6/site-packages/ipykernel_launcher.py:8: SettingWithCopyWarning: \n",`
			`"A value is trying to be set on a copy of a slice from a DataFrame.\n",`
			`"Try using .loc[row_indexer,col_indexer] = value instead\n",`
			`"\n",`
			`"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",`
			`" \n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"answers_filtered['question_text_url'] = answers_filtered['question_text'].map(\n",`
			`" lambda x: x.replace('\"', '').replace('*', ''))\n",`
			`"\n",`
			`"answers_filtered['wijk_name'] = answers_filtered['wijk'].map(\n",`
			`" lambda x: wijk_names[x])\n",`
			`"\n",`
			`"answers_filtered['answer_text_url'] = answers_filtered['answer_text'].map(\n",`
			`" lambda x: x[x.find('('):x.find(')')][1:])"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 12,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"wijken = pandas.DataFrame([\n",`
			`" {'#name': name, 'longitude': shape.centroid.xy[0][0], 'latitude': shape.centroid.xy[1][0]}\n",`
			`" for name, shape in zip(wijk_names, wijk_shapes)\n",`
			`"])\n",`
			`"\n",`
			`"wijken.set_index('#name', inplace=True)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 23,`
			`"metadata": {`
			`"scrolled": true`
			`},`
			`"outputs": [],`
			`"source": [`
			`"def merge_dicts(*args):\n",`
			`" for arg in args[1:]:\n",`
			`" args[0].update(arg)\n",`
			`" return args[0]\n",`
			`"\n",`
			`"\n",`
			`"pronunciations = pandas.DataFrame([\n",`
			`" merge_dicts(\n",`
			`" {\n",`
			`" question: answers['answer_text_url']\n",`
			`" for question, answers in rows.groupby(\n",`
			`" 'question_text_url'\n",`
			`" ).agg(\n",`
			`" {\n",`
			`" 'answer_text_url': lambda x: [\n",`
			`" {\n",`
			`" 'pronunciation': answer_text,\n",`
			`" 'count': answer_texts.count(answer_text)\n",`
			`" }\n",`
			`" for answer_texts in [list(x)]\n",`
			`" for answer_text in sorted(set(x))\n",`
			`" \n",`
			`" ] \n",`
			`" }\n",`
			`" ).iterrows()\n",`
			`" }, {\n",`
			`" 'wijk': wijk_names[wijk]\n",`
			`" })\n",`
			`" for wijk, rows in answers_filtered.groupby('wijk')\n",`
			`" if wijk >= 0\n",`
			`"])\n",`
			`"\n",`
			`"pronunciations.set_index('wijk', inplace=True)\n",`
			`"pronunciations\n",`
			`"\n",`
			`"columns = list(pronunciations.columns)\n",`
			`"\n",`
			`"counts = pandas.DataFrame([\n",`
			`" merge_dicts({\n",`
			`" column + \": \" + x['pronunciation']: 100 * x['count'] / total\n",`
			`" for column in columns\n",`
			`" for total in [sum(x['count'] for x in row[column])]\n",`
			`" for x in row[column]\n",`
			`" }, {'': wijk})\n",`
			`" for wijk, row in pronunciations.iterrows()\n",`
			`"])\n",`
			`"\n",`
			`"pronunciations = pandas.DataFrame([\n",`
			`" merge_dicts({\n",`
			`" column: ' / '.join(str(x['pronunciation']) for x in row[column])\n",`
			`" for column in columns\n",`
			`" }, {'': wijk})\n",`
			`" for wijk, row in pronunciations.iterrows()\n",`
			`"])\n",`
			`"\n",`
			`"pronunciations.set_index('', inplace=True)\n",`
			`"counts.set_index('', inplace=True)\n",`
			`"counts[counts != counts] = 0"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 29,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"<function shapely.geometry.geo.shape(context)>"`
			`]`
			`},`
			`"execution_count": 29,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"shape"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 26,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"pronunciations.to_csv('pronunciations_by_wijk.tsv', sep='\\t')\n",`
			`"counts.to_csv('pronunciation_percentages_by_wijk.tsv', sep='\\t')\n",`
			`"wijken.to_csv('wijk_centroid.tsv', sep='\\t', columns=['longitude', 'latitude'])"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 27,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"with open('pronunciations_by_wijk.tsv') as f:\n",`
			`" p = list(f)\n",`
			`" \n",`
			`"with open('pronunciation_count_by_wijk.tsv') as f:\n",`
			`" c = list(f)\n",`
			`"\n",`
			`"with open('wijk_centroid.tsv') as f:\n",`
			`" w = list(f)"`
			`]`
			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
			`"display_name": "Python 3",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
			`"version": "3.6.5"`
			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 2`
			`}`