2018-09-28 16:42:18 +02:00
2018-09-28 10:35:17 +02:00
"import folium\n",
"from collections import Counter\n",
2018-09-28 16:42:18 +02:00
"from math import sqrt, floor\n",
2018-09-28 10:35:17 +02:00
"import numpy as np\n",
2018-09-28 16:42:18 +02:00
"from imageio import imread\n",
2018-09-28 10:35:17 +02:00
"%matplotlib notebook\n",
"from matplotlib import pyplot as plt\n",
"from skimage.morphology import binary_closing\n",
"from skimage.measure import find_contours, label\n",
2018-09-28 16:42:18 +02:00
"import folium.plugins\n",
"from folium_jsbutton import JsButton"
2018-09-28 10:35:17 +02:00
2018-09-28 16:42:18 +02:00
"execution_count": 18,
2018-09-28 10:35:17 +02:00
2018-09-28 16:42:18 +02:00
"im = imread('../data/dialects.png')\n",
2018-09-28 10:35:17 +02:00
2018-09-28 16:42:18 +02:00
"color_occurence = Counter(map(tuple, im.reshape(-1,3)))\n",
"colors_sorted_by_occurence = [c for c, _ in sorted(\n",
" color_occurence.items(),\n",
" key=lambda x: x[1],\n",
" reverse=True)\n",
2018-09-28 10:35:17 +02:00
2018-09-28 16:42:18 +02:00
"execution_count": 31,
2018-09-28 10:35:17 +02:00
2018-09-28 16:42:18 +02:00
"text/plain": [
"<IPython.core.display.Javascript object>"
2018-09-28 16:42:18 +02:00
2018-09-28 10:35:17 +02:00
"text/plain": [
"<IPython.core.display.HTML object>"
2018-09-28 16:42:18 +02:00
"output_type": "display_data"
"source": [
"pallette_width = floor(sqrt(len(colors_sorted_by_occurence)))\n",
"pallette = np.array(colors_sorted_by_occurence[:pallette_width**2]).reshape(pallette_width, pallette_width, 3)\n",
"_, (ax0, ax1) = plt.subplots(1, 2)\n",
"for x in range(pallette_width):\n",
" for y in range(pallette_width):\n",
" ax0.text(x-0.5, y+0.5, str(x + y * pallette_width))\n",
"ax0.set_xticks([]), ax0.set_yticks([])\n",
"pallette_indices = [3, 4, 7, 8]\n",
"pallette = [colors_sorted_by_occurence[i] for i in pallette_indices]\n",
"pallette = np.array(pallette).reshape(1, -1, 3)\n",
"ax1.set_xticks([]), ax1.set_yticks([])\n",
"execution_count": 37,
2018-09-28 16:42:18 +02:00
"source": [
"bounds = [\n",
" [52.832432288794514, 5.354483127593994],\n",
" [53.41434089638827, 6.330699920654297]\n",
"m = folium.Map(\n",
" location=[(bounds[0][0] + bounds[1][0]) / 2, (bounds[0][1] + bounds[1][1]) / 2],\n",
" tiles='stamentoner',\n",
" zoom_start=9\n",
"source": [
2018-09-28 10:35:17 +02:00
"source": [
"plt.rcParams['figure.figsize'] = (9.5, 3)\n",
"ax0, ax1, ax2 = plt.subplots(1,3)[1]\n",
"ax0.set_xticks([]); ax0.set_yticks([])\n",
"ax1.set_xticks([]); ax1.set_yticks([])\n",
"ax2.set_xticks([]); ax2.set_yticks([])\n",
2018-09-28 16:42:18 +02:00
2018-09-28 10:35:17 +02:00
"stavoren_to_east_pixels = [295, 717]\n",
"north_to_south_pixels = [99, 525]"
2018-09-28 16:42:18 +02:00
"execution_count": null,
2018-09-28 10:35:17 +02:00
"outputs": [],
2018-09-28 10:35:17 +02:00
"source": [
"axes = plt.subplots(2,2)[1].ravel()\n",
"contours = []\n",
"for axis, c in zip(axes, relevant_colors):\n",
" bi = (im[:-100] == c[None,None]).min(axis=2)\n",
" bi = binary_closing(bi, np.ones((5,5)))\n",
" \n",
" labels = label(bi, background=False)\n",
" \n",
" contours.append(find_contours(bi, 0.5))\n",
" axis.imshow(bi)\n",
" for n, contour in enumerate(contours[-1][:1]):\n",
" axis.plot(contour[:, 1], contour[:, 0], linewidth=2)\n",
" axis.set_xticks([]); axis.set_yticks([])\n",
2018-09-28 16:42:18 +02:00
"execution_count": 3,
2018-09-28 10:35:17 +02:00
2018-09-28 16:42:18 +02:00
"outputs": [
"ename": "NameError",
"evalue": "name 'stavoren_to_east_coords' is not defined",
"output_type": "error",
"source": [
"# NOTE(review): the output saved with this cell is a NameError for\n",
"# stavoren_to_east_coords; that name has to be defined by an earlier cell.\n",
"a0, b0 = stavoren_to_east_coords\n",
"c0, d0 = stavoren_to_east_pixels\n",
"\n",
"\n",
"def scale_x(x):\n",
"    \"\"\"Linearly rescale `x` so that c0 maps to a0 and d0 maps to b0.\n",
"\n",
"    (c0, d0) come from stavoren_to_east_pixels and (a0, b0) from\n",
"    stavoren_to_east_coords. Works element-wise on numpy arrays.\n",
"    \"\"\"\n",
"    return (x - c0) / (d0 - c0) * (b0 - a0) + a0\n",
"\n",
"\n",
"a1, b1 = north_to_south_coords\n",
"c1, d1 = north_to_south_pixels\n",
"\n",
"\n",
"def scale_y(x):\n",
"    \"\"\"Linearly rescale `x` so that c1 maps to a1 and d1 maps to b1.\n",
"\n",
"    (c1, d1) come from north_to_south_pixels and (a1, b1) from\n",
"    north_to_south_coords. Works element-wise on numpy arrays.\n",
"    \"\"\"\n",
"    return (x - c1) / (d1 - c1) * (b1 - a1) + a1\n",
"\n",
"\n",
"contours_scaled = [\n",
" list(zip(scale_x(c[0][:, 1]), scale_y(c[0][:, 0])))\n",
" for c in contours\n",
2018-09-28 16:42:18 +02:00
"execution_count": 4,
2018-09-28 10:35:17 +02:00
"outputs": [
"ename": "NameError",
"evalue": "name 'contours_scaled' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-4-fc568b2f2fd5>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 10\u001b[0m }\n\u001b[1;32m 11\u001b[0m }\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mcontour\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdialect\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcontours_scaled\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mregions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m ]\n\u001b[1;32m 14\u001b[0m })\n",
"\u001b[0;31mNameError\u001b[0m: name 'contours_scaled' is not defined"
"source": [
"geojson = json.dumps({\n",
" \"type\": \"FeatureCollection\",\n",
" \"features\": [\n",
" {\n",
" \"type\": \"Feature\",\n",
" \"properties\": {'dialect': dialect},\n",
" \"geometry\": {\n",
" \"type\": \"Polygon\",\n",
" \"coordinates\": [list(map(list, contour))]\n",
" }\n",
" }\n",
" for contour, dialect in zip(contours_scaled, regions)\n",
" ]\n",
"with open('dialect_regions.geojson', 'w') as f:\n",
" f.write(geojson)"
2018-09-28 16:42:18 +02:00
"execution_count": 5,
2018-09-28 10:35:17 +02:00
2018-09-28 16:42:18 +02:00
"ename": "NameError",
"evalue": "name 'north_to_south_coords' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-5-732d7d519e9d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m m = folium.Map(\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mlocation\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnorth_to_south_coords\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstavoren_to_east_coords\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mtiles\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'Mapbox Bright'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mzoom_start\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m9\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m )\n",
"\u001b[0;31mNameError\u001b[0m: name 'north_to_south_coords' is not defined"
2018-09-28 10:35:17 +02:00
2018-09-28 10:35:17 +02:00
"source": []
2018-09-28 10:35:17 +02:00
2018-09-28 16:42:18 +02:00
"execution_count": 6,
2018-09-28 10:35:17 +02:00
2018-09-28 16:42:18 +02:00
"outputs": [
"ename": "NameError",
"evalue": "name 'contours_scaled' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-6-1008e368979e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m shapes = {\n\u001b[1;32m 2\u001b[0m \u001b[0mdialect\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"type\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"Polygon\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"coordinates\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcontour\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mcontour\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdialect\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcontours_scaled\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mregions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m }\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'contours_scaled' is not defined"
2018-09-28 10:35:17 +02:00
"source": [
"shapes = {\n",
" dialect: shape({\"type\": \"Polygon\", \"coordinates\": [list(map(list, contour))]})\n",
" for contour, dialect in zip(contours_scaled, regions)\n",
"def regions_for(coordinate):\n",
"    \"\"\"Return the set of region names whose shape contains `coordinate`.\n",
"\n",
"    `coordinate` is unpacked into `Point(*coordinate)`; the caller in this\n",
"    notebook passes `(longitude, latitude)`. Every entry of the module-level\n",
"    `shapes` mapping is tested, so the result may hold zero, one or several\n",
"    names.\n",
"    \"\"\"\n",
"    # Build the point once instead of once per candidate shape.\n",
"    point = Point(*coordinate)\n",
"    # Local names deliberately avoid shadowing the notebook-level `regions`\n",
"    # list and the `shape` constructor.\n",
"    return {\n",
"        region_name\n",
"        for region_name, region_shape in shapes.items()\n",
"        if region_shape.contains(point)\n",
"    }\n",
"def distance(shape, longitude, latitude):\n",
"    \"\"\"Distance from a position to the closest point on `shape`'s exterior ring.\n",
"\n",
"    The position is projected onto `shape.exterior` and the matching boundary\n",
"    point is recovered with `interpolate`. Both positions are then handed to\n",
"    `vincenty`, whose result is returned unchanged.\n",
"    \"\"\"\n",
"    boundary = shape.exterior\n",
"    query_point = Point(longitude, latitude)\n",
"    offset_along_boundary = boundary.project(query_point)\n",
"    nearest = boundary.interpolate(offset_along_boundary)\n",
"    # Points are built as (x=longitude, y=latitude); vincenty is called with\n",
"    # (latitude, longitude) pairs, hence the swapped order below.\n",
"    return vincenty((latitude, longitude), (nearest.y, nearest.x))"
2018-09-28 16:42:18 +02:00
"execution_count": 7,
2018-09-28 10:35:17 +02:00
"metadata": {},
"# SELECT user_lat, user_lng, question_text, answer_text\n",
"picture_games = pandas.read_sql('''\n",
"SELECT as language, as picture,\n",
" survey.user_lat as latitude, survey.user_lng as longitude,\n",
" survey.area_name as area, survey.country_name as country,\n",
" result.recording as filename,\n",
" result.submitted_at as date\n",
"FROM core_surveyresult as survey\n",
"INNER JOIN core_picturegameresult as result ON = result.survey_result_id\n",
"INNER JOIN core_language as language ON = result.language_id\n",
"INNER JOIN core_picturegameitem as item\n",
" ON result.picture_game_item_id =\n",
"''', db)\n",
"# picture_games['filename'] = [filename.split('/')[-1] for filename in picture_games['filename']]\n",
"picture_games.set_index('filename', inplace=True)"
2018-09-28 16:42:18 +02:00
"execution_count": 8,
2018-09-28 10:35:17 +02:00
2018-09-28 16:42:18 +02:00
"application/vnd.jupyter.widget-view+json": {
"model_id": "c9ac8f69c77e461fa654e81dba282ca1",
"version_major": 2,
"version_minor": 0
2018-09-28 10:35:17 +02:00
"text/plain": [
"VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='<b>0</b>s passed', placeholder='0…"
2018-09-28 16:42:18 +02:00
"ename": "NameError",
"evalue": "name 'regions_for' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-8-aabb5cdda548>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 12\u001b[0m for filename, (latitude, longitude) in ProgressBar(\n\u001b[1;32m 13\u001b[0m \u001b[0mpicture_games\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'latitude'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'longitude'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miterrows\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0msize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpicture_games\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m )\n\u001b[1;32m 16\u001b[0m ]\n",
"\u001b[0;32m<ipython-input-8-aabb5cdda548>\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m'filename'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m }\n\u001b[0;32m---> 12\u001b[0;31m for filename, (latitude, longitude) in ProgressBar(\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0mpicture_games\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'latitude'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'longitude'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miterrows\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0msize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpicture_games\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'regions_for' is not defined"
"source": [
"region_per_picture_game = [\n",
" {\n",
" 'dialects': [\n",
" {\n",
" 'dialect': dialect,\n",
" 'boundary_distance': distance(shapes[dialect], longitude, latitude),\n",
" }\n",
" for dialect in regions_for((longitude, latitude))\n",
" ],\n",
" 'filename': filename,\n",
" }\n",
" for filename, (latitude, longitude) in ProgressBar(\n",
" picture_games[['latitude', 'longitude']].iterrows(),\n",
" size=len(picture_games)\n",
" )\n",
2018-09-28 16:42:18 +02:00
"execution_count": null,
2018-09-28 10:35:17 +02:00
"outputs": [],
2018-09-28 10:35:17 +02:00
"source": [
"Counter(len(x['dialects']) for x in region_per_picture_game)"
2018-09-28 16:42:18 +02:00
"execution_count": null,
2018-09-28 10:35:17 +02:00
2018-09-28 16:42:18 +02:00
"outputs": [],
2018-09-28 10:35:17 +02:00
"source": [
"df = pandas.DataFrame([\n",
" [r['filename'], r['dialects'][0]['dialect'], r['dialects'][0]['boundary_distance']]\n",
" for r in region_per_picture_game\n",
" if len(r['dialects']) == 1\n",
"], columns = ['filename', 'dialect', 'boundary_distance'])\n",
2018-09-28 16:42:18 +02:00
"execution_count": 9,
2018-09-28 10:35:17 +02:00
"# SELECT user_lat, user_lng, question_text, answer_text\n",
"free_speech_games = pandas.read_sql('''\n",
"SELECT as language,\n",
" survey.user_lat as latitude, survey.user_lng as longitude,\n",
" survey.area_name as area, survey.country_name as country,\n",
" result.recording as filename,\n",
" result.submitted_at as date\n",
"FROM core_surveyresult as survey\n",
"INNER JOIN core_freespeechresult as result ON = result.survey_result_id\n",
"INNER JOIN core_language as language ON = result.language_id\n",
"''', db)\n",
"# free_speech_games['filename'] = [filename.split('/')[-1] for filename in free_speech_games['filename']]\n",
"free_speech_games.set_index('filename', inplace=True)"
"nbformat_minor": 1