stimmenfryslan/notebooks/Prediction Quiz Participati...

333 lines
9.8 MiB
Plaintext
Raw Permalink Normal View History

2019-03-19 13:33:38 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('../')\n",
"\n",
"import pandas\n",
"import MySQLdb\n",
"import numpy\n",
"import json\n",
"\n",
"db = MySQLdb.connect(user='root', passwd='Nmmxhjgt1@', db='stimmen', charset='utf8')\n",
"\n",
"%matplotlib notebook\n",
"import folium\n",
"from stimmen.geojson import inject_geojson_regions_into_dataframe\n",
"from stimmen.folium import pronunciation_bars, save_map, bar_map_css, FoliumCSS, color_bar, save_map\n",
"from stimmen.latitude_longitude import reverse_latitude_longitude\n",
"from IPython.display import display\n",
"\n",
"from collections import Counter\n",
"from matplotlib import pyplot\n",
"from math import log10, floor, ceil\n",
"import humanize"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/Friesland_wijken.geojson') as f:\n",
" regions = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Answers to how participants state a word should be pronounces.\n",
"\n",
"answers = pandas.read_sql('''\n",
"SELECT prediction_quiz_id, user_lat, user_lng, question_text, answer_text, survey_result_id, result.submitted_at\n",
"FROM core_surveyresult as survey\n",
"INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id\n",
"INNER JOIN core_predictionquizresultquestionanswer as answer\n",
" ON result.id = answer.prediction_quiz_id\n",
"WHERE\n",
" survey.submitted_at >= '2017-09-17'\n",
" AND result.submitted_at >= '2017-09-17'\n",
"''', db)\n",
"\n",
"answers = inject_geojson_regions_into_dataframe(\n",
" regions, answers,\n",
" latitude_column='user_lat', longitude_column='user_lng',\n",
" region_name_property='gemeente_en_wijk_naam',\n",
" region_name_column='region'\n",
")\n",
"\n",
"answers['question_text'] = answers['question_text'].map(lambda x: x.replace('\"', '').replace('*', ''))\n",
"\n",
"answers['answer_text'] = answers['answer_text'].map(lambda x: x[x.find('('):x.find(')')][1:])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from shapely.geometry import box, shape\n",
"from pygeoif.geometry import mapping\n",
"from folium import Polygon\n",
"\n",
"background = box(2, 40, 10, 60)\n",
"for feature in regions['features']:\n",
" background = background.difference(shape(feature['geometry']))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"participation_per_region = Counter(answers['region'])\n",
"log_participation_per_region = {k: log10(v) for k, v in participation_per_region.items()}"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0.08</th>\n",
" <th>0.4335660219465108</th>\n",
" <th>0.7871320438930215</th>\n",
" <th>1.0</th>\n",
" <th>0.0</th>\n",
" <th>0.079999</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>color</th>\n",
" <td>#ffffccFF</td>\n",
" <td>#fdaf4aFF</td>\n",
" <td>#dd161dFF</td>\n",
" <td>#800026FF</td>\n",
" <td>black</td>\n",
" <td>black</td>\n",
" </tr>\n",
" <tr>\n",
" <th>value</th>\n",
" <td>10</td>\n",
" <td>100</td>\n",
" <td>1,000</td>\n",
" <td>4,000</td>\n",
" <td>no samples</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0.080000 0.433566 0.787132 1.000000 0.000000 0.079999\n",
"color #ffffccFF #fdaf4aFF #dd161dFF #800026FF black black\n",
"value 10 100 1,000 4,000 no samples NaN"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"start = floor(min(log_participation_per_region.values()))\n",
"before_end = (10**floor(max(log_participation_per_region.values())))\n",
"end = ceil(max(participation_per_region.values()) / before_end) * before_end\n",
"log_end = log10(end)\n",
"\n",
"scale = lambda x: (x - start) / (log_end - start)\n",
"\n",
"cmap = pyplot.get_cmap('YlOrRd')\n",
"tocolor = lambda x: '#{:02x}{:02x}{:02x}FF'.format(*(int(255*i) for i in cmap(x)[:3]))\n",
"\n",
"head_start = 0.08\n",
"colorbar_ticks = {\n",
" ((1-head_start)*p + head_start): {\n",
" 'value': humanize.intcomma(10**i if isinstance(i, int) else end),\n",
" 'color': tocolor(p)\n",
" }\n",
" for i in list(range(start, int(log_end)+1)) + [log_end] \n",
" for p in [scale(i)] # alias\n",
"}\n",
"colorbar_ticks[0] = {'color': 'black', 'value': 'no samples'}\n",
"colorbar_ticks[head_start-0.000001] = {'color': 'black'}\n",
"pandas.DataFrame(colorbar_ticks)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"width:100%;\"><div style=\"position:relative;width:100%;height:0;padding-bottom:60%;\"><iframe src=\"data:text/html;charset=utf-8;base64,PCFET0NUWVBFIGh0bWw+CjxoZWFkPiAgICAKICAgIDxtZXRhIGh0dHAtZXF1aXY9ImNvbnRlbnQtdHlwZSIgY29udGVudD0idGV4dC9odG1sOyBjaGFyc2V0PVVURi04IiAvPgogICAgPHNjcmlwdD5MX1BSRUZFUl9DQU5WQVM9ZmFsc2U7IExfTk9fVE9VQ0g9ZmFsc2U7IExfRElTQUJMRV8zRD1mYWxzZTs8L3NjcmlwdD4KICAgIDxzY3JpcHQgc3JjPSJodHRwczovL2Nkbi5qc2RlbGl2ci5uZXQvbnBtL2xlYWZsZXRAMS4yLjAvZGlzdC9sZWFmbGV0LmpzIj48L3NjcmlwdD4KICAgIDxzY3JpcHQgc3JjPSJodHRwczovL2FqYXguZ29vZ2xlYXBpcy5jb20vYWpheC9saWJzL2pxdWVyeS8xLjExLjEvanF1ZXJ5Lm1pbi5qcyI+PC9zY3JpcHQ+CiAgICA8c2NyaXB0IHNyYz0iaHR0cHM6Ly9tYXhjZG4uYm9vdHN0cmFwY2RuLmNvbS9ib290c3RyYXAvMy4yLjAvanMvYm9vdHN0cmFwLm1pbi5qcyI+PC9zY3JpcHQ+CiAgICA8c2NyaXB0IHNyYz0iaHR0cHM6Ly9jZG5qcy5jbG91ZGZsYXJlLmNvbS9hamF4L2xpYnMvTGVhZmxldC5hd2Vzb21lLW1hcmtlcnMvMi4wLjIvbGVhZmxldC5hd2Vzb21lLW1hcmtlcnMuanMiPjwvc2NyaXB0PgogICAgPGxpbmsgcmVsPSJzdHlsZXNoZWV0IiBocmVmPSJodHRwczovL2Nkbi5qc2RlbGl2ci5uZXQvbnBtL2xlYWZsZXRAMS4yLjAvZGlzdC9sZWFmbGV0LmNzcyIvPgogICAgPGxpbmsgcmVsPSJzdHlsZXNoZWV0IiBocmVmPSJodHRwczovL21heGNkbi5ib290c3RyYXBjZG4uY29tL2Jvb3RzdHJhcC8zLjIuMC9jc3MvYm9vdHN0cmFwLm1pbi5jc3MiLz4KICAgIDxsaW5rIHJlbD0ic3R5bGVzaGVldCIgaHJlZj0iaHR0cHM6Ly9tYXhjZG4uYm9vdHN0cmFwY2RuLmNvbS9ib290c3RyYXAvMy4yLjAvY3NzL2Jvb3RzdHJhcC10aGVtZS5taW4uY3NzIi8+CiAgICA8bGluayByZWw9InN0eWxlc2hlZXQiIGhyZWY9Imh0dHBzOi8vbWF4Y2RuLmJvb3RzdHJhcGNkbi5jb20vZm9udC1hd2Vzb21lLzQuNi4zL2Nzcy9mb250LWF3ZXNvbWUubWluLmNzcyIvPgogICAgPGxpbmsgcmVsPSJzdHlsZXNoZWV0IiBocmVmPSJodHRwczovL2NkbmpzLmNsb3VkZmxhcmUuY29tL2FqYXgvbGlicy9MZWFmbGV0LmF3ZXNvbWUtbWFya2Vycy8yLjAuMi9sZWFmbGV0LmF3ZXNvbWUtbWFya2Vycy5jc3MiLz4KICAgIDxsaW5rIHJlbD0ic3R5bGVzaGVldCIgaHJlZj0iaHR0cHM6Ly9yYXdnaXQuY29tL3B5dGhvbi12aXN1YWxpemF0aW9uL2ZvbGl1bS9tYXN0ZXIvZm9saXVtL3RlbXBsYXRlcy9sZWFmbGV0LmF3ZXNvbWUucm90YXRlLmNzcyIvPgogICAgPHN0eWxlPmh0bWwsIGJvZHkge3dpZHRoOiAxMDAlO2hlaWdodDogMTAwJTttYXJnaW46IDA7cGFkZGluZzogMDt9PC9zdHlsZT4KICAgIDxzdHlsZT4jbWFwIHtwb3NpdGlvbjphYnNvbHV0ZTt0b3A6MDtib3R0b206MDtyaWdodDowO2xlZnQ6MDt9PC9zdHlsZT4KICAgIAogICAgPHN0eWxlPiNtYXBfYzY0NDFlYTNjNTllNGMxYjk2OWM3ZDY4MjgzYTVmMTcgewogICAgICAgIHBvc2l0aW9uOiByZWxhdGl2ZTsKICAgICAgICB3aWR0aDogMTAwLjAlOwogICAgICAgIGhlaWdodDogMTAwLjAlOwogICAgICAgIGxlZnQ6IDAuMCU7CiAgICAgICAgdG9wOiAwLjAlOwogICAgICAgIH0KICAgIDwvc3R5bGU+CiAgICAKICAgICAgICA8c3R5bGU+CiAgICAgICAgCnRhYmxlLmxlYWZsZXQtZmlsdGVyLXRhYmxlIHRkIHsgcGFkZGluZy1sZWZ0OiAxMHB4OyBwYWRkaW5nLXJpZ2h0OiAxMHB4OyBib3JkZXItYm90dG9tOiA1cHg7fQoKdGFibGUubGVhZmxldC1maWx0ZXItdGFibGUgbGFiZWwgeyBkaXNwbGF5OiBpbmxpbmUtYmxvY2s7IG1pbi13aWR0aDogNTBweDsgfQoKLmNvbG9yYmFyIHsKICB3aWR0aDogY2FsYygxICogMjVweCk7CiAgcG9zaXRpb246IHJlbGF0aXZlOwogIGJhY2tncm91bmQtY29sb3I6IHJnYmEoMjU1LCAyNTUsIDI1NSwgMSk7CiAgcG9zaXRpb246IGZpeGVkOwogIHRvcDogY2FsYyg1MHB4ICogMSk7CiAgd2lkdGg6IGNhbGMoMjBweCAqIDEpOwogIGxlZnQ6IGNhbGMoNXB4ICogMSk7CiAgaGVpZ2h0OiBjYWxjKDEwMCUgLSAxICogMTAwcHgpOwogIHotaW5kZXg6IDEwMDA7Cn0KCi5jb2xvcmJhciAuY29sb3JzIHsKICB3aWR0aDogMTAwJTsKICBoZWlnaHQ6IDEwMCU7CiAgcG9zaXRpb246IGFic29sdXRlOwogIHRvcDogMDsgbGVmdDowOwogIGJhY2tncm91bmQtY29sb3I6IHJnYmEoMjU1LCAyNTUsIDI1NSwgMSk7Cn0KCi5jb2xvcmJhciAudGljayB7CiAgbGVmdDogMTAwJTsKICBwb3NpdGlvbjogYWJzb2x1dGU7CiAgd2lkdGg6IDIwJTsKICBib3JkZXItc3R5bGU6IHNvbGlkOwogIGJvcmRlci1jb2xvcjogYmxhY2s7CiAgYm9yZGVyLXdpZHRoOiAxcHggMCAwIDA7CiAgaGVpZ2h0OiAxMHB4Owp9CgouY29sb3JiYXIgLmxhYmVsIHsKICBib3JkZXItcmFkaXVzOiBjYWxjKDIwcHQgLyA0KTsKICBiYWNrZ3JvdW5kLWNvbG9yOiAjZmZmOwogIGNvbG9yOiAjMDAwOwogIGxlZnQ6MTMwJTsKICBwb3NpdGlvbjogYWJzb2x1dGU7CiAgYm9yZGVyOiAwOwogIGhlaWdodDogMjBwdDsKICBtYXJnaW4tdG9wOiBjYWxjKDIwcHQgLyAtMik7CiAgbGluZS1oZWlnaHQ6IDIwcHQ7CiAgdmVydGljYWwtYWxpZ246IG1pZGRsZTsKICBmb250LXNpemU6IDIwcHQ7CiAgcGFkZGluZzogMHB4IDNweDsKICBmb250LWZhbWlseTogR2FyYW1vbmQ7Cn0KICAgICAgICA8L3N0eWxlPgogICAgICAgIDxzY3JpcHQgbGFuZ2F1Z2U9IkphdmFTY3JpcHQiPgogICAgICAgIApqUXVlcnkucHJvdG90eXBlLmNvbG9yYmFyX2xlZ2VuZCA9IGZ1bmN0aW9uKGNvbG9ybWFwLCBzbW9vdGgpIHsKICAgICQodGhpcykuaHRtbCgnJykKICAgIHZhciBjb2xvcm1hcF8gPSB7fQogICAgZm9yICh2YXIgaWR4IGluIGNvbG9ybWFwKSB7CiAgICAgICAgY29sb3JtYXBfW3BhcnNlRmxvYXQoaWR4KV0gPSBjb2xvcm1hcFtpZHhdOwogICAgfQogICAgY29sb3JtYXAgPSBjb
],
"text/plain": [
"<folium.folium.Map at 0x7f0f436f9da0>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m = folium.Map(\n",
" (53.15936723072875 + 0.025, 5.618661585181898 + 0.15),\n",
" zoom_control = False,\n",
" tiles='stamentoner', zoom_start=9, attr=(\n",
" \"Map tiles by Stamen Design, under CC BY 3.0. Data by OpenStreetMap\"\n",
" \", under CC BY SA.\"))\n",
"\n",
"for feature in regions['features']:\n",
" region_name = feature['properties']['gemeente_en_wijk_naam']\n",
" if region_name in log_participation_per_region:\n",
" log_participation = log_participation_per_region[region_name]\n",
" fill_color = tocolor(scale(log_participation))\n",
" folium.Polygon(\n",
" reverse_latitude_longitude(feature['geometry']['coordinates']),\n",
" fill_color=fill_color,\n",
" color='black',\n",
" fill_opacity=1,\n",
" ).add_to(m)\n",
" else:\n",
" folium.Polygon(\n",
" reverse_latitude_longitude(feature['geometry']['coordinates']),\n",
" fill_color='black',\n",
" color='black',\n",
" fill_opacity=1,\n",
" ).add_to(m)\n",
"\n",
"color_bar(colorbar_ticks, fontsize='20pt', scale=1).add_to(m)\n",
"\n",
"m.save('../maps/prediction_quiz_participation.html')\n",
"\n",
"m"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"m = folium.Map(\n",
" (53.15936723072875 + 0.025, 5.618661585181898 + 0.15),\n",
" zoom_control = False,\n",
" tiles='stamentoner', zoom_start=11, attr=(\n",
" \"Map tiles by Stamen Design, under CC BY 3.0. Data by OpenStreetMap\"\n",
" \", under CC BY SA.\"))\n",
"\n",
"Polygon(\n",
" reverse_latitude_longitude(mapping(background)['coordinates']),\n",
" fill_color='#fff', color='#000000', fill_opacity=0.8\n",
").add_to(m)\n",
"\n",
"for feature in regions['features']:\n",
" region_name = feature['properties']['gemeente_en_wijk_naam']\n",
" if region_name in log_participation_per_region:\n",
" log_participation = log_participation_per_region[region_name]\n",
" fill_color = tocolor(scale(log_participation))\n",
" folium.Polygon(\n",
" reverse_latitude_longitude(feature['geometry']['coordinates']),\n",
" fill_color=fill_color,\n",
" color='black',\n",
" fill_opacity=1,\n",
" ).add_to(m)\n",
" \n",
"# region_shape = shape(feature['geometry'])\n",
"# if region_shape.area > 0.005:\n",
"# rect = region_shape.boundary.simplify(10000000)\n",
"# participation = participation_per_region[region_name]\n",
" \n",
"# folium.map.Marker(\n",
"# [rect.centroid.y, rect.centroid.x],\n",
"# icon=folium.DivIcon(\n",
"# icon_size=(50 / 12 * 50, 24 / 12 * 50),\n",
"# icon_anchor=(25 / 12 * 50, 50),\n",
"# html=(\n",
"# '<div class=\"percentage-label\" style=\"font-size: 30px; '\n",
"# 'background: transparent; text-align: center; color: white;\">'\n",
"# '{}</div>').format(humanize.intcomma(participation)),\n",
"# )\n",
"# ).add_to(m)\n",
" else:\n",
" folium.Polygon(\n",
" reverse_latitude_longitude(feature['geometry']['coordinates']),\n",
" fill_color='black',\n",
" color='black',\n",
" fill_opacity=1,\n",
" ).add_to(m)\n",
"\n",
"color_bar(colorbar_ticks, fontsize='50pt', scale=5).add_to(m)\n",
"\n",
"save_map(m, '../images/participation_per_neighborhood.png', resolution=(1900, 2000), headless=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}