stimmenfryslan/notebooks/Frysian pronunciation occurrence.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Frysian Pronunciation occurrence\n",
    "\n",
    "Each map displays the pronunciation occurrence in Frysian municipalities (or neighborhoods) for one word. Each pronunciation is represented by one map layer, and all the percentages in one layer add up to 100% (up to rounding errors)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Enable port forwarding from local port 3307 to port 3306 on the stimmen database machine:\n",
    "# ssh -L 3307:127.0.0.1:3306 stimmen.housing.rug.nl\n",
    "\n",
    "import pandas\n",
    "import MySQLdb\n",
    "\n",
    "from getpass import getpass\n",
    "\n",
    "# Ask for the password only when this kernel session does not know it yet,\n",
    "# so that re-running the cell does not prompt again.\n",
    "if 'mysql_password' not in globals():\n",
    "    mysql_password = getpass()\n",
    "\n",
    "# Port 3307 is the local port used in the ssh command above.\n",
    "connection_settings = dict(\n",
    "    host='127.0.0.1', port=3307,\n",
    "    user='stimmen', passwd=mysql_password,\n",
    "    db='stimmen', charset='utf8',\n",
    ")\n",
    "db = MySQLdb.connect(**connection_settings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('../')\n",
    "\n",
    "import pandas\n",
    "import numpy\n",
    "import json\n",
    "\n",
    "%matplotlib notebook\n",
    "from matplotlib import pyplot\n",
    "\n",
    "import folium\n",
    "from shapely.geometry import box, shape\n",
    "from shapely.ops import cascaded_union\n",
    "from pygeoif.geometry import mapping\n",
    "\n",
    "from folium import Polygon\n",
    "from IPython.display import display\n",
    "from folium_jsbutton import JsButton\n",
    "\n",
    "from stimmen.geojson import inject_geojson_regions_into_dataframe\n",
    "from stimmen.folium import pronunciation_heatmaps, color_bar, save_map, bar_map_css, FoliumCSS\n",
    "from stimmen.latitude_longitude import reverse_latitude_longitude\n",
    "\n",
    "from jupyter_progressbar import ProgressBar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_regions_and_styling(level):\n",
    "    \"\"\"Load the regions of one granularity and build two map-styling functions.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    level : str\n",
    "        'gemeentes' (municipalities) or 'wijken' (neighborhoods); selects the\n",
    "        '../data/Friesland_<level>.geojson' file that is read.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple\n",
    "        (regions, add_image_styling_to_map, add_html_styling_to_map): the parsed\n",
    "        GeoJSON dictionary, a function styling a folium map for png export and a\n",
    "        function styling a folium map for interactive html use.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    AssertionError\n",
    "        If level is not one of the two supported values.\n",
    "    \"\"\"\n",
    "    assert level in {'gemeentes', 'wijken'}, level\n",
    "\n",
    "    # Read the GeoJSON explicitly as UTF-8 instead of relying on the platform\n",
    "    # default encoding, which is not UTF-8 everywhere.\n",
    "    with open('../data/Friesland_{level}.geojson'.format(level=level), encoding='utf-8') as f:\n",
    "        regions = json.load(f)\n",
    "\n",
    "    union_of_all_regions = cascaded_union([\n",
    "        shape(feature['geometry'])\n",
    "        for feature in regions['features']\n",
    "    ])\n",
    "\n",
    "    # A large bounding box minus all loaded regions: used to draw a\n",
    "    # semi-transparent overlay over everything outside of the regions,\n",
    "    # to avoid crowded maps.\n",
    "    background = box(2, 40, 10, 60).difference(union_of_all_regions)\n",
    "\n",
    "    cmap = pyplot.get_cmap('YlOrRd')\n",
    "\n",
    "    def rgba(color):\n",
    "        \"\"\"Format the first three channels of a 0..1 color tuple as a CSS\n",
    "        rgba() string with a fixed alpha of 0.8.\"\"\"\n",
    "        channels = ','.join('{:d}'.format(int(255 * channel)) for channel in color[:3])\n",
    "        return 'rgba(' + channels + ',0.8)'\n",
    "\n",
    "    # One tick per 20 percentage points, keyed by the fraction (0.0 .. 1.0).\n",
    "    colorbar_ticks = {\n",
    "        p/100: {'color': rgba(cmap(int(p*2.55))), 'value': '{}%'.format(p)}\n",
    "        for p in range(0, 101, 20)\n",
    "    }\n",
    "\n",
    "    def add_image_styling_to_map(map_):\n",
    "        \"\"\"Add the styling used for png images to map_:\n",
    "         - a semi-transparent white polygon over the area outside the regions\n",
    "         - a color bar with a 50pt font at scale 5\"\"\"\n",
    "\n",
    "        # semi-transparent white background\n",
    "        Polygon(\n",
    "            reverse_latitude_longitude(mapping(background)['coordinates']),\n",
    "            fill_color='#fff', color='#000000', fill_opacity=0.8\n",
    "        ).add_to(map_)\n",
    "\n",
    "        color_bar(colorbar_ticks, fontsize='50pt', scale=5).add_to(map_)\n",
    "\n",
    "    def add_html_styling_to_map(map_):\n",
    "        \"\"\"Add the styling used for interactive html maps to map_:\n",
    "         - an expanded layer control in the top right corner\n",
    "         - a color bar with a 25pt font at scale 2\n",
    "         - a button toggling the elements with class 'percentage-label'\"\"\"\n",
    "        folium.map.LayerControl('topright', collapsed=False).add_to(map_)\n",
    "\n",
    "        color_bar(colorbar_ticks, fontsize='25pt', scale=2).add_to(map_)\n",
    "\n",
    "        JsButton(\n",
    "            title='<i class=\"fas fa-tags\"></i>',\n",
    "            function=\"\"\"\n",
    "                function(btn, map){\n",
    "                    $('.percentage-label').toggle();\n",
    "                }\n",
    "            \"\"\"\n",
    "        ).add_to(map_)\n",
    "\n",
    "    return regions, add_image_styling_to_map, add_html_styling_to_map"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Answers to how participants state a word should be pronounced.\n",
    "\n",
    "answers_query = '''\n",
    "SELECT\n",
    "    prediction_quiz_id,\n",
    "    user_lat, user_lng,\n",
    "    question_text, answer_text\n",
    "FROM       core_surveyresult as survey\n",
    "INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id\n",
    "INNER JOIN core_predictionquizresultquestionanswer as answer\n",
    "    ON result.id = answer.prediction_quiz_id\n",
    "WHERE\n",
    "    survey.submitted_at >= '2017-09-17'\n",
    "    AND result.submitted_at >= '2017-09-17'\n",
    "'''\n",
    "\n",
    "\n",
    "def _without_quotes_and_asterisks(text):\n",
    "    \"\"\"Remove every double quote and every asterisk from text.\"\"\"\n",
    "    return text.replace('\"', '').replace('*', '')\n",
    "\n",
    "\n",
    "def _inside_parentheses(text):\n",
    "    \"\"\"Return the part of text after the first '(' and before the first ')'.\"\"\"\n",
    "    opening, closing = text.find('('), text.find(')')\n",
    "    return text[opening:closing][1:]\n",
    "\n",
    "\n",
    "answers = pandas.read_sql(answers_query, db)\n",
    "\n",
    "answers['question_text'] = answers['question_text'].map(_without_quotes_and_asterisks)\n",
    "answers['answer_text'] = answers['answer_text'].map(_inside_parentheses)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d9d727b6088e4202b8a70024577a6b65",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='<b>0</b>s passed', placeholder='0…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2a699de690884a2a8945ea753dbd4339",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='<b>0</b>s passed', placeholder='0…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Centre shared by every exported png, so that all images show the same frame.\n",
    "IMAGE_MAP_CENTER = (53.15936723072875 + 0.06, 5.618661585181898 + 0.15)\n",
    "\n",
    "maps = {'wijken': {}, 'gemeentes': {}}\n",
    "\n",
    "for region_granularity in maps.keys():\n",
    "    regions, add_image_styling_to_map, add_html_styling_to_map = get_regions_and_styling(\n",
    "        region_granularity)\n",
    "\n",
    "    # Name of the GeoJSON property that identifies a region at this granularity.\n",
    "    region_name_property = {\n",
    "        'gemeentes':'gemeente_naam',\n",
    "        'wijken':'gemeente_en_wijk_naam'\n",
    "    }[region_granularity]\n",
    "\n",
    "    # Work on a copy and store the result under a new name: `answers` stays as\n",
    "    # loaded from the database, so each granularity starts from the same data\n",
    "    # and this cell can be re-run without re-running the query cell.\n",
    "    answers_with_regions = inject_geojson_regions_into_dataframe(\n",
    "        regions, answers.copy(),\n",
    "        latitude_column='user_lat', longitude_column='user_lng',\n",
    "        region_name_property=region_name_property,\n",
    "        region_name_column='region'\n",
    "    )\n",
    "\n",
    "    for word, word_rows in ProgressBar(answers_with_regions.groupby('question_text')):\n",
    "        # One interactive map per word, centred on the median participant location.\n",
    "        html_map = folium.Map(\n",
    "            (word_rows['user_lat'].median(), word_rows['user_lng'].median()),\n",
    "            tiles=None, zoom_start=9)\n",
    "\n",
    "        def feature_groups(with_label=False):\n",
    "            \"\"\"Build the heatmap layers of the current word, keyed by pronunciation;\n",
    "            with_label additionally passes label_font_size=5.\"\"\"\n",
    "            return pronunciation_heatmaps(\n",
    "                regions, word_rows,\n",
    "                region_name_property=region_name_property,\n",
    "                region_name_column='region',\n",
    "                group_column='answer_text',\n",
    "                **({'label_font_size': 5} if with_label else {})\n",
    "            ).items()\n",
    "\n",
    "        # One png per pronunciation, each on its own map without labels.\n",
    "        for pronunciation, feature_group in feature_groups():\n",
    "            image_map = folium.Map(\n",
    "                IMAGE_MAP_CENTER,\n",
    "                tiles=None, zoom_start=11, zoom_control=False\n",
    "            )\n",
    "            add_image_styling_to_map(image_map)\n",
    "            feature_group.add_to(image_map)\n",
    "            save_map(\n",
    "                image_map,\n",
    "                f'../images/heatmaps/{region_granularity}_{word}_{pronunciation}.png',\n",
    "                resolution=(2050, 2000),\n",
    "                headless=True\n",
    "            )\n",
    "\n",
    "        # All pronunciations as labelled layers of the single html map.\n",
    "        for pronunciation, feature_group in feature_groups(with_label=True):\n",
    "            feature_group.add_to(html_map)\n",
    "\n",
    "        add_html_styling_to_map(html_map)\n",
    "        html_map.save(f'../maps/heatmaps/{region_granularity}_{word}.html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "import html\n",
    "import os\n",
    "from urllib.parse import quote\n",
    "\n",
    "# File names of all generated map pages. The index itself is skipped, so the\n",
    "# index never contains a link to itself (also when this cell is re-run).\n",
    "map_pages = sorted(\n",
    "    os.path.basename(path)\n",
    "    for path in glob.glob('../maps/heatmaps/*.html')\n",
    "    if os.path.basename(path) != 'index.html'\n",
    ")\n",
    "\n",
    "# One link per page: the href is percent-encoded, the label is the file name\n",
    "# without '.html' and with underscores shown as spaces.\n",
    "links = '<br/>\\n'.join(\n",
    "    '\\t<a href=\"{}\">{}</a>'.format(\n",
    "        quote(page),\n",
    "        html.escape(page[:-5].replace('_', ' '))\n",
    "    )\n",
    "    for page in map_pages\n",
    ")\n",
    "\n",
    "# Write as UTF-8 and declare that charset, so non-ASCII labels render correctly.\n",
    "with open('../maps/heatmaps/index.html', 'w', encoding='utf-8') as f:\n",
    "    f.write(\n",
    "        '<html><head><meta charset=\"utf-8\"></head><body>'\n",
    "        + links\n",
    "        + '</body></html>'\n",
    "    )"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}