stimmenfryslan/notebooks/Frysian pronunciation occur...

10 KiB

Frysian Pronunciation occurrence

Each map displays the pronunciation occurrence in Frysian municipalities for one word. Each pronunciation is represented by one map layer, and all the percentages in one layer add up to 100%, up to rounding errors.

In [36]:
# Enable port forwarding from 3307 locally to 3306 on the stimmen database machine
# ssh -L 3307:127.0.0.1:3306 stimmen.housing.rug.nl

import pandas
import MySQLdb

from getpass import getpass

# Prompt for the password only when it is not defined yet, so re-running this
# cell in the same kernel session does not ask for it again.
if 'mysql_password' not in globals():
    mysql_password = getpass()

# Connect to port 3307 on localhost: the local end of the SSH tunnel described
# at the top of this cell, which forwards to port 3306 on the database machine.
db = MySQLdb.connect(
    host='127.0.0.1', port=3307,
    user='stimmen', passwd=mysql_password,
    db='stimmen', charset='utf8'
)
In [37]:
import sys
sys.path.append('../')

import pandas
import numpy
import json

%matplotlib notebook
from matplotlib import pyplot

import folium
from shapely.geometry import box, shape
from shapely.ops import cascaded_union
from pygeoif.geometry import mapping

from folium import Polygon
from IPython.display import display
from folium_jsbutton import JsButton

from stimmen.geojson import inject_geojson_regions_into_dataframe
from stimmen.folium import pronunciation_heatmaps, color_bar, save_map, bar_map_css, FoliumCSS
from stimmen.latitude_longitude import reverse_latitude_longitude

from jupyter_progressbar import ProgressBar
In [38]:
def get_regions_and_styling(level):
    """Load the regions of one granularity and build matching map stylers.

    Args:
        level: Region granularity, either ``'gemeentes'`` (municipalities)
            or ``'wijken'`` (neighborhoods). It selects the file
            ``../data/Friesland_<level>.geojson``.

    Returns:
        A tuple ``(regions, add_image_styling_to_map,
        add_html_styling_to_map)``: the parsed GeoJSON, a function that
        styles a map meant to be saved as png, and a function that styles
        a map meant to be saved as interactive html. Both functions take a
        folium map, add elements to it in place and return ``None``.

    Raises:
        ValueError: If ``level`` is not a supported granularity.
    """
    # Raise explicitly instead of using ``assert``: assertions are stripped
    # when Python runs with -O, which would turn a typo in ``level`` into a
    # confusing file-not-found error further down.
    if level not in {'gemeentes', 'wijken'}:
        raise ValueError(
            "level must be 'gemeentes' or 'wijken', got {!r}".format(level))

    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    geojson_path = '../data/Friesland_{level}.geojson'.format(level=level)
    with open(geojson_path, encoding='utf-8') as f:
        regions = json.load(f)

    # NOTE(review): cascaded_union is deprecated in recent Shapely releases
    # in favour of unary_union; switch when the notebook imports are updated.
    union_of_all_municipalities = cascaded_union([
        shape(feature['geometry'])
        for feature in regions['features']
    ])

    # Everything inside a large bounding box except the loaded regions. It
    # allows for creating a semi-transparent background of regions outside
    # of Fryslan, to avoid crowded maps.
    background = box(2, 40, 10, 60).difference(union_of_all_municipalities)

    cmap = pyplot.get_cmap('YlOrRd')

    def rgba(color):
        """Format the first three channels of a 0-1 color tuple as a css
        ``rgba(r,g,b,0.8)`` string with 0-255 integer channels."""
        channels = ','.join(
            '{:d}'.format(int(255 * channel)) for channel in color[:3])
        return 'rgba(' + channels + ',0.8)'

    # One tick per 20%, keyed by the fraction (0.0 - 1.0); ``p * 2.55``
    # scales the percentage to an integer colormap index.
    colorbar_ticks = {
        p/100: {'color': rgba(cmap(int(p*2.55))), 'value': '{}%'.format(p)}
        for p in range(0, 101, 20)
    }

    def add_image_styling_to_map(map_):
        """Add the styling for png images to ``map_``:
         - a semi-transparent white polygon with a black outline covering
           the area outside of the loaded regions
         - a color bar with a large (50pt) font"""
        Polygon(
            reverse_latitude_longitude(mapping(background)['coordinates']),
            fill_color='#fff', color='#000000', fill_opacity=0.8
        ).add_to(map_)

        color_bar(colorbar_ticks, fontsize='50pt', scale=5).add_to(map_)

    def add_html_styling_to_map(map_):
        """Add the styling for interactive html maps to ``map_``:
         - an expanded layer control in the top right corner
         - a color bar with a 25pt font
         - a button that toggles the elements with class
           ``percentage-label``"""
        folium.map.LayerControl('topright', collapsed=False).add_to(map_)

        color_bar(colorbar_ticks, fontsize='25pt', scale=2).add_to(map_)

        JsButton(
            title='<i class="fas fa-tags"></i>',
            function="""
                function(btn, map){
                    $('.percentage-label').toggle();
                }
            """
        ).add_to(map_)

    return regions, add_image_styling_to_map, add_html_styling_to_map
In [39]:
# Answers to how participants state a word should be pronounced.

def _strip_quotes_and_asterisks(text):
    """Return ``text`` with every double quote and asterisk removed."""
    for unwanted in ('"', '*'):
        text = text.replace(unwanted, '')
    return text


def _text_between_parentheses(text):
    """Slice ``text`` from its first '(' up to its first ')' and drop the
    first character of that slice (the opening parenthesis).

    The positions come straight from ``str.find``, so a missing parenthesis
    yields -1 as slice bound.
    """
    opening = text.find('(')
    closing = text.find(')')
    return text[opening:closing][1:]


# One row per answered prediction-quiz question, with the location the
# participant reported, for everything submitted from 2017-09-17 onwards.
answers_query = '''
SELECT
    prediction_quiz_id,
    user_lat, user_lng,
    question_text, answer_text
FROM       core_surveyresult as survey
INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id
INNER JOIN core_predictionquizresultquestionanswer as answer
    ON result.id = answer.prediction_quiz_id
WHERE
    survey.submitted_at >= '2017-09-17'
    AND result.submitted_at >= '2017-09-17'
'''
answers = pandas.read_sql(answers_query, db)

# Clean up the question text, and reduce every answer to the part that is
# written between parentheses.
answers['question_text'] = answers['question_text'].map(_strip_quotes_and_asterisks)
answers['answer_text'] = answers['answer_text'].map(_text_between_parentheses)
In [40]:
# Only the keys of this dictionary are used in this cell: they are the region
# granularities for which maps are generated.
maps = {'wijken': {}, 'gemeentes': {}}

# Name of the GeoJSON property that holds the region name, per granularity.
region_name_properties = {
    'gemeentes': 'gemeente_naam',
    'wijken': 'gemeente_en_wijk_naam',
}

for region_granularity in maps:
    regions, add_image_styling_to_map, add_html_styling_to_map = get_regions_and_styling(
        region_granularity)

    region_name_property = region_name_properties[region_granularity]

    # Bind the result to its own name instead of overwriting ``answers``:
    # otherwise the second granularity (and every re-run of this cell) would
    # feed the already region-annotated frame back into the injection.
    answers_with_regions = inject_geojson_regions_into_dataframe(
        regions, answers,
        latitude_column='user_lat', longitude_column='user_lng',
        region_name_property=region_name_property,
        region_name_column='region'
    )

    for word, word_rows in ProgressBar(answers_with_regions.groupby('question_text')):
        # Interactive map for this word, centered on the median location of
        # the participants that answered it.
        html_map = folium.Map(
            (word_rows['user_lat'].median(), word_rows['user_lng'].median()),
            tiles=None, zoom_start=9)

        def feature_groups(with_label=False):
            """Return (pronunciation, feature group) pairs for the current
            word; ``with_label`` passes ``label_font_size=5`` to
            ``pronunciation_heatmaps``."""
            label_options = {'label_font_size': 5} if with_label else {}
            return pronunciation_heatmaps(
                regions, word_rows,
                region_name_property=region_name_property,
                region_name_column='region',
                group_column='answer_text',
                **label_options
            ).items()

        # One png per pronunciation, always rendered with the same fixed
        # center and zoom level so that all images share one view.
        # NOTE(review): word and pronunciation go into the file name as-is;
        # verify they cannot contain path separators.
        for pronunciation, feature_group in feature_groups():
            image_map = folium.Map(
                (53.15936723072875 + 0.06, 5.618661585181898 + 0.15),
                tiles=None, zoom_start=11, zoom_control=False
            )
            add_image_styling_to_map(image_map)
            feature_group.add_to(image_map)
            save_map(
                image_map,
                f'../images/heatmaps/{region_granularity}_{word}_{pronunciation}.png',
                resolution=(2050, 2000),
                headless=True
            )

        # One html page per word, with every pronunciation as its own layer.
        for pronunciation, feature_group in feature_groups(with_label=True):
            feature_group.add_to(html_map)

        add_html_styling_to_map(html_map)
        html_map.save(f'../maps/heatmaps/{region_granularity}_{word}.html')
VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='<b>0</b>s passed', placeholder='0…
VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='<b>0</b>s passed', placeholder='0…
In [41]:
import glob
import html
import os
from urllib.parse import quote

# Write an index page that links to every generated heatmap page.
heatmap_directory = '../maps/heatmaps/'
index_filename = 'index.html'

# Collect the pages before the index is opened for writing, and skip the
# index itself so that it does not end up linking to itself.
page_names = sorted(
    os.path.basename(path)
    for path in glob.glob(heatmap_directory + '*.html')
)
links = [
    '\t<a href="{}">{}</a>'.format(
        quote(name),  # percent-encode spaces and non-ascii characters
        html.escape(name[:-5].replace('_', ' '))  # file name without '.html'
    )
    for name in page_names
    if name != index_filename
]

# Write UTF-8 and declare it, so words with diacritics display correctly
# regardless of the platform's default encoding.
with open(heatmap_directory + index_filename, 'w', encoding='utf-8') as f:
    f.write(
        '<html><head><meta charset="utf-8"></head><body>'
        + '<br/>\n'.join(links)
        + "</body></html>"
    )