stimmenfryslan/notebooks/Gabmap Pronunciation Tables...

4.2 KiB

Geographical pronunciation tables

Creates Gabmap input files for the wijken (districts) of Friesland: region centroids, the pronunciations recorded per region, and the percentage of answers each pronunciation received.

In [1]:
import sys
sys.path.append('..')

import pandas
import MySQLdb
import json
import copy

# Connect to the MySQL database `stimmen` that holds the survey results.
# The password is read from the STIMMEN_DB_PASSWORD environment variable, or
# prompted for interactively, so that no credential is stored in the notebook.
# NOTE(review): the password that used to be hard-coded here has been
# committed to version control and should be treated as leaked — rotate it.
import getpass
import os

db = MySQLdb.connect(
    user=os.environ.get('STIMMEN_DB_USER', 'root'),
    passwd=os.environ.get('STIMMEN_DB_PASSWORD') or getpass.getpass('MySQL password: '),
    db='stimmen',
    charset='utf8',
)

from shapely.geometry import shape, Point

from gabmap import create_gabmap_dataframes
In [2]:
# Load the region definitions (a GeoJSON document) into a plain dict.
# GeoJSON is UTF-8 encoded; pass the encoding explicitly so that non-ASCII
# region names are decoded correctly regardless of the platform's locale.
with open('../data/Friesland_wijken.geojson', encoding='utf-8') as f:
    regions = json.load(f)
In [3]:
# Fetch the participants' answers on how a word should be pronounced.
# Each row carries the quiz id, the participant's coordinates (user_lat,
# user_lng), the question text and the answer text that was given.
answers = pandas.read_sql(
    sql='''
SELECT prediction_quiz_id, user_lat, user_lng, question_text, answer_text
FROM       core_surveyresult as survey
INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id
INNER JOIN core_predictionquizresultquestionanswer as answer
    ON result.id = answer.prediction_quiz_id
''',
    con=db,
)
In [4]:
# Questions whose answers show no spread in location at all: the standard
# deviation of both latitude and longitude over all answers is exactly 0
# (presumably because no real location was recorded for them — verify).
# The standard deviation is computed on the two coordinate columns only;
# aggregating every column would also hit the text column `answer_text`,
# which raises on pandas >= 2.0.
# Questions with a single answer have a NaN standard deviation and are kept.
zero_latlng_questions = {
    question
    for question, spread in (
        answers.groupby('question_text')[['user_lat', 'user_lng']].std().iterrows()
    )
    if spread['user_lat'] == 0 and spread['user_lng'] == 0
}

# Keep only answers to the remaining questions; copy so that later column
# assignments do not write into a view of `answers`.
answers_filtered = answers[~answers['question_text'].isin(zero_latlng_questions)].copy()
In [10]:
# Show the distinct question texts (words) present in the filtered answers.
answers_filtered.question_text.unique()
Out[10]:
array(['gegaan', 'avond', 'heel', 'dag', 'bij (insect)', 'sprak (toe)',
       'oog', 'armen (lichaamsdeel)', 'kaas', 'deurtje', 'koken',
       'borst (lichaamsdeel)', 'vis', 'zaterdag', 'trein', 'geel', 'tand',
       'gezet', 'blad (aan een boom)'], dtype=object)
In [6]:
def _strip_markup(text):
    # Remove every double quote and asterisk from the question text.
    return text.replace('"', '').replace('*', '')


def _between_parentheses(text):
    # Keep what lies between the first '(' and the first ')'.
    # NOTE(review): str.find returns -1 for a missing parenthesis, which makes
    # the slice meaningless for such answers — confirm that every answer
    # contains a '(...)' part (presumably the pronunciation).
    opening, closing = text.find('('), text.find(')')
    return text[opening:closing][1:]


answers_filtered['question_text'] = answers_filtered['question_text'].map(_strip_markup)
answers_filtered['answer_text'] = answers_filtered['answer_text'].map(_between_parentheses)
In [8]:
# Build the three Gabmap tables from the regions and the filtered answers.
# create_gabmap_dataframes is the project helper imported from `gabmap`; the
# keyword arguments tell it which answer columns hold the coordinates, the
# word and the pronunciation, and which GeoJSON property names a region.
# NOTE(review): presumably each answer is assigned to the region containing
# its coordinates — verify against gabmap.create_gabmap_dataframes.
centroids, pronunciations, counts = create_gabmap_dataframes(
    regions,
    answers_filtered,
    region_name_property='gemeente_en_wijk_naam',
    word_column='question_text',
    pronunciation_column='answer_text',
    latitude_column='user_lat',
    longitude_column='user_lng',
)
In [14]:
# Write the three tables as tab-separated files next to the input data.
pronunciations.to_csv(
    path_or_buf='../data/Friesland_wijken_pronunciations.gabmap.tsv',
    sep='\t',
)
counts.to_csv(
    path_or_buf='../data/Friesland_wijken_pronunciation_percentages.gabmap.tsv',
    sep='\t',
)
# Only the coordinate columns of the centroids table are exported.
centroids.to_csv(
    path_or_buf='../data/Friesland_wijken_centroids.gabmap.tsv',
    sep='\t',
    columns=['longitude', 'latitude'],
)