4.2 KiB
4.2 KiB
Geographical pronunciation tables
Creates Gabmap input files with region centroids, pronunciation percentages and pronunciations for the wijken (districts) of Friesland.
In [1]:
import copy
import getpass
import json
import os
import sys

# Add the parent directory to the import path; presumably that is where the
# local `gabmap` module imported below lives.
sys.path.append('..')

import MySQLdb
import pandas
from shapely.geometry import shape, Point

from gabmap import create_gabmap_dataframes

# Credentials are read from the environment so that no secret is stored in the
# notebook. The user name falls back to 'root' (the previous value); the
# password falls back to an interactive prompt when STIMMEN_DB_PASSWORD is
# unset or empty.
db_password = os.environ.get('STIMMEN_DB_PASSWORD') or getpass.getpass(
    'Password for the stimmen database: ')
db = MySQLdb.connect(
    user=os.environ.get('STIMMEN_DB_USER', 'root'),
    passwd=db_password,
    db='stimmen',
    charset='utf8',
)
# Do not keep the plain-text password around in the kernel namespace.
del db_password
In [2]:
# Load the Friesland wijken GeoJSON document into `regions`.
# GeoJSON is UTF-8 by specification (RFC 7946); the encoding is stated
# explicitly so that reading the file does not depend on the platform's
# default locale encoding.
with open('../data/Friesland_wijken.geojson', encoding='utf-8') as f:
    regions = json.load(f)
In [3]:
# Fetch the prediction-quiz answers (how participants state a word should be
# pronounced) together with the user_lat / user_lng columns, by joining the
# survey results to the quiz results and their per-question answers.
answers_query = '''
    SELECT prediction_quiz_id, user_lat, user_lng, question_text, answer_text
    FROM core_surveyresult as survey
    INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id
    INNER JOIN core_predictionquizresultquestionanswer as answer
        ON result.id = answer.prediction_quiz_id
'''
answers = pandas.read_sql(answers_query, con=db)
In [4]:
# Standard deviation of the coordinates per question. Only the two coordinate
# columns are aggregated: taking the std of every column also hits the string
# column `answer_text`, which raises a TypeError on pandas >= 2.0.
location_std = answers.groupby('question_text')[['user_lat', 'user_lng']].std()

# Questions whose answers all share a single latitude AND a single longitude
# (std of exactly 0 on both axes). Groups with a single answer have a NaN std
# and are therefore not included.
# NOTE(review): presumably these are questions answered without usable
# location data -- confirm against the survey application.
zero_latlng_questions = set(location_std.index[(location_std == 0).all(axis=1)])

# Independent copy, so later column assignments do not touch `answers`.
answers_filtered = answers[~answers['question_text'].isin(zero_latlng_questions)].copy()
In [10]:
# Display the distinct question texts present in `answers_filtered`,
# in order of first appearance.
pandas.unique(answers_filtered['question_text'])
Out[10]:
array(['gegaan', 'avond', 'heel', 'dag', 'bij (insect)', 'sprak (toe)', 'oog', 'armen (lichaamsdeel)', 'kaas', 'deurtje', 'koken', 'borst (lichaamsdeel)', 'vis', 'zaterdag', 'trein', 'geel', 'tand', 'gezet', 'blad (aan een boom)'], dtype=object)
In [6]:
def _strip_markup(question):
    """Return `question` with every double quote and asterisk removed."""
    return question.replace('"', '').replace('*', '')


def _parenthesised_part(answer):
    """Return the text between the first '(' in `answer` and the next ')'.

    A text without a complete '(...)' pair is returned unchanged. The former
    `find`-based slice returned '' when there were no parentheses and dropped
    the last character when only ')' was missing; it also blanked the whole
    column when this cell was executed a second time.
    """
    start = answer.find('(')
    end = answer.find(')', start + 1) if start != -1 else -1
    if end == -1:
        return answer
    return answer[start + 1:end]


# Both columns are overwritten in place on `answers_filtered`; the helpers
# above leave already-cleaned values as they are, so re-running is harmless.
answers_filtered['question_text'] = answers_filtered['question_text'].map(_strip_markup)
answers_filtered['answer_text'] = answers_filtered['answer_text'].map(_parenthesised_part)
In [8]:
# Keyword options for create_gabmap_dataframes. Judging by the keyword names,
# they tell it which columns of `answers_filtered` hold the coordinates, the
# word and its pronunciation, and which GeoJSON property names a region.
gabmap_options = dict(
    latitude_column='user_lat',
    longitude_column='user_lng',
    word_column='question_text',
    pronunciation_column='answer_text',
    region_name_property='gemeente_en_wijk_naam',
)

# Build the three tables that are written out as Gabmap files further down.
centroids, pronunciations, counts = create_gabmap_dataframes(
    regions, answers_filtered, **gabmap_options)
In [14]:
# (table, output path, extra to_csv options) for each Gabmap file.
# Only the centroid export is limited to the longitude/latitude columns.
gabmap_exports = [
    (pronunciations, '../data/Friesland_wijken_pronunciations.gabmap.tsv', {}),
    (counts, '../data/Friesland_wijken_pronunciation_percentages.gabmap.tsv', {}),
    (centroids, '../data/Friesland_wijken_centroids.gabmap.tsv',
     {'columns': ['longitude', 'latitude']}),
]

# Write every table as a tab-separated file.
for table, target_path, extra_options in gabmap_exports:
    table.to_csv(target_path, sep='\t', **extra_options)