{ "cells": [
 { "cell_type": "markdown", "metadata": {}, "source": [
  "# Geographical pronunciation tables, simple example\n",
  "\n",
  "Simple example to create gabmap files for two words with few pronunciations and two regions.\n",
  "\n",
  "The database credentials are read from the environment variables `STIMMEN_DB_USER` (defaults to `root`) and `STIMMEN_DB_PASSWORD`."
 ] },
 { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
  "import copy\n",
  "import json\n",
  "import os\n",
  "import sys\n",
  "\n",
  "# Add the parent directory to the import path before the project imports below\n",
  "# (presumably where `gabmap` and `stimmen` live - confirm).\n",
  "sys.path.append('..')\n",
  "\n",
  "import MySQLdb\n",
  "import pandas\n",
  "from shapely.geometry import shape, Point\n",
  "\n",
  "from gabmap import create_gabmap_dataframes\n",
  "from stimmen.geojson import merge_features\n",
  "\n",
  "# Credentials come from the environment so that no secret is stored in the notebook.\n",
  "# STIMMEN_DB_PASSWORD must be set (a missing variable raises KeyError);\n",
  "# STIMMEN_DB_USER falls back to 'root'.\n",
  "db = MySQLdb.connect(\n",
  "    user=os.environ.get('STIMMEN_DB_USER', 'root'),\n",
  "    passwd=os.environ['STIMMEN_DB_PASSWORD'],\n",
  "    db='stimmen',\n",
  "    charset='utf8',\n",
  ")"
 ] },
 { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
  "# GeoJSON is UTF-8 by specification, so do not rely on the platform default encoding.\n",
  "with open('../data/Friesland_wijken.geojson', encoding='utf-8') as f:\n",
  "    regions = json.load(f)"
 ] },
 { "cell_type": "markdown", "metadata": {}, "source": [
  "## Load and simplify"
 ] },
 { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [
  "# Answers to how participants state a word should be pronounced\n",
  "\n",
  "answers = pandas.read_sql('''\n",
  "SELECT prediction_quiz_id, user_lat, user_lng, question_text, answer_text\n",
  "FROM core_surveyresult as survey\n",
  "INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id\n",
  "INNER JOIN core_predictionquizresultquestionanswer as answer\n",
  " ON result.id = answer.prediction_quiz_id\n",
  "''', db)"
 ] },
 { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [
  "# Work on a deep copy so that `regions` itself is left untouched.\n",
  "# NOTE(review): merge_features presumably fuses all features matching `condition`\n",
  "# into a single feature - confirm against stimmen.geojson.\n",
  "regions_simple = merge_features(\n",
  "    copy.deepcopy(regions),\n",
  "    condition=lambda feature: feature['properties']['GM_NAAM'] == 'Heerenveen',\n",
  ")\n",
  "\n",
  "regions_simple = merge_features(\n",
  "    regions_simple,\n",
  "    condition=lambda feature: feature['properties']['GM_NAAM'] == 'Leeuwarden',\n",
  ")\n",
  "\n",
  "# Keep only the last two features.\n",
  "# NOTE(review): assumes the two merged regions end up last, Heerenveen before Leeuwarden - confirm.\n",
  "regions_simple['features'] = regions_simple['features'][-2:]\n",
  "\n",
  "regions_simple['features'][0]['properties']['name'] = 'Heerenveen'\n",
  "regions_simple['features'][1]['properties']['name'] = 'Leeuwarden'"
 ] },
 { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [
  "# Select the answers for the two example words; copy so the columns can be rewritten\n",
  "# without touching `answers`.\n",
  "answers_simple = answers[\n",
  "    (answers['question_text'] == '\"blad\" (aan een boom)') |\n",
  "    (answers['question_text'] == '\"vis\"')\n",
  "].copy()\n",
  "\n",
  "# Strip the double quotes and asterisks from the question text.\n",
  "answers_simple['question_text'] = answers_simple['question_text'].map(\n",
  "    lambda x: x.replace('\"', '').replace('*', ''))\n",
  "\n",
  "# Keep the text between the first '(' and the first ')' of each answer.\n",
  "# NOTE(review): an answer without parentheses yields an empty or truncated string -\n",
  "# confirm that every answer contains a parenthesised form.\n",
  "answers_simple['answer_text'] = answers_simple['answer_text'].map(\n",
  "    lambda x: x[x.find('('):x.find(')')][1:])"
 ] },
 { "cell_type": "markdown", "metadata": {}, "source": [
  "Two words, *blad* and *vis*, with 4 and 2 pronunciations respectively."
 ] },
 { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
answer_text
question_text
blad (aan een boom)4
vis2
\n", "
" ], "text/plain": [ " answer_text\n", "question_text \n", "blad (aan een boom) 4\n", "vis 2" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Number of distinct pronunciations recorded for each word.\n",
  "answers_simple.groupby('question_text').agg(\n",
  "    {'answer_text': lambda pronunciations: pronunciations.nunique(dropna=False)})"
 ] },
 { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [
  "# Build the three tables shown below from the simplified regions and answers.\n",
  "centroids_example, pronunciations_example, counts_example = create_gabmap_dataframes(\n",
  "    regions_simple,\n",
  "    answers_simple,\n",
  "    latitude_column='user_lat',\n",
  "    longitude_column='user_lng',\n",
  "    word_column='question_text',\n",
  "    pronunciation_column='answer_text',\n",
  "    region_name_property='name',\n",
  ")"
 ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Resulting tables\n", "\n", "Stored as tab separated files for gabmap" ] },
 { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
latitudelongitude
#name
Heerenveen52.9960765.977925
Leeuwarden53.1699405.797613
\n", "
" ], "text/plain": [ " latitude longitude\n", "#name \n", "Heerenveen 52.996076 5.977925\n", "Leeuwarden 53.169940 5.797613" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "centroids_example" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
blad (aan een boom)vis
Heerenveenblet / blɑt / blɔd / blɛ:tfisk / fɪs
Leeuwardenblet / blɑt / blɔd / blɛ:tfisk / fɪs
\n", "
" ], "text/plain": [ " blad (aan een boom) vis\n", " \n", "Heerenveen blet / blɑt / blɔd / blɛ:t fisk / fɪs\n", "Leeuwarden blet / blɑt / blɔd / blɛ:t fisk / fɪs" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pronunciations_example" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
blad (aan een boom): bletblad (aan een boom): blɑtblad (aan een boom): blɔdblad (aan een boom): blɛ:tvis: fiskvis: fɪs
Heerenveen31.6546762.1582732.15827364.02877752.51798647.482014
Leeuwarden7.8651697.0224728.70786576.40449475.00000025.000000
\n", "
" ], "text/plain": [ " blad (aan een boom): blet blad (aan een boom): blɑt \\\n", " \n", "Heerenveen 31.654676 2.158273 \n", "Leeuwarden 7.865169 7.022472 \n", "\n", " blad (aan een boom): blɔd blad (aan een boom): blɛ:t vis: fisk \\\n", " \n", "Heerenveen 2.158273 64.028777 52.517986 \n", "Leeuwarden 8.707865 76.404494 75.000000 \n", "\n", " vis: fɪs \n", " \n", "Heerenveen 47.482014 \n", "Leeuwarden 25.000000 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "counts_example" ] },
 { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [
  "# Write the three tables as tab-separated files.\n",
  "# The centroid file lists longitude before latitude\n",
  "# (presumably the column order gabmap expects - confirm).\n",
  "centroids_example.to_csv(\n",
  "    '../data/Centroids_example.gabmap.tsv',\n",
  "    sep='\\t',\n",
  "    columns=['longitude', 'latitude'],\n",
  ")\n",
  "pronunciations_example.to_csv(\n",
  "    '../data/Pronunciations_example.gabmap.tsv',\n",
  "    sep='\\t',\n",
  ")\n",
  "counts_example.to_csv(\n",
  "    '../data/Pronunciation_percentages_example.gabmap.tsv',\n",
  "    sep='\\t',\n",
  ")\n",
  "\n",
  "# Store the simplified two-region map next to the tables.\n",
  "with open('../data/Gabmap_example.geojson', 'w') as geojson_file:\n",
  "    json.dump(regions_simple, geojson_file, indent=1)"
 ] }
 ],
 "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }