diff --git a/Readme.md b/Readme.md index 9a99340..b5c9c7f 100644 --- a/Readme.md +++ b/Readme.md @@ -15,7 +15,7 @@ using image processing. Results - * [notebooks/dialect_regions.geojson](notebooks/dialect_regions.geojson) + * [data/dialect_regions.geojson](data/dialect_regions.geojson) ### Group recordings to Frysian dialect regions diff --git a/notebooks/Dialect Regions from image.ipynb b/notebooks/Dialect Regions from image.ipynb index df5ef1e..3863d51 100644 --- a/notebooks/Dialect Regions from image.ipynb +++ b/notebooks/Dialect Regions from image.ipynb @@ -1,909 +1,32 @@ { "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Extract dialect regions from image\n", - "\n", - "Using image processing, extract region polygons for the dialects depicted in this image\n", - "\n", - "![dialect regions](../data/dialects.png)" - ] - }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "from math import floor\n", - "\n", "import json\n", "import folium\n", - "from folium_jsbutton import JsButton\n", - "from imageio import imread\n", + "import pandas\n", + "import MySQLdb\n", "\n", "from collections import Counter\n", "\n", "from math import sqrt\n", "import numpy as np\n", + "from scipy.misc import imread\n", + "from shapely.geometry import shape, Point\n", + "from vincenty import vincenty\n", "\n", "%matplotlib notebook\n", "from matplotlib import pyplot as plt\n", "\n", "from skimage.morphology import binary_closing\n", - "from skimage.measure import find_contours, label" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Input\n", + "from skimage.measure import find_contours, label\n", + "from jupyter_progressbar import ProgressBar\n", "\n", - "Load the image and determine the used colors." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "im = imread('../data/dialects.png')\n", - "\n", - "color_occurence = Counter(map(tuple, im.reshape(-1,3)))\n", - "\n", - "color_sorted_by_occurence = [c for c, _ in sorted(\n", - " color_occurence.items(),\n", - " key=lambda x: x[1],\n", - " reverse=True\n", - ")]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Relevant colors\n", - "\n", - "Show the most used colors and select those of the relevant (dialect) regions" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "application/javascript": [ - "/* Put everything inside the global mpl namespace */\n", - "window.mpl = {};\n", - "\n", - "\n", - "mpl.get_websocket_type = function() {\n", - " if (typeof(WebSocket) !== 'undefined') {\n", - " return WebSocket;\n", - " } else if (typeof(MozWebSocket) !== 'undefined') {\n", - " return MozWebSocket;\n", - " } else {\n", - " alert('Your browser does not have WebSocket support.' +\n", - " 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n", - " 'Firefox 4 and 5 are also supported but you ' +\n", - " 'have to enable WebSockets in about:config.');\n", - " };\n", - "}\n", - "\n", - "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n", - " this.id = figure_id;\n", - "\n", - " this.ws = websocket;\n", - "\n", - " this.supports_binary = (this.ws.binaryType != undefined);\n", - "\n", - " if (!this.supports_binary) {\n", - " var warnings = document.getElementById(\"mpl-warnings\");\n", - " if (warnings) {\n", - " warnings.style.display = 'block';\n", - " warnings.textContent = (\n", - " \"This browser does not support binary websocket messages. \" +\n", - " \"Performance may be slow.\");\n", - " }\n", - " }\n", - "\n", - " this.imageObj = new Image();\n", - "\n", - " this.context = undefined;\n", - " this.message = undefined;\n", - " this.canvas = undefined;\n", - " this.rubberband_canvas = undefined;\n", - " this.rubberband_context = undefined;\n", - " this.format_dropdown = undefined;\n", - "\n", - " this.image_mode = 'full';\n", - "\n", - " this.root = $('
');\n", - " this._root_extra_style(this.root)\n", - " this.root.attr('style', 'display: inline-block');\n", - "\n", - " $(parent_element).append(this.root);\n", - "\n", - " this._init_header(this);\n", - " this._init_canvas(this);\n", - " this._init_toolbar(this);\n", - "\n", - " var fig = this;\n", - "\n", - " this.waiting = false;\n", - "\n", - " this.ws.onopen = function () {\n", - " fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n", - " fig.send_message(\"send_image_mode\", {});\n", - " if (mpl.ratio != 1) {\n", - " fig.send_message(\"set_dpi_ratio\", {'dpi_ratio': mpl.ratio});\n", - " }\n", - " fig.send_message(\"refresh\", {});\n", - " }\n", - "\n", - " this.imageObj.onload = function() {\n", - " if (fig.image_mode == 'full') {\n", - " // Full images could contain transparency (where diff images\n", - " // almost always do), so we need to clear the canvas so that\n", - " // there is no ghosting.\n", - " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", - " }\n", - " fig.context.drawImage(fig.imageObj, 0, 0);\n", - " };\n", - "\n", - " this.imageObj.onunload = function() {\n", - " fig.ws.close();\n", - " }\n", - "\n", - " this.ws.onmessage = this._make_on_message_function(this);\n", - "\n", - " this.ondownload = ondownload;\n", - "}\n", - "\n", - "mpl.figure.prototype._init_header = function() {\n", - " var titlebar = $(\n", - " '
');\n", - " var titletext = $(\n", - " '
');\n", - " titlebar.append(titletext)\n", - " this.root.append(titlebar);\n", - " this.header = titletext[0];\n", - "}\n", - "\n", - "\n", - "\n", - "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n", - "\n", - "}\n", - "\n", - "\n", - "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n", - "\n", - "}\n", - "\n", - "mpl.figure.prototype._init_canvas = function() {\n", - " var fig = this;\n", - "\n", - " var canvas_div = $('
');\n", - "\n", - " canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n", - "\n", - " function canvas_keyboard_event(event) {\n", - " return fig.key_event(event, event['data']);\n", - " }\n", - "\n", - " canvas_div.keydown('key_press', canvas_keyboard_event);\n", - " canvas_div.keyup('key_release', canvas_keyboard_event);\n", - " this.canvas_div = canvas_div\n", - " this._canvas_extra_style(canvas_div)\n", - " this.root.append(canvas_div);\n", - "\n", - " var canvas = $('');\n", - " canvas.addClass('mpl-canvas');\n", - " canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n", - "\n", - " this.canvas = canvas[0];\n", - " this.context = canvas[0].getContext(\"2d\");\n", - "\n", - " var backingStore = this.context.backingStorePixelRatio ||\n", - "\tthis.context.webkitBackingStorePixelRatio ||\n", - "\tthis.context.mozBackingStorePixelRatio ||\n", - "\tthis.context.msBackingStorePixelRatio ||\n", - "\tthis.context.oBackingStorePixelRatio ||\n", - "\tthis.context.backingStorePixelRatio || 1;\n", - "\n", - " mpl.ratio = (window.devicePixelRatio || 1) / backingStore;\n", - "\n", - " var rubberband = $('');\n", - " rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n", - "\n", - " var pass_mouse_events = true;\n", - "\n", - " canvas_div.resizable({\n", - " start: function(event, ui) {\n", - " pass_mouse_events = false;\n", - " },\n", - " resize: function(event, ui) {\n", - " fig.request_resize(ui.size.width, ui.size.height);\n", - " },\n", - " stop: function(event, ui) {\n", - " pass_mouse_events = true;\n", - " fig.request_resize(ui.size.width, ui.size.height);\n", - " },\n", - " });\n", - "\n", - " function mouse_event_fn(event) {\n", - " if (pass_mouse_events)\n", - " return fig.mouse_event(event, event['data']);\n", - " }\n", - "\n", - " rubberband.mousedown('button_press', mouse_event_fn);\n", - " rubberband.mouseup('button_release', mouse_event_fn);\n", - " // Throttle sequential mouse events to 1 every 20ms.\n", - " rubberband.mousemove('motion_notify', mouse_event_fn);\n", - "\n", - " rubberband.mouseenter('figure_enter', mouse_event_fn);\n", - " rubberband.mouseleave('figure_leave', mouse_event_fn);\n", - "\n", - " canvas_div.on(\"wheel\", function (event) {\n", - " event = event.originalEvent;\n", - " event['data'] = 'scroll'\n", - " if (event.deltaY < 0) {\n", - " event.step = 1;\n", - " } else {\n", - " event.step = -1;\n", - " }\n", - " mouse_event_fn(event);\n", - " });\n", - "\n", - " canvas_div.append(canvas);\n", - " canvas_div.append(rubberband);\n", - "\n", - " this.rubberband = rubberband;\n", - " this.rubberband_canvas = rubberband[0];\n", - " this.rubberband_context = rubberband[0].getContext(\"2d\");\n", - " this.rubberband_context.strokeStyle = \"#000000\";\n", - "\n", - " this._resize_canvas = function(width, height) {\n", - " // Keep the size of the canvas, canvas container, and rubber band\n", - " // canvas in synch.\n", - " canvas_div.css('width', width)\n", - " canvas_div.css('height', height)\n", - "\n", - " canvas.attr('width', width * mpl.ratio);\n", - " canvas.attr('height', height * mpl.ratio);\n", - " canvas.attr('style', 'width: ' + width + 'px; height: ' + height + 'px;');\n", - "\n", - " rubberband.attr('width', width);\n", - " rubberband.attr('height', height);\n", - " }\n", - "\n", - " // Set the figure to an initial 600x600px, this will subsequently be updated\n", - " // upon first draw.\n", - " this._resize_canvas(600, 600);\n", - "\n", - " // Disable right mouse context menu.\n", - " $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n", - " return false;\n", - " });\n", - "\n", - " function set_focus () {\n", - " canvas.focus();\n", - " canvas_div.focus();\n", - " }\n", - "\n", - " window.setTimeout(set_focus, 100);\n", - "}\n", - "\n", - "mpl.figure.prototype._init_toolbar = function() {\n", - " var fig = this;\n", - "\n", - " var nav_element = $('
')\n", - " nav_element.attr('style', 'width: 100%');\n", - " this.root.append(nav_element);\n", - "\n", - " // Define a callback function for later on.\n", - " function toolbar_event(event) {\n", - " return fig.toolbar_button_onclick(event['data']);\n", - " }\n", - " function toolbar_mouse_event(event) {\n", - " return fig.toolbar_button_onmouseover(event['data']);\n", - " }\n", - "\n", - " for(var toolbar_ind in mpl.toolbar_items) {\n", - " var name = mpl.toolbar_items[toolbar_ind][0];\n", - " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", - " var image = mpl.toolbar_items[toolbar_ind][2];\n", - " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", - "\n", - " if (!name) {\n", - " // put a spacer in here.\n", - " continue;\n", - " }\n", - " var button = $('');\n", - " button.click(method_name, toolbar_event);\n", - " button.mouseover(tooltip, toolbar_mouse_event);\n", - " nav_element.append(button);\n", - " }\n", - "\n", - " // Add the status bar.\n", - " var status_bar = $('');\n", - " nav_element.append(status_bar);\n", - " this.message = status_bar[0];\n", - "\n", - " // Add the close button to the window.\n", - " var buttongrp = $('
');\n", - " var button = $('');\n", - " button.click(function (evt) { fig.handle_close(fig, {}); } );\n", - " button.mouseover('Stop Interaction', toolbar_mouse_event);\n", - " buttongrp.append(button);\n", - " var titlebar = this.root.find($('.ui-dialog-titlebar'));\n", - " titlebar.prepend(buttongrp);\n", - "}\n", - "\n", - "mpl.figure.prototype._root_extra_style = function(el){\n", - " var fig = this\n", - " el.on(\"remove\", function(){\n", - "\tfig.close_ws(fig, {});\n", - " });\n", - "}\n", - "\n", - "mpl.figure.prototype._canvas_extra_style = function(el){\n", - " // this is important to make the div 'focusable\n", - " el.attr('tabindex', 0)\n", - " // reach out to IPython and tell the keyboard manager to turn it's self\n", - " // off when our div gets focus\n", - "\n", - " // location in version 3\n", - " if (IPython.notebook.keyboard_manager) {\n", - " IPython.notebook.keyboard_manager.register_events(el);\n", - " }\n", - " else {\n", - " // location in version 2\n", - " IPython.keyboard_manager.register_events(el);\n", - " }\n", - "\n", - "}\n", - "\n", - "mpl.figure.prototype._key_event_extra = function(event, name) {\n", - " var manager = IPython.notebook.keyboard_manager;\n", - " if (!manager)\n", - " manager = IPython.keyboard_manager;\n", - "\n", - " // Check for shift+enter\n", - " if (event.shiftKey && event.which == 13) {\n", - " this.canvas_div.blur();\n", - " event.shiftKey = false;\n", - " // Send a \"J\" for go to next cell\n", - " event.which = 74;\n", - " event.keyCode = 74;\n", - " manager.command_mode();\n", - " manager.handle_keydown(event);\n", - " }\n", - "}\n", - "\n", - "mpl.figure.prototype.handle_save = function(fig, msg) {\n", - " fig.ondownload(fig, null);\n", - "}\n", - "\n", - "\n", - "mpl.find_output_cell = function(html_output) {\n", - " // Return the cell and output element which can be found *uniquely* in the notebook.\n", - " // Note - this is a bit hacky, but it is done because the \"notebook_saving.Notebook\"\n", - " // IPython event is triggered only after the cells have been serialised, which for\n", - " // our purposes (turning an active figure into a static one), is too late.\n", - " var cells = IPython.notebook.get_cells();\n", - " var ncells = cells.length;\n", - " for (var i=0; i= 3 moved mimebundle to data attribute of output\n", - " data = data.data;\n", - " }\n", - " if (data['text/html'] == html_output) {\n", - " return [cell, data, j];\n", - " }\n", - " }\n", - " }\n", - " }\n", - "}\n", - "\n", - "// Register the function which deals with the matplotlib target/channel.\n", - "// The kernel may be null if the page has been refreshed.\n", - "if (IPython.notebook.kernel != null) {\n", - " IPython.notebook.kernel.comm_manager.register_target('matplotlib', mpl.mpl_figure_comm);\n", - "}\n" - ], "text/plain": [ "" ] @@ -1780,24 +137,72 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" ] }, "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "plt.rcParams['figure.figsize'] = (9.5, 3)\n", + "ax0, ax1, ax2 = plt.subplots(1,3)[1]\n", + "ax0.imshow(im)\n", + "ax1.imshow(composed_49.astype(int))\n", + "ax2.imshow(composed_4.astype(int))\n", + "ax0.set_xticks([]); ax0.set_yticks([])\n", + "ax1.set_xticks([]); ax1.set_yticks([])\n", + "ax2.set_xticks([]); ax2.set_yticks([])\n", + "plt.tight_layout()\n", + "\n", + "stavoren_to_east_coords = [5.354483127593994, 6.330699920654297]\n", + "north_to_south_coords = [53.41434089638827, 52.832432288794514]\n", + "\n", + "stavoren_to_east_pixels = [295, 717]\n", + "north_to_south_pixels = [99, 525]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "axes = plt.subplots(2,2)[1].ravel()\n", "contours = []\n", - "for axis, c in zip(axes, np.array(legend_colors)):\n", + "for axis, c in zip(axes, relevant_colors):\n", " bi = (im[:-100] == c[None,None]).min(axis=2)\n", " bi = binary_closing(bi, np.ones((5,5)))\n", " \n", " labels = label(bi, background=False)\n", + " \n", " contours.append(find_contours(bi, 0.5))\n", "\n", " axis.imshow(bi)\n", @@ -1807,23 +212,21 @@ "plt.tight_layout()" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "translate pixel coordinates to latitude - longitudes." - ] - }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "(y0, x1), (y1, x0) = bounds\n", + "a0, b0 = stavoren_to_east_coords\n", + "c0, d0 = stavoren_to_east_pixels\n", "\n", - "scale_x = lambda x: x0 + (x / im.shape[1]) * (x1 - x0)\n", - "scale_y = lambda y: y0 + (y / im.shape[0]) * (y1 - y0)\n", + "scale_x = lambda x: (x - c0) / (d0 - c0) * (b0 - a0) + a0\n", + "\n", + "a1, b1 = north_to_south_coords\n", + "c1, d1 = north_to_south_pixels\n", + "\n", + "scale_y = lambda x: (x - c1) / (d1 - c1) * (b1 - a1) + a1\n", "\n", "contours_scaled = [\n", " list(zip(scale_x(c[0][:, 1]), scale_y(c[0][:, 0])))\n", @@ -1831,16 +234,9 @@ "]" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Result" - ] - }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 14, "metadata": { "scrolled": true }, @@ -1861,32 +257,32 @@ " ]\n", "})\n", "\n", - "with open('../data/frysian_dialect_regions.geojson', 'w') as f:\n", + "with open('dialect_regions.geojson', 'w') as f:\n", " f.write(geojson)" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
" + "
" ], "text/plain": [ - "" + "" ] }, - "execution_count": 8, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m = folium.Map(\n", - " location=center,\n", + " location=[sum(north_to_south_coords) / 2, sum(stavoren_to_east_coords) / 2],\n", " tiles='Mapbox Bright',\n", " zoom_start=9\n", ")\n", @@ -1894,6 +290,1294 @@ "\n", "m" ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "shapes = {\n", + " dialect: shape({\"type\": \"Polygon\", \"coordinates\": [list(map(list, contour))]})\n", + " for contour, dialect in zip(contours_scaled, regions)\n", + "}\n", + "\n", + "def regions_for(coordinate):\n", + " regions = {\n", + " region_name\n", + " for region_name, shape in shapes.items()\n", + " if shape.contains(Point(*coordinate))\n", + " }\n", + " return regions\n", + "\n", + "def distance(shape, longitude, latitude):\n", + " ext = shape.exterior\n", + " p = ext.interpolate(ext.project(Point(longitude, latitude)))\n", + " return vincenty((latitude, longitude), (p.y, p.x))" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "# SELECT user_lat, user_lng, question_text, answer_text\n", + "picture_games = pandas.read_sql('''\n", + "SELECT language.name as language, item.name as picture,\n", + " survey.user_lat as latitude, survey.user_lng as longitude,\n", + " survey.area_name as area, survey.country_name as country,\n", + " result.recording as filename,\n", + " result.submitted_at as date\n", + "FROM core_surveyresult as survey\n", + "INNER JOIN core_picturegameresult as result ON survey.id = result.survey_result_id\n", + "INNER JOIN core_language as language ON language.id = result.language_id\n", + "INNER JOIN core_picturegameitem as item\n", + " ON result.picture_game_item_id = item.id\n", + "''', db)\n", + "# picture_games['filename'] = [filename.split('/')[-1] for filename in picture_games['filename']]\n", + "picture_games.set_index('filename', inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='0s passed', placeholder='0…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "region_per_picture_game = [\n", + " {\n", + " 'dialects': [\n", + " {\n", + " 'dialect': dialect,\n", + " 'boundary_distance': distance(shapes[dialect], longitude, latitude),\n", + " }\n", + " for dialect in regions_for((longitude, latitude))\n", + " ],\n", + " 'filename': filename,\n", + " }\n", + " for filename, (latitude, longitude) in ProgressBar(\n", + " picture_games[['latitude', 'longitude']].iterrows(),\n", + " size=len(picture_games)\n", + " )\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Counter({1: 37681, 0: 6790, 2: 1235})" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Counter(len(x['dialects']) for x in region_per_picture_game)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
filenamedialectboundary_distance
0picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk0.783060
1picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk5.049926
2picture_game_recordings/Nederlands/2017-07-05/...Waldfrysk10.076206
3picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk6.822016
4picture_game_recordings/Frysk/2017-07-05/pg_sc...Noardhoeksk0.697901
5picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk11.483169
6picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk7.789364
7picture_game_recordings/English/2017-07-05/pg_...Waldfrysk7.743475
8picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk9.636752
9picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk9.141453
10picture_game_recordings/Nederlands/2017-07-05/...Noardhoeksk0.779276
11picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk10.876511
12picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk4.420720
13picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk5.376456
14picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk4.420720
15picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk2.249444
16picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk9.085181
17picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk10.760419
18picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk10.392223
19picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk2.104440
20picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk10.415752
21picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk8.863952
22picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk10.876511
23picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk10.912091
24picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk2.304845
25picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk10.779915
26picture_game_recordings/Nederlands/2017-07-05/...Waldfrysk9.627570
27picture_game_recordings/Nederlands/2017-07-05/...Waldfrysk4.783550
28picture_game_recordings/Nederlands/2017-07-05/...Waldfrysk4.048083
29picture_game_recordings/Frysk/2017-07-05/pg_sc...Waldfrysk10.982241
............
37651picture_game_recordings/Frysk/2018-03-11/pg_gi...Noardhoeksk5.089732
37652picture_game_recordings/Frysk/2018-04-07/pg_gi...Waldfrysk7.949465
37653picture_game_recordings/Frysk/2018-04-07/pg_gi...Waldfrysk7.585835
37654picture_game_recordings/Stadsfrys/2018-04-08/p...Klaaifrysk8.087441
37655picture_game_recordings/Nederlands/2018-04-09/...Waldfrysk4.533521
37656picture_game_recordings/Frysk/2018-04-11/pg_gi...Klaaifrysk8.158308
37657picture_game_recordings/Frysk/2018-04-20/pg_gi...Noardhoeksk5.234811
37658picture_game_recordings/Frysk/2018-04-21/pg_gi...Klaaifrysk7.336785
37659picture_game_recordings/Frysk/2018-04-21/pg_gi...Waldfrysk1.037296
37660picture_game_recordings/Frysk/2018-04-21/pg_gi...Sudwesthoeksk0.795104
37661picture_game_recordings/Frysk/2018-04-21/pg_gi...Klaaifrysk6.146968
37662picture_game_recordings/Frysk/2018-04-21/pg_gi...Waldfrysk9.839955
37663picture_game_recordings/Frysk/2018-04-22/pg_gi...Waldfrysk2.581705
37664picture_game_recordings/Frysk/2018-04-22/pg_gi...Sudwesthoeksk0.155174
37665picture_game_recordings/Stadsfrys/2018-04-22/p...Klaaifrysk4.157231
37666picture_game_recordings/Frysk/2018-04-22/pg_gi...Klaaifrysk0.485218
37667picture_game_recordings/Stadsfrys/2018-04-24/p...Klaaifrysk7.866078
37668picture_game_recordings/Frysk/2018-04-24/pg_gi...Klaaifrysk7.727236
37669picture_game_recordings/Frysk/2018-04-25/pg_gi...Waldfrysk6.965155
37670picture_game_recordings/Frysk/2018-04-28/pg_gi...Klaaifrysk4.287499
37671picture_game_recordings/Frysk/2018-04-28/pg_gi...Klaaifrysk4.138920
37672picture_game_recordings/Frysk/2018-04-28/pg_gi...Waldfrysk5.062945
37673picture_game_recordings/Frysk/2018-05-02/pg_gi...Sudwesthoeksk0.586068
37674picture_game_recordings/Frysk/2018-05-12/pg_gi...Klaaifrysk9.143622
37675picture_game_recordings/Frysk/2018-05-14/pg_gi...Noardhoeksk0.509192
37676picture_game_recordings/Frysk/2018-05-20/pg_gi...Noardhoeksk0.867314
37677picture_game_recordings/Frysk/2018-06-25/pg_gi...Noardhoeksk1.770019
37678picture_game_recordings/Frysk/2018-07-02/pg_gi...Waldfrysk5.647593
37679picture_game_recordings/Frysk/2018-07-05/pg_gi...Klaaifrysk11.202813
37680picture_game_recordings/Frysk/2018-08-12/pg_gi...Waldfrysk2.222390
\n", + "

37681 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " filename dialect \\\n", + "0 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "1 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "2 picture_game_recordings/Nederlands/2017-07-05/... Waldfrysk \n", + "3 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "4 picture_game_recordings/Frysk/2017-07-05/pg_sc... Noardhoeksk \n", + "5 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "6 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "7 picture_game_recordings/English/2017-07-05/pg_... Waldfrysk \n", + "8 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "9 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "10 picture_game_recordings/Nederlands/2017-07-05/... Noardhoeksk \n", + "11 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "12 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "13 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "14 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "15 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "16 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "17 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "18 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "19 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "20 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "21 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "22 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "23 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "24 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "25 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "26 picture_game_recordings/Nederlands/2017-07-05/... Waldfrysk \n", + "27 picture_game_recordings/Nederlands/2017-07-05/... Waldfrysk \n", + "28 picture_game_recordings/Nederlands/2017-07-05/... Waldfrysk \n", + "29 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n", + "... ... ... \n", + "37651 picture_game_recordings/Frysk/2018-03-11/pg_gi... Noardhoeksk \n", + "37652 picture_game_recordings/Frysk/2018-04-07/pg_gi... Waldfrysk \n", + "37653 picture_game_recordings/Frysk/2018-04-07/pg_gi... Waldfrysk \n", + "37654 picture_game_recordings/Stadsfrys/2018-04-08/p... Klaaifrysk \n", + "37655 picture_game_recordings/Nederlands/2018-04-09/... Waldfrysk \n", + "37656 picture_game_recordings/Frysk/2018-04-11/pg_gi... Klaaifrysk \n", + "37657 picture_game_recordings/Frysk/2018-04-20/pg_gi... Noardhoeksk \n", + "37658 picture_game_recordings/Frysk/2018-04-21/pg_gi... Klaaifrysk \n", + "37659 picture_game_recordings/Frysk/2018-04-21/pg_gi... Waldfrysk \n", + "37660 picture_game_recordings/Frysk/2018-04-21/pg_gi... Sudwesthoeksk \n", + "37661 picture_game_recordings/Frysk/2018-04-21/pg_gi... Klaaifrysk \n", + "37662 picture_game_recordings/Frysk/2018-04-21/pg_gi... Waldfrysk \n", + "37663 picture_game_recordings/Frysk/2018-04-22/pg_gi... Waldfrysk \n", + "37664 picture_game_recordings/Frysk/2018-04-22/pg_gi... Sudwesthoeksk \n", + "37665 picture_game_recordings/Stadsfrys/2018-04-22/p... Klaaifrysk \n", + "37666 picture_game_recordings/Frysk/2018-04-22/pg_gi... Klaaifrysk \n", + "37667 picture_game_recordings/Stadsfrys/2018-04-24/p... Klaaifrysk \n", + "37668 picture_game_recordings/Frysk/2018-04-24/pg_gi... Klaaifrysk \n", + "37669 picture_game_recordings/Frysk/2018-04-25/pg_gi... Waldfrysk \n", + "37670 picture_game_recordings/Frysk/2018-04-28/pg_gi... Klaaifrysk \n", + "37671 picture_game_recordings/Frysk/2018-04-28/pg_gi... Klaaifrysk \n", + "37672 picture_game_recordings/Frysk/2018-04-28/pg_gi... Waldfrysk \n", + "37673 picture_game_recordings/Frysk/2018-05-02/pg_gi... Sudwesthoeksk \n", + "37674 picture_game_recordings/Frysk/2018-05-12/pg_gi... Klaaifrysk \n", + "37675 picture_game_recordings/Frysk/2018-05-14/pg_gi... Noardhoeksk \n", + "37676 picture_game_recordings/Frysk/2018-05-20/pg_gi... Noardhoeksk \n", + "37677 picture_game_recordings/Frysk/2018-06-25/pg_gi... Noardhoeksk \n", + "37678 picture_game_recordings/Frysk/2018-07-02/pg_gi... Waldfrysk \n", + "37679 picture_game_recordings/Frysk/2018-07-05/pg_gi... Klaaifrysk \n", + "37680 picture_game_recordings/Frysk/2018-08-12/pg_gi... Waldfrysk \n", + "\n", + " boundary_distance \n", + "0 0.783060 \n", + "1 5.049926 \n", + "2 10.076206 \n", + "3 6.822016 \n", + "4 0.697901 \n", + "5 11.483169 \n", + "6 7.789364 \n", + "7 7.743475 \n", + "8 9.636752 \n", + "9 9.141453 \n", + "10 0.779276 \n", + "11 10.876511 \n", + "12 4.420720 \n", + "13 5.376456 \n", + "14 4.420720 \n", + "15 2.249444 \n", + "16 9.085181 \n", + "17 10.760419 \n", + "18 10.392223 \n", + "19 2.104440 \n", + "20 10.415752 \n", + "21 8.863952 \n", + "22 10.876511 \n", + "23 10.912091 \n", + "24 2.304845 \n", + "25 10.779915 \n", + "26 9.627570 \n", + "27 4.783550 \n", + "28 4.048083 \n", + "29 10.982241 \n", + "... ... \n", + "37651 5.089732 \n", + "37652 7.949465 \n", + "37653 7.585835 \n", + "37654 8.087441 \n", + "37655 4.533521 \n", + "37656 8.158308 \n", + "37657 5.234811 \n", + "37658 7.336785 \n", + "37659 1.037296 \n", + "37660 0.795104 \n", + "37661 6.146968 \n", + "37662 9.839955 \n", + "37663 2.581705 \n", + "37664 0.155174 \n", + "37665 4.157231 \n", + "37666 0.485218 \n", + "37667 7.866078 \n", + "37668 7.727236 \n", + "37669 6.965155 \n", + "37670 4.287499 \n", + "37671 4.138920 \n", + "37672 5.062945 \n", + "37673 0.586068 \n", + "37674 9.143622 \n", + "37675 0.509192 \n", + "37676 0.867314 \n", + "37677 1.770019 \n", + "37678 5.647593 \n", + "37679 11.202813 \n", + "37680 2.222390 \n", + "\n", + "[37681 rows x 3 columns]" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pandas.DataFrame([\n", + " [r['filename'], r['dialects'][0]['dialect'], r['dialects'][0]['boundary_distance']]\n", + " for r in region_per_picture_game\n", + " if len(r['dialects']) == 1\n", + "], columns = ['filename', 'dialect', 'boundary_distance'])\n", + "\n", + "df.to_excel('picture_game_recordings_by_dialect.xlsx')\n", + "df.to_csv('picture_game_recordings_by_dialect.csv')\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "# SELECT user_lat, user_lng, question_text, answer_text\n", + "free_speech_games = pandas.read_sql('''\n", + "SELECT language.name as language,\n", + " survey.user_lat as latitude, survey.user_lng as longitude,\n", + " survey.area_name as area, survey.country_name as country,\n", + " result.recording as filename,\n", + " result.submitted_at as date\n", + "FROM core_surveyresult as survey\n", + "INNER JOIN core_freespeechresult as result ON survey.id = result.survey_result_id\n", + "INNER JOIN core_language as language ON language.id = result.language_id\n", + "''', db)\n", + "# free_speech_games['filename'] = [filename.split('/')[-1] for filename in games['filename']]\n", + "free_speech_games.set_index('filename', inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='0s passed', placeholder='0…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "region_per_free_speech = [\n", + " {\n", + " 'dialects': [\n", + " {\n", + " 'dialect': dialect,\n", + " 'boundary_distance': distance(shapes[dialect], longitude, latitude),\n", + " }\n", + " for dialect in regions_for((longitude, latitude))\n", + " ],\n", + " 'filename': filename,\n", + " }\n", + " for filename, (latitude, longitude) in ProgressBar(\n", + " free_speech_games[['latitude', 'longitude']].iterrows(),\n", + " size=len(free_speech_games)\n", + " )\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Counter({1: 37681, 0: 6790, 2: 1235})" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Counter(len(x['dialects']) for x in region_per_free_speech)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
filenamedialectboundary_distance
0free_speech_recordings/Frysk/2017-07-05/fs_wha...Waldfrysk2.285980
1free_speech_recordings/Nederlands/2017-07-05/f...Waldfrysk3.524925
2free_speech_recordings/Frysk/2017-07-05/fs_wha...Waldfrysk5.376456
3free_speech_recordings/Frysk/2017-07-05/fs_wha...Waldfrysk10.415752
4free_speech_recordings/Frysk/2017-07-05/fs_jok...Waldfrysk10.760419
5free_speech_recordings/Frysk/2017-07-05/fs_jok...Waldfrysk10.876511
6free_speech_recordings/Frysk/2017-07-05/fs_jok...Waldfrysk10.980303
7free_speech_recordings/Frysk/2017-07-05/fs_you...Waldfrysk10.876511
8free_speech_recordings/Frysk/2017-07-05/fs_jok...Waldfrysk10.980303
9free_speech_recordings/Frysk/2017-07-05/fs_wha...Waldfrysk10.760419
10free_speech_recordings/Frysk/2017-07-05/fs_wha...Waldfrysk10.980303
11free_speech_recordings/Frysk/2017-07-05/fs_gro...Waldfrysk10.980303
12free_speech_recordings/English/2017-07-05/fs_w...Waldfrysk5.135179
13free_speech_recordings/Frysk/2017-07-05/fs_wha...Waldfrysk2.061136
14free_speech_recordings/Nederlands/2017-07-05/f...Waldfrysk2.598514
15free_speech_recordings/Frysk/2017-07-05/fs_wha...Waldfrysk10.392223
16free_speech_recordings/Nederlands/2017-07-05/f...Waldfrysk2.598514
17free_speech_recordings/Frysk/2017-07-05/fs_wha...Waldfrysk11.145294
18free_speech_recordings/Frysk/2017-07-05/fs_gro...Waldfrysk2.158520
19free_speech_recordings/Frysk/2017-07-05/fs_jok...Waldfrysk11.145294
20free_speech_recordings/English/2017-07-05/fs_w...Waldfrysk2.158520
21free_speech_recordings/Frysk/2017-07-05/fs_you...Waldfrysk11.145294
22free_speech_recordings/Frysk/2017-07-05/fs_wha...Waldfrysk4.081455
23free_speech_recordings/Nederlands/2017-07-05/f...Waldfrysk5.579423
24free_speech_recordings/Frysk/2017-07-05/fs_jok...Waldfrysk1.965881
25free_speech_recordings/Nederlands/2017-07-05/f...Waldfrysk5.579423
26free_speech_recordings/Frysk/2017-07-05/fs_gro...Waldfrysk2.042788
27free_speech_recordings/Frysk/2017-07-05/fs_rea...Waldfrysk5.046413
28free_speech_recordings/Frysk/2017-07-05/fs_wha...Waldfrysk1.965881
29free_speech_recordings/Nederlands/2017-07-05/f...Waldfrysk4.901555
............
282free_speech_recordings/Frysk/2017-11-29/fs_fav...Waldfrysk4.397417
283free_speech_recordings/Frysk/2017-12-01/fs_wha...Waldfrysk10.512088
284free_speech_recordings/Frysk/2017-12-08/fs_wha...Waldfrysk6.444389
285free_speech_recordings/Frysk/2017-12-08/fs_wha...Waldfrysk6.665675
286free_speech_recordings/Frysk/2017-12-08/fs_wha...Waldfrysk3.411324
287free_speech_recordings/Frysk/2017-12-08/fs_wha...Waldfrysk3.889477
288free_speech_recordings/Frysk/2017-12-08/fs_jok...Waldfrysk3.889477
289free_speech_recordings/Frysk/2017-12-09/fs_gro...Waldfrysk8.109339
290free_speech_recordings/Frysk/2017-12-11/fs_wha...Klaaifrysk0.188073
291free_speech_recordings/Nederlands/2017-12-12/f...Klaaifrysk5.503341
292free_speech_recordings/Frysk/2017-12-12/fs_wha...Klaaifrysk5.503341
293free_speech_recordings/Frysk/2017-12-13/fs_jok...Waldfrysk8.332972
294free_speech_recordings/Frysk/2017-12-14/fs_wha...Klaaifrysk15.105703
295free_speech_recordings/Frysk/2017-12-14/fs_you...Klaaifrysk14.787898
296free_speech_recordings/Frysk/2017-12-14/fs_you...Klaaifrysk14.787898
297free_speech_recordings/Frysk/2017-12-14/fs_jok...Klaaifrysk14.066749
298free_speech_recordings/Frysk/2017-12-14/fs_jok...Klaaifrysk14.066749
299free_speech_recordings/Frysk/2017-12-19/fs_jok...Waldfrysk9.411845
300free_speech_recordings/Frysk/2017-12-19/fs_jok...Waldfrysk9.579569
301free_speech_recordings/Frysk/2017-12-20/fs_gro...Waldfrysk1.346010
302free_speech_recordings/Frysk/2017-12-20/fs_wha...Waldfrysk1.346010
303free_speech_recordings/Frysk/2018-01-20/fs_wha...Waldfrysk8.009320
304free_speech_recordings/Nederlands/2018-04-08/f...Klaaifrysk5.481488
305free_speech_recordings/Stadsfrys/2018-04-08/fs...Klaaifrysk5.481488
306free_speech_recordings/Frysk/2018-04-21/fs_gro...Waldfrysk8.233029
307free_speech_recordings/Frysk/2018-04-22/fs_wha...Waldfrysk4.025229
308free_speech_recordings/Frysk/2018-04-25/fs_wha...Klaaifrysk8.213621
309free_speech_recordings/Frysk/2018-06-24/fs_wha...Noardhoeksk2.651287
310free_speech_recordings/Frysk/2018-06-30/fs_wha...Waldfrysk7.716322
311free_speech_recordings/Frysk/2018-08-20/fs_wha...Waldfrysk5.674670
\n", + "

312 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " filename dialect \\\n", + "0 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n", + "1 free_speech_recordings/Nederlands/2017-07-05/f... Waldfrysk \n", + "2 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n", + "3 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n", + "4 free_speech_recordings/Frysk/2017-07-05/fs_jok... Waldfrysk \n", + "5 free_speech_recordings/Frysk/2017-07-05/fs_jok... Waldfrysk \n", + "6 free_speech_recordings/Frysk/2017-07-05/fs_jok... Waldfrysk \n", + "7 free_speech_recordings/Frysk/2017-07-05/fs_you... Waldfrysk \n", + "8 free_speech_recordings/Frysk/2017-07-05/fs_jok... Waldfrysk \n", + "9 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n", + "10 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n", + "11 free_speech_recordings/Frysk/2017-07-05/fs_gro... Waldfrysk \n", + "12 free_speech_recordings/English/2017-07-05/fs_w... Waldfrysk \n", + "13 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n", + "14 free_speech_recordings/Nederlands/2017-07-05/f... Waldfrysk \n", + "15 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n", + "16 free_speech_recordings/Nederlands/2017-07-05/f... Waldfrysk \n", + "17 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n", + "18 free_speech_recordings/Frysk/2017-07-05/fs_gro... Waldfrysk \n", + "19 free_speech_recordings/Frysk/2017-07-05/fs_jok... Waldfrysk \n", + "20 free_speech_recordings/English/2017-07-05/fs_w... Waldfrysk \n", + "21 free_speech_recordings/Frysk/2017-07-05/fs_you... Waldfrysk \n", + "22 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n", + "23 free_speech_recordings/Nederlands/2017-07-05/f... Waldfrysk \n", + "24 free_speech_recordings/Frysk/2017-07-05/fs_jok... Waldfrysk \n", + "25 free_speech_recordings/Nederlands/2017-07-05/f... Waldfrysk \n", + "26 free_speech_recordings/Frysk/2017-07-05/fs_gro... Waldfrysk \n", + "27 free_speech_recordings/Frysk/2017-07-05/fs_rea... Waldfrysk \n", + "28 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n", + "29 free_speech_recordings/Nederlands/2017-07-05/f... Waldfrysk \n", + ".. ... ... \n", + "282 free_speech_recordings/Frysk/2017-11-29/fs_fav... Waldfrysk \n", + "283 free_speech_recordings/Frysk/2017-12-01/fs_wha... Waldfrysk \n", + "284 free_speech_recordings/Frysk/2017-12-08/fs_wha... Waldfrysk \n", + "285 free_speech_recordings/Frysk/2017-12-08/fs_wha... Waldfrysk \n", + "286 free_speech_recordings/Frysk/2017-12-08/fs_wha... Waldfrysk \n", + "287 free_speech_recordings/Frysk/2017-12-08/fs_wha... Waldfrysk \n", + "288 free_speech_recordings/Frysk/2017-12-08/fs_jok... Waldfrysk \n", + "289 free_speech_recordings/Frysk/2017-12-09/fs_gro... Waldfrysk \n", + "290 free_speech_recordings/Frysk/2017-12-11/fs_wha... Klaaifrysk \n", + "291 free_speech_recordings/Nederlands/2017-12-12/f... Klaaifrysk \n", + "292 free_speech_recordings/Frysk/2017-12-12/fs_wha... Klaaifrysk \n", + "293 free_speech_recordings/Frysk/2017-12-13/fs_jok... Waldfrysk \n", + "294 free_speech_recordings/Frysk/2017-12-14/fs_wha... Klaaifrysk \n", + "295 free_speech_recordings/Frysk/2017-12-14/fs_you... Klaaifrysk \n", + "296 free_speech_recordings/Frysk/2017-12-14/fs_you... Klaaifrysk \n", + "297 free_speech_recordings/Frysk/2017-12-14/fs_jok... Klaaifrysk \n", + "298 free_speech_recordings/Frysk/2017-12-14/fs_jok... Klaaifrysk \n", + "299 free_speech_recordings/Frysk/2017-12-19/fs_jok... Waldfrysk \n", + "300 free_speech_recordings/Frysk/2017-12-19/fs_jok... Waldfrysk \n", + "301 free_speech_recordings/Frysk/2017-12-20/fs_gro... Waldfrysk \n", + "302 free_speech_recordings/Frysk/2017-12-20/fs_wha... Waldfrysk \n", + "303 free_speech_recordings/Frysk/2018-01-20/fs_wha... Waldfrysk \n", + "304 free_speech_recordings/Nederlands/2018-04-08/f... Klaaifrysk \n", + "305 free_speech_recordings/Stadsfrys/2018-04-08/fs... Klaaifrysk \n", + "306 free_speech_recordings/Frysk/2018-04-21/fs_gro... Waldfrysk \n", + "307 free_speech_recordings/Frysk/2018-04-22/fs_wha... Waldfrysk \n", + "308 free_speech_recordings/Frysk/2018-04-25/fs_wha... Klaaifrysk \n", + "309 free_speech_recordings/Frysk/2018-06-24/fs_wha... Noardhoeksk \n", + "310 free_speech_recordings/Frysk/2018-06-30/fs_wha... Waldfrysk \n", + "311 free_speech_recordings/Frysk/2018-08-20/fs_wha... Waldfrysk \n", + "\n", + " boundary_distance \n", + "0 2.285980 \n", + "1 3.524925 \n", + "2 5.376456 \n", + "3 10.415752 \n", + "4 10.760419 \n", + "5 10.876511 \n", + "6 10.980303 \n", + "7 10.876511 \n", + "8 10.980303 \n", + "9 10.760419 \n", + "10 10.980303 \n", + "11 10.980303 \n", + "12 5.135179 \n", + "13 2.061136 \n", + "14 2.598514 \n", + "15 10.392223 \n", + "16 2.598514 \n", + "17 11.145294 \n", + "18 2.158520 \n", + "19 11.145294 \n", + "20 2.158520 \n", + "21 11.145294 \n", + "22 4.081455 \n", + "23 5.579423 \n", + "24 1.965881 \n", + "25 5.579423 \n", + "26 2.042788 \n", + "27 5.046413 \n", + "28 1.965881 \n", + "29 4.901555 \n", + ".. ... \n", + "282 4.397417 \n", + "283 10.512088 \n", + "284 6.444389 \n", + "285 6.665675 \n", + "286 3.411324 \n", + "287 3.889477 \n", + "288 3.889477 \n", + "289 8.109339 \n", + "290 0.188073 \n", + "291 5.503341 \n", + "292 5.503341 \n", + "293 8.332972 \n", + "294 15.105703 \n", + "295 14.787898 \n", + "296 14.787898 \n", + "297 14.066749 \n", + "298 14.066749 \n", + "299 9.411845 \n", + "300 9.579569 \n", + "301 1.346010 \n", + "302 1.346010 \n", + "303 8.009320 \n", + "304 5.481488 \n", + "305 5.481488 \n", + "306 8.233029 \n", + "307 4.025229 \n", + "308 8.213621 \n", + "309 2.651287 \n", + "310 7.716322 \n", + "311 5.674670 \n", + "\n", + "[312 rows x 3 columns]" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pandas.DataFrame([\n", + " [r['filename'], r['dialects'][0]['dialect'], r['dialects'][0]['boundary_distance']]\n", + " for r in region_per_free_speech\n", + " if len(r['dialects']) == 1\n", + "], columns = ['filename', 'dialect', 'boundary_distance'])\n", + "\n", + "df.to_excel('free_speech_recordings_by_dialect.xlsx')\n", + "df.to_csv('free_speech_recordings_by_dialect.csv')\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "" + ] } ], "metadata": { @@ -1905,7 +1589,7 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3 + "version": 3.0 }, "file_extension": ".py", "mimetype": "text/x-python", @@ -1916,5 +1600,5 @@ } }, "nbformat": 4, - "nbformat_minor": 1 -} + "nbformat_minor": 0 +} \ No newline at end of file