stimmenfryslan/notebooks/Dialect Regions from image....

1604 lines
595 KiB
Plaintext
Raw Normal View History

2018-09-28 10:35:17 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"from collections import Counter\n",
"from math import sqrt\n",
"\n",
"import folium\n",
"import MySQLdb\n",
"import numpy as np\n",
"import pandas\n",
"from scipy.misc import imread\n",
"from shapely.geometry import shape, Point\n",
"from vincenty import vincenty\n",
"\n",
"%matplotlib notebook\n",
"from matplotlib import pyplot as plt\n",
"\n",
"from skimage.morphology import binary_closing\n",
"from skimage.measure import find_contours, label\n",
"from jupyter_progressbar import ProgressBar\n",
"\n",
"# Connection settings are read from the environment so that no credential is stored in the\n",
"# notebook. STIMMEN_DB_PASSWORD has no default: a missing value raises KeyError right here\n",
"# instead of failing later with an authentication error.\n",
"db = MySQLdb.connect(\n",
"    user=os.environ.get('STIMMEN_DB_USER', 'root'),\n",
"    passwd=os.environ['STIMMEN_DB_PASSWORD'],\n",
"    db=os.environ.get('STIMMEN_DB_NAME', 'stimmen'),\n",
"    charset='utf8'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/herbert/.virtualenvs/stimmenfryslan/lib/python3.6/site-packages/ipykernel_launcher.py:1: DeprecationWarning: `imread` is deprecated!\n",
"`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.\n",
"Use ``imageio.imread`` instead.\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"im = imread('dialects.png')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Tally how many pixels carry each colour (the image is flattened to rows of 3 channel values).\n",
"colors = Counter(tuple(pixel) for pixel in im.reshape(-1, 3))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Distinct colours ordered by pixel count, most frequent first.\n",
"sorted_colors = [color for color, _count in colors.most_common()]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# One 10 x 10 swatch for each of the (at most) 49 most frequent colours.\n",
"blocks = [\n",
"    np.ones((10, 10, 1)) * np.asarray(c).reshape(1, 1, -1)\n",
"    for c in sorted_colors[:49]\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Lay the colour swatches out on a w x w grid (7 x 7 when all 49 swatches are present).\n",
"a, w = len(blocks), int(sqrt(len(blocks)))\n",
"\n",
"# Each grid row takes w consecutive swatches. The row starts must run over the full\n",
"# range(0, w * w, w): pairing range(0, a, w) with range(w, a, w) yields one pair too few,\n",
"# which leaves the last w swatches out of the grid.\n",
"composed_49 = np.concatenate([\n",
"    np.concatenate(blocks[row_start:row_start + w], axis=1)\n",
"    for row_start in range(0, w * w, w)\n",
"], axis=0)\n",
"\n",
"# Positions (in sorted_colors) of the four colours used as dialect fills; the names in\n",
"# `regions` are matched to them by position.\n",
"# NOTE(review): these positions look hand-picked from the swatch grid for dialects.png - confirm.\n",
"region_color_ranks = [3, 4, 7, 8]\n",
"relevant_colors = np.array([sorted_colors[rank] for rank in region_color_ranks])\n",
"regions = ['Klaaifrysk', 'Waldfrysk', 'Sudwesthoeksk', 'Noardhoeksk']\n",
"\n",
"composed_4 = np.concatenate([blocks[rank] for rank in region_color_ranks], axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"type\": \"FeatureCollection\", \"features\": [{\"type\": \"Feature\", \"properties\": {\"dialect\": \"Klaaifrysk\"}, \"geometry\": {\"type\": \"Polygon\", \"coordinates\": [[[5.794012015464746, 52.912342273640135], [5.791698705528585, 52.912342273640135], [5.789385395592423, 52.912342273640135], [5.787072085656261, 52.912342273640135], [5.78591543068818, 52.9130252649636], [5.784758775720099, 52.913708256287066], [5.783602120752018, 52.91439124761054], [5.783602120752018, 52.91575723025747], [5.783602120752018, 52.9171232129044], [5.782445465783937, 52.91780620422787], [5.781288810815856, 52.91848919555134], [5.781288810815856, 52.91985517819827], [5.780132155847776, 52.920538169521734], [5.778975500879694, 52.92122116084521], [5.778975500879694, 52.92258714349214], [5.7778188459116135, 52.9232701348156], [5.776662190943533, 52.923953126139075], [5.776662190943533, 52.925319108786006], [5.7755055359754515, 52.92600210010947], [5.774348881007371, 52.92668509143294], [5.77319222603929, 52.92736808275641], [5.772035571071209, 52.928051074079875], [5.772035571071209, 52.929417056726805], [5.770878916103128, 52.93010004805027], [5.769722261135047, 52.93078303937374], [5.768565606166966, 52.93146603069721], [5.7674089511988855, 52.932149022020674], [5.766252296230804, 52.93283201334414], [5.7650956412627234, 52.933515004667605], [5.763938986294643, 52.93419799599108], [5.762782331326561, 52.93488098731454], [5.761625676358481, 52.93556397863801], [5.760469021390399, 52.93624696996147], [5.759312366422319, 52.93692996128494], [5.758155711454238, 52.93761295260841], [5.756999056486157, 52.93829594393188], [5.754685746549995, 52.93829594393188], [5.753529091581914, 52.93897893525534], [5.752372436613833, 52.93966192657881], [5.750059126677671, 52.93966192657881], [5.747745816741509, 52.93966192657881], [5.745432506805348, 52.93966192657881], [5.7442758518372665, 52.94034491790227], [5.743119196869186, 52.941027909225745], [5.740805886933024, 52.941027909225745], 
[5.738492576996862, 52.941027909225745], [5.736179267060701, 52.941027909225745], [5.7338659571245385, 52.941027909225745], [5.731552647188376, 52.941027909225745], [5.729239337252214, 52.941027909225745], [5.726926027316053, 52.941027909225745], [5.725769372347972, 52.94171090054921], [5.724612717379891, 52.942393891872676], [5.722299407443729, 52.942393891872676], [5.719986097507567, 52.942393891872676], [5.717672787571406, 52.942393891872676], [5.715359477635244, 52.942393891872676], [5.713046167699082, 52.942393891872676], [5.7107328577629195, 52.942393891872676], [5.708419547826758, 52.942393891872676], [5.706106237890596, 52.942393891872676], [5.703792927954434, 52.942393891872676], [5.701479618018272, 52.942393891872676], [5.7003229630501915, 52.94307688319614], [5.7003229630501915, 52.94444286584308], [5.7003229630501915, 52.94580884849001], [5.7003229630501915, 52.94717483113695], [5.7003229630501915, 52.94854081378388], [5.7003229630501915, 52.94990679643081], [5.7003229630501915, 52.95127277907775], [5.699166308082111, 52.95195577040121], [5.698009653114029, 52.95263876172468], [5.698009653114029, 52.954004744371616], [5.698009653114029, 52.95537072701855], [5.698009653114029, 52.95673670966548], [5.698009653114029, 52.958102692312416], [5.698009653114029, 52.959468674959346], [5.698009653114029, 52.960834657606284], [5.698009653114029, 52.962200640253215], [5.699166308082111, 52.96288363157668], [5.7003229630501915, 52.963566622900146], [5.701479618018272, 52.96424961422362], [5.702636272986354, 52.964932605547084], [5.702636272986354, 52.966298588194014], [5.702636272986354, 52.96766457084095], [5.702636272986354, 52.96903055348788], [5.702636272986354, 52.970396536134814], [5.702636272986354, 52.97176251878175], [5.701479618018272, 52.97244551010522], [5.699166308082111, 52.97244551010522], [5.696852998145949, 52.97244551010522], [5.694539688209787, 52.97244551010522], [5.692226378273625, 52.97244551010522], [5.691069723305544, 52.97176251878175], 
[5.6899130683374635, 52.971079527458286], [5.687599758401301, 52.971079527458286], [5.68644310343322,
]
}
],
"source": [
"# `geojson` is assigned by a cell further down in this notebook, so on a fresh top-to-bottom\n",
"# run it does not exist yet at this point; the guard avoids a NameError in that case.\n",
"# Only the beginning is shown because the full string is very long.\n",
"if 'geojson' in globals():\n",
"    print(geojson[:500])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<img src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA7YAAAEsCAYAAAD3mC+UAAAgAElEQVR4nO3dv47jzJmo8QKOL8aAbuZLlE80wGbeyLPJASMHewObDjbYQHDgxBgYiwP0Zk6cLJRM5EDpBNIFNOoEUknFUpEsklV83yo+T+PFzPQftaTWiPp1SaSxRERERERERBVnpM8AERERERER0ZqALREREREREVUdsCUiIiIiIqKqA7ZERERERERUdcCWiIiIiIiIqg7YEhERERERUdUBWyIiIiIiIqo6YEtERERERERVB2yJiIiIiIio6oAtERERERERVR2wJSIiIiIioqoDtkRERERERFR1wJaIiIiIiIiqDtgSERERERFR1QFbIiIiIiIiqjpgS0RERERERFUHbImIiIiIiKjqgC0RERERERFVHbAlIiIiIiKiqgO2REREREREVHXAloiIiIiIiKoO2BIREREREVHVZYPt5+envVwu9nq92tvtxjAMs+lcr1d7uVzs5+dnrrs1IiIiIqqkbLC9XC7WGMMwDCM6l8sl190aEREREVVSNther1drjLH/878X+49/3hiGYTad//nf+y/Xrtdrrrs1ay3PRmEYRn54RgoR0XTZYHu73awxxv7jnzf785dlGIbZdP7xz/t90O12y3W3Zq3l2SgMw+gZnpFCRDQcsGUYpokpBdtano3y5cuXKkZ65StlzJ9NHfPtpn7++te/VjHSYE2d3M9IISJqKWDLMEwTUwq2tdy3ff36tYqpIfPD1DGdVT8fHx9VjDRYUyf3/RsRUUsBW4ZZMP4DDffv8OM5vkfpyyB9PeYcYCuPVmALbIEtsCUikgrYMszM8UEYwjb2ZwjglL8vOY25/w4fMElfr2sH2MqjFdgCW2ALbImIpAK2DLNgUgA59mfK5wx9TQpqY+8P8Ro7LzUPsJVHK7AFtsAW2BIRSQVsGWbmDOF0DK1TX5vyZ+z7+O8bOg9jXxf7+9jljp2OlgG28mgFtsAW2AJbIiKpgC3DLJgQeWN/xj5vzdeEHxsD7NjnT8HWGGPtzJFEL7CVRyuwBbbAFtgSEUkFbBmGec5cyGoCLrCVRyuwBbbAFtgSEUkFbBmm0QlXa2MfywVZYCs/0mAFtsAW2AJbIiLJgC3DNDghWktDVhq1P38BW2mwAltgC2yBLRGRZMCWYRqc0nCVwuvYAFt5tAJbYAtsgS0RkVTAlmEam5IrsxpB6wbYyqMV2AJbYAtsiYikArYM09jsEbU/fwFbabACW2ALbIEtEZFkwJZhGppSq7XaUfvzF7CVBiuwBbbAFtgSEUkGbBmmkSmFWlsBan/+ArbSYAW2wBbYAlsiIsmALcM0MHteqXUDbOXRCmyBLbAFtkREUgFbhpk5sQdD7mOxByJbna9ckK0Js/4AW3m0AltgC2yBLRGRVMCWYRLGPahY86DJP50S5y8FrFLo3mKArTxagS2wBbbAlohIKmDLqJstwLUGqbkeROW+PEOglf55bjXAVh6twBbYAltgS0QkFbBl1EwMm2tXFsOvlQatPzmvu6GVWumf6ZYDbOXRCmyBLbAFtkREUgFbRs1MPejwP3cIvP4DgC0fELnvPQRY97FSK6ixldo9rdb+/AVspcEKbIEtsAW2RESSAVtGzUg/sFmDWn+kUBnCVvrnufUAW3m0AltgC2yBLRGRVMCWUTPSD2zmPPAZQq3kGGALbCuYGhIHK7AFtsCWiGh2wJYpMnNXLR0WNc4ekVjjAFt5tAJbYAtsgS0RkVTAllk8IV6NMfbfPv89OlM41AZbMFvfAFt5tAJbYAtsgS0RkVTAlpk9Q3hNmSEw5obt0Iqxez+gbW+ArTxagS2wBbbAlohIKmDLJM/Yiuxa2P78le8BUApOfeAOIZipa4CtPFqBLbAFtsCWiEgqYMtMzlrQpsIxx6otQN3vAFt5tAJbYAtsgS0RkVTAlnmOD1i3ES25Qjt0Ht
Y8GJG+Dpny437WIZi+fPkCbCuYGhIHK7AFtsCWiGh2wHbnkwuwDrHh+9acr5QHI2OXKQbdpQAGzdvdFsdmDEzAVh6twBbYAltgS0QkFbDd2eSEbGxVNlz1XXtehx58pFzGFNi6v4dfs/RPZt7kBBOwlUcrsAW2wBbYEhFJBWx3MrkxO4bXpauisdOZA9rw+w+dt9Q/Y9ANL1+rsB27DnN+D2Cbb6TBCmyBLbAFtkREkgHbHUwJzJZGz5oZOn/hAwT3vrE//a+d8/k1zhg0c5xm+MuB3GACtvJoBbbAFtgCWyIiqYBto2PMa7UzJ2qlL1fqZR9aaQ0/ZwyoY6cjCds5v1Qwxlj7x/++T4BMN6kgmXsZtwYTsJVHK7AFtsAW2BIRSQVsGxgfsbEp/dRjJv/P02HUh/MTqMG4j4U/m7H3L10xnXMZgO22Iw1WYAtsgS2wJSKSDNhWPFOgnYtbf+M5trdjcJvv5zeEUR+tY6gdg24MteHMRUnqz14CTMBWHq3AFtgCW2BLRCQVsK1s5mB2DmxjwPLf7+MW2K7/Ga7Fa+qknJ+1uHWrs2tWgoHt+pEGK7AFtsAW2BIRSQZsFY6/Ebv/iO6zdsPNU4vlpxRgx1Zvp25nP3/VgyJgOzzS1z+wBbbAFtgSEUkGbBVNCNlwcsGW1dd1P6PRn1+wCut/fEvQTsHWX1mVXGUFtvlG+voHtsAW2AJbIiLJgK2iGUNtLtg66MTgxSybrVdhczwduRXMbgnb2zdjbad4FNzWksYY9VPimN8lZmqboWHM8VTFHI1RPb8ZYEtENJXJdULANgOOCsNW+jK2OuJQWLhq62530iAFtplGwW0N2AJbYAtsiYikMrlOCNguRNGMBwhrnmIlfTlbmt5roKWRsBK27vJIoxTYZhgFtzVgC2yBLbAlIpLK5DohYDt/5j5AALXyUwtmwxm7TNIgBbaZRsHtDNgCW2ALbImIpDK5TgjYLgDSggcJoHbjn9EfXzuCqhW1KbB1txVpnALbFaPgdgZsgS2wBbZERFKZXCcEbOfN0gcJc/buKH0Zax9xAGwEW39qfloysJW/nQFbYAtsgS0RkVQm1wkB2/SZu1o7BFUAW/hnJA2AjLPmdiINVmCbOApuZ8AW2AJbYEtEJJXJdULAdgaYZqI25TT9z0vZWZD09aBp/Kcau/eJP/hXAtuaVnCBrfztDNgCW2ALbImIpDK5TgjYTgNhyYOClNP1cTb0vvD9Q6DbG3pbW5kdmyXXjTRWge2MUXAbA7bAFtgCWyIiqUyuEwK2E3iasUIbA+fYacdwmgLasY9LX2db/Ez2hFr7x/mrtu46kgYrsE0cBbcxYAtsgS2wJSKSyuQ6IWA7DoRU1K6FWvj31H/7728dtnsDrT9rblvScAW2E6Pg9gVsgS2wBbZERFKZXCcEbMenFGpjK7xD//a/JuXPVqaFw/XkmtSfbewXHNJwBbYTo+D2BWyBLbAFtkREUplcJwRsp6FQArZzvv+Sj9U+YDYNt/4vNWKHkHIP+KXxCmxHRsFtC9gCW2ALbImIpDK5TgjYJgArgln399Lfe+j9raK2BtA+0Rj8fWvcphwb2QeuNGCB7cAouE0DW2ALbIEtEZFUJtcJAdsEaAUbfP9j0uevpakBtQ6zMeBu9b2XjlbcAlv52zSwBbbAFtgSEUllcp0QsJ2e54YeyJa5fqUfsM9EpRRq18JWGrDAFtgCW2ALbImIKAzYMtVO+HpQ8QfsM1BZK2pZsVU8Cm7bwBbYAltgS0QkFbBlqhv32uAouqQftK8ELrAFtsBWfqTBCmyBLbAlIpofsGWqmDHM1gpbH7duTGffRhtstQIX2MrfnoEtsAW2wJaISCqT64SALVNqUkBbI2zdeY5hdmi0oFbj622BrfxtGtgCW2ALbImIpDK5TgjYMjlnDmZrRe4c0OYAbm7UAltlo+A2DWyBLbAFtkREUplcJwRsmRyTA7TaYbsGtEuBm+upx9owC2y9UXDbBr
bAFtgCWyIiqUyuEwK2zJrJCVrNsM2J2jnIbW11FthGRsHtG9gCW2ALbImIpDK5TmhPsHU7MuJ4tPmuT1BbFrm5Vm
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Three panels side by side: the source image, the grid of most frequent colours and the\n",
"# four selected region colours. Ticks are removed because the axes carry no meaning here.\n",
"plt.rcParams['figure.figsize'] = (9.5, 3)\n",
"ax0, ax1, ax2 = plt.subplots(1, 3)[1]\n",
"panels = [\n",
"    (ax0, im),\n",
"    (ax1, composed_49.astype(int)),\n",
"    (ax2, composed_4.astype(int)),\n",
"]\n",
"for panel_axis, picture in panels:\n",
"    panel_axis.imshow(picture)\n",
"    panel_axis.set_xticks([])\n",
"    panel_axis.set_yticks([])\n",
"plt.tight_layout()\n",
"\n",
"# Reference values tying pixel positions in the image to map coordinates: each *_coords pair\n",
"# belongs to the *_pixels pair with the same prefix. The scaling cell further down\n",
"# interpolates linearly between them (columns -> first pair, rows -> second pair).\n",
"stavoren_to_east_coords = [5.354483127593994, 6.330699920654297]\n",
"stavoren_to_east_pixels = [295, 717]\n",
"\n",
"north_to_south_coords = [53.41434089638827, 52.832432288794514]\n",
"north_to_south_pixels = [99, 525]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<img src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA7YAAAEsCAYAAAD3mC+UAAAgAElEQVR4nO3deXxU9b3/8QlqwVZrFbuorUlgskAWIIEEkpBtUCsucUkIBGQXZN8J+ybIpiACKovssgfCUq1Llda6tN5qtXrbqnUbCL2/9rZJ71VbJf38/vAyk8NMgHw4yfmezOv5eLwf92HIMjO9+ZzPOzNzjkcAAAAAAHAxj9M3AAAAAACAC0GxBQAAAAC4GsUWAAAAAOBqFFsAAAAAgKtRbAEAAAAArkaxBQAAAAC4GsUWAAAAAOBqFFsAAAAAgKtRbAEAAAAArkaxBQAAAAC4GsUWAAAAAOBqFFsAAAAAgKtRbAEAAAAArkaxBQAAAAC4GsUWAAAAAOBqFFsAAAAAgKtRbAGFf//731JTUyP//ve/nb4piGD8/yFgH36fYAL+/xDQo9gCCtXV1eLxeMTv90tNTQ0hjsTv94vH45Hq6mqnfyUA12OuExPCXAf0KLaAwukDDyEmxO/3O/0rAbgec52YFOY60HAUW0Dh9F/2czw9Jd9TRIgjyfH0FI+Hv+wDdmCuExPCXAf0KLaAQk1NjXg8Hsn3FEmPqGJCHEm+p0g8Ho/U1NQ4/SsBuB5znZgQ5jqgR7EFFFiAiAlhAQLsw1wnJoS5DuhRbAEFFiBiQliAAPsw14kJYa4DehRbQIEFiJgQFiDAPsx1YkKY64AexRZQYAEiJoQFCLAPc52YEOY6oEexBRRYgIgJYQEC7MNcJyaEuQ7oUWwBBRYgYkJYgAD7MNeJCWGuA3oUW0CBBYiYEBYgwD7MdWJCmOuAHsUWUGABIiaEBQiwD3OdmBDmOqBHsQUUWICICWEBAuzDXCcmhLkO6FFsAQUWIGJCWIAA+zDXiQlhrgN6FFtAgQWImBAWIMA+zHViQpjrgB7FFlBgASImhAUIsA9znZgQ5jqgR7EFFFiAiAlhAQLsw1wnJoS5DuhRbAEFFiBiQliAAPsw14kJYa4DehRbQIEFiJgQFiDAPsx1YkKY64AexRZQYAEiJoQFCLAPc52YEOY6oEexBRRYgIgJYQEC7MNcJyaEuQ7oUWwBBRYgYkJYgAD7MNeJCWGuA3oUW0CBBYiYEBYgwD7MdWJCmOuAHsUWUGABIiaEBQiwD3OdmBDmOqBHsQUUWICICWEBAuzDXCcmhLkO6FFsAQUWIGJCWIAA+zDXiQlhrgN6FFtAgQWImBAWIMA+zHViQpjrgB7FFlBgASImhAUIsA9znZgQ5jqgR7EFFFiAiAlhAQLsw1wnJoS5DuhRbAEFFiBiQliAAPsw14kJYa4DehRbQIEFiJgQFiDAPsx1YkKY64AexRZQYAEiJoQFCLAPc52YEOY6oEexBRRYgIgJYQEC7MNcb34paNlH8r4zUPIvu0cKLyl1/PacT5jrgB7FFlBgASImhAUIsA9zvXmk8Bu9JT1zrsQN2SHR5UeDmXpE2ozaK8m3rpLuVw9x/HbWF+Y6oEexBRRYgIgJYQEC7MNcd3d8UcXSJXW6xI7Zby204TL1iKT2WCZ53xno+O0+M8x1QI9iCyiwABETwgIE2Ie57t4UtOwjib3XhxTY7EVbpXT1Sun/6HLxLd0YtuTGDdkhqTcsl25tx4nPgPvCXAf0KLaAAgsQMSEsQIB9mOvuTEHLPpLQb5OlrJauXilv/rZAak96Lfnvj1Jl9s6p9T6TGzOxUlJvWC4Frcocuz/MdUCPYgsosAARE8ICBNiHue6+nFlqk2bvk8pjJXKqyhtSauvmoz9kyv17Jkru4s3hC+6kQ5J75SBH7hNzHdCj2AIKLEDEhLAAAfZhrrsrvhYlklgWfHlxp3m75Z23u5+10IbLyQ/S5Mln+0nZ2odCym3H/MVScGnfJr1fzHVAj2ILKLAAERPCAgTYh7nurnTwLQuU0JQ5e1Wl9sy88vpN0nHubkvBjR1bIRnJ5U12v5jrgB7FFl
BgASImhAUIsA9z3T3JSC4PPrNafliOvXbLBZfauu/DnbBlTsjLkzv4loqvRQlzHTAYxRZQYAE6v+ReOUg6p82SrvETJe/bA6SwZe8Gn3XSF1UsOdcMl8KLezl+f0wLCxBgH+a6O5J3xUCJmVgZKJyPHbnXtlJbN++9myV91jwU8uxtl9TpzHXAUBRbQIEF6NxJ7zxHoqceCXvWycSyjZLWdX69ZdUXVSzZ142QtG7zxXvvkxJdflS8w3ZK3rcHOH6/TAoLEGAf5ro7knzLw4Hjyb3rH2iUUls3644ODTmONeb1b5nrgB7FFlBgATp70jPn1ns5Bcv1Awdtk7Su86Wrd7zkf6tf4OvTMueF/fy2w3dJ7lXOnKnSxLAAAfZhrpufnGuGB48fMyrlzx90avRiu+npQaHXx/3hSOY6YCCKLaDAAhQ+uVcNkg6FSywLgHd6pQzfsFB8SzdK3IyD9RfdqUckvfMcSbp99VnLcOz4A5LRforj99WEsAAB9mGum592vdYFjgXLK8Y0eql9+6280PfaFi5t8FtqmOtA06DYAgosQKHpnDYrZAFYuHdi2GXh2ZeLQs48GS7tZ+2X1/7jBjnxfrrkLd5k+bfE3uslt/UQx++3k2EBAuzDXDc73dqOC5bLuXvkfz5t3+jFdudzfS3HnaZ4xRBzHdCj2AIKLEDWZF87IqSUTtw6W/51Ir7eheFzf6K88vpN8sRTg8OW2vLtM+Rzf0Lg8//6Yar0Xfug9fOmHJaMlGmO33+nwgIE2Ie5bm58F5WId+iOwOzf+NTgRi+1tSe98twrtwdffTRsJ2dFBgxHsQUUWICCKbi0TLxDnwwc/MvWPiTvvJ3boOXhrx+mysOVI2XUEwtk2o7p8uKrt8qpqtDPO1X19V/QO8zdYym4XeMmWG5T7pWDJL3LHMn5wXDHHx8WIMAdmOtmJOea+yT1xgcl+0fB97HWvbxP/pJN8lVVXJMU238ej5eshdsCPzszcRJzHTAYxRZQYAH6Or4WJZJQtjFw0M9ZtMXyLGtj5W8fpcioJxaEnIgqI7lcsq8dIbFjK4In+bhuhOOPEwsQYD7muvPJbT1YYiYEz8XQKWehZF0/yjLrh21Y1CSl9nT2/axP4Gcn9NvEXAcMRrEFFFiAvk7yrasCB/zUOXvkvXezmmzZOFXllaTZ+87r7Mt5VzTPywSxAAH2Ya47G99FJRI3cNs55/nU7TMbfLzoEVWsPtZ8+scu1jMiN/IfS5nrgB7FFlBgASqWrvETLZdd+MWvbm7Sv6LXnvTKS7+++byKbVq3BY4/XixAgNmY686lc6eZ5zXLo8uPyqrKEQ0utaej/SNq8SPBP+K2GbVPcq8azFwHDESxBRQifQHyRRVL3ODtTX4ij3D5+0cpMn/3ZMvi03vNCkmbv8tyKaHGvDyDU2EBAuwT6XPdqeRd3v+8S+2krbNVb3c5/bO0x5nP/O2k54OPBW5H7Nj9knX9aOY6YBiKLaAQ6QtQzg+GBQ7w3RZua7ITeZwtX56Ik9/+Nl/efitPTlV5pf+jywO3MWZipRR+o7fjjxsLEGCuSJ/rTqWrd3xIgb1x+Xqp/jhJ/vFJe/mqKk6+OJ5wQedvOP2zLuQY85cPO8hNy4PX0Y2ZdKhRXpbMXAf0KLaAQqQvQF1SpwUO7ov2TnC81J6Z9T8ZYlmSOqfNcvwxa4ywAAH2ifS57lRyrr3PMq8Hr1sif/0wNexsP11ynSi2p8vtnQ+vrvOy5L1ScGlf5jpgCIotoBDJC1DulYMsZx3e/0Jvx4vsmbn5wccDty9+wBbHH7PGCgsQYJ9InutOJtwJo7oseFLer3MywlNVXllWMVYSZ1ZI7LTDsvrQfY4dXz73J1ieuU25eQVzHTAExRZQiNQFKLf1YGkzKngm4tQ5e+TvH6U4XmTPzNB1i61LUofpjj92jREWIMA+kTrXnU7bEbvrfU/ttmf6y4n30+XuVatC/m3FwVENPjac/pkXeoz54D+7Sp
tph+pc33Yycx0wAMUWUIjUBSjpjjWWxcKJMyGfT957N0vuWLmm2ZdbFiDAPpE6151Ot9ixEj9gy3mfQOpCym3dn3
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One panel per region colour: build a mask of the matching pixels, close small gaps in it\n",
"# and trace the outlines of the mask.\n",
"axes = plt.subplots(2, 2)[1].ravel()\n",
"contours = []\n",
"for axis, c in zip(axes, relevant_colors):\n",
"    # True where every channel equals the region colour; the last 100 pixel rows are left out.\n",
"    bi = (im[:-100] == c[None, None]).all(axis=2)\n",
"    bi = binary_closing(bi, np.ones((5, 5)))\n",
"\n",
"    # NOTE(review): `labels` is not read by any cell visible here - confirm before removing.\n",
"    labels = label(bi, background=False)\n",
"\n",
"    region_contours = find_contours(bi, 0.5)\n",
"    contours.append(region_contours)\n",
"\n",
"    axis.imshow(bi)\n",
"    # Only the first contour is drawn; it is also the one the scaling cell converts later.\n",
"    for contour in region_contours[:1]:\n",
"        axis.plot(contour[:, 1], contour[:, 0], linewidth=2)\n",
"    axis.set_xticks([])\n",
"    axis.set_yticks([])\n",
"plt.tight_layout()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Reference coordinates (a0, b0) and the pixel columns (c0, d0) they correspond to.\n",
"a0, b0 = stavoren_to_east_coords\n",
"c0, d0 = stavoren_to_east_pixels\n",
"# Reference coordinates (a1, b1) and the pixel rows (c1, d1) they correspond to.\n",
"a1, b1 = north_to_south_coords\n",
"c1, d1 = north_to_south_pixels\n",
"\n",
"\n",
"def scale_x(x):\n",
"    \"\"\"Linearly map pixel column(s) `x` from [c0, d0] onto [a0, b0].\"\"\"\n",
"    return (x - c0) / (d0 - c0) * (b0 - a0) + a0\n",
"\n",
"\n",
"def scale_y(x):\n",
"    \"\"\"Linearly map pixel row(s) `x` from [c1, d1] onto [a1, b1].\"\"\"\n",
"    return (x - c1) / (d1 - c1) * (b1 - a1) + a1\n",
"\n",
"\n",
"# Convert the first contour of every region into a list of (scaled column, scaled row) pairs.\n",
"# NOTE(review): index 0 is simply the first contour found - confirm it is the region outline.\n",
"contours_scaled = []\n",
"for region_contours in contours:\n",
"    outline = region_contours[0]\n",
"    contours_scaled.append(list(zip(scale_x(outline[:, 1]), scale_y(outline[:, 0]))))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# One GeoJSON Polygon feature per dialect, with the scaled contour as its only ring.\n",
"features = []\n",
"for contour, dialect in zip(contours_scaled, regions):\n",
"    ring = [list(point) for point in contour]\n",
"    features.append({\n",
"        'type': 'Feature',\n",
"        'properties': {'dialect': dialect},\n",
"        'geometry': {\n",
"            'type': 'Polygon',\n",
"            'coordinates': [ring]\n",
"        }\n",
"    })\n",
"\n",
"geojson = json.dumps({'type': 'FeatureCollection', 'features': features})\n",
"\n",
"# Keep a copy on disk; the map cell below loads the regions from this file.\n",
"with open('dialect_regions.geojson', 'w') as f:\n",
"    f.write(geojson)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"width:100%;\"><div style=\"position:relative;width:100%;height:0;padding-bottom:60%;\"><iframe src=\"data:text/html;charset=utf-8;base64,PCFET0NUWVBFIGh0bWw+CjxoZWFkPiAgICAKICAgIDxtZXRhIGh0dHAtZXF1aXY9ImNvbnRlbnQtdHlwZSIgY29udGVudD0idGV4dC9odG1sOyBjaGFyc2V0PVVURi04IiAvPgogICAgPHNjcmlwdD5MX1BSRUZFUl9DQU5WQVM9ZmFsc2U7IExfTk9fVE9VQ0g9ZmFsc2U7IExfRElTQUJMRV8zRD1mYWxzZTs8L3NjcmlwdD4KICAgIDxzY3JpcHQgc3JjPSJodHRwczovL2Nkbi5qc2RlbGl2ci5uZXQvbnBtL2xlYWZsZXRAMS4yLjAvZGlzdC9sZWFmbGV0LmpzIj48L3NjcmlwdD4KICAgIDxzY3JpcHQgc3JjPSJodHRwczovL2FqYXguZ29vZ2xlYXBpcy5jb20vYWpheC9saWJzL2pxdWVyeS8xLjExLjEvanF1ZXJ5Lm1pbi5qcyI+PC9zY3JpcHQ+CiAgICA8c2NyaXB0IHNyYz0iaHR0cHM6Ly9tYXhjZG4uYm9vdHN0cmFwY2RuLmNvbS9ib290c3RyYXAvMy4yLjAvanMvYm9vdHN0cmFwLm1pbi5qcyI+PC9zY3JpcHQ+CiAgICA8c2NyaXB0IHNyYz0iaHR0cHM6Ly9jZG5qcy5jbG91ZGZsYXJlLmNvbS9hamF4L2xpYnMvTGVhZmxldC5hd2Vzb21lLW1hcmtlcnMvMi4wLjIvbGVhZmxldC5hd2Vzb21lLW1hcmtlcnMuanMiPjwvc2NyaXB0PgogICAgPGxpbmsgcmVsPSJzdHlsZXNoZWV0IiBocmVmPSJodHRwczovL2Nkbi5qc2RlbGl2ci5uZXQvbnBtL2xlYWZsZXRAMS4yLjAvZGlzdC9sZWFmbGV0LmNzcyIvPgogICAgPGxpbmsgcmVsPSJzdHlsZXNoZWV0IiBocmVmPSJodHRwczovL21heGNkbi5ib290c3RyYXBjZG4uY29tL2Jvb3RzdHJhcC8zLjIuMC9jc3MvYm9vdHN0cmFwLm1pbi5jc3MiLz4KICAgIDxsaW5rIHJlbD0ic3R5bGVzaGVldCIgaHJlZj0iaHR0cHM6Ly9tYXhjZG4uYm9vdHN0cmFwY2RuLmNvbS9ib290c3RyYXAvMy4yLjAvY3NzL2Jvb3RzdHJhcC10aGVtZS5taW4uY3NzIi8+CiAgICA8bGluayByZWw9InN0eWxlc2hlZXQiIGhyZWY9Imh0dHBzOi8vbWF4Y2RuLmJvb3RzdHJhcGNkbi5jb20vZm9udC1hd2Vzb21lLzQuNi4zL2Nzcy9mb250LWF3ZXNvbWUubWluLmNzcyIvPgogICAgPGxpbmsgcmVsPSJzdHlsZXNoZWV0IiBocmVmPSJodHRwczovL2NkbmpzLmNsb3VkZmxhcmUuY29tL2FqYXgvbGlicy9MZWFmbGV0LmF3ZXNvbWUtbWFya2Vycy8yLjAuMi9sZWFmbGV0LmF3ZXNvbWUtbWFya2Vycy5jc3MiLz4KICAgIDxsaW5rIHJlbD0ic3R5bGVzaGVldCIgaHJlZj0iaHR0cHM6Ly9yYXdnaXQuY29tL3B5dGhvbi12aXN1YWxpemF0aW9uL2ZvbGl1bS9tYXN0ZXIvZm9saXVtL3RlbXBsYXRlcy9sZWFmbGV0LmF3ZXNvbWUucm90YXRlLmNzcyIvPgogICAgPHN0eWxlPmh0bWwsIGJvZHkge3dpZHRoOiAxMDAlO2hlaWdodDogMTAwJTttYXJnaW46IDA7cGFkZGluZzogMDt9PC9zdHlsZT4KICAgIDxzdHlsZT4jbWFwIHtwb3NpdGlvbjphYnNvb
HV0ZTt0b3A6MDtib3R0b206MDtyaWdodDowO2xlZnQ6MDt9PC9zdHlsZT4KICAgIAogICAgPHN0eWxlPiNtYXBfYTI3M2ZkYWExMGVmNDVkNGJmYzQyM2EwMzU2OGIwNmMgewogICAgICAgIHBvc2l0aW9uOiByZWxhdGl2ZTsKICAgICAgICB3aWR0aDogMTAwLjAlOwogICAgICAgIGhlaWdodDogMTAwLjAlOwogICAgICAgIGxlZnQ6IDAuMCU7CiAgICAgICAgdG9wOiAwLjAlOwogICAgICAgIH0KICAgIDwvc3R5bGU+CjwvaGVhZD4KPGJvZHk+ICAgIAogICAgCiAgICA8ZGl2IGNsYXNzPSJmb2xpdW0tbWFwIiBpZD0ibWFwX2EyNzNmZGFhMTBlZjQ1ZDRiZmM0MjNhMDM1NjhiMDZjIiA+PC9kaXY+CjwvYm9keT4KPHNjcmlwdD4gICAgCiAgICAKICAgIAogICAgICAgIHZhciBib3VuZHMgPSBudWxsOwogICAgCgogICAgdmFyIG1hcF9hMjczZmRhYTEwZWY0NWQ0YmZjNDIzYTAzNTY4YjA2YyA9IEwubWFwKAogICAgICAgICdtYXBfYTI3M2ZkYWExMGVmNDVkNGJmYzQyM2EwMzU2OGIwNmMnLCB7CiAgICAgICAgY2VudGVyOiBbNTMuMTIzMzg2NTkyNTkxMzksIDUuODQyNTkxNTI0MTI0MTQ1NV0sCiAgICAgICAgem9vbTogOSwKICAgICAgICBtYXhCb3VuZHM6IGJvdW5kcywKICAgICAgICBsYXllcnM6IFtdLAogICAgICAgIHdvcmxkQ29weUp1bXA6IGZhbHNlLAogICAgICAgIGNyczogTC5DUlMuRVBTRzM4NTcsCiAgICAgICAgem9vbUNvbnRyb2w6IHRydWUsCiAgICAgICAgfSk7CgogICAgCiAgICAKICAgIHZhciB0aWxlX2xheWVyXzNkYjg0ZGU3ZTNjZTRiODc4NTkzNDAxYzRkMmRlODIzID0gTC50aWxlTGF5ZXIoCiAgICAgICAgJ2h0dHBzOi8ve3N9LnRpbGVzLm1hcGJveC5jb20vdjMvbWFwYm94LndvcmxkLWJyaWdodC97en0ve3h9L3t5fS5wbmcnLAogICAgICAgIHsKICAgICAgICAiYXR0cmlidXRpb24iOiBudWxsLAogICAgICAgICJkZXRlY3RSZXRpbmEiOiBmYWxzZSwKICAgICAgICAibWF4TmF0aXZlWm9vbSI6IDE4LAogICAgICAgICJtYXhab29tIjogMTgsCiAgICAgICAgIm1pblpvb20iOiAwLAogICAgICAgICJub1dyYXAiOiBmYWxzZSwKICAgICAgICAic3ViZG9tYWlucyI6ICJhYmMiCn0pLmFkZFRvKG1hcF9hMjczZmRhYTEwZWY0NWQ0YmZjNDIzYTAzNTY4YjA2Yyk7CiAgICAKICAgICAgICAKICAgICAgICB2YXIgZ2VvX2pzb25fNmRmNzg1NWVkMGZkNDk2OGJiYjY3MjM3YjJhNWRkZDQgPSBMLmdlb0pzb24oCiAgICAgICAgICAgIHsiZmVhdHVyZXMiOiBbeyJnZW9tZXRyeSI6IHsiY29vcmRpbmF0ZXMiOiBbW1s1Ljc5NDAxMjAxNTQ2NDc0NiwgNTIuOTEyMzQyMjczNjQwMTM1XSwgWzUuNzkxNjk4NzA1NTI4NTg1LCA1Mi45MTIzNDIyNzM2NDAxMzVdLCBbNS43ODkzODUzOTU1OTI0MjMsIDUyLjkxMjM0MjI3MzY0MDEzNV0sIFs1Ljc4NzA3MjA4NTY1NjI2MSwgNTIuOTEyMzQyMjczNjQwMTM1XSwgWzUuNzg1OTE1NDMwNjg4MTgsIDUyLjkxMzAyNTI2NDk2MzZdLCBbNS43ODQ3NTg3NzU3MjAwOTksIDUyLjkxMzcwODI1N
jI4NzA2Nl0sIFs1Ljc4MzYwMjEyMDc1MjAxOCwgNTIuOTE0MzkxMjQ3NjEwNTRdLCBbNS43ODM2MDIxMjA3NTIwM
],
"text/plain": [
"<folium.folium.Map at 0x7fc8efce5400>"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Centre the map on the midpoint of the two reference ranges.\n",
"map_center = [sum(north_to_south_coords) / 2, sum(stavoren_to_east_coords) / 2]\n",
"\n",
"# NOTE(review): 'Mapbox Bright' is resolved by folium as a built-in tile set name - verify it\n",
"# is still offered by the installed folium version.\n",
"m = folium.Map(location=map_center, tiles='Mapbox Bright', zoom_start=9)\n",
"\n",
"region_layer = folium.GeoJson('dialect_regions.geojson', name='geojson')\n",
"region_layer.add_to(m)\n",
"\n",
"m"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Geometry object per dialect name, built from the same rings that went into the GeoJSON.\n",
"shapes = {}\n",
"for contour, dialect in zip(contours_scaled, regions):\n",
"    ring = [list(point) for point in contour]\n",
"    shapes[dialect] = shape({'type': 'Polygon', 'coordinates': [ring]})\n",
"\n",
"def regions_for(coordinate):\n",
"    \"\"\"Return the set of dialect names whose polygon in `shapes` contains `coordinate`.\n",
"\n",
"    `coordinate` is unpacked into `Point(...)`, so it must use the same axis order as the\n",
"    polygon coordinates. The result can be empty or hold more than one name.\n",
"    \"\"\"\n",
"    location = Point(*coordinate)\n",
"    matching = set()\n",
"    for region_name, polygon in shapes.items():\n",
"        if polygon.contains(location):\n",
"            matching.add(region_name)\n",
"    return matching\n",
"\n",
"def distance(shape, longitude, latitude):\n",
"    \"\"\"Distance from a point to the closest point on the exterior ring of `shape`.\n",
"\n",
"    The closest boundary point is found in coordinate space (project + interpolate on the\n",
"    exterior); the value returned is what `vincenty` gives for the two (latitude, longitude)\n",
"    pairs - presumably kilometres, vincenty's default unit; TODO confirm.\n",
"    \"\"\"\n",
"    here = Point(longitude, latitude)\n",
"    boundary = shape.exterior\n",
"    offset_along_boundary = boundary.project(here)\n",
"    nearest = boundary.interpolate(offset_along_boundary)\n",
"    return vincenty((latitude, longitude), (nearest.y, nearest.x))"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"# One row per picture-game recording, joined with its survey (location), language and item.\n",
"picture_games_query = '''\n",
"SELECT language.name as language, item.name as picture,\n",
" survey.user_lat as latitude, survey.user_lng as longitude,\n",
" survey.area_name as area, survey.country_name as country,\n",
" result.recording as filename,\n",
" result.submitted_at as date\n",
"FROM core_surveyresult as survey\n",
"INNER JOIN core_picturegameresult as result ON survey.id = result.survey_result_id\n",
"INNER JOIN core_language as language ON language.id = result.language_id\n",
"INNER JOIN core_picturegameitem as item\n",
" ON result.picture_game_item_id = item.id\n",
"'''\n",
"\n",
"# The recording path is used as the index exactly as stored (directories are not stripped).\n",
"picture_games = pandas.read_sql(picture_games_query, db).set_index('filename')"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='<b>0</b>s passed', placeholder='0…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# For every recording: the dialect regions containing its location, each with the distance\n",
"# from the location to that region's boundary. The list may be empty or have several entries.\n",
"located_rows = ProgressBar(\n",
"    picture_games[['latitude', 'longitude']].iterrows(),\n",
"    size=len(picture_games)\n",
")\n",
"\n",
"region_per_picture_game = []\n",
"for filename, (latitude, longitude) in located_rows:\n",
"    dialects_here = [\n",
"        {\n",
"            'dialect': dialect,\n",
"            'boundary_distance': distance(shapes[dialect], longitude, latitude),\n",
"        }\n",
"        for dialect in regions_for((longitude, latitude))\n",
"    ]\n",
"    region_per_picture_game.append({\n",
"        'dialects': dialects_here,\n",
"        'filename': filename,\n",
"    })"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"Counter({1: 37681, 0: 6790, 2: 1235})"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# How many recordings fall inside 0, 1, 2, ... dialect regions.\n",
"Counter(map(lambda entry: len(entry['dialects']), region_per_picture_game))"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>filename</th>\n",
" <th>dialect</th>\n",
" <th>boundary_distance</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>0.783060</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>5.049926</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>picture_game_recordings/Nederlands/2017-07-05/...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.076206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>6.822016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Noardhoeksk</td>\n",
" <td>0.697901</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>11.483169</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>7.789364</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>picture_game_recordings/English/2017-07-05/pg_...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>7.743475</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>9.636752</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>9.141453</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>picture_game_recordings/Nederlands/2017-07-05/...</td>\n",
" <td>Noardhoeksk</td>\n",
" <td>0.779276</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.876511</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>4.420720</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>5.376456</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>4.420720</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>2.249444</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>9.085181</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.760419</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.392223</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>2.104440</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.415752</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>8.863952</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.876511</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.912091</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>2.304845</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.779915</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>picture_game_recordings/Nederlands/2017-07-05/...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>9.627570</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>picture_game_recordings/Nederlands/2017-07-05/...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>4.783550</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>picture_game_recordings/Nederlands/2017-07-05/...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>4.048083</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>picture_game_recordings/Frysk/2017-07-05/pg_sc...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.982241</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37651</th>\n",
" <td>picture_game_recordings/Frysk/2018-03-11/pg_gi...</td>\n",
" <td>Noardhoeksk</td>\n",
" <td>5.089732</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37652</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-07/pg_gi...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>7.949465</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37653</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-07/pg_gi...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>7.585835</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37654</th>\n",
" <td>picture_game_recordings/Stadsfrys/2018-04-08/p...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>8.087441</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37655</th>\n",
" <td>picture_game_recordings/Nederlands/2018-04-09/...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>4.533521</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37656</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-11/pg_gi...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>8.158308</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37657</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-20/pg_gi...</td>\n",
" <td>Noardhoeksk</td>\n",
" <td>5.234811</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37658</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-21/pg_gi...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>7.336785</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37659</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-21/pg_gi...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>1.037296</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37660</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-21/pg_gi...</td>\n",
" <td>Sudwesthoeksk</td>\n",
" <td>0.795104</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37661</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-21/pg_gi...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>6.146968</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37662</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-21/pg_gi...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>9.839955</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37663</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-22/pg_gi...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>2.581705</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37664</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-22/pg_gi...</td>\n",
" <td>Sudwesthoeksk</td>\n",
" <td>0.155174</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37665</th>\n",
" <td>picture_game_recordings/Stadsfrys/2018-04-22/p...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>4.157231</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37666</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-22/pg_gi...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>0.485218</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37667</th>\n",
" <td>picture_game_recordings/Stadsfrys/2018-04-24/p...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>7.866078</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37668</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-24/pg_gi...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>7.727236</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37669</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-25/pg_gi...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>6.965155</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37670</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-28/pg_gi...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>4.287499</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37671</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-28/pg_gi...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>4.138920</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37672</th>\n",
" <td>picture_game_recordings/Frysk/2018-04-28/pg_gi...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>5.062945</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37673</th>\n",
" <td>picture_game_recordings/Frysk/2018-05-02/pg_gi...</td>\n",
" <td>Sudwesthoeksk</td>\n",
" <td>0.586068</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37674</th>\n",
" <td>picture_game_recordings/Frysk/2018-05-12/pg_gi...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>9.143622</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37675</th>\n",
" <td>picture_game_recordings/Frysk/2018-05-14/pg_gi...</td>\n",
" <td>Noardhoeksk</td>\n",
" <td>0.509192</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37676</th>\n",
" <td>picture_game_recordings/Frysk/2018-05-20/pg_gi...</td>\n",
" <td>Noardhoeksk</td>\n",
" <td>0.867314</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37677</th>\n",
" <td>picture_game_recordings/Frysk/2018-06-25/pg_gi...</td>\n",
" <td>Noardhoeksk</td>\n",
" <td>1.770019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37678</th>\n",
" <td>picture_game_recordings/Frysk/2018-07-02/pg_gi...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>5.647593</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37679</th>\n",
" <td>picture_game_recordings/Frysk/2018-07-05/pg_gi...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>11.202813</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37680</th>\n",
" <td>picture_game_recordings/Frysk/2018-08-12/pg_gi...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>2.222390</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>37681 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" filename dialect \\\n",
"0 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"1 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"2 picture_game_recordings/Nederlands/2017-07-05/... Waldfrysk \n",
"3 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"4 picture_game_recordings/Frysk/2017-07-05/pg_sc... Noardhoeksk \n",
"5 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"6 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"7 picture_game_recordings/English/2017-07-05/pg_... Waldfrysk \n",
"8 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"9 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"10 picture_game_recordings/Nederlands/2017-07-05/... Noardhoeksk \n",
"11 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"12 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"13 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"14 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"15 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"16 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"17 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"18 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"19 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"20 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"21 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"22 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"23 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"24 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"25 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"26 picture_game_recordings/Nederlands/2017-07-05/... Waldfrysk \n",
"27 picture_game_recordings/Nederlands/2017-07-05/... Waldfrysk \n",
"28 picture_game_recordings/Nederlands/2017-07-05/... Waldfrysk \n",
"29 picture_game_recordings/Frysk/2017-07-05/pg_sc... Waldfrysk \n",
"... ... ... \n",
"37651 picture_game_recordings/Frysk/2018-03-11/pg_gi... Noardhoeksk \n",
"37652 picture_game_recordings/Frysk/2018-04-07/pg_gi... Waldfrysk \n",
"37653 picture_game_recordings/Frysk/2018-04-07/pg_gi... Waldfrysk \n",
"37654 picture_game_recordings/Stadsfrys/2018-04-08/p... Klaaifrysk \n",
"37655 picture_game_recordings/Nederlands/2018-04-09/... Waldfrysk \n",
"37656 picture_game_recordings/Frysk/2018-04-11/pg_gi... Klaaifrysk \n",
"37657 picture_game_recordings/Frysk/2018-04-20/pg_gi... Noardhoeksk \n",
"37658 picture_game_recordings/Frysk/2018-04-21/pg_gi... Klaaifrysk \n",
"37659 picture_game_recordings/Frysk/2018-04-21/pg_gi... Waldfrysk \n",
"37660 picture_game_recordings/Frysk/2018-04-21/pg_gi... Sudwesthoeksk \n",
"37661 picture_game_recordings/Frysk/2018-04-21/pg_gi... Klaaifrysk \n",
"37662 picture_game_recordings/Frysk/2018-04-21/pg_gi... Waldfrysk \n",
"37663 picture_game_recordings/Frysk/2018-04-22/pg_gi... Waldfrysk \n",
"37664 picture_game_recordings/Frysk/2018-04-22/pg_gi... Sudwesthoeksk \n",
"37665 picture_game_recordings/Stadsfrys/2018-04-22/p... Klaaifrysk \n",
"37666 picture_game_recordings/Frysk/2018-04-22/pg_gi... Klaaifrysk \n",
"37667 picture_game_recordings/Stadsfrys/2018-04-24/p... Klaaifrysk \n",
"37668 picture_game_recordings/Frysk/2018-04-24/pg_gi... Klaaifrysk \n",
"37669 picture_game_recordings/Frysk/2018-04-25/pg_gi... Waldfrysk \n",
"37670 picture_game_recordings/Frysk/2018-04-28/pg_gi... Klaaifrysk \n",
"37671 picture_game_recordings/Frysk/2018-04-28/pg_gi... Klaaifrysk \n",
"37672 picture_game_recordings/Frysk/2018-04-28/pg_gi... Waldfrysk \n",
"37673 picture_game_recordings/Frysk/2018-05-02/pg_gi... Sudwesthoeksk \n",
"37674 picture_game_recordings/Frysk/2018-05-12/pg_gi... Klaaifrysk \n",
"37675 picture_game_recordings/Frysk/2018-05-14/pg_gi... Noardhoeksk \n",
"37676 picture_game_recordings/Frysk/2018-05-20/pg_gi... Noardhoeksk \n",
"37677 picture_game_recordings/Frysk/2018-06-25/pg_gi... Noardhoeksk \n",
"37678 picture_game_recordings/Frysk/2018-07-02/pg_gi... Waldfrysk \n",
"37679 picture_game_recordings/Frysk/2018-07-05/pg_gi... Klaaifrysk \n",
"37680 picture_game_recordings/Frysk/2018-08-12/pg_gi... Waldfrysk \n",
"\n",
" boundary_distance \n",
"0 0.783060 \n",
"1 5.049926 \n",
"2 10.076206 \n",
"3 6.822016 \n",
"4 0.697901 \n",
"5 11.483169 \n",
"6 7.789364 \n",
"7 7.743475 \n",
"8 9.636752 \n",
"9 9.141453 \n",
"10 0.779276 \n",
"11 10.876511 \n",
"12 4.420720 \n",
"13 5.376456 \n",
"14 4.420720 \n",
"15 2.249444 \n",
"16 9.085181 \n",
"17 10.760419 \n",
"18 10.392223 \n",
"19 2.104440 \n",
"20 10.415752 \n",
"21 8.863952 \n",
"22 10.876511 \n",
"23 10.912091 \n",
"24 2.304845 \n",
"25 10.779915 \n",
"26 9.627570 \n",
"27 4.783550 \n",
"28 4.048083 \n",
"29 10.982241 \n",
"... ... \n",
"37651 5.089732 \n",
"37652 7.949465 \n",
"37653 7.585835 \n",
"37654 8.087441 \n",
"37655 4.533521 \n",
"37656 8.158308 \n",
"37657 5.234811 \n",
"37658 7.336785 \n",
"37659 1.037296 \n",
"37660 0.795104 \n",
"37661 6.146968 \n",
"37662 9.839955 \n",
"37663 2.581705 \n",
"37664 0.155174 \n",
"37665 4.157231 \n",
"37666 0.485218 \n",
"37667 7.866078 \n",
"37668 7.727236 \n",
"37669 6.965155 \n",
"37670 4.287499 \n",
"37671 4.138920 \n",
"37672 5.062945 \n",
"37673 0.586068 \n",
"37674 9.143622 \n",
"37675 0.509192 \n",
"37676 0.867314 \n",
"37677 1.770019 \n",
"37678 5.647593 \n",
"37679 11.202813 \n",
"37680 2.222390 \n",
"\n",
"[37681 rows x 3 columns]"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Tabulate the picture-game recordings whose 'dialects' list holds exactly one\n",
"# entry; recordings with zero or several entries are left out of the export.\n",
"df = pandas.DataFrame(\n",
"    [\n",
"        [entry['filename'], match['dialect'], match['boundary_distance']]\n",
"        for entry in region_per_picture_game\n",
"        if len(entry['dialects']) == 1\n",
"        for match in entry['dialects']\n",
"    ],\n",
"    columns=['filename', 'dialect', 'boundary_distance'],\n",
")\n",
"\n",
"# Save the table as a spreadsheet and as CSV, then show it as the cell result.\n",
"df.to_excel('picture_game_recordings_by_dialect.xlsx')\n",
"df.to_csv('picture_game_recordings_by_dialect.csv')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# Load every free-speech recording together with its language and the location\n",
"# fields stored on the linked survey result. The recording path ('filename')\n",
"# becomes the index of the resulting frame; chaining set_index (instead of\n",
"# mutating in place) keeps this cell a single, re-runnable assignment.\n",
"free_speech_games = pandas.read_sql('''\n",
"SELECT language.name as language,\n",
" survey.user_lat as latitude, survey.user_lng as longitude,\n",
" survey.area_name as area, survey.country_name as country,\n",
" result.recording as filename,\n",
" result.submitted_at as date\n",
"FROM core_surveyresult as survey\n",
"INNER JOIN core_freespeechresult as result ON survey.id = result.survey_result_id\n",
"INNER JOIN core_language as language ON language.id = result.language_id\n",
"''', db).set_index('filename')"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='<b>0</b>s passed', placeholder='0…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# For every free-speech recording (frame index = filename), collect the dialects\n",
"# returned by regions_for() for its (longitude, latitude) pair, each paired with\n",
"# the value of distance() against that dialect's shape.\n",
"# NOTE(review): presumably the distance to the region boundary, as the key name\n",
"# suggests - confirm against the definition of distance().\n",
"region_per_free_speech = [\n",
"    dict(\n",
"        dialects=[\n",
"            dict(\n",
"                dialect=region,\n",
"                boundary_distance=distance(shapes[region], lng, lat),\n",
"            )\n",
"            for region in regions_for((lng, lat))\n",
"        ],\n",
"        filename=recording,\n",
"    )\n",
"    for recording, (lat, lng) in ProgressBar(\n",
"        free_speech_games[['latitude', 'longitude']].iterrows(),\n",
"        size=len(free_speech_games),\n",
"    )\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Counter({1: 37681, 0: 6790, 2: 1235})"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Tally how many dialects were matched per free-speech recording.\n",
"Counter(map(len, (entry['dialects'] for entry in region_per_free_speech)))"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>filename</th>\n",
" <th>dialect</th>\n",
" <th>boundary_distance</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>2.285980</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>free_speech_recordings/Nederlands/2017-07-05/f...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>3.524925</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>5.376456</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.415752</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_jok...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.760419</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_jok...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.876511</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_jok...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.980303</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_you...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.876511</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_jok...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.980303</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.760419</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.980303</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_gro...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.980303</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>free_speech_recordings/English/2017-07-05/fs_w...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>5.135179</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>2.061136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>free_speech_recordings/Nederlands/2017-07-05/f...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>2.598514</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.392223</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>free_speech_recordings/Nederlands/2017-07-05/f...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>2.598514</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>11.145294</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_gro...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>2.158520</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_jok...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>11.145294</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>free_speech_recordings/English/2017-07-05/fs_w...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>2.158520</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_you...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>11.145294</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>4.081455</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>free_speech_recordings/Nederlands/2017-07-05/f...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>5.579423</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_jok...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>1.965881</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>free_speech_recordings/Nederlands/2017-07-05/f...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>5.579423</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_gro...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>2.042788</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_rea...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>5.046413</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>free_speech_recordings/Frysk/2017-07-05/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>1.965881</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>free_speech_recordings/Nederlands/2017-07-05/f...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>4.901555</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>282</th>\n",
" <td>free_speech_recordings/Frysk/2017-11-29/fs_fav...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>4.397417</td>\n",
" </tr>\n",
" <tr>\n",
" <th>283</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-01/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>10.512088</td>\n",
" </tr>\n",
" <tr>\n",
" <th>284</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-08/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>6.444389</td>\n",
" </tr>\n",
" <tr>\n",
" <th>285</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-08/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>6.665675</td>\n",
" </tr>\n",
" <tr>\n",
" <th>286</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-08/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>3.411324</td>\n",
" </tr>\n",
" <tr>\n",
" <th>287</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-08/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>3.889477</td>\n",
" </tr>\n",
" <tr>\n",
" <th>288</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-08/fs_jok...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>3.889477</td>\n",
" </tr>\n",
" <tr>\n",
" <th>289</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-09/fs_gro...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>8.109339</td>\n",
" </tr>\n",
" <tr>\n",
" <th>290</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-11/fs_wha...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>0.188073</td>\n",
" </tr>\n",
" <tr>\n",
" <th>291</th>\n",
" <td>free_speech_recordings/Nederlands/2017-12-12/f...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>5.503341</td>\n",
" </tr>\n",
" <tr>\n",
" <th>292</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-12/fs_wha...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>5.503341</td>\n",
" </tr>\n",
" <tr>\n",
" <th>293</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-13/fs_jok...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>8.332972</td>\n",
" </tr>\n",
" <tr>\n",
" <th>294</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-14/fs_wha...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>15.105703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>295</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-14/fs_you...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>14.787898</td>\n",
" </tr>\n",
" <tr>\n",
" <th>296</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-14/fs_you...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>14.787898</td>\n",
" </tr>\n",
" <tr>\n",
" <th>297</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-14/fs_jok...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>14.066749</td>\n",
" </tr>\n",
" <tr>\n",
" <th>298</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-14/fs_jok...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>14.066749</td>\n",
" </tr>\n",
" <tr>\n",
" <th>299</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-19/fs_jok...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>9.411845</td>\n",
" </tr>\n",
" <tr>\n",
" <th>300</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-19/fs_jok...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>9.579569</td>\n",
" </tr>\n",
" <tr>\n",
" <th>301</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-20/fs_gro...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>1.346010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>302</th>\n",
" <td>free_speech_recordings/Frysk/2017-12-20/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>1.346010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>303</th>\n",
" <td>free_speech_recordings/Frysk/2018-01-20/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>8.009320</td>\n",
" </tr>\n",
" <tr>\n",
" <th>304</th>\n",
" <td>free_speech_recordings/Nederlands/2018-04-08/f...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>5.481488</td>\n",
" </tr>\n",
" <tr>\n",
" <th>305</th>\n",
" <td>free_speech_recordings/Stadsfrys/2018-04-08/fs...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>5.481488</td>\n",
" </tr>\n",
" <tr>\n",
" <th>306</th>\n",
" <td>free_speech_recordings/Frysk/2018-04-21/fs_gro...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>8.233029</td>\n",
" </tr>\n",
" <tr>\n",
" <th>307</th>\n",
" <td>free_speech_recordings/Frysk/2018-04-22/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>4.025229</td>\n",
" </tr>\n",
" <tr>\n",
" <th>308</th>\n",
" <td>free_speech_recordings/Frysk/2018-04-25/fs_wha...</td>\n",
" <td>Klaaifrysk</td>\n",
" <td>8.213621</td>\n",
" </tr>\n",
" <tr>\n",
" <th>309</th>\n",
" <td>free_speech_recordings/Frysk/2018-06-24/fs_wha...</td>\n",
" <td>Noardhoeksk</td>\n",
" <td>2.651287</td>\n",
" </tr>\n",
" <tr>\n",
" <th>310</th>\n",
" <td>free_speech_recordings/Frysk/2018-06-30/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>7.716322</td>\n",
" </tr>\n",
" <tr>\n",
" <th>311</th>\n",
" <td>free_speech_recordings/Frysk/2018-08-20/fs_wha...</td>\n",
" <td>Waldfrysk</td>\n",
" <td>5.674670</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>312 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" filename dialect \\\n",
"0 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n",
"1 free_speech_recordings/Nederlands/2017-07-05/f... Waldfrysk \n",
"2 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n",
"3 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n",
"4 free_speech_recordings/Frysk/2017-07-05/fs_jok... Waldfrysk \n",
"5 free_speech_recordings/Frysk/2017-07-05/fs_jok... Waldfrysk \n",
"6 free_speech_recordings/Frysk/2017-07-05/fs_jok... Waldfrysk \n",
"7 free_speech_recordings/Frysk/2017-07-05/fs_you... Waldfrysk \n",
"8 free_speech_recordings/Frysk/2017-07-05/fs_jok... Waldfrysk \n",
"9 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n",
"10 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n",
"11 free_speech_recordings/Frysk/2017-07-05/fs_gro... Waldfrysk \n",
"12 free_speech_recordings/English/2017-07-05/fs_w... Waldfrysk \n",
"13 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n",
"14 free_speech_recordings/Nederlands/2017-07-05/f... Waldfrysk \n",
"15 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n",
"16 free_speech_recordings/Nederlands/2017-07-05/f... Waldfrysk \n",
"17 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n",
"18 free_speech_recordings/Frysk/2017-07-05/fs_gro... Waldfrysk \n",
"19 free_speech_recordings/Frysk/2017-07-05/fs_jok... Waldfrysk \n",
"20 free_speech_recordings/English/2017-07-05/fs_w... Waldfrysk \n",
"21 free_speech_recordings/Frysk/2017-07-05/fs_you... Waldfrysk \n",
"22 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n",
"23 free_speech_recordings/Nederlands/2017-07-05/f... Waldfrysk \n",
"24 free_speech_recordings/Frysk/2017-07-05/fs_jok... Waldfrysk \n",
"25 free_speech_recordings/Nederlands/2017-07-05/f... Waldfrysk \n",
"26 free_speech_recordings/Frysk/2017-07-05/fs_gro... Waldfrysk \n",
"27 free_speech_recordings/Frysk/2017-07-05/fs_rea... Waldfrysk \n",
"28 free_speech_recordings/Frysk/2017-07-05/fs_wha... Waldfrysk \n",
"29 free_speech_recordings/Nederlands/2017-07-05/f... Waldfrysk \n",
".. ... ... \n",
"282 free_speech_recordings/Frysk/2017-11-29/fs_fav... Waldfrysk \n",
"283 free_speech_recordings/Frysk/2017-12-01/fs_wha... Waldfrysk \n",
"284 free_speech_recordings/Frysk/2017-12-08/fs_wha... Waldfrysk \n",
"285 free_speech_recordings/Frysk/2017-12-08/fs_wha... Waldfrysk \n",
"286 free_speech_recordings/Frysk/2017-12-08/fs_wha... Waldfrysk \n",
"287 free_speech_recordings/Frysk/2017-12-08/fs_wha... Waldfrysk \n",
"288 free_speech_recordings/Frysk/2017-12-08/fs_jok... Waldfrysk \n",
"289 free_speech_recordings/Frysk/2017-12-09/fs_gro... Waldfrysk \n",
"290 free_speech_recordings/Frysk/2017-12-11/fs_wha... Klaaifrysk \n",
"291 free_speech_recordings/Nederlands/2017-12-12/f... Klaaifrysk \n",
"292 free_speech_recordings/Frysk/2017-12-12/fs_wha... Klaaifrysk \n",
"293 free_speech_recordings/Frysk/2017-12-13/fs_jok... Waldfrysk \n",
"294 free_speech_recordings/Frysk/2017-12-14/fs_wha... Klaaifrysk \n",
"295 free_speech_recordings/Frysk/2017-12-14/fs_you... Klaaifrysk \n",
"296 free_speech_recordings/Frysk/2017-12-14/fs_you... Klaaifrysk \n",
"297 free_speech_recordings/Frysk/2017-12-14/fs_jok... Klaaifrysk \n",
"298 free_speech_recordings/Frysk/2017-12-14/fs_jok... Klaaifrysk \n",
"299 free_speech_recordings/Frysk/2017-12-19/fs_jok... Waldfrysk \n",
"300 free_speech_recordings/Frysk/2017-12-19/fs_jok... Waldfrysk \n",
"301 free_speech_recordings/Frysk/2017-12-20/fs_gro... Waldfrysk \n",
"302 free_speech_recordings/Frysk/2017-12-20/fs_wha... Waldfrysk \n",
"303 free_speech_recordings/Frysk/2018-01-20/fs_wha... Waldfrysk \n",
"304 free_speech_recordings/Nederlands/2018-04-08/f... Klaaifrysk \n",
"305 free_speech_recordings/Stadsfrys/2018-04-08/fs... Klaaifrysk \n",
"306 free_speech_recordings/Frysk/2018-04-21/fs_gro... Waldfrysk \n",
"307 free_speech_recordings/Frysk/2018-04-22/fs_wha... Waldfrysk \n",
"308 free_speech_recordings/Frysk/2018-04-25/fs_wha... Klaaifrysk \n",
"309 free_speech_recordings/Frysk/2018-06-24/fs_wha... Noardhoeksk \n",
"310 free_speech_recordings/Frysk/2018-06-30/fs_wha... Waldfrysk \n",
"311 free_speech_recordings/Frysk/2018-08-20/fs_wha... Waldfrysk \n",
"\n",
" boundary_distance \n",
"0 2.285980 \n",
"1 3.524925 \n",
"2 5.376456 \n",
"3 10.415752 \n",
"4 10.760419 \n",
"5 10.876511 \n",
"6 10.980303 \n",
"7 10.876511 \n",
"8 10.980303 \n",
"9 10.760419 \n",
"10 10.980303 \n",
"11 10.980303 \n",
"12 5.135179 \n",
"13 2.061136 \n",
"14 2.598514 \n",
"15 10.392223 \n",
"16 2.598514 \n",
"17 11.145294 \n",
"18 2.158520 \n",
"19 11.145294 \n",
"20 2.158520 \n",
"21 11.145294 \n",
"22 4.081455 \n",
"23 5.579423 \n",
"24 1.965881 \n",
"25 5.579423 \n",
"26 2.042788 \n",
"27 5.046413 \n",
"28 1.965881 \n",
"29 4.901555 \n",
".. ... \n",
"282 4.397417 \n",
"283 10.512088 \n",
"284 6.444389 \n",
"285 6.665675 \n",
"286 3.411324 \n",
"287 3.889477 \n",
"288 3.889477 \n",
"289 8.109339 \n",
"290 0.188073 \n",
"291 5.503341 \n",
"292 5.503341 \n",
"293 8.332972 \n",
"294 15.105703 \n",
"295 14.787898 \n",
"296 14.787898 \n",
"297 14.066749 \n",
"298 14.066749 \n",
"299 9.411845 \n",
"300 9.579569 \n",
"301 1.346010 \n",
"302 1.346010 \n",
"303 8.009320 \n",
"304 5.481488 \n",
"305 5.481488 \n",
"306 8.233029 \n",
"307 4.025229 \n",
"308 8.213621 \n",
"309 2.651287 \n",
"310 7.716322 \n",
"311 5.674670 \n",
"\n",
"[312 rows x 3 columns]"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Tabulate the free-speech recordings whose 'dialects' list holds exactly one\n",
"# entry; recordings with zero or several entries are left out of the export.\n",
"df = pandas.DataFrame(\n",
"    [\n",
"        [entry['filename'], match['dialect'], match['boundary_distance']]\n",
"        for entry in region_per_free_speech\n",
"        if len(entry['dialects']) == 1\n",
"        for match in entry['dialects']\n",
"    ],\n",
"    columns=['filename', 'dialect', 'boundary_distance'],\n",
")\n",
"\n",
"# Save the table as a spreadsheet and as CSV, then show it as the cell result.\n",
"df.to_excel('free_speech_recordings_by_dialect.xlsx')\n",
"df.to_csv('free_speech_recordings_by_dialect.csv')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3.0
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 0
}