{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dialect regions from image\n",
    "\n",
    "Use image processing to extract longitude-latitude polygons for the four dialect regions illustrated in this image, and write them to `../data/dialect_regions.geojson`.\n",
    "\n",
    "![dialect regions](../data/dialects.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "# Add the parent directory to the import path (presumably so that the\n",
    "# project-local `stimmen` package imported below can be found).\n",
    "sys.path.append('..')\n",
    "\n",
    "import folium\n",
    "import json\n",
    "\n",
    "from collections import Counter\n",
    "\n",
    "from math import sqrt, floor\n",
    "import numpy as np\n",
    "from imageio import imread\n",
    "\n",
    "%matplotlib notebook\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "from skimage.morphology import binary_closing\n",
    "from skimage.measure import find_contours, label\n",
    "\n",
    "import folium.plugins\n",
    "from folium_jsbutton import JsButton\n",
    "\n",
    "from stimmen.latitude_longitude import reverse_latitude_longitude"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Input"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "im = imread('../data/dialects.png')\n",
    "# Everything below treats the image as rows x columns x RGB; fail early otherwise\n",
    "# instead of silently mis-grouping channel values in the reshape.\n",
    "assert im.ndim == 3 and im.shape[2] == 3, 'expected an RGB image without alpha channel'\n",
    "\n",
    "# Region names; paired by position with the colors selected in the next section.\n",
    "regions = ['Klaaifrysk', 'Waldfrysk', 'Sudwesthoeksk', 'Noardhoeksk']\n",
    "\n",
    "# Count every distinct RGB color and list the colors from most to least frequent.\n",
    "color_occurence = Counter(map(tuple, im.reshape(-1, 3)))\n",
    "colors_sorted_by_occurence = [color for color, _ in color_occurence.most_common()]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Figure out relevant colors\n",
    "\n",
    "The left panel shows the most frequent colors, each labelled with its index; the right panel shows the four colors selected as region colors."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Overview: the most frequent colors on a square grid, each labelled with its\n",
    "# index into colors_sorted_by_occurence.\n",
    "palette_width = floor(sqrt(len(colors_sorted_by_occurence)))\n",
    "palette_grid = np.array(colors_sorted_by_occurence[:palette_width**2]).reshape(\n",
    "    palette_width, palette_width, 3)\n",
    "\n",
    "_, (ax0, ax1) = plt.subplots(1, 2)\n",
    "ax0.imshow(palette_grid)\n",
    "for x in range(palette_width):\n",
    "    for y in range(palette_width):\n",
    "        ax0.text(x - 0.5, y + 0.5, str(x + y * palette_width))\n",
    "ax0.set_xticks([])\n",
    "ax0.set_yticks([])\n",
    "\n",
    "# Indices of the four region colors, read off the overview above. Their order has\n",
    "# to match `regions`: colors and region names are paired by position later on.\n",
    "palette_indices = [3, 4, 7, 8]\n",
    "region_colors = np.array([colors_sorted_by_occurence[i] for i in palette_indices])\n",
    "ax1.imshow(region_colors[None])\n",
    "ax1.set_xticks([])\n",
    "ax1.set_yticks([]);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Georeference image\n",
    "\n",
    "Overlay the image on a map. The numbered buttons nudge the corners of the overlay; button `B` logs the current bounds to the browser console so they can be copied into `bounds`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Image corners on the map as [[north, east], [south, west]] (latitude, longitude).\n",
    "bounds = [\n",
    "    [53.54634089638824, 6.530699920654293],\n",
    "    [52.60043228879454, 4.684483127594012]\n",
    "]\n",
    "map_center = [(bounds[0][0] + bounds[1][0]) / 2, (bounds[0][1] + bounds[1][1]) / 2]\n",
    "\n",
    "georeference_map = folium.Map(location=map_center, tiles='stamentoner', zoom_start=9)\n",
    "\n",
    "overlay = folium.raster_layers.ImageOverlay(\n",
    "    image='../data/dialects.png',\n",
    "    bounds=bounds,\n",
    "    opacity=0.5\n",
    ").add_to(georeference_map)\n",
    "\n",
    "# The JavaScript below addresses the overlay as image_overlay_<id>.\n",
    "overlay_id = overlay._id\n",
    "\n",
    "# JavaScript templates; doubled braces are literal braces for str.format.\n",
    "nudge_js = '''function(map, item) {{\n",
    " var overlay = image_overlay_{overlay_id};\n",
    " var bounds = overlay.getBounds()\n",
    " bounds.{corner}.{direction} {operation}= 0.001;\n",
    " overlay.setBounds(bounds);\n",
    " \n",
    "}}'''\n",
    "log_bounds_js = '''function(map, item) {{\n",
    " var overlay = image_overlay_{overlay_id};\n",
    " var bounds = overlay.getBounds()\n",
    " console.log([bounds._northEast.lat, bounds._northEast.lng, bounds._southWest.lat, bounds._southWest.lng])\n",
    " \n",
    "}}'''\n",
    "\n",
    "# Buttons 0-7: one per (corner, coordinate, direction) combination; each click\n",
    "# moves that coordinate of that overlay corner by 0.001.\n",
    "nudges = [\n",
    "    (corner, direction, operation)\n",
    "    for corner in ['_northEast', '_southWest']\n",
    "    for direction in ['lat', 'lng']\n",
    "    for operation in ['-', '+']\n",
    "]\n",
    "for number, (corner, direction, operation) in enumerate(nudges):\n",
    "    JsButton(\n",
    "        title=str(number),\n",
    "        function=nudge_js.format(\n",
    "            overlay_id=overlay_id, corner=corner, direction=direction, operation=operation)\n",
    "    ).add_to(georeference_map)\n",
    "\n",
    "# Button B: log [north, east, south, west] of the overlay to the browser console.\n",
    "JsButton(title='B', function=log_bounds_js.format(overlay_id=overlay_id)).add_to(georeference_map)\n",
    "\n",
    "georeference_map"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Find polygons in pixel coordinates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "_, axes = plt.subplots(2, 2)\n",
    "\n",
    "# contours[i] holds all contours found for region_colors[i], as (row, column) arrays.\n",
    "contours = []\n",
    "for axis, color in zip(axes.ravel(), region_colors):\n",
    "    # Pixels whose three channels all equal the region color. The bottom 100 rows\n",
    "    # are left out (presumably a legend or caption in the image -- confirm); row\n",
    "    # indices of the remaining pixels are unaffected by cropping at the bottom.\n",
    "    mask = (im[:-100] == color[None, None]).all(axis=2)\n",
    "    # Morphological closing with a 5x5 structuring element to bridge small gaps.\n",
    "    mask = binary_closing(mask, np.ones((5, 5)))\n",
    "\n",
    "    region_contours = find_contours(mask, 0.5)\n",
    "    contours.append(region_contours)\n",
    "\n",
    "    axis.imshow(mask)\n",
    "    # Only the first contour is used downstream, so only that one is drawn.\n",
    "    for contour in region_contours[:1]:\n",
    "        axis.plot(contour[:, 1], contour[:, 0], linewidth=2)\n",
    "    axis.set_xticks([])\n",
    "    axis.set_yticks([])\n",
    "plt.tight_layout()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Convert to longitude-latitude and write GeoJSON"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# bounds is [[north, east], [south, west]]: x runs from west (x0) to east (x1),\n",
    "# y runs from north (y0) to south (y1), matching increasing column / row index.\n",
    "(y0, x1), (y1, x0) = bounds\n",
    "image_height, image_width = im.shape[:2]\n",
    "\n",
    "\n",
    "def pixels_to_lon_lat(contour):\n",
    "    \"\"\"Convert a contour in pixel coordinates to (longitude, latitude) tuples.\n",
    "\n",
    "    contour: array with one (row, column) pair per point.\n",
    "    Returns a list of (longitude, latitude) tuples of Python floats. Columns are\n",
    "    mapped linearly from x0 to x1 over the image width, rows linearly from y0 to\n",
    "    y1 over the full image height.\n",
    "    \"\"\"\n",
    "    # NOTE(review): the overlay is presumably stretched linearly in the map's\n",
    "    # projected (Web Mercator) space, while latitude is interpolated linearly\n",
    "    # here; over this extent the difference should be small -- confirm acceptable.\n",
    "    longitudes = (contour[:, 1] / image_width) * (x1 - x0) + x0\n",
    "    latitudes = (contour[:, 0] / image_height) * (y1 - y0) + y0\n",
    "    return list(zip(longitudes.tolist(), latitudes.tolist()))\n",
    "\n",
    "\n",
    "contours_scaled = []\n",
    "for color_index, region_contours in enumerate(contours):\n",
    "    if not region_contours:\n",
    "        raise ValueError('no contour found for region color {}'.format(color_index))\n",
    "    # Only the first contour found for each color is kept.\n",
    "    contours_scaled.append(pixels_to_lon_lat(region_contours[0]))\n",
    "\n",
    "# zip() below would silently drop regions if the lengths differed.\n",
    "assert len(contours_scaled) == len(regions), 'need exactly one contour per region'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def closed_ring(points):\n",
    "    \"\"\"Return points as a list of [longitude, latitude] lists forming a closed ring.\n",
    "\n",
    "    GeoJSON polygon rings must start and end with the same position, while a\n",
    "    contour that touches the image border is left open by find_contours. The\n",
    "    first position is appended only when the ring is not closed already.\n",
    "    \"\"\"\n",
    "    ring = [list(point) for point in points]\n",
    "    if ring and ring[0] != ring[-1]:\n",
    "        ring.append(list(ring[0]))\n",
    "    return ring\n",
    "\n",
    "\n",
    "# One polygon feature per region; contours and region names are paired by position.\n",
    "geojson = {\n",
    "    'type': 'FeatureCollection',\n",
    "    'features': [\n",
    "        {\n",
    "            'type': 'Feature',\n",
    "            'properties': {'dialect': dialect},\n",
    "            'geometry': {\n",
    "                'type': 'Polygon',\n",
    "                'coordinates': [closed_ring(contour)]\n",
    "            }\n",
    "        }\n",
    "        for contour, dialect in zip(contours_scaled, regions)\n",
    "    ]\n",
    "}\n",
    "\n",
    "with open('../data/dialect_regions.geojson', 'w') as f:\n",
    "    json.dump(geojson, f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Result\n",
    "\n",
    "The extracted polygons drawn on a map; each polygon's popup shows the dialect name."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result_map = folium.Map(location=map_center, tiles='stamentoner', zoom_start=9)\n",
    "\n",
    "for feature in geojson['features']:\n",
    "    folium.Polygon(\n",
    "        # Project helper; presumably swaps GeoJSON (lon, lat) order into the\n",
    "        # (lat, lon) order that folium expects -- confirm.\n",
    "        reverse_latitude_longitude(feature['geometry']['coordinates']),\n",
    "        color='red',\n",
    "        fill_color='white',\n",
    "        fill_opacity=0,\n",
    "        popup=feature['properties']['dialect']\n",
    "    ).add_to(result_map)\n",
    "\n",
    "result_map"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}