stimmenfryslan/notebooks/Dialect Regions from image....

293 lines
7.7 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Dialect regions from image\n",
" \n",
"Use image processing to extract longitude-latitude polygons for the four dialect regions illustrated in the image below.\n",
"\n",
"![dialect regions](../data/dialects.png)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('..')\n",
"\n",
"import folium\n",
"import json\n",
"\n",
"from collections import Counter\n",
"\n",
"from math import sqrt, floor\n",
"import numpy as np\n",
"from imageio import imread\n",
"\n",
"%matplotlib notebook\n",
"from matplotlib import pyplot as plt\n",
"\n",
"from skimage.morphology import binary_closing\n",
"from skimage.measure import find_contours, label\n",
"\n",
"import folium.plugins\n",
"from folium_jsbutton import JsButton\n",
"\n",
"from stimmen.latitude_longitude import reverse_latitude_longitude"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Input"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Image to analyse and the names of the dialect regions to extract from it.\n",
"im = imread('../data/dialects.png')\n",
"regions = ['Klaaifrysk', 'Waldfrysk', 'Sudwesthoeksk', 'Noardhoeksk']\n",
"\n",
"# Count how often every pixel colour occurs, then list the distinct colours\n",
"# from most to least frequent.\n",
"# NOTE(review): reshape(-1, 3) treats the image as 3-channel data -- confirm\n",
"# the PNG has no alpha channel.\n",
"color_occurence = Counter(map(tuple, im.reshape(-1, 3)))\n",
"colors_sorted_by_occurence = [\n",
"    color for color, _count in color_occurence.most_common()\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Figure out relevant colors"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Left panel: the distinct colours laid out as a square grid, each cell\n",
"# labelled with its rank in colors_sorted_by_occurence, so the colours of the\n",
"# regions can be looked up by number.\n",
"pallette_width = floor(sqrt(len(colors_sorted_by_occurence)))\n",
"pallette_grid = np.array(colors_sorted_by_occurence[:pallette_width**2]).reshape(\n",
"    pallette_width, pallette_width, 3\n",
")\n",
"\n",
"_, (ax0, ax1) = plt.subplots(1, 2)\n",
"ax0.imshow(pallette_grid)\n",
"for x in range(pallette_width):\n",
"    for y in range(pallette_width):\n",
"        # x is the grid column and y the grid row, so x + y * width is the rank.\n",
"        ax0.text(x-0.5, y+0.5, str(x + y * pallette_width))\n",
"ax0.set_xticks([])\n",
"ax0.set_yticks([])\n",
"\n",
"# Ranks of the colours that mark the regions (presumably read off the left\n",
"# panel). Later cells pair these colours with `regions` by position, so the\n",
"# order here must match the order of `regions`.\n",
"pallette_indices = [3, 4, 7, 8]\n",
"\n",
"# Right panel: the selected colours as a single row of shape (1, n, 3).\n",
"pallette = np.array(\n",
"    [colors_sorted_by_occurence[i] for i in pallette_indices]\n",
").reshape(1, -1, 3)\n",
"ax1.imshow(pallette)\n",
"ax1.set_xticks([])\n",
"ax1.set_yticks([])\n",
"None"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Georeference image"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Corners of the image on the map: [[north, east], [south, west]] in degrees.\n",
"bounds = [\n",
"    [53.54634089638824, 6.530699920654293],\n",
"    [52.60043228879454, 4.684483127594012]\n",
"]\n",
"\n",
"m = folium.Map(\n",
"    location=[(bounds[0][0] + bounds[1][0]) / 2, (bounds[0][1] + bounds[1][1]) / 2],\n",
"    tiles='stamentoner',\n",
"    zoom_start=9\n",
")\n",
"\n",
"overlay = folium.raster_layers.ImageOverlay(\n",
"    image='../data/dialects.png',\n",
"    bounds=bounds,\n",
"    opacity=0.5\n",
").add_to(m)\n",
"\n",
"# Name of the JavaScript variable folium generates for the overlay. Taken from\n",
"# the public get_name() instead of rebuilding it from the private `_id`.\n",
"overlay_name = overlay.get_name()\n",
"\n",
"# Buttons 0-7: each one moves a single coordinate (lat or lng) of one overlay\n",
"# corner by 0.001 in one direction, so the image can be aligned by hand.\n",
"for number, (corner, direction, operation) in enumerate(\n",
"    (c, d, o)\n",
"    for c in ['_northEast', '_southWest'] for d in ['lat', 'lng'] for o in ['-', '+']\n",
"):\n",
"    JsButton(title=str(number), function=\"\"\"function(map, item) {{\n",
"    var overlay = {overlay_name};\n",
"    var bounds = overlay.getBounds()\n",
"    bounds.{corner}.{direction} {operation}= 0.001;\n",
"    overlay.setBounds(bounds);\n",
"}}\"\"\".format(overlay_name=overlay_name, corner=corner, direction=direction, operation=operation)).add_to(m)\n",
"\n",
"\n",
"# Button B: log the current overlay bounds to the browser console\n",
"# (presumably so the tuned values can be copied back into `bounds` above).\n",
"JsButton(title='B', function=\"\"\"function(map, item) {{\n",
"    var overlay = {overlay_name};\n",
"    var bounds = overlay.getBounds()\n",
"    console.log([bounds._northEast.lat, bounds._northEast.lng, bounds._southWest.lat, bounds._southWest.lng])\n",
"}}\"\"\".format(overlay_name=overlay_name)).add_to(m)\n",
"\n",
"m"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Find polygons in pixel coordinates"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# One subplot and one list of contours per selected colour.\n",
"axes = plt.subplots(2,2)[1].ravel()\n",
"contours = []\n",
"for axis, c in zip(axes, pallette[0]):\n",
"    # Mask of the pixels whose colour equals c in every channel. The bottom\n",
"    # 100 rows are left out (presumably a legend -- confirm against the image).\n",
"    bi = (im[:-100] == c[None,None]).min(axis=2)\n",
"    # Morphological closing with a 5x5 square to fill small holes in the mask.\n",
"    bi = binary_closing(bi, np.ones((5,5)))\n",
"\n",
"    contours.append(find_contours(bi, 0.5))\n",
"\n",
"    axis.imshow(bi)\n",
"    # Draw only the first contour; it is the one the conversion step uses.\n",
"    # Contour points are (row, column), hence the swapped axes when plotting.\n",
"    for contour in contours[-1][:1]:\n",
"        axis.plot(contour[:, 1], contour[:, 0], linewidth=2)\n",
"    axis.set_xticks([]); axis.set_yticks([])\n",
"plt.tight_layout()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Convert to longitude-latitude coordinates and write GeoJSON"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# bounds holds [[north, east], [south, west]].\n",
"(north, east), (south, west) = bounds\n",
"\n",
"\n",
"def scale_x(x):\n",
"    \"\"\"Map pixel columns to longitudes: column 0 is the west edge.\"\"\"\n",
"    return ((x / im.shape[1]) * (east - west) + west).tolist()\n",
"\n",
"\n",
"def scale_y(y):\n",
"    \"\"\"Map pixel rows to latitudes: row 0 is the north edge.\"\"\"\n",
"    return ((y / im.shape[0]) * (south - north) + north).tolist()\n",
"\n",
"\n",
"# For every region keep the first contour found and convert its (row, column)\n",
"# points into (longitude, latitude) pairs.\n",
"contours_scaled = []\n",
"for region_contours in contours:\n",
"    outline = region_contours[0]\n",
"    longitudes = scale_x(outline[:, 1])\n",
"    latitudes = scale_y(outline[:, 0])\n",
"    contours_scaled.append(list(zip(longitudes, latitudes)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"def _closed_ring(points):\n",
"    \"\"\"Return points as a list of [lon, lat] lists that ends where it starts.\n",
"\n",
"    GeoJSON polygon rings must repeat their first position at the end, while\n",
"    find_contours leaves contours that run into the array edge open.\n",
"    \"\"\"\n",
"    ring = [list(point) for point in points]\n",
"    if ring and ring[0] != ring[-1]:\n",
"        ring.append(list(ring[0]))\n",
"    return ring\n",
"\n",
"\n",
"# One polygon feature per region; contours and region names pair up by position.\n",
"geojson = {\n",
"    \"type\": \"FeatureCollection\",\n",
"    \"features\": [\n",
"        {\n",
"            \"type\": \"Feature\",\n",
"            \"properties\": {'dialect': dialect},\n",
"            \"geometry\": {\n",
"                \"type\": \"Polygon\",\n",
"                \"coordinates\": [_closed_ring(contour)]\n",
"            }\n",
"        }\n",
"        for contour, dialect in zip(contours_scaled, regions)\n",
"    ]\n",
"}\n",
"\n",
"with open('../data/dialect_regions.geojson', 'w') as f:\n",
"    json.dump(geojson, f)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Centre a fresh map on the midpoint of the image bounds.\n",
"center_latitude = (bounds[0][0] + bounds[1][0]) / 2\n",
"center_longitude = (bounds[0][1] + bounds[1][1]) / 2\n",
"m = folium.Map(\n",
"    location=[center_latitude, center_longitude],\n",
"    tiles='stamentoner',\n",
"    zoom_start=9\n",
")\n",
"\n",
"# Outline every extracted region in red; the popup shows the dialect name.\n",
"for feature in geojson['features']:\n",
"    # presumably swaps GeoJSON [lon, lat] pairs into [lat, lon] -- verify in stimmen\n",
"    outline = reverse_latitude_longitude(feature['geometry']['coordinates'])\n",
"    dialect_name = feature['properties']['dialect']\n",
"    region_polygon = folium.Polygon(\n",
"        outline,\n",
"        color='red',\n",
"        fill_color='white',\n",
"        fill_opacity=0,\n",
"        popup=dialect_name\n",
"    )\n",
"    region_polygon.add_to(m)\n",
"\n",
"m"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 1
}