293 lines
7.7 KiB
Plaintext
293 lines
7.7 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Dialect regions from image\n",
|
|
" \n",
|
|
"Use image processing to extract longitude-latitude polygons for four dialect regions illustrated in this image.\n",
|
|
"\n",
|
|
"![dialect regions](../data/dialects.png)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import sys\n",
|
|
"sys.path.append('..')\n",
|
|
"\n",
|
|
"import folium\n",
|
|
"import json\n",
|
|
"\n",
|
|
"from collections import Counter\n",
|
|
"\n",
|
|
"from math import sqrt, floor\n",
|
|
"import numpy as np\n",
|
|
"from imageio import imread\n",
|
|
"\n",
|
|
"%matplotlib notebook\n",
|
|
"from matplotlib import pyplot as plt\n",
|
|
"\n",
|
|
"from skimage.morphology import binary_closing\n",
|
|
"from skimage.measure import find_contours, label\n",
|
|
"\n",
|
|
"import folium.plugins\n",
|
|
"from folium_jsbutton import JsButton\n",
|
|
"\n",
|
|
"from stimmen.latitude_longitude import reverse_latitude_longitude"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Input"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Source image, and the dialect names in the order in which their colours\n",
"# are selected from the palette further down.\n",
"im = imread('../data/dialects.png')\n",
"regions = ['Klaaifrysk', 'Waldfrysk', 'Sudwesthoeksk', 'Noardhoeksk']\n",
"\n",
"# Count every distinct pixel colour.\n",
"# NOTE(review): reshape(-1, 3) assumes a 3-channel image - confirm the PNG has no alpha channel.\n",
"color_occurence = Counter(map(tuple, im.reshape(-1,3)))\n",
"\n",
"# most_common() returns (colour, count) pairs from most to least frequent,\n",
"# which is the ordering the palette indices below rely on.\n",
"colors_sorted_by_occurence = [color for color, _ in color_occurence.most_common()]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Figure out relevant colors"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Lay the colours out on the largest square grid that fits, most frequent first.\n",
"pallette_width = floor(sqrt(len(colors_sorted_by_occurence)))\n",
"pallette_grid = np.array(colors_sorted_by_occurence[:pallette_width**2]).reshape(\n",
"    pallette_width, pallette_width, 3\n",
")\n",
"\n",
"_, (ax0, ax1) = plt.subplots(1, 2)\n",
"ax0.imshow(pallette_grid)\n",
"# Label each swatch with its index into colors_sorted_by_occurence.\n",
"for column in range(pallette_width):\n",
"    for row in range(pallette_width):\n",
"        ax0.text(column-0.5, row+0.5, str(column + row * pallette_width))\n",
"ax0.set_title('Colours by frequency')\n",
"ax0.set_xticks([])\n",
"ax0.set_yticks([])\n",
"\n",
"# Indices of the four region colours in the labelled grid.\n",
"# NOTE(review): presumably picked by eye and listed in the same order as `regions` - confirm.\n",
"pallette_indices = [3, 4, 7, 8]\n",
"# `pallette` is the 1 x 4 x 3 strip of selected colours used by the contour step.\n",
"pallette = np.array(\n",
"    [colors_sorted_by_occurence[i] for i in pallette_indices]\n",
").reshape(1, -1, 3)\n",
"ax1.imshow(pallette)\n",
"ax1.set_title('Selected region colours')\n",
"ax1.set_xticks([])\n",
"ax1.set_yticks([])\n",
"None"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Georeference image"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Corners of the image as [latitude, longitude]: [north-east, south-west].\n",
"bounds = [\n",
"    [53.54634089638824, 6.530699920654293],\n",
"    [52.60043228879454, 4.684483127594012]\n",
"]\n",
"map_center = [(bounds[0][0] + bounds[1][0]) / 2, (bounds[0][1] + bounds[1][1]) / 2]\n",
"\n",
"m = folium.Map(\n",
"    location=map_center,\n",
"    tiles='stamentoner',\n",
"    zoom_start=9\n",
")\n",
"\n",
"overlay = folium.raster_layers.ImageOverlay(\n",
"    image='../data/dialects.png',\n",
"    bounds=bounds,\n",
"    opacity=0.5\n",
").add_to(m)\n",
"\n",
"# Name of the JavaScript variable that folium renders for the overlay. Asking\n",
"# the element for it avoids rebuilding the name from the private `_id` attribute.\n",
"overlay_name = overlay.get_name()\n",
"\n",
"# Buttons 0-7 each nudge one coordinate of one corner by 0.001 degrees:\n",
"#   0/1: north-east lat -/+    2/3: north-east lng -/+\n",
"#   4/5: south-west lat -/+    6/7: south-west lng -/+\n",
"nudges = [\n",
"    (corner, direction, operation)\n",
"    for corner in ['_northEast', '_southWest']\n",
"    for direction in ['lat', 'lng']\n",
"    for operation in ['-', '+']\n",
"]\n",
"for number, (corner, direction, operation) in enumerate(nudges):\n",
"    JsButton(title=str(number), function=\"\"\"function(map, item) {{\n",
"        var overlay = {overlay_name};\n",
"        var bounds = overlay.getBounds()\n",
"        bounds.{corner}.{direction} {operation}= 0.001;\n",
"        overlay.setBounds(bounds);\n",
"        \n",
"}}\"\"\".format(overlay_name=overlay_name, corner=corner, direction=direction, operation=operation)).add_to(m)\n",
"\n",
"\n",
"# Button B logs the current corners to the browser console in the same\n",
"# north-east / south-west, lat / lng order that `bounds` uses above.\n",
"JsButton(title='B', function=\"\"\"function(map, item) {{\n",
"        var overlay = {overlay_name};\n",
"        var bounds = overlay.getBounds()\n",
"        console.log([bounds._northEast.lat, bounds._northEast.lng, bounds._southWest.lat, bounds._southWest.lng])\n",
"        \n",
"}}\"\"\".format(overlay_name=overlay_name)).add_to(m)\n",
"\n",
"m"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Find polygons in pixel coordinates"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# One panel per selected colour, in palette order.\n",
"axes = plt.subplots(2,2)[1].ravel()\n",
"contours = []\n",
"for axis, c in zip(axes, pallette[0]):\n",
"    # Mask of the pixels whose channels all equal this colour. The bottom 100\n",
"    # pixel rows are excluded (presumably a legend or caption - TODO confirm).\n",
"    bi = (im[:-100] == c[None,None]).all(axis=2)\n",
"    # Morphological closing with a 5x5 structuring element.\n",
"    bi = binary_closing(bi, np.ones((5,5)))\n",
"\n",
"    contours.append(find_contours(bi, 0.5))\n",
"\n",
"    axis.imshow(bi)\n",
"    # Only the first contour is drawn; it is also the only one converted to\n",
"    # longitude-latitude afterwards.\n",
"    for contour in contours[-1][:1]:\n",
"        axis.plot(contour[:, 1], contour[:, 0], linewidth=2)\n",
"    axis.set_xticks([])\n",
"    axis.set_yticks([])\n",
"plt.tight_layout()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Convert to longitude-latitude pairs and write GeoJSON"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# bounds holds [[north, east], [south, west]], so y0/y1 are the latitudes of the\n",
"# top/bottom image edges and x0/x1 the longitudes of the left/right edges.\n",
"(y0, x1), (y1, x0) = bounds\n",
"\n",
"\n",
"def scale_x(x):\n",
"    \"\"\"Linearly map an array of pixel column positions to a list of longitudes.\"\"\"\n",
"    return ((x / im.shape[1]) * (x1 - x0) + x0).tolist()\n",
"\n",
"\n",
"def scale_y(y):\n",
"    \"\"\"Linearly map an array of pixel row positions to a list of latitudes.\"\"\"\n",
"    return ((y / im.shape[0]) * (y1 - y0) + y0).tolist()\n",
"\n",
"\n",
"# Keep only the first contour of every colour, as (longitude, latitude) pairs.\n",
"# Contour arrays hold (row, column), hence column 1 feeds x and column 0 feeds y.\n",
"contours_scaled = [\n",
"    list(zip(scale_x(outline[:, 1]), scale_y(outline[:, 0])))\n",
"    for outline in (found[0] for found in contours)\n",
"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"def closed_ring(points):\n",
"    \"\"\"Return the points as a list of [longitude, latitude] lists whose last\n",
"    position repeats the first, as GeoJSON requires for polygon rings.\n",
"\n",
"    Contours that are already closed are returned unchanged.\n",
"    \"\"\"\n",
"    ring = [list(point) for point in points]\n",
"    if ring and ring[0] != ring[-1]:\n",
"        ring.append(list(ring[0]))\n",
"    return ring\n",
"\n",
"\n",
"# One Polygon feature per dialect; contours_scaled and regions are paired by position.\n",
"geojson = {\n",
"    \"type\": \"FeatureCollection\",\n",
"    \"features\": [\n",
"        {\n",
"            \"type\": \"Feature\",\n",
"            \"properties\": {'dialect': dialect},\n",
"            \"geometry\": {\n",
"                \"type\": \"Polygon\",\n",
"                \"coordinates\": [closed_ring(contour)]\n",
"            }\n",
"        }\n",
"        for contour, dialect in zip(contours_scaled, regions)\n",
"    ]\n",
"}\n",
"\n",
"with open('../data/dialect_regions.geojson', 'w') as f:\n",
"    json.dump(geojson, f)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Result"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Fresh base map centred on the middle of the georeferenced image.\n",
"center_latitude = (bounds[0][0] + bounds[1][0]) / 2\n",
"center_longitude = (bounds[0][1] + bounds[1][1]) / 2\n",
"m = folium.Map(\n",
"    location=[center_latitude, center_longitude],\n",
"    tiles='stamentoner',\n",
"    zoom_start=9\n",
")\n",
"\n",
"# Draw every extracted region as a red outline with the dialect name as popup.\n",
"# NOTE(review): reverse_latitude_longitude presumably swaps the GeoJSON\n",
"# [longitude, latitude] order into the [latitude, longitude] order folium takes - confirm.\n",
"for feature in geojson['features']:\n",
"    outline = folium.Polygon(\n",
"        reverse_latitude_longitude(feature['geometry']['coordinates']),\n",
"        color='red',\n",
"        fill_color='white',\n",
"        fill_opacity=0,\n",
"        popup=feature['properties']['dialect']\n",
"    )\n",
"    outline.add_to(m)\n",
"\n",
"m"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|