stimmenfryslan/notebooks/Dialect Regions from image....

574 KiB

Extract dialect regions from image

Using image processing, extract region polygons for the dialects depicted in this image

dialect regions

In [1]:
from math import floor

import json
import folium
from folium_jsbutton import JsButton
from imageio import imread

from collections import Counter

from math import sqrt
import numpy as np

%matplotlib notebook
from matplotlib import pyplot as plt

from skimage.morphology import binary_closing
from skimage.measure import find_contours, label

Input

Load the image and determine the used colors.

In [2]:
im = imread('../data/dialects.png')

color_occurence = Counter(map(tuple, im.reshape(-1,3)))

color_sorted_by_occurence = [c for c, _ in sorted(
    color_occurence.items(),
    key=lambda x: x[1],
    reverse=True
)]

Relevant colors

Show the most used colors and select those of the relevant (dialect) regions

In [3]:
pallete_width = floor(sqrt(len(color_sorted_by_occurence)))
pallette = np.array(color_sorted_by_occurence)[:pallete_width*pallete_width]
pallette = pallette.reshape(pallete_width, pallete_width, 3)

_, (ax0, ax1) = plt.subplots(1,2)

ax0.imshow(pallette)
for x in range(pallete_width):
    for y in range(pallete_width):
        ax0.text(x-0.5, y+0.5, x + y*pallete_width)
ax0.set_xticks([])
ax0.set_yticks([])
ax0.set_title('(almost) all colors')

legend_color_indices = [3, 4, 7, 8]
legend_colors = [color_sorted_by_occurence[i] for i in legend_color_indices]
pallette = np.array(legend_colors).reshape(1, len(legend_color_indices), 3)

ax1.imshow(pallette)
ax1.set_xticks([])
ax1.set_yticks([])
ax1.set_title('selected colors')
None

regions = ['Klaaifrysk', 'Waldfrysk', 'Sudwesthoeksk', 'Noardhoeksk']
No description has been provided for this image

Georeferencing

Use folium to find the nort-east and south-west corners of the image.

In [4]:
bounds = [
    [53.54434089638824, 6.520699920654293],
    [52.59243228879456, 4.684483127594008]
]

center = (bounds[0][0] + bounds[1][0]) / 2, (bounds[0][1] + bounds[1][1]) / 2
m = folium.Map(center, zoom_start=9, tiles='stamentoner')


img = folium.raster_layers.ImageOverlay(
    name='Dialect regions',
    image='../data/dialects.png',
    bounds=bounds,
    opacity=0.6,
    interactive=True,
    cross_origin=False,
    zindex=1,
)
img.add_to(m)

control = 1
for corner, symbol0 in zip(['_northEast', '_southWest'], ['⌜', '⌟']):
    for axis, symbol1 in zip(['lng', 'lat'], ['⟷', '↕']):
        for direction in ['-', '+']:
            JsButton(
#                 title='{} {} {}'.format(symbol0, symbol1, direction),
                title = str(control),
                function="""
                function(btn, map) {{
                    var overlay = image_overlay_{overlay_id};
                    var bounds = overlay.getBounds();
                    bounds.{corner}.{axis} {direction}= 0.001;
                    overlay.setBounds(bounds);
                }}
            """.format(
                overlay_id = img._id, axis=axis, direction=direction,corner=corner
            )).add_to(m)
            control += 1
            
            
JsButton(
    title = '[]',
    function="""
    function(btn, map) {{
        var overlay = image_overlay_{overlay_id};
        var bounds = overlay.getBounds();
        console.log([[bounds._northEast.lat, bounds._northEast.lng], [bounds._southWest.lat, bounds._southWest.lat]]);
    }}
""".format(
    overlay_id = img._id
)).add_to(m)

m
Out[4]:

Locate contours

Find the countours for the connected components marked by the colors.

In [5]:
axes = plt.subplots(2,2)[1].ravel()
contours = []
for axis, c in zip(axes, np.array(legend_colors)):
    bi = (im[:-100] == c[None,None]).min(axis=2)
    bi = binary_closing(bi, np.ones((5,5)))
    
    labels = label(bi, background=False)
    contours.append(find_contours(bi, 0.5))

    axis.imshow(bi)
    for n, contour in enumerate(contours[-1][:1]):
        axis.plot(contour[:, 1], contour[:, 0], linewidth=2)
        axis.set_xticks([]); axis.set_yticks([])
plt.tight_layout()
No description has been provided for this image

translate pixel coordinates to latitude - longitudes.

In [6]:
(y0, x1), (y1, x0) = bounds

scale_x = lambda x: x0 + (x / im.shape[1]) * (x1 - x0)
scale_y = lambda y: y0 + (y / im.shape[0]) * (y1 - y0)

contours_scaled = [
    list(zip(scale_x(c[0][:, 1]), scale_y(c[0][:, 0])))
    for c in contours
]

Result

In [7]:
geojson = json.dumps({
  "type": "FeatureCollection",
  "features": [
    {
      "type": "Feature",
      "properties": {'dialect': dialect},
      "geometry": {
        "type": "Polygon",
        "coordinates": [list(map(list, contour))]
      }
    }
    for contour, dialect in zip(contours_scaled, regions)
  ]
})

with open('../data/frysian_dialect_regions.geojson', 'w') as f:
    f.write(geojson)
In [8]:
m = folium.Map(
    location=center,
    tiles='Mapbox Bright',
    zoom_start=9
)
folium.GeoJson('dialect_regions.geojson', name='geojson').add_to(m)

m
Out[8]: