stimmenfryslan/notebooks/Dialect Regions from image....

77 KiB

In [36]:
import folium

from collections import Counter

from math import sqrt, floor
import numpy as np
from imageio import imread

%matplotlib notebook
from matplotlib import pyplot as plt

from skimage.morphology import binary_closing
from skimage.measure import find_contours, label

import folium.plugins
from folium_jsbutton import JsButton
In [18]:
im = imread('../data/dialects.png')

color_occurence = Counter(map(tuple, im.reshape(-1,3)))
colors_sorted_by_occurence = [c for c, _ in sorted(
    color_occurence.items(),
    key=lambda x: x[1],
    reverse=True)
]
In [31]:
pallette_width = floor(sqrt(len(colors_sorted_by_occurence)))
pallette = np.array(colors_sorted_by_occurence[:pallette_width**2]).reshape(pallette_width, pallette_width, 3)

_, (ax0, ax1) = plt.subplots(1, 2)
ax0.imshow(pallette)
for x in range(pallette_width):
    for y in range(pallette_width):
        ax0.text(x-0.5, y+0.5, str(x + y * pallette_width))
ax0.set_xticks([]), ax0.set_yticks([])



pallette_indices = [3, 4, 7, 8]
pallette = [colors_sorted_by_occurence[i] for i in pallette_indices]
pallette = np.array(pallette).reshape(1, -1, 3)
ax1.imshow(pallette)
ax1.set_xticks([]), ax1.set_yticks([])
None
No description has been provided for this image
In [37]:
bounds = [
    [52.832432288794514, 5.354483127593994],
    [53.41434089638827, 6.330699920654297]
]

m = folium.Map(
    location=[(bounds[0][0] + bounds[1][0]) / 2, (bounds[0][1] + bounds[1][1]) / 2],
    tiles='stamentoner',
    zoom_start=9
)

folium.raster_layers.ImageOverlay()

m
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-37-5561299adb2c> in <module>()
     10 )
     11 
---> 12 folium.plugins.ImageOverlay
     13 
     14 m

AttributeError: module 'folium.plugins' has no attribute 'ImageOverlay'
In [ ]:
print(geojson)
In [ ]:
plt.rcParams['figure.figsize'] = (9.5, 3)
ax0, ax1, ax2 = plt.subplots(1,3)[1]
ax0.imshow(im)
ax1.imshow(composed_49.astype(int))
ax2.imshow(composed_4.astype(int))
ax0.set_xticks([]); ax0.set_yticks([])
ax1.set_xticks([]); ax1.set_yticks([])
ax2.set_xticks([]); ax2.set_yticks([])
plt.tight_layout()



stavoren_to_east_pixels = [295, 717]
north_to_south_pixels = [99, 525]
In [ ]:
axes = plt.subplots(2,2)[1].ravel()
contours = []
for axis, c in zip(axes, relevant_colors):
    bi = (im[:-100] == c[None,None]).min(axis=2)
    bi = binary_closing(bi, np.ones((5,5)))
    
    labels = label(bi, background=False)
    
    contours.append(find_contours(bi, 0.5))

    axis.imshow(bi)
    for n, contour in enumerate(contours[-1][:1]):
        axis.plot(contour[:, 1], contour[:, 0], linewidth=2)
        axis.set_xticks([]); axis.set_yticks([])
plt.tight_layout()
In [3]:
a0, b0 = stavoren_to_east_coords
c0, d0 = stavoren_to_east_pixels

scale_x = lambda x: (x - c0) / (d0 - c0) * (b0 - a0) + a0

a1, b1 = north_to_south_coords
c1, d1 = north_to_south_pixels

scale_y = lambda x: (x - c1) / (d1 - c1) * (b1 - a1) + a1

contours_scaled = [
    list(zip(scale_x(c[0][:, 1]), scale_y(c[0][:, 0])))
    for c in contours
]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-7f649ab43c0e> in <module>()
----> 1 a0, b0 = stavoren_to_east_coords
      2 c0, d0 = stavoren_to_east_pixels
      3 
      4 scale_x = lambda x: (x - c0) / (d0 - c0) * (b0 - a0) + a0
      5 

NameError: name 'stavoren_to_east_coords' is not defined
In [4]:
geojson = json.dumps({
  "type": "FeatureCollection",
  "features": [
    {
      "type": "Feature",
      "properties": {'dialect': dialect},
      "geometry": {
        "type": "Polygon",
        "coordinates": [list(map(list, contour))]
      }
    }
    for contour, dialect in zip(contours_scaled, regions)
  ]
})

with open('dialect_regions.geojson', 'w') as f:
    f.write(geojson)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-fc568b2f2fd5> in <module>()
     10       }
     11     }
---> 12     for contour, dialect in zip(contours_scaled, regions)
     13   ]
     14 })

NameError: name 'contours_scaled' is not defined
In [5]:
 
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-732d7d519e9d> in <module>()
      1 m = folium.Map(
----> 2     location=[sum(north_to_south_coords) / 2, sum(stavoren_to_east_coords) / 2],
      3     tiles='Mapbox Bright',
      4     zoom_start=9
      5 )

NameError: name 'north_to_south_coords' is not defined
In [6]:
shapes = {
    dialect: shape({"type": "Polygon", "coordinates": [list(map(list, contour))]})
    for contour, dialect in zip(contours_scaled, regions)
}

def regions_for(coordinate):
    regions = {
        region_name
        for region_name, shape in shapes.items()
        if shape.contains(Point(*coordinate))
    }
    return regions

def distance(shape, longitude, latitude):
    ext = shape.exterior
    p = ext.interpolate(ext.project(Point(longitude, latitude)))
    return vincenty((latitude, longitude), (p.y, p.x))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-1008e368979e> in <module>()
      1 shapes = {
      2     dialect: shape({"type": "Polygon", "coordinates": [list(map(list, contour))]})
----> 3     for contour, dialect in zip(contours_scaled, regions)
      4 }
      5 

NameError: name 'contours_scaled' is not defined
In [7]:
# SELECT user_lat, user_lng, question_text, answer_text
picture_games = pandas.read_sql('''
SELECT language.name as language, item.name as picture,
       survey.user_lat as latitude, survey.user_lng as longitude,
       survey.area_name as area, survey.country_name as country,
       result.recording as filename,
       result.submitted_at as date
FROM       core_surveyresult as survey
INNER JOIN core_picturegameresult as result ON survey.id = result.survey_result_id
INNER JOIN core_language as language ON language.id = result.language_id
INNER JOIN core_picturegameitem as item
    ON result.picture_game_item_id = item.id
''', db)
# picture_games['filename'] = [filename.split('/')[-1] for filename in picture_games['filename']]
picture_games.set_index('filename', inplace=True)
In [8]:
region_per_picture_game = [
    {
        'dialects': [
            {
                'dialect': dialect,
                'boundary_distance': distance(shapes[dialect], longitude, latitude),
            }
            for dialect in regions_for((longitude, latitude))
        ],
        'filename': filename,
    }
    for filename, (latitude, longitude) in ProgressBar(
        picture_games[['latitude', 'longitude']].iterrows(),
        size=len(picture_games)
    )
]
VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='<b>0</b>s passed', placeholder='0…
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-aabb5cdda548> in <module>()
     12     for filename, (latitude, longitude) in ProgressBar(
     13         picture_games[['latitude', 'longitude']].iterrows(),
---> 14         size=len(picture_games)
     15     )
     16 ]

<ipython-input-8-aabb5cdda548> in <listcomp>(.0)
     10         'filename': filename,
     11     }
---> 12     for filename, (latitude, longitude) in ProgressBar(
     13         picture_games[['latitude', 'longitude']].iterrows(),
     14         size=len(picture_games)

NameError: name 'regions_for' is not defined
In [ ]:
Counter(len(x['dialects']) for x in region_per_picture_game)
In [ ]:
df = pandas.DataFrame([
    [r['filename'], r['dialects'][0]['dialect'], r['dialects'][0]['boundary_distance']]
    for r in region_per_picture_game
    if len(r['dialects']) == 1
], columns = ['filename', 'dialect', 'boundary_distance'])

df.to_excel('picture_game_recordings_by_dialect.xlsx')
df.to_csv('picture_game_recordings_by_dialect.csv')
df
In [9]:
# SELECT user_lat, user_lng, question_text, answer_text
free_speech_games = pandas.read_sql('''
SELECT language.name as language,
       survey.user_lat as latitude, survey.user_lng as longitude,
       survey.area_name as area, survey.country_name as country,
       result.recording as filename,
       result.submitted_at as date
FROM       core_surveyresult as survey
INNER JOIN core_freespeechresult as result ON survey.id = result.survey_result_id
INNER JOIN core_language as language ON language.id = result.language_id
''', db)
# free_speech_games['filename'] = [filename.split('/')[-1] for filename in games['filename']]
free_speech_games.set_index('filename', inplace=True)