cleaned up bar maps

This commit is contained in:
H.T. Kruitbosch 2018-10-03 16:11:31 +02:00
parent ce3c1e9ed1
commit 7da2bfc400
25 changed files with 78972 additions and 8 deletions

View File

@ -74,3 +74,10 @@ This is a simple example for the created gabmap files.
* [geojson](data/Gabmap_example.geojson) * [geojson](data/Gabmap_example.geojson)
* [percentages](data/Pronunciation_percentages_example.gabmap.tsv) * [percentages](data/Pronunciation_percentages_example.gabmap.tsv)
* [pronunciation](data/Pronunciations_example.gabmap.tsv) * [pronunciation](data/Pronunciations_example.gabmap.tsv)
### Bar Maps per word for Pronunciation Occurrence in Frisian Municipalities
For each word, a map illustrates the pronunciation occurrence as measured by the prediction quiz, per Frisian
municipality.
[notebook](notebooks/Bar%20Maps%20per%20word%20for%20Pronounciation%20Occurence%20in%20Frysian%20Municipalities.ipynb)

File diff suppressed because one or more lines are too long

4463
maps/bar-maps/avond.html Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

3008
maps/bar-maps/dag.html Normal file

File diff suppressed because one or more lines are too long

4896
maps/bar-maps/deurtje.html Normal file

File diff suppressed because one or more lines are too long

3597
maps/bar-maps/geel.html Normal file

File diff suppressed because one or more lines are too long

5286
maps/bar-maps/gegaan.html Normal file

File diff suppressed because one or more lines are too long

4615
maps/bar-maps/gezet.html Normal file

File diff suppressed because one or more lines are too long

3437
maps/bar-maps/heel.html Normal file

File diff suppressed because one or more lines are too long

20
maps/bar-maps/index.html Normal file
View File

@ -0,0 +1,20 @@
<html><head></head><body> <a href="armen (lichaamsdeel).html">armen (lichaamsdeel)</a><br/>
<a href="avond.html">avond</a><br/>
<a href="bij (insect).html">bij (insect)</a><br/>
<a href="blad (aan een boom).html">blad (aan een boom)</a><br/>
<a href="borst (lichaamsdeel).html">borst (lichaamsdeel)</a><br/>
<a href="dag.html">dag</a><br/>
<a href="deurtje.html">deurtje</a><br/>
<a href="geel.html">geel</a><br/>
<a href="gegaan.html">gegaan</a><br/>
<a href="gezet.html">gezet</a><br/>
<a href="heel.html">heel</a><br/>
<a href="index.html">index</a><br/>
<a href="kaas.html">kaas</a><br/>
<a href="koken.html">koken</a><br/>
<a href="oog.html">oog</a><br/>
<a href="sprak (toe).html">sprak (toe)</a><br/>
<a href="tand.html">tand</a><br/>
<a href="trein.html">trein</a><br/>
<a href="vis.html">vis</a><br/>
<a href="zaterdag.html">zaterdag</a></body></html>

3874
maps/bar-maps/kaas.html Normal file

File diff suppressed because one or more lines are too long

4775
maps/bar-maps/koken.html Normal file

File diff suppressed because one or more lines are too long

4381
maps/bar-maps/oog.html Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

3987
maps/bar-maps/tand.html Normal file

File diff suppressed because one or more lines are too long

3753
maps/bar-maps/trein.html Normal file

File diff suppressed because one or more lines are too long

1947
maps/bar-maps/vis.html Normal file

File diff suppressed because one or more lines are too long

4658
maps/bar-maps/zaterdag.html Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -35,15 +35,15 @@
"comboBox_Label": null, "comboBox_Label": null,
"comboBox_ObjectType": 1, "comboBox_ObjectType": 1,
"heightWidget": { "heightWidget": {
"comboData": 1, "comboData": 101,
"comboText": "Absolute value", "comboText": " \"distance 1\"",
"editText": "0", "editText": "0",
"type": 4 "type": 4
}, },
"labelHeightWidget": { "labelHeightWidget": {
"comboData": 2, "comboData": 2,
"comboText": "Height from point", "comboText": "Height from point",
"editText": "7309.11282486", "editText": "7500",
"type": 6 "type": 6
}, },
"radioButton_IntersectingFeatures": true, "radioButton_IntersectingFeatures": true,
@ -62,12 +62,12 @@
"styleWidget2": { "styleWidget2": {
"comboData": 1, "comboData": 1,
"comboText": "Fixed value", "comboText": "Fixed value",
"editText": "1000", "editText": "300",
"type": 1 "type": 1
}, },
"styleWidget3": { "styleWidget3": {
"comboData": 103, "comboData": 101,
"comboText": "\"distance1\"", "comboText": "\"distance 1\"",
"editText": "300", "editText": "300",
"type": 1 "type": 1
}, },
@ -77,5 +77,11 @@
} }
}, },
"PluginVersion": "1.4.2", "PluginVersion": "1.4.2",
"Template": "3DViewer.html" "Template": "3DViewer.html",
"WORLD": {
"lineEdit_BaseSize": "100",
"lineEdit_Color": "",
"lineEdit_zFactor": "1.5",
"lineEdit_zShift": "0"
}
} }

172
stimmen/folium.py Normal file
View File

@ -0,0 +1,172 @@
import folium
from jupyter_progressbar import ProgressBar
from pygeoif.geometry import mapping
from shapely.geometry.geo import shape, box
from stimmen.cbs import data_file
from html import escape
import numpy as np
from stimmen.latitude_longitude import reverse_latitude_longitude
def get_palette(n, no_black=True, no_white=True):
    """Return the colours of a bundled Glasbey palette as ``#rrggbb`` strings.

    The palette is read from ``data/glasbey/<k>_colors.txt`` (resolved through
    ``data_file``), where ``k`` is ``n`` plus one for each of ``no_black`` and
    ``no_white`` that is set, so that removing those colours can still leave
    ``n`` entries. Each non-empty line is parsed as comma-separated integer
    channel values.

    :param n: number of colours wanted.
    :param no_black: drop the ``0,0,0`` entry from the result.
    :param no_white: drop the ``255,255,255`` entry from the result.
    :return: list of hex colour strings, in file order.
    """
    excluded = set()
    if no_black:
        excluded.add((0, 0, 0))
    if no_white:
        excluded.add((255, 255, 255))

    file_name = '{}_colors.txt'.format(n + bool(no_black) + bool(no_white))
    palette = []
    with open(data_file('data', 'glasbey', file_name)) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            rgb = tuple(int(channel) for channel in line.split(','))
            # Compare parsed values rather than raw text, so the filter does not
            # depend on line endings or on spacing around the commas.
            if rgb in excluded:
                continue
            palette.append('#%02x%02x%02x' % rgb)
    return palette
def colored_name(name, color):
    """Wrap ``name`` in an HTML ``<span>`` whose inline style sets ``color``.

    The double quotes around the style attribute are emitted with a literal
    backslash in front of them (presumably so the markup survives being
    embedded in a double-quoted JavaScript string -- confirm against the
    consumer of the generated name).
    """
    template = '<span style=\\"color:{}; \\">{}</span>'
    return template.format(color, name)
def region_area_cdf(region_shape, resolution=10000):
    """Sample the cumulative area of ``region_shape`` from left to right.

    The horizontal extent of the shape's bounding box is divided into
    ``resolution`` equal steps. For each of the ``resolution + 1`` cut
    positions, the area of the shape lying left of the cut is computed and
    divided by the total area of the shape.

    :param region_shape: geometry exposing ``bounds``, ``area`` and being usable
        as the argument of ``intersection``.
    :param resolution: number of steps across the bounding box width.
    :return: numpy array of ``resolution + 1`` area fractions.
    """
    left, bottom, right, top = region_shape.bounds
    total_area = region_shape.area
    fractions = []
    for cut in np.linspace(left, right, resolution + 1):
        # Everything of the region that lies left of the current cut.
        left_part = box(left, bottom, cut, top).intersection(region_shape)
        fractions.append(left_part.area / total_area)
    return np.array(fractions)
# Only slightly faster than region_area_cdf.
# def fast_sliced_shape_areas(region_shape, recursions=13):
# results = np.zeros(2 ** recursions)
# xmin, ymin, xmax, ymax = region_shape.bounds
# total = 0
#
# def f(shape_, xmin, ymin, xmax, ymax, recursions, results_):
# nonlocal total
# shape_ = box(xmin, ymin, xmax, ymax).intersection(shape_)
# if recursions == 0:
# assert results_.shape == (1,)
# results_[0] = shape_.area
# total += shape_.area
# else:
# xmiddle = xmin + (xmax - xmin) / 2
# middle_index = len(results_) // 2
# f(shape_, xmin, ymin, xmiddle, ymax, recursions - 1, results_[:middle_index])
# f(shape_, xmiddle, ymin, xmax, ymax, recursions - 1, results_[middle_index:])
#
# f(region_shape, xmin, ymin, xmax, ymax, recursions, results)
# return results / results.sum() * region_shape.area
def area_adjust_boundaries(region_shape, boundaries, region_cdf_cache=None, resolution=10000):
    """Adjust the boundaries from percentage of the width of a shape, to percentage of the area of a shape.

    Each boundary is matched to the sample of the region's cumulative area
    function that is closest to it. The position of that sample is then turned
    into an x-coordinate by ``width_adjust_boundaries``.

    :param region_shape: geometry of the region.
    :param boundaries: one-dimensional sequence of area fractions.
    :param region_cdf_cache: optional precomputed result of ``region_area_cdf``
        for ``region_shape`` (array or plain sequence); computed when omitted.
    :param resolution: number of steps used when the cumulative area function
        has to be computed here. It is ignored when ``region_cdf_cache`` is
        given, because the step count is then taken from the cache itself.
    :return: whatever ``width_adjust_boundaries`` returns for the matched
        width fractions.
    """
    if region_cdf_cache is None:
        region_cdf_cache = region_area_cdf(region_shape, resolution)
    cdf = np.asarray(region_cdf_cache)
    boundaries = np.asarray(boundaries)

    # For every boundary, the index of the cdf sample nearest to it.
    nearest = np.abs(cdf[None, :] - boundaries[:, None]).argmin(axis=1)

    # A cdf of k + 1 samples spans k steps. Deriving the divisor from the cdf
    # keeps the result correct when a cache sampled at a different resolution
    # than the ``resolution`` argument is passed in.
    steps = max(len(cdf) - 1, 1)
    return width_adjust_boundaries(region_shape, nearest / steps)
def width_adjust_boundaries(region_shape, boundaries):
    """Map fractions of a shape's bounding-box width to x-coordinates.

    A fraction of 0 maps to the left edge of ``region_shape.bounds`` and a
    fraction of 1 to the right edge.

    :param region_shape: object whose ``bounds`` is ``(xmin, ymin, xmax, ymax)``.
    :param boundaries: a number or numpy array of width fractions.
    :return: ``boundaries`` scaled and shifted into the x-range of the shape.
    """
    left, _, right, _ = region_shape.bounds
    width = right - left
    return boundaries * width + left
def pronunciation_bars(
    regions, dataframe,
    region_name_property, region_name_column,
    group_column='answer_text',
    cutoff_percentage=0.05,
    normalize_area=True,
    progress_bar=False,
):
    """Build, per answer group, a folium FeatureGroup of bar polygons for all regions.

    Every region is cut into vertical slices, one per group value, so that the
    slices show how often each value of ``group_column`` occurs among the rows
    of that region. Values that never reach ``cutoff_percentage`` of the rows
    in any region are combined under the key ``'other'``.

    :param regions: GeoJSON-like mapping with a ``'features'`` list; each feature
        has ``'properties'`` and ``'geometry'``. When ``normalize_area`` is set,
        the cumulative area function of each processed region is cached in
        ``feature['properties']['__region_shape_cdf_cache']`` (the input is
        modified).
    :param dataframe: table of observations supporting ``groupby`` and boolean
        row selection (presumably a pandas DataFrame).
    :param region_name_property: feature property that holds the region name.
    :param region_name_column: dataframe column that holds the region name.
    :param group_column: dataframe column whose values are shown as bars.
    :param cutoff_percentage: minimal fraction of a region's rows a value needs,
        in at least one region, to get its own colour and layer.
    :param normalize_area: size each slice by area of the region rather than by
        width of its bounding box.
    :param progress_bar: wrap the iteration over the features in ``ProgressBar``.
    :return: dict mapping each relevant group value, plus ``'other'``, to its
        ``folium.FeatureGroup``.
    """
    # all values of group_column that appear at least cutoff_percentage in one of the regions
    relevant_groups = {
        group
        for region_name, region_rows in dataframe.groupby(region_name_column)
        for group, aggregation in region_rows.groupby(
            group_column).agg({group_column: len}).iterrows()
        if aggregation[group_column] >= cutoff_percentage * len(region_rows)
    }

    group_to_color = dict(zip(relevant_groups, get_palette(len(relevant_groups))))
    # NOTE(review): an answer literally equal to 'other' would collide with
    # this catch-all key.
    group_to_color['other'] = '#ccc'

    # Total number of rows per group over the whole dataframe, for the layer names.
    group_totals = {
        group_value: (dataframe[group_column] == group_value).sum()
        for group_value in relevant_groups
    }
    group_totals['other'] = len(dataframe) - sum(group_totals.values())

    # Each FeatureGroup represents all polygons (one for each region) of the relevant_groups
    feature_groups = {
        group_value: folium.FeatureGroup(
            name=colored_name(
                '{value} ({amount})'.format(
                    value=escape(group_value), amount=group_totals[group_value]),
                color
            ),
            overlay=True
        )
        for group_value, color in group_to_color.items()
    }

    wrap_progress = ProgressBar if progress_bar else (lambda x: x)

    # for each region, create the bar-polygons.
    for feature in wrap_progress(regions['features']):
        region_name = feature['properties'][region_name_property]
        region_rows = dataframe[dataframe[region_name_column] == region_name]
        n_rows = len(region_rows)
        if n_rows == 0:
            # Nothing to draw; also avoids dividing by zero below.
            continue

        region_shape = shape(feature['geometry'])
        _, ymin, _, ymax = region_shape.bounds

        group_values_occurrence = {
            group_value: aggregation[group_column]
            for group_value, aggregation in region_rows.groupby(
                group_column).agg({group_column: len}).iterrows()
            if group_value in relevant_groups
        }
        group_values_occurrence['other'] = n_rows - sum(group_values_occurrence.values())

        # Most frequent group first, with 'other' always last.
        group_values, group_occurrences = zip(*sorted(
            group_values_occurrence.items(),
            key=lambda x: (x[0] == 'other', -x[1])
        ))
        group_percentages = np.array(group_occurrences) / n_rows
        # Cumulative fractions: slice i runs from boundary i to boundary i + 1.
        group_boundaries = np.cumsum((0,) + group_occurrences) / n_rows

        if normalize_area:
            if '__region_shape_cdf_cache' not in feature['properties']:
                feature['properties']['__region_shape_cdf_cache'] = region_area_cdf(region_shape).tolist()
            group_boundaries = area_adjust_boundaries(
                region_shape, group_boundaries,
                region_cdf_cache=feature['properties']['__region_shape_cdf_cache']
            )
        else:
            group_boundaries = width_adjust_boundaries(region_shape, group_boundaries)

        for group_value, percentage, count, left_boundary, right_boundary in zip(
            group_values,
            group_percentages,
            group_occurrences,
            group_boundaries[:-1], group_boundaries[1:]
        ):
            if count == 0 or left_boundary == right_boundary:
                continue
            bar_shape = region_shape.intersection(box(left_boundary, ymin, right_boundary, ymax))
            if bar_shape.area == 0:
                continue
            polygon = folium.Polygon(
                reverse_latitude_longitude(mapping(bar_shape)['coordinates']),
                fill_color=group_to_color[group_value],
                fill_opacity=0.8,
                color=None,
                # The group value is free text; escape it like the layer names
                # so it cannot inject markup into the popup.
                popup='{} ({}, {: 3d}%)'.format(
                    escape(group_value), count, int(round(100 * percentage)))
            )
            polygon.add_to(feature_groups[group_value])
    return feature_groups

View File

@ -1,5 +1,6 @@
from pygeoif.geometry import mapping from pygeoif.geometry import mapping
from shapely.geometry import shape from shapely.geometry import shape
from shapely.geometry.point import Point
def merge_features(geojson, condition, aggregate={}): def merge_features(geojson, condition, aggregate={}):
@ -41,3 +42,39 @@ def merge_features(geojson, condition, aggregate={}):
'properties': properties 'properties': properties
}) })
return geojson return geojson
def inject_geojson_regions_into_dataframe(
    geojson, dataframe,
    latitude_column='latitude', longitude_column='longitude',
    region_name_property='name',
    region_name_column='region'
):
    """Add a column with the name of the geojson region that contains each row.

    Adds a ``region_name_column`` column to the dataframe with the region name
    as specified in the ``region_name_property`` of the geojson, by checking
    which geojson feature geometrically contains the longitude and latitude of
    the dataframe's row. This allows for faster cross reference between the
    geojson and the dataframe compared to always checking shape-point
    containment when cross referencing. Operates in place.

    :param geojson: mapping with a ``'features'`` list; each feature has
        ``'properties'`` and ``'geometry'``.
    :param dataframe: table with the latitude and longitude columns.
    :param latitude_column: name of the column holding latitudes.
    :param longitude_column: name of the column holding longitudes.
    :param region_name_property: feature property that holds the region name.
    :param region_name_column: name of the column to write.
    :return: the same ``dataframe``. Rows that lie in no region, or whose
        coordinates are NaN, get ``None``.
    """
    shapes = {
        feature['properties'][region_name_property]: shape(feature['geometry'])
        for feature in geojson['features']
    }

    def get_region_name(point):
        # First region, in feature order, that contains the point.
        for region_name, region_shape in shapes.items():
            if region_shape.contains(point):
                return region_name
        return None

    # Many rows share the same coordinates; test each distinct point only once.
    point_to_region_name = {}

    def lookup(latitude, longitude):
        # NaN is the only value unequal to itself. It cannot serve as a
        # dictionary key that is found again, and a row without coordinates
        # lies in no region.
        if latitude != latitude or longitude != longitude:
            return None
        key = (latitude, longitude)
        if key not in point_to_region_name:
            point_to_region_name[key] = get_region_name(Point(longitude, latitude))
        return point_to_region_name[key]

    dataframe[region_name_column] = [
        lookup(latitude, longitude)
        for latitude, longitude in zip(dataframe[latitude_column], dataframe[longitude_column])
    ]
    return dataframe