cleaned up bar maps
This commit is contained in:
parent
ce3c1e9ed1
commit
7da2bfc400
@ -74,3 +74,10 @@ This is a simple example for the created gabmap files.
|
||||
* [geojson](data/Gabmap_example.geojson)
|
||||
* [percentages](data/Pronunciation_percentages_example.gabmap.tsv)
|
||||
* [pronunciation](data/Pronunciations_example.gabmap.tsv)
|
||||
|
||||
### Bar Maps per word for Pronunciation Occurrence in Frysian Municipalities
|
||||
|
||||
For each word, a map illustrates the pronunciation occurrence as measured by the prediction quiz, per Frysian
|
||||
municipality.
|
||||
|
||||
[notebook](notebooks/Bar%20Maps%20per%20word%20for%20Pronounciation%20Occurence%20in%20Frysian%20Municipalities.ipynb)
|
||||
|
5637
maps/bar-maps/armen (lichaamsdeel).html
Normal file
5637
maps/bar-maps/armen (lichaamsdeel).html
Normal file
File diff suppressed because one or more lines are too long
4463
maps/bar-maps/avond.html
Normal file
4463
maps/bar-maps/avond.html
Normal file
File diff suppressed because one or more lines are too long
4229
maps/bar-maps/bij (insect).html
Normal file
4229
maps/bar-maps/bij (insect).html
Normal file
File diff suppressed because one or more lines are too long
3008
maps/bar-maps/blad (aan een boom).html
Normal file
3008
maps/bar-maps/blad (aan een boom).html
Normal file
File diff suppressed because one or more lines are too long
4381
maps/bar-maps/borst (lichaamsdeel).html
Normal file
4381
maps/bar-maps/borst (lichaamsdeel).html
Normal file
File diff suppressed because one or more lines are too long
3008
maps/bar-maps/dag.html
Normal file
3008
maps/bar-maps/dag.html
Normal file
File diff suppressed because one or more lines are too long
4896
maps/bar-maps/deurtje.html
Normal file
4896
maps/bar-maps/deurtje.html
Normal file
File diff suppressed because one or more lines are too long
3597
maps/bar-maps/geel.html
Normal file
3597
maps/bar-maps/geel.html
Normal file
File diff suppressed because one or more lines are too long
5286
maps/bar-maps/gegaan.html
Normal file
5286
maps/bar-maps/gegaan.html
Normal file
File diff suppressed because one or more lines are too long
4615
maps/bar-maps/gezet.html
Normal file
4615
maps/bar-maps/gezet.html
Normal file
File diff suppressed because one or more lines are too long
3437
maps/bar-maps/heel.html
Normal file
3437
maps/bar-maps/heel.html
Normal file
File diff suppressed because one or more lines are too long
20
maps/bar-maps/index.html
Normal file
20
maps/bar-maps/index.html
Normal file
@ -0,0 +1,20 @@
|
||||
<html><head></head><body> <a href="armen (lichaamsdeel).html">armen (lichaamsdeel)</a><br/>
|
||||
<a href="avond.html">avond</a><br/>
|
||||
<a href="bij (insect).html">bij (insect)</a><br/>
|
||||
<a href="blad (aan een boom).html">blad (aan een boom)</a><br/>
|
||||
<a href="borst (lichaamsdeel).html">borst (lichaamsdeel)</a><br/>
|
||||
<a href="dag.html">dag</a><br/>
|
||||
<a href="deurtje.html">deurtje</a><br/>
|
||||
<a href="geel.html">geel</a><br/>
|
||||
<a href="gegaan.html">gegaan</a><br/>
|
||||
<a href="gezet.html">gezet</a><br/>
|
||||
<a href="heel.html">heel</a><br/>
|
||||
<a href="index.html">index</a><br/>
|
||||
<a href="kaas.html">kaas</a><br/>
|
||||
<a href="koken.html">koken</a><br/>
|
||||
<a href="oog.html">oog</a><br/>
|
||||
<a href="sprak (toe).html">sprak (toe)</a><br/>
|
||||
<a href="tand.html">tand</a><br/>
|
||||
<a href="trein.html">trein</a><br/>
|
||||
<a href="vis.html">vis</a><br/>
|
||||
<a href="zaterdag.html">zaterdag</a></body></html>
|
3874
maps/bar-maps/kaas.html
Normal file
3874
maps/bar-maps/kaas.html
Normal file
File diff suppressed because one or more lines are too long
4775
maps/bar-maps/koken.html
Normal file
4775
maps/bar-maps/koken.html
Normal file
File diff suppressed because one or more lines are too long
4381
maps/bar-maps/oog.html
Normal file
4381
maps/bar-maps/oog.html
Normal file
File diff suppressed because one or more lines are too long
4381
maps/bar-maps/sprak (toe).html
Normal file
4381
maps/bar-maps/sprak (toe).html
Normal file
File diff suppressed because one or more lines are too long
3987
maps/bar-maps/tand.html
Normal file
3987
maps/bar-maps/tand.html
Normal file
File diff suppressed because one or more lines are too long
3753
maps/bar-maps/trein.html
Normal file
3753
maps/bar-maps/trein.html
Normal file
File diff suppressed because one or more lines are too long
1947
maps/bar-maps/vis.html
Normal file
1947
maps/bar-maps/vis.html
Normal file
File diff suppressed because one or more lines are too long
4658
maps/bar-maps/zaterdag.html
Normal file
4658
maps/bar-maps/zaterdag.html
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -35,15 +35,15 @@
|
||||
"comboBox_Label": null,
|
||||
"comboBox_ObjectType": 1,
|
||||
"heightWidget": {
|
||||
"comboData": 1,
|
||||
"comboText": "Absolute value",
|
||||
"comboData": 101,
|
||||
"comboText": " \"distance 1\"",
|
||||
"editText": "0",
|
||||
"type": 4
|
||||
},
|
||||
"labelHeightWidget": {
|
||||
"comboData": 2,
|
||||
"comboText": "Height from point",
|
||||
"editText": "7309.11282486",
|
||||
"editText": "7500",
|
||||
"type": 6
|
||||
},
|
||||
"radioButton_IntersectingFeatures": true,
|
||||
@ -62,11 +62,11 @@
|
||||
"styleWidget2": {
|
||||
"comboData": 1,
|
||||
"comboText": "Fixed value",
|
||||
"editText": "1000",
|
||||
"editText": "300",
|
||||
"type": 1
|
||||
},
|
||||
"styleWidget3": {
|
||||
"comboData": 103,
|
||||
"comboData": 101,
|
||||
"comboText": "\"distance 1\"",
|
||||
"editText": "300",
|
||||
"type": 1
|
||||
@ -77,5 +77,11 @@
|
||||
}
|
||||
},
|
||||
"PluginVersion": "1.4.2",
|
||||
"Template": "3DViewer.html"
|
||||
"Template": "3DViewer.html",
|
||||
"WORLD": {
|
||||
"lineEdit_BaseSize": "100",
|
||||
"lineEdit_Color": "",
|
||||
"lineEdit_zFactor": "1.5",
|
||||
"lineEdit_zShift": "0"
|
||||
}
|
||||
}
|
172
stimmen/folium.py
Normal file
172
stimmen/folium.py
Normal file
@ -0,0 +1,172 @@
|
||||
import folium
|
||||
from jupyter_progressbar import ProgressBar
|
||||
from pygeoif.geometry import mapping
|
||||
from shapely.geometry.geo import shape, box
|
||||
|
||||
from stimmen.cbs import data_file
|
||||
from html import escape
|
||||
import numpy as np
|
||||
|
||||
from stimmen.latitude_longitude import reverse_latitude_longitude
|
||||
|
||||
|
||||
def get_palette(n, no_black=True, no_white=True):
    """Read a colour palette from the bundled glasbey data files.

    The file ``data/glasbey/<k>_colors.txt`` (resolved through ``data_file``)
    is opened, where ``k`` is ``n`` plus one for each of ``no_black`` and
    ``no_white`` that is set. Every non-blank line is parsed as three
    comma-separated integers ``r,g,b``.

    :param n: number of colours wanted, not counting the excluded ones.
    :param no_black: drop the colour ``0,0,0`` from the result.
    :param no_white: drop the colour ``255,255,255`` from the result.
    :return: list of ``'#rrggbb'`` strings in file order. The list is not
        truncated, so it is longer than ``n`` when the file does not contain
        every excluded colour.
    """
    excluded = set()
    if no_black:
        excluded.add((0, 0, 0))
    if no_white:
        excluded.add((255, 255, 255))

    file_name = '{}_colors.txt'.format(n + no_black + no_white)
    palette = []
    with open(data_file('data', 'glasbey', file_name)) as f:
        for line in f:
            line = line.strip()
            if not line:
                # tolerate blank lines, e.g. a trailing empty line
                continue
            # Compare parsed values rather than raw text, so the filter does
            # not depend on the line ending or on padding around the numbers.
            rgb = tuple(int(c) for c in line.split(','))
            if rgb not in excluded:
                palette.append('#%02x%02x%02x' % rgb)
    return palette
|
||||
|
||||
|
||||
def colored_name(name, color):
    """Wrap ``name`` in a ``<span>`` whose inline style sets the text colour.

    :param name: text (or markup) placed inside the span, inserted as is.
    :param color: CSS colour value inserted after ``color:``.
    :return: the span markup as a string.
    """
    # The quotes around the style attribute are emitted with a literal
    # backslash in front of them.
    # NOTE(review): presumably so the markup survives being embedded in a
    # quoted string by whatever renders the layer name -- confirm.
    span_template = '<span style=\\"color:{}; \\">{}</span>'
    return span_template.format(color, name)
|
||||
|
||||
|
||||
def region_area_cdf(region_shape, resolution=10000):
    """Sample, from left to right, the fraction of a shape's area covered so far.

    The bounding box of ``region_shape`` is cut at ``resolution + 1`` evenly
    spaced x positions, running from its left edge to its right edge. For
    each position the area of the shape lying left of the cut is divided by
    the total area of the shape.

    :param region_shape: geometry exposing ``bounds``, ``area`` and being a
        valid argument to ``intersection`` (a shapely geometry in this module).
    :param resolution: number of equal-width steps across the bounding box.
    :return: numpy array of length ``resolution + 1`` with the area fractions.
    """
    left, bottom, right, top = region_shape.bounds
    total_area = region_shape.area

    def fraction_left_of(cut_x):
        # Clip the shape with a box reaching from the left edge up to cut_x.
        clip = box(left, bottom, cut_x, top)
        return clip.intersection(region_shape).area / total_area

    cut_positions = np.linspace(left, right, resolution + 1)
    return np.array([fraction_left_of(cut_x) for cut_x in cut_positions])
|
||||
|
||||
|
||||
# Only slightly faster than region_area_cdf.
|
||||
# def fast_sliced_shape_areas(region_shape, recursions=13):
|
||||
# results = np.zeros(2 ** recursions)
|
||||
# xmin, ymin, xmax, ymax = region_shape.bounds
|
||||
# total = 0
|
||||
#
|
||||
# def f(shape_, xmin, ymin, xmax, ymax, recursions, results_):
|
||||
# nonlocal total
|
||||
# shape_ = box(xmin, ymin, xmax, ymax).intersection(shape_)
|
||||
# if recursions == 0:
|
||||
# assert results_.shape == (1,)
|
||||
# results_[0] = shape_.area
|
||||
# total += shape_.area
|
||||
# else:
|
||||
# xmiddle = xmin + (xmax - xmin) / 2
|
||||
# middle_index = len(results_) // 2
|
||||
# f(shape_, xmin, ymin, xmiddle, ymax, recursions - 1, results_[:middle_index])
|
||||
# f(shape_, xmiddle, ymin, xmax, ymax, recursions - 1, results_[middle_index:])
|
||||
#
|
||||
# f(region_shape, xmin, ymin, xmax, ymax, recursions, results)
|
||||
# return results / results.sum() * region_shape.area
|
||||
|
||||
|
||||
def area_adjust_boundaries(region_shape, boundaries, region_cdf_cache=None, resolution=10000):
    """Adjust the boundaries from percentage of the width of a shape, to percentage of the area of a shape.

    For every value in ``boundaries`` the sample of the shape's area CDF that
    lies closest to it is looked up; the position of that sample is then
    converted to an x coordinate by ``width_adjust_boundaries``.

    :param region_shape: the geometry the boundaries refer to.
    :param boundaries: 1-d sequence of area fractions.
    :param region_cdf_cache: optional precomputed result of
        ``region_area_cdf`` for ``region_shape`` (array or plain list). When
        omitted it is computed here with ``resolution``.
    :param resolution: number of steps used when the CDF has to be computed.
        It is ignored when ``region_cdf_cache`` is given; the step count is
        then taken from the length of the cache.
    :return: whatever ``width_adjust_boundaries`` returns for the matched
        positions (x coordinates of the adjusted boundaries).
    :raises ValueError: if the CDF holds fewer than two samples.
    """
    if region_cdf_cache is None:
        region_cdf = region_area_cdf(region_shape, resolution)
    else:
        region_cdf = np.asarray(region_cdf_cache)
    boundaries = np.asarray(boundaries)

    # A CDF of k + 1 samples spans k steps. Deriving the step count from the
    # samples keeps the result correct for a cache that was built with a
    # resolution other than the `resolution` argument.
    n_steps = len(region_cdf) - 1
    if n_steps < 1:
        raise ValueError('the region CDF needs at least two samples')

    # Index of the CDF sample nearest to each requested area fraction.
    nearest = np.abs(region_cdf[None, :] - boundaries[:, None]).argmin(axis=1)
    return width_adjust_boundaries(region_shape, nearest / n_steps)
|
||||
|
||||
|
||||
def width_adjust_boundaries(region_shape, boundaries):
    """Map fractions of a shape's horizontal extent onto x coordinates.

    :param region_shape: object whose ``bounds`` is a 4-tuple
        ``(xmin, ymin, xmax, ymax)``.
    :param boundaries: fraction(s) of the width; 0 maps to ``xmin`` and 1 to
        ``xmax``. Must support ``* number`` and ``+ number`` (e.g. a numpy
        array or a float).
    :return: ``boundaries`` scaled by the width and shifted by ``xmin``.
    """
    left_edge, _bottom, right_edge, _top = region_shape.bounds
    width = right_edge - left_edge
    return boundaries * width + left_edge
|
||||
|
||||
|
||||
def pronunciation_bars(
    regions, dataframe,
    region_name_property, region_name_column,
    group_column='answer_text',
    cutoff_percentage=0.05,
    normalize_area=True,
    progress_bar=False,
):
    """Build folium layers that draw each region as a row of vertical bars.

    Every region is sliced from left to right into one bar per group value,
    sized by how often that value occurs among the region's rows. Values that
    never reach ``cutoff_percentage`` in any region are pooled under the
    label ``'other'``, which is always drawn last.

    :param regions: geojson-like dict with a ``'features'`` list; each feature
        needs a ``'geometry'`` and ``'properties'[region_name_property]``.
        When ``normalize_area`` is set, a ``'__region_shape_cdf_cache'`` entry
        is added to the feature's properties as a side effect.
    :param dataframe: pandas DataFrame holding ``region_name_column`` and
        ``group_column``.
    :param region_name_property: feature property holding the region name.
    :param region_name_column: dataframe column holding the region name.
    :param group_column: dataframe column whose values are counted. The
        values are passed to ``html.escape`` and therefore have to be strings.
    :param cutoff_percentage: minimal fraction (0..1) of a region's rows a
        value needs, in at least one region, to get its own layer.
    :param normalize_area: if true, bar boundaries are placed so that bar
        *area* matches the fractions; otherwise bar *width* does.
    :param progress_bar: if true, iterate the features through ``ProgressBar``.
    :return: dict mapping each group value (plus ``'other'``) to the
        ``folium.FeatureGroup`` holding that value's bar polygons.
    """
    # All values of group_column that make up at least cutoff_percentage of
    # the rows of at least one region.
    relevant_groups = {
        group
        for _, region_rows in dataframe.groupby(region_name_column)
        for group, count in region_rows.groupby(group_column).size().items()
        if count >= cutoff_percentage * len(region_rows)
    }

    # Sorted, so the colour given to a group does not depend on set ordering.
    ordered_groups = sorted(relevant_groups)
    group_to_color = dict(zip(ordered_groups, get_palette(len(ordered_groups))))
    group_to_color['other'] = '#ccc'

    # Overall number of rows per layer; 'other' takes whatever is left.
    overall_counts = dataframe.groupby(group_column).size()
    group_totals = {
        group: int(overall_counts.get(group, 0))
        for group in ordered_groups
    }
    group_totals['other'] = len(dataframe) - sum(group_totals.values())

    # Each FeatureGroup collects the polygons (one per region) of one group.
    feature_groups = {
        group_value: folium.FeatureGroup(
            name=colored_name(
                '{value} ({amount})'.format(
                    value=escape(group_value),
                    amount=group_totals[group_value],
                ),
                color
            ),
            overlay=True
        )
        for group_value, color in group_to_color.items()
    }

    wrap = ProgressBar if progress_bar else (lambda iterable: iterable)

    # For each region, create the bar polygons.
    for feature in wrap(regions['features']):
        region_name = feature['properties'][region_name_property]
        region_rows = dataframe[dataframe[region_name_column] == region_name]
        n_rows = len(region_rows)
        if n_rows == 0:
            # Nothing to draw; also avoids dividing by zero further down.
            continue

        region_shape = shape(feature['geometry'])
        _, ymin, _, ymax = region_shape.bounds

        occurrences = {
            group_value: count
            for group_value, count in region_rows.groupby(group_column).size().items()
            if group_value in relevant_groups
        }
        occurrences['other'] = n_rows - sum(occurrences.values())
        # Most frequent first, with 'other' always at the end.
        group_values, group_occurrences = zip(*sorted(
            occurrences.items(),
            key=lambda item: (item[0] == 'other', -item[1])
        ))

        group_percentages = np.array(group_occurrences) / n_rows
        # Cumulative fractions 0..1 marking where each bar starts and ends.
        group_boundaries = np.cumsum((0,) + group_occurrences) / n_rows
        if normalize_area:
            # The CDF is stored on the feature as a plain list, so later
            # calls with the same `regions` object can reuse it.
            properties = feature['properties']
            if '__region_shape_cdf_cache' not in properties:
                properties['__region_shape_cdf_cache'] = region_area_cdf(region_shape).tolist()
            group_boundaries = area_adjust_boundaries(
                region_shape, group_boundaries,
                region_cdf_cache=properties['__region_shape_cdf_cache']
            )
        else:
            group_boundaries = width_adjust_boundaries(region_shape, group_boundaries)

        for group_value, percentage, count, left_boundary, right_boundary in zip(
            group_values,
            group_percentages,
            group_occurrences,
            group_boundaries[:-1], group_boundaries[1:]
        ):
            if count == 0 or left_boundary == right_boundary:
                continue

            bar_shape = region_shape.intersection(box(left_boundary, ymin, right_boundary, ymax))
            if bar_shape.area == 0:
                continue
            polygon = folium.Polygon(
                reverse_latitude_longitude(mapping(bar_shape)['coordinates']),
                fill_color=group_to_color[group_value],
                fill_opacity=0.8,
                color=None,
                # Escaped like the layer name, so markup characters in an
                # answer are shown literally.
                popup='{} ({}, {: 3d}%)'.format(
                    escape(str(group_value)), count, int(round(100 * percentage)))
            )
            polygon.add_to(feature_groups[group_value])

    return feature_groups
|
@ -1,5 +1,6 @@
|
||||
from pygeoif.geometry import mapping
|
||||
from shapely.geometry import shape
|
||||
from shapely.geometry.point import Point
|
||||
|
||||
|
||||
def merge_features(geojson, condition, aggregate={}):
|
||||
@ -41,3 +42,39 @@ def merge_features(geojson, condition, aggregate={}):
|
||||
'properties': properties
|
||||
})
|
||||
return geojson
|
||||
|
||||
|
||||
def inject_geojson_regions_into_dataframe(
    geojson, dataframe,
    latitude_column='latitude', longitude_column='longitude',
    region_name_property='name',
    region_name_column='region'
):
    """Add a column naming the geojson region that contains each row's point.

    For every row, the features of ``geojson`` are searched for the first one
    whose geometry contains the point ``(longitude, latitude)``; the value of
    that feature's ``region_name_property`` is written to
    ``region_name_column``. Storing the name once allows later cross
    references between the geojson and the dataframe without repeating the
    shape-point containment test. Operates in place.

    Rows whose point lies in no feature, or whose latitude or longitude is
    missing (``None`` or NaN), get ``None``.

    :param geojson: dict with a ``'features'`` list; each feature needs a
        ``'geometry'`` and ``'properties'[region_name_property]``.
    :param dataframe: dataframe holding the coordinate columns; modified.
    :param latitude_column: name of the latitude column.
    :param longitude_column: name of the longitude column.
    :param region_name_property: feature property holding the region name.
    :param region_name_column: name of the column to create or overwrite.
    :return: the same ``dataframe`` object.
    """
    shapes = {
        feature['properties'][region_name_property]: shape(feature['geometry'])
        for feature in geojson['features']
    }

    def is_missing(value):
        # NaN is the only value that is not equal to itself.
        return value is None or value != value

    def get_region_name(latitude, longitude):
        point = Point(longitude, latitude)
        for region_name, region_shape in shapes.items():
            if region_shape.contains(point):
                return region_name
        return None

    # Many rows share coordinates, so each distinct point is resolved once.
    point_to_region_name = {}
    region_names = []
    for latitude, longitude in zip(dataframe[latitude_column], dataframe[longitude_column]):
        if is_missing(latitude) or is_missing(longitude):
            # NaN never compares equal, so it cannot serve as a lookup key.
            region_names.append(None)
            continue
        key = (latitude, longitude)
        if key not in point_to_region_name:
            point_to_region_name[key] = get_region_name(latitude, longitude)
        region_names.append(point_to_region_name[key])

    dataframe[region_name_column] = region_names
    return dataframe
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user