cleaned up bar maps
This commit is contained in:
parent
ce3c1e9ed1
commit
7da2bfc400
|
@ -74,3 +74,10 @@ This is a simple example for the created gabmap files.
|
|||
* [geojson](data/Gabmap_example.geojson)
|
||||
* [percentages](data/Pronunciation_percentages_example.gabmap.tsv)
|
||||
* [pronunciation](data/Pronunciations_example.gabmap.tsv)
|
||||
|
||||
### Bar Maps per word for Pronunciation Occurrence in Frysian Municipalities
|
||||
|
||||
For each word, a map illustrates the pronunciation occurrence as measured by the prediction quiz, per Frysian
|
||||
municipality.
|
||||
|
||||
[notebook](notebooks/Bar%20Maps%20per%20word%20for%20Pronounciation%20Occurence%20in%20Frysian%20Municipalities.ipynb)
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,20 @@
|
|||
<html><head></head><body> <a href="armen (lichaamsdeel).html">armen (lichaamsdeel)</a><br/>
|
||||
<a href="avond.html">avond</a><br/>
|
||||
<a href="bij (insect).html">bij (insect)</a><br/>
|
||||
<a href="blad (aan een boom).html">blad (aan een boom)</a><br/>
|
||||
<a href="borst (lichaamsdeel).html">borst (lichaamsdeel)</a><br/>
|
||||
<a href="dag.html">dag</a><br/>
|
||||
<a href="deurtje.html">deurtje</a><br/>
|
||||
<a href="geel.html">geel</a><br/>
|
||||
<a href="gegaan.html">gegaan</a><br/>
|
||||
<a href="gezet.html">gezet</a><br/>
|
||||
<a href="heel.html">heel</a><br/>
|
||||
<a href="index.html">index</a><br/>
|
||||
<a href="kaas.html">kaas</a><br/>
|
||||
<a href="koken.html">koken</a><br/>
|
||||
<a href="oog.html">oog</a><br/>
|
||||
<a href="sprak (toe).html">sprak (toe)</a><br/>
|
||||
<a href="tand.html">tand</a><br/>
|
||||
<a href="trein.html">trein</a><br/>
|
||||
<a href="vis.html">vis</a><br/>
|
||||
<a href="zaterdag.html">zaterdag</a></body></html>
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -35,15 +35,15 @@
|
|||
"comboBox_Label": null,
|
||||
"comboBox_ObjectType": 1,
|
||||
"heightWidget": {
|
||||
"comboData": 1,
|
||||
"comboText": "Absolute value",
|
||||
"comboData": 101,
|
||||
"comboText": " \"distance 1\"",
|
||||
"editText": "0",
|
||||
"type": 4
|
||||
},
|
||||
"labelHeightWidget": {
|
||||
"comboData": 2,
|
||||
"comboText": "Height from point",
|
||||
"editText": "7309.11282486",
|
||||
"editText": "7500",
|
||||
"type": 6
|
||||
},
|
||||
"radioButton_IntersectingFeatures": true,
|
||||
|
@ -62,12 +62,12 @@
|
|||
"styleWidget2": {
|
||||
"comboData": 1,
|
||||
"comboText": "Fixed value",
|
||||
"editText": "1000",
|
||||
"editText": "300",
|
||||
"type": 1
|
||||
},
|
||||
"styleWidget3": {
|
||||
"comboData": 103,
|
||||
"comboText": "\"distance1\"",
|
||||
"comboData": 101,
|
||||
"comboText": "\"distance 1\"",
|
||||
"editText": "300",
|
||||
"type": 1
|
||||
},
|
||||
|
@ -77,5 +77,11 @@
|
|||
}
|
||||
},
|
||||
"PluginVersion": "1.4.2",
|
||||
"Template": "3DViewer.html"
|
||||
"Template": "3DViewer.html",
|
||||
"WORLD": {
|
||||
"lineEdit_BaseSize": "100",
|
||||
"lineEdit_Color": "",
|
||||
"lineEdit_zFactor": "1.5",
|
||||
"lineEdit_zShift": "0"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,172 @@
|
|||
import folium
|
||||
from jupyter_progressbar import ProgressBar
|
||||
from pygeoif.geometry import mapping
|
||||
from shapely.geometry.geo import shape, box
|
||||
|
||||
from stimmen.cbs import data_file
|
||||
from html import escape
|
||||
import numpy as np
|
||||
|
||||
from stimmen.latitude_longitude import reverse_latitude_longitude
|
||||
|
||||
|
||||
def get_palette(n, no_black=True, no_white=True):
    """Read a Glasbey colour file and return its colours as ``#rrggbb`` strings.

    The file that is opened is ``<k>_colors.txt`` with
    ``k = n + no_black + no_white``, located through
    ``data_file('data', 'glasbey', ...)``.  Each non-empty line is expected to
    hold three comma-separated integers (red, green, blue).

    :param n: number of colours wanted, not counting the excluded ones.
    :param no_black: drop the ``0,0,0`` entry from the result.
    :param no_white: drop the ``255,255,255`` entry from the result.
    :return: list of hex colour strings in file order.  NOTE(review): the list
        has exactly ``n`` entries only if the file really contains each
        excluded colour -- confirm against the data files.
    """
    excluded = set()
    if no_black:
        excluded.add((0, 0, 0))
    if no_white:
        excluded.add((255, 255, 255))

    path = data_file('data', 'glasbey', '{}_colors.txt'.format(n + no_black + no_white))
    palette = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing one) instead of crashing.
                continue
            rgb = tuple(int(c) for c in line.split(','))
            # Compare parsed values rather than raw text, so the filter does not
            # depend on the line ending or on whitespace around the numbers.
            if rgb in excluded:
                continue
            palette.append('#%02x%02x%02x' % rgb)
    return palette
|
||||
|
||||
|
||||
def colored_name(name, color):
    """Wrap ``name`` in a ``<span>`` whose inline style sets its text colour.

    The double quotes of the ``style`` attribute are emitted preceded by a
    literal backslash.  NOTE(review): presumably because the result is embedded
    in a double-quoted JavaScript string by the caller -- confirm.

    :param name: text (or markup) to place inside the span; inserted as-is.
    :param color: CSS colour value, e.g. ``'#ff0000'``.
    :return: the markup string.
    """
    template = '<span style=\\"color:{}; \\">{}</span>'
    return template.format(color, name)
|
||||
|
||||
|
||||
def region_area_cdf(region_shape, resolution=10000):
    """Sample the cumulative share of a shape's area from its left to its right edge.

    The bounding box of ``region_shape`` is cut at ``resolution + 1`` evenly
    spaced x positions (both edges included).  For each position the area of
    the shape lying left of the cut is divided by the total area.

    :param region_shape: geometry exposing ``bounds``, ``area`` and being a
        valid argument to ``intersection`` (a shapely geometry).
    :param resolution: number of intervals the width is divided into.
    :return: numpy array of ``resolution + 1`` area fractions.
    """
    xmin, ymin, xmax, ymax = region_shape.bounds
    total_area = region_shape.area

    fractions = []
    for right_edge in np.linspace(xmin, xmax, resolution + 1):
        # Everything of the bounding box to the left of the current cut.
        left_part = box(xmin, ymin, right_edge, ymax)
        fractions.append(left_part.intersection(region_shape).area / total_area)
    return np.array(fractions)
|
||||
|
||||
|
||||
# Only slightly faster than region_area_cdf.
|
||||
# def fast_sliced_shape_areas(region_shape, recursions=13):
|
||||
# results = np.zeros(2 ** recursions)
|
||||
# xmin, ymin, xmax, ymax = region_shape.bounds
|
||||
# total = 0
|
||||
#
|
||||
# def f(shape_, xmin, ymin, xmax, ymax, recursions, results_):
|
||||
# nonlocal total
|
||||
# shape_ = box(xmin, ymin, xmax, ymax).intersection(shape_)
|
||||
# if recursions == 0:
|
||||
# assert results_.shape == (1,)
|
||||
# results_[0] = shape_.area
|
||||
# total += shape_.area
|
||||
# else:
|
||||
# xmiddle = xmin + (xmax - xmin) / 2
|
||||
# middle_index = len(results_) // 2
|
||||
# f(shape_, xmin, ymin, xmiddle, ymax, recursions - 1, results_[:middle_index])
|
||||
# f(shape_, xmiddle, ymin, xmax, ymax, recursions - 1, results_[middle_index:])
|
||||
#
|
||||
# f(region_shape, xmin, ymin, xmax, ymax, recursions, results)
|
||||
# return results / results.sum() * region_shape.area
|
||||
|
||||
|
||||
def area_adjust_boundaries(region_shape, boundaries, region_cdf_cache=None, resolution=10000):
    """Adjust the boundaries from percentage of the width of a shape, to percentage of the area of a shape.

    For every requested area fraction the sample of the area CDF closest to it
    is looked up; the position of that sample along the width is then turned
    into an x coordinate with ``width_adjust_boundaries``.

    :param region_shape: the geometry the boundaries apply to.
    :param boundaries: 1-D sequence of area fractions.
    :param region_cdf_cache: optional pre-computed result of
        ``region_area_cdf`` (array or plain sequence).  When omitted it is
        computed with ``resolution``.
    :param resolution: number of intervals used when the CDF has to be
        computed here; ignored when ``region_cdf_cache`` is given.
    :return: array of x coordinates, one per boundary.
    """
    if region_cdf_cache is None:
        region_cdf_cache = region_area_cdf(region_shape, resolution)
    region_cdf = np.asarray(region_cdf_cache)
    boundaries = np.asarray(boundaries)

    # The CDF has one sample per interval edge, so the number of intervals is
    # derived from its length.  Using the `resolution` argument here would be
    # wrong for a cache that was computed with a different resolution.
    n_intervals = max(len(region_cdf) - 1, 1)

    nearest_sample = np.abs(region_cdf[None, :] - boundaries[:, None]).argmin(axis=1)
    return width_adjust_boundaries(region_shape, nearest_sample / n_intervals)
|
||||
|
||||
|
||||
def width_adjust_boundaries(region_shape, boundaries):
    """Map fractions of a shape's width onto x coordinates.

    :param region_shape: object whose ``bounds`` is ``(xmin, ymin, xmax, ymax)``.
    :param boundaries: fraction(s) of the width; 0 maps to the left edge of the
        bounding box and 1 to its right edge.
    :return: ``boundaries`` scaled to the width and shifted to the left edge.
    """
    left, _bottom, right, _top = region_shape.bounds
    width = right - left
    return boundaries * width + left
|
||||
|
||||
|
||||
def pronunciation_bars(
    regions, dataframe,
    region_name_property, region_name_column,
    group_column='answer_text',
    cutoff_percentage=0.05,
    normalize_area=True,
    progress_bar=False,
):
    """Build per-answer folium layers that split every region into vertical bars.

    Each region is cut into side-by-side slices, one per value of
    ``group_column``, sized by how often that value occurs among the region's
    rows.  Values that never reach the cutoff in any region are lumped
    together as ``'other'``.

    :param regions: GeoJSON-like dict with a ``'features'`` list.  When
        ``normalize_area`` is true, each processed feature gets a
        ``'__region_shape_cdf_cache'`` entry added to its ``'properties'``
        (the input is mutated) so the area CDF is computed only once.
    :param dataframe: pandas DataFrame with one row per answer.
    :param region_name_property: feature property holding the region name.
    :param region_name_column: dataframe column holding the region name.
    :param group_column: dataframe column whose values are shown as bars.
    :param cutoff_percentage: minimal share (a fraction, ``0.05`` = 5%) of a
        region's rows a value needs, in at least one region, to get its own
        bar and colour.
    :param normalize_area: if true, slice boundaries are chosen so that the
        *area* of a slice matches its share; otherwise its *width* does.
    :param progress_bar: wrap the iteration over features in ``ProgressBar``.
    :return: dict mapping each relevant value, plus ``'other'``, to a
        ``folium.FeatureGroup`` containing that value's polygons.
    """
    # All values of group_column that make up at least the cutoff share of the
    # rows of at least one region.
    relevant_groups = {
        group
        for _, region_rows in dataframe.groupby(region_name_column)
        for group, count in region_rows.groupby(group_column).size().items()
        if count >= cutoff_percentage * len(region_rows)
    }

    # Sorted, so that the colour given to a value does not depend on set
    # iteration order (which varies between interpreter runs for strings).
    ordered_groups = sorted(relevant_groups)
    group_to_color = dict(zip(ordered_groups, get_palette(len(ordered_groups))))
    group_to_color['other'] = '#ccc'

    # Count every value once instead of scanning the column per group.
    overall_counts = dataframe[group_column].value_counts()
    group_totals = {group: overall_counts.get(group, 0) for group in relevant_groups}
    n_other = len(dataframe) - sum(group_totals.values())

    # Each FeatureGroup collects the polygons (one per region) of one value.
    feature_groups = {}
    for group_value, color in group_to_color.items():
        amount = n_other if group_value == 'other' else group_totals[group_value]
        feature_groups[group_value] = folium.FeatureGroup(
            name=colored_name(
                '{value} ({amount})'.format(value=escape(group_value), amount=amount),
                color
            ),
            overlay=True
        )

    wrap_iterable = ProgressBar if progress_bar else (lambda iterable: iterable)

    # For each region, create the bar polygons.
    for feature in wrap_iterable(regions['features']):
        region_name = feature['properties'][region_name_property]
        region_rows = dataframe[dataframe[region_name_column] == region_name]
        n_rows = len(region_rows)
        if n_rows == 0:
            # Nothing to draw, and the shares below would be 0 / 0.
            continue

        region_shape = shape(feature['geometry'])
        _, ymin, _, ymax = region_shape.bounds

        group_values_occurrence = {
            group_value: count
            for group_value, count in region_rows.groupby(group_column).size().items()
            if group_value in relevant_groups
        }
        group_values_occurrence['other'] = n_rows - sum(group_values_occurrence.values())

        # Most frequent value first, 'other' always last.
        group_values, group_occurrences = zip(*sorted(
            group_values_occurrence.items(),
            key=lambda item: (item[0] == 'other', -item[1])
        ))

        group_percentages = np.array(group_occurrences) / n_rows
        # Cumulative shares: slice i spans boundaries[i] .. boundaries[i + 1].
        group_boundaries = np.cumsum((0,) + group_occurrences) / n_rows
        if normalize_area:
            properties = feature['properties']
            if '__region_shape_cdf_cache' not in properties:
                properties['__region_shape_cdf_cache'] = region_area_cdf(region_shape).tolist()
            group_boundaries = area_adjust_boundaries(
                region_shape, group_boundaries,
                region_cdf_cache=properties['__region_shape_cdf_cache']
            )
        else:
            group_boundaries = width_adjust_boundaries(region_shape, group_boundaries)

        for group_value, percentage, count, left_boundary, right_boundary in zip(
            group_values,
            group_percentages,
            group_occurrences,
            group_boundaries[:-1], group_boundaries[1:]
        ):
            if count == 0 or left_boundary == right_boundary:
                continue

            bar_shape = region_shape.intersection(box(left_boundary, ymin, right_boundary, ymax))
            if bar_shape.area == 0:
                continue
            polygon = folium.Polygon(
                reverse_latitude_longitude(mapping(bar_shape)['coordinates']),
                fill_color=group_to_color[group_value],
                fill_opacity=0.8,
                color=None,
                popup='{} ({}, {: 3d}%)'.format(group_value, count, int(round(100 * percentage)))
            )
            polygon.add_to(feature_groups[group_value])

    return feature_groups
|
|
@ -1,5 +1,6 @@
|
|||
from pygeoif.geometry import mapping
|
||||
from shapely.geometry import shape
|
||||
from shapely.geometry.point import Point
|
||||
|
||||
|
||||
def merge_features(geojson, condition, aggregate={}):
|
||||
|
@ -40,4 +41,40 @@ def merge_features(geojson, condition, aggregate={}):
|
|||
'geometry': mapping(union),
|
||||
'properties': properties
|
||||
})
|
||||
return geojson
|
||||
return geojson
|
||||
|
||||
|
||||
def inject_geojson_regions_into_dataframe(
    geojson, dataframe,
    latitude_column='latitude', longitude_column='longitude',
    region_name_property='name',
    region_name_column='region'
):
    """Add a column naming the geojson region that contains each row's coordinates.

    For every row the first feature of ``geojson`` whose geometry contains the
    point ``(longitude, latitude)`` is looked up, and the value of its
    ``region_name_property`` is stored in ``region_name_column``.  Doing this
    once allows later cross references between the geojson and the dataframe
    without repeated shape-point containment checks.  Operates in place.

    :param geojson: GeoJSON-like dict with a ``'features'`` list.
    :param dataframe: pandas DataFrame holding the coordinate columns.
    :param latitude_column: name of the latitude column.
    :param longitude_column: name of the longitude column.
    :param region_name_property: feature property holding the region name.
    :param region_name_column: name of the column that is added.
    :return: the same ``dataframe`` object.  Rows whose point lies in no
        region, or whose coordinates are missing, get ``None``.
    """
    shapes = {
        feature['properties'][region_name_property]: shape(feature['geometry'])
        for feature in geojson['features']
    }

    def get_region_name(point):
        for region_name, region_shape in shapes.items():
            if region_shape.contains(point):
                return region_name
        return None

    def is_missing(value):
        # NaN is the only value that is not equal to itself.
        return value is None or value != value

    # Containment checks are expensive, so each distinct coordinate pair is
    # resolved once.  Missing coordinates are handled before the lookup: a NaN
    # never compares equal, so it cannot serve as a dictionary key.
    point_to_region_name = {}
    region_names = []
    for latitude, longitude in zip(dataframe[latitude_column], dataframe[longitude_column]):
        if is_missing(latitude) or is_missing(longitude):
            region_names.append(None)
            continue
        key = (latitude, longitude)
        if key not in point_to_region_name:
            point_to_region_name[key] = get_region_name(Point(longitude, latitude))
        region_names.append(point_to_region_name[key])

    dataframe[region_name_column] = region_names
    return dataframe
|
||||
|
||||
|
|
Loading…
Reference in New Issue