stimmenfryslan/stimmen/folium.py

173 lines
6.6 KiB
Python
Raw Normal View History

2018-10-03 16:11:31 +02:00
import folium
from jupyter_progressbar import ProgressBar
from pygeoif.geometry import mapping
from shapely.geometry.geo import shape, box
from stimmen.cbs import data_file
from html import escape
import numpy as np
from stimmen.latitude_longitude import reverse_latitude_longitude
def get_palette(n, no_black=True, no_white=True):
with open(data_file('data', 'glasbey', '{}_colors.txt'.format(n + no_black + no_white))) as f:
return [
'#%02x%02x%02x' % tuple(int(c) for c in line.replace('\n', '').split(','))
for line in f
if not no_black or line != '0,0,0\n'
if not no_white or line != '255,255,255\n'
]
def colored_name(name, color):
return '<span style=\\"color:{}; \\">{}</span>'.format(color, name)
def region_area_cdf(region_shape, resolution=10000):
xmin, ymin, xmax, ymax = region_shape.bounds
shape_area = region_shape.area
spaces = np.linspace(xmin, xmax, resolution + 1)
return np.array([
box(xmin, ymin, xmax_, ymax).intersection(region_shape).area / shape_area
for xmax_ in spaces
])
# Only slightly faster than region_area_cdf.
# def fast_sliced_shape_areas(region_shape, recursions=13):
# results = np.zeros(2 ** recursions)
# xmin, ymin, xmax, ymax = region_shape.bounds
# total = 0
#
# def f(shape_, xmin, ymin, xmax, ymax, recursions, results_):
# nonlocal total
# shape_ = box(xmin, ymin, xmax, ymax).intersection(shape_)
# if recursions == 0:
# assert results_.shape == (1,)
# results_[0] = shape_.area
# total += shape_.area
# else:
# xmiddle = xmin + (xmax - xmin) / 2
# middle_index = len(results_) // 2
# f(shape_, xmin, ymin, xmiddle, ymax, recursions - 1, results_[:middle_index])
# f(shape_, xmiddle, ymin, xmax, ymax, recursions - 1, results_[middle_index:])
#
# f(region_shape, xmin, ymin, xmax, ymax, recursions, results)
# return results / results.sum() * region_shape.area
def area_adjust_boundaries(region_shape, boundaries, region_cdf_cache=None, resolution=10000):
"""Adjust the boundaries from percentage of the width of a shape, to percentage of the area of a shape"""
if region_cdf_cache is None:
region_cdf_cache = region_area_cdf(region_shape, resolution)
elif not isinstance(region_cdf_cache, np.ndarray):
region_cdf_cache = np.array(region_cdf_cache)
return width_adjust_boundaries(
region_shape,
np.abs(region_cdf_cache[None, :] - boundaries[:, None]).argmin(axis=1) / resolution
)
def width_adjust_boundaries(region_shape, boundaries):
xmin, _, xmax, _ = region_shape.bounds
return boundaries * (xmax - xmin) + xmin
def pronunciation_bars(
regions, dataframe,
region_name_property, region_name_column,
group_column='answer_text',
cutoff_percentage=0.05,
normalize_area=True,
progress_bar=False,
):
# all values of group_column that appear at least cutoff_percentage in one of the regions
relevant_groups = {
group
for region_name, region_rows in dataframe.groupby(region_name_column)
for group, aggregation in region_rows.groupby(
group_column).agg({group_column: len}).iterrows()
if aggregation[group_column] >= cutoff_percentage * len(region_rows)
}
group_to_color = dict(zip(relevant_groups, get_palette(len(relevant_groups))))
group_to_color['other'] = '#ccc'
n_other = len(dataframe) - sum(
sum(dataframe[group_column] == group_value)
for group_value in relevant_groups
)
# Each FeatureGroup represents all polygons (one for each region) of the relevant_groups
feature_groups = {
group_value: folium.FeatureGroup(
name=colored_name(
'{value} ({amount})'.format(value=escape(group_value), amount=amount),
color
),
overlay=True
)
for group_value, color in group_to_color.items()
for amount in [
sum(dataframe[group_column] == group_value)
if group_value != 'other' else
n_other
] # alias
}
progress_bar = ProgressBar if progress_bar else lambda x: x
# for each region, create the bar-polygons.
for feature in progress_bar(regions['features']):
region_name = feature['properties'][region_name_property]
region_rows = dataframe[dataframe[region_name_column] == region_name]
region_shape = shape(feature['geometry'])
_, ymin, _, ymax = region_shape.bounds
group_values_occurrence = {
group_value: aggregation[group_column]
for group_value, aggregation in region_rows.groupby(group_column).agg({group_column: len}).iterrows()
if group_value in relevant_groups
}
group_values_occurrence['other'] = len(region_rows) - sum(group_values_occurrence.values())
group_values, group_occurrences = zip(*sorted(
group_values_occurrence.items(),
key=lambda x: (x[0] == 'other', -x[1])
))
group_percentages = np.array(group_occurrences) / len(region_rows)
group_boundaries = np.cumsum((0,) + group_occurrences) / len(region_rows)
if normalize_area:
if '__region_shape_cdf_cache' not in feature['properties']:
feature['properties']['__region_shape_cdf_cache'] = region_area_cdf(region_shape).tolist()
group_boundaries = area_adjust_boundaries(
region_shape, group_boundaries,
region_cdf_cache=feature['properties']['__region_shape_cdf_cache']
)
else:
group_boundaries = width_adjust_boundaries(region_shape, group_boundaries)
for group_value, percentage, count, left_boundary, right_boundary in zip(
group_values,
group_percentages,
group_occurrences,
group_boundaries[:-1], group_boundaries[1:]
):
if count == 0 or left_boundary == right_boundary:
continue
bar_shape = region_shape.intersection(box(left_boundary, ymin, right_boundary, ymax))
if bar_shape.area == 0:
continue
polygon = folium.Polygon(
reverse_latitude_longitude(mapping(bar_shape)['coordinates']),
fill_color=group_to_color[group_value],
fill_opacity=0.8,
color=None,
popup='{} ({}, {: 3d}%)'.format(group_value, count, int(round(100 * percentage)))
)
polygon.add_to(feature_groups[group_value])
return feature_groups