Cleaned up province segmentation

This commit is contained in:
H.T. Kruitbosch 2018-09-28 16:28:15 +02:00
parent d56eb3a700
commit 650d596f03
22 changed files with 20297 additions and 9417 deletions

View File

@ -15,7 +15,7 @@ using image processing.
Results Results
* [data/dialect_regions.geojson](data/dialect_regions.geojson) * [data/frysian_dialect_regions.geojson](data/frysian_dialect_regions.geojson)
### Group recordings to Frysian dialect regions ### Group recordings to Frysian dialect regions
@ -30,3 +30,29 @@ Results
* [data/free_speech_recordings_by_dialect.xlsx](data/free_speech_recordings_by_dialect.csv) * [data/free_speech_recordings_by_dialect.xlsx](data/free_speech_recordings_by_dialect.csv)
* [data/free_speech_recordings_by_dialect.xlsx](data/free_speech_recordings_by_dialect.xlsx) * [data/free_speech_recordings_by_dialect.xlsx](data/free_speech_recordings_by_dialect.xlsx)
### Segment Friesland (and Groningen) in Gemeentes and Wijken
[notebook](notebooks/Segment%20Provinces%20in%20Wijken%20and%20Gemeentes.ipynb)
Some of the wijken are merged, for example part of Leeuwarden, to prevent the segmentation from becoming too
fine-grained.
[notebook](notebooks/Show%20Province%20Segmentations.ipynb)
Visualized maps of the segmentations.
Results:
* data/Friesland_gemeentes.geojson
* data/Friesland_gemeentes.kml
* data/Friesland_wijken.geojson
* data/Friesland_wijken.kml
* data/frysian_dialect_regions.geojson
* data/Groningen_gemeentes.geojson
* data/Groningen_gemeentes.kml
* data/Groningen_wijken.geojson
* data/Groningen_wijken.kml
* data/Zeeland_gemeentes.geojson
* data/Zeeland_gemeentes.kml
* data/Zeeland_wijken.geojson
* data/Zeeland_wijken.kml

File diff suppressed because one or more lines are too long

1954
data/Friesland_gemeentes.kml Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

5253
data/Friesland_wijken.kml Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

1148
data/Groningen_gemeentes.kml Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

3155
data/Groningen_wijken.kml Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

6708
data/Zeeland_wijken.kml Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

1604
notebooks/Regions.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,139 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Segment provinces\n",
"\n",
"\n",
"Create wijk and gemeente level segmentations for all Dutch provinces and save as geojson and Gabmap KML.\n",
"\n",
"All is based on CBS data.\n",
"\n",
"For Friesland, several wijken are merged.\n",
"\n",
"Note: only applied to Groningen and Friesland, because other provinces give gemetry errors."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('../')\n",
"\n",
"from stimmen.latitude_longitude import reverse_latitude_longitude\n",
"from stimmen.shapefile import shapefiles_to_geojson\n",
"from stimmen.cbs import gwb_in_province, get_available_provinces\n",
"from stimmen.geojson import merge_features\n",
"from gabmap import as_gabmap_kml\n",
"\n",
"from shapely.geometry import shape, box, mapping\n",
"\n",
"import json\n",
"import folium\n",
"import pickle\n",
"\n",
"from collections import defaultdict"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"for province in ['Groningen', 'Friesland']:\n",
" wijken_geojson = gwb_in_province(province, 'wijk', 2018)\n",
" gemeente_geojson = gwb_in_province(province, 'gem', 2018)\n",
"\n",
" if province == 'Friesland':\n",
" for gemeente in {'Ameland', 'Harlingen', 'Schiermonnikoog', 'Terschelling', 'Vlieland'}:\n",
" merged_geojson = merge_features(\n",
" wijken_geojson.copy(),\n",
" condition=lambda feature: feature['properties']['GM_NAAM'] == gemeente,\n",
" aggregate={'WK_NAAM': ' '.join}\n",
" )\n",
"\n",
" merge_leeuwarden_only_above_latitude = 53.167\n",
" wijken_geojson = merge_features(\n",
" wijken_geojson,\n",
" condition=lambda feature: (\n",
" feature['properties']['GM_NAAM'] == 'Leeuwarden' and\n",
" shape(feature['geometry']).centroid.y > merge_leeuwarden_only_above_latitude\n",
" ),\n",
" aggregate={'WK_NAAM': ' '.join}\n",
" )\n",
" \n",
" #Some gemeentes appear twice in the cbs data.\n",
" for gemeente in [feature['properties']['GM_NAAM'] for feature in gemeente_geojson['features']]:\n",
" gemeente_geojson = merge_features(\n",
" gemeente_geojson, condition=lambda feature: feature['properties']['GM_NAAM'] == gemeente)\n",
" \n",
" for feature in wijken_geojson['features']:\n",
" feature['properties']['gemeente_en_wijk_naam'] = (\n",
" feature['properties']['GM_NAAM'] +\n",
" ', ' +\n",
" feature['properties'].get('WK_NAAM', '')\n",
" ).replace('&', 'en').replace('/', ' ').replace('\"', ' ').replace(\"'\", ' ')\n",
" \n",
" for feature in gemeente_geojson['features']:\n",
" feature['properties']['gemeente_naam'] = (\n",
" feature['properties']['GM_NAAM']\n",
" ).replace('&', 'en').replace('/', ' ').replace('\"', ' ').replace(\"'\", ' ')\n",
" \n",
" with open('../data/{}_wijken.geojson'.format(province), 'w') as f:\n",
" json.dump(wijken_geojson, f)\n",
" with open('../data/{}_gemeentes.geojson'.format(province), 'w') as f:\n",
" json.dump(gemeente_geojson, f)\n",
" \n",
" with open('../data/{}_wijken.kml'.format(province), 'w') as f:\n",
" f.write(as_gabmap_kml(wijken_geojson, name_property='gemeente_en_wijk_naam'))\n",
" with open('../data/{}_gemeentes.kml'.format(province), 'w') as f:\n",
" f.write(as_gabmap_kml(gemeente_geojson, name_property='gemeente_naam'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

93
stimmen/cbs.py Normal file
View File

@ -0,0 +1,93 @@
from threading import Thread
import time
from pygeoif.geometry import mapping
from shapely.geometry import shape, box
from stimmen.shapefile import shapefiles_to_geojson
import os.path
def data_file(*args):
    """Build a path relative to the parent of this module's directory.

    ``args`` are the path components appended after ``<module dir>/..``.
    The ``..`` segment is kept as-is; the result is not normalised.
    """
    module_directory = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(module_directory, '..', *args)
# Cache of province features, keyed by (province, with_water); filled by
# province_geojson.
__cache = {}
# Timestamp (time.time()) written by province_geojson; None until the cache
# has been used for the first time.
__last_access = None


def clear_cache_poll(poll_interval=60*60, max_idle=60*60, max_polls=None):
    """Periodically drop the province cache once it has been idle long enough.

    Sleeps ``poll_interval`` seconds, then replaces the module-level cache by
    an empty dict if the last recorded access is more than ``max_idle``
    seconds ago.  Meant to run in a background daemon thread.

    Args:
        poll_interval: Seconds to sleep before each check (default one hour).
        max_idle: Idle time in seconds after which the cache is cleared
            (default one hour).
        max_polls: Number of checks to perform before returning; ``None``
            (the default) polls forever.
    """
    global __cache, __last_access
    polls_done = 0
    while max_polls is None or polls_done < max_polls:
        time.sleep(poll_interval)
        polls_done += 1
        # __last_access is None while the cache was never used; subtracting
        # None from a float raises TypeError and would end the polling thread.
        if __last_access is not None and (time.time() - __last_access) > max_idle:
            __cache = {}


Thread(target=clear_cache_poll, daemon=True).start()
def get_available_provinces():
    """Return the set of province names present in the province-border shapefile.

    Reads ``data/geo/cbs/2018_Imergis_provinciegrenzen_met_water`` and collects
    the ``provincien`` property of every feature in it.
    """
    shapefile_path = data_file('data', 'geo', 'cbs', "2018_Imergis_provinciegrenzen_met_water")
    province_features = shapefiles_to_geojson(shapefile_path)['features']
    names = set()
    for feature in province_features:
        names.add(feature['properties']['provincien'])
    return names
def province_geojson(province, with_water=False):
    """Return the GeoJSON feature of one province, cached per argument pair.

    Args:
        province: Province name, matched against the ``provincien`` property.
        with_water: When true, read the ``..._met_water`` border shapefile,
            otherwise the ``..._kustlijn`` one.

    Returns:
        The first feature in the selected shapefile whose ``provincien``
        property equals ``province``.

    Raises:
        AssertionError: If ``province`` is not one of
            ``get_available_provinces()``.
    """
    global __cache, __last_access

    cache_key = (province, with_water)
    if with_water:
        shapefile_name = "2018_Imergis_provinciegrenzen_met_water"
    else:
        shapefile_name = "2018-Imergis_provinciegrenzen_kustlijn"
    file = data_file('data', 'geo', 'cbs', shapefile_name)

    if cache_key not in __cache:
        provinces = shapefiles_to_geojson(file)['features']
        available_provinces = get_available_provinces()
        assert province in available_provinces, "Province {} not found, options: {}".format(
            province, ', '.join(available_provinces))
        matching = (
            candidate for candidate in provinces
            if candidate['properties']['provincien'] == province
        )
        __cache[cache_key] = next(matching)

    # Record the access time; clear_cache_poll compares against it.
    __last_access = time.time()
    return __cache[cache_key]
def gwb_in_province(
    province='Friesland', region_level='wijk', region_year='2018',
    polygon_simplification=0.001, province_dilation=0.0005
):
    """Return the regions of one level that lie within a province.

    Regions are read from the shapefile ``data/geo/cbs/<region_level>_<region_year>``.
    A region is kept when its shape is contained in the bounding box of the
    province outline including water and its intersection with the buffered
    land-only outline has a positive area.  The geometry of each kept feature
    is replaced by that intersection, simplified.

    Args:
        province: Province name, passed to ``province_geojson``.
        region_level: One of ``'gem'``, ``'wijk'`` or ``'buurt'``.
        region_year: 2017 or 2018, as an int or as a digit string.
        polygon_simplification: Tolerance passed to ``simplify``.
        province_dilation: Distance by which the land-only outline is buffered
            (applied with a negative sign, see the note below).

    Returns:
        A dict ``{"type": "FeatureCollection", "features": [...]}``; the
        feature list is empty when no region matches.

    Raises:
        AssertionError: If ``region_level`` or ``region_year`` is not supported.
    """
    assert region_level in {'gem', 'wijk', 'buurt'}, (
        "region_level {} not supported, must be gem, wijk or buurt".format(region_level))
    # The default is the string '2018', which could never be a member of the
    # integer set below; normalise digit strings so the default is usable.
    if isinstance(region_year, str) and region_year.isdigit():
        region_year = int(region_year)
    assert region_year in {2017, 2018}, (
        "region_year {} not supported, must 2017 or 2018".format(region_year))

    province_with_water = shape(province_geojson(province, with_water=True)['geometry'])
    province_bounding_box = box(*province_with_water.bounds)
    province_land_only = shape(province_geojson(province, with_water=False)['geometry'])
    # NOTE(review): a negative buffer distance shrinks the outline although the
    # names say "dilation" -- presumably intended to drop slivers of
    # neighbouring regions along the border; confirm.
    province_land_only_dilated = province_land_only.buffer(-province_dilation)

    regions = shapefiles_to_geojson(
        data_file('data', 'geo', 'cbs', '{}_{}'.format(region_level, region_year)))

    # An explicit loop rather than zip(*...) unpacking, which raises ValueError
    # when not a single region matches.
    features = []
    for feature in regions['features']:
        region_shape = shape(feature['geometry'])
        if not province_bounding_box.contains(region_shape):
            continue
        intersection = region_shape.intersection(province_land_only_dilated)
        if intersection.area > 0:
            feature['geometry'] = mapping(
                intersection.simplify(tolerance=polygon_simplification))
            features.append(feature)

    return {"type": "FeatureCollection", "features": features}

43
stimmen/geojson.py Normal file
View File

@ -0,0 +1,43 @@
from pygeoif.geometry import mapping
from shapely.geometry import shape
def merge_features(geojson, condition, aggregate=None):
    """Merge all features of ``geojson`` that satisfy ``condition`` into one.

    The geometries of the matching features are combined with shapely's
    ``union``.  Properties named in ``aggregate`` are computed by calling the
    given function on the list of values of that property over the matching
    features (features lacking the property are skipped).  All other
    properties are copied from the matching features, later features
    overriding earlier ones.  Operates in place and returns ``geojson``.

    Args:
        geojson: Dict with a ``'features'`` list of GeoJSON features.
        condition: Callable that receives a feature and returns a truthy value
            for features that must be merged.
        aggregate: Optional dict mapping property name to an aggregation
            function taking a list of values.

    Returns:
        The same ``geojson`` object.  With no match it is untouched; with one
        match only that feature's properties are replaced; otherwise the
        matching features are removed and the merged feature is appended.
    """
    aggregate = {} if aggregate is None else aggregate
    features = geojson['features']

    indices = [index for index, feature in enumerate(features) if condition(feature)]
    if not indices:
        return geojson
    matched = [features[index] for index in indices]

    properties = {
        prop: agg([
            feature['properties'][prop]
            for feature in matched
            if prop in feature['properties']
        ])
        for prop, agg in aggregate.items()
    }
    for feature in matched:
        for key, value in feature['properties'].items():
            if key not in aggregate:
                properties[key] = value

    if len(indices) == 1:
        # A single match needs no geometry union; only its properties are
        # replaced by the aggregated ones.
        matched[0]['properties'] = properties
        return geojson

    union = shape(matched[0]['geometry'])
    for feature in matched[1:]:
        union = union.union(shape(feature['geometry']))

    # Delete from the back so the indices still to be deleted stay valid.
    for index in reversed(indices):
        del features[index]

    features.append({
        # GeoJSON requires a "type" member on every feature.
        'type': 'Feature',
        'geometry': mapping(union),
        'properties': properties
    })
    return geojson

View File

@ -0,0 +1,9 @@
def reverse_latitude_longitude(rd_multipolygon):
    """Swap the first two values of every position in nested coordinates.

    A position is a sequence of at least two numbers (ints or floats); its
    first two values are swapped and any further values are kept in place.
    Every other sequence is treated as a container and processed recursively.

    Args:
        rd_multipolygon: A position, or arbitrarily nested sequences of
            positions.

    Returns:
        For a position, a sequence of the same type with the first two values
        swapped; for a container, a list with every element converted.
    """
    is_position = (
        len(rd_multipolygon) >= 2 and
        all(isinstance(value, (int, float)) for value in rd_multipolygon)
    )
    if is_position:
        if len(rd_multipolygon) == 2:
            return rd_multipolygon[::-1]
        # Keep e.g. an altitude behind the swapped pair.
        return rd_multipolygon[1::-1] + rd_multipolygon[2:]
    return [reverse_latitude_longitude(element) for element in rd_multipolygon]

43
stimmen/shapefile.py Normal file
View File

@ -0,0 +1,43 @@
from osgeo.osr import SpatialReference, CoordinateTransformation
import shapefile
# Source spatial reference, loaded from EPSG code 28992.
# NOTE(review): presumably the Dutch RD grid used by the CBS shapefiles -- confirm.
epsg28992 = SpatialReference()
epsg28992.ImportFromEPSG(28992)
# Explicit TOWGS84 datum-shift parameters for the source reference.
# NOTE(review): the origin of these seven constants is not documented here -- confirm.
epsg28992.SetTOWGS84(565.237 ,50.0087 ,465.658 ,-0.406857 ,0.350733 ,-1.87035 ,4.0812)
# Target spatial reference, loaded from EPSG code 4326.
epsg4326 = SpatialReference()
epsg4326.ImportFromEPSG(4326)
# Transformation from the source to the target reference; used by rd_to_longlat.
# NOTE(review): GDAL 3 changed the default axis order for EPSG:4326, so the
# order of the first two values returned by TransformPoint may depend on the
# installed GDAL version -- confirm against the callers.
rd2latlon = CoordinateTransformation(epsg28992, epsg4326)
def rd_to_longlat(rd_multipolygon):
    """Transform nested coordinates with the module-level ``rd2latlon``.

    A sequence of exactly two numbers (ints or floats) is treated as a point
    and passed to ``rd2latlon.TransformPoint``; the first two values of the
    result are returned as a list.  Every other sequence is treated as a
    container and converted recursively.

    Args:
        rd_multipolygon: A point, or arbitrarily nested sequences of points.

    Returns:
        A list mirroring the nesting of the input, with every point replaced
        by its transformed two-element list.
    """
    # isinstance rather than an exact type comparison, so integer coordinates
    # are recognised as a point instead of being recursed into.
    is_point = (
        len(rd_multipolygon) == 2 and
        all(isinstance(value, (int, float)) for value in rd_multipolygon)
    )
    if is_point:
        return list(rd2latlon.TransformPoint(*rd_multipolygon)[:2])
    return [rd_to_longlat(element) for element in rd_multipolygon]
def shapefiles_to_geojson(filename):
    """Read a shapefile and return its records as a GeoJSON FeatureCollection.

    Every shape record becomes one feature: the properties map the field names
    to the record values, and the geometry is the shape's ``__geo_interface__``
    with its coordinates converted by ``rd_to_longlat``.

    Args:
        filename: Path handed to ``shapefile.Reader``.

    Returns:
        A dict ``{"type": "FeatureCollection", "features": [...]}``.
    """
    reader = shapefile.Reader(filename)
    # The first field is skipped.
    # NOTE(review): presumably pyshp's deletion-flag pseudo field -- confirm.
    column_names = [field[0] for field in reader.fields[1:]]

    def to_feature(shape_record):
        # Convert one shape record into a GeoJSON feature dict.
        record_properties = dict(zip(column_names, shape_record.record))
        geometry = shape_record.shape.__geo_interface__
        geometry['coordinates'] = rd_to_longlat(geometry['coordinates'])
        return {
            'type': "Feature",
            'geometry': geometry,
            'properties': record_properties
        }

    return {
        "type": "FeatureCollection",
        "features": [to_feature(shape_record) for shape_record in reader.shapeRecords()]
    }