Cleaned up province segmentation
This commit is contained in:
parent
d56eb3a700
commit
650d596f03
28
Readme.md
28
Readme.md
|
@ -15,7 +15,7 @@ using image processing.
|
|||
|
||||
Results
|
||||
|
||||
* [data/dialect_regions.geojson](data/dialect_regions.geojson)
|
||||
* [data/frysian_dialect_regions.geojson](data/frysian_dialect_regions.geojson)
|
||||
|
||||
### Group recordings to Frysian dialect regions
|
||||
|
||||
|
@ -30,3 +30,29 @@ Results
|
|||
* [data/free_speech_recordings_by_dialect.xlsx](data/free_speech_recordings_by_dialect.csv)
|
||||
* [data/free_speech_recordings_by_dialect.xlsx](data/free_speech_recordings_by_dialect.xlsx)
|
||||
|
||||
### Segment Friesland (and Groningen) in Gemeentes and Wijken
|
||||
|
||||
[notebook](notebooks/Segment%20Provinces%20in%20Wijken%20and%20Gemeentes.ipynb)
|
||||
|
||||
Some of the wijken are merged, for example part of Leeuwarden, to avoid that the segmentation gets too fine
|
||||
grained.
|
||||
|
||||
[notebook](notebooks/Show%20Province%20Segmentations.ipynb)
|
||||
|
||||
Visualized maps of the segmentations.
|
||||
|
||||
Results:
|
||||
|
||||
* data/Friesland_gemeentes.geojson
|
||||
* data/Friesland_gemeentes.kml
|
||||
* data/Friesland_wijken.geojson
|
||||
* data/Friesland_wijken.kml
|
||||
* data/frysian_dialect_regions.geojson
|
||||
* data/Groningen_gemeentes.geojson
|
||||
* data/Groningen_gemeentes.kml
|
||||
* data/Groningen_wijken.geojson
|
||||
* data/Groningen_wijken.kml
|
||||
* data/Zeeland_gemeentes.geojson
|
||||
* data/Zeeland_gemeentes.kml
|
||||
* data/Zeeland_wijken.geojson
|
||||
* data/Zeeland_wijken.kml
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,139 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Segment provinces\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Create wijk and gemeente level segmentations for all Dutch provinces and save as geojson and Gabmap KML.\n",
|
||||
"\n",
|
||||
"All is based on CBS data.\n",
|
||||
"\n",
|
||||
"For Friesland, several wijken are merged.\n",
|
||||
"\n",
|
||||
"Note: only applied to Groningen and Friesland, because other provinces give geometry errors."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%load_ext autoreload\n",
|
||||
"\n",
|
||||
"%autoreload 2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"sys.path.append('../')\n",
|
||||
"\n",
|
||||
"from stimmen.latitude_longitude import reverse_latitude_longitude\n",
|
||||
"from stimmen.shapefile import shapefiles_to_geojson\n",
|
||||
"from stimmen.cbs import gwb_in_province, get_available_provinces\n",
|
||||
"from stimmen.geojson import merge_features\n",
|
||||
"from gabmap import as_gabmap_kml\n",
|
||||
"\n",
|
||||
"from shapely.geometry import shape, box, mapping\n",
|
||||
"\n",
|
||||
"import json\n",
|
||||
"import folium\n",
|
||||
"import pickle\n",
|
||||
"\n",
|
||||
"from collections import defaultdict"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for province in ['Groningen', 'Friesland']:\n",
|
||||
" wijken_geojson = gwb_in_province(province, 'wijk', 2018)\n",
|
||||
" gemeente_geojson = gwb_in_province(province, 'gem', 2018)\n",
|
||||
"\n",
|
||||
" if province == 'Friesland':\n",
|
||||
" for gemeente in {'Ameland', 'Harlingen', 'Schiermonnikoog', 'Terschelling', 'Vlieland'}:\n",
|
||||
" merged_geojson = merge_features(\n",
|
||||
" wijken_geojson.copy(),\n",
|
||||
" condition=lambda feature: feature['properties']['GM_NAAM'] == gemeente,\n",
|
||||
" aggregate={'WK_NAAM': ' '.join}\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" merge_leeuwarden_only_above_latitude = 53.167\n",
|
||||
" wijken_geojson = merge_features(\n",
|
||||
" wijken_geojson,\n",
|
||||
" condition=lambda feature: (\n",
|
||||
" feature['properties']['GM_NAAM'] == 'Leeuwarden' and\n",
|
||||
" shape(feature['geometry']).centroid.y > merge_leeuwarden_only_above_latitude\n",
|
||||
" ),\n",
|
||||
" aggregate={'WK_NAAM': ' '.join}\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" #Some gemeentes appear twice in the cbs data.\n",
|
||||
" for gemeente in [feature['properties']['GM_NAAM'] for feature in gemeente_geojson['features']]:\n",
|
||||
" gemeente_geojson = merge_features(\n",
|
||||
" gemeente_geojson, condition=lambda feature: feature['properties']['GM_NAAM'] == gemeente)\n",
|
||||
" \n",
|
||||
" for feature in wijken_geojson['features']:\n",
|
||||
" feature['properties']['gemeente_en_wijk_naam'] = (\n",
|
||||
" feature['properties']['GM_NAAM'] +\n",
|
||||
" ', ' +\n",
|
||||
" feature['properties'].get('WK_NAAM', '')\n",
|
||||
" ).replace('&', 'en').replace('/', ' ').replace('\"', ' ').replace(\"'\", ' ')\n",
|
||||
" \n",
|
||||
" for feature in gemeente_geojson['features']:\n",
|
||||
" feature['properties']['gemeente_naam'] = (\n",
|
||||
" feature['properties']['GM_NAAM']\n",
|
||||
" ).replace('&', 'en').replace('/', ' ').replace('\"', ' ').replace(\"'\", ' ')\n",
|
||||
" \n",
|
||||
" with open('../data/{}_wijken.geojson'.format(province), 'w') as f:\n",
|
||||
" json.dump(wijken_geojson, f)\n",
|
||||
" with open('../data/{}_gemeentes.geojson'.format(province), 'w') as f:\n",
|
||||
" json.dump(gemeente_geojson, f)\n",
|
||||
" \n",
|
||||
" with open('../data/{}_wijken.kml'.format(province), 'w') as f:\n",
|
||||
" f.write(as_gabmap_kml(wijken_geojson, name_property='gemeente_en_wijk_naam'))\n",
|
||||
" with open('../data/{}_gemeentes.kml'.format(province), 'w') as f:\n",
|
||||
" f.write(as_gabmap_kml(gemeente_geojson, name_property='gemeente_naam'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,93 @@
|
|||
from threading import Thread
|
||||
import time
|
||||
|
||||
from pygeoif.geometry import mapping
|
||||
from shapely.geometry import shape, box
|
||||
|
||||
from stimmen.shapefile import shapefiles_to_geojson
|
||||
|
||||
import os.path
|
||||
|
||||
|
||||
def data_file(*args):
    """Build a path to a data file relative to the repository root.

    The root is taken to be one directory above this module's directory;
    *args are joined onto it as path components.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(module_dir, '..', *args)
|
||||
|
||||
# Cache of loaded province features, keyed on (province, with_water);
# filled lazily by province_geojson below.
__cache = {}
# time.time() of the most recent cache lookup; None until the first call
# to province_geojson.
__last_access = None
|
||||
|
||||
|
||||
def clear_cache_poll():
    """Evict the province cache after an hour without lookups.

    Runs forever in a daemon thread: wakes up hourly and empties
    ``__cache`` when ``province_geojson`` has not been called for more
    than an hour.
    """
    global __cache, __last_access
    while True:
        time.sleep(60 * 60)
        # __last_access stays None until the first lookup; the cache is
        # empty then, so there is nothing to evict. (The original code
        # raised TypeError on `time.time() - None` in that case.)
        if __last_access is not None and (time.time() - __last_access) > 60 * 60:
            __cache = {}


# Daemon thread: never blocks interpreter shutdown.
Thread(target=clear_cache_poll, daemon=True).start()
|
||||
|
||||
|
||||
def get_available_provinces():
    """Return the set of province names found in the CBS province shapefile."""
    shapefile_path = data_file('data', 'geo', 'cbs', "2018_Imergis_provinciegrenzen_met_water")
    features = shapefiles_to_geojson(shapefile_path)['features']
    return {feature['properties']['provincien'] for feature in features}
|
||||
|
||||
|
||||
def province_geojson(province, with_water=False):
    """Return the GeoJSON feature of a single province, lazily loaded.

    Features are memoized in the module-level ``__cache`` keyed on
    (province, with_water); ``__last_access`` is refreshed on every call so
    the background poller can tell the cache is still in use.
    Raises AssertionError when *province* is not a known province name.
    """
    global __cache, __last_access

    if with_water:
        shapefile_name = "2018_Imergis_provinciegrenzen_met_water"
    else:
        shapefile_name = "2018-Imergis_provinciegrenzen_kustlijn"
    file = data_file('data', 'geo', 'cbs', shapefile_name)

    key = (province, with_water)
    if key not in __cache:
        features = shapefiles_to_geojson(file)['features']
        available_provinces = get_available_provinces()

        assert province in available_provinces, "Province {} not found, options: {}".format(
            province, ', '.join(available_provinces))

        __cache[key] = next(
            feature for feature in features
            if feature['properties']['provincien'] == province)

    __last_access = time.time()
    return __cache[key]
|
||||
|
||||
|
||||
def gwb_in_province(
    province='Friesland', region_level='wijk', region_year=2018,
    polygon_simplification=0.001, province_dilation=0.0005
):
    """Return CBS gemeente/wijk/buurt regions clipped to one province.

    Regions are read from the CBS shapefile for *region_year*, restricted to
    those inside *province* (land only, eroded by *province_dilation* degrees
    so coastal intersections clip cleanly), simplified with tolerance
    *polygon_simplification*, and returned as a GeoJSON FeatureCollection.

    Raises AssertionError for an unsupported region_level or region_year.
    """
    assert region_level in {'gem', 'wijk', 'buurt'}, (
        "region_level {} not supported, must be gem, wijk or buurt".format(region_level))
    # Normalize a numeric string year ('2018') to an int. The old default was
    # the *string* '2018', which always failed the assertion below.
    if isinstance(region_year, str) and region_year.isdigit():
        region_year = int(region_year)
    assert region_year in {2017, 2018}, (
        "region_year {} not supported, must 2017 or 2018".format(region_year))

    # Bounding box of the province including water: a cheap first filter.
    province_with_water = shape(province_geojson(province, with_water=True)['geometry'])
    province_bounding_box = box(*province_with_water.bounds)

    # Land-only outline, eroded slightly so intersections along the
    # coastline do not produce slivers.
    province_land_only = shape(province_geojson(province, with_water=False)['geometry'])
    province_land_only_dilated = province_land_only.buffer(-province_dilation)

    geojson = shapefiles_to_geojson(
        data_file('data', 'geo', 'cbs', '{}_{}'.format(region_level, region_year)))

    features = []
    for feature in geojson['features']:
        region_shape = shape(feature['geometry'])
        if not province_bounding_box.contains(region_shape):
            continue
        intersection = region_shape.intersection(province_land_only_dilated)
        if intersection.area <= 0:
            continue
        feature['geometry'] = mapping(intersection.simplify(tolerance=polygon_simplification))
        features.append(feature)

    # Unlike the original zip(*...) construction, an empty selection now
    # yields an empty FeatureCollection instead of raising.
    return {"type": "FeatureCollection", "features": features}
|
|
@ -0,0 +1,43 @@
|
|||
from pygeoif.geometry import mapping
|
||||
from shapely.geometry import shape
|
||||
|
||||
|
||||
def merge_features(geojson, condition, aggregate=None):
    """Merge (union) all features of *geojson* that satisfy *condition*.

    Operates in place: the matched features are removed and replaced by a
    single feature whose geometry is the shapely union of their geometries.

    Parameters:
        geojson: a GeoJSON FeatureCollection dict.
        condition: callable receiving one feature dict, returning True for
            features to merge.
        aggregate: optional mapping of property name -> function; the function
            receives the list of that property's values over all matched
            features. Properties not listed keep the value of the last
            matched feature.

    Returns the (mutated) *geojson*.
    """
    # Avoid the original mutable default argument ({}).
    aggregate = {} if aggregate is None else aggregate
    indices = [index for index, feature in enumerate(geojson['features']) if condition(feature)]
    if len(indices) == 0:
        # Nothing matched; nothing to merge.
        return geojson
    # Aggregated properties: each function gets the list of values over all
    # matched features (features missing the property are skipped).
    properties = {
        prop: agg([
            properties_[prop]
            for index in indices
            for properties_ in [geojson['features'][index]['properties']]
            if prop in properties_
        ])
        for prop, agg in aggregate.items()
    }
    # Non-aggregated properties: the last matched feature wins.
    properties.update({
        key: value
        for index in indices
        for key, value in geojson['features'][index]['properties'].items()
        if key not in aggregate
    })
    if len(indices) == 1:
        # A single match needs no geometry union; just rewrite properties.
        geojson['features'][indices[0]]['properties'] = properties
        return geojson

    union = shape(geojson['features'][indices[0]]['geometry'])
    for index in indices[1:]:
        union = union.union(shape(geojson['features'][index]['geometry']))

    for index in indices[::-1]:  # reverse, so pending indices do not shift
        del geojson['features'][index]

    geojson['features'].append({
        'geometry': mapping(union),
        'properties': properties
    })
    return geojson
|
|
@ -0,0 +1,9 @@
|
|||
|
||||
|
||||
def reverse_latitude_longitude(rd_multipolygon):
    """Recursively swap the two coordinates of every (lat, lon) pair.

    Accepts a single coordinate pair or arbitrarily nested sequences of
    pairs (lines, polygons, multipolygons). A leaf is any length-2 sequence
    of numbers; the original exact-`float` type check crashed on integer
    coordinates, so isinstance is used instead.
    """
    if (len(rd_multipolygon) == 2
            and all(isinstance(coordinate, (int, float)) for coordinate in rd_multipolygon)):
        return rd_multipolygon[::-1]
    return [
        reverse_latitude_longitude(element)
        for element in rd_multipolygon
    ]
|
|
@ -0,0 +1,43 @@
|
|||
from osgeo.osr import SpatialReference, CoordinateTransformation
|
||||
import shapefile
|
||||
|
||||
# Source projection: the Dutch national grid ("Rijksdriehoek", EPSG:28992).
epsg28992 = SpatialReference()
epsg28992.ImportFromEPSG(28992)

# Seven-parameter Helmert datum shift towards WGS84 — presumably the
# standard correction for the Dutch RD datum; TODO confirm against the
# EPSG registry.
epsg28992.SetTOWGS84(565.237 ,50.0087 ,465.658 ,-0.406857 ,0.350733 ,-1.87035 ,4.0812)

# Target projection: WGS84 longitude/latitude (EPSG:4326).
epsg4326 = SpatialReference()
epsg4326.ImportFromEPSG(4326)

# Shared transformation used by rd_to_longlat below.
rd2latlon = CoordinateTransformation(epsg28992, epsg4326)
|
||||
|
||||
|
||||
def rd_to_longlat(rd_multipolygon):
    """Recursively convert Rijksdriehoek (EPSG:28992) coordinates to
    longitude/latitude (EPSG:4326).

    Accepts a single (x, y) pair or arbitrarily nested sequences of pairs
    (lines, polygons, multipolygons).
    """
    # A leaf is any length-2 sequence of numbers; the original exact-`float`
    # type check crashed on integer coordinates.
    if (len(rd_multipolygon) == 2
            and all(isinstance(coordinate, (int, float)) for coordinate in rd_multipolygon)):
        return list(rd2latlon.TransformPoint(*rd_multipolygon)[:2])
    return [
        rd_to_longlat(element)
        for element in rd_multipolygon
    ]
|
||||
|
||||
|
||||
def shapefiles_to_geojson(filename):
    """Read an ESRI shapefile and return it as a GeoJSON FeatureCollection,
    with coordinates converted from Rijksdriehoek to longitude/latitude."""
    reader = shapefile.Reader(filename)
    # The first field entry is bookkeeping (pyshp's deletion flag —
    # presumably; verify against the pyshp docs), so skip it.
    field_names = [field[0] for field in reader.fields[1:]]

    features = []
    for record in reader.shapeRecords():
        geometry = record.shape.__geo_interface__
        geometry['coordinates'] = rd_to_longlat(geometry['coordinates'])
        features.append({
            'type': "Feature",
            'geometry': geometry,
            'properties': dict(zip(field_names, record.record)),
        })

    return {
        "type": "FeatureCollection",
        "features": features,
    }
|
Loading…
Reference in New Issue