stimmenfryslan/notebooks/Spider confusion map.ipynb

940 KiB

Pronunciation-based location prediction confusion

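This notebook maps how pronunciation-based predictions of a speaker's location are confused with the speaker's actual location: each line drawn on the final map connects an actual location to the location that was predicted for it.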

In [1]:
import getpass
import itertools
import json
import os
import pickle
import random

import folium
import MySQLdb
import numpy
import pandas
import requests
from folium.plugins import HeatMap
from IPython.display import display
from sklearn.metrics import confusion_matrix
from vincenty import vincenty

# Database credentials are read from the environment so that no secret is
# stored in the notebook (the password that used to be hardcoded here should
# be considered leaked and rotated).
#   STIMMENFRYSLAN_DB_USER      - MySQL user, defaults to 'root' as before
#   STIMMENFRYSLAN_DB_PASSWORD  - MySQL password; prompted for when unset
db_password = os.environ.get('STIMMENFRYSLAN_DB_PASSWORD')
if db_password is None:
    db_password = getpass.getpass('MySQL password for stimmenfryslan: ')

db = MySQLdb.connect(
    user=os.environ.get('STIMMENFRYSLAN_DB_USER', 'root'),
    passwd=db_password,
    db='stimmenfryslan',
)


%matplotlib inline
from matplotlib import pyplot, rcParams
from jupyter_progressbar import ProgressBar

# Matplotlib defaults applied to every figure created after this cell:
# large text on a wide canvas.
# rcParams['font.family'] = 'Lucinda Console'
rcParams.update({
    'font.size': '24',
    'figure.figsize': (20, 10),
    'figure.dpi': 100,
})

from jupyter_progressbar import ProgressBar
In [2]:
# Read the pickled predictions from the current working directory.
# NOTE: unpickling can execute arbitrary code, so only load a file you trust.
with open('simplified_predictions.p3', mode='rb') as predictions_file:
    simplified_predictions = pickle.load(predictions_file)
In [6]:
# Snap every actual location down to a 0.1 grid in both coordinates; missing
# locations (NaN, detected via value != value) stay NaN.
# NOTE(review): float floor-division by 0.1 can put a value that lies exactly
# on a grid line into the cell below (e.g. 1.0 // 0.1 == 9.0), and the
# multiplication can leave float noise in the result. Kept as-is so the grid
# matches the other cells that use the same formula.
rounded_locations = []
for latlon in simplified_predictions['actual_latlon']:
    if latlon == latlon:
        rounded_locations.append((
            (latlon[0] // 0.1) * 0.1,
            (latlon[1] // 0.1) * 0.1,
        ))
    else:
        rounded_locations.append(numpy.nan)

simplified_predictions['actual_latlon_rounded'] = rounded_locations
In [7]:
# Show which 0.1 grid cells contain at least one actual speaker location.
# Missing locations are NaN and are dropped here (NaN != NaN).
actual_latlon = [tuple(x) for x in simplified_predictions['actual_latlon'] if x == x]

# Snap to the grid *before* de-duplicating, so each occupied cell produces
# exactly one circle instead of one per distinct raw coordinate inside it.
# This keeps the rendered map (and the saved notebook) small.
occupied_cells = {
    ((p[0] // 0.1) * 0.1, (p[1] // 0.1) * 0.1)
    for p in actual_latlon
}

m = folium.Map((52.16370310266682, 5.601701825033824), tiles='stamentoner', zoom_start=8)

for cell in occupied_cells:
    # folium.Circle takes its radius in metres.
    folium.Circle(cell, radius=10).add_to(m)

m
Out[7]:
In [15]:
from collections import Counter
In [16]:
# Tally how often each (actual grid cell, predicted location) pair occurs.
# Predictions come from the 'prediction_1_latlon' column (presumably the
# top-ranked prediction - TODO confirm). Rows where either side is NaN are
# skipped (NaN != NaN).
confusions = Counter()
location_pairs = zip(
    ProgressBar(simplified_predictions['actual_latlon_rounded']),
    simplified_predictions['prediction_1_latlon'],
)
for true_cell, guessed_cell in location_pairs:
    if true_cell == true_cell and guessed_cell == guessed_cell:
        confusions[(tuple(true_cell), tuple(guessed_cell))] += 1
VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='<b>0</b>s passed', placeholder='0…
In [ ]:
# Confusion map: one red line per (actual, predicted) location pair, with
# thicker lines for pairs that occur more often.
# NOTE(review): a near-identical cell follows directly below - TODO keep only one.
m = folium.Map((52.16370310266682, 5.601701825033824), tiles='stamentoner', zoom_start=8)

for (actual, predicted), count in confusions.items():
    # Width grows with log(count). The +1 keeps pairs that occur exactly once
    # visible: log(1) == 0 would give them a zero-width stroke.
    line_weight = 1 + float(numpy.log(count))
    folium.PolyLine([actual, predicted], color="red", weight=line_weight, opacity=1).add_to(m)

m
In [17]:
# Confusion map: one red line per (actual, predicted) location pair, with
# thicker lines for pairs that occur more often.
m = folium.Map((52.16370310266682, 5.601701825033824), tiles='stamentoner', zoom_start=8)

for (actual, predicted), count in confusions.items():
    # Width grows with log(count). The +1 keeps pairs that occur exactly once
    # visible: log(1) == 0 would give them a zero-width stroke.
    line_weight = 1 + float(numpy.log(count))
    folium.PolyLine([actual, predicted], color="red", weight=line_weight, opacity=1).add_to(m)

m
Out[17]:
In [ ]: