accent_classification/accent_classification/test_code.py

45 lines
815 B
Python

import Levenshtein
import numpy as np
a = 'hello'
b = 'haall'
# approximate
infinite = 100
# make distance matrix D
len_a = len(a)
len_b = len(b)
D_ = np.zeros((len_a, len_b)).astype(int)
for ia in range(0, len_a):
a_ = a[ia]
for ib in range(0, len_b):
b_ = b[ib]
if a_ == b_:
D_[ia, ib] = 1
D = np.zeros((len_a+1, len_b+1)).astype(int)
D[1:len_a+1, 1:len_b+1] = D_
D[0, :] = infinite
D[:, 0] = infinite
D[0, 0] = 0
# calculate accumulated distance
indexPath = []
for ia in range(0, len_a):
for ib in range(0, len_b):
a_ = a[ia]
b_ = b[ib]
option = (D[ia, ib]+D[ia+1, ib+1], D[ia, ib+1], D[ia+1, ib])
Dmin = np.min(option)
D[ia+1, ib+1] = D[ia+1, ib+1]+Dmin
index = list(option).index(Dmin)
indexPath[ia, ib] = index
# back trace
ia = len_a
ib = len_b
#while (ia > 0 or ib > 0):
# tb