41 lines
1.1 KiB
Python
41 lines
1.1 KiB
Python
import numpy as np
|
|
from sklearn import manifold
|
|
import Levenshtein
|
|
|
|
# x: ndarray (dnum x dim)
|
|
# n: number of samples to extract
|
|
# OUTPUT
|
|
# (xChosen, indexChosen): the chosen samples and their row indices in x
|
|
#
|
|
def extractRandomSample(x, n):
    """Draw ``n`` distinct entries of ``x`` at random along its first axis.

    Args:
        x: ndarray with at least one dimension. Sampling happens along
            axis 0, so both a (dnum x dim) matrix and a 1-D array (for
            example an array of words) are accepted.
        n: number of samples to extract.

    Returns:
        Tuple ``(xChosen, indexChosen)`` where ``indexChosen`` holds the
        positions that were drawn along axis 0 and ``xChosen`` is
        ``x[indexChosen]``.

    Raises:
        ValueError: propagated from ``np.random.choice`` when ``n`` is
            larger than ``x.shape[0]`` (sampling is without replacement).
    """
    rowCount = x.shape[0]
    # An integer population is sampled as if it were np.arange(rowCount);
    # replace=False guarantees that no row is picked twice.
    indexChosen = np.random.choice(rowCount, n, replace=False)
    # Index only the first axis so the function is not limited to 2-D input.
    xChosen = x[indexChosen]
    return (xChosen, indexChosen)
|
|
|
|
# x: 1d string ndarray
|
|
def makeLevenshteinMatrix(x):
    """Build the pairwise Levenshtein distance matrix of the strings in ``x``.

    Args:
        x: 1-D string ndarray.

    Returns:
        Integer ndarray of shape ``(len(x), len(x))`` whose entry
        ``[i, j]`` is ``Levenshtein.distance(x[i], x[j])``.
    """
    rowCount = x.shape[0]
    # Start from zeros: the diagonal (distance of a string to itself) is 0
    # and therefore never needs to be computed.
    xLevenshtein = np.zeros((rowCount, rowCount), dtype='int')

    for row in range(rowCount):
        # The default-weight Levenshtein distance is symmetric, so each
        # unordered pair is computed once and mirrored across the diagonal.
        for col in range(row + 1, rowCount):
            dist = Levenshtein.distance(x[row], x[col])
            xLevenshtein[row, col] = dist
            xLevenshtein[col, row] = dist
    return xLevenshtein
|
|
|
|
# x: 1d string ndarray
|
|
def calcLevenshteinArray(word, x):
    """Compute the Levenshtein distance from ``word`` to every entry of ``x``.

    Args:
        word: the reference string.
        x: 1-D string ndarray.

    Returns:
        Integer ndarray with the same shape as ``x``; entry ``i`` is
        ``Levenshtein.distance(word, x[i])``.
    """
    entryCount = x.shape[0]
    distances = np.zeros(x.shape, dtype='int')

    for position in range(entryCount):
        distances[position] = Levenshtein.distance(word, x[position])
    return distances
|
|
|
|
def MDS(x, n_components=2, random_state=6):
    """Embed a precomputed dissimilarity matrix with multidimensional scaling.

    Args:
        x: square matrix of pairwise dissimilarities; it is passed to
            scikit-learn with ``dissimilarity="precomputed"``, so it is
            used as distances rather than as feature vectors.
        n_components: number of dimensions of the embedding. Defaults to
            2, the value that was previously hard-coded.
        random_state: seed forwarded to ``manifold.MDS`` so the result is
            reproducible. Defaults to 6, the previously hard-coded value.

    Returns:
        The embedding returned by ``manifold.MDS.fit_transform``
        (per scikit-learn: one row per sample, ``n_components`` columns).
    """
    mds = manifold.MDS(
        n_components=n_components,
        dissimilarity="precomputed",
        random_state=random_state,
    )
    return mds.fit_transform(x)