import numpy as np
from sklearn import manifold
import Levenshtein


def extractRandomSample(x, n):
    """Draw ``n`` distinct entries of ``x`` along its first axis at random.

    Args:
        x: ndarray to sample from; the first axis enumerates the samples
            (e.g. a ``dnum x dim`` matrix, or a 1-D array of strings).
        n: number of samples to extract.

    Returns:
        A tuple ``(xChosen, indexChosen)``: the selected entries of ``x``
        and the first-axis indices they were taken from, in matching order.

    Raises:
        ValueError: raised by ``np.random.choice`` when ``n`` exceeds
            ``x.shape[0]``, because sampling is done without replacement.
    """
    xRowMax = x.shape[0]
    # Passing the population size is documented as equivalent to sampling
    # from np.arange(xRowMax); replace=False keeps the indices distinct.
    indexChosen = np.random.choice(xRowMax, size=n, replace=False)
    # Index the first axis only, so 1-D inputs work as well as 2-D ones.
    xChosen = x[indexChosen]
    return (xChosen, indexChosen)


def makeLevenshteinMatrix(x):
    """Build the pairwise Levenshtein distance matrix of the strings in ``x``.

    Args:
        x: 1-D ndarray of strings.

    Returns:
        Integer ndarray of shape ``(len(x), len(x))`` whose ``[i, j]`` entry
        is ``Levenshtein.distance(x[i], x[j])``.
    """
    xRowMax = x.shape[0]
    # Start from zeros: the diagonal (distance of a string to itself) is 0.
    xLevenshtein = np.zeros((xRowMax, xRowMax), dtype=int)
    for xRow in range(xRowMax):
        rowWord = x[xRow]
        # The distance is symmetric, so compute the upper triangle only and
        # mirror each value into the lower triangle.
        for xCol in range(xRow + 1, xRowMax):
            dist = Levenshtein.distance(rowWord, x[xCol])
            xLevenshtein[xRow, xCol] = dist
            xLevenshtein[xCol, xRow] = dist
    return xLevenshtein


def calcLevenshteinArray(word, x):
    """Compute the Levenshtein distance from ``word`` to every entry of ``x``.

    Args:
        word: the reference string.
        x: 1-D ndarray of strings.

    Returns:
        Integer ndarray with the same shape as ``x``; entry ``i`` is
        ``Levenshtein.distance(word, x[i])``.
    """
    xLevenshtein = np.zeros(x.shape, dtype=int)
    for xRow, candidate in enumerate(x):
        xLevenshtein[xRow] = Levenshtein.distance(word, candidate)
    return xLevenshtein


def MDS(x):
    """Embed a precomputed dissimilarity matrix into two dimensions.

    Runs ``sklearn.manifold.MDS`` with ``n_components=2``,
    ``dissimilarity="precomputed"`` and a fixed ``random_state=6`` so that
    repeated calls on the same input use the same seed.

    Args:
        x: dissimilarity matrix passed straight to ``fit_transform``;
            presumably the square matrix produced by
            ``makeLevenshteinMatrix`` -- confirm against callers.

    Returns:
        The array returned by ``fit_transform`` (the 2-component embedding).
    """
    mds = manifold.MDS(n_components=2, dissimilarity="precomputed", random_state=6)
    xmds = mds.fit_transform(x)
    return xmds