Skip to content

Instantly share code, notes, and snippets.

@digitalex
Created November 29, 2012 12:33
Show Gist options
  • Save digitalex/4168735 to your computer and use it in GitHub Desktop.
Save digitalex/4168735 to your computer and use it in GitHub Desktop.
Levenshtein w/diagonal stripe optimization
import java.util.*;
import java.io.*;
public class Dictionary {
private List<String> words;
public Dictionary(List<String> list) {
this.words = list;
}
public static Dictionary readFromFile(String filename) throws IOException {
Scanner scanner = new Scanner(new File(filename));
List<String> words = new ArrayList<String>(178691);
while (scanner.hasNext()) {
words.add(scanner.next().trim().toUpperCase());
}
return new Dictionary(words);
}
public final int getMinDistance(String word) {
String prevWord = "";
int bestSoFar = 1000;
for (String w : this.words) {
int lowerBound = Math.abs(w.length() - word.length());
if (bestSoFar > lowerBound) {
int prefixLength = commonPrefixLength(prevWord, w);
int k = bestSoFar - 1;
int d = levenshtein(w, word.toUpperCase(), k, prefixLength);
bestSoFar = Math.min(bestSoFar, d);
if (bestSoFar == 0) { return 0; }
prevWord = w;
}
}
return bestSoFar;
}
public static final int commonPrefixLength(String s, String t) {
int i = 0, n = Math.min(s.length(), t.length());
while (i < n && s.charAt(i) == t.charAt(i)) { i++; }
return i;
}
private final int levenshtein(String s, String t, int k, int firstRow) {
if (s.equals(t)) { return 0; }
int m = s.length() + 1;
int n = t.length() + 1;
int lowerBound = Integer.MAX_VALUE;
for (int i = firstRow + 1; i < m; i++) {
for (int j = Math.max(i-k, 1); j < Math.min(i+k+1, n); j++) {
if (s.charAt(i-1) == t.charAt(j-1)) {
this.m[i][j] = this.m[i-1][j-1];
}
else {
// substitution
this.m[i][j] = this.m[i-1][j-1] + 1;
// deletion
if (j < i+k) // ignore [i-1][j] if it's outside of diagonal stripe
this.m[i][j] = Math.min(this.m[i][j], this.m[i-1][j] + 1);
// insertion
if (j > i-k) // ignore [i][j-1] if it's outside of diagonal stripe
this.m[i][j] = Math.min(this.m[i][j], this.m[i][j-1] + 1);
}
lowerBound = Math.min(lowerBound, this.m[i][j]);
}
// Since the result can't possibly be less than best so far;
if (lowerBound > k) {
return Integer.MAX_VALUE;
}
}
return this.m[m-1][n-1];
}
private int[][] m = {
{0 ,1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25},
{1 ,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{2 ,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{3 ,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{4 ,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{5 ,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{6 ,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{7 ,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{8 ,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{9 ,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{10,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{11,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{12,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{13,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{14,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{15,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{16,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{17,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{18,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{19,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{20,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{21,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{22,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{23,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{24,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{25,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment