Created
February 21, 2024 09:50
-
-
Save ehbc221/c3c362ac3b073942a3416574ce66bc96 to your computer and use it in GitHub Desktop.
Wagner Fischer algorithm for spell checkers / autocorrects
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Credits : @b001io on github | |
// Repo link : https://github.com/b001io/wagner-fischer/blob/main/wagner_fischer.py | |
import java.io.BufferedReader; | |
import java.io.FileReader; | |
import java.io.IOException; | |
import java.util.ArrayList; | |
import java.util.List; | |
public class Main { | |
public static List<String> loadDictionary(String filePath) throws IOException { | |
List<String> dictionary = new ArrayList<>(); | |
BufferedReader reader = new BufferedReader(new FileReader(filePath)); | |
String line; | |
while ((line = reader.readLine()) != null) { | |
dictionary.add(line.trim()); | |
} | |
reader.close(); | |
return dictionary; | |
} | |
public static int wagnerFischer(String s1, String s2) { | |
int len_s1 = s1.length(); | |
int len_s2 = s2.length(); | |
if (len_s1 > len_s2) { | |
String temp = s1; | |
s1 = s2; | |
s2 = temp; | |
int tempLen = len_s1; | |
len_s1 = len_s2; | |
len_s2 = tempLen; | |
} | |
int[] currentRow = new int[len_s1 + 1]; | |
for (int i = 0; i <= len_s1; i++) { | |
currentRow[i] = i; | |
} | |
for (int i = 1; i <= len_s2; i++) { | |
int[] previousRow = currentRow.clone(); | |
currentRow[0] = i; | |
for (int j = 1; j <= len_s1; j++) { | |
int add = previousRow[j] + 1; | |
int delete = currentRow[j - 1] + 1; | |
int change = previousRow[j - 1] + (s1.charAt(j - 1) != s2.charAt(i - 1) ? 1 : 0); | |
currentRow[j] = Math.min(Math.min(add, delete), change); | |
} | |
} | |
return currentRow[len_s1]; | |
} | |
public static List<Pair<String, Integer>> spellCheck(String word, List<String> dictionary) { | |
List<Pair<String, Integer>> suggestions = new ArrayList<>(); | |
for (String correctWord : dictionary) { | |
int distance = wagnerFischer(word, correctWord); | |
suggestions.add(new Pair<>(correctWord, distance)); | |
} | |
suggestions.sort((p1, p2) -> Integer.compare(p1.getValue(), p2.getValue())); | |
return suggestions.subList(0, Math.min(10, suggestions.size())); | |
} | |
public static void main(String[] args) throws IOException { | |
List<String> dictionary = loadDictionary("words.txt"); | |
String misspelledWord = "wrlod"; | |
List<Pair<String, Integer>> suggestions = spellCheck(misspelledWord, dictionary); | |
System.out.println("Top 10 suggestions for '" + misspelledWord + "':"); | |
for (Pair<String, Integer> pair : suggestions) { | |
System.out.println(pair.getKey() + " (Distance: " + pair.getValue() + ")"); | |
} | |
} | |
static class Pair<K, V> { | |
private final K key; | |
private final V value; | |
public Pair(K key, V value) { | |
this.key = key; | |
this.value = value; | |
} | |
public K getKey() { | |
return key; | |
} | |
public V getValue() { | |
return value; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment