Created
July 12, 2025 08:17
-
-
Save quicksilver0/2341e5632473617d0c55a606f4b69d30 to your computer and use it in GitHub Desktop.
rapidfuzz usage example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Requires the rapidfuzz package: pip install rapidfuzz
from rapidfuzz import process | |
import re | |
def suggest_names_fuzzy(text, name_database, score_cutoff=80):
    """Build "maybe you mean" hints for words in ``text`` that resemble a known name.

    Every word found in ``text`` is passed to ``process.extractOne`` together
    with ``name_database`` and ``score_cutoff``. When a match comes back and
    it differs from the word as written, a hint is recorded.

    Args:
        text: The string to scan for possibly misspelled names.
        name_database: The candidate names handed to ``process.extractOne``.
        score_cutoff: Forwarded unchanged to ``process.extractOne``.

    Returns:
        A list of strings of the form
        ``"Maybe you mean '<name>' at index <index>"``, in the order the
        words occur in ``text``; ``<index>`` is the word's start offset.
    """
    # The regex word scan is a placeholder tokenizer: it can be swapped for a
    # list of tokens produced by spaCy. An example of building such a token
    # list from text: https://habr.com/ru/articles/531940/
    word_pattern = re.compile(r'\b\w+\b')

    suggestions = []
    for token in word_pattern.finditer(text):
        word = token.group()
        best = process.extractOne(word, name_database, score_cutoff=score_cutoff)
        if not best:
            # Nothing was returned for this word, so there is nothing to suggest.
            continue

        name, _score, _key = best
        if word == name:
            # The word is already spelled exactly like the matched name.
            continue

        index = token.start()
        suggestions.append(f"Maybe you mean '{name}' at index {index}")

    return suggestions
# Example usage: scan a sentence containing misspelled names and print
# one hint per line.
text = "Alic went to the market. Then she met Chralie and Bobb."
name_database = ["Alice", "Charlie", "Bob", "Diana"]

result = suggest_names_fuzzy(text, name_database)
for suggestion in result:
    print(suggestion)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment