Last active
August 23, 2019 11:26
-
-
Save heolin/3decccb891b74d383ff019f8ed367da9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ahocorapy.keywordtree import KeywordTree | |
# Keypad lookup table: maps each digit 0-9 to the list of letters it can
# stand for.  The tuple position is the digit; 0 and 1 have no letters and
# map to the single placeholder "#".
digit2letter = {
    digit: list(letters)
    for digit, letters in enumerate(
        ("#", "#", "abc", "def", "ghi", "jkl", "mno", "pqrs", "tuv", "wxyz")
    )
}
def get_all_texts(number):
    """Expand a digit string into every letter spelling it can represent.

    Each character of ``number`` is converted with ``int`` and looked up in
    ``digit2letter``.  The result is the cartesian product of the per-digit
    letter choices, ordered so that the first digit varies slowest.

    Args:
        number: Sequence of single-digit characters, e.g. ``"228"``.

    Returns:
        A list with one string per combination.  An empty ``number`` yields
        ``[""]`` (the single empty spelling) instead of raising.

    Raises:
        ValueError: If a character cannot be converted by ``int``.
        KeyError: If the converted value has no entry in ``digit2letter``.
    """
    texts = [""]
    for digit in number:
        letters = digit2letter[int(digit)]
        # Extend every spelling built so far with each candidate letter;
        # keeping the prefix in the outer loop preserves the ordering of a
        # nested "first digit outermost" expansion.
        texts = [prefix + letter for prefix in texts for letter in letters]
    return texts
class NumberFinder:
    """Find spellings of a phone number that embed dictionary words.

    A keyword tree is built from a word list.  Candidate letter spellings of
    a number are scanned for those words; matched words are upper-cased and
    separated by spaces, and every character that is not part of a matched
    word is turned back into its original digit.
    """

    def __init__(self, dictionary_path="/usr/share/dict/american-english"):
        """Load the word list and build the keyword search tree.

        Args:
            dictionary_path: Path to a text file with one word per line.
        """
        # The context manager guarantees the file handle is closed.
        with open(dictionary_path) as dictionary_file:
            words = dictionary_file.read().split("\n")
        self.kwtree = KeywordTree(case_insensitive=False)
        for word in words:
            # Words are stored lower-cased; the tree is case-sensitive, so
            # upper-cased (already matched) words are never matched again.
            self.kwtree.add(word.lower())
        self.kwtree.finalize()

    def replace_words(self, texts, min_length):
        """Mark one dictionary word in each text for every match found.

        Args:
            texts: Iterable of candidate strings to scan.
            min_length: Matches shorter than this are ignored.

        Returns:
            A set of strings.  Each is a copy of an input text with one
            matched word upper-cased and separated from its neighbours by
            single spaces.
        """
        results = set()
        for text in texts:
            # search_all yields (keyword, start offset) pairs.  Using the
            # reported offset instead of text.index(word) handles a word
            # that occurs more than once in the same text.
            for word, index in self.kwtree.search_all(text):
                if len(word) < min_length:
                    continue
                pieces = (text[:index], word.upper(), text[index + len(word):])
                # split()/join trims the ends and collapses the double
                # spaces produced next to an already separated word.
                results.add(" ".join(" ".join(pieces).split()))
        return results

    def generate_numbers(self, texts, max_iterations, min_length):
        """Repeatedly mark words, collecting every intermediate result.

        Args:
            texts: Iterable of candidate letter spellings.
            max_iterations: Maximum number of collection passes.
            min_length: Minimum word length for the first pass.

        Returns:
            The set of all strings produced by the passes that were run.
        """
        final = set()
        results = self.replace_words(texts, min_length)
        while results and max_iterations > 0:
            final.update(results)
            # NOTE(review): min_length is never decremented, so every later
            # pass uses the same threshold max(min_length - 1, 3) - confirm
            # whether a progressively shrinking threshold was intended.
            results = self.replace_words(results, max(min_length - 1, 3))
            max_iterations -= 1
        return final

    def clear_numbers(self, results, number):
        """Turn unmatched characters back into the digits of ``number``.

        Args:
            results: Iterable of texts, as produced by ``generate_numbers``.
            number: The original digit string.  Its i-th character
                corresponds to the i-th non-space character of each text.

        Returns:
            A set of strings in which spaces and upper-case characters are
            kept and every other character is replaced by its digit.

        Raises:
            IndexError: If a text has more non-space characters than
                ``number`` has digits.
        """
        final_numbers = set()
        for text in results:
            chars = []
            index = 0  # position in ``number``; spaces do not advance it
            for letter in text:
                if letter == " ":
                    chars.append(letter)
                    continue
                # A character equal to its lower-case form (lower-case
                # letters, "#", digits) was not part of a matched word.
                chars.append(number[index] if letter == letter.lower() else letter)
                index += 1
            final_numbers.add("".join(chars))
        return final_numbers

    def find(self, number, max_iterations=3, min_length=4):
        """Return the word-bearing spellings found for ``number``.

        Args:
            number: Digit string to spell.
            max_iterations: Maximum number of word-marking passes.
            min_length: Minimum word length for the first pass.

        Returns:
            A list of unique result strings, in no particular order.
        """
        texts = get_all_texts(number)
        results = self.generate_numbers(texts, max_iterations, min_length)
        final = self.clear_numbers(results, number)
        return list(final)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment