Skip to content

Instantly share code, notes, and snippets.

@heolin
Last active August 23, 2019 11:26
Show Gist options
  • Save heolin/3decccb891b74d383ff019f8ed367da9 to your computer and use it in GitHub Desktop.
Save heolin/3decccb891b74d383ff019f8ed367da9 to your computer and use it in GitHub Desktop.
from ahocorapy.keywordtree import KeywordTree
# Phone keypad layout: each digit maps to the list of letters printed on its
# key. Keys 0 and 1 carry no letters, so they map to the placeholder "#".
digit2letter = {
    digit: list(letters)
    for digit, letters in enumerate(
        ("#", "#", "abc", "def", "ghi", "jkl", "mno", "pqrs", "tuv", "wxyz")
    )
}
def get_all_texts(number):
    """Return every keypad spelling of *number*.

    Each digit is expanded to the letters listed for it in ``digit2letter``
    and all combinations are produced, varying the last digit fastest.

    Args:
        number: String of decimal digits, e.g. ``"228"``.

    Returns:
        A list of strings, each as long as *number*. An empty *number*
        yields ``[""]`` (the single empty spelling) instead of raising
        ``IndexError``.

    Raises:
        ValueError: If *number* contains a character ``int()`` cannot parse.
    """
    texts = [""]
    for digit in number:
        letters = digit2letter[int(digit)]
        # Extend every prefix built so far with each letter of this key;
        # keeping the prefix loop outermost preserves the original ordering.
        texts = [prefix + letter for prefix in texts for letter in letters]
    return texts
class NumberFinder:
    """Find dictionary words hidden in the keypad spellings of a phone number.

    Words from a word list are loaded into an Aho-Corasick keyword tree.
    Matched words are marked by upper-casing them; everything that stays
    lower-case (or ``#``) is later turned back into the original digits.
    """

    def __init__(self, dictionary_path="/usr/share/dict/american-english"):
        """Load the word list and build the keyword tree.

        Args:
            dictionary_path: Path to a text file with one word per line.
        """
        # Matching is case-sensitive on purpose: keywords are stored
        # lower-case, so words already upper-cased by replace_words() are
        # not matched again on later passes.
        self.kwtree = KeywordTree(case_insensitive=False)
        # The context manager closes the file instead of leaking the handle.
        with open(dictionary_path, encoding="utf-8") as dictionary:
            for line in dictionary:
                word = line.strip()
                if word:  # skip blank lines (e.g. the trailing newline)
                    self.kwtree.add(word.lower())
        self.kwtree.finalize()

    def replace_words(self, texts, min_length):
        """Upper-case each dictionary word found in *texts*.

        Args:
            texts: Iterable of candidate strings to search.
            min_length: Matches shorter than this are ignored.

        Returns:
            A set with one string per accepted match: the source text with
            that match upper-cased and separated from its neighbours by a
            single space.
        """
        results = set()
        for text in texts:
            for word, index in self.kwtree.search_all(text):
                if len(word) < min_length:
                    continue
                # Use the position reported by the search. text.index(word)
                # would always pick the first occurrence and silently drop
                # later ones.
                pieces = (text[:index], word.upper(), text[index + len(word):])
                # split()/join trims the edges and collapses the doubled
                # space that appears when the match already borders a space.
                results.add(" ".join(" ".join(pieces).split()))
        return results

    def generate_numbers(self, texts, max_iterations, min_length):
        """Repeatedly mark words until nothing new is found.

        Args:
            texts: Iterable of candidate spellings.
            max_iterations: Maximum number of rounds whose results are kept;
                with ``0`` nothing is collected.
            min_length: Minimum word length for the first pass.

        Returns:
            The set of all marked texts collected over the rounds.
        """
        final = set()
        results = self.replace_words(texts, min_length)
        # Follow-up passes use one letter less than the first pass, but
        # never fewer than three letters.
        follow_up_length = max(min_length - 1, 3)
        while results and max_iterations > 0:
            final.update(results)
            results = self.replace_words(results, follow_up_length)
            max_iterations -= 1
        return final

    def clear_numbers(self, results, number):
        """Turn the unmatched characters of each text back into digits.

        Args:
            results: Iterable of texts produced by ``generate_numbers``.
            number: The original digit string the texts were derived from.

        Returns:
            A set of strings in which upper-case words are kept and every
            other non-space character is replaced by the digit at the same
            position in *number*.
        """
        final_numbers = set()
        for text in results:
            chars = []
            index = 0  # position in `number`; spaces do not consume a digit
            for letter in text:
                if letter == " ":
                    chars.append(letter)
                    continue
                # Lower-case letters and "#" compare equal to their own
                # lower() form, which marks them as unmatched.
                chars.append(number[index] if letter == letter.lower() else letter)
                index += 1
            final_numbers.add("".join(chars))
        return final_numbers

    def find(self, number, max_iterations=3, min_length=4):
        """Return the word-containing renderings of *number*.

        ``get_all_texts`` enumerates every keypad spelling, so the work
        grows exponentially with the number of digits.

        Args:
            number: String of decimal digits.
            max_iterations: Passed to ``generate_numbers``.
            min_length: Minimum word length for the first pass.

        Returns:
            A sorted list of strings such as ``"FOO 227"``; empty when
            *number* is empty or no word is found.
        """
        if not number:
            # Nothing to spell; avoids indexing into an empty number.
            return []
        texts = get_all_texts(number)
        results = self.generate_numbers(texts, max_iterations, min_length)
        # Sorting gives callers a deterministic order instead of set order.
        return sorted(self.clear_numbers(results, number))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment