Created
May 2, 2021 02:11
-
-
Save makmac213/70111156c051d6c644978c7e4582ab9b to your computer and use it in GitHub Desktop.
Suggest words
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk import word_tokenize, ngrams, FreqDist | |
from nltk.corpus import cmudict | |
import string | |
# Load the corpus. The context manager guarantees the file handle is closed,
# even if reading raises (the previous bare open() never closed it).
with open('poem.txt', 'r') as f:
    doc = f.readlines()

# Flatten the corpus into one whitespace-separated string before tokenizing.
texts = ' '.join(doc)
texts = texts.replace('\n', ' ')
tokenized = word_tokenize(texts)

# CMU pronouncing-dictionary entries collapsed into a dict; when a key
# appears more than once, dict() keeps the last value seen.
phoneme_dict = dict(cmudict.entries())

# Count every adjacent token pair in the corpus.
bigram_freq = FreqDist(ngrams(tokenized, 2))

# Map each token to the list of distinct tokens observed immediately after
# it. Iterating the frequency table yields each distinct bigram once.
bigram_dict = {}
bigram_list = list(bigram_freq)
for first, second in bigram_list:
    bigram_dict.setdefault(first, []).append(second)
def filter_words_by_syllable_count(arr, max=1):
    """Return the distinct words of ``arr`` with at most ``max`` syllables.

    Syllables are counted with ``syllables_in_word``. Duplicates are
    collapsed through a set, so the order of the returned list is not
    defined.

    Args:
        arr: Iterable of words to examine.
        max: Largest syllable count a word may have to be kept (default 1).

    Returns:
        A list of the unique words that satisfy the limit.
    """
    # The parameter name shadows the builtin; alias it for readability.
    limit = max
    unique_matches = {word for word in arr if syllables_in_word(word) <= limit}
    return list(unique_matches)
def suggest_words_from_string(str, **kwargs):
    """Suggest corpus words that start with the given prefix.

    Matching is case-insensitive: both the prefix and each corpus token are
    lower-cased before comparison. (Previously only the token was
    lower-cased, so a prefix containing an uppercase letter never matched.)

    Args:
        str: Prefix to look for at the start of each token in ``tokenized``.
        **kwargs: Optional ``syllables`` (int). When truthy, only words with
            at most that many syllables are kept.

    Returns:
        A list of the distinct matching tokens, in no particular order.
    """
    prefix = str.lower()
    ret = [word for word in tokenized if word.lower().startswith(prefix)]
    syllables = kwargs.get('syllables')
    if syllables:
        ret = filter_words_by_syllable_count(ret, max=syllables)
    # De-duplicate: the same token can occur many times in the corpus.
    return list(set(ret))
def syllables_in_word(word):
    """Estimate the number of syllables in ``word`` with a vowel heuristic.

    Each maximal run of consecutive vowels (``a e i o u y``) counts as one
    syllable. A trailing ``e`` is treated as silent and subtracted, unless
    the word ends in ``le``. Any non-empty word is reported as having at
    least one syllable.

    Args:
        word: The word to analyse; case is ignored.

    Returns:
        The estimated syllable count, or 0 for an empty word (which
        previously raised IndexError).
    """
    if not word:
        return 0

    word = word.lower()
    vowels = "aeiouy"
    count = 0
    prev_is_vowel = False
    for ch in word:
        is_vowel = ch in vowels
        # Only the first vowel of a run starts a new syllable.
        if is_vowel and not prev_is_vowel:
            count += 1
        prev_is_vowel = is_vowel

    # Silent trailing "e" (e.g. "cake"), but "-le" is voiced (e.g. "apple").
    if word.endswith("e") and not word.endswith("le"):
        count -= 1

    # Words such as "the" would otherwise drop to zero.
    return max(count, 1)
def suggest_from_previous_word(prev_word, **kwargs):
    """Suggest words that followed ``prev_word`` in the corpus.

    Looks ``prev_word`` up in ``bigram_dict``. An unknown word yields an
    empty list; previously the lookup sat outside the ``try`` block, so the
    ``KeyError`` escaped to the caller instead of being handled.

    Args:
        prev_word: The word whose observed successors are wanted.
        **kwargs: Optional ``syllables`` (int). When truthy, only successors
            with at most that many syllables are kept.

    Returns:
        A new list of candidate next words; empty if ``prev_word`` has no
        recorded successors.
    """
    followers = bigram_dict.get(prev_word)
    if followers is None:
        return []

    syllables = kwargs.get('syllables')
    if syllables:
        return filter_words_by_syllable_count(followers, max=syllables)

    # Return a copy so callers cannot mutate the shared bigram table.
    return list(followers)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment