Skip to content

Instantly share code, notes, and snippets.

@makmac213
Created May 2, 2021 02:11
Show Gist options
  • Save makmac213/70111156c051d6c644978c7e4582ab9b to your computer and use it in GitHub Desktop.
Save makmac213/70111156c051d6c644978c7e4582ab9b to your computer and use it in GitHub Desktop.
Suggest words
from nltk import word_tokenize, ngrams, FreqDist
from nltk.corpus import cmudict
import string
# Load the corpus the suggestions are drawn from. The context manager closes
# the file handle as soon as the lines are read (it was previously left open
# for the lifetime of the process).
with open('poem.txt', 'r') as f:
    doc = f.readlines()

# Flatten the poem into a single line of text before tokenizing.
texts = ' '.join(doc)
texts = texts.replace('\n', ' ')
tokenized = word_tokenize(texts)

# Lookup built from the CMU pronouncing dictionary entries. It is not
# referenced by the code visible in this file.
phoneme_dict = dict(cmudict.entries())

# Count every pair of adjacent tokens in the corpus.
bigram_freq = FreqDist(ngrams(tokenized, 2))

# Map each token to the distinct tokens seen immediately after it, in order
# of first appearance (iterating the distribution yields each bigram once).
bigram_list = list(bigram_freq)
bigram_dict = {}
for first_word, next_word in bigram_list:
    bigram_dict.setdefault(first_word, []).append(next_word)
def filter_words_by_syllable_count(arr, max=1):
    """Return the distinct words in ``arr`` having at most ``max`` syllables.

    Syllables are estimated with ``syllables_in_word``. Duplicates are
    removed by passing through a set, so the order of the returned list is
    not meaningful.
    """
    short_enough = {
        word for word in arr if syllables_in_word(word) <= max
    }
    return list(short_enough)
def suggest_words_from_string(str, **kwargs):
    """Suggest corpus words that start with the given prefix.

    Matching is case-insensitive: both the prefix and each token from
    ``tokenized`` are lower-cased before comparison.

    Args:
        str: Prefix to look for. (The name shadows the builtin; it is kept
            so existing callers keep working.)
        **kwargs: Optional ``syllables`` -- when truthy, only words with at
            most that many syllables are returned.

    Returns:
        A list of distinct matching tokens, in no particular order.
    """
    # Lower-case the prefix once; previously only the candidates were
    # lower-cased, so a prefix such as "Th" could never match anything.
    prefix = str.lower()
    ret = [token for token in tokenized if token.lower().startswith(prefix)]

    syllables = kwargs.get('syllables')
    if syllables:
        ret = filter_words_by_syllable_count(ret, max=syllables)
    return list(set(ret))
def syllables_in_word(word):
    """Estimate the number of syllables in ``word`` with a vowel heuristic.

    Each run of consecutive vowels (``a e i o u y``) counts as one syllable.
    A trailing ``e`` is treated as silent unless the word ends in ``le``.
    Any non-empty word is reported as having at least one syllable.

    Args:
        word: The word to analyse; case is ignored.

    Returns:
        The estimated syllable count, or 0 for an empty string.
    """
    word = word.lower()
    if not word:
        # Nothing to count; indexing the first character used to raise
        # IndexError here.
        return 0

    vowels = "aeiouy"
    count = 0
    previous_was_vowel = False
    for char in word:
        is_vowel = char in vowels
        # Only the first vowel of a run starts a new syllable.
        if is_vowel and not previous_was_vowel:
            count += 1
        previous_was_vowel = is_vowel

    # Discount a silent final "e" ("make"), but keep it for "-le" ("apple").
    if word.endswith("e") and not word.endswith("le"):
        count -= 1

    # Words without vowels, or whose only vowel was the discounted "e",
    # still count as one syllable.
    if count == 0:
        count = 1
    return count
def suggest_from_previous_word(prev_word, **kwargs):
    """Suggest words that followed ``prev_word`` somewhere in the corpus.

    Args:
        prev_word: The token to look up in ``bigram_dict``.
        **kwargs: Optional ``syllables`` -- when truthy, only followers with
            at most that many syllables are returned.

    Returns:
        A list of follower tokens, or an empty list when ``prev_word`` has
        no recorded followers.
    """
    # The lookup used to sit outside the try block, so the KeyError handler
    # never ran and unknown words crashed instead of yielding [].
    followers = bigram_dict.get(prev_word, [])

    syllables = kwargs.get('syllables')
    if syllables:
        return filter_words_by_syllable_count(followers, max=syllables)

    # Return a copy so callers cannot mutate the shared bigram table.
    return list(followers)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment