Created
November 3, 2015 12:58
-
-
Save d-baker/f8353e5ad87c1e4b9e7a to your computer and use it in GitHub Desktop.
Word chainer made for NaNoGenMo 2014. Not a Markov chain.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
class PseudoMarkov:
    """Word chainer that walks a word -> successor-words table.

    ``associations`` maps each word seen in the corpus to the list of
    distinct words observed immediately after it.  Text is produced by
    starting from a seed word and repeatedly picking a random successor
    of the last word until a word with no recorded successor is reached.
    """

    def __init__(self):
        # word -> list of words observed directly after it
        self.associations = {}

    def gen(self, filepath, max_words=None):
        """Build associations from a corpus file and return generated text.

        Each line of the file is treated as one sentence and split on
        whitespace.  The pairs found are added to ``self.associations``
        (existing entries are kept, so repeated calls accumulate).

        filepath:  path of the corpus file, one sentence per line.
        max_words: optional cap; the text stops being extended once it
                   holds at least this many words.  ``None`` (default)
                   means no cap, in which case generation only ends when
                   a word without a successor is reached.

        Returns the generated words joined by single spaces, or an empty
        string when the corpus contains no words at all.
        """
        with open(filepath) as fp:
            tokenised = [line.split() for line in fp]
        # Blank lines carry no words and cannot provide a seed.
        sentences = [words for words in tokenised if words]
        if not sentences:
            return ""

        for words in sentences:
            self.breakup(words)

        words = random.choice(sentences)
        # Start from a random position in the chosen sentence; everything
        # before the seed word is copied verbatim as the prefix.
        seed_index = random.randrange(len(words))
        output = list(words[:seed_index + 1])

        # Extend only while the last word has a known successor, so a seed
        # without successors is emitted once rather than echoed by chain().
        while output[-1] in self.associations:
            if max_words is not None and len(output) >= max_words:
                break
            output.append(self.chain(output[-1]).rstrip())

        return " ".join(output)

    def breakup(self, sentence):
        """Record every adjacent word pair of *sentence* in the table.

        sentence: list of words in reading order.

        A single pass over the pairs is enough; no recursion is needed,
        so long sentences do not hit the recursion limit.
        """
        for word, successor in zip(sentence, sentence[1:]):
            if word not in self.associations:
                self.associations[word] = [successor]
                continue
            followers = self.associations[word]
            # NOTE(review): the first successor of a word is stored as
            # written while later ones are lower-cased; kept as found.
            # The membership test checks both spellings so that a
            # capitalised successor is not appended again and again.
            lowered = successor.lower()
            if successor not in followers and lowered not in followers:
                followers.append(lowered)

    def chain(self, seedword):
        """Return a random successor of *seedword* followed by a space.

        When *seedword* has no recorded successor, *seedword* itself is
        returned (also followed by a space).
        """
        if seedword not in self.associations:
            return seedword + " "
        return random.choice(self.associations[seedword]) + " "
if __name__ == "__main__":
    # Build a chainer from corpus.txt in the working directory and print
    # one generated text.
    chainer = PseudoMarkov()
    # The parenthesised single-argument form is valid on Python 2 and 3.
    print(chainer.gen("corpus.txt"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment