Skip to content

Instantly share code, notes, and snippets.

@ardamavi
Last active September 8, 2017 11:55
Show Gist options
  • Save ardamavi/1154a2864f6f84e03e4ba66a4b8b0b78 to your computer and use it in GitHub Desktop.
Save ardamavi/1154a2864f6f84e03e4ba66a4b8b0b78 to your computer and use it in GitHub Desktop.
Sentence to Tensor
# Arda Mavi
import string
import numpy as np
# Vocabulary: the printable ASCII characters, in the fixed order given by
# string.printable.
characters = string.printable
# Maps a one-hot position back to its character (position -> character).
token_index = dict(enumerate(characters))
max_word = 140  # Number of word slots (axis 0 of the tensor).
max_length = 80  # Number of character slots per word (axis 1).
# Width of the one-hot axis (axis 2). Derived from the vocabulary instead of
# being hard-coded so the two can never get out of sync.
char_len = len(characters)
# All-zero template; encode() copies it for every sentence.
sentence_array = np.zeros((max_word, max_length, char_len))
def encode(sentence):
    """One-hot encode a sentence into a (word, character, token) array.

    The sentence is split on single spaces. Character ``j`` of word ``i``
    sets ``result[i, j, k] = 1.0``, where ``k`` is the position of that
    character in ``characters``. Every other entry keeps the value it has in
    ``sentence_array``.

    Args:
        sentence: Text to encode.

    Returns:
        A new array with the same shape as ``sentence_array``; the shared
        template itself is never modified.

    Raises:
        IndexError: If the sentence has more words, or a word has more
            characters, than the template array has room for.
        ValueError: If a character does not occur in ``characters``.
    """
    # Work on a copy so repeated calls never write into the shared template.
    encoded = np.array(sentence_array, copy=True)
    word_limit, length_limit = encoded.shape[:2]

    words = sentence.split(' ')
    if len(words) > word_limit:
        raise IndexError(
            'sentence has {} words; at most {} are supported'.format(
                len(words), word_limit))

    for i, word in enumerate(words):
        if len(word) > length_limit:
            raise IndexError(
                'word {} has {} characters; at most {} are supported'.format(
                    i, len(word), length_limit))
        for j, char in enumerate(word):
            # find() returns -1 instead of raising, so the error raised
            # below can name the offending character.
            index = characters.find(char)
            if index < 0:
                raise ValueError(
                    'character {!r} cannot be encoded'.format(char))
            encoded[i, j, index] = 1.
    return encoded
def decode(array):
    """Convert a one-hot array back into a sentence.

    Each ``array[i]`` is read as one word and each ``array[i][j]`` as one
    character slot. A slot's character is the ``token_index`` entry for its
    first non-zero position; the first all-zero slot ends the word. Words
    are joined with single spaces.

    All-zero words at the end are treated as padding and dropped. An
    all-zero word that is followed by a non-empty word is kept as an empty
    word, so consecutive or leading spaces are preserved instead of
    truncating the sentence at that point.

    Args:
        array: Iterable of words, each an iterable of slots, where every
            slot provides NumPy's ``nonzero()``.

    Returns:
        The decoded sentence, or ``''`` when no word contains a character.

    Raises:
        TypeError: If a slot's first non-zero position has no entry in
            ``token_index``.
    """
    words = []
    for word in array:
        chars = []
        for char_list in word:
            hot_positions = char_list.nonzero()[0]
            if hot_positions.size == 0:
                # An all-zero slot marks the end of this word.
                break
            chars.append(token_index.get(hot_positions[0]))
        words.append(''.join(chars))

    # Trailing empty words are unused padding rows, not part of the text.
    while words and not words[-1]:
        words.pop()
    return ' '.join(words)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment