@tdiggelm
Created December 3, 2019 10:15
Test TPU: train a bidirectional-GRU sentiment classifier on the NLTK movie_reviews corpus using a Keras model under TPUStrategy.
# Install NLTK in the notebook environment (needed for the movie_reviews corpus).
! pip3 install nltk --user

import os
import tensorflow as tf
import numpy as np
print(tf.__version__)

# Download the movie review corpus and the punkt tokenizer data.
import nltk
nltk.download('movie_reviews')
nltk.download('punkt')
from nltk.corpus import movie_reviews as mov
# Connect to the TPU worker advertised via the TPU_NAME environment variable
# and initialize the TPU system before building the model.
tpu_address = os.environ['TPU_NAME']
cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
    tpu=tpu_address)
tf.config.experimental_connect_to_cluster(cluster_resolver)
tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
tpu_strategy = tf.distribute.experimental.TPUStrategy(cluster_resolver)
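Not part of the original gist, but a quick sanity check at this point can confirm the strategy actually sees the TPU cores before any model is built:

# Sanity check (assumption, not in the original gist): the replica count should
# match the number of TPU cores, e.g. 8 on a v2-8/v3-8 device.
print('Number of TPU replicas:', tpu_strategy.num_replicas_in_sync)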
# Model hyperparameters.
vocab_size = 30000
embd_size = 128
hidden_size = 256
n_categories = 2
# Build and compile the model inside the TPU strategy scope so its variables
# are created on the TPU replicas.
with tpu_strategy.scope():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Embedding(vocab_size, embd_size),
        tf.keras.layers.Bidirectional(
            tf.keras.layers.GRU(hidden_size, return_sequences=True)),
        tf.keras.layers.GlobalAveragePooling1D(),
        tf.keras.layers.Dense(n_categories)  # logits for the two classes
    ])
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]
    model.compile(optimizer='adam', loss=loss, metrics=metrics)
def random_split(*arrays, train_size=0.8):
    """Split each array into a (train, test) pair using one shared random permutation."""
    n_all = arrays[0].shape[0]
    n_train = int(train_size * n_all)
    ind = np.random.permutation(n_all)
    ind_train = ind[:n_train]
    ind_test = ind[n_train:]
    splits = []
    for arr in arrays:
        splits.append((arr[ind_train], arr[ind_test]))
    return splits
# Load the raw reviews and their 'pos'/'neg' labels.
fileids = mov.fileids()
texts = [mov.raw(fid) for fid in fileids]
labels = [mov.categories(fid)[0] for fid in fileids]

# Fit the tokenizer, convert texts to integer sequences and pad/truncate to 512 tokens.
tok = tf.keras.preprocessing.text.Tokenizer(num_words=vocab_size)
tok.fit_on_texts(texts)
x_all = tok.texts_to_sequences(texts)
x_all = tf.keras.preprocessing.sequence.pad_sequences(x_all, maxlen=512)
y_all = np.array([l == 'pos' for l in labels], dtype=np.int32)  # 1 = positive

# 80/20 random train/test split.
(x_train, x_test), (y_train, y_test) = random_split(x_all, y_all)
# Train on the TPU; evaluate on the held-out split after every epoch.
model.fit(x_train, y_train, batch_size=64, epochs=10,
          validation_data=(x_test, y_test))
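A minimal sketch of scoring a new review with the fitted tokenizer and model, assuming the cells above have already run; the sample text below is made up for illustration:

# Hypothetical sample review, preprocessed the same way as the training data.
sample = ['a surprisingly touching film with great performances']
x_sample = tok.texts_to_sequences(sample)
x_sample = tf.keras.preprocessing.sequence.pad_sequences(x_sample, maxlen=512)
logits = model.predict(x_sample)
probs = tf.nn.softmax(logits, axis=-1).numpy()
print('P(neg), P(pos):', probs[0])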