tdiggelm · December 3, 2019 10:15
diff --git a/tpu_test.py b/tpu_test.py
 ! pip3 install nltk --user

 import os
 import tensorflow as tf
 import numpy as np
 # Print the TensorFlow version that is loaded in this runtime.
 print(tf.__version__)

 import nltk
 # Download the corpus data before importing its reader.
 nltk.download('movie_reviews')
 # NOTE(review): the statements visible in this script only call
 # mov.fileids(), mov.raw() and mov.categories(); confirm whether the
 # 'punkt' download is still required.
 nltk.download('punkt')
 from nltk.corpus import movie_reviews as mov

 # The TPU address is read from the TPU_NAME environment variable; if the
 # variable is not set, this lookup raises KeyError.
 tpu_address = os.environ['TPU_NAME']
 cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
    tpu=tpu_address)
 # Order matters here: connect to the cluster, then initialize the TPU
 # system, and only then create the strategy from the same resolver.
 tf.config.experimental_connect_to_cluster(cluster_resolver)
 tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
 # tpu_strategy supplies the scope in which the model is created.
 tpu_strategy = tf.distribute.experimental.TPUStrategy(cluster_resolver)

 # Model hyper-parameters.
 vocab_size = 30000
 embd_size = 128
 hidden_size = 256
 n_categories = 2

 # Create and compile the classifier inside the TPU strategy scope.
 with tpu_strategy.scope():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Embedding(vocab_size, embd_size))
    model.add(
        tf.keras.layers.Bidirectional(
            tf.keras.layers.GRU(hidden_size, return_sequences=True)))
    model.add(tf.keras.layers.GlobalAveragePooling1D())
    # The last layer has no activation; the loss is told so via from_logits.
    model.add(tf.keras.layers.Dense(n_categories))
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]
    model.compile(optimizer='adam', loss=loss, metrics=metrics)
    
 def random_split(*arrays, train_size=0.8):
    """Split several arrays into train/test parts with one shared shuffle.

    A single random permutation of the row indices is drawn with
    ``np.random.permutation`` and applied to every array, so rows that
    belong together (e.g. features and labels) stay aligned.

    Args:
        *arrays: Arrays exposing ``.shape`` and supporting integer-array
            indexing; all must have the same length along axis 0.
        train_size: Fraction of rows placed in the first part, in
            ``[0, 1]``. The resulting row count is truncated with ``int``.

    Returns:
        A list with one ``(train, test)`` tuple per input array, in input
        order; an empty list when no arrays are given.

    Raises:
        ValueError: If ``train_size`` is outside ``[0, 1]`` or the arrays
            differ in their first-axis length.
    """
    if not 0.0 <= train_size <= 1.0:
        raise ValueError(
            "train_size must be between 0 and 1, got %r" % (train_size,))
    if not arrays:
        return []

    n_all = arrays[0].shape[0]
    # Without this check a longer array would silently lose its extra rows
    # and a shorter one would fail later with an opaque IndexError.
    lengths = [arr.shape[0] for arr in arrays]
    if any(length != n_all for length in lengths):
        raise ValueError(
            "all arrays must have the same first-axis length, got %r"
            % (lengths,))

    n_train = int(train_size * n_all)
    ind = np.random.permutation(n_all)
    ind_train, ind_test = ind[:n_train], ind[n_train:]
    return [(arr[ind_train], arr[ind_test]) for arr in arrays]
 # Read every review as raw text; the first category of a file is its label.
 fileids = mov.fileids()
 texts = list(map(mov.raw, fileids))
 labels = [mov.categories(file_id)[0] for file_id in fileids]

 # Fit the tokenizer (num_words=vocab_size) on the texts, convert them to
 # integer sequences and pass those through pad_sequences with maxlen=512.
 tok = tf.keras.preprocessing.text.Tokenizer(num_words=vocab_size)
 tok.fit_on_texts(texts)
 x_all = tf.keras.preprocessing.sequence.pad_sequences(
    tok.texts_to_sequences(texts), maxlen=512)
 # Encode the labels as int32: 1 for 'pos', 0 for anything else.
 y_all = np.array([label == 'pos' for label in labels], dtype=np.int32)
 (x_train, x_test), (y_train, y_test) = random_split(x_all, y_all)

 model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_test, y_test),
 )
	! pip3 install nltk --user

	import os
	import tensorflow as tf
	import numpy as np
	# Print the TensorFlow version that is loaded in this runtime.
	print(tf.__version__)

	import nltk
	# Download the corpus data before importing its reader.
	nltk.download('movie_reviews')
	# NOTE(review): the statements visible in this script only call
	# mov.fileids(), mov.raw() and mov.categories(); confirm whether the
	# 'punkt' download is still required.
	nltk.download('punkt')
	from nltk.corpus import movie_reviews as mov

	# The TPU address is read from the TPU_NAME environment variable; if the
	# variable is not set, this lookup raises KeyError.
	tpu_address = os.environ['TPU_NAME']
	cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
	tpu=tpu_address)
	# Order matters here: connect to the cluster, then initialize the TPU
	# system, and only then create the strategy from the same resolver.
	tf.config.experimental_connect_to_cluster(cluster_resolver)
	tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
	# tpu_strategy supplies the scope in which the model is created.
	tpu_strategy = tf.distribute.experimental.TPUStrategy(cluster_resolver)

	# Model hyper-parameters.
	vocab_size = 30000
	embd_size = 128
	hidden_size = 256
	n_categories = 2

	# Create and compile the classifier inside the TPU strategy scope. The
	# body of the ``with`` statement must be indented; the flattened paste
	# was not valid Python.
	with tpu_strategy.scope():
	    model = tf.keras.models.Sequential([
	        tf.keras.layers.Embedding(vocab_size, embd_size),
	        tf.keras.layers.Bidirectional(
	            tf.keras.layers.GRU(hidden_size, return_sequences=True)),
	        tf.keras.layers.GlobalAveragePooling1D(),
	        # No activation on the last layer; the loss uses from_logits=True.
	        tf.keras.layers.Dense(n_categories),
	    ])
	    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
	    metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]
	    model.compile(optimizer='adam', loss=loss, metrics=metrics)

	def random_split(*arrays, train_size=0.8):
	    """Split several arrays into train/test parts with one shared shuffle.

	    A single random permutation of the row indices is drawn with
	    ``np.random.permutation`` and applied to every array, so rows that
	    belong together (e.g. features and labels) stay aligned.

	    Args:
	        *arrays: Arrays exposing ``.shape`` and supporting integer-array
	            indexing; all must have the same length along axis 0.
	        train_size: Fraction of rows placed in the first part, in
	            ``[0, 1]``. The resulting row count is truncated with ``int``.

	    Returns:
	        A list with one ``(train, test)`` tuple per input array, in input
	        order; an empty list when no arrays are given.

	    Raises:
	        ValueError: If ``train_size`` is outside ``[0, 1]`` or the arrays
	            differ in their first-axis length.
	    """
	    if not 0.0 <= train_size <= 1.0:
	        raise ValueError(
	            "train_size must be between 0 and 1, got %r" % (train_size,))
	    if not arrays:
	        return []

	    n_all = arrays[0].shape[0]
	    # Without this check a longer array would silently lose its extra rows
	    # and a shorter one would fail later with an opaque IndexError.
	    lengths = [arr.shape[0] for arr in arrays]
	    if any(length != n_all for length in lengths):
	        raise ValueError(
	            "all arrays must have the same first-axis length, got %r"
	            % (lengths,))

	    n_train = int(train_size * n_all)
	    ind = np.random.permutation(n_all)
	    ind_train, ind_test = ind[:n_train], ind[n_train:]
	    return [(arr[ind_train], arr[ind_test]) for arr in arrays]
	# Read every review as raw text; the first category of a file is its label.
	fileids = mov.fileids()
	texts = list(map(mov.raw, fileids))
	labels = [mov.categories(file_id)[0] for file_id in fileids]

	# Fit the tokenizer (num_words=vocab_size) on the texts, convert them to
	# integer sequences and pass those through pad_sequences with maxlen=512.
	tok = tf.keras.preprocessing.text.Tokenizer(num_words=vocab_size)
	tok.fit_on_texts(texts)
	x_all = tf.keras.preprocessing.sequence.pad_sequences(
	    tok.texts_to_sequences(texts), maxlen=512)
	# Encode the labels as int32: 1 for 'pos', 0 for anything else.
	y_all = np.array([label == 'pos' for label in labels], dtype=np.int32)
	(x_train, x_test), (y_train, y_test) = random_split(x_all, y_all)

	model.fit(
	    x_train,
	    y_train,
	    batch_size=64,
	    epochs=10,
	    validation_data=(x_test, y_test),
	)