Jan Schmitz (JanSchm)
import random

# Define the possible outcomes of the dice roll
# Each number has an equal probability of being rolled
probability_distribution = [1, 2, 3, 4, 5, 6]

# Set the number of simulations to run
num_simulations = 20000

# Run the simulations and store the result of each roll
results = [random.choice(probability_distribution) for _ in range(num_simulations)]
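A quick sanity check on the simulation above is to tally how often each face comes up; with 20,000 rolls every empirical frequency should sit near 1/6. A minimal sketch using the names from the snippet above:

from collections import Counter

# Count each outcome and print its empirical frequency (should be close to 1/6)
counts = Counter(results)
for face in sorted(counts):
    print(f"{face}: {counts[face] / num_simulations:.4f}")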
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        'SiameseTriplet_AlbertBase_epoch{epoch:02d}_val-loss{val_loss:.6f}.hdf5',
        monitor='val_loss',
        save_best_only=True,
        verbose=1),
]

# Train the network
history = model.fit(train_gen,
                    steps_per_epoch=len(df_train) // BATCH_SIZE + 1,
                    batch_size=None,
                    verbose=1,
                    epochs=25,
                    shuffle=True,
                    validation_data=val_gen,
                    callbacks=callbacks)
import tensorflow as tf
import tensorflow_hub as hub

# Load ALBERT-base encoder from TF Hub
albert_encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/albert_en_base/2", trainable=True)

# Siamese ALBERT model: three token-level inputs feeding the shared encoder
input_word_ids = tf.keras.layers.Input(shape=(128,), dtype=tf.int32)
input_mask = tf.keras.layers.Input(shape=(128,), dtype=tf.int32)
input_type_ids = tf.keras.layers.Input(shape=(128,), dtype=tf.int32)
albert_layer = albert_encoder({'input_word_ids': input_word_ids,
                               'input_mask': input_mask,
                               'input_type_ids': input_type_ids})['pooled_output']
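The fragment stops at the pooled encoder output. A plausible way the Siamese/triplet model could be assembled from here, sketched under assumptions (the 256-dim projection, the L2 normalization, and the anchor/positive/negative wiring are illustrative, not taken from the gist):

# Project the pooled output into an embedding space (size 256 is an assumption)
embedding = tf.keras.layers.Dense(256)(albert_layer)
embedding = tf.keras.layers.Lambda(lambda t: tf.math.l2_normalize(t, axis=1))(embedding)

# Shared encoder: (word_ids, mask, type_ids) -> sentence embedding
encoder = tf.keras.Model(inputs=[input_word_ids, input_mask, input_type_ids],
                         outputs=embedding)

# Three weight-sharing branches, one per triplet element (names are illustrative)
anchor_in = [tf.keras.layers.Input(shape=(128,), dtype=tf.int32) for _ in range(3)]
positive_in = [tf.keras.layers.Input(shape=(128,), dtype=tf.int32) for _ in range(3)]
negative_in = [tf.keras.layers.Input(shape=(128,), dtype=tf.int32) for _ in range(3)]

# The concatenated embeddings would then feed a triplet loss during training
merged = tf.keras.layers.Concatenate(axis=1)(
    [encoder(anchor_in), encoder(positive_in), encoder(negative_in)])
model = tf.keras.Model(inputs=anchor_in + positive_in + negative_in, outputs=merged)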
# Load ALBERT preprocessing model (tokenizer) from TF Hub
albert_tokenizer = hub.KerasLayer("https://tfhub.dev/tensorflow/albert_en_preprocess/2")
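The preprocessing layer takes a batch of raw strings and returns the dictionary of inputs the encoder above expects (input_word_ids, input_mask, input_type_ids, padded or truncated to 128 tokens by default). A short usage sketch; the example sentence is illustrative:

# Tokenize a batch of raw sentences with the TF Hub preprocessing layer
sentences = tf.constant(["The markets closed higher today."])
tokens = albert_tokenizer(sentences)

print(list(tokens.keys()))               # input_word_ids, input_mask, input_type_ids
print(tokens['input_word_ids'].shape)    # (1, 128)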
# Define data generator class for online learning
class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, data, tokenizer, batch_size):
        self.data = data
        self.tokenizer = tokenizer
        self.batch_size = batch_size
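    # Hypothetical remainder of the Sequence interface (the preview is truncated);
    # tf.keras.utils.Sequence needs at least __len__ and __getitem__. The 'text'
    # column name and the dummy targets below are assumptions, not from the gist.
    def __len__(self):
        # Number of batches per epoch (ceiling division)
        return (len(self.data) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        # Slice one batch of raw text and tokenize it on the fly ("online" learning)
        batch = self.data.iloc[idx * self.batch_size:(idx + 1) * self.batch_size]
        tokens = self.tokenizer(tf.constant(batch['text'].tolist()))
        return tokens, tf.zeros(len(batch))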
def create_model():
    '''Initialize time and transformer layers'''
    time_embedding = Time2Vector(seq_len)
    attn_layer1 = TransformerEncoder(d_k, d_v, n_heads, ff_dim)
    attn_layer2 = TransformerEncoder(d_k, d_v, n_heads, ff_dim)
    attn_layer3 = TransformerEncoder(d_k, d_v, n_heads, ff_dim)

    '''Construct model'''
    in_seq = Input(shape=(seq_len, 5))
    x = time_embedding(in_seq)
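    # Hypothetical continuation (not part of the preview): concatenate the time
    # embedding with the input features, stack the three encoder blocks, and
    # finish with a small regression head. Layer sizes and the single-output
    # head are assumptions; Concatenate, GlobalAveragePooling1D, Dropout, Dense
    # and Model are assumed imported from tensorflow.keras.
    x = Concatenate(axis=-1)([in_seq, x])
    x = attn_layer1((x, x, x))
    x = attn_layer2((x, x, x))
    x = attn_layer3((x, x, x))
    x = GlobalAveragePooling1D()(x)
    x = Dropout(0.1)(x)
    x = Dense(64, activation='relu')(x)
    out = Dense(1, activation='linear')(x)

    model = Model(inputs=in_seq, outputs=out)
    model.compile(loss='mse', optimizer='adam', metrics=['mae'])
    return model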
class TransformerEncoder(Layer):
    def __init__(self, d_k, d_v, n_heads, ff_dim, dropout=0.1, **kwargs):
        super(TransformerEncoder, self).__init__()
        self.d_k = d_k
        self.d_v = d_v
        self.n_heads = n_heads
        self.ff_dim = ff_dim
        self.attn_heads = list()
        self.dropout_rate = dropout
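    # Hypothetical build/call for this block (the preview is truncated); a common
    # post-norm layout: multi-head attention + residual + LayerNorm, then a
    # position-wise feed-forward (kernel-size-1 Conv1D) + residual + LayerNorm.
    # Conv1D, Dropout and LayerNormalization are assumed imported from tensorflow.keras.layers.
    def build(self, input_shape):
        self.attn_multi = MultiAttention(self.d_k, self.d_v, self.n_heads)
        self.attn_dropout = Dropout(self.dropout_rate)
        self.attn_normalize = LayerNormalization(epsilon=1e-6)

        self.ff_conv1D_1 = Conv1D(filters=self.ff_dim, kernel_size=1, activation='relu')
        self.ff_conv1D_2 = Conv1D(filters=input_shape[0][-1], kernel_size=1)  # back to input width
        self.ff_dropout = Dropout(self.dropout_rate)
        self.ff_normalize = LayerNormalization(epsilon=1e-6)

    def call(self, inputs):  # inputs = (query, key, value)
        attn = self.attn_multi(inputs)
        attn = self.attn_dropout(attn)
        attn = self.attn_normalize(inputs[0] + attn)      # residual connection + norm

        ff = self.ff_conv1D_1(attn)
        ff = self.ff_conv1D_2(ff)
        ff = self.ff_dropout(ff)
        return self.ff_normalize(attn + ff)               # residual connection + norm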
class MultiAttention(Layer):
    def __init__(self, d_k, d_v, n_heads):
        super(MultiAttention, self).__init__()
        self.d_k = d_k
        self.d_v = d_v
        self.n_heads = n_heads
        self.attn_heads = list()

    def build(self, input_shape):
        # Create one single-head attention layer per requested head
        for n in range(self.n_heads):
            self.attn_heads.append(SingleAttention(self.d_k, self.d_v))
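        # Hypothetical remainder (the preview is truncated): project the
        # concatenated head outputs back to the input feature dimension.
        self.linear = Dense(input_shape[0][-1],
                            kernel_initializer='glorot_uniform',
                            bias_initializer='glorot_uniform')

    def call(self, inputs):  # inputs = (query, key, value)
        attn = [head(inputs) for head in self.attn_heads]
        concat_attn = tf.concat(attn, axis=-1)
        return self.linear(concat_attn)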
class SingleAttention(Layer):
    def __init__(self, d_k, d_v):
        super(SingleAttention, self).__init__()
        self.d_k = d_k
        self.d_v = d_v

    def build(self, input_shape):
        self.query = Dense(self.d_k, input_shape=input_shape, kernel_initializer='glorot_uniform', bias_initializer='glorot_uniform')
        self.key = Dense(self.d_k, input_shape=input_shape, kernel_initializer='glorot_uniform', bias_initializer='glorot_uniform')
        self.value = Dense(self.d_v, input_shape=input_shape, kernel_initializer='glorot_uniform', bias_initializer='glorot_uniform')
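    # Hypothetical call method (the preview stops at build): standard scaled
    # dot-product attention over (query, key, value).
    def call(self, inputs):  # inputs = (query, key, value)
        q = self.query(inputs[0])
        k = self.key(inputs[1])
        v = self.value(inputs[2])

        scores = tf.matmul(q, k, transpose_b=True) / tf.math.sqrt(tf.cast(self.d_k, tf.float32))
        weights = tf.nn.softmax(scores, axis=-1)
        return tf.matmul(weights, v)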
class Time2Vector(Layer):
    def __init__(self, seq_len, **kwargs):
        super(Time2Vector, self).__init__()
        self.seq_len = seq_len

    def build(self, input_shape):
        self.weights_linear = self.add_weight(name='weight_linear',
                                              shape=(int(self.seq_len),),
                                              initializer='uniform',
                                              trainable=True)
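        # Hypothetical remaining weights and call (the preview is truncated):
        # Time2Vec learns one linear and one periodic time feature per timestep.
        self.bias_linear = self.add_weight(name='bias_linear',
                                           shape=(int(self.seq_len),),
                                           initializer='uniform',
                                           trainable=True)
        self.weights_periodic = self.add_weight(name='weight_periodic',
                                                shape=(int(self.seq_len),),
                                                initializer='uniform',
                                                trainable=True)
        self.bias_periodic = self.add_weight(name='bias_periodic',
                                             shape=(int(self.seq_len),),
                                             initializer='uniform',
                                             trainable=True)

    def call(self, x):
        # Collapse the feature dimension so each timestep maps to one scalar
        # (averaging all input features here; the exact columns used are an assumption)
        x = tf.math.reduce_mean(x, axis=-1)
        time_linear = tf.expand_dims(self.weights_linear * x + self.bias_linear, axis=-1)
        time_periodic = tf.expand_dims(tf.math.sin(x * self.weights_periodic + self.bias_periodic), axis=-1)
        return tf.concat([time_linear, time_periodic], axis=-1)   # (batch, seq_len, 2)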
model.fit(X_train, y_train,
          batch_size=2048,
          verbose=2,
          callbacks=[callback],
          epochs=200,
          validation_data=(X_val, y_val))
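The callback referenced in the call above is not defined in this fragment; a typical choice for a run like this is a checkpoint that keeps only the best weights by validation loss (the filename and monitored metric below are assumptions):

# Hypothetical definition of the callback used above
callback = tf.keras.callbacks.ModelCheckpoint('best_model.hdf5',
                                              monitor='val_loss',
                                              save_best_only=True,
                                              verbose=1)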