I was wondering what the proper way is to load pre-trained embeddings using Keras-TensorFlow with SageMaker. Normally you would load pretrained embeddings (such as GloVe) into memory and then assign them to your embedding layer as follows:
embedding = layers.Embedding(50000, 300, weights=[embedding_matrix])(text)
where embedding_matrix is a (50k, 300) pretrained embedding matrix.
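For concreteness, a minimal sketch of how such a matrix would typically be built in memory before being passed to weights= (the glove.840B.300d.txt file and the word_index mapping are placeholders for illustration, not part of my actual pipeline):

import numpy as np

VOCAB_SIZE, EMBED_DIM = 50000, 300
word_index = {}  # word -> integer id, e.g. tokenizer.word_index from a fitted Keras Tokenizer
embedding_matrix = np.zeros((VOCAB_SIZE, EMBED_DIM), dtype=np.float32)
with open('glove.840B.300d.txt', encoding='utf-8') as f:  # hypothetical local GloVe file
    for line in f:
        values = line.rstrip().split(' ')
        word = values[0]
        idx = word_index.get(word)
        if idx is not None and idx < VOCAB_SIZE:
            embedding_matrix[idx] = np.asarray(values[1:], dtype=np.float32)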
But I'm not sure how to actually load the embedding matrix into memory in the keras_model_fn function in the entry point file. Help would be appreciated. My entry point file is as follows:
import numpy as np
import os
import json
import pickle
import sys
import traceback
import tensorflow as tf
from tensorflow.python.estimator.export.export import build_raw_serving_input_receiver_fn
from tensorflow.python.keras._impl.keras.layers import Dense
from tensorflow.python.keras._impl.keras.layers import Dropout
from tensorflow.python.keras._impl.keras.layers import LSTM
from tensorflow.python.keras._impl.keras.layers.embeddings import Embedding
from tensorflow.python.keras._impl.keras.optimizers import Adam
from tensorflow.python.keras._impl.keras.callbacks import ModelCheckpoint
from tensorflow.python.keras._impl.keras.callbacks import CSVLogger
from tensorflow.python.keras._impl.keras.callbacks import EarlyStopping
from tensorflow.python.keras._impl.keras.callbacks import LambdaCallback
from tensorflow.python.keras._impl.keras import metrics
from tensorflow.python.keras._impl.keras.models import Model
from tensorflow.python.keras._impl.keras import layers
from tensorflow.python.keras._impl.keras import Input
NUM_CLASSES = 2
NUM_DATA_BATCHES = 5
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 10000 * NUM_DATA_BATCHES
BATCH_SIZE = 256
INPUT_TENSOR_NAME_1 = 'text1' # needs to match the name of the first layer + "_input"
INPUT_TENSOR_NAME_2 = 'text2' # needs to match the name of the first layer + "_input"
INPUT_TENSOR_NAME_3 = 'title1' # needs to match the name of the first layer + "_input"
INPUT_TENSOR_NAME_4 = 'title2' # needs to match the name of the first layer + "_input"
def keras_model_fn(training_dir):
"""keras_model_fn receives hyperparameters from the training job and returns a compiled keras model.
The model will transformed in a TensorFlow Estimator before training and it will saved in a TensorFlow Serving
SavedModel in the end of training.
Args:
hyperparameters: The hyperparameters passed to SageMaker TrainingJob that runs your TensorFlow training
script.
Returns: A compiled Keras model
"""
    text_input_1 = Input(shape=(None,), dtype='int32', name='text1')
    embedded_text_1 = layers.Embedding(50000, 300)(text_input_1)
    embed_drop_1 = Dropout(.5)(embedded_text_1)
    text_input_2 = Input(shape=(None,), dtype='int32', name='text2')
    embedded_text_2 = layers.Embedding(50000, 300)(text_input_2)
    embed_drop_2 = Dropout(.5)(embedded_text_2)
    shared_lstm_text = LSTM(256)
    left_output_text = shared_lstm_text(embed_drop_1)
    right_output_text = shared_lstm_text(embed_drop_2)
    title_input_1 = Input(shape=(None,), dtype='int32', name='title1')
    embedded_title_1 = layers.Embedding(50000, 300)(title_input_1)
    embed_drop_3 = Dropout(.5)(embedded_title_1)
    title_input_2 = Input(shape=(None,), dtype='int32', name='title2')
    embedded_title_2 = layers.Embedding(50000, 300)(title_input_2)
    embed_drop_4 = Dropout(.5)(embedded_title_2)
    shared_lstm_title = LSTM(128)
    left_output_title = shared_lstm_title(embed_drop_3)
    right_output_title = shared_lstm_title(embed_drop_4)
    # Calculates the distance as defined by the MaLSTM model
    # malstm_distance = Merge(mode=lambda x: exponent_neg_manhattan_distance(x[0], x[1]), output_shape=lambda x: (x[0][0], 1))([left_output, right_output])
    merged = layers.concatenate([left_output_text, right_output_text, left_output_title, right_output_title], axis=-1)
    drop_1 = Dropout(.3)(merged)
    dense_1 = layers.Dense(256, activation='sigmoid')(drop_1)
    drop_2 = Dropout(.3)(dense_1)
    dense_2 = layers.Dense(128, activation='sigmoid')(drop_2)
    predictions = layers.Dense(1, activation='sigmoid')(dense_2)
    # Pack it all up into a model
    shared_layer_model = Model([text_input_1, text_input_2, title_input_1, title_input_2], [predictions])
    shared_layer_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return shared_layer_model
def train_input_fn(training_dir, hyperparameters=None):
    return _input_fn(training_dir, "train")

def eval_input_fn(training_dir, hyperparameters=None):
    return _input_fn(training_dir, "dev")

def serving_input_fn(hyperparameters=None):
    text_ph_1 = tf.placeholder(tf.int32, shape=[None, 500])
    text_ph_2 = tf.placeholder(tf.int32, shape=[None, 500])
    title_ph_1 = tf.placeholder(tf.int32, shape=[None, 20])
    title_ph_2 = tf.placeholder(tf.int32, shape=[None, 20])
    # label is not required since serving is only used for inference
    feature_placeholders = {"text1": text_ph_1, "text2": text_ph_2, "title1": title_ph_1, "title2": title_ph_2}
    return build_raw_serving_input_receiver_fn(feature_placeholders)()
def _input_fn(training_dir, mode):
    if mode == "train":
        train_text_1 = np.vstack((np.load(training_dir + "/negative_" + mode + "_text_1.npy"), np.load(training_dir + "/positive_" + mode + "_text_1.npy")))
        train_text_2 = np.vstack((np.load(training_dir + "/negative_" + mode + "_text_2.npy"), np.load(training_dir + "/positive_" + mode + "_text_2.npy")))
    else:
        train_text_1 = np.load(training_dir + "/" + mode + "_text_1.npy")
        train_text_2 = np.load(training_dir + "/" + mode + "_text_2.npy")
    train_title_1 = np.load(training_dir + "/" + mode + "_title_1.npy")
    train_title_2 = np.load(training_dir + "/" + mode + "_title_2.npy")
    y = np.load(training_dir + "/" + mode + "_targets.npy")
    y = y.reshape((y.shape[0], 1)).astype(np.float32)
    permutation = np.random.permutation(train_text_1.shape[0])
    train_text_1 = train_text_1[permutation]
    train_text_2 = train_text_2[permutation]
    train_title_1 = train_title_1[permutation]
    train_title_2 = train_title_2[permutation]
    y = y[permutation]
    x = {INPUT_TENSOR_NAME_1: train_text_1,
         INPUT_TENSOR_NAME_2: train_text_2,
         INPUT_TENSOR_NAME_3: train_title_1,
         INPUT_TENSOR_NAME_4: train_title_2}
    dataset = tf.estimator.inputs.numpy_input_fn(x=x, y=y, batch_size=BATCH_SIZE, num_epochs=10, shuffle=False)()
    return dataset
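For reference, this is the kind of change I had in mind inside keras_model_fn, just a sketch under the assumption that the pretrained matrix could be uploaded as an extra embedding_matrix.npy file in the same prefix as the training data, so it would end up in training_dir alongside the .npy files that _input_fn already reads (the filename and the trainable=False flag are my own choices for illustration, not something I know the SDK expects):

def _load_embedding_matrix(training_dir):
    # Sketch only: assumes a hypothetical embedding_matrix.npy of shape (50000, 300)
    # was uploaded with the training data and is therefore present in training_dir.
    return np.load(os.path.join(training_dir, "embedding_matrix.npy"))

and then, inside keras_model_fn:

embedding_matrix = _load_embedding_matrix(training_dir)
embedded_text_1 = layers.Embedding(50000, 300, weights=[embedding_matrix], trainable=False)(text_input_1)

Is that a supported pattern, or is there a recommended way to get a large artifact like this into keras_model_fn?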