
How do I load pre-trained embeddings? #151

@samuelhkahn

I was wondering what the proper way is to load pre-trained embeddings using Keras/TensorFlow with SageMaker. Normally you would load pre-trained embeddings (such as GloVe) into memory and then assign them to your embedding layer as follows:

embedding = layers.Embedding(50000, 300, weights=[embedding_matrix])(text)

where embedding_matrix is a (50k, 300) pre-trained embedding matrix. But I'm not sure how to actually load the embedding matrix into memory in the keras_model_fn function in the entry point file. Help would be appreciated. My entry point file is as follows:

import numpy as np
import os
import json
import pickle
import sys
import traceback
import tensorflow as tf
from tensorflow.python.estimator.export.export import build_raw_serving_input_receiver_fn
from tensorflow.python.keras._impl.keras.layers import Dense
from tensorflow.python.keras._impl.keras.layers import Dropout
from tensorflow.python.keras._impl.keras.layers import LSTM
from tensorflow.python.keras._impl.keras.layers.embeddings import Embedding
from tensorflow.python.keras._impl.keras.optimizers import Adam
from tensorflow.python.keras._impl.keras.callbacks import ModelCheckpoint
from tensorflow.python.keras._impl.keras.callbacks import CSVLogger
from tensorflow.python.keras._impl.keras.callbacks import EarlyStopping
from tensorflow.python.keras._impl.keras.callbacks import LambdaCallback
from tensorflow.python.keras._impl.keras import metrics
from tensorflow.python.keras._impl.keras.models import Model
from tensorflow.python.keras._impl.keras import layers
from tensorflow.python.keras._impl.keras import Input

NUM_CLASSES = 2
NUM_DATA_BATCHES = 5
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 10000 * NUM_DATA_BATCHES
BATCH_SIZE = 256
INPUT_TENSOR_NAME_1 = 'text1' # needs to match the name of the first layer + "_input"
INPUT_TENSOR_NAME_2 = 'text2' # needs to match the name of the first layer + "_input"
INPUT_TENSOR_NAME_3 = 'title1' # needs to match the name of the first layer + "_input"
INPUT_TENSOR_NAME_4 = 'title2' # needs to match the name of the first layer + "_input"



def keras_model_fn(training_dir):
    """keras_model_fn receives hyperparameters from the training job and returns a compiled keras model.
    The model will transformed in a TensorFlow Estimator before training and it will saved in a TensorFlow Serving
    SavedModel in the end of training.

    Args:
        hyperparameters: The hyperparameters passed to SageMaker TrainingJob that runs your TensorFlow training
                         script.
    Returns: A compiled Keras model
    """

    text_input_1 = Input(shape=(None,), dtype='int32', name='text1')
    embedded_text_1 = layers.Embedding(50000, 300)(text_input_1)
    embed_drop_1 = Dropout(0.5)(embedded_text_1)

    text_input_2 = Input(shape=(None,), dtype='int32', name='text2')
    embedded_text_2 = layers.Embedding(50000, 300)(text_input_2)
    embed_drop_2 = Dropout(0.5)(embedded_text_2)

    shared_lstm_text = LSTM(256)
    left_output_text = shared_lstm_text(embed_drop_1)
    right_output_text = shared_lstm_text(embed_drop_2)

    title_input_1 = Input(shape=(None,), dtype='int32', name='title1')
    embedded_title_1 = layers.Embedding(50000, 300)(title_input_1)
    embed_drop_3 = Dropout(0.5)(embedded_title_1)

    title_input_2 = Input(shape=(None,), dtype='int32', name='title2')
    embedded_title_2 = layers.Embedding(50000, 300)(title_input_2)
    embed_drop_4 = Dropout(0.5)(embedded_title_2)

    shared_lstm_title = LSTM(128)
    left_output_title = shared_lstm_title(embed_drop_3)
    right_output_title = shared_lstm_title(embed_drop_4)

    # Calculates the distance as defined by the MaLSTM model
    # malstm_distance = Merge(mode=lambda x: exponent_neg_manhattan_distance(x[0], x[1]), output_shape=lambda x: (x[0][0], 1))([left_output, right_output])
    merged = layers.concatenate([left_output_text, right_output_text, left_output_title, right_output_title], axis=-1)
    drop_1 = Dropout(0.3)(merged)
    dense_1 = layers.Dense(256, activation='sigmoid')(drop_1)
    drop_2 = Dropout(0.3)(dense_1)
    dense_2 = layers.Dense(128, activation='sigmoid')(drop_2)

    predictions = layers.Dense(1, activation='sigmoid')(dense_2)

    # Pack it all up into a model
    shared_layer_model = Model([text_input_1, text_input_2, title_input_1, title_input_2], [predictions])
    shared_layer_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return shared_layer_model


def train_input_fn(training_dir, hyperparameters=None):
    return _input_fn(training_dir, "train")

def eval_input_fn(training_dir, hyperparameters=None):
    return _input_fn(training_dir, "dev")

def serving_input_fn(hyperparameters=None):
    text_ph_1 = tf.placeholder(tf.int32, shape=[None, 500])
    text_ph_2 = tf.placeholder(tf.int32, shape=[None, 500])
    title_ph_1 = tf.placeholder(tf.int32, shape=[None, 20])
    title_ph_2 = tf.placeholder(tf.int32, shape=[None, 20])

    # Labels are not required since serving is only used for inference
    feature_placeholders = {"text1": text_ph_1, "text2": text_ph_2,
                            "title1": title_ph_1, "title2": title_ph_2}
    return build_raw_serving_input_receiver_fn(feature_placeholders)()

def _input_fn(training_dir, mode):
    if mode == "train":
        # Training examples are split across negative and positive files; stack them together
        train_text_1 = np.vstack((np.load(training_dir + "/negative_" + mode + "_text_1.npy"),
                                  np.load(training_dir + "/positive_" + mode + "_text_1.npy")))
        train_text_2 = np.vstack((np.load(training_dir + "/negative_" + mode + "_text_2.npy"),
                                  np.load(training_dir + "/positive_" + mode + "_text_2.npy")))
    else:
        train_text_1 = np.load(training_dir + "/" + mode + "_text_1.npy")
        train_text_2 = np.load(training_dir + "/" + mode + "_text_2.npy")
    train_title_1 = np.load(training_dir + "/" + mode + "_title_1.npy")
    train_title_2 = np.load(training_dir + "/" + mode + "_title_2.npy")

    y = np.load(training_dir + "/" + mode + "_targets.npy")
    y = y.reshape((y.shape[0], 1)).astype(np.float32)

    # Shuffle all four inputs and the targets with the same permutation
    permutation = np.random.permutation(train_text_1.shape[0])
    train_text_1 = train_text_1[permutation]
    train_text_2 = train_text_2[permutation]
    train_title_1 = train_title_1[permutation]
    train_title_2 = train_title_2[permutation]
    y = y[permutation]

    x = {INPUT_TENSOR_NAME_1: train_text_1,
         INPUT_TENSOR_NAME_2: train_text_2,
         INPUT_TENSOR_NAME_3: train_title_1,
         INPUT_TENSOR_NAME_4: train_title_2}
    dataset = tf.estimator.inputs.numpy_input_fn(x=x, y=y, batch_size=BATCH_SIZE, num_epochs=10, shuffle=False)()

    return dataset
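Concretely, here is the kind of thing I am hoping to do inside keras_model_fn. This is only a minimal sketch reusing the imports from the file above: the GloVe file name (glove.6B.300d.txt) and the pickled word_index lookup are placeholder names of mine, and it assumes both were uploaded alongside the training data so that they end up in training_dir:

def _load_embedding_matrix(training_dir, vocab_size=50000, embedding_dim=300):
    # Word -> row index mapping saved during preprocessing (placeholder file name)
    with open(os.path.join(training_dir, "word_index.pkl"), "rb") as f:
        word_index = pickle.load(f)

    # Parse the GloVe text file: each line holds a token followed by its vector
    embeddings_index = {}
    with open(os.path.join(training_dir, "glove.6B.300d.txt")) as f:
        for line in f:
            values = line.split()
            embeddings_index[values[0]] = np.asarray(values[1:], dtype=np.float32)

    # Rows for words without a pre-trained vector stay all-zero
    embedding_matrix = np.zeros((vocab_size, embedding_dim), dtype=np.float32)
    for word, i in word_index.items():
        if i < vocab_size and word in embeddings_index:
            embedding_matrix[i] = embeddings_index[word]
    return embedding_matrix

Each embedding layer in keras_model_fn would then be initialized with the pre-trained weights, e.g.:

    embedding_matrix = _load_embedding_matrix(training_dir)
    embedded_text_1 = layers.Embedding(50000, 300, weights=[embedding_matrix],
                                       trainable=False)(text_input_1)

(trainable=False freezes the vectors during training; dropping it lets them be fine-tuned.)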
