diff --git a/tensorflow_addons/examples/optimizers_lazyadam.ipynb b/tensorflow_addons/examples/optimizers_lazyadam.ipynb new file mode 100644 index 0000000000..c87c5e53a0 --- /dev/null +++ b/tensorflow_addons/examples/optimizers_lazyadam.ipynb @@ -0,0 +1,275 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "optimizers_lazyadam.ipynb", + "version": "0.3.2", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "metadata": { + "colab_type": "text", + "id": "Tce3stUlHN0L" + }, + "cell_type": "markdown", + "source": [ + "##### Copyright 2019 The TensorFlow Authors.\n", + "\n" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "tuOe1ymfHZPu", + "cellView": "form", + "colab": {} + }, + "cell_type": "code", + "source": [ + "#@title Licensed under the Apache License, Version 2.0\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "colab_type": "text", + "id": "MfBg1C5NB3X0" + }, + "cell_type": "markdown", + "source": [ + "# TensorFlow Addons Optimizers: LazyAdam\n", + "\n", + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
" + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "xHxb-dlhMIzW" + }, + "cell_type": "markdown", + "source": [ + "# Overview\n", + "\n", + "This notebook will demonstrate how to use the lazy adam optimizer from the Addons package.\n" + ] + }, + { + "metadata": { + "id": "bQwBbFVAyHJ_", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# LazyAdam\n", + "\n", + "> LazyAdam is a variant of the Adam optimizer that handles sparse updates moreefficiently.\n", + " The original Adam algorithm maintains two moving-average accumulators for\n", + " each trainable variable; the accumulators are updated at every step.\n", + " This class provides lazier handling of gradient updates for sparse\n", + " variables. It only updates moving-average accumulators for sparse variable\n", + " indices that appear in the current batch, rather than updating the\n", + " accumulators for all indices. Compared with the original Adam optimizer,\n", + " it can provide large improvements in model training throughput for some\n", + " applications. However, it provides slightly different semantics than the\n", + " original Adam algorithm, and may lead to different empirical results." + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "MUXex9ctTuDB" + }, + "cell_type": "markdown", + "source": [ + "## Setup" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "IqR2PQG4ZaZ0", + "colab": {} + }, + "cell_type": "code", + "source": [ + "!pip install tensorflow-gpu==2.0.0.a0\n", + "!pip install tensorflow-addons\n", + "from __future__ import absolute_import, division, print_function, unicode_literals\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_addons as tfa\n", + "import tensorflow_datasets as tfds\n", + "import numpy as np\n", + "from matplotlib import pyplot as plt" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "ys65MwOLKnXq", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# Hyperparameters\n", + "batch_size=64\n", + "epochs=10" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "KR01t9v_fxbT", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Build the Model" + ] + }, + { + "metadata": { + "id": "djpoAvfWNyL5", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "model = tf.keras.Sequential([\n", + " tf.keras.layers.Dense(64, input_shape=(784,), activation='relu', name='dense_1'),\n", + " tf.keras.layers.Dense(64, activation='relu', name='dense_2'),\n", + " tf.keras.layers.Dense(10, activation='softmax', name='predictions'),\n", + "])" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "0_D7CZqkv_Hj", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Prep the Data" + ] + }, + { + "metadata": { + "id": "U0bS3SyowBoB", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# Load MNIST dataset as NumPy arrays\n", + "dataset = {}\n", + "num_validation = 10000\n", + "(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()\n", + "\n", + "# Preprocess the data\n", + "x_train = x_train.reshape(-1, 784).astype('float32') / 255\n", + "x_test = x_test.reshape(-1, 784).astype('float32') / 255" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "HYE-BxhOzFQp", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Train and Evaluate\n", + "\n", + "Simply replace typical keras 
optimizers with the new tfa optimizer " + ] + }, + { + "metadata": { + "id": "NxfYhtiSzHf-", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# Compile the model\n", + "model.compile(\n", + " optimizer=tfa.optimizers.LazyAdam(0.001), # Utilize TFA optimizer\n", + " loss=tf.keras.losses.SparseCategoricalCrossentropy(),\n", + " metrics=['accuracy'])\n", + "\n", + "# Train the network\n", + "history = model.fit(\n", + " x_train,\n", + " y_train,\n", + " batch_size=batch_size,\n", + " epochs=epochs)\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "1Y--0tK69SXf", + "colab_type": "code", + "outputId": "163a7751-e35b-4d9f-cc07-1f8580bdf6bf", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + } + }, + "cell_type": "code", + "source": [ + "# Evaluate the network\n", + "print('Evaluate on test data:')\n", + "results = model.evaluate(x_test, y_test, batch_size=128)\n", + "print('Test loss = {0}, Test acc: {1}'.format(results[0], results[1]))" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Evaluate on test data:\n", + "10000/10000 [==============================] - 0s 21us/sample - loss: 0.0884 - accuracy: 0.9752\n", + "Test loss = 0.08840992146739736, Test acc: 0.9751999974250793\n" + ], + "name": "stdout" + } + ] + } + ] +} \ No newline at end of file diff --git a/tensorflow_addons/examples/tfa_optimizer.py b/tensorflow_addons/examples/tfa_optimizer.py deleted file mode 100644 index 14b955a5f0..0000000000 --- a/tensorflow_addons/examples/tfa_optimizer.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""MNIST example utilizing an optimizer from TensorFlow Addons.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -import tensorflow_addons as tfa - -VALIDATION_SAMPLES = 10000 - - -def build_mnist_model(): - """Build a simple dense network for processing MNIST data. - - :return: Keras `Model` - """ - inputs = tf.keras.Input(shape=(784,), name='digits') - net = tf.keras.layers.Dense(64, activation='relu', name='dense_1')(inputs) - net = tf.keras.layers.Dense(64, activation='relu', name='dense_2')(net) - net = tf.keras.layers.Dense( - 10, activation='softmax', name='predictions')(net) - - return tf.keras.Model(inputs=inputs, outputs=net) - - -def generate_data(num_validation): - """Download and preprocess the MNIST dataset. 
- - :num_validaton: Number of samples to use in validation set - :return: Dictionary of data split into train/test/val - """ - dataset = {} - - # Load MNIST dataset as NumPy arrays - (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() - - # Preprocess the data - x_train = x_train.reshape(-1, 784).astype('float32') / 255 - x_test = x_test.reshape(-1, 784).astype('float32') / 255 - - # Subset validation set - dataset['x_train'] = x_train[:-num_validation] - dataset['y_train'] = y_train[:-num_validation] - dataset['x_val'] = x_train[-num_validation:] - dataset['y_val'] = y_train[-num_validation:] - - dataset['x_test'] = x_test - dataset['y_test'] = y_test - - return dataset - - -def train_and_eval(): - """Train and evalute simple MNIST model using LazyAdam.""" - data = generate_data(num_validation=VALIDATION_SAMPLES) - dense_net = build_mnist_model() - dense_net.compile( - optimizer=tfa.optimizers.LazyAdam(0.001), - loss=tf.keras.losses.SparseCategoricalCrossentropy(), - metrics=['accuracy']) - - # Train the network - history = dense_net.fit( - data['x_train'], - data['y_train'], - batch_size=64, - epochs=10, - validation_data=(data['x_val'], data['y_val'])) - - # Evaluate the network - print('Evaluate on test data:') - results = dense_net.evaluate( - data['x_test'], data['y_test'], batch_size=128) - print('Test loss, Test acc:', results) - - -if __name__ == "__main__": - train_and_eval()
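Note: the notebook above demonstrates LazyAdam on a dense MNIST model, which never actually produces sparse gradient updates, so the throughput benefit described in the LazyAdam cell will not show up there. As a point of reference only, here is a minimal sketch of the kind of workload that description refers to: an embedding lookup, where only the rows indexed in a batch receive gradients. It uses the same `tfa.optimizers.LazyAdam` API as the notebook; the vocabulary size, sequence length, and random data are illustrative, not part of this change.

```python
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa

# An embedding layer produces sparse (IndexedSlices) gradients: only the rows
# looked up in the current batch receive a gradient, which is the case that
# LazyAdam's lazy accumulator updates are intended to speed up.
vocab_size, seq_len, embed_dim = 10000, 20, 16

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embed_dim),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer=tfa.optimizers.LazyAdam(0.001),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'])

# Random token ids and binary labels stand in for a real text dataset.
x = np.random.randint(0, vocab_size, size=(1024, seq_len))
y = np.random.randint(0, 2, size=(1024, 1))
model.fit(x, y, batch_size=64, epochs=2)
```

Swapping `tfa.optimizers.LazyAdam(0.001)` for `tf.keras.optimizers.Adam(0.001)` trains the same model; the difference is only whether the moment accumulators for embedding rows not seen in the current batch are updated at each step.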