diff --git a/tensorflow_addons/examples/optimizers_lazyadam.ipynb b/tensorflow_addons/examples/optimizers_lazyadam.ipynb
new file mode 100644
index 0000000000..c87c5e53a0
--- /dev/null
+++ b/tensorflow_addons/examples/optimizers_lazyadam.ipynb
@@ -0,0 +1,275 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "optimizers_lazyadam.ipynb",
+ "version": "0.3.2",
+ "provenance": [],
+ "collapsed_sections": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "metadata": {
+ "colab_type": "text",
+ "id": "Tce3stUlHN0L"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "##### Copyright 2019 The TensorFlow Authors.\n",
+ "\n"
+ ]
+ },
+ {
+ "metadata": {
+ "colab_type": "code",
+ "id": "tuOe1ymfHZPu",
+ "cellView": "form",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "#@title Licensed under the Apache License, Version 2.0\n",
+ "# you may not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# https://www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "colab_type": "text",
+ "id": "MfBg1C5NB3X0"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# TensorFlow Addons Optimizers: LazyAdam\n",
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "metadata": {
+ "colab_type": "text",
+ "id": "xHxb-dlhMIzW"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# Overview\n",
+ "\n",
+ "This notebook will demonstrate how to use the lazy adam optimizer from the Addons package.\n"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "bQwBbFVAyHJ_",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# LazyAdam\n",
+ "\n",
+        "> LazyAdam is a variant of the Adam optimizer that handles sparse updates more efficiently.\n",
+ " The original Adam algorithm maintains two moving-average accumulators for\n",
+ " each trainable variable; the accumulators are updated at every step.\n",
+ " This class provides lazier handling of gradient updates for sparse\n",
+ " variables. It only updates moving-average accumulators for sparse variable\n",
+ " indices that appear in the current batch, rather than updating the\n",
+ " accumulators for all indices. Compared with the original Adam optimizer,\n",
+ " it can provide large improvements in model training throughput for some\n",
+ " applications. However, it provides slightly different semantics than the\n",
+ " original Adam algorithm, and may lead to different empirical results."
+ ]
+ },
+ {
+ "metadata": {
+ "colab_type": "text",
+ "id": "MUXex9ctTuDB"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "## Setup"
+ ]
+ },
+ {
+ "metadata": {
+ "colab_type": "code",
+ "id": "IqR2PQG4ZaZ0",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+        "!pip install tensorflow-gpu==2.0.0-alpha0\n",
+ "!pip install tensorflow-addons\n",
+ "from __future__ import absolute_import, division, print_function, unicode_literals\n",
+ "\n",
+ "import tensorflow as tf\n",
+ "import tensorflow_addons as tfa\n",
+ "import tensorflow_datasets as tfds\n",
+ "import numpy as np\n",
+ "from matplotlib import pyplot as plt"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "ys65MwOLKnXq",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "# Hyperparameters\n",
+ "batch_size=64\n",
+ "epochs=10"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "KR01t9v_fxbT",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# Build the Model"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "djpoAvfWNyL5",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "model = tf.keras.Sequential([\n",
+ " tf.keras.layers.Dense(64, input_shape=(784,), activation='relu', name='dense_1'),\n",
+ " tf.keras.layers.Dense(64, activation='relu', name='dense_2'),\n",
+ " tf.keras.layers.Dense(10, activation='softmax', name='predictions'),\n",
+ "])"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "0_D7CZqkv_Hj",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# Prep the Data"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "U0bS3SyowBoB",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "# Load MNIST dataset as NumPy arrays\n",
+ "dataset = {}\n",
+ "num_validation = 10000\n",
+ "(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()\n",
+ "\n",
+ "# Preprocess the data\n",
+ "x_train = x_train.reshape(-1, 784).astype('float32') / 255\n",
+ "x_test = x_test.reshape(-1, 784).astype('float32') / 255"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "HYE-BxhOzFQp",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# Train and Evaluate\n",
+ "\n",
+        "Simply replace typical Keras optimizers with the new TFA optimizer."
+ ]
+ },
+ {
+ "metadata": {
+ "id": "NxfYhtiSzHf-",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "# Compile the model\n",
+ "model.compile(\n",
+ " optimizer=tfa.optimizers.LazyAdam(0.001), # Utilize TFA optimizer\n",
+ " loss=tf.keras.losses.SparseCategoricalCrossentropy(),\n",
+ " metrics=['accuracy'])\n",
+ "\n",
+ "# Train the network\n",
+ "history = model.fit(\n",
+ " x_train,\n",
+ " y_train,\n",
+ " batch_size=batch_size,\n",
+ " epochs=epochs)\n"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "1Y--0tK69SXf",
+ "colab_type": "code",
+ "outputId": "163a7751-e35b-4d9f-cc07-1f8580bdf6bf",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 68
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "# Evaluate the network\n",
+ "print('Evaluate on test data:')\n",
+ "results = model.evaluate(x_test, y_test, batch_size=128)\n",
+ "print('Test loss = {0}, Test acc: {1}'.format(results[0], results[1]))"
+ ],
+ "execution_count": 9,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Evaluate on test data:\n",
+ "10000/10000 [==============================] - 0s 21us/sample - loss: 0.0884 - accuracy: 0.9752\n",
+ "Test loss = 0.08840992146739736, Test acc: 0.9751999974250793\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/tensorflow_addons/examples/tfa_optimizer.py b/tensorflow_addons/examples/tfa_optimizer.py
deleted file mode 100644
index 14b955a5f0..0000000000
--- a/tensorflow_addons/examples/tfa_optimizer.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""MNIST example utilizing an optimizer from TensorFlow Addons."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-import tensorflow_addons as tfa
-
-VALIDATION_SAMPLES = 10000
-
-
-def build_mnist_model():
- """Build a simple dense network for processing MNIST data.
-
- :return: Keras `Model`
- """
- inputs = tf.keras.Input(shape=(784,), name='digits')
- net = tf.keras.layers.Dense(64, activation='relu', name='dense_1')(inputs)
- net = tf.keras.layers.Dense(64, activation='relu', name='dense_2')(net)
- net = tf.keras.layers.Dense(
- 10, activation='softmax', name='predictions')(net)
-
- return tf.keras.Model(inputs=inputs, outputs=net)
-
-
-def generate_data(num_validation):
- """Download and preprocess the MNIST dataset.
-
- :num_validaton: Number of samples to use in validation set
- :return: Dictionary of data split into train/test/val
- """
- dataset = {}
-
- # Load MNIST dataset as NumPy arrays
- (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
-
- # Preprocess the data
- x_train = x_train.reshape(-1, 784).astype('float32') / 255
- x_test = x_test.reshape(-1, 784).astype('float32') / 255
-
- # Subset validation set
- dataset['x_train'] = x_train[:-num_validation]
- dataset['y_train'] = y_train[:-num_validation]
- dataset['x_val'] = x_train[-num_validation:]
- dataset['y_val'] = y_train[-num_validation:]
-
- dataset['x_test'] = x_test
- dataset['y_test'] = y_test
-
- return dataset
-
-
-def train_and_eval():
- """Train and evalute simple MNIST model using LazyAdam."""
- data = generate_data(num_validation=VALIDATION_SAMPLES)
- dense_net = build_mnist_model()
- dense_net.compile(
- optimizer=tfa.optimizers.LazyAdam(0.001),
- loss=tf.keras.losses.SparseCategoricalCrossentropy(),
- metrics=['accuracy'])
-
- # Train the network
- history = dense_net.fit(
- data['x_train'],
- data['y_train'],
- batch_size=64,
- epochs=10,
- validation_data=(data['x_val'], data['y_val']))
-
- # Evaluate the network
- print('Evaluate on test data:')
- results = dense_net.evaluate(
- data['x_test'], data['y_test'], batch_size=128)
- print('Test loss, Test acc:', results)
-
-
-if __name__ == "__main__":
- train_and_eval()