diff --git a/tensorflow_addons/examples/optimizers_lazyadam.ipynb b/tensorflow_addons/examples/optimizers_lazyadam.ipynb
new file mode 100644
index 0000000000..c87c5e53a0
--- /dev/null
+++ b/tensorflow_addons/examples/optimizers_lazyadam.ipynb
@@ -0,0 +1,275 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "optimizers_lazyadam.ipynb",
+ "version": "0.3.2",
+ "provenance": [],
+ "collapsed_sections": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "metadata": {
+ "colab_type": "text",
+ "id": "Tce3stUlHN0L"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "##### Copyright 2019 The TensorFlow Authors.\n",
+ "\n"
+ ]
+ },
+ {
+ "metadata": {
+ "colab_type": "code",
+ "id": "tuOe1ymfHZPu",
+ "cellView": "form",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "#@title Licensed under the Apache License, Version 2.0\n",
+ "# you may not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# https://www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "colab_type": "text",
+ "id": "MfBg1C5NB3X0"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# TensorFlow Addons Optimizers: LazyAdam\n",
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "metadata": {
+ "colab_type": "text",
+ "id": "xHxb-dlhMIzW"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# Overview\n",
+ "\n",
+ "This notebook will demonstrate how to use the lazy adam optimizer from the Addons package.\n"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "bQwBbFVAyHJ_",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# LazyAdam\n",
+ "\n",
+        "> LazyAdam is a variant of the Adam optimizer that handles sparse updates more efficiently.\n",
+ " The original Adam algorithm maintains two moving-average accumulators for\n",
+ " each trainable variable; the accumulators are updated at every step.\n",
+ " This class provides lazier handling of gradient updates for sparse\n",
+ " variables. It only updates moving-average accumulators for sparse variable\n",
+ " indices that appear in the current batch, rather than updating the\n",
+ " accumulators for all indices. Compared with the original Adam optimizer,\n",
+ " it can provide large improvements in model training throughput for some\n",
+ " applications. However, it provides slightly different semantics than the\n",
+ " original Adam algorithm, and may lead to different empirical results."
+ ]
+ },
+ {
+ "metadata": {
+ "colab_type": "text",
+ "id": "MUXex9ctTuDB"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "## Setup"
+ ]
+ },
+ {
+ "metadata": {
+ "colab_type": "code",
+ "id": "IqR2PQG4ZaZ0",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+        "!pip install tensorflow-gpu==2.0.0-alpha0\n",
+ "!pip install tensorflow-addons\n",
+ "from __future__ import absolute_import, division, print_function, unicode_literals\n",
+ "\n",
+ "import tensorflow as tf\n",
+ "import tensorflow_addons as tfa\n",
+ "import tensorflow_datasets as tfds\n",
+ "import numpy as np\n",
+ "from matplotlib import pyplot as plt"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "ys65MwOLKnXq",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "# Hyperparameters\n",
+ "batch_size=64\n",
+ "epochs=10"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "KR01t9v_fxbT",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# Build the Model"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "djpoAvfWNyL5",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "model = tf.keras.Sequential([\n",
+ " tf.keras.layers.Dense(64, input_shape=(784,), activation='relu', name='dense_1'),\n",
+ " tf.keras.layers.Dense(64, activation='relu', name='dense_2'),\n",
+ " tf.keras.layers.Dense(10, activation='softmax', name='predictions'),\n",
+ "])"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "0_D7CZqkv_Hj",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# Prep the Data"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "U0bS3SyowBoB",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "# Load MNIST dataset as NumPy arrays\n",
+ "dataset = {}\n",
+ "num_validation = 10000\n",
+ "(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()\n",
+ "\n",
+ "# Preprocess the data\n",
+ "x_train = x_train.reshape(-1, 784).astype('float32') / 255\n",
+ "x_test = x_test.reshape(-1, 784).astype('float32') / 255"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "HYE-BxhOzFQp",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# Train and Evaluate\n",
+ "\n",
+        "Simply replace typical Keras optimizers with the new TFA optimizer."
+ ]
+ },
+ {
+ "metadata": {
+ "id": "NxfYhtiSzHf-",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "# Compile the model\n",
+ "model.compile(\n",
+ " optimizer=tfa.optimizers.LazyAdam(0.001), # Utilize TFA optimizer\n",
+ " loss=tf.keras.losses.SparseCategoricalCrossentropy(),\n",
+ " metrics=['accuracy'])\n",
+ "\n",
+ "# Train the network\n",
+ "history = model.fit(\n",
+ " x_train,\n",
+ " y_train,\n",
+ " batch_size=batch_size,\n",
+ " epochs=epochs)\n"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "1Y--0tK69SXf",
+ "colab_type": "code",
+ "outputId": "163a7751-e35b-4d9f-cc07-1f8580bdf6bf",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 68
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "# Evaluate the network\n",
+ "print('Evaluate on test data:')\n",
+ "results = model.evaluate(x_test, y_test, batch_size=128)\n",
+ "print('Test loss = {0}, Test acc: {1}'.format(results[0], results[1]))"
+ ],
+ "execution_count": 9,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Evaluate on test data:\n",
+ "10000/10000 [==============================] - 0s 21us/sample - loss: 0.0884 - accuracy: 0.9752\n",
+ "Test loss = 0.08840992146739736, Test acc: 0.9751999974250793\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/tensorflow_addons/examples/tfa_optimizer.py b/tensorflow_addons/examples/tfa_optimizer.py
deleted file mode 100644
index 14b955a5f0..0000000000
--- a/tensorflow_addons/examples/tfa_optimizer.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""MNIST example utilizing an optimizer from TensorFlow Addons."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-import tensorflow_addons as tfa
-
-VALIDATION_SAMPLES = 10000
-
-
-def build_mnist_model():
- """Build a simple dense network for processing MNIST data.
-
- :return: Keras `Model`
- """
- inputs = tf.keras.Input(shape=(784,), name='digits')
- net = tf.keras.layers.Dense(64, activation='relu', name='dense_1')(inputs)
- net = tf.keras.layers.Dense(64, activation='relu', name='dense_2')(net)
- net = tf.keras.layers.Dense(
- 10, activation='softmax', name='predictions')(net)
-
- return tf.keras.Model(inputs=inputs, outputs=net)
-
-
-def generate_data(num_validation):
- """Download and preprocess the MNIST dataset.
-
- :num_validaton: Number of samples to use in validation set
- :return: Dictionary of data split into train/test/val
- """
- dataset = {}
-
- # Load MNIST dataset as NumPy arrays
- (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
-
- # Preprocess the data
- x_train = x_train.reshape(-1, 784).astype('float32') / 255
- x_test = x_test.reshape(-1, 784).astype('float32') / 255
-
- # Subset validation set
- dataset['x_train'] = x_train[:-num_validation]
- dataset['y_train'] = y_train[:-num_validation]
- dataset['x_val'] = x_train[-num_validation:]
- dataset['y_val'] = y_train[-num_validation:]
-
- dataset['x_test'] = x_test
- dataset['y_test'] = y_test
-
- return dataset
-
-
-def train_and_eval():
- """Train and evalute simple MNIST model using LazyAdam."""
- data = generate_data(num_validation=VALIDATION_SAMPLES)
- dense_net = build_mnist_model()
- dense_net.compile(
- optimizer=tfa.optimizers.LazyAdam(0.001),
- loss=tf.keras.losses.SparseCategoricalCrossentropy(),
- metrics=['accuracy'])
-
- # Train the network
- history = dense_net.fit(
- data['x_train'],
- data['y_train'],
- batch_size=64,
- epochs=10,
- validation_data=(data['x_val'], data['y_val']))
-
- # Evaluate the network
- print('Evaluate on test data:')
- results = dense_net.evaluate(
- data['x_test'], data['y_test'], batch_size=128)
- print('Test loss, Test acc:', results)
-
-
-if __name__ == "__main__":
- train_and_eval()