From 9b5c7b8c86a7fa57ae7fd96be3f4b0922a7634dc Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Mon, 17 Aug 2020 15:48:31 -0700
Subject: [PATCH 01/44] Create noisy_dense.py

---
 tensorflow_addons/layers/noisy_dense.py | 232 ++++++++++++++++++++++++
 1 file changed, 232 insertions(+)
 create mode 100644 tensorflow_addons/layers/noisy_dense.py

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
new file mode 100644
index 0000000000..8793eb6108
--- /dev/null
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -0,0 +1,232 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Orginal implementation from keras_contrib/layer/normalization
+# =============================================================================
+
+import tensorflow as tf
+from tensorflow.keras import layers, activations, initializers, regularizers, constraints
+from tensorflow.python.ops import math_ops, nn_ops, sparse_ops, gen_math_ops, standard_ops
+from tensorflow.python.keras.engine.input_spec import InputSpec
+from tensorflow.python.framework import dtypes, tensor_shape, sparse_tensor
+from tensorflow.python.keras.engine.base_layer import Layer
+
+@tf..utils.register_keras_serializable(package="Addons")
+class NoisyDense(Layer):
+  """Like normal dense layer but random noisy is added to the weights matrix. But
+  as the network improves the random noise is decayed until it is insignificant. 
+  A `NoisyDense` layer implements the operation:
+  `output = activation(dot(input, µ_kernel + (σ_kernel * ε_kernel)) + bias)`
+  where `activation` is the element-wise activation function
+  passed as the `activation` argument, `µ_kernel` is your average weights matrix
+  created by the layer, σ_kernel is a weights matrix that controls the importance of 
+  the ε_kernel which is just random noise, and `bias` is a bias vector created by the layer
+  (only applicable if `use_bias` is `True`).
+  Besides, layer attributes cannot be modified after the layer has been called
+  once (except the `trainable` attribute).
+  Example:
+  >>> # Create a `Sequential` model and add a Dense layer as the first layer.
+  >>> model = tf.keras.models.Sequential()
+  >>> model.add(tf.keras.Input(shape=(16,)))
+  >>> model.add(tf.keras.layers.NoisyDense(32, activation='relu'))
+  >>> # Now the model will take as input arrays of shape (None, 16)
+  >>> # and output arrays of shape (None, 32).
+  >>> # Note that after the first layer, you don't need to specify
+  >>> # the size of the input anymore:
+  >>> model.add(tf.keras.layers.NoisyDense(32))
+  >>> model.output_shape
+  (None, 32)
+  Arguments:
+    units: Positive integer, dimensionality of the output space.
+    activation: Activation function to use.
+      If you don't specify anything, no activation is applied
+      (ie. "linear" activation: `a(x) = x`).
+    use_bias: Boolean, whether the layer uses a bias vector.
+    kernel_regularizer: Regularizer function applied to
+      the `kernel` weights matrix.
+    bias_regularizer: Regularizer function applied to the bias vector.
+    activity_regularizer: Regularizer function applied to
+      the output of the layer (its "activation").
+    kernel_constraint: Constraint function applied to
+      the `kernel` weights matrix.
+    bias_constraint: Constraint function applied to the bias vector.
+  Input shape:
+    N-D tensor with shape: `(batch_size, ..., input_dim)`.
+    The most common situation would be
+    a 2D input with shape `(batch_size, input_dim)`.
+  Output shape:
+    N-D tensor with shape: `(batch_size, ..., units)`.
+    For instance, for a 2D input with shape `(batch_size, input_dim)`,
+    the output would have shape `(batch_size, units)`.
+  """
+
+  def __init__(self,
+               units,
+               activation=None,
+               use_bias=True,
+               kernel_regularizer=None,
+               bias_regularizer=None,
+               activity_regularizer=None,
+               kernel_constraint=None,
+               bias_constraint=None,
+               **kwargs):
+    super(NoisyDense, self).__init__(activity_regularizer=activity_regularizer, **kwargs)
+
+    self.units = int(units) if not isinstance(units, int) else units
+    self.activation = activations.get(activation)
+    self.use_bias = use_bias
+    self.kernel_regularizer = regularizers.get(kernel_regularizer)
+    self.bias_regularizer = regularizers.get(bias_regularizer)
+    self.kernel_constraint = constraints.get(kernel_constraint)
+    self.bias_constraint = constraints.get(bias_constraint)
+
+    self.input_spec = InputSpec(min_ndim=2)
+    self.supports_masking = True
+
+  def build(self, input_shape):
+    # Make sure dtype is correct
+    dtype = dtypes.as_dtype(self.dtype or K.floatx())
+    if not (dtype.is_floating or dtype.is_complex):
+      raise TypeError('Unable to build `Dense` layer with non-floating point '
+                      'dtype %s' % (dtype,))
+
+    input_shape = tensor_shape.TensorShape(input_shape)
+    self.last_dim = tensor_shape.dimension_value(input_shape[-1])
+    sqrt_dim = self.last_dim ** (1/2)
+    if self.last_dim is None:
+      raise ValueError('The last dimension of the inputs to `Dense` '
+                       'should be defined. Found `None`.')
+    self.input_spec = InputSpec(min_ndim=2, axes={-1: self.last_dim})
+
+    self.σ_init = initializers.Constant(value=0.5/sqrt_dim)
+    self.µ_init = initializers.RandomUniform(minval=-1/sqrt_dim, maxval=1/sqrt_dim)
+
+    # Learnable parameters
+    # Agent will learn to decay σ as it improves creating a sort of learned epsilon decay
+    self.σ_kernel = self.add_weight(
+        "σ_kernel",
+        shape=[self.last_dim, self.units], 
+        initializer=self.σ_init,
+        regularizer=self.kernel_regularizer,
+        constraint=self.kernel_constraint, 
+        dtype=self.dtype,
+        trainable=True)
+
+    self.µ_kernel = self.add_weight(
+        "µ_kernel",
+        shape=[self.last_dim, self.units], 
+        initializer=self.µ_init,
+        regularizer=self.kernel_regularizer,
+        constraint=self.kernel_constraint, 
+        dtype=self.dtype,
+        trainable=True)
+
+    if self.use_bias:
+      self.σ_bias = self.add_weight(
+          "σ_bias",
+          shape=[self.units,],
+          initializer=self.σ_init,
+          regularizer=self.bias_regularizer,
+          constraint=self.bias_constraint,
+          dtype=self.dtype,
+          trainable=True)
+
+      self.µ_bias = self.add_weight(
+          "µ_bias",
+          shape=[self.units,],
+          initializer=self.µ_init,
+          regularizer=self.bias_regularizer,
+          constraint=self.bias_constraint,
+          dtype=self.dtype,
+          trainable=True)
+
+    self.built = True
+
+  @staticmethod
+  def _scale_noise(x):
+    return tf.sign(x)*tf.sqrt(tf.abs(x))
+
+  def call(self, inputs):
+    dtype = self._compute_dtype_object
+    if inputs.dtype.base_dtype != dtype.base_dtype:
+      inputs = math_ops.cast(inputs, dtype=dtype)
+
+    # Fixed parameters added as the noise
+    ε_i = tf.random.normal([self.last_dim, self.units])
+    ε_j = tf.random.normal([self.units,])
+
+    # Creates the factorised Gaussian noise
+    f = NoisyDense._scale_noise
+    ε_kernel = f(ε_i) * f(ε_j)
+    ε_bias = f(ε_j)
+
+    # Performs: y = (µw + σw · εw)x + µb + σb · εb
+    # to calculate the output
+    rank = inputs.shape.rank
+    if rank == 2 or rank is None:
+      if isinstance(inputs, sparse_tensor.SparseTensor):
+        outputs = sparse_ops.sparse_tensor_dense_matmul(inputs, self.µ_kernel + (self.σ_kernel * ε_kernel))
+      else:
+        outputs = gen_math_ops.mat_mul(inputs, self.µ_kernel + (self.σ_kernel * ε_kernel))
+    # Broadcast kernel to inputs.
+    else:
+      outputs = standard_ops.tensordot(inputs, self.µ_kernel + (self.σ_kernel * ε_kernel), [[rank - 1], [0]])
+      # Reshape the output back to the original ndim of the input.
+      if not context.executing_eagerly():
+        shape = inputs.shape.as_list()
+        output_shape = shape[:-1] + [kernel.shape[-1]]
+        outputs.set_shape(output_shape)
+
+    if self.use_bias:
+      noisy_bias = self.µ_bias + (self.σ_bias * ε_bias)
+      outputs = nn_ops.bias_add(outputs, noisy_bias)
+
+    if self.activation is not None:
+      outputs = self.activation(outputs)
+
+    return outputs
+
+
+  def compute_output_shape(self, input_shape):
+    input_shape = tensor_shape.TensorShape(input_shape)
+    input_shape = input_shape.with_rank_at_least(2)
+    if tensor_shape.dimension_value(input_shape[-1]) is None:
+      raise ValueError(
+          'The innermost dimension of input_shape must be defined, but saw: %s'
+          % input_shape)
+    return input_shape[:-1].concatenate(self.units)
+
+  def get_config(self):
+    config = super(NoisyDense, self).get_config()
+    config.update({
+        'units':
+            self.units,
+        'activation':
+            activations.serialize(self.activation),
+        'σ_initializer':
+            initializers.serialize(self.σ_init),
+        'µ_initializer':
+            initializers.serialize(self.µ_init),
+        'kernel_regularizer':
+            regularizers.serialize(self.kernel_regularizer),
+        'bias_regularizer':
+            regularizers.serialize(self.bias_regularizer),
+        'activity_regularizer':
+            regularizers.serialize(self.activity_regularizer),
+        'kernel_constraint':
+            constraints.serialize(self.kernel_constraint),
+        'bias_constraint':
+            constraints.serialize(self.bias_constraint)
+    })
+    return config

From baf35740183f3d32f15ffa6f7b2b49da71c2f902 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Mon, 17 Aug 2020 16:05:38 -0700
Subject: [PATCH 02/44] Create noisy_dense_test.py

---
 .../layers/tests/noisy_dense_test.py          | 78 +++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 tensorflow_addons/layers/tests/noisy_dense_test.py

diff --git a/tensorflow_addons/layers/tests/noisy_dense_test.py b/tensorflow_addons/layers/tests/noisy_dense_test.py
new file mode 100644
index 0000000000..d4462f25a4
--- /dev/null
+++ b/tensorflow_addons/layers/tests/noisy_dense_test.py
@@ -0,0 +1,78 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests NoisyDense layer."""
+
+
+import numpy as np
+from tensorflow.python import keras
+from tensorflow.python.keras import testing_utils
+from tensorflow_addons.layers.noisy_dense import NoisyDense
+from tensorflow.python.framework import ops
+from tensorflow.python.keras import keras_parameterized
+from tensorflow.python.framework import tensor_spec
+
+
+@keras_parameterized.run_all_keras_modes
+class NoisyDenseTest(keras_parameterized.TestCase):
+  def test_noisy_dense(self):
+    testing_utils.layer_test(
+        NoisyDense, kwargs={'units': 3}, input_shape=(3, 2))
+
+    testing_utils.layer_test(
+        NoisyDense, kwargs={'units': 3}, input_shape=(3, 4, 2))
+
+    testing_utils.layer_test(
+        NoisyDense, kwargs={'units': 3}, input_shape=(None, None, 2))
+
+    testing_utils.layer_test(
+        NoisyDense, kwargs={'units': 3}, input_shape=(3, 4, 5, 2))
+
+  def test_noisy_dense_dtype(self):
+    inputs = ops.convert_to_tensor_v2(
+        np.random.randint(low=0, high=7, size=(2, 2)))
+    layer = NoisyDense(5, dtype='float32')
+    outputs = layer(inputs)
+    self.assertEqual(outputs.dtype, 'float32')
+
+  def test_noisy_dense_with_policy(self):
+    inputs = ops.convert_to_tensor_v2(
+        np.random.randint(low=0, high=7, size=(2, 2)))
+    layer = NoisyDense(5, dtype=policy.Policy('mixed_float16'))
+    outputs = layer(inputs)
+    output_signature = layer.compute_output_signature(
+        tensor_spec.TensorSpec(dtype='float16', shape=(2, 2)))
+    self.assertEqual(output_signature.dtype, dtypes.float16)
+    self.assertEqual(output_signature.shape, (2, 5))
+    self.assertEqual(outputs.dtype, 'float16')
+    self.assertEqual(layer.kernel.dtype, 'float32')
+
+  def test_noisy_dense_regularization(self):
+    layer = NoisyDense(
+        3,
+        kernel_regularizer=keras.regularizers.l1(0.01),
+        bias_regularizer='l1',
+        activity_regularizer='l2',
+        name='noisy_dense_reg')
+    layer(keras.backend.variable(np.ones((2, 4))))
+    self.assertEqual(3, len(layer.losses))
+
+  def test_noisy_dense_constraints(self):
+    k_constraint = keras.constraints.max_norm(0.01)
+    b_constraint = keras.constraints.max_norm(0.01)
+    layer = NoisyDense(
+        3, kernel_constraint=k_constraint, bias_constraint=b_constraint)
+    layer(keras.backend.variable(np.ones((2, 4))))
+    self.assertEqual(layer.kernel.constraint, k_constraint)
+    self.assertEqual(layer.bias.constraint, b_constraint)

From dfa4dcc926882d4e8003bf9ede805c7e33e883a2 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Mon, 17 Aug 2020 16:06:49 -0700
Subject: [PATCH 03/44] Export NoisyDense from tensorflow_addons.layers

---
 tensorflow_addons/layers/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow_addons/layers/__init__.py b/tensorflow_addons/layers/__init__.py
index 594d025a9f..f8a0d9a11a 100644
--- a/tensorflow_addons/layers/__init__.py
+++ b/tensorflow_addons/layers/__init__.py
@@ -38,3 +38,4 @@
 from tensorflow_addons.layers.tlu import TLU
 from tensorflow_addons.layers.wrappers import WeightNormalization
 from tensorflow_addons.layers.esn import ESN
+from tensorflow_addons.layers.noisy_dense import NoisyDense

From d823bed021abb508057f2851e1f3dcd04abd5457 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Mon, 17 Aug 2020 16:10:15 -0700
Subject: [PATCH 04/44] Fix double-dot typo in NoisyDense decorator

---
 tensorflow_addons/layers/noisy_dense.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 8793eb6108..d7a2b3ca1e 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -22,7 +22,7 @@
 from tensorflow.python.framework import dtypes, tensor_shape, sparse_tensor
 from tensorflow.python.keras.engine.base_layer import Layer
 
-@tf..utils.register_keras_serializable(package="Addons")
+@tf.utils.register_keras_serializable(package="Addons")
 class NoisyDense(Layer):
   """Like normal dense layer but random noisy is added to the weights matrix. But
   as the network improves the random noise is decayed until it is insignificant. 

From 08bb63eb55aecf62117eb32ef2d606a47202adf3 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Mon, 17 Aug 2020 16:37:29 -0700
Subject: [PATCH 05/44] Port NoisyDense tests from keras_parameterized to pytest

---
 .../layers/tests/noisy_dense_test.py          | 94 +++++++++----------
 1 file changed, 46 insertions(+), 48 deletions(-)

diff --git a/tensorflow_addons/layers/tests/noisy_dense_test.py b/tensorflow_addons/layers/tests/noisy_dense_test.py
index d4462f25a4..61afc3c94f 100644
--- a/tensorflow_addons/layers/tests/noisy_dense_test.py
+++ b/tensorflow_addons/layers/tests/noisy_dense_test.py
@@ -16,63 +16,61 @@
 
 
 import numpy as np
+import pytest
 from tensorflow.python import keras
-from tensorflow.python.keras import testing_utils
+from tensorflow_addons.utils import test_utils
 from tensorflow_addons.layers.noisy_dense import NoisyDense
 from tensorflow.python.framework import ops
 from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.framework import tensor_spec
+from tensorflow.debugging import assert_equal
+from tensorflow.python.keras.mixed_precision.experimental import policy
 
+@pytest.mark.usefixtures("maybe_run_functions_eagerly")
+@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
+def test_noisy_dense(dtype):
+  test_utils.layer_test(
+      NoisyDense, kwargs={'units': 3, "dtype": dtype}, input_shape=(3, 2))
 
-@keras_parameterized.run_all_keras_modes
-class NoisyDenseTest(keras_parameterized.TestCase):
-  def test_noisy_dense(self):
-    testing_utils.layer_test(
-        NoisyDense, kwargs={'units': 3}, input_shape=(3, 2))
+  test_utils.layer_test(
+      NoisyDense, kwargs={'units': 3, "dtype": dtype}, input_shape=(3, 4, 2))
 
-    testing_utils.layer_test(
-        NoisyDense, kwargs={'units': 3}, input_shape=(3, 4, 2))
+  test_utils.layer_test(
+      NoisyDense, kwargs={'units': 3, "dtype": dtype}, input_shape=(None, None, 2))
 
-    testing_utils.layer_test(
-        NoisyDense, kwargs={'units': 3}, input_shape=(None, None, 2))
+  test_utils.layer_test(
+      NoisyDense, kwargs={'units': 3, "dtype": dtype}, input_shape=(3, 4, 5, 2))
 
-    testing_utils.layer_test(
-        NoisyDense, kwargs={'units': 3}, input_shape=(3, 4, 5, 2))
+@pytest.mark.usefixtures("maybe_run_functions_eagerly")
+def test_noisy_dense_with_policy():
+  inputs = ops.convert_to_tensor_v2(
+      np.random.randint(low=0, high=7, size=(2, 2)))
+  layer = NoisyDense(5, dtype=policy.Policy('mixed_float16'))
+  outputs = layer(inputs)
+  output_signature = layer.compute_output_signature(
+      tensor_spec.TensorSpec(dtype='float16', shape=(2, 2)))
+  assert_equal(output_signature.dtype, dtypes.float16)
+  assert_equal(output_signature.shape, (2, 5))
+  sassert_equal(outputs.dtype, 'float16')
+  assert_equal(layer.kernel.dtype, 'float32')
 
-  def test_noisy_dense_dtype(self):
-    inputs = ops.convert_to_tensor_v2(
-        np.random.randint(low=0, high=7, size=(2, 2)))
-    layer = NoisyDense(5, dtype='float32')
-    outputs = layer(inputs)
-    self.assertEqual(outputs.dtype, 'float32')
+@pytest.mark.usefixtures("maybe_run_functions_eagerly")
+def test_noisy_dense_regularization(self):
+  layer = NoisyDense(
+      3,
+      kernel_regularizer=keras.regularizers.l1(0.01),
+      bias_regularizer='l1',
+      activity_regularizer='l2',
+      name='noisy_dense_reg')
+  layer(keras.backend.variable(np.ones((2, 4))))
+  assert_equal(3, len(layer.losses))
 
-  def test_noisy_dense_with_policy(self):
-    inputs = ops.convert_to_tensor_v2(
-        np.random.randint(low=0, high=7, size=(2, 2)))
-    layer = NoisyDense(5, dtype=policy.Policy('mixed_float16'))
-    outputs = layer(inputs)
-    output_signature = layer.compute_output_signature(
-        tensor_spec.TensorSpec(dtype='float16', shape=(2, 2)))
-    self.assertEqual(output_signature.dtype, dtypes.float16)
-    self.assertEqual(output_signature.shape, (2, 5))
-    self.assertEqual(outputs.dtype, 'float16')
-    self.assertEqual(layer.kernel.dtype, 'float32')
-
-  def test_noisy_dense_regularization(self):
-    layer = NoisyDense(
-        3,
-        kernel_regularizer=keras.regularizers.l1(0.01),
-        bias_regularizer='l1',
-        activity_regularizer='l2',
-        name='noisy_dense_reg')
-    layer(keras.backend.variable(np.ones((2, 4))))
-    self.assertEqual(3, len(layer.losses))
-
-  def test_noisy_dense_constraints(self):
-    k_constraint = keras.constraints.max_norm(0.01)
-    b_constraint = keras.constraints.max_norm(0.01)
-    layer = NoisyDense(
-        3, kernel_constraint=k_constraint, bias_constraint=b_constraint)
-    layer(keras.backend.variable(np.ones((2, 4))))
-    self.assertEqual(layer.kernel.constraint, k_constraint)
-    self.assertEqual(layer.bias.constraint, b_constraint)
+@pytest.mark.usefixtures("maybe_run_functions_eagerly")
+def test_noisy_dense_constraints(self):
+  k_constraint = keras.constraints.max_norm(0.01)
+  b_constraint = keras.constraints.max_norm(0.01)
+  layer = NoisyDense(
+      3, kernel_constraint=k_constraint, bias_constraint=b_constraint)
+  layer(keras.backend.variable(np.ones((2, 4))))
+  assert_equal(layer.kernel.constraint, k_constraint)
+  assert_equal(layer.bias.constraint, b_constraint)

From 68d6ccb0965806e9663105aaa82c5da81c785546 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Mon, 17 Aug 2020 16:38:50 -0700
Subject: [PATCH 06/44] Fix NoisyDense references in docstring example

---
 tensorflow_addons/layers/noisy_dense.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index d7a2b3ca1e..f28645afd6 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -39,12 +39,12 @@ class NoisyDense(Layer):
   >>> # Create a `Sequential` model and add a Dense layer as the first layer.
   >>> model = tf.keras.models.Sequential()
   >>> model.add(tf.keras.Input(shape=(16,)))
-  >>> model.add(tf.keras.layers.NoisyDense(32, activation='relu'))
+  >>> model.add(NoisyDense(32, activation='relu'))
   >>> # Now the model will take as input arrays of shape (None, 16)
   >>> # and output arrays of shape (None, 32).
   >>> # Note that after the first layer, you don't need to specify
   >>> # the size of the input anymore:
-  >>> model.add(tf.keras.layers.NoisyDense(32))
+  >>> model.add(NoisyDense(32))
   >>> model.output_shape
   (None, 32)
   Arguments:

From 5a564c0a48e6e9372e1fdd330507a1c8b1b78582 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Mon, 17 Aug 2020 16:40:32 -0700
Subject: [PATCH 07/44] Separate NoisyDense docstring sections with blank lines

---
 tensorflow_addons/layers/noisy_dense.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index f28645afd6..cff2184790 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -26,6 +26,7 @@
 class NoisyDense(Layer):
   """Like normal dense layer but random noisy is added to the weights matrix. But
   as the network improves the random noise is decayed until it is insignificant. 
+  
   A `NoisyDense` layer implements the operation:
   `output = activation(dot(input, µ_kernel + (σ_kernel * ε_kernel)) + bias)`
   where `activation` is the element-wise activation function
@@ -33,8 +34,7 @@ class NoisyDense(Layer):
   created by the layer, σ_kernel is a weights matrix that controls the importance of 
   the ε_kernel which is just random noise, and `bias` is a bias vector created by the layer
   (only applicable if `use_bias` is `True`).
-  Besides, layer attributes cannot be modified after the layer has been called
-  once (except the `trainable` attribute).
+
   Example:
   >>> # Create a `Sequential` model and add a Dense layer as the first layer.
   >>> model = tf.keras.models.Sequential()
@@ -47,6 +47,7 @@ class NoisyDense(Layer):
   >>> model.add(NoisyDense(32))
   >>> model.output_shape
   (None, 32)
+  
   Arguments:
     units: Positive integer, dimensionality of the output space.
     activation: Activation function to use.
@@ -61,10 +62,12 @@ class NoisyDense(Layer):
     kernel_constraint: Constraint function applied to
       the `kernel` weights matrix.
     bias_constraint: Constraint function applied to the bias vector.
+    
   Input shape:
     N-D tensor with shape: `(batch_size, ..., input_dim)`.
     The most common situation would be
     a 2D input with shape `(batch_size, input_dim)`.
+    
   Output shape:
     N-D tensor with shape: `(batch_size, ..., units)`.
     For instance, for a 2D input with shape `(batch_size, input_dim)`,

From 1785723db692d807890fed89f74e8315be8b40a5 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Mon, 17 Aug 2020 16:41:44 -0700
Subject: [PATCH 08/44] Subclass tf.keras.layers.Layer instead of private base_layer.Layer

---
 tensorflow_addons/layers/noisy_dense.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index cff2184790..9668bd5481 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -20,10 +20,9 @@
 from tensorflow.python.ops import math_ops, nn_ops, sparse_ops, gen_math_ops, standard_ops
 from tensorflow.python.keras.engine.input_spec import InputSpec
 from tensorflow.python.framework import dtypes, tensor_shape, sparse_tensor
-from tensorflow.python.keras.engine.base_layer import Layer
 
 @tf.utils.register_keras_serializable(package="Addons")
-class NoisyDense(Layer):
+class NoisyDense(tf.keras.layers.Layer):
   """Like normal dense layer but random noisy is added to the weights matrix. But
   as the network improves the random noise is decayed until it is insignificant. 
   

From bdc7dfe38b897ed9e061e046f89de84101aa4cc1 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Mon, 17 Aug 2020 17:08:47 -0700
Subject: [PATCH 09/44] Use tf.keras.utils.register_keras_serializable in decorator

---
 tensorflow_addons/layers/noisy_dense.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 9668bd5481..832f240b8a 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -21,7 +21,7 @@
 from tensorflow.python.keras.engine.input_spec import InputSpec
 from tensorflow.python.framework import dtypes, tensor_shape, sparse_tensor
 
-@tf.utils.register_keras_serializable(package="Addons")
+@tf.keras.utils.register_keras_serializable(package="Addons")
 class NoisyDense(tf.keras.layers.Layer):
   """Like normal dense layer but random noisy is added to the weights matrix. But
   as the network improves the random noise is decayed until it is insignificant. 

From 882bbde8fa6f7c88f69557311f1744309e229ce4 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Mon, 17 Aug 2020 21:15:41 -0700
Subject: [PATCH 10/44] Update noisy_dense.py

---
 tensorflow_addons/layers/noisy_dense.py | 358 +++++++++++++-----------
 1 file changed, 190 insertions(+), 168 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 832f240b8a..9eed42daa4 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -16,21 +16,35 @@
 # =============================================================================
 
 import tensorflow as tf
-from tensorflow.keras import layers, activations, initializers, regularizers, constraints
-from tensorflow.python.ops import math_ops, nn_ops, sparse_ops, gen_math_ops, standard_ops
+from tensorflow.keras import (
+    activations,
+    initializers,
+    regularizers,
+    constraints,
+)
+from tensorflow.python.ops import (
+    math_ops,
+    nn_ops,
+    sparse_ops,
+    gen_math_ops,
+    standard_ops,
+)
+from tensorflow.python.keras import backend as K
 from tensorflow.python.keras.engine.input_spec import InputSpec
 from tensorflow.python.framework import dtypes, tensor_shape, sparse_tensor
+from tensorflow.python.eager import context
+
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
 class NoisyDense(tf.keras.layers.Layer):
-  """Like normal dense layer but random noisy is added to the weights matrix. But
-  as the network improves the random noise is decayed until it is insignificant. 
-  
+    """Like normal dense layer but random noisy is added to the weights matrix. But
+  as the network improves the random noise is decayed until it is insignificant.
+
   A `NoisyDense` layer implements the operation:
   `output = activation(dot(input, µ_kernel + (σ_kernel * ε_kernel)) + bias)`
   where `activation` is the element-wise activation function
   passed as the `activation` argument, `µ_kernel` is your average weights matrix
-  created by the layer, σ_kernel is a weights matrix that controls the importance of 
+  created by the layer, σ_kernel is a weights matrix that controls the importance of
   the ε_kernel which is just random noise, and `bias` is a bias vector created by the layer
   (only applicable if `use_bias` is `True`).
 
@@ -46,7 +60,7 @@ class NoisyDense(tf.keras.layers.Layer):
   >>> model.add(NoisyDense(32))
   >>> model.output_shape
   (None, 32)
-  
+
   Arguments:
     units: Positive integer, dimensionality of the output space.
     activation: Activation function to use.
@@ -61,174 +75,182 @@ class NoisyDense(tf.keras.layers.Layer):
     kernel_constraint: Constraint function applied to
       the `kernel` weights matrix.
     bias_constraint: Constraint function applied to the bias vector.
-    
+
   Input shape:
     N-D tensor with shape: `(batch_size, ..., input_dim)`.
     The most common situation would be
     a 2D input with shape `(batch_size, input_dim)`.
-    
+
   Output shape:
     N-D tensor with shape: `(batch_size, ..., units)`.
     For instance, for a 2D input with shape `(batch_size, input_dim)`,
     the output would have shape `(batch_size, units)`.
   """
 
-  def __init__(self,
-               units,
-               activation=None,
-               use_bias=True,
-               kernel_regularizer=None,
-               bias_regularizer=None,
-               activity_regularizer=None,
-               kernel_constraint=None,
-               bias_constraint=None,
-               **kwargs):
-    super(NoisyDense, self).__init__(activity_regularizer=activity_regularizer, **kwargs)
-
-    self.units = int(units) if not isinstance(units, int) else units
-    self.activation = activations.get(activation)
-    self.use_bias = use_bias
-    self.kernel_regularizer = regularizers.get(kernel_regularizer)
-    self.bias_regularizer = regularizers.get(bias_regularizer)
-    self.kernel_constraint = constraints.get(kernel_constraint)
-    self.bias_constraint = constraints.get(bias_constraint)
-
-    self.input_spec = InputSpec(min_ndim=2)
-    self.supports_masking = True
-
-  def build(self, input_shape):
-    # Make sure dtype is correct
-    dtype = dtypes.as_dtype(self.dtype or K.floatx())
-    if not (dtype.is_floating or dtype.is_complex):
-      raise TypeError('Unable to build `Dense` layer with non-floating point '
-                      'dtype %s' % (dtype,))
-
-    input_shape = tensor_shape.TensorShape(input_shape)
-    self.last_dim = tensor_shape.dimension_value(input_shape[-1])
-    sqrt_dim = self.last_dim ** (1/2)
-    if self.last_dim is None:
-      raise ValueError('The last dimension of the inputs to `Dense` '
-                       'should be defined. Found `None`.')
-    self.input_spec = InputSpec(min_ndim=2, axes={-1: self.last_dim})
-
-    self.σ_init = initializers.Constant(value=0.5/sqrt_dim)
-    self.µ_init = initializers.RandomUniform(minval=-1/sqrt_dim, maxval=1/sqrt_dim)
-
-    # Learnable parameters
-    # Agent will learn to decay σ as it improves creating a sort of learned epsilon decay
-    self.σ_kernel = self.add_weight(
-        "σ_kernel",
-        shape=[self.last_dim, self.units], 
-        initializer=self.σ_init,
-        regularizer=self.kernel_regularizer,
-        constraint=self.kernel_constraint, 
-        dtype=self.dtype,
-        trainable=True)
-
-    self.µ_kernel = self.add_weight(
-        "µ_kernel",
-        shape=[self.last_dim, self.units], 
-        initializer=self.µ_init,
-        regularizer=self.kernel_regularizer,
-        constraint=self.kernel_constraint, 
-        dtype=self.dtype,
-        trainable=True)
-
-    if self.use_bias:
-      self.σ_bias = self.add_weight(
-          "σ_bias",
-          shape=[self.units,],
-          initializer=self.σ_init,
-          regularizer=self.bias_regularizer,
-          constraint=self.bias_constraint,
-          dtype=self.dtype,
-          trainable=True)
-
-      self.µ_bias = self.add_weight(
-          "µ_bias",
-          shape=[self.units,],
-          initializer=self.µ_init,
-          regularizer=self.bias_regularizer,
-          constraint=self.bias_constraint,
-          dtype=self.dtype,
-          trainable=True)
-
-    self.built = True
-
-  @staticmethod
-  def _scale_noise(x):
-    return tf.sign(x)*tf.sqrt(tf.abs(x))
-
-  def call(self, inputs):
-    dtype = self._compute_dtype_object
-    if inputs.dtype.base_dtype != dtype.base_dtype:
-      inputs = math_ops.cast(inputs, dtype=dtype)
-
-    # Fixed parameters added as the noise
-    ε_i = tf.random.normal([self.last_dim, self.units])
-    ε_j = tf.random.normal([self.units,])
-
-    # Creates the factorised Gaussian noise
-    f = NoisyDense._scale_noise
-    ε_kernel = f(ε_i) * f(ε_j)
-    ε_bias = f(ε_j)
-
-    # Performs: y = (µw + σw · εw)x + µb + σb · εb
-    # to calculate the output
-    rank = inputs.shape.rank
-    if rank == 2 or rank is None:
-      if isinstance(inputs, sparse_tensor.SparseTensor):
-        outputs = sparse_ops.sparse_tensor_dense_matmul(inputs, self.µ_kernel + (self.σ_kernel * ε_kernel))
-      else:
-        outputs = gen_math_ops.mat_mul(inputs, self.µ_kernel + (self.σ_kernel * ε_kernel))
-    # Broadcast kernel to inputs.
-    else:
-      outputs = standard_ops.tensordot(inputs, self.µ_kernel + (self.σ_kernel * ε_kernel), [[rank - 1], [0]])
-      # Reshape the output back to the original ndim of the input.
-      if not context.executing_eagerly():
-        shape = inputs.shape.as_list()
-        output_shape = shape[:-1] + [kernel.shape[-1]]
-        outputs.set_shape(output_shape)
-
-    if self.use_bias:
-      noisy_bias = self.µ_bias + (self.σ_bias * ε_bias)
-      outputs = nn_ops.bias_add(outputs, noisy_bias)
-
-    if self.activation is not None:
-      outputs = self.activation(outputs)
-
-    return outputs
-
-
-  def compute_output_shape(self, input_shape):
-    input_shape = tensor_shape.TensorShape(input_shape)
-    input_shape = input_shape.with_rank_at_least(2)
-    if tensor_shape.dimension_value(input_shape[-1]) is None:
-      raise ValueError(
-          'The innermost dimension of input_shape must be defined, but saw: %s'
-          % input_shape)
-    return input_shape[:-1].concatenate(self.units)
-
-  def get_config(self):
-    config = super(NoisyDense, self).get_config()
-    config.update({
-        'units':
-            self.units,
-        'activation':
-            activations.serialize(self.activation),
-        'σ_initializer':
-            initializers.serialize(self.σ_init),
-        'µ_initializer':
-            initializers.serialize(self.µ_init),
-        'kernel_regularizer':
-            regularizers.serialize(self.kernel_regularizer),
-        'bias_regularizer':
-            regularizers.serialize(self.bias_regularizer),
-        'activity_regularizer':
-            regularizers.serialize(self.activity_regularizer),
-        'kernel_constraint':
-            constraints.serialize(self.kernel_constraint),
-        'bias_constraint':
-            constraints.serialize(self.bias_constraint)
-    })
-    return config
+    def __init__(
+        self,
+        units,
+        activation=None,
+        use_bias=True,
+        kernel_regularizer=None,
+        bias_regularizer=None,
+        activity_regularizer=None,
+        kernel_constraint=None,
+        bias_constraint=None,
+        **kwargs
+    ):
+        super(NoisyDense, self).__init__(
+            activity_regularizer=activity_regularizer, **kwargs
+        )
+
+        self.units = int(units) if not isinstance(units, int) else units
+        self.activation = activations.get(activation)
+        self.use_bias = use_bias
+        self.kernel_regularizer = regularizers.get(kernel_regularizer)
+        self.bias_regularizer = regularizers.get(bias_regularizer)
+        self.kernel_constraint = constraints.get(kernel_constraint)
+        self.bias_constraint = constraints.get(bias_constraint)
+
+        self.input_spec = InputSpec(min_ndim=2)
+        self.supports_masking = True
+
+    def build(self, input_shape):
+        # Make sure dtype is correct
+        dtype = dtypes.as_dtype(self.dtype or K.floatx())
+        if not (dtype.is_floating or dtype.is_complex):
+            raise TypeError(
+                "Unable to build `Dense` layer with non-floating point "
+                "dtype %s" % (dtype,)
+            )
+
+        input_shape = tensor_shape.TensorShape(input_shape)
+        self.last_dim = tensor_shape.dimension_value(input_shape[-1])
+        sqrt_dim = self.last_dim ** (1 / 2)
+        if self.last_dim is None:
+            raise ValueError(
+                "The last dimension of the inputs to `Dense` "
+                "should be defined. Found `None`."
+            )
+        self.input_spec = InputSpec(min_ndim=2, axes={-1: self.last_dim})
+
+        self.σ_init = initializers.Constant(value=0.5 / sqrt_dim)
+        self.µ_init = initializers.RandomUniform(
+            minval=-1 / sqrt_dim, maxval=1 / sqrt_dim
+        )
+
+        # Learnable parameters
+        # Agent will learn to decay σ as it improves creating a sort of learned epsilon decay
+        self.σ_kernel = self.add_weight(
+            "σ_kernel",
+            shape=[self.last_dim, self.units],
+            initializer=self.σ_init,
+            regularizer=self.kernel_regularizer,
+            constraint=self.kernel_constraint,
+            dtype=self.dtype,
+            trainable=True,
+        )
+
+        self.µ_kernel = self.add_weight(
+            "µ_kernel",
+            shape=[self.last_dim, self.units],
+            initializer=self.µ_init,
+            regularizer=self.kernel_regularizer,
+            constraint=self.kernel_constraint,
+            dtype=self.dtype,
+            trainable=True,
+        )
+
+        if self.use_bias:
+            self.σ_bias = self.add_weight(
+                "σ_bias",
+                shape=[self.units,],
+                initializer=self.σ_init,
+                regularizer=self.bias_regularizer,
+                constraint=self.bias_constraint,
+                dtype=self.dtype,
+                trainable=True,
+            )
+
+            self.µ_bias = self.add_weight(
+                "µ_bias",
+                shape=[self.units,],
+                initializer=self.µ_init,
+                regularizer=self.bias_regularizer,
+                constraint=self.bias_constraint,
+                dtype=self.dtype,
+                trainable=True,
+            )
+
+        self.built = True
+
+    @staticmethod
+    def _scale_noise(x):
+        return tf.sign(x) * tf.sqrt(tf.abs(x))
+
+    def call(self, inputs):
+        dtype = self._compute_dtype_object
+        if inputs.dtype.base_dtype != dtype.base_dtype:
+            inputs = math_ops.cast(inputs, dtype=dtype)
+
+        # Fixed parameters added as the noise
+        ε_i = tf.random.normal([self.last_dim, self.units], dtype=dtype)
+        ε_j = tf.random.normal([self.units,], dtype=dtype)
+
+        # Creates the factorised Gaussian noise
+        f = NoisyDense._scale_noise
+        ε_kernel = f(ε_i) * f(ε_j)
+        ε_bias = f(ε_j)
+
+        # Performs: y = (µw + σw · εw)x + µb + σb · εb
+        # to calculate the output
+        kernel = self.µ_kernel + (self.σ_kernel * ε_kernel)
+        rank = inputs.shape.rank
+        if rank == 2 or rank is None:
+            if isinstance(inputs, sparse_tensor.SparseTensor):
+                outputs = sparse_ops.sparse_tensor_dense_matmul(inputs, kernel)
+            else:
+                outputs = gen_math_ops.mat_mul(inputs, kernel)
+        # Broadcast kernel to inputs.
+        else:
+            outputs = standard_ops.tensordot(inputs, kernel, [[rank - 1], [0]])
+            # Reshape the output back to the original ndim of the input.
+            if not context.executing_eagerly():
+                shape = inputs.shape.as_list()
+                output_shape = shape[:-1] + [kernel.shape[-1]]
+                outputs.set_shape(output_shape)
+
+        if self.use_bias:
+            noisy_bias = self.µ_bias + (self.σ_bias * ε_bias)
+            outputs = nn_ops.bias_add(outputs, noisy_bias)
+
+        if self.activation is not None:
+            outputs = self.activation(outputs)
+
+        return outputs
+
+    def compute_output_shape(self, input_shape):
+        input_shape = tensor_shape.TensorShape(input_shape)
+        input_shape = input_shape.with_rank_at_least(2)
+        if tensor_shape.dimension_value(input_shape[-1]) is None:
+            raise ValueError(
+                "The innermost dimension of input_shape must be defined, but saw: %s"
+                % input_shape
+            )
+        return input_shape[:-1].concatenate(self.units)
+
+    def get_config(self):
+        config = super(NoisyDense, self).get_config()
+        config.update(
+            {
+                "units": self.units,
+                "activation": activations.serialize(self.activation),
+                "kernel_regularizer": regularizers.serialize(self.kernel_regularizer),
+                "bias_regularizer": regularizers.serialize(self.bias_regularizer),
+                "activity_regularizer": regularizers.serialize(
+                    self.activity_regularizer
+                ),
+                "kernel_constraint": constraints.serialize(self.kernel_constraint),
+                "bias_constraint": constraints.serialize(self.bias_constraint),
+            }
+        )
+        return config

From 5c09b506070734d23a1ea355fa168b1adf8ed755 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Mon, 17 Aug 2020 21:16:24 -0700
Subject: [PATCH 11/44] Update noisy_dense_test.py

---
 .../layers/tests/noisy_dense_test.py          | 91 +++++++++++--------
 1 file changed, 51 insertions(+), 40 deletions(-)

diff --git a/tensorflow_addons/layers/tests/noisy_dense_test.py b/tensorflow_addons/layers/tests/noisy_dense_test.py
index 61afc3c94f..f0877a1238 100644
--- a/tensorflow_addons/layers/tests/noisy_dense_test.py
+++ b/tensorflow_addons/layers/tests/noisy_dense_test.py
@@ -21,56 +21,67 @@
 from tensorflow_addons.utils import test_utils
 from tensorflow_addons.layers.noisy_dense import NoisyDense
 from tensorflow.python.framework import ops
-from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.framework import tensor_spec
-from tensorflow.debugging import assert_equal
 from tensorflow.python.keras.mixed_precision.experimental import policy
+from tensorflow.python.framework import dtypes
 
-@pytest.mark.usefixtures("maybe_run_functions_eagerly")
-@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
-def test_noisy_dense(dtype):
-  test_utils.layer_test(
-      NoisyDense, kwargs={'units': 3, "dtype": dtype}, input_shape=(3, 2))
 
-  test_utils.layer_test(
-      NoisyDense, kwargs={'units': 3, "dtype": dtype}, input_shape=(3, 4, 2))
+def test_noisy_dense():
+    test_utils.layer_test(NoisyDense, kwargs={"units": 3}, input_shape=(3, 2))
+
+    test_utils.layer_test(NoisyDense, kwargs={"units": 3}, input_shape=(3, 4, 2))
+
+    test_utils.layer_test(NoisyDense, kwargs={"units": 3}, input_shape=(None, None, 2))
+
+    test_utils.layer_test(NoisyDense, kwargs={"units": 3}, input_shape=(3, 4, 5, 2))
 
-  test_utils.layer_test(
-      NoisyDense, kwargs={'units': 3, "dtype": dtype}, input_shape=(None, None, 2))
 
-  test_utils.layer_test(
-      NoisyDense, kwargs={'units': 3, "dtype": dtype}, input_shape=(3, 4, 5, 2))
+@pytest.mark.usefixtures("maybe_run_functions_eagerly")
+@pytest.mark.parametrize("dtype", ["float16", "float32", "float64"])
+def test_noisy_dense_dtype(dtype):
+    inputs = ops.convert_to_tensor_v2(
+        np.random.randint(low=0, high=7, size=(2, 2)), dtype=dtype
+    )
+    layer = NoisyDense(5, dtype=dtype)
+    outputs = layer(inputs)
+    np.testing.assert_array_equal(outputs.dtype, dtype)
+
 
 @pytest.mark.usefixtures("maybe_run_functions_eagerly")
 def test_noisy_dense_with_policy():
-  inputs = ops.convert_to_tensor_v2(
-      np.random.randint(low=0, high=7, size=(2, 2)))
-  layer = NoisyDense(5, dtype=policy.Policy('mixed_float16'))
-  outputs = layer(inputs)
-  output_signature = layer.compute_output_signature(
-      tensor_spec.TensorSpec(dtype='float16', shape=(2, 2)))
-  assert_equal(output_signature.dtype, dtypes.float16)
-  assert_equal(output_signature.shape, (2, 5))
-  sassert_equal(outputs.dtype, 'float16')
-  assert_equal(layer.kernel.dtype, 'float32')
+    inputs = ops.convert_to_tensor_v2(np.random.randint(low=0, high=7, size=(2, 2)))
+    layer = NoisyDense(5, dtype=policy.Policy("mixed_float16"))
+    outputs = layer(inputs)
+    output_signature = layer.compute_output_signature(
+        tensor_spec.TensorSpec(dtype="float16", shape=(2, 2))
+    )
+    np.testing.assert_array_equal(output_signature.dtype, dtypes.float16)
+    np.testing.assert_array_equal(output_signature.shape, (2, 5))
+    np.testing.assert_array_equal(outputs.dtype, "float16")
+    np.testing.assert_array_equal(layer.µ_kernel.dtype, "float32")
+    np.testing.assert_array_equal(layer.σ_kernel.dtype, "float32")
+
 
 @pytest.mark.usefixtures("maybe_run_functions_eagerly")
-def test_noisy_dense_regularization(self):
-  layer = NoisyDense(
-      3,
-      kernel_regularizer=keras.regularizers.l1(0.01),
-      bias_regularizer='l1',
-      activity_regularizer='l2',
-      name='noisy_dense_reg')
-  layer(keras.backend.variable(np.ones((2, 4))))
-  assert_equal(3, len(layer.losses))
+def test_noisy_dense_regularization():
+    layer = NoisyDense(
+        3,
+        kernel_regularizer=keras.regularizers.l1(0.01),
+        bias_regularizer="l1",
+        activity_regularizer="l2",
+        name="noisy_dense_reg",
+    )
+    layer(keras.backend.variable(np.ones((2, 4))))
+    np.testing.assert_array_equal(5, len(layer.losses))
+
 
 @pytest.mark.usefixtures("maybe_run_functions_eagerly")
-def test_noisy_dense_constraints(self):
-  k_constraint = keras.constraints.max_norm(0.01)
-  b_constraint = keras.constraints.max_norm(0.01)
-  layer = NoisyDense(
-      3, kernel_constraint=k_constraint, bias_constraint=b_constraint)
-  layer(keras.backend.variable(np.ones((2, 4))))
-  assert_equal(layer.kernel.constraint, k_constraint)
-  assert_equal(layer.bias.constraint, b_constraint)
+def test_noisy_dense_constraints():
+    k_constraint = keras.constraints.max_norm(0.01)
+    b_constraint = keras.constraints.max_norm(0.01)
+    layer = NoisyDense(3, kernel_constraint=k_constraint, bias_constraint=b_constraint)
+    layer(keras.backend.variable(np.ones((2, 4))))
+    np.testing.assert_array_equal(layer.µ_kernel.constraint, k_constraint)
+    np.testing.assert_array_equal(layer.σ_kernel.constraint, k_constraint)
+    np.testing.assert_array_equal(layer.µ_bias.constraint, b_constraint)
+    np.testing.assert_array_equal(layer.σ_bias.constraint, b_constraint)

From 5e16eef4dc72454ddcd7d0ec4381a55857185f93 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Sat, 22 Aug 2020 21:36:25 -0700
Subject: [PATCH 12/44] Fix compliance issues

---
 tensorflow_addons/layers/noisy_dense.py | 41 +++++++++++--------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 9eed42daa4..c64bc1ed10 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -22,17 +22,8 @@
     regularizers,
     constraints,
 )
-from tensorflow.python.ops import (
-    math_ops,
-    nn_ops,
-    sparse_ops,
-    gen_math_ops,
-    standard_ops,
-)
-from tensorflow.python.keras import backend as K
-from tensorflow.python.keras.engine.input_spec import InputSpec
-from tensorflow.python.framework import dtypes, tensor_shape, sparse_tensor
-from tensorflow.python.eager import context
+from tensorflow.keras import backend as K
+from tensorflow.keras.layers import InputSpec
 
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
@@ -116,15 +107,15 @@ def __init__(
 
     def build(self, input_shape):
         # Make sure dtype is correct
-        dtype = dtypes.as_dtype(self.dtype or K.floatx())
+        dtype = tf.dtypes.as_dtype(self.dtype or K.floatx())
         if not (dtype.is_floating or dtype.is_complex):
             raise TypeError(
                 "Unable to build `Dense` layer with non-floating point "
                 "dtype %s" % (dtype,)
             )
 
-        input_shape = tensor_shape.TensorShape(input_shape)
-        self.last_dim = tensor_shape.dimension_value(input_shape[-1])
+        input_shape = tf.TensorShape(input_shape)
+        self.last_dim = tf.compat.dimension_value(input_shape[-1])
         sqrt_dim = self.last_dim ** (1 / 2)
         if self.last_dim is None:
             raise ValueError(
@@ -190,7 +181,7 @@ def _scale_noise(x):
     def call(self, inputs):
         dtype = self._compute_dtype_object
         if inputs.dtype.base_dtype != dtype.base_dtype:
-            inputs = math_ops.cast(inputs, dtype=dtype)
+            inputs = tf.cast(inputs, dtype=dtype)
 
         # Fixed parameters added as the noise
         ε_i = tf.random.normal([self.last_dim, self.units], dtype=dtype)
@@ -204,24 +195,28 @@ def call(self, inputs):
         # Performs: y = (µw + σw · εw)x + µb + σb · εb
         # to calculate the output
         kernel = self.µ_kernel + (self.σ_kernel * ε_kernel)
+
+        if inputs.dtype.base_dtype != dtype.base_dtype:
+            inputs = tf.cast(inputs, dtype=dtype)
+
         rank = inputs.shape.rank
         if rank == 2 or rank is None:
-            if isinstance(inputs, sparse_tensor.SparseTensor):
-                outputs = sparse_ops.sparse_tensor_dense_matmul(inputs, kernel)
+            if isinstance(inputs, tf.sparse.SparseTensor):
+                outputs = tf.sparse.sparse_dense_matmul(inputs, kernel)
             else:
-                outputs = gen_math_ops.mat_mul(inputs, kernel)
+                outputs = tf.linalg.matmul(inputs, kernel)
         # Broadcast kernel to inputs.
         else:
-            outputs = standard_ops.tensordot(inputs, kernel, [[rank - 1], [0]])
+            outputs = tf.tensordot(inputs, kernel, [[rank - 1], [0]])
             # Reshape the output back to the original ndim of the input.
-            if not context.executing_eagerly():
+            if not tf.executing_eagerly():
                 shape = inputs.shape.as_list()
                 output_shape = shape[:-1] + [kernel.shape[-1]]
                 outputs.set_shape(output_shape)
 
         if self.use_bias:
             noisy_bias = self.µ_bias + (self.σ_bias * ε_bias)
-            outputs = nn_ops.bias_add(outputs, noisy_bias)
+            outputs = tf.nn.bias_add(outputs, noisy_bias)
 
         if self.activation is not None:
             outputs = self.activation(outputs)
@@ -229,9 +224,9 @@ def call(self, inputs):
         return outputs
 
     def compute_output_shape(self, input_shape):
-        input_shape = tensor_shape.TensorShape(input_shape)
+        input_shape = tf.TensorShape(input_shape)
         input_shape = input_shape.with_rank_at_least(2)
-        if tensor_shape.dimension_value(input_shape[-1]) is None:
+        if tf.compat.dimension_value(input_shape[-1]) is None:
             raise ValueError(
                 "The innermost dimension of input_shape must be defined, but saw: %s"
                 % input_shape

From 4b14b8aac0a11473f30f5a272cc09b964a17a9b3 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Sat, 22 Aug 2020 21:37:01 -0700
Subject: [PATCH 13/44] Fix compliance issues

---
 .../layers/tests/noisy_dense_test.py          | 21 +++++++++----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/tensorflow_addons/layers/tests/noisy_dense_test.py b/tensorflow_addons/layers/tests/noisy_dense_test.py
index f0877a1238..4dfee42650 100644
--- a/tensorflow_addons/layers/tests/noisy_dense_test.py
+++ b/tensorflow_addons/layers/tests/noisy_dense_test.py
@@ -15,15 +15,14 @@
 """Tests NoisyDense layer."""
 
 
-import numpy as np
 import pytest
-from tensorflow.python import keras
+import numpy as np
+
+import tensorflow as tf
+from tensorflow import keras
 from tensorflow_addons.utils import test_utils
 from tensorflow_addons.layers.noisy_dense import NoisyDense
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_spec
-from tensorflow.python.keras.mixed_precision.experimental import policy
-from tensorflow.python.framework import dtypes
+from tensorflow.keras.mixed_precision.experimental import Policy
 
 
 def test_noisy_dense():
@@ -39,7 +38,7 @@ def test_noisy_dense():
 @pytest.mark.usefixtures("maybe_run_functions_eagerly")
 @pytest.mark.parametrize("dtype", ["float16", "float32", "float64"])
 def test_noisy_dense_dtype(dtype):
-    inputs = ops.convert_to_tensor_v2(
+    inputs = tf.convert_to_tensor(
         np.random.randint(low=0, high=7, size=(2, 2)), dtype=dtype
     )
     layer = NoisyDense(5, dtype=dtype)
@@ -49,13 +48,13 @@ def test_noisy_dense_dtype(dtype):
 
 @pytest.mark.usefixtures("maybe_run_functions_eagerly")
 def test_noisy_dense_with_policy():
-    inputs = ops.convert_to_tensor_v2(np.random.randint(low=0, high=7, size=(2, 2)))
-    layer = NoisyDense(5, dtype=policy.Policy("mixed_float16"))
+    inputs = tf.convert_to_tensor(np.random.randint(low=0, high=7, size=(2, 2)))
+    layer = NoisyDense(5, dtype=Policy("mixed_float16"))
     outputs = layer(inputs)
     output_signature = layer.compute_output_signature(
-        tensor_spec.TensorSpec(dtype="float16", shape=(2, 2))
+        tf.TensorSpec(dtype="float16", shape=(2, 2))
     )
-    np.testing.assert_array_equal(output_signature.dtype, dtypes.float16)
+    np.testing.assert_array_equal(output_signature.dtype, tf.dtypes.float16)
     np.testing.assert_array_equal(output_signature.shape, (2, 5))
     np.testing.assert_array_equal(outputs.dtype, "float16")
     np.testing.assert_array_equal(layer.µ_kernel.dtype, "float32")

From 9e1f82fbdebf7b120d030fc1f2ef315ff5d84c64 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Sun, 23 Aug 2020 11:07:57 -0700
Subject: [PATCH 14/44] Update comments

---
 tensorflow_addons/layers/noisy_dense.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index c64bc1ed10..17f81031a9 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -1,4 +1,4 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,9 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-# Orginal implementation from keras_contrib/layer/normalization
-# =============================================================================
+# ==============================================================================
 
 import tensorflow as tf
 from tensorflow.keras import (
@@ -28,8 +26,8 @@
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
 class NoisyDense(tf.keras.layers.Layer):
-    """Like normal dense layer but random noisy is added to the weights matrix. But
-  as the network improves the random noise is decayed until it is insignificant.
+    """Like normal dense layer (https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/layers/core.py#L1067-L1233)
+  but random noisy is added to the weights matrix. But as the network improves the random noise is decayed until it is insignificant.
 
   A `NoisyDense` layer implements the operation:
   `output = activation(dot(input, µ_kernel + (σ_kernel * ε_kernel)) + bias)`

From 57ebf5dd50402b9d8ac446c71329b2ac163a13d3 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Sun, 23 Aug 2020 11:08:23 -0700
Subject: [PATCH 15/44] Fix typo

---
 tensorflow_addons/layers/tests/noisy_dense_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow_addons/layers/tests/noisy_dense_test.py b/tensorflow_addons/layers/tests/noisy_dense_test.py
index 4dfee42650..c5d1ee2964 100644
--- a/tensorflow_addons/layers/tests/noisy_dense_test.py
+++ b/tensorflow_addons/layers/tests/noisy_dense_test.py
@@ -1,4 +1,4 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From 009873b3652311fb2c9248d374a5db7e9c7b2301 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Thu, 27 Aug 2020 08:58:20 -0700
Subject: [PATCH 16/44] Update CODEOWNERS

---
 .github/CODEOWNERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 879f140855..396cfa58ab 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -105,6 +105,8 @@
 /tensorflow_addons/layers/tests/esn_test.py @pedrolarben
 /tensorflow_addons/layers/snake.py @failure-to-thrive
 /tensorflow_addons/layers/tests/snake_test.py @failure-to-thrive
+/tensorflow_addons/layers/noisy_dense.py @leonshams
+/tensorflow_addons/layers/noisy_dense_test.py @leonshams
 
 /tensorflow_addons/losses/contrastive.py @windqaq
 /tensorflow_addons/losses/tests/contrastive_test.py @windqaq

From 840ab1ca1a0267e88700db26b12e29d469fd6194 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Thu, 27 Aug 2020 09:13:13 -0700
Subject: [PATCH 17/44] Update CODEOWNERS

---
 .github/CODEOWNERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 396cfa58ab..2070520eb7 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -106,7 +106,7 @@
 /tensorflow_addons/layers/snake.py @failure-to-thrive
 /tensorflow_addons/layers/tests/snake_test.py @failure-to-thrive
 /tensorflow_addons/layers/noisy_dense.py @leonshams
-/tensorflow_addons/layers/noisy_dense_test.py @leonshams
+/tensorflow_addons/layers/tests/noisy_dense_test.py @leonshams
 
 /tensorflow_addons/losses/contrastive.py @windqaq
 /tensorflow_addons/losses/tests/contrastive_test.py @windqaq

From fa54c00d94b7d48918fe2eff2b27fca59cd75fe8 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Sat, 29 Aug 2020 14:22:29 -0700
Subject: [PATCH 18/44] Add use_bias to config

---
 tensorflow_addons/layers/noisy_dense.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 17f81031a9..13964f9f06 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -1,4 +1,4 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
+# Orginal implementation from keras_contrib/layer/normalization
+# =============================================================================
 
 import tensorflow as tf
 from tensorflow.keras import (
@@ -26,8 +28,8 @@
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
 class NoisyDense(tf.keras.layers.Layer):
-    """Like normal dense layer (https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/layers/core.py#L1067-L1233)
-  but random noisy is added to the weights matrix. But as the network improves the random noise is decayed until it is insignificant.
+    """Like normal dense layer but random noisy is added to the weights matrix. But
+  as the network improves the random noise is decayed until it is insignificant.
 
   A `NoisyDense` layer implements the operation:
   `output = activation(dot(input, µ_kernel + (σ_kernel * ε_kernel)) + bias)`
@@ -237,6 +239,7 @@ def get_config(self):
             {
                 "units": self.units,
                 "activation": activations.serialize(self.activation),
+                "use_bias": self.use_bias,
                 "kernel_regularizer": regularizers.serialize(self.kernel_regularizer),
                 "bias_regularizer": regularizers.serialize(self.bias_regularizer),
                 "activity_regularizer": regularizers.serialize(

From d87069a687068879eb93f064ab6bc9811a9f7723 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Sat, 29 Aug 2020 19:52:01 -0700
Subject: [PATCH 19/44] Update noisy_dense.py


From 82e979f37cef4580d93894ed3822f3611f20e5fd Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Sat, 29 Aug 2020 19:59:01 -0700
Subject: [PATCH 20/44] Update CODEOWNERS

---
 .github/CODEOWNERS | 435 +++++++++++++++++++++++++--------------------
 1 file changed, 245 insertions(+), 190 deletions(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 2070520eb7..b1e2245caa 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,190 +1,245 @@
-# TensorFlow Addons Codeowners
-
-#####################################################################################
-# Subpackage Owners
-
-/tensorflow_addons/activations/ @facaiy @seanpmorgan
-/tensorflow_addons/callbacks/ @shun-lin
-/tensorflow_addons/image/ @windqaq @facaiy
-/tensorflow_addons/layers/ @seanpmorgan @facaiy
-/tensorflow_addons/losses/ @facaiy @windqaq
-/tensorflow_addons/metrics/ @marload
-/tensorflow_addons/optimizers/ @facaiy @windqaq
-/tensorflow_addons/rnn/ @qlzh727
-/tensorflow_addons/seq2seq/ @qlzh727 @guillaumekln
-/tensorflow_addons/text/ @seanpmorgan @facaiy
-
-/tensorflow_addons/custom_ops/image/ @windqaq @facaiy
-/tensorflow_addons/custom_ops/seq2seq/ @qlzh727
-/tensorflow_addons/custom_ops/text/ @seanpmorgan @facaiy
-
-#####################################################################################
-# Submodule Owners
-# These will not always trigger a GitHub review because submodule owners do not
-# always have write access. However, a bot will notify them of the needed review.
-
-/tensorflow_addons/activations/gelu.py @aakashkumarnain @windqaq
-/tensorflow_addons/activations/tests/gelu_test.py @aakashkumarnain @windqaq
-/tensorflow_addons/activations/hardshrink.py @windqaq
-/tensorflow_addons/activations/tests/hardshrink_test.py @windqaq
-/tensorflow_addons/activations/lisht.py @windqaq
-/tensorflow_addons/activations/tests/lisht_test.py @windqaq
-/tensorflow_addons/activations/mish.py @windqaq @digantamisra98
-/tensorflow_addons/activations/tests/mish_test.py @windqaq @digantamisra98
-/tensorflow_addons/activations/rrelu.py @fsx950223
-/tensorflow_addons/activations/tests/rrelu_test.py @fsx950223
-/tensorflow_addons/activations/softshrink.py @windqaq
-/tensorflow_addons/activations/tests/softshrink_test.py @windqaq
-/tensorflow_addons/activations/sparsemax.py @andreasmadsen
-/tensorflow_addons/activations/tests/sparsemax_test.py @andreasmadsen
-/tensorflow_addons/activations/tanhshrink.py @fsx950223
-/tensorflow_addons/activations/tests/tanhshrink_test.py @fsx950223
-/tensorflow_addons/activations/snake.py @failure-to-thrive
-/tensorflow_addons/activations/tests/snake_test.py @failure-to-thrive
-
-/tensorflow_addons/callbacks/average_model_checkpoint.py @squadrick
-/tensorflow_addons/callbacks/time_stopping.py @shun-lin
-/tensorflow_addons/callbacks/tests/time_stopping_test.py @shun-lin
-/tensorflow_addons/callbacks/tqdm_progress_bar.py @shun-lin
-/tensorflow_addons/callbacks/tests/tqdm_progress_bar_test.py @shun-lin
-
-/tensorflow_addons/image/color_ops.py @abhichou4
-/tensorflow_addons/image/tests/color_ops_test.py @abhichou4
-/tensorflow_addons/image/connected_components.py @sayoojbk
-/tensorflow_addons/image/tests/connected_components_test.py @sayoojbk
-/tensorflow_addons/image/cutout_ops.py @fsx950223
-/tensorflow_addons/image/tests/cutout_ops_test.py @fsx950223
-/tensorflow_addons/image/dense_image_warp.py @windQAQ
-/tensorflow_addons/image/tests/dense_image_warp_test.py @windQAQ
-/tensorflow_addons/image/distance_transform.py @mels630
-/tensorflow_addons/image/tests/distance_transform_test.py @mels630
-/tensorflow_addons/image/distort_image_ops.py @windqaq
-/tensorflow_addons/image/tests/distort_image_ops_test.py @windqaq
-/tensorflow_addons/image/filters.py @mainak431 @ghosalsattam
-/tensorflow_addons/image/tests/filters_test.py @mainak431 @ghosalsattam
-/tensorflow_addons/image/interpolate_spline.py
-/tensorflow_addons/image/tests/interpolate_spline_test.py
-/tensorflow_addons/image/resampler_ops.py @autoih
-/tensorflow_addons/image/tests/resampler_ops_test.py @autoih
-/tensorflow_addons/image/sparse_image_warp.py
-/tensorflow_addons/image/tests/sparse_image_warp_test.py
-/tensorflow_addons/image/transform_ops.py @mels630
-/tensorflow_addons/image/tests/transform_ops_test.py @mels630
-/tensorflow_addons/image/translate_ops.py @sayoojbk
-/tensorflow_addons/image/tests/translate_ops_test.py @sayoojbk
-
-/tensorflow_addons/layers/adaptive_pooling.py @Susmit-A
-/tensorflow_addons/layers/tests/adaptive_pooling_test.py @Susmit-A
-/tensorflow_addons/layers/gelu.py @aakashkumarnain
-/tensorflow_addons/layers/tests/gelu_test.py @aakashkumarnain
-/tensorflow_addons/layers/maxout.py @failure-to-thrive
-/tensorflow_addons/layers/tests/maxout_test.py @failure-to-thrive
-/tensorflow_addons/layers/multihead_attention.py @cgarciae
-/tensorflow_addons/layers/tests/multihead_attention_test.py @cgarciae
-/tensorflow_addons/layers/netvlad.py @joel-shor
-/tensorflow_addons/layers/tests/netvlad_test.py @joel-shor
-/tensorflow_addons/layers/normalizations.py @smokrow
-/tensorflow_addons/layers/tests/normalizations_test.py @smokrow
-/tensorflow_addons/layers/optical_flow.py @failure-to-thrive
-/tensorflow_addons/layers/tests/optical_flow_test.py @failure-to-thrive
-/tensorflow_addons/layers/poincare.py @rahulunair
-/tensorflow_addons/layers/tests/poincare_test.py @rahulunair
-/tensorflow_addons/layers/polynomial.py @tanzhenyu
-/tensorflow_addons/layers/tests/polynomial_test.py @tanzhenyu
-/tensorflow_addons/layers/sparsemax.py @andreasmadsen
-/tensorflow_addons/layers/tests/sparsemax_test.py @andreasmadsen
-/tensorflow_addons/layers/spectral_normalization.py @charlielito
-/tensorflow_addons/layers/tests/spectral_normalization_test.py @charlielito
-/tensorflow_addons/layers/spatial_pyramid_pooling.py @Susmit-A
-/tensorflow_addons/layers/tests/spatial_pyramid_pooling_test.py @Susmit-A
-/tensorflow_addons/layers/tlu.py @aakashkumarnain
-/tensorflow_addons/layers/tests/tlu_test.py @aakashkumarnain
-/tensorflow_addons/layers/wrappers.py @seanpmorgan
-/tensorflow_addons/layers/tests/wrappers_test.py @seanpmorgan
-/tensorflow_addons/layers/esn.py @pedrolarben
-/tensorflow_addons/layers/tests/esn_test.py @pedrolarben
-/tensorflow_addons/layers/snake.py @failure-to-thrive
-/tensorflow_addons/layers/tests/snake_test.py @failure-to-thrive
-/tensorflow_addons/layers/noisy_dense.py @leonshams
-/tensorflow_addons/layers/tests/noisy_dense_test.py @leonshams
-
-/tensorflow_addons/losses/contrastive.py @windqaq
-/tensorflow_addons/losses/tests/contrastive_test.py @windqaq
-/tensorflow_addons/losses/focal_loss.py @aakashkumarnain @ssaishruthi
-/tensorflow_addons/losses/tests/focal_loss_test.py @aakashkumarnain @ssaishruthi
-/tensorflow_addons/losses/giou_loss.py @fsx950223
-/tensorflow_addons/losses/tests/giou_loss_test.py @fsx950223
-/tensorflow_addons/losses/lifted.py @rahulunair
-/tensorflow_addons/losses/tests/lifted_test.py @rahulunair
-/tensorflow_addons/losses/metric_learning.py
-/tensorflow_addons/losses/npairs.py @windqaq
-/tensorflow_addons/losses/tests/npairs_test.py @windqaq
-/tensorflow_addons/losses/quantiles.py @romainbrault
-/tensorflow_addons/losses/tests/quantiles_test.py @romainbrault
-/tensorflow_addons/losses/sparsemax_loss.py @andreasmadsen
-/tensorflow_addons/losses/tests/sparsemax_loss_test.py @andreasmadsen
-/tensorflow_addons/losses/triplet.py @lc0
-/tensorflow_addons/losses/tests/triplet_test.py @lc0
-/tensorflow_addons/losses/kappa_loss.py @wenmin-wu
-/tensorflow_addons/losses/tests/kappa_loss_test.py @wenmin-wu
-
-/tensorflow_addons/metrics/cohens_kappa.py @aakashkumarnain
-/tensorflow_addons/metrics/tests/cohens_kappa_test.py @aakashkumarnain
-/tensorflow_addons/metrics/f_scores.py @ssaishruthi @marload
-/tensorflow_addons/metrics/tests/f_scores_test.py @ssaishruthi @marload
-/tensorflow_addons/metrics/hamming.py @ssaishruthi
-/tensorflow_addons/metrics/tests/hamming_test.py @ssaishruthi
-/tensorflow_addons/metrics/matthews_correlation_coefficient.py @autoih @marload
-/tensorflow_addons/metrics/tests/matthews_correlation_coefficient_test.py @autoih @marload
-/tensorflow_addons/metrics/multilabel_confusion_matrix.py @ssaishruthi
-/tensorflow_addons/metrics/tests/multilabel_confusion_matrix_test.py @ssaishruthi
-/tensorflow_addons/metrics/r_square.py @ssaishruthi @marload
-/tensorflow_addons/metrics/tests/r_square_test.py @ssaishruthi @marload
-/tensorflow_addons/metrics/geometric_mean.py @marload
-/tensorflow_addons/metrics/tests/geometric_mean_test.py @marload
-
-/tensorflow_addons/optimizers/average_wrapper.py @squadrick
-/tensorflow_addons/optimizers/conditional_gradient.py @pkan2 @lokhande-vishnu
-/tensorflow_addons/optimizers/tests/conditional_gradient_test.py @pkan2 @lokhande-vishnu
-/tensorflow_addons/optimizers/cyclical_learning_rate.py @raphaelmeudec
-/tensorflow_addons/optimizers/tests/cyclical_learning_rate_test.py @raphaelmeudec
-/tensorflow_addons/optimizers/lamb.py @junjiek
-/tensorflow_addons/optimizers/tests/lamb_test.py @junjiek
-/tensorflow_addons/optimizers/lazy_adam.py @ssaishruthi
-/tensorflow_addons/optimizers/tests/lazy_adam_test.py @ssaishruthi
-/tensorflow_addons/optimizers/lookahead.py @cyberzhg
-/tensorflow_addons/optimizers/tests/lookahead_test.py @cyberzhg
-/tensorflow_addons/optimizers/moving_average.py @squadrick
-/tensorflow_addons/optimizers/tests/moving_average_test.py @squadrick
-/tensorflow_addons/optimizers/novograd.py @shreyashpatodia
-/tensorflow_addons/optimizers/tests/novograd_test.py @shreyashpatodia
-/tensorflow_addons/optimizers/proximal_adagrad.py @WindQAQ
-/tensorflow_addons/optimizers/tests/proximal_adagrad_test.py @WindQAQ
-/tensorflow_addons/optimizers/rectified_adam.py @cyberzhg
-/tensorflow_addons/optimizers/tests/rectified_adam_test.py @cyberzhg
-/tensorflow_addons/optimizers/stochastic_weight_averaging.py @shreyashpatodia
-/tensorflow_addons/optimizers/tests/stochastic_weight_averaging_test.py @shreyashpatodia
-/tensorflow_addons/optimizers/weight_decay_optimizers.py @philjd
-/tensorflow_addons/optimizers/tests/weight_decay_optimizers_test.py @philjd
-/tensorflow_addons/optimizers/yogi.py @manzilz
-/tensorflow_addons/optimizers/tests/yogi_test.py @manzilz
-
-/tensorflow_addons/rnn/esn_cell.py @pedrolarben
-/tensorflow_addons/rnn/tests/esn_cell_test.py @pedrolarben
-/tensorflow_addons/rnn/layer_norm_lstm_cell.py @qlzh727
-/tensorflow_addons/rnn/tests/layer_norm_lstm_cell_test.py @qlzh727
-/tensorflow_addons/rnn/layer_norm_simple_rnn_cell.py @qlzh727
-/tensorflow_addons/rnn/tests/layer_norm_simple_rnn_cell_test.py @qlzh727
-/tensorflow_addons/rnn/nas_cell.py @qlzh727
-/tensorflow_addons/rnn/tests/nas_cell_test.py @qlzh727
-/tensorflow_addons/rnn/peephole_lstm_cell.py @qlzh727
-/tensorflow_addons/rnn/tests/peephole_lstm_cell_test.py @qlzh727
-
-/tensorflow_addons/seq2seq/ @qlzh727 @guillaumekln
-
-/tensorflow_addons/text/crf.py @squadrick
-/tensorflow_addons/text/tests/crf_test.py @squadrick
-/tensorflow_addons/text/parse_time_op.py @helinwang
-/tensorflow_addons/text/tests/parse_time_op_test.py @helinwang
-/tensorflow_addons/text/skip_gram_ops.py @rahulunair
-/tensorflow_addons/text/tests/skip_gram_ops_test.py @rahulunair
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import tensorflow as tf
+from tensorflow.keras import (
+    activations,
+    initializers,
+    regularizers,
+    constraints,
+)
+from tensorflow.keras import backend as K
+from tensorflow.keras.layers import InputSpec
+
+
+@tf.keras.utils.register_keras_serializable(package="Addons")
+class NoisyDense(tf.keras.layers.Layer):
+    """Like normal dense layer (https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/layers/core.py#L1067-L1233)
+  but random noisy is added to the weights matrix. But as the network improves the random noise is decayed until it is insignificant.
+  A `NoisyDense` layer implements the operation:
+  `output = activation(dot(input, µ_kernel + (σ_kernel * ε_kernel)) + bias)`
+  where `activation` is the element-wise activation function
+  passed as the `activation` argument, `µ_kernel` is your average weights matrix
+  created by the layer, σ_kernel is a weights matrix that controls the importance of
+  the ε_kernel which is just random noise, and `bias` is a bias vector created by the layer
+  (only applicable if `use_bias` is `True`).
+  Example:
+  >>> # Create a `Sequential` model and add a Dense layer as the first layer.
+  >>> model = tf.keras.models.Sequential()
+  >>> model.add(tf.keras.Input(shape=(16,)))
+  >>> model.add(NoisyDense(32, activation='relu'))
+  >>> # Now the model will take as input arrays of shape (None, 16)
+  >>> # and output arrays of shape (None, 32).
+  >>> # Note that after the first layer, you don't need to specify
+  >>> # the size of the input anymore:
+  >>> model.add(NoisyDense(32))
+  >>> model.output_shape
+  (None, 32)
+  Arguments:
+    units: Positive integer, dimensionality of the output space.
+    activation: Activation function to use.
+      If you don't specify anything, no activation is applied
+      (ie. "linear" activation: `a(x) = x`).
+    use_bias: Boolean, whether the layer uses a bias vector.
+    kernel_regularizer: Regularizer function applied to
+      the `kernel` weights matrix.
+    bias_regularizer: Regularizer function applied to the bias vector.
+    activity_regularizer: Regularizer function applied to
+      the output of the layer (its "activation").
+    kernel_constraint: Constraint function applied to
+      the `kernel` weights matrix.
+    bias_constraint: Constraint function applied to the bias vector.
+  Input shape:
+    N-D tensor with shape: `(batch_size, ..., input_dim)`.
+    The most common situation would be
+    a 2D input with shape `(batch_size, input_dim)`.
+  Output shape:
+    N-D tensor with shape: `(batch_size, ..., units)`.
+    For instance, for a 2D input with shape `(batch_size, input_dim)`,
+    the output would have shape `(batch_size, units)`.
+  """
+
+    def __init__(
+        self,
+        units,
+        activation=None,
+        use_bias=True,
+        kernel_regularizer=None,
+        bias_regularizer=None,
+        activity_regularizer=None,
+        kernel_constraint=None,
+        bias_constraint=None,
+        **kwargs
+    ):
+        super(NoisyDense, self).__init__(
+            activity_regularizer=activity_regularizer, **kwargs
+        )
+
+        self.units = int(units) if not isinstance(units, int) else units
+        self.activation = activations.get(activation)
+        self.use_bias = use_bias
+        self.kernel_regularizer = regularizers.get(kernel_regularizer)
+        self.bias_regularizer = regularizers.get(bias_regularizer)
+        self.kernel_constraint = constraints.get(kernel_constraint)
+        self.bias_constraint = constraints.get(bias_constraint)
+
+        self.input_spec = InputSpec(min_ndim=2)
+        self.supports_masking = True
+
+    def build(self, input_shape):
+        # Make sure dtype is correct
+        dtype = tf.dtypes.as_dtype(self.dtype or K.floatx())
+        if not (dtype.is_floating or dtype.is_complex):
+            raise TypeError(
+                "Unable to build `Dense` layer with non-floating point "
+                "dtype %s" % (dtype,)
+            )
+
+        input_shape = tf.TensorShape(input_shape)
+        self.last_dim = tf.compat.dimension_value(input_shape[-1])
+        sqrt_dim = self.last_dim ** (1 / 2)
+        if self.last_dim is None:
+            raise ValueError(
+                "The last dimension of the inputs to `Dense` "
+                "should be defined. Found `None`."
+            )
+        self.input_spec = InputSpec(min_ndim=2, axes={-1: self.last_dim})
+
+        self.σ_init = initializers.Constant(value=0.5 / sqrt_dim)
+        self.µ_init = initializers.RandomUniform(
+            minval=-1 / sqrt_dim, maxval=1 / sqrt_dim
+        )
+
+        # Learnable parameters
+        # Agent will learn to decay σ as it improves creating a sort of learned epsilon decay
+        self.σ_kernel = self.add_weight(
+            "σ_kernel",
+            shape=[self.last_dim, self.units],
+            initializer=self.σ_init,
+            regularizer=self.kernel_regularizer,
+            constraint=self.kernel_constraint,
+            dtype=self.dtype,
+            trainable=True,
+        )
+
+        self.µ_kernel = self.add_weight(
+            "µ_kernel",
+            shape=[self.last_dim, self.units],
+            initializer=self.µ_init,
+            regularizer=self.kernel_regularizer,
+            constraint=self.kernel_constraint,
+            dtype=self.dtype,
+            trainable=True,
+        )
+
+        if self.use_bias:
+            self.σ_bias = self.add_weight(
+                "σ_bias",
+                shape=[self.units,],
+                initializer=self.σ_init,
+                regularizer=self.bias_regularizer,
+                constraint=self.bias_constraint,
+                dtype=self.dtype,
+                trainable=True,
+            )
+
+            self.µ_bias = self.add_weight(
+                "µ_bias",
+                shape=[self.units,],
+                initializer=self.µ_init,
+                regularizer=self.bias_regularizer,
+                constraint=self.bias_constraint,
+                dtype=self.dtype,
+                trainable=True,
+            )
+
+        self.built = True
+
+    @staticmethod
+    def _scale_noise(x):
+        return tf.sign(x) * tf.sqrt(tf.abs(x))
+
+    def call(self, inputs):
+        dtype = self._compute_dtype_object
+        if inputs.dtype.base_dtype != dtype.base_dtype:
+            inputs = tf.cast(inputs, dtype=dtype)
+
+        # Fixed parameters added as the noise
+        ε_i = tf.random.normal([self.last_dim, self.units], dtype=dtype)
+        ε_j = tf.random.normal([self.units,], dtype=dtype)
+
+        # Creates the factorised Gaussian noise
+        f = NoisyDense._scale_noise
+        ε_kernel = f(ε_i) * f(ε_j)
+        ε_bias = f(ε_j)
+
+        # Performs: y = (µw + σw · εw)x + µb + σb · εb
+        # to calculate the output
+        kernel = self.µ_kernel + (self.σ_kernel * ε_kernel)
+
+        if inputs.dtype.base_dtype != dtype.base_dtype:
+            inputs = tf.cast(inputs, dtype=dtype)
+
+        rank = inputs.shape.rank
+        if rank == 2 or rank is None:
+            if isinstance(inputs, tf.sparse.SparseTensor):
+                outputs = tf.sparse.sparse_dense_matmul(inputs, kernel)
+            else:
+                outputs = tf.linalg.matmul(inputs, kernel)
+        # Broadcast kernel to inputs.
+        else:
+            outputs = tf.tensordot(inputs, kernel, [[rank - 1], [0]])
+            # Reshape the output back to the original ndim of the input.
+            if not tf.executing_eagerly():
+                shape = inputs.shape.as_list()
+                output_shape = shape[:-1] + [kernel.shape[-1]]
+                outputs.set_shape(output_shape)
+
+        if self.use_bias:
+            noisy_bias = self.µ_bias + (self.σ_bias * ε_bias)
+            outputs = tf.nn.bias_add(outputs, noisy_bias)
+
+        if self.activation is not None:
+            outputs = self.activation(outputs)
+
+        return outputs
+
+    def compute_output_shape(self, input_shape):
+        input_shape = tf.TensorShape(input_shape)
+        input_shape = input_shape.with_rank_at_least(2)
+        if tf.compat.dimension_value(input_shape[-1]) is None:
+            raise ValueError(
+                "The innermost dimension of input_shape must be defined, but saw: %s"
+                % input_shape
+            )
+        return input_shape[:-1].concatenate(self.units)
+
+    def get_config(self):
+        config = super(NoisyDense, self).get_config()
+        config.update(
+            {
+                "units": self.units,
+                "activation": activations.serialize(self.activation),
+                "use_bias": self.use_bias,
+                "kernel_regularizer": regularizers.serialize(self.kernel_regularizer),
+                "bias_regularizer": regularizers.serialize(self.bias_regularizer),
+                "activity_regularizer": regularizers.serialize(
+                    self.activity_regularizer
+                ),
+                "kernel_constraint": constraints.serialize(self.kernel_constraint),
+                "bias_constraint": constraints.serialize(self.bias_constraint),
+            }
+        )
+        return config

From 48d3c57382271576e9c7e8acbb6cb6e9fb287064 Mon Sep 17 00:00:00 2001
From: schaall <52867365+schaall@users.noreply.github.com>
Date: Sat, 29 Aug 2020 20:06:27 -0700
Subject: [PATCH 21/44] Revert "Update CODEOWNERS"

This reverts commit 82e979f37cef4580d93894ed3822f3611f20e5fd.
---
 .github/CODEOWNERS | 435 ++++++++++++++++++++-------------------------
 1 file changed, 190 insertions(+), 245 deletions(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index b1e2245caa..2070520eb7 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,245 +1,190 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-import tensorflow as tf
-from tensorflow.keras import (
-    activations,
-    initializers,
-    regularizers,
-    constraints,
-)
-from tensorflow.keras import backend as K
-from tensorflow.keras.layers import InputSpec
-
-
-@tf.keras.utils.register_keras_serializable(package="Addons")
-class NoisyDense(tf.keras.layers.Layer):
-    """Like normal dense layer (https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/layers/core.py#L1067-L1233)
-  but random noisy is added to the weights matrix. But as the network improves the random noise is decayed until it is insignificant.
-  A `NoisyDense` layer implements the operation:
-  `output = activation(dot(input, µ_kernel + (σ_kernel * ε_kernel)) + bias)`
-  where `activation` is the element-wise activation function
-  passed as the `activation` argument, `µ_kernel` is your average weights matrix
-  created by the layer, σ_kernel is a weights matrix that controls the importance of
-  the ε_kernel which is just random noise, and `bias` is a bias vector created by the layer
-  (only applicable if `use_bias` is `True`).
-  Example:
-  >>> # Create a `Sequential` model and add a Dense layer as the first layer.
-  >>> model = tf.keras.models.Sequential()
-  >>> model.add(tf.keras.Input(shape=(16,)))
-  >>> model.add(NoisyDense(32, activation='relu'))
-  >>> # Now the model will take as input arrays of shape (None, 16)
-  >>> # and output arrays of shape (None, 32).
-  >>> # Note that after the first layer, you don't need to specify
-  >>> # the size of the input anymore:
-  >>> model.add(NoisyDense(32))
-  >>> model.output_shape
-  (None, 32)
-  Arguments:
-    units: Positive integer, dimensionality of the output space.
-    activation: Activation function to use.
-      If you don't specify anything, no activation is applied
-      (ie. "linear" activation: `a(x) = x`).
-    use_bias: Boolean, whether the layer uses a bias vector.
-    kernel_regularizer: Regularizer function applied to
-      the `kernel` weights matrix.
-    bias_regularizer: Regularizer function applied to the bias vector.
-    activity_regularizer: Regularizer function applied to
-      the output of the layer (its "activation").
-    kernel_constraint: Constraint function applied to
-      the `kernel` weights matrix.
-    bias_constraint: Constraint function applied to the bias vector.
-  Input shape:
-    N-D tensor with shape: `(batch_size, ..., input_dim)`.
-    The most common situation would be
-    a 2D input with shape `(batch_size, input_dim)`.
-  Output shape:
-    N-D tensor with shape: `(batch_size, ..., units)`.
-    For instance, for a 2D input with shape `(batch_size, input_dim)`,
-    the output would have shape `(batch_size, units)`.
-  """
-
-    def __init__(
-        self,
-        units,
-        activation=None,
-        use_bias=True,
-        kernel_regularizer=None,
-        bias_regularizer=None,
-        activity_regularizer=None,
-        kernel_constraint=None,
-        bias_constraint=None,
-        **kwargs
-    ):
-        super(NoisyDense, self).__init__(
-            activity_regularizer=activity_regularizer, **kwargs
-        )
-
-        self.units = int(units) if not isinstance(units, int) else units
-        self.activation = activations.get(activation)
-        self.use_bias = use_bias
-        self.kernel_regularizer = regularizers.get(kernel_regularizer)
-        self.bias_regularizer = regularizers.get(bias_regularizer)
-        self.kernel_constraint = constraints.get(kernel_constraint)
-        self.bias_constraint = constraints.get(bias_constraint)
-
-        self.input_spec = InputSpec(min_ndim=2)
-        self.supports_masking = True
-
-    def build(self, input_shape):
-        # Make sure dtype is correct
-        dtype = tf.dtypes.as_dtype(self.dtype or K.floatx())
-        if not (dtype.is_floating or dtype.is_complex):
-            raise TypeError(
-                "Unable to build `Dense` layer with non-floating point "
-                "dtype %s" % (dtype,)
-            )
-
-        input_shape = tf.TensorShape(input_shape)
-        self.last_dim = tf.compat.dimension_value(input_shape[-1])
-        sqrt_dim = self.last_dim ** (1 / 2)
-        if self.last_dim is None:
-            raise ValueError(
-                "The last dimension of the inputs to `Dense` "
-                "should be defined. Found `None`."
-            )
-        self.input_spec = InputSpec(min_ndim=2, axes={-1: self.last_dim})
-
-        self.σ_init = initializers.Constant(value=0.5 / sqrt_dim)
-        self.µ_init = initializers.RandomUniform(
-            minval=-1 / sqrt_dim, maxval=1 / sqrt_dim
-        )
-
-        # Learnable parameters
-        # Agent will learn to decay σ as it improves creating a sort of learned epsilon decay
-        self.σ_kernel = self.add_weight(
-            "σ_kernel",
-            shape=[self.last_dim, self.units],
-            initializer=self.σ_init,
-            regularizer=self.kernel_regularizer,
-            constraint=self.kernel_constraint,
-            dtype=self.dtype,
-            trainable=True,
-        )
-
-        self.µ_kernel = self.add_weight(
-            "µ_kernel",
-            shape=[self.last_dim, self.units],
-            initializer=self.µ_init,
-            regularizer=self.kernel_regularizer,
-            constraint=self.kernel_constraint,
-            dtype=self.dtype,
-            trainable=True,
-        )
-
-        if self.use_bias:
-            self.σ_bias = self.add_weight(
-                "σ_bias",
-                shape=[self.units,],
-                initializer=self.σ_init,
-                regularizer=self.bias_regularizer,
-                constraint=self.bias_constraint,
-                dtype=self.dtype,
-                trainable=True,
-            )
-
-            self.µ_bias = self.add_weight(
-                "µ_bias",
-                shape=[self.units,],
-                initializer=self.µ_init,
-                regularizer=self.bias_regularizer,
-                constraint=self.bias_constraint,
-                dtype=self.dtype,
-                trainable=True,
-            )
-
-        self.built = True
-
-    @staticmethod
-    def _scale_noise(x):
-        return tf.sign(x) * tf.sqrt(tf.abs(x))
-
-    def call(self, inputs):
-        dtype = self._compute_dtype_object
-        if inputs.dtype.base_dtype != dtype.base_dtype:
-            inputs = tf.cast(inputs, dtype=dtype)
-
-        # Fixed parameters added as the noise
-        ε_i = tf.random.normal([self.last_dim, self.units], dtype=dtype)
-        ε_j = tf.random.normal([self.units,], dtype=dtype)
-
-        # Creates the factorised Gaussian noise
-        f = NoisyDense._scale_noise
-        ε_kernel = f(ε_i) * f(ε_j)
-        ε_bias = f(ε_j)
-
-        # Performs: y = (µw + σw · εw)x + µb + σb · εb
-        # to calculate the output
-        kernel = self.µ_kernel + (self.σ_kernel * ε_kernel)
-
-        if inputs.dtype.base_dtype != dtype.base_dtype:
-            inputs = tf.cast(inputs, dtype=dtype)
-
-        rank = inputs.shape.rank
-        if rank == 2 or rank is None:
-            if isinstance(inputs, tf.sparse.SparseTensor):
-                outputs = tf.sparse.sparse_dense_matmul(inputs, kernel)
-            else:
-                outputs = tf.linalg.matmul(inputs, kernel)
-        # Broadcast kernel to inputs.
-        else:
-            outputs = tf.tensordot(inputs, kernel, [[rank - 1], [0]])
-            # Reshape the output back to the original ndim of the input.
-            if not tf.executing_eagerly():
-                shape = inputs.shape.as_list()
-                output_shape = shape[:-1] + [kernel.shape[-1]]
-                outputs.set_shape(output_shape)
-
-        if self.use_bias:
-            noisy_bias = self.µ_bias + (self.σ_bias * ε_bias)
-            outputs = tf.nn.bias_add(outputs, noisy_bias)
-
-        if self.activation is not None:
-            outputs = self.activation(outputs)
-
-        return outputs
-
-    def compute_output_shape(self, input_shape):
-        input_shape = tf.TensorShape(input_shape)
-        input_shape = input_shape.with_rank_at_least(2)
-        if tf.compat.dimension_value(input_shape[-1]) is None:
-            raise ValueError(
-                "The innermost dimension of input_shape must be defined, but saw: %s"
-                % input_shape
-            )
-        return input_shape[:-1].concatenate(self.units)
-
-    def get_config(self):
-        config = super(NoisyDense, self).get_config()
-        config.update(
-            {
-                "units": self.units,
-                "activation": activations.serialize(self.activation),
-                "use_bias": self.use_bias,
-                "kernel_regularizer": regularizers.serialize(self.kernel_regularizer),
-                "bias_regularizer": regularizers.serialize(self.bias_regularizer),
-                "activity_regularizer": regularizers.serialize(
-                    self.activity_regularizer
-                ),
-                "kernel_constraint": constraints.serialize(self.kernel_constraint),
-                "bias_constraint": constraints.serialize(self.bias_constraint),
-            }
-        )
-        return config
+# TensorFlow Addons Codeowners
+
+#####################################################################################
+# Subpackage Owners
+
+/tensorflow_addons/activations/ @facaiy @seanpmorgan
+/tensorflow_addons/callbacks/ @shun-lin
+/tensorflow_addons/image/ @windqaq @facaiy
+/tensorflow_addons/layers/ @seanpmorgan @facaiy
+/tensorflow_addons/losses/ @facaiy @windqaq
+/tensorflow_addons/metrics/ @marload
+/tensorflow_addons/optimizers/ @facaiy @windqaq
+/tensorflow_addons/rnn/ @qlzh727
+/tensorflow_addons/seq2seq/ @qlzh727 @guillaumekln
+/tensorflow_addons/text/ @seanpmorgan @facaiy
+
+/tensorflow_addons/custom_ops/image/ @windqaq @facaiy
+/tensorflow_addons/custom_ops/seq2seq/ @qlzh727
+/tensorflow_addons/custom_ops/text/ @seanpmorgan @facaiy
+
+#####################################################################################
+# Submodule Owners
+# These will not always trigger a GitHub review because submodule owners do not
+# always have write access. However, a bot will notify them of the needed review.
+
+/tensorflow_addons/activations/gelu.py @aakashkumarnain @windqaq
+/tensorflow_addons/activations/tests/gelu_test.py @aakashkumarnain @windqaq
+/tensorflow_addons/activations/hardshrink.py @windqaq
+/tensorflow_addons/activations/tests/hardshrink_test.py @windqaq
+/tensorflow_addons/activations/lisht.py @windqaq
+/tensorflow_addons/activations/tests/lisht_test.py @windqaq
+/tensorflow_addons/activations/mish.py @windqaq @digantamisra98
+/tensorflow_addons/activations/tests/mish_test.py @windqaq @digantamisra98
+/tensorflow_addons/activations/rrelu.py @fsx950223
+/tensorflow_addons/activations/tests/rrelu_test.py @fsx950223
+/tensorflow_addons/activations/softshrink.py @windqaq
+/tensorflow_addons/activations/tests/softshrink_test.py @windqaq
+/tensorflow_addons/activations/sparsemax.py @andreasmadsen
+/tensorflow_addons/activations/tests/sparsemax_test.py @andreasmadsen
+/tensorflow_addons/activations/tanhshrink.py @fsx950223
+/tensorflow_addons/activations/tests/tanhshrink_test.py @fsx950223
+/tensorflow_addons/activations/snake.py @failure-to-thrive
+/tensorflow_addons/activations/tests/snake_test.py @failure-to-thrive
+
+/tensorflow_addons/callbacks/average_model_checkpoint.py @squadrick
+/tensorflow_addons/callbacks/time_stopping.py @shun-lin
+/tensorflow_addons/callbacks/tests/time_stopping_test.py @shun-lin
+/tensorflow_addons/callbacks/tqdm_progress_bar.py @shun-lin
+/tensorflow_addons/callbacks/tests/tqdm_progress_bar_test.py @shun-lin
+
+/tensorflow_addons/image/color_ops.py @abhichou4
+/tensorflow_addons/image/tests/color_ops_test.py @abhichou4
+/tensorflow_addons/image/connected_components.py @sayoojbk
+/tensorflow_addons/image/tests/connected_components_test.py @sayoojbk
+/tensorflow_addons/image/cutout_ops.py @fsx950223
+/tensorflow_addons/image/tests/cutout_ops_test.py @fsx950223
+/tensorflow_addons/image/dense_image_warp.py @windQAQ
+/tensorflow_addons/image/tests/dense_image_warp_test.py @windQAQ
+/tensorflow_addons/image/distance_transform.py @mels630
+/tensorflow_addons/image/tests/distance_transform_test.py @mels630
+/tensorflow_addons/image/distort_image_ops.py @windqaq
+/tensorflow_addons/image/tests/distort_image_ops_test.py @windqaq
+/tensorflow_addons/image/filters.py @mainak431 @ghosalsattam
+/tensorflow_addons/image/tests/filters_test.py @mainak431 @ghosalsattam
+/tensorflow_addons/image/interpolate_spline.py
+/tensorflow_addons/image/tests/interpolate_spline_test.py
+/tensorflow_addons/image/resampler_ops.py @autoih
+/tensorflow_addons/image/tests/resampler_ops_test.py @autoih
+/tensorflow_addons/image/sparse_image_warp.py
+/tensorflow_addons/image/tests/sparse_image_warp_test.py
+/tensorflow_addons/image/transform_ops.py @mels630
+/tensorflow_addons/image/tests/transform_ops_test.py @mels630
+/tensorflow_addons/image/translate_ops.py @sayoojbk
+/tensorflow_addons/image/tests/translate_ops_test.py @sayoojbk
+
+/tensorflow_addons/layers/adaptive_pooling.py @Susmit-A
+/tensorflow_addons/layers/tests/adaptive_pooling_test.py @Susmit-A
+/tensorflow_addons/layers/gelu.py @aakashkumarnain
+/tensorflow_addons/layers/tests/gelu_test.py @aakashkumarnain
+/tensorflow_addons/layers/maxout.py @failure-to-thrive
+/tensorflow_addons/layers/tests/maxout_test.py @failure-to-thrive
+/tensorflow_addons/layers/multihead_attention.py @cgarciae
+/tensorflow_addons/layers/tests/multihead_attention_test.py @cgarciae
+/tensorflow_addons/layers/netvlad.py @joel-shor
+/tensorflow_addons/layers/tests/netvlad_test.py @joel-shor
+/tensorflow_addons/layers/normalizations.py @smokrow
+/tensorflow_addons/layers/tests/normalizations_test.py @smokrow
+/tensorflow_addons/layers/optical_flow.py @failure-to-thrive
+/tensorflow_addons/layers/tests/optical_flow_test.py @failure-to-thrive
+/tensorflow_addons/layers/poincare.py @rahulunair
+/tensorflow_addons/layers/tests/poincare_test.py @rahulunair
+/tensorflow_addons/layers/polynomial.py @tanzhenyu
+/tensorflow_addons/layers/tests/polynomial_test.py @tanzhenyu
+/tensorflow_addons/layers/sparsemax.py @andreasmadsen
+/tensorflow_addons/layers/tests/sparsemax_test.py @andreasmadsen
+/tensorflow_addons/layers/spectral_normalization.py @charlielito
+/tensorflow_addons/layers/tests/spectral_normalization_test.py @charlielito
+/tensorflow_addons/layers/spatial_pyramid_pooling.py @Susmit-A
+/tensorflow_addons/layers/tests/spatial_pyramid_pooling_test.py @Susmit-A
+/tensorflow_addons/layers/tlu.py @aakashkumarnain
+/tensorflow_addons/layers/tests/tlu_test.py @aakashkumarnain
+/tensorflow_addons/layers/wrappers.py @seanpmorgan
+/tensorflow_addons/layers/tests/wrappers_test.py @seanpmorgan
+/tensorflow_addons/layers/esn.py @pedrolarben
+/tensorflow_addons/layers/tests/esn_test.py @pedrolarben
+/tensorflow_addons/layers/snake.py @failure-to-thrive
+/tensorflow_addons/layers/tests/snake_test.py @failure-to-thrive
+/tensorflow_addons/layers/noisy_dense.py @leonshams
+/tensorflow_addons/layers/tests/noisy_dense_test.py @leonshams
+
+/tensorflow_addons/losses/contrastive.py @windqaq
+/tensorflow_addons/losses/tests/contrastive_test.py @windqaq
+/tensorflow_addons/losses/focal_loss.py @aakashkumarnain @ssaishruthi
+/tensorflow_addons/losses/tests/focal_loss_test.py @aakashkumarnain @ssaishruthi
+/tensorflow_addons/losses/giou_loss.py @fsx950223
+/tensorflow_addons/losses/tests/giou_loss_test.py @fsx950223
+/tensorflow_addons/losses/lifted.py @rahulunair
+/tensorflow_addons/losses/tests/lifted_test.py @rahulunair
+/tensorflow_addons/losses/metric_learning.py
+/tensorflow_addons/losses/npairs.py @windqaq
+/tensorflow_addons/losses/tests/npairs_test.py @windqaq
+/tensorflow_addons/losses/quantiles.py @romainbrault
+/tensorflow_addons/losses/tests/quantiles_test.py @romainbrault
+/tensorflow_addons/losses/sparsemax_loss.py @andreasmadsen
+/tensorflow_addons/losses/tests/sparsemax_loss_test.py @andreasmadsen
+/tensorflow_addons/losses/triplet.py @lc0
+/tensorflow_addons/losses/tests/triplet_test.py @lc0
+/tensorflow_addons/losses/kappa_loss.py @wenmin-wu
+/tensorflow_addons/losses/tests/kappa_loss_test.py @wenmin-wu
+
+/tensorflow_addons/metrics/cohens_kappa.py @aakashkumarnain
+/tensorflow_addons/metrics/tests/cohens_kappa_test.py @aakashkumarnain
+/tensorflow_addons/metrics/f_scores.py @ssaishruthi @marload
+/tensorflow_addons/metrics/tests/f_scores_test.py @ssaishruthi @marload
+/tensorflow_addons/metrics/hamming.py @ssaishruthi
+/tensorflow_addons/metrics/tests/hamming_test.py @ssaishruthi
+/tensorflow_addons/metrics/matthews_correlation_coefficient.py @autoih @marload
+/tensorflow_addons/metrics/tests/matthews_correlation_coefficient_test.py @autoih @marload
+/tensorflow_addons/metrics/multilabel_confusion_matrix.py @ssaishruthi
+/tensorflow_addons/metrics/tests/multilabel_confusion_matrix_test.py @ssaishruthi
+/tensorflow_addons/metrics/r_square.py @ssaishruthi @marload
+/tensorflow_addons/metrics/tests/r_square_test.py @ssaishruthi @marload
+/tensorflow_addons/metrics/geometric_mean.py @marload
+/tensorflow_addons/metrics/tests/geometric_mean_test.py @marload
+
+/tensorflow_addons/optimizers/average_wrapper.py @squadrick
+/tensorflow_addons/optimizers/conditional_gradient.py @pkan2 @lokhande-vishnu
+/tensorflow_addons/optimizers/tests/conditional_gradient_test.py @pkan2 @lokhande-vishnu
+/tensorflow_addons/optimizers/cyclical_learning_rate.py @raphaelmeudec
+/tensorflow_addons/optimizers/tests/cyclical_learning_rate_test.py @raphaelmeudec
+/tensorflow_addons/optimizers/lamb.py @junjiek
+/tensorflow_addons/optimizers/tests/lamb_test.py @junjiek
+/tensorflow_addons/optimizers/lazy_adam.py @ssaishruthi
+/tensorflow_addons/optimizers/tests/lazy_adam_test.py @ssaishruthi
+/tensorflow_addons/optimizers/lookahead.py @cyberzhg
+/tensorflow_addons/optimizers/tests/lookahead_test.py @cyberzhg
+/tensorflow_addons/optimizers/moving_average.py @squadrick
+/tensorflow_addons/optimizers/tests/moving_average_test.py @squadrick
+/tensorflow_addons/optimizers/novograd.py @shreyashpatodia
+/tensorflow_addons/optimizers/tests/novograd_test.py @shreyashpatodia
+/tensorflow_addons/optimizers/proximal_adagrad.py @WindQAQ
+/tensorflow_addons/optimizers/tests/proximal_adagrad_test.py @WindQAQ
+/tensorflow_addons/optimizers/rectified_adam.py @cyberzhg
+/tensorflow_addons/optimizers/tests/rectified_adam_test.py @cyberzhg
+/tensorflow_addons/optimizers/stochastic_weight_averaging.py @shreyashpatodia
+/tensorflow_addons/optimizers/tests/stochastic_weight_averaging_test.py @shreyashpatodia
+/tensorflow_addons/optimizers/weight_decay_optimizers.py @philjd
+/tensorflow_addons/optimizers/tests/weight_decay_optimizers_test.py @philjd
+/tensorflow_addons/optimizers/yogi.py @manzilz
+/tensorflow_addons/optimizers/tests/yogi_test.py @manzilz
+
+/tensorflow_addons/rnn/esn_cell.py @pedrolarben
+/tensorflow_addons/rnn/tests/esn_cell_test.py @pedrolarben
+/tensorflow_addons/rnn/layer_norm_lstm_cell.py @qlzh727
+/tensorflow_addons/rnn/tests/layer_norm_lstm_cell_test.py @qlzh727
+/tensorflow_addons/rnn/layer_norm_simple_rnn_cell.py @qlzh727
+/tensorflow_addons/rnn/tests/layer_norm_simple_rnn_cell_test.py @qlzh727
+/tensorflow_addons/rnn/nas_cell.py @qlzh727
+/tensorflow_addons/rnn/tests/nas_cell_test.py @qlzh727
+/tensorflow_addons/rnn/peephole_lstm_cell.py @qlzh727
+/tensorflow_addons/rnn/tests/peephole_lstm_cell_test.py @qlzh727
+
+/tensorflow_addons/seq2seq/ @qlzh727 @guillaumekln
+
+/tensorflow_addons/text/crf.py @squadrick
+/tensorflow_addons/text/tests/crf_test.py @squadrick
+/tensorflow_addons/text/parse_time_op.py @helinwang
+/tensorflow_addons/text/tests/parse_time_op_test.py @helinwang
+/tensorflow_addons/text/skip_gram_ops.py @rahulunair
+/tensorflow_addons/text/tests/skip_gram_ops_test.py @rahulunair

From 114842fae0d567ca06816fc1f0ce7540e5299e84 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Sat, 29 Aug 2020 20:09:57 -0700
Subject: [PATCH 22/44] Update noisy_dense.py

---
 tensorflow_addons/layers/noisy_dense.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 13964f9f06..b1e2245caa 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -1,4 +1,4 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,9 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-# Orginal implementation from keras_contrib/layer/normalization
-# =============================================================================
+# ==============================================================================
 
 import tensorflow as tf
 from tensorflow.keras import (
@@ -28,9 +26,8 @@
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
 class NoisyDense(tf.keras.layers.Layer):
-    """Like normal dense layer but random noisy is added to the weights matrix. But
-  as the network improves the random noise is decayed until it is insignificant.
-
+    """Like normal dense layer (https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/layers/core.py#L1067-L1233)
+  but random noisy is added to the weights matrix. But as the network improves the random noise is decayed until it is insignificant.
   A `NoisyDense` layer implements the operation:
   `output = activation(dot(input, µ_kernel + (σ_kernel * ε_kernel)) + bias)`
   where `activation` is the element-wise activation function
@@ -38,7 +35,6 @@ class NoisyDense(tf.keras.layers.Layer):
   created by the layer, σ_kernel is a weights matrix that controls the importance of
   the ε_kernel which is just random noise, and `bias` is a bias vector created by the layer
   (only applicable if `use_bias` is `True`).
-
   Example:
   >>> # Create a `Sequential` model and add a Dense layer as the first layer.
   >>> model = tf.keras.models.Sequential()
@@ -51,7 +47,6 @@ class NoisyDense(tf.keras.layers.Layer):
   >>> model.add(NoisyDense(32))
   >>> model.output_shape
   (None, 32)
-
   Arguments:
     units: Positive integer, dimensionality of the output space.
     activation: Activation function to use.
@@ -66,12 +61,10 @@ class NoisyDense(tf.keras.layers.Layer):
     kernel_constraint: Constraint function applied to
       the `kernel` weights matrix.
     bias_constraint: Constraint function applied to the bias vector.
-
   Input shape:
     N-D tensor with shape: `(batch_size, ..., input_dim)`.
     The most common situation would be
     a 2D input with shape `(batch_size, input_dim)`.
-
   Output shape:
     N-D tensor with shape: `(batch_size, ..., units)`.
     For instance, for a 2D input with shape `(batch_size, input_dim)`,

From 4e2ed8e6f87631678ec5f5dca2096c8ce1db95ac Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Sat, 29 Aug 2020 20:12:01 -0700
Subject: [PATCH 23/44] Update noisy_dense.py

---
 tensorflow_addons/layers/noisy_dense.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index b1e2245caa..c09a057d99 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -231,8 +231,8 @@ def get_config(self):
         config.update(
             {
                 "units": self.units,
-                "activation": activations.serialize(self.activation),
                 "use_bias": self.use_bias,
+                "activation": activations.serialize(self.activation),
                 "kernel_regularizer": regularizers.serialize(self.kernel_regularizer),
                 "bias_regularizer": regularizers.serialize(self.bias_regularizer),
                 "activity_regularizer": regularizers.serialize(

From 4fb4bffac2d936e5dcc1db234ccd9402e88e879e Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Sat, 29 Aug 2020 20:15:21 -0700
Subject: [PATCH 24/44] Update noisy_dense.py

---
 tensorflow_addons/layers/noisy_dense.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index c09a057d99..b1e2245caa 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -231,8 +231,8 @@ def get_config(self):
         config.update(
             {
                 "units": self.units,
-                "use_bias": self.use_bias,
                 "activation": activations.serialize(self.activation),
+                "use_bias": self.use_bias,
                 "kernel_regularizer": regularizers.serialize(self.kernel_regularizer),
                 "bias_regularizer": regularizers.serialize(self.bias_regularizer),
                 "activity_regularizer": regularizers.serialize(

From b874f0b743d3d013a9f7ab1951ec82c7b6d013db Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Sat, 29 Aug 2020 20:32:41 -0700
Subject: [PATCH 25/44] Update noisy_dense.py

---
 tensorflow_addons/layers/noisy_dense.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index b1e2245caa..4cc7fc0d87 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -28,6 +28,7 @@
 class NoisyDense(tf.keras.layers.Layer):
     """Like normal dense layer (https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/layers/core.py#L1067-L1233)
   but random noisy is added to the weights matrix. But as the network improves the random noise is decayed until it is insignificant.
+
   A `NoisyDense` layer implements the operation:
   `output = activation(dot(input, µ_kernel + (σ_kernel * ε_kernel)) + bias)`
   where `activation` is the element-wise activation function
@@ -35,6 +36,7 @@ class NoisyDense(tf.keras.layers.Layer):
   created by the layer, σ_kernel is a weights matrix that controls the importance of
   the ε_kernel which is just random noise, and `bias` is a bias vector created by the layer
   (only applicable if `use_bias` is `True`).
+
   Example:
   >>> # Create a `Sequential` model and add a Dense layer as the first layer.
   >>> model = tf.keras.models.Sequential()
@@ -47,6 +49,7 @@ class NoisyDense(tf.keras.layers.Layer):
   >>> model.add(NoisyDense(32))
   >>> model.output_shape
   (None, 32)
+
   Arguments:
     units: Positive integer, dimensionality of the output space.
     activation: Activation function to use.
@@ -61,10 +64,12 @@ class NoisyDense(tf.keras.layers.Layer):
     kernel_constraint: Constraint function applied to
       the `kernel` weights matrix.
     bias_constraint: Constraint function applied to the bias vector.
+
   Input shape:
     N-D tensor with shape: `(batch_size, ..., input_dim)`.
     The most common situation would be
     a 2D input with shape `(batch_size, input_dim)`.
+
   Output shape:
     N-D tensor with shape: `(batch_size, ..., units)`.
     For instance, for a 2D input with shape `(batch_size, input_dim)`,

From 7852e62a871d8b670953c2d7f4d106d52fc1d594 Mon Sep 17 00:00:00 2001
From: schaall <52867365+schaall@users.noreply.github.com>
Date: Sat, 29 Aug 2020 20:38:40 -0700
Subject: [PATCH 26/44] Revert "Update CODEOWNERS"

This reverts commit 840ab1ca1a0267e88700db26b12e29d469fd6194.
---
 .github/CODEOWNERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 2070520eb7..396cfa58ab 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -106,7 +106,7 @@
 /tensorflow_addons/layers/snake.py @failure-to-thrive
 /tensorflow_addons/layers/tests/snake_test.py @failure-to-thrive
 /tensorflow_addons/layers/noisy_dense.py @leonshams
-/tensorflow_addons/layers/tests/noisy_dense_test.py @leonshams
+/tensorflow_addons/layers/noisy_dense_test.py @leonshams
 
 /tensorflow_addons/losses/contrastive.py @windqaq
 /tensorflow_addons/losses/tests/contrastive_test.py @windqaq

From 133bb20cd13a0cdf79bda8e758be02b9923bb0d2 Mon Sep 17 00:00:00 2001
From: schaall <52867365+schaall@users.noreply.github.com>
Date: Sat, 29 Aug 2020 20:39:33 -0700
Subject: [PATCH 27/44] Revert "Revert "Update CODEOWNERS""

This reverts commit 7852e62a871d8b670953c2d7f4d106d52fc1d594.
---
 .github/CODEOWNERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 396cfa58ab..2070520eb7 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -106,7 +106,7 @@
 /tensorflow_addons/layers/snake.py @failure-to-thrive
 /tensorflow_addons/layers/tests/snake_test.py @failure-to-thrive
 /tensorflow_addons/layers/noisy_dense.py @leonshams
-/tensorflow_addons/layers/noisy_dense_test.py @leonshams
+/tensorflow_addons/layers/tests/noisy_dense_test.py @leonshams
 
 /tensorflow_addons/losses/contrastive.py @windqaq
 /tensorflow_addons/losses/tests/contrastive_test.py @windqaq

From f57f895ac798f7102bca6e1812c21952cf52caa6 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Sun, 30 Aug 2020 01:34:10 -0700
Subject: [PATCH 28/44] Update noisy_dense.py

---
 tensorflow_addons/layers/noisy_dense.py | 57 +++++++++++++------------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 4cc7fc0d87..aacc935260 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -26,31 +26,32 @@
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
 class NoisyDense(tf.keras.layers.Layer):
-    """Like normal dense layer (https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/layers/core.py#L1067-L1233)
-  but random noisy is added to the weights matrix. But as the network improves the random noise is decayed until it is insignificant.
-
-  A `NoisyDense` layer implements the operation:
-  `output = activation(dot(input, µ_kernel + (σ_kernel * ε_kernel)) + bias)`
-  where `activation` is the element-wise activation function
-  passed as the `activation` argument, `µ_kernel` is your average weights matrix
-  created by the layer, σ_kernel is a weights matrix that controls the importance of
-  the ε_kernel which is just random noise, and `bias` is a bias vector created by the layer
-  (only applicable if `use_bias` is `True`).
-
-  Example:
-  >>> # Create a `Sequential` model and add a Dense layer as the first layer.
-  >>> model = tf.keras.models.Sequential()
-  >>> model.add(tf.keras.Input(shape=(16,)))
-  >>> model.add(NoisyDense(32, activation='relu'))
-  >>> # Now the model will take as input arrays of shape (None, 16)
-  >>> # and output arrays of shape (None, 32).
-  >>> # Note that after the first layer, you don't need to specify
-  >>> # the size of the input anymore:
-  >>> model.add(NoisyDense(32))
-  >>> model.output_shape
-  (None, 32)
-
-  Arguments:
+    """
+    Like normal dense layer (https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/layers/core.py#L1067-L1233)
+    but random noisy is added to the weights matrix. But as the network improves the random noise is decayed until it is insignificant.
+
+    A `NoisyDense` layer implements the operation:
+    `output = activation(dot(input, µ_kernel + (σ_kernel * ε_kernel)) + bias)`
+    where `activation` is the element-wise activation function
+    passed as the `activation` argument, `µ_kernel` is your average weights matrix
+    created by the layer, σ_kernel is a weights matrix that controls the importance of
+    the ε_kernel which is just random noise, and `bias` is a bias vector created by the layer
+    (only applicable if `use_bias` is `True`).
+
+    Example:
+    >>> # Create a `Sequential` model and add a Dense layer as the first layer.
+    >>> model = tf.keras.models.Sequential()
+    >>> model.add(tf.keras.Input(shape=(16,)))
+    >>> model.add(NoisyDense(32, activation='relu'))
+    >>> # Now the model will take as input arrays of shape (None, 16)
+    >>> # and output arrays of shape (None, 32).
+    >>> # Note that after the first layer, you don't need to specify
+    >>> # the size of the input anymore:
+    >>> model.add(NoisyDense(32))
+    >>> model.output_shape
+    (None, 32)
+
+    Arguments:
     units: Positive integer, dimensionality of the output space.
     activation: Activation function to use.
       If you don't specify anything, no activation is applied
@@ -65,16 +66,16 @@ class NoisyDense(tf.keras.layers.Layer):
       the `kernel` weights matrix.
     bias_constraint: Constraint function applied to the bias vector.
 
-  Input shape:
+    Input shape:
     N-D tensor with shape: `(batch_size, ..., input_dim)`.
     The most common situation would be
     a 2D input with shape `(batch_size, input_dim)`.
 
-  Output shape:
+    Output shape:
     N-D tensor with shape: `(batch_size, ..., units)`.
     For instance, for a 2D input with shape `(batch_size, input_dim)`,
     the output would have shape `(batch_size, units)`.
-  """
+    """
 
     def __init__(
         self,

From 0a95587e17d7fe5655f607a04f960c64542f10bb Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Sun, 30 Aug 2020 01:38:25 -0700
Subject: [PATCH 29/44] Code reformatted with updated black

---
 tensorflow_addons/layers/noisy_dense.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index aacc935260..99d18cbc10 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -153,7 +153,9 @@ def build(self, input_shape):
         if self.use_bias:
             self.σ_bias = self.add_weight(
                 "σ_bias",
-                shape=[self.units,],
+                shape=[
+                    self.units,
+                ],
                 initializer=self.σ_init,
                 regularizer=self.bias_regularizer,
                 constraint=self.bias_constraint,
@@ -163,7 +165,9 @@ def build(self, input_shape):
 
             self.µ_bias = self.add_weight(
                 "µ_bias",
-                shape=[self.units,],
+                shape=[
+                    self.units,
+                ],
                 initializer=self.µ_init,
                 regularizer=self.bias_regularizer,
                 constraint=self.bias_constraint,
@@ -184,7 +188,12 @@ def call(self, inputs):
 
         # Fixed parameters added as the noise
         ε_i = tf.random.normal([self.last_dim, self.units], dtype=dtype)
-        ε_j = tf.random.normal([self.units,], dtype=dtype)
+        ε_j = tf.random.normal(
+            [
+                self.units,
+            ],
+            dtype=dtype,
+        )
 
         # Creates the factorised Gaussian noise
         f = NoisyDense._scale_noise

From 85827960b58eeafa527ac1b0f0eb82f24b8d1327 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Wed, 2 Sep 2020 08:17:31 -0700
Subject: [PATCH 30/44] Update noisy_dense.py

---
 tensorflow_addons/layers/noisy_dense.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 99d18cbc10..93f3237303 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -174,7 +174,9 @@ def build(self, input_shape):
                 dtype=self.dtype,
                 trainable=True,
             )
-
+        else:
+            self.σ_bias = None
+            self.µ_bias = None
         self.built = True
 
     @staticmethod

From 0598762351d08ac89415123d36bcf6921897e928 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Wed, 2 Sep 2020 09:31:31 -0700
Subject: [PATCH 31/44] Update noisy_dense.py


From 442f7e3606e2faaf96e10f59ef8c3bd1a699e2aa Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Fri, 4 Sep 2020 19:22:14 -0700
Subject: [PATCH 32/44] Update noisy_dense.py

---
 tensorflow_addons/layers/noisy_dense.py | 27 ++++++++++++++++---------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 93f3237303..8aa00dcd33 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -183,12 +183,11 @@ def build(self, input_shape):
     def _scale_noise(x):
         return tf.sign(x) * tf.sqrt(tf.abs(x))
 
-    def call(self, inputs):
+    # Create the factorised Gaussian noise
+    def reset_noise(self):
         dtype = self._compute_dtype_object
-        if inputs.dtype.base_dtype != dtype.base_dtype:
-            inputs = tf.cast(inputs, dtype=dtype)
 
-        # Fixed parameters added as the noise
+        # Generate random noise
         ε_i = tf.random.normal([self.last_dim, self.units], dtype=dtype)
         ε_j = tf.random.normal(
             [
@@ -197,14 +196,22 @@ def call(self, inputs):
             dtype=dtype,
         )
 
-        # Creates the factorised Gaussian noise
-        f = NoisyDense._scale_noise
-        ε_kernel = f(ε_i) * f(ε_j)
-        ε_bias = f(ε_j)
+        # Scale the random noise
+        self.ε_kernel = NoisyDense._scale_noise(ε_i) * NoisyDense._scale_noise(ε_j)
+        self.ε_bias = NoisyDense._scale_noise(ε_j)
+
+    def call(self, inputs, reset_noise=True):
+        dtype = self._compute_dtype_object
+        if inputs.dtype.base_dtype != dtype.base_dtype:
+            inputs = tf.cast(inputs, dtype=dtype)
+
+        # Generate fixed parameters added as the noise
+        if reset_noise:
+            self.reset_noise()
 
         # Performs: y = (µw + σw · εw)x + µb + σb · εb
         # to calculate the output
-        kernel = self.µ_kernel + (self.σ_kernel * ε_kernel)
+        kernel = self.µ_kernel + (self.σ_kernel * self.ε_kernel)
 
         if inputs.dtype.base_dtype != dtype.base_dtype:
             inputs = tf.cast(inputs, dtype=dtype)
@@ -225,7 +232,7 @@ def call(self, inputs):
                 outputs.set_shape(output_shape)
 
         if self.use_bias:
-            noisy_bias = self.µ_bias + (self.σ_bias * ε_bias)
+            noisy_bias = self.µ_bias + (self.σ_bias * self.ε_bias)
             outputs = tf.nn.bias_add(outputs, noisy_bias)
 
         if self.activation is not None:

From 3e0fcdb7722fe4d009e5b4faf498968ddfbc9028 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Fri, 4 Sep 2020 19:23:04 -0700
Subject: [PATCH 33/44] Added support for manual noise reset

---
 .../layers/tests/noisy_dense_test.py          | 47 +++++++++++++++++--
 1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/tensorflow_addons/layers/tests/noisy_dense_test.py b/tensorflow_addons/layers/tests/noisy_dense_test.py
index c5d1ee2964..fe2fdaf729 100644
--- a/tensorflow_addons/layers/tests/noisy_dense_test.py
+++ b/tensorflow_addons/layers/tests/noisy_dense_test.py
@@ -41,7 +41,7 @@ def test_noisy_dense_dtype(dtype):
     inputs = tf.convert_to_tensor(
         np.random.randint(low=0, high=7, size=(2, 2)), dtype=dtype
     )
-    layer = NoisyDense(5, dtype=dtype)
+    layer = NoisyDense(5, dtype=dtype, name="noisy_dense_" + dtype)
     outputs = layer(inputs)
     np.testing.assert_array_equal(outputs.dtype, dtype)
 
@@ -49,7 +49,7 @@ def test_noisy_dense_dtype(dtype):
 @pytest.mark.usefixtures("maybe_run_functions_eagerly")
 def test_noisy_dense_with_policy():
     inputs = tf.convert_to_tensor(np.random.randint(low=0, high=7, size=(2, 2)))
-    layer = NoisyDense(5, dtype=Policy("mixed_float16"))
+    layer = NoisyDense(5, dtype=Policy("mixed_float16"), name="noisy_dense_policy")
     outputs = layer(inputs)
     output_signature = layer.compute_output_signature(
         tf.TensorSpec(dtype="float16", shape=(2, 2))
@@ -78,9 +78,50 @@ def test_noisy_dense_regularization():
 def test_noisy_dense_constraints():
     k_constraint = keras.constraints.max_norm(0.01)
     b_constraint = keras.constraints.max_norm(0.01)
-    layer = NoisyDense(3, kernel_constraint=k_constraint, bias_constraint=b_constraint)
+    layer = NoisyDense(
+        3,
+        kernel_constraint=k_constraint,
+        bias_constraint=b_constraint,
+        name="noisy_dense_constriants",
+    )
     layer(keras.backend.variable(np.ones((2, 4))))
     np.testing.assert_array_equal(layer.µ_kernel.constraint, k_constraint)
     np.testing.assert_array_equal(layer.σ_kernel.constraint, k_constraint)
     np.testing.assert_array_equal(layer.µ_bias.constraint, b_constraint)
     np.testing.assert_array_equal(layer.σ_bias.constraint, b_constraint)
+
+
+@pytest.mark.usefixtures("maybe_run_functions_eagerly")
+def test_noisy_dense_automatic_reset_noise():
+    inputs = tf.convert_to_tensor(np.random.randint(low=0, high=7, size=(2, 2)))
+    layer = NoisyDense(5, name="noise_dense_auto_reset_noise")
+    layer(inputs)
+    initial_ε_kernel = layer.ε_kernel
+    initial_ε_bias = layer.ε_bias
+    layer(inputs)
+    new_ε_kernel = layer.ε_kernel
+    new_ε_bias = layer.ε_bias
+    np.testing.assert_raises(
+        AssertionError, np.testing.assert_array_equal, initial_ε_kernel, new_ε_kernel
+    )
+    np.testing.assert_raises(
+        AssertionError, np.testing.assert_array_equal, initial_ε_bias, new_ε_bias
+    )
+
+
+@pytest.mark.usefixtures("maybe_run_functions_eagerly")
+def test_noisy_dense_manual_reset_noise():
+    inputs = tf.convert_to_tensor(np.random.randint(low=0, high=7, size=(2, 2)))
+    layer = NoisyDense(5, name="noise_dense_manual_reset_noise")
+    layer(inputs)
+    initial_ε_kernel = layer.ε_kernel
+    initial_ε_bias = layer.ε_bias
+    layer.reset_noise()
+    new_ε_kernel = layer.ε_kernel
+    new_ε_bias = layer.ε_bias
+    np.testing.assert_raises(
+        AssertionError, np.testing.assert_array_equal, initial_ε_kernel, new_ε_kernel
+    )
+    np.testing.assert_raises(
+        AssertionError, np.testing.assert_array_equal, initial_ε_bias, new_ε_bias
+    )

From ee96d5f07e8fa46884161bebb2657d0cee6e0675 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Fri, 4 Sep 2020 19:52:06 -0700
Subject: [PATCH 34/44] support for noise removal

---
 tensorflow_addons/layers/noisy_dense.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 8aa00dcd33..64c5a14d51 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -200,6 +200,11 @@ def reset_noise(self):
         self.ε_kernel = NoisyDense._scale_noise(ε_i) * NoisyDense._scale_noise(ε_j)
         self.ε_bias = NoisyDense._scale_noise(ε_j)
 
+    def remove_noise(self):
+        dtype = self._compute_dtype_object
+        self.ε_kernel = tf.zeros([self.last_dim, self.units], dtype=dtype)
+        self.ε_bias = tf.zeros([self.last_dim, self.units], dtype=dtype)
+
     def call(self, inputs, reset_noise=True):
         dtype = self._compute_dtype_object
         if inputs.dtype.base_dtype != dtype.base_dtype:

From 6c33f228381c0b74b780c31bd00b868c8f2150b7 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Fri, 4 Sep 2020 19:52:37 -0700
Subject: [PATCH 35/44] tests for noise removal

---
 .../layers/tests/noisy_dense_test.py          | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tensorflow_addons/layers/tests/noisy_dense_test.py b/tensorflow_addons/layers/tests/noisy_dense_test.py
index fe2fdaf729..0aac478851 100644
--- a/tensorflow_addons/layers/tests/noisy_dense_test.py
+++ b/tensorflow_addons/layers/tests/noisy_dense_test.py
@@ -125,3 +125,24 @@ def test_noisy_dense_manual_reset_noise():
     np.testing.assert_raises(
         AssertionError, np.testing.assert_array_equal, initial_ε_bias, new_ε_bias
     )
+
+
+@pytest.mark.usefixtures("maybe_run_functions_eagerly")
+def test_noisy_dense_remove_noise():
+    inputs = tf.convert_to_tensor(np.random.randint(low=0, high=7, size=(2, 2)))
+    layer = NoisyDense(5, name="noise_dense_manual_reset_noise")
+    layer(inputs)
+    initial_ε_kernel = layer.ε_kernel
+    initial_ε_bias = layer.ε_bias
+    layer.remove_noise()
+    new_ε_kernel = layer.ε_kernel
+    new_ε_bias = layer.ε_bias
+    zeros = tf.zeros(initial_ε_kernel.shape, dtype=initial_ε_kernel.dtype)
+    np.testing.assert_raises(
+        AssertionError, np.testing.assert_array_equal, initial_ε_kernel, new_ε_kernel
+    )
+    np.testing.assert_raises(
+        AssertionError, np.testing.assert_array_equal, initial_ε_bias, new_ε_bias
+    )
+    np.testing.assert_array_equal(zeros, new_ε_kernel)
+    np.testing.assert_array_equal(zeros, new_ε_bias)

From 1ac4699fbb0011dc9bb20915d25bf1bbb3da5c86 Mon Sep 17 00:00:00 2001
From: schaall <52867365+schaall@users.noreply.github.com>
Date: Fri, 11 Sep 2020 13:12:39 -0700
Subject: [PATCH 36/44] use typecheck and remove unicode

---
 tensorflow_addons/layers/noisy_dense.py | 95 +++++++++++++------------
 1 file changed, 50 insertions(+), 45 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 64c5a14d51..5f15a77777 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -22,24 +22,28 @@
 )
 from tensorflow.keras import backend as K
 from tensorflow.keras.layers import InputSpec
+from typeguard import typechecked
+
+from tensorflow_addons.utils import types
+
+
+def _scale_noise(x):
+    return tf.sign(x) * tf.sqrt(tf.abs(x))
 
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
 class NoisyDense(tf.keras.layers.Layer):
-    """
-    Like normal dense layer (https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/layers/core.py#L1067-L1233)
-    but random noisy is added to the weights matrix. But as the network improves the random noise is decayed until it is insignificant.
+    r"""Like normal dense layer (https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/layers/core.py#L1067-L1233)
+    but random noise is added to the weights matrix. As the network improves the random noise is decayed until it is insignificant.
 
     A `NoisyDense` layer implements the operation:
-    `output = activation(dot(input, µ_kernel + (σ_kernel * ε_kernel)) + bias)`
-    where `activation` is the element-wise activation function
-    passed as the `activation` argument, `µ_kernel` is your average weights matrix
-    created by the layer, σ_kernel is a weights matrix that controls the importance of
-    the ε_kernel which is just random noise, and `bias` is a bias vector created by the layer
-    (only applicable if `use_bias` is `True`).
+    $$
+    \mathrm{NoisyDense}(x) = \mathrm{activation}(\mathrm{dot}(x, \mu + (\sigma \cdot \epsilon)) + \mathrm{bias})
+    $$
+    with bias only being added if `use_bias` is `True`.
 
     Example:
-    >>> # Create a `Sequential` model and add a Dense layer as the first layer.
+    >>> # Create a `Sequential` model and add a NoisyDense layer as the first layer.
     >>> model = tf.keras.models.Sequential()
     >>> model.add(tf.keras.Input(shape=(16,)))
     >>> model.add(NoisyDense(32, activation='relu'))
@@ -53,6 +57,8 @@ class NoisyDense(tf.keras.layers.Layer):
 
     Arguments:
     units: Positive integer, dimensionality of the output space.
+    sigma: A float between 0-1 used as a standard deviation figure and is
+      applied to the gaussian noise layer (`sigma_kernel` and `sigma_bias`).
     activation: Activation function to use.
       If you don't specify anything, no activation is applied
       (ie. "linear" activation: `a(x) = x`).
@@ -77,23 +83,26 @@ class NoisyDense(tf.keras.layers.Layer):
     the output would have shape `(batch_size, units)`.
     """
 
+    @typechecked
     def __init__(
         self,
-        units,
-        activation=None,
-        use_bias=True,
-        kernel_regularizer=None,
-        bias_regularizer=None,
-        activity_regularizer=None,
-        kernel_constraint=None,
-        bias_constraint=None,
+        units: int,
+        sigma: float = 0.5,
+        activation: types.Activation = None,
+        use_bias: bool = True,
+        kernel_regularizer: types.Regularizer = None,
+        bias_regularizer: types.Regularizer = None,
+        activity_regularizer: types.Regularizer = None,
+        kernel_constraint: types.Constraint = None,
+        bias_constraint: types.Constraint = None,
         **kwargs
     ):
         super(NoisyDense, self).__init__(
             activity_regularizer=activity_regularizer, **kwargs
         )
 
-        self.units = int(units) if not isinstance(units, int) else units
+        self.units = units
+        self.sigma = sigma
         self.activation = activations.get(activation)
         self.use_bias = use_bias
         self.kernel_regularizer = regularizers.get(kernel_regularizer)
@@ -123,27 +132,27 @@ def build(self, input_shape):
             )
         self.input_spec = InputSpec(min_ndim=2, axes={-1: self.last_dim})
 
-        self.σ_init = initializers.Constant(value=0.5 / sqrt_dim)
-        self.µ_init = initializers.RandomUniform(
+        self.sigma_init = initializers.Constant(value=self.sigma / sqrt_dim)
+        self.mu_init = initializers.RandomUniform(
             minval=-1 / sqrt_dim, maxval=1 / sqrt_dim
         )
 
         # Learnable parameters
-        # Agent will learn to decay σ as it improves creating a sort of learned epsilon decay
-        self.σ_kernel = self.add_weight(
-            "σ_kernel",
+        # Agent will learn to decay sigma as it improves creating a sort of learned epsilon decay
+        self.sigma_kernel = self.add_weight(
+            "sigma_kernel",
             shape=[self.last_dim, self.units],
-            initializer=self.σ_init,
+            initializer=self.sigma_init,
             regularizer=self.kernel_regularizer,
             constraint=self.kernel_constraint,
             dtype=self.dtype,
             trainable=True,
         )
 
-        self.µ_kernel = self.add_weight(
-            "µ_kernel",
+        self.mu_kernel = self.add_weight(
+            "mu_kernel",
             shape=[self.last_dim, self.units],
-            initializer=self.µ_init,
+            initializer=self.mu_init,
             regularizer=self.kernel_regularizer,
             constraint=self.kernel_constraint,
             dtype=self.dtype,
@@ -151,38 +160,34 @@ def build(self, input_shape):
         )
 
         if self.use_bias:
-            self.σ_bias = self.add_weight(
-                "σ_bias",
+            self.sigma_bias = self.add_weight(
+                "sigma_bias",
                 shape=[
                     self.units,
                 ],
-                initializer=self.σ_init,
+                initializer=self.sigma_init,
                 regularizer=self.bias_regularizer,
                 constraint=self.bias_constraint,
                 dtype=self.dtype,
                 trainable=True,
             )
 
-            self.µ_bias = self.add_weight(
-                "µ_bias",
+            self.mu_bias = self.add_weight(
+                "mu_bias",
                 shape=[
                     self.units,
                 ],
-                initializer=self.µ_init,
+                initializer=self.mu_init,
                 regularizer=self.bias_regularizer,
                 constraint=self.bias_constraint,
                 dtype=self.dtype,
                 trainable=True,
             )
         else:
-            self.σ_bias = None
-            self.µ_bias = None
+            self.sigma_bias = None
+            self.mu_bias = None
         self.built = True
 
-    @staticmethod
-    def _scale_noise(x):
-        return tf.sign(x) * tf.sqrt(tf.abs(x))
-
     # Create the factorised Gaussian noise
     def reset_noise(self):
         dtype = self._compute_dtype_object
@@ -197,8 +202,8 @@ def reset_noise(self):
         )
 
         # Scale the random noise
-        self.ε_kernel = NoisyDense._scale_noise(ε_i) * NoisyDense._scale_noise(ε_j)
-        self.ε_bias = NoisyDense._scale_noise(ε_j)
+        self.ε_kernel = _scale_noise(ε_i) * _scale_noise(ε_j)
+        self.ε_bias = _scale_noise(ε_j)
 
     def remove_noise(self):
         dtype = self._compute_dtype_object
@@ -214,9 +219,9 @@ def call(self, inputs, reset_noise=True):
         if reset_noise:
             self.reset_noise()
 
-        # Performs: y = (µw + σw · εw)x + µb + σb · εb
+        # Performs: y = (muw + sigmaw · εw)x + mub + sigmab · εb
         # to calculate the output
-        kernel = self.µ_kernel + (self.σ_kernel * self.ε_kernel)
+        kernel = self.mu_kernel + (self.sigma_kernel * self.ε_kernel)
 
         if inputs.dtype.base_dtype != dtype.base_dtype:
             inputs = tf.cast(inputs, dtype=dtype)
@@ -237,7 +242,7 @@ def call(self, inputs, reset_noise=True):
                 outputs.set_shape(output_shape)
 
         if self.use_bias:
-            noisy_bias = self.µ_bias + (self.σ_bias * self.ε_bias)
+            noisy_bias = self.mu_bias + (self.sigma_bias * self.ε_bias)
             outputs = tf.nn.bias_add(outputs, noisy_bias)
 
         if self.activation is not None:

From 14462f425c01d298e92a5c509a85af20995acad1 Mon Sep 17 00:00:00 2001
From: schaall <52867365+schaall@users.noreply.github.com>
Date: Fri, 11 Sep 2020 13:43:14 -0700
Subject: [PATCH 37/44] fix typo and code cleanup

---
 tensorflow_addons/layers/noisy_dense.py       | 43 ++++++-----
 .../layers/tests/noisy_dense_test.py          | 72 ++++++++++++-------
 2 files changed, 71 insertions(+), 44 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 5f15a77777..2ee3c44016 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -33,17 +33,21 @@ def _scale_noise(x):
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
 class NoisyDense(tf.keras.layers.Layer):
-    r"""Like normal dense layer (https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/layers/core.py#L1067-L1233)
-    but random noise is added to the weights matrix. As the network improves the random noise is decayed until it is insignificant.
+    r"""Like normal dense layer but random noise is added to the weights
+    matrix. As the network improves the random noise is decayed until
+    it is insignificant.
 
     A `NoisyDense` layer implements the operation:
     $$
-    \mathrm{NoisyDense}(x) = \mathrm{activation}(\mathrm{dot}(x, \mu + (\sigma \cdot \epsilon)) + \mathrm{bias})
+    \mathrm{NoisyDense}(x) =
+    \mathrm{activation}(\mathrm{dot}(x, \mu + (\sigma \cdot \eps))
+    + \mathrm{bias})
     $$
     with bias only being added if `use_bias` is `True`.
 
     Example:
-    >>> # Create a `Sequential` model and add a NoisyDense layer as the first layer.
+    >>> # Create a `Sequential` model and add a NoisyDense
+    >>> # layer as the first layer.
     >>> model = tf.keras.models.Sequential()
     >>> model.add(tf.keras.Input(shape=(16,)))
     >>> model.add(NoisyDense(32, activation='relu'))
@@ -138,7 +142,6 @@ def build(self, input_shape):
         )
 
         # Learnable parameters
-        # Agent will learn to decay sigma as it improves creating a sort of learned epsilon decay
         self.sigma_kernel = self.add_weight(
             "sigma_kernel",
             shape=[self.last_dim, self.units],
@@ -193,8 +196,8 @@ def reset_noise(self):
         dtype = self._compute_dtype_object
 
         # Generate random noise
-        ε_i = tf.random.normal([self.last_dim, self.units], dtype=dtype)
-        ε_j = tf.random.normal(
+        eps_i = tf.random.normal([self.last_dim, self.units], dtype=dtype)
+        eps_j = tf.random.normal(
             [
                 self.units,
             ],
@@ -202,13 +205,13 @@ def reset_noise(self):
         )
 
         # Scale the random noise
-        self.ε_kernel = _scale_noise(ε_i) * _scale_noise(ε_j)
-        self.ε_bias = _scale_noise(ε_j)
+        self.eps_kernel = _scale_noise(eps_i) * _scale_noise(eps_j)
+        self.eps_bias = _scale_noise(eps_j)
 
     def remove_noise(self):
         dtype = self._compute_dtype_object
-        self.ε_kernel = tf.zeros([self.last_dim, self.units], dtype=dtype)
-        self.ε_bias = tf.zeros([self.last_dim, self.units], dtype=dtype)
+        self.eps_kernel = tf.zeros([self.last_dim, self.units], dtype=dtype)
+        self.eps_bias = tf.zeros([self.last_dim, self.units], dtype=dtype)
 
     def call(self, inputs, reset_noise=True):
         dtype = self._compute_dtype_object
@@ -219,9 +222,15 @@ def call(self, inputs, reset_noise=True):
         if reset_noise:
             self.reset_noise()
 
-        # Performs: y = (muw + sigmaw · εw)x + mub + sigmab · εb
-        # to calculate the output
-        kernel = self.mu_kernel + (self.sigma_kernel * self.ε_kernel)
+        r"""
+        Perform:
+        $$
+        y \stackrel{\text{def}}{=}
+        (\mu^w + \sigma^w \odot \eps^w)x + \mu^b + \sigma^b \odot \eps^b
+        $$
+        to calculate the output
+        """
+        kernel = self.mu_kernel + (self.sigma_kernel * self.eps_kernel)
 
         if inputs.dtype.base_dtype != dtype.base_dtype:
             inputs = tf.cast(inputs, dtype=dtype)
@@ -242,7 +251,7 @@ def call(self, inputs, reset_noise=True):
                 outputs.set_shape(output_shape)
 
         if self.use_bias:
-            noisy_bias = self.mu_bias + (self.sigma_bias * self.ε_bias)
+            noisy_bias = self.mu_bias + (self.sigma_bias * self.eps_bias)
             outputs = tf.nn.bias_add(outputs, noisy_bias)
 
         if self.activation is not None:
@@ -255,8 +264,8 @@ def compute_output_shape(self, input_shape):
         input_shape = input_shape.with_rank_at_least(2)
         if tf.compat.dimension_value(input_shape[-1]) is None:
             raise ValueError(
-                "The innermost dimension of input_shape must be defined, but saw: %s"
-                % input_shape
+                "The innermost dimension of input_shape must be defined"
+                ", but saw: %s" % input_shape
             )
         return input_shape[:-1].concatenate(self.units)
 
diff --git a/tensorflow_addons/layers/tests/noisy_dense_test.py b/tensorflow_addons/layers/tests/noisy_dense_test.py
index 0aac478851..95b851b386 100644
--- a/tensorflow_addons/layers/tests/noisy_dense_test.py
+++ b/tensorflow_addons/layers/tests/noisy_dense_test.py
@@ -57,8 +57,8 @@ def test_noisy_dense_with_policy():
     np.testing.assert_array_equal(output_signature.dtype, tf.dtypes.float16)
     np.testing.assert_array_equal(output_signature.shape, (2, 5))
     np.testing.assert_array_equal(outputs.dtype, "float16")
-    np.testing.assert_array_equal(layer.µ_kernel.dtype, "float32")
-    np.testing.assert_array_equal(layer.σ_kernel.dtype, "float32")
+    np.testing.assert_array_equal(layer.mu_kernel.dtype, "float32")
+    np.testing.assert_array_equal(layer.sigma_kernel.dtype, "float32")
 
 
 @pytest.mark.usefixtures("maybe_run_functions_eagerly")
@@ -85,10 +85,10 @@ def test_noisy_dense_constraints():
         name="noisy_dense_constriants",
     )
     layer(keras.backend.variable(np.ones((2, 4))))
-    np.testing.assert_array_equal(layer.µ_kernel.constraint, k_constraint)
-    np.testing.assert_array_equal(layer.σ_kernel.constraint, k_constraint)
-    np.testing.assert_array_equal(layer.µ_bias.constraint, b_constraint)
-    np.testing.assert_array_equal(layer.σ_bias.constraint, b_constraint)
+    np.testing.assert_array_equal(layer.mu_kernel.constraint, k_constraint)
+    np.testing.assert_array_equal(layer.sigma_kernel.constraint, k_constraint)
+    np.testing.assert_array_equal(layer.mu_bias.constraint, b_constraint)
+    np.testing.assert_array_equal(layer.sigma_bias.constraint, b_constraint)
 
 
 @pytest.mark.usefixtures("maybe_run_functions_eagerly")
@@ -96,16 +96,22 @@ def test_noisy_dense_automatic_reset_noise():
     inputs = tf.convert_to_tensor(np.random.randint(low=0, high=7, size=(2, 2)))
     layer = NoisyDense(5, name="noise_dense_auto_reset_noise")
     layer(inputs)
-    initial_ε_kernel = layer.ε_kernel
-    initial_ε_bias = layer.ε_bias
+    initial_eps_kernel = layer.eps_kernel
+    initial_eps_bias = layer.eps_bias
     layer(inputs)
-    new_ε_kernel = layer.ε_kernel
-    new_ε_bias = layer.ε_bias
+    new_eps_kernel = layer.eps_kernel
+    new_eps_bias = layer.eps_bias
     np.testing.assert_raises(
-        AssertionError, np.testing.assert_array_equal, initial_ε_kernel, new_ε_kernel
+        AssertionError,
+        np.testing.assert_array_equal,
+        initial_eps_kernel,
+        new_eps_kernel,
     )
     np.testing.assert_raises(
-        AssertionError, np.testing.assert_array_equal, initial_ε_bias, new_ε_bias
+        AssertionError,
+        np.testing.assert_array_equal,
+        initial_eps_bias,
+        new_eps_bias,
     )
 
 
@@ -114,16 +120,22 @@ def test_noisy_dense_manual_reset_noise():
     inputs = tf.convert_to_tensor(np.random.randint(low=0, high=7, size=(2, 2)))
     layer = NoisyDense(5, name="noise_dense_manual_reset_noise")
     layer(inputs)
-    initial_ε_kernel = layer.ε_kernel
-    initial_ε_bias = layer.ε_bias
+    initial_eps_kernel = layer.eps_kernel
+    initial_eps_bias = layer.eps_bias
     layer.reset_noise()
-    new_ε_kernel = layer.ε_kernel
-    new_ε_bias = layer.ε_bias
+    new_eps_kernel = layer.eps_kernel
+    new_eps_bias = layer.eps_bias
     np.testing.assert_raises(
-        AssertionError, np.testing.assert_array_equal, initial_ε_kernel, new_ε_kernel
+        AssertionError,
+        np.testing.assert_array_equal,
+        initial_eps_kernel,
+        new_eps_kernel,
     )
     np.testing.assert_raises(
-        AssertionError, np.testing.assert_array_equal, initial_ε_bias, new_ε_bias
+        AssertionError,
+        np.testing.assert_array_equal,
+        initial_eps_bias,
+        new_eps_bias,
     )
 
 
@@ -132,17 +144,23 @@ def test_noisy_dense_remove_noise():
     inputs = tf.convert_to_tensor(np.random.randint(low=0, high=7, size=(2, 2)))
     layer = NoisyDense(5, name="noise_dense_manual_reset_noise")
     layer(inputs)
-    initial_ε_kernel = layer.ε_kernel
-    initial_ε_bias = layer.ε_bias
+    initial_eps_kernel = layer.eps_kernel
+    initial_eps_bias = layer.eps_bias
     layer.remove_noise()
-    new_ε_kernel = layer.ε_kernel
-    new_ε_bias = layer.ε_bias
-    zeros = tf.zeros(initial_ε_kernel.shape, dtype=initial_ε_kernel.dtype)
+    new_eps_kernel = layer.eps_kernel
+    new_eps_bias = layer.eps_bias
+    zeros = tf.zeros(initial_eps_kernel.shape, dtype=initial_eps_kernel.dtype)
     np.testing.assert_raises(
-        AssertionError, np.testing.assert_array_equal, initial_ε_kernel, new_ε_kernel
+        AssertionError,
+        np.testing.assert_array_equal,
+        initial_eps_kernel,
+        new_eps_kernel,
     )
     np.testing.assert_raises(
-        AssertionError, np.testing.assert_array_equal, initial_ε_bias, new_ε_bias
+        AssertionError,
+        np.testing.assert_array_equal,
+        initial_eps_bias,
+        new_eps_bias,
     )
-    np.testing.assert_array_equal(zeros, new_ε_kernel)
-    np.testing.assert_array_equal(zeros, new_ε_bias)
+    np.testing.assert_array_equal(zeros, new_eps_kernel)
+    np.testing.assert_array_equal(zeros, new_eps_bias)

From d4ad13652db3fa7cab14791ba5ebf6c2fa459872 Mon Sep 17 00:00:00 2001
From: schaall <52867365+schaall@users.noreply.github.com>
Date: Fri, 11 Sep 2020 15:19:47 -0700
Subject: [PATCH 38/44] control noise removal through call

---
 tensorflow_addons/layers/noisy_dense.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 2ee3c44016..96cf9ad106 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -213,13 +213,15 @@ def remove_noise(self):
         self.eps_kernel = tf.zeros([self.last_dim, self.units], dtype=dtype)
         self.eps_bias = tf.zeros([self.last_dim, self.units], dtype=dtype)
 
-    def call(self, inputs, reset_noise=True):
+    def call(self, inputs, reset_noise=True, remove_noise=False):
         dtype = self._compute_dtype_object
         if inputs.dtype.base_dtype != dtype.base_dtype:
             inputs = tf.cast(inputs, dtype=dtype)
 
         # Generate fixed parameters added as the noise
-        if reset_noise:
+        if remove_noise:
+            self.remove_noise()
+        elif reset_noise:
             self.reset_noise()
 
         r"""

From da02bb1d182b8fa6be3308398c31badaeb5c7f1b Mon Sep 17 00:00:00 2001
From: schaall <52867365+schaall@users.noreply.github.com>
Date: Sat, 12 Sep 2020 01:12:38 -0700
Subject: [PATCH 39/44] Inherit from Dense instead of Layer

---
 tensorflow_addons/layers/noisy_dense.py       | 149 +++++++-----------
 .../layers/tests/noisy_dense_test.py          |  33 +---
 2 files changed, 60 insertions(+), 122 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 96cf9ad106..fb69cf2d2c 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -32,10 +32,9 @@ def _scale_noise(x):
 
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
-class NoisyDense(tf.keras.layers.Layer):
-    r"""Like normal dense layer but random noise is added to the weights
-    matrix. As the network improves the random noise is decayed until
-    it is insignificant.
+class NoisyDense(tf.keras.layers.Dense):
+    r"""Noisy dense layer that inject random noise to
+    the weights of normal dense layer.
 
     A `NoisyDense` layer implements the operation:
     $$
@@ -60,31 +59,31 @@ class NoisyDense(tf.keras.layers.Layer):
     (None, 32)
 
     Arguments:
-    units: Positive integer, dimensionality of the output space.
-    sigma: A float between 0-1 used as a standard deviation figure and is
-      applied to the gaussian noise layer (`sigma_kernel` and `sigma_bias`).
-    activation: Activation function to use.
-      If you don't specify anything, no activation is applied
-      (ie. "linear" activation: `a(x) = x`).
-    use_bias: Boolean, whether the layer uses a bias vector.
-    kernel_regularizer: Regularizer function applied to
-      the `kernel` weights matrix.
-    bias_regularizer: Regularizer function applied to the bias vector.
-    activity_regularizer: Regularizer function applied to
-      the output of the layer (its "activation").
-    kernel_constraint: Constraint function applied to
-      the `kernel` weights matrix.
-    bias_constraint: Constraint function applied to the bias vector.
+      units: Positive integer, dimensionality of the output space.
+      sigma: A float between 0-1 used as a standard deviation figure and is
+        applied to the gaussian noise layer (`sigma_kernel` and `sigma_bias`).
+      activation: Activation function to use.
+        If you don't specify anything, no activation is applied
+        (ie. "linear" activation: `a(x) = x`).
+      use_bias: Boolean, whether the layer uses a bias vector.
+      kernel_regularizer: Regularizer function applied to
+        the `kernel` weights matrix.
+      bias_regularizer: Regularizer function applied to the bias vector.
+      activity_regularizer: Regularizer function applied to
+        the output of the layer (its "activation").
+      kernel_constraint: Constraint function applied to
+        the `kernel` weights matrix.
+      bias_constraint: Constraint function applied to the bias vector.
 
     Input shape:
-    N-D tensor with shape: `(batch_size, ..., input_dim)`.
-    The most common situation would be
-    a 2D input with shape `(batch_size, input_dim)`.
+      N-D tensor with shape: `(batch_size, ..., input_dim)`.
+      The most common situation would be
+      a 2D input with shape `(batch_size, input_dim)`.
 
     Output shape:
-    N-D tensor with shape: `(batch_size, ..., units)`.
-    For instance, for a 2D input with shape `(batch_size, input_dim)`,
-    the output would have shape `(batch_size, units)`.
+      N-D tensor with shape: `(batch_size, ..., units)`.
+      For instance, for a 2D input with shape `(batch_size, input_dim)`,
+      the output would have shape `(batch_size, units)`.
     """
 
     @typechecked
@@ -101,21 +100,20 @@ def __init__(
         bias_constraint: types.Constraint = None,
         **kwargs
     ):
-        super(NoisyDense, self).__init__(
-            activity_regularizer=activity_regularizer, **kwargs
+        super().__init__(
+            units=units,
+            activation=activation,
+            use_bias=use_bias,
+            kernel_regularizer=kernel_regularizer,
+            bias_regularizer=bias_regularizer,
+            activity_regularizer=activity_regularizer,
+            kernel_constraint=kernel_constraint,
+            bias_constraint=bias_constraint,
+            **kwargs,
         )
-
-        self.units = units
+        delattr(self, "kernel_initializer")
+        delattr(self, "bias_initializer")
         self.sigma = sigma
-        self.activation = activations.get(activation)
-        self.use_bias = use_bias
-        self.kernel_regularizer = regularizers.get(kernel_regularizer)
-        self.bias_regularizer = regularizers.get(bias_regularizer)
-        self.kernel_constraint = constraints.get(kernel_constraint)
-        self.bias_constraint = constraints.get(bias_constraint)
-
-        self.input_spec = InputSpec(min_ndim=2)
-        self.supports_masking = True
 
     def build(self, input_shape):
         # Make sure dtype is correct
@@ -189,10 +187,20 @@ def build(self, input_shape):
         else:
             self.sigma_bias = None
             self.mu_bias = None
+        self._reset_noise()
         self.built = True
 
+    @property
+    def kernel(self):
+        return self.mu_kernel + (self.sigma_kernel * self.eps_kernel)
+
+    @property
+    def bias(self):
+        if self.use_bias:
+            return self.mu_bias + (self.sigma_bias * self.eps_bias)
+
     # Create the factorised Gaussian noise
-    def reset_noise(self):
+    def _reset_noise(self):
         dtype = self._compute_dtype_object
 
         # Generate random noise
@@ -208,74 +216,27 @@ def reset_noise(self):
         self.eps_kernel = _scale_noise(eps_i) * _scale_noise(eps_j)
         self.eps_bias = _scale_noise(eps_j)
 
-    def remove_noise(self):
+    def _remove_noise(self):
         dtype = self._compute_dtype_object
         self.eps_kernel = tf.zeros([self.last_dim, self.units], dtype=dtype)
-        self.eps_bias = tf.zeros([self.last_dim, self.units], dtype=dtype)
+        self.eps_bias = tf.zeros([self.units], dtype=dtype)
 
     def call(self, inputs, reset_noise=True, remove_noise=False):
-        dtype = self._compute_dtype_object
-        if inputs.dtype.base_dtype != dtype.base_dtype:
-            inputs = tf.cast(inputs, dtype=dtype)
-
         # Generate fixed parameters added as the noise
         if remove_noise:
-            self.remove_noise()
+            self._remove_noise()
         elif reset_noise:
-            self.reset_noise()
-
-        r"""
-        Perform:
-        $$
-        y \stackrel{\text{def}}{=}
-        (\mu^w + \sigma^w \odot \eps^w)x + \mu^b + \sigma^b \odot \eps^b
-        $$
-        to calculate the output
-        """
-        kernel = self.mu_kernel + (self.sigma_kernel * self.eps_kernel)
-
-        if inputs.dtype.base_dtype != dtype.base_dtype:
-            inputs = tf.cast(inputs, dtype=dtype)
+            self._reset_noise()
 
-        rank = inputs.shape.rank
-        if rank == 2 or rank is None:
-            if isinstance(inputs, tf.sparse.SparseTensor):
-                outputs = tf.sparse.sparse_dense_matmul(inputs, kernel)
-            else:
-                outputs = tf.linalg.matmul(inputs, kernel)
-        # Broadcast kernel to inputs.
-        else:
-            outputs = tf.tensordot(inputs, kernel, [[rank - 1], [0]])
-            # Reshape the output back to the original ndim of the input.
-            if not tf.executing_eagerly():
-                shape = inputs.shape.as_list()
-                output_shape = shape[:-1] + [kernel.shape[-1]]
-                outputs.set_shape(output_shape)
-
-        if self.use_bias:
-            noisy_bias = self.mu_bias + (self.sigma_bias * self.eps_bias)
-            outputs = tf.nn.bias_add(outputs, noisy_bias)
-
-        if self.activation is not None:
-            outputs = self.activation(outputs)
-
-        return outputs
-
-    def compute_output_shape(self, input_shape):
-        input_shape = tf.TensorShape(input_shape)
-        input_shape = input_shape.with_rank_at_least(2)
-        if tf.compat.dimension_value(input_shape[-1]) is None:
-            raise ValueError(
-                "The innermost dimension of input_shape must be defined"
-                ", but saw: %s" % input_shape
-            )
-        return input_shape[:-1].concatenate(self.units)
+        # TODO(WindQAQ): Replace this with `dense()` once public.
+        return super().call(inputs)
 
     def get_config(self):
-        config = super(NoisyDense, self).get_config()
+        config = super(tf.keras.layers.Dense, self).get_config()
         config.update(
             {
                 "units": self.units,
+                "sigma": self.sigma,
                 "activation": activations.serialize(self.activation),
                 "use_bias": self.use_bias,
                 "kernel_regularizer": regularizers.serialize(self.kernel_regularizer),
diff --git a/tensorflow_addons/layers/tests/noisy_dense_test.py b/tensorflow_addons/layers/tests/noisy_dense_test.py
index 95b851b386..f716046924 100644
--- a/tensorflow_addons/layers/tests/noisy_dense_test.py
+++ b/tensorflow_addons/layers/tests/noisy_dense_test.py
@@ -115,30 +115,6 @@ def test_noisy_dense_automatic_reset_noise():
     )
 
 
-@pytest.mark.usefixtures("maybe_run_functions_eagerly")
-def test_noisy_dense_manual_reset_noise():
-    inputs = tf.convert_to_tensor(np.random.randint(low=0, high=7, size=(2, 2)))
-    layer = NoisyDense(5, name="noise_dense_manual_reset_noise")
-    layer(inputs)
-    initial_eps_kernel = layer.eps_kernel
-    initial_eps_bias = layer.eps_bias
-    layer.reset_noise()
-    new_eps_kernel = layer.eps_kernel
-    new_eps_bias = layer.eps_bias
-    np.testing.assert_raises(
-        AssertionError,
-        np.testing.assert_array_equal,
-        initial_eps_kernel,
-        new_eps_kernel,
-    )
-    np.testing.assert_raises(
-        AssertionError,
-        np.testing.assert_array_equal,
-        initial_eps_bias,
-        new_eps_bias,
-    )
-
-
 @pytest.mark.usefixtures("maybe_run_functions_eagerly")
 def test_noisy_dense_remove_noise():
     inputs = tf.convert_to_tensor(np.random.randint(low=0, high=7, size=(2, 2)))
@@ -146,10 +122,11 @@ def test_noisy_dense_remove_noise():
     layer(inputs)
     initial_eps_kernel = layer.eps_kernel
     initial_eps_bias = layer.eps_bias
-    layer.remove_noise()
+    layer(inputs, reset_noise=False, remove_noise=True)
     new_eps_kernel = layer.eps_kernel
     new_eps_bias = layer.eps_bias
-    zeros = tf.zeros(initial_eps_kernel.shape, dtype=initial_eps_kernel.dtype)
+    kernel_zeros = tf.zeros(initial_eps_kernel.shape, dtype=initial_eps_kernel.dtype)
+    bias_zeros = tf.zeros(initial_eps_bias.shape, dtype=initial_eps_kernel.dtype)
     np.testing.assert_raises(
         AssertionError,
         np.testing.assert_array_equal,
@@ -162,5 +139,5 @@ def test_noisy_dense_remove_noise():
         initial_eps_bias,
         new_eps_bias,
     )
-    np.testing.assert_array_equal(zeros, new_eps_kernel)
-    np.testing.assert_array_equal(zeros, new_eps_bias)
+    np.testing.assert_array_equal(kernel_zeros, new_eps_kernel)
+    np.testing.assert_array_equal(bias_zeros, new_eps_bias)

From 5730a9274763b90e765ffa8f5dcdb07af7684243 Mon Sep 17 00:00:00 2001
From: schaall <52867365+schaall@users.noreply.github.com>
Date: Sat, 12 Sep 2020 01:18:54 -0700
Subject: [PATCH 40/44] Added missing comment

---
 tensorflow_addons/layers/noisy_dense.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index fb69cf2d2c..529c5082d6 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -232,6 +232,7 @@ def call(self, inputs, reset_noise=True, remove_noise=False):
         return super().call(inputs)
 
     def get_config(self):
+        # TODO(WindQAQ): Get rid of this hack; it bypasses `Dense.get_config()`.
         config = super(tf.keras.layers.Dense, self).get_config()
         config.update(
             {

From 999c8b7b56aeaa3cb2a1cf1f33d38215a35564b9 Mon Sep 17 00:00:00 2001
From: schaall <52867365+schaall@users.noreply.github.com>
Date: Sat, 12 Sep 2020 18:22:05 -0700
Subject: [PATCH 41/44] Documentation and test improvement

---
 tensorflow_addons/layers/noisy_dense.py       | 22 ++++++++++++++-----
 .../layers/tests/noisy_dense_test.py          | 16 ++++++--------
 2 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 529c5082d6..90210f8553 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -33,16 +33,22 @@ def _scale_noise(x):
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
 class NoisyDense(tf.keras.layers.Dense):
-    r"""Noisy dense layer that inject random noise to
-    the weights of normal dense layer.
+    r"""Noisy dense layer that injects random noise into the weights of a dense layer.
+
+    Noisy dense layers are fully connected layers whose weights and biases are
+    augmented by factorised Gaussian noise. The factorised Gaussian noise is
+    controlled through gradient descent by a second weights layer.
 
     A `NoisyDense` layer implements the operation:
     $$
     \mathrm{NoisyDense}(x) =
-    \mathrm{activation}(\mathrm{dot}(x, \mu + (\sigma \cdot \eps))
+    \mathrm{activation}(\mathrm{dot}(x, \mu + (\sigma \cdot \epsilon))
     + \mathrm{bias})
     $$
-    with bias only being added if `use_bias` is `True`.
+    Where $\mu$ is the standard weights layer, $\epsilon$ is the factorised
+    Gaussian noise, and $\sigma$ is a second weights layer which controls
+    $\epsilon$.
+    Note: bias only added if `use_bias` is `True`.
 
     Example:
     >>> # Create a `Sequential` model and add a NoisyDense
@@ -84,6 +90,9 @@ class NoisyDense(tf.keras.layers.Dense):
       N-D tensor with shape: `(batch_size, ..., units)`.
       For instance, for a 2D input with shape `(batch_size, input_dim)`,
       the output would have shape `(batch_size, units)`.
+
+    References:
+      - [Noisy Networks for Exploration](https://arxiv.org/pdf/1706.10295.pdf)
     """
 
     @typechecked
@@ -199,8 +208,9 @@ def bias(self):
         if self.use_bias:
             return self.mu_bias + (self.sigma_bias * self.eps_bias)
 
-    # Create the factorised Gaussian noise
     def _reset_noise(self):
+        """Create the factorised Gaussian noise."""
+
         dtype = self._compute_dtype_object
 
         # Generate random noise
@@ -217,6 +227,8 @@ def _reset_noise(self):
         self.eps_bias = _scale_noise(eps_j)
 
     def _remove_noise(self):
+        """Remove the factorised Gaussian noise."""
+
         dtype = self._compute_dtype_object
         self.eps_kernel = tf.zeros([self.last_dim, self.units], dtype=dtype)
         self.eps_bias = tf.zeros([self.units], dtype=dtype)
diff --git a/tensorflow_addons/layers/tests/noisy_dense_test.py b/tensorflow_addons/layers/tests/noisy_dense_test.py
index f716046924..9f76307518 100644
--- a/tensorflow_addons/layers/tests/noisy_dense_test.py
+++ b/tensorflow_addons/layers/tests/noisy_dense_test.py
@@ -20,19 +20,17 @@
 
 import tensorflow as tf
 from tensorflow import keras
-from tensorflow_addons.utils import test_utils
-from tensorflow_addons.layers.noisy_dense import NoisyDense
 from tensorflow.keras.mixed_precision.experimental import Policy
 
+from tensorflow_addons.utils import test_utils
+from tensorflow_addons.layers.noisy_dense import NoisyDense
 
-def test_noisy_dense():
-    test_utils.layer_test(NoisyDense, kwargs={"units": 3}, input_shape=(3, 2))
-
-    test_utils.layer_test(NoisyDense, kwargs={"units": 3}, input_shape=(3, 4, 2))
-
-    test_utils.layer_test(NoisyDense, kwargs={"units": 3}, input_shape=(None, None, 2))
 
-    test_utils.layer_test(NoisyDense, kwargs={"units": 3}, input_shape=(3, 4, 5, 2))
+@pytest.mark.parametrize(
+    "input_shape", [(3, 2), (3, 4, 2), (None, None, 2), (3, 4, 5, 2)]
+)
+def test_noisy_dense(input_shape):
+    test_utils.layer_test(NoisyDense, kwargs={"units": 3}, input_shape=input_shape)
 
 
 @pytest.mark.usefixtures("maybe_run_functions_eagerly")

From 5d73c907395df0299dfc360ec9946a16429412a7 Mon Sep 17 00:00:00 2001
From: Leon Shams <52867365+LeonShams@users.noreply.github.com>
Date: Sat, 12 Sep 2020 18:35:20 -0700
Subject: [PATCH 42/44] fix typo

---
 tensorflow_addons/layers/noisy_dense.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 90210f8553..574abf1b6e 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -45,7 +45,7 @@ class NoisyDense(tf.keras.layers.Dense):
     \mathrm{activation}(\mathrm{dot}(x, \mu + (\sigma \cdot \epsilon))
     + \mathrm{bias})
     $$
-    Where $\mu$ is the standard weights layer, $\epsilon$ is the factorised
+    where $\mu$ is the standard weights layer, $\epsilon$ is the factorised
     Gaussian noise, and $\sigma$ is a second weights layer which controls
     $\epsilon$.
     Note: bias only added if `use_bias` is `True`.

From 4cdf577e27a0cea7af3f1a1b88a4c4488dfd3489 Mon Sep 17 00:00:00 2001
From: schaall <52867365+schaall@users.noreply.github.com>
Date: Sat, 12 Sep 2020 19:36:11 -0700
Subject: [PATCH 43/44] minor formatting changes

---
 tensorflow_addons/layers/noisy_dense.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 90210f8553..319f6bc416 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -51,6 +51,7 @@ class NoisyDense(tf.keras.layers.Dense):
     Note: bias only added if `use_bias` is `True`.
 
     Example:
+
     >>> # Create a `Sequential` model and add a NoisyDense
     >>> # layer as the first layer.
     >>> model = tf.keras.models.Sequential()
@@ -143,16 +144,14 @@ def build(self, input_shape):
             )
         self.input_spec = InputSpec(min_ndim=2, axes={-1: self.last_dim})
 
-        self.sigma_init = initializers.Constant(value=self.sigma / sqrt_dim)
-        self.mu_init = initializers.RandomUniform(
-            minval=-1 / sqrt_dim, maxval=1 / sqrt_dim
-        )
+        sigma_init = initializers.Constant(value=self.sigma / sqrt_dim)
+        mu_init = initializers.RandomUniform(minval=-1 / sqrt_dim, maxval=1 / sqrt_dim)
 
         # Learnable parameters
         self.sigma_kernel = self.add_weight(
             "sigma_kernel",
             shape=[self.last_dim, self.units],
-            initializer=self.sigma_init,
+            initializer=sigma_init,
             regularizer=self.kernel_regularizer,
             constraint=self.kernel_constraint,
             dtype=self.dtype,
@@ -162,7 +161,7 @@ def build(self, input_shape):
         self.mu_kernel = self.add_weight(
             "mu_kernel",
             shape=[self.last_dim, self.units],
-            initializer=self.mu_init,
+            initializer=mu_init,
             regularizer=self.kernel_regularizer,
             constraint=self.kernel_constraint,
             dtype=self.dtype,
@@ -175,7 +174,7 @@ def build(self, input_shape):
                 shape=[
                     self.units,
                 ],
-                initializer=self.sigma_init,
+                initializer=sigma_init,
                 regularizer=self.bias_regularizer,
                 constraint=self.bias_constraint,
                 dtype=self.dtype,
@@ -187,7 +186,7 @@ def build(self, input_shape):
                 shape=[
                     self.units,
                 ],
-                initializer=self.mu_init,
+                initializer=mu_init,
                 regularizer=self.bias_regularizer,
                 constraint=self.bias_constraint,
                 dtype=self.dtype,

From 3cd021746ec2a61ff4b1eaa8a9291045d65e6c41 Mon Sep 17 00:00:00 2001
From: schaall <52867365+schaall@users.noreply.github.com>
Date: Sun, 13 Sep 2020 18:40:38 -0700
Subject: [PATCH 44/44] minor formatting fix

---
 tensorflow_addons/layers/noisy_dense.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow_addons/layers/noisy_dense.py b/tensorflow_addons/layers/noisy_dense.py
index 2ec0433dca..647b28db7d 100644
--- a/tensorflow_addons/layers/noisy_dense.py
+++ b/tensorflow_addons/layers/noisy_dense.py
@@ -48,6 +48,7 @@ class NoisyDense(tf.keras.layers.Dense):
     where $\mu$ is the standard weights layer, $\epsilon$ is the factorised
     Gaussian noise, and $\sigma$ is a second weights layer which controls
     $\epsilon$.
+
     Note: bias only added if `use_bias` is `True`.
 
     Example: