From fdc77ab73878cad49f4f2bca16f235a052262576 Mon Sep 17 00:00:00 2001 From: AakashKumarNain Date: Fri, 13 Dec 2019 17:30:47 +0530 Subject: [PATCH 1/8] add filter response normalization --- tensorflow_addons/layers/__init__.py | 3 +- tensorflow_addons/layers/normalizations.py | 222 +++++++++++++++++++++ 2 files changed, 224 insertions(+), 1 deletion(-) diff --git a/tensorflow_addons/layers/__init__.py b/tensorflow_addons/layers/__init__.py index d527e16362..7c488c5ea9 100644 --- a/tensorflow_addons/layers/__init__.py +++ b/tensorflow_addons/layers/__init__.py @@ -22,7 +22,8 @@ from tensorflow_addons.layers.maxout import Maxout from tensorflow_addons.layers.normalizations import GroupNormalization from tensorflow_addons.layers.normalizations import InstanceNormalization +from tensorflow_addons.layers.normalizations import FilterResponseNormalization from tensorflow_addons.layers.optical_flow import CorrelationCost from tensorflow_addons.layers.poincare import PoincareNormalize from tensorflow_addons.layers.sparsemax import Sparsemax -from tensorflow_addons.layers.wrappers import WeightNormalization \ No newline at end of file +from tensorflow_addons.layers.wrappers import WeightNormalization diff --git a/tensorflow_addons/layers/normalizations.py b/tensorflow_addons/layers/normalizations.py index 7358101b10..a1aaaa2caf 100644 --- a/tensorflow_addons/layers/normalizations.py +++ b/tensorflow_addons/layers/normalizations.py @@ -316,3 +316,225 @@ def __init__(self, **kwargs): kwargs["groups"] = -1 super(InstanceNormalization, self).__init__(**kwargs) + + +@tf.keras.utils.register_keras_serializable(package='Addons') +class FilterResponseNormalization(tf.keras.layers.Layer): + """Filter response normalization layer. + + Filter Response Normalization (FRN), a normalization + method that enables models trained with per-channel + normalization to achieve high accuracy. It performs better than + all other normalization techniques for small batches and is par + with Batch Normalization for big batch sizes. + + The filter response normalization layer consists of two components: + 1. FRN: Filter response normalization component + 2. TLU: Thresholded Linear Unit (TLU) an activation function to + use with FRN resulting in a further improvement in accuracy + outperforming Batch Normalization even at large batch sizes + without any batch dependency. + + Because FRN layer comes with own activation unit, hence no activation + function should be used in the previous layer and the next intermediate + layer to FRN layer. + + Arguments + axis: List of axes that should be normalized. + epsilon: Small float added to variance to avoid dividing by zero. + beta_initializer: Initializer for the beta weight. + gamma_initializer: Initializer for the gamma weight. + tau_initializer: Initializer for the tau weight. + eps_var_initializer: Initializer for the extra trainable parameter + added to epsilon. + beta_regularizer: Optional regularizer for the beta weight. + gamma_regularizer: Optional regularizer for the gamma weight. + tau_regularizer: Optional regularizer for the tau weight. + beta_constraint: Optional constraint for the beta weight. + gamma_constraint: Optional constraint for the gamma weight. + tau_constraint: Optional constraint for the tau weight. + name: Optional name for the layer + + Input shape + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. + + Output shape + Same shape as input. 
+ + References + - [Filter Response Normalization Layer: Eliminating Batch Dependence + in the training of Deep Neural Networks] + (https://arxiv.org/abs/1911.09737) + """ + + def __init__(self, + epsilon=1e-6, + axis=[1, 2], + beta_initializer='zeros', + gamma_initializer='ones', + tau_initializer='zeros', + eps_var_initializer='zeros', + beta_regularizer=None, + gamma_regularizer=None, + tau_regularizer=None, + beta_constraint=None, + gamma_constraint=None, + tau_constraint=None, + name=None, + **kwargs): + super(FilterResponseNormalization, self).__init__(name=name, **kwargs) + self.epsilon = epsilon + self.beta_initializer = tf.keras.initializers.get(beta_initializer) + self.gamma_initializer = tf.keras.initializers.get(gamma_initializer) + self.tau_initializer = tf.keras.initializers.get(tau_initializer) + self.beta_regularizer = tf.keras.regularizers.get(beta_regularizer) + self.gamma_regularizer = tf.keras.regularizers.get(gamma_regularizer) + self.tau_regularizer = tf.keras.regularizers.get(tau_regularizer) + self.beta_constraint = tf.keras.constraints.get(beta_constraint) + self.gamma_constraint = tf.keras.constraints.get(gamma_constraint) + self.tau_constraint = tf.keras.constraints.get(tau_constraint) + self.eps_var_initializer = tf.keras.initializers.get( + eps_var_initializer) + self.supports_masking = True + if isinstance(axis, list): + self.axis = axis[:] + else: + raise TypeError('axis must be list, type given: %s' % type(axis)) + self._check_axis() + + def build(self, input_shape): + self._check_if_input_shape_is_none(input_shape) + self._create_input_spec(input_shape) + self._add_gamma_weight(input_shape) + self._add_beta_weight(input_shape) + self._add_tau_weight(input_shape) + self._add_eps_var_weight() + self.built = True + super(FilterResponseNormalization, self).build(input_shape) + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = { + 'axis': + self.axis, + 'epsilon': + self.epsilon, + 'beta_initializer': + tf.keras.initializers.serialize(self.beta_initializer), + 'gamma_initializer': + tf.keras.initializers.serialize(self.gamma_initializer), + 'tau_initializer': + tf.keras.initializers.serialize(self.tau_initializer), + 'eps_var_initializer': + tf.keras.initializers.serialize(self.eps_var_initializer), + 'beta_regularizer': + tf.keras.regularizers.serialize(self.beta_regularizer), + 'gamma_regularizer': + tf.keras.regularizers.serialize(self.gamma_regularizer), + 'tau_regularizer': + tf.keras.regularizers.serialize(self.tau_regularizer), + 'beta_constraint': + tf.keras.constraints.serialize(self.beta_constraint), + 'gamma_constraint': + tf.keras.constraints.serialize(self.gamma_constraint), + 'tau_constraint': + tf.keras.constraints.serialize(self.tau_constraint) + } + base_config = super(FilterResponseNormalization, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, inputs): + # Compute the mean norm of activations per channel. + nu2 = tf.reduce_mean(tf.square(inputs), axis=self.axis, keepdims=True) + # Perform FRN. + inputs = inputs * tf.math.rsqrt(nu2 + tf.math.abs(self.epsilon)) + # Return after applying the Offset-ReLU non-linearity. 
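+        # i.e. y = max(gamma * x_hat + beta, tau + eps_var), where x_hat is the
+        # mean-square-normalized input and eps_var is a learned non-negative offset.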
+ return tf.maximum(self.gamma * inputs + self.beta, + self.tau + self.eps_var) + + def _create_input_spec(self, input_shape): + ndims = len(input_shape.shape) + for idx, x in enumerate(self.axis): + if x < 0: + self.axis[idx] = ndims + x + + # Validate axes + for x in self.axis: + if x < 0 or x >= ndims: + raise ValueError('Invalid axis: %d' % x) + if len(self.axis) != len(set(self.axis)): + raise ValueError('Duplicate axis: %s' % self.axis) + axis_to_dim = {x: input_shape[x] for x in self.axis} + self.input_spec = tf.keras.layers.InputSpec( + ndim=ndims, axes=axis_to_dim) + + def _check_axis(self): + if self.axis == 0: + raise ValueError( + "You are trying to normalize your batch axis. Do you want to " + "use tf.layer.batch_normalization instead") + if self.axis == -1: + raise ValueError( + "You are trying to normalize your channel axis. You may want" + "to use GroupNormalization layer instead from tf_addons") + if self.axis != [1, 2]: + raise ValueError( + "FilterResponseNormalization operates on per-channel basis." + "Axis value should represent the spatial dimensions") + + def _check_if_input_shape_is_none(self, input_shape): + dim1, dim2 = input_shape[self.axis[0]], input_shape[self.axis[1]] + if dim1 is None or dim2 is None: + raise ValueError('Axis ' + str(self.axis) + ' of ' + 'input tensor should have a defined dimension ' + 'but the layer received an input with shape ' + + str(input_shape) + '.') + + def _add_gamma_weight(self, input_shape): + # Get the channel dimension + dim = input_shape.shape[-1] + shape = [1, 1, 1, dim] + # Initialize gamma with shape (1, 1, 1, C) + self.gamma = self.add_weight( + shape=shape, + name='gamma', + initializer=self.gamma_initializer, + regularizer=self.gamma_regularizer, + constraint=self.gamma_constraint) + + def _add_beta_weight(self, input_shape): + # Get the channel dimension + dim = input_shape.shape[-1] + shape = [1, 1, 1, dim] + # Initialize beta with shape (1, 1, 1, C) + self.beta = self.add_weight( + shape=shape, + name='beta', + initializer=self.beta_initializer, + regularizer=self.beta_regularizer, + constraint=self.beta_constraint) + + def _add_tau_weight(self, input_shape): + # Get the channel dimension + dim = input_shape.shape[-1] + shape = [1, 1, 1, dim] + # Initialize tau with shape (1, 1, 1, C) + self.tau = self.add_weight( + shape=shape, + name='tau', + initializer=self.tau_initializer, + regularizer=self.tau_regularizer, + constraint=self.tau_constraint) + + def _add_eps_var_weight(self): + shape = (1,) + self.eps_var = self.add_weight( + shape=shape, + name='eps_var', + initializer=self.eps_var_initializer, + regularizer=None, + constraint=tf.keras.constraints.non_neg) From 3030d95cd00174e34dad8b2ce69c41a02edf4862 Mon Sep 17 00:00:00 2001 From: AakashKumarNain Date: Tue, 10 Mar 2020 14:27:32 +0530 Subject: [PATCH 2/8] update FRN layer, tests still failing --- tensorflow_addons/layers/normalizations.py | 137 ++++++++---------- .../layers/normalizations_test.py | 62 ++++++++ 2 files changed, 121 insertions(+), 78 deletions(-) diff --git a/tensorflow_addons/layers/normalizations.py b/tensorflow_addons/layers/normalizations.py index 79b390c602..37d1834a81 100644 --- a/tensorflow_addons/layers/normalizations.py +++ b/tensorflow_addons/layers/normalizations.py @@ -331,39 +331,27 @@ class FilterResponseNormalization(tf.keras.layers.Layer): method that enables models trained with per-channel normalization to achieve high accuracy. 
It performs better than all other normalization techniques for small batches and is par - with Batch Normalization for big batch sizes. - - The filter response normalization layer consists of two components: - 1. FRN: Filter response normalization component - 2. TLU: Thresholded Linear Unit (TLU) an activation function to - use with FRN resulting in a further improvement in accuracy - outperforming Batch Normalization even at large batch sizes - without any batch dependency. - - Because FRN layer comes with own activation unit, hence no activation - function should be used in the previous layer and the next intermediate - layer to FRN layer. + with Batch Normalization for bigger batch sizes. Arguments - axis: List of axes that should be normalized. - epsilon: Small float added to variance to avoid dividing by zero. + axis: List of axes that should be normalized. This should represent the + spatial dimensions. + epsilon: Small float value added to variance to avoid dividing by zero. beta_initializer: Initializer for the beta weight. gamma_initializer: Initializer for the gamma weight. - tau_initializer: Initializer for the tau weight. - eps_var_initializer: Initializer for the extra trainable parameter - added to epsilon. beta_regularizer: Optional regularizer for the beta weight. gamma_regularizer: Optional regularizer for the gamma weight. - tau_regularizer: Optional regularizer for the tau weight. beta_constraint: Optional constraint for the beta weight. gamma_constraint: Optional constraint for the gamma weight. - tau_constraint: Optional constraint for the tau weight. + learned_epsilon: (bool) Whether to add another learnable + epsilon parameter or not. name: Optional name for the layer Input shape Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. + when using this layer as the first layer in a model. This layer + wokrs on a 4-D tensor where the tensor should have the shape [N X H X W X C] Output shape Same shape as input. 
@@ -376,38 +364,40 @@ class FilterResponseNormalization(tf.keras.layers.Layer): def __init__( self, - epsilon=1e-6, - axis=[1, 2], - beta_initializer="zeros", - gamma_initializer="ones", - tau_initializer="zeros", - eps_var_initializer="zeros", - beta_regularizer=None, - gamma_regularizer=None, - tau_regularizer=None, - beta_constraint=None, - gamma_constraint=None, - tau_constraint=None, - name=None, + epsilon: float = 1e-6, + axis: list = [1, 2], + beta_initializer: types.Initializer = "zeros", + gamma_initializer: types.Initializer = "ones", + beta_regularizer: types.Regularizer = None, + gamma_regularizer: types.Regularizer = None, + beta_constraint: types.Constraint = None, + gamma_constraint: types.Constraint = None, + learned_epsilon: bool = False, + name: str = None, **kwargs ): - super(FilterResponseNormalization, self).__init__(name=name, **kwargs) + super().__init__(name=name, **kwargs) self.epsilon = epsilon self.beta_initializer = tf.keras.initializers.get(beta_initializer) self.gamma_initializer = tf.keras.initializers.get(gamma_initializer) - self.tau_initializer = tf.keras.initializers.get(tau_initializer) self.beta_regularizer = tf.keras.regularizers.get(beta_regularizer) self.gamma_regularizer = tf.keras.regularizers.get(gamma_regularizer) - self.tau_regularizer = tf.keras.regularizers.get(tau_regularizer) self.beta_constraint = tf.keras.constraints.get(beta_constraint) self.gamma_constraint = tf.keras.constraints.get(gamma_constraint) - self.tau_constraint = tf.keras.constraints.get(tau_constraint) - self.eps_var_initializer = tf.keras.initializers.get(eps_var_initializer) + self.use_eps_learned = learned_epsilon self.supports_masking = True + + if self.use_eps_learned: + self.eps_learned_initializer = tf.keras.initializers.Constant(1e-4) + else: + self.eps_learned_initializer = None + if isinstance(axis, list): self.axis = axis[:] + elif isinstance(axis, int): + self.axis = axis else: - raise TypeError("axis must be list, type given: %s" % type(axis)) + raise TypeError("axis must be int or list, type given: %s" % type(axis)) self._check_axis() def build(self, input_shape): @@ -415,10 +405,11 @@ def build(self, input_shape): self._create_input_spec(input_shape) self._add_gamma_weight(input_shape) self._add_beta_weight(input_shape) - self._add_tau_weight(input_shape) - self._add_eps_var_weight() + + if self.use_eps_learned: + self._add_eps_learned_weight() self.built = True - super(FilterResponseNormalization, self).build(input_shape) + super().build(input_shape) def compute_output_shape(self, input_shape): return input_shape @@ -427,36 +418,34 @@ def get_config(self): config = { "axis": self.axis, "epsilon": self.epsilon, + "learned_epsilon": self.use_eps_learned, "beta_initializer": tf.keras.initializers.serialize(self.beta_initializer), "gamma_initializer": tf.keras.initializers.serialize( self.gamma_initializer ), - "tau_initializer": tf.keras.initializers.serialize(self.tau_initializer), - "eps_var_initializer": tf.keras.initializers.serialize( - self.eps_var_initializer + "eps_learned_initializer": tf.keras.initializers.serialize( + self.eps_learned_initializer ), "beta_regularizer": tf.keras.regularizers.serialize(self.beta_regularizer), "gamma_regularizer": tf.keras.regularizers.serialize( self.gamma_regularizer ), - "tau_regularizer": tf.keras.regularizers.serialize(self.tau_regularizer), "beta_constraint": tf.keras.constraints.serialize(self.beta_constraint), "gamma_constraint": tf.keras.constraints.serialize(self.gamma_constraint), - "tau_constraint": 
tf.keras.constraints.serialize(self.tau_constraint), } - base_config = super(FilterResponseNormalization, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + base_config = super().get_config() + return dict(**base_config, **config) def call(self, inputs): - # Compute the mean norm of activations per channel. + epsilon = tf.math.abs(self.epsilon) + if self.use_eps_learned: + epsilon += self.eps_learned nu2 = tf.reduce_mean(tf.square(inputs), axis=self.axis, keepdims=True) - # Perform FRN. - inputs = inputs * tf.math.rsqrt(nu2 + tf.math.abs(self.epsilon)) - # Return after applying the Offset-ReLU non-linearity. - return tf.maximum(self.gamma * inputs + self.beta, self.tau + self.eps_var) + inputs *= tf.math.rsqrt(nu2 + epsilon) + return self.gamma * inputs + self.beta def _create_input_spec(self, input_shape): - ndims = len(input_shape.shape) + ndims = len(tf.TensorShape(input_shape)) for idx, x in enumerate(self.axis): if x < 0: self.axis[idx] = ndims + x @@ -465,26 +454,28 @@ def _create_input_spec(self, input_shape): for x in self.axis: if x < 0 or x >= ndims: raise ValueError("Invalid axis: %d" % x) + if len(self.axis) != len(set(self.axis)): raise ValueError("Duplicate axis: %s" % self.axis) + axis_to_dim = {x: input_shape[x] for x in self.axis} self.input_spec = tf.keras.layers.InputSpec(ndim=ndims, axes=axis_to_dim) def _check_axis(self): if self.axis == 0: raise ValueError( - "You are trying to normalize your batch axis. Do you want to " + "You are trying to normalize your batch axis. You may want to " "use tf.layer.batch_normalization instead" ) if self.axis == -1: raise ValueError( - "You are trying to normalize your channel axis. You may want" - "to use GroupNormalization layer instead from tf_addons" + "You are trying to normalize your channel axis. You may want to " + "use GroupNormalization layer instead from tf_addons" ) if self.axis != [1, 2]: raise ValueError( "FilterResponseNormalization operates on per-channel basis." 
- "Axis value should represent the spatial dimensions" + "Axis values should be spatial dimensions" ) def _check_if_input_shape_is_none(self, input_shape): @@ -498,12 +489,13 @@ def _check_if_input_shape_is_none(self, input_shape): def _add_gamma_weight(self, input_shape): # Get the channel dimension - dim = input_shape.shape[-1] + dim = input_shape[-1] shape = [1, 1, 1, dim] # Initialize gamma with shape (1, 1, 1, C) self.gamma = self.add_weight( shape=shape, name="gamma", + dtype=self.dtype, initializer=self.gamma_initializer, regularizer=self.gamma_regularizer, constraint=self.gamma_constraint, @@ -511,36 +503,25 @@ def _add_gamma_weight(self, input_shape): def _add_beta_weight(self, input_shape): # Get the channel dimension - dim = input_shape.shape[-1] + dim = input_shape[-1] shape = [1, 1, 1, dim] # Initialize beta with shape (1, 1, 1, C) self.beta = self.add_weight( shape=shape, name="beta", + dtype=self.dtype, initializer=self.beta_initializer, regularizer=self.beta_regularizer, constraint=self.beta_constraint, ) - def _add_tau_weight(self, input_shape): - # Get the channel dimension - dim = input_shape.shape[-1] - shape = [1, 1, 1, dim] - # Initialize tau with shape (1, 1, 1, C) - self.tau = self.add_weight( - shape=shape, - name="tau", - initializer=self.tau_initializer, - regularizer=self.tau_regularizer, - constraint=self.tau_constraint, - ) - - def _add_eps_var_weight(self): + def _add_eps_learned_weight(self): shape = (1,) - self.eps_var = self.add_weight( + self.eps_learned = self.add_weight( shape=shape, - name="eps_var", - initializer=self.eps_var_initializer, + name="learned_epsilon", + dtype=self.dtype, + initializer=tf.keras.initializers.get(self.eps_learned_initializer), regularizer=None, constraint=tf.keras.constraints.non_neg, ) diff --git a/tensorflow_addons/layers/normalizations_test.py b/tensorflow_addons/layers/normalizations_test.py index 61b21e8b40..d072b34680 100644 --- a/tensorflow_addons/layers/normalizations_test.py +++ b/tensorflow_addons/layers/normalizations_test.py @@ -21,6 +21,7 @@ from tensorflow_addons.layers.normalizations import GroupNormalization from tensorflow_addons.layers.normalizations import InstanceNormalization +from tensorflow_addons.layers.normalizations import FilterResponseNormalization from tensorflow_addons.utils import test_utils @@ -331,5 +332,66 @@ def test_groupnorm_convnet_no_center_no_scale(self): ) +@test_utils.run_all_in_graph_and_eager_modes +class FilterResponseNormalizationTest(tf.test.TestCase): + def calculate_frn(self, x, beta=0.2, gamma=1, eps=1e-6, learned_epsilon=False): + if learned_epsilon: + eps = eps + 1e-4 + nu2 = tf.reduce_mean(tf.square(x), axis=[1, 2], keepdims=True) + x = x * tf.math.rsqrt(nu2 + tf.abs(eps)) + return gamma * x + beta + + def test_random_inputs(self): + inputs = np.random.rand(28, 28, 1).astype(np.float32) + frn = FilterResponseNormalization( + beta_initializer="zeros", gamma_initializer="ones" + ) + frn.build((28, 28, 1)) + observed = frn(inputs) + expected = self.calculate_frn(inputs, beta=0, gamma=1) + self.assertAllClose(expected, observed) + + frn = FilterResponseNormalization( + beta_initializer="ones", gamma_initializer="ones" + ) + frn.build((28, 28, 1)) + observed = frn(inputs) + expected = self.calculate_frn(inputs, beta=1, gamma=1) + self.assertAllClose(expected, observed) + + frn = FilterResponseNormalization( + beta_initializer=tf.keras.initializers.Constant(0.5), + gamma_initializer="ones", + ) + frn.build((28, 28, 1)) + observed = frn(inputs) + expected = 
self.calculate_frn(inputs, beta=0.5, gamma=1) + self.assertAllClose(expected, observed) + + frn = FilterResponseNormalization( + beta_initializer=tf.keras.initializers.Constant(0.5), + gamma_initializer="ones", + learned_epsilon=True, + ) + frn.build((28, 28, 1)) + observed = frn(inputs) + expected = self.calculate_frn(inputs, beta=0.5, gamma=1, learned_epsilon=True) + self.assertAllClose(expected, observed) + + def test_keras_model(self): + frn = FilterResponseNormalization( + beta_initializer="ones", gamma_initializer="ones" + ) + random_inputs = np.random.rand(10, 32, 32, 3).astype(np.float32) + random_labels = np.random.randint(2, size=(10,)).astype(np.float32) + input_layer = tf.keras.layers.Input(shape=(32, 32, 3)) + x = frn(input_layer) + x = tf.keras.layers.Flatten()(x) + out = tf.keras.layers.Dense(1, activation="sigmoid") + model = tf.keras.models.Model(input_layer, out) + model.compile(loss="binary_crossentropy", optimizer="sgd") + model.fit(random_inputs, random_labels, epochs=2) + + if __name__ == "__main__": sys.exit(pytest.main([__file__])) From d278cfa0052e079b4b650fcd19d12bd82756234e Mon Sep 17 00:00:00 2001 From: AakashKumarNain Date: Tue, 10 Mar 2020 21:41:07 +0530 Subject: [PATCH 3/8] update test cases and set seed --- .../layers/normalizations_test.py | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/tensorflow_addons/layers/normalizations_test.py b/tensorflow_addons/layers/normalizations_test.py index d072b34680..ae66885da2 100644 --- a/tensorflow_addons/layers/normalizations_test.py +++ b/tensorflow_addons/layers/normalizations_test.py @@ -341,44 +341,51 @@ def calculate_frn(self, x, beta=0.2, gamma=1, eps=1e-6, learned_epsilon=False): x = x * tf.math.rsqrt(nu2 + tf.abs(eps)) return gamma * x + beta - def test_random_inputs(self): + def set_random_seed(self): + seed = 0x2020 + np.random.seed(seed) + tf.random.set_seed(seed) + + def test_with_gamma(self): + self.set_random_seed() inputs = np.random.rand(28, 28, 1).astype(np.float32) frn = FilterResponseNormalization( beta_initializer="zeros", gamma_initializer="ones" ) frn.build((28, 28, 1)) observed = frn(inputs) + self.evaluate(tf.compat.v1.global_variables_initializer()) expected = self.calculate_frn(inputs, beta=0, gamma=1) - self.assertAllClose(expected, observed) + self.assertAllClose(expected, observed[0]) + def test_with_beta(self): + self.set_random_seed() + inputs = np.random.rand(28, 28, 1).astype(np.float32) frn = FilterResponseNormalization( beta_initializer="ones", gamma_initializer="ones" ) frn.build((28, 28, 1)) observed = frn(inputs) + self.evaluate(tf.compat.v1.global_variables_initializer()) expected = self.calculate_frn(inputs, beta=1, gamma=1) - self.assertAllClose(expected, observed) - - frn = FilterResponseNormalization( - beta_initializer=tf.keras.initializers.Constant(0.5), - gamma_initializer="ones", - ) - frn.build((28, 28, 1)) - observed = frn(inputs) - expected = self.calculate_frn(inputs, beta=0.5, gamma=1) - self.assertAllClose(expected, observed) + self.assertAllClose(expected, observed[0]) + def test_with_epsilon(self): + self.set_random_seed() + inputs = np.random.rand(28, 28, 1).astype(np.float32) frn = FilterResponseNormalization( beta_initializer=tf.keras.initializers.Constant(0.5), gamma_initializer="ones", learned_epsilon=True, ) frn.build((28, 28, 1)) + self.evaluate(tf.compat.v1.global_variables_initializer()) observed = frn(inputs) expected = self.calculate_frn(inputs, beta=0.5, gamma=1, learned_epsilon=True) - 
self.assertAllClose(expected, observed) + self.assertAllClose(expected, observed[0]) def test_keras_model(self): + self.set_random_seed() frn = FilterResponseNormalization( beta_initializer="ones", gamma_initializer="ones" ) @@ -387,7 +394,7 @@ def test_keras_model(self): input_layer = tf.keras.layers.Input(shape=(32, 32, 3)) x = frn(input_layer) x = tf.keras.layers.Flatten()(x) - out = tf.keras.layers.Dense(1, activation="sigmoid") + out = tf.keras.layers.Dense(1, activation="sigmoid")(x) model = tf.keras.models.Model(input_layer, out) model.compile(loss="binary_crossentropy", optimizer="sgd") model.fit(random_inputs, random_labels, epochs=2) From f907777b5aab57602e88a0f8ce1cc46cb3392fe4 Mon Sep 17 00:00:00 2001 From: AakashKumarNain Date: Wed, 25 Mar 2020 15:36:23 +0530 Subject: [PATCH 4/8] refactor code --- tensorflow_addons/layers/__init__.py | 2 +- tensorflow_addons/layers/normalizations.py | 70 ++++---- .../layers/normalizations_test.py | 151 ++++++++++-------- 3 files changed, 125 insertions(+), 98 deletions(-) diff --git a/tensorflow_addons/layers/__init__.py b/tensorflow_addons/layers/__init__.py index dba1e1b7c4..576ae7a4b3 100644 --- a/tensorflow_addons/layers/__init__.py +++ b/tensorflow_addons/layers/__init__.py @@ -17,9 +17,9 @@ from tensorflow_addons.layers.gelu import GELU from tensorflow_addons.layers.maxout import Maxout from tensorflow_addons.layers.multihead_attention import MultiHeadAttention +from tensorflow_addons.layers.normalizations import FilterResponseNormalization from tensorflow_addons.layers.normalizations import GroupNormalization from tensorflow_addons.layers.normalizations import InstanceNormalization -from tensorflow_addons.layers.normalizations import FilterResponseNormalization from tensorflow_addons.layers.optical_flow import CorrelationCost from tensorflow_addons.layers.poincare import PoincareNormalize from tensorflow_addons.layers.polynomial import PolynomialCrossing diff --git a/tensorflow_addons/layers/normalizations.py b/tensorflow_addons/layers/normalizations.py index 37d1834a81..4d97d3abea 100644 --- a/tensorflow_addons/layers/normalizations.py +++ b/tensorflow_addons/layers/normalizations.py @@ -335,7 +335,7 @@ class FilterResponseNormalization(tf.keras.layers.Layer): Arguments axis: List of axes that should be normalized. This should represent the - spatial dimensions. + spatial dimensions. epsilon: Small float value added to variance to avoid dividing by zero. beta_initializer: Initializer for the beta weight. gamma_initializer: Initializer for the gamma weight. @@ -350,8 +350,10 @@ class FilterResponseNormalization(tf.keras.layers.Layer): Input shape Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. This layer - wokrs on a 4-D tensor where the tensor should have the shape [N X H X W X C] + when using this layer as the first layer in a model. This layer, as of now, + works on a 4-D tensor where the tensor should have the shape [N X H X W X C] + + TODO: Add support for more input shapes, especially for FC layers. Output shape Same shape as input. 
@@ -377,7 +379,7 @@ def __init__( **kwargs ): super().__init__(name=name, **kwargs) - self.epsilon = epsilon + self.epsilon = tf.cast(epsilon, dtype=self.dtype) self.beta_initializer = tf.keras.initializers.get(beta_initializer) self.gamma_initializer = tf.keras.initializers.get(gamma_initializer) self.beta_regularizer = tf.keras.regularizers.get(beta_regularizer) @@ -392,15 +394,14 @@ def __init__( else: self.eps_learned_initializer = None - if isinstance(axis, list): - self.axis = axis[:] - elif isinstance(axis, int): - self.axis = axis - else: - raise TypeError("axis must be int or list, type given: %s" % type(axis)) - self._check_axis() + self._check_axis(axis) def build(self, input_shape): + if len(tf.TensorShape(input_shape)) != 4: + raise ValueError( + """Only 4-D tensors (CNNs) are supported + as of now.""" + ) self._check_if_input_shape_is_none(input_shape) self._create_input_spec(input_shape) self._add_gamma_weight(input_shape) @@ -408,9 +409,16 @@ def build(self, input_shape): if self.use_eps_learned: self._add_eps_learned_weight() - self.built = True super().build(input_shape) + def call(self, inputs): + epsilon = tf.math.abs(self.epsilon) + if self.use_eps_learned: + epsilon += self.eps_learned + nu2 = tf.reduce_mean(tf.square(inputs), axis=self.axis, keepdims=True) + inputs *= tf.math.rsqrt(nu2 + epsilon) + return self.gamma * inputs + self.beta + def compute_output_shape(self, input_shape): return input_shape @@ -436,14 +444,6 @@ def get_config(self): base_config = super().get_config() return dict(**base_config, **config) - def call(self, inputs): - epsilon = tf.math.abs(self.epsilon) - if self.use_eps_learned: - epsilon += self.eps_learned - nu2 = tf.reduce_mean(tf.square(inputs), axis=self.axis, keepdims=True) - inputs *= tf.math.rsqrt(nu2 + epsilon) - return self.gamma * inputs + self.beta - def _create_input_spec(self, input_shape): ndims = len(tf.TensorShape(input_shape)) for idx, x in enumerate(self.axis): @@ -461,30 +461,40 @@ def _create_input_spec(self, input_shape): axis_to_dim = {x: input_shape[x] for x in self.axis} self.input_spec = tf.keras.layers.InputSpec(ndim=ndims, axes=axis_to_dim) - def _check_axis(self): + def _check_axis(self, axis): + if isinstance(axis, (int, list)): + self.axis = axis + else: + raise TypeError( + """axis must be int or list, + type given: %s""" + % type(axis) + ) + if self.axis == 0: raise ValueError( - "You are trying to normalize your batch axis. You may want to " - "use tf.layer.batch_normalization instead" + """You are trying to normalize your batch axis. You may want to + use tf.keras.layers.batch_normalization instead.""" ) if self.axis == -1: raise ValueError( - "You are trying to normalize your channel axis. You may want to " - "use GroupNormalization layer instead from tf_addons" + """You are trying to normalize your channel axis. You may want to + use tfa.layers.GroupNormalization instead.""" ) if self.axis != [1, 2]: raise ValueError( - "FilterResponseNormalization operates on per-channel basis." - "Axis values should be spatial dimensions" + """FilterResponseNormalization operates on per-channel basis. + Axis values should be a list of spatial dimensions.""" ) def _check_if_input_shape_is_none(self, input_shape): dim1, dim2 = input_shape[self.axis[0]], input_shape[self.axis[1]] if dim1 is None or dim2 is None: raise ValueError( - "Axis " + str(self.axis) + " of " - "input tensor should have a defined dimension " - "but the layer received an input with shape " + str(input_shape) + "." 
+ """Axis {} of input tensor should have a defined dimension but + the layer received an input with shape {}.""".format( + self.axis, input_shape + ) ) def _add_gamma_weight(self, input_shape): diff --git a/tensorflow_addons/layers/normalizations_test.py b/tensorflow_addons/layers/normalizations_test.py index ae66885da2..63b15dff20 100644 --- a/tensorflow_addons/layers/normalizations_test.py +++ b/tensorflow_addons/layers/normalizations_test.py @@ -19,9 +19,9 @@ import numpy as np import tensorflow as tf +from tensorflow_addons.layers.normalizations import FilterResponseNormalization from tensorflow_addons.layers.normalizations import GroupNormalization from tensorflow_addons.layers.normalizations import InstanceNormalization -from tensorflow_addons.layers.normalizations import FilterResponseNormalization from tensorflow_addons.utils import test_utils @@ -332,72 +332,89 @@ def test_groupnorm_convnet_no_center_no_scale(self): ) -@test_utils.run_all_in_graph_and_eager_modes -class FilterResponseNormalizationTest(tf.test.TestCase): - def calculate_frn(self, x, beta=0.2, gamma=1, eps=1e-6, learned_epsilon=False): - if learned_epsilon: - eps = eps + 1e-4 - nu2 = tf.reduce_mean(tf.square(x), axis=[1, 2], keepdims=True) - x = x * tf.math.rsqrt(nu2 + tf.abs(eps)) - return gamma * x + beta - - def set_random_seed(self): - seed = 0x2020 - np.random.seed(seed) - tf.random.set_seed(seed) - - def test_with_gamma(self): - self.set_random_seed() - inputs = np.random.rand(28, 28, 1).astype(np.float32) - frn = FilterResponseNormalization( - beta_initializer="zeros", gamma_initializer="ones" - ) - frn.build((28, 28, 1)) - observed = frn(inputs) - self.evaluate(tf.compat.v1.global_variables_initializer()) - expected = self.calculate_frn(inputs, beta=0, gamma=1) - self.assertAllClose(expected, observed[0]) - - def test_with_beta(self): - self.set_random_seed() - inputs = np.random.rand(28, 28, 1).astype(np.float32) - frn = FilterResponseNormalization( - beta_initializer="ones", gamma_initializer="ones" - ) - frn.build((28, 28, 1)) - observed = frn(inputs) - self.evaluate(tf.compat.v1.global_variables_initializer()) - expected = self.calculate_frn(inputs, beta=1, gamma=1) - self.assertAllClose(expected, observed[0]) - - def test_with_epsilon(self): - self.set_random_seed() - inputs = np.random.rand(28, 28, 1).astype(np.float32) - frn = FilterResponseNormalization( - beta_initializer=tf.keras.initializers.Constant(0.5), - gamma_initializer="ones", - learned_epsilon=True, - ) - frn.build((28, 28, 1)) - self.evaluate(tf.compat.v1.global_variables_initializer()) - observed = frn(inputs) - expected = self.calculate_frn(inputs, beta=0.5, gamma=1, learned_epsilon=True) - self.assertAllClose(expected, observed[0]) - - def test_keras_model(self): - self.set_random_seed() - frn = FilterResponseNormalization( - beta_initializer="ones", gamma_initializer="ones" - ) - random_inputs = np.random.rand(10, 32, 32, 3).astype(np.float32) - random_labels = np.random.randint(2, size=(10,)).astype(np.float32) - input_layer = tf.keras.layers.Input(shape=(32, 32, 3)) - x = frn(input_layer) - x = tf.keras.layers.Flatten()(x) - out = tf.keras.layers.Dense(1, activation="sigmoid")(x) - model = tf.keras.models.Model(input_layer, out) - model.compile(loss="binary_crossentropy", optimizer="sgd") - model.fit(random_inputs, random_labels, epochs=2) +def calculate_frn( + x, beta=0.2, gamma=1, eps=1e-6, learned_epsilon=False, dtype=np.float32 +): + if learned_epsilon: + eps = eps + 1e-4 + eps = tf.cast(eps, dtype=dtype) + nu2 = 
tf.reduce_mean(tf.square(x), axis=[1, 2], keepdims=True) + x = x * tf.math.rsqrt(nu2 + tf.abs(eps)) + return gamma * x + beta + + +def set_random_seed(): + seed = 0x2020 + np.random.seed(seed) + tf.random.set_seed(seed) + + +@pytest.mark.usefixtures("maybe_run_functions_eagerly") +@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) +def test_with_beta(dtype): + set_random_seed() + inputs = np.random.rand(28, 28, 1).astype(dtype) + inputs = np.expand_dims(inputs, axis=0) + frn = FilterResponseNormalization( + beta_initializer="ones", gamma_initializer="ones", dtype=dtype + ) + frn.build((None, 28, 28, 1)) + observed = frn(inputs) + expected = calculate_frn(inputs, beta=1, gamma=1, dtype=dtype) + np.testing.assert_allclose(expected[0], observed[0]) + + +@pytest.mark.usefixtures("maybe_run_functions_eagerly") +@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) +def test_with_gamma(dtype): + set_random_seed() + inputs = np.random.rand(28, 28, 1).astype(dtype) + inputs = np.expand_dims(inputs, axis=0) + frn = FilterResponseNormalization( + beta_initializer="zeros", gamma_initializer="ones", dtype=dtype + ) + frn.build((None, 28, 28, 1)) + observed = frn(inputs) + expected = calculate_frn(inputs, beta=0, gamma=1, dtype=dtype) + np.testing.assert_allclose(expected[0], observed[0]) + + +@pytest.mark.usefixtures("maybe_run_functions_eagerly") +@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) +def test_with_epsilon(dtype): + set_random_seed() + inputs = np.random.rand(28, 28, 1).astype(dtype) + inputs = np.expand_dims(inputs, axis=0) + frn = FilterResponseNormalization( + beta_initializer=tf.keras.initializers.Constant(0.5), + gamma_initializer="ones", + learned_epsilon=True, + dtype=dtype, + ) + frn.build((None, 28, 28, 1)) + observed = frn(inputs) + expected = calculate_frn( + inputs, beta=0.5, gamma=1, learned_epsilon=True, dtype=dtype + ) + np.testing.assert_allclose(expected[0], observed[0]) + + +@pytest.mark.usefixtures("maybe_run_functions_eagerly") +@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) +def test_keras_model(dtype): + set_random_seed() + frn = FilterResponseNormalization( + beta_initializer="ones", gamma_initializer="ones", dtype=dtype + ) + random_inputs = np.random.rand(10, 32, 32, 3).astype(dtype) + random_labels = np.random.randint(2, size=(10,)).astype(dtype) + input_layer = tf.keras.layers.Input(shape=(32, 32, 3)) + x = frn(input_layer) + x = tf.keras.layers.Flatten()(x) + out = tf.keras.layers.Dense(1, activation="sigmoid")(x) + model = tf.keras.models.Model(input_layer, out) + model.compile(loss="binary_crossentropy", optimizer="sgd") + model.fit(random_inputs, random_labels, epochs=2) if __name__ == "__main__": From f14511f6bc6c495bc24322c1e645cb4a0992a1d4 Mon Sep 17 00:00:00 2001 From: AakashKumarNain Date: Wed, 25 Mar 2020 15:55:03 +0530 Subject: [PATCH 5/8] add serialization test --- tensorflow_addons/layers/normalizations_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow_addons/layers/normalizations_test.py b/tensorflow_addons/layers/normalizations_test.py index 63b15dff20..0b4ffbae41 100644 --- a/tensorflow_addons/layers/normalizations_test.py +++ b/tensorflow_addons/layers/normalizations_test.py @@ -417,5 +417,15 @@ def test_keras_model(dtype): model.fit(random_inputs, random_labels, epochs=2) +@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) +def test_serialization(dtype): + frn = FilterResponseNormalization( + 
beta_initializer="ones", gamma_initializer="ones", dtype=dtype + ) + serialized_frn = tf.keras.layers.serialize(frn) + new_layer = tf.keras.layers.deserialize(serialized_frn) + assert frn.get_config() == new_layer.get_config() + + if __name__ == "__main__": sys.exit(pytest.main([__file__])) From 2e458326f2bf605669497b1711cff16785d6349d Mon Sep 17 00:00:00 2001 From: AakashKumarNain Date: Wed, 25 Mar 2020 16:16:52 +0530 Subject: [PATCH 6/8] bug fix in serialization --- tensorflow_addons/layers/normalizations.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow_addons/layers/normalizations.py b/tensorflow_addons/layers/normalizations.py index 4d97d3abea..f85b0b3cdd 100644 --- a/tensorflow_addons/layers/normalizations.py +++ b/tensorflow_addons/layers/normalizations.py @@ -431,9 +431,6 @@ def get_config(self): "gamma_initializer": tf.keras.initializers.serialize( self.gamma_initializer ), - "eps_learned_initializer": tf.keras.initializers.serialize( - self.eps_learned_initializer - ), "beta_regularizer": tf.keras.regularizers.serialize(self.beta_regularizer), "gamma_regularizer": tf.keras.regularizers.serialize( self.gamma_regularizer From 932656f7c6e7027caffaf28bb3506bca08332458 Mon Sep 17 00:00:00 2001 From: AakashKumarNain Date: Wed, 25 Mar 2020 17:21:38 +0530 Subject: [PATCH 7/8] move epsilon weights to constructor --- tensorflow_addons/layers/normalizations.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/tensorflow_addons/layers/normalizations.py b/tensorflow_addons/layers/normalizations.py index f85b0b3cdd..88f37c72c1 100644 --- a/tensorflow_addons/layers/normalizations.py +++ b/tensorflow_addons/layers/normalizations.py @@ -391,6 +391,14 @@ def __init__( if self.use_eps_learned: self.eps_learned_initializer = tf.keras.initializers.Constant(1e-4) + self.eps_learned = self.add_weight( + shape=(1,), + name="learned_epsilon", + dtype=self.dtype, + initializer=tf.keras.initializers.get(self.eps_learned_initializer), + regularizer=None, + constraint=tf.keras.constraints.non_neg, + ) else: self.eps_learned_initializer = None @@ -406,9 +414,6 @@ def build(self, input_shape): self._create_input_spec(input_shape) self._add_gamma_weight(input_shape) self._add_beta_weight(input_shape) - - if self.use_eps_learned: - self._add_eps_learned_weight() super().build(input_shape) def call(self, inputs): @@ -521,14 +526,3 @@ def _add_beta_weight(self, input_shape): regularizer=self.beta_regularizer, constraint=self.beta_constraint, ) - - def _add_eps_learned_weight(self): - shape = (1,) - self.eps_learned = self.add_weight( - shape=shape, - name="learned_epsilon", - dtype=self.dtype, - initializer=tf.keras.initializers.get(self.eps_learned_initializer), - regularizer=None, - constraint=tf.keras.constraints.non_neg, - ) From 618c83fca673c7bf1b248c3649406d0fb426ee56 Mon Sep 17 00:00:00 2001 From: AakashKumarNain Date: Thu, 26 Mar 2020 22:13:48 +0530 Subject: [PATCH 8/8] remove extra checks, add TODO and add grads check for epsilon --- tensorflow_addons/layers/normalizations.py | 44 +++++++++---------- .../layers/normalizations_test.py | 21 +++++++++ 2 files changed, 41 insertions(+), 24 deletions(-) diff --git a/tensorflow_addons/layers/normalizations.py b/tensorflow_addons/layers/normalizations.py index 88f37c72c1..ca2cad07fc 100644 --- a/tensorflow_addons/layers/normalizations.py +++ b/tensorflow_addons/layers/normalizations.py @@ -336,7 +336,7 @@ class FilterResponseNormalization(tf.keras.layers.Layer): Arguments axis: List of axes that should be 
normalized. This should represent the spatial dimensions. - epsilon: Small float value added to variance to avoid dividing by zero. + epsilon: Small positive float value added to variance to avoid dividing by zero. beta_initializer: Initializer for the beta weight. gamma_initializer: Initializer for the gamma weight. beta_regularizer: Optional regularizer for the beta weight. @@ -353,7 +353,7 @@ class FilterResponseNormalization(tf.keras.layers.Layer): when using this layer as the first layer in a model. This layer, as of now, works on a 4-D tensor where the tensor should have the shape [N X H X W X C] - TODO: Add support for more input shapes, especially for FC layers. + TODO: Add support for NCHW data format and FC layers. Output shape Same shape as input. @@ -375,11 +375,12 @@ def __init__( beta_constraint: types.Constraint = None, gamma_constraint: types.Constraint = None, learned_epsilon: bool = False, + learned_epsilon_constraint: types.Constraint = None, name: str = None, **kwargs ): super().__init__(name=name, **kwargs) - self.epsilon = tf.cast(epsilon, dtype=self.dtype) + self.epsilon = tf.math.abs(tf.cast(epsilon, dtype=self.dtype)) self.beta_initializer = tf.keras.initializers.get(beta_initializer) self.gamma_initializer = tf.keras.initializers.get(gamma_initializer) self.beta_regularizer = tf.keras.regularizers.get(beta_regularizer) @@ -391,16 +392,20 @@ def __init__( if self.use_eps_learned: self.eps_learned_initializer = tf.keras.initializers.Constant(1e-4) + self.eps_learned_constraint = tf.keras.constraints.get( + learned_epsilon_constraint + ) self.eps_learned = self.add_weight( shape=(1,), name="learned_epsilon", dtype=self.dtype, initializer=tf.keras.initializers.get(self.eps_learned_initializer), regularizer=None, - constraint=tf.keras.constraints.non_neg, + constraint=self.eps_learned_constraint, ) else: self.eps_learned_initializer = None + self.eps_learned_constraint = None self._check_axis(axis) @@ -417,12 +422,12 @@ def build(self, input_shape): super().build(input_shape) def call(self, inputs): - epsilon = tf.math.abs(self.epsilon) + epsilon = self.epsilon if self.use_eps_learned: - epsilon += self.eps_learned + epsilon += tf.math.abs(self.eps_learned) nu2 = tf.reduce_mean(tf.square(inputs), axis=self.axis, keepdims=True) - inputs *= tf.math.rsqrt(nu2 + epsilon) - return self.gamma * inputs + self.beta + normalized_inputs = inputs * tf.math.rsqrt(nu2 + epsilon) + return self.gamma * normalized_inputs + self.beta def compute_output_shape(self, input_shape): return input_shape @@ -442,6 +447,9 @@ def get_config(self): ), "beta_constraint": tf.keras.constraints.serialize(self.beta_constraint), "gamma_constraint": tf.keras.constraints.serialize(self.gamma_constraint), + "learned_epsilon_constraint": tf.keras.constraints.serialize( + self.eps_learned_constraint + ), } base_config = super().get_config() return dict(**base_config, **config) @@ -464,25 +472,13 @@ def _create_input_spec(self, input_shape): self.input_spec = tf.keras.layers.InputSpec(ndim=ndims, axes=axis_to_dim) def _check_axis(self, axis): - if isinstance(axis, (int, list)): - self.axis = axis - else: + if not isinstance(axis, list): raise TypeError( - """axis must be int or list, - type given: %s""" - % type(axis) + """Expected a list of values but got {}.""".format(type(axis)) ) + else: + self.axis = axis - if self.axis == 0: - raise ValueError( - """You are trying to normalize your batch axis. 
You may want to
-                use tf.keras.layers.batch_normalization instead."""
-            )
-        if self.axis == -1:
-            raise ValueError(
-                """You are trying to normalize your channel axis. You may want to
-                use tfa.layers.GroupNormalization instead."""
-            )
         if self.axis != [1, 2]:
             raise ValueError(
                 """FilterResponseNormalization operates on per-channel basis.
                 Axis values should be a list of spatial dimensions."""
             )
diff --git a/tensorflow_addons/layers/normalizations_test.py b/tensorflow_addons/layers/normalizations_test.py
index 0b4ffbae41..3f209c9264 100644
--- a/tensorflow_addons/layers/normalizations_test.py
+++ b/tensorflow_addons/layers/normalizations_test.py
@@ -427,5 +427,26 @@ def test_serialization(dtype):
     assert frn.get_config() == new_layer.get_config()
 
 
+@pytest.mark.usefixtures("maybe_run_functions_eagerly")
+@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
+def test_eps_grads(dtype):
+    set_random_seed()
+    random_inputs = np.random.rand(10, 32, 32, 3).astype(np.float32)
+    random_labels = np.random.randint(2, size=(10,)).astype(np.float32)
+    input_layer = tf.keras.layers.Input(shape=(32, 32, 3))
+    frn = FilterResponseNormalization(
+        beta_initializer="ones", gamma_initializer="ones", learned_epsilon=True
+    )
+    initial_eps_value = frn.eps_learned.numpy()[0]
+    x = frn(input_layer)
+    x = tf.keras.layers.Flatten()(x)
+    out = tf.keras.layers.Dense(1, activation="sigmoid")(x)
+    model = tf.keras.models.Model(input_layer, out)
+    model.compile(loss="binary_crossentropy", optimizer="sgd")
+    model.fit(random_inputs, random_labels, epochs=1)
+    final_eps_value = frn.eps_learned.numpy()[0]
+    assert initial_eps_value != final_eps_value
+
+
 if __name__ == "__main__":
     sys.exit(pytest.main([__file__]))
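
Below is a minimal usage sketch of the layer as it stands at the end of this series (plain FRN; the TLU threshold from the paper was dropped in patch 2). The surrounding convnet, the trailing ReLU, and the training settings are illustrative assumptions, not part of the PR:

    import numpy as np
    import tensorflow as tf
    from tensorflow_addons.layers import FilterResponseNormalization

    # FRN normalizes each sample over the spatial axes [1, 2] of a
    # channels-last 4-D tensor, so it has no batch-size dependency.
    model = tf.keras.Sequential(
        [
            tf.keras.layers.Conv2D(16, 3, padding="same", input_shape=(32, 32, 3)),
            FilterResponseNormalization(learned_epsilon=True),
            tf.keras.layers.ReLU(),
            tf.keras.layers.GlobalAveragePooling2D(),
            tf.keras.layers.Dense(10, activation="softmax"),
        ]
    )
    model.compile(optimizer="sgd", loss="sparse_categorical_crossentropy")
    model.fit(
        np.random.rand(8, 32, 32, 3).astype("float32"),
        np.random.randint(10, size=(8,)),
        epochs=1,
    )

Because the normalization is computed per sample and per channel, the same sketch behaves identically at batch size 1.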