Commit 20bb03c

Implement MovingAverage optimizer
* Port MovingAverageOptimizer from tf.contrib.opt
* Inherits from the base Keras optimizer_v2
* `swapping_saver` replaced with `assign_average_vars`
* Update test cases for TF 2.x
* Update docs
1 parent 2b8e0af commit 20bb03c
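
The headline API change is the `swapping_saver` bullet: instead of a saver that swaps weights at checkpoint time, the averaged values are copied into the model explicitly via `assign_average_vars`. A minimal sketch of the resulting TF2-style workflow, assuming the wrapper behaves as a drop-in Keras optimizer (mirroring the docstrings in the diff below; the toy data and model are placeholders, not part of the commit):

```python
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa

# Hypothetical toy data, only to make the sketch runnable.
x = np.random.rand(32, 4).astype("float32")
y = np.random.rand(32, 1).astype("float32")

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])

# Wrap any Keras optimizer; MovingAverage applies the inner optimizer's
# update, then refreshes an exponential moving average of the variables.
opt = tfa.optimizers.MovingAverage(
    tf.keras.optimizers.SGD(learning_rate=0.1), average_decay=0.5)

model.compile(optimizer=opt, loss="mse")
model.fit(x, y, epochs=2, verbose=0)

# Where tf.contrib.opt users called swapping_saver, the TF2 flow is an
# explicit copy of the averages into the model weights before saving.
opt.assign_average_vars(model.variables)
model.save("model.h5")
```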

File tree

5 files changed: +249 −0 lines changed

tensorflow_addons/optimizers/BUILD
tensorflow_addons/optimizers/README.md
tensorflow_addons/optimizers/__init__.py
tensorflow_addons/optimizers/moving_average.py
tensorflow_addons/optimizers/moving_average_test.py

tensorflow_addons/optimizers/BUILD

Lines changed: 1 addition & 0 deletions
@@ -7,6 +7,7 @@ py_library(
     srcs = [
         "__init__.py",
         "lazy_adam.py",
+        "moving_average.py",
     ],
     srcs_version = "PY2AND3",
     deps = [

tensorflow_addons/optimizers/README.md

Lines changed: 2 additions & 0 deletions
@@ -4,11 +4,13 @@
 | Submodule | Maintainers | Contact Info |
 |:---------- |:------------- |:--------------|
 | lazy_adam | SIG-Addons | [email protected] |
+| moving_average | Dheeraj R. Reddy | [email protected] |

 ## Components
 | Submodule | Optimizer | Reference |
 |:----------------------- |:---------------------- |:---------|
 | lazy_adam | LazyAdam | https://arxiv.org/abs/1412.6980 |
+| moving_average | MovingAverage | |


 ## Contribution Guidelines

tensorflow_addons/optimizers/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -19,3 +19,4 @@
 from __future__ import print_function

 from tensorflow_addons.optimizers.lazy_adam import LazyAdam
+from tensorflow_addons.optimizers.moving_average import MovingAverage
tensorflow_addons/optimizers/moving_average.py

Lines changed: 127 additions & 0 deletions (new file)

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow_addons.utils import keras_utils


@keras_utils.register_keras_custom_object
class MovingAverage(tf.keras.optimizers.Optimizer):
    """Optimizer that computes a moving average of the variables.

    Empirically it has been found that using the moving average of the trained
    parameters of a deep network is better than using its trained parameters
    directly. This optimizer allows you to compute this moving average and
    swap the variables at save time so that any code outside of the training
    loop will by default use the averaged values instead of the original ones.

    Example of usage:

    ```python
    opt = tf.keras.optimizers.SGD(learning_rate)
    opt = tfa.optimizers.MovingAverage(opt)
    ```
    """

    def __init__(self,
                 optimizer,
                 average_decay=0.1,
                 num_updates=None,
                 seq_update=True,
                 name="MovingAverage",
                 **kwargs):
        super(MovingAverage, self).__init__(name, **kwargs)

        if not isinstance(optimizer, tf.keras.optimizers.Optimizer):
            raise TypeError(
                "optimizer is not an object of tf.keras.optimizers.Optimizer")

        self._optimizer = optimizer

        with tf.keras.backend.name_scope(self.__class__.__name__):
            self._ema = tf.train.ExponentialMovingAverage(
                average_decay, num_updates=num_updates)

        self._average_decay = average_decay
        self._num_updates = num_updates
        self._seq_update = seq_update

    def _create_slots(self, var_list):
        self._optimizer._create_slots(var_list)  # pylint: disable=protected-access

    def _resource_apply_dense(self, grad, var):
        return self._optimizer._resource_apply_dense(grad, var)  # pylint: disable=protected-access

    def _resource_apply_sparse_duplicate_indices(self, grad, var, indices):
        return self._optimizer._resource_apply_sparse_duplicate_indices(  # pylint: disable=protected-access
            grad, var, indices)

    def _resource_apply_sparse(self, grad, var, indices):
        return self._optimizer._resource_apply_sparse(grad, var, indices)  # pylint: disable=protected-access

    def apply_gradients(self, grads_and_vars, name=None):
        train_op = self._optimizer.apply_gradients(grads_and_vars, name=name)
        var_list = [v for (_, v) in grads_and_vars]

        if self._seq_update:
            # Sequence the moving-average update after the training op.
            with tf.control_dependencies([train_op]):
                ma_op = self._ema.apply(var_list)
        else:
            ma_op = self._ema.apply(var_list)

        return tf.group(train_op, ma_op, name="train_with_avg")

    def get_config(self):
        config = {
            'average_decay': self._average_decay,
            'num_updates': self._num_updates,
            'seq_update': self._seq_update
        }
        base_config = self._optimizer.get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def assign_average_vars(self, var_list):
        """Update variables in var_list with the running mean of the variables.

        Example:
        ```python
        model = tf.keras.Sequential([...])
        opt = tfa.optimizers.MovingAverage(
            tf.keras.optimizers.SGD(lr=2.0), 0.5)

        model.compile(opt, ...)
        model.fit(x, y, ...)

        # Update the weights to their mean before saving
        opt.assign_average_vars(model.variables)

        model.save('model.h5')
        ```
        """
        assign = tf.group([v.assign(self._ema.average(v)) for v in var_list])
        return assign

    @property
    def weights(self):
        return self._optimizer.weights
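
For reference when reading the tests below: `tf.train.ExponentialMovingAverage` keeps a shadow copy of each variable (initialized to the variable's value when the slot is created) and, on every `apply`, updates it as `shadow = decay * shadow + (1 - decay) * value`; `seq_update` only controls whether that update is sequenced after the training op. A minimal plain-Python sketch reproducing the first numbers `test_run` asserts (values taken from the test, no TF required):

```python
decay = 0.5            # average_decay in the test
lr = 2.0               # SGD learning rate in the test
var0, grad0 = 1.0, 0.1

# One SGD step: 1.0 - 2.0 * 0.1 = 0.8
var0 = var0 - lr * grad0

# One EMA update; the shadow starts at the variable's initial
# value (1.0): 0.5 * 1.0 + 0.5 * 0.8 = 0.9
shadow0 = decay * 1.0 + (1 - decay) * var0

print(var0, shadow0)  # 0.8 0.9 -- matches the test's assertAllClose values
```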
tensorflow_addons/optimizers/moving_average_test.py

Lines changed: 118 additions & 0 deletions (new file)

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for MovingAverage optimizers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import six

import tensorflow as tf

from tensorflow_addons.optimizers import moving_average
from tensorflow_addons.utils import test_utils


class MovingAverageTest(tf.test.TestCase):
    @test_utils.run_deprecated_v1
    def test_run(self):
        for seq_update in [True, False]:
            orig_var0 = [1.0, 2.0]
            orig_var1 = [3.0, 4.0]

            var0 = tf.Variable(orig_var0)
            var1 = tf.Variable(orig_var1)

            grads0 = tf.constant([0.1, 0.1])
            grads1 = tf.constant([0.01, 0.01])

            opt = moving_average.MovingAverage(
                tf.keras.optimizers.SGD(lr=2.0),
                average_decay=0.5,
                seq_update=seq_update)

            update = opt.apply_gradients(
                list(six.moves.zip([grads0, grads1], [var0, var1])))

            ema_var0 = opt._ema.average(var0)  # pylint: disable=protected-access
            ema_var1 = opt._ema.average(var1)  # pylint: disable=protected-access

            self.evaluate(tf.compat.v1.global_variables_initializer())
            self.evaluate(update)

            self.assertAllClose(var0.read_value(), [0.8, 1.8])
            self.assertAllClose(var1.read_value(), [2.98, 3.98])

            if seq_update:
                self.assertAllClose(ema_var0.read_value(), [0.9, 1.9])
                self.assertAllClose(ema_var1.read_value(), [2.99, 3.99])

            assign = opt.assign_average_vars([var0, var1])
            self.evaluate(assign)

            if seq_update:
                self.assertAllClose(self.evaluate(var0), [0.9, 1.9])
                self.assertAllClose(self.evaluate(var1), [2.99, 3.99])

            perturb = tf.group([
                var0.assign_add([1.0, 1.0]),
                var1.assign_add([2.0, 2.0]),
                ema_var0.assign_add([3.0, 3.0]),
                ema_var1.assign_add([4.0, 4.0])
            ])
            self.evaluate(perturb)

            if seq_update:
                self.assertAllClose(self.evaluate(var0), [1.9, 2.9])
                self.assertAllClose(self.evaluate(var1), [4.99, 5.99])
                self.assertAllClose(self.evaluate(ema_var0), [3.9, 4.9])
                self.assertAllClose(self.evaluate(ema_var1), [6.99, 7.99])

    @test_utils.run_in_graph_and_eager_modes
    def test_opt_failure(self):
        base_opt = None
        for seq_update in [True, False]:
            with self.assertRaises(TypeError):
                moving_average.MovingAverage(base_opt, 0.5, seq_update=seq_update)

    @test_utils.run_deprecated_v1
    def test_model_weights_update(self):
        grad = tf.Variable([[0.1]])
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(
                1,
                kernel_initializer=tf.keras.initializers.Constant([[1.0]]),
                use_bias=False)
        ])

        model.build(input_shape=[1, 1])

        opt = moving_average.MovingAverage(
            tf.keras.optimizers.SGD(lr=2.0), 0.5)

        update = opt.apply_gradients(
            list(six.moves.zip([grad], model.variables)))

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(update)
        self.assertAllClose(model.variables[0].read_value(), [[0.8]])

        mean_update = opt.assign_average_vars(model.variables)
        self.evaluate(mean_update)
        self.assertAllClose(model.variables[0].read_value(), [[0.9]])


if __name__ == '__main__':
    tf.test.main()
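
One aspect the tests do not exercise is serialization: `get_config` merges the wrapped optimizer's config with the wrapper's own keys, so a saved config carries both sets of hyperparameters. An illustrative sketch; the exact SGD keys shown in the comment are assumptions, not output from this commit:

```python
import tensorflow as tf
import tensorflow_addons as tfa

opt = tfa.optimizers.MovingAverage(
    tf.keras.optimizers.SGD(learning_rate=2.0), average_decay=0.5)

# Roughly: SGD's config plus the MovingAverage-specific entries, e.g.
# {'name': 'SGD', 'learning_rate': 2.0, ...,
#  'average_decay': 0.5, 'num_updates': None, 'seq_update': True}
print(opt.get_config())
```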
