From 77ee266d42faf2e68d22c9657c24ff2de9e224e7 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Fri, 25 Sep 2020 12:02:51 -0700
Subject: [PATCH 01/23] Fix MultiOptimizer list of layers

---
 .../discriminative_layer_training.py      | 78 +++++++++----------
 .../discriminative_layer_training_test.py | 57 ++++++++++++--
 2 files changed, 87 insertions(+), 48 deletions(-)

diff --git a/tensorflow_addons/optimizers/discriminative_layer_training.py b/tensorflow_addons/optimizers/discriminative_layer_training.py
index 494c29d365..d06e0c9401 100644
--- a/tensorflow_addons/optimizers/discriminative_layer_training.py
+++ b/tensorflow_addons/optimizers/discriminative_layer_training.py
@@ -14,7 +14,7 @@
 # ==============================================================================
 """Discriminative Layer Training Optimizer for TensorFlow."""
 
-from typing import Union
+from typing import List, Union
 
 import tensorflow as tf
 from typeguard import typechecked
@@ -29,50 +29,46 @@ class MultiOptimizer(tf.keras.optimizers.Optimizer):
 
     Each optimizer will optimize only the weights associated with its paired layer. This can be used
     to implement discriminative layer training by assigning different learning rates to each optimizer
-    layer pair. (Optimizer, list(Layers)) pairs are also supported. Please note that the layers must be
-    instantiated before instantiating the optimizer.
+    layer pair. `(tf.keras.optimizers.Optimizer, List[tf.keras.layers.Layer])` pairs are also supported.
+    Please note that the layers must be instantiated before instantiating the optimizer.
 
     Args:
         optimizers_and_layers: a list of tuples of an optimizer and a layer or model. Each tuple should contain
-            exactly 1 instantiated optimizer and 1 object that subclasses tf.keras.Model or tf.keras.Layer. Nested
+            exactly 1 instantiated optimizer and 1 object that subclasses `tf.keras.Model` or `tf.keras.layers.Layer`. Nested
             layers and models will be automatically discovered. Alternatively, in place of a single layer, you can pass
             a list of layers.
         optimizer_specs: specialized list for serialization. Should be left as None for almost all cases. If you are
-            loading a serialized version of this optimizer, please use tf.keras.models.load_model after saving a
+            loading a serialized version of this optimizer, please use `tf.keras.models.load_model` after saving a
             model compiled with this optimizer.
 
     Usage:
 
-    ```python
-    model = get_model()
-
-    opt1 = tf.keras.optimizers.Adam(learning_rate=1e-4)
-    opt2 = tf.keras.optimizers.Adam(learning_rate=1e-2)
-
-    opt_layer_pairs = [(opt1, model.layers[0]), (opt2, model.layers[1:])]
-
-    loss = tf.keras.losses.MSE
-    optimizer = tfa.optimizers.MultiOpt(opt_layer_pairs)
-
-    model.compile(optimizer=optimizer, loss = loss)
-
-    model.fit(x,y)
-    '''
+    >>> model = tf.keras.Sequential([
+    ...     tf.keras.Input(shape=(4,)),
+    ...     tf.keras.layers.Dense(8),
+    ...     tf.keras.layers.Dense(16),
+    ...     tf.keras.layers.Dense(32),
+    ... ])
+    >>> optimizer1 = tf.keras.optimizers.Adam(learning_rate=1e-4)
+    >>> optimizer2 = tf.keras.optimizers.Adam(learning_rate=1e-2)
+    >>> optimizers_and_layers = [(optimizer1, model.layers[0]), (optimizer2, model.layers[1:])]
+    >>> optimizer = tfa.optimizers.MultiOptimizer(optimizers_and_layers)
+    >>> model.compile(optimizer=optimizer, loss="mse")
 
     Reference:
-    [Universal Language Model Fine-tuning for Text Classification](https://arxiv.org/abs/1801.06146)
-    [Collaborative Layer-wise Discriminative Learning in Deep Neural Networks](https://arxiv.org/abs/1607.05440)
+        - [Universal Language Model Fine-tuning for Text Classification](https://arxiv.org/abs/1801.06146)
+        - [Collaborative Layer-wise Discriminative Learning in Deep Neural Networks](https://arxiv.org/abs/1607.05440)
 
-    Notes:
+    Note:
 
-    Currently, MultiOpt does not support callbacks that modify optimizers. However, you can instantiate
-    optimizer layer pairs with tf.keras.optimizers.schedules.LearningRateSchedule instead of a static learning
-    rate.
+    Currently, MultiOpt does not support callbacks that modify optimizers.
+    However, you can instantiate optimizer layer pairs with
+    `tf.keras.optimizers.schedules.LearningRateSchedule`
+    instead of a static learning rate.
 
-    This code should function on CPU, GPU, and TPU. Apply the with strategy.scope() context as you
+    This code should function on CPU, GPU, and TPU. Apply the with `tf.distribute.Strategy().scope()` context as you
     would with any other optimizer.
-
     """
 
     @typechecked
@@ -131,29 +127,29 @@ def get_config(self):
         return config
 
     @classmethod
-    def create_optimizer_spec(cls, optimizer_instance, layer):
-
-        assert isinstance(
-            optimizer_instance, tf.keras.optimizers.Optimizer
-        ), "Object passed is not an instance of tf.keras.optimizers.Optimizer"
-
-        assert isinstance(layer, tf.keras.layers.Layer) or isinstance(
-            layer, tf.keras.Model
-        ), "Object passed is not an instance of tf.keras.layers.Layer nor tf.keras.Model"
-
-        if type(layer) == list:
+    def create_optimizer_spec(
+        cls,
+        optimizer: tf.keras.optimizers.Optimizer,
+        layer: Union[
+            tf.keras.Model,
+            tf.keras.Sequential,
+            tf.keras.layers.Layer,
+            List[tf.keras.layers.Layer],
+        ],
+    ):
+        if isinstance(layer, list):
             weights = [var.name for sublayer in layer for var in sublayer.weights]
         else:
             weights = [var.name for var in layer.weights]
 
         return {
-            "optimizer": optimizer_instance,
+            "optimizer": optimizer,
             "weights": weights,
         }
 
     @classmethod
     def maybe_initialize_optimizer_spec(cls, optimizer_spec):
-        if type(optimizer_spec["optimizer"]) == dict:
+        if isinstance(optimizer_spec["optimizer"], dict):
             optimizer_spec["optimizer"] = tf.keras.optimizers.deserialize(
                 optimizer_spec["optimizer"]
             )
diff --git a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
index 08a096b840..3324859885 100644
--- a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
+++ b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
@@ -23,6 +23,7 @@
 def _dtypes_to_test(use_gpu):
+    # TODO(WindQAQ): Clean up this in TF2.4
     # Based on issue #347 in the following link,
     # "https://github.com/tensorflow/addons/issues/347"
     # tf.half is not registered for 'ResourceScatterUpdate' OpKernel
     # for 'GPU' devices.
     # So we have to remove tf.half when testing with gpu.
     # The function "_DtypesToTest" is from
     # "https://github.com/tensorflow/tensorflow/blob/5d4a6cee737a1dc6c20172a1dc1
     # 5df10def2df72/tensorflow/python/kernel_tests/conv_ops_3d_test.py#L53-L62"
-    # TODO(WindQAQ): Clean up this in TF2.4
-
     if use_gpu:
         return [tf.float32, tf.float64]
     else:
         return [tf.half, tf.float32, tf.float64]
@@ -42,9 +41,8 @@ def _dtypes_to_test(use_gpu):
 @pytest.mark.with_device(["cpu", "gpu"])
 @pytest.mark.parametrize("dtype", [tf.float16, tf.float32, tf.float64])
 @pytest.mark.parametrize("serialize", [True, False])
-def test_fit_layer_optimizer(dtype, device, serialize):
+def test_fit_layer_optimizer(dtype, device, serialize, tmpdir):
     # Test ensures that each optimizer is only optimizing its own layer with its learning rate
-
     if "gpu" in device and dtype == tf.float16:
         pytest.xfail("See https://github.com/tensorflow/addons/issues/347")
 
@@ -72,9 +70,9 @@ def test_fit_layer_optimizer(dtype, device, serialize):
     # serialize whole model including optimizer, clear the session, then reload the whole model.
     if serialize:
-        model.save("test", save_format="tf")
+        model.save(tmpdir, save_format="tf")
         tf.keras.backend.clear_session()
-        model = tf.keras.models.load_model("test")
+        model = tf.keras.models.load_model(tmpdir)
 
     model.fit(x, y, batch_size=8, epochs=10)
 
@@ -95,8 +93,53 @@ def test_fit_layer_optimizer(dtype, device, serialize):
     )
 
 
-def test_serialization():
+def test_list_of_layers():
+    model = tf.keras.Sequential(
+        [
+            tf.keras.Input(shape=(4,)),
+            tf.keras.layers.Dense(16),
+            tf.keras.layers.Dense(16),
+            tf.keras.layers.Dense(32),
+            tf.keras.layers.Dense(32),
+        ]
+    )
+
+    optimizers_and_layers = [
+        (tf.keras.optimizers.SGD(learning_rate=0.0), model.layers[0]),
+        (tf.keras.optimizers.Adam(), model.layers[1]),
+        (tf.keras.optimizers.Adam(), model.layers[2:]),
+    ]
+
+    weights_before_train = [
+        [weight.numpy() for weight in layer.weights] for layer in model.layers
+    ]
+
+    multi_optimizer = MultiOptimizer(optimizers_and_layers)
+    model.compile(multi_optimizer, loss="mse")
+
+    x = np.random.rand(128, 4)
+    y = np.random.rand(128, 32)
+    model.fit(x, y, batch_size=32, epochs=10)
+
+    loss = model.evaluate(x, y)
+    assert loss < 0.15
+
+    weights_after_train = [
+        [weight.numpy() for weight in layer.weights] for layer in model.layers
+    ]
+
+    for w_before, w_after in zip(weights_before_train[0], weights_after_train[0]):
+        test_utils.assert_allclose_according_to_type(w_before, w_after)
+
+    for layer_before, layer_after in zip(
+        weights_before_train[1:], weights_after_train[1:]
+    ):
+        for w_before, w_after in zip(layer_before, layer_after):
+            with np.testing.assert_raises(AssertionError):
+                test_utils.assert_allclose_according_to_type(w_before, w_after)
+
+
+def test_serialization():
     model = tf.keras.Sequential(
         [tf.keras.Input(shape=[1]), tf.keras.layers.Dense(1), tf.keras.layers.Dense(1)]
     )
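As a companion to the doctest above, the list-of-layers pairing this patch fixes can be exercised end to end. The following is a minimal sketch, assuming TensorFlow 2.x and a tensorflow-addons build containing this patch; the layer sizes and random data are purely illustrative:

```python
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa

model = tf.keras.Sequential([
    tf.keras.Input(shape=(4,)),
    tf.keras.layers.Dense(16),
    tf.keras.layers.Dense(32),
    tf.keras.layers.Dense(32),
])

# A pair may hold a single layer or a list of layers; the list form is the
# case this patch fixes (each sublayer's weights are collected into one spec).
optimizers_and_layers = [
    (tf.keras.optimizers.Adam(learning_rate=1e-4), model.layers[0]),
    (tf.keras.optimizers.Adam(learning_rate=1e-2), model.layers[1:]),
]

optimizer = tfa.optimizers.MultiOptimizer(optimizers_and_layers)
model.compile(optimizer=optimizer, loss="mse")
model.fit(np.random.rand(128, 4), np.random.rand(128, 32), epochs=2)
```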
From 7ba62e1229fc453cdf29526399f9f674858a2692 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Fri, 25 Sep 2020 12:06:41 -0700
Subject: [PATCH 02/23] Fix name

---
 tensorflow_addons/optimizers/discriminative_layer_training.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow_addons/optimizers/discriminative_layer_training.py b/tensorflow_addons/optimizers/discriminative_layer_training.py
index d06e0c9401..d8c6f21d8f 100644
--- a/tensorflow_addons/optimizers/discriminative_layer_training.py
+++ b/tensorflow_addons/optimizers/discriminative_layer_training.py
@@ -95,10 +95,10 @@ def __init__(
 
         else:
             raise RuntimeError(
-                "You must specify either an list of optimizers and layers or a list of optimizer_specs"
+                "Must specify either an list of optimizers and layers or a list of optimizer_specs"
             )
 
-    def apply_gradients(self, grads_and_vars, name=None, **kwargs):
+    def apply_gradients(self, grads_and_vars, **kwargs):
         """Wrapped apply_gradient method.
 
         Returns a list of tf ops to be executed.
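For orientation before the later patches touch this method again: the wrapped `apply_gradients` dispatches each incoming `(gradient, variable)` pair to the optimizer whose spec lists that variable's name, which is why the docstring stresses that names rather than `var.ref()` are stored. A simplified sketch of that dispatch logic, not the literal tensorflow-addons implementation:

```python
def route_gradients(optimizer_specs, grads_and_vars):
    """Illustrative dispatch: hand each (grad, var) to the optimizers that own var."""
    grads_and_vars = list(grads_and_vars)  # may arrive as a zip iterator
    for spec in optimizer_specs:
        # Each spec is a dict holding an instantiated optimizer and the names
        # of the variables it owns (see create_optimizer_spec in PATCH 01).
        owned = [
            (grad, var)
            for grad, var in grads_and_vars
            if var.name in spec["weights"]
        ]
        if owned:
            spec["optimizer"].apply_gradients(owned)
```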
From 8217c81ef3833b8de5458dc14e710a50bd2fbd56 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Fri, 25 Sep 2020 12:12:39 -0700
Subject: [PATCH 03/23] Remove unused tests

---
 .../discriminative_layer_training_test.py | 23 +------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
index 3324859885..e87f8199fc 100644
--- a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
+++ b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
@@ -22,30 +22,9 @@
 from tensorflow_addons.utils import test_utils
 
 
-def _dtypes_to_test(use_gpu):
-    # TODO(WindQAQ): Clean up this in TF2.4
-    # Based on issue #347 in the following link,
-    # "https://github.com/tensorflow/addons/issues/347"
-    # tf.half is not registered for 'ResourceScatterUpdate' OpKernel
-    # for 'GPU' devices.
-    # So we have to remove tf.half when testing with gpu.
-    # The function "_DtypesToTest" is from
-    # "https://github.com/tensorflow/tensorflow/blob/5d4a6cee737a1dc6c20172a1dc1
-    # 5df10def2df72/tensorflow/python/kernel_tests/conv_ops_3d_test.py#L53-L62"
-    if use_gpu:
-        return [tf.float32, tf.float64]
-    else:
-        return [tf.half, tf.float32, tf.float64]
-
-
 @pytest.mark.with_device(["cpu", "gpu"])
-@pytest.mark.parametrize("dtype", [tf.float16, tf.float32, tf.float64])
 @pytest.mark.parametrize("serialize", [True, False])
-def test_fit_layer_optimizer(dtype, device, serialize, tmpdir):
-    # Test ensures that each optimizer is only optimizing its own layer with its learning rate
-    if "gpu" in device and dtype == tf.float16:
-        pytest.xfail("See https://github.com/tensorflow/addons/issues/347")
-
+def test_fit_layer_optimizer(device, serialize, tmpdir):
     model = tf.keras.Sequential(
         [tf.keras.Input(shape=[1]), tf.keras.layers.Dense(1), tf.keras.layers.Dense(1)]
     )

From 9496b887197e343669efdf193f12dafcde2dfa50 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Fri, 25 Sep 2020 12:27:13 -0700
Subject: [PATCH 04/23] Change list to iterable

---
 .../optimizers/discriminative_layer_training.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow_addons/optimizers/discriminative_layer_training.py b/tensorflow_addons/optimizers/discriminative_layer_training.py
index d8c6f21d8f..c484e10a2b 100644
--- a/tensorflow_addons/optimizers/discriminative_layer_training.py
+++ b/tensorflow_addons/optimizers/discriminative_layer_training.py
@@ -14,7 +14,7 @@
 # ==============================================================================
 """Discriminative Layer Training Optimizer for TensorFlow."""
 
-from typing import List, Union
+from typing import Iterable, List, Union
 
 import tensorflow as tf
 from typeguard import typechecked
@@ -74,8 +74,8 @@ class MultiOptimizer(tf.keras.optimizers.Optimizer):
     @typechecked
     def __init__(
         self,
-        optimizers_and_layers: Union[list, None] = None,
-        optimizer_specs: Union[list, None] = None,
+        optimizers_and_layers: Union[Iterable, None] = None,
+        optimizer_specs: Union[Iterable, None] = None,
         name: str = "MultiOptimzer",
         **kwargs
     ):
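The `tmpdir`-based tests above revolve around one serialization round trip; spelled out as a standalone sketch, with the save path and data shapes purely illustrative:

```python
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa

model = tf.keras.Sequential(
    [tf.keras.Input(shape=[1]), tf.keras.layers.Dense(1), tf.keras.layers.Dense(1)]
)
optimizers_and_layers = [
    (tf.keras.optimizers.Adam(learning_rate=1e-3), model.layers[0]),
    (tf.keras.optimizers.SGD(learning_rate=0), model.layers[1]),
]
model.compile(tfa.optimizers.MultiOptimizer(optimizers_and_layers), loss="mse")

x, y = np.ones([100]), np.ones([100])
model.fit(x, y, batch_size=8, epochs=1)

# SavedModel round trip: the wrapper's get_config serializes optimizer_specs,
# and maybe_initialize_optimizer_spec revives dict-serialized optimizers on load.
model.save("multi_opt_model", save_format="tf")
tf.keras.backend.clear_session()
restored = tf.keras.models.load_model("multi_opt_model")
restored.fit(x, y, batch_size=8, epochs=1)
```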
From db132b768042418c4877c02a62217f0eb099e3bd Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Fri, 25 Sep 2020 12:29:35 -0700
Subject: [PATCH 05/23] Update doc

---
 .../discriminative_layer_training.py | 29 ++++++++++---------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/tensorflow_addons/optimizers/discriminative_layer_training.py b/tensorflow_addons/optimizers/discriminative_layer_training.py
index c484e10a2b..8ef338e338 100644
--- a/tensorflow_addons/optimizers/discriminative_layer_training.py
+++ b/tensorflow_addons/optimizers/discriminative_layer_training.py
@@ -24,22 +24,25 @@ class MultiOptimizer(tf.keras.optimizers.Optimizer):
     """Multi Optimizer Wrapper for Discriminative Layer Training.
 
-    Creates a wrapper around a set of instantiated optimizer layer pairs. Generally useful for transfer learning
-    of deep networks.
+    Creates a wrapper around a set of instantiated optimizer layer pairs.
+    Generally useful for transfer learning of deep networks.
 
-    Each optimizer will optimize only the weights associated with its paired layer. This can be used
-    to implement discriminative layer training by assigning different learning rates to each optimizer
+    Each optimizer will optimize only the weights associated with its paired layer.
+    This can be used to implement discriminative layer training by assigning
+    different learning rates to each optimizer
     layer pair. `(tf.keras.optimizers.Optimizer, List[tf.keras.layers.Layer])` pairs are also supported.
     Please note that the layers must be instantiated before instantiating the optimizer.
 
     Args:
-        optimizers_and_layers: a list of tuples of an optimizer and a layer or model. Each tuple should contain
-            exactly 1 instantiated optimizer and 1 object that subclasses `tf.keras.Model` or `tf.keras.layers.Layer`. Nested
-            layers and models will be automatically discovered. Alternatively, in place of a single layer, you can pass
-            a list of layers.
-        optimizer_specs: specialized list for serialization. Should be left as None for almost all cases. If you are
-            loading a serialized version of this optimizer, please use `tf.keras.models.load_model` after saving a
-            model compiled with this optimizer.
+        optimizers_and_layers: a list of tuples of an optimizer and a layer or model.
+            Each tuple should contain exactly 1 instantiated optimizer and 1 object that
+            subclasses `tf.keras.Model`, `tf.keras.Sequential` or `tf.keras.layers.Layer`.
+            Nested layers and models will be automatically discovered.
+            Alternatively, in place of a single layer, you can pass a list of layers.
+        optimizer_specs: specialized list for serialization.
+            Should be left as None for almost all cases.
+            If you are loading a serialized version of this optimizer,
+            please use `tf.keras.models.load_model` after saving a model compiled with this optimizer.
 
     Usage:
 
@@ -62,7 +65,7 @@ class MultiOptimizer(tf.keras.optimizers.Optimizer):
 
     Note:
 
-    Currently, MultiOpt does not support callbacks that modify optimizers.
+    Currently, `tfa.optimizers.MultiOptimizer` does not support callbacks that modify optimizers.
     However, you can instantiate optimizer layer pairs with
     `tf.keras.optimizers.schedules.LearningRateSchedule`
     instead of a static learning rate.
@@ -76,7 +79,7 @@ def __init__(
         self,
         optimizers_and_layers: Union[Iterable, None] = None,
         optimizer_specs: Union[Iterable, None] = None,
-        name: str = "MultiOptimzer",
+        name: str = "MultiOptimizer",
         **kwargs
     ):

From 015d58110788f5c744b3d9e63ba22cf439d12c63 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Fri, 25 Sep 2020 12:31:09 -0700
Subject: [PATCH 06/23] Update code snippet

---
 .../optimizers/discriminative_layer_training.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow_addons/optimizers/discriminative_layer_training.py b/tensorflow_addons/optimizers/discriminative_layer_training.py
index 8ef338e338..1a813355ed 100644
--- a/tensorflow_addons/optimizers/discriminative_layer_training.py
+++ b/tensorflow_addons/optimizers/discriminative_layer_training.py
@@ -52,9 +52,11 @@ class MultiOptimizer(tf.keras.optimizers.Optimizer):
     ...     tf.keras.layers.Dense(16),
     ...     tf.keras.layers.Dense(32),
     ... ])
-    >>> optimizer1 = tf.keras.optimizers.Adam(learning_rate=1e-4)
-    >>> optimizer2 = tf.keras.optimizers.Adam(learning_rate=1e-2)
-    >>> optimizers_and_layers = [(optimizer1, model.layers[0]), (optimizer2, model.layers[1:])]
+    >>> optimizers = [
+    ...     tf.keras.optimizers.Adam(learning_rate=1e-4),
+    ...     tf.keras.optimizers.Adam(learning_rate=1e-2)
+    ... ]
+    >>> optimizers_and_layers = [(optimizers[0], model.layers[0]), (optimizers[1], model.layers[1:])]
     >>> optimizer = tfa.optimizers.MultiOptimizer(optimizers_and_layers)
     >>> model.compile(optimizer=optimizer, loss="mse")

From 10cc29966bad85dce12ca695b75db7cf66a59c73 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Fri, 25 Sep 2020 12:31:35 -0700
Subject: [PATCH 07/23] Update doc

---
 tensorflow_addons/optimizers/discriminative_layer_training.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow_addons/optimizers/discriminative_layer_training.py b/tensorflow_addons/optimizers/discriminative_layer_training.py
index 1a813355ed..35d5b4d79d 100644
--- a/tensorflow_addons/optimizers/discriminative_layer_training.py
+++ b/tensorflow_addons/optimizers/discriminative_layer_training.py
@@ -72,7 +72,7 @@ class MultiOptimizer(tf.keras.optimizers.Optimizer):
     `tf.keras.optimizers.schedules.LearningRateSchedule`
     instead of a static learning rate.
 
-    This code should function on CPU, GPU, and TPU. Apply the with `tf.distribute.Strategy().scope()` context as you
+    This code should function on CPU, GPU, and TPU. Apply with `tf.distribute.Strategy().scope()` context as you
     would with any other optimizer.
 
     """
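The Note polished by these doc patches compresses two usage details; a combined sketch of both follows, with the schedule values and strategy choice purely illustrative:

```python
import tensorflow as tf
import tensorflow_addons as tfa

# 1. A LearningRateSchedule can replace a static learning rate in any pair,
#    the suggested workaround for callbacks that would mutate optimizers.
schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-2, decay_steps=1000, decay_rate=0.9
)

# 2. Build the model and the wrapper inside a distribution strategy scope,
#    exactly as you would with a single optimizer.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(4,)),
        tf.keras.layers.Dense(16),
        tf.keras.layers.Dense(32),
    ])
    optimizer = tfa.optimizers.MultiOptimizer([
        (tf.keras.optimizers.Adam(learning_rate=1e-4), model.layers[0]),
        (tf.keras.optimizers.Adam(learning_rate=schedule), model.layers[1]),
    ])
    model.compile(optimizer=optimizer, loss="mse")
```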
""" From 049ee25d4414076994d4326e4e70d9f3b1dfbd87 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Fri, 25 Sep 2020 12:32:17 -0700 Subject: [PATCH 08/23] Back to list --- .../optimizers/discriminative_layer_training.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow_addons/optimizers/discriminative_layer_training.py b/tensorflow_addons/optimizers/discriminative_layer_training.py index 35d5b4d79d..800cfb7426 100644 --- a/tensorflow_addons/optimizers/discriminative_layer_training.py +++ b/tensorflow_addons/optimizers/discriminative_layer_training.py @@ -14,7 +14,7 @@ # ============================================================================== """Discriminative Layer Training Optimizer for TensorFlow.""" -from typing import Iterable, List, Union +from typing import List, Union import tensorflow as tf from typeguard import typechecked @@ -79,8 +79,8 @@ class MultiOptimizer(tf.keras.optimizers.Optimizer): @typechecked def __init__( self, - optimizers_and_layers: Union[Iterable, None] = None, - optimizer_specs: Union[Iterable, None] = None, + optimizers_and_layers: Union[list, None] = None, + optimizer_specs: Union[list, None] = None, name: str = "MultiOptimizer", **kwargs ): From a95bf2f2ab2e14a61ed3239f8d65135c7fdd4c30 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Fri, 25 Sep 2020 12:37:39 -0700 Subject: [PATCH 09/23] Update error message --- tensorflow_addons/optimizers/discriminative_layer_training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow_addons/optimizers/discriminative_layer_training.py b/tensorflow_addons/optimizers/discriminative_layer_training.py index 800cfb7426..5792c176a5 100644 --- a/tensorflow_addons/optimizers/discriminative_layer_training.py +++ b/tensorflow_addons/optimizers/discriminative_layer_training.py @@ -100,7 +100,7 @@ def __init__( else: raise RuntimeError( - "Must specify either an list of optimizers and layers or a list of optimizer_specs" + "Must specify one of `optimizers_and_layers` or `optimizer_specs`." ) def apply_gradients(self, grads_and_vars, **kwargs): From 919390c3352a0efac56c74a7ae6d921a56724171 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Fri, 25 Sep 2020 12:43:03 -0700 Subject: [PATCH 10/23] Update doc --- .../optimizers/discriminative_layer_training.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow_addons/optimizers/discriminative_layer_training.py b/tensorflow_addons/optimizers/discriminative_layer_training.py index 5792c176a5..3e645eea10 100644 --- a/tensorflow_addons/optimizers/discriminative_layer_training.py +++ b/tensorflow_addons/optimizers/discriminative_layer_training.py @@ -106,8 +106,7 @@ def __init__( def apply_gradients(self, grads_and_vars, **kwargs): """Wrapped apply_gradient method. - Returns a list of tf ops to be executed. - Name of variable is used rather than var.ref() to enable serialization and deserialization. + Returns an operation to be executed. """ for spec in self.optimizer_specs: @@ -142,6 +141,10 @@ def create_optimizer_spec( List[tf.keras.layers.Layer], ], ): + """Creates a serializable optimizer spec. + + The name of each variable is used rather than `var.ref()` to enable serialization and deserialization. 
+ """ if isinstance(layer, list): weights = [var.name for sublayer in layer for var in sublayer.weights] else: From 3dbcaa8119420ddd1e1c3682351ae29d21be272f Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Fri, 25 Sep 2020 12:43:12 -0700 Subject: [PATCH 11/23] Fix tmpdir fixture --- .../optimizers/tests/discriminative_layer_training_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py index e87f8199fc..d74d84f190 100644 --- a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py +++ b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py @@ -49,7 +49,7 @@ def test_fit_layer_optimizer(device, serialize, tmpdir): # serialize whole model including optimizer, clear the session, then reload the whole model. if serialize: - model.save(tmpdir, save_format="tf") + model.save(str(tmpdir), save_format="tf") tf.keras.backend.clear_session() model = tf.keras.models.load_model(tmpdir) From 1eb159ed003ccec452f25db32c702070e0cfdcd2 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Fri, 25 Sep 2020 13:52:04 -0700 Subject: [PATCH 12/23] Fix tmpdir --- .../optimizers/tests/discriminative_layer_training_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py index d74d84f190..37c4568253 100644 --- a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py +++ b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py @@ -51,7 +51,7 @@ def test_fit_layer_optimizer(device, serialize, tmpdir): if serialize: model.save(str(tmpdir), save_format="tf") tf.keras.backend.clear_session() - model = tf.keras.models.load_model(tmpdir) + model = tf.keras.models.load_model(str(tmpdir)) model.fit(x, y, batch_size=8, epochs=10) From 96891165e484dd5d70cde514dcafe2ea5d09ac3a Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Fri, 25 Sep 2020 15:51:58 -0700 Subject: [PATCH 13/23] Update doc --- .../optimizers/discriminative_layer_training.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow_addons/optimizers/discriminative_layer_training.py b/tensorflow_addons/optimizers/discriminative_layer_training.py index 3e645eea10..4f370b7229 100644 --- a/tensorflow_addons/optimizers/discriminative_layer_training.py +++ b/tensorflow_addons/optimizers/discriminative_layer_training.py @@ -29,8 +29,8 @@ class MultiOptimizer(tf.keras.optimizers.Optimizer): Each optimizer will optimize only the weights associated with its paired layer. This can be used to implement discriminative layer training by assigning - different learning rates to each optimizer - layer pair. `(tf.keras.optimizers.Optimizer, List[tf.keras.layers.Layer])` pairs are also supported. + different learning rates to each optimizer layer pair. + `(tf.keras.optimizers.Optimizer, List[tf.keras.layers.Layer])` pairs are also supported. Please note that the layers must be instantiated before instantiating the optimizer. 
 
     Args:
@@ -61,9 +61,8 @@ class MultiOptimizer(tf.keras.optimizers.Optimizer):
     >>> model.compile(optimizer=optimizer, loss="mse")
 
     Reference:
-
-        - [Universal Language Model Fine-tuning for Text Classification](https://arxiv.org/abs/1801.06146)
-        - [Collaborative Layer-wise Discriminative Learning in Deep Neural Networks](https://arxiv.org/abs/1607.05440)
+    - [Universal Language Model Fine-tuning for Text Classification](https://arxiv.org/abs/1801.06146)
+    - [Collaborative Layer-wise Discriminative Learning in Deep Neural Networks](https://arxiv.org/abs/1607.05440)
 
     Note:
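The test added in the next patch pairs an Adam optimizer with the whole model and an additional SGD with the last layer, which shows that pairs may overlap: a variable listed in several specs is meant to receive one update from each matching optimizer. A sketch of such an overlapping assignment (layer sizes illustrative):

```python
import tensorflow as tf
import tensorflow_addons as tfa

inputs = tf.keras.Input(shape=(4,))
outputs = tf.keras.layers.Dense(32)(tf.keras.layers.Dense(16)(inputs))
model = tf.keras.Model(inputs, outputs)

# The last Dense layer's variables appear in both specs, so each training
# step applies an Adam update and an extra SGD update to them.
optimizers_and_layers = [
    (tf.keras.optimizers.Adam(), model),            # every variable in the model
    (tf.keras.optimizers.SGD(), model.layers[-1]),  # the head, a second time
]
model.compile(tfa.optimizers.MultiOptimizer(optimizers_and_layers), loss="mse")
```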
From d334fabbbee0ae6ef8c35bb124dcec5fee09dd81 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Fri, 25 Sep 2020 17:06:35 -0700
Subject: [PATCH 14/23] Add test on tf.keras.Model

---
 .../discriminative_layer_training_test.py | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
index 37c4568253..7c281bf832 100644
--- a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
+++ b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
@@ -118,6 +118,31 @@ def test_list_of_layers():
                 test_utils.assert_allclose_according_to_type(w_before, w_after)
 
 
+def test_model():
+    input = tf.keras.Input(shape=(4,))
+    output = tf.keras.layers.Dense(16)(input)
+    output = tf.keras.layers.Dense(16)(output)
+    output = tf.keras.layers.Dense(32)(output)
+    output = tf.keras.layers.Dense(32)(output)
+    model = tf.keras.Model(input, output)
+
+    # Adam optimizer on the whole model and an additional SGD on the last layer.
+    optimizers_and_layers = [
+        (tf.keras.optimizers.Adam(), model),
+        (tf.keras.optimizers.SGD(), model.layers[-1]),
+    ]
+
+    multi_optimizer = MultiOptimizer(optimizers_and_layers)
+    model.compile(multi_optimizer, loss="mse")
+
+    x = np.random.rand(128, 4)
+    y = np.random.rand(128, 32)
+    model.fit(x, y, batch_size=32, epochs=10)
+
+    loss = model.evaluate(x, y)
+    assert loss < 0.15
+
+
 def test_serialization():
     model = tf.keras.Sequential(
         [tf.keras.Input(shape=[1]), tf.keras.layers.Dense(1), tf.keras.layers.Dense(1)]
     )

From ecb90c1dae790aa8025340025e90fd013f4b7ee7 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Sat, 26 Sep 2020 12:49:36 -0700
Subject: [PATCH 15/23] Add nested model tests

---
 .../discriminative_layer_training_test.py | 95 ++++++++++++++++++-
 1 file changed, 92 insertions(+), 3 deletions(-)

diff --git a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
index 7c281bf832..c000a783b5 100644
--- a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
+++ b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
@@ -119,12 +119,12 @@ def test_list_of_layers():
 
 def test_model():
-    input = tf.keras.Input(shape=(4,))
-    output = tf.keras.layers.Dense(16)(input)
+    inputs = tf.keras.Input(shape=(4,))
+    output = tf.keras.layers.Dense(16)(inputs)
     output = tf.keras.layers.Dense(16)(output)
     output = tf.keras.layers.Dense(32)(output)
     output = tf.keras.layers.Dense(32)(output)
-    model = tf.keras.Model(input, output)
+    model = tf.keras.Model(inputs, output)
 
     # Adam optimizer on the whole model and an additional SGD on the last layer.
     optimizers_and_layers = [
@@ -143,6 +143,95 @@ def test_model():
     assert loss < 0.15
 
 
+def test_pretrained_model():
+    resnet = tf.keras.applications.ResNet50(include_top=False, weights=None)
+    dense = tf.keras.layers.Dense(32)
+    model = tf.keras.Sequential([resnet, dense])
+
+    resnet_weights_before_train = [
+        weight.numpy() for weight in resnet.trainable_weights
+    ]
+    dense_weights_before_train = [weight.numpy() for weight in dense.weights]
+
+    optimizers_and_layers = [(tf.keras.optimizers.SGD(), dense)]
+
+    multi_optimizer = MultiOptimizer(optimizers_and_layers)
+    model.compile(multi_optimizer, loss="mse")
+
+    x = np.random.rand(128, 32, 32, 3)
+    y = np.random.rand(128, 32)
+    model.fit(x, y, batch_size=32)
+
+    resnet_weights_after_train = [weight.numpy() for weight in resnet.trainable_weights]
+    dense_weights_after_train = [weight.numpy() for weight in dense.weights]
+
+    for w_after, w_before in zip(
+        resnet_weights_before_train, resnet_weights_after_train
+    ):
+        test_utils.assert_allclose_according_to_type(w_before, w_after)
+
+    for w_after, w_before in zip(dense_weights_before_train, dense_weights_after_train):
+        with np.testing.assert_raises(AssertionError):
+            test_utils.assert_allclose_according_to_type(w_before, w_after)
+
+
+def test_nested_model():
+    def get_model():
+        inputs = tf.keras.Input(shape=(4,))
+        outputs = tf.keras.layers.Dense(1)(inputs)
+        return tf.keras.Model(inputs, outputs)
+
+    model1 = get_model()
+    model2 = get_model()
+    model3 = get_model()
+
+    inputs = tf.keras.Input(shape=(4,))
+    y1 = model1(inputs)
+    y2 = model2(inputs)
+    y3 = model3(inputs)
+    outputs = tf.keras.layers.Average()([y1, y2, y3])
+    model = tf.keras.Model(inputs, outputs)
+
+    optimizers_and_layers = [
+        (tf.keras.optimizers.SGD(), model1),
+        (tf.keras.optimizers.SGD(learning_rate=0.0), model2),
+        (tf.keras.optimizers.SGD(), model3),
+    ]
+
+    model1_weights_before_train = [weight.numpy() for weight in model1.weights]
+    model2_weights_before_train = [weight.numpy() for weight in model2.weights]
+    model3_weights_before_train = [weight.numpy() for weight in model3.weights]
+
+    multi_optimizer = MultiOptimizer(optimizers_and_layers)
+
+    model.compile(multi_optimizer, loss="mse")
+
+    x = np.random.rand(128, 4)
+    y = np.random.rand(128, 4)
+    model.fit(x, y)
+
+    model1_weights_after_train = [weight.numpy() for weight in model1.weights]
+    model2_weights_after_train = [weight.numpy() for weight in model2.weights]
+    model3_weights_after_train = [weight.numpy() for weight in model3.weights]
+
+    for w_after, w_before in zip(
+        model1_weights_before_train, model1_weights_after_train
+    ):
+        with np.testing.assert_raises(AssertionError):
+            test_utils.assert_allclose_according_to_type(w_before, w_after)
+
+    for w_after, w_before in zip(
+        model2_weights_before_train, model2_weights_after_train
+    ):
+        test_utils.assert_allclose_according_to_type(w_before, w_after)
+
+    for w_after, w_before in zip(
+        model3_weights_before_train, model3_weights_after_train
+    ):
+        with np.testing.assert_raises(AssertionError):
+            test_utils.assert_allclose_according_to_type(w_before, w_after)
+
+
 def test_serialization():
     model = tf.keras.Sequential(
         [tf.keras.Input(shape=[1]), tf.keras.layers.Dense(1), tf.keras.layers.Dense(1)]
     )
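A recurring trick in these tests is `tf.keras.optimizers.SGD(learning_rate=0.0)`: a zero learning rate leaves its paired weights untouched, so the assertions can prove the other optimizers never reached them. For actually freezing part of a network in training code, the usual Keras route of setting `trainable = False` before compiling also works; both options are sketched below with an illustrative backbone/head split:

```python
import tensorflow as tf
import tensorflow_addons as tfa

backbone = tf.keras.Sequential([tf.keras.Input(shape=(4,)), tf.keras.layers.Dense(16)])
head = tf.keras.layers.Dense(1)
model = tf.keras.Sequential([backbone, head])

# Test-style pinning: plain SGD with lr=0.0 applies var -= 0 * grad,
# leaving the backbone weights bit-identical.
pairs = [
    (tf.keras.optimizers.SGD(learning_rate=0.0), backbone),
    (tf.keras.optimizers.Adam(learning_rate=1e-3), head),
]
model.compile(tfa.optimizers.MultiOptimizer(pairs), loss="mse")

# Conventional freezing instead: mark the backbone non-trainable
# (before compile) and pair an optimizer only with the head.
# backbone.trainable = False
```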
From d7c3c09cbb86d75070faf11b89f1a928bbe791b4 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Sat, 26 Sep 2020 18:15:43 -0700
Subject: [PATCH 16/23] Better naming

---
 .../optimizers/discriminative_layer_training.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tensorflow_addons/optimizers/discriminative_layer_training.py b/tensorflow_addons/optimizers/discriminative_layer_training.py
index 4f370b7229..51da3fd981 100644
--- a/tensorflow_addons/optimizers/discriminative_layer_training.py
+++ b/tensorflow_addons/optimizers/discriminative_layer_training.py
@@ -88,8 +88,8 @@ def __init__(
 
         if optimizer_specs is None and optimizers_and_layers is not None:
             self.optimizer_specs = [
-                self.create_optimizer_spec(opt, layer)
-                for opt, layer in optimizers_and_layers
+                self.create_optimizer_spec(optimizer, layers_or_model)
+                for optimizer, layers_or_model in optimizers_and_layers
             ]
 
         elif optimizer_specs is not None and optimizers_and_layers is None:
@@ -133,7 +133,7 @@ def create_optimizer_spec(
         cls,
         optimizer: tf.keras.optimizers.Optimizer,
-        layer: Union[
+        layers_or_model: Union[
             tf.keras.Model,
             tf.keras.Sequential,
             tf.keras.layers.Layer,
             List[tf.keras.layers.Layer],
         ],
     ):
@@ -144,10 +144,12 @@ def create_optimizer_spec(
 
         The name of each variable is used rather than `var.ref()` to enable serialization and deserialization.
         """
-        if isinstance(layer, list):
-            weights = [var.name for sublayer in layer for var in sublayer.weights]
+        if isinstance(layers_or_model, list):
+            weights = [
+                var.name for sublayer in layers_or_model for var in sublayer.weights
+            ]
         else:
-            weights = [var.name for var in layer.weights]
+            weights = [var.name for var in layers_or_model.weights]
 
         return {
             "optimizer": optimizer,
             "weights": weights,
         }

From 31aa6efd0f8b32c20b3fe6321e3dde9822dd8607 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Sat, 26 Sep 2020 18:15:56 -0700
Subject: [PATCH 17/23] Add custom subclass model tests

---
 .../discriminative_layer_training_test.py | 103 +++++++++++++-----
 1 file changed, 74 insertions(+), 29 deletions(-)

diff --git a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
index c000a783b5..7f148c7129 100644
--- a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
+++ b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
@@ -22,6 +22,17 @@
 from tensorflow_addons.utils import test_utils
 
 
+def assert_list_allclose(a, b):
+    for x, y in zip(a, b):
+        test_utils.assert_allclose_according_to_type(x, y)
+
+
+def assert_list_not_allclose(a, b):
+    for x, y in zip(a, b):
+        with np.testing.assert_raises(AssertionError):
+            test_utils.assert_allclose_according_to_type(x, y)
+
+
 @pytest.mark.with_device(["cpu", "gpu"])
 @pytest.mark.parametrize("serialize", [True, False])
 def test_fit_layer_optimizer(device, serialize, tmpdir):
@@ -107,15 +118,12 @@ def test_list_of_layers():
         [weight.numpy() for weight in layer.weights] for layer in model.layers
     ]
 
-    for w_before, w_after in zip(weights_before_train[0], weights_after_train[0]):
-        test_utils.assert_allclose_according_to_type(w_before, w_after)
+    assert_list_allclose(weights_before_train[0], weights_after_train[0])
 
     for layer_before, layer_after in zip(
         weights_before_train[1:], weights_after_train[1:]
     ):
-        for w_before, w_after in zip(layer_before, layer_after):
-            with np.testing.assert_raises(AssertionError):
-                test_utils.assert_allclose_according_to_type(w_before, w_after)
+        assert_list_not_allclose(layer_before, layer_after)
 
 
 def test_model():
@@ -145,6 +152,62 @@ def test_model():
 
 
+def test_subclass_model():
+    class Block(tf.keras.Model):
+        def __init__(self, units):
+            super().__init__()
+            self.dense1 = tf.keras.layers.Dense(units)
+            self.dense2 = tf.keras.layers.Dense(units)
+
+        def call(self, x):
x): + return self.dense2(self.dense1(x)) + + class Custom(tf.keras.Model): + def __init__(self): + super().__init__() + self.block1 = Block(16) + self.block2 = Block(32) + + def call(self, x): + return self.block2(self.block1(x)) + + model = Custom() + model.build(input_shape=(None, 4)) + + optimizers_and_layers = [ + (tf.keras.optimizers.SGD(learning_rate=0.0), model.block1), + (tf.keras.optimizers.Adam(), model.block2), + ] + + block1_weights_before_train = [weight.numpy() for weight in model.block1.weights] + block2_weights_before_train = [weight.numpy() for weight in model.block2.weights] + + multi_optimizer = MultiOptimizer(optimizers_and_layers) + + x = np.random.rand(128, 4).astype(np.float32) + y = np.random.rand(128, 32).astype(np.float32) + mse = tf.keras.losses.MeanSquaredError() + + for _ in range(10): + for i in range(0, 128, 32): + x_batch = x[i : i + 32] + y_batch = y[i : i + 32] + with tf.GradientTape() as tape: + loss = mse(y_batch, model(x_batch)) + + grads = tape.gradient(loss, model.trainable_variables) + multi_optimizer.apply_gradients(zip(grads, model.trainable_variables)) + + loss = mse(y, model(x)).numpy() + assert loss < 0.15 + + block1_weights_after_train = [weight.numpy() for weight in model.block1.weights] + block2_weights_after_train = [weight.numpy() for weight in model.block2.weights] + + assert_list_allclose(block1_weights_before_train, block1_weights_after_train) + assert_list_not_allclose(block2_weights_before_train, block2_weights_after_train) + + def test_pretrained_model(): resnet = tf.keras.applications.ResNet50(include_top=False, weights=None) dense = tf.keras.layers.Dense(32) @@ -165,14 +229,8 @@ def test_pretrained_model(): resnet_weights_after_train = [weight.numpy() for weight in resnet.trainable_weights] dense_weights_after_train = [weight.numpy() for weight in dense.weights] - for w_after, w_before in zip( - resnet_weights_before_train, resnet_weights_after_train - ): - test_utils.assert_allclose_according_to_type(w_before, w_after) - - for w_after, w_before in zip(dense_weights_before_train, dense_weights_after_train): - with np.testing.assert_raises(AssertionError): - test_utils.assert_allclose_according_to_type(w_before, w_after) + assert_list_allclose(resnet_weights_before_train, resnet_weights_after_train) + assert_list_not_allclose(dense_weights_before_train, dense_weights_after_train) def test_nested_model(): @@ -214,22 +272,9 @@ def get_model(): model2_weights_after_train = [weight.numpy() for weight in model2.weights] model3_weights_after_train = [weight.numpy() for weight in model3.weights] - for w_after, w_before in zip( - model1_weights_before_train, model1_weights_after_train - ): - with np.testing.assert_raises(AssertionError): - test_utils.assert_allclose_according_to_type(w_before, w_after) - - for w_after, w_before in zip( - model2_weights_before_train, model2_weights_after_train - ): - test_utils.assert_allclose_according_to_type(w_before, w_after) - - for w_after, w_before in zip( - model3_weights_before_train, model3_weights_after_train - ): - with np.testing.assert_raises(AssertionError): - test_utils.assert_allclose_according_to_type(w_before, w_after) + assert_list_not_allclose(model1_weights_before_train, model1_weights_after_train) + assert_list_allclose(model2_weights_before_train, model2_weights_after_train) + assert_list_not_allclose(model3_weights_before_train, model3_weights_after_train) def test_serialization(): From 7036ecc6e73041f341f399a1f79f15b4957819f3 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Sat, 26 
Subject: [PATCH 18/23] Inherit from Layer

---
 .../optimizers/tests/discriminative_layer_training_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
index 7f148c7129..420bd9d493 100644
--- a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
+++ b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
@@ -152,7 +152,7 @@ def test_model():
 
 def test_subclass_model():
-    class Block(tf.keras.Model):
+    class Block(tf.keras.layers.Layer):
         def __init__(self, units):
             super().__init__()
             self.dense1 = tf.keras.layers.Dense(units)

From e6bbaa7af9bf3c5bfda44f26c65a4a6c8f5f8a85 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Sat, 26 Sep 2020 18:23:09 -0700
Subject: [PATCH 19/23] Move assert_not_allclose to test_utils

---
 .../discriminative_layer_training_test.py |  5 ++---
 tensorflow_addons/utils/test_utils.py     | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
index 420bd9d493..80c37bf32d 100644
--- a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
+++ b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
@@ -24,13 +24,12 @@
 
 def assert_list_allclose(a, b):
     for x, y in zip(a, b):
-        test_utils.assert_allclose_according_to_type(x, y)
+        np.testing.assert_allclose(x, y)
 
 
 def assert_list_not_allclose(a, b):
     for x, y in zip(a, b):
-        with np.testing.assert_raises(AssertionError):
-            test_utils.assert_allclose_according_to_type(x, y)
+        test_utils.assert_not_allclose(x, y)
 
diff --git a/tensorflow_addons/utils/test_utils.py b/tensorflow_addons/utils/test_utils.py
index cdb33bcf82..c4206a3d31 100644
--- a/tensorflow_addons/utils/test_utils.py
+++ b/tensorflow_addons/utils/test_utils.py
@@ -217,6 +217,25 @@ def pytest_collection_modifyitems(items):
                 item.add_marker(pytest.mark.skip("The gpu is not available."))
 
 
+def assert_not_allclose(a, b, **kwargs):
+    """Assert that two numpy arrays do not have near values.
+
+    Args:
+        a: the first value to compare.
+        b: the second value to compare.
+        **kwargs: additional keyword arguments to be passed to the underlying
+            `np.testing.assert_allclose` call.
+
+    Raises:
+        AssertionError: If `a` and `b` are unexpectedly close at all elements.
+    """
+    try:
+        np.testing.assert_allclose(a, b, **kwargs)
+    except AssertionError:
+        return
+    raise AssertionError("The two values are close at all elements")
+
+
 def assert_allclose_according_to_type(
     a,
     b,
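A quick illustration of the helper this patch adds; the values are arbitrary and the tolerances are the `np.testing.assert_allclose` defaults:

```python
import numpy as np
from tensorflow_addons.utils import test_utils

a = np.array([1.0, 2.0, 3.0])

test_utils.assert_not_allclose(a, a + 1.0)  # passes: the arrays clearly differ

try:
    test_utils.assert_not_allclose(a, a + 1e-9)  # arrays are numerically close
except AssertionError as e:
    print(e)  # "The two values are close at all elements"
```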
+ """ + try: + np.testing.assert_allclose(a, b, **kwargs) + except AssertionError: + return + raise AssertionError("The two values are close at all elements") + + def assert_allclose_according_to_type( a, b, From 2de24dfb6b9a00a7327de82cd32f3766fb24fa8f Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Sat, 26 Sep 2020 18:28:24 -0700 Subject: [PATCH 20/23] Change input to ones --- .../discriminative_layer_training_test.py | 29 +++++++------------ 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py index 80c37bf32d..fbb5ad6a38 100644 --- a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py +++ b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py @@ -106,13 +106,10 @@ def test_list_of_layers(): multi_optimizer = MultiOptimizer(optimizers_and_layers) model.compile(multi_optimizer, loss="mse") - x = np.random.rand(128, 4) - y = np.random.rand(128, 32) + x = np.ones((128, 4)).astype(np.float32) + y = np.ones((128, 32)).astype(np.float32) model.fit(x, y, batch_size=32, epochs=10) - loss = model.evaluate(x, y) - assert loss < 0.15 - weights_after_train = [ [weight.numpy() for weight in layer.weights] for layer in model.layers ] @@ -142,13 +139,10 @@ def test_model(): multi_optimizer = MultiOptimizer(optimizers_and_layers) model.compile(multi_optimizer, loss="mse") - x = np.random.rand(128, 4) - y = np.random.rand(128, 32) + x = np.ones((128, 4)).astype(np.float32) + y = np.ones((128, 32)).astype(np.float32) model.fit(x, y, batch_size=32, epochs=10) - loss = model.evaluate(x, y) - assert loss < 0.15 - def test_subclass_model(): class Block(tf.keras.layers.Layer): @@ -182,8 +176,8 @@ def call(self, x): multi_optimizer = MultiOptimizer(optimizers_and_layers) - x = np.random.rand(128, 4).astype(np.float32) - y = np.random.rand(128, 32).astype(np.float32) + x = np.ones((128, 4)).astype(np.float32) + y = np.ones((128, 32)).astype(np.float32) mse = tf.keras.losses.MeanSquaredError() for _ in range(10): @@ -196,9 +190,6 @@ def call(self, x): grads = tape.gradient(loss, model.trainable_variables) multi_optimizer.apply_gradients(zip(grads, model.trainable_variables)) - loss = mse(y, model(x)).numpy() - assert loss < 0.15 - block1_weights_after_train = [weight.numpy() for weight in model.block1.weights] block2_weights_after_train = [weight.numpy() for weight in model.block2.weights] @@ -221,8 +212,8 @@ def test_pretrained_model(): multi_optimizer = MultiOptimizer(optimizers_and_layers) model.compile(multi_optimizer, loss="mse") - x = np.random.rand(128, 32, 32, 3) - y = np.random.rand(128, 32) + x = np.ones((128, 32, 32, 3)).astype(np.float32) + y = np.ones((128, 32)).astype(np.float32) model.fit(x, y, batch_size=32) resnet_weights_after_train = [weight.numpy() for weight in resnet.trainable_weights] @@ -263,8 +254,8 @@ def get_model(): model.compile(multi_optimizer, loss="mse") - x = np.random.rand(128, 4) - y = np.random.rand(128, 4) + x = np.ones((128, 4)).astype(np.float32) + y = np.ones((128, 32)).astype(np.float32) model.fit(x, y) model1_weights_after_train = [weight.numpy() for weight in model1.weights] From 17334489969611e937bc3a5c2d5af736d3007a6e Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Sat, 26 Sep 2020 18:28:41 -0700 Subject: [PATCH 21/23] Inherit from Model --- .../optimizers/tests/discriminative_layer_training_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
From f6443080280033de66440f66d9d1a66164506958 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Sat, 26 Sep 2020 18:35:13 -0700
Subject: [PATCH 22/23] Test all weights instead of first one

---
 .../discriminative_layer_training_test.py | 24 +++++--------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
index b9fa798179..80a613e586 100644
--- a/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
+++ b/tensorflow_addons/optimizers/tests/discriminative_layer_training_test.py
@@ -42,10 +42,8 @@ def test_fit_layer_optimizer(device, serialize, tmpdir):
     x = np.array(np.ones([100]))
     y = np.array(np.ones([100]))
 
-    weights_before_train = (
-        model.layers[0].weights[0].numpy(),
-        model.layers[1].weights[0].numpy(),
-    )
+    dense1_weights_before_train = [weight.numpy() for weight in model.layers[0].weights]
+    dense2_weights_before_train = [weight.numpy() for weight in model.layers[1].weights]
 
     opt1 = tf.keras.optimizers.Adam(learning_rate=1e-3)
     opt2 = tf.keras.optimizers.SGD(learning_rate=0)
@@ -65,21 +63,11 @@ def test_fit_layer_optimizer(device, serialize, tmpdir):
 
     model.fit(x, y, batch_size=8, epochs=10)
 
-    weights_after_train = (
-        model.layers[0].weights[0].numpy(),
-        model.layers[1].weights[0].numpy(),
-    )
-
-    with np.testing.assert_raises(AssertionError):
-        # expect weights to be different for layer 1
-        test_utils.assert_allclose_according_to_type(
-            weights_before_train[0], weights_after_train[0]
-        )
-
-    # expect weights to be same for layer 2
-    test_utils.assert_allclose_according_to_type(
-        weights_before_train[1], weights_after_train[1]
-    )
+    dense1_weights_after_train = [weight.numpy() for weight in model.layers[0].weights]
+    dense2_weights_after_train = [weight.numpy() for weight in model.layers[1].weights]
+
+    assert_list_not_allclose(dense1_weights_before_train, dense1_weights_after_train)
+    assert_list_allclose(dense2_weights_before_train, dense2_weights_after_train)

From 7db5362509ddb43cd1a92832deca0b10c118a633 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Sat, 26 Sep 2020 18:43:55 -0700
Subject: [PATCH 23/23] Update doc

---
 .../optimizers/discriminative_layer_training.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tensorflow_addons/optimizers/discriminative_layer_training.py b/tensorflow_addons/optimizers/discriminative_layer_training.py
index 51da3fd981..80add77378 100644
--- a/tensorflow_addons/optimizers/discriminative_layer_training.py
+++ b/tensorflow_addons/optimizers/discriminative_layer_training.py
@@ -64,12 +64,10 @@ class MultiOptimizer(tf.keras.optimizers.Optimizer):
     - [Universal Language Model Fine-tuning for Text Classification](https://arxiv.org/abs/1801.06146)
     - [Collaborative Layer-wise Discriminative Learning in Deep Neural Networks](https://arxiv.org/abs/1607.05440)
 
-    Note:
-
-    Currently, `tfa.optimizers.MultiOptimizer` does not support callbacks that modify optimizers.
-    However, you can instantiate optimizer layer pairs with
-    `tf.keras.optimizers.schedules.LearningRateSchedule`
-    instead of a static learning rate.
+    Note: Currently, `tfa.optimizers.MultiOptimizer` does not support callbacks that modify optimizers.
+    However, you can instantiate optimizer layer pairs with
+    `tf.keras.optimizers.schedules.LearningRateSchedule`
+    instead of a static learning rate.
 
     This code should function on CPU, GPU, and TPU. Apply with `tf.distribute.Strategy().scope()` context as you
     would with any other optimizer.