diff --git a/tensorflow_addons/optimizers/__init__.py b/tensorflow_addons/optimizers/__init__.py
index 6a23397175..07b52a83fa 100644
--- a/tensorflow_addons/optimizers/__init__.py
+++ b/tensorflow_addons/optimizers/__init__.py
@@ -14,6 +14,7 @@
 # ==============================================================================
 """Additional optimizers that conform to Keras API."""
 
+from tensorflow_addons.optimizers.constants import KerasLegacyOptimizer
 from tensorflow_addons.optimizers.average_wrapper import AveragedOptimizerWrapper
 from tensorflow_addons.optimizers.conditional_gradient import ConditionalGradient
 from tensorflow_addons.optimizers.cyclical_learning_rate import CyclicalLearningRate
diff --git a/tensorflow_addons/optimizers/adabelief.py b/tensorflow_addons/optimizers/adabelief.py
index 33297f4a33..58808ed696 100644
--- a/tensorflow_addons/optimizers/adabelief.py
+++ b/tensorflow_addons/optimizers/adabelief.py
@@ -17,11 +17,12 @@
 import tensorflow as tf
 from tensorflow_addons.utils.types import FloatTensorLike
+from tensorflow_addons.optimizers import KerasLegacyOptimizer
 
 from typing import Union, Callable, Dict
 
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
-class AdaBelief(tf.keras.optimizers.Optimizer):
+class AdaBelief(KerasLegacyOptimizer):
     """Variant of the Adam optimizer.
 
     It achieves fast convergence as Adam and generalization comparable to SGD.
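Note (illustrative, not part of the patch): the re-parenting is transparent to callers. `AdaBelief` and the other optimizers below keep their public constructors; only the base class now resolves through `KerasLegacyOptimizer`. A minimal usage sketch, assuming a `tensorflow-addons` build with this change:

```python
import tensorflow as tf
import tensorflow_addons as tfa

# On TF >= 2.9 AdaBelief is now a tf.keras.optimizers.legacy.Optimizer subclass;
# on older releases it still derives from tf.keras.optimizers.Optimizer.
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer=tfa.optimizers.AdaBelief(learning_rate=1e-3), loss="mse")
```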
diff --git a/tensorflow_addons/optimizers/average_wrapper.py b/tensorflow_addons/optimizers/average_wrapper.py
index 0ff4992441..fe67b9bd74 100644
--- a/tensorflow_addons/optimizers/average_wrapper.py
+++ b/tensorflow_addons/optimizers/average_wrapper.py
@@ -17,12 +17,12 @@
 import warnings
 
 import tensorflow as tf
+from tensorflow_addons.optimizers import KerasLegacyOptimizer
 from tensorflow_addons.utils import types
-
 from typeguard import typechecked
 
 
-class AveragedOptimizerWrapper(tf.keras.optimizers.Optimizer, metaclass=abc.ABCMeta):
+class AveragedOptimizerWrapper(KerasLegacyOptimizer, metaclass=abc.ABCMeta):
     @typechecked
     def __init__(
         self, optimizer: types.Optimizer, name: str = "AverageOptimizer", **kwargs
@@ -32,9 +32,12 @@ def __init__(
         if isinstance(optimizer, str):
             optimizer = tf.keras.optimizers.get(optimizer)
 
-        if not isinstance(optimizer, tf.keras.optimizers.Optimizer):
+        if not isinstance(
+            optimizer, (tf.keras.optimizers.Optimizer, KerasLegacyOptimizer)
+        ):
             raise TypeError(
-                "optimizer is not an object of tf.keras.optimizers.Optimizer"
+                "optimizer is not an object of tf.keras.optimizers.Optimizer "
+                "or tf.keras.optimizers.legacy.Optimizer (if you have tf version >= 2.9.0)."
             )
 
         self._optimizer = optimizer
diff --git a/tensorflow_addons/optimizers/cocob.py b/tensorflow_addons/optimizers/cocob.py
index 9af39d5d9f..6e887f06e5 100644
--- a/tensorflow_addons/optimizers/cocob.py
+++ b/tensorflow_addons/optimizers/cocob.py
@@ -17,9 +17,11 @@
 from typeguard import typechecked
 
 import tensorflow as tf
+from tensorflow_addons.optimizers import KerasLegacyOptimizer
+
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
-class COCOB(tf.keras.optimizers.Optimizer):
+class COCOB(KerasLegacyOptimizer):
     """Optimizer that implements COCOB Backprop Algorithm
 
         Reference:
diff --git a/tensorflow_addons/optimizers/conditional_gradient.py b/tensorflow_addons/optimizers/conditional_gradient.py
index 4e6592ad07..6b79626710 100644
--- a/tensorflow_addons/optimizers/conditional_gradient.py
+++ b/tensorflow_addons/optimizers/conditional_gradient.py
@@ -15,6 +15,7 @@
 """Conditional Gradient optimizer."""
 
 import tensorflow as tf
+from tensorflow_addons.optimizers import KerasLegacyOptimizer
 from tensorflow_addons.utils.types import FloatTensorLike
 from typeguard import typechecked
 
@@ -22,7 +23,7 @@
 
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
-class ConditionalGradient(tf.keras.optimizers.Optimizer):
+class ConditionalGradient(KerasLegacyOptimizer):
     """Optimizer that implements the Conditional Gradient optimization.
 
     This optimizer helps handle constraints well.
diff --git a/tensorflow_addons/optimizers/constants.py b/tensorflow_addons/optimizers/constants.py
new file mode 100644
index 0000000000..de6a498b69
--- /dev/null
+++ b/tensorflow_addons/optimizers/constants.py
@@ -0,0 +1,21 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import importlib
+
+import tensorflow as tf
+
+if importlib.util.find_spec("tensorflow.keras.optimizers.legacy") is not None:
+    KerasLegacyOptimizer = tf.keras.optimizers.legacy.Optimizer
+else:
+    KerasLegacyOptimizer = tf.keras.optimizers.Optimizer
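For illustration (not part of the patch), the widened `isinstance` check used by `AveragedOptimizerWrapper` above can be reproduced with the new alias from `constants.py`; the helper name below is hypothetical:

```python
import tensorflow as tf
from tensorflow_addons.optimizers import KerasLegacyOptimizer


def is_supported_optimizer(optimizer) -> bool:
    # Accept whichever Optimizer classes the installed TF exposes: the current
    # tf.keras.optimizers.Optimizer and, on TF >= 2.9, the legacy class as well.
    return isinstance(optimizer, (tf.keras.optimizers.Optimizer, KerasLegacyOptimizer))


print(is_supported_optimizer(tf.keras.optimizers.SGD(learning_rate=0.1)))  # True
```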
diff --git a/tensorflow_addons/optimizers/cyclical_learning_rate.py b/tensorflow_addons/optimizers/cyclical_learning_rate.py
index 905aad0c45..f4ec176977 100644
--- a/tensorflow_addons/optimizers/cyclical_learning_rate.py
+++ b/tensorflow_addons/optimizers/cyclical_learning_rate.py
@@ -58,7 +58,7 @@ def __init__(
         ```
 
         You can pass this schedule directly into a
-        `tf.keras.optimizers.Optimizer` as the learning rate.
+        `tf.keras.optimizers.legacy.Optimizer` as the learning rate.
 
         Args:
             initial_learning_rate: A scalar `float32` or `float64` `Tensor` or
@@ -146,7 +146,7 @@ def __init__(
         ```
 
         You can pass this schedule directly into a
-        `tf.keras.optimizers.Optimizer` as the learning rate.
+        `tf.keras.optimizers.legacy.Optimizer` as the learning rate.
 
         Args:
             initial_learning_rate: A scalar `float32` or `float64` `Tensor` or
@@ -215,7 +215,7 @@ def __init__(
         ```
 
         You can pass this schedule directly into a
-        `tf.keras.optimizers.Optimizer` as the learning rate.
+        `tf.keras.optimizers.legacy.Optimizer` as the learning rate.
 
         Args:
             initial_learning_rate: A scalar `float32` or `float64` `Tensor` or
@@ -286,7 +286,7 @@ def __init__(
         ```
 
         You can pass this schedule directly into a
-        `tf.keras.optimizers.Optimizer` as the learning rate.
+        `tf.keras.optimizers.legacy.Optimizer` as the learning rate.
 
         Args:
             initial_learning_rate: A scalar `float32` or `float64` `Tensor` or
diff --git a/tensorflow_addons/optimizers/discriminative_layer_training.py b/tensorflow_addons/optimizers/discriminative_layer_training.py
index 22fb19798d..1e51fb6616 100644
--- a/tensorflow_addons/optimizers/discriminative_layer_training.py
+++ b/tensorflow_addons/optimizers/discriminative_layer_training.py
@@ -17,11 +17,13 @@
 from typing import List, Union
 
 import tensorflow as tf
+
+from tensorflow_addons.optimizers import KerasLegacyOptimizer
 from typeguard import typechecked
 
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
-class MultiOptimizer(tf.keras.optimizers.Optimizer):
+class MultiOptimizer(KerasLegacyOptimizer):
     """Multi Optimizer Wrapper for Discriminative Layer Training.
 
     Creates a wrapper around a set of instantiated optimizer layer pairs.
@@ -30,7 +32,7 @@ class MultiOptimizer(tf.keras.optimizers.Optimizer):
     Each optimizer will optimize only the weights associated with its paired layer.
     This can be used to implement discriminative layer training by assigning
     different learning rates to each optimizer layer pair.
-    `(tf.keras.optimizers.Optimizer, List[tf.keras.layers.Layer])` pairs are also supported.
+    `(tf.keras.optimizers.legacy.Optimizer, List[tf.keras.layers.Layer])` pairs are also supported.
     Please note that the layers must be instantiated before instantiating the optimizer.
 
     Args:
@@ -130,7 +132,7 @@ def get_config(self):
     @classmethod
     def create_optimizer_spec(
         cls,
-        optimizer: tf.keras.optimizers.Optimizer,
+        optimizer: KerasLegacyOptimizer,
         layers_or_model: Union[
             tf.keras.Model,
             tf.keras.Sequential,
diff --git a/tensorflow_addons/optimizers/lamb.py b/tensorflow_addons/optimizers/lamb.py
index b166657251..df06c01d8a 100644
--- a/tensorflow_addons/optimizers/lamb.py
+++ b/tensorflow_addons/optimizers/lamb.py
@@ -24,12 +24,13 @@
 from typeguard import typechecked
 
 import tensorflow as tf
+from tensorflow_addons.optimizers import KerasLegacyOptimizer
 from tensorflow_addons.utils.types import FloatTensorLike
 from tensorflow_addons.optimizers.utils import is_variable_matched_by_regexes
 
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
-class LAMB(tf.keras.optimizers.Optimizer):
+class LAMB(KerasLegacyOptimizer):
     """Optimizer that implements the Layer-wise Adaptive Moments (LAMB).
 
     See paper [Large Batch Optimization for Deep Learning: Training BERT
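As a usage reminder for the `MultiOptimizer` change above (illustrative; the layer names and learning rates are made up): instantiated (optimizer, layer) pairs are still passed in directly, and on TF >= 2.9 the `tf.keras.optimizers.legacy.*` classes may be the safer choice for the pairs:

```python
import tensorflow as tf
import tensorflow_addons as tfa

model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation="relu", input_shape=(8,), name="backbone"),
    tf.keras.layers.Dense(1, name="head"),
])
# Lower learning rate for the backbone, higher for the head.
optimizers_and_layers = [
    (tf.keras.optimizers.Adam(1e-4), model.layers[0]),
    (tf.keras.optimizers.Adam(1e-2), model.layers[1]),
]
model.compile(optimizer=tfa.optimizers.MultiOptimizer(optimizers_and_layers), loss="mse")
```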
""" +import importlib import tensorflow as tf from tensorflow_addons.utils.types import FloatTensorLike @@ -27,8 +28,14 @@ from typing import Union, Callable +if importlib.util.find_spec("tensorflow.keras.optimizers.legacy") is not None: + adam_optimizer_class = tf.keras.optimizers.legacy.Adam +else: + adam_optimizer_class = tf.keras.optimizers.Adam + + @tf.keras.utils.register_keras_serializable(package="Addons") -class LazyAdam(tf.keras.optimizers.Adam): +class LazyAdam(adam_optimizer_class): """Variant of the Adam optimizer that handles sparse updates more efficiently. diff --git a/tensorflow_addons/optimizers/lookahead.py b/tensorflow_addons/optimizers/lookahead.py index 70d732979e..a10091164d 100644 --- a/tensorflow_addons/optimizers/lookahead.py +++ b/tensorflow_addons/optimizers/lookahead.py @@ -16,11 +16,12 @@ import tensorflow as tf from tensorflow_addons.utils import types +from tensorflow_addons.optimizers import KerasLegacyOptimizer from typeguard import typechecked @tf.keras.utils.register_keras_serializable(package="Addons") -class Lookahead(tf.keras.optimizers.Optimizer): +class Lookahead(KerasLegacyOptimizer): """This class allows to extend optimizers with the lookahead mechanism. The mechanism is proposed by Michael R. Zhang et.al in the paper @@ -71,9 +72,12 @@ def __init__( if isinstance(optimizer, str): optimizer = tf.keras.optimizers.get(optimizer) - if not isinstance(optimizer, tf.keras.optimizers.Optimizer): + if not isinstance( + optimizer, (tf.keras.optimizers.Optimizer, KerasLegacyOptimizer) + ): raise TypeError( - "optimizer is not an object of tf.keras.optimizers.Optimizer" + "optimizer is not an object of tf.keras.optimizers.Optimizer " + "or tf.keras.optimizers.legacy.Optimizer (if you have tf version >= 2.9.0)." ) self._optimizer = optimizer diff --git a/tensorflow_addons/optimizers/moving_average.py b/tensorflow_addons/optimizers/moving_average.py index 79432ded82..9ef010478f 100644 --- a/tensorflow_addons/optimizers/moving_average.py +++ b/tensorflow_addons/optimizers/moving_average.py @@ -55,7 +55,7 @@ def __init__( r"""Construct a new MovingAverage optimizer. Args: - optimizer: str or `tf.keras.optimizers.Optimizer` that will be + optimizer: str or `tf.keras.optimizers.legacy.Optimizer` that will be used to compute and apply gradients. average_decay: float. Decay to use to maintain the moving averages of trained variables. diff --git a/tensorflow_addons/optimizers/novograd.py b/tensorflow_addons/optimizers/novograd.py index 159ec55f0b..cea891faac 100644 --- a/tensorflow_addons/optimizers/novograd.py +++ b/tensorflow_addons/optimizers/novograd.py @@ -16,13 +16,13 @@ import tensorflow as tf from tensorflow_addons.utils.types import FloatTensorLike - +from tensorflow_addons.optimizers import KerasLegacyOptimizer from typing import Union, Callable from typeguard import typechecked @tf.keras.utils.register_keras_serializable(package="Addons") -class NovoGrad(tf.keras.optimizers.Optimizer): +class NovoGrad(KerasLegacyOptimizer): """Optimizer that implements NovoGrad. 
diff --git a/tensorflow_addons/optimizers/novograd.py b/tensorflow_addons/optimizers/novograd.py
index 159ec55f0b..cea891faac 100644
--- a/tensorflow_addons/optimizers/novograd.py
+++ b/tensorflow_addons/optimizers/novograd.py
@@ -16,13 +16,13 @@
 
 import tensorflow as tf
 from tensorflow_addons.utils.types import FloatTensorLike
-
+from tensorflow_addons.optimizers import KerasLegacyOptimizer
 from typing import Union, Callable
 from typeguard import typechecked
 
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
-class NovoGrad(tf.keras.optimizers.Optimizer):
+class NovoGrad(KerasLegacyOptimizer):
     """Optimizer that implements NovoGrad.
 
     The NovoGrad Optimizer was first proposed in [Stochastic Gradient
diff --git a/tensorflow_addons/optimizers/proximal_adagrad.py b/tensorflow_addons/optimizers/proximal_adagrad.py
index c8e1aa0ff2..eb5853cdcb 100644
--- a/tensorflow_addons/optimizers/proximal_adagrad.py
+++ b/tensorflow_addons/optimizers/proximal_adagrad.py
@@ -19,11 +19,12 @@
 import tensorflow as tf
 from typeguard import typechecked
 
+from tensorflow_addons.optimizers import KerasLegacyOptimizer
 from tensorflow_addons.utils.types import FloatTensorLike
 
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
-class ProximalAdagrad(tf.keras.optimizers.Optimizer):
+class ProximalAdagrad(KerasLegacyOptimizer):
     """Optimizer that implements the Proximal Adagrad algorithm.
 
     References:
diff --git a/tensorflow_addons/optimizers/rectified_adam.py b/tensorflow_addons/optimizers/rectified_adam.py
index c482d56d10..9d963e8014 100644
--- a/tensorflow_addons/optimizers/rectified_adam.py
+++ b/tensorflow_addons/optimizers/rectified_adam.py
@@ -16,12 +16,13 @@
 
 import tensorflow as tf
 from tensorflow_addons.utils.types import FloatTensorLike
+from tensorflow_addons.optimizers import KerasLegacyOptimizer
 
 from typing import Union, Callable, Dict
 from typeguard import typechecked
 
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
-class RectifiedAdam(tf.keras.optimizers.Optimizer):
+class RectifiedAdam(KerasLegacyOptimizer):
     """Variant of the Adam optimizer whose adaptive learning rate is rectified
     so as to have a consistent variance.
diff --git a/tensorflow_addons/optimizers/tests/standard_test.py b/tensorflow_addons/optimizers/tests/standard_test.py
index f1d284ad68..c0568b82f9 100644
--- a/tensorflow_addons/optimizers/tests/standard_test.py
+++ b/tensorflow_addons/optimizers/tests/standard_test.py
@@ -18,6 +18,7 @@
 import tensorflow as tf
 
 from tensorflow_addons import optimizers
+from tensorflow_addons.optimizers import KerasLegacyOptimizer
 from tensorflow_addons.utils.test_utils import discover_classes
 
 class_exceptions = [
@@ -29,12 +30,10 @@
     "ConditionalGradient",  # is wrapper
     "Lookahead",  # is wrapper
     "MovingAverage",  # is wrapper
+    "KerasLegacyOptimizer",  # is a constant
 ]
 
-
-classes_to_test = discover_classes(
-    optimizers, tf.keras.optimizers.Optimizer, class_exceptions
-)
+classes_to_test = discover_classes(optimizers, KerasLegacyOptimizer, class_exceptions)
 
 
 @pytest.mark.parametrize("optimizer", classes_to_test)
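For context (illustrative, not part of the patch), the optimizers re-parented in this span keep their usual hyperparameters; for example:

```python
import tensorflow_addons as tfa

# Only the base class of RectifiedAdam changes; its constructor is untouched.
opt = tfa.optimizers.RectifiedAdam(
    learning_rate=1e-3, total_steps=10000, warmup_proportion=0.1, min_lr=1e-5
)
```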
diff --git a/tensorflow_addons/optimizers/tests/weight_decay_optimizers_test.py b/tensorflow_addons/optimizers/tests/weight_decay_optimizers_test.py
index edf3b97e0e..8099c556c0 100644
--- a/tensorflow_addons/optimizers/tests/weight_decay_optimizers_test.py
+++ b/tensorflow_addons/optimizers/tests/weight_decay_optimizers_test.py
@@ -14,6 +14,7 @@
 # ==============================================================================
 """Tests for optimizers with weight decay."""
 
+import importlib
 import numpy as np
 import pytest
 import tensorflow as tf
@@ -401,13 +402,17 @@ def test_var_list_with_exclude_list_sgdw(dtype):
     )
 
 
+if importlib.util.find_spec("tensorflow.keras.optimizers.legacy") is not None:
+    optimizer_class = tf.keras.optimizers.legacy.SGD
+else:
+    optimizer_class = tf.keras.optimizers.SGD
+
+
 @pytest.mark.parametrize(
     "optimizer",
     [
         weight_decay_optimizers.SGDW,
-        weight_decay_optimizers.extend_with_decoupled_weight_decay(
-            tf.keras.optimizers.SGD
-        ),
+        weight_decay_optimizers.extend_with_decoupled_weight_decay(optimizer_class),
     ],
 )
 @pytest.mark.parametrize("dtype", [(tf.half, 0), (tf.float32, 1), (tf.float64, 2)])
diff --git a/tensorflow_addons/optimizers/weight_decay_optimizers.py b/tensorflow_addons/optimizers/weight_decay_optimizers.py
index c4fbd60e5a..1fc491d547 100644
--- a/tensorflow_addons/optimizers/weight_decay_optimizers.py
+++ b/tensorflow_addons/optimizers/weight_decay_optimizers.py
@@ -14,6 +14,7 @@
 # ==============================================================================
 """Base class to make optimizers weight decay ready."""
 
+import importlib
 import tensorflow as tf
 from tensorflow_addons.utils.types import FloatTensorLike
 from tensorflow_addons.optimizers.utils import is_variable_matched_by_regexes
@@ -261,10 +262,18 @@ def _do_use_weight_decay(self, var):
         return var.ref() in self._decay_var_list
 
 
+if importlib.util.find_spec("tensorflow.keras.optimizers.legacy") is not None:
+    keras_legacy_optimizer = Union[
+        tf.keras.optimizers.legacy.Optimizer, tf.keras.optimizers.Optimizer
+    ]
+else:
+    keras_legacy_optimizer = tf.keras.optimizers.Optimizer
+
+
 @typechecked
 def extend_with_decoupled_weight_decay(
-    base_optimizer: Type[tf.keras.optimizers.Optimizer],
-) -> Type[tf.keras.optimizers.Optimizer]:
+    base_optimizer: Type[keras_legacy_optimizer],
+) -> Type[keras_legacy_optimizer]:
     """Factory function returning an optimizer class with decoupled weight
     decay.
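Sketch of the factory touched above (illustrative, assuming this patched build): `extend_with_decoupled_weight_decay` is now annotated with the version-dependent union, so either SGD flavour type-checks:

```python
import tensorflow as tf
import tensorflow_addons as tfa

# Build an SGD class with decoupled weight decay; on TF >= 2.9 the legacy class
# (tf.keras.optimizers.legacy.SGD) can be passed in the same way.
SGDW = tfa.optimizers.extend_with_decoupled_weight_decay(tf.keras.optimizers.SGD)
opt = SGDW(weight_decay=1e-4, learning_rate=0.01, momentum=0.9)
```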
diff --git a/tensorflow_addons/optimizers/yogi.py b/tensorflow_addons/optimizers/yogi.py
index 0d4389ac1c..8d7e0a272b 100644
--- a/tensorflow_addons/optimizers/yogi.py
+++ b/tensorflow_addons/optimizers/yogi.py
@@ -25,6 +25,7 @@
 import tensorflow as tf
 
 from tensorflow_addons.utils.types import FloatTensorLike
+from tensorflow_addons.optimizers import KerasLegacyOptimizer
 from typeguard import typechecked
 from typing import Union, Callable
 
@@ -50,7 +51,7 @@ def _solve(a, b, c):
 
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
-class Yogi(tf.keras.optimizers.Optimizer):
+class Yogi(KerasLegacyOptimizer):
     """Optimizer that implements the Yogi algorithm in Keras.
 
     See Algorithm 2 of
diff --git a/tensorflow_addons/utils/types.py b/tensorflow_addons/utils/types.py
index 1c2cb0851d..4bfa0dacf6 100644
--- a/tensorflow_addons/utils/types.py
+++ b/tensorflow_addons/utils/types.py
@@ -16,6 +16,7 @@
 
 from typing import Union, Callable, List
 
+import importlib
 import numpy as np
 import tensorflow as tf
 
@@ -46,7 +47,12 @@
 Regularizer = Union[None, dict, str, Callable, tf.keras.regularizers.Regularizer]
 Constraint = Union[None, dict, str, Callable, tf.keras.constraints.Constraint]
 Activation = Union[None, str, Callable]
-Optimizer = Union[tf.keras.optimizers.Optimizer, str]
+if importlib.util.find_spec("tensorflow.keras.optimizers.legacy") is not None:
+    Optimizer = Union[
+        tf.keras.optimizers.Optimizer, tf.keras.optimizers.legacy.Optimizer, str
+    ]
+else:
+    Optimizer = Union[tf.keras.optimizers.Optimizer, str]
 
 TensorLike = Union[
     List[Union[Number, list]],
diff --git a/tools/testing/source_code_test.py b/tools/testing/source_code_test.py
index 47c4c73c4d..421c83d958 100644
--- a/tools/testing/source_code_test.py
+++ b/tools/testing/source_code_test.py
@@ -18,7 +18,9 @@
 
 from typedapi import ensure_api_is_typed
 
+import importlib
 import tensorflow_addons as tfa
+import tensorflow as tf
 
 BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
 
@@ -40,6 +42,9 @@ def test_api_typed():
     exception_list = [
         tfa.rnn.PeepholeLSTMCell,
     ]
+    if importlib.util.find_spec("tensorflow.keras.optimizers.legacy") is not None:
+        exception_list.append(tf.keras.optimizers.legacy.Optimizer)
+
     help_message = (
         "You can also take a look at the section about it in the CONTRIBUTING.md:\n"
        "https://github.com/tensorflow/addons/blob/master/CONTRIBUTING.md#about-type-hints"
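Closing illustration (not part of the patch): with the widened `tfa.types.Optimizer` union, typeguard-checked APIs accept strings plus both optimizer flavours. The function below is hypothetical:

```python
import tensorflow as tf
from tensorflow_addons.utils import types
from typeguard import typechecked


@typechecked
def resolve_optimizer(optimizer: types.Optimizer):
    # Strings, tf.keras.optimizers.Optimizer instances and, on TF >= 2.9,
    # tf.keras.optimizers.legacy.Optimizer instances all pass this check.
    return tf.keras.optimizers.get(optimizer) if isinstance(optimizer, str) else optimizer


resolve_optimizer("sgd")
resolve_optimizer(tf.keras.optimizers.Adam())
```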