tensorflow · Squadrick · Sep 17, 2019 · Sep 17, 2019
diff --git a/tensorflow_addons/optimizers/lazy_adam.py b/tensorflow_addons/optimizers/lazy_adam.py
@@ -55,6 +55,33 @@ def __init__(self,
                  amsgrad=False,
                  name='LazyAdam',
                  **kwargs):
+        """Constructs a new LazyAdam optimizer.
+
+        Args:
+          learning_rate: A `Tensor` or a floating point value.
+            The learning rate.
+          beta_1: A `float` value or a constant `float` tensor.
+            The exponential decay rate for the 1st moment estimates.
+          beta_2: A `float` value or a constant `float` tensor.
+            The exponential decay rate for the 2nd moment estimates.
+          epsilon: A small constant for numerical stability.
+            This epsilon is "epsilon hat" in
+            [Adam: A Method for Stochastic Optimization. Kingma et al., 2014]
+            (http://arxiv.org/abs/1412.6980) (in the formula just
+            before Section 2.1), not the epsilon in Algorithm 1 of the paper.
+          amsgrad: `boolean`. Whether to apply the AMSGrad variant of this
+            algorithm from the paper "On the Convergence of Adam and Beyond".
+            Note that this argument is currently not supported and
+            can only be `False`.
+          name: Optional name for the operations created when applying
+            gradients. Defaults to "LazyAdam".
+          **kwargs: Keyword arguments. Allowed keys are {`clipnorm`,
+            `clipvalue`, `lr`, `decay`}. `clipnorm` clips gradients by norm;
+            `clipvalue` clips gradients by value; `decay` is included for
+            backward compatibility to allow inverse time decay of the
+            learning rate; `lr` is included for backward compatibility, but
+            using `learning_rate` instead is recommended.
+        """
         super(LazyAdam, self).__init__(
             learning_rate=learning_rate,
             beta_1=beta_1,