From 07cb497f67b052c0ad8fbc7c09436275427e9e3f Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Tue, 17 Sep 2019 23:22:55 +0800
Subject: [PATCH] add documentation

---
 tensorflow_addons/optimizers/lazy_adam.py | 27 +++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/tensorflow_addons/optimizers/lazy_adam.py b/tensorflow_addons/optimizers/lazy_adam.py
index 494d108732..31762f9c25 100644
--- a/tensorflow_addons/optimizers/lazy_adam.py
+++ b/tensorflow_addons/optimizers/lazy_adam.py
@@ -55,6 +55,33 @@ def __init__(self,
                  amsgrad=False,
                  name='LazyAdam',
                  **kwargs):
+        """Constructs a new LazyAdam optimizer.
+
+        Args:
+            learning_rate: A `Tensor` or a floating point value.
+                The learning rate.
+            beta_1: A `float` value or a constant `float` tensor.
+                The exponential decay rate for the 1st moment estimates.
+            beta_2: A `float` value or a constant `float` tensor.
+                The exponential decay rate for the 2nd moment estimates.
+            epsilon: A small constant for numerical stability.
+                This epsilon is "epsilon hat" in
+                [Adam: A Method for Stochastic Optimization. Kingma et al., 2014]
+                (http://arxiv.org/abs/1412.6980) (in the formula just
+                before Section 2.1), not the epsilon in Algorithm 1 of the paper.
+            amsgrad: `boolean`. Whether to apply the AMSGrad variant of this
+                algorithm from the paper "On the Convergence of Adam and Beyond".
+                Note that this argument is currently not supported and can
+                only be `False`.
+            name: Optional name for the operations created when applying
+                gradients. Defaults to "LazyAdam".
+            **kwargs: Keyword arguments. Allowed to be {`clipnorm`, `clipvalue`,
+                `lr`, `decay`}. `clipnorm` clips gradients by norm; `clipvalue`
+                clips gradients by value; `decay` is included for backward
+                compatibility to allow time-inverse decay of the learning rate;
+                `lr` is included for backward compatibility, and using
+                `learning_rate` is recommended instead.
+        """
         super(LazyAdam, self).__init__(
             learning_rate=learning_rate,
             beta_1=beta_1,
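
For context on how the documented constructor is used, here is a minimal usage sketch (not part of the patch). It assumes TensorFlow 2.x with the tensorflow-addons package installed and `LazyAdam` exposed as `tfa.optimizers.LazyAdam`; the model and hyperparameter values are illustrative only.

import tensorflow as tf
import tensorflow_addons as tfa

# Construct the optimizer with the arguments described in the docstring.
optimizer = tfa.optimizers.LazyAdam(
    learning_rate=0.001,  # `Tensor` or float
    beta_1=0.9,           # exponential decay rate for the 1st moment estimates
    beta_2=0.999,         # exponential decay rate for the 2nd moment estimates
    epsilon=1e-7,         # "epsilon hat" from Kingma et al., 2014
    amsgrad=False,        # must stay False; AMSGrad is not supported here
    name='LazyAdam')

# LazyAdam only updates moment-estimate slots for the variable slices that
# actually receive gradients, which is useful for large embedding tables.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=10000, output_dim=16),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(1),
])
model.compile(optimizer=optimizer, loss='mse')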