File tree Expand file tree Collapse file tree 2 files changed +7
-3
lines changed Expand file tree Collapse file tree 2 files changed +7
-3
lines changed Original file line number Diff line number Diff line change @@ -432,17 +432,17 @@ def _register_adam_variants(registry: OptimizerRegistry) -> None:
432432 OptimInfo (
433433 name = 'adafactorbv' ,
434434 opt_class = AdafactorBigVision ,
435- description = 'Big Vision variant of Adafactor with factored gradients, half precision momentum. ' ,
435+ description = 'Big Vision variant of Adafactor with factored gradients, half precision momentum' ,
436436 ),
437437 OptimInfo (
438438 name = 'adopt' ,
439439 opt_class = Adopt ,
440- description = 'Memory-efficient implementation of Adam with factored gradients ' ,
440+ description = 'Modified Adam that can converge with any β2 with the optimal rate ' ,
441441 ),
442442 OptimInfo (
443443 name = 'adoptw' ,
444444 opt_class = Adopt ,
445- description = 'Memory-efficient implementation of Adam with factored gradients ' ,
445+ description = 'Modified AdamW (decoupled decay) that can converge with any β2 with the optimal rate ' ,
446446 defaults = {'decoupled' : True }
447447 ),
448448 ]
Original file line number Diff line number Diff line change @@ -51,6 +51,10 @@ def _get_value(x):
5151
5252
5353class Adopt (Optimizer ):
54+ """
55+ ADOPT: Modified Adam Can Converge with Any β2 with the Optimal Rate: https://arxiv.org/abs/2411.02853
56+
57+ """
5458 def __init__ (
5559 self ,
5660 params ,
You can’t perform that action at this time.
0 commit comments