diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 2478f698e659b..6697181bb98f3 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""nn.Module with additional great features.""" +"""The LightningModule - an nn.Module with many additional features.""" import collections import copy @@ -93,16 +93,16 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self._loaded_optimizer_states_dict = {} - #: Pointer to the trainer object + # pointer to the trainer object self.trainer = None self._distrib_type = None self._device_type = None - #: True if using amp + # true if using amp self.use_amp: bool = False - #: The precision used + # the precision used self.precision: int = 32 # optionally can be set by user @@ -117,6 +117,17 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self._metric_attributes: Optional[Dict[int, str]] = None def optimizers(self, use_pl_optimizer: bool = True) -> Union[Optimizer, List[Optimizer], List[LightningOptimizer]]: + """ + Returns the optimizer(s) that are being used during training. Useful for manual optimization. + + Args: + use_pl_optimizer: If ``True``, will wrap the optimizer(s) in a + :class:`~pytorch_lightning.core.optimizer.LightningOptimizer` for automatic handling of precision and + profiling. + + Returns: + A single optimizer, or a list of optimizers in case multiple ones are present. + """ if use_pl_optimizer: opts = list(self.trainer.lightning_optimizers.values()) else: @@ -129,6 +140,13 @@ def optimizers(self, use_pl_optimizer: bool = True) -> Union[Optimizer, List[Opt return opts def lr_schedulers(self) -> Optional[Union[Any, List[Any]]]: + """ + Returns the learning rate scheduler(s) that are being used during training. Useful for manual optimization. + + Returns: + A single scheduler, or a list of schedulers in case multiple ones are present, or ``None`` if no + schedulers were returned in :meth:`configure_optimizers`. + """ if not self.trainer.lr_schedulers: return None @@ -144,32 +162,43 @@ def lr_schedulers(self) -> Optional[Union[Any, List[Any]]]: @property def example_input_array(self) -> Any: + """ + The example input array is a specification of what the module can consume in the :meth:`forward` method. + The return type is interpreted as follows: + + - Single tensor: It is assumed the model takes a single argument, i.e., + ``model.forward(model.example_input_array)`` + - Tuple: The input array should be interpreted as a sequence of positional arguments, i.e., + ``model.forward(*model.example_input_array)`` + - Dict: The input array represents named keyword arguments, i.e., + ``model.forward(**model.example_input_array)`` + """ return self._example_input_array + @example_input_array.setter + def example_input_array(self, example: Any) -> None: + self._example_input_array = example + @property def current_epoch(self) -> int: - """The current epoch""" + """The current epoch in the Trainer. If no Trainer is attached, this property is 0.""" return self.trainer.current_epoch if self.trainer else 0 @property def global_step(self) -> int: - """Total training batches seen across all epochs""" + """Total training batches seen across all epochs. 
If no Trainer is attached, this property is 0.""" return self.trainer.global_step if self.trainer else 0 @property def global_rank(self) -> int: - """ The index of the current process across all nodes and devices. """ + """The index of the current process across all nodes and devices.""" return self.trainer.global_rank if self.trainer else 0 @property def local_rank(self) -> int: - """ The index of the current process within a single node. """ + """The index of the current process within a single node.""" return self.trainer.local_rank if self.trainer else 0 - @example_input_array.setter - def example_input_array(self, example: Any) -> None: - self._example_input_array = example - @property def datamodule(self) -> Any: warning_cache.deprecation( @@ -204,7 +233,7 @@ def datamodule(self, datamodule: Any) -> None: @property def on_gpu(self): """ - True if your model is currently running on GPUs. + Returns ``True`` if this model is currently located on a GPU. Useful to set flags around the LightningModule for different CPU vs GPU behavior. """ return self.device.type == "cuda" @@ -212,7 +241,7 @@ def on_gpu(self): @property def automatic_optimization(self) -> bool: """ - If False you are responsible for calling .backward, .step, zero_grad. + If set to ``False`` you are responsible for calling ``.backward()``, ``.step()``, ``.zero_grad()``. """ return self._automatic_optimization @@ -223,8 +252,9 @@ def automatic_optimization(self, automatic_optimization: bool) -> None: @property def truncated_bptt_steps(self) -> int: """ - truncated_bptt_steps: Truncated back prop breaks performs backprop every k steps of much a longer sequence. - If this is > 0, the training step is passed ``hiddens``. + Enables `Truncated Backpropagation Through Time` in the Trainer when set to a positive integer. It represents + the number of times :meth:`training_step` gets called before backpropagation. If this is > 0, the + :meth:`training_step` receives an additional argument ``hiddens`` and is expected to return a hidden state. """ return self._truncated_bptt_steps @@ -298,13 +328,13 @@ def log( rank_zero_only: Optional[bool] = None, ) -> None: """ - Log a key, value + Log a key, value pair. Example:: self.log('train_loss', loss) - The default behavior per hook is as follows + The default behavior per hook is as follows: .. csv-table:: ``*`` also applies to the test loop :header: "LightningModule Hook", "on_step", "on_epoch", "prog_bar", "logger" @@ -448,7 +478,7 @@ def log_dict( add_dataloader_idx: bool = True, ) -> None: """ - Log a dictionary of values at once + Log a dictionary of values at once. Example:: @@ -592,16 +622,14 @@ def all_gather( sync_grads: bool = False, ): r""" - Allows users to call ``self.all_gather()`` from the LightningModule, thus making - the ```all_gather``` operation accelerator agnostic. - - ```all_gather``` is a function provided by accelerators to gather a tensor from several - distributed processes + Allows users to call ``self.all_gather()`` from the LightningModule, thus making the ``all_gather`` operation + accelerator agnostic. ``all_gather`` is a function provided by accelerators to gather a tensor from several + distributed processes. Args: - tensor: int, float, tensor of shape (batch, ...), or a (possibly nested) collection thereof. + data: int, float, tensor of shape (batch, ...), or a (possibly nested) collection thereof. group: the process group to gather results from. 
Defaults to all processes (world) - sync_grads: flag that allows users to synchronize gradients for all_gather op + sync_grads: flag that allows users to synchronize gradients for the all_gather operation Return: A tensor of shape (world_size, batch, ...), or if the input was a collection @@ -756,7 +784,7 @@ def training_step_end(self, training_step_outputs): def training_epoch_end(self, outputs: EPOCH_OUTPUT) -> None: """ Called at the end of the training epoch with the outputs of all training steps. - Use this in case you need to do something with all the outputs for every training_step. + Use this in case you need to do something with all the outputs returned by :meth:`training_step`. .. code-block:: python @@ -816,8 +844,6 @@ def validation_step(self, *args, **kwargs) -> Optional[STEP_OUTPUT]: (only if multiple val dataloaders used) Return: - Any of. - - Any object or value - ``None`` - Validation will skip to the next batch @@ -1237,9 +1263,9 @@ def configure_optimizers(self): - **Single optimizer**. - **List or Tuple** of optimizers. - **Two lists** - The first list has multiple optimizers, and the second has multiple LR schedulers - (or multiple ``lr_dict``). + (or multiple ``lr_dict``). - **Dictionary**, with an ``"optimizer"`` key, and (optionally) a ``"lr_scheduler"`` - key whose value is a single LR scheduler or ``lr_dict``. + key whose value is a single LR scheduler or ``lr_dict``. - **Tuple of dictionaries** as described above, with an optional ``"frequency"`` key. - **None** - Fit will run without any optimizer. @@ -1394,10 +1420,8 @@ def configure_optimizers(self): def manual_backward(self, loss: Tensor, optimizer: Optional[Optimizer] = None, *args, **kwargs) -> None: """ - Call this directly from your training_step when doing optimizations manually. - By using this we can ensure that all the proper scaling when using 16-bit etc has been done for you. - - This function forwards all args to the .backward() call as well. + Call this directly from your :meth:`training_step` when doing optimizations manually. + By using this, Lightning can ensure that all the proper scaling gets applied when using mixed precision. See :ref:`manual optimization` for more examples. @@ -1410,6 +1434,12 @@ def training_step(...): # automatically applies scaling, etc... self.manual_backward(loss) opt.step() + + Args: + loss: The tensor on which to compute gradients. Must have a graph attached. + optimizer: This argument is unused and deprecated. It will be removed in v1.4. + *args: Additional positional arguments to be forwarded to :meth:`~torch.Tensor.backward` + **kwargs: Additional keyword arguments to be forwarded to :meth:`~torch.Tensor.backward` """ if optimizer is not None: rank_zero_deprecation( @@ -1426,22 +1456,19 @@ def training_step(...): def backward(self, loss: Tensor, optimizer: Optimizer, optimizer_idx: int, *args, **kwargs) -> None: """ - Override backward with your own implementation if you need to. + Called to perform backward on the loss returned in :meth:`training_step`. + Override this hook with your own implementation if you need to. Args: - loss: Loss is already scaled by accumulated grads + loss: The loss tensor returned by :meth:`training_step`. If gradient accumulation is used, the loss here + holds the normalized value (scaled by 1 / accumulation steps). optimizer: Current optimizer being used optimizer_idx: Index of the current optimizer being used - Called to perform backward step. - Feel free to override as needed. 
- The loss passed in has already been scaled for accumulated gradients if requested. - Example:: def backward(self, loss, optimizer, optimizer_idx): loss.backward() - """ if self.automatic_optimization or self._running_manual_backward: loss.backward(*args, **kwargs) @@ -1450,18 +1477,16 @@ def toggle_optimizer(self, optimizer: Optimizer, optimizer_idx: int): """ Makes sure only the gradients of the current optimizer's parameters are calculated in the training step to prevent dangling gradients in multiple-optimizer setup. - - .. note:: Only called when using multiple optimizers - - Override for your own behavior - - It works with ``untoggle_optimizer`` to make sure param_requires_grad_state is properly reset. + It works with :meth:`untoggle_optimizer` to make sure ``param_requires_grad_state`` is properly reset. + Override for your own behavior. Args: - optimizer: Current optimizer used in training_loop - optimizer_idx: Current optimizer idx in training_loop - """ + optimizer: Current optimizer used in the training loop + optimizer_idx: Current optimizer idx in the training loop + Note: + Only called when using multiple optimizers + """ # Iterate over all optimizer parameters to preserve their `requires_grad` information # in case these are pre-defined during `configure_optimizers` param_requires_grad_state = {} @@ -1483,12 +1508,14 @@ def toggle_optimizer(self, optimizer: Optimizer, optimizer_idx: int): def untoggle_optimizer(self, optimizer_idx: int): """ - .. note:: Only called when using multiple optimizers - - Override for your own behavior + Resets the state of required gradients that were toggled with :meth:`toggle_optimizer`. + Override for your own behavior. Args: - optimizer_idx: Current optimizer idx in training_loop + optimizer_idx: Current optimizer idx in the training loop + + Note: + Only called when using multiple optimizers """ for opt_idx, opt in enumerate(self.optimizers(use_pl_optimizer=False)): if optimizer_idx != opt_idx: @@ -1520,8 +1547,7 @@ def optimizer_step( Warning: If you are overriding this method, make sure that you pass the ``optimizer_closure`` parameter to ``optimizer.step()`` function as shown in the examples. This ensures that - ``training_step()``, ``optimizer.zero_grad()``, ``backward()`` are called within - :meth:`~pytorch_lightning.loops.training_batch_loop.TrainingBatchLoop.advance`. + ``training_step()``, ``optimizer.zero_grad()``, ``backward()`` are called within the training loop. Args: epoch: Current epoch @@ -1662,6 +1688,21 @@ def tbptt_split_batch(self, batch, split_size): return splits def summarize(self, mode: Optional[str] = "top", max_depth: Optional[int] = None) -> Optional[ModelSummary]: + """ + Summarize this LightningModule. + + Args: + mode: Can be either ``'top'`` (summarize only direct submodules) or ``'full'`` (summarize all layers). + + .. deprecated:: v1.4 + This parameter was deprecated in v1.4 in favor of `max_depth` and will be removed in v1.6. + + max_depth: The maximum depth of layer nesting that the summary will include. A value of 0 turns the + layer summary off. Default: 1. + + Return: + The model summary object + """ model_summary = None # temporary mapping from mode to max_depth @@ -1801,16 +1842,16 @@ def save_hyperparameters( ignore: Optional[Union[Sequence[str], str]] = None, frame: Optional[types.FrameType] = None ) -> None: - """Save model arguments to ``hparams`` attribute. + """Save model arguments to the ``hparams`` attribute. 
Args: - args: single object of `dict`, `NameSpace` or `OmegaConf` - or string names or arguments from class ``__init__`` - ignore: an argument name or a list of argument names from - class ``__init__`` to be ignored - frame: a frame object. Default is None + args: single object of type :class:`dict`, :class:`~argparse.Namespace`, `OmegaConf` + or strings representing the argument names in ``__init__``. + ignore: an argument name or a list of argument names in ``__init__`` to be ignored + frame: a frame object. Default is ``None``. Example:: + >>> class ManuallyArgsModel(LightningModule): ... def __init__(self, arg1, arg2, arg3): ... super().__init__() @@ -1889,7 +1930,7 @@ def to_onnx( **kwargs, ): """ - Saves the model in ONNX format + Saves the model in ONNX format. Args: file_path: The path of the file the onnx model should be saved to. @@ -1941,8 +1982,8 @@ def to_torchscript( ) -> Union[ScriptModule, Dict[str, ScriptModule]]: """ By default compiles the whole model to a :class:`~torch.jit.ScriptModule`. - If you want to use tracing, please provided the argument `method='trace'` and make sure that either the - example_inputs argument is provided, or the model has self.example_input_array set. + If you want to use tracing, please provide the argument ``method='trace'`` and make sure that either the + `example_inputs` argument is provided, or the model has :attr:`example_input_array` set. If you would like to customize the modules that are scripted you should override this method. In case you want to return multiple modules, we recommend using a dictionary. @@ -1950,7 +1991,7 @@ def to_torchscript( file_path: Path where to save the torchscript. Default: None (no file saved). method: Whether to use TorchScript's script or trace method. Default: 'script' example_inputs: An input to be used to do tracing when method is set to 'trace'. - Default: None (Use self.example_input_array) + Default: None (uses :attr:`example_input_array`) **kwargs: Additional arguments that will be passed to the :func:`torch.jit.script` or :func:`torch.jit.trace` function. @@ -1980,7 +2021,7 @@ def to_torchscript( True Return: - This LightningModule as a torchscript, regardless of whether file_path is + This LightningModule as a torchscript, regardless of whether `file_path` is defined or not. """ mode = self.training @@ -2014,12 +2055,20 @@ def to_torchscript( @property def hparams(self) -> Union[AttributeDict, dict, Namespace]: + """ + The collection of hyperparameters saved with :meth:`save_hyperparameters`. It is mutable by the user. + For the frozen set of initial hyperparameters, use :attr:`hparams_initial`. + """ if not hasattr(self, "_hparams"): self._hparams = AttributeDict() return self._hparams @property def hparams_initial(self) -> AttributeDict: + """ + The collection of hyperparameters saved with :meth:`save_hyperparameters`. These contents are read-only. + Manual updates to the saved hyperparameters can instead be performed through :attr:`hparams`. + """ if not hasattr(self, "_hparams_initial"): return AttributeDict() # prevent any change @@ -2027,6 +2076,10 @@ def hparams_initial(self) -> AttributeDict: @property def model_size(self) -> float: + """ + The model's size in megabytes. The computation includes everything in the + :meth:`~torch.nn.Module.state_dict`, i.e., by default the parameters and buffers. 
+ """ # todo: think about better way without need to dump model to drive tmp_name = f"{uuid.uuid4().hex}.pt" torch.save(self.state_dict(), tmp_name) @@ -2035,8 +2088,8 @@ def model_size(self) -> float: return size_mb def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: - """Appends the :attr:`trainer.callback_metrics` dictionary to the given queue. - + """ + Appends the :attr:`trainer.callback_metrics` dictionary to the given queue. To avoid issues with memory sharing, we cast the data to numpy. Args: @@ -2048,8 +2101,8 @@ def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: queue.put(callback_metrics) def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: - """Retrieve the :attr:`trainer.callback_metrics` dictionary from the given queue. - + """ + Retrieve the :attr:`trainer.callback_metrics` dictionary from the given queue. To preserve consistency, we cast back the data to ``torch.Tensor``. Args: