diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 6ed5d6c31f719..072daf9dffcb6 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -203,8 +203,10 @@ def __init__(
                     and will be removed in v1.7.0. Please use the ``strategy`` argument instead.
 
             accumulate_grad_batches: Accumulates grads every k batches or as set up in the dict.
+                Default: ``None``.
 
             amp_backend: The mixed precision backend to use ("native" or "apex").
+                Default: ``'native'``.
 
             amp_level: The optimization level to use (O1, O2, etc...). By default it will be set to "O2"
                 if ``amp_backend`` is set to "apex".
@@ -213,23 +215,29 @@ def __init__(
                 trying to optimize initial learning for faster convergence. trainer.tune() method will
                 set the suggested learning rate in self.lr or self.learning_rate in the LightningModule.
                 To use a different key set a string instead of True with the key name.
+                Default: ``False``.
 
             auto_scale_batch_size: If set to True, will `initially` run a batch size finder
                 trying to find the largest batch size that fits into memory.
                 The result will be stored in self.batch_size in the LightningModule.
                 Additionally, can be set to either `power` that estimates the batch size through
                 a power search or `binsearch` that estimates the batch size through a binary search.
+                Default: ``False``.
 
             auto_select_gpus: If enabled and ``gpus`` is an integer, pick available
                 gpus automatically. This is especially useful when
                 GPUs are configured to be in "exclusive mode", such that
                 only one process at a time can access them.
+                Default: ``False``.
 
-            benchmark: If true enables cudnn.benchmark.
+            benchmark: If ``True``, enables cudnn.benchmark.
+                Default: ``False``.
 
             callbacks: Add a callback or list of callbacks.
+                Default: ``None``.
 
             checkpoint_callback: If ``True``, enable checkpointing.
+                Default: ``None``.
 
                 .. deprecated:: v1.5
                     ``checkpoint_callback`` has been deprecated in v1.5 and will be removed in v1.7.
@@ -238,14 +246,18 @@ def __init__(
             enable_checkpointing: If ``True``, enable checkpointing.
                 It will configure a default ModelCheckpoint callback if there is no user-defined ModelCheckpoint in
                 :paramref:`~pytorch_lightning.trainer.trainer.Trainer.callbacks`.
+                Default: ``True``.
 
             check_val_every_n_epoch: Check val every n train epochs.
+                Default: ``1``.
+
             default_root_dir: Default path for logs and weights when no logger/ckpt_callback passed.
                 Default: ``os.getcwd()``.
                 Can be remote file paths such as `s3://mybucket/path` or 'hdfs://path/'
 
             detect_anomaly: Enable anomaly detection for the autograd engine.
+                Default: ``False``.
 
             deterministic: If ``True``, sets whether PyTorch operations must use deterministic algorithms.
                 Default: ``False``.
@@ -255,6 +267,7 @@ def __init__(
             fast_dev_run: Runs n if set to ``n`` (int) else 1 if set to ``True`` batch(es)
                 of train, val and test to find any bugs (ie: a sort of unit test).
+                Default: ``False``.
 
             flush_logs_every_n_steps: How often to flush logs to disk (defaults to every 100 steps).
@@ -263,27 +276,34 @@ def __init__(
                     Please configure flushing directly in the logger instead.
 
             gpus: Number of GPUs to train on (int) or which GPUs to train on (list or str) applied per node
+                Default: ``None``.
 
             gradient_clip_val: The value at which to clip gradients. Passing ``gradient_clip_val=None`` disables
                 gradient clipping. If using Automatic Mixed Precision (AMP), the gradients will be unscaled before.
+                Default: ``None``.
 
             gradient_clip_algorithm: The gradient clipping algorithm to use.
                 Pass ``gradient_clip_algorithm="value"`` to clip by value, and ``gradient_clip_algorithm="norm"``
                 to clip by norm. By default it will be set to ``"norm"``.
 
             limit_train_batches: How much of training dataset to check (float = fraction, int = num_batches).
+                Default: ``1.0``.
 
             limit_val_batches: How much of validation dataset to check (float = fraction, int = num_batches).
+                Default: ``1.0``.
 
             limit_test_batches: How much of test dataset to check (float = fraction, int = num_batches).
+                Default: ``1.0``.
 
             limit_predict_batches: How much of prediction dataset to check (float = fraction, int = num_batches).
+                Default: ``1.0``.
 
             logger: Logger (or iterable collection of loggers) for experiment tracking. A ``True`` value uses
                 the default ``TensorBoardLogger``. ``False`` will disable logging. If multiple loggers are
                 provided and the `save_dir` property of that logger is not set, local files (checkpoints,
                 profiler traces, etc.) are saved in ``default_root_dir`` rather than in the ``log_dir`` of any
                 of the individual loggers.
+                Default: ``True``.
 
             log_gpu_memory: None, 'min_max', 'all'. Might slow performance.
@@ -291,7 +311,8 @@ def __init__(
                     Deprecated in v1.5.0 and will be removed in v1.7.0
                     Please use the ``DeviceStatsMonitor`` callback directly instead.
 
-            log_every_n_steps: How often to log within steps (defaults to every 50 steps).
+            log_every_n_steps: How often to log within steps.
+                Default: ``50``.
 
             prepare_data_per_node: If True, each LOCAL_RANK=0 will call prepare data.
                 Otherwise only NODE_RANK=0, LOCAL_RANK=0 will prepare data
@@ -319,15 +340,20 @@ def __init__(
                     pass ``enable_progress_bar = False`` to the Trainer.
 
             enable_progress_bar: Whether to enable to progress bar by default.
+                Default: ``True``.
 
             profiler: To profile individual steps during training and assist in identifying bottlenecks.
+                Default: ``None``.
 
             overfit_batches: Overfit a fraction of training data (float) or a set number of batches (int).
+                Default: ``0.0``.
 
             plugins: Plugins allow modification of core behavior like ddp and amp, and enable custom lightning plugins.
+                Default: ``None``.
 
             precision: Double precision (64), full precision (32), half precision (16) or bfloat16 precision (bf16).
                 Can be used on CPU, GPU, TPUs or IPUs.
+                Default: ``32``.
 
             max_epochs: Stop training once this number of epochs is reached. Disabled by default (None).
                 If both max_epochs and max_steps are not specified, defaults to ``max_epochs = 1000``.
@@ -339,21 +365,25 @@ def __init__(
                 and ``max_epochs = None``, will default to ``max_epochs = 1000``.
                 To enable infinite training, set ``max_epochs`` to ``-1``.
 
-            min_steps: Force training for at least these number of steps. Disabled by default (None).
+            min_steps: Force training for at least this number of steps. Disabled by default (``None``).
 
-            max_time: Stop training after this amount of time has passed. Disabled by default (None).
+            max_time: Stop training after this amount of time has passed. Disabled by default (``None``).
                 The time duration can be specified in the format DD:HH:MM:SS
                 (days, hours, minutes seconds), as a :class:`datetime.timedelta`,
                 or a dictionary with keys that will be passed to :class:`datetime.timedelta`.
 
             num_nodes: Number of GPU nodes for distributed training.
+                Default: ``1``.
 
             num_processes: Number of processes for distributed training with ``accelerator="cpu"``.
+                Default: ``1``.
 
             num_sanity_val_steps: Sanity check runs n validation batches before starting the training routine.
                 Set it to `-1` to run all batches in all validation dataloaders.
+                Default: ``2``.
 
             reload_dataloaders_every_n_epochs: Set to a non-negative integer to reload dataloaders every n epochs.
+                Default: ``0``.
 
             replace_sampler_ddp: Explicitly enables or disables sampler replacement. If not specified this
                 will toggled automatically when DDP is used. By default it will add ``shuffle=True`` for
@@ -370,8 +400,10 @@ def __init__(
             strategy: Supports different training strategies with aliases
                 as well custom training type plugins.
+                Default: ``None``.
 
             sync_batchnorm: Synchronize batch norm layers between process groups/whole world.
+                Default: ``False``.
 
             terminate_on_nan: If set to True, will terminate training (by raising a `ValueError`) at the
                 end of each training batch, if any of the parameters or the loss are NaN or +/-inf.
@@ -381,18 +413,24 @@ def __init__(
                     Please use ``detect_anomaly`` instead.
 
             detect_anomaly: Enable anomaly detection for the autograd engine.
+                Default: ``False``.
 
-            tpu_cores: How many TPU cores to train on (1 or 8) / Single TPU to train on [1]
+            tpu_cores: How many TPU cores to train on (1 or 8) / Single TPU to train on (1)
+                Default: ``None``.
 
             ipus: How many IPUs to train on.
+                Default: ``None``.
 
             track_grad_norm: -1 no tracking. Otherwise tracks that p-norm. May be set to 'inf' infinity-norm. If using
                 Automatic Mixed Precision (AMP), the gradients will be unscaled before logging them.
+                Default: ``-1``.
 
             val_check_interval: How often to check the validation set. Use float to check within a training epoch,
                 use int to check every n steps (batches).
+                Default: ``1.0``.
 
             enable_model_summary: Whether to enable model summarization by default.
+                Default: ``True``.
 
             weights_summary: Prints a summary of the weights when training begins.
@@ -410,14 +448,17 @@ def __init__(
             move_metrics_to_cpu: Whether to force internal logged metrics to be moved to cpu.
                 This can save some gpu memory, but can make training slower. Use with attention.
+                Default: ``False``.
 
             multiple_trainloader_mode: How to loop over the datasets when there are multiple train loaders.
                 In 'max_size_cycle' mode, the trainer ends one epoch when the largest dataset is traversed,
                 and smaller datasets reload when running out of their data.
                 In 'min_size' mode, all the datasets reload when reaching the minimum length of datasets.
+                Default: ``"max_size_cycle"``.
 
             stochastic_weight_avg: Whether to use `Stochastic Weight Averaging (SWA) `_.
+                Default: ``False``.
 
                 .. deprecated:: v1.5
                     ``stochastic_weight_avg`` has been deprecated in v1.5 and will be removed in v1.7.
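
For reference, here is a minimal sketch of what a few of the defaults documented above look like when spelled out at the call site. It assumes a PyTorch Lightning release of roughly this vintage (~1.5); exact default values can differ between releases, so treat the inline comments as mirroring the docstring rather than as authoritative:

```python
# Passing the documented defaults explicitly; for these arguments this is
# equivalent to constructing `pl.Trainer()` with no arguments at all.
import pytorch_lightning as pl

trainer = pl.Trainer(
    accumulate_grad_batches=None,    # Default: None (no gradient accumulation)
    gradient_clip_val=None,          # Default: None (gradient clipping disabled)
    gradient_clip_algorithm="norm",  # clips by norm once a clip value is set
    limit_train_batches=1.0,         # Default: 1.0 (use the full training set)
    log_every_n_steps=50,            # Default: 50
    num_sanity_val_steps=2,          # Default: 2
    precision=32,                    # Default: 32 (full precision)
    enable_model_summary=True,       # Default: True
)
print(trainer.num_sanity_val_steps)  # -> 2
```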