diff --git a/examples/pl_basics/autoencoder.py b/examples/pl_basics/autoencoder.py
index ae8c7b6611920..98b5ff3700a2a 100644
--- a/examples/pl_basics/autoencoder.py
+++ b/examples/pl_basics/autoencoder.py
@@ -50,18 +50,18 @@ def __init__(
     ) -> None:
         """
         Args:
-            num_samples: Number of images displayed in the grid. Default: ``3``.
+            num_samples: Number of images displayed in the grid.
             nrow: Number of images displayed in each row of the grid.
-                The final grid size is ``(B / nrow, nrow)``. Default: ``8``.
-            padding: Amount of padding. Default: ``2``.
+                The final grid size is ``(B / nrow, nrow)``.
+            padding: Amount of padding.
             normalize: If ``True``, shift the image to the range (0, 1),
-                by the min and max values specified by :attr:`range`. Default: ``False``.
+                by the min and max values specified by :attr:`range`.
             norm_range: Tuple (min, max) where min and max are numbers,
                 then these numbers are used to normalize the image.
                 By default, min and max are computed from the tensor.
             scale_each: If ``True``, scale each image in the batch of
-                images separately rather than the (min, max) over all images. Default: ``False``.
-            pad_value: Value for the padded pixels. Default: ``0``.
+                images separately rather than the (min, max) over all images.
+            pad_value: Value for the padded pixels.
         """
         if not _TORCHVISION_AVAILABLE:  # pragma: no cover
             raise ModuleNotFoundError("You want to use `torchvision` which is not installed yet.")
diff --git a/src/pytorch_lightning/callbacks/model_checkpoint.py b/src/pytorch_lightning/callbacks/model_checkpoint.py
index 78f0cb8193f4e..746fd31767c76 100644
--- a/src/pytorch_lightning/callbacks/model_checkpoint.py
+++ b/src/pytorch_lightning/callbacks/model_checkpoint.py
@@ -81,10 +81,10 @@ class ModelCheckpoint(Checkpoint):
        ...     )

            By default, filename is ``None`` and will be set to ``'{epoch}-{step}'``.
-        monitor: quantity to monitor. By default it is ``None`` which saves a checkpoint only for the last epoch.
-        verbose: verbosity mode. Default: ``False``.
+        monitor: quantity to monitor. By default, it is ``None`` which saves a checkpoint only for the last epoch.
+        verbose: verbosity mode.
         save_last: When ``True``, saves an exact copy of the checkpoint to a file `last.ckpt` whenever a checkpoint
-            file gets saved. This allows accessing the latest checkpoint in a deterministic manner. Default: ``None``.
+            file gets saved. This allows accessing the latest checkpoint in a deterministic manner.
         save_top_k: if ``save_top_k == k``,
             the best k models according to the quantity monitored will be saved.
             if ``save_top_k == 0``, no models are saved.
diff --git a/src/pytorch_lightning/callbacks/progress/rich_progress.py b/src/pytorch_lightning/callbacks/progress/rich_progress.py
index 1704a8f43effa..5e35382007232 100644
--- a/src/pytorch_lightning/callbacks/progress/rich_progress.py
+++ b/src/pytorch_lightning/callbacks/progress/rich_progress.py
@@ -223,7 +223,7 @@ class RichProgressBar(ProgressBarBase):
     Args:
         refresh_rate: Determines at which rate (in number of batches) the progress bars get updated.
            Set it to ``0`` to disable the display.
-        leave: Leaves the finished progress bar in the terminal at the end of the epoch. Default: False
+        leave: Leaves the finished progress bar in the terminal at the end of the epoch.
         theme: Contains styles used to stylize the progress bar.
         console_kwargs: Args for constructing a `Console`
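For review context, a minimal usage sketch of the two callbacks whose docstrings change above; the argument values are illustrative rather than the defaults being removed, and ``RichProgressBar`` assumes the optional ``rich`` package is installed::

    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks import ModelCheckpoint, RichProgressBar

    # With monitor=None (the default) only the last epoch's checkpoint is kept;
    # here the best three checkpoints by a logged "val_loss" metric are kept instead.
    checkpoint_cb = ModelCheckpoint(monitor="val_loss", save_top_k=3, save_last=True)

    # leave=True keeps the finished bar in the terminal after each epoch.
    progress_cb = RichProgressBar(leave=True)

    trainer = Trainer(callbacks=[checkpoint_cb, progress_cb])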
diff --git a/src/pytorch_lightning/callbacks/stochastic_weight_avg.py b/src/pytorch_lightning/callbacks/stochastic_weight_avg.py
index c7705775bc267..0a848bbcd6bd0 100644
--- a/src/pytorch_lightning/callbacks/stochastic_weight_avg.py
+++ b/src/pytorch_lightning/callbacks/stochastic_weight_avg.py
@@ -78,7 +78,7 @@ def __init__(
                the ``swa_epoch_start``-th epoch. If provided as float between 0 and 1,
                the procedure will start from ``int(swa_epoch_start * max_epochs)`` epoch

-            annealing_epochs: number of epochs in the annealing phase (default: 10)
+            annealing_epochs: number of epochs in the annealing phase.

             annealing_strategy: Specifies the annealing strategy (default: "cos"):
@@ -89,11 +89,10 @@
                the function must take in the current value of the
                :class:`AveragedModel` parameter, the current value of :attr:`model`
                parameter and the number of models already averaged; if None,
-                equally weighted average is used (default: ``None``)
+                equally weighted average is used (default: ``None``).

             device: if provided, the averaged model will be stored on the ``device``.
                When None is provided, it will infer the `device` from ``pl_module``.
-                (default: ``"cpu"``)

         """
diff --git a/src/pytorch_lightning/core/mixins/hparams_mixin.py b/src/pytorch_lightning/core/mixins/hparams_mixin.py
index 56ef099a788e2..e59aed34d5b31 100644
--- a/src/pytorch_lightning/core/mixins/hparams_mixin.py
+++ b/src/pytorch_lightning/core/mixins/hparams_mixin.py
@@ -44,7 +44,7 @@ def save_hyperparameters(
             ignore: an argument name or a list of argument names from
                class ``__init__`` to be ignored
             frame: a frame object. Default is None
-            logger: Whether to send the hyperparameters to the logger. Default: True
+            logger: Whether to send the hyperparameters to the logger.

         Example::
             >>> from pytorch_lightning.core.mixins import HyperparametersMixin
diff --git a/src/pytorch_lightning/core/module.py b/src/pytorch_lightning/core/module.py
index cf86a4ccb756c..40f58e0ef8501 100644
--- a/src/pytorch_lightning/core/module.py
+++ b/src/pytorch_lightning/core/module.py
@@ -1880,7 +1880,7 @@ def to_torchscript(
         Args:
             file_path: Path where to save the torchscript. Default: None (no file saved).
-            method: Whether to use TorchScript's script or trace method. Default: 'script'
+            method: Whether to use TorchScript's script or trace method.
             example_inputs: An input to be used to do tracing when method is set to 'trace'.
                Default: None (uses :attr:`example_input_array`)
             **kwargs: Additional arguments that will be passed to the :func:`torch.jit.script` or
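A minimal sketch of the ``save_hyperparameters`` and ``to_torchscript`` arguments documented above; the module, layer sizes and file path are made up for illustration::

    import torch
    from pytorch_lightning import LightningModule

    class LitModel(LightningModule):
        def __init__(self, hidden_dim: int = 64, lr: float = 1e-3):
            super().__init__()
            # logger=True (the default) would also send these values to the logger
            self.save_hyperparameters(logger=False)
            self.layer = torch.nn.Linear(32, hidden_dim)

        def forward(self, x):
            return self.layer(x)

    model = LitModel()
    # method="script" is the default; "trace" needs example inputs
    model.to_torchscript(file_path="model.pt", method="trace", example_inputs=torch.randn(1, 32))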
diff --git a/src/pytorch_lightning/strategies/fully_sharded.py b/src/pytorch_lightning/strategies/fully_sharded.py
index d876b3523002e..fe70a7f7814b0 100644
--- a/src/pytorch_lightning/strategies/fully_sharded.py
+++ b/src/pytorch_lightning/strategies/fully_sharded.py
@@ -89,32 +89,23 @@ def __init__(
         Arguments:
             cpu_offload: Offload FP32 params to CPU. Only usable in precision=16 mode.
-                (Default: False).
             move_grads_to_cpu: Moves gradient shards to CPU after reduction.
                Only disable if using CPU based optimizers
-                (Default to ``cpu_offload``).
-            flatten_parameters: Flattens parameter into single contiguous tensor for speed efficiency
-                (Default: True).
+            flatten_parameters: Flattens parameter into single contiguous tensor for speed efficiency.
             reshard_after_forward: Reshard parameters after the forward pass, which saves memory but slows
                down training. This is only relevant when resharding individual layers.
-                (Default: True).
             fp32_reduce_scatter: Reduce-Scatter gradients in FP32. Only relevant in mixed precision
-                (Default: None).
             compute_dtype: dtype for full parameters for computation. Default to torch.float32,
                unless using mixed precision, in which case defaults to torch.float16.
-                (Default: None).
             bucket_cap_mb: bucket parameters so that gradient reduction
                can potentially overlap with backward computation.
                bucket_cap_mb controls the bucket size in MegaBytes (MB).
                Buckets are sub-divided based on world_size,
                so the max shard size is roughly bucket_cap_mb / world_size.
                Values <= 0 disable bucketing.
-                (Default: 25).
             min_num_params: Number of parameters to wrap when using FairScale ``auto_wrap``.
-                (Default: 1e8)
             state_dict_to_cpu: Whether to return parameters (returned by :func:`state_dict`)
                on CPU device. If ``False``, this will default to ``compute_device``.
-                (Default: True).
         """

         super().__init__(
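A minimal sketch of how the FairScale fully-sharded strategy above is typically enabled; it assumes ``fairscale`` is installed and, per the docstring, pairs ``cpu_offload`` with 16-bit precision::

    from pytorch_lightning import Trainer
    from pytorch_lightning.strategies import DDPFullyShardedStrategy

    # cpu_offload is only usable together with precision=16
    strategy = DDPFullyShardedStrategy(cpu_offload=True, reshard_after_forward=True)
    trainer = Trainer(accelerator="gpu", devices=4, precision=16, strategy=strategy)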
diff --git a/src/pytorch_lightning/trainer/trainer.py b/src/pytorch_lightning/trainer/trainer.py
index 00e070d36e33d..3b0f053c40c0a 100644
--- a/src/pytorch_lightning/trainer/trainer.py
+++ b/src/pytorch_lightning/trainer/trainer.py
@@ -179,10 +179,8 @@ def __init__(
                as well as custom accelerator instances.

             accumulate_grad_batches: Accumulates grads every k batches or as set up in the dict.
-                Default: ``None``.

             amp_backend: The mixed precision backend to use ("native" or "apex").
-                Default: ``'native''``.

                .. deprecated:: v1.9
                    Setting ``amp_backend`` inside the ``Trainer`` is deprecated in v1.8.0 and will be removed
@@ -199,7 +197,6 @@
                trying to optimize initial learning for faster convergence. trainer.tune() method will
                set the suggested learning rate in self.lr or self.learning_rate in the LightningModule.
                To use a different key set a string instead of True with the key name.
-                Default: ``False``.

             auto_scale_batch_size: If set to True, will `initially` run a batch size finder
                trying to find the largest batch size that fits into memory.
@@ -207,13 +204,11 @@
                The result will be stored in self.batch_size in the LightningModule
                or LightningDataModule depending on your setup.
                Additionally, can be set to either `power` that estimates the batch size through
                a power search or `binsearch` that estimates the batch size through a binary search.
-                Default: ``False``.

             auto_select_gpus: If enabled and ``gpus`` or ``devices`` is an integer, pick available
                gpus automatically. This is especially useful when GPUs are configured to be in
                "exclusive mode", such that only one process at a time can access them.
-                Default: ``False``.

                .. deprecated:: v1.9
                    ``auto_select_gpus`` has been deprecated in v1.9.0 and will be removed in v1.10.0.
@@ -224,42 +219,33 @@
                The value for ``torch.backends.cudnn.benchmark`` set in the current session will be used
                (``False`` if not manually set). If :paramref:`~pytorch_lightning.trainer.Trainer.deterministic`
                is set to ``True``, this will default to ``False``. Override to manually set a different value.
-                Default: ``None``.

             callbacks: Add a callback or list of callbacks.
-                Default: ``None``.

             enable_checkpointing: If ``True``, enable checkpointing.
                It will configure a default ModelCheckpoint callback if there is no user-defined ModelCheckpoint in
                :paramref:`~pytorch_lightning.trainer.trainer.Trainer.callbacks`.
-                Default: ``True``.

             check_val_every_n_epoch: Perform a validation loop every after every `N` training epochs. If ``None``,
                validation will be done solely based on the number of training batches,
                requiring ``val_check_interval`` to be an integer value.
-                Default: ``1``.

             default_root_dir: Default path for logs and weights when no logger/ckpt_callback passed.
-                Default: ``os.getcwd()``.
                Can be remote file paths such as `s3://mybucket/path` or 'hdfs://path/'

             detect_anomaly: Enable anomaly detection for the autograd engine.
-                Default: ``False``.

             deterministic: If ``True``, sets whether PyTorch operations must use deterministic algorithms.
                Set to ``"warn"`` to use deterministic algorithms whenever possible, throwing warnings on operations
                that don't support deterministic mode (requires PyTorch 1.11+). If not set, defaults to ``False``.
-                Default: ``None``.

             devices: Will be mapped to either `gpus`, `tpu_cores`, `num_processes` or `ipus`,
                based on the accelerator type.

             fast_dev_run: Runs n if set to ``n`` (int) else 1 if set to ``True`` batch(es)
                of train, val and test to find any bugs (ie: a sort of unit test).
-                Default: ``False``.

-            gpus: Number of GPUs to train on (int) or which GPUs to train on (list or str) applied per node
-                Default: ``None``.
+            gpus: Number of GPUs to train on (int) or which GPUs to train on (list or str) applied per node.

                .. deprecated:: v1.7
                    ``gpus`` has been deprecated in v1.7 and will be removed in v2.0.
@@ -267,48 +253,36 @@
             gradient_clip_val: The value at which to clip gradients. Passing ``gradient_clip_val=None`` disables
                gradient clipping. If using Automatic Mixed Precision (AMP), the gradients will be unscaled before.
-                Default: ``None``.

             gradient_clip_algorithm: The gradient clipping algorithm to use. Pass ``gradient_clip_algorithm="value"``
-                to clip by value, and ``gradient_clip_algorithm="norm"`` to clip by norm. By default it will
+                to clip by value, and ``gradient_clip_algorithm="norm"`` to clip by norm. By default, it will
                be set to ``"norm"``.

             limit_train_batches: How much of training dataset to check (float = fraction, int = num_batches).
-                Default: ``1.0``.

             limit_val_batches: How much of validation dataset to check (float = fraction, int = num_batches).
-                Default: ``1.0``.

             limit_test_batches: How much of test dataset to check (float = fraction, int = num_batches).
-                Default: ``1.0``.

             limit_predict_batches: How much of prediction dataset to check (float = fraction, int = num_batches).
-                Default: ``1.0``.

             logger: Logger (or iterable collection of loggers) for experiment tracking. A ``True`` value uses
                the default ``TensorBoardLogger``. ``False`` will disable logging. If multiple loggers are
                provided, local files (checkpoints, profiler traces, etc.) are saved in the ``log_dir`` of
                the first logger.
-                Default: ``True``.

             log_every_n_steps: How often to log within steps.
-                Default: ``50``.

             enable_progress_bar: Whether to enable to progress bar by default.
-                Default: ``True``.

             profiler: To profile individual steps during training and assist in identifying bottlenecks.
-                Default: ``None``.

             overfit_batches: Overfit a fraction of training/validation data (float) or a set number of batches (int).
-                Default: ``0.0``.

             plugins: Plugins allow modification of core behavior like ddp and amp, and enable custom lightning plugins.
-                Default: ``None``.

             precision: Double precision (64), full precision (32), half precision (16) or bfloat16 precision (bf16).
                Can be used on CPU, GPU, TPUs, HPUs or IPUs.
-                Default: ``32``.

             max_epochs: Stop training once this number of epochs is reached. Disabled by default (None).
                If both max_epochs and max_steps are not specified, defaults to ``max_epochs = 1000``.
@@ -328,10 +302,8 @@
                :class:`datetime.timedelta`.

             num_nodes: Number of GPU nodes for distributed training.
-                Default: ``1``.

             num_processes: Number of processes for distributed training with ``accelerator="cpu"``.
-                Default: ``1``.

                .. deprecated:: v1.7
                    ``num_processes`` has been deprecated in v1.7 and will be removed in v2.0.
@@ -339,10 +311,8 @@
             num_sanity_val_steps: Sanity check runs n validation batches before starting the training routine.
                Set it to `-1` to run all batches in all validation dataloaders.
-                Default: ``2``.

             reload_dataloaders_every_n_epochs: Set to a non-negative integer to reload dataloaders every n epochs.
-                Default: ``0``.

             replace_sampler_ddp: Explicitly enables or disables sampler replacement. If not specified this
                will toggled automatically when DDP is used. By default it will add ``shuffle=True`` for
@@ -359,20 +329,16 @@
             strategy: Supports different training strategies with aliases as well custom strategies.
-                Default: ``None``.

             sync_batchnorm: Synchronize batch norm layers between process groups/whole world.
-                Default: ``False``.

-            tpu_cores: How many TPU cores to train on (1 or 8) / Single TPU to train on (1)
-                Default: ``None``.
+            tpu_cores: How many TPU cores to train on (1 or 8) / Single TPU to train on (1).

                .. deprecated:: v1.7
                    ``tpu_cores`` has been deprecated in v1.7 and will be removed in v2.0.
                    Please use ``accelerator='tpu'`` and ``devices=x`` instead.

             ipus: How many IPUs to train on.
-                Default: ``None``.

                .. deprecated:: v1.7
                    ``ipus`` has been deprecated in v1.7 and will be removed in v2.0.
@@ -380,27 +346,22 @@
             track_grad_norm: -1 no tracking. Otherwise tracks that p-norm. May be set to 'inf' infinity-norm. If using
                Automatic Mixed Precision (AMP), the gradients will be unscaled before logging them.
-                Default: ``-1``.

             val_check_interval: How often to check the validation set. Pass a ``float`` in the range [0.0, 1.0] to check
                after a fraction of the training epoch. Pass an ``int`` to check after a fixed number of training
                batches. An ``int`` value can only be higher than the number of training batches when
                ``check_val_every_n_epoch=None``, which validates after every ``N`` training batches
                across epochs or during iteration-based training.
-                Default: ``1.0``.

             enable_model_summary: Whether to enable model summarization by default.
-                Default: ``True``.

             move_metrics_to_cpu: Whether to force internal logged metrics to be moved to cpu.
                This can save some gpu memory, but can make training slower. Use with attention.
-                Default: ``False``.

             multiple_trainloader_mode: How to loop over the datasets when there are multiple train loaders.
                In 'max_size_cycle' mode, the trainer ends one epoch when the largest dataset is traversed,
                and smaller datasets reload when running out of their data. In 'min_size' mode, all the datasets
                reload when reaching the minimum length of datasets.
-                Default: ``"max_size_cycle"``.

             inference_mode: Whether to use :func:`torch.inference_mode` or :func:`torch.no_grad` during
                evaluation (``validate``/``test``/``predict``).
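A minimal ``Trainer`` configuration touching several of the arguments documented above; every value is an example, not the default that the removed lines used to spell out::

    from pytorch_lightning import Trainer

    trainer = Trainer(
        accelerator="gpu",
        devices=2,
        max_epochs=10,
        accumulate_grad_batches=4,   # effective batch size = 4x the dataloader batch size
        gradient_clip_val=0.5,       # clipped by norm unless gradient_clip_algorithm="value"
        limit_val_batches=0.25,      # float = fraction of the validation set
        val_check_interval=0.5,      # validate twice per training epoch
        log_every_n_steps=10,
        deterministic=True,
    )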
diff --git a/src/pytorch_lightning/tuner/lr_finder.py b/src/pytorch_lightning/tuner/lr_finder.py
index 563e65653f27f..289d677d10d24 100644
--- a/src/pytorch_lightning/tuner/lr_finder.py
+++ b/src/pytorch_lightning/tuner/lr_finder.py
@@ -415,7 +415,7 @@ class _LinearLR(_TORCH_LRSCHEDULER):

         num_iter: the number of iterations over which the test occurs.

-        last_epoch: the index of last epoch. Default: -1.
+        last_epoch: the index of last epoch.
     """

     def __init__(self, optimizer: torch.optim.Optimizer, end_lr: float, num_iter: int, last_epoch: int = -1):
@@ -450,7 +450,7 @@ class _ExponentialLR(_TORCH_LRSCHEDULER):

         num_iter: the number of iterations over which the test occurs.

-        last_epoch: the index of last epoch. Default: -1.
+        last_epoch: the index of last epoch.
     """

     def __init__(self, optimizer: torch.optim.Optimizer, end_lr: float, num_iter: int, last_epoch: int = -1):
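The two private schedulers above back the learning-rate finder; a minimal sketch of the public entry point at the time of this diff, assuming ``model`` is a ``LightningModule`` exposing ``self.lr``::

    from pytorch_lightning import Trainer

    trainer = Trainer(auto_lr_find=True)
    trainer.tune(model)  # runs the LR range test and writes the suggestion to model.lr

    # or run the finder explicitly and inspect the result
    lr_finder = trainer.tuner.lr_find(model)
    print(lr_finder.suggestion())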
""" def __init__(self, optimizer: torch.optim.Optimizer, end_lr: float, num_iter: int, last_epoch: int = -1): @@ -450,7 +450,7 @@ class _ExponentialLR(_TORCH_LRSCHEDULER): num_iter: the number of iterations over which the test occurs. - last_epoch: the index of last epoch. Default: -1. + last_epoch: the index of last epoch. """ def __init__(self, optimizer: torch.optim.Optimizer, end_lr: float, num_iter: int, last_epoch: int = -1): diff --git a/src/pytorch_lightning/utilities/model_summary/model_summary.py b/src/pytorch_lightning/utilities/model_summary/model_summary.py index 3fad851664d4f..aeeeb1aac6cb3 100644 --- a/src/pytorch_lightning/utilities/model_summary/model_summary.py +++ b/src/pytorch_lightning/utilities/model_summary/model_summary.py @@ -423,7 +423,7 @@ def summarize(lightning_module: "pl.LightningModule", max_depth: int = 1) -> Mod lightning_module: `LightningModule` to summarize. max_depth: The maximum depth of layer nesting that the summary will include. A value of 0 turns the - layer summary off. Default: 1. + layer summary off. Return: The model summary object