4 changes: 2 additions & 2 deletions docs/source/advanced/mixed_precision.rst
@@ -50,14 +50,14 @@ BFloat16 Mixed precision is similar to FP16 mixed precision, however we maintain
Since BFloat16 is more stable than FP16 during training, we do not need to worry about any gradient scaling or NaN gradient values that come with using FP16 mixed precision.

.. testcode::
:skipif: not _TORCH_GREATER_EQUAL_DEV_1_10 or not torch.cuda.is_available()
:skipif: not _TORCH_GREATER_EQUAL_1_10 or not torch.cuda.is_available()

Trainer(gpus=1, precision="bf16")

It is also possible to use BFloat16 mixed precision on the CPU, relying on MKLDNN under the hood.

.. testcode::
:skipif: not _TORCH_GREATER_EQUAL_DEV_1_10
:skipif: not _TORCH_GREATER_EQUAL_1_10

Trainer(precision="bf16")
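
Under the hood, ``precision="bf16"`` relies on PyTorch's native autocast with the ``torch.bfloat16`` dtype. The Trainer manages this for you; the snippet below is only a rough sketch of the equivalent raw PyTorch usage (assuming PyTorch 1.10+):

.. code-block:: python

    import torch
    from torch import nn

    # a toy module, just to demonstrate bf16 autocast on the CPU
    model = nn.Linear(4, 2)
    inputs = torch.randn(8, 4)

    # roughly what precision="bf16" enables for the forward pass
    with torch.autocast(device_type="cpu", dtype=torch.bfloat16):
        output = model(inputs)

    print(output.dtype)  # torch.bfloat16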

2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -377,7 +377,7 @@ def package_list_from_file(file):
_XLA_AVAILABLE,
_TPU_AVAILABLE,
_TORCHVISION_AVAILABLE,
_TORCH_GREATER_EQUAL_DEV_1_10,
_TORCH_GREATER_EQUAL_1_10,
_module_available,
)
_JSONARGPARSE_AVAILABLE = _module_available("jsonargparse")
17 changes: 10 additions & 7 deletions pl_examples/basic_examples/README.md
@@ -6,7 +6,7 @@ Use these examples to test how Lightning works.

5 MNIST examples showing how to gradually convert from pure PyTorch to PyTorch Lightning.

The transition through [LightningLite](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.rst) from pure PyTorch is optional but it might helpful to learn about it.
The transition through [LightningLite](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.rst) from pure PyTorch is optional but it might be helpful to learn about it.

#### 1. Image Classifier with Vanilla PyTorch

@@ -21,7 +21,7 @@ ______________________________________________________________________

#### 2. Image Classifier with LightningLite

Trains a simple CNN over MNIST using [LightningLite](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.rst).
This script shows you how to scale the previous script to enable GPU and multi-GPU training using [LightningLite](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.rst).
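
For orientation, a minimal `LightningLite` loop looks roughly like the sketch below (a toy model rather than the actual example script, assuming the `LightningLite` API described in the linked docs):

```python
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, TensorDataset

from pytorch_lightning.lite import LightningLite


class Lite(LightningLite):
    def run(self):
        # toy data and model, just to show the Lite workflow
        dataset = TensorDataset(torch.randn(64, 4), torch.randn(64, 2))
        dataloader = self.setup_dataloaders(DataLoader(dataset, batch_size=8))

        model = nn.Linear(4, 2)
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
        # setup() moves the model and optimizer to the right device
        model, optimizer = self.setup(model, optimizer)

        model.train()
        for x, y in dataloader:
            optimizer.zero_grad()
            loss = F.mse_loss(model(x), y)
            self.backward(loss)  # replaces loss.backward()
            optimizer.step()


Lite().run()
```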

```bash
# cpu / multiple gpus if available
@@ -30,7 +30,10 @@ python mnist_examples/image_classifier_2_lite.py

______________________________________________________________________

Trains a simple CNN over MNIST where `LightningLite` is almost a `LightningModule`.
#### 3. Image Classifier - Conversion Lite to Lightning

This script shows you how to prepare the conversion from [LightningLite](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.rst)
to a `LightningModule`.

```bash
# cpu / multiple gpus if available
@@ -41,7 +44,7 @@ ______________________________________________________________________

#### 4. Image Classifier with LightningModule

Trains a simple CNN over MNIST with `Lightning Trainer` and the converted `LightningModule`.
This script shows you the result of the conversion to the `LightningModule` and how you finally get all the benefits from Lightning.

```bash
# cpu
@@ -55,7 +58,7 @@ ______________________________________________________________________

#### 5. Image Classifier with LightningModule + LightningDataModule

Trains a simple CNN over MNIST with `Lightning Trainer` and the converted `LightningModule` and `LightningDataModule`
This script shows you how to extract the data-related components into a `LightningDataModule`.

```bash
# cpu
@@ -64,8 +67,8 @@ python mnist_examples/image_classifier_5_lightning_datamodule.py
# gpus (any number)
python mnist_examples/image_classifier_5_lightning_datamodule.py --trainer.gpus 2

# Distributed Data Parallel
python backbone_image_classifier.py --trainer.gpus 2 --trainer.accelerator ddp
# data parallel
python mnist_examples/image_classifier_5_lightning_datamodule.py --trainer.gpus 2 --trainer.accelerator 'dp'
```
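
As a sketch of what extracting the data-related components might look like (assuming the standard `LightningDataModule` hooks; the actual script may organize things differently):

```python
from pytorch_lightning import LightningDataModule
from torch.utils.data import DataLoader
from torchvision import transforms as T
from torchvision.datasets import MNIST


class MNISTDataModule(LightningDataModule):
    """Groups the MNIST download, transforms and dataloaders in one place."""

    def __init__(self, batch_size: int = 32):
        super().__init__()
        self.batch_size = batch_size
        self.transform = T.Compose([T.ToTensor(), T.Normalize((0.1307,), (0.3081,))])

    def prepare_data(self):
        # called on a single process per node, a safe place to download
        MNIST("./data", train=True, download=True)
        MNIST("./data", train=False, download=True)

    def setup(self, stage=None):
        self.train_dataset = MNIST("./data", train=True, transform=self.transform)
        self.test_dataset = MNIST("./data", train=False, transform=self.transform)

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size)
```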

______________________________________________________________________
15 changes: 8 additions & 7 deletions pl_examples/basic_examples/mnist_examples/README.md
@@ -2,7 +2,7 @@

5 MNIST examples showing how to gradually convert from pure PyTorch to PyTorch Lightning.

The transition through [LightningLite](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.rst) from pure PyTorch is optional but it might helpful to learn about it.
The transition through [LightningLite](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.rst) from pure PyTorch is optional but it might be helpful to learn about it.

#### 1. Image Classifier with Vanilla PyTorch

@@ -17,7 +17,7 @@ ______________________________________________________________________

#### 2. Image Classifier with LightningLite

Trains a simple CNN over MNIST using [LightningLite](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.rst).
This script shows you how to scale the previous script to enable GPU and multi-GPU training using [LightningLite](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.rst).

```bash
# cpu / multiple gpus if available
@@ -28,7 +28,8 @@ ______________________________________________________________________

#### 3. Image Classifier - Conversion Lite to Lightning

Trains a simple CNN over MNIST where `LightningLite` is almost a `LightningModule`.
This script shows you how to prepare the conversion from [LightningLite](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.rst)
to a `LightningModule`.

```bash
# cpu / multiple gpus if available
@@ -39,21 +40,21 @@ ______________________________________________________________________

#### 4. Image Classifier with LightningModule

Trains a simple CNN over MNIST with `Lightning Trainer` and the converted `LightningModule`.
This script shows you the result of the conversion to the `LightningModule` and how you finally get all the benefits from Lightning.

```bash
# cpu
python mnist_examples/image_classifier_4_lightning.py
python image_classifier_4_lightning.py

# gpus (any number)
python mnist_examples/image_classifier_4_lightning.py --trainer.gpus 2
python image_classifier_4_lightning.py --trainer.gpus 2
```
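
For reference, the converted module has roughly this shape (a minimal sketch with a toy network, not the actual script):

```python
import torch
from torch import nn
from torch.nn import functional as F

from pytorch_lightning import LightningModule


class ImageClassifier(LightningModule):
    def __init__(self, lr: float = 1.0):
        super().__init__()
        self.save_hyperparameters()
        self.model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))

    def forward(self, x):
        return F.log_softmax(self.model(x), dim=1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        loss = F.nll_loss(self(x), y)
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adadelta(self.parameters(), lr=self.hparams.lr)
```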

______________________________________________________________________

#### 5. Image Classifier with LightningModule + LightningDataModule

Trains a simple CNN over MNIST with `Lightning Trainer` and the converted `LightningModule` and `LightningDataModule`
This script shows you how to extract the data-related components into a `LightningDataModule`.

```bash
# cpu
@@ -52,64 +52,90 @@ def forward(self, x):
return output


def train(args, model, device, train_loader, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
if (batch_idx == 0) or ((batch_idx + 1) % args.log_interval == 0):
print(
"Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
epoch,
batch_idx * len(data),
len(train_loader.dataset),
100.0 * batch_idx / len(train_loader),
loss.item(),
)
)
if args.dry_run:
break
def run(hparams):

torch.manual_seed(hparams.seed)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

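# standard MNIST mean/std used for input normalization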
transform = T.Compose([T.ToTensor(), T.Normalize((0.1307,), (0.3081,))])
train_dataset = MNIST("./data", train=True, download=True, transform=transform)
test_dataset = MNIST("./data", train=False, transform=transform)
train_loader = torch.utils.data.DataLoader(
train_dataset,
batch_size=hparams.batch_size,
)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=hparams.batch_size)

model = Net().to(device)
optimizer = optim.Adadelta(model.parameters(), lr=hparams.lr)

scheduler = StepLR(optimizer, step_size=1, gamma=hparams.gamma)

# EPOCH LOOP
for epoch in range(1, hparams.epochs + 1):

def test(args, model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
# TRAINING LOOP
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
test_loss += F.nll_loss(output, target, reduction="sum").item() # sum up batch loss
pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
correct += pred.eq(target.view_as(pred)).sum().item()
if args.dry_run:
break

test_loss /= len(test_loader.dataset)
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
if (batch_idx == 0) or ((batch_idx + 1) % hparams.log_interval == 0):
print(
"Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
epoch,
batch_idx * len(data),
len(train_loader.dataset),
100.0 * batch_idx / len(train_loader),
loss.item(),
)
)
if hparams.dry_run:
break
scheduler.step()

print(
"\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
test_loss, correct, len(test_loader.dataset), 100.0 * correct / len(test_loader.dataset)
# TESTING LOOP
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += F.nll_loss(output, target, reduction="sum").item() # sum up batch loss
pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
correct += pred.eq(target.view_as(pred)).sum().item()
if hparams.dry_run:
break

test_loss /= len(test_loader.dataset)

print(
"\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
test_loss, correct, len(test_loader.dataset), 100.0 * correct / len(test_loader.dataset)
)
)
)

if hparams.dry_run:
break

if hparams.save_model:
torch.save(model.state_dict(), "mnist_cnn.pt")
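# the saved state_dict can later be restored with:
#   model = Net()
#   model.load_state_dict(torch.load("mnist_cnn.pt"))
#   model.eval()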


def main():
parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
parser.add_argument(
"--batch-size", type=int, default=64, metavar="N", help="input batch size for training (default: 64)"
)
parser.add_argument(
"--test-batch-size", type=int, default=1000, metavar="N", help="input batch size for testing (default: 1000)"
)
parser.add_argument("--epochs", type=int, default=14, metavar="N", help="number of epochs to train (default: 14)")
parser.add_argument("--lr", type=float, default=1.0, metavar="LR", help="learning rate (default: 1.0)")
parser.add_argument("--gamma", type=float, default=0.7, metavar="M", help="Learning rate step gamma (default: 0.7)")
parser.add_argument("--no-cuda", action="store_true", default=False, help="disables CUDA training")
parser.add_argument("--dry-run", action="store_true", default=False, help="quickly check a single pass")
parser.add_argument("--seed", type=int, default=1, metavar="S", help="random seed (default: 1)")
parser.add_argument(
@@ -120,40 +146,8 @@ def main():
help="how many batches to wait before logging training status",
)
parser.add_argument("--save-model", action="store_true", default=False, help="For Saving the current Model")
args = parser.parse_args()
use_cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)

device = torch.device("cuda" if use_cuda else "cpu")

train_kwargs = {"batch_size": args.batch_size}
test_kwargs = {"batch_size": args.test_batch_size}
if use_cuda:
cuda_kwargs = {"num_workers": 1, "pin_memory": True, "shuffle": True}
train_kwargs.update(cuda_kwargs)
test_kwargs.update(cuda_kwargs)

transform = T.Compose([T.ToTensor(), T.Normalize((0.1307,), (0.3081,))])
train_dataset = MNIST("./data", train=True, download=True, transform=transform)
test_dataset = MNIST("./data", train=False, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, **train_kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset, **test_kwargs)

model = Net().to(device)
optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
for epoch in range(1, args.epochs + 1):
train(args, model, device, train_loader, optimizer, epoch)
test(args, model, device, test_loader)
scheduler.step()

if args.dry_run:
break

if args.save_model:
torch.save(model.state_dict(), "mnist_cnn.pt")
hparams = parser.parse_args()
run(hparams)


if __name__ == "__main__":