diff --git a/beginner_source/blitz/autograd_tutorial.py b/beginner_source/blitz/autograd_tutorial.py
index 78a1869b211..b0399060ee4 100644
--- a/beginner_source/blitz/autograd_tutorial.py
+++ b/beginner_source/blitz/autograd_tutorial.py
@@ -30,8 +30,8 @@
 
 To prevent tracking history (and using memory), you can also wrap the code block
 in ``with torch.no_grad():``. This can be particularly helpful when evaluating a
-model because the model may have trainable parameters with `requires_grad=True`,
-but for which we don't need the gradients.
+model because the model may have trainable parameters with
+``requires_grad=True``, but for which we don't need the gradients.
 
 There’s one more class which is very important for autograd
 implementation - a ``Function``.
@@ -52,12 +52,12 @@
 import torch
 
 ###############################################################
-# Create a tensor and set requires_grad=True to track computation with it
+# Create a tensor and set ``requires_grad=True`` to track computation with it
 x = torch.ones(2, 2, requires_grad=True)
 print(x)
 
 ###############################################################
-# Do an operation of tensor:
+# Do a tensor operation:
 y = x + 2
 print(y)
 
@@ -66,7 +66,7 @@
 print(y.grad_fn)
 
 ###############################################################
-# Do more operations on y
+# Do more operations on ``y``
 z = y * y * 3
 out = z.mean()
 
@@ -86,14 +86,14 @@
 ###############################################################
 # Gradients
 # ---------
-# Let's backprop now
+# Let's backprop now.
 # Because ``out`` contains a single scalar, ``out.backward()`` is
 # equivalent to ``out.backward(torch.tensor(1))``.
 
 out.backward()
 
 ###############################################################
-# print gradients d(out)/dx
+# Print gradients d(out)/dx
 #
 
 print(x.grad)
@@ -129,7 +129,7 @@
 ###############################################################
 # You can also stop autograd from tracking history on Tensors
 # with ``.requires_grad=True`` by wrapping the code block in
-# ``with torch.no_grad()``:
+# ``with torch.no_grad():``
 
 print(x.requires_grad)
 print((x ** 2).requires_grad)
diff --git a/beginner_source/blitz/cifar10_tutorial.py b/beginner_source/blitz/cifar10_tutorial.py
index 0182e2e9961..8724e95c596 100644
--- a/beginner_source/blitz/cifar10_tutorial.py
+++ b/beginner_source/blitz/cifar10_tutorial.py
@@ -207,7 +207,7 @@ def forward(self, x):
 
 ########################################################################
 # The outputs are energies for the 10 classes.
-# Higher the energy for a class, the more the network
+# The higher the energy for a class, the more the network
 # thinks that the image is of the particular class.
 # So, let's get the index of the highest energy:
 _, predicted = torch.max(outputs, 1)
@@ -266,7 +266,7 @@ def forward(self, x):
 #
 # Training on GPU
 # ----------------
-# Just like how you transfer a Tensor on to the GPU, you transfer the neural
+# Just like how you transfer a Tensor onto the GPU, you transfer the neural
 # net onto the GPU.
 #
 # Let's first define our device as the first visible cuda device if we have
@@ -274,12 +274,12 @@
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-# Assume that we are on a CUDA machine, then this should print a CUDA device:
+# Assuming that we are on a CUDA machine, this should print a CUDA device:
 
 print(device)
 
 ########################################################################
-# The rest of this section assumes that `device` is a CUDA device.
+# The rest of this section assumes that ``device`` is a CUDA device.
 #
 # Then these methods will recursively go over all modules and convert their
 # parameters and buffers to CUDA tensors:
diff --git a/beginner_source/blitz/neural_networks_tutorial.py b/beginner_source/blitz/neural_networks_tutorial.py
index f3c2ad7fd84..05113e8ac17 100644
--- a/beginner_source/blitz/neural_networks_tutorial.py
+++ b/beginner_source/blitz/neural_networks_tutorial.py
@@ -90,8 +90,8 @@ def num_flat_features(self, x):
 print(params[0].size()) # conv1's .weight
 
 ########################################################################
-# Let try a random 32x32 input
-# Note: Expected input size to this net(LeNet) is 32x32. To use this net on
+# Let's try a random 32x32 input.
+# Note: expected input size of this net (LeNet) is 32x32. To use this net on
 # MNIST dataset, please resize the images from the dataset to 32x32.
 
 input = torch.randn(1, 1, 32, 32)
@@ -130,8 +130,8 @@ def num_flat_features(self, x):
 #    registered as a parameter when assigned as an attribute to a*
 #    ``Module``.
 # -  ``autograd.Function`` - Implements *forward and backward definitions
-#    of an autograd operation*. Every ``Tensor`` operation, creates at
-#    least a single ``Function`` node, that connects to functions that
+#    of an autograd operation*. Every ``Tensor`` operation creates at
+#    least a single ``Function`` node that connects to functions that
 #    created a ``Tensor`` and *encodes its history*.
 #
 # **At this point, we covered:**
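For reference, a minimal sketch of the autograd and device behavior discussed in the comments above: gradient tracking with ``requires_grad=True``, a scalar ``backward()``, disabling tracking with ``torch.no_grad()``, and selecting a CUDA device when one is available. It follows the tutorial's own ``y = x + 2``, ``z = y * y * 3``, ``out = z.mean()`` chain; the expected gradient of 4.5 per element comes from d(out)/dx = 1.5 * (x + 2) at x = 1. This is an illustration only, not part of the patch itself.

import torch

# Track operations on x so gradients can flow back to it
x = torch.ones(2, 2, requires_grad=True)
y = x + 2
z = y * y * 3
out = z.mean()

# out is a single scalar, so backward() needs no explicit gradient argument
out.backward()
print(x.grad)                        # d(out)/dx: a 2x2 tensor filled with 4.5

# Computations wrapped in no_grad() are not tracked by autograd
with torch.no_grad():
    print((x ** 2).requires_grad)    # False

# Use the first visible CUDA device if available, otherwise fall back to CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(x.to(device).device)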