pytorch
diff --git a/‎advanced_source/semi_structured_sparse.py‎
Lines changed: 2 additions & 2 deletions b/‎advanced_source/semi_structured_sparse.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎beginner_source/blitz/autograd_tutorial.py‎
Lines changed: 1 addition & 1 deletion b/‎beginner_source/blitz/autograd_tutorial.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎beginner_source/blitz/cifar10_tutorial.py‎
Lines changed: 2 additions & 1 deletion b/‎beginner_source/blitz/cifar10_tutorial.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎beginner_source/examples_autograd/polynomial_autograd.py‎
Lines changed: 12 additions & 6 deletions b/‎beginner_source/examples_autograd/polynomial_autograd.py‎
Lines changed: 12 additions & 6 deletions
diff --git a/‎beginner_source/examples_nn/polynomial_nn.py‎
Lines changed: 1 addition & 1 deletion b/‎beginner_source/examples_nn/polynomial_nn.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎beginner_source/examples_nn/polynomial_optim.py‎
Lines changed: 1 addition & 1 deletion b/‎beginner_source/examples_nn/polynomial_optim.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎beginner_source/examples_tensor/polynomial_numpy.py‎
Lines changed: 1 addition & 1 deletion b/‎beginner_source/examples_tensor/polynomial_numpy.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎beginner_source/examples_tensor/polynomial_tensor.py‎
Lines changed: 1 addition & 1 deletion b/‎beginner_source/examples_tensor/polynomial_tensor.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎beginner_source/hybrid_frontend/README.txt‎
Lines changed: 0 additions & 10 deletions b/‎beginner_source/hybrid_frontend/README.txt‎
Lines changed: 0 additions & 10 deletions
@@ -43,6 +43,8 @@
 # -  A NVIDIA GPU with semi-structured sparsity support (Compute
 #    Capability 8.0+).
 #
+#  .. note:: This tutorial is tested on an NVIDIA A100 80GB GPU. You may not see similar speedups on newer GPU architectures. For the latest information on semi-structured sparsity support, please refer to the README `here <https://github.com/pytorch/ao/tree/main/torchao/sparsity#torchao-sparsity>`__.
+#
 # This tutorial is designed for beginners to semi-structured sparsity and
 # sparsity in general. For users with existing 2:4 sparse models,
 # accelerating ``nn.Linear`` layers for inference with
@@ -52,7 +54,6 @@
 import torch
 from torch.sparse import to_sparse_semi_structured, SparseSemiStructuredTensor
 from torch.utils.benchmark import Timer
-SparseSemiStructuredTensor._FORCE_CUTLASS = True
 
 # mask Linear weight to be 2:4 sparse
 mask = torch.Tensor([0, 0, 1, 1]).tile((3072, 2560)).cuda().bool()
@@ -207,7 +208,6 @@
 import transformers
 
 # force CUTLASS use if ``cuSPARSELt`` is not available
-SparseSemiStructuredTensor._FORCE_CUTLASS = True
 torch.manual_seed(100)
 
 # Set default device to "cuda:0"
 
@@ -67,7 +67,7 @@
 loss.backward() # backward pass
 
 ############################################################
-# Next, we load an optimizer, in this case SGD with a learning rate of 0.01 and `momentum <https://towardsdatascience.com/stochastic-gradient-descent-with-momentum-a84097641a5d>`__ of 0.9.
+# Next, we load an optimizer, in this case SGD with a learning rate of 0.01 and `momentum <https://medium.com/data-science/stochastic-gradient-descent-with-momentum-a84097641a5d>`__ of 0.9.
 # We register all the parameters of the model in the optimizer.
 #
 
 
@@ -65,7 +65,8 @@
 
 ########################################################################
 # .. note::
-#     If running on Windows and you get a BrokenPipeError, try setting
+#     If you are running this tutorial on Windows or macOS and encounter a
+#     BrokenPipeError or RuntimeError related to multiprocessing, try setting
 #     the ``num_workers`` argument of ``torch.utils.data.DataLoader()`` to 0.
 
 transform = transforms.Compose(
 
@@ -1,4 +1,4 @@
-"""
+r"""
 PyTorch: Tensors and autograd
 -------------------------------
 
@@ -27,8 +27,8 @@
 # Create Tensors to hold input and outputs.
 # By default, requires_grad=False, which indicates that we do not need to
 # compute gradients with respect to these Tensors during the backward pass.
-x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype)
-y = torch.sin(x)
+x = torch.linspace(-1, 1, 2000, dtype=dtype)
+y = torch.exp(x) # A Taylor expansion would be 1 + x + (1/2) x**2 + (1/3!) x**3 + ...
 
 # Create random Tensors for weights. For a third order polynomial, we need
 # 4 weights: y = a + b x + c x^2 + d x^3
@@ -39,17 +39,23 @@
 c = torch.randn((), dtype=dtype, requires_grad=True)
 d = torch.randn((), dtype=dtype, requires_grad=True)
 
-learning_rate = 1e-6
-for t in range(2000):
+initial_loss = 1.
+learning_rate = 1e-5
+for t in range(5000):
     # Forward pass: compute predicted y using operations on Tensors.
     y_pred = a + b * x + c * x ** 2 + d * x ** 3
 
     # Compute and print loss using operations on Tensors.
     # Now loss is a 0-dimensional (scalar) Tensor of shape ()
     # loss.item() gets the scalar value held in the loss.
     loss = (y_pred - y).pow(2).sum()
+
+    # Calculate the initial loss, so we can report loss relative to it
+    if t==0:
+        initial_loss=loss.item()
+
     if t % 100 == 99:
-        print(t, loss.item())
+        print(f'Iteration t = {t:4d}  loss(t)/loss(0) = {round(loss.item()/initial_loss, 6):10.6f}  a = {a.item():10.6f}  b = {b.item():10.6f}  c = {c.item():10.6f}  d = {d.item():10.6f}')
 
     # Use autograd to compute the backward pass. This call will compute the
     # gradient of loss with respect to all Tensors with requires_grad=True.
 
@@ -4,7 +4,7 @@
 -----------
 
 A third order polynomial, trained to predict :math:`y=\sin(x)` from :math:`-\pi`
-to :math:`pi` by minimizing squared Euclidean distance.
+to :math:`\pi` by minimizing squared Euclidean distance.
 
 This implementation uses the nn package from PyTorch to build the network.
 PyTorch autograd makes it easy to define computational graphs and take gradients,
 
@@ -4,7 +4,7 @@
 --------------
 
 A third order polynomial, trained to predict :math:`y=\sin(x)` from :math:`-\pi`
-to :math:`pi` by minimizing squared Euclidean distance.
+to :math:`\pi` by minimizing squared Euclidean distance.
 
 This implementation uses the nn package from PyTorch to build the network.
 
 
@@ -4,7 +4,7 @@
 --------------
 
 A third order polynomial, trained to predict :math:`y=\sin(x)` from :math:`-\pi`
-to :math:`pi` by minimizing squared Euclidean distance.
+to :math:`\pi` by minimizing squared Euclidean distance.
 
 This implementation uses numpy to manually compute the forward pass, loss, and
 backward pass.
 
@@ -4,7 +4,7 @@
 ----------------
 
 A third order polynomial, trained to predict :math:`y=\sin(x)` from :math:`-\pi`
-to :math:`pi` by minimizing squared Euclidean distance.
+to :math:`\pi` by minimizing squared Euclidean distance.
 
 This implementation uses PyTorch tensors to manually compute the forward pass,
 loss, and backward pass.
Original file line number	Diff line number	Diff line change
`@@ -67,7 +67,7 @@`
`67`	`67`	`loss.backward() # backward pass`
`68`	`68`
`69`	`69`	`############################################################`
`70`		-# Next, we load an optimizer, in this case SGD with a learning rate of 0.01 and `momentum <https://towardsdatascience.com/stochastic-gradient-descent-with-momentum-a84097641a5d>`__ of 0.9.
	`70`	+# Next, we load an optimizer, in this case SGD with a learning rate of 0.01 and `momentum <https://medium.com/data-science/stochastic-gradient-descent-with-momentum-a84097641a5d>`__ of 0.9.
`71`	`71`	`# We register all the parameters of the model in the optimizer.`
`72`	`72`	`#`
`73`	`73`