 #
 # **Required pip Dependencies**
 #
-# - ``torch >= 1.14``
+# - ``torch >= 2.0``
 # - ``torchvision``
 # - ``numpy``
 # - ``scipy``
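
(Annotation, not part of the diff: a quick sanity check, using only the public torch API, that an environment matches the bumped requirement above. ``torch.compile`` only ships with PyTorch 2.0 and later, and the GPU speedup sections of the tutorial assume CUDA is available.)

    import torch

    print(torch.__version__)              # should report 2.0 or newer
    print(hasattr(torch, "compile"))      # torch.compile exists only in 2.x
    print(torch.cuda.is_available())      # the GPU timing sections assume this is True
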
 
 import torch
 
-import torch._inductor.config
-torch._inductor.config.cpp.cxx = ("g++",)
-
 def foo(x, y):
     a = torch.sin(x)
     b = torch.cos(x)
@@ -133,6 +130,11 @@ def evaluate(mod, inp):
     return mod(inp)
 
 model = init_model()
+
+# Reset since we are using a different mode.
+import torch._dynamo
+torch._dynamo.reset()
+
 evaluate_opt = torch.compile(evaluate, mode="reduce-overhead")
 
 inp = generate_data(16)[0]
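
(Annotation, not part of the diff: a minimal sketch of the reset-then-recompile pattern the hunk above introduces. The function ``g`` and the tensors are hypothetical; ``torch._dynamo.reset()`` clears Dynamo's cached compilation state so that compiling the same code again under a different mode starts from scratch.)

    import torch
    import torch._dynamo

    def g(x):
        return torch.sin(x) + torch.cos(x)

    g_default = torch.compile(g)                            # default mode
    g_default(torch.randn(8))

    torch._dynamo.reset()                                   # drop previously compiled artifacts
    g_overhead = torch.compile(g, mode="reduce-overhead")   # recompile under the new mode
    g_overhead(torch.randn(8))
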
@@ -174,8 +176,7 @@ def evaluate(mod, inp):
 
 ######################################################################
 # And indeed, we can see that running our model with ``torch.compile``
-# results in a significant speedup. On an NVIDIA A100 GPU, we observe a
-# 2.3x speedup. Speedup mainly comes from reducing Python overhead and
+# results in a significant speedup. Speedup mainly comes from reducing Python overhead and
 # GPU read/writes, and so the observed speedup may vary on factors such as model
 # architecture and batch size. For example, if a model's architecture is simple
 # and the amount of data is large, then the bottleneck would be
@@ -231,9 +232,8 @@ def train(mod, data):
 
 ######################################################################
 # Again, we can see that ``torch.compile`` takes longer in the first
-# iteration, as it must compile the model, but afterward, we see
-# significant speedups compared to eager. On an NVIDIA A100 GPU, we
-# observe a 2.2x speedup.
+# iteration, as it must compile the model, but in subsequent iterations, we see
+# significant speedups compared to eager.
 
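
(Annotation, not part of the diff: a hedged sketch of how the "slow first iteration, fast steady state" behaviour described above can be observed. The toy function and sizes are hypothetical, and a fair GPU measurement would also call ``torch.cuda.synchronize()`` around each step.)

    import time
    import torch

    def toy_step(x):
        return torch.nn.functional.relu(x @ x)

    compiled_step = torch.compile(toy_step)
    x = torch.randn(256, 256)
    for i in range(3):
        start = time.perf_counter()
        compiled_step(x)
        print(f"iteration {i}: {time.perf_counter() - start:.4f}s")  # iteration 0 includes compile time
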
 ######################################################################
 # Comparison to TorchScript and FX Tracing
@@ -297,6 +297,9 @@ def test_fns(fn1, fn2, args):
 # Now we can see that ``torch.compile`` correctly handles
 # data-dependent control flow.
 
+# Reset since we are using a different mode.
+torch._dynamo.reset()
+
 compile_f1 = torch.compile(f1)
 print("compile 1, 1:", test_fns(f1, compile_f1, (inp1, inp2)))
 print("compile 1, 2:", test_fns(f1, compile_f1, (-inp1, inp2)))
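
(Annotation, not part of the diff: a hypothetical illustration, not the tutorial's ``f1``/``f2``, of the kind of value-dependent branch meant here. Tracing-based approaches record only the branch taken at trace time, while ``torch.compile`` guards on the condition and stays correct for both paths.)

    import torch

    def branchy(x, y):
        if x.sum() < 0:           # branch depends on runtime tensor values
            return -y
        return y

    compiled_branchy = torch.compile(branchy)
    a, b = torch.randn(8), torch.randn(8)
    assert torch.equal(compiled_branchy(a, b), branchy(a, b))
    assert torch.equal(compiled_branchy(-a.abs(), b), branchy(-a.abs(), b))
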
@@ -394,7 +397,6 @@ def custom_backend(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor])
     gm.graph.print_tabular()
     return gm.forward
 
-import torch._dynamo
 # Reset since we are using a different backend.
 torch._dynamo.reset()
 
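
(Annotation, not part of the diff: a self-contained sketch of how a debugging backend like ``custom_backend`` above gets hooked up. The lambda and input are illustrative; ``torch.compile`` accepts any callable with this ``(gm, example_inputs)`` signature via ``backend=``.)

    from typing import List
    import torch
    import torch._dynamo

    def inspect_backend(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
        gm.graph.print_tabular()   # show the FX graph Dynamo captured
        return gm.forward          # run it unmodified

    torch._dynamo.reset()
    opt_fn = torch.compile(lambda x: torch.cos(x) + 1, backend=inspect_backend)
    opt_fn(torch.randn(4))
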
@@ -489,4 +491,4 @@ def bar(a, b):
 # In this tutorial, we introduced ``torch.compile`` by covering
 # basic usage, demonstrating speedups over eager mode, comparing to previous
 # PyTorch compiler solutions, and briefly investigating TorchDynamo and its interactions
-# with FX graphs. We hope that you will give ``torch.compile`` a try!
+# with FX graphs. We hope that you will give ``torch.compile`` a try!