add unset_fake_temporarily with minor changes

zewenli98 · zewenli98 · commit 9851bfea7a48 · 2025-09-16T14:32:01.000-07:00
diff --git a/py/torch_tensorrt/dynamo/_compiler.py b/py/torch_tensorrt/dynamo/_compiler.py
@@ -513,7 +513,7 @@ def compile(
 
     if kwargs.get("debug", False):
         warnings.warn(
-            "`debug` is deprecated. Please use `with torch_tensorrt.dynamo.Debugger(...)` to wrap your compilation call to enable debugging functionality",
+            "`debug` is deprecated. Please use `with torch_tensorrt.dynamo.Debugger(...)` to wrap your compilation call to enable debugging functionality.",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -1122,6 +1122,7 @@ def convert_exported_program_to_serialized_trt_engine(
     Returns:
         bytes: Serialized TensorRT engine, can either be saved to a file or deserialized via TensorRT APIs
     """
+
     if kwargs.get("debug", False):
         warnings.warn(
             "`debug` is deprecated. Please use `with torch_tensorrt.dynamo.Debugger(...)` to wrap your compilation call to enable debugging functionality.",
diff --git a/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py
@@ -325,8 +325,9 @@ def native_group_norm(
 
     shape = [1, group] + [1] * (rank - 2)
 
-    weight_torch = torch.ones(shape)
-    bias_torch = torch.zeros(shape)
+    with unset_fake_temporarily():
+        weight_torch = torch.ones(shape)
+        bias_torch = torch.zeros(shape)
 
     weight_one = get_trt_tensor(ctx, weight_torch, f"{name}_weight_one", input.dtype)
     bias_zero = get_trt_tensor(ctx, bias_torch, f"{name}_bias_zero", input.dtype)
diff --git a/tools/perf/README.md b/tools/perf/README.md
@@ -44,7 +44,7 @@ Benchmark scripts depends on following Python packages in addition to requiremen
 Here are the list of `CompileSpec` options that can be provided directly to compile the pytorch module
 
 * `--backends` : Comma separated string of backends. Eg: torch, ts_trt, dynamo, torch_compile, inductor, onnx_trt
-* `--model` : Name of the model file (Can be a torchscript module or a tensorrt engine (ending in `.plan` extension)). If the backend is `dynamo` or `torch_compile`, the input should be a Pytorch module (instead of a torchscript module).
+* `--model` : Name of the model file. This can be a TorchScript module or, when paired with `--is_trt_engine`, a TensorRT engine. If the backend is `dynamo` or `torch_compile`, the input should be a PyTorch module (instead of a TorchScript module).
 * `--model_torch` : Name of the PyTorch model file (optional, only necessary if `dynamo` or `torch_compile` is a chosen backend)
 * `--onnx` : ONNX model file which helps bypass the step of exporting ONNX from `model_torch`. If this argument is provided, the ONNX will be directly converted to TRT engine
 * `--inputs` : List of input shapes & dtypes. Eg: (1, 3, 224, 224)@fp32 for Resnet or (1, 128)@int32;(1, 128)@int32 for BERT
@@ -61,16 +61,16 @@ Eg:
 ```
   python perf_run.py --model ${MODELS_DIR}/vgg16_scripted.jit.pt \
                      --model_torch ${MODELS_DIR}/vgg16_torch.pt \
-                     --precision fp32,fp16 --inputs="(1, 3, 224, 224)@fp32" \
+                     --precision fp32,fp16 \
+                     --inputs "(1, 3, 224, 224)@fp32" \
                      --batch_size 1 \
-                     --backends torch,ts_trt,dynamo,torch_compile,tensorrt \
+                     --backends torch,ts_trt,dynamo,torch_compile,inductor,onnx_trt \
                      --report "vgg_perf_bs1.txt"
 ```
 
 Note:
 
 1. Please note that measuring INT8 performance is only supported via a `calibration cache` file or QAT mode for `torch_tensorrt` backend.
-2. TensorRT engine filename should end with `.plan` otherwise it will be treated as Torchscript module.
 
 ### Example models
 
diff --git a/tools/perf/perf_run.py b/tools/perf/perf_run.py
@@ -480,7 +480,15 @@ def run_onnx_trt(
             onnx_path = params["onnx"]
         else:
             onnx_path = f"{params['model_torch']}-onnx-trt.onnx"
-            torch.onnx.export(model, tuple(input_tensors), onnx_path, dynamo=True)
+            len_output = len(model(*input_tensors))
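+            # Name the ONNX outputs output0..outputN-1 to match the Torch-TRT engine's output names. NOTE(review): len() assumes the model returns a tuple/list of outputs; for a single Tensor it returns the size of dim 0 -- confirm.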
+            torch.onnx.export(
+                model,
+                tuple(input_tensors),
+                onnx_path,
+                dynamo=True,
+                output_names=[f"output{i}" for i in range(len_output)],
+            )
         start_compile = timeit.default_timer()
         builder = trt.Builder(logger)
         network = builder.create_network(