@@ -6,7 +6,6 @@
 
 from typing import Any, Optional, Sequence
 from torch_tensorrt import EngineCapability, Device
-from torch_tensorrt.fx.utils import LowerPrecision
 from torch.fx.passes.pass_manager import PassManager
 from torch.fx.passes.shape_prop import ShapeProp
 from torch_tensorrt.dynamo.aten_tracer import trace
@@ -78,119 +77,63 @@ def compile( |
     if not isinstance(inputs, collections.abc.Sequence):
         inputs = [inputs]
 
-    inputs = prepare_inputs(inputs, prepare_device(device))
+    torchtrt_inputs, torch_inputs = prepare_inputs(inputs, prepare_device(device))
 
     if (
         torch.float16 in enabled_precisions
         or torch_tensorrt.dtype.half in enabled_precisions
     ):
-        lower_precision = LowerPrecision.FP16
+        precision = torch.float16
     elif (
         torch.float32 in enabled_precisions
         or torch_tensorrt.dtype.float in enabled_precisions
     ):
-        lower_precision = LowerPrecision.FP32
+        precision = torch.float32
     elif len(enabled_precisions) == 0:
         logger.info(f"No precision specified, defaulting to {PRECISION}")
-        lower_precision = PRECISION
+        precision = PRECISION
     else:
         raise ValueError(
             f"Precision {enabled_precisions} not supported in the Dynamo Path"
         )
 
+    compilation_options = {
+        "precision": precision,
+        "debug": debug,
+        "workspace_size": workspace_size,
+        "min_block_size": min_block_size,
+        "torch_executed_ops": torch_executed_ops,
+        "pass_through_build_failures": pass_through_build_failures,
+        "max_aux_streams": max_aux_streams,
+        "version_compatible": version_compatible,
+        "optimization_level": optimization_level,
+        "use_python_runtime": use_python_runtime,
+    }
+
     if kwargs.get("ir", "dynamo") == "torch_compile":
-        custom_backend = create_backend(
-            precision=lower_precision,
-            debug=debug,
-            workspace_size=workspace_size,
-            min_block_size=min_block_size,
-            torch_executed_ops=torch_executed_ops,
-            pass_through_build_failures=pass_through_build_failures,
-            max_aux_streams=max_aux_streams,
-            version_compatible=version_compatible,
-            optimization_level=optimization_level,
-            use_python_runtime=use_python_runtime,
-            **kwargs,
+        model = torch.compile(
+            gm,
+            backend=torch_tensorrt_backend,
+            options={**compilation_options, **kwargs},
         )
-        model = torch.compile(gm, backend=custom_backend)
         # Ensure compilation occurs by calling the function with provided inputs
-        model(*inputs)
+        model(*torch_inputs)
         return model
 
     else:
-        settings = CompilationSettings(
-            debug=debug,
-            precision=lower_precision,
-            workspace_size=workspace_size,
-            min_block_size=min_block_size,
-            torch_executed_ops=torch_executed_ops,
-            pass_through_build_failures=pass_through_build_failures,
-            max_aux_streams=max_aux_streams,
-            version_compatible=version_compatible,
-            optimization_level=optimization_level,
-            use_python_runtime=use_python_runtime,
-        )
-
-        model = trace(gm, inputs, **kwargs)
+        settings = CompilationSettings(**compilation_options)
+        model = trace(gm, torch_inputs, **kwargs)
 
         if kwargs.get("use_capability_partitioner", None):
-            model = lower_model(model, inputs)
-            return _compile_module(model, inputs, settings)
+            model = lower_model(model, torch_inputs)
+            return _compile_module(model, torch_inputs, settings)
         else:
-            split_result = lower_model_using_trt_splitter(model, inputs)
-            trt_module = _compile_graph(split_result, inputs, settings)
+            split_result = lower_model_using_trt_splitter(model, torch_inputs)
+            trt_module = _compile_graph(split_result, torch_inputs, settings)
 
             return trt_module
 
 
-def create_backend(
-    precision: LowerPrecision = PRECISION,
-    debug: bool = DEBUG,
-    workspace_size: int = WORKSPACE_SIZE,
-    min_block_size: int = MIN_BLOCK_SIZE,
-    torch_executed_ops: Sequence[str] = set(),
-    pass_through_build_failures: bool = PASS_THROUGH_BUILD_FAILURES,
-    max_aux_streams: Optional[int] = MAX_AUX_STREAMS,
-    version_compatible: bool = VERSION_COMPATIBLE,
-    optimization_level: Optional[int] = OPTIMIZATION_LEVEL,
-    use_python_runtime: Optional[bool] = USE_PYTHON_RUNTIME,
-    **kwargs,
-):
-    """Create torch.compile backend given specified arguments
-
-    Args:
-        precision: Model Layer precision
-        debug: Whether to print out verbose debugging information
-        workspace_size: Workspace TRT is allowed to use for the module (0 is default)
-        min_block_size: Minimum number of operators per TRT-Engine Block
-        torch_executed_ops: Sequence of operations to run in Torch, regardless of converter coverage
-        pass_through_build_failures: Whether to fail on TRT engine build errors (True) or not (False)
-        max_aux_streams: Maximum number of allowed auxiliary TRT streams for each engine
-        version_compatible: Provide version forward-compatibility for engine plan files
-        optimization_level: Builder optimization 0-5, higher levels imply longer build time,
-            searching for more optimization options. TRT defaults to 3
-        use_python_runtime: Whether to strictly use Python runtime or C++ runtime. To auto-select a runtime
-            based on C++ dependency presence (preferentially choosing C++ runtime if available), leave the
-            argument as None
-    Returns:
-        Backend for torch.compile
-    """
-    return partial(
-        torch_tensorrt_backend,
-        debug=debug,
-        precision=precision,
-        workspace_size=workspace_size,
-        min_block_size=min_block_size,
-        torch_executed_ops=torch_executed_ops,
-        pass_through_build_failures=pass_through_build_failures,
-        max_aux_streams=max_aux_streams,
-        version_compatible=version_compatible,
-        optimization_level=optimization_level,
-        use_python_runtime=use_python_runtime,
-        **kwargs,
-    )
-
-
 def _compile_graph(
     split_result: TRTSplitter,
     inputs: Any,
@@ -234,7 +177,7 @@ def lower_model(model: torch.nn.Module, inputs: Any, **kwargs): |
         [fuse_permute_matmul, fuse_permute_linear]
     )
     lowered_model = graph_optimization_pm(model)
-    if isinstance(lowered_model, torch.fx.GraphModule):
-        ShapeProp(lowered_model).propagate(*inputs)
+    # if isinstance(lowered_model, torch.fx.GraphModule):
+    #     ShapeProp(lowered_model).propagate(*inputs)
 
     return lowered_model
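
For reference, a minimal sketch of how the rewired "torch_compile" path might be driven from user code. This is illustrative only: MyModel and the input shape are assumptions; ir="torch_compile" takes the torch.compile branch above, and enabled_precisions={torch.float16} maps to precision = torch.float16 in the precision selection logic.

import torch
import torch_tensorrt

model = MyModel().eval().cuda()  # hypothetical user module
example_inputs = [torch.randn(1, 3, 224, 224, device="cuda")]

# Keyword arguments such as debug and min_block_size are collected into
# the compilation_options dict assembled in compile() above and passed
# to torch.compile via its `options` argument.
trt_model = torch_tensorrt.compile(
    model,
    ir="torch_compile",
    inputs=example_inputs,
    enabled_precisions={torch.float16},
    min_block_size=1,
    debug=True,
)

trt_model(*example_inputs)  # run inference; compile() already triggered the build

Note the design choice visible in the diff: instead of currying a backend with functools.partial (the removed create_backend), the options now flow through torch.compile's native `options` dict, so CompilationSettings and the torch.compile path share one source of truth.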