Commit 09a52b9

matmul changes, bmm changes and adding broadcastable
1 parent dba4988 commit 09a52b9

File tree

4 files changed: +167 -18 lines changed

py/torch_tensorrt/fx/converters/converter_utils.py

Lines changed: 35 additions & 2 deletions
@@ -77,7 +77,7 @@ def get_positive_dim(dim: int, dim_size: int) -> int:
     return dim


-def set_layer_name(layer: TRTLayer, target: Target, name: str) -> None:
+def set_layer_name(layer: TRTLayer, target: Target, name: str, is_acc=True) -> None:
     """
     Set the TensorRT layer name to "[TensorRT Layer Type]_[Original Op Name]_[FX Node Name with Suffix]"
@@ -87,7 +87,7 @@ def set_layer_name(layer: TRTLayer, target: Target, name: str) -> None:
             the node represents.
         name (str): Consists of fx node.name with optional suffix.
     """
-    target_name = target if isinstance(target, str) else f"acc_ops.{target.__name__}"
+    target_name = target if isinstance(target, str) else f"acc_ops.{target.__name__}" if is_acc else f"aten_ops.{target.__name__}"
     layer.name = f"[{layer.type.name}]-[{target_name}]-[{name}]"


@@ -288,6 +288,39 @@ def prepend_ones(
     return layer.get_output(0)


+def broadcastable(
+    a: TRTTensor,
+    b: TRTTensor,
+) -> bool:
+    "Check if two tensors are broadcastable according to torch rules"
+    a_shape = tuple(a.shape)
+    b_shape = tuple(b.shape)
+    print("a shape is", a_shape)
+    print("b shape is", b_shape)
+    # check from the trailing
+    diff = len(a_shape) - len(b_shape)
+    if diff == 0:
+        return True
+    if diff > 0:
+        max = len(a_shape)
+        min = len(b_shape)
+        greater_tensor = a_shape
+        lesser_tensor = b_shape
+    elif diff < 0:
+        max = len(b_shape)
+        min = len(a_shape)
+        greater_tensor = b_shape
+        lesser_tensor = a_shape
+    j = min - 1
+    for i in range(max - 1, diff - 1, -1):
+        if not (
+            greater_tensor[i] != lesser_tensor[j]
+            and (greater_tensor[i] == 1 or lesser_tensor[i] == 1)
+        ):
+            return False
+    return True
+
+
 def broadcast(
     network: TRTNetwork,
     a: TRTTensor,
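
For context, the torch broadcasting rule that broadcastable is meant to encode compares shapes from the trailing dimension: two sizes are compatible when they are equal or one of them is 1, and a shorter shape behaves as if padded with leading 1s. A minimal standalone sketch of that rule on plain shape tuples (a hypothetical helper for illustration, not part of this commit):

    # Sketch of torch-style broadcast compatibility on plain shape tuples.
    def shapes_broadcastable(a_shape: tuple, b_shape: tuple) -> bool:
        # Walk both shapes from the trailing dimension; missing leading dims act as size 1.
        for a_dim, b_dim in zip(reversed(a_shape), reversed(b_shape)):
            if a_dim != b_dim and a_dim != 1 and b_dim != 1:
                return False
        return True

    assert shapes_broadcastable((1, 2, 2, 3), (2, 3))  # trailing dims line up
    assert not shapes_broadcastable((2, 3), (3, 2))    # 3 vs 2, neither is 1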

py/torch_tensorrt/fx/converters/operator.py

Lines changed: 12 additions & 1 deletion
@@ -16,6 +16,7 @@
 from .converter_utils import set_layer_name
 from .converter_utils import get_trt_tensor
 from .converter_utils import broadcast
+from .converter_utils import broadcastable
 from .converter_utils import squeeze_left
 from .converter_utils import dtype_uniform
 from .converter_utils import get_trt_plugin
@@ -1117,7 +1118,17 @@ def add_expand(network, target, kwargs, name):

     ranks = len(input_val.shape)
     # TRT does not support different dimension size
-    assert len(shape) == ranks
+    # though this condition is not seen in the case of bmm
+    # where input_t and shape dimensions are not equal
+    assert len(shape) >= ranks
+    if len(shape) != ranks:
+        shape_tuple = tuple([0] * len(shape))
+        shape_tensor = get_trt_tensor(network, input_t, f"{name}_shape")
+        input_val, shape_tensor = broadcast(
+            network, input_val, shape_tensor, f"{name}_input_val", f"{name}_shape_val"
+        )
+        ranks = len(shape)
+
     shape = [input_val.shape[i] if shape[i] == -1 else shape[i] for i in range(ranks)]

     inshape = tuple(input_val.shape)
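
The relaxed assert above admits the case where the requested shape has more dimensions than the input, which mirrors how torch.Tensor.expand behaves: extra target dimensions are added at the front. A small plain-PyTorch illustration of that semantic (independent of the TensorRT converter):

    import torch

    # expand may prepend new dimensions: a (2, 3) tensor expanded to (4, 2, 3).
    x = torch.randn(2, 3)
    y = x.expand(4, 2, 3)
    assert y.shape == (4, 2, 3)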

py/torch_tensorrt/fx/passes/lower_basic_pass_aten.py

Lines changed: 23 additions & 5 deletions
@@ -416,29 +416,46 @@ def compose_bmm(
             node = n
             input_n = node.all_input_nodes[0]
             other_n = node.all_input_nodes[1]
+
+            # If no input nodes are available, the bmm argument itself could be an input
+            # Alternatively, if the node has no users, it can be eliminated
+            if len(input_n.all_input_nodes) == 0 or len(node.users) == 0:
+                return PassResult(module, modified)
+
             output = next(iter(node.users))
             input_input_n = input_n.all_input_nodes[0]
             if (
                 input_input_n.target != torch.ops.aten.expand.default
                 and input_n.target != torch.ops.aten.view.default
             ):
-                raise RuntimeError(
-                    "Bmm is addressed in fixed pattern. A new pattern is met!"
+                _LOGGER.warn(
+                    "Bmm is addressed in fixed pattern. "
+                    + f"A new pattern {input_input_n.target}, {input_n.target} is met! "
+                    + "Skipping bmm lowering on this operation"
                 )
+                return PassResult(module, modified)
+
             real_input = input_input_n.all_input_nodes[0]
             input_other_n = other_n.all_input_nodes[0]
             if (
                 input_other_n.target != torch.ops.aten.expand.default
                 and other_n.target != torch.ops.aten.view.default
             ):
-                raise RuntimeError(
-                    "Bmm is addressed in fixed pattern. A new pattern is met!"
+                _LOGGER.warn(
+                    "Bmm is addressed in fixed pattern. "
+                    + f"A new pattern {input_other_n.target}, {other_n.target} is met! "
+                    + "Skipping bmm lowering on this operation"
                 )
+                return PassResult(module, modified)
+
             real_other = input_other_n.all_input_nodes[0]
             if len(real_other.meta["val"].size()) == 2:
                 new_func = aten_compose_bmm_2d
-            if len(real_other.meta["val"].size()) == 3:
+            elif len(real_other.meta["val"].size()) == 3:
                 new_func = aten_compose_bmm_3d
+            else:
+                # No valid bmm replacement exists for the specified dimensions
+                return PassResult(module, modified)

             with module.graph.inserting_after(node):
                 new_args = (real_input, real_other)
@@ -449,6 +466,7 @@ def compose_bmm(
                     kwargs=None,
                 )
             output.replace_all_uses_with(new_node)
+            modified = True

     module.graph.eliminate_dead_code()
     module.recompile()
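
As background for this pass: for 3-D inputs, torch.matmul computes the same batched product as torch.bmm, which is the pattern compose_bmm recomposes from the traced expand/view/bmm nodes. A quick plain-PyTorch sanity check of that equivalence:

    import torch

    a = torch.randn(2, 3, 4)
    b = torch.randn(2, 4, 5)
    # For 3-D operands, matmul is a batched matrix multiply, i.e. bmm.
    assert torch.allclose(torch.matmul(a, b), torch.bmm(a, b))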

py/torch_tensorrt/fx/test/converters/aten_op/test_matmul_aten.py

Lines changed: 97 additions & 10 deletions
@@ -6,22 +6,109 @@
 from torch.testing._internal.common_utils import run_tests
 from torch_tensorrt.fx.tools.common_fx2trt import DispatchTestCase, InputTensorSpec

+import torch
+import torch.nn as nn
+import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops
+from parameterized import parameterized
+from torch.testing._internal.common_utils import run_tests
+from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec
+

 class TestMatMulConverter(DispatchTestCase):
-    def test_matmul(self):
-        class TestModule(torch.nn.Module):
-            def forward(self, x, y):
-                return torch.matmul(x, y)
-
-        inputOne = torch.randn(2, 32)
-        inputTwo = torch.randn(32, 2)
-        inputs = [inputOne, inputTwo]
+    @parameterized.expand(
+        [
+            ("2_2", (2, 3), (3, 2)),
+            ("2_2", (2, 3), (3, 1)),
+            # FIXME torch.ops.aten.mv.default for (2,3), (3,1) - should mv be lowered to mm?
+            # (2,3), (3,) torch.ops.aten.mv.default
+            # Following cases use torch.ops.aten.bmm.default
+            # ("4_3", (3, 1, 3, 2), (2, 2, 3)),
+            # ("3_4", (3, 1, 3, 2), (2, 2, 3)),
+            # ("3_4", (2, 2, 3), (3, 1, 3, 3)),
+            # ("4_2", (1, 2, 2, 3), (3, 2)),
+        ]
+    )
+    def test_matmul_other_constant(self, _, input_shape, other_shape):
+        class MatMul(nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.other = nn.Parameter(torch.randn(*other_shape))
+
+            def forward(self, input):
+                return torch.matmul(input, self.other)
+
+        inputs = [torch.randn(*input_shape)]
+
+        self.run_test(
+            MatMul(),
+            inputs,
+            expected_ops={torch.ops.aten.mm.default},
+            test_explicit_batch_dim=(len(input_shape) >= 1),
+        )
+
+    @parameterized.expand(
+        [
+            ("2_2", (2, 3), (3, 2)),
+            ("1_2", (1, 3), (3, 2)),
+            # FIXME torch.ops.aten.mv.default for (2,3), (3,1) - should mv be lowered to mm?
+            # (2,3), (3,) torch.ops.aten.mv.default
+            # Following cases use torch.ops.aten.bmm.default
+            # ("4_3", (3, 1, 3, 2), (2, 2, 3)),
+            # ("3_4", (3, 1, 3, 2), (2, 2, 3)),
+            # ("3_4", (2, 2, 3), (3, 1, 3, 3)),
+            # ("4_2", (1, 2, 2, 3), (3, 2)),
+
+        ]
+    )
+    def test_matmul_input_constant(self, _, input_shape, other_shape):
+        class MatMul(nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.input = nn.Parameter(torch.randn(*input_shape))
+
+            def forward(self, other):
+                return torch.matmul(self.input, other)
+
+        inputs = [torch.randn(*other_shape)]
+
+        self.run_test(
+            MatMul(),
+            inputs,
+            expected_ops={torch.ops.aten.mm.default},
+            test_explicit_batch_dim=True,
+            # test_explicit_batch_dim=(len(other_shape) <= 2),
+        )
+
+    @parameterized.expand(
+        [
+            ("2_2", (2, 3), (3, 2)),
+            # ("2_3", (2, 3), (2, 3, 4)),
+            # ("4_4", (2, 2, 2, 3), (2, 1, 3, 2)),
+            # ("4_2", (2, 1, 2, 3), (3, 2)),
+            # ("2_1", (2, 3), (3,)),
+            # ("1_2", (3,), (3, 2)),
+            # ("1_1", (3,), (3,)),
+        ]
+    )
+    def test_matmul(self, _, input_shape, other_shape):
+        class MatMul(nn.Module):
+            def forward(self, input, other):
+                return torch.matmul(input, other)
+
+        inputs = [torch.randn(*input_shape), torch.randn(*other_shape)]
+        test_explicit_batch_dim = not (
+            input_shape[0] == other_shape[0]
+            and len(input_shape) > 2
+            and len(other_shape) > 2
+        )
         self.run_test(
-            TestModule(),
+            MatMul(),
             inputs,
             expected_ops={torch.ops.aten.mm.default},
+            test_explicit_batch_dim=test_explicit_batch_dim,
         )

+# FIXME: dynamic shape is giving bmm

 if __name__ == "__main__":
-    run_tests()
+    run_tests()
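
The commented-out cases rely on matmul's batch broadcasting (batch dimensions are broadcast before the per-matrix multiply) and on its 1-D handling, which is why they trace to aten.bmm or aten.mv rather than aten.mm, as the inline comments note. Expected result shapes for two of those cases, checked in plain PyTorch:

    import torch

    # Batch dims broadcast, matrix dims multiply: (2, 1, 2, 3) @ (3, 2) -> (2, 1, 2, 2).
    out = torch.matmul(torch.randn(2, 1, 2, 3), torch.randn(3, 2))
    assert out.shape == (2, 1, 2, 2)

    # A 1-D other is treated as a vector: (2, 3) @ (3,) -> (2,)  (the aten.mv case noted above).
    vec = torch.matmul(torch.randn(2, 3), torch.randn(3))
    assert vec.shape == (2,)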
