Skip to content

Commit 4b99590

Browse files
eigen-kfacebook-github-bot
authored and committed
Fix the math in the FuseMulTensorIntoQuantPass.
Summary: The new scale value was calculated incorrectly, fixing that with this diff. See the details of the new scale calculation in the comments in the pass. Differential Revision: D77267667
1 parent 124758e commit 4b99590

File tree

2 files changed

+18
-6
lines changed

2 files changed

+18
-6
lines changed

backends/cadence/aot/fuse_ops.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -882,7 +882,13 @@ def attempt_fusion(
882882
assert isinstance(prev_scale, (int, float))
883883
mul_scalar = full_node.args[1]
884884
assert isinstance(mul_scalar, (int, float))
885-
new_scale = float(prev_scale) * float(mul_scalar)
885+
# The reason why we divide previous scale by the mul value to get a new scale:
886+
# y = x * mul_scalar
887+
# q = zp + y / prev_scale
888+
# q = zp + x * mul_scalar / prev_scale
889+
# new_scale = prev_scale / mul_scalar
890+
# q = zp + x / new_scale
891+
new_scale = float(prev_scale) / float(mul_scalar)
886892

887893
logging.debug(
888894
f"Fused {mul_node} and {full_node} into {quant_node}. Updated scale from {quant_node.args[1]} to {new_scale}"

backends/cadence/aot/tests/test_fusion_ops_passes.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -598,7 +598,7 @@ def test_fuse_mul_scalar_into_dequant(self) -> None:
598598
self.assertEqual(deq_scale, dequant_scale * mul_value)
599599

600600
def test_fuse_mul_into_quant(self) -> None:
601-
quant_scale = 1.5
601+
quant_scale = 5
602602
mul_value = 10
603603

604604
builder = GraphBuilder()
@@ -613,7 +613,7 @@ def test_fuse_mul_into_quant(self) -> None:
613613
)
614614
quant = builder.call_operator(
615615
op=exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
616-
args=(mul, quant_scale, 0, 0, 255, torch.uint8),
616+
args=(mul, quant_scale, 7, 0, 255, torch.uint8),
617617
)
618618
builder.output([quant])
619619
original_graph = builder.get_graph_module()
@@ -631,14 +631,20 @@ def test_fuse_mul_into_quant(self) -> None:
631631
)
632632

633633
# verify that the quant scale value was updated correctly
634-
deq_scale = -1
634+
new_quant_scale = -1
635635
for node in converted_graph.graph.nodes:
636636
if (
637637
node.target
638638
== exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
639639
):
640-
deq_scale = node.args[1]
641-
self.assertEqual(deq_scale, quant_scale * mul_value)
640+
new_quant_scale = node.args[1]
641+
self.assertEqual(new_quant_scale, quant_scale / mul_value)
642+
643+
# verify the math is correct
644+
inp = torch.randn(4, 32, dtype=torch.float32)
645+
original_out = original_graph(inp)[0]
646+
new_out = converted_graph(inp)[0]
647+
assert torch.equal(original_out, new_out)
642648

643649
def test_fuse_then_transpose_pass(self) -> None:
644650
# Create a graph with full -> transpose.

0 commit comments

Comments (0)