
Commit d7f99da

add more uts
Signed-off-by: yiliu30 <[email protected]>
1 parent de59f73 commit d7f99da

File tree

2 files changed (+80, -32)


neural_compressor/torch/algorithms/pt2e_quant/half_precision_rewriter.py

Lines changed: 32 additions & 18 deletions
@@ -13,6 +13,7 @@
 # limitations under the License.
 """Rewrite the FP32 operators to FP16 or BF16 operators."""
 
+from collections import defaultdict
 from dataclasses import dataclass
 from functools import partial
 from typing import Any, Callable, Dict, List, Tuple
@@ -50,25 +51,31 @@ class PatternPair:
 
 # key: torch func
 # value: the tuple of args
-FuncArgsMappingType: TypeAlias = Dict[TorchFuncType, Tuple[torch.Tensor, ...]]
+FuncArgsMappingType: TypeAlias = Dict[TorchFuncType, List[Tuple[torch.Tensor, ...]]]
 
 
 # Align with xiq, as it relies on xiq's set_module_xx capability
 FN_ARGS_MAPPING: FuncArgsMappingType = {
-    torch.nn.functional.linear: (torch.randn(0, 0), torch.randn(0, 0)),  # linear w/o bias
-    torch.nn.functional.linear: (torch.randn(0, 0), torch.randn(0, 0), torch.randn(0)),  # linear w/ bias
-    torch.nn.functional.conv2d: (torch.randn(1, 1, 1, 1), torch.randn(1, 1, 1, 1)),  # conv2d w/o bias
-    torch.nn.functional.conv2d: (torch.randn(1, 1, 1, 1), torch.randn(1, 1, 1, 1), torch.randn(1)),  # conv2d w/ bias
-    torch.matmul: (torch.randn(0, 0), torch.randn(0, 0)),  # matmul
-    torch.matmul: (torch.randn(0, 0, 0), torch.randn(0, 0, 0)),  # matmul
-    torch.matmul: (torch.randn(0, 0, 0, 0), torch.randn(0, 0, 0, 0)),  # matmul
+    # Note: order matters
+    torch.nn.functional.linear: [
+        (torch.randn(0, 0), torch.randn(0, 0)),  # linear w/o bias
+        (torch.randn(0, 0), torch.randn(0, 0), torch.randn(0)),  # linear w/ bias
+    ],
+    torch.nn.functional.conv2d: [
+        (torch.randn(1, 1, 1, 1), torch.randn(1, 1, 1, 1)),  # conv2d w/o bias
+        (torch.randn(1, 1, 1, 1), torch.randn(1, 1, 1, 1), torch.randn(1)),  # conv2d w/ bias
+    ],
+    torch.matmul: [
+        (torch.randn(0, 0), torch.randn(0, 0)),  # 2-D matmul
+        (torch.randn(0, 0, 0), torch.randn(0, 0, 0)),  # 3-D matmul
+        (torch.randn(0, 0, 0, 0), torch.randn(0, 0, 0, 0)),  # 4-D matmul
+    ],
 }
 
 # module cls <-> function name
 NN_MODULES_TO_NN_FN = {
     torch.nn.Linear: torch.nn.functional.linear,
     torch.nn.Conv2d: torch.nn.functional.conv2d,
-    torch.nn.MaxPool2d: torch.nn.functional.max_pool2d,
 }
 
 # Use the mapping from xiq
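
The change from plain tuples to lists of tuples fixes a real bug: in a Python dict literal, a repeated key silently keeps only its last value, so the old FN_ARGS_MAPPING registered just one arg-tuple per function. A minimal standalone sketch of the pitfall, using torch.matmul as in the mapping above:

    import torch

    # Repeated dict keys: only the last value survives, so the old mapping
    # silently dropped all but one pattern per function.
    old_style = {
        torch.matmul: (torch.randn(0, 0), torch.randn(0, 0)),        # 2-D variant
        torch.matmul: (torch.randn(0, 0, 0), torch.randn(0, 0, 0)),  # overwrites the 2-D variant
    }
    assert len(old_style) == 1

    # The new list-of-tuples layout keeps every variant under a single key.
    new_style = {
        torch.matmul: [
            (torch.randn(0, 0), torch.randn(0, 0)),
            (torch.randn(0, 0, 0), torch.randn(0, 0, 0)),
        ],
    }
    assert len(new_style[torch.matmul]) == 2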
@@ -78,7 +85,10 @@ class PatternPair:
 
 
 PatternRegistryType: TypeAlias = Dict[TorchFuncType, PatternPair]
-HALF_PRECISION_PATTERN_REGISTRY: Dict[torch.dtype, PatternRegistryType] = {torch.float16: {}, torch.bfloat16: {}}
+HALF_PRECISION_PATTERN_REGISTRY: Dict[torch.dtype, PatternRegistryType] = {
+    torch.float16: defaultdict(list),
+    torch.bfloat16: defaultdict(list),
+}
 
 # FP16_PATTERN_REGISTRY: PatternRegistryType = HALF_PRECISION_PATTERN_REGISTRY[torch.float16]
 # BF16_PATTERN_REGISTRY: PatternRegistryType = HALF_PRECISION_PATTERN_REGISTRY[torch.bfloat16]
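
Switching the per-dtype registries from plain dicts to defaultdict(list) is what lets _register_pattern_pair below append without seeding each function key first. A short sketch of that behavior, with placeholder string keys:

    from collections import defaultdict

    registry = defaultdict(list)
    registry["linear"].append("pattern_a")  # missing key materializes as an empty list
    registry["linear"].append("pattern_b")
    assert registry["linear"] == ["pattern_a", "pattern_b"]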
@@ -108,10 +118,11 @@ def replace_fn_wrapper(fn_args, fn):
 
 
 def _register_pattern_pair(dtype: torch.dtype) -> None:
-    for fn, fn_args in FN_ARGS_MAPPING.items():
-        logger.debug(f"Registering search and replace patterns for {fn} with args: {fn_args}.")
-        pattern_pair = pattern_factory(fn, fn_args)
-        HALF_PRECISION_PATTERN_REGISTRY[dtype][fn] = pattern_pair
+    for fn, fn_args_lst in FN_ARGS_MAPPING.items():
+        for fn_args in fn_args_lst:
+            logger.debug(f"Registering search and replace patterns for {fn} with args: {fn_args}.")
+            pattern_pair = pattern_factory(fn, fn_args)
+            HALF_PRECISION_PATTERN_REGISTRY[dtype][fn].append(pattern_pair)
     utils.logger.debug(
         f"Registered {len(HALF_PRECISION_PATTERN_REGISTRY[dtype])} search and replace patterns for {dtype}."
     )
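
Note that with list values, len(HALF_PRECISION_PATTERN_REGISTRY[dtype]) in the debug message above counts distinct functions rather than individual pattern pairs; if the pair total were wanted instead, it would be the sum of the list lengths. A hypothetical illustration:

    # 3 functions vs. 7 registered pattern pairs.
    registry = {"linear": ["p1", "p2"], "conv2d": ["p3", "p4"], "matmul": ["p5", "p6", "p7"]}
    assert len(registry) == 3
    assert sum(len(pairs) for pairs in registry.values()) == 7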
@@ -194,9 +205,10 @@ def get_unquantized_node_set(gm: torch.fx.GraphModule):
 
 def transformation(gm: torch.fx.GraphModule, node_candidate_list: List[str], target_dtype: torch.dtype = torch.float16):
     """Convert the nodes in `node_candidate_list` to `target_dtype` if possible."""
-    for pattern_pair in HALF_PRECISION_PATTERN_REGISTRY[target_dtype].values():
-        apply_single_pattern_pair(gm, pattern_pair, node_candidate_list)
-    utils.logger.info("Half precision conversion is done:")
+    for pattern_pair_lst in HALF_PRECISION_PATTERN_REGISTRY[target_dtype].values():
+        for pattern_pair in pattern_pair_lst:
+            apply_single_pattern_pair(gm, pattern_pair, node_candidate_list)
+    utils.logger.info(f"Half precision conversion ({target_dtype}) completed.")
     if utils.level_name == "DEBUG":  # pragma: no cover
         gm.print_readable(True)
 
@@ -249,5 +261,7 @@ def get_half_precision_node_set(gm, config):
     for node in possible_node_set:
         if node.target in SUPPORTED_OPERATORS:
             half_precision_node_set.add(node)
-    utils.logger.info(f"Found {len(half_precision_node_set)} nodes to convert to half precision.")
+    utils.logger.info(
+        f"Found {len(half_precision_node_set)} nodes to convert to half precision: {half_precision_node_set}"
+    )
     return half_precision_node_set

test/3x/torch/quantization/test_pt2e_quant.py

Lines changed: 48 additions & 14 deletions
@@ -29,7 +29,6 @@ def _is_ipex_imported():
     monkeypatch.setattr("neural_compressor.torch.quantization.algorithm_entry.is_ipex_imported", _is_ipex_imported)
     monkeypatch.setattr("neural_compressor.torch.export.pt2e_export.is_ipex_imported", _is_ipex_imported)
 
-
 class TestPT2EQuantization:
     def teardown_class(self):
         shutil.rmtree("saved_results", ignore_errors=True)
@@ -53,15 +52,15 @@ def forward(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
         return bar, example_inputs
 
     @staticmethod
-    def build_model_include_conv_and_linear():
+    def build_model_include_conv_and_linear(bias=True):
         class Model(torch.nn.Module):
-            def __init__(self):
+            def __init__(self, bias=True):
                 super(Model, self).__init__()
-                self.conv1 = torch.nn.Conv2d(3, 6, 5)
+                self.conv1 = torch.nn.Conv2d(3, 6, 5, bias=bias)
                 self.pool = torch.nn.MaxPool2d(2, 2)
-                self.conv2 = torch.nn.Conv2d(6, 16, 5)
-                self.fc1 = torch.nn.Linear(16 * 5 * 5, 120)
-                self.fc2 = torch.nn.Linear(120, 84)
+                self.conv2 = torch.nn.Conv2d(6, 16, 5, bias=bias)
+                self.fc1 = torch.nn.Linear(16 * 5 * 5, 120, bias=bias)
+                self.fc2 = torch.nn.Linear(120, 84, bias=bias)
 
             def forward(self, x):
                 x = self.conv1(x)
@@ -74,7 +73,7 @@ def forward(self, x):
 
                 return x
 
-        model = Model()
+        model = Model(bias)
         example_inputs = (torch.randn(1, 3, 32, 32),)
         return model, example_inputs
 
@@ -286,19 +285,54 @@ def test_mixed_fp16_and_int8(self, force_not_import_ipex):
 
     @pytest.mark.skipif(not GT_OR_EQUAL_TORCH_VERSION_2_5, reason="Requires torch>=2.5")
     @pytest.mark.parametrize("half_precision_dtype", ["fp16", "bf16"])
-    def test_auto_tune_mixed_int8_and_16bits(self, half_precision_dtype, force_not_import_ipex):
+    @pytest.mark.parametrize("op_name", ["conv1", "fc1"])
+    @pytest.mark.parametrize("bias", [True, False])
+    def test_auto_tune_mixed_int8_and_16bits(self, half_precision_dtype, op_name, bias, force_not_import_ipex):
+        # Test auto-tune with mixed int8 and 16-bit dtypes.
+        # Only checks that the patterns match, not the accuracy.
         # config1: int8 for all
-        # config2: half precision for linear
+        # config2: half precision for linear/conv
         from neural_compressor.torch.quantization.config import INT8StaticQuantConfig
         from neural_compressor.torch.quantization.autotune import autotune, TuningConfig
+
         config1 = INT8StaticQuantConfig()
-        config2 = INT8StaticQuantConfig().set_local("fc1", StaticQuantConfig(w_dtype=half_precision_dtype, act_dtype=half_precision_dtype))
+        config2 = INT8StaticQuantConfig().set_local(
+            op_name, StaticQuantConfig(w_dtype=half_precision_dtype, act_dtype=half_precision_dtype)
+        )
         tune_config = TuningConfig(config_set=[config1, config2], tolerable_loss=-0.1)
+        eval_result = [1, 1, 2]  # scores for baseline, config1, config2
+
         def fake_eval_fn(model):
-            return 1.0
+            res = eval_result.pop(0)
+            return res
+
         def run_fn(model):
             for i in range(2):
                 model(*example_inputs)
-        model, example_inputs = self.build_model_include_conv_and_linear()
+
+        model, example_inputs = self.build_model_include_conv_and_linear(bias)
         model = export(model, example_inputs=example_inputs)
-        qmodel = autotune(model=model, tune_config=tune_config, eval_fn=fake_eval_fn,run_fn=run_fn, example_inputs=example_inputs)
+        qmodel = autotune(
+            model=model, tune_config=tune_config, eval_fn=fake_eval_fn, run_fn=run_fn, example_inputs=example_inputs
+        )
+
+        # check the half-precision nodes
+        expected_node_occurrence = {
+            # 4 `aten.to` for the target op if bias else 3
+            torch.ops.aten.to.dtype: (3 + int(bias))
+        }
+        expected_node_occurrence = {
+            torch_test_quant_common.NodeSpec.call_function(k): v for k, v in expected_node_occurrence.items()
+        }
+        node_in_graph = self.get_node_in_graph(qmodel)
+        for node, cnt in expected_node_occurrence.items():
+            assert (
+                node_in_graph.get(node, 0) == cnt
+            ), f"Node {node} should occur {cnt} times, but occurs {node_in_graph.get(node, 0)} times"
+        # inference
+        from torch._inductor import config
+
+        config.freezing = True
+        opt_model = torch.compile(qmodel)
+        out = opt_model(*example_inputs)
+        assert out is not None
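
The occurrence check relies on the suite's get_node_in_graph helper, which must tally call_function targets in the exported FX graph. A hypothetical stand-in (count_call_function_targets is not part of the test suite) could look like:

    from collections import Counter

    import torch


    def count_call_function_targets(gm: torch.fx.GraphModule) -> Counter:
        """Tally how often each call_function target occurs in an FX graph."""
        return Counter(node.target for node in gm.graph.nodes if node.op == "call_function")

Read this way, the 3 + int(bias) expectation presumably corresponds to casts of the input, the weight, and the result of the rewritten op, plus one more cast for the bias when it is present.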
