Commit de7c531

ulivne authored and XuehaoSun committed
[FSW-12066] small fixes in xpu quantized func (#145)
1 parent 65b89d5 commit de7c531

File tree

2 files changed: +1 −1 lines changed


neural_compressor/torch/algorithms/fp8_quant/_core/quantized_func_wrappers/xpu/xpu_quantized_func_wrapper.py

Lines changed: 1 addition & 0 deletions

@@ -65,6 +65,7 @@ def get_default_quantized_func(self):
 
 _OP_TYPE_XPU_QUANTIZED_WRAPPER_CLASSES = {
     OP_TYPE.LINEAR_GEMM : QuantizedXPUMatmul,
+    OP_TYPE.MATMUL_GEMM : QuantizedXPUMatmul,
     OP_TYPE.CAST_TO_FP8 : QuantizedXPUCastToFP8Base,
     OP_TYPE.CAST_FROM_FP8 : QuantizedXPUCastFromFP8Base
 }
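For context, this registry maps each op type to its XPU quantized wrapper class; the commit's one-line fix routes MATMUL_GEMM through the same wrapper already used for LINEAR_GEMM. Below is a minimal, self-contained sketch of how such a registry lookup typically works. The OP_TYPE enum and wrapper classes here are hypothetical stand-ins, not the real definitions from neural_compressor.

from enum import Enum, auto

# Hypothetical stand-ins for the real OP_TYPE enum and wrapper classes
# defined in neural_compressor's fp8_quant module.
class OP_TYPE(Enum):
    LINEAR_GEMM = auto()
    MATMUL_GEMM = auto()
    CAST_TO_FP8 = auto()
    CAST_FROM_FP8 = auto()

class QuantizedXPUMatmul: ...
class QuantizedXPUCastToFP8Base: ...
class QuantizedXPUCastFromFP8Base: ...

_OP_TYPE_XPU_QUANTIZED_WRAPPER_CLASSES = {
    OP_TYPE.LINEAR_GEMM: QuantizedXPUMatmul,
    OP_TYPE.MATMUL_GEMM: QuantizedXPUMatmul,  # the entry this commit adds
    OP_TYPE.CAST_TO_FP8: QuantizedXPUCastToFP8Base,
    OP_TYPE.CAST_FROM_FP8: QuantizedXPUCastFromFP8Base,
}

def get_wrapper_class(op_type):
    # Resolve the wrapper class for an op type; before this commit,
    # a MATMUL_GEMM lookup on XPU would have raised KeyError here.
    return _OP_TYPE_XPU_QUANTIZED_WRAPPER_CLASSES[op_type]

assert get_wrapper_class(OP_TYPE.MATMUL_GEMM) is QuantizedXPUMatmul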

test/3x/torch/algorithms/fp8_quant_xpu/unit_tests/test_xpu_basic.py

Lines changed: 0 additions & 1 deletion

@@ -61,7 +61,6 @@ def forward(self, input):
         return self.my_linear(input)
 
 
-@pytest.mark.skip(reason="FSW-13402 device hangs")
 def test_xpu_basic_mamtul():
     # test convert flow and quantized func
     my_model = MyModelMatmul()
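Dropping the unconditional @pytest.mark.skip re-enables this test. For reference, a sketch contrasting the removed pattern with a conditional skip; xpu_available() is a hypothetical helper, and torch.xpu.is_available() assumes a PyTorch build with XPU support.

import pytest

def xpu_available():
    # Hypothetical availability check; torch.xpu.is_available() is
    # present only in PyTorch builds with XPU support.
    try:
        import torch
        return torch.xpu.is_available()
    except (ImportError, AttributeError):
        return False

# Unconditional skip, as the test carried before this commit:
@pytest.mark.skip(reason="FSW-13402 device hangs")
def test_always_skipped():
    ...

# Conditional alternative: run only when an XPU device is present.
@pytest.mark.skipif(not xpu_available(), reason="requires an XPU device")
def test_runs_only_on_xpu():
    ...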

0 commit comments