Commit 3f73725

Yantom1 authored and xinhe3 committed
[SW-197607] INC- change hard coded gaudi 2 scales for optimal weight quantization (#221)

* [SW-197607] INC- change hard coded gaudi 2 scales for optimal weight quantization
* cr fix
1 parent d870a56 commit 3f73725

File tree

2 files changed: +11 -4 lines changed

neural_compressor/torch/algorithms/fp8_quant/_core/fp_utils.py

Lines changed: 9 additions & 2 deletions
@@ -120,21 +120,28 @@ def get_fullscales_by_expbias_set(dtype, device, expbias_set):
     return [get_fullscale(dtype, device, exp_bias=eb) for eb in expbias_set]
 
 
-def get_fp8_hw_alligned_scales(dtype, device):
+def get_fp8_hw_alligned_scales_by_device(dtype, device):
+    if device not in [GAUDI2, GAUDI3]:
+        raise ValueError(
+            f"{device} is not supported"
+        )
     exp_bias_set = EXP_BIAS_SETS.get((device, dtype), None)
     return (
         None
         if exp_bias_set is None
         else [x / get_fullscale(dtype, device) for x in get_fullscales_by_expbias_set(dtype, device, exp_bias_set)]
     )
 
+def get_fp8_hw_alligned_scales(dtype):
+    inc_device_type = auto_detect_accelerator().get_inc_accelerator_type()
+    return get_fp8_hw_alligned_scales_by_device(dtype, inc_device_type)
 
 DEVICES_SCALE_FACTORS = {
     INCAcceleratorType.GAUDI2: 4,
     INCAcceleratorType.GAUDI3: 1,
 }
 FP8_143_SCALES = {
-    device: get_fp8_hw_alligned_scales(torch.float8_e4m3fn, device) for device in DEVICES_SCALE_FACTORS.keys()
+    device: get_fp8_hw_alligned_scales_by_device(torch.float8_e4m3fn, device) for device in DEVICES_SCALE_FACTORS.keys()
 }
 FP8_143_SCALES_TRAITS = {
     device: (
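
For context, a minimal sketch of the reworked call path, assuming an Intel Gaudi environment where neural_compressor's fp8_quant internals are importable (the snippet is illustrative and not part of the commit):

import torch
from neural_compressor.torch.algorithms.fp8_quant._core.fp_utils import (
    get_fp8_hw_alligned_scales,
    get_fp8_hw_alligned_scales_by_device,
)

# New wrapper: the accelerator type (Gaudi 2 or Gaudi 3) is detected
# internally via auto_detect_accelerator(), so no device argument is needed.
scales = get_fp8_hw_alligned_scales(torch.float8_e4m3fn)

# The renamed device-explicit variant still exists and now raises
# ValueError for devices other than Gaudi 2 / Gaudi 3:
# scales = get_fp8_hw_alligned_scales_by_device(torch.float8_e4m3fn, device)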

neural_compressor/torch/algorithms/fp8_quant/_core/scale.py

Lines changed: 2 additions & 2 deletions
@@ -17,6 +17,7 @@
 from ..model_configs import ModuleConfig, ModuleExtraConfig
 from .scale_methods import ops_quantizer
 from .._quant_common.quant_config import ScaleMethod
+from .fp_utils import get_fp8_hw_alligned_scales
 import torch
 
 
@@ -77,7 +78,6 @@ def prepare_layer_scales(mod, mod_name, config, mod_type_str, measurement, scale
     )
     return mod_extra_config, save_file
 
-
 scale_method_mapping = {
     (ScaleMethod.UNIT_SCALE, "maxabs"): "unit_scale",
     (ScaleMethod.UNIT_SCALE, "maxabs_per_channel"): "unit_scale",
@@ -158,7 +158,7 @@ def prepare_layer_scales(mod, mod_name, config, mod_type_str, measurement, scale
     "act_maxabs_pts_hw_weight_opt_pts_hw": {
         "input_backoff": 0.25,
         "weight_backoff": 0.5,
-        "weight_scales": [2.0**s for s in [4, 0, -4, -8]],
+        "weight_scales": get_fp8_hw_alligned_scales(torch.float8_e4m3fn)
     },
     "smoothquant_weights_maxabs_pow2": {
         "input_backoff": 0.25,
