Skip to content

Commit 7a9d63e

Browse files
ulivne authored and xinhe3 committed
[FSW-13914] Fix gaudi specific code in common location (#224)
Move Gaudi specific code to internal scopes, so it won't be imported in FS/JS env Signed-off-by: Xin He <[email protected]>
1 parent c2f9b67 commit 7a9d63e

File tree

4 files changed

+23
-7
lines changed

4 files changed

+23
-7
lines changed

neural_compressor/torch/algorithms/fp8_quant/_core/fp_utils.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
from enum import Enum
1717
from .common import ModuleConfig
1818
from neural_compressor.torch.utils.auto_accelerator import auto_detect_accelerator, INCAcceleratorType
19+
from neural_compressor.torch.utils import logger
20+
1921
cur_accelerator = auto_detect_accelerator()
2022

2123
descale_fcn = lambda x, scale: torch.mul(x, scale)
@@ -122,9 +124,8 @@ def get_fullscales_by_expbias_set(dtype, device, expbias_set):
122124

123125
def get_fp8_hw_alligned_scales_by_device(dtype, device):
124126
if device not in [GAUDI2, GAUDI3]:
125-
raise ValueError(
126-
f"{device} is not supported"
127-
)
127+
logger.warning("hw aligned scales not supported for device {}".format(device))
128+
return None # only Gaudis support hw aligned scales
128129
exp_bias_set = EXP_BIAS_SETS.get((device, dtype), None)
129130
return (
130131
None
@@ -157,6 +158,10 @@ def calc_maxabs_scale(xmaxabs, fullscale, backoff=1):
157158
return scale
158159

159160
def mmse_scale_multi(x, ref_scale, scales, lp_dtype, hp_dtype):
161+
if not scales:
162+
raise ValueError(
163+
"got empty scale list. it is possible that scale method isn't supported by current device."
164+
)
160165
# TODO: SW-176672 move weights to hpu before the scale calculations
161166
x = x.to("hpu")
162167
Nch = x.shape[-1]
@@ -180,6 +185,10 @@ def mmse_scale_multi(x, ref_scale, scales, lp_dtype, hp_dtype):
180185

181186

182187
def mmse_scale(x, scales, lp_dtype, hp_dtype):
188+
if not scales:
189+
raise ValueError(
190+
"got empty scale list. it is possible that scale method isn't supported by current device."
191+
)
183192
# TODO: SW-176672 move weights to hpu before the scale calculations
184193
x = x.to("hpu")
185194
opt_err = torch.ones(1, dtype=hp_dtype, device=x.device) * torch.inf

neural_compressor/torch/algorithms/fp8_quant/_core/utils.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,9 @@
2121
from .quantize import quantize
2222
from .scale import scale_method_mapping, scaling_params
2323
from .common import is_runtime_scale_patching
24-
24+
from neural_compressor.torch.utils.auto_accelerator import is_any_gaudi_accelerator
2525
import os
2626
import re
27-
import habana_frameworks.torch.utils.experimental as htexp
2827

2928

3029
def update_mod_dict(config):
@@ -91,7 +90,8 @@ def quantize_dynamic_op(config, mod_type):
9190

9291

9392
def set_runtime_scale_patching_mode(scaling_method_name):
94-
if is_runtime_scale_patching() and hasattr(htexp, "_set_scale_attributes"):
93+
import habana_frameworks.torch.utils.experimental as htexp # importing in local scope since it is gaudi specific
94+
if is_runtime_scale_patching():
9595
assert (
9696
scaling_method_name in runtime_scale_patching_supported_methods_list
9797
), f"Scaling method \"{scaling_method_name}\" is not supported for runtime scale patching (graph recompile reduction). Cannot set scaling attributes."
@@ -125,5 +125,7 @@ def prepare_model(model):
125125
scaling_method_name = scale_method_mapping[(config.cfg["scale_method"], config.cfg["observer"])]
126126
scaling_params[scaling_method_name].update(config.cfg["scale_params"])
127127
config.cfg["scale_params"] = scaling_params[scaling_method_name]
128-
set_runtime_scale_patching_mode(scaling_method_name)
128+
129+
if is_any_gaudi_accelerator(config.cfg["device_type"]):
130+
set_runtime_scale_patching_mode(scaling_method_name)
129131
return quantize(model, mod_list)

neural_compressor/torch/utils/auto_accelerator.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,3 +462,7 @@ def auto_detect_accelerator(device_name="auto") -> Auto_Accelerator:
462462
# INC_TARGET_DEVICE = "CPU" python ...
463463
# or
464464
# CUDA_VISIBLE_DEVICES="" python ...
465+
466+
467+
def is_any_gaudi_accelerator(acc_type: INCAcceleratorType):
468+
return acc_type.value > INCAcceleratorType.GAUDI_MIN.value
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{}

0 commit comments

Comments (0)