
Commit 9b8090f

yiliu30 authored and Yi4Liu committed
[SW-217321] Add autoround UTs Back (#197)
* add autoround UTs back

Change-Id: I0614ffd8be4f89e9787037ee99e24a60f8548b49

---------

Signed-off-by: Yi Liu <[email protected]>
Co-authored-by: Yi Liu <[email protected]>
Signed-off-by: Xin He <[email protected]>
1 parent 520f11f · commit 9b8090f

3 files changed, +9 -9 lines changed

neural_compressor/torch/algorithms/weight_only/save_load.py

Lines changed: 1 addition & 1 deletion
@@ -897,7 +897,7 @@ def _init_hf_model(self, model_class, config):
         from transformers.modeling_utils import no_init_weights
         from transformers.utils import ContextManagers

-        _fast_init = self.kwargs.pop("_fast_init", True)
+        _ = self.kwargs.pop("_fast_init", True)
         torch_dtype = self.kwargs.pop("torch_dtype", "auto")
         is_sharded = self.kwargs.pop("is_sharded", False)
         sharded_metadata = self.kwargs.pop("sharded_metadata", None)

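The change above only stops binding the popped "_fast_init" value to a name; the key is still removed from self.kwargs so it never reaches the downstream model constructor. A minimal, self-contained sketch of that sanitizing pattern (the helper name and standalone kwargs dict are illustrative, not the library's actual API; only the popped keys and defaults come from the diff):

def split_loader_kwargs(kwargs: dict) -> tuple[dict, dict]:
    # Drop loader-internal keys before the rest is forwarded to the HF model.
    _ = kwargs.pop("_fast_init", True)  # popped and discarded, as in the patch
    loader_opts = {
        "torch_dtype": kwargs.pop("torch_dtype", "auto"),
        "is_sharded": kwargs.pop("is_sharded", False),
        "sharded_metadata": kwargs.pop("sharded_metadata", None),
    }
    return loader_opts, kwargs  # remaining kwargs go to the model constructor


opts, rest = split_loader_kwargs({"_fast_init": False, "torch_dtype": "float16", "trust_remote_code": True})
print(opts)  # {'torch_dtype': 'float16', 'is_sharded': False, 'sharded_metadata': None}
print(rest)  # {'trust_remote_code': True}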
test/3x/torch/quantization/fp8_quant/test_save_load.py

Lines changed: 1 addition & 1 deletion
@@ -73,7 +73,7 @@ def test_save_vllm_compatible_model():
     tokenizer = transformers.AutoTokenizer.from_pretrained(name)
     tokenizer.save_pretrained("saved_results_qwen")

-
+@pytest.mark.skip(reason="[SW-226589] Skip this test since the model was updated")
 def test_load_model_provided_by_neuralmagic():
     model_name_or_path = "neuralmagic/Qwen2-0.5B-Instruct-FP8"
     hpu_mem0 = get_used_hpu_mem_MB()

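For context, the marker added above is pytest's unconditional skip, while the guards in the next file use skipif; a minimal illustration (not from the repository) of the difference:

import sys

import pytest


@pytest.mark.skip(reason="[SW-226589] Skip this test since the model was updated")
def test_always_skipped():
    raise AssertionError("never executed")  # skip applies regardless of environment


@pytest.mark.skipif(sys.platform == "win32", reason="not supported on Windows")
def test_conditionally_skipped():
    assert True  # runs everywhere except Windows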
test/3x/torch/quantization/weight_only/test_autoround.py

Lines changed: 7 additions & 7 deletions
@@ -69,7 +69,6 @@ def run_fn(model, dataloader):
         else:
             model(data)

-@pytest.mark.skip(reason="SW-217321 pytorch inductor error")
 @pytest.mark.skipif(is_habana_framework_installed(), reason="These tests are not supported on HPU for now.")
 @pytest.mark.skipif(not auto_round_installed, reason="auto_round module is not installed")
 class TestAutoRoundCPU:
@@ -284,7 +283,6 @@ def test_mllm(self):
         # loaded_model = load("saved_results_tiny-random-GPTJForCausalLM", format="huggingface", trust_remote_code=True)


-@pytest.mark.skip(reason="SW-217321 pytorch inductor error")
 @pytest.mark.skipif(not is_habana_framework_installed(), reason="Habana framework is not installed")
 @pytest.mark.skipif(os.getenv("PT_HPU_LAZY_MODE", "0") == "1", reason="Lazy mode is enabled")
 @pytest.mark.skipif(not auto_round_installed, reason="auto_round module is not installed")
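With the unconditional skip removed, collection of the two AutoRound test classes is again driven purely by these skipif guards: the CPU class runs only when the Habana framework is absent, the HPU-gated class only when it is present and lazy mode is off, and both require auto_round. A hedged sketch of how such guard flags are commonly derived; the names mirror the test file, but the bodies here are assumptions, not the file's actual helpers:

import importlib.util
import os


def is_habana_framework_installed() -> bool:
    # Assumed check: the Habana PyTorch bridge package can be found.
    return importlib.util.find_spec("habana_frameworks") is not None


auto_round_installed = importlib.util.find_spec("auto_round") is not None
lazy_mode_enabled = os.getenv("PT_HPU_LAZY_MODE", "0") == "1"  # same env var as the guard above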
@@ -366,7 +364,7 @@ def test_autoround_w4a8(self):
     @pytest.mark.parametrize("quant_lm_head", [True, False])
     def test_autoround(self, quant_lm_head):
         fp32_model = copy.deepcopy(self.tiny_llama_model)
-        quant_config = AutoRoundConfig(nsamples=32, seqlen=10, iters=10, scale_dtype="fp32")
+        quant_config = AutoRoundConfig(nsamples=32, seqlen=10, iters=10, act_dtype="fp32", scale_dtype="fp32")
         if quant_lm_head is False:
             quant_config.set_local("lm_head", AutoRoundConfig(dtype="fp32"))
         logger.info(f"Test AutoRound with config {quant_config}")
@@ -377,30 +375,32 @@ def test_autoround(self, quant_lm_head):
         run_fn(model, self.dataloader)
         q_model = convert(model)
         assert "model.layers.0.self_attn.k_proj" in q_model.autoround_config.keys()
-        assert "scale" in q_model.autoround_config["model.layers.0.self_attn.k_proj"].keys()
+        assert "scale_dtype" in q_model.autoround_config["model.layers.0.self_attn.k_proj"].keys()
         assert torch.float32 == q_model.autoround_config["model.layers.0.self_attn.k_proj"]["scale_dtype"]
         assert isinstance(q_model.model.layers[0].self_attn.k_proj, WeightOnlyLinear), "packing model failed."
         if quant_lm_head is True:
             assert isinstance(q_model.lm_head, WeightOnlyLinear), "quantization for lm_head failed."

     def test_int4_dtype(self):
         fp32_model = copy.deepcopy(self.tiny_llama_model)
-        quant_config = AutoRoundConfig(dtype="int4", nsamples=32, seqlen=10, iters=10, scale_dtype="fp32")
+        quant_config = AutoRoundConfig(
+            dtype="int4", nsamples=32, seqlen=10, iters=10, act_dtype="fp32", scale_dtype="fp32"
+        )
         logger.info(f"Test AutoRound with config {quant_config}")

         # prepare + convert API
         model = prepare(model=fp32_model, quant_config=quant_config)
         run_fn(model, self.dataloader)
         q_model = convert(model)
         assert "model.layers.0.self_attn.k_proj" in q_model.autoround_config.keys()
-        assert "scale" in q_model.autoround_config["model.layers.0.self_attn.k_proj"].keys()
+        assert "scale_dtype" in q_model.autoround_config["model.layers.0.self_attn.k_proj"].keys()
         assert torch.float32 == q_model.autoround_config["model.layers.0.self_attn.k_proj"]["scale_dtype"]
         assert isinstance(q_model.model.layers[0].self_attn.k_proj, WeightOnlyLinear), "packing model failed."

     def test_autoround_with_quantize_API(self):
         model = copy.deepcopy(self.tiny_llama_model)

-        quant_config = AutoRoundConfig(nsamples=32, seqlen=10, iters=10, scale_dtype="fp32")
+        quant_config = AutoRoundConfig(nsamples=32, seqlen=10, iters=10, act_dtype="fp32", scale_dtype="fp32")
         quant_config.set_local("lm_head", AutoRoundConfig(dtype="fp32"))

         logger.info(f"Test AutoRound with config {quant_config}")
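Taken together, the test updates keep exercising the same three-step flow, now with act_dtype="fp32" passed alongside scale_dtype="fp32" and with the per-layer metadata key checked as "scale_dtype" rather than "scale". A condensed, hedged sketch of that flow outside pytest; the import path and calibration setup are assumptions based on the test file, and the checkpoint name is a stand-in for its tiny llama model:

import torch
import transformers

from neural_compressor.torch.quantization import AutoRoundConfig, convert, prepare

name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # stand-in for the test's tiny_llama_model
model = transformers.AutoModelForCausalLM.from_pretrained(name)
tokenizer = transformers.AutoTokenizer.from_pretrained(name)

quant_config = AutoRoundConfig(nsamples=32, seqlen=10, iters=10, act_dtype="fp32", scale_dtype="fp32")
quant_config.set_local("lm_head", AutoRoundConfig(dtype="fp32"))  # keep lm_head unquantized, as in the test

# prepare + calibrate + convert, mirroring prepare / run_fn / convert in the test
model = prepare(model=model, quant_config=quant_config)
for text in ["Hello world.", "AutoRound calibration sample."]:
    model(**tokenizer(text, return_tensors="pt"))
q_model = convert(model)

layer_cfg = q_model.autoround_config["model.layers.0.self_attn.k_proj"]
assert "scale_dtype" in layer_cfg and layer_cfg["scale_dtype"] == torch.float32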
