@@ -69,7 +69,6 @@ def run_fn(model, dataloader):
         else:
             model(data)

-@pytest.mark.skip(reason="SW-217321 pytorch inductor error")
 @pytest.mark.skipif(is_habana_framework_installed(), reason="These tests are not supported on HPU for now.")
 @pytest.mark.skipif(not auto_round_installed, reason="auto_round module is not installed")
 class TestAutoRoundCPU:
@@ -284,7 +283,6 @@ def test_mllm(self):
         # loaded_model = load("saved_results_tiny-random-GPTJForCausalLM", format="huggingface", trust_remote_code=True)


-@pytest.mark.skip(reason="SW-217321 pytorch inductor error")
 @pytest.mark.skipif(not is_habana_framework_installed(), reason="Habana framework is not installed")
 @pytest.mark.skipif(os.getenv("PT_HPU_LAZY_MODE", "0") == "1", reason="Lazy mode is enabled")
 @pytest.mark.skipif(not auto_round_installed, reason="auto_round module is not installed")
@@ -366,7 +364,7 @@ def test_autoround_w4a8(self):
     @pytest.mark.parametrize("quant_lm_head", [True, False])
     def test_autoround(self, quant_lm_head):
         fp32_model = copy.deepcopy(self.tiny_llama_model)
-        quant_config = AutoRoundConfig(nsamples=32, seqlen=10, iters=10, scale_dtype="fp32")
+        quant_config = AutoRoundConfig(nsamples=32, seqlen=10, iters=10, act_dtype="fp32", scale_dtype="fp32")
         if quant_lm_head is False:
             quant_config.set_local("lm_head", AutoRoundConfig(dtype="fp32"))
         logger.info(f"Test AutoRound with config {quant_config}")
@@ -377,30 +375,32 @@ def test_autoround(self, quant_lm_head):
         run_fn(model, self.dataloader)
         q_model = convert(model)
         assert "model.layers.0.self_attn.k_proj" in q_model.autoround_config.keys()
-        assert "scale" in q_model.autoround_config["model.layers.0.self_attn.k_proj"].keys()
+        assert "scale_dtype" in q_model.autoround_config["model.layers.0.self_attn.k_proj"].keys()
         assert torch.float32 == q_model.autoround_config["model.layers.0.self_attn.k_proj"]["scale_dtype"]
         assert isinstance(q_model.model.layers[0].self_attn.k_proj, WeightOnlyLinear), "packing model failed."
         if quant_lm_head is True:
             assert isinstance(q_model.lm_head, WeightOnlyLinear), "quantization for lm_head failed."

     def test_int4_dtype(self):
         fp32_model = copy.deepcopy(self.tiny_llama_model)
-        quant_config = AutoRoundConfig(dtype="int4", nsamples=32, seqlen=10, iters=10, scale_dtype="fp32")
+        quant_config = AutoRoundConfig(
+            dtype="int4", nsamples=32, seqlen=10, iters=10, act_dtype="fp32", scale_dtype="fp32"
+        )
         logger.info(f"Test AutoRound with config {quant_config}")

         # prepare + convert API
         model = prepare(model=fp32_model, quant_config=quant_config)
         run_fn(model, self.dataloader)
         q_model = convert(model)
         assert "model.layers.0.self_attn.k_proj" in q_model.autoround_config.keys()
-        assert "scale" in q_model.autoround_config["model.layers.0.self_attn.k_proj"].keys()
+        assert "scale_dtype" in q_model.autoround_config["model.layers.0.self_attn.k_proj"].keys()
         assert torch.float32 == q_model.autoround_config["model.layers.0.self_attn.k_proj"]["scale_dtype"]
         assert isinstance(q_model.model.layers[0].self_attn.k_proj, WeightOnlyLinear), "packing model failed."

     def test_autoround_with_quantize_API(self):
         model = copy.deepcopy(self.tiny_llama_model)

-        quant_config = AutoRoundConfig(nsamples=32, seqlen=10, iters=10, scale_dtype="fp32")
+        quant_config = AutoRoundConfig(nsamples=32, seqlen=10, iters=10, act_dtype="fp32", scale_dtype="fp32")
         quant_config.set_local("lm_head", AutoRoundConfig(dtype="fp32"))

         logger.info(f"Test AutoRound with config {quant_config}")