test/3x/torch/quantization/fp8_quant: 1 file changed, +2 −2 lines changed

@@ -16,7 +16,7 @@ def test_two_step_layer_wise():
     # requires transformers >= 4.43.0, torch_dtype=config.torch_dtype
     # facebook/opt-350m parameters on disk is in torch.float16 dtype
     cpu_mem0 = get_used_cpu_mem_MB()
-    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=config.torch_dtype)
+    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=config.torch_dtype, use_safetensors=True)
     cpu_mem1 = get_used_cpu_mem_MB()
     assert (cpu_mem1 - cpu_mem0) < 100, "model with memory mapping should use no more than 100MiB."

@@ -33,7 +33,7 @@ def test_two_step_layer_wise():

     # fp16 llama2-7b is converted to bf16 during quantization layer-by-layer.
     cpu_mem0 = get_used_cpu_mem_MB()
-    new_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=config.torch_dtype)
+    new_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=config.torch_dtype, use_safetensors=True)
     cpu_mem2 = get_used_cpu_mem_MB()
     model = convert(new_model, qconfig)
     assert (cpu_mem2 - cpu_mem0) < 100, "model with memory mapping should use no more than 100MiB."
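
For context, the change adds use_safetensors=True so that from_pretrained loads the safetensors checkpoint, which transformers can memory-map; the tests then assert that loading grows CPU memory by less than 100 MiB. Below is a minimal sketch of that pattern, not the test itself. It assumes a psutil-based helper standing in for the suite's get_used_cpu_mem_MB, transformers >= 4.43.0, and a torch_dtype matching the on-disk dtype, as the test comments note.

import os

import psutil
import torch
from transformers import AutoModelForCausalLM


def get_used_cpu_mem_MB():
    # Resident set size of the current process, in MiB (assumed stand-in for
    # the test suite's own helper of the same name).
    return psutil.Process(os.getpid()).memory_info().rss / (1024 * 1024)


model_name = "facebook/opt-350m"  # small model whose on-disk weights are float16

cpu_mem0 = get_used_cpu_mem_MB()
# use_safetensors=True selects the .safetensors checkpoint; combined with a
# torch_dtype that matches the on-disk dtype, recent transformers can memory-map
# the weights instead of copying them all into process memory up front.
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, use_safetensors=True
)
cpu_mem1 = get_used_cpu_mem_MB()
print(f"CPU memory growth while loading: {cpu_mem1 - cpu_mem0:.1f} MiB")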