Diff summary: 1 file changed in `neural_compressor/transformers/quantization` (+7, −3 lines).

@@ -660,9 +660,13 @@ def convert_to_GPTQ_checkpoints(model, quantization_config):
 660 660             new_module.n_pack = 32 // bits
 661 661             scales = module._op_context.get_scales().t().contiguous()
 662 662             bias = module._op_context.get_bias()
 663     -           qzeros = new_module.pack_tensor_with_numpy(
 664     -               module._op_context.get_zero_points().t().to(torch.uint8) - 1
 665     -           ).contiguous()
     663 +           qzeros = module._op_context.get_zero_points().t().to(torch.uint8)
     664 +           # For group_size = -1, the dimensions of scale and qzeros will be 1
     665 +           if len(scales.shape) == 1:
     666 +               scales = scales.unsqueeze(0)
     667 +           if len(qzeros.shape) == 1:
     668 +               qzeros = qzeros.unsqueeze(0)
     669 +           qzeros = new_module.pack_tensor_with_numpy(qzeros - 1).contiguous()
 666 670             g_idx = module._op_context.get_g_idx()
 667 671
 668 672             new_module.qweight = qweight

(NOTE: indentation inside the hunk is approximate — the original nesting depth is not recoverable from the scraped page.)
0 commit comments.