We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e5df48e · commit 4595971 — Copy full SHA for 4595971
torchao/quantization/GPTQ.py
@@ -720,6 +720,8 @@ def _create_quantized_state_dict(
720
self.precision, # dtype for scales_and_zeros
721
)
722
# TODO: just get the device from mod.weight.device?
723
+ w_cpu = w_int4x8.cpu()
724
+ w_int4x8 = (w_cpu[::, ::2] << 4 | w_cpu[::, 1::2]).to(torch.uint8)
725
weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(w_int4x8.to(self.device), self.inner_k_tiles)
726
cur_state_dict[f"{fqn}.weight"] = weight_int4pack.to(self.device)
727
cur_state_dict[f"{fqn}.scales_and_zeros"] = scales_and_zeros.to(self.device)
0 commit comments