f"Specification for quantizer {quantizer} has extraneous key {key}. Ignoring."
95
-
)
87
+
print(f"Specification for quantizer {quantizer} has extraneous key {key}. Ignoring.")
96
88
delq_kwargs[key]
97
89
returnq_kwargs
98
90
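For reference, the change above collapses a multi-line print into a single line while keeping the extraneous-key cleanup intact. A minimal sketch of that cleanup pattern, where validate_quantizer_spec and known_keys are hypothetical names for illustration, not torchchat's actual API:

def validate_quantizer_spec(quantizer: str, q_kwargs: dict, known_keys: set) -> dict:
    # Hypothetical helper: warn about and drop any keys the quantizer
    # specification does not recognize, matching the hunk above.
    for key in list(q_kwargs):
        if key not in known_keys:
            print(f"Specification for quantizer {quantizer} has extraneous key {key}. Ignoring.")
            del q_kwargs[key]
    return q_kwargs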
@@ -232,7 +224,6 @@ def quantize_model(
         if quantizer == "embedding:wx":
             # These quantizers require float32 input weights. Note that after quantization,
             # the weights will no longer be float32, but lowbit integers
-
             if get_precision() != torch.float32:
                 print(
                     f"Quantizer {quantizer} requires float32 inputs, but received {get_precision()}. Changing dtype to float32. Note that after quantization, the weights will be lowbit integers, not float32."
@@ -261,15 +252,13 @@ def quantize_model(
         )
     # We set global precision from quantize options if it is specified at cli.py:485
     # so the precision returned by get_precision() is always the authoritative precision/dtype in torchchat
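The embedding:wx branch above coerces the global precision to float32 before quantizing, since these lowbit quantizers expect float32 input weights. A minimal sketch of that pattern, assuming a module-level setter alongside the get_precision() seen in the diff (set_precision and _precision are assumptions here, not necessarily torchchat's real helpers):

import torch

_precision = torch.float16  # assumed module-level default

def get_precision() -> torch.dtype:
    return _precision

def set_precision(dtype: torch.dtype) -> None:
    global _precision
    _precision = dtype

def prepare_lowbit_quantizer(quantizer: str) -> None:
    # These quantizers require float32 input weights; after quantization the
    # stored weights are lowbit integers, not float32.
    if get_precision() != torch.float32:
        print(
            f"Quantizer {quantizer} requires float32 inputs, but received "
            f"{get_precision()}. Changing dtype to float32."
        )
        set_precision(torch.float32)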