Skip to content

Commit 8bd6ac8

Browse files
authored
Fixing vllm runs for dynamic quantization (#210)
1 parent d94bfc1 commit 8bd6ac8

File tree

1 file changed

+1
-5
lines changed

1 file changed

+1
-5
lines changed

neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -348,11 +348,7 @@ def __init__(self, mod, parent, mod_extra_config, *args, **kwargs):
348348
kwargs["func_names"] = ("resolve_input", )
349349
super().__init__(mod, parent, mod_extra_config, *args, **kwargs)
350350
# TODO [SW-224403]: Enable dynamic quantization in row parallel allreduce
351-
allreduce_quantization_enable = (
352-
False
353-
if self.is_dynamic_quantization
354-
else get_hqt_config(mod).cfg["row_parallel_linear_allreduce_quantization"]
355-
)
351+
allreduce_quantization_enable = get_hqt_config(mod).cfg["row_parallel_linear_allreduce_quantization"]
356352
if self.quantization_mode in (QuantMode.MEASURE, QuantMode.SHAPE):
357353
self.forward = self.forward_measure_reduce if self.reduce_results and self.tp_size > 1 else self.forward_measure_no_reduce
358354

0 commit comments

Comments (0)