Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 1cd144c

Browse files
committed
Missed initialize entry point with conditional unwrap
1 parent 174bbe8 commit 1cd144c

File tree

3 files changed

+15
-7
lines changed

3 files changed

+15
-7
lines changed

build/builder.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ def _initialize_model(
440440
quantize,
441441
tokenizer=None,
442442
max_seq_length=None,
443-
support_tensor_subclass:bool=True,
443+
support_tensor_subclass: bool = True,
444444
):
445445
print("Loading model...")
446446

@@ -511,7 +511,13 @@ def _initialize_model(
511511
if quantize:
512512
print(f"Quantizing the model with: {quantize}")
513513
with measure_time("Time to quantize model: {time:.02f} seconds"):
514-
quantize_model(model, builder_args.device, quantize, tokenizer, support_tensor_subclass)
514+
quantize_model(
515+
model,
516+
builder_args.device,
517+
quantize,
518+
tokenizer,
519+
support_tensor_subclass,
520+
)
515521
device_sync(device=builder_args.device)
516522

517523
if builder_args.setup_caches:

export.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ def main(args):
126126
quantize,
127127
tokenizer,
128128
max_seq_length=builder_args.max_seq_length,
129+
support_tensor_subclass=output_dso_path is None,
129130
)
130131
model_to_pte = model
131132
model_to_dso = model

quantization/quantize.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,18 +43,19 @@
4343
Int8DynActInt4WeightQuantizer,
4444
quantize_,
4545
)
46+
from torchao.utils import unwrap_tensor_subclass
4647

4748

4849
#########################################################################
4950
### torchchat quantization API ###
5051

5152

5253
def quantize_model(
53-
model: nn.Module,
54-
device,
55-
quantize_options,
56-
tokenizer=None,
57-
support_tensor_subclass:bool=True
54+
model: nn.Module,
55+
device,
56+
quantize_options,
57+
tokenizer=None,
58+
support_tensor_subclass: bool = True,
5859
):
5960
"""
6061
Quantize the specified model using the quantizers described by

0 commit comments

Comments
 (0)