Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 1cd144c

Browse files
committed
Missed initialize entry point with conditional unwrap
1 parent 174bbe8 commit 1cd144c

File tree

3 files changed

+15
-7
lines changed

3 files changed

+15
-7
lines changed

build/builder.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ def _initialize_model(
440440
quantize,
441441
tokenizer=None,
442442
max_seq_length=None,
443-
support_tensor_subclass:bool=True,
443+
support_tensor_subclass: bool = True,
444444
):
445445
print("Loading model...")
446446

@@ -511,7 +511,13 @@ def _initialize_model(
511511
if quantize:
512512
print(f"Quantizing the model with: {quantize}")
513513
with measure_time("Time to quantize model: {time:.02f} seconds"):
514-
quantize_model(model, builder_args.device, quantize, tokenizer, support_tensor_subclass)
514+
quantize_model(
515+
model,
516+
builder_args.device,
517+
quantize,
518+
tokenizer,
519+
support_tensor_subclass,
520+
)
515521
device_sync(device=builder_args.device)
516522

517523
if builder_args.setup_caches:

export.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ def main(args):
126126
quantize,
127127
tokenizer,
128128
max_seq_length=builder_args.max_seq_length,
129+
support_tensor_subclass=output_dso_path is None,
129130
)
130131
model_to_pte = model
131132
model_to_dso = model

quantization/quantize.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,18 +43,19 @@
4343
Int8DynActInt4WeightQuantizer,
4444
quantize_,
4545
)
46+
from torchao.utils import unwrap_tensor_subclass
4647

4748

4849
#########################################################################
4950
### torchchat quantization API ###
5051

5152

5253
def quantize_model(
53-
model: nn.Module,
54-
device,
55-
quantize_options,
56-
tokenizer=None,
57-
support_tensor_subclass:bool=True
54+
model: nn.Module,
55+
device,
56+
quantize_options,
57+
tokenizer=None,
58+
support_tensor_subclass: bool = True,
5859
):
5960
"""
6061
Quantize the specified model using the quantizers described by

0 commit comments

Comments
 (0)