@@ -169,7 +169,7 @@ def __init__(self, model: nn.Module, pooling=None, **kwargs):
             self.model, _ = PoolingTransform.apply(self.model, pooling)

         self.model.base_model.config.use_cache = True
-        self.hash_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_auto_class"] = self.__class__.__name__

     @classmethod
     @with_replaced_quantizers
@@ -430,7 +430,7 @@ class QEffVisionEncoderForTextImageToTextModel(QEFFBaseModel):
     def __init__(self, model: nn.modules, **kwargs):
         super().__init__(model, **kwargs)
         self.model = model.get_qeff_vision_encoder()
-        self.hash_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_auto_class"] = self.__class__.__name__

     def export(self, inputs, output_names, dynamic_axes, export_dir=None):
         return self._export(inputs, output_names, dynamic_axes, export_dir)
@@ -485,7 +485,7 @@ class QEffCausalLMForTextImageToTextModel(QEFFBaseModel):
     def __init__(self, model, **kwargs):
         super().__init__(model, **kwargs)
         self.model = model.get_qeff_language_decoder()
-        self.hash_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_auto_class"] = self.__class__.__name__

     def export(self, inputs, output_names, dynamic_axes, export_dir=None):
         return self._export(inputs, output_names, dynamic_axes, export_dir)
@@ -773,7 +773,7 @@ def kv_offload_generate(
         inputs["input_ids"],
         (0, padded_len - input_ids_length),
         "constant",
-        1,
+        pad_token_id,
     )
     inputs["attention_mask"] = torch.nn.functional.pad(
         inputs["attention_mask"], (0, padded_len - input_ids_length), "constant", 0
@@ -911,7 +911,7 @@ def __init__(
             self.model.config.vision_config.use_flash_attn = "false"
         else:
             self.model.config.text_config.use_cache = True
-        self.hash_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_auto_class"] = self.__class__.__name__

     @classmethod
     def from_pretrained(
@@ -1091,7 +1091,7 @@ def cloud_ai_100_generate(
         inputs["input_ids"],
         (0, padded_len - input_ids_length),
         "constant",
-        1,
+        pad_token_id,
     )
     inputs["attention_mask"] = torch.nn.functional.pad(
         inputs["attention_mask"], (0, padded_len - input_ids_length), "constant", 0
@@ -1360,7 +1360,7 @@ def __init__(
         self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None)
         self.model, transformed = SpDTransform.apply(self.model, qaic_config, **kwargs)
         self.is_tlm = transformed
-        self.hash_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_auto_class"] = self.__class__.__name__
         # ---Sampling---
         # Note: SamplerTransform should be applied after all other transforms
         # are done. The role of the sampler is to just add nodes at the output of the
@@ -1901,7 +1901,7 @@ def __init__(self, model: nn.Module, **kwargs):
         super().__init__(model, **kwargs)
         self.model.config.use_cache = True
         self.num_layers = model.config.num_hidden_layers
-        self.hash_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_auto_class"] = self.__class__.__name__

     @property
     def get_model_config(self) -> dict:
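A minimal sketch of the padding change in kv_offload_generate and cloud_ai_100_generate, assuming a toy pad_token_id of 0 and a hypothetical 4-token input (both are illustrative, not taken from the commit): input_ids are right-padded to padded_len with the model's pad token instead of the hard-coded 1, while attention_mask is padded with 0 so the extra positions stay masked.

    import torch

    pad_token_id = 0  # assumed for illustration; the library reads this from the tokenizer/model config
    padded_len = 8
    input_ids = torch.tensor([[101, 2009, 2003, 102]])  # hypothetical token ids
    attention_mask = torch.ones_like(input_ids)
    input_ids_length = input_ids.shape[1]

    # Right-pad input_ids with the pad token (previously a hard-coded 1).
    input_ids = torch.nn.functional.pad(
        input_ids,
        (0, padded_len - input_ids_length),
        "constant",
        pad_token_id,
    )
    # Pad attention_mask with 0 so padded positions are ignored by attention.
    attention_mask = torch.nn.functional.pad(
        attention_mask, (0, padded_len - input_ids_length), "constant", 0
    )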