File tree Expand file tree Collapse file tree 4 files changed +15
-8
lines changed Expand file tree Collapse file tree 4 files changed +15
-8
lines changed Original file line number Diff line number Diff line change @@ -204,7 +204,9 @@ def check_weights_exist(weight_dir):
204204 f"No weight files found in { weight_dir } ! Weight files should be either .bin or .safetensors file types."
205205 )
206206 safetensors_l = [f for f in os .listdir (weight_dir ) if f .endswith (".safetensors" )]
207- bin_l = [f for f in os .listdir (weight_dir ) if f .endswith (".bin" ) and "embedding" not in f ]
207+ bin_l = [
208+ f for f in os .listdir (weight_dir ) if f .endswith (".bin" ) and "embedding" not in f
209+ ]
208210 if len (safetensors_l ) & len (bin_l ):
209211 raise RuntimeError (
210212 "Weights should only be in either .bin or .safetensors format, not both."
Original file line number Diff line number Diff line change @@ -462,7 +462,6 @@ def main():
462462 "eos_token_id_tensor" : torch .tensor (tokenizer .eos_token_id ),
463463 "response_cap" : args .response_cap ,
464464 },
465- keep_in_memory = True
466465 )
467466
468467 for chunk_idx , chunk in enumerate (models ):
Original file line number Diff line number Diff line change @@ -751,6 +751,7 @@ def get_example_inputs(
751751 for _ in range (2 * self .num_blocks )
752752 ],
753753 )
754+ # Specify dims that would be dynamic during calibration
754755 # Note: Assume cache size fixed shape as torch dynamic shape cannot handle dim 3 being
755756 # combination of 2 dynamic dims
756757 if get_dym_shape :
Original file line number Diff line number Diff line change 1- model=${1:- ' llama3' }
1+ model=${1:- ' llama3.2-3b ' }
22chunks=${2:- 4}
33tok=${3:- 128}
44cache=${4:- 512}
55cal=${5:- None}
66pres=${6:- A16W4}
77
8- if [ $model = " llama3" ]
8+ if [ $model = " llama3.2-3b" ]
9+ then
10+ config_path=Llama-3.2-3B-Instruct/config.json
11+ pref=" --preformatter aot_utils/llm_utils/preformatter_templates/llama3.json"
12+ elif [ $model = " llama3.2-1b" ]
13+ then
14+ config_path=Llama-3.2-1B-Instruct/config.json
15+ pref=" --preformatter aot_utils/llm_utils/preformatter_templates/llama3.json"
16+ elif [ $model = " llama3" ]
917then
1018 config_path=llama3-8B-instruct/config.json
1119 pref=" --preformatter aot_utils/llm_utils/preformatter_templates/llama3.json"
1220elif [ $model = " llama2" ]
1321then
1422 config_path=llama2-7B-chat/config.json
1523 pref=" --preformatter aot_utils/llm_utils/preformatter_templates/llama2_short.json"
16- else
17- # will remove once stable
18- config_path=llama_1b_50k/config.json
19- pref=" "
2024fi
25+
2126if [ $cal = " None" ]
2227then
2328 data=" "
You can’t perform that action at this time.
0 commit comments