11export CHECKPOINT_PATH=../../../checkpoints # path to checkpoints folder
22
33# README BENCHMARKS
4- export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf
5- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemlite-4-None --write_result benchmark_results.txt
4+ # export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf
5+
6+ export MODEL_REPO=meta-llama/Meta-Llama-3-8B
67
7- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-4-64 --write_result benchmark_results.txt
8- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-32-4-64 --write_result benchmark_results.txt
9- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-4-None --write_result benchmark_results.txt
10- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-32-4-None --write_result benchmark_results.txt
11- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-8-64 --write_result benchmark_results.txt
12- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-32-8-64 --write_result benchmark_results.txt
138
149# python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-4-64 --write_result benchmark_results.txt
1510# python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-16-4-64 --write_result benchmark_results.txt
@@ -105,7 +100,7 @@ export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf
105100# python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int8wo --write_result benchmark_results.txt
106101# python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemlite-8-None --write_result benchmark_results.txt
107102
108- # export MODEL_REPO=meta-llama/Meta-Llama-3-8B
103+ export MODEL_REPO=meta-llama/Meta-Llama-3-8B
109104# # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt
110105# # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --precision float16
111106# # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int4wo-64 --write_result benchmark_results.txt
@@ -148,16 +143,16 @@ export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf
148143# python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt --batch_size 32
149144# python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt --batch_size 128
150145
151- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt
152- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --precision float16
153- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int4wo-64 --write_result benchmark_results.txt
154- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-4-64 --write_result benchmark_results.txt
155- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-32-4-64 --write_result benchmark_results.txt
156- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-4-None --write_result benchmark_results.txt
157- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-32-4-None --write_result benchmark_results.txt
158- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-8-None --write_result benchmark_results.txt
159- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-32-8-None --write_result benchmark_results.txt
160- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int8wo --write_result benchmark_results.txt
146+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt
147+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --precision float16
148+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int4wo-64 --write_result benchmark_results.txt
149+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemlite-8-4-64 --write_result benchmark_results.txt --num_samples 1
150+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemlite-32-4-64 --write_result benchmark_results.txt --num_samples 1
151+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemlite-8-4-None --write_result benchmark_results.txt --num_samples 1 # not working
152+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemlite-32-4-None --write_result benchmark_results.txt --num_samples 1
153+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemlite-8-8-None --write_result benchmark_results.txt --num_samples 1
154+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemlite-32-8-None --write_result benchmark_results.txt --num_samples 1
155+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int8wo --write_result benchmark_results.txt
161156# python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemlite-8-None --write_result benchmark_results.txt
162157
163158# # 2:4 sparse model
@@ -169,24 +164,24 @@ python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/mode
169164# python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt --batch_size 32
170165# python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt --batch_size 128
171166
172- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --batch_size 8
173- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --precision float16 --batch_size 8
174- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int4wo-64 --write_result benchmark_results.txt --batch_size 8
175- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-4-64 --write_result benchmark_results.txt --batch_size 8
176- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-32-4-64 --write_result benchmark_results.txt --batch_size 8
177- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-4-None --write_result benchmark_results.txt --batch_size 8
178- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-32-4-None --write_result benchmark_results.txt --batch_size 8
179- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-8-None --write_result benchmark_results.txt --batch_size 8
180- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-32-8-None --write_result benchmark_results.txt --batch_size 8
181- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int8wo --write_result benchmark_results.txt --batch_size 8
182-
183- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --batch_size 32
184- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --precision float16 --batch_size 32
185- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int4wo-64 --write_result benchmark_results.txt --batch_size 32
186- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-4-64 --write_result benchmark_results.txt --batch_size 32
187- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-32-4-64 --write_result benchmark_results.txt --batch_size 32
188- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-4-None --write_result benchmark_results.txt --batch_size 32
189- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-32-4-None --write_result benchmark_results.txt --batch_size 32
190- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-8-None --write_result benchmark_results.txt --batch_size 32
191- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-32-8-None --write_result benchmark_results.txt --batch_size 32
192- python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int8wo --write_result benchmark_results.txt --batch_size 32
167+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --batch_size 8
168+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --precision float16 --batch_size 8
169+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int4wo-64 --write_result benchmark_results.txt --batch_size 8
170+ # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemlite-8-4-64 --write_result benchmark_results.txt --batch_size 8 --num_samples 1
171+ # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemlite-32-4-64 --write_result benchmark_results.txt --batch_size 8 --num_samples 1
172+ # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemlite-8-4-None --write_result benchmark_results.txt --batch_size 8 --num_samples 1
173+ # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemlite-32-4-None --write_result benchmark_results.txt --batch_size 8 --num_samples 1
174+ # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemlite-8-8-None --write_result benchmark_results.txt --batch_size 8 --num_samples 1
175+ # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemlite-32-8-None --write_result benchmark_results.txt --batch_size 8 --num_samples 1
176+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int8wo --write_result benchmark_results.txt --batch_size 8
177+
178+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --batch_size 32
179+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --precision float16 --batch_size 32
180+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int4wo-64 --write_result benchmark_results.txt --batch_size 32
181+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-4-64 --write_result benchmark_results.txt --batch_size 32
182+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-32-4-64 --write_result benchmark_results.txt --batch_size 32
183+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-4-None --write_result benchmark_results.txt --batch_size 32
184+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-32-4-None --write_result benchmark_results.txt --batch_size 32
185+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-8-8-None --write_result benchmark_results.txt --batch_size 32
186+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision float16 --quantization gemsub-32-8-None --write_result benchmark_results.txt --batch_size 32
187+ # python generate.py --compile --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int8wo --write_result benchmark_results.txt --batch_size 32
0 commit comments