@@ -140,10 +140,11 @@ static std::string get_gpu_info() {
140140}
141141
142142// command line params
// Output formats that can be selected on the command line.
// NONE means "print nothing": create_printer() returns nullptr for it.
// NONE is listed first, so it has the value 0.
enum output_formats {NONE, CSV, JSON, MARKDOWN, SQL};
144144
145145static const char * output_format_str (output_formats format) {
146146 switch (format) {
147+ case NONE: return " none" ;
147148 case CSV: return " csv" ;
148149 case JSON: return " json" ;
149150 case MARKDOWN: return " md" ;
@@ -152,6 +153,23 @@ static const char * output_format_str(output_formats format) {
152153 }
153154}
154155
156+ static bool output_format_from_str (const std::string & s, output_formats & format) {
157+ if (s == " none" ) {
158+ format = NONE;
159+ } else if (s == " csv" ) {
160+ format = CSV;
161+ } else if (s == " json" ) {
162+ format = JSON;
163+ } else if (s == " md" ) {
164+ format = MARKDOWN;
165+ } else if (s == " sql" ) {
166+ format = SQL;
167+ } else {
168+ return false ;
169+ }
170+ return true ;
171+ }
172+
155173static const char * split_mode_str (llama_split_mode mode) {
156174 switch (mode) {
157175 case LLAMA_SPLIT_MODE_NONE: return " none" ;
@@ -190,31 +208,33 @@ struct cmd_params {
190208 int reps;
191209 bool verbose;
192210 output_formats output_format;
211+ output_formats output_format_stderr;
193212};
194213
195214static const cmd_params cmd_params_defaults = {
196- /* model */ {" models/7B/ggml-model-q4_0.gguf" },
197- /* n_prompt */ {512 },
198- /* n_gen */ {128 },
199- /* n_pg */ {},
200- /* n_batch */ {2048 },
201- /* n_ubatch */ {512 },
202- /* type_k */ {GGML_TYPE_F16},
203- /* type_v */ {GGML_TYPE_F16},
204- /* n_threads */ {cpu_get_num_math ()},
205- /* n_gpu_layers */ {99 },
206- /* rpc_servers */ {" " },
207- /* split_mode */ {LLAMA_SPLIT_MODE_LAYER},
208- /* main_gpu */ {0 },
209- /* no_kv_offload */ {false },
210- /* flash_attn */ {false },
211- /* tensor_split */ {std::vector<float >(llama_max_devices (), 0 .0f )},
212- /* use_mmap */ {true },
213- /* embeddings */ {false },
214- /* numa */ GGML_NUMA_STRATEGY_DISABLED,
215- /* reps */ 5 ,
216- /* verbose */ false ,
217- /* output_format */ MARKDOWN
215+ /* model */ {" models/7B/ggml-model-q4_0.gguf" },
216+ /* n_prompt */ {512 },
217+ /* n_gen */ {128 },
218+ /* n_pg */ {},
219+ /* n_batch */ {2048 },
220+ /* n_ubatch */ {512 },
221+ /* type_k */ {GGML_TYPE_F16},
222+ /* type_v */ {GGML_TYPE_F16},
223+ /* n_threads */ {cpu_get_num_math ()},
224+ /* n_gpu_layers */ {99 },
225+ /* rpc_servers */ {" " },
226+ /* split_mode */ {LLAMA_SPLIT_MODE_LAYER},
227+ /* main_gpu */ {0 },
228+ /* no_kv_offload */ {false },
229+ /* flash_attn */ {false },
230+ /* tensor_split */ {std::vector<float >(llama_max_devices (), 0 .0f )},
231+ /* use_mmap */ {true },
232+ /* embeddings */ {false },
233+ /* numa */ GGML_NUMA_STRATEGY_DISABLED,
234+ /* reps */ 5 ,
235+ /* verbose */ false ,
236+ /* output_format */ MARKDOWN,
237+ /* output_format_stderr */ NONE,
218238};
219239
220240static void print_usage (int /* argc */ , char ** argv) {
@@ -243,6 +263,7 @@ static void print_usage(int /* argc */, char ** argv) {
243263 printf (" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n " );
244264 printf (" -r, --repetitions <n> (default: %d)\n " , cmd_params_defaults.reps );
245265 printf (" -o, --output <csv|json|md|sql> (default: %s)\n " , output_format_str (cmd_params_defaults.output_format ));
266+ printf (" -oe, --output-err <csv|json|md|sql> (default: %s)\n " , output_format_str (cmd_params_defaults.output_format_stderr ));
246267 printf (" -v, --verbose (default: %s)\n " , cmd_params_defaults.verbose ? " 1" : " 0" );
247268 printf (" \n " );
248269 printf (" Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n " );
@@ -284,6 +305,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
284305
285306 params.verbose = cmd_params_defaults.verbose ;
286307 params.output_format = cmd_params_defaults.output_format ;
308+ params.output_format_stderr = cmd_params_defaults.output_format_stderr ;
287309 params.reps = cmd_params_defaults.reps ;
288310
289311 for (int i = 1 ; i < argc; i++) {
@@ -493,18 +515,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
493515 invalid_param = true ;
494516 break ;
495517 }
496- if (argv[i] == std::string (" csv" )) {
497- params.output_format = CSV;
498- } else if (argv[i] == std::string (" json" )) {
499- params.output_format = JSON;
500- } else if (argv[i] == std::string (" md" )) {
501- params.output_format = MARKDOWN;
502- } else if (argv[i] == std::string (" sql" )) {
503- params.output_format = SQL;
504- } else {
518+ invalid_param = !output_format_from_str (argv[i], params.output_format );
519+ } else if (arg == " -oe" || arg == " --output-err" ) {
520+ if (++i >= argc) {
505521 invalid_param = true ;
506522 break ;
507523 }
524+ invalid_param = !output_format_from_str (argv[i], params.output_format_stderr );
508525 } else if (arg == " -v" || arg == " --verbose" ) {
509526 params.verbose = true ;
510527 } else {
@@ -1278,6 +1295,22 @@ static void llama_null_log_callback(enum ggml_log_level level, const char * text
12781295 (void ) user_data;
12791296}
12801297
1298+ static std::unique_ptr<printer> create_printer (output_formats format) {
1299+ switch (format) {
1300+ case NONE:
1301+ return nullptr ;
1302+ case CSV:
1303+ return std::unique_ptr<printer>(new csv_printer ());
1304+ case JSON:
1305+ return std::unique_ptr<printer>(new json_printer ());
1306+ case MARKDOWN:
1307+ return std::unique_ptr<printer>(new markdown_printer ());
1308+ case SQL:
1309+ return std::unique_ptr<printer>(new sql_printer ());
1310+ }
1311+ GGML_ASSERT (false );
1312+ }
1313+
12811314int main (int argc, char ** argv) {
12821315 // try to set locale for unicode characters in markdown
12831316 setlocale (LC_CTYPE, " .UTF-8" );
@@ -1304,26 +1337,18 @@ int main(int argc, char ** argv) {
13041337 llama_numa_init (params.numa );
13051338
13061339 // initialize printer
1307- std::unique_ptr<printer> p;
1308- switch (params.output_format ) {
1309- case CSV:
1310- p.reset (new csv_printer ());
1311- break ;
1312- case JSON:
1313- p.reset (new json_printer ());
1314- break ;
1315- case MARKDOWN:
1316- p.reset (new markdown_printer ());
1317- break ;
1318- case SQL:
1319- p.reset (new sql_printer ());
1320- break ;
1321- default :
1322- assert (false );
1323- exit (1 );
1340+ std::unique_ptr<printer> p = create_printer (params.output_format );
1341+ std::unique_ptr<printer> p_err = create_printer (params.output_format_stderr );
1342+
1343+ if (p) {
1344+ p->fout = stdout;
1345+ p->print_header (params);
1346+ }
1347+
1348+ if (p_err) {
1349+ p_err->fout = stderr;
1350+ p_err->print_header (params);
13241351 }
1325- p->fout = stdout;
1326- p->print_header (params);
13271352
13281353 std::vector<cmd_params_instance> params_instances = get_cmd_params_instances (params);
13291354
@@ -1381,7 +1406,15 @@ int main(int argc, char ** argv) {
13811406 t.samples_ns .push_back (t_ns);
13821407 }
13831408
1384- p->print_test (t);
1409+ if (p) {
1410+ p->print_test (t);
1411+ fflush (p->fout );
1412+ }
1413+
1414+ if (p_err) {
1415+ p_err->print_test (t);
1416+ fflush (p_err->fout );
1417+ }
13851418
13861419 llama_print_timings (ctx);
13871420
@@ -1390,7 +1423,13 @@ int main(int argc, char ** argv) {
13901423
13911424 llama_free_model (lmodel);
13921425
1393- p->print_footer ();
1426+ if (p) {
1427+ p->print_footer ();
1428+ }
1429+
1430+ if (p_err) {
1431+ p_err->print_footer ();
1432+ }
13941433
13951434 llama_backend_free ();
13961435
0 commit comments