 #include <ctime>
 #include <fstream>
 #include <iostream>
+#include <sstream>
 #include <string>
 #include <vector>
 
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-static llama_context ** g_ctx;
+static llama_context           ** g_ctx;
+static llama_model             ** g_model;
+static gpt_params               * g_params;
+static std::vector<llama_token> * g_input_tokens;
+static std::ostringstream       * g_output_ss;
+static std::vector<llama_token> * g_output_tokens;
 static bool is_interacting = false;
 
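+// write a YAML logfile of the run: parameters, prompt tokens, generated output, and timings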
+void write_logfile(
+    const llama_context * ctx, const gpt_params & params, const llama_model * model,
+    const std::vector<llama_token> input_tokens, const std::string output, const std::vector<llama_token> output_tokens) {
+
+    if (params.logdir.empty()) {
+        return;
+    }
+
+    const std::string timestamp = get_sortable_timestamp();
+
+    const bool success = create_directory_with_parents(params.logdir);
+    if (!success) {
+        fprintf(stderr, "%s: warning: failed to create logdir %s, cannot write logfile\n",
+            __func__, params.logdir.c_str());
+        return;
+    }
+
+    const std::string logfile_path = params.logdir + timestamp + ".yml";
+    FILE * logfile = fopen(logfile_path.c_str(), "w");
+
+    if (logfile == NULL) {
+        fprintf(stderr, "%s: failed to open logfile %s\n", __func__, logfile_path.c_str());
+        return;
+    }
+
+    fprintf(logfile, "binary: main\n");
+    char model_desc[128];
+    llama_model_desc(model, model_desc, sizeof(model_desc));
+    dump_non_result_info_yaml(logfile, params, ctx, timestamp, input_tokens, model_desc);
+
+    fprintf(logfile, "\n");
+    fprintf(logfile, "######################\n");
+    fprintf(logfile, "# Generation Results #\n");
+    fprintf(logfile, "######################\n");
+    fprintf(logfile, "\n");
+
+    dump_string_yaml_multiline(logfile, "output", output.c_str());
+    dump_vector_int_yaml(logfile, "output_tokens", output_tokens);
+
+    llama_dump_timing_info_yaml(logfile, ctx);
+    fclose(logfile);
+}
+
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
 void sigint_handler(int signo) {
     if (signo == SIGINT) {
@@ -48,6 +97,7 @@ void sigint_handler(int signo) {
             console::cleanup();
             printf("\n");
             llama_print_timings(*g_ctx);
+            write_logfile(*g_ctx, *g_params, *g_model, *g_input_tokens, g_output_ss->str(), *g_output_tokens);
             _exit(130);
         }
     }
@@ -56,6 +106,7 @@ void sigint_handler(int signo) {
 
 int main(int argc, char ** argv) {
     gpt_params params;
+    g_params = &params;
 
     if (gpt_params_parse(argc, argv, params) == false) {
         return 1;
@@ -116,6 +167,7 @@ int main(int argc, char ** argv) {
     llama_model * model;
     llama_context * ctx;
     llama_context * ctx_guidance = NULL;
+    g_model = &model;
     g_ctx = &ctx;
 
     // load the model and apply lora adapter, if any
@@ -397,6 +449,10 @@ int main(int argc, char ** argv) {
     int n_session_consumed = 0;
     int n_past_guidance    = 0;
 
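+    // mirrors of everything shown to the user, captured for write_logfile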
+    std::vector<int>   input_tokens;  g_input_tokens  = &input_tokens;
+    std::vector<int>   output_tokens; g_output_tokens = &output_tokens;
+    std::ostringstream output_ss;     g_output_ss     = &output_ss;
+
     // the first thing we will do is to output the prompt, so set color accordingly
     console::set_display(console::prompt);
 
@@ -667,7 +723,15 @@ int main(int argc, char ** argv) {
         // display text
         if (input_echo) {
             for (auto id : embd) {
-                printf("%s", llama_token_to_piece(ctx, id).c_str());
+                const std::string token_str = llama_token_to_piece(ctx, id);
+                printf("%s", token_str.c_str());
+
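+                // tokens arrive one at a time during generation; batches (>1) are prompt echo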
+                if (embd.size() > 1) {
+                    input_tokens.push_back(id);
+                } else {
+                    output_tokens.push_back(id);
+                    output_ss << token_str;
+                }
             }
             fflush(stdout);
         }
@@ -761,6 +825,8 @@ int main(int argc, char ** argv) {
                     printf("%s", params.input_suffix.c_str());
                 }
 
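+                // remember where this turn's new tokens start, for logging below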
+                const size_t original_size = embd_inp.size();
+
                 // instruct mode: insert instruction prefix
                 if (params.instruct && !is_antiprompt) {
                     n_consumed = embd_inp.size();
@@ -775,6 +841,12 @@ int main(int argc, char ** argv) {
                     embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
                 }
 
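+                // log the user's input plus any instruct prefix/suffix appended above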
+                for (size_t i = original_size; i < embd_inp.size(); ++i) {
+                    const llama_token token = embd_inp[i];
+                    output_tokens.push_back(token);
+                    output_ss << llama_token_to_piece(ctx, token);
+                }
+
                 n_remain -= line_inp.size();
             }
 
@@ -817,6 +889,8 @@ int main(int argc, char ** argv) {
     }
 
     llama_print_timings(ctx);
+    write_logfile(ctx, params, model, input_tokens, output_ss.str(), output_tokens);
+
     if (ctx_guidance) { llama_free(ctx_guidance); }
     llama_free(ctx);
     llama_free_model(model);
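
To exercise the pattern in isolation: the sketch below reproduces the commit's core idea (file-scope pointers that let a SIGINT handler flush accumulated output to a timestamped logfile) as a self-contained program. It is illustrative only; make_timestamp, write_log, and the hard-coded ./logs/ path are hypothetical stand-ins for get_sortable_timestamp, write_logfile, and params.logdir.

// Self-contained sketch of the logging pattern above; illustrative, not part of the commit.
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <sstream>
#include <string>

static std::ostringstream * g_output = nullptr; // set in main(), read by the handler

// stand-in for get_sortable_timestamp()
static std::string make_timestamp() {
    char buf[32];
    const std::time_t t = std::time(nullptr);
    std::strftime(buf, sizeof(buf), "%Y_%m_%d-%H_%M_%S", std::localtime(&t));
    return buf;
}

// stand-in for write_logfile(); assumes ./logs/ already exists
static void write_log() {
    const std::string path = "./logs/" + make_timestamp() + ".yml";
    FILE * f = fopen(path.c_str(), "w");
    if (f == NULL) {
        fprintf(stderr, "failed to open logfile %s\n", path.c_str());
        return;
    }
    // single-line YAML block scalar, loosely mimicking dump_string_yaml_multiline
    fprintf(f, "output: |\n  %s\n", g_output->str().c_str());
    fclose(f);
}

static void sigint_handler(int signo) {
    if (signo == SIGINT) {
        // like the original, this calls non-async-signal-safe functions (fopen/fprintf);
        // a pragmatic trade-off for a best-effort debug log on Ctrl+C
        write_log();
        std::_Exit(130); // 128 + SIGINT, same convention as _exit(130) in main.cpp
    }
}

int main() {
    std::ostringstream output;
    g_output = &output; // same wiring as g_output_ss = &output_ss in the commit
    std::signal(SIGINT, sigint_handler);

    output << "hello world"; // stand-in for the accumulated generated text
    write_log();             // the normal-exit path writes the same logfile
    return 0;
}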