@@ -177,9 +177,11 @@ struct server_slot {
177177 size_t n_sent_text = 0 ; // number of sent text character
178178
179179 int64_t t_start_process_prompt;
180+ int64_t t_start_reasoning;
180181 int64_t t_start_generation;
181182
182183 double t_prompt_processing; // ms
184+ double t_reasoning_token_generation; // ms
183185 double t_token_generation; // ms
184186
185187 std::function<void (int )> callback_on_release;
@@ -394,15 +396,20 @@ struct server_slot {
394396 const double t_prompt = t_prompt_processing / n_prompt_tokens_processed;
395397 const double n_prompt_second = 1e3 / t_prompt_processing * n_prompt_tokens_processed;
396398
399+ const double t_reasoning = t_reasoning_token_generation / n_reasoning_tokens;
400+ const double n_reasoning_second = 1e3 / t_reasoning_token_generation * n_reasoning_tokens;
401+
397402 const double t_gen = t_token_generation / n_decoded;
398403 const double n_gen_second = 1e3 / t_token_generation * n_decoded;
399404
400405 SLT_INF (*this ,
401406 " \n "
402407 " prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n "
408+ " reasoning time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n "
403409 " eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n "
404410 " total time = %10.2f ms / %5d tokens\n " ,
405411 t_prompt_processing, n_prompt_tokens_processed, t_prompt, n_prompt_second,
412+ t_reasoning_token_generation, n_reasoning_tokens, t_reasoning, n_reasoning_second,
406413 t_token_generation, n_decoded, t_gen, n_gen_second,
407414 t_prompt_processing + t_token_generation, n_prompt_tokens_processed + n_decoded);
408415
@@ -1200,11 +1207,12 @@ struct server_context_impl {
12001207 slot.reasoning = REASONING_STATE_REASONING;
12011208 slot.reasoning_end_tag = rstatus.end_tag ;
12021209 slot.n_reasoning_tokens = 0 ;
1210+ slot.t_start_reasoning = ggml_time_us ();
12031211 }
12041212 } else if (!rstatus.active && slot.reasoning == REASONING_STATE_REASONING) {
12051213 SLT_DBG (slot, " detected reasoning end '%s' via parser\n " , rstatus.end_tag .c_str ());
12061214 slot.reasoning = REASONING_STATE_FINISHED;
1207- slot.n_reasoning_tokens = 0 ;
1215+ slot.t_reasoning_token_generation = ( ggml_time_us () - slot. t_start_reasoning ) / 1e3 ;
12081216 }
12091217
12101218 if (slot.reasoning == REASONING_STATE_REASONING) {
@@ -1254,7 +1262,7 @@ struct server_context_impl {
12541262 if (slot.forced_tokens .empty ()) {
12551263 SLT_DBG (slot, " completed forced reasoning close with '%s'\n " , slot.reasoning_end_tag .c_str ());
12561264 slot.reasoning = REASONING_STATE_FINISHED;
1257- slot.n_reasoning_tokens = 0 ;
1265+ slot.t_reasoning_token_generation = ( ggml_time_us () - slot. t_start_reasoning ) / 1e3 ;
12581266 }
12591267 }
12601268 }
0 commit comments