From a69054cb2b787dfef55340409e0211c44f7f523c Mon Sep 17 00:00:00 2001 From: Dhanji Prasanna Date: Mon, 8 Sep 2025 14:04:07 +1000 Subject: [PATCH] Time to First Token --- crates/g3-core/src/lib.rs | 55 +++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 647cea3..1a77556 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -234,7 +234,11 @@ impl Agent { tools, and can interact with multiple at once. When you want to perform an action, use 'I' as the pronoun. # Available Tools -- shell: + +- name: shell + type: function + usage: shell [command] + description: \" Execute a command in the shell. This will return the output and error concatenated into a single string, as @@ -249,23 +253,14 @@ impl Agent { Multiple commands: Use ; or && to chain commands, avoid newlines Pathnames: Use absolute paths and avoid cd unless explicitly requested + \" - Usage: - - Call the `shell` tool with the desired bash/shell commands. - -- search: - Search the web for information about any topic. - -- final_output: +- name: final_output + type: function + usage: final_output [summary] + description: \" This tool signals the final output for a user in a conversation and MUST be used for the final message to the user. You must - pass in a detailed summary of the work done to this tool call. - - Purpose: - - Collects the final output for a user - - Provides clear validation feedback when output isn't valid - - Usage: - - Call the `final_output` tool with a summary of the work performed. + pass in a detailed summary of the work done so far.\" # Response Guidelines - Use Markdown formatting for all responses. @@ -278,12 +273,10 @@ impl Agent { IMPORTANT INSTRUCTIONS: -Please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. -Only terminate your turn when you are sure that the problem is solved. - -If you are not sure about file content or codebase structure, or other information pertaining to the user's request, -use your tools to read files and gather the relevant information: do NOT guess or make up an answer. It is important -you use tools that can assist with providing the right context. +Break down your task into smaller steps and do one step and tool call at a time. +Do not try to use multiple tools at once. +**After you get the tool result back, consider the result and then proceed to do +the next step and tool call if required.** "); if show_prompt { @@ -319,7 +312,7 @@ you use tools that can assist with providing the right context. // Time the LLM call with cancellation support and streaming let llm_start = Instant::now(); - let response_content = tokio::select! { + let (response_content, think_time) = tokio::select! { result = self.stream_completion(request) => result?, _ = cancellation_token.cancelled() => { return Err(anyhow::anyhow!("Operation cancelled by user")); @@ -357,8 +350,9 @@ you use tools that can assist with providing the right context. if show_timing { let timing_summary = format!( - "\n💭 {} | ⚡️ {}", + "\n⏱️ {} | 💭 {} | ⚡️ {}", Self::format_duration(llm_duration), + Self::format_duration(think_time), Self::format_duration(exec_duration) ); Ok(format!("{}\n{}", result, timing_summary)) @@ -371,13 +365,16 @@ you use tools that can assist with providing the right context. &self.context_window } - async fn stream_completion(&self, request: CompletionRequest) -> Result { + async fn stream_completion(&self, request: CompletionRequest) -> Result<(String, Duration)> { use tokio_stream::StreamExt; let provider = self.providers.get(None)?; let mut stream = provider.stream(request).await?; let mut full_content = String::new(); + let mut first_token_time: Option = None; + let stream_start = Instant::now(); + print!("🤖 "); // Start the response indicator use std::io::{self, Write}; io::stdout().flush()?; @@ -385,6 +382,11 @@ you use tools that can assist with providing the right context. while let Some(chunk_result) = stream.next().await { match chunk_result { Ok(chunk) => { + // Record time to first token + if first_token_time.is_none() && !chunk.content.is_empty() { + first_token_time = Some(stream_start.elapsed()); + } + print!("{}", chunk.content); io::stdout().flush()?; full_content.push_str(&chunk.content); @@ -401,7 +403,8 @@ you use tools that can assist with providing the right context. } println!(); // New line after streaming completes - Ok(full_content) + let ttft = first_token_time.unwrap_or_else(|| stream_start.elapsed()); + Ok((full_content, ttft)) } fn format_duration(duration: Duration) -> String {