From 743d6224687678f453c3dc1de79a0363d5f919d1 Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Mon, 22 Dec 2025 17:22:54 +1100 Subject: [PATCH] Add token usage and context % to timing footer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added a quality-of-life feature that displays: - Tokens used in the current turn (from LLM response, not estimated) - Current context window usage percentage These are displayed dimmed after the timing info: ⏱️ 1.2s | 💭 0.3s 1234tk | 45% ctx The token count comes directly from the LLM's usage response data, not from any estimation. If no usage data is available from the LLM, only the context percentage is shown. --- crates/g3-core/src/lib.rs | 56 ++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 612c094..9df5cc9 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -205,6 +205,7 @@ impl g3_computer_control::WebDriverController for WebDriverSession { WebDriverSession::Chrome(driver) => driver.quit().await, } } + } // Additional methods for WebDriverSession that aren't part of the WebDriverController trait @@ -3925,6 +3926,7 @@ impl Agent { const MAX_AUTO_SUMMARY_ATTEMPTS: usize = 5; // Limit auto-summary retries (increased from 2 for better recovery) let mut final_output_called = false; // Track if final_output was called // Note: Session-level duplicate tracking was removed - we only prevent sequential duplicates (DUP IN CHUNK, DUP IN MSG) + let mut turn_accumulated_usage: Option = None; // Track token usage for timing footer // Check if we need to summarize before starting if self.context_window.should_summarize() { @@ -4170,6 +4172,7 @@ impl Agent { // Capture usage data if available if let Some(ref usage) = chunk.usage { accumulated_usage = Some(usage.clone()); + turn_accumulated_usage = Some(usage.clone()); debug!( "Received usage data - prompt: {}, completion: {}, total: {}", usage.prompt_tokens, usage.completion_tokens, usage.total_tokens @@ -4866,11 +4869,17 @@ impl Agent { // Add timing if needed let final_response = if show_timing { + let turn_tokens = turn_accumulated_usage.as_ref().map(|u| u.total_tokens); + let timing_footer = Self::format_timing_footer( + stream_start.elapsed(), + _ttft, + turn_tokens, + self.context_window.percentage_used(), + ); format!( - "{}\n\n⏱️ {} | 💭 {}", + "{}\n\n{}", full_response, - Self::format_duration(stream_start.elapsed()), - Self::format_duration(_ttft) + timing_footer ) } else { full_response @@ -5119,11 +5128,17 @@ impl Agent { // Add timing if needed let final_response = if show_timing { + let turn_tokens = turn_accumulated_usage.as_ref().map(|u| u.total_tokens); + let timing_footer = Self::format_timing_footer( + stream_start.elapsed(), + _ttft, + turn_tokens, + self.context_window.percentage_used(), + ); format!( - "{}\n\n⏱️ {} | 💭 {}", + "{}\n\n{}", full_response, - Self::format_duration(stream_start.elapsed()), - Self::format_duration(_ttft) + timing_footer ) } else { full_response @@ -5140,11 +5155,17 @@ impl Agent { // Add timing if needed let final_response = if show_timing { + let turn_tokens = turn_accumulated_usage.as_ref().map(|u| u.total_tokens); + let timing_footer = Self::format_timing_footer( + stream_start.elapsed(), + _ttft, + turn_tokens, + self.context_window.percentage_used(), + ); format!( - "{}\n\n⏱️ {} | 💭 {}", + "{}\n\n{}", full_response, - Self::format_duration(stream_start.elapsed()), - Self::format_duration(_ttft) + timing_footer ) } else { full_response @@ -6977,6 +6998,23 @@ impl Agent { format!("{}m {:.1}s", minutes, remaining_seconds) } } + + /// Format the timing footer with optional token usage info + fn format_timing_footer( + elapsed: Duration, + ttft: Duration, + turn_tokens: Option, + context_percentage: f32, + ) -> String { + let timing = format!("⏱️ {} | 💭 {}", Self::format_duration(elapsed), Self::format_duration(ttft)); + + // Add token usage info if available (dimmed) + if let Some(tokens) = turn_tokens { + format!("{} \x1b[2m{}tk | {:.0}% ctx\x1b[0m", timing, tokens, context_percentage) + } else { + format!("{} \x1b[2m{:.0}% ctx\x1b[0m", timing, context_percentage) + } + } } // Note: JSON tool call filtering is now handled by UiWriter::filter_json_tool_calls (implemented in g3-cli)