From 1980e625110a8b7d198938082192ea82757bd0f0 Mon Sep 17 00:00:00 2001
From: "Dhanji R. Prasanna"
Date: Wed, 7 Jan 2026 11:16:42 +1100
Subject: [PATCH] Improve code readability in g3-core

- streaming_parser.rs: Rename has_message_like_keys to
  args_contain_prose_fragments with improved documentation explaining the
  heuristic for detecting malformed tool calls where LLM prose leaked into
  JSON keys
- context_window.rs: Simplify build_thin_result_message using early return
  pattern and match expression for cleaner control flow

Agent: carmack
---
 crates/g3-core/src/context_window.rs   | 55 ++++++++++----------------
 crates/g3-core/src/streaming_parser.rs | 29 ++++++++------
 2 files changed, 37 insertions(+), 47 deletions(-)

diff --git a/crates/g3-core/src/context_window.rs b/crates/g3-core/src/context_window.rs
index a16f194..6db485b 100644
--- a/crates/g3-core/src/context_window.rs
+++ b/crates/g3-core/src/context_window.rs
@@ -646,46 +646,33 @@ Format this as a detailed but concise summary that can be used to resume the con
         tool_call_leaned_count: usize,
         chars_saved: usize,
     ) -> (String, usize) {
-        let emoji = scope.emoji();
-        let label = scope.label();
         let scope_desc = match scope {
             ThinScope::FirstThird => "",
             ThinScope::All => " across entire history",
         };
-        if leaned_count > 0 && tool_call_leaned_count > 0 {
-            (
-                format!(
-                    "{} Context {} at {}%: {} tool results + {} tool calls{}, ~{} chars saved",
-                    emoji, label, current_percentage, leaned_count, tool_call_leaned_count, scope_desc, chars_saved
-                ),
-                chars_saved,
-            )
-        } else if leaned_count > 0 {
-            (
-                format!(
-                    "{} Context {} at {}%: {} tool results{}, ~{} chars saved",
-                    emoji, label, current_percentage, leaned_count, scope_desc, chars_saved
-                ),
-                chars_saved,
-            )
-        } else if tool_call_leaned_count > 0 {
-            (
-                format!(
-                    "{} Context {} at {}%: {} tool calls{}, ~{} chars saved",
-                    emoji, label, current_percentage, tool_call_leaned_count, scope_desc, chars_saved
-                ),
-                chars_saved,
-            )
-        } else {
-            (
-                format!(
-
                    "ℹ Context {} triggered at {}% but no large tool results or tool calls found{}",
-                    scope.error_action(), current_percentage, scope_desc
-                ),
-                0,
-            )
+        // Nothing was thinned
+        if leaned_count == 0 && tool_call_leaned_count == 0 {
+            let msg = format!(
+                "ℹ Context {} triggered at {}% but no large tool results or tool calls found{}",
+                scope.error_action(), current_percentage, scope_desc
+            );
+            return (msg, 0);
         }
+
+        // Build description of what was thinned
+        let what_thinned = match (leaned_count > 0, tool_call_leaned_count > 0) {
+            (true, true) => format!("{} tool results + {} tool calls", leaned_count, tool_call_leaned_count),
+            (true, false) => format!("{} tool results", leaned_count),
+            (false, true) => format!("{} tool calls", tool_call_leaned_count),
+            (false, false) => unreachable!(), // handled above
+        };
+
+        let msg = format!(
+            "{} Context {} at {}%: {}{}, ~{} chars saved",
+            scope.emoji(), scope.label(), current_percentage, what_thinned, scope_desc, chars_saved
+        );
+        (msg, chars_saved)
     }
 
     /// Recalculate token usage based on current conversation history
diff --git a/crates/g3-core/src/streaming_parser.rs b/crates/g3-core/src/streaming_parser.rs
index 3caca88..bfe140f 100644
--- a/crates/g3-core/src/streaming_parser.rs
+++ b/crates/g3-core/src/streaming_parser.rs
@@ -77,20 +77,23 @@ impl StreamingToolParser {
         best_start
     }
 
-    /// Validate that tool call args don't contain message-like content.
-    /// This detects malformed tool calls where agent messages got mixed into args.
-    fn has_message_like_keys(args: &serde_json::Map<String, serde_json::Value>) -> bool {
+    /// Detect malformed tool calls where LLM prose leaked into JSON keys.
+    ///
+    /// When the LLM "stutters" or mixes formats, it sometimes emits JSON where
+    /// the keys are actually fragments of conversational text rather than valid
+    /// parameter names.
 This heuristic catches such cases by looking for:
+    /// - Unusually long keys (>100 chars)
+    /// - Newlines in keys (valid in JSON, but real parameter names never contain them)
+    /// - Common LLM response phrases that indicate prose, not parameters
+    fn args_contain_prose_fragments(args: &serde_json::Map<String, serde_json::Value>) -> bool {
+        const PROSE_MARKERS: &[&str] = &[
+            "I'll", "Let me", "Here's", "I can", "I need", "First", "Now", "The ",
+        ];
+
         args.keys().any(|key| {
             key.len() > 100
                 || key.contains('\n')
-                || key.contains("I'll")
-                || key.contains("Let me")
-                || key.contains("Here's")
-                || key.contains("I can")
-                || key.contains("I need")
-                || key.contains("First")
-                || key.contains("Now")
-                || key.contains("The ")
+                || PROSE_MARKERS.iter().any(|marker| key.contains(marker))
         })
     }
 
@@ -172,7 +175,7 @@ impl StreamingToolParser {
         if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
             // Validate that args is an object with reasonable keys
             if let Some(args_obj) = tool_call.args.as_object() {
-                if Self::has_message_like_keys(args_obj) {
+                if Self::args_contain_prose_fragments(args_obj) {
                     debug!(
                         "Detected malformed tool call with message-like keys, skipping"
                     );
@@ -220,7 +223,7 @@ impl StreamingToolParser {
 
         if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
             if let Some(args_obj) = tool_call.args.as_object() {
-                if !Self::has_message_like_keys(args_obj) {
+                if !Self::args_contain_prose_fragments(args_obj) {
                     debug!(
                         "Found tool call at position {}: {:?}",
                         abs_start, tool_call.tool