diff --git a/crates/g3-cli/src/filter_json.rs b/crates/g3-cli/src/filter_json.rs index 0e4650a..ee140a2 100644 --- a/crates/g3-cli/src/filter_json.rs +++ b/crates/g3-cli/src/filter_json.rs @@ -323,7 +323,14 @@ fn handle_suppressing_char(state: &mut FilterState, ch: char, _output: &mut Stri // Limit buffer size to prevent unbounded growth if state.buffer.len() > 200 { - let keep_from = state.buffer.len() - 100; + // Find a valid character boundary near the 100-byte mark from the end + // We can't just slice at byte offset - multi-byte chars (like emojis) would panic + let target_keep = state.buffer.len() - 100; + // Find the nearest char boundary at or after target_keep + let keep_from = state.buffer.char_indices() + .map(|(i, _)| i) + .find(|&i| i >= target_keep) + .unwrap_or(0); state.buffer = state.buffer[keep_from..].to_string(); } } @@ -413,4 +420,22 @@ mod tests { } assert_eq!(result, "Before\n\nAfter"); } + + #[test] + fn test_buffer_truncation_with_multibyte_chars() { + // This test ensures that buffer truncation doesn't panic on multi-byte characters + // The bug was: slicing at byte offset 100 from end could land mid-emoji + reset_json_tool_state(); + + // Create a string with emojis that's over 200 bytes to trigger truncation + // Each emoji is 4 bytes, so we need ~50+ emojis to exceed 200 bytes + let emoji_heavy = "🔄".repeat(60); // 240 bytes of emojis + let input = format!("Text\n{{\"tool\": \"shell\", \"args\": {{\"data\": \"{}\"}}}}\nMore", emoji_heavy); + + // This should not panic - the fix ensures we find valid char boundaries + let result = filter_json_tool_calls(&input); + + // The tool call should be filtered out + assert_eq!(result, "Text\n\nMore"); + } } diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index bed2564..302e104 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -1841,6 +1841,7 @@ impl Agent { let mut raw_chunks: Vec = Vec::new(); // Store raw chunks for debugging let mut _last_error: Option = None; let mut accumulated_usage: Option = None; + let mut stream_stop_reason: Option = None; // Track why the stream stopped while let Some(chunk_result) = stream.next().await { match chunk_result { @@ -2277,6 +2278,12 @@ impl Agent { if chunk.finished { debug!("Stream finished: tool_executed={}, current_response_len={}, full_response_len={}, chunks_received={}", tool_executed, current_response.len(), full_response.len(), chunks_received); + + // Capture the stop reason from the final chunk + if let Some(ref reason) = chunk.stop_reason { + debug!("Stream stop_reason: {}", reason); + stream_stop_reason = Some(reason.clone()); + } // Stream finished - check if we should continue or return if !tool_executed { @@ -2498,10 +2505,18 @@ impl Agent { debug!("Detected unexecuted tool call in buffer - this may indicate a parsing issue"); warn!("Unexecuted tool call detected in buffer after stream ended"); } + + // Check if the response was truncated due to max_tokens + let was_truncated_by_max_tokens = stream_stop_reason.as_deref() == Some("max_tokens"); + if was_truncated_by_max_tokens { + debug!("Response was truncated due to max_tokens limit"); + warn!("LLM response was cut off due to max_tokens limit - will auto-continue"); + } // Auto-continue if tools were executed and we are in autonomous mode // OR if the LLM emitted an incomplete tool call (truncated JSON) // OR if the LLM emitted a complete tool call that wasn't executed + // OR if the response was truncated due to max_tokens // This ensures we don't return control when the LLM clearly intended to call a tool // Note: We removed the redundant condition (any_tool_executed && is_empty_response) // because it's already covered by (any_tool_executed ) @@ -2509,7 +2524,8 @@ impl Agent { // the user may be asking questions and we should return control to them let should_auto_continue = self.is_autonomous && ((any_tool_executed ) || has_incomplete_tool_call - || has_unexecuted_tool_call); + || has_unexecuted_tool_call + || was_truncated_by_max_tokens); if should_auto_continue { if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS { auto_summary_attempts += 1; diff --git a/crates/g3-core/src/streaming_parser.rs b/crates/g3-core/src/streaming_parser.rs index 15fd9d0..4eec670 100644 --- a/crates/g3-core/src/streaming_parser.rs +++ b/crates/g3-core/src/streaming_parser.rs @@ -440,6 +440,7 @@ Some text after"#; finished: true, tool_calls: None, usage: None, + stop_reason: None, }; let tools = parser.process_chunk(&chunk); diff --git a/crates/g3-core/tests/duplicate_detection_test.rs b/crates/g3-core/tests/duplicate_detection_test.rs index 747a2b8..4d55cee 100644 --- a/crates/g3-core/tests/duplicate_detection_test.rs +++ b/crates/g3-core/tests/duplicate_detection_test.rs @@ -13,6 +13,7 @@ fn chunk(content: &str, finished: bool) -> CompletionChunk { finished, tool_calls: None, usage: None, + stop_reason: None, } } diff --git a/crates/g3-core/tests/incomplete_tool_call_test.rs b/crates/g3-core/tests/incomplete_tool_call_test.rs index 4366672..a0e4a3d 100644 --- a/crates/g3-core/tests/incomplete_tool_call_test.rs +++ b/crates/g3-core/tests/incomplete_tool_call_test.rs @@ -17,6 +17,7 @@ fn test_has_incomplete_tool_call_no_tool_pattern() { finished: false, tool_calls: None, usage: None, + stop_reason: None, }; parser.process_chunk(&chunk); assert!(!parser.has_incomplete_tool_call()); @@ -30,6 +31,7 @@ fn test_has_incomplete_tool_call_complete_tool_call() { finished: false, tool_calls: None, usage: None, + stop_reason: None, }; parser.process_chunk(&chunk); // Complete JSON should NOT be detected as incomplete @@ -45,6 +47,7 @@ fn test_has_incomplete_tool_call_truncated_tool_call() { finished: false, tool_calls: None, usage: None, + stop_reason: None, }; parser.process_chunk(&chunk); // Incomplete JSON should be detected @@ -60,6 +63,7 @@ fn test_has_incomplete_tool_call_truncated_mid_value() { finished: false, tool_calls: None, usage: None, + stop_reason: None, }; parser.process_chunk(&chunk); // Incomplete JSON should be detected @@ -77,6 +81,7 @@ fn test_has_incomplete_tool_call_with_text_before() { finished: false, tool_calls: None, usage: None, + stop_reason: None, }; parser.process_chunk(&chunk); // Incomplete JSON should be detected @@ -93,6 +98,7 @@ fn test_has_incomplete_tool_call_malformed_like_trace() { finished: false, tool_calls: None, usage: None, + stop_reason: None, }; parser.process_chunk(&chunk); // Truncated JSON (missing closing braces) should be detected as incomplete @@ -113,6 +119,7 @@ fn test_has_unexecuted_tool_call_no_tool_pattern() { finished: false, tool_calls: None, usage: None, + stop_reason: None, }; parser.process_chunk(&chunk); assert!(!parser.has_unexecuted_tool_call()); @@ -126,6 +133,7 @@ fn test_has_unexecuted_tool_call_complete_tool_call() { finished: false, tool_calls: None, usage: None, + stop_reason: None, }; parser.process_chunk(&chunk); // Complete JSON tool call that wasn't executed should be detected @@ -140,6 +148,7 @@ fn test_has_unexecuted_tool_call_incomplete_json() { finished: false, tool_calls: None, usage: None, + stop_reason: None, }; parser.process_chunk(&chunk); // Incomplete JSON should NOT be detected as unexecuted (it's incomplete, not unexecuted) @@ -157,6 +166,7 @@ Some trailing text after the JSON"#.to_string(), finished: false, tool_calls: None, usage: None, + stop_reason: None, }; parser.process_chunk(&chunk); // Complete JSON tool call should be detected even with trailing text @@ -175,6 +185,7 @@ I'll execute this command now."#.to_string(), finished: false, tool_calls: None, usage: None, + stop_reason: None, }; parser.process_chunk(&chunk); // Complete JSON tool call should be detected diff --git a/crates/g3-core/tests/streaming_parser_test.rs b/crates/g3-core/tests/streaming_parser_test.rs index 2d33777..d83b62e 100644 --- a/crates/g3-core/tests/streaming_parser_test.rs +++ b/crates/g3-core/tests/streaming_parser_test.rs @@ -17,6 +17,7 @@ fn chunk(content: &str, finished: bool) -> CompletionChunk { finished, tool_calls: None, usage: None, + stop_reason: None, } } diff --git a/crates/g3-providers/src/anthropic.rs b/crates/g3-providers/src/anthropic.rs index 4e1e177..f0775ab 100644 --- a/crates/g3-providers/src/anthropic.rs +++ b/crates/g3-providers/src/anthropic.rs @@ -112,7 +112,7 @@ use tracing::{debug, error}; use crate::{ CompletionChunk, CompletionRequest, CompletionResponse, CompletionStream, LLMProvider, Message, MessageRole, Tool, ToolCall, Usage, - streaming::{decode_utf8_streaming, make_final_chunk, make_text_chunk, make_tool_chunk}, + streaming::{decode_utf8_streaming, make_final_chunk, make_final_chunk_with_reason, make_text_chunk, make_tool_chunk}, }; const ANTHROPIC_API_URL: &str = "https://api.anthropic.com/v1/messages"; @@ -395,6 +395,7 @@ impl AnthropicProvider { let mut accumulated_usage: Option = None; let mut byte_buffer = Vec::new(); // Buffer for incomplete UTF-8 sequences let mut message_stopped = false; // Track if we've received message_stop + let mut stop_reason: Option = None; // Track why the message stopped while let Some(chunk_result) = stream.next().await { match chunk_result { @@ -583,10 +584,20 @@ impl AnthropicProvider { current_tool_calls.clear(); } } + "message_delta" => { + // message_delta contains the stop_reason and final usage + if let Some(delta) = &event.delta { + if let Some(reason) = &delta.stop_reason { + debug!("Received stop_reason: {}", reason); + stop_reason = Some(reason.clone()); + } + } + // Usage is also in message_delta but we get it from message_start + } "message_stop" => { debug!("Received message stop event"); message_stopped = true; - let final_chunk = make_final_chunk(current_tool_calls.clone(), accumulated_usage.clone()); + let final_chunk = make_final_chunk_with_reason(current_tool_calls.clone(), accumulated_usage.clone(), stop_reason.clone()); if tx.send(Ok(final_chunk)).await.is_err() { debug!("Receiver dropped, stopping stream"); } @@ -931,6 +942,8 @@ struct AnthropicStreamMessage { struct AnthropicDelta { text: Option, partial_json: Option, + #[serde(default)] + stop_reason: Option, } #[derive(Debug, Deserialize)] diff --git a/crates/g3-providers/src/databricks.rs b/crates/g3-providers/src/databricks.rs index e7a23c7..bb25893 100644 --- a/crates/g3-providers/src/databricks.rs +++ b/crates/g3-providers/src/databricks.rs @@ -493,6 +493,7 @@ impl DatabricksProvider { finished: false, usage: None, tool_calls: None, + stop_reason: None, }; if tx.send(Ok(text_chunk)).await.is_err() { debug!("Receiver dropped"); diff --git a/crates/g3-providers/src/lib.rs b/crates/g3-providers/src/lib.rs index 7a71785..a14ca03 100644 --- a/crates/g3-providers/src/lib.rs +++ b/crates/g3-providers/src/lib.rs @@ -187,6 +187,8 @@ pub struct CompletionChunk { pub finished: bool, pub tool_calls: Option>, pub usage: Option, // Add usage tracking for streaming + /// Stop reason from the API (e.g., "end_turn", "max_tokens", "stop_sequence") + pub stop_reason: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/crates/g3-providers/src/openai.rs b/crates/g3-providers/src/openai.rs index aa567ed..8dd05b0 100644 --- a/crates/g3-providers/src/openai.rs +++ b/crates/g3-providers/src/openai.rs @@ -157,6 +157,7 @@ impl OpenAIProvider { finished: true, tool_calls, usage: accumulated_usage.clone(), + stop_reason: None, // TODO: Extract from OpenAI response }; let _ = tx.send(Ok(final_chunk)).await; } diff --git a/crates/g3-providers/src/streaming.rs b/crates/g3-providers/src/streaming.rs index 2295d61..15a3762 100644 --- a/crates/g3-providers/src/streaming.rs +++ b/crates/g3-providers/src/streaming.rs @@ -61,6 +61,22 @@ pub fn make_final_chunk(tool_calls: Vec, usage: Option) -> Comp } else { Some(tool_calls) }, + stop_reason: None, + } +} + +/// Create a final completion chunk with stop reason. +pub fn make_final_chunk_with_reason(tool_calls: Vec, usage: Option, stop_reason: Option) -> CompletionChunk { + CompletionChunk { + content: String::new(), + finished: true, + usage, + tool_calls: if tool_calls.is_empty() { + None + } else { + Some(tool_calls) + }, + stop_reason, } } @@ -71,6 +87,7 @@ pub fn make_text_chunk(content: String) -> CompletionChunk { finished: false, usage: None, tool_calls: None, + stop_reason: None, } } @@ -81,5 +98,6 @@ pub fn make_tool_chunk(tool_calls: Vec) -> CompletionChunk { finished: false, usage: None, tool_calls: Some(tool_calls), + stop_reason: None, } }