Fix panic on multi-byte chars in filter_json buffer truncation

The buffer truncation code was slicing at a raw byte offset which could land in the middle of a multi-byte character (like emojis), causing a panic. Fixed by using char_indices() to find valid character boundaries. Also added stop_reason field to CompletionChunk initializers in tests to complete the stop_reason feature addition. - Fix byte boundary panic in filter_json.rs line 327 - Add test for multi-byte character handling - Update test files with missing stop_reason field
2026-01-09 15:20:57 +11:00
parent c470964628
commit e301075666
11 changed files with 94 additions and 4 deletions
--- a/crates/g3-cli/src/filter_json.rs
+++ b/crates/g3-cli/src/filter_json.rs
@@ -323,7 +323,14 @@ fn handle_suppressing_char(state: &mut FilterState, ch: char, _output: &mut Stri
        
        // Limit buffer size to prevent unbounded growth
        if state.buffer.len() > 200 {
-            let keep_from = state.buffer.len() - 100;
+            // Find a valid character boundary near the 100-byte mark from the end
+            // We can't just slice at byte offset - multi-byte chars (like emojis) would panic
+            let target_keep = state.buffer.len() - 100;
+            // Find the nearest char boundary at or after target_keep
+            let keep_from = state.buffer.char_indices()
+                .map(|(i, _)| i)
+                .find(|&i| i >= target_keep)
+                .unwrap_or(0);
            state.buffer = state.buffer[keep_from..].to_string();
        }
    }
@@ -413,4 +420,22 @@ mod tests {
        }
        assert_eq!(result, "Before\n\nAfter");
    }
+
+    #[test]
+    fn test_buffer_truncation_with_multibyte_chars() {
+        // This test ensures that buffer truncation doesn't panic on multi-byte characters
+        // The bug was: slicing at byte offset 100 from end could land mid-emoji
+        reset_json_tool_state();
+        
+        // Create a string with emojis that's over 200 bytes to trigger truncation
+        // Each emoji is 4 bytes, so we need ~50+ emojis to exceed 200 bytes
+        let emoji_heavy = "🔄".repeat(60); // 240 bytes of emojis
+        let input = format!("Text\n{{\"tool\": \"shell\", \"args\": {{\"data\": \"{}\"}}}}\nMore", emoji_heavy);
+        
+        // This should not panic - the fix ensures we find valid char boundaries
+        let result = filter_json_tool_calls(&input);
+        
+        // The tool call should be filtered out
+        assert_eq!(result, "Text\n\nMore");
+    }
 }
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -1841,6 +1841,7 @@ impl<W: UiWriter> Agent<W> {
            let mut raw_chunks: Vec<String> = Vec::new(); // Store raw chunks for debugging
            let mut _last_error: Option<String> = None;
            let mut accumulated_usage: Option<g3_providers::Usage> = None;
+            let mut stream_stop_reason: Option<String> = None; // Track why the stream stopped

            while let Some(chunk_result) = stream.next().await {
                match chunk_result {
@@ -2277,6 +2278,12 @@ impl<W: UiWriter> Agent<W> {
                        if chunk.finished {
                            debug!("Stream finished: tool_executed={}, current_response_len={}, full_response_len={}, chunks_received={}",
                                tool_executed, current_response.len(), full_response.len(), chunks_received);
+                            
+                            // Capture the stop reason from the final chunk
+                            if let Some(ref reason) = chunk.stop_reason {
+                                debug!("Stream stop_reason: {}", reason);
+                                stream_stop_reason = Some(reason.clone());
+                            }

                            // Stream finished - check if we should continue or return
                            if !tool_executed {
@@ -2498,10 +2505,18 @@ impl<W: UiWriter> Agent<W> {
                    debug!("Detected unexecuted tool call in buffer - this may indicate a parsing issue");
                    warn!("Unexecuted tool call detected in buffer after stream ended");
                }
+                
+                // Check if the response was truncated due to max_tokens
+                let was_truncated_by_max_tokens = stream_stop_reason.as_deref() == Some("max_tokens");
+                if was_truncated_by_max_tokens {
+                    debug!("Response was truncated due to max_tokens limit");
+                    warn!("LLM response was cut off due to max_tokens limit - will auto-continue");
+                }

                // Auto-continue if tools were executed and we are in autonomous mode
                // OR if the LLM emitted an incomplete tool call (truncated JSON)
                // OR if the LLM emitted a complete tool call that wasn't executed
+                // OR if the response was truncated due to max_tokens
                // This ensures we don't return control when the LLM clearly intended to call a tool
                // Note: We removed the redundant condition (any_tool_executed && is_empty_response)
                // because it's already covered by (any_tool_executed )
@@ -2509,7 +2524,8 @@ impl<W: UiWriter> Agent<W> {
                // the user may be asking questions and we should return control to them
                let should_auto_continue = self.is_autonomous && ((any_tool_executed ) 
                    || has_incomplete_tool_call 
-                    || has_unexecuted_tool_call);
+                    || has_unexecuted_tool_call
+                    || was_truncated_by_max_tokens);
                if should_auto_continue {
                    if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS {
                        auto_summary_attempts += 1;
--- a/crates/g3-core/src/streaming_parser.rs
+++ b/crates/g3-core/src/streaming_parser.rs
@@ -440,6 +440,7 @@ Some text after"#;
            finished: true,
            tool_calls: None,
            usage: None,
+        stop_reason: None,
        };
        
        let tools = parser.process_chunk(&chunk);
--- a/crates/g3-core/tests/duplicate_detection_test.rs
+++ b/crates/g3-core/tests/duplicate_detection_test.rs
@@ -13,6 +13,7 @@ fn chunk(content: &str, finished: bool) -> CompletionChunk {
        finished,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    }
 }

--- a/crates/g3-core/tests/incomplete_tool_call_test.rs
+++ b/crates/g3-core/tests/incomplete_tool_call_test.rs
@@ -17,6 +17,7 @@ fn test_has_incomplete_tool_call_no_tool_pattern() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    assert!(!parser.has_incomplete_tool_call());
@@ -30,6 +31,7 @@ fn test_has_incomplete_tool_call_complete_tool_call() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Complete JSON should NOT be detected as incomplete
@@ -45,6 +47,7 @@ fn test_has_incomplete_tool_call_truncated_tool_call() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Incomplete JSON should be detected
@@ -60,6 +63,7 @@ fn test_has_incomplete_tool_call_truncated_mid_value() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Incomplete JSON should be detected
@@ -77,6 +81,7 @@ fn test_has_incomplete_tool_call_with_text_before() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Incomplete JSON should be detected
@@ -93,6 +98,7 @@ fn test_has_incomplete_tool_call_malformed_like_trace() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Truncated JSON (missing closing braces) should be detected as incomplete
@@ -113,6 +119,7 @@ fn test_has_unexecuted_tool_call_no_tool_pattern() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    assert!(!parser.has_unexecuted_tool_call());
@@ -126,6 +133,7 @@ fn test_has_unexecuted_tool_call_complete_tool_call() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Complete JSON tool call that wasn't executed should be detected
@@ -140,6 +148,7 @@ fn test_has_unexecuted_tool_call_incomplete_json() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Incomplete JSON should NOT be detected as unexecuted (it's incomplete, not unexecuted)
@@ -157,6 +166,7 @@ Some trailing text after the JSON"#.to_string(),
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Complete JSON tool call should be detected even with trailing text
@@ -175,6 +185,7 @@ I'll execute this command now."#.to_string(),
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Complete JSON tool call should be detected
--- a/crates/g3-core/tests/streaming_parser_test.rs
+++ b/crates/g3-core/tests/streaming_parser_test.rs
@@ -17,6 +17,7 @@ fn chunk(content: &str, finished: bool) -> CompletionChunk {
        finished,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    }
 }

--- a/crates/g3-providers/src/anthropic.rs
+++ b/crates/g3-providers/src/anthropic.rs
@@ -112,7 +112,7 @@ use tracing::{debug, error};
 use crate::{
    CompletionChunk, CompletionRequest, CompletionResponse, CompletionStream, LLMProvider, Message,
    MessageRole, Tool, ToolCall, Usage,
-    streaming::{decode_utf8_streaming, make_final_chunk, make_text_chunk, make_tool_chunk},
+    streaming::{decode_utf8_streaming, make_final_chunk, make_final_chunk_with_reason, make_text_chunk, make_tool_chunk},
 };

 const ANTHROPIC_API_URL: &str = "https://api.anthropic.com/v1/messages";
@@ -395,6 +395,7 @@ impl AnthropicProvider {
        let mut accumulated_usage: Option<Usage> = None;
        let mut byte_buffer = Vec::new(); // Buffer for incomplete UTF-8 sequences
        let mut message_stopped = false; // Track if we've received message_stop
+        let mut stop_reason: Option<String> = None; // Track why the message stopped

        while let Some(chunk_result) = stream.next().await {
            match chunk_result {
@@ -583,10 +584,20 @@ impl AnthropicProvider {
                                                current_tool_calls.clear();
                                            }
                                        }
+                                        "message_delta" => {
+                                            // message_delta contains the stop_reason and final usage
+                                            if let Some(delta) = &event.delta {
+                                                if let Some(reason) = &delta.stop_reason {
+                                                    debug!("Received stop_reason: {}", reason);
+                                                    stop_reason = Some(reason.clone());
+                                                }
+                                            }
+                                            // Usage is also in message_delta but we get it from message_start
+                                        }
                                        "message_stop" => {
                                            debug!("Received message stop event");
                                            message_stopped = true;
-                                            let final_chunk = make_final_chunk(current_tool_calls.clone(), accumulated_usage.clone());
+                                            let final_chunk = make_final_chunk_with_reason(current_tool_calls.clone(), accumulated_usage.clone(), stop_reason.clone());
                                            if tx.send(Ok(final_chunk)).await.is_err() {
                                                debug!("Receiver dropped, stopping stream");
                                            }
@@ -931,6 +942,8 @@ struct AnthropicStreamMessage {
 struct AnthropicDelta {
    text: Option<String>,
    partial_json: Option<String>,
+    #[serde(default)]
+    stop_reason: Option<String>,
 }

 #[derive(Debug, Deserialize)]
--- a/crates/g3-providers/src/databricks.rs
+++ b/crates/g3-providers/src/databricks.rs
@@ -493,6 +493,7 @@ impl DatabricksProvider {
                                finished: false,
                                usage: None,
                                tool_calls: None,
+                                stop_reason: None,
                            };
                            if tx.send(Ok(text_chunk)).await.is_err() {
                                debug!("Receiver dropped");
--- a/crates/g3-providers/src/lib.rs
+++ b/crates/g3-providers/src/lib.rs
@@ -187,6 +187,8 @@ pub struct CompletionChunk {
    pub finished: bool,
    pub tool_calls: Option<Vec<ToolCall>>,
    pub usage: Option<Usage>, // Add usage tracking for streaming
+    /// Stop reason from the API (e.g., "end_turn", "max_tokens", "stop_sequence")
+    pub stop_reason: Option<String>,
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
--- a/crates/g3-providers/src/openai.rs
+++ b/crates/g3-providers/src/openai.rs
@@ -157,6 +157,7 @@ impl OpenAIProvider {
                                        finished: true,
                                        tool_calls,
                                        usage: accumulated_usage.clone(),
+                                        stop_reason: None, // TODO: Extract from OpenAI response
                                    };
                                    let _ = tx.send(Ok(final_chunk)).await;
                                }
--- a/crates/g3-providers/src/streaming.rs
+++ b/crates/g3-providers/src/streaming.rs
@@ -61,6 +61,22 @@ pub fn make_final_chunk(tool_calls: Vec<ToolCall>, usage: Option<Usage>) -> Comp
        } else {
            Some(tool_calls)
        },
+        stop_reason: None,
+    }
+}
+
+/// Create a final completion chunk with stop reason.
+pub fn make_final_chunk_with_reason(tool_calls: Vec<ToolCall>, usage: Option<Usage>, stop_reason: Option<String>) -> CompletionChunk {
+    CompletionChunk {
+        content: String::new(),
+        finished: true,
+        usage,
+        tool_calls: if tool_calls.is_empty() {
+            None
+        } else {
+            Some(tool_calls)
+        },
+        stop_reason,
    }
 }

@@ -71,6 +87,7 @@ pub fn make_text_chunk(content: String) -> CompletionChunk {
        finished: false,
        usage: None,
        tool_calls: None,
+        stop_reason: None,
    }
 }

@@ -81,5 +98,6 @@ pub fn make_tool_chunk(tool_calls: Vec<ToolCall>) -> CompletionChunk {
        finished: false,
        usage: None,
        tool_calls: Some(tool_calls),
+        stop_reason: None,
    }
 }