Fix panic on multi-byte chars in filter_json buffer truncation

The buffer truncation code sliced at a raw byte offset, which could land in the middle of a multi-byte character (such as an emoji) and cause a panic. Fixed by using char_indices() to find a valid character boundary. Also added the stop_reason field to CompletionChunk initializers in tests to complete the stop_reason feature addition.

- Fix byte boundary panic in filter_json.rs line 327
- Add test for multi-byte character handling
- Update test files with missing stop_reason field
- Track the stream's stop_reason in lib.rs and auto-continue when it is "max_tokens"
2026-01-09 15:20:57 +11:00
parent c470964628
commit e301075666
11 changed files with 94 additions and 4 deletions
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -1841,6 +1841,7 @@ impl<W: UiWriter> Agent<W> {
            let mut raw_chunks: Vec<String> = Vec::new(); // Store raw chunks for debugging
            let mut _last_error: Option<String> = None;
            let mut accumulated_usage: Option<g3_providers::Usage> = None;
+            let mut stream_stop_reason: Option<String> = None; // Track why the stream stopped

            while let Some(chunk_result) = stream.next().await {
                match chunk_result {
@@ -2277,6 +2278,12 @@ impl<W: UiWriter> Agent<W> {
                        if chunk.finished {
                            debug!("Stream finished: tool_executed={}, current_response_len={}, full_response_len={}, chunks_received={}",
                                tool_executed, current_response.len(), full_response.len(), chunks_received);
+                            
+                            // Capture the stop reason from the final chunk
+                            if let Some(ref reason) = chunk.stop_reason {
+                                debug!("Stream stop_reason: {}", reason);
+                                stream_stop_reason = Some(reason.clone());
+                            }

                            // Stream finished - check if we should continue or return
                            if !tool_executed {
@@ -2498,10 +2505,18 @@ impl<W: UiWriter> Agent<W> {
                    debug!("Detected unexecuted tool call in buffer - this may indicate a parsing issue");
                    warn!("Unexecuted tool call detected in buffer after stream ended");
                }
+                
+                // Check if the response was truncated due to max_tokens
+                let was_truncated_by_max_tokens = stream_stop_reason.as_deref() == Some("max_tokens");
+                if was_truncated_by_max_tokens {
+                    debug!("Response was truncated due to max_tokens limit");
+                    warn!("LLM response was cut off due to max_tokens limit - will auto-continue");
+                }

                // Auto-continue if tools were executed and we are in autonomous mode
                // OR if the LLM emitted an incomplete tool call (truncated JSON)
                // OR if the LLM emitted a complete tool call that wasn't executed
+                // OR if the response was truncated due to max_tokens
                // This ensures we don't return control when the LLM clearly intended to call a tool
                // Note: We removed the redundant condition (any_tool_executed && is_empty_response)
                // because it's already covered by (any_tool_executed )
@@ -2509,7 +2524,8 @@ impl<W: UiWriter> Agent<W> {
                // the user may be asking questions and we should return control to them
                let should_auto_continue = self.is_autonomous && ((any_tool_executed ) 
                    || has_incomplete_tool_call 
-                    || has_unexecuted_tool_call);
+                    || has_unexecuted_tool_call
+                    || was_truncated_by_max_tokens);
                if should_auto_continue {
                    if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS {
                        auto_summary_attempts += 1;
--- a/crates/g3-core/src/streaming_parser.rs
+++ b/crates/g3-core/src/streaming_parser.rs
@@ -440,6 +440,7 @@ Some text after"#;
            finished: true,
            tool_calls: None,
            usage: None,
+        stop_reason: None,
        };
        
        let tools = parser.process_chunk(&chunk);
--- a/crates/g3-core/tests/duplicate_detection_test.rs
+++ b/crates/g3-core/tests/duplicate_detection_test.rs
@@ -13,6 +13,7 @@ fn chunk(content: &str, finished: bool) -> CompletionChunk {
        finished,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    }
 }

--- a/crates/g3-core/tests/incomplete_tool_call_test.rs
+++ b/crates/g3-core/tests/incomplete_tool_call_test.rs
@@ -17,6 +17,7 @@ fn test_has_incomplete_tool_call_no_tool_pattern() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    assert!(!parser.has_incomplete_tool_call());
@@ -30,6 +31,7 @@ fn test_has_incomplete_tool_call_complete_tool_call() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Complete JSON should NOT be detected as incomplete
@@ -45,6 +47,7 @@ fn test_has_incomplete_tool_call_truncated_tool_call() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Incomplete JSON should be detected
@@ -60,6 +63,7 @@ fn test_has_incomplete_tool_call_truncated_mid_value() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Incomplete JSON should be detected
@@ -77,6 +81,7 @@ fn test_has_incomplete_tool_call_with_text_before() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Incomplete JSON should be detected
@@ -93,6 +98,7 @@ fn test_has_incomplete_tool_call_malformed_like_trace() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Truncated JSON (missing closing braces) should be detected as incomplete
@@ -113,6 +119,7 @@ fn test_has_unexecuted_tool_call_no_tool_pattern() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    assert!(!parser.has_unexecuted_tool_call());
@@ -126,6 +133,7 @@ fn test_has_unexecuted_tool_call_complete_tool_call() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Complete JSON tool call that wasn't executed should be detected
@@ -140,6 +148,7 @@ fn test_has_unexecuted_tool_call_incomplete_json() {
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Incomplete JSON should NOT be detected as unexecuted (it's incomplete, not unexecuted)
@@ -157,6 +166,7 @@ Some trailing text after the JSON"#.to_string(),
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Complete JSON tool call should be detected even with trailing text
@@ -175,6 +185,7 @@ I'll execute this command now."#.to_string(),
        finished: false,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    };
    parser.process_chunk(&chunk);
    // Complete JSON tool call should be detected
--- a/crates/g3-core/tests/streaming_parser_test.rs
+++ b/crates/g3-core/tests/streaming_parser_test.rs
@@ -17,6 +17,7 @@ fn chunk(content: &str, finished: bool) -> CompletionChunk {
        finished,
        tool_calls: None,
        usage: None,
+        stop_reason: None,
    }
 }