diff --git a/crates/g3-cli/src/filter_json.rs b/crates/g3-cli/src/filter_json.rs
index 0e4650a..ee140a2 100644
--- a/crates/g3-cli/src/filter_json.rs
+++ b/crates/g3-cli/src/filter_json.rs
@@ -323,7 +323,14 @@ fn handle_suppressing_char(state: &mut FilterState, ch: char, _output: &mut Stri
         
         // Limit buffer size to prevent unbounded growth
         if state.buffer.len() > 200 {
-            let keep_from = state.buffer.len() - 100;
+            // Find a valid character boundary near the 100-byte mark from the end
+            // We can't just slice at byte offset - multi-byte chars (like emojis) would panic
+            let target_keep = state.buffer.len() - 100;
+            // Find the nearest char boundary at or after target_keep
+            let keep_from = state.buffer.char_indices()
+                .map(|(i, _)| i)
+                .find(|&i| i >= target_keep)
+                .unwrap_or(0);
             state.buffer = state.buffer[keep_from..].to_string();
         }
     }
@@ -413,4 +420,22 @@ mod tests {
         }
         assert_eq!(result, "Before\n\nAfter");
     }
+
+    #[test]
+    fn test_buffer_truncation_with_multibyte_chars() {
+        // This test ensures that buffer truncation doesn't panic on multi-byte characters
+        // The bug was: slicing at byte offset 100 from end could land mid-emoji
+        reset_json_tool_state();
+        
+        // Create a string with emojis that's over 200 bytes to trigger truncation
+        // Each emoji is 4 bytes, so we need ~50+ emojis to exceed 200 bytes
+        let emoji_heavy = "🔄".repeat(60); // 240 bytes of emojis
+        let input = format!("Text\n{{\"tool\": \"shell\", \"args\": {{\"data\": \"{}\"}}}}\nMore", emoji_heavy);
+        
+        // This should not panic - the fix ensures we find valid char boundaries
+        let result = filter_json_tool_calls(&input);
+        
+        // The tool call should be filtered out
+        assert_eq!(result, "Text\n\nMore");
+    }
 }
diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs
index bed2564..302e104 100644
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -1841,6 +1841,7 @@ impl<W: UiWriter> Agent<W> {
             let mut raw_chunks: Vec<String> = Vec::new(); // Store raw chunks for debugging
             let mut _last_error: Option<String> = None;
             let mut accumulated_usage: Option<g3_providers::Usage> = None;
+            let mut stream_stop_reason: Option<String> = None; // Track why the stream stopped
 
             while let Some(chunk_result) = stream.next().await {
                 match chunk_result {
@@ -2277,6 +2278,12 @@ impl<W: UiWriter> Agent<W> {
                         if chunk.finished {
                             debug!("Stream finished: tool_executed={}, current_response_len={}, full_response_len={}, chunks_received={}",
                                 tool_executed, current_response.len(), full_response.len(), chunks_received);
+                            
+                            // Capture the stop reason from the final chunk
+                            if let Some(ref reason) = chunk.stop_reason {
+                                debug!("Stream stop_reason: {}", reason);
+                                stream_stop_reason = Some(reason.clone());
+                            }
 
                             // Stream finished - check if we should continue or return
                             if !tool_executed {
@@ -2498,10 +2505,18 @@ impl<W: UiWriter> Agent<W> {
                     debug!("Detected unexecuted tool call in buffer - this may indicate a parsing issue");
                     warn!("Unexecuted tool call detected in buffer after stream ended");
                 }
+                
+                // Check if the response was truncated due to max_tokens
+                let was_truncated_by_max_tokens = stream_stop_reason.as_deref() == Some("max_tokens");
+                if was_truncated_by_max_tokens {
+                    debug!("Response was truncated due to max_tokens limit");
+                    warn!("LLM response was cut off due to max_tokens limit - will auto-continue");
+                }
 
                 // Auto-continue if tools were executed and we are in autonomous mode
                 // OR if the LLM emitted an incomplete tool call (truncated JSON)
                 // OR if the LLM emitted a complete tool call that wasn't executed
+                // OR if the response was truncated due to max_tokens
                 // This ensures we don't return control when the LLM clearly intended to call a tool
                 // Note: We removed the redundant condition (any_tool_executed && is_empty_response)
                 // because it's already covered by (any_tool_executed )
@@ -2509,7 +2524,8 @@ impl<W: UiWriter> Agent<W> {
                 // the user may be asking questions and we should return control to them
                 let should_auto_continue = self.is_autonomous && ((any_tool_executed ) 
                     || has_incomplete_tool_call 
-                    || has_unexecuted_tool_call);
+                    || has_unexecuted_tool_call
+                    || was_truncated_by_max_tokens);
                 if should_auto_continue {
                     if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS {
                         auto_summary_attempts += 1;
diff --git a/crates/g3-core/src/streaming_parser.rs b/crates/g3-core/src/streaming_parser.rs
index 15fd9d0..4eec670 100644
--- a/crates/g3-core/src/streaming_parser.rs
+++ b/crates/g3-core/src/streaming_parser.rs
@@ -440,6 +440,7 @@ Some text after"#;
             finished: true,
             tool_calls: None,
             usage: None,
+        stop_reason: None,
         };
         
         let tools = parser.process_chunk(&chunk);
diff --git a/crates/g3-core/tests/duplicate_detection_test.rs b/crates/g3-core/tests/duplicate_detection_test.rs
index 747a2b8..4d55cee 100644
--- a/crates/g3-core/tests/duplicate_detection_test.rs
+++ b/crates/g3-core/tests/duplicate_detection_test.rs
@@ -13,6 +13,7 @@ fn chunk(content: &str, finished: bool) -> CompletionChunk {
         finished,
         tool_calls: None,
         usage: None,
+        stop_reason: None,
     }
 }
 
diff --git a/crates/g3-core/tests/incomplete_tool_call_test.rs b/crates/g3-core/tests/incomplete_tool_call_test.rs
index 4366672..a0e4a3d 100644
--- a/crates/g3-core/tests/incomplete_tool_call_test.rs
+++ b/crates/g3-core/tests/incomplete_tool_call_test.rs
@@ -17,6 +17,7 @@ fn test_has_incomplete_tool_call_no_tool_pattern() {
         finished: false,
         tool_calls: None,
         usage: None,
+        stop_reason: None,
     };
     parser.process_chunk(&chunk);
     assert!(!parser.has_incomplete_tool_call());
@@ -30,6 +31,7 @@ fn test_has_incomplete_tool_call_complete_tool_call() {
         finished: false,
         tool_calls: None,
         usage: None,
+        stop_reason: None,
     };
     parser.process_chunk(&chunk);
     // Complete JSON should NOT be detected as incomplete
@@ -45,6 +47,7 @@ fn test_has_incomplete_tool_call_truncated_tool_call() {
         finished: false,
         tool_calls: None,
         usage: None,
+        stop_reason: None,
     };
     parser.process_chunk(&chunk);
     // Incomplete JSON should be detected
@@ -60,6 +63,7 @@ fn test_has_incomplete_tool_call_truncated_mid_value() {
         finished: false,
         tool_calls: None,
         usage: None,
+        stop_reason: None,
     };
     parser.process_chunk(&chunk);
     // Incomplete JSON should be detected
@@ -77,6 +81,7 @@ fn test_has_incomplete_tool_call_with_text_before() {
         finished: false,
         tool_calls: None,
         usage: None,
+        stop_reason: None,
     };
     parser.process_chunk(&chunk);
     // Incomplete JSON should be detected
@@ -93,6 +98,7 @@ fn test_has_incomplete_tool_call_malformed_like_trace() {
         finished: false,
         tool_calls: None,
         usage: None,
+        stop_reason: None,
     };
     parser.process_chunk(&chunk);
     // Truncated JSON (missing closing braces) should be detected as incomplete
@@ -113,6 +119,7 @@ fn test_has_unexecuted_tool_call_no_tool_pattern() {
         finished: false,
         tool_calls: None,
         usage: None,
+        stop_reason: None,
     };
     parser.process_chunk(&chunk);
     assert!(!parser.has_unexecuted_tool_call());
@@ -126,6 +133,7 @@ fn test_has_unexecuted_tool_call_complete_tool_call() {
         finished: false,
         tool_calls: None,
         usage: None,
+        stop_reason: None,
     };
     parser.process_chunk(&chunk);
     // Complete JSON tool call that wasn't executed should be detected
@@ -140,6 +148,7 @@ fn test_has_unexecuted_tool_call_incomplete_json() {
         finished: false,
         tool_calls: None,
         usage: None,
+        stop_reason: None,
     };
     parser.process_chunk(&chunk);
     // Incomplete JSON should NOT be detected as unexecuted (it's incomplete, not unexecuted)
@@ -157,6 +166,7 @@ Some trailing text after the JSON"#.to_string(),
         finished: false,
         tool_calls: None,
         usage: None,
+        stop_reason: None,
     };
     parser.process_chunk(&chunk);
     // Complete JSON tool call should be detected even with trailing text
@@ -175,6 +185,7 @@ I'll execute this command now."#.to_string(),
         finished: false,
         tool_calls: None,
         usage: None,
+        stop_reason: None,
     };
     parser.process_chunk(&chunk);
     // Complete JSON tool call should be detected
diff --git a/crates/g3-core/tests/streaming_parser_test.rs b/crates/g3-core/tests/streaming_parser_test.rs
index 2d33777..d83b62e 100644
--- a/crates/g3-core/tests/streaming_parser_test.rs
+++ b/crates/g3-core/tests/streaming_parser_test.rs
@@ -17,6 +17,7 @@ fn chunk(content: &str, finished: bool) -> CompletionChunk {
         finished,
         tool_calls: None,
         usage: None,
+        stop_reason: None,
     }
 }
 
diff --git a/crates/g3-providers/src/anthropic.rs b/crates/g3-providers/src/anthropic.rs
index 4e1e177..f0775ab 100644
--- a/crates/g3-providers/src/anthropic.rs
+++ b/crates/g3-providers/src/anthropic.rs
@@ -112,7 +112,7 @@ use tracing::{debug, error};
 use crate::{
     CompletionChunk, CompletionRequest, CompletionResponse, CompletionStream, LLMProvider, Message,
     MessageRole, Tool, ToolCall, Usage,
-    streaming::{decode_utf8_streaming, make_final_chunk, make_text_chunk, make_tool_chunk},
+    streaming::{decode_utf8_streaming, make_final_chunk, make_final_chunk_with_reason, make_text_chunk, make_tool_chunk},
 };
 
 const ANTHROPIC_API_URL: &str = "https://api.anthropic.com/v1/messages";
@@ -395,6 +395,7 @@ impl AnthropicProvider {
         let mut accumulated_usage: Option<Usage> = None;
         let mut byte_buffer = Vec::new(); // Buffer for incomplete UTF-8 sequences
         let mut message_stopped = false; // Track if we've received message_stop
+        let mut stop_reason: Option<String> = None; // Track why the message stopped
 
         while let Some(chunk_result) = stream.next().await {
             match chunk_result {
@@ -583,10 +584,20 @@ impl AnthropicProvider {
                                                 current_tool_calls.clear();
                                             }
                                         }
+                                        "message_delta" => {
+                                            // message_delta contains the stop_reason and final usage
+                                            if let Some(delta) = &event.delta {
+                                                if let Some(reason) = &delta.stop_reason {
+                                                    debug!("Received stop_reason: {}", reason);
+                                                    stop_reason = Some(reason.clone());
+                                                }
+                                            }
+                                            // Usage is also in message_delta but we get it from message_start
+                                        }
                                         "message_stop" => {
                                             debug!("Received message stop event");
                                             message_stopped = true;
-                                            let final_chunk = make_final_chunk(current_tool_calls.clone(), accumulated_usage.clone());
+                                            let final_chunk = make_final_chunk_with_reason(current_tool_calls.clone(), accumulated_usage.clone(), stop_reason.clone());
                                             if tx.send(Ok(final_chunk)).await.is_err() {
                                                 debug!("Receiver dropped, stopping stream");
                                             }
@@ -931,6 +942,8 @@ struct AnthropicStreamMessage {
 struct AnthropicDelta {
     text: Option<String>,
     partial_json: Option<String>,
+    #[serde(default)]
+    stop_reason: Option<String>,
 }
 
 #[derive(Debug, Deserialize)]
diff --git a/crates/g3-providers/src/databricks.rs b/crates/g3-providers/src/databricks.rs
index e7a23c7..bb25893 100644
--- a/crates/g3-providers/src/databricks.rs
+++ b/crates/g3-providers/src/databricks.rs
@@ -493,6 +493,7 @@ impl DatabricksProvider {
                                 finished: false,
                                 usage: None,
                                 tool_calls: None,
+                                stop_reason: None,
                             };
                             if tx.send(Ok(text_chunk)).await.is_err() {
                                 debug!("Receiver dropped");
diff --git a/crates/g3-providers/src/lib.rs b/crates/g3-providers/src/lib.rs
index 7a71785..a14ca03 100644
--- a/crates/g3-providers/src/lib.rs
+++ b/crates/g3-providers/src/lib.rs
@@ -187,6 +187,8 @@ pub struct CompletionChunk {
     pub finished: bool,
     pub tool_calls: Option<Vec<ToolCall>>,
     pub usage: Option<Usage>, // Add usage tracking for streaming
+    /// Stop reason from the API (e.g., "end_turn", "max_tokens", "stop_sequence")
+    pub stop_reason: Option<String>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
diff --git a/crates/g3-providers/src/openai.rs b/crates/g3-providers/src/openai.rs
index aa567ed..8dd05b0 100644
--- a/crates/g3-providers/src/openai.rs
+++ b/crates/g3-providers/src/openai.rs
@@ -157,6 +157,7 @@ impl OpenAIProvider {
                                         finished: true,
                                         tool_calls,
                                         usage: accumulated_usage.clone(),
+                                        stop_reason: None, // TODO: Extract from OpenAI response
                                     };
                                     let _ = tx.send(Ok(final_chunk)).await;
                                 }
diff --git a/crates/g3-providers/src/streaming.rs b/crates/g3-providers/src/streaming.rs
index 2295d61..15a3762 100644
--- a/crates/g3-providers/src/streaming.rs
+++ b/crates/g3-providers/src/streaming.rs
@@ -61,6 +61,22 @@ pub fn make_final_chunk(tool_calls: Vec<ToolCall>, usage: Option<Usage>) -> Comp
         } else {
             Some(tool_calls)
         },
+        stop_reason: None,
+    }
+}
+
+/// Create a final completion chunk with stop reason.
+pub fn make_final_chunk_with_reason(tool_calls: Vec<ToolCall>, usage: Option<Usage>, stop_reason: Option<String>) -> CompletionChunk {
+    CompletionChunk {
+        content: String::new(),
+        finished: true,
+        usage,
+        tool_calls: if tool_calls.is_empty() {
+            None
+        } else {
+            Some(tool_calls)
+        },
+        stop_reason,
     }
 }
 
@@ -71,6 +87,7 @@ pub fn make_text_chunk(content: String) -> CompletionChunk {
         finished: false,
         usage: None,
         tool_calls: None,
+        stop_reason: None,
     }
 }
 
@@ -81,5 +98,6 @@ pub fn make_tool_chunk(tool_calls: Vec<ToolCall>) -> CompletionChunk {
         finished: false,
         usage: None,
         tool_calls: Some(tool_calls),
+        stop_reason: None,
     }
 }