Tool calling support for Anthropic

Dhanji Prasanna
2025-09-09 14:25:39 +10:00
parent 02d95e01a0
commit fa34755851
9 changed files with 705 additions and 121 deletions

Cargo.lock generated
View File

@@ -732,6 +732,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"async-trait",
"futures-util",
"g3-config",
"g3-execution",
"g3-providers",

View File

@@ -39,19 +39,19 @@ Create a configuration file at `~/.config/g3/config.toml`:
```toml
[providers]
default_provider = "openai"
default_provider = "anthropic"
[providers.anthropic]
api_key = "your-anthropic-api-key"
model = "claude-3-5-sonnet-20241022"
max_tokens = 4096
temperature = 0.1
[providers.openai]
api_key = "your-openai-api-key"
model = "gpt-4"
max_tokens = 2048
temperature = 0.1
[providers.anthropic]
api_key = "your-anthropic-api-key"
model = "claude-3-sonnet-20240229"
max_tokens = 2048
temperature = 0.1
```
### Local Embedded Models

View File

@@ -58,7 +58,7 @@ impl Default for Config {
openai: None,
anthropic: None,
embedded: None,
default_provider: "openai".to_string(),
default_provider: "anthropic".to_string(),
},
agent: AgentConfig {
max_context_length: 8192,

View File

@@ -21,3 +21,4 @@ tokio-stream = "0.1"
llama_cpp = { version = "0.3.2", features = ["metal"] }
shellexpand = "3.1"
tokio-util = "0.7"
futures-util = "0.3"

View File

@@ -7,7 +7,7 @@ use std::fs;
use std::path::Path;
use std::time::{Duration, Instant};
use tokio_util::sync::CancellationToken;
use tracing::{error, info, warn};
use tracing::{error, info, warn, debug};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
@@ -229,7 +229,9 @@ impl Agent {
}
// Set default provider
debug!("Setting default provider to: {}", config.providers.default_provider);
providers.set_default(&config.providers.default_provider)?;
debug!("Default provider set successfully");
// Determine context window size based on active provider
let context_length = Self::determine_context_length(&config, &providers)?;
@@ -364,8 +366,10 @@ impl Agent {
let _provider = self.providers.get(None)?;
// Only add system message if this is the first interaction (empty conversation history)
if self.context_window.conversation_history.is_empty() {
let system_prompt = format!(
"You are G3, a general-purpose AI agent. Your goal is to analyze and solve problems step by step.
"You are G3, a general-purpose AI agent. Your goal is to analyze and solve problems by writing code.
# Tool Call Format
@@ -381,18 +385,21 @@ The tool will execute immediately and you'll receive the result to continue with
- Format: {{\"tool\": \"shell\", \"args\": {{\"command\": \"your_command_here\"}}}}
- Example: {{\"tool\": \"shell\", \"args\": {{\"command\": \"ls ~/Downloads\"}}}}
- **final_output**: Signal task completion
- **final_output**: Signal task completion with a summary of work done in markdown format
- Format: {{\"tool\": \"final_output\", \"args\": {{\"summary\": \"what_was_accomplished\"}}}}
# Instructions
1. Break down tasks into small steps
1. Analyze the request and break it down into smaller tasks if appropriate
2. Execute ONE tool at a time
3. Wait for the result before proceeding
4. Use the actual file paths on the system
5. End with final_output when done
3. STOP when the original request has been satisfied
4. End with final_output when done
# Response Guidelines
- Use Markdown formatting for all responses except tool calls.
- Whenever calling tools, use the pronoun 'I'
Let's start with the first step of your task.
");
if show_prompt {
@@ -406,23 +413,25 @@ Let's start with the first step of your task.
// Add system message to context window
let system_message = Message {
role: MessageRole::System,
content: system_prompt.clone(),
content: system_prompt,
};
self.context_window.add_message(system_message.clone());
self.context_window.add_message(system_message);
}
// Add user message to context window
let user_message = Message {
role: MessageRole::User,
content: format!("Task: {}", description),
};
self.context_window.add_message(user_message.clone());
self.context_window.add_message(user_message);
let messages = vec![system_message, user_message];
// Use the complete conversation history for the request
let messages = self.context_window.conversation_history.clone();
let request = CompletionRequest {
messages,
max_tokens: Some(2048),
temperature: Some(0.2),
temperature: Some(0.1),
stream: true, // Enable streaming
};
@@ -520,12 +529,15 @@ Let's start with the first step of your task.
&self.context_window
}
async fn stream_completion(&self, request: CompletionRequest) -> Result<(String, Duration)> {
async fn stream_completion(
&mut self,
request: CompletionRequest,
) -> Result<(String, Duration)> {
self.stream_completion_with_tools(request).await
}
async fn stream_completion_with_tools(
&self,
&mut self,
mut request: CompletionRequest,
) -> Result<(String, Duration)> {
use std::io::{self, Write};
@@ -587,8 +599,34 @@ Let's start with the first step of your task.
first_token_time = Some(stream_start.elapsed());
}
// Check for tool calls in the streaming content
if let Some((tool_call, tool_end_pos)) = parser.add_chunk(&chunk.content) {
// Check for tool calls - either from JSON parsing (embedded models)
// or from native tool calls (Anthropic, OpenAI, etc.)
let mut detected_tool_call = None;
// First check for native tool calls in the chunk
if let Some(ref tool_calls) = chunk.tool_calls {
debug!("Found native tool calls in chunk: {:?}", tool_calls);
if let Some(first_tool) = tool_calls.first() {
// Convert native tool call to our internal format
detected_tool_call = Some((
crate::ToolCall {
tool: first_tool.tool.clone(),
args: first_tool.args.clone(),
},
current_response.len(), // Position doesn't matter for native calls
));
debug!("Converted native tool call: {:?}", detected_tool_call);
}
} else {
debug!("No native tool calls in chunk, chunk.tool_calls is None");
}
// If no native tool calls, check for JSON tool calls in text (embedded models)
if detected_tool_call.is_none() {
detected_tool_call = parser.add_chunk(&chunk.content);
}
if let Some((tool_call, tool_end_pos)) = detected_tool_call {
// Found a complete tool call! Stop streaming and execute it
let content_before_tool = parser.get_content_before_tool(tool_end_pos);
@@ -621,7 +659,7 @@ Let's start with the first step of your task.
// Tool call header
println!("┌─ {}", tool_call.tool);
if let Some(args_obj) = tool_call.args.as_object() {
for (key, value) in args_obj {
for (_key, value) in args_obj {
let value_str = match value {
serde_json::Value::String(s) => s.clone(),
_ => value.to_string(),
@@ -664,7 +702,7 @@ Let's start with the first step of your task.
print!("🤖 "); // Continue response indicator
io::stdout().flush()?;
// Update the conversation with the tool call and result
// Add the tool call and result to the context window immediately
let tool_message = Message {
role: MessageRole::Assistant,
content: format!(
@@ -679,8 +717,12 @@ Let's start with the first step of your task.
content: format!("Tool result: {}", tool_result),
};
//request.messages.push(tool_message);
request.messages.push(result_message);
// Add to context window for persistence
self.context_window.add_message(tool_message);
self.context_window.add_message(result_message);
// Update the request with the new context for next iteration
request.messages = self.context_window.conversation_history.clone();
full_response.push_str(display_content);
full_response.push_str(&format!(

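The agent now resolves tool calls from two sources: native `tool_calls` attached to a streamed chunk (as the Anthropic provider emits them) and, failing that, the existing JSON-in-text parser used for embedded models. A minimal, self-contained sketch of that ordering follows; the stand-in types use field names from the diff, and the fallback parser is a hypothetical placeholder, not the real `parser.add_chunk`:

```rust
use serde_json::Value;

// Stand-ins for the provider chunk and the agent's internal tool call.
struct NativeToolCall { tool: String, args: Value }
struct Chunk { content: String, tool_calls: Option<Vec<NativeToolCall>> }
#[derive(Debug)]
struct AgentToolCall { tool: String, args: Value }

// Hypothetical placeholder for the streaming JSON parser used by embedded models.
fn parse_json_tool_call(_text: &str) -> Option<(AgentToolCall, usize)> {
    None
}

// Mirrors the ordering in stream_completion_with_tools: prefer native tool calls,
// otherwise fall back to scanning the streamed text for a JSON tool call.
fn detect_tool_call(chunk: &Chunk, response_so_far: &str) -> Option<(AgentToolCall, usize)> {
    if let Some(first) = chunk.tool_calls.as_ref().and_then(|c| c.first()) {
        return Some((
            AgentToolCall { tool: first.tool.clone(), args: first.args.clone() },
            response_so_far.len(), // position is irrelevant for native calls
        ));
    }
    parse_json_tool_call(&chunk.content)
}

fn main() {
    let chunk = Chunk {
        content: String::new(),
        tool_calls: Some(vec![NativeToolCall {
            tool: "shell".into(),
            args: serde_json::json!({ "command": "ls" }),
        }]),
    };
    println!("{:?}", detect_tool_call(&chunk, ""));
}
```

Native calls take priority so that providers with structured tool use never depend on the text parser; consistent with the diff, the reported position is just the current response length because there is no JSON span to strip from the text.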
View File

@@ -1,10 +1,14 @@
use g3_providers::{LLMProvider, CompletionRequest, CompletionResponse, CompletionStream, CompletionChunk, Usage, Message, MessageRole};
use g3_providers::{LLMProvider, CompletionRequest, CompletionResponse, CompletionStream, CompletionChunk, Usage, Message, MessageRole, ToolCall};
use anyhow::Result;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use tracing::{debug, error};
use serde_json::Value;
use tracing::{debug, error, info};
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tokio_stream::StreamExt;
use futures_util::stream::Stream;
use std::pin::Pin;
pub struct AnthropicProvider {
client: Client,
@@ -22,26 +26,68 @@ struct AnthropicRequest {
max_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
temperature: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
tools: Option<Vec<AnthropicTool>>,
}
#[derive(Debug, Serialize)]
struct AnthropicTool {
name: String,
description: String,
input_schema: Value,
}
#[derive(Debug, Serialize)]
struct AnthropicMessage {
role: String,
content: AnthropicMessageContent,
}
#[derive(Debug, Serialize)]
#[serde(untagged)]
enum AnthropicMessageContent {
Text(String),
Blocks(Vec<AnthropicContentBlock>),
}
#[derive(Debug, Serialize)]
#[serde(tag = "type")]
enum AnthropicContentBlock {
#[serde(rename = "text")]
Text { text: String },
#[serde(rename = "tool_use")]
ToolUse {
id: String,
name: String,
input: Value,
},
#[serde(rename = "tool_result")]
ToolResult {
tool_use_id: String,
content: String,
},
}
#[derive(Debug, Deserialize)]
struct AnthropicResponse {
content: Vec<AnthropicContent>,
content: Vec<AnthropicResponseContent>,
usage: AnthropicUsage,
model: String,
#[serde(default)]
stop_reason: Option<String>,
}
#[derive(Debug, Deserialize)]
struct AnthropicContent {
#[serde(rename = "type")]
content_type: String,
text: String,
#[serde(tag = "type")]
enum AnthropicResponseContent {
#[serde(rename = "text")]
Text { text: String },
#[serde(rename = "tool_use")]
ToolUse {
id: String,
name: String,
input: Value,
},
}
#[derive(Debug, Deserialize)]
@@ -50,6 +96,24 @@ struct AnthropicUsage {
output_tokens: u32,
}
// Streaming response structures
#[derive(Debug, Deserialize)]
struct AnthropicStreamEvent {
#[serde(rename = "type")]
event_type: String,
#[serde(flatten)]
data: Value,
}
#[derive(Debug, Deserialize)]
struct AnthropicStreamDelta {
#[serde(rename = "type")]
delta_type: String,
text: Option<String>,
#[serde(flatten)]
other: Value,
}
impl AnthropicProvider {
pub fn new(api_key: String, model: String) -> Result<Self> {
let client = Client::new();
@@ -68,15 +132,209 @@ impl AnthropicProvider {
MessageRole::User => "user".to_string(),
MessageRole::Assistant => "assistant".to_string(),
},
content: message.content.clone(),
content: AnthropicMessageContent::Text(message.content.clone()),
}
}
fn create_tools() -> Vec<AnthropicTool> {
vec![
AnthropicTool {
name: "shell".to_string(),
description: "Execute a shell command and return the output".to_string(),
input_schema: serde_json::json!({
"type": "object",
"properties": {
"command": {
"type": "string",
"description": "The shell command to execute"
}
},
"required": ["command"]
}),
},
AnthropicTool {
name: "final_output".to_string(),
description: "Provide a final summary or output for the task".to_string(),
input_schema: serde_json::json!({
"type": "object",
"properties": {
"summary": {
"type": "string",
"description": "A summary of what was accomplished"
}
},
"required": ["summary"]
}),
},
]
}
fn extract_content_and_tools(&self, response: &AnthropicResponse) -> (String, Vec<(String, String, Value)>) {
let mut text_content = String::new();
let mut tool_calls = Vec::new();
for content in &response.content {
match content {
AnthropicResponseContent::Text { text } => {
if !text_content.is_empty() {
text_content.push('\n');
}
text_content.push_str(text);
}
AnthropicResponseContent::ToolUse { id, name, input } => {
tool_calls.push((id.clone(), name.clone(), input.clone()));
}
}
}
(text_content, tool_calls)
}
async fn execute_tool(&self, tool_name: &str, input: &Value) -> Result<String> {
match tool_name {
"shell" => {
if let Some(command) = input.get("command").and_then(|v| v.as_str()) {
info!("Executing shell command via Anthropic tool: {}", command);
// Import the CodeExecutor from g3-execution
use g3_execution::CodeExecutor;
let executor = CodeExecutor::new();
match executor.execute_code("bash", command).await {
Ok(result) => {
if result.success {
Ok(if result.stdout.is_empty() {
"✅ Command executed successfully".to_string()
} else {
result.stdout
})
} else {
Ok(format!("❌ Command failed: {}", result.stderr))
}
}
Err(e) => {
error!("Shell execution error: {}", e);
Ok(format!("❌ Execution error: {}", e))
}
}
} else {
Ok("❌ Missing command argument".to_string())
}
}
"final_output" => {
if let Some(summary) = input.get("summary").and_then(|v| v.as_str()) {
Ok(format!("📋 Final Output: {}", summary))
} else {
Ok("📋 Task completed".to_string())
}
}
_ => {
error!("Unknown tool: {}", tool_name);
Ok(format!("❓ Unknown tool: {}", tool_name))
}
}
}
async fn complete_with_tools(&self, request: CompletionRequest) -> Result<CompletionResponse> {
// Separate system messages from other messages
let mut system_content: Option<String> = None;
let mut non_system_messages = Vec::new();
for message in &request.messages {
match message.role {
MessageRole::System => {
// Combine multiple system messages if present
if let Some(existing) = &system_content {
system_content = Some(format!("{}\n\n{}", existing, message.content));
} else {
system_content = Some(message.content.clone());
}
}
_ => {
non_system_messages.push(self.convert_message(message));
}
}
}
let anthropic_request = AnthropicRequest {
model: self.model.clone(),
system: system_content,
messages: non_system_messages,
max_tokens: request.max_tokens,
temperature: request.temperature,
tools: Some(Self::create_tools()),
};
let response = self
.client
.post("https://api.anthropic.com/v1/messages")
.header("x-api-key", &self.api_key)
.header("Content-Type", "application/json")
.header("anthropic-version", "2023-06-01")
.json(&anthropic_request)
.send()
.await?;
if !response.status().is_success() {
let error_text = response.text().await?;
error!("Anthropic API error: {}", error_text);
anyhow::bail!("Anthropic API error: {}", error_text);
}
let anthropic_response: AnthropicResponse = response.json().await?;
debug!("Anthropic response: {:?}", anthropic_response);
let (text_content, tool_calls) = self.extract_content_and_tools(&anthropic_response);
// For the completion API, we'll execute tools and return the combined result
let final_content = if !tool_calls.is_empty() {
info!("Anthropic response contains {} tool calls", tool_calls.len());
let mut content_with_tools = text_content.clone();
for (_id, name, input) in tool_calls {
// Execute the tool call
let tool_result = match self.execute_tool(&name, &input).await {
Ok(result) => result,
Err(e) => format!("Error executing tool {}: {}", name, e),
};
// Append tool execution info to content
content_with_tools.push_str(&format!(
"\n\nTool executed: {} -> {}\n",
name, tool_result
));
}
content_with_tools
} else {
text_content
};
Ok(CompletionResponse {
content: final_content,
usage: Usage {
prompt_tokens: anthropic_response.usage.input_tokens,
completion_tokens: anthropic_response.usage.output_tokens,
total_tokens: anthropic_response.usage.input_tokens + anthropic_response.usage.output_tokens,
},
model: anthropic_response.model,
})
}
}
#[async_trait::async_trait]
impl LLMProvider for AnthropicProvider {
async fn complete(&self, request: CompletionRequest) -> Result<CompletionResponse> {
debug!("Making Anthropic completion request");
debug!("Making Anthropic completion request with tools");
// This is a simplified implementation - for full tool support,
// we should use the streaming method with proper tool handling
self.complete_with_tools(request).await
}
async fn stream(&self, request: CompletionRequest) -> Result<CompletionStream> {
debug!("Making Anthropic streaming request with tools");
let (tx, rx) = mpsc::channel(100);
// Separate system messages from other messages
let mut system_content: Option<String> = None;
@@ -104,58 +362,196 @@ impl LLMProvider for AnthropicProvider {
messages: non_system_messages,
max_tokens: request.max_tokens,
temperature: request.temperature,
tools: Some(Self::create_tools()),
};
let response = self
.client
// Add stream parameter
let mut request_json = serde_json::to_value(&anthropic_request)?;
request_json["stream"] = serde_json::Value::Bool(true);
let client = self.client.clone();
let api_key = self.api_key.clone();
tokio::spawn(async move {
debug!("Sending Anthropic streaming request with tools: {:?}", request_json);
let response = client
.post("https://api.anthropic.com/v1/messages")
.header("x-api-key", &self.api_key)
.header("x-api-key", &api_key)
.header("Content-Type", "application/json")
.header("anthropic-version", "2023-06-01")
.json(&anthropic_request)
.json(&request_json)
.send()
.await?;
.await;
if !response.status().is_success() {
let error_text = response.text().await?;
error!("Anthropic API error: {}", error_text);
anyhow::bail!("Anthropic API error: {}", error_text);
let response = match response {
Ok(resp) => {
if !resp.status().is_success() {
let error_text = resp.text().await.unwrap_or_default();
let _ = tx.send(Err(anyhow::anyhow!("Anthropic API error: {}", error_text))).await;
return;
}
let anthropic_response: AnthropicResponse = response.json().await?;
let content = anthropic_response
.content
.first()
.map(|content| content.text.clone())
.unwrap_or_default();
Ok(CompletionResponse {
content,
usage: Usage {
prompt_tokens: anthropic_response.usage.input_tokens,
completion_tokens: anthropic_response.usage.output_tokens,
total_tokens: anthropic_response.usage.input_tokens + anthropic_response.usage.output_tokens,
},
model: anthropic_response.model,
})
resp
}
Err(e) => {
let _ = tx.send(Err(e.into())).await;
return;
}
async fn stream(&self, request: CompletionRequest) -> Result<CompletionStream> {
debug!("Making Anthropic streaming request");
let (tx, rx) = mpsc::channel(100);
// For now, just send the complete response as a single chunk
// In a real implementation, we'd handle Server-Sent Events
let completion = self.complete(request).await?;
let chunk = CompletionChunk {
content: completion.content,
finished: true,
};
tx.send(Ok(chunk)).await.map_err(|_| anyhow::anyhow!("Failed to send chunk"))?;
// Handle Server-Sent Events
let mut stream = response.bytes_stream();
let mut buffer = String::new();
let mut pending_tool_calls = Vec::new();
while let Some(chunk_result) = stream.next().await {
let chunk = match chunk_result {
Ok(bytes) => bytes,
Err(e) => {
let _ = tx.send(Err(e.into())).await;
break;
}
};
let chunk_str = match std::str::from_utf8(&chunk) {
Ok(s) => s,
Err(_) => continue,
};
buffer.push_str(chunk_str);
// Process complete lines
while let Some(line_end) = buffer.find('\n') {
let line = buffer[..line_end].trim().to_string();
buffer.drain(..line_end + 1);
if line.is_empty() {
continue;
}
// Parse SSE format: "data: {...}"
if let Some(data) = line.strip_prefix("data: ") {
debug!("Raw SSE data: {}", data);
if data == "[DONE]" {
// Send any pending tool calls first
if !pending_tool_calls.is_empty() {
let tool_chunk = CompletionChunk {
content: String::new(),
finished: false,
tool_calls: Some(pending_tool_calls.clone()),
};
let _ = tx.send(Ok(tool_chunk)).await;
pending_tool_calls.clear();
}
// Send final chunk
let final_chunk = CompletionChunk {
content: String::new(),
finished: true,
tool_calls: None,
};
let _ = tx.send(Ok(final_chunk)).await;
break;
}
// Parse the JSON event
match serde_json::from_str::<AnthropicStreamEvent>(data) {
Ok(event) => {
debug!("Received Anthropic event: type={}, data={:?}", event.event_type, event.data);
match event.event_type.as_str() {
"content_block_start" => {
// Check if this is a tool use block
if let Some(content_block) = event.data.get("content_block") {
if let Some(block_type) = content_block.get("type").and_then(|t| t.as_str()) {
if block_type == "tool_use" {
// Extract tool call information immediately
if let (Some(id), Some(name), Some(input)) = (
content_block.get("id").and_then(|v| v.as_str()),
content_block.get("name").and_then(|v| v.as_str()),
content_block.get("input")
) {
let tool_call = ToolCall {
id: id.to_string(),
tool: name.to_string(),
args: input.clone(),
};
debug!("Added tool call from content_block_start: {:?}", tool_call);
pending_tool_calls.push(tool_call);
}
}
}
}
}
"content_block_delta" => {
// Extract text from delta
if let Some(delta) = event.data.get("delta") {
if let Some(text) = delta.get("text").and_then(|t| t.as_str()) {
let chunk = CompletionChunk {
content: text.to_string(),
finished: false,
tool_calls: None,
};
if tx.send(Ok(chunk)).await.is_err() {
break;
}
}
}
}
"content_block_stop" => {
// Check if we have a complete tool use block
if let Some(content_block) = event.data.get("content_block") {
if let Some(block_type) = content_block.get("type").and_then(|t| t.as_str()) {
if block_type == "tool_use" {
// Extract tool call information
if let (Some(id), Some(name), Some(input)) = (
content_block.get("id").and_then(|v| v.as_str()),
content_block.get("name").and_then(|v| v.as_str()),
content_block.get("input")
) {
let tool_call = ToolCall {
id: id.to_string(),
tool: name.to_string(),
args: input.clone(),
};
pending_tool_calls.push(tool_call);
}
}
}
}
debug!("Content block finished");
}
"message_stop" => {
// Send any pending tool calls first
if !pending_tool_calls.is_empty() {
let tool_chunk = CompletionChunk {
content: String::new(),
finished: false,
tool_calls: Some(pending_tool_calls.clone()),
};
let _ = tx.send(Ok(tool_chunk)).await;
}
// Message finished
let final_chunk = CompletionChunk {
content: String::new(),
finished: true,
tool_calls: None,
};
let _ = tx.send(Ok(final_chunk)).await;
break;
}
_ => {
debug!("Unhandled event type: {}", event.event_type);
}
}
}
Err(e) => {
debug!("Failed to parse streaming event: {} - Data: {}", e, data);
}
}
}
}
}
});
Ok(ReceiverStream::new(rx))
}
@@ -167,4 +563,8 @@ impl LLMProvider for AnthropicProvider {
fn model(&self) -> &str {
&self.model
}
fn has_native_tool_calling(&self) -> bool {
true
}
}
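The streaming handler consumes Anthropic's SSE events line by line and dispatches on the `type` field. A small sketch of how a single `data:` payload deserializes into the event shape used above; the struct is repeated locally for illustration and the payload is illustrative, not captured from a live response:

```rust
use serde::Deserialize;
use serde_json::Value;

// Local copy of the event shape used in the provider.
#[derive(Debug, Deserialize)]
struct AnthropicStreamEvent {
    #[serde(rename = "type")]
    event_type: String,
    #[serde(flatten)]
    data: Value,
}

fn main() -> anyhow::Result<()> {
    // Illustrative SSE "data:" payload for a text delta (shape based on the
    // events handled above).
    let data = r#"{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}"#;

    let event: AnthropicStreamEvent = serde_json::from_str(data)?;
    assert_eq!(event.event_type, "content_block_delta");

    // Same extraction path as the streaming loop: delta.text, if present.
    if let Some(text) = event
        .data
        .get("delta")
        .and_then(|d| d.get("text"))
        .and_then(|t| t.as_str())
    {
        println!("streamed text: {}", text);
    }
    Ok(())
}
```

The `#[serde(flatten)]` field keeps the remaining keys as raw JSON, which is why the handler can `get("content_block")` or `get("delta")` without modeling every event variant.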

View File

@@ -8,12 +8,12 @@ use llama_cpp::{
LlamaModel, LlamaParams, LlamaSession, SessionParams,
};
use std::path::Path;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use tokio::sync::mpsc;
use tokio::sync::Mutex;
use tokio_stream::wrappers::ReceiverStream;
use tracing::{debug, error, info, warn};
use tracing::{debug, error, info};
pub struct EmbeddedProvider {
model: Arc<LlamaModel>,
@@ -129,6 +129,9 @@ impl EmbeddedProvider {
debug!("Context calculation: prompt_tokens={}, context_length={}, available_tokens={}, dynamic_max_tokens={}",
prompt_tokens, self.context_length, available_tokens, dynamic_max_tokens);
// Get stop sequences before entering the closure
let stop_sequences = self.get_stop_sequences();
// Add timeout to the entire operation
let timeout_duration = std::time::Duration::from_secs(30); // Increased timeout for larger contexts
@@ -202,8 +205,16 @@ impl EmbeddedProvider {
}
// Stop on completion markers
if generated_text.contains("</s>") || generated_text.contains("[/INST]") {
debug!("Hit CodeLlama stop sequence at {} tokens", token_count);
let mut hit_stop = false;
for stop_seq in &stop_sequences {
if generated_text.contains(stop_seq) {
debug!("Hit stop sequence '{}' at {} tokens", stop_seq, token_count);
hit_stop = true;
break;
}
}
if hit_stop {
break;
}
}
@@ -213,7 +224,8 @@ impl EmbeddedProvider {
token_count,
start_time.elapsed()
);
Ok((generated_text.trim().to_string(), token_count))
Ok((generated_text, token_count))
}),
)
.await;
@@ -226,7 +238,8 @@ impl EmbeddedProvider {
"Completed generation: {} tokens (dynamic limit was {})",
token_count, dynamic_max_tokens
);
Ok(text)
// Clean stop sequences from the generated text after the closure
Ok(self.clean_stop_sequences(&text))
}
Err(e) => Err(e),
},
@@ -245,6 +258,78 @@ impl EmbeddedProvider {
// This is conservative - actual tokenization might be different
(text.len() as f32 / 4.0).ceil() as u32
}
// Helper function to get stop sequences based on model type
fn get_stop_sequences(&self) -> Vec<&'static str> {
// Determine model type from model_name
let model_name_lower = self.model_name.to_lowercase();
if model_name_lower.contains("codellama") || model_name_lower.contains("code-llama") {
vec![
"</s>", // End of sequence
"[/INST]", // End of instruction
"<</SYS>>", // End of system message
"[INST]", // Start of new instruction (shouldn't appear in response)
"<<SYS>>", // Start of system (shouldn't appear in response)
]
} else if model_name_lower.contains("llama") {
vec![
"</s>", // End of sequence
"[/INST]", // End of instruction
"<</SYS>>", // End of system message
"### Human:", // Conversation format
"### Assistant:", // Conversation format
"[INST]", // Start of new instruction
]
} else if model_name_lower.contains("mistral") {
vec![
"</s>", // End of sequence
"[/INST]", // End of instruction
"<|im_end|>", // ChatML format
]
} else if model_name_lower.contains("vicuna") || model_name_lower.contains("wizard") {
vec![
"### Human:", // Conversation format
"### Assistant:", // Conversation format
"USER:", // Alternative format
"ASSISTANT:", // Alternative format
"</s>", // End of sequence
]
} else if model_name_lower.contains("alpaca") {
vec![
"### Instruction:", // Alpaca format
"### Response:", // Alpaca format
"### Input:", // Alpaca format
"</s>", // End of sequence
]
} else {
// Generic/unknown model - use common stop sequences
vec![
"</s>", // Most common end sequence
"<|endoftext|>", // GPT-style
"<|im_end|>", // ChatML
"### Human:", // Common conversation format
"### Assistant:", // Common conversation format
"[/INST]", // Instruction format
"<</SYS>>", // System format
]
}
}
// Helper function to clean up stop sequences from generated text
fn clean_stop_sequences(&self, text: &str) -> String {
let mut cleaned = text.to_string();
let stop_sequences = self.get_stop_sequences();
for stop_seq in &stop_sequences {
if let Some(pos) = cleaned.find(stop_seq) {
cleaned.truncate(pos);
break; // Only remove the first occurrence to avoid over-truncation
}
}
cleaned.trim().to_string()
}
}
#[async_trait::async_trait]
@@ -334,6 +419,17 @@ impl LLMProvider for EmbeddedProvider {
let mut accumulated_text = String::new();
let mut token_count = 0;
// Get stop sequences based on the prompt format
// We can't access self (and its model name) inside the spawned task,
// so we detect the format from the prompt text instead
let stop_sequences = if prompt.contains("[INST]") || prompt.contains("<<SYS>>") {
// Llama/CodeLlama format detected
vec!["</s>", "[/INST]", "<</SYS>>", "[INST]", "<<SYS>>", "### Human:", "### Assistant:"]
} else {
// Generic format
vec!["</s>", "<|endoftext|>", "<|im_end|>", "### Human:", "### Assistant:", "[/INST]", "<</SYS>>"]
};
// Stream tokens with proper limits
while let Some(token) = completion_handle.next_token() {
let token_string = session.model().token_to_piece(token);
@@ -341,36 +437,66 @@ impl LLMProvider for EmbeddedProvider {
accumulated_text.push_str(&token_string);
token_count += 1;
// Check if we've hit a stop sequence
let mut hit_stop = false;
for stop_seq in &stop_sequences {
if accumulated_text.contains(stop_seq) {
debug!("Hit stop sequence in streaming: {}", stop_seq);
hit_stop = true;
break;
}
}
if hit_stop {
// Don't send the token that contains the stop sequence
// Instead, send only the part before the stop sequence
let mut clean_accumulated = accumulated_text.clone();
for stop_seq in &stop_sequences {
if let Some(pos) = clean_accumulated.find(stop_seq) {
clean_accumulated.truncate(pos);
break;
}
}
// Calculate what part we haven't sent yet
let already_sent_len = accumulated_text.len() - token_string.len();
if clean_accumulated.len() > already_sent_len {
let remaining_to_send = &clean_accumulated[already_sent_len..];
if !remaining_to_send.is_empty() {
let chunk = CompletionChunk {
content: remaining_to_send.to_string(),
finished: false,
tool_calls: None,
};
let _ = tx.blocking_send(Ok(chunk));
}
}
break;
} else {
// Normal token, send it
let chunk = CompletionChunk {
content: token_string.clone(),
finished: false,
tool_calls: None,
};
if tx.blocking_send(Ok(chunk)).is_err() {
break; // Receiver dropped
}
}
// Enforce token limit
if token_count >= max_tokens as usize {
debug!("Reached max token limit in streaming: {}", max_tokens);
break;
}
// Stop if we hit common stop sequences
if accumulated_text.contains("### Human")
|| accumulated_text.contains("### System")
|| accumulated_text.contains("<|end|>")
|| accumulated_text.contains("</s>")
{
debug!("Hit stop sequence in streaming, stopping generation");
break;
}
}
// Send final chunk
let final_chunk = CompletionChunk {
content: String::new(),
finished: true,
tool_calls: None,
};
let _ = tx.blocking_send(Ok(final_chunk));
});
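The flush logic when a stop sequence arrives mid-token is easy to get wrong: only the part of the accumulated text that precedes the stop marker and has not already been streamed should go out. A standalone sketch of that arithmetic, mirroring the truncation and `already_sent_len` slicing in the loop above:

```rust
// Minimal sketch of the partial-flush arithmetic used when a stop sequence
// arrives mid-token: everything before the stop marker that hasn't been
// streamed yet is emitted, the rest is dropped.
fn flush_before_stop(accumulated: &str, last_token: &str, stop_sequences: &[&str]) -> Option<String> {
    // Truncate at the first stop sequence found, as clean_stop_sequences does.
    let mut clean = accumulated.to_string();
    for stop in stop_sequences {
        if let Some(pos) = clean.find(stop) {
            clean.truncate(pos);
            break;
        }
    }
    // Only the portion that has not already been streamed out is returned.
    let already_sent = accumulated.len() - last_token.len();
    if clean.len() > already_sent {
        Some(clean[already_sent..].to_string())
    } else {
        None
    }
}

fn main() {
    // "Answer" was already streamed; the final token " done</s>" contains a stop marker.
    let flushed = flush_before_stop("Answer done</s>", " done</s>", &["</s>"]);
    assert_eq!(flushed.as_deref(), Some(" done"));
    println!("{:?}", flushed);
}
```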

View File

@@ -140,6 +140,7 @@ impl LLMProvider for OpenAIProvider {
let chunk = CompletionChunk {
content: completion.content,
finished: true,
tool_calls: None,
};
tx.send(Ok(chunk)).await.map_err(|_| anyhow::anyhow!("Failed to send chunk"))?;

View File

@@ -16,6 +16,11 @@ pub trait LLMProvider: Send + Sync {
/// Get the model name
fn model(&self) -> &str;
/// Check if the provider supports native tool calling
fn has_native_tool_calling(&self) -> bool {
false
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -60,6 +65,14 @@ pub type CompletionStream = tokio_stream::wrappers::ReceiverStream<Result<Comple
pub struct CompletionChunk {
pub content: String,
pub finished: bool,
pub tool_calls: Option<Vec<ToolCall>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
pub id: String,
pub tool: String,
pub args: serde_json::Value,
}
/// Provider registry for managing multiple LLM providers
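Together, the new trait hook and the `tool_calls` field on `CompletionChunk` are what let the agent prefer structured tool calls when a provider offers them. A small, self-contained sketch with local stand-ins for the types (field names follow the diff; this is not the real `g3_providers` surface):

```rust
use serde_json::Value;

// Local stand-ins mirroring the new provider types.
#[derive(Debug, Clone)]
struct ToolCall { id: String, tool: String, args: Value }

#[derive(Debug, Clone)]
struct CompletionChunk { content: String, finished: bool, tool_calls: Option<Vec<ToolCall>> }

// The new capability hook: providers default to `false` and override it
// when the API emits structured tool calls (as the Anthropic provider now does).
trait NativeToolCalling {
    fn has_native_tool_calling(&self) -> bool { false }
}

struct AnthropicLike;
impl NativeToolCalling for AnthropicLike {
    fn has_native_tool_calling(&self) -> bool { true }
}

struct EmbeddedLike;
impl NativeToolCalling for EmbeddedLike {} // keeps the JSON-in-text fallback

fn main() {
    let provider = AnthropicLike;
    // A chunk carrying a native tool call, as the streaming path now emits
    // just before the final chunk.
    let chunk = CompletionChunk {
        content: String::new(),
        finished: false,
        tool_calls: Some(vec![ToolCall {
            id: "toolu_example".into(),
            tool: "shell".into(),
            args: serde_json::json!({ "command": "ls ~/Downloads" }),
        }]),
    };
    if provider.has_native_tool_calling() {
        println!("native tool calls: {:?}", chunk.tool_calls);
    }
}
```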