Tool calling support for Anthropic
Cargo.lock (generated, 1 line changed)

@@ -732,6 +732,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"async-trait",
"futures-util",
"g3-config",
"g3-execution",
"g3-providers",
README.md (14 lines changed)

@@ -39,19 +39,19 @@ Create a configuration file at `~/.config/g3/config.toml`:

```toml
[providers]
default_provider = "openai"
default_provider = "anthropic"

[providers.anthropic]
api_key = "your-anthropic-api-key"
model = "claude-3-5-sonnet-20241022"
max_tokens = 4096
temperature = 0.1

[providers.openai]
api_key = "your-openai-api-key"
model = "gpt-4"
max_tokens = 2048
temperature = 0.1

[providers.anthropic]
api_key = "your-anthropic-api-key"
model = "claude-3-sonnet-20240229"
max_tokens = 2048
temperature = 0.1
```
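As an aside, a config in this shape deserializes naturally with serde. The sketch below is illustrative only: the struct and field names (ProvidersFile, ProvidersSection, ProviderEntry) are invented here and are not the crate's real config types, though the fields mirror the TOML above.

```rust
use serde::Deserialize;

// Invented names for illustration; not the crate's real config structs.
#[derive(Debug, Deserialize)]
struct ProvidersFile {
    providers: ProvidersSection,
}

#[derive(Debug, Deserialize)]
struct ProvidersSection {
    default_provider: String,
    anthropic: Option<ProviderEntry>,
    openai: Option<ProviderEntry>,
}

#[derive(Debug, Deserialize)]
struct ProviderEntry {
    api_key: String,
    model: String,
    max_tokens: u32,
    temperature: f32,
}

fn main() -> Result<(), toml::de::Error> {
    // Same shape as the README example above, inlined for a self-contained run.
    let raw = r#"
        [providers]
        default_provider = "anthropic"

        [providers.anthropic]
        api_key = "your-anthropic-api-key"
        model = "claude-3-5-sonnet-20241022"
        max_tokens = 4096
        temperature = 0.1
    "#;
    let cfg: ProvidersFile = toml::from_str(raw)?;
    assert_eq!(cfg.providers.default_provider, "anthropic");
    assert!(cfg.providers.openai.is_none());
    Ok(())
}
```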
### Local Embedded Models

@@ -58,7 +58,7 @@ impl Default for Config {
openai: None,
anthropic: None,
embedded: None,
default_provider: "openai".to_string(),
default_provider: "anthropic".to_string(),
},
agent: AgentConfig {
max_context_length: 8192,

@@ -21,3 +21,4 @@ tokio-stream = "0.1"
llama_cpp = { version = "0.3.2", features = ["metal"] }
shellexpand = "3.1"
tokio-util = "0.7"
futures-util = "0.3"

@@ -7,7 +7,7 @@ use std::fs;
use std::path::Path;
use std::time::{Duration, Instant};
use tokio_util::sync::CancellationToken;
use tracing::{error, info, warn};
use tracing::{error, info, warn, debug};

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {

@@ -229,7 +229,9 @@ impl Agent {
}

// Set default provider
debug!("Setting default provider to: {}", config.providers.default_provider);
providers.set_default(&config.providers.default_provider)?;
debug!("Default provider set successfully");

// Determine context window size based on active provider
let context_length = Self::determine_context_length(&config, &providers)?;

@@ -364,8 +366,10 @@ impl Agent {

let _provider = self.providers.get(None)?;

// Only add system message if this is the first interaction (empty conversation history)
if self.context_window.conversation_history.is_empty() {
let system_prompt = format!(
"You are G3, a general-purpose AI agent. Your goal is to analyze and solve problems step by step.
"You are G3, a general-purpose AI agent. Your goal is to analyze and solve problems by writing code.

# Tool Call Format

@@ -381,18 +385,21 @@ The tool will execute immediately and you'll receive the result to continue with
- Format: {{\"tool\": \"shell\", \"args\": {{\"command\": \"your_command_here\"}}}}
- Example: {{\"tool\": \"shell\", \"args\": {{\"command\": \"ls ~/Downloads\"}}}}

- **final_output**: Signal task completion
- **final_output**: Signal task completion with a summary of work done in markdown format
- Format: {{\"tool\": \"final_output\", \"args\": {{\"summary\": \"what_was_accomplished\"}}}}

# Instructions

1. Break down tasks into small steps
1. Analyze the request and break down into smaller tasks if appropriate
2. Execute ONE tool at a time
3. Wait for the result before proceeding
4. Use the actual file paths on the system
5. End with final_output when done
3. STOP when the original request was satisfied
4. End with final_output when done

# Response Guidelines

- Use Markdown formatting for all responses except tool calls.
- Whenever calling tools, use the pronoun 'I'

Let's start with the first step of your task.
");

if show_prompt {
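For providers without native tool calling, the prompt above makes the model emit each call as a bare JSON object in its text. A minimal sketch of what that protocol looks like and how it parses; the ToolCall struct here is a local stand-in for the agent's own type, and the example values are made up.

```rust
use serde::Deserialize;
use serde_json::Value;

// Local mirror of the agent's ToolCall shape, for illustration only.
#[derive(Debug, Deserialize)]
struct ToolCall {
    tool: String,
    args: Value,
}

fn main() -> serde_json::Result<()> {
    // What a model following the prompt is expected to emit mid-stream.
    let raw = r#"{"tool": "shell", "args": {"command": "ls ~/Downloads"}}"#;
    let call: ToolCall = serde_json::from_str(raw)?;
    assert_eq!(call.tool, "shell");
    assert_eq!(call.args["command"], "ls ~/Downloads");

    // Completion is signalled the same way.
    let done = r#"{"tool": "final_output", "args": {"summary": "Listed the Downloads folder"}}"#;
    let call: ToolCall = serde_json::from_str(done)?;
    assert_eq!(call.tool, "final_output");
    Ok(())
}
```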
@@ -406,23 +413,25 @@ Let's start with the first step of your task.
// Add system message to context window
let system_message = Message {
role: MessageRole::System,
content: system_prompt.clone(),
content: system_prompt,
};
self.context_window.add_message(system_message.clone());
self.context_window.add_message(system_message);
}

// Add user message to context window
let user_message = Message {
role: MessageRole::User,
content: format!("Task: {}", description),
};
self.context_window.add_message(user_message.clone());
self.context_window.add_message(user_message);

let messages = vec![system_message, user_message];
// Use the complete conversation history for the request
let messages = self.context_window.conversation_history.clone();

let request = CompletionRequest {
messages,
max_tokens: Some(2048),
temperature: Some(0.2),
temperature: Some(0.1),
stream: true, // Enable streaming
};

@@ -520,12 +529,15 @@ Let's start with the first step of your task.
&self.context_window
}

async fn stream_completion(&self, request: CompletionRequest) -> Result<(String, Duration)> {
async fn stream_completion(
&mut self,
request: CompletionRequest,
) -> Result<(String, Duration)> {
self.stream_completion_with_tools(request).await
}

async fn stream_completion_with_tools(
&self,
&mut self,
mut request: CompletionRequest,
) -> Result<(String, Duration)> {
use std::io::{self, Write};

@@ -587,8 +599,34 @@ Let's start with the first step of your task.
first_token_time = Some(stream_start.elapsed());
}

// Check for tool calls in the streaming content
if let Some((tool_call, tool_end_pos)) = parser.add_chunk(&chunk.content) {
// Check for tool calls - either from JSON parsing (embedded models)
// or from native tool calls (Anthropic, OpenAI, etc.)
let mut detected_tool_call = None;

// First check for native tool calls in the chunk
if let Some(ref tool_calls) = chunk.tool_calls {
debug!("Found native tool calls in chunk: {:?}", tool_calls);
if let Some(first_tool) = tool_calls.first() {
// Convert native tool call to our internal format
detected_tool_call = Some((
crate::ToolCall {
tool: first_tool.tool.clone(),
args: first_tool.args.clone(),
},
current_response.len(), // Position doesn't matter for native calls
));
debug!("Converted native tool call: {:?}", detected_tool_call);
}
} else {
debug!("No native tool calls in chunk, chunk.tool_calls is None");
}

// If no native tool calls, check for JSON tool calls in text (embedded models)
if detected_tool_call.is_none() {
detected_tool_call = parser.add_chunk(&chunk.content);
}

if let Some((tool_call, tool_end_pos)) = detected_tool_call {
// Found a complete tool call! Stop streaming and execute it
let content_before_tool = parser.get_content_before_tool(tool_end_pos);
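With a native-tool-calling provider, the call arrives on the chunk itself (chunk.tool_calls) rather than inside the streamed text, and the agent keeps only the tool name and args when converting to its internal ToolCall. A standalone sketch of that conversion, with stand-in types and made-up values:

```rust
use serde_json::{json, Value};

// Illustration-only mirrors of the provider-side and agent-side types.
#[derive(Debug, Clone)]
struct ProviderToolCall {
    id: String,
    tool: String,
    args: Value,
}

#[derive(Debug)]
struct AgentToolCall {
    tool: String,
    args: Value,
}

fn first_native_call(chunk_tool_calls: Option<&[ProviderToolCall]>) -> Option<AgentToolCall> {
    // Mirrors the diff: take the first native call, drop the provider-side id.
    chunk_tool_calls?.first().map(|c| AgentToolCall {
        tool: c.tool.clone(),
        args: c.args.clone(),
    })
}

fn main() {
    let native = vec![ProviderToolCall {
        id: "toolu_example".to_string(), // made-up id
        tool: "shell".to_string(),
        args: json!({ "command": "ls" }),
    }];
    let converted = first_native_call(Some(&native)).expect("one call");
    assert_eq!(converted.tool, "shell");
}
```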
@@ -621,7 +659,7 @@ Let's start with the first step of your task.
// Tool call header
println!("┌─ {}", tool_call.tool);
if let Some(args_obj) = tool_call.args.as_object() {
for (key, value) in args_obj {
for (_key, value) in args_obj {
let value_str = match value {
serde_json::Value::String(s) => s.clone(),
_ => value.to_string(),

@@ -664,7 +702,7 @@ Let's start with the first step of your task.
print!("🤖 "); // Continue response indicator
io::stdout().flush()?;

// Update the conversation with the tool call and result
// Add the tool call and result to the context window immediately
let tool_message = Message {
role: MessageRole::Assistant,
content: format!(

@@ -679,8 +717,12 @@ Let's start with the first step of your task.
content: format!("Tool result: {}", tool_result),
};

//request.messages.push(tool_message);
request.messages.push(result_message);
// Add to context window for persistence
self.context_window.add_message(tool_message);
self.context_window.add_message(result_message);

// Update the request with the new context for next iteration
request.messages = self.context_window.conversation_history.clone();

full_response.push_str(display_content);
full_response.push_str(&format!(

@@ -1,10 +1,14 @@
use g3_providers::{LLMProvider, CompletionRequest, CompletionResponse, CompletionStream, CompletionChunk, Usage, Message, MessageRole};
use g3_providers::{LLMProvider, CompletionRequest, CompletionResponse, CompletionStream, CompletionChunk, Usage, Message, MessageRole, ToolCall};
use anyhow::Result;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use tracing::{debug, error};
use serde_json::Value;
use tracing::{debug, error, info};
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tokio_stream::StreamExt;
use futures_util::stream::Stream;
use std::pin::Pin;

pub struct AnthropicProvider {
client: Client,

@@ -22,26 +26,68 @@ struct AnthropicRequest {
max_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
temperature: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
tools: Option<Vec<AnthropicTool>>,
}

#[derive(Debug, Serialize)]
struct AnthropicTool {
name: String,
description: String,
input_schema: Value,
}

#[derive(Debug, Serialize)]
struct AnthropicMessage {
role: String,
content: AnthropicMessageContent,
}

#[derive(Debug, Serialize)]
#[serde(untagged)]
enum AnthropicMessageContent {
Text(String),
Blocks(Vec<AnthropicContentBlock>),
}

#[derive(Debug, Serialize)]
#[serde(tag = "type")]
enum AnthropicContentBlock {
#[serde(rename = "text")]
Text { text: String },
#[serde(rename = "tool_use")]
ToolUse {
id: String,
name: String,
input: Value,
},
#[serde(rename = "tool_result")]
ToolResult {
tool_use_id: String,
content: String,
},
}

#[derive(Debug, Deserialize)]
struct AnthropicResponse {
content: Vec<AnthropicContent>,
content: Vec<AnthropicResponseContent>,
usage: AnthropicUsage,
model: String,
#[serde(default)]
stop_reason: Option<String>,
}

#[derive(Debug, Deserialize)]
struct AnthropicContent {
#[serde(rename = "type")]
content_type: String,
text: String,
#[serde(tag = "type")]
enum AnthropicResponseContent {
#[serde(rename = "text")]
Text { text: String },
#[serde(rename = "tool_use")]
ToolUse {
id: String,
name: String,
input: Value,
},
}

#[derive(Debug, Deserialize)]

@@ -50,6 +96,24 @@ struct AnthropicUsage {
output_tokens: u32,
}

// Streaming response structures
#[derive(Debug, Deserialize)]
struct AnthropicStreamEvent {
#[serde(rename = "type")]
event_type: String,
#[serde(flatten)]
data: Value,
}

#[derive(Debug, Deserialize)]
struct AnthropicStreamDelta {
#[serde(rename = "type")]
delta_type: String,
text: Option<String>,
#[serde(flatten)]
other: Value,
}

impl AnthropicProvider {
pub fn new(api_key: String, model: String) -> Result<Self> {
let client = Client::new();

@@ -68,15 +132,209 @@ impl AnthropicProvider {
MessageRole::User => "user".to_string(),
MessageRole::Assistant => "assistant".to_string(),
},
content: message.content.clone(),
content: AnthropicMessageContent::Text(message.content.clone()),
}
}

fn create_tools() -> Vec<AnthropicTool> {
vec![
AnthropicTool {
name: "shell".to_string(),
description: "Execute a shell command and return the output".to_string(),
input_schema: serde_json::json!({
"type": "object",
"properties": {
"command": {
"type": "string",
"description": "The shell command to execute"
}
},
"required": ["command"]
}),
},
AnthropicTool {
name: "final_output".to_string(),
description: "Provide a final summary or output for the task".to_string(),
input_schema: serde_json::json!({
"type": "object",
"properties": {
"summary": {
"type": "string",
"description": "A summary of what was accomplished"
}
},
"required": ["summary"]
}),
},
]
}
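For orientation, the two tools above end up in the `tools` array of the Messages API request body. The snippet below hand-builds an approximation of that request JSON; it is not captured output, and the message content is invented.

```rust
use serde_json::json;

fn main() {
    // Approximate wire shape of the request the provider serializes.
    let body = json!({
        "model": "claude-3-5-sonnet-20241022",
        "max_tokens": 2048,
        "system": "You are G3, a general-purpose AI agent...",
        "messages": [
            { "role": "user", "content": "Task: list my Downloads folder" }
        ],
        "tools": [
            {
                "name": "shell",
                "description": "Execute a shell command and return the output",
                "input_schema": {
                    "type": "object",
                    "properties": { "command": { "type": "string", "description": "The shell command to execute" } },
                    "required": ["command"]
                }
            },
            {
                "name": "final_output",
                "description": "Provide a final summary or output for the task",
                "input_schema": {
                    "type": "object",
                    "properties": { "summary": { "type": "string", "description": "A summary of what was accomplished" } },
                    "required": ["summary"]
                }
            }
        ]
    });
    println!("{}", serde_json::to_string_pretty(&body).unwrap());
}
```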
fn extract_content_and_tools(&self, response: &AnthropicResponse) -> (String, Vec<(String, String, Value)>) {
let mut text_content = String::new();
let mut tool_calls = Vec::new();

for content in &response.content {
match content {
AnthropicResponseContent::Text { text } => {
if !text_content.is_empty() {
text_content.push('\n');
}
text_content.push_str(text);
}
AnthropicResponseContent::ToolUse { id, name, input } => {
tool_calls.push((id.clone(), name.clone(), input.clone()));
}
}
}

(text_content, tool_calls)
}

async fn execute_tool(&self, tool_name: &str, input: &Value) -> Result<String> {
match tool_name {
"shell" => {
if let Some(command) = input.get("command").and_then(|v| v.as_str()) {
info!("Executing shell command via Anthropic tool: {}", command);

// Import the CodeExecutor from g3-execution
use g3_execution::CodeExecutor;

let executor = CodeExecutor::new();
match executor.execute_code("bash", command).await {
Ok(result) => {
if result.success {
Ok(if result.stdout.is_empty() {
"✅ Command executed successfully".to_string()
} else {
result.stdout
})
} else {
Ok(format!("❌ Command failed: {}", result.stderr))
}
}
Err(e) => {
error!("Shell execution error: {}", e);
Ok(format!("❌ Execution error: {}", e))
}
}
} else {
Ok("❌ Missing command argument".to_string())
}
}
"final_output" => {
if let Some(summary) = input.get("summary").and_then(|v| v.as_str()) {
Ok(format!("📋 Final Output: {}", summary))
} else {
Ok("📋 Task completed".to_string())
}
}
_ => {
error!("Unknown tool: {}", tool_name);
Ok(format!("❓ Unknown tool: {}", tool_name))
}
}
}

async fn complete_with_tools(&self, request: CompletionRequest) -> Result<CompletionResponse> {
// Separate system messages from other messages
let mut system_content: Option<String> = None;
let mut non_system_messages = Vec::new();

for message in &request.messages {
match message.role {
MessageRole::System => {
// Combine multiple system messages if present
if let Some(existing) = &system_content {
system_content = Some(format!("{}\n\n{}", existing, message.content));
} else {
system_content = Some(message.content.clone());
}
}
_ => {
non_system_messages.push(self.convert_message(message));
}
}
}

let anthropic_request = AnthropicRequest {
model: self.model.clone(),
system: system_content,
messages: non_system_messages,
max_tokens: request.max_tokens,
temperature: request.temperature,
tools: Some(Self::create_tools()),
};

let response = self
.client
.post("https://api.anthropic.com/v1/messages")
.header("x-api-key", &self.api_key)
.header("Content-Type", "application/json")
.header("anthropic-version", "2023-06-01")
.json(&anthropic_request)
.send()
.await?;

if !response.status().is_success() {
let error_text = response.text().await?;
error!("Anthropic API error: {}", error_text);
anyhow::bail!("Anthropic API error: {}", error_text);
}

let anthropic_response: AnthropicResponse = response.json().await?;
debug!("Anthropic response: {:?}", anthropic_response);

let (text_content, tool_calls) = self.extract_content_and_tools(&anthropic_response);

// For the completion API, we'll execute tools and return the combined result
let final_content = if !tool_calls.is_empty() {
info!("Anthropic response contains {} tool calls", tool_calls.len());

let mut content_with_tools = text_content.clone();
for (_id, name, input) in tool_calls {
// Execute the tool call
let tool_result = match self.execute_tool(&name, &input).await {
Ok(result) => result,
Err(e) => format!("Error executing tool {}: {}", name, e),
};

// Append tool execution info to content
content_with_tools.push_str(&format!(
"\n\nTool executed: {} -> {}\n",
name, tool_result
));
}
content_with_tools
} else {
text_content
};

Ok(CompletionResponse {
content: final_content,
usage: Usage {
prompt_tokens: anthropic_response.usage.input_tokens,
completion_tokens: anthropic_response.usage.output_tokens,
total_tokens: anthropic_response.usage.input_tokens + anthropic_response.usage.output_tokens,
},
model: anthropic_response.model,
})
}
}

#[async_trait::async_trait]
impl LLMProvider for AnthropicProvider {
async fn complete(&self, request: CompletionRequest) -> Result<CompletionResponse> {
debug!("Making Anthropic completion request");
debug!("Making Anthropic completion request with tools");

// This is a simplified implementation - for full tool support,
// we should use the streaming method with proper tool handling
self.complete_with_tools(request).await
}

async fn stream(&self, request: CompletionRequest) -> Result<CompletionStream> {
debug!("Making Anthropic streaming request with tools");

let (tx, rx) = mpsc::channel(100);

// Separate system messages from other messages
let mut system_content: Option<String> = None;

@@ -104,58 +362,196 @@ impl LLMProvider for AnthropicProvider {
messages: non_system_messages,
max_tokens: request.max_tokens,
temperature: request.temperature,
tools: Some(Self::create_tools()),
};

let response = self
.client
// Add stream parameter
let mut request_json = serde_json::to_value(&anthropic_request)?;
request_json["stream"] = serde_json::Value::Bool(true);

let client = self.client.clone();
let api_key = self.api_key.clone();

tokio::spawn(async move {
debug!("Sending Anthropic streaming request with tools: {:?}", request_json);
let response = client
.post("https://api.anthropic.com/v1/messages")
.header("x-api-key", &self.api_key)
.header("x-api-key", &api_key)
.header("Content-Type", "application/json")
.header("anthropic-version", "2023-06-01")
.json(&anthropic_request)
.json(&request_json)
.send()
.await?;
.await;

if !response.status().is_success() {
let error_text = response.text().await?;
error!("Anthropic API error: {}", error_text);
anyhow::bail!("Anthropic API error: {}", error_text);
let response = match response {
Ok(resp) => {
if !resp.status().is_success() {
let error_text = resp.text().await.unwrap_or_default();
let _ = tx.send(Err(anyhow::anyhow!("Anthropic API error: {}", error_text))).await;
return;
}

let anthropic_response: AnthropicResponse = response.json().await?;

let content = anthropic_response
.content
.first()
.map(|content| content.text.clone())
.unwrap_or_default();

Ok(CompletionResponse {
content,
usage: Usage {
prompt_tokens: anthropic_response.usage.input_tokens,
completion_tokens: anthropic_response.usage.output_tokens,
total_tokens: anthropic_response.usage.input_tokens + anthropic_response.usage.output_tokens,
},
model: anthropic_response.model,
})
resp
}
Err(e) => {
let _ = tx.send(Err(e.into())).await;
return;
}

async fn stream(&self, request: CompletionRequest) -> Result<CompletionStream> {
debug!("Making Anthropic streaming request");

let (tx, rx) = mpsc::channel(100);

// For now, just send the complete response as a single chunk
// In a real implementation, we'd handle Server-Sent Events
let completion = self.complete(request).await?;

let chunk = CompletionChunk {
content: completion.content,
finished: true,
};

tx.send(Ok(chunk)).await.map_err(|_| anyhow::anyhow!("Failed to send chunk"))?;
// Handle Server-Sent Events
let mut stream = response.bytes_stream();
let mut buffer = String::new();
let mut pending_tool_calls = Vec::new();

while let Some(chunk_result) = stream.next().await {
let chunk = match chunk_result {
Ok(bytes) => bytes,
Err(e) => {
let _ = tx.send(Err(e.into())).await;
break;
}
};

let chunk_str = match std::str::from_utf8(&chunk) {
Ok(s) => s,
Err(_) => continue,
};

buffer.push_str(chunk_str);

// Process complete lines
while let Some(line_end) = buffer.find('\n') {
let line = buffer[..line_end].trim().to_string();
buffer.drain(..line_end + 1);

if line.is_empty() {
continue;
}

// Parse SSE format: "data: {...}"
if let Some(data) = line.strip_prefix("data: ") {
debug!("Raw SSE data: {}", data);
if data == "[DONE]" {
// Send any pending tool calls first
if !pending_tool_calls.is_empty() {
let tool_chunk = CompletionChunk {
content: String::new(),
finished: false,
tool_calls: Some(pending_tool_calls.clone()),
};
let _ = tx.send(Ok(tool_chunk)).await;
pending_tool_calls.clear();
}

// Send final chunk
let final_chunk = CompletionChunk {
content: String::new(),
finished: true,
tool_calls: None,
};
let _ = tx.send(Ok(final_chunk)).await;
break;
}

// Parse the JSON event
match serde_json::from_str::<AnthropicStreamEvent>(data) {
Ok(event) => {
debug!("Received Anthropic event: type={}, data={:?}", event.event_type, event.data);
match event.event_type.as_str() {
"content_block_start" => {
// Check if this is a tool use block
if let Some(content_block) = event.data.get("content_block") {
if let Some(block_type) = content_block.get("type").and_then(|t| t.as_str()) {
if block_type == "tool_use" {
// Extract tool call information immediately
if let (Some(id), Some(name), Some(input)) = (
content_block.get("id").and_then(|v| v.as_str()),
content_block.get("name").and_then(|v| v.as_str()),
content_block.get("input")
) {
let tool_call = ToolCall {
id: id.to_string(),
tool: name.to_string(),
args: input.clone(),
};
debug!("Added tool call from content_block_start: {:?}", tool_call);
pending_tool_calls.push(tool_call);
}
}
}
}
}
"content_block_delta" => {
// Extract text from delta
if let Some(delta) = event.data.get("delta") {
if let Some(text) = delta.get("text").and_then(|t| t.as_str()) {
let chunk = CompletionChunk {
content: text.to_string(),
finished: false,
tool_calls: None,
};
if tx.send(Ok(chunk)).await.is_err() {
break;
}
}
}
}
"content_block_stop" => {
// Check if we have a complete tool use block
if let Some(content_block) = event.data.get("content_block") {
if let Some(block_type) = content_block.get("type").and_then(|t| t.as_str()) {
if block_type == "tool_use" {
// Extract tool call information
if let (Some(id), Some(name), Some(input)) = (
content_block.get("id").and_then(|v| v.as_str()),
content_block.get("name").and_then(|v| v.as_str()),
content_block.get("input")
) {
let tool_call = ToolCall {
id: id.to_string(),
tool: name.to_string(),
args: input.clone(),
};
pending_tool_calls.push(tool_call);
}
}
}
}
debug!("Content block finished");
}
"message_stop" => {
// Send any pending tool calls first
if !pending_tool_calls.is_empty() {
let tool_chunk = CompletionChunk {
content: String::new(),
finished: false,
tool_calls: Some(pending_tool_calls.clone()),
};
let _ = tx.send(Ok(tool_chunk)).await;
}

// Message finished
let final_chunk = CompletionChunk {
content: String::new(),
finished: true,
tool_calls: None,
};
let _ = tx.send(Ok(final_chunk)).await;
break;
}
_ => {
debug!("Unhandled event type: {}", event.event_type);
}
}
}
Err(e) => {
debug!("Failed to parse streaming event: {} - Data: {}", e, data);
}
}
}
}
}
});

Ok(ReceiverStream::new(rx))
}
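The loop above keys off Anthropic's SSE event types (content_block_start, content_block_delta, content_block_stop, message_stop, and so on), pulling text out of content_block_delta and tool_use blocks out of the block start/stop events. A small sketch of parsing one such "data:" payload the same way AnthropicStreamEvent does; the payload is hand-written, not a captured response. (In the current Messages API, tool arguments may also arrive incrementally via input_json_delta events, which the loop above does not yet aggregate.)

```rust
use serde::Deserialize;
use serde_json::Value;

// Same shape as the provider's AnthropicStreamEvent: keep the event type,
// stash everything else in `data`.
#[derive(Debug, Deserialize)]
struct StreamEvent {
    #[serde(rename = "type")]
    event_type: String,
    #[serde(flatten)]
    data: Value,
}

fn main() -> serde_json::Result<()> {
    // Hand-written example of one SSE `data:` payload.
    let data = r#"{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}"#;
    let event: StreamEvent = serde_json::from_str(data)?;
    assert_eq!(event.event_type, "content_block_delta");

    let text = event
        .data
        .get("delta")
        .and_then(|d| d.get("text"))
        .and_then(|t| t.as_str())
        .unwrap_or_default();
    assert_eq!(text, "Hello");
    Ok(())
}
```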
@@ -167,4 +563,8 @@ impl LLMProvider for AnthropicProvider {
fn model(&self) -> &str {
&self.model
}

fn has_native_tool_calling(&self) -> bool {
true
}
}

@@ -8,12 +8,12 @@ use llama_cpp::{
LlamaModel, LlamaParams, LlamaSession, SessionParams,
};
use std::path::Path;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use tokio::sync::mpsc;
use tokio::sync::Mutex;
use tokio_stream::wrappers::ReceiverStream;
use tracing::{debug, error, info, warn};
use tracing::{debug, error, info};

pub struct EmbeddedProvider {
model: Arc<LlamaModel>,

@@ -129,6 +129,9 @@ impl EmbeddedProvider {
debug!("Context calculation: prompt_tokens={}, context_length={}, available_tokens={}, dynamic_max_tokens={}",
prompt_tokens, self.context_length, available_tokens, dynamic_max_tokens);

// Get stop sequences before entering the closure
let stop_sequences = self.get_stop_sequences();

// Add timeout to the entire operation
let timeout_duration = std::time::Duration::from_secs(30); // Increased timeout for larger contexts

@@ -202,8 +205,16 @@ impl EmbeddedProvider {
}

// Stop on completion markers
if generated_text.contains("</s>") || generated_text.contains("[/INST]") {
debug!("Hit CodeLlama stop sequence at {} tokens", token_count);
let mut hit_stop = false;
for stop_seq in &stop_sequences {
if generated_text.contains(stop_seq) {
debug!("Hit stop sequence '{}' at {} tokens", stop_seq, token_count);
hit_stop = true;
break;
}
}

if hit_stop {
break;
}
}

@@ -213,7 +224,8 @@ impl EmbeddedProvider {
token_count,
start_time.elapsed()
);
Ok((generated_text.trim().to_string(), token_count))

Ok((generated_text, token_count))
}),
)
.await;

@@ -226,7 +238,8 @@ impl EmbeddedProvider {
"Completed generation: {} tokens (dynamic limit was {})",
token_count, dynamic_max_tokens
);
Ok(text)
// Clean stop sequences from the generated text after the closure
Ok(self.clean_stop_sequences(&text))
}
Err(e) => Err(e),
},

@@ -245,6 +258,78 @@ impl EmbeddedProvider {
// This is conservative - actual tokenization might be different
(text.len() as f32 / 4.0).ceil() as u32
}

// Helper function to get stop sequences based on model type
fn get_stop_sequences(&self) -> Vec<&'static str> {
// Determine model type from model_name
let model_name_lower = self.model_name.to_lowercase();

if model_name_lower.contains("codellama") || model_name_lower.contains("code-llama") {
vec![
"</s>", // End of sequence
"[/INST]", // End of instruction
"<</SYS>>", // End of system message
"[INST]", // Start of new instruction (shouldn't appear in response)
"<<SYS>>", // Start of system (shouldn't appear in response)
]
} else if model_name_lower.contains("llama") {
vec![
"</s>", // End of sequence
"[/INST]", // End of instruction
"<</SYS>>", // End of system message
"### Human:", // Conversation format
"### Assistant:", // Conversation format
"[INST]", // Start of new instruction
]
} else if model_name_lower.contains("mistral") {
vec![
"</s>", // End of sequence
"[/INST]", // End of instruction
"<|im_end|>", // ChatML format
]
} else if model_name_lower.contains("vicuna") || model_name_lower.contains("wizard") {
vec![
"### Human:", // Conversation format
"### Assistant:", // Conversation format
"USER:", // Alternative format
"ASSISTANT:", // Alternative format
"</s>", // End of sequence
]
} else if model_name_lower.contains("alpaca") {
vec![
"### Instruction:", // Alpaca format
"### Response:", // Alpaca format
"### Input:", // Alpaca format
"</s>", // End of sequence
]
} else {
// Generic/unknown model - use common stop sequences
vec![
"</s>", // Most common end sequence
"<|endoftext|>", // GPT-style
"<|im_end|>", // ChatML
"### Human:", // Common conversation format
"### Assistant:", // Common conversation format
"[/INST]", // Instruction format
"<</SYS>>", // System format
]
}
}

// Helper function to clean up stop sequences from generated text
fn clean_stop_sequences(&self, text: &str) -> String {
let mut cleaned = text.to_string();
let stop_sequences = self.get_stop_sequences();

for stop_seq in &stop_sequences {
if let Some(pos) = cleaned.find(stop_seq) {
cleaned.truncate(pos);
break; // Only remove the first occurrence to avoid over-truncation
}
}

cleaned.trim().to_string()
}
}
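The two helpers above truncate at the first stop sequence found and trim the result. A tiny standalone version with a usage example, for clarity; the free function mirrors clean_stop_sequences but is not the provider method itself.

```rust
// Free-function version of clean_stop_sequences, for illustration only.
fn clean_stop_sequences(text: &str, stop_sequences: &[&str]) -> String {
    let mut cleaned = text.to_string();
    for stop_seq in stop_sequences {
        if let Some(pos) = cleaned.find(stop_seq) {
            cleaned.truncate(pos);
            break; // only the first match, as in the provider
        }
    }
    cleaned.trim().to_string()
}

fn main() {
    let stops = ["</s>", "[/INST]"];
    let raw = "Here is the answer. </s> [INST] ignored";
    assert_eq!(clean_stop_sequences(raw, &stops), "Here is the answer.");
}
```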
#[async_trait::async_trait]

@@ -334,6 +419,17 @@ impl LLMProvider for EmbeddedProvider {
let mut accumulated_text = String::new();
let mut token_count = 0;

// Get stop sequences dynamically based on model type
// We need to create a temporary EmbeddedProvider instance to access the method
// Since we can't access self in the spawned task, we'll use a static approach
let stop_sequences = if prompt.contains("[INST]") || prompt.contains("<<SYS>>") {
// Llama/CodeLlama format detected
vec!["</s>", "[/INST]", "<</SYS>>", "[INST]", "<<SYS>>", "### Human:", "### Assistant:"]
} else {
// Generic format
vec!["</s>", "<|endoftext|>", "<|im_end|>", "### Human:", "### Assistant:", "[/INST]", "<</SYS>>"]
};

// Stream tokens with proper limits
while let Some(token) = completion_handle.next_token() {
let token_string = session.model().token_to_piece(token);

@@ -341,36 +437,66 @@ impl LLMProvider for EmbeddedProvider {
accumulated_text.push_str(&token_string);
token_count += 1;

// Check if we've hit a stop sequence
let mut hit_stop = false;
for stop_seq in &stop_sequences {
if accumulated_text.contains(stop_seq) {
debug!("Hit stop sequence in streaming: {}", stop_seq);
hit_stop = true;
break;
}
}

if hit_stop {
// Don't send the token that contains the stop sequence
// Instead, send only the part before the stop sequence
let mut clean_accumulated = accumulated_text.clone();
for stop_seq in &stop_sequences {
if let Some(pos) = clean_accumulated.find(stop_seq) {
clean_accumulated.truncate(pos);
break;
}
}

// Calculate what part we haven't sent yet
let already_sent_len = accumulated_text.len() - token_string.len();
if clean_accumulated.len() > already_sent_len {
let remaining_to_send = &clean_accumulated[already_sent_len..];
if !remaining_to_send.is_empty() {
let chunk = CompletionChunk {
content: remaining_to_send.to_string(),
finished: false,
tool_calls: None,
};
let _ = tx.blocking_send(Ok(chunk));
}
}
break;
} else {
// Normal token, send it
let chunk = CompletionChunk {
content: token_string.clone(),
finished: false,
tool_calls: None,
};

if tx.blocking_send(Ok(chunk)).is_err() {
break; // Receiver dropped
}
}

// Enforce token limit
if token_count >= max_tokens as usize {
debug!("Reached max token limit in streaming: {}", max_tokens);
break;
}

// Stop if we hit common stop sequences
if accumulated_text.contains("### Human")
|| accumulated_text.contains("### System")
|| accumulated_text.contains("<|end|>")
|| accumulated_text.contains("</s>")
{
debug!("Hit stop sequence in streaming, stopping generation");
break;
}
}

// Send final chunk
let final_chunk = CompletionChunk {
content: String::new(),
finished: true,
tool_calls: None,
};
let _ = tx.blocking_send(Ok(final_chunk));
});

@@ -140,6 +140,7 @@ impl LLMProvider for OpenAIProvider {
let chunk = CompletionChunk {
content: completion.content,
finished: true,
tool_calls: None,
};

tx.send(Ok(chunk)).await.map_err(|_| anyhow::anyhow!("Failed to send chunk"))?;

@@ -16,6 +16,11 @@ pub trait LLMProvider: Send + Sync {

/// Get the model name
fn model(&self) -> &str;

/// Check if the provider supports native tool calling
fn has_native_tool_calling(&self) -> bool {
false
}
}

#[derive(Debug, Clone, Serialize, Deserialize)]

@@ -60,6 +65,14 @@ pub type CompletionStream = tokio_stream::wrappers::ReceiverStream<Result<Comple
pub struct CompletionChunk {
pub content: String,
pub finished: bool,
pub tool_calls: Option<Vec<ToolCall>>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
pub id: String,
pub tool: String,
pub args: serde_json::Value,
}

/// Provider registry for managing multiple LLM providers
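Putting the provider-side pieces together: has_native_tool_calling defaults to false, the Anthropic provider overrides it to true, and tool calls ride on CompletionChunk.tool_calls. The sketch below trims the trait to the two methods shown in this diff and uses a hypothetical provider type; it is illustrative, not the crate's actual code.

```rust
use serde_json::{json, Value};

// Trimmed, illustration-only copies of the provider-crate types in this diff.
#[derive(Debug, Clone)]
pub struct ToolCall {
    pub id: String,
    pub tool: String,
    pub args: Value,
}

#[derive(Debug, Clone)]
pub struct CompletionChunk {
    pub content: String,
    pub finished: bool,
    pub tool_calls: Option<Vec<ToolCall>>,
}

pub trait LLMProvider {
    /// Get the model name
    fn model(&self) -> &str;
    /// Check if the provider supports native tool calling
    fn has_native_tool_calling(&self) -> bool {
        false
    }
}

// Hypothetical provider standing in for AnthropicProvider.
struct FakeAnthropic;

impl LLMProvider for FakeAnthropic {
    fn model(&self) -> &str {
        "claude-3-5-sonnet-20241022"
    }
    fn has_native_tool_calling(&self) -> bool {
        true
    }
}

fn main() {
    let provider = FakeAnthropic;
    assert!(provider.has_native_tool_calling());

    // A chunk like the one the streaming path emits when a tool_use block completes.
    let chunk = CompletionChunk {
        content: String::new(),
        finished: false,
        tool_calls: Some(vec![ToolCall {
            id: "toolu_example".into(), // made-up id
            tool: "shell".into(),
            args: json!({ "command": "ls" }),
        }]),
    };
    assert_eq!(chunk.tool_calls.as_ref().map(Vec::len), Some(1));
}
```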