Add comprehensive MockProvider integration tests
Added 6 new integration tests for stream_completion_with_tools: - test_text_before_tool_call_preserved: text before native tool call is saved - test_native_tool_call_execution: native tool calls execute correctly - test_duplicate_tool_calls_skipped: sequential duplicates are detected - test_json_fallback_tool_calling: JSON tool calls work without native support - test_text_after_tool_execution_preserved: follow-up text is saved - test_multiple_tool_calls_executed: multiple tool calls in sequence work Also added MockResponse helper methods: - text_then_native_tool(): text followed by native tool call - duplicate_native_tool_calls(): same tool call twice (for dedup testing) Fixed text_with_json_tool() to ensure "tool" key comes before "args" (serde_json alphabetizes keys, breaking pattern detection). Total: 18 integration tests covering historical bugs and core behaviors.
This commit is contained in:
@@ -462,3 +462,186 @@ async fn test_streaming_no_chunk_duplication() {
|
||||
content
|
||||
);
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Tool Execution Tests
|
||||
// =============================================================================
|
||||
|
||||
/// Test: Text before a tool call is preserved in context
|
||||
///
|
||||
/// When the LLM outputs text followed by a tool call, both should be preserved.
|
||||
#[tokio::test]
|
||||
async fn test_text_before_tool_call_preserved() {
|
||||
let provider = MockProvider::new()
|
||||
.with_native_tool_calling(true)
|
||||
.with_response(MockResponse::text_then_native_tool(
|
||||
"Let me check that for you.",
|
||||
"shell",
|
||||
serde_json::json!({"command": "echo hello"}),
|
||||
))
|
||||
.with_default_response(MockResponse::text("Done!"));
|
||||
|
||||
let (mut agent, _temp_dir) = create_agent_with_mock(provider).await;
|
||||
|
||||
let result = agent.execute_task("Run a command", None, false).await;
|
||||
assert!(result.is_ok(), "Task should succeed: {:?}", result.err());
|
||||
|
||||
// Find the assistant message that contains the pre-tool text
|
||||
let history = &agent.get_context_window().conversation_history;
|
||||
let has_pre_tool_text = history
|
||||
.iter()
|
||||
.any(|m| matches!(m.role, MessageRole::Assistant) && m.content.contains("check that for you"));
|
||||
|
||||
assert!(has_pre_tool_text, "Pre-tool text should be preserved in context");
|
||||
}
|
||||
|
||||
/// Test: Native tool calls are executed correctly
|
||||
#[tokio::test]
|
||||
async fn test_native_tool_call_execution() {
|
||||
let provider = MockProvider::new()
|
||||
.with_native_tool_calling(true)
|
||||
.with_response(MockResponse::native_tool_call(
|
||||
"shell",
|
||||
serde_json::json!({"command": "echo test_output"}),
|
||||
))
|
||||
.with_default_response(MockResponse::text("Command executed successfully."));
|
||||
|
||||
let (mut agent, _temp_dir) = create_agent_with_mock(provider).await;
|
||||
|
||||
let result = agent.execute_task("Run echo", None, false).await;
|
||||
assert!(result.is_ok(), "Task should succeed: {:?}", result.err());
|
||||
|
||||
// Check that tool result is in context
|
||||
let history = &agent.get_context_window().conversation_history;
|
||||
let has_tool_result = history
|
||||
.iter()
|
||||
.any(|m| matches!(m.role, MessageRole::User) && m.content.contains("Tool result:") && m.content.contains("test_output"));
|
||||
|
||||
assert!(has_tool_result, "Tool result should be in context");
|
||||
}
|
||||
|
||||
/// Test: Duplicate sequential tool calls are skipped
|
||||
///
|
||||
/// When the LLM emits the same tool call twice in a row, only one should execute.
|
||||
#[tokio::test]
|
||||
async fn test_duplicate_tool_calls_skipped() {
|
||||
// This test uses native tool calling with duplicate tool calls
|
||||
let provider = MockProvider::new()
|
||||
.with_native_tool_calling(true)
|
||||
.with_response(MockResponse::duplicate_native_tool_calls(
|
||||
"shell",
|
||||
serde_json::json!({"command": "echo duplicate_test"}),
|
||||
))
|
||||
.with_default_response(MockResponse::text("Done."));
|
||||
|
||||
let (mut agent, _temp_dir) = create_agent_with_mock(provider).await;
|
||||
|
||||
let result = agent.execute_task("Run command", None, false).await;
|
||||
assert!(result.is_ok(), "Task should succeed: {:?}", result.err());
|
||||
|
||||
// Count tool results - should only have one
|
||||
let history = &agent.get_context_window().conversation_history;
|
||||
let tool_result_count = history
|
||||
.iter()
|
||||
.filter(|m| matches!(m.role, MessageRole::User) && m.content.contains("Tool result:") && m.content.contains("duplicate_test"))
|
||||
.count();
|
||||
|
||||
assert_eq!(tool_result_count, 1, "Duplicate tool call should be skipped, got {} results", tool_result_count);
|
||||
}
|
||||
|
||||
/// Test: JSON fallback tool calling works when provider doesn't support native
|
||||
///
|
||||
/// When the provider doesn't have native tool calling, the agent should
|
||||
/// detect JSON tool calls in the text content.
|
||||
#[tokio::test]
|
||||
async fn test_json_fallback_tool_calling() {
|
||||
// Provider WITHOUT native tool calling - uses JSON fallback
|
||||
let provider = MockProvider::new()
|
||||
.with_native_tool_calling(false)
|
||||
.with_response(MockResponse::text_with_json_tool(
|
||||
"Let me run that command.",
|
||||
"shell",
|
||||
serde_json::json!({"command": "echo json_fallback_test"}),
|
||||
))
|
||||
.with_default_response(MockResponse::text("Command completed."));
|
||||
|
||||
let (mut agent, _temp_dir) = create_agent_with_mock(provider).await;
|
||||
|
||||
let result = agent.execute_task("Run a command", None, false).await;
|
||||
assert!(result.is_ok(), "Task should succeed: {:?}", result.err());
|
||||
|
||||
// Check that tool result is in context (proves JSON was parsed and executed)
|
||||
let history = &agent.get_context_window().conversation_history;
|
||||
let has_tool_result = history
|
||||
.iter()
|
||||
.any(|m| matches!(m.role, MessageRole::User) && m.content.contains("Tool result:") && m.content.contains("json_fallback_test"));
|
||||
|
||||
assert!(has_tool_result, "JSON fallback tool should have been executed");
|
||||
}
|
||||
|
||||
/// Test: Text after tool execution is preserved
|
||||
///
|
||||
/// When the LLM outputs text after a tool is executed (in the follow-up response),
|
||||
/// that text should be preserved in context.
|
||||
#[tokio::test]
|
||||
async fn test_text_after_tool_execution_preserved() {
|
||||
let provider = MockProvider::new()
|
||||
.with_native_tool_calling(true)
|
||||
.with_response(MockResponse::native_tool_call(
|
||||
"shell",
|
||||
serde_json::json!({"command": "echo hello"}),
|
||||
))
|
||||
// The follow-up response after tool execution
|
||||
.with_response(MockResponse::text("The command ran successfully and output 'hello'."));
|
||||
|
||||
let (mut agent, _temp_dir) = create_agent_with_mock(provider).await;
|
||||
|
||||
let result = agent.execute_task("Run echo hello", None, false).await;
|
||||
assert!(result.is_ok(), "Task should succeed: {:?}", result.err());
|
||||
|
||||
// Check that the follow-up text is in context
|
||||
let history = &agent.get_context_window().conversation_history;
|
||||
let has_followup = history
|
||||
.iter()
|
||||
.any(|m| matches!(m.role, MessageRole::Assistant) && m.content.contains("ran successfully"));
|
||||
|
||||
assert!(has_followup, "Follow-up text after tool execution should be preserved");
|
||||
}
|
||||
|
||||
/// Test: Multiple different tool calls in sequence
|
||||
///
|
||||
/// When the LLM makes multiple tool calls, they should all be executed.
|
||||
#[tokio::test]
|
||||
async fn test_multiple_tool_calls_executed() {
|
||||
// First response: tool call 1
|
||||
// Second response: tool call 2
|
||||
// Third response: final text
|
||||
let provider = MockProvider::new()
|
||||
.with_native_tool_calling(true)
|
||||
.with_response(MockResponse::native_tool_call(
|
||||
"shell",
|
||||
serde_json::json!({"command": "echo first_tool"}),
|
||||
))
|
||||
.with_response(MockResponse::native_tool_call(
|
||||
"shell",
|
||||
serde_json::json!({"command": "echo second_tool"}),
|
||||
))
|
||||
.with_response(MockResponse::text("Both commands completed."));
|
||||
|
||||
let (mut agent, _temp_dir) = create_agent_with_mock(provider).await;
|
||||
|
||||
let result = agent.execute_task("Run two commands", None, false).await;
|
||||
assert!(result.is_ok(), "Task should succeed: {:?}", result.err());
|
||||
|
||||
// Check that both tool results are in context
|
||||
let history = &agent.get_context_window().conversation_history;
|
||||
let first_result = history
|
||||
.iter()
|
||||
.any(|m| matches!(m.role, MessageRole::User) && m.content.contains("first_tool"));
|
||||
let second_result = history
|
||||
.iter()
|
||||
.any(|m| matches!(m.role, MessageRole::User) && m.content.contains("second_tool"));
|
||||
|
||||
assert!(first_result, "First tool result should be in context");
|
||||
assert!(second_result, "Second tool result should be in context");
|
||||
}
|
||||
|
||||
@@ -159,13 +159,49 @@ impl MockResponse {
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a response with text followed by a native tool call
|
||||
pub fn text_then_native_tool(text: &str, tool: &str, args: serde_json::Value) -> Self {
|
||||
Self {
|
||||
chunks: vec![
|
||||
MockChunk::content(text),
|
||||
MockChunk::tool_streaming(tool),
|
||||
MockChunk::tool_call(tool, args),
|
||||
MockChunk::finished("tool_use"),
|
||||
],
|
||||
usage: Usage {
|
||||
prompt_tokens: 100,
|
||||
completion_tokens: 50 + text.len() as u32 / 4,
|
||||
total_tokens: 150 + text.len() as u32 / 4,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a response with duplicate native tool calls (same tool called twice)
|
||||
/// Used to test duplicate detection
|
||||
pub fn duplicate_native_tool_calls(tool: &str, args: serde_json::Value) -> Self {
|
||||
Self {
|
||||
chunks: vec![
|
||||
MockChunk::tool_streaming(tool),
|
||||
MockChunk::tool_call(tool, args.clone()),
|
||||
// Second identical tool call
|
||||
MockChunk::tool_streaming(tool),
|
||||
MockChunk::tool_call(tool, args),
|
||||
MockChunk::finished("tool_use"),
|
||||
],
|
||||
usage: Usage {
|
||||
prompt_tokens: 100,
|
||||
completion_tokens: 100,
|
||||
total_tokens: 200,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a response with text followed by a JSON tool call (non-native)
|
||||
pub fn text_with_json_tool(text: &str, tool: &str, args: serde_json::Value) -> Self {
|
||||
let tool_json = serde_json::json!({
|
||||
"tool": tool,
|
||||
"args": args
|
||||
});
|
||||
let tool_str = serde_json::to_string(&tool_json).unwrap();
|
||||
// Manually construct JSON to ensure "tool" comes before "args"
|
||||
// (serde_json::json! alphabetizes keys, which breaks pattern detection)
|
||||
let args_str = serde_json::to_string(&args).unwrap();
|
||||
let tool_str = format!(r#"{{"tool": "{}", "args": {}}}"#, tool, args_str);
|
||||
let full_content = format!("{}\n\n{}", text, tool_str);
|
||||
|
||||
Self {
|
||||
|
||||
Reference in New Issue
Block a user