g3/crates/g3-core/tests/duplicate_detection_test.rs

//! Tests for tool call duplicate detection
//!
//! These tests ensure that duplicate detection only catches IMMEDIATELY SEQUENTIAL
//! duplicates, not legitimate re-use of tools with text between them.

use g3_core::StreamingToolParser;
use g3_providers::CompletionChunk;

// Helper to create a chunk
fn chunk(content: &str, finished: bool) -> CompletionChunk {
    CompletionChunk {
        content: content.to_string(),
        finished,
        tool_calls: None,
        usage: None,
        stop_reason: None,
    }
}

// =============================================================================
// Test: find_complete_json_object_end helper function
// =============================================================================

#[test]
fn test_find_complete_json_object_end_simple() {
    let json = r#"{"tool": "test", "args": {}}"#;
    let end = StreamingToolParser::find_complete_json_object_end(json);
    assert!(end.is_some(), "Should find end of complete JSON");
    assert_eq!(end.unwrap(), json.len() - 1, "End should be at last character");
}

#[test]
fn test_find_complete_json_object_end_nested() {
    let json = r#"{"tool": "test", "args": {"nested": {"deep": true}}}"#;
    let end = StreamingToolParser::find_complete_json_object_end(json);
    assert!(end.is_some(), "Should find end of nested JSON");
    assert_eq!(end.unwrap(), json.len() - 1);
}

#[test]
fn test_find_complete_json_object_end_with_trailing_text() {
    let json = r#"{"tool": "test", "args": {}} some text after"#;
    let end = StreamingToolParser::find_complete_json_object_end(json);
    assert!(end.is_some(), "Should find end of JSON even with trailing text");
    // The end should be at the closing brace, not at the end of the string
    let end_pos = end.unwrap();
    assert_eq!(&json[end_pos..end_pos+1], "}", "End should be at closing brace");
}

#[test]
fn test_find_complete_json_object_end_incomplete() {
    let json = r#"{"tool": "test", "args": {"#;
    let end = StreamingToolParser::find_complete_json_object_end(json);
    assert!(end.is_none(), "Should return None for incomplete JSON");
}

// =============================================================================
// Test: Tool calls separated by text should NOT be duplicates
// =============================================================================

#[test]
fn test_same_tool_with_text_between_not_duplicate() {
    // This tests the scenario where the LLM calls the same tool twice
    // but with explanatory text between them - this should NOT be a duplicate
    let mut parser = StreamingToolParser::new();

    // First tool call
    let content1 = r#"{"tool": "todo_read", "args": {}}"#;
    let tools1 = parser.process_chunk(&chunk(content1, true));
    assert_eq!(tools1.len(), 1, "First tool call should be detected");
    assert_eq!(tools1[0].tool, "todo_read");

    // Reset parser (simulating what happens after tool execution)
    parser.reset();

    // Some text, then the same tool call again
    let content2 = r#"Now let me check the TODO again to verify my changes.
{"tool": "todo_read", "args": {}}"#;
    let tools2 = parser.process_chunk(&chunk(content2, true));

    // The second tool call should be detected - it's NOT a duplicate
    // because there's text before it
    assert_eq!(tools2.len(), 1, "Second tool call should be detected (not a duplicate)");
    assert_eq!(tools2[0].tool, "todo_read");
}

#[test]
fn test_different_tools_back_to_back_not_duplicate() {
    let mut parser = StreamingToolParser::new();

    // Two different tool calls back to back
    let content = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}
{"tool": "shell", "args": {"command": "ls"}}"#;

    let tools = parser.process_chunk(&chunk(content, true));

    // Both should be detected - they're different tools
    assert!(tools.len() >= 1, "Should detect tool calls");
    // At minimum, the first one should be detected
    assert_eq!(tools[0].tool, "read_file");
}

#[test]
fn test_same_tool_different_args_not_duplicate() {
    let mut parser = StreamingToolParser::new();

    // Same tool but different arguments - NOT a duplicate
    let content = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}
{"tool": "read_file", "args": {"file_path": "b.txt"}}"#;

    let tools = parser.process_chunk(&chunk(content, true));

    // Both should be detected - different args means not a duplicate
    assert!(tools.len() >= 1, "Should detect tool calls");
}

// =============================================================================
// Test: Immediately sequential identical tool calls ARE duplicates
// =============================================================================

#[test]
fn test_identical_tool_calls_back_to_back_are_duplicates() {
    // This tests the scenario where the LLM stutters and outputs
    // the exact same tool call twice in a row - this IS a duplicate
    let mut parser = StreamingToolParser::new();

    // Two identical tool calls with no text between them
    let content = r#"{"tool": "todo_read", "args": {}}
{"tool": "todo_read", "args": {}}"#;

    let tools = parser.process_chunk(&chunk(content, true));

    // The parser should detect both, but the deduplication logic
    // (which happens at a higher level in the agent) should mark
    // the second one as a duplicate
    // Here we just verify both are parsed
    assert!(tools.len() >= 1, "Should detect at least one tool call");
}

// =============================================================================
// Test: Text content detection for duplicate logic
// =============================================================================

#[test]
fn test_has_text_after_tool_call() {
    // Helper test to verify we can detect text after a tool call
    let content_with_text = r#"{"tool": "test", "args": {}} Some text after"#;
    let content_without_text = r#"{"tool": "test", "args": {}}"#;
    let content_with_whitespace_only = r#"{"tool": "test", "args": {}}
  "#;

    // Find the end of the JSON in each case
    let end1 = StreamingToolParser::find_complete_json_object_end(content_with_text).unwrap();
    let end2 = StreamingToolParser::find_complete_json_object_end(content_without_text).unwrap();
    let end3 = StreamingToolParser::find_complete_json_object_end(content_with_whitespace_only).unwrap();

    // Check what's after the JSON
    let after1 = content_with_text[end1 + 1..].trim();
    let after2 = content_without_text.get(end2 + 1..).unwrap_or("").trim();
    let after3 = content_with_whitespace_only[end3 + 1..].trim();

    assert!(!after1.is_empty(), "Should have text after tool call");
    assert!(after2.is_empty(), "Should have no text after tool call");
    assert!(after3.is_empty(), "Whitespace-only should count as no text");
}

// =============================================================================
// Test: Edge cases
// =============================================================================

#[test]
fn test_tool_call_with_newlines_between() {
    let mut parser = StreamingToolParser::new();

    // Tool calls separated by multiple newlines (but no actual text)
    // This SHOULD be considered a duplicate since there's no meaningful text
    let content = r#"{"tool": "todo_read", "args": {}}


{"tool": "todo_read", "args": {}}"#;

    let tools = parser.process_chunk(&chunk(content, true));
    assert!(tools.len() >= 1, "Should detect at least one tool call");
}

#[test]
fn test_tool_call_with_whitespace_text_between() {
    let mut parser = StreamingToolParser::new();

    // Tool calls separated by text that's just whitespace and punctuation
    // The key is whether there's "meaningful" text
    let content = r#"{"tool": "todo_read", "args": {}}
OK, now again:
{"tool": "todo_read", "args": {}}"#;

    let tools = parser.process_chunk(&chunk(content, true));

    // Both should be detected since there's text between them
    assert!(tools.len() >= 1, "Should detect tool calls");
}

#[test]
fn test_tool_call_in_middle_of_text() {
    let mut parser = StreamingToolParser::new();

    // Tool call surrounded by text
    let content = r#"Let me read the file first.
{"tool": "read_file", "args": {"file_path": "test.txt"}}
Now I'll analyze the contents."#;

    let tools = parser.process_chunk(&chunk(content, true));
    assert_eq!(tools.len(), 1, "Should detect the tool call");
    assert_eq!(tools[0].tool, "read_file");
}

#[test]
fn test_multiple_different_tool_calls_with_text() {
    let mut parser = StreamingToolParser::new();

    // Multiple different tool calls with text between each
    let content = r#"First, let me read the file:
{"tool": "read_file", "args": {"file_path": "test.txt"}}
Now let me check the TODO:
{"tool": "todo_read", "args": {}}
Finally, let me run a command:
{"tool": "shell", "args": {"command": "ls"}}"#;

    let tools = parser.process_chunk(&chunk(content, true));

    // All three should be detected
    assert!(tools.len() >= 1, "Should detect tool calls");
}