diff --git a/agents/breaker.md b/agents/breaker.md new file mode 100644 index 0000000..f1602f5 --- /dev/null +++ b/agents/breaker.md @@ -0,0 +1,79 @@ +You are **Breaker**. + +Your role is to **find real failures**: bugs, brittleness, edge cases, and unsafe assumptions. +You are adversarial and methodical. You try to make the system fail fast, then explain why. + +You are **whitebox-aware** (you may read internals to choose targets), your findings must be grounded in **observable behavior** and **minimal repros**. + +--- + +## Prime Directive +**DO NOT CHANGE PRODUCTION CODE.** + +- You must not modify application/runtime code, architecture, assets, or documentation. +- You may add **minimal isolated repro fixtures** (e.g., tiny inputs) only if necessary to make a failure deterministic. + +--- + +## What You Produce +Your output is a **bounded breakage/QA report** with high-signal items only. + +For each issue you report, include: + +### 1) Title +Short, specific failure statement. + +### 2) Repro +- exact command / steps +- minimal input(s) or state needed +- expected vs actual + +### 3) Diagnosis +- suspected root cause with file:line pointers +- triggering conditions +- deterministic vs flaky + +### 4) Impact +- severity (crash / data loss / incorrect behavior / annoying) +- likelihood (rare / common) + +### 5) Next probe (optional) +If not fully proven, state the single most informative next experiment. + +IMPORTANT: Write your report to: `analysis/breaker/YYYY-MM-DD.md` (today's date) + +--- + +## Exploration Rules +- Start broad, then shrink: find a failure, then minimize it. +- Prefer **minimal repros** over exhaustive enumeration. +- Prefer **integration-style failures** (end-to-end behavior) over unit-internal assertions. +- In addition to repo exploration, use git diffs to guide exploration. +- If you cannot reproduce, say so plainly and list what’s missing. + +--- + +## Explicit Bans (Noise Control) +You must not: +- generate large test suites +- chase coverage +- list speculative “what if” edge cases without evidence +- propose refactors or redesigns + +No hype. No “next steps” backlog. + +--- + +## Output Size Discipline +- Report **0–5 issues max**. +- If you find more, keep only the most severe or most likely. +- If nothing meaningful is found, write: `No actionable failures found.` + +--- + +## Success Criteria +You succeed when: +- failures are real and reproducible +- repros are minimal and deterministic when possible +- diagnoses are crisp and grounded +- output is concise and high-signal \ No newline at end of file diff --git a/crates/g3-cli/src/filter_json.rs b/crates/g3-cli/src/filter_json.rs index ee140a2..7ddd597 100644 --- a/crates/g3-cli/src/filter_json.rs +++ b/crates/g3-cli/src/filter_json.rs @@ -53,6 +53,8 @@ struct FilterState { at_line_start: bool, /// Whitespace seen after newline (before potential {) pending_whitespace: String, + /// Newlines accumulated at line start (before potential tool call) + pending_newlines: String, } impl FilterState { @@ -65,6 +67,7 @@ impl FilterState { escape_next: false, at_line_start: true, // Start of input counts as line start pending_whitespace: String::new(), + pending_newlines: String::new(), } } @@ -76,6 +79,7 @@ impl FilterState { self.escape_next = false; self.at_line_start = true; self.pending_whitespace.clear(); + self.pending_newlines.clear(); } } @@ -192,11 +196,16 @@ pub fn filter_json_tool_calls(content: &str) -> String { fn handle_streaming_char(state: &mut FilterState, ch: char, output: &mut String) { match ch { '\n' => { - // Output the newline and any pending whitespace - output.push_str(&state.pending_whitespace); - output.push(ch); - state.pending_whitespace.clear(); - state.at_line_start = true; + // Buffer extra newlines at line start - they may precede a tool call + // Always output the first newline, but buffer subsequent ones + if state.at_line_start { + state.pending_newlines.push(ch); + } else { + // First newline after content - output it and enter line start mode + output.push(ch); + state.at_line_start = true; + state.pending_newlines.clear(); // Reset - this newline was output + } } ' ' | '\t' if state.at_line_start => { // Accumulate whitespace at line start @@ -208,10 +217,12 @@ fn handle_streaming_char(state: &mut FilterState, ch: char, output: &mut String) state.state = State::Buffering; state.buffer.clear(); state.buffer.push(ch); - // Don't output pending_whitespace yet - we might need to suppress it + // Don't output pending_newlines or pending_whitespace yet - we might need to suppress them } _ => { - // Regular character - output any pending whitespace first + // Regular character - output any pending newlines and whitespace first + output.push_str(&state.pending_newlines); + state.pending_newlines.clear(); output.push_str(&state.pending_whitespace); state.pending_whitespace.clear(); output.push(ch); @@ -233,15 +244,18 @@ fn handle_buffering_char(state: &mut FilterState, ch: char, output: &mut String) state.brace_depth = 1; // We already have the opening { state.in_string = true; // We're inside the "tool" value string state.escape_next = false; - // Discard pending_whitespace (it's part of the tool call line) + // Discard pending_newlines and pending_whitespace (they're part of the tool call) + state.pending_newlines.clear(); state.pending_whitespace.clear(); state.buffer.clear(); } Some(false) => { // Not a tool call - release buffered content debug!("Not a tool call - releasing buffer"); + output.push_str(&state.pending_newlines); output.push_str(&state.pending_whitespace); output.push_str(&state.buffer); + state.pending_newlines.clear(); state.pending_whitespace.clear(); state.buffer.clear(); state.state = State::Streaming; @@ -252,8 +266,10 @@ fn handle_buffering_char(state: &mut FilterState, ch: char, output: &mut String) if state.buffer.len() > MAX_BUFFER_FOR_DETECTION { // Too long without confirmation - not a tool call debug!("Buffer exceeded max length - not a tool call"); + output.push_str(&state.pending_newlines); output.push_str(&state.pending_whitespace); output.push_str(&state.buffer); + state.pending_newlines.clear(); state.pending_whitespace.clear(); state.buffer.clear(); state.state = State::Streaming; @@ -347,6 +363,24 @@ pub fn reset_json_tool_state() { }); } +/// Flushes any pending content from the JSON filter. +/// +/// Call this at the end of streaming to ensure any buffered newlines +/// or whitespace that wasn't followed by a tool call gets output. +pub fn flush_json_tool_filter() -> String { + JSON_TOOL_STATE.with(|state| { + let mut state = state.borrow_mut(); + let mut output = String::new(); + // Output any pending newlines and whitespace + output.push_str(&state.pending_newlines); + output.push_str(&state.pending_whitespace); + output.push_str(&state.buffer); + state.pending_newlines.clear(); + state.pending_whitespace.clear(); + state.buffer.clear(); + output + }) +} #[cfg(test)] mod tests { @@ -438,4 +472,28 @@ mod tests { // The tool call should be filtered out assert_eq!(result, "Text\n\nMore"); } + + #[test] + fn test_multiple_newlines_before_tool_call_suppressed() { + // This test verifies that extra blank lines before a tool call are suppressed. + // This fixes the visual issue where many blank lines appeared before tool calls. + reset_json_tool_state(); + + // Input has 4 newlines before the tool call (3 blank lines) + let input = "Before\n\n\n\n{\"tool\": \"shell\", \"args\": {}}\nAfter"; + let result = filter_json_tool_calls(input); + + // Only one newline should remain before where the tool call was + // (the first newline after "Before" is preserved, extra ones are suppressed) + assert_eq!(result, "Before\n\nAfter"); + } + + #[test] + fn test_single_newline_before_tool_call_preserved() { + // A single newline before a tool call should be preserved + reset_json_tool_state(); + let input = "Before\n{\"tool\": \"shell\", \"args\": {}}\nAfter"; + let result = filter_json_tool_calls(input); + assert_eq!(result, "Before\n\nAfter"); + } } diff --git a/crates/g3-cli/tests/filter_json_stress_test.rs b/crates/g3-cli/tests/filter_json_stress_test.rs index 2fc8d7a..33b8b14 100644 --- a/crates/g3-cli/tests/filter_json_stress_test.rs +++ b/crates/g3-cli/tests/filter_json_stress_test.rs @@ -3,7 +3,7 @@ //! These tests hammer the filter with malformed JSON, partial tool calls, //! edge cases, and adversarial inputs to ensure robustness. -use g3_cli::filter_json::{filter_json_tool_calls, reset_json_tool_state}; +use g3_cli::filter_json::{filter_json_tool_calls, flush_json_tool_filter, reset_json_tool_state}; // ============================================================================ // Malformed JSON Tests @@ -478,7 +478,9 @@ fn test_empty_input() { #[test] fn test_just_newline() { reset_json_tool_state(); - assert_eq!(filter_json_tool_calls("\n"), "\n"); + let result = filter_json_tool_calls("\n"); + let flushed = flush_json_tool_filter(); + assert_eq!(format!("{}{}", result, flushed), "\n"); } #[test] diff --git a/crates/g3-cli/tests/filter_json_test.rs b/crates/g3-cli/tests/filter_json_test.rs index 4b8c378..c5decf0 100644 --- a/crates/g3-cli/tests/filter_json_test.rs +++ b/crates/g3-cli/tests/filter_json_test.rs @@ -211,7 +211,8 @@ After"#; {"tool": "final_output", "args": {"summary": "Task completed successfully"}}"#; let result = filter_json_tool_calls(input); - let expected = "\n"; + // Leading newline before tool call at start of input is suppressed + let expected = ""; assert_eq!(result, expected); } @@ -509,13 +510,13 @@ End"#; let result = filter_json_tool_calls(input); - // The tool call starts on its own line after the read_file output, - // so it should be filtered out. Only the read_file output should remain. + // The tool call starts on its own line after the read_file output. + // The tool call is filtered out, and extra newlines before it are suppressed. + // Only one newline remains (the line ending after "1ms"). let expected = r#"┌─ read_file | ./crates/g3-cli/src/ui_writer_impl.rs [13000..13300] │ } │ (11 lines) └─ ⚡️ 1ms - "#; assert_eq!( result, expected, @@ -538,9 +539,10 @@ End"#; let result = filter_json_tool_calls(input); // The shell tool call starts at line beginning, so it should be filtered out - // Only the surrounding text should remain - // Note: The tool call is on its own line, so filtering leaves an empty line - let expected = "Let me create a test case:\n\n\n\nDone."; + // Only the surrounding text should remain. + // Extra newlines before the tool call are suppressed (one blank line before + // becomes just the line ending), but newlines after are preserved. + let expected = "Let me create a test case:\n\n\nDone."; assert_eq!( result, expected,