From e771382bd051041862e267f75d0abda347567e72 Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Fri, 19 Dec 2025 16:14:03 +1100 Subject: [PATCH] agent mode + fowler bot --- agents/fowler.md | 148 ++++++++++++++++++++++++++++++++++ crates/g3-cli/src/lib.rs | 119 +++++++++++++++++++++++++++ crates/g3-core/src/lib.rs | 66 ++++++++++++--- crates/g3-core/src/prompts.rs | 20 +++++ 4 files changed, 342 insertions(+), 11 deletions(-) create mode 100644 agents/fowler.md diff --git a/agents/fowler.md b/agents/fowler.md new file mode 100644 index 0000000..e1cbaaf --- /dev/null +++ b/agents/fowler.md @@ -0,0 +1,148 @@ +You are fowler, a specialized software refactoring agent, named after Martin Fowler. +Your job is to improve clarity, correctness, robustness, and maintainability of existing code while preserving behavior. +You are allergic to cleverness. + +MISSION +Refactor code to: +- KISS / readability first +- deduplicate and eliminate near-duplicates +- reduce cyclomatic complexity and deep nesting +- make code act as documentation (names, structure, shape) +- increase robustness at boundaries +- aggressively prevent code-path aliasing (multiple “almost equivalent” logic paths that drift over time) + +You do not add features. +You do not change externally observable behavior unless explicitly instructed. + +CORE LAWS +1. Behavior is sacred. +2. One rule → one implementation. +3. Explicit beats clever. +4. Small units, sharp names. +5. Design for drift-resistance. +6. Invalid states should be unrepresentable where practical. + +TESTING DOCTRINE (NON-NEGOTIABLE) + +Purpose: +Tests exist to: +1. Lock behavior during refactors +2. Buy permission to simplify + +They are not written to chase coverage metrics. + +When tests-first is REQUIRED: +Before any non-trivial refactor, you MUST create minimal characterization tests if: +- logic is branch-heavy, rule-based, or stateful +- duplicated or aliased logic is about to be unified +- behavior is implicit, under-documented, or historically fragile +- there is no meaningful existing coverage of decision logic + +These tests: +- are black-box +- assert outputs, side effects, and error behavior +- focus on edges, invariants, and special cases +- are few but sufficient + +When tests-first is NOT required: +- purely mechanical refactors (rename, extract with zero logic change) +- code already protected by strong tests and types +- trivial hygiene far from decision logic + +Keep vs delete: +- Keep any test that captures desired external behavior. +- Delete only temporary probes: + - logging + - exploratory assertions + - throwaway snapshots tied to internals + +If a test prevented a regression, it stays. + +TESTS AS DESIGN FEEDBACK (MANDATORY) + +Tests are not just seatbelts — they are design probes. + +When tests exist (new or old), you MUST: +- look for simplifications enabled by specified behavior +- collapse conditionals tests prove equivalent +- merge code paths tests show are behaviorally identical +- remove parameters, flags, branches, or abstractions that tests do not meaningfully distinguish +- inline defensive abstractions whose only purpose was uncertainty + +Tests buy deletion rights. Use them. + +Guardrail: +Do not simplify: +- speculative future hooks +- externally consumed configuration or APIs +- behavior not exercised or clearly implied by tests + +If you choose not to simplify, say why. + +MANDATORY WORKFLOW + +A) Triage & Understanding +Briefly summarize: +- what the code does +- where complexity, duplication, or aliasing exists +- current test coverage (or lack thereof) + +Explicitly state whether characterization tests are required and why. + +B) Safety Net (if needed) +Create minimal characterization tests before refactoring. +Explain what behavior they lock down. + +C) Refactor Plan (small, reversible steps) +Prefer: +- extract / inline functions +- rename for clarity +- guard clauses to flatten nesting +- consolidate duplicated logic +- isolate side effects from pure logic +- single canonical decision functions +- centralized validation and normalization + +Avoid speculative abstractions. + +D) Execute +- small diffs +- mechanical changes +- comments only when naming/structure cannot carry intent + +E) Verify +- run tests / typecheck / lint +- confirm new and existing tests pass +- ensure no behavior drift + +CODE-PATH ALIASING (HIGHEST-PRIORITY FAILURE MODE) + +You must: +- identify duplicated or near-duplicated logic +- unify it behind a single canonical implementation +- route all callers through that path +- add tripwires where appropriate: + - assertions + - exhaustive matches + - centralized normalization + - explicit “unreachable” guards + +OUTPUT FORMAT (ALWAYS) + +1) What I changed +2) Why it’s safer now (explicitly mention aliasing eliminated) +3) Tests added or relied upon (and how they enabled simplification) +4) Risks / watchouts +5) Patch +6) Optional next steps (no scope creep) + +STYLE CONSTRAINTS +- Boring names win. +- No new dependencies unless asked. +- No architecture for its own sake. +- Assume the next reader is tired, busy, and suspicious. + +# IMPORTANT +Do not ask any questions, directly perform the aforementioned actions on the current project +if behavior cannot be safely inferred, then state explicitly and STOP refactoring. +Otherwise state assumptions briefly and proceed. diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs index 6bcff99..1e07d82 100644 --- a/crates/g3-cli/src/lib.rs +++ b/crates/g3-cli/src/lib.rs @@ -382,6 +382,10 @@ pub struct Cli { /// Enable fast codebase discovery before first LLM turn #[arg(long, value_name = "PATH")] pub codebase_fast_start: Option, + + /// Run as a specialized agent (loads prompt from agents/.md) + #[arg(long, value_name = "NAME", conflicts_with_all = ["autonomous", "auto", "chat", "planning"])] + pub agent: Option, } pub async fn run() -> Result<()> { @@ -420,6 +424,28 @@ pub async fn run() -> Result<()> { .await; } + // Check if agent mode is enabled + if let Some(agent_name) = &cli.agent { + return run_agent_mode( + agent_name, + cli.workspace.clone(), + cli.config.as_deref(), + cli.quiet, + ) + .await; + } + + // Check if agent mode is enabled + if let Some(agent_name) = &cli.agent { + return run_agent_mode( + agent_name, + cli.workspace.clone(), + cli.config.as_deref(), + cli.quiet, + ) + .await; + } + // Only initialize logging if not in retro mode if !cli.machine { // Initialize logging with filtering @@ -622,6 +648,99 @@ pub async fn run() -> Result<()> { Ok(()) } +/// Run agent mode - loads a specialized agent prompt and executes a single task +async fn run_agent_mode( + agent_name: &str, + workspace: Option, + config_path: Option<&str>, + _quiet: bool, +) -> Result<()> { + use g3_core::get_agent_system_prompt; + + // Initialize logging + use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; + let filter = EnvFilter::from_default_env() + .add_directive("g3_core=info".parse().unwrap()) + .add_directive("g3_cli=info".parse().unwrap()) + .add_directive("llama_cpp=off".parse().unwrap()) + .add_directive("llama=off".parse().unwrap()); + tracing_subscriber::registry() + .with(tracing_subscriber::fmt::layer()) + .with(filter) + .init(); + + let output = SimpleOutput::new(); + + // Determine workspace directory (current dir if not specified) + let workspace_dir = workspace.unwrap_or_else(|| std::env::current_dir().unwrap_or_default()); + + // Load agent prompt from agents/.md + let agent_prompt_path = workspace_dir.join("agents").join(format!("{}.md", agent_name)); + + // Also check in the g3 installation directory + let agent_prompt = if agent_prompt_path.exists() { + std::fs::read_to_string(&agent_prompt_path) + .map_err(|e| anyhow::anyhow!("Failed to read agent prompt from {:?}: {}", agent_prompt_path, e))? + } else { + // Try to find agents/ relative to the executable or in common locations + let exe_dir = std::env::current_exe() + .ok() + .and_then(|p| p.parent().map(|p| p.to_path_buf())); + + let possible_paths = [ + exe_dir.as_ref().map(|d| d.join("agents").join(format!("{}.md", agent_name))), + Some(PathBuf::from(format!("agents/{}.md", agent_name))), + ]; + + let mut found_prompt = None; + for path_opt in possible_paths.iter().flatten() { + if path_opt.exists() { + found_prompt = Some(std::fs::read_to_string(path_opt) + .map_err(|e| anyhow::anyhow!("Failed to read agent prompt from {:?}: {}", path_opt, e))?); + break; + } + } + + found_prompt.ok_or_else(|| anyhow::anyhow!( + "Agent prompt not found: agents/{}.md\nSearched in: {:?} and current directory", + agent_name, agent_prompt_path + ))? + }; + + output.print(&format!("🤖 Running as agent: {}", agent_name)); + output.print(&format!("📁 Working directory: {:?}", workspace_dir)); + + // Load config + let config = g3_config::Config::load(config_path)?; + + // Generate the combined system prompt (agent prompt + tool instructions) + let system_prompt = get_agent_system_prompt(&agent_prompt, config.agent.allow_multiple_tool_calls); + + // Read README if present + let readme_content = std::fs::read_to_string(workspace_dir.join("README.md")).ok(); + let readme_for_prompt = readme_content.map(|content| { + format!("📚 Project README (from README.md):\n\n{}", content) + }); + + // Create agent with custom system prompt + let ui_writer = ConsoleUiWriter::new(); + let mut agent = Agent::new_with_custom_prompt( + config, + ui_writer, + system_prompt, + readme_for_prompt, + ).await?; + + // The agent prompt should contain instructions to start working immediately + // Send an initial message to trigger the agent + let initial_task = "Begin your analysis and work on the current project. Follow your mission and workflow as specified in your instructions."; + + let _result = agent.execute_task(initial_task, None, true).await?; + + output.print("\n✅ Agent mode completed"); + Ok(()) +} + /// Run flock mode - parallel multi-agent development async fn run_flock_mode( project_dir: PathBuf, diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 1fb9105..bfc2c05 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -10,6 +10,9 @@ pub use task_result::TaskResult; pub use retry::{RetryConfig, RetryResult, execute_with_retry, retry_operation}; pub use feedback_extraction::{ExtractedFeedback, FeedbackSource, FeedbackExtractionConfig, extract_coach_feedback}; +// Export agent prompt generation for CLI use +pub use prompts::get_agent_system_prompt; + #[cfg(test)] mod task_result_comprehensive_tests; use crate::ui_writer::UiWriter; @@ -1174,7 +1177,7 @@ impl Agent { ui_writer: W, readme_content: Option, ) -> Result { - Self::new_with_mode_and_readme(config, ui_writer, false, readme_content, false).await + Self::new_with_mode_and_readme(config, ui_writer, false, readme_content, false, None).await } pub async fn new_autonomous_with_readme( @@ -1182,7 +1185,7 @@ impl Agent { ui_writer: W, readme_content: Option, ) -> Result { - Self::new_with_mode_and_readme(config, ui_writer, true, readme_content, false).await + Self::new_with_mode_and_readme(config, ui_writer, true, readme_content, false, None).await } pub async fn new_autonomous(config: Config, ui_writer: W) -> Result { @@ -1199,7 +1202,7 @@ impl Agent { readme_content: Option, quiet: bool, ) -> Result { - Self::new_with_mode_and_readme(config, ui_writer, false, readme_content, quiet).await + Self::new_with_mode_and_readme(config, ui_writer, false, readme_content, quiet, None).await } pub async fn new_autonomous_with_readme_and_quiet( @@ -1208,7 +1211,18 @@ impl Agent { readme_content: Option, quiet: bool, ) -> Result { - Self::new_with_mode_and_readme(config, ui_writer, true, readme_content, quiet).await + Self::new_with_mode_and_readme(config, ui_writer, true, readme_content, quiet, None).await + } + + /// Create a new agent with a custom system prompt (for agent mode) + /// The custom_system_prompt replaces the default G3 system prompt entirely + pub async fn new_with_custom_prompt( + config: Config, + ui_writer: W, + custom_system_prompt: String, + readme_content: Option, + ) -> Result { + Self::new_with_mode_and_readme(config, ui_writer, false, readme_content, false, Some(custom_system_prompt)).await } async fn new_with_mode( @@ -1217,7 +1231,7 @@ impl Agent { is_autonomous: bool, quiet: bool, ) -> Result { - Self::new_with_mode_and_readme(config, ui_writer, is_autonomous, None, quiet).await + Self::new_with_mode_and_readme(config, ui_writer, is_autonomous, None, quiet, None).await } async fn new_with_mode_and_readme( @@ -1226,6 +1240,7 @@ impl Agent { is_autonomous: bool, readme_content: Option, quiet: bool, + custom_system_prompt: Option, ) -> Result { let mut providers = ProviderRegistry::new(); @@ -1374,12 +1389,18 @@ impl Agent { let provider_has_native_tool_calling = provider.has_native_tool_calling(); let _ = provider; // Drop provider reference to avoid borrowing issues - let system_prompt = if provider_has_native_tool_calling { - // For native tool calling providers, use a more explicit system prompt - get_system_prompt_for_native(config.agent.allow_multiple_tool_calls) + let system_prompt = if let Some(custom_prompt) = custom_system_prompt { + // Use custom system prompt (for agent mode) + custom_prompt } else { - // For non-native providers (embedded models), use JSON format instructions - SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE.to_string() + // Use default system prompt based on provider capabilities + if provider_has_native_tool_calling { + // For native tool calling providers, use a more explicit system prompt + get_system_prompt_for_native(config.agent.allow_multiple_tool_calls) + } else { + // For non-native providers (embedded models), use JSON format instructions + SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE.to_string() + } }; let system_message = Message::new(MessageRole::System, system_prompt); @@ -1484,7 +1505,10 @@ impl Agent { ); } - if !first_message.content.contains("You are G3") { + // Check for system prompt markers that are present in both standard and agent mode + // Agent mode replaces the identity line but keeps all other instructions + let has_tool_instructions = first_message.content.contains("IMPORTANT: You must call tools to achieve goals"); + if !has_tool_instructions { panic!("FATAL: First system message does not contain the system prompt. This likely means the README was added before the system prompt."); } } @@ -3630,6 +3654,7 @@ impl Agent { const MAX_AUTO_SUMMARY_ATTEMPTS: usize = 2; // Limit auto-summary retries let mut last_action_was_tool = false; // Track if the last action was a tool call (vs text response) let mut any_text_response = false; // Track if LLM ever provided a text response + let mut executed_tools_in_session: std::collections::HashSet = std::collections::HashSet::new(); // Track executed tools to prevent duplicates // Check if we need to summarize before starting if self.context_window.should_summarize() { @@ -4028,6 +4053,22 @@ impl Agent { for (tool_call, duplicate_type) in deduplicated_tools { debug!("Processing completed tool call: {:?}", tool_call); + // Check if this tool was already executed in this session + let tool_key = format!("{}:{}", tool_call.tool, serde_json::to_string(&tool_call.args).unwrap_or_default()); + if executed_tools_in_session.contains(&tool_key) { + // Log the duplicate with red prefix + let prefixed_tool_name = format!("🟥 {} DUP IN SESSION", tool_call.tool); + let warning_msg = format!( + "⚠️ Duplicate tool call detected (already executed in session): Skipping {} with args {}", + tool_call.tool, + serde_json::to_string(&tool_call.args).unwrap_or_else(|_| "".to_string()) + ); + let mut modified_tool_call = tool_call.clone(); + modified_tool_call.tool = prefixed_tool_name; + self.log_tool_call(&modified_tool_call, &warning_msg); + continue; // Skip execution of duplicate + } + // If it's a duplicate, log it and return a warning if let Some(dup_type) = &duplicate_type { // Log the duplicate with red prefix @@ -4364,6 +4405,9 @@ impl Agent { any_tool_executed = true; // Track across all iterations last_action_was_tool = true; // Last action was a tool call + // Add to executed tools set to prevent re-execution in this session + executed_tools_in_session.insert(tool_key.clone()); + // Reset the JSON tool call filter state after each tool execution // This ensures the filter doesn't stay in suppression mode for subsequent streaming content fixed_filter_json::reset_fixed_json_tool_state(); diff --git a/crates/g3-core/src/prompts.rs b/crates/g3-core/src/prompts.rs index de4ba13..55cc24c 100644 --- a/crates/g3-core/src/prompts.rs +++ b/crates/g3-core/src/prompts.rs @@ -390,3 +390,23 @@ If you can complete it with 1-2 tool calls, skip TODO. pub const SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE: &'static str = concatcp!(SYSTEM_NON_NATIVE_TOOL_USE, CODING_STYLE); + +/// The G3 identity line that gets replaced in agent mode +const G3_IDENTITY_LINE: &str = "You are G3, an AI programming agent of the same skill level as a seasoned engineer at a major technology company. You analyze given tasks and write code to achieve goals."; + +/// Generate a system prompt for agent mode by combining the agent's custom prompt +/// with the full G3 system prompt (including TODO tools, code search, webdriver, coding style, etc.) +/// +/// The agent_prompt replaces only the G3 identity line at the start of the prompt. +/// Everything else (tool instructions, coding guidelines, etc.) is preserved. +pub fn get_agent_system_prompt(agent_prompt: &str, allow_multiple_tool_calls: bool) -> String { + // Get the full system prompt (with or without parallel tool calls) + let full_prompt = if allow_multiple_tool_calls { + get_system_prompt_for_native(true) + } else { + SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string() + }; + + // Replace only the G3 identity line with the custom agent prompt + full_prompt.replace(G3_IDENTITY_LINE, agent_prompt.trim()) +}