Add plan approval gate to block file changes without approved plan

- Add check_plan_approval_gate() in tools/plan.rs that runs after each tool call - Detects file changes via git status --porcelain when plan exists but not approved - Reverts changes: git checkout for modified files, rm for new untracked files - Returns blocking message instructing LLM to create/approve plan first - Add ApprovalGateResult enum with Allowed/Blocked/NotGitRepo variants - Add set_session_id() and set_working_dir() methods on Agent for testing - Add integration test using MockProvider to simulate blocked write_file
2026-02-05 11:34:10 +11:00
parent add8060526
commit c347a73cbd
5 changed files with 423 additions and 3 deletions
--- a/crates/g3-core/new_file.txt
+++ b/crates/g3-core/new_file.txt
@@ -0,0 +1 @@
 This should be blocked!
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -47,6 +47,7 @@ pub use prompts::get_agent_system_prompt;
 #[cfg(test)]
 mod task_result_comprehensive_tests;
 use crate::ui_writer::UiWriter;
 use tools::plan::{check_plan_approval_gate, ApprovalGateResult};
 #[cfg(test)]
 mod tilde_expansion_tests;
@@ -753,6 +754,16 @@ impl<W: UiWriter> Agent<W> {
        self.session_id.as_deref()
    }
    /// Set the session ID (useful for testing)
    pub fn set_session_id(&mut self, session_id: String) {
        self.session_id = Some(session_id);
    }
    /// Set the working directory (useful for testing)
    pub fn set_working_dir(&mut self, working_dir: String) {
        self.working_dir = Some(working_dir);
    }
    // =========================================================================
    // TASK EXECUTION
    // =========================================================================
@@ -2889,6 +2900,17 @@ Skip if nothing new. Be brief."#;
        self.tool_calls_this_turn.push(tool_call.tool.clone());
        let result = self.execute_tool_inner_in_dir(tool_call, working_dir).await;
        // Check plan approval gate after tool execution
        if let Some(session_id) = &self.session_id {
            if let ApprovalGateResult::Blocked { message, .. } = 
                check_plan_approval_gate(session_id, working_dir) 
            {
                // Return the blocking message instead of the tool result
                return Ok(message);
            }
        }
        let log_str = match &result {
            Ok(s) => s.clone(),
            Err(e) => format!("ERROR: {}", e),
--- a/crates/g3-core/src/tools/invariants.rs
+++ b/crates/g3-core/src/tools/invariants.rs
@@ -903,6 +903,77 @@ pub fn format_evaluation_results(eval: &RulespecEvaluation) -> String {
    output
 }
 /// Format a rulespec as human-readable markdown.
 /// 
 /// This produces a rich, readable format suitable for tool output,
 /// not raw YAML.
 pub fn format_rulespec_markdown(rulespec: &Rulespec) -> String {
    let mut output = String::new();
    output.push_str("\n");
    output.push_str("### Invariants (Rulespec)\n\n");
    if rulespec.claims.is_empty() && rulespec.predicates.is_empty() {
        output.push_str("_No invariants defined._\n");
        return output;
    }
    // Group predicates by source
    let task_predicates: Vec<_> = rulespec.predicates.iter()
        .filter(|p| p.source == InvariantSource::TaskPrompt)
        .collect();
    let memory_predicates: Vec<_> = rulespec.predicates.iter()
        .filter(|p| p.source == InvariantSource::Memory)
        .collect();
    // Build claim lookup for selector display
    let claims: std::collections::HashMap<&str, &Claim> = rulespec.claims.iter()
        .map(|c| (c.name.as_str(), c))
        .collect();
    // Format predicates from task prompt
    if !task_predicates.is_empty() {
        output.push_str("**From Task:**\n");
        for pred in &task_predicates {
            format_predicate_markdown(&mut output, pred, &claims);
        }
        output.push_str("\n");
    }
    // Format predicates from memory
    if !memory_predicates.is_empty() {
        output.push_str("**From Memory:**\n");
        for pred in &memory_predicates {
            format_predicate_markdown(&mut output, pred, &claims);
        }
        output.push_str("\n");
    }
    output
 }
 /// Format a single predicate as a markdown list item.
 fn format_predicate_markdown(
    output: &mut String,
    pred: &Predicate,
    claims: &std::collections::HashMap<&str, &Claim>,
 ) {
    let selector = claims.get(pred.claim.as_str())
        .map(|c| c.selector.as_str())
        .unwrap_or(&pred.claim);
    let value_str = match &pred.value {
        Some(v) => format!(" `{}`", yaml_to_display(v)),
        None => String::new(),
    };
    output.push_str(&format!("- `{}` **{}**{}\n", selector, pred.rule, value_str));
    if let Some(notes) = &pred.notes {
        output.push_str(&format!("  - _{}_\n", notes));
    }
 }
 // ============================================================================
 // Tests
 // ============================================================================
@@ -1274,4 +1345,56 @@ mod tests {
        // Should deserialize back
        let _: ActionEnvelope = serde_yaml::from_str(&yaml).unwrap();
    }
    // ========================================================================
    // Format Rulespec Markdown Tests
    // ========================================================================
    #[test]
    fn test_format_rulespec_markdown_empty() {
        let rulespec = Rulespec::new();
        let output = format_rulespec_markdown(&rulespec);
        assert!(output.contains("### Invariants (Rulespec)"));
        assert!(output.contains("_No invariants defined._"));
    }
    #[test]
    fn test_format_rulespec_markdown_with_predicates() {
        let mut rulespec = Rulespec::new();
        rulespec.add_claim(Claim::new("caps", "csv_importer.capabilities"));
        rulespec.add_predicate(
            Predicate::new("caps", PredicateRule::Contains, InvariantSource::TaskPrompt)
                .with_value(YamlValue::String("handle_tsv".to_string()))
                .with_notes("User requested TSV support")
        );
        rulespec.add_predicate(
            Predicate::new("caps", PredicateRule::Exists, InvariantSource::Memory)
        );
        let output = format_rulespec_markdown(&rulespec);
        assert!(output.contains("### Invariants (Rulespec)"));
        assert!(output.contains("**From Task:**"));
        assert!(output.contains("**From Memory:**"));
        assert!(output.contains("`csv_importer.capabilities`"));
        assert!(output.contains("**contains**"));
        assert!(output.contains("`handle_tsv`"));
        assert!(output.contains("_User requested TSV support_"));
        assert!(output.contains("**exists**"));
    }
    #[test]
    fn test_format_rulespec_markdown_task_only() {
        let mut rulespec = Rulespec::new();
        rulespec.add_claim(Claim::new("test", "foo.bar"));
        rulespec.add_predicate(
            Predicate::new("test", PredicateRule::Exists, InvariantSource::TaskPrompt)
        );
        let output = format_rulespec_markdown(&rulespec);
        assert!(output.contains("**From Task:**"));
        assert!(!output.contains("**From Memory:**"));
    }
 }
--- a/crates/g3-core/src/tools/plan.rs
+++ b/crates/g3-core/src/tools/plan.rs
@@ -20,7 +20,7 @@ use crate::ToolCall;
 use super::executor::ToolContext;
-use super::invariants::{get_envelope_path, get_rulespec_path};
+use super::invariants::{format_rulespec_markdown, get_envelope_path, get_rulespec_path, read_rulespec};
 // ============================================================================
 // Plan Schema
@@ -866,14 +866,30 @@ pub async fn execute_plan_write<W: UiWriter>(
    let yaml = serde_yaml::to_string(&plan)?;
    ctx.ui_writer.print_plan_compact(Some(&yaml), Some(&plan_path_str), true);
    // Read and format rulespec if it exists
    let rulespec_section = match read_rulespec(session_id) {
        Ok(Some(rulespec)) => format_rulespec_markdown(&rulespec),
        Ok(None) => "\n_No rulespec generated._\n".to_string(),
        Err(_) => "\n_No rulespec generated._\n".to_string(),
    };
    // Check if plan is now complete and trigger verification
    if plan.is_complete() && plan.is_approved() {
        let verification = plan_verify(&plan, ctx.working_dir);
        let verification_output = format_verification_results(&verification, ctx.session_id);
-        return Ok(format!("✅ Plan updated: {}\n{}", plan.status_summary(), verification_output));
+        return Ok(format!(
            "✅ Plan updated: {}\n{}\n{}",
            plan.status_summary(),
            verification_output,
            rulespec_section
        ));
    }
-    Ok(format!("✅ Plan updated: {}", plan.status_summary()))
+    Ok(format!(
        "✅ Plan updated: {}\n{}",
        plan.status_summary(),
        rulespec_section
    ))
 }
 /// Execute the `plan_approve` tool.
@@ -916,6 +932,124 @@ pub async fn execute_plan_approve<W: UiWriter>(
    ))
 }
 // ============================================================================
 // Plan Approval Gate
 // ============================================================================
 /// Result of checking the plan approval gate after a tool execution.
 #[derive(Debug)]
 pub enum ApprovalGateResult {
    /// No plan exists, or plan is approved - allow the operation
    Allowed,
    /// Plan exists but not approved, and files were changed - blocked
    Blocked {
        /// Message to inject into the conversation
        message: String,
        /// Files that were reverted
        reverted_files: Vec<String>,
    },
    /// Not a git repository - skip the check
    NotGitRepo,
 }
 /// Check if file changes occurred without an approved plan, and revert them if so.
 /// 
 /// This function should be called after each tool execution when in plan mode.
 /// It checks `git status --porcelain` for changes, and if a plan exists but isn't
 /// approved, it reverts those changes and returns a blocking message.
 pub fn check_plan_approval_gate(session_id: &str, working_dir: Option<&str>) -> ApprovalGateResult {
    let dir = working_dir.unwrap_or(".");
    // Check if this is a git repository
    let git_check = std::process::Command::new("git")
        .args(["rev-parse", "--git-dir"])
        .current_dir(dir)
        .output();
    if git_check.is_err() || !git_check.unwrap().status.success() {
        return ApprovalGateResult::NotGitRepo;
    }
    // Check if a plan exists and whether it's approved
    let plan = match read_plan(session_id) {
        Ok(Some(plan)) => plan,
        Ok(None) => return ApprovalGateResult::Allowed, // No plan, allow
        Err(_) => return ApprovalGateResult::Allowed,   // Can't read plan, allow
    };
    if plan.is_approved() {
        return ApprovalGateResult::Allowed;
    }
    // Plan exists but not approved - check for file changes
    let status_output = std::process::Command::new("git")
        .args(["status", "--porcelain"])
        .current_dir(dir)
        .output();
    let output = match status_output {
        Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(),
        _ => return ApprovalGateResult::Allowed, // Can't get status, allow
    };
    if output.trim().is_empty() {
        return ApprovalGateResult::Allowed; // No changes
    }
    // Parse changed files and revert them
    let mut reverted_files = Vec::new();
    for line in output.lines() {
        if line.len() < 3 {
            continue;
        }
        let status = &line[0..2];
        let file_path = line[3..].trim();
        match status {
            "??" => {
                // Untracked file - remove it
                let _ = std::fs::remove_file(std::path::Path::new(dir).join(file_path));
                reverted_files.push(format!("{} (new file)", file_path));
            }
            _ => {
                // Modified/added/deleted tracked file - git checkout
                let _ = std::process::Command::new("git")
                    .args(["checkout", "--", file_path])
                    .current_dir(dir)
                    .output();
                reverted_files.push(format!("{} (modified)", file_path));
            }
        }
    }
    if reverted_files.is_empty() {
        return ApprovalGateResult::Allowed;
    }
    let files_list = reverted_files.iter()
        .map(|f| format!("  - {}", f))
        .collect::<Vec<_>>()
        .join("\n");
    let message = format!(
        "⚠️ IMPLEMENTATION BLOCKED\n\n\
         You modified files without an approved plan:\n\
         {}\n\n\
         These changes have been reverted.\n\n\
         Before implementing, you must:\n\
         1. Create a plan with `plan_write`\n\
         2. Request the user's explicit approval or edits to plan\n\n\
         Do not attempt to implement until the plan is approved.",
        files_list
    );
    ApprovalGateResult::Blocked {
        message,
        reverted_files,
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -1202,4 +1336,18 @@ items: []
        assert!(VerificationStatus::Warning("warn".to_string()).is_warning_or_error());
        assert!(VerificationStatus::Error("err".to_string()).is_warning_or_error());
    }
    #[test]
    fn test_approval_gate_no_plan() {
        // With a non-existent session, there's no plan, so it should allow
        let result = check_plan_approval_gate("nonexistent-session-xyz", Some("."));
        assert!(matches!(result, ApprovalGateResult::Allowed));
    }
    #[test]
    fn test_approval_gate_not_git_repo() {
        // /tmp is typically not a git repo
        let result = check_plan_approval_gate("any-session", Some("/tmp"));
        assert!(matches!(result, ApprovalGateResult::NotGitRepo));
    }
 }
--- a/crates/g3-core/tests/mock_provider_integration_test.rs
+++ b/crates/g3-core/tests/mock_provider_integration_test.rs
@@ -822,3 +822,129 @@ async fn test_llm_repeats_text_before_each_tool_call() {
        preamble_count
    );
 }
 // =============================================================================
 // Plan Approval Gate Tests
 // =============================================================================
 /// Test: File changes are blocked when plan exists but is not approved
 ///
 /// Scenario:
 /// 1. Create a plan (unapproved)
 /// 2. LLM tries to write a file
 /// 3. The file change should be reverted and a blocking message returned
 #[tokio::test]
 async fn test_plan_approval_gate_blocks_unapproved_changes() {
    use g3_core::tools::plan::{write_plan, Plan, PlanItem, Checks, Check, PlanState};
    use std::fs;
    // Create a temp directory that IS a git repo
    let temp_dir = TempDir::new().unwrap();
    let temp_path = temp_dir.path();
    // Initialize git repo
    std::process::Command::new("git")
        .args(["init"])
        .current_dir(temp_path)
        .output()
        .expect("Failed to init git repo");
    // Configure git user for the repo (needed for commits)
    std::process::Command::new("git")
        .args(["config", "user.email", "test@test.com"])
        .current_dir(temp_path)
        .output()
        .expect("Failed to configure git email");
    std::process::Command::new("git")
        .args(["config", "user.name", "Test"])
        .current_dir(temp_path)
        .output()
        .expect("Failed to configure git name");
    // Create an initial commit so we have a clean state
    let readme_path = temp_path.join("README.md");
    fs::write(&readme_path, "# Test").unwrap();
    std::process::Command::new("git")
        .args(["add", "."])
        .current_dir(temp_path)
        .output()
        .expect("Failed to git add");
    std::process::Command::new("git")
        .args(["commit", "-m", "initial"])
        .current_dir(temp_path)
        .output()
        .expect("Failed to git commit");
    // Use absolute path so the file is written to the temp git repo
    let new_file_path = temp_path.join("new_file.txt");
    // Create a mock provider that will try to write a file
    let provider = MockProvider::new()
        .with_native_tool_calling(true)
        .with_response(MockResponse::native_tool_call(
            "write_file",
            serde_json::json!({
                "file_path": new_file_path.to_string_lossy(),
                "content": "This should be blocked!"
            }),
        ))
        .with_default_response(MockResponse::text("I tried to write a file."));
    // Create agent with a specific session ID
    let mut registry = ProviderRegistry::new();
    registry.register(provider);
    let config = g3_config::Config::default();
    let mut agent = Agent::new_for_test(config, NullUiWriter, registry)
        .await
        .expect("Failed to create agent");
    // Set a session ID so the plan can be found
    let session_id = "test-approval-gate-session";
    agent.set_session_id(session_id.to_string());
    // Set the working directory to the temp git repo
    agent.set_working_dir(temp_path.to_string_lossy().to_string());
    // Create an unapproved plan for this session
    let mut plan = Plan::new("test-plan");
    plan.items.push(PlanItem {
        id: "I1".to_string(),
        description: "Test item".to_string(),
        state: PlanState::Todo,
        touches: vec!["src/test.rs".to_string()],
        checks: Checks {
            happy: Check::new("happy", "target"),
            negative: Check::new("negative", "target"),
            boundary: Check::new("boundary", "target"),
        },
        evidence: vec![],
        notes: None,
    });
    // Note: NOT calling plan.approve() - plan is unapproved
    write_plan(session_id, &plan).expect("Failed to write plan");
    // Execute task - the LLM will try to write a file
    let result = agent.execute_task(
        "Write a new file",
        None,  // language
        false  // auto_execute
    ).await;
    assert!(result.is_ok(), "Task should complete (with blocking message): {:?}", result.err());
    // The new file should NOT exist (it was reverted)
    assert!(
        !new_file_path.exists(),
        "New file should have been reverted/deleted"
    );
    // Check that the blocking message was returned
    let history = &agent.get_context_window().conversation_history;
    let has_blocking_message = history
        .iter()
        .any(|m| m.content.contains("IMPLEMENTATION BLOCKED"));
    assert!(has_blocking_message, "Should have blocking message in context");
 }