Add plan approval gate to block file changes without approved plan

- Add check_plan_approval_gate() in tools/plan.rs that runs after each tool call
- Detects file changes via git status --porcelain when plan exists but not approved
- Reverts changes: git checkout for modified files, rm for new untracked files
- Returns blocking message instructing LLM to create/approve plan first
- Add ApprovalGateResult enum with Allowed/Blocked/NotGitRepo variants
- Add set_session_id() and set_working_dir() methods on Agent for testing
- Add integration test using MockProvider to simulate blocked write_file
This commit is contained in:
Dhanji R. Prasanna
2026-02-05 11:34:10 +11:00
parent add8060526
commit c347a73cbd
5 changed files with 423 additions and 3 deletions

View File

@@ -47,6 +47,7 @@ pub use prompts::get_agent_system_prompt;
#[cfg(test)]
mod task_result_comprehensive_tests;
use crate::ui_writer::UiWriter;
use tools::plan::{check_plan_approval_gate, ApprovalGateResult};
#[cfg(test)]
mod tilde_expansion_tests;
@@ -753,6 +754,16 @@ impl<W: UiWriter> Agent<W> {
self.session_id.as_deref()
}
/// Assign the agent's session identifier, replacing any previous value.
///
/// Mainly useful in tests that need the agent bound to a known session.
pub fn set_session_id(&mut self, session_id: String) {
    self.session_id = Option::from(session_id);
}
/// Assign the agent's working directory, replacing any previous value.
///
/// Mainly useful in tests that need the agent pointed at a specific directory.
pub fn set_working_dir(&mut self, working_dir: String) {
    self.working_dir = Option::from(working_dir);
}
// =========================================================================
// TASK EXECUTION
// =========================================================================
@@ -2889,6 +2900,17 @@ Skip if nothing new. Be brief."#;
self.tool_calls_this_turn.push(tool_call.tool.clone());
let result = self.execute_tool_inner_in_dir(tool_call, working_dir).await;
// Check plan approval gate after tool execution
if let Some(session_id) = &self.session_id {
if let ApprovalGateResult::Blocked { message, .. } =
check_plan_approval_gate(session_id, working_dir)
{
// Return the blocking message instead of the tool result
return Ok(message);
}
}
let log_str = match &result {
Ok(s) => s.clone(),
Err(e) => format!("ERROR: {}", e),

View File

@@ -903,6 +903,77 @@ pub fn format_evaluation_results(eval: &RulespecEvaluation) -> String {
output
}
/// Render a rulespec's invariants as a human-readable markdown section.
///
/// The result is meant for tool output rather than raw YAML. It always starts
/// with a blank line and an `### Invariants (Rulespec)` heading. When the
/// rulespec has neither claims nor predicates, a single placeholder line
/// follows. Otherwise predicates sourced from the task prompt are listed under
/// `**From Task:**`, then predicates sourced from memory under
/// `**From Memory:**`; a group with no predicates is omitted.
pub fn format_rulespec_markdown(rulespec: &Rulespec) -> String {
    let mut markdown = String::from("\n### Invariants (Rulespec)\n\n");

    let nothing_defined = rulespec.claims.is_empty() && rulespec.predicates.is_empty();
    if nothing_defined {
        markdown.push_str("_No invariants defined._\n");
        return markdown;
    }

    // Index claims by name so each predicate can be displayed with its claim's selector.
    let mut claims_by_name: std::collections::HashMap<&str, &Claim> =
        std::collections::HashMap::new();
    for claim in rulespec.claims.iter() {
        claims_by_name.insert(claim.name.as_str(), claim);
    }

    // One entry per rendered group, in display order.
    let groups = [
        ("**From Task:**\n", InvariantSource::TaskPrompt),
        ("**From Memory:**\n", InvariantSource::Memory),
    ];
    for (heading, source) in groups.iter() {
        let mut members = rulespec
            .predicates
            .iter()
            .filter(|p| p.source == *source)
            .peekable();
        if members.peek().is_none() {
            // Nothing from this source: skip the heading as well.
            continue;
        }
        markdown.push_str(heading);
        for pred in members {
            format_predicate_markdown(&mut markdown, pred, &claims_by_name);
        }
        markdown.push_str("\n");
    }

    markdown
}
/// Append one predicate to `output` as a markdown list item.
///
/// The item shows the selector of the claim the predicate refers to (falling
/// back to the raw claim name when `claims` has no entry for it), the rule in
/// bold, and the value in backticks when one is set. Notes, if present, are
/// added on a following line as an italic sub-item.
fn format_predicate_markdown(
    output: &mut String,
    pred: &Predicate,
    claims: &std::collections::HashMap<&str, &Claim>,
) {
    let selector: &str = match claims.get(pred.claim.as_str()) {
        Some(claim) => claim.selector.as_str(),
        None => pred.claim.as_str(),
    };

    // Empty when the predicate carries no value, so the item ends right after the rule.
    let value_str = pred
        .value
        .as_ref()
        .map(|v| format!(" `{}`", yaml_to_display(v)))
        .unwrap_or_default();

    let item = format!("- `{}` **{}**{}\n", selector, pred.rule, value_str);
    output.push_str(&item);

    if let Some(notes) = pred.notes.as_ref() {
        let sub_item = format!(" - _{}_\n", notes);
        output.push_str(&sub_item);
    }
}
// ============================================================================
// Tests
// ============================================================================
@@ -1274,4 +1345,56 @@ mod tests {
// Should deserialize back
let _: ActionEnvelope = serde_yaml::from_str(&yaml).unwrap();
}
// ========================================================================
// Format Rulespec Markdown Tests
// ========================================================================
#[test]
fn test_format_rulespec_markdown_empty() {
    // A new rulespec is expected to be empty, which yields the heading plus the placeholder.
    let rendered = format_rulespec_markdown(&Rulespec::new());

    let expected = ["### Invariants (Rulespec)", "_No invariants defined._"];
    for fragment in expected.iter() {
        assert!(rendered.contains(*fragment), "missing fragment: {}", fragment);
    }
}
#[test]
fn test_format_rulespec_markdown_with_predicates() {
    // One predicate per source, both pointing at the same claim.
    let task_predicate =
        Predicate::new("caps", PredicateRule::Contains, InvariantSource::TaskPrompt)
            .with_value(YamlValue::String("handle_tsv".to_string()))
            .with_notes("User requested TSV support");
    let memory_predicate = Predicate::new("caps", PredicateRule::Exists, InvariantSource::Memory);

    let mut rulespec = Rulespec::new();
    rulespec.add_claim(Claim::new("caps", "csv_importer.capabilities"));
    rulespec.add_predicate(task_predicate);
    rulespec.add_predicate(memory_predicate);

    let rendered = format_rulespec_markdown(&rulespec);

    // Heading, both group labels, the claim's selector, and each predicate's details.
    let expected = [
        "### Invariants (Rulespec)",
        "**From Task:**",
        "**From Memory:**",
        "`csv_importer.capabilities`",
        "**contains**",
        "`handle_tsv`",
        "_User requested TSV support_",
        "**exists**",
    ];
    for fragment in expected.iter() {
        assert!(rendered.contains(*fragment), "missing fragment: {}", fragment);
    }
}
#[test]
fn test_format_rulespec_markdown_task_only() {
    // Only a task-prompt predicate is registered, so the memory group must be absent.
    let only_predicate = Predicate::new("test", PredicateRule::Exists, InvariantSource::TaskPrompt);

    let mut rulespec = Rulespec::new();
    rulespec.add_claim(Claim::new("test", "foo.bar"));
    rulespec.add_predicate(only_predicate);

    let rendered = format_rulespec_markdown(&rulespec);

    let has_task_group = rendered.contains("**From Task:**");
    let has_memory_group = rendered.contains("**From Memory:**");
    assert!(has_task_group);
    assert!(!has_memory_group);
}
}

View File

@@ -20,7 +20,7 @@ use crate::ToolCall;
use super::executor::ToolContext;
use super::invariants::{get_envelope_path, get_rulespec_path};
use super::invariants::{format_rulespec_markdown, get_envelope_path, get_rulespec_path, read_rulespec};
// ============================================================================
// Plan Schema
@@ -866,14 +866,30 @@ pub async fn execute_plan_write<W: UiWriter>(
let yaml = serde_yaml::to_string(&plan)?;
ctx.ui_writer.print_plan_compact(Some(&yaml), Some(&plan_path_str), true);
// Read and format rulespec if it exists
let rulespec_section = match read_rulespec(session_id) {
Ok(Some(rulespec)) => format_rulespec_markdown(&rulespec),
Ok(None) => "\n_No rulespec generated._\n".to_string(),
Err(_) => "\n_No rulespec generated._\n".to_string(),
};
// Check if plan is now complete and trigger verification
if plan.is_complete() && plan.is_approved() {
let verification = plan_verify(&plan, ctx.working_dir);
let verification_output = format_verification_results(&verification, ctx.session_id);
return Ok(format!("✅ Plan updated: {}\n{}", plan.status_summary(), verification_output));
return Ok(format!(
"✅ Plan updated: {}\n{}\n{}",
plan.status_summary(),
verification_output,
rulespec_section
));
}
Ok(format!("✅ Plan updated: {}", plan.status_summary()))
Ok(format!(
"✅ Plan updated: {}\n{}",
plan.status_summary(),
rulespec_section
))
}
/// Execute the `plan_approve` tool.
@@ -916,6 +932,124 @@ pub async fn execute_plan_approve<W: UiWriter>(
))
}
// ============================================================================
// Plan Approval Gate
// ============================================================================
/// Result of checking the plan approval gate after a tool execution.
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so outcomes can
/// be stored and compared directly (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ApprovalGateResult {
    /// No plan exists, or the plan is approved - allow the operation.
    Allowed,
    /// A plan exists but is not approved, and files were changed - blocked.
    Blocked {
        /// Message to inject into the conversation in place of the tool result.
        message: String,
        /// Files that were reverted, one human-readable entry per path.
        reverted_files: Vec<String>,
    },
    /// Not a git repository - the check is skipped.
    NotGitRepo,
}
/// Check if file changes occurred without an approved plan, and revert them if so.
///
/// Intended to be called after each tool execution. The steps run in this order:
///
/// 1. If `git rev-parse --git-dir` cannot be run or fails in `working_dir`
///    (default `"."`), return [`ApprovalGateResult::NotGitRepo`].
/// 2. If the session has no plan, the plan cannot be read, or the plan is
///    approved, return [`ApprovalGateResult::Allowed`].
/// 3. List changes with `git status --porcelain -z`. If the command fails or
///    reports nothing, return [`ApprovalGateResult::Allowed`].
/// 4. Delete every untracked path (files and directories) and restore every
///    other path with `git checkout --`, then return
///    [`ApprovalGateResult::Blocked`].
///
/// `reverted_files` in the blocked result lists only paths whose revert
/// actually succeeded. The message lists every changed path and marks the
/// ones that could not be reverted.
///
/// NOTE(review): the gate acts on *every* uncommitted change in the repository,
/// including changes that existed before the session started - confirm that
/// this is acceptable for users with a dirty working tree.
///
/// NOTE(review): `git checkout -- <path>` restores the working tree from the
/// index, so changes that were already staged (`git add`, `git mv`) are not
/// undone and will be reported again on the next call - confirm whether staged
/// changes should be reset as well.
pub fn check_plan_approval_gate(session_id: &str, working_dir: Option<&str>) -> ApprovalGateResult {
    let dir = std::path::Path::new(working_dir.unwrap_or("."));

    // Not a git repository, or git could not be spawned: nothing to check.
    if approval_gate_git_stdout(dir, &["rev-parse", "--git-dir"]).is_none() {
        return ApprovalGateResult::NotGitRepo;
    }

    // Only an existing, unapproved plan arms the gate.
    let plan = match read_plan(session_id) {
        Ok(Some(plan)) => plan,
        Ok(None) | Err(_) => return ApprovalGateResult::Allowed,
    };
    if plan.is_approved() {
        return ApprovalGateResult::Allowed;
    }

    // `-z` yields NUL-terminated entries with unquoted paths, so names containing
    // spaces, quotes or non-ASCII characters are parsed verbatim.
    let status = match approval_gate_git_stdout(dir, &["status", "--porcelain", "-z"]) {
        Some(status) => status,
        None => return ApprovalGateResult::Allowed, // Can't get status, allow
    };
    if status.is_empty() {
        return ApprovalGateResult::Allowed; // No changes
    }

    // Porcelain paths are relative to the repository root, not to `dir`, so every
    // revert must be resolved against the root. Fall back to `dir` if it is unknown.
    let repo_root = approval_gate_git_stdout(dir, &["rev-parse", "--show-toplevel"])
        .map(|out| out.trim_end_matches(|c: char| c == '\n' || c == '\r').to_string())
        .filter(|root| !root.is_empty())
        .map(std::path::PathBuf::from)
        .unwrap_or_else(|| dir.to_path_buf());

    let mut reverted_files = Vec::new();
    let mut listed = Vec::new();
    let mut fields = status.split('\0');
    while let Some(entry) = fields.next() {
        // Each entry is "XY <path>": two status characters, a space, then the path.
        let (code, path) = match (entry.get(..2), entry.get(3..)) {
            (Some(code), Some(path)) if !path.is_empty() => (code, path),
            _ => continue,
        };
        if code.contains(|c: char| c == 'R' || c == 'C') {
            // Rename/copy entries are followed by a second field holding the
            // source path; consume it so it is not parsed as its own entry.
            let _ = fields.next();
        }

        let (label, reverted) = if code == "??" {
            ("new file", approval_gate_remove_untracked(&repo_root.join(path)))
        } else {
            // `--literal-pathspecs` keeps glob characters or a leading `:` in a
            // file name from being interpreted as pathspec syntax.
            let restored = approval_gate_git_stdout(
                &repo_root,
                &["--literal-pathspecs", "checkout", "--", path],
            )
            .is_some();
            ("modified", restored)
        };

        if reverted {
            let item = format!("{} ({})", path, label);
            listed.push(item.clone());
            reverted_files.push(item);
        } else {
            listed.push(format!("{} (revert failed)", path));
        }
    }

    if listed.is_empty() {
        return ApprovalGateResult::Allowed;
    }

    let files_list = listed
        .iter()
        .map(|f| format!(" - {}", f))
        .collect::<Vec<_>>()
        .join("\n");

    // Only claim a full revert when every listed path really was reverted.
    let outcome = if listed.len() == reverted_files.len() {
        "These changes have been reverted."
    } else {
        "Entries marked (revert failed) could not be reverted automatically; \
         all other changes have been reverted."
    };

    let message = format!(
        "⚠️ IMPLEMENTATION BLOCKED\n\n\
         You modified files without an approved plan:\n\
         {}\n\n\
         {}\n\n\
         Before implementing, you must:\n\
         1. Create a plan with `plan_write`\n\
         2. Request the user's explicit approval or edits to plan\n\n\
         Do not attempt to implement until the plan is approved.",
        files_list, outcome
    );

    ApprovalGateResult::Blocked {
        message,
        reverted_files,
    }
}

/// Run `git <args>` in `dir`, returning its stdout (lossily decoded) only when
/// the process could be spawned and exited successfully.
fn approval_gate_git_stdout(dir: &std::path::Path, args: &[&str]) -> Option<String> {
    let output = std::process::Command::new("git")
        .args(args)
        .current_dir(dir)
        .output()
        .ok()?;
    if output.status.success() {
        Some(String::from_utf8_lossy(&output.stdout).into_owned())
    } else {
        None
    }
}

/// Delete an untracked path, returning whether the removal succeeded.
fn approval_gate_remove_untracked(target: &std::path::Path) -> bool {
    // git reports an untracked directory as a single `dir/` entry, which
    // `remove_file` cannot delete. `symlink_metadata` does not follow links, so
    // a symlink pointing at a directory is removed as a link.
    let is_real_dir = std::fs::symlink_metadata(target)
        .map(|meta| meta.is_dir())
        .unwrap_or(false);
    let removed = if is_real_dir {
        std::fs::remove_dir_all(target)
    } else {
        std::fs::remove_file(target)
    };
    removed.is_ok()
}
#[cfg(test)]
mod tests {
use super::*;
@@ -1202,4 +1336,18 @@ items: []
assert!(VerificationStatus::Warning("warn".to_string()).is_warning_or_error());
assert!(VerificationStatus::Error("err".to_string()).is_warning_or_error());
}
#[test]
fn test_approval_gate_no_plan() {
    // No plan is expected to exist for this made-up session id, so the gate must allow.
    // NOTE(review): this also assumes "." is inside a git repository; otherwise the
    // gate returns NotGitRepo before it ever looks for a plan - confirm for CI.
    let outcome = check_plan_approval_gate("nonexistent-session-xyz", Some("."));
    assert!(
        matches!(outcome, ApprovalGateResult::Allowed),
        "expected Allowed, got {:?}",
        outcome
    );
}
#[test]
fn test_approval_gate_not_git_repo() {
    // NOTE(review): assumes /tmp exists and is not inside a git work tree, which is
    // typical but not guaranteed on every machine - confirm for CI.
    let outcome = check_plan_approval_gate("any-session", Some("/tmp"));
    assert!(
        matches!(outcome, ApprovalGateResult::NotGitRepo),
        "expected NotGitRepo, got {:?}",
        outcome
    );
}
}