Add plan approval gate to block file changes without approved plan
- Add check_plan_approval_gate() in tools/plan.rs that runs after each tool call - Detects file changes via git status --porcelain when plan exists but not approved - Reverts changes: git checkout for modified files, rm for new untracked files - Returns blocking message instructing LLM to create/approve plan first - Add ApprovalGateResult enum with Allowed/Blocked/NotGitRepo variants - Add set_session_id() and set_working_dir() methods on Agent for testing - Add integration test using MockProvider to simulate blocked write_file
This commit is contained in:
1
crates/g3-core/new_file.txt
Normal file
1
crates/g3-core/new_file.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
This should be blocked!
|
||||||
@@ -47,6 +47,7 @@ pub use prompts::get_agent_system_prompt;
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod task_result_comprehensive_tests;
|
mod task_result_comprehensive_tests;
|
||||||
use crate::ui_writer::UiWriter;
|
use crate::ui_writer::UiWriter;
|
||||||
|
use tools::plan::{check_plan_approval_gate, ApprovalGateResult};
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tilde_expansion_tests;
|
mod tilde_expansion_tests;
|
||||||
@@ -753,6 +754,16 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
self.session_id.as_deref()
|
self.session_id.as_deref()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the session ID (useful for testing)
|
||||||
|
pub fn set_session_id(&mut self, session_id: String) {
|
||||||
|
self.session_id = Some(session_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the working directory (useful for testing)
|
||||||
|
pub fn set_working_dir(&mut self, working_dir: String) {
|
||||||
|
self.working_dir = Some(working_dir);
|
||||||
|
}
|
||||||
|
|
||||||
// =========================================================================
|
// =========================================================================
|
||||||
// TASK EXECUTION
|
// TASK EXECUTION
|
||||||
// =========================================================================
|
// =========================================================================
|
||||||
@@ -2889,6 +2900,17 @@ Skip if nothing new. Be brief."#;
|
|||||||
self.tool_calls_this_turn.push(tool_call.tool.clone());
|
self.tool_calls_this_turn.push(tool_call.tool.clone());
|
||||||
|
|
||||||
let result = self.execute_tool_inner_in_dir(tool_call, working_dir).await;
|
let result = self.execute_tool_inner_in_dir(tool_call, working_dir).await;
|
||||||
|
|
||||||
|
// Check plan approval gate after tool execution
|
||||||
|
if let Some(session_id) = &self.session_id {
|
||||||
|
if let ApprovalGateResult::Blocked { message, .. } =
|
||||||
|
check_plan_approval_gate(session_id, working_dir)
|
||||||
|
{
|
||||||
|
// Return the blocking message instead of the tool result
|
||||||
|
return Ok(message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let log_str = match &result {
|
let log_str = match &result {
|
||||||
Ok(s) => s.clone(),
|
Ok(s) => s.clone(),
|
||||||
Err(e) => format!("ERROR: {}", e),
|
Err(e) => format!("ERROR: {}", e),
|
||||||
|
|||||||
@@ -903,6 +903,77 @@ pub fn format_evaluation_results(eval: &RulespecEvaluation) -> String {
|
|||||||
output
|
output
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Format a rulespec as human-readable markdown.
|
||||||
|
///
|
||||||
|
/// This produces a rich, readable format suitable for tool output,
|
||||||
|
/// not raw YAML.
|
||||||
|
pub fn format_rulespec_markdown(rulespec: &Rulespec) -> String {
|
||||||
|
let mut output = String::new();
|
||||||
|
|
||||||
|
output.push_str("\n");
|
||||||
|
output.push_str("### Invariants (Rulespec)\n\n");
|
||||||
|
|
||||||
|
if rulespec.claims.is_empty() && rulespec.predicates.is_empty() {
|
||||||
|
output.push_str("_No invariants defined._\n");
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Group predicates by source
|
||||||
|
let task_predicates: Vec<_> = rulespec.predicates.iter()
|
||||||
|
.filter(|p| p.source == InvariantSource::TaskPrompt)
|
||||||
|
.collect();
|
||||||
|
let memory_predicates: Vec<_> = rulespec.predicates.iter()
|
||||||
|
.filter(|p| p.source == InvariantSource::Memory)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Build claim lookup for selector display
|
||||||
|
let claims: std::collections::HashMap<&str, &Claim> = rulespec.claims.iter()
|
||||||
|
.map(|c| (c.name.as_str(), c))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Format predicates from task prompt
|
||||||
|
if !task_predicates.is_empty() {
|
||||||
|
output.push_str("**From Task:**\n");
|
||||||
|
for pred in &task_predicates {
|
||||||
|
format_predicate_markdown(&mut output, pred, &claims);
|
||||||
|
}
|
||||||
|
output.push_str("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format predicates from memory
|
||||||
|
if !memory_predicates.is_empty() {
|
||||||
|
output.push_str("**From Memory:**\n");
|
||||||
|
for pred in &memory_predicates {
|
||||||
|
format_predicate_markdown(&mut output, pred, &claims);
|
||||||
|
}
|
||||||
|
output.push_str("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
output
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Format a single predicate as a markdown list item.
|
||||||
|
fn format_predicate_markdown(
|
||||||
|
output: &mut String,
|
||||||
|
pred: &Predicate,
|
||||||
|
claims: &std::collections::HashMap<&str, &Claim>,
|
||||||
|
) {
|
||||||
|
let selector = claims.get(pred.claim.as_str())
|
||||||
|
.map(|c| c.selector.as_str())
|
||||||
|
.unwrap_or(&pred.claim);
|
||||||
|
|
||||||
|
let value_str = match &pred.value {
|
||||||
|
Some(v) => format!(" `{}`", yaml_to_display(v)),
|
||||||
|
None => String::new(),
|
||||||
|
};
|
||||||
|
|
||||||
|
output.push_str(&format!("- `{}` **{}**{}\n", selector, pred.rule, value_str));
|
||||||
|
|
||||||
|
if let Some(notes) = &pred.notes {
|
||||||
|
output.push_str(&format!(" - _{}_\n", notes));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Tests
|
// Tests
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
@@ -1274,4 +1345,56 @@ mod tests {
|
|||||||
// Should deserialize back
|
// Should deserialize back
|
||||||
let _: ActionEnvelope = serde_yaml::from_str(&yaml).unwrap();
|
let _: ActionEnvelope = serde_yaml::from_str(&yaml).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ========================================================================
|
||||||
|
// Format Rulespec Markdown Tests
|
||||||
|
// ========================================================================
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_format_rulespec_markdown_empty() {
|
||||||
|
let rulespec = Rulespec::new();
|
||||||
|
let output = format_rulespec_markdown(&rulespec);
|
||||||
|
|
||||||
|
assert!(output.contains("### Invariants (Rulespec)"));
|
||||||
|
assert!(output.contains("_No invariants defined._"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_format_rulespec_markdown_with_predicates() {
|
||||||
|
let mut rulespec = Rulespec::new();
|
||||||
|
rulespec.add_claim(Claim::new("caps", "csv_importer.capabilities"));
|
||||||
|
rulespec.add_predicate(
|
||||||
|
Predicate::new("caps", PredicateRule::Contains, InvariantSource::TaskPrompt)
|
||||||
|
.with_value(YamlValue::String("handle_tsv".to_string()))
|
||||||
|
.with_notes("User requested TSV support")
|
||||||
|
);
|
||||||
|
rulespec.add_predicate(
|
||||||
|
Predicate::new("caps", PredicateRule::Exists, InvariantSource::Memory)
|
||||||
|
);
|
||||||
|
|
||||||
|
let output = format_rulespec_markdown(&rulespec);
|
||||||
|
|
||||||
|
assert!(output.contains("### Invariants (Rulespec)"));
|
||||||
|
assert!(output.contains("**From Task:**"));
|
||||||
|
assert!(output.contains("**From Memory:**"));
|
||||||
|
assert!(output.contains("`csv_importer.capabilities`"));
|
||||||
|
assert!(output.contains("**contains**"));
|
||||||
|
assert!(output.contains("`handle_tsv`"));
|
||||||
|
assert!(output.contains("_User requested TSV support_"));
|
||||||
|
assert!(output.contains("**exists**"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_format_rulespec_markdown_task_only() {
|
||||||
|
let mut rulespec = Rulespec::new();
|
||||||
|
rulespec.add_claim(Claim::new("test", "foo.bar"));
|
||||||
|
rulespec.add_predicate(
|
||||||
|
Predicate::new("test", PredicateRule::Exists, InvariantSource::TaskPrompt)
|
||||||
|
);
|
||||||
|
|
||||||
|
let output = format_rulespec_markdown(&rulespec);
|
||||||
|
|
||||||
|
assert!(output.contains("**From Task:**"));
|
||||||
|
assert!(!output.contains("**From Memory:**"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ use crate::ToolCall;
|
|||||||
|
|
||||||
use super::executor::ToolContext;
|
use super::executor::ToolContext;
|
||||||
|
|
||||||
use super::invariants::{get_envelope_path, get_rulespec_path};
|
use super::invariants::{format_rulespec_markdown, get_envelope_path, get_rulespec_path, read_rulespec};
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Plan Schema
|
// Plan Schema
|
||||||
@@ -866,14 +866,30 @@ pub async fn execute_plan_write<W: UiWriter>(
|
|||||||
let yaml = serde_yaml::to_string(&plan)?;
|
let yaml = serde_yaml::to_string(&plan)?;
|
||||||
ctx.ui_writer.print_plan_compact(Some(&yaml), Some(&plan_path_str), true);
|
ctx.ui_writer.print_plan_compact(Some(&yaml), Some(&plan_path_str), true);
|
||||||
|
|
||||||
|
// Read and format rulespec if it exists
|
||||||
|
let rulespec_section = match read_rulespec(session_id) {
|
||||||
|
Ok(Some(rulespec)) => format_rulespec_markdown(&rulespec),
|
||||||
|
Ok(None) => "\n_No rulespec generated._\n".to_string(),
|
||||||
|
Err(_) => "\n_No rulespec generated._\n".to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
// Check if plan is now complete and trigger verification
|
// Check if plan is now complete and trigger verification
|
||||||
if plan.is_complete() && plan.is_approved() {
|
if plan.is_complete() && plan.is_approved() {
|
||||||
let verification = plan_verify(&plan, ctx.working_dir);
|
let verification = plan_verify(&plan, ctx.working_dir);
|
||||||
let verification_output = format_verification_results(&verification, ctx.session_id);
|
let verification_output = format_verification_results(&verification, ctx.session_id);
|
||||||
return Ok(format!("✅ Plan updated: {}\n{}", plan.status_summary(), verification_output));
|
return Ok(format!(
|
||||||
|
"✅ Plan updated: {}\n{}\n{}",
|
||||||
|
plan.status_summary(),
|
||||||
|
verification_output,
|
||||||
|
rulespec_section
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(format!("✅ Plan updated: {}", plan.status_summary()))
|
Ok(format!(
|
||||||
|
"✅ Plan updated: {}\n{}",
|
||||||
|
plan.status_summary(),
|
||||||
|
rulespec_section
|
||||||
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Execute the `plan_approve` tool.
|
/// Execute the `plan_approve` tool.
|
||||||
@@ -916,6 +932,124 @@ pub async fn execute_plan_approve<W: UiWriter>(
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Plan Approval Gate
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/// Result of checking the plan approval gate after a tool execution.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum ApprovalGateResult {
|
||||||
|
/// No plan exists, or plan is approved - allow the operation
|
||||||
|
Allowed,
|
||||||
|
/// Plan exists but not approved, and files were changed - blocked
|
||||||
|
Blocked {
|
||||||
|
/// Message to inject into the conversation
|
||||||
|
message: String,
|
||||||
|
/// Files that were reverted
|
||||||
|
reverted_files: Vec<String>,
|
||||||
|
},
|
||||||
|
/// Not a git repository - skip the check
|
||||||
|
NotGitRepo,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if file changes occurred without an approved plan, and revert them if so.
|
||||||
|
///
|
||||||
|
/// This function should be called after each tool execution when in plan mode.
|
||||||
|
/// It checks `git status --porcelain` for changes, and if a plan exists but isn't
|
||||||
|
/// approved, it reverts those changes and returns a blocking message.
|
||||||
|
pub fn check_plan_approval_gate(session_id: &str, working_dir: Option<&str>) -> ApprovalGateResult {
|
||||||
|
let dir = working_dir.unwrap_or(".");
|
||||||
|
|
||||||
|
// Check if this is a git repository
|
||||||
|
let git_check = std::process::Command::new("git")
|
||||||
|
.args(["rev-parse", "--git-dir"])
|
||||||
|
.current_dir(dir)
|
||||||
|
.output();
|
||||||
|
|
||||||
|
if git_check.is_err() || !git_check.unwrap().status.success() {
|
||||||
|
return ApprovalGateResult::NotGitRepo;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if a plan exists and whether it's approved
|
||||||
|
let plan = match read_plan(session_id) {
|
||||||
|
Ok(Some(plan)) => plan,
|
||||||
|
Ok(None) => return ApprovalGateResult::Allowed, // No plan, allow
|
||||||
|
Err(_) => return ApprovalGateResult::Allowed, // Can't read plan, allow
|
||||||
|
};
|
||||||
|
|
||||||
|
if plan.is_approved() {
|
||||||
|
return ApprovalGateResult::Allowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Plan exists but not approved - check for file changes
|
||||||
|
let status_output = std::process::Command::new("git")
|
||||||
|
.args(["status", "--porcelain"])
|
||||||
|
.current_dir(dir)
|
||||||
|
.output();
|
||||||
|
|
||||||
|
let output = match status_output {
|
||||||
|
Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(),
|
||||||
|
_ => return ApprovalGateResult::Allowed, // Can't get status, allow
|
||||||
|
};
|
||||||
|
|
||||||
|
if output.trim().is_empty() {
|
||||||
|
return ApprovalGateResult::Allowed; // No changes
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse changed files and revert them
|
||||||
|
let mut reverted_files = Vec::new();
|
||||||
|
|
||||||
|
for line in output.lines() {
|
||||||
|
if line.len() < 3 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let status = &line[0..2];
|
||||||
|
let file_path = line[3..].trim();
|
||||||
|
|
||||||
|
match status {
|
||||||
|
"??" => {
|
||||||
|
// Untracked file - remove it
|
||||||
|
let _ = std::fs::remove_file(std::path::Path::new(dir).join(file_path));
|
||||||
|
reverted_files.push(format!("{} (new file)", file_path));
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
// Modified/added/deleted tracked file - git checkout
|
||||||
|
let _ = std::process::Command::new("git")
|
||||||
|
.args(["checkout", "--", file_path])
|
||||||
|
.current_dir(dir)
|
||||||
|
.output();
|
||||||
|
reverted_files.push(format!("{} (modified)", file_path));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if reverted_files.is_empty() {
|
||||||
|
return ApprovalGateResult::Allowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
let files_list = reverted_files.iter()
|
||||||
|
.map(|f| format!(" - {}", f))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("\n");
|
||||||
|
|
||||||
|
let message = format!(
|
||||||
|
"⚠️ IMPLEMENTATION BLOCKED\n\n\
|
||||||
|
You modified files without an approved plan:\n\
|
||||||
|
{}\n\n\
|
||||||
|
These changes have been reverted.\n\n\
|
||||||
|
Before implementing, you must:\n\
|
||||||
|
1. Create a plan with `plan_write`\n\
|
||||||
|
2. Request the user's explicit approval or edits to plan\n\n\
|
||||||
|
Do not attempt to implement until the plan is approved.",
|
||||||
|
files_list
|
||||||
|
);
|
||||||
|
|
||||||
|
ApprovalGateResult::Blocked {
|
||||||
|
message,
|
||||||
|
reverted_files,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@@ -1202,4 +1336,18 @@ items: []
|
|||||||
assert!(VerificationStatus::Warning("warn".to_string()).is_warning_or_error());
|
assert!(VerificationStatus::Warning("warn".to_string()).is_warning_or_error());
|
||||||
assert!(VerificationStatus::Error("err".to_string()).is_warning_or_error());
|
assert!(VerificationStatus::Error("err".to_string()).is_warning_or_error());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_approval_gate_no_plan() {
|
||||||
|
// With a non-existent session, there's no plan, so it should allow
|
||||||
|
let result = check_plan_approval_gate("nonexistent-session-xyz", Some("."));
|
||||||
|
assert!(matches!(result, ApprovalGateResult::Allowed));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_approval_gate_not_git_repo() {
|
||||||
|
// /tmp is typically not a git repo
|
||||||
|
let result = check_plan_approval_gate("any-session", Some("/tmp"));
|
||||||
|
assert!(matches!(result, ApprovalGateResult::NotGitRepo));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -822,3 +822,129 @@ async fn test_llm_repeats_text_before_each_tool_call() {
|
|||||||
preamble_count
|
preamble_count
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Plan Approval Gate Tests
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/// Test: File changes are blocked when plan exists but is not approved
|
||||||
|
///
|
||||||
|
/// Scenario:
|
||||||
|
/// 1. Create a plan (unapproved)
|
||||||
|
/// 2. LLM tries to write a file
|
||||||
|
/// 3. The file change should be reverted and a blocking message returned
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_plan_approval_gate_blocks_unapproved_changes() {
|
||||||
|
use g3_core::tools::plan::{write_plan, Plan, PlanItem, Checks, Check, PlanState};
|
||||||
|
use std::fs;
|
||||||
|
|
||||||
|
// Create a temp directory that IS a git repo
|
||||||
|
let temp_dir = TempDir::new().unwrap();
|
||||||
|
let temp_path = temp_dir.path();
|
||||||
|
|
||||||
|
// Initialize git repo
|
||||||
|
std::process::Command::new("git")
|
||||||
|
.args(["init"])
|
||||||
|
.current_dir(temp_path)
|
||||||
|
.output()
|
||||||
|
.expect("Failed to init git repo");
|
||||||
|
|
||||||
|
// Configure git user for the repo (needed for commits)
|
||||||
|
std::process::Command::new("git")
|
||||||
|
.args(["config", "user.email", "test@test.com"])
|
||||||
|
.current_dir(temp_path)
|
||||||
|
.output()
|
||||||
|
.expect("Failed to configure git email");
|
||||||
|
std::process::Command::new("git")
|
||||||
|
.args(["config", "user.name", "Test"])
|
||||||
|
.current_dir(temp_path)
|
||||||
|
.output()
|
||||||
|
.expect("Failed to configure git name");
|
||||||
|
|
||||||
|
// Create an initial commit so we have a clean state
|
||||||
|
let readme_path = temp_path.join("README.md");
|
||||||
|
fs::write(&readme_path, "# Test").unwrap();
|
||||||
|
std::process::Command::new("git")
|
||||||
|
.args(["add", "."])
|
||||||
|
.current_dir(temp_path)
|
||||||
|
.output()
|
||||||
|
.expect("Failed to git add");
|
||||||
|
std::process::Command::new("git")
|
||||||
|
.args(["commit", "-m", "initial"])
|
||||||
|
.current_dir(temp_path)
|
||||||
|
.output()
|
||||||
|
.expect("Failed to git commit");
|
||||||
|
|
||||||
|
// Use absolute path so the file is written to the temp git repo
|
||||||
|
let new_file_path = temp_path.join("new_file.txt");
|
||||||
|
|
||||||
|
// Create a mock provider that will try to write a file
|
||||||
|
let provider = MockProvider::new()
|
||||||
|
.with_native_tool_calling(true)
|
||||||
|
.with_response(MockResponse::native_tool_call(
|
||||||
|
"write_file",
|
||||||
|
serde_json::json!({
|
||||||
|
"file_path": new_file_path.to_string_lossy(),
|
||||||
|
"content": "This should be blocked!"
|
||||||
|
}),
|
||||||
|
))
|
||||||
|
.with_default_response(MockResponse::text("I tried to write a file."));
|
||||||
|
|
||||||
|
// Create agent with a specific session ID
|
||||||
|
let mut registry = ProviderRegistry::new();
|
||||||
|
registry.register(provider);
|
||||||
|
let config = g3_config::Config::default();
|
||||||
|
|
||||||
|
let mut agent = Agent::new_for_test(config, NullUiWriter, registry)
|
||||||
|
.await
|
||||||
|
.expect("Failed to create agent");
|
||||||
|
|
||||||
|
// Set a session ID so the plan can be found
|
||||||
|
let session_id = "test-approval-gate-session";
|
||||||
|
agent.set_session_id(session_id.to_string());
|
||||||
|
|
||||||
|
// Set the working directory to the temp git repo
|
||||||
|
agent.set_working_dir(temp_path.to_string_lossy().to_string());
|
||||||
|
|
||||||
|
// Create an unapproved plan for this session
|
||||||
|
let mut plan = Plan::new("test-plan");
|
||||||
|
plan.items.push(PlanItem {
|
||||||
|
id: "I1".to_string(),
|
||||||
|
description: "Test item".to_string(),
|
||||||
|
state: PlanState::Todo,
|
||||||
|
touches: vec!["src/test.rs".to_string()],
|
||||||
|
checks: Checks {
|
||||||
|
happy: Check::new("happy", "target"),
|
||||||
|
negative: Check::new("negative", "target"),
|
||||||
|
boundary: Check::new("boundary", "target"),
|
||||||
|
},
|
||||||
|
evidence: vec![],
|
||||||
|
notes: None,
|
||||||
|
});
|
||||||
|
// Note: NOT calling plan.approve() - plan is unapproved
|
||||||
|
|
||||||
|
write_plan(session_id, &plan).expect("Failed to write plan");
|
||||||
|
|
||||||
|
// Execute task - the LLM will try to write a file
|
||||||
|
let result = agent.execute_task(
|
||||||
|
"Write a new file",
|
||||||
|
None, // language
|
||||||
|
false // auto_execute
|
||||||
|
).await;
|
||||||
|
|
||||||
|
assert!(result.is_ok(), "Task should complete (with blocking message): {:?}", result.err());
|
||||||
|
|
||||||
|
// The new file should NOT exist (it was reverted)
|
||||||
|
assert!(
|
||||||
|
!new_file_path.exists(),
|
||||||
|
"New file should have been reverted/deleted"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Check that the blocking message was returned
|
||||||
|
let history = &agent.get_context_window().conversation_history;
|
||||||
|
let has_blocking_message = history
|
||||||
|
.iter()
|
||||||
|
.any(|m| m.content.contains("IMPLEMENTATION BLOCKED"));
|
||||||
|
|
||||||
|
assert!(has_blocking_message, "Should have blocking message in context");
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user