diff --git a/crates/g3-core/new_file.txt b/crates/g3-core/new_file.txt new file mode 100644 index 0000000..d2386ac --- /dev/null +++ b/crates/g3-core/new_file.txt @@ -0,0 +1 @@ +This should be blocked! \ No newline at end of file diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 4d4e706..ecbb7c1 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -47,6 +47,7 @@ pub use prompts::get_agent_system_prompt; #[cfg(test)] mod task_result_comprehensive_tests; use crate::ui_writer::UiWriter; +use tools::plan::{check_plan_approval_gate, ApprovalGateResult}; #[cfg(test)] mod tilde_expansion_tests; @@ -753,6 +754,16 @@ impl Agent { self.session_id.as_deref() } + /// Set the session ID (useful for testing) + pub fn set_session_id(&mut self, session_id: String) { + self.session_id = Some(session_id); + } + + /// Set the working directory (useful for testing) + pub fn set_working_dir(&mut self, working_dir: String) { + self.working_dir = Some(working_dir); + } + // ========================================================================= // TASK EXECUTION // ========================================================================= @@ -2889,6 +2900,17 @@ Skip if nothing new. Be brief."#; self.tool_calls_this_turn.push(tool_call.tool.clone()); let result = self.execute_tool_inner_in_dir(tool_call, working_dir).await; + + // Check plan approval gate after tool execution + if let Some(session_id) = &self.session_id { + if let ApprovalGateResult::Blocked { message, .. } = + check_plan_approval_gate(session_id, working_dir) + { + // Return the blocking message instead of the tool result + return Ok(message); + } + } + let log_str = match &result { Ok(s) => s.clone(), Err(e) => format!("ERROR: {}", e), diff --git a/crates/g3-core/src/tools/invariants.rs b/crates/g3-core/src/tools/invariants.rs index 284ce58..e5f277e 100644 --- a/crates/g3-core/src/tools/invariants.rs +++ b/crates/g3-core/src/tools/invariants.rs @@ -903,6 +903,77 @@ pub fn format_evaluation_results(eval: &RulespecEvaluation) -> String { output } +/// Format a rulespec as human-readable markdown. +/// +/// This produces a rich, readable format suitable for tool output, +/// not raw YAML. +pub fn format_rulespec_markdown(rulespec: &Rulespec) -> String { + let mut output = String::new(); + + output.push_str("\n"); + output.push_str("### Invariants (Rulespec)\n\n"); + + if rulespec.claims.is_empty() && rulespec.predicates.is_empty() { + output.push_str("_No invariants defined._\n"); + return output; + } + + // Group predicates by source + let task_predicates: Vec<_> = rulespec.predicates.iter() + .filter(|p| p.source == InvariantSource::TaskPrompt) + .collect(); + let memory_predicates: Vec<_> = rulespec.predicates.iter() + .filter(|p| p.source == InvariantSource::Memory) + .collect(); + + // Build claim lookup for selector display + let claims: std::collections::HashMap<&str, &Claim> = rulespec.claims.iter() + .map(|c| (c.name.as_str(), c)) + .collect(); + + // Format predicates from task prompt + if !task_predicates.is_empty() { + output.push_str("**From Task:**\n"); + for pred in &task_predicates { + format_predicate_markdown(&mut output, pred, &claims); + } + output.push_str("\n"); + } + + // Format predicates from memory + if !memory_predicates.is_empty() { + output.push_str("**From Memory:**\n"); + for pred in &memory_predicates { + format_predicate_markdown(&mut output, pred, &claims); + } + output.push_str("\n"); + } + + output +} + +/// Format a single predicate as a markdown list item. +fn format_predicate_markdown( + output: &mut String, + pred: &Predicate, + claims: &std::collections::HashMap<&str, &Claim>, +) { + let selector = claims.get(pred.claim.as_str()) + .map(|c| c.selector.as_str()) + .unwrap_or(&pred.claim); + + let value_str = match &pred.value { + Some(v) => format!(" `{}`", yaml_to_display(v)), + None => String::new(), + }; + + output.push_str(&format!("- `{}` **{}**{}\n", selector, pred.rule, value_str)); + + if let Some(notes) = &pred.notes { + output.push_str(&format!(" - _{}_\n", notes)); + } +} + // ============================================================================ // Tests // ============================================================================ @@ -1274,4 +1345,56 @@ mod tests { // Should deserialize back let _: ActionEnvelope = serde_yaml::from_str(&yaml).unwrap(); } + + // ======================================================================== + // Format Rulespec Markdown Tests + // ======================================================================== + + #[test] + fn test_format_rulespec_markdown_empty() { + let rulespec = Rulespec::new(); + let output = format_rulespec_markdown(&rulespec); + + assert!(output.contains("### Invariants (Rulespec)")); + assert!(output.contains("_No invariants defined._")); + } + + #[test] + fn test_format_rulespec_markdown_with_predicates() { + let mut rulespec = Rulespec::new(); + rulespec.add_claim(Claim::new("caps", "csv_importer.capabilities")); + rulespec.add_predicate( + Predicate::new("caps", PredicateRule::Contains, InvariantSource::TaskPrompt) + .with_value(YamlValue::String("handle_tsv".to_string())) + .with_notes("User requested TSV support") + ); + rulespec.add_predicate( + Predicate::new("caps", PredicateRule::Exists, InvariantSource::Memory) + ); + + let output = format_rulespec_markdown(&rulespec); + + assert!(output.contains("### Invariants (Rulespec)")); + assert!(output.contains("**From Task:**")); + assert!(output.contains("**From Memory:**")); + assert!(output.contains("`csv_importer.capabilities`")); + assert!(output.contains("**contains**")); + assert!(output.contains("`handle_tsv`")); + assert!(output.contains("_User requested TSV support_")); + assert!(output.contains("**exists**")); + } + + #[test] + fn test_format_rulespec_markdown_task_only() { + let mut rulespec = Rulespec::new(); + rulespec.add_claim(Claim::new("test", "foo.bar")); + rulespec.add_predicate( + Predicate::new("test", PredicateRule::Exists, InvariantSource::TaskPrompt) + ); + + let output = format_rulespec_markdown(&rulespec); + + assert!(output.contains("**From Task:**")); + assert!(!output.contains("**From Memory:**")); + } } diff --git a/crates/g3-core/src/tools/plan.rs b/crates/g3-core/src/tools/plan.rs index 4d77b1b..e1d8c81 100644 --- a/crates/g3-core/src/tools/plan.rs +++ b/crates/g3-core/src/tools/plan.rs @@ -20,7 +20,7 @@ use crate::ToolCall; use super::executor::ToolContext; -use super::invariants::{get_envelope_path, get_rulespec_path}; +use super::invariants::{format_rulespec_markdown, get_envelope_path, get_rulespec_path, read_rulespec}; // ============================================================================ // Plan Schema @@ -866,14 +866,30 @@ pub async fn execute_plan_write( let yaml = serde_yaml::to_string(&plan)?; ctx.ui_writer.print_plan_compact(Some(&yaml), Some(&plan_path_str), true); + // Read and format rulespec if it exists + let rulespec_section = match read_rulespec(session_id) { + Ok(Some(rulespec)) => format_rulespec_markdown(&rulespec), + Ok(None) => "\n_No rulespec generated._\n".to_string(), + Err(_) => "\n_No rulespec generated._\n".to_string(), + }; + // Check if plan is now complete and trigger verification if plan.is_complete() && plan.is_approved() { let verification = plan_verify(&plan, ctx.working_dir); let verification_output = format_verification_results(&verification, ctx.session_id); - return Ok(format!("✅ Plan updated: {}\n{}", plan.status_summary(), verification_output)); + return Ok(format!( + "✅ Plan updated: {}\n{}\n{}", + plan.status_summary(), + verification_output, + rulespec_section + )); } - Ok(format!("✅ Plan updated: {}", plan.status_summary())) + Ok(format!( + "✅ Plan updated: {}\n{}", + plan.status_summary(), + rulespec_section + )) } /// Execute the `plan_approve` tool. @@ -916,6 +932,124 @@ pub async fn execute_plan_approve( )) } +// ============================================================================ +// Plan Approval Gate +// ============================================================================ + +/// Result of checking the plan approval gate after a tool execution. +#[derive(Debug)] +pub enum ApprovalGateResult { + /// No plan exists, or plan is approved - allow the operation + Allowed, + /// Plan exists but not approved, and files were changed - blocked + Blocked { + /// Message to inject into the conversation + message: String, + /// Files that were reverted + reverted_files: Vec, + }, + /// Not a git repository - skip the check + NotGitRepo, +} + +/// Check if file changes occurred without an approved plan, and revert them if so. +/// +/// This function should be called after each tool execution when in plan mode. +/// It checks `git status --porcelain` for changes, and if a plan exists but isn't +/// approved, it reverts those changes and returns a blocking message. +pub fn check_plan_approval_gate(session_id: &str, working_dir: Option<&str>) -> ApprovalGateResult { + let dir = working_dir.unwrap_or("."); + + // Check if this is a git repository + let git_check = std::process::Command::new("git") + .args(["rev-parse", "--git-dir"]) + .current_dir(dir) + .output(); + + if git_check.is_err() || !git_check.unwrap().status.success() { + return ApprovalGateResult::NotGitRepo; + } + + // Check if a plan exists and whether it's approved + let plan = match read_plan(session_id) { + Ok(Some(plan)) => plan, + Ok(None) => return ApprovalGateResult::Allowed, // No plan, allow + Err(_) => return ApprovalGateResult::Allowed, // Can't read plan, allow + }; + + if plan.is_approved() { + return ApprovalGateResult::Allowed; + } + + // Plan exists but not approved - check for file changes + let status_output = std::process::Command::new("git") + .args(["status", "--porcelain"]) + .current_dir(dir) + .output(); + + let output = match status_output { + Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(), + _ => return ApprovalGateResult::Allowed, // Can't get status, allow + }; + + if output.trim().is_empty() { + return ApprovalGateResult::Allowed; // No changes + } + + // Parse changed files and revert them + let mut reverted_files = Vec::new(); + + for line in output.lines() { + if line.len() < 3 { + continue; + } + let status = &line[0..2]; + let file_path = line[3..].trim(); + + match status { + "??" => { + // Untracked file - remove it + let _ = std::fs::remove_file(std::path::Path::new(dir).join(file_path)); + reverted_files.push(format!("{} (new file)", file_path)); + } + _ => { + // Modified/added/deleted tracked file - git checkout + let _ = std::process::Command::new("git") + .args(["checkout", "--", file_path]) + .current_dir(dir) + .output(); + reverted_files.push(format!("{} (modified)", file_path)); + } + } + } + + if reverted_files.is_empty() { + return ApprovalGateResult::Allowed; + } + + let files_list = reverted_files.iter() + .map(|f| format!(" - {}", f)) + .collect::>() + .join("\n"); + + let message = format!( + "⚠️ IMPLEMENTATION BLOCKED\n\n\ + You modified files without an approved plan:\n\ + {}\n\n\ + These changes have been reverted.\n\n\ + Before implementing, you must:\n\ + 1. Create a plan with `plan_write`\n\ + 2. Request the user's explicit approval or edits to plan\n\n\ + Do not attempt to implement until the plan is approved.", + files_list + ); + + ApprovalGateResult::Blocked { + message, + reverted_files, + } +} + #[cfg(test)] mod tests { use super::*; @@ -1202,4 +1336,18 @@ items: [] assert!(VerificationStatus::Warning("warn".to_string()).is_warning_or_error()); assert!(VerificationStatus::Error("err".to_string()).is_warning_or_error()); } + + #[test] + fn test_approval_gate_no_plan() { + // With a non-existent session, there's no plan, so it should allow + let result = check_plan_approval_gate("nonexistent-session-xyz", Some(".")); + assert!(matches!(result, ApprovalGateResult::Allowed)); + } + + #[test] + fn test_approval_gate_not_git_repo() { + // /tmp is typically not a git repo + let result = check_plan_approval_gate("any-session", Some("/tmp")); + assert!(matches!(result, ApprovalGateResult::NotGitRepo)); + } } diff --git a/crates/g3-core/tests/mock_provider_integration_test.rs b/crates/g3-core/tests/mock_provider_integration_test.rs index d737e34..c34f2f7 100644 --- a/crates/g3-core/tests/mock_provider_integration_test.rs +++ b/crates/g3-core/tests/mock_provider_integration_test.rs @@ -822,3 +822,129 @@ async fn test_llm_repeats_text_before_each_tool_call() { preamble_count ); } + +// ============================================================================= +// Plan Approval Gate Tests +// ============================================================================= + +/// Test: File changes are blocked when plan exists but is not approved +/// +/// Scenario: +/// 1. Create a plan (unapproved) +/// 2. LLM tries to write a file +/// 3. The file change should be reverted and a blocking message returned +#[tokio::test] +async fn test_plan_approval_gate_blocks_unapproved_changes() { + use g3_core::tools::plan::{write_plan, Plan, PlanItem, Checks, Check, PlanState}; + use std::fs; + + // Create a temp directory that IS a git repo + let temp_dir = TempDir::new().unwrap(); + let temp_path = temp_dir.path(); + + // Initialize git repo + std::process::Command::new("git") + .args(["init"]) + .current_dir(temp_path) + .output() + .expect("Failed to init git repo"); + + // Configure git user for the repo (needed for commits) + std::process::Command::new("git") + .args(["config", "user.email", "test@test.com"]) + .current_dir(temp_path) + .output() + .expect("Failed to configure git email"); + std::process::Command::new("git") + .args(["config", "user.name", "Test"]) + .current_dir(temp_path) + .output() + .expect("Failed to configure git name"); + + // Create an initial commit so we have a clean state + let readme_path = temp_path.join("README.md"); + fs::write(&readme_path, "# Test").unwrap(); + std::process::Command::new("git") + .args(["add", "."]) + .current_dir(temp_path) + .output() + .expect("Failed to git add"); + std::process::Command::new("git") + .args(["commit", "-m", "initial"]) + .current_dir(temp_path) + .output() + .expect("Failed to git commit"); + + // Use absolute path so the file is written to the temp git repo + let new_file_path = temp_path.join("new_file.txt"); + + // Create a mock provider that will try to write a file + let provider = MockProvider::new() + .with_native_tool_calling(true) + .with_response(MockResponse::native_tool_call( + "write_file", + serde_json::json!({ + "file_path": new_file_path.to_string_lossy(), + "content": "This should be blocked!" + }), + )) + .with_default_response(MockResponse::text("I tried to write a file.")); + + // Create agent with a specific session ID + let mut registry = ProviderRegistry::new(); + registry.register(provider); + let config = g3_config::Config::default(); + + let mut agent = Agent::new_for_test(config, NullUiWriter, registry) + .await + .expect("Failed to create agent"); + + // Set a session ID so the plan can be found + let session_id = "test-approval-gate-session"; + agent.set_session_id(session_id.to_string()); + + // Set the working directory to the temp git repo + agent.set_working_dir(temp_path.to_string_lossy().to_string()); + + // Create an unapproved plan for this session + let mut plan = Plan::new("test-plan"); + plan.items.push(PlanItem { + id: "I1".to_string(), + description: "Test item".to_string(), + state: PlanState::Todo, + touches: vec!["src/test.rs".to_string()], + checks: Checks { + happy: Check::new("happy", "target"), + negative: Check::new("negative", "target"), + boundary: Check::new("boundary", "target"), + }, + evidence: vec![], + notes: None, + }); + // Note: NOT calling plan.approve() - plan is unapproved + + write_plan(session_id, &plan).expect("Failed to write plan"); + + // Execute task - the LLM will try to write a file + let result = agent.execute_task( + "Write a new file", + None, // language + false // auto_execute + ).await; + + assert!(result.is_ok(), "Task should complete (with blocking message): {:?}", result.err()); + + // The new file should NOT exist (it was reverted) + assert!( + !new_file_path.exists(), + "New file should have been reverted/deleted" + ); + + // Check that the blocking message was returned + let history = &agent.get_context_window().conversation_history; + let has_blocking_message = history + .iter() + .any(|m| m.content.contains("IMPLEMENTATION BLOCKED")); + + assert!(has_blocking_message, "Should have blocking message in context"); +}