From d6b717710703fdbe31c017c860e8f8da0de08f7a Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Mon, 2 Feb 2026 15:15:03 +1100 Subject: [PATCH] Implement plan_verify() for deterministic evidence validation Adds a verification system that checks evidence in completed plan items: - Evidence parsing: supports code locations (file:line, file:line-line, file only) and test references (file::test_name) - Code location verification: checks file exists, validates line numbers in range - Test reference verification: checks test file exists, searches for fn pattern - Verification results: Verified, Warning, Error, Skipped statuses - Loud output formatting with emoji indicators for warnings/errors - Integration with execute_plan_write(): runs when plan is complete and approved - 12 new unit tests covering parsing and verification Warnings are advisory (don't block), errors are loud but also don't block. Blocked items are skipped during verification. --- analysis/memory.md | 29 +- crates/g3-core/src/tools/plan.rs | 547 +++++++++++++++++++++++++++++-- 2 files changed, 546 insertions(+), 30 deletions(-) diff --git a/analysis/memory.md b/analysis/memory.md index 6763227..1347222 100644 --- a/analysis/memory.md +++ b/analysis/memory.md @@ -1,5 +1,5 @@ # Workspace Memory -> Updated: 2026-02-02T03:16:47Z | Size: 15.3k chars +> Updated: 2026-02-02T03:53:06Z | Size: 16.9k chars ### Remember Tool Wiring - `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()` @@ -289,4 +289,29 @@ Tool names must use underscores, not dots (Anthropic API restriction: `^[a-zA-Z0 - `plan_read` - Read current plan - `plan_write` - Create/update plan -- `plan_approve` - Approve plan revision \ No newline at end of file +- `plan_approve` - Approve plan revision + +### Plan Verification System +Verifies evidence in completed plan items deterministically. + +- `crates/g3-core/src/tools/plan.rs` + - `EvidenceType` [283..300] - enum: CodeLocation{file_path, start_line, end_line}, TestReference{file_path, test_name}, Unknown + - `VerificationStatus` [303..320] - enum: Verified, Warning(String), Error(String), Skipped(String) + - `EvidenceVerification` [330..345] - evidence string + parsed type + status + - `ItemVerification` [348..365] - item_id, description, evidence_results[], missing_evidence flag + - `PlanVerification` [368..385] - plan_id, item_results[], skipped_count; has all_passed(), count_issues() + - `parse_evidence()` [390..428] - parses evidence string into EvidenceType + - `parse_line_range()` [429..440] - parses "42" or "42-118" into (start, Option) + - `verify_code_location()` [443..495] - checks file exists, line numbers in range + - `verify_test_reference()` [496..554] - checks test file exists, searches for fn test_name + - `verify_single_evidence()` [632..655] - dispatches to appropriate verifier + - `plan_verify()` [659..700] - iterates done items, collects verification results + - `format_verification_results()` [703..745] - formats results with emoji, loud warnings + +**Evidence formats supported:** +- Code location with range: `src/foo.rs:42-118` +- Code location single line: `src/foo.rs:42` +- Code location file only: `src/foo.rs` +- Test reference: `tests/foo.rs::test_bar` + +**Integration:** Called from `execute_plan_write()` when plan is complete and approved (line 828-833) \ No newline at end of file diff --git a/crates/g3-core/src/tools/plan.rs b/crates/g3-core/src/tools/plan.rs index f788b83..2e0407e 100644 --- a/crates/g3-core/src/tools/plan.rs +++ b/crates/g3-core/src/tools/plan.rs @@ -276,6 +276,282 @@ impl Plan { } } +// ============================================================================ +// Evidence Verification Types +// ============================================================================ + +/// Type of evidence that can be verified. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum EvidenceType { + /// Code location: file path with optional line range (e.g., "src/foo.rs:42-118") + CodeLocation { + file_path: String, + start_line: Option, + end_line: Option, + }, + /// Test reference: file path with test function name (e.g., "tests/foo.rs::test_bar") + TestReference { + file_path: String, + test_name: String, + }, + /// Unknown format - will be skipped with a warning + Unknown(String), +} + +/// Result of verifying a single piece of evidence. +#[derive(Debug, Clone)] +pub enum VerificationStatus { + /// Evidence verified successfully + Verified, + /// Warning - evidence may be invalid but not blocking + Warning(String), + /// Error - evidence is definitely invalid + Error(String), + /// Skipped - couldn't verify (e.g., unknown format) + Skipped(String), +} + +impl VerificationStatus { + pub fn is_ok(&self) -> bool { + matches!(self, VerificationStatus::Verified | VerificationStatus::Skipped(_)) + } + + pub fn is_warning_or_error(&self) -> bool { + matches!(self, VerificationStatus::Warning(_) | VerificationStatus::Error(_)) + } +} + +/// Result of verifying a single evidence string. +#[derive(Debug, Clone)] +pub struct EvidenceVerification { + /// The original evidence string + pub evidence: String, + /// Parsed evidence type + pub evidence_type: EvidenceType, + /// Verification result + pub status: VerificationStatus, +} + +/// Result of verifying all evidence for a plan item. +#[derive(Debug, Clone)] +pub struct ItemVerification { + /// Item ID + pub item_id: String, + /// Item description (for display) + pub description: String, + /// Results for each piece of evidence + pub evidence_results: Vec, + /// Whether the item had no evidence (warning condition) + pub missing_evidence: bool, +} + +/// Result of verifying an entire plan. +#[derive(Debug, Clone)] +pub struct PlanVerification { + /// Plan ID + pub plan_id: String, + /// Results for each verified item + pub item_results: Vec, + /// Count of items that were skipped (blocked state) + pub skipped_count: usize, +} + +impl PlanVerification { + /// Check if all verifications passed (no errors or warnings). + pub fn all_passed(&self) -> bool { + self.item_results.iter().all(|item| { + !item.missing_evidence + && item.evidence_results.iter().all(|e| e.status.is_ok()) + }) + } + + /// Count total warnings and errors. + pub fn count_issues(&self) -> (usize, usize) { + let mut warnings = 0; + let mut errors = 0; + for item in &self.item_results { + if item.missing_evidence { + warnings += 1; + } + for ev in &item.evidence_results { + match &ev.status { + VerificationStatus::Warning(_) => warnings += 1, + VerificationStatus::Error(_) => errors += 1, + _ => {} + } + } + } + (warnings, errors) + } +} + +/// Parse an evidence string into an EvidenceType. +pub fn parse_evidence(evidence: &str) -> EvidenceType { + let evidence = evidence.trim(); + + // Check for test reference format: "path/to/file.rs::test_name" + if let Some(idx) = evidence.find("::") { + let file_path = evidence[..idx].to_string(); + let test_name = evidence[idx + 2..].to_string(); + return EvidenceType::TestReference { file_path, test_name }; + } + + // Check for code location format: "path/to/file.rs:42" or "path/to/file.rs:42-118" + if let Some(idx) = evidence.rfind(':') { + let file_path = evidence[..idx].to_string(); + let line_part = &evidence[idx + 1..]; + + // Try to parse line range (e.g., "42-118" or just "42") + if let Some((start, end)) = parse_line_range(line_part) { + return EvidenceType::CodeLocation { + file_path, + start_line: Some(start), + end_line: end, + }; + } + } + + // Check if it looks like a file path (contains . or /) + if evidence.contains('.') || evidence.contains('/') { + return EvidenceType::CodeLocation { + file_path: evidence.to_string(), + start_line: None, + end_line: None, + }; + } + + // Unknown format + EvidenceType::Unknown(evidence.to_string()) +} + +/// Parse a line range string like "42" or "42-118". +/// Returns (start_line, optional_end_line). +fn parse_line_range(s: &str) -> Option<(u32, Option)> { + if let Some(dash_idx) = s.find('-') { + let start_str = &s[..dash_idx]; + let end_str = &s[dash_idx + 1..]; + let start = start_str.parse::().ok()?; + let end = end_str.parse::().ok()?; + Some((start, Some(end))) + } else { + let line = s.parse::().ok()?; + Some((line, None)) + } +} + +/// Verify a code location evidence (file exists, line numbers valid). +/// +/// # Arguments +/// * `file_path` - Path to the file (relative to working_dir or absolute) +/// * `start_line` - Optional start line number (1-indexed) +/// * `end_line` - Optional end line number (1-indexed) +/// * `working_dir` - Working directory for resolving relative paths +pub fn verify_code_location( + file_path: &str, + start_line: Option, + end_line: Option, + working_dir: Option<&str>, +) -> VerificationStatus { + use std::path::Path; + + // Resolve the file path + let resolved_path = if Path::new(file_path).is_absolute() { + PathBuf::from(file_path) + } else { + match working_dir { + Some(dir) => PathBuf::from(dir).join(file_path), + None => PathBuf::from(file_path), + } + }; + + // Check if file exists + if !resolved_path.exists() { + return VerificationStatus::Error(format!("File not found: {}", file_path)); + } + + // If no line numbers specified, file existence is enough + if start_line.is_none() { + return VerificationStatus::Verified; + } + + // Read file and count lines + let content = match std::fs::read_to_string(&resolved_path) { + Ok(c) => c, + Err(e) => return VerificationStatus::Error(format!("Cannot read file {}: {}", file_path, e)), + }; + + let line_count = content.lines().count() as u32; + let check_line = end_line.unwrap_or_else(|| start_line.unwrap()); + + if check_line > line_count { + return VerificationStatus::Warning(format!( + "Line {} exceeds file length ({} lines) in {}", + check_line, line_count, file_path + )); + } + + VerificationStatus::Verified +} + +/// Verify a test reference evidence (test function exists in file). +/// +/// This checks that the test file exists and contains a function with the given name. +/// It does NOT run the test - just verifies it exists. +/// +/// # Arguments +/// * `file_path` - Path to the test file (relative to working_dir or absolute) +/// * `test_name` - Name of the test function to find +/// * `working_dir` - Working directory for resolving relative paths +pub fn verify_test_reference( + file_path: &str, + test_name: &str, + working_dir: Option<&str>, +) -> VerificationStatus { + use std::path::Path; + + // Resolve the file path + let resolved_path = if Path::new(file_path).is_absolute() { + PathBuf::from(file_path) + } else { + match working_dir { + Some(dir) => PathBuf::from(dir).join(file_path), + None => PathBuf::from(file_path), + } + }; + + // Check if file exists + if !resolved_path.exists() { + return VerificationStatus::Error(format!("Test file not found: {}", file_path)); + } + + // Read file content + let content = match std::fs::read_to_string(&resolved_path) { + Ok(c) => c, + Err(e) => return VerificationStatus::Error(format!("Cannot read test file {}: {}", file_path, e)), + }; + + // Look for the test function + // For Rust: look for `fn test_name` or `fn test_name(` + // TODO: Add support for other languages (Python: def test_name, JS: test('name', etc.) + let rust_pattern = format!("fn {}", test_name); + let rust_pattern_with_paren = format!("fn {}(", test_name); + + if content.contains(&rust_pattern) || content.contains(&rust_pattern_with_paren) { + return VerificationStatus::Verified; + } + + // Check for #[test] attribute near the function name as a fallback + // This handles cases where the function might be named differently + if content.contains(test_name) && content.contains("#[test]") { + return VerificationStatus::Verified; + } + + VerificationStatus::Warning(format!( + "Test function '{}' not found in {}", + test_name, file_path + )) +} + // ============================================================================ // Plan Storage // ============================================================================ @@ -353,39 +629,117 @@ fn format_plan_as_markdown(plan: &Plan) -> String { // Plan Verification // ============================================================================ -/// Verify a completed plan. Called by the agent loop when all items are done/blocked. +/// Verify a single piece of evidence. +fn verify_single_evidence(evidence: &str, working_dir: Option<&str>) -> EvidenceVerification { + let evidence_type = parse_evidence(evidence); + + let status = match &evidence_type { + EvidenceType::CodeLocation { file_path, start_line, end_line } => { + verify_code_location(file_path, *start_line, *end_line, working_dir) + } + EvidenceType::TestReference { file_path, test_name } => { + verify_test_reference(file_path, test_name, working_dir) + } + EvidenceType::Unknown(s) => { + VerificationStatus::Skipped(format!("Unknown evidence format: {}", s)) + } + }; + + EvidenceVerification { + evidence: evidence.to_string(), + evidence_type, + status, + } +} + +/// Verify a completed plan. Called when all items are done/blocked. /// -/// This is a placeholder that prints the plan contents. -/// In the future, this could perform additional validation. -pub fn plan_verify(plan: &Plan) { - println!("\n{}", "=".repeat(60)); - println!("PLAN VERIFY CALLED"); - println!("{}", "=".repeat(60)); - println!("Plan ID: {}", plan.plan_id); - println!("Revision: {}", plan.revision); - println!("Approved: {:?}", plan.approved_revision); - println!("Status: {}", plan.status_summary()); - println!(); +/// Returns a PlanVerification with results for each done item's evidence. +/// Blocked items are skipped (counted but not verified). +pub fn plan_verify(plan: &Plan, working_dir: Option<&str>) -> PlanVerification { + let mut item_results = Vec::new(); + let mut skipped_count = 0; for item in &plan.items { - println!("[{}] {} - {}", item.id, item.state, item.description); - println!(" Touches: {:?}", item.touches); - println!(" Checks:"); - println!(" Happy: {} -> {}", item.checks.happy.desc, item.checks.happy.target); - println!(" Negative: {} -> {}", item.checks.negative.desc, item.checks.negative.target); - println!(" Boundary: {} -> {}", item.checks.boundary.desc, item.checks.boundary.target); - if !item.evidence.is_empty() { - println!(" Evidence:"); - for e in &item.evidence { - println!(" - {}", e); + match item.state { + PlanState::Blocked => { + skipped_count += 1; + continue; } + PlanState::Done => { + // Verify this item's evidence + let missing_evidence = item.evidence.is_empty(); + let evidence_results: Vec = item + .evidence + .iter() + .map(|e| verify_single_evidence(e, working_dir)) + .collect(); + + item_results.push(ItemVerification { + item_id: item.id.clone(), + description: item.description.clone(), + evidence_results, + missing_evidence, + }); + } + // Skip todo/doing items - they shouldn't be in a complete plan + _ => {} } - if let Some(notes) = &item.notes { - println!(" Notes: {}", notes); - } - println!(); } - println!("{}\n", "=".repeat(60)); + + PlanVerification { + plan_id: plan.plan_id.clone(), + item_results, + skipped_count, + } +} + +/// Format verification results as a string for display. +/// Uses loud formatting for warnings and errors. +pub fn format_verification_results(verification: &PlanVerification) -> String { + let mut output = String::new(); + let (warnings, errors) = verification.count_issues(); + + output.push_str("\n"); + output.push_str(&"═".repeat(60)); + output.push_str("\n"); + output.push_str("📋 PLAN VERIFICATION RESULTS\n"); + output.push_str(&"═".repeat(60)); + output.push_str("\n\n"); + + for item in &verification.item_results { + output.push_str(&format!("[{}] {}\n", item.item_id, item.description)); + + if item.missing_evidence { + output.push_str(" âš ī¸ WARNING: No evidence provided for done item!\n"); + } + + for ev in &item.evidence_results { + let status_str = match &ev.status { + VerificationStatus::Verified => "✅".to_string(), + VerificationStatus::Warning(msg) => format!("âš ī¸ WARNING: {}", msg), + VerificationStatus::Error(msg) => format!("❌ ERROR: {}", msg), + VerificationStatus::Skipped(msg) => format!("â­ī¸ SKIPPED: {}", msg), + }; + output.push_str(&format!(" {} {}\n", status_str, ev.evidence)); + } + output.push_str("\n"); + } + + if verification.skipped_count > 0 { + output.push_str(&format!("â„šī¸ {} blocked item(s) skipped\n\n", verification.skipped_count)); + } + + // Summary line + if errors > 0 || warnings > 0 { + output.push_str(&format!("âš ī¸ VERIFICATION COMPLETE: {} error(s), {} warning(s)\n", errors, warnings)); + } else { + output.push_str("✅ VERIFICATION COMPLETE: All evidence validated\n"); + } + output.push_str(&"═".repeat(60)); + output.push_str("\n"); + + output } // ============================================================================ @@ -474,7 +828,9 @@ pub async fn execute_plan_write( // Check if plan is now complete and trigger verification if plan.is_complete() && plan.is_approved() { - plan_verify(&plan); + let verification = plan_verify(&plan, ctx.working_dir); + let verification_output = format_verification_results(&verification); + return Ok(format!("✅ Plan updated: {}\n{}", plan.status_summary(), verification_output)); } Ok(format!("✅ Plan updated: {}", plan.status_summary())) @@ -671,4 +1027,139 @@ items: [] assert_eq!(parsed.approved_revision, plan.approved_revision); assert_eq!(parsed.items.len(), plan.items.len()); } + + // ======================================================================== + // Evidence Parsing Tests + // ======================================================================== + + #[test] + fn test_parse_evidence_code_location_with_line_range() { + let evidence = "src/foo.rs:42-118"; + match parse_evidence(evidence) { + EvidenceType::CodeLocation { file_path, start_line, end_line } => { + assert_eq!(file_path, "src/foo.rs"); + assert_eq!(start_line, Some(42)); + assert_eq!(end_line, Some(118)); + } + _ => panic!("Expected CodeLocation"), + } + } + + #[test] + fn test_parse_evidence_code_location_single_line() { + let evidence = "src/bar.rs:99"; + match parse_evidence(evidence) { + EvidenceType::CodeLocation { file_path, start_line, end_line } => { + assert_eq!(file_path, "src/bar.rs"); + assert_eq!(start_line, Some(99)); + assert_eq!(end_line, None); + } + _ => panic!("Expected CodeLocation"), + } + } + + #[test] + fn test_parse_evidence_code_location_file_only() { + let evidence = "src/lib.rs"; + match parse_evidence(evidence) { + EvidenceType::CodeLocation { file_path, start_line, end_line } => { + assert_eq!(file_path, "src/lib.rs"); + assert_eq!(start_line, None); + assert_eq!(end_line, None); + } + _ => panic!("Expected CodeLocation"), + } + } + + #[test] + fn test_parse_evidence_test_reference() { + let evidence = "tests/integration.rs::test_happy_path"; + match parse_evidence(evidence) { + EvidenceType::TestReference { file_path, test_name } => { + assert_eq!(file_path, "tests/integration.rs"); + assert_eq!(test_name, "test_happy_path"); + } + _ => panic!("Expected TestReference"), + } + } + + #[test] + fn test_parse_evidence_unknown_format() { + let evidence = "some random text"; + match parse_evidence(evidence) { + EvidenceType::Unknown(s) => { + assert_eq!(s, "some random text"); + } + _ => panic!("Expected Unknown"), + } + } + + #[test] + fn test_parse_evidence_whitespace_trimmed() { + let evidence = " src/foo.rs:42 "; + match parse_evidence(evidence) { + EvidenceType::CodeLocation { file_path, start_line, .. } => { + assert_eq!(file_path, "src/foo.rs"); + assert_eq!(start_line, Some(42)); + } + _ => panic!("Expected CodeLocation"), + } + } + + // ======================================================================== + // Verification Tests + // ======================================================================== + + #[test] + fn test_verify_code_location_file_exists() { + // Use Cargo.toml which should always exist in the workspace + let status = verify_code_location("Cargo.toml", None, None, None); + assert!(matches!(status, VerificationStatus::Verified)); + } + + #[test] + fn test_verify_code_location_file_not_found() { + let status = verify_code_location("nonexistent_file_xyz.rs", None, None, None); + match status { + VerificationStatus::Error(msg) => { + assert!(msg.contains("not found")); + } + _ => panic!("Expected Error"), + } + } + + #[test] + fn test_verify_code_location_line_out_of_range() { + // Cargo.toml exists but probably doesn't have 999999 lines + let status = verify_code_location("Cargo.toml", Some(1), Some(999999), None); + match status { + VerificationStatus::Warning(msg) => { + assert!(msg.contains("exceeds file length")); + } + _ => panic!("Expected Warning, got {:?}", status), + } + } + + #[test] + fn test_verify_test_reference_file_not_found() { + let status = verify_test_reference("nonexistent_test.rs", "test_foo", None); + match status { + VerificationStatus::Error(msg) => { + assert!(msg.contains("not found")); + } + _ => panic!("Expected Error"), + } + } + + #[test] + fn test_verification_status_helpers() { + assert!(VerificationStatus::Verified.is_ok()); + assert!(VerificationStatus::Skipped("reason".to_string()).is_ok()); + assert!(!VerificationStatus::Warning("warn".to_string()).is_ok()); + assert!(!VerificationStatus::Error("err".to_string()).is_ok()); + + assert!(!VerificationStatus::Verified.is_warning_or_error()); + assert!(VerificationStatus::Warning("warn".to_string()).is_warning_or_error()); + assert!(VerificationStatus::Error("err".to_string()).is_warning_or_error()); + } }