Implement plan_verify() for deterministic evidence validation

Adds a verification system that checks evidence in completed plan items: - Evidence parsing: supports code locations (file:line, file:line-line, file only) and test references (file::test_name) - Code location verification: checks file exists, validates line numbers in range - Test reference verification: checks test file exists, searches for fn pattern - Verification results: Verified, Warning, Error, Skipped statuses - Loud output formatting with emoji indicators for warnings/errors - Integration with execute_plan_write(): runs when plan is complete and approved - 12 new unit tests covering parsing and verification Warnings are advisory (don't block), errors are loud but also don't block. Blocked items are skipped during verification.
2026-02-02 15:15:03 +11:00
parent a63950d8f5
commit d6b7177107
2 changed files with 546 additions and 30 deletions
--- a/analysis/memory.md
+++ b/analysis/memory.md
@@ -1,5 +1,5 @@
 # Workspace Memory
-> Updated: 2026-02-02T03:16:47Z | Size: 15.3k chars
+> Updated: 2026-02-02T03:53:06Z | Size: 16.9k chars
 ### Remember Tool Wiring
 - `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()`
@@ -289,4 +289,29 @@ Tool names must use underscores, not dots (Anthropic API restriction: `^[a-zA-Z0
 - `plan_read` - Read current plan
 - `plan_write` - Create/update plan
- `plan_approve` - Approve plan revision
+- `plan_approve` - Approve plan revision
 ### Plan Verification System
 Verifies evidence in completed plan items deterministically.
 - `crates/g3-core/src/tools/plan.rs`
  - `EvidenceType` [283..300] - enum: CodeLocation{file_path, start_line, end_line}, TestReference{file_path, test_name}, Unknown
  - `VerificationStatus` [303..320] - enum: Verified, Warning(String), Error(String), Skipped(String)
  - `EvidenceVerification` [330..345] - evidence string + parsed type + status
  - `ItemVerification` [348..365] - item_id, description, evidence_results[], missing_evidence flag
  - `PlanVerification` [368..385] - plan_id, item_results[], skipped_count; has all_passed(), count_issues()
  - `parse_evidence()` [390..428] - parses evidence string into EvidenceType
  - `parse_line_range()` [429..440] - parses "42" or "42-118" into (start, Option<end>)
  - `verify_code_location()` [443..495] - checks file exists, line numbers in range
  - `verify_test_reference()` [496..554] - checks test file exists, searches for fn test_name
  - `verify_single_evidence()` [632..655] - dispatches to appropriate verifier
  - `plan_verify()` [659..700] - iterates done items, collects verification results
  - `format_verification_results()` [703..745] - formats results with emoji, loud warnings
 **Evidence formats supported:**
 - Code location with range: `src/foo.rs:42-118`
 - Code location single line: `src/foo.rs:42`
 - Code location file only: `src/foo.rs`
 - Test reference: `tests/foo.rs::test_bar`
 **Integration:** Called from `execute_plan_write()` when plan is complete and approved (line 828-833)
--- a/crates/g3-core/src/tools/plan.rs
+++ b/crates/g3-core/src/tools/plan.rs
@@ -276,6 +276,282 @@ impl Plan {
    }
 }
 // ============================================================================
 // Evidence Verification Types
 // ============================================================================
 /// Type of evidence that can be verified.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum EvidenceType {
    /// Code location: file path with optional line range (e.g., "src/foo.rs:42-118")
    CodeLocation {
        file_path: String,
        start_line: Option<u32>,
        end_line: Option<u32>,
    },
    /// Test reference: file path with test function name (e.g., "tests/foo.rs::test_bar")
    TestReference {
        file_path: String,
        test_name: String,
    },
    /// Unknown format - will be skipped with a warning
    Unknown(String),
 }
 /// Result of verifying a single piece of evidence.
 #[derive(Debug, Clone)]
 pub enum VerificationStatus {
    /// Evidence verified successfully
    Verified,
    /// Warning - evidence may be invalid but not blocking
    Warning(String),
    /// Error - evidence is definitely invalid
    Error(String),
    /// Skipped - couldn't verify (e.g., unknown format)
    Skipped(String),
 }
 impl VerificationStatus {
    pub fn is_ok(&self) -> bool {
        matches!(self, VerificationStatus::Verified | VerificationStatus::Skipped(_))
    }
    pub fn is_warning_or_error(&self) -> bool {
        matches!(self, VerificationStatus::Warning(_) | VerificationStatus::Error(_))
    }
 }
 /// Result of verifying a single evidence string.
 #[derive(Debug, Clone)]
 pub struct EvidenceVerification {
    /// The original evidence string
    pub evidence: String,
    /// Parsed evidence type
    pub evidence_type: EvidenceType,
    /// Verification result
    pub status: VerificationStatus,
 }
 /// Result of verifying all evidence for a plan item.
 #[derive(Debug, Clone)]
 pub struct ItemVerification {
    /// Item ID
    pub item_id: String,
    /// Item description (for display)
    pub description: String,
    /// Results for each piece of evidence
    pub evidence_results: Vec<EvidenceVerification>,
    /// Whether the item had no evidence (warning condition)
    pub missing_evidence: bool,
 }
 /// Result of verifying an entire plan.
 #[derive(Debug, Clone)]
 pub struct PlanVerification {
    /// Plan ID
    pub plan_id: String,
    /// Results for each verified item
    pub item_results: Vec<ItemVerification>,
    /// Count of items that were skipped (blocked state)
    pub skipped_count: usize,
 }
 impl PlanVerification {
    /// Check if all verifications passed (no errors or warnings).
    pub fn all_passed(&self) -> bool {
        self.item_results.iter().all(|item| {
            !item.missing_evidence
                && item.evidence_results.iter().all(|e| e.status.is_ok())
        })
    }
    /// Count total warnings and errors.
    pub fn count_issues(&self) -> (usize, usize) {
        let mut warnings = 0;
        let mut errors = 0;
        for item in &self.item_results {
            if item.missing_evidence {
                warnings += 1;
            }
            for ev in &item.evidence_results {
                match &ev.status {
                    VerificationStatus::Warning(_) => warnings += 1,
                    VerificationStatus::Error(_) => errors += 1,
                    _ => {}
                }
            }
        }
        (warnings, errors)
    }
 }
 /// Parse an evidence string into an EvidenceType.
 pub fn parse_evidence(evidence: &str) -> EvidenceType {
    let evidence = evidence.trim();
    // Check for test reference format: "path/to/file.rs::test_name"
    if let Some(idx) = evidence.find("::") {
        let file_path = evidence[..idx].to_string();
        let test_name = evidence[idx + 2..].to_string();
        return EvidenceType::TestReference { file_path, test_name };
    }
    // Check for code location format: "path/to/file.rs:42" or "path/to/file.rs:42-118"
    if let Some(idx) = evidence.rfind(':') {
        let file_path = evidence[..idx].to_string();
        let line_part = &evidence[idx + 1..];
        // Try to parse line range (e.g., "42-118" or just "42")
        if let Some((start, end)) = parse_line_range(line_part) {
            return EvidenceType::CodeLocation {
                file_path,
                start_line: Some(start),
                end_line: end,
            };
        }
    }
    // Check if it looks like a file path (contains . or /)
    if evidence.contains('.') || evidence.contains('/') {
        return EvidenceType::CodeLocation {
            file_path: evidence.to_string(),
            start_line: None,
            end_line: None,
        };
    }
    // Unknown format
    EvidenceType::Unknown(evidence.to_string())
 }
 /// Parse a line range string like "42" or "42-118".
 /// Returns (start_line, optional_end_line).
 fn parse_line_range(s: &str) -> Option<(u32, Option<u32>)> {
    if let Some(dash_idx) = s.find('-') {
        let start_str = &s[..dash_idx];
        let end_str = &s[dash_idx + 1..];
        let start = start_str.parse::<u32>().ok()?;
        let end = end_str.parse::<u32>().ok()?;
        Some((start, Some(end)))
    } else {
        let line = s.parse::<u32>().ok()?;
        Some((line, None))
    }
 }
 /// Verify a code location evidence (file exists, line numbers valid).
 /// 
 /// # Arguments
 /// * `file_path` - Path to the file (relative to working_dir or absolute)
 /// * `start_line` - Optional start line number (1-indexed)
 /// * `end_line` - Optional end line number (1-indexed)
 /// * `working_dir` - Working directory for resolving relative paths
 pub fn verify_code_location(
    file_path: &str,
    start_line: Option<u32>,
    end_line: Option<u32>,
    working_dir: Option<&str>,
 ) -> VerificationStatus {
    use std::path::Path;
    // Resolve the file path
    let resolved_path = if Path::new(file_path).is_absolute() {
        PathBuf::from(file_path)
    } else {
        match working_dir {
            Some(dir) => PathBuf::from(dir).join(file_path),
            None => PathBuf::from(file_path),
        }
    };
    // Check if file exists
    if !resolved_path.exists() {
        return VerificationStatus::Error(format!("File not found: {}", file_path));
    }
    // If no line numbers specified, file existence is enough
    if start_line.is_none() {
        return VerificationStatus::Verified;
    }
    // Read file and count lines
    let content = match std::fs::read_to_string(&resolved_path) {
        Ok(c) => c,
        Err(e) => return VerificationStatus::Error(format!("Cannot read file {}: {}", file_path, e)),
    };
    let line_count = content.lines().count() as u32;
    let check_line = end_line.unwrap_or_else(|| start_line.unwrap());
    if check_line > line_count {
        return VerificationStatus::Warning(format!(
            "Line {} exceeds file length ({} lines) in {}",
            check_line, line_count, file_path
        ));
    }
    VerificationStatus::Verified
 }
 /// Verify a test reference evidence (test function exists in file).
 /// 
 /// This checks that the test file exists and contains a function with the given name.
 /// It does NOT run the test - just verifies it exists.
 /// 
 /// # Arguments
 /// * `file_path` - Path to the test file (relative to working_dir or absolute)
 /// * `test_name` - Name of the test function to find
 /// * `working_dir` - Working directory for resolving relative paths
 pub fn verify_test_reference(
    file_path: &str,
    test_name: &str,
    working_dir: Option<&str>,
 ) -> VerificationStatus {
    use std::path::Path;
    // Resolve the file path
    let resolved_path = if Path::new(file_path).is_absolute() {
        PathBuf::from(file_path)
    } else {
        match working_dir {
            Some(dir) => PathBuf::from(dir).join(file_path),
            None => PathBuf::from(file_path),
        }
    };
    // Check if file exists
    if !resolved_path.exists() {
        return VerificationStatus::Error(format!("Test file not found: {}", file_path));
    }
    // Read file content
    let content = match std::fs::read_to_string(&resolved_path) {
        Ok(c) => c,
        Err(e) => return VerificationStatus::Error(format!("Cannot read test file {}: {}", file_path, e)),
    };
    // Look for the test function
    // For Rust: look for `fn test_name` or `fn test_name(`
    // TODO: Add support for other languages (Python: def test_name, JS: test('name', etc.)
    let rust_pattern = format!("fn {}", test_name);
    let rust_pattern_with_paren = format!("fn {}(", test_name);
    if content.contains(&rust_pattern) || content.contains(&rust_pattern_with_paren) {
        return VerificationStatus::Verified;
    }
    // Check for #[test] attribute near the function name as a fallback
    // This handles cases where the function might be named differently
    if content.contains(test_name) && content.contains("#[test]") {
        return VerificationStatus::Verified;
    }
    VerificationStatus::Warning(format!(
        "Test function '{}' not found in {}",
        test_name, file_path
    ))
 }
 // ============================================================================
 // Plan Storage
 // ============================================================================
@@ -353,39 +629,117 @@ fn format_plan_as_markdown(plan: &Plan) -> String {
 // Plan Verification
 // ============================================================================
-/// Verify a completed plan. Called by the agent loop when all items are done/blocked.
+/// Verify a single piece of evidence.
 fn verify_single_evidence(evidence: &str, working_dir: Option<&str>) -> EvidenceVerification {
    let evidence_type = parse_evidence(evidence);
    let status = match &evidence_type {
        EvidenceType::CodeLocation { file_path, start_line, end_line } => {
            verify_code_location(file_path, *start_line, *end_line, working_dir)
        }
        EvidenceType::TestReference { file_path, test_name } => {
            verify_test_reference(file_path, test_name, working_dir)
        }
        EvidenceType::Unknown(s) => {
            VerificationStatus::Skipped(format!("Unknown evidence format: {}", s))
        }
    };
    EvidenceVerification {
        evidence: evidence.to_string(),
        evidence_type,
        status,
    }
 }
 /// Verify a completed plan. Called when all items are done/blocked.
 /// 
-/// This is a placeholder that prints the plan contents.
+/// Returns a PlanVerification with results for each done item's evidence.
-/// In the future, this could perform additional validation.
+/// Blocked items are skipped (counted but not verified).
-pub fn plan_verify(plan: &Plan) {
+pub fn plan_verify(plan: &Plan, working_dir: Option<&str>) -> PlanVerification {
-    println!("\n{}", "=".repeat(60));
+    let mut item_results = Vec::new();
-    println!("PLAN VERIFY CALLED");
+    let mut skipped_count = 0;
    println!("{}", "=".repeat(60));
    println!("Plan ID: {}", plan.plan_id);
    println!("Revision: {}", plan.revision);
    println!("Approved: {:?}", plan.approved_revision);
    println!("Status: {}", plan.status_summary());
    println!();
    for item in &plan.items {
-        println!("[{}] {} - {}", item.id, item.state, item.description);
+        match item.state {
-        println!("  Touches: {:?}", item.touches);
+            PlanState::Blocked => {
-        println!("  Checks:");
+                skipped_count += 1;
-        println!("    Happy: {} -> {}", item.checks.happy.desc, item.checks.happy.target);
+                continue;
        println!("    Negative: {} -> {}", item.checks.negative.desc, item.checks.negative.target);
        println!("    Boundary: {} -> {}", item.checks.boundary.desc, item.checks.boundary.target);
        if !item.evidence.is_empty() {
            println!("  Evidence:");
            for e in &item.evidence {
                println!("    - {}", e);
            }
            PlanState::Done => {
                // Verify this item's evidence
                let missing_evidence = item.evidence.is_empty();
                let evidence_results: Vec<EvidenceVerification> = item
                    .evidence
                    .iter()
                    .map(|e| verify_single_evidence(e, working_dir))
                    .collect();
                item_results.push(ItemVerification {
                    item_id: item.id.clone(),
                    description: item.description.clone(),
                    evidence_results,
                    missing_evidence,
                });
            }
            // Skip todo/doing items - they shouldn't be in a complete plan
            _ => {}
        }
        if let Some(notes) = &item.notes {
            println!("  Notes: {}", notes);
        }
        println!();
    }
-    println!("{}\n", "=".repeat(60));
+    
    PlanVerification {
        plan_id: plan.plan_id.clone(),
        item_results,
        skipped_count,
    }
 }
 /// Format verification results as a string for display.
 /// Uses loud formatting for warnings and errors.
 pub fn format_verification_results(verification: &PlanVerification) -> String {
    let mut output = String::new();
    let (warnings, errors) = verification.count_issues();
    output.push_str("\n");
    output.push_str(&"═".repeat(60));
    output.push_str("\n");
    output.push_str("📋 PLAN VERIFICATION RESULTS\n");
    output.push_str(&"═".repeat(60));
    output.push_str("\n\n");
    for item in &verification.item_results {
        output.push_str(&format!("[{}] {}\n", item.item_id, item.description));
        if item.missing_evidence {
            output.push_str("  ⚠️  WARNING: No evidence provided for done item!\n");
        }
        for ev in &item.evidence_results {
            let status_str = match &ev.status {
                VerificationStatus::Verified => "✅".to_string(),
                VerificationStatus::Warning(msg) => format!("⚠️  WARNING: {}", msg),
                VerificationStatus::Error(msg) => format!("❌ ERROR: {}", msg),
                VerificationStatus::Skipped(msg) => format!("⏭️  SKIPPED: {}", msg),
            };
            output.push_str(&format!("  {} {}\n", status_str, ev.evidence));
        }
        output.push_str("\n");
    }
    if verification.skipped_count > 0 {
        output.push_str(&format!("ℹ️  {} blocked item(s) skipped\n\n", verification.skipped_count));
    }
    // Summary line
    if errors > 0 || warnings > 0 {
        output.push_str(&format!("⚠️  VERIFICATION COMPLETE: {} error(s), {} warning(s)\n", errors, warnings));
    } else {
        output.push_str("✅ VERIFICATION COMPLETE: All evidence validated\n");
    }
    output.push_str(&"═".repeat(60));
    output.push_str("\n");
    output
 }
 // ============================================================================
@@ -474,7 +828,9 @@ pub async fn execute_plan_write<W: UiWriter>(
    // Check if plan is now complete and trigger verification
    if plan.is_complete() && plan.is_approved() {
-        plan_verify(&plan);
+        let verification = plan_verify(&plan, ctx.working_dir);
        let verification_output = format_verification_results(&verification);
        return Ok(format!("✅ Plan updated: {}\n{}", plan.status_summary(), verification_output));
    }
    Ok(format!("✅ Plan updated: {}", plan.status_summary()))
@@ -671,4 +1027,139 @@ items: []
        assert_eq!(parsed.approved_revision, plan.approved_revision);
        assert_eq!(parsed.items.len(), plan.items.len());
    }
    // ========================================================================
    // Evidence Parsing Tests
    // ========================================================================
    #[test]
    fn test_parse_evidence_code_location_with_line_range() {
        let evidence = "src/foo.rs:42-118";
        match parse_evidence(evidence) {
            EvidenceType::CodeLocation { file_path, start_line, end_line } => {
                assert_eq!(file_path, "src/foo.rs");
                assert_eq!(start_line, Some(42));
                assert_eq!(end_line, Some(118));
            }
            _ => panic!("Expected CodeLocation"),
        }
    }
    #[test]
    fn test_parse_evidence_code_location_single_line() {
        let evidence = "src/bar.rs:99";
        match parse_evidence(evidence) {
            EvidenceType::CodeLocation { file_path, start_line, end_line } => {
                assert_eq!(file_path, "src/bar.rs");
                assert_eq!(start_line, Some(99));
                assert_eq!(end_line, None);
            }
            _ => panic!("Expected CodeLocation"),
        }
    }
    #[test]
    fn test_parse_evidence_code_location_file_only() {
        let evidence = "src/lib.rs";
        match parse_evidence(evidence) {
            EvidenceType::CodeLocation { file_path, start_line, end_line } => {
                assert_eq!(file_path, "src/lib.rs");
                assert_eq!(start_line, None);
                assert_eq!(end_line, None);
            }
            _ => panic!("Expected CodeLocation"),
        }
    }
    #[test]
    fn test_parse_evidence_test_reference() {
        let evidence = "tests/integration.rs::test_happy_path";
        match parse_evidence(evidence) {
            EvidenceType::TestReference { file_path, test_name } => {
                assert_eq!(file_path, "tests/integration.rs");
                assert_eq!(test_name, "test_happy_path");
            }
            _ => panic!("Expected TestReference"),
        }
    }
    #[test]
    fn test_parse_evidence_unknown_format() {
        let evidence = "some random text";
        match parse_evidence(evidence) {
            EvidenceType::Unknown(s) => {
                assert_eq!(s, "some random text");
            }
            _ => panic!("Expected Unknown"),
        }
    }
    #[test]
    fn test_parse_evidence_whitespace_trimmed() {
        let evidence = "  src/foo.rs:42  ";
        match parse_evidence(evidence) {
            EvidenceType::CodeLocation { file_path, start_line, .. } => {
                assert_eq!(file_path, "src/foo.rs");
                assert_eq!(start_line, Some(42));
            }
            _ => panic!("Expected CodeLocation"),
        }
    }
    // ========================================================================
    // Verification Tests
    // ========================================================================
    #[test]
    fn test_verify_code_location_file_exists() {
        // Use Cargo.toml which should always exist in the workspace
        let status = verify_code_location("Cargo.toml", None, None, None);
        assert!(matches!(status, VerificationStatus::Verified));
    }
    #[test]
    fn test_verify_code_location_file_not_found() {
        let status = verify_code_location("nonexistent_file_xyz.rs", None, None, None);
        match status {
            VerificationStatus::Error(msg) => {
                assert!(msg.contains("not found"));
            }
            _ => panic!("Expected Error"),
        }
    }
    #[test]
    fn test_verify_code_location_line_out_of_range() {
        // Cargo.toml exists but probably doesn't have 999999 lines
        let status = verify_code_location("Cargo.toml", Some(1), Some(999999), None);
        match status {
            VerificationStatus::Warning(msg) => {
                assert!(msg.contains("exceeds file length"));
            }
            _ => panic!("Expected Warning, got {:?}", status),
        }
    }
    #[test]
    fn test_verify_test_reference_file_not_found() {
        let status = verify_test_reference("nonexistent_test.rs", "test_foo", None);
        match status {
            VerificationStatus::Error(msg) => {
                assert!(msg.contains("not found"));
            }
            _ => panic!("Expected Error"),
        }
    }
    #[test]
    fn test_verification_status_helpers() {
        assert!(VerificationStatus::Verified.is_ok());
        assert!(VerificationStatus::Skipped("reason".to_string()).is_ok());
        assert!(!VerificationStatus::Warning("warn".to_string()).is_ok());
        assert!(!VerificationStatus::Error("err".to_string()).is_ok());
        assert!(!VerificationStatus::Verified.is_warning_or_error());
        assert!(VerificationStatus::Warning("warn".to_string()).is_warning_or_error());
        assert!(VerificationStatus::Error("err".to_string()).is_warning_or_error());
    }
 }