Add datalog-based invariant verification system

Implement a new datalog verification layer using datafrog that: - Compiles rulespec to datalog on plan_approve - Extracts facts from action envelope using selectors - Executes datalog rules on plan_verify - Writes evaluation results to datalog_evaluation.txt (shadow mode) Key components: - crates/g3-core/src/tools/datalog.rs: Full datalog module with: - compile_rulespec(): Validates and compiles rulespec - extract_facts(): Extracts facts from envelope YAML - execute_rules(): Runs datafrog iteration - 23 comprehensive tests - crates/g3-core/src/tools/plan.rs: - execute_plan_approve(): Now compiles rulespec on approval - shadow_datalog_verify(): Runs datalog and writes to eval file Results are written to .g3/sessions/<id>/datalog_evaluation.txt for inspection, NOT injected into context window (shadow mode).
2026-02-06 13:50:54 +11:00
parent bcd50190c6
commit abfac197ab
6 changed files with 1199 additions and 4 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -927,6 +927,12 @@ dependencies = [
 "syn",
 ]

+[[package]]
+name = "datafrog"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0afaad2b26fa326569eb264b1363e8ae3357618c43982b3f285f0774ce76b69"
+
 [[package]]
 name = "deranged"
 version = "0.5.5"
@@ -1443,6 +1449,7 @@ dependencies = [
 "async-trait",
 "base64 0.22.1",
 "chrono",
+ "datafrog",
 "futures-util",
 "g3-computer-control",
 "g3-config",
--- a/analysis/memory.md
+++ b/analysis/memory.md
@@ -1,5 +1,5 @@
 # Workspace Memory
-> Updated: 2026-02-05T09:15:01Z | Size: 17.9k chars
+> Updated: 2026-02-06T00:59:11Z | Size: 20.2k chars

 ### Remember Tool Wiring
 - `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()`
@@ -340,4 +340,27 @@ Makes tool output responsive to terminal width - no line wrapping, with 4-char r
  - `update_tool_output_line()` [407..445] - uses clip_line() with dynamic width
  - `print_tool_output_line()` [447..454] - uses clip_line() for output lines
  - `print_tool_output_header()` [293..410] - uses compress_path/compress_command
-  - `print_tool_compact()` [475..635] - width-aware compact tool display
+  - `print_tool_compact()` [475..635] - width-aware compact tool display
+
+### Datalog Invariant Verification
+- `crates/g3-core/src/tools/datalog.rs` [0..37000]
+  - `CompiledPredicate` [47..67] - id, claim_name, selector, rule, expected_value, source, notes
+  - `CompiledRulespec` [70..80] - plan_id, compiled_at_revision, predicates, claims
+  - `compile_rulespec()` [88..140] - validates selectors, builds claim lookup, converts to CompiledPredicate
+  - `Fact` [170..180] - claim_name, value (extracted from envelope)
+  - `extract_facts()` [190..210] - uses Selector to navigate envelope YAML
+  - `extract_values_recursive()` [215..250] - handles arrays/objects/scalars, adds __length facts
+  - `DatalogPredicateResult` [255..275] - id, claim_name, rule, expected_value, passed, reason, source, notes
+  - `DatalogExecutionResult` [280..295] - predicate_results, fact_count, passed_count, failed_count
+  - `execute_rules()` [300..340] - builds fact lookup, uses datafrog Iteration, evaluates predicates
+  - `evaluate_predicate_datalog()` [345..480] - handles all PredicateRule types
+  - `get_compiled_rulespec_path()` [500..505] - `.g3/sessions/<id>/rulespec.compiled.json`
+  - `save_compiled_rulespec()`, `load_compiled_rulespec()` [510..530] - JSON serialization
+  - `format_datalog_results()` [540..620] - formats results for shadow mode display
+- `crates/g3-core/src/tools/plan.rs`
+  - `shadow_datalog_verify()` [716..760] - loads compiled rulespec + envelope, runs datalog, prints to stderr
+  - `execute_plan_approve()` [1030..1095] - compiles rulespec on approval, saves to rulespec.compiled.json
+
+**Datalog Flow**:
+1. `plan_approve` → `compile_rulespec()` → saves `rulespec.compiled.json`
+2. `plan_verify` → `shadow_datalog_verify()` → loads compiled + envelope → `extract_facts()` → `execute_rules()` → `eprint!()` (shadow mode)
--- a/crates/g3-core/Cargo.toml
+++ b/crates/g3-core/Cargo.toml
@@ -47,6 +47,9 @@ tree-sitter-racket = "0.24"
 streaming-iterator = "0.1"
 walkdir = "2.4"

+# Datalog engine for invariant verification
+datafrog = "2.0.1"
+
 base64 = "0.22.1"

 [dev-dependencies]
--- a/crates/g3-core/src/tools/datalog.rs
+++ b/crates/g3-core/src/tools/datalog.rs
--- a/crates/g3-core/src/tools/mod.rs
+++ b/crates/g3-core/src/tools/mod.rs
@@ -12,6 +12,7 @@

 pub mod executor;
 pub mod acd;
+pub mod datalog;
 pub mod file_ops;
 pub mod invariants;
 pub mod memory;
--- a/crates/g3-core/src/tools/plan.rs
+++ b/crates/g3-core/src/tools/plan.rs
@@ -21,6 +21,8 @@ use crate::ToolCall;
 use super::executor::ToolContext;

 use super::invariants::{format_envelope_markdown, format_rulespec_markdown, get_envelope_path, get_rulespec_path, read_envelope, read_rulespec, write_rulespec, Rulespec};
+use super::datalog::{compile_rulespec, save_compiled_rulespec, format_datalog_results};
+use super::datalog::{load_compiled_rulespec, extract_facts, execute_rules};

 // ============================================================================
 // Plan Schema
@@ -708,6 +710,62 @@ pub fn plan_verify(plan: &Plan, working_dir: Option<&str>) -> PlanVerification {
    }
 }

+/// Shadow datalog verification - runs datalog rules and writes to evaluation file.
+/// This is for dry-run/shadow testing - results are written to
+/// `.g3/sessions/<id>/datalog_evaluation.txt`, NOT injected into context window.
+fn shadow_datalog_verify(session_id: &str) {
+    // Load compiled rulespec
+    let compiled = match load_compiled_rulespec(session_id) {
+        Ok(Some(c)) => c,
+        Ok(None) => {
+            eprintln!("\n⚠️  [SHADOW] No compiled rulespec found - skipping datalog verification");
+            return;
+        }
+        Err(e) => {
+            eprintln!("\n⚠️  [SHADOW] Failed to load compiled rulespec: {}", e);
+            return;
+        }
+    };
+
+    if compiled.is_empty() {
+        eprintln!("\n⚠️  [SHADOW] Compiled rulespec has no predicates - skipping datalog verification");
+        return;
+    }
+
+    // Load envelope
+    let envelope = match read_envelope(session_id) {
+        Ok(Some(e)) => e,
+        Ok(None) => {
+            eprintln!("\n⚠️  [SHADOW] No envelope found - skipping datalog verification");
+            return;
+        }
+        Err(e) => {
+            eprintln!("\n⚠️  [SHADOW] Failed to load envelope: {}", e);
+            return;
+        }
+    };
+
+    // Extract facts from envelope
+    let facts = extract_facts(&envelope, &compiled);
+
+    // Execute datalog rules
+    let result = execute_rules(&compiled, &facts);
+
+    // Format results
+    let output = format_datalog_results(&result);
+
+    // Write to evaluation file (shadow mode - not in context window)
+    let eval_path = get_session_logs_dir(session_id).join("datalog_evaluation.txt");
+    match std::fs::write(&eval_path, &output) {
+        Ok(_) => {
+            eprintln!("📊 Datalog evaluation written to: {}", eval_path.display());
+        }
+        Err(e) => {
+            eprintln!("⚠️  Failed to write datalog evaluation: {}", e);
+        }
+    }
+}
+
 /// Format verification results as a string for display.
 /// Uses loud formatting for warnings and errors.
 /// If session_id is provided, also prints rulespec and envelope file locations.
@@ -767,6 +825,9 @@ pub fn format_verification_results(verification: &PlanVerification, session_id:
        output.push_str(&format!("  {} Envelope: {}\n", envelope_status, envelope_path.display()));
        
        output.push_str("\n");
+
+        // Shadow datalog verification - print to stderr, NOT included in tool output
+        shadow_datalog_verify(sid);
    }
    
    output.push_str(&"═".repeat(60));
@@ -1007,14 +1068,43 @@ pub async fn execute_plan_approve<W: UiWriter>(
    // Approve the plan
    plan.approve();

+    // Compile rulespec to datalog on approval
+    let compile_message;
+    match read_rulespec(session_id) {
+        Ok(Some(rulespec)) => {
+            match compile_rulespec(&rulespec, &plan.plan_id, plan.revision) {
+                Ok(compiled) => {
+                    if let Err(e) = save_compiled_rulespec(session_id, &compiled) {
+                        compile_message = format!("\n⚠️ Failed to save compiled rulespec: {}", e);
+                    } else {
+                        compile_message = format!(
+                            "\n📜 Compiled {} invariant(s) to datalog rules.",
+                            compiled.predicates.len()
+                        );
+                    }
+                }
+                Err(e) => {
+                    compile_message = format!("\n⚠️ Failed to compile rulespec: {}", e);
+                }
+            }
+        }
+        Ok(None) => {
+            compile_message = "\n⚠️ No rulespec found - datalog verification will be skipped.".to_string();
+        }
+        Err(e) => {
+            compile_message = format!("\n⚠️ Failed to read rulespec: {}", e);
+        }
+    }
+
    // Write back
    if let Err(e) = write_plan(session_id, &plan) {
        return Ok(format!("❌ Failed to save approved plan: {}", e));
    }

    Ok(format!(
-        "✅ Plan approved at revision {}. You may now begin implementation.",
-        plan.revision
+        "✅ Plan approved at revision {}. You may now begin implementation.{}",
+        plan.revision,
+        compile_message
    ))
 }