Add datalog-based invariant verification system

Implement a new datalog verification layer using datafrog that:

- Compiles rulespec to datalog on plan_approve
- Extracts facts from action envelope using selectors
- Executes datalog rules on plan_verify
- Writes evaluation results to datalog_evaluation.txt (shadow mode)

Key components:
- crates/g3-core/src/tools/datalog.rs: Full datalog module with:
  - compile_rulespec(): Validates and compiles rulespec
  - extract_facts(): Extracts facts from envelope YAML
  - execute_rules(): Runs datafrog iteration
  - 23 comprehensive tests

- crates/g3-core/src/tools/plan.rs:
  - execute_plan_approve(): Now compiles rulespec on approval
  - shadow_datalog_verify(): Runs datalog and writes to eval file

Results are written to .g3/sessions/<id>/datalog_evaluation.txt
for inspection, NOT injected into context window (shadow mode).
This commit is contained in:
Dhanji R. Prasanna
2026-02-06 13:50:54 +11:00
parent bcd50190c6
commit abfac197ab
6 changed files with 1199 additions and 4 deletions

7
Cargo.lock generated
View File

@@ -927,6 +927,12 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "datafrog"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0afaad2b26fa326569eb264b1363e8ae3357618c43982b3f285f0774ce76b69"
[[package]] [[package]]
name = "deranged" name = "deranged"
version = "0.5.5" version = "0.5.5"
@@ -1443,6 +1449,7 @@ dependencies = [
"async-trait", "async-trait",
"base64 0.22.1", "base64 0.22.1",
"chrono", "chrono",
"datafrog",
"futures-util", "futures-util",
"g3-computer-control", "g3-computer-control",
"g3-config", "g3-config",

View File

@@ -1,5 +1,5 @@
# Workspace Memory # Workspace Memory
> Updated: 2026-02-05T09:15:01Z | Size: 17.9k chars > Updated: 2026-02-06T00:59:11Z | Size: 20.2k chars
### Remember Tool Wiring ### Remember Tool Wiring
- `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()` - `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()`
@@ -341,3 +341,26 @@ Makes tool output responsive to terminal width - no line wrapping, with 4-char r
- `print_tool_output_line()` [447..454] - uses clip_line() for output lines - `print_tool_output_line()` [447..454] - uses clip_line() for output lines
- `print_tool_output_header()` [293..410] - uses compress_path/compress_command - `print_tool_output_header()` [293..410] - uses compress_path/compress_command
- `print_tool_compact()` [475..635] - width-aware compact tool display - `print_tool_compact()` [475..635] - width-aware compact tool display
### Datalog Invariant Verification
- `crates/g3-core/src/tools/datalog.rs` [0..37000]
- `CompiledPredicate` [47..67] - id, claim_name, selector, rule, expected_value, source, notes
- `CompiledRulespec` [70..80] - plan_id, compiled_at_revision, predicates, claims
- `compile_rulespec()` [88..140] - validates selectors, builds claim lookup, converts to CompiledPredicate
- `Fact` [170..180] - claim_name, value (extracted from envelope)
- `extract_facts()` [190..210] - uses Selector to navigate envelope YAML
- `extract_values_recursive()` [215..250] - handles arrays/objects/scalars, adds __length facts
- `DatalogPredicateResult` [255..275] - id, claim_name, rule, expected_value, passed, reason, source, notes
- `DatalogExecutionResult` [280..295] - predicate_results, fact_count, passed_count, failed_count
- `execute_rules()` [300..340] - builds fact lookup, uses datafrog Iteration, evaluates predicates
- `evaluate_predicate_datalog()` [345..480] - handles all PredicateRule types
- `get_compiled_rulespec_path()` [500..505] - `.g3/sessions/<id>/rulespec.compiled.json`
- `save_compiled_rulespec()`, `load_compiled_rulespec()` [510..530] - JSON serialization
- `format_datalog_results()` [540..620] - formats results for shadow mode display
- `crates/g3-core/src/tools/plan.rs`
- `shadow_datalog_verify()` [716..760] - loads compiled rulespec + envelope, runs datalog, prints to stderr
- `execute_plan_approve()` [1030..1095] - compiles rulespec on approval, saves to rulespec.compiled.json
**Datalog Flow**:
1. `plan_approve``compile_rulespec()` → saves `rulespec.compiled.json`
2. `plan_verify``shadow_datalog_verify()` → loads compiled + envelope → `extract_facts()``execute_rules()``eprint!()` (shadow mode)

View File

@@ -47,6 +47,9 @@ tree-sitter-racket = "0.24"
streaming-iterator = "0.1" streaming-iterator = "0.1"
walkdir = "2.4" walkdir = "2.4"
# Datalog engine for invariant verification
datafrog = "2.0.1"
base64 = "0.22.1" base64 = "0.22.1"
[dev-dependencies] [dev-dependencies]

File diff suppressed because it is too large Load Diff

View File

@@ -12,6 +12,7 @@
pub mod executor; pub mod executor;
pub mod acd; pub mod acd;
pub mod datalog;
pub mod file_ops; pub mod file_ops;
pub mod invariants; pub mod invariants;
pub mod memory; pub mod memory;

View File

@@ -21,6 +21,8 @@ use crate::ToolCall;
use super::executor::ToolContext; use super::executor::ToolContext;
use super::invariants::{format_envelope_markdown, format_rulespec_markdown, get_envelope_path, get_rulespec_path, read_envelope, read_rulespec, write_rulespec, Rulespec}; use super::invariants::{format_envelope_markdown, format_rulespec_markdown, get_envelope_path, get_rulespec_path, read_envelope, read_rulespec, write_rulespec, Rulespec};
use super::datalog::{compile_rulespec, save_compiled_rulespec, format_datalog_results};
use super::datalog::{load_compiled_rulespec, extract_facts, execute_rules};
// ============================================================================ // ============================================================================
// Plan Schema // Plan Schema
@@ -708,6 +710,62 @@ pub fn plan_verify(plan: &Plan, working_dir: Option<&str>) -> PlanVerification {
} }
} }
/// Shadow datalog verification - runs datalog rules and writes to evaluation file.
/// This is for dry-run/shadow testing - results are written to
/// `.g3/sessions/<id>/datalog_evaluation.txt`, NOT injected into context window.
fn shadow_datalog_verify(session_id: &str) {
// Load compiled rulespec
let compiled = match load_compiled_rulespec(session_id) {
Ok(Some(c)) => c,
Ok(None) => {
eprintln!("\n⚠️ [SHADOW] No compiled rulespec found - skipping datalog verification");
return;
}
Err(e) => {
eprintln!("\n⚠️ [SHADOW] Failed to load compiled rulespec: {}", e);
return;
}
};
if compiled.is_empty() {
eprintln!("\n⚠️ [SHADOW] Compiled rulespec has no predicates - skipping datalog verification");
return;
}
// Load envelope
let envelope = match read_envelope(session_id) {
Ok(Some(e)) => e,
Ok(None) => {
eprintln!("\n⚠️ [SHADOW] No envelope found - skipping datalog verification");
return;
}
Err(e) => {
eprintln!("\n⚠️ [SHADOW] Failed to load envelope: {}", e);
return;
}
};
// Extract facts from envelope
let facts = extract_facts(&envelope, &compiled);
// Execute datalog rules
let result = execute_rules(&compiled, &facts);
// Format results
let output = format_datalog_results(&result);
// Write to evaluation file (shadow mode - not in context window)
let eval_path = get_session_logs_dir(session_id).join("datalog_evaluation.txt");
match std::fs::write(&eval_path, &output) {
Ok(_) => {
eprintln!("📊 Datalog evaluation written to: {}", eval_path.display());
}
Err(e) => {
eprintln!("⚠️ Failed to write datalog evaluation: {}", e);
}
}
}
/// Format verification results as a string for display. /// Format verification results as a string for display.
/// Uses loud formatting for warnings and errors. /// Uses loud formatting for warnings and errors.
/// If session_id is provided, also prints rulespec and envelope file locations. /// If session_id is provided, also prints rulespec and envelope file locations.
@@ -767,6 +825,9 @@ pub fn format_verification_results(verification: &PlanVerification, session_id:
output.push_str(&format!(" {} Envelope: {}\n", envelope_status, envelope_path.display())); output.push_str(&format!(" {} Envelope: {}\n", envelope_status, envelope_path.display()));
output.push_str("\n"); output.push_str("\n");
// Shadow datalog verification - print to stderr, NOT included in tool output
shadow_datalog_verify(sid);
} }
output.push_str(&"".repeat(60)); output.push_str(&"".repeat(60));
@@ -1007,14 +1068,43 @@ pub async fn execute_plan_approve<W: UiWriter>(
// Approve the plan // Approve the plan
plan.approve(); plan.approve();
// Compile rulespec to datalog on approval
let compile_message;
match read_rulespec(session_id) {
Ok(Some(rulespec)) => {
match compile_rulespec(&rulespec, &plan.plan_id, plan.revision) {
Ok(compiled) => {
if let Err(e) = save_compiled_rulespec(session_id, &compiled) {
compile_message = format!("\n⚠️ Failed to save compiled rulespec: {}", e);
} else {
compile_message = format!(
"\n📜 Compiled {} invariant(s) to datalog rules.",
compiled.predicates.len()
);
}
}
Err(e) => {
compile_message = format!("\n⚠️ Failed to compile rulespec: {}", e);
}
}
}
Ok(None) => {
compile_message = "\n⚠️ No rulespec found - datalog verification will be skipped.".to_string();
}
Err(e) => {
compile_message = format!("\n⚠️ Failed to read rulespec: {}", e);
}
}
// Write back // Write back
if let Err(e) = write_plan(session_id, &plan) { if let Err(e) = write_plan(session_id, &plan) {
return Ok(format!("❌ Failed to save approved plan: {}", e)); return Ok(format!("❌ Failed to save approved plan: {}", e));
} }
Ok(format!( Ok(format!(
"✅ Plan approved at revision {}. You may now begin implementation.", "✅ Plan approved at revision {}. You may now begin implementation.{}",
plan.revision plan.revision,
compile_message
)) ))
} }