Add datalog-based invariant verification system
Implement a new datalog verification layer using datafrog that: - Compiles rulespec to datalog on plan_approve - Extracts facts from action envelope using selectors - Executes datalog rules on plan_verify - Writes evaluation results to datalog_evaluation.txt (shadow mode) Key components: - crates/g3-core/src/tools/datalog.rs: Full datalog module with: - compile_rulespec(): Validates and compiles rulespec - extract_facts(): Extracts facts from envelope YAML - execute_rules(): Runs datafrog iteration - 23 comprehensive tests - crates/g3-core/src/tools/plan.rs: - execute_plan_approve(): Now compiles rulespec on approval - shadow_datalog_verify(): Runs datalog and writes to eval file Results are written to .g3/sessions/<id>/datalog_evaluation.txt for inspection, NOT injected into context window (shadow mode).
This commit is contained in:
7
Cargo.lock
generated
7
Cargo.lock
generated
@@ -927,6 +927,12 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafrog"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a0afaad2b26fa326569eb264b1363e8ae3357618c43982b3f285f0774ce76b69"
|
||||
|
||||
[[package]]
|
||||
name = "deranged"
|
||||
version = "0.5.5"
|
||||
@@ -1443,6 +1449,7 @@ dependencies = [
|
||||
"async-trait",
|
||||
"base64 0.22.1",
|
||||
"chrono",
|
||||
"datafrog",
|
||||
"futures-util",
|
||||
"g3-computer-control",
|
||||
"g3-config",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Workspace Memory
|
||||
> Updated: 2026-02-05T09:15:01Z | Size: 17.9k chars
|
||||
> Updated: 2026-02-06T00:59:11Z | Size: 20.2k chars
|
||||
|
||||
### Remember Tool Wiring
|
||||
- `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()`
|
||||
@@ -340,4 +340,27 @@ Makes tool output responsive to terminal width - no line wrapping, with 4-char r
|
||||
- `update_tool_output_line()` [407..445] - uses clip_line() with dynamic width
|
||||
- `print_tool_output_line()` [447..454] - uses clip_line() for output lines
|
||||
- `print_tool_output_header()` [293..410] - uses compress_path/compress_command
|
||||
- `print_tool_compact()` [475..635] - width-aware compact tool display
|
||||
- `print_tool_compact()` [475..635] - width-aware compact tool display
|
||||
|
||||
### Datalog Invariant Verification
|
||||
- `crates/g3-core/src/tools/datalog.rs` [0..37000]
|
||||
- `CompiledPredicate` [47..67] - id, claim_name, selector, rule, expected_value, source, notes
|
||||
- `CompiledRulespec` [70..80] - plan_id, compiled_at_revision, predicates, claims
|
||||
- `compile_rulespec()` [88..140] - validates selectors, builds claim lookup, converts to CompiledPredicate
|
||||
- `Fact` [170..180] - claim_name, value (extracted from envelope)
|
||||
- `extract_facts()` [190..210] - uses Selector to navigate envelope YAML
|
||||
- `extract_values_recursive()` [215..250] - handles arrays/objects/scalars, adds __length facts
|
||||
- `DatalogPredicateResult` [255..275] - id, claim_name, rule, expected_value, passed, reason, source, notes
|
||||
- `DatalogExecutionResult` [280..295] - predicate_results, fact_count, passed_count, failed_count
|
||||
- `execute_rules()` [300..340] - builds fact lookup, uses datafrog Iteration, evaluates predicates
|
||||
- `evaluate_predicate_datalog()` [345..480] - handles all PredicateRule types
|
||||
- `get_compiled_rulespec_path()` [500..505] - `.g3/sessions/<id>/rulespec.compiled.json`
|
||||
- `save_compiled_rulespec()`, `load_compiled_rulespec()` [510..530] - JSON serialization
|
||||
- `format_datalog_results()` [540..620] - formats results for shadow mode display
|
||||
- `crates/g3-core/src/tools/plan.rs`
|
||||
- `shadow_datalog_verify()` [716..760] - loads compiled rulespec + envelope, runs datalog, prints to stderr
|
||||
- `execute_plan_approve()` [1030..1095] - compiles rulespec on approval, saves to rulespec.compiled.json
|
||||
|
||||
**Datalog Flow**:
|
||||
1. `plan_approve` → `compile_rulespec()` → saves `rulespec.compiled.json`
|
||||
2. `plan_verify` → `shadow_datalog_verify()` → loads compiled + envelope → `extract_facts()` → `execute_rules()` → `eprint!()` (shadow mode)
|
||||
@@ -47,6 +47,9 @@ tree-sitter-racket = "0.24"
|
||||
streaming-iterator = "0.1"
|
||||
walkdir = "2.4"
|
||||
|
||||
# Datalog engine for invariant verification
|
||||
datafrog = "2.0.1"
|
||||
|
||||
base64 = "0.22.1"
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
1071
crates/g3-core/src/tools/datalog.rs
Normal file
1071
crates/g3-core/src/tools/datalog.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -12,6 +12,7 @@
|
||||
|
||||
pub mod executor;
|
||||
pub mod acd;
|
||||
pub mod datalog;
|
||||
pub mod file_ops;
|
||||
pub mod invariants;
|
||||
pub mod memory;
|
||||
|
||||
@@ -21,6 +21,8 @@ use crate::ToolCall;
|
||||
use super::executor::ToolContext;
|
||||
|
||||
use super::invariants::{format_envelope_markdown, format_rulespec_markdown, get_envelope_path, get_rulespec_path, read_envelope, read_rulespec, write_rulespec, Rulespec};
|
||||
use super::datalog::{compile_rulespec, save_compiled_rulespec, format_datalog_results};
|
||||
use super::datalog::{load_compiled_rulespec, extract_facts, execute_rules};
|
||||
|
||||
// ============================================================================
|
||||
// Plan Schema
|
||||
@@ -708,6 +710,62 @@ pub fn plan_verify(plan: &Plan, working_dir: Option<&str>) -> PlanVerification {
|
||||
}
|
||||
}
|
||||
|
||||
/// Shadow datalog verification - runs datalog rules and writes to evaluation file.
|
||||
/// This is for dry-run/shadow testing - results are written to
|
||||
/// `.g3/sessions/<id>/datalog_evaluation.txt`, NOT injected into context window.
|
||||
fn shadow_datalog_verify(session_id: &str) {
|
||||
// Load compiled rulespec
|
||||
let compiled = match load_compiled_rulespec(session_id) {
|
||||
Ok(Some(c)) => c,
|
||||
Ok(None) => {
|
||||
eprintln!("\n⚠️ [SHADOW] No compiled rulespec found - skipping datalog verification");
|
||||
return;
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("\n⚠️ [SHADOW] Failed to load compiled rulespec: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
if compiled.is_empty() {
|
||||
eprintln!("\n⚠️ [SHADOW] Compiled rulespec has no predicates - skipping datalog verification");
|
||||
return;
|
||||
}
|
||||
|
||||
// Load envelope
|
||||
let envelope = match read_envelope(session_id) {
|
||||
Ok(Some(e)) => e,
|
||||
Ok(None) => {
|
||||
eprintln!("\n⚠️ [SHADOW] No envelope found - skipping datalog verification");
|
||||
return;
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("\n⚠️ [SHADOW] Failed to load envelope: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Extract facts from envelope
|
||||
let facts = extract_facts(&envelope, &compiled);
|
||||
|
||||
// Execute datalog rules
|
||||
let result = execute_rules(&compiled, &facts);
|
||||
|
||||
// Format results
|
||||
let output = format_datalog_results(&result);
|
||||
|
||||
// Write to evaluation file (shadow mode - not in context window)
|
||||
let eval_path = get_session_logs_dir(session_id).join("datalog_evaluation.txt");
|
||||
match std::fs::write(&eval_path, &output) {
|
||||
Ok(_) => {
|
||||
eprintln!("📊 Datalog evaluation written to: {}", eval_path.display());
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("⚠️ Failed to write datalog evaluation: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Format verification results as a string for display.
|
||||
/// Uses loud formatting for warnings and errors.
|
||||
/// If session_id is provided, also prints rulespec and envelope file locations.
|
||||
@@ -767,6 +825,9 @@ pub fn format_verification_results(verification: &PlanVerification, session_id:
|
||||
output.push_str(&format!(" {} Envelope: {}\n", envelope_status, envelope_path.display()));
|
||||
|
||||
output.push_str("\n");
|
||||
|
||||
// Shadow datalog verification - print to stderr, NOT included in tool output
|
||||
shadow_datalog_verify(sid);
|
||||
}
|
||||
|
||||
output.push_str(&"═".repeat(60));
|
||||
@@ -1007,14 +1068,43 @@ pub async fn execute_plan_approve<W: UiWriter>(
|
||||
// Approve the plan
|
||||
plan.approve();
|
||||
|
||||
// Compile rulespec to datalog on approval
|
||||
let compile_message;
|
||||
match read_rulespec(session_id) {
|
||||
Ok(Some(rulespec)) => {
|
||||
match compile_rulespec(&rulespec, &plan.plan_id, plan.revision) {
|
||||
Ok(compiled) => {
|
||||
if let Err(e) = save_compiled_rulespec(session_id, &compiled) {
|
||||
compile_message = format!("\n⚠️ Failed to save compiled rulespec: {}", e);
|
||||
} else {
|
||||
compile_message = format!(
|
||||
"\n📜 Compiled {} invariant(s) to datalog rules.",
|
||||
compiled.predicates.len()
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
compile_message = format!("\n⚠️ Failed to compile rulespec: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
compile_message = "\n⚠️ No rulespec found - datalog verification will be skipped.".to_string();
|
||||
}
|
||||
Err(e) => {
|
||||
compile_message = format!("\n⚠️ Failed to read rulespec: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
// Write back
|
||||
if let Err(e) = write_plan(session_id, &plan) {
|
||||
return Ok(format!("❌ Failed to save approved plan: {}", e));
|
||||
}
|
||||
|
||||
Ok(format!(
|
||||
"✅ Plan approved at revision {}. You may now begin implementation.",
|
||||
plan.revision
|
||||
"✅ Plan approved at revision {}. You may now begin implementation.{}",
|
||||
plan.revision,
|
||||
compile_message
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user