fix: generate actual Soufflé datalog in .dl files instead of YAML
The rulespec compiler was writing serde_yaml::to_string(&compiled) into rulespec.compiled.dl files — just YAML, not datalog at all. Added format_datalog_program() that produces proper Soufflé-style datalog: - .decl relation declarations (claim_value, claim_length, predicate_pass, predicate_fail) - Fact assertions from the envelope - Rules for all 9 predicate types (exists, not_exists, equals, contains, greater_than, less_than, min_length, max_length, matches) - .output directives for query results Updated verify_envelope() to call the new function instead of serde_yaml::to_string(). Added 8 unit tests covering all rule types, edge cases, and the butler rulespec example.
This commit is contained in:
@@ -534,6 +534,171 @@ fn evaluate_predicate_datalog(
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Datalog Program Generation
|
||||
// ============================================================================
|
||||
|
||||
/// Escape a string value for use in a datalog literal.
|
||||
///
|
||||
/// Replaces backslashes, double quotes, and newlines with escape sequences.
|
||||
fn escape_datalog_string(s: &str) -> String {
|
||||
s.replace('\\', "\\\\")
|
||||
.replace('"', "\\\"")
|
||||
.replace('\n', "\\n")
|
||||
.replace('\r', "\\r")
|
||||
.replace('\t', "\\t")
|
||||
}
|
||||
|
||||
/// Format a compiled rulespec and extracted facts as a datalog program.
|
||||
///
|
||||
/// Produces a textual `.dl` file with:
|
||||
/// - Relation declarations (`.decl`)
|
||||
/// - Fact assertions from the envelope
|
||||
/// - Rules derived from rulespec predicates
|
||||
/// - An output directive for query results
|
||||
///
|
||||
/// This is a Soufflé-style datalog dialect, which is the most widely
|
||||
/// used textual datalog format.
|
||||
pub fn format_datalog_program(
|
||||
compiled: &CompiledRulespec,
|
||||
facts: &HashSet<Fact>,
|
||||
) -> String {
|
||||
let mut out = String::new();
|
||||
|
||||
// ── Header ──────────────────────────────────────────────────────
|
||||
out.push_str("// Auto-generated datalog program\n");
|
||||
out.push_str(&format!("// Plan: {}\n", compiled.plan_id));
|
||||
out.push_str(&format!("// Compiled at revision: {}\n", compiled.compiled_at_revision));
|
||||
out.push_str("\n");
|
||||
|
||||
// ── Relation declarations ───────────────────────────────────────
|
||||
out.push_str("// --- Relation declarations ---\n");
|
||||
out.push_str(".decl claim_value(claim: symbol, value: symbol)\n");
|
||||
out.push_str(".decl claim_length(claim: symbol, length: number)\n");
|
||||
out.push_str(".decl predicate_pass(id: number)\n");
|
||||
out.push_str(".decl predicate_fail(id: number)\n");
|
||||
out.push_str("\n");
|
||||
out.push_str(".output predicate_pass\n");
|
||||
out.push_str(".output predicate_fail\n");
|
||||
out.push_str("\n");
|
||||
|
||||
// ── Facts ───────────────────────────────────────────────────────
|
||||
out.push_str("// --- Facts (from envelope) ---\n");
|
||||
// Sort for deterministic output
|
||||
let mut sorted_facts: Vec<&Fact> = facts.iter().collect();
|
||||
sorted_facts.sort_by(|a, b| (&a.claim_name, &a.value).cmp(&(&b.claim_name, &b.value)));
|
||||
|
||||
for fact in &sorted_facts {
|
||||
if fact.claim_name.ends_with(".__length") {
|
||||
// Length facts go into the claim_length relation
|
||||
let base_claim = fact.claim_name.trim_end_matches(".__length");
|
||||
if let Ok(n) = fact.value.parse::<i64>() {
|
||||
out.push_str(&format!(
|
||||
"claim_length(\"{}\", {}).\n",
|
||||
escape_datalog_string(base_claim),
|
||||
n,
|
||||
));
|
||||
}
|
||||
} else {
|
||||
out.push_str(&format!(
|
||||
"claim_value(\"{}\", \"{}\").\n",
|
||||
escape_datalog_string(&fact.claim_name),
|
||||
escape_datalog_string(&fact.value),
|
||||
));
|
||||
}
|
||||
}
|
||||
out.push_str("\n");
|
||||
|
||||
// ── Rules (from predicates) ─────────────────────────────────────
|
||||
out.push_str("// --- Rules (from rulespec predicates) ---\n");
|
||||
for pred in &compiled.predicates {
|
||||
let id = pred.id;
|
||||
let claim = escape_datalog_string(&pred.claim_name);
|
||||
let expected = pred
|
||||
.expected_value
|
||||
.as_deref()
|
||||
.map(|v| escape_datalog_string(v))
|
||||
.unwrap_or_default();
|
||||
|
||||
// Emit a comment describing the predicate
|
||||
out.push_str(&format!(
|
||||
"// pred[{}]: {} {} {}{}\n",
|
||||
id,
|
||||
pred.rule,
|
||||
pred.claim_name,
|
||||
pred.expected_value.as_deref().map(|v| format!("'{}'", v)).unwrap_or_default(),
|
||||
pred.notes.as_deref().map(|n| format!(" -- {}", n)).unwrap_or_default(),
|
||||
));
|
||||
|
||||
match pred.rule {
|
||||
PredicateRule::Exists => {
|
||||
out.push_str(&format!(
|
||||
"predicate_pass({}) :- claim_value(\"{}\", _).\n",
|
||||
id, claim,
|
||||
));
|
||||
}
|
||||
PredicateRule::NotExists => {
|
||||
// Pass when no matching fact exists
|
||||
out.push_str(&format!(
|
||||
"predicate_pass({}) :- !claim_value(\"{}\", _).\n",
|
||||
id, claim,
|
||||
));
|
||||
}
|
||||
PredicateRule::Equals => {
|
||||
out.push_str(&format!(
|
||||
"predicate_pass({}) :- claim_value(\"{}\", \"{}\").\n",
|
||||
id, claim, expected,
|
||||
));
|
||||
}
|
||||
PredicateRule::Contains => {
|
||||
out.push_str(&format!(
|
||||
"predicate_pass({}) :- claim_value(\"{}\", \"{}\").\n",
|
||||
id, claim, expected,
|
||||
));
|
||||
}
|
||||
PredicateRule::GreaterThan => {
|
||||
out.push_str(&format!(
|
||||
"predicate_pass({}) :- claim_value(\"{}\", V), to_number(V, N), N > {}.\n",
|
||||
id, claim, expected,
|
||||
));
|
||||
}
|
||||
PredicateRule::LessThan => {
|
||||
out.push_str(&format!(
|
||||
"predicate_pass({}) :- claim_value(\"{}\", V), to_number(V, N), N < {}.\n",
|
||||
id, claim, expected,
|
||||
));
|
||||
}
|
||||
PredicateRule::MinLength => {
|
||||
out.push_str(&format!(
|
||||
"predicate_pass({}) :- claim_length(\"{}\", N), N >= {}.\n",
|
||||
id, claim, expected,
|
||||
));
|
||||
}
|
||||
PredicateRule::MaxLength => {
|
||||
out.push_str(&format!(
|
||||
"predicate_pass({}) :- claim_length(\"{}\", N), N <= {}.\n",
|
||||
id, claim, expected,
|
||||
));
|
||||
}
|
||||
PredicateRule::Matches => {
|
||||
// Regex matching expressed as a match functor
|
||||
out.push_str(&format!(
|
||||
"predicate_pass({}) :- claim_value(\"{}\", V), match(\"{}\", V).\n",
|
||||
id, claim, expected,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Derive failure as the negation of pass
|
||||
out.push_str(&format!(
|
||||
"predicate_fail({}) :- !predicate_pass({}).\n",
|
||||
id, id,
|
||||
));
|
||||
out.push_str("\n");
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
// ============================================================================
|
||||
// ============================================================================
|
||||
// Formatting
|
||||
@@ -1039,4 +1204,232 @@ mod tests {
|
||||
assert!(output.contains("✅"));
|
||||
assert!(output.contains("Facts extracted:"));
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// Datalog Program Generation Tests
|
||||
// ========================================================================
|
||||
|
||||
#[test]
|
||||
fn test_format_datalog_program_butler_example() {
|
||||
// Mirrors the butler rulespec: email_reviewed equals true
|
||||
let mut rulespec = Rulespec::new();
|
||||
rulespec.claims.push(Claim::new("email_reviewed", "facts.reviewed"));
|
||||
rulespec.predicates.push(
|
||||
Predicate::new("email_reviewed", PredicateRule::Equals, InvariantSource::TaskPrompt)
|
||||
.with_value(YamlValue::Bool(true))
|
||||
.with_notes("Outgoing emails must be manually reviewed before sending"),
|
||||
);
|
||||
|
||||
let compiled = compile_rulespec(&rulespec, "outbound-email", 0).unwrap();
|
||||
|
||||
let mut envelope = ActionEnvelope::new();
|
||||
envelope.add_fact("facts", serde_yaml::from_str("reviewed: true").unwrap());
|
||||
let facts = extract_facts(&envelope, &compiled);
|
||||
|
||||
let dl = format_datalog_program(&compiled, &facts);
|
||||
|
||||
// Header
|
||||
assert!(dl.contains("// Auto-generated datalog program"));
|
||||
assert!(dl.contains("// Plan: outbound-email"));
|
||||
|
||||
// Relation declarations
|
||||
assert!(dl.contains(".decl claim_value(claim: symbol, value: symbol)"));
|
||||
assert!(dl.contains(".decl claim_length(claim: symbol, length: number)"));
|
||||
assert!(dl.contains(".decl predicate_pass(id: number)"));
|
||||
assert!(dl.contains(".decl predicate_fail(id: number)"));
|
||||
assert!(dl.contains(".output predicate_pass"));
|
||||
assert!(dl.contains(".output predicate_fail"));
|
||||
|
||||
// Facts
|
||||
assert!(dl.contains(r#"claim_value("email_reviewed", "true")."#));
|
||||
|
||||
// Rule for equals
|
||||
assert!(dl.contains(r#"predicate_pass(0) :- claim_value("email_reviewed", "true")."#));
|
||||
assert!(dl.contains("predicate_fail(0) :- !predicate_pass(0)."));
|
||||
|
||||
// Comment with notes
|
||||
assert!(dl.contains("Outgoing emails must be manually reviewed"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_format_datalog_program_empty_rulespec() {
|
||||
let rulespec = Rulespec::new();
|
||||
let compiled = compile_rulespec(&rulespec, "empty", 0).unwrap();
|
||||
let facts = std::collections::HashSet::new();
|
||||
|
||||
let dl = format_datalog_program(&compiled, &facts);
|
||||
|
||||
// Should still have valid structure
|
||||
assert!(dl.contains(".decl claim_value"));
|
||||
assert!(dl.contains(".decl predicate_pass"));
|
||||
assert!(dl.contains("// --- Facts (from envelope) ---"));
|
||||
assert!(dl.contains("// --- Rules (from rulespec predicates) ---"));
|
||||
|
||||
// No fact assertions (lines ending with period) or rules beyond declarations
|
||||
assert!(!dl.contains(r#"claim_value(""#));
|
||||
assert!(!dl.contains("predicate_pass(0)"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_format_datalog_program_empty_facts() {
|
||||
let mut rulespec = Rulespec::new();
|
||||
rulespec.claims.push(Claim::new("test", "foo.bar"));
|
||||
rulespec.predicates.push(
|
||||
Predicate::new("test", PredicateRule::Exists, InvariantSource::TaskPrompt),
|
||||
);
|
||||
|
||||
let compiled = compile_rulespec(&rulespec, "test", 1).unwrap();
|
||||
let facts = std::collections::HashSet::new();
|
||||
|
||||
let dl = format_datalog_program(&compiled, &facts);
|
||||
|
||||
// Has declarations and rules but no fact assertions
|
||||
assert!(dl.contains(".decl claim_value"));
|
||||
assert!(dl.contains("predicate_pass(0) :- claim_value"));
|
||||
assert!(dl.contains("predicate_fail(0) :- !predicate_pass(0)"));
|
||||
// No claim_value facts
|
||||
// The rules section will reference claim_value("test", _) but the facts section should not
|
||||
let facts_section = dl.split("// --- Rules").next().unwrap();
|
||||
assert!(!facts_section.contains(r#"claim_value("test""#));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_format_datalog_program_special_characters() {
|
||||
let mut rulespec = Rulespec::new();
|
||||
rulespec.claims.push(Claim::new("msg", "message"));
|
||||
rulespec.predicates.push(
|
||||
Predicate::new("msg", PredicateRule::Equals, InvariantSource::TaskPrompt)
|
||||
.with_value(YamlValue::String("hello \"world\"".to_string())),
|
||||
);
|
||||
|
||||
let compiled = compile_rulespec(&rulespec, "test", 0).unwrap();
|
||||
|
||||
let mut envelope = ActionEnvelope::new();
|
||||
envelope.add_fact("message", YamlValue::String("hello \"world\"".to_string()));
|
||||
let facts = extract_facts(&envelope, &compiled);
|
||||
|
||||
let dl = format_datalog_program(&compiled, &facts);
|
||||
|
||||
// Quotes should be escaped
|
||||
assert!(dl.contains(r#"\"world\""#));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_format_datalog_program_all_rule_types() {
|
||||
let mut rulespec = Rulespec::new();
|
||||
|
||||
// Create claims for each rule type
|
||||
rulespec.claims.push(Claim::new("c_exists", "a"));
|
||||
rulespec.claims.push(Claim::new("c_not_exists", "b"));
|
||||
rulespec.claims.push(Claim::new("c_equals", "c"));
|
||||
rulespec.claims.push(Claim::new("c_contains", "d"));
|
||||
rulespec.claims.push(Claim::new("c_gt", "e"));
|
||||
rulespec.claims.push(Claim::new("c_lt", "f"));
|
||||
rulespec.claims.push(Claim::new("c_min", "g"));
|
||||
rulespec.claims.push(Claim::new("c_max", "h"));
|
||||
rulespec.claims.push(Claim::new("c_matches", "i"));
|
||||
|
||||
// Add one predicate per rule type
|
||||
rulespec.predicates.push(
|
||||
Predicate::new("c_exists", PredicateRule::Exists, InvariantSource::TaskPrompt),
|
||||
);
|
||||
rulespec.predicates.push(
|
||||
Predicate::new("c_not_exists", PredicateRule::NotExists, InvariantSource::TaskPrompt),
|
||||
);
|
||||
rulespec.predicates.push(
|
||||
Predicate::new("c_equals", PredicateRule::Equals, InvariantSource::TaskPrompt)
|
||||
.with_value(YamlValue::String("val".to_string())),
|
||||
);
|
||||
rulespec.predicates.push(
|
||||
Predicate::new("c_contains", PredicateRule::Contains, InvariantSource::TaskPrompt)
|
||||
.with_value(YamlValue::String("item".to_string())),
|
||||
);
|
||||
rulespec.predicates.push(
|
||||
Predicate::new("c_gt", PredicateRule::GreaterThan, InvariantSource::TaskPrompt)
|
||||
.with_value(YamlValue::Number(10.into())),
|
||||
);
|
||||
rulespec.predicates.push(
|
||||
Predicate::new("c_lt", PredicateRule::LessThan, InvariantSource::TaskPrompt)
|
||||
.with_value(YamlValue::Number(100.into())),
|
||||
);
|
||||
rulespec.predicates.push(
|
||||
Predicate::new("c_min", PredicateRule::MinLength, InvariantSource::TaskPrompt)
|
||||
.with_value(YamlValue::Number(2.into())),
|
||||
);
|
||||
rulespec.predicates.push(
|
||||
Predicate::new("c_max", PredicateRule::MaxLength, InvariantSource::TaskPrompt)
|
||||
.with_value(YamlValue::Number(5.into())),
|
||||
);
|
||||
rulespec.predicates.push(
|
||||
Predicate::new("c_matches", PredicateRule::Matches, InvariantSource::TaskPrompt)
|
||||
.with_value(YamlValue::String("^foo.*".to_string())),
|
||||
);
|
||||
|
||||
let compiled = compile_rulespec(&rulespec, "all-rules", 1).unwrap();
|
||||
let facts = std::collections::HashSet::new();
|
||||
|
||||
let dl = format_datalog_program(&compiled, &facts);
|
||||
|
||||
// Each rule type produces a distinct pattern
|
||||
assert!(dl.contains(r#"predicate_pass(0) :- claim_value("c_exists", _)."#));
|
||||
assert!(dl.contains(r#"predicate_pass(1) :- !claim_value("c_not_exists", _)."#));
|
||||
assert!(dl.contains(r#"predicate_pass(2) :- claim_value("c_equals", "val")."#));
|
||||
assert!(dl.contains(r#"predicate_pass(3) :- claim_value("c_contains", "item")."#));
|
||||
assert!(dl.contains(r#"predicate_pass(4) :- claim_value("c_gt", V), to_number(V, N), N > 10."#));
|
||||
assert!(dl.contains(r#"predicate_pass(5) :- claim_value("c_lt", V), to_number(V, N), N < 100."#));
|
||||
assert!(dl.contains(r#"predicate_pass(6) :- claim_length("c_min", N), N >= 2."#));
|
||||
assert!(dl.contains(r#"predicate_pass(7) :- claim_length("c_max", N), N <= 5."#));
|
||||
assert!(dl.contains(r#"predicate_pass(8) :- claim_value("c_matches", V), match("^foo.*", V)."#));
|
||||
|
||||
// Each has a corresponding fail rule
|
||||
for i in 0..9 {
|
||||
assert!(dl.contains(&format!("predicate_fail({}) :- !predicate_pass({}).", i, i)));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_format_datalog_program_length_facts() {
|
||||
let mut rulespec = Rulespec::new();
|
||||
rulespec.claims.push(Claim::new("caps", "csv_importer.capabilities"));
|
||||
rulespec.predicates.push(
|
||||
Predicate::new("caps", PredicateRule::MinLength, InvariantSource::TaskPrompt)
|
||||
.with_value(YamlValue::Number(2.into())),
|
||||
);
|
||||
|
||||
let compiled = compile_rulespec(&rulespec, "test", 0).unwrap();
|
||||
|
||||
let envelope = make_test_envelope();
|
||||
let facts = extract_facts(&envelope, &compiled);
|
||||
|
||||
let dl = format_datalog_program(&compiled, &facts);
|
||||
|
||||
// Length facts should use claim_length relation
|
||||
assert!(dl.contains(r#"claim_length("caps", 3)."#));
|
||||
// Individual values should use claim_value
|
||||
assert!(dl.contains(r#"claim_value("caps", "handle_tsv")."#));
|
||||
assert!(dl.contains(r#"claim_value("caps", "handle_headers")."#));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_format_datalog_program_deterministic_output() {
|
||||
let envelope = make_test_envelope();
|
||||
let rulespec = make_test_rulespec();
|
||||
let compiled = compile_rulespec(&rulespec, "test", 1).unwrap();
|
||||
let facts = extract_facts(&envelope, &compiled);
|
||||
|
||||
let dl1 = format_datalog_program(&compiled, &facts);
|
||||
let dl2 = format_datalog_program(&compiled, &facts);
|
||||
|
||||
// Output should be identical across calls (sorted facts)
|
||||
assert_eq!(dl1, dl2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_escape_datalog_string() {
|
||||
assert_eq!(escape_datalog_string("hello"), "hello");
|
||||
assert_eq!(escape_datalog_string("say \"hi\""), "say \\\"hi\\\"");
|
||||
assert_eq!(escape_datalog_string("line1\nline2"), "line1\\nline2");
|
||||
assert_eq!(escape_datalog_string("tab\there"), "tab\\there");
|
||||
assert_eq!(escape_datalog_string("back\\slash"), "back\\\\slash");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ use super::invariants::{
|
||||
format_envelope_markdown, get_envelope_path, read_envelope, read_rulespec,
|
||||
write_envelope, ActionEnvelope,
|
||||
};
|
||||
use super::datalog::{compile_rulespec, extract_facts, execute_rules, format_datalog_results};
|
||||
use super::datalog::{compile_rulespec, extract_facts, execute_rules, format_datalog_program, format_datalog_results};
|
||||
|
||||
// ============================================================================
|
||||
// Tool Implementation
|
||||
@@ -147,8 +147,8 @@ pub fn verify_envelope(session_id: &str, working_dir: &Path) -> String {
|
||||
|
||||
// Write compiled rules to .dl file
|
||||
let dl_path = session_dir.join("rulespec.compiled.dl");
|
||||
let compiled_yaml = serde_yaml::to_string(&compiled).unwrap_or_default();
|
||||
if let Err(e) = std::fs::write(&dl_path, &compiled_yaml) {
|
||||
let datalog_program = format_datalog_program(&compiled, &facts);
|
||||
if let Err(e) = std::fs::write(&dl_path, &datalog_program) {
|
||||
eprintln!("⚠️ Failed to write compiled rules: {}", e);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user