fix: generate actual Soufflé datalog in .dl files instead of YAML

The rulespec compiler was writing serde_yaml::to_string(&compiled) into rulespec.compiled.dl files — just YAML, not datalog at all. Added format_datalog_program() that produces proper Soufflé-style datalog: - .decl relation declarations (claim_value, claim_length, predicate_pass, predicate_fail) - Fact assertions from the envelope - Rules for all 9 predicate types (exists, not_exists, equals, contains, greater_than, less_than, min_length, max_length, matches) - .output directives for query results Updated verify_envelope() to call the new function instead of serde_yaml::to_string(). Added 8 unit tests covering all rule types, edge cases, and the butler rulespec example.
2026-02-07 12:33:50 +11:00
parent 5085f10717
commit 51dfe71a2b
2 changed files with 396 additions and 3 deletions
--- a/crates/g3-core/src/tools/datalog.rs
+++ b/crates/g3-core/src/tools/datalog.rs
@@ -534,6 +534,171 @@ fn evaluate_predicate_datalog(
    }
 }

+// ============================================================================
+// Datalog Program Generation
+// ============================================================================
+
+/// Escape a string value for use in a datalog literal.
+///
+/// Replaces backslashes, double quotes, and newlines with escape sequences.
+fn escape_datalog_string(s: &str) -> String {
+    s.replace('\\', "\\\\")
+        .replace('"', "\\\"")
+        .replace('\n', "\\n")
+        .replace('\r', "\\r")
+        .replace('\t', "\\t")
+}
+
+/// Format a compiled rulespec and extracted facts as a datalog program.
+///
+/// Produces a textual `.dl` file with:
+/// - Relation declarations (`.decl`)
+/// - Fact assertions from the envelope
+/// - Rules derived from rulespec predicates
+/// - An output directive for query results
+///
+/// This is a Soufflé-style datalog dialect, which is the most widely
+/// used textual datalog format.
+pub fn format_datalog_program(
+    compiled: &CompiledRulespec,
+    facts: &HashSet<Fact>,
+) -> String {
+    let mut out = String::new();
+
+    // ── Header ──────────────────────────────────────────────────────
+    out.push_str("// Auto-generated datalog program\n");
+    out.push_str(&format!("// Plan: {}\n", compiled.plan_id));
+    out.push_str(&format!("// Compiled at revision: {}\n", compiled.compiled_at_revision));
+    out.push_str("\n");
+
+    // ── Relation declarations ───────────────────────────────────────
+    out.push_str("// --- Relation declarations ---\n");
+    out.push_str(".decl claim_value(claim: symbol, value: symbol)\n");
+    out.push_str(".decl claim_length(claim: symbol, length: number)\n");
+    out.push_str(".decl predicate_pass(id: number)\n");
+    out.push_str(".decl predicate_fail(id: number)\n");
+    out.push_str("\n");
+    out.push_str(".output predicate_pass\n");
+    out.push_str(".output predicate_fail\n");
+    out.push_str("\n");
+
+    // ── Facts ───────────────────────────────────────────────────────
+    out.push_str("// --- Facts (from envelope) ---\n");
+    // Sort for deterministic output
+    let mut sorted_facts: Vec<&Fact> = facts.iter().collect();
+    sorted_facts.sort_by(|a, b| (&a.claim_name, &a.value).cmp(&(&b.claim_name, &b.value)));
+
+    for fact in &sorted_facts {
+        if fact.claim_name.ends_with(".__length") {
+            // Length facts go into the claim_length relation
+            let base_claim = fact.claim_name.trim_end_matches(".__length");
+            if let Ok(n) = fact.value.parse::<i64>() {
+                out.push_str(&format!(
+                    "claim_length(\"{}\", {}).\n",
+                    escape_datalog_string(base_claim),
+                    n,
+                ));
+            }
+        } else {
+            out.push_str(&format!(
+                "claim_value(\"{}\", \"{}\").\n",
+                escape_datalog_string(&fact.claim_name),
+                escape_datalog_string(&fact.value),
+            ));
+        }
+    }
+    out.push_str("\n");
+
+    // ── Rules (from predicates) ─────────────────────────────────────
+    out.push_str("// --- Rules (from rulespec predicates) ---\n");
+    for pred in &compiled.predicates {
+        let id = pred.id;
+        let claim = escape_datalog_string(&pred.claim_name);
+        let expected = pred
+            .expected_value
+            .as_deref()
+            .map(|v| escape_datalog_string(v))
+            .unwrap_or_default();
+
+        // Emit a comment describing the predicate
+        out.push_str(&format!(
+            "// pred[{}]: {} {} {}{}\n",
+            id,
+            pred.rule,
+            pred.claim_name,
+            pred.expected_value.as_deref().map(|v| format!("'{}'", v)).unwrap_or_default(),
+            pred.notes.as_deref().map(|n| format!("  -- {}", n)).unwrap_or_default(),
+        ));
+
+        match pred.rule {
+            PredicateRule::Exists => {
+                out.push_str(&format!(
+                    "predicate_pass({}) :- claim_value(\"{}\", _).\n",
+                    id, claim,
+                ));
+            }
+            PredicateRule::NotExists => {
+                // Pass when no matching fact exists
+                out.push_str(&format!(
+                    "predicate_pass({}) :- !claim_value(\"{}\", _).\n",
+                    id, claim,
+                ));
+            }
+            PredicateRule::Equals => {
+                out.push_str(&format!(
+                    "predicate_pass({}) :- claim_value(\"{}\", \"{}\").\n",
+                    id, claim, expected,
+                ));
+            }
+            PredicateRule::Contains => {
+                out.push_str(&format!(
+                    "predicate_pass({}) :- claim_value(\"{}\", \"{}\").\n",
+                    id, claim, expected,
+                ));
+            }
+            PredicateRule::GreaterThan => {
+                out.push_str(&format!(
+                    "predicate_pass({}) :- claim_value(\"{}\", V), to_number(V, N), N > {}.\n",
+                    id, claim, expected,
+                ));
+            }
+            PredicateRule::LessThan => {
+                out.push_str(&format!(
+                    "predicate_pass({}) :- claim_value(\"{}\", V), to_number(V, N), N < {}.\n",
+                    id, claim, expected,
+                ));
+            }
+            PredicateRule::MinLength => {
+                out.push_str(&format!(
+                    "predicate_pass({}) :- claim_length(\"{}\", N), N >= {}.\n",
+                    id, claim, expected,
+                ));
+            }
+            PredicateRule::MaxLength => {
+                out.push_str(&format!(
+                    "predicate_pass({}) :- claim_length(\"{}\", N), N <= {}.\n",
+                    id, claim, expected,
+                ));
+            }
+            PredicateRule::Matches => {
+                // Regex matching expressed as a match functor
+                out.push_str(&format!(
+                    "predicate_pass({}) :- claim_value(\"{}\", V), match(\"{}\", V).\n",
+                    id, claim, expected,
+                ));
+            }
+        }
+
+        // Derive failure as the negation of pass
+        out.push_str(&format!(
+            "predicate_fail({}) :- !predicate_pass({}).\n",
+            id, id,
+        ));
+        out.push_str("\n");
+    }
+
+    out
+}
 // ============================================================================
 // ============================================================================
 // Formatting
@@ -1039,4 +1204,232 @@ mod tests {
        assert!(output.contains("✅"));
        assert!(output.contains("Facts extracted:"));
    }
+
+    // ========================================================================
+    // Datalog Program Generation Tests
+    // ========================================================================
+
+    #[test]
+    fn test_format_datalog_program_butler_example() {
+        // Mirrors the butler rulespec: email_reviewed equals true
+        let mut rulespec = Rulespec::new();
+        rulespec.claims.push(Claim::new("email_reviewed", "facts.reviewed"));
+        rulespec.predicates.push(
+            Predicate::new("email_reviewed", PredicateRule::Equals, InvariantSource::TaskPrompt)
+                .with_value(YamlValue::Bool(true))
+                .with_notes("Outgoing emails must be manually reviewed before sending"),
+        );
+
+        let compiled = compile_rulespec(&rulespec, "outbound-email", 0).unwrap();
+
+        let mut envelope = ActionEnvelope::new();
+        envelope.add_fact("facts", serde_yaml::from_str("reviewed: true").unwrap());
+        let facts = extract_facts(&envelope, &compiled);
+
+        let dl = format_datalog_program(&compiled, &facts);
+
+        // Header
+        assert!(dl.contains("// Auto-generated datalog program"));
+        assert!(dl.contains("// Plan: outbound-email"));
+
+        // Relation declarations
+        assert!(dl.contains(".decl claim_value(claim: symbol, value: symbol)"));
+        assert!(dl.contains(".decl claim_length(claim: symbol, length: number)"));
+        assert!(dl.contains(".decl predicate_pass(id: number)"));
+        assert!(dl.contains(".decl predicate_fail(id: number)"));
+        assert!(dl.contains(".output predicate_pass"));
+        assert!(dl.contains(".output predicate_fail"));
+
+        // Facts
+        assert!(dl.contains(r#"claim_value("email_reviewed", "true")."#));
+
+        // Rule for equals
+        assert!(dl.contains(r#"predicate_pass(0) :- claim_value("email_reviewed", "true")."#));
+        assert!(dl.contains("predicate_fail(0) :- !predicate_pass(0)."));
+
+        // Comment with notes
+        assert!(dl.contains("Outgoing emails must be manually reviewed"));
+    }
+
+    #[test]
+    fn test_format_datalog_program_empty_rulespec() {
+        let rulespec = Rulespec::new();
+        let compiled = compile_rulespec(&rulespec, "empty", 0).unwrap();
+        let facts = std::collections::HashSet::new();
+
+        let dl = format_datalog_program(&compiled, &facts);
+
+        // Should still have valid structure
+        assert!(dl.contains(".decl claim_value"));
+        assert!(dl.contains(".decl predicate_pass"));
+        assert!(dl.contains("// --- Facts (from envelope) ---"));
+        assert!(dl.contains("// --- Rules (from rulespec predicates) ---"));
+
+        // No fact assertions (lines ending with period) or rules beyond declarations
+        assert!(!dl.contains(r#"claim_value(""#));
+        assert!(!dl.contains("predicate_pass(0)"));
+    }
+
+    #[test]
+    fn test_format_datalog_program_empty_facts() {
+        let mut rulespec = Rulespec::new();
+        rulespec.claims.push(Claim::new("test", "foo.bar"));
+        rulespec.predicates.push(
+            Predicate::new("test", PredicateRule::Exists, InvariantSource::TaskPrompt),
+        );
+
+        let compiled = compile_rulespec(&rulespec, "test", 1).unwrap();
+        let facts = std::collections::HashSet::new();
+
+        let dl = format_datalog_program(&compiled, &facts);
+
+        // Has declarations and rules but no fact assertions
+        assert!(dl.contains(".decl claim_value"));
+        assert!(dl.contains("predicate_pass(0) :- claim_value"));
+        assert!(dl.contains("predicate_fail(0) :- !predicate_pass(0)"));
+        // No claim_value facts
+        // The rules section will reference claim_value("test", _) but the facts section should not
+        let facts_section = dl.split("// --- Rules").next().unwrap();
+        assert!(!facts_section.contains(r#"claim_value("test""#));
+    }
+
+    #[test]
+    fn test_format_datalog_program_special_characters() {
+        let mut rulespec = Rulespec::new();
+        rulespec.claims.push(Claim::new("msg", "message"));
+        rulespec.predicates.push(
+            Predicate::new("msg", PredicateRule::Equals, InvariantSource::TaskPrompt)
+                .with_value(YamlValue::String("hello \"world\"".to_string())),
+        );
+
+        let compiled = compile_rulespec(&rulespec, "test", 0).unwrap();
+
+        let mut envelope = ActionEnvelope::new();
+        envelope.add_fact("message", YamlValue::String("hello \"world\"".to_string()));
+        let facts = extract_facts(&envelope, &compiled);
+
+        let dl = format_datalog_program(&compiled, &facts);
+
+        // Quotes should be escaped
+        assert!(dl.contains(r#"\"world\""#));
+    }
+
+    #[test]
+    fn test_format_datalog_program_all_rule_types() {
+        let mut rulespec = Rulespec::new();
+
+        // Create claims for each rule type
+        rulespec.claims.push(Claim::new("c_exists", "a"));
+        rulespec.claims.push(Claim::new("c_not_exists", "b"));
+        rulespec.claims.push(Claim::new("c_equals", "c"));
+        rulespec.claims.push(Claim::new("c_contains", "d"));
+        rulespec.claims.push(Claim::new("c_gt", "e"));
+        rulespec.claims.push(Claim::new("c_lt", "f"));
+        rulespec.claims.push(Claim::new("c_min", "g"));
+        rulespec.claims.push(Claim::new("c_max", "h"));
+        rulespec.claims.push(Claim::new("c_matches", "i"));
+
+        // Add one predicate per rule type
+        rulespec.predicates.push(
+            Predicate::new("c_exists", PredicateRule::Exists, InvariantSource::TaskPrompt),
+        );
+        rulespec.predicates.push(
+            Predicate::new("c_not_exists", PredicateRule::NotExists, InvariantSource::TaskPrompt),
+        );
+        rulespec.predicates.push(
+            Predicate::new("c_equals", PredicateRule::Equals, InvariantSource::TaskPrompt)
+                .with_value(YamlValue::String("val".to_string())),
+        );
+        rulespec.predicates.push(
+            Predicate::new("c_contains", PredicateRule::Contains, InvariantSource::TaskPrompt)
+                .with_value(YamlValue::String("item".to_string())),
+        );
+        rulespec.predicates.push(
+            Predicate::new("c_gt", PredicateRule::GreaterThan, InvariantSource::TaskPrompt)
+                .with_value(YamlValue::Number(10.into())),
+        );
+        rulespec.predicates.push(
+            Predicate::new("c_lt", PredicateRule::LessThan, InvariantSource::TaskPrompt)
+                .with_value(YamlValue::Number(100.into())),
+        );
+        rulespec.predicates.push(
+            Predicate::new("c_min", PredicateRule::MinLength, InvariantSource::TaskPrompt)
+                .with_value(YamlValue::Number(2.into())),
+        );
+        rulespec.predicates.push(
+            Predicate::new("c_max", PredicateRule::MaxLength, InvariantSource::TaskPrompt)
+                .with_value(YamlValue::Number(5.into())),
+        );
+        rulespec.predicates.push(
+            Predicate::new("c_matches", PredicateRule::Matches, InvariantSource::TaskPrompt)
+                .with_value(YamlValue::String("^foo.*".to_string())),
+        );
+
+        let compiled = compile_rulespec(&rulespec, "all-rules", 1).unwrap();
+        let facts = std::collections::HashSet::new();
+
+        let dl = format_datalog_program(&compiled, &facts);
+
+        // Each rule type produces a distinct pattern
+        assert!(dl.contains(r#"predicate_pass(0) :- claim_value("c_exists", _)."#));
+        assert!(dl.contains(r#"predicate_pass(1) :- !claim_value("c_not_exists", _)."#));
+        assert!(dl.contains(r#"predicate_pass(2) :- claim_value("c_equals", "val")."#));
+        assert!(dl.contains(r#"predicate_pass(3) :- claim_value("c_contains", "item")."#));
+        assert!(dl.contains(r#"predicate_pass(4) :- claim_value("c_gt", V), to_number(V, N), N > 10."#));
+        assert!(dl.contains(r#"predicate_pass(5) :- claim_value("c_lt", V), to_number(V, N), N < 100."#));
+        assert!(dl.contains(r#"predicate_pass(6) :- claim_length("c_min", N), N >= 2."#));
+        assert!(dl.contains(r#"predicate_pass(7) :- claim_length("c_max", N), N <= 5."#));
+        assert!(dl.contains(r#"predicate_pass(8) :- claim_value("c_matches", V), match("^foo.*", V)."#));
+
+        // Each has a corresponding fail rule
+        for i in 0..9 {
+            assert!(dl.contains(&format!("predicate_fail({}) :- !predicate_pass({}).", i, i)));
+        }
+    }
+
+    #[test]
+    fn test_format_datalog_program_length_facts() {
+        let mut rulespec = Rulespec::new();
+        rulespec.claims.push(Claim::new("caps", "csv_importer.capabilities"));
+        rulespec.predicates.push(
+            Predicate::new("caps", PredicateRule::MinLength, InvariantSource::TaskPrompt)
+                .with_value(YamlValue::Number(2.into())),
+        );
+
+        let compiled = compile_rulespec(&rulespec, "test", 0).unwrap();
+
+        let envelope = make_test_envelope();
+        let facts = extract_facts(&envelope, &compiled);
+
+        let dl = format_datalog_program(&compiled, &facts);
+
+        // Length facts should use claim_length relation
+        assert!(dl.contains(r#"claim_length("caps", 3)."#));
+        // Individual values should use claim_value
+        assert!(dl.contains(r#"claim_value("caps", "handle_tsv")."#));
+        assert!(dl.contains(r#"claim_value("caps", "handle_headers")."#));
+    }
+
+    #[test]
+    fn test_format_datalog_program_deterministic_output() {
+        let envelope = make_test_envelope();
+        let rulespec = make_test_rulespec();
+        let compiled = compile_rulespec(&rulespec, "test", 1).unwrap();
+        let facts = extract_facts(&envelope, &compiled);
+
+        let dl1 = format_datalog_program(&compiled, &facts);
+        let dl2 = format_datalog_program(&compiled, &facts);
+
+        // Output should be identical across calls (sorted facts)
+        assert_eq!(dl1, dl2);
+    }
+
+    #[test]
+    fn test_escape_datalog_string() {
+        assert_eq!(escape_datalog_string("hello"), "hello");
+        assert_eq!(escape_datalog_string("say \"hi\""), "say \\\"hi\\\"");
+        assert_eq!(escape_datalog_string("line1\nline2"), "line1\\nline2");
+        assert_eq!(escape_datalog_string("tab\there"), "tab\\there");
+        assert_eq!(escape_datalog_string("back\\slash"), "back\\\\slash");
+    }
 }
--- a/crates/g3-core/src/tools/envelope.rs
+++ b/crates/g3-core/src/tools/envelope.rs
@@ -25,7 +25,7 @@ use super::invariants::{
    format_envelope_markdown, get_envelope_path, read_envelope, read_rulespec,
    write_envelope, ActionEnvelope,
 };
-use super::datalog::{compile_rulespec, extract_facts, execute_rules, format_datalog_results};
+use super::datalog::{compile_rulespec, extract_facts, execute_rules, format_datalog_program, format_datalog_results};

 // ============================================================================
 // Tool Implementation
@@ -147,8 +147,8 @@ pub fn verify_envelope(session_id: &str, working_dir: &Path) -> String {

    // Write compiled rules to .dl file
    let dl_path = session_dir.join("rulespec.compiled.dl");
-    let compiled_yaml = serde_yaml::to_string(&compiled).unwrap_or_default();
-    if let Err(e) = std::fs::write(&dl_path, &compiled_yaml) {
+    let datalog_program = format_datalog_program(&compiled, &facts);
+    if let Err(e) = std::fs::write(&dl_path, &datalog_program) {
        eprintln!("⚠️  Failed to write compiled rules: {}", e);
    }