fix: extract_facts fallback for facts-prefixed selectors in datalog verification

Root cause: ActionEnvelope.to_yaml_value() creates a Mapping from the facts HashMap without a 'facts:' wrapper key, but rulespec selectors may include a 'facts.' prefix (e.g. 'facts.feature.done' instead of 'feature.done'). This caused zero facts to be extracted, making all predicate evaluations fail. Fix: extract_facts() now tries the selector against the unwrapped envelope value first, and if empty, retries against a facts-wrapped version as fallback. Also: - Strengthened write_envelope tool description to require top-level facts: key, file paths for evidence, and allow free-form notes - Updated system prompt with matching rules - Added 6 new tests (4 unit, 2 integration) - Strengthened existing integration test to verify fact count > 0
2026-02-07 14:42:39 +11:00
parent b045d0c5e9
commit 328eecfcad
5 changed files with 282 additions and 4 deletions
--- a/analysis/memory.md
+++ b/analysis/memory.md
@@ -1,5 +1,5 @@
 # Workspace Memory
-> Updated: 2026-02-07T01:27:41Z | Size: 23.6k chars
+> Updated: 2026-02-07T03:33:32Z | Size: 24.6k chars
 ### Remember Tool Wiring
 - `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()`
@@ -405,4 +405,11 @@ Makes tool output responsive to terminal width - no line wrapping, with 4-char r
  - Handles all 9 PredicateRule types: Exists, NotExists, Equals, Contains, GreaterThan, LessThan, MinLength, MaxLength, Matches
  - Length facts (`__length` suffix) go into `claim_length` relation
 - `crates/g3-core/src/tools/envelope.rs` [150] - `verify_envelope()` now calls `format_datalog_program()` instead of `serde_yaml::to_string()`
- **Bug fixed**: `.dl` files previously contained YAML (just serialized CompiledRulespec), now contain actual Soufflé datalog
+- **Bug fixed**: `.dl` files previously contained YAML (just serialized CompiledRulespec), now contain actual Soufflé datalog
 ### Datalog Fact Extraction Fix (2026-02-07)
 - `crates/g3-core/src/tools/datalog.rs` [188..207] - `extract_facts()` now has fallback: if selector returns empty on unwrapped envelope value, retries against a `facts`-wrapped version. This handles rulespec selectors written as `facts.feature.done` when `to_yaml_value()` strips the `facts:` wrapper.
 - Root cause: `ActionEnvelope.to_yaml_value()` creates a Mapping from the `facts` HashMap WITHOUT a `facts` key wrapper, but rulespec selectors may include a `facts.` prefix.
 - New unit tests: `test_extract_facts_with_facts_prefix_selector`, `test_extract_facts_roundtrip_from_yaml`, `test_execute_rules_full_pipeline_with_facts_prefix`, `test_execute_rules_full_pipeline_without_facts_prefix`
 - New integration tests: `test_plan_verify_rulespec_with_facts_prefix_selectors`, `test_plan_verify_mixed_pass_fail`
 - Strengthened: `test_plan_verify_with_analysis_rulespec` now asserts `Facts extracted: 0` is NOT in output
--- a/crates/g3-core/src/tool_definitions.rs
+++ b/crates/g3-core/src/tool_definitions.rs
@@ -199,7 +199,7 @@ fn create_core_tools() -> Vec<Tool> {
            "properties": {
                "facts": {
                    "type": "string",
-                    "description": "The envelope facts as YAML. A map of named fact groups, each containing evidence about completed work (capabilities, files, tests, etc.)."
+                    "description": "The envelope facts as YAML. MUST have a top-level `facts:` key containing all fact groups. No other top-level keys are allowed except envelope metadata (e.g. `type:`). Each fact group is a named map under `facts:`. Use file paths for evidence so the validator can verify them (e.g. `src/foo.rs`, `src/foo.rs:42`, `tests/bar.rs::test_name`). Free-form notes can go alongside paths.\n\nExample:\n\nfacts:\n  csv_importer:\n    capabilities: [handle_headers, handle_tsv]\n    file: \"src/import/csv.rs\"\n    tests: [\"test_valid_csv\", \"test_missing_column\"]\n  api_changes:\n    breaking: false\n  breaking_changes: null"
                }
            },
            "required": ["facts"]
--- a/crates/g3-core/src/tools/datalog.rs
+++ b/crates/g3-core/src/tools/datalog.rs
@@ -188,6 +188,10 @@ pub struct Fact {
 pub fn extract_facts(envelope: &ActionEnvelope, compiled: &CompiledRulespec) -> HashSet<Fact> {
    let mut facts = HashSet::new();
    let envelope_value = envelope.to_yaml_value();
    // Build a "facts"-wrapped version so selectors with a "facts." prefix also work.
    let mut wrapped = serde_yaml::Mapping::new();
    wrapped.insert(YamlValue::String("facts".into()), envelope_value.clone());
    let wrapped_value = YamlValue::Mapping(wrapped);
    for (claim_name, selector_str) in &compiled.claims {
        let selector = match Selector::parse(selector_str) {
@@ -197,6 +201,16 @@ pub fn extract_facts(envelope: &ActionEnvelope, compiled: &CompiledRulespec) ->
        let values = selector.select(&envelope_value);
        // If the selector didn't match anything on the unwrapped value,
        // try against the "facts"-wrapped version. This handles rulespec
        // selectors written as "facts.feature.done" when the envelope
        // stores facts without the "facts" wrapper key.
        let values = if values.is_empty() {
            selector.select(&wrapped_value)
        } else {
            values
        };
        for value in values {
            // Extract individual values from the selected result
            extract_values_recursive(claim_name, &value, &mut facts);
@@ -967,6 +981,64 @@ mod tests {
        }));
    }
    #[test]
    fn test_extract_facts_with_facts_prefix_selector() {
        // Simulate a rulespec that uses "facts." prefix in selectors
        // (common when rulespec authors think of the envelope YAML structure
        // which has a top-level "facts:" key)
        let mut envelope = ActionEnvelope::new();
        envelope.add_fact(
            "csv_importer",
            serde_yaml::from_str("capabilities: [handle_csv, handle_tsv]\nfile: src/csv.rs").unwrap(),
        );
        let mut rulespec = Rulespec::new();
        // Selector uses "facts." prefix — should still work via fallback
        rulespec.claims.push(Claim::new("caps", "facts.csv_importer.capabilities"));
        rulespec.claims.push(Claim::new("file", "facts.csv_importer.file"));
        let compiled = compile_rulespec(&rulespec, "test", 1).unwrap();
        let facts = extract_facts(&envelope, &compiled);
        assert!(!facts.is_empty(), "Should extract facts even with 'facts.' prefix selector");
        assert!(facts.contains(&Fact {
            claim_name: "caps".to_string(),
            value: "handle_csv".to_string(),
        }));
        assert!(facts.contains(&Fact {
            claim_name: "caps".to_string(),
            value: "handle_tsv".to_string(),
        }));
        assert!(facts.contains(&Fact {
            claim_name: "file".to_string(),
            value: "src/csv.rs".to_string(),
        }));
    }
    #[test]
    fn test_extract_facts_roundtrip_from_yaml() {
        // Simulate the real write_envelope → read_envelope → extract_facts flow
        let yaml = "facts:\n  feature:\n    done: true\n    capabilities: [handle_csv, handle_tsv]\n    file: src/lib.rs";
        let envelope: ActionEnvelope = serde_yaml::from_str(yaml).unwrap();
        assert!(!envelope.facts.is_empty(), "Envelope should have facts after parsing");
        let mut rulespec = Rulespec::new();
        rulespec.claims.push(Claim::new("feature_done", "feature.done"));
        rulespec.claims.push(Claim::new("caps", "feature.capabilities"));
        let compiled = compile_rulespec(&rulespec, "test", 1).unwrap();
        let facts = extract_facts(&envelope, &compiled);
        assert!(!facts.is_empty(), "Should extract facts from round-tripped envelope");
        assert!(facts.contains(&Fact {
            claim_name: "feature_done".to_string(),
            value: "true".to_string(),
        }));
        assert!(facts.contains(&Fact {
            claim_name: "caps".to_string(),
            value: "handle_csv".to_string(),
        }));
    }
    // ========================================================================
    // Execution Tests
    // ========================================================================
@@ -1205,6 +1277,71 @@ mod tests {
        assert!(output.contains("Facts extracted:"));
    }
    #[test]
    fn test_execute_rules_full_pipeline_with_facts_prefix() {
        // End-to-end: YAML envelope → extract_facts (with facts. prefix) → execute_rules
        let yaml = "facts:\n  my_feature:\n    capabilities: [fast_search, caching]\n    file: src/search.rs\n    breaking: false";
        let envelope: ActionEnvelope = serde_yaml::from_str(yaml).unwrap();
        let mut rulespec = Rulespec::new();
        // Use facts. prefix selectors (the common mistake)
        rulespec.claims.push(Claim::new("caps", "facts.my_feature.capabilities"));
        rulespec.claims.push(Claim::new("file", "facts.my_feature.file"));
        rulespec.claims.push(Claim::new("breaking", "facts.my_feature.breaking"));
        rulespec.predicates.push(
            Predicate::new("caps", PredicateRule::Contains, InvariantSource::TaskPrompt)
                .with_value(YamlValue::String("fast_search".to_string())),
        );
        rulespec.predicates.push(
            Predicate::new("file", PredicateRule::Exists, InvariantSource::TaskPrompt),
        );
        rulespec.predicates.push(
            Predicate::new("breaking", PredicateRule::Equals, InvariantSource::TaskPrompt)
                .with_value(YamlValue::String("false".to_string())),
        );
        let compiled = compile_rulespec(&rulespec, "test", 1).unwrap();
        let facts = extract_facts(&envelope, &compiled);
        assert!(facts.len() > 0, "Should extract facts with facts. prefix selectors");
        let result = execute_rules(&compiled, &facts);
        assert_eq!(result.fact_count, facts.len());
        assert!(result.all_passed(), "All predicates should pass: {:?}",
            result.predicate_results.iter()
                .filter(|r| !r.passed)
                .map(|r| format!("{}: {}", r.claim_name, r.reason))
                .collect::<Vec<_>>());
        assert_eq!(result.passed_count, 3);
        assert_eq!(result.failed_count, 0);
    }
    #[test]
    fn test_execute_rules_full_pipeline_without_facts_prefix() {
        // End-to-end: YAML envelope → extract_facts (without facts. prefix) → execute_rules
        let yaml = "facts:\n  my_feature:\n    capabilities: [fast_search, caching]\n    file: src/search.rs";
        let envelope: ActionEnvelope = serde_yaml::from_str(yaml).unwrap();
        let mut rulespec = Rulespec::new();
        // Use direct selectors (no facts. prefix)
        rulespec.claims.push(Claim::new("caps", "my_feature.capabilities"));
        rulespec.claims.push(Claim::new("file", "my_feature.file"));
        rulespec.predicates.push(
            Predicate::new("caps", PredicateRule::Contains, InvariantSource::TaskPrompt)
                .with_value(YamlValue::String("caching".to_string())),
        );
        rulespec.predicates.push(
            Predicate::new("file", PredicateRule::Exists, InvariantSource::TaskPrompt),
        );
        let compiled = compile_rulespec(&rulespec, "test", 1).unwrap();
        let facts = extract_facts(&envelope, &compiled);
        assert!(facts.len() > 0, "Should extract facts without facts. prefix");
        let result = execute_rules(&compiled, &facts);
        assert!(result.all_passed());
        assert_eq!(result.passed_count, 2);
    }
    // ========================================================================
    // Datalog Program Generation Tests
    // ========================================================================
--- a/crates/g3-core/tests/tool_execution_roundtrip_test.rs
+++ b/crates/g3-core/tests/tool_execution_roundtrip_test.rs
@@ -627,8 +627,12 @@ predicates:
        // Verify evaluation content shows pass
        let eval_content = fs::read_to_string(&eval_path).unwrap();
-        assert!(eval_content.contains("satisfied") || eval_content.contains("PASS"),
+        assert!(eval_content.contains("satisfied"),
            "Evaluation should show passing results: {}", eval_content);
        // Verify facts were actually extracted (not zero)
        assert!(!eval_content.contains("Facts extracted: 0"),
            "Should have extracted facts: {}", eval_content);
    }
    /// Test: plan_verify works gracefully when analysis/rulespec.yaml is absent
@@ -731,4 +735,131 @@ predicates:
        assert!(eval_content.contains("FAIL") || eval_content.contains("fail"),
            "Evaluation should show failing results: {}", eval_content);
    }
    /// Test: write_envelope with facts. prefix selectors in rulespec still extracts facts
    #[tokio::test]
    #[serial]
    async fn test_plan_verify_rulespec_with_facts_prefix_selectors() {
        let temp_dir = TempDir::new().unwrap();
        let mut agent = create_test_agent(&temp_dir).await;
        agent.init_session_id_for_test("datalog-facts-prefix-test");
        let session_id = agent.get_session_id().unwrap().to_string();
        // Write a plan and approve it
        let write_call = make_tool_call(
            "plan_write",
            serde_json::json!({
                "plan": "plan_id: datalog-test\nrevision: 1\nitems:\n  - id: I1\n    description: Implement feature\n    state: todo\n    touches: [src/lib.rs]\n    checks:\n      happy: {desc: Works, target: lib}\n      negative: [{desc: Errors, target: lib}]\n      boundary: [{desc: Edge, target: lib}]"
            }),
        );
        agent.execute_tool(&write_call).await.unwrap();
        let approve_call = make_tool_call("plan_approve", serde_json::json!({}));
        agent.execute_tool(&approve_call).await.unwrap();
        // Create a dummy evidence file
        let src_dir = temp_dir.path().join("src");
        fs::create_dir_all(&src_dir).unwrap();
        fs::write(src_dir.join("lib.rs"), "// test file").unwrap();
        // Write rulespec with facts. prefix selectors
        let analysis_dir = temp_dir.path().join("analysis");
        fs::create_dir_all(&analysis_dir).unwrap();
        fs::write(
            analysis_dir.join("rulespec.yaml"),
            "claims:\n  - name: feature_done\n    selector: facts.feature.done\n  - name: caps\n    selector: facts.feature.capabilities\npredicates:\n  - claim: feature_done\n    rule: exists\n    source: task_prompt\n    notes: Feature must be marked done\n  - claim: caps\n    rule: contains\n    value: handle_csv\n    source: task_prompt\n    notes: Must support CSV\n",
        )
        .unwrap();
        // Call write_envelope
        let envelope_call = make_tool_call(
            "write_envelope",
            serde_json::json!({
                "facts": "facts:\n  feature:\n    done: true\n    capabilities: [handle_csv, handle_tsv]\n    file: src/lib.rs"
            }),
        );
        let result = agent.execute_tool(&envelope_call).await.unwrap();
        assert!(result.contains("Envelope written"), "Should confirm envelope written: {}", result);
        // Check evaluation file
        let session_dir = temp_dir
            .path()
            .join(".g3")
            .join("sessions")
            .join(&session_id);
        let eval_path = session_dir.join("datalog_evaluation.txt");
        assert!(eval_path.exists(), "Evaluation report should exist");
        let eval_content = fs::read_to_string(&eval_path).unwrap();
        // Facts should be extracted despite facts. prefix selectors
        assert!(!eval_content.contains("Facts extracted: 0"),
            "Should extract facts even with facts. prefix selectors: {}", eval_content);
        assert!(eval_content.contains("satisfied"),
            "All predicates should pass: {}", eval_content);
    }
    /// Test: write_envelope with multiple claims and mixed pass/fail results
    #[tokio::test]
    #[serial]
    async fn test_plan_verify_mixed_pass_fail() {
        let temp_dir = TempDir::new().unwrap();
        let mut agent = create_test_agent(&temp_dir).await;
        agent.init_session_id_for_test("datalog-mixed-test");
        let session_id = agent.get_session_id().unwrap().to_string();
        // Write a plan and approve it
        let write_call = make_tool_call(
            "plan_write",
            serde_json::json!({
                "plan": "plan_id: datalog-test\nrevision: 1\nitems:\n  - id: I1\n    description: Implement feature\n    state: todo\n    touches: [src/lib.rs]\n    checks:\n      happy: {desc: Works, target: lib}\n      negative: [{desc: Errors, target: lib}]\n      boundary: [{desc: Edge, target: lib}]"
            }),
        );
        agent.execute_tool(&write_call).await.unwrap();
        let approve_call = make_tool_call("plan_approve", serde_json::json!({}));
        agent.execute_tool(&approve_call).await.unwrap();
        // Create a dummy evidence file
        let src_dir = temp_dir.path().join("src");
        fs::create_dir_all(&src_dir).unwrap();
        fs::write(src_dir.join("lib.rs"), "// test file").unwrap();
        // Write rulespec with one passing and one failing predicate
        let analysis_dir = temp_dir.path().join("analysis");
        fs::create_dir_all(&analysis_dir).unwrap();
        fs::write(
            analysis_dir.join("rulespec.yaml"),
            "claims:\n  - name: feature_done\n    selector: feature.done\n  - name: missing_thing\n    selector: nonexistent.field\npredicates:\n  - claim: feature_done\n    rule: exists\n    source: task_prompt\n    notes: This should pass\n  - claim: missing_thing\n    rule: exists\n    source: task_prompt\n    notes: This should fail\n",
        )
        .unwrap();
        // Call write_envelope
        let envelope_call = make_tool_call(
            "write_envelope",
            serde_json::json!({
                "facts": "facts:\n  feature:\n    done: true\n    capabilities: [handle_csv]\n    file: src/lib.rs"
            }),
        );
        let result = agent.execute_tool(&envelope_call).await.unwrap();
        assert!(result.contains("Envelope written"), "Should confirm envelope written: {}", result);
        // Should report mixed results
        assert!(result.contains("failed"), "Should report failures in tool output: {}", result);
        // Check evaluation file
        let session_dir = temp_dir
            .path()
            .join(".g3")
            .join("sessions")
            .join(&session_id);
        let eval_path = session_dir.join("datalog_evaluation.txt");
        assert!(eval_path.exists(), "Evaluation report should exist");
        let eval_content = fs::read_to_string(&eval_path).unwrap();
        // Should have extracted some facts (from the feature that exists)
        assert!(!eval_content.contains("Facts extracted: 0"),
            "Should extract some facts: {}", eval_content);
        // Should show 1 pass and 1 fail
        assert!(eval_content.contains("1/2") || (eval_content.contains("passed") && eval_content.contains("failed")),
            "Should show mixed results: {}", eval_content);
    }
 }
--- a/prompts/system/native.md
+++ b/prompts/system/native.md
@@ -93,6 +93,9 @@ facts:
 ```
 **Rules:**
 - All fact groups MUST go under the top-level `facts:` key. No other top-level keys except envelope metadata (e.g. `type:`)
 - Use file paths as evidence values so the validator can check them: `"src/foo.rs"`, `"src/foo.rs:42"`, `"tests/bar.rs::test_name"`
 - Free-form notes are allowed alongside file paths (e.g. `notes: "Refactored from old module"`)
 - Selectors in `analysis/rulespec.yaml` (e.g., `csv_importer.capabilities`) are evaluated against envelope facts
 - Use dot notation for nested access: `api_changes.breaking`
 - Use `null` to explicitly assert absence (for `not_exists` predicates)