From 328eecfcadfbea070a4a251ae010d36f12aa0be0 Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Sat, 7 Feb 2026 14:42:39 +1100 Subject: [PATCH] fix: extract_facts fallback for facts-prefixed selectors in datalog verification Root cause: ActionEnvelope.to_yaml_value() creates a Mapping from the facts HashMap without a 'facts:' wrapper key, but rulespec selectors may include a 'facts.' prefix (e.g. 'facts.feature.done' instead of 'feature.done'). This caused zero facts to be extracted, making all predicate evaluations fail. Fix: extract_facts() now tries the selector against the unwrapped envelope value first, and if empty, retries against a facts-wrapped version as fallback. Also: - Strengthened write_envelope tool description to require top-level facts: key, file paths for evidence, and allow free-form notes - Updated system prompt with matching rules - Added 6 new tests (4 unit, 2 integration) - Strengthened existing integration test to verify fact count > 0 --- analysis/memory.md | 11 +- crates/g3-core/src/tool_definitions.rs | 2 +- crates/g3-core/src/tools/datalog.rs | 137 ++++++++++++++++++ .../tests/tool_execution_roundtrip_test.rs | 133 ++++++++++++++++- prompts/system/native.md | 3 + 5 files changed, 282 insertions(+), 4 deletions(-) diff --git a/analysis/memory.md b/analysis/memory.md index 189e2a1..4125b33 100644 --- a/analysis/memory.md +++ b/analysis/memory.md @@ -1,5 +1,5 @@ # Workspace Memory -> Updated: 2026-02-07T01:27:41Z | Size: 23.6k chars +> Updated: 2026-02-07T03:33:32Z | Size: 24.6k chars ### Remember Tool Wiring - `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()` @@ -405,4 +405,11 @@ Makes tool output responsive to terminal width - no line wrapping, with 4-char r - Handles all 9 PredicateRule types: Exists, NotExists, Equals, Contains, GreaterThan, LessThan, MinLength, MaxLength, Matches - Length facts (`__length` suffix) go into `claim_length` relation - `crates/g3-core/src/tools/envelope.rs` [150] - `verify_envelope()` now calls `format_datalog_program()` instead of `serde_yaml::to_string()` -- **Bug fixed**: `.dl` files previously contained YAML (just serialized CompiledRulespec), now contain actual Soufflé datalog \ No newline at end of file +- **Bug fixed**: `.dl` files previously contained YAML (just serialized CompiledRulespec), now contain actual Soufflé datalog + +### Datalog Fact Extraction Fix (2026-02-07) +- `crates/g3-core/src/tools/datalog.rs` [188..207] - `extract_facts()` now has fallback: if selector returns empty on unwrapped envelope value, retries against a `facts`-wrapped version. This handles rulespec selectors written as `facts.feature.done` when `to_yaml_value()` strips the `facts:` wrapper. +- Root cause: `ActionEnvelope.to_yaml_value()` creates a Mapping from the `facts` HashMap WITHOUT a `facts` key wrapper, but rulespec selectors may include a `facts.` prefix. +- New unit tests: `test_extract_facts_with_facts_prefix_selector`, `test_extract_facts_roundtrip_from_yaml`, `test_execute_rules_full_pipeline_with_facts_prefix`, `test_execute_rules_full_pipeline_without_facts_prefix` +- New integration tests: `test_plan_verify_rulespec_with_facts_prefix_selectors`, `test_plan_verify_mixed_pass_fail` +- Strengthened: `test_plan_verify_with_analysis_rulespec` now asserts `Facts extracted: 0` is NOT in output \ No newline at end of file diff --git a/crates/g3-core/src/tool_definitions.rs b/crates/g3-core/src/tool_definitions.rs index 9ea632e..68bc1b1 100644 --- a/crates/g3-core/src/tool_definitions.rs +++ b/crates/g3-core/src/tool_definitions.rs @@ -199,7 +199,7 @@ fn create_core_tools() -> Vec { "properties": { "facts": { "type": "string", - "description": "The envelope facts as YAML. A map of named fact groups, each containing evidence about completed work (capabilities, files, tests, etc.)." + "description": "The envelope facts as YAML. MUST have a top-level `facts:` key containing all fact groups. No other top-level keys are allowed except envelope metadata (e.g. `type:`). Each fact group is a named map under `facts:`. Use file paths for evidence so the validator can verify them (e.g. `src/foo.rs`, `src/foo.rs:42`, `tests/bar.rs::test_name`). Free-form notes can go alongside paths.\n\nExample:\n\nfacts:\n csv_importer:\n capabilities: [handle_headers, handle_tsv]\n file: \"src/import/csv.rs\"\n tests: [\"test_valid_csv\", \"test_missing_column\"]\n api_changes:\n breaking: false\n breaking_changes: null" } }, "required": ["facts"] diff --git a/crates/g3-core/src/tools/datalog.rs b/crates/g3-core/src/tools/datalog.rs index aad0494..8e21fce 100644 --- a/crates/g3-core/src/tools/datalog.rs +++ b/crates/g3-core/src/tools/datalog.rs @@ -188,6 +188,10 @@ pub struct Fact { pub fn extract_facts(envelope: &ActionEnvelope, compiled: &CompiledRulespec) -> HashSet { let mut facts = HashSet::new(); let envelope_value = envelope.to_yaml_value(); + // Build a "facts"-wrapped version so selectors with a "facts." prefix also work. + let mut wrapped = serde_yaml::Mapping::new(); + wrapped.insert(YamlValue::String("facts".into()), envelope_value.clone()); + let wrapped_value = YamlValue::Mapping(wrapped); for (claim_name, selector_str) in &compiled.claims { let selector = match Selector::parse(selector_str) { @@ -197,6 +201,16 @@ pub fn extract_facts(envelope: &ActionEnvelope, compiled: &CompiledRulespec) -> let values = selector.select(&envelope_value); + // If the selector didn't match anything on the unwrapped value, + // try against the "facts"-wrapped version. This handles rulespec + // selectors written as "facts.feature.done" when the envelope + // stores facts without the "facts" wrapper key. + let values = if values.is_empty() { + selector.select(&wrapped_value) + } else { + values + }; + for value in values { // Extract individual values from the selected result extract_values_recursive(claim_name, &value, &mut facts); @@ -967,6 +981,64 @@ mod tests { })); } + #[test] + fn test_extract_facts_with_facts_prefix_selector() { + // Simulate a rulespec that uses "facts." prefix in selectors + // (common when rulespec authors think of the envelope YAML structure + // which has a top-level "facts:" key) + let mut envelope = ActionEnvelope::new(); + envelope.add_fact( + "csv_importer", + serde_yaml::from_str("capabilities: [handle_csv, handle_tsv]\nfile: src/csv.rs").unwrap(), + ); + + let mut rulespec = Rulespec::new(); + // Selector uses "facts." prefix — should still work via fallback + rulespec.claims.push(Claim::new("caps", "facts.csv_importer.capabilities")); + rulespec.claims.push(Claim::new("file", "facts.csv_importer.file")); + let compiled = compile_rulespec(&rulespec, "test", 1).unwrap(); + + let facts = extract_facts(&envelope, &compiled); + + assert!(!facts.is_empty(), "Should extract facts even with 'facts.' prefix selector"); + assert!(facts.contains(&Fact { + claim_name: "caps".to_string(), + value: "handle_csv".to_string(), + })); + assert!(facts.contains(&Fact { + claim_name: "caps".to_string(), + value: "handle_tsv".to_string(), + })); + assert!(facts.contains(&Fact { + claim_name: "file".to_string(), + value: "src/csv.rs".to_string(), + })); + } + + #[test] + fn test_extract_facts_roundtrip_from_yaml() { + // Simulate the real write_envelope → read_envelope → extract_facts flow + let yaml = "facts:\n feature:\n done: true\n capabilities: [handle_csv, handle_tsv]\n file: src/lib.rs"; + let envelope: ActionEnvelope = serde_yaml::from_str(yaml).unwrap(); + assert!(!envelope.facts.is_empty(), "Envelope should have facts after parsing"); + + let mut rulespec = Rulespec::new(); + rulespec.claims.push(Claim::new("feature_done", "feature.done")); + rulespec.claims.push(Claim::new("caps", "feature.capabilities")); + let compiled = compile_rulespec(&rulespec, "test", 1).unwrap(); + + let facts = extract_facts(&envelope, &compiled); + assert!(!facts.is_empty(), "Should extract facts from round-tripped envelope"); + assert!(facts.contains(&Fact { + claim_name: "feature_done".to_string(), + value: "true".to_string(), + })); + assert!(facts.contains(&Fact { + claim_name: "caps".to_string(), + value: "handle_csv".to_string(), + })); + } + // ======================================================================== // Execution Tests // ======================================================================== @@ -1205,6 +1277,71 @@ mod tests { assert!(output.contains("Facts extracted:")); } + #[test] + fn test_execute_rules_full_pipeline_with_facts_prefix() { + // End-to-end: YAML envelope → extract_facts (with facts. prefix) → execute_rules + let yaml = "facts:\n my_feature:\n capabilities: [fast_search, caching]\n file: src/search.rs\n breaking: false"; + let envelope: ActionEnvelope = serde_yaml::from_str(yaml).unwrap(); + + let mut rulespec = Rulespec::new(); + // Use facts. prefix selectors (the common mistake) + rulespec.claims.push(Claim::new("caps", "facts.my_feature.capabilities")); + rulespec.claims.push(Claim::new("file", "facts.my_feature.file")); + rulespec.claims.push(Claim::new("breaking", "facts.my_feature.breaking")); + rulespec.predicates.push( + Predicate::new("caps", PredicateRule::Contains, InvariantSource::TaskPrompt) + .with_value(YamlValue::String("fast_search".to_string())), + ); + rulespec.predicates.push( + Predicate::new("file", PredicateRule::Exists, InvariantSource::TaskPrompt), + ); + rulespec.predicates.push( + Predicate::new("breaking", PredicateRule::Equals, InvariantSource::TaskPrompt) + .with_value(YamlValue::String("false".to_string())), + ); + + let compiled = compile_rulespec(&rulespec, "test", 1).unwrap(); + let facts = extract_facts(&envelope, &compiled); + assert!(facts.len() > 0, "Should extract facts with facts. prefix selectors"); + + let result = execute_rules(&compiled, &facts); + assert_eq!(result.fact_count, facts.len()); + assert!(result.all_passed(), "All predicates should pass: {:?}", + result.predicate_results.iter() + .filter(|r| !r.passed) + .map(|r| format!("{}: {}", r.claim_name, r.reason)) + .collect::>()); + assert_eq!(result.passed_count, 3); + assert_eq!(result.failed_count, 0); + } + + #[test] + fn test_execute_rules_full_pipeline_without_facts_prefix() { + // End-to-end: YAML envelope → extract_facts (without facts. prefix) → execute_rules + let yaml = "facts:\n my_feature:\n capabilities: [fast_search, caching]\n file: src/search.rs"; + let envelope: ActionEnvelope = serde_yaml::from_str(yaml).unwrap(); + + let mut rulespec = Rulespec::new(); + // Use direct selectors (no facts. prefix) + rulespec.claims.push(Claim::new("caps", "my_feature.capabilities")); + rulespec.claims.push(Claim::new("file", "my_feature.file")); + rulespec.predicates.push( + Predicate::new("caps", PredicateRule::Contains, InvariantSource::TaskPrompt) + .with_value(YamlValue::String("caching".to_string())), + ); + rulespec.predicates.push( + Predicate::new("file", PredicateRule::Exists, InvariantSource::TaskPrompt), + ); + + let compiled = compile_rulespec(&rulespec, "test", 1).unwrap(); + let facts = extract_facts(&envelope, &compiled); + assert!(facts.len() > 0, "Should extract facts without facts. prefix"); + + let result = execute_rules(&compiled, &facts); + assert!(result.all_passed()); + assert_eq!(result.passed_count, 2); + } + // ======================================================================== // Datalog Program Generation Tests // ======================================================================== diff --git a/crates/g3-core/tests/tool_execution_roundtrip_test.rs b/crates/g3-core/tests/tool_execution_roundtrip_test.rs index 97b938a..935f1b5 100644 --- a/crates/g3-core/tests/tool_execution_roundtrip_test.rs +++ b/crates/g3-core/tests/tool_execution_roundtrip_test.rs @@ -627,8 +627,12 @@ predicates: // Verify evaluation content shows pass let eval_content = fs::read_to_string(&eval_path).unwrap(); - assert!(eval_content.contains("satisfied") || eval_content.contains("PASS"), + assert!(eval_content.contains("satisfied"), "Evaluation should show passing results: {}", eval_content); + + // Verify facts were actually extracted (not zero) + assert!(!eval_content.contains("Facts extracted: 0"), + "Should have extracted facts: {}", eval_content); } /// Test: plan_verify works gracefully when analysis/rulespec.yaml is absent @@ -731,4 +735,131 @@ predicates: assert!(eval_content.contains("FAIL") || eval_content.contains("fail"), "Evaluation should show failing results: {}", eval_content); } + + /// Test: write_envelope with facts. prefix selectors in rulespec still extracts facts + #[tokio::test] + #[serial] + async fn test_plan_verify_rulespec_with_facts_prefix_selectors() { + let temp_dir = TempDir::new().unwrap(); + let mut agent = create_test_agent(&temp_dir).await; + + agent.init_session_id_for_test("datalog-facts-prefix-test"); + let session_id = agent.get_session_id().unwrap().to_string(); + + // Write a plan and approve it + let write_call = make_tool_call( + "plan_write", + serde_json::json!({ + "plan": "plan_id: datalog-test\nrevision: 1\nitems:\n - id: I1\n description: Implement feature\n state: todo\n touches: [src/lib.rs]\n checks:\n happy: {desc: Works, target: lib}\n negative: [{desc: Errors, target: lib}]\n boundary: [{desc: Edge, target: lib}]" + }), + ); + agent.execute_tool(&write_call).await.unwrap(); + let approve_call = make_tool_call("plan_approve", serde_json::json!({})); + agent.execute_tool(&approve_call).await.unwrap(); + + // Create a dummy evidence file + let src_dir = temp_dir.path().join("src"); + fs::create_dir_all(&src_dir).unwrap(); + fs::write(src_dir.join("lib.rs"), "// test file").unwrap(); + + // Write rulespec with facts. prefix selectors + let analysis_dir = temp_dir.path().join("analysis"); + fs::create_dir_all(&analysis_dir).unwrap(); + fs::write( + analysis_dir.join("rulespec.yaml"), + "claims:\n - name: feature_done\n selector: facts.feature.done\n - name: caps\n selector: facts.feature.capabilities\npredicates:\n - claim: feature_done\n rule: exists\n source: task_prompt\n notes: Feature must be marked done\n - claim: caps\n rule: contains\n value: handle_csv\n source: task_prompt\n notes: Must support CSV\n", + ) + .unwrap(); + + // Call write_envelope + let envelope_call = make_tool_call( + "write_envelope", + serde_json::json!({ + "facts": "facts:\n feature:\n done: true\n capabilities: [handle_csv, handle_tsv]\n file: src/lib.rs" + }), + ); + let result = agent.execute_tool(&envelope_call).await.unwrap(); + assert!(result.contains("Envelope written"), "Should confirm envelope written: {}", result); + + // Check evaluation file + let session_dir = temp_dir + .path() + .join(".g3") + .join("sessions") + .join(&session_id); + let eval_path = session_dir.join("datalog_evaluation.txt"); + assert!(eval_path.exists(), "Evaluation report should exist"); + + let eval_content = fs::read_to_string(&eval_path).unwrap(); + // Facts should be extracted despite facts. prefix selectors + assert!(!eval_content.contains("Facts extracted: 0"), + "Should extract facts even with facts. prefix selectors: {}", eval_content); + assert!(eval_content.contains("satisfied"), + "All predicates should pass: {}", eval_content); + } + + /// Test: write_envelope with multiple claims and mixed pass/fail results + #[tokio::test] + #[serial] + async fn test_plan_verify_mixed_pass_fail() { + let temp_dir = TempDir::new().unwrap(); + let mut agent = create_test_agent(&temp_dir).await; + + agent.init_session_id_for_test("datalog-mixed-test"); + let session_id = agent.get_session_id().unwrap().to_string(); + + // Write a plan and approve it + let write_call = make_tool_call( + "plan_write", + serde_json::json!({ + "plan": "plan_id: datalog-test\nrevision: 1\nitems:\n - id: I1\n description: Implement feature\n state: todo\n touches: [src/lib.rs]\n checks:\n happy: {desc: Works, target: lib}\n negative: [{desc: Errors, target: lib}]\n boundary: [{desc: Edge, target: lib}]" + }), + ); + agent.execute_tool(&write_call).await.unwrap(); + let approve_call = make_tool_call("plan_approve", serde_json::json!({})); + agent.execute_tool(&approve_call).await.unwrap(); + + // Create a dummy evidence file + let src_dir = temp_dir.path().join("src"); + fs::create_dir_all(&src_dir).unwrap(); + fs::write(src_dir.join("lib.rs"), "// test file").unwrap(); + + // Write rulespec with one passing and one failing predicate + let analysis_dir = temp_dir.path().join("analysis"); + fs::create_dir_all(&analysis_dir).unwrap(); + fs::write( + analysis_dir.join("rulespec.yaml"), + "claims:\n - name: feature_done\n selector: feature.done\n - name: missing_thing\n selector: nonexistent.field\npredicates:\n - claim: feature_done\n rule: exists\n source: task_prompt\n notes: This should pass\n - claim: missing_thing\n rule: exists\n source: task_prompt\n notes: This should fail\n", + ) + .unwrap(); + + // Call write_envelope + let envelope_call = make_tool_call( + "write_envelope", + serde_json::json!({ + "facts": "facts:\n feature:\n done: true\n capabilities: [handle_csv]\n file: src/lib.rs" + }), + ); + let result = agent.execute_tool(&envelope_call).await.unwrap(); + assert!(result.contains("Envelope written"), "Should confirm envelope written: {}", result); + // Should report mixed results + assert!(result.contains("failed"), "Should report failures in tool output: {}", result); + + // Check evaluation file + let session_dir = temp_dir + .path() + .join(".g3") + .join("sessions") + .join(&session_id); + let eval_path = session_dir.join("datalog_evaluation.txt"); + assert!(eval_path.exists(), "Evaluation report should exist"); + + let eval_content = fs::read_to_string(&eval_path).unwrap(); + // Should have extracted some facts (from the feature that exists) + assert!(!eval_content.contains("Facts extracted: 0"), + "Should extract some facts: {}", eval_content); + // Should show 1 pass and 1 fail + assert!(eval_content.contains("1/2") || (eval_content.contains("passed") && eval_content.contains("failed")), + "Should show mixed results: {}", eval_content); + } } diff --git a/prompts/system/native.md b/prompts/system/native.md index ab546e5..cf0c0ba 100644 --- a/prompts/system/native.md +++ b/prompts/system/native.md @@ -93,6 +93,9 @@ facts: ``` **Rules:** +- All fact groups MUST go under the top-level `facts:` key. No other top-level keys except envelope metadata (e.g. `type:`) +- Use file paths as evidence values so the validator can check them: `"src/foo.rs"`, `"src/foo.rs:42"`, `"tests/bar.rs::test_name"` +- Free-form notes are allowed alongside file paths (e.g. `notes: "Refactored from old module"`) - Selectors in `analysis/rulespec.yaml` (e.g., `csv_importer.capabilities`) are evaluated against envelope facts - Use dot notation for nested access: `api_changes.breaking` - Use `null` to explicitly assert absence (for `not_exists` predicates)