Add write_envelope tool with verify_envelope for explicit envelope creation

- New crates/g3-core/src/tools/envelope.rs with execute_write_envelope() and verify_envelope() (moved from shadow_datalog_verify in plan.rs) - write_envelope accepts YAML facts, writes envelope.yaml to session dir, then runs datalog verification against analysis/rulespec.yaml in shadow mode - plan_verify() now only checks envelope existence (no longer runs datalog) - Tool count: 13 -> 14 - Updated system prompt to instruct agents to call write_envelope before marking last plan item done - Updated integration tests to use write_envelope tool directly Workflow: write_envelope -> verify_envelope -> datalog shadow artifacts plan_write(done) -> plan_verify -> checks envelope exists
2026-02-06 16:09:07 +11:00
parent f7a240a99b
commit 7032e75fc6
8 changed files with 283 additions and 116 deletions
--- a/crates/g3-core/src/tool_definitions.rs
+++ b/crates/g3-core/src/tool_definitions.rs
@@ -190,6 +190,22 @@ fn create_core_tools() -> Vec<Tool> {
        }),
    });

+    // Action Envelope tool
+    tools.push(Tool {
+        name: "write_envelope".to_string(),
+        description: "Write the action envelope for the current plan. Call this as your final step before marking the last plan item done. The envelope captures facts about completed work and is verified against analysis/rulespec.yaml if present.".to_string(),
+        input_schema: json!({
+            "type": "object",
+            "properties": {
+                "facts": {
+                    "type": "string",
+                    "description": "The envelope facts as YAML. A map of named fact groups, each containing evidence about completed work (capabilities, files, tests, etc.)."
+                }
+            },
+            "required": ["facts"]
+        }),
+    });
+
    tools.push(Tool {
        name: "plan_write".to_string(),
        description: "Create or update the Plan for this session. Provide the plan as YAML with plan_id, revision, and items array.".to_string(),
@@ -275,17 +291,17 @@ mod tests {
        let tools = create_core_tools();
        // Core tools: shell, background_process, read_file, read_image,
        // write_file, str_replace, code_search, plan_read, plan_write, plan_approve,
-        // remember, rehydrate, load_toolset
-        // (13 total - research tools are now in a loadable toolset)
-        assert_eq!(tools.len(), 13);
+        // remember, rehydrate, load_toolset, write_envelope
+        // (14 total - research tools are now in a loadable toolset)
+        assert_eq!(tools.len(), 14);
    }

    #[test]
    fn test_create_tool_definitions_core_only() {
        let config = ToolConfig::default();
        let tools = create_tool_definitions(config);
-        // 13 core tools (webdriver and research are now JIT-loaded)
-        assert_eq!(tools.len(), 13);
+        // 14 core tools (webdriver and research are now JIT-loaded)
+        assert_eq!(tools.len(), 14);
    }

    #[test]
@@ -293,7 +309,7 @@ mod tests {
        let config = ToolConfig::new(true);
        let tools = create_tool_definitions(config);
        // Webdriver and research tools are now JIT-loaded, so only core tools are included
-        assert_eq!(tools.len(), 13);
+        assert_eq!(tools.len(), 14);
    }

    #[test]
--- a/crates/g3-core/src/tool_dispatch.rs
+++ b/crates/g3-core/src/tool_dispatch.rs
@@ -7,7 +7,7 @@ use anyhow::Result;
 use tracing::{debug, warn};

 use crate::tools::executor::ToolContext;
-use crate::tools::{acd, file_ops, memory, misc, plan, research, shell, toolsets, webdriver};
+use crate::tools::{acd, envelope, file_ops, memory, misc, plan, research, shell, toolsets, webdriver};
 use crate::ui_writer::UiWriter;
 use crate::ToolCall;

@@ -37,6 +37,9 @@ pub async fn dispatch_tool<W: UiWriter>(
        "plan_write" => plan::execute_plan_write(tool_call, ctx).await,
        "plan_approve" => plan::execute_plan_approve(tool_call, ctx).await,

+        // Action Envelope
+        "write_envelope" => envelope::execute_write_envelope(tool_call, ctx).await,
+
        // Miscellaneous tools
        "code_search" => misc::execute_code_search(tool_call, ctx).await,

--- a/crates/g3-core/src/tools/envelope.rs
+++ b/crates/g3-core/src/tools/envelope.rs
@@ -0,0 +1,184 @@
+//! Action Envelope tool - writes and verifies the action envelope.
+//!
+//! The `write_envelope` tool is the agent's explicit final step before
+//! completing a plan. It:
+//! 1. Parses the provided YAML facts into an ActionEnvelope
+//! 2. Writes it to the session's `envelope.yaml`
+//! 3. Runs `verify_envelope()` which compiles the rulespec and executes
+//!    datalog verification in shadow form (results written to files, not
+//!    injected into context)
+//!
+//! This creates a clear happens-before edge: envelope creation + verification
+//! must complete before `plan_verify()` (triggered on plan completion) checks
+//! that the envelope exists.
+
+use anyhow::Result;
+use std::path::Path;
+use tracing::debug;
+
+use crate::paths::get_session_logs_dir;
+use crate::ui_writer::UiWriter;
+use crate::ToolCall;
+
+use super::executor::ToolContext;
+use super::invariants::{
+    format_envelope_markdown, get_envelope_path, read_envelope, read_rulespec,
+    write_envelope, ActionEnvelope,
+};
+use super::datalog::{compile_rulespec, extract_facts, execute_rules, format_datalog_results};
+
+// ============================================================================
+// Tool Implementation
+// ============================================================================
+
+/// Execute the `write_envelope` tool.
+///
+/// Accepts YAML facts, writes the action envelope, and runs verification.
+pub async fn execute_write_envelope<W: UiWriter>(
+    tool_call: &ToolCall,
+    ctx: &mut ToolContext<'_, W>,
+) -> Result<String> {
+    debug!("Processing write_envelope tool call");
+
+    let session_id = match ctx.session_id {
+        Some(id) => id,
+        None => return Ok("❌ No active session - envelopes are session-scoped.".to_string()),
+    };
+
+    // Get the facts YAML from args
+    let facts_yaml = match tool_call.args.get("facts").and_then(|v| v.as_str()) {
+        Some(f) => f,
+        None => return Ok("❌ Missing 'facts' argument. Provide the envelope facts as YAML.".to_string()),
+    };
+
+    // Parse the YAML into an ActionEnvelope
+    let envelope: ActionEnvelope = match serde_yaml::from_str(facts_yaml) {
+        Ok(e) => e,
+        Err(e) => return Ok(format!("❌ Invalid envelope YAML: {}", e)),
+    };
+
+    // Write the envelope to disk
+    if let Err(e) = write_envelope(session_id, &envelope) {
+        return Ok(format!("❌ Failed to write envelope: {}", e));
+    }
+
+    let envelope_path = get_envelope_path(session_id);
+    let mut output = format!(
+        "✅ Envelope written: {}\n{}",
+        envelope_path.display(),
+        format_envelope_markdown(&envelope),
+    );
+
+    // Run verification against rulespec (shadow mode)
+    let effective_wd = ctx.working_dir
+        .map(Path::new)
+        .unwrap_or_else(|| Path::new("."));
+    let verification_note = verify_envelope(session_id, effective_wd);
+    output.push_str(&verification_note);
+
+    Ok(output)
+}
+
+// ============================================================================
+// Envelope Verification
+// ============================================================================
+
+/// Verify the action envelope against the compiled rulespec using datalog.
+///
+/// This is the core verification step that:
+/// 1. Reads `analysis/rulespec.yaml` from the working directory
+/// 2. Compiles it into datalog relations
+/// 3. Loads the envelope from the session
+/// 4. Extracts facts and runs datalog rules
+/// 5. Writes results to session artifacts (shadow mode - stderr + files)
+///
+/// Returns a short status string for inclusion in tool output.
+pub fn verify_envelope(session_id: &str, working_dir: &Path) -> String {
+    // Read rulespec from analysis/rulespec.yaml
+    let rulespec = match read_rulespec(working_dir) {
+        Ok(Some(rs)) => rs,
+        Ok(None) => {
+            eprintln!("\nℹ️  No analysis/rulespec.yaml found - skipping datalog verification");
+            return "\nℹ️  No rulespec found — skipping invariant verification.\n".to_string();
+        }
+        Err(e) => {
+            eprintln!("\n⚠️  Failed to read analysis/rulespec.yaml: {}", e);
+            return format!("\n⚠️  Failed to read rulespec: {}\n", e);
+        }
+    };
+
+    // Compile rulespec on-the-fly
+    let compiled = match compile_rulespec(&rulespec, "envelope-verify", 0) {
+        Ok(c) => c,
+        Err(e) => {
+            eprintln!("\n⚠️  Failed to compile rulespec: {}", e);
+            return format!("\n⚠️  Failed to compile rulespec: {}\n", e);
+        }
+    };
+
+    if compiled.is_empty() {
+        eprintln!("\nℹ️  Rulespec has no predicates - skipping datalog verification");
+        return "\nℹ️  Rulespec has no predicates — skipping invariant verification.\n".to_string();
+    }
+
+    // Load envelope
+    let envelope = match read_envelope(session_id) {
+        Ok(Some(e)) => e,
+        Ok(None) => {
+            eprintln!("\n⚠️  No envelope found - skipping datalog verification");
+            return "\n⚠️  No envelope found — skipping invariant verification.\n".to_string();
+        }
+        Err(e) => {
+            eprintln!("\n⚠️  Failed to load envelope: {}", e);
+            return format!("\n⚠️  Failed to load envelope: {}\n", e);
+        }
+    };
+
+    // Extract facts from envelope
+    let facts = extract_facts(&envelope, &compiled);
+
+    // Execute datalog rules
+    let result = execute_rules(&compiled, &facts);
+
+    // Format results
+    let output = format_datalog_results(&result);
+
+    let session_dir = get_session_logs_dir(session_id);
+
+    // Write compiled rules to .dl file
+    let dl_path = session_dir.join("rulespec.compiled.dl");
+    let compiled_yaml = serde_yaml::to_string(&compiled).unwrap_or_default();
+    if let Err(e) = std::fs::write(&dl_path, &compiled_yaml) {
+        eprintln!("⚠️  Failed to write compiled rules: {}", e);
+    }
+
+    // Write evaluation report
+    let eval_path = session_dir.join("datalog_evaluation.txt");
+    match std::fs::write(&eval_path, &output) {
+        Ok(_) => {
+            eprintln!("📊 Compiled rules: {}", dl_path.display());
+            eprintln!("📊 Evaluation report: {}", eval_path.display());
+        }
+        Err(e) => {
+            eprintln!("⚠️  Failed to write datalog evaluation: {}", e);
+        }
+    }
+
+    // Return a summary for the tool output
+    let summary = if result.failed_count == 0 {
+        format!(
+            "\n✅ Invariant verification: {}/{} passed\n",
+            result.passed_count,
+            result.passed_count + result.failed_count,
+        )
+    } else {
+        format!(
+            "\n⚠️  Invariant verification: {}/{} passed, {} failed\n",
+            result.passed_count,
+            result.passed_count + result.failed_count,
+            result.failed_count,
+        )
+    };
+
+    summary
+}
--- a/crates/g3-core/src/tools/mod.rs
+++ b/crates/g3-core/src/tools/mod.rs
@@ -13,6 +13,7 @@
 pub mod executor;
 pub mod acd;
 pub mod datalog;
+pub mod envelope;
 pub mod file_ops;
 pub mod invariants;
 pub mod memory;
--- a/crates/g3-core/src/tools/plan.rs
+++ b/crates/g3-core/src/tools/plan.rs
@@ -12,6 +12,7 @@ use anyhow::{anyhow, Result};
 use serde::{Deserialize, Serialize};
 use std::fmt;
 use std::path::PathBuf;
+use std::path::Path;
 use tracing::debug;

 use crate::paths::{ensure_session_dir, get_session_logs_dir};
@@ -20,10 +21,7 @@ use crate::ToolCall;

 use super::executor::ToolContext;

-use std::path::Path;
-use super::invariants::{format_envelope_markdown, get_envelope_path, read_envelope, read_rulespec};
-use super::datalog::{compile_rulespec, format_datalog_results};
-use super::datalog::{extract_facts, execute_rules};
+use super::invariants::{format_envelope_markdown, get_envelope_path, read_envelope};

 // ============================================================================
 // Plan Schema
@@ -711,85 +709,11 @@ pub fn plan_verify(plan: &Plan, working_dir: Option<&str>) -> PlanVerification {
    }
 }

-/// Shadow datalog verification - runs datalog rules and writes to evaluation file.
-/// This is for dry-run/shadow testing - results are written to
-/// `.g3/sessions/<id>/datalog_evaluation.txt`, NOT injected into context window.
-fn shadow_datalog_verify(session_id: &str, working_dir: &Path) {
-    // Read rulespec from analysis/rulespec.yaml
-    let rulespec = match read_rulespec(working_dir) {
-        Ok(Some(rs)) => rs,
-        Ok(None) => {
-            eprintln!("\nℹ️  No analysis/rulespec.yaml found - skipping datalog verification");
-            return;
-        }
-        Err(e) => {
-            eprintln!("\n⚠️  Failed to read analysis/rulespec.yaml: {}", e);
-            return;
-        }
-    };
-
-    // Compile rulespec on-the-fly
-    let compiled = match compile_rulespec(&rulespec, "plan-verify", 0) {
-        Ok(c) => c,
-        Err(e) => {
-            eprintln!("\n⚠️  Failed to compile rulespec: {}", e);
-            return;
-        }
-    };
-
-    if compiled.is_empty() {
-        eprintln!("\nℹ️  Rulespec has no predicates - skipping datalog verification");
-        return;
-    }
-
-    // Load envelope
-    let envelope = match read_envelope(session_id) {
-        Ok(Some(e)) => e,
-        Ok(None) => {
-            eprintln!("\n⚠️  No envelope found - skipping datalog verification");
-            return;
-        }
-        Err(e) => {
-            eprintln!("\n⚠️  Failed to load envelope: {}", e);
-            return;
-        }
-    };
-
-    // Extract facts from envelope
-    let facts = extract_facts(&envelope, &compiled);
-
-    // Execute datalog rules
-    let result = execute_rules(&compiled, &facts);
-
-    // Format results
-    let output = format_datalog_results(&result);
-
-    let session_dir = get_session_logs_dir(session_id);
-
-    // Write compiled rules to .dl file
-    let dl_path = session_dir.join("rulespec.compiled.dl");
-    let compiled_yaml = serde_yaml::to_string(&compiled).unwrap_or_default();
-    if let Err(e) = std::fs::write(&dl_path, &compiled_yaml) {
-        eprintln!("⚠️  Failed to write compiled rules: {}", e);
-    }
-
-    // Write evaluation report
-    let eval_path = session_dir.join("datalog_evaluation.txt");
-    match std::fs::write(&eval_path, &output) {
-        Ok(_) => {
-            eprintln!("📊 Compiled rules: {}", dl_path.display());
-            eprintln!("📊 Evaluation report: {}", eval_path.display());
-        }
-        Err(e) => {
-            eprintln!("⚠️  Failed to write datalog evaluation: {}", e);
-        }
-    }
-}

 /// Format verification results as a string for display.
 /// Uses loud formatting for warnings and errors.
-/// If session_id is provided, also prints envelope file location and runs datalog verification.
-pub fn format_verification_results(verification: &PlanVerification, session_id: Option<&str>, working_dir: Option<&Path>) -> String {
+/// If session_id is provided, also checks that envelope.yaml exists at the expected path.
+pub fn format_verification_results(verification: &PlanVerification, session_id: Option<&str>, _working_dir: Option<&Path>) -> String {
    let mut output = String::new();
    let (warnings, errors) = verification.count_issues();
    
@@ -841,11 +765,6 @@ pub fn format_verification_results(verification: &PlanVerification, session_id:

        output.push_str("\n");

-        // Shadow datalog verification - print to stderr, NOT included in tool output
-        let effective_wd = working_dir
-            .map(|p| p.to_path_buf())
-            .unwrap_or_else(|| std::env::current_dir().unwrap_or_default());
-        shadow_datalog_verify(sid, &effective_wd);
    }
    
    output.push_str(&"═".repeat(60));