feat: Add invariants system for Plan Mode verification

Adds rulespec.yaml and envelope.yaml support for machine-readable invariant checking during plan completion. - Add invariants module with Rulespec, ActionEnvelope, and evaluation logic - Add Invariants section to system prompt with workflow instructions - Show rulespec/envelope file status in plan verification output - Rulespec written during planning (captures constraints from task) - Envelope written after implementation (documents what was built)
2026-02-04 20:47:51 +11:00
parent 95d9847354
commit 3046f0dd6e
6 changed files with 1397 additions and 6 deletions
--- a/crates/g3-core/src/prompts.rs
+++ b/crates/g3-core/src/prompts.rs
@@ -95,6 +95,71 @@ When done, add evidence and notes:
  notes: \"Extended existing parser instead of creating duplicate\"
 ```

+## Invariants
+
+For plans with 3+ items, you MUST extract invariants from the task and write them as a **rulespec**.
+
+### What are Invariants?
+
+Invariants are constraints that MUST or MUST NOT hold. Extract them from:
+- **task_prompt**: What the user explicitly requires (\"must support TSV\", \"must not break existing API\")
+- **memory**: Persistent rules from AGENTS.md or workspace memory (\"must be Send + Sync\", \"must not block async runtime\")
+
+### Rulespec Structure
+
+Write invariants as a `rulespec.yaml` file with claims and predicates:
+
+```yaml
+claims:
+  - name: csv_capabilities
+    selector: \"csv_importer.capabilities\"
+  - name: api_changes
+    selector: \"breaking_changes\"
+
+predicates:
+  - claim: csv_capabilities
+    rule: contains
+    value: \"handle_tsv\"
+    source: task_prompt
+    notes: \"User explicitly requested TSV support in addition to CSV\"
+  - claim: api_changes
+    rule: not_exists
+    source: memory
+    notes: \"AGENTS.md requires backward compatibility\"
+```
+
+### Predicate Rules
+
+- `contains`: Array contains value, or string contains substring
+- `equals`: Exact match
+- `exists`: Value is present
+- `not_exists`: Value is absent
+- `min_length` / `max_length`: Array size constraints
+- `greater_than` / `less_than`: Numeric comparisons
+- `matches`: Regex pattern match
+
+### Action Envelope
+
+As the FINAL step, write an `envelope.yaml` with facts about completed work:
+
+```yaml
+facts:
+  csv_importer:
+    capabilities: [handle_headers, handle_tsv, handle_quoted]
+    file: \"src/import/csv.rs\"
+    tests: [\"test_tsv_import\", \"test_header_detection\"]
+  breaking_changes: null  # Explicitly absent
+```
+
+### Workflow
+
+1. While drafting the plan, write `rulespec.yaml` with claims and predicates extracted from the task
+2. Implement all plan items
+3. After all work is complete, write `envelope.yaml` with facts about the completed work
+4. **THEN** call `plan_write` to mark the final item done - verification will check that both files exist
+
+**IMPORTANT**: Write envelope.yaml AFTER completing all implementation work, but BEFORE the final `plan_write` call. The verification step checks for these files when the plan completes.
+
 ## Benefits

 ✓ Prevents missed steps
--- a/crates/g3-core/src/streaming.rs
+++ b/crates/g3-core/src/streaming.rs
@@ -63,7 +63,7 @@ pub fn format_tool_result_summary(
    tool_result: &str,
    tool_success: bool,
 ) -> ToolOutputFormat {
-    let is_todo_tool = tool_name == "todo_read" || tool_name == "todo_write";
+    let is_self_handled = is_self_handled_tool(tool_name);
    let is_compact_tool = matches!(
        tool_name,
        "read_file"
@@ -77,7 +77,7 @@ pub fn format_tool_result_summary(
            | "plan_approve"
    );

-    if is_todo_tool {
+    if is_self_handled {
        ToolOutputFormat::SelfHandled
    } else if is_compact_tool {
        if !tool_success {
--- a/crates/g3-core/src/tools/invariants.rs
+++ b/crates/g3-core/src/tools/invariants.rs
--- a/crates/g3-core/src/tools/mod.rs
+++ b/crates/g3-core/src/tools/mod.rs
@@ -14,6 +14,7 @@
 pub mod executor;
 pub mod acd;
 pub mod file_ops;
+pub mod invariants;
 pub mod memory;
 pub mod misc;
 pub mod plan;
--- a/crates/g3-core/src/tools/plan.rs
+++ b/crates/g3-core/src/tools/plan.rs
@@ -20,6 +20,8 @@ use crate::ToolCall;

 use super::executor::ToolContext;

+use super::invariants::{get_envelope_path, get_rulespec_path};
+
 // ============================================================================
 // Plan Schema
 // ============================================================================
@@ -696,7 +698,8 @@ pub fn plan_verify(plan: &Plan, working_dir: Option<&str>) -> PlanVerification {

 /// Format verification results as a string for display.
 /// Uses loud formatting for warnings and errors.
-pub fn format_verification_results(verification: &PlanVerification) -> String {
+/// If session_id is provided, also prints rulespec and envelope file locations.
+pub fn format_verification_results(verification: &PlanVerification, session_id: Option<&str>) -> String {
    let mut output = String::new();
    let (warnings, errors) = verification.count_issues();
    
@@ -736,6 +739,24 @@ pub fn format_verification_results(verification: &PlanVerification) -> String {
    } else {
        output.push_str("✅ VERIFICATION COMPLETE: All evidence validated\n");
    }
+    
+    // Print rulespec and envelope locations if session_id provided
+    if let Some(sid) = session_id {
+        output.push_str("\n");
+        output.push_str("📜 INVARIANTS\n");
+        
+        let rulespec_path = get_rulespec_path(sid);
+        let envelope_path = get_envelope_path(sid);
+        
+        let rulespec_status = if rulespec_path.exists() { "✅" } else { "⚠️  (not found)" };
+        let envelope_status = if envelope_path.exists() { "✅" } else { "⚠️  (not found)" };
+        
+        output.push_str(&format!("  {} Rulespec: {}\n", rulespec_status, rulespec_path.display()));
+        output.push_str(&format!("  {} Envelope: {}\n", envelope_status, envelope_path.display()));
+        
+        output.push_str("\n");
+    }
+    
    output.push_str(&"═".repeat(60));
    output.push_str("\n");
    
@@ -848,7 +869,7 @@ pub async fn execute_plan_write<W: UiWriter>(
    // Check if plan is now complete and trigger verification
    if plan.is_complete() && plan.is_approved() {
        let verification = plan_verify(&plan, ctx.working_dir);
-        let verification_output = format_verification_results(&verification);
+        let verification_output = format_verification_results(&verification, ctx.session_id);
        return Ok(format!("✅ Plan updated: {}\n{}", plan.status_summary(), verification_output));
    }