diff --git a/crates/g3-core/src/prompts.rs b/crates/g3-core/src/prompts.rs index 20d3ab3..c20dbf9 100644 --- a/crates/g3-core/src/prompts.rs +++ b/crates/g3-core/src/prompts.rs @@ -58,8 +58,10 @@ Short description for providers without native calling specs: - Example: {\"tool\": \"plan_read\", \"args\": {}} - **plan_write**: Create or update the Plan with YAML content - - Format: {\"tool\": \"plan_write\", \"args\": {\"plan\": \"plan_id: my-plan\\nitems: [...]\"}} - - Example: {\"tool\": \"plan_write\", \"args\": {\"plan\": \"plan_id: feature-x\\nitems:\\n - id: I1\\n description: Add feature\\n state: todo\\n touches: [src/lib.rs]\\n checks:\\n happy: {desc: Works, target: lib}\\n negative:\\n - {desc: Errors, target: lib}\\n boundary:\\n - {desc: Edge, target: lib}\"}} + - Format: {\"tool\": \"plan_write\", \"args\": {\"plan\": \"plan_id: my-plan\\nitems: [...]\", \"rulespec\": \"claims: [...]\\npredicates: [...]\"}} + - For NEW plans, rulespec is REQUIRED. For updates, it's optional. + - Example (new plan): {\"tool\": \"plan_write\", \"args\": {\"plan\": \"plan_id: feature-x\\nitems:\\n - id: I1\\n description: Add feature\\n state: todo\\n touches: [src/lib.rs]\\n checks:\\n happy: {desc: Works, target: lib}\\n negative:\\n - {desc: Errors, target: lib}\\n boundary:\\n - {desc: Edge, target: lib}\", \"rulespec\": \"claims:\\n - name: feature\\n selector: feature.done\\npredicates:\\n - claim: feature\\n rule: exists\\n source: task_prompt\"}} + - Example (update): {\"tool\": \"plan_write\", \"args\": {\"plan\": \"plan_id: feature-x\\nitems:\\n - id: I1\\n state: done\\n evidence: [src/lib.rs:42]\\n notes: Implemented\"}} - **plan_approve**: Approve the current plan revision (called by user) - Format: {\"tool\": \"plan_approve\", \"args\": {}} diff --git a/crates/g3-core/src/tool_definitions.rs b/crates/g3-core/src/tool_definitions.rs index 092fcbd..1cd8e57 100644 --- a/crates/g3-core/src/tool_definitions.rs +++ b/crates/g3-core/src/tool_definitions.rs @@ -199,13 +199,17 @@ fn create_core_tools() -> Vec { tools.push(Tool { name: "plan_write".to_string(), - description: "Create or update the Plan for this session. Provide plan as YAML with plan_id and items array. See system prompt for full schema (items need: id, description, state, touches, checks with happy/negative/boundary). Evidence and notes required when marking done.".to_string(), + description: "Create or update the Plan for this session. For NEW plans, you MUST provide both 'plan' and 'rulespec' arguments. The rulespec defines invariants (constraints that must/must not hold) extracted from the task and memory. For plan UPDATES, rulespec is optional.".to_string(), input_schema: json!({ "type": "object", "properties": { "plan": { "type": "string", "description": "The plan as YAML. Must include plan_id and items array." + }, + "rulespec": { + "type": "string", + "description": "The rulespec as YAML with claims and predicates. REQUIRED for new plans, optional for updates. Defines invariants from task_prompt and memory." } }, "required": ["plan"] diff --git a/crates/g3-core/src/tools/plan.rs b/crates/g3-core/src/tools/plan.rs index e57b5e6..53d9c89 100644 --- a/crates/g3-core/src/tools/plan.rs +++ b/crates/g3-core/src/tools/plan.rs @@ -20,7 +20,7 @@ use crate::ToolCall; use super::executor::ToolContext; -use super::invariants::{format_envelope_markdown, format_rulespec_markdown, get_envelope_path, get_rulespec_path, read_envelope, read_rulespec}; +use super::invariants::{format_envelope_markdown, format_rulespec_markdown, get_envelope_path, get_rulespec_path, read_envelope, read_rulespec, write_rulespec, Rulespec}; // ============================================================================ // Plan Schema @@ -845,14 +845,57 @@ pub async fn execute_plan_write( None => return Ok("❌ Missing 'plan' argument. Provide the plan as YAML.".to_string()), }; + // Get optional rulespec content from args + let rulespec_yaml = tool_call.args.get("rulespec").and_then(|v| v.as_str()); + // Parse the YAML let mut plan: Plan = match serde_yaml::from_str(plan_yaml) { Ok(p) => p, Err(e) => return Ok(format!("❌ Invalid plan YAML: {}", e)), }; - // Load existing plan to preserve approved_revision and increment revision - if let Some(existing) = read_plan(session_id)? { + // Load existing plan to check if this is a new plan or an update + let existing_plan = read_plan(session_id)?; + let is_new_plan = existing_plan.is_none(); + + // For NEW plans, rulespec is REQUIRED + // This prevents the tautology problem where invariants are written after implementation + if is_new_plan && rulespec_yaml.is_none() { + return Ok("❌ Missing 'rulespec' argument. New plans MUST include a rulespec with invariants.\n\n\ + The rulespec defines constraints that MUST or MUST NOT hold, extracted from:\n\ + - task_prompt: What the user explicitly requires\n\ + - memory: Persistent rules from workspace memory\n\n\ + Example rulespec:\n\ + ```yaml\n\ + claims:\n\ + - name: feature_capabilities\n\ + selector: \"feature.capabilities\"\n\ + predicates:\n\ + - claim: feature_capabilities\n\ + rule: contains\n\ + value: \"required_feature\"\n\ + source: task_prompt\n\ + notes: \"User explicitly requested this\"\n\ + ```".to_string()); + } + + // Parse and validate rulespec if provided + let rulespec: Option = if let Some(yaml) = rulespec_yaml { + match serde_yaml::from_str(yaml) { + Ok(r) => { + let rs: Rulespec = r; + if let Err(e) = rs.validate() { + return Ok(format!("❌ Invalid rulespec: {}", e)); + } + Some(rs) + } + Err(e) => return Ok(format!("❌ Invalid rulespec YAML: {}", e)), + } + } else { + None + }; + + if let Some(existing) = existing_plan { // Preserve approved_revision from existing plan plan.approved_revision = existing.approved_revision; // Increment revision @@ -888,17 +931,23 @@ pub async fn execute_plan_write( return Ok(format!("❌ Failed to write plan: {}", e)); } + // Write the rulespec if provided (atomically with plan) + if let Some(ref rs) = rulespec { + if let Err(e) = write_rulespec(session_id, rs) { + return Ok(format!("❌ Failed to write rulespec: {}", e)); + } + } + // Display the plan in compact format let plan_path = get_plan_path(session_id); let plan_path_str = plan_path.to_string_lossy().to_string(); let yaml = serde_yaml::to_string(&plan)?; ctx.ui_writer.print_plan_compact(Some(&yaml), Some(&plan_path_str), true); - // Read and format rulespec if it exists - let rulespec_section = match read_rulespec(session_id) { - Ok(Some(rulespec)) => format_rulespec_markdown(&rulespec), - Ok(None) => "\n_No rulespec generated._\n".to_string(), - Err(_) => "\n_No rulespec generated._\n".to_string(), + // Format rulespec section - use provided rulespec or read from disk + let rulespec_section = match rulespec.as_ref().or(read_rulespec(session_id).ok().flatten().as_ref()) { + Some(rs) => format_rulespec_markdown(rs), + None => "\n_No rulespec defined._\n".to_string(), }; // Read and format envelope if it exists diff --git a/crates/g3-core/tests/stream_completion_characterization_test.rs b/crates/g3-core/tests/stream_completion_characterization_test.rs index e9c4cf4..f98aa0b 100644 --- a/crates/g3-core/tests/stream_completion_characterization_test.rs +++ b/crates/g3-core/tests/stream_completion_characterization_test.rs @@ -621,6 +621,15 @@ items: boundary: - desc: Edge cases target: test::module"# + , + "rulespec": r#"claims: + - name: test_feature + selector: test.done +predicates: + - claim: test_feature + rule: exists + source: task_prompt + notes: Test invariant"# }), }; let write_result = agent.execute_tool(&write_call).await.unwrap(); diff --git a/crates/g3-core/tests/tool_execution_roundtrip_test.rs b/crates/g3-core/tests/tool_execution_roundtrip_test.rs index 2e93a56..45286ba 100644 --- a/crates/g3-core/tests/tool_execution_roundtrip_test.rs +++ b/crates/g3-core/tests/tool_execution_roundtrip_test.rs @@ -425,6 +425,14 @@ items: boundary: - desc: Edge target: test"# + , + "rulespec": r#"claims: + - name: test_feature + selector: test.done +predicates: + - claim: test_feature + rule: exists + source: task_prompt"# }), ); @@ -479,6 +487,14 @@ items: happy: {desc: Works, target: test} negative: [{desc: Errors, target: test}] boundary: [{desc: Edge, target: test}]"# + , + "rulespec": r#"claims: + - name: approval_test + selector: test.approved +predicates: + - claim: approval_test + rule: exists + source: task_prompt"# }), ); agent.execute_tool(&write_call).await.unwrap(); diff --git a/prompts/system/native.md b/prompts/system/native.md index e119f7f..8cce316 100644 --- a/prompts/system/native.md +++ b/prompts/system/native.md @@ -31,7 +31,7 @@ Plan Mode is a cognitive forcing system that prevents: ## Workflow -1. **Draft**: Call `plan_read` to check for existing plan, then `plan_write` to create/update +1. **Draft**: Call `plan_read` to check for existing plan, then `plan_write` with BOTH plan AND rulespec 2. **Approval**: Ask user to approve before starting work ("'approve', or edit plan?"). In non-interactive mode (autonomous/one-shot), plans auto-approve on write. 3. **Execute**: Implement items, updating plan with `plan_write` to mark progress 4. **Complete**: When all items are done/blocked, verification runs automatically @@ -56,46 +56,16 @@ When drafting a plan, you MUST: - Keep items ~7 by default - Commit to where the work will live (touches) - Provide all three checks (happy, negative, boundary) +- **Include rulespec with invariants** (required for new plans) When updating a plan: - Cannot remove items from an approved plan (mark as blocked instead) - Must provide evidence and notes when marking item as done +- Rulespec is optional for updates (already saved from initial creation) -## Example Plan Item +## Invariants (Rulespec) -```yaml -- id: I1 - description: "Add CSV import for comic book metadata" - state: todo - touches: ["src/import", "src/library"] - checks: - happy: - desc: "Valid CSV imports 3 comics" - target: "import::csv" - negative: - - desc: "Missing column errors with MissingColumn" - target: "import::csv" - - desc: "Malformed row errors with ParseError" - target: "import::csv" - boundary: - - desc: "Empty file yields empty import without error" - target: "import::csv" - - desc: "File with only headers yields empty import" - target: "import::csv" -``` - -When done, add evidence and notes: -```yaml - state: done - evidence: - - "src/import/csv.rs:42-118" - - "tests/import_csv.rs::test_valid_csv" - notes: "Extended existing parser instead of creating duplicate" -``` - -## Invariants - -For all plans, you MUST extract invariants from each task and write them as a **rulespec**. +For all NEW plans, you MUST extract invariants and provide them as the `rulespec` argument to `plan_write`. ### What are Invariants? @@ -105,8 +75,6 @@ Invariants are constraints that MUST or MUST NOT hold. Extract them from: ### Rulespec Structure -Write invariants as a `rulespec.yaml` file with claims and predicates: - ```yaml claims: - name: csv_capabilities @@ -136,9 +104,84 @@ predicates: - `greater_than` / `less_than`: Numeric comparisons - `matches`: Regex pattern match -### Action Envelope +## Example: Creating a New Plan -As the FINAL step, write an `envelope.yaml` with facts about completed work: +When creating a NEW plan, call `plan_write` with BOTH arguments: + +``` +plan_write( + plan: " + plan_id: csv-import-feature + items: + - id: I1 + description: Add CSV import for comic book metadata + state: todo + touches: [src/import, src/library] + checks: + happy: + desc: Valid CSV imports 3 comics + target: import::csv + negative: + - desc: Missing column errors with MissingColumn + target: import::csv + boundary: + - desc: Empty file yields empty import without error + target: import::csv + ", + rulespec: " + claims: + - name: csv_capabilities + selector: csv_importer.capabilities + - name: api_changes + selector: breaking_changes + predicates: + - claim: csv_capabilities + rule: contains + value: handle_tsv + source: task_prompt + notes: User explicitly requested TSV support + - claim: api_changes + rule: not_exists + source: memory + notes: AGENTS.md requires backward compatibility + " +) +``` + +## Example: Updating a Plan + +When UPDATING an existing plan (marking items done), only `plan` is required: + +``` +plan_write( + plan: " + plan_id: csv-import-feature + items: + - id: I1 + description: Add CSV import for comic book metadata + state: done + touches: [src/import, src/library] + checks: + happy: + desc: Valid CSV imports 3 comics + target: import::csv + negative: + - desc: Missing column errors with MissingColumn + target: import::csv + boundary: + - desc: Empty file yields empty import without error + target: import::csv + evidence: + - src/import/csv.rs:42-118 + - tests/import_csv.rs::test_valid_csv + notes: Extended existing parser instead of creating duplicate + " +) +``` + +## Action Envelope + +As the FINAL step before marking the last item done, write an `envelope.yaml` with facts about completed work: ```yaml facts: @@ -149,12 +192,7 @@ facts: breaking_changes: null # Explicitly absent ``` -### Workflow - -1. While drafting the plan, write `rulespec.yaml` with claims and predicates extracted from the task -2. Implement all plan items -3. After all work is complete, write `envelope.yaml` with facts about the completed work -4. **THEN** call `plan_write` to mark the final item done - verification will check both files +The envelope is verified against the rulespec when the plan completes. # Workspace Memory