Enforce rulespec creation with plan_write for new plans
Solves the tautology problem where the LLM would write invariants after implementation, making them match what was done rather than constrain it. Changes: - plan_write now accepts 'rulespec' parameter - New plans REQUIRE rulespec (fails with helpful error if missing) - Plan updates don't require rulespec (backward compatible) - Rulespec is parsed, validated, and written atomically with plan - Updated system prompt with clear examples for new vs update - Updated tool definition schema - Updated all affected tests New flow: task → plan+rulespec → user reviews BOTH → approve → implement
This commit is contained in:
@@ -58,8 +58,10 @@ Short description for providers without native calling specs:
|
|||||||
- Example: {\"tool\": \"plan_read\", \"args\": {}}
|
- Example: {\"tool\": \"plan_read\", \"args\": {}}
|
||||||
|
|
||||||
- **plan_write**: Create or update the Plan with YAML content
|
- **plan_write**: Create or update the Plan with YAML content
|
||||||
- Format: {\"tool\": \"plan_write\", \"args\": {\"plan\": \"plan_id: my-plan\\nitems: [...]\"}}
|
- Format: {\"tool\": \"plan_write\", \"args\": {\"plan\": \"plan_id: my-plan\\nitems: [...]\", \"rulespec\": \"claims: [...]\\npredicates: [...]\"}}
|
||||||
- Example: {\"tool\": \"plan_write\", \"args\": {\"plan\": \"plan_id: feature-x\\nitems:\\n - id: I1\\n description: Add feature\\n state: todo\\n touches: [src/lib.rs]\\n checks:\\n happy: {desc: Works, target: lib}\\n negative:\\n - {desc: Errors, target: lib}\\n boundary:\\n - {desc: Edge, target: lib}\"}}
|
- For NEW plans, rulespec is REQUIRED. For updates, it's optional.
|
||||||
|
- Example (new plan): {\"tool\": \"plan_write\", \"args\": {\"plan\": \"plan_id: feature-x\\nitems:\\n - id: I1\\n description: Add feature\\n state: todo\\n touches: [src/lib.rs]\\n checks:\\n happy: {desc: Works, target: lib}\\n negative:\\n - {desc: Errors, target: lib}\\n boundary:\\n - {desc: Edge, target: lib}\", \"rulespec\": \"claims:\\n - name: feature\\n selector: feature.done\\npredicates:\\n - claim: feature\\n rule: exists\\n source: task_prompt\"}}
|
||||||
|
- Example (update): {\"tool\": \"plan_write\", \"args\": {\"plan\": \"plan_id: feature-x\\nitems:\\n - id: I1\\n state: done\\n evidence: [src/lib.rs:42]\\n notes: Implemented\"}}
|
||||||
|
|
||||||
- **plan_approve**: Approve the current plan revision (called by user)
|
- **plan_approve**: Approve the current plan revision (called by user)
|
||||||
- Format: {\"tool\": \"plan_approve\", \"args\": {}}
|
- Format: {\"tool\": \"plan_approve\", \"args\": {}}
|
||||||
|
|||||||
@@ -199,13 +199,17 @@ fn create_core_tools() -> Vec<Tool> {
|
|||||||
|
|
||||||
tools.push(Tool {
|
tools.push(Tool {
|
||||||
name: "plan_write".to_string(),
|
name: "plan_write".to_string(),
|
||||||
description: "Create or update the Plan for this session. Provide plan as YAML with plan_id and items array. See system prompt for full schema (items need: id, description, state, touches, checks with happy/negative/boundary). Evidence and notes required when marking done.".to_string(),
|
description: "Create or update the Plan for this session. For NEW plans, you MUST provide both 'plan' and 'rulespec' arguments. The rulespec defines invariants (constraints that must/must not hold) extracted from the task and memory. For plan UPDATES, rulespec is optional.".to_string(),
|
||||||
input_schema: json!({
|
input_schema: json!({
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"plan": {
|
"plan": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "The plan as YAML. Must include plan_id and items array."
|
"description": "The plan as YAML. Must include plan_id and items array."
|
||||||
|
},
|
||||||
|
"rulespec": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The rulespec as YAML with claims and predicates. REQUIRED for new plans, optional for updates. Defines invariants from task_prompt and memory."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": ["plan"]
|
"required": ["plan"]
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ use crate::ToolCall;
|
|||||||
|
|
||||||
use super::executor::ToolContext;
|
use super::executor::ToolContext;
|
||||||
|
|
||||||
use super::invariants::{format_envelope_markdown, format_rulespec_markdown, get_envelope_path, get_rulespec_path, read_envelope, read_rulespec};
|
use super::invariants::{format_envelope_markdown, format_rulespec_markdown, get_envelope_path, get_rulespec_path, read_envelope, read_rulespec, write_rulespec, Rulespec};
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Plan Schema
|
// Plan Schema
|
||||||
@@ -845,14 +845,57 @@ pub async fn execute_plan_write<W: UiWriter>(
|
|||||||
None => return Ok("❌ Missing 'plan' argument. Provide the plan as YAML.".to_string()),
|
None => return Ok("❌ Missing 'plan' argument. Provide the plan as YAML.".to_string()),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Get optional rulespec content from args
|
||||||
|
let rulespec_yaml = tool_call.args.get("rulespec").and_then(|v| v.as_str());
|
||||||
|
|
||||||
// Parse the YAML
|
// Parse the YAML
|
||||||
let mut plan: Plan = match serde_yaml::from_str(plan_yaml) {
|
let mut plan: Plan = match serde_yaml::from_str(plan_yaml) {
|
||||||
Ok(p) => p,
|
Ok(p) => p,
|
||||||
Err(e) => return Ok(format!("❌ Invalid plan YAML: {}", e)),
|
Err(e) => return Ok(format!("❌ Invalid plan YAML: {}", e)),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Load existing plan to preserve approved_revision and increment revision
|
// Load existing plan to check if this is a new plan or an update
|
||||||
if let Some(existing) = read_plan(session_id)? {
|
let existing_plan = read_plan(session_id)?;
|
||||||
|
let is_new_plan = existing_plan.is_none();
|
||||||
|
|
||||||
|
// For NEW plans, rulespec is REQUIRED
|
||||||
|
// This prevents the tautology problem where invariants are written after implementation
|
||||||
|
if is_new_plan && rulespec_yaml.is_none() {
|
||||||
|
return Ok("❌ Missing 'rulespec' argument. New plans MUST include a rulespec with invariants.\n\n\
|
||||||
|
The rulespec defines constraints that MUST or MUST NOT hold, extracted from:\n\
|
||||||
|
- task_prompt: What the user explicitly requires\n\
|
||||||
|
- memory: Persistent rules from workspace memory\n\n\
|
||||||
|
Example rulespec:\n\
|
||||||
|
```yaml\n\
|
||||||
|
claims:\n\
|
||||||
|
- name: feature_capabilities\n\
|
||||||
|
selector: \"feature.capabilities\"\n\
|
||||||
|
predicates:\n\
|
||||||
|
- claim: feature_capabilities\n\
|
||||||
|
rule: contains\n\
|
||||||
|
value: \"required_feature\"\n\
|
||||||
|
source: task_prompt\n\
|
||||||
|
notes: \"User explicitly requested this\"\n\
|
||||||
|
```".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse and validate rulespec if provided
|
||||||
|
let rulespec: Option<Rulespec> = if let Some(yaml) = rulespec_yaml {
|
||||||
|
match serde_yaml::from_str(yaml) {
|
||||||
|
Ok(r) => {
|
||||||
|
let rs: Rulespec = r;
|
||||||
|
if let Err(e) = rs.validate() {
|
||||||
|
return Ok(format!("❌ Invalid rulespec: {}", e));
|
||||||
|
}
|
||||||
|
Some(rs)
|
||||||
|
}
|
||||||
|
Err(e) => return Ok(format!("❌ Invalid rulespec YAML: {}", e)),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(existing) = existing_plan {
|
||||||
// Preserve approved_revision from existing plan
|
// Preserve approved_revision from existing plan
|
||||||
plan.approved_revision = existing.approved_revision;
|
plan.approved_revision = existing.approved_revision;
|
||||||
// Increment revision
|
// Increment revision
|
||||||
@@ -888,17 +931,23 @@ pub async fn execute_plan_write<W: UiWriter>(
|
|||||||
return Ok(format!("❌ Failed to write plan: {}", e));
|
return Ok(format!("❌ Failed to write plan: {}", e));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Write the rulespec if provided (atomically with plan)
|
||||||
|
if let Some(ref rs) = rulespec {
|
||||||
|
if let Err(e) = write_rulespec(session_id, rs) {
|
||||||
|
return Ok(format!("❌ Failed to write rulespec: {}", e));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Display the plan in compact format
|
// Display the plan in compact format
|
||||||
let plan_path = get_plan_path(session_id);
|
let plan_path = get_plan_path(session_id);
|
||||||
let plan_path_str = plan_path.to_string_lossy().to_string();
|
let plan_path_str = plan_path.to_string_lossy().to_string();
|
||||||
let yaml = serde_yaml::to_string(&plan)?;
|
let yaml = serde_yaml::to_string(&plan)?;
|
||||||
ctx.ui_writer.print_plan_compact(Some(&yaml), Some(&plan_path_str), true);
|
ctx.ui_writer.print_plan_compact(Some(&yaml), Some(&plan_path_str), true);
|
||||||
|
|
||||||
// Read and format rulespec if it exists
|
// Format rulespec section - use provided rulespec or read from disk
|
||||||
let rulespec_section = match read_rulespec(session_id) {
|
let rulespec_section = match rulespec.as_ref().or(read_rulespec(session_id).ok().flatten().as_ref()) {
|
||||||
Ok(Some(rulespec)) => format_rulespec_markdown(&rulespec),
|
Some(rs) => format_rulespec_markdown(rs),
|
||||||
Ok(None) => "\n_No rulespec generated._\n".to_string(),
|
None => "\n_No rulespec defined._\n".to_string(),
|
||||||
Err(_) => "\n_No rulespec generated._\n".to_string(),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Read and format envelope if it exists
|
// Read and format envelope if it exists
|
||||||
|
|||||||
@@ -621,6 +621,15 @@ items:
|
|||||||
boundary:
|
boundary:
|
||||||
- desc: Edge cases
|
- desc: Edge cases
|
||||||
target: test::module"#
|
target: test::module"#
|
||||||
|
,
|
||||||
|
"rulespec": r#"claims:
|
||||||
|
- name: test_feature
|
||||||
|
selector: test.done
|
||||||
|
predicates:
|
||||||
|
- claim: test_feature
|
||||||
|
rule: exists
|
||||||
|
source: task_prompt
|
||||||
|
notes: Test invariant"#
|
||||||
}),
|
}),
|
||||||
};
|
};
|
||||||
let write_result = agent.execute_tool(&write_call).await.unwrap();
|
let write_result = agent.execute_tool(&write_call).await.unwrap();
|
||||||
|
|||||||
@@ -425,6 +425,14 @@ items:
|
|||||||
boundary:
|
boundary:
|
||||||
- desc: Edge
|
- desc: Edge
|
||||||
target: test"#
|
target: test"#
|
||||||
|
,
|
||||||
|
"rulespec": r#"claims:
|
||||||
|
- name: test_feature
|
||||||
|
selector: test.done
|
||||||
|
predicates:
|
||||||
|
- claim: test_feature
|
||||||
|
rule: exists
|
||||||
|
source: task_prompt"#
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -479,6 +487,14 @@ items:
|
|||||||
happy: {desc: Works, target: test}
|
happy: {desc: Works, target: test}
|
||||||
negative: [{desc: Errors, target: test}]
|
negative: [{desc: Errors, target: test}]
|
||||||
boundary: [{desc: Edge, target: test}]"#
|
boundary: [{desc: Edge, target: test}]"#
|
||||||
|
,
|
||||||
|
"rulespec": r#"claims:
|
||||||
|
- name: approval_test
|
||||||
|
selector: test.approved
|
||||||
|
predicates:
|
||||||
|
- claim: approval_test
|
||||||
|
rule: exists
|
||||||
|
source: task_prompt"#
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
agent.execute_tool(&write_call).await.unwrap();
|
agent.execute_tool(&write_call).await.unwrap();
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ Plan Mode is a cognitive forcing system that prevents:
|
|||||||
|
|
||||||
## Workflow
|
## Workflow
|
||||||
|
|
||||||
1. **Draft**: Call `plan_read` to check for existing plan, then `plan_write` to create/update
|
1. **Draft**: Call `plan_read` to check for existing plan, then `plan_write` with BOTH plan AND rulespec
|
||||||
2. **Approval**: Ask user to approve before starting work ("'approve', or edit plan?"). In non-interactive mode (autonomous/one-shot), plans auto-approve on write.
|
2. **Approval**: Ask user to approve before starting work ("'approve', or edit plan?"). In non-interactive mode (autonomous/one-shot), plans auto-approve on write.
|
||||||
3. **Execute**: Implement items, updating plan with `plan_write` to mark progress
|
3. **Execute**: Implement items, updating plan with `plan_write` to mark progress
|
||||||
4. **Complete**: When all items are done/blocked, verification runs automatically
|
4. **Complete**: When all items are done/blocked, verification runs automatically
|
||||||
@@ -56,46 +56,16 @@ When drafting a plan, you MUST:
|
|||||||
- Keep items ~7 by default
|
- Keep items ~7 by default
|
||||||
- Commit to where the work will live (touches)
|
- Commit to where the work will live (touches)
|
||||||
- Provide all three checks (happy, negative, boundary)
|
- Provide all three checks (happy, negative, boundary)
|
||||||
|
- **Include rulespec with invariants** (required for new plans)
|
||||||
|
|
||||||
When updating a plan:
|
When updating a plan:
|
||||||
- Cannot remove items from an approved plan (mark as blocked instead)
|
- Cannot remove items from an approved plan (mark as blocked instead)
|
||||||
- Must provide evidence and notes when marking item as done
|
- Must provide evidence and notes when marking item as done
|
||||||
|
- Rulespec is optional for updates (already saved from initial creation)
|
||||||
|
|
||||||
## Example Plan Item
|
## Invariants (Rulespec)
|
||||||
|
|
||||||
```yaml
|
For all NEW plans, you MUST extract invariants and provide them as the `rulespec` argument to `plan_write`.
|
||||||
- id: I1
|
|
||||||
description: "Add CSV import for comic book metadata"
|
|
||||||
state: todo
|
|
||||||
touches: ["src/import", "src/library"]
|
|
||||||
checks:
|
|
||||||
happy:
|
|
||||||
desc: "Valid CSV imports 3 comics"
|
|
||||||
target: "import::csv"
|
|
||||||
negative:
|
|
||||||
- desc: "Missing column errors with MissingColumn"
|
|
||||||
target: "import::csv"
|
|
||||||
- desc: "Malformed row errors with ParseError"
|
|
||||||
target: "import::csv"
|
|
||||||
boundary:
|
|
||||||
- desc: "Empty file yields empty import without error"
|
|
||||||
target: "import::csv"
|
|
||||||
- desc: "File with only headers yields empty import"
|
|
||||||
target: "import::csv"
|
|
||||||
```
|
|
||||||
|
|
||||||
When done, add evidence and notes:
|
|
||||||
```yaml
|
|
||||||
state: done
|
|
||||||
evidence:
|
|
||||||
- "src/import/csv.rs:42-118"
|
|
||||||
- "tests/import_csv.rs::test_valid_csv"
|
|
||||||
notes: "Extended existing parser instead of creating duplicate"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Invariants
|
|
||||||
|
|
||||||
For all plans, you MUST extract invariants from each task and write them as a **rulespec**.
|
|
||||||
|
|
||||||
### What are Invariants?
|
### What are Invariants?
|
||||||
|
|
||||||
@@ -105,8 +75,6 @@ Invariants are constraints that MUST or MUST NOT hold. Extract them from:
|
|||||||
|
|
||||||
### Rulespec Structure
|
### Rulespec Structure
|
||||||
|
|
||||||
Write invariants as a `rulespec.yaml` file with claims and predicates:
|
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
claims:
|
claims:
|
||||||
- name: csv_capabilities
|
- name: csv_capabilities
|
||||||
@@ -136,9 +104,84 @@ predicates:
|
|||||||
- `greater_than` / `less_than`: Numeric comparisons
|
- `greater_than` / `less_than`: Numeric comparisons
|
||||||
- `matches`: Regex pattern match
|
- `matches`: Regex pattern match
|
||||||
|
|
||||||
### Action Envelope
|
## Example: Creating a New Plan
|
||||||
|
|
||||||
As the FINAL step, write an `envelope.yaml` with facts about completed work:
|
When creating a NEW plan, call `plan_write` with BOTH arguments:
|
||||||
|
|
||||||
|
```
|
||||||
|
plan_write(
|
||||||
|
plan: "
|
||||||
|
plan_id: csv-import-feature
|
||||||
|
items:
|
||||||
|
- id: I1
|
||||||
|
description: Add CSV import for comic book metadata
|
||||||
|
state: todo
|
||||||
|
touches: [src/import, src/library]
|
||||||
|
checks:
|
||||||
|
happy:
|
||||||
|
desc: Valid CSV imports 3 comics
|
||||||
|
target: import::csv
|
||||||
|
negative:
|
||||||
|
- desc: Missing column errors with MissingColumn
|
||||||
|
target: import::csv
|
||||||
|
boundary:
|
||||||
|
- desc: Empty file yields empty import without error
|
||||||
|
target: import::csv
|
||||||
|
",
|
||||||
|
rulespec: "
|
||||||
|
claims:
|
||||||
|
- name: csv_capabilities
|
||||||
|
selector: csv_importer.capabilities
|
||||||
|
- name: api_changes
|
||||||
|
selector: breaking_changes
|
||||||
|
predicates:
|
||||||
|
- claim: csv_capabilities
|
||||||
|
rule: contains
|
||||||
|
value: handle_tsv
|
||||||
|
source: task_prompt
|
||||||
|
notes: User explicitly requested TSV support
|
||||||
|
- claim: api_changes
|
||||||
|
rule: not_exists
|
||||||
|
source: memory
|
||||||
|
notes: AGENTS.md requires backward compatibility
|
||||||
|
"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Example: Updating a Plan
|
||||||
|
|
||||||
|
When UPDATING an existing plan (marking items done), only `plan` is required:
|
||||||
|
|
||||||
|
```
|
||||||
|
plan_write(
|
||||||
|
plan: "
|
||||||
|
plan_id: csv-import-feature
|
||||||
|
items:
|
||||||
|
- id: I1
|
||||||
|
description: Add CSV import for comic book metadata
|
||||||
|
state: done
|
||||||
|
touches: [src/import, src/library]
|
||||||
|
checks:
|
||||||
|
happy:
|
||||||
|
desc: Valid CSV imports 3 comics
|
||||||
|
target: import::csv
|
||||||
|
negative:
|
||||||
|
- desc: Missing column errors with MissingColumn
|
||||||
|
target: import::csv
|
||||||
|
boundary:
|
||||||
|
- desc: Empty file yields empty import without error
|
||||||
|
target: import::csv
|
||||||
|
evidence:
|
||||||
|
- src/import/csv.rs:42-118
|
||||||
|
- tests/import_csv.rs::test_valid_csv
|
||||||
|
notes: Extended existing parser instead of creating duplicate
|
||||||
|
"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Action Envelope
|
||||||
|
|
||||||
|
As the FINAL step before marking the last item done, write an `envelope.yaml` with facts about completed work:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
facts:
|
facts:
|
||||||
@@ -149,12 +192,7 @@ facts:
|
|||||||
breaking_changes: null # Explicitly absent
|
breaking_changes: null # Explicitly absent
|
||||||
```
|
```
|
||||||
|
|
||||||
### Workflow
|
The envelope is verified against the rulespec when the plan completes.
|
||||||
|
|
||||||
1. While drafting the plan, write `rulespec.yaml` with claims and predicates extracted from the task
|
|
||||||
2. Implement all plan items
|
|
||||||
3. After all work is complete, write `envelope.yaml` with facts about the completed work
|
|
||||||
4. **THEN** call `plan_write` to mark the final item done - verification will check both files
|
|
||||||
|
|
||||||
# Workspace Memory
|
# Workspace Memory
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user