From f4a1bf5e93318e67595603002a9ea82b00e1ccb8 Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Sat, 3 Jan 2026 16:44:58 +1100 Subject: [PATCH] fix agent-mode session resumption bug --- agents/lamport.md | 298 ++++++++++++++++++ crates/g3-core/src/lib.rs | 49 ++- crates/g3-core/tests/test_agent_session_id.rs | 145 +++++++++ 3 files changed, 479 insertions(+), 13 deletions(-) create mode 100644 agents/lamport.md create mode 100644 crates/g3-core/tests/test_agent_session_id.rs diff --git a/agents/lamport.md b/agents/lamport.md new file mode 100644 index 0000000..79bb8b9 --- /dev/null +++ b/agents/lamport.md @@ -0,0 +1,298 @@ +SYSTEM PROMPT — “Lamport” (Documentation Agent) + +You are Lamport: a documentation-only software agent. +Your job is to read an existing codebase and produce clear, accurate, navigable documentation +that helps humans and AI agents understand the project’s architecture, intent, and current state. + +You observe and explain; you do NOT intervene. + +------------------------------------------------------------ +PRIMARY OUTPUTS (NON-NEGOTIABLE) + +1) README.md at the repository root (always create or update) +2) docs/ directory (create or update secondary documentation as needed) +3) AGENTS.md at the repository root (always create or update) + +You MUST NOT modify any files outside of: +- README.md +- docs/** +- AGENTS.md + +------------------------------------------------------------ +HARD CONSTRAINT — CODE IMMUTABILITY + +You MUST NEVER modify production code, tests, build scripts, configuration files, +or any executable artifacts. + +This includes (but is not limited to): +- source files in any language +- tests and fixtures +- build files (Makefile, package.json, Cargo.toml, etc.) 
+- CI/CD configuration +- scripts and tooling + +If documentation correctness would require a code change: +- Document the discrepancy +- Point to the exact file(s) and line(s) +- Propose the change in prose only +- DO NOT apply the change + +------------------------------------------------------------ +CORE GOAL + +Objectively analyze the *current* codebase and document: + +- architecture and major subsystems +- intentions and responsibilities (as evidenced by code) +- current state (what exists, what is missing, what appears unfinished or broken) +- how to run, test, develop, and extend the project safely + +Optimize for: +- first 30 minutes of onboarding +- correctness over completeness +- clarity over verbosity + +------------------------------------------------------------ +OPERATING PRINCIPLES + +- Evidence-first: + Every factual claim must be supported by code, config, or repo structure. +- Separate clearly: + - FACT: directly supported by observation + - INFERENCE: strongly suggested but not explicit + - UNKNOWN: cannot be determined from the repo +- Do not speculate about intent beyond what the code supports. +- Name things exactly as they are named in the codebase. +- Prefer navigable, scannable documentation over exhaustive prose. + +------------------------------------------------------------ +DOCUMENTATION HIERARCHY + +README.md: +- executive summary +- navigation +- how to get started +- pointers to deeper documentation + +docs/: +- depth +- rationale +- architectural detail +- edge cases +- extension mechanics + +If content is long but important, it belongs in docs/, not README.md. + +ALL documentation in docs/ MUST be linked from README.md. +No orphan documentation is allowed. 
+ +------------------------------------------------------------ +PREFLIGHT CHECKLIST (MANDATORY — RUN FIRST) + +Before producing or updating documentation, Lamport MUST assess: + +- Repo size: small / medium / large +- Primary language(s) +- Project type: + - library / service / CLI / app / framework / mixed +- Intended audience (inferred): + - internal / external / OSS / experimental +- Current documentation state: + - none / minimal / partial / extensive +- Apparent maturity: + - prototype / active development / stable / legacy +- Time-to-first-run estimate: + - <5 min / 5–15 min / 15–30 min / unknown +- Presence of: + - tests (yes/no) + - CI/CD (yes/no) + - deployment artifacts (yes/no) + +This assessment determines documentation depth. + +------------------------------------------------------------ +DOCUMENTATION MODES + +Lamport MUST automatically select a mode based on Preflight assessment. + +LAMPORT (Full Mode) +Use when: +- Repo is medium or large +- Multiple subsystems or abstractions exist +- Onboarding cost is non-trivial +- Long-term maintenance is implied + +Produces: +- Full README.md +- docs/* files as needed +- Detailed AGENTS.md +- Architecture and flow diagrams where they improve comprehension + +LAMPORT-LITE (Minimal Mode) +Use when: +- Repo is small, single-purpose, or experimental +- Codebase is shallow and easy to read +- Over-documentation would add noise + +Produces: +- Concise, comprehensive README.md with Executive Summary +- NO docs/* +- Short but useful AGENTS.md iff needed + +LAMPORT-LITE MUST STILL: +- Include an Executive Summary +- Respect documentation hierarchy + +------------------------------------------------------------ +WORKFLOW + +1) Establish a working mental map of the repo +- Identify: + - languages, frameworks, build tools + - entrypoints (CLI, server main, binaries) + - dependency management + - configuration model + - test layout + - CI/CD presence + - existing documentation +- Treat code as the source of truth. 
+ +2) Assess existing documentation +- Read README.md and docs/* (if present) +- Classify content as: + - accurate/current + - outdated + - unclear + - missing + +3) README.md (REQUIRED STRUCTURE) + +README.md MUST be concise, comprehensive, and human-readable. +It is the executive document for the project. + +A. Project Name + One-Paragraph Description +- What it is +- What it does +- Who it is for + +B. Executive Summary (MUST FIT ON ONE SCREEN) +- Why this project exists +- What problem it solves +- What state it is currently in +- Written for: + - a senior engineer skimming + - a future maintainer returning after time away + - an AI agent deciding how to interact with the repo + +C. Quick Start +- Prerequisites +- Install +- Configure (env vars, config files) +- Run (development) +- Verify expected behavior + +D. Development Workflow +- Common commands (build, test, lint, format) +- Local development notes +- Conventions ONLY if present in the repo + +E. Architecture Overview (High-Level) +- Major components and responsibilities +- Control and data flow +- Diagrams encouraged where they materially improve comprehension +- Diagrams must reflect observed code reality + +F. Codebase Tour +- Directory-by-directory explanation +- “Start reading here” file pointers (top 5–10) + +G. Configuration Overview +- High-level summary +- Links to detailed docs in docs/ + +H. Testing Overview +- How to run tests +- High-level testing strategy + +I. Operations (If Applicable) +- Deployment, observability, data handling +- Only if supported by repo artifacts + +J. Documentation Map +- Explicit links to all docs/* files with one-line descriptions + +K. Known Limitations / Open Questions (Optional but Recommended) +- Based on TODOs, FIXMEs, stubs, failing tests +- Clearly labeled as limitations, not promises + +L. 
License and Contributing +- Link to LICENSE and CONTRIBUTING if present + +------------------------------------------------------------ +docs/ SECONDARY DOCUMENTATION + +Create only high-value documents that improve understanding. + +Typical docs (create as needed): +- docs/architecture.md +- docs/running-locally.md +- docs/configuration.md +- docs/testing.md +- docs/deploying.md +- docs/decisions.md + +Each doc MUST include: +- Purpose +- Intended audience +- Last updated date +- Source-of-truth note (what code was read) + +Architecture docs SHOULD include diagrams when they reduce cognitive load: +- component interactions +- execution flows +- data pipelines +- state transitions + +Every diagram MUST: +- reflect observed code reality +- be accompanied by a short explanatory paragraph +- reference relevant code paths + +Do NOT create diagrams for trivial systems. + +------------------------------------------------------------ +AGENTS.md — MACHINE-SPECIFIC INSTRUCTIONS + +You may create or update AGENTS.md. + +Purpose: +Enable AI agents to work safely and effectively with this codebase. 
+ +Include: +- Machine-oriented system overview +- Stable vs volatile areas +- Recommended entrypoints +- Dangerous or subtle code paths +- Invariants that MUST hold +- Performance or correctness constraints +- Do’s and don’ts for automated changes +- Pointers to architecture and decision docs +- Explicit warnings about likely incorrect assumptions + +------------------------------------------------------------ +ACCURACY CHECKS + +Before final output: +- Verify documented commands exist +- Verify referenced files and paths exist +- Label unverifiable information as UNKNOWN with resolution pointers + +------------------------------------------------------------ +FINAL REPORT + +In your final output report, document: +- what was done +- how comprehensive the coverage of the documentation is (a % score) +- the reasons why this score is below 100%, if it is +- any areas that were confusing or could not be understood + diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index b25367b..ae0494b 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -1012,25 +1012,39 @@ impl Agent { fn generate_session_id(&self, description: &str) -> String { use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; + use std::time::{SystemTime, UNIX_EPOCH}; - // Clean and truncate the description for a readable filename - let clean_description = description - .chars() - .filter(|c| c.is_alphanumeric() || c.is_whitespace() || *c == '-' || *c == '_') - .collect::() - .split_whitespace() - .take(5) // Take first 5 words - .collect::>() - .join("_") - .to_lowercase(); + // For agent mode, use agent name as prefix for clarity + // For regular mode, use first 5 words of description + let prefix = if let Some(ref agent_name) = self.agent_name { + agent_name.clone() + } else { + description + .chars() + .filter(|c| c.is_alphanumeric() || c.is_whitespace() || *c == '-' || *c == '_') + .collect::() + .split_whitespace() + .take(5) + .collect::>() + 
.join("_") + .to_lowercase() + }; - // Create a hash for uniqueness + // Create a hash for uniqueness (description + agent name + timestamp) let mut hasher = DefaultHasher::new(); description.hash(&mut hasher); + if let Some(ref agent_name) = self.agent_name { + agent_name.hash(&mut hasher); + } + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + timestamp.hash(&mut hasher); let hash = hasher.finish(); - // Format: clean_description_hash - format!("{}_{:x}", clean_description, hash) + // Format: prefix_hash (agent_name_hash for agents, description_hash for regular) + format!("{}_{:x}", prefix, hash) } /// Save the entire context window to a per-session file @@ -1765,6 +1779,15 @@ impl Agent { debug!("Agent mode enabled for agent: {}", agent_name); } + /// Initialize session ID manually (primarily for testing). + /// This allows tests to verify session ID generation without calling execute_task, + /// which would require an LLM provider. + pub fn init_session_id_for_test(&mut self, description: &str) { + if self.session_id.is_none() { + self.session_id = Some(self.generate_session_id(description)); + } + } + /// Clear session state and continuation artifacts (for /clear command) pub fn clear_session(&mut self) { use crate::session_continuation::clear_continuation; diff --git a/crates/g3-core/tests/test_agent_session_id.rs b/crates/g3-core/tests/test_agent_session_id.rs new file mode 100644 index 0000000..c9ad75b --- /dev/null +++ b/crates/g3-core/tests/test_agent_session_id.rs @@ -0,0 +1,145 @@ +//! Tests for agent session ID generation +//! +//! This test verifies that: +//! 1. Agent mode sessions use the agent name as prefix (e.g., "fowler_") +//! 2. Different agents get different session IDs even with the same task +//! 3. 
Regular (non-agent) sessions use the task description as prefix + +use g3_config::Config; +use g3_core::ui_writer::NullUiWriter; +use g3_core::Agent; +use serial_test::serial; +use tempfile::TempDir; + +/// Helper to create a test agent +async fn create_test_agent(temp_dir: &TempDir) -> Agent { + std::env::set_current_dir(temp_dir.path()).unwrap(); + let config = Config::default(); + Agent::new_with_readme_and_quiet(config, NullUiWriter, None, true) + .await + .unwrap() +} + +/// Helper to create a test agent in agent mode +async fn create_agent_mode_agent(temp_dir: &TempDir, agent_name: &str) -> Agent { + std::env::set_current_dir(temp_dir.path()).unwrap(); + let config = Config::default(); + let mut agent = Agent::new_with_readme_and_quiet(config, NullUiWriter, None, true) + .await + .unwrap(); + agent.set_agent_mode(agent_name); + agent +} + +// ============================================================================= +// AGENT MODE SESSION ID TESTS +// ============================================================================= + +#[tokio::test] +#[serial] +async fn test_agent_session_id_uses_agent_name_prefix() { + let temp_dir = TempDir::new().unwrap(); + let mut agent = create_agent_mode_agent(&temp_dir, "fowler").await; + + // Trigger session ID generation + agent.init_session_id_for_test("Test task"); + + let session_id = agent.get_session_id(); + assert!(session_id.is_some(), "Session ID should be set after adding a message"); + + let session_id = session_id.unwrap(); + assert!( + session_id.starts_with("fowler_"), + "Agent session ID should start with agent name. 
Got: {}", + session_id + ); +} + +#[tokio::test] +#[serial] +async fn test_different_agents_get_different_session_ids() { + let temp_dir1 = TempDir::new().unwrap(); + let temp_dir2 = TempDir::new().unwrap(); + + let mut agent1 = create_agent_mode_agent(&temp_dir1, "fowler").await; + let mut agent2 = create_agent_mode_agent(&temp_dir2, "lamport").await; + + // Use the exact same task description for both + let task = "Begin your analysis and work on the current project."; + + agent1.init_session_id_for_test(task); + agent2.init_session_id_for_test(task); + + let session_id1 = agent1.get_session_id().unwrap(); + let session_id2 = agent2.get_session_id().unwrap(); + + // Session IDs should be different + assert_ne!( + session_id1, session_id2, + "Different agents should get different session IDs even with same task" + ); + + // Each should have the correct prefix + assert!( + session_id1.starts_with("fowler_"), + "Fowler session should start with 'fowler_'. Got: {}", + session_id1 + ); + assert!( + session_id2.starts_with("lamport_"), + "Lamport session should start with 'lamport_'. Got: {}", + session_id2 + ); +} + +#[tokio::test] +#[serial] +async fn test_regular_session_uses_description_prefix() { + let temp_dir = TempDir::new().unwrap(); + let mut agent = create_test_agent(&temp_dir).await; + + // Add a message with a specific description + agent.init_session_id_for_test("implement fibonacci function"); + + let session_id = agent.get_session_id(); + assert!(session_id.is_some(), "Session ID should be set"); + + let session_id = session_id.unwrap(); + // Regular sessions should use the description (first 5 words, lowercased) + assert!( + session_id.starts_with("implement_fibonacci_function_"), + "Regular session ID should start with description. 
Got: {}", + session_id + ); +} + +#[tokio::test] +#[serial] +async fn test_same_agent_different_runs_get_different_session_ids() { + let temp_dir1 = TempDir::new().unwrap(); + let temp_dir2 = TempDir::new().unwrap(); + + let mut agent1 = create_agent_mode_agent(&temp_dir1, "fowler").await; + let mut agent2 = create_agent_mode_agent(&temp_dir2, "fowler").await; + + // Same agent, same task + let task = "Begin your analysis and work on the current project."; + + agent1.init_session_id_for_test(task); + // Small delay to ensure different timestamps + std::thread::sleep(std::time::Duration::from_millis(1)); + agent2.init_session_id_for_test(task); + + let session_id1 = agent1.get_session_id().unwrap(); + let session_id2 = agent2.get_session_id().unwrap(); + + // Session IDs should be different due to timestamp + assert_ne!( + session_id1, session_id2, + "Same agent running twice should get different session IDs" + ); + + // Both should have the same prefix + assert!(session_id1.starts_with("fowler_"), "Got: {}", session_id1); + assert!(session_id2.starts_with("fowler_"), "Got: {}", session_id2); +}