Compare commits

...

37 Commits

Author SHA1 Message Date
Jochen
9f6592efc2 remove redundant 'if' 2025-11-27 13:34:54 +11:00
Jochen
99125fc39e completely remove the skipping first player logic 2025-11-27 13:21:40 +11:00
Jochen
c58aa80932 explain what file was found in workspace 2025-11-26 21:43:59 +11:00
Jochen
c6c35bf2ca Merge pull request #31 from dhanji/jochen_fast_start
add code exploration fast start
2025-11-26 17:10:42 +11:00
Jochen
c9fde4ecef Merge pull request #32 from dhanji/jochen_reorder_system_prompt
minor change: reorder system prompt
2025-11-26 11:07:08 +11:00
Jochen
1e1702001c Add logging for discovery 2025-11-26 10:41:35 +11:00
Jochen
c419833ddf updated the prompt 2025-11-26 10:26:52 +11:00
Jochen
c19127f809 make sure user requirements are included 2025-11-26 10:26:52 +11:00
Jochen
bd29addefa reorder system prompt 2025-11-26 10:26:52 +11:00
Jochen
467e300ec2 reorder system prompt 2025-11-26 09:30:26 +11:00
Jochen
2e252cd298 added timer 2025-11-25 22:51:33 +11:00
Jochen
ad198a8501 add code exploration fast start
This tries to short-circuit multiple round-trips to llm for reading code.
It's a precursor to trying to context engineer tailored to specific tasks.
In initial experiments, it's only marginally faster than regular mode, and burns more tokens.
2025-11-25 22:51:32 +11:00
Jochen
f501751bdf Merge pull request #30 from dhanji/fix_tests
Fix tests & add code coverage tool
2025-11-25 10:18:18 +11:00
Jochen
a96a15d1fc add code coverage command 2025-11-21 14:38:58 +11:00
Jochen
24dc7ad642 fix build target 2025-11-21 14:07:31 +11:00
Jochen
a097c3abef first cut 2025-11-21 13:56:36 +11:00
Jochen
34e55050b3 Merge pull request #28 from dhanji/jochen_force_todo_check_at_start
check for stale TODO at startup of autonomous
2025-11-21 12:41:45 +11:00
Jochen
551a577ee1 changed user choice for TODO stale check
user can ignore, mark stale or quit.
2025-11-21 12:35:14 +11:00
Jochen
84718223bc remove minor comment 2025-11-21 12:26:41 +11:00
Jochen
28a83d2dcf check for stale TODOs
on by default, can be disabled
2025-11-21 12:09:01 +11:00
Jochen
0ce905dc74 Merge pull request #26 from dhanji/jochen_log_tool_calls__with_tool_logs
log tool calls, allow multiple calls (optional)
2025-11-21 11:07:23 +11:00
Jochen
9f0d5add1e remove redundant SYSTEM_NATIVE_TOOL_CALLS_MULTIPLE 2025-11-21 11:04:14 +11:00
Jochen
be6c6bfca4 fix ref to system prompt 2025-11-21 10:49:39 +11:00
Jochen
94a41c5c34 don't write warning to console 2025-11-21 10:49:27 +11:00
Jochen
09dbad2d68 allow multiple tool calls, log warnings if there are duplicate calls.
controlled via a flag to the agent config:
allow_multiple_tool_calls = true
2025-11-21 10:49:15 +11:00
Jochen
ffbf410b17 log tool calls 2025-11-21 10:49:02 +11:00
Jochen
c6f3f12b71 Merge pull request #27 from dhanji/jochen_tool_tail
useful shell command for tailing tool logs
2025-11-20 13:31:09 +11:00
Dhanji Prasanna
14c8d066c9 ensure system prompt is always added first 2025-11-20 08:45:03 +11:00
Jochen
e556f06b15 useful command for tailing tool logs 2025-11-19 21:02:42 +11:00
Jochen
b6e226df67 Merge pull request #23 from dhanji/jochen-add-code-instructions
system prompt now includes code style guide
2025-11-19 16:25:20 +11:00
Dhanji R. Prasanna
5b46922047 Merge pull request #25 from dhanji/fix_max_tokens
fix bad max_tokens and context_window logic
2025-11-19 15:55:34 +11:00
Jochen
1069664e16 fix bad max_tokens and context_window logic
for non-databricks code
2025-11-19 13:51:16 +11:00
Dhanji R. Prasanna
725f54b99b Merge pull request #24 from dhanji/jochen_cache_control
Add cache control for Anthropic (won't work via Databricks)
2025-11-19 13:39:09 +11:00
Dhanji R. Prasanna
325aab6b0e Merge pull request #22 from dhanji/micn/console-detection
patching console for detecting g3
2025-11-19 13:37:22 +11:00
Jochen
7f73b664a3 system prompt now includes code style guide 2025-11-18 18:21:16 +11:00
Michael Neale
8d8ddbe4b9 live reloading of detected things 2025-11-14 16:31:46 +11:00
Michael Neale
0466405d87 don't detect console, better process pickup 2025-11-13 18:46:55 +11:00
45 changed files with 3498 additions and 676 deletions

51
Cargo.lock generated
View File

@@ -576,6 +576,26 @@ dependencies = [
"tiny-keccak",
]
[[package]]
name = "const_format"
version = "0.2.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7faa7469a93a566e9ccc1c73fe783b4a65c274c5ace346038dca9c39fe0030ad"
dependencies = [
"const_format_proc_macros",
]
[[package]]
name = "const_format_proc_macros"
version = "0.2.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "convert_case"
version = "0.4.0"
@@ -1345,11 +1365,15 @@ dependencies = [
"dirs 5.0.1",
"g3-config",
"g3-core",
"g3-planner",
"g3-providers",
"hex",
"indicatif",
"ratatui",
"rustyline",
"serde",
"serde_json",
"sha2",
"termimad",
"tokio",
"tokio-util",
@@ -1389,6 +1413,7 @@ dependencies = [
"config",
"dirs 5.0.1",
"serde",
"serde_json",
"shellexpand",
"tempfile",
"thiserror 1.0.69",
@@ -1427,6 +1452,7 @@ dependencies = [
"anyhow",
"async-trait",
"chrono",
"const_format",
"futures-util",
"g3-computer-control",
"g3-config",
@@ -1475,6 +1501,19 @@ dependencies = [
"tracing",
]
[[package]]
name = "g3-planner"
version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"const_format",
"g3-providers",
"serde",
"serde_json",
"tokio",
]
[[package]]
name = "g3-providers"
version = "0.1.0"
@@ -1631,6 +1670,12 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
[[package]]
name = "hex"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "home"
version = "0.5.9"
@@ -4090,6 +4135,12 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
[[package]]
name = "unicode-xid"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
[[package]]
name = "unsafe-libyaml"
version = "0.2.11"

View File

@@ -2,6 +2,7 @@
members = [
"crates/g3-cli",
"crates/g3-core",
"crates/g3-planner",
"crates/g3-providers",
"crates/g3-config",
"crates/g3-execution",

View File

@@ -22,14 +22,16 @@ use_oauth = true
[providers.anthropic]
api_key = "your-anthropic-api-key"
model = "claude-3-haiku-20240307" # Using a faster model for player
model = "claude-sonnet-4-5"
max_tokens = 4096
temperature = 0.3 # Slightly higher temperature for more creative implementations
# cache_config = "ephemeral" # Optional: Enable prompt caching
# Options: "ephemeral", "5minute", "1hour"
# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
# enable_1m_context = true # optional, more expensive
[agent]
fallback_default_max_tokens = 8192
enable_streaming = true
timeout_seconds = 60
timeout_seconds = 60
allow_multiple_tool_calls = true # Enable multiple tool calls, will usually only work with Anthropic

View File

@@ -14,14 +14,16 @@ max_tokens = 4096 # Per-request output limit (how many tokens the model can gen
# Note: This is different from max_context_length (total conversation history size)
temperature = 0.1
use_oauth = true
# cache_config = "ephemeral" # Optional: Enable prompt caching for Claude models on Databricks
# Options: "ephemeral", "5minute", "1hour"
# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
# The cache control will be automatically applied to:
# - The system prompt at the start of each session
# - Assistant responses after every 10 tool calls
# - 5minute costs $3/mtok, more details below
# https://docs.claude.com/en/docs/build-with-claude/prompt-caching#pricing
[providers.anthropic]
api_key = "your-anthropic-api-key"
model = "claude-sonnet-4-5"
max_tokens = 4096
temperature = 0.3 # Slightly higher temperature for more creative implementations
# cache_config = "ephemeral" # Optional: Enable prompt caching
# Options: "ephemeral", "5minute", "1hour"
# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
# enable_1m_context = true # optional, more expensive
# Multiple OpenAI-compatible providers can be configured with custom names
@@ -55,6 +57,7 @@ timeout_seconds = 60
# Retry configuration for recoverable errors (timeouts, rate limits, etc.)
max_retry_attempts = 3 # Default mode retry attempts
autonomous_max_retry_attempts = 6 # Autonomous mode retry attempts (higher for long-running tasks)
allow_multiple_tool_calls = true # Enable multiple tool calls
[computer_control]
enabled = false # Set to true to enable computer control (requires OS permissions)

View File

@@ -7,6 +7,8 @@ description = "CLI interface for G3 AI coding agent"
[dependencies]
g3-core = { path = "../g3-core" }
g3-config = { path = "../g3-config" }
g3-planner = { path = "../g3-planner" }
g3-providers = { path = "../g3-providers" }
clap = { workspace = true }
tokio = { workspace = true }
anyhow = { workspace = true }
@@ -17,6 +19,8 @@ serde_json = { workspace = true }
rustyline = "17.0.1"
dirs = "5.0"
tokio-util = "0.7"
sha2 = "0.10"
hex = "0.4"
indicatif = "0.17"
chrono = { version = "0.4", features = ["serde"] }
crossterm = "0.29.0"

View File

@@ -98,6 +98,25 @@ fn generate_turn_histogram(turn_metrics: &[TurnMetrics]) -> String {
histogram
}
/// Render an elapsed `Duration` compactly for progress/status lines.
///
/// The largest non-zero unit decides the shape of the result:
/// `"1h 23m 45s"`, `"5m 30s"`, `"45s"`, or, when less than one whole
/// second has elapsed, the total in milliseconds (e.g. `"250ms"`).
/// Sub-second remainders are dropped once at least one second has passed.
fn format_elapsed_time(duration: Duration) -> String {
    let whole_secs = duration.as_secs();
    let parts = (whole_secs / 3600, (whole_secs / 60) % 60, whole_secs % 60);

    match parts {
        // Nothing but a sub-second remainder: fall back to milliseconds.
        (0, 0, 0) => format!("{}ms", duration.as_millis()),
        (0, 0, s) => format!("{}s", s),
        (0, m, s) => format!("{}m {}s", m, s),
        (h, m, s) => format!("{}h {}m {}s", h, m, s),
    }
}
/// Extract coach feedback by reading from the coach agent's specific log file
/// Uses the coach agent's session ID to find the exact log file
fn extract_coach_feedback_from_logs(
@@ -159,11 +178,12 @@ fn extract_coach_feedback_from_logs(
use clap::Parser;
use g3_config::Config;
use g3_core::{project::Project, ui_writer::UiWriter, Agent};
use g3_core::{project::Project, ui_writer::UiWriter, Agent, DiscoveryOptions};
use rustyline::error::ReadlineError;
use rustyline::DefaultEditor;
use std::path::Path;
use std::path::PathBuf;
use sha2::{Digest, Sha256};
use tokio_util::sync::CancellationToken;
use tracing::{error, info};
@@ -246,6 +266,10 @@ pub struct Cli {
/// Enable WebDriver browser automation tools
#[arg(long)]
pub webdriver: bool,
/// Enable fast codebase discovery before first LLM turn
#[arg(long, value_name = "PATH")]
pub codebase_fast_start: Option<PathBuf>,
}
pub async fn run() -> Result<()> {
@@ -675,6 +699,7 @@ async fn run_accumulative_mode(
cli.show_code,
cli.max_turns,
cli.quiet,
cli.codebase_fast_start.clone(),
) => result,
_ = tokio::signal::ctrl_c() => {
output.print("\n⚠️ Autonomous run cancelled by user (Ctrl+C)");
@@ -726,6 +751,7 @@ async fn run_autonomous_machine(
show_code: bool,
max_turns: usize,
_quiet: bool,
_codebase_fast_start: Option<PathBuf>,
) -> Result<()> {
println!("AUTONOMOUS_MODE_STARTED");
println!("WORKSPACE: {}", project.workspace().display());
@@ -756,7 +782,7 @@ async fn run_autonomous_machine(
);
println!("TASK_START");
let result = agent.execute_task_with_timing(&task, None, false, show_prompt, show_code, true).await?;
let result = agent.execute_task_with_timing(&task, None, false, show_prompt, show_code, true, None).await?;
println!("AGENT_RESPONSE:");
println!("{}", result.response);
println!("END_AGENT_RESPONSE");
@@ -783,13 +809,14 @@ async fn run_with_console_mode(
cli.show_code,
cli.max_turns,
cli.quiet,
cli.codebase_fast_start.clone(),
)
.await?;
} else if let Some(task) = cli.task {
// Single-shot mode
let output = SimpleOutput::new();
let result = agent
.execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true)
.execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true, None)
.await?;
output.print_smart(&result.response);
} else {
@@ -814,12 +841,13 @@ async fn run_with_machine_mode(
cli.show_code,
cli.max_turns,
cli.quiet,
cli.codebase_fast_start.clone(),
)
.await?;
} else if let Some(task) = cli.task {
// Single-shot mode
let result = agent
.execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true)
.execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true, None)
.await?;
println!("AGENT_RESPONSE:");
println!("{}", result.response);
@@ -1211,7 +1239,7 @@ async fn execute_task<W: UiWriter>(
// Execute task with cancellation support
let execution_result = tokio::select! {
result = agent.execute_task_with_timing_cancellable(
input, None, false, show_prompt, show_code, true, cancellation_token.clone()
input, None, false, show_prompt, show_code, true, cancellation_token.clone(), None
) => {
result
}
@@ -1402,7 +1430,7 @@ async fn execute_task_machine(
// Execute task with cancellation support
let execution_result = tokio::select! {
result = agent.execute_task_with_timing_cancellable(
input, None, false, show_prompt, show_code, true, cancellation_token.clone()
input, None, false, show_prompt, show_code, true, cancellation_token.clone(), None
) => {
result
}
@@ -1551,6 +1579,7 @@ async fn run_autonomous(
show_code: bool,
max_turns: usize,
quiet: bool,
codebase_fast_start: Option<PathBuf>,
) -> Result<()> {
let start_time = std::time::Instant::now();
let output = SimpleOutput::new();
@@ -1660,17 +1689,52 @@ async fn run_autonomous(
} else {
output.print("📋 Requirements loaded from requirements.md");
}
// Calculate SHA256 of requirements
let mut hasher = Sha256::new();
hasher.update(requirements.as_bytes());
let requirements_sha = hex::encode(hasher.finalize());
output.print(&format!("🔒 Requirements SHA256: {}", requirements_sha));
// Pass SHA to agent for staleness checking
agent.set_requirements_sha(requirements_sha.clone());
let loop_start = Instant::now();
output.print("🔄 Starting coach-player feedback loop...");
// Check if implementation files already exist
let skip_first_player = project.has_implementation_files();
if skip_first_player {
output.print("📂 Detected existing implementation files in workspace");
output.print("⏭️ Skipping first player turn - proceeding directly to coach review");
// Load fast-discovery messages before the loop starts (if enabled)
let (discovery_messages, discovery_working_dir): (Vec<g3_providers::Message>, Option<String>) =
if let Some(ref codebase_path) = codebase_fast_start {
// Canonicalize the path to ensure it's absolute
let canonical_path = codebase_path.canonicalize().unwrap_or_else(|_| codebase_path.clone());
let path_str = canonical_path.to_string_lossy();
output.print(&format!("🔍 Fast-discovery mode: will explore codebase at {}", path_str));
// Get the provider from the agent and use async LLM-based discovery
match agent.get_provider() {
Ok(provider) => {
// Create a status callback that prints to output
let output_clone = output.clone();
let status_callback: g3_planner::StatusCallback = Box::new(move |msg: &str| {
output_clone.print(msg);
});
match g3_planner::get_initial_discovery_messages(&path_str, Some(&requirements), provider, Some(&status_callback)).await {
Ok(messages) => (messages, Some(path_str.to_string())),
Err(e) => {
output.print(&format!("⚠️ LLM discovery failed: {}, skipping fast-start", e));
(Vec::new(), None)
}
}
}
Err(e) => {
output.print(&format!("⚠️ Could not get provider: {}, skipping fast-start", e));
(Vec::new(), None)
}
}
} else {
output.print("📂 No existing implementation files detected");
output.print("🎯 Starting with player implementation");
}
(Vec::new(), None)
};
let has_discovery = !discovery_messages.is_empty();
let mut turn = 1;
let mut coach_feedback = String::new();
@@ -1679,194 +1743,201 @@ async fn run_autonomous(
loop {
let turn_start_time = Instant::now();
let turn_start_tokens = agent.get_context_window().used_tokens;
// Skip player turn if it's the first turn and implementation files exist
if !(turn == 1 && skip_first_player) {
output.print(&format!(
"\n=== TURN {}/{} - PLAYER MODE ===",
turn, max_turns
));
// Player mode: implement requirements (with coach feedback if available)
let player_prompt = if coach_feedback.is_empty() {
format!(
"You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nImplement this step by step, creating all necessary files and code.",
requirements
)
} else {
format!(
"You are G3 in implementation mode. Address the following specific feedback from the coach:\n\n{}\n\nContext: You are improving an implementation based on these requirements:\n{}\n\nFocus on fixing the issues mentioned in the coach feedback above.",
coach_feedback, requirements
)
};
output.print(&format!(
"\n=== TURN {}/{} - PLAYER MODE ===",
turn, max_turns
));
output.print("🎯 Starting player implementation...");
// Surface provider info for player agent
agent.print_provider_banner("Player");
// Display what feedback the player is receiving
// If there's no coach feedback on subsequent turns, this is an error
if coach_feedback.is_empty() {
if turn > 1 {
return Err(anyhow::anyhow!(
"Player mode error: No coach feedback received on turn {}",
turn
));
}
output.print("📋 Player starting initial implementation (no prior coach feedback)");
} else {
output.print(&format!(
"📋 Player received coach feedback ({} chars):",
coach_feedback.len()
));
output.print(&coach_feedback.to_string());
}
output.print(""); // Empty line for readability
// Player mode: implement requirements (with coach feedback if available)
let player_prompt = if coach_feedback.is_empty() {
format!(
"You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nRequirements SHA256: {}\n\nImplement this step by step, creating all necessary files and code.",
requirements, requirements_sha
)
} else {
format!(
"You are G3 in implementation mode. Address the following specific feedback from the coach:\n\n{}\n\nContext: You are improving an implementation based on these requirements:\n{}\n\nFocus on fixing the issues mentioned in the coach feedback above.",
coach_feedback, requirements
)
};
// Execute player task with retry on error
let mut _player_retry_count = 0;
const MAX_PLAYER_RETRIES: u32 = 3;
let mut player_failed = false;
output.print(&format!("🎯 Starting player implementation... (elapsed: {})", format_elapsed_time(loop_start.elapsed())));
loop {
match agent
.execute_task_with_timing(
&player_prompt,
None,
false,
show_prompt,
show_code,
true,
)
.await
{
Ok(result) => {
// Display player's implementation result
output.print("📝 Player implementation completed:");
output.print_smart(&result.response);
break;
}
Err(e) => {
// Check if this is a context length exceeded error
use g3_core::error_handling::{classify_error, ErrorType, RecoverableError};
let error_type = classify_error(&e);
if matches!(error_type, ErrorType::Recoverable(RecoverableError::ContextLengthExceeded)) {
output.print(&format!("⚠️ Context length exceeded in player turn: {}", e));
output.print("📝 Logging error to session and ending current turn...");
// Build forensic context
let forensic_context = format!(
"Turn: {}\n\
Role: Player\n\
Context tokens: {}\n\
Total available: {}\n\
Percentage used: {:.1}%\n\
Prompt length: {} chars\n\
Error occurred at: {}",
turn,
agent.get_context_window().used_tokens,
agent.get_context_window().total_tokens,
agent.get_context_window().percentage_used(),
player_prompt.len(),
chrono::Utc::now().to_rfc3339()
);
// Log to session JSON
agent.log_error_to_session(&e, "assistant", Some(forensic_context));
// Mark turn as failed and continue to next turn
player_failed = true;
break;
} else if e.to_string().contains("panic") {
output.print(&format!("💥 Player panic detected: {}", e));
// Generate final report even for panic
let elapsed = start_time.elapsed();
let context_window = agent.get_context_window();
output.print(&format!("\n{}", "=".repeat(60)));
output.print("📊 AUTONOMOUS MODE SESSION REPORT");
output.print(&"=".repeat(60));
output.print(&format!(
"⏱️ Total Duration: {:.2}s",
elapsed.as_secs_f64()
));
output.print(&format!("🔄 Turns Taken: {}/{}", turn, max_turns));
output.print("📝 Final Status: 💥 PLAYER PANIC");
output.print("\n📈 Token Usage Statistics:");
output.print(&format!(
" • Used Tokens: {}",
context_window.used_tokens
));
output.print(&format!(
" • Total Available: {}",
context_window.total_tokens
));
output.print(&format!(
" • Cumulative Tokens: {}",
context_window.cumulative_tokens
));
output.print(&format!(
" • Usage Percentage: {:.1}%",
context_window.percentage_used()
));
// Add per-turn histogram
output.print(&generate_turn_histogram(&turn_metrics));
output.print(&"=".repeat(60));
return Err(e);
}
_player_retry_count += 1;
output.print(&format!(
"⚠️ Player error (attempt {}/{}): {}",
_player_retry_count, MAX_PLAYER_RETRIES, e
));
if _player_retry_count >= MAX_PLAYER_RETRIES {
output.print(
"🔄 Max retries reached for player, marking turn as failed...",
);
player_failed = true;
break; // Exit retry loop
}
output.print("🔄 Retrying player implementation...");
}
}
}
// If player failed after max retries, increment turn and continue
if player_failed {
output.print(&format!(
"⚠️ Player turn {} failed after max retries. Moving to next turn.",
// Display what feedback the player is receiving
// If there's no coach feedback on subsequent turns, this is an error
if coach_feedback.is_empty() {
if turn > 1 {
return Err(anyhow::anyhow!(
"Player mode error: No coach feedback received on turn {}",
turn
));
// Record turn metrics before incrementing
let turn_duration = turn_start_time.elapsed();
let turn_tokens = agent.get_context_window().used_tokens.saturating_sub(turn_start_tokens);
turn_metrics.push(TurnMetrics {
turn_number: turn,
tokens_used: turn_tokens,
wall_clock_time: turn_duration,
});
turn += 1;
}
output.print("📋 Player starting initial implementation (no prior coach feedback)");
} else {
output.print(&format!(
"📋 Player received coach feedback ({} chars):",
coach_feedback.len()
));
output.print(&coach_feedback.to_string());
}
output.print(""); // Empty line for readability
// Check if we've reached max turns
if turn > max_turns {
output.print("\n=== SESSION COMPLETED - MAX TURNS REACHED ===");
output.print(&format!("⏰ Maximum turns ({}) reached", max_turns));
// Execute player task with retry on error
let mut _player_retry_count = 0;
const MAX_PLAYER_RETRIES: u32 = 3;
let mut player_failed = false;
loop {
match agent
.execute_task_with_timing(
&player_prompt,
None,
false,
show_prompt,
show_code,
true,
if has_discovery {
Some(DiscoveryOptions {
messages: &discovery_messages,
fast_start_path: discovery_working_dir.as_deref(),
})
} else { None },
)
.await
{
Ok(result) => {
// Display player's implementation result
output.print("📝 Player implementation completed:");
output.print_smart(&result.response);
break;
}
Err(e) => {
// Check if this is a context length exceeded error
use g3_core::error_handling::{classify_error, ErrorType, RecoverableError};
let error_type = classify_error(&e);
// Continue to next iteration with empty feedback (restart from scratch)
coach_feedback = String::new();
continue;
if matches!(error_type, ErrorType::Recoverable(RecoverableError::ContextLengthExceeded)) {
output.print(&format!("⚠️ Context length exceeded in player turn: {}", e));
output.print("📝 Logging error to session and ending current turn...");
// Build forensic context
let forensic_context = format!(
"Turn: {}\n\
Role: Player\n\
Context tokens: {}\n\
Total available: {}\n\
Percentage used: {:.1}%\n\
Prompt length: {} chars\n\
Error occurred at: {}",
turn,
agent.get_context_window().used_tokens,
agent.get_context_window().total_tokens,
agent.get_context_window().percentage_used(),
player_prompt.len(),
chrono::Utc::now().to_rfc3339()
);
// Log to session JSON
agent.log_error_to_session(&e, "assistant", Some(forensic_context));
// Mark turn as failed and continue to next turn
player_failed = true;
break;
} else if e.to_string().contains("panic") {
output.print(&format!("💥 Player panic detected: {}", e));
// Generate final report even for panic
let elapsed = start_time.elapsed();
let context_window = agent.get_context_window();
output.print(&format!("\n{}", "=".repeat(60)));
output.print("📊 AUTONOMOUS MODE SESSION REPORT");
output.print(&"=".repeat(60));
output.print(&format!(
"⏱️ Total Duration: {:.2}s",
elapsed.as_secs_f64()
));
output.print(&format!("🔄 Turns Taken: {}/{}", turn, max_turns));
output.print("📝 Final Status: 💥 PLAYER PANIC");
output.print("\n📈 Token Usage Statistics:");
output.print(&format!(
" • Used Tokens: {}",
context_window.used_tokens
));
output.print(&format!(
" • Total Available: {}",
context_window.total_tokens
));
output.print(&format!(
" • Cumulative Tokens: {}",
context_window.cumulative_tokens
));
output.print(&format!(
" • Usage Percentage: {:.1}%",
context_window.percentage_used()
));
// Add per-turn histogram
output.print(&generate_turn_histogram(&turn_metrics));
output.print(&"=".repeat(60));
return Err(e);
}
_player_retry_count += 1;
output.print(&format!(
"⚠️ Player error (attempt {}/{}): {}",
_player_retry_count, MAX_PLAYER_RETRIES, e
));
if _player_retry_count >= MAX_PLAYER_RETRIES {
output.print(
"🔄 Max retries reached for player, marking turn as failed...",
);
player_failed = true;
break; // Exit retry loop
}
output.print("🔄 Retrying player implementation...");
}
}
}
// If player failed after max retries, increment turn and continue
if player_failed {
output.print(&format!(
"⚠️ Player turn {} failed after max retries. Moving to next turn.",
turn
));
// Record turn metrics before incrementing
let turn_duration = turn_start_time.elapsed();
let turn_tokens = agent.get_context_window().used_tokens.saturating_sub(turn_start_tokens);
turn_metrics.push(TurnMetrics {
turn_number: turn,
tokens_used: turn_tokens,
wall_clock_time: turn_duration,
});
turn += 1;
// Check if we've reached max turns
if turn > max_turns {
output.print("\n=== SESSION COMPLETED - MAX TURNS REACHED ===");
output.print(&format!("⏰ Maximum turns ({}) reached", max_turns));
break;
}
// Give some time for file operations to complete
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
// Continue to next iteration with empty feedback (restart from scratch)
coach_feedback = String::new();
continue;
}
// Give some time for file operations to complete
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
// Create a new agent instance for coach mode to ensure fresh context
// Use the same config with overrides that was passed to the player agent
let base_config = agent.get_config().clone();
@@ -1879,6 +1950,9 @@ async fn run_autonomous(
let mut coach_agent =
Agent::new_autonomous_with_readme_and_quiet(coach_config, ui_writer, None, quiet).await?;
// Surface provider info for coach agent
coach_agent.print_provider_banner("Coach");
// Ensure coach agent is also in the workspace directory
project.enter_workspace()?;
@@ -1918,7 +1992,7 @@ Remember: Be clear in your review and concise in your feedback. APPROVE iff the
requirements
);
output.print("🎓 Starting coach review...");
output.print(&format!("🎓 Starting coach review... (elapsed: {})", format_elapsed_time(loop_start.elapsed())));
// Execute coach task with retry on error
let mut coach_retry_count = 0;
@@ -1928,7 +2002,13 @@ Remember: Be clear in your review and concise in your feedback. APPROVE iff the
loop {
match coach_agent
.execute_task_with_timing(&coach_prompt, None, false, show_prompt, show_code, true)
.execute_task_with_timing(&coach_prompt, None, false, show_prompt, show_code, true,
if has_discovery {
Some(DiscoveryOptions {
messages: &discovery_messages,
fast_start_path: discovery_working_dir.as_deref(),
})
} else { None })
.await
{
Ok(result) => {
@@ -2158,9 +2238,9 @@ Remember: Be clear in your review and concise in your feedback. APPROVE iff the
output.print(&"=".repeat(60));
if implementation_approved {
output.print("\n🎉 Autonomous mode completed successfully");
output.print(&format!("\n🎉 Autonomous mode completed successfully (total loop time: {})", format_elapsed_time(loop_start.elapsed())));
} else {
output.print("\n🔄 Autonomous mode terminated (max iterations)");
output.print(&format!("\n🔄 Autonomous mode terminated (max iterations) (total loop time: {})", format_elapsed_time(loop_start.elapsed())));
}
Ok(())

View File

@@ -91,4 +91,18 @@ impl UiWriter for MachineUiWriter {
/// Tells the caller whether this writer wants output in full.
///
/// This implementation unconditionally answers `true`.
fn wants_full_output(&self) -> bool {
true // Machine mode wants complete, untruncated output
}
/// Non-interactive stand-in for a yes/no prompt.
///
/// The question is echoed to stdout behind a `PROMPT_USER_YES_NO: ` marker
/// and the method answers `true` without reading any input, so an automated
/// run is never blocked waiting for a human.
fn prompt_user_yes_no(&self, message: &str) -> bool {
    // Machine mode has nobody to ask; always consent.
    const AUTO_ANSWER: bool = true;

    println!("PROMPT_USER_YES_NO: {}", message);
    AUTO_ANSWER
}
/// Non-interactive stand-in for a multiple-choice prompt.
///
/// Prints the question (`PROMPT_USER_CHOICE: ...`) and the debug rendering
/// of the option list (`OPTIONS: ...`) to stdout, then returns index `0`
/// without reading any input.
fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize {
    // Automation always takes the first option.
    const DEFAULT_CHOICE: usize = 0;

    println!("PROMPT_USER_CHOICE: {}", message);
    println!("OPTIONS: {:?}", options);
    DEFAULT_CHOICE
}
}

View File

@@ -1,4 +1,5 @@
/// Simple output helper for printing messages
///
/// `Clone` is derived so that a copy of the helper can be handed to code
/// that needs its own owned instance.
#[derive(Clone)]
pub struct SimpleOutput {
// NOTE(review): presumably switches printing to the machine-readable
// format; confirm against the `print*` methods, which are not visible here.
machine_mode: bool,
}

View File

@@ -343,5 +343,40 @@ impl UiWriter for ConsoleUiWriter {
/// Push any buffered stdout content to the terminal.
///
/// A failed flush is deliberately ignored; there is nothing useful the
/// writer could do about it.
fn flush(&self) {
    let mut stdout = io::stdout();
    let _ = stdout.flush();
}
/// Ask the user a yes/no question on the console.
///
/// Prints `message` followed by ` [y/N] `, flushes stdout so the prompt is
/// visible, and reads one line from stdin. Only `y` or `yes` (any case,
/// surrounding whitespace ignored) count as consent; every other answer,
/// an empty line, or a read error yields `false`.
fn prompt_user_yes_no(&self, message: &str) -> bool {
    print!("{} [y/N] ", message);
    let _ = io::stdout().flush();

    let mut line = String::new();
    match io::stdin().read_line(&mut line) {
        Ok(_) => matches!(line.trim().to_lowercase().as_str(), "y" | "yes"),
        Err(_) => false,
    }
}
/// Ask the user to pick one of `options` on the console.
///
/// Prints `message`, then the options numbered from 1, and reads lines from
/// stdin until one parses as a number in `1..=options.len()`.
///
/// Returns the zero-based index of the chosen option.
///
/// If stdin reaches end of input (closed or exhausted pipe) no answer can
/// ever arrive, so the method stops prompting and falls back to index `0`,
/// the same default the machine-mode writer uses. Previously this case
/// re-prompted forever, because `read_line` keeps returning `Ok(0)` at EOF.
fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize {
    println!("{} ", message);
    for (i, option) in options.iter().enumerate() {
        println!(" [{}] {}", i + 1, option);
    }
    print!("Select an option (1-{}): ", options.len());
    let _ = io::stdout().flush();

    loop {
        let mut input = String::new();
        match io::stdin().read_line(&mut input) {
            // Zero bytes read means EOF: retrying would spin forever.
            Ok(0) => {
                println!();
                return 0;
            }
            Ok(_) => {
                if let Ok(choice) = input.trim().parse::<usize>() {
                    // Displayed numbers are 1-based; callers get a 0-based index.
                    if choice > 0 && choice <= options.len() {
                        return choice - 1;
                    }
                }
            }
            // A failed read (e.g. invalid UTF-8) is treated like a bad answer.
            Err(_) => {}
        }
        print!("Invalid choice. Please select (1-{}): ", options.len());
        let _ = io::stdout().flush();
    }
}
}

View File

@@ -36,11 +36,20 @@ fn main() {
// Copy the dylib to the output directory so it can be found at runtime
let target_dir = manifest_dir.parent().unwrap().parent().unwrap().join("target");
let profile = env::var("PROFILE").unwrap_or_else(|_| "debug".to_string());
let output_dir = target_dir.join(&profile);
// Determine the actual target directory (could be llvm-cov-target or regular target)
let target_dir_name = env::var("CARGO_TARGET_DIR")
.unwrap_or_else(|_| target_dir.to_string_lossy().to_string());
let actual_target_dir = PathBuf::from(&target_dir_name);
let output_dir = actual_target_dir.join(&profile);
let dylib_src = lib_path.join("libVisionBridge.dylib");
let dylib_dst = output_dir.join("libVisionBridge.dylib");
// Create output directory if it doesn't exist
std::fs::create_dir_all(&output_dir)
.expect(&format!("Failed to create output directory {}", output_dir.display()));
std::fs::copy(&dylib_src, &dylib_dst)
.expect(&format!("Failed to copy dylib from {} to {}", dylib_src.display(), dylib_dst.display()));

View File

@@ -15,3 +15,4 @@ dirs = "5.0"
[dev-dependencies]
tempfile = "3.8"
serde_json = { workspace = true }

View File

@@ -70,10 +70,17 @@ pub struct AgentConfig {
pub max_context_length: Option<u32>,
pub fallback_default_max_tokens: usize,
pub enable_streaming: bool,
pub allow_multiple_tool_calls: bool,
pub timeout_seconds: u64,
pub auto_compact: bool,
pub max_retry_attempts: u32,
pub autonomous_max_retry_attempts: u32,
#[serde(default = "default_check_todo_staleness")]
pub check_todo_staleness: bool,
}
/// Serde default for `AgentConfig::check_todo_staleness`.
///
/// Referenced by the field's `#[serde(default = "...")]` attribute, so a config
/// that omits `check_todo_staleness` deserializes with the check enabled.
fn default_check_todo_staleness() -> bool {
true
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -145,10 +152,12 @@ impl Default for Config {
max_context_length: None,
fallback_default_max_tokens: 8192,
enable_streaming: true,
allow_multiple_tool_calls: false,
timeout_seconds: 60,
auto_compact: true,
max_retry_attempts: 3,
autonomous_max_retry_attempts: 6,
check_todo_staleness: true,
},
computer_control: ComputerControlConfig::default(),
webdriver: WebDriverConfig::default(),
@@ -265,10 +274,12 @@ impl Config {
max_context_length: None,
fallback_default_max_tokens: 8192,
enable_streaming: true,
allow_multiple_tool_calls: false,
timeout_seconds: 60,
auto_compact: true,
max_retry_attempts: 3,
autonomous_max_retry_attempts: 6,
check_todo_staleness: true,
},
computer_control: ComputerControlConfig::default(),
webdriver: WebDriverConfig::default(),

View File

@@ -0,0 +1,40 @@
#[cfg(test)]
mod test_multiple_tool_calls {
    use g3_config::{AgentConfig, Config};

    /// `allow_multiple_tool_calls` must be off in the default config and must be
    /// settable on a config value.
    #[test]
    fn test_config_has_multiple_tool_calls_field() {
        let config = Config::default();

        // Test that the field exists and defaults to false
        assert!(!config.agent.allow_multiple_tool_calls);

        // Test that we can create a config with the field set to true
        let mut custom_config = Config::default();
        custom_config.agent.allow_multiple_tool_calls = true;
        assert!(custom_config.agent.allow_multiple_tool_calls);
    }

    /// `AgentConfig` must survive a JSON round trip with
    /// `allow_multiple_tool_calls` preserved.
    #[test]
    fn test_agent_config_serialization() {
        let agent_config = AgentConfig {
            max_context_length: Some(100000),
            fallback_default_max_tokens: 8192,
            enable_streaming: true,
            allow_multiple_tool_calls: true,
            timeout_seconds: 60,
            auto_compact: true,
            max_retry_attempts: 3,
            autonomous_max_retry_attempts: 6,
            check_todo_staleness: true,
        };

        // Test serialization
        let json = serde_json::to_string(&agent_config).unwrap();
        assert!(json.contains("\"allow_multiple_tool_calls\":true"));

        // Test deserialization
        let deserialized: AgentConfig = serde_json::from_str(&json).unwrap();
        assert!(deserialized.allow_multiple_tool_calls);
    }
}

View File

@@ -6,6 +6,9 @@ authors = ["G3 Team"]
description = "Web console for monitoring and managing g3 instances"
license = "MIT"
[lib]
path = "src/lib.rs"
[[bin]]
name = "g3-console"
path = "src/main.rs"

View File

@@ -0,0 +1,5 @@
pub mod api;
pub mod logs;
pub mod models;
pub mod process;
pub mod launch;

View File

@@ -0,0 +1,256 @@
use crate::models::{InstanceStats, TurnInfo};
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::fs;
use std::path::Path;
/// One message taken from the `messages` array of a session log file.
///
/// `LogParser::parse_logs` fills each optional field only when the matching key
/// is present in the message with the expected JSON type; `raw` always holds
/// the complete message.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LogEntry {
/// The message's `timestamp` string parsed as RFC 3339 and converted to UTC.
pub timestamp: Option<DateTime<Utc>>,
/// The message's `role` string (the code in this file checks for "assistant" and "user").
pub role: Option<String>,
/// The message's `content` when it is a JSON string.
pub content: Option<String>,
/// The message's `tool_calls` when it is a JSON array.
pub tool_calls: Option<Vec<Value>>,
/// Unmodified copy of the whole message; statistics read `usage` and `error` from it.
pub raw: Value,
}
/// A log entry reduced to its conversational part.
///
/// Produced by `LogParser::extract_chat_messages` for entries that have both a
/// role and textual content.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatMessage {
/// Role copied from the log entry.
pub role: String,
/// Text content copied from the log entry.
pub content: String,
/// Timestamp of the originating log entry, if it had one.
pub timestamp: Option<DateTime<Utc>>,
}
/// A single tool invocation recorded in a log entry's `tool_calls` array.
///
/// Produced by `LogParser::extract_tool_calls`; calls without a string `name`
/// are not represented.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
/// The call's `name` string.
pub name: String,
/// The call's `parameters` value, or an empty JSON object when the key is absent.
pub parameters: Value,
/// The call's `result` when it is a JSON string.
pub result: Option<String>,
/// Timestamp of the log entry that contained the call, if it had one.
pub timestamp: Option<DateTime<Utc>>,
}
pub struct LogParser;
impl LogParser {
/// Parse logs from a workspace directory
pub fn parse_logs(workspace: &Path) -> Result<Vec<LogEntry>> {
let logs_dir = workspace.join("logs");
if !logs_dir.exists() {
return Ok(Vec::new());
}
let mut entries = Vec::new();
// Read all JSON log files
for entry in fs::read_dir(&logs_dir).context("Failed to read logs directory")? {
let entry = entry?;
let path = entry.path();
if path.extension().and_then(|s| s.to_str()) == Some("json") {
if let Ok(content) = fs::read_to_string(&path) {
if let Ok(json) = serde_json::from_str::<Value>(&content) {
// Try to parse as a log session
if let Some(messages) = json.get("messages").and_then(|m| m.as_array()) {
for msg in messages {
entries.push(LogEntry {
timestamp: msg.get("timestamp")
.and_then(|t| t.as_str())
.and_then(|s| DateTime::parse_from_rfc3339(s).ok())
.map(|dt| dt.with_timezone(&Utc)),
role: msg.get("role")
.and_then(|r| r.as_str())
.map(String::from),
content: msg.get("content")
.and_then(|c| c.as_str())
.map(String::from),
tool_calls: msg.get("tool_calls")
.and_then(|tc| tc.as_array())
.map(|arr| arr.clone()),
raw: msg.clone(),
});
}
}
}
}
}
}
// Sort by timestamp
entries.sort_by(|a, b| {
match (&a.timestamp, &b.timestamp) {
(Some(t1), Some(t2)) => t1.cmp(t2),
(Some(_), None) => std::cmp::Ordering::Less,
(None, Some(_)) => std::cmp::Ordering::Greater,
(None, None) => std::cmp::Ordering::Equal,
}
});
Ok(entries)
}
/// Extract chat messages from log entries
pub fn extract_chat_messages(entries: &[LogEntry]) -> Vec<ChatMessage> {
entries
.iter()
.filter_map(|entry| {
let role = entry.role.clone()?;
let content = entry.content.clone()?;
Some(ChatMessage {
role,
content,
timestamp: entry.timestamp,
})
})
.collect()
}
/// Extract tool calls from log entries
pub fn extract_tool_calls(entries: &[LogEntry]) -> Vec<ToolCall> {
let mut tool_calls = Vec::new();
for entry in entries {
if let Some(calls) = &entry.tool_calls {
for call in calls {
if let Some(name) = call.get("name").and_then(|n| n.as_str()) {
tool_calls.push(ToolCall {
name: name.to_string(),
parameters: call.get("parameters")
.cloned()
.unwrap_or(Value::Object(serde_json::Map::new())),
result: call.get("result")
.and_then(|r| r.as_str())
.map(String::from),
timestamp: entry.timestamp,
});
}
}
}
}
tool_calls
}
}
/// Namespace for deriving summary statistics from parsed log entries.
pub struct StatsAggregator;

impl StatsAggregator {
    /// Aggregate statistics from log entries.
    ///
    /// # Arguments
    /// * `entries` - log entries in chronological order (as returned by
    ///   `LogParser::parse_logs`, which puts entries without a timestamp last).
    /// * `start_time` - instant the duration is measured from.
    /// * `is_ensemble` - when `true`, per-turn information is extracted too.
    ///
    /// # Returns
    /// Token, tool-call and error totals plus the elapsed seconds between
    /// `start_time` and the latest entry that carries a timestamp. When no entry
    /// has a timestamp the current time is used instead. The duration never goes
    /// below zero.
    pub fn aggregate_stats(
        entries: &[LogEntry],
        start_time: DateTime<Utc>,
        is_ensemble: bool,
    ) -> InstanceStats {
        // Walk backwards to the most recent entry that actually has a timestamp.
        // Looking only at the very last entry would fall back to "now" as soon as
        // a single timestamp-less entry is sorted to the end of the slice.
        let end_time = entries
            .iter()
            .rev()
            .find_map(|entry| entry.timestamp)
            .unwrap_or_else(Utc::now);
        let duration_secs = (end_time - start_time).num_seconds().max(0) as u64;

        let turns = if is_ensemble {
            Some(Self::extract_turns(entries))
        } else {
            None
        };

        InstanceStats {
            total_tokens: Self::count_tokens(entries),
            tool_calls: Self::count_tool_calls(entries),
            errors: Self::count_errors(entries),
            duration_secs,
            turns,
        }
    }

    /// Get the latest message content from log entries.
    ///
    /// Prefers the content of the last assistant entry. When there is no
    /// assistant entry, or the last one has no content, falls back to the last
    /// entry of any role that has content.
    pub fn get_latest_message(entries: &[LogEntry]) -> Option<String> {
        entries
            .iter()
            .rev()
            .find(|entry| entry.role.as_deref() == Some("assistant"))
            .and_then(|entry| entry.content.clone())
            .or_else(|| entries.iter().rev().find_map(|entry| entry.content.clone()))
    }

    /// Sum `usage.total_tokens` over all entries whose raw message carries it.
    fn count_tokens(entries: &[LogEntry]) -> u64 {
        entries
            .iter()
            .filter_map(|entry| {
                entry
                    .raw
                    .get("usage")
                    .and_then(|u| u.get("total_tokens"))
                    .and_then(|t| t.as_u64())
            })
            .sum()
    }

    /// Count the elements of every entry's `tool_calls` array.
    fn count_tool_calls(entries: &[LogEntry]) -> u64 {
        entries
            .iter()
            .filter_map(|entry| entry.tool_calls.as_ref())
            .map(|calls| calls.len() as u64)
            .sum()
    }

    /// Count entries that look like errors: the raw message has an `error` key,
    /// or the content contains the word "error" in any letter case.
    fn count_errors(entries: &[LogEntry]) -> u64 {
        entries
            .iter()
            .filter(|entry| {
                entry.raw.get("error").is_some()
                    || entry
                        .content
                        .as_ref()
                        .map_or(false, |c| c.to_lowercase().contains("error"))
            })
            .count() as u64
    }

    /// Group consecutive assistant entries into turns.
    ///
    /// A turn opens at the first assistant entry after the previous turn closed
    /// and closes at the next user entry. A closed turn is reported only when
    /// both its start (first assistant timestamp seen in the turn) and its end
    /// (the user entry's timestamp) are known. A trailing run of assistant
    /// entries with no following user entry is not reported.
    fn extract_turns(entries: &[LogEntry]) -> Vec<TurnInfo> {
        let mut turns = Vec::new();
        // Whether a turn is open is tracked separately from its start time, so
        // that assistant entries without a timestamp do not each count as a new turn.
        let mut in_turn = false;
        let mut turn_start: Option<DateTime<Utc>> = None;
        let mut turn_count = 0;

        for entry in entries {
            match entry.role.as_deref() {
                Some("assistant") => {
                    if !in_turn {
                        in_turn = true;
                        turn_count += 1;
                    }
                    // Keep the earliest known timestamp of the turn.
                    turn_start = turn_start.or(entry.timestamp);
                }
                Some("user") if in_turn => {
                    if let (Some(start), Some(end)) = (turn_start, entry.timestamp) {
                        turns.push(TurnInfo {
                            agent: format!("agent-{}", turn_count),
                            duration_secs: (end - start).num_seconds().max(0) as u64,
                            status: "completed".to_string(),
                            color: Self::get_turn_color(turn_count),
                        });
                    }
                    in_turn = false;
                    turn_start = None;
                }
                _ => {}
            }
        }

        turns
    }

    /// Pick a colour name for a turn, cycling through a fixed palette.
    fn get_turn_color(turn_number: usize) -> String {
        const COLORS: [&str; 6] = ["blue", "green", "purple", "orange", "pink", "teal"];
        COLORS[turn_number % COLORS.len()].to_string()
    }
}

View File

@@ -1,8 +1,6 @@
mod api;
mod logs;
mod models;
mod process;
mod launch;
use g3_console::api;
use g3_console::process;
use g3_console::launch;
use api::control::{kill_instance, launch_instance, restart_instance};
use api::instances::{get_instance, get_file_content, list_instances};

View File

@@ -3,7 +3,7 @@ use anyhow::Result;
use chrono::{DateTime, Utc};
use std::path::PathBuf;
use sysinfo::{System, Pid, Process};
use tracing::{debug, warn};
use tracing::{debug, info, warn};
pub struct ProcessDetector {
system: System,
@@ -17,7 +17,11 @@ impl ProcessDetector {
}
pub fn detect_instances(&mut self) -> Result<Vec<Instance>> {
self.system.refresh_processes();
info!("Scanning for g3 processes...");
// Refresh all processes to ensure we catch newly started ones
// Using refresh_all() instead of just refresh_processes() to ensure
// we get complete information about new processes
self.system.refresh_all();
let mut instances = Vec::new();
// Find all g3 processes
@@ -33,7 +37,7 @@ impl ProcessDetector {
}
}
debug!("Detected {} g3 instances", instances.len());
info!("Detected {} g3 instances", instances.len());
Ok(instances)
}
@@ -45,24 +49,27 @@ impl ProcessDetector {
) -> Option<Instance> {
let cmd_str = cmd.join(" ");
// Exclude g3-console itself
if cmd_str.contains("g3-console") {
return None;
}
// Check if this is a g3 binary (more comprehensive check)
let is_g3_binary = cmd.get(0).map(|s| {
s.ends_with("g3") || s.ends_with("/g3") || s.contains("/target/release/g3") || s.contains("/target/debug/g3")
(s.ends_with("g3") || s.ends_with("/g3") || s.contains("/target/release/g3") || s.contains("/target/debug/g3"))
&& !s.contains("g3-") // Exclude other g3-* binaries
}).unwrap_or(false);
// Check if this is cargo run with g3
let is_cargo_run = cmd.get(0).map(|s| s.contains("cargo")).unwrap_or(false) && cmd.iter().any(|s| s == "run");
// Check if this is cargo run with g3 (not g3-console or other variants)
let is_cargo_run = cmd.get(0).map(|s| s.contains("cargo")).unwrap_or(false)
&& cmd.iter().any(|s| s == "run")
&& !cmd_str.contains("g3-console");
// Also check if any part of the command line contains g3-related patterns
let has_g3_pattern = cmd_str.contains("g3 ")
|| cmd_str.contains("/g3 ")
|| cmd_str.contains("g3-")
|| cmd_str.ends_with("g3")
|| cmd_str.contains("--workspace") // g3-specific flag
|| cmd_str.contains("--autonomous"); // g3-specific flag
// Also check if command line has g3-specific flags
let has_g3_flags = cmd_str.contains("--workspace") || cmd_str.contains("--autonomous");
// Accept if it's a g3 binary, cargo run with g3 patterns, or has g3-specific flags
let is_g3_process = is_g3_binary || (is_cargo_run && has_g3_pattern) || has_g3_pattern;
// Accept if it's a g3 binary or cargo run with g3, and has typical g3 patterns
let is_g3_process = is_g3_binary || (is_cargo_run && has_g3_flags);
if !is_g3_process {
return None;
@@ -165,7 +172,7 @@ impl ProcessDetector {
}
pub fn get_process_status(&mut self, pid: u32) -> Option<InstanceStatus> {
self.system.refresh_processes();
self.system.refresh_all();
let sysinfo_pid = Pid::from_u32(pid);
if self.system.process(sysinfo_pid).is_some() {

View File

@@ -15,7 +15,7 @@
<div id="app">
<header class="header">
<div class="header-content">
<h1 class="header-title">G3 Console</h1>
<h1 class="header-title">G3 Console <span id="live-indicator" class="live-indicator" title="Scanning for processes every 3 seconds">● LIVE</span></h1>
<div class="header-actions">
<button id="new-run-btn" class="btn btn-primary">+ New Run</button>
<button id="theme-toggle" class="btn btn-secondary">🌙</button>

View File

@@ -6,6 +6,7 @@ const router = {
currentInstanceId: null,
initialized: false,
renderInProgress: false,
REFRESH_INTERVAL_MS: 3000, // Refresh every 3 seconds for live updates
init() {
console.log('[Router] init() called');
@@ -84,6 +85,9 @@ const router = {
this.renderInProgress = true;
try {
// Flash live indicator
this.flashLiveIndicator();
// Check if we already have a container for instances
let instancesList = container.querySelector('.instances-list');
const isInitialLoad = !instancesList;
@@ -167,11 +171,11 @@ const router = {
// Schedule next refresh only if still on home route
if (this.currentRoute === '/' || this.currentRoute === '') {
console.log('[Router] Scheduling auto-refresh in 5 seconds');
console.log(`[Router] Scheduling auto-refresh in ${this.REFRESH_INTERVAL_MS}ms`);
this.refreshTimeout = setTimeout(() => {
console.log('[Router] Auto-refresh triggered');
this.renderHome(container);
}, 5000);
}, this.REFRESH_INTERVAL_MS);
}
} catch (error) {
console.error('[Router] Error in renderHome:', error);
@@ -187,12 +191,26 @@ const router = {
}
},
// Restart the header's "LIVE" indicator animation.
// Does nothing when the #live-indicator element is not in the document.
flashLiveIndicator() {
const indicator = document.getElementById('live-indicator');
if (indicator) {
// Turn the animation off inline first ...
indicator.style.animation = 'none';
// Force reflow
void indicator.offsetWidth;
// ... then clear the inline override again; presumably the stylesheet's
// animation for this element then starts over from the beginning.
indicator.style.animation = null;
indicator.style.opacity = '1';
}
},
async renderDetail(container, id) {
console.log('[Router] renderDetail called for', id);
this.currentInstanceId = id;
try {
// Flash live indicator
this.flashLiveIndicator();
// Check if we already have a detail view for this instance
let detailView = container.querySelector('.detail-view');
const isInitialLoad = !detailView || detailView.getAttribute('data-instance-id') !== id;

View File

@@ -64,6 +64,22 @@ body {
color: var(--text-primary);
}
/* Small "LIVE" badge with a continuous 2s pulse animation. */
.live-indicator {
font-size: 0.625rem; /* 75% of 0.833rem */
font-weight: 600;
color: var(--success);
margin-left: 0.75rem;
display: inline-flex;
align-items: center;
gap: 0.25rem;
animation: pulse 2s ease-in-out infinite;
}
/* Fade from full opacity down to 50% at the midpoint and back again. */
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.5; }
}
.header-actions {
display: flex;
gap: 1rem;

View File

@@ -43,6 +43,8 @@ tree-sitter-scheme = "0.24"
streaming-iterator = "0.1"
walkdir = "2.4"
const_format = "0.2"
[dev-dependencies]
tempfile = "3.8"
serial_test = "3.0"

View File

@@ -48,7 +48,7 @@ pub async fn another_async(x: i32) -> Result<(), ()> {
println!("{}\n", "=".repeat(80));
let mut parser = Parser::new();
let language: Language = tree_sitter_rust::language().into();
let language: Language = tree_sitter_rust::LANGUAGE.into();
parser.set_language(&language)?;
let tree = parser.parse(source_code, None).unwrap();

View File

@@ -46,7 +46,7 @@ class MyClass:
println!("{}\n", "=".repeat(80));
let mut parser = Parser::new();
let language: Language = tree_sitter_python::language().into();
let language: Language = tree_sitter_python::LANGUAGE.into();
parser.set_language(&language)?;
let tree = parser.parse(source_code, None).unwrap();

View File

@@ -1,6 +1,7 @@
//! Test Python async query
use tree_sitter::{Parser, Query, QueryCursor, Language};
use streaming_iterator::StreamingIterator;
fn main() -> anyhow::Result<()> {
let source_code = r#"
@@ -12,7 +13,7 @@ async def async_function():
"#;
let mut parser = Parser::new();
let language: Language = tree_sitter_python::language().into();
let language: Language = tree_sitter_python::LANGUAGE.into();
parser.set_language(&language)?;
let tree = parser.parse(source_code, None).unwrap();

File diff suppressed because it is too large Load Diff

View File

@@ -98,49 +98,6 @@ impl Project {
self.requirements_text.is_some() || self.requirements_path.is_some()
}
/// Check if implementation files exist in the workspace
pub fn has_implementation_files(&self) -> bool {
self.check_dir_for_implementation_files(&self.workspace_dir)
}
/// Recursively check a directory for implementation files
#[allow(clippy::only_used_in_recursion)]
fn check_dir_for_implementation_files(&self, dir: &Path) -> bool {
// Common source file extensions
let extensions = vec![
"swift", "rs", "py", "js", "ts", "java", "cpp", "c",
"go", "rb", "php", "cs", "kt", "scala", "m", "h"
];
if let Ok(entries) = std::fs::read_dir(dir) {
for entry in entries.flatten() {
let path = entry.path();
if path.is_file() {
// Check if it's a source file
if let Some(ext) = path.extension() {
if let Some(ext_str) = ext.to_str() {
if extensions.contains(&ext_str) {
return true;
}
}
}
} else if path.is_dir() {
// Skip hidden directories and common non-source directories
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
if !name.starts_with('.') && name != "logs" && name != "target" && name != "node_modules" {
// Recursively check subdirectories
if self.check_dir_for_implementation_files(&path) {
return true;
}
}
}
}
}
}
false
}
/// Read the requirements file content
pub fn read_requirements(&self) -> Result<Option<String>> {
// Prioritize requirements text override
@@ -181,4 +138,4 @@ impl Project {
}
Ok(())
}
}
}

View File

@@ -0,0 +1,374 @@
use const_format::concatcp;
/// Coding-style guidance for the model. It is appended after
/// `SYSTEM_NATIVE_TOOL_CALLS` to form `SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE`.
/// The text is sent verbatim, so edits here change model behaviour.
const CODING_STYLE: &'static str = "# IMPORTANT FOR CODING:
It is very important that you adhere to these principles when writing code. I will use a code quality tool to assess the code you have generated.
### Most important for coding: Specific guideline for code design:
- Functions and methods should be short - at most 80 lines, ideally under 40.
- Classes should be modular and composable. They should not have more than 20 methods.
- Do not write deeply nested (above 6 levels deep) if, match or case statements, rather refactor into separate logical sections or functions.
- Code should be written such that it is maintainable and testable.
- For Rust code write *ALL* test code into a tests directory that is a peer to the src of each crate, and is for testing code in that crate.
- For Python code write *ALL* test code into a top level tests directory.
- Each non-trivial function should have test coverage. DO NOT WRITE TESTS FOR INDIVIDUAL FUNCTIONS / METHODS / CLASSES unless they are large and important. Instead write something
at a higher level of abstraction, closer to an integration test.
- Write tests in separate files, where the filename should match the main implementation and adding a “_test” suffix.
### Important for coding: General guidelines for code design:
Keep the code as simple as possible, with few if any external dependencies.
DRY (Dont repeat yourself) - each small piece code may only occur exactly once in the entire system.
KISS (Keep it simple, stupid!) - keep each small piece of software simple and unnecessary complexity should be avoided.
YAGNI (You aint gonna need it) - Always implement things when you actually need them never implements things before you need them.
Use Descriptive Names for Code Elements. - As a rule of thumb, use more descriptive names for larger scopes. e.g., name a loop counter variable “i” is good when the scope of the loop is a single line. But dont name some class field or method parameter “i”.
When modifying an existing code base, do not unnecessarily refactor or modify code that is not directly relevant to the current coding task. It is fine to do so if new code calls/is called by the new functionality, or you prevent code duplication when new functionality is added.
If possible constrain the side-effects on other pieces of code if possible, this is part of the principle of modularity.
### Important for coding: General advice on designing algorithms:
If possible, consider the \"Gang of Four\" design patterns when writing code.
The Gang of Four (GOF) patterns are set of 23 common software design patterns introduced in the book
\"Design Patterns: Elements of Reusable Object-Oriented Software\".
These patterns categorize into three main groups:
1. Creational Patterns
2. Structural Patterns
3. Behavioral Patterns
These patterns provide solutions to common design problems and help make software systems more modular, flexible and maintainable. Consider using these patterns in your code design.";
/// Base system prompt for providers with native tool calling: agent role,
/// tool-use rules, TODO workflow, code-search guidance and response guidelines.
///
/// `get_system_prompt_for_native` rewrites step "2. Call the appropriate tool
/// with the required parameters" by exact text match, so keep that line intact.
/// The text is sent verbatim, so edits here change model behaviour.
const SYSTEM_NATIVE_TOOL_CALLS: &str =
"You are G3, an AI programming agent of the same skill level as a seasoned engineer at a major technology company. You analyze given tasks and write code to achieve goals.
You have access to tools. When you need to accomplish a task, you MUST use the appropriate tool. Do not just describe what you would do - actually use the tools.
IMPORTANT: You must call tools to achieve goals. When you receive a request:
1. Analyze and identify what needs to be done
2. Call the appropriate tool with the required parameters
3. Continue or complete the task based on the result
4. If you repeatedly try something and it fails, try a different approach
5. Call the final_output tool with a detailed summary when done.
For shell commands: Use the shell tool with the exact command needed. Avoid commands that produce a large amount of output, and consider piping those outputs to files. Example: If asked to list files, immediately call the shell tool with command parameter \"ls\".
If you create temporary files for verification, place these in a subdir named 'tmp'. Do NOT pollute the current dir.
# Task Management with TODO Tools
**REQUIRED for multi-step tasks.** Use TODO tools when your task involves ANY of:
- Multiple files to create/modify (2+)
- Multiple distinct steps (3+)
- Dependencies between steps
- Testing or verification needed
- Uncertainty about approach
## Workflow
Every multi-step task follows this pattern:
1. **Start**: Call todo_read, then todo_write to create your plan
2. **During**: Execute steps, then todo_read and todo_write to mark progress
3. **End**: Call todo_read to verify all items complete
Note: todo_write replaces the entire todo.g3.md file, so always read first to preserve content. TODO lists persist across g3 sessions in the workspace directory.
IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format:
`{{Based on the requirements file with SHA256: <SHA>}}`
This ensures the TODO list is tracked against the specific version of requirements it was generated from.
## Examples
**Example 1: Feature Implementation**
User asks: \"Add user authentication with tests\"
First action:
{\"tool\": \"todo_read\", \"args\": {}}
Then create plan:
{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Add user authentication\\n - [ ] Create User struct\\n - [ ] Add login endpoint\\n - [ ] Add password hashing\\n - [ ] Write unit tests\\n - [ ] Write integration tests\"}}
After completing User struct:
{\"tool\": \"todo_read\", \"args\": {}}
{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Add user authentication\\n - [x] Create User struct\\n - [ ] Add login endpoint\\n - [ ] Add password hashing\\n - [ ] Write unit tests\\n - [ ] Write integration tests\"}}
**Example 2: Bug Fix**
User asks: \"Fix the memory leak in cache module\"
{\"tool\": \"todo_read\", \"args\": {}}
{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Fix memory leak\\n - [ ] Review cache.rs\\n - [ ] Check for unclosed resources\\n - [ ] Add drop implementation\\n - [ ] Write test to verify fix\"}}
**Example 3: Refactoring**
User asks: \"Refactor database layer to use async/await\"
{\"tool\": \"todo_read\", \"args\": {}}
{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Refactor to async\\n - [ ] Update function signatures\\n - [ ] Replace blocking calls\\n - [ ] Update all callers\\n - [ ] Update tests\"}}
## Format
Use markdown checkboxes:
- \"- [ ]\" for incomplete tasks
- \"- [x]\" for completed tasks
- Indent with 2 spaces for subtasks
Keep items short, specific, and action-oriented.
## Benefits
✓ Prevents missed steps
✓ Makes progress visible
✓ Helps recover from interruptions
✓ Creates better summaries
## When NOT to Use
Skip TODO tools for simple single-step tasks:
- \"List files\" → just use shell
- \"Read config.json\" → just use read_file
- \"Search for functions\" → just use code_search
If you can complete it with 1-2 tool calls, skip TODO.
# Code Search Guidelines
IMPORTANT: When searching for code constructs (functions, classes, methods, structs, etc.), ALWAYS use `code_search` instead of shell grep/rg.
It's syntax-aware and finds actual code, not comments or strings. Only use shell grep for:
- Searching non-code files (logs, markdown, text)
- Simple string searches across all file types
- When you need regex for text content (not code structure)
Common code_search query patterns:
**Rust:**
- All functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"functions\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\"}]}}
- Async functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"async_fns\", \"query\": \"(function_item (function_modifiers) name: (identifier) @name)\", \"language\": \"rust\"}]}}
- Structs: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"structs\", \"query\": \"(struct_item name: (type_identifier) @name)\", \"language\": \"rust\"}]}}
- Enums: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"enums\", \"query\": \"(enum_item name: (type_identifier) @name)\", \"language\": \"rust\"}]}}
- Impl blocks: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"impls\", \"query\": \"(impl_item type: (type_identifier) @name)\", \"language\": \"rust\"}]}}
**Python:**
- Functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"functions\", \"query\": \"(function_definition name: (identifier) @name)\", \"language\": \"python\"}]}}
- Classes: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"classes\", \"query\": \"(class_definition name: (identifier) @name)\", \"language\": \"python\"}]}}
**JavaScript/TypeScript:**
- Functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"functions\", \"query\": \"(function_declaration name: (identifier) @name)\", \"language\": \"javascript\"}]}}
- Classes: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"classes\", \"query\": \"(class_declaration name: (identifier) @name)\", \"language\": \"javascript\"}]}}
- Arrow functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"arrow_fns\", \"query\": \"(arrow_function) @fn\", \"language\": \"javascript\"}]}}
**Go:**
- Functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"functions\", \"query\": \"(function_declaration name: (identifier) @name)\", \"language\": \"go\"}]}}
- Methods: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"methods\", \"query\": \"(method_declaration name: (field_identifier) @name)\", \"language\": \"go\"}]}}
**Java/C++:**
- Classes: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"classes\", \"query\": \"(class_declaration name: (identifier) @name)\", \"language\": \"java\"}]}}
- Methods: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"methods\", \"query\": \"(method_declaration name: (identifier) @name)\", \"language\": \"java\"}]}}
**Advanced features:**
- Multiple searches: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\"}, {\"name\": \"structs\", \"query\": \"(struct_item name: (type_identifier) @name)\", \"language\": \"rust\"}]}}
- With context: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\", \"context_lines\": 3}]}}
- Specific paths: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\", \"paths\": [\"src/core\"]}]}}
IMPORTANT: If the user asks you to just respond with text (like \"just say hello\" or \"tell me about X\"), do NOT use tools. Simply respond with the requested text directly. Only use tools when you need to execute commands or complete tasks that require action.
When taking screenshots of specific windows (like \"my Safari window\" or \"my terminal\"), ALWAYS use list_windows first to identify the correct window ID, then use take_screenshot with the window_id parameter.
Do not explain what you're going to do - just do it by calling the tools.
# Response Guidelines
- Use Markdown formatting for all responses except tool calls.
- Whenever taking actions, use the pronoun 'I'
";
/// Complete system prompt for native tool use: `SYSTEM_NATIVE_TOOL_CALLS`
/// followed directly by `CODING_STYLE`, concatenated at compile time.
pub const SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE: &'static str =
concatcp!(SYSTEM_NATIVE_TOOL_CALLS, CODING_STYLE);
/// Generate system prompt based on whether multiple tool calls are allowed.
///
/// # Arguments
/// * `allow_multiple` - when `true`, step 2 of the request-handling list is
///   replaced by a variant that permits several tool calls and is followed by a
///   `<use_parallel_tool_calls>` section encouraging parallel calls.
///
/// # Returns
/// `SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE`, unchanged when `allow_multiple` is
/// `false`. The rewrite is an exact text match on the step-2 sentence; if that
/// sentence is ever reworded in the prompt, the prompt is returned unmodified.
pub fn get_system_prompt_for_native(allow_multiple: bool) -> String {
    if allow_multiple {
        // Swap the single-tool wording of step 2 for the parallel-tools wording.
        SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.replace(
            "2. Call the appropriate tool with the required parameters",
            "2. Call the appropriate tool(s) with the required parameters - you may call multiple tools in parallel when appropriate.
<use_parallel_tool_calls>
For maximum efficiency, whenever you perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially. Prioritize calling tools in parallel whenever possible. For example, when reading 3 files, run 3 tool calls in parallel to read all 3 files into context at the same time. When running multiple read-only commands like `ls` or `list_dir`, always run all of the commands in parallel. Err on the side of maximizing parallel tool calls rather than running too many tools sequentially.
</use_parallel_tool_calls>
",
        )
    } else {
        SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string()
    }
}
/// System prompt body for providers without native tool calling. It spells out
/// the JSON tool-call format the model must write, lists the available tools
/// with examples, and describes the TODO workflow.
///
/// Every JSON example in this text must be a complete, balanced object.
const SYSTEM_NON_NATIVE_TOOL_USE: &'static str =
"You are G3, a general-purpose AI agent. Your goal is to analyze and solve problems by writing code.
You have access to tools. When you need to accomplish a task, you MUST use the appropriate tool. Do not just describe what you would do - actually use the tools.
# Tool Call Format
When you need to execute a tool, write ONLY the JSON tool call on a new line:
{\"tool\": \"tool_name\", \"args\": {\"param\": \"value\"}}
The tool will execute immediately and you'll receive the result (success or error) to continue with.
# Available Tools
Short description for providers without native calling specs:
- **shell**: Execute shell commands
- Format: {\"tool\": \"shell\", \"args\": {\"command\": \"your_command_here\"}}
- Example: {\"tool\": \"shell\", \"args\": {\"command\": \"ls ~/Downloads\"}}
- **read_file**: Read the contents of a file (supports partial reads via start/end)
- Format: {\"tool\": \"read_file\", \"args\": {\"file_path\": \"path/to/file\", \"start\": 0, \"end\": 100}}
- Example: {\"tool\": \"read_file\", \"args\": {\"file_path\": \"src/main.rs\"}}
- Example (partial): {\"tool\": \"read_file\", \"args\": {\"file_path\": \"large.log\", \"start\": 0, \"end\": 1000}}
- **write_file**: Write content to a file (creates or overwrites)
- Format: {\"tool\": \"write_file\", \"args\": {\"file_path\": \"path/to/file\", \"content\": \"file content\"}}
- Example: {\"tool\": \"write_file\", \"args\": {\"file_path\": \"src/lib.rs\", \"content\": \"pub fn hello() {}\"}}
- **str_replace**: Replace text in a file using a diff
- Format: {\"tool\": \"str_replace\", \"args\": {\"file_path\": \"path/to/file\", \"diff\": \"--- old\\n-old text\\n+++ new\\n+new text\"}}
- Example: {\"tool\": \"str_replace\", \"args\": {\"file_path\": \"src/main.rs\", \"diff\": \"--- old\\n-old_code();\\n+++ new\\n+new_code();\"}}
- **final_output**: Signal task completion with a detailed summary of work done in markdown format
- Format: {\"tool\": \"final_output\", \"args\": {\"summary\": \"what_was_accomplished\"}}
- **todo_read**: Read the entire TODO list from todo.g3.md file in workspace directory
- Format: {\"tool\": \"todo_read\", \"args\": {}}
- Example: {\"tool\": \"todo_read\", \"args\": {}}
- **todo_write**: Write or overwrite the entire todo.g3.md file (WARNING: overwrites completely, always read first)
- Format: {\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Task 1\\n- [ ] Task 2\"}}
- Example: {\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Implement feature\\n - [ ] Write tests\\n - [ ] Run tests\"}}
- **code_search**: Syntax-aware code search using tree-sitter. Supports Rust, Python, JavaScript, TypeScript.
- Format: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"label\", \"query\": \"tree-sitter query\", \"language\": \"rust|python|javascript|typescript\", \"paths\": [\"src/\"], \"context_lines\": 0}]}}
- Find functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"find_functions\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\", \"paths\": [\"src/\"]}]}}
- Find async functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"find_async\", \"query\": \"(function_item (function_modifiers) name: (identifier) @name)\", \"language\": \"rust\"}]}}
- Find structs: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"structs\", \"query\": \"(struct_item name: (type_identifier) @name)\", \"language\": \"rust\"}]}}
- Multiple searches: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\"}, {\"name\": \"structs\", \"query\": \"(struct_item name: (type_identifier) @name)\", \"language\": \"rust\"}]}}
- With context lines: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\", \"context_lines\": 3}]}}
- \"context\": 3 (show surrounding lines),
- \"json_style\": \"stream\" (for large results)
# Instructions
1. Analyze the request and break down into smaller tasks if appropriate
2. Execute ONE tool at a time. An exception exists for when you're writing files. See below.
3. STOP when the original request was satisfied
4. Call the final_output tool when done
For reading files, prioritize use of code_search tool use with multiple search requests per call instead of read_file, if it makes sense.
Exception to using ONE tool at a time:
If all youre doing is WRITING files, and you dont need to do anything else between each step.
You can issue MULTIPLE write_file tool calls in a request, however you may ONLY make a SINGLE write_file call for any file in that request.
For example you may call:
[START OF REQUEST]
write_file(\"helper.rs\", \"...\")
write_file(\"file2.txt\", \"...\")
[DONE]
But NOT:
[START OF REQUEST]
write_file(\"helper.rs\", \"...\")
write_file(\"file2.txt\", \"...\")
write_file(\"helper.rs\", \"...\")
[DONE]
# Task Management with TODO Tools
**REQUIRED for multi-step tasks.** Use TODO tools when your task involves ANY of:
- Multiple files to create/modify (2+)
- Multiple distinct steps (3+)
- Dependencies between steps
- Testing or verification needed
- Uncertainty about approach
## Workflow
Every multi-step task follows this pattern:
1. **Start**: Call todo_read, then todo_write to create your plan
2. **During**: Execute steps, then todo_read and todo_write to mark progress
3. **End**: Call todo_read to verify all items complete
Note: todo_write replaces the entire list, so always read first to preserve content.
IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format:
`{{Based on the requirements file with SHA256: <SHA>}}`
This ensures the TODO list is tracked against the specific version of requirements it was generated from.
## Examples
**Example 1: Feature Implementation**
User asks: \"Add user authentication with tests\"
First action:
{\"tool\": \"todo_read\", \"args\": {}}
Then create plan:
{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Add user authentication\\n - [ ] Create User struct\\n - [ ] Add login endpoint\\n - [ ] Add password hashing\\n - [ ] Write unit tests\\n - [ ] Write integration tests\"}}
After completing User struct:
{\"tool\": \"todo_read\", \"args\": {}}
{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Add user authentication\\n - [x] Create User struct\\n - [ ] Add login endpoint\\n - [ ] Add password hashing\\n - [ ] Write unit tests\\n - [ ] Write integration tests\"}}
**Example 2: Bug Fix**
User asks: \"Fix the memory leak in cache module\"
{\"tool\": \"todo_read\", \"args\": {}}
{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Fix memory leak\\n - [ ] Review cache.rs\\n - [ ] Check for unclosed resources\\n - [ ] Add drop implementation\\n - [ ] Write test to verify fix\"}}
**Example 3: Refactoring**
User asks: \"Refactor database layer to use async/await\"
{\"tool\": \"todo_read\", \"args\": {}}
{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Refactor to async\\n - [ ] Update function signatures\\n - [ ] Replace blocking calls\\n - [ ] Update all callers\\n - [ ] Update tests\"}}
## Format
Use markdown checkboxes:
- \"- [ ]\" for incomplete tasks
- \"- [x]\" for completed tasks
- Indent with 2 spaces for subtasks
Keep items short, specific, and action-oriented.
## Benefits
✓ Prevents missed steps
✓ Makes progress visible
✓ Helps recover from interruptions
✓ Creates better summaries
## When NOT to Use
Skip TODO tools for simple single-step tasks:
- \"List files\" → just use shell
- \"Read config.json\" → just use read_file
- \"Search for functions\" → just use code_search
If you can complete it with 1-2 tool calls, skip TODO.
# Response Guidelines
- Use Markdown formatting for all responses except tool calls.
- Whenever taking actions, use the pronoun 'I'
";
/// Complete system prompt for non-native tool calling: the text of
/// `SYSTEM_NON_NATIVE_TOOL_USE` immediately followed by `CODING_STYLE`.
pub const SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE: &'static str =
concatcp!(SYSTEM_NON_NATIVE_TOOL_USE, CODING_STYLE);

View File

@@ -56,6 +56,13 @@ pub trait UiWriter: Send + Sync {
/// Returns true if this UI writer wants full, untruncated output
/// Default is false (truncate for human readability)
fn wants_full_output(&self) -> bool { false }
/// Prompt the user for a yes/no confirmation
fn prompt_user_yes_no(&self, message: &str) -> bool;
/// Prompt the user to choose from a list of options
/// Returns the index of the selected option
fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize;
}
/// A no-op implementation for when UI output is not needed
@@ -80,4 +87,6 @@ impl UiWriter for NullUiWriter {
fn notify_sse_received(&self) {}
fn flush(&self) {}
fn wants_full_output(&self) -> bool { false }
// Never prompts: the message is ignored and the answer is always "yes".
fn prompt_user_yes_no(&self, _message: &str) -> bool { true }
// Never prompts: the message and options are ignored and index 0 is returned.
fn prompt_user_choice(&self, _message: &str, _options: &[&str]) -> usize { 0 }
}

View File

@@ -551,6 +551,7 @@ async fn test_cpp_search() {
}
#[tokio::test]
#[ignore]
async fn test_kotlin_search() {
let request = CodeSearchRequest {
searches: vec![SearchSpec {

View File

@@ -0,0 +1,193 @@
use g3_core::{Agent, ToolCall};
use g3_core::ui_writer::UiWriter;
use g3_config::Config;
use std::sync::{Arc, Mutex};
use tempfile::TempDir;
use serial_test::serial;
// Mock UI Writer for testing
// Captures printed messages and answers prompts from pre-loaded responses.
// Clones share the same underlying buffers because every field is an Arc.
#[derive(Clone)]
struct MockUiWriter {
// Messages captured by the print/status/prompt methods, in call order
output: Arc<Mutex<Vec<String>>>,
// Pre-loaded yes/no answers; taken from the end of the Vec (last pushed is used first)
prompt_responses: Arc<Mutex<Vec<bool>>>,
// Pre-loaded choice indices; taken from the end of the Vec (last pushed is used first)
choice_responses: Arc<Mutex<Vec<usize>>>,
}
impl MockUiWriter {
fn new() -> Self {
Self {
output: Arc::new(Mutex::new(Vec::new())),
prompt_responses: Arc::new(Mutex::new(Vec::new())),
choice_responses: Arc::new(Mutex::new(Vec::new())),
}
}
fn set_prompt_response(&self, response: bool) {
self.prompt_responses.lock().unwrap().push(response);
}
fn set_choice_response(&self, response: usize) {
self.choice_responses.lock().unwrap().push(response);
}
fn get_output(&self) -> Vec<String> {
self.output.lock().unwrap().clone()
}
}
impl UiWriter for MockUiWriter {
    // --- Methods that record what they were given -------------------------

    fn print(&self, message: &str) {
        let line = message.to_string();
        self.output.lock().unwrap().push(line);
    }

    fn println(&self, message: &str) {
        let line = message.to_string();
        self.output.lock().unwrap().push(line);
    }

    fn print_inline(&self, message: &str) {
        let line = message.to_string();
        self.output.lock().unwrap().push(line);
    }

    fn print_context_status(&self, message: &str) {
        let line = format!("STATUS: {}", message);
        self.output.lock().unwrap().push(line);
    }

    // --- Methods the tests do not observe: intentionally no-ops -----------

    fn print_system_prompt(&self, _prompt: &str) {}
    fn print_context_thinning(&self, _message: &str) {}
    fn print_tool_header(&self, _tool_name: &str) {}
    fn print_tool_arg(&self, _key: &str, _value: &str) {}
    fn print_tool_output_header(&self) {}
    fn update_tool_output_line(&self, _line: &str) {}
    fn print_tool_output_line(&self, _line: &str) {}
    fn print_tool_output_summary(&self, _hidden_count: usize) {}
    fn print_tool_timing(&self, _duration_str: &str) {}
    fn print_agent_prompt(&self) {}
    fn print_agent_response(&self, _content: &str) {}
    fn notify_sse_received(&self) {}
    fn flush(&self) {}

    fn wants_full_output(&self) -> bool {
        false
    }

    // --- Prompts: log the question, then answer from the pre-loaded Vec ---

    /// Logs the question and returns the most recently pre-loaded answer,
    /// or `true` when none is left.
    fn prompt_user_yes_no(&self, message: &str) -> bool {
        let line = format!("PROMPT: {}", message);
        self.output.lock().unwrap().push(line);

        let answer = self.prompt_responses.lock().unwrap().pop();
        answer.unwrap_or(true)
    }

    /// Logs the question with its options and returns the most recently
    /// pre-loaded index, or `0` when none is left.
    fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize {
        let line = format!("CHOICE: {} Options: {:?}", message, options);
        self.output.lock().unwrap().push(line);

        let answer = self.choice_responses.lock().unwrap().pop();
        answer.unwrap_or(0)
    }
}
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_matching_sha() {
    // The TODO header carries the same SHA the agent is given, so reading the
    // list must succeed without any staleness warning.
    let workspace = TempDir::new().unwrap();
    std::env::set_current_dir(&workspace).unwrap();

    let requirements_sha = "abc123hash";
    let todo_body = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", requirements_sha);
    std::fs::write(workspace.path().join("todo.g3.md"), todo_body).unwrap();

    let mut config = Config::default();
    config.agent.check_todo_staleness = true;

    let mut agent = Agent::new_autonomous(config, MockUiWriter::new()).await.unwrap();
    agent.set_requirements_sha(requirements_sha.to_string());

    let read_todo = ToolCall {
        tool: String::from("todo_read"),
        args: serde_json::json!({}),
    };
    let output = agent.execute_tool(&read_todo).await.unwrap();

    assert!(output.contains("📝 TODO list:"));
    assert!(!output.contains("⚠️ TODO list is stale"));
}
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_mismatch_sha_ignore() {
    // The TODO header and the agent disagree on the SHA; the mock answers the
    // choice prompt with option 0 ("Ignore"), so the list is still returned.
    let workspace = TempDir::new().unwrap();
    std::env::set_current_dir(&workspace).unwrap();

    let recorded_sha = "old_sha";
    let current_sha = "new_sha";
    let todo_body = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", recorded_sha);
    std::fs::write(workspace.path().join("todo.g3.md"), todo_body).unwrap();

    let mut config = Config::default();
    config.agent.check_todo_staleness = true;

    let ui = MockUiWriter::new();
    ui.set_choice_response(0); // Ignore

    let mut agent = Agent::new_autonomous(config, ui).await.unwrap();
    agent.set_requirements_sha(current_sha.to_string());

    let read_todo = ToolCall {
        tool: String::from("todo_read"),
        args: serde_json::json!({}),
    };
    let output = agent.execute_tool(&read_todo).await.unwrap();

    assert!(output.contains("📝 TODO list:"));
}
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_mismatch_sha_mark_stale() {
    // The TODO header and the agent disagree on the SHA; the mock answers the
    // choice prompt with option 1 ("Mark as Stale"), so the result must carry
    // the staleness warning and the regeneration hint.
    let workspace = TempDir::new().unwrap();
    std::env::set_current_dir(&workspace).unwrap();

    let recorded_sha = "old_sha";
    let current_sha = "new_sha";
    let todo_body = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", recorded_sha);
    std::fs::write(workspace.path().join("todo.g3.md"), todo_body).unwrap();

    let mut config = Config::default();
    config.agent.check_todo_staleness = true;

    let ui = MockUiWriter::new();
    ui.set_choice_response(1); // Mark as Stale

    let mut agent = Agent::new_autonomous(config, ui).await.unwrap();
    agent.set_requirements_sha(current_sha.to_string());

    let read_todo = ToolCall {
        tool: String::from("todo_read"),
        args: serde_json::json!({}),
    };
    let output = agent.execute_tool(&read_todo).await.unwrap();

    assert!(output.contains("⚠️ TODO list is stale"));
    assert!(output.contains("Please regenerate"));
}
// Note: We cannot easily test "Quit" (index 2) because it calls std::process::exit(0)
// which would kill the test runner. We skip that test case here.
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_disabled() {
    // The SHAs disagree but the staleness check is switched off in the config,
    // so the list is returned as usual.
    let workspace = TempDir::new().unwrap();
    std::env::set_current_dir(&workspace).unwrap();

    let recorded_sha = "old_sha";
    let current_sha = "new_sha";
    let todo_body = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", recorded_sha);
    std::fs::write(workspace.path().join("todo.g3.md"), todo_body).unwrap();

    let mut config = Config::default();
    config.agent.check_todo_staleness = false;

    let mut agent = Agent::new_autonomous(config, MockUiWriter::new()).await.unwrap();
    agent.set_requirements_sha(current_sha.to_string());

    let read_todo = ToolCall {
        tool: String::from("todo_read"),
        args: serde_json::json!({}),
    };
    let output = agent.execute_tool(&read_todo).await.unwrap();

    assert!(output.contains("📝 TODO list:"));
}

View File

@@ -0,0 +1,13 @@
use g3_execution::ensure_coverage_tools_installed;
/// Make sure the coverage tooling is present, then report whether anything
/// had to be installed. Any check or install failure is returned as the error.
fn main() -> anyhow::Result<()> {
    // `true` means nothing had to be installed.
    let summary = if ensure_coverage_tools_installed()? {
        "All coverage tools are already installed!"
    } else {
        "Coverage tools have been installed successfully!"
    };
    println!("{}", summary);
    Ok(())
}

View File

@@ -5,6 +5,17 @@ use tempfile::NamedTempFile;
use std::io::Write;
use tracing::{info, debug, error};
/// Expand a leading tilde (`~`) in a path to the user's home directory.
///
/// Only the current-user forms are expanded: a bare `~` and paths starting
/// with `~/`. Anything else — including `~user/...`, which names another
/// user's home — is returned unchanged rather than being spliced onto `$HOME`.
/// The path is also returned unchanged when `HOME` is not set.
fn expand_tilde(path: &str) -> String {
    let refers_to_home = path == "~" || path.starts_with("~/");
    if refers_to_home {
        if let Some(home) = std::env::var_os("HOME") {
            // '~' is one ASCII byte, so slicing at index 1 is always valid.
            return format!("{}{}", home.to_string_lossy(), &path[1..]);
        }
    }
    path.to_string()
}
/// Handle type whose methods execute code; it currently has no fields.
pub struct CodeExecutor {
// Future: add configuration for execution limits, sandboxing, etc.
}
@@ -241,11 +252,33 @@ impl CodeExecutor {
&self,
code: &str,
receiver: &R
) -> Result<ExecutionResult> {
self.execute_bash_streaming_in_dir(code, receiver, None).await
}
/// Execute bash command with streaming output in a specific directory
pub async fn execute_bash_streaming_in_dir<R: OutputReceiver>(
&self,
code: &str,
receiver: &R,
working_dir: Option<&str>,
) -> Result<ExecutionResult> {
use std::process::Stdio;
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command as TokioCommand;
// CRITICAL DEBUG: Print to stderr so it's always visible
debug!("========== execute_bash_streaming_in_dir START ==========");
debug!("Code to execute: {}", code);
debug!("Working directory parameter: {:?}", working_dir);
debug!("FULL DIAGNOSTIC: code='{}', working_dir={:?}", code, working_dir);
if let Some(dir) = working_dir {
debug!("Working dir exists check: {}", std::path::Path::new(dir).exists());
debug!("Working dir is_dir check: {}", std::path::Path::new(dir).is_dir());
}
debug!("Current process working directory: {:?}", std::env::current_dir());
// Check if this is a detached/daemon command that should run independently
// Look for patterns like: setsid, nohup with &, or explicit backgrounding with disown
let is_detached = code.trim_start().starts_with("setsid ")
@@ -255,10 +288,17 @@ impl CodeExecutor {
if is_detached {
// For detached commands, just spawn and return immediately
TokioCommand::new("bash")
.arg("-c")
.arg(code)
.spawn()?;
let mut cmd = TokioCommand::new("bash");
cmd.arg("-c")
.arg(code);
// Set working directory if provided
if let Some(dir) = working_dir {
let expanded_dir = expand_tilde(dir);
cmd.current_dir(&expanded_dir);
}
cmd.spawn()?;
// Don't wait for the process - it's meant to run independently
return Ok(ExecutionResult {
@@ -269,12 +309,33 @@ impl CodeExecutor {
});
}
let mut child = TokioCommand::new("bash")
.arg("-c")
let mut cmd = TokioCommand::new("bash");
cmd.arg("-c")
.arg(code)
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()?;
.stderr(Stdio::piped());
// Set working directory if provided
if let Some(dir) = working_dir {
debug!("Setting current_dir on command to: {}", dir);
let expanded_dir = expand_tilde(dir);
debug!("Expanded working dir: {}", expanded_dir);
debug!("Expanded dir exists: {}", std::path::Path::new(&expanded_dir).exists());
debug!("Expanded dir is_dir: {}", std::path::Path::new(&expanded_dir).is_dir());
cmd.current_dir(&expanded_dir);
}
debug!("About to spawn command...");
let spawn_result = cmd.spawn();
debug!("Spawn result: {:?}", spawn_result.is_ok());
let mut child = match spawn_result {
Ok(c) => c,
Err(e) => {
debug!("SPAWN ERROR: {:?}", e);
return Err(e.into());
}
};
debug!("Command spawned successfully");
let stdout = child.stdout.take().unwrap();
let stderr = child.stderr.take().unwrap();
@@ -322,11 +383,106 @@ impl CodeExecutor {
let status = child.wait().await?;
Ok(ExecutionResult {
let result = ExecutionResult {
stdout: stdout_output.join("\n"),
stderr: stderr_output.join("\n"),
exit_code: status.code().unwrap_or(-1),
success: status.success(),
})
};
debug!("========== execute_bash_streaming_in_dir END ==========");
debug!("Exit code: {}", result.exit_code);
debug!("Success: {}", result.success);
debug!("Stdout length: {}", result.stdout.len());
debug!("Stderr length: {}", result.stderr.len());
if !result.stderr.is_empty() {
debug!("Stderr content: {}", result.stderr);
}
Ok(result)
}
}
/// Report whether `rustup component list --installed` mentions the LLVM tools.
///
/// A line counts as a match when it starts with `llvm-tools`, or when it equals
/// `llvm-tools-preview` after trimming. Returns an error only if `rustup`
/// cannot be run; the command's exit status is not inspected.
pub fn is_llvm_tools_installed() -> Result<bool> {
    let listing = Command::new("rustup")
        .args(["component", "list", "--installed"])
        .output()?;
    let text = String::from_utf8_lossy(&listing.stdout);

    for line in text.lines() {
        if line.starts_with("llvm-tools") || line.trim() == "llvm-tools-preview" {
            return Ok(true);
        }
    }
    Ok(false)
}
/// Report whether `cargo --list` shows an `llvm-cov` subcommand.
///
/// A line counts as a match when, after trimming, it starts with `llvm-cov`.
/// Returns an error only if `cargo` cannot be run; the command's exit status
/// is not inspected.
pub fn is_cargo_llvm_cov_installed() -> Result<bool> {
    let listing = Command::new("cargo").args(["--list"]).output()?;
    let text = String::from_utf8_lossy(&listing.stdout);

    for line in text.lines() {
        if line.trim().starts_with("llvm-cov") {
            return Ok(true);
        }
    }
    Ok(false)
}
/// Run `rustup component add llvm-tools-preview`.
///
/// Fails if `rustup` cannot be run, or if it exits unsuccessfully — in which
/// case the error message includes the captured stderr.
pub fn install_llvm_tools() -> Result<()> {
    info!("Installing llvm-tools-preview...");

    let outcome = Command::new("rustup")
        .args(["component", "add", "llvm-tools-preview"])
        .output()?;

    if outcome.status.success() {
        info!("✅ llvm-tools-preview installed successfully");
        return Ok(());
    }

    let stderr = String::from_utf8_lossy(&outcome.stderr);
    anyhow::bail!("Failed to install llvm-tools-preview: {}", stderr)
}
/// Run `cargo install cargo-llvm-cov`.
///
/// Fails if `cargo` cannot be run, or if it exits unsuccessfully — in which
/// case the error message includes the captured stderr.
pub fn install_cargo_llvm_cov() -> Result<()> {
    info!("Installing cargo-llvm-cov... (this may take a few minutes)");

    let outcome = Command::new("cargo")
        .args(["install", "cargo-llvm-cov"])
        .output()?;

    if outcome.status.success() {
        info!("✅ cargo-llvm-cov installed successfully");
        return Ok(());
    }

    let stderr = String::from_utf8_lossy(&outcome.stderr);
    anyhow::bail!("Failed to install cargo-llvm-cov: {}", stderr)
}
/// Make sure both llvm-tools-preview and cargo-llvm-cov are available,
/// installing whichever one is missing.
///
/// Returns `Ok(true)` when both were already present and `Ok(false)` when at
/// least one had to be installed. The first failing check or install aborts
/// with that error.
pub fn ensure_coverage_tools_installed() -> Result<bool> {
    // Step 1: llvm-tools-preview (rustup component).
    let llvm_tools_present = is_llvm_tools_installed()?;
    if llvm_tools_present {
        info!("✅ llvm-tools-preview is already installed");
    } else {
        info!("llvm-tools-preview not found, installing...");
        install_llvm_tools()?;
    }

    // Step 2: cargo-llvm-cov (cargo subcommand).
    let llvm_cov_present = is_cargo_llvm_cov_installed()?;
    if llvm_cov_present {
        info!("✅ cargo-llvm-cov is already installed");
    } else {
        info!("cargo-llvm-cov not found, installing...");
        install_cargo_llvm_cov()?;
    }

    Ok(llvm_tools_present && llvm_cov_present)
}

View File

@@ -0,0 +1,14 @@
[package]
name = "g3-planner"
version = "0.1.0"
edition = "2021"
description = "Fast-discovery planner for G3 AI coding agent"
[dependencies]
g3-providers = { path = "../g3-providers" }
serde = { workspace = true }
serde_json = { workspace = true }
const_format = "0.2"
anyhow = { workspace = true }
tokio = { workspace = true }
chrono = { version = "0.4", features = ["serde"] }

View File

@@ -0,0 +1,724 @@
//! Code exploration module for analyzing codebases
//!
//! This module provides functions to explore and analyze codebases
//! for various programming languages, returning structured reports
//! about the code structure.
use std::path::Path;
use std::process::Command;
/// Main entry point for exploring a codebase at the given path.
/// Detects which languages are present and generates a comprehensive report.
pub fn explore_codebase(path: &str) -> String {
let path = expand_tilde(path);
let mut report = String::new();
let mut languages_found = Vec::new();
// Check for each language and add to report if found
if has_rust_files(&path) {
languages_found.push("Rust".to_string());
report.push_str(&explore_rust(&path));
}
if has_java_files(&path) {
languages_found.push("Java".to_string());
report.push_str(&explore_java(&path));
}
if has_kotlin_files(&path) {
languages_found.push("Kotlin".to_string());
report.push_str(&explore_kotlin(&path));
}
if has_swift_files(&path) {
languages_found.push("Swift".to_string());
report.push_str(&explore_swift(&path));
}
if has_go_files(&path) {
languages_found.push("Go".to_string());
report.push_str(&explore_go(&path));
}
if has_python_files(&path) {
languages_found.push("Python".to_string());
report.push_str(&explore_python(&path));
}
if has_typescript_files(&path) {
languages_found.push("TypeScript".to_string());
report.push_str(&explore_typescript(&path));
}
if has_javascript_files(&path) {
languages_found.push("JavaScript".to_string());
report.push_str(&explore_javascript(&path));
}
if has_cpp_files(&path) {
languages_found.push("C/C++".to_string());
report.push_str(&explore_cpp(&path));
}
if has_markdown_files(&path) {
languages_found.push("Markdown".to_string());
report.push_str(&explore_markdown(&path));
}
if has_yaml_files(&path) {
languages_found.push("YAML".to_string());
report.push_str(&explore_yaml(&path));
}
if has_sql_files(&path) {
languages_found.push("SQL".to_string());
report.push_str(&explore_sql(&path));
}
if has_ruby_files(&path) {
languages_found.push("Ruby".to_string());
report.push_str(&explore_ruby(&path));
}
if languages_found.is_empty() {
report.push_str("No recognized programming languages found in the codebase.\n");
} else {
let header = format!(
"=== CODEBASE ANALYSIS ===\nLanguages detected: {}\n\n",
languages_found.join(", ")
);
report = header + &report;
}
report
}
/// Expand a leading tilde to the user's home directory.
///
/// Both current-user forms are handled: a bare `~` and paths starting with
/// `~/`. Other inputs (including `~user/...`) are returned unchanged, as is
/// everything when `HOME` is not set.
fn expand_tilde(path: &str) -> String {
    let refers_to_home = path == "~" || path.starts_with("~/");
    if refers_to_home {
        if let Some(home) = std::env::var_os("HOME") {
            // '~' is one ASCII byte, so slicing at index 1 is always valid.
            return format!("{}{}", home.to_string_lossy(), &path[1..]);
        }
    }
    path.to_string()
}
/// Run `cmd` through `sh -c` with `working_dir` as the current directory and
/// condense the outcome into one string.
///
/// Returns, in order of preference:
/// - stdout, when it is non-empty;
/// - stderr prefixed with `(stderr): `, when stdout is empty and stderr is not;
/// - an empty string when both are empty;
/// - `Error running command: <error>` when the process could not be started.
///
/// The exit status of the command is not examined.
fn run_command(cmd: &str, working_dir: &str) -> String {
    let finished = match Command::new("sh")
        .args(["-c", cmd])
        .current_dir(working_dir)
        .output()
    {
        Ok(finished) => finished,
        Err(e) => return format!("Error running command: {}", e),
    };

    let stdout = String::from_utf8_lossy(&finished.stdout);
    if !stdout.is_empty() {
        return stdout.into_owned();
    }

    let stderr = String::from_utf8_lossy(&finished.stderr);
    if stderr.is_empty() {
        String::new()
    } else {
        format!("(stderr): {}", stderr)
    }
}
/// Check whether at least one regular file named `*.<extension>` exists under
/// `path` (searched recursively, skipping `.git`).
///
/// Only the search's stdout is considered. If the search cannot be started
/// (for example because `path` does not exist) or prints nothing but
/// diagnostics on stderr, the answer is `false` — an error message must never
/// be mistaken for a matching file.
fn has_files_with_extension(path: &str, extension: &str) -> bool {
    let cmd = format!(
        "find . -name '.git' -prune -o -type f -name '*.{}' -print | head -1",
        extension
    );
    Command::new("sh")
        .arg("-c")
        .arg(&cmd)
        .current_dir(path)
        .output()
        .map(|out| !String::from_utf8_lossy(&out.stdout).trim().is_empty())
        .unwrap_or(false)
}
// Language detection functions
// Each helper answers "does the tree under `path` contain this language?" by
// delegating to has_files_with_extension for the extension(s) listed.
/// Rust: any `.rs` file, or a `Cargo.toml` directly inside `path`.
fn has_rust_files(path: &str) -> bool {
has_files_with_extension(path, "rs") || Path::new(path).join("Cargo.toml").exists()
}
/// Java: any `.java` file.
fn has_java_files(path: &str) -> bool {
has_files_with_extension(path, "java")
}
/// Kotlin: any `.kt` or `.kts` file.
fn has_kotlin_files(path: &str) -> bool {
has_files_with_extension(path, "kt") || has_files_with_extension(path, "kts")
}
/// Swift: any `.swift` file.
fn has_swift_files(path: &str) -> bool {
has_files_with_extension(path, "swift")
}
/// Go: any `.go` file.
fn has_go_files(path: &str) -> bool {
has_files_with_extension(path, "go")
}
/// Python: any `.py` file.
fn has_python_files(path: &str) -> bool {
has_files_with_extension(path, "py")
}
/// TypeScript: any `.ts` or `.tsx` file.
fn has_typescript_files(path: &str) -> bool {
has_files_with_extension(path, "ts") || has_files_with_extension(path, "tsx")
}
/// JavaScript: any `.js` or `.jsx` file.
fn has_javascript_files(path: &str) -> bool {
has_files_with_extension(path, "js") || has_files_with_extension(path, "jsx")
}
/// C/C++: any `.cpp`, `.cc`, `.c`, `.h` or `.hpp` file.
fn has_cpp_files(path: &str) -> bool {
has_files_with_extension(path, "cpp")
|| has_files_with_extension(path, "cc")
|| has_files_with_extension(path, "c")
|| has_files_with_extension(path, "h")
|| has_files_with_extension(path, "hpp")
}
/// Markdown: any `.md` file.
fn has_markdown_files(path: &str) -> bool {
has_files_with_extension(path, "md")
}
/// YAML: any `.yaml` or `.yml` file.
fn has_yaml_files(path: &str) -> bool {
has_files_with_extension(path, "yaml") || has_files_with_extension(path, "yml")
}
/// SQL: any `.sql` file.
fn has_sql_files(path: &str) -> bool {
has_files_with_extension(path, "sql")
}
/// Ruby: any `.rb` file.
fn has_ruby_files(path: &str) -> bool {
has_files_with_extension(path, "rb")
}
/// Explore Rust codebase
pub fn explore_rust(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== RUST ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.rs' . 2>/dev/null | grep -v '/target/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// Dependencies (Cargo.toml)
report.push_str("--- Dependencies (Cargo.toml) ---\n");
let cargo = run_command("cat Cargo.toml 2>/dev/null | head -50", path);
report.push_str(&cargo);
report.push('\n');
// Data structures
report.push_str("--- Data Structures (Structs, Enums, Types) ---\n");
let structs = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^(pub )?(struct|enum|type|union) ' . 2>/dev/null | grep -v '/target/' | head -100"#,
path,
);
report.push_str(&structs);
report.push('\n');
// Traits and implementations
report.push_str("--- Traits & Implementations ---\n");
let traits = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^(pub )?trait |^impl ' . 2>/dev/null | grep -v '/target/' | head -100"#,
path,
);
report.push_str(&traits);
report.push('\n');
// Public functions
report.push_str("--- Public Functions ---\n");
let funcs = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^pub (async )?fn ' . 2>/dev/null | grep -v '/target/' | head -100"#,
path,
);
report.push_str(&funcs);
report.push('\n');
report
}
/// Explore Java codebase
pub fn explore_java(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== JAVA ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.java' . 2>/dev/null | grep -v '/build/' | grep -v '/target/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// Build files
report.push_str("--- Build Configuration ---\n");
let build = run_command(
"cat pom.xml 2>/dev/null | head -50 || cat build.gradle 2>/dev/null | head -50",
path,
);
report.push_str(&build);
report.push('\n');
// Classes and interfaces
report.push_str("--- Classes & Interfaces ---\n");
let classes = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.java' '^(public |private |protected )?(abstract )?(class|interface|enum|record) ' . 2>/dev/null | grep -v '/build/' | head -100"#,
path,
);
report.push_str(&classes);
report.push('\n');
// Public methods
report.push_str("--- Public Methods ---\n");
let methods = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.java' '^\s+public .+\(' . 2>/dev/null | grep -v '/build/' | head -100"#,
path,
);
report.push_str(&methods);
report.push('\n');
report
}
/// Explore Kotlin codebase
pub fn explore_kotlin(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== KOTLIN ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.kt' -g '*.kts' . 2>/dev/null | grep -v '/build/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// Build files
report.push_str("--- Build Configuration ---\n");
let build = run_command("cat build.gradle.kts 2>/dev/null | head -50 || cat build.gradle 2>/dev/null | head -50", path);
report.push_str(&build);
report.push('\n');
// Classes, objects, interfaces
report.push_str("--- Classes, Objects & Interfaces ---\n");
let classes = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.kt' '^(data |sealed |open |abstract )?(class|interface|object|enum class) ' . 2>/dev/null | grep -v '/build/' | head -100"#,
path,
);
report.push_str(&classes);
report.push('\n');
// Functions
report.push_str("--- Functions ---\n");
let funcs = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.kt' '^(suspend |private |internal |public )?fun ' . 2>/dev/null | grep -v '/build/' | head -100"#,
path,
);
report.push_str(&funcs);
report.push('\n');
report
}
/// Explore Swift codebase
pub fn explore_swift(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== SWIFT ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.swift' . 2>/dev/null | grep -v '/.build/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// Package.swift
report.push_str("--- Package Configuration ---\n");
let pkg = run_command("cat Package.swift 2>/dev/null | head -50", path);
report.push_str(&pkg);
report.push('\n');
// Classes, structs, protocols
report.push_str("--- Types (Classes, Structs, Protocols, Enums) ---\n");
let types = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.swift' '^(public |private |internal |open |final )?(class|struct|protocol|enum|actor) ' . 2>/dev/null | grep -v '/.build/' | head -100"#,
path,
);
report.push_str(&types);
report.push('\n');
// Functions
report.push_str("--- Functions ---\n");
let funcs = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.swift' '^\s*(public |private |internal |open )?func ' . 2>/dev/null | grep -v '/.build/' | head -100"#,
path,
);
report.push_str(&funcs);
report.push('\n');
report
}
/// Explore Go codebase
pub fn explore_go(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== GO ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.go' . 2>/dev/null | grep -v '/vendor/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// go.mod
report.push_str("--- Module Configuration ---\n");
let gomod = run_command("cat go.mod 2>/dev/null | head -50", path);
report.push_str(&gomod);
report.push('\n');
// Types (structs, interfaces)
report.push_str("--- Types (Structs & Interfaces) ---\n");
let types = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.go' '^type .+ (struct|interface)' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
path,
);
report.push_str(&types);
report.push('\n');
// Functions
report.push_str("--- Functions ---\n");
let funcs = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.go' '^func ' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
path,
);
report.push_str(&funcs);
report.push('\n');
report
}
/// Explore Python codebase
///
/// Returns a text report: a `=== PYTHON ===` banner followed by four headed
/// sections (file structure, dependencies, classes and functions). Each
/// section body is whatever `run_command` returns for the corresponding
/// command when invoked with `path`.
pub fn explore_python(path: &str) -> String {
    let mut report = String::new();
    report.push_str("\n=== PYTHON ===\n\n");

    // File structure
    report.push_str("--- File Structure ---\n");
    let files = run_command(
        "rg --files -g '*.py' . 2>/dev/null | grep -v '/__pycache__/' | grep -v '/venv/' | grep -v '/.venv/' | sort | head -100",
        path,
    );
    report.push_str(&files);
    report.push('\n');

    // Requirements/setup.
    // `head` reads each candidate directly: with `cat a | head || cat b | head`
    // the status tested by `||` is that of `head`, which succeeds even when
    // `cat` fails, so pyproject.toml and setup.py were never consulted.
    report.push_str("--- Dependencies ---\n");
    let deps = run_command(
        "head -n 30 requirements.txt 2>/dev/null || head -n 50 pyproject.toml 2>/dev/null || head -n 30 setup.py 2>/dev/null",
        path,
    );
    report.push_str(&deps);
    report.push('\n');

    // Classes
    report.push_str("--- Classes ---\n");
    let classes = run_command(
        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.py' '^class ' . 2>/dev/null | grep -v '/__pycache__/' | grep -v '/venv/' | head -100"#,
        path,
    );
    report.push_str(&classes);
    report.push('\n');

    // Functions
    report.push_str("--- Functions ---\n");
    let funcs = run_command(
        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.py' '^def |^async def ' . 2>/dev/null | grep -v '/__pycache__/' | grep -v '/venv/' | head -100"#,
        path,
    );
    report.push_str(&funcs);
    report.push('\n');

    report
}
/// Explore TypeScript codebase
pub fn explore_typescript(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== TYPESCRIPT ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.ts' -g '*.tsx' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/dist/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// package.json
report.push_str("--- Package Configuration ---\n");
let pkg = run_command("cat package.json 2>/dev/null | head -50", path);
report.push_str(&pkg);
report.push('\n');
// Types, interfaces, classes
report.push_str("--- Types, Interfaces & Classes ---\n");
let types = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.ts' -g '*.tsx' '^export (type|interface|class|enum|abstract class) ' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
path,
);
report.push_str(&types);
report.push('\n');
// Functions
report.push_str("--- Exported Functions ---\n");
let funcs = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.ts' -g '*.tsx' '^export (async )?function |^export const .+ = (async )?\(' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
path,
);
report.push_str(&funcs);
report.push('\n');
report
}
/// Explore JavaScript codebase
///
/// Returns a text report: a `=== JAVASCRIPT ===` banner followed by four
/// headed sections (file structure, package configuration, classes and
/// functions). Each section body is whatever `run_command` returns for the
/// corresponding command when invoked with `path`.
pub fn explore_javascript(path: &str) -> String {
    let mut report = String::new();
    report.push_str("\n=== JAVASCRIPT ===\n\n");

    // File structure
    report.push_str("--- File Structure ---\n");
    let files = run_command(
        "rg --files -g '*.js' -g '*.jsx' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/dist/' | sort | head -100",
        path,
    );
    report.push_str(&files);
    report.push('\n');

    // package.json
    report.push_str("--- Package Configuration ---\n");
    let pkg = run_command("cat package.json 2>/dev/null | head -50", path);
    report.push_str(&pkg);
    report.push('\n');

    // Classes.
    // The pattern ends in `class ` with a single space; the previous
    // `(class ) ` required two spaces after the keyword and therefore never
    // matched an ordinary `class Foo` declaration.
    report.push_str("--- Classes ---\n");
    let classes = run_command(
        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.js' -g '*.jsx' '^(export )?(default )?class ' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
        path,
    );
    report.push_str(&classes);
    report.push('\n');

    // Functions
    report.push_str("--- Exported Functions ---\n");
    let funcs = run_command(
        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.js' -g '*.jsx' '^(export )?(async )?function |^module\.exports' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
        path,
    );
    report.push_str(&funcs);
    report.push('\n');

    report
}
/// Explore C/C++ codebase
///
/// Returns a text report: a `=== C/C++ ===` banner followed by four headed
/// sections (file structure, build configuration, type declarations and
/// function declarations from headers). Each section body is whatever
/// `run_command` returns for the corresponding command when invoked with
/// `path`.
pub fn explore_cpp(path: &str) -> String {
    let mut report = String::new();
    report.push_str("\n=== C/C++ ===\n\n");

    // File structure
    report.push_str("--- File Structure ---\n");
    let files = run_command(
        "rg --files -g '*.c' -g '*.cpp' -g '*.cc' -g '*.h' -g '*.hpp' . 2>/dev/null | grep -v '/build/' | sort | head -100",
        path,
    );
    report.push_str(&files);
    report.push('\n');

    // Build files.
    // `head` reads each candidate directly: with `cat a | head || cat b | head`
    // the status tested by `||` is that of `head`, which succeeds even when
    // `cat` fails, so the Makefile fallback could never run.
    report.push_str("--- Build Configuration ---\n");
    let build = run_command(
        "head -n 50 CMakeLists.txt 2>/dev/null || head -n 50 Makefile 2>/dev/null",
        path,
    );
    report.push_str(&build);
    report.push('\n');

    // Classes and structs
    report.push_str("--- Classes & Structs ---\n");
    let classes = run_command(
        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.cpp' -g '*.cc' -g '*.h' -g '*.hpp' '^(class|struct|enum|union|typedef) ' . 2>/dev/null | grep -v '/build/' | head -100"#,
        path,
    );
    report.push_str(&classes);
    report.push('\n');

    // Functions (simplified pattern)
    report.push_str("--- Function Declarations ---\n");
    let funcs = run_command(
        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.h' -g '*.hpp' '^[a-zA-Z_][a-zA-Z0-9_<>: ]*\s+[a-zA-Z_][a-zA-Z0-9_]*\s*\(' . 2>/dev/null | grep -v '/build/' | head -100"#,
        path,
    );
    report.push_str(&funcs);
    report.push('\n');

    report
}
/// Explore Markdown documentation
///
/// Returns a text report: a `=== MARKDOWN DOCUMENTATION ===` banner followed
/// by three headed sections (documentation files, README overview and
/// level 1-3 headers). Each section body is whatever `run_command` returns
/// for the corresponding command when invoked with `path`.
pub fn explore_markdown(path: &str) -> String {
    let mut report = String::new();
    report.push_str("\n=== MARKDOWN DOCUMENTATION ===\n\n");

    // File structure
    report.push_str("--- Documentation Files ---\n");
    let files = run_command(
        "rg --files -g '*.md' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/vendor/' | sort | head -50",
        path,
    );
    report.push_str(&files);
    report.push('\n');

    // README content.
    // `head` reads each candidate directly: with `cat a | head || cat b | head`
    // the status tested by `||` is that of `head`, which succeeds even when
    // `cat` fails, so the lowercase `readme.md` fallback could never run.
    report.push_str("--- README Overview ---\n");
    let readme = run_command(
        "head -n 100 README.md 2>/dev/null || head -n 100 readme.md 2>/dev/null",
        path,
    );
    report.push_str(&readme);
    report.push('\n');

    // Headers from all markdown files
    report.push_str("--- Document Headers ---\n");
    let headers = run_command(
        r#"rg --no-heading --line-number --with-filename -g '*.md' '^#{1,3} ' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
        path,
    );
    report.push_str(&headers);
    report.push('\n');

    report
}
/// Explore YAML configuration files
pub fn explore_yaml(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== YAML CONFIGURATION ===\n\n");
// File structure
report.push_str("--- YAML Files ---\n");
let files = run_command(
"rg --files -g '*.yaml' -g '*.yml' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/vendor/' | sort | head -50",
path,
);
report.push_str(&files);
report.push('\n');
// Top-level keys from YAML files
report.push_str("--- Top-Level Keys ---\n");
let keys = run_command(
r#"rg --no-heading --line-number --with-filename -g '*.yaml' -g '*.yml' '^[a-zA-Z_][a-zA-Z0-9_-]*:' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
path,
);
report.push_str(&keys);
report.push('\n');
report
}
/// Explore SQL files
pub fn explore_sql(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== SQL ===\n\n");
// File structure
report.push_str("--- SQL Files ---\n");
let files = run_command(
"rg --files -g '*.sql' . 2>/dev/null | sort | head -50",
path,
);
report.push_str(&files);
report.push('\n');
// Tables
report.push_str("--- Table Definitions ---\n");
let tables = run_command(
r#"rg --no-heading --line-number --with-filename -i -g '*.sql' 'CREATE TABLE' . 2>/dev/null | head -100"#,
path,
);
report.push_str(&tables);
report.push('\n');
// Views and procedures
report.push_str("--- Views & Procedures ---\n");
let views = run_command(
r#"rg --no-heading --line-number --with-filename -i -g '*.sql' 'CREATE (VIEW|PROCEDURE|FUNCTION)' . 2>/dev/null | head -100"#,
path,
);
report.push_str(&views);
report.push('\n');
report
}
/// Explore Ruby codebase
pub fn explore_ruby(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== RUBY ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.rb' . 2>/dev/null | grep -v '/vendor/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// Gemfile
report.push_str("--- Dependencies (Gemfile) ---\n");
let gemfile = run_command("cat Gemfile 2>/dev/null | head -50", path);
report.push_str(&gemfile);
report.push('\n');
// Classes and modules
report.push_str("--- Classes & Modules ---\n");
let classes = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rb' '^(class|module) ' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
path,
);
report.push_str(&classes);
report.push('\n');
// Methods
report.push_str("--- Methods ---\n");
let methods = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rb' '^\s*def ' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
path,
);
report.push_str(&methods);
report.push('\n');
report
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The expanded path must no longer begin with `~`.
    #[test]
    fn test_expand_tilde() {
        let expanded = expand_tilde("~/test");
        assert!(!expanded.starts_with("~"));
    }

    /// Exploring the current directory must yield some report text.
    #[test]
    fn test_explore_codebase_returns_string() {
        let report = explore_codebase(".");
        assert!(!report.is_empty());
    }
}

View File

@@ -0,0 +1,325 @@
//! g3-planner: Fast-discovery planner for G3 AI coding agent
//!
//! This crate provides functionality to generate initial discovery tool calls
//! that are injected into the conversation before the first LLM turn.
mod code_explore;
pub mod prompts;
pub use code_explore::explore_codebase;
use anyhow::Result;
use g3_providers::{CompletionRequest, LLMProvider, Message, MessageRole};
use chrono::Local;
use std::fs::{self, OpenOptions};
use std::io::Write;
use prompts::{DISCOVERY_REQUIREMENTS_PROMPT, DISCOVERY_SYSTEM_PROMPT};
/// Type alias for a status callback function.
///
/// The callback is a boxed closure that receives a status message as `&str`
/// and returns nothing; it must be `Send + Sync`.
pub type StatusCallback = Box<dyn Fn(&str) + Send + Sync>;
/// Generates initial discovery messages for fast codebase exploration.
///
/// This function:
/// 1. Runs explore_codebase to get a codebase report
/// 2. Sends the report to the LLM with DISCOVERY_SYSTEM_PROMPT
/// 3. Extracts shell commands from the LLM response
/// 4. Returns Assistant messages with tool calls for each command
///
/// The codebase report and the extracted commands are also written to log
/// files via `write_code_report` and `write_discovery_commands`.
///
/// # Arguments
///
/// * `codebase_path` - The path to the codebase to explore
/// * `requirements_text` - Optional requirements text to include in the discovery prompt
/// * `provider` - An LLM provider to query for exploration commands
/// * `status_callback` - Optional callback for status updates
///
/// # Returns
///
/// A `Result<Vec<Message>>` containing Assistant messages with JSON tool call strings.
///
/// # Errors
///
/// Returns an error when writing either log file fails or when the provider
/// call fails.
pub async fn get_initial_discovery_messages(
    codebase_path: &str,
    requirements_text: Option<&str>,
    provider: &dyn LLMProvider,
    status_callback: Option<&StatusCallback>,
) -> Result<Vec<Message>> {
    // Helper to call status callback if provided
    let status = |msg: &str| {
        if let Some(cb) = status_callback {
            cb(msg);
        }
    };

    status("🔍 Starting code discovery...");

    // Step 1: Run explore_codebase to get the codebase report
    let codebase_report = explore_codebase(codebase_path);

    // Write the codebase report to logs directory
    write_code_report(&codebase_report)?;

    // Step 2: Build the prompt with the codebase report appended.
    // Both templates are single-line literals: a literal line break inside the
    // format string would inject an extra newline (plus the source file's
    // indentation) in front of the section banners.
    let user_prompt = match requirements_text {
        Some(requirements) => format!(
            "{}\n\n=== REQUIREMENTS ===\n\n{}\n\n=== CODEBASE REPORT ===\n\n{}",
            DISCOVERY_REQUIREMENTS_PROMPT, requirements, codebase_report
        ),
        None => format!(
            "{}\n\n=== CODEBASE REPORT ===\n\n{}",
            DISCOVERY_REQUIREMENTS_PROMPT, codebase_report
        ),
    };

    // Step 3: Create messages for the LLM
    let messages = vec![
        Message::new(MessageRole::System, DISCOVERY_SYSTEM_PROMPT.to_string()),
        Message::new(MessageRole::User, user_prompt),
    ];

    // Step 4: Send to LLM
    let request = CompletionRequest {
        messages,
        max_tokens: Some(provider.max_tokens()),
        temperature: Some(provider.temperature()),
        stream: false,
        tools: None,
    };
    status("🤖 Calling LLM for discovery commands...");
    let response = provider.complete(request).await?;

    // Step 5: Extract shell commands from the response
    let shell_commands = extract_shell_commands(&response.content);
    status(&format!("📋 Extracted {} discovery commands", shell_commands.len()));

    // Write the discovery commands to logs directory
    write_discovery_commands(&shell_commands)?;

    // Step 6: Format as tool messages
    let tool_messages = shell_commands
        .into_iter()
        .map(|cmd| create_tool_message("shell", &cmd))
        .collect();
    Ok(tool_messages)
}
/// Creates an Assistant message with a tool call in g3's JSON format.
///
/// The message content is the serialized JSON object
/// `{"tool": <tool>, "args": {"command": <command>}}`.
pub fn create_tool_message(tool: &str, command: &str) -> Message {
    let payload = serde_json::json!({ "tool": tool, "args": { "command": command } }).to_string();
    Message::new(MessageRole::Assistant, payload)
}
/// Extract shell commands from the LLM response.
///
/// Only text after the first `{{CODE EXPLORATION COMMANDS}}` marker is
/// considered. Inside every fenced code block that is both opened and closed
/// there, each non-empty line that does not start with `#` becomes one
/// command (trimmed). A block that is never closed contributes nothing, and
/// a response without the marker yields an empty vector.
pub fn extract_shell_commands(response: &str) -> Vec<String> {
    const MARKER: &str = "{{CODE EXPLORATION COMMANDS}}";

    let tail = match response.find(MARKER) {
        Some(idx) => &response[idx + MARKER.len()..],
        None => return Vec::new(),
    };

    let mut extracted: Vec<String> = Vec::new();
    // `Some` while inside a fenced block; holds that block's candidate lines.
    let mut pending: Option<Vec<&str>> = None;

    for raw in tail.lines() {
        let text = raw.trim();
        if text.starts_with("```") {
            match pending.take() {
                // Closing fence: commit the commands gathered in this block.
                Some(block) => extracted.extend(block.into_iter().map(String::from)),
                // Opening fence: start collecting.
                None => pending = Some(Vec::new()),
            }
        } else if let Some(block) = pending.as_mut() {
            if !(text.is_empty() || text.starts_with('#')) {
                block.push(text);
            }
        }
    }

    extracted
}
/// Extract the summary section from the LLM response
///
/// The summary is the text between the `{{SUMMARY BASED ON INITIAL INFO}}`
/// marker and the next `{{` (or the end of the response), trimmed. Returns
/// `None` when the marker is missing or the trimmed text is empty.
pub fn extract_summary(response: &str) -> Option<String> {
    const MARKER: &str = "{{SUMMARY BASED ON INITIAL INFO}}";

    let begin = response.find(MARKER)? + MARKER.len();
    let after_marker = &response[begin..];
    let body = match after_marker.find("{{") {
        Some(end) => &after_marker[..end],
        None => after_marker,
    };

    Some(body.trim())
        .filter(|text| !text.is_empty())
        .map(String::from)
}
/// Write the codebase report to logs directory
///
/// Creates `logs/` if needed and writes `report` to
/// `logs/code_report_<YYYYMMDD_HHMMSS>.log`, replacing any existing file of
/// that name.
fn write_code_report(report: &str) -> Result<()> {
    fs::create_dir_all("logs")?;

    // Timestamp in same format as tool_calls log
    let stamp = Local::now().format("%Y%m%d_%H%M%S");
    let target = format!("logs/code_report_{}.log", stamp);

    let mut options = OpenOptions::new();
    options.create(true).write(true).truncate(true);

    let mut out = options.open(&target)?;
    out.write_all(report.as_bytes())?;
    out.flush()?;
    Ok(())
}
/// Write the discovery commands to logs directory
///
/// Creates `logs/` if needed and writes a two-line `#` header, a blank line
/// and then one command per line to
/// `logs/discovery_commands_<YYYYMMDD_HHMMSS>.log`, replacing any existing
/// file of that name.
fn write_discovery_commands(commands: &[String]) -> Result<()> {
    fs::create_dir_all("logs")?;

    // Timestamp in same format as tool_calls log
    let stamp = Local::now().format("%Y%m%d_%H%M%S");
    let target = format!("logs/discovery_commands_{}.log", stamp);

    // Assemble the complete file body first: header, then one command per line.
    let mut body = String::from("# Discovery Commands\n# Generated by g3-planner\n\n");
    for command in commands.iter() {
        body.push_str(command);
        body.push('\n');
    }

    let mut options = OpenOptions::new();
    options.create(true).write(true).truncate(true);

    let mut out = options.open(&target)?;
    out.write_all(body.as_bytes())?;
    out.flush()?;
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The tool message is an Assistant message carrying the JSON tool call.
    #[test]
    fn test_create_tool_message_format() {
        let message = create_tool_message("shell", "ls -la");
        assert!(matches!(message.role, MessageRole::Assistant));

        let json: serde_json::Value = serde_json::from_str(&message.content).unwrap();
        assert_eq!(json["tool"], "shell");
        assert_eq!(json["args"]["command"], "ls -la");
    }

    /// Every line of a single fenced block becomes one command.
    #[test]
    fn test_extract_shell_commands_basic() {
        let response = "\nSome text here.\n{{CODE EXPLORATION COMMANDS}}\n```bash\nls -la\ncat README.md\nrg --files -g '*.rs'\n```\nMore text.\n";
        let found = extract_shell_commands(response);
        assert_eq!(found.len(), 3);
        assert_eq!(found[0], "ls -la");
        assert_eq!(found[1], "cat README.md");
        assert_eq!(found[2], "rg --files -g '*.rs'");
    }

    /// Lines starting with `#` are not treated as commands.
    #[test]
    fn test_extract_shell_commands_with_comments() {
        let response = "\n{{CODE EXPLORATION COMMANDS}}\n```\n# This is a comment\nls -la\n# Another comment\ncat file.txt\n```\n";
        let found = extract_shell_commands(response);
        assert_eq!(found.len(), 2);
        assert_eq!(found[0], "ls -la");
        assert_eq!(found[1], "cat file.txt");
    }

    /// Without the section marker nothing is extracted.
    #[test]
    fn test_extract_shell_commands_no_section() {
        let found = extract_shell_commands("Some response without the expected section.");
        assert!(found.is_empty());
    }

    /// The summary spans the text up to the next section marker.
    #[test]
    fn test_extract_summary() {
        let response = "\n{{SUMMARY BASED ON INITIAL INFO}}\nThis is a summary of the codebase.\nIt has multiple lines.\n{{CODE EXPLORATION COMMANDS}}\n```\nls -la\n```\n";
        let summary = extract_summary(response);
        assert!(summary.is_some());

        let text = summary.unwrap();
        assert!(text.contains("This is a summary"));
        assert!(text.contains("multiple lines"));
    }

    /// Without the summary marker there is no summary.
    #[test]
    fn test_extract_summary_no_section() {
        let summary = extract_summary("Response without summary section.");
        assert!(summary.is_none());
    }
}

View File

@@ -0,0 +1,37 @@
//! Prompts used for discovery phase
/// System prompt for discovery mode - instructs the LLM to analyze codebase and generate exploration commands.
///
/// The text tells the model that it will receive user requirements and a
/// codebase report, and that it must only generate exploration commands
/// rather than attempt an implementation.
pub const DISCOVERY_SYSTEM_PROMPT: &str = r#"You are an expert code analyst. Your task is to analyze a codebase structure and generate shell commands to explore it further.
You will receive:
1. User requirements describing what needs to be implemented
2. A codebase report showing the structure and key elements of the codebase
Your job is to:
1. Understand the requirements and identify what parts of the codebase are relevant
2. Generate shell commands to explore those parts in more detail
IMPORTANT: Do NOT attempt to implement anything. Only generate exploration commands."#;
/// Discovery prompt template - used when we have a codebase report.
/// The codebase report should be appended after this prompt.
///
/// The prompt asks for two sections, headed `{{SUMMARY BASED ON INITIAL INFO}}`
/// and `{{CODE EXPLORATION COMMANDS}}`, with the commands wrapped in "```"
/// fences. The example `rg` command is a complete, properly quoted command
/// so the model is not shown an unterminated quote to imitate.
pub const DISCOVERY_REQUIREMENTS_PROMPT: &str = r#"**CRITICAL**: DO ABSOLUTELY NOT ATTEMPT TO IMPLEMENT THESE REQUIREMENTS AT THIS POINT. ONLY USE THEM TO
UNDERSTAND WHICH PARTS OF THE CODE YOU MIGHT BE INTERESTED IN, AND WHAT SEARCH/GREP EXPRESSIONS YOU MIGHT WANT TO USE
TO GET A BETTER UNDERSTANDING OF THE CODEBASE.
Your task is to analyze the codebase overview provided below and generate shell commands to explore it further - in particular, those
you deem most relevant to the requirements given below.
Your output MUST include:
1. A summary report. Use the heading {{SUMMARY BASED ON INITIAL INFO}}.
- retain as much information of that as you consider relevant to the requirements, and for making an implementation plan.
- Ideally that should not be more than 10000 tokens.
2. A list of shell commands to explore the code. Use the heading {{CODE EXPLORATION COMMANDS}}.
- Try to plan ahead for what you need for a deep dive into the code. Make sure the information is sparing.
- Carefully consider which commands give you the most relevant information, pick the top 25 commands.
- Use tools like `ls`, `rg` (ripgrep), `grep`, `sed`, `cat`, `head`, `tail` etc.
- Focus on commands that will help understand the code STRUCTURE without dumping large sections of file.
- e.g. for Rust you might try `rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^(pub )?(struct|enum|type|union) '`
- Mark the beginning and end of the commands with "```".
DO NOT ADD ANY COMMENTS OR OTHER EXPLANATION IN THE COMMANDS SECTION, JUST INCLUDE THE SHELL COMMANDS."#;

View File

@@ -0,0 +1,60 @@
//! Integration tests for logging functionality
use std::fs;
use std::path::Path;
/// Checks that a code report file and a discovery commands file can be
/// written to, and read back from, the `logs` directory.
///
/// Existing contents of `logs` are left untouched; only the two files this
/// test creates are removed afterwards.
#[test]
fn test_log_files_created() {
    // `create_dir_all` succeeds when the directory already exists, so the
    // directory (and any real logs in it) does not have to be wiped first.
    fs::create_dir_all("logs").expect("Failed to create logs directory");

    // Verify directory exists
    assert!(Path::new("logs").exists());
    assert!(Path::new("logs").is_dir());

    // Test writing a code report
    let test_report = "Test codebase report\nLine 2\nLine 3";
    let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S").to_string();
    let report_filename = format!("logs/code_report_{}.log", timestamp);
    fs::write(&report_filename, test_report).expect("Failed to write code report");
    assert!(Path::new(&report_filename).exists());
    let content = fs::read_to_string(&report_filename).expect("Failed to read code report");
    assert_eq!(content, test_report);

    // Test writing discovery commands
    let commands_filename = format!("logs/discovery_commands_{}.log", timestamp);
    let test_commands = "# Discovery Commands\n# Generated by g3-planner\n\nls -la\ncat README.md\n";
    fs::write(&commands_filename, test_commands).expect("Failed to write discovery commands");
    assert!(Path::new(&commands_filename).exists());
    let content = fs::read_to_string(&commands_filename).expect("Failed to read discovery commands");
    assert_eq!(content, test_commands);

    // Clean up only the files created above
    let _ = fs::remove_file(&report_filename);
    let _ = fs::remove_file(&commands_filename);
}
/// The timestamp used in log filenames has the shape `YYYYMMDD_HHMMSS`,
/// matching the tool_calls log format.
#[test]
fn test_filename_format() {
    let stamp = chrono::Local::now().format("%Y%m%d_%H%M%S").to_string();

    // 8 digits + underscore + 6 digits
    assert_eq!(stamp.len(), 15);
    assert!(stamp.contains('_'));

    let pieces: Vec<&str> = stamp.split('_').collect();
    assert_eq!(pieces.len(), 2);
    let (date_part, time_part) = (pieces[0], pieces[1]);
    assert_eq!(date_part.len(), 8); // YYYYMMDD
    assert_eq!(time_part.len(), 6); // HHMMSS
}

View File

@@ -0,0 +1,103 @@
//! Integration tests for g3-planner
use g3_planner::{create_tool_message, explore_codebase, extract_shell_commands};
use g3_providers::MessageRole;
/// The tool message is an Assistant message carrying the JSON tool call.
#[test]
fn test_create_tool_message_format() {
    let message = create_tool_message("shell", "ls -la");
    assert!(matches!(message.role, MessageRole::Assistant));

    let json: serde_json::Value = serde_json::from_str(&message.content).unwrap();
    assert_eq!(json["tool"], "shell");
    assert_eq!(json["args"]["command"], "ls -la");
}
/// Exploring the current directory yields a non-empty report that either
/// carries the analysis header or states that no languages were recognized.
#[test]
fn test_explore_codebase_returns_report() {
    let report = explore_codebase(".");

    assert!(!report.is_empty(), "Report should not be empty");

    let has_header = report.contains("CODEBASE ANALYSIS");
    let reports_nothing_found = report.contains("No recognized");
    assert!(
        has_header || reports_nothing_found,
        "Report should have analysis header or indicate no languages found"
    );
}
/// Every line of a single fenced block becomes one command.
#[test]
fn test_extract_shell_commands_basic() {
    let response = "\nSome text here.\n{{CODE EXPLORATION COMMANDS}}\n```bash\nls -la\ncat README.md\nrg --files -g '*.rs'\n```\nMore text.\n";
    let found = extract_shell_commands(response);
    assert_eq!(found.len(), 3);
    assert_eq!(found[0], "ls -la");
    assert_eq!(found[1], "cat README.md");
    assert_eq!(found[2], "rg --files -g '*.rs'");
}
/// Lines starting with `#` are not treated as commands.
#[test]
fn test_extract_shell_commands_with_comments() {
    let response = "\n{{CODE EXPLORATION COMMANDS}}\n```\n# This is a comment\nls -la\n# Another comment\ncat file.txt\n```\n";
    let found = extract_shell_commands(response);
    assert_eq!(found.len(), 2);
    assert_eq!(found[0], "ls -la");
    assert_eq!(found[1], "cat file.txt");
}
/// Without the section marker nothing is extracted.
#[test]
fn test_extract_shell_commands_no_section() {
    let found = extract_shell_commands("Some response without the expected section.");
    assert!(found.is_empty());
}
/// Commands from several fenced blocks are collected in order; text between
/// the blocks is ignored.
#[test]
fn test_extract_shell_commands_multiple_code_blocks() {
    let response = "\n{{CODE EXPLORATION COMMANDS}}\n```bash\nls -la\n```\nSome explanation text.\n```\ncat README.md\nhead -50 src/main.rs\n```\n";
    let found = extract_shell_commands(response);
    assert_eq!(found.len(), 3);
    assert_eq!(found[0], "ls -la");
    assert_eq!(found[1], "cat README.md");
    assert_eq!(found[2], "head -50 src/main.rs");
}

View File

@@ -678,6 +678,14 @@ impl LLMProvider for AnthropicProvider {
// Anthropic supports cache control
true
}
/// Returns the `max_tokens` value stored on this provider.
fn max_tokens(&self) -> u32 {
self.max_tokens
}
/// Returns the `temperature` value stored on this provider.
fn temperature(&self) -> f32 {
self.temperature
}
}
// Anthropic API request/response structures

View File

@@ -1055,6 +1055,14 @@ impl LLMProvider for DatabricksProvider {
fn supports_cache_control(&self) -> bool {
false
}
/// Returns the `max_tokens` value stored on this provider.
fn max_tokens(&self) -> u32 {
self.max_tokens
}
/// Returns the `temperature` value stored on this provider.
fn temperature(&self) -> f32 {
self.temperature
}
}
// Databricks API request/response structures

View File

@@ -771,4 +771,12 @@ impl LLMProvider for EmbeddedProvider {
fn model(&self) -> &str {
&self.model_name
}
/// Returns the `max_tokens` value stored on this provider.
fn max_tokens(&self) -> u32 {
self.max_tokens
}
/// Returns the `temperature` value stored on this provider.
fn temperature(&self) -> f32 {
self.temperature
}
}

View File

@@ -26,6 +26,12 @@ pub trait LLMProvider: Send + Sync {
fn supports_cache_control(&self) -> bool {
false
}
/// Get the configured max_tokens for this provider
fn max_tokens(&self) -> u32;
/// Get the configured temperature for this provider
fn temperature(&self) -> f32;
}
#[derive(Debug, Clone, Serialize, Deserialize)]

View File

@@ -384,6 +384,14 @@ impl LLMProvider for OpenAIProvider {
// OpenAI models support native tool calling
true
}
/// Returns the provider's `max_tokens` setting, or 16000 when none is set.
fn max_tokens(&self) -> u32 {
self.max_tokens.unwrap_or(16000)
}
/// Returns the provider's `_temperature` setting, or 0.1 when none is set.
fn temperature(&self) -> f32 {
self._temperature.unwrap_or(0.1)
}
}
fn convert_messages(messages: &[Message]) -> Vec<serde_json::Value> {

70
tail_tool_logs.sh Executable file
View File

@@ -0,0 +1,70 @@
#!/bin/bash
# Useful tool for tailing tool_calls files. It picks up whatever the latest is and does tail -f
#
# Environment:
#   G3_WORKSPACE  If set and non-empty, logs are read from "$G3_WORKSPACE/logs";
#                 otherwise from "$HOME/tmp/workspace/logs".

if [[ -n "${G3_WORKSPACE:-}" ]]; then
  TARGET_DIR="$G3_WORKSPACE/logs"
else
  TARGET_DIR="$HOME/tmp/workspace/logs"
fi

if [[ ! -d "$TARGET_DIR" ]]; then
  # Diagnostics go to stderr so they never mix with tailed log output.
  printf "Error: Directory '%s' does not exist.\n" "$TARGET_DIR" >&2
  exit 1
fi

cd "$TARGET_DIR" || exit 1
echo "Monitoring directory '$TARGET_DIR' for newest 'tool_calls*' file..."

# Variables to keep track of the current state
CURRENT_PID=""
CURRENT_FILE=""
NEWEST_FILE=""

# Kill the background tail process, if one is running.
stop_tail() {
  if [[ -n "$CURRENT_PID" ]]; then
    kill "$CURRENT_PID" 2>/dev/null
    CURRENT_PID=""
  fi
}

# Cleanup function: stop the background tail when this script is stopped (Ctrl+C)
cleanup() {
  echo ""
  echo "Stopping monitor..."
  stop_tail
  exit 0
}

# Register the cleanup function for SIGINT (Ctrl+C) and SIGTERM
trap cleanup SIGINT SIGTERM

# Set NEWEST_FILE to the most recently modified regular file matching
# tool_calls* in the current directory, or to "" when there is none.
# The glob is compared with -nt instead of parsing `ls -t` output, so unusual
# filenames are handled and directories matching the pattern are skipped.
find_newest_file() {
  local candidate
  NEWEST_FILE=""
  for candidate in tool_calls*; do
    # Also skips the literal pattern when nothing matches.
    [[ -f "$candidate" ]] || continue
    if [[ -z "$NEWEST_FILE" || "$candidate" -nt "$NEWEST_FILE" ]]; then
      NEWEST_FILE=$candidate
    fi
  done
}

while true; do
  find_newest_file

  # If a file was found AND it is different from the one we are currently watching
  if [[ -n "$NEWEST_FILE" && "$NEWEST_FILE" != "$CURRENT_FILE" ]]; then
    # If we were already watching a file, kill the old tail process
    stop_tail

    echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
    echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
    echo ">>> Switched to new file: $NEWEST_FILE"
    echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
    echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"

    # Start tail in the background and remember its PID
    tail -f "$NEWEST_FILE" &
    CURRENT_PID=$!

    # Update the tracker variable
    CURRENT_FILE="$NEWEST_FILE"
  fi

  # Wait 1 second before checking again
  sleep 1
done