diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs
index e1a8da7..af152d9 100644
--- a/crates/g3-cli/src/lib.rs
+++ b/crates/g3-cli/src/lib.rs
@@ -3,12 +3,8 @@ use clap::Parser;
 use g3_config::Config;
 use g3_core::{project::Project, Agent};
 use rustyline::error::ReadlineError;
-use rustyline::{Cmd, ConditionalEventHandler, DefaultEditor, Event, EventHandler, KeyEvent};
-use std::fs::OpenOptions;
-use std::io::{BufWriter, Write};
+use rustyline::DefaultEditor;
 use std::path::PathBuf;
-use std::sync::{Arc, Mutex};
-use std::time::{Duration, Instant};
 use tokio_util::sync::CancellationToken;
 use tracing::{error, info};
 
@@ -159,7 +155,8 @@ async fn run_interactive(mut agent: Agent, show_prompt: bool, show_code: bool) -
 
     println!();
     println!("Type 'exit' or 'quit' to exit, use Up/Down arrows for command history");
-    println!("Press Enter to submit, Shift+Enter to add a new line");
+    println!("For multiline input: use \\ at the end of a line to continue");
+    println!("Submit multiline with Enter (without backslash)");
     println!();
 
     // Initialize rustyline editor with history
@@ -175,128 +172,79 @@ async fn run_interactive(mut agent: Agent, show_prompt: bool, show_code: bool) -
         let _ = rl.load_history(history_path);
     }
 
-    // Track whether we're in multi-line mode
-    let mut multi_line_buffer = String::new();
-    let mut in_multi_line = false;
+    // Track multiline input
+    let mut multiline_buffer = String::new();
+    let mut in_multiline = false;
 
     loop {
         // Display context window progress bar before each prompt
         display_context_progress(&agent);
 
         // Adjust prompt based on whether we're in multi-line mode
-        let prompt = if in_multi_line { "... > " } else { "g3> " };
+        let prompt = if in_multiline { "... > " } else { "g3> " };
 
         let readline = rl.readline(prompt);
         match readline {
             Ok(line) => {
-                // Check if the line ends with a backslash (alternative multi-line indicator)
-                // or if we're continuing a multi-line input
-                let trimmed_line = line.trim_end();
-
-                // Handle multi-line continuation with backslash
-                if trimmed_line.ends_with('\\') && !in_multi_line {
-                    // Start multi-line mode
-                    in_multi_line = true;
+                let trimmed = line.trim_end();
+                
+                // Check if line ends with backslash for continuation
+                if trimmed.ends_with('\\') {
                     // Remove the backslash and add to buffer
-                    let line_without_backslash = &trimmed_line[..trimmed_line.len() - 1];
-                    multi_line_buffer.push_str(line_without_backslash);
-                    multi_line_buffer.push('\n');
+                    let without_backslash = &trimmed[..trimmed.len() - 1];
+                    multiline_buffer.push_str(without_backslash);
+                    multiline_buffer.push('\n');
+                    in_multiline = true;
                     continue;
-                } else if in_multi_line {
-                    // Continue multi-line input
-                    if trimmed_line.is_empty() {
-                        // Empty line ends multi-line mode - process the accumulated input
-                        in_multi_line = false;
-                        // Fall through to process the accumulated input
-                    } else if trimmed_line.ends_with('\\') {
-                        // Continue multi-line with backslash
-                        let line_without_backslash = &trimmed_line[..trimmed_line.len() - 1];
-                        multi_line_buffer.push_str(line_without_backslash);
-                        multi_line_buffer.push('\n');
-                        continue;
-                    } else {
-                        // Last line of multi-line input (no backslash) - add it and process
-                        multi_line_buffer.push_str(&line);
-                        in_multi_line = false;
-                        // Fall through to process the accumulated input
-                    }
                 }
-
-                // Get the final input
-                let input = if !multi_line_buffer.is_empty() {
-                    // We have multi-line input to process
-                    let result = multi_line_buffer.trim().to_string();
-                    multi_line_buffer.clear();
-                    result
-                } else if !in_multi_line {
-                    // Single line input
-                    line.trim().to_string()
+                
+                // If we're in multiline mode and no backslash, this is the final line
+                if in_multiline {
+                    multiline_buffer.push_str(&line);
+                    in_multiline = false;
+                    // Process the complete multiline input
+                    let input = multiline_buffer.trim().to_string();
+                    multiline_buffer.clear();
+                    
+                    if input.is_empty() {
+                        continue;
+                    }
+                    
+                    // Add complete multiline to history
+                    rl.add_history_entry(&input)?;
+                    
+                    if input == "exit" || input == "quit" {
+                        break;
+                    }
+                    
+                    // Process the multiline input
+                    execute_task(&mut agent, &input, show_prompt, show_code).await;
                 } else {
-                    // Should not reach here, but handle gracefully
-                    continue;
-                };
-
-                if input == "exit" || input == "quit" {
-                    break;
-                }
-
-                if input.is_empty() {
-                    continue;
-                }
-
-                // Add to history
-                rl.add_history_entry(&input)?;
-
-                // Show thinking indicator immediately
-                print!("🤔 Thinking...");
-                std::io::stdout().flush()?;
-
-                // Create cancellation token for this request
-                let cancellation_token = CancellationToken::new();
-                let cancel_token_clone = cancellation_token.clone();
-
-                // Spawn a task to monitor for ESC key during execution
-                let esc_monitor = tokio::spawn(async move {
-                    // This is a simplified approach - in a real implementation,
-                    // we'd need to handle raw terminal input to detect ESC
-                    // For now, we'll just provide the cancellation infrastructure
-                    tokio::time::sleep(tokio::time::Duration::from_secs(3600)).await;
-                });
-
-                // Execute task with cancellation support
-                let execution_result = tokio::select! {
-                    result = agent.execute_task_with_timing_cancellable(
-                        &input, None, false, show_prompt, show_code, true, cancellation_token
-                    ) => {
-                        esc_monitor.abort();
-                        result
-                    }
-                    _ = tokio::signal::ctrl_c() => {
-                        cancel_token_clone.cancel();
-                        esc_monitor.abort();
-                        println!("\n⚠️  Operation cancelled by user (Ctrl+C)");
+                    // Single line input
+                    let input = line.trim().to_string();
+                    
+                    if input.is_empty() {
                         continue;
                     }
-                };
-
-                match execution_result {
-                    Ok(response) => println!("{}", response),
-                    Err(e) => {
-                        if e.to_string().contains("cancelled") {
-                            println!("⚠️  Operation cancelled by user");
-                        } else {
-                            error!("Error: {}", e);
-                        }
+                    
+                    if input == "exit" || input == "quit" {
+                        break;
                     }
+                    
+                    // Add to history
+                    rl.add_history_entry(&input)?;
+                    
+                    // Process the single line input
+                    execute_task(&mut agent, &input, show_prompt, show_code).await;
                 }
             }
             Err(ReadlineError::Interrupted) => {
                 // Ctrl-C pressed
-                if in_multi_line {
-                    // Cancel multi-line input
+                if in_multiline {
+                    // Cancel multiline input
                     println!("Multi-line input cancelled");
-                    multi_line_buffer.clear();
-                    in_multi_line = false;
+                    multiline_buffer.clear();
+                    in_multiline = false;
                 } else {
                     println!("CTRL-C");
                 }
@@ -322,628 +270,39 @@ async fn run_interactive(mut agent: Agent, show_prompt: bool, show_code: bool) -
     Ok(())
 }
 
-/// Metrics tracking for autonomous mode sessions
-#[derive(Debug, Clone)]
-struct TurnMetrics {
-    turn_number: usize,
-    role: String, // "player" or "coach"
-    start_time: Instant,
-    duration: Duration,
-    tokens_used: u32,
-    tool_calls: Vec<ToolCallMetric>,
-    success: bool,
-}
+async fn execute_task(agent: &mut Agent, input: &str, show_prompt: bool, show_code: bool) {
+    // Show thinking indicator immediately
+    print!("🤔 Thinking...");
+    std::io::stdout().flush().unwrap();
 
-#[derive(Debug, Clone)]
-struct ToolCallMetric {
-    tool_name: String,
-    duration: Duration,
-    success: bool,
-}
+    // Create cancellation token for this request
+    let cancellation_token = CancellationToken::new();
+    let cancel_token_clone = cancellation_token.clone();
 
-#[derive(Debug)]
-struct SessionMetrics {
-    session_start: Instant,
-    total_duration: Duration,
-    turns: Vec<TurnMetrics>,
-    total_tokens: u32,
-    total_tool_calls: usize,
-    successful_completion: bool,
-}
-
-impl SessionMetrics {
-    fn new() -> Self {
-        Self {
-            session_start: Instant::now(),
-            total_duration: Duration::default(),
-            turns: Vec::new(),
-            total_tokens: 0,
-            total_tool_calls: 0,
-            successful_completion: false,
+    // Execute task with cancellation support
+    let execution_result = tokio::select! {
+        result = agent.execute_task_with_timing_cancellable(
+            input, None, false, show_prompt, show_code, true, cancellation_token
+        ) => {
+            result
         }
-    }
-
-    fn add_turn(&mut self, turn: TurnMetrics) {
-        self.total_tokens += turn.tokens_used;
-        self.total_tool_calls += turn.tool_calls.len();
-        self.turns.push(turn);
-    }
-
-    fn finalize(&mut self, successful: bool) {
-        self.total_duration = self.session_start.elapsed();
-        self.successful_completion = successful;
-    }
-
-    fn generate_summary(&self) -> String {
-        let mut summary = String::new();
-
-        // Header
-        summary.push_str(
-            "╔═══════════════════════════════════════════════════════════════════════════════╗\n",
-        );
-        summary.push_str(
-            "║                          G3 AUTONOMOUS SESSION SUMMARY                        ║\n",
-        );
-        summary.push_str(
-            "╚═══════════════════════════════════════════════════════════════════════════════╝\n\n",
-        );
-
-        // Overall metrics
-        summary.push_str("📊 OVERALL METRICS\n");
-        summary.push_str(&format!(
-            "   Total Duration: {}\n",
-            format_duration(self.total_duration)
-        ));
-        summary.push_str(&format!("   Total Turns: {}\n", self.turns.len()));
-        summary.push_str(&format!("   Total Tokens: {}\n", self.total_tokens));
-        summary.push_str(&format!("   Total Tool Calls: {}\n", self.total_tool_calls));
-        summary.push_str(&format!(
-            "   Success: {}\n",
-            if self.successful_completion {
-                "✅ Yes"
-            } else {
-                "❌ No"
-            }
-        ));
-
-        // Efficiency metrics
-        if !self.turns.is_empty() {
-            let avg_duration = self.total_duration / self.turns.len() as u32;
-            let avg_tokens = self.total_tokens / self.turns.len() as u32;
-            summary.push_str(&format!(
-                "   Avg Turn Duration: {}\n",
-                format_duration(avg_duration)
-            ));
-            summary.push_str(&format!("   Avg Tokens/Turn: {}\n", avg_tokens));
-        }
-        summary.push_str("\n");
-
-        // Turn-by-turn breakdown
-        summary.push_str("🔄 TURN-BY-TURN BREAKDOWN\n");
-        for turn in &self.turns {
-            let role_icon = if turn.role == "player" {
-                "🎯"
-            } else {
-                "🎓"
-            };
-            summary.push_str(&format!(
-                "   {} Turn {} ({}): {} | {} tokens | {} tools | {}\n",
-                role_icon,
-                turn.turn_number,
-                turn.role.to_uppercase(),
-                format_duration(turn.duration),
-                turn.tokens_used,
-                turn.tool_calls.len(),
-                if turn.success { "✅" } else { "❌" }
-            ));
-        }
-        summary.push_str("\n");
-
-        // Token consumption graph
-        summary.push_str("📈 TOKEN CONSUMPTION GRAPH\n");
-        summary.push_str(&self.generate_token_graph());
-        summary.push_str("\n");
-
-        // Tool usage statistics
-        summary.push_str("🔧 TOOL USAGE STATISTICS\n");
-        summary.push_str(&self.generate_tool_stats());
-        summary.push_str("\n");
-
-        // Performance insights
-        summary.push_str("💡 PERFORMANCE INSIGHTS\n");
-        summary.push_str(&self.generate_insights());
-
-        summary
-    }
-
-    fn generate_token_graph(&self) -> String {
-        let mut graph = String::new();
-
-        if self.turns.is_empty() {
-            return "   No data available\n".to_string();
-        }
-
-        let max_tokens = self.turns.iter().map(|t| t.tokens_used).max().unwrap_or(1);
-        let scale = if max_tokens > 50 { max_tokens / 50 } else { 1 };
-
-        for turn in &self.turns {
-            let bar_length = (turn.tokens_used / scale).min(50) as usize;
-            let bar = "█".repeat(bar_length);
-            let role_icon = if turn.role == "player" {
-                "🎯"
-            } else {
-                "🎓"
-            };
-
-            graph.push_str(&format!(
-                "   {} T{:<2} |{:<50}| {} tokens\n",
-                role_icon, turn.turn_number, bar, turn.tokens_used
-            ));
-        }
-
-        if scale > 1 {
-            graph.push_str(&format!("   Scale: 1 █ = {} tokens\n", scale));
-        }
-
-        graph
-    }
-
-    fn generate_tool_stats(&self) -> String {
-        let mut stats = String::new();
-        let mut tool_counts: std::collections::HashMap<String, (usize, usize, Duration)> =
-            std::collections::HashMap::new();
-
-        // Collect tool statistics
-        for turn in &self.turns {
-            for tool in &turn.tool_calls {
-                let entry = tool_counts.entry(tool.tool_name.clone()).or_insert((
-                    0,
-                    0,
-                    Duration::default(),
-                ));
-                entry.0 += 1; // total count
-                if tool.success {
-                    entry.1 += 1; // success count
-                }
-                entry.2 += tool.duration; // total duration
-            }
-        }
-
-        if tool_counts.is_empty() {
-            return "   No tool calls recorded\n".to_string();
-        }
-
-        // Sort by usage count
-        let mut sorted_tools: Vec<_> = tool_counts.iter().collect();
-        sorted_tools.sort_by(|a, b| b.1 .0.cmp(&a.1 .0));
-
-        for (tool_name, (total, success, duration)) in sorted_tools {
-            let success_rate = if *total > 0 {
-                (*success as f32 / *total as f32) * 100.0
-            } else {
-                0.0
-            };
-            let avg_duration = if *total > 0 {
-                *duration / *total as u32
-            } else {
-                Duration::default()
-            };
-
-            stats.push_str(&format!(
-                "   {:<12} | {:>3} calls | {:>5.1}% success | {} avg\n",
-                tool_name,
-                total,
-                success_rate,
-                format_duration(avg_duration)
-            ));
-        }
-
-        stats
-    }
-
-    fn generate_insights(&self) -> String {
-        let mut insights = String::new();
-
-        if self.turns.is_empty() {
-            return "   No data available for insights\n".to_string();
-        }
-
-        // Completion insight
-        if self.successful_completion {
-            insights.push_str("   ✅ Session completed successfully with coach approval\n");
-        } else {
-            insights.push_str("   ⚠️  Session ended without coach approval (max turns reached)\n");
-        }
-
-        // Turn efficiency
-        let player_turns: Vec<_> = self.turns.iter().filter(|t| t.role == "player").collect();
-        let coach_turns: Vec<_> = self.turns.iter().filter(|t| t.role == "coach").collect();
-
-        if !player_turns.is_empty() && !coach_turns.is_empty() {
-            let avg_player_tokens =
-                player_turns.iter().map(|t| t.tokens_used).sum::<u32>() / player_turns.len() as u32;
-            let avg_coach_tokens =
-                coach_turns.iter().map(|t| t.tokens_used).sum::<u32>() / coach_turns.len() as u32;
-
-            insights.push_str(&format!(
-                "   📊 Player turns averaged {} tokens, Coach turns averaged {} tokens\n",
-                avg_player_tokens, avg_coach_tokens
-            ));
-        }
-
-        // Tool usage insight
-        let total_tools = self.turns.iter().map(|t| t.tool_calls.len()).sum::<usize>();
-        if total_tools > 0 {
-            let avg_tools_per_turn = total_tools as f32 / self.turns.len() as f32;
-            insights.push_str(&format!(
-                "   🔧 Average of {:.1} tool calls per turn\n",
-                avg_tools_per_turn
-            ));
-        }
-
-        // Time distribution
-        let total_player_time: Duration = player_turns.iter().map(|t| t.duration).sum();
-        let total_coach_time: Duration = coach_turns.iter().map(|t| t.duration).sum();
-        let total_time = total_player_time + total_coach_time;
-
-        if total_time > Duration::default() {
-            let player_percent =
-                (total_player_time.as_secs_f32() / total_time.as_secs_f32()) * 100.0;
-            let coach_percent = (total_coach_time.as_secs_f32() / total_time.as_secs_f32()) * 100.0;
-
-            insights.push_str(&format!(
-                "   ⏱️  Time split: {:.1}% implementation, {:.1}% review\n",
-                player_percent, coach_percent
-            ));
-        }
-
-        insights
-    }
-}
-
-fn format_duration(duration: Duration) -> String {
-    let total_secs = duration.as_secs();
-    if total_secs < 60 {
-        format!("{}s", total_secs)
-    } else if total_secs < 3600 {
-        let mins = total_secs / 60;
-        let secs = total_secs % 60;
-        format!("{}m{}s", mins, secs)
-    } else {
-        let hours = total_secs / 3600;
-        let mins = (total_secs % 3600) / 60;
-        format!("{}h{}m", hours, mins)
-    }
-}
-
-async fn run_autonomous(
-    mut agent: Agent,
-    project: Project,
-    show_prompt: bool,
-    show_code: bool,
-    max_turns: usize,
-) -> Result<()> {
-    // Set up logging
-    project.ensure_logs_dir()?;
-    let logger = AutonomousLogger::new(&project.logs_dir())?;
-
-    // Initialize session metrics
-    let mut session_metrics = SessionMetrics::new();
-
-    logger.log_section("G3 AUTONOMOUS MODE SESSION STARTED");
-    logger.log(&format!("🤖 G3 AI Coding Agent - Autonomous Mode"));
-    logger.log(&format!(
-        "📁 Using workspace directory: {}",
-        project.workspace().display()
-    ));
-    logger.log(&format!("📂 Project: {}", project.name));
-
-    logger.log("🎯 Looking for requirements.md in workspace directory...");
-
-    // Check if requirements exist
-    if !project.has_requirements() {
-        logger.log("❌ Error: requirements.md not found in workspace directory");
-        logger.log(&format!(
-            "   Please create a requirements.md file with your project requirements at:"
-        ));
-        logger.log(&format!(
-            "   {}/requirements.md",
-            project.workspace().display()
-        ));
-        return Ok(());
-    }
-
-    // Read requirements
-    let requirements = match project.read_requirements()? {
-        Some(content) => content,
-        None => {
-            logger.log("❌ Error: Could not read requirements.md");
-            return Ok(());
+        _ = tokio::signal::ctrl_c() => {
+            cancel_token_clone.cancel();
+            println!("\n⚠️  Operation cancelled by user (Ctrl+C)");
+            return;
         }
     };
 
-    logger.log("📋 Requirements loaded from requirements.md");
-    logger.log(&format!(
-        "Requirements: {}",
-        logger.truncate_for_log(&requirements, 150)
-    ));
-
-    // Check if there are existing project files (skip first player turn if so)
-    let has_existing_files = check_existing_project_files(project.workspace(), &logger)?;
-
-    logger.log("🔄 Starting coach-player feedback loop...");
-    logger.log("");
-
-    let mut turn = 1;
-    let mut coach_feedback = String::new();
-    let mut skip_player_turn = has_existing_files;
-    let mut implementation_approved = false;
-
-    loop {
-        // Skip player turn if we have existing files and this is the first iteration
-        if skip_player_turn {
-            logger.log_section(&format!(
-                "TURN {}/{} - SKIPPING PLAYER MODE",
-                turn, max_turns
-            ));
-            logger.log("📁 Existing project files detected, skipping to coach evaluation");
-            skip_player_turn = false; // Only skip the first turn
-        } else {
-            logger.log_section(&format!("TURN {}/{} - PLAYER MODE", turn, max_turns));
-
-            // Player mode: implement requirements (with coach feedback if available)
-            let player_prompt = if coach_feedback.is_empty() {
-                format!(
-                    "You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nImplement this step by step, creating all necessary files and code.",
-                    requirements
-                )
-            } else {
-                format!(
-                    "You are G3 in implementation mode. You need to address the coach's feedback and improve your implementation.\n\nORIGINAL REQUIREMENTS:\n{}\n\nCOACH FEEDBACK TO ADDRESS:\n{}\n\nPlease make the necessary improvements to address the coach's feedback while ensuring all original requirements are met.",
-                    requirements, coach_feedback
-                )
-            };
-
-            logger.log("🎯 Starting player implementation...");
-            if !coach_feedback.is_empty() {
-                logger.log("📝 Incorporating coach feedback from previous turn");
-            }
-
-            // Track player turn metrics
-            let player_start = Instant::now();
-            let initial_tokens = agent.get_context_window().used_tokens;
-
-            let player_result = agent
-                .execute_task_with_timing(&player_prompt, None, false, show_prompt, show_code, true)
-                .await;
-
-            let player_duration = player_start.elapsed();
-            let final_tokens = agent.get_context_window().used_tokens;
-            let tokens_used = final_tokens.saturating_sub(initial_tokens);
-
-            let player_success = player_result.is_ok();
-            if let Err(e) = player_result {
-                logger.log(&format!("❌ Player implementation failed: {}", e));
-            }
-
-            // Extract tool call metrics from the agent
-            let player_tool_metrics: Vec<ToolCallMetric> = agent
-                .get_tool_call_metrics()
-                .iter()
-                .map(|(tool_name, duration, success)| ToolCallMetric {
-                    tool_name: tool_name.clone(),
-                    duration: *duration,
-                    success: *success,
-                })
-                .collect();
-
-            // Create player turn metrics
-            let player_turn = TurnMetrics {
-                turn_number: turn,
-                role: "player".to_string(),
-                start_time: player_start,
-                duration: player_duration,
-                tokens_used,
-                tool_calls: player_tool_metrics,
-                success: player_success,
-            };
-
-            session_metrics.add_turn(player_turn);
-
-            logger.log("🎯 Player implementation completed");
-            logger.log("");
-        }
-
-        // Create a new agent instance for coach mode to ensure fresh context
-        let config = g3_config::Config::load(None)?;
-        let mut coach_agent = Agent::new(config).await?;
-
-        // Ensure coach agent is also in the workspace directory
-        project.enter_workspace()?;
-
-        logger.log_section(&format!("TURN {}/{} - COACH MODE", turn, max_turns));
-
-        // Coach mode: critique the implementation
-        let coach_prompt = format!(
-            "You are G3 in coach mode. Your role is to critique and review implementations against requirements.
-
-REQUIREMENTS:
-{}
-
-IMPLEMENTATION REVIEW:
-Review the current state of the project and provide a concise critique focusing on:
-1. Whether the requirements are correctly implemented
-2. Whether the project compiles successfully
-3. What requirements are missing or incorrect
-4. Specific improvements needed to satisfy requirements
-
-If the implementation correctly meets all requirements, respond with: 'IMPLEMENTATION_APPROVED'
-If improvements are needed, provide specific actionable feedback. Be thorough but don't be overly critical. APPROVE the
-implementation if it doesn't have compile errors, glaring omissions and generally fits the bill.
-
-Keep your response concise and focused on actionable items.",
-            requirements
-        );
-
-        logger.log("🎓 Starting coach review...");
-
-        // Track coach turn metrics
-        let coach_start = Instant::now();
-        let initial_coach_tokens = coach_agent.get_context_window().used_tokens;
-
-        let coach_result = coach_agent
-            .execute_task_with_timing(&coach_prompt, None, false, show_prompt, show_code, true)
-            .await?;
-
-        let coach_duration = coach_start.elapsed();
-        let final_coach_tokens = coach_agent.get_context_window().used_tokens;
-        let coach_tokens_used = final_coach_tokens.saturating_sub(initial_coach_tokens);
-
-        // Extract tool call metrics from the coach agent
-        let coach_tool_metrics: Vec<ToolCallMetric> = coach_agent
-            .get_tool_call_metrics()
-            .iter()
-            .map(|(tool_name, duration, success)| ToolCallMetric {
-                tool_name: tool_name.clone(),
-                duration: *duration,
-                success: *success,
-            })
-            .collect();
-
-        // Create coach turn metrics
-        let coach_turn = TurnMetrics {
-            turn_number: turn,
-            role: "coach".to_string(),
-            start_time: coach_start,
-            duration: coach_duration,
-            tokens_used: coach_tokens_used,
-            tool_calls: coach_tool_metrics,
-            success: true, // Coach execution succeeded if we got here
-        };
-
-        session_metrics.add_turn(coach_turn);
-
-        logger.log("🎓 Coach review completed");
-        logger.log(&format!("Coach feedback: {}", coach_result));
-
-        // Check if coach approved the implementation
-        if coach_result.contains("IMPLEMENTATION_APPROVED") {
-            logger.log_section("SESSION COMPLETED - IMPLEMENTATION APPROVED");
-            logger.log("✅ Coach approved the implementation!");
-            implementation_approved = true;
-            break;
-        }
-
-        // Check if we've reached max turns
-        if turn >= max_turns {
-            logger.log_section("SESSION COMPLETED - MAX TURNS REACHED");
-            logger.log(&format!("⏰ Maximum turns ({}) reached", max_turns));
-            logger.log("🔄 Autonomous mode completed (max iterations)");
-            break;
-        }
-
-        // Store coach feedback for next iteration
-        coach_feedback = coach_result;
-        turn += 1;
-
-        logger.log("🔄 Coach provided feedback for next iteration");
-        logger.log(&format!(
-            "📝 Preparing to incorporate feedback in turn {}",
-            turn
-        ));
-        logger.log("");
-    }
-
-    // Finalize session metrics
-    session_metrics.finalize(implementation_approved);
-
-    // Generate and display comprehensive summary
-    logger.log_section("G3 AUTONOMOUS MODE SESSION ENDED");
-
-    if implementation_approved {
-        logger.log("🎉 Autonomous mode completed successfully");
-    }
-
-    // Display the comprehensive metrics summary
-    let summary = session_metrics.generate_summary();
-    println!("\n{}", summary);
-
-    // Also log the summary to file (without printing to console again)
-    if let Ok(mut writer) = logger.log_writer.lock() {
-        let timestamp = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC");
-        for line in summary.lines() {
-            let _ = writeln!(writer, "[{}] {}", timestamp, line);
-        }
-        let _ = writer.flush();
-    }
-
-    Ok(())
-}
-
-/// Check if there are existing project files in the workspace directory
-/// Returns true if project files are found (excluding requirements.md and logs directory)
-fn check_existing_project_files(
-    workspace_dir: &std::path::Path,
-    logger: &AutonomousLogger,
-) -> Result<bool> {
-    logger.log("🔍 Checking for existing project files...");
-
-    let entries = match std::fs::read_dir(workspace_dir) {
-        Ok(entries) => entries,
+    match execution_result {
+        Ok(response) => println!("{}", response),
         Err(e) => {
-            logger.log(&format!("❌ Failed to read workspace directory: {}", e));
-            return Ok(false);
-        }
-    };
-
-    let mut project_files = Vec::new();
-    let mut total_files = 0;
-
-    for entry in entries {
-        let entry = entry?;
-        let path = entry.path();
-        let file_name = path
-            .file_name()
-            .and_then(|n| n.to_str())
-            .unwrap_or("unknown");
-
-        // Skip requirements.md, logs directory, and hidden files
-        if file_name == "requirements.md" || file_name == "logs" || file_name.starts_with('.') {
-            continue;
-        }
-
-        total_files += 1;
-
-        // Collect project files for logging (limit to first 5)
-        if project_files.len() < 5 {
-            if path.is_dir() {
-                project_files.push(format!("{}/", file_name));
+            if e.to_string().contains("cancelled") {
+                println!("⚠️  Operation cancelled by user");
             } else {
-                project_files.push(file_name.to_string());
+                error!("Error: {}", e);
             }
         }
     }
-
-    if total_files > 0 {
-        logger.log(&format!("📁 Found {} existing project files", total_files));
-        if !project_files.is_empty() {
-            let files_display = if total_files > 5 {
-                format!(
-                    "{} (and {} more)",
-                    project_files.join(", "),
-                    total_files - 5
-                )
-            } else {
-                project_files.join(", ")
-            };
-            logger.log(&format!("   Files: {}", files_display));
-        }
-        logger.log("⏭️  Will skip first player turn and evaluate existing implementation");
-        Ok(true)
-    } else {
-        logger.log("📂 No existing project files found, starting fresh implementation");
-        Ok(false)
-    }
 }
 
 fn display_context_progress(agent: &Agent) {
@@ -990,95 +349,134 @@ fn setup_workspace_directory() -> Result<PathBuf> {
     Ok(workspace_dir)
 }
 
-/// Logger for autonomous mode that writes to both console and log file
-struct AutonomousLogger {
-    log_writer: Arc<Mutex<BufWriter<std::fs::File>>>,
-}
-
-impl AutonomousLogger {
-    fn new(logs_dir: &PathBuf) -> Result<Self> {
-        // Create log file with timestamp in logs subdirectory
-        let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
-        let log_path = logs_dir.join(format!("g3_autonomous_{}.log", timestamp));
-
-        let file = OpenOptions::new()
-            .create(true)
-            .write(true)
-            .append(true)
-            .open(&log_path)?;
-
-        let log_writer = Arc::new(Mutex::new(BufWriter::new(file)));
-
-        println!("📝 Logging autonomous session to: {}", log_path.display());
-
-        Ok(Self { log_writer })
+// Simplified autonomous mode implementation
+async fn run_autonomous(
+    mut agent: Agent,
+    project: Project,
+    show_prompt: bool,
+    show_code: bool,
+    max_turns: usize,
+) -> Result<()> {
+    println!("🤖 G3 AI Coding Agent - Autonomous Mode");
+    println!("📁 Using workspace: {}", project.workspace().display());
+    
+    // Check if requirements exist
+    if !project.has_requirements() {
+        println!("❌ Error: requirements.md not found in workspace directory");
+        println!("   Please create a requirements.md file with your project requirements at:");
+        println!("   {}/requirements.md", project.workspace().display());
+        return Ok(());
     }
 
-    /// Truncate text to a single line for logging (UTF-8 safe)
-    fn truncate_for_log(&self, text: &str, max_chars: usize) -> String {
-        // First, get the first line only
-        let first_line = text.lines().next().unwrap_or("").trim();
+    // Read requirements
+    let requirements = match project.read_requirements()? {
+        Some(content) => content,
+        None => {
+            println!("❌ Error: Could not read requirements.md");
+            return Ok(());
+        }
+    };
 
-        // Then truncate if too long (using char boundaries to avoid UTF-8 panics)
-        if first_line.chars().count() <= max_chars {
-            first_line.to_string()
+    println!("📋 Requirements loaded from requirements.md");
+    println!("🔄 Starting coach-player feedback loop...");
+    
+    let mut turn = 1;
+    let mut coach_feedback = String::new();
+    let mut implementation_approved = false;
+
+    loop {
+        println!("\n=== TURN {}/{} - PLAYER MODE ===", turn, max_turns);
+
+        // Player mode: implement requirements (with coach feedback if available)
+        let player_prompt = if coach_feedback.is_empty() {
+            format!(
+                "You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nImplement this step by step, creating all necessary files and code.",
+                requirements
+            )
         } else {
-            // Use char indices to ensure we don't split UTF-8 characters
-            let truncated: String = first_line
-                .chars()
-                .take(max_chars.saturating_sub(3))
-                .collect();
-            format!("{}...", truncated)
+            format!(
+                "You are G3 in implementation mode. You need to address the coach's feedback and improve your implementation.\n\nORIGINAL REQUIREMENTS:\n{}\n\nCOACH FEEDBACK TO ADDRESS:\n{}\n\nPlease make the necessary improvements to address the coach's feedback while ensuring all original requirements are met.",
+                requirements, coach_feedback
+            )
+        };
+
+        println!("🎯 Starting player implementation...");
+        let player_result = agent
+            .execute_task_with_timing(&player_prompt, None, false, show_prompt, show_code, true)
+            .await;
+
+        if let Err(e) = player_result {
+            println!("❌ Player implementation failed: {}", e);
         }
+
+        // Create a new agent instance for coach mode to ensure fresh context
+        let config = g3_config::Config::load(None)?;
+        let mut coach_agent = Agent::new(config).await?;
+
+        // Ensure coach agent is also in the workspace directory
+        project.enter_workspace()?;
+
+        println!("\n=== TURN {}/{} - COACH MODE ===", turn, max_turns);
+
+        // Coach mode: critique the implementation
+        let coach_prompt = format!(
+            "You are G3 in coach mode. Your role is to critique and review implementations against requirements.
+
+REQUIREMENTS:
+{}
+
+IMPLEMENTATION REVIEW:
+Review the current state of the project and provide a concise critique focusing on:
+1. Whether the requirements are correctly implemented
+2. Whether the project compiles successfully
+3. What requirements are missing or incorrect
+4. Specific improvements needed to satisfy requirements
+
+If the implementation correctly meets all requirements, respond with: 'IMPLEMENTATION_APPROVED'
+If improvements are needed, provide specific actionable feedback. Be thorough but don't be overly critical. APPROVE the
+implementation if it doesn't have compile errors, glaring omissions and generally fits the bill.
+
+Keep your response concise and focused on actionable items.",
+            requirements
+        );
+
+        println!("🎓 Starting coach review...");
+        let coach_result = coach_agent
+            .execute_task_with_timing(&coach_prompt, None, false, show_prompt, show_code, true)
+            .await?;
+
+        println!("🎓 Coach review completed");
+        println!("Coach feedback: {}", coach_result);
+
+        // Check if coach approved the implementation
+        if coach_result.contains("IMPLEMENTATION_APPROVED") {
+            println!("\n=== SESSION COMPLETED - IMPLEMENTATION APPROVED ===");
+            println!("✅ Coach approved the implementation!");
+            implementation_approved = true;
+            break;
+        }
+
+        // Check if we've reached max turns
+        if turn >= max_turns {
+            println!("\n=== SESSION COMPLETED - MAX TURNS REACHED ===");
+            println!("⏰ Maximum turns ({}) reached", max_turns);
+            break;
+        }
+
+        // Store coach feedback for next iteration
+        coach_feedback = coach_result;
+        turn += 1;
+
+        println!("🔄 Coach provided feedback for next iteration");
     }
 
-    fn log(&self, message: &str) {
-        // Ensure single line for console output
-        let single_line_message = self.truncate_for_log(message, 200);
-
-        // Print to console
-        println!("{}", single_line_message);
-
-        // Write to log file with timestamp (also single line)
-        if let Ok(mut writer) = self.log_writer.lock() {
-            let timestamp = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC");
-            let _ = writeln!(writer, "[{}] {}", timestamp, single_line_message);
-            let _ = writer.flush();
-        }
+    if implementation_approved {
+        println!("\n🎉 Autonomous mode completed successfully");
+    } else {
+        println!("\n🔄 Autonomous mode completed (max iterations)");
     }
 
-    fn log_section(&self, section: &str) {
-        // Sections can be multi-line for visual separation, but content should be single line
-        let single_line_section = self.truncate_for_log(section, 100);
-        let separator = "=".repeat(80);
-
-        // Print to console with visual formatting
-        println!("{}", separator);
-        println!("{}", single_line_section);
-        println!("{}", separator);
-
-        // Log to file as single entries
-        if let Ok(mut writer) = self.log_writer.lock() {
-            let timestamp = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC");
-            let _ = writeln!(writer, "[{}] === {} ===", timestamp, single_line_section);
-            let _ = writer.flush();
-        }
-    }
-
-    fn log_subsection(&self, subsection: &str) {
-        let single_line_subsection = self.truncate_for_log(subsection, 100);
-        let separator = "-".repeat(60);
-
-        // Print to console with visual formatting
-        println!("{}", separator);
-        println!("{}", single_line_subsection);
-        println!("{}", separator);
-
-        // Log to file as single entry
-        if let Ok(mut writer) = self.log_writer.lock() {
-            let timestamp = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC");
-            let _ = writeln!(writer, "[{}] --- {} ---", timestamp, single_line_subsection);
-            let _ = writer.flush();
-        }
-    }
+    Ok(())
 }
+
+use std::io::Write;
diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs
index 89c6700..bf8699b 100644
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -238,12 +238,26 @@ impl ContextWindow {
             return;
         }
 
-        // Simple token estimation: ~4 characters per token
-        let estimated_tokens = (message.content.len() as f32 / 4.0).ceil() as u32;
+        // Better token estimation based on content type
+        let estimated_tokens = Self::estimate_tokens(&message.content);
         self.used_tokens += estimated_tokens;
         self.conversation_history.push(message);
     }
 
+    /// More accurate token estimation
+    fn estimate_tokens(text: &str) -> u32 {
+        // Better heuristic: 
+        // - Average English text: ~4 characters per token
+        // - Code/JSON: ~3 characters per token (more symbols)
+        // - Add 10% buffer for safety
+        let base_estimate = if text.contains("{") || text.contains("```") || text.contains("fn ") {
+            (text.len() as f32 / 3.0).ceil() as u32  // Code/JSON
+        } else {
+            (text.len() as f32 / 4.0).ceil() as u32  // Regular text
+        };
+        (base_estimate as f32 * 1.1).ceil() as u32  // Add 10% buffer
+    }
+
     pub fn update_usage(&mut self, usage: &g3_providers::Usage) {
         // Update with actual token usage from the provider
         self.used_tokens = usage.total_tokens;
@@ -261,15 +275,25 @@ impl ContextWindow {
         self.total_tokens.saturating_sub(self.used_tokens)
     }
 
+
     /// Check if we should trigger summarization (at 80% capacity)
     pub fn should_summarize(&self) -> bool {
-        self.percentage_used() >= 80.0
+        // Trigger at 80% OR if we're getting close to absolute limits
+        // This prevents issues with models that have large contexts but still hit limits
+        let percentage_trigger = self.percentage_used() >= 80.0;
+        
+        // Also trigger if we're approaching common token limits
+        // Most models start having issues around 150k tokens
+        let absolute_trigger = self.used_tokens > 150_000;
+        
+        percentage_trigger || absolute_trigger
     }
-
+    
     /// Create a summary request prompt for the current conversation
     pub fn create_summary_prompt(&self) -> String {
         "Please provide a comprehensive summary of our conversation so far. Include:
 
+
 1. **Main Topic/Goal**: What is the primary task or objective being worked on?
 2. **Key Decisions**: What important decisions have been made?
 3. **Actions Taken**: What specific actions, commands, or code changes have been completed?
@@ -897,43 +921,77 @@ The tool will execute immediately and you'll receive the result (success or erro
         // Check if we need to summarize before starting
         if self.context_window.should_summarize() {
             info!(
-                "Context window at {}%, triggering auto-summarization",
-                self.context_window.percentage_used() as u32
+                "Context window at {}% ({}/{} tokens), triggering auto-summarization", 
+                self.context_window.percentage_used() as u32,
+                self.context_window.used_tokens,
+                self.context_window.total_tokens
             );
-
+            
             // Notify user about summarization
-            println!(
-                "\n📊 Context window reaching capacity ({}%). Creating summary...",
-                self.context_window.percentage_used() as u32
-            );
-
-            // Create summary request
+            println!("\n📊 Context window reaching capacity ({}%). Creating summary...", 
+                self.context_window.percentage_used() as u32);
+            
+            // Create summary request with FULL history
             let summary_prompt = self.context_window.create_summary_prompt();
+            
+            // Get the full conversation history
+            let conversation_text = self.context_window.conversation_history
+                .iter()
+                .map(|m| format!("{:?}: {}", m.role, m.content))
+                .collect::<Vec<_>>()
+                .join("\n\n");
+            
             let summary_messages = vec![
                 Message {
                     role: MessageRole::System,
-                    content: "You are a helpful assistant that creates concise summaries."
-                        .to_string(),
+                    content: "You are a helpful assistant that creates concise summaries.".to_string(),
                 },
                 Message {
                     role: MessageRole::User,
-                    content: format!(
-                        "Based on this conversation history, {}\n\nConversation:\n{}",
+                    content: format!("Based on this conversation history, {}\n\nConversation:\n{}", 
                         summary_prompt,
-                        self.context_window
-                            .conversation_history
-                            .iter()
-                            .map(|m| format!("{:?}: {}", m.role, m.content))
-                            .collect::<Vec<_>>()
-                            .join("\n\n")
+                        conversation_text
                     ),
                 },
             ];
 
             let provider = self.providers.get(None)?;
+            
+            // Dynamically calculate max_tokens for summary based on what's left
+            // We need to ensure: used_tokens + max_tokens <= total_context_limit
+            let summary_max_tokens = match provider.name() {
+                "databricks" | "anthropic" => {
+                    // Claude models have 200k context
+                    // Calculate how much room we have left
+                    let model_limit = 200_000u32;
+                    let current_usage = self.context_window.used_tokens;
+                    // Leave some buffer (5k tokens) for safety
+                    let available = model_limit.saturating_sub(current_usage).saturating_sub(5000);
+                    // Cap at a reasonable summary size (10k tokens max)
+                    Some(available.min(10_000))
+                }
+                "embedded" => {
+                    // For smaller context models, be more conservative
+                    let model_limit = self.context_window.total_tokens;
+                    let current_usage = self.context_window.used_tokens;
+                    // Leave 1k buffer
+                    let available = model_limit.saturating_sub(current_usage).saturating_sub(1000);
+                    // Cap at 3k for embedded models
+                    Some(available.min(3000))
+                }
+                _ => {
+                    // Default: conservative approach
+                    let available = self.context_window.remaining_tokens().saturating_sub(2000);
+                    Some(available.min(5000))
+                }
+            };
+            
+            info!("Requesting summary with max_tokens: {:?} (current usage: {} tokens)", 
+                summary_max_tokens, self.context_window.used_tokens);
+            
             let summary_request = CompletionRequest {
                 messages: summary_messages,
-                max_tokens: Some(4000), // Reasonable size for summary
+                max_tokens: summary_max_tokens,
                 temperature: Some(0.3), // Lower temperature for factual summary
                 stream: false,
                 tools: None,
@@ -962,7 +1020,11 @@ The tool will execute immediately and you'll receive the result (success or erro
                     println!("🔄 Context reset complete. Continuing with your request...\n");
                 }
                 Err(e) => {
-                    warn!("Failed to create summary: {}. Continuing without reset.", e);
+                    error!("Failed to create summary: {}", e);
+                    println!("⚠️ Unable to create summary. Consider starting a new session if you continue to see errors.\n");
+                    // Don't continue with the original request if summarization failed
+                    // as we're likely at token limit
+                    return Err(anyhow::anyhow!("Context window at capacity and summarization failed. Please start a new session."));
                 }
             }
         }