diff --git a/config.coach-player.example.toml b/config.coach-player.example.toml index 2101564..999b674 100644 --- a/config.coach-player.example.toml +++ b/config.coach-player.example.toml @@ -19,6 +19,6 @@ max_tokens = 4096 temperature = 0.3 # Slightly higher temperature for more creative implementations [agent] -max_context_length = 8192 +fallback_default_max_tokens = 8192 enable_streaming = true timeout_seconds = 60 \ No newline at end of file diff --git a/config.example.toml b/config.example.toml index b58ae3f..56954f9 100644 --- a/config.example.toml +++ b/config.example.toml @@ -15,7 +15,7 @@ temperature = 0.1 use_oauth = true [agent] -max_context_length = 8192 +fallback_default_max_tokens = 8192 enable_streaming = true timeout_seconds = 60 diff --git a/crates/g3-config/src/lib.rs b/crates/g3-config/src/lib.rs index d9f0602..ba578e9 100644 --- a/crates/g3-config/src/lib.rs +++ b/crates/g3-config/src/lib.rs @@ -62,7 +62,7 @@ pub struct EmbeddedConfig { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct AgentConfig { - pub max_context_length: usize, + pub fallback_default_max_tokens: usize, pub enable_streaming: bool, pub timeout_seconds: u64, pub auto_compact: bool, @@ -133,7 +133,7 @@ impl Default for Config { player: None, // Will use default_provider if not specified }, agent: AgentConfig { - max_context_length: 8192, + fallback_default_max_tokens: 8192, enable_streaming: true, timeout_seconds: 60, auto_compact: true, @@ -249,7 +249,7 @@ impl Config { player: None, // Will use default_provider if not specified }, agent: AgentConfig { - max_context_length: 8192, + fallback_default_max_tokens: 8192, enable_streaming: true, timeout_seconds: 60, auto_compact: true, diff --git a/crates/g3-config/src/tests.rs b/crates/g3-config/src/tests.rs index a1e1e9f..6899a8b 100644 --- a/crates/g3-config/src/tests.rs +++ b/crates/g3-config/src/tests.rs @@ -31,7 +31,7 @@ model_path = "test.gguf" model_type = "llama" [agent] -max_context_length = 8192 
+fallback_default_max_tokens = 8192 enable_streaming = true timeout_seconds = 60 "#; @@ -72,7 +72,7 @@ token = "test-token" model = "test-model" [agent] -max_context_length = 8192 +fallback_default_max_tokens = 8192 enable_streaming = true timeout_seconds = 60 "#; @@ -113,7 +113,7 @@ token = "test-token" model = "test-model" [agent] -max_context_length = 8192 +fallback_default_max_tokens = 8192 enable_streaming = true timeout_seconds = 60 "#; diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 1c754fa..b338852 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -865,7 +865,7 @@ impl Agent { debug!("Default provider set successfully"); // Determine context window size based on active provider - let context_length = Self::determine_context_length(&config, &providers)?; + let context_length = Self::get_configured_context_length(&config, &providers)?; let mut context_window = ContextWindow::new(context_length); // If README content is provided, add it as the first system message @@ -920,7 +920,7 @@ impl Agent { }) } - fn determine_context_length(config: &Config, providers: &ProviderRegistry) -> Result { + fn get_configured_context_length(config: &Config, providers: &ProviderRegistry) -> Result { // Get the configured max_tokens for the current provider fn get_provider_max_tokens(config: &Config, provider_name: &str) -> Option { match provider_name { @@ -959,7 +959,7 @@ impl Agent { } }) } else { - config.agent.max_context_length as u32 + config.agent.fallback_default_max_tokens as u32 } } "openai" => { @@ -983,7 +983,7 @@ impl Agent { } }) } - _ => config.agent.max_context_length as u32, + _ => config.agent.fallback_default_max_tokens as u32, }; debug!( @@ -2415,8 +2415,8 @@ Template: // Check if we need to summarize before starting if self.context_window.should_summarize() { - // First try thinning if we haven't reached 90% yet - if self.context_window.percentage_used() < 90.0 && self.context_window.should_thin() { + // First 
try thinning when usage is above 90% of capacity, so we avoid an LLM summarization call that might fail + if self.context_window.percentage_used() > 90.0 && self.context_window.should_thin() { self.ui_writer.print_context_status(&format!( "\n🥒 Context window at {}%. Trying thinning first...", self.context_window.percentage_used() as u32