rename max_context_length to fallback_default_max_tokens
This commit is contained in:
@@ -19,6 +19,6 @@ max_tokens = 4096
|
|||||||
temperature = 0.3 # Slightly higher temperature for more creative implementations
|
temperature = 0.3 # Slightly higher temperature for more creative implementations
|
||||||
|
|
||||||
[agent]
|
[agent]
|
||||||
max_context_length = 8192
|
fallback_default_max_tokens = 8192
|
||||||
enable_streaming = true
|
enable_streaming = true
|
||||||
timeout_seconds = 60
|
timeout_seconds = 60
|
||||||
@@ -15,7 +15,7 @@ temperature = 0.1
|
|||||||
use_oauth = true
|
use_oauth = true
|
||||||
|
|
||||||
[agent]
|
[agent]
|
||||||
max_context_length = 8192
|
fallback_default_max_tokens = 8192
|
||||||
enable_streaming = true
|
enable_streaming = true
|
||||||
timeout_seconds = 60
|
timeout_seconds = 60
|
||||||
|
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ pub struct EmbeddedConfig {
|
|||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct AgentConfig {
|
pub struct AgentConfig {
|
||||||
pub max_context_length: usize,
|
pub fallback_default_max_tokens: usize,
|
||||||
pub enable_streaming: bool,
|
pub enable_streaming: bool,
|
||||||
pub timeout_seconds: u64,
|
pub timeout_seconds: u64,
|
||||||
pub auto_compact: bool,
|
pub auto_compact: bool,
|
||||||
@@ -133,7 +133,7 @@ impl Default for Config {
|
|||||||
player: None, // Will use default_provider if not specified
|
player: None, // Will use default_provider if not specified
|
||||||
},
|
},
|
||||||
agent: AgentConfig {
|
agent: AgentConfig {
|
||||||
max_context_length: 8192,
|
fallback_default_max_tokens: 8192,
|
||||||
enable_streaming: true,
|
enable_streaming: true,
|
||||||
timeout_seconds: 60,
|
timeout_seconds: 60,
|
||||||
auto_compact: true,
|
auto_compact: true,
|
||||||
@@ -249,7 +249,7 @@ impl Config {
|
|||||||
player: None, // Will use default_provider if not specified
|
player: None, // Will use default_provider if not specified
|
||||||
},
|
},
|
||||||
agent: AgentConfig {
|
agent: AgentConfig {
|
||||||
max_context_length: 8192,
|
fallback_default_max_tokens: 8192,
|
||||||
enable_streaming: true,
|
enable_streaming: true,
|
||||||
timeout_seconds: 60,
|
timeout_seconds: 60,
|
||||||
auto_compact: true,
|
auto_compact: true,
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ model_path = "test.gguf"
|
|||||||
model_type = "llama"
|
model_type = "llama"
|
||||||
|
|
||||||
[agent]
|
[agent]
|
||||||
max_context_length = 8192
|
fallback_default_max_tokens = 8192
|
||||||
enable_streaming = true
|
enable_streaming = true
|
||||||
timeout_seconds = 60
|
timeout_seconds = 60
|
||||||
"#;
|
"#;
|
||||||
@@ -72,7 +72,7 @@ token = "test-token"
|
|||||||
model = "test-model"
|
model = "test-model"
|
||||||
|
|
||||||
[agent]
|
[agent]
|
||||||
max_context_length = 8192
|
fallback_default_max_tokens = 8192
|
||||||
enable_streaming = true
|
enable_streaming = true
|
||||||
timeout_seconds = 60
|
timeout_seconds = 60
|
||||||
"#;
|
"#;
|
||||||
@@ -113,7 +113,7 @@ token = "test-token"
|
|||||||
model = "test-model"
|
model = "test-model"
|
||||||
|
|
||||||
[agent]
|
[agent]
|
||||||
max_context_length = 8192
|
fallback_default_max_tokens = 8192
|
||||||
enable_streaming = true
|
enable_streaming = true
|
||||||
timeout_seconds = 60
|
timeout_seconds = 60
|
||||||
"#;
|
"#;
|
||||||
|
|||||||
@@ -865,7 +865,7 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
debug!("Default provider set successfully");
|
debug!("Default provider set successfully");
|
||||||
|
|
||||||
// Determine context window size based on active provider
|
// Determine context window size based on active provider
|
||||||
let context_length = Self::determine_context_length(&config, &providers)?;
|
let context_length = Self::get_configured_context_length(&config, &providers)?;
|
||||||
let mut context_window = ContextWindow::new(context_length);
|
let mut context_window = ContextWindow::new(context_length);
|
||||||
|
|
||||||
// If README content is provided, add it as the first system message
|
// If README content is provided, add it as the first system message
|
||||||
@@ -920,7 +920,7 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn determine_context_length(config: &Config, providers: &ProviderRegistry) -> Result<u32> {
|
fn get_configured_context_length(config: &Config, providers: &ProviderRegistry) -> Result<u32> {
|
||||||
// Get the configured max_tokens for the current provider
|
// Get the configured max_tokens for the current provider
|
||||||
fn get_provider_max_tokens(config: &Config, provider_name: &str) -> Option<u32> {
|
fn get_provider_max_tokens(config: &Config, provider_name: &str) -> Option<u32> {
|
||||||
match provider_name {
|
match provider_name {
|
||||||
@@ -959,7 +959,7 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
config.agent.max_context_length as u32
|
config.agent.fallback_default_max_tokens as u32
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"openai" => {
|
"openai" => {
|
||||||
@@ -983,7 +983,7 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
_ => config.agent.max_context_length as u32,
|
_ => config.agent.fallback_default_max_tokens as u32,
|
||||||
};
|
};
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
@@ -2415,8 +2415,8 @@ Template:
|
|||||||
|
|
||||||
// Check if we need to summarize before starting
|
// Check if we need to summarize before starting
|
||||||
if self.context_window.should_summarize() {
|
if self.context_window.should_summarize() {
|
||||||
// First try thinning if we haven't reached 90% yet
|
// If we are at capacity, try thinning first instead of calling the LLM for a summary (which might fail)
|
||||||
if self.context_window.percentage_used() < 90.0 && self.context_window.should_thin() {
|
if self.context_window.percentage_used() > 90.0 && self.context_window.should_thin() {
|
||||||
self.ui_writer.print_context_status(&format!(
|
self.ui_writer.print_context_status(&format!(
|
||||||
"\n🥒 Context window at {}%. Trying thinning first...",
|
"\n🥒 Context window at {}%. Trying thinning first...",
|
||||||
self.context_window.percentage_used() as u32
|
self.context_window.percentage_used() as u32
|
||||||
|
|||||||
Reference in New Issue
Block a user