add code exploration fast start

This tries to short-circuit multiple round-trips to the LLM when reading code.
It's a precursor to context engineering tailored to specific tasks.
In initial experiments it's only marginally faster than regular mode, and burns more tokens.
Jochen
2025-11-25 16:26:53 +11:00
parent f501751bdf
commit ad198a8501
17 changed files with 1418 additions and 22 deletions

View File

@@ -678,6 +678,14 @@ impl LLMProvider for AnthropicProvider {
        // Anthropic supports cache control
        true
    }

    fn max_tokens(&self) -> u32 {
        self.max_tokens
    }

    fn temperature(&self) -> f32 {
        self.temperature
    }
}

// Anthropic API request/response structures

View File

@@ -1055,6 +1055,14 @@ impl LLMProvider for DatabricksProvider {
    fn supports_cache_control(&self) -> bool {
        false
    }

    fn max_tokens(&self) -> u32 {
        self.max_tokens
    }

    fn temperature(&self) -> f32 {
        self.temperature
    }
}

// Databricks API request/response structures

View File

@@ -771,4 +771,12 @@ impl LLMProvider for EmbeddedProvider {
    fn model(&self) -> &str {
        &self.model_name
    }

    fn max_tokens(&self) -> u32 {
        self.max_tokens
    }

    fn temperature(&self) -> f32 {
        self.temperature
    }
}

View File

@@ -26,6 +26,12 @@ pub trait LLMProvider: Send + Sync {
    fn supports_cache_control(&self) -> bool {
        false
    }

    /// Get the configured max_tokens for this provider
    fn max_tokens(&self) -> u32;

    /// Get the configured temperature for this provider
    fn temperature(&self) -> f32;
}

#[derive(Debug, Clone, Serialize, Deserialize)]
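
For context, here is a minimal sketch of how a caller might consume the two new trait methods; the StubProvider type and the request shape are hypothetical illustrations, not part of this commit.

use serde_json::json;

// Hypothetical provider, used only to illustrate the new accessors.
struct StubProvider {
    max_tokens: u32,
    temperature: f32,
}

impl StubProvider {
    // Mirrors the accessors added to LLMProvider in this commit.
    fn max_tokens(&self) -> u32 {
        self.max_tokens
    }
    fn temperature(&self) -> f32 {
        self.temperature
    }
}

// Generic request assembly: sampling parameters now come from the
// provider itself instead of being threaded through every call site.
fn build_request_body(provider: &StubProvider, prompt: &str) -> serde_json::Value {
    json!({
        "prompt": prompt,
        "max_tokens": provider.max_tokens(),
        "temperature": provider.temperature(),
    })
}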

View File

@@ -384,6 +384,14 @@ impl LLMProvider for OpenAIProvider {
        // OpenAI models support native tool calling
        true
    }

    fn max_tokens(&self) -> u32 {
        self.max_tokens.unwrap_or(16000)
    }

    fn temperature(&self) -> f32 {
        self._temperature.unwrap_or(0.1)
    }
}

fn convert_messages(messages: &[Message]) -> Vec<serde_json::Value> {
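
The hunk ends at the existing convert_messages helper. As a rough sketch of what a conversion like this typically does (the Message shape below is assumed, not taken from the repo):

use serde_json::{json, Value};

// Assumed message shape; the real type in the repo may differ.
struct Message {
    role: String,    // e.g. "user", "assistant", "system"
    content: String,
}

// Maps internal messages onto the OpenAI chat-completions wire format.
fn convert_messages(messages: &[Message]) -> Vec<Value> {
    messages
        .iter()
        .map(|m| json!({ "role": m.role, "content": m.content }))
        .collect()
}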