disable thinking if there is no token budget

This commit is contained in:
Jochen
2025-12-09 16:45:28 +11:00
parent 2283d9ddbf
commit 4aa84e2144
5 changed files with 97 additions and 8 deletions

View File

@@ -42,6 +42,8 @@ pub struct CompletionRequest {
pub temperature: Option<f32>,
pub stream: bool,
pub tools: Option<Vec<Tool>>,
/// Force-disables thinking mode for this request (used when `max_tokens` is too low).
pub disable_thinking: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]