works better without streaming
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -3,6 +3,7 @@
|
|||||||
debug
|
debug
|
||||||
target
|
target
|
||||||
.build
|
.build
|
||||||
|
appy/
|
||||||
|
|
||||||
# These are backup files generated by rustfmt
|
# These are backup files generated by rustfmt
|
||||||
**/*.rs.bk
|
**/*.rs.bk
|
||||||
|
|||||||
@@ -466,7 +466,7 @@ impl LLMProvider for OllamaProvider {
|
|||||||
|
|
||||||
async fn stream(&self, request: CompletionRequest) -> Result<CompletionStream> {
|
async fn stream(&self, request: CompletionRequest) -> Result<CompletionStream> {
|
||||||
debug!(
|
debug!(
|
||||||
"Processing Ollama streaming request with {} messages",
|
"Processing Ollama request (non-streaming) with {} messages",
|
||||||
request.messages.len()
|
request.messages.len()
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -483,13 +483,13 @@ impl LLMProvider for OllamaProvider {
|
|||||||
let request_body = self.create_request_body(
|
let request_body = self.create_request_body(
|
||||||
&request.messages,
|
&request.messages,
|
||||||
request.tools.as_deref(),
|
request.tools.as_deref(),
|
||||||
true,
|
false, // Use non-streaming mode to avoid streaming bugs
|
||||||
max_tokens,
|
max_tokens,
|
||||||
temperature,
|
temperature,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
"Sending streaming request to Ollama API: model={}, temperature={}",
|
"Sending request to Ollama API (stream=false): model={}, temperature={}",
|
||||||
self.model, request_body.options.temperature
|
self.model, request_body.options.temperature
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -499,7 +499,7 @@ impl LLMProvider for OllamaProvider {
|
|||||||
.json(&request_body)
|
.json(&request_body)
|
||||||
.send()
|
.send()
|
||||||
.await
|
.await
|
||||||
.map_err(|e| anyhow!("Failed to send streaming request to Ollama API: {}", e))?;
|
.map_err(|e| anyhow!("Failed to send request to Ollama API: {}", e))?;
|
||||||
|
|
||||||
let status = response.status();
|
let status = response.status();
|
||||||
if !status.is_success() {
|
if !status.is_success() {
|
||||||
@@ -510,12 +510,58 @@ impl LLMProvider for OllamaProvider {
|
|||||||
return Err(anyhow!("Ollama API error {}: {}", status, error_text));
|
return Err(anyhow!("Ollama API error {}: {}", status, error_text));
|
||||||
}
|
}
|
||||||
|
|
||||||
let stream = response.bytes_stream();
|
// For non-streaming, parse the complete JSON response
|
||||||
|
let response_text = response.text().await?;
|
||||||
|
debug!("Raw Ollama API response: {}", response_text);
|
||||||
|
|
||||||
|
let ollama_response: OllamaResponse =
|
||||||
|
serde_json::from_str(&response_text).map_err(|e| {
|
||||||
|
anyhow!(
|
||||||
|
"Failed to parse Ollama response: {} - Response: {}",
|
||||||
|
e,
|
||||||
|
response_text
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
let (tx, rx) = mpsc::channel(100);
|
let (tx, rx) = mpsc::channel(100);
|
||||||
|
|
||||||
let provider = self.clone();
|
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
provider.parse_streaming_response(stream, tx).await;
|
let content = ollama_response.message.content;
|
||||||
|
let usage = Usage {
|
||||||
|
prompt_tokens: ollama_response.prompt_eval_count.unwrap_or(0),
|
||||||
|
completion_tokens: ollama_response.eval_count.unwrap_or(0),
|
||||||
|
total_tokens: ollama_response.prompt_eval_count.unwrap_or(0)
|
||||||
|
+ ollama_response.eval_count.unwrap_or(0),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Extract tool calls if present
|
||||||
|
let tool_calls: Option<Vec<ToolCall>> = ollama_response.message.tool_calls.map(|tcs| {
|
||||||
|
tcs.iter()
|
||||||
|
.map(|tc| ToolCall {
|
||||||
|
id: tc.function.name.clone(),
|
||||||
|
tool: tc.function.name.clone(),
|
||||||
|
args: tc.function.arguments.clone(),
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
});
|
||||||
|
|
||||||
|
// Send content if any
|
||||||
|
if !content.is_empty() {
|
||||||
|
let _ = tx.send(Ok(CompletionChunk {
|
||||||
|
content,
|
||||||
|
finished: false,
|
||||||
|
usage: None,
|
||||||
|
tool_calls: None,
|
||||||
|
})).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send final chunk with usage and tool calls
|
||||||
|
let _ = tx.send(Ok(CompletionChunk {
|
||||||
|
content: String::new(),
|
||||||
|
finished: true,
|
||||||
|
usage: Some(usage),
|
||||||
|
tool_calls,
|
||||||
|
})).await;
|
||||||
});
|
});
|
||||||
|
|
||||||
Ok(ReceiverStream::new(rx))
|
Ok(ReceiverStream::new(rx))
|
||||||
|
|||||||
Reference in New Issue
Block a user