refactor(g3-core): use StreamingState and IterationState structs in stream_completion_with_tools
Consolidate scattered state variables in the 834-line stream_completion_with_tools() function to use the existing StreamingState and IterationState structs from streaming.rs. This eliminates code-path aliasing, where the same state was tracked in multiple places, and makes the streaming loop easier to reason about.

Changes:
- Add assistant_message_added field to StreamingState
- Add stream_stop_reason field to IterationState
- Replace 8 inline state variables with StreamingState::new()
- Replace 7 iteration-local variables with IterationState::new()
- All 585 workspace tests pass

This is a pure refactor with no behavior changes. The state structs were already defined in streaming.rs but not used in the main streaming loop.

Agent: fowler
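For orientation, here is the shape of the two structs after this commit, reconstructed from the hunks below. Only fields visible in the diff (as struct members or as `state.*` / `iter.*` accesses) are listed; the field ordering and the exact types of fields the diff never spells out (`full_response`, `stream_start`, and friends) are assumptions, and `StreamingToolParser` / `g3_providers::Usage` are crate-internal types.

```rust
// Sketch reconstructed from the diff, not the authoritative definition.
use std::time::{Duration, Instant};

// Previously a local `const MAX_ITERATIONS: usize = 400;` inside
// stream_completion_with_tools(); now referenced as streaming::MAX_ITERATIONS.
pub const MAX_ITERATIONS: usize = 400;

/// Turn-scoped state: created once per call, survives every loop iteration.
pub struct StreamingState {
    pub iteration_count: usize,
    pub full_response: String,             // assumed String; cloned into finalize_streaming_turn()
    pub stream_start: Instant,             // assumed Instant, per state.stream_start.elapsed()
    pub first_token_time: Option<Duration>,
    pub response_started: bool,
    pub any_tool_executed: bool,
    pub auto_summary_attempts: usize,
    pub assistant_message_added: bool,     // added by this commit
    pub turn_accumulated_usage: Option<g3_providers::Usage>,
}

/// Iteration-scoped state: rebuilt via IterationState::new() on every loop pass.
pub struct IterationState {
    pub parser: StreamingToolParser,       // replaces the old local `parser`
    pub current_response: String,
    pub tool_executed: bool,
    pub chunks_received: usize,
    pub raw_chunks: Vec<String>,
    pub accumulated_usage: Option<g3_providers::Usage>,
    pub stream_stop_reason: Option<String>, // added by this commit
}
```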
@@ -1,5 +1,5 @@
 # Project Memory
-> Updated: 2026-01-20T08:53:25Z | Size: 16.3k chars
+> Updated: 2026-01-20T09:01:08Z | Size: 16.7k chars
 
 ### Remember Tool Wiring
 - `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()`
@@ -296,3 +296,10 @@ Shared display functions for interactive and agent modes.
 - `LoadedContent` [32..39] - tracks loaded project files (README, AGENTS.md, Memory, include prompt)
 - `print_loaded_status()` [87..103] - prints "✓ README ✓ AGENTS.md" status line
 - `print_project_heading()` [106..114] - prints project name from README
+
+### Interactive Commands Module
+Handles `/` commands in interactive mode (extracted from interactive.rs).
+
+- `crates/g3-cli/src/commands.rs`
+- `handle_command()` [17..320] - dispatches `/help`, `/compact`, `/thinnify`, `/skinnify`, `/fragments`, `/rehydrate`, `/run`, `/dump`, `/clear`, `/readme`, `/stats`, `/resume`
+- Returns `Result<bool>` - true if command handled and loop should continue
@@ -1897,31 +1897,23 @@ Skip if nothing new. Be brief."#;
         debug!("Starting stream_completion_with_tools");
 
         // --- State Initialization ---
-        let full_response = String::new();
-        let mut first_token_time: Option<Duration> = None;
-        let stream_start = Instant::now();
-        let mut iteration_count = 0;
-        const MAX_ITERATIONS: usize = 400; // Prevent infinite loops
-        let mut response_started = false;
-        let mut any_tool_executed = false; // Track if ANY tool was executed across all iterations
-        let mut assistant_message_added = false; // Track if assistant message was added to context this iteration
         // Note: Session-level duplicate tracking was removed - we only prevent sequential duplicates (DUP IN CHUNK, DUP IN MSG)
-        let mut turn_accumulated_usage: Option<g3_providers::Usage> = None; // Track token usage for timing footer
+        let mut state = streaming::StreamingState::new();
 
         // --- Phase 1: Pre-loop Context Capacity Check ---
         self.ensure_context_capacity(&mut request).await?;
 
         // --- Phase 2: Main Streaming Loop ---
         loop {
-            iteration_count += 1;
-            debug!("Starting iteration {}", iteration_count);
-            if iteration_count > MAX_ITERATIONS {
+            state.iteration_count += 1;
+            debug!("Starting iteration {}", state.iteration_count);
+            if state.iteration_count > streaming::MAX_ITERATIONS {
                 warn!("Maximum iterations reached, stopping stream");
                 break;
             }
 
             // Add a small delay between iterations to prevent "model busy" errors
-            if iteration_count > 1 {
+            if state.iteration_count > 1 {
                 tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
             }
 
@@ -1970,10 +1962,10 @@ Skip if nothing new. Be brief."#;
                 Err(e) => {
                     error!("Failed to start stream: {}", e);
                     // Additional retry for "busy" errors on subsequent iterations
-                    if iteration_count > 1 && e.to_string().contains("busy") {
+                    if state.iteration_count > 1 && e.to_string().contains("busy") {
                         warn!(
                             "Model busy on iteration {}, attempting one more retry in 500ms",
-                            iteration_count
+                            state.iteration_count
                         );
                         tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
 
@@ -1994,14 +1986,8 @@ Skip if nothing new. Be brief."#;
             // Write context window summary every time we send messages to LLM
             self.write_context_window_summary();
 
-            let mut parser = StreamingToolParser::new();
-            let mut current_response = String::new();
-            let mut tool_executed = false;
-            let mut chunks_received = 0;
-            let mut raw_chunks: Vec<String> = Vec::new(); // Store raw chunks for debugging
-
-            let mut accumulated_usage: Option<g3_providers::Usage> = None;
-            let mut stream_stop_reason: Option<String> = None; // Track why the stream stopped
+            // Create fresh iteration state for this streaming iteration
+            let mut iter = streaming::IterationState::new();
 
             while let Some(chunk_result) = stream.next().await {
                 match chunk_result {
@@ -2011,8 +1997,8 @@ Skip if nothing new. Be brief."#;
 
                         // Capture usage data if available
                         if let Some(ref usage) = chunk.usage {
-                            accumulated_usage = Some(usage.clone());
-                            turn_accumulated_usage = Some(usage.clone());
+                            iter.accumulated_usage = Some(usage.clone());
+                            state.turn_accumulated_usage = Some(usage.clone());
                             debug!(
                                 "Received usage data - prompt: {}, completion: {}, total: {}",
                                 usage.prompt_tokens, usage.completion_tokens, usage.total_tokens
@@ -2031,29 +2017,29 @@ Skip if nothing new. Be brief."#;
                         }
 
                         // Store raw chunk for debugging (limit to first 20 and last 5)
-                        if chunks_received < 20 || chunk.finished {
-                            raw_chunks.push(format!(
+                        if iter.chunks_received < 20 || chunk.finished {
+                            iter.raw_chunks.push(format!(
                                 "Chunk #{}: content={:?}, finished={}, tool_calls={:?}",
-                                chunks_received + 1,
+                                iter.chunks_received + 1,
                                 chunk.content,
                                 chunk.finished,
                                 chunk.tool_calls
                             ));
-                        } else if raw_chunks.len() == 20 {
-                            raw_chunks.push("... (chunks 21+ omitted for brevity) ...".to_string());
+                        } else if iter.raw_chunks.len() == 20 {
+                            iter.raw_chunks.push("... (chunks 21+ omitted for brevity) ...".to_string());
                         }
 
                         // Record time to first token
-                        if first_token_time.is_none() && !chunk.content.is_empty() {
-                            first_token_time = Some(stream_start.elapsed());
+                        if state.first_token_time.is_none() && !chunk.content.is_empty() {
+                            state.first_token_time = Some(state.stream_start.elapsed());
                             // Record in agent metrics
-                            if let Some(ttft) = first_token_time {
+                            if let Some(ttft) = state.first_token_time {
                                 self.first_token_times.push(ttft);
                             }
                         }
 
-                        chunks_received += 1;
-                        if chunks_received == 1 {
+                        iter.chunks_received += 1;
+                        if iter.chunks_received == 1 {
                             debug!(
                                 "First chunk received: content_len={}, finished={}",
                                 chunk.content.len(),
@@ -2062,7 +2048,7 @@ Skip if nothing new. Be brief."#;
                         }
 
                         // Process chunk with the new parser
-                        let completed_tools = parser.process_chunk(&chunk);
+                        let completed_tools = iter.parser.process_chunk(&chunk);
 
                         // Handle completed tool calls - process all if multiple calls enabled
                         // Always process all tool calls - they will be executed after stream ends
@@ -2101,8 +2087,8 @@ Skip if nothing new. Be brief."#;
                             }
 
                             // Calculate new content to display (skip already-shown text)
-                            let already_displayed_chars = current_response.chars().count();
-                            let text_content = parser.get_text_content();
+                            let already_displayed_chars = iter.current_response.chars().count();
+                            let text_content = iter.parser.get_text_content();
                             let clean_content = streaming::clean_llm_tokens(&text_content);
                             let raw_content_for_log = clean_content.clone();
                             let filtered_content =
@@ -2111,7 +2097,7 @@ Skip if nothing new. Be brief."#;
 
                             // Extract only the new (undisplayed) portion
                             let new_content =
-                                if current_response.len() <= final_display_content.len() {
+                                if iter.current_response.len() <= final_display_content.len() {
                                     final_display_content
                                         .chars()
                                         .skip(already_displayed_chars)
@@ -2123,13 +2109,13 @@ Skip if nothing new. Be brief."#;
                             // Display new text before tool execution
                             if !new_content.trim().is_empty() {
                                 #[allow(unused_assignments)]
-                                if !response_started {
+                                if !state.response_started {
                                     self.ui_writer.print_agent_prompt();
-                                    response_started = true;
+                                    state.response_started = true;
                                 }
                                 self.ui_writer.print_agent_response(&new_content);
                                 self.ui_writer.flush();
-                                current_response.push_str(&new_content);
+                                iter.current_response.push_str(&new_content);
                             }
 
                             self.ui_writer.finish_streaming_markdown();
@@ -2392,8 +2378,8 @@ Skip if nothing new. Be brief."#;
                             // 1. At the end when no tools were executed
                             // 2. At the end when no tools were executed (handled in the "no tool executed" branch)
 
-                            tool_executed = true;
-                            any_tool_executed = true; // Track across all iterations
+                            iter.tool_executed = true;
+                            state.any_tool_executed = true; // Track across all iterations
 
                             // Reset the JSON tool call filter state after each tool execution
                             // This ensures the filter doesn't stay in suppression mode for subsequent streaming content
@@ -2401,22 +2387,22 @@ Skip if nothing new. Be brief."#;
 
                             // Only reset parser if there are no more unexecuted tool calls in the buffer
                             // This handles the case where the LLM emits multiple tool calls in one response
-                            if parser.has_unexecuted_tool_call() {
+                            if iter.parser.has_unexecuted_tool_call() {
                                 debug!(
                                     "Parser still has unexecuted tool calls, not resetting buffer"
                                 );
                                 // Mark current tool as consumed so we don't re-detect it
-                                parser.mark_tool_calls_consumed();
+                                iter.parser.mark_tool_calls_consumed();
                             } else {
                                 // Reset parser for next iteration - this clears the text buffer
-                                parser.reset();
+                                iter.parser.reset();
                             }
 
                             // Clear current_response for next iteration to prevent buffered text
                             // from being incorrectly displayed after tool execution
-                            current_response.clear();
+                            iter.current_response.clear();
                             // Reset for next iteration (value read in next loop pass)
-                            response_started = false;
+                            state.response_started = false;
 
                             // Continue processing - don't break mid-stream
                         } // End of for loop processing each tool call
@@ -2425,7 +2411,7 @@ Skip if nothing new. Be brief."#;
                         // All tool calls are collected and executed after the stream ends.
 
                         // If no tool calls were completed, continue streaming normally
-                        if !tool_executed {
+                        if !iter.tool_executed {
                             let clean_content = streaming::clean_llm_tokens(&chunk.content);
 
                             if !clean_content.is_empty() {
@@ -2433,42 +2419,42 @@ Skip if nothing new. Be brief."#;
                                     self.ui_writer.filter_json_tool_calls(&clean_content);
 
                                 if !filtered_content.is_empty() {
-                                    if !response_started {
+                                    if !state.response_started {
                                         self.ui_writer.print_agent_prompt();
-                                        response_started = true;
+                                        state.response_started = true;
                                     }
 
                                     self.ui_writer.print_agent_response(&filtered_content);
                                     self.ui_writer.flush();
-                                    current_response.push_str(&filtered_content);
+                                    iter.current_response.push_str(&filtered_content);
                                 }
                             }
                         }
 
                         if chunk.finished {
                             debug!("Stream finished: tool_executed={}, current_response_len={}, full_response_len={}, chunks_received={}",
-                                tool_executed, current_response.len(), full_response.len(), chunks_received);
+                                iter.tool_executed, iter.current_response.len(), state.full_response.len(), iter.chunks_received);
 
                             // Capture the stop reason from the final chunk
                             if let Some(ref reason) = chunk.stop_reason {
                                 debug!("Stream stop_reason: {}", reason);
-                                stream_stop_reason = Some(reason.clone());
+                                iter.stream_stop_reason = Some(reason.clone());
                             }
 
                             // Stream finished - check if we should continue or return
-                            if !tool_executed {
+                            if !iter.tool_executed {
                                 // No tools were executed in this iteration
                                 // Check if we got any meaningful response at all
                                 // We need to check the parser's text buffer as well, since the LLM
                                 // might have responded with text but no tool calls
-                                let text_content = parser.get_text_content();
+                                let text_content = iter.parser.get_text_content();
                                 let has_text_response = !text_content.trim().is_empty()
-                                    || !current_response.trim().is_empty();
+                                    || !iter.current_response.trim().is_empty();
 
                                 // Don't re-add text from parser buffer if we already displayed it
                                 // The parser buffer contains ALL accumulated text, but current_response
                                 // already has what was displayed during streaming
-                                if current_response.is_empty() && !text_content.trim().is_empty() {
+                                if iter.current_response.is_empty() && !text_content.trim().is_empty() {
                                     // Only use parser text if we truly have no response
                                     // This should be rare - only if streaming failed to display anything
                                     debug!("Warning: Using parser buffer text as fallback - this may duplicate output");
@@ -2480,7 +2466,7 @@ Skip if nothing new. Be brief."#;
                                         self.ui_writer.filter_json_tool_calls(&clean_text);
 
                                     // Only use this if we truly have nothing else
-                                    if !filtered_text.trim().is_empty() && full_response.is_empty()
+                                    if !filtered_text.trim().is_empty() && state.full_response.is_empty()
                                     {
                                         debug!(
                                             "Using filtered parser text as last resort: {} chars",
@@ -2491,22 +2477,22 @@ Skip if nothing new. Be brief."#;
                                     }
                                 }
 
-                                if !has_text_response && full_response.is_empty() {
+                                if !has_text_response && state.full_response.is_empty() {
                                     streaming::log_stream_error(
-                                        iteration_count,
+                                        state.iteration_count,
                                         &provider_name,
                                         &provider_model,
-                                        chunks_received,
-                                        &parser,
+                                        iter.chunks_received,
+                                        &iter.parser,
                                         &request,
                                         &self.context_window,
                                         self.session_id.as_deref(),
-                                        &raw_chunks,
+                                        &iter.raw_chunks,
                                     );
 
                                     // No response received - this is an error condition
                                     warn!("Stream finished without any content or tool calls");
-                                    warn!("Chunks received: {}", chunks_received);
+                                    warn!("Chunks received: {}", iter.chunks_received);
                                     return Err(anyhow::anyhow!(
                                         "No response received from the model. The model may be experiencing issues or the request may have been malformed."
                                     ));
@@ -2514,18 +2500,18 @@ Skip if nothing new. Be brief."#;
 
                                 // If tools were executed in previous iterations,
                                 // break to let the outer loop handle finalization
-                                if any_tool_executed {
+                                if state.any_tool_executed {
                                     debug!("Tools were executed in previous iterations, breaking to finalize");
                                     // IMPORTANT: Save any text response to context window before breaking
                                     // This ensures text displayed after tool execution is not lost
-                                    if !current_response.trim().is_empty() && !assistant_message_added {
-                                        debug!("Saving current_response ({} chars) to context before finalization", current_response.len());
+                                    if !iter.current_response.trim().is_empty() && !state.assistant_message_added {
+                                        debug!("Saving current_response ({} chars) to context before finalization", iter.current_response.len());
                                         let assistant_msg = Message::new(
                                             MessageRole::Assistant,
-                                            current_response.clone(),
+                                            iter.current_response.clone(),
                                         );
                                         self.context_window.add_message(assistant_msg);
-                                        assistant_message_added = true;
+                                        state.assistant_message_added = true;
                                     }
 
                                     // NOTE: We intentionally do NOT set full_response here.
@@ -2538,14 +2524,14 @@ Skip if nothing new. Be brief."#;
 
                                 // Save assistant message before returning (no tools were executed)
                                 // This ensures text-only responses are saved to context
-                                if !current_response.trim().is_empty() && !assistant_message_added {
-                                    debug!("Saving current_response ({} chars) to context before early return", current_response.len());
+                                if !iter.current_response.trim().is_empty() && !state.assistant_message_added {
+                                    debug!("Saving current_response ({} chars) to context before early return", iter.current_response.len());
                                     let assistant_msg = Message::new(
                                         MessageRole::Assistant,
-                                        current_response.clone(),
+                                        iter.current_response.clone(),
                                     );
                                     self.context_window.add_message(assistant_msg);
-                                    // assistant_message_added = true; // Not needed, we're returning
+                                    // state.assistant_message_added = true; // Not needed, we're returning
                                 }
 
                                 // Set full_response to empty to avoid duplication in return value
@@ -2553,9 +2539,9 @@ Skip if nothing new. Be brief."#;
                                 return Ok(self.finalize_streaming_turn(
                                     String::new(),
                                     show_timing,
-                                    stream_start,
-                                    first_token_time,
-                                    &turn_accumulated_usage,
+                                    state.stream_start,
+                                    state.first_token_time,
+                                    &state.turn_accumulated_usage,
                                 ));
                             }
                             break; // Tool was executed, break to continue outer loop
@@ -2566,13 +2552,13 @@ Skip if nothing new. Be brief."#;
                         let error_msg = e.to_string();
                         let error_details = format!(
                             "Streaming error at chunk {}: {}",
-                            chunks_received + 1,
+                            iter.chunks_received + 1,
                             error_msg
                         );
 
                         error!("Error type: {}", std::any::type_name_of_val(&e));
                         error!("Parser state at error: text_buffer_len={}, has_incomplete={}, message_stopped={}",
-                            parser.text_buffer_len(), parser.has_incomplete_tool_call(), parser.is_message_stopped());
+                            iter.parser.text_buffer_len(), iter.parser.has_incomplete_tool_call(), iter.parser.is_message_stopped());
 
                         // Check if this is a recoverable connection error
                         let is_connection_error = streaming::is_connection_error(&error_msg);
@@ -2580,26 +2566,26 @@ Skip if nothing new. Be brief."#;
                         if is_connection_error {
                             warn!(
                                 "Connection error at chunk {}, treating as end of stream",
-                                chunks_received + 1
+                                iter.chunks_received + 1
                             );
                             // If we have any content or tool calls, treat this as a graceful end
-                            if chunks_received > 0
-                                && (!parser.get_text_content().is_empty()
-                                    || parser.has_unexecuted_tool_call())
+                            if iter.chunks_received > 0
+                                && (!iter.parser.get_text_content().is_empty()
+                                    || iter.parser.has_unexecuted_tool_call())
                             {
                                 warn!("Stream terminated unexpectedly but we have content, continuing");
                                 break; // Break to process what we have
                             }
                         }
 
-                        if tool_executed {
+                        if iter.tool_executed {
                             error!("{}", error_details);
                             warn!("Stream error after tool execution, attempting to continue");
                             break; // Break to outer loop to start new stream
                         } else {
                             // Log raw chunks before failing
                             error!("Fatal streaming error. Raw chunks received before error:");
-                            for chunk_str in raw_chunks.iter().take(10) {
+                            for chunk_str in iter.raw_chunks.iter().take(10) {
                                 error!(" {}", chunk_str);
                             }
                             return Err(e);
@@ -2609,41 +2595,41 @@ Skip if nothing new. Be brief."#;
             }
 
             // Update context window with actual usage if available
-            if let Some(usage) = accumulated_usage {
+            if let Some(usage) = iter.accumulated_usage {
                 debug!("Updating context window with actual usage from stream");
                 self.context_window.update_usage_from_response(&usage);
             } else {
                 // Fall back to estimation if no usage data was provided
                 debug!("No usage data from stream, using estimation");
-                let estimated_tokens = ContextWindow::estimate_tokens(&current_response);
+                let estimated_tokens = ContextWindow::estimate_tokens(&iter.current_response);
                 self.context_window.add_streaming_tokens(estimated_tokens);
             }
 
            // If we get here and no tool was executed, we're done
-            if !tool_executed {
+            if !iter.tool_executed {
                 // IMPORTANT: Do NOT add parser text_content here!
                 // The text has already been displayed during streaming via current_response.
                 // The parser buffer accumulates ALL text and would cause duplication.
                 debug!("Stream completed without tool execution. Response already displayed during streaming.");
                 debug!(
                     "Current response length: {}, Full response length: {}",
-                    current_response.len(),
-                    full_response.len()
+                    iter.current_response.len(),
+                    state.full_response.len()
                 );
 
-                let has_response = !current_response.is_empty() || !full_response.is_empty();
+                let has_response = !iter.current_response.is_empty() || !state.full_response.is_empty();
 
                 // Check if the response is essentially empty (just whitespace or timing lines)
                 // Check if there's an incomplete tool call in the buffer (for debugging)
-                let has_incomplete_tool_call = parser.has_incomplete_tool_call();
+                let has_incomplete_tool_call = iter.parser.has_incomplete_tool_call();
 
                 // Check if there's a complete but unexecuted tool call in the buffer (for debugging)
-                let has_unexecuted_tool_call = parser.has_unexecuted_tool_call();
+                let has_unexecuted_tool_call = iter.parser.has_unexecuted_tool_call();
 
                 // Log when we detect unexecuted or incomplete tool calls for debugging
                 if has_incomplete_tool_call {
                     debug!("Detected incomplete tool call in buffer (buffer_len={}, consumed_up_to={})",
-                        parser.text_buffer_len(), parser.text_buffer_len());
+                        iter.parser.text_buffer_len(), iter.parser.text_buffer_len());
                 }
                 if has_unexecuted_tool_call {
                     debug!("Detected unexecuted tool call in buffer - this may indicate a parsing issue");
@@ -2652,7 +2638,7 @@ Skip if nothing new. Be brief."#;
 
                 // Check if the response was truncated due to max_tokens
                 let was_truncated_by_max_tokens =
-                    stream_stop_reason.as_deref() == Some("max_tokens");
+                    iter.stream_stop_reason.as_deref() == Some("max_tokens");
                 if was_truncated_by_max_tokens {
                     debug!("Response was truncated due to max_tokens limit");
                     warn!("LLM response was cut off due to max_tokens limit");
@@ -2662,7 +2648,7 @@ Skip if nothing new. Be brief."#;
                 if has_response {
                     debug!(
                         "Response already streamed, not setting full_response. current_response: {} chars",
-                        current_response.len()
+                        iter.current_response.len()
                     );
                 }
 
@@ -2670,9 +2656,9 @@ Skip if nothing new. Be brief."#;
                 // This ensures the log contains the true raw content including any JSON.
                 // Note: We check current_response, not full_response, because full_response
                 // may be empty to avoid display duplication (content was already streamed).
-                if !current_response.trim().is_empty() && !assistant_message_added {
+                if !iter.current_response.trim().is_empty() && !state.assistant_message_added {
                     // Get the raw text from the parser (before filtering)
-                    let raw_text = parser.get_text_content();
+                    let raw_text = iter.parser.get_text_content();
                     let raw_clean = streaming::clean_llm_tokens(&raw_text);
 
                     // Use raw_clean if available, otherwise fall back to current_response.
@@ -2681,18 +2667,18 @@ Skip if nothing new. Be brief."#;
                     let content_to_save = if !raw_clean.trim().is_empty() {
                         raw_clean
                     } else {
-                        current_response.clone()
+                        iter.current_response.clone()
                     };
                     let assistant_message = Message::new(MessageRole::Assistant, content_to_save);
                     self.context_window.add_message(assistant_message);
                 }
 
                 return Ok(self.finalize_streaming_turn(
-                    full_response,
+                    state.full_response.clone(),
                     show_timing,
-                    stream_start,
-                    first_token_time,
-                    &turn_accumulated_usage,
+                    state.stream_start,
+                    state.first_token_time,
+                    &state.turn_accumulated_usage,
                 ));
             }
 
@@ -2701,11 +2687,11 @@ Skip if nothing new. Be brief."#;
 
         // --- Phase 4: Post-Loop Finalization ---
         Ok(self.finalize_streaming_turn(
-            full_response,
+            state.full_response.clone(),
            show_timing,
-            stream_start,
-            first_token_time,
-            &turn_accumulated_usage,
+            state.stream_start,
+            state.first_token_time,
+            &state.turn_accumulated_usage,
         ))
    }
 
@@ -23,6 +23,7 @@ pub struct StreamingState {
     pub response_started: bool,
     pub any_tool_executed: bool,
     pub auto_summary_attempts: usize,
+    pub assistant_message_added: bool,
     pub turn_accumulated_usage: Option<g3_providers::Usage>,
 }
 
@@ -36,6 +37,7 @@ impl StreamingState {
             response_started: false,
             any_tool_executed: false,
             auto_summary_attempts: 0,
+            assistant_message_added: false,
             turn_accumulated_usage: None,
         }
     }
@@ -65,6 +67,7 @@ pub struct IterationState {
     pub chunks_received: usize,
     pub raw_chunks: Vec<String>,
     pub accumulated_usage: Option<g3_providers::Usage>,
+    pub stream_stop_reason: Option<String>,
 }
 
 impl IterationState {
@@ -76,6 +79,7 @@ impl IterationState {
             chunks_received: 0,
             raw_chunks: Vec::new(),
             accumulated_usage: None,
+            stream_stop_reason: None,
         }
     }
 
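The scoping discipline the refactor buys, in miniature. This is a hypothetical, simplified sketch of the loop skeleton, not the real 834-line function; it only shows which struct owns which lifetime.

```rust
// Hypothetical sketch: turn-scoped vs. iteration-scoped state after this refactor.
let mut state = streaming::StreamingState::new(); // survives the whole turn
loop {
    state.iteration_count += 1;
    if state.iteration_count > streaming::MAX_ITERATIONS {
        break; // hard cap, previously a function-local const
    }

    // Fresh per-stream state each pass: the parser buffer, chunk counters,
    // and stop reason cannot leak from one stream into the next.
    let mut iter = streaming::IterationState::new();

    // ... stream chunks: accumulate into iter.*, promote durable facts
    //     (usage, first-token time, "any tool ran") into state.* ...

    if !iter.tool_executed {
        break; // no tool call this pass; finalize using the turn-scoped state
    }
}
```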