From a7e0b0ef9ed6dbd53050d7d010d46b3965c0290d Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Fri, 13 Feb 2026 12:37:09 +1100 Subject: [PATCH] Refactor: deduplicate JSON parsing, provider constructors, and identity function Agent: fowler Eliminate code-path aliasing and near-duplicates across recent commits: 1. Deduplicate find_json_object_end: Three near-identical copies in streaming_parser.rs, context_window.rs, and acd.rs consolidated into a single canonical implementation in utils.rs. All callers now route through the canonical version. The utils.rs version uses the most defensive variant (with found_start guard). (-84 lines) 2. Deduplicate provider constructors: AnthropicProvider::new() and GeminiProvider::new() now delegate to their respective new_with_name() methods instead of duplicating the full constructor body. (OpenAI already delegated.) (-28 lines) 3. Inline convert_cache_control: Removed identity function that just cloned CacheControl. Call sites now use .map(|cc| cc.clone()) directly. (-4 lines) Net: -65 lines, 0 behavior changes, all 683 library tests pass. --- crates/g3-core/src/acd.rs | 26 +--------------- crates/g3-core/src/context_window.rs | 26 +--------------- crates/g3-core/src/streaming_parser.rs | 33 +------------------- crates/g3-core/src/utils.rs | 43 ++++++++++++++++++++++++++ crates/g3-providers/src/anthropic.rs | 32 +++---------------- crates/g3-providers/src/gemini.rs | 9 +----- 6 files changed, 52 insertions(+), 117 deletions(-) diff --git a/crates/g3-core/src/acd.rs b/crates/g3-core/src/acd.rs index caa0d61..7902243 100644 --- a/crates/g3-core/src/acd.rs +++ b/crates/g3-core/src/acd.rs @@ -246,31 +246,7 @@ fn extract_tool_name_from_content(content: &str) -> Option { /// Find the end of a JSON object (matching braces). fn find_json_end(json_str: &str) -> Option { - let mut brace_count = 0; - let mut in_string = false; - let mut escape_next = false; - - for (i, ch) in json_str.char_indices() { - if escape_next { - escape_next = false; - continue; - } - - match ch { - '\\' => escape_next = true, - '"' if !escape_next => in_string = !in_string, - '{' if !in_string => brace_count += 1, - '}' if !in_string => { - brace_count -= 1; - if brace_count == 0 { - return Some(i); - } - } - _ => {} - } - } - - None + crate::utils::find_json_object_end(json_str) } /// Estimate token count for messages. diff --git a/crates/g3-core/src/context_window.rs b/crates/g3-core/src/context_window.rs index 8ae0d1d..723d0b3 100644 --- a/crates/g3-core/src/context_window.rs +++ b/crates/g3-core/src/context_window.rs @@ -740,31 +740,7 @@ Format this as a detailed but concise summary that can be used to resume the con /// Find the end position of a JSON object. pub fn find_json_end(json_str: &str) -> Option { - let mut brace_count = 0; - let mut in_string = false; - let mut escape_next = false; - - for (i, ch) in json_str.char_indices() { - if escape_next { - escape_next = false; - continue; - } - - match ch { - '\\' => escape_next = true, - '"' if !escape_next => in_string = !in_string, - '{' if !in_string => brace_count += 1, - '}' if !in_string => { - brace_count -= 1; - if brace_count == 0 { - return Some(i); - } - } - _ => {} - } - } - - None + crate::utils::find_json_object_end(json_str) } } diff --git a/crates/g3-core/src/streaming_parser.rs b/crates/g3-core/src/streaming_parser.rs index 9c9f297..a316ebf 100644 --- a/crates/g3-core/src/streaming_parser.rs +++ b/crates/g3-core/src/streaming_parser.rs @@ -116,38 +116,7 @@ fn is_position_in_fence_ranges(pos: usize, ranges: &[(usize, usize)]) -> bool { // JSON Parsing Utilities // ============================================================================ -/// Find the end byte index of a complete JSON object, or None if incomplete. -fn find_json_object_end(text: &str) -> Option { - let mut brace_count = 0; - let mut in_string = false; - let mut escape_next = false; - let mut found_start = false; - - for (i, ch) in text.char_indices() { - if escape_next { - escape_next = false; - continue; - } - - match ch { - '\\' => escape_next = true, - '"' => in_string = !in_string, - '{' if !in_string => { - brace_count += 1; - found_start = true; - } - '}' if !in_string => { - brace_count -= 1; - if brace_count == 0 && found_start { - return Some(i); - } - } - _ => {} - } - } - - None -} +use crate::utils::find_json_object_end; /// Check if a partial JSON tool call has been invalidated. /// diff --git a/crates/g3-core/src/utils.rs b/crates/g3-core/src/utils.rs index 82efe5a..0b2fc4a 100644 --- a/crates/g3-core/src/utils.rs +++ b/crates/g3-core/src/utils.rs @@ -545,6 +545,49 @@ pub fn fix_mixed_quotes_in_json(json_str: &str) -> String { result } +// ============================================================================ +// JSON Utilities +// ============================================================================ + +/// Find the end byte-index of a complete JSON object starting at the beginning of `text`. +/// +/// Tracks brace nesting and string escaping to find the matching `}` for the +/// first `{`. Returns `None` if the JSON is incomplete or no `{` is found. +/// +/// This is the single canonical implementation — used by streaming_parser, +/// context_window thinning, and ACD fragment parsing. +pub fn find_json_object_end(text: &str) -> Option { + let mut brace_count = 0; + let mut in_string = false; + let mut escape_next = false; + let mut found_start = false; + + for (i, ch) in text.char_indices() { + if escape_next { + escape_next = false; + continue; + } + + match ch { + '\\' => escape_next = true, + '"' => in_string = !in_string, + '{' if !in_string => { + brace_count += 1; + found_start = true; + } + '}' if !in_string => { + brace_count -= 1; + if brace_count == 0 && found_start { + return Some(i); + } + } + _ => {} + } + } + + None +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/g3-providers/src/anthropic.rs b/crates/g3-providers/src/anthropic.rs index 4b3fe2a..2e6ea76 100644 --- a/crates/g3-providers/src/anthropic.rs +++ b/crates/g3-providers/src/anthropic.rs @@ -145,26 +145,7 @@ impl AnthropicProvider { enable_1m_context: Option, thinking_budget_tokens: Option, ) -> Result { - let client = Client::builder() - .timeout(Duration::from_secs(300)) - .build() - .map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?; - - let model = model.unwrap_or_else(|| "claude-3-5-sonnet-20241022".to_string()); - - debug!("Initialized Anthropic provider with model: {}", model); - - Ok(Self { - client, - name: "anthropic".to_string(), - api_key, - model, - max_tokens: max_tokens.unwrap_or(32768), - temperature: temperature.unwrap_or(0.1), - cache_config, - enable_1m_context: enable_1m_context.unwrap_or(false), - thinking_budget_tokens, - }) + Self::new_with_name("anthropic".to_string(), api_key, model, max_tokens, temperature, cache_config, enable_1m_context, thinking_budget_tokens) } /// Create a new AnthropicProvider with a custom name (e.g., "anthropic.default") @@ -222,10 +203,7 @@ impl AnthropicProvider { builder } - fn convert_cache_control(cache_control: &crate::CacheControl) -> crate::CacheControl { - // Anthropic uses the same format, so just clone it - cache_control.clone() - } + // Anthropic uses the same CacheControl format — no conversion needed, just clone at call sites. fn convert_tools(&self, tools: &[Tool]) -> Vec { tools @@ -312,7 +290,7 @@ impl AnthropicProvider { cache_control: message .cache_control .as_ref() - .map(Self::convert_cache_control), + .map(|cc| cc.clone()), }); } else { // Regular user message: images as top-level blocks, then text @@ -330,7 +308,7 @@ impl AnthropicProvider { cache_control: message .cache_control .as_ref() - .map(Self::convert_cache_control), + .map(|cc| cc.clone()), }); } @@ -349,7 +327,7 @@ impl AnthropicProvider { cache_control: message .cache_control .as_ref() - .map(Self::convert_cache_control), + .map(|cc| cc.clone()), }); } diff --git a/crates/g3-providers/src/gemini.rs b/crates/g3-providers/src/gemini.rs index db0148e..41986b6 100644 --- a/crates/g3-providers/src/gemini.rs +++ b/crates/g3-providers/src/gemini.rs @@ -77,14 +77,7 @@ impl GeminiProvider { max_tokens: Option, temperature: Option, ) -> Result { - Ok(Self { - client: Client::new(), - api_key, - model: model.unwrap_or_else(|| "gemini-2.0-flash".to_string()), - max_tokens: max_tokens.unwrap_or(16384), - temperature: temperature.unwrap_or(0.1), - name: "gemini".to_string(), - }) + Self::new_with_name("gemini".to_string(), api_key, model, max_tokens, temperature) } pub fn new_with_name(