Compare commits
2 Commits
main
...
jochen-fix
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ccb8383f6b | ||
|
|
c343dfa2f0 |
@@ -1,5 +1,5 @@
|
||||
# Workspace Memory
|
||||
> Updated: 2026-03-18T03:59:01Z | Size: 25.2k chars
|
||||
> Updated: 2026-02-14T22:33:04Z | Size: 22.9k chars
|
||||
|
||||
### Remember Tool Wiring
|
||||
- `crates/g3-core/src/tools/memory.rs` [0..5686]
|
||||
@@ -397,18 +397,3 @@ Tool output responsive to terminal width — no line wrapping, 4-char right marg
|
||||
- `crates/g3-core/src/lib.rs` [1675..1686] - `set_plan_mode(enabled, working_dir)` - captures baseline on enable, clears on disable
|
||||
- **Key invariant**: The approval gate NEVER deletes or reverts files. It only warns.
|
||||
- **Key invariant**: Pre-existing dirty files (captured at plan mode start) are excluded from gate checks.
|
||||
|
||||
### Context Window Calibration (Token Drift Fix)
|
||||
- `crates/g3-core/src/context_window.rs` [159..189] - `update_usage_from_response()` now calibrates `used_tokens` from API `prompt_tokens` (ground truth). When `prompt_tokens > 0`, snaps `used_tokens` to it. When 0, leaves unchanged (heuristic fallback).
|
||||
- `crates/g3-core/src/context_window.rs` [93..100] - No more 1% safety buffer. `total_tokens = raw` (was `raw * 0.99`).
|
||||
- `crates/g3-core/src/context_window.rs` [222..250] - `estimate_message_tokens()` now adds: +4 per-message overhead, +30 per tool_use block (was 20), +15 per tool_result message.
|
||||
- `crates/g3-core/src/lib.rs` [2232..2241] - `ensure_context_capacity()` called inside streaming loop for iteration > 1 (catches post-tool-execution growth).
|
||||
- **Root cause**: Heuristic token estimation drifted ~48% over 809 messages / 388 tool calls (136k estimated vs 201k actual). API `prompt_tokens` is ground truth.
|
||||
|
||||
### Context Window Calibration (Token Drift Fix) - CORRECTED
|
||||
- `crates/g3-core/src/context_window.rs` [168..189] - `update_usage_from_response()` calibrates `used_tokens` from API `prompt_tokens` (ground truth). When `prompt_tokens > 0`, snaps `used_tokens` to it. When 0, leaves unchanged (heuristic fallback).
|
||||
- `crates/g3-core/src/lib.rs` [2316..2319] - Calibration call placed **inline** during streaming (when usage chunk arrives in `chunk.usage`), NOT after the streaming loop. Critical because text-only responses take an early return path that bypasses post-loop code.
|
||||
- `crates/g3-core/src/lib.rs` [2892..2898] - Post-loop code only handles fallback (no-usage) case now.
|
||||
- `crates/g3-core/src/context_window.rs` [87..93] - 1% safety buffer IS still in place (`total_tokens * 0.99`). Left as safety net between calibration points.
|
||||
- **Root cause of display bug**: (1) `update_usage_from_response` never calibrated `used_tokens`, only `cumulative_tokens`. (2) `execute_single_task` had mock usage with hardcoded `prompt_tokens: 100`. (3) Post-loop usage update was bypassed by early returns in text-only response paths.
|
||||
- **Key streaming flow**: For text-only responses (most common in interactive mode), `chunk.finished` triggers an early `return Ok(self.finalize_streaming_turn(...))` that bypasses all post-loop code. Calibration MUST happen inline when `chunk.usage` arrives.
|
||||
@@ -207,6 +207,9 @@ pub async fn run_agent_mode(
|
||||
if flags.acd {
|
||||
agent.set_acd_enabled(true);
|
||||
}
|
||||
if flags.skip_plan_tool_check {
|
||||
agent.set_skip_plan_tool_check(true);
|
||||
}
|
||||
|
||||
// If resuming a session, restore context and TODO
|
||||
let initial_task = if let Some(ref incomplete_session) = resuming_session {
|
||||
|
||||
@@ -32,6 +32,8 @@ pub struct CommonFlags {
|
||||
pub project: Option<PathBuf>,
|
||||
/// Resume a specific session by ID
|
||||
pub resume: Option<String>,
|
||||
/// Skip the plan approval gate for plan tools
|
||||
pub skip_plan_tool_check: bool,
|
||||
}
|
||||
|
||||
#[derive(Parser, Clone)]
|
||||
@@ -161,6 +163,11 @@ pub struct Cli {
|
||||
/// Load a project from the given path at startup (like /project but without auto-prompt)
|
||||
#[arg(long, value_name = "PATH")]
|
||||
pub project: Option<PathBuf>,
|
||||
|
||||
/// Skip the plan approval gate check for plan tools (plan_read, plan_write, plan_approve).
|
||||
/// Without this flag, plan tools are subject to the same approval gate as other tools.
|
||||
#[arg(long)]
|
||||
pub skip_plan_tool_check: bool,
|
||||
}
|
||||
|
||||
impl Cli {
|
||||
@@ -179,6 +186,7 @@ impl Cli {
|
||||
acd: self.acd,
|
||||
project: self.project.clone(),
|
||||
resume: self.resume.clone(),
|
||||
skip_plan_tool_check: self.skip_plan_tool_check,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -193,6 +193,9 @@ async fn run_console_mode(
|
||||
if cli.acd {
|
||||
agent.set_acd_enabled(true);
|
||||
}
|
||||
if cli.skip_plan_tool_check {
|
||||
agent.set_skip_plan_tool_check(true);
|
||||
}
|
||||
|
||||
// Load CLI project if --project flag was specified
|
||||
let initial_project: Option<project::Project> = if let Some(ref project_path) = cli.project {
|
||||
|
||||
@@ -22,35 +22,6 @@ use termimad::MadSkin;
|
||||
static SYNTAX_SET: Lazy<SyntaxSet> = Lazy::new(SyntaxSet::load_defaults_newlines);
|
||||
static THEME_SET: Lazy<ThemeSet> = Lazy::new(ThemeSet::load_defaults);
|
||||
|
||||
// ── Catppuccin Macchiato palette ──────────────────────────────────────────────
|
||||
// https://github.com/catppuccin/catppuccin (Macchiato variant)
|
||||
//
|
||||
// Each constant is an ANSI 24-bit color prefix: \x1b[38;2;R;G;Bm
|
||||
// Combine with style modifiers (1=bold, 3=italic, 4=underline, 9=strike) as needed.
|
||||
|
||||
/// Mauve #c6a0f6 — H1 headers
|
||||
const MAUVE: &str = "\x1b[38;2;198;160;246m";
|
||||
/// Blue #8aadf4 — H2 headers
|
||||
const BLUE: &str = "\x1b[38;2;138;173;244m";
|
||||
/// Lavender #b7bdf8 — H3 headers
|
||||
const LAVENDER: &str = "\x1b[38;2;183;189;248m";
|
||||
/// Teal #8bd5ca — H4 headers
|
||||
const TEAL: &str = "\x1b[38;2;139;213;202m";
|
||||
/// Subtext1 #a5adcb — H5/H6 headers (dim)
|
||||
const SUBTEXT1: &str = "\x1b[38;2;165;173;203m";
|
||||
/// Sky #91d7e3 — italic text
|
||||
const SKY: &str = "\x1b[38;2;145;215;227m";
|
||||
/// Sapphire #7dc4e4 — bold text
|
||||
const SAPPHIRE: &str = "\x1b[38;2;125;196;228m";
|
||||
/// Peach #f5a97f — inline code
|
||||
const PEACH: &str = "\x1b[38;2;245;169;127m";
|
||||
/// Green #a6da95 — links
|
||||
const GREEN: &str = "\x1b[38;2;166;218;149m";
|
||||
/// Overlay1 #8087a2 — horizontal rules, muted elements
|
||||
const OVERLAY1: &str = "\x1b[38;2;128;135;162m";
|
||||
/// Reset all attributes
|
||||
const RESET: &str = "\x1b[0m";
|
||||
|
||||
/// Types of markdown delimiters we track.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
enum DelimiterKind {
|
||||
@@ -557,7 +528,7 @@ impl StreamingMarkdownFormatter {
|
||||
|| (trimmed.len() >= 3 && trimmed.chars().all(|c| c == '_'));
|
||||
if is_hr {
|
||||
// Emit a horizontal rule
|
||||
self.pending_output.push_back(format!("{}────────────────────────────────────────{}\n", OVERLAY1, RESET));
|
||||
self.pending_output.push_back("\x1b[2m────────────────────────────────────────\x1b[0m\n".to_string());
|
||||
self.current_line.clear();
|
||||
self.delimiter_stack.clear();
|
||||
return;
|
||||
@@ -598,12 +569,12 @@ impl StreamingMarkdownFormatter {
|
||||
// Format based on level (magenta, bold for h1/h2)
|
||||
// We wrap the already-formatted content in header color, then reset at the end
|
||||
match level {
|
||||
1 => format!("\x1b[1m{}{}{}\n", MAUVE, formatted_content, RESET), // Bold Mauve
|
||||
2 => format!("{}{}{}\n", BLUE, formatted_content, RESET), // Blue
|
||||
3 => format!("{}{}{}\n", LAVENDER, formatted_content, RESET), // Lavender
|
||||
4 => format!("{}{}{}\n", TEAL, formatted_content, RESET), // Teal
|
||||
5 => format!("\x1b[2m{}{}{}\n", SUBTEXT1, formatted_content, RESET), // Dim Subtext1
|
||||
_ => format!("\x1b[2m{}{}{}\n", SUBTEXT1, formatted_content, RESET), // Dim Subtext1
|
||||
1 => format!("\x1b[1;95m{}\x1b[0m\n", formatted_content), // Bold pink (Dracula)
|
||||
2 => format!("\x1b[35m{}\x1b[0m\n", formatted_content), // Purple/magenta (Dracula)
|
||||
3 => format!("\x1b[36m{}\x1b[0m\n", formatted_content), // Cyan (Dracula)
|
||||
4 => format!("\x1b[37m{}\x1b[0m\n", formatted_content), // White (Dracula)
|
||||
5 => format!("\x1b[2m{}\x1b[0m\n", formatted_content), // Dim (Dracula)
|
||||
_ => format!("\x1b[2m{}\x1b[0m\n", formatted_content), // Dim for h6+ (Dracula)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -636,14 +607,14 @@ impl StreamingMarkdownFormatter {
|
||||
let text = &caps[1];
|
||||
// Format any inline code within the link text
|
||||
let formatted_text = format_inline_code_only(text);
|
||||
format!("\x1b[4m{}{}{}", GREEN, formatted_text, RESET)
|
||||
format!("\x1b[36;4m{}\x1b[0m", formatted_text)
|
||||
}).to_string();
|
||||
|
||||
// Process inline code `code` -> code (in orange)
|
||||
let code_re = regex::Regex::new(r"`([^`]+)`").unwrap();
|
||||
result = code_re.replace_all(&result, |caps: &regex::Captures| {
|
||||
let code = &caps[1];
|
||||
format!("{}{}{}", PEACH, code, RESET)
|
||||
format!("\x1b[38;2;216;177;114m{}\x1b[0m", code)
|
||||
}).to_string();
|
||||
|
||||
// Handle unclosed inline code at end of line: `code without closing backtick
|
||||
@@ -651,7 +622,7 @@ impl StreamingMarkdownFormatter {
|
||||
let unclosed_code_re = regex::Regex::new(r"`([^`]+)$").unwrap();
|
||||
result = unclosed_code_re.replace_all(&result, |caps: &regex::Captures| {
|
||||
let code = &caps[1];
|
||||
format!("{}{}{}", PEACH, code, RESET)
|
||||
format!("\x1b[38;2;216;177;114m{}\x1b[0m", code)
|
||||
}).to_string();
|
||||
|
||||
// Process strikethrough ~~text~~ -> text (with strikethrough)
|
||||
@@ -674,7 +645,7 @@ impl StreamingMarkdownFormatter {
|
||||
let text = &caps[1];
|
||||
// Process nested italic within bold
|
||||
let inner = format_nested_italic(text);
|
||||
format!("\x1b[1m{}{}{}", SAPPHIRE, inner, RESET)
|
||||
format!("\x1b[1;32m{}\x1b[0m", inner)
|
||||
}).to_string();
|
||||
|
||||
// Restore escaped characters (remove the placeholder markers)
|
||||
@@ -697,7 +668,7 @@ impl StreamingMarkdownFormatter {
|
||||
// Emit language label
|
||||
if let Some(ref l) = lang {
|
||||
self.pending_output
|
||||
.push_back(format!("\x1b[2;3m{}{}{}\n", OVERLAY1, l, RESET));
|
||||
.push_back(format!("\x1b[2;3m{}\x1b[0m\n", l));
|
||||
}
|
||||
|
||||
// Highlight the code
|
||||
@@ -794,7 +765,7 @@ fn format_inline_code_only(text: &str) -> String {
|
||||
let code_re = regex::Regex::new(r"`([^`]+)`").unwrap();
|
||||
code_re.replace_all(text, |caps: &regex::Captures| {
|
||||
let code = &caps[1];
|
||||
format!("{}{}{}", PEACH, code, RESET)
|
||||
format!("\x1b[38;2;216;177;114m{}\x1b[0m", code)
|
||||
}).to_string()
|
||||
}
|
||||
|
||||
@@ -803,7 +774,7 @@ fn format_nested_italic(text: &str) -> String {
|
||||
let italic_re = regex::Regex::new(r"\*([^*]+)\*").unwrap();
|
||||
italic_re.replace_all(text, |caps: &regex::Captures| {
|
||||
let inner = &caps[1];
|
||||
format!("\x1b[3m{}{}{}\x1b[1m{}", SKY, inner, RESET, SAPPHIRE) // italic sky, then restore bold sapphire
|
||||
format!("\x1b[3;36m{}\x1b[0m\x1b[1;32m", inner) // italic, then restore bold
|
||||
}).to_string()
|
||||
}
|
||||
|
||||
@@ -812,7 +783,7 @@ fn format_nested_bold(text: &str) -> String {
|
||||
let bold_re = regex::Regex::new(r"\*\*(.+?)\*\*").unwrap();
|
||||
bold_re.replace_all(text, |caps: &regex::Captures| {
|
||||
let inner = &caps[1];
|
||||
format!("\x1b[1m{}{}{}\x1b[3m{}", SAPPHIRE, inner, RESET, SKY) // bold sapphire, then restore italic sky
|
||||
format!("\x1b[1;32m{}\x1b[0m\x1b[3;36m", inner) // bold, then restore italic
|
||||
}).to_string()
|
||||
}
|
||||
|
||||
@@ -848,7 +819,7 @@ fn process_italic_with_nested_bold(text: &str) -> String {
|
||||
let inner: String = chars[start..end_pos].iter().collect();
|
||||
// Process nested bold within the italic content
|
||||
let formatted_inner = format_nested_bold(&inner);
|
||||
result.push_str(&format!("\x1b[3m{}{}{}", SKY, formatted_inner, RESET));
|
||||
result.push_str(&format!("\x1b[3;36m{}\x1b[0m", formatted_inner));
|
||||
i = end_pos + 1;
|
||||
} else {
|
||||
// No closing *, just output the *
|
||||
@@ -889,7 +860,7 @@ fn highlight_code(code: &str, lang: Option<&str>) -> String {
|
||||
.and_then(|_| normalized_lang.and_then(|l| SYNTAX_SET.find_syntax_by_token(l)))
|
||||
.unwrap_or_else(|| SYNTAX_SET.find_syntax_plain_text());
|
||||
|
||||
let theme = &THEME_SET.themes["base16-mocha.dark"];
|
||||
let theme = &THEME_SET.themes["base16-ocean.dark"];
|
||||
let mut highlighter = HighlightLines::new(syntax, theme);
|
||||
|
||||
let mut output = String::new();
|
||||
|
||||
@@ -562,8 +562,8 @@ fn test_bold_formatting() {
|
||||
eprintln!("Input: {:?}", input);
|
||||
eprintln!("Output: {:?}", full_output);
|
||||
|
||||
// Should contain sapphire bold ANSI code (Catppuccin Macchiato)
|
||||
assert!(full_output.contains("\x1b[1m\x1b[38;2;125;196;228m"), "Should contain bold formatting");
|
||||
// Should contain green bold ANSI code (\x1b[1;32m)
|
||||
assert!(full_output.contains("\x1b[1;32m"), "Should contain bold formatting");
|
||||
// Should NOT contain raw **
|
||||
assert!(!full_output.contains("**"), "Should not contain raw **");
|
||||
}
|
||||
@@ -611,20 +611,20 @@ Normal text with **bold**, *italic*, and `inline code` all together.
|
||||
eprintln!("=== END ===");
|
||||
|
||||
// Check headers are formatted (Dracula colors)
|
||||
assert!(full_output.contains("\x1b[1m\x1b[38;2;198;160;246mHeader 1"), "H1 should be bold mauve");
|
||||
assert!(full_output.contains("\x1b[38;2;138;173;244mHeader 2"), "H2 should be blue");
|
||||
assert!(full_output.contains("\x1b[1;95mHeader 1"), "H1 should be bold pink");
|
||||
assert!(full_output.contains("\x1b[35mHeader 2"), "H2 should be magenta");
|
||||
|
||||
// Check bold is green
|
||||
assert!(full_output.contains("\x1b[1m\x1b[38;2;125;196;228mbold text\x1b[0m"), "Bold should be sapphire");
|
||||
assert!(full_output.contains("\x1b[1;32mbold text\x1b[0m"), "Bold should be green");
|
||||
|
||||
// Check italic is cyan
|
||||
assert!(full_output.contains("\x1b[3m\x1b[38;2;145;215;227mitalic text\x1b[0m"), "Italic should be sky");
|
||||
assert!(full_output.contains("\x1b[3;36mitalic text\x1b[0m"), "Italic should be cyan");
|
||||
|
||||
// Check inline code is orange
|
||||
assert!(full_output.contains("\x1b[38;2;245;169;127minline code\x1b[0m"), "Inline code should be peach");
|
||||
assert!(full_output.contains("\x1b[38;2;216;177;114minline code\x1b[0m"), "Inline code should be orange");
|
||||
|
||||
// Check link is cyan underlined
|
||||
assert!(full_output.contains("\x1b[4m\x1b[38;2;166;218;149mlink\x1b[0m"), "Link should be green underlined");
|
||||
assert!(full_output.contains("\x1b[36;4mlink\x1b[0m"), "Link should be cyan underlined");
|
||||
|
||||
// Check bullets
|
||||
assert!(full_output.contains("• Bullet item 1"), "Should have bullet");
|
||||
@@ -662,7 +662,7 @@ fn test_unclosed_inline_code() {
|
||||
assert!(!full_output.contains('`'), "Should not contain raw backtick");
|
||||
|
||||
// Should contain orange formatting for the unclosed code
|
||||
assert!(full_output.contains("\x1b[38;2;245;169;127m"), "Should have peach formatting");
|
||||
assert!(full_output.contains("\x1b[38;2;216;177;114m"), "Should have orange formatting");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -705,11 +705,11 @@ Your config already has it set up with consult:
|
||||
|
||||
// Headers should be formatted (H3 = cyan in Dracula), not raw
|
||||
assert!(!full_output.contains("### Key"), "Should not have raw ### header");
|
||||
assert!(full_output.contains("\x1b[38;2;183;189;248mKey bindings"), "H3 header should be lavender");
|
||||
assert!(full_output.contains("\x1b[36mKey bindings"), "H3 header should be cyan");
|
||||
|
||||
// Bold should be formatted, not raw
|
||||
assert!(!full_output.contains("**C-x p f**"), "Should not have raw ** bold");
|
||||
assert!(full_output.contains("\x1b[1m\x1b[38;2;125;196;228mC-x p f\x1b[0m"), "Bold should be sapphire");
|
||||
assert!(full_output.contains("\x1b[1;32mC-x p f\x1b[0m"), "Bold should be green");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -823,7 +823,7 @@ Some **bold** text.
|
||||
|
||||
// Header should be formatted (H3 = cyan in Dracula)
|
||||
assert!(!full_output.contains("### Header"), "Should not have raw ### header");
|
||||
assert!(full_output.contains("\x1b[38;2;183;189;248mHeader after table"), "H3 header should be lavender");
|
||||
assert!(full_output.contains("\x1b[36mHeader after table"), "H3 header should be cyan");
|
||||
|
||||
// Bold should be formatted
|
||||
assert!(!full_output.contains("**bold**"), "Should not have raw ** bold");
|
||||
@@ -1011,7 +1011,7 @@ fn test_simple_italic() {
|
||||
let mut fmt = make_formatter();
|
||||
let out = fmt.process("*simple italic*\n");
|
||||
eprintln!("Simple italic: {:?}", out);
|
||||
assert!(out.contains("\x1b[3m\x1b[38;2;145;215;227m"), "Should have italic formatting");
|
||||
assert!(out.contains("\x1b[3;36m"), "Should have italic formatting");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1020,9 +1020,9 @@ fn test_italic_with_nested_bold() {
|
||||
let output = fmt.process("*italic with **nested bold** inside*\n");
|
||||
eprintln!("Output: {:?}", output);
|
||||
// Should have italic formatting (cyan)
|
||||
assert!(output.contains("\x1b[3m\x1b[38;2;145;215;227m"), "Should have italic formatting");
|
||||
assert!(output.contains("\x1b[3;36m"), "Should have italic formatting");
|
||||
// Should have bold formatting (green) for nested bold
|
||||
assert!(output.contains("\x1b[1m\x1b[38;2;125;196;228m"), "Should have bold formatting for nested");
|
||||
assert!(output.contains("\x1b[1;32m"), "Should have bold formatting for nested");
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
@@ -1790,10 +1790,10 @@ fn test_bold_inside_header() {
|
||||
assert!(!full.contains("**"), "Should not contain raw ** markers, got: {}", full);
|
||||
|
||||
// Should have header formatting (H1 = bold pink in Dracula)
|
||||
assert!(full.contains("\x1b[1m\x1b[38;2;198;160;246m"), "Should have bold mauve header formatting");
|
||||
assert!(full.contains("\x1b[1;95m"), "Should have bold pink header formatting");
|
||||
|
||||
// Should have bold formatting (green) for the bold text inside
|
||||
assert!(full.contains("\x1b[1m\x1b[38;2;125;196;228m"), "Should have sapphire bold formatting for **Bold Header**");
|
||||
assert!(full.contains("\x1b[1;32m"), "Should have green bold formatting for **Bold Header**");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1817,10 +1817,10 @@ fn test_italic_inside_header() {
|
||||
assert!(!without_ansi.contains('*'), "Should not contain raw * markers, got: {}", without_ansi);
|
||||
|
||||
// Should have header formatting (magenta)
|
||||
assert!(full.contains("\x1b[38;2;138;173;244m"), "Should have blue header formatting");
|
||||
assert!(full.contains("\x1b[35m"), "Should have magenta header formatting");
|
||||
|
||||
// Should have italic formatting (cyan) for the italic text inside
|
||||
assert!(full.contains("\x1b[3m\x1b[38;2;145;215;227m"), "Should have sky italic formatting for *Italic Header*");
|
||||
assert!(full.contains("\x1b[3;36m"), "Should have cyan italic formatting for *Italic Header*");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1842,11 +1842,11 @@ fn test_code_inside_header() {
|
||||
let without_ansi = strip_ansi(&full);
|
||||
assert!(!without_ansi.contains('`'), "Should not contain raw backticks, got: {}", without_ansi);
|
||||
|
||||
// Should have header formatting (H3 = lavender in Catppuccin Macchiato)
|
||||
assert!(full.contains("\x1b[38;2;183;189;248m"), "Should have lavender header formatting");
|
||||
// Should have header formatting (H3 = cyan in Dracula)
|
||||
assert!(full.contains("\x1b[36m"), "Should have cyan header formatting");
|
||||
|
||||
// Should have code formatting (orange) for the inline code
|
||||
assert!(full.contains("\x1b[38;2;245;169;127m"), "Should have peach code formatting");
|
||||
assert!(full.contains("\x1b[38;2;216;177;114m"), "Should have orange code formatting");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1870,8 +1870,8 @@ fn test_mixed_formatting_inside_header() {
|
||||
assert!(!without_ansi.contains("*italic*"), "Should not contain raw *italic* markers");
|
||||
|
||||
// Should have both bold and italic formatting
|
||||
assert!(full.contains("\x1b[1m\x1b[38;2;125;196;228m"), "Should have sapphire bold formatting");
|
||||
assert!(full.contains("\x1b[3m\x1b[38;2;145;215;227m"), "Should have sky italic formatting");
|
||||
assert!(full.contains("\x1b[1;32m"), "Should have green bold formatting");
|
||||
assert!(full.contains("\x1b[3;36m"), "Should have cyan italic formatting");
|
||||
}
|
||||
|
||||
/// Helper to strip ANSI escape codes for easier assertion
|
||||
|
||||
@@ -152,37 +152,15 @@ impl ContextWindow {
|
||||
/// Update token usage from provider response.
|
||||
///
|
||||
/// NOTE: This only updates cumulative_tokens (total API usage tracking).
|
||||
/// Calibrates `used_tokens` from the provider's actual token count when
|
||||
/// available. Our heuristic estimation (chars/3 or chars/4) drifts
|
||||
/// over long sessions because it doesn't account for tool definitions
|
||||
/// (~4000 tokens) sent alongside the conversation history.
|
||||
///
|
||||
/// `prompt_tokens` is the ground-truth count of every token the API
|
||||
/// received (system prompt + conversation history + tool definitions).
|
||||
/// By snapping `used_tokens` to this value after each API call, we
|
||||
/// eliminate accumulated drift and ensure `should_compact()` triggers
|
||||
/// at the right time.
|
||||
///
|
||||
/// When `prompt_tokens` is 0 (some providers don't report it), we leave
|
||||
/// `used_tokens` unchanged and fall back to the heuristic estimate.
|
||||
/// It does NOT update used_tokens because:
|
||||
/// 1. prompt_tokens represents the ENTIRE context sent to API (already tracked via add_message)
|
||||
/// 2. completion_tokens will be tracked when the assistant message is added via add_message
|
||||
/// Adding total_tokens here would cause double/triple counting and break the 80% threshold check.
|
||||
pub fn update_usage_from_response(&mut self, usage: &Usage) {
|
||||
self.cumulative_tokens += usage.total_tokens;
|
||||
|
||||
// Calibrate used_tokens from the provider's actual prompt token count.
|
||||
// prompt_tokens = all tokens sent to the API (system + history + tools).
|
||||
// This is the ground truth — use it to correct heuristic drift.
|
||||
if usage.prompt_tokens > 0 {
|
||||
let old = self.used_tokens;
|
||||
self.used_tokens = usage.prompt_tokens;
|
||||
debug!(
|
||||
"Calibrated used_tokens from API: {} -> {} (drift was {} tokens)",
|
||||
old, self.used_tokens, (self.used_tokens as i64 - old as i64).abs()
|
||||
);
|
||||
}
|
||||
|
||||
debug!(
|
||||
"Post-calibration: used={}/{}, cumulative={}",
|
||||
self.used_tokens, self.total_tokens, self.cumulative_tokens
|
||||
"Updated cumulative tokens: {} (used: {}/{}, cumulative: {})",
|
||||
usage.total_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -166,6 +166,9 @@ pub struct Agent<W: UiWriter> {
|
||||
acd_enabled: bool,
|
||||
/// Whether plan mode is active (gate blocks file changes without approved plan)
|
||||
in_plan_mode: bool,
|
||||
/// When true, plan tools (plan_read/write/approve) skip the approval gate.
|
||||
/// Controlled by --skip-plan-tool-check CLI flag.
|
||||
skip_plan_tool_check: bool,
|
||||
/// Files that were already dirty when plan mode started (excluded from approval gate)
|
||||
baseline_dirty_files: std::collections::HashSet<String>,
|
||||
/// Manager for async research tasks
|
||||
@@ -226,6 +229,7 @@ impl<W: UiWriter> Agent<W> {
|
||||
auto_memory: false,
|
||||
acd_enabled: false,
|
||||
in_plan_mode: false,
|
||||
skip_plan_tool_check: false,
|
||||
baseline_dirty_files: std::collections::HashSet::new(),
|
||||
pending_research_manager: pending_research::PendingResearchManager::new(),
|
||||
loaded_toolsets: std::collections::HashSet::new(),
|
||||
@@ -1096,6 +1100,18 @@ impl<W: UiWriter> Agent<W> {
|
||||
let response_content = task_result.response.clone();
|
||||
let _llm_duration = llm_start.elapsed();
|
||||
|
||||
// Create a mock usage for now (we'll need to track this during streaming)
|
||||
let mock_usage = g3_providers::Usage {
|
||||
prompt_tokens: 100, // Estimate
|
||||
completion_tokens: response_content.len() as u32 / 4, // Rough estimate
|
||||
total_tokens: 100 + (response_content.len() as u32 / 4),
|
||||
cache_creation_tokens: 0,
|
||||
cache_read_tokens: 0,
|
||||
};
|
||||
|
||||
// Update context window with estimated token usage
|
||||
self.context_window.update_usage(&mock_usage);
|
||||
|
||||
// Add assistant response to context window only if not empty
|
||||
// This prevents the "Skipping empty message" warning when only tools were executed
|
||||
// Also strip timing footer - it's display-only and shouldn't be in context
|
||||
@@ -1676,6 +1692,11 @@ impl<W: UiWriter> Agent<W> {
|
||||
self.in_plan_mode
|
||||
}
|
||||
|
||||
/// Set whether plan tools skip the approval gate.
|
||||
pub fn set_skip_plan_tool_check(&mut self, skip: bool) {
|
||||
self.skip_plan_tool_check = skip;
|
||||
}
|
||||
|
||||
/// Check if the current plan is in a terminal state (all items done or blocked).
|
||||
///
|
||||
/// Returns true if:
|
||||
@@ -2180,7 +2201,6 @@ Skip if nothing new. Be brief."#;
|
||||
mut request: CompletionRequest,
|
||||
show_timing: bool,
|
||||
) -> Result<TaskResult> {
|
||||
|
||||
// =========================================================================
|
||||
// STREAMING COMPLETION WITH TOOL EXECUTION
|
||||
// =========================================================================
|
||||
@@ -2313,10 +2333,6 @@ Skip if nothing new. Be brief."#;
|
||||
iter.accumulated_usage = Some(usage.clone());
|
||||
state.turn_accumulated_usage = Some(usage.clone());
|
||||
|
||||
// Calibrate context window with actual API usage immediately
|
||||
// (must happen here, not after the loop, because early returns bypass post-loop code)
|
||||
self.context_window.update_usage_from_response(usage);
|
||||
|
||||
// Update cumulative cache statistics
|
||||
self.cache_stats.total_calls += 1;
|
||||
self.cache_stats.total_input_tokens += usage.prompt_tokens as u64;
|
||||
@@ -2892,9 +2908,12 @@ Skip if nothing new. Be brief."#;
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to estimation if no usage data was provided by the stream
|
||||
// (calibration already happened inline when usage data arrived)
|
||||
if iter.accumulated_usage.is_none() {
|
||||
// Update context window with actual usage if available
|
||||
if let Some(usage) = iter.accumulated_usage {
|
||||
debug!("Updating context window with actual usage from stream");
|
||||
self.context_window.update_usage_from_response(&usage);
|
||||
} else {
|
||||
// Fall back to estimation if no usage data was provided
|
||||
debug!("No usage data from stream, using estimation");
|
||||
let estimated_tokens = ContextWindow::estimate_tokens(&iter.current_response);
|
||||
self.context_window.add_streaming_tokens(estimated_tokens);
|
||||
@@ -3012,7 +3031,12 @@ Skip if nothing new. Be brief."#;
|
||||
let result = self.execute_tool_inner_in_dir(tool_call, working_dir).await;
|
||||
|
||||
// Check plan approval gate after tool execution (only in plan mode)
|
||||
if self.in_plan_mode {
|
||||
// Plan tools (plan_read/write/approve) only skip the gate when
|
||||
// --skip-plan-tool-check is passed; otherwise they are gated like
|
||||
// every other tool.
|
||||
let is_plan_tool = matches!(tool_call.tool.as_str(), "plan_write" | "plan_read" | "plan_approve");
|
||||
let dominated_by_gate = if is_plan_tool { !self.skip_plan_tool_check } else { true };
|
||||
if self.in_plan_mode && dominated_by_gate {
|
||||
if let Some(session_id) = &self.session_id {
|
||||
if let ApprovalGateResult::Blocked { message } =
|
||||
check_plan_approval_gate(session_id, working_dir, &self.baseline_dirty_files)
|
||||
|
||||
@@ -610,23 +610,17 @@ pub fn write_plan(session_id: &str, plan: &Plan) -> Result<()> {
|
||||
|
||||
/// Extract YAML content from a markdown file with ```yaml code block.
|
||||
fn extract_yaml_from_markdown(content: &str) -> Result<String> {
|
||||
// Look for ```yaml ... ``` block
|
||||
let start_marker = "```yaml";
|
||||
let end_marker = "```";
|
||||
|
||||
if let Some(start_idx) = content.find(start_marker) {
|
||||
let yaml_start = start_idx + start_marker.len();
|
||||
// Find closing ``` that appears at the start of a line.
|
||||
// A simple .find("```") would match backticks embedded inside YAML
|
||||
// string values (e.g., descriptions containing code fences), truncating
|
||||
// the YAML and causing parse errors.
|
||||
let remainder = &content[yaml_start..];
|
||||
for (i, line) in remainder.split('\n').enumerate() {
|
||||
if i > 0 && line.starts_with("```") {
|
||||
let offset: usize = remainder.split('\n').take(i).map(|l| l.len() + 1).sum();
|
||||
let yaml = remainder[..offset].trim();
|
||||
if let Some(end_idx) = content[yaml_start..].find(end_marker) {
|
||||
let yaml = content[yaml_start..yaml_start + end_idx].trim();
|
||||
return Ok(yaml.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no code block, try parsing the whole content as YAML
|
||||
Ok(content.to_string())
|
||||
@@ -821,7 +815,7 @@ pub async fn execute_plan_read<W: UiWriter>(
|
||||
}
|
||||
None => {
|
||||
ctx.ui_writer.print_plan_compact(None, None, false);
|
||||
Ok(String::new())
|
||||
Ok("No plan exists yet. Use plan_write to create one.".to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1288,55 +1282,6 @@ items: []
|
||||
assert!(yaml.contains("plan_id: test"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_yaml_extraction_with_backticks_in_values() {
|
||||
// This is the exact bug: YAML values containing ``` caused
|
||||
// extract_yaml_from_markdown to truncate at the embedded backticks
|
||||
// instead of finding the real closing fence.
|
||||
let md = "# Plan: test\n\n## Plan Data\n\n\
|
||||
```yaml\n\
|
||||
plan_id: test\n\
|
||||
revision: 1\n\
|
||||
items:\n\
|
||||
- id: I1\n\
|
||||
description: 'Fix the ```yaml parsing issue with ```'\n\
|
||||
state: todo\n\
|
||||
touches:\n\
|
||||
- src/plan.rs\n\
|
||||
checks:\n\
|
||||
happy:\n\
|
||||
desc: Works\n\
|
||||
target: plan\n\
|
||||
negative:\n\
|
||||
- desc: Fails gracefully\n\
|
||||
target: plan\n\
|
||||
boundary:\n\
|
||||
- desc: Edge case\n\
|
||||
target: plan\n\
|
||||
```\n";
|
||||
|
||||
let yaml = extract_yaml_from_markdown(md).unwrap();
|
||||
// Must contain the full YAML, not truncated at the embedded backticks
|
||||
assert!(yaml.contains("plan_id: test"), "should contain plan_id");
|
||||
assert!(yaml.contains("description:"), "should contain description field");
|
||||
assert!(yaml.contains("state: todo"), "should contain state field");
|
||||
assert!(yaml.contains("checks:"), "should contain checks");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_yaml_extraction_no_code_block_fallback() {
|
||||
let raw_yaml = "plan_id: test\nrevision: 1\nitems: []\n";
|
||||
let yaml = extract_yaml_from_markdown(raw_yaml).unwrap();
|
||||
assert_eq!(yaml, raw_yaml);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_yaml_extraction_closing_fence_no_trailing_newline() {
|
||||
let md = "```yaml\nplan_id: test\nrevision: 1\nitems: []\n```";
|
||||
let yaml = extract_yaml_from_markdown(md).unwrap();
|
||||
assert!(yaml.contains("plan_id: test"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_plan_serialization_roundtrip() {
|
||||
let mut plan = Plan::new("test-plan");
|
||||
|
||||
@@ -714,6 +714,10 @@ async fn test_token_counting_no_double_count() {
|
||||
|
||||
let (mut agent, _temp_dir) = create_agent_with_mock(provider).await;
|
||||
|
||||
// Get initial token count
|
||||
let initial_used = agent.get_context_window().used_tokens;
|
||||
let initial_percentage = agent.get_context_window().percentage_used();
|
||||
|
||||
// Execute a task
|
||||
agent.execute_task("Say something short", None, false).await.unwrap();
|
||||
|
||||
@@ -721,16 +725,18 @@ async fn test_token_counting_no_double_count() {
|
||||
let final_used = agent.get_context_window().used_tokens;
|
||||
let final_percentage = agent.get_context_window().percentage_used();
|
||||
|
||||
// With calibration, used_tokens should be snapped to the mock's prompt_tokens (100)
|
||||
// plus any heuristic addition from the assistant response message added after calibration.
|
||||
// The key invariant: no double-counting that would push us to 80%+.
|
||||
// The increase should be reasonable (not doubled)
|
||||
// A short response + user message should be < 1000 tokens
|
||||
let token_increase = final_used - initial_used;
|
||||
assert!(
|
||||
final_used < 2000,
|
||||
"After calibration from mock (prompt_tokens=100), used_tokens should be low, got {}",
|
||||
final_used
|
||||
token_increase < 1000,
|
||||
"Token increase should be reasonable, got {} ({}% -> {}%)",
|
||||
token_increase,
|
||||
initial_percentage,
|
||||
final_percentage
|
||||
);
|
||||
|
||||
// Percentage should be very low (not jumping to 80%+ from double-counting)
|
||||
// Percentage should also be reasonable (not jumping to 80%+)
|
||||
assert!(
|
||||
final_percentage < 50.0,
|
||||
"Context percentage should be reasonable after one exchange, got {}%",
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
use g3_core::ContextWindow;
|
||||
use g3_providers::{Message, MessageRole, Usage};
|
||||
|
||||
/// Test that used_tokens is tracked via add_message.
|
||||
/// Test that used_tokens is tracked via add_message, not update_usage_from_response.
|
||||
/// This is critical for the 80% compaction threshold to work correctly.
|
||||
#[test]
|
||||
fn test_used_tokens_tracked_via_messages() {
|
||||
let mut window = ContextWindow::new(10000);
|
||||
@@ -22,17 +23,17 @@ fn test_used_tokens_tracked_via_messages() {
|
||||
assert!(window.used_tokens > tokens_after_user_msg, "used_tokens should increase after adding assistant message");
|
||||
}
|
||||
|
||||
/// Test that update_usage_from_response calibrates used_tokens from prompt_tokens.
|
||||
/// When prompt_tokens > 0, used_tokens is snapped to the API's ground truth.
|
||||
/// When prompt_tokens is 0, used_tokens is left unchanged (heuristic fallback).
|
||||
/// Test that update_usage_from_response only updates cumulative_tokens, not used_tokens.
|
||||
/// This prevents double-counting which was causing the 80% threshold to be reached at 200%+.
|
||||
#[test]
|
||||
fn test_update_usage_calibrates_used_tokens() {
|
||||
fn test_update_usage_only_affects_cumulative() {
|
||||
let mut window = ContextWindow::new(10000);
|
||||
|
||||
// Initial state
|
||||
assert_eq!(window.used_tokens, 0);
|
||||
assert_eq!(window.cumulative_tokens, 0);
|
||||
|
||||
// Simulate API response — prompt_tokens > 0 triggers calibration
|
||||
// Simulate API response with usage data
|
||||
let usage = Usage {
|
||||
prompt_tokens: 100,
|
||||
completion_tokens: 50,
|
||||
@@ -42,13 +43,13 @@ fn test_update_usage_calibrates_used_tokens() {
|
||||
};
|
||||
window.update_usage_from_response(&usage);
|
||||
|
||||
// used_tokens should be calibrated to prompt_tokens
|
||||
assert_eq!(window.used_tokens, 100, "used_tokens should be calibrated to prompt_tokens");
|
||||
// used_tokens should NOT change - it's tracked via add_message
|
||||
assert_eq!(window.used_tokens, 0, "used_tokens should not be updated by update_usage_from_response");
|
||||
|
||||
// cumulative_tokens tracks total API usage
|
||||
// cumulative_tokens SHOULD be updated for API usage tracking
|
||||
assert_eq!(window.cumulative_tokens, 150, "cumulative_tokens should track total API usage");
|
||||
|
||||
// Another API call with higher prompt_tokens
|
||||
// Another API call
|
||||
let usage2 = Usage {
|
||||
prompt_tokens: 200,
|
||||
completion_tokens: 75,
|
||||
@@ -58,27 +59,11 @@ fn test_update_usage_calibrates_used_tokens() {
|
||||
};
|
||||
window.update_usage_from_response(&usage2);
|
||||
|
||||
// used_tokens calibrated to latest prompt_tokens
|
||||
assert_eq!(window.used_tokens, 200, "used_tokens should be calibrated to latest prompt_tokens");
|
||||
// used_tokens still unchanged
|
||||
assert_eq!(window.used_tokens, 0, "used_tokens should remain unchanged");
|
||||
|
||||
// cumulative_tokens accumulates
|
||||
assert_eq!(window.cumulative_tokens, 425, "cumulative_tokens should accumulate");
|
||||
|
||||
// When prompt_tokens is 0, used_tokens should NOT change (fallback)
|
||||
let usage3 = Usage {
|
||||
prompt_tokens: 0,
|
||||
completion_tokens: 30,
|
||||
total_tokens: 30,
|
||||
cache_creation_tokens: 0,
|
||||
cache_read_tokens: 0,
|
||||
};
|
||||
window.update_usage_from_response(&usage3);
|
||||
|
||||
// used_tokens unchanged (prompt_tokens was 0)
|
||||
assert_eq!(window.used_tokens, 200, "used_tokens should not change when prompt_tokens is 0");
|
||||
|
||||
// cumulative_tokens still accumulates
|
||||
assert_eq!(window.cumulative_tokens, 455, "cumulative_tokens should still accumulate");
|
||||
}
|
||||
|
||||
/// Test that add_streaming_tokens only updates cumulative_tokens.
|
||||
@@ -127,6 +112,7 @@ fn test_percentage_based_on_used_tokens() {
|
||||
}
|
||||
|
||||
/// Test that the 80% compaction threshold works correctly.
|
||||
/// This was the original bug - used_tokens was being double/triple counted.
|
||||
#[test]
|
||||
fn test_should_compact_threshold() {
|
||||
let mut window = ContextWindow::new(1000);
|
||||
@@ -139,6 +125,7 @@ fn test_should_compact_threshold() {
|
||||
}
|
||||
|
||||
// Should be around 720 tokens (72%) - not yet at threshold
|
||||
// Note: actual token count depends on estimation algorithm
|
||||
let percentage = window.percentage_used();
|
||||
println!("After 9 messages: {}% used ({} tokens)", percentage, window.used_tokens);
|
||||
|
||||
@@ -155,19 +142,21 @@ fn test_should_compact_threshold() {
|
||||
}
|
||||
}
|
||||
|
||||
/// Test that calibration and cumulative tracking work together correctly.
|
||||
/// Test that cumulative_tokens and used_tokens are independent.
|
||||
#[test]
|
||||
fn test_calibration_and_cumulative_interaction() {
|
||||
fn test_cumulative_vs_used_independence() {
|
||||
let mut window = ContextWindow::new(10000);
|
||||
|
||||
// Add a message (affects both used_tokens and cumulative_tokens)
|
||||
// Add a message (affects used_tokens)
|
||||
let msg = Message::new(MessageRole::User, "Hello world".to_string());
|
||||
window.add_message(msg);
|
||||
let used_after_msg = window.used_tokens;
|
||||
let cumulative_after_msg = window.cumulative_tokens;
|
||||
|
||||
// Both should be equal at this point (message adds to both)
|
||||
assert_eq!(used_after_msg, cumulative_after_msg);
|
||||
|
||||
// Simulate API response — calibrates used_tokens, accumulates cumulative_tokens
|
||||
// Now simulate API response (only affects cumulative_tokens)
|
||||
let usage = Usage {
|
||||
prompt_tokens: 500,
|
||||
completion_tokens: 200,
|
||||
@@ -177,43 +166,12 @@ fn test_calibration_and_cumulative_interaction() {
|
||||
};
|
||||
window.update_usage_from_response(&usage);
|
||||
|
||||
// used_tokens calibrated to prompt_tokens (500)
|
||||
assert_eq!(window.used_tokens, 500, "used_tokens should be calibrated to prompt_tokens");
|
||||
// used_tokens unchanged
|
||||
assert_eq!(window.used_tokens, used_after_msg, "used_tokens should not change from API response");
|
||||
|
||||
// cumulative_tokens increased by total_tokens
|
||||
// cumulative_tokens increased
|
||||
assert_eq!(window.cumulative_tokens, cumulative_after_msg + 700, "cumulative_tokens should increase");
|
||||
|
||||
// They should now be different
|
||||
assert!(window.cumulative_tokens > window.used_tokens, "cumulative should be greater than used");
|
||||
}
|
||||
|
||||
/// Test that calibration corrects heuristic undercount.
|
||||
/// The heuristic doesn't account for tool definitions (~4000 tokens),
|
||||
/// so prompt_tokens from the API is always larger.
|
||||
#[test]
|
||||
fn test_calibration_corrects_undercount() {
|
||||
let mut window = ContextWindow::new(200000);
|
||||
|
||||
// Simulate adding a system prompt and user message via heuristic
|
||||
let system_msg = Message::new(MessageRole::System, "x".repeat(4000)); // ~1000 tokens
|
||||
window.add_message(system_msg);
|
||||
let user_msg = Message::new(MessageRole::User, "Hello".to_string());
|
||||
window.add_message(user_msg);
|
||||
|
||||
let heuristic_estimate = window.used_tokens;
|
||||
assert!(heuristic_estimate > 0);
|
||||
|
||||
// API reports higher prompt_tokens (includes tool definitions)
|
||||
let usage = Usage {
|
||||
prompt_tokens: heuristic_estimate + 4000, // tool definitions add ~4000 tokens
|
||||
completion_tokens: 100,
|
||||
total_tokens: heuristic_estimate + 4100,
|
||||
cache_creation_tokens: 0,
|
||||
cache_read_tokens: 0,
|
||||
};
|
||||
window.update_usage_from_response(&usage);
|
||||
|
||||
// used_tokens should now be higher than the heuristic estimate
|
||||
assert_eq!(window.used_tokens, heuristic_estimate + 4000);
|
||||
assert!(window.used_tokens > heuristic_estimate, "calibration should correct undercount");
|
||||
}
|
||||
|
||||
@@ -415,19 +415,57 @@ impl LLMProvider for OpenAIProvider {
|
||||
}
|
||||
|
||||
fn convert_messages(messages: &[Message]) -> Vec<serde_json::Value> {
|
||||
messages
|
||||
.iter()
|
||||
.map(|msg| {
|
||||
json!({
|
||||
"role": match msg.role {
|
||||
let mut result = Vec::new();
|
||||
for msg in messages {
|
||||
// Tool result messages: OpenAI expects role "tool" with tool_call_id
|
||||
if let Some(ref tool_call_id) = msg.tool_result_id {
|
||||
result.push(json!({
|
||||
"role": "tool",
|
||||
"tool_call_id": tool_call_id,
|
||||
"content": msg.content,
|
||||
}));
|
||||
continue;
|
||||
}
|
||||
|
||||
let role = match msg.role {
|
||||
MessageRole::System => "system",
|
||||
MessageRole::User => "user",
|
||||
MessageRole::Assistant => "assistant",
|
||||
},
|
||||
};
|
||||
|
||||
// Assistant messages with tool calls
|
||||
if !msg.tool_calls.is_empty() {
|
||||
let tool_calls: Vec<serde_json::Value> = msg.tool_calls.iter().map(|tc| {
|
||||
json!({
|
||||
"id": tc.id,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": tc.name,
|
||||
"arguments": tc.input.to_string(),
|
||||
}
|
||||
})
|
||||
}).collect();
|
||||
|
||||
let mut m = json!({
|
||||
"role": role,
|
||||
"tool_calls": tool_calls,
|
||||
});
|
||||
// Include content only if non-empty (OpenAI allows null/absent content
|
||||
// on assistant messages that have tool_calls)
|
||||
if !msg.content.is_empty() {
|
||||
m["content"] = json!(msg.content);
|
||||
}
|
||||
result.push(m);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Regular messages
|
||||
result.push(json!({
|
||||
"role": role,
|
||||
"content": msg.content,
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}));
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn convert_tools(tools: &[Tool]) -> Vec<serde_json::Value> {
|
||||
|
||||
Reference in New Issue
Block a user