diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs
index debeea6..b9a0404 100644
--- a/crates/g3-cli/src/lib.rs
+++ b/crates/g3-cli/src/lib.rs
@@ -75,9 +75,20 @@ async fn run_interactive(mut agent: Agent, show_prompt: bool, show_code: bool) -
     println!(
         "I solve problems by writing and executing code. Tell me what you need to accomplish!"
     );
+    println!();
+
+    // Display provider and model information
+    match agent.get_provider_info() {
+        Ok((provider, model)) => {
+            println!("šŸ”§ Provider: {} | Model: {}", provider, model);
+        }
+        Err(e) => {
+            error!("Failed to get provider info: {}", e);
+        }
+    }
+    println!();
     println!("Type 'exit' or 'quit' to exit, use Up/Down arrows for command history");
-    println!("Press ESC during operations to cancel the current request");
     println!();
 
     // Initialize rustyline editor with history
diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs
index f09e409..647cea3 100644
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -6,8 +6,7 @@ use serde::{Deserialize, Serialize};
 use std::path::Path;
 use std::time::{Duration, Instant};
 use tokio_util::sync::CancellationToken;
-use tracing::field::debug;
-use tracing::info;
+use tracing::{error, field::debug, info};
 
 #[derive(Debug, Clone)]
 pub struct ContextWindow {
@@ -94,8 +93,9 @@ impl Agent {
         // Set default provider
         providers.set_default(&config.providers.default_provider)?;
 
-        // Initialize context window with configured max context length
-        let context_window = ContextWindow::new(config.agent.max_context_length as u32);
+        // Determine context window size based on active provider
+        let context_length = Self::determine_context_length(&config, &providers)?;
+        let context_window = ContextWindow::new(context_length);
 
         Ok(Self {
             providers,
@@ -104,6 +104,62 @@ impl Agent {
         })
     }
 
+    fn determine_context_length(config: &Config, providers: &ProviderRegistry) -> Result<u32> {
+        // Get the active provider to determine context length
+        let provider = providers.get(None)?;
+        let provider_name = provider.name();
+        let model_name = provider.model();
+
+        // Use provider-specific context length if available, otherwise fall back to agent config
+        let context_length = match provider_name {
+            "embedded" => {
+                // For embedded models, use the configured context_length or model-specific defaults
+                if let Some(embedded_config) = &config.providers.embedded {
+                    embedded_config.context_length.unwrap_or_else(|| {
+                        // Model-specific defaults for embedded models
+                        match embedded_config.model_type.to_lowercase().as_str() {
+                            "codellama" => 16384, // CodeLlama supports 16k context
+                            "llama" => 4096,      // Base Llama models
+                            "mistral" => 8192,    // Mistral models
+                            _ => 4096,            // Conservative default
+                        }
+                    })
+                } else {
+                    config.agent.max_context_length as u32
+                }
+            }
+            "openai" => {
+                // OpenAI model-specific context lengths
+                match model_name {
+                    m if m.contains("gpt-4") => 128000,  // GPT-4 models have 128k context
+                    m if m.contains("gpt-3.5") => 16384, // GPT-3.5-turbo has 16k context
+                    _ => 4096,                           // Conservative default
+                }
+            }
+            "anthropic" => {
+                // Anthropic model-specific context lengths
+                match model_name {
+                    m if m.contains("claude-3") => 200000, // Claude-3 has 200k context
+                    m if m.contains("claude-2") => 100000, // Claude-2 has 100k context
+                    _ => 100000,                           // Conservative default for Claude
+                }
+            }
+            _ => config.agent.max_context_length as u32,
+        };
+
+        info!(
+            "Using context length: {} tokens for provider: {} (model: {})",
+            context_length, provider_name, model_name
+        );
+
+        Ok(context_length)
+    }
+
+    pub fn get_provider_info(&self) -> Result<(String, String)> {
+        let provider = self.providers.get(None)?;
+        Ok((provider.name().to_string(), provider.model().to_string()))
+    }
+
     pub async fn execute_task(
         &mut self,
         description: &str,
@@ -168,41 +224,67 @@ impl Agent {
     ) -> Result<String> {
         info!("Executing task: {}", description);
 
-        let total_start = Instant::now();
-
         let provider = self.providers.get(None)?;
 
         let system_prompt = format!(
-            "You are G3, a code-first AI agent. Your goal is to solve problems by writing code that completes the desired task.
+            "You are G3, a general-purpose AI agent. Your goal is to analyze and write code to solve given problems.
 
-When given a task:
-1. Analyze what needs to be done
-2. Rate the difficulty of the task from 1 (easy, file operations) to 10 (difficult, build complex applications like Firefox)
-3. Choose the most appropriate programming language{}
-4. Include any necessary imports/dependencies
-5. Add error handling where appropriate
-6. Generate code to complete the task, or ask for more details, but no other output
+    G3 uses LLMs with tool calling capability.
+    Tools allow external systems to provide context and data to G3. You solve higher level problems using
+    tools, and can interact with multiple at once. When you want to perform an action, use 'I' as the pronoun.
 
-Prefer these languages:
-- Bash/Shell: File operations, system administration, simple tasks
-- Python: Complex data processing, when libraries are needed
-- Rust: Performance-critical tasks, system programming
+# Available Tools
+- shell:
+    Execute a command in the shell.
 
-Only use Rust/Python when you need libraries or complex logic that bash can't handle easily.
+    This will return the output and error concatenated into a single string, as
+    you would see from running on the command line. There will also be an indication
+    of if the command succeeded or failed.
 
-Format your code response in markdown backticks as follows:
-difficulty rating: [X]
-```[language]
-[code]
-```
+    Avoid commands that produce a large amount of output, and consider piping those outputs to files.
 
-with nothing afterwards.",
-            if let Some(lang) = language {
-                format!(" (prefer {})", lang)
-            } else {
-                " based on the task type".to_string()
-            }
-        );
+    **Important**: Each shell command runs in its own process. Things like directory changes or
+    sourcing files do not persist between tool calls. So you may need to repeat them each time by
+    stringing together commands, e.g. `cd example && ls` or `source env/bin/activate && pip install numpy`
+
+    Multiple commands: Use ; or && to chain commands, avoid newlines
+    Pathnames: Use absolute paths and avoid cd unless explicitly requested
+
+    Usage:
+    - Call the `shell` tool with the desired bash/shell commands.
+
+- search:
+    Search the web for information about any topic.
+
+- final_output:
+    This tool signals the final output for a user in a conversation and MUST be used for the final message to the user. You must
+    pass in a detailed summary of the work done to this tool call.
+
+    Purpose:
+    - Collects the final output for a user
+    - Provides clear validation feedback when output isn't valid
+
+    Usage:
+    - Call the `final_output` tool with a summary of the work performed.
+
+# Response Guidelines
+- Use Markdown formatting for all responses.
+- Follow best practices for Markdown, including:
+  - Using headers for organization.
+  - Bullet points for lists.
+  - Links formatted correctly, either as linked text (e.g., [this is linked text](https://example.com)) or automatic links using angle brackets (e.g., <https://example.com>).
+- For code, use fenced code blocks by placing triple backticks (` ``` `) before and after the code. Include the language identifier after the opening backticks (e.g., ` ```python `) to enable syntax highlighting.
+- Ensure clarity, conciseness, and proper formatting to enhance readability and usability.
+
+IMPORTANT INSTRUCTIONS:
+
+Please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user.
+Only terminate your turn when you are sure that the problem is solved.
+
+If you are not sure about file content or codebase structure, or other information pertaining to the user's request,
+use your tools to read files and gather the relevant information: do NOT guess or make up an answer. It is important
+you use tools that can assist with providing the right context.
+");
 
         if show_prompt {
             println!("šŸ” System Prompt:");
@@ -232,26 +314,33 @@ with nothing afterwards.",
             messages,
             max_tokens: Some(2048),
             temperature: Some(0.2),
-            stream: false,
+            stream: true, // Enable streaming
        };
 
-        // Time the LLM call with cancellation support
+        // Time the LLM call with cancellation support and streaming
         let llm_start = Instant::now();
-        let response = tokio::select! {
-            result = provider.complete(request) => result?,
+        let response_content = tokio::select! {
+            result = self.stream_completion(request) => result?,
             _ = cancellation_token.cancelled() => {
                 return Err(anyhow::anyhow!("Operation cancelled by user"));
             }
         };
         let llm_duration = llm_start.elapsed();
 
-        // Update context window with actual token usage
-        self.context_window.update_usage(&response.usage);
+        // Create a mock usage for now (we'll need to track this during streaming)
+        let mock_usage = g3_providers::Usage {
+            prompt_tokens: 100, // Estimate
+            completion_tokens: response_content.len() as u32 / 4, // Rough estimate
+            total_tokens: 100 + (response_content.len() as u32 / 4),
+        };
+
+        // Update context window with estimated token usage
+        self.context_window.update_usage(&mock_usage);
 
         // Add assistant response to context window
         let assistant_message = Message {
             role: MessageRole::Assistant,
-            content: response.content.clone(),
+            content: response_content.clone(),
         };
         self.context_window.add_message(assistant_message);
@@ -259,19 +348,16 @@ with nothing afterwards.",
         let exec_start = Instant::now();
         let executor = CodeExecutor::new();
         let result = tokio::select! {
-            result = executor.execute_from_response_with_options(&response.content, show_code) => result?,
+            result = executor.execute_from_response_with_options(&response_content, show_code) => result?,
             _ = cancellation_token.cancelled() => {
                 return Err(anyhow::anyhow!("Operation cancelled by user"));
             }
         };
         let exec_duration = exec_start.elapsed();
 
-        let total_duration = total_start.elapsed();
-
         if show_timing {
             let timing_summary = format!(
-                "\n{} [šŸ’”: {} āš”ļø: {}]",
-                Self::format_duration(total_duration),
+                "\nšŸ’­ {} | āš”ļø {}",
                 Self::format_duration(llm_duration),
                 Self::format_duration(exec_duration)
             );
@@ -285,6 +371,39 @@ with nothing afterwards.",
         &self.context_window
     }
 
+    async fn stream_completion(&self, request: CompletionRequest) -> Result<String> {
+        use tokio_stream::StreamExt;
+
+        let provider = self.providers.get(None)?;
+        let mut stream = provider.stream(request).await?;
+
+        let mut full_content = String::new();
+        print!("šŸ¤– "); // Start the response indicator
+        use std::io::{self, Write};
+        io::stdout().flush()?;
+
+        while let Some(chunk_result) = stream.next().await {
+            match chunk_result {
+                Ok(chunk) => {
+                    print!("{}", chunk.content);
+                    io::stdout().flush()?;
+                    full_content.push_str(&chunk.content);
+
+                    if chunk.finished {
+                        break;
+                    }
+                }
+                Err(e) => {
+                    error!("Streaming error: {}", e);
+                    return Err(e);
+                }
+            }
+        }
+
+        println!(); // New line after streaming completes
+        Ok(full_content)
+    }
+
     fn format_duration(duration: Duration) -> String {
         let total_ms = duration.as_millis();
diff --git a/crates/g3-core/src/providers/embedded.rs b/crates/g3-core/src/providers/embedded.rs
index 3fa9f1f..8a8874a 100644
--- a/crates/g3-core/src/providers/embedded.rs
+++ b/crates/g3-core/src/providers/embedded.rs
@@ -1,13 +1,19 @@
-use g3_providers::{LLMProvider, CompletionRequest, CompletionResponse, CompletionStream, CompletionChunk, Usage, Message, MessageRole};
 use anyhow::Result;
-use llama_cpp::{LlamaModel, LlamaSession, LlamaParams, SessionParams, standard_sampler::{StandardSampler, SamplerStage}};
+use g3_providers::{
+    CompletionChunk, CompletionRequest, CompletionResponse, CompletionStream, LLMProvider, Message,
+    MessageRole, Usage,
+};
+use llama_cpp::{
+    standard_sampler::{SamplerStage, StandardSampler},
+    LlamaModel, LlamaParams, LlamaSession, SessionParams,
+};
 use std::path::Path;
-use std::sync::Arc;
-use tokio::sync::Mutex;
-use tracing::{debug, info, error, warn};
-use tokio::sync::mpsc;
-use tokio_stream::wrappers::ReceiverStream;
 use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::Arc;
+use tokio::sync::mpsc;
+use tokio::sync::Mutex;
+use tokio_stream::wrappers::ReceiverStream;
+use tracing::{debug, error, info, warn};
 
 pub struct EmbeddedProvider {
     model: Arc<LlamaModel>,
@@ -30,43 +36,44 @@ impl EmbeddedProvider {
         threads: Option<u32>,
     ) -> Result<Self> {
         info!("Loading embedded model from: {}", model_path);
-        
+
         // Expand tilde in path
         let expanded_path = shellexpand::tilde(&model_path);
         let model_path = Path::new(expanded_path.as_ref());
-        
+
         if !model_path.exists() {
             anyhow::bail!("Model file not found: {}", model_path.display());
         }
-        
+
         // Set up model parameters
         let mut params = LlamaParams::default();
-        
+
         if let Some(gpu_layers) = gpu_layers {
             params.n_gpu_layers = gpu_layers;
             info!("Using {} GPU layers", gpu_layers);
         }
-        
+
         let context_size = context_length.unwrap_or(4096);
         info!("Using context length: {}", context_size);
-        
+
         // Load the model
         info!("Loading model...");
         let model = LlamaModel::load_from_file(model_path, params)
             .map_err(|e| anyhow::anyhow!("Failed to load model: {}", e))?;
{}", e))?; - + // Create session with parameters let mut session_params = SessionParams::default(); session_params.n_ctx = context_size; if let Some(threads) = threads { session_params.n_threads = threads; } - - let session = model.create_session(session_params) + + let session = model + .create_session(session_params) .map_err(|e| anyhow::anyhow!("Failed to create session: {}", e))?; - + info!("Successfully loaded {} model", model_type); - + Ok(Self { model: Arc::new(model), session: Arc::new(Mutex::new(session)), @@ -77,15 +84,18 @@ impl EmbeddedProvider { generation_active: Arc::new(AtomicBool::new(false)), }) } - + fn format_messages(&self, messages: &[Message]) -> String { // Use proper prompt format for CodeLlama let mut formatted = String::new(); - + for message in messages { match message.role { MessageRole::System => { - formatted.push_str(&format!("[INST] <>\n{}\n<>\n\n", message.content)); + formatted.push_str(&format!( + "[INST] <>\n{}\n<>\n\n", + message.content + )); } MessageRole::User => { formatted.push_str(&format!("{} [/INST] ", message.content)); @@ -95,108 +105,127 @@ impl EmbeddedProvider { } } } - + formatted } - - async fn generate_completion(&self, prompt: &str, max_tokens: u32, temperature: f32) -> Result { + + async fn generate_completion( + &self, + prompt: &str, + max_tokens: u32, + temperature: f32, + ) -> Result { let session = self.session.clone(); let prompt = prompt.to_string(); - + // Calculate dynamic max tokens based on available context headroom let prompt_tokens = self.estimate_tokens(&prompt); - let available_tokens = self.context_length.saturating_sub(prompt_tokens).saturating_sub(50); // Reserve 50 tokens for safety + let available_tokens = self + .context_length + .saturating_sub(prompt_tokens) + .saturating_sub(50); // Reserve 50 tokens for safety let dynamic_max_tokens = std::cmp::min(max_tokens as usize, available_tokens as usize); - - debug!("Context calculation: prompt_tokens={}, context_length={}, available_tokens={}, dynamic_max_tokens={}", + + debug!("Context calculation: prompt_tokens={}, context_length={}, available_tokens={}, dynamic_max_tokens={}", prompt_tokens, self.context_length, available_tokens, dynamic_max_tokens); - + // Add timeout to the entire operation let timeout_duration = std::time::Duration::from_secs(30); // Increased timeout for larger contexts - - let result = tokio::time::timeout(timeout_duration, tokio::task::spawn_blocking(move || { - let mut session = match session.try_lock() { - Ok(ctx) => ctx, - Err(_) => return Err(anyhow::anyhow!("Model is busy, please try again")), - }; - - debug!("Starting inference with prompt length: {} chars, estimated {} tokens", prompt.len(), prompt_tokens); - - // Set context to the prompt - debug!("About to call set_context..."); - session.set_context(&prompt) - .map_err(|e| anyhow::anyhow!("Failed to set context: {}", e))?; - debug!("set_context completed successfully"); - - // Create sampler with temperature - debug!("Creating sampler..."); - let stages = vec![ - SamplerStage::Temperature(temperature), - SamplerStage::TopK(40), - SamplerStage::TopP(0.9), - ]; - let sampler = StandardSampler::new_softmax(stages, 1); - debug!("Sampler created successfully"); - - // Start completion with dynamic max tokens - debug!("About to call start_completing_with with {} max tokens...", dynamic_max_tokens); - let mut completion_handle = session.start_completing_with(sampler, dynamic_max_tokens) - .map_err(|e| anyhow::anyhow!("Failed to start completion: {}", e))?; - debug!("start_completing_with 
completed successfully"); - - let mut generated_text = String::new(); - let mut token_count = 0; - let start_time = std::time::Instant::now(); - - debug!("Starting token generation loop..."); - - // Generate tokens with dynamic limits - while let Some(token) = completion_handle.next_token() { - // Check for timeout on each token - if start_time.elapsed() > std::time::Duration::from_secs(25) { - debug!("Token generation timeout after {} tokens", token_count); - break; + + let result = tokio::time::timeout( + timeout_duration, + tokio::task::spawn_blocking(move || { + let mut session = match session.try_lock() { + Ok(ctx) => ctx, + Err(_) => return Err(anyhow::anyhow!("Model is busy, please try again")), + }; + + debug!( + "Starting inference with prompt length: {} chars, estimated {} tokens", + prompt.len(), + prompt_tokens + ); + + // Set context to the prompt + debug!("About to call set_context..."); + session + .set_context(&prompt) + .map_err(|e| anyhow::anyhow!("Failed to set context: {}", e))?; + debug!("set_context completed successfully"); + + // Create sampler with temperature + debug!("Creating sampler..."); + let stages = vec![ + SamplerStage::Temperature(temperature), + SamplerStage::TopK(40), + SamplerStage::TopP(0.9), + ]; + let sampler = StandardSampler::new_softmax(stages, 1); + debug!("Sampler created successfully"); + + // Start completion with dynamic max tokens + debug!( + "About to call start_completing_with with {} max tokens...", + dynamic_max_tokens + ); + let mut completion_handle = session + .start_completing_with(sampler, dynamic_max_tokens) + .map_err(|e| anyhow::anyhow!("Failed to start completion: {}", e))?; + debug!("start_completing_with completed successfully"); + + let mut generated_text = String::new(); + let mut token_count = 0; + let start_time = std::time::Instant::now(); + + debug!("Starting token generation loop..."); + + // Generate tokens with dynamic limits + while let Some(token) = completion_handle.next_token() { + // Check for timeout on each token + if start_time.elapsed() > std::time::Duration::from_secs(25) { + debug!("Token generation timeout after {} tokens", token_count); + break; + } + + let token_string = session.model().token_to_piece(token); + generated_text.push_str(&token_string); + token_count += 1; + + if token_count <= 10 || token_count % 50 == 0 { + debug!("Generated token {}: '{}'", token_count, token_string); + } + + // Use dynamic token limit + if token_count >= dynamic_max_tokens { + debug!("Reached dynamic token limit: {}", dynamic_max_tokens); + break; + } + + // Stop on completion markers + if generated_text.contains("") || generated_text.contains("[/INST]") { + debug!("Hit CodeLlama stop sequence at {} tokens", token_count); + break; + } } - - let token_string = session.model().token_to_piece(token); - generated_text.push_str(&token_string); - token_count += 1; - - if token_count <= 10 || token_count % 50 == 0 { - debug!("Generated token {}: '{}'", token_count, token_string); - } - - // Use dynamic token limit - if token_count >= dynamic_max_tokens { - debug!("Reached dynamic token limit: {}", dynamic_max_tokens); - break; - } - - // Stop on completion markers - if generated_text.contains("") || generated_text.contains("[/INST]") { - debug!("Hit CodeLlama stop sequence at {} tokens", token_count); - break; - } - - // Stop on natural completion points after reasonable generation - if token_count >= 20 && ( - generated_text.trim().ends_with("```") || - (generated_text.contains("```") && generated_text.matches("```").count() 
-                ) {
-                    debug!("Hit code block completion at {} tokens", token_count);
-                    break;
-                }
-            }
-            
-            debug!("Token generation loop completed. Generated {} tokens in {:?}", token_count, start_time.elapsed());
-            Ok((generated_text.trim().to_string(), token_count))
-        })).await;
-        
+
+                debug!(
+                    "Token generation loop completed. Generated {} tokens in {:?}",
+                    token_count,
+                    start_time.elapsed()
+                );
+                Ok((generated_text.trim().to_string(), token_count))
+            }),
+        )
+        .await;
+
         match result {
             Ok(inner_result) => match inner_result {
                 Ok(task_result) => match task_result {
                     Ok((text, token_count)) => {
-                        info!("Completed generation: {} tokens (dynamic limit was {})", token_count, dynamic_max_tokens);
+                        info!(
+                            "Completed generation: {} tokens (dynamic limit was {})",
+                            token_count, dynamic_max_tokens
+                        );
                         Ok(text)
                     }
                     Err(e) => Err(e),
@@ -209,7 +238,7 @@ impl EmbeddedProvider {
             }
         }
     }
-    
+
     // Helper function to estimate token count from text
     fn estimate_tokens(&self, text: &str) -> u32 {
         // Rough estimation: average 4 characters per token
@@ -221,20 +250,25 @@
 #[async_trait::async_trait]
 impl LLMProvider for EmbeddedProvider {
     async fn complete(&self, request: CompletionRequest) -> Result<CompletionResponse> {
-        debug!("Processing completion request with {} messages", request.messages.len());
-        
+        debug!(
+            "Processing completion request with {} messages",
+            request.messages.len()
+        );
+
         let prompt = self.format_messages(&request.messages);
         let max_tokens = request.max_tokens.unwrap_or(self.max_tokens);
         let temperature = request.temperature.unwrap_or(self.temperature);
-        
+
         debug!("Formatted prompt length: {} chars", prompt.len());
-        
-        let content = self.generate_completion(&prompt, max_tokens, temperature).await?;
-        
+
+        let content = self
+            .generate_completion(&prompt, max_tokens, temperature)
+            .await?;
+
         // Estimate token usage (rough approximation)
         let prompt_tokens = (prompt.len() / 4) as u32; // Rough estimate: 4 chars per token
         let completion_tokens = (content.len() / 4) as u32;
-        
+
         Ok(CompletionResponse {
             content,
             usage: Usage {
@@ -245,34 +279,38 @@ impl LLMProvider for EmbeddedProvider {
             model: self.model_name.clone(),
         })
     }
-    
+
     async fn stream(&self, request: CompletionRequest) -> Result<CompletionStream> {
-        debug!("Processing streaming request with {} messages", request.messages.len());
-        
+        debug!(
+            "Processing streaming request with {} messages",
+            request.messages.len()
+        );
+
         let prompt = self.format_messages(&request.messages);
         let max_tokens = request.max_tokens.unwrap_or(self.max_tokens);
         let temperature = request.temperature.unwrap_or(self.temperature);
-        
+
         let (tx, rx) = mpsc::channel(100);
         let session = self.session.clone();
         let prompt = prompt.to_string();
-        
+
         // Spawn streaming task
         tokio::task::spawn_blocking(move || {
             let mut session = match session.try_lock() {
                 Ok(ctx) => ctx,
                 Err(_) => {
-                    let _ = tx.blocking_send(Err(anyhow::anyhow!("Model is busy, please try again")));
+                    let _ =
+                        tx.blocking_send(Err(anyhow::anyhow!("Model is busy, please try again")));
                     return;
                 }
             };
-            
+
             // Set context to the prompt
             if let Err(e) = session.set_context(&prompt) {
                 let _ = tx.blocking_send(Err(anyhow::anyhow!("Failed to set context: {}", e)));
                 return;
            }
-            
+
             // Create sampler with temperature
             let stages = vec![
                 SamplerStage::Temperature(temperature),
@@ -280,67 +318,55 @@ impl LLMProvider for EmbeddedProvider {
                 SamplerStage::TopP(0.9),
             ];
             let sampler = StandardSampler::new_softmax(stages, 1);
-            
+
             // Start completion
-            let mut completion_handle = match session.start_completing_with(sampler, max_tokens as usize) {
+            let mut completion_handle = match session
+                .start_completing_with(sampler, max_tokens as usize)
+            {
                 Ok(handle) => handle,
                 Err(e) => {
-                    let _ = tx.blocking_send(Err(anyhow::anyhow!("Failed to start completion: {}", e)));
+                    let _ =
+                        tx.blocking_send(Err(anyhow::anyhow!("Failed to start completion: {}", e)));
                     return;
                 }
             };
-            
+
             let mut accumulated_text = String::new();
             let mut token_count = 0;
-            
+
             // Stream tokens with proper limits
             while let Some(token) = completion_handle.next_token() {
                 let token_string = session.model().token_to_piece(token);
-                
+
                 accumulated_text.push_str(&token_string);
                 token_count += 1;
-                
+
                 let chunk = CompletionChunk {
                     content: token_string.clone(),
                     finished: false,
                 };
-                
+
                 if tx.blocking_send(Ok(chunk)).is_err() {
                     break; // Receiver dropped
                 }
-                
+
                 // Enforce token limit
                 if token_count >= max_tokens as usize {
                     debug!("Reached max token limit in streaming: {}", max_tokens);
                     break;
                 }
-                
+
                 // Stop if we hit common stop sequences
-                if accumulated_text.contains("### Human") ||
-                   accumulated_text.contains("### System") ||
-                   accumulated_text.contains("<|end|>") ||
-                   accumulated_text.contains("</s>") ||
-                   accumulated_text.trim().ends_with("```") {
+                if accumulated_text.contains("### Human")
+                    || accumulated_text.contains("### System")
+                    || accumulated_text.contains("<|end|>")
+                    || accumulated_text.contains("</s>")
+                {
                     debug!("Hit stop sequence in streaming, stopping generation");
                     break;
                 }
-                
-                // Emergency brake for streaming too
-                if token_count > 0 && token_count % 100 == 0 {
-                    debug!("Streaming: Generated {} tokens so far", token_count);
-                    if accumulated_text.trim().len() > 50 &&
-                       (accumulated_text.contains('\n') || accumulated_text.len() > 200) {
-                        if accumulated_text.trim().ends_with('.') ||
-                           accumulated_text.trim().ends_with('!') ||
-                           accumulated_text.trim().ends_with('?') ||
-                           accumulated_text.trim().ends_with('\n') {
-                            debug!("Found natural stopping point in streaming at {} tokens", token_count);
-                            break;
-                        }
-                    }
-                }
             }
-            
+
             // Send final chunk
             let final_chunk = CompletionChunk {
                 content: String::new(),
@@ -348,14 +374,14 @@ impl LLMProvider for EmbeddedProvider {
             };
             let _ = tx.blocking_send(Ok(final_chunk));
         });
-        
+
         Ok(ReceiverStream::new(rx))
     }
-    
+
     fn name(&self) -> &str {
         "embedded"
     }
-    
+
     fn model(&self) -> &str {
         &self.model_name
     }
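
Note on the mock usage in execute_task: the hunk above hard-codes `prompt_tokens: 100` because token counts are not yet tracked during streaming. Until real counts are plumbed through `CompletionChunk`, the estimate could at least be derived from the actual prompt and streamed text using the same ~4-characters-per-token heuristic the embedded provider already applies. A minimal sketch, not part of the diff — `estimate_usage` is a hypothetical helper name, and the field layout follows the `g3_providers::Usage` literal shown above:

fn estimate_usage(prompt: &str, streamed: &str) -> g3_providers::Usage {
    // Rough heuristic matching estimate_tokens() in embedded.rs: ~4 chars per token.
    let estimate = |text: &str| (text.len() / 4) as u32;
    let prompt_tokens = estimate(prompt);
    let completion_tokens = estimate(streamed);
    g3_providers::Usage {
        prompt_tokens,
        completion_tokens,
        total_tokens: prompt_tokens + completion_tokens,
    }
}

This would replace both hard-coded `100` estimates at once and keep the context-window accounting consistent with the embedded provider's own estimates until exact counts are available.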