From cedd565177bbae0a9f2dd9f9e39a01a17c076a51 Mon Sep 17 00:00:00 2001
From: Michael Neale <michael.neale@gmail.com>
Date: Mon, 15 Sep 2025 12:21:59 -0700
Subject: [PATCH 1/4] download qwen if not already there

---
 crates/g3-core/src/providers/embedded.rs | 83 ++++++++++++++++++++++--
 1 file changed, 76 insertions(+), 7 deletions(-)
diff --git a/crates/g3-core/src/providers/embedded.rs b/crates/g3-core/src/providers/embedded.rs
index 22f9972..2c132ce 100644
--- a/crates/g3-core/src/providers/embedded.rs
+++ b/crates/g3-core/src/providers/embedded.rs
@@ -7,13 +7,14 @@ use llama_cpp::{
     standard_sampler::{SamplerStage, StandardSampler},
     LlamaModel, LlamaParams, LlamaSession, SessionParams,
 };
-use std::path::Path;
-use std::sync::atomic::AtomicBool;
+use std::path::{Path, PathBuf};
+use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
 use std::sync::Arc;
+use std::time::Duration;
 use tokio::sync::mpsc;
 use tokio::sync::Mutex;
 use tokio_stream::wrappers::ReceiverStream;
-use tracing::{debug, error, info};
+use tracing::{debug, error, info, warn};
 
 pub struct EmbeddedProvider {
     model: Arc<LlamaModel>,
@@ -39,11 +40,19 @@ impl EmbeddedProvider {
 
         // Expand tilde in path
         let expanded_path = shellexpand::tilde(&model_path);
-        let model_path = Path::new(expanded_path.as_ref());
-
-        if !model_path.exists() {
-            anyhow::bail!("Model file not found: {}", model_path.display());
+        let model_path_buf = PathBuf::from(expanded_path.as_ref());
+        
+        // If the model file is missing and it's the default Qwen model, download it automatically
+        if !model_path_buf.exists() {
+            if model_path.contains("qwen2.5-7b-instruct-q3_k_m.gguf") {
+                info!("Model file not found. Attempting to download Qwen 2.5 7B model...");
+                Self::download_qwen_model(&model_path_buf)?;
+            } else {
+                anyhow::bail!("Model file not found: {}", model_path_buf.display());
+            }
         }
+        
+        let model_path = model_path_buf.as_path();
 
         // Set up model parameters
         let mut params = LlamaParams::default();
@@ -377,6 +386,66 @@ impl EmbeddedProvider {
         
         cleaned.trim().to_string()
     }
+
+    /// Downloads the Qwen 2.5 7B (Q3_K_M) GGUF model to `model_path` by running `curl`.
+    ///
+    /// Data is written to a sibling `<model_path>.part` file and renamed only after the size
+    /// check passes, so an interrupted download is never left under the final name. Errors if
+    /// `curl` is missing or fails, on filesystem errors, or if the file is far too small.
+    fn download_qwen_model(model_path: &Path) -> Result<()> {
+        use std::fs;
+        use std::io::ErrorKind;
+        use std::process::Command;
+
+        const MODEL_URL: &str = "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-GGUF/resolve/main/qwen2.5-7b-instruct-q3_k_m.gguf";
+        const MODEL_SIZE_MB: u64 = 3631; // Approximate size in MB
+
+        // Create the parent directory if it doesn't exist
+        if let Some(parent) = model_path.parent() {
+            fs::create_dir_all(parent)?;
+        }
+        // Append ".part" to the full file name (not `with_extension`, which would replace it).
+        let mut part_path = model_path.as_os_str().to_owned();
+        part_path.push(".part");
+        let part_path = std::path::PathBuf::from(part_path);
+
+        info!("Downloading Qwen 2.5 7B model (Q3_K_M quantization, ~3.5GB)...");
+        info!("This is a one-time download that may take several minutes depending on your connection.");
+        info!("Downloading to: {}", model_path.display());
+
+        // `status()` lets curl's progress bar reach the terminal; `arg` accepts non-UTF-8 paths.
+        let status = Command::new("curl")
+            .args(&["-L", "-#", "-f", "-o"]) // follow redirects, progress bar, fail on HTTP errors
+            .arg(&part_path)
+            .arg(MODEL_URL)
+            .status();
+        // A missing binary surfaces as a `NotFound` spawn error, not as text on curl's stderr.
+        if matches!(&status, Err(e) if e.kind() == ErrorKind::NotFound) {
+            error!("curl is not installed. Please install curl or manually download the model.");
+            error!("Manual download instructions:");
+            error!("1. Download from: {}", MODEL_URL);
+            error!("2. Save to: {}", model_path.display());
+            anyhow::bail!("curl not found - please install curl or download the model manually");
+        }
+        let status = status?;
+        if !status.success() {
+            fs::remove_file(&part_path).ok(); // Best-effort cleanup of the partial download
+            anyhow::bail!("Failed to download model (curl {})", status);
+        }
+
+        // Verify the file was created and has reasonable size
+        let size_mb = fs::metadata(&part_path)?.len() / (1024 * 1024);
+        if size_mb < MODEL_SIZE_MB - 100 {  // Allow some variance
+            fs::remove_file(&part_path).ok();  // Clean up partial download
+            anyhow::bail!(
+                "Downloaded file appears incomplete ({}MB vs expected ~{}MB). Please try again.",
+                size_mb, MODEL_SIZE_MB
+            );
+        }
+        fs::rename(&part_path, model_path)?;
+        info!("Successfully downloaded Qwen 2.5 7B model ({}MB)", size_mb);
+        Ok(())
+    }
 }
 
 #[async_trait::async_trait]

From 8e821d0a5b49923900b34b135d7d963d12913514 Mon Sep 17 00:00:00 2001
From: Michael Neale <michael.neale@gmail.com>
Date: Mon, 15 Sep 2025 13:19:02 -0700
Subject: [PATCH 2/4] auto setup: create a default Qwen config when no config file exists

---
 Cargo.lock                                  |  1 +
 crates/g3-config/Cargo.toml                 |  1 +
 crates/g3-config/src/lib.rs                 | 68 +++++++++++++++++++++
 g3_session_hello_test_a63e133911916b31.json | 32 ++++++++++
 4 files changed, 102 insertions(+)
 create mode 100644 g3_session_hello_test_a63e133911916b31.json

diff --git a/Cargo.lock b/Cargo.lock
index 6e41156..93c3eee 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -720,6 +720,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "config",
+ "dirs 5.0.1",
  "serde",
  "shellexpand",
  "thiserror 1.0.69",
diff --git a/crates/g3-config/Cargo.toml b/crates/g3-config/Cargo.toml
index bc9e7c1..d818d23 100644
--- a/crates/g3-config/Cargo.toml
+++ b/crates/g3-config/Cargo.toml
@@ -11,3 +11,4 @@ anyhow = { workspace = true }
 thiserror = { workspace = true }
 toml = "0.8"
 shellexpand = "3.0"
+dirs = "5.0"
diff --git a/crates/g3-config/src/lib.rs b/crates/g3-config/src/lib.rs
index 5b3596f..46856f9 100644
--- a/crates/g3-config/src/lib.rs
+++ b/crates/g3-config/src/lib.rs
@@ -71,6 +71,50 @@ impl Default for Config {
 
 impl Config {
     pub fn load(config_path: Option<&str>) -> Result<Self> {
+        // Check if any config file exists
+        let config_exists = if let Some(path) = config_path {
+            Path::new(path).exists()
+        } else {
+            // Check default locations
+            let default_paths = [
+                "./g3.toml",
+                "~/.config/g3/config.toml",
+                "~/.g3.toml",
+            ];
+            
+            default_paths.iter().any(|path| {
+                let expanded_path = shellexpand::tilde(path);
+                Path::new(expanded_path.as_ref()).exists()
+            })
+        };
+        
+        // If no config exists, create and save a default Qwen config
+        if !config_exists {
+            let qwen_config = Self::default_qwen_config();
+            
+            // Save to default location
+            let config_dir = dirs::home_dir()
+                .map(|mut path| {
+                    path.push(".config");
+                    path.push("g3");
+                    path
+                })
+                .unwrap_or_else(|| std::path::PathBuf::from("."));
+            
+            // Create directory if it doesn't exist
+            std::fs::create_dir_all(&config_dir).ok();
+            
+            let config_file = config_dir.join("config.toml");
+            if let Err(e) = qwen_config.save(config_file.to_str().unwrap()) {
+                eprintln!("Warning: Could not save default config: {}", e);
+            } else {
+                println!("Created default Qwen configuration at: {}", config_file.display());
+            }
+            
+            return Ok(qwen_config);
+        }
+        
+        // Existing config loading logic
         let mut settings = config::Config::builder();
         
         // Load default configuration
@@ -108,6 +152,30 @@ impl Config {
         Ok(config)
     }
     
+    fn default_qwen_config() -> Self {
+        let embedded = EmbeddedConfig {  // local Qwen 2.5 7B Instruct, Q3_K_M quantization
+            model_path: "~/.cache/g3/models/qwen2.5-7b-instruct-q3_k_m.gguf".to_string(),
+            model_type: "qwen".to_string(),
+            context_length: Some(32768),  // Qwen2.5 supports 32k context
+            max_tokens: Some(2048),
+            temperature: Some(0.1),
+            gpu_layers: Some(32),
+            threads: Some(8),
+        };
+        let agent = AgentConfig {
+            max_context_length: 8192,
+            enable_streaming: true,
+            timeout_seconds: 60,
+        };
+        let providers = ProvidersConfig {  // embedded is the only configured provider
+            openai: None,
+            anthropic: None,
+            embedded: Some(embedded),
+            default_provider: "embedded".to_string(),
+        };
+        Self { providers, agent }
+    }
+    
     pub fn save(&self, path: &str) -> Result<()> {
         let toml_string = toml::to_string_pretty(self)?;
         std::fs::write(path, toml_string)?;
diff --git a/g3_session_hello_test_a63e133911916b31.json b/g3_session_hello_test_a63e133911916b31.json
new file mode 100644
index 0000000..d82cba3
--- /dev/null
+++ b/g3_session_hello_test_a63e133911916b31.json
@@ -0,0 +1,32 @@
+{
+  "context_window": {
+    "conversation_history": [
+      {
+        "content": "You are G3, a general-purpose AI agent. Your goal is to analyze and solve problems by writing code.\n\n# Tool Call Format\n\nWhen you need to execute a tool, write ONLY the JSON tool call on a new line:\n\n{\"tool\": \"tool_name\", \"args\": {\"param\": \"value\"}}\n\nThe tool will execute immediately and you'll receive the result (success or error) to continue with.\n\n# Available Tools\n\n- **shell**: Execute shell commands\n  - Format: {\"tool\": \"shell\", \"args\": {\"command\": \"your_command_here\"}}\n  - Example: {\"tool\": \"shell\", \"args\": {\"command\": \"ls ~/Downloads\"}}\n\n- **final_output**: Signal task completion with a detailed summary of work done in markdown format\n  - Format: {\"tool\": \"final_output\", \"args\": {\"summary\": \"what_was_accomplished\"}}\n\n# Instructions\n\n1. Analyze the request and break down into smaller tasks if appropriate\n2. Execute ONE tool at a time\n3. STOP when the original request was satisfied\n4. Call the final_output tool when done\n\n# Response Guidelines\n\n- Use Markdown formatting for all responses except tool calls.\n- Whenever taking actions, use the pronoun 'I'\n\n",
+        "role": "system"
+      },
+      {
+        "content": "Task: hello test",
+        "role": "user"
+      },
+      {
+        "content": "I will start by executing a simple task as requested. Let's print \"Hello, Test\" to confirm the task is understood and can be executed.\n\n{\"tool\": \"shell\", \"args\": {\"command\":\"echo Hello, Test\"}}",
+        "role": "assistant"
+      },
+      {
+        "content": "Tool result: Hello, Test",
+        "role": "user"
+      },
+      {
+        "content": "I will start by executing a simple task as requested. Let's print \"Hello, Test\" to confirm the task is understood and can be executed.\n\nTool executed: shell -> Hello, Test\n\n{\"tool\": \"final_output\", \"args\": {\"summary\": \"# Task Accomplished\n\nThe task 'hello test' was successfully executed. The output was:\n\n```\nHello, Test\n```\n\nThis confirms that the request was understood and completed as intended.\"}}",
+        "role": "assistant"
+      }
+    ],
+    "percentage_used": 0.9185791015625,
+    "total_tokens": 32768,
+    "used_tokens": 301
+  },
+  "session_id": "hello_test_a63e133911916b31",
+  "status": "completed",
+  "timestamp": 1757966669
+}
\ No newline at end of file

From cfc4e7d3605ab5b93b199c163ee8f2e9122e2c90 Mon Sep 17 00:00:00 2001
From: Michael Neale <michael.neale@gmail.com>
Date: Mon, 15 Sep 2025 13:47:47 -0700
Subject: [PATCH 3/4] add mistral support

---
 crates/g3-core/src/providers/embedded.rs    | 39 +++++++++++++++++++++
 g3_session_hello_test_a63e133911916b31.json | 12 +++----
 2 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/crates/g3-core/src/providers/embedded.rs b/crates/g3-core/src/providers/embedded.rs
index 2c132ce..a45611d 100644
--- a/crates/g3-core/src/providers/embedded.rs
+++ b/crates/g3-core/src/providers/embedded.rs
@@ -117,6 +117,45 @@ impl EmbeddedProvider {
             
             // Add the start of assistant response
             formatted.push_str("<|im_start|>assistant\n");
+            formatted
+        } else if model_name_lower.contains("mistral") {
+            // Mistral Instruct format: <s>[INST] ... [/INST] assistant_response</s>
+            let mut formatted = String::new();
+            let mut in_conversation = false;
+            
+            for (i, message) in messages.iter().enumerate() {
+                match message.role {
+                    MessageRole::System => {
+                        // Mistral doesn't have a special system token, include it at the start
+                        if i == 0 {
+                            formatted.push_str("<s>[INST] ");
+                            formatted.push_str(&message.content);
+                            formatted.push_str("\n\n");
+                            in_conversation = true;
+                        }
+                    }
+                    MessageRole::User => {
+                        if !in_conversation {
+                            formatted.push_str("<s>[INST] ");
+                        }
+                        formatted.push_str(&message.content);
+                        formatted.push_str(" [/INST]");
+                        in_conversation = false;
+                    }
+                    MessageRole::Assistant => {
+                        formatted.push_str(" ");
+                        formatted.push_str(&message.content);
+                        formatted.push_str("</s> ");
+                        in_conversation = false;
+                    }
+                }
+            }
+            
+            // If the last message was from user, add a space for the assistant's response
+            if messages.last().map_or(false, |m| matches!(m.role, MessageRole::User)) {
+                formatted.push_str(" ");
+            }
+            
             formatted
         } else {
             // Use Llama/CodeLlama format for other models
diff --git a/g3_session_hello_test_a63e133911916b31.json b/g3_session_hello_test_a63e133911916b31.json
index d82cba3..d96e037 100644
--- a/g3_session_hello_test_a63e133911916b31.json
+++ b/g3_session_hello_test_a63e133911916b31.json
@@ -10,23 +10,23 @@
         "role": "user"
       },
       {
-        "content": "I will start by executing a simple task as requested. Let's print \"Hello, Test\" to confirm the task is understood and can be executed.\n\n{\"tool\": \"shell\", \"args\": {\"command\":\"echo Hello, Test\"}}",
+        "content": "1. Understanding the task, it seems simple enough. The goal is to output the message \"hello\" for this test.\n2. I will execute the following tool call to print the message \"hello\" to the console:\n\n```json\n\n{\"tool\": \"shell\", \"args\": {\"command\":\"echo hello\"}}",
         "role": "assistant"
       },
       {
-        "content": "Tool result: Hello, Test",
+        "content": "Tool result: hello",
         "role": "user"
       },
       {
-        "content": "I will start by executing a simple task as requested. Let's print \"Hello, Test\" to confirm the task is understood and can be executed.\n\nTool executed: shell -> Hello, Test\n\n{\"tool\": \"final_output\", \"args\": {\"summary\": \"# Task Accomplished\n\nThe task 'hello test' was successfully executed. The output was:\n\n```\nHello, Test\n```\n\nThis confirms that the request was understood and completed as intended.\"}}",
+        "content": "1. Understanding the task, it seems simple enough. The goal is to output the message \"hello\" for this test.\n2. I will execute the following tool call to print the message \"hello\" to the console:\n\n```json\n\nTool executed: shell -> hello\n\n1. The shell tool call was successful, and it has printed the message \"hello\" as requested.\n2. I am now ready to call the final_output tool to signal task completion and provide a summary of the work done.\n\n```json\n\n=> Printed the message 'hello' as requested.",
         "role": "assistant"
       }
     ],
-    "percentage_used": 0.9185791015625,
+    "percentage_used": 1.06201171875,
     "total_tokens": 32768,
-    "used_tokens": 301
+    "used_tokens": 348
   },
   "session_id": "hello_test_a63e133911916b31",
   "status": "completed",
-  "timestamp": 1757966669
+  "timestamp": 1757968007
 }
\ No newline at end of file

From 76825ca4d65d9d51504561a0d8b02fe718a7f31c Mon Sep 17 00:00:00 2001
From: Michael Neale <michael.neale@gmail.com>
Date: Mon, 15 Sep 2025 15:21:55 -0700
Subject: [PATCH 4/4] don't need this: remove accidentally committed session log

---
 g3_session_hello_test_a63e133911916b31.json | 32 ---------------------
 1 file changed, 32 deletions(-)
 delete mode 100644 g3_session_hello_test_a63e133911916b31.json

diff --git a/g3_session_hello_test_a63e133911916b31.json b/g3_session_hello_test_a63e133911916b31.json
deleted file mode 100644
index d96e037..0000000
--- a/g3_session_hello_test_a63e133911916b31.json
+++ /dev/null
@@ -1,32 +0,0 @@
-{
-  "context_window": {
-    "conversation_history": [
-      {
-        "content": "You are G3, a general-purpose AI agent. Your goal is to analyze and solve problems by writing code.\n\n# Tool Call Format\n\nWhen you need to execute a tool, write ONLY the JSON tool call on a new line:\n\n{\"tool\": \"tool_name\", \"args\": {\"param\": \"value\"}}\n\nThe tool will execute immediately and you'll receive the result (success or error) to continue with.\n\n# Available Tools\n\n- **shell**: Execute shell commands\n  - Format: {\"tool\": \"shell\", \"args\": {\"command\": \"your_command_here\"}}\n  - Example: {\"tool\": \"shell\", \"args\": {\"command\": \"ls ~/Downloads\"}}\n\n- **final_output**: Signal task completion with a detailed summary of work done in markdown format\n  - Format: {\"tool\": \"final_output\", \"args\": {\"summary\": \"what_was_accomplished\"}}\n\n# Instructions\n\n1. Analyze the request and break down into smaller tasks if appropriate\n2. Execute ONE tool at a time\n3. STOP when the original request was satisfied\n4. Call the final_output tool when done\n\n# Response Guidelines\n\n- Use Markdown formatting for all responses except tool calls.\n- Whenever taking actions, use the pronoun 'I'\n\n",
-        "role": "system"
-      },
-      {
-        "content": "Task: hello test",
-        "role": "user"
-      },
-      {
-        "content": "1. Understanding the task, it seems simple enough. The goal is to output the message \"hello\" for this test.\n2. I will execute the following tool call to print the message \"hello\" to the console:\n\n```json\n\n{\"tool\": \"shell\", \"args\": {\"command\":\"echo hello\"}}",
-        "role": "assistant"
-      },
-      {
-        "content": "Tool result: hello",
-        "role": "user"
-      },
-      {
-        "content": "1. Understanding the task, it seems simple enough. The goal is to output the message \"hello\" for this test.\n2. I will execute the following tool call to print the message \"hello\" to the console:\n\n```json\n\nTool executed: shell -> hello\n\n1. The shell tool call was successful, and it has printed the message \"hello\" as requested.\n2. I am now ready to call the final_output tool to signal task completion and provide a summary of the work done.\n\n```json\n\n=> Printed the message 'hello' as requested.",
-        "role": "assistant"
-      }
-    ],
-    "percentage_used": 1.06201171875,
-    "total_tokens": 32768,
-    "used_tokens": 348
-  },
-  "session_id": "hello_test_a63e133911916b31",
-  "status": "completed",
-  "timestamp": 1757968007
-}
\ No newline at end of file