embedded model support

This commit is contained in:
Dhanji Prasanna
2025-09-06 13:32:37 +10:00
parent 80e5178a1f
commit 1834b8946c
8 changed files with 793 additions and 14 deletions

View File

@@ -2,7 +2,7 @@
# Copy to ~/.config/g3/config.toml and customize
[providers]
-default_provider = "openai"
+default_provider = "embedded"
[providers.openai]
# Get your API key from https://platform.openai.com/api-keys
@@ -20,6 +20,18 @@ model = "claude-3-sonnet-20240229"
max_tokens = 2048
temperature = 0.1
+[providers.embedded]
+# Path to your GGUF model file
+model_path = "~/.cache/g3/models/codellama-7b-instruct.Q4_K_M.gguf"
+model_type = "codellama"
+context_length = 16384 # Use CodeLlama's full context capability
+max_tokens = 2048 # Default fallback, but will be calculated dynamically
+temperature = 0.1
+# Number of layers to offload to GPU (0 for CPU only)
+gpu_layers = 32
+# Number of CPU threads to use
+threads = 8
[agent]
max_context_length = 8192
enable_streaming = true