Refactor system prompts to eliminate duplication; upgrade embedded provider

- Refactor prompts.rs: extract shared sections (intro, TODO, workspace memory,
  web research, response guidelines) used by both native and non-native prompts
- Fix typo in native prompt: "save them.." -> "save them."
- Fix non-native prompt: add missing closing braces in JSON examples,
  add IMPORTANT steps section, align with native prompt quality
- Add 9 unit tests to verify both prompts contain required sections
- Upgrade llama-cpp-2 dependency and refactor embedded provider
- Update config.example.toml with embedded model examples
- Update workspace memory
This commit is contained in:
Dhanji R. Prasanna
2026-01-28 09:56:39 +11:00
parent 585684a86e
commit a902be1562
9 changed files with 1027 additions and 851 deletions

View File

@@ -52,6 +52,35 @@ model = "claude-sonnet-4-5"
# model = "anthropic/claude-3.5-sonnet"
# base_url = "https://openrouter.ai/api/v1"
# =============================================================================
# Embedded providers (local models via llama.cpp with Metal acceleration)
# =============================================================================
# Download models from Hugging Face:
# huggingface-cli download bartowski/THUDM_GLM-4-32B-0414-GGUF \
# --include "THUDM_GLM-4-32B-0414-Q6_K_L.gguf" --local-dir ~/.g3/models/
#
# GLM-4 32B - Top-tier local model for coding/reasoning (context_length auto-detected from GGUF)
# [providers.embedded.glm4]
# model_path = "~/.g3/models/THUDM_GLM-4-32B-0414-Q6_K_L.gguf"
# model_type = "glm4" # Required: glm4, qwen, mistral, llama, codellama
# context_length = 32768 # Optional: auto-detected from GGUF (GLM-4 = 32K)
# max_tokens = 4096 # Optional: defaults to min(4096, context/4)
# temperature = 0.1
# gpu_layers = 99 # Use all GPU layers on Apple Silicon
# threads = 8
#
# GLM-4 9B - Smaller but very capable (minimal config - most settings auto-detected)
# [providers.embedded.glm4-9b]
# model_path = "~/.g3/models/THUDM_GLM-4-9B-0414-Q8_0.gguf"
# model_type = "glm4"
# gpu_layers = 99 # Optional but recommended for Apple Silicon
#
# Qwen3 4B - Small but powerful, good for ensemble usage (minimal config)
# [providers.embedded.qwen3]
# model_path = "~/.g3/models/qwen3-4b-q4_k_m.gguf"
# model_type = "qwen"
# gpu_layers = 99 # Optional but recommended for Apple Silicon
# =============================================================================
# Agent settings (all optional - these are the defaults)
# =============================================================================