Refactor system prompts to eliminate duplication; upgrade embedded provider

- Refactor prompts.rs: extract shared sections (intro, TODO, workspace memory,
  web research, response guidelines) used by both native and non-native prompts
- Fix typo in native prompt: "save them.." -> "save them."
- Fix non-native prompt: add missing closing braces in JSON examples,
  add IMPORTANT steps section, align with native prompt quality
- Add 9 unit tests to verify both prompts contain required sections
- Upgrade llama-cpp-2 dependency and refactor embedded provider
- Update config.example.toml with embedded model examples
- Update workspace memory
This commit is contained in:
Dhanji R. Prasanna
2026-01-28 09:56:39 +11:00
parent 585684a86e
commit a902be1562
9 changed files with 1027 additions and 851 deletions

View File

@@ -52,6 +52,35 @@ model = "claude-sonnet-4-5"
# model = "anthropic/claude-3.5-sonnet"
# base_url = "https://openrouter.ai/api/v1"
# =============================================================================
# Embedded providers (local models via llama.cpp with Metal acceleration)
# =============================================================================
# Download models from Hugging Face:
# huggingface-cli download bartowski/THUDM_GLM-4-32B-0414-GGUF \
# --include "THUDM_GLM-4-32B-0414-Q6_K_L.gguf" --local-dir ~/.g3/models/
#
# GLM-4 32B - Top-tier local model for coding/reasoning (context_length auto-detected from GGUF)
# [providers.embedded.glm4]
# model_path = "~/.g3/models/THUDM_GLM-4-32B-0414-Q6_K_L.gguf"
# model_type = "glm4" # Required: glm4, qwen, mistral, llama, codellama
# context_length = 32768 # Optional: auto-detected from GGUF (GLM-4 = 32K)
# max_tokens = 4096 # Optional: defaults to min(4096, context/4)
# temperature = 0.1
# gpu_layers = 99 # Use all GPU layers on Apple Silicon
# threads = 8
#
# GLM-4 9B - Smaller but very capable (minimal config - most settings auto-detected)
# [providers.embedded.glm4-9b]
# model_path = "~/.g3/models/THUDM_GLM-4-9B-0414-Q8_0.gguf"
# model_type = "glm4"
# gpu_layers = 99 # Optional but recommended for Apple Silicon
#
# Qwen3 4B - Small but powerful, good for ensemble usage (minimal config)
# [providers.embedded.qwen3]
# model_path = "~/.g3/models/qwen3-4b-q4_k_m.gguf"
# model_type = "qwen"
# gpu_layers = 99 # Optional but recommended for Apple Silicon
# =============================================================================
# Agent settings (all optional - these are the defaults)
# =============================================================================