summary refs log tree commit diff
path: root/hyperstack-vm.toml
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2026-03-21 12:58:08 +0200
committer Paul Buetow <paul@buetow.org> 2026-03-21 12:58:08 +0200
commit 437168c7a47ee656edc91177b9753957b9361209 (patch)
tree 0b0b1fb74137c0dd6f2bb4a9a4e7b67f33ea7129 /hyperstack-vm.toml
parent f9a2ffcdf1d7dd9612c2d328585f58310236bd9b (diff)
Set single-VM default to GPT-OSS 120B; add hyperstack Pi provider and fish abbr
- hyperstack-vm.toml: switch [vllm] default from Qwen3-Coder-Next to openai/gpt-oss-120b (container_name, max_model_len=131072, tool_call_parser=''); labels already reflected gpt-oss-120b
- pi/agent/models.json: add 'hyperstack' provider pointing at hyperstack.wg1:11434/v1 with GPT-OSS 120B as primary model and all preset models registered (alongside hyperstack1/hyperstack2)
- hyperstack.fish: add pi-hyperstack abbreviation for single-VM GPT-OSS 120B
- README.md: update fish abbreviations table, provider table, VM config table, and Single-VM setup section to reflect the new defaults
Diffstat (limited to 'hyperstack-vm.toml')
-rw-r--r-- hyperstack-vm.toml 11
1 file changed, 7 insertions, 4 deletions
diff --git a/hyperstack-vm.toml b/hyperstack-vm.toml
index 28de975..88f2a19 100644
--- a/hyperstack-vm.toml
+++ b/hyperstack-vm.toml
@@ -58,14 +58,17 @@ pull_models = ["qwen3-coder-next", "qwen3-coder:30b", "gpt-oss:20b", "gpt-oss:12
# Use --vllm / --no-vllm CLI flags to override install at runtime.
[vllm]
install = true
-model = "bullpoint/Qwen3-Coder-Next-AWQ-4bit"
+model = "openai/gpt-oss-120b"
# HuggingFace model cache on ephemeral NVMe (fast; survives reboots on most providers).
hug_cache_dir = "/ephemeral/hug"
-container_name = "vllm_qwen3"
-max_model_len = 262144
+container_name = "vllm_gpt_oss_120b"
+# Hard architecture limit: max_position_embeddings=131072 in model config.json.
+max_model_len = 131072
gpu_memory_utilization = 0.92
tensor_parallel_size = 1
-tool_call_parser = "qwen3_coder"
+# tool_call_parser="" disables --enable-auto-tool-choice; the llama3_json parser crashes
+# on gpt-oss responses (vLLM 0.17.1 adds token_ids to responses, breaking the parser API).
+tool_call_parser = ""
# Named model presets for 'ruby hyperstack.rb model switch <name>'.
# Each preset overrides the matching [vllm] field; unset fields fall back to [vllm] defaults.