summaryrefslogtreecommitdiff
path: root/hyperstack-vm1-gptoss.toml
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2026-03-24 23:49:42 +0200
committer Paul Buetow <paul@buetow.org> 2026-03-24 23:49:42 +0200
commit 9e3ae0f5847f73eea73af6ed9f49f93bf2b811f4 (patch)
tree d9e8acc1931d56803b5d1e4a96931f5c9929534c /hyperstack-vm1-gptoss.toml
parent 9731b82818a2a199a8d826ae3e406c61572c2b6f (diff)
gpt-oss-120b: enable reasoning via openai_gptoss parser
- Add --reasoning-parser openai_gptoss to the gpt-oss-120b vLLM config in all three toml files; extracts <|channel|>analysis thinking blocks into reasoning_content in API responses
- Mark gpt-oss-120b as reasoning: true in pi/agent/models.json for all three providers (hyperstack, hyperstack1, hyperstack2)
- Update vm1 state file

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'hyperstack-vm1-gptoss.toml')
-rw-r--r-- hyperstack-vm1-gptoss.toml 4
1 file changed, 4 insertions(+), 0 deletions(-)
diff --git a/hyperstack-vm1-gptoss.toml b/hyperstack-vm1-gptoss.toml
index 80f908c..ce489ee 100644
--- a/hyperstack-vm1-gptoss.toml
+++ b/hyperstack-vm1-gptoss.toml
@@ -71,6 +71,9 @@ tensor_parallel_size = 1
# tool_call_parser="" disables --enable-auto-tool-choice; the llama3_json parser crashes
# on gpt-oss responses (vLLM 0.17.1 adds token_ids to responses, breaking the parser API).
tool_call_parser = ""
+# gpt-oss-120b is a reasoning model (o-series architecture); the openai_gptoss parser
+# extracts <|channel|>analysis…<|end|> thinking blocks into reasoning_content in the response.
+extra_vllm_args = ["--reasoning-parser", "openai_gptoss"]
# Named model presets for 'ruby hyperstack.rb --config hyperstack-vm1-gptoss.toml model switch <name>'.
# Each preset overrides the matching [vllm] field; unset fields fall back to [vllm] defaults.
@@ -114,6 +117,7 @@ max_model_len = 131072
gpu_memory_utilization = 0.92
tensor_parallel_size = 1
tool_call_parser = ""
+extra_vllm_args = ["--reasoning-parser", "openai_gptoss"]
# Qwen2.5-Coder-32B-Instruct AWQ — best-in-class open coding model at 32B, ~18 GB on A100.
[vllm.presets.qwen25-coder-32b]