| author | Paul Buetow <paul@buetow.org> | 2026-03-21 10:49:35 +0200 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-03-21 10:49:35 +0200 |
| commit | ea0f9f7f51b32f0c392f75aa0cc3231211f54757 | |
| tree | 378d01dbc87dc0ef9f4fbd6ec7788e0a62f66876 | |
| parent | 4baa087445a11b856139f55adab262fa97384033 | |
Remove LiteLLM and Claude Code repo references (task 301)
Diffstat (limited to 'hyperstack-vm.toml')
| -rw-r--r-- | hyperstack-vm.toml | 14 |
|---|---|---|

1 file changed, 2 insertions(+), 12 deletions(-)
```diff
diff --git a/hyperstack-vm.toml b/hyperstack-vm.toml
index e82c97f..28de975 100644
--- a/hyperstack-vm.toml
+++ b/hyperstack-vm.toml
@@ -37,8 +37,6 @@ allowed_ssh_cidrs = ["auto"]
 allowed_wireguard_cidrs = ["auto"]
 # Port 11434 is shared by both Ollama and vLLM for firewall compatibility.
 ollama_port = 11434
-# Port 4000: LiteLLM Anthropic-API proxy (used with vLLM).
-litellm_port = 4000
 
 [bootstrap]
 enable_guest_bootstrap = true
@@ -56,7 +54,7 @@ num_parallel = 1
 context_length = 32768
 pull_models = ["qwen3-coder-next", "qwen3-coder:30b", "gpt-oss:20b", "gpt-oss:120b", "nemotron-3-super"]
 
-# vLLM serves one model via Docker; LiteLLM translates Anthropic API → OpenAI.
+# vLLM serves one model via Docker on the OpenAI-compatible API.
 # Use --vllm / --no-vllm CLI flags to override install at runtime.
 [vllm]
 install = true
@@ -68,14 +66,6 @@ max_model_len = 262144
 gpu_memory_utilization = 0.92
 tensor_parallel_size = 1
 tool_call_parser = "qwen3_coder"
-# LiteLLM maps each entry to the vLLM model; add new Anthropic model IDs here.
-litellm_master_key = "sk-litellm-master"
-litellm_claude_model_names = [
-    "claude-sonnet-4-20250514",
-    "claude-opus-4-20250514",
-    "claude-opus-4-6-20260604",
-    "claude-haiku-3-5-20241022"
-]
 
 # Named model presets for 'ruby hyperstack.rb model switch <name>'.
 # Each preset overrides the matching [vllm] field; unset fields fall back to [vllm] defaults.
@@ -127,7 +117,7 @@ tool_call_parser = ""
 # OpenAI GPT-OSS 120B — powerful MoE (5.1B active / 117B total, MXFP4), ~65 GB on A100.
 # Hard architecture limit: max_position_embeddings=131072 in model config.json.
 # 131072 is the absolute ceiling — exceeding it causes NaN or CUDA OOB errors.
-# For sessions approaching this limit, start a fresh opencode conversation.
+# For sessions approaching this limit, start a fresh Pi conversation.
 # tool_call_parser = "" disables --enable-auto-tool-choice (same reason as gpt-oss-20b).
 [vllm.presets.gpt-oss-120b]
 model = "openai/gpt-oss-120b"
```
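With the LiteLLM proxy gone, clients talk to vLLM's OpenAI-compatible endpoint directly on the shared port 11434 instead of sending Anthropic-style requests to port 4000. As a rough sketch of what such a call looks like after this change, the Python snippet below posts a chat completion. The host name is a placeholder, and it assumes the served model name matches the preset's Hugging Face repo ID (openai/gpt-oss-120b) and that no API key is enforced; none of these details is fixed by the diff itself.

```python
import requests

# Placeholder host; the real address comes from the provisioned Hyperstack VM.
# Port 11434 is the shared Ollama/vLLM port from hyperstack-vm.toml.
BASE_URL = "http://your-vm-host:11434/v1"

resp = requests.post(
    f"{BASE_URL}/chat/completions",
    json={
        # Assumption: vLLM serves the model under its Hugging Face repo ID,
        # matching the gpt-oss-120b preset in the config.
        "model": "openai/gpt-oss-120b",
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
        "max_tokens": 64,
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```

Before this commit, the equivalent request would have gone through LiteLLM on port 4000 using one of the Anthropic model IDs from litellm_claude_model_names (e.g. claude-sonnet-4-20250514); with the proxy removed, those aliases no longer exist and only the OpenAI-style route remains.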
