hyperstack: gpt-oss-120b + qwen3-coder-next dual-VM pair on A100x1

- Add hyperstack-vm1-gptoss.toml: A100x1 config for gpt-oss-120b (VM1) and qwen3-coder-next (VM2) pair, replacing the H100x2 default
- Fix pi/agent/models.json: hyperstack provider URL was pointing at hyperstack.wg1 (unresolvable); corrected to hyperstack1.wg1 (192.168.3.1)
- Update hyperstack.rb, hypr.fish: reference vm1-gptoss.toml for create-both and pair commands; update fish abbreviations for the new pair setup
- Update ask-mode/utils.ts: allow read-only 'ask' commands in ask-mode
- Update agent-plan-mode/utils.ts: tighten isAskCommand check
- Add state files for provisioned vm1/vm2 instances

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
author: Paul Buetow <paul@buetow.org> 2026-03-24 23:35:00 +0200
committer: Paul Buetow <paul@buetow.org> 2026-03-24 23:35:00 +0200
commit: 9f45954847e7aa904ebda55e3c23277d7c7a3079 (patch)
tree: 2a30a1782c21b0616a70c8536aa07a4019df7213
parent: 862b132ddee2cc343a3edc98797554937ea5f595 (diff)
9 files changed, 511 insertions, 100 deletions
diff --git a/.hyperstack-vm1-state.json b/.hyperstack-vm1-state.json
new file mode 100644
index 0000000..acb9185
--- /dev/null
+++ b/.hyperstack-vm1-state.json
@@ -0,0 +1,90 @@
+{
+  "vm_id": 698908,
+  "vm_name": "hyperstack1-20260324205516",
+  "environment_name": "snonux-ollama",
+  "region": "CANADA-1",
+  "flavor_name": "n3-A100x1",
+  "image_name": "Ubuntu Server 24.04 LTS R570 CUDA 12.8 with Docker",
+  "key_name": "earth",
+  "public_ip": "69.19.136.193",
+  "created_at": "2026-03-24T20:55:17Z",
+  "services": {
+    "vllm_enabled": true,
+    "ollama_enabled": false,
+    "comfyui_enabled": false
+  },
+  "security_rules": [
+    {
+      "direction": "ingress",
+      "ethertype": "IPv4",
+      "protocol": "tcp",
+      "port_range_min": 22,
+      "port_range_max": 22,
+      "remote_ip_prefix": "79.100.218.77/32"
+    },
+    {
+      "direction": "ingress",
+      "ethertype": "IPv4",
+      "protocol": "udp",
+      "port_range_min": 56710,
+      "port_range_max": 56710,
+      "remote_ip_prefix": "79.100.218.77/32"
+    },
+    {
+      "direction": "ingress",
+      "ethertype": "IPv4",
+      "protocol": "tcp",
+      "port_range_min": 11434,
+      "port_range_max": 11434,
+      "remote_ip_prefix": "192.168.3.0/24"
+    },
+    {
+      "direction": "egress",
+      "ethertype": "IPv6",
+      "protocol": "any",
+      "port_range_min": 1,
+      "port_range_max": 65535,
+      "remote_ip_prefix": "0.0.0.0/0"
+    },
+    {
+      "direction": "ingress",
+      "ethertype": "IPv4",
+      "protocol": "udp",
+      "port_range_min": 56710,
+      "port_range_max": 56710,
+      "remote_ip_prefix": "79.100.218.77/32"
+    },
+    {
+      "direction": "ingress",
+      "ethertype": "IPv4",
+      "protocol": "tcp",
+      "port_range_min": 22,
+      "port_range_max": 22,
+      "remote_ip_prefix": "79.100.218.77/32"
+    },
+    {
+      "direction": "egress",
+      "ethertype": "IPv4",
+      "protocol": "any",
+      "port_range_min": 1,
+      "port_range_max": 65535,
+      "remote_ip_prefix": "0.0.0.0/0"
+    },
+    {
+      "direction": "ingress",
+      "ethertype": "IPv4",
+      "protocol": "tcp",
+      "port_range_min": 11434,
+      "port_range_max": 11434,
+      "remote_ip_prefix": "192.168.3.0/24"
+    }
+  ],
+  "bootstrapped_at": "2026-03-24T20:57:32Z",
+  "vllm_setup_at": "2026-03-24T21:13:42Z",
+  "vllm_model": "openai/gpt-oss-120b",
+  "vllm_container_name": "vllm_gpt_oss_120b",
+  "vllm_preset": null,
+  "status": "ACTIVE",
+  "vm_state": "active",
+  "provisioned_at": "2026-03-24T21:13:43Z"
+}
+\ No newline at end of file
diff --git a/.hyperstack-vm2-state.json b/.hyperstack-vm2-state.json
new file mode 100644
index 0000000..8480b29
--- /dev/null
+++ b/.hyperstack-vm2-state.json
@@ -0,0 +1,90 @@
+{
+  "vm_id": 698909,
+  "vm_name": "hyperstack2-20260324205518",
+  "environment_name": "snonux-ollama",
+  "region": "CANADA-1",
+  "flavor_name": "n3-A100x1",
+  "image_name": "Ubuntu Server 24.04 LTS R570 CUDA 12.8 with Docker",
+  "key_name": "earth",
+  "public_ip": "69.19.136.171",
+  "created_at": "2026-03-24T20:55:19Z",
+  "services": {
+    "vllm_enabled": true,
+    "ollama_enabled": false,
+    "comfyui_enabled": false
+  },
+  "security_rules": [
+    {
+      "direction": "ingress",
+      "ethertype": "IPv4",
+      "protocol": "tcp",
+      "port_range_min": 22,
+      "port_range_max": 22,
+      "remote_ip_prefix": "79.100.218.77/32"
+    },
+    {
+      "direction": "ingress",
+      "ethertype": "IPv4",
+      "protocol": "udp",
+      "port_range_min": 56710,
+      "port_range_max": 56710,
+      "remote_ip_prefix": "79.100.218.77/32"
+    },
+    {
+      "direction": "ingress",
+      "ethertype": "IPv4",
+      "protocol": "tcp",
+      "port_range_min": 11434,
+      "port_range_max": 11434,
+      "remote_ip_prefix": "192.168.3.0/24"
+    },
+    {
+      "direction": "egress",
+      "ethertype": "IPv4",
+      "protocol": "any",
+      "port_range_min": 1,
+      "port_range_max": 65535,
+      "remote_ip_prefix": "0.0.0.0/0"
+    },
+    {
+      "direction": "ingress",
+      "ethertype": "IPv4",
+      "protocol": "tcp",
+      "port_range_min": 22,
+      "port_range_max": 22,
+      "remote_ip_prefix": "79.100.218.77/32"
+    },
+    {
+      "direction": "egress",
+      "ethertype": "IPv6",
+      "protocol": "any",
+      "port_range_min": 1,
+      "port_range_max": 65535,
+      "remote_ip_prefix": "0.0.0.0/0"
+    },
+    {
+      "direction": "ingress",
+      "ethertype": "IPv4",
+      "protocol": "tcp",
+      "port_range_min": 11434,
+      "port_range_max": 11434,
+      "remote_ip_prefix": "192.168.3.0/24"
+    },
+    {
+      "direction": "ingress",
+      "ethertype": "IPv4",
+      "protocol": "udp",
+      "port_range_min": 56710,
+      "port_range_max": 56710,
+      "remote_ip_prefix": "79.100.218.77/32"
+    }
+  ],
+  "bootstrapped_at": "2026-03-24T20:57:46Z",
+  "vllm_setup_at": "2026-03-24T21:30:54Z",
+  "vllm_model": "bullpoint/Qwen3-Coder-Next-AWQ-4bit",
+  "vllm_container_name": "vllm_qwen3",
+  "vllm_preset": null,
+  "status": "ACTIVE",
+  "vm_state": "active",
+  "provisioned_at": "2026-03-24T21:30:54Z"
+}
+\ No newline at end of file
diff --git a/hyperstack-vm1-gptoss.toml b/hyperstack-vm1-gptoss.toml
new file mode 100644
index 0000000..80f908c
--- /dev/null
+++ b/hyperstack-vm1-gptoss.toml
@@ -0,0 +1,173 @@
+[auth]
+api_key_file = "~/.hyperstack"
+
+[hyperstack]
+base_url = "https://infrahub-api.nexgencloud.com/v1"
+
+[state]
+# Separate state file for VM1 so vm1 and vm2 can be managed independently.
+file = ".hyperstack-vm1-state.json"
+
+[vm]
+name_prefix = "hyperstack1"
+hostname = "hyperstack1"
+environment_name = "snonux-ollama"
+
+# A100-80GB single GPU for gpt-oss-120b
+flavor_name = "n3-A100x1"
+image_name = "Ubuntu Server 24.04 LTS R570 CUDA 12.8 with Docker"
+assign_floating_ip = true
+create_bootable_volume = false
+enable_port_randomization = false
+labels = ["gpt-oss-120b", "wireguard"]
+
+[ssh]
+username = "ubuntu"
+private_key_path = "~/.ssh/id_rsa"
+hyperstack_key_name = "earth"
+port = 22
+connect_timeout_sec = 10
+
+[network]
+wireguard_udp_port = 56710
+wireguard_subnet = "192.168.3.0/24"
+# VM1 takes the first host address of wireguard_subnet as its server-side WireGuard IP.
+# earth (client) is 192.168.3.2; VM1 is 192.168.3.1; VM2 is 192.168.3.3.
+wireguard_server_ip = "192.168.3.1"
+# Secure default: "auto" resolves your current public egress IP to /32 at runtime.
+# Override with explicit CIDRs if you deploy from multiple networks or want broader access.
+allowed_ssh_cidrs = ["auto"]
+allowed_wireguard_cidrs = ["auto"]
+# Port 11434 is shared by both Ollama and vLLM for firewall compatibility.
+ollama_port = 11434
+
+[bootstrap]
+enable_guest_bootstrap = true
+install_wireguard = true
+configure_ufw = true
+configure_ollama_host = false
+
+[ollama]
+# Disabled in favour of vLLM; set install = true to switch back to Ollama.
+install = false
+models_dir = "/ephemeral/ollama/models"
+listen_host = "0.0.0.0:11434"
+gpu_overhead_mb = 2000
+num_parallel = 1
+context_length = 32768
+pull_models = ["qwen3-coder-next", "qwen3-coder:30b", "gpt-oss:20b", "gpt-oss:120b", "nemotron-3-super"]
+
+# vLLM serves one model via Docker on the OpenAI-compatible API.
+[vllm]
+install = true
+model = "openai/gpt-oss-120b"
+# HuggingFace model cache on ephemeral NVMe (fast; survives reboots on most providers).
+hug_cache_dir = "/ephemeral/hug"
+container_name = "vllm_gpt_oss_120b"
+# Hard architecture limit: max_position_embeddings=131072 in model config.json.
+max_model_len = 131072
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+# tool_call_parser="" disables --enable-auto-tool-choice; the llama3_json parser crashes
+# on gpt-oss responses (vLLM 0.17.1 adds token_ids to responses, breaking the parser API).
+tool_call_parser = ""
+
+# Named model presets for 'ruby hyperstack.rb --config hyperstack-vm1-gptoss.toml model switch <name>'.
+# Each preset overrides the matching [vllm] field; unset fields fall back to [vllm] defaults.
+
+[vllm.presets.qwen3-coder-next]
+model = "bullpoint/Qwen3-Coder-Next-AWQ-4bit"
+container_name = "vllm_qwen3"
+max_model_len = 262144
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_coder"
+
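+# NVIDIA Nemotron-3-Super-120B-A12B AWQ 4-bit — hybrid Mamba+MoE (12B active / 120B total). NOTE(review): tensor_parallel_size = 2 below presumably needs two GPUs, so this preset likely cannot start on the single-GPU n3-A100x1 flavor — confirm.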
+[vllm.presets.nemotron-super]
+model = "cyankiwi/NVIDIA-Nemotron-3-Super-120B-A12B-AWQ-4bit"
+container_name = "vllm_nemotron_super"
+max_model_len = 1048576
+gpu_memory_utilization = 0.85
+tensor_parallel_size = 2
+tool_call_parser = "qwen3_xml"
+trust_remote_code = true
+enable_prefix_caching = false
+extra_docker_env = ["VLLM_ALLOW_LONG_MAX_MODEL_LEN=1", "PYTORCH_ALLOC_CONF=expandable_segments:True"]
+extra_vllm_args = ["--reasoning-parser", "nemotron_v3"]
+
+# OpenAI GPT-OSS 20B — ultra-fast MoE (3.6B active / 20B total, MXFP4), ~14 GB on A100.
+[vllm.presets.gpt-oss-20b]
+model = "openai/gpt-oss-20b"
+container_name = "vllm_gpt_oss_20b"
+max_model_len = 65536
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+
+# OpenAI GPT-OSS 120B — powerful MoE (5.1B active / 117B total, MXFP4), ~65 GB on A100.
+# Hard architecture limit: max_position_embeddings=131072 in model config.json.
+[vllm.presets.gpt-oss-120b]
+model = "openai/gpt-oss-120b"
+container_name = "vllm_gpt_oss_120b"
+max_model_len = 131072
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+
+# Qwen2.5-Coder-32B-Instruct AWQ — best-in-class open coding model at 32B, ~18 GB on A100.
+[vllm.presets.qwen25-coder-32b]
+model = "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
+container_name = "vllm_qwen25_coder32b"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "hermes"
+
+# Qwen3-Coder-30B-A3B AWQ — Qwen3 generation coding MoE (3B active / 30B total), ~18 GB.
+[vllm.presets.qwen3-coder-30b]
+model = "QuantTrio/Qwen3-Coder-30B-A3B-Instruct-AWQ"
+container_name = "vllm_qwen3_coder30b"
+max_model_len = 65536
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_coder"
+
+# DeepSeek-R1-Distill-Qwen-32B AWQ — R1 reasoning distillation of Qwen 32B, ~18 GB on A100.
+[vllm.presets.deepseek-r1-32b]
+model = "casperhansen/deepseek-r1-distill-qwen-32b-awq"
+container_name = "vllm_deepseek_r1_32b"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+extra_vllm_args = ["--reasoning-parser", "deepseek_r1"]
+
+# Qwen3-32B AWQ — dense 32B reasoning model with extended context, ~18 GB on A100.
+[vllm.presets.qwen3-32b]
+model = "Qwen/Qwen3-32B-AWQ"
+container_name = "vllm_qwen3_32b"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+extra_vllm_args = ["--reasoning-parser", "deepseek_r1"]
+
+# Devstral-Small-2507 AWQ — Mistral's coding agent model (~15 GB on A100).
+[vllm.presets.devstral]
+model = "cyankiwi/Devstral-Small-2507-AWQ-4bit"
+container_name = "vllm_devstral"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "mistral"
+extra_vllm_args = ["--tokenizer_mode", "mistral", "--config_format", "mistral"]
+
+[wireguard]
+auto_setup = true
+setup_script = "./wg1-setup.sh"
+
+[local_client]
+check_wg1_service = true
+interface_name = "wg1"
+config_path = "/etc/wireguard/wg1.conf"
diff --git a/hyperstack.rb b/hyperstack.rb
index 61f53ec..5065fd1 100755
--- a/hyperstack.rb
+++ b/hyperstack.rb
@@ -169,17 +169,18 @@ module HyperstackVM
       end
 
       server_ip = fetch('network', 'wireguard_server_ip')
-      if server_ip
-        # Validate that the explicit server WireGuard IP is within the configured subnet.
-        begin
-          subnet = IPAddr.new(fetch('network', 'wireguard_subnet'))
-          unless subnet.include?(IPAddr.new(server_ip))
-            raise Error,
-                  "wireguard_server_ip #{server_ip.inspect} is not in wireguard_subnet #{fetch('network', 'wireguard_subnet')}"
-          end
-        rescue IPAddr::InvalidAddressError => e
-          raise Error, "Invalid wireguard_server_ip #{server_ip.inspect}: #{e.message}"
+      return unless server_ip
+
+      # Validate that the explicit server WireGuard IP is within the configured subnet.
+      begin
+        subnet = IPAddr.new(fetch('network', 'wireguard_subnet'))
+        unless subnet.include?(IPAddr.new(server_ip))
+          raise Error,
+                "wireguard_server_ip #{server_ip.inspect} is not in wireguard_subnet #{fetch('network',
+                                                                                             'wireguard_subnet')}"
         end
+      rescue IPAddr::InvalidAddressError => e
+        raise Error, "Invalid wireguard_server_ip #{server_ip.inspect}: #{e.message}"
       end
     end
 
@@ -471,7 +472,7 @@ module HyperstackVM
     # mode which pre-allocates states for all sequences, consuming extra VRAM on startup.
     def vllm_prefix_caching_enabled?
       val = dig('vllm', 'enable_prefix_caching')
-      val.nil? ? true : truthy?(val)
+      val.nil? || truthy?(val)
     end
 
     def vllm_presets
@@ -626,7 +627,8 @@ module HyperstackVM
 
     def fetch_public_cidr(url)
       uri = URI(url)
-      response = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https', open_timeout: 5, read_timeout: 5) do |http|
+      response = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https', open_timeout: 5,
+                                                     read_timeout: 5) do |http|
         http.request(Net::HTTP::Get.new(uri))
       end
       return nil unless response.is_a?(Net::HTTPSuccess)
@@ -1288,13 +1290,13 @@ module HyperstackVM
       script << 'docker pull vllm/vllm-openai:latest' if pull_image
       script << docker_run
       script << 'echo "Waiting for vLLM to become ready (up to 10 min for first model download)..."'
-      script << 'for i in $(seq 1 120); do'
+      script << 'for i in $(seq 1 240); do'
       script << "  if curl -sf http://localhost:#{port}/v1/models >/dev/null 2>&1; then echo vllm-ready; break; fi"
       script << "  state=$(docker inspect --format='{{.State.Status}}' #{Shellwords.escape(container)} 2>/dev/null || echo unknown)"
-      script << '  echo "  vLLM not ready yet ($i/120, container=$state)..."'
+      script << '  echo "  vLLM not ready yet ($i/240, container=$state)..."'
       script << '  sleep 5'
       script << 'done'
-      script << "curl -sf http://localhost:#{port}/v1/models >/dev/null || { echo 'FATAL: vLLM did not become ready within 10 minutes'; exit 1; }"
+      script << "curl -sf http://localhost:#{port}/v1/models >/dev/null || { echo 'FATAL: vLLM did not become ready within 20 minutes'; exit 1; }"
       script << 'echo vllm-install-ok'
       script.join("\n")
     end
@@ -1367,7 +1369,7 @@ module HyperstackVM
       script << 'echo "Waiting for ComfyUI to become ready (up to 5 min)..."'
       script << 'for i in $(seq 1 60); do'
       script << "  if curl -sf http://localhost:#{port}/system_stats >/dev/null 2>&1; then echo comfyui-ready; break; fi"
-      script << "  echo \"  ComfyUI not ready yet ($i/60)...\"; sleep 5"
+      script << '  echo "  ComfyUI not ready yet ($i/60)..."; sleep 5'
       script << 'done'
       script << "curl -sf http://localhost:#{port}/system_stats >/dev/null || { echo 'FATAL: ComfyUI did not become ready within 5 minutes'; exit 1; }"
 
@@ -1411,7 +1413,7 @@ module HyperstackVM
       script << 'echo "Waiting for ComfyUI restart..."'
       script << 'for i in $(seq 1 60); do'
       script << "  if curl -sf http://localhost:#{port}/system_stats >/dev/null 2>&1; then echo comfyui-ready; break; fi"
-      script << "  echo \"  ComfyUI not ready yet ($i/60)...\"; sleep 5"
+      script << '  echo "  ComfyUI not ready yet ($i/60)..."; sleep 5'
       script << 'done'
 
       script << 'echo comfyui-install-ok'
@@ -1443,7 +1445,6 @@ module HyperstackVM
         ordered << normalized
       end
     end
-
   end
 
   class RemoteProvisioner
@@ -1520,7 +1521,11 @@ module HyperstackVM
       return unless status.success?
 
       remote_models = stdout.lines.drop(1).map { |line| line.split.first }.compact
-      missing = @scripts.desired_ollama_models.reject { |model| remote_models.any? { |remote| remote.start_with?(model) } }
+      missing = @scripts.desired_ollama_models.reject do |model|
+        remote_models.any? do |remote|
+          remote.start_with?(model)
+        end
+      end
       return if missing.empty?
 
       raise Error, "Models missing after setup: #{missing.join(', ')}. Remote has: #{remote_models.join(', ')}"
@@ -1555,7 +1560,8 @@ module HyperstackVM
       @wg_setup_post = wg_setup_post
     end
 
-    def create(replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, install_comfyui: nil, vllm_preset: nil)
+    def create(replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, install_comfyui: nil,
+               vllm_preset: nil)
       # CLI flags override config; nil means "use config default".
       @effective_vllm = install_vllm.nil? ? @config.vllm_install_enabled? : install_vllm
       @effective_ollama = install_ollama.nil? ? @config.ollama_install_enabled? : install_ollama
@@ -1624,6 +1630,7 @@ module HyperstackVM
       state = @state_store.load
       target_vm_id = vm_id || state&.dig('vm_id')
       raise Error, "No VM ID provided and no state file found at #{@state_store.path}." if target_vm_id.nil?
+
       cleanup_local = !skip_local_cleanup && state && target_vm_id == state['vm_id']
 
       if dry_run
@@ -1666,7 +1673,8 @@ module HyperstackVM
           info "Tracked VM: #{state['vm_id']} #{vm['name']}"
           info "Status: #{vm['status']} / #{vm['vm_state']}"
           info "Public IP: #{connect_host_for(vm) || 'none'}"
-          info "Service mode: #{service_mode_summary(vllm_enabled: vllm_enabled, ollama_enabled: ollama_enabled, comfyui_enabled: comfyui_enabled)}"
+          info "Service mode: #{service_mode_summary(vllm_enabled: vllm_enabled, ollama_enabled: ollama_enabled,
+                                                     comfyui_enabled: comfyui_enabled)}"
           info "Active model: #{state['vllm_model'] || @config.vllm_model}" if vllm_enabled
           if comfyui_enabled
             wg_ip = @config.wireguard_gateway_hostname
@@ -1737,9 +1745,7 @@ module HyperstackVM
       @provisioner.decommission_litellm(host)
 
       # Stop the old container only when it has a different name from the new one.
-      if old_container != new_container
-        @provisioner.stop_vllm_container(host, old_container)
-      end
+      @provisioner.stop_vllm_container(host, old_container) if old_container != new_container
 
       info "Starting vLLM with preset '#{preset_name}' (#{preset['model']})..."
       # Skip docker pull: image is already present; pulling on every switch risks a
@@ -1770,9 +1776,7 @@ module HyperstackVM
       ollama_enabled = state_ollama_enabled?(state)
       info "Running end-to-end inference tests via WireGuard (#{wg_ip})..."
 
-      if vllm_enabled
-        test_vllm(wg_ip)
-      end
+      test_vllm(wg_ip) if vllm_enabled
 
       info "  Ollama test: connect via SSH and run 'ollama list' to verify models." if ollama_enabled
 
@@ -1876,11 +1880,11 @@ module HyperstackVM
         info "Run 'ruby hyperstack.rb test' to verify vLLM."
         info "  vLLM:    http://#{wg_ip}:#{@config.ollama_port}/v1/models"
       end
-      if effective_comfyui?
-        info "Run 'ruby hyperstack.rb test' to verify ComfyUI."
-        info "  ComfyUI: http://#{wg_ip}:#{@config.comfyui_port}/system_stats"
-        info "  Enhance: ruby photo-enhance.rb --config #{File.basename(@config.path)} --indir ~/Pictures --outdir ~/Pictures/enhanced"
-      end
+      return unless effective_comfyui?
+
+      info "Run 'ruby hyperstack.rb test' to verify ComfyUI."
+      info "  ComfyUI: http://#{wg_ip}:#{@config.comfyui_port}/system_stats"
+      info "  Enhance: ruby photo-enhance.rb --config #{File.basename(@config.path)} --indir ~/Pictures --outdir ~/Pictures/enhanced"
     end
 
     def build_create_payload(vm_name, resolved)
@@ -2048,7 +2052,8 @@ module HyperstackVM
         'HYPERSTACK_SSH_PRIVATE_KEY_PATH' => (File.exist?(@config.ssh_private_key_path) ? @config.ssh_private_key_path : '')
       }
 
-      Open3.popen2e(env, 'bash', @config.wireguard_setup_script, host, server_ip, wg_hostname) do |stdin, output, wait_thr|
+      Open3.popen2e(env, 'bash', @config.wireguard_setup_script, host, server_ip,
+                    wg_hostname) do |stdin, output, wait_thr|
         stdin.sync = true
         stdin.puts
         stdin.close
@@ -2325,8 +2330,12 @@ module HyperstackVM
           return
         end
 
-        output.puts("DRY RUN: local WireGuard peers would be removed for #{peer_summary}.") unless cleanup[:peers].empty?
-        output.puts("DRY RUN: local host entries would be removed for #{host_summary}.") unless cleanup[:hostnames].empty?
+        unless cleanup[:peers].empty?
+          output.puts("DRY RUN: local WireGuard peers would be removed for #{peer_summary}.")
+        end
+        unless cleanup[:hostnames].empty?
+          output.puts("DRY RUN: local host entries would be removed for #{host_summary}.")
+        end
         return
       end
 
@@ -2392,9 +2401,7 @@ module HyperstackVM
         models = @scripts.desired_ollama_models
         info "Ollama models to pre-pull: #{models.join(', ')}" unless models.empty?
       end
-      if vllm_setup_needed?(state)
-        info "vLLM would be installed: #{@config.vllm_model}"
-      end
+      info "vLLM would be installed: #{@config.vllm_model}" if vllm_setup_needed?(state)
       if wireguard_setup_needed?(state)
         info "WireGuard auto-setup script would run: #{@config.wireguard_setup_script} #{state['public_ip'] || '<pending-public-ip>'}"
       end
@@ -2559,7 +2566,11 @@ module HyperstackVM
         info 'Local WireGuard has peers for all managed VM IPs.'
       else
         present = expected_endpoints - missing
-        info "Local WireGuard has peers for: #{present.map { |endpoint| endpoint.split(':', 2).first }.join(', ')}" unless present.empty?
+        unless present.empty?
+          info "Local WireGuard has peers for: #{present.map do |endpoint|
+            endpoint.split(':', 2).first
+          end.join(', ')}"
+        end
         warn "Local WireGuard missing peers for: #{missing.map { |endpoint| endpoint.split(':', 2).first }.join(', ')}."
       end
     end
@@ -2711,11 +2722,9 @@ module HyperstackVM
 
       ssh = build_ssh_command(config, wg_host)
       stdout, stderr, status = Timeout.timeout(15) { Open3.capture3(*ssh, stdin_data: script) }
-      unless status.success?
-        return [nil, nil, "exit #{status.exitstatus}: #{stderr.strip}"]
-      end
+      return [nil, nil, "exit #{status.exitstatus}: #{stderr.strip}"] unless status.success?
 
-      gpu_section, rest          = stdout.split("===COMFYUI===\n", 2)
+      gpu_section, rest = stdout.split("===COMFYUI===\n", 2)
       queue_section, hist_section = rest.to_s.split("===HISTORY===\n", 2)
       gpus    = parse_nvidia_smi(gpu_section.to_s)
       metrics = parse_comfyui_queue(queue_section.to_s.strip, hist_section.to_s.strip)
@@ -2724,7 +2733,11 @@ module HyperstackVM
 
     # Parse ComfyUI /queue JSON into a plain Hash.
     def parse_comfyui_queue(queue_json, history_count_str)
-      q = JSON.parse(queue_json) rescue {}
+      q = begin
+        JSON.parse(queue_json)
+      rescue StandardError
+        {}
+      end
       {
         'queue_running' => Array(q['queue_running']).size,
         'queue_pending' => Array(q['queue_pending']).size,
@@ -2747,9 +2760,7 @@ module HyperstackVM
 
       ssh = build_ssh_command(config, wg_host)
       stdout, stderr, status = Timeout.timeout(15) { Open3.capture3(*ssh, stdin_data: script) }
-      unless status.success?
-        return [nil, nil, "exit #{status.exitstatus}: #{stderr.strip}"]
-      end
+      return [nil, nil, "exit #{status.exitstatus}: #{stderr.strip}"] unless status.success?
 
       gpu_section, vllm_section = stdout.split("===VLLM===\n", 2)
       gpus    = parse_nvidia_smi(gpu_section.to_s)
@@ -2769,13 +2780,13 @@ module HyperstackVM
       return {} if line.empty?
 
       {
-        'avg_prompt_throughput'         => extract_float(line, /Avg prompt throughput:\s*([\d.]+)/),
-        'avg_generation_throughput'     => extract_float(line, /Avg generation throughput:\s*([\d.]+)/),
-        'running'                       => extract_float(line, /Running:\s*(\d+)\s*reqs/),
-        'pending'                       => extract_float(line, /Waiting:\s*(\d+)\s*reqs/),
-        'swapped'                       => extract_float(line, /Swapped:\s*(\d+)\s*reqs/),
-        'gpu_cache_usage_pct'           => extract_float(line, /GPU KV cache usage:\s*([\d.]+)%/),
-        'gpu_prefix_cache_hit_rate_pct' => extract_float(line, /Prefix cache hit rate:\s*([\d.]+)%/),
+        'avg_prompt_throughput' => extract_float(line, /Avg prompt throughput:\s*([\d.]+)/),
+        'avg_generation_throughput' => extract_float(line, /Avg generation throughput:\s*([\d.]+)/),
+        'running' => extract_float(line, /Running:\s*(\d+)\s*reqs/),
+        'pending' => extract_float(line, /Waiting:\s*(\d+)\s*reqs/),
+        'swapped' => extract_float(line, /Swapped:\s*(\d+)\s*reqs/),
+        'gpu_cache_usage_pct' => extract_float(line, /GPU KV cache usage:\s*([\d.]+)%/),
+        'gpu_prefix_cache_hit_rate_pct' => extract_float(line, /Prefix cache hit rate:\s*([\d.]+)%/)
       }.compact
     end
 
@@ -2812,11 +2823,11 @@ module HyperstackVM
         next if parts.length < 7
 
         GpuInfo.new(
-          index:        parts[0].to_i,
-          name:         parts[1],
-          temp_c:       parts[2].to_f,
-          util_pct:     parts[3].to_f,
-          power_w:      parts[4].to_f,
+          index: parts[0].to_i,
+          name: parts[1],
+          temp_c: parts[2].to_f,
+          util_pct: parts[3].to_f,
+          power_w: parts[4].to_f,
           mem_used_mib: parts[5].to_f,
           mem_total_mib: parts[6].to_f
         )
@@ -2860,7 +2871,10 @@ module HyperstackVM
         # Single VM: simple vertical layout.
         rule  = DIM + ('─' * 72) + RESET
         lines = [header, rule]
-        panels.each { |p| lines << ''; lines.concat(p) }
+        panels.each do |p|
+          lines << ''
+          lines.concat(p)
+        end
         lines << ''
       end
 
@@ -2895,7 +2909,7 @@ module HyperstackVM
           lines.concat(render_comfyui_metrics(snap.metrics))
         elsif snap.metrics&.any?
           lines.concat(render_vllm_metrics(snap.metrics))
-        elsif snap.metrics&.empty?
+        elsif snap.metrics && snap.metrics.empty?
           lines << "  #{DIM}(no Engine log line yet — container may still be loading)#{RESET}"
         end
       end
@@ -2968,7 +2982,11 @@ module HyperstackVM
     # Colour: green below 50%, yellow 50–79%, red 80%+.
     def pct_bar(pct, width)
       filled = [(pct / 100.0 * width).round, width].min
-      color  = pct >= 80 ? RED : pct >= 50 ? YELLOW : GREEN
+      color  = if pct >= 80
+                 RED
+               else
+                 pct >= 50 ? YELLOW : GREEN
+               end
       "[#{color}#{'█' * filled}#{RESET}#{' ' * (width - filled)}]"
     end
 
@@ -2996,12 +3014,12 @@ module HyperstackVM
       puts 'Commands:'
       puts '  create [--replace] [--dry-run] [--vllm|--no-vllm] [--ollama|--no-ollama] [--model PRESET]'
       puts '  create-both [--replace] [--dry-run] [--vllm|--no-vllm] [--ollama|--no-ollama]'
-      puts '               Provision hyperstack-vm1.toml and hyperstack-vm2.toml concurrently.'
+      puts '               Provision hyperstack-vm1-gptoss.toml and hyperstack-vm2.toml concurrently.'
       puts '               WireGuard setup is serialized: VM1 writes the base wg1.conf first,'
       puts '               then VM2 adds its peer. Requires both TOML files next to the script.'
       puts '  delete [--vm-id ID] [--dry-run]'
       puts '  delete-both [--dry-run]'
-      puts '               Delete the VMs tracked by hyperstack-vm1.toml and hyperstack-vm2.toml.'
+      puts '               Delete the VMs tracked by hyperstack-vm1-gptoss.toml and hyperstack-vm2.toml.'
       puts '  status'
       puts '  watch'
       puts '               Poll all active VMs for vLLM and GPU stats every 60 s.'
@@ -3093,7 +3111,8 @@ module HyperstackVM
           raise Error, "Unknown model subcommand #{sub.inspect}. Use list or switch."
         end
       else
-        raise Error, "Unknown command #{command.inspect}. Use create, create-both, delete, delete-both, status, watch, test, or model."
+        raise Error,
+              "Unknown command #{command.inspect}. Use create, create-both, delete, delete-both, status, watch, test, or model."
       end
     end
 
@@ -3104,7 +3123,8 @@ module HyperstackVM
     # (create-both), the --model flag is not registered because each VM uses its own
     # TOML default.  Returns a hash suitable for splatting into Manager#create.
     def parse_create_options(argv, include_model_preset: true)
-      opts = { replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, install_comfyui: nil, vllm_preset: nil }
+      opts = { replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, install_comfyui: nil,
+               vllm_preset: nil }
       OptionParser.new do |o|
         o.on('--replace',      'Delete the tracked VM before creating a new one')    { opts[:replace] = true }
         o.on('--dry-run',      'Print the create plan without creating a VM')        { opts[:dry_run] = true }
@@ -3114,7 +3134,11 @@ module HyperstackVM
         o.on('--no-ollama',    'Disable Ollama setup (overrides config)')            { opts[:install_ollama] = false }
         o.on('--comfyui',      'Enable ComfyUI setup (overrides config)')            { opts[:install_comfyui] = true }
         o.on('--no-comfyui',   'Disable ComfyUI setup (overrides config)')           { opts[:install_comfyui] = false }
-        o.on('--model PRESET', 'Use a named vLLM preset at create time') { |v| opts[:vllm_preset] = v } if include_model_preset
+        if include_model_preset
+          o.on('--model PRESET', 'Use a named vLLM preset at create time') do |v|
+            opts[:vllm_preset] = v
+          end
+        end
       end.parse!(argv)
       opts
     end
@@ -3134,20 +3158,20 @@ module HyperstackVM
       client          = HyperstackClient.new(base_url: config.api_base_url, api_key: config.api_key)
       local_wireguard = build_local_wireguard(config)
       Manager.new(
-        config:          config,
-        client:          client,
-        state_store:     state_store,
+        config: config,
+        client: client,
+        state_store: state_store,
         local_wireguard: local_wireguard,
-        out:             out,
-        wg_setup_pre:    wg_setup_pre,
-        wg_setup_post:   wg_setup_post
+        out: out,
+        wg_setup_pre: wg_setup_pre,
+        wg_setup_post: wg_setup_post
       )
     end
 
     def build_local_wireguard(config)
       LocalWireGuard.new(
         interface_name: config.local_interface_name,
-        config_path:    config.local_wg_config_path
+        config_path: config.local_wg_config_path
       )
     end
 
@@ -3156,9 +3180,7 @@ module HyperstackVM
     # that `status` would show (honours --config if given explicitly).
     def run_watch
       loaders = status_config_loaders
-      if loaders.empty?
-        raise Error, 'No active VMs found. Run `create` or `create-both` first.'
-      end
+      raise Error, 'No active VMs found. Run `create` or `create-both` first.' if loaders.empty?
 
       VllmWatcher.new(config_loaders: loaders).run
     end
@@ -3188,7 +3210,7 @@ module HyperstackVM
 
       candidates = [
         @config_path,
-        File.join(__dir__, 'hyperstack-vm1.toml'),
+        File.join(__dir__, 'hyperstack-vm1-gptoss.toml'),
         File.join(__dir__, 'hyperstack-vm2.toml'),
         File.join(__dir__, 'hyperstack-vm-photo.toml')
       ].uniq.select { |path| File.exist?(path) }
@@ -3200,7 +3222,7 @@ module HyperstackVM
 
     def pair_config_loaders
       [
-        ConfigLoader.load(File.join(__dir__, 'hyperstack-vm1.toml')),
+        ConfigLoader.load(File.join(__dir__, 'hyperstack-vm1-gptoss.toml')),
         ConfigLoader.load(File.join(__dir__, 'hyperstack-vm2.toml'))
       ]
     end
@@ -3222,21 +3244,24 @@ module HyperstackVM
 
       # VM1 signals the latch after its WG step (whether WG ran or was already done).
       vm1_wg_post = proc do
-        wg_mutex.synchronize { vm1_wg_state[:done] = true; wg_cv.broadcast }
+        wg_mutex.synchronize do
+          vm1_wg_state[:done] = true
+          wg_cv.broadcast
+        end
       end
 
       # VM2 blocks here until VM1's WG step resolves, then raises if VM1 failed.
       vm2_wg_pre = proc do
         wg_mutex.synchronize { wg_cv.wait(wg_mutex) until vm1_wg_state[:done] || vm1_wg_state[:error] }
-        raise Error, "VM1 WireGuard setup failed; cannot add VM2 peer." if vm1_wg_state[:error]
+        raise Error, 'VM1 WireGuard setup failed; cannot add VM2 peer.' if vm1_wg_state[:error]
       end
 
       manager1 = build_manager(vm1_config,
-                               out:            PrefixedOutput.new('[vm1] ', $stdout, out_mutex),
-                               wg_setup_post:  vm1_wg_post)
+                               out: PrefixedOutput.new('[vm1] ', $stdout, out_mutex),
+                               wg_setup_post: vm1_wg_post)
       manager2 = build_manager(vm2_config,
-                               out:           PrefixedOutput.new('[vm2] ', $stdout, out_mutex),
-                               wg_setup_pre:  vm2_wg_pre)
+                               out: PrefixedOutput.new('[vm2] ', $stdout, out_mutex),
+                               wg_setup_pre: vm2_wg_pre)
 
       errors = {}
       create_opts = { replace: replace, dry_run: dry_run,
@@ -3247,7 +3272,10 @@ module HyperstackVM
       rescue Error => e
         errors[:vm1] = e.message
         # Unblock VM2 even if VM1 failed so the process doesn't hang.
-        wg_mutex.synchronize { vm1_wg_state[:error] = e.message; wg_cv.broadcast }
+        wg_mutex.synchronize do
+          vm1_wg_state[:error] = e.message
+          wg_cv.broadcast
+        end
       end
 
       vm2_thread = Thread.new do
@@ -3258,7 +3286,7 @@ module HyperstackVM
 
       [vm1_thread, vm2_thread].each(&:join)
 
-      errors.each { |vm, msg| $stderr.puts("ERROR [#{vm}]: #{msg}") }
+      errors.each { |vm, msg| warn("ERROR [#{vm}]: #{msg}") }
       exit 1 unless errors.empty?
     end
 
@@ -3286,14 +3314,14 @@ module HyperstackVM
         begin
           local_manager = build_manager(loaders.first.config, out: local_wg_out)
           cleanup = local_manager.send(:cleanup_local_access, dry_run: dry_run, hostnames: hostnames,
-                                       allowed_ips: allowed_ips)
+                                                              allowed_ips: allowed_ips)
           local_manager.send(:report_local_cleanup, local_wg_out, cleanup, dry_run: dry_run)
         rescue Error => e
           errors[:local_wireguard] = e.message
         end
       end
 
-      errors.each { |vm, msg| $stderr.puts("ERROR [#{vm}]: #{msg}") }
+      errors.each { |vm, msg| warn("ERROR [#{vm}]: #{msg}") }
       exit 1 unless errors.empty?
     end
   end
diff --git a/hypr.fish b/hypr.fish
index 09706b5..324c45b 100644
--- a/hypr.fish
+++ b/hypr.fish
@@ -1,11 +1,11 @@
 # Single-VM setup (hyperstack-vm.toml → hyperstack.wg1)
-abbr pi-hyperstack         pi --model hyperstack/openai/gpt-oss-120b
-abbr hyperstack-create     ruby ~/git/hyperstack/hyperstack.rb create
-abbr hyperstack-delete     ruby ~/git/hyperstack/hyperstack.rb delete
-abbr hyperstack-test       ruby ~/git/hyperstack/hyperstack.rb test
+abbr pi-hyperstack-gpt-oss-120b pi --model hyperstack/openai/gpt-oss-120b
+abbr hyperstack-create ruby ~/git/hyperstack/hyperstack.rb create
+abbr hyperstack-delete ruby ~/git/hyperstack/hyperstack.rb delete
+abbr hyperstack-test ruby ~/git/hyperstack/hyperstack.rb test
 
 # Dual-VM setup (hyperstack-vm1/vm2.toml → hyperstack1/2.wg1)
-abbr pi-hyperstack-nemotron  pi --model hyperstack1/cyankiwi/NVIDIA-Nemotron-3-Super-120B-A12B-AWQ-4bit
-abbr pi-hyperstack-coder     pi --model hyperstack2/bullpoint/Qwen3-Coder-Next-AWQ-4bit
-abbr hyperstack-create-both  ruby ~/git/hyperstack/hyperstack.rb create-both
-abbr hyperstack-delete-both  ruby ~/git/hyperstack/hyperstack.rb delete-both
+abbr pi-hyperstack-nemotron pi --model hyperstack1/openai/gpt-oss-120b
+abbr pi-hyperstack-coder pi --model hyperstack2/bullpoint/Qwen3-Coder-Next-AWQ-4bit
+abbr hyperstack-create-both ruby ~/git/hyperstack/hyperstack.rb create-both
+abbr hyperstack-delete-both ruby ~/git/hyperstack/hyperstack.rb delete-both
diff --git a/pi/agent/extensions/agent-plan-mode/utils.ts b/pi/agent/extensions/agent-plan-mode/utils.ts
index 3f945e3..c7f66de 100644
--- a/pi/agent/extensions/agent-plan-mode/utils.ts
+++ b/pi/agent/extensions/agent-plan-mode/utils.ts
@@ -151,9 +151,13 @@ export function isSafeAskCommand(command: string): boolean {
 	return !MUTATING_TASK_PATTERNS.some((pattern) => pattern.test(trimmed));
 }
 
+function isAskCommand(command: string): boolean {
+	return command.trim().startsWith("ask ") || command.trim() === "ask";
+}
+
 export function isSafePlanCommand(command: string): boolean {
 	if (containsRawTaskCommand(command)) return false;
-	if (isSafeAskCommand(command)) return true;
+	if (isAskCommand(command)) return true;
 
 	const isDestructive = DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command));
 	const isSafe = SAFE_PATTERNS.some((pattern) => pattern.test(command));
diff --git a/pi/agent/extensions/ask-mode/utils.ts b/pi/agent/extensions/ask-mode/utils.ts
index db8c889..835549e 100644
--- a/pi/agent/extensions/ask-mode/utils.ts
+++ b/pi/agent/extensions/ask-mode/utils.ts
@@ -87,7 +87,32 @@ const SAFE_PATTERNS = [
 	/^\s*exa\b/,
 ];
 
+const MUTATING_ASK_PATTERNS = [
+	/\badd\b/i,
+	/\bannotate\b/i,
+	/\bappend\b/i,
+	/\bdelete\b/i,
+	/\bdenotate\b/i,
+	/\bdone\b/i,
+	/\blog\b/i,
+	/\bmodify\b/i,
+	/\bprepend\b/i,
+	/\bstart\b/i,
+	/\bstop\b/i,
+	/\bundo\b/i,
+	/\bpriority\b/i,
+	/\btag\b/i,
+];
+
+function isReadOnlyAskCommand(command: string): boolean {
+	const trimmed = command.trim();
+	if (!trimmed.startsWith("ask ") && trimmed !== "ask") return false;
+	if (/[;&]/.test(trimmed) || /(^|[^|])\|([^|]|$)/.test(trimmed)) return false;
+	return !MUTATING_ASK_PATTERNS.some((pattern) => pattern.test(trimmed));
+}
+
 export function isSafeAskModeCommand(command: string): boolean {
+	if (isReadOnlyAskCommand(command)) return true;
 	const isDestructive = DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command));
 	const isSafe = SAFE_PATTERNS.some((pattern) => pattern.test(command));
 	return !isDestructive && isSafe;
diff --git a/pi/agent/models.json b/pi/agent/models.json
index 76e37ab..3dfd72d 100644
--- a/pi/agent/models.json
+++ b/pi/agent/models.json
@@ -1,7 +1,7 @@
 {
   "providers": {
     "hyperstack": {
-      "baseUrl": "http://hyperstack.wg1:11434/v1",
+      "baseUrl": "http://hyperstack1.wg1:11434/v1",
       "apiKey": "EMPTY",
       "api": "openai-completions",
       "compat": {
diff --git a/pi/agent/settings.json b/pi/agent/settings.json
index fbb3874..d8ea9a3 100644
--- a/pi/agent/settings.json
+++ b/pi/agent/settings.json
@@ -1,5 +1,6 @@
 {
   "lastChangelogVersion": "0.62.0",
   "defaultProvider": "openai",
-  "defaultModel": "gpt-4.1"
+  "defaultModel": "gpt-4.1",
+  "defaultThinkingLevel": "high"
 }
 \ No newline at end of file
author	Paul Buetow <paul@buetow.org>	2026-03-24 23:35:00 +0200
committer	Paul Buetow <paul@buetow.org>	2026-03-24 23:35:00 +0200
commit	9f45954847e7aa904ebda55e3c23277d7c7a3079 (patch)
tree	2a30a1782c21b0616a70c8536aa07a4019df7213
parent	862b132ddee2cc343a3edc98797554937ea5f595 (diff)