summary refs log tree commit diff
path: root/lib/hyperstack
diff options
context:
space:
mode:
Diffstat (limited to 'lib/hyperstack')
-rw-r--r-- lib/hyperstack/cli.rb 16
-rw-r--r-- lib/hyperstack/manager.rb 15
-rw-r--r-- lib/hyperstack/provisioning.rb 26
3 files changed, 37 insertions, 20 deletions
diff --git a/lib/hyperstack/cli.rb b/lib/hyperstack/cli.rb
index f575b59..8568474 100644
--- a/lib/hyperstack/cli.rb
+++ b/lib/hyperstack/cli.rb
@@ -4,9 +4,13 @@ require 'optparse'
module HyperstackVM
class CLI
+ # Repo root is two levels above this file (lib/hyperstack/ → lib/ → repo root).
+ # All TOML config files live at the repo root, not alongside this library file.
+ REPO_ROOT = File.expand_path(File.join(__dir__, '..', '..'))
+
def initialize(argv)
@argv = argv.dup
- @config_path = File.join(__dir__, 'hyperstack-vm.toml')
+ @config_path = File.join(REPO_ROOT, 'hyperstack-vm.toml')
@config_explicit = false
end
@@ -212,9 +216,9 @@ module HyperstackVM
candidates = [
@config_path,
- File.join(__dir__, 'hyperstack-vm1-gptoss.toml'),
- File.join(__dir__, 'hyperstack-vm2.toml'),
- File.join(__dir__, 'hyperstack-vm-photo.toml')
+ File.join(REPO_ROOT, 'hyperstack-vm1-gptoss.toml'),
+ File.join(REPO_ROOT, 'hyperstack-vm2.toml'),
+ File.join(REPO_ROOT, 'hyperstack-vm-photo.toml')
].uniq.select { |path| File.exist?(path) }
loaders = candidates.map { |path| ConfigLoader.load(path) }
@@ -224,8 +228,8 @@ module HyperstackVM
def pair_config_loaders
[
- ConfigLoader.load(File.join(__dir__, 'hyperstack-vm1-gptoss.toml')),
- ConfigLoader.load(File.join(__dir__, 'hyperstack-vm2.toml'))
+ ConfigLoader.load(File.join(REPO_ROOT, 'hyperstack-vm1-gptoss.toml')),
+ ConfigLoader.load(File.join(REPO_ROOT, 'hyperstack-vm2.toml'))
]
end
diff --git a/lib/hyperstack/manager.rb b/lib/hyperstack/manager.rb
index 0d17b2f..2134c92 100644
--- a/lib/hyperstack/manager.rb
+++ b/lib/hyperstack/manager.rb
@@ -343,15 +343,8 @@ module HyperstackVM
info "VM ready: #{state['public_ip']} (id=#{state['vm_id']})"
print_local_wireguard_summary(state['public_ip'])
- wg_ip = @config.wireguard_gateway_hostname
- if effective_vllm?
- info "Run 'ruby hyperstack.rb test' to verify vLLM."
- info " vLLM: http://#{wg_ip}:#{@config.ollama_port}/v1/models"
- end
- return unless effective_comfyui?
-
- info "Run 'ruby hyperstack.rb test' to verify ComfyUI."
- info " ComfyUI: http://#{wg_ip}:#{@config.comfyui_port}/system_stats"
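+ # Run end-to-end tests automatically so the user doesn't need a manual step. NOTE(review): with the `return unless effective_comfyui?` guard removed, the Enhance hint below now prints for every config — confirm this is intended.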
+ test
info " Enhance: ruby photo-enhance.rb --config #{File.basename(@config.path)} --indir ~/Pictures --outdir ~/Pictures/enhanced"
end
@@ -693,12 +686,16 @@ module HyperstackVM
def with_polling(description, timeout: 900, interval: 5)
deadline = Time.now + timeout
+ attempt = 0
loop do
result = yield
return result if result
raise Error, "Timed out waiting for #{description}." if Time.now >= deadline
+ attempt += 1
+ # Print a heartbeat every 6th poll (30 seconds at the default 5s interval) so the user can see the script hasn't stalled.
+ info(" still waiting for #{description}... (#{attempt * interval}s)") if (attempt % 6).zero?
sleep interval
end
end
diff --git a/lib/hyperstack/provisioning.rb b/lib/hyperstack/provisioning.rb
index eb3518e..fd1e212 100644
--- a/lib/hyperstack/provisioning.rb
+++ b/lib/hyperstack/provisioning.rb
@@ -204,11 +204,26 @@ module HyperstackVM
script << "docker rm #{Shellwords.escape(container)} 2>/dev/null || true"
script << 'docker pull vllm/vllm-openai:latest' if pull_image
script << docker_run
- script << 'echo "Waiting for vLLM to become ready (up to 10 min for first model download)..."'
+ # Stage patterns cover the full vLLM startup sequence:
+ # HuggingFace download → safetensors shard loading → torch.compile → CUDA graphs → API up.
+ # The sed strip removes the "(EngineCore pid=N) INFO date time [file.py:line] " log prefix
+ # so only the human-readable message is shown.
+ stage_pat = 'Starting to load model|Fetching|Downloading shards|checkpoint shards:.*% Completed' \
+ '|Loading weights took|Model loading took|torch\\.compile took' \
+ '|Graph capturing|Application startup complete'
+ strip_pfx = 's/^\\([A-Za-z]+ [^)]+\\) INFO [^ ]+ [^ ]+ \\[[^]]+\\] //'
+ script << 'echo "Waiting for vLLM to become ready (live progress from container logs)..."'
+ script << "stage_pat='#{stage_pat}'"
+ script << "strip_pfx='#{strip_pfx}'"
script << 'for i in $(seq 1 240); do'
script << " if curl -sf http://localhost:#{port}/v1/models >/dev/null 2>&1; then echo vllm-ready; break; fi"
script << " state=$(docker inspect --format='{{.State.Status}}' #{Shellwords.escape(container)} 2>/dev/null || echo unknown)"
- script << ' echo " vLLM not ready yet ($i/240, container=$state)..."'
+ script << " progress=$(docker logs --tail 100 #{Shellwords.escape(container)} 2>&1 | grep -E \"$stage_pat\" | tail -1 | sed -E \"$strip_pfx\" | cut -c1-100)"
+ script << ' if [ -n "$progress" ]; then'
+ script << ' echo " vLLM ($i/240, $state): $progress"'
+ script << ' else'
+ script << ' echo " vLLM not ready yet ($i/240, container=$state)..."'
+ script << ' fi'
script << ' sleep 5'
script << 'done'
script << "curl -sf http://localhost:#{port}/v1/models >/dev/null || { echo 'FATAL: vLLM did not become ready within 20 minutes'; exit 1; }"
@@ -375,13 +390,14 @@ module HyperstackVM
info 'Bootstrapping Ubuntu guest over SSH...'
retries = 3
retries.times do |attempt|
- stdout, stderr, status = @ssh_command_runner.call(host, @scripts.guest_bootstrap_script)
+ # Stream output so apt-lock waits and individual bootstrap steps are visible in real time.
+ output, status = @ssh_stream_runner.call(host, @scripts.guest_bootstrap_script)
return if status.success?
- msg = stderr.strip.empty? ? stdout : stderr
+ msg = output.lines.last&.strip || output.strip
raise Error, "Guest bootstrap failed after #{retries} attempts: #{msg}" if attempt == retries - 1
- warn "Bootstrap attempt #{attempt + 1}/#{retries} failed (#{msg.lines.last&.strip}), retrying in 15s..."
+ warn "Bootstrap attempt #{attempt + 1}/#{retries} failed (#{msg}), retrying in 15s..."
sleep 15
end
end