summaryrefslogtreecommitdiff
path: root/lib/hyperstack/provisioning.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/hyperstack/provisioning.rb')
-rw-r--r--  lib/hyperstack/provisioning.rb | 26
1 files changed, 21 insertions, 5 deletions
diff --git a/lib/hyperstack/provisioning.rb b/lib/hyperstack/provisioning.rb
index eb3518e..fd1e212 100644
--- a/lib/hyperstack/provisioning.rb
+++ b/lib/hyperstack/provisioning.rb
@@ -204,11 +204,26 @@ module HyperstackVM
script << "docker rm #{Shellwords.escape(container)} 2>/dev/null || true"
script << 'docker pull vllm/vllm-openai:latest' if pull_image
script << docker_run
- script << 'echo "Waiting for vLLM to become ready (up to 10 min for first model download)..."'
+ # Stage patterns cover the full vLLM startup sequence:
+ # HuggingFace download → safetensors shard loading → torch.compile → CUDA graphs → API up.
+ # The sed strip removes the "(EngineCore pid=N) INFO date time [file.py:line] " log prefix
+ # so only the human-readable message is shown.
+ stage_pat = 'Starting to load model|Fetching|Downloading shards|checkpoint shards:.*% Completed' \
+ '|Loading weights took|Model loading took|torch\\.compile took' \
+ '|Graph capturing|Application startup complete'
+ strip_pfx = 's/^\\([A-Za-z]+ [^)]+\\) INFO [^ ]+ [^ ]+ \\[[^]]+\\] //'
+ script << 'echo "Waiting for vLLM to become ready (live progress from container logs)..."'
+ script << "stage_pat='#{stage_pat}'"
+ script << "strip_pfx='#{strip_pfx}'"
script << 'for i in $(seq 1 240); do'
script << " if curl -sf http://localhost:#{port}/v1/models >/dev/null 2>&1; then echo vllm-ready; break; fi"
script << " state=$(docker inspect --format='{{.State.Status}}' #{Shellwords.escape(container)} 2>/dev/null || echo unknown)"
- script << ' echo " vLLM not ready yet ($i/240, container=$state)..."'
+ script << " progress=$(docker logs --tail 100 #{Shellwords.escape(container)} 2>&1 | grep -E \"$stage_pat\" | tail -1 | sed -E \"$strip_pfx\" | cut -c1-100)"
+ script << ' if [ -n "$progress" ]; then'
+ script << ' echo " vLLM ($i/240, $state): $progress"'
+ script << ' else'
+ script << ' echo " vLLM not ready yet ($i/240, container=$state)..."'
+ script << ' fi'
script << ' sleep 5'
script << 'done'
script << "curl -sf http://localhost:#{port}/v1/models >/dev/null || { echo 'FATAL: vLLM did not become ready within 20 minutes'; exit 1; }"
@@ -375,13 +390,14 @@ module HyperstackVM
info 'Bootstrapping Ubuntu guest over SSH...'
retries = 3
retries.times do |attempt|
- stdout, stderr, status = @ssh_command_runner.call(host, @scripts.guest_bootstrap_script)
+ # Stream output so apt-lock waits and individual bootstrap steps are visible in real time.
+ output, status = @ssh_stream_runner.call(host, @scripts.guest_bootstrap_script)
return if status.success?
- msg = stderr.strip.empty? ? stdout : stderr
+ msg = output.lines.last&.strip || output.strip
raise Error, "Guest bootstrap failed after #{retries} attempts: #{msg}" if attempt == retries - 1
- warn "Bootstrap attempt #{attempt + 1}/#{retries} failed (#{msg.lines.last&.strip}), retrying in 15s..."
+ warn "Bootstrap attempt #{attempt + 1}/#{retries} failed (#{msg}), retrying in 15s..."
sleep 15
end
end