diff options
| author | Paul Buetow <paul@buetow.org> | 2026-03-24 09:58:21 +0200 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-03-24 09:58:21 +0200 |
| commit | c2d54f7a4823ca0de99fdb8cc0a094b0cdf4cbb4 (patch) | |
| tree | d790cd7ebf1f2c97cf686dfdf23622b3c7e83823 | |
| parent | 9b3c83d4860566c8d5d682821314378d0c18c35d (diff) | |
Add ComfyUI photo enhancement VM and photo-enhance.rb client
- hyperstack-vm-photo.toml: L40 GPU VM config (192.168.3.4, ~$1/hr)
with [comfyui] section for port, model dirs, and pre-downloaded weights
- hyperstack.rb: full ComfyUI provisioning support alongside vLLM/Ollama —
config accessors, comfyui_install_script (git clone + venv + systemd),
RemoteProvisioner#install_comfyui, Manager#create integration, UFW rules,
status/service_mode_summary updates, --comfyui/--no-comfyui CLI flags
- photo-enhance.rb: standalone client — uploads photos, submits ComfyUI
workflow, polls for output, downloads PNG, converts to JPEG at quality 92
so file sizes match originals; --watch mode; processed-file manifest
- workflows/photo-enhance.json: Real-ESRGAN x4plus enhance-in-place workflow
(upscale 4x for enhancement, ImageScaleBy 0.25 back to original resolution)
- README.md: Photo enhancement section with quickstart, config reference,
workflow customisation notes, and performance table
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
| -rw-r--r-- | README.md | 55 | ||||
| -rw-r--r-- | hyperstack-vm-photo.toml | 68 | ||||
| -rwxr-xr-x | hyperstack.rb | 255 | ||||
| -rwxr-xr-x | photo-enhance.rb | 398 | ||||
| -rw-r--r-- | workflows/photo-enhance.json | 42 |
5 files changed, 790 insertions, 28 deletions
@@ -639,3 +639,58 @@ Measured on A100 80 GB PCIe (single GPU) with Qwen3-Coder-Next AWQ 4-bit: | Per-turn latency | ~10–15 s | ~28 s (32k ctx) | | Context window | 262k (full, no truncation) | 32k (was truncating) | | VRAM usage | 75 GiB (more KV cache) | 52–61 GiB | + +## Photo enhancement (ComfyUI) + +A separate VM setup (`hyperstack-vm-photo.toml`) runs [ComfyUI](https://github.com/comfyanonymous/ComfyUI) +on an L40 GPU for Photolemur-style automatic photo enhancement. No prompts needed — drop photos in, +get enhanced photos out. + +### How it works + +The pipeline runs Real-ESRGAN x4plus in "enhance in place" mode: +upscale 4× (noise reduction, sharpening, colour correction) → scale back to the original resolution. +Output is saved as JPEG at quality 92, so file sizes stay close to the originals. + +### Quickstart + +```sh +# Provision the L40 VM (~$1/hr, ~8 min first-time setup including model download) +ruby hyperstack.rb --config hyperstack-vm-photo.toml create + +# Check connectivity +ruby photo-enhance.rb --test + +# Enhance all photos in a directory (outputs <name>_enhanced.jpg alongside originals) +ruby photo-enhance.rb --indir ~/Pictures/my-album + +# Watch mode: process new arrivals automatically +ruby photo-enhance.rb --indir ~/Pictures/my-album --watch + +# Destroy VM when done +ruby hyperstack.rb --config hyperstack-vm-photo.toml delete +``` + +### Configuration (`hyperstack-vm-photo.toml`) + +| Key | Default | Description | +|-----|---------|-------------| +| `[vm].flavor_name` | `n3-L40x1` | Hyperstack GPU flavor (L40 48 GB, ~$1/hr) | +| `[network].wireguard_server_ip` | `192.168.3.4` | WireGuard IP (after VM1=.1, VM2=.3) | +| `[comfyui].port` | `8188` | ComfyUI REST API port (WireGuard subnet only) | +| `[comfyui].models_dir` | `/ephemeral/comfyui/models` | Model weights (ephemeral NVMe) | +| `[comfyui].models` | `["RealESRGAN_x4plus"]` | Pre-downloaded models | + +### Custom workflows + +The workflow JSON lives at `workflows/photo-enhance.json`. 
The `NODE_INPUT_IMAGE` placeholder +is substituted at runtime by `photo-enhance.rb` with the uploaded filename. +Swap in any ComfyUI-compatible workflow (e.g. add SUPIR for deeper restoration) by editing the JSON +or passing `--workflow path/to/other.json`. + +### Performance (L40 48 GB) + +| Operation | Time per photo | +|-----------|---------------| +| Real-ESRGAN enhance + scale back | ~50–60 s | +| Upload + download overhead | ~3 s | diff --git a/hyperstack-vm-photo.toml b/hyperstack-vm-photo.toml new file mode 100644 index 0000000..d6130b0 --- /dev/null +++ b/hyperstack-vm-photo.toml @@ -0,0 +1,68 @@ +[auth] +api_key_file = "~/.hyperstack" + +[hyperstack] +base_url = "https://infrahub-api.nexgencloud.com/v1" + +[state] +file = ".hyperstack-vm-photo-state.json" + +[vm] +name_prefix = "hyperstack-photo" +hostname = "hyperstack-photo" +environment_name = "snonux-ollama" + +# L40 (48GB GDDR6, ~$1.00/hr) is the recommended GPU for ComfyUI photo enhancement. +# It provides ample VRAM for SUPIR (FP16 needs ~12-20GB) + Real-ESRGAN with large tile sizes. +# Cheaper than A100 ($2/hr) while offering faster CUDA cores and FP8 support. +flavor_name = "n3-L40x1" +image_name = "Ubuntu Server 24.04 LTS R570 CUDA 12.8 with Docker" +assign_floating_ip = true +create_bootable_volume = false +enable_port_randomization = false +labels = ["comfyui", "photo-enhance", "wireguard"] + +[ssh] +username = "ubuntu" +private_key_path = "~/.ssh/id_rsa" +hyperstack_key_name = "earth" +port = 22 +connect_timeout_sec = 10 + +[network] +wireguard_udp_port = 56710 +wireguard_subnet = "192.168.3.0/24" +# Photo VM uses .4; VM1=.1, VM2=.3 are the LLM VMs on the same wg1 tunnel. +wireguard_server_ip = "192.168.3.4" +# Reuse the same inference port constant; ComfyUI uses its own port 8188 below. 
+ollama_port = 11434 +allowed_ssh_cidrs = ["auto"] +allowed_wireguard_cidrs = ["auto"] + +[bootstrap] +enable_guest_bootstrap = true +install_wireguard = true +configure_ufw = true +configure_ollama_host = false + +[ollama] +# Not needed on this VM; photo enhancement uses ComfyUI exclusively. +install = false + +[vllm] +# Not needed on this VM; ComfyUI handles all inference. +install = false + +[comfyui] +install = true +# ComfyUI REST API port — opened on the WireGuard subnet only. +port = 8188 +# Model weights on ephemeral NVMe for fast access; survives reboots on Hyperstack. +models_dir = "/ephemeral/comfyui/models" +output_dir = "/ephemeral/comfyui/output" +container_name = "comfyui_photo" +# Pre-downloaded model weights: +# RealESRGAN_x4plus — fast 4x upscaling + sharpening (~65MB, upscale_models/) +# SUPIR-v0Q — SDXL-based photo restoration, Photolemur-quality results (~8GB, checkpoints/) +# SUPIR-v0F — SUPIR variant tuned for faithful fidelity over generative enhancement (~8GB) +models = ["RealESRGAN_x4plus", "SUPIR-v0Q"] diff --git a/hyperstack.rb b/hyperstack.rb index d48260e..af67be3 100755 --- a/hyperstack.rb +++ b/hyperstack.rb @@ -115,6 +115,15 @@ module HyperstackVM 'tensor_parallel_size' => 1, 'tool_call_parser' => 'qwen3_coder' }, + 'comfyui' => { + 'install' => false, + 'port' => 8188, + 'models_dir' => '/ephemeral/comfyui/models', + 'output_dir' => '/ephemeral/comfyui/output', + 'container_name' => 'comfyui', + # Models to pre-download: Real-ESRGAN for fast upscaling, SUPIR for deep restoration. + 'models' => [] + }, 'wireguard' => { 'auto_setup' => true, 'setup_script' => './wg1-setup.sh' @@ -127,7 +136,7 @@ module HyperstackVM }.freeze def validate! 
- %w[auth hyperstack state vm ssh network bootstrap ollama vllm wireguard local_client].each do |section| + %w[auth hyperstack state vm ssh network bootstrap ollama vllm comfyui wireguard local_client].each do |section| raise Error, "Missing config section [#{section}]" unless @data.key?(section) end @@ -494,6 +503,31 @@ module HyperstackVM } end + def comfyui_install_enabled? + truthy?(fetch('comfyui', 'install')) + end + + def comfyui_port + Integer(fetch('comfyui', 'port')) + end + + def comfyui_models_dir + fetch('comfyui', 'models_dir') + end + + def comfyui_output_dir + fetch('comfyui', 'output_dir') + end + + def comfyui_container_name + fetch('comfyui', 'container_name') + end + + # Models to pre-download during provisioning (e.g. RealESRGAN_x4plus, SUPIR-v0Q). + def comfyui_models + Array(fetch('comfyui', 'models')).map(&:to_s) + end + def local_client_checks_enabled? truthy?(fetch('local_client', 'check_wg1_service')) end @@ -514,7 +548,8 @@ module HyperstackVM expand_path(fetch('wireguard', 'setup_script')) end - def desired_security_rules(include_ollama: ollama_install_enabled?, include_vllm: vllm_install_enabled?) + def desired_security_rules(include_ollama: ollama_install_enabled?, include_vllm: vllm_install_enabled?, + include_comfyui: comfyui_install_enabled?) rules = [] allowed_ssh_cidrs.each do |cidr| @@ -526,6 +561,8 @@ module HyperstackVM end rules << firewall_rule('tcp', ollama_port, wireguard_subnet) if include_ollama || include_vllm + # ComfyUI REST API on its own port, restricted to the WireGuard subnet. + rules << firewall_rule('tcp', comfyui_port, wireguard_subnet) if include_comfyui rules.uniq end @@ -1080,6 +1117,10 @@ module HyperstackVM script << "sudo ufw allow #{@config.wireguard_udp_port}/udp comment 'WireGuard #{@config.local_interface_name}' >/dev/null 2>&1 || true" # Port 11434 is shared by Ollama and vLLM; open for both regardless of which is installed. 
script << "sudo ufw allow from #{Shellwords.escape(@config.wireguard_subnet)} to any port #{@config.ollama_port} proto tcp comment 'Inference API (Ollama/vLLM) via #{@config.local_interface_name}' >/dev/null 2>&1 || true" + # ComfyUI REST API on port 8188; only open when ComfyUI is enabled. + if @config.comfyui_install_enabled? + script << "sudo ufw allow from #{Shellwords.escape(@config.wireguard_subnet)} to any port #{@config.comfyui_port} proto tcp comment 'ComfyUI API via #{@config.local_interface_name}' >/dev/null 2>&1 || true" + end end if @config.configure_ollama_host? @@ -1258,6 +1299,106 @@ module HyperstackVM script.join("\n") end + def comfyui_install_script + models_dir = @config.comfyui_models_dir + output_dir = @config.comfyui_output_dir + port = @config.comfyui_port + model_names = @config.comfyui_models + # Use ubuntu home dir to avoid /opt permission issues when running as the SSH user. + install_dir = '/home/ubuntu/ComfyUI' + venv_dir = '/home/ubuntu/comfyui-venv' + service = 'comfyui' + + script = [] + script << 'set -euo pipefail' + + # Wait for apt locks released by unattended-upgrades before touching packages. + script << 'for i in $(seq 1 30); do' + script << ' if ! fuser /var/lib/dpkg/lock-frontend /var/lib/apt/lists/lock /var/cache/apt/archives/lock >/dev/null 2>&1; then break; fi' + script << ' echo " apt lock held, waiting ($i/30)..."; sleep 10' + script << 'done' + script << 'sudo pkill -f unattended-upgrade >/dev/null 2>&1 || true' + + # Install system deps: git, python venv, wget. + script << 'sudo apt-get update -qq' + script << 'sudo apt-get install -y -qq git python3-venv python3-pip wget' + + # Ephemeral NVMe dirs for models and output. + script << "sudo mkdir -p #{Shellwords.escape(models_dir)} #{Shellwords.escape(output_dir)}" + script << "sudo chmod -R 0777 #{Shellwords.escape(File.dirname(models_dir))}" + + # Clone or update ComfyUI from the official repo (no sudo needed in ubuntu home). + script << "if [ ! 
-d #{Shellwords.escape(install_dir)} ]; then" + script << " git clone --depth 1 https://github.com/comfyanonymous/ComfyUI #{Shellwords.escape(install_dir)}" + script << 'else' + script << " git -C #{Shellwords.escape(install_dir)} pull --ff-only" + script << 'fi' + + # Create Python venv and install PyTorch + ComfyUI deps. + # CUDA 12.8 is installed on the VM; cu128 wheel index covers it. + script << "[ -d #{Shellwords.escape(venv_dir)} ] || python3 -m venv #{Shellwords.escape(venv_dir)}" + script << "#{venv_dir}/bin/pip install --quiet --upgrade pip" + script << "#{venv_dir}/bin/pip install --quiet torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128" + script << "#{venv_dir}/bin/pip install --quiet -r #{Shellwords.escape("#{install_dir}/requirements.txt")}" + + # Symlink ephemeral model/output dirs into the ComfyUI directory tree. + script << "rm -rf #{Shellwords.escape("#{install_dir}/models")} && ln -sfn #{Shellwords.escape(models_dir)} #{Shellwords.escape("#{install_dir}/models")}" + script << "rm -rf #{Shellwords.escape("#{install_dir}/output")} && ln -sfn #{Shellwords.escape(output_dir)} #{Shellwords.escape("#{install_dir}/output")}" + + # Systemd service so ComfyUI starts on reboot. 
+ script << "cat <<'UNIT' | sudo tee /etc/systemd/system/#{Shellwords.escape(service)}.service >/dev/null" + script << '[Unit]' + script << 'Description=ComfyUI photo enhancement server' + script << 'After=network.target' + script << '[Service]' + script << "ExecStart=#{venv_dir}/bin/python #{install_dir}/main.py --listen 0.0.0.0 --port #{port} --output-directory #{output_dir}" + script << 'Restart=on-failure' + script << 'RestartSec=5' + script << "WorkingDirectory=#{install_dir}" + script << 'Environment=HOME=/root' + script << '[Install]' + script << 'WantedBy=multi-user.target' + script << 'UNIT' + script << 'sudo systemctl daemon-reload' + script << "sudo systemctl enable --now #{Shellwords.escape(service)}" + script << "sudo systemctl restart #{Shellwords.escape(service)}" + + # Wait for ComfyUI API to respond (model loading and CUDA init can take ~60s). + script << 'echo "Waiting for ComfyUI to become ready (up to 5 min)..."' + script << 'for i in $(seq 1 60); do' + script << " if curl -sf http://localhost:#{port}/system_stats >/dev/null 2>&1; then echo comfyui-ready; break; fi" + script << " echo \" ComfyUI not ready yet ($i/60)...\"; sleep 5" + script << 'done' + script << "curl -sf http://localhost:#{port}/system_stats >/dev/null || { echo 'FATAL: ComfyUI did not become ready within 5 minutes'; exit 1; }" + + # Download model weights into the ComfyUI subdirectories. + # Real-ESRGAN → upscale_models/; SUPIR → checkpoints/. 
+ model_names.each do |model_name| + case model_name + when /RealESRGAN/i + dest_dir = "#{models_dir}/upscale_models" + url = if model_name =~ /anime/i + 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth' + else + 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth' + end + dest_file = "#{dest_dir}/#{model_name}.pth" + script << "mkdir -p #{Shellwords.escape(dest_dir)}" + script << "[ -f #{Shellwords.escape(dest_file)} ] || wget -q --show-progress -O #{Shellwords.escape(dest_file)} #{Shellwords.escape(url)}" + when /SUPIR/i + dest_dir = "#{models_dir}/checkpoints" + # SUPIR weights on HuggingFace; v0Q is the quality-oriented variant, v0F the fidelity-oriented one (~8 GB each). + hf_file = model_name.end_with?('F') ? 'SUPIR-v0F.ckpt' : 'SUPIR-v0Q.ckpt' + url = "https://huggingface.co/camenduru/SUPIR/resolve/main/#{hf_file}" + script << "mkdir -p #{Shellwords.escape(dest_dir)}" + script << "[ -f #{Shellwords.escape("#{dest_dir}/#{hf_file}")} ] || wget -q --show-progress -O #{Shellwords.escape("#{dest_dir}/#{hf_file}")} #{Shellwords.escape(url)}" + end + end + + script << 'echo comfyui-install-ok' + script.join("\n") + end + def litellm_decommission_script script = [] script << 'set -euo pipefail' @@ -1347,6 +1488,12 @@ module HyperstackVM install_vllm(host, preset_config: preset_config) end + def install_comfyui(host) + info "Setting up ComfyUI Docker container on #{host}..." + output, status = @ssh_stream_runner.call(host, @scripts.comfyui_install_script) + raise Error, "ComfyUI install failed: #{output.strip}" unless status.success? 
+ end + private def verify_remote_models(host) @@ -1389,10 +1536,11 @@ module HyperstackVM @wg_setup_post = wg_setup_post end - def create(replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, vllm_preset: nil) + def create(replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, install_comfyui: nil, vllm_preset: nil) # CLI flags override config; nil means "use config default". @effective_vllm = install_vllm.nil? ? @config.vllm_install_enabled? : install_vllm @effective_ollama = install_ollama.nil? ? @config.ollama_install_enabled? : install_ollama + @effective_comfyui = install_comfyui.nil? ? @config.comfyui_install_enabled? : install_comfyui # Validate preset name early so we fail before touching any remote state. @effective_vllm_preset = vllm_preset @config.vllm_preset(vllm_preset) if vllm_preset @@ -1492,14 +1640,19 @@ module HyperstackVM desired = desired_security_rules_for_state(state).map { |rule| normalize_rule(rule) } current = Array(vm['security_rules']).map { |rule| normalize_rule(rule) } missing_rules = desired - current - vllm_enabled = state_vllm_enabled?(state) - ollama_enabled = state_ollama_enabled?(state) + vllm_enabled = state_vllm_enabled?(state) + ollama_enabled = state_ollama_enabled?(state) + comfyui_enabled = state_comfyui_enabled?(state) info "Tracked VM: #{state['vm_id']} #{vm['name']}" info "Status: #{vm['status']} / #{vm['vm_state']}" info "Public IP: #{connect_host_for(vm) || 'none'}" - info "Service mode: #{service_mode_summary(vllm_enabled: vllm_enabled, ollama_enabled: ollama_enabled)}" + info "Service mode: #{service_mode_summary(vllm_enabled: vllm_enabled, ollama_enabled: ollama_enabled, comfyui_enabled: comfyui_enabled)}" info "Active model: #{state['vllm_model'] || @config.vllm_model}" if vllm_enabled + if comfyui_enabled + wg_ip = @config.wireguard_gateway_hostname + info "ComfyUI: http://#{wg_ip}:#{@config.comfyui_port}" + end info "Missing firewall rules: #{missing_rules.empty? ? 
'none' : missing_rules.size}" rescue Error => e warn "Unable to load VM #{state['vm_id']}: #{e.message}" @@ -1614,6 +1767,7 @@ module HyperstackVM state['bootstrapped_at'].nil? || ollama_setup_needed?(state) || vllm_setup_needed?(state) || + comfyui_setup_needed?(state) || wireguard_setup_needed?(state) ) end @@ -1680,6 +1834,15 @@ module HyperstackVM @state_store.save(state) end + # Set up ComfyUI after the tunnel is up so model downloads are visible locally. + if comfyui_setup_needed?(state) + @provisioner.install_comfyui(state['public_ip']) + state['comfyui_setup_at'] = Time.now.utc.iso8601 + state['comfyui_container_name'] = @config.comfyui_container_name + state['comfyui_models'] = @config.comfyui_models + @state_store.save(state) + end + vm = @client.get_vm(vm_id) state['security_rules'] = Array(vm['security_rules']).map { |rule| normalize_rule(rule) } state['status'] = vm['status'] @@ -1689,11 +1852,16 @@ module HyperstackVM info "VM ready: #{state['public_ip']} (id=#{state['vm_id']})" print_local_wireguard_summary(state['public_ip']) - return unless effective_vllm? - wg_ip = @config.wireguard_gateway_hostname - info "Run 'ruby hyperstack.rb test' to verify vLLM." - info " vLLM: http://#{wg_ip}:#{@config.ollama_port}/v1/models" + if effective_vllm? + info "Run 'ruby hyperstack.rb test' to verify vLLM." + info " vLLM: http://#{wg_ip}:#{@config.ollama_port}/v1/models" + end + if effective_comfyui? + info "Run 'ruby hyperstack.rb test' to verify ComfyUI." + info " ComfyUI: http://#{wg_ip}:#{@config.comfyui_port}/system_stats" + info " Enhance: ruby photo-enhance.rb --config #{File.basename(@config.path)} --indir ~/Pictures --outdir ~/Pictures/enhanced" + end end def build_create_payload(vm_name, resolved) @@ -2057,16 +2225,21 @@ module HyperstackVM def sync_service_mode_state(state) state['services'] = { 'vllm_enabled' => effective_vllm?, - 'ollama_enabled' => effective_ollama? + 'ollama_enabled' => effective_ollama?, + 'comfyui_enabled' => effective_comfyui? 
} end - def desired_security_rules(include_vllm: effective_vllm?, include_ollama: effective_ollama?) - @config.desired_security_rules(include_vllm: include_vllm, include_ollama: include_ollama) + def desired_security_rules(include_vllm: effective_vllm?, include_ollama: effective_ollama?, + include_comfyui: effective_comfyui?) + @config.desired_security_rules(include_vllm: include_vllm, include_ollama: include_ollama, + include_comfyui: include_comfyui) end def desired_security_rules_for_state(state) - desired_security_rules(include_vllm: state_vllm_enabled?(state), include_ollama: state_ollama_enabled?(state)) + desired_security_rules(include_vllm: state_vllm_enabled?(state), + include_ollama: state_ollama_enabled?(state), + include_comfyui: state_comfyui_enabled?(state)) end def legacy_litellm_rules(rules) @@ -2097,12 +2270,23 @@ module HyperstackVM @config.ollama_install_enabled? end - def service_mode_summary(vllm_enabled:, ollama_enabled:) - return 'vLLM enabled, Ollama enabled' if vllm_enabled && ollama_enabled - return 'vLLM enabled, Ollama disabled' if vllm_enabled - return 'Ollama enabled, vLLM disabled' if ollama_enabled + def state_comfyui_enabled?(state) + recorded = state&.dig('services', 'comfyui_enabled') + return recorded unless recorded.nil? + + return true if state&.key?('comfyui_setup_at') + + @config.comfyui_install_enabled? + end + + def service_mode_summary(vllm_enabled:, ollama_enabled:, comfyui_enabled: false) + parts = [] + parts << 'vLLM' if vllm_enabled + parts << 'Ollama' if ollama_enabled + parts << 'ComfyUI' if comfyui_enabled + return 'All inference services disabled' if parts.empty? - 'All inference services disabled' + "#{parts.join(', ')} enabled" end def cleanup_local_access(dry_run:, hostnames:, allowed_ips:) @@ -2259,6 +2443,19 @@ module HyperstackVM state['vllm_model'] != desired end + # Returns the effective ComfyUI flag: CLI override if set, else config default. + def effective_comfyui? + defined?(@effective_comfyui) ? 
@effective_comfyui : @config.comfyui_install_enabled? + end + + def comfyui_setup_needed?(state) + return false unless effective_comfyui? + return true if state['comfyui_setup_at'].nil? + + # Re-run if the desired model list changed since last provision. + (@config.comfyui_models.sort != Array(state['comfyui_models']).sort) + end + # Tests the vLLM OpenAI-compatible API: lists loaded models and runs a # short inference request to confirm the model accepts requests. def test_vllm(wg_ip) @@ -2807,14 +3004,16 @@ module HyperstackVM # (create-both), the --model flag is not registered because each VM uses its own # TOML default. Returns a hash suitable for splatting into Manager#create. def parse_create_options(argv, include_model_preset: true) - opts = { replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, vllm_preset: nil } + opts = { replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, install_comfyui: nil, vllm_preset: nil } OptionParser.new do |o| - o.on('--replace', 'Delete the tracked VM before creating a new one') { opts[:replace] = true } - o.on('--dry-run', 'Print the create plan without creating a VM') { opts[:dry_run] = true } - o.on('--vllm', 'Enable vLLM setup (overrides config)') { opts[:install_vllm] = true } - o.on('--no-vllm', 'Disable vLLM setup (overrides config)') { opts[:install_vllm] = false } - o.on('--ollama', 'Enable Ollama setup (overrides config)') { opts[:install_ollama] = true } - o.on('--no-ollama', 'Disable Ollama setup (overrides config)') { opts[:install_ollama] = false } + o.on('--replace', 'Delete the tracked VM before creating a new one') { opts[:replace] = true } + o.on('--dry-run', 'Print the create plan without creating a VM') { opts[:dry_run] = true } + o.on('--vllm', 'Enable vLLM setup (overrides config)') { opts[:install_vllm] = true } + o.on('--no-vllm', 'Disable vLLM setup (overrides config)') { opts[:install_vllm] = false } + o.on('--ollama', 'Enable Ollama setup (overrides config)') { 
opts[:install_ollama] = true } + o.on('--no-ollama', 'Disable Ollama setup (overrides config)') { opts[:install_ollama] = false } + o.on('--comfyui', 'Enable ComfyUI setup (overrides config)') { opts[:install_comfyui] = true } + o.on('--no-comfyui', 'Disable ComfyUI setup (overrides config)') { opts[:install_comfyui] = false } o.on('--model PRESET', 'Use a named vLLM preset at create time') { |v| opts[:vllm_preset] = v } if include_model_preset end.parse!(argv) opts @@ -2910,7 +3109,7 @@ module HyperstackVM # VM2 adds its peer. A Mutex+ConditionVariable acts as a one-shot latch between threads. # If VM1 fails before reaching the WG step the latch is still released so VM2 doesn't hang. # vllm_preset is accepted but ignored — each VM uses its own TOML default preset. - def run_create_both(replace:, dry_run:, install_vllm:, install_ollama:, vllm_preset: nil) # rubocop:disable Lint/UnusedMethodArgument + def run_create_both(replace:, dry_run:, install_vllm:, install_ollama:, install_comfyui: nil, vllm_preset: nil) # rubocop:disable Lint/UnusedMethodArgument vm1_loader, vm2_loader = pair_config_loaders vm1_config = vm1_loader.config vm2_config = vm2_loader.config @@ -2940,7 +3139,7 @@ module HyperstackVM errors = {} create_opts = { replace: replace, dry_run: dry_run, - install_vllm: install_vllm, install_ollama: install_ollama } + install_vllm: install_vllm, install_ollama: install_ollama, install_comfyui: install_comfyui } vm1_thread = Thread.new do manager1.create(**create_opts) diff --git a/photo-enhance.rb b/photo-enhance.rb new file mode 100755 index 0000000..02a281a --- /dev/null +++ b/photo-enhance.rb @@ -0,0 +1,398 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +# photo-enhance.rb — Photolemur-style automatic photo enhancer via ComfyUI. +# +# Submits images from --indir to the ComfyUI REST API running on a Hyperstack VM, +# downloads the enhanced results to --outdir, and optionally watches for new files. 
#
# Usage:
#   ruby photo-enhance.rb --config hyperstack-vm-photo.toml \
#     --indir ~/Pictures [--outdir ~/Pictures/enhanced] [--watch] \
#     [--workflow workflows/photo-enhance.json]
#
# Without --outdir each result is written next to its original as
# <name>_enhanced.<ext>; with --outdir the same file name is used inside that
# directory.
#
# Requirements:
#   - ComfyUI VM provisioned with: ruby hyperstack.rb --config hyperstack-vm-photo.toml create
#   - WireGuard tunnel active (wg1): verified via curl http://hyperstack-photo.wg1:8188/system_stats
#   - The toml-rb gem (config parsing) and ImageMagick's `magick` binary on PATH
#     (needed whenever the original is not a PNG).

begin
  require 'bundler/setup'
rescue LoadError, Gem::GemNotFoundException, Gem::LoadError, Errno::ENOENT
  nil
end

require 'digest'
require 'fileutils'
require 'json'
require 'net/http'
require 'optparse'
require 'securerandom'
require 'set'
require 'time'
require 'uri'

begin
  require 'toml-rb'
rescue LoadError
  warn "Missing dependency: toml-rb. Run `bundle install` in #{__dir__} first."
  exit 2
end

# ---------------------------------------------------------------------------
# Config loading — reads only the fields photo-enhance.rb needs from the TOML.
# ---------------------------------------------------------------------------

# Connection settings for the ComfyUI VM, derived from the hyperstack TOML.
class PhotoConfig
  attr_reader :host, :port, :workflow_path

  # @param config_path [String] path to the TOML config file
  # @param workflow_path_override [String, nil] explicit workflow JSON path;
  #   when nil, workflows/photo-enhance.json next to the config file is used
  # @raise [ArgumentError, TypeError] if [comfyui].port is not an integer
  def initialize(config_path, workflow_path_override)
    config_file = File.expand_path(config_path)
    raw         = TomlRB.load_file(config_file)
    hostname    = raw.dig('vm', 'hostname') || 'hyperstack-photo'
    interface   = raw.dig('local_client', 'interface_name') || 'wg1'

    # The VM is addressed as <hostname>.<wireguard interface>.
    @host = "#{hostname}.#{interface}"
    @port = Integer(raw.dig('comfyui', 'port') || 8188)
    @workflow_path = workflow_path_override ||
                     File.join(File.dirname(config_file), 'workflows', 'photo-enhance.json')
  end
end

# ---------------------------------------------------------------------------
# ComfyUI API client — upload, submit, poll, download.
# ---------------------------------------------------------------------------

# Thin HTTP client for the ComfyUI REST endpoints this script uses.
# Every failure is raised as a RuntimeError with a human-readable message.
class ComfyUIClient
  POLL_INTERVAL_SEC = 2
  POLL_TIMEOUT_SEC  = 600 # 10 minutes per image (SUPIR can be slow on first load)

  # Characters that would break a quoted Content-Disposition parameter.
  FILENAME_ESCAPES = { '"' => '%22', "\r" => '%0D', "\n" => '%0A' }.freeze

  def initialize(host:, port:, out: $stdout)
    @host = host
    @port = port
    @out  = out
  end

  # Upload a local image file; returns the filename ComfyUI assigned it.
  #
  # @param file_path [String] local image path
  # @return [String] server-side file name (falls back to the local basename)
  # @raise [RuntimeError] on a non-200 response or when the server is unreachable
  def upload_image(file_path)
    filename = File.basename(file_path)
    boundary = "----RubyPhotoEnhance#{SecureRandom.hex(8)}"
    body     = multipart_body(boundary, filename, mime_type_for(file_path), File.binread(file_path))

    resp = post('/upload/image', body, "multipart/form-data; boundary=#{boundary}", read_timeout: 120)
    raise "Upload failed (HTTP #{resp.code}): #{resp.body}" unless resp.code == '200'

    JSON.parse(resp.body)['name'] || filename
  end

  # Submit a workflow; returns the prompt_id string.
  #
  # @param workflow [Hash] ComfyUI API-format workflow
  # @raise [RuntimeError] on a non-200 response or a response without prompt_id
  def submit_prompt(workflow)
    resp = post('/prompt', JSON.generate('prompt' => workflow), 'application/json')
    raise "Prompt submission failed (HTTP #{resp.code}): #{resp.body}" unless resp.code == '200'

    prompt_id = JSON.parse(resp.body)['prompt_id']
    raise "No prompt_id in response: #{resp.body}" unless prompt_id

    prompt_id
  end

  # Poll until the prompt finishes; returns the list of output filenames.
  #
  # @raise [RuntimeError] on timeout, on a failed history poll, or when the
  #   history entry reports an execution error
  def wait_for_output(prompt_id)
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = monotonic_now + POLL_TIMEOUT_SEC
    loop do
      raise "Timed out after #{POLL_TIMEOUT_SEC}s waiting for prompt #{prompt_id}" if monotonic_now > deadline

      resp = get("/history/#{prompt_id}")
      raise "History poll failed (HTTP #{resp.code})" unless resp.code == '200'

      result = JSON.parse(resp.body)[prompt_id]
      if result
        # Fail fast instead of polling until the timeout for a prompt that
        # ComfyUI has already recorded as failed.
        if result.dig('status', 'status_str') == 'error'
          raise "ComfyUI reported an execution error for prompt #{prompt_id}"
        end

        outputs = extract_output_filenames(result)
        return outputs unless outputs.empty?
        # ComfyUI may record the prompt before writing output nodes; keep polling.
      end

      sleep POLL_INTERVAL_SEC
    end
  end

  # Download an output image; saves to dest_path (parent directories are created).
  def download_output(filename, dest_path)
    resp = get("/view?filename=#{URI.encode_www_form_component(filename)}&type=output&subfolder=")
    raise "Download failed (HTTP #{resp.code}) for #{filename}" unless resp.code == '200'

    FileUtils.mkdir_p(File.dirname(dest_path))
    File.binwrite(dest_path, resp.body)
  end

  # Quick connectivity check; raises on failure.
  def check_connectivity!
    resp = get('/system_stats')
    raise "ComfyUI health check failed (HTTP #{resp.code}): #{resp.body}" unless resp.code == '200'
  end

  private

  # Collects every non-empty image filename from the "outputs" section of a
  # history entry, across all output nodes.
  def extract_output_filenames(result)
    outputs = result['outputs']
    return [] unless outputs.is_a?(Hash)

    outputs.each_value
           .flat_map { |node_out| Array(node_out['images']) }
           .map { |img| img['filename'] }
           .compact
           .reject(&:empty?)
  end

  # Builds the multipart/form-data payload as a binary string. Mixing UTF-8
  # header text with raw image bytes via Array#join raises
  # Encoding::CompatibilityError for non-ASCII file names, so every part is
  # appended as binary.
  def multipart_body(boundary, filename, content_type, data)
    quoted_name = filename.gsub(/["\r\n]/, FILENAME_ESCAPES)
    body = String.new(encoding: Encoding::BINARY)
    body << "--#{boundary}\r\n".b
    body << "Content-Disposition: form-data; name=\"image\"; filename=\"#{quoted_name}\"\r\n".b
    body << "Content-Type: #{content_type}\r\n\r\n".b
    body << data.b
    body << "\r\n--#{boundary}\r\n".b
    body << "Content-Disposition: form-data; name=\"overwrite\"\r\n\r\ntrue\r\n".b
    body << "--#{boundary}--\r\n".b
  end

  def endpoint(path)
    URI("http://#{@host}:#{@port}#{path}")
  end

  def get(path)
    with_connection_errors { Net::HTTP.get_response(endpoint(path)) }
  end

  def post(path, body, content_type, read_timeout: nil)
    uri = endpoint(path)
    req = Net::HTTP::Post.new(uri)
    req['Content-Type'] = content_type
    req.body = body

    with_connection_errors do
      Net::HTTP.start(uri.host, uri.port) do |http|
        http.read_timeout = read_timeout if read_timeout
        http.request(req)
      end
    end
  end

  # Translates socket-level failures into one actionable message. Wrapping
  # only the HTTP call keeps local file errors from being reported as a
  # network problem.
  def with_connection_errors
    yield
  rescue SystemCallError, SocketError, Timeout::Error => e
    raise "Cannot reach ComfyUI at #{@host}:#{@port} — is WireGuard (wg1) active? (#{e.message})"
  end

  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  def mime_type_for(file_path)
    case File.extname(file_path).downcase
    when '.jpg', '.jpeg' then 'image/jpeg'
    when '.png'          then 'image/png'
    when '.webp'         then 'image/webp'
    else 'application/octet-stream'
    end
  end
end

# ---------------------------------------------------------------------------
# Manifest — tracks which files have been processed to avoid re-enhancing.
# ---------------------------------------------------------------------------

# Append-only list of digests of already-enhanced source files, stored as a
# dot-file inside the given directory.
class ProcessedManifest
  MANIFEST_FILE = '.photo-enhance-processed'

  def initialize(outdir)
    @path    = File.join(outdir, MANIFEST_FILE)
    @entries = load_entries
  end

  def processed?(file_path)
    @entries.include?(digest(file_path))
  end

  # Records file_path as done, both in memory and on disk. Marking the same
  # file twice does not append a duplicate line.
  def mark_done(file_path)
    key = digest(file_path)
    return unless @entries.add?(key)

    File.open(@path, 'a') { |f| f.puts(key) }
  end

  private

  # Always returns a Set, whether or not the manifest file exists yet.
  def load_entries
    return Set.new unless File.exist?(@path)

    File.readlines(@path, chomp: true).map(&:strip).reject(&:empty?).to_set
  end

  # Digest includes size and mtime so a re-shot of the same filename is re-processed.
  def digest(file_path)
    stat = File.stat(file_path)
    Digest::SHA256.hexdigest("#{File.basename(file_path)}:#{stat.size}:#{stat.mtime.to_i}")
  rescue Errno::ENOENT
    Digest::SHA256.hexdigest(File.basename(file_path))
  end
end

# ---------------------------------------------------------------------------
# Enhancer — orchestrates upload → prompt → poll → download for one image.
# ---------------------------------------------------------------------------

# Drives the whole pipeline for a directory of photos.
class PhotoEnhancer
  SUPPORTED_EXTENSIONS = %w[.jpg .jpeg .png .webp].freeze
  ENHANCED_SUFFIX      = '_enhanced'
  TEMP_SUFFIX          = '.tmp.png'
  LOSSY_QUALITY        = '92'

  # @param outdir [String, nil] directory for results; nil keeps each result
  #   next to its original
  def initialize(config:, client:, workflow:, indir:, manifest:, out: $stdout, outdir: nil)
    @config   = config
    @client   = client
    @workflow = workflow
    @indir    = indir
    @manifest = manifest
    @out      = out
    @outdir   = outdir
  end

  # Enhances every pending image in indir. With watch: true it rescans every
  # five seconds until interrupted.
  def enhance_directory(indir, watch: false)
    @client.check_connectivity!
    location = @outdir ? "in #{@outdir}" : 'alongside originals'
    @out.puts "ComfyUI ready at http://#{@config.host}:#{@config.port}"
    @out.puts "Enhancing photos in #{indir} (output: <name>_enhanced.<ext> #{location})"
    @out.puts watch ? '(watch mode — Ctrl-C to stop)' : ''

    loop do
      find_pending_images(indir).each { |path| enhance_one(path) }
      break unless watch

      sleep 5
    end
  end

  private

  def find_pending_images(indir)
    Dir.glob(File.join(indir, '*'))
       .select { |f| File.file?(f) && SUPPORTED_EXTENSIONS.include?(File.extname(f).downcase) }
       .reject { |f| own_artifact?(f) || @manifest.processed?(f) }
       .sort
  end

  # True for files this tool wrote itself: finished results (*_enhanced.<ext>)
  # and leftover download temp files (*.tmp.png). Without this filter every
  # result would be picked up as a new input on the next scan and enhanced
  # again, indefinitely in watch mode.
  def own_artifact?(path)
    File.basename(path, '.*').end_with?(ENHANCED_SUFFIX) || path.end_with?(TEMP_SUFFIX)
  end

  # e.g. photo.jpg -> photo_enhanced.jpg, in --outdir when given, otherwise in
  # the same directory as the original.
  def destination_for(src_path, ext)
    target_dir = @outdir || File.dirname(src_path)
    File.join(target_dir, "#{File.basename(src_path, '.*')}#{ENHANCED_SUFFIX}#{ext}")
  end

  # Runs one image through the pipeline. Errors are reported on @out and
  # swallowed so one bad file does not abort the batch; a file is only marked
  # done once its result exists on disk.
  def enhance_one(src_path)
    ext       = File.extname(src_path).downcase
    dest_path = destination_for(src_path, ext)
    tmp_path  = "#{dest_path}#{TEMP_SUFFIX}"

    @out.puts "[#{Time.now.strftime('%H:%M:%S')}] Enhancing #{File.basename(src_path)}..."

    # Inject the uploaded filename into the workflow LoadImage node.
    uploaded_name = @client.upload_image(src_path)
    prompt_id     = @client.submit_prompt(inject_input_image(@workflow, uploaded_name))
    @out.puts "  Submitted prompt #{prompt_id}, waiting for ComfyUI..."

    filenames = @client.wait_for_output(prompt_id)
    raise "No output images returned for #{src_path}" if filenames.empty?

    # ComfyUI SaveImage always outputs PNG. Download to a temp file then convert
    # to the original format so file sizes stay comparable.
    @client.download_output(filenames.first, tmp_path)
    convert_to_original_format(tmp_path, dest_path, ext)
    @manifest.mark_done(src_path)
    @out.puts "  Saved -> #{dest_path} (#{kb(File.size(src_path))} KB -> #{kb(File.size(dest_path))} KB)"
  rescue StandardError => e
    @out.puts "  ERROR enhancing #{File.basename(src_path)}: #{e.message}"
  ensure
    # Remove the temp PNG on failure too, so it is never left behind.
    FileUtils.rm_f(tmp_path) if tmp_path
  end

  # Convert the PNG downloaded from ComfyUI into the desired output format.
  # PNG originals are a straight copy; everything else goes through
  # ImageMagick at quality 92 so the file content matches its extension and
  # stays close to the original size.
  #
  # @raise [RuntimeError] if `magick` is missing or exits non-zero
  def convert_to_original_format(src_png, dest_path, original_ext)
    if original_ext == '.png'
      FileUtils.cp(src_png, dest_path)
      return
    end

    # system returns nil when the command cannot be started, false on a
    # non-zero exit status.
    outcome = system('magick', src_png, '-quality', LOSSY_QUALITY, dest_path)
    return if outcome

    raise "ImageMagick 'magick' command not found — install ImageMagick to write #{original_ext} files" if outcome.nil?

    raise "magick failed (exit #{Process.last_status&.exitstatus}) converting #{File.basename(src_png)}"
  end

  def kb(bytes)
    (bytes / 1024.0).round
  end

  # Replace the placeholder filename in the LoadImage node so the workflow
  # processes the newly uploaded image rather than any hardcoded test image.
  # Returns a modified deep copy; the shared workflow template is not mutated.
  #
  # @raise [ArgumentError] if the workflow has no LoadImage node
  def inject_input_image(workflow, filename)
    modified   = JSON.parse(JSON.generate(workflow)) # deep dup
    load_nodes = modified.each_value.select { |node| node.is_a?(Hash) && node['class_type'] == 'LoadImage' }
    raise ArgumentError, 'Workflow has no LoadImage node to receive the input image' if load_nodes.empty?

    load_nodes.each { |node| (node['inputs'] ||= {})['image'] = filename }
    modified
  end
end

# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

options = {
  config: File.join(__dir__, 'hyperstack-vm-photo.toml'),
  indir: nil,
  outdir: nil,
  watch: false,
  workflow: nil,
  test: false
}

parser = OptionParser.new do |o|
  o.banner = 'Usage: ruby photo-enhance.rb [options]'
  o.on('--config PATH', 'TOML config file (default: hyperstack-vm-photo.toml)') { |v| options[:config] = v }
  o.on('--indir PATH', 'Directory of photos to enhance (output: <name>_enhanced.<ext> in same dir)') { |v| options[:indir] = v }
  o.on('--outdir PATH', 'Write enhanced photos here instead of next to the originals') { |v| options[:outdir] = v }
  o.on('--workflow PATH', 'ComfyUI workflow JSON (default: workflows/photo-enhance.json)') { |v| options[:workflow] = v }
  o.on('--watch', 'Keep running and process new images as they arrive') { options[:watch] = true }
  o.on('--test', 'Only check connectivity to ComfyUI, then exit') { options[:test] = true }
  o.on('-h', '--help', 'Show this help') { puts o; exit }
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  warn e.message
  warn parser
  exit 1
end

unless File.exist?(options[:config])
  warn "Config not found: #{options[:config]}"
  exit 1
end

cfg    = PhotoConfig.new(options[:config], options[:workflow])
client = ComfyUIClient.new(host: cfg.host, port: cfg.port)

if options[:test]
  begin
    client.check_connectivity!
    puts "ComfyUI is reachable at http://#{cfg.host}:#{cfg.port} — OK"
    exit 0
  rescue RuntimeError => e
    warn "ERROR: #{e.message}"
    exit 1
  end
end

unless options[:indir]
  warn '--indir is required (use --test to only check connectivity)'
  exit 1
end

indir  = File.expand_path(options[:indir])
outdir = options[:outdir] && File.expand_path(options[:outdir])

unless File.directory?(indir)
  warn "Input directory not found: #{indir}"
  exit 1
end

unless File.exist?(cfg.workflow_path)
  warn "Workflow JSON not found: #{cfg.workflow_path}"
  warn "Expected at #{File.join(__dir__, 'workflows', 'photo-enhance.json')}"
  exit 1
end

begin
  workflow = JSON.parse(File.read(cfg.workflow_path))
rescue JSON::ParserError => e
  warn "Workflow JSON is not valid JSON (#{cfg.workflow_path}): #{e.message}"
  exit 1
end

# Manifest lives in the indir so it stays with the photos.
manifest = ProcessedManifest.new(indir)
enhancer = PhotoEnhancer.new(config: cfg, client: client, workflow: workflow,
                             indir: indir, manifest: manifest, outdir: outdir)
begin
  enhancer.enhance_directory(indir, watch: options[:watch])
rescue RuntimeError => e
  warn "ERROR: #{e.message}"
  exit 1
rescue Interrupt
  puts "\nStopped."
end

# Everything after __END__ is inert data for Ruby: the patch from the same
# commit that adds workflows/photo-enhance.json.
__END__
diff --git a/workflows/photo-enhance.json b/workflows/photo-enhance.json
new file mode 100644
index 0000000..6be7dfa
--- /dev/null
+++ b/workflows/photo-enhance.json
@@ -0,0 +1,42 @@
+{
+  "1": {
+    "class_type": "LoadImage",
+    "_meta": {"title": "Load Input Photo"},
+    "inputs": {
+      "image": "NODE_INPUT_IMAGE",
+      "upload": "image"
+    }
+  },
+  "2": {
+    "class_type": "UpscaleModelLoader",
+    "_meta": {"title": "Load Real-ESRGAN Model"},
+    "inputs": {
+      "model_name": "RealESRGAN_x4plus.pth"
+    }
+  },
+  "3": {
+    "class_type": "ImageUpscaleWithModel",
+    "_meta": {"title": "Enhance (internal 4x pass)"},
+    "inputs": {
+      "upscale_model": ["2", 0],
+      "image": ["1", 0]
+    }
+  },
+  "4": {
+    "class_type": "ImageScaleBy",
+    "_meta": {"title": "Scale back to original resolution"},
+    "inputs": {
+      "image": ["3", 0],
+      "upscale_method": "lanczos",
+      "scale_by": 0.25
+    }
+  },
+  "5": {
+    "class_type": "SaveImage",
+    "_meta": {"title": "Save Enhanced Photo"},
+    "inputs": {
+      "images": ["4", 0],
+      "filename_prefix": "enhanced_"
+    }
+  }
+}
