1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
|
#!/usr/bin/env ruby
# frozen_string_literal: true
# photo-enhance.rb — AI photo enhancer via ComfyUI on a Hyperstack GPU VM.
#
# Submits images from --indir to the ComfyUI REST API, downloads the AI-enhanced
# results, and saves them alongside the originals with an _e suffix. Also downloads
# a per-photo JSON metadata file written by the WritePhotoMetadata ComfyUI node
# and converts it to a human-readable .md report alongside each enhanced photo.
#
# AI pipeline (ComfyUI, GPU):
# 1. Real-ESRGAN realesr-general-x4v3 — 4× upscale at full 4K input, AI denoise
# 2. CodeFormer fidelity=0.7 — neural face restoration
# 3. CLIP ViT-B/32 — scene classification (portrait/landscape/…)
# 4. AdaptivePhotoGrade — scene-tuned exposure/contrast/saturation/detail
# 5. SkyEnhance — HSV sky mask + graduated sky correction
# 6. Depth Anything V2 Small — depth map → foreground sharp, background soft
#
# Usage:
# ruby photo-enhance.rb --config hyperstack-vm-photo.toml \
# --indir ~/Pictures [--watch] [--workflow workflows/photo-enhance.json]
#
# Requirements:
# - ComfyUI VM: ruby hyperstack.rb --config hyperstack-vm-photo.toml create
# - WireGuard tunnel active (wg1)
begin
require 'bundler/setup'
rescue LoadError, Gem::GemNotFoundException, Gem::LoadError, Errno::ENOENT
nil
end
require 'json'
require 'net/http'
require 'optparse'
require 'fileutils'
require 'digest'
require 'time'
require 'set'
begin
require 'toml-rb'
rescue LoadError
warn "Missing dependency: toml-rb. Run `bundle install` in #{__dir__} first."
exit 2
end
# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
# Connection settings for the ComfyUI VM, read from the TOML config file.
#
# Exposes the WireGuard hostname ("<vm hostname>.<interface name>"), the
# ComfyUI port, and the path of the workflow JSON to submit.
class PhotoConfig
  attr_reader :host, :port, :workflow_path

  # @param config_path [String] path to the TOML config (may be relative or use ~)
  # @param workflow_path_override [String, nil] explicit workflow JSON path;
  #   when nil the workflow is looked up next to the config file
  # @raise [ArgumentError, TypeError] if the configured port is not integer-like
  def initialize(config_path, workflow_path_override)
    absolute_config = File.expand_path(config_path)
    settings = TomlRB.load_file(absolute_config)

    vm_name    = settings.dig('vm', 'hostname') || 'hyperstack-photo'
    wg_iface   = settings.dig('local_client', 'interface_name') || 'wg1'
    port_value = settings.dig('comfyui', 'port') || 8188

    @host = "#{vm_name}.#{wg_iface}"
    @port = Integer(port_value)
    @workflow_path =
      if workflow_path_override
        workflow_path_override
      else
        # Default: workflows/photo-enhance.json beside the config file.
        File.join(File.dirname(absolute_config), 'workflows', 'photo-enhance.json')
      end
  end
end
# ---------------------------------------------------------------------------
# ComfyUI API client — upload, submit, poll, download.
# ---------------------------------------------------------------------------
# Minimal HTTP client for the ComfyUI REST API: uploads an input image,
# queues a workflow, polls the history endpoint until output files appear,
# and downloads files from the output folder.
class ComfyUIClient
  POLL_INTERVAL_SEC = 2
  POLL_TIMEOUT_SEC = 300 # 5 minutes; ESRGAN is fast on GPU

  # Low-level failures that mean the server could not be reached at all.
  # They are re-raised as a RuntimeError carrying a WireGuard hint.
  CONNECT_ERRORS = [
    Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ENETUNREACH,
    Errno::ETIMEDOUT, SocketError, Net::OpenTimeout
  ].freeze

  # @param host [String] ComfyUI hostname
  # @param port [Integer] ComfyUI port
  # @param out [IO] stream for progress output (stored; not written to here)
  def initialize(host:, port:, out: $stdout)
    @host = host
    @port = port
    @out = out
  end

  # Uploads a local image through POST /upload/image with overwrite enabled.
  #
  # @param file_path [String] path of the image to upload
  # @return [String] the name reported by the server, or the local basename
  #   when the response carries no "name"
  # @raise [RuntimeError] on a non-200 response or when the server is unreachable
  def upload_image(file_path)
    filename = File.basename(file_path)
    boundary = "----RubyPhotoEnhance#{hex(8)}"
    body = multipart_body(boundary, filename, mime_type(file_path), File.binread(file_path))
    resp = post_raw('/upload/image', body, "multipart/form-data; boundary=#{boundary}")
    raise "Upload failed (#{resp.code}): #{resp.body}" unless resp.code == '200'

    JSON.parse(resp.body)['name'] || filename
  rescue *CONNECT_ERRORS => e
    raise unreachable_message(e)
  end

  # Queues a workflow for execution.
  #
  # @param workflow [Hash] ComfyUI API-format workflow
  # @return [String] the prompt id assigned by the server
  # @raise [RuntimeError] on a non-200 response or a response without prompt_id
  def submit_prompt(workflow)
    resp = post_json('/prompt', { 'prompt' => workflow })
    raise "Prompt failed (#{resp.code}): #{resp.body}" unless resp.code == '200'

    JSON.parse(resp.body)['prompt_id'] or raise "No prompt_id in: #{resp.body}"
  end

  # Polls /history/<prompt_id> until the run reports output images.
  #
  # @param prompt_id [String]
  # @return [Array<String>] non-empty list of output filenames
  # @raise [RuntimeError] on timeout, on a failed poll, when the run ended in
  #   an error, or when it succeeded without producing any output
  def wait_for_output(prompt_id)
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = monotonic_now + POLL_TIMEOUT_SEC
    loop do
      raise "Timed out after #{POLL_TIMEOUT_SEC}s for #{prompt_id}" if monotonic_now > deadline

      resp = get("/history/#{prompt_id}")
      raise "History poll failed (#{resp.code})" unless resp.code == '200'

      result = JSON.parse(resp.body)[prompt_id]
      if result
        outputs = extract_filenames(result)
        return outputs unless outputs.empty?

        status = result.dig('status', 'status_str')
        # A failed run never gains outputs; fail now instead of polling until the timeout.
        raise "ComfyUI execution failed for #{prompt_id} (status: #{status})" if status == 'error'
        # ComfyUI cached the run (identical inputs) and wrote no new files — bail fast.
        raise "ComfyUI cached execution returned no outputs for #{prompt_id}" \
          if result.dig('status', 'completed') && status == 'success'
      end
      sleep POLL_INTERVAL_SEC
    end
  end

  # Downloads one file from the ComfyUI output folder.
  #
  # @param filename [String] output filename as reported by the history endpoint
  # @param dest_path [String] local destination; parent directories are created
  # @raise [RuntimeError] on a non-200 response
  def download_output(filename, dest_path)
    resp = get("/view?filename=#{URI.encode_www_form_component(filename)}&type=output&subfolder=")
    raise "Download failed (#{resp.code}) for #{filename}" unless resp.code == '200'

    FileUtils.mkdir_p(File.dirname(dest_path))
    File.binwrite(dest_path, resp.body)
  end

  # Verifies the server answers GET /system_stats with 200.
  #
  # @raise [RuntimeError] when the server is unreachable or unhealthy
  def check_connectivity!
    resp = get('/system_stats')
    raise "Health check failed (#{resp.code}): #{resp.body}" unless resp.code == '200'
  rescue *CONNECT_ERRORS => e
    raise unreachable_message(e)
  end

  private

  # Builds the multipart/form-data payload as one binary string.
  # Every part is forced to binary first: joining a UTF-8 header that holds a
  # non-ASCII filename with raw image bytes would otherwise raise
  # Encoding::CompatibilityError.
  def multipart_body(boundary, filename, content_type, data)
    [
      "--#{boundary}\r\n",
      "Content-Disposition: form-data; name=\"image\"; filename=\"#{filename}\"\r\n",
      "Content-Type: #{content_type}\r\n\r\n",
      data,
      "\r\n--#{boundary}\r\n",
      "Content-Disposition: form-data; name=\"overwrite\"\r\n\r\ntrue\r\n",
      "--#{boundary}--\r\n"
    ].map(&:b).join
  end

  # Message used when the TCP connection itself could not be established.
  def unreachable_message(error)
    "Cannot reach ComfyUI at #{@host}:#{@port} — is WireGuard active? (#{error.message})"
  end

  # Seconds on the monotonic clock; only differences are meaningful.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # Collects every non-empty image filename from a history entry's outputs.
  def extract_filenames(result)
    Array(result.dig('outputs'))
      .flat_map { |_id, node| Array(node['images']) }
      .map { |img| img['filename'] }
      .compact.reject(&:empty?)
  end

  def get(path)
    Net::HTTP.get_response(URI("http://#{@host}:#{@port}#{path}"))
  end

  def post_json(path, payload)
    uri = URI("http://#{@host}:#{@port}#{path}")
    req = Net::HTTP::Post.new(uri)
    req['Content-Type'] = 'application/json'
    req.body = JSON.generate(payload)
    Net::HTTP.start(uri.host, uri.port) { |h| h.request(req) }
  end

  # Like post_json but with a caller-supplied body and a longer read timeout,
  # since image uploads can take a while.
  def post_raw(path, body, content_type)
    uri = URI("http://#{@host}:#{@port}#{path}")
    req = Net::HTTP::Post.new(uri)
    req['Content-Type'] = content_type
    req.body = body
    Net::HTTP.start(uri.host, uri.port, read_timeout: 120) { |h| h.request(req) }
  end

  def mime_type(path)
    case File.extname(path).downcase
    when '.jpg', '.jpeg' then 'image/jpeg'
    when '.png' then 'image/png'
    when '.webp' then 'image/webp'
    else 'application/octet-stream'
    end
  end

  # Returns 2*n hex characters, used to make the multipart boundary unique.
  def hex(n)
    Digest::SHA256.hexdigest(Time.now.to_f.to_s + rand.to_s)[0, n * 2]
  end
end
# ---------------------------------------------------------------------------
# Manifest — avoids re-processing files across runs and in watch mode.
# ---------------------------------------------------------------------------
# Append-only record of photos that have already been enhanced, kept as a
# hidden file inside the photo directory (one SHA-256 fingerprint per line).
class ProcessedManifest
  FILE_NAME = '.photo-enhance-processed'

  # @param dir [String] directory that holds (or will hold) the manifest file
  def initialize(dir)
    @path = File.join(dir, FILE_NAME)
    @entries = load_entries
  end

  # @param file_path [String]
  # @return [Boolean] true when this file's fingerprint is already recorded
  def processed?(file_path)
    fingerprint = digest(file_path)
    @entries.include?(fingerprint)
  end

  # Records the file both in memory and on disk.
  #
  # @param file_path [String]
  def mark_done(file_path)
    fingerprint = digest(file_path)
    @entries.add(fingerprint)
    File.open(@path, 'a') do |log|
      log.puts(fingerprint)
    end
  end

  private

  # Reads the manifest into a Set, ignoring blank lines and surrounding
  # whitespace; a missing manifest yields an empty Set.
  def load_entries
    known = Set.new
    return known unless File.exist?(@path)

    File.readlines(@path, chomp: true).each do |line|
      entry = line.strip
      known << entry unless entry.empty?
    end
    known
  end

  # Fingerprint of basename + size + mtime, so a re-shot under the same
  # filename is processed again. Falls back to the basename alone when the
  # file no longer exists.
  def digest(file_path)
    name = File.basename(file_path)
    begin
      info = File.stat(file_path)
      Digest::SHA256.hexdigest("#{name}:#{info.size}:#{info.mtime.to_i}")
    rescue Errno::ENOENT
      Digest::SHA256.hexdigest(File.basename(file_path))
    end
  end
end
# ---------------------------------------------------------------------------
# Enhancer — orchestrates auto-orient → upload → AI → download → format convert → report per image.
# ---------------------------------------------------------------------------
# Drives the per-photo pipeline: bake EXIF orientation, upload to ComfyUI,
# run the workflow, download the result, convert it to the source format,
# write a Markdown report, and record the photo in the manifest.
class PhotoEnhancer
  SUPPORTED_EXTENSIONS = %w[.jpg .jpeg .png .webp].freeze
  # No colour corrections — pure AI output from Real-ESRGAN is used as-is.
  # ImageMagick is only used to bake EXIF rotation and convert PNG→JPEG.
  COLOR_ARGS = [].freeze

  # @param config [#host, #port] connection settings (used for the banner)
  # @param client [#check_connectivity!, #upload_image, #submit_prompt,
  #   #wait_for_output, #download_output] ComfyUI client
  # @param workflow [Hash] parsed ComfyUI API-format workflow
  # @param indir [String] directory containing the photos
  # @param manifest [#processed?, #mark_done] record of finished photos
  # @param out [IO] stream for progress and error messages
  def initialize(config:, client:, workflow:, indir:, manifest:, out: $stdout)
    @config = config
    @client = client
    @workflow = workflow
    @indir = indir
    @manifest = manifest
    @out = out
  end

  # Enhances every pending photo once, or repeatedly every 5 seconds in
  # watch mode.
  #
  # @param watch [Boolean] keep scanning for new photos until interrupted
  # @raise [RuntimeError] when the initial connectivity check fails
  def run(watch: false)
    @client.check_connectivity!
    @out.puts "ComfyUI ready at http://#{@config.host}:#{@config.port}"
    @out.puts "Enhancing photos in #{@indir}"
    @out.puts watch ? '(watch mode — Ctrl-C to stop)' : ''
    loop do
      find_pending.each { |path| enhance_one(path) }
      break unless watch

      sleep 5
    end
  end

  private

  # Lists supported, not-yet-processed photos in the input directory, sorted.
  # Dir.entries is used instead of a glob built from @indir so that directory
  # names containing glob metacharacters ([, {, *, ?) still work; hidden
  # entries are skipped, as a '*' glob would.
  def find_pending
    Dir.entries(@indir)
       .reject { |name| name.start_with?('.') }
       .map { |name| File.join(@indir, name) }
       .select { |f| File.file?(f) && SUPPORTED_EXTENSIONS.include?(File.extname(f).downcase) }
       .reject { |f| generated_file?(f) }
       .reject { |f| @manifest.processed?(f) }
       .sort
  end

  # True for files this tool writes itself: enhanced outputs (*_e.ext),
  # auto-orient scratch copies, and in-flight PNG downloads.
  def generated_file?(path)
    name = File.basename(path)
    File.basename(path, '.*').end_with?('_e') ||
      name.include?('.orient.') ||
      name.end_with?('.tmp.png')
  end

  # Runs the full pipeline for one photo. Failures are reported on @out and
  # leave the photo unmarked so it is retried on a later scan; scratch files
  # are removed whether or not the run succeeded.
  def enhance_one(src_path)
    ext = File.extname(src_path).downcase
    basename = File.basename(src_path, File.extname(src_path))
    dest_path = File.join(File.dirname(src_path), "#{basename}_e#{ext}")
    tmp_png = "#{dest_path}.tmp.png"
    upload_path = nil
    @out.puts "[#{Time.now.strftime('%H:%M:%S')}] #{File.basename(src_path)}"

    # Bake in EXIF rotation before uploading — ComfyUI strips EXIF metadata.
    upload_path = auto_orient_tempfile(src_path)
    uploaded_name = @client.upload_image(upload_path)
    workflow = inject_input(@workflow, uploaded_name)
    prompt_id = @client.submit_prompt(workflow)
    @out.puts " prompt #{prompt_id}"
    filenames = @client.wait_for_output(prompt_id)
    raise "No outputs returned for #{src_path}" if filenames.empty?

    # ComfyUI outputs PNG; download then convert to original format.
    @client.download_output(filenames.first, tmp_png)
    save_with_corrections(tmp_png, dest_path, ext)

    # Download the JSON metadata written by WritePhotoMetadata and render it
    # as a human-readable .md report alongside the enhanced photo.
    # ComfyUI appends _NNNNN_ counter: "enhanced_abc123__00001_.png" → "enhanced_abc123_"
    prefix = filenames.first.sub(/_\d+_\.png$/, '')
    meta_file = "#{prefix}meta.json"
    md_path = File.join(File.dirname(dest_path),
                        "#{File.basename(dest_path, File.extname(dest_path))}.md")
    download_and_write_md(meta_file, src_path, dest_path, md_path)

    @manifest.mark_done(src_path)
    @out.puts " -> #{dest_path} (#{kb(src_path)} KB -> #{kb(dest_path)} KB)"
  rescue StandardError => e
    @out.puts " ERROR #{File.basename(src_path)}: #{e.message}"
  ensure
    # Never leave scratch files behind: a stray .tmp.png has a supported
    # extension and would otherwise look like a new photo.
    FileUtils.rm_f(tmp_png) if tmp_png
    FileUtils.rm_f(upload_path) if upload_path && upload_path != src_path
  end

  # Run magick -auto-orient into a temp file so EXIF rotation is baked in.
  # Falls back to the original path if magick is unavailable or fails.
  #
  # @return [String] path to upload (the temp copy, or src_path on fallback)
  def auto_orient_tempfile(src_path)
    ext = File.extname(src_path)
    tmp = "#{src_path}.orient#{ext}"
    return tmp if system('magick', src_path, '-auto-orient', tmp) && File.exist?(tmp)

    FileUtils.rm_f(tmp) # discard any partial output from the failed run
    @out.puts " Warning: auto-orient failed, uploading original"
    src_path
  end

  # Convert the downloaded PNG to the target format (JPEG quality 92 for .jpg).
  # No colour processing — pure AI output from Real-ESRGAN is preserved as-is.
  #
  # @raise [RuntimeError] when magick is missing or the conversion fails, so
  #   the caller does not record a photo that has no enhanced output
  def save_with_corrections(src_png, dest_path, ext)
    quality_args = ext.match?(/\.jpe?g/) ? ['-quality', '92'] : []
    converted = system('magick', src_png, *COLOR_ARGS, *quality_args, dest_path)
    return if converted && File.exist?(dest_path)

    raise "magick could not convert #{File.basename(src_png)} to #{File.basename(dest_path)}"
  end

  # Download the WritePhotoMetadata JSON from ComfyUI output and render it
  # as a Markdown report saved alongside the enhanced photo. Best-effort:
  # a missing metadata file is skipped silently and any other failure is
  # reported as a warning without aborting the photo.
  def download_and_write_md(meta_filename, src_path, dest_path, md_path)
    # NOTE(review): reaches into the client's private #get via send; the
    # client's public API only offers download-to-file.
    resp = @client.send(:get,
                        "/view?filename=#{URI.encode_www_form_component(meta_filename)}&type=output&subfolder=")
    return unless resp.code == '200'

    File.write(md_path, render_report(JSON.parse(resp.body), src_path, dest_path))
  rescue StandardError => e
    @out.puts " Warning: could not write metadata report: #{e.message}"
  end

  # Renders the metadata hash as the Markdown report text.
  def render_report(meta, src_path, dest_path)
    profile = meta['enhancement_profile'] || {}
    sky = meta['sky'] || {}
    depth = meta['depth_sharpen'] || {}
    models = meta['models'] || {}
    scene = meta['scene_type'] || 'unknown'
    ts = meta['generated_at'] || Time.now.utc.iso8601
    <<~MD
      # #{File.basename(dest_path)} — Enhancement Report
      **Source:** #{File.basename(src_path)} (#{kb(src_path)} KB)
      **Enhanced:** #{File.basename(dest_path)} (#{kb(dest_path)} KB)
      **Processed:** #{ts}
      ## AI Pipeline
      | Step | Model / Node | Device | What it does |
      |------|-------------|--------|--------------|
      | 1 | `#{models['upscaler']}` | GPU | 4× upscale at full 4K input → 16K → back to 4K |
      | 2 | `#{models['face_restore']}` | GPU | Face detection + neural restoration |
      | 3 | `#{models['scene_detect']}` | GPU | Zero-shot scene classification |
      | 4 | Adaptive Photo Grade | CPU | Scene-tuned exposure / contrast / saturation / detail |
      | 5 | Sky Enhance | CPU | HSV sky mask + graduated sky correction |
      | 6 | `#{models['depth']}` | GPU | Depth map → foreground sharp, background soft |
      ## Scene Detection
      | | |
      |-|-|
      | **Detected scene** | #{scene} |
      ## Colour Grading Profile (#{scene})
      | Setting | Value |
      |---------|-------|
      | Exposure | +#{profile['exposure_stops']} stops |
      | Contrast | #{profile['contrast_factor']}× |
      | Saturation | #{profile['saturation_mult']}× |
      | Detail / Clarity | #{profile['detail_mult']}× |
      | Denoise strength | #{profile['denoise_strength']} |
      ## Sky Enhancement
      | Setting | Value |
      |---------|-------|
      | Sky coverage | #{sky['coverage_pct']}% of image |
      | Sky exposure | +#{sky['sky_exposure']} stops |
      | Sky saturation | #{sky['sky_saturation']}× |
      ## Depth-Guided Sharpening
      | Setting | Value |
      |---------|-------|
      | Foreground sharpening | #{depth['foreground_sharpen']}× |
      | Background blur | #{depth['background_blur']} |
    MD
  end

  # Inject the upload filename and a unique prefix into LoadImage, SaveImage,
  # and WritePhotoMetadata to bust ComfyUI's cache and link metadata to image.
  # Works on a deep copy; the workflow passed in is not modified.
  #
  # @return [Hash] the patched copy of the workflow
  def inject_input(workflow, filename)
    wf = JSON.parse(JSON.generate(workflow)) # deep dup
    prefix = "enhanced_#{Digest::SHA256.hexdigest(Time.now.to_f.to_s + rand.to_s)[0, 8]}_"
    wf.each_value do |node|
      next unless node.is_a?(Hash)

      case node['class_type']
      when 'LoadImage'
        node_inputs(node)['image'] = filename
      when 'SaveImage'
        node_inputs(node)['filename_prefix'] = prefix
      when 'WritePhotoMetadata'
        node_inputs(node)['filename_prefix'] = prefix
        node_inputs(node)['source_filename'] = filename
      end
    end
    wf
  end

  # Returns the node's inputs hash, creating it when the node has none.
  def node_inputs(node)
    node['inputs'] ||= {}
  end

  # File size in whole kilobytes (rounded).
  def kb(path)
    (File.size(path) / 1024.0).round
  end
end
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
# Command-line entry point: parse options, load the config and workflow,
# then either run a connectivity check (--test) or enhance --indir.
# Every expected failure exits with status 1 and a one-line message rather
# than a backtrace.
options = {
  config: File.join(__dir__, 'hyperstack-vm-photo.toml'),
  indir: nil,
  watch: false,
  workflow: nil,
  test: false
}

parser = OptionParser.new do |o|
  o.banner = 'Usage: ruby photo-enhance.rb [options]'
  o.on('--config PATH', 'TOML config (default: hyperstack-vm-photo.toml)') { |v| options[:config] = v }
  o.on('--indir PATH', 'Directory of photos to enhance') { |v| options[:indir] = v }
  o.on('--workflow PATH', 'ComfyUI workflow JSON override') { |v| options[:workflow] = v }
  o.on('--watch', 'Keep running, process new images as they arrive') { options[:watch] = true }
  o.on('--test', 'Check ComfyUI connectivity only, then exit') { options[:test] = true }
  o.on('-h', '--help', 'Show this help') { puts o; exit }
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Unknown flag or missing argument: show the reason plus the usage text.
  abort "#{e.message}\n#{parser}"
end

abort "Config not found: #{options[:config]}" unless File.exist?(options[:config])

cfg = begin
  PhotoConfig.new(options[:config], options[:workflow])
rescue StandardError => e
  # Malformed TOML or a non-numeric port.
  abort "Invalid config #{options[:config]}: #{e.message}"
end
client = ComfyUIClient.new(host: cfg.host, port: cfg.port)

if options[:test]
  begin
    client.check_connectivity!
    puts "ComfyUI reachable at http://#{cfg.host}:#{cfg.port} — OK"
    exit 0
  rescue StandardError => e
    # StandardError (not just RuntimeError) so socket-level Errno::* failures
    # are reported the same way.
    warn "ERROR: #{e.message}"; exit 1
  end
end

abort '--indir is required' unless options[:indir]
indir = File.expand_path(options[:indir])
abort "Directory not found: #{indir}" unless File.directory?(indir)
abort "Workflow not found: #{cfg.workflow_path}" unless File.exist?(cfg.workflow_path)

workflow = begin
  JSON.parse(File.read(cfg.workflow_path))
rescue JSON::ParserError => e
  abort "Workflow is not valid JSON (#{cfg.workflow_path}): #{e.message}"
end

manifest = ProcessedManifest.new(indir)
enhancer = PhotoEnhancer.new(config: cfg, client: client, workflow: workflow,
                             indir: indir, manifest: manifest)
begin
  enhancer.run(watch: options[:watch])
rescue StandardError => e
  warn "ERROR: #{e.message}"; exit 1
rescue Interrupt
  puts "\nStopped."
end
|