summaryrefslogtreecommitdiff
path: root/photo-enhance.rb
blob: 79c2e4cea2e3b4111647841350ddf9e4d16b6b77 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
#!/usr/bin/env ruby
# frozen_string_literal: true

# photo-enhance.rb — AI photo enhancer via ComfyUI on a Hyperstack GPU VM.
#
# Submits images from --indir to the ComfyUI REST API, downloads the AI-enhanced
# results and saves alongside the originals with an _e suffix.  Also downloads
# a per-photo JSON metadata file written by the WritePhotoMetadata ComfyUI node
# and converts it to a human-readable .md report alongside each enhanced photo.
#
# AI pipeline (ComfyUI, GPU):
#   1. Real-ESRGAN realesr-general-x4v3  — 4× upscale at full 4K input, AI denoise
#   2. CodeFormer fidelity=0.7           — neural face restoration
#   3. CLIP ViT-B/32                     — scene classification (portrait/landscape/…)
#   4. AdaptivePhotoGrade                — scene-tuned exposure/contrast/saturation/detail
#   5. SkyEnhance                        — HSV sky mask + graduated sky correction
#   6. Depth Anything V2 Small           — depth map → foreground sharp, background soft
#
# Usage:
#   ruby photo-enhance.rb --config hyperstack-vm-photo.toml \
#     --indir ~/Pictures [--watch] [--workflow workflows/photo-enhance.json]
#
# Requirements:
#   - ComfyUI VM: ruby hyperstack.rb --config hyperstack-vm-photo.toml create
#   - WireGuard tunnel active (wg1)

begin
  require 'bundler/setup'
rescue LoadError, Gem::GemNotFoundException, Gem::LoadError, Errno::ENOENT
  nil
end

require 'json'
require 'net/http'
require 'optparse'
require 'fileutils'
require 'digest'
require 'time'
require 'set'

begin
  require 'toml-rb'
rescue LoadError
  warn "Missing dependency: toml-rb. Run `bundle install` in #{__dir__} first."
  exit 2
end

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------

# Connection settings for the ComfyUI server, derived from the TOML config.
#
# Exposes:
#   host          - "<vm.hostname>.<local_client.interface_name>"
#                   (falls back to "hyperstack-photo" and "wg1")
#   port          - integer taken from comfyui.port (falls back to 8188)
#   workflow_path - the override when one is given, otherwise
#                   workflows/photo-enhance.json next to the config file
class PhotoConfig
  attr_reader :host, :port, :workflow_path

  # config_path            - path to the TOML file (expanded before loading)
  # workflow_path_override - explicit workflow JSON path, or nil for the default
  #
  # Raises whatever Integer() raises when comfyui.port is not integer-like.
  def initialize(config_path, workflow_path_override)
    config_file = File.expand_path(config_path)
    settings    = TomlRB.load_file(config_file)

    vm_name = settings.dig('vm', 'hostname') || 'hyperstack-photo'
    wg_name = settings.dig('local_client', 'interface_name') || 'wg1'
    @host   = "#{vm_name}.#{wg_name}"

    configured_port = settings.dig('comfyui', 'port')
    @port = Integer(configured_port || 8188)

    @workflow_path =
      if workflow_path_override
        workflow_path_override
      else
        File.join(File.dirname(config_file), 'workflows', 'photo-enhance.json')
      end
  end
end

# ---------------------------------------------------------------------------
# ComfyUI API client — upload, submit, poll, download.
# ---------------------------------------------------------------------------

# Minimal HTTP client for the ComfyUI REST endpoints this script uses:
# image upload, prompt submission, history polling and output download.
#
# All failures are reported by raising RuntimeError with a descriptive message.
class ComfyUIClient
  POLL_INTERVAL_SEC = 2
  POLL_TIMEOUT_SEC  = 300  # 5 minutes; ESRGAN is fast on GPU

  # Low-level errors meaning the server could not be reached at all. Timeouts
  # and "network unreachable" are included because a tunnel that is down often
  # shows up as one of those rather than as a refused connection.
  UNREACHABLE_ERRORS = [
    Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ENETUNREACH, Errno::ETIMEDOUT,
    SocketError, Net::OpenTimeout
  ].freeze

  # host/port - address of the ComfyUI server.
  # out       - IO for diagnostics; stored but not currently written to.
  def initialize(host:, port:, out: $stdout)
    @host = host
    @port = port
    @out  = out
  end

  # Uploads the image at file_path as multipart/form-data (overwrite enabled).
  #
  # Returns the name reported by the server, or the local basename when the
  # response carries no 'name'.
  # Raises RuntimeError on a non-200 response or when the server is unreachable.
  def upload_image(file_path)
    filename = File.basename(file_path)
    boundary = "----RubyPhotoEnhance#{hex(8)}"
    body     = multipart_body(boundary, filename, mime_type(file_path), File.binread(file_path))

    resp = post_raw('/upload/image', body, "multipart/form-data; boundary=#{boundary}")
    raise "Upload failed (#{resp.code}): #{resp.body}" unless resp.code == '200'
    JSON.parse(resp.body)['name'] || filename
  rescue *UNREACHABLE_ERRORS => e
    raise unreachable_message(e)
  end

  # Queues a workflow for execution and returns its prompt_id.
  # Raises RuntimeError on a non-200 response or a response without prompt_id.
  def submit_prompt(workflow)
    resp = post_json('/prompt', { 'prompt' => workflow })
    raise "Prompt failed (#{resp.code}): #{resp.body}" unless resp.code == '200'
    JSON.parse(resp.body)['prompt_id'] or raise "No prompt_id in: #{resp.body}"
  end

  # Polls /history/<prompt_id> every POLL_INTERVAL_SEC seconds until the run
  # lists at least one output image, then returns the image filenames.
  #
  # Raises RuntimeError when:
  #   * POLL_TIMEOUT_SEC elapses,
  #   * a poll returns a non-200 status,
  #   * the history entry reports status_str == "error" (fail fast instead of
  #     waiting out the whole timeout),
  #   * the run completed successfully but produced no images (cached run).
  def wait_for_output(prompt_id)
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = monotonic_now + POLL_TIMEOUT_SEC
    loop do
      raise "Timed out after #{POLL_TIMEOUT_SEC}s for #{prompt_id}" if monotonic_now > deadline

      resp = get("/history/#{prompt_id}")
      raise "History poll failed (#{resp.code})" unless resp.code == '200'

      result = JSON.parse(resp.body)[prompt_id]
      if result
        outputs = extract_filenames(result)
        return outputs unless outputs.empty?

        raise_if_finished_without_output(prompt_id, result['status'])
      end

      sleep POLL_INTERVAL_SEC
    end
  end

  # Downloads an output file from /view and writes it to dest_path, creating
  # the destination directory when needed.
  # Raises RuntimeError on a non-200 response.
  def download_output(filename, dest_path)
    resp = get("/view?filename=#{URI.encode_www_form_component(filename)}&type=output&subfolder=")
    raise "Download failed (#{resp.code}) for #{filename}" unless resp.code == '200'
    FileUtils.mkdir_p(File.dirname(dest_path))
    File.binwrite(dest_path, resp.body)
  end

  # Verifies the server answers /system_stats with HTTP 200.
  # Raises RuntimeError otherwise, with a WireGuard hint when unreachable.
  def check_connectivity!
    resp = get('/system_stats')
    raise "Health check failed (#{resp.code}): #{resp.body}" unless resp.code == '200'
  rescue *UNREACHABLE_ERRORS => e
    raise unreachable_message(e)
  end

  private

  # Assembles the multipart/form-data payload. Every part is converted to a
  # binary string first: joining UTF-8 header text that contains non-ASCII
  # characters (e.g. "café.jpg") with raw image bytes would otherwise raise
  # Encoding::CompatibilityError.
  def multipart_body(boundary, filename, content_type, image_data)
    [
      "--#{boundary}\r\n",
      "Content-Disposition: form-data; name=\"image\"; filename=\"#{filename}\"\r\n",
      "Content-Type: #{content_type}\r\n\r\n",
      image_data,
      "\r\n--#{boundary}\r\n",
      "Content-Disposition: form-data; name=\"overwrite\"\r\n\r\ntrue\r\n",
      "--#{boundary}--\r\n"
    ].map(&:b).join
  end

  # Raises when the history entry shows the run is over but produced no images;
  # returns nil when the run still appears to be in progress.
  def raise_if_finished_without_output(prompt_id, status)
    return unless status.is_a?(Hash)

    if status['status_str'] == 'error'
      detail = execution_error_detail(status)
      suffix = detail.empty? ? '' : ": #{detail}"
      raise "ComfyUI execution failed for #{prompt_id}#{suffix}"
    end

    # ComfyUI cached the run (identical inputs) and wrote no new files — bail fast.
    return unless status['completed'] && status['status_str'] == 'success'

    raise "ComfyUI cached execution returned no outputs for #{prompt_id}"
  end

  # Best-effort extraction of the server's error text.
  # NOTE(review): assumes status['messages'] is a list of [event, payload]
  # pairs with an 'execution_error' event carrying 'exception_message' —
  # confirm against the ComfyUI version in use. Returns '' when absent.
  def execution_error_detail(status)
    Array(status['messages']).each do |event, payload|
      next unless event == 'execution_error' && payload.is_a?(Hash)

      return payload['exception_message'].to_s.strip
    end
    ''
  end

  # Collects the non-empty 'filename' of every image under result['outputs'].
  def extract_filenames(result)
    Array(result['outputs'])
      .flat_map { |_id, node| Array(node['images']) }
      .map { |img| img['filename'] }
      .compact.reject(&:empty?)
  end

  def get(path)
    Net::HTTP.get_response(URI("http://#{@host}:#{@port}#{path}"))
  end

  def post_json(path, payload)
    uri = URI("http://#{@host}:#{@port}#{path}")
    req = Net::HTTP::Post.new(uri)
    req['Content-Type'] = 'application/json'
    req.body = JSON.generate(payload)
    Net::HTTP.start(uri.host, uri.port) { |h| h.request(req) }
  end

  # Like post_json but sends a pre-built body; uses a longer read timeout
  # because image uploads can be slow.
  def post_raw(path, body, content_type)
    uri = URI("http://#{@host}:#{@port}#{path}")
    req = Net::HTTP::Post.new(uri)
    req['Content-Type'] = content_type
    req.body = body
    Net::HTTP.start(uri.host, uri.port, read_timeout: 120) { |h| h.request(req) }
  end

  def mime_type(path)
    case File.extname(path).downcase
    when '.jpg', '.jpeg' then 'image/jpeg'
    when '.png'          then 'image/png'
    when '.webp'         then 'image/webp'
    else 'application/octet-stream'
    end
  end

  # Returns 2*n hex characters, used to make the multipart boundary unique.
  def hex(n)
    Digest::SHA256.hexdigest(Time.now.to_f.to_s + rand.to_s)[0, n * 2]
  end

  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  def unreachable_message(error)
    "Cannot reach ComfyUI at #{@host}:#{@port} — is WireGuard active? (#{error.message})"
  end
end

# ---------------------------------------------------------------------------
# Manifest — avoids re-processing files across runs and in watch mode.
# ---------------------------------------------------------------------------

# Append-only record of photos that have already been enhanced, stored as one
# fingerprint per line in a dotfile inside the photo directory.
class ProcessedManifest
  FILE_NAME = '.photo-enhance-processed'

  # dir - directory that holds (or will hold) the manifest file.
  def initialize(dir)
    @path    = File.join(dir, FILE_NAME)
    @entries = load_entries
  end

  # True when the file's current fingerprint is already recorded.
  def processed?(file_path)
    @entries.member?(digest(file_path))
  end

  # Records the file's fingerprint in memory and appends it to the manifest.
  def mark_done(file_path)
    fingerprint = digest(file_path)
    @entries.add(fingerprint)
    File.open(@path, 'a') { |log| log.puts(fingerprint) }
  end

  private

  # Reads the manifest into a Set, ignoring blank lines and surrounding
  # whitespace. A missing manifest yields an empty Set.
  def load_entries
    known = Set.new
    return known unless File.exist?(@path)

    File.readlines(@path, chomp: true).each do |line|
      key = line.strip
      known << key unless key.empty?
    end
    known
  end

  # Fingerprint = SHA-256 of "basename:size:mtime", so a re-shot photo that
  # reuses a filename is processed again. When the file no longer exists the
  # fingerprint falls back to the basename alone.
  def digest(file_path)
    name = File.basename(file_path)
    begin
      info = File.stat(file_path)
    rescue Errno::ENOENT
      return Digest::SHA256.hexdigest(name)
    end
    Digest::SHA256.hexdigest("#{name}:#{info.size}:#{info.mtime.to_i}")
  end
end

# ---------------------------------------------------------------------------
# Enhancer — orchestrates upload → AI → download → format conversion → metadata report per image.
# ---------------------------------------------------------------------------

# Drives the per-photo pipeline: bake EXIF orientation, upload, run the
# ComfyUI workflow, download the result, convert it to the source format and
# write a Markdown report from the workflow's metadata JSON.
class PhotoEnhancer
  SUPPORTED_EXTENSIONS = %w[.jpg .jpeg .png .webp].freeze

  # No colour corrections — pure AI output from Real-ESRGAN is used as-is.
  # ImageMagick is only used to bake EXIF rotation and convert PNG→JPEG.
  COLOR_ARGS = [].freeze

  # config   - responds to host/port (used for the banner only)
  # client   - ComfyUI client (upload_image, submit_prompt, wait_for_output,
  #            download_output, check_connectivity!, get)
  # workflow - parsed workflow JSON; never mutated, a copy is made per photo
  # indir    - directory scanned for photos
  # manifest - responds to processed?(path) and mark_done(path)
  # out      - IO that receives progress output
  def initialize(config:, client:, workflow:, indir:, manifest:, out: $stdout)
    @config   = config
    @client   = client
    @workflow = workflow
    @indir    = indir
    @manifest = manifest
    @out      = out
  end

  # Processes every pending photo once, or forever (rescanning every 5 s)
  # when watch is true. Raises whatever check_connectivity! raises; failures
  # on individual photos are logged and do not stop the run.
  def run(watch: false)
    @client.check_connectivity!
    @out.puts "ComfyUI ready at http://#{@config.host}:#{@config.port}"
    @out.puts "Enhancing photos in #{@indir}"
    @out.puts watch ? '(watch mode — Ctrl-C to stop)' : ''

    loop do
      find_pending.each { |path| enhance_one(path) }
      break unless watch
      sleep 5
    end
  end

  private

  # Supported image files directly inside @indir that are neither our own
  # outputs/scratch files nor already recorded in the manifest, sorted by path.
  def find_pending
    Dir.glob(File.join(@indir, '*'))
       .select { |f| File.file?(f) && SUPPORTED_EXTENSIONS.include?(File.extname(f).downcase) }
       .reject { |f| own_artifact?(f) }
       .reject { |f| @manifest.processed?(f) }
       .sort
  end

  # True for files this script writes itself: enhanced results ("*_e.<ext>"),
  # auto-orient scratch copies ("*.orient.*") and download scratch files
  # ("*.tmp.png"). The last one matters because ".png" is a supported input
  # extension: a scratch file left by a killed run would otherwise be enhanced.
  def own_artifact?(path)
    name = File.basename(path)
    File.basename(path, '.*').end_with?('_e') ||
      name.include?('.orient.') ||
      name.end_with?('.tmp.png')
  end

  # Runs the full pipeline for one photo. Any StandardError is logged and
  # swallowed so the remaining photos are still processed; the photo is only
  # marked done when the enhanced file was actually written. Scratch files are
  # removed whether or not the photo succeeded.
  def enhance_one(src_path)
    upload_path = nil
    tmp_png     = nil
    ext         = File.extname(src_path).downcase
    basename    = File.basename(src_path, File.extname(src_path))
    dest_path   = File.join(File.dirname(src_path), "#{basename}_e#{ext}")
    tmp_png     = "#{dest_path}.tmp.png"

    @out.puts "[#{Time.now.strftime('%H:%M:%S')}] #{File.basename(src_path)}"

    # Bake in EXIF rotation before uploading — ComfyUI strips EXIF metadata.
    upload_path   = auto_orient_tempfile(src_path)
    uploaded_name = @client.upload_image(upload_path)
    workflow      = inject_input(@workflow, uploaded_name)
    prompt_id     = @client.submit_prompt(workflow)
    @out.puts "  prompt #{prompt_id}"

    filenames = @client.wait_for_output(prompt_id)
    raise "No outputs returned for #{src_path}" if filenames.empty?

    # ComfyUI outputs PNG; download then convert to original format.
    @client.download_output(filenames.first, tmp_png)
    save_with_corrections(tmp_png, dest_path, ext)

    # Download the JSON metadata written by WritePhotoMetadata and render it
    # as a human-readable .md report alongside the enhanced photo.
    # ComfyUI appends _NNNNN_ counter: "enhanced_abc123__00001_.png" → "enhanced_abc123_"
    prefix    = filenames.first.sub(/_\d+_\.png$/, '')
    meta_file = "#{prefix}meta.json"
    md_path   = File.join(File.dirname(dest_path),
                          "#{File.basename(dest_path, File.extname(dest_path))}.md")
    download_and_write_md(meta_file, src_path, dest_path, md_path)

    @manifest.mark_done(src_path)
    @out.puts "  -> #{dest_path} (#{kb(src_path)} KB -> #{kb(dest_path)} KB)"
  rescue StandardError => e
    @out.puts "  ERROR #{File.basename(src_path)}: #{e.message}"
  ensure
    remove_scratch(tmp_png)
    # auto_orient_tempfile may hand back the original; never delete that.
    remove_scratch(upload_path) unless upload_path == src_path
  end

  # Run magick -auto-orient into a temp file so EXIF rotation is baked in.
  # Falls back to the original path if magick is unavailable or fails; a
  # partial scratch file left by a failed run is removed.
  def auto_orient_tempfile(src_path)
    ext = File.extname(src_path)
    tmp = "#{src_path}.orient#{ext}"
    return tmp if system('magick', src_path, '-auto-orient', tmp) && File.exist?(tmp)

    remove_scratch(tmp)
    @out.puts "  Warning: auto-orient failed, uploading original"
    src_path
  end

  # Convert the downloaded PNG to the target format (JPEG quality 92 for .jpg).
  # No colour processing — pure AI output from Real-ESRGAN is preserved as-is.
  #
  # Raises RuntimeError when magick is missing, exits non-zero, or leaves no
  # output file, so the caller does not record the photo as processed.
  def save_with_corrections(src_png, dest_path, ext)
    quality_args = ext.match?(/\.jpe?g/) ? ['-quality', '92'] : []
    converted = system('magick', src_png, *COLOR_ARGS, *quality_args, dest_path)
    unless converted && File.exist?(dest_path)
      raise "ImageMagick conversion failed for #{File.basename(dest_path)}"
    end

    converted
  end

  # Download the WritePhotoMetadata JSON from ComfyUI output and render it
  # as a Markdown report saved alongside the enhanced photo. Best effort: a
  # non-200 response is skipped silently and any error only logs a warning.
  def download_and_write_md(meta_filename, src_path, dest_path, md_path)
    resp = @client.send(:get,
      "/view?filename=#{URI.encode_www_form_component(meta_filename)}&type=output&subfolder=")
    return unless resp.code == '200'

    meta    = JSON.parse(resp.body)
    profile = meta['enhancement_profile'] || {}
    sky     = meta['sky']                  || {}
    depth   = meta['depth_sharpen']        || {}
    models  = meta['models']               || {}
    scene   = meta['scene_type'] || 'unknown'
    ts      = meta['generated_at'] || Time.now.utc.iso8601

    md = <<~MD
      # #{File.basename(dest_path)} — Enhancement Report

      **Source:** #{File.basename(src_path)} (#{kb(src_path)} KB)
      **Enhanced:** #{File.basename(dest_path)} (#{kb(dest_path)} KB)
      **Processed:** #{ts}

      ## AI Pipeline

      | Step | Model / Node | Device | What it does |
      |------|-------------|--------|--------------|
      | 1 | `#{models['upscaler']}` | GPU | 4× upscale at full 4K input → 16K → back to 4K |
      | 2 | `#{models['face_restore']}` | GPU | Face detection + neural restoration |
      | 3 | `#{models['scene_detect']}` | GPU | Zero-shot scene classification |
      | 4 | Adaptive Photo Grade | CPU | Scene-tuned exposure / contrast / saturation / detail |
      | 5 | Sky Enhance | CPU | HSV sky mask + graduated sky correction |
      | 6 | `#{models['depth']}` | GPU | Depth map → foreground sharp, background soft |

      ## Scene Detection

      | | |
      |-|-|
      | **Detected scene** | #{scene} |

      ## Colour Grading Profile (#{scene})

      | Setting | Value |
      |---------|-------|
      | Exposure | #{signed(profile['exposure_stops'])} stops |
      | Contrast | #{profile['contrast_factor']}× |
      | Saturation | #{profile['saturation_mult']}× |
      | Detail / Clarity | #{profile['detail_mult']}× |
      | Denoise strength | #{profile['denoise_strength']} |

      ## Sky Enhancement

      | Setting | Value |
      |---------|-------|
      | Sky coverage | #{sky['coverage_pct']}% of image |
      | Sky exposure | #{signed(sky['sky_exposure'])} stops |
      | Sky saturation | #{sky['sky_saturation']}× |

      ## Depth-Guided Sharpening

      | Setting | Value |
      |---------|-------|
      | Foreground sharpening | #{depth['foreground_sharpen']}× |
      | Background blur | #{depth['background_blur']} |
    MD

    File.write(md_path, md)
  rescue StandardError => e
    @out.puts "  Warning: could not write metadata report: #{e.message}"
  end

  # Inject the upload filename and a unique prefix into LoadImage, SaveImage,
  # and WritePhotoMetadata to bust ComfyUI's cache and link metadata to image.
  # Returns a deep copy; the workflow passed in is left untouched. A matching
  # node without an 'inputs' hash gets one instead of raising.
  def inject_input(workflow, filename)
    wf     = JSON.parse(JSON.generate(workflow))  # deep dup
    prefix = "enhanced_#{Digest::SHA256.hexdigest(Time.now.to_f.to_s + rand.to_s)[0, 8]}_"
    wf.each_value do |node|
      next unless node.is_a?(Hash)

      case node['class_type']
      when 'LoadImage' then (node['inputs'] ||= {})['image']           = filename
      when 'SaveImage' then (node['inputs'] ||= {})['filename_prefix'] = prefix
      when 'WritePhotoMetadata'
        inputs = (node['inputs'] ||= {})
        inputs['filename_prefix'] = prefix
        inputs['source_filename'] = filename
      end
    end
    wf
  end

  # Formats a stop value for the report: negative numbers keep their own
  # sign, everything else gets a leading "+" (so -0.5 is not shown as "+-0.5").
  def signed(value)
    value.is_a?(Numeric) && value.negative? ? value.to_s : "+#{value}"
  end

  # Deletes a scratch file if present; nil paths and missing files are ignored.
  def remove_scratch(path)
    FileUtils.rm_f(path) if path
  end

  # File size in whole kilobytes (rounded).
  def kb(path)
    (File.size(path) / 1024.0).round
  end
end

# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

# Command-line entry point: parse options, build the config and client, then
# either run a connectivity check (--test) or enhance the photos in --indir.
# User-facing failures print a one-line message and exit non-zero instead of
# dumping a backtrace.
options = {
  config:   File.join(__dir__, 'hyperstack-vm-photo.toml'),
  indir:    nil,
  watch:    false,
  workflow: nil,
  test:     false
}

parser = OptionParser.new do |o|
  o.banner = 'Usage: ruby photo-enhance.rb [options]'
  o.on('--config PATH',   'TOML config (default: hyperstack-vm-photo.toml)') { |v| options[:config]   = v }
  o.on('--indir PATH',    'Directory of photos to enhance')                   { |v| options[:indir]    = v }
  o.on('--workflow PATH', 'ComfyUI workflow JSON override')                   { |v| options[:workflow] = v }
  o.on('--watch',         'Keep running, process new images as they arrive')  { options[:watch] = true }
  o.on('--test',          'Check ComfyUI connectivity only, then exit')       { options[:test]  = true }
  o.on('-h', '--help',    'Show this help') { puts o; exit }
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Unknown flag or missing argument: show the problem plus the usage text.
  abort "#{e.message}\n\n#{parser}"
end

abort "Config not found: #{options[:config]}" unless File.exist?(options[:config])

begin
  cfg = PhotoConfig.new(options[:config], options[:workflow])
rescue StandardError => e
  # Covers TOML syntax errors and a non-integer comfyui.port.
  abort "Invalid config #{options[:config]}: #{e.message}"
end
client = ComfyUIClient.new(host: cfg.host, port: cfg.port)

if options[:test]
  begin
    client.check_connectivity!
  rescue StandardError => e
    warn "ERROR: #{e.message}"
    exit 1
  end
  puts "ComfyUI reachable at http://#{cfg.host}:#{cfg.port} — OK"
  exit 0
end

abort '--indir is required' unless options[:indir]
indir = File.expand_path(options[:indir])
abort "Directory not found: #{indir}" unless File.directory?(indir)
abort "Workflow not found: #{cfg.workflow_path}" unless File.exist?(cfg.workflow_path)

begin
  workflow = JSON.parse(File.read(cfg.workflow_path))
rescue JSON::ParserError => e
  abort "Invalid workflow JSON #{cfg.workflow_path}: #{e.message}"
end

manifest = ProcessedManifest.new(indir)
enhancer = PhotoEnhancer.new(config: cfg, client: client, workflow: workflow,
                              indir: indir, manifest: manifest)
begin
  enhancer.run(watch: options[:watch])
rescue Interrupt
  puts "\nStopped."
rescue StandardError => e
  # Includes connection resets and read timeouts from the initial health
  # check, not only the RuntimeErrors raised by the client itself.
  warn "ERROR: #{e.message}"
  exit 1
end