diff options
| -rw-r--r-- | .hyperstack-vm-photo-state.json | 93 | ||||
| -rw-r--r-- | Gemfile | 1 | ||||
| -rw-r--r-- | photo-compare.rb | 186 | ||||
| -rwxr-xr-x | photo-enhance.rb | 375 | ||||
| -rw-r--r-- | smart_photo_node.py | 536 | ||||
| -rw-r--r-- | workflows/photo-enhance.json | 109 |
6 files changed, 1079 insertions, 221 deletions
diff --git a/.hyperstack-vm-photo-state.json b/.hyperstack-vm-photo-state.json new file mode 100644 index 0000000..8bf47b2 --- /dev/null +++ b/.hyperstack-vm-photo-state.json @@ -0,0 +1,93 @@ +{ + "vm_id": 698370, + "vm_name": "hyperstack-photo-20260324164547", + "environment_name": "snonux-ollama", + "region": "CANADA-1", + "flavor_name": "n3-L40x1", + "image_name": "Ubuntu Server 24.04 LTS R570 CUDA 12.8 with Docker", + "key_name": "earth", + "public_ip": "69.19.136.63", + "created_at": "2026-03-24T16:45:48Z", + "services": { + "vllm_enabled": false, + "ollama_enabled": false, + "comfyui_enabled": true + }, + "security_rules": [ + { + "direction": "ingress", + "ethertype": "IPv4", + "protocol": "tcp", + "port_range_min": 22, + "port_range_max": 22, + "remote_ip_prefix": "79.100.218.77/32" + }, + { + "direction": "ingress", + "ethertype": "IPv4", + "protocol": "udp", + "port_range_min": 56710, + "port_range_max": 56710, + "remote_ip_prefix": "79.100.218.77/32" + }, + { + "direction": "ingress", + "ethertype": "IPv4", + "protocol": "tcp", + "port_range_min": 8188, + "port_range_max": 8188, + "remote_ip_prefix": "192.168.3.0/24" + }, + { + "direction": "ingress", + "ethertype": "IPv4", + "protocol": "tcp", + "port_range_min": 22, + "port_range_max": 22, + "remote_ip_prefix": "79.100.218.77/32" + }, + { + "direction": "ingress", + "ethertype": "IPv4", + "protocol": "udp", + "port_range_min": 56710, + "port_range_max": 56710, + "remote_ip_prefix": "79.100.218.77/32" + }, + { + "direction": "egress", + "ethertype": "IPv4", + "protocol": "any", + "port_range_min": 1, + "port_range_max": 65535, + "remote_ip_prefix": "0.0.0.0/0" + }, + { + "direction": "ingress", + "ethertype": "IPv4", + "protocol": "tcp", + "port_range_min": 8188, + "port_range_max": 8188, + "remote_ip_prefix": "192.168.3.0/24" + }, + { + "direction": "egress", + "ethertype": "IPv6", + "protocol": "any", + "port_range_min": 1, + "port_range_max": 65535, + "remote_ip_prefix": "0.0.0.0/0" + } + ], + 
"bootstrapped_at": "2026-03-24T16:48:36Z", + "wireguard_setup_at": "2026-03-24T16:48:54Z", + "comfyui_setup_at": "2026-03-24T16:52:16Z", + "comfyui_container_name": "comfyui_photo", + "comfyui_models": [ + "RealESRGAN_x4plus", + "SUPIR-v0Q" + ], + "status": "ACTIVE", + "vm_state": "active", + "provisioned_at": "2026-03-24T16:52:30Z" +}
\ No newline at end of file @@ -1,3 +1,4 @@ source "https://rubygems.org" gem "toml-rb", "~> 2.2" +gem "gtk4" diff --git a/photo-compare.rb b/photo-compare.rb new file mode 100644 index 0000000..4f5ec4c --- /dev/null +++ b/photo-compare.rb @@ -0,0 +1,186 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +# photo-compare.rb — Side-by-side before/after photo comparison and selection tool. +# +# Shows each original + enhanced pair side by side, filling the window. +# Press O to move the original to --outdir, E to move the enhanced version, +# Space/S to skip. Rescans after each action so newly finished photos appear. +# +# Usage: +# ruby photo-compare.rb --indir ~/Downloads/fuji --outdir ~/Downloads/fuji/selected +# +# Keyboard shortcuts: +# O — move original to outdir +# E — move enhanced to outdir +# Space/S — skip (leave both, advance to next) +# Q/Escape — quit + +require 'gtk4' +require 'optparse' +require 'fileutils' + +SUPPORTED_EXTENSIONS = %w[.jpg .jpeg .png .webp].freeze + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def find_pairs(indir) + Dir.glob(File.join(indir, '*')) + .select { |f| File.file?(f) && SUPPORTED_EXTENSIONS.include?(File.extname(f).downcase) } + .reject { |f| File.basename(f, '.*').end_with?('_e') } + .reject { |f| File.basename(f).include?('.orient.') } + .filter_map do |orig| + ext = File.extname(orig).downcase # enhanced files always have lowercase ext + base = File.basename(orig, File.extname(orig)) + enh = File.join(File.dirname(orig), "#{base}_e#{ext}") + [orig, enh] if File.exist?(enh) + end + .sort +end + +def kb(path) + (File.size(path) / 1024.0).round +end + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +options = { indir: nil, outdir: nil } +OptionParser.new do |o| + o.banner = 
'Usage: ruby photo-compare.rb --indir DIR --outdir DIR' + o.on('--indir PATH', 'Directory with original + _e photo pairs') { |v| options[:indir] = v } + o.on('--outdir PATH', 'Directory to move selected photos into') { |v| options[:outdir] = v } + o.on('-h', '--help', 'Show this help') { puts o; exit } +end.parse! + +abort '--indir is required' unless options[:indir] +abort '--outdir is required' unless options[:outdir] + +indir = File.expand_path(options[:indir]) +outdir = File.expand_path(options[:outdir]) +FileUtils.mkdir_p(outdir) + +state = { pairs: find_pairs(indir), index: 0, indir: indir, outdir: outdir } +abort "No before/after pairs found in #{indir}" if state[:pairs].empty? + +# --------------------------------------------------------------------------- +# GTK4 UI +# --------------------------------------------------------------------------- + +app = Gtk::Application.new('org.hypr.photo-compare', :default_flags) + +app.signal_connect('activate') do |a| + win = Gtk::ApplicationWindow.new(a) + win.title = 'Photo Compare' + win.maximize # fill the screen + + root = Gtk::Box.new(:vertical, 4) + root.margin_top = root.margin_bottom = root.margin_start = root.margin_end = 6 + win.child = root + + # Top: progress info + progress_lbl = Gtk::Label.new + progress_lbl.xalign = 0 + root.append(progress_lbl) + + # Middle: two pictures side by side — Gtk::Picture scales to fill its container + img_row = Gtk::Box.new(:horizontal, 8) + img_row.vexpand = true + root.append(img_row) + + left_frame = Gtk::Box.new(:vertical, 2) + right_frame = Gtk::Box.new(:vertical, 2) + left_frame.hexpand = right_frame.hexpand = true + left_frame.vexpand = right_frame.vexpand = true + + # Gtk::Picture is GTK4's scaling image widget; content_fit: :contain keeps aspect ratio + left_pic = Gtk::Picture.new + right_pic = Gtk::Picture.new + left_pic.content_fit = :contain + right_pic.content_fit = :contain + left_pic.hexpand = left_pic.vexpand = true + right_pic.hexpand = right_pic.vexpand = 
true + + left_lbl = Gtk::Label.new + right_lbl = Gtk::Label.new + + left_frame.append(left_pic) + left_frame.append(left_lbl) + right_frame.append(right_pic) + right_frame.append(right_lbl) + img_row.append(left_frame) + img_row.append(right_frame) + + # Bottom: action buttons + btn_row = Gtk::Box.new(:horizontal, 16) + btn_row.halign = :center + orig_btn = Gtk::Button.new(label: '← Original [O]') + skip_btn = Gtk::Button.new(label: 'Skip [Space]') + enh_btn = Gtk::Button.new(label: 'Enhanced → [E]') + btn_row.append(orig_btn) + btn_row.append(skip_btn) + btn_row.append(enh_btn) + root.append(btn_row) + + # ----------------------------------------------------------------------- + # Refresh display for current pair + # ----------------------------------------------------------------------- + refresh = lambda do + orig, enh = state[:pairs][state[:index]] + progress_lbl.label = "#{state[:index] + 1} / #{state[:pairs].length} — #{File.basename(orig)}" + left_pic.set_filename(orig) + right_pic.set_filename(enh) + left_lbl.label = "Original (#{kb(orig)} KB)" + right_lbl.label = "Enhanced (#{kb(enh)} KB)" + end + + # ----------------------------------------------------------------------- + # After moving (or skipping), rescan and show next pair. + # Moving removes the pair from the list, so index stays put and naturally + # points at the next pair. Skip increments the index explicitly. + # ----------------------------------------------------------------------- + advance = lambda do |pick| + unless pick.nil? + FileUtils.mv(pick, File.join(state[:outdir], File.basename(pick))) + else + state[:index] += 1 + end + + state[:pairs] = find_pairs(state[:indir]) + + if state[:index] >= state[:pairs].length + progress_lbl.label = 'All pairs reviewed — you can close the window.' 
+ left_pic.set_filename(nil) + right_pic.set_filename(nil) + left_lbl.label = right_lbl.label = '' + [orig_btn, skip_btn, enh_btn].each { |b| b.sensitive = false } + else + refresh.call + end + end + + orig_btn.signal_connect('clicked') { advance.call(state[:pairs][state[:index]][0]) } + enh_btn.signal_connect('clicked') { advance.call(state[:pairs][state[:index]][1]) } + skip_btn.signal_connect('clicked') { advance.call(nil) } + + key_ctrl = Gtk::EventControllerKey.new + key_ctrl.signal_connect('key-pressed') do |_ctrl, keyval, _code, _mod| + case keyval + when Gdk::Keyval::KEY_o, Gdk::Keyval::KEY_O then orig_btn.emit('clicked') + when Gdk::Keyval::KEY_e, Gdk::Keyval::KEY_E then enh_btn.emit('clicked') + when Gdk::Keyval::KEY_s, Gdk::Keyval::KEY_S, + Gdk::Keyval::KEY_space then skip_btn.emit('clicked') + when Gdk::Keyval::KEY_q, Gdk::Keyval::KEY_Escape then a.quit + end + false + end + win.add_controller(key_ctrl) + + refresh.call + win.show +end + +exit app.run([]) diff --git a/photo-enhance.rb b/photo-enhance.rb index 39f3942..79c2e4c 100755 --- a/photo-enhance.rb +++ b/photo-enhance.rb @@ -1,19 +1,28 @@ #!/usr/bin/env ruby # frozen_string_literal: true -# photo-enhance.rb — Photolemur-style automatic photo enhancer via ComfyUI. +# photo-enhance.rb — AI photo enhancer via ComfyUI on a Hyperstack GPU VM. # -# Submits images from --indir to the ComfyUI REST API running on a Hyperstack VM, -# downloads the enhanced results to --outdir, and optionally watches for new files. +# Submits images from --indir to the ComfyUI REST API, downloads the AI-enhanced +# results and saves alongside the originals with an _e suffix. Also downloads +# a per-photo JSON metadata file written by the WritePhotoMetadata ComfyUI node +# and converts it to a human-readable .md report alongside each enhanced photo. +# +# AI pipeline (ComfyUI, GPU): +# 1. Real-ESRGAN realesr-general-x4v3 — 4× upscale at full 4K input, AI denoise +# 2. CodeFormer fidelity=0.7 — neural face restoration +# 3. 
CLIP ViT-B/32 — scene classification (portrait/landscape/…) +# 4. AdaptivePhotoGrade — scene-tuned exposure/contrast/saturation/detail +# 5. SkyEnhance — HSV sky mask + graduated sky correction +# 6. Depth Anything V2 Small — depth map → foreground sharp, background soft # # Usage: # ruby photo-enhance.rb --config hyperstack-vm-photo.toml \ -# --indir ~/Pictures --outdir ~/Pictures/enhanced [--watch] [--workflow workflows/photo-enhance.json] +# --indir ~/Pictures [--watch] [--workflow workflows/photo-enhance.json] # # Requirements: -# - ComfyUI VM provisioned with: ruby hyperstack.rb --config hyperstack-vm-photo.toml create -# - WireGuard tunnel active (wg1): verified via curl http://hyperstack-photo.wg1:8188/system_stats -# - Ruby stdlib only (no extra gems needed). +# - ComfyUI VM: ruby hyperstack.rb --config hyperstack-vm-photo.toml create +# - WireGuard tunnel active (wg1) begin require 'bundler/setup' @@ -27,6 +36,7 @@ require 'optparse' require 'fileutils' require 'digest' require 'time' +require 'set' begin require 'toml-rb' @@ -36,18 +46,18 @@ rescue LoadError end # --------------------------------------------------------------------------- -# Config loading — reads only the fields photo-enhance.rb needs from the TOML. 
+# Config # --------------------------------------------------------------------------- class PhotoConfig attr_reader :host, :port, :workflow_path def initialize(config_path, workflow_path_override) - raw = TomlRB.load_file(File.expand_path(config_path)) - hostname = raw.dig('vm', 'hostname') || 'hyperstack-photo' + raw = TomlRB.load_file(File.expand_path(config_path)) + hostname = raw.dig('vm', 'hostname') || 'hyperstack-photo' interface = raw.dig('local_client', 'interface_name') || 'wg1' - @host = "#{hostname}.#{interface}" - @port = Integer(raw.dig('comfyui', 'port') || 8188) + @host = "#{hostname}.#{interface}" + @port = Integer(raw.dig('comfyui', 'port') || 8188) @workflow_path = workflow_path_override || File.join(File.dirname(File.expand_path(config_path)), 'workflows', 'photo-enhance.json') end @@ -59,7 +69,7 @@ end class ComfyUIClient POLL_INTERVAL_SEC = 2 - POLL_TIMEOUT_SEC = 600 # 10 minutes per image (SUPIR can be slow on first load) + POLL_TIMEOUT_SEC = 300 # 5 minutes; ESRGAN is fast on GPU def initialize(host:, port:, out: $stdout) @host = host @@ -67,98 +77,80 @@ class ComfyUIClient @out = out end - # Upload a local image file; returns the filename ComfyUI assigned it. 
def upload_image(file_path) - filename = File.basename(file_path) + filename = File.basename(file_path) image_data = File.binread(file_path) - boundary = "----RubyPhotoEnhance#{SecureRandom_hex(8)}" - + boundary = "----RubyPhotoEnhance#{hex(8)}" body = [ "--#{boundary}\r\n", "Content-Disposition: form-data; name=\"image\"; filename=\"#{filename}\"\r\n", - "Content-Type: #{mime_type_for(file_path)}\r\n\r\n", + "Content-Type: #{mime_type(file_path)}\r\n\r\n", image_data, "\r\n--#{boundary}\r\n", "Content-Disposition: form-data; name=\"overwrite\"\r\n\r\ntrue\r\n", "--#{boundary}--\r\n" ].join - resp = post_raw('/upload/image', body, "multipart/form-data; boundary=#{boundary}") - raise "Upload failed (HTTP #{resp.code}): #{resp.body}" unless resp.code == '200' - + raise "Upload failed (#{resp.code}): #{resp.body}" unless resp.code == '200' JSON.parse(resp.body)['name'] || filename rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e - raise "Cannot reach ComfyUI at #{@host}:#{@port} — is WireGuard (wg1) active? (#{e.message})" + raise "Cannot reach ComfyUI at #{@host}:#{@port} — is WireGuard active? (#{e.message})" end - # Submit a workflow; returns the prompt_id string. def submit_prompt(workflow) resp = post_json('/prompt', { 'prompt' => workflow }) - raise "Prompt submission failed (HTTP #{resp.code}): #{resp.body}" unless resp.code == '200' - - JSON.parse(resp.body)['prompt_id'] or raise "No prompt_id in response: #{resp.body}" + raise "Prompt failed (#{resp.code}): #{resp.body}" unless resp.code == '200' + JSON.parse(resp.body)['prompt_id'] or raise "No prompt_id in: #{resp.body}" end - # Poll until the prompt finishes; returns the list of output filenames. 
def wait_for_output(prompt_id) deadline = Time.now + POLL_TIMEOUT_SEC loop do - raise "Timed out after #{POLL_TIMEOUT_SEC}s waiting for prompt #{prompt_id}" if Time.now > deadline + raise "Timed out after #{POLL_TIMEOUT_SEC}s for #{prompt_id}" if Time.now > deadline - resp = get("/history/#{prompt_id}") - raise "History poll failed (HTTP #{resp.code})" unless resp.code == '200' + resp = get("/history/#{prompt_id}") + raise "History poll failed (#{resp.code})" unless resp.code == '200' - history = JSON.parse(resp.body) - result = history[prompt_id] + result = JSON.parse(resp.body)[prompt_id] if result - outputs = extract_output_filenames(result) + outputs = extract_filenames(result) return outputs unless outputs.empty? - # If ComfyUI marks the run complete but outputs are empty, it used a fully - # cached execution (execution_cached for all nodes) and wrote no new files. - # Raise immediately rather than spinning until timeout. + # ComfyUI cached the run (identical inputs) and wrote no new files — bail fast. status = result.dig('status', 'status_str') - completed = result.dig('status', 'completed') - raise "ComfyUI returned empty outputs (cached execution?) for #{prompt_id}" \ - if completed && status == 'success' - - # ComfyUI may record the prompt before writing output nodes; keep polling. + raise "ComfyUI cached execution returned no outputs for #{prompt_id}" \ + if result.dig('status', 'completed') && status == 'success' end sleep POLL_INTERVAL_SEC end end - # Download an output image; saves to dest_path. def download_output(filename, dest_path) resp = get("/view?filename=#{URI.encode_www_form_component(filename)}&type=output&subfolder=") - raise "Download failed (HTTP #{resp.code}) for #{filename}" unless resp.code == '200' - + raise "Download failed (#{resp.code}) for #{filename}" unless resp.code == '200' FileUtils.mkdir_p(File.dirname(dest_path)) File.binwrite(dest_path, resp.body) end - # Quick connectivity check; raises on failure. def check_connectivity! 
resp = get('/system_stats') - raise "ComfyUI health check failed (HTTP #{resp.code}): #{resp.body}" unless resp.code == '200' + raise "Health check failed (#{resp.code}): #{resp.body}" unless resp.code == '200' rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e - raise "Cannot reach ComfyUI at #{@host}:#{@port} — is WireGuard (wg1) active? (#{e.message})" + raise "Cannot reach ComfyUI at #{@host}:#{@port} — is WireGuard active? (#{e.message})" end private - def extract_output_filenames(result) + def extract_filenames(result) Array(result.dig('outputs')) - .flat_map { |_node_id, node_out| Array(node_out['images']) } + .flat_map { |_id, node| Array(node['images']) } .map { |img| img['filename'] } - .compact - .reject(&:empty?) + .compact.reject(&:empty?) end def get(path) - uri = URI("http://#{@host}:#{@port}#{path}") - Net::HTTP.get_response(uri) + Net::HTTP.get_response(URI("http://#{@host}:#{@port}#{path}")) end def post_json(path, payload) @@ -166,7 +158,7 @@ class ComfyUIClient req = Net::HTTP::Post.new(uri) req['Content-Type'] = 'application/json' req.body = JSON.generate(payload) - Net::HTTP.start(uri.host, uri.port) { |http| http.request(req) } + Net::HTTP.start(uri.host, uri.port) { |h| h.request(req) } end def post_raw(path, body, content_type) @@ -174,11 +166,11 @@ class ComfyUIClient req = Net::HTTP::Post.new(uri) req['Content-Type'] = content_type req.body = body - Net::HTTP.start(uri.host, uri.port, read_timeout: 120) { |http| http.request(req) } + Net::HTTP.start(uri.host, uri.port, read_timeout: 120) { |h| h.request(req) } end - def mime_type_for(file_path) - case File.extname(file_path).downcase + def mime_type(path) + case File.extname(path).downcase when '.jpg', '.jpeg' then 'image/jpeg' when '.png' then 'image/png' when '.webp' then 'image/webp' @@ -186,27 +178,25 @@ class ComfyUIClient end end - # Minimal hex token without SecureRandom (pure stdlib). 
- def SecureRandom_hex(n) + def hex(n) Digest::SHA256.hexdigest(Time.now.to_f.to_s + rand.to_s)[0, n * 2] end end # --------------------------------------------------------------------------- -# Manifest — tracks which files have been processed to avoid re-enhancing. +# Manifest — avoids re-processing files across runs and in watch mode. # --------------------------------------------------------------------------- class ProcessedManifest - MANIFEST_FILE = '.photo-enhance-processed' + FILE_NAME = '.photo-enhance-processed' - def initialize(outdir) - @path = File.join(outdir, MANIFEST_FILE) + def initialize(dir) + @path = File.join(dir, FILE_NAME) @entries = load_entries end def processed?(file_path) - key = digest(file_path) - @entries.include?(key) + @entries.include?(digest(file_path)) end def mark_done(file_path) @@ -218,12 +208,11 @@ class ProcessedManifest private def load_entries - return [] unless File.exist?(@path) - + return Set.new unless File.exist?(@path) File.readlines(@path, chomp: true).map(&:strip).reject(&:empty?).to_set end - # Digest includes mtime so a re-shot of the same filename is re-processed. + # Covers basename + size + mtime so a re-shot of the same filename re-processes. def digest(file_path) stat = File.stat(file_path) Digest::SHA256.hexdigest("#{File.basename(file_path)}:#{stat.size}:#{stat.mtime.to_i}") @@ -233,12 +222,16 @@ class ProcessedManifest end # --------------------------------------------------------------------------- -# Enhancer — orchestrates upload → prompt → poll → download for one image. +# Enhancer — orchestrates upload → AI → download → colour correct per image. # --------------------------------------------------------------------------- class PhotoEnhancer SUPPORTED_EXTENSIONS = %w[.jpg .jpeg .png .webp].freeze + # No colour corrections — pure AI output from Real-ESRGAN is used as-is. + # ImageMagick is only used to bake EXIF rotation and convert PNG→JPEG. 
+ COLOR_ARGS = [].freeze + def initialize(config:, client:, workflow:, indir:, manifest:, out: $stdout) @config = config @client = client @@ -248,120 +241,177 @@ class PhotoEnhancer @out = out end - def enhance_directory(indir, watch: false) + def run(watch: false) @client.check_connectivity! @out.puts "ComfyUI ready at http://#{@config.host}:#{@config.port}" - @out.puts "Enhancing photos in #{indir} (output: <name>_e.<ext> alongside originals)" + @out.puts "Enhancing photos in #{@indir}" @out.puts watch ? '(watch mode — Ctrl-C to stop)' : '' loop do - pending = find_pending_images(indir) - pending.each { |path| enhance_one(path) } + find_pending.each { |path| enhance_one(path) } break unless watch - sleep 5 end end private - def find_pending_images(indir) - Dir.glob(File.join(indir, '*')) + def find_pending + Dir.glob(File.join(@indir, '*')) .select { |f| File.file?(f) && SUPPORTED_EXTENSIONS.include?(File.extname(f).downcase) } .reject { |f| File.basename(f, '.*').end_with?('_e') } + .reject { |f| File.basename(f).include?('.orient.') } .reject { |f| @manifest.processed?(f) } .sort end def enhance_one(src_path) - basename = File.basename(src_path, '.*') - ext = File.extname(src_path).downcase - # Output lives in the same directory as the original, with an _enhanced suffix - # before the extension (e.g. photo.jpg -> photo_enhanced.jpg). - dest_path = File.join(File.dirname(src_path), "#{basename}_e#{ext}") + ext = File.extname(src_path).downcase + basename = File.basename(src_path, File.extname(src_path)) + dest_path = File.join(File.dirname(src_path), "#{basename}_e#{ext}") - @out.puts "[#{Time.now.strftime('%H:%M:%S')}] Enhancing #{File.basename(src_path)}..." + @out.puts "[#{Time.now.strftime('%H:%M:%S')}] #{File.basename(src_path)}" - # Auto-rotate based on EXIF orientation before uploading. ComfyUI strips EXIF, - # so we bake the rotation into a temp file; this ensures output is correctly oriented. 
- upload_path = auto_orient_tempfile(src_path) + # Bake in EXIF rotation before uploading — ComfyUI strips EXIF metadata. + upload_path = auto_orient_tempfile(src_path) uploaded_name = @client.upload_image(upload_path) - workflow = inject_input_image(@workflow, uploaded_name) + workflow = inject_input(@workflow, uploaded_name) prompt_id = @client.submit_prompt(workflow) - @out.puts " Submitted prompt #{prompt_id}, waiting for ComfyUI..." + @out.puts " prompt #{prompt_id}" filenames = @client.wait_for_output(prompt_id) - raise "No output images returned for #{src_path}" if filenames.empty? - - # ComfyUI SaveImage always outputs PNG. Download to a temp file then convert - # to the original format (JPEG for .jpg/.jpeg) so file sizes stay comparable. - tmp_path = "#{dest_path}.tmp.png" - @client.download_output(filenames.first, tmp_path) - convert_to_original_format(tmp_path, dest_path, ext) - File.delete(tmp_path) if File.exist?(tmp_path) + raise "No outputs returned for #{src_path}" if filenames.empty? + + # ComfyUI outputs PNG; download then convert to original format. + tmp_png = "#{dest_path}.tmp.png" + @client.download_output(filenames.first, tmp_png) + save_with_corrections(tmp_png, dest_path, ext) + File.delete(tmp_png) if File.exist?(tmp_png) File.delete(upload_path) if upload_path != src_path && File.exist?(upload_path) + + # Download the JSON metadata written by WritePhotoMetadata and render it + # as a human-readable .md report alongside the enhanced photo. 
+ # ComfyUI appends _NNNNN_ counter: "enhanced_abc123__00001_.png" → "enhanced_abc123_" + prefix = filenames.first.sub(/_\d+_\.png$/, '') + meta_file = "#{prefix}meta.json" + md_path = File.join(File.dirname(dest_path), + "#{File.basename(dest_path, File.extname(dest_path))}.md") + download_and_write_md(meta_file, src_path, dest_path, md_path) + @manifest.mark_done(src_path) - orig_size = File.size(src_path) - enhanced_size = File.size(dest_path) - @out.puts " Saved -> #{dest_path} (#{kb(orig_size)} KB -> #{kb(enhanced_size)} KB)" + @out.puts " -> #{dest_path} (#{kb(src_path)} KB -> #{kb(dest_path)} KB)" rescue StandardError => e - @out.puts " ERROR enhancing #{File.basename(src_path)}: #{e.message}" + @out.puts " ERROR #{File.basename(src_path)}: #{e.message}" end - # Apply EXIF auto-orientation to a copy of src_path and return the copy's path. - # If magick fails (e.g. not installed or no EXIF), returns src_path unchanged so - # the caller always has a valid upload path. + # Run magick -auto-orient into a temp file so EXIF rotation is baked in. + # Falls back to the original path if magick is unavailable. 
def auto_orient_tempfile(src_path) - ext = File.extname(src_path) - tmp = "#{src_path}.orient#{ext}" - success = system('magick', src_path, '-auto-orient', tmp) - return tmp if success && File.exist?(tmp) + ext = File.extname(src_path) + tmp = "#{src_path}.orient#{ext}" + return tmp if system('magick', src_path, '-auto-orient', tmp) && File.exist?(tmp) - @out.puts " Warning: auto-orient failed for #{File.basename(src_path)}, uploading original" + @out.puts " Warning: auto-orient failed, uploading original" src_path end - # Convert the PNG downloaded from ComfyUI into the desired output format and - # apply local colour corrections via ImageMagick: - # -sigmoidal-contrast 3,50% — gentle S-curve (lifts shadows, adds punch) - # -modulate 100,120,100 — +20% saturation (vibrance-style boost) - # -unsharp 0x1.5+0.7+0.02 — mild clarity / micro-contrast sharpening - # PNG output gets the same corrections but stays lossless. - def convert_to_original_format(src_png, dest_path, original_ext) - color_args = [ - '-sigmoidal-contrast', '3,50%', - '-modulate', '100,120,100', - '-unsharp', '0x1.5+0.7+0.02' - ] - case original_ext - when '.jpg', '.jpeg' - system('magick', src_png, *color_args, '-quality', '92', dest_path) - else - system('magick', src_png, *color_args, dest_path) - end + # Convert the downloaded PNG to the target format (JPEG quality 92 for .jpg). + # No colour processing — pure AI output from Real-ESRGAN is preserved as-is. + def save_with_corrections(src_png, dest_path, ext) + quality_args = ext.match?(/\.jpe?g/) ? ['-quality', '92'] : [] + system('magick', src_png, *COLOR_ARGS, *quality_args, dest_path) end - def kb(bytes) - (bytes / 1024.0).round + # Download the WritePhotoMetadata JSON from ComfyUI output and render it + # as a Markdown report saved alongside the enhanced photo. 
+ def download_and_write_md(meta_filename, src_path, dest_path, md_path) + resp = @client.send(:get, + "/view?filename=#{URI.encode_www_form_component(meta_filename)}&type=output&subfolder=") + return unless resp.code == '200' + + meta = JSON.parse(resp.body) + profile = meta['enhancement_profile'] || {} + sky = meta['sky'] || {} + depth = meta['depth_sharpen'] || {} + models = meta['models'] || {} + scene = meta['scene_type'] || 'unknown' + ts = meta['generated_at'] || Time.now.utc.iso8601 + + md = <<~MD + # #{File.basename(dest_path)} — Enhancement Report + + **Source:** #{File.basename(src_path)} (#{kb(src_path)} KB) + **Enhanced:** #{File.basename(dest_path)} (#{kb(dest_path)} KB) + **Processed:** #{ts} + + ## AI Pipeline + + | Step | Model / Node | Device | What it does | + |------|-------------|--------|--------------| + | 1 | `#{models['upscaler']}` | GPU | 4× upscale at full 4K input → 16K → back to 4K | + | 2 | `#{models['face_restore']}` | GPU | Face detection + neural restoration | + | 3 | `#{models['scene_detect']}` | GPU | Zero-shot scene classification | + | 4 | Adaptive Photo Grade | CPU | Scene-tuned exposure / contrast / saturation / detail | + | 5 | Sky Enhance | CPU | HSV sky mask + graduated sky correction | + | 6 | `#{models['depth']}` | GPU | Depth map → foreground sharp, background soft | + + ## Scene Detection + + | | | + |-|-| + | **Detected scene** | #{scene} | + + ## Colour Grading Profile (#{scene}) + + | Setting | Value | + |---------|-------| + | Exposure | +#{profile['exposure_stops']} stops | + | Contrast | #{profile['contrast_factor']}× | + | Saturation | #{profile['saturation_mult']}× | + | Detail / Clarity | #{profile['detail_mult']}× | + | Denoise strength | #{profile['denoise_strength']} | + + ## Sky Enhancement + + | Setting | Value | + |---------|-------| + | Sky coverage | #{sky['coverage_pct']}% of image | + | Sky exposure | +#{sky['sky_exposure']} stops | + | Sky saturation | #{sky['sky_saturation']}× | + + ## Depth-Guided 
Sharpening + + | Setting | Value | + |---------|-------| + | Foreground sharpening | #{depth['foreground_sharpen']}× | + | Background blur | #{depth['background_blur']} | + MD + + File.write(md_path, md) + rescue StandardError => e + @out.puts " Warning: could not write metadata report: #{e.message}" end - # Inject the input filename and a unique SaveImage prefix into the workflow. - # The unique prefix prevents ComfyUI from returning a fully-cached execution - # (outputs: {}) instead of actually running the pipeline and writing output files. - def inject_input_image(workflow, filename) - modified = JSON.parse(JSON.generate(workflow)) # deep dup - unique_prefix = "enhanced_#{Digest::SHA256.hexdigest(Time.now.to_f.to_s + rand.to_s)[0, 8]}_" - modified.each_value do |node| + # Inject the upload filename and a unique prefix into LoadImage, SaveImage, + # and WritePhotoMetadata to bust ComfyUI's cache and link metadata to image. + def inject_input(workflow, filename) + wf = JSON.parse(JSON.generate(workflow)) # deep dup + prefix = "enhanced_#{Digest::SHA256.hexdigest(Time.now.to_f.to_s + rand.to_s)[0, 8]}_" + wf.each_value do |node| next unless node.is_a?(Hash) - case node['class_type'] - when 'LoadImage' - node['inputs']['image'] = filename - when 'SaveImage' - node['inputs']['filename_prefix'] = unique_prefix + when 'LoadImage' then node['inputs']['image'] = filename + when 'SaveImage' then node['inputs']['filename_prefix'] = prefix + when 'WritePhotoMetadata' + node['inputs']['filename_prefix'] = prefix + node['inputs']['source_filename'] = filename end end - modified + wf + end + + def kb(path) + (File.size(path) / 1024.0).round end end @@ -379,18 +429,15 @@ options = { OptionParser.new do |o| o.banner = 'Usage: ruby photo-enhance.rb [options]' - o.on('--config PATH', 'TOML config file (default: hyperstack-vm-photo.toml)') { |v| options[:config] = v } - o.on('--indir PATH', 'Directory of photos to enhance (output: <name>_enhanced.<ext> in same dir)') { |v| 
options[:indir] = v } - o.on('--workflow PATH', 'ComfyUI workflow JSON (default: workflows/photo-enhance.json)') { |v| options[:workflow] = v } - o.on('--watch', 'Keep running and process new images as they arrive') { options[:watch] = true } - o.on('--test', 'Only check connectivity to ComfyUI, then exit') { options[:test] = true } + o.on('--config PATH', 'TOML config (default: hyperstack-vm-photo.toml)') { |v| options[:config] = v } + o.on('--indir PATH', 'Directory of photos to enhance') { |v| options[:indir] = v } + o.on('--workflow PATH', 'ComfyUI workflow JSON override') { |v| options[:workflow] = v } + o.on('--watch', 'Keep running, process new images as they arrive') { options[:watch] = true } + o.on('--test', 'Check ComfyUI connectivity only, then exit') { options[:test] = true } o.on('-h', '--help', 'Show this help') { puts o; exit } end.parse! -unless File.exist?(options[:config]) - warn "Config not found: #{options[:config]}" - exit 1 -end +abort "Config not found: #{options[:config]}" unless File.exist?(options[:config]) cfg = PhotoConfig.new(options[:config], options[:workflow]) client = ComfyUIClient.new(host: cfg.host, port: cfg.port) @@ -398,42 +445,26 @@ client = ComfyUIClient.new(host: cfg.host, port: cfg.port) if options[:test] begin client.check_connectivity! 
- puts "ComfyUI is reachable at http://#{cfg.host}:#{cfg.port} — OK" + puts "ComfyUI reachable at http://#{cfg.host}:#{cfg.port} — OK" exit 0 rescue RuntimeError => e - warn "ERROR: #{e.message}" - exit 1 + warn "ERROR: #{e.message}"; exit 1 end end -unless options[:indir] - warn '--indir is required (use --test to only check connectivity)' - exit 1 -end - +abort '--indir is required' unless options[:indir] indir = File.expand_path(options[:indir]) - -unless File.directory?(indir) - warn "Input directory not found: #{indir}" - exit 1 -end - -unless File.exist?(cfg.workflow_path) - warn "Workflow JSON not found: #{cfg.workflow_path}" - warn "Expected at #{File.join(__dir__, 'workflows', 'photo-enhance.json')}" - exit 1 -end +abort "Directory not found: #{indir}" unless File.directory?(indir) +abort "Workflow not found: #{cfg.workflow_path}" unless File.exist?(cfg.workflow_path) workflow = JSON.parse(File.read(cfg.workflow_path)) -# Manifest lives in the indir so it stays with the photos. manifest = ProcessedManifest.new(indir) enhancer = PhotoEnhancer.new(config: cfg, client: client, workflow: workflow, indir: indir, manifest: manifest) begin - enhancer.enhance_directory(indir, watch: options[:watch]) + enhancer.run(watch: options[:watch]) rescue RuntimeError => e - warn "ERROR: #{e.message}" - exit 1 + warn "ERROR: #{e.message}"; exit 1 rescue Interrupt puts "\nStopped." 
end diff --git a/smart_photo_node.py b/smart_photo_node.py new file mode 100644 index 0000000..114ff4e --- /dev/null +++ b/smart_photo_node.py @@ -0,0 +1,536 @@ +""" +Smart Photo Enhancement Nodes for ComfyUI +========================================== +Four AI-driven nodes that replace static colour-correction filters with +content-aware processing: + + CLIPSceneDetect — CLIP zero-shot classification → scene label + AdaptivePhotoGrade — scene-tuned exposure / contrast / saturation / detail + SkyEnhance — HSV sky mask + graduated exposure & saturation boost + DepthSelectiveSharpen— Depth-Anything depth map → foreground sharp, BG soft + +All heavy models are loaded once and kept in _MODEL_CACHE between prompts. +""" + +import torch +import numpy as np +import cv2 +from PIL import Image + +# --------------------------------------------------------------------------- +# Global model cache — prevents reloading 100–600 MB models every frame +# --------------------------------------------------------------------------- +_MODEL_CACHE: dict = {} + + +def _cached_model(key: str, loader_fn): + """Return a cached model, loading it on the first call.""" + if key not in _MODEL_CACHE: + _MODEL_CACHE[key] = loader_fn() + return _MODEL_CACHE[key] + + +# --------------------------------------------------------------------------- +# CLIPSceneDetect +# --------------------------------------------------------------------------- +class CLIPSceneDetect: + """ + Zero-shot scene classification using OpenAI CLIP (ViT-B/32, ~600 MB). + Matches the photo against 8 descriptive text prompts and emits the + winning scene label as a STRING for AdaptivePhotoGrade to consume. 
+ + Scene labels: portrait | landscape | night | indoor | + golden_hour | overcast | beach | street + """ + + # Text prompts whose cosine similarity to the image selects the scene + SCENE_PROMPTS = [ + "a portrait photograph of a person or people", + "a landscape photograph of nature or scenery outdoors", + "a night photograph taken in low light or darkness", + "an indoor photograph inside a room or building", + "a golden hour or sunset photograph with warm orange light", + "an overcast or cloudy day outdoor photograph", + "a beach, ocean, or waterfront photograph", + "a street, city, or urban photograph", + ] + SCENE_LABELS = [ + "portrait", "landscape", "night", "indoor", + "golden_hour", "overcast", "beach", "street", + ] + + @classmethod + def INPUT_TYPES(cls): + return {"required": {"image": ("IMAGE",)}} + + RETURN_TYPES = ("IMAGE", "STRING") + RETURN_NAMES = ("image", "scene_type") + FUNCTION = "detect" + CATEGORY = "image/smart" + + def detect(self, image): + from transformers import CLIPProcessor, CLIPModel + + device = "cuda" if torch.cuda.is_available() else "cpu" + + def _load(): + print("[CLIPSceneDetect] Loading CLIP ViT-B/32…") + m = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device).eval() + p = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32") + return m, p + + model, processor = _cached_model("clip_scene", _load) + + # Use the first image in the batch; all frames are the same scene + img_np = (image[0].cpu().numpy() * 255).astype(np.uint8) + img_pil = Image.fromarray(img_np) + + inputs = processor( + text=self.SCENE_PROMPTS, + images=img_pil, + return_tensors="pt", + padding=True, + ).to(device) + + with torch.no_grad(): + logits = model(**inputs).logits_per_image[0] + probs = logits.softmax(dim=0).cpu() + + idx = int(probs.argmax()) + scene = self.SCENE_LABELS[idx] + conf = float(probs[idx]) + print(f"[CLIPSceneDetect] → {scene} ({conf:.1%})") + return (image, scene) + + +# 
# ---------------------------------------------------------------------------
# AdaptivePhotoGrade
# ---------------------------------------------------------------------------
class AdaptivePhotoGrade:
    """
    Scene-adaptive colour grading node.

    Applies, in order, a linear-light exposure shift, midpoint contrast,
    HSV saturation, and guided-filter clarity enhancement, with parameters
    tuned per scene type. Falls back to balanced 'default' settings for
    unknown scene labels.

    Replaces the three static ComfyUI-Image-Filters nodes
    (ExposureAdjust + AdjustContrast + EnhanceDetail) with one smart node
    that adapts to content.
    """

    # Per-scene profiles: exposure in stops, contrast factor, saturation
    # multiplier, detail enhancement multiplier, denoise strength (0..1).
    PROFILES = {
        # Portraits: gentle — preserve skin tones, avoid over-sharpening hair
        "portrait":    dict(stops=0.30, contrast=1.10, saturation=1.00, detail=1.2, denoise=0.15),
        # Landscapes: vivid — strong clarity, saturated skies & greens
        "landscape":   dict(stops=0.20, contrast=1.20, saturation=1.15, detail=1.8, denoise=0.05),
        # Night: lift shadows aggressively, reduce sharpening (hides noise)
        "night":       dict(stops=0.80, contrast=1.05, saturation=0.90, detail=0.8, denoise=0.30),
        # Indoor: correct typically warm/dim ambient light
        "indoor":      dict(stops=0.50, contrast=1.15, saturation=1.05, detail=1.3, denoise=0.10),
        # Golden hour: enhance warmth, lift shadow detail
        "golden_hour": dict(stops=0.25, contrast=1.20, saturation=1.20, detail=1.5, denoise=0.05),
        # Overcast: punch contrast to compensate for flat light
        "overcast":    dict(stops=0.40, contrast=1.20, saturation=1.10, detail=1.6, denoise=0.08),
        # Beach: bright scene, protect highlights, boost blues/greens
        "beach":       dict(stops=0.15, contrast=1.15, saturation=1.20, detail=1.7, denoise=0.05),
        # Street: punchy contrast, neutral colour
        "street":      dict(stops=0.35, contrast=1.20, saturation=1.05, detail=1.5, denoise=0.08),
        # Balanced fallback for unrecognised labels
        "default":     dict(stops=0.40, contrast=1.15, saturation=1.05, detail=1.5, denoise=0.10),
    }

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "images": ("IMAGE",),
                "scene_type": ("STRING", {"default": "default"}),
            }
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "grade"
    CATEGORY = "image/smart"

    def grade(self, images, scene_type: str):
        """
        Grade every image of the batch with the profile selected by scene_type.

        Returns a 1-tuple holding the stacked float tensor, clipped to 0..1.
        """
        p = self.PROFILES.get(scene_type, self.PROFILES["default"])
        print(f"[AdaptivePhotoGrade] Scene={scene_type} → {p}")

        results = []
        for img in images:
            arr = img.cpu().numpy().copy()          # [H, W, C] float32 0..1
            arr = self._apply_exposure(arr, p["stops"])
            arr = self._apply_contrast(arr, p["contrast"])
            arr = self._apply_saturation(arr, p["saturation"])
            arr = self._apply_detail(arr, p["detail"], p["denoise"])
            results.append(torch.from_numpy(arr.clip(0, 1)).float())

        return (torch.stack(results),)

    # -- helpers ------------------------------------------------------------

    def _apply_exposure(self, img: np.ndarray, stops: float) -> np.ndarray:
        """
        Per-stop exposure adjustment in linear light.
        Converts sRGB → linear, multiplies by 2^stops, clips highlights, converts back.
        Simple and photographic — avoids Reinhard's tonal compression which
        would darken already-bright Fuji photos.
        """
        # Clamp first: a fractional power of a negative value is NaN, and a
        # single NaN pixel would survive every later clip.
        linear = np.clip(img, 0.0, 1.0) ** 2.2         # sRGB → approximate linear
        linear = linear * (2.0 ** stops)               # shift by N stops (positive = brighter)
        return np.clip(linear ** (1.0 / 2.2), 0, 1)    # back to sRGB, clip overexposed

    def _apply_contrast(self, img: np.ndarray, factor: float) -> np.ndarray:
        """Simple linear contrast around 0.5 midpoint."""
        return np.clip((img - 0.5) * factor + 0.5, 0, 1)

    def _apply_saturation(self, img: np.ndarray, factor: float) -> np.ndarray:
        """HSV saturation boost; factor=1.0 is a no-op."""
        if factor == 1.0:
            # Skip the 8-bit HSV round trip so a neutral factor really leaves
            # the pixels untouched.
            return img
        u8 = (img * 255).astype(np.uint8)
        hsv = cv2.cvtColor(u8, cv2.COLOR_RGB2HSV).astype(np.float32)
        hsv[:, :, 1] = np.clip(hsv[:, :, 1] * factor, 0, 255)
        return cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB).astype(np.float32) / 255.0

    def _apply_detail(self, img: np.ndarray, mult: float, denoise: float) -> np.ndarray:
        """
        Clarity / structure boost via guided-filter edge-preserving decomposition.
        Separates base (low-freq) from detail (high-freq), scales detail by mult,
        optionally denoises the base layer via bilateral filter.

        The detail layer is extracted before the base is denoised, so only the
        base layer is smoothed; the detail layer itself is scaled, not filtered.
        """
        u8 = (img * 255).astype(np.uint8)

        # Guided filter produces an edge-preserving smooth base layer
        # eps controls smoothing strength (higher = more smoothing)
        base = cv2.ximgproc.guidedFilter(u8, u8, radius=8, eps=int(0.01 * 255 ** 2))
        detail = u8.astype(np.float32) - base.astype(np.float32)

        # Optionally soften the base to reduce noise before adding detail back
        if denoise > 0.05:
            sigma = int(denoise * 75)
            base = cv2.bilateralFilter(base, d=5, sigmaColor=sigma, sigmaSpace=sigma)

        enhanced = base.astype(np.float32) + detail * mult
        return np.clip(enhanced / 255.0, 0, 1)


# ---------------------------------------------------------------------------
# SkyEnhance
# ---------------------------------------------------------------------------
class SkyEnhance:
    """
    Sky region detection and graduated enhancement — no ML model required.

    Detects sky using HSV colour ranges (blue sky, white clouds, sunset tones)
    combined with a spatial prior (sky lives in the upper portion of the frame).
    Applies independent exposure + saturation adjustments to the sky mask,
    blended smoothly with the rest of the image.

    Intended for outdoor shots. Images whose mask is near zero are left
    essentially unchanged; note the detection is colour-based only, so bright
    low-saturation or warm-hued areas near the top of any frame can also
    match the cloud / sunset bands.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "images": ("IMAGE",),
                "sky_exposure": ("FLOAT", {"default": 0.30, "min": -1.0, "max": 1.0, "step": 0.05}),
                "sky_saturation": ("FLOAT", {"default": 1.20, "min": 0.5, "max": 2.0, "step": 0.05}),
            }
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "enhance"
    CATEGORY = "image/smart"

    def enhance(self, images, sky_exposure: float = 0.30, sky_saturation: float = 1.20):
        """
        Run sky detection and sky-only grading on every image of the batch.

        Returns a 1-tuple holding the stacked float tensor in 0..1.
        """
        results = []
        for img in images:
            # Clamp before the uint8 cast: values just outside 0..1 would
            # otherwise wrap around instead of saturating.
            arr = (np.clip(img.cpu().numpy(), 0.0, 1.0) * 255).astype(np.uint8)
            mask = self._detect_sky(arr)
            enhanced = self._apply_sky(arr, mask, sky_exposure, sky_saturation)
            results.append(torch.from_numpy(enhanced.astype(np.float32) / 255.0))
        return (torch.stack(results),)

    def _detect_sky(self, img_rgb: np.ndarray) -> np.ndarray:
        """
        Build a soft float sky mask [0..1] using three HSV colour bands
        plus a vertical spatial prior (sky = upper image region).
        """
        h = img_rgb.shape[0]
        hsv = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2HSV).astype(np.float32)
        H, S, V = hsv[:, :, 0], hsv[:, :, 1], hsv[:, :, 2]

        # Band 1: Blue daytime sky (hue 90–140 in OpenCV 0–180 scale)
        blue = ((H >= 90) & (H <= 140) & (S >= 30) & (V >= 50)).astype(np.float32)

        # Band 2: White/grey clouds (low saturation, bright)
        clouds = ((S < 40) & (V >= 180)).astype(np.float32)

        # Band 3: Sunset/golden sky (hue 0–25 or 155–180, moderate sat)
        sunset = (((H <= 25) | (H >= 155)) & (S >= 40) & (V >= 100)).astype(np.float32)

        raw = np.clip(blue + clouds + sunset, 0, 1)

        # Vertical gradient prior: top row = 1.2, bottom row = 0.0.
        # Built as float32 so the mask is not silently promoted to float64.
        y_weight = np.linspace(1.2, 0.0, h, dtype=np.float32)[:, np.newaxis]
        raw = raw * y_weight

        # Morphological close to fill gaps between cloud patches
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
        raw = cv2.morphologyEx(raw, cv2.MORPH_CLOSE, kernel)

        # Large Gaussian blur for smooth mask edges (avoids halo artifacts)
        mask = cv2.GaussianBlur(raw, (51, 51), 0)
        return np.clip(mask, 0, 1)

    def _apply_sky(self, img_rgb: np.ndarray, mask: np.ndarray,
                   sky_exposure: float, sky_saturation: float) -> np.ndarray:
        """Blend sky-enhanced pixels into the original image using the mask."""
        orig = img_rgb.astype(np.float32)

        # Exposure adjustment in linear light — simple shift, no Reinhard compression
        linear = (orig / 255.0) ** 2.2
        linear = np.clip(linear * (2.0 ** sky_exposure), 0, 1)
        sky_exp = np.clip(linear ** (1.0 / 2.2) * 255, 0, 255).astype(np.uint8)

        # Saturation boost in HSV
        hsv = cv2.cvtColor(sky_exp, cv2.COLOR_RGB2HSV).astype(np.float32)
        hsv[:, :, 1] = np.clip(hsv[:, :, 1] * sky_saturation, 0, 255)
        sky_sat = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB).astype(np.float32)

        # Alpha blend: mask=1 → sky-enhanced, mask=0 → original
        mask3 = mask[:, :, np.newaxis]
        result = orig * (1.0 - mask3) + sky_sat * mask3
        return np.clip(result, 0, 255).astype(np.uint8)
# ---------------------------------------------------------------------------
# DepthSelectiveSharpen
# ---------------------------------------------------------------------------
class DepthSelectiveSharpen:
    """
    Depth-guided selective sharpening using Depth Anything V2 Small (~100 MB).

    Estimates a monocular depth map, then:
      • Foreground (near): unsharp-mask sharpening (foreground_sharpen controls
        the detail multiplier; 1.0 = no change, 2.0 = strong sharpening)
      • Background (far): Gaussian blur (background_blur controls kernel size;
        0.0 = no blur, 1.0 = heavy background softening)

    This mimics the depth-of-field separation of a fast prime lens —
    the subject stays razor sharp while busy backgrounds recede.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "images": ("IMAGE",),
                "foreground_sharpen": ("FLOAT", {"default": 1.50, "min": 1.0, "max": 3.0, "step": 0.1}),
                "background_blur": ("FLOAT", {"default": 0.50, "min": 0.0, "max": 1.0, "step": 0.1}),
            }
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "process"
    CATEGORY = "image/smart"

    def process(self, images, foreground_sharpen: float = 1.5, background_blur: float = 0.5):
        """
        Sharpen near regions and soften far regions of every image in the batch.

        Returns a 1-tuple holding the stacked float tensor in 0..1.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"

        def _load():
            print("[DepthSelectiveSharpen] Loading Depth Anything V2 Small…")
            from transformers import pipeline as hf_pipeline
            return hf_pipeline(
                task="depth-estimation",
                model="depth-anything/Depth-Anything-V2-Small-hf",
                device=0 if device == "cuda" else -1,
            )

        depth_pipe = _cached_model("depth_anything_v2", _load)

        results = []
        for img in images:
            # Clamp before the uint8 cast: values just outside 0..1 would
            # otherwise wrap around instead of saturating.
            arr = (np.clip(img.cpu().numpy(), 0.0, 1.0) * 255).astype(np.uint8)
            fg_mask = self._depth_foreground_mask(arr, depth_pipe)
            result = self._blend_sharp_blur(arr, fg_mask, foreground_sharpen, background_blur)
            results.append(torch.from_numpy(result.astype(np.float32) / 255.0))

        return (torch.stack(results),)

    @staticmethod
    def _normalise_nearness(depth_arr: np.ndarray) -> np.ndarray:
        """
        Scale a raw depth-pipeline map to [0, 1] with 1 = nearest pixel.

        The relative Depth Anything models predict inverse depth (the
        pipeline's "depth" image is brighter for closer pixels), so the
        min-max normalised map already is the foreground weight and must
        not be inverted.

        A flat map carries no depth information; it is treated as all
        foreground so the frame is never blurred wholesale.
        """
        d_min, d_max = float(depth_arr.min()), float(depth_arr.max())
        if d_max - d_min < 1e-6:
            return np.ones_like(depth_arr, dtype=np.float32)
        return ((depth_arr - d_min) / (d_max - d_min + 1e-8)).astype(np.float32)

    def _depth_foreground_mask(self, img_rgb: np.ndarray, depth_pipe) -> np.ndarray:
        """
        Run Depth Anything on the image, normalise to [0,1] so that
        near=1 (foreground) and far=0 (background), then resize the map to
        the image size and feather it.
        """
        h, w = img_rgb.shape[:2]
        img_pil = Image.fromarray(img_rgb)

        depth_out = depth_pipe(img_pil)
        depth_arr = np.array(depth_out["depth"], dtype=np.float32)

        # Larger pipeline value = nearer, so this is the foreground weight as-is
        near = self._normalise_nearness(depth_arr)

        # Resize depth map to original image size
        fg_mask = cv2.resize(near, (w, h), interpolation=cv2.INTER_LINEAR)

        # Smooth mask to avoid hard transitions at object boundaries
        fg_mask = cv2.GaussianBlur(fg_mask, (31, 31), 0)
        return fg_mask.clip(0, 1)

    def _blend_sharp_blur(self, img_u8: np.ndarray, fg_mask: np.ndarray,
                          fg_sharpen: float, bg_blur: float) -> np.ndarray:
        """Blend foreground-sharpened and background-blurred versions using depth mask."""
        fg_mask3 = fg_mask[:, :, np.newaxis]
        img_f = img_u8.astype(np.float32)

        # Foreground: unsharp mask sharpening
        if fg_sharpen > 1.0:
            blur = cv2.GaussianBlur(img_f, (0, 0), 2.0)
            detail = img_f - blur
            sharpened = np.clip(img_f + detail * (fg_sharpen - 1.0), 0, 255)
        else:
            sharpened = img_f

        # Background: Gaussian blur
        if bg_blur > 0.05:
            ksize = int(bg_blur * 10) * 2 + 1   # always odd
            blurred = cv2.GaussianBlur(img_u8, (ksize, ksize), bg_blur * 5).astype(np.float32)
        else:
            blurred = img_f

        # Combine: near pixels get sharpened version, far pixels get blurred
        blended = sharpened * fg_mask3 + blurred * (1.0 - fg_mask3)
        return np.clip(blended, 0, 255).astype(np.uint8)


# ---------------------------------------------------------------------------
# WritePhotoMetadata
#
--------------------------------------------------------------------------- +class WritePhotoMetadata: + """ + Writes a per-photo JSON metadata file to the ComfyUI output directory. + + The Ruby photo-enhance.rb script downloads this file after the image, + reads the AI pipeline details (scene type, profile settings, sky coverage, + depth settings), and generates a human-readable .md report alongside the + enhanced JPEG. + + filename_prefix must match the prefix injected into SaveImage so the Ruby + script can find both files by the same prefix. + """ + + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "image": ("IMAGE",), + "scene_type": ("STRING", {"default": "unknown"}), + # Both inputs are injected per-prompt by photo-enhance.rb's inject_input + "filename_prefix": ("STRING", {"default": "enhanced_"}), + "source_filename": ("STRING", {"default": "photo"}), + } + } + + # Pass image through unchanged; side-effect is writing the metadata file + RETURN_TYPES = ("IMAGE",) + FUNCTION = "write" + CATEGORY = "image/smart" + + # Mirrors AdaptivePhotoGrade — keep in sync if profiles change + PROFILES = AdaptivePhotoGrade.PROFILES + + def write(self, image, scene_type: str, filename_prefix: str, source_filename: str): + import json, datetime, os + + # Resolve ComfyUI output directory via its internal module + try: + import folder_paths + out_dir = folder_paths.get_output_directory() + except Exception: + out_dir = "/ephemeral/comfyui/output" + + profile = self.PROFILES.get(scene_type, self.PROFILES["default"]) + + # Compute sky mask coverage as a percentage of the image + sky_coverage = self._sky_coverage(image[0]) + + # source_filename is the upload name on ComfyUI, e.g. 
"DSCF5434.JPG.orient.JPG" + # Strip the .orient.<ext> suffix if present to recover the original base name + base = os.path.basename(source_filename) + base = base.replace(".orient.JPG", "").replace(".orient.jpg", "") + + meta = { + "generated_at": datetime.datetime.utcnow().isoformat() + "Z", + "source_filename": base, + "scene_type": scene_type, + "enhancement_profile": { + "exposure_stops": profile["stops"], + "contrast_factor": profile["contrast"], + "saturation_mult": profile["saturation"], + "detail_mult": profile["detail"], + "denoise_strength": profile["denoise"], + }, + "sky": { + "coverage_pct": round(sky_coverage * 100, 1), + "sky_exposure": 0.30, + "sky_saturation": 1.20, + }, + "depth_sharpen": { + "foreground_sharpen": 1.50, + "background_blur": 0.50, + }, + "models": { + "upscaler": "realesr-general-x4v3 (Real-ESRGAN, GPU)", + "face_restore": "CodeFormer fidelity=0.7 (GPU)", + "scene_detect": "CLIP ViT-B/32 (openai/clip-vit-base-patch32)", + "depth": "Depth Anything V2 Small (GPU)", + }, + } + + # Write as both a prefixed file (for Ruby to download by prefix) and + # a source-named file for easy manual lookup in the output dir + meta_path = os.path.join(out_dir, f"{filename_prefix}meta.json") + with open(meta_path, "w") as f: + json.dump(meta, f, indent=2) + print(f"[WritePhotoMetadata] Wrote {meta_path} (scene={scene_type}, sky={sky_coverage:.1%})") + + return (image,) + + def _sky_coverage(self, img_tensor: "torch.Tensor") -> float: + """Re-use SkyEnhance's mask logic to estimate sky % for reporting.""" + try: + arr = (img_tensor.cpu().numpy() * 255).astype(np.uint8) + helper = SkyEnhance() + mask = helper._detect_sky(arr) + return float(mask.mean()) + except Exception: + return 0.0 + + +# --------------------------------------------------------------------------- +# ComfyUI node registration +# --------------------------------------------------------------------------- +NODE_CLASS_MAPPINGS = { + "CLIPSceneDetect": CLIPSceneDetect, + 
"AdaptivePhotoGrade": AdaptivePhotoGrade, + "SkyEnhance": SkyEnhance, + "DepthSelectiveSharpen": DepthSelectiveSharpen, + "WritePhotoMetadata": WritePhotoMetadata, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "CLIPSceneDetect": "CLIP Scene Detect", + "AdaptivePhotoGrade": "Adaptive Photo Grade", + "SkyEnhance": "Sky Enhance", + "DepthSelectiveSharpen": "Depth Selective Sharpen", + "WritePhotoMetadata": "Write Photo Metadata", +} diff --git a/workflows/photo-enhance.json b/workflows/photo-enhance.json index 034ae4f..6db3e1d 100644 --- a/workflows/photo-enhance.json +++ b/workflows/photo-enhance.json @@ -7,75 +7,86 @@ "upload": "image" } }, - "2": { + "3": { + "class_type": "UpscaleModelLoader", + "_meta": {"title": "Load realesr-general-x4v3 (photo-tuned AI upscaler)"}, + "inputs": { + "model_name": "realesr-general-x4v3.pth" + } + }, + "4": { + "class_type": "ImageUpscaleWithModel", + "_meta": {"title": "Real-ESRGAN 4x — AI denoising + detail recovery at full 4K input (GPU)"}, + "inputs": { + "upscale_model": ["3", 0], + "image": ["1", 0] + } + }, + "5": { "class_type": "ImageScaleBy", - "_meta": {"title": "Downscale to 2K for SUPIR (4x faster processing)"}, + "_meta": {"title": "Scale 16K back to original 4K resolution (0.25x lanczos)"}, "inputs": { - "image": ["1", 0], + "image": ["4", 0], "upscale_method": "lanczos", - "scale_by": 0.5 + "scale_by": 0.25 } }, - "3": { - "class_type": "SUPIR_Upscale", - "_meta": {"title": "SUPIR Restoration at 2K"}, + "6": { + "class_type": "CodeFormerRestore", + "_meta": {"title": "CodeFormer — AI face restoration (GPU, fidelity=0.7)"}, "inputs": { - "supir_model": "SUPIR-v0Q.ckpt", - "sdxl_model": "sd_xl_base_1.0.safetensors", - "image": ["2", 0], - "seed": 42, - "resize_method": "lanczos", - "scale_by": 1.0, - "steps": 8, - "restoration_scale": -1, - "cfg_scale": 7.5, - "a_prompt": "high quality photograph, sharp, detailed, natural colors", - "n_prompt": "blurry, noisy, low quality, artifact, overexposed, underexposed", - "s_churn": 
5, - "s_noise": 1.003, - "control_scale": 1.0, - "cfg_scale_start": 4.0, - "control_scale_start": 0.0, - "color_fix_type": "Wavelet", - "keep_model_loaded": true, - "use_tiled_vae": true, - "encoder_tile_size_pixels": 1024, - "decoder_tile_size_latent": 128, - "fp8_unet": true, - "use_tiled_sampling": true, - "sampler_tile_size": 512, - "sampler_tile_stride": 256 + "image": ["5", 0], + "fidelity": 0.7 } }, - "4": { - "class_type": "UpscaleModelLoader", - "_meta": {"title": "Load Real-ESRGAN 4x"}, + "11": { + "class_type": "CLIPSceneDetect", + "_meta": {"title": "CLIP Scene Detect — classifies scene type (portrait/landscape/night/etc)"}, "inputs": { - "model_name": "RealESRGAN_x4plus.pth" + "image": ["6", 0] } }, - "5": { - "class_type": "ImageUpscaleWithModel", - "_meta": {"title": "Real-ESRGAN 4x upscale back towards original resolution"}, + "12": { + "class_type": "AdaptivePhotoGrade", + "_meta": {"title": "Adaptive Photo Grade — scene-tuned exposure/contrast/saturation/detail"}, "inputs": { - "upscale_model": ["4", 0], - "image": ["3", 0] + "images": ["11", 0], + "scene_type": ["11", 1] } }, - "6": { - "class_type": "ImageScaleBy", - "_meta": {"title": "Scale back to original resolution (2K * 4x * 0.5 = 4K)"}, + "13": { + "class_type": "SkyEnhance", + "_meta": {"title": "Sky Enhance — HSV sky mask + graduated exposure/saturation boost"}, "inputs": { - "image": ["5", 0], - "upscale_method": "lanczos", - "scale_by": 0.5 + "images": ["12", 0], + "sky_exposure": 0.30, + "sky_saturation": 1.20 + } + }, + "14": { + "class_type": "DepthSelectiveSharpen", + "_meta": {"title": "Depth Selective Sharpen — Depth-Anything depth map → foreground sharp, BG soft"}, + "inputs": { + "images": ["13", 0], + "foreground_sharpen": 1.50, + "background_blur": 0.50 + } + }, + "15": { + "class_type": "WritePhotoMetadata", + "_meta": {"title": "Write Photo Metadata — saves per-photo JSON report to ComfyUI output dir"}, + "inputs": { + "image": ["14", 0], + "scene_type": ["11", 1], + 
"filename_prefix": "enhanced_", + "source_filename": "NODE_INPUT_IMAGE" } }, - "7": { + "10": { "class_type": "SaveImage", "_meta": {"title": "Save Enhanced Photo"}, "inputs": { - "images": ["6", 0], + "images": ["15", 0], "filename_prefix": "enhanced_" } } |
