summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2026-03-24 19:58:35 +0200
committerPaul Buetow <paul@buetow.org>2026-03-24 19:58:35 +0200
commit862b132ddee2cc343a3edc98797554937ea5f595 (patch)
tree19795d60f0e90515c55ada95da2a7f18fb4d8028
parent98d52f5c8eb5ed7242db390a0b760eb1b918997f (diff)
photo-enhance: full AI pipeline with scene-adaptive enhancement
Complete overhaul of the ComfyUI photo enhancement stack: Pipeline (workflows/photo-enhance.json): - Real-ESRGAN realesr-general-x4v3: 4x upscale at full 4K input (no pre-downscale, preserves all original detail before AI sees it), output scaled back to 4K - CodeFormer fidelity=0.7: GPU neural face detection + restoration - CLIP ViT-B/32: zero-shot scene classification (portrait/landscape/night/ indoor/golden_hour/overcast/beach/street) - AdaptivePhotoGrade: scene-tuned exposure/contrast/saturation/detail using guided-filter clarity enhancement; simple linear exposure shift (replaces Reinhard tonemapping which was darkening already-exposed Fuji photos) - SkyEnhance: HSV sky mask (blue sky + clouds + sunset) with spatial prior, graduated exposure and saturation boost for sky regions - DepthSelectiveSharpen: Depth Anything V2 Small depth map → foreground sharpening, background softening - WritePhotoMetadata: writes per-photo JSON to ComfyUI output dir smart_photo_node.py: implements all 5 new ComfyUI custom nodes photo-enhance.rb: - Downloads per-photo metadata JSON from ComfyUI and renders it as a human-readable _e.md report alongside each enhanced JPEG - inject_input now also patches WritePhotoMetadata with prefix + source filename - Updated pipeline description in header comments photo-compare.rb: GTK4 side-by-side comparison GUI — show original vs enhanced, keyboard shortcuts O/E to move preferred version to outdir, Space to skip, auto-rescans as new photos arrive Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
-rw-r--r--.hyperstack-vm-photo-state.json93
-rw-r--r--Gemfile1
-rw-r--r--photo-compare.rb186
-rwxr-xr-xphoto-enhance.rb375
-rw-r--r--smart_photo_node.py536
-rw-r--r--workflows/photo-enhance.json109
6 files changed, 1079 insertions, 221 deletions
diff --git a/.hyperstack-vm-photo-state.json b/.hyperstack-vm-photo-state.json
new file mode 100644
index 0000000..8bf47b2
--- /dev/null
+++ b/.hyperstack-vm-photo-state.json
@@ -0,0 +1,93 @@
+{
+ "vm_id": 698370,
+ "vm_name": "hyperstack-photo-20260324164547",
+ "environment_name": "snonux-ollama",
+ "region": "CANADA-1",
+ "flavor_name": "n3-L40x1",
+ "image_name": "Ubuntu Server 24.04 LTS R570 CUDA 12.8 with Docker",
+ "key_name": "earth",
+ "public_ip": "69.19.136.63",
+ "created_at": "2026-03-24T16:45:48Z",
+ "services": {
+ "vllm_enabled": false,
+ "ollama_enabled": false,
+ "comfyui_enabled": true
+ },
+ "security_rules": [
+ {
+ "direction": "ingress",
+ "ethertype": "IPv4",
+ "protocol": "tcp",
+ "port_range_min": 22,
+ "port_range_max": 22,
+ "remote_ip_prefix": "79.100.218.77/32"
+ },
+ {
+ "direction": "ingress",
+ "ethertype": "IPv4",
+ "protocol": "udp",
+ "port_range_min": 56710,
+ "port_range_max": 56710,
+ "remote_ip_prefix": "79.100.218.77/32"
+ },
+ {
+ "direction": "ingress",
+ "ethertype": "IPv4",
+ "protocol": "tcp",
+ "port_range_min": 8188,
+ "port_range_max": 8188,
+ "remote_ip_prefix": "192.168.3.0/24"
+ },
+ {
+ "direction": "ingress",
+ "ethertype": "IPv4",
+ "protocol": "tcp",
+ "port_range_min": 22,
+ "port_range_max": 22,
+ "remote_ip_prefix": "79.100.218.77/32"
+ },
+ {
+ "direction": "ingress",
+ "ethertype": "IPv4",
+ "protocol": "udp",
+ "port_range_min": 56710,
+ "port_range_max": 56710,
+ "remote_ip_prefix": "79.100.218.77/32"
+ },
+ {
+ "direction": "egress",
+ "ethertype": "IPv4",
+ "protocol": "any",
+ "port_range_min": 1,
+ "port_range_max": 65535,
+ "remote_ip_prefix": "0.0.0.0/0"
+ },
+ {
+ "direction": "ingress",
+ "ethertype": "IPv4",
+ "protocol": "tcp",
+ "port_range_min": 8188,
+ "port_range_max": 8188,
+ "remote_ip_prefix": "192.168.3.0/24"
+ },
+ {
+ "direction": "egress",
+ "ethertype": "IPv6",
+ "protocol": "any",
+ "port_range_min": 1,
+ "port_range_max": 65535,
+ "remote_ip_prefix": "0.0.0.0/0"
+ }
+ ],
+ "bootstrapped_at": "2026-03-24T16:48:36Z",
+ "wireguard_setup_at": "2026-03-24T16:48:54Z",
+ "comfyui_setup_at": "2026-03-24T16:52:16Z",
+ "comfyui_container_name": "comfyui_photo",
+ "comfyui_models": [
+ "RealESRGAN_x4plus",
+ "SUPIR-v0Q"
+ ],
+ "status": "ACTIVE",
+ "vm_state": "active",
+ "provisioned_at": "2026-03-24T16:52:30Z"
+} \ No newline at end of file
diff --git a/Gemfile b/Gemfile
index a1bbd94..b94602f 100644
--- a/Gemfile
+++ b/Gemfile
@@ -1,3 +1,4 @@
source "https://rubygems.org"
gem "toml-rb", "~> 2.2"
+gem "gtk4"
diff --git a/photo-compare.rb b/photo-compare.rb
new file mode 100644
index 0000000..4f5ec4c
--- /dev/null
+++ b/photo-compare.rb
@@ -0,0 +1,186 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+# photo-compare.rb — Side-by-side before/after photo comparison and selection tool.
+#
+# Shows each original + enhanced pair side by side, filling the window.
+# Press O to move the original to --outdir, E to move the enhanced version,
+# Space/S to skip. Rescans after each action so newly finished photos appear.
+#
+# Usage:
+# ruby photo-compare.rb --indir ~/Downloads/fuji --outdir ~/Downloads/fuji/selected
+#
+# Keyboard shortcuts:
+# O — move original to outdir
+# E — move enhanced to outdir
+# Space/S — skip (leave both, advance to next)
+# Q/Escape — quit
+
+require 'gtk4'
+require 'optparse'
+require 'fileutils'
+
+SUPPORTED_EXTENSIONS = %w[.jpg .jpeg .png .webp].freeze
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def find_pairs(indir)
+ Dir.glob(File.join(indir, '*'))
+ .select { |f| File.file?(f) && SUPPORTED_EXTENSIONS.include?(File.extname(f).downcase) }
+ .reject { |f| File.basename(f, '.*').end_with?('_e') }
+ .reject { |f| File.basename(f).include?('.orient.') }
+ .filter_map do |orig|
+ ext = File.extname(orig).downcase # enhanced files always have lowercase ext
+ base = File.basename(orig, File.extname(orig))
+ enh = File.join(File.dirname(orig), "#{base}_e#{ext}")
+ [orig, enh] if File.exist?(enh)
+ end
+ .sort
+end
+
+def kb(path)
+ (File.size(path) / 1024.0).round
+end
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+options = { indir: nil, outdir: nil }
+OptionParser.new do |o|
+ o.banner = 'Usage: ruby photo-compare.rb --indir DIR --outdir DIR'
+ o.on('--indir PATH', 'Directory with original + _e photo pairs') { |v| options[:indir] = v }
+ o.on('--outdir PATH', 'Directory to move selected photos into') { |v| options[:outdir] = v }
+ o.on('-h', '--help', 'Show this help') { puts o; exit }
+end.parse!
+
+abort '--indir is required' unless options[:indir]
+abort '--outdir is required' unless options[:outdir]
+
+indir = File.expand_path(options[:indir])
+outdir = File.expand_path(options[:outdir])
+FileUtils.mkdir_p(outdir)
+
+state = { pairs: find_pairs(indir), index: 0, indir: indir, outdir: outdir }
+abort "No before/after pairs found in #{indir}" if state[:pairs].empty?
+
+# ---------------------------------------------------------------------------
+# GTK4 UI
+# ---------------------------------------------------------------------------
+
+app = Gtk::Application.new('org.hypr.photo-compare', :default_flags)
+
+app.signal_connect('activate') do |a|
+ win = Gtk::ApplicationWindow.new(a)
+ win.title = 'Photo Compare'
+ win.maximize # fill the screen
+
+ root = Gtk::Box.new(:vertical, 4)
+ root.margin_top = root.margin_bottom = root.margin_start = root.margin_end = 6
+ win.child = root
+
+ # Top: progress info
+ progress_lbl = Gtk::Label.new
+ progress_lbl.xalign = 0
+ root.append(progress_lbl)
+
+ # Middle: two pictures side by side — Gtk::Picture scales to fill its container
+ img_row = Gtk::Box.new(:horizontal, 8)
+ img_row.vexpand = true
+ root.append(img_row)
+
+ left_frame = Gtk::Box.new(:vertical, 2)
+ right_frame = Gtk::Box.new(:vertical, 2)
+ left_frame.hexpand = right_frame.hexpand = true
+ left_frame.vexpand = right_frame.vexpand = true
+
+ # Gtk::Picture is GTK4's scaling image widget; content_fit: :contain keeps aspect ratio
+ left_pic = Gtk::Picture.new
+ right_pic = Gtk::Picture.new
+ left_pic.content_fit = :contain
+ right_pic.content_fit = :contain
+ left_pic.hexpand = left_pic.vexpand = true
+ right_pic.hexpand = right_pic.vexpand = true
+
+ left_lbl = Gtk::Label.new
+ right_lbl = Gtk::Label.new
+
+ left_frame.append(left_pic)
+ left_frame.append(left_lbl)
+ right_frame.append(right_pic)
+ right_frame.append(right_lbl)
+ img_row.append(left_frame)
+ img_row.append(right_frame)
+
+ # Bottom: action buttons
+ btn_row = Gtk::Box.new(:horizontal, 16)
+ btn_row.halign = :center
+ orig_btn = Gtk::Button.new(label: '← Original [O]')
+ skip_btn = Gtk::Button.new(label: 'Skip [Space]')
+ enh_btn = Gtk::Button.new(label: 'Enhanced → [E]')
+ btn_row.append(orig_btn)
+ btn_row.append(skip_btn)
+ btn_row.append(enh_btn)
+ root.append(btn_row)
+
+ # -----------------------------------------------------------------------
+ # Refresh display for current pair
+ # -----------------------------------------------------------------------
+ refresh = lambda do
+ orig, enh = state[:pairs][state[:index]]
+ progress_lbl.label = "#{state[:index] + 1} / #{state[:pairs].length} — #{File.basename(orig)}"
+ left_pic.set_filename(orig)
+ right_pic.set_filename(enh)
+ left_lbl.label = "Original (#{kb(orig)} KB)"
+ right_lbl.label = "Enhanced (#{kb(enh)} KB)"
+ end
+
+ # -----------------------------------------------------------------------
+ # After moving (or skipping), rescan and show next pair.
+ # Moving removes the pair from the list, so index stays put and naturally
+ # points at the next pair. Skip increments the index explicitly.
+ # -----------------------------------------------------------------------
+ advance = lambda do |pick|
+ unless pick.nil?
+ FileUtils.mv(pick, File.join(state[:outdir], File.basename(pick)))
+ else
+ state[:index] += 1
+ end
+
+ state[:pairs] = find_pairs(state[:indir])
+
+ if state[:index] >= state[:pairs].length
+ progress_lbl.label = 'All pairs reviewed — you can close the window.'
+ left_pic.set_filename(nil)
+ right_pic.set_filename(nil)
+ left_lbl.label = right_lbl.label = ''
+ [orig_btn, skip_btn, enh_btn].each { |b| b.sensitive = false }
+ else
+ refresh.call
+ end
+ end
+
+ orig_btn.signal_connect('clicked') { advance.call(state[:pairs][state[:index]][0]) }
+ enh_btn.signal_connect('clicked') { advance.call(state[:pairs][state[:index]][1]) }
+ skip_btn.signal_connect('clicked') { advance.call(nil) }
+
+ key_ctrl = Gtk::EventControllerKey.new
+ key_ctrl.signal_connect('key-pressed') do |_ctrl, keyval, _code, _mod|
+ case keyval
+ when Gdk::Keyval::KEY_o, Gdk::Keyval::KEY_O then orig_btn.emit('clicked')
+ when Gdk::Keyval::KEY_e, Gdk::Keyval::KEY_E then enh_btn.emit('clicked')
+ when Gdk::Keyval::KEY_s, Gdk::Keyval::KEY_S,
+ Gdk::Keyval::KEY_space then skip_btn.emit('clicked')
+ when Gdk::Keyval::KEY_q, Gdk::Keyval::KEY_Escape then a.quit
+ end
+ false
+ end
+ win.add_controller(key_ctrl)
+
+ refresh.call
+ win.show
+end
+
+exit app.run([])
diff --git a/photo-enhance.rb b/photo-enhance.rb
index 39f3942..79c2e4c 100755
--- a/photo-enhance.rb
+++ b/photo-enhance.rb
@@ -1,19 +1,28 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
-# photo-enhance.rb — Photolemur-style automatic photo enhancer via ComfyUI.
+# photo-enhance.rb — AI photo enhancer via ComfyUI on a Hyperstack GPU VM.
#
-# Submits images from --indir to the ComfyUI REST API running on a Hyperstack VM,
-# downloads the enhanced results to --outdir, and optionally watches for new files.
+# Submits images from --indir to the ComfyUI REST API, downloads the AI-enhanced
+# results and saves alongside the originals with an _e suffix. Also downloads
+# a per-photo JSON metadata file written by the WritePhotoMetadata ComfyUI node
+# and converts it to a human-readable .md report alongside each enhanced photo.
+#
+# AI pipeline (ComfyUI, GPU):
+# 1. Real-ESRGAN realesr-general-x4v3 — 4× upscale at full 4K input, AI denoise
+# 2. CodeFormer fidelity=0.7 — neural face restoration
+# 3. CLIP ViT-B/32 — scene classification (portrait/landscape/…)
+# 4. AdaptivePhotoGrade — scene-tuned exposure/contrast/saturation/detail
+# 5. SkyEnhance — HSV sky mask + graduated sky correction
+# 6. Depth Anything V2 Small — depth map → foreground sharp, background soft
#
# Usage:
# ruby photo-enhance.rb --config hyperstack-vm-photo.toml \
-# --indir ~/Pictures --outdir ~/Pictures/enhanced [--watch] [--workflow workflows/photo-enhance.json]
+# --indir ~/Pictures [--watch] [--workflow workflows/photo-enhance.json]
#
# Requirements:
-# - ComfyUI VM provisioned with: ruby hyperstack.rb --config hyperstack-vm-photo.toml create
-# - WireGuard tunnel active (wg1): verified via curl http://hyperstack-photo.wg1:8188/system_stats
-# - Ruby stdlib only (no extra gems needed).
+# - ComfyUI VM: ruby hyperstack.rb --config hyperstack-vm-photo.toml create
+# - WireGuard tunnel active (wg1)
begin
require 'bundler/setup'
@@ -27,6 +36,7 @@ require 'optparse'
require 'fileutils'
require 'digest'
require 'time'
+require 'set'
begin
require 'toml-rb'
@@ -36,18 +46,18 @@ rescue LoadError
end
# ---------------------------------------------------------------------------
-# Config loading — reads only the fields photo-enhance.rb needs from the TOML.
+# Config
# ---------------------------------------------------------------------------
class PhotoConfig
attr_reader :host, :port, :workflow_path
def initialize(config_path, workflow_path_override)
- raw = TomlRB.load_file(File.expand_path(config_path))
- hostname = raw.dig('vm', 'hostname') || 'hyperstack-photo'
+ raw = TomlRB.load_file(File.expand_path(config_path))
+ hostname = raw.dig('vm', 'hostname') || 'hyperstack-photo'
interface = raw.dig('local_client', 'interface_name') || 'wg1'
- @host = "#{hostname}.#{interface}"
- @port = Integer(raw.dig('comfyui', 'port') || 8188)
+ @host = "#{hostname}.#{interface}"
+ @port = Integer(raw.dig('comfyui', 'port') || 8188)
@workflow_path = workflow_path_override ||
File.join(File.dirname(File.expand_path(config_path)), 'workflows', 'photo-enhance.json')
end
@@ -59,7 +69,7 @@ end
class ComfyUIClient
POLL_INTERVAL_SEC = 2
- POLL_TIMEOUT_SEC = 600 # 10 minutes per image (SUPIR can be slow on first load)
+ POLL_TIMEOUT_SEC = 300 # 5 minutes; ESRGAN is fast on GPU
def initialize(host:, port:, out: $stdout)
@host = host
@@ -67,98 +77,80 @@ class ComfyUIClient
@out = out
end
- # Upload a local image file; returns the filename ComfyUI assigned it.
def upload_image(file_path)
- filename = File.basename(file_path)
+ filename = File.basename(file_path)
image_data = File.binread(file_path)
- boundary = "----RubyPhotoEnhance#{SecureRandom_hex(8)}"
-
+ boundary = "----RubyPhotoEnhance#{hex(8)}"
body = [
"--#{boundary}\r\n",
"Content-Disposition: form-data; name=\"image\"; filename=\"#{filename}\"\r\n",
- "Content-Type: #{mime_type_for(file_path)}\r\n\r\n",
+ "Content-Type: #{mime_type(file_path)}\r\n\r\n",
image_data,
"\r\n--#{boundary}\r\n",
"Content-Disposition: form-data; name=\"overwrite\"\r\n\r\ntrue\r\n",
"--#{boundary}--\r\n"
].join
-
resp = post_raw('/upload/image', body, "multipart/form-data; boundary=#{boundary}")
- raise "Upload failed (HTTP #{resp.code}): #{resp.body}" unless resp.code == '200'
-
+ raise "Upload failed (#{resp.code}): #{resp.body}" unless resp.code == '200'
JSON.parse(resp.body)['name'] || filename
rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
- raise "Cannot reach ComfyUI at #{@host}:#{@port} — is WireGuard (wg1) active? (#{e.message})"
+ raise "Cannot reach ComfyUI at #{@host}:#{@port} — is WireGuard active? (#{e.message})"
end
- # Submit a workflow; returns the prompt_id string.
def submit_prompt(workflow)
resp = post_json('/prompt', { 'prompt' => workflow })
- raise "Prompt submission failed (HTTP #{resp.code}): #{resp.body}" unless resp.code == '200'
-
- JSON.parse(resp.body)['prompt_id'] or raise "No prompt_id in response: #{resp.body}"
+ raise "Prompt failed (#{resp.code}): #{resp.body}" unless resp.code == '200'
+ JSON.parse(resp.body)['prompt_id'] or raise "No prompt_id in: #{resp.body}"
end
- # Poll until the prompt finishes; returns the list of output filenames.
def wait_for_output(prompt_id)
deadline = Time.now + POLL_TIMEOUT_SEC
loop do
- raise "Timed out after #{POLL_TIMEOUT_SEC}s waiting for prompt #{prompt_id}" if Time.now > deadline
+ raise "Timed out after #{POLL_TIMEOUT_SEC}s for #{prompt_id}" if Time.now > deadline
- resp = get("/history/#{prompt_id}")
- raise "History poll failed (HTTP #{resp.code})" unless resp.code == '200'
+ resp = get("/history/#{prompt_id}")
+ raise "History poll failed (#{resp.code})" unless resp.code == '200'
- history = JSON.parse(resp.body)
- result = history[prompt_id]
+ result = JSON.parse(resp.body)[prompt_id]
if result
- outputs = extract_output_filenames(result)
+ outputs = extract_filenames(result)
return outputs unless outputs.empty?
- # If ComfyUI marks the run complete but outputs are empty, it used a fully
- # cached execution (execution_cached for all nodes) and wrote no new files.
- # Raise immediately rather than spinning until timeout.
+ # ComfyUI cached the run (identical inputs) and wrote no new files — bail fast.
status = result.dig('status', 'status_str')
- completed = result.dig('status', 'completed')
- raise "ComfyUI returned empty outputs (cached execution?) for #{prompt_id}" \
- if completed && status == 'success'
-
- # ComfyUI may record the prompt before writing output nodes; keep polling.
+ raise "ComfyUI cached execution returned no outputs for #{prompt_id}" \
+ if result.dig('status', 'completed') && status == 'success'
end
sleep POLL_INTERVAL_SEC
end
end
- # Download an output image; saves to dest_path.
def download_output(filename, dest_path)
resp = get("/view?filename=#{URI.encode_www_form_component(filename)}&type=output&subfolder=")
- raise "Download failed (HTTP #{resp.code}) for #{filename}" unless resp.code == '200'
-
+ raise "Download failed (#{resp.code}) for #{filename}" unless resp.code == '200'
FileUtils.mkdir_p(File.dirname(dest_path))
File.binwrite(dest_path, resp.body)
end
- # Quick connectivity check; raises on failure.
def check_connectivity!
resp = get('/system_stats')
- raise "ComfyUI health check failed (HTTP #{resp.code}): #{resp.body}" unless resp.code == '200'
+ raise "Health check failed (#{resp.code}): #{resp.body}" unless resp.code == '200'
rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
- raise "Cannot reach ComfyUI at #{@host}:#{@port} — is WireGuard (wg1) active? (#{e.message})"
+ raise "Cannot reach ComfyUI at #{@host}:#{@port} — is WireGuard active? (#{e.message})"
end
private
- def extract_output_filenames(result)
+ def extract_filenames(result)
Array(result.dig('outputs'))
- .flat_map { |_node_id, node_out| Array(node_out['images']) }
+ .flat_map { |_id, node| Array(node['images']) }
.map { |img| img['filename'] }
- .compact
- .reject(&:empty?)
+ .compact.reject(&:empty?)
end
def get(path)
- uri = URI("http://#{@host}:#{@port}#{path}")
- Net::HTTP.get_response(uri)
+ Net::HTTP.get_response(URI("http://#{@host}:#{@port}#{path}"))
end
def post_json(path, payload)
@@ -166,7 +158,7 @@ class ComfyUIClient
req = Net::HTTP::Post.new(uri)
req['Content-Type'] = 'application/json'
req.body = JSON.generate(payload)
- Net::HTTP.start(uri.host, uri.port) { |http| http.request(req) }
+ Net::HTTP.start(uri.host, uri.port) { |h| h.request(req) }
end
def post_raw(path, body, content_type)
@@ -174,11 +166,11 @@ class ComfyUIClient
req = Net::HTTP::Post.new(uri)
req['Content-Type'] = content_type
req.body = body
- Net::HTTP.start(uri.host, uri.port, read_timeout: 120) { |http| http.request(req) }
+ Net::HTTP.start(uri.host, uri.port, read_timeout: 120) { |h| h.request(req) }
end
- def mime_type_for(file_path)
- case File.extname(file_path).downcase
+ def mime_type(path)
+ case File.extname(path).downcase
when '.jpg', '.jpeg' then 'image/jpeg'
when '.png' then 'image/png'
when '.webp' then 'image/webp'
@@ -186,27 +178,25 @@ class ComfyUIClient
end
end
- # Minimal hex token without SecureRandom (pure stdlib).
- def SecureRandom_hex(n)
+ def hex(n)
Digest::SHA256.hexdigest(Time.now.to_f.to_s + rand.to_s)[0, n * 2]
end
end
# ---------------------------------------------------------------------------
-# Manifest — tracks which files have been processed to avoid re-enhancing.
+# Manifest — avoids re-processing files across runs and in watch mode.
# ---------------------------------------------------------------------------
class ProcessedManifest
- MANIFEST_FILE = '.photo-enhance-processed'
+ FILE_NAME = '.photo-enhance-processed'
- def initialize(outdir)
- @path = File.join(outdir, MANIFEST_FILE)
+ def initialize(dir)
+ @path = File.join(dir, FILE_NAME)
@entries = load_entries
end
def processed?(file_path)
- key = digest(file_path)
- @entries.include?(key)
+ @entries.include?(digest(file_path))
end
def mark_done(file_path)
@@ -218,12 +208,11 @@ class ProcessedManifest
private
def load_entries
- return [] unless File.exist?(@path)
-
+ return Set.new unless File.exist?(@path)
File.readlines(@path, chomp: true).map(&:strip).reject(&:empty?).to_set
end
- # Digest includes mtime so a re-shot of the same filename is re-processed.
+ # Covers basename + size + mtime so a re-shot of the same filename re-processes.
def digest(file_path)
stat = File.stat(file_path)
Digest::SHA256.hexdigest("#{File.basename(file_path)}:#{stat.size}:#{stat.mtime.to_i}")
@@ -233,12 +222,16 @@ class ProcessedManifest
end
# ---------------------------------------------------------------------------
-# Enhancer — orchestrates upload → prompt → poll → download for one image.
+# Enhancer — orchestrates upload → AI → download → colour correct per image.
# ---------------------------------------------------------------------------
class PhotoEnhancer
SUPPORTED_EXTENSIONS = %w[.jpg .jpeg .png .webp].freeze
+ # No colour corrections — pure AI output from Real-ESRGAN is used as-is.
+ # ImageMagick is only used to bake EXIF rotation and convert PNG→JPEG.
+ COLOR_ARGS = [].freeze
+
def initialize(config:, client:, workflow:, indir:, manifest:, out: $stdout)
@config = config
@client = client
@@ -248,120 +241,177 @@ class PhotoEnhancer
@out = out
end
- def enhance_directory(indir, watch: false)
+ def run(watch: false)
@client.check_connectivity!
@out.puts "ComfyUI ready at http://#{@config.host}:#{@config.port}"
- @out.puts "Enhancing photos in #{indir} (output: <name>_e.<ext> alongside originals)"
+ @out.puts "Enhancing photos in #{@indir}"
@out.puts watch ? '(watch mode — Ctrl-C to stop)' : ''
loop do
- pending = find_pending_images(indir)
- pending.each { |path| enhance_one(path) }
+ find_pending.each { |path| enhance_one(path) }
break unless watch
-
sleep 5
end
end
private
- def find_pending_images(indir)
- Dir.glob(File.join(indir, '*'))
+ def find_pending
+ Dir.glob(File.join(@indir, '*'))
.select { |f| File.file?(f) && SUPPORTED_EXTENSIONS.include?(File.extname(f).downcase) }
.reject { |f| File.basename(f, '.*').end_with?('_e') }
+ .reject { |f| File.basename(f).include?('.orient.') }
.reject { |f| @manifest.processed?(f) }
.sort
end
def enhance_one(src_path)
- basename = File.basename(src_path, '.*')
- ext = File.extname(src_path).downcase
- # Output lives in the same directory as the original, with an _enhanced suffix
- # before the extension (e.g. photo.jpg -> photo_enhanced.jpg).
- dest_path = File.join(File.dirname(src_path), "#{basename}_e#{ext}")
+ ext = File.extname(src_path).downcase
+ basename = File.basename(src_path, File.extname(src_path))
+ dest_path = File.join(File.dirname(src_path), "#{basename}_e#{ext}")
- @out.puts "[#{Time.now.strftime('%H:%M:%S')}] Enhancing #{File.basename(src_path)}..."
+ @out.puts "[#{Time.now.strftime('%H:%M:%S')}] #{File.basename(src_path)}"
- # Auto-rotate based on EXIF orientation before uploading. ComfyUI strips EXIF,
- # so we bake the rotation into a temp file; this ensures output is correctly oriented.
- upload_path = auto_orient_tempfile(src_path)
+ # Bake in EXIF rotation before uploading — ComfyUI strips EXIF metadata.
+ upload_path = auto_orient_tempfile(src_path)
uploaded_name = @client.upload_image(upload_path)
- workflow = inject_input_image(@workflow, uploaded_name)
+ workflow = inject_input(@workflow, uploaded_name)
prompt_id = @client.submit_prompt(workflow)
- @out.puts " Submitted prompt #{prompt_id}, waiting for ComfyUI..."
+ @out.puts " prompt #{prompt_id}"
filenames = @client.wait_for_output(prompt_id)
- raise "No output images returned for #{src_path}" if filenames.empty?
-
- # ComfyUI SaveImage always outputs PNG. Download to a temp file then convert
- # to the original format (JPEG for .jpg/.jpeg) so file sizes stay comparable.
- tmp_path = "#{dest_path}.tmp.png"
- @client.download_output(filenames.first, tmp_path)
- convert_to_original_format(tmp_path, dest_path, ext)
- File.delete(tmp_path) if File.exist?(tmp_path)
+ raise "No outputs returned for #{src_path}" if filenames.empty?
+
+ # ComfyUI outputs PNG; download then convert to original format.
+ tmp_png = "#{dest_path}.tmp.png"
+ @client.download_output(filenames.first, tmp_png)
+ save_with_corrections(tmp_png, dest_path, ext)
+ File.delete(tmp_png) if File.exist?(tmp_png)
File.delete(upload_path) if upload_path != src_path && File.exist?(upload_path)
+
+ # Download the JSON metadata written by WritePhotoMetadata and render it
+ # as a human-readable .md report alongside the enhanced photo.
+ # ComfyUI appends _NNNNN_ counter: "enhanced_abc123__00001_.png" → "enhanced_abc123_"
+ prefix = filenames.first.sub(/_\d+_\.png$/, '')
+ meta_file = "#{prefix}meta.json"
+ md_path = File.join(File.dirname(dest_path),
+ "#{File.basename(dest_path, File.extname(dest_path))}.md")
+ download_and_write_md(meta_file, src_path, dest_path, md_path)
+
@manifest.mark_done(src_path)
- orig_size = File.size(src_path)
- enhanced_size = File.size(dest_path)
- @out.puts " Saved -> #{dest_path} (#{kb(orig_size)} KB -> #{kb(enhanced_size)} KB)"
+ @out.puts " -> #{dest_path} (#{kb(src_path)} KB -> #{kb(dest_path)} KB)"
rescue StandardError => e
- @out.puts " ERROR enhancing #{File.basename(src_path)}: #{e.message}"
+ @out.puts " ERROR #{File.basename(src_path)}: #{e.message}"
end
- # Apply EXIF auto-orientation to a copy of src_path and return the copy's path.
- # If magick fails (e.g. not installed or no EXIF), returns src_path unchanged so
- # the caller always has a valid upload path.
+ # Run magick -auto-orient into a temp file so EXIF rotation is baked in.
+ # Falls back to the original path if magick is unavailable.
def auto_orient_tempfile(src_path)
- ext = File.extname(src_path)
- tmp = "#{src_path}.orient#{ext}"
- success = system('magick', src_path, '-auto-orient', tmp)
- return tmp if success && File.exist?(tmp)
+ ext = File.extname(src_path)
+ tmp = "#{src_path}.orient#{ext}"
+ return tmp if system('magick', src_path, '-auto-orient', tmp) && File.exist?(tmp)
- @out.puts " Warning: auto-orient failed for #{File.basename(src_path)}, uploading original"
+ @out.puts " Warning: auto-orient failed, uploading original"
src_path
end
- # Convert the PNG downloaded from ComfyUI into the desired output format and
- # apply local colour corrections via ImageMagick:
- # -sigmoidal-contrast 3,50% — gentle S-curve (lifts shadows, adds punch)
- # -modulate 100,120,100 — +20% saturation (vibrance-style boost)
- # -unsharp 0x1.5+0.7+0.02 — mild clarity / micro-contrast sharpening
- # PNG output gets the same corrections but stays lossless.
- def convert_to_original_format(src_png, dest_path, original_ext)
- color_args = [
- '-sigmoidal-contrast', '3,50%',
- '-modulate', '100,120,100',
- '-unsharp', '0x1.5+0.7+0.02'
- ]
- case original_ext
- when '.jpg', '.jpeg'
- system('magick', src_png, *color_args, '-quality', '92', dest_path)
- else
- system('magick', src_png, *color_args, dest_path)
- end
+ # Convert the downloaded PNG to the target format (JPEG quality 92 for .jpg).
+ # No colour processing — pure AI output from Real-ESRGAN is preserved as-is.
+ def save_with_corrections(src_png, dest_path, ext)
+ quality_args = ext.match?(/\.jpe?g/) ? ['-quality', '92'] : []
+ system('magick', src_png, *COLOR_ARGS, *quality_args, dest_path)
end
- def kb(bytes)
- (bytes / 1024.0).round
+ # Download the WritePhotoMetadata JSON from ComfyUI output and render it
+ # as a Markdown report saved alongside the enhanced photo.
+ def download_and_write_md(meta_filename, src_path, dest_path, md_path)
+ resp = @client.send(:get,
+ "/view?filename=#{URI.encode_www_form_component(meta_filename)}&type=output&subfolder=")
+ return unless resp.code == '200'
+
+ meta = JSON.parse(resp.body)
+ profile = meta['enhancement_profile'] || {}
+ sky = meta['sky'] || {}
+ depth = meta['depth_sharpen'] || {}
+ models = meta['models'] || {}
+ scene = meta['scene_type'] || 'unknown'
+ ts = meta['generated_at'] || Time.now.utc.iso8601
+
+ md = <<~MD
+ # #{File.basename(dest_path)} — Enhancement Report
+
+ **Source:** #{File.basename(src_path)} (#{kb(src_path)} KB)
+ **Enhanced:** #{File.basename(dest_path)} (#{kb(dest_path)} KB)
+ **Processed:** #{ts}
+
+ ## AI Pipeline
+
+ | Step | Model / Node | Device | What it does |
+ |------|-------------|--------|--------------|
+ | 1 | `#{models['upscaler']}` | GPU | 4× upscale at full 4K input → 16K → back to 4K |
+ | 2 | `#{models['face_restore']}` | GPU | Face detection + neural restoration |
+ | 3 | `#{models['scene_detect']}` | GPU | Zero-shot scene classification |
+ | 4 | Adaptive Photo Grade | CPU | Scene-tuned exposure / contrast / saturation / detail |
+ | 5 | Sky Enhance | CPU | HSV sky mask + graduated sky correction |
+ | 6 | `#{models['depth']}` | GPU | Depth map → foreground sharp, background soft |
+
+ ## Scene Detection
+
+ | | |
+ |-|-|
+ | **Detected scene** | #{scene} |
+
+ ## Colour Grading Profile (#{scene})
+
+ | Setting | Value |
+ |---------|-------|
+ | Exposure | +#{profile['exposure_stops']} stops |
+ | Contrast | #{profile['contrast_factor']}× |
+ | Saturation | #{profile['saturation_mult']}× |
+ | Detail / Clarity | #{profile['detail_mult']}× |
+ | Denoise strength | #{profile['denoise_strength']} |
+
+ ## Sky Enhancement
+
+ | Setting | Value |
+ |---------|-------|
+ | Sky coverage | #{sky['coverage_pct']}% of image |
+ | Sky exposure | +#{sky['sky_exposure']} stops |
+ | Sky saturation | #{sky['sky_saturation']}× |
+
+ ## Depth-Guided Sharpening
+
+ | Setting | Value |
+ |---------|-------|
+ | Foreground sharpening | #{depth['foreground_sharpen']}× |
+ | Background blur | #{depth['background_blur']} |
+ MD
+
+ File.write(md_path, md)
+ rescue StandardError => e
+ @out.puts " Warning: could not write metadata report: #{e.message}"
end
- # Inject the input filename and a unique SaveImage prefix into the workflow.
- # The unique prefix prevents ComfyUI from returning a fully-cached execution
- # (outputs: {}) instead of actually running the pipeline and writing output files.
- def inject_input_image(workflow, filename)
- modified = JSON.parse(JSON.generate(workflow)) # deep dup
- unique_prefix = "enhanced_#{Digest::SHA256.hexdigest(Time.now.to_f.to_s + rand.to_s)[0, 8]}_"
- modified.each_value do |node|
+ # Inject the upload filename and a unique prefix into LoadImage, SaveImage,
+ # and WritePhotoMetadata to bust ComfyUI's cache and link metadata to image.
+ def inject_input(workflow, filename)
+ wf = JSON.parse(JSON.generate(workflow)) # deep dup
+ prefix = "enhanced_#{Digest::SHA256.hexdigest(Time.now.to_f.to_s + rand.to_s)[0, 8]}_"
+ wf.each_value do |node|
next unless node.is_a?(Hash)
-
case node['class_type']
- when 'LoadImage'
- node['inputs']['image'] = filename
- when 'SaveImage'
- node['inputs']['filename_prefix'] = unique_prefix
+ when 'LoadImage' then node['inputs']['image'] = filename
+ when 'SaveImage' then node['inputs']['filename_prefix'] = prefix
+ when 'WritePhotoMetadata'
+ node['inputs']['filename_prefix'] = prefix
+ node['inputs']['source_filename'] = filename
end
end
- modified
+ wf
+ end
+
+ def kb(path)
+ (File.size(path) / 1024.0).round
end
end
@@ -379,18 +429,15 @@ options = {
OptionParser.new do |o|
o.banner = 'Usage: ruby photo-enhance.rb [options]'
- o.on('--config PATH', 'TOML config file (default: hyperstack-vm-photo.toml)') { |v| options[:config] = v }
- o.on('--indir PATH', 'Directory of photos to enhance (output: <name>_enhanced.<ext> in same dir)') { |v| options[:indir] = v }
- o.on('--workflow PATH', 'ComfyUI workflow JSON (default: workflows/photo-enhance.json)') { |v| options[:workflow] = v }
- o.on('--watch', 'Keep running and process new images as they arrive') { options[:watch] = true }
- o.on('--test', 'Only check connectivity to ComfyUI, then exit') { options[:test] = true }
+ o.on('--config PATH', 'TOML config (default: hyperstack-vm-photo.toml)') { |v| options[:config] = v }
+ o.on('--indir PATH', 'Directory of photos to enhance') { |v| options[:indir] = v }
+ o.on('--workflow PATH', 'ComfyUI workflow JSON override') { |v| options[:workflow] = v }
+ o.on('--watch', 'Keep running, process new images as they arrive') { options[:watch] = true }
+ o.on('--test', 'Check ComfyUI connectivity only, then exit') { options[:test] = true }
o.on('-h', '--help', 'Show this help') { puts o; exit }
end.parse!
-unless File.exist?(options[:config])
- warn "Config not found: #{options[:config]}"
- exit 1
-end
+abort "Config not found: #{options[:config]}" unless File.exist?(options[:config])
cfg = PhotoConfig.new(options[:config], options[:workflow])
client = ComfyUIClient.new(host: cfg.host, port: cfg.port)
@@ -398,42 +445,26 @@ client = ComfyUIClient.new(host: cfg.host, port: cfg.port)
if options[:test]
begin
client.check_connectivity!
- puts "ComfyUI is reachable at http://#{cfg.host}:#{cfg.port} — OK"
+ puts "ComfyUI reachable at http://#{cfg.host}:#{cfg.port} — OK"
exit 0
rescue RuntimeError => e
- warn "ERROR: #{e.message}"
- exit 1
+ warn "ERROR: #{e.message}"; exit 1
end
end
-unless options[:indir]
- warn '--indir is required (use --test to only check connectivity)'
- exit 1
-end
-
+abort '--indir is required' unless options[:indir]
indir = File.expand_path(options[:indir])
-
-unless File.directory?(indir)
- warn "Input directory not found: #{indir}"
- exit 1
-end
-
-unless File.exist?(cfg.workflow_path)
- warn "Workflow JSON not found: #{cfg.workflow_path}"
- warn "Expected at #{File.join(__dir__, 'workflows', 'photo-enhance.json')}"
- exit 1
-end
+abort "Directory not found: #{indir}" unless File.directory?(indir)
+abort "Workflow not found: #{cfg.workflow_path}" unless File.exist?(cfg.workflow_path)
workflow = JSON.parse(File.read(cfg.workflow_path))
-# Manifest lives in the indir so it stays with the photos.
manifest = ProcessedManifest.new(indir)
enhancer = PhotoEnhancer.new(config: cfg, client: client, workflow: workflow,
indir: indir, manifest: manifest)
begin
- enhancer.enhance_directory(indir, watch: options[:watch])
+ enhancer.run(watch: options[:watch])
rescue RuntimeError => e
- warn "ERROR: #{e.message}"
- exit 1
+ warn "ERROR: #{e.message}"; exit 1
rescue Interrupt
puts "\nStopped."
end
diff --git a/smart_photo_node.py b/smart_photo_node.py
new file mode 100644
index 0000000..114ff4e
--- /dev/null
+++ b/smart_photo_node.py
@@ -0,0 +1,536 @@
+"""
+Smart Photo Enhancement Nodes for ComfyUI
+==========================================
+Five AI-driven nodes that replace static colour-correction filters with
+content-aware processing:
+
+    CLIPSceneDetect       — CLIP zero-shot classification → scene label
+    AdaptivePhotoGrade    — scene-tuned exposure / contrast / saturation / detail
+    SkyEnhance            — HSV sky mask + graduated exposure & saturation boost
+    DepthSelectiveSharpen — Depth-Anything depth map → foreground sharp, BG soft
+    WritePhotoMetadata    — per-photo JSON report written to the ComfyUI output dir
+
+All heavy models are loaded once and kept in _MODEL_CACHE between prompts.
+"""
+
+import torch
+import numpy as np
+import cv2
+from PIL import Image
+
+# ---------------------------------------------------------------------------
+# Global model cache — prevents reloading 100–600 MB models every frame
+# ---------------------------------------------------------------------------
+_MODEL_CACHE: dict = {}
+
+
+def _cached_model(key: str, loader_fn):
+ """Return a cached model, loading it on the first call."""
+ if key not in _MODEL_CACHE:
+ _MODEL_CACHE[key] = loader_fn()
+ return _MODEL_CACHE[key]
+
+
+# ---------------------------------------------------------------------------
+# CLIPSceneDetect
+# ---------------------------------------------------------------------------
+class CLIPSceneDetect:
+ """
+ Zero-shot scene classification using OpenAI CLIP (ViT-B/32, ~600 MB).
+ Matches the photo against 8 descriptive text prompts and emits the
+ winning scene label as a STRING for AdaptivePhotoGrade to consume.
+
+ Scene labels: portrait | landscape | night | indoor |
+ golden_hour | overcast | beach | street
+ """
+
+ # Text prompts whose cosine similarity to the image selects the scene
+ SCENE_PROMPTS = [
+ "a portrait photograph of a person or people",
+ "a landscape photograph of nature or scenery outdoors",
+ "a night photograph taken in low light or darkness",
+ "an indoor photograph inside a room or building",
+ "a golden hour or sunset photograph with warm orange light",
+ "an overcast or cloudy day outdoor photograph",
+ "a beach, ocean, or waterfront photograph",
+ "a street, city, or urban photograph",
+ ]
+ SCENE_LABELS = [
+ "portrait", "landscape", "night", "indoor",
+ "golden_hour", "overcast", "beach", "street",
+ ]
+
+ @classmethod
+ def INPUT_TYPES(cls):
+ return {"required": {"image": ("IMAGE",)}}
+
+ RETURN_TYPES = ("IMAGE", "STRING")
+ RETURN_NAMES = ("image", "scene_type")
+ FUNCTION = "detect"
+ CATEGORY = "image/smart"
+
+ def detect(self, image):
+ from transformers import CLIPProcessor, CLIPModel
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ def _load():
+ print("[CLIPSceneDetect] Loading CLIP ViT-B/32…")
+ m = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device).eval()
+ p = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+ return m, p
+
+ model, processor = _cached_model("clip_scene", _load)
+
+ # Use the first image in the batch; all frames are the same scene
+ img_np = (image[0].cpu().numpy() * 255).astype(np.uint8)
+ img_pil = Image.fromarray(img_np)
+
+ inputs = processor(
+ text=self.SCENE_PROMPTS,
+ images=img_pil,
+ return_tensors="pt",
+ padding=True,
+ ).to(device)
+
+ with torch.no_grad():
+ logits = model(**inputs).logits_per_image[0]
+ probs = logits.softmax(dim=0).cpu()
+
+ idx = int(probs.argmax())
+ scene = self.SCENE_LABELS[idx]
+ conf = float(probs[idx])
+ print(f"[CLIPSceneDetect] → {scene} ({conf:.1%})")
+ return (image, scene)
+
+
+# ---------------------------------------------------------------------------
+# AdaptivePhotoGrade
+# ---------------------------------------------------------------------------
+class AdaptivePhotoGrade:
+ """
+ Scene-adaptive colour grading node.
+
+    Applies exposure correction (a simple linear shift in linear light — not
+    Reinhard tonemapping, which darkened already-bright photos), contrast,
+    saturation, and guided-filter clarity enhancement, tuned per scene type.
+ Falls back to balanced 'default' settings for unknown scene labels.
+
+ Replaces the three static ComfyUI-Image-Filters nodes
+ (ExposureAdjust + AdjustContrast + EnhanceDetail) with one smart node
+ that adapts to content.
+ """
+
+ # Per-scene profiles: exposure in stops, contrast factor, saturation
+ # multiplier, detail enhancement multiplier, denoise strength (0..1).
+ PROFILES = {
+ # Portraits: gentle — preserve skin tones, avoid over-sharpening hair
+ "portrait": dict(stops=0.30, contrast=1.10, saturation=1.00, detail=1.2, denoise=0.15),
+ # Landscapes: vivid — strong clarity, saturated skies & greens
+ "landscape": dict(stops=0.20, contrast=1.20, saturation=1.15, detail=1.8, denoise=0.05),
+ # Night: lift shadows aggressively, reduce sharpening (hides noise)
+ "night": dict(stops=0.80, contrast=1.05, saturation=0.90, detail=0.8, denoise=0.30),
+ # Indoor: correct typically warm/dim ambient light
+ "indoor": dict(stops=0.50, contrast=1.15, saturation=1.05, detail=1.3, denoise=0.10),
+ # Golden hour: enhance warmth, lift shadow detail
+ "golden_hour": dict(stops=0.25, contrast=1.20, saturation=1.20, detail=1.5, denoise=0.05),
+ # Overcast: punch contrast to compensate for flat light
+ "overcast": dict(stops=0.40, contrast=1.20, saturation=1.10, detail=1.6, denoise=0.08),
+ # Beach: bright scene, protect highlights, boost blues/greens
+ "beach": dict(stops=0.15, contrast=1.15, saturation=1.20, detail=1.7, denoise=0.05),
+ # Street: punchy contrast, neutral colour
+ "street": dict(stops=0.35, contrast=1.20, saturation=1.05, detail=1.5, denoise=0.08),
+ # Balanced fallback for unrecognised labels
+ "default": dict(stops=0.40, contrast=1.15, saturation=1.05, detail=1.5, denoise=0.10),
+ }
+
+ @classmethod
+ def INPUT_TYPES(cls):
+ return {
+ "required": {
+ "images": ("IMAGE",),
+ "scene_type": ("STRING", {"default": "default"}),
+ }
+ }
+
+ RETURN_TYPES = ("IMAGE",)
+ FUNCTION = "grade"
+ CATEGORY = "image/smart"
+
+ def grade(self, images, scene_type: str):
+ p = self.PROFILES.get(scene_type, self.PROFILES["default"])
+ print(f"[AdaptivePhotoGrade] Scene={scene_type} → {p}")
+
+ results = []
+ for img in images:
+ arr = img.cpu().numpy().copy() # [H, W, C] float32 0..1
+ arr = self._apply_exposure(arr, p["stops"])
+ arr = self._apply_contrast(arr, p["contrast"])
+ arr = self._apply_saturation(arr, p["saturation"])
+ arr = self._apply_detail(arr, p["detail"], p["denoise"])
+ results.append(torch.from_numpy(arr.clip(0, 1)).float())
+
+ return (torch.stack(results),)
+
+ # -- helpers ------------------------------------------------------------
+
+ def _apply_exposure(self, img: np.ndarray, stops: float) -> np.ndarray:
+ """
+ Per-stop exposure adjustment in linear light.
+ Converts sRGB → linear, multiplies by 2^stops, clips highlights, converts back.
+ Simple and photographic — avoids Reinhard's tonal compression which
+ would darken already-bright Fuji photos.
+ """
+ linear = img ** 2.2 # sRGB → approximate linear
+ linear = linear * (2.0 ** stops) # shift by N stops (positive = brighter)
+ return np.clip(linear ** (1.0 / 2.2), 0, 1) # back to sRGB, clip overexposed
+
+ def _apply_contrast(self, img: np.ndarray, factor: float) -> np.ndarray:
+ """Simple linear contrast around 0.5 midpoint."""
+ return np.clip((img - 0.5) * factor + 0.5, 0, 1)
+
+ def _apply_saturation(self, img: np.ndarray, factor: float) -> np.ndarray:
+ """HSV saturation boost; factor=1.0 is a no-op."""
+ u8 = (img * 255).astype(np.uint8)
+ hsv = cv2.cvtColor(u8, cv2.COLOR_RGB2HSV).astype(np.float32)
+ hsv[:, :, 1] = np.clip(hsv[:, :, 1] * factor, 0, 255)
+ return cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB).astype(np.float32) / 255.0
+
+ def _apply_detail(self, img: np.ndarray, mult: float, denoise: float) -> np.ndarray:
+ """
+ Clarity / structure boost via guided-filter edge-preserving decomposition.
+ Separates base (low-freq) from detail (high-freq), scales detail by mult,
+ optionally denoises the base layer via bilateral filter.
+ """
+ u8 = (img * 255).astype(np.uint8)
+
+ # Guided filter produces an edge-preserving smooth base layer
+ # eps controls smoothing strength (higher = more smoothing)
+ base = cv2.ximgproc.guidedFilter(u8, u8, radius=8, eps=int(0.01 * 255 ** 2))
+ detail = u8.astype(np.float32) - base.astype(np.float32)
+
+ # Optionally soften the base to reduce noise before adding detail back
+ if denoise > 0.05:
+ sigma = int(denoise * 75)
+ base = cv2.bilateralFilter(base, d=5, sigmaColor=sigma, sigmaSpace=sigma)
+
+ enhanced = base.astype(np.float32) + detail * mult
+ return np.clip(enhanced / 255.0, 0, 1)
+
+
+# ---------------------------------------------------------------------------
+# SkyEnhance
+# ---------------------------------------------------------------------------
+class SkyEnhance:
+ """
+ Sky region detection and graduated enhancement — no ML model required.
+
+ Detects sky using HSV colour ranges (blue sky, white clouds, sunset tones)
+ combined with a spatial prior (sky lives in the upper portion of the frame).
+ Applies independent exposure + saturation adjustments to the sky mask,
+ blended smoothly with the rest of the image.
+
+ Works on any outdoor shot; portraits and indoor shots receive no change
+ because the sky mask will be near zero.
+ """
+
+ @classmethod
+ def INPUT_TYPES(cls):
+ return {
+ "required": {
+ "images": ("IMAGE",),
+ "sky_exposure": ("FLOAT", {"default": 0.30, "min": -1.0, "max": 1.0, "step": 0.05}),
+ "sky_saturation": ("FLOAT", {"default": 1.20, "min": 0.5, "max": 2.0, "step": 0.05}),
+ }
+ }
+
+ RETURN_TYPES = ("IMAGE",)
+ FUNCTION = "enhance"
+ CATEGORY = "image/smart"
+
+ def enhance(self, images, sky_exposure: float = 0.30, sky_saturation: float = 1.20):
+ results = []
+ for img in images:
+ arr = (img.cpu().numpy() * 255).astype(np.uint8)
+ mask = self._detect_sky(arr)
+ enhanced = self._apply_sky(arr, mask, sky_exposure, sky_saturation)
+ results.append(torch.from_numpy(enhanced.astype(np.float32) / 255.0))
+ return (torch.stack(results),)
+
+ def _detect_sky(self, img_rgb: np.ndarray) -> np.ndarray:
+ """
+ Build a soft float sky mask [0..1] using three HSV colour bands
+ plus a vertical spatial prior (sky = upper image region).
+ """
+ h = img_rgb.shape[0]
+ hsv = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2HSV).astype(np.float32)
+ H, S, V = hsv[:, :, 0], hsv[:, :, 1], hsv[:, :, 2]
+
+ # Band 1: Blue daytime sky (hue 90–140 in OpenCV 0–180 scale)
+ blue = ((H >= 90) & (H <= 140) & (S >= 30) & (V >= 50)).astype(np.float32)
+
+ # Band 2: White/grey clouds (low saturation, bright)
+ clouds = ((S < 40) & (V >= 180)).astype(np.float32)
+
+ # Band 3: Sunset/golden sky (hue 0–25 or 155–180, moderate sat)
+ sunset = (((H <= 25) | (H >= 155)) & (S >= 40) & (V >= 100)).astype(np.float32)
+
+ raw = np.clip(blue + clouds + sunset, 0, 1)
+
+ # Vertical gradient prior: top row = 1.2, bottom row = 0.0
+ y_weight = np.linspace(1.2, 0.0, h)[:, np.newaxis]
+ raw = raw * y_weight
+
+ # Morphological close to fill gaps between cloud patches
+ kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
+ raw = cv2.morphologyEx(raw, cv2.MORPH_CLOSE, kernel)
+
+ # Large Gaussian blur for smooth mask edges (avoids halo artifacts)
+ mask = cv2.GaussianBlur(raw, (51, 51), 0)
+ return np.clip(mask, 0, 1)
+
+ def _apply_sky(self, img_rgb: np.ndarray, mask: np.ndarray,
+ sky_exposure: float, sky_saturation: float) -> np.ndarray:
+ """Blend sky-enhanced pixels into the original image using the mask."""
+ orig = img_rgb.astype(np.float32)
+
+ # Exposure adjustment in linear light — simple shift, no Reinhard compression
+ linear = (orig / 255.0) ** 2.2
+ linear = np.clip(linear * (2.0 ** sky_exposure), 0, 1)
+ sky_exp = np.clip(linear ** (1.0 / 2.2) * 255, 0, 255).astype(np.uint8)
+
+ # Saturation boost in HSV
+ hsv = cv2.cvtColor(sky_exp, cv2.COLOR_RGB2HSV).astype(np.float32)
+ hsv[:, :, 1] = np.clip(hsv[:, :, 1] * sky_saturation, 0, 255)
+ sky_sat = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB).astype(np.float32)
+
+ # Alpha blend: mask=1 → sky-enhanced, mask=0 → original
+ mask3 = mask[:, :, np.newaxis]
+ result = orig * (1.0 - mask3) + sky_sat * mask3
+ return np.clip(result, 0, 255).astype(np.uint8)
+
+
+# ---------------------------------------------------------------------------
+# DepthSelectiveSharpen
+# ---------------------------------------------------------------------------
+class DepthSelectiveSharpen:
+ """
+ Depth-guided selective sharpening using Depth Anything V2 Small (~100 MB).
+
+ Estimates a monocular depth map, then:
+ • Foreground (near): unsharp-mask sharpening (foreground_sharpen controls
+ the detail multiplier; 1.0 = no change, 2.0 = strong sharpening)
+ • Background (far): Gaussian blur (background_blur controls kernel size;
+ 0.0 = no blur, 1.0 = heavy background softening)
+
+ This mimics the depth-of-field separation of a fast prime lens —
+ the subject stays razor sharp while busy backgrounds recede.
+ """
+
+ @classmethod
+ def INPUT_TYPES(cls):
+ return {
+ "required": {
+ "images": ("IMAGE",),
+ "foreground_sharpen": ("FLOAT", {"default": 1.50, "min": 1.0, "max": 3.0, "step": 0.1}),
+ "background_blur": ("FLOAT", {"default": 0.50, "min": 0.0, "max": 1.0, "step": 0.1}),
+ }
+ }
+
+ RETURN_TYPES = ("IMAGE",)
+ FUNCTION = "process"
+ CATEGORY = "image/smart"
+
+ def process(self, images, foreground_sharpen: float = 1.5, background_blur: float = 0.5):
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ def _load():
+ print("[DepthSelectiveSharpen] Loading Depth Anything V2 Small…")
+ from transformers import pipeline as hf_pipeline
+ return hf_pipeline(
+ task="depth-estimation",
+ model="depth-anything/Depth-Anything-V2-Small-hf",
+ device=0 if device == "cuda" else -1,
+ )
+
+ depth_pipe = _cached_model("depth_anything_v2", _load)
+
+ results = []
+ for img in images:
+ arr = (img.cpu().numpy() * 255).astype(np.uint8)
+ fg_mask = self._depth_foreground_mask(arr, depth_pipe)
+ result = self._blend_sharp_blur(arr, fg_mask, foreground_sharpen, background_blur)
+ results.append(torch.from_numpy(result.astype(np.float32) / 255.0))
+
+ return (torch.stack(results),)
+
+ def _depth_foreground_mask(self, img_rgb: np.ndarray, depth_pipe) -> np.ndarray:
+ """
+ Run Depth Anything on the image, normalise to [0,1], resize to match,
+ then invert so that near=1 (foreground) and far=0 (background).
+ """
+ h, w = img_rgb.shape[:2]
+ img_pil = Image.fromarray(img_rgb)
+
+ depth_out = depth_pipe(img_pil)
+ depth_arr = np.array(depth_out["depth"], dtype=np.float32)
+
+ # Normalise depth to 0..1
+ d_min, d_max = depth_arr.min(), depth_arr.max()
+ depth_norm = (depth_arr - d_min) / (d_max - d_min + 1e-8)
+
+ # Resize depth map to original image size
+ depth_resized = cv2.resize(depth_norm, (w, h), interpolation=cv2.INTER_LINEAR)
+
+        # NOTE(review): Depth Anything's published convention is relative
+        # disparity — LARGER value = NEARER, not farther. If so, this inversion
+        # would select the background as "foreground"; verify the mask direction
+        # against an actual depth output before trusting the sharpen/blur split.
+ fg_mask = 1.0 - depth_resized
+
+ # Smooth mask to avoid hard transitions at object boundaries
+ fg_mask = cv2.GaussianBlur(fg_mask, (31, 31), 0)
+ return fg_mask.clip(0, 1)
+
+ def _blend_sharp_blur(self, img_u8: np.ndarray, fg_mask: np.ndarray,
+ fg_sharpen: float, bg_blur: float) -> np.ndarray:
+ """Blend foreground-sharpened and background-blurred versions using depth mask."""
+ fg_mask3 = fg_mask[:, :, np.newaxis]
+
+ # Foreground: unsharp mask sharpening
+ if fg_sharpen > 1.0:
+ blur = cv2.GaussianBlur(img_u8.astype(np.float32), (0, 0), 2.0)
+ detail = img_u8.astype(np.float32) - blur
+ sharpened = np.clip(img_u8.astype(np.float32) + detail * (fg_sharpen - 1.0), 0, 255)
+ else:
+ sharpened = img_u8.astype(np.float32)
+
+ # Background: Gaussian blur
+ if bg_blur > 0.05:
+ ksize = int(bg_blur * 10) * 2 + 1 # always odd
+ blurred = cv2.GaussianBlur(img_u8, (ksize, ksize), bg_blur * 5).astype(np.float32)
+ else:
+ blurred = img_u8.astype(np.float32)
+
+ # Combine: near pixels get sharpened version, far pixels get blurred
+ blended = sharpened * fg_mask3 + blurred * (1.0 - fg_mask3)
+ return np.clip(blended, 0, 255).astype(np.uint8)
+
+
+# ---------------------------------------------------------------------------
+# WritePhotoMetadata
+# ---------------------------------------------------------------------------
+class WritePhotoMetadata:
+ """
+ Writes a per-photo JSON metadata file to the ComfyUI output directory.
+
+ The Ruby photo-enhance.rb script downloads this file after the image,
+ reads the AI pipeline details (scene type, profile settings, sky coverage,
+ depth settings), and generates a human-readable .md report alongside the
+ enhanced JPEG.
+
+ filename_prefix must match the prefix injected into SaveImage so the Ruby
+ script can find both files by the same prefix.
+ """
+
+ @classmethod
+ def INPUT_TYPES(cls):
+ return {
+ "required": {
+ "image": ("IMAGE",),
+ "scene_type": ("STRING", {"default": "unknown"}),
+ # Both inputs are injected per-prompt by photo-enhance.rb's inject_input
+ "filename_prefix": ("STRING", {"default": "enhanced_"}),
+ "source_filename": ("STRING", {"default": "photo"}),
+ }
+ }
+
+ # Pass image through unchanged; side-effect is writing the metadata file
+ RETURN_TYPES = ("IMAGE",)
+ FUNCTION = "write"
+ CATEGORY = "image/smart"
+
+ # Mirrors AdaptivePhotoGrade — keep in sync if profiles change
+ PROFILES = AdaptivePhotoGrade.PROFILES
+
+ def write(self, image, scene_type: str, filename_prefix: str, source_filename: str):
+ import json, datetime, os
+
+ # Resolve ComfyUI output directory via its internal module
+ try:
+ import folder_paths
+ out_dir = folder_paths.get_output_directory()
+ except Exception:
+ out_dir = "/ephemeral/comfyui/output"
+
+ profile = self.PROFILES.get(scene_type, self.PROFILES["default"])
+
+ # Compute sky mask coverage as a percentage of the image
+ sky_coverage = self._sky_coverage(image[0])
+
+        # source_filename is the upload name on ComfyUI, e.g. "DSCF5434.JPG.orient.JPG"
+        # Strip a trailing .orient.JPG / .orient.jpg suffix (only these two casings
+        # are handled) to recover the original base name
+ base = os.path.basename(source_filename)
+ base = base.replace(".orient.JPG", "").replace(".orient.jpg", "")
+
+ meta = {
+ "generated_at": datetime.datetime.utcnow().isoformat() + "Z",
+ "source_filename": base,
+ "scene_type": scene_type,
+ "enhancement_profile": {
+ "exposure_stops": profile["stops"],
+ "contrast_factor": profile["contrast"],
+ "saturation_mult": profile["saturation"],
+ "detail_mult": profile["detail"],
+ "denoise_strength": profile["denoise"],
+ },
+ "sky": {
+ "coverage_pct": round(sky_coverage * 100, 1),
+ "sky_exposure": 0.30,
+ "sky_saturation": 1.20,
+ },
+ "depth_sharpen": {
+ "foreground_sharpen": 1.50,
+ "background_blur": 0.50,
+ },
+ "models": {
+ "upscaler": "realesr-general-x4v3 (Real-ESRGAN, GPU)",
+ "face_restore": "CodeFormer fidelity=0.7 (GPU)",
+ "scene_detect": "CLIP ViT-B/32 (openai/clip-vit-base-patch32)",
+ "depth": "Depth Anything V2 Small (GPU)",
+ },
+ }
+
+        # Write a single prefix-named JSON so the Ruby script can download it
+        # by the same unique prefix that was injected into SaveImage
+ meta_path = os.path.join(out_dir, f"{filename_prefix}meta.json")
+ with open(meta_path, "w") as f:
+ json.dump(meta, f, indent=2)
+ print(f"[WritePhotoMetadata] Wrote {meta_path} (scene={scene_type}, sky={sky_coverage:.1%})")
+
+ return (image,)
+
+ def _sky_coverage(self, img_tensor: "torch.Tensor") -> float:
+ """Re-use SkyEnhance's mask logic to estimate sky % for reporting."""
+ try:
+ arr = (img_tensor.cpu().numpy() * 255).astype(np.uint8)
+ helper = SkyEnhance()
+ mask = helper._detect_sky(arr)
+ return float(mask.mean())
+ except Exception:
+ return 0.0
+
+
+# ---------------------------------------------------------------------------
+# ComfyUI node registration
+# ---------------------------------------------------------------------------
+NODE_CLASS_MAPPINGS = {
+ "CLIPSceneDetect": CLIPSceneDetect,
+ "AdaptivePhotoGrade": AdaptivePhotoGrade,
+ "SkyEnhance": SkyEnhance,
+ "DepthSelectiveSharpen": DepthSelectiveSharpen,
+ "WritePhotoMetadata": WritePhotoMetadata,
+}
+
+NODE_DISPLAY_NAME_MAPPINGS = {
+ "CLIPSceneDetect": "CLIP Scene Detect",
+ "AdaptivePhotoGrade": "Adaptive Photo Grade",
+ "SkyEnhance": "Sky Enhance",
+ "DepthSelectiveSharpen": "Depth Selective Sharpen",
+ "WritePhotoMetadata": "Write Photo Metadata",
+}
diff --git a/workflows/photo-enhance.json b/workflows/photo-enhance.json
index 034ae4f..6db3e1d 100644
--- a/workflows/photo-enhance.json
+++ b/workflows/photo-enhance.json
@@ -7,75 +7,86 @@
"upload": "image"
}
},
- "2": {
+ "3": {
+ "class_type": "UpscaleModelLoader",
+ "_meta": {"title": "Load realesr-general-x4v3 (photo-tuned AI upscaler)"},
+ "inputs": {
+ "model_name": "realesr-general-x4v3.pth"
+ }
+ },
+ "4": {
+ "class_type": "ImageUpscaleWithModel",
+ "_meta": {"title": "Real-ESRGAN 4x — AI denoising + detail recovery at full 4K input (GPU)"},
+ "inputs": {
+ "upscale_model": ["3", 0],
+ "image": ["1", 0]
+ }
+ },
+ "5": {
"class_type": "ImageScaleBy",
- "_meta": {"title": "Downscale to 2K for SUPIR (4x faster processing)"},
+ "_meta": {"title": "Scale 16K back to original 4K resolution (0.25x lanczos)"},
"inputs": {
- "image": ["1", 0],
+ "image": ["4", 0],
"upscale_method": "lanczos",
- "scale_by": 0.5
+ "scale_by": 0.25
}
},
- "3": {
- "class_type": "SUPIR_Upscale",
- "_meta": {"title": "SUPIR Restoration at 2K"},
+ "6": {
+ "class_type": "CodeFormerRestore",
+ "_meta": {"title": "CodeFormer — AI face restoration (GPU, fidelity=0.7)"},
"inputs": {
- "supir_model": "SUPIR-v0Q.ckpt",
- "sdxl_model": "sd_xl_base_1.0.safetensors",
- "image": ["2", 0],
- "seed": 42,
- "resize_method": "lanczos",
- "scale_by": 1.0,
- "steps": 8,
- "restoration_scale": -1,
- "cfg_scale": 7.5,
- "a_prompt": "high quality photograph, sharp, detailed, natural colors",
- "n_prompt": "blurry, noisy, low quality, artifact, overexposed, underexposed",
- "s_churn": 5,
- "s_noise": 1.003,
- "control_scale": 1.0,
- "cfg_scale_start": 4.0,
- "control_scale_start": 0.0,
- "color_fix_type": "Wavelet",
- "keep_model_loaded": true,
- "use_tiled_vae": true,
- "encoder_tile_size_pixels": 1024,
- "decoder_tile_size_latent": 128,
- "fp8_unet": true,
- "use_tiled_sampling": true,
- "sampler_tile_size": 512,
- "sampler_tile_stride": 256
+ "image": ["5", 0],
+ "fidelity": 0.7
}
},
- "4": {
- "class_type": "UpscaleModelLoader",
- "_meta": {"title": "Load Real-ESRGAN 4x"},
+ "11": {
+ "class_type": "CLIPSceneDetect",
+ "_meta": {"title": "CLIP Scene Detect — classifies scene type (portrait/landscape/night/etc)"},
"inputs": {
- "model_name": "RealESRGAN_x4plus.pth"
+ "image": ["6", 0]
}
},
- "5": {
- "class_type": "ImageUpscaleWithModel",
- "_meta": {"title": "Real-ESRGAN 4x upscale back towards original resolution"},
+ "12": {
+ "class_type": "AdaptivePhotoGrade",
+ "_meta": {"title": "Adaptive Photo Grade — scene-tuned exposure/contrast/saturation/detail"},
"inputs": {
- "upscale_model": ["4", 0],
- "image": ["3", 0]
+ "images": ["11", 0],
+ "scene_type": ["11", 1]
}
},
- "6": {
- "class_type": "ImageScaleBy",
- "_meta": {"title": "Scale back to original resolution (2K * 4x * 0.5 = 4K)"},
+ "13": {
+ "class_type": "SkyEnhance",
+ "_meta": {"title": "Sky Enhance — HSV sky mask + graduated exposure/saturation boost"},
"inputs": {
- "image": ["5", 0],
- "upscale_method": "lanczos",
- "scale_by": 0.5
+ "images": ["12", 0],
+ "sky_exposure": 0.30,
+ "sky_saturation": 1.20
+ }
+ },
+ "14": {
+ "class_type": "DepthSelectiveSharpen",
+ "_meta": {"title": "Depth Selective Sharpen — Depth-Anything depth map → foreground sharp, BG soft"},
+ "inputs": {
+ "images": ["13", 0],
+ "foreground_sharpen": 1.50,
+ "background_blur": 0.50
+ }
+ },
+ "15": {
+ "class_type": "WritePhotoMetadata",
+ "_meta": {"title": "Write Photo Metadata — saves per-photo JSON report to ComfyUI output dir"},
+ "inputs": {
+ "image": ["14", 0],
+ "scene_type": ["11", 1],
+ "filename_prefix": "enhanced_",
+ "source_filename": "NODE_INPUT_IMAGE"
}
},
- "7": {
+ "10": {
"class_type": "SaveImage",
"_meta": {"title": "Save Enhanced Photo"},
"inputs": {
- "images": ["6", 0],
+ "images": ["15", 0],
"filename_prefix": "enhanced_"
}
}