summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2026-03-24 19:58:35 +0200
committerPaul Buetow <paul@buetow.org>2026-03-24 19:58:35 +0200
commit862b132ddee2cc343a3edc98797554937ea5f595 (patch)
tree19795d60f0e90515c55ada95da2a7f18fb4d8028
parent98d52f5c8eb5ed7242db390a0b760eb1b918997f (diff)
photo-enhance: full AI pipeline with scene-adaptive enhancement
Complete overhaul of the ComfyUI photo enhancement stack: Pipeline (workflows/photo-enhance.json): - Real-ESRGAN realesr-general-x4v3: 4x upscale at full 4K input (no pre-downscale, preserves all original detail before AI sees it), output scaled back to 4K - CodeFormer fidelity=0.7: GPU neural face detection + restoration - CLIP ViT-B/32: zero-shot scene classification (portrait/landscape/night/ indoor/golden_hour/overcast/beach/street) - AdaptivePhotoGrade: scene-tuned exposure/contrast/saturation/detail using guided-filter clarity enhancement; simple linear exposure shift (replaces Reinhard tonemapping which was darkening already-exposed Fuji photos) - SkyEnhance: HSV sky mask (blue sky + clouds + sunset) with spatial prior, graduated exposure and saturation boost for sky regions - DepthSelectiveSharpen: Depth Anything V2 Small depth map → foreground sharpening, background softening - WritePhotoMetadata: writes per-photo JSON to ComfyUI output dir smart_photo_node.py: implements all 5 new ComfyUI custom nodes photo-enhance.rb: - Downloads per-photo metadata JSON from ComfyUI and renders it as a human-readable _e.md report alongside each enhanced JPEG - inject_input now also patches WritePhotoMetadata with prefix + source filename - Updated pipeline description in header comments photo-compare.rb: GTK4 side-by-side comparison GUI — show original vs enhanced, keyboard shortcuts O/E to move preferred version to outdir, Space to skip, auto-rescans as new photos arrive Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
-rw-r--r--.hyperstack-vm-photo-state.json93
-rw-r--r--Gemfile1
-rw-r--r--photo-compare.rb186
-rwxr-xr-xphoto-enhance.rb375
-rw-r--r--smart_photo_node.py536
-rw-r--r--workflows/photo-enhance.json109
6 files changed, 1079 insertions, 221 deletions
diff --git a/.hyperstack-vm-photo-state.json b/.hyperstack-vm-photo-state.json
new file mode 100644
index 0000000..8bf47b2
--- /dev/null
+++ b/.hyperstack-vm-photo-state.json
@@ -0,0 +1,93 @@
+{
+ "vm_id": 698370,
+ "vm_name": "hyperstack-photo-20260324164547",
+ "environment_name": "snonux-ollama",
+ "region": "CANADA-1",
+ "flavor_name": "n3-L40x1",
+ "image_name": "Ubuntu Server 24.04 LTS R570 CUDA 12.8 with Docker",
+ "key_name": "earth",
+ "public_ip": "69.19.136.63",
+ "created_at": "2026-03-24T16:45:48Z",
+ "services": {
+ "vllm_enabled": false,
+ "ollama_enabled": false,
+ "comfyui_enabled": true
+ },
+ "security_rules": [
+ {
+ "direction": "ingress",
+ "ethertype": "IPv4",
+ "protocol": "tcp",
+ "port_range_min": 22,
+ "port_range_max": 22,
+ "remote_ip_prefix": "79.100.218.77/32"
+ },
+ {
+ "direction": "ingress",
+ "ethertype": "IPv4",
+ "protocol": "udp",
+ "port_range_min": 56710,
+ "port_range_max": 56710,
+ "remote_ip_prefix": "79.100.218.77/32"
+ },
+ {
+ "direction": "ingress",
+ "ethertype": "IPv4",
+ "protocol": "tcp",
+ "port_range_min": 8188,
+ "port_range_max": 8188,
+ "remote_ip_prefix": "192.168.3.0/24"
+ },
+ {
+ "direction": "ingress",
+ "ethertype": "IPv4",
+ "protocol": "tcp",
+ "port_range_min": 22,
+ "port_range_max": 22,
+ "remote_ip_prefix": "79.100.218.77/32"
+ },
+ {
+ "direction": "ingress",
+ "ethertype": "IPv4",
+ "protocol": "udp",
+ "port_range_min": 56710,
+ "port_range_max": 56710,
+ "remote_ip_prefix": "79.100.218.77/32"
+ },
+ {
+ "direction": "egress",
+ "ethertype": "IPv4",
+ "protocol": "any",
+ "port_range_min": 1,
+ "port_range_max": 65535,
+ "remote_ip_prefix": "0.0.0.0/0"
+ },
+ {
+ "direction": "ingress",
+ "ethertype": "IPv4",
+ "protocol": "tcp",
+ "port_range_min": 8188,
+ "port_range_max": 8188,
+ "remote_ip_prefix": "192.168.3.0/24"
+ },
+ {
+ "direction": "egress",
+ "ethertype": "IPv6",
+ "protocol": "any",
+ "port_range_min": 1,
+ "port_range_max": 65535,
+ "remote_ip_prefix": "0.0.0.0/0"
+ }
+ ],
+ "bootstrapped_at": "2026-03-24T16:48:36Z",
+ "wireguard_setup_at": "2026-03-24T16:48:54Z",
+ "comfyui_setup_at": "2026-03-24T16:52:16Z",
+ "comfyui_container_name": "comfyui_photo",
+ "comfyui_models": [
+ "RealESRGAN_x4plus",
+ "SUPIR-v0Q"
+ ],
+ "status": "ACTIVE",
+ "vm_state": "active",
+ "provisioned_at": "2026-03-24T16:52:30Z"
+} \ No newline at end of file
diff --git a/Gemfile b/Gemfile
index a1bbd94..b94602f 100644
--- a/Gemfile
+++ b/Gemfile
@@ -1,3 +1,4 @@
source "https://rubygems.org"
gem "toml-rb", "~> 2.2"
+gem "gtk4"
diff --git a/photo-compare.rb b/photo-compare.rb
new file mode 100644
index 0000000..4f5ec4c
--- /dev/null
+++ b/photo-compare.rb
@@ -0,0 +1,186 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+# photo-compare.rb — Side-by-side before/after photo comparison and selection tool.
+#
+# Shows each original + enhanced pair side by side, filling the window.
+# Press O to move the original to --outdir, E to move the enhanced version,
+# Space/S to skip. Rescans after each action so newly finished photos appear.
+#
+# Usage:
+# ruby photo-compare.rb --indir ~/Downloads/fuji --outdir ~/Downloads/fuji/selected
+#
+# Keyboard shortcuts:
+# O — move original to outdir
+# E — move enhanced to outdir
+# Space/S — skip (leave both, advance to next)
+# Q/Escape — quit
+
+require 'gtk4'
+require 'optparse'
+require 'fileutils'
+
+SUPPORTED_EXTENSIONS = %w[.jpg .jpeg .png .webp].freeze
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def find_pairs(indir)
+ Dir.glob(File.join(indir, '*'))
+ .select { |f| File.file?(f) && SUPPORTED_EXTENSIONS.include?(File.extname(f).downcase) }
+ .reject { |f| File.basename(f, '.*').end_with?('_e') }
+ .reject { |f| File.basename(f).include?('.orient.') }
+ .filter_map do |orig|
+ ext = File.extname(orig).downcase # enhanced files always have lowercase ext
+ base = File.basename(orig, File.extname(orig))
+ enh = File.join(File.dirname(orig), "#{base}_e#{ext}")
+ [orig, enh] if File.exist?(enh)
+ end
+ .sort
+end
+
+def kb(path)
+ (File.size(path) / 1024.0).round
+end
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+options = { indir: nil, outdir: nil }
+OptionParser.new do |o|
+ o.banner = 'Usage: ruby photo-compare.rb --indir DIR --outdir DIR'
+ o.on('--indir PATH', 'Directory with original + _e photo pairs') { |v| options[:indir] = v }
+ o.on('--outdir PATH', 'Directory to move selected photos into') { |v| options[:outdir] = v }
+ o.on('-h', '--help', 'Show this help') { puts o; exit }
+end.parse!
+
+abort '--indir is required' unless options[:indir]
+abort '--outdir is required' unless options[:outdir]
+
+indir = File.expand_path(options[:indir])
+outdir = File.expand_path(options[:outdir])
+FileUtils.mkdir_p(outdir)
+
+state = { pairs: find_pairs(indir), index: 0, indir: indir, outdir: outdir }
+abort "No before/after pairs found in #{indir}" if state[:pairs].empty?
+
+# ---------------------------------------------------------------------------
+# GTK4 UI
+# ---------------------------------------------------------------------------
+
+app = Gtk::Application.new('org.hypr.photo-compare', :default_flags)
+
+app.signal_connect('activate') do |a|
+ win = Gtk::ApplicationWindow.new(a)
+ win.title = 'Photo Compare'
+ win.maximize # fill the screen
+
+ root = Gtk::Box.new(:vertical, 4)
+ root.margin_top = root.margin_bottom = root.margin_start = root.margin_end = 6
+ win.child = root
+
+ # Top: progress info
+ progress_lbl = Gtk::Label.new
+ progress_lbl.xalign = 0
+ root.append(progress_lbl)
+
+ # Middle: two pictures side by side — Gtk::Picture scales to fill its container
+ img_row = Gtk::Box.new(:horizontal, 8)
+ img_row.vexpand = true
+ root.append(img_row)
+
+ left_frame = Gtk::Box.new(:vertical, 2)
+ right_frame = Gtk::Box.new(:vertical, 2)
+ left_frame.hexpand = right_frame.hexpand = true
+ left_frame.vexpand = right_frame.vexpand = true
+
+ # Gtk::Picture is GTK4's scaling image widget; content_fit: :contain keeps aspect ratio
+ left_pic = Gtk::Picture.new
+ right_pic = Gtk::Picture.new
+ left_pic.content_fit = :contain
+ right_pic.content_fit = :contain
+ left_pic.hexpand = left_pic.vexpand = true
+ right_pic.hexpand = right_pic.vexpand = true
+
+ left_lbl = Gtk::Label.new
+ right_lbl = Gtk::Label.new
+
+ left_frame.append(left_pic)
+ left_frame.append(left_lbl)
+ right_frame.append(right_pic)
+ right_frame.append(right_lbl)
+ img_row.append(left_frame)
+ img_row.append(right_frame)
+
+ # Bottom: action buttons
+ btn_row = Gtk::Box.new(:horizontal, 16)
+ btn_row.halign = :center
+ orig_btn = Gtk::Button.new(label: '← Original [O]')
+ skip_btn = Gtk::Button.new(label: 'Skip [Space]')
+ enh_btn = Gtk::Button.new(label: 'Enhanced → [E]')
+ btn_row.append(orig_btn)
+ btn_row.append(skip_btn)
+ btn_row.append(enh_btn)
+ root.append(btn_row)
+
+ # -----------------------------------------------------------------------
+ # Refresh display for current pair
+ # -----------------------------------------------------------------------
+ refresh = lambda do
+ orig, enh = state[:pairs][state[:index]]
+ progress_lbl.label = "#{state[:index] + 1} / #{state[:pairs].length} — #{File.basename(orig)}"
+ left_pic.set_filename(orig)
+ right_pic.set_filename(enh)
+ left_lbl.label = "Original (#{kb(orig)} KB)"
+ right_lbl.label = "Enhanced (#{kb(enh)} KB)"
+ end
+
+ # -----------------------------------------------------------------------
+ # After moving (or skipping), rescan and show next pair.
+ # Moving removes the pair from the list, so index stays put and naturally
+ # points at the next pair. Skip increments the index explicitly.
+ # -----------------------------------------------------------------------
+ advance = lambda do |pick|
+ unless pick.nil?
+ FileUtils.mv(pick, File.join(state[:outdir], File.basename(pick)))
+ else
+ state[:index] += 1
+ end
+
+ state[:pairs] = find_pairs(state[:indir])
+
+ if state[:index] >= state[:pairs].length
+ progress_lbl.label = 'All pairs reviewed — you can close the window.'
+ left_pic.set_filename(nil)
+ right_pic.set_filename(nil)
+ left_lbl.label = right_lbl.label = ''
+ [orig_btn, skip_btn, enh_btn].each { |b| b.sensitive = false }
+ else
+ refresh.call
+ end
+ end
+
+ orig_btn.signal_connect('clicked') { advance.call(state[:pairs][state[:index]][0]) }
+ enh_btn.signal_connect('clicked') { advance.call(state[:pairs][state[:index]][1]) }
+ skip_btn.signal_connect('clicked') { advance.call(nil) }
+
+ key_ctrl = Gtk::EventControllerKey.new
+ key_ctrl.signal_connect('key-pressed') do |_ctrl, keyval, _code, _mod|
+ case keyval
+ when Gdk::Keyval::KEY_o, Gdk::Keyval::KEY_O then orig_btn.emit('clicked')
+ when Gdk::Keyval::KEY_e, Gdk::Keyval::KEY_E then enh_btn.emit('clicked')
+ when Gdk::Keyval::KEY_s, Gdk::Keyval::KEY_S,
+ Gdk::Keyval::KEY_space then skip_btn.emit('clicked')
+ when Gdk::Keyval::KEY_q, Gdk::Keyval::KEY_Escape then a.quit
+ end
+ false
+ end
+ win.add_controller(key_ctrl)
+
+ refresh.call
+ win.show
+end
+
+exit app.run([])
diff --git a/photo-enhance.rb b/photo-enhance.rb
index 39f3942..79c2e4c 100755
--- a/photo-enhance.rb
+++ b/photo-enhance.rb
@@ -1,19 +1,28 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
-# photo-enhance.rb — Photolemur-style automatic photo enhancer via ComfyUI.
+# photo-enhance.rb — AI photo enhancer via ComfyUI on a Hyperstack GPU VM.
#
-# Submits images from --indir to the ComfyUI REST API running on a Hyperstack VM,
-# downloads the enhanced results to --outdir, and optionally watches for new files.
+# Submits images from --indir to the ComfyUI REST API, downloads the AI-enhanced
+# results and saves alongside the originals with an _e suffix. Also downloads
+# a per-photo JSON metadata file written by the WritePhotoMetadata ComfyUI node
+# and converts it to a human-readable .md report alongside each enhanced photo.
+#
+# AI pipeline (ComfyUI, GPU):
+# 1. Real-ESRGAN realesr-general-x4v3 — 4× upscale at full 4K input, AI denoise
+# 2. CodeFormer fidelity=0.7 — neural face restoration
+# 3. CLIP ViT-B/32 — scene classification (portrait/landscape/…)
+# 4. AdaptivePhotoGrade — scene-tuned exposure/contrast/saturation/detail
+# 5. SkyEnhance — HSV sky mask + graduated sky correction
+# 6. Depth Anything V2 Small — depth map → foreground sharp, background soft
#
# Usage:
# ruby photo-enhance.rb --config hyperstack-vm-photo.toml \
-# --indir ~/Pictures --outdir ~/Pictures/enhanced [--watch] [--workflow workflows/photo-enhance.json]
+# --indir ~/Pictures [--watch] [--workflow workflows/photo-enhance.json]
#
# Requirements:
-# - ComfyUI VM provisioned with: ruby hyperstack.rb --config hyperstack-vm-photo.toml create
-# - WireGuard tunnel active (wg1): verified via curl http://hyperstack-photo.wg1:8188/system_stats
-# - Ruby stdlib only (no extra gems needed).
+# - ComfyUI VM: ruby hyperstack.rb --config hyperstack-vm-photo.toml create
+# - WireGuard tunnel active (wg1)
begin
require 'bundler/setup'
@@ -27,6 +36,7 @@ require 'optparse'
require 'fileutils'
require 'digest'
require 'time'
+require 'set'
begin
require 'toml-rb'
@@ -36,18 +46,18 @@ rescue LoadError
end
# ---------------------------------------------------------------------------
-# Config loading — reads only the fields photo-enhance.rb needs from the TOML.
+# Config
# ---------------------------------------------------------------------------
class PhotoConfig
attr_reader :host, :port, :workflow_path
def initialize(config_path, workflow_path_override)
- raw = TomlRB.load_file(File.expand_path(config_path))
- hostname = raw.dig('vm', 'hostname') || 'hyperstack-photo'
+ raw = TomlRB.load_file(File.expand_path(config_path))
+ hostname = raw.dig('vm', 'hostname') || 'hyperstack-photo'
interface = raw.dig('local_client', 'interface_name') || 'wg1'
- @host = "#{hostname}.#{interface}"
- @port = Integer(raw.dig('comfyui', 'port') || 8188)
+ @host = "#{hostname}.#{interface}"
+ @port = Integer(raw.dig('comfyui', 'port') || 8188)
@workflow_path = workflow_path_override ||
File.join(File.dirname(File.expand_path(config_path)), 'workflows', 'photo-enhance.json')
end
@@ -59,7 +69,7 @@ end
class ComfyUIClient
POLL_INTERVAL_SEC = 2
- POLL_TIMEOUT_SEC = 600 # 10 minutes per image (SUPIR can be slow on first load)
+ POLL_TIMEOUT_SEC = 300 # 5 minutes; ESRGAN is fast on GPU
def initialize(host:, port:, out: $stdout)
@host = host
@@ -67,98 +77,80 @@ class ComfyUIClient
@out = out
end
- # Upload a local image file; returns the filename ComfyUI assigned it.
def upload_image(file_path)
- filename = File.basename(file_path)
+ filename = File.basename(file_path)
image_data = File.binread(file_path)
- boundary = "----RubyPhotoEnhance#{SecureRandom_hex(8)}"
-
+ boundary = "----RubyPhotoEnhance#{hex(8)}"
body = [
"--#{boundary}\r\n",
"Content-Disposition: form-data; name=\"image\"; filename=\"#{filename}\"\r\n",
- "Content-Type: #{mime_type_for(file_path)}\r\n\r\n",
+ "Content-Type: #{mime_type(file_path)}\r\n\r\n",
image_data,
"\r\n--#{boundary}\r\n",
"Content-Disposition: form-data; name=\"overwrite\"\r\n\r\ntrue\r\n",
"--#{boundary}--\r\n"
].join
-
resp = post_raw('/upload/image', body, "multipart/form-data; boundary=#{boundary}")
- raise "Upload failed (HTTP #{resp.code}): #{resp.body}" unless resp.code == '200'
-
+ raise "Upload failed (#{resp.code}): #{resp.body}" unless resp.code == '200'
JSON.parse(resp.body)['name'] || filename
rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
- raise "Cannot reach ComfyUI at #{@host}:#{@port} — is WireGuard (wg1) active? (#{e.message})"
+ raise "Cannot reach ComfyUI at #{@host}:#{@port} — is WireGuard active? (#{e.message})"
end
- # Submit a workflow; returns the prompt_id string.
def submit_prompt(workflow)
resp = post_json('/prompt', { 'prompt' => workflow })
- raise "Prompt submission failed (HTTP #{resp.code}): #{resp.body}" unless resp.code == '200'
-
- JSON.parse(resp.body)['prompt_id'] or raise "No prompt_id in response: #{resp.body}"
+ raise "Prompt failed (#{resp.code}): #{resp.body}" unless resp.code == '200'
+ JSON.parse(resp.body)['prompt_id'] or raise "No prompt_id in: #{resp.body}"
end
- # Poll until the prompt finishes; returns the list of output filenames.
def wait_for_output(prompt_id)
deadline = Time.now + POLL_TIMEOUT_SEC
loop do
- raise "Timed out after #{POLL_TIMEOUT_SEC}s waiting for prompt #{prompt_id}" if Time.now > deadline
+ raise "Timed out after #{POLL_TIMEOUT_SEC}s for #{prompt_id}" if Time.now > deadline
- resp = get("/history/#{prompt_id}")
- raise "History poll failed (HTTP #{resp.code})" unless resp.code == '200'
+ resp = get("/history/#{prompt_id}")
+ raise "History poll failed (#{resp.code})" unless resp.code == '200'
- history = JSON.parse(resp.body)
- result = history[prompt_id]
+ result = JSON.parse(resp.body)[prompt_id]
if result
- outputs = extract_output_filenames(result)
+ outputs = extract_filenames(result)
return outputs unless outputs.empty?
- # If ComfyUI marks the run complete but outputs are empty, it used a fully
- # cached execution (execution_cached for all nodes) and wrote no new files.
- # Raise immediately rather than spinning until timeout.
+ # ComfyUI cached the run (identical inputs) and wrote no new files — bail fast.
status = result.dig('status', 'status_str')
- completed = result.dig('status', 'completed')
- raise "ComfyUI returned empty outputs (cached execution?) for #{prompt_id}" \
- if completed && status == 'success'
-
- # ComfyUI may record the prompt before writing output nodes; keep polling.
+ raise "ComfyUI cached execution returned no outputs for #{prompt_id}" \
+ if result.dig('status', 'completed') && status == 'success'
end
sleep POLL_INTERVAL_SEC
end
end
- # Download an output image; saves to dest_path.
def download_output(filename, dest_path)
resp = get("/view?filename=#{URI.encode_www_form_component(filename)}&type=output&subfolder=")
- raise "Download failed (HTTP #{resp.code}) for #{filename}" unless resp.code == '200'
-
+ raise "Download failed (#{resp.code}) for #{filename}" unless resp.code == '200'
FileUtils.mkdir_p(File.dirname(dest_path))
File.binwrite(dest_path, resp.body)
end
- # Quick connectivity check; raises on failure.
def check_connectivity!
resp = get('/system_stats')
- raise "ComfyUI health check failed (HTTP #{resp.code}): #{resp.body}" unless resp.code == '200'
+ raise "Health check failed (#{resp.code}): #{resp.body}" unless resp.code == '200'
rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
- raise "Cannot reach ComfyUI at #{@host}:#{@port} — is WireGuard (wg1) active? (#{e.message})"
+ raise "Cannot reach ComfyUI at #{@host}:#{@port} — is WireGuard active? (#{e.message})"
end
private
- def extract_output_filenames(result)
+ def extract_filenames(result)
Array(result.dig('outputs'))
- .flat_map { |_node_id, node_out| Array(node_out['images']) }
+ .flat_map { |_id, node| Array(node['images']) }
.map { |img| img['filename'] }
- .compact
- .reject(&:empty?)
+ .compact.reject(&:empty?)
end
def get(path)
- uri = URI("http://#{@host}:#{@port}#{path}")
- Net::HTTP.get_response(uri)
+ Net::HTTP.get_response(URI("http://#{@host}:#{@port}#{path}"))
end
def post_json(path, payload)
@@ -166,7 +158,7 @@ class ComfyUIClient
req = Net::HTTP::Post.new(uri)
req['Content-Type'] = 'application/json'
req.body = JSON.generate(payload)
- Net::HTTP.start(uri.host, uri.port) { |http| http.request(req) }
+ Net::HTTP.start(uri.host, uri.port) { |h| h.request(req) }
end
def post_raw(path, body, content_type)
@@ -174,11 +166,11 @@ class ComfyUIClient
req = Net::HTTP::Post.new(uri)
req['Content-Type'] = content_type
req.body = body
- Net::HTTP.start(uri.host, uri.port, read_timeout: 120) { |http| http.request(req) }
+ Net::HTTP.start(uri.host, uri.port, read_timeout: 120) { |h| h.request(req) }
end
- def mime_type_for(file_path)
- case File.extname(file_path).downcase
+ def mime_type(path)
+ case File.extname(path).downcase
when '.jpg', '.jpeg' then 'image/jpeg'
when '.png' then 'image/png'
when '.webp' then 'image/webp'
@@ -186,27 +178,25 @@ class ComfyUIClient
end
end
- # Minimal hex token without SecureRandom (pure stdlib).
- def SecureRandom_hex(n)
+ def hex(n)
Digest::SHA256.hexdigest(Time.now.to_f.to_s + rand.to_s)[0, n * 2]
end
end
# ---------------------------------------------------------------------------
-# Manifest — tracks which files have been processed to avoid re-enhancing.
+# Manifest — avoids re-processing files across runs and in watch mode.
# ---------------------------------------------------------------------------
class ProcessedManifest
- MANIFEST_FILE = '.photo-enhance-processed'
+ FILE_NAME = '.photo-enhance-processed'
- def initialize(outdir)
- @path = File.join(outdir, MANIFEST_FILE)
+ def initialize(dir)
+ @path = File.join(dir, FILE_NAME)
@entries = load_entries
end
def processed?(file_path)
- key = digest(file_path)
- @entries.include?(key)
+ @entries.include?(digest(file_path))
end
def mark_done(file_path)
@@ -218,12 +208,11 @@ class ProcessedManifest
private
def load_entries
- return [] unless File.exist?(@path)
-
+ return Set.new unless File.exist?(@path)
File.readlines(@path, chomp: true).map(&:strip).reject(&:empty?).to_set
end
- # Digest includes mtime so a re-shot of the same filename is re-processed.
+ # Covers basename + size + mtime so a re-shot of the same filename re-processes.
def digest(file_path)
stat = File.stat(file_path)
Digest::SHA256.hexdigest("#{File.basename(file_path)}:#{stat.size}:#{stat.mtime.to_i}")
@@ -233,12 +222,16 @@ class ProcessedManifest
end
# ---------------------------------------------------------------------------
-# Enhancer — orchestrates upload → prompt → poll → download for one image.
+# Enhancer — orchestrates upload → AI → download → colour correct per image.
# ---------------------------------------------------------------------------
class PhotoEnhancer
SUPPORTED_EXTENSIONS = %w[.jpg .jpeg .png .webp].freeze
+ # No colour corrections — pure AI output from Real-ESRGAN is used as-is.
+ # ImageMagick is only used to bake EXIF rotation and convert PNG→JPEG.
+ COLOR_ARGS = [].freeze
+
def initialize(config:, client:, workflow:, indir:, manifest:, out: $stdout)
@config = config
@client = client
@@ -248,120 +241,177 @@ class PhotoEnhancer
@out = out
end
- def enhance_directory(indir, watch: false)
+ def run(watch: false)
@client.check_connectivity!
@out.puts "ComfyUI ready at http://#{@config.host}:#{@config.port}"
- @out.puts "Enhancing photos in #{indir} (output: <name>_e.<ext> alongside originals)"
+ @out.puts "Enhancing photos in #{@indir}"
@out.puts watch ? '(watch mode — Ctrl-C to stop)' : ''
loop do
- pending = find_pending_images(indir)
- pending.each { |path| enhance_one(path) }
+ find_pending.each { |path| enhance_one(path) }
break unless watch
-
sleep 5
end
end
private
- def find_pending_images(indir)
- Dir.glob(File.join(indir, '*'))
+ def find_pending
+ Dir.glob(File.join(@indir, '*'))
.select { |f| File.file?(f) && SUPPORTED_EXTENSIONS.include?(File.extname(f).downcase) }
.reject { |f| File.basename(f, '.*').end_with?('_e') }
+ .reject { |f| File.basename(f).include?('.orient.') }
.reject { |f| @manifest.processed?(f) }
.sort
end
def enhance_one(src_path)
- basename = File.basename(src_path, '.*')
- ext = File.extname(src_path).downcase
- # Output lives in the same directory as the original, with an _enhanced suffix
- # before the extension (e.g. photo.jpg -> photo_enhanced.jpg).
- dest_path = File.join(File.dirname(src_path), "#{basename}_e#{ext}")
+ ext = File.extname(src_path).downcase
+ basename = File.basename(src_path, File.extname(src_path))
+ dest_path = File.join(File.dirname(src_path), "#{basename}_e#{ext}")
- @out.puts "[#{Time.now.strftime('%H:%M:%S')}] Enhancing #{File.basename(src_path)}..."
+ @out.puts "[#{Time.now.strftime('%H:%M:%S')}] #{File.basename(src_path)}"
- # Auto-rotate based on EXIF orientation before uploading. ComfyUI strips EXIF,
- # so we bake the rotation into a temp file; this ensures output is correctly oriented.
- upload_path = auto_orient_tempfile(src_path)
+ # Bake in EXIF rotation before uploading — ComfyUI strips EXIF metadata.
+ upload_path = auto_orient_tempfile(src_path)
uploaded_name = @client.upload_image(upload_path)
- workflow = inject_input_image(@workflow, uploaded_name)
+ workflow = inject_input(@workflow, uploaded_name)
prompt_id = @client.submit_prompt(workflow)
- @out.puts " Submitted prompt #{prompt_id}, waiting for ComfyUI..."
+ @out.puts " prompt #{prompt_id}"
filenames = @client.wait_for_output(prompt_id)
- raise "No output images returned for #{src_path}" if filenames.empty?
-
- # ComfyUI SaveImage always outputs PNG. Download to a temp file then convert
- # to the original format (JPEG for .jpg/.jpeg) so file sizes stay comparable.
- tmp_path = "#{dest_path}.tmp.png"
- @client.download_output(filenames.first, tmp_path)
- convert_to_original_format(tmp_path, dest_path, ext)
- File.delete(tmp_path) if File.exist?(tmp_path)
+ raise "No outputs returned for #{src_path}" if filenames.empty?
+
+ # ComfyUI outputs PNG; download then convert to original format.
+ tmp_png = "#{dest_path}.tmp.png"
+ @client.download_output(filenames.first, tmp_png)
+ save_with_corrections(tmp_png, dest_path, ext)
+ File.delete(tmp_png) if File.exist?(tmp_png)
File.delete(upload_path) if upload_path != src_path && File.exist?(upload_path)
+
+ # Download the JSON metadata written by WritePhotoMetadata and render it
+ # as a human-readable .md report alongside the enhanced photo.
+ # ComfyUI appends _NNNNN_ counter: "enhanced_abc123__00001_.png" → "enhanced_abc123_"
+ prefix = filenames.first.sub(/_\d+_\.png$/, '')
+ meta_file = "#{prefix}meta.json"
+ md_path = File.join(File.dirname(dest_path),
+ "#{File.basename(dest_path, File.extname(dest_path))}.md")
+ download_and_write_md(meta_file, src_path, dest_path, md_path)
+
@manifest.mark_done(src_path)
- orig_size = File.size(src_path)
- enhanced_size = File.size(dest_path)
- @out.puts " Saved -> #{dest_path} (#{kb(orig_size)} KB -> #{kb(enhanced_size)} KB)"
+ @out.puts " -> #{dest_path} (#{kb(src_path)} KB -> #{kb(dest_path)} KB)"
rescue StandardError => e
- @out.puts " ERROR enhancing #{File.basename(src_path)}: #{e.message}"
+ @out.puts " ERROR #{File.basename(src_path)}: #{e.message}"
end
- # Apply EXIF auto-orientation to a copy of src_path and return the copy's path.
- # If magick fails (e.g. not installed or no EXIF), returns src_path unchanged so
- # the caller always has a valid upload path.
+ # Run magick -auto-orient into a temp file so EXIF rotation is baked in.
+ # Falls back to the original path if magick is unavailable.
def auto_orient_tempfile(src_path)
- ext = File.extname(src_path)
- tmp = "#{src_path}.orient#{ext}"
- success = system('magick', src_path, '-auto-orient', tmp)
- return tmp if success && File.exist?(tmp)
+ ext = File.extname(src_path)
+ tmp = "#{src_path}.orient#{ext}"
+ return tmp if system('magick', src_path, '-auto-orient', tmp) && File.exist?(tmp)
- @out.puts " Warning: auto-orient failed for #{File.basename(src_path)}, uploading original"
+ @out.puts " Warning: auto-orient failed, uploading original"
src_path
end
- # Convert the PNG downloaded from ComfyUI into the desired output format and
- # apply local colour corrections via ImageMagick:
- # -sigmoidal-contrast 3,50% — gentle S-curve (lifts shadows, adds punch)
- # -modulate 100,120,100 — +20% saturation (vibrance-style boost)
- # -unsharp 0x1.5+0.7+0.02 — mild clarity / micro-contrast sharpening
- # PNG output gets the same corrections but stays lossless.
- def convert_to_original_format(src_png, dest_path, original_ext)
- color_args = [
- '-sigmoidal-contrast', '3,50%',
- '-modulate', '100,120,100',
- '-unsharp', '0x1.5+0.7+0.02'
- ]
- case original_ext
- when '.jpg', '.jpeg'
- system('magick', src_png, *color_args, '-quality', '92', dest_path)
- else
- system('magick', src_png, *color_args, dest_path)
- end
+ # Convert the downloaded PNG to the target format (JPEG quality 92 for .jpg).
+ # No colour processing — pure AI output from Real-ESRGAN is preserved as-is.
+ def save_with_corrections(src_png, dest_path, ext)
+ quality_args = ext.match?(/\.jpe?g/) ? ['-quality', '92'] : []
+ system('magick', src_png, *COLOR_ARGS, *quality_args, dest_path)
end
- def kb(bytes)
- (bytes / 1024.0).round
+ # Download the WritePhotoMetadata JSON from ComfyUI output and render it
+ # as a Markdown report saved alongside the enhanced photo.
+ def download_and_write_md(meta_filename, src_path, dest_path, md_path)
+ resp = @client.send(:get,
+ "/view?filename=#{URI.encode_www_form_component(meta_filename)}&type=output&subfolder=")
+ return unless resp.code == '200'
+
+ meta = JSON.parse(resp.body)
+ profile = meta['enhancement_profile'] || {}
+ sky = meta['sky'] || {}
+ depth = meta['depth_sharpen'] || {}
+ models = meta['models'] || {}
+ scene = meta['scene_type'] || 'unknown'
+ ts = meta['generated_at'] || Time.now.utc.iso8601
+
+ md = <<~MD
+ # #{File.basename(dest_path)} — Enhancement Report
+
+ **Source:** #{File.basename(src_path)} (#{kb(src_path)} KB)
+ **Enhanced:** #{File.basename(dest_path)} (#{kb(dest_path)} KB)
+ **Processed:** #{ts}
+
+ ## AI Pipeline
+
+ | Step | Model / Node | Device | What it does |
+ |------|-------------|--------|--------------|
+ | 1 | `#{models['upscaler']}` | GPU | 4× upscale at full 4K input → 16K → back to 4K |
+ | 2 | `#{models['face_restore']}` | GPU | Face detection + neural restoration |
+ | 3 | `#{models['scene_detect']}` | GPU | Zero-shot scene classification |
+ | 4 | Adaptive Photo Grade | CPU | Scene-tuned exposure / contrast / saturation / detail |
+ | 5 | Sky Enhance | CPU | HSV sky mask + graduated sky correction |
+ | 6 | `#{models['depth']}` | GPU | Depth map → foreground sharp, background soft |
+
+ ## Scene Detection
+
+ | | |
+ |-|-|
+ | **Detected scene** | #{scene} |
+
+ ## Colour Grading Profile (#{scene})
+
+ | Setting | Value |
+ |---------|-------|
+ | Exposure | +#{profile['exposure_stops']} stops |
+ | Contrast | #{profile['contrast_factor']}× |
+ | Saturation | #{profile['saturation_mult']}× |
+ | Detail / Clarity | #{profile['detail_mult']}× |
+ | Denoise strength | #{profile['denoise_strength']} |
+
+ ## Sky Enhancement
+
+ | Setting | Value |
+ |---------|-------|
+ | Sky coverage | #{sky['coverage_pct']}% of image |
+ | Sky exposure | +#{sky['sky_exposure']} stops |
+ | Sky saturation | #{sky['sky_saturation']}× |
+
+ ## Depth-Guided Sharpening
+
+ | Setting | Value |
+ |---------|-------|
+ | Foreground sharpening | #{depth['foreground_sharpen']}× |
+ | Background blur | #{depth['background_blur']} |
+ MD
+
+ File.write(md_path, md)
+ rescue StandardError => e
+ @out.puts " Warning: could not write metadata report: #{e.message}"
end
- # Inject the input filename and a unique SaveImage prefix into the workflow.
- # The unique prefix prevents ComfyUI from returning a fully-cached execution
- # (outputs: {}) instead of actually running the pipeline and writing output files.
- def inject_input_image(workflow, filename)
- modified = JSON.parse(JSON.generate(workflow)) # deep dup
- unique_prefix = "enhanced_#{Digest::SHA256.hexdigest(Time.now.to_f.to_s + rand.to_s)[0, 8]}_"
- modified.each_value do |node|
+ # Inject the upload filename and a unique prefix into LoadImage, SaveImage,
+ # and WritePhotoMetadata to bust ComfyUI's cache and link metadata to image.
+ def inject_input(workflow, filename)
+ wf = JSON.parse(JSON.generate(workflow)) # deep dup
+ prefix = "enhanced_#{Digest::SHA256.hexdigest(Time.now.to_f.to_s + rand.to_s)[0, 8]}_"
+ wf.each_value do |node|
next unless node.is_a?(Hash)
-
case node['class_type']
- when 'LoadImage'
- node['inputs']['image'] = filename
- when 'SaveImage'
- node['inputs']['filename_prefix'] = unique_prefix
+ when 'LoadImage' then node['inputs']['image'] = filename
+ when 'SaveImage' then node['inputs']['filename_prefix'] = prefix
+ when 'WritePhotoMetadata'
+ node['inputs']['filename_prefix'] = prefix
+ node['inputs']['source_filename'] = filename
end
end
- modified
+ wf
+ end
+
+ def kb(path)
+ (File.size(path) / 1024.0).round
end
end
@@ -379,18 +429,15 @@ options = {
OptionParser.new do |o|
o.banner = 'Usage: ruby photo-enhance.rb [options]'
- o.on('--config PATH', 'TOML config file (default: hyperstack-vm-photo.toml)') { |v| options[:config] = v }
- o.on('--indir PATH', 'Directory of photos to enhance (output: <name>_enhanced.<ext> in same dir)') { |v| options[:indir] = v }
- o.on('--workflow PATH', 'ComfyUI workflow JSON (default: workflows/photo-enhance.json)') { |v| options[:workflow] = v }
- o.on('--watch', 'Keep running and process new images as they arrive') { options[:watch] = true }
- o.on('--test', 'Only check connectivity to ComfyUI, then exit') { options[:test] = true }
+ o.on('--config PATH', 'TOML config (default: hyperstack-vm-photo.toml)') { |v| options[:config] = v }
+ o.on('--indir PATH', 'Directory of photos to enhance') { |v| options[:indir] = v }
+ o.on('--workflow PATH', 'ComfyUI workflow JSON override') { |v| options[:workflow] = v }
+ o.on('--watch', 'Keep running, process new images as they arrive') { options[:watch] = true }
+ o.on('--test', 'Check ComfyUI connectivity only, then exit') { options[:test] = true }
o.on('-h', '--help', 'Show this help') { puts o; exit }
end.parse!
-unless File.exist?(options[:config])
- warn "Config not found: #{options[:config]}"
- exit 1
-end
+abort "Config not found: #{options[:config]}" unless File.exist?(options[:config])
cfg = PhotoConfig.new(options[:config], options[:workflow])
client = ComfyUIClient.new(host: cfg.host, port: cfg.port)
@@ -398,42 +445,26 @@ client = ComfyUIClient.new(host: cfg.host, port: cfg.port)
if options[:test]
begin
client.check_connectivity!
- puts "ComfyUI is reachable at http://#{cfg.host}:#{cfg.port} — OK"
+ puts "ComfyUI reachable at http://#{cfg.host}:#{cfg.port} — OK"
exit 0
rescue RuntimeError => e
- warn "ERROR: #{e.message}"
- exit 1
+ warn "ERROR: #{e.message}"; exit 1
end
end
-unless options[:indir]
- warn '--indir is required (use --test to only check connectivity)'
- exit 1
-end
-
+abort '--indir is required' unless options[:indir]
indir = File.expand_path(options[:indir])
-
-unless File.directory?(indir)
- warn "Input directory not found: #{indir}"
- exit 1
-end
-
-unless File.exist?(cfg.workflow_path)
- warn "Workflow JSON not found: #{cfg.workflow_path}"
- warn "Expected at #{File.join(__dir__, 'workflows', 'photo-enhance.json')}"
- exit 1
-end
+abort "Directory not found: #{indir}" unless File.directory?(indir)
+abort "Workflow not found: #{cfg.workflow_path}" unless File.exist?(cfg.workflow_path)
workflow = JSON.parse(File.read(cfg.workflow_path))
-# Manifest lives in the indir so it stays with the photos.
manifest = ProcessedManifest.new(indir)
enhancer = PhotoEnhancer.new(config: cfg, client: client, workflow: workflow,
indir: indir, manifest: manifest)
begin
- enhancer.enhance_directory(indir, watch: options[:watch])
+ enhancer.run(watch: options[:watch])
rescue RuntimeError => e
- warn "ERROR: #{e.message}"
- exit 1
+ warn "ERROR: #{e.message}"; exit 1
rescue Interrupt
puts "\nStopped."
end
diff --git a/smart_photo_node.py b/smart_photo_node.py
new file mode 100644
index 0000000..114ff4e
--- /dev/null
+++ b/smart_photo_node.py
@@ -0,0 +1,536 @@
+"""
+Smart Photo Enhancement Nodes for ComfyUI
+==========================================
+Five AI-driven nodes that replace static colour-correction filters with
+content-aware processing:
+
+    CLIPSceneDetect       — CLIP zero-shot classification → scene label
+    AdaptivePhotoGrade    — scene-tuned exposure / contrast / saturation / detail
+    SkyEnhance            — HSV sky mask + graduated exposure & saturation boost
+    DepthSelectiveSharpen — Depth-Anything depth map → foreground sharp, BG soft
+    WritePhotoMetadata    — per-photo JSON report written to the ComfyUI output dir
+
+All heavy models are loaded once and kept in _MODEL_CACHE between prompts.
+"""
+
+import torch
+import numpy as np
+import cv2
+from PIL import Image
+
+# ---------------------------------------------------------------------------
+# Global model cache — prevents reloading 100–600 MB models every frame
+# ---------------------------------------------------------------------------
+_MODEL_CACHE: dict = {}
+
+
+def _cached_model(key: str, loader_fn):
+ """Return a cached model, loading it on the first call."""
+ if key not in _MODEL_CACHE:
+ _MODEL_CACHE[key] = loader_fn()
+ return _MODEL_CACHE[key]
+
+
+# ---------------------------------------------------------------------------
+# CLIPSceneDetect
+# ---------------------------------------------------------------------------
+class CLIPSceneDetect:
+ """
+ Zero-shot scene classification using OpenAI CLIP (ViT-B/32, ~600 MB).
+ Matches the photo against 8 descriptive text prompts and emits the
+ winning scene label as a STRING for AdaptivePhotoGrade to consume.
+
+ Scene labels: portrait | landscape | night | indoor |
+ golden_hour | overcast | beach | street
+ """
+
+ # Text prompts whose cosine similarity to the image selects the scene
+ SCENE_PROMPTS = [
+ "a portrait photograph of a person or people",
+ "a landscape photograph of nature or scenery outdoors",
+ "a night photograph taken in low light or darkness",
+ "an indoor photograph inside a room or building",
+ "a golden hour or sunset photograph with warm orange light",
+ "an overcast or cloudy day outdoor photograph",
+ "a beach, ocean, or waterfront photograph",
+ "a street, city, or urban photograph",
+ ]
+ SCENE_LABELS = [
+ "portrait", "landscape", "night", "indoor",
+ "golden_hour", "overcast", "beach", "street",
+ ]
+
+ @classmethod
+ def INPUT_TYPES(cls):
+ return {"required": {"image": ("IMAGE",)}}
+
+ RETURN_TYPES = ("IMAGE", "STRING")
+ RETURN_NAMES = ("image", "scene_type")
+ FUNCTION = "detect"
+ CATEGORY = "image/smart"
+
+ def detect(self, image):
+ from transformers import CLIPProcessor, CLIPModel
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ def _load():
+ print("[CLIPSceneDetect] Loading CLIP ViT-B/32…")
+ m = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device).eval()
+ p = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+ return m, p
+
+ model, processor = _cached_model("clip_scene", _load)
+
+ # Use the first image in the batch; all frames are the same scene
+ img_np = (image[0].cpu().numpy() * 255).astype(np.uint8)
+ img_pil = Image.fromarray(img_np)
+
+ inputs = processor(
+ text=self.SCENE_PROMPTS,
+ images=img_pil,
+ return_tensors="pt",
+ padding=True,
+ ).to(device)
+
+ with torch.no_grad():
+ logits = model(**inputs).logits_per_image[0]
+ probs = logits.softmax(dim=0).cpu()
+
+ idx = int(probs.argmax())
+ scene = self.SCENE_LABELS[idx]
+ conf = float(probs[idx])
+ print(f"[CLIPSceneDetect] → {scene} ({conf:.1%})")
+ return (image, scene)
+
+
+# ---------------------------------------------------------------------------
+# AdaptivePhotoGrade
+# ---------------------------------------------------------------------------
+class AdaptivePhotoGrade:
+ """
+ Scene-adaptive colour grading node.
+
+    Applies exposure correction (a simple linear shift in linear light — not
+    Reinhard tonemapping, which darkened already-bright photos), contrast,
+    saturation, and guided-filter clarity enhancement, tuned per scene type.
+ Falls back to balanced 'default' settings for unknown scene labels.
+
+ Replaces the three static ComfyUI-Image-Filters nodes
+ (ExposureAdjust + AdjustContrast + EnhanceDetail) with one smart node
+ that adapts to content.
+ """
+
+ # Per-scene profiles: exposure in stops, contrast factor, saturation
+ # multiplier, detail enhancement multiplier, denoise strength (0..1).
+ PROFILES = {
+ # Portraits: gentle — preserve skin tones, avoid over-sharpening hair
+ "portrait": dict(stops=0.30, contrast=1.10, saturation=1.00, detail=1.2, denoise=0.15),
+ # Landscapes: vivid — strong clarity, saturated skies & greens
+ "landscape": dict(stops=0.20, contrast=1.20, saturation=1.15, detail=1.8, denoise=0.05),
+ # Night: lift shadows aggressively, reduce sharpening (hides noise)
+ "night": dict(stops=0.80, contrast=1.05, saturation=0.90, detail=0.8, denoise=0.30),
+ # Indoor: correct typically warm/dim ambient light
+ "indoor": dict(stops=0.50, contrast=1.15, saturation=1.05, detail=1.3, denoise=0.10),
+ # Golden hour: enhance warmth, lift shadow detail
+ "golden_hour": dict(stops=0.25, contrast=1.20, saturation=1.20, detail=1.5, denoise=0.05),
+ # Overcast: punch contrast to compensate for flat light
+ "overcast": dict(stops=0.40, contrast=1.20, saturation=1.10, detail=1.6, denoise=0.08),
+ # Beach: bright scene, protect highlights, boost blues/greens
+ "beach": dict(stops=0.15, contrast=1.15, saturation=1.20, detail=1.7, denoise=0.05),
+ # Street: punchy contrast, neutral colour
+ "street": dict(stops=0.35, contrast=1.20, saturation=1.05, detail=1.5, denoise=0.08),
+ # Balanced fallback for unrecognised labels
+ "default": dict(stops=0.40, contrast=1.15, saturation=1.05, detail=1.5, denoise=0.10),
+ }
+
+ @classmethod
+ def INPUT_TYPES(cls):
+ return {
+ "required": {
+ "images": ("IMAGE",),
+ "scene_type": ("STRING", {"default": "default"}),
+ }
+ }
+
+ RETURN_TYPES = ("IMAGE",)
+ FUNCTION = "grade"
+ CATEGORY = "image/smart"
+
+ def grade(self, images, scene_type: str):
+ p = self.PROFILES.get(scene_type, self.PROFILES["default"])
+ print(f"[AdaptivePhotoGrade] Scene={scene_type} → {p}")
+
+ results = []
+ for img in images:
+ arr = img.cpu().numpy().copy() # [H, W, C] float32 0..1
+ arr = self._apply_exposure(arr, p["stops"])
+ arr = self._apply_contrast(arr, p["contrast"])
+ arr = self._apply_saturation(arr, p["saturation"])
+ arr = self._apply_detail(arr, p["detail"], p["denoise"])
+ results.append(torch.from_numpy(arr.clip(0, 1)).float())
+
+ return (torch.stack(results),)
+
+ # -- helpers ------------------------------------------------------------
+
+ def _apply_exposure(self, img: np.ndarray, stops: float) -> np.ndarray:
+ """
+ Per-stop exposure adjustment in linear light.
+ Converts sRGB → linear, multiplies by 2^stops, clips highlights, converts back.
+ Simple and photographic — avoids Reinhard's tonal compression which
+ would darken already-bright Fuji photos.
+ """
+ linear = img ** 2.2 # sRGB → approximate linear
+ linear = linear * (2.0 ** stops) # shift by N stops (positive = brighter)
+ return np.clip(linear ** (1.0 / 2.2), 0, 1) # back to sRGB, clip overexposed
+
+ def _apply_contrast(self, img: np.ndarray, factor: float) -> np.ndarray:
+ """Simple linear contrast around 0.5 midpoint."""
+ return np.clip((img - 0.5) * factor + 0.5, 0, 1)
+
+ def _apply_saturation(self, img: np.ndarray, factor: float) -> np.ndarray:
+ """HSV saturation boost; factor=1.0 is a no-op."""
+ u8 = (img * 255).astype(np.uint8)
+ hsv = cv2.cvtColor(u8, cv2.COLOR_RGB2HSV).astype(np.float32)
+ hsv[:, :, 1] = np.clip(hsv[:, :, 1] * factor, 0, 255)
+ return cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB).astype(np.float32) / 255.0
+
+ def _apply_detail(self, img: np.ndarray, mult: float, denoise: float) -> np.ndarray:
+ """
+ Clarity / structure boost via guided-filter edge-preserving decomposition.
+ Separates base (low-freq) from detail (high-freq), scales detail by mult,
+ optionally denoises the base layer via bilateral filter.
+ """
+ u8 = (img * 255).astype(np.uint8)
+
+ # Guided filter produces an edge-preserving smooth base layer
+ # eps controls smoothing strength (higher = more smoothing)
+ base = cv2.ximgproc.guidedFilter(u8, u8, radius=8, eps=int(0.01 * 255 ** 2))
+ detail = u8.astype(np.float32) - base.astype(np.float32)
+
+ # Optionally soften the base to reduce noise before adding detail back
+ if denoise > 0.05:
+ sigma = int(denoise * 75)
+ base = cv2.bilateralFilter(base, d=5, sigmaColor=sigma, sigmaSpace=sigma)
+
+ enhanced = base.astype(np.float32) + detail * mult
+ return np.clip(enhanced / 255.0, 0, 1)
+
+
+# ---------------------------------------------------------------------------
+# SkyEnhance
+# ---------------------------------------------------------------------------
+class SkyEnhance:
+ """
+ Sky region detection and graduated enhancement — no ML model required.
+
+ Detects sky using HSV colour ranges (blue sky, white clouds, sunset tones)
+ combined with a spatial prior (sky lives in the upper portion of the frame).
+ Applies independent exposure + saturation adjustments to the sky mask,
+ blended smoothly with the rest of the image.
+
+ Works on any outdoor shot; portraits and indoor shots receive no change
+ because the sky mask will be near zero.
+ """
+
+ @classmethod
+ def INPUT_TYPES(cls):
+ return {
+ "required": {
+ "images": ("IMAGE",),
+ "sky_exposure": ("FLOAT", {"default": 0.30, "min": -1.0, "max": 1.0, "step": 0.05}),
+ "sky_saturation": ("FLOAT", {"default": 1.20, "min": 0.5, "max": 2.0, "step": 0.05}),
+ }
+ }
+
+ RETURN_TYPES = ("IMAGE",)
+ FUNCTION = "enhance"
+ CATEGORY = "image/smart"
+
+ def enhance(self, images, sky_exposure: float = 0.30, sky_saturation: float = 1.20):
+ results = []
+ for img in images:
+ arr = (img.cpu().numpy() * 255).astype(np.uint8)
+ mask = self._detect_sky(arr)
+ enhanced = self._apply_sky(arr, mask, sky_exposure, sky_saturation)
+ results.append(torch.from_numpy(enhanced.astype(np.float32) / 255.0))
+ return (torch.stack(results),)
+
+ def _detect_sky(self, img_rgb: np.ndarray) -> np.ndarray:
+ """
+ Build a soft float sky mask [0..1] using three HSV colour bands
+ plus a vertical spatial prior (sky = upper image region).
+ """
+ h = img_rgb.shape[0]
+ hsv = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2HSV).astype(np.float32)
+ H, S, V = hsv[:, :, 0], hsv[:, :, 1], hsv[:, :, 2]
+
+ # Band 1: Blue daytime sky (hue 90–140 in OpenCV 0–180 scale)
+ blue = ((H >= 90) & (H <= 140) & (S >= 30) & (V >= 50)).astype(np.float32)
+
+ # Band 2: White/grey clouds (low saturation, bright)
+ clouds = ((S < 40) & (V >= 180)).astype(np.float32)
+
+ # Band 3: Sunset/golden sky (hue 0–25 or 155–180, moderate sat)
+ sunset = (((H <= 25) | (H >= 155)) & (S >= 40) & (V >= 100)).astype(np.float32)
+
+ raw = np.clip(blue + clouds + sunset, 0, 1)
+
+ # Vertical gradient prior: top row = 1.2, bottom row = 0.0
+ y_weight = np.linspace(1.2, 0.0, h)[:, np.newaxis]
+ raw = raw * y_weight
+
+ # Morphological close to fill gaps between cloud patches
+ kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
+ raw = cv2.morphologyEx(raw, cv2.MORPH_CLOSE, kernel)
+
+ # Large Gaussian blur for smooth mask edges (avoids halo artifacts)
+ mask = cv2.GaussianBlur(raw, (51, 51), 0)
+ return np.clip(mask, 0, 1)
+
+ def _apply_sky(self, img_rgb: np.ndarray, mask: np.ndarray,
+ sky_exposure: float, sky_saturation: float) -> np.ndarray:
+ """Blend sky-enhanced pixels into the original image using the mask."""
+ orig = img_rgb.astype(np.float32)
+
+ # Exposure adjustment in linear light — simple shift, no Reinhard compression
+ linear = (orig / 255.0) ** 2.2
+ linear = np.clip(linear * (2.0 ** sky_exposure), 0, 1)
+ sky_exp = np.clip(linear ** (1.0 / 2.2) * 255, 0, 255).astype(np.uint8)
+
+ # Saturation boost in HSV
+ hsv = cv2.cvtColor(sky_exp, cv2.COLOR_RGB2HSV).astype(np.float32)
+ hsv[:, :, 1] = np.clip(hsv[:, :, 1] * sky_saturation, 0, 255)
+ sky_sat = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB).astype(np.float32)
+
+ # Alpha blend: mask=1 → sky-enhanced, mask=0 → original
+ mask3 = mask[:, :, np.newaxis]
+ result = orig * (1.0 - mask3) + sky_sat * mask3
+ return np.clip(result, 0, 255).astype(np.uint8)
+
+
+# ---------------------------------------------------------------------------
+# DepthSelectiveSharpen
+# ---------------------------------------------------------------------------
+class DepthSelectiveSharpen:
+ """
+ Depth-guided selective sharpening using Depth Anything V2 Small (~100 MB).
+
+ Estimates a monocular depth map, then:
+ • Foreground (near): unsharp-mask sharpening (foreground_sharpen controls
+ the detail multiplier; 1.0 = no change, 2.0 = strong sharpening)
+ • Background (far): Gaussian blur (background_blur controls kernel size;
+ 0.0 = no blur, 1.0 = heavy background softening)
+
+ This mimics the depth-of-field separation of a fast prime lens —
+ the subject stays razor sharp while busy backgrounds recede.
+ """
+
+ @classmethod
+ def INPUT_TYPES(cls):
+ return {
+ "required": {
+ "images": ("IMAGE",),
+ "foreground_sharpen": ("FLOAT", {"default": 1.50, "min": 1.0, "max": 3.0, "step": 0.1}),
+ "background_blur": ("FLOAT", {"default": 0.50, "min": 0.0, "max": 1.0, "step": 0.1}),
+ }
+ }
+
+ RETURN_TYPES = ("IMAGE",)
+ FUNCTION = "process"
+ CATEGORY = "image/smart"
+
+ def process(self, images, foreground_sharpen: float = 1.5, background_blur: float = 0.5):
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ def _load():
+ print("[DepthSelectiveSharpen] Loading Depth Anything V2 Small…")
+ from transformers import pipeline as hf_pipeline
+ return hf_pipeline(
+ task="depth-estimation",
+ model="depth-anything/Depth-Anything-V2-Small-hf",
+ device=0 if device == "cuda" else -1,
+ )
+
+ depth_pipe = _cached_model("depth_anything_v2", _load)
+
+ results = []
+ for img in images:
+ arr = (img.cpu().numpy() * 255).astype(np.uint8)
+ fg_mask = self._depth_foreground_mask(arr, depth_pipe)
+ result = self._blend_sharp_blur(arr, fg_mask, foreground_sharpen, background_blur)
+ results.append(torch.from_numpy(result.astype(np.float32) / 255.0))
+
+ return (torch.stack(results),)
+
+ def _depth_foreground_mask(self, img_rgb: np.ndarray, depth_pipe) -> np.ndarray:
+ """
+ Run Depth Anything on the image, normalise to [0,1], resize to match,
+ then invert so that near=1 (foreground) and far=0 (background).
+ """
+ h, w = img_rgb.shape[:2]
+ img_pil = Image.fromarray(img_rgb)
+
+ depth_out = depth_pipe(img_pil)
+ depth_arr = np.array(depth_out["depth"], dtype=np.float32)
+
+ # Normalise depth to 0..1
+ d_min, d_max = depth_arr.min(), depth_arr.max()
+ depth_norm = (depth_arr - d_min) / (d_max - d_min + 1e-8)
+
+ # Resize depth map to original image size
+ depth_resized = cv2.resize(depth_norm, (w, h), interpolation=cv2.INTER_LINEAR)
+
+        # NOTE(review): Depth Anything's published convention is relative
+        # disparity — LARGER value = NEARER, not farther. If so, this inversion
+        # would select the background as "foreground"; verify the mask direction
+        # against an actual depth output before trusting the sharpen/blur split.
+ fg_mask = 1.0 - depth_resized
+
+ # Smooth mask to avoid hard transitions at object boundaries
+ fg_mask = cv2.GaussianBlur(fg_mask, (31, 31), 0)
+ return fg_mask.clip(0, 1)
+
+ def _blend_sharp_blur(self, img_u8: np.ndarray, fg_mask: np.ndarray,
+ fg_sharpen: float, bg_blur: float) -> np.ndarray:
+ """Blend foreground-sharpened and background-blurred versions using depth mask."""
+ fg_mask3 = fg_mask[:, :, np.newaxis]
+
+ # Foreground: unsharp mask sharpening
+ if fg_sharpen > 1.0:
+ blur = cv2.GaussianBlur(img_u8.astype(np.float32), (0, 0), 2.0)
+ detail = img_u8.astype(np.float32) - blur
+ sharpened = np.clip(img_u8.astype(np.float32) + detail * (fg_sharpen - 1.0), 0, 255)
+ else:
+ sharpened = img_u8.astype(np.float32)
+
+ # Background: Gaussian blur
+ if bg_blur > 0.05:
+ ksize = int(bg_blur * 10) * 2 + 1 # always odd
+ blurred = cv2.GaussianBlur(img_u8, (ksize, ksize), bg_blur * 5).astype(np.float32)
+ else:
+ blurred = img_u8.astype(np.float32)
+
+ # Combine: near pixels get sharpened version, far pixels get blurred
+ blended = sharpened * fg_mask3 + blurred * (1.0 - fg_mask3)
+ return np.clip(blended, 0, 255).astype(np.uint8)
+
+
+# ---------------------------------------------------------------------------
+# WritePhotoMetadata
+# ---------------------------------------------------------------------------
+class WritePhotoMetadata:
+ """
+ Writes a per-photo JSON metadata file to the ComfyUI output directory.
+
+ The Ruby photo-enhance.rb script downloads this file after the image,
+ reads the AI pipeline details (scene type, profile settings, sky coverage,
+ depth settings), and generates a human-readable .md report alongside the
+ enhanced JPEG.
+
+ filename_prefix must match the prefix injected into SaveImage so the Ruby
+ script can find both files by the same prefix.
+ """
+
+ @classmethod
+ def INPUT_TYPES(cls):
+ return {
+ "required": {
+ "image": ("IMAGE",),
+ "scene_type": ("STRING", {"default": "unknown"}),
+ # Both inputs are injected per-prompt by photo-enhance.rb's inject_input
+ "filename_prefix": ("STRING", {"default": "enhanced_"}),
+ "source_filename": ("STRING", {"default": "photo"}),
+ }
+ }
+
+ # Pass image through unchanged; side-effect is writing the metadata file
+ RETURN_TYPES = ("IMAGE",)
+ FUNCTION = "write"
+ CATEGORY = "image/smart"
+
+ # Mirrors AdaptivePhotoGrade — keep in sync if profiles change
+ PROFILES = AdaptivePhotoGrade.PROFILES
+
+ def write(self, image, scene_type: str, filename_prefix: str, source_filename: str):
+ import json, datetime, os
+
+ # Resolve ComfyUI output directory via its internal module
+ try:
+ import folder_paths
+ out_dir = folder_paths.get_output_directory()
+ except Exception:
+ out_dir = "/ephemeral/comfyui/output"
+
+ profile = self.PROFILES.get(scene_type, self.PROFILES["default"])
+
+ # Compute sky mask coverage as a percentage of the image
+ sky_coverage = self._sky_coverage(image[0])
+
+        # source_filename is the upload name on ComfyUI, e.g. "DSCF5434.JPG.orient.JPG"
+        # Strip a trailing .orient.JPG / .orient.jpg suffix (only these two casings
+        # are handled) to recover the original base name
+ base = os.path.basename(source_filename)
+ base = base.replace(".orient.JPG", "").replace(".orient.jpg", "")
+
+ meta = {
+ "generated_at": datetime.datetime.utcnow().isoformat() + "Z",
+ "source_filename": base,
+ "scene_type": scene_type,
+ "enhancement_profile": {
+ "exposure_stops": profile["stops"],
+ "contrast_factor": profile["contrast"],
+ "saturation_mult": profile["saturation"],
+ "detail_mult": profile["detail"],
+ "denoise_strength": profile["denoise"],
+ },
+ "sky": {
+ "coverage_pct": round(sky_coverage * 100, 1),
+ "sky_exposure": 0.30,
+ "sky_saturation": 1.20,
+ },
+ "depth_sharpen": {
+ "foreground_sharpen": 1.50,
+ "background_blur": 0.50,
+ },
+ "models": {
+ "upscaler": "realesr-general-x4v3 (Real-ESRGAN, GPU)",
+ "face_restore": "CodeFormer fidelity=0.7 (GPU)",
+ "scene_detect": "CLIP ViT-B/32 (openai/clip-vit-base-patch32)",
+ "depth": "Depth Anything V2 Small (GPU)",
+ },
+ }
+
+        # Write a single prefix-named JSON so the Ruby script can download it
+        # by the same unique prefix that was injected into SaveImage
+ meta_path = os.path.join(out_dir, f"{filename_prefix}meta.json")
+ with open(meta_path, "w") as f:
+ json.dump(meta, f, indent=2)
+ print(f"[WritePhotoMetadata] Wrote {meta_path} (scene={scene_type}, sky={sky_coverage:.1%})")
+
+ return (image,)
+
+ def _sky_coverage(self, img_tensor: "torch.Tensor") -> float:
+ """Re-use SkyEnhance's mask logic to estimate sky % for reporting."""
+ try:
+ arr = (img_tensor.cpu().numpy() * 255).astype(np.uint8)
+ helper = SkyEnhance()
+ mask = helper._detect_sky(arr)
+ return float(mask.mean())
+ except Exception:
+ return 0.0
+
+
+# ---------------------------------------------------------------------------
+# ComfyUI node registration
+# ---------------------------------------------------------------------------
+NODE_CLASS_MAPPINGS = {
+ "CLIPSceneDetect": CLIPSceneDetect,
+ "AdaptivePhotoGrade": AdaptivePhotoGrade,
+ "SkyEnhance": SkyEnhance,
+ "DepthSelectiveSharpen": DepthSelectiveSharpen,
+ "WritePhotoMetadata": WritePhotoMetadata,
+}
+
+NODE_DISPLAY_NAME_MAPPINGS = {
+ "CLIPSceneDetect": "CLIP Scene Detect",
+ "AdaptivePhotoGrade": "Adaptive Photo Grade",
+ "SkyEnhance": "Sky Enhance",
+ "DepthSelectiveSharpen": "Depth Selective Sharpen",
+ "WritePhotoMetadata": "Write Photo Metadata",
+}
diff --git a/workflows/photo-enhance.json b/workflows/photo-enhance.json
index 034ae4f..6db3e1d 100644
--- a/workflows/photo-enhance.json
+++ b/workflows/photo-enhance.json
@@ -7,75 +7,86 @@
"upload": "image"
}
},
- "2": {
+ "3": {
+ "class_type": "UpscaleModelLoader",
+ "_meta": {"title": "Load realesr-general-x4v3 (photo-tuned AI upscaler)"},
+ "inputs": {
+ "model_name": "realesr-general-x4v3.pth"
+ }
+ },
+ "4": {
+ "class_type": "ImageUpscaleWithModel",
+ "_meta": {"title": "Real-ESRGAN 4x — AI denoising + detail recovery at full 4K input (GPU)"},
+ "inputs": {
+ "upscale_model": ["3", 0],
+ "image": ["1", 0]
+ }
+ },
+ "5": {
"class_type": "ImageScaleBy",
- "_meta": {"title": "Downscale to 2K for SUPIR (4x faster processing)"},
+ "_meta": {"title": "Scale 16K back to original 4K resolution (0.25x lanczos)"},
"inputs": {
- "image": ["1", 0],
+ "image": ["4", 0],
"upscale_method": "lanczos",
- "scale_by": 0.5
+ "scale_by": 0.25
}
},
- "3": {
- "class_type": "SUPIR_Upscale",
- "_meta": {"title": "SUPIR Restoration at 2K"},
+ "6": {
+ "class_type": "CodeFormerRestore",
+ "_meta": {"title": "CodeFormer — AI face restoration (GPU, fidelity=0.7)"},
"inputs": {
- "supir_model": "SUPIR-v0Q.ckpt",
- "sdxl_model": "sd_xl_base_1.0.safetensors",
- "image": ["2", 0],
- "seed": 42,
- "resize_method": "lanczos",
- "scale_by": 1.0,
- "steps": 8,
- "restoration_scale": -1,
- "cfg_scale": 7.5,
- "a_prompt": "high quality photograph, sharp, detailed, natural colors",
- "n_prompt": "blurry, noisy, low quality, artifact, overexposed, underexposed",
- "s_churn": 5,
- "s_noise": 1.003,
- "control_scale": 1.0,
- "cfg_scale_start": 4.0,
- "control_scale_start": 0.0,
- "color_fix_type": "Wavelet",
- "keep_model_loaded": true,
- "use_tiled_vae": true,
- "encoder_tile_size_pixels": 1024,
- "decoder_tile_size_latent": 128,
- "fp8_unet": true,
- "use_tiled_sampling": true,
- "sampler_tile_size": 512,
- "sampler_tile_stride": 256
+ "image": ["5", 0],
+ "fidelity": 0.7
}
},
- "4": {
- "class_type": "UpscaleModelLoader",
- "_meta": {"title": "Load Real-ESRGAN 4x"},
+ "11": {
+ "class_type": "CLIPSceneDetect",
+ "_meta": {"title": "CLIP Scene Detect — classifies scene type (portrait/landscape/night/etc)"},
"inputs": {
- "model_name": "RealESRGAN_x4plus.pth"
+ "image": ["6", 0]
}
},
- "5": {
- "class_type": "ImageUpscaleWithModel",
- "_meta": {"title": "Real-ESRGAN 4x upscale back towards original resolution"},
+ "12": {
+ "class_type": "AdaptivePhotoGrade",
+ "_meta": {"title": "Adaptive Photo Grade — scene-tuned exposure/contrast/saturation/detail"},
"inputs": {
- "upscale_model": ["4", 0],
- "image": ["3", 0]
+ "images": ["11", 0],
+ "scene_type": ["11", 1]
}
},
- "6": {
- "class_type": "ImageScaleBy",
- "_meta": {"title": "Scale back to original resolution (2K * 4x * 0.5 = 4K)"},
+ "13": {
+ "class_type": "SkyEnhance",
+ "_meta": {"title": "Sky Enhance — HSV sky mask + graduated exposure/saturation boost"},
"inputs": {
- "image": ["5", 0],
- "upscale_method": "lanczos",
- "scale_by": 0.5
+ "images": ["12", 0],
+ "sky_exposure": 0.30,
+ "sky_saturation": 1.20
+ }
+ },
+ "14": {
+ "class_type": "DepthSelectiveSharpen",
+ "_meta": {"title": "Depth Selective Sharpen — Depth-Anything depth map → foreground sharp, BG soft"},
+ "inputs": {
+ "images": ["13", 0],
+ "foreground_sharpen": 1.50,
+ "background_blur": 0.50
+ }
+ },
+ "15": {
+ "class_type": "WritePhotoMetadata",
+ "_meta": {"title": "Write Photo Metadata — saves per-photo JSON report to ComfyUI output dir"},
+ "inputs": {
+ "image": ["14", 0],
+ "scene_type": ["11", 1],
+ "filename_prefix": "enhanced_",
+ "source_filename": "NODE_INPUT_IMAGE"
}
},
- "7": {
+ "10": {
"class_type": "SaveImage",
"_meta": {"title": "Save Enhanced Photo"},
"inputs": {
- "images": ["6", 0],
+ "images": ["15", 0],
"filename_prefix": "enhanced_"
}
}