Update

author: Paul Buetow <paul@buetow.org> 2025-09-06 10:35:14 +0300
committer: Paul Buetow <paul@buetow.org> 2025-09-06 10:35:14 +0300
commit: 79c5890ca8979138eb617d2cdd615a9b7549ebc1 (patch)
tree: 6cbe701322d4df05e13fd5d68fb6d8b0a7fc5d5a /frontends/scripts
parent: e257dc29316ee2f3bb6e467ec978f526764fd132 (diff)
1 files changed, 758 insertions, 451 deletions
diff --git a/frontends/scripts/foostats.pl b/frontends/scripts/foostats.pl
index 235da02..e6ef1c5 100644
--- a/frontends/scripts/foostats.pl
+++ b/frontends/scripts/foostats.pl
@@ -12,8 +12,8 @@ use experimental qw(builtin);
 use feature qw(refaliasing);
 no warnings qw(experimental::refaliasing);
 
-# TODO: UNDO
-use diagnostics;
+# Debugging aids like diagnostics are noisy in production.
+# Removed per review: enable locally when debugging only.
 
 use constant VERSION => 'v0.1.0';
 
@@ -24,86 +24,101 @@ use constant VERSION => 'v0.1.0';
 # * Nicely formatted .txt output by stats by count by date
 # * Print out all UAs, to add new excludes/blocked IPs
 
+# Package: FileHelper — small file/JSON helpers
+# - Purpose: Atomic writes, gzip JSON read/write, and line reading.
+# - Notes: Dies on I/O errors; JSON encoding uses core JSON.
 package FileHelper {
     use JSON;
 
-    sub write ( $path, $content ) {
+    # Sub: write
+    # - Purpose: Atomic write to a file via "$path.tmp" and rename.
+    # - Params: $path (str) destination; $content (str) contents to write.
+    # - Return: undef; dies on failure.
+    sub write ($path, $content) {
         open my $fh, '>', "$path.tmp"
-          or die "\nCannot open file: $!";
+            or die "\nCannot open file: $!";
         print $fh $content;
         close $fh;
 
         rename
-          "$path.tmp",
-          $path;
+            "$path.tmp",
+            $path;
     }
 
-    sub write_json_gz ( $path, $data ) {
+    # Sub: write_json_gz
+    # - Purpose: JSON-encode $data and write it gzipped atomically.
+    # - Params: $path (str) destination path; $data (ref/scalar) Perl data.
+    # - Return: undef; dies on failure.
+    sub write_json_gz ($path, $data) {
         my $json = encode_json $data;
 
         say "Writing $path";
         open my $fd, '>:gzip', "$path.tmp"
-          or die "$path.tmp: $!";
+            or die "$path.tmp: $!";
         print $fd $json;
         close $fd;
 
         rename "$path.tmp", $path
-          or die "$path.tmp: $!";
+            or die "$path.tmp: $!";
     }
 
+    # Sub: read_json_gz
+    # - Purpose: Read a gzipped JSON file and decode to Perl data.
+    # - Params: $path (str) path to .json.gz file.
+    # - Return: Perl data structure.
     sub read_json_gz ($path) {
         say "Reading $path";
         open my $fd, '<:gzip', $path
-          or die "$path: $!";
+            or die "$path: $!";
         my $json = decode_json <$fd>;
         close $fd;
         return $json;
     }
 
+    # Sub: read_lines
+    # - Purpose: Slurp file lines and chomp newlines.
+    # - Params: $path (str) file path.
+    # - Return: list of lines (no trailing newlines).
     sub read_lines ($path) {
         my @lines;
-        open( my $fh, '<', $path )
-          or die "$path: $!";
-        chomp( @lines = <$fh> );
+        open(my $fh, '<', $path)
+            or die "$path: $!";
+        chomp(@lines = <$fh>);
         close($fh);
         return @lines;
     }
 }
 
+# Package: DateHelper — date range helpers
+# - Purpose: Produce date strings used for report windows.
+# - Format: Dates are returned as YYYYMMDD strings.
 package DateHelper {
     use Time::Piece;
 
+    # Sub: last_month_dates
+    # - Purpose: Return dates for today back to 30 days ago (inclusive).
+    # - Params: none.
+    # - Return: list of YYYYMMDD strings, newest first.
     sub last_month_dates () {
         my $today = localtime;
         my @dates;
 
-        for my $days_ago ( 0 .. 30 ) {
-            my $date = $today - ( $days_ago * 24 * 60 * 60 );
+        for my $days_ago (0 .. 30) {
+            my $date = $today - ($days_ago * 24 * 60 * 60);
             push
-              @dates,
-              $date->strftime('%Y%m%d');
+                @dates,
+                $date->strftime('%Y%m%d');
         }
 
         return @dates;
     }
 
-    sub last_n_months_day_dates ($months) {
-        my $today = localtime;
-        my $start_year  = $today->year;
-        my $start_month = $today->mon - $months;
-        while ($start_month <= 0) { $start_month += 12; $start_year--; }
-
-        my $start = Time::Piece->strptime(sprintf('%04d-%02d-01', $start_year, $start_month), '%Y-%m-%d');
-        my @dates;
-        my $t = $start;
-        while ($t <= $today) {
-            push @dates, $t->strftime('%Y%m%d');
-            $t += 24 * 60 * 60; # one day
-        }
-        return @dates;
-    }
+    
 }
 
+# Package: Foostats::Logreader — parse and normalize logs
+# - Purpose: Read web and gemini logs, anonymize IPs, and emit normalized events.
+# - Output Event: { proto, host, ip_hash, ip_proto, date, time, uri_path, status }
 package Foostats::Logreader {
     use Digest::SHA3 'sha3_512_base64';
     use File::stat;
@@ -111,41 +126,54 @@ package Foostats::Logreader {
     use Time::Piece;
     use String::Util qw(contains startswith endswith);
 
-    use constant {
-        GEMINI_LOGS_GLOB => '/var/log/daemon*',
-        WEB_LOGS_GLOB    => '/var/www/logs/access.log*',
-    };
-
+    # Make log locations configurable (env overrides) to enable testing.
+    # Sub: gemini_logs_glob
+    # - Purpose: Glob for gemini-related logs; env override for testing.
+    # - Return: glob pattern string.
+    sub gemini_logs_glob { $ENV{FOOSTATS_GEMINI_LOGS_GLOB} // '/var/log/daemon*' }
+    # Sub: web_logs_glob
+    # - Purpose: Glob for web access logs; env override for testing.
+    # - Return: glob pattern string.
+    sub web_logs_glob    { $ENV{FOOSTATS_WEB_LOGS_GLOB}    // '/var/www/logs/access.log*' }
+
+    # Sub: anonymize_ip
+    # - Purpose: Classify IPv4/IPv6 and map IP to a stable SHA3-512 base64 hash.
+    # - Params: $ip (str) source IP.
+    # - Return: ($hash, $proto) where $proto is 'IPv4' or 'IPv6'.
     sub anonymize_ip ($ip) {
         my $ip_proto =
-          contains( $ip, ':' )
-          ? 'IPv6'
-          : 'IPv4';
+            contains($ip, ':')
+            ? 'IPv6'
+            : 'IPv4';
         my $ip_hash = sha3_512_base64 $ip;
-        return ( $ip_hash, $ip_proto );
+        return ($ip_hash, $ip_proto);
     }
 
-    sub read_lines ( $glob, $cb ) {
+    # Sub: read_lines
+    # - Purpose: Iterate files matching glob by age; invoke $cb for each line.
+    # - Params: $glob (str) file glob; $cb (code) callback ($year, @fields).
+    # - Return: undef; stops early if callback returns undef for a file.
+    sub read_lines ($glob, $cb) {
         my sub year ($path) {
-            localtime( ( stat $path )->mtime )->strftime('%Y');
+            localtime((stat $path)->mtime)->strftime('%Y');
         }
 
         my sub open_file ($path) {
             my $flag =
-              $path =~ /\.gz$/
-              ? '<:gzip'
-              : '<';
+                $path =~ /\.gz$/
+                ? '<:gzip'
+                : '<';
             open my $fd, $flag, $path
-              or die "$path: $!";
+                or die "$path: $!";
             return $fd;
         }
 
         my $last = false;
 
-        say 'File path glob matches: ' . join( ' ', glob $glob );
+        say 'File path glob matches: ' . join(' ', glob $glob);
 
-      LAST:
-        for my $path ( sort { -M $a <=> -M $b } glob $glob ) {
+    LAST:
+        for my $path (sort { -M $a <=> -M $b } glob $glob) {
             say "Processing $path";
 
             my $file = open_file $path;
@@ -153,37 +181,41 @@ package Foostats::Logreader {
 
             while (<$file>) {
                 next
-                  if contains( $_, 'logfile turned over' );
+                    if contains($_, 'logfile turned over');
 
                 # last == true means: After this file, don't process more
                 $last = true
-                  unless defined $cb->( $year, split / +/ );
+                    unless defined $cb->($year, split / +/);
             }
 
             say "Closing $path (last:$last)";
             close $file;
             last LAST
-              if $last;
+                if $last;
         }
     }
 
-    sub parse_web_logs ( $last_processed_date, $cb ) {
+    # Sub: parse_web_logs
+    # - Purpose: Parse web log lines into normalized events and pass to callback.
+    # - Params: $last_processed_date (YYYYMMDD int) lower bound; $cb (code) event consumer.
+    # - Return: undef.
+    sub parse_web_logs ($last_processed_date, $cb) {
         my sub parse_date ($date) {
-            my $t = Time::Piece->strptime( $date, '[%d/%b/%Y:%H:%M:%S' );
-            return ( $t->strftime('%Y%m%d'), $t->strftime('%H%M%S') );
+            my $t = Time::Piece->strptime($date, '[%d/%b/%Y:%H:%M:%S');
+            return ($t->strftime('%Y%m%d'), $t->strftime('%H%M%S'));
         }
 
         my sub parse_web_line (@line) {
-            my ( $date, $time ) = parse_date $line [4];
+            my ($date, $time) = parse_date $line [4];
             return undef
-              if $date < $last_processed_date;
+                if $date < $last_processed_date;
 
             # X-Forwarded-For?
             my $ip =
-                $line[-2] eq '-'
-              ? $line[1]
-              : $line[-2];
-            my ( $ip_hash, $ip_proto ) = anonymize_ip $ip;
+                  $line[-2] eq '-'
+                ? $line[1]
+                : $line[-2];
+            my ($ip_hash, $ip_proto) = anonymize_ip $ip;
 
             return {
                 proto    => 'web',
@@ -197,42 +229,45 @@ package Foostats::Logreader {
             };
         }
 
-        read_lines WEB_LOGS_GLOB, sub ( $year, @line ) {
-            $cb->( parse_web_line @line );
+        read_lines web_logs_glob(), sub ($year, @line) {
+            $cb->(parse_web_line @line);
         };
     }
 
-    sub parse_gemini_logs ( $last_processed_date, $cb ) {
-        my sub parse_date ( $year, @line ) {
+    # Sub: parse_gemini_logs
+    # - Purpose: Parse vger/relayd lines, merge paired entries, and emit events.
+    # - Params: $last_processed_date (YYYYMMDD int); $cb (code) event consumer.
+    # - Return: undef.
+    sub parse_gemini_logs ($last_processed_date, $cb) {
+        my sub parse_date ($year, @line) {
             my $timestr = "$line[0] $line[1]";
-            return Time::Piece->strptime( $timestr, '%b %d' )
-              ->strftime("$year%m%d");
+            return Time::Piece->strptime($timestr, '%b %d')->strftime("$year%m%d");
         }
 
-        my sub parse_vger_line ( $year, @line ) {
+        my sub parse_vger_line ($year, @line) {
             my $full_path = $line[5];
             $full_path =~ s/"//g;
-            my ( $proto, undef, $host, $uri_path ) =
-              split '/',
-              $full_path,
-              4;
+            my ($proto, undef, $host, $uri_path) =
+                split '/',
+                $full_path,
+                4;
             $uri_path = ''
-              unless defined $uri_path;
+                unless defined $uri_path;
 
             return {
                 proto    => 'gemini',
                 host     => $host,
                 uri_path => "/$uri_path",
                 status   => $line[6],
-                date     => int( parse_date( $year, @line ) ),
+                date     => int(parse_date($year, @line)),
                 time     => $line[2],
             };
         }
 
-        my sub parse_relayd_line ( $year, @line ) {
-            my $date = int( parse_date( $year, @line ) );
+        my sub parse_relayd_line ($year, @line) {
+            my $date = int(parse_date($year, @line));
 
-            my ( $ip_hash, $ip_proto ) = anonymize_ip $line [12];
+            my ($ip_hash, $ip_proto) = anonymize_ip $line [12];
             return {
                 ip_hash  => $ip_hash,
                 ip_proto => $ip_proto,
@@ -241,26 +276,26 @@ package Foostats::Logreader {
             };
         }
 
-      # Expect one vger and one relayd log line per event! So collect
-      # both events (one from one log line each) and then merge the result hash!
-        my ( $vger, $relayd );
-        read_lines GEMINI_LOGS_GLOB, sub ( $year, @line ) {
-            if ( $line[4] eq 'vger:' ) {
+        # Expect one vger and one relayd log line per event! So collect
+        # both events (one from one log line each) and then merge the result hash!
+        my ($vger, $relayd);
+        read_lines gemini_logs_glob(), sub ($year, @line) {
+            if ($line[4] eq 'vger:') {
                 $vger = parse_vger_line $year, @line;
             }
-            elsif ( $line[5] eq 'relay'
-                and startswith( $line[6], 'gemini' ) )
+            elsif ($line[5] eq 'relay'
+                and startswith($line[6], 'gemini'))
             {
                 $relayd = parse_relayd_line $year, @line;
                 return undef
-                  if $relayd->{date} < $last_processed_date;
+                    if $relayd->{date} < $last_processed_date;
             }
 
             if (    defined $vger
                 and defined $relayd
-                and $vger->{time} eq $relayd->{time} )
+                and $vger->{time} eq $relayd->{time})
             {
-                $cb->( { %$vger, %$relayd } );
+                $cb->({ %$vger, %$relayd });
                 $vger = $relayd = undef;
             }
 
@@ -268,9 +303,12 @@ package Foostats::Logreader {
         };
     }
 
-    sub parse_logs ( $last_web_date, $last_gemini_date, $odds_file, $odds_log )
-    {
-        my $agg = Foostats::Aggregator->new( $odds_file, $odds_log );
+    # Sub: parse_logs
+    # - Purpose: Coordinate parsing for both web and gemini, aggregating into stats.
+    # - Params: $last_web_date, $last_gemini_date (YYYYMMDD int), $odds_file, $odds_log.
+    # - Return: stats hashref keyed by "proto_YYYYMMDD".
+    sub parse_logs ($last_web_date, $last_gemini_date, $odds_file, $odds_log) {
+        my $agg = Foostats::Aggregator->new($odds_file, $odds_log);
 
         say "Last web date: $last_web_date";
         say "Last gemini date: $last_gemini_date";
@@ -287,29 +325,40 @@ package Foostats::Logreader {
 }
 
 # TODO: Write filter summary at the end of the filter log.
+# Package: Foostats::Filter — request filtering and logging
+# - Purpose: Identify odd URI patterns and excessive requests per second per IP.
+# - Notes: Maintains an in-process blocklist for the current run.
 package Foostats::Filter {
     use String::Util qw(contains startswith endswith);
 
-    sub new ( $class, $odds_file, $log_path ) {
+    # Sub: new
+    # - Purpose: Construct a filter with odd patterns and a log path.
+    # - Params: $odds_file (str) pattern list; $log_path (str) append-only log file.
+    # - Return: blessed Foostats::Filter instance.
+    sub new ($class, $odds_file, $log_path) {
         say "Logging filter to $log_path";
         my @odds = FileHelper::read_lines($odds_file);
 
         bless {
             odds     => \@odds,
             log_path => $log_path
-          },
-          $class;
+            },
+            $class;
     }
 
-    sub ok ( $self, $event ) {
+    # Sub: ok
+    # - Purpose: Check if an event passes filters; updates block state/logging.
+    # - Params: $event (hashref) normalized request.
+    # - Return: true if allowed; false if blocked.
+    sub ok ($self, $event) {
         state %blocked = ();
         return false
-          if exists $blocked{ $event->{ip_hash} };
+            if exists $blocked{ $event->{ip_hash} };
 
         if (   $self->odd($event)
-            or $self->excessive($event) )
+            or $self->excessive($event))
         {
-            ( $blocked{ $event->{ip_hash} } //= 0 )++;
+            ($blocked{ $event->{ip_hash} } //= 0)++;
             return false;
         }
         else {
@@ -317,53 +366,64 @@ package Foostats::Filter {
         }
     }
 
-    sub odd ( $self, $event ) {
+    # Sub: odd
+    # - Purpose: Match URI path against user-provided odd patterns (substring match).
+    # - Params: $event (hashref) with uri_path.
+    # - Return: true if odd (blocked), false otherwise.
+    sub odd ($self, $event) {
         \my $uri_path = \$event->{uri_path};
 
-        for ( $self->{odds}->@* ) {
+        for ($self->{odds}->@*) {
+            next if !defined $_ || $_ eq '' || /^\s*#/;
             next
-              unless contains( $uri_path, $_ );
+                unless contains($uri_path, $_);
 
-            $self->log( 'WARN', $uri_path,
-                "contains $_ and is odd and will therefore be blocked!" );
+            $self->log('WARN', $uri_path, "contains $_ and is odd and will therefore be blocked!");
             return true;
         }
 
-        $self->log( 'OK', $uri_path, "appears fine..." );
+        $self->log('OK', $uri_path, "appears fine...");
         return false;
     }
 
-    sub log ( $self, $severity, $subject, $message ) {
+    # Sub: log
+    # - Purpose: Deduplicated append-only logging for filter decisions.
+    # - Params: $severity (OK|WARN), $subject (str), $message (str).
+    # - Return: undef.
+    sub log ($self, $severity, $subject, $message) {
         state %dedup;
 
         # Don't log if path was already logged
         return
-          if exists $dedup{$subject};
+            if exists $dedup{$subject};
         $dedup{$subject} = 1;
 
-        open( my $fh, '>>', $self->{log_path} )
-          or die $self->{log_path} . ": $!";
+        open(my $fh, '>>', $self->{log_path})
+            or die $self->{log_path} . ": $!";
         print $fh "$severity: $subject $message\n";
         close($fh);
     }
 
-    sub excessive ( $self, $event ) {
+    # Sub: excessive
+    # - Purpose: Block if an IP makes more than one request within the same second.
+    # - Params: $event (hashref) with time and ip_hash.
+    # - Return: true if blocked; false otherwise.
+    sub excessive ($self, $event) {
         \my $time    = \$event->{time};
         \my $ip_hash = \$event->{ip_hash};
 
         state $last_time = $time;    # Time with second: 'HH:MM:SS'
         state %count     = ();       # IPs accessing within the same second!
 
-        if ( $last_time ne $time ) {
+        if ($last_time ne $time) {
             $last_time = $time;
             %count     = ();
             return false;
         }
 
         # IP requested site more than once within the same second!?
-        if ( 1 < ++( $count{$ip_hash} //= 0 ) ) {
-            $self->log( 'WARN', $ip_hash,
-                "blocked due to excessive requesting..." );
+        if (1 < ++($count{$ip_hash} //= 0)) {
+            $self->log('WARN', $ip_hash, "blocked due to excessive requesting...");
             return true;
         }
 
@@ -371,6 +431,8 @@ package Foostats::Filter {
     }
 }
 
+# Package: Foostats::Aggregator — in-memory stats builder
+# - Purpose: Apply filters and accumulate counts, unique IPs per feed/page.
 package Foostats::Aggregator {
     use String::Util qw(contains startswith endswith);
 
@@ -380,140 +442,185 @@ package Foostats::Aggregator {
         GEMFEED_URI_2 => '/gemfeed/',
     };
 
-    sub new ( $class, $odds_file, $odds_log ) {
+    # Sub: new
+    # - Purpose: Construct aggregator with a filter and empty stats store.
+    # - Params: $odds_file (str), $odds_log (str).
+    # - Return: Foostats::Aggregator instance.
+    sub new ($class, $odds_file, $odds_log) {
         bless {
-            filter => Foostats::Filter->new( $odds_file, $odds_log ),
+            filter => Foostats::Filter->new($odds_file, $odds_log),
             stats  => {}
-          },
-          $class;
+            },
+            $class;
     }
 
-    sub add ( $self, $event ) {
+    # Sub: add
+    # - Purpose: Apply filter, update counts and unique-IP sets, and return event.
+    # - Params: $event (hashref) normalized event; ignored if undef.
+    # - Return: $event; filtered events increment filtered count only.
+    sub add ($self, $event) {
         return undef
-          unless defined $event;
+            unless defined $event;
 
         my $date     = $event->{date};
         my $date_key = $event->{proto} . "_$date";
 
+        # Stats data model per protocol+day (key: "proto_YYYYMMDD"):
+        # - count: per-proto request count, per IP version, and filtered count
+        # - feed_ips: unique IPs per feed type (atom_feed, gemfeed)
+        # - page_ips: unique IPs per host and per URL
         $self->{stats}{$date_key} //= {
-            count => {
-                filtered => 0
-            },
+            count    => { filtered => 0, },
             feed_ips => {
                 atom_feed => {},
-                gemfeed   => {}
+                gemfeed   => {},
             },
             page_ips => {
                 hosts => {},
-                urls  => {}
+                urls  => {},
             },
         };
 
         \my $s = \$self->{stats}{$date_key};
-        unless ( $self->{filter}->ok($event) ) {
+        unless ($self->{filter}->ok($event)) {
             $s->{count}{filtered}++;
             return $event;
         }
 
-        $self->add_count( $s, $event );
-        $self->add_page_ips( $s, $event )
-          unless $self->add_feed_ips( $s, $event );
+        $self->add_count($s, $event);
+        $self->add_page_ips($s, $event)
+            unless $self->add_feed_ips($s, $event);
 
         return $event;
     }
 
-    sub add_count ( $self, $stats, $event ) {
+    # Sub: add_count
+    # - Purpose: Increment totals by protocol and IP version.
+    # - Params: $stats (hashref) date bucket; $event (hashref).
+    # - Return: undef.
+    sub add_count ($self, $stats, $event) {
         \my $c = \$stats->{count};
         \my $e = \$event;
 
-        ( $c->{ $e->{proto} }    //= 0 )++;
-        ( $c->{ $e->{ip_proto} } //= 0 )++;
+        ($c->{ $e->{proto} }    //= 0)++;
+        ($c->{ $e->{ip_proto} } //= 0)++;
     }
 
-    sub add_feed_ips ( $self, $stats, $event ) {
+    # Sub: add_feed_ips
+    # - Purpose: If event hits feed endpoints, add unique IP and short-circuit.
+    # - Params: $stats (hashref), $event (hashref).
+    # - Return: 1 if feed matched; 0 otherwise.
+    sub add_feed_ips ($self, $stats, $event) {
         \my $f = \$stats->{feed_ips};
         \my $e = \$event;
 
-        if ( endswith( $e->{uri_path}, ATOM_FEED_URI ) ) {
-            ( $f->{atom_feed}->{ $e->{ip_hash} } //= 0 )++;
-        }
-        elsif ( contains( $e->{uri_path}, GEMFEED_URI ) ) {
-            ( $f->{gemfeed}->{ $e->{ip_hash} } //= 0 )++;
+        # Atom feed (exact path match, allow optional query string)
+        if ($e->{uri_path} =~ m{^/gemfeed/atom\.xml(?:[?#].*)?$}) {
+            ($f->{atom_feed}->{ $e->{ip_hash} } //= 0)++;
+            return 1;
         }
-        elsif ( endswith( $e->{uri_path}, GEMFEED_URI_2 ) ) {
-            ( $f->{gemfeed}->{ $e->{ip_hash} } //= 0 )++;
-        }
-        else {
-            0;
+
+        # Gemfeed index: '/gemfeed/' or '/gemfeed/index.gmi' (optionally with query)
+        if ($e->{uri_path} =~ m{^/gemfeed/(?:index\.gmi)?(?:[?#].*)?$}) {
+            ($f->{gemfeed}->{ $e->{ip_hash} } //= 0)++;
+            return 1;
         }
+
+        return 0;
     }
 
-    sub add_page_ips ( $self, $stats, $event ) {
+    # Sub: add_page_ips
+    # - Purpose: Track unique IPs per host and per URL for .html/.gmi pages.
+    # - Params: $stats (hashref), $event (hashref).
+    # - Return: undef.
+    sub add_page_ips ($self, $stats, $event) {
         \my $e = \$event;
         \my $p = \$stats->{page_ips};
 
         return
-          if !endswith( $e->{uri_path}, '.html' )
-          && !endswith( $e->{uri_path}, '.gmi' );
+            if !endswith($e->{uri_path}, '.html')
+            && !endswith($e->{uri_path}, '.gmi');
 
-        ( $p->{hosts}->{ $e->{host} }->{ $e->{ip_hash} } //= 0 )++;
-        ( $p->{urls}->{ $e->{host} . $e->{uri_path} }->{ $e->{ip_hash} } //=
-              0 )++;
+        ($p->{hosts}->{ $e->{host} }->{ $e->{ip_hash} } //= 0)++;
+        ($p->{urls}->{ $e->{host} . $e->{uri_path} }->{ $e->{ip_hash} } //=
+                0)++;
     }
 }
 
+# Package: Foostats::FileOutputter — write per-day stats to disk
+# - Purpose: Persist aggregated stats to gzipped JSON files under a stats dir.
 package Foostats::FileOutputter {
     use JSON;
     use Sys::Hostname;
     use PerlIO::gzip;
 
-    sub new ( $class, %args ) {
+    # Sub: new
+    # - Purpose: Create outputter with stats_dir; ensures directory exists.
+    # - Params: %args (hash) must include stats_dir.
+    # - Return: Foostats::FileOutputter instance.
+    sub new ($class, %args) {
         my $self = bless \%args, $class;
         mkdir $self->{stats_dir}
-          or die $self->{stats_dir} . ": $!"
-          unless -d $self->{stats_dir};
+            or die $self->{stats_dir} . ": $!"
+            unless -d $self->{stats_dir};
 
         return $self;
     }
 
-    sub last_processed_date ( $self, $proto ) {
-        my $hostname = hostname();
-        my @processed =
-          glob $self->{stats_dir} . "/${proto}_????????.$hostname.json.gz";
+    # Sub: last_processed_date
+    # - Purpose: Determine the most recent processed date for a protocol for this host.
+    # - Params: $proto (str) 'web' or 'gemini'.
+    # - Return: YYYYMMDD int (0 if none found).
+    sub last_processed_date ($self, $proto) {
+        my $hostname  = hostname();
+        my @processed = glob $self->{stats_dir} . "/${proto}_????????.$hostname.json.gz";
         my ($date) =
-          @processed
-          ? ( $processed[-1] =~ /_(\d{8})\.$hostname\.json.gz/ )
-          : 0;
+            @processed
+            ? ($processed[-1] =~ /_(\d{8})\.$hostname\.json.gz/)
+            : 0;
 
         return int($date);
     }
 
+    # Sub: write
+    # - Purpose: Write one gzipped JSON file per date bucket to stats_dir.
+    # - Params: none (uses $self->{stats}).
+    # - Return: undef.
     sub write ($self) {
         $self->for_dates(
-            sub ( $self, $date_key, $stats ) {
+            sub ($self, $date_key, $stats) {
                 my $hostname = hostname();
-                my $path =
-                  $self->{stats_dir} . "/${date_key}.$hostname.json.gz";
+                my $path     = $self->{stats_dir} . "/${date_key}.$hostname.json.gz";
                 FileHelper::write_json_gz
-                  $path,
-                  $stats;
+                    $path,
+                    $stats;
             }
         );
     }
 
-    sub for_dates ( $self, $cb ) {
-        $cb->( $self, $_, $self->{stats}{$_} ) for sort
-          keys $self->{stats}->%*;
+    # Sub: for_dates
+    # - Purpose: Iterate date-keyed stats in sorted order and call $cb.
+    # - Params: $cb (code) receives ($self, $date_key, $stats).
+    # - Return: undef.
+    sub for_dates ($self, $cb) {
+        $cb->($self, $_, $self->{stats}{$_}) for sort
+            keys $self->{stats}->%*;
     }
 }
 
+# Package: Foostats::Replicator — pull partner stats files over HTTP(S)
+# - Purpose: Fetch recent partner node stats into local stats dir.
 package Foostats::Replicator {
     use JSON;
     use File::Basename;
     use LWP::UserAgent;
     use String::Util qw(endswith);
 
-    sub replicate ( $stats_dir, $partner_node ) {
+    # Sub: replicate
+    # - Purpose: For each proto and last 31 days, replicate newest files.
+    # - Params: $stats_dir (str) local dir; $partner_node (str) hostname.
+    # - Return: undef (best-effort fetches).
+    sub replicate ($stats_dir, $partner_node) {
         say "Replicating from $partner_node";
 
         for my $proto (qw(gemini web)) {
@@ -527,51 +634,63 @@ package Foostats::Replicator {
                     "https://$partner_node/foostats/$dest_path",
                     "$stats_dir/$dest_path",
                     $count++
-                      <
-                      3
+                        <
+                        3
                     ,    # Always replicate the newest 3 files.
                 );
             }
         }
     }
 
-    sub replicate_file ( $remote_url, $dest_path, $force ) {
+    # Sub: replicate_file
+    # - Purpose: Download a single URL to a destination unless already present (unless forced).
+    # - Params: $remote_url (str) source; $dest_path (str) destination; $force (bool/int).
+    # - Return: undef; logs failures.
+    sub replicate_file ($remote_url, $dest_path, $force) {
 
         # $dest_path already exists, not replicating it
         return
-          if !$force
-          && -f $dest_path;
+            if !$force
+            && -f $dest_path;
 
         say "Replicating $remote_url to $dest_path (force:$force)... ";
         my $response = LWP::UserAgent->new->get($remote_url);
-        unless ( $response->is_success ) {
+        unless ($response->is_success) {
             say "\nFailed to fetch the file: " . $response->status_line;
             return;
         }
 
         FileHelper::write
-          $dest_path,
-          $response->decoded_content;
+            $dest_path,
+            $response->decoded_content;
         say 'done';
     }
 }
 
+# Package: Foostats::Merger — merge per-host daily stats into a single view
+# - Purpose: Merge multiple node files per day into totals and unique counts.
 package Foostats::Merger {
-    use Data::Dumper;    # TODO: UNDO
-
+    # Removed Data::Dumper (debug-only) per review.
+    # Sub: merge
+    # - Purpose: Produce merged stats for the last month (date => stats hashref).
+    # - Params: $stats_dir (str) directory with daily gz JSON files.
+    # - Return: hash (not ref) of date => merged stats.
     sub merge ($stats_dir) {
         my %merge;
-        $merge{$_} = merge_for_date( $stats_dir, $_ )
-          for DateHelper::last_month_dates;
+        $merge{$_} = merge_for_date($stats_dir, $_) for DateHelper::last_month_dates;
         return %merge;
     }
 
-    sub merge_for_date ( $stats_dir, $date ) {
+    # Sub: merge_for_date
+    # - Purpose: Merge all node files for a specific date into one stats hashref.
+    # - Params: $stats_dir (str), $date (YYYYMMDD str/int).
+    # - Return: { feed_ips => {...}, count => {...}, page_ips => {...} }.
+    sub merge_for_date ($stats_dir, $date) {
         printf
-          "Merging for date %s\n",
-          $date;
+            "Merging for date %s\n",
+            $date;
 
-        my @stats = stats_for_date( $stats_dir, $date );
+        my @stats = stats_for_date($stats_dir, $date);
         return {
             feed_ips => feed_ips(@stats),
             count    => count(@stats),
@@ -579,9 +698,13 @@ package Foostats::Merger {
         };
     }
 
-    sub merge_ips ( $a, $b, $key_transform = undef ) {
-        my sub merge ( $a, $b ) {
-            while ( my ( $key, $val ) = each %$b ) {
+    # Sub: merge_ips
+    # - Purpose: Deep-ish merge helper: sums numbers, merges hash-of-hash counts.
+    # - Params: $a (hashref target), $b (hashref source), $key_transform (code|undef).
+    # - Return: undef; updates $a in place; dies on incompatible types.
+    sub merge_ips ($a, $b, $key_transform = undef) {
+        my sub merge ($a, $b) {
+            while (my ($key, $val) = each %$b) {
                 $a->{$key} //= 0;
                 $a->{$key} += $val;
             }
@@ -589,52 +712,56 @@ package Foostats::Merger {
 
         my $is_num = qr/^\d+(\.\d+)?$/;
 
-        while ( my ( $key, $val ) = each %$b ) {
+        while (my ($key, $val) = each %$b) {
             $key = $key_transform->($key)
-              if defined $key_transform;
+                if defined $key_transform;
 
-            if ( not exists $a->{$key} ) {
+            if (not exists $a->{$key}) {
                 $a->{$key} = $val;
             }
-            elsif (ref( $a->{$key} ) eq 'HASH'
-                && ref($val) eq 'HASH' )
+            elsif (ref($a->{$key}) eq 'HASH'
+                && ref($val) eq 'HASH')
             {
-                merge( $a->{$key}, $val );
+                merge($a->{$key}, $val);
             }
             elsif ($a->{$key} =~ $is_num
-                && $val =~ $is_num )
+                && $val =~ $is_num)
             {
                 $a->{$key} += $val;
             }
             else {
                 die
-"Not merging tkey '%s' (ref:%s): '%s' (ref:%s) with '%s' (ref:%s)\n",
-                  $key,
-                  ref($key), $a->{$key},
-                  ref( $a->{$key} ),
-                  $val,
-                  ref($val);
+                    "Not merging tkey '%s' (ref:%s): '%s' (ref:%s) with '%s' (ref:%s)\n",
+                    $key,
+                    ref($key), $a->{$key},
+                    ref($a->{$key}),
+                    $val,
+                    ref($val);
             }
         }
     }
 
+    # Sub: feed_ips
+    # - Purpose: Merge feed unique-IP sets from per-proto stats into totals.
+    # - Params: @stats (list of stats hashrefs) each with {proto, feed_ips}.
+    # - Return: hashref with Total and per-proto feed counts.
     sub feed_ips (@stats) {
-        my ( %gemini, %web );
+        my (%gemini, %web);
 
         for my $stats (@stats) {
             my $merge =
-              $stats->{proto} eq 'web'
-              ? \%web
-              : \%gemini;
+                $stats->{proto} eq 'web'
+                ? \%web
+                : \%gemini;
             printf
-              "Merging proto %s feed IPs\n",
-              $stats->{proto};
-            merge_ips( $merge, $stats->{feed_ips} );
+                "Merging proto %s feed IPs\n",
+                $stats->{proto};
+            merge_ips($merge, $stats->{feed_ips});
         }
 
         my %total;
-        merge_ips( \%total, $web{$_} )    for keys %web;
-        merge_ips( \%total, $gemini{$_} ) for keys %gemini;
+        merge_ips(\%total, $web{$_})    for keys %web;
+        merge_ips(\%total, $gemini{$_}) for keys %gemini;
 
         my %merge = (
             'Total'          => scalar keys %total,
@@ -647,11 +774,15 @@ package Foostats::Merger {
         return \%merge;
     }
 
+    # Sub: count
+    # - Purpose: Sum request counters across stats for the day.
+    # - Params: @stats (list of stats hashrefs) each with {count}.
+    # - Return: hashref of summed counters.
     sub count (@stats) {
         my %merge;
 
         for my $stats (@stats) {
-            while ( my ( $key, $val ) = each $stats->{count}->%* ) {
+            while (my ($key, $val) = each $stats->{count}->%*) {
                 $merge{$key} //= 0;
                 $merge{$key} += $val;
             }
@@ -660,13 +791,17 @@ package Foostats::Merger {
         return \%merge;
     }
 
+    # Sub: page_ips
+    # - Purpose: Merge unique IPs per host and per URL; coalesce truncated endings.
+    # - Params: @stats (list of stats hashrefs) with {page_ips}{urls,hosts}.
+    # - Return: hashref with urls/hosts each mapping => unique counts.
     sub page_ips (@stats) {
         my %merge = (
             urls  => {},
             hosts => {}
         );
 
-        for my $key ( keys %merge ) {
+        for my $key (keys %merge) {
             merge_ips(
                 $merge{$key},
                 $_->{page_ips}->{$key},
@@ -678,25 +813,28 @@ package Foostats::Merger {
             ) for @stats;
 
             # Keep only uniq IP count
-            $merge{$key}->{$_} = scalar keys $merge{$key}->{$_}->%*
-              for keys $merge{$key}->%*;
+            $merge{$key}->{$_} = scalar keys $merge{$key}->{$_}->%* for keys $merge{$key}->%*;
         }
 
         return \%merge;
     }
 
-    sub stats_for_date ( $stats_dir, $date ) {
+    # Sub: stats_for_date
+    # - Purpose: Load all stats files for a date across protos; tag proto/path.
+    # - Params: $stats_dir (str), $date (YYYYMMDD).
+    # - Return: list of stats hashrefs.
+    sub stats_for_date ($stats_dir, $date) {
         my @stats;
 
         for my $proto (qw(gemini web)) {
             for my $path (<$stats_dir/${proto}_${date}.*.json.gz>) {
                 printf
-                  "Reading %s\n",
-                  $path;
+                    "Reading %s\n",
+                    $path;
                 push
-                  @stats,
-                  FileHelper::read_json_gz($path);
-                @{ $stats[-1] }{qw(proto path)} = ( $proto, $path );
+                    @stats,
+                    FileHelper::read_json_gz($path);
+                @{ $stats[-1] }{qw(proto path)} = ($proto, $path);
             }
         }
 
@@ -704,11 +842,18 @@ package Foostats::Merger {
     }
 }
 
+# Package: Foostats::Reporter — build gemtext/HTML daily and summary reports
+# - Purpose: Render daily reports and rolling summaries (30/365), and index pages.
 package Foostats::Reporter {
     use Time::Piece;
+    use HTML::Entities qw(encode_entities);
 
+    # Sub: truncate_url
+    # - Purpose: Middle-ellipsize long URLs to fit within a target length.
+    # - Params: $url (str), $max_length (int default 100).
+    # - Return: possibly truncated string.
     sub truncate_url {
-        my ( $url, $max_length ) = @_;
+        my ($url, $max_length) = @_;
         $max_length //= 100;    # Default to 100 characters
 
         return $url if length($url) <= $max_length;
@@ -719,44 +864,52 @@ package Foostats::Reporter {
         my $available_length = $max_length - $ellipsis_length;
 
         # Split available length between start and end, favoring the end
-        my $keep_start = int( $available_length * 0.4 );     # 40% for start
+        my $keep_start = int($available_length * 0.4);       # 40% for start
         my $keep_end   = $available_length - $keep_start;    # 60% for end
 
-        my $start = substr( $url, 0, $keep_start );
-        my $end   = substr( $url, -$keep_end );
+        my $start = substr($url, 0, $keep_start);
+        my $end   = substr($url, -$keep_end);
 
         return $start . $ellipsis . $end;
     }
 
+    # Sub: truncate_urls_for_table
+    # - Purpose: Truncate URL cells in-place to fit target table width.
+    # - Params: $url_rows (arrayref of [url,count]), $count_column_header (str).
+    # - Return: undef; mutates $url_rows.
     sub truncate_urls_for_table {
-        my ( $url_rows, $count_column_header ) = @_;
+        my ($url_rows, $count_column_header) = @_;
 
         # Calculate the maximum width needed for the count column
         my $max_count_width = length($count_column_header);
         for my $row (@$url_rows) {
-            my $count_width = length( $row->[1] );
+            my $count_width = length($row->[1]);
             $max_count_width = $count_width if $count_width > $max_count_width;
         }
 
         # Row format: "| URL... | count |" with padding
         # Calculate: "| " (2) + URL + " | " (3) + count_with_padding + " |" (2)
         my $max_url_length = 100 - 7 - $max_count_width;
-        $max_url_length = 70 if $max_url_length > 70; # Cap at reasonable length
+        $max_url_length = 70 if $max_url_length > 70;    # Cap at reasonable length
 
         # Truncate URLs in place
         for my $row (@$url_rows) {
-            $row->[0] = truncate_url( $row->[0], $max_url_length );
+            $row->[0] = truncate_url($row->[0], $max_url_length);
         }
     }
 
+    # Sub: format_table
+    # - Purpose: Render a simple monospace table from headers and rows.
+    # - Params: $headers (arrayref), $rows (arrayref of arrayrefs).
+    # - Return: string with lines separated by \n.
     sub format_table {
-        my ( $headers, $rows ) = @_;
+        my ($headers, $rows) = @_;
 
         my @widths;
-        for my $col ( 0 .. $#{$headers} ) {
-            my $max_width = length( $headers->[$col] );
+        for my $col (0 .. $#{$headers}) {
+            my $max_width = length($headers->[$col]);
             for my $row (@$rows) {
-                my $len = length( $row->[$col] );
+                my $len = length($row->[$col]);
                 $max_width = $len if $len > $max_width;
             }
             push @widths, $max_width;
@@ -764,10 +917,10 @@ package Foostats::Reporter {
 
         my $header_line    = '|';
         my $separator_line = '|';
-        for my $col ( 0 .. $#{$headers} ) {
+        for my $col (0 .. $#{$headers}) {
             $header_line .=
-              sprintf( " %-*s |", $widths[$col], $headers->[$col] );
-            $separator_line .= '-' x ( $widths[$col] + 2 ) . '|';
+                sprintf(" %-*s |", $widths[$col], $headers->[$col]);
+            $separator_line .= '-' x ($widths[$col] + 2) . '|';
         }
 
         my @table_lines;
@@ -777,33 +930,39 @@ package Foostats::Reporter {
 
         for my $row (@$rows) {
             my $row_line = '|';
-            for my $col ( 0 .. $#{$row} ) {
-                $row_line .= sprintf( " %-*s |", $widths[$col], $row->[$col] );
+            for my $col (0 .. $#{$row}) {
+                $row_line .= sprintf(" %-*s |", $widths[$col], $row->[$col]);
             }
             push @table_lines, $row_line;
         }
 
         push @table_lines, $separator_line;    # Add bottom terminator
 
-        return join( "\n", @table_lines );
+        return join("\n", @table_lines);
     }
 
     # Convert gemtext to HTML
+    # Sub: gemtext_to_html
+    # - Purpose: Convert a subset of Gemtext to compact HTML, incl. code blocks and lists.
+    # - Params: $content (str) Gemtext.
+    # - Return: HTML string (fragment).
     sub gemtext_to_html {
-        my ($content) = @_;
-        my $html = "";
-        my $in_code_block = 0;
-        my $in_list = 0;
-        my @lines = split /\n/, $content;
+        my ($content)        = @_;
+        my $html             = "";
+        my $in_code_block    = 0;
+        my $in_list          = 0;
+        my @lines            = split /\n/, $content;
         my @code_block_lines = ();
-        
+
         for my $line (@lines) {
             if ($line =~ /^```/) {
                 if ($in_code_block) {
+
                     # End code block - check if it's a table
                     if (is_ascii_table(\@code_block_lines)) {
                         $html .= convert_ascii_table_to_html(\@code_block_lines);
-                    } else {
+                    }
+                    else {
                         $html .= "<pre>\n";
                         for my $code_line (@code_block_lines) {
                             $html .= encode_entities($code_line) . "\n";
@@ -811,136 +970,229 @@ package Foostats::Reporter {
                         $html .= "</pre>\n";
                     }
                     @code_block_lines = ();
-                    $in_code_block = 0;
-                } else {
+                    $in_code_block    = 0;
+                }
+                else {
                     $in_code_block = 1;
                 }
                 next;
             }
-            
+
             if ($in_code_block) {
                 push @code_block_lines, $line;
                 next;
             }
+
             
-            # Skip 365-day summary section header in HTML output
-            if ($line =~ /^## 365-Day Summary Reports\s*$/) {
-                next;
-            }
 
             # Check if we need to close a list
             if ($in_list && $line !~ /^\* /) {
                 $html .= "</ul>\n";
                 $in_list = 0;
             }
-            
+
             # Headers
             if ($line =~ /^### (.*)/) {
                 $html .= "<h3>" . encode_entities($1) . "</h3>\n";
-            } elsif ($line =~ /^## (.*)/) {
+            }
+            elsif ($line =~ /^## (.*)/) {
                 $html .= "<h2>" . encode_entities($1) . "</h2>\n";
-            } elsif ($line =~ /^# (.*)/) {
+            }
+            elsif ($line =~ /^# (.*)/) {
                 $html .= "<h1>" . encode_entities($1) . "</h1>\n";
             }
+
             # Links
             elsif ($line =~ /^=> (\S+)\s+(.*)/) {
                 my ($url, $text) = ($1, $2);
+
                 # Drop 365-day summary links from HTML output
                 if ($url =~ /(?:^|[\/.])365day_summary_\d{8}\.gmi$/) {
                     next;
                 }
+
                 # Convert .gmi links to .html
                 $url =~ s/\.gmi$/\.html/;
                 $html .= "<p><a href=\"" . encode_entities($url) . "\">" . encode_entities($text) . "</a></p>\n";
             }
+
             # Bullet points
             elsif ($line =~ /^\* (.*)/) {
                 if (!$in_list) {
                     $html .= "<ul>\n";
                     $in_list = 1;
                 }
-                $html .= "<li>" . encode_entities($1) . "</li>\n";
+                $html .= "<li>" . linkify_text($1) . "</li>\n";
             }
+
             # Empty line - skip to avoid excessive spacing
             elsif ($line =~ /^\s*$/) {
+
                 # Skip empty lines for more compact output
             }
+
             # Regular text
             else {
-                $html .= "<p>" . encode_entities($line) . "</p>\n";
+                $html .= "<p>" . linkify_text($line) . "</p>\n";
             }
         }
-        
+
         # Close list if still open
         if ($in_list) {
             $html .= "</ul>\n";
         }
-        
+
         return $html;
     }
-    
+
     # Check if the lines form an ASCII table
+    # Sub: is_ascii_table
+    # - Purpose: Heuristically detect if a code block is an ASCII table.
+    # - Params: $lines (arrayref of strings).
+    # - Return: 1 if likely table; 0 otherwise.
     sub is_ascii_table {
         my ($lines) = @_;
-        return 0 if @$lines < 3;  # Need at least header, separator, and one data row
-        
+        return 0 if @$lines < 3;    # Need at least header, separator, and one data row
+
         # Check for separator lines with dashes and pipes
         for my $line (@$lines) {
             return 1 if $line =~ /^\|?[\s\-]+\|/;
         }
         return 0;
     }
-    
+
     # Convert ASCII table to HTML table
+    # Sub: convert_ascii_table_to_html
+    # - Purpose: Convert simple ASCII table lines to an HTML <table>.
+    # - Params: $lines (arrayref of strings).
+    # - Return: HTML string.
     sub convert_ascii_table_to_html {
-        my ($lines) = @_;
-        my $html = "<table>\n";
+        my ($lines)   = @_;
+        my $html      = "<table>\n";
         my $row_count = 0;
-        
+
         for my $line (@$lines) {
+
             # Skip separator lines
             next if $line =~ /^\|?[\s\-]+\|/ && $line =~ /\-/;
-            
+
             # Parse table row
             my @cells = split /\s*\|\s*/, $line;
-            @cells = grep { length($_) > 0 } @cells;  # Remove empty cells
-            
+            @cells = grep { length($_) > 0 } @cells;    # Remove empty cells
+
             if (@cells) {
                 $html .= "<tr>\n";
+
                 # First row is header
                 my $tag = ($row_count == 0) ? "th" : "td";
                 for my $cell (@cells) {
-                    $html .= "  <$tag>" . encode_entities(trim($cell)) . "</$tag>\n";
+                    my $val = trim($cell);
+                    $html .= "  <$tag>" . linkify_text($val) . "</$tag>\n";
                 }
                 $html .= "</tr>\n";
                 $row_count++;
             }
         }
-        
+
         $html .= "</table>\n";
         return $html;
     }
-    
+
     # Trim whitespace from string
+    # Sub: trim
+    # - Purpose: Strip leading/trailing whitespace.
+    # - Params: $str (str).
+    # - Return: trimmed string.
     sub trim {
         my ($str) = @_;
         $str =~ s/^\s+//;
         $str =~ s/\s+$//;
         return $str;
     }
-    
-    # Encode HTML entities to prevent XSS
-    sub encode_entities {
+
+    # Build an href for a token that looks like a URL or FQDN
+    # Sub: _guess_href
+    # - Purpose: Infer absolute href for a token (supports gemini for .gmi).
+    # - Params: $token (str) token from text.
+    # - Return: href string or undef.
+    sub _guess_href {
+        my ($token) = @_;
+        my $t = $token;
+        $t =~ s/^\s+//;
+        $t =~ s/\s+$//;
+
+        # Already absolute http(s)
+        return $t if $t =~ m{^https?://}i;
+
+        # Extract trailing punctuation to avoid including it in href
+        my $trail = '';
+        if ($t =~ s{([)\]\}.,;:!?]+)$}{}) { $trail = $1; }
+
+        # host[/path]
+        if ($t =~ m{^([A-Za-z0-9.-]+\.[A-Za-z]{2,})(/[^\s<]*)?$}) {
+            my ($host, $path) = ($1, $2 // '');
+            my $has_ellipsis = index($t, '...') != -1 || index(($path // ''), '...') != -1;
+            my $is_gemini    = defined($path) && $path =~ /\.gmi(?:[?#].*)?$/i;
+            my $scheme       = $is_gemini ? 'gemini' : 'https';
+
+            # If truncated, fall back to host root
+            my $href =
+                $has_ellipsis
+                ? sprintf('%s://%s/', $scheme, $host)
+                : sprintf('%s://%s%s', $scheme, $host, ($path eq '' ? '/' : $path));
+            return ($href . $trail);
+        }
+
+        return undef;
+    }
+
+    # Turn any URLs/FQDNs in the provided text into anchors
+    # Sub: linkify_text
+    # - Purpose: Replace URL/FQDN tokens in text with HTML anchors.
+    # - Params: $text (str) input text.
+    # - Return: HTML string with entities encoded.
+    sub linkify_text {
         my ($text) = @_;
-        $text =~ s/&/&amp;/g;
-        $text =~ s/</&lt;/g;
-        $text =~ s/>/&gt;/g;
-        $text =~ s/"/&quot;/g;
-        $text =~ s/'/&#39;/g;
-        return $text;
+        return '' unless defined $text;
+
+        my $out = '';
+        my $pos = 0;
+        while ($text =~ m{((?:https?://)?[A-Za-z0-9.-]+\.[A-Za-z]{2,}(?:/[^\s<]*)?)}g) {
+            my $match = $1;
+            my $start = $-[1];
+            my $end   = $+[1];
+
+            # Emit preceding text
+            $out .= encode_entities(substr($text, $pos, $start - $pos));
+
+            # Separate trailing punctuation from the match
+            my ($core, $trail) = ($match, '');
+            if ($core =~ s{([)\]\}.,;:!?]+)$}{}) { $trail = $1; }
+
+            my $href = _guess_href($core);
+            if ($href) {
+                $out .= sprintf('<a href="%s.html">%s</a>%s',
+                    encode_entities($href), encode_entities($core), encode_entities($trail));
+            }
+            else {
+                # Not a linkable token after all
+                $out .= encode_entities($match);
+            }
+            $pos = $end;
+        }
+
+        # Remainder
+        $out .= encode_entities(substr($text, $pos));
+        return $out;
     }
-    
+
+    # Use HTML::Entities::encode_entities imported above
+
     # Generate HTML wrapper
+    # Sub: generate_html_page
+    # - Purpose: Wrap content in a minimal HTML5 page with a title and CSS reset.
+    # - Params: $title (str), $content (str) HTML fragment.
+    # - Return: full HTML page string.
     sub generate_html_page {
         my ($title, $content) = @_;
         return qq{<!DOCTYPE html>
@@ -1008,49 +1260,50 @@ $content
 };
     }
 
+    # Sub: report
+    # - Purpose: Generate daily .gmi and .html reports per date, then summaries and index.
+    # - Params: $stats_dir, $output_dir, $html_output_dir, %merged (date => stats).
+    # - Return: undef.
     sub report {
-        my ( $stats_dir, $output_dir, $html_output_dir, %merged ) = @_;
-        for my $date ( sort { $b cmp $a } keys %merged ) {
+        my ($stats_dir, $output_dir, $html_output_dir, %merged) = @_;
+        for my $date (sort { $b cmp $a } keys %merged) {
             my $stats = $merged{$date};
             next unless $stats->{count};
 
-            my ( $year, $month, $day ) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+            my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/;
 
             # Check if .gmi file exists and its age based on date in filename
-            my $report_path = "$output_dir/$date.gmi";
+            my $report_path      = "$output_dir/$date.gmi";
             my $html_report_path = "$output_dir/$date.html";
 
             # Calculate age of the data based on date in filename
             my $today     = Time::Piece->new();
-            my $file_date = Time::Piece->strptime( $date, '%Y%m%d' );
-            my $age_days  = ( $today - $file_date ) / ( 24 * 60 * 60 );
+            my $file_date = Time::Piece->strptime($date, '%Y%m%d');
+            my $age_days  = ($today - $file_date) / (24 * 60 * 60);
 
-            if ( -e $report_path && -e $html_report_path ) {
+            if (-e $report_path && -e $html_report_path) {
 
                 # Files exist
-                if ( $age_days <= 3 ) {
+                if ($age_days <= 3) {
 
                     # Data is recent (within 3 days), regenerate it
-                    say
-"Regenerating daily report for $year-$month-$day (data age: "
-                      . sprintf( "%.1f", $age_days )
-                      . " days)";
+                    say "Regenerating daily report for $year-$month-$day (data age: "
+                        . sprintf("%.1f", $age_days)
+                        . " days)";
                 }
                 else {
                     # Data is old (older than 3 days), skip if files exist
-                    say
-"Skipping daily report for $year-$month-$day (files exist, data age: "
-                      . sprintf( "%.1f", $age_days )
-                      . " days)";
+                    say "Skipping daily report for $year-$month-$day (files exist, data age: "
+                        . sprintf("%.1f", $age_days)
+                        . " days)";
                     next;
                 }
             }
             else {
                 # File doesn't exist, generate it
-                say
-"Generating new daily report for $year-$month-$day (file doesn't exist, data age: "
-                  . sprintf( "%.1f", $age_days )
-                  . " days)";
+                say "Generating new daily report for $year-$month-$day (file doesn't exist, data age: "
+                    . sprintf("%.1f", $age_days)
+                    . " days)";
             }
 
             my $report_content = "";
@@ -1060,27 +1313,23 @@ $content
             # Feed counts first
             $report_content .= "### Feed Statistics\n\n";
             my @feed_rows;
-            push @feed_rows, [ 'Total', $stats->{feed_ips}{'Total'} // 0 ];
-            push @feed_rows,
-              [ 'Gemini Gemfeed', $stats->{feed_ips}{'Gemini Gemfeed'} // 0 ];
-            push @feed_rows,
-              [ 'Gemini Atom', $stats->{feed_ips}{'Gemini Atom'} // 0 ];
-            push @feed_rows,
-              [ 'Web Gemfeed', $stats->{feed_ips}{'Web Gemfeed'} // 0 ];
-            push @feed_rows,
-              [ 'Web Atom', $stats->{feed_ips}{'Web Atom'} // 0 ];
+            push @feed_rows, [ 'Total',          $stats->{feed_ips}{'Total'}          // 0 ];
+            push @feed_rows, [ 'Gemini Gemfeed', $stats->{feed_ips}{'Gemini Gemfeed'} // 0 ];
+            push @feed_rows, [ 'Gemini Atom',    $stats->{feed_ips}{'Gemini Atom'}    // 0 ];
+            push @feed_rows, [ 'Web Gemfeed',    $stats->{feed_ips}{'Web Gemfeed'}    // 0 ];
+            push @feed_rows, [ 'Web Atom',       $stats->{feed_ips}{'Web Atom'}       // 0 ];
             $report_content .= "```\n";
-            $report_content .=
-              format_table( [ 'Feed Type', 'Count' ], \@feed_rows );
+            $report_content .= format_table([ 'Feed Type', 'Count' ], \@feed_rows);
             $report_content .= "\n```\n\n";
+
             # Top 50 URLs next
             $report_content .= "### Top 50 URLs\n\n";
             my @url_rows;
             my $urls = $stats->{page_ips}{urls};
             my @sorted_urls =
-              sort { ( $urls->{$b} // 0 ) <=> ( $urls->{$a} // 0 ) }
-              keys %$urls;
-            my $truncated   = @sorted_urls > 50;
+                sort { ($urls->{$b} // 0) <=> ($urls->{$a} // 0) }
+                keys %$urls;
+            my $truncated = @sorted_urls > 50;
             @sorted_urls = @sorted_urls[ 0 .. 49 ] if $truncated;
 
             for my $url (@sorted_urls) {
@@ -1088,10 +1337,9 @@ $content
             }
 
             # Truncate URLs to fit within 100-character rows
-            truncate_urls_for_table( \@url_rows, 'Unique Visitors' );
+            truncate_urls_for_table(\@url_rows, 'Unique Visitors');
             $report_content .= "```\n";
-            $report_content .=
-              format_table( [ 'URL', 'Unique Visitors' ], \@url_rows );
+            $report_content .= format_table([ 'URL', 'Unique Visitors' ], \@url_rows);
             $report_content .= "\n```\n";
             if ($truncated) {
                 $report_content .= "\n... and more (truncated to 50 entries).\n";
@@ -1103,18 +1351,17 @@ $content
             my @host_rows;
             my $hosts = $stats->{page_ips}{hosts};
             my @sorted_hosts =
-              sort { ( $hosts->{$b} // 0 ) <=> ( $hosts->{$a} // 0 ) }
-              keys %$hosts;
+                sort { ($hosts->{$b} // 0) <=> ($hosts->{$a} // 0) }
+                keys %$hosts;
 
-            $truncated = @sorted_hosts > 50;
+            $truncated    = @sorted_hosts > 50;
             @sorted_hosts = @sorted_hosts[ 0 .. 49 ] if $truncated;
 
             for my $host (@sorted_hosts) {
                 push @host_rows, [ $host, $hosts->{$host} // 0 ];
             }
             $report_content .= "```\n";
-            $report_content .=
-              format_table( [ 'Host', 'Unique Visitors' ], \@host_rows );
+            $report_content .= format_table([ 'Host', 'Unique Visitors' ], \@host_rows);
             $report_content .= "\n```\n";
             if ($truncated) {
                 $report_content .= "\n... and more (truncated to 50 entries).\n";
@@ -1124,22 +1371,22 @@ $content
             # Summary last
             $report_content .= "### Summary\n\n";
             my $total_requests =
-              ( $stats->{count}{gemini} // 0 ) + ( $stats->{count}{web} // 0 );
+                ($stats->{count}{gemini} // 0) + ($stats->{count}{web} // 0);
             $report_content .= "* Total requests: $total_requests\n";
             $report_content .=
-              "* Filtered requests: " . ( $stats->{count}{filtered} // 0 ) . "\n";
+                "* Filtered requests: " . ($stats->{count}{filtered} // 0) . "\n";
             $report_content .=
-              "* Gemini requests: " . ( $stats->{count}{gemini} // 0 ) . "\n";
+                "* Gemini requests: " . ($stats->{count}{gemini} // 0) . "\n";
             $report_content .=
-              "* Web requests: " . ( $stats->{count}{web} // 0 ) . "\n";
+                "* Web requests: " . ($stats->{count}{web} // 0) . "\n";
             $report_content .=
-              "* IPv4 requests: " . ( $stats->{count}{IPv4} // 0 ) . "\n";
+                "* IPv4 requests: " . ($stats->{count}{IPv4} // 0) . "\n";
             $report_content .=
-              "* IPv6 requests: " . ( $stats->{count}{IPv6} // 0 ) . "\n\n";
+                "* IPv6 requests: " . ($stats->{count}{IPv6} // 0) . "\n\n";
 
             # Add links to summary reports (only monthly)
             $report_content .= "## Related Reports\n\n";
-            my $now           = localtime;
+            my $now          = localtime;
             my $current_date = $now->strftime('%Y%m%d');
             $report_content .= "=> ./30day_summary_$current_date.gmi 30-Day Summary Report\n\n";
 
@@ -1148,29 +1395,33 @@ $content
 
             # $report_path already defined above
             say "Writing report to $report_path";
-            FileHelper::write( $report_path, $report_content );
-            
+            FileHelper::write($report_path, $report_content);
+
             # Also write HTML version
             mkdir $html_output_dir unless -d $html_output_dir;
-            my $html_path = "$html_output_dir/$date.html";
+            my $html_path    = "$html_output_dir/$date.html";
             my $html_content = gemtext_to_html($report_content);
-            my $html_page = generate_html_page("Stats for $year-$month-$day", $html_content);
+            my $html_page    = generate_html_page("Stats for $year-$month-$day", $html_content);
             say "Writing HTML report to $html_path";
-            FileHelper::write( $html_path, $html_page );
+            FileHelper::write($html_path, $html_page);
         }
 
         # Generate summary reports
-        generate_summary_report( 30, $stats_dir, $output_dir, $html_output_dir, %merged );
-        
+        generate_summary_report(30, $stats_dir, $output_dir, $html_output_dir, %merged);
+
         # Generate index.gmi and index.html
-        generate_index( $output_dir, $html_output_dir );
+        generate_index($output_dir, $html_output_dir);
     }
 
+    # Sub: generate_summary_report
+    # - Purpose: Generate N-day rolling summary in .gmi (+.html except 365-day).
+    # - Params: $days (int), $stats_dir, $output_dir, $html_output_dir, %merged.
+    # - Return: undef.
     sub generate_summary_report {
-        my ( $days, $stats_dir, $output_dir, $html_output_dir, %merged ) = @_;
+        my ($days, $stats_dir, $output_dir, $html_output_dir, %merged) = @_;
 
         # Get the last N days of dates
-        my @dates = sort { $b cmp $a } keys %merged;
+        my @dates     = sort { $b cmp $a } keys %merged;
         my $max_index = $days - 1;
         @dates = @dates[ 0 .. $max_index ] if @dates > $days;
 
@@ -1179,16 +1430,16 @@ $content
 
         # Build report content
         my $report_content = build_report_header($today, $days);
+
         # Order: feed counts -> Top URLs -> daily top 3 for last 30 days -> other tables
-        $report_content .= build_feed_statistics_section( \@dates, \%merged );
+        $report_content .= build_feed_statistics_section(\@dates, \%merged);
 
         # Aggregate and add top lists
-        my ( $all_hosts, $all_urls ) =
-          aggregate_hosts_and_urls( \@dates, \%merged );
+        my ($all_hosts, $all_urls) = aggregate_hosts_and_urls(\@dates, \%merged);
         $report_content .= build_top_urls_section($all_urls, $days);
         $report_content .= build_top3_urls_last_n_days_per_day($stats_dir, 30, \%merged);
         $report_content .= build_top_hosts_section($all_hosts, $days);
-        $report_content .= build_daily_summary_section( \@dates, \%merged );
+        $report_content .= build_daily_summary_section(\@dates, \%merged);
 
         # Add links to other summary reports
         $report_content .= build_summary_links($days, $report_date);
@@ -1198,99 +1449,111 @@ $content
 
         my $report_path = "$output_dir/${days}day_summary_$report_date.gmi";
         say "Writing $days-day summary report to $report_path";
-        FileHelper::write( $report_path, $report_content );
-        
+        FileHelper::write($report_path, $report_content);
+
         # Also write HTML version, except for 365-day summaries (HTML suppressed)
         if ($days != 365) {
             mkdir $html_output_dir unless -d $html_output_dir;
-            my $html_path = "$html_output_dir/${days}day_summary_$report_date.html";
+            my $html_path    = "$html_output_dir/${days}day_summary_$report_date.html";
             my $html_content = gemtext_to_html($report_content);
-            my $html_page = generate_html_page("$days-Day Summary Report", $html_content);
+            my $html_page    = generate_html_page("$days-Day Summary Report", $html_content);
             say "Writing HTML $days-day summary report to $html_path";
-            FileHelper::write( $html_path, $html_page );
-        } else {
+            FileHelper::write($html_path, $html_page);
+        }
+        else {
             say "Skipping HTML generation for 365-day summary (Gemtext only)";
         }
     }
 
+    # Sub: build_report_header
+    # - Purpose: Header section for summary reports.
+    # - Params: $today (Time::Piece), $days (int default 30).
+    # - Return: gemtext string.
     sub build_report_header {
         my ($today, $days) = @_;
-        $days //= 30;  # Default to 30 days for backward compatibility
+        $days //= 30;    # Default to 30 days for backward compatibility
 
         my $content = "# $days-Day Summary Report\n\n";
         $content .= "Generated on " . $today->strftime('%Y-%m-%d') . "\n\n";
         return $content;
     }
 
+    # Sub: build_daily_summary_section
+    # - Purpose: Table of daily total counts over a period.
+    # - Params: $dates (arrayref YYYYMMDD), $merged (hashref date=>stats).
+    # - Return: gemtext string.
     sub build_daily_summary_section {
-        my ( $dates, $merged ) = @_;
+        my ($dates, $merged) = @_;
 
         my $content = "## Daily Summary Evolution (Last 30 Days)\n\n";
         $content .= "### Total Requests by Day\n\n```\n";
 
         my @summary_rows;
-        for my $date ( reverse @$dates ) {
+        for my $date (reverse @$dates) {
             my $stats = $merged->{$date};
             next unless $stats->{count};
 
-            push @summary_rows, build_daily_summary_row( $date, $stats );
+            push @summary_rows, build_daily_summary_row($date, $stats);
         }
 
-        $content .= format_table(
-            [ 'Date', 'Filtered', 'Gemini', 'Web', 'IPv4', 'IPv6', 'Total' ],
-            \@summary_rows );
+        $content .= format_table([ 'Date', 'Filtered', 'Gemini', 'Web', 'IPv4', 'IPv6', 'Total' ], \@summary_rows);
         $content .= "\n```\n\n";
 
         return $content;
     }
 
+    # Sub: build_daily_summary_row
+    # - Purpose: Build one table row with counts for a date.
+    # - Params: $date (YYYYMMDD), $stats (hashref).
+    # - Return: arrayref of cell strings.
     sub build_daily_summary_row {
-        my ( $date, $stats ) = @_;
+        my ($date, $stats) = @_;
 
-        my ( $year, $month, $day ) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+        my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/;
         my $formatted_date = "$year-$month-$day";
 
         my $total_requests =
-          ( $stats->{count}{gemini} // 0 ) + ( $stats->{count}{web} // 0 );
+            ($stats->{count}{gemini} // 0) + ($stats->{count}{web} // 0);
         my $filtered = $stats->{count}{filtered} // 0;
         my $gemini   = $stats->{count}{gemini}   // 0;
         my $web      = $stats->{count}{web}      // 0;
         my $ipv4     = $stats->{count}{IPv4}     // 0;
         my $ipv6     = $stats->{count}{IPv6}     // 0;
 
-        return [
-            $formatted_date, $filtered,
-            $gemini,         $web,            $ipv4,
-            $ipv6,           $total_requests
-        ];
+        return [ $formatted_date, $filtered, $gemini, $web, $ipv4, $ipv6, $total_requests ];
     }
 
+    # Sub: build_feed_statistics_section
+    # - Purpose: Table of feed unique counts by day over a period.
+    # - Params: $dates (arrayref), $merged (hashref).
+    # - Return: gemtext string.
     sub build_feed_statistics_section {
-        my ( $dates, $merged ) = @_;
+        my ($dates, $merged) = @_;
 
         my $content = "### Feed Statistics Evolution\n\n```\n";
 
         my @feed_rows;
-        for my $date ( reverse @$dates ) {
+        for my $date (reverse @$dates) {
             my $stats = $merged->{$date};
             next unless $stats->{feed_ips};
 
-            push @feed_rows, build_feed_statistics_row( $date, $stats );
+            push @feed_rows, build_feed_statistics_row($date, $stats);
         }
 
-        $content .= format_table(
-            [ 'Date', 'Gem Feed', 'Gem Atom', 'Web Feed', 'Web Atom', 'Total' ],
-            \@feed_rows
-        );
+        $content .= format_table([ 'Date', 'Gem Feed', 'Gem Atom', 'Web Feed', 'Web Atom', 'Total' ], \@feed_rows);
         $content .= "\n```\n\n";
 
         return $content;
     }
 
+    # Sub: build_feed_statistics_row
+    # - Purpose: Build one row of feed unique counts for a date.
+    # - Params: $date (YYYYMMDD), $stats (hashref).
+    # - Return: arrayref of cell strings.
     sub build_feed_statistics_row {
-        my ( $date, $stats ) = @_;
+        my ($date, $stats) = @_;
 
-        my ( $year, $month, $day ) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+        my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/;
         my $formatted_date = "$year-$month-$day";
 
         return [
@@ -1303,8 +1566,12 @@ $content
         ];
     }
 
+    # Sub: aggregate_hosts_and_urls
+    # - Purpose: Sum hosts and URLs across multiple days.
+    # - Params: $dates (arrayref), $merged (hashref).
+    # - Return: (\%all_hosts, \%all_urls).
     sub aggregate_hosts_and_urls {
-        my ( $dates, $merged ) = @_;
+        my ($dates, $merged) = @_;
 
         my %all_hosts;
         my %all_urls;
@@ -1314,22 +1581,25 @@ $content
             next unless $stats->{page_ips};
 
             # Aggregate hosts
-            while ( my ( $host, $count ) = each %{ $stats->{page_ips}{hosts} } )
-            {
+            while (my ($host, $count) = each %{ $stats->{page_ips}{hosts} }) {
                 $all_hosts{$host} //= 0;
                 $all_hosts{$host} += $count;
             }
 
             # Aggregate URLs
-            while ( my ( $url, $count ) = each %{ $stats->{page_ips}{urls} } ) {
+            while (my ($url, $count) = each %{ $stats->{page_ips}{urls} }) {
                 $all_urls{$url} //= 0;
                 $all_urls{$url} += $count;
             }
         }
 
-        return ( \%all_hosts, \%all_urls );
+        return (\%all_hosts, \%all_urls);
     }
 
+    # Sub: build_top_hosts_section
+    # - Purpose: Build Top-50 hosts table for the aggregated period.
+    # - Params: $all_hosts (hashref), $days (int default 30).
+    # - Return: gemtext string.
     sub build_top_hosts_section {
         my ($all_hosts, $days) = @_;
         $days //= 30;
@@ -1338,19 +1608,23 @@ $content
 
         my @host_rows;
         my @sorted_hosts =
-          sort { $all_hosts->{$b} <=> $all_hosts->{$a} } keys %$all_hosts;
+            sort { $all_hosts->{$b} <=> $all_hosts->{$a} } keys %$all_hosts;
         @sorted_hosts = @sorted_hosts[ 0 .. 49 ] if @sorted_hosts > 50;
 
         for my $host (@sorted_hosts) {
             push @host_rows, [ $host, $all_hosts->{$host} ];
         }
 
-        $content .= format_table( [ 'Host', 'Visitors' ], \@host_rows );
+        $content .= format_table([ 'Host', 'Visitors' ], \@host_rows);
         $content .= "\n```\n\n";
 
         return $content;
     }
 
+    # Sub: build_top_urls_section
+    # - Purpose: Build Top-50 URLs table for the aggregated period (with truncation).
+    # - Params: $all_urls (hashref), $days (int default 30).
+    # - Return: gemtext string.
     sub build_top_urls_section {
         my ($all_urls, $days) = @_;
         $days //= 30;
@@ -1359,7 +1633,7 @@ $content
 
         my @url_rows;
         my @sorted_urls =
-          sort { $all_urls->{$b} <=> $all_urls->{$a} } keys %$all_urls;
+            sort { $all_urls->{$b} <=> $all_urls->{$a} } keys %$all_urls;
         @sorted_urls = @sorted_urls[ 0 .. 49 ] if @sorted_urls > 50;
 
         for my $url (@sorted_urls) {
@@ -1367,18 +1641,23 @@ $content
         }
 
         # Truncate URLs to fit within 100-character rows
-        truncate_urls_for_table( \@url_rows, 'Visitors' );
+        truncate_urls_for_table(\@url_rows, 'Visitors');
 
-        $content .= format_table( [ 'URL', 'Visitors' ], \@url_rows );
+        $content .= format_table([ 'URL', 'Visitors' ], \@url_rows);
         $content .= "\n```\n\n";
 
         return $content;
     }
 
+    # Sub: build_summary_links
+    # - Purpose: Links to other summary reports (30-day when not already on it).
+    # - Params: $current_days (int), $report_date (YYYYMMDD).
+    # - Return: gemtext string.
     sub build_summary_links {
-        my ( $current_days, $report_date ) = @_;
+        my ($current_days, $report_date) = @_;
 
         my $content = '';
+
         # Only add link to 30-day summary when not on the 30-day report itself
         if ($current_days != 30) {
             $content .= "## Other Summary Reports\n\n";
@@ -1388,17 +1667,22 @@ $content
         return $content;
     }
 
-sub build_top3_urls_last_n_days_per_day {
-    my ($stats_dir, $days, $merged) = @_;
-    $days //= 30;
-    my $content = "## Top 5 URLs Per Day (Last ${days} Days)\n\n";
+    # Sub: build_top3_urls_last_n_days_per_day
+    # - Purpose: For each of last N days, render the top URLs table.
+    # - Params: $stats_dir (str), $days (int default 30), $merged (hashref).
+    # - Return: gemtext string.
+    sub build_top3_urls_last_n_days_per_day {
+        my ($stats_dir, $days, $merged) = @_;
+        $days //= 30;
+        my $content = "## Top 5 URLs Per Day (Last ${days} Days)\n\n";
 
-    my @all = DateHelper::last_month_dates();
-    my @dates = @all;
-    @dates = @all[0 .. $days-1] if @all > $days;
-    return $content . "(no data)\n\n" unless @dates;
+        my @all   = DateHelper::last_month_dates();
+        my @dates = @all;
+        @dates = @all[ 0 .. $days - 1 ] if @all > $days;
+        return $content . "(no data)\n\n" unless @dates;
 
         for my $date (@dates) {
+
             # Prefer in-memory merged stats if available; otherwise merge from disk
             my $stats = $merged->{$date};
             if (!$stats || !($stats->{page_ips} && $stats->{page_ips}{urls})) {
@@ -1406,24 +1690,28 @@ sub build_top3_urls_last_n_days_per_day {
             }
             next unless $stats && $stats->{page_ips} && $stats->{page_ips}{urls};
 
-            my ($y,$m,$d) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+            my ($y, $m, $d) = $date =~ /(\d{4})(\d{2})(\d{2})/;
             $content .= "### $y-$m-$d\n\n";
 
-            my $urls = $stats->{page_ips}{urls};
-            my @sorted = sort { ($urls->{$b}//0) <=> ($urls->{$a}//0) } keys %$urls;
+            my $urls   = $stats->{page_ips}{urls};
+            my @sorted = sort { ($urls->{$b} // 0) <=> ($urls->{$a} // 0) } keys %$urls;
             next unless @sorted;
             my $limit = @sorted < 5 ? @sorted : 5;
-            @sorted = @sorted[0..$limit-1];
+            @sorted = @sorted[ 0 .. $limit - 1 ];
 
             my @rows;
             for my $u (@sorted) { push @rows, [ $u, $urls->{$u} // 0 ]; }
-            truncate_urls_for_table( \@rows, 'Visitors' );
+            truncate_urls_for_table(\@rows, 'Visitors');
             $content .= "```\n" . format_table([ 'URL', 'Visitors' ], \@rows) . "\n```\n\n";
+        }
+
+        return $content;
     }
 
-    return $content;
-}
-    
+    # Sub: generate_index
+    # - Purpose: Create index.gmi/.html using the latest 30-day summary as content.
+    # - Params: $output_dir (str), $html_output_dir (str).
+    # - Return: undef.
     sub generate_index {
         my ($output_dir, $html_output_dir) = @_;
 
@@ -1433,13 +1721,14 @@ sub build_top3_urls_last_n_days_per_day {
         closedir($dh);
 
         my @summaries_30day = sort { $b cmp $a } grep { /^30day_summary_/ } @gmi_files;
-        my $latest_30 = $summaries_30day[0];
+        my $latest_30       = $summaries_30day[0];
 
         my $index_path = "$output_dir/index.gmi";
         mkdir $html_output_dir unless -d $html_output_dir;
         my $html_path = "$html_output_dir/index.html";
 
         if ($latest_30) {
+
             # Read 30-day summary content and use it as index
             my $summary_path = "$output_dir/$latest_30";
             open my $sfh, '<', $summary_path or die "$summary_path: $!";
@@ -1460,9 +1749,10 @@ sub build_top3_urls_last_n_days_per_day {
                 close $hh;
                 say "Writing HTML index to $html_path (copy of $latest_html)";
                 FileHelper::write($html_path, $html_page);
-            } else {
+            }
+            else {
                 my $html_content = gemtext_to_html($content);
-                my $html_page = generate_html_page("30-Day Summary Report", $html_content);
+                my $html_page    = generate_html_page("30-Day Summary Report", $html_content);
                 say "Writing HTML index to $html_path (from gemtext)";
                 FileHelper::write($html_path, $html_page);
             }
@@ -1475,18 +1765,24 @@ sub build_top3_urls_last_n_days_per_day {
         FileHelper::write($index_path, $fallback);
 
         my $html_content = gemtext_to_html($fallback);
-        my $html_page = generate_html_page("Foostats Reports Index", $html_content);
+        my $html_page    = generate_html_page("Foostats Reports Index", $html_content);
         say "Writing fallback HTML index to $html_path";
         FileHelper::write($html_path, $html_page);
     }
 }
 
-package main {
-    use Getopt::Long;
-    use Sys::Hostname;
-
-    sub usage {
-        print <<~"USAGE";
+package main;
+# Package: main — CLI entrypoint and orchestration
+# - Purpose: Parse options and invoke parse/replicate/report flows.
+use Getopt::Long;
+use Sys::Hostname;
+
+# Sub: usage
+# - Purpose: Print usage and exit 0.
+# - Params: none.
+# - Return: never (exits).
+sub usage {
+    print <<~"USAGE";
         Usage: $0 [options]
 
         Options:
@@ -1509,47 +1805,56 @@ package main {
           --version                 Show version information.
           --help                    Show this help message.
         USAGE
-        exit 0;
-    }
+    exit 0;
+}
 
-    sub parse_logs ( $stats_dir, $odds_file, $odds_log ) {
-        my $out = Foostats::FileOutputter->new( stats_dir => $stats_dir );
+# Sub: parse_logs
+# - Purpose: Parse logs and persist aggregated stats files under $stats_dir.
+# - Params: $stats_dir (str), $odds_file (str), $odds_log (str).
+# - Return: undef.
+sub parse_logs ($stats_dir, $odds_file, $odds_log) {
+    my $out = Foostats::FileOutputter->new(stats_dir => $stats_dir);
 
-        $out->{stats} = Foostats::Logreader::parse_logs(
-            $out->last_processed_date('web'),
-            $out->last_processed_date('gemini'),
-            $odds_file, $odds_log
-        );
+    $out->{stats} = Foostats::Logreader::parse_logs(
+        $out->last_processed_date('web'),
+        $out->last_processed_date('gemini'),
+        $odds_file, $odds_log
+    );
 
-        $out->write;
-    }
+    $out->write;
+}
 
-    my ( $parse_logs, $replicate, $report, $all, $help, $version );
+# Sub: foostats_main
+# - Purpose: Option parsing and execution of requested actions.
+# - Params: none (reads @ARGV).
+# - Return: exit code via program termination.
+sub foostats_main {
+    my ($parse_logs, $replicate, $report, $all, $help, $version);
 
     # With default values
     my $stats_dir = '/var/www/htdocs/buetow.org/self/foostats';
     my $odds_file = $stats_dir . '/fooodds.txt';
     my $odds_log  = '/var/log/fooodds';
-    my $output_dir;  # Will default to $stats_dir/gemtext if not specified
-    my $html_output_dir;  # Will default to /var/www/htdocs/gemtexter/stats.foo.zone if not specified
+    my $output_dir;         # Will default to $stats_dir/gemtext if not specified
+    my $html_output_dir;    # Will default to /var/www/htdocs/gemtexter/stats.foo.zone if not specified
     my $partner_node =
-      hostname eq 'fishfinger.buetow.org'
-      ? 'blowfish.buetow.org'
-      : 'fishfinger.buetow.org';
+        hostname eq 'fishfinger.buetow.org'
+        ? 'blowfish.buetow.org'
+        : 'fishfinger.buetow.org';
 
     GetOptions
-      'parse-logs!'    => \$parse_logs,
-      'filter-log=s'   => \$odds_log,
-      'odds-file=s'    => \$odds_file,
-      'replicate!'     => \$replicate,
-      'report!'        => \$report,
-      'all!'           => \$all,
-      'stats-dir=s'    => \$stats_dir,
-      'output-dir=s'   => \$output_dir,
-      'html-output-dir=s' => \$html_output_dir,
-      'partner-node=s' => \$partner_node,
-      'version'        => \$version,
-      'help|?'         => \$help;
+        'parse-logs!'       => \$parse_logs,
+        'filter-log=s'      => \$odds_log,
+        'odds-file=s'       => \$odds_file,
+        'replicate!'        => \$replicate,
+        'report!'           => \$report,
+        'all!'              => \$all,
+        'stats-dir=s'       => \$stats_dir,
+        'output-dir=s'      => \$output_dir,
+        'html-output-dir=s' => \$html_output_dir,
+        'partner-node=s'    => \$partner_node,
+        'version'           => \$version,
+        'help|?'            => \$help;
 
     if ($version) {
         print "foostats " . VERSION . "\n";
@@ -1558,20 +1863,22 @@ package main {
 
     usage() if $help;
 
-    parse_logs( $stats_dir, $odds_file, $odds_log )
-      if $parse_logs
-      or $all;
+    parse_logs($stats_dir, $odds_file, $odds_log)
+        if $parse_logs
+        or $all;
 
-    Foostats::Replicator::replicate( $stats_dir, $partner_node )
-      if $replicate
-      or $all;
+    Foostats::Replicator::replicate($stats_dir, $partner_node)
+        if $replicate
+        or $all;
 
     # Set default output directories if not specified
-    $output_dir //= '/var/gemini/stats.foo.zone';
+    $output_dir      //= '/var/gemini/stats.foo.zone';
     $html_output_dir //= '/var/www/htdocs/gemtexter/stats.foo.zone';
-    
-    Foostats::Reporter::report( $stats_dir, $output_dir, $html_output_dir,
-        Foostats::Merger::merge($stats_dir) )
-      if $report
-      or $all;
+
+    Foostats::Reporter::report($stats_dir, $output_dir, $html_output_dir, Foostats::Merger::merge($stats_dir))
+        if $report
+        or $all;
 }
+
+# Only run main flow when executed as a script, not when required (e.g., tests)
+foostats_main() unless caller;
author	Paul Buetow <paul@buetow.org>	2025-09-06 10:35:14 +0300
committer	Paul Buetow <paul@buetow.org>	2025-09-06 10:35:14 +0300
commit	79c5890ca8979138eb617d2cdd615a9b7549ebc1 (patch)
tree	6cbe701322d4df05e13fd5d68fb6d8b0a7fc5d5a /frontends/scripts
parent	e257dc29316ee2f3bb6e467ec978f526764fd132 (diff)