summary refs log tree commit diff
path: root/frontends/scripts
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2025-09-06 10:35:14 +0300
committer Paul Buetow <paul@buetow.org> 2025-09-06 10:35:14 +0300
commit 79c5890ca8979138eb617d2cdd615a9b7549ebc1 (patch)
tree 6cbe701322d4df05e13fd5d68fb6d8b0a7fc5d5a /frontends/scripts
parent e257dc29316ee2f3bb6e467ec978f526764fd132 (diff)
Update
Diffstat (limited to 'frontends/scripts')
-rw-r--r-- frontends/scripts/foostats.pl 1209
1 files changed, 758 insertions, 451 deletions
diff --git a/frontends/scripts/foostats.pl b/frontends/scripts/foostats.pl
index 235da02..e6ef1c5 100644
--- a/frontends/scripts/foostats.pl
+++ b/frontends/scripts/foostats.pl
@@ -12,8 +12,8 @@ use experimental qw(builtin);
use feature qw(refaliasing);
no warnings qw(experimental::refaliasing);
-# TODO: UNDO
-use diagnostics;
+# Debugging aids like diagnostics are noisy in production.
+# Removed per review: enable locally when debugging only.
use constant VERSION => 'v0.1.0';
@@ -24,86 +24,101 @@ use constant VERSION => 'v0.1.0';
# * Nicely formatted .txt output by stats by count by date
# * Print out all UAs, to add new excludes/blocked IPs
+# Package: FileHelper — small file/JSON helpers
+# - Purpose: Atomic writes, gzip JSON read/write, and line reading.
+# - Notes: Dies when a file cannot be opened (write_json_gz also on rename failure); JSON encoding uses the JSON module (CPAN, not core).
package FileHelper {
use JSON;
- sub write ( $path, $content ) {
+ # Sub: write
+ # - Purpose: Atomic write to a file via "$path.tmp" and rename.
+ # - Params: $path (str) destination; $content (str) contents to write.
+ # - Return: result of rename (true on success); dies only if the temp file cannot be opened.
+ sub write ($path, $content) {
open my $fh, '>', "$path.tmp"
- or die "\nCannot open file: $!";
+ or die "\nCannot open file: $!";
print $fh $content;
close $fh;
rename
- "$path.tmp",
- $path;
+ "$path.tmp",
+ $path;
}
- sub write_json_gz ( $path, $data ) {
+ # Sub: write_json_gz
+ # - Purpose: JSON-encode $data and write it gzipped atomically.
+ # - Params: $path (str) destination path; $data (ref/scalar) Perl data.
+ # - Return: true on success; dies if the temp file cannot be opened or renamed.
+ sub write_json_gz ($path, $data) {
my $json = encode_json $data;
say "Writing $path";
open my $fd, '>:gzip', "$path.tmp"
- or die "$path.tmp: $!";
+ or die "$path.tmp: $!";
print $fd $json;
close $fd;
rename "$path.tmp", $path
- or die "$path.tmp: $!";
+ or die "$path.tmp: $!";
}
+ # Sub: read_json_gz
+ # - Purpose: Read a gzipped JSON file and decode to Perl data.
+ # - Params: $path (str) path to .json.gz file.
+ # - Return: Perl data structure.
sub read_json_gz ($path) {
say "Reading $path";
open my $fd, '<:gzip', $path
- or die "$path: $!";
+ or die "$path: $!";
my $json = decode_json <$fd>;
close $fd;
return $json;
}
+ # Sub: read_lines
+ # - Purpose: Slurp file lines and chomp newlines.
+ # - Params: $path (str) file path.
+ # - Return: list of lines (no trailing newlines).
sub read_lines ($path) {
my @lines;
- open( my $fh, '<', $path )
- or die "$path: $!";
- chomp( @lines = <$fh> );
+ open(my $fh, '<', $path)
+ or die "$path: $!";
+ chomp(@lines = <$fh>);
close($fh);
return @lines;
}
}
+# Package: DateHelper — date range helpers
+# - Purpose: Produce date strings used for report windows.
+# - Format: Dates are returned as YYYYMMDD strings.
package DateHelper {
use Time::Piece;
+ # Sub: last_month_dates
+ # - Purpose: Return dates for today back to 30 days ago (inclusive).
+ # - Params: none.
+ # - Return: list of YYYYMMDD strings, newest first.
sub last_month_dates () {
my $today = localtime;
my @dates;
- for my $days_ago ( 0 .. 30 ) {
- my $date = $today - ( $days_ago * 24 * 60 * 60 );
+ for my $days_ago (0 .. 30) {
+ my $date = $today - ($days_ago * 24 * 60 * 60);
push
- @dates,
- $date->strftime('%Y%m%d');
+ @dates,
+ $date->strftime('%Y%m%d');
}
return @dates;
}
- sub last_n_months_day_dates ($months) {
- my $today = localtime;
- my $start_year = $today->year;
- my $start_month = $today->mon - $months;
- while ($start_month <= 0) { $start_month += 12; $start_year--; }
-
- my $start = Time::Piece->strptime(sprintf('%04d-%02d-01', $start_year, $start_month), '%Y-%m-%d');
- my @dates;
- my $t = $start;
- while ($t <= $today) {
- push @dates, $t->strftime('%Y%m%d');
- $t += 24 * 60 * 60; # one day
- }
- return @dates;
- }
+
}
+# Package: Foostats::Logreader — parse and normalize logs
+# - Purpose: Read web and gemini logs, anonymize IPs, and emit normalized events.
+# - Output Event: { proto, host, ip_hash, ip_proto, date, time, uri_path, status }
package Foostats::Logreader {
use Digest::SHA3 'sha3_512_base64';
use File::stat;
@@ -111,41 +126,54 @@ package Foostats::Logreader {
use Time::Piece;
use String::Util qw(contains startswith endswith);
- use constant {
- GEMINI_LOGS_GLOB => '/var/log/daemon*',
- WEB_LOGS_GLOB => '/var/www/logs/access.log*',
- };
-
+ # Make log locations configurable (env overrides) to enable testing.
+ # Sub: gemini_logs_glob
+ # - Purpose: Glob for gemini-related logs; env override for testing.
+ # - Return: glob pattern string.
+ sub gemini_logs_glob { $ENV{FOOSTATS_GEMINI_LOGS_GLOB} // '/var/log/daemon*' }
+ # Sub: web_logs_glob
+ # - Purpose: Glob for web access logs; env override for testing.
+ # - Return: glob pattern string.
+ sub web_logs_glob { $ENV{FOOSTATS_WEB_LOGS_GLOB} // '/var/www/logs/access.log*' }
+
+ # Sub: anonymize_ip
+ # - Purpose: Classify IPv4/IPv6 and map IP to a stable SHA3-512 base64 hash.
+ # - Params: $ip (str) source IP.
+ # - Return: ($hash, $proto) where $proto is 'IPv4' or 'IPv6'.
sub anonymize_ip ($ip) {
my $ip_proto =
- contains( $ip, ':' )
- ? 'IPv6'
- : 'IPv4';
+ contains($ip, ':')
+ ? 'IPv6'
+ : 'IPv4';
my $ip_hash = sha3_512_base64 $ip;
- return ( $ip_hash, $ip_proto );
+ return ($ip_hash, $ip_proto);
}
- sub read_lines ( $glob, $cb ) {
+ # Sub: read_lines
+ # - Purpose: Iterate files matching glob by age; invoke $cb for each line.
+ # - Params: $glob (str) file glob; $cb (code) callback ($year, @fields).
+ # - Return: nothing meaningful; once $cb returns undef for any line, the current file is still read to the end but no further files are processed.
+ sub read_lines ($glob, $cb) {
my sub year ($path) {
- localtime( ( stat $path )->mtime )->strftime('%Y');
+ localtime((stat $path)->mtime)->strftime('%Y');
}
my sub open_file ($path) {
my $flag =
- $path =~ /\.gz$/
- ? '<:gzip'
- : '<';
+ $path =~ /\.gz$/
+ ? '<:gzip'
+ : '<';
open my $fd, $flag, $path
- or die "$path: $!";
+ or die "$path: $!";
return $fd;
}
my $last = false;
- say 'File path glob matches: ' . join( ' ', glob $glob );
+ say 'File path glob matches: ' . join(' ', glob $glob);
- LAST:
- for my $path ( sort { -M $a <=> -M $b } glob $glob ) {
+ LAST:
+ for my $path (sort { -M $a <=> -M $b } glob $glob) {
say "Processing $path";
my $file = open_file $path;
@@ -153,37 +181,41 @@ package Foostats::Logreader {
while (<$file>) {
next
- if contains( $_, 'logfile turned over' );
+ if contains($_, 'logfile turned over');
# last == true means: After this file, don't process more
$last = true
- unless defined $cb->( $year, split / +/ );
+ unless defined $cb->($year, split / +/);
}
say "Closing $path (last:$last)";
close $file;
last LAST
- if $last;
+ if $last;
}
}
- sub parse_web_logs ( $last_processed_date, $cb ) {
+ # Sub: parse_web_logs
+ # - Purpose: Parse web log lines into normalized events and pass to callback.
+ # - Params: $last_processed_date (YYYYMMDD int) lower bound; $cb (code) event consumer.
+ # - Return: undef.
+ sub parse_web_logs ($last_processed_date, $cb) {
my sub parse_date ($date) {
- my $t = Time::Piece->strptime( $date, '[%d/%b/%Y:%H:%M:%S' );
- return ( $t->strftime('%Y%m%d'), $t->strftime('%H%M%S') );
+ my $t = Time::Piece->strptime($date, '[%d/%b/%Y:%H:%M:%S');
+ return ($t->strftime('%Y%m%d'), $t->strftime('%H%M%S'));
}
my sub parse_web_line (@line) {
- my ( $date, $time ) = parse_date $line [4];
+ my ($date, $time) = parse_date $line [4];
return undef
- if $date < $last_processed_date;
+ if $date < $last_processed_date;
# X-Forwarded-For?
my $ip =
- $line[-2] eq '-'
- ? $line[1]
- : $line[-2];
- my ( $ip_hash, $ip_proto ) = anonymize_ip $ip;
+ $line[-2] eq '-'
+ ? $line[1]
+ : $line[-2];
+ my ($ip_hash, $ip_proto) = anonymize_ip $ip;
return {
proto => 'web',
@@ -197,42 +229,45 @@ package Foostats::Logreader {
};
}
- read_lines WEB_LOGS_GLOB, sub ( $year, @line ) {
- $cb->( parse_web_line @line );
+ read_lines web_logs_glob(), sub ($year, @line) {
+ $cb->(parse_web_line @line);
};
}
- sub parse_gemini_logs ( $last_processed_date, $cb ) {
- my sub parse_date ( $year, @line ) {
+ # Sub: parse_gemini_logs
+ # - Purpose: Parse vger/relayd lines, merge paired entries, and emit events.
+ # - Params: $last_processed_date (YYYYMMDD int); $cb (code) event consumer.
+ # - Return: undef.
+ sub parse_gemini_logs ($last_processed_date, $cb) {
+ my sub parse_date ($year, @line) {
my $timestr = "$line[0] $line[1]";
- return Time::Piece->strptime( $timestr, '%b %d' )
- ->strftime("$year%m%d");
+ return Time::Piece->strptime($timestr, '%b %d')->strftime("$year%m%d");
}
- my sub parse_vger_line ( $year, @line ) {
+ my sub parse_vger_line ($year, @line) {
my $full_path = $line[5];
$full_path =~ s/"//g;
- my ( $proto, undef, $host, $uri_path ) =
- split '/',
- $full_path,
- 4;
+ my ($proto, undef, $host, $uri_path) =
+ split '/',
+ $full_path,
+ 4;
$uri_path = ''
- unless defined $uri_path;
+ unless defined $uri_path;
return {
proto => 'gemini',
host => $host,
uri_path => "/$uri_path",
status => $line[6],
- date => int( parse_date( $year, @line ) ),
+ date => int(parse_date($year, @line)),
time => $line[2],
};
}
- my sub parse_relayd_line ( $year, @line ) {
- my $date = int( parse_date( $year, @line ) );
+ my sub parse_relayd_line ($year, @line) {
+ my $date = int(parse_date($year, @line));
- my ( $ip_hash, $ip_proto ) = anonymize_ip $line [12];
+ my ($ip_hash, $ip_proto) = anonymize_ip $line [12];
return {
ip_hash => $ip_hash,
ip_proto => $ip_proto,
@@ -241,26 +276,26 @@ package Foostats::Logreader {
};
}
- # Expect one vger and one relayd log line per event! So collect
- # both events (one from one log line each) and then merge the result hash!
- my ( $vger, $relayd );
- read_lines GEMINI_LOGS_GLOB, sub ( $year, @line ) {
- if ( $line[4] eq 'vger:' ) {
+ # Expect one vger and one relayd log line per event! So collect
+ # both events (one from one log line each) and then merge the result hash!
+ my ($vger, $relayd);
+ read_lines gemini_logs_glob(), sub ($year, @line) {
+ if ($line[4] eq 'vger:') {
$vger = parse_vger_line $year, @line;
}
- elsif ( $line[5] eq 'relay'
- and startswith( $line[6], 'gemini' ) )
+ elsif ($line[5] eq 'relay'
+ and startswith($line[6], 'gemini'))
{
$relayd = parse_relayd_line $year, @line;
return undef
- if $relayd->{date} < $last_processed_date;
+ if $relayd->{date} < $last_processed_date;
}
if ( defined $vger
and defined $relayd
- and $vger->{time} eq $relayd->{time} )
+ and $vger->{time} eq $relayd->{time})
{
- $cb->( { %$vger, %$relayd } );
+ $cb->({ %$vger, %$relayd });
$vger = $relayd = undef;
}
@@ -268,9 +303,12 @@ package Foostats::Logreader {
};
}
- sub parse_logs ( $last_web_date, $last_gemini_date, $odds_file, $odds_log )
- {
- my $agg = Foostats::Aggregator->new( $odds_file, $odds_log );
+ # Sub: parse_logs
+ # - Purpose: Coordinate parsing for both web and gemini, aggregating into stats.
+ # - Params: $last_web_date, $last_gemini_date (YYYYMMDD int), $odds_file, $odds_log.
+ # - Return: stats hashref keyed by "proto_YYYYMMDD".
+ sub parse_logs ($last_web_date, $last_gemini_date, $odds_file, $odds_log) {
+ my $agg = Foostats::Aggregator->new($odds_file, $odds_log);
say "Last web date: $last_web_date";
say "Last gemini date: $last_gemini_date";
@@ -287,29 +325,40 @@ package Foostats::Logreader {
}
# TODO: Write filter summary at the end of the filter log.
+# Package: Foostats::Filter — request filtering and logging
+# - Purpose: Identify odd URI patterns and excessive requests per second per IP.
+# - Notes: Maintains an in-process blocklist for the current run.
package Foostats::Filter {
use String::Util qw(contains startswith endswith);
- sub new ( $class, $odds_file, $log_path ) {
+ # Sub: new
+ # - Purpose: Construct a filter with odd patterns and a log path.
+ # - Params: $odds_file (str) pattern list; $log_path (str) append-only log file.
+ # - Return: blessed Foostats::Filter instance.
+ sub new ($class, $odds_file, $log_path) {
say "Logging filter to $log_path";
my @odds = FileHelper::read_lines($odds_file);
bless {
odds => \@odds,
log_path => $log_path
- },
- $class;
+ },
+ $class;
}
- sub ok ( $self, $event ) {
+ # Sub: ok
+ # - Purpose: Check if an event passes filters; updates block state/logging.
+ # - Params: $event (hashref) normalized request.
+ # - Return: true if allowed; false if blocked.
+ sub ok ($self, $event) {
state %blocked = ();
return false
- if exists $blocked{ $event->{ip_hash} };
+ if exists $blocked{ $event->{ip_hash} };
if ( $self->odd($event)
- or $self->excessive($event) )
+ or $self->excessive($event))
{
- ( $blocked{ $event->{ip_hash} } //= 0 )++;
+ ($blocked{ $event->{ip_hash} } //= 0)++;
return false;
}
else {
@@ -317,53 +366,64 @@ package Foostats::Filter {
}
}
- sub odd ( $self, $event ) {
+ # Sub: odd
+ # - Purpose: Match URI path against user-provided odd patterns (substring match).
+ # - Params: $event (hashref) with uri_path.
+ # - Return: true if odd (blocked), false otherwise.
+ sub odd ($self, $event) {
\my $uri_path = \$event->{uri_path};
- for ( $self->{odds}->@* ) {
+ for ($self->{odds}->@*) {
+ next if !defined $_ || $_ eq '' || /^\s*#/;
next
- unless contains( $uri_path, $_ );
+ unless contains($uri_path, $_);
- $self->log( 'WARN', $uri_path,
- "contains $_ and is odd and will therefore be blocked!" );
+ $self->log('WARN', $uri_path, "contains $_ and is odd and will therefore be blocked!");
return true;
}
- $self->log( 'OK', $uri_path, "appears fine..." );
+ $self->log('OK', $uri_path, "appears fine...");
return false;
}
- sub log ( $self, $severity, $subject, $message ) {
+ # Sub: log
+ # - Purpose: Deduplicated append-only logging for filter decisions.
+ # - Params: $severity (OK|WARN), $subject (str), $message (str).
+ # - Return: undef.
+ sub log ($self, $severity, $subject, $message) {
state %dedup;
# Don't log if path was already logged
return
- if exists $dedup{$subject};
+ if exists $dedup{$subject};
$dedup{$subject} = 1;
- open( my $fh, '>>', $self->{log_path} )
- or die $self->{log_path} . ": $!";
+ open(my $fh, '>>', $self->{log_path})
+ or die $self->{log_path} . ": $!";
print $fh "$severity: $subject $message\n";
close($fh);
}
- sub excessive ( $self, $event ) {
+ # Sub: excessive
+ # - Purpose: Block an IP whose per-second request counter exceeds 1; the request that starts a new second resets the counter and is not counted.
+ # - Params: $event (hashref) with time and ip_hash.
+ # - Return: true if blocked; false otherwise.
+ sub excessive ($self, $event) {
\my $time = \$event->{time};
\my $ip_hash = \$event->{ip_hash};
state $last_time = $time; # Time with second: 'HH:MM:SS'
state %count = (); # IPs accessing within the same second!
- if ( $last_time ne $time ) {
+ if ($last_time ne $time) {
$last_time = $time;
%count = ();
return false;
}
# IP requested site more than once within the same second!?
- if ( 1 < ++( $count{$ip_hash} //= 0 ) ) {
- $self->log( 'WARN', $ip_hash,
- "blocked due to excessive requesting..." );
+ if (1 < ++($count{$ip_hash} //= 0)) {
+ $self->log('WARN', $ip_hash, "blocked due to excessive requesting...");
return true;
}
@@ -371,6 +431,8 @@ package Foostats::Filter {
}
}
+# Package: Foostats::Aggregator — in-memory stats builder
+# - Purpose: Apply filters and accumulate counts, unique IPs per feed/page.
package Foostats::Aggregator {
use String::Util qw(contains startswith endswith);
@@ -380,140 +442,185 @@ package Foostats::Aggregator {
GEMFEED_URI_2 => '/gemfeed/',
};
- sub new ( $class, $odds_file, $odds_log ) {
+ # Sub: new
+ # - Purpose: Construct aggregator with a filter and empty stats store.
+ # - Params: $odds_file (str), $odds_log (str).
+ # - Return: Foostats::Aggregator instance.
+ sub new ($class, $odds_file, $odds_log) {
bless {
- filter => Foostats::Filter->new( $odds_file, $odds_log ),
+ filter => Foostats::Filter->new($odds_file, $odds_log),
stats => {}
- },
- $class;
+ },
+ $class;
}
- sub add ( $self, $event ) {
+ # Sub: add
+ # - Purpose: Apply filter, update counts and unique-IP sets, and return event.
+ # - Params: $event (hashref) normalized event; ignored if undef.
+ # - Return: $event (undef when $event is undef); filtered events increment filtered count only.
+ sub add ($self, $event) {
return undef
- unless defined $event;
+ unless defined $event;
my $date = $event->{date};
my $date_key = $event->{proto} . "_$date";
+ # Stats data model per protocol+day (key: "proto_YYYYMMDD"):
+ # - count: per-proto request count, per IP version, and filtered count
+ # - feed_ips: unique IPs per feed type (atom_feed, gemfeed)
+ # - page_ips: unique IPs per host and per URL
$self->{stats}{$date_key} //= {
- count => {
- filtered => 0
- },
+ count => { filtered => 0, },
feed_ips => {
atom_feed => {},
- gemfeed => {}
+ gemfeed => {},
},
page_ips => {
hosts => {},
- urls => {}
+ urls => {},
},
};
\my $s = \$self->{stats}{$date_key};
- unless ( $self->{filter}->ok($event) ) {
+ unless ($self->{filter}->ok($event)) {
$s->{count}{filtered}++;
return $event;
}
- $self->add_count( $s, $event );
- $self->add_page_ips( $s, $event )
- unless $self->add_feed_ips( $s, $event );
+ $self->add_count($s, $event);
+ $self->add_page_ips($s, $event)
+ unless $self->add_feed_ips($s, $event);
return $event;
}
- sub add_count ( $self, $stats, $event ) {
+ # Sub: add_count
+ # - Purpose: Increment totals by protocol and IP version.
+ # - Params: $stats (hashref) date bucket; $event (hashref).
+ # - Return: undef.
+ sub add_count ($self, $stats, $event) {
\my $c = \$stats->{count};
\my $e = \$event;
- ( $c->{ $e->{proto} } //= 0 )++;
- ( $c->{ $e->{ip_proto} } //= 0 )++;
+ ($c->{ $e->{proto} } //= 0)++;
+ ($c->{ $e->{ip_proto} } //= 0)++;
}
- sub add_feed_ips ( $self, $stats, $event ) {
+ # Sub: add_feed_ips
+ # - Purpose: If event hits feed endpoints, add unique IP and short-circuit.
+ # - Params: $stats (hashref), $event (hashref).
+ # - Return: 1 if feed matched; 0 otherwise.
+ sub add_feed_ips ($self, $stats, $event) {
\my $f = \$stats->{feed_ips};
\my $e = \$event;
- if ( endswith( $e->{uri_path}, ATOM_FEED_URI ) ) {
- ( $f->{atom_feed}->{ $e->{ip_hash} } //= 0 )++;
- }
- elsif ( contains( $e->{uri_path}, GEMFEED_URI ) ) {
- ( $f->{gemfeed}->{ $e->{ip_hash} } //= 0 )++;
+ # Atom feed (exact path match, allow optional query string or fragment)
+ if ($e->{uri_path} =~ m{^/gemfeed/atom\.xml(?:[?#].*)?$}) {
+ ($f->{atom_feed}->{ $e->{ip_hash} } //= 0)++;
+ return 1;
}
- elsif ( endswith( $e->{uri_path}, GEMFEED_URI_2 ) ) {
- ( $f->{gemfeed}->{ $e->{ip_hash} } //= 0 )++;
- }
- else {
- 0;
+
+ # Gemfeed index: '/gemfeed/' or '/gemfeed/index.gmi' (optionally with query or fragment)
+ if ($e->{uri_path} =~ m{^/gemfeed/(?:index\.gmi)?(?:[?#].*)?$}) {
+ ($f->{gemfeed}->{ $e->{ip_hash} } //= 0)++;
+ return 1;
}
+
+ return 0;
}
- sub add_page_ips ( $self, $stats, $event ) {
+ # Sub: add_page_ips
+ # - Purpose: Track unique IPs per host and per URL for .html/.gmi pages.
+ # - Params: $stats (hashref), $event (hashref).
+ # - Return: undef.
+ sub add_page_ips ($self, $stats, $event) {
\my $e = \$event;
\my $p = \$stats->{page_ips};
return
- if !endswith( $e->{uri_path}, '.html' )
- && !endswith( $e->{uri_path}, '.gmi' );
+ if !endswith($e->{uri_path}, '.html')
+ && !endswith($e->{uri_path}, '.gmi');
- ( $p->{hosts}->{ $e->{host} }->{ $e->{ip_hash} } //= 0 )++;
- ( $p->{urls}->{ $e->{host} . $e->{uri_path} }->{ $e->{ip_hash} } //=
- 0 )++;
+ ($p->{hosts}->{ $e->{host} }->{ $e->{ip_hash} } //= 0)++;
+ ($p->{urls}->{ $e->{host} . $e->{uri_path} }->{ $e->{ip_hash} } //=
+ 0)++;
}
}
+# Package: Foostats::FileOutputter — write per-day stats to disk
+# - Purpose: Persist aggregated stats to gzipped JSON files under a stats dir.
package Foostats::FileOutputter {
use JSON;
use Sys::Hostname;
use PerlIO::gzip;
- sub new ( $class, %args ) {
+ # Sub: new
+ # - Purpose: Create outputter with stats_dir; ensures directory exists.
+ # - Params: %args (hash) must include stats_dir.
+ # - Return: Foostats::FileOutputter instance.
+ sub new ($class, %args) {
my $self = bless \%args, $class;
mkdir $self->{stats_dir}
- or die $self->{stats_dir} . ": $!"
- unless -d $self->{stats_dir};
+ or die $self->{stats_dir} . ": $!"
+ unless -d $self->{stats_dir};
return $self;
}
- sub last_processed_date ( $self, $proto ) {
- my $hostname = hostname();
- my @processed =
- glob $self->{stats_dir} . "/${proto}_????????.$hostname.json.gz";
+ # Sub: last_processed_date
+ # - Purpose: Determine the most recent processed date for a protocol for this host.
+ # - Params: $proto (str) 'web' or 'gemini'.
+ # - Return: YYYYMMDD int (0 if none found).
+ sub last_processed_date ($self, $proto) {
+ my $hostname = hostname();
+ my @processed = glob $self->{stats_dir} . "/${proto}_????????.$hostname.json.gz";
my ($date) =
- @processed
- ? ( $processed[-1] =~ /_(\d{8})\.$hostname\.json.gz/ )
- : 0;
+ @processed
+ ? ($processed[-1] =~ /_(\d{8})\.$hostname\.json.gz/)
+ : 0;
return int($date);
}
+ # Sub: write
+ # - Purpose: Write one gzipped JSON file per date bucket to stats_dir.
+ # - Params: none (uses $self->{stats}).
+ # - Return: undef.
sub write ($self) {
$self->for_dates(
- sub ( $self, $date_key, $stats ) {
+ sub ($self, $date_key, $stats) {
my $hostname = hostname();
- my $path =
- $self->{stats_dir} . "/${date_key}.$hostname.json.gz";
+ my $path = $self->{stats_dir} . "/${date_key}.$hostname.json.gz";
FileHelper::write_json_gz
- $path,
- $stats;
+ $path,
+ $stats;
}
);
}
- sub for_dates ( $self, $cb ) {
- $cb->( $self, $_, $self->{stats}{$_} ) for sort
- keys $self->{stats}->%*;
+ # Sub: for_dates
+ # - Purpose: Iterate date-keyed stats in sorted order and call $cb.
+ # - Params: $cb (code) receives ($self, $date_key, $stats).
+ # - Return: undef.
+ sub for_dates ($self, $cb) {
+ $cb->($self, $_, $self->{stats}{$_}) for sort
+ keys $self->{stats}->%*;
}
}
+# Package: Foostats::Replicator — pull partner stats files over HTTP(S)
+# - Purpose: Fetch recent partner node stats into local stats dir.
package Foostats::Replicator {
use JSON;
use File::Basename;
use LWP::UserAgent;
use String::Util qw(endswith);
- sub replicate ( $stats_dir, $partner_node ) {
+ # Sub: replicate
+ # - Purpose: For each proto and last 31 days, replicate newest files.
+ # - Params: $stats_dir (str) local dir; $partner_node (str) hostname.
+ # - Return: undef (best-effort fetches).
+ sub replicate ($stats_dir, $partner_node) {
say "Replicating from $partner_node";
for my $proto (qw(gemini web)) {
@@ -527,51 +634,63 @@ package Foostats::Replicator {
"https://$partner_node/foostats/$dest_path",
"$stats_dir/$dest_path",
$count++
- <
- 3
+ <
+ 3
, # Always replicate the newest 3 files.
);
}
}
}
- sub replicate_file ( $remote_url, $dest_path, $force ) {
+ # Sub: replicate_file
+ # - Purpose: Download a single URL to a destination unless already present (unless forced).
+ # - Params: $remote_url (str) source; $dest_path (str) destination; $force (bool/int).
+ # - Return: undef; logs failures.
+ sub replicate_file ($remote_url, $dest_path, $force) {
# $dest_path already exists, not replicating it
return
- if !$force
- && -f $dest_path;
+ if !$force
+ && -f $dest_path;
say "Replicating $remote_url to $dest_path (force:$force)... ";
my $response = LWP::UserAgent->new->get($remote_url);
- unless ( $response->is_success ) {
+ unless ($response->is_success) {
say "\nFailed to fetch the file: " . $response->status_line;
return;
}
FileHelper::write
- $dest_path,
- $response->decoded_content;
+ $dest_path,
+ $response->decoded_content;
say 'done';
}
}
+# Package: Foostats::Merger — merge per-host daily stats into a single view
+# - Purpose: Merge multiple node files per day into totals and unique counts.
package Foostats::Merger {
- use Data::Dumper; # TODO: UNDO
-
+ # Removed Data::Dumper (debug-only) per review.
+ # Sub: merge
+ # - Purpose: Produce merged stats for the last month (date => stats hashref).
+ # - Params: $stats_dir (str) directory with daily gz JSON files.
+ # - Return: hash (not ref) of date => merged stats.
sub merge ($stats_dir) {
my %merge;
- $merge{$_} = merge_for_date( $stats_dir, $_ )
- for DateHelper::last_month_dates;
+ $merge{$_} = merge_for_date($stats_dir, $_) for DateHelper::last_month_dates;
return %merge;
}
- sub merge_for_date ( $stats_dir, $date ) {
+ # Sub: merge_for_date
+ # - Purpose: Merge all node files for a specific date into one stats hashref.
+ # - Params: $stats_dir (str), $date (YYYYMMDD str/int).
+ # - Return: { feed_ips => {...}, count => {...}, page_ips => {...} }.
+ sub merge_for_date ($stats_dir, $date) {
printf
- "Merging for date %s\n",
- $date;
+ "Merging for date %s\n",
+ $date;
- my @stats = stats_for_date( $stats_dir, $date );
+ my @stats = stats_for_date($stats_dir, $date);
return {
feed_ips => feed_ips(@stats),
count => count(@stats),
@@ -579,9 +698,13 @@ package Foostats::Merger {
};
}
- sub merge_ips ( $a, $b, $key_transform = undef ) {
- my sub merge ( $a, $b ) {
- while ( my ( $key, $val ) = each %$b ) {
+ # Sub: merge_ips
+ # - Purpose: Deep-ish merge helper: sums numbers, merges hash-of-hash counts.
+ # - Params: $a (hashref target), $b (hashref source), $key_transform (code|undef).
+ # - Return: undef; updates $a in place; dies on incompatible types.
+ sub merge_ips ($a, $b, $key_transform = undef) {
+ my sub merge ($a, $b) {
+ while (my ($key, $val) = each %$b) {
$a->{$key} //= 0;
$a->{$key} += $val;
}
@@ -589,52 +712,56 @@ package Foostats::Merger {
my $is_num = qr/^\d+(\.\d+)?$/;
- while ( my ( $key, $val ) = each %$b ) {
+ while (my ($key, $val) = each %$b) {
$key = $key_transform->($key)
- if defined $key_transform;
+ if defined $key_transform;
- if ( not exists $a->{$key} ) {
+ if (not exists $a->{$key}) {
$a->{$key} = $val;
}
- elsif (ref( $a->{$key} ) eq 'HASH'
- && ref($val) eq 'HASH' )
+ elsif (ref($a->{$key}) eq 'HASH'
+ && ref($val) eq 'HASH')
{
- merge( $a->{$key}, $val );
+ merge($a->{$key}, $val);
}
elsif ($a->{$key} =~ $is_num
- && $val =~ $is_num )
+ && $val =~ $is_num)
{
$a->{$key} += $val;
}
else {
die
-"Not merging tkey '%s' (ref:%s): '%s' (ref:%s) with '%s' (ref:%s)\n",
- $key,
- ref($key), $a->{$key},
- ref( $a->{$key} ),
- $val,
- ref($val);
+ "Not merging tkey '%s' (ref:%s): '%s' (ref:%s) with '%s' (ref:%s)\n",
+ $key,
+ ref($key), $a->{$key},
+ ref($a->{$key}),
+ $val,
+ ref($val);
}
}
}
+ # Sub: feed_ips
+ # - Purpose: Merge feed unique-IP sets from per-proto stats into totals.
+ # - Params: @stats (list of stats hashrefs) each with {proto, feed_ips}.
+ # - Return: hashref with Total and per-proto feed counts.
sub feed_ips (@stats) {
- my ( %gemini, %web );
+ my (%gemini, %web);
for my $stats (@stats) {
my $merge =
- $stats->{proto} eq 'web'
- ? \%web
- : \%gemini;
+ $stats->{proto} eq 'web'
+ ? \%web
+ : \%gemini;
printf
- "Merging proto %s feed IPs\n",
- $stats->{proto};
- merge_ips( $merge, $stats->{feed_ips} );
+ "Merging proto %s feed IPs\n",
+ $stats->{proto};
+ merge_ips($merge, $stats->{feed_ips});
}
my %total;
- merge_ips( \%total, $web{$_} ) for keys %web;
- merge_ips( \%total, $gemini{$_} ) for keys %gemini;
+ merge_ips(\%total, $web{$_}) for keys %web;
+ merge_ips(\%total, $gemini{$_}) for keys %gemini;
my %merge = (
'Total' => scalar keys %total,
@@ -647,11 +774,15 @@ package Foostats::Merger {
return \%merge;
}
+ # Sub: count
+ # - Purpose: Sum request counters across stats for the day.
+ # - Params: @stats (list of stats hashrefs) each with {count}.
+ # - Return: hashref of summed counters.
sub count (@stats) {
my %merge;
for my $stats (@stats) {
- while ( my ( $key, $val ) = each $stats->{count}->%* ) {
+ while (my ($key, $val) = each $stats->{count}->%*) {
$merge{$key} //= 0;
$merge{$key} += $val;
}
@@ -660,13 +791,17 @@ package Foostats::Merger {
return \%merge;
}
+ # Sub: page_ips
+ # - Purpose: Merge unique IPs per host and per URL; coalesce truncated endings.
+ # - Params: @stats (list of stats hashrefs) with {page_ips}{urls,hosts}.
+ # - Return: hashref with urls/hosts each mapping => unique counts.
sub page_ips (@stats) {
my %merge = (
urls => {},
hosts => {}
);
- for my $key ( keys %merge ) {
+ for my $key (keys %merge) {
merge_ips(
$merge{$key},
$_->{page_ips}->{$key},
@@ -678,25 +813,28 @@ package Foostats::Merger {
) for @stats;
# Keep only uniq IP count
- $merge{$key}->{$_} = scalar keys $merge{$key}->{$_}->%*
- for keys $merge{$key}->%*;
+ $merge{$key}->{$_} = scalar keys $merge{$key}->{$_}->%* for keys $merge{$key}->%*;
}
return \%merge;
}
- sub stats_for_date ( $stats_dir, $date ) {
+ # Sub: stats_for_date
+ # - Purpose: Load all stats files for a date across protos; tag proto/path.
+ # - Params: $stats_dir (str), $date (YYYYMMDD).
+ # - Return: list of stats hashrefs.
+ sub stats_for_date ($stats_dir, $date) {
my @stats;
for my $proto (qw(gemini web)) {
for my $path (<$stats_dir/${proto}_${date}.*.json.gz>) {
printf
- "Reading %s\n",
- $path;
+ "Reading %s\n",
+ $path;
push
- @stats,
- FileHelper::read_json_gz($path);
- @{ $stats[-1] }{qw(proto path)} = ( $proto, $path );
+ @stats,
+ FileHelper::read_json_gz($path);
+ @{ $stats[-1] }{qw(proto path)} = ($proto, $path);
}
}
@@ -704,11 +842,18 @@ package Foostats::Merger {
}
}
+# Package: Foostats::Reporter — build gemtext/HTML daily and summary reports
+# - Purpose: Render daily reports and rolling summaries (30/365), and index pages.
package Foostats::Reporter {
use Time::Piece;
+ use HTML::Entities qw(encode_entities);
+ # Sub: truncate_url
+ # - Purpose: Middle-ellipsize long URLs to fit within a target length.
+ # - Params: $url (str), $max_length (int default 100).
+ # - Return: possibly truncated string.
sub truncate_url {
- my ( $url, $max_length ) = @_;
+ my ($url, $max_length) = @_;
$max_length //= 100; # Default to 100 characters
return $url if length($url) <= $max_length;
@@ -719,44 +864,52 @@ package Foostats::Reporter {
my $available_length = $max_length - $ellipsis_length;
# Split available length between start and end, favoring the end
- my $keep_start = int( $available_length * 0.4 ); # 40% for start
+ my $keep_start = int($available_length * 0.4); # 40% for start
my $keep_end = $available_length - $keep_start; # 60% for end
- my $start = substr( $url, 0, $keep_start );
- my $end = substr( $url, -$keep_end );
+ my $start = substr($url, 0, $keep_start);
+ my $end = substr($url, -$keep_end);
return $start . $ellipsis . $end;
}
+ # Sub: truncate_urls_for_table
+ # - Purpose: Truncate URL cells in-place to fit target table width.
+ # - Params: $url_rows (arrayref of [url,count]), $count_column_header (str).
+ # - Return: undef; mutates $url_rows.
sub truncate_urls_for_table {
- my ( $url_rows, $count_column_header ) = @_;
+ my ($url_rows, $count_column_header) = @_;
# Calculate the maximum width needed for the count column
my $max_count_width = length($count_column_header);
for my $row (@$url_rows) {
- my $count_width = length( $row->[1] );
+ my $count_width = length($row->[1]);
$max_count_width = $count_width if $count_width > $max_count_width;
}
# Row format: "| URL... | count |" with padding
# Calculate: "| " (2) + URL + " | " (3) + count_with_padding + " |" (2)
my $max_url_length = 100 - 7 - $max_count_width;
- $max_url_length = 70 if $max_url_length > 70; # Cap at reasonable length
+ $max_url_length = 70 if $max_url_length > 70; # Cap at reasonable length
# Truncate URLs in place
for my $row (@$url_rows) {
- $row->[0] = truncate_url( $row->[0], $max_url_length );
+ $row->[0] = truncate_url($row->[0], $max_url_length);
}
}
+ # Sub: format_table
+ # - Purpose: Render a simple monospace table from headers and rows.
+ # - Params: $headers (arrayref), $rows (arrayref of arrayrefs).
+ # - Return: string with lines separated by \n.
sub format_table {
- my ( $headers, $rows ) = @_;
+ my ($headers, $rows) = @_;
my @widths;
- for my $col ( 0 .. $#{$headers} ) {
- my $max_width = length( $headers->[$col] );
+ for my $col (0 .. $#{$headers}) {
+ my $max_width = length($headers->[$col]);
for my $row (@$rows) {
- my $len = length( $row->[$col] );
+ my $len = length($row->[$col]);
$max_width = $len if $len > $max_width;
}
push @widths, $max_width;
@@ -764,10 +917,10 @@ package Foostats::Reporter {
my $header_line = '|';
my $separator_line = '|';
- for my $col ( 0 .. $#{$headers} ) {
+ for my $col (0 .. $#{$headers}) {
$header_line .=
- sprintf( " %-*s |", $widths[$col], $headers->[$col] );
- $separator_line .= '-' x ( $widths[$col] + 2 ) . '|';
+ sprintf(" %-*s |", $widths[$col], $headers->[$col]);
+ $separator_line .= '-' x ($widths[$col] + 2) . '|';
}
my @table_lines;
@@ -777,33 +930,39 @@ package Foostats::Reporter {
for my $row (@$rows) {
my $row_line = '|';
- for my $col ( 0 .. $#{$row} ) {
- $row_line .= sprintf( " %-*s |", $widths[$col], $row->[$col] );
+ for my $col (0 .. $#{$row}) {
+ $row_line .= sprintf(" %-*s |", $widths[$col], $row->[$col]);
}
push @table_lines, $row_line;
}
push @table_lines, $separator_line; # Add bottom terminator
- return join( "\n", @table_lines );
+ return join("\n", @table_lines);
}
# Convert gemtext to HTML
+ # Sub: gemtext_to_html
+ # - Purpose: Convert a subset of Gemtext to compact HTML, incl. code blocks and lists.
+ # - Params: $content (str) Gemtext.
+ # - Return: HTML string (fragment).
sub gemtext_to_html {
- my ($content) = @_;
- my $html = "";
- my $in_code_block = 0;
- my $in_list = 0;
- my @lines = split /\n/, $content;
+ my ($content) = @_;
+ my $html = "";
+ my $in_code_block = 0;
+ my $in_list = 0;
+ my @lines = split /\n/, $content;
my @code_block_lines = ();
-
+
for my $line (@lines) {
if ($line =~ /^```/) {
if ($in_code_block) {
+
# End code block - check if it's a table
if (is_ascii_table(\@code_block_lines)) {
$html .= convert_ascii_table_to_html(\@code_block_lines);
- } else {
+ }
+ else {
$html .= "<pre>\n";
for my $code_line (@code_block_lines) {
$html .= encode_entities($code_line) . "\n";
@@ -811,136 +970,229 @@ package Foostats::Reporter {
$html .= "</pre>\n";
}
@code_block_lines = ();
- $in_code_block = 0;
- } else {
+ $in_code_block = 0;
+ }
+ else {
$in_code_block = 1;
}
next;
}
-
+
if ($in_code_block) {
push @code_block_lines, $line;
next;
}
+
- # Skip 365-day summary section header in HTML output
- if ($line =~ /^## 365-Day Summary Reports\s*$/) {
- next;
- }
# Check if we need to close a list
if ($in_list && $line !~ /^\* /) {
$html .= "</ul>\n";
$in_list = 0;
}
-
+
# Headers
if ($line =~ /^### (.*)/) {
$html .= "<h3>" . encode_entities($1) . "</h3>\n";
- } elsif ($line =~ /^## (.*)/) {
+ }
+ elsif ($line =~ /^## (.*)/) {
$html .= "<h2>" . encode_entities($1) . "</h2>\n";
- } elsif ($line =~ /^# (.*)/) {
+ }
+ elsif ($line =~ /^# (.*)/) {
$html .= "<h1>" . encode_entities($1) . "</h1>\n";
}
+
# Links
elsif ($line =~ /^=> (\S+)\s+(.*)/) {
my ($url, $text) = ($1, $2);
+
# Drop 365-day summary links from HTML output
if ($url =~ /(?:^|[\/.])365day_summary_\d{8}\.gmi$/) {
next;
}
+
# Convert .gmi links to .html
$url =~ s/\.gmi$/\.html/;
$html .= "<p><a href=\"" . encode_entities($url) . "\">" . encode_entities($text) . "</a></p>\n";
}
+
# Bullet points
elsif ($line =~ /^\* (.*)/) {
if (!$in_list) {
$html .= "<ul>\n";
$in_list = 1;
}
- $html .= "<li>" . encode_entities($1) . "</li>\n";
+ $html .= "<li>" . linkify_text($1) . "</li>\n";
}
+
# Empty line - skip to avoid excessive spacing
elsif ($line =~ /^\s*$/) {
+
# Skip empty lines for more compact output
}
+
# Regular text
else {
- $html .= "<p>" . encode_entities($line) . "</p>\n";
+ $html .= "<p>" . linkify_text($line) . "</p>\n";
}
}
-
+
# Close list if still open
if ($in_list) {
$html .= "</ul>\n";
}
-
+
return $html;
}
-
+
# Check if the lines form an ASCII table
+ # Sub: is_ascii_table
+ # - Purpose: Heuristically detect if a code block is an ASCII table: it needs
+ #   at least 3 lines and one line that starts like a dash/pipe separator row.
+ # - Params: $lines (arrayref of strings).
+ # - Return: 1 if likely table; 0 otherwise.
sub is_ascii_table {
my ($lines) = @_;
- return 0 if @$lines < 3; # Need at least header, separator, and one data row
-
+ return 0 if @$lines < 3; # Need at least header, separator, and one data row
+
# Check for separator lines with dashes and pipes
for my $line (@$lines) {
return 1 if $line =~ /^\|?[\s\-]+\|/;
}
return 0;
}
-
+
# Convert ASCII table to HTML table
+ # Sub: convert_ascii_table_to_html
+ # - Purpose: Convert simple ASCII table lines to an HTML <table>.
+ # - Params: $lines (arrayref of strings).
+ # - Return: HTML string.
sub convert_ascii_table_to_html {
- my ($lines) = @_;
- my $html = "<table>\n";
+ my ($lines) = @_;
+ my $html = "<table>\n";
my $row_count = 0;
-
+
for my $line (@$lines) {
+
# Skip separator lines
next if $line =~ /^\|?[\s\-]+\|/ && $line =~ /\-/;
-
+
# Parse table row
my @cells = split /\s*\|\s*/, $line;
- @cells = grep { length($_) > 0 } @cells; # Remove empty cells
-
+ @cells = grep { length($_) > 0 } @cells; # Remove empty cells
+
if (@cells) {
$html .= "<tr>\n";
+
# First row is header
my $tag = ($row_count == 0) ? "th" : "td";
for my $cell (@cells) {
- $html .= " <$tag>" . encode_entities(trim($cell)) . "</$tag>\n";
+ my $val = trim($cell);
+ $html .= " <$tag>" . linkify_text($val) . "</$tag>\n";
}
$html .= "</tr>\n";
$row_count++;
}
}
-
+
$html .= "</table>\n";
return $html;
}
-
+
# Trim whitespace from string
+ # Sub: trim
+ # - Purpose: Strip leading/trailing whitespace.
+ # - Params: $str (str).
+ # - Return: trimmed string.
sub trim {
my ($str) = @_;
$str =~ s/^\s+//;
$str =~ s/\s+$//;
return $str;
}
-
- # Encode HTML entities to prevent XSS
- sub encode_entities {
+
+ # Build an href for a token that looks like a URL or FQDN
+ # Sub: _guess_href
+ # - Purpose: Infer absolute href for a token (supports gemini for .gmi).
+ # - Params: $token (str) token from text.
+ # - Return: href string or undef.
+ sub _guess_href {
+     my ($token) = @_;
+
+     # Contract: returns an absolute href (http/https kept as given, otherwise
+     # https://, or gemini:// for *.gmi paths) or nothing (undef in scalar
+     # context, empty list in list context) when the token is not linkable.
+     return unless defined $token;
+
+     # Work on a trimmed copy so the caller's string is left untouched.
+     my $t = $token =~ s/^\s+|\s+$//gr;
+
+     # Trailing sentence punctuation is never part of the link target. Strip it
+     # before any matching and do NOT append it to the returned href.
+     $t =~ s{[)\]\}.,;:!?]+$}{};
+
+     # Already absolute http(s): use as-is.
+     return $t if $t =~ m{^https?://}i;
+
+     # host[/path]; anything else is not a linkable token.
+     return unless $t =~ m{^([A-Za-z0-9.-]+\.[A-Za-z]{2,})(/[^\s<]*)?$};
+     my ($host, $path) = ($1, $2 // '');
+
+     # A path ending in .gmi (optionally followed by ?query or #fragment)
+     # selects the gemini scheme; everything else is linked via https.
+     my $scheme = $path =~ /\.gmi(?:[?#].*)?$/i ? 'gemini' : 'https';
+
+     # A token containing "..." is treated as truncated, so link the host root
+     # rather than a partial path. A bare host links to its root as well.
+     return "${scheme}://${host}/" if $path eq '' || index($t, '...') != -1;
+
+     return "${scheme}://${host}${path}";
+ }
+
+ # Turn any URLs/FQDNs in the provided text into anchors
+ # Sub: linkify_text
+ # - Purpose: Replace URL/FQDN tokens in text with HTML anchors.
+ # - Params: $text (str) input text.
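+ # - Return: HTML string; plain text, link text and hrefs are entity-encoded.
+ # - NOTE(review): every generated href gets a literal ".html" appended, even
+ #   for bare hosts and .gmi paths — confirm this is intended.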
+ sub linkify_text {
my ($text) = @_;
- $text =~ s/&/&amp;/g;
- $text =~ s/</&lt;/g;
- $text =~ s/>/&gt;/g;
- $text =~ s/"/&quot;/g;
- $text =~ s/'/&#39;/g;
- return $text;
+ return '' unless defined $text;
+
+ my $out = '';
+ my $pos = 0;
+ while ($text =~ m{((?:https?://)?[A-Za-z0-9.-]+\.[A-Za-z]{2,}(?:/[^\s<]*)?)}g) {
+ my $match = $1;
+ my $start = $-[1];
+ my $end = $+[1];
+
+ # Emit preceding text
+ $out .= encode_entities(substr($text, $pos, $start - $pos));
+
+ # Separate trailing punctuation from the match
+ my ($core, $trail) = ($match, '');
+ if ($core =~ s{([)\]\}.,;:!?]+)$}{}) { $trail = $1; }
+
+ my $href = _guess_href($core);
+ if ($href) {
+ $out .= sprintf('<a href="%s.html">%s</a>%s',
+ encode_entities($href), encode_entities($core), encode_entities($trail));
+ }
+ else {
+ # Not a linkable token after all
+ $out .= encode_entities($match);
+ }
+ $pos = $end;
+ }
+
+ # Remainder
+ $out .= encode_entities(substr($text, $pos));
+ return $out;
}
-
+
+ # Use HTML::Entities::encode_entities imported above
+
# Generate HTML wrapper
+ # Sub: generate_html_page
+ # - Purpose: Wrap content in a minimal HTML5 page with a title and CSS reset.
+ # - Params: $title (str), $content (str) HTML fragment.
+ # - Return: full HTML page string.
sub generate_html_page {
my ($title, $content) = @_;
return qq{<!DOCTYPE html>
@@ -1008,49 +1260,50 @@ $content
};
}
+ # Sub: report
+ # - Purpose: Generate daily .gmi and .html reports per date, then summaries and index.
+ # - Params: $stats_dir, $output_dir, $html_output_dir, %merged (date => stats).
+ # - Return: undef.
sub report {
- my ( $stats_dir, $output_dir, $html_output_dir, %merged ) = @_;
- for my $date ( sort { $b cmp $a } keys %merged ) {
+ my ($stats_dir, $output_dir, $html_output_dir, %merged) = @_;
+ for my $date (sort { $b cmp $a } keys %merged) {
my $stats = $merged{$date};
next unless $stats->{count};
- my ( $year, $month, $day ) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+ my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/;
# Check if .gmi file exists and its age based on date in filename
- my $report_path = "$output_dir/$date.gmi";
+ my $report_path = "$output_dir/$date.gmi";
my $html_report_path = "$output_dir/$date.html";
# Calculate age of the data based on date in filename
my $today = Time::Piece->new();
- my $file_date = Time::Piece->strptime( $date, '%Y%m%d' );
- my $age_days = ( $today - $file_date ) / ( 24 * 60 * 60 );
+ my $file_date = Time::Piece->strptime($date, '%Y%m%d');
+ my $age_days = ($today - $file_date) / (24 * 60 * 60);
- if ( -e $report_path && -e $html_report_path ) {
+ if (-e $report_path && -e $html_report_path) {
# Files exist
- if ( $age_days <= 3 ) {
+ if ($age_days <= 3) {
# Data is recent (within 3 days), regenerate it
- say
-"Regenerating daily report for $year-$month-$day (data age: "
- . sprintf( "%.1f", $age_days )
- . " days)";
+ say "Regenerating daily report for $year-$month-$day (data age: "
+ . sprintf("%.1f", $age_days)
+ . " days)";
}
else {
# Data is old (older than 3 days), skip if files exist
- say
-"Skipping daily report for $year-$month-$day (files exist, data age: "
- . sprintf( "%.1f", $age_days )
- . " days)";
+ say "Skipping daily report for $year-$month-$day (files exist, data age: "
+ . sprintf("%.1f", $age_days)
+ . " days)";
next;
}
}
else {
# File doesn't exist, generate it
- say
-"Generating new daily report for $year-$month-$day (file doesn't exist, data age: "
- . sprintf( "%.1f", $age_days )
- . " days)";
+ say "Generating new daily report for $year-$month-$day (file doesn't exist, data age: "
+ . sprintf("%.1f", $age_days)
+ . " days)";
}
my $report_content = "";
@@ -1060,27 +1313,23 @@ $content
# Feed counts first
$report_content .= "### Feed Statistics\n\n";
my @feed_rows;
- push @feed_rows, [ 'Total', $stats->{feed_ips}{'Total'} // 0 ];
- push @feed_rows,
- [ 'Gemini Gemfeed', $stats->{feed_ips}{'Gemini Gemfeed'} // 0 ];
- push @feed_rows,
- [ 'Gemini Atom', $stats->{feed_ips}{'Gemini Atom'} // 0 ];
- push @feed_rows,
- [ 'Web Gemfeed', $stats->{feed_ips}{'Web Gemfeed'} // 0 ];
- push @feed_rows,
- [ 'Web Atom', $stats->{feed_ips}{'Web Atom'} // 0 ];
+ push @feed_rows, [ 'Total', $stats->{feed_ips}{'Total'} // 0 ];
+ push @feed_rows, [ 'Gemini Gemfeed', $stats->{feed_ips}{'Gemini Gemfeed'} // 0 ];
+ push @feed_rows, [ 'Gemini Atom', $stats->{feed_ips}{'Gemini Atom'} // 0 ];
+ push @feed_rows, [ 'Web Gemfeed', $stats->{feed_ips}{'Web Gemfeed'} // 0 ];
+ push @feed_rows, [ 'Web Atom', $stats->{feed_ips}{'Web Atom'} // 0 ];
$report_content .= "```\n";
- $report_content .=
- format_table( [ 'Feed Type', 'Count' ], \@feed_rows );
+ $report_content .= format_table([ 'Feed Type', 'Count' ], \@feed_rows);
$report_content .= "\n```\n\n";
+
# Top 50 URLs next
$report_content .= "### Top 50 URLs\n\n";
my @url_rows;
my $urls = $stats->{page_ips}{urls};
my @sorted_urls =
- sort { ( $urls->{$b} // 0 ) <=> ( $urls->{$a} // 0 ) }
- keys %$urls;
- my $truncated = @sorted_urls > 50;
+ sort { ($urls->{$b} // 0) <=> ($urls->{$a} // 0) }
+ keys %$urls;
+ my $truncated = @sorted_urls > 50;
@sorted_urls = @sorted_urls[ 0 .. 49 ] if $truncated;
for my $url (@sorted_urls) {
@@ -1088,10 +1337,9 @@ $content
}
# Truncate URLs to fit within 100-character rows
- truncate_urls_for_table( \@url_rows, 'Unique Visitors' );
+ truncate_urls_for_table(\@url_rows, 'Unique Visitors');
$report_content .= "```\n";
- $report_content .=
- format_table( [ 'URL', 'Unique Visitors' ], \@url_rows );
+ $report_content .= format_table([ 'URL', 'Unique Visitors' ], \@url_rows);
$report_content .= "\n```\n";
if ($truncated) {
$report_content .= "\n... and more (truncated to 50 entries).\n";
@@ -1103,18 +1351,17 @@ $content
my @host_rows;
my $hosts = $stats->{page_ips}{hosts};
my @sorted_hosts =
- sort { ( $hosts->{$b} // 0 ) <=> ( $hosts->{$a} // 0 ) }
- keys %$hosts;
+ sort { ($hosts->{$b} // 0) <=> ($hosts->{$a} // 0) }
+ keys %$hosts;
- $truncated = @sorted_hosts > 50;
+ $truncated = @sorted_hosts > 50;
@sorted_hosts = @sorted_hosts[ 0 .. 49 ] if $truncated;
for my $host (@sorted_hosts) {
push @host_rows, [ $host, $hosts->{$host} // 0 ];
}
$report_content .= "```\n";
- $report_content .=
- format_table( [ 'Host', 'Unique Visitors' ], \@host_rows );
+ $report_content .= format_table([ 'Host', 'Unique Visitors' ], \@host_rows);
$report_content .= "\n```\n";
if ($truncated) {
$report_content .= "\n... and more (truncated to 50 entries).\n";
@@ -1124,22 +1371,22 @@ $content
# Summary last
$report_content .= "### Summary\n\n";
my $total_requests =
- ( $stats->{count}{gemini} // 0 ) + ( $stats->{count}{web} // 0 );
+ ($stats->{count}{gemini} // 0) + ($stats->{count}{web} // 0);
$report_content .= "* Total requests: $total_requests\n";
$report_content .=
- "* Filtered requests: " . ( $stats->{count}{filtered} // 0 ) . "\n";
+ "* Filtered requests: " . ($stats->{count}{filtered} // 0) . "\n";
$report_content .=
- "* Gemini requests: " . ( $stats->{count}{gemini} // 0 ) . "\n";
+ "* Gemini requests: " . ($stats->{count}{gemini} // 0) . "\n";
$report_content .=
- "* Web requests: " . ( $stats->{count}{web} // 0 ) . "\n";
+ "* Web requests: " . ($stats->{count}{web} // 0) . "\n";
$report_content .=
- "* IPv4 requests: " . ( $stats->{count}{IPv4} // 0 ) . "\n";
+ "* IPv4 requests: " . ($stats->{count}{IPv4} // 0) . "\n";
$report_content .=
- "* IPv6 requests: " . ( $stats->{count}{IPv6} // 0 ) . "\n\n";
+ "* IPv6 requests: " . ($stats->{count}{IPv6} // 0) . "\n\n";
# Add links to summary reports (only monthly)
$report_content .= "## Related Reports\n\n";
- my $now = localtime;
+ my $now = localtime;
my $current_date = $now->strftime('%Y%m%d');
$report_content .= "=> ./30day_summary_$current_date.gmi 30-Day Summary Report\n\n";
@@ -1148,29 +1395,33 @@ $content
# $report_path already defined above
say "Writing report to $report_path";
- FileHelper::write( $report_path, $report_content );
-
+ FileHelper::write($report_path, $report_content);
+
# Also write HTML version
mkdir $html_output_dir unless -d $html_output_dir;
- my $html_path = "$html_output_dir/$date.html";
+ my $html_path = "$html_output_dir/$date.html";
my $html_content = gemtext_to_html($report_content);
- my $html_page = generate_html_page("Stats for $year-$month-$day", $html_content);
+ my $html_page = generate_html_page("Stats for $year-$month-$day", $html_content);
say "Writing HTML report to $html_path";
- FileHelper::write( $html_path, $html_page );
+ FileHelper::write($html_path, $html_page);
}
# Generate summary reports
- generate_summary_report( 30, $stats_dir, $output_dir, $html_output_dir, %merged );
-
+ generate_summary_report(30, $stats_dir, $output_dir, $html_output_dir, %merged);
+
# Generate index.gmi and index.html
- generate_index( $output_dir, $html_output_dir );
+ generate_index($output_dir, $html_output_dir);
}
+ # Sub: generate_summary_report
+ # - Purpose: Generate N-day rolling summary in .gmi (+.html except 365-day).
+ # - Params: $days (int), $stats_dir, $output_dir, $html_output_dir, %merged.
+ # - Return: undef.
sub generate_summary_report {
- my ( $days, $stats_dir, $output_dir, $html_output_dir, %merged ) = @_;
+ my ($days, $stats_dir, $output_dir, $html_output_dir, %merged) = @_;
# Get the last N days of dates
- my @dates = sort { $b cmp $a } keys %merged;
+ my @dates = sort { $b cmp $a } keys %merged;
my $max_index = $days - 1;
@dates = @dates[ 0 .. $max_index ] if @dates > $days;
@@ -1179,16 +1430,16 @@ $content
# Build report content
my $report_content = build_report_header($today, $days);
+
# Order: feed counts -> Top URLs -> daily top 3 for last 30 days -> other tables
- $report_content .= build_feed_statistics_section( \@dates, \%merged );
+ $report_content .= build_feed_statistics_section(\@dates, \%merged);
# Aggregate and add top lists
- my ( $all_hosts, $all_urls ) =
- aggregate_hosts_and_urls( \@dates, \%merged );
+ my ($all_hosts, $all_urls) = aggregate_hosts_and_urls(\@dates, \%merged);
$report_content .= build_top_urls_section($all_urls, $days);
$report_content .= build_top3_urls_last_n_days_per_day($stats_dir, 30, \%merged);
$report_content .= build_top_hosts_section($all_hosts, $days);
- $report_content .= build_daily_summary_section( \@dates, \%merged );
+ $report_content .= build_daily_summary_section(\@dates, \%merged);
# Add links to other summary reports
$report_content .= build_summary_links($days, $report_date);
@@ -1198,99 +1449,111 @@ $content
my $report_path = "$output_dir/${days}day_summary_$report_date.gmi";
say "Writing $days-day summary report to $report_path";
- FileHelper::write( $report_path, $report_content );
-
+ FileHelper::write($report_path, $report_content);
+
# Also write HTML version, except for 365-day summaries (HTML suppressed)
if ($days != 365) {
mkdir $html_output_dir unless -d $html_output_dir;
- my $html_path = "$html_output_dir/${days}day_summary_$report_date.html";
+ my $html_path = "$html_output_dir/${days}day_summary_$report_date.html";
my $html_content = gemtext_to_html($report_content);
- my $html_page = generate_html_page("$days-Day Summary Report", $html_content);
+ my $html_page = generate_html_page("$days-Day Summary Report", $html_content);
say "Writing HTML $days-day summary report to $html_path";
- FileHelper::write( $html_path, $html_page );
- } else {
+ FileHelper::write($html_path, $html_page);
+ }
+ else {
say "Skipping HTML generation for 365-day summary (Gemtext only)";
}
}
+ # Sub: build_report_header
+ # - Purpose: Header section for summary reports.
+ # - Params: $today (Time::Piece), $days (int default 30).
+ # - Return: gemtext string.
sub build_report_header {
my ($today, $days) = @_;
- $days //= 30; # Default to 30 days for backward compatibility
+ $days //= 30; # Default to 30 days for backward compatibility
my $content = "# $days-Day Summary Report\n\n";
$content .= "Generated on " . $today->strftime('%Y-%m-%d') . "\n\n";
return $content;
}
+ # Sub: build_daily_summary_section
+ # - Purpose: Table of daily request counts, one row per date in $dates that
+ #   has counts (the section heading always reads "Last 30 Days").
+ # - Params: $dates (arrayref YYYYMMDD), $merged (hashref date=>stats).
+ # - Return: gemtext string.
sub build_daily_summary_section {
- my ( $dates, $merged ) = @_;
+ my ($dates, $merged) = @_;
my $content = "## Daily Summary Evolution (Last 30 Days)\n\n";
$content .= "### Total Requests by Day\n\n```\n";
my @summary_rows;
- for my $date ( reverse @$dates ) {
+ for my $date (reverse @$dates) {
my $stats = $merged->{$date};
next unless $stats->{count};
- push @summary_rows, build_daily_summary_row( $date, $stats );
+ push @summary_rows, build_daily_summary_row($date, $stats);
}
- $content .= format_table(
- [ 'Date', 'Filtered', 'Gemini', 'Web', 'IPv4', 'IPv6', 'Total' ],
- \@summary_rows );
+ $content .= format_table([ 'Date', 'Filtered', 'Gemini', 'Web', 'IPv4', 'IPv6', 'Total' ], \@summary_rows);
$content .= "\n```\n\n";
return $content;
}
+ # Sub: build_daily_summary_row
+ # - Purpose: Build one table row with counts for a date.
+ # - Params: $date (YYYYMMDD), $stats (hashref).
+ # - Return: arrayref of cell strings.
sub build_daily_summary_row {
- my ( $date, $stats ) = @_;
+ my ($date, $stats) = @_;
- my ( $year, $month, $day ) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+ my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/;
my $formatted_date = "$year-$month-$day";
my $total_requests =
- ( $stats->{count}{gemini} // 0 ) + ( $stats->{count}{web} // 0 );
+ ($stats->{count}{gemini} // 0) + ($stats->{count}{web} // 0);
my $filtered = $stats->{count}{filtered} // 0;
my $gemini = $stats->{count}{gemini} // 0;
my $web = $stats->{count}{web} // 0;
my $ipv4 = $stats->{count}{IPv4} // 0;
my $ipv6 = $stats->{count}{IPv6} // 0;
- return [
- $formatted_date, $filtered,
- $gemini, $web, $ipv4,
- $ipv6, $total_requests
- ];
+ return [ $formatted_date, $filtered, $gemini, $web, $ipv4, $ipv6, $total_requests ];
}
+ # Sub: build_feed_statistics_section
+ # - Purpose: Table of feed unique counts by day over a period.
+ # - Params: $dates (arrayref), $merged (hashref).
+ # - Return: gemtext string.
sub build_feed_statistics_section {
- my ( $dates, $merged ) = @_;
+ my ($dates, $merged) = @_;
my $content = "### Feed Statistics Evolution\n\n```\n";
my @feed_rows;
- for my $date ( reverse @$dates ) {
+ for my $date (reverse @$dates) {
my $stats = $merged->{$date};
next unless $stats->{feed_ips};
- push @feed_rows, build_feed_statistics_row( $date, $stats );
+ push @feed_rows, build_feed_statistics_row($date, $stats);
}
- $content .= format_table(
- [ 'Date', 'Gem Feed', 'Gem Atom', 'Web Feed', 'Web Atom', 'Total' ],
- \@feed_rows
- );
+ $content .= format_table([ 'Date', 'Gem Feed', 'Gem Atom', 'Web Feed', 'Web Atom', 'Total' ], \@feed_rows);
$content .= "\n```\n\n";
return $content;
}
+ # Sub: build_feed_statistics_row
+ # - Purpose: Build one row of feed unique counts for a date.
+ # - Params: $date (YYYYMMDD), $stats (hashref).
+ # - Return: arrayref of cell strings.
sub build_feed_statistics_row {
- my ( $date, $stats ) = @_;
+ my ($date, $stats) = @_;
- my ( $year, $month, $day ) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+ my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/;
my $formatted_date = "$year-$month-$day";
return [
@@ -1303,8 +1566,12 @@ $content
];
}
+ # Sub: aggregate_hosts_and_urls
+ # - Purpose: Sum hosts and URLs across multiple days.
+ # - Params: $dates (arrayref), $merged (hashref).
+ # - Return: (\%all_hosts, \%all_urls).
sub aggregate_hosts_and_urls {
- my ( $dates, $merged ) = @_;
+ my ($dates, $merged) = @_;
my %all_hosts;
my %all_urls;
@@ -1314,22 +1581,25 @@ $content
next unless $stats->{page_ips};
# Aggregate hosts
- while ( my ( $host, $count ) = each %{ $stats->{page_ips}{hosts} } )
- {
+ while (my ($host, $count) = each %{ $stats->{page_ips}{hosts} }) {
$all_hosts{$host} //= 0;
$all_hosts{$host} += $count;
}
# Aggregate URLs
- while ( my ( $url, $count ) = each %{ $stats->{page_ips}{urls} } ) {
+ while (my ($url, $count) = each %{ $stats->{page_ips}{urls} }) {
$all_urls{$url} //= 0;
$all_urls{$url} += $count;
}
}
- return ( \%all_hosts, \%all_urls );
+ return (\%all_hosts, \%all_urls);
}
+ # Sub: build_top_hosts_section
+ # - Purpose: Build Top-50 hosts table for the aggregated period.
+ # - Params: $all_hosts (hashref), $days (int default 30).
+ # - Return: gemtext string.
sub build_top_hosts_section {
my ($all_hosts, $days) = @_;
$days //= 30;
@@ -1338,19 +1608,23 @@ $content
my @host_rows;
my @sorted_hosts =
- sort { $all_hosts->{$b} <=> $all_hosts->{$a} } keys %$all_hosts;
+ sort { $all_hosts->{$b} <=> $all_hosts->{$a} } keys %$all_hosts;
@sorted_hosts = @sorted_hosts[ 0 .. 49 ] if @sorted_hosts > 50;
for my $host (@sorted_hosts) {
push @host_rows, [ $host, $all_hosts->{$host} ];
}
- $content .= format_table( [ 'Host', 'Visitors' ], \@host_rows );
+ $content .= format_table([ 'Host', 'Visitors' ], \@host_rows);
$content .= "\n```\n\n";
return $content;
}
+ # Sub: build_top_urls_section
+ # - Purpose: Build Top-50 URLs table for the aggregated period (with truncation).
+ # - Params: $all_urls (hashref), $days (int default 30).
+ # - Return: gemtext string.
sub build_top_urls_section {
my ($all_urls, $days) = @_;
$days //= 30;
@@ -1359,7 +1633,7 @@ $content
my @url_rows;
my @sorted_urls =
- sort { $all_urls->{$b} <=> $all_urls->{$a} } keys %$all_urls;
+ sort { $all_urls->{$b} <=> $all_urls->{$a} } keys %$all_urls;
@sorted_urls = @sorted_urls[ 0 .. 49 ] if @sorted_urls > 50;
for my $url (@sorted_urls) {
@@ -1367,18 +1641,23 @@ $content
}
# Truncate URLs to fit within 100-character rows
- truncate_urls_for_table( \@url_rows, 'Visitors' );
+ truncate_urls_for_table(\@url_rows, 'Visitors');
- $content .= format_table( [ 'URL', 'Visitors' ], \@url_rows );
+ $content .= format_table([ 'URL', 'Visitors' ], \@url_rows);
$content .= "\n```\n\n";
return $content;
}
+ # Sub: build_summary_links
+ # - Purpose: Links to other summary reports (30-day when not already on it).
+ # - Params: $current_days (int), $report_date (YYYYMMDD).
+ # - Return: gemtext string.
sub build_summary_links {
- my ( $current_days, $report_date ) = @_;
+ my ($current_days, $report_date) = @_;
my $content = '';
+
# Only add link to 30-day summary when not on the 30-day report itself
if ($current_days != 30) {
$content .= "## Other Summary Reports\n\n";
@@ -1388,17 +1667,22 @@ $content
return $content;
}
-sub build_top3_urls_last_n_days_per_day {
- my ($stats_dir, $days, $merged) = @_;
- $days //= 30;
- my $content = "## Top 5 URLs Per Day (Last ${days} Days)\n\n";
+ # Sub: build_top3_urls_last_n_days_per_day
+ # - Purpose: For each of the last N days, render a table of that day's top 5
+ #   URLs (the sub name still says "top3").
+ # - Params: $stats_dir (str), $days (int default 30), $merged (hashref).
+ # - Return: gemtext string.
+ sub build_top3_urls_last_n_days_per_day {
+ my ($stats_dir, $days, $merged) = @_;
+ $days //= 30;
+ my $content = "## Top 5 URLs Per Day (Last ${days} Days)\n\n";
- my @all = DateHelper::last_month_dates();
- my @dates = @all;
- @dates = @all[0 .. $days-1] if @all > $days;
- return $content . "(no data)\n\n" unless @dates;
+ my @all = DateHelper::last_month_dates();
+ my @dates = @all;
+ @dates = @all[ 0 .. $days - 1 ] if @all > $days;
+ return $content . "(no data)\n\n" unless @dates;
for my $date (@dates) {
+
# Prefer in-memory merged stats if available; otherwise merge from disk
my $stats = $merged->{$date};
if (!$stats || !($stats->{page_ips} && $stats->{page_ips}{urls})) {
@@ -1406,24 +1690,28 @@ sub build_top3_urls_last_n_days_per_day {
}
next unless $stats && $stats->{page_ips} && $stats->{page_ips}{urls};
- my ($y,$m,$d) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+ my ($y, $m, $d) = $date =~ /(\d{4})(\d{2})(\d{2})/;
$content .= "### $y-$m-$d\n\n";
- my $urls = $stats->{page_ips}{urls};
- my @sorted = sort { ($urls->{$b}//0) <=> ($urls->{$a}//0) } keys %$urls;
+ my $urls = $stats->{page_ips}{urls};
+ my @sorted = sort { ($urls->{$b} // 0) <=> ($urls->{$a} // 0) } keys %$urls;
next unless @sorted;
my $limit = @sorted < 5 ? @sorted : 5;
- @sorted = @sorted[0..$limit-1];
+ @sorted = @sorted[ 0 .. $limit - 1 ];
my @rows;
for my $u (@sorted) { push @rows, [ $u, $urls->{$u} // 0 ]; }
- truncate_urls_for_table( \@rows, 'Visitors' );
+ truncate_urls_for_table(\@rows, 'Visitors');
$content .= "```\n" . format_table([ 'URL', 'Visitors' ], \@rows) . "\n```\n\n";
+ }
+
+ return $content;
}
- return $content;
-}
-
+ # Sub: generate_index
+ # - Purpose: Create index.gmi/.html using the latest 30-day summary as content.
+ # - Params: $output_dir (str), $html_output_dir (str).
+ # - Return: undef.
sub generate_index {
my ($output_dir, $html_output_dir) = @_;
@@ -1433,13 +1721,14 @@ sub build_top3_urls_last_n_days_per_day {
closedir($dh);
my @summaries_30day = sort { $b cmp $a } grep { /^30day_summary_/ } @gmi_files;
- my $latest_30 = $summaries_30day[0];
+ my $latest_30 = $summaries_30day[0];
my $index_path = "$output_dir/index.gmi";
mkdir $html_output_dir unless -d $html_output_dir;
my $html_path = "$html_output_dir/index.html";
if ($latest_30) {
+
# Read 30-day summary content and use it as index
my $summary_path = "$output_dir/$latest_30";
open my $sfh, '<', $summary_path or die "$summary_path: $!";
@@ -1460,9 +1749,10 @@ sub build_top3_urls_last_n_days_per_day {
close $hh;
say "Writing HTML index to $html_path (copy of $latest_html)";
FileHelper::write($html_path, $html_page);
- } else {
+ }
+ else {
my $html_content = gemtext_to_html($content);
- my $html_page = generate_html_page("30-Day Summary Report", $html_content);
+ my $html_page = generate_html_page("30-Day Summary Report", $html_content);
say "Writing HTML index to $html_path (from gemtext)";
FileHelper::write($html_path, $html_page);
}
@@ -1475,18 +1765,24 @@ sub build_top3_urls_last_n_days_per_day {
FileHelper::write($index_path, $fallback);
my $html_content = gemtext_to_html($fallback);
- my $html_page = generate_html_page("Foostats Reports Index", $html_content);
+ my $html_page = generate_html_page("Foostats Reports Index", $html_content);
say "Writing fallback HTML index to $html_path";
FileHelper::write($html_path, $html_page);
}
}
-package main {
- use Getopt::Long;
- use Sys::Hostname;
-
- sub usage {
- print <<~"USAGE";
+package main;
+# Package: main — CLI entrypoint and orchestration
+# - Purpose: Parse options and invoke parse/replicate/report flows.
+use Getopt::Long;
+use Sys::Hostname;
+
+# Sub: usage
+# - Purpose: Print usage and exit 0.
+# - Params: none.
+# - Return: never (exits).
+sub usage {
+ print <<~"USAGE";
Usage: $0 [options]
Options:
@@ -1509,47 +1805,56 @@ package main {
--version Show version information.
--help Show this help message.
USAGE
- exit 0;
- }
+ exit 0;
+}
- sub parse_logs ( $stats_dir, $odds_file, $odds_log ) {
- my $out = Foostats::FileOutputter->new( stats_dir => $stats_dir );
+# Sub: parse_logs
+# - Purpose: Parse logs and persist aggregated stats files under $stats_dir.
+# - Params: $stats_dir (str), $odds_file (str), $odds_log (str).
+# - Return: undef.
+sub parse_logs ($stats_dir, $odds_file, $odds_log) {
+ my $out = Foostats::FileOutputter->new(stats_dir => $stats_dir);
- $out->{stats} = Foostats::Logreader::parse_logs(
- $out->last_processed_date('web'),
- $out->last_processed_date('gemini'),
- $odds_file, $odds_log
- );
+ $out->{stats} = Foostats::Logreader::parse_logs(
+ $out->last_processed_date('web'),
+ $out->last_processed_date('gemini'),
+ $odds_file, $odds_log
+ );
- $out->write;
- }
+ $out->write;
+}
- my ( $parse_logs, $replicate, $report, $all, $help, $version );
+# Sub: foostats_main
+# - Purpose: Option parsing and execution of requested actions.
+# - Params: none (reads @ARGV).
+# - Return: exit code via program termination.
+sub foostats_main {
+ my ($parse_logs, $replicate, $report, $all, $help, $version);
# With default values
my $stats_dir = '/var/www/htdocs/buetow.org/self/foostats';
my $odds_file = $stats_dir . '/fooodds.txt';
my $odds_log = '/var/log/fooodds';
- my $output_dir; # Will default to $stats_dir/gemtext if not specified
- my $html_output_dir; # Will default to /var/www/htdocs/gemtexter/stats.foo.zone if not specified
+ my $output_dir; # Will default to $stats_dir/gemtext if not specified
+ my $html_output_dir; # Will default to /var/www/htdocs/gemtexter/stats.foo.zone if not specified
my $partner_node =
- hostname eq 'fishfinger.buetow.org'
- ? 'blowfish.buetow.org'
- : 'fishfinger.buetow.org';
+ hostname eq 'fishfinger.buetow.org'
+ ? 'blowfish.buetow.org'
+ : 'fishfinger.buetow.org';
GetOptions
- 'parse-logs!' => \$parse_logs,
- 'filter-log=s' => \$odds_log,
- 'odds-file=s' => \$odds_file,
- 'replicate!' => \$replicate,
- 'report!' => \$report,
- 'all!' => \$all,
- 'stats-dir=s' => \$stats_dir,
- 'output-dir=s' => \$output_dir,
- 'html-output-dir=s' => \$html_output_dir,
- 'partner-node=s' => \$partner_node,
- 'version' => \$version,
- 'help|?' => \$help;
+ 'parse-logs!' => \$parse_logs,
+ 'filter-log=s' => \$odds_log,
+ 'odds-file=s' => \$odds_file,
+ 'replicate!' => \$replicate,
+ 'report!' => \$report,
+ 'all!' => \$all,
+ 'stats-dir=s' => \$stats_dir,
+ 'output-dir=s' => \$output_dir,
+ 'html-output-dir=s' => \$html_output_dir,
+ 'partner-node=s' => \$partner_node,
+ 'version' => \$version,
+ 'help|?' => \$help;
if ($version) {
print "foostats " . VERSION . "\n";
@@ -1558,20 +1863,22 @@ package main {
usage() if $help;
- parse_logs( $stats_dir, $odds_file, $odds_log )
- if $parse_logs
- or $all;
+ parse_logs($stats_dir, $odds_file, $odds_log)
+ if $parse_logs
+ or $all;
- Foostats::Replicator::replicate( $stats_dir, $partner_node )
- if $replicate
- or $all;
+ Foostats::Replicator::replicate($stats_dir, $partner_node)
+ if $replicate
+ or $all;
# Set default output directories if not specified
- $output_dir //= '/var/gemini/stats.foo.zone';
+ $output_dir //= '/var/gemini/stats.foo.zone';
$html_output_dir //= '/var/www/htdocs/gemtexter/stats.foo.zone';
-
- Foostats::Reporter::report( $stats_dir, $output_dir, $html_output_dir,
- Foostats::Merger::merge($stats_dir) )
- if $report
- or $all;
+
+ Foostats::Reporter::report($stats_dir, $output_dir, $html_output_dir, Foostats::Merger::merge($stats_dir))
+ if $report
+ or $all;
}
+
+# Only run main flow when executed as a script, not when required (e.g., tests)
+foostats_main() unless caller;