summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- foostats.pl 914
-rw-r--r-- t/tmp_filter_log 3
2 files changed, 489 insertions, 428 deletions
diff --git a/foostats.pl b/foostats.pl
index 4e32a9f..2b2b2fc 100644
--- a/foostats.pl
+++ b/foostats.pl
@@ -27,34 +27,34 @@ use constant VERSION => 'v0.1.0';
package FileHelper {
use JSON;
- sub write ( $path, $content ) {
+ sub write ($path, $content) {
open my $fh, '>', "$path.tmp"
- or die "\nCannot open file: $!";
+ or die "\nCannot open file: $!";
print $fh $content;
close $fh;
rename
- "$path.tmp",
- $path;
+ "$path.tmp",
+ $path;
}
- sub write_json_gz ( $path, $data ) {
+ sub write_json_gz ($path, $data) {
my $json = encode_json $data;
say "Writing $path";
open my $fd, '>:gzip', "$path.tmp"
- or die "$path.tmp: $!";
+ or die "$path.tmp: $!";
print $fd $json;
close $fd;
rename "$path.tmp", $path
- or die "$path.tmp: $!";
+ or die "$path.tmp: $!";
}
sub read_json_gz ($path) {
say "Reading $path";
open my $fd, '<:gzip', $path
- or die "$path: $!";
+ or die "$path: $!";
my $json = decode_json <$fd>;
close $fd;
return $json;
@@ -62,9 +62,9 @@ package FileHelper {
sub read_lines ($path) {
my @lines;
- open( my $fh, '<', $path )
- or die "$path: $!";
- chomp( @lines = <$fh> );
+ open(my $fh, '<', $path)
+ or die "$path: $!";
+ chomp(@lines = <$fh>);
close($fh);
return @lines;
}
@@ -77,18 +77,18 @@ package DateHelper {
my $today = localtime;
my @dates;
- for my $days_ago ( 0 .. 30 ) {
- my $date = $today - ( $days_ago * 24 * 60 * 60 );
+ for my $days_ago (0 .. 30) {
+ my $date = $today - ($days_ago * 24 * 60 * 60);
push
- @dates,
- $date->strftime('%Y%m%d');
+ @dates,
+ $date->strftime('%Y%m%d');
}
return @dates;
}
sub last_n_months_day_dates ($months) {
- my $today = localtime;
+ my $today = localtime;
my $start_year = $today->year;
my $start_month = $today->mon - $months;
while ($start_month <= 0) { $start_month += 12; $start_year--; }
@@ -98,7 +98,7 @@ package DateHelper {
my $t = $start;
while ($t <= $today) {
push @dates, $t->strftime('%Y%m%d');
- $t += 24 * 60 * 60; # one day
+ $t += 24 * 60 * 60; # one day
}
return @dates;
}
@@ -117,34 +117,34 @@ package Foostats::Logreader {
sub anonymize_ip ($ip) {
my $ip_proto =
- contains( $ip, ':' )
- ? 'IPv6'
- : 'IPv4';
+ contains($ip, ':')
+ ? 'IPv6'
+ : 'IPv4';
my $ip_hash = sha3_512_base64 $ip;
- return ( $ip_hash, $ip_proto );
+ return ($ip_hash, $ip_proto);
}
- sub read_lines ( $glob, $cb ) {
+ sub read_lines ($glob, $cb) {
my sub year ($path) {
- localtime( ( stat $path )->mtime )->strftime('%Y');
+ localtime((stat $path)->mtime)->strftime('%Y');
}
my sub open_file ($path) {
my $flag =
- $path =~ /\.gz$/
- ? '<:gzip'
- : '<';
+ $path =~ /\.gz$/
+ ? '<:gzip'
+ : '<';
open my $fd, $flag, $path
- or die "$path: $!";
+ or die "$path: $!";
return $fd;
}
my $last = false;
- say 'File path glob matches: ' . join( ' ', glob $glob );
+ say 'File path glob matches: ' . join(' ', glob $glob);
- LAST:
- for my $path ( sort { -M $a <=> -M $b } glob $glob ) {
+ LAST:
+ for my $path (sort { -M $a <=> -M $b } glob $glob) {
say "Processing $path";
my $file = open_file $path;
@@ -152,37 +152,37 @@ package Foostats::Logreader {
while (<$file>) {
next
- if contains( $_, 'logfile turned over' );
+ if contains($_, 'logfile turned over');
# last == true means: After this file, don't process more
$last = true
- unless defined $cb->( $year, split / +/ );
+ unless defined $cb->($year, split / +/);
}
say "Closing $path (last:$last)";
close $file;
last LAST
- if $last;
+ if $last;
}
}
- sub parse_web_logs ( $last_processed_date, $cb ) {
+ sub parse_web_logs ($last_processed_date, $cb) {
my sub parse_date ($date) {
- my $t = Time::Piece->strptime( $date, '[%d/%b/%Y:%H:%M:%S' );
- return ( $t->strftime('%Y%m%d'), $t->strftime('%H%M%S') );
+ my $t = Time::Piece->strptime($date, '[%d/%b/%Y:%H:%M:%S');
+ return ($t->strftime('%Y%m%d'), $t->strftime('%H%M%S'));
}
my sub parse_web_line (@line) {
- my ( $date, $time ) = parse_date $line [4];
+ my ($date, $time) = parse_date $line [4];
return undef
- if $date < $last_processed_date;
+ if $date < $last_processed_date;
# X-Forwarded-For?
my $ip =
- $line[-2] eq '-'
- ? $line[1]
- : $line[-2];
- my ( $ip_hash, $ip_proto ) = anonymize_ip $ip;
+ $line[-2] eq '-'
+ ? $line[1]
+ : $line[-2];
+ my ($ip_hash, $ip_proto) = anonymize_ip $ip;
return {
proto => 'web',
@@ -196,42 +196,41 @@ package Foostats::Logreader {
};
}
- read_lines web_logs_glob(), sub ( $year, @line ) {
- $cb->( parse_web_line @line );
+ read_lines web_logs_glob(), sub ($year, @line) {
+ $cb->(parse_web_line @line);
};
}
- sub parse_gemini_logs ( $last_processed_date, $cb ) {
- my sub parse_date ( $year, @line ) {
+ sub parse_gemini_logs ($last_processed_date, $cb) {
+ my sub parse_date ($year, @line) {
my $timestr = "$line[0] $line[1]";
- return Time::Piece->strptime( $timestr, '%b %d' )
- ->strftime("$year%m%d");
+ return Time::Piece->strptime($timestr, '%b %d')->strftime("$year%m%d");
}
- my sub parse_vger_line ( $year, @line ) {
+ my sub parse_vger_line ($year, @line) {
my $full_path = $line[5];
$full_path =~ s/"//g;
- my ( $proto, undef, $host, $uri_path ) =
- split '/',
- $full_path,
- 4;
+ my ($proto, undef, $host, $uri_path) =
+ split '/',
+ $full_path,
+ 4;
$uri_path = ''
- unless defined $uri_path;
+ unless defined $uri_path;
return {
proto => 'gemini',
host => $host,
uri_path => "/$uri_path",
status => $line[6],
- date => int( parse_date( $year, @line ) ),
+ date => int(parse_date($year, @line)),
time => $line[2],
};
}
- my sub parse_relayd_line ( $year, @line ) {
- my $date = int( parse_date( $year, @line ) );
+ my sub parse_relayd_line ($year, @line) {
+ my $date = int(parse_date($year, @line));
- my ( $ip_hash, $ip_proto ) = anonymize_ip $line [12];
+ my ($ip_hash, $ip_proto) = anonymize_ip $line [12];
return {
ip_hash => $ip_hash,
ip_proto => $ip_proto,
@@ -240,26 +239,26 @@ package Foostats::Logreader {
};
}
- # Expect one vger and one relayd log line per event! So collect
- # both events (one from one log line each) and then merge the result hash!
- my ( $vger, $relayd );
- read_lines gemini_logs_glob(), sub ( $year, @line ) {
- if ( $line[4] eq 'vger:' ) {
+ # Expect one vger and one relayd log line per event! So collect
+ # both events (one from one log line each) and then merge the result hash!
+ my ($vger, $relayd);
+ read_lines gemini_logs_glob(), sub ($year, @line) {
+ if ($line[4] eq 'vger:') {
$vger = parse_vger_line $year, @line;
}
- elsif ( $line[5] eq 'relay'
- and startswith( $line[6], 'gemini' ) )
+ elsif ($line[5] eq 'relay'
+ and startswith($line[6], 'gemini'))
{
$relayd = parse_relayd_line $year, @line;
return undef
- if $relayd->{date} < $last_processed_date;
+ if $relayd->{date} < $last_processed_date;
}
if ( defined $vger
and defined $relayd
- and $vger->{time} eq $relayd->{time} )
+ and $vger->{time} eq $relayd->{time})
{
- $cb->( { %$vger, %$relayd } );
+ $cb->({ %$vger, %$relayd });
$vger = $relayd = undef;
}
@@ -267,9 +266,8 @@ package Foostats::Logreader {
};
}
- sub parse_logs ( $last_web_date, $last_gemini_date, $odds_file, $odds_log )
- {
- my $agg = Foostats::Aggregator->new( $odds_file, $odds_log );
+ sub parse_logs ($last_web_date, $last_gemini_date, $odds_file, $odds_log) {
+ my $agg = Foostats::Aggregator->new($odds_file, $odds_log);
say "Last web date: $last_web_date";
say "Last gemini date: $last_gemini_date";
@@ -289,26 +287,26 @@ package Foostats::Logreader {
package Foostats::Filter {
use String::Util qw(contains startswith endswith);
- sub new ( $class, $odds_file, $log_path ) {
+ sub new ($class, $odds_file, $log_path) {
say "Logging filter to $log_path";
my @odds = FileHelper::read_lines($odds_file);
bless {
odds => \@odds,
log_path => $log_path
- },
- $class;
+ },
+ $class;
}
- sub ok ( $self, $event ) {
+ sub ok ($self, $event) {
state %blocked = ();
return false
- if exists $blocked{ $event->{ip_hash} };
+ if exists $blocked{ $event->{ip_hash} };
if ( $self->odd($event)
- or $self->excessive($event) )
+ or $self->excessive($event))
{
- ( $blocked{ $event->{ip_hash} } //= 0 )++;
+ ($blocked{ $event->{ip_hash} } //= 0)++;
return false;
}
else {
@@ -316,54 +314,52 @@ package Foostats::Filter {
}
}
- sub odd ( $self, $event ) {
+ sub odd ($self, $event) {
\my $uri_path = \$event->{uri_path};
- for ( $self->{odds}->@* ) {
+ for ($self->{odds}->@*) {
next if !defined $_ || $_ eq '' || /^\s*#/;
next
- unless contains( $uri_path, $_ );
+ unless contains($uri_path, $_);
- $self->log( 'WARN', $uri_path,
- "contains $_ and is odd and will therefore be blocked!" );
+ $self->log('WARN', $uri_path, "contains $_ and is odd and will therefore be blocked!");
return true;
}
- $self->log( 'OK', $uri_path, "appears fine..." );
+ $self->log('OK', $uri_path, "appears fine...");
return false;
}
- sub log ( $self, $severity, $subject, $message ) {
+ sub log ($self, $severity, $subject, $message) {
state %dedup;
# Don't log if path was already logged
return
- if exists $dedup{$subject};
+ if exists $dedup{$subject};
$dedup{$subject} = 1;
- open( my $fh, '>>', $self->{log_path} )
- or die $self->{log_path} . ": $!";
+ open(my $fh, '>>', $self->{log_path})
+ or die $self->{log_path} . ": $!";
print $fh "$severity: $subject $message\n";
close($fh);
}
- sub excessive ( $self, $event ) {
+ sub excessive ($self, $event) {
\my $time = \$event->{time};
\my $ip_hash = \$event->{ip_hash};
state $last_time = $time; # Time with second: 'HH:MM:SS'
state %count = (); # IPs accessing within the same second!
- if ( $last_time ne $time ) {
+ if ($last_time ne $time) {
$last_time = $time;
%count = ();
return false;
}
# IP requested site more than once within the same second!?
- if ( 1 < ++( $count{$ip_hash} //= 0 ) ) {
- $self->log( 'WARN', $ip_hash,
- "blocked due to excessive requesting..." );
+ if (1 < ++($count{$ip_hash} //= 0)) {
+ $self->log('WARN', $ip_hash, "blocked due to excessive requesting...");
return true;
}
@@ -380,17 +376,17 @@ package Foostats::Aggregator {
GEMFEED_URI_2 => '/gemfeed/',
};
- sub new ( $class, $odds_file, $odds_log ) {
+ sub new ($class, $odds_file, $odds_log) {
bless {
- filter => Foostats::Filter->new( $odds_file, $odds_log ),
+ filter => Foostats::Filter->new($odds_file, $odds_log),
stats => {}
- },
- $class;
+ },
+ $class;
}
- sub add ( $self, $event ) {
+ sub add ($self, $event) {
return undef
- unless defined $event;
+ unless defined $event;
my $date = $event->{date};
my $date_key = $event->{proto} . "_$date";
@@ -400,9 +396,7 @@ package Foostats::Aggregator {
# - feed_ips: unique IPs per feed type (atom_feed, gemfeed)
# - page_ips: unique IPs per host and per URL
$self->{stats}{$date_key} //= {
- count => {
- filtered => 0,
- },
+ count => { filtered => 0, },
feed_ips => {
atom_feed => {},
gemfeed => {},
@@ -414,56 +408,56 @@ package Foostats::Aggregator {
};
\my $s = \$self->{stats}{$date_key};
- unless ( $self->{filter}->ok($event) ) {
+ unless ($self->{filter}->ok($event)) {
$s->{count}{filtered}++;
return $event;
}
- $self->add_count( $s, $event );
- $self->add_page_ips( $s, $event )
- unless $self->add_feed_ips( $s, $event );
+ $self->add_count($s, $event);
+ $self->add_page_ips($s, $event)
+ unless $self->add_feed_ips($s, $event);
return $event;
}
- sub add_count ( $self, $stats, $event ) {
+ sub add_count ($self, $stats, $event) {
\my $c = \$stats->{count};
\my $e = \$event;
- ( $c->{ $e->{proto} } //= 0 )++;
- ( $c->{ $e->{ip_proto} } //= 0 )++;
+ ($c->{ $e->{proto} } //= 0)++;
+ ($c->{ $e->{ip_proto} } //= 0)++;
}
- sub add_feed_ips ( $self, $stats, $event ) {
+ sub add_feed_ips ($self, $stats, $event) {
\my $f = \$stats->{feed_ips};
\my $e = \$event;
# Atom feed (exact path match, allow optional query string)
- if ( $e->{uri_path} =~ m{^/gemfeed/atom\.xml(?:[?#].*)?$} ) {
- ( $f->{atom_feed}->{ $e->{ip_hash} } //= 0 )++;
+ if ($e->{uri_path} =~ m{^/gemfeed/atom\.xml(?:[?#].*)?$}) {
+ ($f->{atom_feed}->{ $e->{ip_hash} } //= 0)++;
return 1;
}
# Gemfeed index: '/gemfeed/' or '/gemfeed/index.gmi' (optionally with query)
- if ( $e->{uri_path} =~ m{^/gemfeed/(?:index\.gmi)?(?:[?#].*)?$} ) {
- ( $f->{gemfeed}->{ $e->{ip_hash} } //= 0 )++;
+ if ($e->{uri_path} =~ m{^/gemfeed/(?:index\.gmi)?(?:[?#].*)?$}) {
+ ($f->{gemfeed}->{ $e->{ip_hash} } //= 0)++;
return 1;
}
return 0;
}
- sub add_page_ips ( $self, $stats, $event ) {
+ sub add_page_ips ($self, $stats, $event) {
\my $e = \$event;
\my $p = \$stats->{page_ips};
return
- if !endswith( $e->{uri_path}, '.html' )
- && !endswith( $e->{uri_path}, '.gmi' );
+ if !endswith($e->{uri_path}, '.html')
+ && !endswith($e->{uri_path}, '.gmi');
- ( $p->{hosts}->{ $e->{host} }->{ $e->{ip_hash} } //= 0 )++;
- ( $p->{urls}->{ $e->{host} . $e->{uri_path} }->{ $e->{ip_hash} } //=
- 0 )++;
+ ($p->{hosts}->{ $e->{host} }->{ $e->{ip_hash} } //= 0)++;
+ ($p->{urls}->{ $e->{host} . $e->{uri_path} }->{ $e->{ip_hash} } //=
+ 0)++;
}
}
@@ -472,43 +466,41 @@ package Foostats::FileOutputter {
use Sys::Hostname;
use PerlIO::gzip;
- sub new ( $class, %args ) {
+ sub new ($class, %args) {
my $self = bless \%args, $class;
mkdir $self->{stats_dir}
- or die $self->{stats_dir} . ": $!"
- unless -d $self->{stats_dir};
+ or die $self->{stats_dir} . ": $!"
+ unless -d $self->{stats_dir};
return $self;
}
- sub last_processed_date ( $self, $proto ) {
- my $hostname = hostname();
- my @processed =
- glob $self->{stats_dir} . "/${proto}_????????.$hostname.json.gz";
+ sub last_processed_date ($self, $proto) {
+ my $hostname = hostname();
+ my @processed = glob $self->{stats_dir} . "/${proto}_????????.$hostname.json.gz";
my ($date) =
- @processed
- ? ( $processed[-1] =~ /_(\d{8})\.$hostname\.json.gz/ )
- : 0;
+ @processed
+ ? ($processed[-1] =~ /_(\d{8})\.$hostname\.json.gz/)
+ : 0;
return int($date);
}
sub write ($self) {
$self->for_dates(
- sub ( $self, $date_key, $stats ) {
+ sub ($self, $date_key, $stats) {
my $hostname = hostname();
- my $path =
- $self->{stats_dir} . "/${date_key}.$hostname.json.gz";
+ my $path = $self->{stats_dir} . "/${date_key}.$hostname.json.gz";
FileHelper::write_json_gz
- $path,
- $stats;
+ $path,
+ $stats;
}
);
}
- sub for_dates ( $self, $cb ) {
- $cb->( $self, $_, $self->{stats}{$_} ) for sort
- keys $self->{stats}->%*;
+ sub for_dates ($self, $cb) {
+ $cb->($self, $_, $self->{stats}{$_}) for sort
+ keys $self->{stats}->%*;
}
}
@@ -518,7 +510,7 @@ package Foostats::Replicator {
use LWP::UserAgent;
use String::Util qw(endswith);
- sub replicate ( $stats_dir, $partner_node ) {
+ sub replicate ($stats_dir, $partner_node) {
say "Replicating from $partner_node";
for my $proto (qw(gemini web)) {
@@ -532,51 +524,50 @@ package Foostats::Replicator {
"https://$partner_node/foostats/$dest_path",
"$stats_dir/$dest_path",
$count++
- <
- 3
+ <
+ 3
, # Always replicate the newest 3 files.
);
}
}
}
- sub replicate_file ( $remote_url, $dest_path, $force ) {
+ sub replicate_file ($remote_url, $dest_path, $force) {
# $dest_path already exists, not replicating it
return
- if !$force
- && -f $dest_path;
+ if !$force
+ && -f $dest_path;
say "Replicating $remote_url to $dest_path (force:$force)... ";
my $response = LWP::UserAgent->new->get($remote_url);
- unless ( $response->is_success ) {
+ unless ($response->is_success) {
say "\nFailed to fetch the file: " . $response->status_line;
return;
}
FileHelper::write
- $dest_path,
- $response->decoded_content;
+ $dest_path,
+ $response->decoded_content;
say 'done';
}
}
package Foostats::Merger {
- # Removed Data::Dumper (debug-only) per review.
+ # Removed Data::Dumper (debug-only) per review.
sub merge ($stats_dir) {
my %merge;
- $merge{$_} = merge_for_date( $stats_dir, $_ )
- for DateHelper::last_month_dates;
+ $merge{$_} = merge_for_date($stats_dir, $_) for DateHelper::last_month_dates;
return %merge;
}
- sub merge_for_date ( $stats_dir, $date ) {
+ sub merge_for_date ($stats_dir, $date) {
printf
- "Merging for date %s\n",
- $date;
+ "Merging for date %s\n",
+ $date;
- my @stats = stats_for_date( $stats_dir, $date );
+ my @stats = stats_for_date($stats_dir, $date);
return {
feed_ips => feed_ips(@stats),
count => count(@stats),
@@ -584,9 +575,9 @@ package Foostats::Merger {
};
}
- sub merge_ips ( $a, $b, $key_transform = undef ) {
- my sub merge ( $a, $b ) {
- while ( my ( $key, $val ) = each %$b ) {
+ sub merge_ips ($a, $b, $key_transform = undef) {
+ my sub merge ($a, $b) {
+ while (my ($key, $val) = each %$b) {
$a->{$key} //= 0;
$a->{$key} += $val;
}
@@ -594,52 +585,52 @@ package Foostats::Merger {
my $is_num = qr/^\d+(\.\d+)?$/;
- while ( my ( $key, $val ) = each %$b ) {
+ while (my ($key, $val) = each %$b) {
$key = $key_transform->($key)
- if defined $key_transform;
+ if defined $key_transform;
- if ( not exists $a->{$key} ) {
+ if (not exists $a->{$key}) {
$a->{$key} = $val;
}
- elsif (ref( $a->{$key} ) eq 'HASH'
- && ref($val) eq 'HASH' )
+ elsif (ref($a->{$key}) eq 'HASH'
+ && ref($val) eq 'HASH')
{
- merge( $a->{$key}, $val );
+ merge($a->{$key}, $val);
}
elsif ($a->{$key} =~ $is_num
- && $val =~ $is_num )
+ && $val =~ $is_num)
{
$a->{$key} += $val;
}
else {
die
-"Not merging tkey '%s' (ref:%s): '%s' (ref:%s) with '%s' (ref:%s)\n",
- $key,
- ref($key), $a->{$key},
- ref( $a->{$key} ),
- $val,
- ref($val);
+ "Not merging tkey '%s' (ref:%s): '%s' (ref:%s) with '%s' (ref:%s)\n",
+ $key,
+ ref($key), $a->{$key},
+ ref($a->{$key}),
+ $val,
+ ref($val);
}
}
}
sub feed_ips (@stats) {
- my ( %gemini, %web );
+ my (%gemini, %web);
for my $stats (@stats) {
my $merge =
- $stats->{proto} eq 'web'
- ? \%web
- : \%gemini;
+ $stats->{proto} eq 'web'
+ ? \%web
+ : \%gemini;
printf
- "Merging proto %s feed IPs\n",
- $stats->{proto};
- merge_ips( $merge, $stats->{feed_ips} );
+ "Merging proto %s feed IPs\n",
+ $stats->{proto};
+ merge_ips($merge, $stats->{feed_ips});
}
my %total;
- merge_ips( \%total, $web{$_} ) for keys %web;
- merge_ips( \%total, $gemini{$_} ) for keys %gemini;
+ merge_ips(\%total, $web{$_}) for keys %web;
+ merge_ips(\%total, $gemini{$_}) for keys %gemini;
my %merge = (
'Total' => scalar keys %total,
@@ -656,7 +647,7 @@ package Foostats::Merger {
my %merge;
for my $stats (@stats) {
- while ( my ( $key, $val ) = each $stats->{count}->%* ) {
+ while (my ($key, $val) = each $stats->{count}->%*) {
$merge{$key} //= 0;
$merge{$key} += $val;
}
@@ -671,7 +662,7 @@ package Foostats::Merger {
hosts => {}
);
- for my $key ( keys %merge ) {
+ for my $key (keys %merge) {
merge_ips(
$merge{$key},
$_->{page_ips}->{$key},
@@ -683,25 +674,24 @@ package Foostats::Merger {
) for @stats;
# Keep only uniq IP count
- $merge{$key}->{$_} = scalar keys $merge{$key}->{$_}->%*
- for keys $merge{$key}->%*;
+ $merge{$key}->{$_} = scalar keys $merge{$key}->{$_}->%* for keys $merge{$key}->%*;
}
return \%merge;
}
- sub stats_for_date ( $stats_dir, $date ) {
+ sub stats_for_date ($stats_dir, $date) {
my @stats;
for my $proto (qw(gemini web)) {
for my $path (<$stats_dir/${proto}_${date}.*.json.gz>) {
printf
- "Reading %s\n",
- $path;
+ "Reading %s\n",
+ $path;
push
- @stats,
- FileHelper::read_json_gz($path);
- @{ $stats[-1] }{qw(proto path)} = ( $proto, $path );
+ @stats,
+ FileHelper::read_json_gz($path);
+ @{ $stats[-1] }{qw(proto path)} = ($proto, $path);
}
}
@@ -714,7 +704,7 @@ package Foostats::Reporter {
use HTML::Entities qw(encode_entities);
sub truncate_url {
- my ( $url, $max_length ) = @_;
+ my ($url, $max_length) = @_;
$max_length //= 100; # Default to 100 characters
return $url if length($url) <= $max_length;
@@ -725,44 +715,44 @@ package Foostats::Reporter {
my $available_length = $max_length - $ellipsis_length;
# Split available length between start and end, favoring the end
- my $keep_start = int( $available_length * 0.4 ); # 40% for start
+ my $keep_start = int($available_length * 0.4); # 40% for start
my $keep_end = $available_length - $keep_start; # 60% for end
- my $start = substr( $url, 0, $keep_start );
- my $end = substr( $url, -$keep_end );
+ my $start = substr($url, 0, $keep_start);
+ my $end = substr($url, -$keep_end);
return $start . $ellipsis . $end;
}
sub truncate_urls_for_table {
- my ( $url_rows, $count_column_header ) = @_;
+ my ($url_rows, $count_column_header) = @_;
# Calculate the maximum width needed for the count column
my $max_count_width = length($count_column_header);
for my $row (@$url_rows) {
- my $count_width = length( $row->[1] );
+ my $count_width = length($row->[1]);
$max_count_width = $count_width if $count_width > $max_count_width;
}
# Row format: "| URL... | count |" with padding
# Calculate: "| " (2) + URL + " | " (3) + count_with_padding + " |" (2)
my $max_url_length = 100 - 7 - $max_count_width;
- $max_url_length = 70 if $max_url_length > 70; # Cap at reasonable length
+ $max_url_length = 70 if $max_url_length > 70; # Cap at reasonable length
# Truncate URLs in place
for my $row (@$url_rows) {
- $row->[0] = truncate_url( $row->[0], $max_url_length );
+ $row->[0] = truncate_url($row->[0], $max_url_length);
}
}
sub format_table {
- my ( $headers, $rows ) = @_;
+ my ($headers, $rows) = @_;
my @widths;
- for my $col ( 0 .. $#{$headers} ) {
- my $max_width = length( $headers->[$col] );
+ for my $col (0 .. $#{$headers}) {
+ my $max_width = length($headers->[$col]);
for my $row (@$rows) {
- my $len = length( $row->[$col] );
+ my $len = length($row->[$col]);
$max_width = $len if $len > $max_width;
}
push @widths, $max_width;
@@ -770,10 +760,10 @@ package Foostats::Reporter {
my $header_line = '|';
my $separator_line = '|';
- for my $col ( 0 .. $#{$headers} ) {
+ for my $col (0 .. $#{$headers}) {
$header_line .=
- sprintf( " %-*s |", $widths[$col], $headers->[$col] );
- $separator_line .= '-' x ( $widths[$col] + 2 ) . '|';
+ sprintf(" %-*s |", $widths[$col], $headers->[$col]);
+ $separator_line .= '-' x ($widths[$col] + 2) . '|';
}
my @table_lines;
@@ -783,33 +773,35 @@ package Foostats::Reporter {
for my $row (@$rows) {
my $row_line = '|';
- for my $col ( 0 .. $#{$row} ) {
- $row_line .= sprintf( " %-*s |", $widths[$col], $row->[$col] );
+ for my $col (0 .. $#{$row}) {
+ $row_line .= sprintf(" %-*s |", $widths[$col], $row->[$col]);
}
push @table_lines, $row_line;
}
push @table_lines, $separator_line; # Add bottom terminator
- return join( "\n", @table_lines );
+ return join("\n", @table_lines);
}
# Convert gemtext to HTML
sub gemtext_to_html {
- my ($content) = @_;
- my $html = "";
- my $in_code_block = 0;
- my $in_list = 0;
- my @lines = split /\n/, $content;
+ my ($content) = @_;
+ my $html = "";
+ my $in_code_block = 0;
+ my $in_list = 0;
+ my @lines = split /\n/, $content;
my @code_block_lines = ();
-
+
for my $line (@lines) {
if ($line =~ /^```/) {
if ($in_code_block) {
+
# End code block - check if it's a table
if (is_ascii_table(\@code_block_lines)) {
$html .= convert_ascii_table_to_html(\@code_block_lines);
- } else {
+ }
+ else {
$html .= "<pre>\n";
for my $code_line (@code_block_lines) {
$html .= encode_entities($code_line) . "\n";
@@ -817,18 +809,19 @@ package Foostats::Reporter {
$html .= "</pre>\n";
}
@code_block_lines = ();
- $in_code_block = 0;
- } else {
+ $in_code_block = 0;
+ }
+ else {
$in_code_block = 1;
}
next;
}
-
+
if ($in_code_block) {
push @code_block_lines, $line;
next;
}
-
+
# Skip 365-day summary section header in HTML output
if ($line =~ /^## 365-Day Summary Reports\s*$/) {
next;
@@ -839,94 +832,106 @@ package Foostats::Reporter {
$html .= "</ul>\n";
$in_list = 0;
}
-
+
# Headers
if ($line =~ /^### (.*)/) {
$html .= "<h3>" . encode_entities($1) . "</h3>\n";
- } elsif ($line =~ /^## (.*)/) {
+ }
+ elsif ($line =~ /^## (.*)/) {
$html .= "<h2>" . encode_entities($1) . "</h2>\n";
- } elsif ($line =~ /^# (.*)/) {
+ }
+ elsif ($line =~ /^# (.*)/) {
$html .= "<h1>" . encode_entities($1) . "</h1>\n";
}
+
# Links
elsif ($line =~ /^=> (\S+)\s+(.*)/) {
my ($url, $text) = ($1, $2);
+
# Drop 365-day summary links from HTML output
if ($url =~ /(?:^|[\/.])365day_summary_\d{8}\.gmi$/) {
next;
}
+
# Convert .gmi links to .html
$url =~ s/\.gmi$/\.html/;
$html .= "<p><a href=\"" . encode_entities($url) . "\">" . encode_entities($text) . "</a></p>\n";
}
+
# Bullet points
elsif ($line =~ /^\* (.*)/) {
if (!$in_list) {
$html .= "<ul>\n";
$in_list = 1;
}
- $html .= "<li>" . encode_entities($1) . "</li>\n";
+ $html .= "<li>" . linkify_text($1) . "</li>\n";
}
+
# Empty line - skip to avoid excessive spacing
elsif ($line =~ /^\s*$/) {
+
# Skip empty lines for more compact output
}
+
# Regular text
else {
- $html .= "<p>" . encode_entities($line) . "</p>\n";
+ $html .= "<p>" . linkify_text($line) . "</p>\n";
}
}
-
+
# Close list if still open
if ($in_list) {
$html .= "</ul>\n";
}
-
+
return $html;
}
-
+
# Check if the lines form an ASCII table
sub is_ascii_table {
my ($lines) = @_;
- return 0 if @$lines < 3; # Need at least header, separator, and one data row
-
+ return 0 if @$lines < 3; # Need at least header, separator, and one data row
+
# Check for separator lines with dashes and pipes
for my $line (@$lines) {
return 1 if $line =~ /^\|?[\s\-]+\|/;
}
return 0;
}
-
+
# Convert ASCII table to HTML table
sub convert_ascii_table_to_html {
- my ($lines) = @_;
- my $html = "<table>\n";
+ my ($lines) = @_;
+ my $html = "<table>\n";
my $row_count = 0;
-
+
for my $line (@$lines) {
+
# Skip separator lines
next if $line =~ /^\|?[\s\-]+\|/ && $line =~ /\-/;
-
+
# Parse table row
my @cells = split /\s*\|\s*/, $line;
- @cells = grep { length($_) > 0 } @cells; # Remove empty cells
-
+ @cells = grep { length($_) > 0 } @cells; # Remove empty cells
+
if (@cells) {
$html .= "<tr>\n";
+
# First row is header
my $tag = ($row_count == 0) ? "th" : "td";
for my $cell (@cells) {
- $html .= " <$tag>" . encode_entities(trim($cell)) . "</$tag>\n";
+ my $val = trim($cell);
+ $html .= " <$tag>" . linkify_text($val) . "</$tag>\n";
}
$html .= "</tr>\n";
$row_count++;
}
}
-
+
$html .= "</table>\n";
return $html;
}
-
+
# Trim whitespace from string
sub trim {
my ($str) = @_;
@@ -934,9 +939,77 @@ package Foostats::Reporter {
$str =~ s/\s+$//;
return $str;
}
-
+
+ # Build an href for a token that looks like a URL or FQDN.
+ #
+ # Leading and trailing whitespace is ignored. A token that already starts
+ # with http:// or https:// is returned unchanged. Otherwise the token must
+ # have the shape host[/path]: the scheme becomes gemini:// when the path ends
+ # in .gmi (optionally followed by a query or fragment) and https:// in every
+ # other case. Returns undef when the token does not look like host[/path].
+ sub _guess_href {
+     my ($token) = @_;
+     my $t = $token;
+     $t =~ s/^\s+//;
+     $t =~ s/\s+$//;
+
+     # Already absolute http(s)
+     return $t if $t =~ m{^https?://}i;
+
+     # Drop trailing punctuation (sentence punctuation, closing brackets) so
+     # that it never becomes part of the href.
+     $t =~ s{[)\]\}.,;:!?]+$}{};
+
+     # host[/path]
+     if ($t =~ m{^([A-Za-z0-9.-]+\.[A-Za-z]{2,})(/[^\s<]*)?$}) {
+         my ($host, $path) = ($1, $2 // '');
+         my $has_ellipsis = index($t, '...') != -1;
+         my $is_gemini = $path =~ /\.gmi(?:[?#].*)?$/i;
+         my $scheme = $is_gemini ? 'gemini' : 'https';
+
+         # A '...' means the path cannot be trusted as written, so fall back
+         # to the host root.
+         return sprintf('%s://%s/', $scheme, $host) if $has_ellipsis;
+         return sprintf('%s://%s%s', $scheme, $host, ($path eq '' ? '/' : $path));
+     }
+
+     return undef;
+ }
+
+ # Turn any URLs/FQDNs in the provided text into anchors.
+ #
+ # Scans $text for tokens of the form [http(s)://]host.tld[/path]. Each token
+ # that _guess_href can turn into an href is emitted as <a href="...">; all
+ # other text, the link label and any trailing punctuation are passed through
+ # encode_entities. Returns the resulting HTML string, or '' when $text is
+ # undefined.
+ sub linkify_text {
+     my ($text) = @_;
+     return '' unless defined $text;
+
+     my $out = '';
+     my $pos = 0;
+     while ($text =~ m{((?:https?://)?[A-Za-z0-9.-]+\.[A-Za-z]{2,}(?:/[^\s<]*)?)}g) {
+         # Save the match and its offsets first: the substitution below runs
+         # another regex and would overwrite $1, @- and @+.
+         my $match = $1;
+         my $start = $-[1];
+         my $end = $+[1];
+
+         # Emit preceding text
+         $out .= encode_entities(substr($text, $pos, $start - $pos));
+
+         # Separate trailing punctuation from the match
+         my ($core, $trail) = ($match, '');
+         if ($core =~ s{([)\]\}.,;:!?]+)$}{}) { $trail = $1; }
+
+         my $href = _guess_href($core);
+         if ($href) {
+             # _guess_href already returns the complete link target, so it is
+             # used verbatim; nothing may be appended to it here.
+             $out .= sprintf('<a href="%s">%s</a>%s',
+                 encode_entities($href), encode_entities($core), encode_entities($trail));
+         }
+         else {
+             # Not a linkable token after all
+             $out .= encode_entities($match);
+         }
+         $pos = $end;
+     }
+
+     # Remainder
+     $out .= encode_entities(substr($text, $pos));
+     return $out;
+ }
+
# Use HTML::Entities::encode_entities imported above
-
+
# Generate HTML wrapper
sub generate_html_page {
my ($title, $content) = @_;
@@ -1006,48 +1079,45 @@ $content
}
sub report {
- my ( $stats_dir, $output_dir, $html_output_dir, %merged ) = @_;
- for my $date ( sort { $b cmp $a } keys %merged ) {
+ my ($stats_dir, $output_dir, $html_output_dir, %merged) = @_;
+ for my $date (sort { $b cmp $a } keys %merged) {
my $stats = $merged{$date};
next unless $stats->{count};
- my ( $year, $month, $day ) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+ my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/;
# Check if .gmi file exists and its age based on date in filename
- my $report_path = "$output_dir/$date.gmi";
+ my $report_path = "$output_dir/$date.gmi";
my $html_report_path = "$output_dir/$date.html";
# Calculate age of the data based on date in filename
my $today = Time::Piece->new();
- my $file_date = Time::Piece->strptime( $date, '%Y%m%d' );
- my $age_days = ( $today - $file_date ) / ( 24 * 60 * 60 );
+ my $file_date = Time::Piece->strptime($date, '%Y%m%d');
+ my $age_days = ($today - $file_date) / (24 * 60 * 60);
- if ( -e $report_path && -e $html_report_path ) {
+ if (-e $report_path && -e $html_report_path) {
# Files exist
- if ( $age_days <= 3 ) {
+ if ($age_days <= 3) {
# Data is recent (within 3 days), regenerate it
- say
-"Regenerating daily report for $year-$month-$day (data age: "
- . sprintf( "%.1f", $age_days )
- . " days)";
+ say "Regenerating daily report for $year-$month-$day (data age: "
+ . sprintf("%.1f", $age_days)
+ . " days)";
}
else {
# Data is old (older than 3 days), skip if files exist
- say
-"Skipping daily report for $year-$month-$day (files exist, data age: "
- . sprintf( "%.1f", $age_days )
- . " days)";
+ say "Skipping daily report for $year-$month-$day (files exist, data age: "
+ . sprintf("%.1f", $age_days)
+ . " days)";
next;
}
}
else {
# File doesn't exist, generate it
- say
-"Generating new daily report for $year-$month-$day (file doesn't exist, data age: "
- . sprintf( "%.1f", $age_days )
- . " days)";
+ say "Generating new daily report for $year-$month-$day (file doesn't exist, data age: "
+ . sprintf("%.1f", $age_days)
+ . " days)";
}
my $report_content = "";
@@ -1057,27 +1127,23 @@ $content
# Feed counts first
$report_content .= "### Feed Statistics\n\n";
my @feed_rows;
- push @feed_rows, [ 'Total', $stats->{feed_ips}{'Total'} // 0 ];
- push @feed_rows,
- [ 'Gemini Gemfeed', $stats->{feed_ips}{'Gemini Gemfeed'} // 0 ];
- push @feed_rows,
- [ 'Gemini Atom', $stats->{feed_ips}{'Gemini Atom'} // 0 ];
- push @feed_rows,
- [ 'Web Gemfeed', $stats->{feed_ips}{'Web Gemfeed'} // 0 ];
- push @feed_rows,
- [ 'Web Atom', $stats->{feed_ips}{'Web Atom'} // 0 ];
+ push @feed_rows, [ 'Total', $stats->{feed_ips}{'Total'} // 0 ];
+ push @feed_rows, [ 'Gemini Gemfeed', $stats->{feed_ips}{'Gemini Gemfeed'} // 0 ];
+ push @feed_rows, [ 'Gemini Atom', $stats->{feed_ips}{'Gemini Atom'} // 0 ];
+ push @feed_rows, [ 'Web Gemfeed', $stats->{feed_ips}{'Web Gemfeed'} // 0 ];
+ push @feed_rows, [ 'Web Atom', $stats->{feed_ips}{'Web Atom'} // 0 ];
$report_content .= "```\n";
- $report_content .=
- format_table( [ 'Feed Type', 'Count' ], \@feed_rows );
+ $report_content .= format_table([ 'Feed Type', 'Count' ], \@feed_rows);
$report_content .= "\n```\n\n";
+
# Top 50 URLs next
$report_content .= "### Top 50 URLs\n\n";
my @url_rows;
my $urls = $stats->{page_ips}{urls};
my @sorted_urls =
- sort { ( $urls->{$b} // 0 ) <=> ( $urls->{$a} // 0 ) }
- keys %$urls;
- my $truncated = @sorted_urls > 50;
+ sort { ($urls->{$b} // 0) <=> ($urls->{$a} // 0) }
+ keys %$urls;
+ my $truncated = @sorted_urls > 50;
@sorted_urls = @sorted_urls[ 0 .. 49 ] if $truncated;
for my $url (@sorted_urls) {
@@ -1085,10 +1151,9 @@ $content
}
# Truncate URLs to fit within 100-character rows
- truncate_urls_for_table( \@url_rows, 'Unique Visitors' );
+ truncate_urls_for_table(\@url_rows, 'Unique Visitors');
$report_content .= "```\n";
- $report_content .=
- format_table( [ 'URL', 'Unique Visitors' ], \@url_rows );
+ $report_content .= format_table([ 'URL', 'Unique Visitors' ], \@url_rows);
$report_content .= "\n```\n";
if ($truncated) {
$report_content .= "\n... and more (truncated to 50 entries).\n";
@@ -1100,18 +1165,17 @@ $content
my @host_rows;
my $hosts = $stats->{page_ips}{hosts};
my @sorted_hosts =
- sort { ( $hosts->{$b} // 0 ) <=> ( $hosts->{$a} // 0 ) }
- keys %$hosts;
+ sort { ($hosts->{$b} // 0) <=> ($hosts->{$a} // 0) }
+ keys %$hosts;
- $truncated = @sorted_hosts > 50;
+ $truncated = @sorted_hosts > 50;
@sorted_hosts = @sorted_hosts[ 0 .. 49 ] if $truncated;
for my $host (@sorted_hosts) {
push @host_rows, [ $host, $hosts->{$host} // 0 ];
}
$report_content .= "```\n";
- $report_content .=
- format_table( [ 'Host', 'Unique Visitors' ], \@host_rows );
+ $report_content .= format_table([ 'Host', 'Unique Visitors' ], \@host_rows);
$report_content .= "\n```\n";
if ($truncated) {
$report_content .= "\n... and more (truncated to 50 entries).\n";
@@ -1121,22 +1185,22 @@ $content
# Summary last
$report_content .= "### Summary\n\n";
my $total_requests =
- ( $stats->{count}{gemini} // 0 ) + ( $stats->{count}{web} // 0 );
+ ($stats->{count}{gemini} // 0) + ($stats->{count}{web} // 0);
$report_content .= "* Total requests: $total_requests\n";
$report_content .=
- "* Filtered requests: " . ( $stats->{count}{filtered} // 0 ) . "\n";
+ "* Filtered requests: " . ($stats->{count}{filtered} // 0) . "\n";
$report_content .=
- "* Gemini requests: " . ( $stats->{count}{gemini} // 0 ) . "\n";
+ "* Gemini requests: " . ($stats->{count}{gemini} // 0) . "\n";
$report_content .=
- "* Web requests: " . ( $stats->{count}{web} // 0 ) . "\n";
+ "* Web requests: " . ($stats->{count}{web} // 0) . "\n";
$report_content .=
- "* IPv4 requests: " . ( $stats->{count}{IPv4} // 0 ) . "\n";
+ "* IPv4 requests: " . ($stats->{count}{IPv4} // 0) . "\n";
$report_content .=
- "* IPv6 requests: " . ( $stats->{count}{IPv6} // 0 ) . "\n\n";
+ "* IPv6 requests: " . ($stats->{count}{IPv6} // 0) . "\n\n";
# Add links to summary reports (only monthly)
$report_content .= "## Related Reports\n\n";
- my $now = localtime;
+ my $now = localtime;
my $current_date = $now->strftime('%Y%m%d');
$report_content .= "=> ./30day_summary_$current_date.gmi 30-Day Summary Report\n\n";
@@ -1145,29 +1209,29 @@ $content
# $report_path already defined above
say "Writing report to $report_path";
- FileHelper::write( $report_path, $report_content );
-
+ FileHelper::write($report_path, $report_content);
+
# Also write HTML version
mkdir $html_output_dir unless -d $html_output_dir;
- my $html_path = "$html_output_dir/$date.html";
+ my $html_path = "$html_output_dir/$date.html";
my $html_content = gemtext_to_html($report_content);
- my $html_page = generate_html_page("Stats for $year-$month-$day", $html_content);
+ my $html_page = generate_html_page("Stats for $year-$month-$day", $html_content);
say "Writing HTML report to $html_path";
- FileHelper::write( $html_path, $html_page );
+ FileHelper::write($html_path, $html_page);
}
# Generate summary reports
- generate_summary_report( 30, $stats_dir, $output_dir, $html_output_dir, %merged );
-
+ generate_summary_report(30, $stats_dir, $output_dir, $html_output_dir, %merged);
+
# Generate index.gmi and index.html
- generate_index( $output_dir, $html_output_dir );
+ generate_index($output_dir, $html_output_dir);
}
sub generate_summary_report {
- my ( $days, $stats_dir, $output_dir, $html_output_dir, %merged ) = @_;
+ my ($days, $stats_dir, $output_dir, $html_output_dir, %merged) = @_;
# Get the last N days of dates
- my @dates = sort { $b cmp $a } keys %merged;
+ my @dates = sort { $b cmp $a } keys %merged;
my $max_index = $days - 1;
@dates = @dates[ 0 .. $max_index ] if @dates > $days;
@@ -1176,16 +1240,16 @@ $content
# Build report content
my $report_content = build_report_header($today, $days);
+
# Order: feed counts -> Top URLs -> daily top 3 for last 30 days -> other tables
- $report_content .= build_feed_statistics_section( \@dates, \%merged );
+ $report_content .= build_feed_statistics_section(\@dates, \%merged);
# Aggregate and add top lists
- my ( $all_hosts, $all_urls ) =
- aggregate_hosts_and_urls( \@dates, \%merged );
+ my ($all_hosts, $all_urls) = aggregate_hosts_and_urls(\@dates, \%merged);
$report_content .= build_top_urls_section($all_urls, $days);
$report_content .= build_top3_urls_last_n_days_per_day($stats_dir, 30, \%merged);
$report_content .= build_top_hosts_section($all_hosts, $days);
- $report_content .= build_daily_summary_section( \@dates, \%merged );
+ $report_content .= build_daily_summary_section(\@dates, \%merged);
# Add links to other summary reports
$report_content .= build_summary_links($days, $report_date);
@@ -1195,24 +1259,25 @@ $content
my $report_path = "$output_dir/${days}day_summary_$report_date.gmi";
say "Writing $days-day summary report to $report_path";
- FileHelper::write( $report_path, $report_content );
-
+ FileHelper::write($report_path, $report_content);
+
# Also write HTML version, except for 365-day summaries (HTML suppressed)
if ($days != 365) {
mkdir $html_output_dir unless -d $html_output_dir;
- my $html_path = "$html_output_dir/${days}day_summary_$report_date.html";
+ my $html_path = "$html_output_dir/${days}day_summary_$report_date.html";
my $html_content = gemtext_to_html($report_content);
- my $html_page = generate_html_page("$days-Day Summary Report", $html_content);
+ my $html_page = generate_html_page("$days-Day Summary Report", $html_content);
say "Writing HTML $days-day summary report to $html_path";
- FileHelper::write( $html_path, $html_page );
- } else {
+ FileHelper::write($html_path, $html_page);
+ }
+ else {
say "Skipping HTML generation for 365-day summary (Gemtext only)";
}
}
sub build_report_header {
my ($today, $days) = @_;
- $days //= 30; # Default to 30 days for backward compatibility
+ $days //= 30; # Default to 30 days for backward compatibility
my $content = "# $days-Day Summary Report\n\n";
$content .= "Generated on " . $today->strftime('%Y-%m-%d') . "\n\n";
@@ -1220,74 +1285,65 @@ $content
}
sub build_daily_summary_section {
- my ( $dates, $merged ) = @_;
+ my ($dates, $merged) = @_;
my $content = "## Daily Summary Evolution (Last 30 Days)\n\n";
$content .= "### Total Requests by Day\n\n```\n";
my @summary_rows;
- for my $date ( reverse @$dates ) {
+ for my $date (reverse @$dates) {
my $stats = $merged->{$date};
next unless $stats->{count};
- push @summary_rows, build_daily_summary_row( $date, $stats );
+ push @summary_rows, build_daily_summary_row($date, $stats);
}
- $content .= format_table(
- [ 'Date', 'Filtered', 'Gemini', 'Web', 'IPv4', 'IPv6', 'Total' ],
- \@summary_rows );
+ $content .= format_table([ 'Date', 'Filtered', 'Gemini', 'Web', 'IPv4', 'IPv6', 'Total' ], \@summary_rows);
$content .= "\n```\n\n";
return $content;
}
sub build_daily_summary_row {
- my ( $date, $stats ) = @_;
+ my ($date, $stats) = @_;
- my ( $year, $month, $day ) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+ my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/;
my $formatted_date = "$year-$month-$day";
my $total_requests =
- ( $stats->{count}{gemini} // 0 ) + ( $stats->{count}{web} // 0 );
+ ($stats->{count}{gemini} // 0) + ($stats->{count}{web} // 0);
my $filtered = $stats->{count}{filtered} // 0;
my $gemini = $stats->{count}{gemini} // 0;
my $web = $stats->{count}{web} // 0;
my $ipv4 = $stats->{count}{IPv4} // 0;
my $ipv6 = $stats->{count}{IPv6} // 0;
- return [
- $formatted_date, $filtered,
- $gemini, $web, $ipv4,
- $ipv6, $total_requests
- ];
+ return [ $formatted_date, $filtered, $gemini, $web, $ipv4, $ipv6, $total_requests ];
}
sub build_feed_statistics_section {
- my ( $dates, $merged ) = @_;
+ my ($dates, $merged) = @_;
my $content = "### Feed Statistics Evolution\n\n```\n";
my @feed_rows;
- for my $date ( reverse @$dates ) {
+ for my $date (reverse @$dates) {
my $stats = $merged->{$date};
next unless $stats->{feed_ips};
- push @feed_rows, build_feed_statistics_row( $date, $stats );
+ push @feed_rows, build_feed_statistics_row($date, $stats);
}
- $content .= format_table(
- [ 'Date', 'Gem Feed', 'Gem Atom', 'Web Feed', 'Web Atom', 'Total' ],
- \@feed_rows
- );
+ $content .= format_table([ 'Date', 'Gem Feed', 'Gem Atom', 'Web Feed', 'Web Atom', 'Total' ], \@feed_rows);
$content .= "\n```\n\n";
return $content;
}
sub build_feed_statistics_row {
- my ( $date, $stats ) = @_;
+ my ($date, $stats) = @_;
- my ( $year, $month, $day ) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+ my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/;
my $formatted_date = "$year-$month-$day";
return [
@@ -1301,7 +1357,7 @@ $content
}
sub aggregate_hosts_and_urls {
- my ( $dates, $merged ) = @_;
+ my ($dates, $merged) = @_;
my %all_hosts;
my %all_urls;
@@ -1311,20 +1367,19 @@ $content
next unless $stats->{page_ips};
# Aggregate hosts
- while ( my ( $host, $count ) = each %{ $stats->{page_ips}{hosts} } )
- {
+ while (my ($host, $count) = each %{ $stats->{page_ips}{hosts} }) {
$all_hosts{$host} //= 0;
$all_hosts{$host} += $count;
}
# Aggregate URLs
- while ( my ( $url, $count ) = each %{ $stats->{page_ips}{urls} } ) {
+ while (my ($url, $count) = each %{ $stats->{page_ips}{urls} }) {
$all_urls{$url} //= 0;
$all_urls{$url} += $count;
}
}
- return ( \%all_hosts, \%all_urls );
+ return (\%all_hosts, \%all_urls);
}
sub build_top_hosts_section {
@@ -1335,14 +1390,14 @@ $content
my @host_rows;
my @sorted_hosts =
- sort { $all_hosts->{$b} <=> $all_hosts->{$a} } keys %$all_hosts;
+ sort { $all_hosts->{$b} <=> $all_hosts->{$a} } keys %$all_hosts;
@sorted_hosts = @sorted_hosts[ 0 .. 49 ] if @sorted_hosts > 50;
for my $host (@sorted_hosts) {
push @host_rows, [ $host, $all_hosts->{$host} ];
}
- $content .= format_table( [ 'Host', 'Visitors' ], \@host_rows );
+ $content .= format_table([ 'Host', 'Visitors' ], \@host_rows);
$content .= "\n```\n\n";
return $content;
@@ -1356,7 +1411,7 @@ $content
my @url_rows;
my @sorted_urls =
- sort { $all_urls->{$b} <=> $all_urls->{$a} } keys %$all_urls;
+ sort { $all_urls->{$b} <=> $all_urls->{$a} } keys %$all_urls;
@sorted_urls = @sorted_urls[ 0 .. 49 ] if @sorted_urls > 50;
for my $url (@sorted_urls) {
@@ -1364,18 +1419,19 @@ $content
}
# Truncate URLs to fit within 100-character rows
- truncate_urls_for_table( \@url_rows, 'Visitors' );
+ truncate_urls_for_table(\@url_rows, 'Visitors');
- $content .= format_table( [ 'URL', 'Visitors' ], \@url_rows );
+ $content .= format_table([ 'URL', 'Visitors' ], \@url_rows);
$content .= "\n```\n\n";
return $content;
}
sub build_summary_links {
- my ( $current_days, $report_date ) = @_;
+ my ($current_days, $report_date) = @_;
my $content = '';
+
# Only add link to 30-day summary when not on the 30-day report itself
if ($current_days != 30) {
$content .= "## Other Summary Reports\n\n";
@@ -1385,17 +1441,18 @@ $content
return $content;
}
-sub build_top3_urls_last_n_days_per_day {
- my ($stats_dir, $days, $merged) = @_;
- $days //= 30;
- my $content = "## Top 5 URLs Per Day (Last ${days} Days)\n\n";
+ sub build_top3_urls_last_n_days_per_day {
+ my ($stats_dir, $days, $merged) = @_;
+ $days //= 30;
+ my $content = "## Top 5 URLs Per Day (Last ${days} Days)\n\n";
- my @all = DateHelper::last_month_dates();
- my @dates = @all;
- @dates = @all[0 .. $days-1] if @all > $days;
- return $content . "(no data)\n\n" unless @dates;
+ my @all = DateHelper::last_month_dates();
+ my @dates = @all;
+ @dates = @all[ 0 .. $days - 1 ] if @all > $days;
+ return $content . "(no data)\n\n" unless @dates;
for my $date (@dates) {
+
# Prefer in-memory merged stats if available; otherwise merge from disk
my $stats = $merged->{$date};
if (!$stats || !($stats->{page_ips} && $stats->{page_ips}{urls})) {
@@ -1403,24 +1460,24 @@ sub build_top3_urls_last_n_days_per_day {
}
next unless $stats && $stats->{page_ips} && $stats->{page_ips}{urls};
- my ($y,$m,$d) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+ my ($y, $m, $d) = $date =~ /(\d{4})(\d{2})(\d{2})/;
$content .= "### $y-$m-$d\n\n";
- my $urls = $stats->{page_ips}{urls};
- my @sorted = sort { ($urls->{$b}//0) <=> ($urls->{$a}//0) } keys %$urls;
+ my $urls = $stats->{page_ips}{urls};
+ my @sorted = sort { ($urls->{$b} // 0) <=> ($urls->{$a} // 0) } keys %$urls;
next unless @sorted;
my $limit = @sorted < 5 ? @sorted : 5;
- @sorted = @sorted[0..$limit-1];
+ @sorted = @sorted[ 0 .. $limit - 1 ];
my @rows;
for my $u (@sorted) { push @rows, [ $u, $urls->{$u} // 0 ]; }
- truncate_urls_for_table( \@rows, 'Visitors' );
+ truncate_urls_for_table(\@rows, 'Visitors');
$content .= "```\n" . format_table([ 'URL', 'Visitors' ], \@rows) . "\n```\n\n";
+ }
+
+ return $content;
}
- return $content;
-}
-
sub generate_index {
my ($output_dir, $html_output_dir) = @_;
@@ -1430,13 +1487,14 @@ sub build_top3_urls_last_n_days_per_day {
closedir($dh);
my @summaries_30day = sort { $b cmp $a } grep { /^30day_summary_/ } @gmi_files;
- my $latest_30 = $summaries_30day[0];
+ my $latest_30 = $summaries_30day[0];
my $index_path = "$output_dir/index.gmi";
mkdir $html_output_dir unless -d $html_output_dir;
my $html_path = "$html_output_dir/index.html";
if ($latest_30) {
+
# Read 30-day summary content and use it as index
my $summary_path = "$output_dir/$latest_30";
open my $sfh, '<', $summary_path or die "$summary_path: $!";
@@ -1457,9 +1515,10 @@ sub build_top3_urls_last_n_days_per_day {
close $hh;
say "Writing HTML index to $html_path (copy of $latest_html)";
FileHelper::write($html_path, $html_page);
- } else {
+ }
+ else {
my $html_content = gemtext_to_html($content);
- my $html_page = generate_html_page("30-Day Summary Report", $html_content);
+ my $html_page = generate_html_page("30-Day Summary Report", $html_content);
say "Writing HTML index to $html_path (from gemtext)";
FileHelper::write($html_path, $html_page);
}
@@ -1472,18 +1531,18 @@ sub build_top3_urls_last_n_days_per_day {
FileHelper::write($index_path, $fallback);
my $html_content = gemtext_to_html($fallback);
- my $html_page = generate_html_page("Foostats Reports Index", $html_content);
+ my $html_page = generate_html_page("Foostats Reports Index", $html_content);
say "Writing fallback HTML index to $html_path";
FileHelper::write($html_path, $html_page);
}
}
package main;
- use Getopt::Long;
- use Sys::Hostname;
+use Getopt::Long;
+use Sys::Hostname;
- sub usage {
- print <<~"USAGE";
+sub usage {
+ print <<~"USAGE";
Usage: $0 [options]
Options:
@@ -1506,73 +1565,72 @@ package main;
--version Show version information.
--help Show this help message.
USAGE
- exit 0;
- }
+ exit 0;
+}
- sub parse_logs ( $stats_dir, $odds_file, $odds_log ) {
- my $out = Foostats::FileOutputter->new( stats_dir => $stats_dir );
+sub parse_logs ($stats_dir, $odds_file, $odds_log) {
+ my $out = Foostats::FileOutputter->new(stats_dir => $stats_dir);
- $out->{stats} = Foostats::Logreader::parse_logs(
- $out->last_processed_date('web'),
- $out->last_processed_date('gemini'),
- $odds_file, $odds_log
- );
+ $out->{stats} = Foostats::Logreader::parse_logs(
+ $out->last_processed_date('web'),
+ $out->last_processed_date('gemini'),
+ $odds_file, $odds_log
+ );
- $out->write;
- }
+ $out->write;
+}
- sub foostats_main {
- my ( $parse_logs, $replicate, $report, $all, $help, $version );
-
- # With default values
- my $stats_dir = '/var/www/htdocs/buetow.org/self/foostats';
- my $odds_file = $stats_dir . '/fooodds.txt';
- my $odds_log = '/var/log/fooodds';
- my $output_dir; # Will default to $stats_dir/gemtext if not specified
- my $html_output_dir; # Will default to /var/www/htdocs/gemtexter/stats.foo.zone if not specified
- my $partner_node =
- hostname eq 'fishfinger.buetow.org'
- ? 'blowfish.buetow.org'
- : 'fishfinger.buetow.org';
-
- GetOptions
- 'parse-logs!' => \$parse_logs,
- 'filter-log=s' => \$odds_log,
- 'odds-file=s' => \$odds_file,
- 'replicate!' => \$replicate,
- 'report!' => \$report,
- 'all!' => \$all,
- 'stats-dir=s' => \$stats_dir,
- 'output-dir=s' => \$output_dir,
- 'html-output-dir=s' => \$html_output_dir,
- 'partner-node=s' => \$partner_node,
- 'version' => \$version,
- 'help|?' => \$help;
-
- if ($version) {
- print "foostats " . VERSION . "\n";
- exit 0;
- }
+sub foostats_main {
+ my ($parse_logs, $replicate, $report, $all, $help, $version);
+
+ # With default values
+ my $stats_dir = '/var/www/htdocs/buetow.org/self/foostats';
+ my $odds_file = $stats_dir . '/fooodds.txt';
+ my $odds_log = '/var/log/fooodds';
+ my $output_dir; # Will default to $stats_dir/gemtext if not specified
+ my $html_output_dir; # Will default to /var/www/htdocs/gemtexter/stats.foo.zone if not specified
+ my $partner_node =
+ hostname eq 'fishfinger.buetow.org'
+ ? 'blowfish.buetow.org'
+ : 'fishfinger.buetow.org';
+
+ GetOptions
+ 'parse-logs!' => \$parse_logs,
+ 'filter-log=s' => \$odds_log,
+ 'odds-file=s' => \$odds_file,
+ 'replicate!' => \$replicate,
+ 'report!' => \$report,
+ 'all!' => \$all,
+ 'stats-dir=s' => \$stats_dir,
+ 'output-dir=s' => \$output_dir,
+ 'html-output-dir=s' => \$html_output_dir,
+ 'partner-node=s' => \$partner_node,
+ 'version' => \$version,
+ 'help|?' => \$help;
+
+ if ($version) {
+ print "foostats " . VERSION . "\n";
+ exit 0;
+ }
- usage() if $help;
+ usage() if $help;
- parse_logs( $stats_dir, $odds_file, $odds_log )
- if $parse_logs
- or $all;
+ parse_logs($stats_dir, $odds_file, $odds_log)
+ if $parse_logs
+ or $all;
- Foostats::Replicator::replicate( $stats_dir, $partner_node )
- if $replicate
- or $all;
+ Foostats::Replicator::replicate($stats_dir, $partner_node)
+ if $replicate
+ or $all;
- # Set default output directories if not specified
- $output_dir //= '/var/gemini/stats.foo.zone';
- $html_output_dir //= '/var/www/htdocs/gemtexter/stats.foo.zone';
+ # Set default output directories if not specified
+ $output_dir //= '/var/gemini/stats.foo.zone';
+ $html_output_dir //= '/var/www/htdocs/gemtexter/stats.foo.zone';
- Foostats::Reporter::report( $stats_dir, $output_dir, $html_output_dir,
- Foostats::Merger::merge($stats_dir) )
- if $report
- or $all;
- }
+ Foostats::Reporter::report($stats_dir, $output_dir, $html_output_dir, Foostats::Merger::merge($stats_dir))
+ if $report
+ or $all;
+}
- # Only run main flow when executed as a script, not when required (e.g., tests)
- foostats_main() unless caller;
+# Only run main flow when executed as a script, not when required (e.g., tests)
+foostats_main() unless caller;
diff --git a/t/tmp_filter_log b/t/tmp_filter_log
index 3b07060..1b549e8 100644
--- a/t/tmp_filter_log
+++ b/t/tmp_filter_log
@@ -23,3 +23,6 @@ OK: /gemfeed/index.gmi appears fine...
OK: /index.html appears fine...
OK: /index.html appears fine...
WARN: same blocked due to excessive requesting...
+OK: /index.html appears fine...
+OK: /some/really/long/path/with/many/segments/and/query/parts/that/could/be/truncated/when/rendered/for/display/example.html appears fine...
+WARN: kztSAIRs7AaVHRqGWS7so4NFHc3bsTmarHBZvMDO8IreNJYorMG0T4VtDO7g32NwLamuggahn8zDDaQ8zptcug blocked due to excessive requesting...