summaryrefslogtreecommitdiff
path: root/scripts/stats
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2011-03-06 10:57:41 +0000
committer Paul Buetow <paul@buetow.org> 2011-03-06 10:57:41 +0000
commit 213033db33be271791f2d9ff1c9c44c0bed79f18 (patch)
tree 1b3ff0d0eddfc7f6995afa67b5d3fc2ef8fa85a7 /scripts/stats
initial xerl import to utils
Diffstat (limited to 'scripts/stats')
-rwxr-xr-x scripts/stats/calc.sh 49
-rwxr-xr-x scripts/stats/clean.sh 49
-rwxr-xr-x scripts/stats/replace.sh 11
-rwxr-xr-x scripts/stats/stats.sh 61
4 files changed, 170 insertions, 0 deletions
diff --git a/scripts/stats/calc.sh b/scripts/stats/calc.sh
new file mode 100755
index 0000000..dfb7453
--- /dev/null
+++ b/scripts/stats/calc.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+# By Paul C. Buetow (http://www.buetow.org)
+# Pages top-15 site rankings over the last 7, 28 and 365 *.log files via less.
+perl='
+ /.*? (.*?) (.*?) /o
+ && ++$ip{$2}{$1} && ++$p{$1}
+ && ++$h{$2} && ++$t
+ for <>;
+ $l = do { $_ = length $t; $_ < 4 ? 4 : $_ };
+ printf " # %$l"."s%4s %$l"."s%4s %24s\n",
+ "HITS", "%", "UNIQ", "%", "SITE ADDRESS";
+ printf "%2.d %$l.d%4.f %$l.d%4.f %24s\n",
+ ++$i, $h{$_}, 100*$h{$_}/$t,
+ ($n = keys %{$ip{$_}}), 100*$n/(keys %p),$_
+ and $i==15 && last
+ for sort { $h{$b} <=> $h{$a} } keys %h'
+# $perl ranks field 3 (the site) by hits; UNIQ counts distinct field-2 values per site.
+logs=$(ls *.log) # NOTE(review): the windows below assume one log file per day -- confirm
+cat << STATS | less
+Weekly top 15:
+
+$(echo "$logs" | tail -n 7 | xargs cat | perl -e "$perl")
+
+Monthly top 15:
+
+$(echo "$logs" | tail -n 28 | xargs cat | perl -e "$perl")
+
+Yearly top 15:
+
+$(echo "$logs" | tail -n 365 | xargs cat | perl -e "$perl")
+
+STATS
+exit 0
+# Disabled FTP download ranking; commented out so the shell never tries to run it.
+#ftp://ftp.buetow.org download top ten:
+#`gawk '
+# $9 ~ /^\/data\/ftp\// { ++dl[\$9] }
+# END {
+# for (k in dl)
+# d[k] = sprintf("%3d %s", dl[k], k)
+# n = asort(d)
+# rank = 1
+# for (i = n; i > 0 && rank < 11; --i)
+# printf "%2.d%s\n", rank++, d[i]
+# }' /var/log/proftpdtransfer.log | sed s,/data/ftp/,,`
+#
+#This stats are powered by Perl, GNU AWK and Bourne Shell
+#STATS
+
diff --git a/scripts/stats/clean.sh b/scripts/stats/clean.sh
new file mode 100755
index 0000000..ba0f0e8
--- /dev/null
+++ b/scripts/stats/clean.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+
+# 2006 - 2008 The Xerl Project
+
+for log in *.log
+do
+ re=''
+ for remove in \
+ Charlotte \
+ Exabot \
+ Mnogo \
+ Netcraft \
+ Perl \
+ Python \
+ SurveyBot \
+ VoilaBot \
+ Yandex \
+ Yeti \
+ ajSitemap \
+ archiver \
+ crawler \
+ feed \
+ findlinks \
+ fulltext \
+ googlebot \
+ grabber \
+ jeeves \
+ msnbot \
+ pear \
+ pingdom \
+ rss2 \
+ sagool \
+ sbider \
+ slurp \
+ spider \
+ tagsdir \
+ validator \
+ walhello \
+ ;do
+ if [ -z "$re" ]
+ then
+ re="($remove)"
+ else
+ re="$re|($remove)"
+ fi
+ done
+ grep -E -i -v "$re" $log > $log.new
+ mv -f $log.new $log
+done
diff --git a/scripts/stats/replace.sh b/scripts/stats/replace.sh
new file mode 100755
index 0000000..1624364
--- /dev/null
+++ b/scripts/stats/replace.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# Rewrites the first occurrence of $from per line to $to in every *.log here.
+from='vs\.buetow\.org' # sed regex; dots escaped so they only match a literal "."
+to="vs-sim.buetow.org"
+
+for log in *.log
+do
+ [ -f "$log" ] || continue # an unmatched glob is left as the literal "*.log"
+ sed "s/$from/$to/" "$log" > "$log.new" && mv -f "$log.new" "$log"
+done
+
diff --git a/scripts/stats/stats.sh b/scripts/stats/stats.sh
new file mode 100755
index 0000000..0f1c070
--- /dev/null
+++ b/scripts/stats/stats.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+# Prints top-20 site rankings for several windows of the *.log files matched by $path*.log.
+# 2007 (C) Paul C. Buetow (http://paul.buetow.org)
+
+if [ "$1" = "xerl" ]; then
+ # "xerl" mode: zero-padded columns, no header row, site wrapped in !!URL(...)!!.
+ perl='
+ /.*? (.*?) (.*?) /o
+ && ++$ip{$2}{$1} && ++$p{$1}
+ && ++$h{$2} && ++$t
+ for <>;
+ $l = do { $_ = length $t; $_ < 4 ? 4 : $_ };
+ printf "%02.d %0$l.d %02.f %0$l.d %02.f %24s\n",
+ ++$i, $h{$_}, 100*$h{$_}/$t,
+ ($n = keys %{$ip{$_}}), 100*$n/(keys %p), "!!URL(http://$_)!!"
+ and $i==20 && last
+ for sort { $h{$b} <=> $h{$a} } keys %h'
+else
+ perl='
+ /.*? (.*?) (.*?) /o
+ && ++$ip{$2}{$1} && ++$p{$1}
+ && ++$h{$2} && ++$t
+ for <>;
+ $l = do { $_ = length $t; $_ < 4 ? 4 : $_ };
+ printf " # %$l"."s%4s %$l"."s%4s %24s\n",
+ "HITS", "%", "UNIQ", "%", "SITE ADDRESS";
+ printf "%2.d %$l.d%4.f %$l.d%4.f %24s\n",
+ ++$i, $h{$_}, 100*$h{$_}/$t,
+ ($n = keys %{$ip{$_}}), 100*$n/(keys %p),$_
+ and $i==20 && last
+ for sort { $h{$b} <=> $h{$a} } keys %h'
+fi
+
+#./clean.sh
+# NOTE(review): $path is never assigned in this script; presumably it comes from the environment.
+logs=$(ls $path*.log)
+# Every window except "Overall" drops the last-listed log file (tail N+1 | head N).
+cat << STATS
+No IP addresses are being logged by Xerl!
+
+
+Yesterdays top list (pos, total hits, total %, unique hits, unique %):
+
+$(echo "$logs" | tail -n 2 | head -n 1 | xargs cat | perl -e "$perl")
+
+Last 7 days top list (pos, total hits, total %, unique hits, unique %):
+
+$(echo "$logs" | tail -n 8 | head -n 7 | xargs cat | perl -e "$perl")
+
+Last 30 days top list (pos, total hits, total %, unique hits, unique %):
+
+$(echo "$logs" | tail -n 31 | head -n 30 | xargs cat | perl -e "$perl")
+
+Last 365 days top list (pos, total hits, total %, unique hits, unique %):
+
+$(echo "$logs" | tail -n 366 | head -n 365 | xargs cat | perl -e "$perl")
+
+Overall top list (pos, total hits, total %, unique hits, unique %):
+
+$(echo "$logs" | xargs cat | perl -e "$perl")
+STATS