Wikipedia:Statistik/totalViews.pl
#!/usr/bin/perl
# This script was released by w:als:User:Melancholie under the GNU General Public License and Creative Commons by-sa (attribution + share alike); if you should improve this script, please tell me!
# Usage: see bottom (or just try ;-)
use LWP::UserAgent;

$project = $ARGV[0];
$date = $ARGV[1];
$hour = $ARGV[2];
if ($hour eq "") {
$hour = 0;
} else {
$hour =~ s/^\+(.+)$/$1/;
$hour =~ s/^(-?)0([0-9])$/$1$2/;
$hour =~ s/^(-?)0?([0-9]):?00$/$1$2/;
}
$hour = $hour - 1;
if ($hour > 0) {$hour = 24 - $hour;} else {$hour = $hour * -1;}
if ($hour < 10) {$hour = "0$hour";}
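# Worked examples for the offset handling above (added for clarity; they follow
# from the usage notes printed at the bottom of this script):
#   no offset      -> $hour = "01"   (first hourly dump of the UTC day)
#   "+2" (UTC+2)   -> $hour = "23"   (so the previous UTC day has to be passed)
#   "-5" (UTC-5)   -> $hour = "06"
# The hourly dump files are apparently labelled with the hour at which they end,
# which is why 1 is subtracted first.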
mkdir "projectcounts" unless -d "projectcounts";
mkdir "pagecounts" unless -d "pagecounts";
mkdir "$date" unless -d "$date";
for (my $i = 1; $i <= 2; $i++) {
if ($ARGV[0] && $ARGV[1] && $hour =~ /^[0-9]{2,2}$/) {
if ($i eq 1) {
$url = "http://dammit.lt/wikistats/";
$file = "wikiStats.htm";
print "Downloading $file\n";
$userAgent = LWP::UserAgent->new();
$httpRequest = HTTP::Request->new("GET", $url);
$serverResponse = $userAgent->request($httpRequest, $file);
if ($serverResponse->is_error()) {
print "Error code: ", $serverResponse->code(), "\n";
print "Error message: ", $serverResponse->message(), "\n";
}
}
open HTM, "wikiStats.htm";
if ($i eq 1) {
open LIST, ">projectcounts/list.txt";
} else {
print "Preparing to download pagecounts files (+/- 600 MB; press Ctrl+C to abort)\n";
open LIST, ">pagecounts/list.txt";
open EXE, ">pagecounts/extract.sh";
print EXE "#!/bin/bash\n\n";
}
$j = 0;
while(<HTM>) {
if ($i eq 1 && $_ =~ /href="projectcounts-([0-9]+)-([0-9]{2,2})([0-9]+)"/ || $i eq 2 && $_ =~ /href="pagecounts-([0-9]+)-([0-9]{2,2})([0-9]+)\.gz"/) {
if ($1 eq $date && $2 eq $hour || $j > 0 && $j < 24) {
$j++;
if ($i eq 1) {
$file = "projectcounts-$1-$2$3";
} else {
$file = "pagecounts-$1-$2$3.gz";
}
print LIST "$file\n";
if ($i eq 2) {
print EXE "echo \" Extracting $file\" && gunzip -f $file && ";
}
$url = "http://dammit.lt/wikistats/$file";
print " Downloading $file\n";
$userAgent = LWP::UserAgent->new();
$httpRequest = HTTP::Request->new("GET", $url);
if ($i eq 1) {
$serverResponse = $userAgent->request($httpRequest, "projectcounts/$file");
} else {
$serverResponse = $userAgent->request($httpRequest, "pagecounts/$file");
}
if ($serverResponse->is_error()) {
print "Error code: ", $serverResponse->code(), "\n";
print "Error message: ", $serverResponse->message(), "\n";
}
}
}
}
if ($i eq 2) {
print EXE "sleep 1";
close EXE;
}
close LIST;
close HTM;
if ($i eq 1) {
open LIST, "projectcounts/list.txt";
$totalViews = 0;
my %globalViews; # language code => summed page hits
while(<LIST>) {
if ($_ =~ /^(projectcounts-[0-9]+-[0-9]+)$/) {
open IN, "projectcounts/$1";
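# Each projectcounts line is expected to look like "als - 123456 7890123"
# (project code, hourly page hits, plus one further number); only lines that
# match the pattern below are counted.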
while(<IN>) {
if ($_ =~ /^([a-z-]+) - ([0-9]+) ([0-9]+)$/) {
if ($project eq "Wikimedia") {
$globalViews{$1} = $globalViews{$1}+$2;
} elsif ($1 eq $project) {
$totalViews = $totalViews+$2;
}
}
}
close IN;
}
}
close LIST;
unlink ("projectcounts/list.txt");
# unlink ("projectcounts-files")?
# rmdir "projectcounts-folder"?
open ALS, "totalHits-$date.txt";
while(<ALS>) {
if ($_ =~ /^([0-9]+) /) {$alsHits = $1;}
}
close ALS;
if ($alsHits) {$globalViews{"als"} = $alsHits;}
if ($project eq "Wikimedia") {
open OUT, ">$date/index.html";
print OUT "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n";
print OUT "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n\n";
print OUT "<head>\n";
print OUT "<meta name=\"robots\" content=\"noindex, noarchive, nosnippet\" />\n";
print OUT "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n";
print OUT "<title>Wikimedia page Hits</title>\n";
print OUT "</head>\n\n";
print OUT "<body style=\"background-color: #FFFFF0; margin: 10px;\">\n";
print OUT "<p>&lt; <a href=\"../\">Home</a></p>\n";
print OUT "<p>Total page hits for Wikimedia wikis on $date (UTC); counted by the <a href=\"http://lists.wikimedia.org/pipermail/wikitech-l/2007-December/035435.html\">squid servers</a> (also bots, crawlers, reloads etc. have been counted)<br />\n";
print OUT "Wiktionary &amp; Co. are not yet analysed in <a href=\"http://lists.wikimedia.org/pipermail/wikitech-l/2007-December/035435.html\">midom's statistics</a>, unfortunately!<br />\n";
print OUT "Overall page impressions: Not until Wiktionary &amp; Co. get analysed!</p>\n";
print OUT "<hr />\n";
print OUT "<p>Sorted by language code:</p>\n<ul>\n";
my @abc = sort keys %globalViews;
my @hitList;
foreach (@abc) {
$key = $_;
$wikiHits = $globalViews{$key};
# insert thousands separators for display
$wikiHits =~ s/([0-9])([0-9]{3,3})$/$1,$2/;
$wikiHits =~ s/([0-9])([0-9]{3,3}),/$1,$2,/;
if ($globalViews{$key} > 99999) {
print OUT "<li><a href=\"../do-it-yourself.htm\">$key</a> - $wikiHits page hits</li>\n";
} elsif ($globalViews{$key} > 1) {
print OUT "<li><a href=\"$key/\">$key</a> - $wikiHits page hits</li>\n";
}
push(@hitList, $globalViews{$key});
}
print OUT "</ul>\n";
print OUT "<p>Sorted by total page hits:</p>\n<ul>\n";
my @hit = sort(numSort @hitList);
my $dbl = 0;
foreach (@hit) {
if ($_ ne $dbl) {
$wikiHits = $_;
$dbl = $_;
foreach (keys %globalViews) {
$key = $_;
if ($globalViews{$_} eq $dbl) {
$wikiHits =~ s/([0-9])([0-9]{3,3})$/$1,$2/;
$wikiHits =~ s/([0-9])([0-9]{3,3}),/$1,$2,/;
if ($globalViews{$key} > 99999) {
print OUT "<li><a href=\"../do-it-yourself.htm\">$key</a> - $wikiHits page hits</li>\n";
} elsif ($globalViews{$key} > 1) {
print OUT "<li><a href=\"$key/\">$key</a> - $wikiHits page hits</li>\n";
}
}
}
}
}
print OUT "</ul>\n<hr />\n";
print OUT "<p>Get the free-and-open-source <a href=\"../wikiHits.pl\">Perl script</a>, used for creating this overview!</p>\n";
print OUT "</body>\n</html>";
} else {
open OUT, ">totalHits-$date.txt";
print OUT "$totalViews page hits/views including those of bots, reloads etc. ($date, UTC$ARGV[2])";
print "$totalViews page hits/views including those of bots, reloads etc. ($date, UTC$ARGV[2])\n";
}
close OUT;
} else {
print "You have to extract the downloaded *.gz files (> 2.5 GB; use 7-Zip)!\n If you are running Linux you can execute extract.sh!\n You may then run pageHits.pl, too!\n\n";
}
} else {
if ($i eq 1) {
print "\nYou have to specify your project code and the exact day and hour (UTC) to begin with!\n";
print " If you want to evaluate the day 2008-02-17, for example:\n";
print " If your wiki's timezone is UTC+2 or more, you have to type \"perl wikiHits.pl de 20080216 +2\"\n";
print " If your wiki's timezone is UTC-5 [< +2], you have to type \"perl wikiHits.pl en 20080217 -5\"\n";
print " If your project is Wikimedia Commons you may just type \"perl wikiHits.pl commons 20080217\"\n\n";
}
}
}

unlink ("wikiStats.htm");
# numeric sort in descending order (largest hit counts first)
sub numSort {
if ($a > $b) {return -1;}
elsif ($a == $b) {return 0;}
else {return 1;}
}
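
# ----------------------------------------------------------------------------
# Example invocations (a sketch only, following the usage notes printed above,
# which assume the script is saved as "wikiHits.pl"; project codes and dates
# are illustrative):
#
#   perl wikiHits.pl als 20080217 +1       # wiki in UTC+1: pass the same calendar day
#   perl wikiHits.pl de 20080216 +2        # UTC+2 or more: pass the previous UTC day
#   perl wikiHits.pl commons 20080217      # Wikimedia Commons: no offset needed
#   perl wikiHits.pl Wikimedia 20080217    # build the per-language overview page
#
# A run with a project code writes its total to "totalHits-<date>.txt"; the
# special project name "Wikimedia" instead writes the HTML overview to
# "<date>/index.html", based on the projectcounts files downloaded above.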

