Added an option for fetching website hit data from UDD, and enabled it by

default. It falls back to alphabetical sorting if some required modules are not available or the fetch fails. Also added a note to the stats page mentioning the new sort order. CVS version numbers stattrans.pl: 1.94 -> 1.95
author: Marcin Owsiany <porridge> 2010-12-27 15:33:41 +0000
committer: Marcin Owsiany <porridge> 2010-12-27 15:33:41 +0000
commit: 57314027eb8a0d5e82f0871c12eac2b2a4eafec2 (patch)
tree: 26e4f905e34209461bcf3548924b0804834c7f8f /stattrans.pl
parent: ae5ba3d04a732e032aea31c3ffda7c456f5f0049 (diff)
1 files changed, 58 insertions, 18 deletions
diff --git a/stattrans.pl b/stattrans.pl
index 214299b3af0..64df83b129d 100755
--- a/stattrans.pl
+++ b/stattrans.pl
@@ -27,6 +27,19 @@ use Webwml::Langs;
 use Webwml::TransCheck;
 use Webwml::TransIgnore;
 
+BEGIN {
+    $udd_available = 0;
+    eval {
+        require JSON;
+        require LWP::Simple;
+        LWP::Simple->import;
+        $udd_available = 1;
+    }; if ($@) {
+        warn "One or more modules required for DDE support failed to load: $@\n";
+    }
+}
+
+
 $| = 1;
 
 $opt_h = "/org/www.debian.org/www/devel/website/stats";
@@ -37,7 +50,8 @@ $opt_v = 0;
 $opt_d = "u";
 $opt_l = undef;
 $opt_b = ""; # Base URL, if not debian.org
-$opt_f = undef; # File lines: "1299999 /doc/index\n"
+# URL of JSON data or path to plaintext file with lines: "1299999 /doc/index\n"
+$opt_f = "http://dde.debian.net/dde/q/static/porridge/stats?t=json";
 getopts('h:w:b:p:t:vd:l:f:') || die;
 #  Replace filename globbing by Perl regexps
 $opt_p =~ s/\./\\./g;
@@ -271,27 +285,49 @@ mkdir ($config{'htmldir'}, 02775) if (! -d $config{'htmldir'});
 
 # Read website hit statistics, if available
 my %hits;
-my $file_sorter;
+my $file_sorter = sub($$) { $_[0] cmp $_[1] };
+if ($config{'hit_file'} and $config{'hit_file'} =~ m{^(f|ht)tps?://} and ! $udd_available) {
+    warn "Disabling fetching of hit data.\n";
+    $config{'hit_file'} = undef;
+}
 if ($config{'hit_file'}) {
-    open(HITS, $config{'hit_file'}) or die sprintf("Opening hit file [%s] failed: $!", $config{'hit_file'});
-    printf "Reading hit file [%s]\n", $config{'hit_file'} if ($config{'verbose'});
-    foreach my $hit_line (<HITS>) {
-	chomp $hit_line;
-        $hit_line =~ /^\s*(\d+)\s+(.*)/ or warn sprintf("unrecognized hit file [%s] line [%s]", $config{'hit_file'}, $hit_line);
-	my ($count, $url) = ($1, $2);
-        last if $count < 3; # URLS with 2 or 1 hits are most likely mistakes; let's not waste RAM on them
-        $hits{substr($url, 1)} = $count;
+    if ($config{'hit_file'} =~ m{^(f|ht)tps?://}) {
+        printf("Retrieving hit data from [%s].\n", $config{'hit_file'}) if ($config{'verbose'});
+        my $json = LWP::Simple::get($config{'hit_file'});
+        if ($json) {
+            my $perl = JSON::from_json($json, {utf8 => 1});
+            foreach my $e (@{$perl->{'r'}}) {
+                my ($count, $url) = @$e;
+                last if $count < 3; # URLS with 2 or 1 hits are most likely mistakes; let's not waste RAM on them
+                $hits{substr($url, 1)} = $count;
+            }
+        } else {
+            warn "Retrieving hit data failed.\n";
+        }
+    } else {
+        open(HITS, $config{'hit_file'}) or die sprintf("Opening hit file [%s] failed: $!", $config{'hit_file'});
+        printf "Reading hit file [%s]\n", $config{'hit_file'} if ($config{'verbose'});
+        foreach my $hit_line (<HITS>) {
+    	chomp $hit_line;
+            $hit_line =~ /^\s*(\d+)\s+(.*)/ or warn sprintf("unrecognized hit file [%s] line [%s]", $config{'hit_file'}, $hit_line);
+    	my ($count, $url) = ($1, $2);
+            last if $count < 3; # URLS with 2 or 1 hits are most likely mistakes; let's not waste RAM on them
+            $hits{substr($url, 1)} = $count;
+        }
+        close(HITS) or die sprintf("Closing hit file [%s] failed: $!", $config{'hit_file'});
+    }
+    if (%hits) {
+        $file_sorter = sub($$) {
+            my ($a, $b) = @_;
+            $a =~ s/\.wml$//o;
+            $b =~ s/\.wml$//o;
+            $hits{$b} <=> $hits{$a}
+        };
+    } else {
+        print "Tables will be sorted alphabetically.\n" if ($config{'verbose'});
     }
-    close(HITS) or die sprintf("Closing hit file [%s] failed: $!", $config{'hit_file'});
-    $file_sorter = sub($$) {
-        my ($a, $b) = @_;
-        $a =~ s/\.wml$//o;
-        $b =~ s/\.wml$//o;
-        $hits{$b} <=> $hits{$a}
-    };
 } else {
     print "No hit file specified. Tables will be sorted alphabetically.\n" if ($config{'verbose'});
-    $file_sorter = sub($$) { $_[0] cmp $_[1] };
 }
 
 my @filenames = sort $file_sorter keys %files;
@@ -500,6 +536,10 @@ foreach $lang (@search_in) {
             }
             print HTML "</ul>\n";
 
+            if (%hits) {
+                print HTML "<p>Note: The lists of pages are sorted by popularity. Hover over the page name to see the number of hits.</p>\n";
+            }
+
             # outputs the content
             if ($o_body) {
                 print HTML "<h3 id='outdated'>Outdated translations: <a href='#top'>(top)</a></h3>\n";
author	Marcin Owsiany <porridge>	2010-12-27 15:33:41 +0000
committer	Marcin Owsiany <porridge>	2010-12-27 15:33:41 +0000
commit	57314027eb8a0d5e82f0871c12eac2b2a4eafec2 (patch)
tree	26e4f905e34209461bcf3548924b0804834c7f8f /stattrans.pl
parent	ae5ba3d04a732e032aea31c3ffda7c456f5f0049 (diff)