#! /usr/bin/perl # webwml-stattrans - Debian web site translation statistics # Copyright (c) 2001 Martin Schulze and others # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. use POSIX qw(strftime); use Getopt::Std; # These modules reside under webwml/Perl use lib ($0 =~ m|(.*)/|, $1 or ".") ."/Perl"; use Local::Cvsinfo; use Webwml::Langs; use Webwml::TransCheck; use Webwml::TransIgnore; $| = 1; $opt_h = "/org/www.debian.org/www/devel/website/stats"; $opt_w = "/org/www.debian.org/webwml"; $opt_p = "*.(wml|src)"; $opt_t = "Debian web site translation statistics"; $opt_v = 0; $opt_d = "u"; $opt_l = undef; $opt_b = ""; # Base URL, if not debian.org getopts('h:w:b:p:t:vd:l:') || die; # Replace filename globbing by Perl regexps $opt_p =~ s/\./\\./g; $opt_p =~ s/\?/./g; $opt_p =~ s/\*/.*/g; $opt_p =~ s/$/\$/g; %config = ( 'htmldir' => $opt_h, 'wmldir' => $opt_w, 'wmlpat' => $opt_p, 'title' => $opt_t, 'verbose' => $opt_v, 'difftype'=> $opt_d, ); my $l = Webwml::Langs->new($opt_w); my %langs = $l->name_iso(); my $transignore = Webwml::TransIgnore->new($opt_w); my $cvs = Local::Cvsinfo->new(); $cvs->options( recursive => 1, matchfile => [ $config{'wmlpat'} ], skipdir => [ "template" ], ); $cvs->readinfo("$config{'wmldir'}/english"); foreach (@{$transignore->global()}) { $cvs->removefile("$config{'wmldir'}/english/$_"); } my $altcvs = Local::Cvsinfo->new(); $altcvs->options( recursive => 1, matchfile => [ $config{'wmlpat'} ], skipdir => [ "template" ], ); $max_versions = 5; $min_versions = 1; # $border_head = "
" # ."
"; # $border_foot = "
"; $date = strftime "%a %b %e %H:%M:%S %Y %z", localtime; my %original; my %transversion; my %version; my %files; my %sizes; sub format3($) { $_ = sprintf("%d", shift); # $_ = "  $_" if length($_) < 2; # $_ = " $_" if length($_) < 3; # $_; } sub format5($) { $_ = sprintf("%d", shift); # $_ = sprintf("%.1f", shift); # $_ = "  $_" if length($_) < 4; # $_ = " $_" if length($_) < 5; # $_; } # Count wml files in given directory # sub getwmlfiles { my $lang = shift; my $dir = "$config{'wmldir'}/$lang"; my $cutfrom = length ($config{'wmldir'})+length($lang)+2; my $count = 0; my $size = 0; my $is_english = ($lang eq "english")?1:0; my ( $file, $v ); my @listfiles; print "$lang " if ($config{verbose}); die "$0: can't find $dir!\n" if (! -d "$dir"); if ($is_english) { @listfiles = @{$cvs->files()}; } else { $altcvs->reset(); $altcvs->readinfo($dir); @listfiles = @{$altcvs->files()}; } foreach my $f (@listfiles) { $file = substr ($f, $cutfrom); next if $transignore->is_global($file); $files{$file} = 1; $wmlfiles{$lang} .= " " . $file; my $transcheck = Webwml::TransCheck->new("$dir/$file"); if ($transcheck->revision()) { $transversion{"$lang/$file"} = $transcheck->revision(); $original{"$lang/$file"} ||= $transcheck->original(); } if ($is_english) { $version{"$lang/$file"} = $cvs->revision($f); } else { $version{"$lang/$file"} = $altcvs->revision($f); if (!$transcheck->revision()) { $transcheckenglish = Webwml::TransCheck->new("english/$file"); if (!$transcheckenglish->revision() and (-e "english/$file")) { $transversion{"$lang/$file"} = "1.1"; $original{"$lang/$file"} = "english"; } else { $original{"english/$file"} = $lang; $transversion{"english/$file"} ||= "1.1"; } } } if ($transcheck->maintainer()) { $maintainer{"$lang/$file"} = $transcheck->maintainer(); } $count++; $sizes{$file} = (stat "".($original{"english/$file"}||"english")."/".$file)[7]; $size += $sizes{$file}; } $wmlfiles{$lang} .= " "; $wml{$lang} = $count; $wml_s{$lang} = $size; } sub get_color { my $percent = shift; if ($percent < 50) { return sprintf ("#FF%02x00", (255/50) * $percent); } else { return sprintf ("#%02xFF00", (255/50) * (100 - $percent)); } } sub check_translation { my ($translation, $version, $file) = @_; my ( @version_numbers, $major_number, $last_number ); my ( @translation_numbers, $major_translated_number, $last_translated_number ); if ( $version && $translation ) { @version_numbers = split /\./,$version; $major_number = $version_numbers[0]; $last_number = pop @version_numbers; die "Invalid CVS revision for $file: $version\n" unless ($major_number =~ /\d+/ && $last_number =~ /\d+/); @translation_numbers = split /\./,$translation; $major_translated_number = $translation_numbers[0]; $last_translated_number = pop @translation_numbers; die "Invalid translation revision for $file: $translation\n" unless ($major_translated_number =~ /\d+/ && $last_translated_number =~ /\d+/); # Here we compare the original version with the translated one and print # a note for the user if their first or last numbers are too far apart # From translation-check.wml if ( $major_number != $major_translated_number ) { return "This translation is too out of date"; } elsif ( $last_number - $last_translated_number < 0 ) { return "Wrong translation version"; } elsif ( $last_number - $last_translated_number >= $max_versions ) { return "This translation is too out of date"; } elsif ( $last_number - $last_translated_number >= $min_versions ) { return "The original is newer than this translation"; } } elsif ( !$version && $translation) { return "The original no longer exists"; } return ""; } print "Collecting data in: " if ($config{'verbose'}); if ($opt_l) { getwmlfiles ($opt_l); getwmlfiles ('english'); } else { foreach $lang (keys %langs) { getwmlfiles ($lang); } } print "\n" if ($config{'verbose'}); my @search_in; if ($opt_l) { @search_in = ( 'english', $opt_l ); } else { @search_in = sort keys %langs; } # Compute stats about gettext files print "Computing statistics in gettext files... " if ($config{'verbose'}); my ( %po_translated, %po_fuzzy, %po_untranslated, %po_total ); my ( %percent_po_t, %percent_po_u, %percent_po_f ); foreach $lang (@search_in) { next if $lang eq 'english'; $po_translated{"total"}{$lang} = $po_fuzzy{"total"}{$lang} = $po_untranslated{"total"}{$lang} = 0; my @status = qx,LC_ALL=C make -C $opt_w/$lang/po stats 2>&1 1>/dev/null,; foreach $line (@status) { chomp $line; ($domain = $line) =~ s/\..*//; $po_translated{$domain}{$lang} = ($line =~ /(\d+) translated/ ? $1 : "0"); $po_fuzzy{$domain}{$lang} = ($line =~ /(\d+) fuzzy/ ? $1 : "0"); $po_untranslated{$domain}{$lang} = ($line =~ /(\d+) untranslated/ ? $1 : "0"); $po_total{$domain} = $po_translated{$domain}{$lang} + $po_fuzzy{$domain}{$lang} + $po_untranslated{$domain}{$lang}; $po_translated{"total"}{$lang} += $po_translated{$domain}{$lang}; $po_fuzzy{"total"}{$lang} += $po_fuzzy{$domain}{$lang}; $po_untranslated{"total"}{$lang} += $po_untranslated{$domain}{$lang}; if ($po_total{$domain} > 0) { $percent_po_t{$domain}{$lang} = int ($po_translated{$domain}{$lang}/$po_total{$domain} * 100 + .5); $percent_po_f{$domain}{$lang} = int ($po_fuzzy{$domain}{$lang}/$po_total{$domain} * 100 + .5); $percent_po_u{$domain}{$lang} = int ($po_untranslated{$domain}{$lang}/$po_total{$domain} * 100 + .5); } else { $percent_po_t{$domain}{$lang} = 0; $percent_po_f{$domain}{$lang} = 0; $percent_po_u{$domain}{$lang} = 0; } } $po_total{"total"} = $po_translated{"total"}{$lang} + $po_fuzzy{"total"}{$lang} + $po_untranslated{"total"}{$lang}; if ($po_total{'total'} > 0) { $percent_po_t{'total'}{$lang} = int ($po_translated{'total'}{$lang}/$po_total{'total'} * 100 + .5); $percent_po_f{'total'}{$lang} = int ($po_fuzzy{'total'}{$lang}/$po_total{'total'} * 100 + .5); $percent_po_u{'total'}{$lang} = int ($po_untranslated{'total'}{$lang}/$po_total{'total'} * 100 + .5); } else { $percent_po_t{'total'}{$lang} = 0; $percent_po_f{'total'}{$lang} = 0; $percent_po_u{'total'}{$lang} = 0; } } print "done.\n" if ($config{'verbose'}); # =============== Create HTML files =============== mkdir ($config{'htmldir'}, 02775) if (! -d $config{'htmldir'}); my @filenames = sort keys %files; my $nfiles = scalar @filenames; $nsize += $sizes{$_} foreach (@filenames); print "Creating files: " if ($config{'verbose'}); foreach $lang (@search_in) { my @processed_langs = ($langs{$lang}); @processed_langs = ("zh-cn", "zh-tw") if $langs{$lang} eq "zh"; foreach $l (@processed_langs) { print "$l.html " if ($config{'verbose'}); $t_body = $u_body = $o_body = ""; $translated{$lang} = $outdated{$lang} = $untranslated{$lang} = 0; # get stats about files foreach $file (@filenames) { next if ($file eq ""); # Translated pages if (index ($wmlfiles{$lang}, " $file ") >= 0) { $translated{$lang}++; $translated_s{$lang} += $sizes{$file}; $orig = $original{"$lang/$file"} || "english"; # Outdated translations $msg = check_translation ($transversion{"$lang/$file"}, $version{"$orig/$file"}, "$lang/$file"); if (length ($msg)) { $o_body .= ""; if (($file !~ /\.wml$/) || ($file eq "devel/wnpp/wnpp.wml")) { $o_body .= sprintf "%s", $file; } else { (my $base = $file) =~ s/\.wml$//; $o_body .= sprintf "%s", $base, $l, $base; } $o_body .= sprintf "%s", $transversion{"$lang/$file"}; $o_body .= sprintf "%s", $version{"$orig/$file"}; $o_body .= sprintf "%s", $msg; if ($msg eq "Wrong translation version" || $msg eq "The original no longer exists") { $o_body .= ""; } else { $o_body .= sprintf "  %s -> %s", $file, $transversion{"$lang/$file"}, $version{"$orig/$file"}, $config{'difftype'}, $transversion{"$lang/$file"}, $version{"$orig/$file"}; $o_body .= sprintf "[L]", $file, $version{"$orig/$file"}; } $o_body .= sprintf "%s", $maintainer{"$lang/$file"} || ""; $o_body .= "\n"; $outdated{$lang}++; $outdated_s{$lang} += $sizes{$file}; # Up-to-date translations } else { if (($file !~ /\.wml$/) || ($file eq "devel/wnpp/wnpp.wml")) { $t_body .= sprintf "
  • %s
  • \n", $file; } else { (my $base = $file) =~ s/\.wml$//; $t_body .= sprintf "
  • %s
  • \n", $base, $l, $base; } } } # Untranslated pages else { if (($file !~ /\.wml$/) || ($file eq "devel/wnpp/wnpp.wml")) { $u_body .= sprintf "%s \n", $file; } else { (my $base = $file) =~ s/\.wml$//; $u_body .= sprintf "%s%d(%.2f ‰)\n", $base, $base, $sizes{$file}, int($sizes{$file}/$nsize * 100000 + .5) / 100; } $untranslated{$lang}++; $untranslated_s{$lang} += $sizes{$file}; } } # this is where we discard the files that the translation directory contains # but which don't exist in the English directory # print "extra files: ".$wml{$lang}-$translated{$lang}."\n"; $wml{$lang} = $translated{$lang}; $wml_s{$lang} = $translated_s{$lang}; $translated{$lang} = $translated{$lang} - $outdated{$lang}; $translated_s{$lang} = $translated_s{$lang} - $outdated_s{$lang}; if ($nfiles > 0) { $percent_a{$lang} = int ($wml{$lang}/$nfiles * 100 + .5); $percent_as{$lang} = (int ($wml_s{$lang}/$nsize * 1000 + .5))/10; } else { $percent_a{$lang} = 0; $percent_as{$lang} = 0; } if ($wml{$lang} > 0) { $percent_t{$lang} = int ($translated{$lang}/$wml{$lang} * 100 + .5); $percent_ts{$lang} = (int ($translated_s{$lang}/($wml_s{$lang}+1) * 1000 + .5))/10; } else { $percent_t{$lang} = 0; $percent_ts{$lang} = 0; } $percent_o{$lang} = 100 - $percent_t{$lang}; $percent_os{$lang} = 100 - $percent_ts{$lang}; $percent_u{$lang} = 100 - $percent_a{$lang}; $percent_us{$lang} = 100 - $percent_as{$lang}; if (open (HTML, ">$config{'htmldir'}/$l.html")) { printf HTML "\n"; # printf HTML "%s: %s\n", $config{'title'}, ucfirst $lang; printf HTML "\n\n"; printf HTML " \n"; printf HTML " %s: %s\n", $config{'title'}, ucfirst $lang; print HTML " "; print HTML "\n\n"; $color = get_color ($percent_a{$lang}); printf HTML "

    ", $color; printf HTML "%s: %s

    \n", $config{'title'}, ucfirst $lang; printf HTML "\n", $color; print HTML "\n"; # printf HTML "", $config{'title'}, ucfirst $lang; print HTML "\n"; printf HTML "", $wml{$lang}, $percent_a{$lang}; printf HTML "", $translated{$lang}, $percent_t{$lang}; printf HTML "", $outdated{$lang}, $percent_o{$lang}; printf HTML "", $untranslated{$lang}, $percent_u{$lang}; print HTML "\n"; print HTML "\n"; printf HTML "", $wml_s{$lang}, $percent_as{$lang}; printf HTML "", $translated_s{$lang}, $percent_ts{$lang}; printf HTML "", $outdated_s{$lang}, $percent_os{$lang}; printf HTML "", $nsize-$wml_s{$lang}, $percent_us{$lang}; print HTML "\n"; print HTML "

    %s: %s

    TranslatedUp-to-dateOutdatedNot translated
    %d files (%d%%)%d files (%d%%)%d files (%d%%)%d files (%d%%)
    %d bytes (%.1f%%)%d bytes (%.1f%%)%d bytes (%.1f%%)%d bytes (%.1f%%)
    \n"; # Make the table of content print HTML "

    Table of Contents

    \n"; print HTML "\n"; # outputs the content if ($o_body) { print HTML "

    Outdated translations: (top)

    \n"; print HTML "\n"; print HTML ""; if ($opt_d eq "u") { print HTML ""; } elsif ($opt_d eq "h") { print HTML ""; } else { print HTML ""; } print HTML ""; print HTML ""; print HTML "\n"; print HTML $o_body; print HTML "
    FileTranslatedOriginCommentUnified diffColored diffDiffLogMaintainer
    \n"; } if ($u_body) { print HTML "

    Pages not translated: (top)

    \n"; print HTML "\n"; print HTML $u_body; print HTML "
    \n"; } if ($t_body) { print HTML "

    Translations up to date: (top)

    \n"; print HTML "\n"; } # outputs the gettext stats if ($lang ne 'english') { print HTML "

    Translations of templates (gettext files): (top)

    \n"; # print HTML $border_head; print HTML "\n"; print HTML "\n"; foreach my $domain (sort keys %po_total) { next if $domain eq 'total'; print HTML ""; $color_t = get_color ($percent_po_t{$domain}{$lang}); $color_f = get_color (100 - $percent_po_f{$domain}{$lang}); $color_u = get_color (100 - $percent_po_u{$domain}{$lang}); print HTML ""; printf HTML "", $color_t, $po_translated{$domain}{$lang}, $percent_po_t{$domain}{$lang}; printf HTML "", $color_f, $po_fuzzy{$domain}{$lang}, $percent_po_f{$domain}{$lang}; printf HTML "", $color_u, $po_untranslated{$domain}{$lang}, $percent_po_u{$domain}{$lang}; printf HTML "", $po_total{$domain}; print HTML "\n"; } print HTML ""; $color_t = get_color ($percent_po_t{'total'}{$lang}); $color_f = get_color (100 - $percent_po_f{'total'}{$lang}); $color_u = get_color (100 - $percent_po_u{'total'}{$lang}); printf HTML "", $color_t, $po_translated{'total'}{$lang}, $percent_po_t{'total'}{$lang}; printf HTML "", $color_f, $po_fuzzy{'total'}{$lang}, $percent_po_f{'total'}{$lang}; printf HTML "", $color_u, $po_untranslated{'total'}{$lang}, $percent_po_u{'total'}{$lang}; printf HTML "", $po_total{'total'}; print HTML "\n"; print HTML "
    FileUp to dateFuzzyUntranslatedTotal
    $domain.$langs{$lang}.po%d (%s%%)%d (%s%%)%d (%s%%)%d
         
    Total:%d (%d%%)%d (%d%%)%d (%d%%)%d
    \n"; } # outputs footer print HTML "
    Compiled at $date
    \n"; print HTML ""; close (HTML); } } } print "\n" if ($config{'verbose'}); # =============== Creating index.html =============== print "Creating index.html... " if ($config{'verbose'}); open (HTMLI, ">$config{'htmldir'}/index.html") || die "Can't open $config{'htmldir'}/index.html"; # printf HTMLI "\n%s\n\n", $config{'title'}; # printf HTMLI "

    %s

    \n", $config{'title'}; printf HTMLI "\n\n\n"; printf HTMLI " \n"; printf HTMLI " %s\n", $config{'title'}; print HTMLI " "; printf HTMLI "\n\n"; printf HTMLI "

    %s

    \n", $config{'title'}; print HTMLI "

    Translated web pages

    \n"; printf HTMLI "

    There are %d pages to translate.

    \n",($wml{'english'}+$untranslated{'english'}); # print HTMLI $border_head; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI ""; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI ""; print HTMLI ""; print HTMLI "\n"; print HTMLI ""; print HTMLI ""; foreach $lang (@search_in) { my @processed_langs = ($langs{$lang}); @processed_langs = ("zh-cn", "zh-tw") if $langs{$lang} eq "zh"; foreach $l (@processed_langs) { $color_a = get_color ($percent_a{$lang}); $color_t = get_color ($percent_t{$lang}); $color_o = get_color (100 - $percent_o{$lang}); $color_u = get_color (100 - $percent_u{$lang}); print HTMLI ""; printf HTMLI "", $l, ucfirst $lang, $l; printf HTMLI "", $color_a, $wml{$lang}, format3($percent_a{$lang}); printf HTMLI "", $color_t, $translated{$lang}, format3($percent_t{$lang}); printf HTMLI "", $color_o, $outdated{$lang}, format3($percent_o{$lang}); printf HTMLI "", $color_u, $untranslated{$lang}, format3($percent_u{$lang}); print HTMLI "\n"; } } print HTMLI ""; print HTMLI "
    LanguageTranslationsUp to dateOutdatedNot translated
    %s (%s)%d(%s%%)%d(%s%%)%d(%s%%)%d(%s%%)
    \n"; # print HTMLI $border_foot; print HTMLI "

    Translated web pages (by size)

    \n"; printf HTMLI "

    There are %d bytes to translate.

    \n",($wml_s{'english'}+$untranslated_s{'english'}); # print HTMLI $border_head; print HTMLI "\n"; # print HTMLI "
    \n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI ""; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI ""; print HTMLI ""; print HTMLI "\n"; print HTMLI ""; print HTMLI ""; foreach $lang (@search_in) { my @processed_langs = ($langs{$lang}); @processed_langs = ("zh-cn", "zh-tw") if $langs{$lang} eq "zh"; foreach $l (@processed_langs) { $color_a = get_color ($percent_a{$lang}); $color_t = get_color ($percent_t{$lang}); $color_o = get_color (100 - $percent_o{$lang}); $color_u = get_color (100 - $percent_u{$lang}); print HTMLI ""; printf HTMLI "", $l, ucfirst $lang, $l; printf HTMLI "", $color_a, $wml_s{$lang}, format5($percent_as{$lang}); printf HTMLI "", $color_t, $translated_s{$lang}, format5($percent_ts{$lang}); printf HTMLI "", $color_o, $outdated_s{$lang}, format5($percent_os{$lang}); printf HTMLI "", $color_u, $wml_s{"english"}+$untranslated_s{'english'}-$wml_s{$lang}, format5($percent_us{$lang}); print HTMLI "\n"; } } print HTMLI ""; print HTMLI "
    LanguageTranslationsUp to dateOutdatedNot translated
    %s (%s)%d(%s%%)%d(%s%%)%d(%s%%)%d(%s%%)
    \n"; # print HTMLI $border_foot; print HTMLI "

    Translated templates (gettext files)

    \n"; printf HTMLI "

    There are %d strings to translate.

    \n",$po_total{'total'}; # print HTMLI $border_head; print HTMLI "\n"; # print HTMLI "
    \n"; print HTMLI "\n"; print HTMLI ""; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI "\n"; print HTMLI ""; print HTMLI ""; print HTMLI "\n"; print HTMLI ""; print HTMLI ""; foreach $lang (@search_in) { next if $lang eq 'english'; my @processed_langs = ($langs{$lang}); @processed_langs = ("zh-cn", "zh-tw") if $langs{$lang} eq "zh"; foreach $l (@processed_langs) { print HTMLI ""; printf HTMLI "", $l, ucfirst $lang, $l; $color_t = get_color ($percent_po_t{'total'}{$lang}); $color_f = get_color (100 - $percent_po_f{'total'}{$lang}); $color_u = get_color (100 - $percent_po_u{'total'}{$lang}); printf HTMLI "", $color_t, $po_translated{'total'}{$lang}, format3($percent_po_t{'total'}{$lang}); printf HTMLI "", $color_f, $po_fuzzy{'total'}{$lang}, format3($percent_po_f{'total'}{$lang}); printf HTMLI "", $color_u, $po_untranslated{'total'}{$lang}, format3($percent_po_u{'total'}{$lang}); print HTMLI "\n"; } } print HTMLI ""; print HTMLI "
    LanguageUp to dateFuzzyNot translated
    %s (%s)%d(%s%%)%d(%s%%)%d(%s%%)
    \n"; # print HTMLI $border_foot; print HTMLI "


    \n"; print HTMLI "

    Created with webwml-stattrans at $date\n"; print HTMLI "\n"; close (HTMLI); print "done.\n" if ($config{'verbose'}); # Note: # Translated pages on ll.html may be higher than in index.html. # This is due to the fact that some english pages were removed. # printf "%s\n", join ("\n", keys %version); # printf "%s - %s\n", $version{'german/devel/index'}, $version{'english/devel/index'};