diff options
author | nobuyuki morita <victory-guest> | 2012-12-02 02:44:24 +0000 |
---|---|---|
committer | nobuyuki morita <victory-guest> | 2012-12-02 02:44:24 +0000 |
commit | 95615ebc66c00bdb73d20a927f6eb5bd777dc2a6 (patch) | |
tree | fdc84a914e954f3cfe84dbd74222348ca3a7f60e /copypage.pl | |
parent | f95b116d50888999f13070a0386448c2e9d40e6a (diff) |
fix #690331 copypage and stattrans cleaning
CVS version numbers
copypage.pl: 1.41 -> 1.42
Diffstat (limited to 'copypage.pl')
-rwxr-xr-x | copypage.pl | 105 |
1 files changed, 1 insertions, 104 deletions
diff --git a/copypage.pl b/copypage.pl index cdaf5175b23..982fe168ea7 100755 --- a/copypage.pl +++ b/copypage.pl @@ -25,14 +25,6 @@ use Local::VCS qw(vcs_file_info); use File::Temp qw/tempfile/; use Getopt::Std; - -# Declare variables only used in references to avoid warnings -use vars qw(@iso_8859_2_compat @iso_8859_3_compat @iso_8859_4_compat - @iso_8859_5_compat @iso_8859_6_compat @iso_8859_7_compat - @iso_8859_8_compat @iso_8859_9_compat @iso_8859_10_compat - @iso_8859_13_compat @iso_8859_14_compat @iso_8859_15_compat - @iso_8859_16_compat); - # Get configuration # Read first two valid lines from language.conf if (open CONF, "<language.conf") @@ -106,70 +98,13 @@ die "Language not defined in DWWW_LANG or language.conf\n" #warn "Maintainer name not defined in DWWW_MAINT or language.conf\n" # if not defined $maintainer; - -# Table of entities used when copying to non-latin1 encodings -@entities = ( - '&nbsp;', '&iexcl;', '&cent;', '&pound;', '&curren;', '&yen;', - '&brvbar;', '&sect;', '&uml;', '&copy;', '&ordf;', '&laquo;', '&not;', - '&shy;', '&reg;', '&macr;', '&deg;', '&plusmn;', '&sup2;', '&sup3;', - '&acute;', '&micro;', '&para;', '&middot;', '&cedil;', '&sup1;', - '&ordm;', '&raquo;', '&frac14;', '&frac12;', '&frac34;', '&iquest;', - '&Agrave;', '&Aacute;', '&Acirc;', '&Atilde;', '&Auml;', '&Aring;', - '&AElig;', '&Ccedil;', '&Egrave;', '&Eacute;', '&Ecirc;', '&Euml;', - '&Igrave;', '&Iacute;', '&Icirc;', '&Iuml;', '&ETH;', '&Ntilde;', - '&Ograve;', '&Oacute;', '&Ocirc;', '&Otilde;', '&Ouml;', '&times;', - '&Oslash;', '&Ugrave;', '&Uacute;', '&Ucirc;', '&Uuml;', '&Yacute;', - '&THORN;', '&szlig;', '&agrave;', '&aacute;', '&acirc;', '&atilde;', - '&auml;', '&aring;', '&aelig;', '&ccedil;', '&egrave;', '&eacute;', - '&ecirc;', '&euml;', '&igrave;', '&iacute;', '&icirc;', '&iuml;', - '&eth;', '&ntilde;', '&ograve;', '&oacute;', '&ocirc;', '&otilde;', - '&ouml;', '&divide;', '&oslash;', '&ugrave;', '&uacute;', '&ucirc;', - '&uuml;', '&yacute;', '&thorn;', '&yuml;' -); - -# Compatibility tables for the iso-8859 series; 1 indicates that the -# codepoint is the same as in iso-8859-1. Used to perform partial remaps -# for these. 
-@iso_8859_2_compat = (1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0); -@iso_8859_3_compat = (1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0); -@iso_8859_4_compat = (1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0); -@iso_8859_5_compat = (1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0); -@iso_8859_6_compat = (1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0); -@iso_8859_7_compat = (1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0); -@iso_8859_8_compat = (1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0); -@iso_8859_9_compat = (1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1); -@iso_8859_10_compat =(1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0); -@iso_8859_13_compat 
=(1,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,0,0); -@iso_8859_14_compat =(1,0,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1); -@iso_8859_15_compat =(1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1); -@iso_8859_16_compat =(1,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,1); - -# Check destination character encoding -my $recode = 0; -if (open WMLRC, "$language/.wmlrc") -{ - while (<WMLRC>) - { - if (s/^-D CHARSET=//) - { - $recode = 1 unless /^utf-8$/i; - if ($recode && /^iso-8859-([0-9]+)$/) - { - my $compattablename = 'iso_8859_' . $1 . '_compat'; - $compat = \@{$compattablename} if defined @{$compattablename}; - } - last; - } - } -} - # Loop over command line foreach $page (@ARGV) { # Check if valid source if ($page =~ /wml$/ || $page =~ /src$/) { - &copy($page, $recode, $compat); + &copy($page); } else { @@ -181,8 +116,6 @@ foreach $page (@ARGV) sub copy { my $page = shift; - my $recodelatin1 = shift; - my $compattable = shift; print "Processing $page...\n"; # Remove english/ from path @@ -294,30 +227,6 @@ sub copy } else { - # Transform the string into a string that is fit for the encoding - # of the output language. We do that by first converting any - # SGML entities in the input stream into 8-bit ISO 8859-1 - # encoding, and then convert extended characters (back) into - # entities if necessary for the target encoding. 
- - # Decode - s/(&[^#;]+;)/&decodeentity($1)/ge; - s/&#(1[6-9][0-9]|2[0-4][0-9]|25[0-5]);/chr($1)/ge; - - # Encode - if (defined $compattable) - { - # Output encoding is in part compatible with ISO 8859-1, only - # convert incompatible characters into entities. - s/([\xA0-\xFF])/$$compattable[ord($1)-160]?$1:$entities[ord($1)-160]/ge; - } - elsif ($recodelatin1) - { - # Output encoding is incompatible with ISO 8859-1, convert all - # 8-bit characters into entities. - s/([\xA0-\xFF])/$entities[ord($1)-160]/ge; - } - print DST $_; } } @@ -339,18 +248,6 @@ sub copy if defined $dsttitle; } -# Return the ISO-8859-1 character that corresponds to the given entity -sub decodeentity -{ - my $ent = shift; - # Start at one to avoid decoding &nbsp; - for (my $i = 1; $i < $#entities; ++ $i) - { - return chr($i + 160) if $entities[$i] eq $ent; - } - return $ent; -} - # Find for old translations in the CVS Attic sub find_files_attic { |