diff options
author | Peter Karlsson <peterk> | 2002-08-28 18:11:29 +0000 |
---|---|---|
committer | Peter Karlsson <peterk> | 2002-08-28 18:11:29 +0000 |
commit | 074101851eb07a8cbd27738f86c2c00178efa420 (patch) | |
tree | 4b9738fe83b8afb9f2f9beb718c4489caa1dcab3 | |
parent | c1cb01b4f4de9c36065cf3692c63674ca75d9dcb (diff) |
If destination language's encoding is something other than iso-8859-1,
encode all non-ASCII characters as entities.
CVS version numbers
copypage.pl: 1.16 -> 1.17
-rwxr-xr-x | copypage.pl | 29 |
1 files changed, 28 insertions, 1 deletions
diff --git a/copypage.pl b/copypage.pl index 5d497dac988..1a66abe9751 100755 --- a/copypage.pl +++ b/copypage.pl @@ -49,13 +49,27 @@ if ($#ARGV == -1) exit; } +# Check destination character encoding +my $charset = 'iso-8859-1'; +if (open WMLRC, "$language/.wmlrc") +{ + while (<WMLRC>) + { + if (/^-D CHARSET=(.*)$/) + { + $charset = lc($1); + last; + } + } +} + # Loop over command line foreach $page (@ARGV) { # Check if valid source if ($page =~ /wml$/) { - &copy($page); + &copy($page, $charset ne 'iso-8859-1'); } else { @@ -67,6 +81,7 @@ foreach $page (@ARGV) sub copy { my $page = shift; + my $recodelatin1 = shift; print "Processing $page...\n"; # Remove english/ from path @@ -192,6 +207,12 @@ sub copy } else { + if ($recodelatin1) + { + # Recode any non-ASCII characters as entities + s/([\xA0-\xFF])/&entity($1)/ge; + } + print DST $_; } } @@ -210,3 +231,9 @@ sub copy if defined $dsttitle; } +# Subroutine to encode a latin-1 character as a HTML entity +sub entity +{ + # Exploiting the fact that latin-1 is a subset of Unicode + return '&#' . ord(shift) . ';' +} |