aboutsummaryrefslogtreecommitdiffstats
path: root/copypage.pl
diff options
context:
space:
mode:
author Peter Karlsson <peterk> 2002-08-28 18:11:29 +0000
committer Peter Karlsson <peterk> 2002-08-28 18:11:29 +0000
commit 074101851eb07a8cbd27738f86c2c00178efa420 (patch)
tree 4b9738fe83b8afb9f2f9beb718c4489caa1dcab3 /copypage.pl
parent c1cb01b4f4de9c36065cf3692c63674ca75d9dcb (diff)
If the destination language's encoding is something other than iso-8859-1,
encode all non-ASCII characters as entities.
CVS version numbers: copypage.pl: 1.16 -> 1.17
Diffstat (limited to 'copypage.pl')
-rwxr-xr-x copypage.pl 29
1 files changed, 28 insertions, 1 deletions
diff --git a/copypage.pl b/copypage.pl
index 5d497dac988..1a66abe9751 100755
--- a/copypage.pl
+++ b/copypage.pl
@@ -49,13 +49,27 @@ if ($#ARGV == -1)
exit;
}
+# Determine the destination encoding: default to iso-8859-1 unless "$language/.wmlrc" has a "-D CHARSET=..." line
+my $charset = 'iso-8859-1';
+if (open WMLRC, "$language/.wmlrc") # a failed open is not an error; the default charset is kept
+{
+ while (<WMLRC>)
+ {
+ if (/^-D CHARSET=(.*)$/)
+ {
+ $charset = lc($1); # lower-cased so the later comparison against 'iso-8859-1' ignores case
+ last; # only the first CHARSET line is used
+ }
+ }
+} # NOTE(review): WMLRC is not closed in the visible hunk -- confirm whether it is closed elsewhere
+
# Loop over command line
foreach $page (@ARGV)
{
# Check if valid source
if ($page =~ /wml$/)
{
- &copy($page);
+ &copy($page, $charset ne 'iso-8859-1');
}
else
{
@@ -67,6 +81,7 @@ foreach $page (@ARGV)
sub copy
{
my $page = shift;
+ my $recodelatin1 = shift;
print "Processing $page...\n";
# Remove english/ from path
@@ -192,6 +207,12 @@ sub copy
}
else
{
+ if ($recodelatin1)
+ {
+ # Recode any non-ASCII characters as entities
+ s/([\xA0-\xFF])/&entity($1)/ge;
+ }
+
print DST $_;
}
}
@@ -210,3 +231,9 @@ sub copy
if defined $dsttitle;
}
+# Subroutine to encode a single latin-1 character as an HTML numeric character reference (e.g. "\xE9" becomes "&#233;")
+sub entity
+{
+ # ord() of a latin-1 character is also its Unicode code point, since latin-1 is a subset of Unicode
+ return '&#' . ord(shift) . ';'
+}

© 2014-2024 Faster IT GmbH | imprint | privacy policy