aboutsummaryrefslogtreecommitdiffstats
path: root/copypage.pl
diff options
context:
space:
mode:
author Peter Karlsson <peterk> 2002-08-28 19:18:21 +0000
committer Peter Karlsson <peterk> 2002-08-28 19:18:21 +0000
commit 8d7ff3e6079c335d50b346807b7ad6e0285d7832 (patch)
tree 941874c2dc608d6999a187b864c882f0d111445a /copypage.pl
parent baa8c3b1685d5e9788c47f0a6bbe272ccce05742 (diff)
Used named entities instead of numeric when encoding latin1 characters
CVS version numbers copypage.pl: 1.17 -> 1.18
Diffstat (limited to 'copypage.pl')
-rwxr-xr-x copypage.pl 29
1 files changed, 21 insertions, 8 deletions
diff --git a/copypage.pl b/copypage.pl
index 1a66abe9751..d91ea335a3f 100755
--- a/copypage.pl
+++ b/copypage.pl
@@ -63,6 +63,26 @@ if (open WMLRC, "$language/.wmlrc")
}
}
+# Named HTML entities for the 96 latin-1 characters 0xA0-0xFF, used when
+# copying to non-latin1 encodings.  Entries are in code point order, eight
+# per row (row 1 is 0xA0-0xA7, row 2 is 0xA8-0xAF, ... row 12 is 0xF8-0xFF),
+# so the entity for a character $c is $entities[ord($c) - 160].  Keep the
+# order and the count intact: the lookup is purely positional.
+@entities = qw(
+ &nbsp; &iexcl; &cent; &pound; &curren; &yen; &brvbar; &sect;
+ &uml; &copy; &ordf; &laquo; &not; &shy; &reg; &macr;
+ &deg; &plusmn; &sup2; &sup3; &acute; &micro; &para; &middot;
+ &cedil; &sup1; &ordm; &raquo; &frac14; &frac12; &frac34; &iquest;
+ &Agrave; &Aacute; &Acirc; &Atilde; &Auml; &Aring; &AElig; &Ccedil;
+ &Egrave; &Eacute; &Ecirc; &Euml; &Igrave; &Iacute; &Icirc; &Iuml;
+ &ETH; &Ntilde; &Ograve; &Oacute; &Ocirc; &Otilde; &Ouml; &times;
+ &Oslash; &Ugrave; &Uacute; &Ucirc; &Uuml; &Yacute; &THORN; &szlig;
+ &agrave; &aacute; &acirc; &atilde; &auml; &aring; &aelig; &ccedil;
+ &egrave; &eacute; &ecirc; &euml; &igrave; &iacute; &icirc; &iuml;
+ &eth; &ntilde; &ograve; &oacute; &ocirc; &otilde; &ouml; &divide;
+ &oslash; &ugrave; &uacute; &ucirc; &uuml; &yacute; &thorn; &yuml;
+);
+
# Loop over command line
foreach $page (@ARGV)
{
@@ -210,7 +230,7 @@ sub copy
if ($recodelatin1)
{
# Recode any non-ASCII characters as entities
- s/([\xA0-\xFF])/&entity($1)/ge;
+ s/([\xA0-\xFF])/$entities[ord($1)-160]/ge;
}
print DST $_;
@@ -230,10 +250,3 @@ sub copy
print "and to remove $dsttitle when finished\n"
if defined $dsttitle;
}
-
-# Subroutine to encode a latin-1 character as a HTML entity
-sub entity
-{
- # Exploiting the fact that latin-1 is a subset of Unicode
- return '&#' . ord(shift) . ';'
-}

© 2014-2024 Faster IT GmbH | imprint | privacy policy