From 8d7ff3e6079c335d50b346807b7ad6e0285d7832 Mon Sep 17 00:00:00 2001
From: Peter Karlsson
Date: Wed, 28 Aug 2002 19:18:21 +0000
Subject: Used named entities instead of numeric when encoding latin1 characters

CVS version numbers

copypage.pl: 1.17 -> 1.18
---
 copypage.pl | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

(limited to 'copypage.pl')

diff --git a/copypage.pl b/copypage.pl
index 1a66abe9751..d91ea335a3f 100755
--- a/copypage.pl
+++ b/copypage.pl
@@ -63,6 +63,26 @@ if (open WMLRC, "$language/.wmlrc")
 	}
 }
 
+# Table of entities used when copying to non-latin1 encodings
+@entities = (
+	'&nbsp;', '&iexcl;', '&cent;', '&pound;', '&curren;', '&yen;',
+	'&brvbar;', '&sect;', '&uml;', '&copy;', '&ordf;', '&laquo;', '&not;',
+	'&shy;', '&reg;', '&macr;', '&deg;', '&plusmn;', '&sup2;', '&sup3;',
+	'&acute;', '&micro;', '&para;', '&middot;', '&cedil;', '&sup1;',
+	'&ordm;', '&raquo;', '&frac14;', '&frac12;', '&frac34;', '&iquest;',
+	'&Agrave;', '&Aacute;', '&Acirc;', '&Atilde;', '&Auml;', '&Aring;',
+	'&AElig;', '&Ccedil;', '&Egrave;', '&Eacute;', '&Ecirc;', '&Euml;',
+	'&Igrave;', '&Iacute;', '&Icirc;', '&Iuml;', '&ETH;', '&Ntilde;',
+	'&Ograve;', '&Oacute;', '&Ocirc;', '&Otilde;', '&Ouml;', '&times;',
+	'&Oslash;', '&Ugrave;', '&Uacute;', '&Ucirc;', '&Uuml;', '&Yacute;',
+	'&THORN;', '&szlig;', '&agrave;', '&aacute;', '&acirc;', '&atilde;',
+	'&auml;', '&aring;', '&aelig;', '&ccedil;', '&egrave;', '&eacute;',
+	'&ecirc;', '&euml;', '&igrave;', '&iacute;', '&icirc;', '&iuml;',
+	'&eth;', '&ntilde;', '&ograve;', '&oacute;', '&ocirc;', '&otilde;',
+	'&ouml;', '&divide;', '&oslash;', '&ugrave;', '&uacute;', '&ucirc;',
+	'&uuml;', '&yacute;', '&thorn;', '&yuml;'
+);
+
 # Loop over command line
 foreach $page (@ARGV)
 {
@@ -210,7 +230,7 @@ sub copy
 		if ($recodelatin1)
 		{
 			# Recode any non-ASCII characters as entities
-			s/([\xA0-\xFF])/&entity($1)/ge;
+			s/([\xA0-\xFF])/$entities[ord($1)-160]/ge;
 		}
 
 		print DST $_;
@@ -230,10 +250,3 @@ sub copy
 	print "and to remove $dsttitle when finished\n"
 		if defined $dsttitle;
 }
-
-# Subroutine to encode a latin-1 character as a HTML entity
-sub entity
-{
-	# Exploiting the fact that latin-1 is a subset of Unicode
-	return '&#' . ord(shift) . ';'
-}
-- 
cgit v1.2.3