From 8d7ff3e6079c335d50b346807b7ad6e0285d7832 Mon Sep 17 00:00:00 2001
From: Peter Karlsson <peterk>
Date: Wed, 28 Aug 2002 19:18:21 +0000
Subject: Used named entities instead of numeric when encoding latin1
 characters

CVS version numbers

copypage.pl: 1.17 -> 1.18
---
 copypage.pl | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

(limited to 'copypage.pl')
diff --git a/copypage.pl b/copypage.pl
index 1a66abe9751..d91ea335a3f 100755
--- a/copypage.pl
+++ b/copypage.pl
@@ -63,6 +63,26 @@ if (open WMLRC, "$language/.wmlrc")
 	}
 }
 
+# Table of named HTML entities for the latin-1 characters 0xA0-0xFF, used when copying to non-latin1 encodings
+@entities = (	# 96 entries in code order: element N is the entity for character code 160+N
+	'&nbsp;', '&iexcl;', '&cent;', '&pound;', '&curren;', '&yen;',
+	'&brvbar;', '&sect;', '&uml;', '&copy;', '&ordf;', '&laquo;', '&not;',
+	'&shy;', '&reg;', '&macr;', '&deg;', '&plusmn;', '&sup2;', '&sup3;',
+	'&acute;', '&micro;', '&para;', '&middot;', '&cedil;', '&sup1;',
+	'&ordm;', '&raquo;', '&frac14;', '&frac12;', '&frac34;', '&iquest;',
+	'&Agrave;', '&Aacute;', '&Acirc;', '&Atilde;', '&Auml;', '&Aring;',
+	'&AElig;', '&Ccedil;', '&Egrave;', '&Eacute;', '&Ecirc;', '&Euml;',
+	'&Igrave;', '&Iacute;', '&Icirc;', '&Iuml;', '&ETH;', '&Ntilde;',
+	'&Ograve;', '&Oacute;', '&Ocirc;', '&Otilde;', '&Ouml;', '&times;',
+	'&Oslash;', '&Ugrave;', '&Uacute;', '&Ucirc;', '&Uuml;', '&Yacute;',
+	'&THORN;', '&szlig;', '&agrave;', '&aacute;', '&acirc;', '&atilde;',
+	'&auml;', '&aring;', '&aelig;', '&ccedil;', '&egrave;', '&eacute;',
+	'&ecirc;', '&euml;', '&igrave;', '&iacute;', '&icirc;', '&iuml;',
+	'&eth;', '&ntilde;', '&ograve;', '&oacute;', '&ocirc;', '&otilde;',
+	'&ouml;', '&divide;', '&oslash;', '&ugrave;', '&uacute;', '&ucirc;',
+	'&uuml;', '&yacute;', '&thorn;', '&yuml;'
+);
+
 # Loop over command line
 foreach $page (@ARGV)
 {
@@ -210,7 +230,7 @@ sub copy
 			if ($recodelatin1)
 			{
 				# Recode any non-ASCII characters as entities
-				s/([\xA0-\xFF])/&entity($1)/ge;
+				s/([\xA0-\xFF])/$entities[ord($1)-160]/ge;
 			}
 
 			print DST $_;
@@ -230,10 +250,3 @@ sub copy
 	print "and to remove $dsttitle when finished\n"
 		if defined $dsttitle;
 }
-
-# Subroutine to encode a latin-1 character as a HTML entity
-sub entity
-{
-	# Exploiting the fact that latin-1 is a subset of Unicode
-	return '&#' . ord(shift) . ';'
-}
-- 
cgit v1.2.3