From c55f71887d620f13e99014f5a25350dc79890e92 Mon Sep 17 00:00:00 2001 From: Peter Karlsson Date: Sat, 18 Jan 2003 18:34:13 +0000 Subject: Since we are forced to use entities at some places in the English source files, when copying the page first decode any entities for the ISO 8859-1 area. The entities are then re-introduced if required by the output encoding. CVS version numbers copypage.pl: 1.22 -> 1.23 --- copypage.pl | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'copypage.pl') diff --git a/copypage.pl b/copypage.pl index 723a1c354d1..de66d21bb10 100755 --- a/copypage.pl +++ b/copypage.pl @@ -7,7 +7,7 @@ # Makefiles are not supported anymore for they bear too much space for errors. # Originally written 2000-02-26 by Peter Karlsson -# © Copyright 2000-2002 Software in the public interest, Inc. +# © Copyright 2000-2003 Software in the public interest, Inc. # This program is released under the GNU General Public License, v2. # $Id$ @@ -245,15 +245,27 @@ sub copy } else { + # Transform the string into a string that is fit for the encoding + # of the output language. We do that by first converting any + # SGML entities in the input stream into 8-bit ISO 8859-1 + # encoding, and then convert extended characters (back) into + # entities if necessary for the target encoding. + + # Decode + s/(&[^#;]+;)/&decodeentity($1)/ge; + s/&#(1[6-9][0-9]|2[0-4][0-9]|25[0-5]);/chr($1)/ge; + + # Encode if (defined $compattable) { - # Recode non-ASCII characters that are not identical to - # ISO 8859-1 into entitites + # Output encoding is in part compatible with ISO 8859-1, only + # convert incompatible characters into entities. s/([\xA0-\xFF])/$$compattable[ord($1)-160]?$1:$entities[ord($1)-160]/ge; } elsif ($recodelatin1) { - # Recode any non-ASCII characters into entities + # Output encoding is incompatible with ISO 8859-1, convert all + # 8-bit characters into entities. 
# Return the ISO-8859-1 character that corresponds to the given entity.
#
# Parameter: the complete entity reference, including the leading "&" and
#            the trailing ";", spelled exactly as it is stored in @entities.
# Returns:   chr(160 + index) when the entity is found in @entities at an
#            index of 1 or higher, otherwise the argument unchanged.
sub decodeentity
{
	my $ent = shift;

	# Index 0 maps to character 160, the non-breaking space (&nbsp;); it is
	# skipped on purpose so that &nbsp; is never decoded.  The range runs up
	# to and including $#entities, which is the last valid index -- stopping
	# one short of it would leave the final table entry undecodable.
	for my $i (1 .. $#entities)
	{
		return chr($i + 160) if $entities[$i] eq $ent;
	}

	# Not in the table: hand the entity back untouched.
	return $ent;
}