diff options
author | Denis Barbier <barbier> | 2001-11-19 00:05:22 +0000 |
---|---|---|
committer | Denis Barbier <barbier> | 2001-11-19 00:05:22 +0000 |
commit | 27df4ecfbc4681ec60ab50c16197956b3596498c (patch) | |
tree | 774b09bcee88d3be3228423f49abc9fee0c2fc47 /Perl/Locale | |
parent | 05d3940f68b53d77122f306561c89ebc0a62c573 (diff) |
Add CPAN modules Locale::Country and Locale::Language to replace
webwml/english/international/l10n/scripts/{Country,Language}.pm
These modules are needed by
webwml/english/international/l10n/scripts/transmonitor-check2
CVS version numbers
Perl/Locale/Constants.pm: INITIAL -> 1.1
Perl/Locale/Country.pm: INITIAL -> 1.1
Perl/Locale/Language.pm: INITIAL -> 1.1
Diffstat (limited to 'Perl/Locale')
-rw-r--r-- | Perl/Locale/Constants.pm | 94 | ||||
-rw-r--r-- | Perl/Locale/Country.pm | 702 | ||||
-rw-r--r-- | Perl/Locale/Language.pm | 455 |
3 files changed, 1251 insertions, 0 deletions
diff --git a/Perl/Locale/Constants.pm b/Perl/Locale/Constants.pm new file mode 100644 index 00000000000..f220c659b3d --- /dev/null +++ b/Perl/Locale/Constants.pm @@ -0,0 +1,94 @@ +package Locale::Constants; +# +# Locale::Constants - defined constants for identifying codesets +# +# $Id$ +# + +use strict; + +require Exporter; + +use vars qw($VERSION @ISA @EXPORT); +@ISA = qw(Exporter); +@EXPORT = qw(LOCALE_CODE_ALPHA_2 LOCALE_CODE_ALPHA_3 LOCALE_CODE_NUMERIC + LOCALE_CODE_DEFAULT); + +use constant LOCALE_CODE_ALPHA_2 => 1; +use constant LOCALE_CODE_ALPHA_3 => 2; +use constant LOCALE_CODE_NUMERIC => 3; + +use constant LOCALE_CODE_DEFAULT => LOCALE_CODE_ALPHA_2; + +1; + +__END__ + +=head1 NAME + +Locale::Constants - constants for Locale codes + +=head1 SYNOPSIS + + use Locale::Constants; + + $codeset = LOCALE_CODE_ALPHA_2; + +=head1 DESCRIPTION + +B<Locale::Constants> defines symbols which are used in +the three modules from the Locale-Codes distribution: + + Locale::Language + Locale::Country + Locale::Currency + +B<Note:> at the moment only Locale::Country supports +more than one code set. + +The symbols defined are used to specify which codes you +want to be used: + + LOCALE_CODE_ALPHA_2 + LOCALE_CODE_ALPHA_3 + LOCALE_CODE_NUMERIC + +You shouldn't have to C<use> this module directly yourself - +it is used by the three Locale modules, which in turn export +the symbols. + +=head1 KNOWN BUGS AND LIMITATIONS + +None at the moment. + +=head1 SEE ALSO + +=over 4 + +=item Locale::Language + +Codes for identification of languages. + +=item Locale::Country + +Codes for identification of countries. + +=item Locale::Currency + +Codes for identification of currencies and funds. + +=back + +=head1 AUTHOR + +Neil Bowers E<lt>neilb@cre.canon.co.ukE<gt> + +=head1 COPYRIGHT + +Copyright (C) 2001, Canon Research Centre Europe (CRE). + +This module is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. 
+ +=cut + diff --git a/Perl/Locale/Country.pm b/Perl/Locale/Country.pm new file mode 100644 index 00000000000..68bd6982599 --- /dev/null +++ b/Perl/Locale/Country.pm @@ -0,0 +1,702 @@ +#----------------------------------------------------------------------- + +=head1 NAME + +Locale::Country - ISO codes for country identification (ISO 3166) + +=head1 SYNOPSIS + + use Locale::Country; + + $country = code2country('jp'); # $country gets 'Japan' + $code = country2code('Norway'); # $code gets 'no' + + @codes = all_country_codes(); + @names = all_country_names(); + + # add "uk" as a pseudo country code for United Kingdom + Locale::Country::_alias_code('uk' => 'gb'); + +=cut + +#----------------------------------------------------------------------- + +package Locale::Country; +use strict; +require 5.002; + +#----------------------------------------------------------------------- + +=head1 DESCRIPTION + +The C<Locale::Country> module provides access to the ISO +codes for identifying countries, as defined in ISO 3166. +You can either access the codes via the L<conversion routines> +(described below), or with the two functions which return lists +of all country codes or all country names. + +There are three different code sets you can use for identifying +countries: + +=over 4 + +=item B<alpha-2> + +Two letter codes, such as 'tv' for Tuvalu. +This code set is identified with the symbol C<LOCALE_CODE_ALPHA_2>. + +=item B<alpha-3> + +Three letter codes, such as 'brb' for Barbados. +This code set is identified with the symbol C<LOCALE_CODE_ALPHA_3>. + +=item B<numeric> + +Numeric codes, such as 064 for Bhutan. +This code set is identified with the symbol C<LOCALE_CODE_NUMERIC>. + +=back + +All of the routines take an optional additional argument +which specifies the code set to use. +If not specified, it defaults to the two-letter codes. 
+This is partly for backwards compatibility (previous versions +of this module only supported the alpha-2 codes), and +partly because they are the most widely used codes. + +The alpha-2 and alpha-3 codes are not case-dependent, +so you can use 'BO', 'Bo', 'bO' or 'bo' for Bolivia. +When a code is returned by one of the functions in +this module, it will always be lower-case. + +=cut + +#----------------------------------------------------------------------- + +require Exporter; +use Carp; +use Locale::Constants; + + +#----------------------------------------------------------------------- +# Public Global Variables +#----------------------------------------------------------------------- +use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); +$VERSION = sprintf("%d.%02d", q$Revision$ =~ /(\d+)\.(\d+)/); +@ISA = qw(Exporter); +@EXPORT = qw(code2country country2code + all_country_codes all_country_names + country_code2code + LOCALE_CODE_ALPHA_2 LOCALE_CODE_ALPHA_3 LOCALE_CODE_NUMERIC); + +#----------------------------------------------------------------------- +# Private Global Variables +#----------------------------------------------------------------------- +my $CODES = []; +my $COUNTRIES = []; + + +#======================================================================= + +=head1 CONVERSION ROUTINES + +There are three conversion routines: C<code2country()>, C<country2code()>, +and C<country_code2code()>. + +=over 8 + +=item code2country( CODE, [ CODESET ] ) + +This function takes a country code and returns a string +which contains the name of the country identified. +If the code is not a valid country code, as defined by ISO 3166, +then C<undef> will be returned: + + $country = code2country('fi'); + +=item country2code( STRING, [ CODESET ] ) + +This function takes a country name and returns the corresponding +country code, if such exists. 
+If the argument could not be identified as a country name, +then C<undef> will be returned: + + $code = country2code('Norway', LOCALE_CODE_ALPHA_3); + # $code will now be 'nor' + +The case of the country name is not important. +See the section L<KNOWN BUGS AND LIMITATIONS> below. + +=item country_code2code( CODE, CODESET, CODESET ) + +This function takes a country code from one code set, +and returns the corresponding code from another code set. + + $alpha2 = country_code2code('fin', + LOCALE_CODE_ALPHA_3 => LOCALE_CODE_ALPHA_2); + # $alpha2 will now be 'fi' + +If the code passed is not a valid country code in +the first code set, or if there isn't a code for the +corresponding country in the second code set, +then C<undef> will be returned. + +=back + +=cut + +#======================================================================= +sub code2country +{ + my $code = shift; + my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; + + + return undef unless defined $code; + + #------------------------------------------------------------------- + # Make sure the code is in the right form before we use it + # to look up the corresponding country. + # We have to sprintf because the codes are given as 3-digits, + # with leading 0's. Eg 052 for Barbados. + #------------------------------------------------------------------- + if ($codeset == LOCALE_CODE_NUMERIC) + { + return undef if ($code =~ /\D/); + $code = sprintf("%.3d", $code); + } + else + { + $code = lc($code); + } + + if (exists $CODES->[$codeset]->{$code}) + { + return $CODES->[$codeset]->{$code}; + } + else + { + #--------------------------------------------------------------- + # no such country code! + #--------------------------------------------------------------- + return undef; + } +} + +sub country2code +{ + my $country = shift; + my $codeset = @_ > 0 ? 
shift : LOCALE_CODE_DEFAULT; + + + return undef unless defined $country; + $country = lc($country); + if (exists $COUNTRIES->[$codeset]->{$country}) + { + return $COUNTRIES->[$codeset]->{$country}; + } + else + { + #--------------------------------------------------------------- + # no such country! + #--------------------------------------------------------------- + return undef; + } +} + +sub country_code2code +{ + (@_ == 3) or croak "country_code2code() takes 3 arguments!"; + + my $code = shift; + my $inset = shift; + my $outset = shift; + my $outcode = shift; + my $country; + + + return undef if $inset == $outset; + $country = code2country($code, $inset); + return undef if not defined $country; + $outcode = country2code($country, $outset); + return $outcode; +} + +#======================================================================= + +=head1 QUERY ROUTINES + +There are two functions which can be used to obtain a list of all codes, +or all country names: + +=over 8 + +=item C<all_country_codes( [ CODESET ] )> + +Returns a list of all country codes in the specified code set. +The codes are guaranteed to be all lower-case, +and not in any particular order. + +=item C<all_country_names( [ CODESET ] )> + +Returns a list of all country names for which there is a corresponding +country code in the specified code set. +The names are capitalised, and not returned in any particular order. + +Not all countries have alpha-3 and numeric codes - +some just have an alpha-2 code, +so you'll get a different number of countries +depending on which code set you specify. + +=back + +=cut + +#======================================================================= +sub all_country_codes +{ + my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; + + return keys %{ $CODES->[$codeset] }; +} + +sub all_country_names +{ + my $codeset = @_ > 0 ? 
shift : LOCALE_CODE_DEFAULT; + + return values %{ $CODES->[$codeset] }; +} + +#----------------------------------------------------------------------- + +=head1 CODE ALIASING + +This module supports a semi-private routine for specifying two letter +code aliases. + + Locale::Country::_alias_code( ALIAS => CODE [, CODESET ] ) + +This feature was added as a mechanism for handling +a "uk" code. The ISO standard says that the two-letter code for +"United Kingdom" is "gb", whereas domain names are all .uk. + +By default the module does not understand "uk", since it is implementing +an ISO standard. If you would like 'uk' to work as the two-letter +code for United Kingdom, use the following: + + use Locale::Country; + + Locale::Country::_alias_code('uk' => 'gb'); + +With this code, both "uk" and "gb" are valid codes for United Kingdom, +with the reverse lookup returning "uk" rather than the usual "gb". + +=cut + +#----------------------------------------------------------------------- + +sub _alias_code +{ + my $alias = shift; + my $real = shift; + my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; + + my $country; + + + if (not exists $CODES->[$codeset]->{$real}) + { + carp "attempt to alias \"$alias\" to unknown country code \"$real\"\n"; + return undef; + } + $country = $CODES->[$codeset]->{$real}; + $CODES->[$codeset]->{$alias} = $country; + $COUNTRIES->[$codeset]->{"\L$country"} = $alias; + + return $alias; +} + +#----------------------------------------------------------------------- + +=head1 EXAMPLES + +The following example illustrates use of the C<code2country()> function. 
+The user is prompted for a country code, and then told the corresponding +country name: + + $| = 1; # turn off buffering + + print "Enter country code: "; + chop($code = <STDIN>); + $country = code2country($code, LOCALE_CODE_ALPHA_2); + if (defined $country) + { + print "$code = $country\n"; + } + else + { + print "'$code' is not a valid country code!\n"; + } + +=head1 DOMAIN NAMES + +Most top-level domain names are based on these codes, +but there are certain codes which aren't. +If you are using this module to identify country from hostname, +your best bet is to preprocess the country code. + +For example, B<edu>, B<com>, B<gov> and friends would map to B<us>; +B<uk> would map to B<gb>. Any others? + +=head1 KNOWN BUGS AND LIMITATIONS + +=over 4 + +=item * + +When using C<country2code()>, the country name must currently appear +exactly as it does in the source of the module. For example, + + country2code('United States') + +will return B<us>, as expected. But the following will all return C<undef>: + + country2code('United States of America') + country2code('Great Britain') + country2code('U.S.A.') + +If there's need for it, a future version could have variants +for country names. + +=item * + +In the current implementation, all data is read in when the +module is loaded, and then held in memory. +A lazy implementation would be more memory friendly. + +=back + +=head1 SEE ALSO + +=over 4 + +=item Locale::Language + +ISO two letter codes for identification of language (ISO 639). + +=item Locale::Currency + +ISO three letter codes for identification of currencies +and funds (ISO 4217). + +=item ISO 3166 + +The ISO standard which defines these codes. + +=item http://www.din.de/gremien/nas/nabd/iso3166ma/ + +Official home page for ISO 3166 + +=item http://www.egt.ie/standards/iso3166/iso3166-1-en.html + +Another useful, but not official, home page. 
+ +=item http://www.cia.gov/cia/publications/factbook/docs/app-f.html + +An appendix in the CIA world fact book which lists country codes +as defined by ISO 3166, FIPS 10-4, and internet domain names. + +=back + + +=head1 AUTHOR + +Neil Bowers E<lt>neilb@cre.canon.co.ukE<gt> + +=head1 COPYRIGHT + +Copyright (c) 1997-2001 Canon Research Centre Europe (CRE). + +This module is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=cut + +#----------------------------------------------------------------------- + +#======================================================================= +# initialisation code - stuff the DATA into the ALPHA2 hash +#======================================================================= +{ + my ($alpha2, $alpha3, $numeric); + my $country; + + + while (<DATA>) + { + next unless /\S/; + chop; + ($alpha2, $alpha3, $numeric, $country) = split(/:/, $_, 4); + + $CODES->[LOCALE_CODE_ALPHA_2]->{$alpha2} = $country; + $COUNTRIES->[LOCALE_CODE_ALPHA_2]->{"\L$country"} = $alpha2; + + if ($alpha3) + { + $CODES->[LOCALE_CODE_ALPHA_3]->{$alpha3} = $country; + $COUNTRIES->[LOCALE_CODE_ALPHA_3]->{"\L$country"} = $alpha3; + } + + if ($numeric) + { + $CODES->[LOCALE_CODE_NUMERIC]->{$numeric} = $country; + $COUNTRIES->[LOCALE_CODE_NUMERIC]->{"\L$country"} = $numeric; + } + + } +} + +1; + +__DATA__ +ad:and:020:Andorra +ae:are:784:United Arab Emirates +af:afg:004:Afghanistan +ag:atg:028:Antigua and Barbuda +ai:aia:660:Anguilla +al:alb:008:Albania +am:arm:051:Armenia +an:ant:530:Netherlands Antilles +ao:ago:024:Angola +aq:::Antarctica +ar:arg:032:Argentina +as:asm:016:American Samoa +at:aut:040:Austria +au:aus:036:Australia +aw:abw:533:Aruba +az:aze:031:Azerbaijan +ba:bih:070:Bosnia and Herzegovina +bb:brb:052:Barbados +bd:bgd:050:Bangladesh +be:bel:056:Belgium +bf:bfa:854:Burkina Faso +bg:bgr:100:Bulgaria +bh:bhr:048:Bahrain +bi:bdi:108:Burundi +bj:ben:204:Benin +bm:bmu:060:Bermuda +bn:brn:096:Brunei Darussalam 
+bo:bol:068:Bolivia +br:bra:076:Brazil +bs:bhs:044:Bahamas +bt:btn:064:Bhutan +bv:::Bouvet Island +bw:bwa:072:Botswana +by:blr:112:Belarus +bz:blz:084:Belize +ca:can:124:Canada +cc:::Cocos (Keeling) Islands +cd:cod:180:Congo, The Democratic Republic of the +cf:caf:140:Central African Republic +cg:cog:178:Congo +ch:che:756:Switzerland +ci:civ:384:Cote D'Ivoire +ck:cok:184:Cook Islands +cl:chl:152:Chile +cm:cmr:120:Cameroon +cn:chn:156:China +co:col:170:Colombia +cr:cri:188:Costa Rica +cu:cub:192:Cuba +cv:cpv:132:Cape Verde +cx:::Christmas Island +cy:cyp:196:Cyprus +cz:cze:203:Czech Republic +de:deu:276:Germany +dj:dji:262:Djibouti +dk:dnk:208:Denmark +dm:dma:212:Dominica +do:dom:214:Dominican Republic +dz:dza:012:Algeria +ec:ecu:218:Ecuador +ee:est:233:Estonia +eg:egy:818:Egypt +eh:esh:732:Western Sahara +er:eri:232:Eritrea +es:esp:724:Spain +et:eth:231:Ethiopia +fi:fin:246:Finland +fj:fji:242:Fiji +fk:flk:238:Falkland Islands (Malvinas) +fm:fsm:583:Micronesia, Federated States of +fo:fro:234:Faroe Islands +fr:fra:250:France +fx:::France, Metropolitan +ga:gab:266:Gabon +gb:gbr:826:United Kingdom +gd:grd:308:Grenada +ge:geo:268:Georgia +gf:guf:254:French Guiana +gh:gha:288:Ghana +gi:gib:292:Gibraltar +gl:grl:304:Greenland +gm:gmb:270:Gambia +gn:gin:324:Guinea +gp:glp:312:Guadeloupe +gq:gnq:226:Equatorial Guinea +gr:grc:300:Greece +gs:::South Georgia and the South Sandwich Islands +gt:gtm:320:Guatemala +gu:gum:316:Guam +gw:gnb:624:Guinea-Bissau +gy:guy:328:Guyana +hk:hkg:344:Hong Kong +hm:::Heard Island and McDonald Islands +hn:hnd:340:Honduras +hr:hrv:191:Croatia +ht:hti:332:Haiti +hu:hun:348:Hungary +id:idn:360:Indonesia +ie:irl:372:Ireland +il:isr:376:Israel +in:ind:356:India +io:::British Indian Ocean Territory +iq:irq:368:Iraq +ir:irn:364:Iran, Islamic Republic of +is:isl:352:Iceland +it:ita:380:Italy +jm:jam:388:Jamaica +jo:jor:400:Jordan +jp:jpn:392:Japan +ke:ken:404:Kenya +kg:kgz:417:Kyrgyzstan +kh:khm:116:Cambodia +ki:kir:296:Kiribati +km:com:174:Comoros 
+kn:kna:659:Saint Kitts and Nevis +kp:prk:408:Korea, Democratic People's Republic of +kr:kor:410:Korea, Republic of +kw:kwt:414:Kuwait +ky:cym:136:Cayman Islands +kz:kaz:398:Kazakstan +la:lao:418:Lao People's Democratic Republic +lb:lbn:422:Lebanon +lc:lca:662:Saint Lucia +li:lie:438:Liechtenstein +lk:lka:144:Sri Lanka +lr:lbr:430:Liberia +ls:lso:426:Lesotho +lt:ltu:440:Lithuania +lu:lux:442:Luxembourg +lv:lva:428:Latvia +ly:lby:434:Libyan Arab Jamahiriya +ma:mar:504:Morocco +mc:mco:492:Monaco +md:mda:498:Moldova, Republic of +mg:mdg:450:Madagascar +mh:mhl:584:Marshall Islands +mk:mkd:807:Macedonia, the Former Yugoslav Republic of +ml:mli:466:Mali +mm:mmr:104:Myanmar +mn:mng:496:Mongolia +mo:mac:446:Macau +mp:mnp:580:Northern Mariana Islands +mq:mtq:474:Martinique +mr:mrt:478:Mauritania +ms:msr:500:Montserrat +mt:mlt:470:Malta +mu:mus:480:Mauritius +mv:mdv:462:Maldives +mw:mwi:454:Malawi +mx:mex:484:Mexico +my:mys:458:Malaysia +mz:moz:508:Mozambique +na:nam:516:Namibia +nc:ncl:540:New Caledonia +ne:ner:562:Niger +nf:nfk:574:Norfolk Island +ng:nga:566:Nigeria +ni:nic:558:Nicaragua +nl:nld:528:Netherlands +no:nor:578:Norway +np:npl:524:Nepal +nr:nru:520:Nauru +nu:niu:570:Niue +nz:nzl:554:New Zealand +om:omn:512:Oman +pa:pan:591:Panama +pe:per:604:Peru +pf:pyf:258:French Polynesia +pg:png:598:Papua New Guinea +ph:phl:608:Philippines +pk:pak:586:Pakistan +pl:pol:616:Poland +pm:spm:666:Saint Pierre and Miquelon +pn:pcn:612:Pitcairn +pr:pri:630:Puerto Rico +ps:pse:275:Palestinian Territory, Occupied +pt:prt:620:Portugal +pw:plw:585:Palau +py:pry:600:Paraguay +qa:qat:634:Qatar +re:reu:638:Reunion +ro:rom:642:Romania +ru:rus:643:Russian Federation +rw:rwa:646:Rwanda +sa:sau:682:Saudi Arabia +sb:slb:090:Solomon Islands +sc:syc:690:Seychelles +sd:sdn:736:Sudan +se:swe:752:Sweden +sg:sgp:702:Singapore +sh:shn:654:Saint Helena +si:svn:705:Slovenia +sj:sjm:744:Svalbard and Jan Mayen +sk:svk:703:Slovakia +sl:sle:694:Sierra Leone +sm:smr:674:San Marino +sn:sen:686:Senegal 
+so:som:706:Somalia +sr:sur:740:Suriname +st:stp:678:Sao Tome and Principe +sv:slv:222:El Salvador +sy:syr:760:Syrian Arab Republic +sz:swz:748:Swaziland +tc:tca:796:Turks and Caicos Islands +td:tcd:148:Chad +tf:::French Southern Territories +tg:tgo:768:Togo +th:tha:764:Thailand +tj:tjk:762:Tajikistan +tk:tkl:772:Tokelau +tm:tkm:795:Turkmenistan +tn:tun:788:Tunisia +to:ton:776:Tonga +tp:tmp:626:East Timor +tr:tur:792:Turkey +tt:tto:780:Trinidad and Tobago +tv:tuv:798:Tuvalu +tw:twn:158:Taiwan, Province of China +tz:tza:834:Tanzania, United Republic of +ua:ukr:804:Ukraine +ug:uga:800:Uganda +um:::United States Minor Outlying Islands +us:usa:840:United States +uy:ury:858:Uruguay +uz:uzb:860:Uzbekistan +va:vat:336:Holy See (Vatican City State) +vc:vct:670:Saint Vincent and the Grenadines +ve:ven:862:Venezuela +vg:vgb:092:Virgin Islands, British +vi:vir:850:Virgin Islands, U.S. +vn:vnm:704:Vietnam +vu:vut:548:Vanuatu +wf:wlf:876:Wallis and Futuna +ws:wsm:882:Samoa +ye:yem:887:Yemen +yt:::Mayotte +yu:yug:891:Yugoslavia +za:zaf:710:South Africa +zm:zmb:894:Zambia +zr:::Zaire +zw:zwe:716:Zimbabwe diff --git a/Perl/Locale/Language.pm b/Perl/Locale/Language.pm new file mode 100644 index 00000000000..1ce0faff5b6 --- /dev/null +++ b/Perl/Locale/Language.pm @@ -0,0 +1,455 @@ +#----------------------------------------------------------------------- + +=head1 NAME + +Locale::Language - ISO two letter codes for language identification (ISO 639) + +=head1 SYNOPSIS + + use Locale::Language; + + $lang = code2language('en'); # $lang gets 'English' + $code = language2code('French'); # $code gets 'fr' + + @codes = all_language_codes(); + @names = all_language_names(); + +=cut + +#----------------------------------------------------------------------- + +package Locale::Language; +use strict; +require 5.002; + +#----------------------------------------------------------------------- + +=head1 DESCRIPTION + +The C<Locale::Language> module provides access to the ISO two-letter +codes for 
identifying languages, as defined in ISO 639. You can either +access the codes via the L<conversion routines> (described below), +or with the two functions which return lists of all language codes or +all language names. + +=cut + +#----------------------------------------------------------------------- + +require Exporter; + +#----------------------------------------------------------------------- +# Public Global Variables +#----------------------------------------------------------------------- +use vars qw($VERSION @ISA @EXPORT); +$VERSION = sprintf("%d.%02d", q$Revision$ =~ /(\d+)\.(\d+)/); +@ISA = qw(Exporter); +@EXPORT = qw(&code2language &language2code + &all_language_codes &all_language_names ); + +#----------------------------------------------------------------------- +# Private Global Variables +#----------------------------------------------------------------------- +my %CODES = (); +my %LANGUAGES = (); + + +#======================================================================= + +=head1 CONVERSION ROUTINES + +There are two conversion routines: C<code2language()> and C<language2code()>. + +=over 8 + +=item code2language() + +This function takes a two letter language code and returns a string +which contains the name of the language identified. If the code is +not a valid language code, as defined by ISO 639, then C<undef> +will be returned. + + $lang = code2language($code); + +=item language2code() + +This function takes a language name and returns the corresponding +two letter language code, if such exists. +If the argument could not be identified as a language name, +then C<undef> will be returned. + + $code = language2code('French'); + +The case of the language name is not important. +See the section L<KNOWN BUGS AND LIMITATIONS> below. 
+ +=back + +=cut + +#======================================================================= +sub code2language +{ + my $code = shift; + + + return undef unless defined $code; + $code = lc($code); + if (exists $CODES{$code}) + { + return $CODES{$code}; + } + else + { + #--------------------------------------------------------------- + # no such language code! + #--------------------------------------------------------------- + return undef; + } +} + +sub language2code +{ + my $lang = shift; + + + return undef unless defined $lang; + $lang = lc($lang); + if (exists $LANGUAGES{$lang}) + { + return $LANGUAGES{$lang}; + } + else + { + #--------------------------------------------------------------- + # no such language! + #--------------------------------------------------------------- + return undef; + } +} + +#======================================================================= + +=head1 QUERY ROUTINES + +There are two functions which can be used to obtain a list of all +language codes, or all language names: + +=over 8 + +=item C<all_language_codes()> + +Returns a list of all two-letter language codes. +The codes are guaranteed to be all lower-case, +and not in any particular order. + +=item C<all_language_names()> + +Returns a list of all language names for which there is a corresponding +two-letter language code. The names are capitalised, and not returned +in any particular order. + +=back + +=cut + +#======================================================================= +sub all_language_codes +{ + return keys %CODES; +} + +sub all_language_names +{ + return values %CODES; +} + +#----------------------------------------------------------------------- + +=head1 EXAMPLES + +The following example illustrates use of the C<code2language()> function. 
+The user is prompted for a language code, and then told the corresponding +language name: + + $| = 1; # turn off buffering + + print "Enter language code: "; + chop($code = <STDIN>); + $lang = code2language($code); + if (defined $lang) + { + print "$code = $lang\n"; + } + else + { + print "'$code' is not a valid language code!\n"; + } + +=head1 KNOWN BUGS AND LIMITATIONS + +=over 4 + +=item * + +In the current implementation, all data is read in when the +module is loaded, and then held in memory. +A lazy implementation would be more memory friendly. + +=item * + +Currently just supports the two letter language codes - +there are also three-letter codes, and numbers. +Would these be of any use to anyone? + +=back + +=head1 SEE ALSO + +=over 4 + +=item Locale::Country + +ISO codes for identification of country (ISO 3166). +Supports 2-letter, 3-letter, and numeric country codes. + +=item Locale::Currency + +ISO three letter codes for identification of currencies and funds (ISO 4217). + +=item ISO 639:1988 (E/F) + +Code for the representation of names of languages. + +=item http://lcweb.loc.gov/standards/iso639-2/langhome.html + +Home page for ISO 639-2 + +=back + + +=head1 AUTHOR + +Neil Bowers E<lt>neilb@cre.canon.co.ukE<gt> + +=head1 COPYRIGHT + +Copyright (c) 1997-2001 Canon Research Centre Europe (CRE). + +This module is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. 
+ +=cut + +#----------------------------------------------------------------------- + +#======================================================================= +# initialisation code - stuff the DATA into the CODES hash +#======================================================================= +{ + my $code; + my $language; + + + while (<DATA>) + { + next unless /\S/; + chop; + ($code, $language) = split(/:/, $_, 2); + $CODES{$code} = $language; + $LANGUAGES{"\L$language"} = $code; + } +} + +1; + +__DATA__ +aa:Afar +ab:Abkhazian +ae:Avestan +af:Afrikaans +am:Amharic +ar:Arabic +as:Assamese +ay:Aymara +az:Azerbaijani + +ba:Bashkir +be:Belarusian +bg:Bulgarian +bh:Bihari +bi:Bislama +bn:Bengali +bo:Tibetan +br:Breton +bs:Bosnian + +ca:Catalan +ce:Chechen +ch:Chamorro +co:Corsican +cs:Czech +cu:Church Slavic +cv:Chuvash +cy:Welsh + +da:Danish +de:German +dz:Dzongkha + +el:Greek +en:English +eo:Esperanto +es:Spanish +et:Estonian +eu:Basque + +fa:Persian +fi:Finnish +fj:Fijian +fo:Faeroese +fr:French +fy:Frisian + +ga:Irish +gd:Gaelic (Scots) +gl:Gallegan +gn:Guarani +gu:Gujarati +gv:Manx + +ha:Hausa +he:Hebrew +hi:Hindi +ho:Hiri Motu +hr:Croatian +hu:Hungarian +hy:Armenian +hz:Herero + +ia:Interlingua +id:Indonesian +ie:Interlingue +ik:Inupiaq +is:Icelandic +it:Italian +iu:Inuktitut + +ja:Japanese +jw:Javanese + +ka:Georgian +ki:Kikuyu +kj:Kuanyama +kk:Kazakh +kl:Kalaallisut +km:Khmer +kn:Kannada +ko:Korean +ks:Kashmiri +ku:Kurdish +kv:Komi +kw:Cornish +ky:Kirghiz + +la:Latin +lb:Letzeburgesch +ln:Lingala +lo:Lao +lt:Lithuanian +lv:Latvian + +mg:Malagasy +mh:Marshall +mi:Maori +mk:Macedonian +ml:Malayalam +mn:Mongolian +mo:Moldavian +mr:Marathi +ms:Malay +mt:Maltese +my:Burmese + +na:Nauru +nb:Norwegian Bokmål +nd:Ndebele, North +ne:Nepali +ng:Ndonga +nl:Dutch +nn:Norwegian Nynorsk +no:Norwegian +nr:Ndebele, South +nv:Navajo +ny:Chichewa; Nyanja + +oc:Occitan (post 1500) +om:Oromo +or:Oriya +os:Ossetian; Ossetic + +pa:Panjabi +pi:Pali +pl:Polish +ps:Pushto 
+pt:Portuguese + +qu:Quechua + +rm:Rhaeto-Romance +rn:Rundi +ro:Romanian +ru:Russian +rw:Kinyarwanda + +sa:Sanskrit +sc:Sardinian +sd:Sindhi +se:Sami +sg:Sango +si:Sinhalese +sk:Slovak +sl:Slovenian +sm:Samoan +sn:Shona +so:Somali +sq:Albanian +sr:Serbian +ss:Swati +st:Sotho +su:Sundanese +sv:Swedish +sw:Swahili + +ta:Tamil +te:Telugu +tg:Tajik +th:Thai +ti:Tigrinya +tk:Turkmen +tl:Tagalog +tn:Tswana +to:Tonga +tr:Turkish +ts:Tsonga +tt:Tatar +tw:Twi + +ug:Uighur +uk:Ukrainian +ur:Urdu +uz:Uzbek + +vi:Vietnamese +vo:Volapük + +wo:Wolof + +xh:Xhosa + +yi:Yiddish +yo:Yoruba + +za:Zhuang +zh:Chinese +zu:Zulu |