| # |
| # Locale::Script - ISO codes for script identification (ISO 15924) |
| # |
| # $Id: Script.pm,v 2.7 2004/06/10 21:19:34 neilb Exp $ |
| # |
| |
| package Locale::Script; |
| use strict; |
| require 5.002; |
| |
| require Exporter; |
| use Carp; |
| use Locale::Constants; |
| |
| |
| #----------------------------------------------------------------------- |
| # Public Global Variables |
| #----------------------------------------------------------------------- |
| use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); |
| $VERSION = sprintf("%d.%02d", q$Revision: 2.7 $ =~ /(\d+)\.(\d+)/); |
| @ISA = qw(Exporter); |
| @EXPORT = qw(code2script script2code |
| all_script_codes all_script_names |
| script_code2code |
| LOCALE_CODE_ALPHA_2 LOCALE_CODE_ALPHA_3 LOCALE_CODE_NUMERIC); |
| |
| #----------------------------------------------------------------------- |
| # Private Global Variables |
| #----------------------------------------------------------------------- |
| my $CODES = []; |
| my $COUNTRIES = []; |
| |
| |
| #======================================================================= |
| # |
| # code2script ( CODE [, CODESET ] ) |
| # |
| #======================================================================= |
| sub code2script |
| { |
| my $code = shift; |
| my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; |
| |
| |
| return undef unless defined $code; |
| |
| #------------------------------------------------------------------- |
| # Make sure the code is in the right form before we use it |
| # to look up the corresponding script. |
| # We have to sprintf because the codes are given as 3-digits, |
| # with leading 0's. Eg 070 for Egyptian demotic. |
| #------------------------------------------------------------------- |
| if ($codeset == LOCALE_CODE_NUMERIC) |
| { |
| return undef if ($code =~ /\D/); |
| $code = sprintf("%.3d", $code); |
| } |
| else |
| { |
| $code = lc($code); |
| } |
| |
| if (exists $CODES->[$codeset]->{$code}) |
| { |
| return $CODES->[$codeset]->{$code}; |
| } |
| else |
| { |
| #--------------------------------------------------------------- |
| # no such script code! |
| #--------------------------------------------------------------- |
| return undef; |
| } |
| } |
| |
| |
| #======================================================================= |
| # |
| # script2code ( SCRIPT [, CODESET ] ) |
| # |
| #======================================================================= |
| sub script2code |
| { |
| my $script = shift; |
| my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; |
| |
| |
| return undef unless defined $script; |
| $script = lc($script); |
| if (exists $COUNTRIES->[$codeset]->{$script}) |
| { |
| return $COUNTRIES->[$codeset]->{$script}; |
| } |
| else |
| { |
| #--------------------------------------------------------------- |
| # no such script! |
| #--------------------------------------------------------------- |
| return undef; |
| } |
| } |
| |
| |
| #======================================================================= |
| # |
| # script_code2code ( CODE, IN-CODESET, OUT-CODESET ) |
| # |
| #======================================================================= |
| sub script_code2code |
| { |
| (@_ == 3) or croak "script_code2code() takes 3 arguments!"; |
| |
| my $code = shift; |
| my $inset = shift; |
| my $outset = shift; |
| my $outcode; |
| my $script; |
| |
| |
| return undef if $inset == $outset; |
| $script = code2script($code, $inset); |
| return undef if not defined $script; |
| $outcode = script2code($script, $outset); |
| return $outcode; |
| } |
| |
| |
| #======================================================================= |
| # |
| # all_script_codes() |
| # |
| #======================================================================= |
| sub all_script_codes |
| { |
| my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; |
| |
| return keys %{ $CODES->[$codeset] }; |
| } |
| |
| |
| #======================================================================= |
| # |
| # all_script_names() |
| # |
| #======================================================================= |
| sub all_script_names |
| { |
| my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; |
| |
| return values %{ $CODES->[$codeset] }; |
| } |
| |
| |
| #======================================================================= |
| # |
| # initialisation code - stuff the DATA into the ALPHA2 hash |
| # |
| #======================================================================= |
| { |
| my ($alpha2, $alpha3, $numeric); |
| my $script; |
| local $_; |
| |
| |
| while (<DATA>) |
| { |
| next unless /\S/; |
| chop; |
| ($alpha2, $alpha3, $numeric, $script) = split(/:/, $_, 4); |
| |
| $CODES->[LOCALE_CODE_ALPHA_2]->{$alpha2} = $script; |
| $COUNTRIES->[LOCALE_CODE_ALPHA_2]->{"\L$script"} = $alpha2; |
| |
| if ($alpha3) |
| { |
| $CODES->[LOCALE_CODE_ALPHA_3]->{$alpha3} = $script; |
| $COUNTRIES->[LOCALE_CODE_ALPHA_3]->{"\L$script"} = $alpha3; |
| } |
| |
| if ($numeric) |
| { |
| $CODES->[LOCALE_CODE_NUMERIC]->{$numeric} = $script; |
| $COUNTRIES->[LOCALE_CODE_NUMERIC]->{"\L$script"} = $numeric; |
| } |
| |
| } |
| |
| close(DATA); |
| } |
| |
| 1; |
| |
| __DATA__ |
| am:ama:130:Aramaic |
| ar:ara:160:Arabic |
| av:ave:151:Avestan |
| bh:bhm:300:Brahmi (Ashoka) |
| bi:bid:372:Buhid |
| bn:ben:325:Bengali |
| bo:bod:330:Tibetan |
| bp:bpm:285:Bopomofo |
| br:brl:570:Braille |
| bt:btk:365:Batak |
| bu:bug:367:Buginese (Makassar) |
| by:bys:550:Blissymbols |
| ca:cam:358:Cham |
| ch:chu:221:Old Church Slavonic |
| ci:cir:291:Cirth |
| cm:cmn:402:Cypro-Minoan |
| co:cop:205:Coptic |
| cp:cpr:403:Cypriote syllabary |
| cy:cyr:220:Cyrillic |
| ds:dsr:250:Deserel (Mormon) |
| dv:dvn:315:Devanagari (Nagari) |
| ed:egd:070:Egyptian demotic |
| eg:egy:050:Egyptian hieroglyphs |
| eh:egh:060:Egyptian hieratic |
| el:ell:200:Greek |
| eo:eos:210:Etruscan and Oscan |
| et:eth:430:Ethiopic |
| gl:glg:225:Glagolitic |
| gm:gmu:310:Gurmukhi |
| gt:gth:206:Gothic |
| gu:guj:320:Gujarati |
| ha:han:500:Han ideographs |
| he:heb:125:Hebrew |
| hg:hgl:420:Hangul |
| hm:hmo:450:Pahawh Hmong |
| ho:hoo:371:Hanunoo |
| hr:hrg:410:Hiragana |
| hu:hun:176:Old Hungarian runic |
| hv:hvn:175:Kok Turki runic |
| hy:hye:230:Armenian |
| iv:ivl:610:Indus Valley |
| ja:jap:930:(alias for Han + Hiragana + Katakana) |
| jl:jlg:445:Cherokee syllabary |
| jw:jwi:360:Javanese |
| ka:kam:241:Georgian (Mxedruli) |
| kh:khn:931:(alias for Hangul + Han) |
| kk:kkn:411:Katakana |
| km:khm:354:Khmer |
| kn:kan:345:Kannada |
| kr:krn:357:Karenni (Kayah Li) |
| ks:kst:305:Kharoshthi |
| kx:kax:240:Georgian (Xucuri) |
| la:lat:217:Latin |
| lf:laf:215:Latin (Fraktur variant) |
| lg:lag:216:Latin (Gaelic variant) |
| lo:lao:356:Lao |
| lp:lpc:335:Lepcha (Rong) |
| md:mda:140:Mandaean |
| me:mer:100:Meroitic |
| mh:may:090:Mayan hieroglyphs |
| ml:mlm:347:Malayalam |
| mn:mon:145:Mongolian |
| my:mya:350:Burmese |
| na:naa:400:Linear A |
| nb:nbb:401:Linear B |
| og:ogm:212:Ogham |
| or:ory:327:Oriya |
| os:osm:260:Osmanya |
| ph:phx:115:Phoenician |
| ph:pah:150:Pahlavi |
| pl:pld:282:Pollard Phonetic |
| pq:pqd:295:Klingon plQaD |
| pr:prm:227:Old Permic |
| ps:pst:600:Phaistos Disk |
| rn:rnr:211:Runic (Germanic) |
| rr:rro:620:Rongo-rongo |
| sa:sar:110:South Arabian |
| si:sin:348:Sinhala |
| sj:syj:137:Syriac (Jacobite variant) |
| sl:slb:440:Unified Canadian Aboriginal Syllabics |
| sn:syn:136:Syriac (Nestorian variant) |
| sw:sww:281:Shavian (Shaw) |
| sy:syr:135:Syriac (Estrangelo) |
| ta:tam:346:Tamil |
| tb:tbw:373:Tagbanwa |
| te:tel:340:Telugu |
| tf:tfn:120:Tifnagh |
| tg:tag:370:Tagalog |
| th:tha:352:Thai |
| tn:tna:170:Thaana |
| tw:twr:290:Tengwar |
| va:vai:470:Vai |
| vs:vsp:280:Visible Speech |
| xa:xas:000:Cuneiform, Sumero-Akkadian |
| xf:xfa:105:Cuneiform, Old Persian |
| xk:xkn:412:(alias for Hiragana + Katakana) |
| xu:xug:106:Cuneiform, Ugaritic |
| yi:yii:460:Yi |
| zx:zxx:997:Unwritten language |
| zy:zyy:998:Undetermined script |
| zz:zzz:999:Uncoded script |