/* Copyright (c) 2002 Red Hat Incorporated.
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

     Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.

     Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.

     The name of Red Hat Incorporated may not be used to endorse
     or promote products derived from this software without specific
     prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Generated using UnicodeData.txt 5.2 */

/* Expression used to filter out the characters for the below tables:

    awk -F\; \
    '{ \
      VAL = strtonum (sprintf("0x%s", $1)); \
      # All of general category "L", except for two Thai characters which \
      # are actually punctuation characters.  Old Unicode weirdness. \
      # The character "COMBINING GREEK YPOGEGRAMMENI", as well as all Thai \
      # characters which are in "Mn" category.  Old Unicode weirdness. \
      # All numerical digit or letter characters, except the ASCII variants. \
      # This is necessary due to the unfortunate ISO C definition for the \
      # iswdigit class, otherwise these characters are missing in iswalnum. \
      # All "Other Symbols" which are named as "LETTER" characters. \
      # \
      # Before running this test, make sure to expand all Unicode blocks \
      # which are just marked by their first and last character! \
      # \
      if (    (match($3, "^L") && VAL != 0x0e2f && VAL != 0x0e46) \
           || (match($3, "^Mn") && (VAL == 0x0345 || match($2, "\\<CHARACTER\\>"))) \
           || (match($3, "^N[dl]") && VAL >= 0x100) \
           || (match($3, "^So") && match($2, "\\<LETTER\\>"))) \
        print $1; \
    }' UnicodeData.txt
*/

/* Alphabetic code points in U+0000..U+00FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u0[] = {
  0x41, 0x0, 0x5a, 0x61, 0x0, 0x7a, 0xaa, 0xb5,
  0xba, 0xc0, 0x0, 0xd6, 0xd8, 0x0, 0xf6, 0xf8,
  0x0, 0xff };
/* u1 all alphabetic */
/* Alphabetic code points in U+0200..U+02FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u2[] = {
  0x00, 0x0, 0xc1, 0xc6, 0x0, 0xd1,
  0xe0, 0x0, 0xe4, 0xec, 0xee };
/* Alphabetic code points in U+0300..U+03FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u3[] = {
  0x45, 0x70, 0x0, 0x74, 0x76, 0x77,
  0x7a, 0x0, 0x7d, 0x86, 0x88, 0x0, 0x8a, 0x8c,
  0x8e, 0x0, 0xa1, 0xa3, 0x0, 0xf5,
  0xf7, 0x0, 0xff };
/* Alphabetic code points in U+0400..U+04FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u4[] = {
  0x00, 0x0, 0x81, 0x8a, 0x0, 0xff };
/* Alphabetic code points in U+0500..U+05FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u5[] = {
  0x00, 0x0, 0x25, 0x31, 0x0, 0x56, 0x59,
  0x61, 0x0, 0x87, 0xd0, 0x0, 0xea,
  0xf0, 0x0, 0xf2 };
/* Alphabetic code points in U+0600..U+06FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u6[] = {
  0x21, 0x0, 0x4a, 0x60, 0x0, 0x69,
  0x6e, 0x0, 0x6f, 0x71, 0x0, 0xd3,
  0xd5, 0xe5, 0x0, 0xe6, 0xee, 0x0, 0xfc, 0xff };
/* Alphabetic code points in U+0700..U+07FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u7[] = {
  0x10, 0x12, 0x0, 0x2f, 0x4d, 0x0, 0xa5, 0xb1,
  0xc0, 0x0, 0xea, 0xf4, 0xf5, 0xfa };
/* Alphabetic code points in U+0800..U+08FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u8[] = {
  0x00, 0x0, 0x15, 0x1a, 0x24, 0x28 };
/* Alphabetic code points in U+0900..U+09FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u9[] = {
  0x04, 0x0, 0x39, 0x3d, 0x50, 0x58, 0x0, 0x61,
  0x66, 0x0, 0x6f, 0x71, 0x72, 0x79, 0x0, 0x7f,
  0x85, 0x0, 0x8c, 0x8f, 0x0, 0x90,
  0x93, 0x0, 0xa8, 0xaa, 0x0, 0xb0, 0xb2,
  0xb6, 0x0, 0xb9, 0xbd, 0xce, 0xdc, 0x0, 0xdd,
  0xdf, 0x0, 0xe1, 0xe6, 0x0, 0xf1 };
/* Alphabetic code points in U+0A00..U+0AFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char ua[] = {
  0x05, 0x0, 0x0a, 0x0f, 0x0, 0x10,
  0x13, 0x0, 0x28, 0x2a, 0x0, 0x30,
  0x32, 0x0, 0x33, 0x35, 0x0, 0x36,
  0x38, 0x0, 0x39, 0x59, 0x0, 0x5c,
  0x5e, 0x66, 0x0, 0x6f, 0x72, 0x0, 0x74,
  0x85, 0x0, 0x8d, 0x8f, 0x0, 0x91,
  0x93, 0x0, 0xa8, 0xaa, 0x0, 0xb0,
  0xb2, 0x0, 0xb3, 0xb5, 0x0, 0xb9,
  0xbd, 0xd0, 0xe0, 0xe1, 0xe6, 0x0, 0xef };
/* Alphabetic code points in U+0B00..U+0BFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char ub[] = {
  0x05, 0x0, 0x0c, 0x0f, 0x0, 0x10,
  0x13, 0x0, 0x28, 0x2a, 0x0, 0x30,
  0x32, 0x0, 0x33, 0x35, 0x0, 0x39, 0x3d,
  0x5c, 0x0, 0x5d, 0x5f, 0x0, 0x61,
  0x66, 0x0, 0x6f, 0x71, 0x83, 0x85, 0x0, 0x8a,
  0x8e, 0x0, 0x90, 0x92, 0x0, 0x95,
  0x99, 0x0, 0x9a, 0x9c, 0x9e, 0x0, 0x9f,
  0xa3, 0x0, 0xa4, 0xa8, 0x0, 0xaa,
  0xae, 0x0, 0xb9, 0xd0, 0xe6, 0x0, 0xef };
/* Alphabetic code points in U+0C00..U+0CFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char uc[] = {
  0x05, 0x0, 0x0c, 0x0e, 0x0, 0x10,
  0x12, 0x0, 0x28, 0x2a, 0x0, 0x33,
  0x35, 0x0, 0x39, 0x3d, 0x58, 0x59,
  0x60, 0x0, 0x61, 0x66, 0x0, 0x6f,
  0x85, 0x0, 0x8c, 0x8e, 0x0, 0x90,
  0x92, 0x0, 0xa8, 0xaa, 0x0, 0xb3,
  0xb5, 0x0, 0xb9, 0xbd, 0xde, 0xe0, 0x0, 0xe1,
  0xe6, 0x0, 0xef };
/* Alphabetic code points in U+0D00..U+0DFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char ud[] = {
  0x05, 0x0, 0x0c, 0x0e, 0x0, 0x10,
  0x12, 0x0, 0x28, 0x2a, 0x0, 0x39, 0x3d,
  0x60, 0x0, 0x61, 0x66, 0x0, 0x6f,
  0x7a, 0x0, 0x7f, 0x85, 0x0, 0x96, 0x9a,
  0x0, 0xb1, 0xb3, 0x0, 0xbb, 0xbd,
  0xc0, 0x0, 0xc6 };
/* Alphabetic code points in U+0E00..U+0EFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char ue[] = {
  0x01, 0x0, 0x2e, 0x30, 0x0, 0x3a, 0x40,
  0x0, 0x45, 0x47, 0x0, 0x4e, 0x50, 0x0, 0x59,
  0x81, 0x0, 0x82, 0x84, 0x87, 0x0, 0x88, 0x8a,
  0x8d, 0x94, 0x0, 0x97, 0x99, 0x0, 0x9f, 0xa1,
  0x0, 0xa3, 0xa5, 0xa7, 0xaa, 0x0, 0xab, 0xad,
  0x0, 0xb0, 0xb2, 0x0, 0xb3, 0xbd, 0xc0, 0x0,
  0xc4, 0xc6, 0xd0, 0x0, 0xd9, 0xdc, 0x0, 0xdd };
/* Alphabetic code points in U+0F00..U+0FFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles
   (the leading 0x00 here is the single code point U+0F00). */
static const unsigned char uf[] = {
  0x00, 0x20, 0x0, 0x29, 0x40, 0x0, 0x47, 0x49,
  0x0, 0x6c, 0x88, 0x0, 0x8b };
/* Alphabetic code points in U+1000..U+10FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u10[] = {
  0x00, 0x0, 0x2a, 0x3f, 0x0, 0x49,
  0x50, 0x0, 0x55, 0x5a, 0x0, 0x5d,
  0x61, 0x65, 0x66, 0x6e, 0x0, 0x70,
  0x75, 0x0, 0x81, 0x8e, 0x90, 0x0, 0x99,
  0xa0, 0x0, 0xc5, 0xd0, 0x0, 0xfa, 0xfc };
/* u11 all alphabetic */
/* Alphabetic code points in U+1200..U+12FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u12[] = {
  0x00, 0x0, 0x48, 0x4a, 0x0, 0x4d,
  0x50, 0x0, 0x56, 0x58, 0x5a, 0x0, 0x5d,
  0x60, 0x0, 0x88, 0x8a, 0x0, 0x8d,
  0x90, 0x0, 0xb0, 0xb2, 0x0, 0xb5,
  0xb8, 0x0, 0xbe, 0xc0, 0xc2, 0x0, 0xc5,
  0xc8, 0x0, 0xd6, 0xd8, 0x0, 0xff };
/* Alphabetic code points in U+1300..U+13FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u13[] = {
  0x00, 0x0, 0x10, 0x12, 0x0, 0x15,
  0x18, 0x0, 0x5a, 0x80, 0x0, 0x8f,
  0xa0, 0x0, 0xf4 };
/* Alphabetic code points in U+1400..U+14FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u14[] = {
  0x01, 0x0, 0xff };
/* u15 all alphabetic */
/* Alphabetic code points in U+1600..U+16FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u16[] = {
  0x00, 0x0, 0x6c, 0x6f, 0x0, 0x7f,
  0x81, 0x0, 0x9a, 0xa0, 0x0, 0xea,
  0xee, 0x0, 0xf0 };
/* Alphabetic code points in U+1700..U+17FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u17[] = {
  0x00, 0x0, 0x0c, 0x0e, 0x0, 0x11,
  0x20, 0x0, 0x31, 0x40, 0x0, 0x51,
  0x60, 0x0, 0x6c, 0x6e, 0x0, 0x70,
  0x80, 0x0, 0xb3, 0xd7, 0xdc, 0xe0, 0x0, 0xe9 };
/* Alphabetic code points in U+1800..U+18FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u18[] = {
  0x10, 0x0, 0x19, 0x20, 0x0, 0x77,
  0x80, 0x0, 0xa8, 0xaa, 0xb0, 0x0, 0xf5 };
/* Alphabetic code points in U+1900..U+19FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u19[] = {
  0x00, 0x0, 0x1c, 0x46, 0x0, 0x6d,
  0x70, 0x0, 0x74, 0x80, 0x0, 0xab,
  0xc1, 0x0, 0xc7, 0xd0, 0x0, 0xda };
/* Alphabetic code points in U+1A00..U+1AFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u1a[] = {
  0x00, 0x0, 0x16, 0x20, 0x0, 0x54,
  0x80, 0x0, 0x89, 0x90, 0x0, 0x99, 0xa7 };
/* Alphabetic code points in U+1B00..U+1BFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u1b[] = {
  0x05, 0x0, 0x33, 0x45, 0x0, 0x4b,
  0x50, 0x0, 0x59, 0x83, 0x0, 0xa0,
  0xae, 0x0, 0xb9 };
/* Alphabetic code points in U+1C00..U+1CFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u1c[] = {
  0x00, 0x0, 0x23, 0x40, 0x0, 0x49,
  0x4d, 0x0, 0x7d, 0xe9, 0x0, 0xec,
  0xee, 0x0, 0xf1 };
/* Alphabetic code points in U+1D00..U+1DFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u1d[] = {
  0x00, 0x0, 0xbf };
/* u1e all alphabetic */
/* Alphabetic code points in U+1F00..U+1FFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u1f[] = {
  0x00, 0x0, 0x15, 0x18, 0x0, 0x1d,
  0x20, 0x0, 0x45, 0x48, 0x0, 0x4d, 0x50, 0x0, 0x57, 0x59,
  0x5b, 0x5d, 0x5f, 0x0, 0x7d, 0x80, 0x0, 0xb4,
  0xb6, 0x0, 0xbc, 0xbe, 0xc2, 0x0, 0xc4, 0xc6,
  0x0, 0xcc, 0xd0, 0x0, 0xd3, 0xd6, 0x0, 0xdb,
  0xe0, 0x0, 0xec, 0xf2, 0x0, 0xf4, 0xf6, 0x0,
  0xfc };
/* Alphabetic code points in U+2000..U+20FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u20[] = {
  0x71, 0x7f, 0x90, 0x0, 0x94 };
/* Alphabetic code points in U+2100..U+21FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u21[] = {
  0x02, 0x07, 0x0a, 0x0, 0x13, 0x15,
  0x19, 0x0, 0x1d, 0x24, 0x26, 0x28, 0x0, 0x2d,
  0x2f, 0x0, 0x39, 0x3c, 0x0, 0x3f,
  0x45, 0x0, 0x49, 0x4e, 0x60, 0x0, 0x88 };
/* Alphabetic code points in U+2400..U+24FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u24[] = {
  0x9c, 0x0, 0xe9 };
/* Alphabetic code points in U+2C00..U+2CFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u2c[] = {
  0x00, 0x0, 0x2e, 0x30, 0x0, 0x5e,
  0x60, 0x0, 0xe4, 0xeb, 0x0, 0xee };
/* Alphabetic code points in U+2D00..U+2DFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u2d[] = {
  0x00, 0x0, 0x25, 0x30, 0x0, 0x65, 0x6f,
  0x80, 0x0, 0x96, 0xa0, 0x0, 0xa6,
  0xa8, 0x0, 0xae, 0xb0, 0x0, 0xb6,
  0xb8, 0x0, 0xbe, 0xc0, 0x0, 0xc6,
  0xc8, 0x0, 0xce, 0xd0, 0x0, 0xd6,
  0xd8, 0x0, 0xde };
/* Alphabetic code points in U+2E00..U+2EFF, stored as low bytes:
   a single entry, U+2E2F. */
static const unsigned char u2e[] = {
  0x2f };
/* Alphabetic code points in U+3000..U+30FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u30[] = {
  0x05, 0x0, 0x07, 0x21, 0x0,
  0x29, 0x31, 0x0, 0x35, 0x38, 0x0, 0x3c, 0x41,
  0x0, 0x96, 0x9d, 0x0, 0x9f, 0xa1, 0x0, 0xfa,
  0xfc, 0x0, 0xff };
/* Alphabetic code points in U+3100..U+31FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u31[] = {
  0x05, 0x0, 0x2d, 0x31, 0x0,
  0x8e, 0xa0, 0x0, 0xb7, 0xf0, 0x0, 0xff };
/* u34 to u4c all alphabetic */
/* Alphabetic code points in U+4D00..U+4DFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u4d[] = {
  0x00, 0x0, 0xb5 };
/* u4e to u9e all alphabetic */
/* Alphabetic code points in U+9F00..U+9FFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u9f[] = {
  0x00, 0x0, 0xcb };
/* ua0 to ua3 all alphabetic */
/* Alphabetic code points in U+A400..U+A4FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char ua4[] = {
  0x00, 0x0, 0x8c, 0xd0, 0x0, 0xfd };
/* ua5 all alphabetic */
/* Alphabetic code points in U+A600..U+A6FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char ua6[] = {
  0x00, 0x0, 0x0c, 0x10, 0x0, 0x2b,
  0x40, 0x0, 0x5f, 0x62, 0x0, 0x6e,
  0x7f, 0x0, 0x97, 0xa0, 0x0, 0xef };
/* Alphabetic code points in U+A700..U+A7FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char ua7[] = {
  0x17, 0x0, 0x1f, 0x22, 0x0, 0x88,
  0x8b, 0x8c,
  0xfb, 0x0, 0xff };
/* Alphabetic code points in U+A800..U+A8FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles
   (the leading 0x00, 0x01 are the singles U+A800 and U+A801). */
static const unsigned char ua8[] = {
  0x00, 0x01, 0x03, 0x0, 0x05, 0x07, 0x0, 0x0a,
  0x0c, 0x0, 0x22, 0x40, 0x0, 0x73,
  0x82, 0x0, 0xb3, 0xd0, 0x0, 0xd9,
  0xf2, 0x0, 0xf7, 0xfb };
/* Alphabetic code points in U+A900..U+A9FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char ua9[] = {
  0x00, 0x0, 0x25, 0x30, 0x0, 0x46,
  0x60, 0x0, 0x7c, 0x84, 0x0, 0xb2,
  0xcf, 0x0, 0xd9 };
/* Alphabetic code points in U+AA00..U+AAFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char uaa[] = {
  0x00, 0x0, 0x28, 0x40, 0x0, 0x42,
  0x44, 0x0, 0x4b, 0x50, 0x0, 0x59,
  0x60, 0x0, 0x76, 0x7a, 0x80, 0x0, 0xaf,
  0xb1, 0xb5, 0xb6, 0xb9, 0x0, 0xbd,
  0xc0, 0xc2, 0xdb, 0x0, 0xdd };
/* Alphabetic code points in U+AB00..U+ABFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char uab[] = {
  0xc0, 0x0, 0xe2, 0xf0, 0x0, 0xf9 };
/* uac to ud6 all alphabetic */
/* Alphabetic code points in U+D700..U+D7FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char ud7[] = {
  0x00, 0x0, 0xa3, 0xb0, 0x0, 0xc6,
  0xcb, 0x0, 0xfb };
/* uf9 all alphabetic */
/* Alphabetic code points in U+FA00..U+FAFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char ufa[] = {
  0x00, 0x0, 0x2d, 0x30, 0x0, 0x6d,
  0x70, 0x0, 0xd9 };
/* Alphabetic code points in U+FB00..U+FBFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char ufb[] = {
  0x00, 0x0, 0x06, 0x13, 0x0, 0x17, 0x1d,
  0x1f, 0x0, 0x28, 0x2a, 0x0, 0x36, 0x38, 0x0,
  0x3c, 0x3e, 0x40, 0x0, 0x41, 0x43, 0x0, 0x44,
  0x46, 0x0, 0xb1, 0xd3, 0x0, 0xff };
/* ufc all alphabetic */
/* Alphabetic code points in U+FD00..U+FDFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char ufd[] = {
  0x00, 0x0, 0x3d, 0x50, 0x0,
  0x8f, 0x92, 0x0, 0xc7, 0xf0, 0x0, 0xfb };
/* Alphabetic code points in U+FE00..U+FEFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char ufe[] = {
  0x70,
  0x0, 0x74, 0x76, 0x0, 0xfc };
/* Alphabetic code points in U+FF00..U+FFFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char uff[] = {
  0x10, 0x0, 0x19,
  0x21, 0x0, 0x3a, 0x41, 0x0, 0x5a, 0x66, 0x0,
  0xbe, 0xc2, 0x0, 0xc7, 0xca, 0x0, 0xcf, 0xd2,
  0x0, 0xd7, 0xda, 0x0, 0xdc };
/* Alphabetic code points in U+10000..U+100FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u100[] = {
  0x00, 0x0, 0x0b, 0x0d, 0x0, 0x26,
  0x28, 0x0, 0x3a, 0x3c, 0x3d, 0x3f, 0x0, 0x4d,
  0x50, 0x0, 0x5d, 0x80, 0x0, 0xfa };
/* Alphabetic code points in U+10100..U+101FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u101[] = {
  0x40, 0x0, 0x74 };
/* Alphabetic code points in U+10200..U+102FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u102[] = {
  0x80, 0x0, 0x9c, 0xa0, 0x0, 0xd0 };
/* Alphabetic code points in U+10300..U+103FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u103[] = {
  0x00, 0x0, 0x1e, 0x30, 0x0, 0x4a,
  0x80, 0x0, 0x9d, 0xa0, 0x0, 0xc3,
  0xc8, 0x0, 0xcf, 0xd1, 0x0, 0xd5 };
/* Alphabetic code points in U+10400..U+104FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u104[] = {
  0x00, 0x0, 0x9d, 0xa0, 0x0, 0xa9 };
/* Alphabetic code points in U+10800..U+108FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u108[] = {
  0x00, 0x0, 0x05, 0x08, 0x0a, 0x0, 0x35,
  0x37, 0x38, 0x3c, 0x3f, 0x0, 0x55 };
/* Alphabetic code points in U+10900..U+109FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u109[] = {
  0x00, 0x0, 0x15, 0x20, 0x0, 0x39 };
/* Alphabetic code points in U+10A00..U+10AFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles
   (the leading 0x00 here is the single code point U+10A00). */
static const unsigned char u10a[] = {
  0x00, 0x10, 0x0, 0x13, 0x15, 0x0, 0x17,
  0x19, 0x0, 0x33, 0x60, 0x0, 0x7c };
/* Alphabetic code points in U+10B00..U+10BFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u10b[] = {
  0x00, 0x0, 0x35, 0x40, 0x0, 0x55,
  0x60, 0x0, 0x72 };
/* Alphabetic code points in U+10C00..U+10CFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u10c[] = {
  0x00, 0x0, 0x48 };
/* Alphabetic code points in U+11000..U+110FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u110[] = {
  0x83, 0x0, 0xaf };
/* u120 to u122 all alphabetic */
/* Alphabetic code points in U+12300..U+123FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u123[] = {
  0x00, 0x0, 0x6e };
/* Alphabetic code points in U+12400..U+124FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u124[] = {
  0x00, 0x0, 0x62 };
/* u130 to u133 all alphabetic */
/* Alphabetic code points in U+13400..U+134FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u134[] = {
  0x00, 0x0, 0x2e };
/* Alphabetic code points in U+1D400..U+1D4FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u1d4[] = {
  0x00, 0x0, 0x54, 0x56, 0x0, 0x9c,
  0x9e, 0x0, 0x9f, 0xa2, 0xa5, 0x0, 0xa6,
  0xa9, 0x0, 0xac, 0xae, 0x0, 0xb9, 0xbb,
  0xbd, 0x0, 0xc3, 0xc5, 0x0, 0xff };
/* Alphabetic code points in U+1D500..U+1D5FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u1d5[] = {
  0x00, 0x0, 0x05, 0x07, 0x0,
  0x0a, 0x0d, 0x0, 0x14, 0x16, 0x0, 0x1c, 0x1e,
  0x0, 0x39, 0x3b, 0x0, 0x3e, 0x40, 0x0, 0x44,
  0x46, 0x4a, 0x0, 0x50, 0x52, 0x0, 0xff };
/* Alphabetic code points in U+1D600..U+1D6FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u1d6[] = {
  0x00, 0x0, 0xa5, 0xa8, 0x0, 0xc0,
  0xc2, 0x0, 0xda, 0xdc, 0x0, 0xfa,
  0xfc, 0x0, 0xff };
/* Alphabetic code points in U+1D700..U+1D7FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u1d7[] = {
  0x00, 0x0, 0x14, 0x16, 0x0, 0x34,
  0x36, 0x0, 0x4e, 0x50, 0x0, 0x6e,
  0x70, 0x0, 0x88, 0x8a, 0x0, 0xa8,
  0xaa, 0x0, 0xc2, 0xc4, 0x0, 0xcb,
  0xce, 0x0, 0xff };
/* Alphabetic code points in U+1F100..U+1F1FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi; other bytes are singles. */
static const unsigned char u1f1[] = {
  0x10, 0x0, 0x2c, 0x31, 0x3d, 0x3f, 0x42, 0x46,
  0x57, 0x5f, 0x79, 0x7b, 0x7c, 0x7f, 0x8a };
/* u200 to u2a5 all alphabetic */
/* Alphabetic code points in U+2A600..U+2A6FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u2a6[] = {
  0x00, 0x0, 0xd6 };
/* u2a7 to u2b6 all alphabetic */
/* Alphabetic code points in U+2B700..U+2B7FF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u2b7[] = {
  0x00, 0x0, 0x34 };
/* u2f8 to u2f9 all alphabetic */
/* Alphabetic code points in U+2FA00..U+2FAFF, stored as low bytes.
   "lo, 0x0, hi" is the inclusive range lo..hi. */
static const unsigned char u2fa[] = {
  0x00, 0x0, 0x1d };