third_party/blink/renderer/platform/text/unicode_range.cc - chromium/src - Git at Google

 /*
  * Copyright (C) 2007 Apple Computer, Inc.
  *
  * Portions are Copyright (C) 1998 Netscape Communications Corporation.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
  *
  * Alternatively, the contents of this file may be used under the terms
  * of either the Mozilla Public License Version 1.1, found at
  * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public
  * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html
  * (the "GPL"), in which case the provisions of the MPL or the GPL are
  * applicable instead of those above.  If you wish to allow use of your
  * version of this file only under the terms of one of those two
  * licenses (the MPL or the GPL) and not to allow others to use your
  * version of this file under the LGPL, indicate your decision by
  * deletingthe provisions above and replace them with the notice and
  * other provisions required by the MPL or the GPL, as the case may be.
  * If you do not delete the provisions above, a recipient may use your
  * version of this file under any of the LGPL, the MPL or the GPL.
  */

 #include "third_party/blink/renderer/platform/text/unicode_range.h"

 namespace blink {

 /**********************************************************************
  * Unicode subranges as defined in unicode 3.0
  * x-western, x-central-euro, tr, x-baltic  -> latin
  *  0000 - 036f
  *  1e00 - 1eff
  *  2000 - 206f  (general punctuation)
  *  20a0 - 20cf  (currency symbols)
  *  2100 - 214f  (letterlike symbols)
  *  2150 - 218f  (Number Forms)
  * el         -> greek
  *  0370 - 03ff
  *  1f00 - 1fff
  * x-cyrillic -> cyrillic
  *  0400 - 04ff
  * he         -> hebrew
  *  0590 - 05ff
  * ar         -> arabic
  *  0600 - 06ff
  *  fb50 - fdff (arabic presentation forms)
  *  fe70 - feff (arabic presentation forms b)
  * th - thai
  *  0e00 - 0e7f
  * ko        -> korean
  *  ac00 - d7af  (hangul Syllables)
  *  1100 - 11ff    (jamo)
  *  3130 - 318f (hangul compatibility jamo)
  * ja
  *  3040 - 309f (hiragana)
  *  30a0 - 30ff (katakana)
  * zh-CN
  * zh-TW
  *
  * CJK
  *  3100 - 312f (bopomofo)
  *  31a0 - 31bf (bopomofo extended)
  *  3000 - 303f (CJK Symbols and Punctuation)
  *  2e80 - 2eff (CJK radicals supplement)
  *  2f00 - 2fdf (Kangxi Radicals)
  *  2ff0 - 2fff (Ideographic Description Characters)
  *  3190 - 319f (kanbun)
  *  3200 - 32ff (Enclosed CJK letters and Months)
  *  3300 - 33ff (CJK compatibility)
  *  3400 - 4dbf (CJK Unified Ideographs Extension A)
  *  4e00 - 9faf (CJK Unified Ideographs)
  *  f900 - fa5f (CJK Compatibility Ideographs)
  *  fe30 - fe4f (CJK compatibility Forms)
  *  ff00 - ffef (halfwidth and fullwidth forms)
  *
  * Armenian
  *  0530 - 058f
  * Sriac
  *  0700 - 074f
  * Thaana
  *  0780 - 07bf
  * Devanagari
  *  0900 - 097f
  * Bengali
  *  0980 - 09ff
  * Gurmukhi
  *  0a00 - 0a7f
  * Gujarati
  *  0a80 - 0aff
  * Oriya
  *  0b00 - 0b7f
  * Tamil
  *  0b80 - 0bff
  * Telugu
  *  0c00 - 0c7f
  * Kannada
  *  0c80 - 0cff
  * Malayalam
  *  0d00 - 0d7f
  * Sinhala
  *  0d80 - 0def
  * Lao
  *  0e80 - 0eff
  * Tibetan
  *  0f00 - 0fbf
  * Myanmar
  *  1000 - 109f
  * Georgian
  *  10a0 - 10ff
  * Ethiopic
  *  1200 - 137f
  * Cherokee
  *  13a0 - 13ff
  * Canadian Aboriginal Syllabics
  *  1400 - 167f
  * Ogham
  *  1680 - 169f
  * Runic
  *  16a0 - 16ff
  * Khmer
  *  1780 - 17ff
  * Mongolian
  *  1800 - 18af
  * Misc - superscripts and subscripts
  *  2070 - 209f
  * Misc - Combining Diacritical Marks for Symbols
  *  20d0 - 20ff
  * Misc - Arrows
  *  2190 - 21ff
  * Misc - Mathematical Operators
  *  2200 - 22ff
  * Misc - Miscellaneous Technical
  *  2300 - 23ff
  * Misc - Control picture
  *  2400 - 243f
  * Misc - Optical character recognition
  *  2440 - 2450
  * Misc - Enclose Alphanumerics
  *  2460 - 24ff
  * Misc - Box Drawing
  *  2500 - 257f
  * Misc - Block Elements
  *  2580 - 259f
  * Misc - Geometric Shapes
  *  25a0 - 25ff
  * Misc - Miscellaneous Symbols
  *  2600 - 267f
  * Misc - Dingbats
  *  2700 - 27bf
  * Misc - Braille Patterns
  *  2800 - 28ff
  * Yi Syllables
  *  a000 - a48f
  * Yi radicals
  *  a490 - a4cf
  * Alphabetic Presentation Forms
  *  fb00 - fb4f
  * Misc - Combining half Marks
  *  fe20 - fe2f
  * Misc - small form variants
  *  fe50 - fe6f
  * Misc - Specials
  *  fff0 - ffff
  *********************************************************************/

 static const unsigned kCNumSubTables = 9;
 static const unsigned kCSubTableSize = 16;

 static const unsigned char
     kGUnicodeSubrangeTable[kCNumSubTables][kCSubTableSize] = {
         {
             // table for X---
             kCRangeTableBase + 1,  // u0xxx
             kCRangeTableBase + 2,  // u1xxx
             kCRangeTableBase + 3,  // u2xxx
             kCRangeSetCJK,         // u3xxx
             kCRangeSetCJK,         // u4xxx
             kCRangeSetCJK,         // u5xxx
             kCRangeSetCJK,         // u6xxx
             kCRangeSetCJK,         // u7xxx
             kCRangeSetCJK,         // u8xxx
             kCRangeSetCJK,         // u9xxx
             kCRangeTableBase + 4,  // uaxxx
             kCRangeKorean,         // ubxxx
             kCRangeKorean,         // ucxxx
             kCRangeTableBase + 5,  // udxxx
             kCRangePrivate,        // uexxx
             kCRangeTableBase + 6   // ufxxx
         },
         {
             // table for 0X--
             kCRangeSetLatin,  // u00xx
             kCRangeSetLatin,  // u01xx
             kCRangeSetLatin,  // u02xx
             kCRangeGreek,     // u03xx     XXX 0300-036f is in fact
                               // cRangeCombiningDiacriticalMarks
             kCRangeCyrillic,  // u04xx
             kCRangeTableBase +
                 7,  // u05xx, includes Cyrillic supplement, Hebrew, and Armenian
             kCRangeArabic,         // u06xx
             kCRangeTertiaryTable,  // u07xx
             kCRangeUnassigned,     // u08xx
             kCRangeTertiaryTable,  // u09xx
             kCRangeTertiaryTable,  // u0axx
             kCRangeTertiaryTable,  // u0bxx
             kCRangeTertiaryTable,  // u0cxx
             kCRangeTertiaryTable,  // u0dxx
             kCRangeTertiaryTable,  // u0exx
             kCRangeTibetan,        // u0fxx
         },
         {
             // table for 1x--
             kCRangeTertiaryTable,  // u10xx
             kCRangeKorean,         // u11xx
             kCRangeEthiopic,       // u12xx
             kCRangeTertiaryTable,  // u13xx
             kCRangeCanadian,       // u14xx
             kCRangeCanadian,       // u15xx
             kCRangeTertiaryTable,  // u16xx
             kCRangeKhmer,          // u17xx
             kCRangeMongolian,      // u18xx
             kCRangeUnassigned,     // u19xx
             kCRangeUnassigned,     // u1axx
             kCRangeUnassigned,     // u1bxx
             kCRangeUnassigned,     // u1cxx
             kCRangeUnassigned,     // u1dxx
             kCRangeSetLatin,       // u1exx
             kCRangeGreek,          // u1fxx
         },
         {
             // table for 2x--
             kCRangeSetLatin,               // u20xx
             kCRangeSetLatin,               // u21xx
             kCRangeMathOperators,          // u22xx
             kCRangeMiscTechnical,          // u23xx
             kCRangeControlOpticalEnclose,  // u24xx
             kCRangeBoxBlockGeometrics,     // u25xx
             kCRangeMiscSymbols,            // u26xx
             kCRangeDingbats,               // u27xx
             kCRangeBraillePattern,         // u28xx
             kCRangeUnassigned,             // u29xx
             kCRangeUnassigned,             // u2axx
             kCRangeUnassigned,             // u2bxx
             kCRangeUnassigned,             // u2cxx
             kCRangeUnassigned,             // u2dxx
             kCRangeSetCJK,                 // u2exx
             kCRangeSetCJK,                 // u2fxx
         },
         {
             // table for ax--
             kCRangeYi,          // ua0xx
             kCRangeYi,          // ua1xx
             kCRangeYi,          // ua2xx
             kCRangeYi,          // ua3xx
             kCRangeYi,          // ua4xx
             kCRangeUnassigned,  // ua5xx
             kCRangeUnassigned,  // ua6xx
             kCRangeUnassigned,  // ua7xx
             kCRangeUnassigned,  // ua8xx
             kCRangeUnassigned,  // ua9xx
             kCRangeUnassigned,  // uaaxx
             kCRangeUnassigned,  // uabxx
             kCRangeKorean,      // uacxx
             kCRangeKorean,      // uadxx
             kCRangeKorean,      // uaexx
             kCRangeKorean,      // uafxx
         },
         {
             // table for dx--
             kCRangeKorean,     // ud0xx
             kCRangeKorean,     // ud1xx
             kCRangeKorean,     // ud2xx
             kCRangeKorean,     // ud3xx
             kCRangeKorean,     // ud4xx
             kCRangeKorean,     // ud5xx
             kCRangeKorean,     // ud6xx
             kCRangeKorean,     // ud7xx
             kCRangeSurrogate,  // ud8xx
             kCRangeSurrogate,  // ud9xx
             kCRangeSurrogate,  // udaxx
             kCRangeSurrogate,  // udbxx
             kCRangeSurrogate,  // udcxx
             kCRangeSurrogate,  // uddxx
             kCRangeSurrogate,  // udexx
             kCRangeSurrogate,  // udfxx
         },
         {
             // table for fx--
             kCRangePrivate,  // uf0xx
             kCRangePrivate,  // uf1xx
             kCRangePrivate,  // uf2xx
             kCRangePrivate,  // uf3xx
             kCRangePrivate,  // uf4xx
             kCRangePrivate,  // uf5xx
             kCRangePrivate,  // uf6xx
             kCRangePrivate,  // uf7xx
             kCRangePrivate,  // uf8xx
             kCRangeSetCJK,   // uf9xx
             kCRangeSetCJK,   // ufaxx
             kCRangeArabic,   // ufbxx, includes alphabic presentation form
             kCRangeArabic,   // ufcxx
             kCRangeArabic,   // ufdxx
             kCRangeArabic,   // ufexx, includes Combining half marks,
                             //                CJK compatibility forms,
                             //                CJK compatibility forms,
                             //                small form variants
             kCRangeTableBase +
                 8,  // uffxx, halfwidth and fullwidth forms, includes Specials
         },
         {
             // table for 0x0500 - 0x05ff
             kCRangeCyrillic,  // u050x
             kCRangeCyrillic,  // u051x
             kCRangeCyrillic,  // u052x
             kCRangeArmenian,  // u053x
             kCRangeArmenian,  // u054x
             kCRangeArmenian,  // u055x
             kCRangeArmenian,  // u056x
             kCRangeArmenian,  // u057x
             kCRangeArmenian,  // u058x
             kCRangeHebrew,    // u059x
             kCRangeHebrew,    // u05ax
             kCRangeHebrew,    // u05bx
             kCRangeHebrew,    // u05cx
             kCRangeHebrew,    // u05dx
             kCRangeHebrew,    // u05ex
             kCRangeHebrew,    // u05fx
         },
         {
             // table for 0xff00 - 0xffff
             kCRangeSetCJK,    // uff0x, fullwidth latin
             kCRangeSetCJK,    // uff1x, fullwidth latin
             kCRangeSetCJK,    // uff2x, fullwidth latin
             kCRangeSetCJK,    // uff3x, fullwidth latin
             kCRangeSetCJK,    // uff4x, fullwidth latin
             kCRangeSetCJK,    // uff5x, fullwidth latin
             kCRangeSetCJK,    // uff6x, halfwidth katakana
             kCRangeSetCJK,    // uff7x, halfwidth katakana
             kCRangeSetCJK,    // uff8x, halfwidth katakana
             kCRangeSetCJK,    // uff9x, halfwidth katakana
             kCRangeSetCJK,    // uffax, halfwidth hangul jamo
             kCRangeSetCJK,    // uffbx, halfwidth hangul jamo
             kCRangeSetCJK,    // uffcx, halfwidth hangul jamo
             kCRangeSetCJK,    // uffdx, halfwidth hangul jamo
             kCRangeSetCJK,    // uffex, fullwidth symbols
             kCRangeSpecials,  // ufffx, Specials
         },
 };

 // Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80)
 // code points so that the number of entries in the tertiary range
 // table for that range is obtained by dividing (0x1700 - 0x0700) by 128.
 // Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian aboriginal
 // syllabaries take multiple chunks and Ogham and Runic share a single chunk.
 static const unsigned kCTertiaryTableSize = ((0x1700 - 0x0700) / 0x80);

 static const unsigned char kGUnicodeTertiaryRangeTable[kCTertiaryTableSize] = {
     // table for 0x0700 - 0x1600
     kCRangeSyriac,      // u070x
     kCRangeThaana,      // u078x
     kCRangeUnassigned,  // u080x  place holder(resolved in the 2ndary tab.)
     kCRangeUnassigned,  // u088x  place holder(resolved in the 2ndary tab.)
     kCRangeDevanagari,  // u090x
     kCRangeBengali,     // u098x
     kCRangeGurmukhi,    // u0a0x
     kCRangeGujarati,    // u0a8x
     kCRangeOriya,       // u0b0x
     kCRangeTamil,       // u0b8x
     kCRangeTelugu,      // u0c0x
     kCRangeKannada,     // u0c8x
     kCRangeMalayalam,   // u0d0x
     kCRangeSinhala,     // u0d8x
     kCRangeThai,        // u0e0x
     kCRangeLao,         // u0e8x
     kCRangeTibetan,     // u0f0x  place holder(resolved in the 2ndary tab.)
     kCRangeTibetan,     // u0f8x  place holder(resolved in the 2ndary tab.)
     kCRangeMyanmar,     // u100x
     kCRangeGeorgian,    // u108x
     kCRangeKorean,      // u110x  place holder(resolved in the 2ndary tab.)
     kCRangeKorean,      // u118x  place holder(resolved in the 2ndary tab.)
     kCRangeEthiopic,    // u120x  place holder(resolved in the 2ndary tab.)
     kCRangeEthiopic,    // u128x  place holder(resolved in the 2ndary tab.)
     kCRangeEthiopic,    // u130x
     kCRangeCherokee,    // u138x
     kCRangeCanadian,    // u140x  place holder(resolved in the 2ndary tab.)
     kCRangeCanadian,    // u148x  place holder(resolved in the 2ndary tab.)
     kCRangeCanadian,    // u150x  place holder(resolved in the 2ndary tab.)
     kCRangeCanadian,    // u158x  place holder(resolved in the 2ndary tab.)
     kCRangeCanadian,    // u160x
     kCRangeOghamRunic,  // u168x  this contains two scripts, Ogham & Runic
 };

 // A two level index is almost enough for locating a range, with the
 // exception of u03xx and u05xx. Since we don't really care about range for
 // combining diacritical marks in our font application, they are
 // not discriminated further.  Future adoption of this method for other use
 // should be aware of this limitation. The implementation can be extended if
 // there is such a need.
 // For Indic, Southeast Asian scripts and some other scripts between
 // U+0700 and U+16FF, it's extended to the third level.
 unsigned FindCharUnicodeRange(UChar32 ch) {
   if (ch >= 0xFFFF)
     return 0;

   unsigned range;

   // search the first table
   range = kGUnicodeSubrangeTable[0][ch >> 12];

   if (range < kCRangeTableBase)
     // we try to get a specific range
     return range;

   // otherwise, we have one more table to look at
   range = kGUnicodeSubrangeTable[range - kCRangeTableBase][(ch & 0x0f00) >> 8];
   if (range < kCRangeTableBase)
     return range;
   if (range < kCRangeTertiaryTable)
     return kGUnicodeSubrangeTable[range - kCRangeTableBase][(ch & 0x00f0) >> 4];

   // Yet another table to look at : U+0700 - U+16FF : 128 code point blocks
   return kGUnicodeTertiaryRangeTable[(ch - 0x0700) >> 7];
 }

 }  // namespace blink
	/*
	* Copyright (C) 2007 Apple Computer, Inc.
	*
	* Portions are Copyright (C) 1998 Netscape Communications Corporation.
	*
	* This library is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public
	* License as published by the Free Software Foundation; either
	* version 2.1 of the License, or (at your option) any later version.
	*
	* This library is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public
	* License along with this library; if not, write to the Free Software
	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	*
	* Alternatively, the contents of this file may be used under the terms
	* of either the Mozilla Public License Version 1.1, found at
	* http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public
	* License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html
	* (the "GPL"), in which case the provisions of the MPL or the GPL are
	* applicable instead of those above. If you wish to allow use of your
	* version of this file only under the terms of one of those two
	* licenses (the MPL or the GPL) and not to allow others to use your
	* version of this file under the LGPL, indicate your decision by
	* deletingthe provisions above and replace them with the notice and
	* other provisions required by the MPL or the GPL, as the case may be.
	* If you do not delete the provisions above, a recipient may use your
	* version of this file under any of the LGPL, the MPL or the GPL.
	*/

	#include "third_party/blink/renderer/platform/text/unicode_range.h"

	namespace blink {

	/**********************************************************************
	* Unicode subranges as defined in unicode 3.0
	* x-western, x-central-euro, tr, x-baltic -> latin
	* 0000 - 036f
	* 1e00 - 1eff
	* 2000 - 206f (general punctuation)
	* 20a0 - 20cf (currency symbols)
	* 2100 - 214f (letterlike symbols)
	* 2150 - 218f (Number Forms)
	* el -> greek
	* 0370 - 03ff
	* 1f00 - 1fff
	* x-cyrillic -> cyrillic
	* 0400 - 04ff
	* he -> hebrew
	* 0590 - 05ff
	* ar -> arabic
	* 0600 - 06ff
	* fb50 - fdff (arabic presentation forms)
	* fe70 - feff (arabic presentation forms b)
	* th - thai
	* 0e00 - 0e7f
	* ko -> korean
	* ac00 - d7af (hangul Syllables)
	* 1100 - 11ff (jamo)
	* 3130 - 318f (hangul compatibility jamo)
	* ja
	* 3040 - 309f (hiragana)
	* 30a0 - 30ff (katakana)
	* zh-CN
	* zh-TW
	*
	* CJK
	* 3100 - 312f (bopomofo)
	* 31a0 - 31bf (bopomofo extended)
	* 3000 - 303f (CJK Symbols and Punctuation)
	* 2e80 - 2eff (CJK radicals supplement)
	* 2f00 - 2fdf (Kangxi Radicals)
	* 2ff0 - 2fff (Ideographic Description Characters)
	* 3190 - 319f (kanbun)
	* 3200 - 32ff (Enclosed CJK letters and Months)
	* 3300 - 33ff (CJK compatibility)
	* 3400 - 4dbf (CJK Unified Ideographs Extension A)
	* 4e00 - 9faf (CJK Unified Ideographs)
	* f900 - fa5f (CJK Compatibility Ideographs)
	* fe30 - fe4f (CJK compatibility Forms)
	* ff00 - ffef (halfwidth and fullwidth forms)
	*
	* Armenian
	* 0530 - 058f
	* Sriac
	* 0700 - 074f
	* Thaana
	* 0780 - 07bf
	* Devanagari
	* 0900 - 097f
	* Bengali
	* 0980 - 09ff
	* Gurmukhi
	* 0a00 - 0a7f
	* Gujarati
	* 0a80 - 0aff
	* Oriya
	* 0b00 - 0b7f
	* Tamil
	* 0b80 - 0bff
	* Telugu
	* 0c00 - 0c7f
	* Kannada
	* 0c80 - 0cff
	* Malayalam
	* 0d00 - 0d7f
	* Sinhala
	* 0d80 - 0def
	* Lao
	* 0e80 - 0eff
	* Tibetan
	* 0f00 - 0fbf
	* Myanmar
	* 1000 - 109f
	* Georgian
	* 10a0 - 10ff
	* Ethiopic
	* 1200 - 137f
	* Cherokee
	* 13a0 - 13ff
	* Canadian Aboriginal Syllabics
	* 1400 - 167f
	* Ogham
	* 1680 - 169f
	* Runic
	* 16a0 - 16ff
	* Khmer
	* 1780 - 17ff
	* Mongolian
	* 1800 - 18af
	* Misc - superscripts and subscripts
	* 2070 - 209f
	* Misc - Combining Diacritical Marks for Symbols
	* 20d0 - 20ff
	* Misc - Arrows
	* 2190 - 21ff
	* Misc - Mathematical Operators
	* 2200 - 22ff
	* Misc - Miscellaneous Technical
	* 2300 - 23ff
	* Misc - Control picture
	* 2400 - 243f
	* Misc - Optical character recognition
	* 2440 - 2450
	* Misc - Enclose Alphanumerics
	* 2460 - 24ff
	* Misc - Box Drawing
	* 2500 - 257f
	* Misc - Block Elements
	* 2580 - 259f
	* Misc - Geometric Shapes
	* 25a0 - 25ff
	* Misc - Miscellaneous Symbols
	* 2600 - 267f
	* Misc - Dingbats
	* 2700 - 27bf
	* Misc - Braille Patterns
	* 2800 - 28ff
	* Yi Syllables
	* a000 - a48f
	* Yi radicals
	* a490 - a4cf
	* Alphabetic Presentation Forms
	* fb00 - fb4f
	* Misc - Combining half Marks
	* fe20 - fe2f
	* Misc - small form variants
	* fe50 - fe6f
	* Misc - Specials
	* fff0 - ffff
	*********************************************************************/

	static const unsigned kCNumSubTables = 9;
	static const unsigned kCSubTableSize = 16;

	static const unsigned char
	kGUnicodeSubrangeTable[kCNumSubTables][kCSubTableSize] = {
	{
	// table for X---
	kCRangeTableBase + 1, // u0xxx
	kCRangeTableBase + 2, // u1xxx
	kCRangeTableBase + 3, // u2xxx
	kCRangeSetCJK, // u3xxx
	kCRangeSetCJK, // u4xxx
	kCRangeSetCJK, // u5xxx
	kCRangeSetCJK, // u6xxx
	kCRangeSetCJK, // u7xxx
	kCRangeSetCJK, // u8xxx
	kCRangeSetCJK, // u9xxx
	kCRangeTableBase + 4, // uaxxx
	kCRangeKorean, // ubxxx
	kCRangeKorean, // ucxxx
	kCRangeTableBase + 5, // udxxx
	kCRangePrivate, // uexxx
	kCRangeTableBase + 6 // ufxxx
	},
	{
	// table for 0X--
	kCRangeSetLatin, // u00xx
	kCRangeSetLatin, // u01xx
	kCRangeSetLatin, // u02xx
	kCRangeGreek, // u03xx XXX 0300-036f is in fact
	// cRangeCombiningDiacriticalMarks
	kCRangeCyrillic, // u04xx
	kCRangeTableBase +
	7, // u05xx, includes Cyrillic supplement, Hebrew, and Armenian
	kCRangeArabic, // u06xx
	kCRangeTertiaryTable, // u07xx
	kCRangeUnassigned, // u08xx
	kCRangeTertiaryTable, // u09xx
	kCRangeTertiaryTable, // u0axx
	kCRangeTertiaryTable, // u0bxx
	kCRangeTertiaryTable, // u0cxx
	kCRangeTertiaryTable, // u0dxx
	kCRangeTertiaryTable, // u0exx
	kCRangeTibetan, // u0fxx
	},
	{
	// table for 1x--
	kCRangeTertiaryTable, // u10xx
	kCRangeKorean, // u11xx
	kCRangeEthiopic, // u12xx
	kCRangeTertiaryTable, // u13xx
	kCRangeCanadian, // u14xx
	kCRangeCanadian, // u15xx
	kCRangeTertiaryTable, // u16xx
	kCRangeKhmer, // u17xx
	kCRangeMongolian, // u18xx
	kCRangeUnassigned, // u19xx
	kCRangeUnassigned, // u1axx
	kCRangeUnassigned, // u1bxx
	kCRangeUnassigned, // u1cxx
	kCRangeUnassigned, // u1dxx
	kCRangeSetLatin, // u1exx
	kCRangeGreek, // u1fxx
	},
	{
	// table for 2x--
	kCRangeSetLatin, // u20xx
	kCRangeSetLatin, // u21xx
	kCRangeMathOperators, // u22xx
	kCRangeMiscTechnical, // u23xx
	kCRangeControlOpticalEnclose, // u24xx
	kCRangeBoxBlockGeometrics, // u25xx
	kCRangeMiscSymbols, // u26xx
	kCRangeDingbats, // u27xx
	kCRangeBraillePattern, // u28xx
	kCRangeUnassigned, // u29xx
	kCRangeUnassigned, // u2axx
	kCRangeUnassigned, // u2bxx
	kCRangeUnassigned, // u2cxx
	kCRangeUnassigned, // u2dxx
	kCRangeSetCJK, // u2exx
	kCRangeSetCJK, // u2fxx
	},
	{
	// table for ax--
	kCRangeYi, // ua0xx
	kCRangeYi, // ua1xx
	kCRangeYi, // ua2xx
	kCRangeYi, // ua3xx
	kCRangeYi, // ua4xx
	kCRangeUnassigned, // ua5xx
	kCRangeUnassigned, // ua6xx
	kCRangeUnassigned, // ua7xx
	kCRangeUnassigned, // ua8xx
	kCRangeUnassigned, // ua9xx
	kCRangeUnassigned, // uaaxx
	kCRangeUnassigned, // uabxx
	kCRangeKorean, // uacxx
	kCRangeKorean, // uadxx
	kCRangeKorean, // uaexx
	kCRangeKorean, // uafxx
	},
	{
	// table for dx--
	kCRangeKorean, // ud0xx
	kCRangeKorean, // ud1xx
	kCRangeKorean, // ud2xx
	kCRangeKorean, // ud3xx
	kCRangeKorean, // ud4xx
	kCRangeKorean, // ud5xx
	kCRangeKorean, // ud6xx
	kCRangeKorean, // ud7xx
	kCRangeSurrogate, // ud8xx
	kCRangeSurrogate, // ud9xx
	kCRangeSurrogate, // udaxx
	kCRangeSurrogate, // udbxx
	kCRangeSurrogate, // udcxx
	kCRangeSurrogate, // uddxx
	kCRangeSurrogate, // udexx
	kCRangeSurrogate, // udfxx
	},
	{
	// table for fx--
	kCRangePrivate, // uf0xx
	kCRangePrivate, // uf1xx
	kCRangePrivate, // uf2xx
	kCRangePrivate, // uf3xx
	kCRangePrivate, // uf4xx
	kCRangePrivate, // uf5xx
	kCRangePrivate, // uf6xx
	kCRangePrivate, // uf7xx
	kCRangePrivate, // uf8xx
	kCRangeSetCJK, // uf9xx
	kCRangeSetCJK, // ufaxx
	kCRangeArabic, // ufbxx, includes alphabic presentation form
	kCRangeArabic, // ufcxx
	kCRangeArabic, // ufdxx
	kCRangeArabic, // ufexx, includes Combining half marks,
	// CJK compatibility forms,
	// CJK compatibility forms,
	// small form variants
	kCRangeTableBase +
	8, // uffxx, halfwidth and fullwidth forms, includes Specials
	},
	{
	// table for 0x0500 - 0x05ff
	kCRangeCyrillic, // u050x
	kCRangeCyrillic, // u051x
	kCRangeCyrillic, // u052x
	kCRangeArmenian, // u053x
	kCRangeArmenian, // u054x
	kCRangeArmenian, // u055x
	kCRangeArmenian, // u056x
	kCRangeArmenian, // u057x
	kCRangeArmenian, // u058x
	kCRangeHebrew, // u059x
	kCRangeHebrew, // u05ax
	kCRangeHebrew, // u05bx
	kCRangeHebrew, // u05cx
	kCRangeHebrew, // u05dx
	kCRangeHebrew, // u05ex
	kCRangeHebrew, // u05fx
	},
	{
	// table for 0xff00 - 0xffff
	kCRangeSetCJK, // uff0x, fullwidth latin
	kCRangeSetCJK, // uff1x, fullwidth latin
	kCRangeSetCJK, // uff2x, fullwidth latin
	kCRangeSetCJK, // uff3x, fullwidth latin
	kCRangeSetCJK, // uff4x, fullwidth latin
	kCRangeSetCJK, // uff5x, fullwidth latin
	kCRangeSetCJK, // uff6x, halfwidth katakana
	kCRangeSetCJK, // uff7x, halfwidth katakana
	kCRangeSetCJK, // uff8x, halfwidth katakana
	kCRangeSetCJK, // uff9x, halfwidth katakana
	kCRangeSetCJK, // uffax, halfwidth hangul jamo
	kCRangeSetCJK, // uffbx, halfwidth hangul jamo
	kCRangeSetCJK, // uffcx, halfwidth hangul jamo
	kCRangeSetCJK, // uffdx, halfwidth hangul jamo
	kCRangeSetCJK, // uffex, fullwidth symbols
	kCRangeSpecials, // ufffx, Specials
	},
	};

	// Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80)
	// code points so that the number of entries in the tertiary range
	// table for that range is obtained by dividing (0x1700 - 0x0700) by 128.
	// Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian aboriginal
	// syllabaries take multiple chunks and Ogham and Runic share a single chunk.
	static const unsigned kCTertiaryTableSize = ((0x1700 - 0x0700) / 0x80);

	static const unsigned char kGUnicodeTertiaryRangeTable[kCTertiaryTableSize] = {
	// table for 0x0700 - 0x1600
	kCRangeSyriac, // u070x
	kCRangeThaana, // u078x
	kCRangeUnassigned, // u080x place holder(resolved in the 2ndary tab.)
	kCRangeUnassigned, // u088x place holder(resolved in the 2ndary tab.)
	kCRangeDevanagari, // u090x
	kCRangeBengali, // u098x
	kCRangeGurmukhi, // u0a0x
	kCRangeGujarati, // u0a8x
	kCRangeOriya, // u0b0x
	kCRangeTamil, // u0b8x
	kCRangeTelugu, // u0c0x
	kCRangeKannada, // u0c8x
	kCRangeMalayalam, // u0d0x
	kCRangeSinhala, // u0d8x
	kCRangeThai, // u0e0x
	kCRangeLao, // u0e8x
	kCRangeTibetan, // u0f0x place holder(resolved in the 2ndary tab.)
	kCRangeTibetan, // u0f8x place holder(resolved in the 2ndary tab.)
	kCRangeMyanmar, // u100x
	kCRangeGeorgian, // u108x
	kCRangeKorean, // u110x place holder(resolved in the 2ndary tab.)
	kCRangeKorean, // u118x place holder(resolved in the 2ndary tab.)
	kCRangeEthiopic, // u120x place holder(resolved in the 2ndary tab.)
	kCRangeEthiopic, // u128x place holder(resolved in the 2ndary tab.)
	kCRangeEthiopic, // u130x
	kCRangeCherokee, // u138x
	kCRangeCanadian, // u140x place holder(resolved in the 2ndary tab.)
	kCRangeCanadian, // u148x place holder(resolved in the 2ndary tab.)
	kCRangeCanadian, // u150x place holder(resolved in the 2ndary tab.)
	kCRangeCanadian, // u158x place holder(resolved in the 2ndary tab.)
	kCRangeCanadian, // u160x
	kCRangeOghamRunic, // u168x this contains two scripts, Ogham & Runic
	};

	// A two level index is almost enough for locating a range, with the
	// exception of u03xx and u05xx. Since we don't really care about range for
	// combining diacritical marks in our font application, they are
	// not discriminated further. Future adoption of this method for other use
	// should be aware of this limitation. The implementation can be extended if
	// there is such a need.
	// For Indic, Southeast Asian scripts and some other scripts between
	// U+0700 and U+16FF, it's extended to the third level.
	unsigned FindCharUnicodeRange(UChar32 ch) {
	if (ch >= 0xFFFF)
	return 0;

	unsigned range;

	// search the first table
	range = kGUnicodeSubrangeTable[0][ch >> 12];

	if (range < kCRangeTableBase)
	// we try to get a specific range
	return range;

	// otherwise, we have one more table to look at
	range = kGUnicodeSubrangeTable[range - kCRangeTableBase][(ch & 0x0f00) >> 8];
	if (range < kCRangeTableBase)
	return range;
	if (range < kCRangeTertiaryTable)
	return kGUnicodeSubrangeTable[range - kCRangeTableBase][(ch & 0x00f0) >> 4];

	// Yet another table to look at : U+0700 - U+16FF : 128 code point blocks
	return kGUnicodeTertiaryRangeTable[(ch - 0x0700) >> 7];
	}

	} // namespace blink