| // Copyright 2019 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "third_party/blink/renderer/platform/fonts/utf16_ragel_iterator.h" |
| |
| #include "third_party/blink/renderer/platform/text/character.h" |
| #include "third_party/blink/renderer/platform/wtf/text/character_names.h" |
| |
| namespace blink { |
| |
| namespace { |
| |
| char EmojiSegmentationCategory(UChar32 codepoint) { |
| if (codepoint <= 0x7F) { |
| if (Character::IsEmojiKeycapBase(codepoint)) |
| return UTF16RagelIterator::KEYCAP_BASE; |
| return UTF16RagelIterator::kMaxEmojiScannerCategory; |
| } |
| // For the grammar to work, we need to check for more specific character |
| // classes first, then expand towards more generic ones. So we match single |
| // characters and small ranges first, then return EMOJI and |
| // EMOJI_TEXT_PRESENTATION for the remaining ones. |
| if (codepoint == kCombiningEnclosingKeycapCharacter) |
| return UTF16RagelIterator::COMBINING_ENCLOSING_KEYCAP; |
| if (codepoint == kCombiningEnclosingCircleBackslashCharacter) |
| return UTF16RagelIterator::COMBINING_ENCLOSING_CIRCLE_BACKSLASH; |
| if (codepoint == kZeroWidthJoinerCharacter) |
| return UTF16RagelIterator::ZWJ; |
| if (codepoint == kVariationSelector15Character) |
| return UTF16RagelIterator::VS15; |
| if (codepoint == kVariationSelector16Character) |
| return UTF16RagelIterator::VS16; |
| if (codepoint == 0x1F3F4) |
| return UTF16RagelIterator::TAG_BASE; |
| if (Character::IsEmojiTagSequence(codepoint)) |
| return UTF16RagelIterator::TAG_SEQUENCE; |
| if (codepoint == kCancelTag) { |
| // http://www.unicode.org/reports/tr51/#def_emoji_tag_sequence |
| // defines a TAG_TERM grammar rule for U+E007F CANCEL TAG. |
| return UTF16RagelIterator::TAG_TERM; |
| } |
| if (Character::IsEmojiModifierBase(codepoint)) |
| return UTF16RagelIterator::EMOJI_MODIFIER_BASE; |
| if (Character::IsModifier(codepoint)) |
| return UTF16RagelIterator::EMOJI_MODIFIER; |
| if (Character::IsRegionalIndicator(codepoint)) |
| return UTF16RagelIterator::REGIONAL_INDICATOR; |
| |
| if (Character::IsEmojiEmojiDefault(codepoint)) |
| return UTF16RagelIterator::EMOJI_EMOJI_PRESENTATION; |
| if (Character::IsEmojiTextDefault(codepoint)) |
| return UTF16RagelIterator::EMOJI_TEXT_PRESENTATION; |
| if (Character::IsEmoji(codepoint)) |
| return UTF16RagelIterator::EMOJI; |
| |
| // Ragel state machine will interpret unknown category as "any". |
| return UTF16RagelIterator::kMaxEmojiScannerCategory; |
| } |
| |
| } // namespace |
| |
| UTF16RagelIterator& UTF16RagelIterator::SetCursor(unsigned new_cursor) { |
| CHECK_GE(new_cursor, 0u); |
| CHECK_LT(new_cursor, buffer_size_); |
| cursor_ = new_cursor; |
| UpdateCachedCategory(); |
| return *this; |
| } |
| |
| void UTF16RagelIterator::UpdateCachedCategory() { |
| if (cursor_ >= buffer_size_) |
| return; |
| cached_category_ = EmojiSegmentationCategory(Codepoint()); |
| } |
| |
| } // namespace blink |