// blob: 3a1c58751d31ad665414e16c38cf8576ca4f194e [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "third_party/blink/renderer/core/editing/state_machines/state_machine_util.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/renderer/platform/wtf/text/character_names.h"
#include "third_party/blink/renderer/platform/wtf/text/unicode.h"
namespace blink {
// Checks the line-break related grapheme cluster boundary rules (GB3, GB4 and
// GB5) using CR, LF, a Control character and an ordinary letter.
TEST(StateMachineUtilTest, IsGraphmeBreak_LineBreak) {
  const UChar32 kCarriageReturn = '\r';
  const UChar32 kLineFeed = '\n';
  const UChar32 kLetter = 'a';
  // U+00AD (SOFT HYPHEN) has Control grapheme property.
  const UChar32 kSoftHyphen = WTF::unicode::kSoftHyphenCharacter;

  // Grapheme Cluster Boundary Rule GB3: CR x LF
  // Only CR followed by LF stays together; the reverse order breaks.
  EXPECT_FALSE(IsGraphemeBreak(kCarriageReturn, kLineFeed));
  EXPECT_TRUE(IsGraphemeBreak(kLineFeed, kCarriageReturn));

  // Grapheme Cluster Boundary Rule GB4: (Control | CR | LF) ÷
  EXPECT_TRUE(IsGraphemeBreak(kCarriageReturn, kLetter));
  EXPECT_TRUE(IsGraphemeBreak(kLineFeed, kLetter));
  EXPECT_TRUE(IsGraphemeBreak(kSoftHyphen, kLetter));

  // Grapheme Cluster Boundary Rule GB5: ÷ (Control | CR | LF)
  EXPECT_TRUE(IsGraphemeBreak(kLetter, kCarriageReturn));
  EXPECT_TRUE(IsGraphemeBreak(kLetter, kLineFeed));
  EXPECT_TRUE(IsGraphemeBreak(kLetter, kSoftHyphen));
}
// Checks the Hangul syllable rules (GB6, GB7 and GB8) by walking every ordered
// pair of the five Hangul grapheme classes and comparing the result against a
// table of expected boundaries.
TEST(StateMachineUtilTest, IsGraphmeBreak_Hangul) {
  constexpr int kClassCount = 5;

  // One sample code point per class, in the order L, V, LV, LVT, T:
  //   U+1100 (HANGUL CHOSEONG KIYEOK) has L grapheme property.
  //   U+1160 (HANGUL JUNGSEONG FILLER) has V grapheme property.
  //   U+AC00 (HANGUL SYLLABLE GA) has LV grapheme property.
  //   U+AC01 (HANGUL SYLLABLE GAG) has LVT grapheme property.
  //   U+11A8 (HANGUL JONGSEONG KIYEOK) has T grapheme property.
  const UChar32 kSamples[kClassCount] = {0x1100, 0x1160, 0xAC00, 0xAC01,
                                         0x11A8};
  // Class names, used only to label a failing pair.
  const char* const kClassNames[kClassCount] = {"L", "V", "LV", "LVT", "T"};

  // kExpectBreak[prev][next] is true when a boundary is expected between a
  // character of class |prev| and a following character of class |next|.
  // Columns follow the same L, V, LV, LVT, T order as the rows.
  const bool kExpectBreak[kClassCount][kClassCount] = {
      // After L. Rule GB6: L x (L | V | LV | LVT)
      {false, false, false, false, true},
      // After V. Rule GB7: (LV | V) x (V | T)
      {true, false, true, true, false},
      // After LV. Rule GB7: (LV | V) x (V | T)
      {true, false, true, true, false},
      // After LVT. Rule GB8: (LVT | T) x T
      {true, true, true, true, false},
      // After T. Rule GB8: (LVT | T) x T
      {true, true, true, true, false},
  };

  for (int prev = 0; prev < kClassCount; ++prev) {
    for (int next = 0; next < kClassCount; ++next) {
      EXPECT_EQ(kExpectBreak[prev][next],
                IsGraphemeBreak(kSamples[prev], kSamples[next]))
          << kClassNames[prev] << " followed by " << kClassNames[next];
    }
  }
}
// Checks rule GB9: there is no boundary before an Extend character or before
// ZWJ, whether the preceding character is a letter, Extend or ZWJ.
TEST(StateMachineUtilTest, IsGraphmeBreak_Extend_or_ZWJ) {
  const UChar32 kLetter = 'a';
  // U+0300 (COMBINING GRAVE ACCENT) has Extend grapheme property.
  const UChar32 kCombiningGrave = 0x0300;
  const UChar32 kZwj = WTF::unicode::kZeroWidthJoinerCharacter;

  // Grapheme Cluster Boundary Rule GB9: x (Extend | ZWJ)
  // No break before Extend.
  EXPECT_FALSE(IsGraphemeBreak(kLetter, kCombiningGrave));
  EXPECT_FALSE(IsGraphemeBreak(kCombiningGrave, kCombiningGrave));
  EXPECT_FALSE(IsGraphemeBreak(kZwj, kCombiningGrave));

  // No break before ZWJ.
  EXPECT_FALSE(IsGraphemeBreak(kLetter, kZwj));
  EXPECT_FALSE(IsGraphemeBreak(kCombiningGrave, kZwj));
  EXPECT_FALSE(IsGraphemeBreak(kZwj, kZwj));
}
// Checks rule GB9a: there is no boundary before a SpacingMark character.
TEST(StateMachineUtilTest, IsGraphmeBreak_SpacingMark) {
  const UChar32 kLetter = 'a';
  // U+0903 (DEVANAGARI SIGN VISARGA) has SpacingMark grapheme property.
  const UChar32 kVisarga = 0x0903;

  // Grapheme Cluster Boundary Rule GB9a: x SpacingMark.
  EXPECT_FALSE(IsGraphemeBreak(kLetter, kVisarga));
}
// TODO(nona): Introduce tests for GB9b rule once ICU grabs Unicode 9.0.
// There is no character having Prepend grapheme property in Unicode 8.0.
// Checks rule GB10: an emoji modifier stays attached to a preceding emoji
// base, while two emoji bases in a row are separated by a boundary.
TEST(StateMachineUtilTest, IsGraphmeBreak_EmojiModifier) {
  // U+261D (WHITE UP POINTING INDEX) has E_Base grapheme property.
  const UChar32 kEBase = 0x261D;
  // U+1F466 (BOY) has E_Base_GAZ grapheme property.
  const UChar32 kEBaseGAZ = 0x1F466;
  // U+1F3FB (EMOJI MODIFIER FITZPATRICK TYPE-1-2) has E_Modifier grapheme
  // property.
  const UChar32 kEModifier = 0x1F3FB;

  // Grapheme Cluster Boundary Rule GB10: (E_Base | E_Base_GAZ) x E_Modifier
  // Each base class is checked once; a second, identical
  // (kEBase, kEModifier) assertion added no coverage and was dropped.
  EXPECT_FALSE(IsGraphemeBreak(kEBase, kEModifier));
  EXPECT_FALSE(IsGraphemeBreak(kEBaseGAZ, kEModifier));

  // Without a modifier on the right-hand side, two bases are separated.
  EXPECT_TRUE(IsGraphemeBreak(kEBase, kEBase));
  EXPECT_TRUE(IsGraphemeBreak(kEBaseGAZ, kEBase));
  EXPECT_TRUE(IsGraphemeBreak(kEBase, kEBaseGAZ));
  EXPECT_TRUE(IsGraphemeBreak(kEBaseGAZ, kEBaseGAZ));

  // EModifier is absorbed into Extend and there is NO break
  // before Extend per GB 9.
  EXPECT_FALSE(IsGraphemeBreak(kEModifier, kEModifier));
}
// Checks rule GB11: ZWJ glues to a following emoji character, whereas emoji
// without a ZWJ between them, and ZWJ followed by a plain letter, are
// separated by a boundary.
TEST(StateMachineUtilTest, IsGraphmeBreak_ZWJSequecne) {
  const UChar32 kZwj = WTF::unicode::kZeroWidthJoinerCharacter;
  const UChar32 kLetter = 'a';
  // U+2764 (HEAVY BLACK HEART) has Glue_After_Zwj grapheme property.
  const UChar32 kHeart = 0x2764;
  // U+1F466 (BOY) has E_Base_GAZ grapheme property.
  const UChar32 kBoy = 0x1F466;
  // U+1F5FA (WORLD MAP) doesn't have Glue_After_Zwj or E_Base_GAZ property
  // but has Emoji property.
  const UChar32 kWorldMap = 0x1F5FA;

  // Grapheme Cluster Boundary Rule GB11: ZWJ x (Glue_After_Zwj | EBG)
  // A character that only has the Emoji property is also expected to stay
  // attached to a preceding ZWJ.
  EXPECT_FALSE(IsGraphemeBreak(kZwj, kHeart));
  EXPECT_FALSE(IsGraphemeBreak(kZwj, kBoy));
  EXPECT_FALSE(IsGraphemeBreak(kZwj, kWorldMap));

  // With no ZWJ on the left-hand side, the emoji are separated.
  EXPECT_TRUE(IsGraphemeBreak(kHeart, kBoy));
  EXPECT_TRUE(IsGraphemeBreak(kHeart, kHeart));
  EXPECT_TRUE(IsGraphemeBreak(kBoy, kHeart));

  // ZWJ does not glue to an ordinary letter.
  EXPECT_TRUE(IsGraphemeBreak(kZwj, kLetter));
}
// Checks the handling of a virama followed by a Devanagari letter: no boundary
// after the Devanagari virama, but a boundary after the Tamil virama.
TEST(StateMachineUtilTest, IsGraphmeBreak_IndicSyllabicCategoryVirama) {
  // U+0915 (DEVANAGARI LETTER KA), used as the character following a virama.
  const UChar32 kDevanagariKa = 0x0915;
  // U+094D (DEVANAGARI SIGN VIRAMA) has Indic_Syllabic_Category=Virama
  // property.
  const UChar32 kDevanagariVirama = 0x094D;
  // U+0BCD, the Tamil virama.
  const UChar32 kTamilVirama = 0x0BCD;

  // Do not break after character having Indic_Syllabic_Category=Virama
  // property if following character has General_Category=C(Other) property.
  // NOTE(review): "C(Other)" looks suspicious because U+0915 is a letter;
  // confirm which General_Category the implementation actually checks.
  EXPECT_FALSE(IsGraphemeBreak(kDevanagariVirama, kDevanagariKa));

  // Tamil virama is an exception (crbug.com/693697).
  EXPECT_TRUE(IsGraphemeBreak(kTamilVirama, kDevanagariKa));
}
} // namespace blink