blob: 9198ea4d13e96ea2792d2dbffb3bee6d7b187c64 [file] [log] [blame]
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <string>
#include "base/i18n/rtl.h"
#include "base/i18n/string_search.h"
#include "base/string16.h"
#include "base/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "unicode/usearch.h"
namespace base {
namespace i18n {
// Note on setting the default locale for testing: the default locale on the
// Mac trybot is currently en_US_POSIX. In that locale, string search at
// primary collation strength is case-sensitive, whereas it should normally be
// case-insensitive. In other locales (including en_US, which English speakers
// in the U.S. use), the search is case-insensitive as expected.
TEST(StringSearchTest, ASCII) {
  // Exercises StringSearchIgnoringCaseAndAccents(find_this, in_this) with
  // plain ASCII input: substring hits, a spacing mismatch, empty strings on
  // either side, and differing letter case.

  // Keep our own copy of the locale name; the default is replaced below, so
  // the test should not depend on the buffer uloc_getDefault() handed back.
  const std::string default_locale(uloc_getDefault());
  const bool locale_is_posix = (default_locale == "en_US_POSIX");
  // Under en_US_POSIX the search is case-sensitive, so switch to en_US for
  // the duration of the test.
  if (locale_is_posix)
    SetICUDefaultLocale("en_US");

  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("hello"), ASCIIToUTF16("hello world")));

  // The needle has two spaces after 'h' while the haystack has one, so the
  // needle must not be found. (Searching for a string inside an identical
  // string would trivially succeed and could never be expected to fail.)
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("h  e l l o"), ASCIIToUTF16("h e l l o")));

  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa")));

  // A non-empty needle is never found in an empty haystack...
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("searching within empty string"), string16()));

  // ...whereas an empty needle is found in any haystack.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      string16(), ASCIIToUTF16("searching for empty string")));

  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY")));

  // Restore the original default locale. c_str() is used rather than data()
  // because only c_str() is guaranteed to be NUL-terminated in every C++
  // standard revision.
  if (locale_is_posix)
    SetICUDefaultLocale(default_locale.c_str());
}
TEST(StringSearchTest, UnicodeLocaleIndependent) {
  // Checks that accents and case are ignored for both precomposed characters
  // and base-letter + combining-mark sequences, and that different base
  // letters still do not match each other.

  // Base characters
  const string16 e_base = WideToUTF16(L"e");
  const string16 E_base = WideToUTF16(L"E");

  // Composed characters
  const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
  const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
  const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
  const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
  const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");

  // Decomposed characters
  const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
  const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
  const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
  const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
  const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");

  // Keep our own copy of the locale name; the default is replaced below, so
  // the test should not depend on the buffer uloc_getDefault() handed back.
  const std::string default_locale(uloc_getDefault());
  const bool locale_is_posix = (default_locale == "en_US_POSIX");
  // Under en_US_POSIX the search is case-sensitive, so switch to en_US for
  // the duration of the test.
  if (locale_is_posix)
    SetICUDefaultLocale("en_US");

  // Base letter vs. accented letter, in both directions.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_base, e_with_acute_accent));
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_acute_accent, e_base));
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_base, e_with_acute_combining_mark));
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_acute_combining_mark, e_base));

  // Composed vs. decomposed forms of the same accented letter.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_acute_combining_mark, e_with_acute_accent));
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_acute_accent, e_with_acute_combining_mark));

  // Different accents on the same base letter.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_acute_combining_mark, e_with_grave_combining_mark));
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_grave_combining_mark, e_with_acute_combining_mark));
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_acute_combining_mark, e_with_grave_accent));
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_grave_accent, e_with_acute_combining_mark));

  // Differing case, with and without differing accents.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      E_with_acute_accent, e_with_acute_accent));
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      E_with_grave_accent, e_with_acute_accent));
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      E_with_acute_combining_mark, e_with_grave_accent));
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      E_with_grave_combining_mark, e_with_acute_accent));
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      E_base, e_with_grave_accent));

  // A shared accent must not make different base letters match.
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      a_with_acute_accent, e_with_acute_accent));
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      a_with_acute_combining_mark, e_with_acute_combining_mark));

  // Restore the original default locale. c_str() is used rather than data()
  // because only c_str() is guaranteed to be NUL-terminated in every C++
  // standard revision.
  if (locale_is_posix)
    SetICUDefaultLocale(default_locale.c_str());
}
TEST(StringSearchTest, UnicodeLocaleDependent) {
  // Checks that the outcome of the search follows the ICU default locale:
  // "a" is expected to match "a with ring above" under the locale the test
  // starts with, but not once the default locale is switched to Danish ("da").

  // Base characters
  const string16 a_base = WideToUTF16(L"a");

  // Composed characters
  const string16 a_with_ring = WideToUTF16(L"\u00e5");

  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      a_base, a_with_ring));

  // Copy the locale name into a std::string before changing the default, as
  // the other tests in this file do, so that restoring it does not depend on
  // the buffer returned by uloc_getDefault() staying valid and unchanged
  // after the default locale has been replaced.
  const std::string default_locale(uloc_getDefault());

  SetICUDefaultLocale("da");
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      a_base, a_with_ring));

  // Put the original default locale back so later tests are unaffected.
  SetICUDefaultLocale(default_locale.c_str());
}
} // namespace i18n
} // namespace base