// blob: 23f1eb317a89190e99d7738e00b301c449fd92e6 [file] [log] [blame]
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stddef.h>
#include <string>
#include "base/i18n/rtl.h"
#include "base/i18n/string_search.h"
#include "base/strings/string16.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/icu/source/i18n/unicode/usearch.h"
namespace base {
namespace i18n {
// Expects that |find_this| is found in |in_this| at offset |ex_start| with
// match length |ex_len|, checking both StringSearchIgnoringCaseAndAccents()
// and StringSearch() called with (false, true) as its trailing arguments
// (case-insensitive and forward, going by the macro names in this file).
// |find_this| and |in_this| are evaluated exactly once and shared by both
// searches; |ex_start| and |ex_len| are used textually in each comparison so
// failure messages show the caller's expression. The body is wrapped in
// do { ... } while (0) so the expansion is a single statement and stays valid
// inside an unbraced if/else.
#define EXPECT_MATCH_IGNORE_CASE(find_this, in_this, ex_start, ex_len) \
  do { \
    const auto& pattern_arg = (find_this); \
    const auto& text_arg = (in_this); \
    size_t index = 0; \
    size_t length = 0; \
    EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(pattern_arg, text_arg, \
                                                   &index, &length)); \
    EXPECT_EQ((ex_start), index); \
    EXPECT_EQ((ex_len), length); \
    index = 0; \
    length = 0; \
    EXPECT_TRUE( \
        StringSearch(pattern_arg, text_arg, &index, &length, false, true)); \
    EXPECT_EQ((ex_start), index); \
    EXPECT_EQ((ex_len), length); \
  } while (0)
// Expects that StringSearch() with trailing arguments (true, true) finds
// |find_this| in |in_this| at offset |ex_start| with match length |ex_len|
// (case-sensitive and forward, going by the macro names in this file).
// The body is wrapped in do { ... } while (0) so the expansion is a single
// statement and stays valid inside an unbraced if/else.
#define EXPECT_MATCH_SENSITIVE(find_this, in_this, ex_start, ex_len) \
  do { \
    size_t index = 0; \
    size_t length = 0; \
    EXPECT_TRUE( \
        StringSearch((find_this), (in_this), &index, &length, true, true)); \
    EXPECT_EQ((ex_start), index); \
    EXPECT_EQ((ex_len), length); \
  } while (0)
// Expects that StringSearch() with trailing arguments (false, false) finds
// |find_this| in |in_this| at offset |ex_start| with match length |ex_len|
// (case-insensitive and backwards, going by the macro names in this file).
// The body is wrapped in do { ... } while (0) so the expansion is a single
// statement and stays valid inside an unbraced if/else.
#define EXPECT_MATCH_IGNORE_CASE_BACKWARDS(find_this, in_this, ex_start, \
                                           ex_len) \
  do { \
    size_t index = 0; \
    size_t length = 0; \
    EXPECT_TRUE( \
        StringSearch((find_this), (in_this), &index, &length, false, false)); \
    EXPECT_EQ((ex_start), index); \
    EXPECT_EQ((ex_len), length); \
  } while (0)
// Expects that StringSearch() with trailing arguments (true, false) finds
// |find_this| in |in_this| at offset |ex_start| with match length |ex_len|
// (case-sensitive and backwards, going by the macro names in this file).
// The body is wrapped in do { ... } while (0) so the expansion is a single
// statement and stays valid inside an unbraced if/else.
#define EXPECT_MATCH_SENSITIVE_BACKWARDS(find_this, in_this, ex_start, ex_len) \
  do { \
    size_t index = 0; \
    size_t length = 0; \
    EXPECT_TRUE( \
        StringSearch((find_this), (in_this), &index, &length, true, false)); \
    EXPECT_EQ((ex_start), index); \
    EXPECT_EQ((ex_len), length); \
  } while (0)
// Expects that |find_this| is NOT found in |in_this|, checking both
// StringSearchIgnoringCaseAndAccents() and StringSearch() called with
// (false, true) as its trailing arguments (case-insensitive and forward,
// going by the macro names in this file). |find_this| and |in_this| are
// evaluated exactly once and shared by both searches. The body is wrapped in
// do { ... } while (0) so the expansion is a single statement and stays valid
// inside an unbraced if/else.
#define EXPECT_MISS_IGNORE_CASE(find_this, in_this) \
  do { \
    const auto& pattern_arg = (find_this); \
    const auto& text_arg = (in_this); \
    size_t index = 0; \
    size_t length = 0; \
    EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(pattern_arg, text_arg, \
                                                    &index, &length)); \
    index = 0; \
    length = 0; \
    EXPECT_FALSE( \
        StringSearch(pattern_arg, text_arg, &index, &length, false, true)); \
  } while (0)
// Expects that StringSearch() with trailing arguments (true, true) does NOT
// find |find_this| in |in_this| (case-sensitive and forward, going by the
// macro names in this file). The body is wrapped in do { ... } while (0) so
// the expansion is a single statement and stays valid inside an unbraced
// if/else.
#define EXPECT_MISS_SENSITIVE(find_this, in_this) \
  do { \
    size_t index = 0; \
    size_t length = 0; \
    EXPECT_FALSE( \
        StringSearch((find_this), (in_this), &index, &length, true, true)); \
  } while (0)
// Expects that StringSearch() with trailing arguments (false, false) does NOT
// find |find_this| in |in_this| (case-insensitive and backwards, going by the
// macro names in this file). The body is wrapped in do { ... } while (0) so
// the expansion is a single statement and stays valid inside an unbraced
// if/else.
#define EXPECT_MISS_IGNORE_CASE_BACKWARDS(find_this, in_this) \
  do { \
    size_t index = 0; \
    size_t length = 0; \
    EXPECT_FALSE( \
        StringSearch((find_this), (in_this), &index, &length, false, false)); \
  } while (0)
// Expects that StringSearch() with trailing arguments (true, false) does NOT
// find |find_this| in |in_this| (case-sensitive and backwards, going by the
// macro names in this file). The body is wrapped in do { ... } while (0) so
// the expansion is a single statement and stays valid inside an unbraced
// if/else.
#define EXPECT_MISS_SENSITIVE_BACKWARDS(find_this, in_this) \
  do { \
    size_t index = 0; \
    size_t length = 0; \
    EXPECT_FALSE( \
        StringSearch((find_this), (in_this), &index, &length, true, false)); \
  } while (0)
// Note on setting default locale for testing: The current default locale on
// the Mac trybot is en_US_POSIX, with which primary-level collation strength
// string search is case-sensitive, when normally it should be
// case-insensitive. In other locales (including en_US which English speakers
// in the U.S. use), this search would be case-insensitive as expected.
// Exercises forward searches over plain ASCII text, first ignoring case and
// then case-sensitively. If ICU's default locale is en_US_POSIX it is
// switched to en_US for the duration of the test and restored at the end.
TEST(StringSearchTest, ASCII) {
  const std::string default_locale(uloc_getDefault());
  const bool locale_is_posix = (default_locale == "en_US_POSIX");
  if (locale_is_posix)
    SetICUDefaultLocale("en_US");

  EXPECT_MATCH_IGNORE_CASE(ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"),
                           0U, 5U);

  // The pattern has four spaces between 'h' and 'e' while the text has only
  // three, so the pattern must not be found. The two strings have to differ:
  // searching for a string inside an identical string would always match.
  EXPECT_MISS_IGNORE_CASE(ASCIIToUTF16("h    e l l o"),
                          ASCIIToUTF16("h   e l l o"));

  // The first occurrence of the full pattern starts at offset 4, not at the
  // partial match near the beginning of the text.
  EXPECT_MATCH_IGNORE_CASE(ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"),
                           4U, 6U);

  // A non-empty pattern is never found in empty text; an empty pattern is
  // expected to match at offset 0 with length 0.
  EXPECT_MISS_IGNORE_CASE(ASCIIToUTF16("searching within empty string"),
                          string16());
  EXPECT_MATCH_IGNORE_CASE(string16(),
                           ASCIIToUTF16("searching for empty string"), 0U, 0U);

  EXPECT_MATCH_IGNORE_CASE(ASCIIToUTF16("case insensitivity"),
                           ASCIIToUTF16("CaSe InSeNsItIvItY"), 0U, 18U);

  // Same scenarios with the case-sensitive search; the mixed-case text no
  // longer matches.
  EXPECT_MATCH_SENSITIVE(ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), 4U,
                         6U);
  EXPECT_MISS_SENSITIVE(ASCIIToUTF16("searching within empty string"),
                        string16());
  EXPECT_MATCH_SENSITIVE(string16(), ASCIIToUTF16("searching for empty string"),
                         0U, 0U);
  EXPECT_MISS_SENSITIVE(ASCIIToUTF16("case insensitivity"),
                        ASCIIToUTF16("CaSe InSeNsItIvItY"));

  if (locale_is_posix)
    SetICUDefaultLocale(default_locale.c_str());
}
// Checks how base letters, precomposed accented letters and their decomposed
// (base + combining mark) forms match one another, first with the
// case/accent-insensitive search and then with the case-sensitive search.
// If ICU's default locale is en_US_POSIX it is switched to en_US while the
// expectations run and restored afterwards.
TEST(StringSearchTest, UnicodeLocaleIndependent) {
  // Base characters
  const string16 e_base(WideToUTF16(L"e"));
  const string16 E_base(WideToUTF16(L"E"));
  const string16 a_base(WideToUTF16(L"a"));

  // Composed characters
  const string16 e_with_acute_accent(WideToUTF16(L"\u00e9"));
  const string16 E_with_acute_accent(WideToUTF16(L"\u00c9"));
  const string16 e_with_grave_accent(WideToUTF16(L"\u00e8"));
  const string16 E_with_grave_accent(WideToUTF16(L"\u00c8"));
  const string16 a_with_acute_accent(WideToUTF16(L"\u00e1"));

  // Decomposed characters
  const string16 e_with_acute_combining_mark(WideToUTF16(L"e\u0301"));
  const string16 E_with_acute_combining_mark(WideToUTF16(L"E\u0301"));
  const string16 e_with_grave_combining_mark(WideToUTF16(L"e\u0300"));
  const string16 E_with_grave_combining_mark(WideToUTF16(L"E\u0300"));
  const string16 a_with_acute_combining_mark(WideToUTF16(L"a\u0301"));

  const std::string saved_locale(uloc_getDefault());
  const bool override_posix_locale = (saved_locale == "en_US_POSIX");
  if (override_posix_locale) {
    SetICUDefaultLocale("en_US");
  }

  // Insensitive search: an unaccented 'e' and its acute forms match each
  // other in both directions, and the match spans the whole text.
  EXPECT_MATCH_IGNORE_CASE(e_base, e_with_acute_accent, 0U,
                           e_with_acute_accent.size());
  EXPECT_MATCH_IGNORE_CASE(e_with_acute_accent, e_base, 0U, e_base.size());
  EXPECT_MATCH_IGNORE_CASE(e_base, e_with_acute_combining_mark, 0U,
                           e_with_acute_combining_mark.size());
  EXPECT_MATCH_IGNORE_CASE(e_with_acute_combining_mark, e_base, 0U,
                           e_base.size());

  // Insensitive search: composed and decomposed forms match each other, and
  // acute matches grave.
  EXPECT_MATCH_IGNORE_CASE(e_with_acute_combining_mark, e_with_acute_accent, 0U,
                           e_with_acute_accent.size());
  EXPECT_MATCH_IGNORE_CASE(e_with_acute_accent, e_with_acute_combining_mark, 0U,
                           e_with_acute_combining_mark.size());
  EXPECT_MATCH_IGNORE_CASE(e_with_acute_combining_mark,
                           e_with_grave_combining_mark, 0U,
                           e_with_grave_combining_mark.size());
  EXPECT_MATCH_IGNORE_CASE(e_with_grave_combining_mark,
                           e_with_acute_combining_mark, 0U,
                           e_with_acute_combining_mark.size());
  EXPECT_MATCH_IGNORE_CASE(e_with_acute_combining_mark, e_with_grave_accent, 0U,
                           e_with_grave_accent.size());
  EXPECT_MATCH_IGNORE_CASE(e_with_grave_accent, e_with_acute_combining_mark, 0U,
                           e_with_acute_combining_mark.size());

  // Insensitive search: upper-case patterns match lower-case text.
  EXPECT_MATCH_IGNORE_CASE(E_with_acute_accent, e_with_acute_accent, 0U,
                           e_with_acute_accent.size());
  EXPECT_MATCH_IGNORE_CASE(E_with_grave_accent, e_with_acute_accent, 0U,
                           e_with_acute_accent.size());
  EXPECT_MATCH_IGNORE_CASE(E_with_acute_combining_mark, e_with_grave_accent, 0U,
                           e_with_grave_accent.size());
  EXPECT_MATCH_IGNORE_CASE(E_with_grave_combining_mark, e_with_acute_accent, 0U,
                           e_with_acute_accent.size());
  EXPECT_MATCH_IGNORE_CASE(E_base, e_with_grave_accent, 0U,
                           e_with_grave_accent.size());

  // Insensitive search: a different base letter never matches.
  EXPECT_MISS_IGNORE_CASE(a_with_acute_accent, e_with_acute_accent);
  EXPECT_MISS_IGNORE_CASE(a_with_acute_combining_mark,
                          e_with_acute_combining_mark);

  // Sensitive search: an unaccented 'e' does not match an accented one.
  EXPECT_MISS_SENSITIVE(e_base, e_with_acute_accent);
  EXPECT_MISS_SENSITIVE(e_with_acute_accent, e_base);
  EXPECT_MISS_SENSITIVE(e_base, e_with_acute_combining_mark);
  EXPECT_MISS_SENSITIVE(e_with_acute_combining_mark, e_base);

  // Sensitive search: composed and decomposed forms of the same accented
  // letter still match; the reported length is that of the text (1 code unit
  // for the composed form, 2 for the decomposed one).
  EXPECT_MATCH_SENSITIVE(e_with_acute_combining_mark, e_with_acute_accent, 0U,
                         1U);
  EXPECT_MATCH_SENSITIVE(e_with_acute_accent, e_with_acute_combining_mark, 0U,
                         2U);

  // Sensitive search: acute does not match grave.
  EXPECT_MISS_SENSITIVE(e_with_acute_combining_mark,
                        e_with_grave_combining_mark);
  EXPECT_MISS_SENSITIVE(e_with_grave_combining_mark,
                        e_with_acute_combining_mark);
  EXPECT_MISS_SENSITIVE(e_with_acute_combining_mark, e_with_grave_accent);
  EXPECT_MISS_SENSITIVE(e_with_grave_accent, e_with_acute_combining_mark);

  // Sensitive search: upper case does not match lower case.
  EXPECT_MISS_SENSITIVE(E_with_acute_accent, e_with_acute_accent);
  EXPECT_MISS_SENSITIVE(E_with_grave_accent, e_with_acute_accent);
  EXPECT_MISS_SENSITIVE(E_with_acute_combining_mark, e_with_grave_accent);
  EXPECT_MISS_SENSITIVE(E_with_grave_combining_mark, e_with_acute_accent);
  EXPECT_MISS_SENSITIVE(E_base, e_with_grave_accent);

  // Sensitive search: a different base letter never matches, while a string
  // always matches itself.
  EXPECT_MISS_SENSITIVE(a_with_acute_accent, e_with_acute_accent);
  EXPECT_MISS_SENSITIVE(a_with_acute_combining_mark,
                        e_with_acute_combining_mark);
  EXPECT_MATCH_SENSITIVE(a_with_acute_combining_mark,
                         a_with_acute_combining_mark, 0U, 2U);

  if (override_posix_locale) {
    SetICUDefaultLocale(saved_locale.data());
  }
}
// Checks that whether "a" matches U+00E5 depends on ICU's default locale:
// the insensitive search is expected to succeed under the locale in effect
// when the test starts, and to fail once the default locale is set to "da".
// The original default locale is restored at the end.
TEST(StringSearchTest, UnicodeLocaleDependent) {
  // Base characters
  const string16 a_base = WideToUTF16(L"a");

  // Composed characters
  const string16 a_with_ring = WideToUTF16(L"\u00e5");

  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr,
                                                 nullptr));
  EXPECT_TRUE(StringSearch(a_base, a_with_ring, nullptr, nullptr, false, true));

  // Keep our own copy of the locale name before changing the default, as the
  // other tests in this file do, so restoring it does not depend on storage
  // owned by ICU.
  const std::string default_locale(uloc_getDefault());
  SetICUDefaultLocale("da");

  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr,
                                                  nullptr));
  EXPECT_FALSE(
      StringSearch(a_base, a_with_ring, nullptr, nullptr, false, true));

  SetICUDefaultLocale(default_locale.c_str());
}
// Verifies that the backwards variants report the last occurrence of the
// pattern (offset 2 in a four-character "abab"-style text) and that the
// case-sensitive backwards search misses text that differs only in case.
// If ICU's default locale is en_US_POSIX it is switched to en_US while the
// expectations run and restored afterwards.
TEST(StringSearchTest, SearchBackwards) {
  const std::string saved_locale(uloc_getDefault());
  const bool override_posix_locale = (saved_locale == "en_US_POSIX");
  if (override_posix_locale) {
    SetICUDefaultLocale("en_US");
  }

  EXPECT_MATCH_IGNORE_CASE_BACKWARDS(ASCIIToUTF16("ab"), ASCIIToUTF16("ABAB"),
                                     2U, 2U);
  EXPECT_MATCH_SENSITIVE_BACKWARDS(ASCIIToUTF16("ab"), ASCIIToUTF16("abab"), 2U,
                                   2U);
  EXPECT_MISS_SENSITIVE_BACKWARDS(ASCIIToUTF16("ab"), ASCIIToUTF16("ABAB"));

  if (override_posix_locale) {
    SetICUDefaultLocale(saved_locale.data());
  }
}
// Verifies that a single fixed-pattern searcher object can be reused across
// several texts, for both FixedPatternStringSearch and
// FixedPatternStringSearchIgnoringCaseAndAccents. If ICU's default locale is
// en_US_POSIX it is switched to en_US before the searchers are constructed
// and restored at the end.
TEST(StringSearchTest, FixedPatternMultipleSearch) {
  const std::string saved_locale(uloc_getDefault());
  const bool override_posix_locale = (saved_locale == "en_US_POSIX");
  if (override_posix_locale) {
    SetICUDefaultLocale("en_US");
  }

  // Out-parameters shared by every Search() call below.
  size_t match_start = 0;
  size_t match_size = 0;

  // Search "foo" over multiple texts.
  FixedPatternStringSearch foo_search(ASCIIToUTF16("foo"), true);

  EXPECT_TRUE(foo_search.Search(ASCIIToUTF16("12foo34"), &match_start,
                                &match_size, true));
  EXPECT_EQ(2U, match_start);
  EXPECT_EQ(3U, match_size);

  EXPECT_FALSE(
      foo_search.Search(ASCIIToUTF16("bye"), &match_start, &match_size, true));
  EXPECT_FALSE(
      foo_search.Search(ASCIIToUTF16("FOO"), &match_start, &match_size, true));

  // The same text yields the first occurrence when the last argument is true
  // and the last occurrence when it is false.
  EXPECT_TRUE(foo_search.Search(ASCIIToUTF16("foobarfoo"), &match_start,
                                &match_size, true));
  EXPECT_EQ(0U, match_start);
  EXPECT_EQ(3U, match_size);

  EXPECT_TRUE(foo_search.Search(ASCIIToUTF16("foobarfoo"), &match_start,
                                &match_size, false));
  EXPECT_EQ(6U, match_start);
  EXPECT_EQ(3U, match_size);

  // Search "hello" over multiple texts.
  FixedPatternStringSearchIgnoringCaseAndAccents hello_search(
      ASCIIToUTF16("hello"));

  EXPECT_TRUE(
      hello_search.Search(ASCIIToUTF16("12hello34"), &match_start, &match_size));
  EXPECT_EQ(2U, match_start);
  EXPECT_EQ(5U, match_size);

  EXPECT_FALSE(
      hello_search.Search(ASCIIToUTF16("bye"), &match_start, &match_size));

  EXPECT_TRUE(
      hello_search.Search(ASCIIToUTF16("hELLo"), &match_start, &match_size));
  EXPECT_EQ(0U, match_start);
  EXPECT_EQ(5U, match_size);

  if (override_posix_locale) {
    SetICUDefaultLocale(saved_locale.data());
  }
}
} // namespace i18n
} // namespace base