// base/i18n/string_search_unittest.cc - chromium/src - Git at Google

 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include <string>

 #include "base/i18n/rtl.h"
 #include "base/i18n/string_search.h"
 #include "base/string16.h"
 #include "base/utf_string_conversions.h"
 #include "testing/gtest/include/gtest/gtest.h"
 #include "unicode/usearch.h"

 namespace base {
 namespace i18n {

 // Note on setting the default locale for testing: the default locale on the
 // Mac trybot is currently en_US_POSIX, under which string search at
 // primary-level collation strength is case-sensitive, although it would
 // normally be case-insensitive. In other locales (including en_US, which
 // English speakers in the U.S. use), the search is case-insensitive as
 // expected.

 // Checks StringSearchIgnoringCaseAndAccents() against plain ASCII inputs.
 // Judging by the expectations below, the first argument is the text being
 // searched for and the second is the text being searched in.
 TEST(StringSearchTest, ASCII) {
   // Remember the ICU default locale by value. When it is en_US_POSIX it is
   // replaced with en_US while the checks run (see the note above) and put
   // back at the end of the test.
   const std::string original_locale(uloc_getDefault());
   const bool started_in_posix = (original_locale == "en_US_POSIX");
   if (started_in_posix) {
     SetICUDefaultLocale("en_US");
   }

   // A word is found at the start of a longer phrase.
   const string16 hello = ASCIIToUTF16("hello");
   const string16 hello_world = ASCIIToUTF16("hello world");
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(hello, hello_world));

   // The run of spaces after 'h' differs (four versus three): no match.
   const string16 four_space_gap = ASCIIToUTF16("h    e l l o");
   const string16 three_space_gap = ASCIIToUTF16("h   e l l o");
   EXPECT_FALSE(
       StringSearchIgnoringCaseAndAccents(four_space_gap, three_space_gap));

   // The only full match sits at the end, after an earlier partial match.
   const string16 repeated_needle = ASCIIToUTF16("aabaaa");
   const string16 repeated_text = ASCIIToUTF16("aaabaabaaa");
   EXPECT_TRUE(
       StringSearchIgnoringCaseAndAccents(repeated_needle, repeated_text));

   // Nothing can be found inside an empty string...
   const string16 empty;
   const string16 within_empty = ASCIIToUTF16("searching within empty string");
   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(within_empty, empty));

   // ...but an empty string is found inside any string.
   const string16 for_empty = ASCIIToUTF16("searching for empty string");
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(empty, for_empty));

   // Letter case is ignored.
   const string16 lower_case = ASCIIToUTF16("case insensitivity");
   const string16 mixed_case = ASCIIToUTF16("CaSe InSeNsItIvItY");
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(lower_case, mixed_case));

   if (started_in_posix) {
     SetICUDefaultLocale(original_locale.data());
   }
 }

 // Verifies accent- and case-insensitive matching for 'e'/'E' written as a
 // bare letter, as a precomposed accented code point, and as a base letter
 // followed by a combining mark. Also checks that accented 'a' never matches
 // accented 'e'.
 TEST(StringSearchTest, UnicodeLocaleIndependent) {
   // Base characters
   const string16 e_base = WideToUTF16(L"e");
   const string16 E_base = WideToUTF16(L"E");

   // Composed characters
   const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
   const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
   const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
   const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
   const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");

   // Decomposed characters
   const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
   const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
   const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
   const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
   const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");

   // Copy the ICU default locale name so it can be restored afterwards. When
   // it is en_US_POSIX, run the checks under en_US instead (see the note at
   // the top of this file).
   const std::string default_locale(uloc_getDefault());
   const bool locale_is_posix = (default_locale == "en_US_POSIX");
   if (locale_is_posix)
     SetICUDefaultLocale("en_US");

   // Bare letter versus precomposed accented letter, in both directions.
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_base, e_with_acute_accent));

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_acute_accent, e_base));

   // Bare letter versus base letter + combining mark, in both directions.
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_base, e_with_acute_combining_mark));

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_acute_combining_mark, e_base));

   // Same accent, decomposed versus precomposed.
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_acute_combining_mark, e_with_acute_accent));

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_acute_accent, e_with_acute_combining_mark));

   // Different accents (acute versus grave), both decomposed.
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_acute_combining_mark, e_with_grave_combining_mark));

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_grave_combining_mark, e_with_acute_combining_mark));

   // Different accents, decomposed versus precomposed.
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_acute_combining_mark, e_with_grave_accent));

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_grave_accent, e_with_acute_combining_mark));

   // Upper case versus lower case, with matching or differing accents.
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       E_with_acute_accent, e_with_acute_accent));

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       E_with_grave_accent, e_with_acute_accent));

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       E_with_acute_combining_mark, e_with_grave_accent));

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       E_with_grave_combining_mark, e_with_acute_accent));

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       E_base, e_with_grave_accent));

   // Different base letters must not match, even with the same accent.
   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
       a_with_acute_accent, e_with_acute_accent));

   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
       a_with_acute_combining_mark, e_with_acute_combining_mark));

   if (locale_is_posix)
     SetICUDefaultLocale(default_locale.c_str());
 }

 // Shows that the result of searching for "a" in "a with ring above"
 // (U+00E5) depends on the ICU default locale: a match is expected under the
 // locale the test starts in, and no match once the default locale is "da".
 TEST(StringSearchTest, UnicodeLocaleDependent) {
   // Base characters
   const string16 a_base = WideToUTF16(L"a");

   // Composed characters
   const string16 a_with_ring = WideToUTF16(L"\u00e5");

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       a_base, a_with_ring));

   // Snapshot the default locale name by value, as the other tests in this
   // file do, so the restore below uses storage owned by this test rather
   // than a pointer handed out by ICU before the locale was changed.
   const std::string default_locale(uloc_getDefault());
   SetICUDefaultLocale("da");

   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
       a_base, a_with_ring));

   // Put the original default locale back.
   SetICUDefaultLocale(default_locale.c_str());
 }

 }  // namespace i18n
 }  // namespace base
	// Copyright (c) 2011 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include <string>

	#include "base/i18n/rtl.h"
	#include "base/i18n/string_search.h"
	#include "base/string16.h"
	#include "base/utf_string_conversions.h"
	#include "testing/gtest/include/gtest/gtest.h"
	#include "unicode/usearch.h"

	namespace base {
	namespace i18n {

	// Note on setting default locale for testing: The current default locale on
	// the Mac trybot is en_US_POSIX, with which primary-level collation strength
	// string search is case-sensitive, when normally it should be
	// case-insensitive. In other locales (including en_US which English speakers
	// in the U.S. use), this search would be case-insensitive as expected.

	// Checks StringSearchIgnoringCaseAndAccents() against plain ASCII inputs.
	TEST(StringSearchTest, ASCII) {
	  // Copy the ICU default locale name so it can be restored afterwards. When
	  // it is en_US_POSIX, run the checks under en_US instead (see the note
	  // above).
	  const std::string default_locale(uloc_getDefault());
	  const bool locale_is_posix = (default_locale == "en_US_POSIX");
	  if (locale_is_posix)
	    SetICUDefaultLocale("en_US");

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      ASCIIToUTF16("hello"), ASCIIToUTF16("hello world")));

	  // The two strings differ only in the run of spaces after 'h' (four versus
	  // three), so the search must fail. The exact spacing matters: with
	  // identical strings this expectation could not hold.
	  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
	      ASCIIToUTF16("h    e l l o"), ASCIIToUTF16("h   e l l o")));

	  // The only full match sits at the end, after an earlier partial match.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa")));

	  // Nothing can be found inside an empty string...
	  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
	      ASCIIToUTF16("searching within empty string"), string16()));

	  // ...but an empty string is found inside any string.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      string16(), ASCIIToUTF16("searching for empty string")));

	  // Letter case is ignored.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY")));

	  if (locale_is_posix)
	    SetICUDefaultLocale(default_locale.c_str());
	}

	// Verifies accent- and case-insensitive matching for 'e'/'E' written as a
	// bare letter, as a precomposed accented code point, and as a base letter
	// followed by a combining mark. Also checks that accented 'a' never matches
	// accented 'e'.
	TEST(StringSearchTest, UnicodeLocaleIndependent) {
	  // Base characters
	  const string16 e_base = WideToUTF16(L"e");
	  const string16 E_base = WideToUTF16(L"E");

	  // Composed characters
	  const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
	  const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
	  const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
	  const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
	  const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");

	  // Decomposed characters
	  const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
	  const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
	  const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
	  const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
	  const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");

	  // Copy the ICU default locale name so it can be restored afterwards. When
	  // it is en_US_POSIX, run the checks under en_US instead (see the note
	  // above the first test).
	  const std::string default_locale(uloc_getDefault());
	  const bool locale_is_posix = (default_locale == "en_US_POSIX");
	  if (locale_is_posix)
	    SetICUDefaultLocale("en_US");

	  // Bare letter versus precomposed accented letter, in both directions.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_base, e_with_acute_accent));

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_acute_accent, e_base));

	  // Bare letter versus base letter + combining mark, in both directions.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_base, e_with_acute_combining_mark));

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_acute_combining_mark, e_base));

	  // Same accent, decomposed versus precomposed.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_acute_combining_mark, e_with_acute_accent));

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_acute_accent, e_with_acute_combining_mark));

	  // Different accents (acute versus grave), both decomposed.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_acute_combining_mark, e_with_grave_combining_mark));

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_grave_combining_mark, e_with_acute_combining_mark));

	  // Different accents, decomposed versus precomposed.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_acute_combining_mark, e_with_grave_accent));

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_grave_accent, e_with_acute_combining_mark));

	  // Upper case versus lower case, with matching or differing accents.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      E_with_acute_accent, e_with_acute_accent));

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      E_with_grave_accent, e_with_acute_accent));

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      E_with_acute_combining_mark, e_with_grave_accent));

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      E_with_grave_combining_mark, e_with_acute_accent));

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      E_base, e_with_grave_accent));

	  // Different base letters must not match, even with the same accent.
	  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
	      a_with_acute_accent, e_with_acute_accent));

	  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
	      a_with_acute_combining_mark, e_with_acute_combining_mark));

	  if (locale_is_posix)
	    SetICUDefaultLocale(default_locale.c_str());
	}

	// Shows that the result of searching for "a" in "a with ring above"
	// (U+00E5) depends on the ICU default locale: a match is expected under the
	// locale the test starts in, and no match once the default locale is "da".
	TEST(StringSearchTest, UnicodeLocaleDependent) {
	  // Base characters
	  const string16 a_base = WideToUTF16(L"a");

	  // Composed characters
	  const string16 a_with_ring = WideToUTF16(L"\u00e5");

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      a_base, a_with_ring));

	  // Snapshot the default locale name by value, as the other tests in this
	  // file do, so the restore below uses storage owned by this test rather
	  // than a pointer handed out by ICU before the locale was changed.
	  const std::string default_locale(uloc_getDefault());
	  SetICUDefaultLocale("da");

	  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
	      a_base, a_with_ring));

	  // Put the original default locale back.
	  SetICUDefaultLocale(default_locale.c_str());
	}

	} // namespace i18n
	} // namespace base