| // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "app/l10n_util.h" |
| |
| #if defined(TOOLKIT_USES_GTK) |
| #include <glib/gutils.h> |
| #endif |
| |
| #include <algorithm> |
| #include <cstdlib> |
| #include <iterator> |
| |
| #include "app/app_paths.h" |
| #include "app/l10n_util_collator.h" |
| #include "app/resource_bundle.h" |
| #include "base/command_line.h" |
| #include "base/file_util.h" |
| #include "base/i18n/file_util_icu.h" |
| #include "base/i18n/rtl.h" |
| #include "base/path_service.h" |
| #include "base/scoped_ptr.h" |
| #include "base/string16.h" |
| #include "base/string_number_conversions.h" |
| #include "base/string_split.h" |
| #include "base/sys_string_conversions.h" |
| #include "base/utf_string_conversions.h" |
| #include "build/build_config.h" |
| #include "gfx/canvas.h" |
| #include "unicode/rbbi.h" |
| #include "unicode/uloc.h" |
| |
| #if defined(OS_MACOSX) |
| #include "app/l10n_util_mac.h" |
| #elif defined(OS_WIN) |
| #include "app/l10n_util_win.h" |
| #endif |
| |
| namespace { |
| |
| #if defined(OS_WIN) |
| static const FilePath::CharType kLocaleFileExtension[] = L".dll"; |
| #elif defined(OS_POSIX) |
| static const FilePath::CharType kLocaleFileExtension[] = ".pak"; |
| #endif |
| |
| static const char* const kAcceptLanguageList[] = { |
| "af", // Afrikaans |
| "am", // Amharic |
| "ar", // Arabic |
| "az", // Azerbaijani |
| "be", // Belarusian |
| "bg", // Bulgarian |
| "bh", // Bihari |
| "bn", // Bengali |
| "br", // Breton |
| "bs", // Bosnian |
| "ca", // Catalan |
| "co", // Corsican |
| "cs", // Czech |
| "cy", // Welsh |
| "da", // Danish |
| "de", // German |
| "de-AT", // German (Austria) |
| "de-CH", // German (Switzerland) |
| "de-DE", // German (Germany) |
| "el", // Greek |
| "en", // English |
| "en-AU", // English (Austrailia) |
| "en-CA", // English (Canada) |
| "en-GB", // English (UK) |
| "en-NZ", // English (New Zealand) |
| "en-US", // English (US) |
| "en-ZA", // English (South Africa) |
| "eo", // Esperanto |
| // TODO(jungshik) : Do we want to list all es-Foo for Latin-American |
| // Spanish speaking countries? |
| "es", // Spanish |
| "et", // Estonian |
| "eu", // Basque |
| "fa", // Persian |
| "fi", // Finnish |
| "fil", // Filipino |
| "fo", // Faroese |
| "fr", // French |
| "fr-CA", // French (Canada) |
| "fr-CH", // French (Switzerland) |
| "fr-FR", // French (France) |
| "fy", // Frisian |
| "ga", // Irish |
| "gd", // Scots Gaelic |
| "gl", // Galician |
| "gn", // Guarani |
| "gu", // Gujarati |
| "ha", // Hausa |
| "haw", // Hawaiian |
| "he", // Hebrew |
| "hi", // Hindi |
| "hr", // Croatian |
| "hu", // Hungarian |
| "hy", // Armenian |
| "ia", // Interlingua |
| "id", // Indonesian |
| "is", // Icelandic |
| "it", // Italian |
| "it-CH", // Italian (Switzerland) |
| "it-IT", // Italian (Italy) |
| "ja", // Japanese |
| "jw", // Javanese |
| "ka", // Georgian |
| "kk", // Kazakh |
| "km", // Cambodian |
| "kn", // Kannada |
| "ko", // Korean |
| "ku", // Kurdish |
| "ky", // Kyrgyz |
| "la", // Latin |
| "ln", // Lingala |
| "lo", // Laothian |
| "lt", // Lithuanian |
| "lv", // Latvian |
| "mk", // Macedonian |
| "ml", // Malayalam |
| "mn", // Mongolian |
| "mo", // Moldavian |
| "mr", // Marathi |
| "ms", // Malay |
| "mt", // Maltese |
| "nb", // Norwegian (Bokmal) |
| "ne", // Nepali |
| "nl", // Dutch |
| "nn", // Norwegian (Nynorsk) |
| "no", // Norwegian |
| "oc", // Occitan |
| "om", // Oromo |
| "or", // Oriya |
| "pa", // Punjabi |
| "pl", // Polish |
| "ps", // Pashto |
| "pt", // Portuguese |
| "pt-BR", // Portuguese (Brazil) |
| "pt-PT", // Portuguese (Portugal) |
| "qu", // Quechua |
| "rm", // Romansh |
| "ro", // Romanian |
| "ru", // Russian |
| "sd", // Sindhi |
| "sh", // Serbo-Croatian |
| "si", // Sinhalese |
| "sk", // Slovak |
| "sl", // Slovenian |
| "sn", // Shona |
| "so", // Somali |
| "sq", // Albanian |
| "sr", // Serbian |
| "st", // Sesotho |
| "su", // Sundanese |
| "sv", // Swedish |
| "sw", // Swahili |
| "ta", // Tamil |
| "te", // Telugu |
| "tg", // Tajik |
| "th", // Thai |
| "ti", // Tigrinya |
| "tk", // Turkmen |
| "to", // Tonga |
| "tr", // Turkish |
| "tt", // Tatar |
| "tw", // Twi |
| "ug", // Uighur |
| "uk", // Ukrainian |
| "ur", // Urdu |
| "uz", // Uzbek |
| "vi", // Vietnamese |
| "xh", // Xhosa |
| "yi", // Yiddish |
| "yo", // Yoruba |
| "zh", // Chinese |
| "zh-CN", // Chinese (Simplified) |
| "zh-TW", // Chinese (Traditional) |
| "zu", // Zulu |
| }; |
| |
| // Returns true if |locale_name| has an alias in the ICU data file. |
| bool IsDuplicateName(const std::string& locale_name) { |
| static const char* const kDuplicateNames[] = { |
| "en", |
| "pt", |
| "zh", |
| "zh_hans_cn", |
| "zh_hant_tw" |
| }; |
| |
| // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain). |
| // 'es-419' (Spanish in Latin America) is not available in ICU so that it |
| // has to be added manually in GetAvailableLocales(). |
| if (LowerCaseEqualsASCII(locale_name.substr(0, 3), "es_")) |
| return true; |
| for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) { |
| if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0) |
| return true; |
| } |
| return false; |
| } |
| |
| bool IsLocaleNameTranslated(const char* locale, |
| const std::string& display_locale) { |
| string16 display_name = |
| l10n_util::GetDisplayNameForLocale(locale, display_locale, false); |
| // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not |
| // uloc_getDisplayName returns the actual translation or the default |
| // value (locale code), we have to rely on this hack to tell whether |
| // the translation is available or not. If ICU doesn't have a translated |
| // name for this locale, GetDisplayNameForLocale will just return the |
| // locale code. |
| return !IsStringASCII(display_name) || UTF16ToASCII(display_name) != locale; |
| } |
| |
| // We added 30+ minimally populated locales with only a few entries |
| // (exemplar character set, script, writing direction and its own |
| // lanaguage name). These locales have to be distinguished from the |
| // fully populated locales to which Chrome is localized. |
| bool IsLocalePartiallyPopulated(const std::string& locale_name) { |
| // For partially populated locales, even the translation for "English" |
| // is not available. A more robust/elegant way to check is to add a special |
| // field (say, 'isPartial' to our version of ICU locale files) and |
| // check its value, but this hack seems to work well. |
| return !IsLocaleNameTranslated("en", locale_name); |
| } |
| |
| #if !defined(OS_MACOSX) |
| bool IsLocaleAvailable(const std::string& locale, |
| const FilePath& locale_path) { |
| // If locale has any illegal characters in it, we don't want to try to |
| // load it because it may be pointing outside the locale data file directory. |
| if (!file_util::IsFilenameLegal(ASCIIToUTF16(locale))) |
| return false; |
| |
| // IsLocalePartiallyPopulated() can be called here for an early return w/o |
| // checking the resource availability below. It'd help when Chrome is run |
| // under a system locale Chrome is not localized to (e.g.Farsi on Linux), |
| // but it'd slow down the start up time a little bit for locales Chrome is |
| // localized to. So, we don't call it here. |
| if (!l10n_util::IsLocaleSupportedByOS(locale)) |
| return false; |
| |
| FilePath test_path = locale_path; |
| test_path = |
| test_path.AppendASCII(locale).ReplaceExtension(kLocaleFileExtension); |
| return file_util::PathExists(test_path); |
| } |
| |
| bool CheckAndResolveLocale(const std::string& locale, |
| const FilePath& locale_path, |
| std::string* resolved_locale) { |
| if (IsLocaleAvailable(locale, locale_path)) { |
| *resolved_locale = locale; |
| return true; |
| } |
| // If the locale matches language but not country, use that instead. |
| // TODO(jungshik) : Nothing is done about languages that Chrome |
| // does not support but available on Windows. We fall |
| // back to en-US in GetApplicationLocale so that it's a not critical, |
| // but we can do better. |
| std::string::size_type hyphen_pos = locale.find('-'); |
| if (hyphen_pos != std::string::npos && hyphen_pos > 0) { |
| std::string lang(locale, 0, hyphen_pos); |
| std::string region(locale, hyphen_pos + 1); |
| std::string tmp_locale(lang); |
| // Map es-RR other than es-ES to es-419 (Chrome's Latin American |
| // Spanish locale). |
| if (LowerCaseEqualsASCII(lang, "es") && !LowerCaseEqualsASCII(region, "es")) |
| tmp_locale.append("-419"); |
| else if (LowerCaseEqualsASCII(lang, "zh")) { |
| // Map zh-HK and zh-MK to zh-TW. Otherwise, zh-FOO is mapped to zh-CN. |
| if (LowerCaseEqualsASCII(region, "hk") || |
| LowerCaseEqualsASCII(region, "mk")) { |
| tmp_locale.append("-TW"); |
| } else { |
| tmp_locale.append("-CN"); |
| } |
| } |
| if (IsLocaleAvailable(tmp_locale, locale_path)) { |
| resolved_locale->swap(tmp_locale); |
| return true; |
| } |
| } |
| |
| // Google updater uses no, iw and en for our nb, he, and en-US. |
| // We need to map them to our codes. |
| struct { |
| const char* source; |
| const char* dest; |
| } alias_map[] = { |
| {"no", "nb"}, |
| {"tl", "fil"}, |
| {"iw", "he"}, |
| {"en", "en-US"}, |
| }; |
| |
| for (size_t i = 0; i < ARRAYSIZE_UNSAFE(alias_map); ++i) { |
| if (LowerCaseEqualsASCII(locale, alias_map[i].source)) { |
| std::string tmp_locale(alias_map[i].dest); |
| if (IsLocaleAvailable(tmp_locale, locale_path)) { |
| resolved_locale->swap(tmp_locale); |
| return true; |
| } |
| } |
| } |
| |
| return false; |
| } |
| #endif |
| |
| // On Linux, the text layout engine Pango determines paragraph directionality |
| // by looking at the first strongly-directional character in the text. This |
| // means text such as "Google Chrome foo bar..." will be layed out LTR even |
| // if "foo bar" is RTL. So this function prepends the necessary RLM in such |
| // cases. |
| void AdjustParagraphDirectionality(string16* paragraph) { |
| #if defined(OS_LINUX) |
| if (base::i18n::IsRTL() && |
| base::i18n::StringContainsStrongRTLChars(*paragraph)) { |
| paragraph->insert(0, 1, static_cast<char16>(base::i18n::kRightToLeftMark)); |
| } |
| #endif |
| } |
| |
| #if defined(OS_WIN) |
| std::string GetCanonicalLocale(const std::string& locale) { |
| return base::i18n::GetCanonicalLocale(locale.c_str()); |
| } |
| #endif |
| |
| } // namespace |
| |
| namespace l10n_util { |
| |
| std::string GetApplicationLocale(const std::string& pref_locale) { |
| #if defined(OS_MACOSX) |
| |
| // Use any override (Cocoa for the browser), otherwise use the preference |
| // passed to the function. |
| std::string app_locale = l10n_util::GetLocaleOverride(); |
| if (app_locale.empty()) |
| app_locale = pref_locale; |
| |
| // The above should handle all of the cases Chrome normally hits, but for some |
| // unit tests, we need something to fall back too. |
| if (app_locale.empty()) |
| app_locale = "en-US"; |
| |
| // Windows/Linux call SetICUDefaultLocale after determining the actual locale |
| // with CheckAndResolveLocal to make ICU APIs work in that locale. |
| // Mac doesn't use a locale directory tree of resources (it uses Mac style |
| // resources), so mirror the Windows/Linux behavior of calling |
| // SetICUDefaultLocale. |
| base::i18n::SetICUDefaultLocale(app_locale); |
| return app_locale; |
| |
| #else |
| |
| FilePath locale_path; |
| PathService::Get(app::DIR_LOCALES, &locale_path); |
| std::string resolved_locale; |
| std::vector<std::string> candidates; |
| |
| // We only use --lang and the app pref on Windows. On Linux, we only |
| // look at the LC_*/LANG environment variables. We do, however, pass --lang |
| // to renderer and plugin processes so they know what language the parent |
| // process decided to use. |
| |
| #if defined(OS_WIN) |
| |
| // First, try the preference value. |
| if (!pref_locale.empty()) |
| candidates.push_back(pref_locale); |
| |
| // Next, try the overridden locale. |
| const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides(); |
| if (!languages.empty()) { |
| candidates.reserve(candidates.size() + languages.size()); |
| std::transform(languages.begin(), languages.end(), |
| std::back_inserter(candidates), &GetCanonicalLocale); |
| } else { |
| // If no override was set, defer to ICU |
| candidates.push_back(base::i18n::GetConfiguredLocale()); |
| } |
| |
| #elif defined(OS_CHROMEOS) |
| |
| // On ChromeOS, use the application locale preference. |
| if (!pref_locale.empty()) |
| candidates.push_back(pref_locale); |
| |
| #elif defined(OS_POSIX) && defined(TOOLKIT_USES_GTK) |
| |
| // GLib implements correct environment variable parsing with |
| // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG. |
| // We used to use our custom parsing code along with ICU for this purpose. |
| // If we have a port that does not depend on GTK, we have to |
| // restore our custom code for that port. |
| const char* const* languages = g_get_language_names(); |
| DCHECK(languages); // A valid pointer is guaranteed. |
| DCHECK(*languages); // At least one entry, "C", is guaranteed. |
| |
| for (; *languages != NULL; ++languages) { |
| candidates.push_back(base::i18n::GetCanonicalLocale(*languages)); |
| } |
| |
| #else |
| #error Unsupported platform, see build/build_config.h |
| #endif |
| |
| std::vector<std::string>::const_iterator i = candidates.begin(); |
| for (; i != candidates.end(); ++i) { |
| if (CheckAndResolveLocale(*i, locale_path, &resolved_locale)) { |
| base::i18n::SetICUDefaultLocale(resolved_locale); |
| return resolved_locale; |
| } |
| } |
| |
| // Fallback on en-US. |
| const std::string fallback_locale("en-US"); |
| if (IsLocaleAvailable(fallback_locale, locale_path)) { |
| base::i18n::SetICUDefaultLocale(fallback_locale); |
| return fallback_locale; |
| } |
| |
| // No locale data file was found; we shouldn't get here. |
| NOTREACHED(); |
| |
| return std::string(); |
| |
| #endif |
| } |
| |
| string16 GetDisplayNameForLocale(const std::string& locale, |
| const std::string& display_locale, |
| bool is_for_ui) { |
| std::string locale_code = locale; |
| // Internally, we use the language code of zh-CN and zh-TW, but we want the |
| // display names to be Chinese (Simplified) and Chinese (Traditional) instead |
| // of Chinese (China) and Chinese (Taiwan). To do that, we pass zh-Hans |
| // and zh-Hant to ICU. Even with this mapping, we'd get |
| // 'Chinese (Simplified Han)' and 'Chinese (Traditional Han)' in English and |
| // even longer results in other languages. Arguably, they're better than |
| // the current results : Chinese (China) / Chinese (Taiwan). |
| // TODO(jungshik): Do one of the following: |
| // 1. Special-case Chinese by getting the custom-translation for them |
| // 2. Recycle IDS_ENCODING_{SIMP,TRAD}_CHINESE. |
| // 3. Get translations for two directly from the ICU resouce bundle |
| // because they're not accessible with other any API. |
| // 4. Patch ICU to special-case zh-Hans/zh-Hant for us. |
| // #1 and #2 wouldn't work if display_locale != current UI locale although |
| // we can think of additional hack to work around the problem. |
| // #3 can be potentially expensive. |
| if (locale_code == "zh-CN") |
| locale_code = "zh-Hans"; |
| else if (locale_code == "zh-TW") |
| locale_code = "zh-Hant"; |
| |
| UErrorCode error = U_ZERO_ERROR; |
| const int buffer_size = 1024; |
| |
| string16 display_name; |
| int actual_size = uloc_getDisplayName(locale_code.c_str(), |
| display_locale.c_str(), |
| WriteInto(&display_name, buffer_size + 1), buffer_size, &error); |
| DCHECK(U_SUCCESS(error)); |
| display_name.resize(actual_size); |
| // Add an RTL mark so parentheses are properly placed. |
| if (is_for_ui && base::i18n::IsRTL()) |
| display_name.push_back(static_cast<char16>(base::i18n::kRightToLeftMark)); |
| return display_name; |
| } |
| |
| std::string NormalizeLocale(const std::string& locale) { |
| std::string normalized_locale(locale); |
| std::replace(normalized_locale.begin(), normalized_locale.end(), '-', '_'); |
| |
| return normalized_locale; |
| } |
| |
| void GetParentLocales(const std::string& current_locale, |
| std::vector<std::string>* parent_locales) { |
| std::string locale(NormalizeLocale(current_locale)); |
| |
| const int kNameCapacity = 256; |
| char parent[kNameCapacity]; |
| base::strlcpy(parent, locale.c_str(), kNameCapacity); |
| parent_locales->push_back(parent); |
| UErrorCode err = U_ZERO_ERROR; |
| while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) { |
| if (U_FAILURE(err)) |
| break; |
| parent_locales->push_back(parent); |
| } |
| } |
| |
| bool IsValidLocaleSyntax(const std::string& locale) { |
| // Check that the length is plausible. |
| if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY) |
| return false; |
| |
| // Strip off the part after an '@' sign, which might contain keywords, |
| // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil. |
| // We don't validate that part much, just check that there's at least one |
| // equals sign in a plausible place. Normalize the prefix so that hyphens |
| // are changed to underscores. |
| std::string prefix = NormalizeLocale(locale); |
| size_t split_point = locale.find("@"); |
| if (split_point != std::string::npos) { |
| std::string keywords = locale.substr(split_point + 1); |
| prefix = locale.substr(0, split_point); |
| |
| size_t equals_loc = keywords.find("="); |
| if (equals_loc == std::string::npos || |
| equals_loc < 1 || equals_loc > keywords.size() - 2) |
| return false; |
| } |
| |
| // Check that all characters before the at-sign are alphanumeric or |
| // underscore. |
| for (size_t i = 0; i < prefix.size(); i++) { |
| char ch = prefix[i]; |
| if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '_') |
| return false; |
| } |
| |
| // Check that the initial token (before the first hyphen/underscore) |
| // is 1 - 3 alphabetical characters (a language tag). |
| for (size_t i = 0; i < prefix.size(); i++) { |
| char ch = prefix[i]; |
| if (ch == '_') { |
| if (i < 1 || i > 3) |
| return false; |
| break; |
| } |
| if (!IsAsciiAlpha(ch)) |
| return false; |
| } |
| |
| // Check that the all tokens after the initial token are 1 - 8 characters. |
| // (Tokenize/StringTokenizer don't work here, they collapse multiple |
| // delimiters into one.) |
| int token_len = 0; |
| int token_index = 0; |
| for (size_t i = 0; i < prefix.size(); i++) { |
| if (prefix[i] != '_') { |
| token_len++; |
| continue; |
| } |
| |
| if (token_index > 0 && (token_len < 1 || token_len > 8)) { |
| return false; |
| } |
| token_index++; |
| token_len = 0; |
| } |
| if (token_index == 0 && (token_len < 1 || token_len > 3)) { |
| return false; |
| } else if (token_len < 1 || token_len > 8) { |
| return false; |
| } |
| |
| return true; |
| } |
| |
| std::wstring GetString(int message_id) { |
| return UTF16ToWide(GetStringUTF16(message_id)); |
| } |
| |
| std::string GetStringUTF8(int message_id) { |
| return UTF16ToUTF8(GetStringUTF16(message_id)); |
| } |
| |
| string16 GetStringUTF16(int message_id) { |
| ResourceBundle& rb = ResourceBundle::GetSharedInstance(); |
| string16 str = rb.GetLocalizedString(message_id); |
| AdjustParagraphDirectionality(&str); |
| |
| return str; |
| } |
| |
| static string16 GetStringF(int message_id, |
| const std::vector<string16>& replacements, |
| std::vector<size_t>* offsets) { |
| // TODO(tc): We could save a string copy if we got the raw string as |
| // a StringPiece and were able to call ReplaceStringPlaceholders with |
| // a StringPiece format string and string16 substitution strings. In |
| // practice, the strings should be relatively short. |
| ResourceBundle& rb = ResourceBundle::GetSharedInstance(); |
| const string16& format_string = rb.GetLocalizedString(message_id); |
| |
| #ifndef NDEBUG |
| // Make sure every replacement string is being used, so we don't just |
| // silently fail to insert one. If |offsets| is non-NULL, then don't do this |
| // check as the code may simply want to find the placeholders rather than |
| // actually replacing them. |
| if (!offsets) { |
| std::string utf8_string = UTF16ToUTF8(format_string); |
| |
| // $9 is the highest allowed placeholder. |
| for (size_t i = 0; i < 9; ++i) { |
| bool placeholder_should_exist = replacements.size() > i; |
| |
| std::string placeholder = StringPrintf("$%d", static_cast<int>(i + 1)); |
| size_t pos = utf8_string.find(placeholder.c_str()); |
| if (placeholder_should_exist) { |
| DCHECK_NE(std::string::npos, pos) << |
| " Didn't find a " << placeholder << " placeholder in " << |
| utf8_string; |
| } else { |
| DCHECK_EQ(std::string::npos, pos) << |
| " Unexpectedly found a " << placeholder << " placeholder in " << |
| utf8_string; |
| } |
| } |
| } |
| #endif |
| |
| string16 formatted = ReplaceStringPlaceholders(format_string, replacements, |
| offsets); |
| AdjustParagraphDirectionality(&formatted); |
| |
| return formatted; |
| } |
| |
| #if !defined(WCHAR_T_IS_UTF16) |
| std::wstring GetStringF(int message_id, const std::wstring& a) { |
| return UTF16ToWide(GetStringFUTF16(message_id, WideToUTF16(a))); |
| } |
| |
| std::wstring GetStringF(int message_id, |
| const std::wstring& a, |
| const std::wstring& b) { |
| return UTF16ToWide(GetStringFUTF16(message_id, WideToUTF16(a), |
| WideToUTF16(b))); |
| } |
| |
| std::wstring GetStringF(int message_id, |
| const std::wstring& a, |
| const std::wstring& b, |
| const std::wstring& c) { |
| return UTF16ToWide(GetStringFUTF16(message_id, WideToUTF16(a), |
| WideToUTF16(b), WideToUTF16(c))); |
| } |
| |
| std::wstring GetStringF(int message_id, |
| const std::wstring& a, |
| const std::wstring& b, |
| const std::wstring& c, |
| const std::wstring& d) { |
| return UTF16ToWide(GetStringFUTF16(message_id, WideToUTF16(a), WideToUTF16(b), |
| WideToUTF16(c), WideToUTF16(d))); |
| } |
| #endif |
| |
| std::string GetStringFUTF8(int message_id, |
| const string16& a) { |
| return UTF16ToUTF8(GetStringFUTF16(message_id, a)); |
| } |
| |
| std::string GetStringFUTF8(int message_id, |
| const string16& a, |
| const string16& b) { |
| return UTF16ToUTF8(GetStringFUTF16(message_id, a, b)); |
| } |
| |
| std::string GetStringFUTF8(int message_id, |
| const string16& a, |
| const string16& b, |
| const string16& c) { |
| return UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c)); |
| } |
| |
| std::string GetStringFUTF8(int message_id, |
| const string16& a, |
| const string16& b, |
| const string16& c, |
| const string16& d) { |
| return UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d)); |
| } |
| |
| string16 GetStringFUTF16(int message_id, |
| const string16& a) { |
| std::vector<string16> replacements; |
| replacements.push_back(a); |
| return GetStringF(message_id, replacements, NULL); |
| } |
| |
| string16 GetStringFUTF16(int message_id, |
| const string16& a, |
| const string16& b) { |
| return GetStringFUTF16(message_id, a, b, NULL); |
| } |
| |
| string16 GetStringFUTF16(int message_id, |
| const string16& a, |
| const string16& b, |
| const string16& c) { |
| std::vector<string16> replacements; |
| replacements.push_back(a); |
| replacements.push_back(b); |
| replacements.push_back(c); |
| return GetStringF(message_id, replacements, NULL); |
| } |
| |
| string16 GetStringFUTF16(int message_id, |
| const string16& a, |
| const string16& b, |
| const string16& c, |
| const string16& d) { |
| std::vector<string16> replacements; |
| replacements.push_back(a); |
| replacements.push_back(b); |
| replacements.push_back(c); |
| replacements.push_back(d); |
| return GetStringF(message_id, replacements, NULL); |
| } |
| |
| std::wstring GetStringF(int message_id, const std::wstring& a, size_t* offset) { |
| DCHECK(offset); |
| std::vector<size_t> offsets; |
| std::vector<string16> replacements; |
| replacements.push_back(WideToUTF16(a)); |
| string16 result = GetStringF(message_id, replacements, &offsets); |
| DCHECK(offsets.size() == 1); |
| *offset = offsets[0]; |
| return UTF16ToWide(result); |
| } |
| |
| std::wstring GetStringF(int message_id, |
| const std::wstring& a, |
| const std::wstring& b, |
| std::vector<size_t>* offsets) { |
| std::vector<string16> replacements; |
| replacements.push_back(WideToUTF16(a)); |
| replacements.push_back(WideToUTF16(b)); |
| return UTF16ToWide(GetStringF(message_id, replacements, offsets)); |
| } |
| |
| string16 GetStringFUTF16(int message_id, const string16& a, size_t* offset) { |
| DCHECK(offset); |
| std::vector<size_t> offsets; |
| std::vector<string16> replacements; |
| replacements.push_back(a); |
| string16 result = GetStringF(message_id, replacements, &offsets); |
| DCHECK(offsets.size() == 1); |
| *offset = offsets[0]; |
| return result; |
| } |
| |
| string16 GetStringFUTF16(int message_id, |
| const string16& a, |
| const string16& b, |
| std::vector<size_t>* offsets) { |
| std::vector<string16> replacements; |
| replacements.push_back(a); |
| replacements.push_back(b); |
| return GetStringF(message_id, replacements, offsets); |
| } |
| |
| std::wstring GetStringF(int message_id, int a) { |
| return GetStringF(message_id, UTF8ToWide(base::IntToString(a))); |
| } |
| |
| std::wstring GetStringF(int message_id, int64 a) { |
| return GetStringF(message_id, UTF8ToWide(base::Int64ToString(a))); |
| } |
| |
| string16 TruncateString(const string16& string, size_t length) { |
| if (string.size() <= length) |
| // String fits, return it. |
| return string; |
| |
| if (length == 0) { |
| // No room for the elide string, return an empty string. |
| return string16(); |
| } |
| size_t max = length - 1; |
| |
| // Added to the end of strings that are too big. |
| static const char16 kElideString[] = { 0x2026, 0 }; |
| |
| if (max == 0) { |
| // Just enough room for the elide string. |
| return kElideString; |
| } |
| |
| // Use a line iterator to find the first boundary. |
| UErrorCode status = U_ZERO_ERROR; |
| scoped_ptr<icu::RuleBasedBreakIterator> bi( |
| static_cast<icu::RuleBasedBreakIterator*>( |
| icu::RuleBasedBreakIterator::createLineInstance( |
| icu::Locale::getDefault(), status))); |
| if (U_FAILURE(status)) |
| return string.substr(0, max) + kElideString; |
| bi->setText(string.c_str()); |
| int32_t index = bi->preceding(static_cast<int32_t>(max)); |
| if (index == icu::BreakIterator::DONE) { |
| index = static_cast<int32_t>(max); |
| } else { |
| // Found a valid break (may be the beginning of the string). Now use |
| // a character iterator to find the previous non-whitespace character. |
| icu::StringCharacterIterator char_iterator(string.c_str()); |
| if (index == 0) { |
| // No valid line breaks. Start at the end again. This ensures we break |
| // on a valid character boundary. |
| index = static_cast<int32_t>(max); |
| } |
| char_iterator.setIndex(index); |
| while (char_iterator.hasPrevious()) { |
| char_iterator.previous(); |
| if (!(u_isspace(char_iterator.current()) || |
| u_charType(char_iterator.current()) == U_CONTROL_CHAR || |
| u_charType(char_iterator.current()) == U_NON_SPACING_MARK)) { |
| // Not a whitespace character. Advance the iterator so that we |
| // include the current character in the truncated string. |
| char_iterator.next(); |
| break; |
| } |
| } |
| if (char_iterator.hasPrevious()) { |
| // Found a valid break point. |
| index = char_iterator.getIndex(); |
| } else { |
| // String has leading whitespace, return the elide string. |
| return kElideString; |
| } |
| } |
| return string.substr(0, index) + kElideString; |
| } |
| |
| string16 ToLower(const string16& string) { |
| icu::UnicodeString lower_u_str( |
| icu::UnicodeString(string.c_str()).toLower(icu::Locale::getDefault())); |
| string16 result; |
| lower_u_str.extract(0, lower_u_str.length(), |
| WriteInto(&result, lower_u_str.length() + 1)); |
| return result; |
| } |
| |
| string16 ToUpper(const string16& string) { |
| icu::UnicodeString upper_u_str( |
| icu::UnicodeString(string.c_str()).toUpper(icu::Locale::getDefault())); |
| string16 result; |
| upper_u_str.extract(0, upper_u_str.length(), |
| WriteInto(&result, upper_u_str.length() + 1)); |
| return result; |
| } |
| |
| // Compares the character data stored in two different string16 strings by |
| // specified Collator instance. |
| UCollationResult CompareString16WithCollator(const icu::Collator* collator, |
| const string16& lhs, |
| const string16& rhs) { |
| DCHECK(collator); |
| UErrorCode error = U_ZERO_ERROR; |
| UCollationResult result = collator->compare( |
| static_cast<const UChar*>(lhs.c_str()), static_cast<int>(lhs.length()), |
| static_cast<const UChar*>(rhs.c_str()), static_cast<int>(rhs.length()), |
| error); |
| DCHECK(U_SUCCESS(error)); |
| return result; |
| } |
| |
| // Compares the character data stored in two different std:wstring strings by |
| // specified Collator instance. |
| UCollationResult CompareStringWithCollator(const icu::Collator* collator, |
| const std::wstring& lhs, |
| const std::wstring& rhs) { |
| DCHECK(collator); |
| UCollationResult result; |
| #if defined(WCHAR_T_IS_UTF32) |
| // Need to convert to UTF-16 to be compatible with UnicodeString's |
| // constructor. |
| string16 lhs_utf16 = WideToUTF16(lhs); |
| string16 rhs_utf16 = WideToUTF16(rhs); |
| |
| result = CompareString16WithCollator(collator, lhs_utf16, rhs_utf16); |
| #else |
| result = CompareString16WithCollator(collator, lhs, rhs); |
| #endif |
| return result; |
| } |
| |
| // Specialization of operator() method for std::wstring version. |
| template <> |
| bool StringComparator<std::wstring>::operator()(const std::wstring& lhs, |
| const std::wstring& rhs) { |
| // If we can not get collator instance for specified locale, just do simple |
| // string compare. |
| if (!collator_) |
| return lhs < rhs; |
| return CompareStringWithCollator(collator_, lhs, rhs) == UCOL_LESS; |
| }; |
| |
| #if !defined(WCHAR_T_IS_UTF16) |
| // Specialization of operator() method for string16 version. |
| template <> |
| bool StringComparator<string16>::operator()(const string16& lhs, |
| const string16& rhs) { |
| // If we can not get collator instance for specified locale, just do simple |
| // string compare. |
| if (!collator_) |
| return lhs < rhs; |
| return CompareString16WithCollator(collator_, lhs, rhs) == UCOL_LESS; |
| }; |
| #endif // !defined(WCHAR_T_IS_UTF16) |
| |
| void SortStrings(const std::string& locale, |
| std::vector<std::wstring>* strings) { |
| SortVectorWithStringKey(locale, strings, false); |
| } |
| |
| void SortStrings16(const std::string& locale, |
| std::vector<string16>* strings) { |
| SortVectorWithStringKey(locale, strings, false); |
| } |
| |
| const std::vector<std::string>& GetAvailableLocales() { |
| static std::vector<std::string> locales; |
| if (locales.empty()) { |
| int num_locales = uloc_countAvailable(); |
| for (int i = 0; i < num_locales; ++i) { |
| std::string locale_name = uloc_getAvailable(i); |
| // Filter out the names that have aliases. |
| if (IsDuplicateName(locale_name)) |
| continue; |
| // Filter out locales for which we have only partially populated data |
| // and to which Chrome is not localized. |
| if (IsLocalePartiallyPopulated(locale_name)) |
| continue; |
| if (!IsLocaleSupportedByOS(locale_name)) |
| continue; |
| // Normalize underscores to hyphens because that's what our locale files |
| // use. |
| std::replace(locale_name.begin(), locale_name.end(), '_', '-'); |
| |
| // Map the Chinese locale names over to zh-CN and zh-TW. |
| if (LowerCaseEqualsASCII(locale_name, "zh-hans")) { |
| locale_name = "zh-CN"; |
| } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) { |
| locale_name = "zh-TW"; |
| } |
| locales.push_back(locale_name); |
| } |
| |
| // Manually add 'es-419' to the list. See the comment in IsDuplicateName(). |
| locales.push_back("es-419"); |
| } |
| return locales; |
| } |
| |
| void GetAcceptLanguagesForLocale(const std::string& display_locale, |
| std::vector<std::string>* locale_codes) { |
| for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) { |
| if (!IsLocaleNameTranslated(kAcceptLanguageList[i], display_locale)) |
| // TODO(jungshik) : Put them at the of the list with language codes |
| // enclosed by brackets instead of skipping. |
| continue; |
| locale_codes->push_back(kAcceptLanguageList[i]); |
| } |
| } |
| |
| } // namespace l10n_util |