// components/spellcheck/common/spellcheck_common.cc - chromium/src - Git at Google

 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "components/spellcheck/common/spellcheck_common.h"

 #include "base/check.h"
 #include "base/command_line.h"
 #include "base/containers/contains.h"
 #include "base/files/file_path.h"
 #include "base/metrics/field_trial.h"
 #include "base/strings/string_util.h"
 #include "third_party/icu/source/common/unicode/uloc.h"
 #include "third_party/icu/source/common/unicode/urename.h"
 #include "third_party/icu/source/common/unicode/utypes.h"

 namespace spellcheck {

 // One row of the supported-language table: a language code paired with the
 // language-region code that dictionaries use for it.
 struct LanguageRegion {
   // The language.
   const char* language;
   // Language & region, used by dictionaries.
   const char* language_region;
 };

 // Pairs a language string with the version string to use for it.
 struct LanguageVersion {
   // The language input.
   const char* language;
   // The corresponding version.
   const char* version;
 };

 // Maps each accepted language code (first field) to the language-region code
 // used by dictionaries (second field). The lookups in this file scan the
 // table from the top and act on the first matching row, so row order matters
 // wherever several rows share a language-region value (e.g. the es-* rows).
 static constexpr LanguageRegion kSupportedSpellCheckerLanguages[] = {
     // Several languages are not to be included in the spellchecker list:
     // th-TH, vi-VI.
     // NOTE(review): the table below does contain a "vi" -> "vi-VN" row;
     // confirm whether the exclusion note above is still accurate.
     // clang-format off
     {"af", "af-ZA"},
     {"bg", "bg-BG"},
     {"ca", "ca-ES"},
     {"cs", "cs-CZ"},
     {"cy", "cy-GB"},
     {"da", "da-DK"},
     {"de", "de-DE"},
     {"de-DE", "de-DE"},
     {"el", "el-GR"},
     {"en-AU", "en-AU"},
     {"en-CA", "en-CA"},
     {"en-GB", "en-GB"},
     {"en-GB-oxendict", "en-GB-oxendict"},
     {"en-US", "en-US"},
     {"es", "es-ES"},
     {"es-419", "es-ES"},
     {"es-AR", "es-ES"},
     {"es-ES", "es-ES"},
     {"es-MX", "es-ES"},
     {"es-US", "es-ES"},
     {"et", "et-EE"},
     {"fa", "fa-IR"},
     {"fo", "fo-FO"},
     {"fr", "fr-FR"},
     {"fr-FR", "fr-FR"},
     {"he", "he-IL"},
     {"hi", "hi-IN"},
     {"hr", "hr-HR"},
     {"hu", "hu-HU"},
     {"hy", "hy"},
     {"id", "id-ID"},
     {"it", "it-IT"},
     {"it-IT", "it-IT"},
     {"ko", "ko"},
     {"lt", "lt-LT"},
     {"lv", "lv-LV"},
     {"nb", "nb-NO"},
     {"nl", "nl-NL"},
     {"pl", "pl-PL"},
     {"pt", "pt-BR"}, // based on kAliasMap from ui/base/l10n/l10n_util.cc
     {"pt-BR", "pt-BR"},
     {"pt-PT", "pt-PT"},
     {"ro", "ro-RO"},
     {"ru", "ru-RU"},
     {"sh", "sh"},
     {"sk", "sk-SK"},
     {"sl", "sl-SI"},
     {"sq", "sq"},
     {"sr", "sr"},
     {"sv", "sv-SE"},
     {"ta", "ta-IN"},
     {"tg", "tg-TG"},
     {"tr", "tr-TR"},
     {"uk", "uk-UA"},
     {"vi", "vi-VN"},
     // clang-format on
 };

 bool IsValidRegion(const std::string& region) {
   for (const auto& lang_region : kSupportedSpellCheckerLanguages) {
     if (lang_region.language_region == region)
       return true;
   }
   return false;
 }

 // Maps a language name to its language-region form, e.g. "hi" becomes
 // "hi-IN". An input that matches no language in
 // kSupportedSpellCheckerLanguages is returned unchanged.
 std::string GetSpellCheckLanguageRegion(base::StringPiece input_language) {
   const LanguageRegion* match = nullptr;
   for (const LanguageRegion& entry : kSupportedSpellCheckerLanguages) {
     if (entry.language == input_language) {
       match = &entry;
       break;
     }
   }

   if (match)
     return match->language_region;
   return input_language.as_string();
 }

 // Builds the path of the .bdic dictionary file for |input_language| inside
 // |dict_dir|. The file name is "<language-region><version>.bdic": the
 // language-region comes from GetSpellCheckLanguageRegion() and the version is
 // either the default or a per-language override from the table below.
 base::FilePath GetVersionedFileName(base::StringPiece input_language,
                                     const base::FilePath& dict_dir) {
   // 3-0 is the default dictionary version; it indicates that the bdic file
   // contains a checksum.
   static const char kDefaultVersionString[] = "-3-0";

   // Non-default version strings go here. Use the same version for all the
   // dictionaries added at the same time. Bump the major version number when
   // updating either dic or aff files; bump the minor version number when
   // updating only dic_delta files.
   static constexpr LanguageVersion kSpecialVersionString[] = {
       // Jan 9, 2013: Add "FLAG num" to aff to avoid heapcheck crash.
       {"tr-TR", "-4-0"},

       // Mar 4, 2014: Add Tajik dictionary.
       {"tg-TG", "-5-0"},

       // Feb 2019: Initial check-in of Welsh.
       {"cy-GB", "-1-0"},

       // April 2019: Initial check-in of Armenian.
       {"hy", "-1-0"},

       // November 2019: Update Serbian-Latin and Serbian-Cyrillic
       {"sh", "-4-0"},
       {"sr", "-4-0"},

       // January 2020: Update en-* and fa-IR dictionaries from upstream.
       {"en-AU", "-9-0"},
       {"en-CA", "-9-0"},
       {"en-GB", "-9-0"},
       {"en-US", "-9-0"},
       {"fa-IR", "-9-0"},

       // March 2020: Update uk-UA dictionary from upstream.
       {"uk-UA", "-4-0"},

       // June 2020: Add the en-GB-oxendict dictionary.
       {"en-GB-oxendict", "-9-0"},
   };

   const std::string language_region =
       GetSpellCheckLanguageRegion(input_language);

   // Start from the default and switch to the first override whose language
   // equals the resolved language-region, if there is one.
   const char* version_suffix = kDefaultVersionString;
   for (const LanguageVersion& special : kSpecialVersionString) {
     if (language_region == special.language) {
       version_suffix = special.version;
       break;
     }
   }

   return dict_dir.AppendASCII(language_region + version_suffix + ".bdic");
 }

 // Resolves |language| against kSupportedSpellCheckerLanguages, scanning rows
 // in table order:
 //  - as soon as a row's language field equals |language|, |language| itself
 //    is returned;
 //  - otherwise the language field of the first row whose language-region
 //    field equals |language| is returned;
 //  - otherwise the result is an empty string.
 std::string GetCorrespondingSpellCheckLanguage(base::StringPiece language) {
   std::string region_fallback;
   for (const LanguageRegion& entry : kSupportedSpellCheckerLanguages) {
     // A match on the language field wins immediately.
     if (entry.language == language)
       return language.as_string();

     // Remember only the first row matched through its language-region field.
     if (region_fallback.empty() && entry.language_region == language)
       region_fallback = entry.language;
   }

   // No language-field match: hand back the region-based match, which is
   // empty when nothing matched at all.
   return region_fallback;
 }

 std::vector<std::string> SpellCheckLanguages() {
   std::vector<std::string> languages;
   for (const auto& lang_region : kSupportedSpellCheckerLanguages)
     languages.push_back(lang_region.language);
   return languages;
 }

 void GetISOLanguageCountryCodeFromLocale(const std::string& locale,
                                          std::string* language_code,
                                          std::string* country_code) {
   DCHECK(language_code);
   DCHECK(country_code);
   char language[ULOC_LANG_CAPACITY] = ULOC_ENGLISH;
   const char* country = "USA";
   if (!locale.empty()) {
     UErrorCode error = U_ZERO_ERROR;
     char id[ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY];
     uloc_addLikelySubtags(locale.c_str(), id, base::size(id), &error);
     error = U_ZERO_ERROR;
     uloc_getLanguage(id, language, base::size(language), &error);
     country = uloc_getISO3Country(id);
   }
   *language_code = std::string(language);
   *country_code = std::string(country);
 }

 // Merges the per-language lists in |suggestions_list| into
 // |*optional_suggestions| (must be non-null) in round-robin order: the first
 // suggestion of every language, then the second of every language, and so
 // on. A suggestion already present in the output is not added again. Merging
 // stops as soon as, after processing a suggestion, the output holds at least
 // kMaxSuggestions entries.
 void FillSuggestions(
     const std::vector<std::vector<base::string16>>& suggestions_list,
     std::vector<base::string16>* optional_suggestions) {
   DCHECK(optional_suggestions);

   // The longest per-language list bounds the number of round-robin passes.
   size_t longest_list = 0;
   for (const auto& per_language : suggestions_list) {
     if (per_language.size() > longest_list)
       longest_list = per_language.size();
   }

   for (size_t rank = 0; rank < longest_list; ++rank) {
     for (const auto& per_language : suggestions_list) {
       // This language has run out of suggestions at this rank.
       if (rank >= per_language.size())
         continue;

       const base::string16& candidate = per_language[rank];
       // Only add the suggestion if it's unique.
       if (!base::Contains(*optional_suggestions, candidate))
         optional_suggestions->push_back(candidate);

       if (optional_suggestions->size() >= kMaxSuggestions)
         return;
     }
   }
 }

 }  // namespace spellcheck
	// Copyright (c) 2012 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "components/spellcheck/common/spellcheck_common.h"

	#include "base/check.h"
	#include "base/command_line.h"
	#include "base/containers/contains.h"
	#include "base/files/file_path.h"
	#include "base/metrics/field_trial.h"
	#include "base/strings/string_util.h"
	#include "third_party/icu/source/common/unicode/uloc.h"
	#include "third_party/icu/source/common/unicode/urename.h"
	#include "third_party/icu/source/common/unicode/utypes.h"

	namespace spellcheck {

	// One row of the supported-language table: a language code paired with the
	// language-region code that dictionaries use for it.
	struct LanguageRegion {
	  // The language.
	  const char* language;
	  // Language & region, used by dictionaries.
	  const char* language_region;
	};

	// Pairs a language string with the version string to use for it.
	struct LanguageVersion {
	  // The language input.
	  const char* language;
	  // The corresponding version.
	  const char* version;
	};

	// Maps each accepted language code (first field) to the language-region code
	// used by dictionaries (second field). The lookups in this file scan the
	// table from the top and act on the first matching row, so row order matters
	// wherever several rows share a language-region value (e.g. the es-* rows).
	static constexpr LanguageRegion kSupportedSpellCheckerLanguages[] = {
	// Several languages are not to be included in the spellchecker list:
	// th-TH, vi-VI.
	// NOTE(review): the table below does contain a "vi" -> "vi-VN" row;
	// confirm whether the exclusion note above is still accurate.
	// clang-format off
	{"af", "af-ZA"},
	{"bg", "bg-BG"},
	{"ca", "ca-ES"},
	{"cs", "cs-CZ"},
	{"cy", "cy-GB"},
	{"da", "da-DK"},
	{"de", "de-DE"},
	{"de-DE", "de-DE"},
	{"el", "el-GR"},
	{"en-AU", "en-AU"},
	{"en-CA", "en-CA"},
	{"en-GB", "en-GB"},
	{"en-GB-oxendict", "en-GB-oxendict"},
	{"en-US", "en-US"},
	{"es", "es-ES"},
	{"es-419", "es-ES"},
	{"es-AR", "es-ES"},
	{"es-ES", "es-ES"},
	{"es-MX", "es-ES"},
	{"es-US", "es-ES"},
	{"et", "et-EE"},
	{"fa", "fa-IR"},
	{"fo", "fo-FO"},
	{"fr", "fr-FR"},
	{"fr-FR", "fr-FR"},
	{"he", "he-IL"},
	{"hi", "hi-IN"},
	{"hr", "hr-HR"},
	{"hu", "hu-HU"},
	{"hy", "hy"},
	{"id", "id-ID"},
	{"it", "it-IT"},
	{"it-IT", "it-IT"},
	{"ko", "ko"},
	{"lt", "lt-LT"},
	{"lv", "lv-LV"},
	{"nb", "nb-NO"},
	{"nl", "nl-NL"},
	{"pl", "pl-PL"},
	{"pt", "pt-BR"}, // based on kAliasMap from ui/base/l10n/l10n_util.cc
	{"pt-BR", "pt-BR"},
	{"pt-PT", "pt-PT"},
	{"ro", "ro-RO"},
	{"ru", "ru-RU"},
	{"sh", "sh"},
	{"sk", "sk-SK"},
	{"sl", "sl-SI"},
	{"sq", "sq"},
	{"sr", "sr"},
	{"sv", "sv-SE"},
	{"ta", "ta-IN"},
	{"tg", "tg-TG"},
	{"tr", "tr-TR"},
	{"uk", "uk-UA"},
	{"vi", "vi-VN"},
	// clang-format on
	};

	bool IsValidRegion(const std::string& region) {
	for (const auto& lang_region : kSupportedSpellCheckerLanguages) {
	if (lang_region.language_region == region)
	return true;
	}
	return false;
	}

	// Maps a language name to its language-region form, e.g. "hi" becomes
	// "hi-IN". An input that matches no language in
	// kSupportedSpellCheckerLanguages is returned unchanged.
	std::string GetSpellCheckLanguageRegion(base::StringPiece input_language) {
	  const LanguageRegion* match = nullptr;
	  for (const LanguageRegion& entry : kSupportedSpellCheckerLanguages) {
	    if (entry.language == input_language) {
	      match = &entry;
	      break;
	    }
	  }

	  if (match)
	    return match->language_region;
	  return input_language.as_string();
	}

	// Builds the path of the .bdic dictionary file for |input_language| inside
	// |dict_dir|. The file name is "<language-region><version>.bdic": the
	// language-region comes from GetSpellCheckLanguageRegion() and the version is
	// either the default or a per-language override from the table below.
	base::FilePath GetVersionedFileName(base::StringPiece input_language,
	                                    const base::FilePath& dict_dir) {
	  // 3-0 is the default dictionary version; it indicates that the bdic file
	  // contains a checksum.
	  static const char kDefaultVersionString[] = "-3-0";

	  // Non-default version strings go here. Use the same version for all the
	  // dictionaries added at the same time. Bump the major version number when
	  // updating either dic or aff files; bump the minor version number when
	  // updating only dic_delta files.
	  static constexpr LanguageVersion kSpecialVersionString[] = {
	      // Jan 9, 2013: Add "FLAG num" to aff to avoid heapcheck crash.
	      {"tr-TR", "-4-0"},

	      // Mar 4, 2014: Add Tajik dictionary.
	      {"tg-TG", "-5-0"},

	      // Feb 2019: Initial check-in of Welsh.
	      {"cy-GB", "-1-0"},

	      // April 2019: Initial check-in of Armenian.
	      {"hy", "-1-0"},

	      // November 2019: Update Serbian-Latin and Serbian-Cyrillic
	      {"sh", "-4-0"},
	      {"sr", "-4-0"},

	      // January 2020: Update en-* and fa-IR dictionaries from upstream.
	      {"en-AU", "-9-0"},
	      {"en-CA", "-9-0"},
	      {"en-GB", "-9-0"},
	      {"en-US", "-9-0"},
	      {"fa-IR", "-9-0"},

	      // March 2020: Update uk-UA dictionary from upstream.
	      {"uk-UA", "-4-0"},

	      // June 2020: Add the en-GB-oxendict dictionary.
	      {"en-GB-oxendict", "-9-0"},
	  };

	  const std::string language_region =
	      GetSpellCheckLanguageRegion(input_language);

	  // Start from the default and switch to the first override whose language
	  // equals the resolved language-region, if there is one.
	  const char* version_suffix = kDefaultVersionString;
	  for (const LanguageVersion& special : kSpecialVersionString) {
	    if (language_region == special.language) {
	      version_suffix = special.version;
	      break;
	    }
	  }

	  return dict_dir.AppendASCII(language_region + version_suffix + ".bdic");
	}

	// Resolves |language| against kSupportedSpellCheckerLanguages, scanning rows
	// in table order:
	//  - as soon as a row's language field equals |language|, |language| itself
	//    is returned;
	//  - otherwise the language field of the first row whose language-region
	//    field equals |language| is returned;
	//  - otherwise the result is an empty string.
	std::string GetCorrespondingSpellCheckLanguage(base::StringPiece language) {
	  std::string region_fallback;
	  for (const LanguageRegion& entry : kSupportedSpellCheckerLanguages) {
	    // A match on the language field wins immediately.
	    if (entry.language == language)
	      return language.as_string();

	    // Remember only the first row matched through its language-region field.
	    if (region_fallback.empty() && entry.language_region == language)
	      region_fallback = entry.language;
	  }

	  // No language-field match: hand back the region-based match, which is
	  // empty when nothing matched at all.
	  return region_fallback;
	}

	std::vector<std::string> SpellCheckLanguages() {
	std::vector<std::string> languages;
	for (const auto& lang_region : kSupportedSpellCheckerLanguages)
	languages.push_back(lang_region.language);
	return languages;
	}

	void GetISOLanguageCountryCodeFromLocale(const std::string& locale,
	std::string* language_code,
	std::string* country_code) {
	DCHECK(language_code);
	DCHECK(country_code);
	char language[ULOC_LANG_CAPACITY] = ULOC_ENGLISH;
	const char* country = "USA";
	if (!locale.empty()) {
	UErrorCode error = U_ZERO_ERROR;
	char id[ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY];
	uloc_addLikelySubtags(locale.c_str(), id, base::size(id), &error);
	error = U_ZERO_ERROR;
	uloc_getLanguage(id, language, base::size(language), &error);
	country = uloc_getISO3Country(id);
	}
	*language_code = std::string(language);
	*country_code = std::string(country);
	}

	// Merges the per-language lists in |suggestions_list| into
	// |*optional_suggestions| (must be non-null) in round-robin order: the first
	// suggestion of every language, then the second of every language, and so
	// on. A suggestion already present in the output is not added again. Merging
	// stops as soon as, after processing a suggestion, the output holds at least
	// kMaxSuggestions entries.
	void FillSuggestions(
	    const std::vector<std::vector<base::string16>>& suggestions_list,
	    std::vector<base::string16>* optional_suggestions) {
	  DCHECK(optional_suggestions);

	  // The longest per-language list bounds the number of round-robin passes.
	  size_t longest_list = 0;
	  for (const auto& per_language : suggestions_list) {
	    if (per_language.size() > longest_list)
	      longest_list = per_language.size();
	  }

	  for (size_t rank = 0; rank < longest_list; ++rank) {
	    for (const auto& per_language : suggestions_list) {
	      // This language has run out of suggestions at this rank.
	      if (rank >= per_language.size())
	        continue;

	      const base::string16& candidate = per_language[rank];
	      // Only add the suggestion if it's unique.
	      if (!base::Contains(*optional_suggestions, candidate))
	        optional_suggestions->push_back(candidate);

	      if (optional_suggestions->size() >= kMaxSuggestions)
	        return;
	    }
	  }
	}

	} // namespace spellcheck