cpp/src/language.cc - external/libaddressinput - Git at Google

 // Copyright (C) 2014 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "language.h"

 #include <algorithm>
 #include <cctype>
 #include <string>
 #include <vector>

 #include "rule.h"
 #include "util/string_split.h"

 namespace i18n {
 namespace addressinput {

 // Builds a Language from |language_tag|.
 //
 // After construction:
 //   - |tag| is |language_tag| with every '_' replaced by '-'; its letter case
 //     is left untouched.
 //   - |base| is the lowercased text in front of the first '-' (the whole
 //     lowercased tag when there is no separator).
 //   - |has_latin_script| is true when the second or third subtag produced by
 //     SplitString() equals "latn"; the comparison is done on the lowercased
 //     tag, so "Latn", "LATN", etc. all match.
 Language::Language(const std::string& language_tag) : tag(language_tag),
                                                       base(),
                                                       has_latin_script(false) {
   // Character '-' is the separator for subtags in the BCP 47. However, some
   // legacy code generates tags with '_' instead of '-'.
   static const char kSubtagsSeparator = '-';
   static const char kAlternativeSubtagsSeparator = '_';
   std::replace(
       tag.begin(), tag.end(), kAlternativeSubtagsSeparator, kSubtagsSeparator);

   // BCP 47 tags are ASCII, but the input is not validated here. Every
   // character goes through unsigned char first: handing a negative char
   // straight to tolower is undefined behavior.
   std::string lowercase = tag;
   std::transform(lowercase.begin(), lowercase.end(), lowercase.begin(),
                  [](unsigned char c) {
                    return static_cast<char>(std::tolower(c));
                  });

   // find() yields npos when there is no separator, which makes substr() return
   // the whole string.
   base = lowercase.substr(0, lowercase.find(kSubtagsSeparator));

   // The lowercase BCP 47 subtag for Latin script.
   static const char kLowercaseLatinScript[] = "latn";
   std::vector<std::string> subtags;
   SplitString(lowercase, kSubtagsSeparator, &subtags);

   // Support only the second and third position for the script.
   has_latin_script =
       (subtags.size() > 1 && subtags[1] == kLowercaseLatinScript) ||
       (subtags.size() > 2 && subtags[2] == kLowercaseLatinScript);
 }

 // Out-of-line defaulted destructor: the compiler-generated member-wise
 // destruction is used and nothing is released by hand.
 Language::~Language() = default;

 // Picks the language in which an address governed by |address_region_rule|
 // should be presented to a user whose interface language is |ui_language|.
 //
 // Resolution order:
 //   1. The rule lists no languages: |ui_language| is returned as is.
 //   2. |ui_language| has an empty tag: the first language of the rule.
 //   3. The rule has a Latin format and |ui_language| is in Latin script: the
 //      base of the rule's first language with "-Latn" appended.
 //   4. The first language of the rule whose base equals the base of
 //      |ui_language|.
 //   5. Otherwise the "-Latn" language from step 3 when the rule has a Latin
 //      format, else the first language of the rule.
 Language ChooseBestAddressLanguage(const Rule& address_region_rule,
                                    const Language& ui_language) {
   const auto& language_tags = address_region_rule.GetLanguages();
   if (language_tags.empty()) {
     return ui_language;
   }

   std::vector<Language> candidates;
   for (const auto& language_tag : language_tags) {
     candidates.emplace_back(language_tag);
   }
   const Language& default_language = candidates.front();

   if (ui_language.tag.empty()) {
     return default_language;
   }

   const bool latin_format_available =
       !address_region_rule.GetLatinFormat().empty();

   // The conventionally formatted BCP 47 Latin script with a preceding subtag
   // separator.
   static const char kLatinScriptSuffix[] = "-Latn";
   const Language latin_variant(default_language.base + kLatinScriptSuffix);
   if (latin_format_available && ui_language.has_latin_script) {
     return latin_variant;
   }

   // Matching on the base language alone is enough because, for now, no region
   // supports the same base language in several scripts (e.g. both "zh-Hant"
   // and "zh-Hans").
   const auto same_base = std::find_if(
       candidates.begin(), candidates.end(),
       [&ui_language](const Language& candidate) {
         return candidate.base == ui_language.base;
       });
   if (same_base != candidates.end()) {
     return *same_base;
   }

   if (latin_format_available) {
     return latin_variant;
   }
   return default_language;
 }

 }  // namespace addressinput
 }  // namespace i18n
	// Copyright (C) 2014 Google Inc.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#include "language.h"

	#include <algorithm>
	#include <cctype>
	#include <string>
	#include <vector>

	#include "rule.h"
	#include "util/string_split.h"

	namespace i18n {
	namespace addressinput {

	// Builds a Language from |language_tag|.
	//
	// After construction:
	//   - |tag| is |language_tag| with every '_' replaced by '-'; its letter case
	//     is left untouched.
	//   - |base| is the lowercased text in front of the first '-' (the whole
	//     lowercased tag when there is no separator).
	//   - |has_latin_script| is true when the second or third subtag produced by
	//     SplitString() equals "latn"; the comparison is done on the lowercased
	//     tag, so "Latn", "LATN", etc. all match.
	Language::Language(const std::string& language_tag)
	    : tag(language_tag), base(), has_latin_script(false) {
	  // Character '-' is the separator for subtags in the BCP 47. However, some
	  // legacy code generates tags with '_' instead of '-'.
	  static const char kSubtagsSeparator = '-';
	  static const char kAlternativeSubtagsSeparator = '_';
	  std::replace(
	      tag.begin(), tag.end(), kAlternativeSubtagsSeparator, kSubtagsSeparator);

	  // BCP 47 tags are ASCII, but the input is not validated here. Every
	  // character goes through unsigned char first: handing a negative char
	  // straight to tolower is undefined behavior.
	  std::string lowercase = tag;
	  std::transform(lowercase.begin(), lowercase.end(), lowercase.begin(),
	                 [](unsigned char c) {
	                   return static_cast<char>(std::tolower(c));
	                 });

	  // find() yields npos when there is no separator, which makes substr() return
	  // the whole string.
	  base = lowercase.substr(0, lowercase.find(kSubtagsSeparator));

	  // The lowercase BCP 47 subtag for Latin script.
	  static const char kLowercaseLatinScript[] = "latn";
	  std::vector<std::string> subtags;
	  SplitString(lowercase, kSubtagsSeparator, &subtags);

	  // Support only the second and third position for the script.
	  has_latin_script =
	      (subtags.size() > 1 && subtags[1] == kLowercaseLatinScript) ||
	      (subtags.size() > 2 && subtags[2] == kLowercaseLatinScript);
	}

	// Out-of-line defaulted destructor: the compiler-generated member-wise
	// destruction is used and nothing is released by hand.
	Language::~Language() = default;

	// Picks the language in which an address governed by |address_region_rule|
	// should be presented to a user whose interface language is |ui_language|.
	//
	// Resolution order:
	//   1. The rule lists no languages: |ui_language| is returned as is.
	//   2. |ui_language| has an empty tag: the first language of the rule.
	//   3. The rule has a Latin format and |ui_language| is in Latin script: the
	//      base of the rule's first language with "-Latn" appended.
	//   4. The first language of the rule whose base equals the base of
	//      |ui_language|.
	//   5. Otherwise the "-Latn" language from step 3 when the rule has a Latin
	//      format, else the first language of the rule.
	Language ChooseBestAddressLanguage(const Rule& address_region_rule,
	                                   const Language& ui_language) {
	  const auto& language_tags = address_region_rule.GetLanguages();
	  if (language_tags.empty()) {
	    return ui_language;
	  }

	  std::vector<Language> candidates;
	  for (const auto& language_tag : language_tags) {
	    candidates.emplace_back(language_tag);
	  }
	  const Language& default_language = candidates.front();

	  if (ui_language.tag.empty()) {
	    return default_language;
	  }

	  const bool latin_format_available =
	      !address_region_rule.GetLatinFormat().empty();

	  // The conventionally formatted BCP 47 Latin script with a preceding subtag
	  // separator.
	  static const char kLatinScriptSuffix[] = "-Latn";
	  const Language latin_variant(default_language.base + kLatinScriptSuffix);
	  if (latin_format_available && ui_language.has_latin_script) {
	    return latin_variant;
	  }

	  // Matching on the base language alone is enough because, for now, no region
	  // supports the same base language in several scripts (e.g. both "zh-Hant"
	  // and "zh-Hans").
	  const auto same_base = std::find_if(
	      candidates.begin(), candidates.end(),
	      [&ui_language](const Language& candidate) {
	        return candidate.base == ui_language.base;
	      });
	  if (same_base != candidates.end()) {
	    return *same_base;
	  }

	  if (latin_format_available) {
	    return latin_variant;
	  }
	  return default_language;
	}

	} // namespace addressinput
	} // namespace i18n