| // Copyright 2022 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "chromeos/ash/components/string_matching/diacritic_utils.h" |
| |
| #include <string> |
| #include <vector> |
| |
| #include "base/containers/fixed_flat_map.h" |
| |
| namespace ash::string_matching { |
| |
| const std::u16string RemoveDiacritics(const std::u16string& str) { |
| // For the initial implementation of diacritic-insensitive search: |
| // 1) Intentionally only covering Latin-script accented characters. |
| // 2) Only deal with 1-to-1 char mapping i.e., "æ > ae; œ > oe; Æ > AE; Œ > |
| // OE" are ignored in this implementation. The implemented ones are listed |
| // as below: |
| |
| // "[ á à â ä ā å ] > a; " |
| // "[ Á À Â Ä Ā Å ] > A; " |
| // "[ é è ê ë ē ] > e; " |
| // "[ É È Ê Ë Ē ] > E; " |
| // "[ í ì î ï ī ] > i; " |
| // "[ Í Ì Î Ï Ī ] > I; " |
| // "[ ó ò ô ö ō ø ] > o; " |
| // "[ Ó Ò Ô Ö Ō Ø ] > O; " |
| // "[ ú ù û ü ū ] > u; " |
| // "[ Ú Ù Û Ü Ū ] > U; " |
| // "[ ý ỳ ŷ ÿ ȳ ] > y; " |
| // "[ Ý Ỳ Ŷ Ÿ Ȳ ] > Y; " |
| // "ç > c; ñ > n; " |
| // "Ç > C; Ñ > N;" |
| |
| // clang-format off |
| static constexpr auto kConversionMap = |
| base::MakeFixedFlatMap<char16_t, char16_t>({ |
| {u'á', u'a'}, {u'à', u'a'}, {u'â', u'a'}, {u'ä', u'a'}, {u'ā', u'a'}, {u'å', u'a'}, |
| {u'Á', u'A'}, {u'À', u'A'}, {u'Â', u'A'}, {u'Ä', u'A'}, {u'Ā', u'A'}, {u'Å', u'A'}, |
| {u'é', u'e'}, {u'è', u'e'}, {u'ê', u'e'}, {u'ë', u'e'}, {u'ē', u'e'}, |
| {u'É', u'E'}, {u'È', u'E'}, {u'Ê', u'E'}, {u'Ë', u'E'}, {u'Ē', u'E'}, |
| {u'í', u'i'}, {u'ì', u'i'}, {u'î', u'i'}, {u'ï', u'i'}, {u'ī', u'i'}, |
| {u'Í', u'I'}, {u'Ì', u'I'}, {u'Î', u'I'}, {u'Ï', u'I'}, {u'Ī', u'I'}, |
| {u'ó', u'o'}, {u'ò', u'o'}, {u'ô', u'o'}, {u'ö', u'o'}, {u'ō', u'o'}, {u'ø', u'o'}, |
| {u'Ó', u'O'}, {u'Ò', u'O'}, {u'Ô', u'O'}, {u'Ö', u'O'}, {u'Ō', u'O'}, {u'Ø', u'O'}, |
| {u'ú', u'u'}, {u'ù', u'u'}, {u'û', u'u'}, {u'ü', u'u'}, {u'ū', u'u'}, |
| {u'Ú', u'U'}, {u'Ù', u'U'}, {u'Û', u'U'}, {u'Ü', u'U'}, {u'Ū', u'U'}, |
| {u'ý', u'y'}, {u'ỳ', u'y'}, {u'ŷ', u'y'}, {u'ÿ', u'y'}, {u'ȳ', u'y'}, |
| {u'Ý', u'Y'}, {u'Ỳ', u'Y'}, {u'Ŷ', u'Y'}, {u'Ÿ', u'Y'}, {u'Ȳ', u'Y'}, |
| {u'ç', u'c'}, {u'Ç', u'C'}, {u'ñ', u'n'}, {u'Ñ', u'N'}, |
| }); |
| // clang-format on |
| |
| std::u16string result; |
| for (auto letter : str) { |
| auto it = kConversionMap.find(letter); |
| result.push_back(it == kConversionMap.end() ? letter : it->second); |
| } |
| return result; |
| } |
| |
| } // namespace ash::string_matching |