// Source blob: 9aadd58bf4f01d64de0198142c640a1770d0559d
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/autofill/core/browser/autofill_data_util.h"
#include "base/strings/utf_string_conversions.h"
#include "components/autofill/core/browser/autofill_test_utils.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace autofill {
namespace data_util {
// One parameterized case for IsCJKNameTest: a UTF-8 encoded full name and the
// value IsCJKName() is expected to return for it.
struct IsCJKNameTestCase {
  const char* full_name;  // UTF-8 encoded name under test.
  bool is_cjk;            // Expected IsCJKName() result.
};
// Fixture that runs the IsCJKName() check once per IsCJKNameTestCase.
class IsCJKNameTest : public testing::TestWithParam<IsCJKNameTestCase> {};

// Converts the parameterized UTF-8 name to UTF-16 and checks that IsCJKName()
// returns the expected classification. The offending name is streamed into
// the failure message so a failing case can be identified.
TEST_P(IsCJKNameTest, IsCJKName) {
  const auto& test_case = GetParam();
  EXPECT_EQ(test_case.is_cjk, IsCJKName(base::UTF8ToUTF16(test_case.full_name)))
      << "Failed for: " << test_case.full_name;
}
// Non-CJK language with only ASCII characters.
IsCJKNameTestCase{"Homer Jay Simpson", false},
// Non-CJK language with some ASCII characters.
IsCJKNameTestCase{"Éloïse Paré", false},
// Non-CJK language with no ASCII characters.
IsCJKNameTestCase{"Σωκράτης", false},
// (Simplified) Chinese name, Unihan.
IsCJKNameTestCase{"刘翔", true},
// (Simplified) Chinese name, Unihan, with an ASCII space.
IsCJKNameTestCase{"成 龙", true},
// Korean name, Hangul.
IsCJKNameTestCase{"송지효", true},
// Korean name, Hangul, with an 'IDEOGRAPHIC SPACE' (U+3000).
IsCJKNameTestCase{"김 종국", true},
// Japanese name, Unihan.
IsCJKNameTestCase{"山田貴洋", true},
// Japanese name, Katakana, with a 'KATAKANA MIDDLE DOT' (U+30FB).
IsCJKNameTestCase{"ビル・ゲイツ", true},
// Japanese name, Katakana, with a 'MIDDLE DOT' (U+00B7) (likely a
// typo).
IsCJKNameTestCase{"ビル·ゲイツ", true},
// CJK names don't have a middle name, so a 3-part name is bogus to us.
IsCJKNameTestCase{"반 기 문", false}));
// One parameterized case shared by SplitNameTest and JoinNamePartsTest: a
// full name and its given/middle/family parts, all UTF-8 encoded.
struct FullNameTestCase {
  std::string full_name;    // Complete name as a single string.
  std::string given_name;   // Given (first) name part.
  std::string middle_name;  // Middle name part; empty when there is none.
  std::string family_name;  // Family (last) name part.
};
class SplitNameTest : public testing::TestWithParam<FullNameTestCase> {};
TEST_P(SplitNameTest, SplitName) {
auto test_case = GetParam();
NameParts name_parts = SplitName(base::UTF8ToUTF16(test_case.full_name));
EXPECT_EQ(base::UTF8ToUTF16(test_case.given_name), name_parts.given);
EXPECT_EQ(base::UTF8ToUTF16(test_case.middle_name), name_parts.middle);
// Full name including given, middle and family names.
FullNameTestCase{"Homer Jay Simpson", "Homer", "Jay", "Simpson"},
// No middle name.
FullNameTestCase{"Moe Szyslak", "Moe", "", "Szyslak"},
// Common name prefixes removed.
FullNameTestCase{"Reverend Timothy Lovejoy", "Timothy", "", "Lovejoy"},
// Common name suffixes removed.
FullNameTestCase{"John Frink Phd", "John", "", "Frink"},
// Exception to the name suffix removal.
FullNameTestCase{"John Ma", "John", "", "Ma"},
// Common family name prefixes not considered a middle name.
FullNameTestCase{"Milhouse Van Houten", "Milhouse", "", "Van Houten"},
// CJK names have reverse order (surname goes first, given name goes
// second).
FullNameTestCase{"孫 德明", "德明", "", "孫"}, // Chinese name, Unihan
FullNameTestCase{"孫 德明", "德明", "",
"孫"}, // Chinese name, Unihan, 'IDEOGRAPHIC SPACE'
FullNameTestCase{"홍 길동", "길동", "", "홍"}, // Korean name, Hangul
FullNameTestCase{"山田 貴洋", "貴洋", "",
"山田"}, // Japanese name, Unihan
// In Japanese, foreign names use 'KATAKANA MIDDLE DOT' (U+30FB) as a
// separator. There is no consensus for the ordering. For now, we use
// the same ordering as regular Japanese names ("last・first").
FullNameTestCase{"ゲイツ・ビル", "ビル", "",
"ゲイツ"}, // Foreign name in Japanese, Katakana
// 'KATAKANA MIDDLE DOT' is occasionally typoed as 'MIDDLE DOT'
// (U+00B7).
FullNameTestCase{"ゲイツ·ビル", "ビル", "",
"ゲイツ"}, // Foreign name in Japanese, Katakana
// CJK names don't usually have a space in the middle, but most of the
// time, the surname is only one character (in Chinese & Korean).
FullNameTestCase{"최성훈", "성훈", "", "최"}, // Korean name, Hangul
FullNameTestCase{"刘翔", "翔", "",
"刘"}, // (Simplified) Chinese name, Unihan
FullNameTestCase{"劉翔", "翔", "",
"劉"}, // (Traditional) Chinese name, Unihan
// There are a few exceptions. Occasionally, the surname has two
// characters.
FullNameTestCase{"남궁도", "도", "", "남궁"}, // Korean name, Hangul
FullNameTestCase{"황보혜정", "혜정", "",
"황보"}, // Korean name, Hangul
FullNameTestCase{"歐陽靖", "靖", "",
"歐陽"}, // (Traditional) Chinese name, Unihan
// In Korean, some 2-character surnames are rare/ambiguous, like "강전":
// "강" is a common surname, and "전" can be part of a given name. In
// those cases, we assume it's 1/2 for 3-character names, or 2/2 for
// 4-character names.
FullNameTestCase{"강전희", "전희", "", "강"}, // Korean name, Hangul
FullNameTestCase{"황목치승", "치승", "",
"황목"}, // Korean name, Hangul
// It occasionally happens that a full name is 2 characters, 1/1.
FullNameTestCase{"이도", "도", "", "이"}, // Korean name, Hangul
FullNameTestCase{"孫文", "文", "", "孫"} // Chinese name, Unihan
class JoinNamePartsTest : public testing::TestWithParam<FullNameTestCase> {};
TEST_P(JoinNamePartsTest, JoinNameParts) {
auto test_case = GetParam();
base::string16 joined =
EXPECT_EQ(base::UTF8ToUTF16(test_case.full_name), joined);
// Full name including given, middle and family names.
FullNameTestCase{"Homer Jay Simpson", "Homer", "Jay", "Simpson"},
// No middle name.
FullNameTestCase{"Moe Szyslak", "Moe", "", "Szyslak"},
// CJK names have reversed order, no space.
FullNameTestCase{"孫德明", "德明", "", "孫"}, // Chinese name, Unihan
FullNameTestCase{"홍길동", "길동", "", "홍"}, // Korean name, Hangul
FullNameTestCase{"山田貴洋", "貴洋", "",
"山田"}, // Japanese name, Unihan
// These are no CJK names for us, they're just bogus.
FullNameTestCase{"Homer シンプソン", "Homer", "", "シンプソン"},
FullNameTestCase{"ホーマー Simpson", "ホーマー", "", "Simpson"},
FullNameTestCase{"반 기 문", "반", "기", "문"}
// Has a middle-name, too unusual
// Checks which spellings of a full name ProfileMatchesFullName() accepts for
// a profile whose name parts are "First", "Middle" and "Last".
TEST(AutofillDataUtilTest, ProfileMatchesFullName) {
  autofill::AutofillProfile profile;
  // NOTE(review): the head of this call was missing from this copy of the
  // file. `autofill::test::SetProfileInfo` is reconstructed from the included
  // autofill_test_utils.h and the argument list -- confirm. The empty e-mail
  // argument is kept exactly as it appears in the file.
  autofill::test::SetProfileInfo(
      &profile, "First", "Middle", "Last", "", "Acme inc",
      "123 Main", "Apt 2", "Laredo", "TX", "77300", "US", "832-555-1000");

  EXPECT_TRUE(ProfileMatchesFullName(base::UTF8ToUTF16("First Last"), profile));

  // NOTE(review): the assertion macros around the remaining checks were
  // missing from this copy of the file and are reconstructed. The
  // middle-name variants are assumed to match.
  EXPECT_TRUE(
      ProfileMatchesFullName(base::UTF8ToUTF16("First Middle Last"), profile));
  EXPECT_TRUE(
      ProfileMatchesFullName(base::UTF8ToUTF16("First M Last"), profile));
  EXPECT_TRUE(
      ProfileMatchesFullName(base::UTF8ToUTF16("First M. Last"), profile));

  // NOTE(review): family-name-first spellings are presumed to match (as for
  // CJK-style ordering) -- verify against the ProfileMatchesFullName()
  // implementation before relying on these two expectations.
  EXPECT_TRUE(
      ProfileMatchesFullName(base::UTF8ToUTF16("Last First"), profile));
  EXPECT_TRUE(
      ProfileMatchesFullName(base::UTF8ToUTF16("LastFirst"), profile));

  // Shares no name part with the profile, so it must not match.
  EXPECT_FALSE(
      ProfileMatchesFullName(base::UTF8ToUTF16("Kirby Puckett"), profile));
}
} // namespace data_util
} // namespace autofill