// blob: 1deb7d626bcb6941739c9a6f8286f83ce37d5228 [file] [log] [blame]
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "ui/base/ime/utf_offset.h"
#include "base/logging.h"
#include "base/optional.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace ui {
namespace {
// Table-driven check of Utf16OffsetFromUtf8Offset(). Each row supplies a
// UTF-8 string and a byte offset into it, together with the UTF-16 offset the
// test expects back (base::nullopt where the test expects no value, which in
// this table is every offset that is not on a character boundary or that lies
// beyond the end of the string).
TEST(UtfOffsetTest, Utf16OffsetFromUtf8Offset) {
  // Inputs are named once so that every row of a group visibly shares the
  // same string. They are static so their addresses are usable in the
  // constexpr table below.
  // 1 byte letters.
  static constexpr char kAscii[] = u8"ab";
  // 2 byte letters. \u03A9=\xCE\xA9 is greek OMEGA.
  static constexpr char kOmegas[] = u8"\u03A9\u03A9";
  // 3 byte letters. \u3042=\xE3\x81\x82 is Japanese "A".
  static constexpr char kHiraganaAs[] = u8"\u3042\u3042";
  // 4 byte letters. \U0001F3B7=\xF0\x9F\x8E\xB7 is "SAXOPHONE" emoji.
  static constexpr char kSaxophones[] = u8"\U0001F3B7\U0001F3B7";
  // Mix case: letters of every width interleaved with ASCII.
  static constexpr char kMixed[] = u8"a\u03A9b\u3042c\U0001F3B7d";

  constexpr struct {
    const char* utf8;                     // Input text.
    size_t byte_offset;                   // Offset into |utf8|, in bytes.
    base::Optional<size_t> utf16_offset;  // Expected result.
  } kRows[] = {
      // 1 byte letters: both offsets advance in lock step.
      {kAscii, 0, 0},
      {kAscii, 1, 1},
      {kAscii, 2, 2},
      {kAscii, 3, base::nullopt},

      // 2 byte letters.
      {kOmegas, 0, 0},
      {kOmegas, 1, base::nullopt},
      {kOmegas, 2, 1},
      {kOmegas, 3, base::nullopt},
      {kOmegas, 4, 2},
      {kOmegas, 5, base::nullopt},

      // 3 byte letters.
      {kHiraganaAs, 0, 0},
      {kHiraganaAs, 1, base::nullopt},
      {kHiraganaAs, 2, base::nullopt},
      {kHiraganaAs, 3, 1},
      {kHiraganaAs, 4, base::nullopt},
      {kHiraganaAs, 5, base::nullopt},
      {kHiraganaAs, 6, 2},
      {kHiraganaAs, 7, base::nullopt},

      // 4 byte letters.
      // Note that a surrogate pair advances by 2 in UTF16.
      {kSaxophones, 0, 0},
      {kSaxophones, 1, base::nullopt},
      {kSaxophones, 2, base::nullopt},
      {kSaxophones, 3, base::nullopt},
      {kSaxophones, 4, 2},
      {kSaxophones, 5, base::nullopt},
      {kSaxophones, 6, base::nullopt},
      {kSaxophones, 7, base::nullopt},
      {kSaxophones, 8, 4},
      {kSaxophones, 9, base::nullopt},

      // Mix case.
      {kMixed, 0, 0},
      {kMixed, 1, 1},
      {kMixed, 2, base::nullopt},
      {kMixed, 3, 2},
      {kMixed, 4, 3},
      {kMixed, 5, base::nullopt},
      {kMixed, 6, base::nullopt},
      {kMixed, 7, 4},
      {kMixed, 8, 5},
      {kMixed, 9, base::nullopt},
      {kMixed, 10, base::nullopt},
      {kMixed, 11, base::nullopt},
      {kMixed, 12, 7},
      {kMixed, 13, 8},
      {kMixed, 14, base::nullopt},
  };

  for (const auto& row : kRows) {
    const auto actual = Utf16OffsetFromUtf8Offset(row.utf8, row.byte_offset);
    // The streamed suffix identifies the failing row by text and offset.
    EXPECT_EQ(row.utf16_offset, actual)
        << " at " << row.utf8 << "[" << row.byte_offset << "]";
  }
}
// Table-driven check of Utf8OffsetFromUtf16Offset(). Each row supplies a
// UTF-16 string and an offset into it, together with the UTF-8 byte offset
// the test expects back (base::nullopt where the test expects no value, which
// in this table is every offset that splits a surrogate pair or that lies
// beyond the end of the string).
TEST(UtfOffsetTest, Utf8OffsetFromUtf16Offset) {
  // Inputs are named once so that every row of a group visibly shares the
  // same string. They are static so their addresses are usable in the
  // constexpr table below. "N byte" refers to the UTF-8 size of each letter.
  // 1 byte letters.
  static constexpr char16_t kAscii[] = u"ab";
  // 2 byte letters.
  static constexpr char16_t kOmegas[] = u"\u03A9\u03A9";
  // 3 byte letters.
  static constexpr char16_t kHiraganaAs[] = u"\u3042\u3042";
  // 4 byte letters = surrogate pairs.
  static constexpr char16_t kSaxophones[] = u"\U0001F3B7\U0001F3B7";
  // Mix case: letters of every width interleaved with ASCII.
  static constexpr char16_t kMixed[] = u"a\u03A9b\u3042c\U0001F3B7d";

  constexpr struct {
    const char16_t* utf16;               // Input text.
    size_t utf16_offset;                 // Offset into |utf16|.
    base::Optional<size_t> utf8_offset;  // Expected result.
  } kRows[] = {
      // 1 byte letters.
      {kAscii, 0, 0},
      {kAscii, 1, 1},
      {kAscii, 2, 2},
      {kAscii, 3, base::nullopt},

      // 2 byte letters.
      {kOmegas, 0, 0},
      {kOmegas, 1, 2},
      {kOmegas, 2, 4},
      {kOmegas, 3, base::nullopt},

      // 3 byte letters.
      {kHiraganaAs, 0, 0},
      {kHiraganaAs, 1, 3},
      {kHiraganaAs, 2, 6},
      {kHiraganaAs, 3, base::nullopt},

      // 4 byte letters = surrogate pairs.
      {kSaxophones, 0, 0},
      {kSaxophones, 1, base::nullopt},
      {kSaxophones, 2, 4},
      {kSaxophones, 3, base::nullopt},
      {kSaxophones, 4, 8},
      {kSaxophones, 5, base::nullopt},
      {kSaxophones, 6, base::nullopt},

      // Mix case.
      {kMixed, 0, 0},
      {kMixed, 1, 1},
      {kMixed, 2, 3},
      {kMixed, 3, 4},
      {kMixed, 4, 7},
      {kMixed, 5, 8},
      {kMixed, 6, base::nullopt},
      {kMixed, 7, 12},
      {kMixed, 8, 13},
      {kMixed, 9, base::nullopt},
  };

  for (const auto& row : kRows) {
    // TODO(crbug.com/911896): Get rid of reinterpret_cast on switching
    // to char16_t.
    const base::string16 text(
        reinterpret_cast<const base::char16*>(row.utf16));
    const auto actual = Utf8OffsetFromUtf16Offset(text, row.utf16_offset);
    // The streamed suffix identifies the failing row by text and offset.
    EXPECT_EQ(row.utf8_offset, actual)
        << " at " << text << "[" << row.utf16_offset << "]";
  }
}
} // namespace
} // namespace ui