// blob: aa7c81e13b79494208486ba848d2998f6a4d7dfe [file] [log] [blame]
// Copyright 2010-2011, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <string>
#include "composer/composer.h"
#include "composer/table.h"
#include "converter/segments.h"
#include "rewriter/transliteration_rewriter.h"
#include "session/commands.pb.h"
#include "testing/base/public/gunit.h"
#include "transliteration/transliteration.h"
namespace mozc {
namespace {
void InsertASCIISequence(const string &text, composer::Composer *composer) {
for (size_t i = 0; i < text.size(); ++i) {
commands::KeyEvent key;
key.set_key_code(text[i]);
composer->InsertCharacterKeyEvent(key);
}
}
// Types the key sequence "akann" into |composer| and expects the composer's
// query for conversion to be the hiragana string "あかん".
void SetAkann(composer::Composer *composer) {
  InsertASCIISequence("akann", composer);

  string conversion_query;
  composer->GetQueryForConversion(&conversion_query);
  // "あかん"
  EXPECT_EQ("\xe3\x81\x82\xe3\x81\x8b\xe3\x82\x93", conversion_query);
}
// Types the key sequence "kamabokonoinbou" into |composer| and expects the
// composer's query for conversion to be the hiragana string
// "かまぼこのいんぼう".
void SetKamaboko(composer::Composer *composer) {
  InsertASCIISequence("kamabokonoinbou", composer);

  string conversion_query;
  composer->GetQueryForConversion(&conversion_query);
  // "かまぼこのいんぼう"
  EXPECT_EQ(
      "\xe3\x81\x8b\xe3\x81\xbe\xe3\x81\xbc\xe3\x81\x93\xe3\x81\xae"
      "\xe3\x81\x84\xe3\x82\x93\xe3\x81\xbc\xe3\x81\x86",
      conversion_query);
}
} // namespace
// No composer is attached to the Segments here; the expectations below are
// the transliterations of the segment key "あかん" alone.
TEST(TransliterationRewriterTest, T13NFromKeyTest) {
  TransliterationRewriter rewriter;

  Segments segments;
  Segment *key_segment = segments.add_segment();
  CHECK(key_segment);
  // "あかん"
  key_segment->set_key("\xe3\x81\x82\xe3\x81\x8b\xe3\x82\x93");

  // The segment starts without meta candidates; Rewrite() must succeed.
  EXPECT_EQ(0, key_segment->meta_candidates_size());
  EXPECT_TRUE(rewriter.Rewrite(&segments));

  const Segment &seg = *key_segment;

  // Kana forms.
  // "あかん"
  EXPECT_EQ("\xe3\x81\x82\xe3\x81\x8b\xe3\x82\x93",
            seg.meta_candidate(transliteration::HIRAGANA).value);
  // "アカン"
  EXPECT_EQ("\xe3\x82\xa2\xe3\x82\xab\xe3\x83\xb3",
            seg.meta_candidate(transliteration::FULL_KATAKANA).value);

  // Half-width ASCII forms.
  EXPECT_EQ("akan", seg.meta_candidate(transliteration::HALF_ASCII).value);
  EXPECT_EQ("AKAN",
            seg.meta_candidate(transliteration::HALF_ASCII_UPPER).value);
  EXPECT_EQ("akan",
            seg.meta_candidate(transliteration::HALF_ASCII_LOWER).value);
  EXPECT_EQ("Akan",
            seg.meta_candidate(transliteration::HALF_ASCII_CAPITALIZED).value);

  // Full-width ASCII forms.
  // "akan" in full-width ASCII
  EXPECT_EQ("\xef\xbd\x81\xef\xbd\x8b\xef\xbd\x81\xef\xbd\x8e",
            seg.meta_candidate(transliteration::FULL_ASCII).value);
  // "AKAN" in full-width ASCII
  EXPECT_EQ("\xef\xbc\xa1\xef\xbc\xab\xef\xbc\xa1\xef\xbc\xae",
            seg.meta_candidate(transliteration::FULL_ASCII_UPPER).value);
  // "akan" in full-width ASCII
  EXPECT_EQ("\xef\xbd\x81\xef\xbd\x8b\xef\xbd\x81\xef\xbd\x8e",
            seg.meta_candidate(transliteration::FULL_ASCII_LOWER).value);
  // "Akan" in full-width ASCII
  EXPECT_EQ("\xef\xbc\xa1\xef\xbd\x8b\xef\xbd\x81\xef\xbd\x8e",
            seg.meta_candidate(transliteration::FULL_ASCII_CAPITALIZED).value);

  // "アカン" in half-width katakana
  EXPECT_EQ("\xef\xbd\xb1\xef\xbd\xb6\xef\xbe\x9d",
            seg.meta_candidate(transliteration::HALF_KATAKANA).value);
}
// A composer holding the raw input "akann" is attached and the segment key
// equals its query ("あかん").  The ASCII expectations therefore contain the
// raw key sequence "akann", including the doubled "n".
TEST(TransliterationRewriterTest, T13NFromComposerTest) {
  TransliterationRewriter rewriter;

  // Build a composer that has received "akann".
  composer::Table romaji_table;
  romaji_table.Initialize();
  composer::Composer composer;
  composer.SetTable(&romaji_table);
  SetAkann(&composer);

  // One segment whose key matches the composer's query.
  Segments segments;
  Segment *key_segment = segments.add_segment();
  CHECK(key_segment);
  segments.set_composer(&composer);
  // "あかん"
  key_segment->set_key("\xe3\x81\x82\xe3\x81\x8b\xe3\x82\x93");

  EXPECT_TRUE(rewriter.Rewrite(&segments));
  EXPECT_EQ(1, segments.conversion_segments_size());

  const Segment &result = segments.conversion_segment(0);

  // Kana forms.
  // "あかん"
  EXPECT_EQ("\xe3\x81\x82\xe3\x81\x8b\xe3\x82\x93",
            result.meta_candidate(transliteration::HIRAGANA).value);
  // "アカン"
  EXPECT_EQ("\xe3\x82\xa2\xe3\x82\xab\xe3\x83\xb3",
            result.meta_candidate(transliteration::FULL_KATAKANA).value);

  // Half-width ASCII forms.
  EXPECT_EQ("akann", result.meta_candidate(transliteration::HALF_ASCII).value);
  EXPECT_EQ("AKANN",
            result.meta_candidate(transliteration::HALF_ASCII_UPPER).value);
  EXPECT_EQ("akann",
            result.meta_candidate(transliteration::HALF_ASCII_LOWER).value);
  EXPECT_EQ(
      "Akann",
      result.meta_candidate(transliteration::HALF_ASCII_CAPITALIZED).value);

  // Full-width ASCII forms.
  // "akann" in full-width ASCII
  EXPECT_EQ("\xef\xbd\x81\xef\xbd\x8b\xef\xbd\x81\xef\xbd\x8e\xef\xbd\x8e",
            result.meta_candidate(transliteration::FULL_ASCII).value);
  // "AKANN" in full-width ASCII
  EXPECT_EQ("\xef\xbc\xa1\xef\xbc\xab\xef\xbc\xa1\xef\xbc\xae\xef\xbc\xae",
            result.meta_candidate(transliteration::FULL_ASCII_UPPER).value);
  // "akann" in full-width ASCII
  EXPECT_EQ("\xef\xbd\x81\xef\xbd\x8b\xef\xbd\x81\xef\xbd\x8e\xef\xbd\x8e",
            result.meta_candidate(transliteration::FULL_ASCII_LOWER).value);
  // "Akann" in full-width ASCII
  EXPECT_EQ(
      "\xef\xbc\xa1\xef\xbd\x8b\xef\xbd\x81\xef\xbd\x8e\xef\xbd\x8e",
      result.meta_candidate(transliteration::FULL_ASCII_CAPITALIZED).value);

  // "アカン" in half-width katakana
  EXPECT_EQ("\xef\xbd\xb1\xef\xbd\xb6\xef\xbe\x9d",
            result.meta_candidate(transliteration::HALF_KATAKANA).value);
}
// Two segments ("かまぼこの" + "いんぼう") share one composer.  The composer
// is primed with "kamabokonoinbou", whose query is exactly the concatenation
// of the two segment keys, so the composer is consistent with the segments.
// (Priming it with an unrelated input such as "akann" would leave the
// composer's query out of sync with the keys; ComposerValidationTest covers
// that mismatch case separately.)
TEST(TransliterationRewriterTest, T13NWithMultiSegmentsTest) {
  TransliterationRewriter t13n_rewriter;

  composer::Table table;
  table.Initialize();
  composer::Composer composer;
  composer.SetTable(&table);
  // Composer query becomes "かまぼこのいんぼう".
  SetKamaboko(&composer);

  Segments segments;
  segments.set_composer(&composer);
  {
    Segment *segment = segments.add_segment();
    CHECK(segment);
    // "かまぼこの"
    segment->set_key(
        "\xe3\x81\x8b\xe3\x81\xbe\xe3\x81\xbc\xe3\x81\x93\xe3\x81\xae");

    segment = segments.add_segment();
    CHECK(segment);
    // "いんぼう"
    segment->set_key("\xe3\x81\x84\xe3\x82\x93\xe3\x81\xbc\xe3\x81\x86");
  }

  EXPECT_TRUE(t13n_rewriter.Rewrite(&segments));
  EXPECT_EQ(2, segments.conversion_segments_size());

  // First segment: "かまぼこの" <-> "kamabokono".
  {
    const Segment &seg = segments.conversion_segment(0);
    // "かまぼこの"
    EXPECT_EQ("\xe3\x81\x8b\xe3\x81\xbe\xe3\x81\xbc\xe3\x81\x93\xe3\x81\xae",
              seg.meta_candidate(transliteration::HIRAGANA).value);
    EXPECT_EQ("kamabokono",
              seg.meta_candidate(transliteration::HALF_ASCII).value);
  }

  // Second segment: "いんぼう" <-> "inbou".
  {
    const Segment &seg = segments.conversion_segment(1);
    // "いんぼう"
    EXPECT_EQ("\xe3\x81\x84\xe3\x82\x93\xe3\x81\xbc\xe3\x81\x86",
              seg.meta_candidate(transliteration::HIRAGANA).value);
    EXPECT_EQ("inbou", seg.meta_candidate(transliteration::HALF_ASCII).value);
  }
}
// The attached composer holds "akann" (query "あかん") but the segment key is
// "かん".  The expectations ("kan", not "kann") are the transliterations of
// the key itself: the composer should not be used when it does not match.
TEST(TransliterationRewriterTest, ComposerValidationTest) {
  TransliterationRewriter rewriter;

  // Build a composer that has received "akann".
  composer::Table romaji_table;
  romaji_table.Initialize();
  composer::Composer composer;
  composer.SetTable(&romaji_table);
  SetAkann(&composer);

  // One segment whose key differs from the composer's query.
  Segments segments;
  Segment *key_segment = segments.add_segment();
  CHECK(key_segment);
  segments.set_composer(&composer);
  // "かん"
  key_segment->set_key("\xe3\x81\x8b\xe3\x82\x93");

  EXPECT_TRUE(rewriter.Rewrite(&segments));

  // Should not use composer
  EXPECT_EQ(1, segments.conversion_segments_size());
  const Segment &result = segments.conversion_segment(0);

  // Kana forms.
  // "かん"
  EXPECT_EQ("\xe3\x81\x8b\xe3\x82\x93",
            result.meta_candidate(transliteration::HIRAGANA).value);
  // "カン"
  EXPECT_EQ("\xe3\x82\xab\xe3\x83\xb3",
            result.meta_candidate(transliteration::FULL_KATAKANA).value);

  // Half-width ASCII forms.
  EXPECT_EQ("kan", result.meta_candidate(transliteration::HALF_ASCII).value);
  EXPECT_EQ("KAN",
            result.meta_candidate(transliteration::HALF_ASCII_UPPER).value);
  EXPECT_EQ("kan",
            result.meta_candidate(transliteration::HALF_ASCII_LOWER).value);
  EXPECT_EQ(
      "Kan",
      result.meta_candidate(transliteration::HALF_ASCII_CAPITALIZED).value);

  // Full-width ASCII forms.
  // "kan" in full-width ASCII
  EXPECT_EQ("\xef\xbd\x8b\xef\xbd\x81\xef\xbd\x8e",
            result.meta_candidate(transliteration::FULL_ASCII).value);
  // "KAN" in full-width ASCII
  EXPECT_EQ("\xef\xbc\xab\xef\xbc\xa1\xef\xbc\xae",
            result.meta_candidate(transliteration::FULL_ASCII_UPPER).value);
  // "kan" in full-width ASCII
  EXPECT_EQ("\xef\xbd\x8b\xef\xbd\x81\xef\xbd\x8e",
            result.meta_candidate(transliteration::FULL_ASCII_LOWER).value);
  // "Kan" in full-width ASCII
  EXPECT_EQ(
      "\xef\xbc\xab\xef\xbd\x81\xef\xbd\x8e",
      result.meta_candidate(transliteration::FULL_ASCII_CAPITALIZED).value);

  // "カン" in half-width katakana
  EXPECT_EQ("\xef\xbd\xb6\xef\xbe\x9d",
            result.meta_candidate(transliteration::HALF_KATAKANA).value);
}
// Runs Rewrite() twice against the same composer ("akann"):
//   1. with a single segment "あかん";
//   2. after resegmenting into "あか" + "ん".
// In the second pass the ASCII expectations split the raw input as
// "aka" / "nn".
TEST(TransliterationRewriterTest, RewriteWithSameComposerTest) {
  TransliterationRewriter rewriter;

  // Build a composer that has received "akann".
  composer::Table romaji_table;
  romaji_table.Initialize();
  composer::Composer composer;
  composer.SetTable(&romaji_table);
  SetAkann(&composer);

  // Pass 1: one segment covering the whole query.
  Segments segments;
  Segment *whole = segments.add_segment();
  CHECK(whole);
  segments.set_composer(&composer);
  // "あかん"
  whole->set_key("\xe3\x81\x82\xe3\x81\x8b\xe3\x82\x93");

  EXPECT_TRUE(rewriter.Rewrite(&segments));
  {
    EXPECT_EQ(1, segments.conversion_segments_size());
    const Segment &result = segments.conversion_segment(0);

    // "あかん"
    EXPECT_EQ("\xe3\x81\x82\xe3\x81\x8b\xe3\x82\x93",
              result.meta_candidate(transliteration::HIRAGANA).value);
    // "アカン"
    EXPECT_EQ("\xe3\x82\xa2\xe3\x82\xab\xe3\x83\xb3",
              result.meta_candidate(transliteration::FULL_KATAKANA).value);

    EXPECT_EQ("akann",
              result.meta_candidate(transliteration::HALF_ASCII).value);
    EXPECT_EQ("AKANN",
              result.meta_candidate(transliteration::HALF_ASCII_UPPER).value);
    EXPECT_EQ("akann",
              result.meta_candidate(transliteration::HALF_ASCII_LOWER).value);
    EXPECT_EQ(
        "Akann",
        result.meta_candidate(transliteration::HALF_ASCII_CAPITALIZED).value);

    // "akann" in full-width ASCII
    EXPECT_EQ("\xef\xbd\x81\xef\xbd\x8b\xef\xbd\x81\xef\xbd\x8e\xef\xbd\x8e",
              result.meta_candidate(transliteration::FULL_ASCII).value);
    // "AKANN" in full-width ASCII
    EXPECT_EQ("\xef\xbc\xa1\xef\xbc\xab\xef\xbc\xa1\xef\xbc\xae\xef\xbc\xae",
              result.meta_candidate(transliteration::FULL_ASCII_UPPER).value);
    // "akann" in full-width ASCII
    EXPECT_EQ("\xef\xbd\x81\xef\xbd\x8b\xef\xbd\x81\xef\xbd\x8e\xef\xbd\x8e",
              result.meta_candidate(transliteration::FULL_ASCII_LOWER).value);
    // "Akann" in full-width ASCII
    EXPECT_EQ(
        "\xef\xbc\xa1\xef\xbd\x8b\xef\xbd\x81\xef\xbd\x8e\xef\xbd\x8e",
        result.meta_candidate(transliteration::FULL_ASCII_CAPITALIZED).value);

    // "アカン" in half-width katakana
    EXPECT_EQ("\xef\xbd\xb1\xef\xbd\xb6\xef\xbe\x9d",
              result.meta_candidate(transliteration::HALF_KATAKANA).value);
  }

  // Pass 2: resegmentation.  Shrink the first segment and append a second.
  Segment *head = segments.mutable_segment(0);
  CHECK(head);
  // "あか"
  head->set_key("\xe3\x81\x82\xe3\x81\x8b");
  Segment *tail = segments.add_segment();
  CHECK(tail);
  // "ん"
  tail->set_key("\xe3\x82\x93");

  EXPECT_TRUE(rewriter.Rewrite(&segments));
  EXPECT_EQ(2, segments.conversion_segments_size());

  // First segment after resegmentation: "あか".
  {
    const Segment &result = segments.conversion_segment(0);

    // "あか"
    EXPECT_EQ("\xe3\x81\x82\xe3\x81\x8b",
              result.meta_candidate(transliteration::HIRAGANA).value);
    // "アカ"
    EXPECT_EQ("\xe3\x82\xa2\xe3\x82\xab",
              result.meta_candidate(transliteration::FULL_KATAKANA).value);

    EXPECT_EQ("aka", result.meta_candidate(transliteration::HALF_ASCII).value);
    EXPECT_EQ("AKA",
              result.meta_candidate(transliteration::HALF_ASCII_UPPER).value);
    EXPECT_EQ("aka",
              result.meta_candidate(transliteration::HALF_ASCII_LOWER).value);
    EXPECT_EQ(
        "Aka",
        result.meta_candidate(transliteration::HALF_ASCII_CAPITALIZED).value);

    // "aka" in full-width ASCII
    EXPECT_EQ("\xef\xbd\x81\xef\xbd\x8b\xef\xbd\x81",
              result.meta_candidate(transliteration::FULL_ASCII).value);
    // "AKA" in full-width ASCII
    EXPECT_EQ("\xef\xbc\xa1\xef\xbc\xab\xef\xbc\xa1",
              result.meta_candidate(transliteration::FULL_ASCII_UPPER).value);
    // "aka" in full-width ASCII
    EXPECT_EQ("\xef\xbd\x81\xef\xbd\x8b\xef\xbd\x81",
              result.meta_candidate(transliteration::FULL_ASCII_LOWER).value);
    // "Aka" in full-width ASCII
    EXPECT_EQ(
        "\xef\xbc\xa1\xef\xbd\x8b\xef\xbd\x81",
        result.meta_candidate(transliteration::FULL_ASCII_CAPITALIZED).value);

    // "アカ" in half-width katakana
    EXPECT_EQ("\xef\xbd\xb1\xef\xbd\xb6",
              result.meta_candidate(transliteration::HALF_KATAKANA).value);
  }

  // Second segment after resegmentation: "ん".
  {
    const Segment &result = segments.conversion_segment(1);

    // "ん"
    EXPECT_EQ("\xe3\x82\x93",
              result.meta_candidate(transliteration::HIRAGANA).value);
    // "ン"
    EXPECT_EQ("\xe3\x83\xb3",
              result.meta_candidate(transliteration::FULL_KATAKANA).value);

    EXPECT_EQ("nn", result.meta_candidate(transliteration::HALF_ASCII).value);
    EXPECT_EQ("NN",
              result.meta_candidate(transliteration::HALF_ASCII_UPPER).value);
    EXPECT_EQ("nn",
              result.meta_candidate(transliteration::HALF_ASCII_LOWER).value);
    EXPECT_EQ(
        "Nn",
        result.meta_candidate(transliteration::HALF_ASCII_CAPITALIZED).value);

    // "nn" in full-width ASCII
    EXPECT_EQ("\xef\xbd\x8e\xef\xbd\x8e",
              result.meta_candidate(transliteration::FULL_ASCII).value);
    // "NN" in full-width ASCII
    EXPECT_EQ("\xef\xbc\xae\xef\xbc\xae",
              result.meta_candidate(transliteration::FULL_ASCII_UPPER).value);
    // "nn" in full-width ASCII
    EXPECT_EQ("\xef\xbd\x8e\xef\xbd\x8e",
              result.meta_candidate(transliteration::FULL_ASCII_LOWER).value);
    // "Nn" in full-width ASCII
    EXPECT_EQ(
        "\xef\xbc\xae\xef\xbd\x8e",
        result.meta_candidate(transliteration::FULL_ASCII_CAPITALIZED).value);

    // "ン" in half-width katakana
    EXPECT_EQ("\xef\xbe\x9d",
              result.meta_candidate(transliteration::HALF_KATAKANA).value);
  }
}
} // namespace mozc