// Source blob: 87a25218daaa341282e06c6556a695a064c1ec0e
// Copyright 2010-2011, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "converter/candidate_filter.h"
#include <climits>
#include <string>
#include <vector>
#include "base/base.h"
#include "base/freelist.h"
#include "base/util.h"
#include "converter/node.h"
#include "converter/segments.h"
#include "dictionary/pos_matcher.h"
#include "dictionary/suppression_dictionary.h"
#include "testing/base/public/gunit.h"
namespace mozc {
class CandidateFilterTest : public testing::Test {
protected:
virtual void SetUp() {
candidate_freelist_.reset(new FreeList<Segment::Candidate>(1024));
node_freelist_.reset(new FreeList<Node>(1024));
}
virtual void TearDown() {
candidate_freelist_->Free();
node_freelist_->Free();
}
void GetDefaultNodes(vector<const Node *> *nodes) {
nodes->clear();
Node *n1 = NewNode();
// "てすと"
n1->value = "\xE3\x81\xA6\xE3\x81\x99\xE3\x81\xA8";
n1->lid = POSMatcher::GetUnknownId();
n1->rid = POSMatcher::GetUnknownId();
nodes->push_back(n1);
Node *n2 = NewNode();
// "てすと"
n2->value = "\xE3\x81\xA6\xE3\x81\x99\xE3\x81\xA8";
n2->lid = POSMatcher::GetFunctionalId();
n2->rid = POSMatcher::GetFunctionalId();
nodes->push_back(n2);
}
Node *NewNode() {
Node *n = node_freelist_->Alloc(1);
n->Init();
return n;
}
Segment::Candidate *NewCandidate() {
Segment::Candidate *c = candidate_freelist_->Alloc(1);
c->Init();
c->cost = 100;
c->structure_cost = 100;
return c;
}
private:
scoped_ptr<FreeList<Segment::Candidate> > candidate_freelist_;
scoped_ptr<FreeList<Node> > node_freelist_;
};
// Walks a single CandidateFilter through its three verdicts using the default
// node list: GOOD_CANDIDATE for acceptable candidates, BAD_CANDIDATE for a
// duplicated value or an excessive structure cost, and STOP_ENUMERATION once
// a large number of candidates has been fed in.
TEST_F(CandidateFilterTest, FilterTest) {
  CandidateFilter filter;
  vector<const Node *> n;
  GetDefaultNodes(&n);

  Segment::Candidate *c1 = NewCandidate();
  c1->lid = 1;
  c1->rid = 1;
  c1->value = "abc";
  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(c1, n));

  Segment::Candidate *c2 = NewCandidate();
  c2->value = "abc";
  // Same value candidate should be rejected.
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(c2, n));

  Segment::Candidate *c3 = NewCandidate();
  c3->structure_cost = INT_MAX;
  c3->value = "def";
  // High structure cost candidate should be rejected.
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(c3, n));

  Segment::Candidate *c4 = NewCandidate();
  // Checks if a candidate is active before appending many candidates.
  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(c4, n));

  // Don't filter if lid/rid the same as the top candidate.
  Segment::Candidate *c5 = NewCandidate();
  c5->value = "foo";
  c5->lid = 1;
  c5->rid = 1;
  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(c5, n));

  // Though CandidateFilter may change its limit, 1000 should
  // be always above the limit.  The individual verdicts are deliberately
  // ignored here; only the state reached afterwards is checked.
  for (int i = 0; i < 1000; ++i) {
    Segment::Candidate *cand = NewCandidate();
    // Each candidate gets a distinct value: the decimal form of |i|.
    cand->value = Util::SimpleItoa(i);
    filter.FilterCandidate(cand, n);
  }

  // There will be no more candidates.
  EXPECT_EQ(CandidateFilter::STOP_ENUMERATION,
            filter.FilterCandidate(c4, n));
}
// Three independent scenarios, each with its own filter, in which one or both
// of the default nodes are swapped for a node whose value matches the
// candidate value "abc".
TEST_F(CandidateFilterTest, KatakanaT13N) {
  // Scenario 1: nodes[0] is KatakanaT13N (unknown lid/rid, value "abc");
  // nodes[1] stays the default functional node.  Expected: accepted.
  {
    CandidateFilter filter;
    vector<const Node *> nodes;
    GetDefaultNodes(&nodes);

    Segment::Candidate *candidate = NewCandidate();
    candidate->value = "abc";

    Node *head = NewNode();
    head->value = "abc";
    head->lid = POSMatcher::GetUnknownId();
    head->rid = POSMatcher::GetUnknownId();
    nodes[0] = head;

    EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
              filter.FilterCandidate(candidate, nodes));
  }

  // Scenario 2: nodes[1] is KatakanaT13N (functional lid/rid, value "abc");
  // nodes[0] stays the default unknown node.  Expected: rejected.
  {
    CandidateFilter filter;
    vector<const Node *> nodes;
    GetDefaultNodes(&nodes);

    Segment::Candidate *candidate = NewCandidate();
    candidate->value = "abc";

    Node *tail = NewNode();
    tail->value = "abc";
    tail->lid = POSMatcher::GetFunctionalId();
    tail->rid = POSMatcher::GetFunctionalId();
    nodes[1] = tail;

    EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
              filter.FilterCandidate(candidate, nodes));
  }

  // Scenario 3: nodes[1] is not a functional word.  Both nodes are replaced
  // and both carry unknown lid/rid.  Expected: rejected.
  {
    CandidateFilter filter;
    vector<const Node *> nodes;
    GetDefaultNodes(&nodes);

    Segment::Candidate *candidate = NewCandidate();
    candidate->value = "abc";

    Node *head = NewNode();
    head->value = "abc";
    head->lid = POSMatcher::GetUnknownId();
    head->rid = POSMatcher::GetUnknownId();
    nodes[0] = head;

    Node *tail = NewNode();
    // "てすと"
    tail->value = "\xE3\x81\xA6\xE3\x81\x99\xE3\x81\xA8";
    tail->lid = POSMatcher::GetUnknownId();
    tail->rid = POSMatcher::GetUnknownId();
    nodes[1] = tail;

    EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
              filter.FilterCandidate(candidate, nodes));
  }
}
// A single node whose lid/rid are POSMatcher::GetIsolatedWordId() is filtered
// under every combination of neighbour node types tried below.  Only the
// BOS_NODE / EOS_NODE pairing is expected to be accepted.
TEST_F(CandidateFilterTest, IsolatedWord) {
  CandidateFilter filter;

  Segment::Candidate *candidate = NewCandidate();
  candidate->value = "abc";

  Node *isolated = NewNode();
  Node *before = NewNode();
  Node *after = NewNode();
  isolated->prev = before;
  isolated->next = after;
  isolated->key = "test";
  isolated->value = "test";
  isolated->lid = POSMatcher::GetIsolatedWordId();
  isolated->rid = POSMatcher::GetIsolatedWordId();

  vector<const Node *> nodes;
  nodes.push_back(isolated);

  // Normal node before, end-of-sentence after.
  before->node_type = Node::NOR_NODE;
  after->node_type = Node::EOS_NODE;
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(candidate, nodes));

  // Beginning-of-sentence before, normal node after.
  before->node_type = Node::BOS_NODE;
  after->node_type = Node::NOR_NODE;
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(candidate, nodes));

  // Normal nodes on both sides.
  before->node_type = Node::NOR_NODE;
  after->node_type = Node::NOR_NODE;
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(candidate, nodes));

  // Beginning-of-sentence before and end-of-sentence after.
  before->node_type = Node::BOS_NODE;
  after->node_type = Node::EOS_NODE;
  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(candidate, nodes));
}
// Verifies that rejected candidates yield BAD_CANDIDATE (so enumeration may
// continue) early on, and that a high-cost candidate yields STOP_ENUMERATION
// once many candidates have already been fed to the same filter.
TEST_F(CandidateFilterTest, MayHaveMoreCandidates) {
  CandidateFilter filter;
  vector<const Node *> n;
  GetDefaultNodes(&n);

  Segment::Candidate *c1 = NewCandidate();
  c1->value = "abc";
  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(c1, n));

  Segment::Candidate *c2 = NewCandidate();
  c2->value = "abc";
  // Though same value candidate is rejected, enumeration should continue.
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(c2, n));

  Segment::Candidate *c3 = NewCandidate();
  c3->structure_cost = INT_MAX;
  c3->value = "def";
  // High structure cost should not stop enumeration.
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(c3, n));

  Segment::Candidate *c4 = NewCandidate();
  c4->cost = INT_MAX;
  c4->structure_cost = INT_MAX;
  c4->value = "ghi";
  // High cost candidate should be rejected.
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(c4, n));

  // Insert many valid candidates.  Their individual verdicts are not
  // checked; they only advance the filter's internal state.
  for (int i = 0; i < 50; ++i) {
    Segment::Candidate *tmp = NewCandidate();
    tmp->value = Util::SimpleItoa(i) + "test";
    filter.FilterCandidate(tmp, n);
  }

  // Same costs as c4 but a value not seen before.  Finally, it returns
  // STOP_ENUMERATION, because filter has seen more than 50 good candidates.
  Segment::Candidate *c5 = NewCandidate();
  c5->cost = INT_MAX;
  c5->structure_cost = INT_MAX;
  c5->value = "ghi2";
  EXPECT_EQ(CandidateFilter::STOP_ENUMERATION,
            filter.FilterCandidate(c5, n));
}
// Regression test: a candidate must be rejected while its key/value pair, or
// its content_key/content_value pair, is registered in the suppression
// dictionary, and accepted again after the dictionary has been cleared.
TEST_F(CandidateFilterTest, Regression3437022) {
  CandidateFilter filter;
  vector<const Node *> nodes;
  GetDefaultNodes(&nodes);

  Segment::Candidate *candidate = NewCandidate();
  candidate->key = "test_key";
  candidate->value = "test_value";

  // Nothing is suppressed yet.
  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(candidate, nodes));

  // Register the pair; AddEntry() is bracketed by Lock()/UnLock().
  SuppressionDictionary *suppression =
      SuppressionDictionary::GetSuppressionDictionary();
  suppression->Lock();
  suppression->AddEntry("test_key", "test_value");
  suppression->UnLock();

  // Key and value match the registered entry.
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(candidate, nodes));

  // Now only content_key / content_value match the registered entry.
  candidate->key = "test_key_suffix";
  candidate->value = "test_value_suffix";
  candidate->content_key = "test_key";
  candidate->content_value = "test_value";
  EXPECT_EQ(CandidateFilter::BAD_CANDIDATE,
            filter.FilterCandidate(candidate, nodes));

  // Empty the dictionary again; the candidate is accepted once more.
  suppression->Lock();
  suppression->Clear();
  suppression->UnLock();
  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(candidate, nodes));
}
// A candidate flagged REALTIME_CONVERSION with value "PCテスト" is filtered
// against two nodes that both carry the unknown POS id.  It is expected to be
// accepted.
TEST_F(CandidateFilterTest, FilterRealtimeConversionTest) {
  CandidateFilter filter;
  vector<const Node *> nodes;

  Node *alphabet_node = NewNode();
  alphabet_node->key = "PC";
  alphabet_node->value = "PC";
  alphabet_node->lid = POSMatcher::GetUnknownId();
  alphabet_node->rid = POSMatcher::GetUnknownId();
  nodes.push_back(alphabet_node);

  Node *kana_node = NewNode();
  // "てすと"
  kana_node->value = "\xE3\x81\xA6\xE3\x81\x99\xE3\x81\xA8";
  kana_node->lid = POSMatcher::GetUnknownId();
  kana_node->rid = POSMatcher::GetUnknownId();
  nodes.push_back(kana_node);

  Segment::Candidate *candidate = NewCandidate();
  candidate->attributes |= Segment::Candidate::REALTIME_CONVERSION;
  // "PCテスト"
  candidate->value = "PC\xE3\x83\x86\xE3\x82\xB9\xE3\x83\x88";

  // Don't filter a candidate because it starts with alphabets and
  // is followed by a non-functional word.
  EXPECT_EQ(CandidateFilter::GOOD_CANDIDATE,
            filter.FilterCandidate(candidate, nodes));
}
} // namespace mozc