blob: 9e43d9c00e152bec2876af54558b418b444353fe [file] [log] [blame] [edit]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/renderer/spellchecker/spellcheck.h"
#include "base/bind.h"
#include "base/file_util.h"
#include "base/metrics/histogram.h"
#include "base/message_loop_proxy.h"
#include "base/time.h"
#include "base/utf_string_conversions.h"
#include "chrome/common/render_messages.h"
#include "chrome/common/spellcheck_common.h"
#include "chrome/common/spellcheck_messages.h"
#include "chrome/common/spellcheck_result.h"
#include "content/public/renderer/render_thread.h"
#include "third_party/hunspell/src/hunspell/hunspell.hxx"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebTextCheckingCompletion.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebTextCheckingResult.h"
using base::TimeTicks;
using content::RenderThread;
using WebKit::WebVector;
using WebKit::WebTextCheckingResult;
using WebKit::WebTextCheckingType;
// Immutable bundle of the arguments of one asynchronous text-checking request:
// the text to check, its offset, and the WebKit completion object that must be
// notified when the check finishes or is cancelled. Instances are
// reference-counted so that they can be held both as the pending request and
// in the queue of requests awaiting PerformSpellCheck().
class SpellCheck::SpellCheckRequestParam
    : public base::RefCountedThreadSafe<SpellCheck::SpellCheckRequestParam> {
 public:
  // |completion| must be non-NULL. This class stores the raw pointer and never
  // deletes it.
  SpellCheckRequestParam(const string16& text,
                         int offset,
                         WebKit::WebTextCheckingCompletion* completion)
      : text_(text),
        offset_(offset),
        completion_(completion) {
    DCHECK(completion);
  }

  // Returns the text to be checked. A reference is returned (rather than a
  // copy) so that reading the request does not duplicate the whole string; the
  // reference stays valid for as long as this object is alive.
  const string16& text() const {
    return text_;
  }

  // Returns the text offset supplied with the request.
  int offset() const {
    return offset_;
  }

  // Returns the completion object supplied with the request.
  WebKit::WebTextCheckingCompletion* completion() const {
    return completion_;
  }

 private:
  // Destruction happens only through the reference count.
  friend class base::RefCountedThreadSafe<SpellCheckRequestParam>;
  ~SpellCheckRequestParam() {}

  // Text to be checked in this task.
  string16 text_;

  // The text offset from the beginning.
  int offset_;

  // The interface to send the misspelled ranges to WebKit.
  WebKit::WebTextCheckingCompletion* completion_;

  DISALLOW_COPY_AND_ASSIGN(SpellCheckRequestParam);
};
// Creates a spellchecker in its "not yet initialized" state: no dictionary
// file, auto-correction off, no platform engine selected and no dictionary
// request sent so far.
SpellCheck::SpellCheck()
: file_(base::kInvalidPlatformFileValue),
auto_spell_correct_turned_on_(false),
is_using_platform_spelling_engine_(false),
initialized_(false),
dictionary_requested_(false) {
// Wait till we check the first word before doing any initializing.
}
// Nothing to release explicitly here; the body is intentionally empty.
SpellCheck::~SpellCheck() {
}
// Dispatches spellcheck-related control messages to their handlers:
//   SpellCheckMsg_Init                   -> OnInit
//   SpellCheckMsg_WordAdded              -> OnWordAdded
//   SpellCheckMsg_EnableAutoSpellCorrect -> OnEnableAutoSpellCorrect
// Returns true if |message| was one of the above, false otherwise.
bool SpellCheck::OnControlMessageReceived(const IPC::Message& message) {
bool handled = true;
IPC_BEGIN_MESSAGE_MAP(SpellCheck, message)
IPC_MESSAGE_HANDLER(SpellCheckMsg_Init, OnInit)
IPC_MESSAGE_HANDLER(SpellCheckMsg_WordAdded, OnWordAdded)
IPC_MESSAGE_HANDLER(SpellCheckMsg_EnableAutoSpellCorrect,
OnEnableAutoSpellCorrect)
// Any other message is reported back as not handled.
IPC_MESSAGE_UNHANDLED(handled = false)
IPC_END_MESSAGE_MAP()
return handled;
}
// Handler for SpellCheckMsg_Init. Converts the transported dictionary file
// handle, (re)initializes the checker with it, records the auto-correct
// setting, and then resolves any text-checking request that was parked while
// initialization was outstanding.
void SpellCheck::OnInit(IPC::PlatformFileForTransit bdict_file,
                        const std::vector<std::string>& custom_words,
                        const std::string& language,
                        bool auto_spell_correct) {
  const base::PlatformFile dictionary_file =
      IPC::PlatformFileForTransitToPlatformFile(bdict_file);
  Init(dictionary_file, custom_words, language);

  auto_spell_correct_turned_on_ = auto_spell_correct;

  // A request may have been saved by RequestTextChecking(); handle it now.
  PostDelayedSpellCheckTask();
}
// Handler for SpellCheckMsg_WordAdded. Ignored when the platform spelling
// engine is in use; otherwise the word goes straight into hunspell if it
// exists, or is queued in |custom_words_| until hunspell is created.
void SpellCheck::OnWordAdded(const std::string& word) {
  if (is_using_platform_spelling_engine_)
    return;

  if (hunspell_.get()) {
    AddWordToHunspell(word);
    return;
  }

  // Hunspell is not initialized yet: remember the word so that
  // InitializeHunspell() can add it later.
  custom_words_.push_back(word);
}
// Handler for SpellCheckMsg_EnableAutoSpellCorrect: stores the new setting,
// which GetAutoCorrectionWord() consults before doing any work.
void SpellCheck::OnEnableAutoSpellCorrect(bool enable) {
auto_spell_correct_turned_on_ = enable;
}
// Records a new dictionary configuration and marks the checker initialized.
// Any existing hunspell instance and dictionary mapping are dropped, the word
// iterators are reset, and |custom_words| is appended to the stored custom
// word list. The platform spelling engine is selected when no dictionary file
// is supplied but a language is.
void SpellCheck::Init(base::PlatformFile file,
                      const std::vector<std::string>& custom_words,
                      const std::string& language) {
  const bool has_dictionary_file = (file != base::kInvalidPlatformFileValue);

  initialized_ = true;

  // Release hunspell before the dictionary mapping: hunspell is constructed
  // from the mapping's data (see InitializeHunspell()).
  hunspell_.reset();
  bdict_file_.reset();

  file_ = file;
  is_using_platform_spelling_engine_ =
      !has_dictionary_file && !language.empty();

  character_attributes_.SetDefaultLanguage(language);
  text_iterator_.Reset();
  contraction_iterator_.Reset();

  custom_words_.insert(custom_words_.end(),
                       custom_words.begin(), custom_words.end());

  // Hunspell itself is created lazily, the first time it is needed.
}
// Checks the spelling of the words contained in the first |in_word_len|
// characters of |in_word|.
//
// Returns false as soon as a misspelled word is found; in that case
// |*misspelling_start| and |*misspelling_len| describe that word's range
// within |in_word| and, if |optional_suggestions| is non-NULL, it is filled
// with suggestions for the word. Returns true when no misspelling is found,
// when the input is empty, when initialization is still pending, when spell
// checking is disabled, or when the word iterator cannot be initialized; in
// all of those cases both out-parameters are 0.
//
// |tag| is forwarded to CheckSpelling()/IsValidContraction().
// |misspelling_start| and |misspelling_len| must be non-NULL.
bool SpellCheck::SpellCheckWord(
    const char16* in_word,
    int in_word_len,
    int tag,
    int* misspelling_start,
    int* misspelling_len,
    std::vector<string16>* optional_suggestions) {
  DCHECK(in_word_len >= 0);
  DCHECK(misspelling_start && misspelling_len) << "Out vars must be given.";

  // Give the out-parameters a defined value before any return, so that a
  // caller never reads stale or uninitialized values after a "true" result.
  *misspelling_start = 0;
  *misspelling_len = 0;

  // Do nothing if we need to delay initialization. (Rather than blocking,
  // report the word as correctly spelled.)
  if (InitializeIfNeeded())
    return true;

  // Do nothing if spell checking is disabled.
  if (initialized_ && file_ == base::kInvalidPlatformFileValue &&
      !is_using_platform_spelling_engine_) {
    return true;
  }

  if (in_word_len == 0)
    return true;  // No input means always spelled correctly.

  string16 word;
  int word_start;
  int word_length;
  if (!text_iterator_.IsInitialized() &&
      !text_iterator_.Initialize(&character_attributes_, true)) {
    // We failed to initialize text_iterator_, return as spelled correctly.
    VLOG(1) << "Failed to initialize SpellcheckWordIterator";
    return true;
  }

  text_iterator_.SetText(in_word, in_word_len);
  while (text_iterator_.GetNextWord(&word, &word_start, &word_length)) {
    // Found a word (or a contraction) that the spellchecker can check the
    // spelling of.
    if (CheckSpelling(word, tag))
      continue;

    // If the given word is a concatenated word of two or more valid words
    // (e.g. "hello:hello"), we should treat it as a valid word.
    if (IsValidContraction(word, tag))
      continue;

    *misspelling_start = word_start;
    *misspelling_len = word_length;

    // Get the list of suggested words.
    if (optional_suggestions)
      FillSuggestionList(word, optional_suggestions);
    return false;
  }

  return true;
}
bool SpellCheck::SpellCheckParagraph(
const string16& text,
WebKit::WebVector<WebKit::WebTextCheckingResult>* results) {
#if !defined(OS_MACOSX)
// Mac has its own spell checker, so this method will not be used.
DCHECK(results);
std::vector<WebKit::WebTextCheckingResult> textcheck_results;
size_t length = text.length();
size_t offset = 0;
// Spellcheck::SpellCheckWord() automatically breaks text into words and
// checks the spellings of the extracted words. This function sets the
// position and length of the first misspelled word and returns false when
// the text includes misspelled words. Therefore, we just repeat calling the
// function until it returns true to check the whole text.
int misspelling_start = 0;
int misspelling_length = 0;
while (offset <= length) {
if (SpellCheckWord(&text[offset],
length - offset,
0,
&misspelling_start,
&misspelling_length,
NULL)) {
results->assign(textcheck_results);
return true;
}
string16 replacement;
textcheck_results.push_back(WebKit::WebTextCheckingResult(
WebKit::WebTextCheckingTypeSpelling,
misspelling_start + offset,
misspelling_length,
replacement));
offset += misspelling_start + misspelling_length;
}
results->assign(textcheck_results);
return false;
#else
return true;
#endif
}
// Tries to auto-correct |word| by swapping one pair of adjacent characters.
//
// Each adjacent swap is checked with SpellCheckWord(). If exactly one swap
// is reported as having no misspelling, that swapped word is returned. An
// empty string is returned when auto-correction is turned off, when |word|
// is shorter than 2 or longer than kMaxAutoCorrectWordSize characters, when
// initialization is still pending, or when zero or more than one swap
// qualifies.
string16 SpellCheck::GetAutoCorrectionWord(const string16& word, int tag) {
  string16 correction;  // Stays empty unless exactly one candidate is found.

  const int length = static_cast<int>(word.size());
  if (!auto_spell_correct_turned_on_ ||
      length < 2 ||
      length > chrome::spellcheck_common::kMaxAutoCorrectWordSize) {
    return correction;
  }

  if (InitializeIfNeeded())
    return correction;

  // Working copy of |word|; every slot past the word is zero, so the buffer
  // is always NUL-terminated.
  char16 candidate[chrome::spellcheck_common::kMaxAutoCorrectWordSize + 1];
  for (int i = 0; i < length; ++i)
    candidate[i] = word[i];
  for (int i = length;
       i <= chrome::spellcheck_common::kMaxAutoCorrectWordSize; ++i) {
    candidate[i] = 0;
  }

  for (int left = 0; left + 1 < length; ++left) {
    const int right = left + 1;
    std::swap(candidate[left], candidate[right]);

    int misspelling_start = 0;
    int misspelling_len = 0;
    SpellCheckWord(candidate, length, tag, &misspelling_start,
                   &misspelling_len, NULL);

    if (misspelling_len == 0) {
      if (!correction.empty()) {
        // A second valid candidate makes the correction ambiguous: give up.
        correction.clear();
        break;
      }
      correction.assign(candidate);
    }

    // Undo the swap before trying the next pair.
    std::swap(candidate[left], candidate[right]);
  }

  return correction;
}
// Starts an asynchronous check of |text|; the outcome is delivered through
// |completion|.
//
// Any request that is still parked waiting for initialization is cancelled
// first. If initialization is still pending, this request is parked in
// |pending_request_param_| and picked up by PostDelayedSpellCheckTask();
// otherwise it is queued and PerformSpellCheck() is posted to the current
// message loop. Must not be reached on Mac.
void SpellCheck::RequestTextChecking(
    const string16& text,
    int offset,
    WebKit::WebTextCheckingCompletion* completion) {
#if defined(OS_MACOSX)
  // Mac uses its own spellchecker and PerformSpellCheck is not implemented
  // there, so this method will not be used.
  NOTREACHED();
#else
  DCHECK(!is_using_platform_spelling_engine_);

  // Clean up the previous request before starting a new request.
  if (pending_request_param_.get()) {
    pending_request_param_->completion()->didCancelCheckingText();
    pending_request_param_ = NULL;
  }

  const bool initialization_pending = InitializeIfNeeded();
  if (initialization_pending) {
    // The text will be checked once the hunspell dictionary is loaded. Keep
    // the parameters until the init message from the browser process arrives.
    pending_request_param_ =
        new SpellCheckRequestParam(text, offset, completion);
    return;
  }

  requested_params_.push(new SpellCheckRequestParam(text, offset, completion));
  base::MessageLoopProxy::current()->PostTask(
      FROM_HERE,
      base::Bind(&SpellCheck::PerformSpellCheck, AsWeakPtr()));
#endif
}
void SpellCheck::InitializeHunspell() {
if (hunspell_.get())
return;
bdict_file_.reset(new file_util::MemoryMappedFile);
if (bdict_file_->Initialize(file_)) {
TimeTicks debug_start_time = base::Histogram::DebugNow();
hunspell_.reset(
new Hunspell(bdict_file_->data(), bdict_file_->length()));
// Add custom words to Hunspell.
for (std::vector<std::string>::iterator it = custom_words_.begin();
it != custom_words_.end(); ++it) {
AddWordToHunspell(*it);
}
DHISTOGRAM_TIMES("Spellcheck.InitTime",
base::Histogram::DebugNow() - debug_start_time);
} else {
NOTREACHED() << "Could not mmap spellchecker dictionary.";
}
}
// Adds |word| to the hunspell dictionary. Empty words and words whose length
// is MAXWORDLEN or more are silently skipped. Callers must ensure |hunspell_|
// exists; it is dereferenced without a check.
void SpellCheck::AddWordToHunspell(const std::string& word) {
  const bool acceptable = !word.empty() && word.length() < MAXWORDLEN;
  if (!acceptable)
    return;

  hunspell_->add(word.c_str());
}
// Makes sure the spelling backend is ready, kicking off initialization when
// necessary. Returns true when the caller has to wait (initialization has not
// completed yet) and false when checking can proceed.
//
// - With the platform spelling engine nothing needs to be done.
// - The first call before Init() requests the dictionary (when a
//   RenderThread exists) and reports "wait".
// - Otherwise hunspell is created if a dictionary file is available.
bool SpellCheck::InitializeIfNeeded() {
  if (is_using_platform_spelling_engine_)
    return false;

  const bool must_request_dictionary = !initialized_ && !dictionary_requested_;
  if (must_request_dictionary) {
    // RenderThread will not exist in test.
    if (RenderThread::Get())
      RenderThread::Get()->Send(new SpellCheckHostMsg_RequestDictionary);
    dictionary_requested_ = true;
    return true;
  }

  // Don't initialize if hunspell is disabled.
  const bool has_dictionary_file = (file_ != base::kInvalidPlatformFileValue);
  if (has_dictionary_file)
    InitializeHunspell();

  return !initialized_;
}
// Relays a single-word spelling query to the active backend, either the
// platform-specific engine or hunspell, and returns true if the word is
// considered correctly spelled.
//
// With the platform engine the verdict comes from a
// SpellCheckHostMsg_CheckSpelling message on Mac; on other platforms nothing
// is sent and the result is false. With hunspell, words whose UTF-8 form is
// MAXWORDLEN bytes or longer are reported as misspelled without being
// looked up.
bool SpellCheck::CheckSpelling(const string16& word_to_check, int tag) {
  if (is_using_platform_spelling_engine_) {
    bool platform_verdict = false;
#if defined(OS_MACOSX)
    RenderThread::Get()->Send(new SpellCheckHostMsg_CheckSpelling(
        word_to_check, tag, &platform_verdict));
#endif
    return platform_verdict;
  }

  const std::string word_utf8(UTF16ToUTF8(word_to_check));

  // Hunspell shouldn't let us exceed its max, but check just in case.
  if (word_utf8.length() >= MAXWORDLEN)
    return false;

  // If |hunspell_| is NULL here, an error has occurred, but it's better
  // to check rather than crash. Treat the word as correct in that case.
  if (!hunspell_.get())
    return true;

  // |hunspell_->spell| returns non-zero if the word is spelled correctly and
  // 0 otherwise.
  return hunspell_->spell(word_utf8.c_str()) != 0;
}
// Resolves the request (if any) that RequestTextChecking() parked while
// initialization was pending. With a valid dictionary file the request is
// queued and PerformSpellCheck() is posted to the current message loop;
// without one the request is cancelled. Either way the parked slot is
// cleared afterwards.
void SpellCheck::PostDelayedSpellCheckTask() {
  if (!pending_request_param_.get())
    return;

  const bool has_dictionary_file = (file_ != base::kInvalidPlatformFileValue);
  if (has_dictionary_file) {
    requested_params_.push(pending_request_param_);
    base::MessageLoopProxy::current()->PostTask(
        FROM_HERE,
        base::Bind(&SpellCheck::PerformSpellCheck, AsWeakPtr()));
  } else {
    pending_request_param_->completion()->didCancelCheckingText();
  }

  pending_request_param_ = NULL;
}
void SpellCheck::PerformSpellCheck() {
#if !defined(OS_MACOSX)
DCHECK(!requested_params_.empty());
scoped_refptr<SpellCheckRequestParam> param = requested_params_.front();
DCHECK(param);
requested_params_.pop();
WebKit::WebVector<WebKit::WebTextCheckingResult> results;
SpellCheckParagraph(param->text(), &results);
param->completion()->didFinishCheckingText(results);
#else
// SpellCheck::SpellCheckParagraph is not implemented on Mac,
// so we return without spellchecking. Note that Mac uses its own
// spellchecker, this function won't be used.
NOTREACHED();
#endif
}
// Appends spelling suggestions for |wrong_word| to |optional_suggestions|.
//
// With the platform spelling engine the request is forwarded through a
// SpellCheckHostMsg_FillSuggestionList message on Mac and is a no-op
// elsewhere. With hunspell, at most kMaxSuggestions suggestions are appended
// (converted to UTF-16); the C strings and the array returned by hunspell are
// released with free() before returning.
void SpellCheck::FillSuggestionList(
    const string16& wrong_word,
    std::vector<string16>* optional_suggestions) {
  if (is_using_platform_spelling_engine_) {
#if defined(OS_MACOSX)
    RenderThread::Get()->Send(new SpellCheckHostMsg_FillSuggestionList(
        wrong_word, optional_suggestions));
#endif
    return;
  }

  // If |hunspell_| is NULL here, an error has occurred, but it's better
  // to check rather than crash.
  if (!hunspell_.get())
    return;

  // Start from NULL: should suggest() return without assigning the
  // out-pointer, the cleanup below must not index or free an indeterminate
  // value.
  char** suggestions = NULL;
  int number_of_suggestions =
      hunspell_->suggest(&suggestions, UTF16ToUTF8(wrong_word).c_str());
  if (suggestions == NULL)
    return;

  // Populate the vector of suggestions. Entries beyond the cap are not
  // reported but still have to be released.
  for (int i = 0; i < number_of_suggestions; ++i) {
    if (i < chrome::spellcheck_common::kMaxSuggestions)
      optional_suggestions->push_back(UTF8ToUTF16(suggestions[i]));
    free(suggestions[i]);
  }
  free(suggestions);
}
// Returns whether or not the given string is a valid contraction.
// This function is a fall-back when the SpellcheckWordIterator class
// returns a concatenated word which is not in the selected dictionary
// (e.g. "in'n'out") but each word is valid.
bool SpellCheck::IsValidContraction(const string16& contraction, int tag) {
if (!contraction_iterator_.IsInitialized() &&
!contraction_iterator_.Initialize(&character_attributes_, false)) {
// We failed to initialize the word iterator, return as spelled correctly.
VLOG(1) << "Failed to initialize contraction_iterator_";
return true;
}
contraction_iterator_.SetText(contraction.c_str(), contraction.length());
string16 word;
int word_start;
int word_length;
while (contraction_iterator_.GetNextWord(&word, &word_start, &word_length)) {
if (!CheckSpelling(word, tag))
return false;
}
return true;
}
#if !defined(OS_MACOSX)
void SpellCheck::CreateTextCheckingResults(
int line_offset,
const string16& line_text,
const std::vector<SpellCheckResult>& spellcheck_results,
WebVector<WebTextCheckingResult>* textcheck_results) {
// Double-check misspelled words with our spellchecker and attach grammar
// markers to them if our spellchecker tells they are correct words, i.e. they
// are probably contextually-misspelled words.
const char16* text = line_text.c_str();
WebVector<WebTextCheckingResult> list(spellcheck_results.size());
for (size_t i = 0; i < spellcheck_results.size(); ++i) {
WebTextCheckingType type =
static_cast<WebTextCheckingType>(spellcheck_results[i].type);
int word_location = spellcheck_results[i].location;
int word_length = spellcheck_results[i].length;
if (type == WebKit::WebTextCheckingTypeSpelling) {
int misspelling_start = 0;
int misspelling_length = 0;
if (SpellCheckWord(text + word_location, word_length, 0,
&misspelling_start, &misspelling_length, NULL)) {
type = WebKit::WebTextCheckingTypeGrammar;
}
}
list[i] = WebKit::WebTextCheckingResult(type,
word_location + line_offset,
word_length,
spellcheck_results[i].replacement);
}
textcheck_results->swap(list);
}
#endif