blob: 11f59fc70d148e52229768fdf6f098dbd7218b89 [file] [log] [blame]
// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/web_test/renderer/web_test_spell_checker.h"
#include <stddef.h>
#include <algorithm>
#include <array>
#include "base/check_op.h"
#include "base/compiler_specific.h"
#include "base/strings/string_util.h"
namespace content {
namespace {
void Append(std::vector<blink::WebString>* data, const blink::WebString& item) {
std::vector<blink::WebString> result(data->size() + 1);
for (size_t i = 0; i < data->size(); ++i)
result[i] = (*data)[i];
result[data->size()] = item;
data->swap(result);
}
bool IsASCIIAlpha(char ch) {
return base::IsAsciiLower(ch | 0x20);
}
} // namespace
WebTestSpellChecker::WebTestSpellChecker() = default;
WebTestSpellChecker::~WebTestSpellChecker() = default;
bool WebTestSpellChecker::SpellCheckWord(const blink::WebString& text,
size_t* misspelled_offset,
size_t* misspelled_length) {
DCHECK(misspelled_offset);
DCHECK(misspelled_length);
// Reset the result values as our spellchecker does.
*misspelled_offset = 0;
*misspelled_length = 0;
// Convert to a std::u16string because we store std::u16string instances in
// misspelled_words_ and blink::WebString has no find().
std::u16string string_text = text.Utf16();
int skipped_length = 0;
while (!string_text.empty()) {
// Extract the first possible English word from the given string.
// The given string may include non-ASCII characters or numbers. So, we
// should filter out such characters before start looking up our
// misspelled-word table.
// (This is a simple version of our SpellCheckWordIterator class.)
// If the given string doesn't include any ASCII characters, we can treat
// the string as valid one.
std::u16string::iterator first_char =
std::ranges::find_if(string_text, IsASCIIAlpha);
if (first_char == string_text.end())
return true;
int word_offset = std::distance(string_text.begin(), first_char);
int max_word_length = static_cast<int>(string_text.length()) - word_offset;
int word_length;
std::u16string word;
// These words are known misspelled words in web tests. If there are other
// misspelled words in web tests, please add them in this array.
static const auto misspelled_words = std::to_array<std::u16string>({
u"foo",
u"Foo",
u"baz",
u"fo",
u"LibertyF",
u"chello",
u"xxxtestxxx",
u"XXxxx",
u"Textx",
u"blockquoted",
u"asd",
u"Lorem",
u"Nunc",
u"Curabitur",
u"eu",
u"adlj",
u"adaasj",
u"sdklj",
u"jlkds",
u"jsaada",
u"jlda",
u"contentEditable",
// Prefer to match the full word than a partial word when there's an
// ambiguous boundary.
u"zz't",
u"zz",
// The following words are used by unit tests.
u"ifmmp",
u"qwertyuiopasd",
u"qwertyuiopasdf",
u"upper case",
u"wellcome",
});
// Look up our misspelled-word table to check if the extracted word is a
// known misspelled word, and return the offset and the length of the
// extracted word if this word is a known misspelled word.
for (const std::u16string& misspelled_word : misspelled_words) {
word_length =
std::min(static_cast<int>(misspelled_word.length()), max_word_length);
word = string_text.substr(word_offset, word_length);
if (word == misspelled_word &&
(static_cast<int>(string_text.length()) ==
word_offset + word_length ||
!IsASCIIAlpha(string_text[word_offset + word_length]))) {
*misspelled_offset = word_offset + skipped_length;
*misspelled_length = word_length;
break;
}
}
if (*misspelled_length > 0)
break;
std::u16string::iterator last_char = std::find_if_not(
string_text.begin() + word_offset, string_text.end(), IsASCIIAlpha);
if (last_char == string_text.end())
word_length = static_cast<int>(string_text.length()) - word_offset;
else
word_length = std::distance(first_char, last_char);
DCHECK_LT(0, word_offset + word_length);
string_text = string_text.substr(word_offset + word_length);
skipped_length += word_offset + word_length;
}
return false;
}
bool WebTestSpellChecker::HasInCache(const blink::WebString& word) {
return word == "Spell wellcome. Is it broken?" ||
word == "Spell wellcome.\x007F";
}
bool WebTestSpellChecker::IsMultiWordMisspelling(
const blink::WebString& text,
std::vector<blink::WebTextCheckingResult>* results) {
if (text == "Helllo wordl.") {
results->push_back(blink::WebTextCheckingResult(
blink::kWebTextDecorationTypeSpelling, 0, 6,
std::vector<blink::WebString>({"Hello"})));
results->push_back(blink::WebTextCheckingResult(
blink::kWebTextDecorationTypeSpelling, 7, 5,
std::vector<blink::WebString>({"world"})));
return true;
}
return false;
}
void WebTestSpellChecker::FillSuggestionList(
const blink::WebString& word,
std::vector<blink::WebString>* suggestions) {
if (word == "wellcome")
Append(suggestions, blink::WebString::FromUTF8("welcome"));
else if (word == "upper case")
Append(suggestions, blink::WebString::FromUTF8("uppercase"));
else if (word == "Helllo")
Append(suggestions, blink::WebString::FromUTF8("Hello"));
else if (word == "wordl")
Append(suggestions, blink::WebString::FromUTF8("world"));
}
} // namespace content