blob: d1ddcaa826a4ed28efb991fe5377d059f1999312 [file] [log] [blame]
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "ui/base/ime/character_composer.h"
#include <algorithm>
#include <iterator>
#include <string>
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversion_utils.h"
#include "base/strings/utf_string_conversions.h"
#include "base/third_party/icu/icu_utf.h"
#include "ui/events/event.h"
#include "ui/events/keycodes/dom/dom_key.h"
#include "ui/events/keycodes/dom/keycode_converter.h"
#include "ui/events/keycodes/keyboard_codes.h"
namespace {
#include "ui/base/ime/character_composer_data.h"
bool CheckCharacterComposeTable(
const ui::CharacterComposer::ComposeBuffer& compose_sequence,
uint32_t* composed_character) {
const ui::TreeComposeChecker kTreeComposeChecker(kCompositions);
return kTreeComposeChecker.CheckSequence(compose_sequence,
composed_character) !=
ui::ComposeChecker::CheckSequenceResult::NO_MATCH;
}
// Converts |character| to UTF16 string.
// Returns false when |character| is not a valid character.
bool UTF32CharacterToUTF16(uint32_t character, base::string16* output) {
output->clear();
// Reject invalid character. (e.g. codepoint greater than 0x10ffff)
if (!CBU_IS_UNICODE_CHAR(character))
return false;
if (character) {
output->resize(CBU16_LENGTH(character));
size_t i = 0;
CBU16_APPEND_UNSAFE(&(*output)[0], i, character);
}
return true;
}
// Returns an hexadecimal digit integer (0 to 15) corresponding to |keycode|.
// -1 is returned when |keycode| cannot be a hexadecimal digit.
int KeycodeToHexDigit(unsigned int keycode) {
if (ui::VKEY_0 <= keycode && keycode <= ui::VKEY_9)
return keycode - ui::VKEY_0;
if (ui::VKEY_A <= keycode && keycode <= ui::VKEY_F)
return keycode - ui::VKEY_A + 10;
return -1; // |keycode| cannot be a hexadecimal digit.
}
} // namespace
namespace ui {
CharacterComposer::CharacterComposer() : composition_mode_(KEY_SEQUENCE_MODE) {}
CharacterComposer::~CharacterComposer() {}
void CharacterComposer::Reset() {
compose_buffer_.clear();
hex_buffer_.clear();
composed_character_.clear();
preedit_string_.clear();
composition_mode_ = KEY_SEQUENCE_MODE;
}
bool CharacterComposer::FilterKeyPress(const ui::KeyEvent& event) {
if (event.type() != ET_KEY_PRESSED && event.type() != ET_KEY_RELEASED)
return false;
// We don't care about modifier key presses.
if (KeycodeConverter::IsDomKeyForModifier(event.GetDomKey()))
return false;
composed_character_.clear();
preedit_string_.clear();
// When the user presses Ctrl+Shift+U, maybe switch to HEX_MODE.
// We don't care about other modifiers like Alt. When CapsLock is on, we do
// nothing because what we receive is Ctrl+Shift+u (not U).
if (event.key_code() == VKEY_U &&
(event.flags() & (EF_SHIFT_DOWN | EF_CONTROL_DOWN | EF_CAPS_LOCK_ON)) ==
(EF_SHIFT_DOWN | EF_CONTROL_DOWN)) {
if (composition_mode_ == KEY_SEQUENCE_MODE && compose_buffer_.empty()) {
// There is no ongoing composition. Let's switch to HEX_MODE.
composition_mode_ = HEX_MODE;
UpdatePreeditStringHexMode();
return true;
}
}
// Filter key press in an appropriate manner.
switch (composition_mode_) {
case KEY_SEQUENCE_MODE:
return FilterKeyPressSequenceMode(event);
case HEX_MODE:
return FilterKeyPressHexMode(event);
default:
NOTREACHED();
return false;
}
}
bool CharacterComposer::FilterKeyPressSequenceMode(const KeyEvent& event) {
DCHECK(composition_mode_ == KEY_SEQUENCE_MODE);
compose_buffer_.push_back(event.GetDomKey());
// Check compose table.
uint32_t composed_character_utf32 = 0;
if (CheckCharacterComposeTable(compose_buffer_, &composed_character_utf32)) {
// Key press is recognized as a part of composition.
if (composed_character_utf32 != 0) {
// We get a composed character.
compose_buffer_.clear();
UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
}
return true;
}
// Key press is not a part of composition.
compose_buffer_.pop_back(); // Remove the keypress added this time.
if (!compose_buffer_.empty()) {
// Check for Windows-style composition fallback: If the dead key encodes
// a printable ASCII character, output that followed by the new keypress.
// (This could be extended to allow any printable Unicode character in
// the dead key, and/or for longer sequences, but there is no current use
// for that, so we keep it simple.)
if ((compose_buffer_.size() == 1) && (compose_buffer_[0].IsDeadKey())) {
int32_t dead_character = compose_buffer_[0].ToDeadKeyCombiningCharacter();
if (dead_character >= 0x20 && dead_character <= 0x7E) {
DomKey current_key = event.GetDomKey();
int32_t current_character = 0;
if (current_key.IsCharacter())
current_character = current_key.ToCharacter();
else if (current_key.IsDeadKey())
current_character = current_key.ToDeadKeyCombiningCharacter();
if (current_character) {
base::WriteUnicodeCharacter(dead_character, &composed_character_);
base::WriteUnicodeCharacter(current_character, &composed_character_);
}
}
}
compose_buffer_.clear();
return true;
}
return false;
}
bool CharacterComposer::FilterKeyPressHexMode(const KeyEvent& event) {
DCHECK(composition_mode_ == HEX_MODE);
const size_t kMaxHexSequenceLength = 8;
base::char16 c = event.GetCharacter();
int hex_digit = 0;
if (base::IsHexDigit(c)) {
hex_digit = base::HexDigitToInt(c);
} else {
// With 101 keyboard, control + shift + 3 produces '#', but a user may
// have intended to type '3'. So, if a hexadecimal character was not found,
// suppose a user is holding shift key (and possibly control key, too) and
// try a character with modifier keys removed.
hex_digit = KeycodeToHexDigit(event.key_code());
}
if (hex_digit >= 0) {
if (hex_buffer_.size() < kMaxHexSequenceLength) {
// Add the key to the buffer if it is a hex digit.
hex_buffer_.push_back(hex_digit);
}
} else {
DomKey key = event.GetDomKey();
if (key == DomKey::ESCAPE) {
// Cancel composition when ESC is pressed.
Reset();
} else if (key == DomKey::ENTER || c == ' ') {
// Commit the composed character when Enter or space is pressed.
CommitHex();
} else if (key == DomKey::BACKSPACE) {
// Pop back the buffer when Backspace is pressed.
if (!hex_buffer_.empty()) {
hex_buffer_.pop_back();
} else {
// If there is no character in |hex_buffer_|, cancel composition.
Reset();
}
}
// Other keystrokes are ignored in hex composition mode.
}
UpdatePreeditStringHexMode();
return true;
}
void CharacterComposer::CommitHex() {
DCHECK(composition_mode_ == HEX_MODE);
uint32_t composed_character_utf32 = 0;
for (size_t i = 0; i != hex_buffer_.size(); ++i) {
const uint32_t digit = hex_buffer_[i];
DCHECK(0 <= digit && digit < 16);
composed_character_utf32 <<= 4;
composed_character_utf32 |= digit;
}
Reset();
UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
}
void CharacterComposer::UpdatePreeditStringHexMode() {
if (composition_mode_ != HEX_MODE) {
preedit_string_.clear();
return;
}
std::string preedit_string_ascii("u");
for (size_t i = 0; i != hex_buffer_.size(); ++i) {
const int digit = hex_buffer_[i];
DCHECK(0 <= digit && digit < 16);
preedit_string_ascii += digit <= 9 ? ('0' + digit) : ('a' + (digit - 10));
}
preedit_string_ = base::ASCIIToUTF16(preedit_string_ascii);
}
ComposeChecker::CheckSequenceResult TreeComposeChecker::CheckSequence(
const ui::CharacterComposer::ComposeBuffer& sequence,
uint32_t* composed_character) const {
*composed_character = 0;
if (sequence.size() > data_.maximum_sequence_length)
return CheckSequenceResult::NO_MATCH;
uint16_t tree_index = 0;
for (const auto& keystroke : sequence) {
DCHECK(tree_index < data_.tree_entries);
// If we are looking up a dead key, skip over the character tables.
int32_t character = -1;
if (keystroke.IsDeadKey()) {
tree_index += 2 * data_.tree[tree_index] + 1; // internal unicode table
tree_index += 2 * data_.tree[tree_index] + 1; // leaf unicode table
character = keystroke.ToDeadKeyCombiningCharacter();
} else if (keystroke.IsCharacter()) {
character = keystroke.ToCharacter();
}
if (character < 0 || character > 0xFFFF)
return CheckSequenceResult::NO_MATCH;
// Check the internal subtree table.
uint16_t result = 0;
uint16_t entries = data_.tree[tree_index++];
if (entries &&
Find(tree_index, entries, static_cast<uint16_t>(character), &result)) {
tree_index = result;
continue;
}
// Skip over the internal subtree table and check the leaf table.
tree_index += 2 * entries;
entries = data_.tree[tree_index++];
if (entries &&
Find(tree_index, entries, static_cast<uint16_t>(character), &result)) {
*composed_character = result;
return CheckSequenceResult::FULL_MATCH;
}
return CheckSequenceResult::NO_MATCH;
}
return CheckSequenceResult::PREFIX_MATCH;
}
bool TreeComposeChecker::Find(uint16_t index,
uint16_t size,
uint16_t key,
uint16_t* value) const {
struct TableEntry {
uint16_t key;
uint16_t value;
bool operator<(const TableEntry& other) const {
return this->key < other.key;
}
};
const TableEntry* a = reinterpret_cast<const TableEntry*>(&data_.tree[index]);
const TableEntry* z = a + size;
const TableEntry target = {key, 0};
const TableEntry* it = std::lower_bound(a, z, target);
if ((it != z) && (it->key == key)) {
*value = it->value;
return true;
}
return false;
}
} // namespace ui