| /* |
| * Copyright (C) 2013, The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" |
| |
| #include "defines.h" |
| #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" |
| |
| namespace latinime { |
| |
| typedef PatriciaTrieReadingUtils PtReadingUtils; |
| |
| const PtReadingUtils::NodeFlags PtReadingUtils::MASK_CHILDREN_POSITION_TYPE = 0xC0; |
| const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_NOPOSITION = 0x00; |
| const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_ONEBYTE = 0x40; |
| const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_TWOBYTES = 0x80; |
| const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_THREEBYTES = 0xC0; |
| |
| // Flag for single/multiple char group |
| const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20; |
| // Flag for terminal PtNodes |
| const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10; |
| // Flag for shortcut targets presence |
| const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08; |
| // Flag for bigram presence |
| const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_BIGRAMS = 0x04; |
| // Flag for non-words (typically, shortcut only entries) |
| const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02; |
| // Flag for blacklist |
| const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; |
| |
| /* static */ int PtReadingUtils::getPtNodeArraySizeAndAdvancePosition( |
| const uint8_t *const buffer, int *const pos) { |
| const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); |
| if (firstByte < 0x80) { |
| return firstByte; |
| } else { |
| return ((firstByte & 0x7F) << 8) ^ ByteArrayUtils::readUint8AndAdvancePosition( |
| buffer, pos); |
| } |
| } |
| |
| /* static */ PtReadingUtils::NodeFlags PtReadingUtils::getFlagsAndAdvancePosition( |
| const uint8_t *const buffer, int *const pos) { |
| return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); |
| } |
| |
| /* static */ int PtReadingUtils::getCodePointAndAdvancePosition(const uint8_t *const buffer, |
| int *const pos) { |
| return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos); |
| } |
| |
| // Returns the number of read characters. |
| /* static */ int PtReadingUtils::getCharsAndAdvancePosition(const uint8_t *const buffer, |
| const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) { |
| int length = 0; |
| if (hasMultipleChars(flags)) { |
| length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer, |
| pos); |
| } else { |
| const int codePoint = getCodePointAndAdvancePosition(buffer, pos); |
| if (codePoint == NOT_A_CODE_POINT) { |
| // CAVEAT: codePoint == NOT_A_CODE_POINT means the code point is |
| // CHARACTER_ARRAY_TERMINATOR. The code point must not be CHARACTER_ARRAY_TERMINATOR |
| // when the PtNode has a single code point. |
| length = 0; |
| AKLOGE("codePoint is NOT_A_CODE_POINT. pos: %d, codePoint: 0x%x, buffer[pos - 1]: 0x%x", |
| *pos - 1, codePoint, buffer[*pos - 1]); |
| ASSERT(false); |
| } else if (maxLength > 0) { |
| outBuffer[0] = codePoint; |
| length = 1; |
| } |
| } |
| return length; |
| } |
| |
| // Returns the number of skipped characters. |
| /* static */ int PtReadingUtils::skipCharacters(const uint8_t *const buffer, const NodeFlags flags, |
| const int maxLength, int *const pos) { |
| if (hasMultipleChars(flags)) { |
| return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos); |
| } else { |
| if (maxLength > 0) { |
| getCodePointAndAdvancePosition(buffer, pos); |
| return 1; |
| } else { |
| return 0; |
| } |
| } |
| } |
| |
| /* static */ int PtReadingUtils::readProbabilityAndAdvancePosition(const uint8_t *const buffer, |
| int *const pos) { |
| return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); |
| } |
| |
| /* static */ int PtReadingUtils::readChildrenPositionAndAdvancePosition( |
| const uint8_t *const buffer, const NodeFlags flags, int *const pos) { |
| const int base = *pos; |
| int offset = 0; |
| switch (MASK_CHILDREN_POSITION_TYPE & flags) { |
| case FLAG_CHILDREN_POSITION_TYPE_ONEBYTE: |
| offset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); |
| break; |
| case FLAG_CHILDREN_POSITION_TYPE_TWOBYTES: |
| offset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos); |
| break; |
| case FLAG_CHILDREN_POSITION_TYPE_THREEBYTES: |
| offset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos); |
| break; |
| default: |
| // If we come here, it means we asked for the children of a word with |
| // no children. |
| return NOT_A_DICT_POS; |
| } |
| return base + offset; |
| } |
| |
| } // namespace latinime |