blob: 1926b9831fd2d6feb4abc0197de05cc1d06b5cdb [file] [log] [blame]
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::MASK_ATTRIBUTE_ADDRESS_TYPE =
0x30;
const BigramListReadWriteUtils::BigramFlags
BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
const BigramListReadWriteUtils::BigramFlags
BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
const BigramListReadWriteUtils::BigramFlags
BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
const BigramListReadWriteUtils::BigramFlags
BigramListReadWriteUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
// Flag for presence of more attributes
const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRIBUTE_HAS_NEXT =
0x80;
// Mask for attribute probability, stored on 4 bits inside the flags byte.
const BigramListReadWriteUtils::BigramFlags
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
int *const outTargetPtNodePos, int *const bigramEntryPos) {
const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf,
bigramEntryPos);
if (outBigramFlags) {
*outBigramFlags = bigramFlags;
}
const int targetPos = getBigramAddressAndAdvancePosition(bigramsBuf, bigramFlags,
bigramEntryPos);
if (outTargetPtNodePos) {
*outTargetPtNodePos = targetPos;
}
}
/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
int *const bigramListPos) {
BigramFlags flags;
do {
getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */,
bigramListPos);
} while(hasNext(flags));
}
/* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(
const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
int offset = 0;
const int origin = *pos;
switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
offset = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos);
break;
case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
offset = ByteArrayUtils::readUint16AndAdvancePosition(bigramsBuf, pos);
break;
case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos);
break;
}
if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID) {
return NOT_A_DICT_POS;
} else if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET) {
return origin;
}
if (isOffsetNegative(flags)) {
return origin - offset;
} else {
return origin + offset;
}
}
/* static */ bool BigramListReadWriteUtils::setHasNextFlag(
BufferWithExtendableBuffer *const buffer, const bool hasNext, const int entryPos) {
const bool usesAdditionalBuffer = buffer->isInAdditionalBuffer(entryPos);
int readingPos = entryPos;
if (usesAdditionalBuffer) {
readingPos -= buffer->getOriginalBufferSize();
}
BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(
buffer->getBuffer(usesAdditionalBuffer), &readingPos);
if (hasNext) {
bigramFlags = bigramFlags | FLAG_ATTRIBUTE_HAS_NEXT;
} else {
bigramFlags = bigramFlags & (~FLAG_ATTRIBUTE_HAS_NEXT);
}
int writingPos = entryPos;
return buffer->writeUintAndAdvancePosition(bigramFlags, 1 /* size */, &writingPos);
}
/* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry(
BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability,
const bool hasNext, int *const writingPos) {
BigramFlags flags;
if (!createAndGetBigramFlags(*writingPos, targetPos, probability, hasNext, &flags)) {
return false;
}
return writeBigramEntry(buffer, flags, targetPos, writingPos);
}
/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
const int targetPtNodePos, int *const writingPos) {
const int offset = getBigramTargetOffset(targetPtNodePos, *writingPos);
const BigramFlags flagsToWrite = (offset < 0) ?
(flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : (flags & ~FLAG_ATTRIBUTE_OFFSET_NEGATIVE);
if (!bufferToWrite->writeUintAndAdvancePosition(flagsToWrite, 1 /* size */, writingPos)) {
return false;
}
const uint32_t absOffest = abs(offset);
const int bigramTargetFieldSize = attributeAddressSize(flags);
return bufferToWrite->writeUintAndAdvancePosition(absOffest, bigramTargetFieldSize,
writingPos);
}
// Returns true if the bigram entry is valid and put entry flags into out*.
/* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos,
const int targetPtNodePos, const int probability, const bool hasNext,
BigramFlags *const outBigramFlags) {
BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
if (hasNext) {
flags |= FLAG_ATTRIBUTE_HAS_NEXT;
}
const int offset = getBigramTargetOffset(targetPtNodePos, entryPos);
if (offset < 0) {
flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
}
const uint32_t absOffest = abs(offset);
if ((absOffest >> 24) != 0) {
// Offset is too large.
return false;
} else if ((absOffest >> 16) != 0) {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
} else if ((absOffest >> 8) != 0) {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
} else {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
}
// Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
// writing.
// TODO: Remove following 2 lines and optimize memory space.
flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
*outBigramFlags = flags;
return true;
}
/* static */ int BigramListReadWriteUtils::getBigramTargetOffset(const int targetPtNodePos,
const int entryPos) {
if (targetPtNodePos == NOT_A_DICT_POS) {
return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
} else {
const int offset = targetPtNodePos - (entryPos + 1 /* bigramFlagsField */);
if (offset == 0) {
return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
} else {
return offset;
}
}
}
} // namespace latinime