blob: 052558bfc3880ed2ec8285f1510029c62da7dcbc [file] [log] [blame]
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
#include "utils/hash_map_compat.h"
namespace latinime {
const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
// TODO: Make MAX_DICTIONARY_SIZE 8MB.
const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
DynamicPatriciaTrieReadingHelper *const readingHelper,
const int *const wordCodePoints, const int codePointCount, const int probability,
bool *const outAddedNewUnigram) {
int parentPos = NOT_A_DICT_POS;
while (!readingHelper->isEnd()) {
const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
if (!readingHelper->isMatchedCodePoint(0 /* index */,
wordCodePoints[matchedCodePointCount])) {
// The first code point is different from target code point. Skip this node and read
// the next sibling node.
readingHelper->readNextSiblingNode();
continue;
}
// Check following merged node code points.
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader();
const int nodeCodePointCount = nodeReader->getCodePointCount();
for (int j = 1; j < nodeCodePointCount; ++j) {
const int nextIndex = matchedCodePointCount + j;
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j,
wordCodePoints[matchedCodePointCount + j])) {
*outAddedNewUnigram = true;
return reallocatePtNodeAndAddNewPtNodes(nodeReader,
readingHelper->getMergedNodeCodePoints(), j,
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
probability),
wordCodePoints + matchedCodePointCount,
codePointCount - matchedCodePointCount);
}
}
// All characters are matched.
if (codePointCount == readingHelper->getTotalCodePointCount()) {
return setPtNodeProbability(nodeReader, probability,
readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram);
}
if (!nodeReader->hasChildren()) {
*outAddedNewUnigram = true;
return createChildrenPtNodeArrayAndAChildPtNode(nodeReader,
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
wordCodePoints + readingHelper->getTotalCodePointCount(),
codePointCount - readingHelper->getTotalCodePointCount());
}
// Advance to the children nodes.
parentPos = nodeReader->getHeadPos();
readingHelper->readChildNode();
}
if (readingHelper->isError()) {
// The dictionary is invalid.
return false;
}
int pos = readingHelper->getPosOfLastForwardLinkField();
*outAddedNewUnigram = true;
return createAndInsertNodeIntoPtNodeArray(parentPos,
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
codePointCount - readingHelper->getPrevTotalCodePointCount(),
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos);
}
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
const int probability, bool *const outAddedNewBigram) {
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
mMergedNodeCodePoints);
// Move node to add bigram entry.
const int newNodePos = mBuffer->getTailPosition();
if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) {
return false;
}
int writingPos = newNodePos;
// Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(),
mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(),
&writingPos)) {
return false;
}
nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos);
if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
// Insert a new bigram entry into the existing bigram list.
int bigramListPos = nodeReader.getBigramsPos();
return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos,
outAddedNewBigram);
} else {
// The PtNode doesn't have a bigram list.
*outAddedNewBigram = true;
// First, Write a bigram entry at the tail position of the PtNode.
if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) {
return false;
}
// Then, Mark as the PtNode having bigram list in the flags.
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(),
nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY,
nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
writingPos = newNodePos;
// Write updated flags into the moved PtNode's flags field.
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
&writingPos);
}
}
// Remove a bigram relation from word0Pos to word1Pos.
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos);
if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
return false;
}
return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
}
void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
mBuffer->getUsedAdditionalBufferSize();
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
return;
}
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer);
}
void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
const char *const fileName, const HeaderPolicy *const headerPolicy) {
BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
MAX_DICTIONARY_SIZE);
int unigramCount = 0;
int bigramCount = 0;
if (mNeedsToDecay) {
ForgettingCurveUtils::sTimeKeeper.setCurrentTime();
}
if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
return;
}
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
return;
}
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
}
bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
const DynamicPatriciaTrieNodeReader *const nodeToUpdate) {
int pos = nodeToUpdate->getHeadPos();
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize();
}
// Read original flags
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
true /* isDeleted */);
int writingPos = nodeToUpdate->getHeadPos();
// Update flags.
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
&writingPos);
}
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
const int bigramLinkedNodePos) {
int pos = originalNode->getHeadPos();
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize();
}
// Read original flags
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
false /* isDeleted */);
int writingPos = originalNode->getHeadPos();
// Update flags.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
&writingPos)) {
return false;
}
// Update moved position, which is stored in the parent offset field.
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) {
return false;
}
// Update bigram linked node position, which is stored in the children position field.
int childrenPosFieldPos = originalNode->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
return false;
}
if (originalNode->hasChildren()) {
// Update children's parent position.
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
while (!readingHelper.isEnd()) {
int parentOffsetFieldPos = nodeReader->getHeadPos()
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
mBuffer, bigramLinkedNodePos, nodeReader->getHeadPos(),
&parentOffsetFieldPos)) {
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
// we give up to update dictionary.
return false;
}
readingHelper.readNextSiblingNode();
}
}
return true;
}
// Write new PtNode at writingPos.
bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(
BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted,
const bool isNotAWord, const int parentPos, const int *const codePoints,
const int codePointCount, const int probability, const int childrenPos,
const int originalBigramListPos, const int originalShortcutListPos,
int *const writingPos) {
const int nodePos = *writingPos;
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
// PtNode writing.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite,
0 /* nodeFlags */, writingPos)) {
return false;
}
// Calculate a parent offset and write the offset.
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite,
parentPos, nodePos, writingPos)) {
return false;
}
// Write code points
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite,
codePoints, codePointCount, writingPos)) {
return false;
}
// Write probability when the probability is a valid probability, which means this node is
// terminal.
if (probability != NOT_A_PROBABILITY) {
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite,
probability, writingPos)) {
return false;
}
}
// Write children position
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite,
childrenPos, writingPos)) {
return false;
}
// Copy shortcut list when the originalShortcutListPos is valid dictionary position.
if (originalShortcutListPos != NOT_A_DICT_POS) {
int fromPos = originalShortcutListPos;
if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos,
writingPos)) {
return false;
}
}
// Copy bigram list when the originalBigramListPos is valid dictionary position.
int bigramCount = 0;
if (originalBigramListPos != NOT_A_DICT_POS) {
int fromPos = originalBigramListPos;
if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) {
return false;
}
}
// Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
probability != NOT_A_PROBABILITY /* isTerminal */,
originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */,
bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
int flagsFieldPos = nodePos;
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags,
&flagsFieldPos)) {
return false;
}
return true;
}
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(
BufferWithExtendableBuffer *const bufferToWrite, const int parentPos,
const int *const codePoints, const int codePointCount, const int probability,
int *const writingPos) {
return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */,
false /* isNotAWord */, parentPos, codePoints, codePointCount, probability,
NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */,
NOT_A_DICT_POS /* originalShortcutPos */, writingPos);
}
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
BufferWithExtendableBuffer *const bufferToWrite,
const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
const int *const codePoints, const int codePointCount, const int probability,
int *const writingPos) {
return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(),
originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability,
originalNode->getChildrenPos(), originalNode->getBigramsPos(),
originalNode->getShortcutPos(), writingPos);
}
bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
int *const forwardLinkFieldPos) {
const int newPtNodeArrayPos = mBuffer->getTailPosition();
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
newPtNodeArrayPos, forwardLinkFieldPos)) {
return false;
}
return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
probability);
}
bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability,
const int *const codePoints, bool *const outAddedNewUnigram) {
if (originalPtNode->isTerminal()) {
// Overwrites the probability.
*outAddedNewUnigram = false;
const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(),
probability);
int probabilityFieldPos = originalPtNode->getProbabilityFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
probabilityToWrite, &probabilityFieldPos)) {
return false;
}
} else {
// Make the node terminal and write the probability.
*outAddedNewUnigram = true;
int movedPos = mBuffer->getTailPosition();
if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
return false;
}
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
&movedPos)) {
return false;
}
}
return true;
}
bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
const int *const codePoints, const int codePointCount) {
const int newPtNodeArrayPos = mBuffer->getTailPosition();
int childrenPosFieldPos = parentNode->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
newPtNodeArrayPos, &childrenPosFieldPos)) {
return false;
}
return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints,
codePointCount, probability);
}
bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
const int probability) {
int writingPos = mBuffer->getTailPosition();
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
1 /* arraySize */, &writingPos)) {
return false;
}
if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount,
probability, &writingPos)) {
return false;
}
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
return false;
}
return true;
}
// Returns whether the dictionary updating was succeeded or not.
bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount) {
// When addsExtraChild is true, split the reallocating PtNode and add new child.
// Reallocating PtNode: abcde, newNode: abcxy.
// abc (1st, not terminal) __ de (2nd)
// \_ xy (extra child, terminal)
// Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode.
// Reallocating PtNode: abcde, newNode: abc.
// abc (1st, terminal) __ de (2nd)
const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
int writingPos = firstPartOfReallocatedPtNodePos;
// Write the 1st part of the reallocating node. The children position will be updated later
// with actual children position.
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(),
reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability,
&writingPos)) {
return false;
}
const int actualChildrenPos = writingPos;
// Create new children PtNode array.
const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
newPtNodeCount, &writingPos)) {
return false;
}
// Write the 2nd part of the reallocating node.
const int secondPartOfReallocatedPtNodePos = writingPos;
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode,
firstPartOfReallocatedPtNodePos,
reallocatingPtNodeCodePoints + overlappingCodePointCount,
reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
reallocatingPtNode->getProbability(), &writingPos)) {
return false;
}
if (addsExtraChild) {
if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos,
newNodeCodePoints + overlappingCodePointCount,
newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
&writingPos)) {
return false;
}
}
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
return false;
}
// Update original reallocatingPtNode as moved.
if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
secondPartOfReallocatedPtNodePos)) {
return false;
}
// Load node info. Information of the 1st part will be fetched.
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos);
// Update children position.
int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
actualChildrenPos, &childrenPosFieldPos)) {
return false;
}
return true;
}
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
int *const outUnigramCount, int *const outBigramCount) {
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
headerPolicy, this, mBuffer, mNeedsToDecay);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false;
}
if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
.getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more unigrams.
}
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
traversePolicyToUpdateBigramProbability(mBigramPolicy);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateBigramProbability)) {
return false;
}
if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
> ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more bigrams.
}
// Mapping from positions in mBuffer to positions in bufferToWrite.
DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite,
&dictPositionRelocationMap);
if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
&traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
return false;
}
// Create policy instance for the GCed dictionary.
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
mNeedsToDecay);
// Create reading helper for the GCed dictionary.
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
&newDictShortcutPolicy);
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,
&dictPositionRelocationMap);
if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
&traversePolicyToUpdateAllPositionFields)) {
return false;
}
*outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
*outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount();
return true;
}
int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
const int newProbability) {
if (mNeedsToDecay) {
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
newProbability);
} else {
return newProbability;
}
}
} // namespace latinime