blob: 15050c3343b2f29ad2246f63d23b9dfec346db8f [file]
/* liblouis Braille Translation and Back-Translation Library
Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by The
BRLTTY Team
Copyright (C) 2004, 2005, 2006 ViewPlus Technologies, Inc. www.viewplus.com
Copyright (C) 2004, 2005, 2006 JJB Software, Inc. www.jjb-software.com
This file is part of liblouis.
liblouis is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 2.1 of the License, or
(at your option) any later version.
liblouis is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* @brief Translate from braille
*/
#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "internal.h"
typedef struct {
int size;
widechar **buffers;
int *inUse;
widechar *(*alloc)(int index, int length);
void (*free)(widechar *);
} StringBufferPool;
/* noContractMode states: tracks nocontractsign scope during back-translation */
/* Note that the LAPSED state is necessary as a stop-gap measure to
* approximate seqdelimiter behavior. Without it, begword rules could
* e.g. not fire after a hyphen. In forward translation, the
* equivalent scenario is handled by seqdelimiter (and seqbeforechars,
* seqafterchars and seqafterpattern), but the backward translation
* code does not take into account seqdelimiter yet.
*/
#define NO_CONTRACT_OFF 0 /* not in nocontractsign scope */
#define NO_CONTRACT_ACTIVE 1 /* after nocontractsign, contractions suppressed */
#define NO_CONTRACT_LAPSED 2 /* crossed non-letter, whole-word suppressed */
typedef struct {
int nextUpper;
int allUpper;
int allUpperPhrase;
int itsANumber;
int noContractMode;
formtype
activeWordEmphasis; /* emphasis for current word (cleared at word boundary) */
formtype activePhraseEmphasis; /* emphasis for current phrase (cleared by explicit
end) */
formtype nextCharEmphasis; /* emphasis for next character only (letter indicators) */
} TranslationContext;
static widechar *
allocStringBuffer(int index, int length) {
return _lou_allocMem(alloc_passbuf, index, 0, length);
}
static const StringBufferPool *stringBufferPool = NULL;
static void
initStringBufferPool() {
static widechar *stringBuffers[MAXPASSBUF] = { NULL };
static int stringBuffersInUse[MAXPASSBUF] = { 0 };
StringBufferPool *pool = malloc(sizeof(StringBufferPool));
pool->size = MAXPASSBUF;
pool->buffers = stringBuffers;
pool->inUse = stringBuffersInUse;
pool->alloc = &allocStringBuffer;
pool->free = NULL;
stringBufferPool = pool;
}
static int
getStringBuffer(int length) {
int i;
if (!stringBufferPool) initStringBufferPool();
for (i = 0; i < stringBufferPool->size; i++) {
if (!stringBufferPool->inUse[i]) {
stringBufferPool->buffers[i] = stringBufferPool->alloc(i, length);
stringBufferPool->inUse[i] = 1;
return i;
}
}
_lou_outOfMemory();
return -1;
}
static int
releaseStringBuffer(int idx) {
if (!stringBufferPool) {
_lou_logMessage(LOU_LOG_ERROR,
"Attempt to free string buffer prior to initialization of pool");
return 0;
}
if (idx >= 0 && idx < stringBufferPool->size) {
int inUse = stringBufferPool->inUse[idx];
if (inUse && stringBufferPool->free)
stringBufferPool->free(stringBufferPool->buffers[idx]);
stringBufferPool->inUse[idx] = 0;
return inUse;
}
return 0;
}
typedef struct {
int bufferIndex;
const widechar *chars;
int length;
} InString;
typedef struct {
int bufferIndex;
widechar *chars;
int maxlength;
int length;
} OutString;
typedef struct {
int startMatch;
int startReplace;
int endReplace;
int endMatch;
} PassRuleMatch;
static int
backTranslateString(const TranslationTableHeader *table, int mode, int currentPass,
const InString *input, OutString *output, unsigned char *typebuf, char *spacebuf,
int *posMapping, int *realInlen, int *cursorPosition, int *cursorStatus,
const TranslationTableRule **appliedRules, int *appliedRulesCount,
int maxAppliedRules);
static int
makeCorrections(const TranslationTableHeader *table, int mode, int currentPass,
const InString *input, OutString *output, int *posMapping, int *realInlen,
int *cursorPosition, int *cursorStatus, const TranslationTableRule **appliedRules,
int *appliedRulesCount, int maxAppliedRules);
static int
translatePass(const TranslationTableHeader *table, int mode, int currentPass,
const InString *input, OutString *output, int *posMapping, int *realInlen,
int *cursorPosition, int *cursorStatus, const TranslationTableRule **appliedRules,
int *appliedRulesCount, int maxAppliedRules);
static void
passSelectRule(const TranslationTableHeader *table, int pos, int currentPass,
const InString *input, TranslationTableOpcode *currentOpcode,
const TranslationTableRule **currentRule, const widechar **passInstructions,
int *passIC, PassRuleMatch *match);
int EXPORT_CALL
lou_backTranslateString(const char *tableList, const widechar *inbuf, int *inlen,
widechar *outbuf, int *outlen, formtype *typeform, char *spacing, int modex) {
return lou_backTranslate(tableList, inbuf, inlen, outbuf, outlen, typeform, spacing,
NULL, NULL, NULL, modex);
}
int EXPORT_CALL
lou_backTranslate(const char *tableList, const widechar *inbuf, int *inlen,
widechar *outbuf, int *outlen, formtype *typeform, char *spacing, int *outputPos,
int *inputPos, int *cursorPos, int modex) {
return _lou_backTranslate(tableList, tableList, inbuf, inlen, outbuf, outlen,
typeform, spacing, outputPos, inputPos, cursorPos, modex, NULL, NULL);
}
int EXPORT_CALL
_lou_backTranslate(const char *tableList, const char *displayTableList,
const widechar *inbuf, int *inlen, widechar *outbuf, int *outlen,
formtype *typeform, char *spacing, int *outputPos, int *inputPos, int *cursorPos,
int mode, const TranslationTableRule **rules, int *rulesLen) {
const TranslationTableHeader *table;
const DisplayTableHeader *displayTable;
InString input;
OutString output;
unsigned char *typebuf = NULL;
char *spacebuf;
// posMapping contains position mapping info between the output of the current pass
// and the initial input. It is 1 longer than the (consumed) input. The values are
// monotonically increasing and can range between -1 and the output length. At the end
// the position info is passed to the user as an inputPos and outputPos array.
// inputPos has the length of the final output and has values ranging from 0 to
// inlen-1. outputPos has the length of the (consumed) initial input and has values
// ranging from 0 to outlen-1.
int *posMapping = NULL;
int *posMapping1;
int *posMapping2;
int *posMapping3;
int cursorPosition;
int cursorStatus;
const TranslationTableRule **appliedRules;
int maxAppliedRules;
int appliedRulesCount;
int k;
int idx;
if (tableList == NULL || inbuf == NULL || inlen == NULL || outbuf == NULL ||
outlen == NULL)
return 0;
if (displayTableList == NULL) displayTableList = tableList;
_lou_getTable(tableList, displayTableList, &table, &displayTable);
if (table == NULL) return 0;
if (!_lou_isValidMode(mode))
_lou_logMessage(LOU_LOG_ERROR, "Invalid mode parameter: %d", mode);
if (!stringBufferPool) initStringBufferPool();
for (idx = 0; idx < stringBufferPool->size; idx++) releaseStringBuffer(idx);
{
widechar *passbuf1;
int srcmax;
k = 0;
while (k < *inlen && inbuf[k]) k++;
srcmax = k;
idx = getStringBuffer(srcmax);
passbuf1 = stringBufferPool->buffers[idx];
for (k = 0; k < srcmax; k++)
if ((mode & dotsIO))
if ((mode & ucBrl))
passbuf1[k] = (inbuf[k] & 0xff) | LOU_DOTS;
else
passbuf1[k] = inbuf[k] | LOU_DOTS;
else
passbuf1[k] = _lou_getDotsForChar(inbuf[k], displayTable);
passbuf1[srcmax] = _lou_getDotsForChar(' ', displayTable);
input = (InString){ .chars = passbuf1, .length = srcmax, .bufferIndex = idx };
}
idx = getStringBuffer(*outlen);
output = (OutString){ .chars = stringBufferPool->buffers[idx],
.maxlength = *outlen,
.length = 0,
.bufferIndex = idx };
typebuf = (unsigned char *)typeform;
spacebuf = spacing;
if (outputPos != NULL)
for (k = 0; k < input.length; k++) outputPos[k] = -1;
if (cursorPos != NULL)
cursorPosition = *cursorPos;
else
cursorPosition = -1;
cursorStatus = 0;
if (typebuf != NULL) memset(typebuf, 0, *outlen * sizeof(formtype));
if (spacebuf != NULL) memset(spacebuf, '*', *outlen);
if (!(posMapping1 = _lou_allocMem(alloc_posMapping1, 0, input.length, *outlen)))
return 0;
if (table->numPasses > 1 || table->corrections) {
if (!(posMapping2 = _lou_allocMem(alloc_posMapping2, 0, input.length, *outlen)))
return 0;
if (!(posMapping3 = _lou_allocMem(alloc_posMapping3, 0, input.length, *outlen)))
return 0;
}
appliedRulesCount = 0;
if (rules != NULL && rulesLen != NULL) {
appliedRules = rules;
maxAppliedRules = *rulesLen;
} else {
appliedRules = NULL;
maxAppliedRules = 0;
}
posMapping = posMapping1;
int currentPass = table->numPasses;
int lastPass = table->corrections ? 0 : 1;
int *passPosMapping = posMapping;
while (1) {
int realInlen;
switch (currentPass) {
case 1:
if (!backTranslateString(table, mode, currentPass, &input, &output, typebuf,
spacebuf, passPosMapping, &realInlen, &cursorPosition,
&cursorStatus, appliedRules, &appliedRulesCount, maxAppliedRules))
return 0;
break;
case 0:
if (!makeCorrections(table, mode, currentPass, &input, &output,
passPosMapping, &realInlen, &cursorPosition, &cursorStatus,
appliedRules, &appliedRulesCount, maxAppliedRules))
return 0;
break;
default:
if (!translatePass(table, mode, currentPass, &input, &output, passPosMapping,
&realInlen, &cursorPosition, &cursorStatus, appliedRules,
&appliedRulesCount, maxAppliedRules))
return 0;
break;
}
currentPass--;
passPosMapping[realInlen] = output.length;
if (passPosMapping == posMapping) {
passPosMapping = posMapping2;
if (realInlen < input.length) *inlen = realInlen;
} else {
int *prevPosMapping = posMapping3;
memcpy((int *)prevPosMapping, posMapping, (*inlen + 1) * sizeof(int));
for (k = 0; k <= *inlen; k++) {
if (prevPosMapping[k] < 0)
posMapping[k] = passPosMapping[0];
else if (prevPosMapping[k] < realInlen)
posMapping[k] = passPosMapping[prevPosMapping[k]];
else if (prevPosMapping[k] == realInlen) {
// outputPos is allowed to point to right after the last output
// character if the input character was deleted
if (realInlen < input.length) {
// however if there was back-tracking, we know that this is not
// the case
*inlen = k;
posMapping[k] = output.length;
break;
} else
posMapping[k] = passPosMapping[prevPosMapping[k]];
} else {
// this means there has been back-tracking to a point within a segment
// that was atomic in the previous pass
// it is not clear what should happen in this case
*inlen = k;
posMapping[k] = output.length;
break;
}
}
}
if (currentPass >= lastPass) {
releaseStringBuffer(input.bufferIndex);
input = (InString){ .chars = output.chars,
.length = output.length,
.bufferIndex = output.bufferIndex };
idx = getStringBuffer(*outlen);
output = (OutString){ .chars = stringBufferPool->buffers[idx],
.maxlength = *outlen,
.length = 0,
.bufferIndex = idx };
continue;
}
break;
}
for (k = 0; k < output.length; k++) outbuf[k] = output.chars[k];
*outlen = output.length;
if (inputPos != NULL) {
int inpos = -1;
int outpos = -1;
for (k = 0; k < *inlen; k++)
if (posMapping[k] > outpos) {
while (outpos < posMapping[k]) {
if (outpos >= 0 && outpos < *outlen)
inputPos[outpos] = inpos < 0 ? 0 : inpos;
outpos++;
}
inpos = k;
}
if (outpos < 0) outpos = 0;
while (outpos < *outlen) inputPos[outpos++] = inpos;
}
if (outputPos != NULL) {
for (k = 0; k < *inlen; k++)
if (posMapping[k] < 0)
outputPos[k] = 0;
else if (posMapping[k] > *outlen - 1)
outputPos[k] = *outlen - 1;
else
outputPos[k] = posMapping[k];
}
if (cursorPos != NULL && *cursorPos != -1) {
if (outputPos != NULL)
*cursorPos = outputPos[*cursorPos];
else
*cursorPos = cursorPosition;
}
if (rulesLen != NULL) *rulesLen = appliedRulesCount;
return 1;
}
static TranslationTableCharacter *
getChar(widechar c, const TranslationTableHeader *table) {
static TranslationTableCharacter notFound = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0, 32,
0, 0 };
unsigned long int makeHash = _lou_charHash(c);
TranslationTableOffset bucket = table->characters[makeHash];
while (bucket) {
TranslationTableCharacter *character =
(TranslationTableCharacter *)&table->ruleArea[bucket];
if (character->value == c) return character;
bucket = character->next;
}
notFound.value = c;
return &notFound;
}
static TranslationTableCharacter *
getDots(widechar c, const TranslationTableHeader *table) {
static TranslationTableCharacter notFound = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0,
LOU_DOTS, 0, 0 };
unsigned long int makeHash = _lou_charHash(c);
TranslationTableOffset bucket = table->dots[makeHash];
while (bucket) {
TranslationTableCharacter *character =
(TranslationTableCharacter *)&table->ruleArea[bucket];
if (character->value == c) return character;
bucket = character->next;
}
notFound.value = c;
return &notFound;
}
static int
checkDotsAttr(const widechar d, const TranslationTableCharacterAttributes a,
const TranslationTableHeader *table) {
static widechar prevd = 0;
static TranslationTableCharacterAttributes preva = 0;
if (d != prevd) {
preva = (getDots(d, table))->attributes;
prevd = d;
}
return ((preva & a) ? 1 : 0);
}
static int
compareDots(const widechar *address1, const widechar *address2, int count) {
int k;
if (!count) return 0;
for (k = 0; k < count; k++)
if (address1[k] != address2[k]) return 0;
return 1;
}
static void
back_setBefore(const TranslationTableHeader *table, OutString *output,
TranslationTableCharacterAttributes *beforeAttributes) {
widechar before = (output->length == 0) ? ' ' : output->chars[output->length - 1];
*beforeAttributes = (getChar(before, table))->attributes;
}
static void
back_setAfter(int length, const TranslationTableHeader *table, int pos,
const InString *input, TranslationTableCharacterAttributes *afterAttributes) {
widechar after = (pos + length < input->length) ? input->chars[pos + length] : ' ';
*afterAttributes = (getDots(after, table))->attributes;
}
static int
isBegWord(const TranslationTableHeader *table, OutString *output) {
/* See if this is really the beginning of a word. Look at what has
* already been translated. */
int k;
if (output->length == 0) return 1;
for (k = output->length - 1; k >= 0; k--) {
const TranslationTableCharacter *ch = getChar(output->chars[k], table);
if (ch->attributes & CTC_Space) break;
if (ch->attributes & (CTC_Letter | CTC_Digit | CTC_Math | CTC_Sign)) return 0;
}
return 1;
}
static int
isEndWord(const TranslationTableHeader *table, int pos, int mode, const InString *input,
int currentDotslen) {
if (mode & partialTrans) return 0;
/* See if this is really the end of a word. */
int k;
const TranslationTableCharacter *dots;
TranslationTableOffset testRuleOffset;
TranslationTableRule *testRule;
for (k = pos + currentDotslen; k < input->length; k++) {
int postpuncFound = 0;
int TranslationFound = 0;
dots = getDots(input->chars[k], table);
testRuleOffset = dots->otherRules;
if (dots->attributes & CTC_Space) break;
if (dots->attributes & CTC_Letter) return 0;
while (testRuleOffset) {
testRule = (TranslationTableRule *)&table->ruleArea[testRuleOffset];
/* #360: Don't treat begword/midword as definite translations here
* because we don't know whether they apply yet. Subsequent
* input will allow us to determine whether the word continues.
*/
if (testRule->charslen > 1 && testRule->opcode != CTO_BegWord &&
testRule->opcode != CTO_MidWord)
TranslationFound = 1;
if (testRule->opcode == CTO_PostPunc) postpuncFound = 1;
if (testRule->opcode == CTO_Hyphen) return 1;
testRuleOffset = testRule->dotsnext;
}
if (TranslationFound && !postpuncFound) return 0;
}
return 1;
}
static int
findBrailleIndicatorRule(TranslationTableOffset offset,
const TranslationTableHeader *table, int *currentDotslen,
TranslationTableOpcode *currentOpcode, const TranslationTableRule **currentRule) {
if (!offset) return 0;
*currentRule = (TranslationTableRule *)&table->ruleArea[offset];
*currentOpcode = (*currentRule)->opcode;
*currentDotslen = (*currentRule)->dotslen;
return 1;
}
/* Identify which emphasis class and indicator type a emphasis rule belongs to.
* Returns the emphasis class index (0 to MAX_EMPH_CLASSES-1) and sets
* indicatorType to the EmphCodeOffset value. Returns -1 if not found. */
static int
findEmphasisClass(const TranslationTableHeader *table, const TranslationTableRule *rule,
int *indicatorType) {
int i;
/* Calculate rule offset: ruleArea is an array of TranslationTableData (8 bytes each),
* and offsets stored in emphRules are indices into this array */
TranslationTableOffset ruleOffset =
(TranslationTableOffset)(((char *)rule - (char *)&table->ruleArea[0]) /
sizeof(TranslationTableData));
for (i = 0; i < MAX_EMPH_CLASSES; i++) {
if (table->emphClasses[i].value == 0) continue; /* class not defined */
/* Check all indicator types for this emphasis class */
if (table->emphRules[i][letterOffset] == ruleOffset) {
*indicatorType = letterOffset;
return i;
}
if (table->emphRules[i][begWordOffset] == ruleOffset) {
*indicatorType = begWordOffset;
return i;
}
if (table->emphRules[i][endWordOffset] == ruleOffset) {
*indicatorType = endWordOffset;
return i;
}
if (table->emphRules[i][begPhraseOffset] == ruleOffset) {
*indicatorType = begPhraseOffset;
return i;
}
if (table->emphRules[i][endPhraseBeforeOffset] == ruleOffset) {
*indicatorType = endPhraseBeforeOffset;
return i;
}
if (table->emphRules[i][endPhraseAfterOffset] == ruleOffset) {
*indicatorType = endPhraseAfterOffset;
return i;
}
if (table->emphRules[i][begOffset] == ruleOffset) {
*indicatorType = begOffset;
return i;
}
if (table->emphRules[i][endOffset] == ruleOffset) {
*indicatorType = endOffset;
return i;
}
}
return -1;
}
static int
handleMultind(const TranslationTableHeader *table, int *currentDotslen,
TranslationTableOpcode *currentOpcode, const TranslationTableRule **currentRule,
int *doingMultind, const TranslationTableRule *multindRule) {
/* Handle multille braille indicators */
int found = 0;
if (!*doingMultind) return 0;
switch (multindRule->charsdots[multindRule->charslen - *doingMultind]) {
case CTO_CapsLetter: // FIXME: make sure this works
found = findBrailleIndicatorRule(table->emphRules[MAX_EMPH_CLASSES][letterOffset],
table, currentDotslen, currentOpcode, currentRule);
break;
// NOTE: following fixme is based on the names at the time of
// commit f22f91eb510cb4eef33dfb4950a297235dd2f9f1.
// FIXME: the next two opcodes were begcaps/endcaps,
// and they were aliased to opcodes capsword/capswordstop.
// However, the table attributes they use are
// table->beginCapitalSign and table->endCapitalSign.
// These are actually compiled with firstlettercaps/lastlettercaps.
// Which to use here?
case CTO_BegCapsWord:
found = findBrailleIndicatorRule(
table->emphRules[MAX_EMPH_CLASSES][begWordOffset], table, currentDotslen,
currentOpcode, currentRule);
break;
case CTO_EndCapsWord:
found = findBrailleIndicatorRule(
table->emphRules[MAX_EMPH_CLASSES][endWordOffset], table, currentDotslen,
currentOpcode, currentRule);
break;
case CTO_LetterSign:
found = findBrailleIndicatorRule(
table->letterSign, table, currentDotslen, currentOpcode, currentRule);
break;
case CTO_NoContractSign:
found = findBrailleIndicatorRule(
table->noContractSign, table, currentDotslen, currentOpcode, currentRule);
break;
case CTO_NumberSign:
found = findBrailleIndicatorRule(
table->numberSign, table, currentDotslen, currentOpcode, currentRule);
break;
case CTO_NoNumberSign:
found = findBrailleIndicatorRule(
table->noNumberSign, table, currentDotslen, currentOpcode, currentRule);
break;
case CTO_BegComp:
found = findBrailleIndicatorRule(
table->begComp, table, currentDotslen, currentOpcode, currentRule);
break;
case CTO_EndComp:
found = findBrailleIndicatorRule(
table->endComp, table, currentDotslen, currentOpcode, currentRule);
break;
default:
found = 0;
break;
}
(*doingMultind)--;
return found;
}
static int
back_passDoTest(const TranslationTableHeader *table, int pos, const InString *input,
TranslationTableOpcode currentOpcode, const TranslationTableRule *currentRule,
const widechar **passInstructions, int *passIC, PassRuleMatch *match);
static int
back_passDoAction(const TranslationTableHeader *table, int *pos, int mode,
const InString *input, OutString *output, int *posMapping, int *cursorPosition,
int *cursorStatus, TranslationContext *ctx, TranslationTableOpcode currentOpcode,
const TranslationTableRule *currentRule, const widechar *passInstructions,
int passIC, PassRuleMatch match);
static int
findBackPassRule(const TranslationTableHeader *table, int pos, int currentPass,
const InString *input, TranslationTableOpcode *currentOpcode,
const TranslationTableRule **currentRule, const widechar **passInstructions,
int *passIC, PassRuleMatch *match) {
TranslationTableOffset ruleOffset;
ruleOffset = table->backPassRules[currentPass];
while (ruleOffset) {
*currentRule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
*currentOpcode = (*currentRule)->opcode;
switch (*currentOpcode) {
case CTO_Correct:
if (currentPass != 0) goto NEXT_RULE;
break;
case CTO_Context:
if (currentPass != 1) goto NEXT_RULE;
break;
case CTO_Pass2:
if (currentPass != 2) goto NEXT_RULE;
break;
case CTO_Pass3:
if (currentPass != 3) goto NEXT_RULE;
break;
case CTO_Pass4:
if (currentPass != 4) goto NEXT_RULE;
break;
default:
goto NEXT_RULE;
}
if (back_passDoTest(table, pos, input, *currentOpcode, *currentRule,
passInstructions, passIC, match))
return 1;
NEXT_RULE:
ruleOffset = (*currentRule)->dotsnext;
}
return 0;
}
static void
back_selectRule(const TranslationTableHeader *table, int pos, int mode,
const InString *input, OutString *output, const TranslationContext ctx,
int *currentDotslen, TranslationTableOpcode *currentOpcode,
const TranslationTableRule **currentRule, TranslationTableOpcode previousOpcode,
int *doingMultind, const TranslationTableRule **multindRule,
TranslationTableCharacterAttributes beforeAttributes,
const widechar **passInstructions, int *passIC, PassRuleMatch *patternMatch,
int hasTypebuf) {
/* check for valid back-translations */
int length = input->length - pos;
TranslationTableOffset ruleOffset = 0;
static TranslationTableRule pseudoRule = { 0 };
unsigned long int makeHash = 0;
const TranslationTableCharacter *dots = getDots(input->chars[pos], table);
int tryThis;
if (handleMultind(table, currentDotslen, currentOpcode, currentRule, doingMultind,
*multindRule))
return;
for (tryThis = 0; tryThis < 3; tryThis++) {
switch (tryThis) {
case 0:
if (length < 2 || (ctx.itsANumber && (dots->attributes & CTC_LitDigit)))
break;
/* Hash function optimized for backward translation */
makeHash = (unsigned long int)dots->value << 8;
makeHash += (unsigned long int)(getDots(input->chars[pos + 1], table))->value;
makeHash %= HASHNUM;
ruleOffset = table->backRules[makeHash];
break;
case 1:
if (!(length >= 1)) break;
length = 1;
ruleOffset = dots->otherRules;
break;
case 2: /* No rule found */
*currentRule = &pseudoRule;
*currentOpcode = pseudoRule.opcode = CTO_None;
*currentDotslen = pseudoRule.dotslen = 1;
pseudoRule.charsdots[0] = input->chars[pos];
pseudoRule.charslen = 0;
return;
break;
}
while (ruleOffset) {
const widechar *currentDots;
*currentRule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
*currentOpcode = (*currentRule)->opcode;
if (*currentOpcode == CTO_Context) {
currentDots = &(*currentRule)->charsdots[0];
*currentDotslen = (*currentRule)->charslen;
} else {
currentDots = &(*currentRule)->charsdots[(*currentRule)->charslen];
*currentDotslen = (*currentRule)->dotslen;
}
if (((*currentDotslen <= length) &&
compareDots(&input->chars[pos], currentDots, *currentDotslen))) {
TranslationTableCharacterAttributes afterAttributes;
/* check this rule */
back_setAfter(*currentDotslen, table, pos, input, &afterAttributes);
if ((!((*currentRule)->after & ~CTC_EmpMatch) ||
(beforeAttributes & (*currentRule)->after)) &&
(!((*currentRule)->before & ~CTC_EmpMatch) ||
(afterAttributes & (*currentRule)->before))) {
switch (*currentOpcode) { /* check validity of this Translation */
case CTO_Context:
if (back_passDoTest(table, pos, input, *currentOpcode,
*currentRule, passInstructions, passIC, patternMatch))
return;
break;
case CTO_Space:
case CTO_Digit:
case CTO_Letter:
case CTO_UpperCase:
case CTO_LowerCase:
case CTO_Punctuation:
case CTO_Math:
case CTO_Sign:
case CTO_ExactDots:
case CTO_Repeated:
case CTO_Hyphen:
return;
case CTO_LitDigit:
if (ctx.itsANumber) return;
break;
case CTO_CapsLetter:
case CTO_BegCaps:
case CTO_EndCaps:
case CTO_BegCapsWord:
case CTO_EndCapsWord:
case CTO_BegEmph:
case CTO_EndEmph:
case CTO_NumberSign:
case CTO_BegComp:
case CTO_EndComp:
return;
case CTO_EmphLetter:
case CTO_BegEmphWord:
case CTO_EndEmphWord:
case CTO_BegEmphPhrase:
case CTO_EndEmphPhrase:
if (hasTypebuf) return;
break;
case CTO_NoContractSign:
/* This is just a heuristic test. During forward translation, the
nocontractsign is inserted when the following character is a
contraction, so CTC_Letter | CTC_Sign */
if ((afterAttributes & (CTC_Letter | CTC_Sign))) return;
break;
case CTO_LetterSign:
case CTO_NoNumberSign:
/* This is just a heuristic test. During forward translation, the
nonumsign is inserted when in numeric mode and the next
character is not numeric (CTC_Digit | CTC_LitDigit |
CTC_NumericMode | CTC_MidEndNumericMode) */
if (!(beforeAttributes & CTC_Letter) &&
(afterAttributes & (CTC_Letter | CTC_Sign)))
return;
break;
case CTO_MultInd:
*doingMultind = *currentDotslen;
*multindRule = *currentRule;
if (handleMultind(table, currentDotslen, currentOpcode,
currentRule, doingMultind, *multindRule))
return;
break;
case CTO_LargeSign:
return;
case CTO_WholeWord:
if (mode & partialTrans) break;
if (ctx.noContractMode || ctx.itsANumber) break;
if ((beforeAttributes & (CTC_Space | CTC_Punctuation)) &&
((afterAttributes & CTC_Space) ||
isEndWord(table, pos, mode, input,
*currentDotslen)))
return;
break;
case CTO_LowWord:
if (mode & partialTrans) break;
if ((beforeAttributes & CTC_Space) &&
(afterAttributes & CTC_Space) &&
(previousOpcode != CTO_JoinableWord))
return;
break;
case CTO_JoinNum:
case CTO_JoinableWord:
if ((beforeAttributes & (CTC_Space | CTC_Punctuation)) &&
(!(afterAttributes & CTC_Space) || mode & partialTrans))
return;
break;
case CTO_SuffixableWord:
if (ctx.noContractMode || ctx.itsANumber) break;
if (beforeAttributes & (CTC_Space | CTC_Punctuation)) return;
break;
case CTO_PrefixableWord:
if (ctx.noContractMode || ctx.itsANumber) break;
if ((beforeAttributes &
(CTC_Space | CTC_Letter | CTC_Punctuation)) &&
isEndWord(table, pos, mode, input, *currentDotslen))
return;
break;
case CTO_BegWord:
if (ctx.noContractMode == NO_CONTRACT_ACTIVE) break;
if ((beforeAttributes & (CTC_Space | CTC_Punctuation)) &&
(!isEndWord(table, pos, mode, input, *currentDotslen)))
return;
break;
case CTO_BegMidWord:
if ((beforeAttributes &
(CTC_Letter | CTC_Space | CTC_Punctuation)) &&
(!isEndWord(table, pos, mode, input, *currentDotslen)))
return;
break;
case CTO_PartWord:
if (!(beforeAttributes & CTC_LitDigit) &&
(beforeAttributes & CTC_Letter ||
!isEndWord(table, pos, mode, input,
*currentDotslen)))
return;
break;
case CTO_MidWord:
if (beforeAttributes & CTC_Letter &&
!isEndWord(table, pos, mode, input, *currentDotslen))
return;
break;
case CTO_MidEndWord:
if ((beforeAttributes & CTC_Letter)) return;
break;
case CTO_EndWord:
if ((beforeAttributes & CTC_Letter) &&
isEndWord(table, pos, mode, input, *currentDotslen))
return;
break;
case CTO_BegNum:
if (beforeAttributes & (CTC_Space | CTC_Punctuation) &&
(afterAttributes & (CTC_LitDigit | CTC_Sign)))
return;
break;
case CTO_MidNum:
if (beforeAttributes & CTC_Digit &&
afterAttributes & CTC_LitDigit)
return;
break;
case CTO_EndNum:
if (ctx.itsANumber && !(afterAttributes & CTC_LitDigit)) return;
break;
case CTO_DecPoint:
if (afterAttributes & (CTC_Digit | CTC_LitDigit)) return;
break;
case CTO_PrePunc:
if (isBegWord(table, output)) return;
break;
case CTO_PostPunc:
if (isEndWord(table, pos, mode, input, *currentDotslen)) return;
break;
case CTO_Always:
if ((beforeAttributes & CTC_LitDigit) &&
(afterAttributes & CTC_LitDigit) &&
(*currentRule)->charslen > 1)
break;
return;
case CTO_BackMatch: {
widechar *patterns, *pattern;
// if(dontContract || (mode & noContractions))
// break;
// if(checkEmphasisChange(0))
// break;
patterns = (widechar *)&table->ruleArea[(*currentRule)->patterns];
/* check before pattern */
pattern = &patterns[1];
if (!_lou_pattern_check(
input->chars, pos - 1, -1, -1, pattern, table))
break;
/* check after pattern */
pattern = &patterns[patterns[0]];
if (!_lou_pattern_check(input->chars,
pos + (*currentRule)->dotslen, input->length, 1,
pattern, table))
break;
return;
}
default:
break;
}
}
} /* Done with checking this rule */
ruleOffset = (*currentRule)->dotsnext;
}
}
}
static widechar
toLowercase(
const TranslationTableHeader *table, const TranslationTableCharacter *character) {
if (character->mode & CTC_UpperCase) {
const TranslationTableCharacter *c = character;
if (c->basechar) c = (TranslationTableCharacter *)&table->ruleArea[c->basechar];
while (1) {
if ((c->mode & (character->mode & ~CTC_UpperCase)) ==
(character->mode & ~CTC_UpperCase))
return c->value;
if (!c->linked) break;
c = (TranslationTableCharacter *)&table->ruleArea[c->linked];
}
}
return character->value;
}
static widechar
toUppercase(
const TranslationTableHeader *table, const TranslationTableCharacter *character) {
const TranslationTableCharacter *c = character;
if (c->basechar) c = (TranslationTableCharacter *)&table->ruleArea[c->basechar];
while (c->linked) {
c = (TranslationTableCharacter *)&table->ruleArea[c->linked];
if ((c->mode & (character->mode | CTC_UpperCase)) ==
(character->mode | CTC_UpperCase))
return c->value;
}
return character->value;
}
static int
putchars(const widechar *chars, int count, const TranslationTableHeader *table,
OutString *output, TranslationContext *ctx) {
int k = 0;
if (!count || (output->length + count) > output->maxlength) return 0;
if (ctx->nextUpper) {
output->chars[(output->length)++] =
toUppercase(table, getChar(chars[k++], table));
ctx->nextUpper = 0;
}
if (!ctx->allUpper && !ctx->allUpperPhrase) {
memcpy(&output->chars[output->length], &chars[k], CHARSIZE * (count - k));
output->length += count - k;
} else
for (; k < count; k++) {
const TranslationTableCharacter *c = getChar(chars[k], table);
/* In capsword mode, a non-letter non-capsmode character terminates
* the capsword. This is needed here because multi-character rules
* (e.g. endword 's) output multiple chars in one putchars call,
* bypassing the main loop's per-iteration termination
* check. Capsphrase is not affected. */
if (ctx->allUpper == 2 && !ctx->allUpperPhrase &&
!(c->attributes & CTC_Letter) && !(c->attributes & CTC_CapsMode)) {
ctx->allUpper = 0;
memcpy(&output->chars[output->length], &chars[k], CHARSIZE * (count - k));
output->length += count - k;
return 1;
}
output->chars[(output->length)++] = toUppercase(table, c);
}
return 1;
}
static int
back_updatePositions(const widechar *outChars, int inLength, int outLength,
const TranslationTableHeader *table, int pos, const InString *input,
OutString *output, int *posMapping, int *cursorPosition, int *cursorStatus,
TranslationContext *ctx) {
int k;
if ((output->length + outLength) > output->maxlength ||
(pos + inLength) > input->length)
return 0;
if (!*cursorStatus && *cursorPosition >= pos && *cursorPosition < (pos + inLength)) {
*cursorPosition = output->length + outLength / 2;
*cursorStatus = 1;
}
for (k = 0; k < inLength; k++) posMapping[pos + k] = output->length;
return putchars(outChars, outLength, table, output, ctx);
}
static int
undefinedDots(widechar dots, int mode, OutString *output, int pos, int *posMapping) {
posMapping[pos] = output->length;
if (mode & noUndefined) return 1;
/* Print out dot numbers */
const char *buffer = _lou_unknownDots(dots);
size_t buflen = strlen(buffer);
if ((output->length + buflen) > output->maxlength) return 0;
for (unsigned int k = 0; k < buflen; k += 1) {
output->chars[output->length++] = buffer[k];
}
return 1;
}
static int
putCharacter(widechar dots, const TranslationTableHeader *table, int pos, int mode,
const InString *input, OutString *output, int *posMapping, int *cursorPosition,
int *cursorStatus, TranslationContext *ctx) {
/* Output character(s) corresponding to a Unicode braille Character */
TranslationTableOffset offset = (getDots(dots, table))->definitionRule;
if (offset) {
const TranslationTableRule *rule =
(TranslationTableRule *)&table->ruleArea[offset];
return back_updatePositions(&rule->charsdots[0], rule->dotslen, rule->charslen,
table, pos, input, output, posMapping, cursorPosition, cursorStatus, ctx);
}
return undefinedDots(dots, mode, output, pos, posMapping);
}
static int
putCharacters(const widechar *characters, int count, const TranslationTableHeader *table,
int pos, int mode, const InString *input, OutString *output, int *posMapping,
int *cursorPosition, int *cursorStatus, TranslationContext *ctx) {
int k;
for (k = 0; k < count; k++)
if (!putCharacter(characters[k], table, pos, mode, input, output, posMapping,
cursorPosition, cursorStatus, ctx))
return 0;
return 1;
}
static int
insertSpace(const TranslationTableHeader *table, int pos, const InString *input,
OutString *output, char *spacebuf, int *posMapping, int *cursorPosition,
int *cursorStatus, TranslationContext *ctx) {
widechar c = ' ';
if (!back_updatePositions(&c, 1, 1, table, pos, input, output, posMapping,
cursorPosition, cursorStatus, ctx))
return 0;
if (spacebuf) spacebuf[output->length - 1] = '1';
return 1;
}
static int
compareChars(const widechar *address1, const widechar *address2, int count,
const TranslationTableHeader *table) {
int k;
if (!count) return 0;
for (k = 0; k < count; k++)
if (toLowercase(table, getChar(address1[k], table)) !=
toLowercase(table, getChar(address2[k], table)))
return 0;
return 1;
}
static int
makeCorrections(const TranslationTableHeader *table, int mode, int currentPass,
const InString *input, OutString *output, int *posMapping, int *realInlen,
int *cursorPosition, int *cursorStatus, const TranslationTableRule **appliedRules,
int *appliedRulesCount, int maxAppliedRules) {
int pos;
int posIncremented = 1;
TranslationContext ctx = { .nextUpper = 0,
.allUpper = 0,
.allUpperPhrase = 0,
.itsANumber = 0,
.noContractMode = NO_CONTRACT_OFF };
if (!table->corrections) return 1;
pos = 0;
output->length = 0;
_lou_resetPassVariables();
while (pos < input->length) {
int posBefore = pos;
TranslationTableOpcode currentOpcode;
const TranslationTableRule *currentRule; /* pointer to current rule in table */
const widechar *passInstructions;
int passIC; /* Instruction counter */
PassRuleMatch patternMatch;
int length = input->length - pos;
const TranslationTableCharacter *character = getChar(input->chars[pos], table);
const TranslationTableCharacter *character2;
int tryThis = 0;
if (!(posIncremented &&
findBackPassRule(table, pos, currentPass, input, &currentOpcode,
&currentRule, &passInstructions, &passIC, &patternMatch)))
while (tryThis < 3) {
TranslationTableOffset ruleOffset = 0;
unsigned long int makeHash = 0;
switch (tryThis) {
case 0:
if (!(length >= 2)) break;
makeHash = (unsigned long int)toLowercase(table, character) << 8;
character2 = getChar(input->chars[pos + 1], table);
makeHash += (unsigned long int)toLowercase(table, character2);
makeHash %= HASHNUM;
ruleOffset = table->backRules[makeHash];
break;
case 1:
if (!(length >= 1)) break;
length = 1;
ruleOffset = character->otherRules;
break;
case 2: /* No rule found */
currentOpcode = CTO_Always;
ruleOffset = 0;
break;
}
while (ruleOffset) {
currentRule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
currentOpcode = currentRule->opcode;
int currentCharslen = currentRule->charslen;
if (tryThis == 1 ||
(currentCharslen <= length &&
compareChars(&currentRule->charsdots[0],
&input->chars[pos], currentCharslen,
table))) {
if (currentOpcode == CTO_Correct &&
back_passDoTest(table, pos, input, currentOpcode,
currentRule, &passInstructions, &passIC,
&patternMatch)) {
tryThis = 4;
break;
}
}
ruleOffset = currentRule->dotsnext;
}
tryThis++;
}
switch (currentOpcode) {
case CTO_Always:
if (output->length >= output->maxlength) goto failure;
posMapping[pos] = output->length;
output->chars[(output->length)++] = input->chars[pos++];
break;
case CTO_Correct:
if (appliedRules != NULL && *appliedRulesCount < maxAppliedRules)
appliedRules[(*appliedRulesCount)++] = currentRule;
if (!back_passDoAction(table, &pos, mode, input, output, posMapping,
cursorPosition, cursorStatus, &ctx, currentOpcode, currentRule,
passInstructions, passIC, patternMatch))
goto failure;
break;
default:
break;
}
posIncremented = pos > posBefore;
}
failure:
*realInlen = pos;
return 1;
}
static int
backTranslateString(const TranslationTableHeader *table, int mode, int currentPass,
const InString *input, OutString *output, unsigned char *typebuf, char *spacebuf,
int *posMapping, int *realInlen, int *cursorPosition, int *cursorStatus,
const TranslationTableRule **appliedRules, int *appliedRulesCount,
int maxAppliedRules) {
int pos;
TranslationContext ctx = { .nextUpper = 0,
.allUpper = 0,
.allUpperPhrase = 0,
.itsANumber = 0,
.noContractMode = NO_CONTRACT_OFF,
.activeWordEmphasis = 0,
.activePhraseEmphasis = 0,
.nextCharEmphasis = 0 };
/* Back translation */
int srcword = 0;
int destword = 0; /* last word translated */
TranslationTableOpcode previousOpcode;
int doingMultind = 0;
const TranslationTableRule *multindRule;
_lou_resetPassVariables();
translation_direction = 0;
previousOpcode = CTO_None;
pos = output->length = 0;
while (pos < input->length) {
/* the main translation loop */
int currentDotslen; /* length of current find string */
TranslationTableOpcode currentOpcode;
const TranslationTableRule *currentRule; /* pointer to current rule in table */
TranslationTableCharacterAttributes beforeAttributes;
const widechar *passInstructions;
int passIC; /* Instruction counter */
PassRuleMatch patternMatch;
int prevOutputLength = output->length; /* track output for typeform population */
back_setBefore(table, output, &beforeAttributes);
if ((ctx.allUpper == 1) && (beforeAttributes & CTC_UpperCase))
// Capsword in progress
ctx.allUpper = 2;
else if ((ctx.allUpper == 2) && !(beforeAttributes & CTC_UpperCase) &&
!(beforeAttributes & CTC_CapsMode))
// terminate capsword
ctx.allUpper = 0;
if ((ctx.itsANumber == 2) && output->length > 0 &&
!(beforeAttributes & CTC_LitDigit) &&
!(beforeAttributes & CTC_NumericMode) &&
!(beforeAttributes & CTC_MidEndNumericMode))
ctx.itsANumber = 0;
back_selectRule(table, pos, mode, input, output, ctx, &currentDotslen,
&currentOpcode, &currentRule, previousOpcode, &doingMultind, &multindRule,
beforeAttributes, &passInstructions, &passIC, &patternMatch,
typebuf != NULL);
if (appliedRules != NULL && *appliedRulesCount < maxAppliedRules)
appliedRules[(*appliedRulesCount)++] = currentRule;
/* processing before replacement */
switch (currentOpcode) {
case CTO_LargeSign:
if (previousOpcode == CTO_LargeSign)
if (!insertSpace(table, pos, input, output, spacebuf, posMapping,
cursorPosition, cursorStatus, &ctx))
goto failure;
break;
case CTO_CapsLetter:
ctx.nextUpper = 1;
ctx.allUpper = 0;
ctx.itsANumber = 0;
while (currentDotslen-- > 0) posMapping[pos++] = output->length;
continue;
break;
case CTO_BegCapsWord:
ctx.allUpper = 1;
ctx.itsANumber = 0;
while (currentDotslen-- > 0) posMapping[pos++] = output->length;
continue;
break;
case CTO_BegCaps:
ctx.allUpperPhrase = 1;
ctx.itsANumber = 0;
while (currentDotslen-- > 0) posMapping[pos++] = output->length;
continue;
break;
case CTO_EndCapsWord:
ctx.allUpper = 0;
ctx.itsANumber = 0;
while (currentDotslen-- > 0) posMapping[pos++] = output->length;
continue;
break;
case CTO_EndCaps:
ctx.allUpperPhrase = 0;
ctx.itsANumber = 0;
while (currentDotslen-- > 0) posMapping[pos++] = output->length;
continue;
break;
case CTO_LetterSign:
case CTO_NoNumberSign:
case CTO_NoContractSign:
ctx.noContractMode = NO_CONTRACT_ACTIVE;
ctx.itsANumber = 0;
while (currentDotslen-- > 0) posMapping[pos++] = output->length;
continue;
break;
case CTO_NumberSign:
ctx.itsANumber = 1; // Starting number
ctx.allUpper = 0;
while (currentDotslen-- > 0) posMapping[pos++] = output->length;
continue;
break;
case CTO_LitDigit:
ctx.itsANumber = 2; // In the middle of a number
break;
case CTO_BegComp:
ctx.itsANumber = 0;
while (currentDotslen-- > 0) posMapping[pos++] = output->length;
continue;
break;
case CTO_EndComp:
while (currentDotslen-- > 0) posMapping[pos++] = output->length;
continue;
break;
case CTO_EmphLetter:
case CTO_BegEmphWord:
case CTO_EndEmphWord:
case CTO_BegEmph:
case CTO_EndEmph:
case CTO_BegEmphPhrase:
case CTO_EndEmphPhrase: {
int indicatorType = -1;
int emphClass = findEmphasisClass(table, currentRule, &indicatorType);
if (emphClass >= 0) {
formtype emphBit = table->emphClasses[emphClass].typeform;
switch (currentOpcode) {
case CTO_EmphLetter:
/* Single letter emphasis - applies to next character only */
ctx.nextCharEmphasis |= emphBit;
break;
case CTO_BegEmphWord:
/* Begin word emphasis */
ctx.activeWordEmphasis |= emphBit;
break;
case CTO_EndEmphWord:
/* End word emphasis. In UEB, endemphword and endemphphrase
* share the same dot pattern (e.g., 45-3 for bold). If we
* match endemphword but there's no active word emphasis for
* this class, check if there's active phrase emphasis and
* end that instead - this handles the ambiguity correctly. */
if (ctx.activeWordEmphasis & emphBit) {
ctx.activeWordEmphasis &= ~emphBit;
} else if (ctx.activePhraseEmphasis & emphBit) {
ctx.activePhraseEmphasis &= ~emphBit;
}
break;
case CTO_BegEmphPhrase:
/* Begin phrase emphasis */
ctx.activePhraseEmphasis |= emphBit;
break;
case CTO_EndEmphPhrase:
/* End phrase emphasis */
ctx.activePhraseEmphasis &= ~emphBit;
break;
case CTO_BegEmph:
/* Generic begin (firstletter style) - treat as word */
ctx.activeWordEmphasis |= emphBit;
break;
case CTO_EndEmph:
/* Generic end (lastletter style). Like CTO_EndEmphWord,
* handle the ambiguity when endemph could mean either
* word or phrase termination. */
if (ctx.activeWordEmphasis & emphBit) {
ctx.activeWordEmphasis &= ~emphBit;
} else if (ctx.activePhraseEmphasis & emphBit) {
ctx.activePhraseEmphasis &= ~emphBit;
}
break;
default:
break;
}
}
while (currentDotslen-- > 0) posMapping[pos++] = output->length;
continue;
break;
}
default:
break;
}
/* replacement processing */
switch (currentOpcode) {
case CTO_Context:
if (!back_passDoAction(table, &pos, mode, input, output, posMapping,
cursorPosition, cursorStatus, &ctx, currentOpcode, currentRule,
passInstructions, passIC, patternMatch))
return 0;
break;
case CTO_None:
if (!undefinedDots(input->chars[pos], mode, output, pos, posMapping))
goto failure;
pos++;
break;
case CTO_BegNum:
ctx.itsANumber = 1;
goto insertChars;
case CTO_EndNum:
ctx.itsANumber = 0;
goto insertChars;
case CTO_Space:
ctx.noContractMode = NO_CONTRACT_OFF;
ctx.itsANumber = ctx.allUpper = ctx.nextUpper = 0;
goto insertChars;
default:
insertChars:
if (ctx.noContractMode == NO_CONTRACT_ACTIVE && currentOpcode != CTO_Letter &&
currentOpcode != CTO_UpperCase && currentOpcode != CTO_LowerCase)
ctx.noContractMode = NO_CONTRACT_LAPSED;
if (currentRule->charslen) {
if (!back_updatePositions(&currentRule->charsdots[0],
currentRule->dotslen, currentRule->charslen, table, pos,
input, output, posMapping, cursorPosition, cursorStatus,
&ctx))
goto failure;
pos += currentDotslen;
} else {
int srclim = pos + currentDotslen;
while (1) {
if (!putCharacter(input->chars[pos], table, pos, mode, input, output,
posMapping, cursorPosition, cursorStatus, &ctx))
goto failure;
if (++pos == srclim) break;
}
}
}
/* Save opcode before passSelectRule may overwrite it.
* When processing space characters, passSelectRule() in the default
* switch case could overwrite currentOpcode before the word-emphasis
* clearing check. This would cause word emphasis to persist across
* space boundaries, affecting all subsequent text instead of just
* the emphasized word. */
TranslationTableOpcode opcodeForEmphasis = currentOpcode;
/* processing after replacement */
switch (currentOpcode) {
case CTO_JoinNum:
case CTO_JoinableWord:
if (!insertSpace(table, pos, input, output, spacebuf, posMapping,
cursorPosition, cursorStatus, &ctx))
goto failure;
break;
default:
passSelectRule(table, pos, currentPass, input, &currentOpcode, &currentRule,
&passInstructions, &passIC, &patternMatch);
if (currentOpcode == CTO_Context) {
if (appliedRules != NULL && *appliedRulesCount < maxAppliedRules)
appliedRules[(*appliedRulesCount)++] = currentRule;
back_passDoAction(table, &pos, mode, input, output, posMapping,
cursorPosition, cursorStatus, &ctx, currentOpcode, currentRule,
passInstructions, passIC, patternMatch);
}
break;
}
/* Populate typeform buffer for newly output characters */
if (typebuf != NULL && output->length > prevOutputLength) {
formtype currentEmphasis = ctx.activePhraseEmphasis | ctx.activeWordEmphasis |
ctx.nextCharEmphasis;
formtype *typeformBuf = (formtype *)typebuf;
int k;
for (k = prevOutputLength; k < output->length; k++) {
typeformBuf[k] = currentEmphasis;
}
/* Clear single-character emphasis after it's been applied */
ctx.nextCharEmphasis = 0;
}
/* Clear word-level emphasis at word boundaries.
* NOTE: This currently only checks for spaces. The forward translator also
* consults emphmodechars to determine which non-letter characters cancel
* word emphasis (see resetsEmphMode() in lou_translateString.c). Tables
* that declare emphmodechars (e.g. Dutch, German) may produce incorrect
* typeforms here. */
if (opcodeForEmphasis == CTO_Space) {
ctx.activeWordEmphasis = 0;
}
if (((pos > 0) && checkDotsAttr(input->chars[pos - 1], CTC_Space, table) &&
(currentOpcode != CTO_JoinableWord))) {
srcword = pos;
destword = output->length;
}
if ((currentOpcode >= CTO_Always && currentOpcode <= CTO_None) ||
(currentOpcode >= CTO_Digit && currentOpcode <= CTO_LitDigit))
previousOpcode = currentOpcode;
} /* end of translation loop */
failure:
if (destword != 0 && pos < input->length &&
!checkDotsAttr(input->chars[pos], CTC_Space, table)) {
pos = srcword;
output->length = destword;
}
if (pos < input->length) {
while (checkDotsAttr(input->chars[pos], CTC_Space, table))
if (++pos == input->length) break;
}
*realInlen = pos;
return 1;
} /* translation completed */
/* Multipass translation */
static int
matchCurrentInput(
const InString *input, int pos, const widechar *passInstructions, int passIC) {
int k;
int kk = pos;
for (k = passIC + 2; k < passIC + 2 + passInstructions[passIC + 1]; k++)
if (passInstructions[k] != input->chars[kk++]) return 0;
return 1;
}
static int
back_swapTest(const TranslationTableHeader *table, const InString *input, int *pos,
const widechar *passInstructions, int passIC) {
int curLen;
int curTest;
int curSrc = *pos;
TranslationTableOffset swapRuleOffset;
TranslationTableRule *swapRule;
swapRuleOffset = (passInstructions[passIC + 1] << 16) | passInstructions[passIC + 2];
swapRule = (TranslationTableRule *)&table->ruleArea[swapRuleOffset];
for (curLen = 0; curLen < passInstructions[passIC] + 3; curLen++) {
for (curTest = 0; curTest < swapRule->charslen; curTest++) {
if (input->chars[curSrc] == swapRule->charsdots[curTest]) break;
}
if (curTest == swapRule->charslen) return 0;
curSrc++;
}
if (passInstructions[passIC + 2] == passInstructions[passIC + 3]) {
*pos = curSrc;
return 1;
}
while (curLen < passInstructions[passIC + 4]) {
for (curTest = 0; curTest < swapRule->charslen; curTest++) {
if (input->chars[curSrc] != swapRule->charsdots[curTest]) break;
}
if (curTest < swapRule->charslen) {
*pos = curSrc;
return 1;
}
curSrc++;
curLen++;
}
*pos = curSrc;
return 1;
}
static int
back_swapReplace(int start, int end, const TranslationTableHeader *table,
const InString *input, OutString *output, int *posMapping,
const widechar *passInstructions, int passIC) {
TranslationTableOffset swapRuleOffset;
TranslationTableRule *swapRule;
widechar *replacements;
int p;
int lastPos = 0;
int lastRep = 0;
swapRuleOffset = (passInstructions[passIC + 1] << 16) | passInstructions[passIC + 2];
swapRule = (TranslationTableRule *)&table->ruleArea[swapRuleOffset];
replacements = &swapRule->charsdots[swapRule->charslen];
for (p = start; p < end; p++) {
int rep;
int test;
int k;
for (test = 0; test < swapRule->charslen; test++)
if (input->chars[p] == swapRule->charsdots[test]) break;
if (test == swapRule->charslen) return p;
if (test >= lastRep) {
k = lastPos;
rep = lastRep;
} else {
k = 0;
rep = 0;
}
while (k < swapRule->dotslen) {
if (rep == test) {
int l = replacements[k] - 1;
if (output->length + l >= output->maxlength) return 0;
posMapping[p] = output->length;
memcpy(&output->chars[output->length], &replacements[k + 1],
l * CHARSIZE);
output->length += l;
lastPos = k;
lastRep = rep;
break;
}
rep++;
k += replacements[k];
}
}
return p;
}
static int
back_passDoTest(const TranslationTableHeader *table, int pos, const InString *input,
TranslationTableOpcode currentOpcode, const TranslationTableRule *currentRule,
const widechar **passInstructions, int *passIC, PassRuleMatch *match) {
int k;
int m;
int notOperator = 0;
TranslationTableCharacterAttributes attributes;
*passInstructions = &currentRule->charsdots[currentRule->charslen];
*passIC = 0;
match->startMatch = match->endMatch = pos;
match->startReplace = -1;
if (currentOpcode == CTO_Correct)
m = 0;
else
m = 1;
while (*passIC < currentRule->dotslen) {
int itsTrue = 1;
if (pos > input->length) return 0;
switch ((*passInstructions)[*passIC]) {
case pass_first:
if (pos != 0) itsTrue = 0;
(*passIC)++;
break;
case pass_last:
if (pos != input->length) itsTrue = 0;
(*passIC)++;
break;
case pass_lookback:
pos -= (*passInstructions)[*passIC + 1];
if (pos < 0) {
pos = 0;
itsTrue = 0;
}
*passIC += 2;
break;
case pass_not:
notOperator = !notOperator;
(*passIC)++;
continue;
case pass_string:
case pass_dots:
itsTrue = matchCurrentInput(input, pos, *passInstructions, *passIC);
pos += (*passInstructions)[*passIC + 1];
*passIC += (*passInstructions)[*passIC + 1] + 2;
break;
case pass_startReplace:
match->startReplace = pos;
(*passIC)++;
break;
case pass_endReplace:
match->endReplace = pos;
(*passIC)++;
break;
case pass_attributes:
attributes = (*passInstructions)[*passIC + 1];
attributes <<= 16;
attributes |= (*passInstructions)[*passIC + 2];
attributes <<= 16;
attributes |= (*passInstructions)[*passIC + 3];
attributes <<= 16;
attributes |= (*passInstructions)[*passIC + 4];
for (k = 0; k < (*passInstructions)[*passIC + 5]; k++) {
if (pos >= input->length) {
itsTrue = 0;
break;
}
if (!((m ? getDots(input->chars[pos], table)
: getChar(input->chars[pos], table))
->attributes &
attributes)) {
itsTrue = 0;
break;
}
pos++;
}
if (itsTrue) {
for (k = (*passInstructions)[*passIC + 5];
k < (*passInstructions)[*passIC + 6] && pos < input->length;
k++) {
if (!((m ? getDots(input->chars[pos], table)
: getChar(input->chars[pos], table))
->attributes &
attributes))
break;
pos++;
}
}
*passIC += 7;
break;
case pass_swap:
itsTrue = back_swapTest(table, input, &pos, *passInstructions, *passIC);
*passIC += 5;
break;
case pass_endTest: {
(*passIC)++;
match->endMatch = pos;
if (match->startReplace == -1) {
match->startReplace = match->startMatch;
match->endReplace = match->endMatch;
}
return 1;
break;
}
default:
if (_lou_handlePassVariableTest(*passInstructions, passIC, &itsTrue)) break;
return 0;
}
if ((!notOperator && !itsTrue) || (notOperator && itsTrue)) return 0;
notOperator = 0;
}
return 0;
}
static int
copyCharacters(int from, int to, const TranslationTableHeader *table, int mode,
const InString *input, OutString *output, int *posMapping, int *cursorPosition,
int *cursorStatus, TranslationContext *ctx,
TranslationTableOpcode currentOpcode) {
if (currentOpcode == CTO_Context) {
while (from < to) {
if (!putCharacter(input->chars[from], table, from, mode, input, output,
posMapping, cursorPosition, cursorStatus, ctx))
return 0;
from++;
}
} else {
if (to > from) {
if ((output->length + to - from) > output->maxlength) return 0;
while (to > from) {
posMapping[from] = output->length;
output->chars[output->length] = input->chars[from];
output->length++;
from++;
}
}
}
return 1;
}
static int
back_passDoAction(const TranslationTableHeader *table, int *pos, int mode,
const InString *input, OutString *output, int *posMapping, int *cursorPosition,
int *cursorStatus, TranslationContext *ctx, TranslationTableOpcode currentOpcode,
const TranslationTableRule *currentRule, const widechar *passInstructions,
int passIC, PassRuleMatch match) {
int k;
int destStartMatch = output->length;
int destStartReplace;
int newPos = match.endReplace;
if (!copyCharacters(match.startMatch, match.startReplace, table, mode, input, output,
posMapping, cursorPosition, cursorStatus, ctx, currentOpcode))
return 0;
destStartReplace = output->length;
for (k = match.startReplace; k < match.endReplace; k++)
posMapping[k] = output->length;
while (passIC < currentRule->dotslen) switch (passInstructions[passIC]) {
case pass_string:
case pass_dots:
if ((output->length + passInstructions[passIC + 1]) > output->maxlength)
return 0;
memcpy(&output->chars[output->length], &passInstructions[passIC + 2],
passInstructions[passIC + 1] * sizeof(*output->chars));
output->length += passInstructions[passIC + 1];
passIC += passInstructions[passIC + 1] + 2;
break;
case pass_swap:
if (!back_swapReplace(match.startReplace, match.endReplace, table, input,
output, posMapping, passInstructions, passIC))
return 0;
passIC += 3;
break;
case pass_omit:
passIC++;
break;
case pass_copy: {
int count = destStartReplace - destStartMatch;
if (count > 0) {
memmove(&output->chars[destStartMatch], &output->chars[destStartReplace],
count * sizeof(*output->chars));
output->length -= count;
destStartReplace = destStartMatch;
}
}
if (!copyCharacters(match.startReplace, match.endReplace, table, mode, input,
output, posMapping, cursorPosition, cursorStatus, ctx,
currentOpcode))
return 0;
newPos = match.endMatch;
passIC++;
break;
default:
if (_lou_handlePassVariableAction(passInstructions, &passIC)) break;
return 0;
}
*pos = newPos;
return 1;
}
static void
passSelectRule(const TranslationTableHeader *table, int pos, int currentPass,
const InString *input, TranslationTableOpcode *currentOpcode,
const TranslationTableRule **currentRule, const widechar **passInstructions,
int *passIC, PassRuleMatch *match) {
if (!findBackPassRule(table, pos, currentPass, input, currentOpcode, currentRule,
passInstructions, passIC, match)) {
*currentOpcode = CTO_Always;
}
}
static int
translatePass(const TranslationTableHeader *table, int mode, int currentPass,
const InString *input, OutString *output, int *posMapping, int *realInlen,
int *cursorPosition, int *cursorStatus, const TranslationTableRule **appliedRules,
int *appliedRulesCount, int maxAppliedRules) {
int pos;
int posIncremented = 1;
TranslationContext ctx = { .nextUpper = 0,
.allUpper = 0,
.allUpperPhrase = 0,
.itsANumber = 0,
.noContractMode = NO_CONTRACT_OFF };
pos = output->length = 0;
_lou_resetPassVariables();
while (pos < input->length) { /* the main multipass translation loop */
int posBefore = pos;
TranslationTableOpcode currentOpcode;
const TranslationTableRule *currentRule; /* pointer to current rule in table */
const widechar *passInstructions;
int passIC; /* Instruction counter */
PassRuleMatch patternMatch;
if (!posIncremented)
currentOpcode = CTO_Always;
else
passSelectRule(table, pos, currentPass, input, &currentOpcode, &currentRule,
&passInstructions, &passIC, &patternMatch);
switch (currentOpcode) {
case CTO_Pass2:
case CTO_Pass3:
case CTO_Pass4:
if (appliedRules != NULL && *appliedRulesCount < maxAppliedRules)
appliedRules[(*appliedRulesCount)++] = currentRule;
if (!back_passDoAction(table, &pos, mode, input, output, posMapping,
cursorPosition, cursorStatus, &ctx, currentOpcode, currentRule,
passInstructions, passIC, patternMatch))
goto failure;
break;
case CTO_Always:
if ((output->length + 1) > output->maxlength) goto failure;
posMapping[pos] = output->length;
output->chars[(output->length)++] = input->chars[pos++];
break;
default:
goto failure;
}
posIncremented = pos > posBefore;
}
failure:
if (pos < input->length) {
while (checkDotsAttr(input->chars[pos], CTC_Space, table))
if (++pos == input->length) break;
}
*realInlen = pos;
return 1;
}