| /* liblouis Braille Translation and Back-Translation Library |
| |
| Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by The |
| BRLTTY Team |
| |
| Copyright (C) 2004, 2005, 2006 ViewPlus Technologies, Inc. www.viewplus.com |
| Copyright (C) 2004, 2005, 2006 JJB Software, Inc. www.jjb-software.com |
| Copyright (C) 2016 Mike Gray, American Printing House for the Blind |
| Copyright (C) 2016 Davy Kager, Dedicon |
| |
| This file is part of liblouis. |
| |
| liblouis is free software: you can redistribute it and/or modify it |
| under the terms of the GNU Lesser General Public License as published |
| by the Free Software Foundation, either version 2.1 of the License, or |
| (at your option) any later version. |
| |
| liblouis is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| Lesser General Public License for more details. |
| |
| You should have received a copy of the GNU Lesser General Public |
| License along with liblouis. If not, see <http://www.gnu.org/licenses/>. |
| */ |
| |
| /** |
| * @file |
| * @brief Read and compile translation tables |
| */ |
| |
| #include <stddef.h> |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <stdarg.h> |
| #include <string.h> |
| #include <ctype.h> |
| #include <sys/stat.h> |
| |
| #include "internal.h" |
| #include "config.h" |
| |
| #define QUOTESUB 28 /* Stand-in for double quotes in strings */ |
| |
| /* Global that only exists in DEBUG builds; needed to make debugging easier */ |
| #ifdef DEBUG |
| wchar_t wchar; |
| #endif |
| |
| /* The following variables and functions make it possible to specify the |
| * path on which all tables for liblouis and all files for liblouisutdml, |
| * in their proper directories, will be found. |
| */ |
| |
| static char *dataPathPtr; /* NULL until lou_setDataPath() accepts a path; value handed back by lou_getDataPath() */ |
| |
| char *EXPORT_CALL |
| lou_setDataPath(const char *path) { |
| _lou_logMessage(LOU_LOG_WARN, "warning: lou_setDataPath is deprecated."); |
| static char dataPath[MAXSTRING]; |
| dataPathPtr = NULL; |
| if (path == NULL || strlen(path) >= MAXSTRING) return NULL; |
| strcpy(dataPath, path); |
| dataPathPtr = dataPath; |
| return dataPathPtr; |
| } |
| |
| char *EXPORT_CALL |
| lou_getDataPath(void) { |
| _lou_logMessage(LOU_LOG_WARN, "warning: lou_getDataPath is deprecated."); |
| return dataPathPtr; |
| } |
| |
| /* End of dataPath code. */ |
| |
| static int |
| eqasc2uni(const unsigned char *a, const widechar *b, const int len) { |
| int k; |
| for (k = 0; k < len; k++) |
| if ((widechar)a[k] != b[k]) return 0; |
| return 1; |
| } |
| |
| typedef struct CharsString { /* fixed-capacity widechar buffer, e.g. the token filled in by getToken() */ |
| widechar length; /* number of entries of chars[] currently in use */ |
| widechar chars[MAXSTRING]; /* getToken() stores a 0 terminator at chars[length] */ |
| } CharsString; |
| |
| static int errorCount; /* incremented by compileError() (not in __SYMBIAN32__ builds) */ |
| static int warningCount; /* incremented by compileWarning() (not in __SYMBIAN32__ builds) */ |
| |
| typedef struct TranslationTableChainEntry { /* node of the singly linked list headed by translationTableChain */ |
| struct TranslationTableChainEntry *next; /* next node, NULL at the end of the list */ |
| TranslationTableHeader *table; /* re-pointed by allocateSpaceInTranslationTable() when realloc moves the table */ |
| int tableListLength; |
| char tableList[1]; /* NOTE(review): presumably over-allocated to hold tableListLength bytes - confirm at the allocation site */ |
| } TranslationTableChainEntry; |
| |
| static TranslationTableChainEntry *translationTableChain = NULL; /* list head; empty list at start-up */ |
| |
| typedef struct DisplayTableChainEntry { /* node of the singly linked list headed by displayTableChain */ |
| struct DisplayTableChainEntry *next; /* next node, NULL at the end of the list */ |
| DisplayTableHeader *table; /* re-pointed by allocateSpaceInDisplayTable() when realloc moves the table */ |
| int tableListLength; |
| char tableList[1]; /* NOTE(review): presumably over-allocated to hold tableListLength bytes - confirm at the allocation site */ |
| } DisplayTableChainEntry; |
| |
| static DisplayTableChainEntry *displayTableChain = NULL; /* list head; empty list at start-up */ |
| |
| /* predefined character classes; the list is terminated by a NULL entry */ |
| static const char *characterClassNames[] = { |
| "space", |
| "letter", |
| "digit", |
| "punctuation", |
| "uppercase", |
| "lowercase", |
| "math", |
| "sign", |
| "litdigit", |
| NULL, |
| }; |
| |
| // names that may not be used for custom attributes; the list is terminated by a NULL entry |
| static const char *reservedAttributeNames[] = { |
| "numericnocontchars", |
| "numericnocontchar", |
| "numericnocont", |
| "midendnumericmodechars", |
| "midendnumericmodechar", |
| "midendnumericmode", |
| "numericmodechars", |
| "numericmodechar", |
| "numericmode", |
| "capsmodechars", |
| "capsmodechar", |
| "capsmode", |
| "emphmodechars", |
| "emphmodechar", |
| "emphmode", |
| "noemphchars", |
| "noemphchar", |
| "noemph", |
| "seqdelimiter", |
| "seqbeforechars", |
| "seqbeforechar", |
| "seqbefore", |
| "seqafterchars", |
| "seqafterchar", |
| "seqafter", |
| "noletsign", |
| "noletsignbefore", |
| "noletsignafter", |
| NULL, |
| }; |
| |
| static const char *opcodeNames[CTO_None] = { /* opcode spellings, CTO_None slots; NOTE(review): presumably indexed by TranslationTableOpcode value, so the order must match the enum - confirm in internal.h */ |
| "include", |
| "locale", |
| "undefined", |
| "capsletter", |
| "begcapsword", |
| "endcapsword", |
| "begcaps", |
| "endcaps", |
| "begcapsphrase", |
| "endcapsphrase", |
| "lencapsphrase", |
| "modeletter", |
| "begmodeword", |
| "endmodeword", |
| "begmode", |
| "endmode", |
| "begmodephrase", |
| "endmodephrase", |
| "lenmodephrase", |
| "letsign", |
| "noletsignbefore", |
| "noletsign", |
| "noletsignafter", |
| "numsign", |
| "nonumsign", |
| "numericmodechars", |
| "midendnumericmodechars", |
| "numericnocontchars", |
| "seqdelimiter", |
| "seqbeforechars", |
| "seqafterchars", |
| "seqafterpattern", |
| "seqafterexpression", |
| "emphclass", |
| "emphletter", |
| "begemphword", |
| "endemphword", |
| "begemph", |
| "endemph", |
| "begemphphrase", |
| "endemphphrase", |
| "lenemphphrase", |
| "capsmodechars", |
| "emphmodechars", |
| "noemphchars", |
| "begcomp", |
| "endcomp", |
| "nocontractsign", |
| "multind", |
| "compdots", |
| "comp6", |
| "class", |
| "after", |
| "before", |
| "noback", |
| "nofor", |
| "empmatchbefore", |
| "empmatchafter", |
| "swapcc", |
| "swapcd", |
| "swapdd", |
| "space", |
| "digit", |
| "punctuation", |
| "math", |
| "sign", |
| "letter", |
| "uppercase", |
| "lowercase", |
| "grouping", |
| "uplow", |
| "litdigit", |
| "display", |
| "replace", |
| "context", |
| "correct", |
| "pass2", |
| "pass3", |
| "pass4", |
| "repeated", |
| "repword", |
| "rependword", |
| "capsnocont", |
| "always", |
| "exactdots", |
| "nocross", |
| "syllable", |
| "nocont", |
| "compbrl", |
| "literal", |
| "largesign", |
| "word", |
| "partword", |
| "joinnum", |
| "joinword", |
| "lowword", |
| "contraction", |
| "sufword", |
| "prfword", |
| "begword", |
| "begmidword", |
| "midword", |
| "midendword", |
| "endword", |
| "prepunc", |
| "postpunc", |
| "begnum", |
| "midnum", |
| "endnum", |
| "decpoint", |
| "hyphen", |
| // "apostrophe", |
| // "initial", |
| "nobreak", |
| "match", |
| "backmatch", |
| "attribute", |
| "base", |
| "macro", |
| }; |
| |
| static short opcodeLengths[CTO_None] = { 0 }; /* starts all-zero; NOTE(review): presumably a cache of the opcodeNames[] string lengths filled in elsewhere - confirm */ |
| |
| static void |
| compileError(const FileInfo *file, const char *format, ...); /* forward declaration: getAChar() and getToken() call it before its definition */ |
| |
| static void |
| free_tablefiles(char **tables); /* forward declaration */ |
| |
| static int |
| getAChar(FileInfo *file) { |
| /* Read a big endian, little endian or ASCII 8 file and convert it to |
| * 16- or 32-bit unsigned integers */ |
| int ch1 = 0, ch2 = 0; |
| widechar character; |
| if (file->encoding == ascii8) |
| if (file->status == 2) { |
| file->status++; |
| return file->checkencoding[1]; |
| } |
| while ((ch1 = fgetc(file->in)) != EOF) { |
| if (file->status < 2) file->checkencoding[file->status] = ch1; |
| file->status++; |
| if (file->status == 2) { |
| if (file->checkencoding[0] == 0xfe && file->checkencoding[1] == 0xff) |
| file->encoding = bigEndian; |
| else if (file->checkencoding[0] == 0xff && file->checkencoding[1] == 0xfe) |
| file->encoding = littleEndian; |
| else if (file->checkencoding[0] < 128 && file->checkencoding[1] < 128) { |
| file->encoding = ascii8; |
| return file->checkencoding[0]; |
| } else { |
| compileError(file, |
| "encoding is neither big-endian, little-endian nor ASCII 8."); |
| ch1 = EOF; |
| break; |
| ; |
| } |
| continue; |
| } |
| switch (file->encoding) { |
| case noEncoding: |
| break; |
| case ascii8: |
| return ch1; |
| break; |
| case bigEndian: |
| ch2 = fgetc(file->in); |
| if (ch2 == EOF) break; |
| character = (widechar)(ch1 << 8) | ch2; |
| return (int)character; |
| break; |
| case littleEndian: |
| ch2 = fgetc(file->in); |
| if (ch2 == EOF) break; |
| character = (widechar)(ch2 << 8) | ch1; |
| return (int)character; |
| break; |
| } |
| if (ch1 == EOF || ch2 == EOF) break; |
| } |
| return EOF; |
| } |
| |
| int EXPORT_CALL |
| _lou_getALine(FileInfo *file) { |
| /* Read a line of widechar's from an input file */ |
| int ch; |
| int pch = 0; |
| file->linelen = 0; |
| while ((ch = getAChar(file)) != EOF) { |
| if (ch == 13) continue; |
| if (pch == '\\' && ch == 10) { |
| file->linelen--; |
| pch = ch; |
| continue; |
| } |
| if (ch == 10 || file->linelen >= MAXSTRING - 1) break; |
| file->line[file->linelen++] = (widechar)ch; |
| pch = ch; |
| } |
| file->line[file->linelen] = 0; |
| file->linepos = 0; |
| if (ch == EOF && !file->linelen) return 0; |
| file->lineNumber++; |
| return 1; |
| } |
| |
| static inline int |
| atEndOfLine(const FileInfo *file) { |
| return file->linepos >= file->linelen; |
| } |
| |
| static inline int |
| atTokenDelimiter(const FileInfo *file) { |
| return file->line[file->linepos] <= 32; |
| } |
| |
| static int |
| getToken(FileInfo *file, CharsString *result, const char *description) { |
| /* Find the next string of contiguous non-whitespace characters. If this |
| * is the last token on the line, return 2 instead of 1. */ |
| while (!atEndOfLine(file) && atTokenDelimiter(file)) file->linepos++; |
| result->length = 0; |
| while (!atEndOfLine(file) && !atTokenDelimiter(file)) { |
| int maxlen = MAXSTRING; |
| if (result->length >= maxlen) { |
| compileError(file, "more than %d characters (bytes)", maxlen); |
| return 0; |
| } else |
| result->chars[result->length++] = file->line[file->linepos++]; |
| } |
| if (!result->length) { |
| /* Not enough tokens */ |
| if (description) compileError(file, "%s not specified.", description); |
| return 0; |
| } |
| result->chars[result->length] = 0; |
| while (!atEndOfLine(file) && atTokenDelimiter(file)) file->linepos++; |
| return 1; |
| } |
| |
| static void |
| compileError(const FileInfo *file, const char *format, ...) { |
| #ifndef __SYMBIAN32__ |
| char buffer[MAXSTRING]; |
| va_list arguments; |
| va_start(arguments, format); |
| vsnprintf(buffer, sizeof(buffer), format, arguments); |
| va_end(arguments); |
| if (file) |
| _lou_logMessage(LOU_LOG_ERROR, "%s:%d: error: %s", file->fileName, |
| file->lineNumber, buffer); |
| else |
| _lou_logMessage(LOU_LOG_ERROR, "error: %s", buffer); |
| errorCount++; |
| #endif |
| } |
| |
| static void |
| compileWarning(const FileInfo *file, const char *format, ...) { |
| #ifndef __SYMBIAN32__ |
| char buffer[MAXSTRING]; |
| va_list arguments; |
| va_start(arguments, format); |
| vsnprintf(buffer, sizeof(buffer), format, arguments); |
| va_end(arguments); |
| if (file) |
| _lou_logMessage(LOU_LOG_WARN, "%s:%d: warning: %s", file->fileName, |
| file->lineNumber, buffer); |
| else |
| _lou_logMessage(LOU_LOG_WARN, "warning: %s", buffer); |
| warningCount++; |
| #endif |
| } |
| |
| static int |
| allocateSpaceInTranslationTable(const FileInfo *file, TranslationTableOffset *offset, |
| int size, TranslationTableHeader **table) { |
| /* allocate memory for table and expand previously allocated memory if necessary */ |
| int spaceNeeded = ((size + OFFSETSIZE - 1) / OFFSETSIZE) * OFFSETSIZE; |
| TranslationTableOffset newTableSize = (*table)->bytesUsed + spaceNeeded; |
| TranslationTableOffset tableSize = (*table)->tableSize; |
| if (newTableSize > tableSize) { |
| TranslationTableHeader *newTable; |
| newTableSize += (newTableSize / OFFSETSIZE); |
| newTable = realloc(*table, newTableSize); |
| if (!newTable) { |
| compileError(file, "Not enough memory for translation table."); |
| _lou_outOfMemory(); |
| } |
| memset(((unsigned char *)newTable) + tableSize, 0, newTableSize - tableSize); |
| /* update references to the old table */ |
| { |
| TranslationTableChainEntry *entry; |
| for (entry = translationTableChain; entry != NULL; entry = entry->next) |
| if (entry->table == *table) |
| entry->table = (TranslationTableHeader *)newTable; |
| } |
| newTable->tableSize = newTableSize; |
| *table = newTable; |
| } |
| if (offset != NULL) { |
| *offset = ((*table)->bytesUsed - sizeof(**table)) / OFFSETSIZE; |
| (*table)->bytesUsed += spaceNeeded; |
| } |
| return 1; |
| } |
| |
| static int |
| allocateSpaceInDisplayTable(const FileInfo *file, TranslationTableOffset *offset, |
| int size, DisplayTableHeader **table) { |
| /* allocate memory for table and expand previously allocated memory if necessary */ |
| int spaceNeeded = ((size + OFFSETSIZE - 1) / OFFSETSIZE) * OFFSETSIZE; |
| TranslationTableOffset newTableSize = (*table)->bytesUsed + spaceNeeded; |
| TranslationTableOffset tableSize = (*table)->tableSize; |
| if (newTableSize > tableSize) { |
| DisplayTableHeader *newTable; |
| newTableSize += (newTableSize / OFFSETSIZE); |
| newTable = realloc(*table, newTableSize); |
| if (!newTable) { |
| compileError(file, "Not enough memory for display table."); |
| _lou_outOfMemory(); |
| } |
| memset(((unsigned char *)newTable) + tableSize, 0, newTableSize - tableSize); |
| /* update references to the old table */ |
| { |
| DisplayTableChainEntry *entry; |
| for (entry = displayTableChain; entry != NULL; entry = entry->next) |
| if (entry->table == *table) entry->table = (DisplayTableHeader *)newTable; |
| } |
| newTable->tableSize = newTableSize; |
| *table = newTable; |
| } |
| if (offset != NULL) { |
| *offset = ((*table)->bytesUsed - sizeof(**table)) / OFFSETSIZE; |
| (*table)->bytesUsed += spaceNeeded; |
| } |
| return 1; |
| } |
| |
| static int |
| allocateTranslationTable(const FileInfo *file, TranslationTableHeader **table) { |
| /* Allocate memory for the table and a guess on the number of rules */ |
| const TranslationTableOffset startSize = 2 * sizeof(**table); |
| if (*table) return 1; |
| TranslationTableOffset bytesUsed = |
| sizeof(**table) + OFFSETSIZE; /* So no offset is ever zero */ |
| if (!(*table = malloc(startSize))) { |
| compileError(file, "Not enough memory"); |
| if (*table != NULL) free(*table); |
| *table = NULL; |
| _lou_outOfMemory(); |
| } |
| memset(*table, 0, startSize); |
| (*table)->tableSize = startSize; |
| (*table)->bytesUsed = bytesUsed; |
| return 1; |
| } |
| |
| static int |
| allocateDisplayTable(const FileInfo *file, DisplayTableHeader **table) { |
| /* Allocate memory for the table and a guess on the number of rules */ |
| const TranslationTableOffset startSize = 2 * sizeof(**table); |
| if (*table) return 1; |
| TranslationTableOffset bytesUsed = |
| sizeof(**table) + OFFSETSIZE; /* So no offset is ever zero */ |
| if (!(*table = malloc(startSize))) { |
| compileError(file, "Not enough memory"); |
| if (*table != NULL) free(*table); |
| *table = NULL; |
| _lou_outOfMemory(); |
| } |
| memset(*table, 0, startSize); |
| (*table)->tableSize = startSize; |
| (*table)->bytesUsed = bytesUsed; |
| return 1; |
| } |
| |
| /* Look up a character or dot pattern. Although the algorithms are almost identical, |
| * different tables are needed for characters and dots because of the possibility of |
| * conflicts. */ |
| |
| static TranslationTableCharacter * |
| getChar(widechar c, TranslationTableHeader *table, |
| TranslationTableOffset *characterOffset) { |
| const TranslationTableOffset bucket = table->characters[_lou_charHash(c)]; |
| TranslationTableOffset offset = bucket; |
| while (offset) { |
| TranslationTableCharacter *character = |
| (TranslationTableCharacter *)&table->ruleArea[offset]; |
| if (character->value == c) { |
| if (characterOffset) *characterOffset = offset; |
| return character; |
| } |
| offset = character->next; |
| } |
| return NULL; |
| } |
| |
| static TranslationTableCharacter * |
| getDots(widechar d, TranslationTableHeader *table) { |
| const TranslationTableOffset bucket = table->dots[_lou_charHash(d)]; |
| TranslationTableOffset offset = bucket; |
| while (offset) { |
| TranslationTableCharacter *character = |
| (TranslationTableCharacter *)&table->ruleArea[offset]; |
| if (character->value == d) return character; |
| offset = character->next; |
| } |
| return NULL; |
| } |
| |
| static TranslationTableCharacter * |
| putChar(const FileInfo *file, widechar c, TranslationTableHeader **table, |
| TranslationTableOffset *characterOffset) { |
| /* See if a character is in the appropriate table. If not, insert it. In either case, |
| * return a pointer to it. */ |
| TranslationTableCharacter *character; |
| TranslationTableOffset offset; |
| if ((character = getChar(c, *table, characterOffset))) return character; |
| if (!allocateSpaceInTranslationTable(file, &offset, sizeof(*character), table)) |
| return NULL; |
| character = (TranslationTableCharacter *)&(*table)->ruleArea[offset]; |
| memset(character, 0, sizeof(*character)); |
| character->sourceFile = file->sourceFile; |
| character->sourceLine = file->lineNumber; |
| character->value = c; |
| const unsigned long int charHash = _lou_charHash(c); |
| const TranslationTableOffset bucket = (*table)->characters[charHash]; |
| if (!bucket) |
| (*table)->characters[charHash] = offset; |
| else { |
| TranslationTableCharacter *oldchar = |
| (TranslationTableCharacter *)&(*table)->ruleArea[bucket]; |
| while (oldchar->next) |
| oldchar = (TranslationTableCharacter *)&(*table)->ruleArea[oldchar->next]; |
| oldchar->next = offset; |
| } |
| if (characterOffset) *characterOffset = offset; |
| return character; |
| } |
| |
| static TranslationTableCharacter * |
| putDots(const FileInfo *file, widechar d, TranslationTableHeader **table) { |
| /* See if a dot pattern is in the appropriate table. If not, insert it. In either |
| * case, return a pointer to it. */ |
| TranslationTableCharacter *character; |
| TranslationTableOffset offset; |
| if ((character = getDots(d, *table))) return character; |
| if (!allocateSpaceInTranslationTable(file, &offset, sizeof(*character), table)) |
| return NULL; |
| character = (TranslationTableCharacter *)&(*table)->ruleArea[offset]; |
| memset(character, 0, sizeof(*character)); |
| character->sourceFile = file->sourceFile; |
| character->sourceLine = file->lineNumber; |
| character->value = d; |
| const unsigned long int charHash = _lou_charHash(d); |
| const TranslationTableOffset bucket = (*table)->dots[charHash]; |
| if (!bucket) |
| (*table)->dots[charHash] = offset; |
| else { |
| TranslationTableCharacter *oldchar = |
| (TranslationTableCharacter *)&(*table)->ruleArea[bucket]; |
| while (oldchar->next) |
| oldchar = (TranslationTableCharacter *)&(*table)->ruleArea[oldchar->next]; |
| oldchar->next = offset; |
| } |
| return character; |
| } |
| |
| /* Look up a character-dots mapping in a display table. */ |
| |
| static CharDotsMapping * |
| getDotsForChar(widechar c, const DisplayTableHeader *table) { |
| CharDotsMapping *cdPtr; |
| const TranslationTableOffset bucket = table->charToDots[_lou_charHash(c)]; |
| TranslationTableOffset offset = bucket; |
| while (offset) { |
| cdPtr = (CharDotsMapping *)&table->ruleArea[offset]; |
| if (cdPtr->lookFor == c) return cdPtr; |
| offset = cdPtr->next; |
| } |
| return NULL; |
| } |
| |
| static CharDotsMapping * |
| getCharForDots(widechar d, const DisplayTableHeader *table) { |
| CharDotsMapping *cdPtr; |
| const TranslationTableOffset bucket = table->dotsToChar[_lou_charHash(d)]; |
| TranslationTableOffset offset = bucket; |
| while (offset) { |
| cdPtr = (CharDotsMapping *)&table->ruleArea[offset]; |
| if (cdPtr->lookFor == d) return cdPtr; |
| offset = cdPtr->next; |
| } |
| return NULL; |
| } |
| |
| widechar EXPORT_CALL |
| _lou_getDotsForChar(widechar c, const DisplayTableHeader *table) { |
| CharDotsMapping *cdPtr = getDotsForChar(c, table); |
| if (cdPtr) return cdPtr->found; |
| return LOU_DOTS; |
| } |
| |
| widechar EXPORT_CALL |
| _lou_getCharForDots(widechar d, const DisplayTableHeader *table) { |
| CharDotsMapping *cdPtr = getCharForDots(d, table); |
| if (cdPtr) return cdPtr->found; |
| return '\0'; |
| } |
| |
| static int |
| putCharDotsMapping( |
| const FileInfo *file, widechar c, widechar d, DisplayTableHeader **table) { |
| if (!getDotsForChar(c, *table)) { |
| CharDotsMapping *cdPtr; |
| TranslationTableOffset offset; |
| if (!allocateSpaceInDisplayTable(file, &offset, sizeof(*cdPtr), table)) return 0; |
| cdPtr = (CharDotsMapping *)&(*table)->ruleArea[offset]; |
| cdPtr->next = 0; |
| cdPtr->lookFor = c; |
| cdPtr->found = d; |
| const unsigned long int charHash = _lou_charHash(c); |
| const TranslationTableOffset bucket = (*table)->charToDots[charHash]; |
| if (!bucket) |
| (*table)->charToDots[charHash] = offset; |
| else { |
| CharDotsMapping *oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[bucket]; |
| while (oldcdPtr->next) |
| oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[oldcdPtr->next]; |
| oldcdPtr->next = offset; |
| } |
| } |
| if (!getCharForDots(d, *table)) { |
| CharDotsMapping *cdPtr; |
| TranslationTableOffset offset; |
| if (!allocateSpaceInDisplayTable(file, &offset, sizeof(*cdPtr), table)) return 0; |
| cdPtr = (CharDotsMapping *)&(*table)->ruleArea[offset]; |
| cdPtr->next = 0; |
| cdPtr->lookFor = d; |
| cdPtr->found = c; |
| const unsigned long int charHash = _lou_charHash(d); |
| const TranslationTableOffset bucket = (*table)->dotsToChar[charHash]; |
| if (!bucket) |
| (*table)->dotsToChar[charHash] = offset; |
| else { |
| CharDotsMapping *oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[bucket]; |
| while (oldcdPtr->next) |
| oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[oldcdPtr->next]; |
| oldcdPtr->next = offset; |
| } |
| } |
| return 1; |
| } |
| |
/**
 * Name the part of a multipass rule for use in messages.
 *
 * @param actionPart zero for the test part, any other value for the action part
 * @return "test" or "action" (string literals, not to be freed)
 */
static inline const char *
getPartName(int actionPart) {
	if (actionPart != 0) return "action";
	return "test";
}
| |
| static int |
| passFindCharacters(const FileInfo *file, widechar *instructions, int end, |
| widechar **characters, int *length) { |
| int IC = 0; |
| int lookback = 0; |
| |
| *characters = NULL; |
| *length = 0; |
| |
| while (IC < end) { |
| widechar instruction = instructions[IC]; |
| |
| switch (instruction) { |
| case pass_string: |
| case pass_dots: { |
| int count = instructions[IC + 1]; |
| IC += 2; |
| if (count > lookback) { |
| *characters = &instructions[IC + lookback]; |
| *length = count - lookback; |
| return 1; |
| } else { |
| lookback -= count; |
| } |
| IC += count; |
| continue; |
| } |
| |
| case pass_attributes: |
| IC += 7; |
| if (instructions[IC - 2] == instructions[IC - 1] && |
| instructions[IC - 1] <= lookback) { |
| lookback -= instructions[IC - 1]; |
| continue; |
| } |
| goto NO_CHARACTERS; |
| |
| case pass_swap: |
| IC += 2; |
| /* fall through */ |
| |
| case pass_groupstart: |
| case pass_groupend: |
| case pass_groupreplace: |
| IC += 3; |
| |
| NO_CHARACTERS : { return 1; } |
| |
| case pass_eq: |
| case pass_lt: |
| case pass_gt: |
| case pass_lteq: |
| case pass_gteq: |
| IC += 3; |
| continue; |
| |
| case pass_lookback: |
| lookback += instructions[IC + 1]; |
| IC += 2; |
| continue; |
| |
| case pass_not: |
| case pass_startReplace: |
| case pass_endReplace: |
| case pass_first: |
| case pass_last: |
| case pass_copy: |
| case pass_omit: |
| case pass_plus: |
| case pass_hyphen: |
| IC += 1; |
| continue; |
| |
| case pass_endTest: |
| goto NO_CHARACTERS; |
| |
| default: |
| compileError(file, "unhandled test suboperand: \\x%02x", instruction); |
| return 0; |
| } |
| } |
| goto NO_CHARACTERS; |
| } |
| |
| static const char * |
| printSource(const FileInfo *currentFile, const char *sourceFile, int sourceLine) { |
| static char scratchBuf[MAXSTRING]; |
| if (sourceFile) { |
| if (currentFile && currentFile->sourceFile && |
| strcmp(currentFile->sourceFile, sourceFile) == 0) |
| snprintf(scratchBuf, MAXSTRING, "line %d", sourceLine); |
| else |
| snprintf(scratchBuf, MAXSTRING, "%s:%d", sourceFile, sourceLine); |
| } else |
| snprintf(scratchBuf, MAXSTRING, "source unknown"); |
| return scratchBuf; |
| } |
| |
| /* The following functions are called by addRule to handle various cases. */ |
| |
| static void |
| addForwardRuleWithSingleChar(const FileInfo *file, TranslationTableOffset ruleOffset, |
| TranslationTableRule *rule, TranslationTableHeader **table) { |
| /* direction = 0, rule->charslen = 1 */ |
| TranslationTableCharacter *character; |
| // get the character from the table, or if the character is not defined yet, define it |
| // (without adding attributes) |
| if (rule->opcode >= CTO_Pass2 && rule->opcode <= CTO_Pass4) { |
| character = putDots(file, rule->charsdots[0], table); |
| // putDots may have moved table, so make sure rule is still valid |
| rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
| } else if (rule->opcode == CTO_CompDots || rule->opcode == CTO_Comp6) { |
| character = putChar(file, rule->charsdots[0], table, NULL); |
| // putChar may have moved table, so make sure rule is still valid |
| rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
| character->compRule = ruleOffset; |
| return; |
| } else { |
| character = putChar(file, rule->charsdots[0], table, NULL); |
| // putChar may have moved table, so make sure rule is still valid |
| rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
| // if the new rule is a character definition rule, set the main definition rule of |
| // this character to it (possibly overwriting previous definition rules) |
| // adding the attributes to the character has already been done elsewhere |
| if (rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow) { |
| if (character->definitionRule) { |
| TranslationTableRule *prevRule = |
| (TranslationTableRule *)&(*table) |
| ->ruleArea[character->definitionRule]; |
| _lou_logMessage(LOU_LOG_DEBUG, |
| "%s:%d: Character already defined (%s). The new definition will " |
| "take precedence.", |
| file->fileName, file->lineNumber, |
| printSource(file, prevRule->sourceFile, prevRule->sourceLine)); |
| } else if (character->basechar) { |
| _lou_logMessage(LOU_LOG_DEBUG, |
| "%s:%d: A base rule already exists for this character (%s). The " |
| "%s rule will take precedence.", |
| file->fileName, file->lineNumber, |
| printSource(file, character->sourceFile, character->sourceLine), |
| _lou_findOpcodeName(rule->opcode)); |
| character->basechar = 0; |
| character->mode = 0; |
| } |
| character->definitionRule = ruleOffset; |
| } |
| } |
| // add the new rule to the list of rules associated with this character |
| // if the new rule is a character definition rule, it is inserted at the end of the |
| // list |
| // otherwise it is inserted before the first character definition rule |
| TranslationTableOffset *otherRule = &character->otherRules; |
| while (*otherRule) { |
| TranslationTableRule *r = (TranslationTableRule *)&(*table)->ruleArea[*otherRule]; |
| if (r->charslen == 0) break; |
| if (r->opcode >= CTO_Space && r->opcode < CTO_UpLow) |
| if (!(rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow)) break; |
| otherRule = &r->charsnext; |
| } |
| rule->charsnext = *otherRule; |
| *otherRule = ruleOffset; |
| } |
| |
| static void |
| addForwardRuleWithMultipleChars(TranslationTableOffset ruleOffset, |
| TranslationTableRule *rule, TranslationTableHeader *table) { |
| /* direction = 0 rule->charslen > 1 */ |
| TranslationTableOffset *forRule = |
| &table->forRules[_lou_stringHash(&rule->charsdots[0], 0, NULL)]; |
| while (*forRule) { |
| TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*forRule]; |
| if (rule->charslen > r->charslen) break; |
| if (rule->charslen == r->charslen) |
| if ((r->opcode == CTO_Always) && (rule->opcode != CTO_Always)) break; |
| forRule = &r->charsnext; |
| } |
| rule->charsnext = *forRule; |
| *forRule = ruleOffset; |
| } |
| |
| static void |
| addBackwardRuleWithSingleCell(const FileInfo *file, widechar cell, |
| TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
| TranslationTableHeader **table) { |
| /* direction = 1, rule->dotslen = 1 */ |
| TranslationTableCharacter *dots; |
| if (rule->opcode == CTO_SwapCc || rule->opcode == CTO_Repeated) |
| return; /* too ambiguous */ |
| // get the cell from the table, or if the cell is not defined yet, define it (without |
| // adding attributes) |
| dots = putDots(file, cell, table); |
| // putDots may have moved table, so make sure rule is still valid |
| rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
| if (rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow) |
| dots->definitionRule = ruleOffset; |
| TranslationTableOffset *otherRule = &dots->otherRules; |
| while (*otherRule) { |
| TranslationTableRule *r = (TranslationTableRule *)&(*table)->ruleArea[*otherRule]; |
| if (rule->charslen > r->charslen || r->dotslen == 0) break; |
| if (r->opcode >= CTO_Space && r->opcode < CTO_UpLow) |
| if (!(rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow)) break; |
| otherRule = &r->dotsnext; |
| } |
| rule->dotsnext = *otherRule; |
| *otherRule = ruleOffset; |
| } |
| |
| static void |
| addBackwardRuleWithMultipleCells(widechar *cells, int dotslen, |
| TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
| TranslationTableHeader *table) { |
| /* direction = 1, dotslen > 1 */ |
| TranslationTableOffset *backRule = &table->backRules[_lou_stringHash(cells, 0, NULL)]; |
| if (rule->opcode == CTO_SwapCc) return; |
| int ruleLength = dotslen + rule->charslen; |
| while (*backRule) { |
| TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*backRule]; |
| int rLength = r->dotslen + r->charslen; |
| if (ruleLength > rLength) break; |
| if (rLength == ruleLength) |
| if ((r->opcode == CTO_Always) && (rule->opcode != CTO_Always)) break; |
| backRule = &r->dotsnext; |
| } |
| rule->dotsnext = *backRule; |
| *backRule = ruleOffset; |
| } |
| |
| static int |
| addForwardPassRule(TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
| TranslationTableHeader *table) { |
| TranslationTableOffset *forPassRule; |
| switch (rule->opcode) { |
| case CTO_Correct: |
| forPassRule = &table->forPassRules[0]; |
| break; |
| case CTO_Context: |
| forPassRule = &table->forPassRules[1]; |
| break; |
| case CTO_Pass2: |
| forPassRule = &table->forPassRules[2]; |
| break; |
| case CTO_Pass3: |
| forPassRule = &table->forPassRules[3]; |
| break; |
| case CTO_Pass4: |
| forPassRule = &table->forPassRules[4]; |
| break; |
| default: |
| return 0; |
| } |
| while (*forPassRule) { |
| TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*forPassRule]; |
| if (rule->charslen > r->charslen) break; |
| forPassRule = &r->charsnext; |
| } |
| rule->charsnext = *forPassRule; |
| *forPassRule = ruleOffset; |
| return 1; |
| } |
| |
| static int |
| addBackwardPassRule(TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
| TranslationTableHeader *table) { |
| TranslationTableOffset *backPassRule; |
| switch (rule->opcode) { |
| case CTO_Correct: |
| backPassRule = &table->backPassRules[0]; |
| break; |
| case CTO_Context: |
| backPassRule = &table->backPassRules[1]; |
| break; |
| case CTO_Pass2: |
| backPassRule = &table->backPassRules[2]; |
| break; |
| case CTO_Pass3: |
| backPassRule = &table->backPassRules[3]; |
| break; |
| case CTO_Pass4: |
| backPassRule = &table->backPassRules[4]; |
| break; |
| default: |
| return 0; |
| } |
| while (*backPassRule) { |
| TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*backPassRule]; |
| if (rule->charslen > r->charslen) break; |
| backPassRule = &r->dotsnext; |
| } |
| rule->dotsnext = *backPassRule; |
| *backPassRule = ruleOffset; |
| return 1; |
| } |
| |
| static int |
| addRule(const FileInfo *file, TranslationTableOpcode opcode, CharsString *ruleChars, |
| CharsString *ruleDots, TranslationTableCharacterAttributes after, |
| TranslationTableCharacterAttributes before, TranslationTableOffset *ruleOffset, |
| TranslationTableRule **rule, int noback, int nofor, |
| TranslationTableHeader **table) { |
| /* Add a rule to the table, using the hash function to find the start of |
| * chains and chaining both the chars and dots strings */ |
| TranslationTableOffset offset; |
| int ruleSize = sizeof(TranslationTableRule) - (DEFAULTRULESIZE * CHARSIZE); |
| if (ruleChars) ruleSize += CHARSIZE * ruleChars->length; |
| if (ruleDots) ruleSize += CHARSIZE * ruleDots->length; |
| if (!allocateSpaceInTranslationTable(file, &offset, ruleSize, table)) return 0; |
| TranslationTableRule *r = (TranslationTableRule *)&(*table)->ruleArea[offset]; |
| if (rule) *rule = r; |
| if (ruleOffset) *ruleOffset = offset; |
| r->sourceFile = file->sourceFile; |
| r->sourceLine = file->lineNumber; |
| r->opcode = opcode; |
| r->after = after; |
| r->before = before; |
| r->nocross = 0; |
| if (ruleChars) |
| memcpy(&r->charsdots[0], &ruleChars->chars[0], |
| CHARSIZE * (r->charslen = ruleChars->length)); |
| else |
| r->charslen = 0; |
| if (ruleDots) |
| memcpy(&r->charsdots[r->charslen], &ruleDots->chars[0], |
| CHARSIZE * (r->dotslen = ruleDots->length)); |
| else |
| r->dotslen = 0; |
| |
| /* link new rule into table. */ |
| if (opcode == CTO_SwapCc || opcode == CTO_SwapCd || opcode == CTO_SwapDd) return 1; |
| if (opcode >= CTO_Context && opcode <= CTO_Pass4) |
| if (!(opcode == CTO_Context && r->charslen > 0)) { |
| if (!nofor) |
| if (!addForwardPassRule(offset, r, *table)) return 0; |
| if (!noback) |
| if (!addBackwardPassRule(offset, r, *table)) return 0; |
| return 1; |
| } |
| if (!nofor) { |
| if (r->charslen == 1) { |
| addForwardRuleWithSingleChar(file, offset, r, table); |
| // addForwardRuleWithSingleChar may have moved table, so make sure rule is |
| // still valid |
| r = (TranslationTableRule *)&(*table)->ruleArea[offset]; |
| if (rule) *rule = r; |
| } else if (r->charslen > 1) |
| addForwardRuleWithMultipleChars(offset, r, *table); |
| } |
| if (!noback) { |
| widechar *cells; |
| int dotslen; |
| |
| if (r->opcode == CTO_Context) { |
| cells = &r->charsdots[0]; |
| dotslen = r->charslen; |
| } else { |
| cells = &r->charsdots[r->charslen]; |
| dotslen = r->dotslen; |
| } |
| if (dotslen == 1) { |
| addBackwardRuleWithSingleCell(file, *cells, offset, r, table); |
| // addBackwardRuleWithSingleCell may have moved table, so make sure rule is |
| // still valid |
| r = (TranslationTableRule *)&(*table)->ruleArea[offset]; |
| if (rule) *rule = r; |
| } else if (dotslen > 1) |
| addBackwardRuleWithMultipleCells(cells, dotslen, offset, r, *table); |
| } |
| return 1; |
| } |
| |
| static const CharacterClass * |
| findCharacterClass(const CharsString *name, const TranslationTableHeader *table) { |
| /* Find a character class, whether predefined or user-defined */ |
| const CharacterClass *class = table->characterClasses; |
| while (class) { |
| if ((name->length == class->length) && |
| (memcmp(&name->chars[0], class->name, CHARSIZE * name->length) == 0)) |
| return class; |
| class = class->next; |
| } |
| return NULL; |
| } |
| |
| static TranslationTableCharacterAttributes |
| getNextNumberedAttribute(TranslationTableHeader *table) { |
| /* Get the next attribute value for numbered attributes, or 0 if there is no more |
| * space in the table. */ |
| TranslationTableCharacterAttributes next = table->nextNumberedCharacterClassAttribute; |
| if (next > CTC_UserDefined8) return 0; |
| table->nextNumberedCharacterClassAttribute <<= 1; |
| return next; |
| } |
| |
| static TranslationTableCharacterAttributes |
| getNextAttribute(TranslationTableHeader *table) { |
| /* Get the next attribute value, or 0 if there is no more space in the table. */ |
| TranslationTableCharacterAttributes next = table->nextCharacterClassAttribute; |
| if (next) { |
| if (next == CTC_LitDigit) |
| table->nextCharacterClassAttribute = CTC_UserDefined9; |
| else |
| table->nextCharacterClassAttribute <<= 1; |
| return next; |
| } else |
| return getNextNumberedAttribute(table); |
| } |
| |
| static CharacterClass * |
| addCharacterClass(const FileInfo *file, const widechar *name, int length, |
| TranslationTableHeader *table, int validate) { |
| /* Define a character class, Whether predefined or user-defined */ |
| if (validate) { |
| for (int i = 0; i < length; i++) { |
| if (!((name[i] >= 'a' && name[i] <= 'z') || |
| (name[i] >= 'A' && name[i] <= 'Z'))) { |
| // don't abort because in some cases (before/after rules) |
| // this will work fine, but it will not work in multipass |
| // expressions |
| compileWarning(file, |
| "Invalid attribute name: must be a digit between " |
| "0 and 7 or a word containing only letters"); |
| } |
| } |
| // check that name is not reserved |
| int k = 0; |
| while (reservedAttributeNames[k]) { |
| if (strlen(reservedAttributeNames[k]) == length) { |
| int i; |
| for (i = 0; i < length; i++) |
| if (reservedAttributeNames[k][i] != name[i]) break; |
| if (i == length) { |
| compileError(file, "Attribute name is reserved: %s", |
| reservedAttributeNames[k]); |
| return NULL; |
| } |
| } |
| k++; |
| } |
| } |
| CharacterClass **classes = &table->characterClasses; |
| TranslationTableCharacterAttributes attribute = getNextAttribute(table); |
| CharacterClass *class; |
| if (attribute) { |
| if (!(class = malloc(sizeof(*class) + CHARSIZE * (length - 1)))) |
| _lou_outOfMemory(); |
| else { |
| memset(class, 0, sizeof(*class)); |
| memcpy(class->name, name, CHARSIZE * (class->length = length)); |
| class->attribute = attribute; |
| class->next = *classes; |
| *classes = class; |
| return class; |
| } |
| } |
| compileError(file, "character class table overflow."); |
| return NULL; |
| } |
| |
| static void |
| deallocateCharacterClasses(TranslationTableHeader *table) { |
| CharacterClass **classes = &table->characterClasses; |
| while (*classes) { |
| CharacterClass *class = *classes; |
| *classes = (*classes)->next; |
| if (class) free(class); |
| } |
| } |
| |
| static int |
| allocateCharacterClasses(TranslationTableHeader *table) { |
| /* Allocate memory for predefined character classes */ |
| int k = 0; |
| table->characterClasses = NULL; |
| table->nextCharacterClassAttribute = 1; // CTC_Space |
| table->nextNumberedCharacterClassAttribute = CTC_UserDefined1; |
| while (characterClassNames[k]) { |
| widechar wname[MAXSTRING]; |
| int length = (int)strlen(characterClassNames[k]); |
| int kk; |
| for (kk = 0; kk < length; kk++) wname[kk] = (widechar)characterClassNames[k][kk]; |
| if (!addCharacterClass(NULL, wname, length, table, 0)) { |
| deallocateCharacterClasses(table); |
| return 0; |
| } |
| k++; |
| } |
| return 1; |
| } |
| |
| static TranslationTableOpcode |
| getOpcode(const FileInfo *file, const CharsString *token) { |
| static TranslationTableOpcode lastOpcode = 0; |
| TranslationTableOpcode opcode = lastOpcode; |
| |
| do { |
| if (token->length == opcodeLengths[opcode]) |
| if (eqasc2uni((unsigned char *)opcodeNames[opcode], &token->chars[0], |
| token->length)) { |
| lastOpcode = opcode; |
| return opcode; |
| } |
| opcode++; |
| if (opcode >= CTO_None) opcode = 0; |
| } while (opcode != lastOpcode); |
| return CTO_None; |
| } |
| |
| TranslationTableOpcode EXPORT_CALL |
| _lou_findOpcodeNumber(const char *toFind) { |
| /* Used by tools such as lou_debug */ |
| static TranslationTableOpcode lastOpcode = 0; |
| TranslationTableOpcode opcode = lastOpcode; |
| int length = (int)strlen(toFind); |
| do { |
| if (length == opcodeLengths[opcode] && |
| strcasecmp(toFind, opcodeNames[opcode]) == 0) { |
| lastOpcode = opcode; |
| return opcode; |
| } |
| opcode++; |
| if (opcode >= CTO_None) opcode = 0; |
| } while (opcode != lastOpcode); |
| return CTO_None; |
| } |
| |
| const char *EXPORT_CALL |
| _lou_findOpcodeName(TranslationTableOpcode opcode) { |
| static char scratchBuf[MAXSTRING]; |
| /* Used by tools such as lou_debug */ |
| if (opcode < 0 || opcode >= CTO_None) { |
| sprintf(scratchBuf, "%u", opcode); |
| return scratchBuf; |
| } |
| return opcodeNames[opcode]; |
| } |
| |
| static widechar |
| hexValue(const FileInfo *file, const widechar *digits, int length) { |
| int k; |
| unsigned int binaryValue = 0; |
| for (k = 0; k < length; k++) { |
| unsigned int hexDigit = 0; |
| if (digits[k] >= '0' && digits[k] <= '9') |
| hexDigit = digits[k] - '0'; |
| else if (digits[k] >= 'a' && digits[k] <= 'f') |
| hexDigit = digits[k] - 'a' + 10; |
| else if (digits[k] >= 'A' && digits[k] <= 'F') |
| hexDigit = digits[k] - 'A' + 10; |
| else { |
| compileError(file, "invalid %d-digit hexadecimal number", length); |
| return (widechar)0xffffffff; |
| } |
| binaryValue |= hexDigit << (4 * (length - 1 - k)); |
| } |
| return (widechar)binaryValue; |
| } |
| |
| #define MAXBYTES 7 |
| static const unsigned int first0Bit[MAXBYTES] = { 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, |
| 0XFE }; |
| |
| static int |
| parseChars(const FileInfo *file, CharsString *result, CharsString *token) { |
| int in = 0; |
| int out = 0; |
| int lastOutSize = 0; |
| int lastIn; |
| unsigned int ch = 0; |
| int numBytes = 0; |
| unsigned int utf32 = 0; |
| int k; |
| while (in < token->length) { |
| ch = token->chars[in++] & 0xff; |
| if (ch < 128) { |
| if (ch == '\\') { /* escape sequence */ |
| switch (ch = token->chars[in]) { |
| case '\\': |
| break; |
| case 'e': |
| ch = 0x1b; |
| break; |
| case 'f': |
| ch = 12; |
| break; |
| case 'n': |
| ch = 10; |
| break; |
| case 'r': |
| ch = 13; |
| break; |
| case 's': |
| ch = ' '; |
| break; |
| case 't': |
| ch = 9; |
| break; |
| case 'v': |
| ch = 11; |
| break; |
| case 'w': |
| ch = LOU_ENDSEGMENT; |
| break; |
| case 34: |
| ch = QUOTESUB; |
| break; |
| case 'X': |
| compileWarning(file, "\\Xhhhh (with a capital 'X') is deprecated."); |
| case 'x': |
| if (token->length - in > 4) { |
| ch = hexValue(file, &token->chars[in + 1], 4); |
| in += 4; |
| } |
| break; |
| case 'Y': |
| compileWarning(file, "\\Yhhhhh (with a capital 'Y') is deprecated."); |
| case 'y': |
| if (CHARSIZE == 2) { |
| not32: |
| compileError(file, |
| "liblouis has not been compiled for 32-bit Unicode"); |
| break; |
| } |
| if (token->length - in > 5) { |
| ch = hexValue(file, &token->chars[in + 1], 5); |
| in += 5; |
| } |
| break; |
| case 'Z': |
| compileWarning( |
| file, "\\Zhhhhhhhh (with a capital 'Z') is deprecated."); |
| case 'z': |
| if (CHARSIZE == 2) goto not32; |
| if (token->length - in > 8) { |
| ch = hexValue(file, &token->chars[in + 1], 8); |
| in += 8; |
| } |
| break; |
| default: |
| compileError(file, "invalid escape sequence '\\%c'", ch); |
| break; |
| } |
| in++; |
| } |
| if (out >= MAXSTRING - 1) { |
| compileError(file, "Token too long"); |
| result->length = MAXSTRING - 1; |
| return 1; |
| } |
| result->chars[out++] = (widechar)ch; |
| continue; |
| } |
| lastOutSize = out; |
| lastIn = in; |
| for (numBytes = MAXBYTES - 1; numBytes > 0; numBytes--) |
| if (ch >= first0Bit[numBytes]) break; |
| utf32 = ch & (0XFF - first0Bit[numBytes]); |
| for (k = 0; k < numBytes; k++) { |
| if (in >= MAXSTRING - 1 || in >= token->length) break; |
| if (out >= MAXSTRING - 1) { |
| compileError(file, "Token too long"); |
| result->length = lastOutSize; |
| return 1; |
| } |
| if (token->chars[in] < 128 || (token->chars[in] & 0x0040)) { |
| compileWarning(file, "invalid UTF-8. Assuming Latin-1."); |
| result->chars[out++] = token->chars[lastIn]; |
| in = lastIn + 1; |
| continue; |
| } |
| utf32 = (utf32 << 6) + (token->chars[in++] & 0x3f); |
| } |
| if (out >= MAXSTRING - 1) { |
| compileError(file, "Token too long"); |
| result->length = lastOutSize; |
| return 1; |
| } |
| if (CHARSIZE == 2 && utf32 > 0xffff) utf32 = 0xffff; |
| result->chars[out++] = (widechar)utf32; |
| } |
| result->length = out; |
| return 1; |
| } |
| |
| int EXPORT_CALL |
| _lou_extParseChars(const char *inString, widechar *outString) { |
| /* Parse external character strings */ |
| CharsString wideIn; |
| CharsString result; |
| int k; |
| for (k = 0; inString[k] && k < MAXSTRING - 1; k++) wideIn.chars[k] = inString[k]; |
| wideIn.chars[k] = 0; |
| wideIn.length = k; |
| parseChars(NULL, &result, &wideIn); |
| if (errorCount) { |
| errorCount = 0; |
| return 0; |
| } |
| for (k = 0; k < result.length; k++) outString[k] = result.chars[k]; |
| return result.length; |
| } |
| |
| static int |
| parseDots(const FileInfo *file, CharsString *cells, const CharsString *token) { |
| /* get dot patterns */ |
| widechar cell = 0; /* assembly place for dots */ |
| int cellCount = 0; |
| int index; |
| int start = 0; |
| |
| for (index = 0; index < token->length; index++) { |
| int started = index != start; |
| widechar character = token->chars[index]; |
| switch (character) { /* or dots to make up Braille cell */ |
| { |
| int dot; |
| case '1': |
| dot = LOU_DOT_1; |
| goto haveDot; |
| case '2': |
| dot = LOU_DOT_2; |
| goto haveDot; |
| case '3': |
| dot = LOU_DOT_3; |
| goto haveDot; |
| case '4': |
| dot = LOU_DOT_4; |
| goto haveDot; |
| case '5': |
| dot = LOU_DOT_5; |
| goto haveDot; |
| case '6': |
| dot = LOU_DOT_6; |
| goto haveDot; |
| case '7': |
| dot = LOU_DOT_7; |
| goto haveDot; |
| case '8': |
| dot = LOU_DOT_8; |
| goto haveDot; |
| case '9': |
| dot = LOU_DOT_9; |
| goto haveDot; |
| case 'a': |
| case 'A': |
| dot = LOU_DOT_10; |
| goto haveDot; |
| case 'b': |
| case 'B': |
| dot = LOU_DOT_11; |
| goto haveDot; |
| case 'c': |
| case 'C': |
| dot = LOU_DOT_12; |
| goto haveDot; |
| case 'd': |
| case 'D': |
| dot = LOU_DOT_13; |
| goto haveDot; |
| case 'e': |
| case 'E': |
| dot = LOU_DOT_14; |
| goto haveDot; |
| case 'f': |
| case 'F': |
| dot = LOU_DOT_15; |
| haveDot: |
| if (started && !cell) goto invalid; |
| if (cell & dot) { |
| compileError(file, "dot specified more than once."); |
| return 0; |
| } |
| cell |= dot; |
| break; |
| } |
| case '0': /* blank */ |
| if (started) goto invalid; |
| break; |
| case '-': /* got all dots for this cell */ |
| if (!started) { |
| compileError(file, "missing cell specification."); |
| return 0; |
| } |
| cells->chars[cellCount++] = cell | LOU_DOTS; |
| cell = 0; |
| start = index + 1; |
| break; |
| default: |
| invalid: |
| compileError( |
| file, "invalid dot number %s.", _lou_showString(&character, 1, 0)); |
| return 0; |
| } |
| } |
| if (index == start) { |
| compileError(file, "missing cell specification."); |
| return 0; |
| } |
| cells->chars[cellCount++] = cell | LOU_DOTS; /* last cell */ |
| cells->length = cellCount; |
| return 1; |
| } |
| |
| int EXPORT_CALL |
| _lou_extParseDots(const char *inString, widechar *outString) { |
| /* Parse external dot patterns */ |
| CharsString wideIn; |
| CharsString result; |
| int k; |
| for (k = 0; inString[k] && k < MAXSTRING - 1; k++) wideIn.chars[k] = inString[k]; |
| wideIn.chars[k] = 0; |
| wideIn.length = k; |
| parseDots(NULL, &result, &wideIn); |
| if (errorCount) { |
| errorCount = 0; |
| return 0; |
| } |
| for (k = 0; k < result.length; k++) outString[k] = result.chars[k]; |
| outString[k] = 0; |
| return result.length; |
| } |
| |
| static int |
| getCharacters(FileInfo *file, CharsString *characters) { |
| /* Get ruleChars string */ |
| CharsString token; |
| if (!getToken(file, &token, "characters")) return 0; |
| return parseChars(file, characters, &token); |
| } |
| |
| static int |
| getRuleCharsText(FileInfo *file, CharsString *ruleChars) { |
| CharsString token; |
| if (!getToken(file, &token, "Characters operand")) return 0; |
| return parseChars(file, ruleChars, &token); |
| } |
| |
| static int |
| getRuleDotsText(FileInfo *file, CharsString *ruleDots) { |
| CharsString token; |
| if (!getToken(file, &token, "characters")) return 0; |
| return parseChars(file, ruleDots, &token); |
| } |
| |
| static int |
| getRuleDotsPattern(FileInfo *file, CharsString *ruleDots) { |
| /* Interpret the dets operand */ |
| CharsString token; |
| if (!getToken(file, &token, "Dots operand")) return 0; |
| if (token.length == 1 && token.chars[0] == '=') { |
| ruleDots->length = 0; |
| return 1; |
| } else |
| return parseDots(file, ruleDots, &token); |
| } |
| |
| static int |
| includeFile(const FileInfo *file, CharsString *includedFile, |
| TranslationTableHeader **table, DisplayTableHeader **displayTable); |
| |
| static TranslationTableOffset |
| findRuleName(const CharsString *name, const TranslationTableHeader *table) { |
| const RuleName *ruleName = table->ruleNames; |
| while (ruleName) { |
| if ((name->length == ruleName->length) && |
| (memcmp(&name->chars[0], ruleName->name, CHARSIZE * name->length) == 0)) |
| return ruleName->ruleOffset; |
| ruleName = ruleName->next; |
| } |
| return 0; |
| } |
| |
| static int |
| addRuleName(const FileInfo *file, CharsString *name, TranslationTableOffset ruleOffset, |
| TranslationTableHeader *table) { |
| int k; |
| RuleName *ruleName; |
| if (!(ruleName = malloc(sizeof(*ruleName) + CHARSIZE * (name->length - 1)))) { |
| compileError(file, "not enough memory"); |
| _lou_outOfMemory(); |
| } |
| memset(ruleName, 0, sizeof(*ruleName)); |
| // a name is a sequence of characters in the ranges 'a'..'z' and 'A'..'Z' |
| for (k = 0; k < name->length; k++) { |
| widechar c = name->chars[k]; |
| if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) |
| ruleName->name[k] = c; |
| else { |
| compileError(file, "a name may contain only letters"); |
| free(ruleName); |
| return 0; |
| } |
| } |
| ruleName->length = name->length; |
| ruleName->ruleOffset = ruleOffset; |
| ruleName->next = table->ruleNames; |
| table->ruleNames = ruleName; |
| return 1; |
| } |
| |
| static void |
| deallocateRuleNames(TranslationTableHeader *table) { |
| RuleName **ruleName = &table->ruleNames; |
| while (*ruleName) { |
| RuleName *rn = *ruleName; |
| *ruleName = rn->next; |
| free(rn); |
| } |
| } |
| |
| static int |
| compileSwapDots(const FileInfo *file, CharsString *source, CharsString *dest) { |
| int k = 0; |
| int kk = 0; |
| CharsString dotsSource; |
| CharsString dotsDest; |
| dest->length = 0; |
| dotsSource.length = 0; |
| while (k <= source->length) { |
| if (source->chars[k] != ',' && k != source->length) |
| dotsSource.chars[dotsSource.length++] = source->chars[k]; |
| else { |
| if (!parseDots(file, &dotsDest, &dotsSource)) return 0; |
| dest->chars[dest->length++] = dotsDest.length + 1; |
| for (kk = 0; kk < dotsDest.length; kk++) |
| dest->chars[dest->length++] = dotsDest.chars[kk]; |
| dotsSource.length = 0; |
| } |
| k++; |
| } |
| return 1; |
| } |
| |
| static int |
| compileSwap(FileInfo *file, TranslationTableOpcode opcode, int noback, int nofor, |
| TranslationTableHeader **table) { |
| CharsString ruleChars; |
| CharsString ruleDots; |
| CharsString name; |
| CharsString matches; |
| CharsString replacements; |
| TranslationTableOffset ruleOffset; |
| if (!getToken(file, &name, "name operand")) return 0; |
| if (!getToken(file, &matches, "matches operand")) return 0; |
| if (!getToken(file, &replacements, "replacements operand")) return 0; |
| if (opcode == CTO_SwapCc || opcode == CTO_SwapCd) { |
| if (!parseChars(file, &ruleChars, &matches)) return 0; |
| } else { |
| if (!compileSwapDots(file, &matches, &ruleChars)) return 0; |
| } |
| if (opcode == CTO_SwapCc) { |
| if (!parseChars(file, &ruleDots, &replacements)) return 0; |
| } else { |
| if (!compileSwapDots(file, &replacements, &ruleDots)) return 0; |
| } |
| if (!addRule(file, opcode, &ruleChars, &ruleDots, 0, 0, &ruleOffset, NULL, noback, |
| nofor, table)) |
| return 0; |
| if (!addRuleName(file, &name, ruleOffset, *table)) return 0; |
| return 1; |
| } |
| |
| static int |
| getNumber(widechar *string, widechar *number) { |
| /* Convert a string of wide character digits to an integer */ |
| int k = 0; |
| *number = 0; |
| while (string[k] >= '0' && string[k] <= '9') |
| *number = 10 * *number + (string[k++] - '0'); |
| return k; |
| } |
| |
| /* Start of multipass compiler */ |
| |
| static int |
| passGetAttributes(CharsString *passLine, int *passLinepos, |
| TranslationTableCharacterAttributes *attributes, const FileInfo *file) { |
| int more = 1; |
| *attributes = 0; |
| while (more) { |
| switch (passLine->chars[*passLinepos]) { |
| case pass_any: |
| *attributes = 0xffffffff; |
| break; |
| case pass_digit: |
| *attributes |= CTC_Digit; |
| break; |
| case pass_litDigit: |
| *attributes |= CTC_LitDigit; |
| break; |
| case pass_letter: |
| *attributes |= CTC_Letter; |
| break; |
| case pass_math: |
| *attributes |= CTC_Math; |
| break; |
| case pass_punctuation: |
| *attributes |= CTC_Punctuation; |
| break; |
| case pass_sign: |
| *attributes |= CTC_Sign; |
| break; |
| case pass_space: |
| *attributes |= CTC_Space; |
| break; |
| case pass_uppercase: |
| *attributes |= CTC_UpperCase; |
| break; |
| case pass_lowercase: |
| *attributes |= CTC_LowerCase; |
| break; |
| case pass_class1: |
| *attributes |= CTC_UserDefined9; |
| break; |
| case pass_class2: |
| *attributes |= CTC_UserDefined10; |
| break; |
| case pass_class3: |
| *attributes |= CTC_UserDefined11; |
| break; |
| case pass_class4: |
| *attributes |= CTC_UserDefined12; |
| break; |
| default: |
| more = 0; |
| break; |
| } |
| if (more) (*passLinepos)++; |
| } |
| if (!*attributes) { |
| compileError(file, "missing attribute"); |
| (*passLinepos)--; |
| return 0; |
| } |
| return 1; |
| } |
| |
| static int |
| passGetDots(CharsString *passLine, int *passLinepos, CharsString *dots, |
| const FileInfo *file) { |
| CharsString collectDots; |
| collectDots.length = 0; |
| while (*passLinepos < passLine->length && |
| (passLine->chars[*passLinepos] == '-' || |
| (passLine->chars[*passLinepos] >= '0' && |
| passLine->chars[*passLinepos] <= '9') || |
| ((passLine->chars[*passLinepos] | 32) >= 'a' && |
| (passLine->chars[*passLinepos] | 32) <= 'f'))) |
| collectDots.chars[collectDots.length++] = passLine->chars[(*passLinepos)++]; |
| if (!parseDots(file, dots, &collectDots)) return 0; |
| return 1; |
| } |
| |
| static int |
| passGetString(CharsString *passLine, int *passLinepos, CharsString *string, |
| const FileInfo *file) { |
| string->length = 0; |
| while (1) { |
| if ((*passLinepos >= passLine->length) || !passLine->chars[*passLinepos]) { |
| compileError(file, "unterminated string"); |
| return 0; |
| } |
| if (passLine->chars[*passLinepos] == 34) break; |
| if (passLine->chars[*passLinepos] == QUOTESUB) |
| string->chars[string->length++] = 34; |
| else |
| string->chars[string->length++] = passLine->chars[*passLinepos]; |
| (*passLinepos)++; |
| } |
| string->chars[string->length] = 0; |
| (*passLinepos)++; |
| return 1; |
| } |
| |
| static int |
| passGetNumber(CharsString *passLine, int *passLinepos, widechar *number) { |
| /* Convert a string of wide character digits to an integer */ |
| *number = 0; |
| while ((*passLinepos < passLine->length) && (passLine->chars[*passLinepos] >= '0') && |
| (passLine->chars[*passLinepos] <= '9')) |
| *number = 10 * (*number) + (passLine->chars[(*passLinepos)++] - '0'); |
| return 1; |
| } |
| |
| static int |
| passGetVariableNumber( |
| const FileInfo *file, CharsString *passLine, int *passLinepos, widechar *number) { |
| if (!passGetNumber(passLine, passLinepos, number)) { |
| compileError(file, "missing variable number"); |
| return 0; |
| } |
| if ((*number >= 0) && (*number < NUMVAR)) return 1; |
| compileError(file, "variable number out of range"); |
| return 0; |
| } |
| |
| static int |
| passGetName(CharsString *passLine, int *passLinepos, CharsString *name) { |
| name->length = 0; |
| // a name is a sequence of characters in the ranges 'a'..'z' and 'A'..'Z' |
| do { |
| widechar c = passLine->chars[*passLinepos]; |
| if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { |
| name->chars[name->length++] = c; |
| (*passLinepos)++; |
| } else { |
| break; |
| } |
| } while (*passLinepos < passLine->length); |
| return 1; |
| } |
| |
| static inline int |
| wantsString(TranslationTableOpcode opcode, int actionPart, int nofor) { |
| if (opcode == CTO_Correct) return 1; |
| if (opcode != CTO_Context) return 0; |
| return !nofor == !actionPart; |
| } |
| |
| static int |
| verifyStringOrDots(const FileInfo *file, TranslationTableOpcode opcode, int isString, |
| int actionPart, int nofor) { |
| if (!wantsString(opcode, actionPart, nofor) == !isString) return 1; |
| |
| compileError(file, "%s are not allowed in the %s part of a %s translation %s rule.", |
| isString ? "strings" : "dots", getPartName(actionPart), |
| nofor ? "backward" : "forward", _lou_findOpcodeName(opcode)); |
| |
| return 0; |
| } |
| |
| static int |
| appendInstructionChar( |
| const FileInfo *file, widechar *passInstructions, int *passIC, widechar ch) { |
| if (*passIC >= MAXSTRING) { |
| compileError(file, "multipass operand too long"); |
| return 0; |
| } |
| passInstructions[(*passIC)++] = ch; |
| return 1; |
| } |
| |
| static int |
| compilePassOpcode(const FileInfo *file, TranslationTableOpcode opcode, int noback, |
| int nofor, TranslationTableHeader **table) { |
| static CharsString passRuleChars; |
| static CharsString passRuleDots; |
| /* Compile the operands of a pass opcode */ |
| widechar passSubOp; |
| const CharacterClass *class; |
| TranslationTableRule *rule = NULL; |
| int k; |
| int kk = 0; |
| int endTest = 0; |
| widechar *passInstructions = passRuleDots.chars; |
| int passIC = 0; /* Instruction counter */ |
| passRuleChars.length = 0; |
| CharsString passHoldString; |
| widechar passHoldNumber; |
| CharsString passLine; |
| int passLinepos = 0; |
| TranslationTableCharacterAttributes passAttributes; |
| int replacing = 0; |
| passHoldString.length = 0; |
| for (k = file->linepos; k < file->linelen; k++) |
| passHoldString.chars[passHoldString.length++] = file->line[k]; |
| #define SEPCHAR 0x0001 |
| for (k = 0; k < passHoldString.length && passHoldString.chars[k] > 32; k++) |
| ; |
| if (k < passHoldString.length) |
| passHoldString.chars[k] = SEPCHAR; |
| else { |
| compileError(file, "Invalid multipass operands"); |
| return 0; |
| } |
| parseChars(file, &passLine, &passHoldString); |
| /* Compile test part */ |
| for (k = 0; k < passLine.length && passLine.chars[k] != SEPCHAR; k++) |
| ; |
| endTest = k; |
| passLine.chars[endTest] = pass_endTest; |
| passLinepos = 0; |
| while (passLinepos <= endTest) { |
| switch ((passSubOp = passLine.chars[passLinepos])) { |
| case pass_lookback: |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_lookback)) |
| return 0; |
| passLinepos++; |
| passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
| if (passHoldNumber == 0) passHoldNumber = 1; |
| if (!appendInstructionChar(file, passInstructions, &passIC, passHoldNumber)) |
| return 0; |
| break; |
| case pass_not: |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_not)) |
| return 0; |
| passLinepos++; |
| break; |
| case pass_first: |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_first)) |
| return 0; |
| passLinepos++; |
| break; |
| case pass_last: |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_last)) |
| return 0; |
| passLinepos++; |
| break; |
| case pass_search: |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_search)) |
| return 0; |
| passLinepos++; |
| break; |
| case pass_string: |
| if (!verifyStringOrDots(file, opcode, 1, 0, nofor)) { |
| return 0; |
| } |
| passLinepos++; |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_string)) |
| return 0; |
| passGetString(&passLine, &passLinepos, &passHoldString, file); |
| if (passHoldString.length == 0) { |
| compileError(file, "empty string in test part"); |
| return 0; |
| } |
| goto testDoCharsDots; |
| case pass_dots: |
| if (!verifyStringOrDots(file, opcode, 0, 0, nofor)) { |
| return 0; |
| } |
| passLinepos++; |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_dots)) |
| return 0; |
| passGetDots(&passLine, &passLinepos, &passHoldString, file); |
| if (passHoldString.length == 0) { |
| compileError(file, "expected dot pattern after @ operand in test part"); |
| return 0; |
| } |
| testDoCharsDots: |
| if (passIC >= MAXSTRING) { |
| compileError( |
| file, "@ operand in test part of multipass operand too long"); |
| return 0; |
| } |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, passHoldString.length)) |
| return 0; |
| for (kk = 0; kk < passHoldString.length; kk++) { |
| if (passIC >= MAXSTRING) { |
| compileError( |
| file, "@ operand in test part of multipass operand too long"); |
| return 0; |
| } |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, passHoldString.chars[kk])) |
| return 0; |
| } |
| break; |
| case pass_startReplace: |
| if (replacing) { |
| compileError(file, "nested replacement statements"); |
| return 0; |
| } |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, pass_startReplace)) |
| return 0; |
| replacing = 1; |
| passLinepos++; |
| break; |
| case pass_endReplace: |
| if (!replacing) { |
| compileError(file, "unexpected end of replacement"); |
| return 0; |
| } |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_endReplace)) |
| return 0; |
| replacing = 0; |
| passLinepos++; |
| break; |
| case pass_variable: |
| passLinepos++; |
| if (!passGetVariableNumber(file, &passLine, &passLinepos, &passHoldNumber)) |
| return 0; |
| switch (passLine.chars[passLinepos]) { |
| case pass_eq: |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_eq)) |
| return 0; |
| goto doComp; |
| case pass_lt: |
| if (passLine.chars[passLinepos + 1] == pass_eq) { |
| passLinepos++; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, pass_lteq)) |
| return 0; |
| } else if (!appendInstructionChar( |
| file, passInstructions, &passIC, pass_lt)) |
| return 0; |
| goto doComp; |
| case pass_gt: |
| if (passLine.chars[passLinepos + 1] == pass_eq) { |
| passLinepos++; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, pass_gteq)) |
| return 0; |
| } else if (!appendInstructionChar( |
| file, passInstructions, &passIC, pass_gt)) |
| return 0; |
| doComp: |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, passHoldNumber)) |
| return 0; |
| passLinepos++; |
| passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, passHoldNumber)) |
| return 0; |
| break; |
| default: |
| compileError(file, "incorrect comparison operator"); |
| return 0; |
| } |
| break; |
| case pass_attributes: |
| passLinepos++; |
| if (!passGetAttributes(&passLine, &passLinepos, &passAttributes, file)) |
| return 0; |
| insertAttributes: |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_attributes)) |
| return 0; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, (passAttributes >> 48) & 0xffff)) |
| return 0; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, (passAttributes >> 32) & 0xffff)) |
| return 0; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, (passAttributes >> 16) & 0xffff)) |
| return 0; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, passAttributes & 0xffff)) |
| return 0; |
| getRange: |
| if (passLine.chars[passLinepos] == pass_until) { |
| passLinepos++; |
| if (!appendInstructionChar(file, passInstructions, &passIC, 1)) return 0; |
| if (!appendInstructionChar(file, passInstructions, &passIC, 0xffff)) |
| return 0; |
| break; |
| } |
| passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
| if (passHoldNumber == 0) { |
| if (!appendInstructionChar(file, passInstructions, &passIC, 1)) return 0; |
| if (!appendInstructionChar(file, passInstructions, &passIC, 1)) return 0; |
| break; |
| } |
| if (!appendInstructionChar(file, passInstructions, &passIC, passHoldNumber)) |
| return 0; |
| if (passLine.chars[passLinepos] != pass_hyphen) { |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, passHoldNumber)) |
| return 0; |
| break; |
| } |
| passLinepos++; |
| passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
| if (passHoldNumber == 0) { |
| compileError(file, "invalid range"); |
| return 0; |
| } |
| if (!appendInstructionChar(file, passInstructions, &passIC, passHoldNumber)) |
| return 0; |
| break; |
| case pass_groupstart: |
| case pass_groupend: { |
| passLinepos++; |
| passGetName(&passLine, &passLinepos, &passHoldString); |
| TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
| if (ruleOffset) |
| rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
| if (rule && rule->opcode == CTO_Grouping) { |
| if (!appendInstructionChar(file, passInstructions, &passIC, passSubOp)) |
| return 0; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, ruleOffset >> 16)) |
| return 0; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, ruleOffset & 0xffff)) |
| return 0; |
| break; |
| } else { |
| compileError(file, "%s is not a grouping name", |
| _lou_showString( |
| &passHoldString.chars[0], passHoldString.length, 0)); |
| return 0; |
| } |
| break; |
| } |
| case pass_swap: { |
| passLinepos++; |
| passGetName(&passLine, &passLinepos, &passHoldString); |
| if ((class = findCharacterClass(&passHoldString, *table))) { |
| passAttributes = class->attribute; |
| goto insertAttributes; |
| } |
| TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
| if (ruleOffset) |
| rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
| if (rule && |
| (rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd || |
| rule->opcode == CTO_SwapDd)) { |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_swap)) |
| return 0; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, ruleOffset >> 16)) |
| return 0; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, ruleOffset & 0xffff)) |
| return 0; |
| goto getRange; |
| } |
| compileError(file, "%s is neither a class name nor a swap name.", |
| _lou_showString(&passHoldString.chars[0], passHoldString.length, 0)); |
| return 0; |
| } |
| case pass_endTest: |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_endTest)) |
| return 0; |
| if (replacing) { |
| compileError(file, "expected end of replacement"); |
| return 0; |
| } |
| passLinepos++; |
| break; |
| default: |
| compileError(file, "incorrect operator '%c ' in test part", |
| passLine.chars[passLinepos]); |
| return 0; |
| } |
| |
| } /* Compile action part */ |
| |
| /* Compile action part */ |
| while (passLinepos < passLine.length && passLine.chars[passLinepos] <= 32) |
| passLinepos++; |
| while (passLinepos < passLine.length && passLine.chars[passLinepos] > 32) { |
| if (passIC >= MAXSTRING) { |
| compileError(file, "Action part in multipass operand too long"); |
| return 0; |
| } |
| switch ((passSubOp = passLine.chars[passLinepos])) { |
| case pass_string: |
| if (!verifyStringOrDots(file, opcode, 1, 1, nofor)) { |
| return 0; |
| } |
| passLinepos++; |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_string)) |
| return 0; |
| passGetString(&passLine, &passLinepos, &passHoldString, file); |
| goto actionDoCharsDots; |
| case pass_dots: |
| if (!verifyStringOrDots(file, opcode, 0, 1, nofor)) { |
| return 0; |
| } |
| passLinepos++; |
| passGetDots(&passLine, &passLinepos, &passHoldString, file); |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_dots)) |
| return 0; |
| if (passHoldString.length == 0) { |
| compileError(file, "expected dot pattern after @ operand in action part"); |
| return 0; |
| } |
| actionDoCharsDots: |
| if (passIC >= MAXSTRING) { |
| compileError( |
| file, "@ operand in action part of multipass operand too long"); |
| return 0; |
| } |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, passHoldString.length)) |
| return 0; |
| for (kk = 0; kk < passHoldString.length; kk++) { |
| if (passIC >= MAXSTRING) { |
| compileError(file, |
| "@ operand in action part of multipass operand too long"); |
| return 0; |
| } |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, passHoldString.chars[kk])) |
| return 0; |
| } |
| break; |
| case pass_variable: |
| passLinepos++; |
| if (!passGetVariableNumber(file, &passLine, &passLinepos, &passHoldNumber)) |
| return 0; |
| switch (passLine.chars[passLinepos]) { |
| case pass_eq: |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_eq)) |
| return 0; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, passHoldNumber)) |
| return 0; |
| passLinepos++; |
| passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, passHoldNumber)) |
| return 0; |
| break; |
| case pass_plus: |
| case pass_hyphen: |
| if (!appendInstructionChar(file, passInstructions, &passIC, |
| passLine.chars[passLinepos++])) |
| return 0; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, passHoldNumber)) |
| return 0; |
| break; |
| default: |
| compileError(file, "incorrect variable operator in action part"); |
| return 0; |
| } |
| break; |
| case pass_copy: |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_copy)) |
| return 0; |
| passLinepos++; |
| break; |
| case pass_omit: |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_omit)) |
| return 0; |
| passLinepos++; |
| break; |
| case pass_groupreplace: |
| case pass_groupstart: |
| case pass_groupend: { |
| passLinepos++; |
| passGetName(&passLine, &passLinepos, &passHoldString); |
| TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
| if (ruleOffset) |
| rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
| if (rule && rule->opcode == CTO_Grouping) { |
| if (!appendInstructionChar(file, passInstructions, &passIC, passSubOp)) |
| return 0; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, ruleOffset >> 16)) |
| return 0; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, ruleOffset & 0xffff)) |
| return 0; |
| break; |
| } |
| compileError(file, "%s is not a grouping name", |
| _lou_showString(&passHoldString.chars[0], passHoldString.length, 0)); |
| return 0; |
| } |
| case pass_swap: { |
| passLinepos++; |
| passGetName(&passLine, &passLinepos, &passHoldString); |
| TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
| if (ruleOffset) |
| rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
| if (rule && |
| (rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd || |
| rule->opcode == CTO_SwapDd)) { |
| if (!appendInstructionChar(file, passInstructions, &passIC, pass_swap)) |
| return 0; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, ruleOffset >> 16)) |
| return 0; |
| if (!appendInstructionChar( |
| file, passInstructions, &passIC, ruleOffset & 0xffff)) |
| return 0; |
| break; |
| } |
| compileError(file, "%s is not a swap name.", |
| _lou_showString(&passHoldString.chars[0], passHoldString.length, 0)); |
| return 0; |
| break; |
| } |
| default: |
| compileError(file, "incorrect operator in action part"); |
| return 0; |
| } |
| } |
| |
| /* Analyze and add rule */ |
| passRuleDots.length = passIC; |
| |
| { |
| widechar *characters; |
| int length; |
| int found = passFindCharacters( |
| file, passInstructions, passRuleDots.length, &characters, &length); |
| |
| if (!found) return 0; |
| |
| if (characters) { |
| for (k = 0; k < length; k += 1) passRuleChars.chars[k] = characters[k]; |
| passRuleChars.length = k; |
| } |
| } |
| |
| if (!addRule(file, opcode, &passRuleChars, &passRuleDots, 0, 0, NULL, NULL, noback, |
| nofor, table)) |
| return 0; |
| return 1; |
| } |
| |
| /* End of multipass compiler */ |
| |
| static int |
| compileBrailleIndicator(FileInfo *file, const char *ermsg, TranslationTableOpcode opcode, |
| TranslationTableOffset *ruleOffset, int noback, int nofor, |
| TranslationTableHeader **table) { |
| CharsString token; |
| CharsString cells; |
| if (!getToken(file, &token, ermsg)) return 0; |
| if (!parseDots(file, &cells, &token)) return 0; |
| return addRule( |
| file, opcode, NULL, &cells, 0, 0, ruleOffset, NULL, noback, nofor, table); |
| } |
| |
| static int |
| compileNumber(FileInfo *file) { |
| CharsString token; |
| widechar number; |
| if (!getToken(file, &token, "number")) return 0; |
| getNumber(&token.chars[0], &number); |
| if (!(number > 0)) { |
| compileError(file, "a nonzero positive number is required"); |
| return 0; |
| } |
| return number; |
| } |
| |
| static int |
| compileGrouping(FileInfo *file, int noback, int nofor, TranslationTableHeader **table, |
| DisplayTableHeader **displayTable) { |
| int k; |
| CharsString name; |
| CharsString groupChars; |
| CharsString groupDots; |
| CharsString dotsParsed; |
| if (!getToken(file, &name, "name operand")) return 0; |
| if (!getRuleCharsText(file, &groupChars)) return 0; |
| if (!getToken(file, &groupDots, "dots operand")) return 0; |
| for (k = 0; k < groupDots.length && groupDots.chars[k] != ','; k++) |
| ; |
| if (k == groupDots.length) { |
| compileError(file, "Dots operand must consist of two cells separated by a comma"); |
| return 0; |
| } |
| groupDots.chars[k] = '-'; |
| if (!parseDots(file, &dotsParsed, &groupDots)) return 0; |
| if (groupChars.length != 2 || dotsParsed.length != 2) { |
| compileError(file, |
| "two Unicode characters and two cells separated by a comma are needed."); |
| return 0; |
| } |
| if (table) { |
| TranslationTableOffset ruleOffset; |
| TranslationTableCharacter *charsDotsPtr; |
| charsDotsPtr = putChar(file, groupChars.chars[0], table, NULL); |
| charsDotsPtr->attributes |= CTC_Math; |
| charsDotsPtr = putChar(file, groupChars.chars[1], table, NULL); |
| charsDotsPtr->attributes |= CTC_Math; |
| charsDotsPtr = putDots(file, dotsParsed.chars[0], table); |
| charsDotsPtr->attributes |= CTC_Math; |
| charsDotsPtr = putDots(file, dotsParsed.chars[1], table); |
| charsDotsPtr->attributes |= CTC_Math; |
| if (!addRule(file, CTO_Grouping, &groupChars, &dotsParsed, 0, 0, &ruleOffset, |
| NULL, noback, nofor, table)) |
| return 0; |
| if (!addRuleName(file, &name, ruleOffset, *table)) return 0; |
| } |
| if (displayTable) { |
| putCharDotsMapping(file, groupChars.chars[0], dotsParsed.chars[0], displayTable); |
| putCharDotsMapping(file, groupChars.chars[1], dotsParsed.chars[1], displayTable); |
| } |
| if (table) { |
| widechar endChar; |
| widechar endDots; |
| endChar = groupChars.chars[1]; |
| endDots = dotsParsed.chars[1]; |
| groupChars.length = dotsParsed.length = 1; |
| if (!addRule(file, CTO_Math, &groupChars, &dotsParsed, 0, 0, NULL, NULL, noback, |
| nofor, table)) |
| return 0; |
| groupChars.chars[0] = endChar; |
| dotsParsed.chars[0] = endDots; |
| if (!addRule(file, CTO_Math, &groupChars, &dotsParsed, 0, 0, NULL, NULL, noback, |
| nofor, table)) |
| return 0; |
| } |
| return 1; |
| } |
| |
| /* Functions for compiling hyphenation tables */ |
| |
| typedef struct HyphenDict { /* hyphenation dictionary: finite state machine */ |
| int numStates; |
| HyphenationState *states; |
| } HyphenDict; |
| |
/* Number of bucket chains in a HyphenHashTab. */
#define HYPHENHASHSIZE 8191
/* Sentinel state number: hyphenHashLookup returns it for a missing key, and
 * new states start with it as their fallbackState. */
#define DEFAULTSTATE 0xFFFF
| |
| typedef struct HyphenHashEntry { |
| struct HyphenHashEntry *next; |
| CharsString *key; |
| int val; |
| } HyphenHashEntry; |
| |
| typedef struct HyphenHashTab { |
| HyphenHashEntry *entries[HYPHENHASHSIZE]; |
| } HyphenHashTab; |
| |
| /* a hash function from ASU - adapted from Gtk+ */ |
| static unsigned int |
| hyphenStringHash(const CharsString *s) { |
| int k; |
| unsigned int h = 0, g; |
| for (k = 0; k < s->length; k++) { |
| h = (h << 4) + s->chars[k]; |
| if ((g = h & 0xf0000000)) { |
| h = h ^ (g >> 24); |
| h = h ^ g; |
| } |
| } |
| return h; |
| } |
| |
| static HyphenHashTab * |
| hyphenHashNew(void) { |
| HyphenHashTab *hashTab; |
| if (!(hashTab = malloc(sizeof(HyphenHashTab)))) _lou_outOfMemory(); |
| memset(hashTab, 0, sizeof(HyphenHashTab)); |
| return hashTab; |
| } |
| |
| static void |
| hyphenHashFree(HyphenHashTab *hashTab) { |
| int i; |
| HyphenHashEntry *e, *next; |
| for (i = 0; i < HYPHENHASHSIZE; i++) |
| for (e = hashTab->entries[i]; e; e = next) { |
| next = e->next; |
| free(e->key); |
| free(e); |
| } |
| free(hashTab); |
| } |
| |
| /* assumes that key is not already present! */ |
| static void |
| hyphenHashInsert(HyphenHashTab *hashTab, const CharsString *key, int val) { |
| int i, j; |
| HyphenHashEntry *e; |
| i = hyphenStringHash(key) % HYPHENHASHSIZE; |
| if (!(e = malloc(sizeof(HyphenHashEntry)))) _lou_outOfMemory(); |
| e->next = hashTab->entries[i]; |
| e->key = malloc((key->length + 1) * CHARSIZE); |
| if (!e->key) _lou_outOfMemory(); |
| e->key->length = key->length; |
| for (j = 0; j < key->length; j++) e->key->chars[j] = key->chars[j]; |
| e->val = val; |
| hashTab->entries[i] = e; |
| } |
| |
| /* return val if found, otherwise DEFAULTSTATE */ |
| static int |
| hyphenHashLookup(HyphenHashTab *hashTab, const CharsString *key) { |
| int i, j; |
| HyphenHashEntry *e; |
| if (key->length == 0) return 0; |
| i = hyphenStringHash(key) % HYPHENHASHSIZE; |
| for (e = hashTab->entries[i]; e; e = e->next) { |
| if (key->length != e->key->length) continue; |
| for (j = 0; j < key->length; j++) |
| if (key->chars[j] != e->key->chars[j]) break; |
| if (j == key->length) return e->val; |
| } |
| return DEFAULTSTATE; |
| } |
| |
| static int |
| hyphenGetNewState(HyphenDict *dict, HyphenHashTab *hashTab, const CharsString *string) { |
| hyphenHashInsert(hashTab, string, dict->numStates); |
| /* predicate is true if dict->numStates is a power of two */ |
| if (!(dict->numStates & (dict->numStates - 1))) |
| dict->states = |
| realloc(dict->states, (dict->numStates << 1) * sizeof(HyphenationState)); |
| if (!dict->states) _lou_outOfMemory(); |
| dict->states[dict->numStates].hyphenPattern = 0; |
| dict->states[dict->numStates].fallbackState = DEFAULTSTATE; |
| dict->states[dict->numStates].numTrans = 0; |
| dict->states[dict->numStates].trans.pointer = NULL; |
| return dict->numStates++; |
| } |
| |
| /* add a transition from state1 to state2 through ch - assumes that the |
| * transition does not already exist */ |
| static void |
| hyphenAddTrans(HyphenDict *dict, int state1, int state2, widechar ch) { |
| int numTrans; |
| numTrans = dict->states[state1].numTrans; |
| if (numTrans == 0) |
| dict->states[state1].trans.pointer = malloc(sizeof(HyphenationTrans)); |
| else if (!(numTrans & (numTrans - 1))) |
| dict->states[state1].trans.pointer = realloc(dict->states[state1].trans.pointer, |
| (numTrans << 1) * sizeof(HyphenationTrans)); |
| dict->states[state1].trans.pointer[numTrans].ch = ch; |
| dict->states[state1].trans.pointer[numTrans].newState = state2; |
| dict->states[state1].numTrans++; |
| } |
| |
| static int |
| compileHyphenation( |
| FileInfo *file, CharsString *encoding, TranslationTableHeader **table) { |
| CharsString hyph; |
| HyphenationTrans *holdPointer; |
| HyphenHashTab *hashTab; |
| CharsString word; |
| char pattern[MAXSTRING + 1]; |
| unsigned int stateNum = 0, lastState = 0; |
| int i, j, k = encoding->length; |
| widechar ch; |
| int found; |
| HyphenHashEntry *e; |
| HyphenDict dict; |
| TranslationTableOffset holdOffset; |
| /* Set aside enough space for hyphenation states and transitions in |
| * translation table. Must be done before anything else */ |
| allocateSpaceInTranslationTable(file, NULL, 250000, table); |
| hashTab = hyphenHashNew(); |
| dict.numStates = 1; |
| dict.states = malloc(sizeof(HyphenationState)); |
| if (!dict.states) _lou_outOfMemory(); |
| dict.states[0].hyphenPattern = 0; |
| dict.states[0].fallbackState = DEFAULTSTATE; |
| dict.states[0].numTrans = 0; |
| dict.states[0].trans.pointer = NULL; |
| do { |
| if (encoding->chars[0] == 'I') { |
| if (!getToken(file, &hyph, NULL)) continue; |
| } else { |
| /* UTF-8 */ |
| if (!getToken(file, &word, NULL)) continue; |
| parseChars(file, &hyph, &word); |
| } |
| if (hyph.length == 0 || hyph.chars[0] == '#' || hyph.chars[0] == '%' || |
| hyph.chars[0] == '<') |
| continue; /* comment */ |
| j = 0; |
| pattern[j] = '0'; |
| for (i = 0; i < hyph.length; i++) { |
| if (hyph.chars[i] >= '0' && hyph.chars[i] <= '9') |
| pattern[j] = (char)hyph.chars[i]; |
| else { |
| word.chars[j] = hyph.chars[i]; |
| pattern[++j] = '0'; |
| } |
| } |
| word.chars[j] = 0; |
| word.length = j; |
| pattern[j + 1] = 0; |
| for (i = 0; pattern[i] == '0'; i++) |
| ; |
| found = hyphenHashLookup(hashTab, &word); |
| if (found != DEFAULTSTATE) |
| stateNum = found; |
| else |
| stateNum = hyphenGetNewState(&dict, hashTab, &word); |
| k = j + 2 - i; |
| if (k > 0) { |
| allocateSpaceInTranslationTable( |
| file, &dict.states[stateNum].hyphenPattern, k, table); |
| memcpy(&(*table)->ruleArea[dict.states[stateNum].hyphenPattern], &pattern[i], |
| k); |
| } |
| /* now, put in the prefix transitions */ |
| while (found == DEFAULTSTATE) { |
| lastState = stateNum; |
| ch = word.chars[word.length-- - 1]; |
| found = hyphenHashLookup(hashTab, &word); |
| if (found != DEFAULTSTATE) |
| stateNum = found; |
| else |
| stateNum = hyphenGetNewState(&dict, hashTab, &word); |
| hyphenAddTrans(&dict, stateNum, lastState, ch); |
| } |
| } while (_lou_getALine(file)); |
| /* put in the fallback states */ |
| for (i = 0; i < HYPHENHASHSIZE; i++) { |
| for (e = hashTab->entries[i]; e; e = e->next) { |
| for (j = 1; j <= e->key->length; j++) { |
| word.length = 0; |
| for (k = j; k < e->key->length; k++) |
| word.chars[word.length++] = e->key->chars[k]; |
| stateNum = hyphenHashLookup(hashTab, &word); |
| if (stateNum != DEFAULTSTATE) break; |
| } |
| if (e->val) dict.states[e->val].fallbackState = stateNum; |
| } |
| } |
| hyphenHashFree(hashTab); |
| /* Transfer hyphenation information to table */ |
| for (i = 0; i < dict.numStates; i++) { |
| if (dict.states[i].numTrans == 0) |
| dict.states[i].trans.offset = 0; |
| else { |
| holdPointer = dict.states[i].trans.pointer; |
| allocateSpaceInTranslationTable(file, &dict.states[i].trans.offset, |
| dict.states[i].numTrans * sizeof(HyphenationTrans), table); |
| memcpy(&(*table)->ruleArea[dict.states[i].trans.offset], holdPointer, |
| dict.states[i].numTrans * sizeof(HyphenationTrans)); |
| free(holdPointer); |
| } |
| } |
| allocateSpaceInTranslationTable( |
| file, &holdOffset, dict.numStates * sizeof(HyphenationState), table); |
| (*table)->hyphenStatesArray = holdOffset; |
| /* Prevents segmentation fault if table is reallocated */ |
| memcpy(&(*table)->ruleArea[(*table)->hyphenStatesArray], &dict.states[0], |
| dict.numStates * sizeof(HyphenationState)); |
| free(dict.states); |
| return 1; |
| } |
| |
| static int |
| compileCharDef(FileInfo *file, TranslationTableOpcode opcode, |
| TranslationTableCharacterAttributes attributes, int noback, int nofor, |
| TranslationTableHeader **table, DisplayTableHeader **displayTable) { |
| CharsString ruleChars; |
| CharsString ruleDots; |
| if (!getRuleCharsText(file, &ruleChars)) return 0; |
| if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
| if (ruleChars.length != 1) { |
| compileError(file, "Exactly one character is required."); |
| return 0; |
| } |
| if (ruleDots.length < 1) { |
| compileError(file, "At least one cell is required."); |
| return 0; |
| } |
| if (table) { |
| TranslationTableCharacter *character; |
| TranslationTableCharacter *cell = NULL; |
| int k; |
|