/* blob: 5ea5abd319bdaa700ab2ef65b9113b8d41c3495d [file] [log] [blame] */
/* liblouis Braille Translation and Back-Translation
Library
Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by
The BRLTTY Team
Copyright (C) 2004, 2005, 2006
ViewPlus Technologies, Inc. www.viewplus.com
and
JJB Software, Inc. www.jjb-software.com
All rights reserved
This file is free software; you can redistribute it and/or modify it
under the terms of the Lesser or Library GNU General Public License
as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Library GNU General Public License for more details.
You should have received a copy of the Library GNU General Public
License along with this program; see the file COPYING. If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
Maintained by John J. Boyer john.boyer@jjb-software.com
*/
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <sys/stat.h>
//#include <unistd.h>
#include "louis.h"
#include "config.h"
#define QUOTESUB 28 /*Stand-in for double quotes in strings */
/* Contributed by Michel Such <michel.such@free.fr> */
#ifdef _WIN32
/* Adapted from BRLTTY code (see sys_progs_windows.h) */
#include <shlobj.h>
/* realloc() front end: a failed non-empty request is reported through
 * outOfMemory(). */
static void *
reallocWrapper (void *address, size_t size)
{
  void *resized = realloc (address, size);
  if (resized == NULL && size != 0)
    outOfMemory ();
  return resized;
}
/* strdup() front end: a failed duplication is reported through
 * outOfMemory(). */
static char *
strdupWrapper (const char *string)
{
  char *copy = strdup (string);
  if (copy == NULL)
    outOfMemory ();
  return copy;
}
/* Return the directory of the running executable, including the trailing
 * backslash, as a string allocated with strdup() (via strdupWrapper).
 * The process exits with status 3 when the module handle or the module
 * file name cannot be obtained. */
char *EXPORT_CALL
lou_getProgramPath ()
{
  char *path = NULL;
  HMODULE handle;
  if ((handle = GetModuleHandle (NULL)))
    {
      size_t size = 0X80;
      char *buffer = NULL;
      while (1)
	{
	  DWORD length;
	  /* Double the buffer on every attempt (first attempt: 0x100). */
	  buffer = reallocWrapper (buffer, size <<= 1);
	  length = GetModuleFileName (handle, buffer, size);
	  if (!length)
	    {
	      printf ("GetModuleFileName\n");
	      exit (3);
	    }
	  /* Only accept the result when it is shorter than the buffer;
	   * otherwise loop and retry with a larger one. */
	  if (length < size)
	    {
	      buffer[length] = 0;
	      path = strdupWrapper (buffer);
	      /* Find the last backslash and cut the string right after it. */
	      while (length > 0)
		if (path[--length] == '\\')
		  break;
	      path[length + 1] = '\0';
	      break;
	    }
	}
      free (buffer);
    }
  else
    {
      printf ("GetModuleHandle\n");
      exit (3);
    }
  return path;
}
#define PATH_SEP ';'
#define DIR_SEP '\\'
#else
#define PATH_SEP ':'
#define DIR_SEP '/'
#endif
/* End of MS contribution */
/* Report memory exhaustion on stderr and terminate the process with exit
 * status 3. */
void
outOfMemory ()
{
  fputs ("liblouis: Insufficient memory\n", stderr);
  exit (3);
}
/* The following variables and functions make it possible to specify the
* path on which all tables for liblouis and all files for liblouisutdml,
* in their proper directories, will be found.
*/
/* Storage for the data path and a pointer to it; the pointer is NULL while
 * no path is set. */
static char dataPath[MAXSTRING];
static char *dataPathPtr;
/* Remember a copy of path as the data path.
 * Returns a pointer to the stored copy, or NULL when path is NULL or does
 * not fit (with its terminator) in the internal buffer.  In both NULL
 * cases any previously set path is cleared. */
char *EXPORT_CALL
lou_setDataPath (char *path)
{
  size_t pathLength;
  dataPathPtr = NULL;
  if (path == NULL)
    return NULL;
  pathLength = strlen (path);
  /* Refuse paths that would overflow dataPath instead of writing past it. */
  if (pathLength >= sizeof (dataPath))
    return NULL;
  memcpy (dataPath, path, pathLength + 1);
  dataPathPtr = dataPath;
  return dataPathPtr;
}
/* Return the path last stored by lou_setDataPath, or NULL when no path is
 * currently set. */
char *EXPORT_CALL
lou_getDataPath ()
{
return dataPathPtr;
}
/* End of dataPath code.*/
/* Current log stream (NULL until one is opened) and the name of the first
 * log file ever requested, which serves as a fallback. */
static FILE *logFile = NULL;
static char initialLogFileName[256];
/* Direct log output to fileName.
 * A NULL or empty name is ignored.  The first name that fits in
 * initialLogFileName is remembered; if fileName cannot be opened that
 * remembered file is tried, and if that fails too, logging goes to stderr
 * after a diagnostic. */
void EXPORT_CALL
lou_logFile (const char *fileName)
{
  if (fileName == NULL || fileName[0] == 0)
    return;
  /* Only remember names that fit; longer ones used to overflow the buffer. */
  if (initialLogFileName[0] == 0
      && strlen (fileName) < sizeof (initialLogFileName))
    strcpy (initialLogFileName, fileName);
  /* Release a previously opened log file instead of leaking it. */
  if (logFile != NULL && logFile != stderr)
    fclose (logFile);
  logFile = fopen (fileName, "wb");
  if (logFile == NULL && initialLogFileName[0] != 0)
    logFile = fopen (initialLogFileName, "wb");
  if (logFile == NULL)
    {
      fprintf (stderr, "Cannot open log file %s\n", fileName);
      logFile = stderr;
    }
}
/* printf-style logging: format the arguments to the current log stream and
 * append a newline.  When no stream is open yet, the remembered initial log
 * file is opened, falling back to stderr.  A NULL format is ignored; on
 * Symbian the function does nothing. */
void EXPORT_CALL
lou_logPrint (char *format, ...)
{
#ifndef __SYMBIAN32__
  va_list args;
  if (!format)
    return;
  if (!logFile)
    {
      if (initialLogFileName[0])
	logFile = fopen (initialLogFileName, "wb");
      if (!logFile)
	logFile = stderr;
    }
  va_start (args, format);
  vfprintf (logFile, format, args);
  va_end (args);
  fputc ('\n', logFile);
#endif
}
/* Close the current log stream, if any, and forget it.
 * stderr is never closed: it is only borrowed as a fallback when no log
 * file could be opened. */
void EXPORT_CALL
lou_logEnd ()
{
  if (logFile != NULL && logFile != stderr)
    fclose (logFile);
  logFile = NULL;
}
/* Compare the first len bytes of a with the first len wide characters of
 * b.  Returns 1 when every position matches, 0 otherwise. */
static int
eqasc2uni (const unsigned char *a, const widechar * b, const int len)
{
  int pos = 0;
  while (pos < len)
    {
      if (b[pos] != (widechar) a[pos])
	return 0;
      pos++;
    }
  return 1;
}
/* A counted string of wide characters: length is the number of entries of
 * chars that are in use. */
typedef struct
{
widechar length;
widechar chars[MAXSTRING];
}
CharsString;
/* Number of compileError / compileWarning calls made so far. */
static int errorCount;
static int warningCount;
/* The translation table being built, together with the number of bytes
 * allocated for it (tableSize) and the number of bytes handed out so far
 * (tableUsed); see allocateSpaceInTable. */
static TranslationTableHeader *table;
static TranslationTableOffset tableSize;
static TranslationTableOffset tableUsed;
/* One node of the singly linked list headed by tableChain.  Each node
 * records a table pointer; allocateSpaceInTable rewrites that pointer when
 * the table it refers to is moved by realloc.
 * NOTE(review): tableList / tableListLength presumably hold the table-list
 * string the entry was created for, stored past the end of the struct --
 * confirm against the code that creates entries. */
typedef struct
{
void *next;
void *table;
int tableListLength;
char tableList[1];
} ChainEntry;
static ChainEntry *tableChain = NULL;
/* NULL-terminated list of the predefined character class names.
 * allocateCharacterClasses registers them in this order, giving each one
 * the next attribute bit starting from 1. */
static const char *characterClassNames[] = {
"space",
"letter",
"digit",
"punctuation",
"uppercase",
"lowercase",
"math",
"sign",
"litdigit",
NULL
};
/* A named character class.  Instances form a singly linked list headed by
 * characterClasses; name holds length wide characters and is allocated
 * past the end of the struct by addCharacterClass. */
struct CharacterClass
{
struct CharacterClass *next;
TranslationTableCharacterAttributes attribute;
widechar length;
widechar name[1];
};
/* Head of the class list, and the attribute bit that the next class added
 * by addCharacterClass will receive (0 once all bits are used up). */
static struct CharacterClass *characterClasses;
static TranslationTableCharacterAttributes characterClassAttribute;
/* Spelling of every opcode, indexed by opcode number: findOpcodeName
 * returns opcodeNames[opcode], and getOpcode / findOpcodeNumber search this
 * array.
 * NOTE(review): the order presumably has to match the TranslationTableOpcode
 * enum declared in louis.h -- confirm before inserting or reordering. */
static const char *opcodeNames[CTO_None] = {
"include",
"locale",
"undefined",
"capsign",
"begcaps",
"lenbegcaps",
"endcaps",
"firstwordcaps",
"lastwordbeforecaps",
"lastwordaftercaps",
"lencapsphrase",
"letsign",
"noletsignbefore",
"noletsign",
"noletsignafter",
"numsign",
"firstwordital",
"italsign",
"lastworditalbefore",
"lastworditalafter",
"begital",
"firstletterital",
"endital",
"lastletterital",
"singleletterital",
"italword",
"lenitalphrase",
"firstwordbold",
"boldsign",
"lastwordboldbefore",
"lastwordboldafter",
"begbold",
"firstletterbold",
"endbold",
"lastletterbold",
"singleletterbold",
"boldword",
"lenboldphrase",
"firstwordunder",
"undersign",
"lastwordunderbefore",
"lastwordunderafter",
"begunder",
"firstletterunder",
"endunder",
"lastletterunder",
"singleletterunder",
"underword",
"lenunderphrase",
"begcomp",
"compbegemph1",
"compendemph1",
"compbegemph2",
"compendemph2",
"compbegemph3",
"compendemph3",
"compcapsign",
"compbegcaps",
"compendcaps",
"endcomp",
"multind",
"compdots",
"comp6",
"class",
"after",
"before",
"noback",
"nofor",
"swapcc",
"swapcd",
"swapdd",
"space",
"digit",
"punctuation",
"math",
"sign",
"letter",
"uppercase",
"lowercase",
"grouping",
"uplow",
"litdigit",
"display",
"replace",
"context",
"correct",
"pass2",
"pass3",
"pass4",
"repeated",
"repword",
"capsnocont",
"always",
"exactdots",
"nocross",
"syllable",
"nocont",
"compbrl",
"literal",
"largesign",
"word",
"partword",
"joinnum",
"joinword",
"lowword",
"contraction",
"sufword",
"prfword",
"begword",
"begmidword",
"midword",
"midendword",
"endword",
"prepunc",
"postpunc",
"begnum",
"midnum",
"endnum",
"decpoint",
"hyphen",
"nobreak"
};
/* Length of each opcode name, compared against the token length before the
 * name itself is compared.  Zero-initialised here.
 * NOTE(review): presumably filled in from opcodeNames elsewhere in this
 * file before the first lookup -- confirm. */
static short opcodeLengths[CTO_None] = { 0 };
/* Input encodings distinguished by getAChar. */
typedef enum
{ noEncoding, bigEndian, littleEndian, ascii8 } EncodingType;
/* Reading state for one table file. */
typedef struct
{
const char *fileName;	/* used in error and warning messages */
FILE *in;	/* stream that getAChar reads from */
int lineNumber;	/* incremented by getALine for every line returned */
EncodingType encoding;	/* set by getAChar once two bytes have been seen */
int status;	/* getAChar's progress counter for encoding detection */
int linelen;	/* number of characters stored in line */
int linepos;	/* read position within line used by getToken */
int checkencoding[2];	/* the first two bytes of the file */
widechar line[MAXSTRING];	/* the current line, zero-terminated */
}
FileInfo;
/* Shared result buffer of showString, showDots, showAttributes and
 * findOpcodeName; each call overwrites the previous result. */
static char scratchBuf[MAXSTRING];
/* Translate a string of characters to the encoding used in character
 * operands: the text is wrapped in single quotes, printable ASCII is copied
 * and every other character is written as a \x, \y or \z hexadecimal
 * escape.  The result lives in the shared scratchBuf and is overwritten by
 * the next call; output that does not fit is truncated, but the closing
 * quote and terminator are always written. */
char *
showString (widechar const *chars, int length)
{
  int charPos;
  int bufPos = 0;
  scratchBuf[bufPos++] = '\'';
  for (charPos = 0; charPos < length; charPos++)
    {
      if (chars[charPos] >= 32 && chars[charPos] < 127)
	{
	  /* Keep room for this character, the closing quote and the NUL. */
	  if ((bufPos + 3) > sizeof (scratchBuf))
	    break;
	  scratchBuf[bufPos++] = (char) chars[charPos];
	}
      else
	{
	  char hexbuf[20];
	  int hexLength;
	  char escapeLetter;
	  int leadingZeros;
	  int hexPos;
	  hexLength = sprintf (hexbuf, "%x", chars[charPos]);
	  /* The escape letter encodes how many hex digits follow:
	   * x = 4, y = 5, z = 8. */
	  switch (hexLength)
	    {
	    case 1:
	    case 2:
	    case 3:
	    case 4:
	      escapeLetter = 'x';
	      leadingZeros = 4 - hexLength;
	      break;
	    case 5:
	      escapeLetter = 'y';
	      leadingZeros = 0;
	      break;
	    case 6:
	    case 7:
	    case 8:
	      escapeLetter = 'z';
	      leadingZeros = 8 - hexLength;
	      break;
	    default:
	      escapeLetter = '?';
	      leadingZeros = 0;
	      break;
	    }
	  if ((bufPos + leadingZeros + hexLength + 4) >= sizeof (scratchBuf))
	    break;
	  scratchBuf[bufPos++] = '\\';
	  scratchBuf[bufPos++] = escapeLetter;
	  for (hexPos = 0; hexPos < leadingZeros; hexPos++)
	    scratchBuf[bufPos++] = '0';
	  for (hexPos = 0; hexPos < hexLength; hexPos++)
	    scratchBuf[bufPos++] = hexbuf[hexPos];
	}
    }
  scratchBuf[bufPos++] = '\'';
  scratchBuf[bufPos] = 0;
  return scratchBuf;
}
/* Translate a sequence of dots to the encoding used in dots operands:
 * each cell becomes the characters 1-9 and A-F for the dots it contains
 * (or 0 when it equals B16), and cells are separated by '-'.  The result
 * lives in the shared scratchBuf and is overwritten by the next call;
 * cells that no longer fit are dropped. */
char *
showDots (widechar const *dots, int length)
{
  /* Most characters one cell can emit: 15 dot characters plus '-'. */
  enum { maxCellChars = 16 };
  int bufPos = 0;
  int dotsPos;
  /* Only start a cell when a full cell plus the terminator still fits. */
  for (dotsPos = 0;
       (size_t) bufPos + maxCellChars < sizeof (scratchBuf)
       && dotsPos < length; dotsPos++)
    {
      if ((dots[dotsPos] & B1))
	scratchBuf[bufPos++] = '1';
      if ((dots[dotsPos] & B2))
	scratchBuf[bufPos++] = '2';
      if ((dots[dotsPos] & B3))
	scratchBuf[bufPos++] = '3';
      if ((dots[dotsPos] & B4))
	scratchBuf[bufPos++] = '4';
      if ((dots[dotsPos] & B5))
	scratchBuf[bufPos++] = '5';
      if ((dots[dotsPos] & B6))
	scratchBuf[bufPos++] = '6';
      if ((dots[dotsPos] & B7))
	scratchBuf[bufPos++] = '7';
      if ((dots[dotsPos] & B8))
	scratchBuf[bufPos++] = '8';
      if ((dots[dotsPos] & B9))
	scratchBuf[bufPos++] = '9';
      if ((dots[dotsPos] & B10))
	scratchBuf[bufPos++] = 'A';
      if ((dots[dotsPos] & B11))
	scratchBuf[bufPos++] = 'B';
      if ((dots[dotsPos] & B12))
	scratchBuf[bufPos++] = 'C';
      if ((dots[dotsPos] & B13))
	scratchBuf[bufPos++] = 'D';
      if ((dots[dotsPos] & B14))
	scratchBuf[bufPos++] = 'E';
      if ((dots[dotsPos] & B15))
	scratchBuf[bufPos++] = 'F';
      if ((dots[dotsPos] == B16))
	scratchBuf[bufPos++] = '0';
      if (dotsPos != length - 1)
	scratchBuf[bufPos++] = '-';
    }
  scratchBuf[bufPos] = 0;
  return &scratchBuf[0];
}
/* Show attributes using the letters used after the $ in multipass
 * opcodes.  The result lives in the shared scratchBuf and is overwritten
 * by the next call. */
char *
showAttributes (TranslationTableCharacterAttributes a)
{
  const struct
  {
    TranslationTableCharacterAttributes mask;
    char letter;
  } letters[] = {
    {CTC_Space, 's'},
    {CTC_Letter, 'l'},
    {CTC_Digit, 'd'},
    {CTC_Punctuation, 'p'},
    {CTC_UpperCase, 'U'},
    {CTC_LowerCase, 'u'},
    {CTC_Math, 'm'},
    {CTC_Sign, 'S'},
    {CTC_LitDigit, 'D'},
    {CTC_Class1, 'w'},
    {CTC_Class2, 'x'},
    {CTC_Class3, 'y'},
    {CTC_Class4, 'z'}
  };
  size_t which;
  int bufPos = 0;
  for (which = 0; which < sizeof (letters) / sizeof (letters[0]); which++)
    if (a & letters[which].mask)
      scratchBuf[bufPos++] = letters[which].letter;
  scratchBuf[bufPos] = 0;
  return scratchBuf;
}
static void compileError (FileInfo * nested, char *format, ...);
/* Return the next character of the input file as an int, or EOF at end of
 * input or when the encoding cannot be recognised.
 *
 * The first two bytes of the file select the encoding: FE FF means
 * big-endian, FF FE means little-endian (both read as two-byte units), and
 * two bytes below 128 mean 8-bit ASCII.  Anything else is reported through
 * compileError.  nested->checkencoding keeps those two bytes and
 * nested->status tracks how far the detection has progressed. */
static int
getAChar (FileInfo * nested)
{
int ch1 = 0, ch2 = 0;
widechar character;
/* ASCII input, call following detection: the second probe byte has
 * already been read from the stream, so return it from checkencoding. */
if (nested->encoding == ascii8)
if (nested->status == 2)
{
nested->status++;
return nested->checkencoding[1];
}
while ((ch1 = fgetc (nested->in)) != EOF)
{
/* Remember the first two bytes for the encoding check below. */
if (nested->status < 2)
nested->checkencoding[nested->status] = ch1;
nested->status++;
if (nested->status == 2)
{
if (nested->checkencoding[0] == 0xfe
&& nested->checkencoding[1] == 0xff)
nested->encoding = bigEndian;
else if (nested->checkencoding[0] == 0xff
&& nested->checkencoding[1] == 0xfe)
nested->encoding = littleEndian;
else if (nested->checkencoding[0] < 128
&& nested->checkencoding[1] < 128)
{
/* No byte-order mark: both probe bytes are data; return the first now
 * and the second on the next call (see the top of the function). */
nested->encoding = ascii8;
return nested->checkencoding[0];
}
else
{
compileError (nested,
"encoding is neither big-endian, little-endian nor ASCII 8.");
ch1 = EOF;
break;;
}
/* A byte-order mark was consumed; go on to the first real character. */
continue;
}
switch (nested->encoding)
{
case noEncoding:
/* Only one byte seen so far: loop again to read the second one. */
break;
case ascii8:
return ch1;
break;
case bigEndian:
ch2 = fgetc (nested->in);
if (ch2 == EOF)
break;
character = (ch1 << 8) | ch2;
return (int) character;
break;
case littleEndian:
ch2 = fgetc (nested->in);
if (ch2 == EOF)
break;
character = (ch2 << 8) | ch1;
return (int) character;
break;
}
/* A two-byte unit was cut short by end of file. */
if (ch1 == EOF || ch2 == EOF)
break;
}
return EOF;
}
/* Read a line of widechar's from an input file into nested->line.
 * Carriage returns are dropped and a backslash immediately before the line
 * feed joins the next physical line to the current one.  The stored line
 * is always zero-terminated and nested->linepos is reset to 0.
 * Returns 1 when a line was read (and bumps nested->lineNumber), 0 when
 * the end of the input was reached. */
static int
getALine (FileInfo * nested)
{
  int ch;
  int pch = 0;
  nested->linelen = 0;
  while ((ch = getAChar (nested)) != EOF)
    {
      if (ch == 13)
	continue;
      if (pch == '\\' && ch == 10)
	{
	  /* Line continuation: drop the stored backslash.  pch is updated
	   * so that an immediately following line feed is not mistaken
	   * for a second continuation. */
	  nested->linelen--;
	  pch = ch;
	  continue;
	}
      /* Stop one short of the buffer size so the terminator below stays
       * inside nested->line. */
      if (ch == 10 || nested->linelen >= MAXSTRING - 1)
	break;
      nested->line[nested->linelen++] = (widechar) ch;
      pch = ch;
    }
  nested->line[nested->linelen] = 0;
  nested->linepos = 0;
  if (ch == EOF)
    return 0;
  nested->lineNumber++;
  return 1;
}
/* Set to 1 by getToken when the token it returned was the last one on the
 * line, 0 otherwise. */
static int lastToken;
/* Find the next string of contiguous non-whitespace characters on the
 * current line and store it, zero-terminated, in result.
 * Returns 1 on success, 2 when this is the last token on the line, and 0
 * when no token is left (reported through compileError only if a
 * description is given) or the token is too long for result. */
static int
getToken (FileInfo * nested, CharsString * result, const char *description)
{
  /* One entry of result->chars is reserved for the terminator. */
  const int maxlen = MAXSTRING - 1;
  while (nested->line[nested->linepos] && nested->line[nested->linepos] <= 32)
    nested->linepos++;
  result->length = 0;
  while (nested->line[nested->linepos] && nested->line[nested->linepos] > 32)
    {
      if (result->length >= maxlen)
	{
	  compileError (nested, "more than %d characters (bytes)", maxlen);
	  return 0;
	}
      else
	result->chars[result->length++] = nested->line[nested->linepos++];
    }
  if (!result->length)
    {
      /* Not enough tokens */
      if (description)
	compileError (nested, "%s not specified.", description);
      return 0;
    }
  result->chars[result->length] = 0;
  /* Skip trailing whitespace to find out whether anything follows. */
  while (nested->line[nested->linepos] && nested->line[nested->linepos] <= 32)
    nested->linepos++;
  if (nested->line[nested->linepos] == 0)
    {
      lastToken = 1;
      return 2;
    }
  else
    {
      lastToken = 0;
      return 1;
    }
}
/* Format an error message printf-style, log it at LOG_ERROR level (prefixed
 * with file name and line number when nested is given) and count it in
 * errorCount.  Does nothing on Symbian. */
static void
compileError (FileInfo * nested, char *format, ...)
{
#ifndef __SYMBIAN32__
  char buffer[MAXSTRING];
  va_list arguments;
  va_start (arguments, format);
#ifdef _WIN32
  _vsnprintf (buffer, sizeof (buffer), format, arguments);
#else
  vsnprintf (buffer, sizeof (buffer), format, arguments);
#endif
  va_end (arguments);
  /* _vsnprintf leaves the buffer unterminated when the message is
   * truncated, so terminate it explicitly. */
  buffer[sizeof (buffer) - 1] = 0;
  if (nested)
    lou_log (LOG_ERROR, "%s:%d: error: %s", nested->fileName,
	     nested->lineNumber, buffer);
  else
    lou_log (LOG_ERROR, "error: %s", buffer);
  errorCount++;
#endif
}
/* Format a warning message printf-style, log it at LOG_WARN level (prefixed
 * with file name and line number when nested is given) and count it in
 * warningCount.  Does nothing on Symbian. */
static void
compileWarning (FileInfo * nested, char *format, ...)
{
#ifndef __SYMBIAN32__
  char buffer[MAXSTRING];
  va_list arguments;
  va_start (arguments, format);
#ifdef _WIN32
  _vsnprintf (buffer, sizeof (buffer), format, arguments);
#else
  vsnprintf (buffer, sizeof (buffer), format, arguments);
#endif
  va_end (arguments);
  /* _vsnprintf leaves the buffer unterminated when the message is
   * truncated, so terminate it explicitly. */
  buffer[sizeof (buffer) - 1] = 0;
  if (nested)
    lou_log (LOG_WARN, "%s:%d: warning: %s", nested->fileName,
	     nested->lineNumber, buffer);
  else
    lou_log (LOG_WARN, "warning: %s", buffer);
  warningCount++;
#endif
}
/* Make sure the translation table has room for count more bytes (rounded
 * up to a multiple of OFFSETSIZE), growing it with realloc when needed.
 * When offset is non-NULL the space is also handed out: *offset receives
 * its position and tableUsed advances.  With a NULL offset the space is
 * only reserved.  Always returns 1; memory exhaustion ends in
 * outOfMemory(). */
static int
allocateSpaceInTable (FileInfo * nested, TranslationTableOffset * offset,
		      int count)
{
  int spaceNeeded = ((count + OFFSETSIZE - 1) / OFFSETSIZE) * OFFSETSIZE;
  TranslationTableOffset required = tableUsed + spaceNeeded;
  if (required > tableSize)
    {
      ChainEntry *entry;
      void *grown;
      /* Over-allocate a little so that not every request reallocates. */
      required += (required / OFFSETSIZE);
      grown = realloc (table, required);
      if (!grown)
	{
	  compileError (nested, "Not enough memory for translation table.");
	  outOfMemory ();
	}
      /* Zero the newly gained tail of the table. */
      memset (((unsigned char *) grown) + tableSize, 0,
	      required - tableSize);
      /* Re-point every chain entry that referred to the old location. */
      for (entry = tableChain; entry != NULL; entry = entry->next)
	if (entry->table == table)
	  entry->table = (TranslationTableHeader *) grown;
      table = (TranslationTableHeader *) grown;
      tableSize = required;
    }
  if (offset == NULL)
    return 1;
  *offset = (tableUsed - sizeof (*table)) / OFFSETSIZE;
  tableUsed += spaceNeeded;
  return 1;
}
/* Grow the table so that count more bytes fit, without handing them out. */
static int
reserveSpaceInTable (FileInfo * nested, int count)
{
  TranslationTableOffset *noOffset = NULL;
  return allocateSpaceInTable (nested, noOffset, count);
}
/* Allocate memory for the table header and a guess on the number of
 * rules.  Does nothing when a table already exists.  Always returns 1;
 * memory exhaustion ends in outOfMemory(). */
static int
allocateHeader (FileInfo * nested)
{
  const TranslationTableOffset startSize = 2 * sizeof (*table);
  if (table)
    return 1;
  tableUsed = sizeof (*table) + OFFSETSIZE;	/*So no offset is ever zero */
  table = malloc (startSize);
  if (!table)
    {
      /* table is NULL here, so there is nothing to free. */
      compileError (nested, "Not enough memory");
      outOfMemory ();
    }
  memset (table, 0, startSize);
  tableSize = startSize;
  return 1;
}
/* Hash function for strings: combines the first two characters of c and
 * reduces the result modulo HASHNUM. */
int
stringHash (const widechar * c)
{
  const unsigned long int first = (unsigned long int) c[0];
  const unsigned long int second = (unsigned long int) c[1];
  return (int) (((first << 8) + second) % HASHNUM);
}
/* Hash function for a single character: c modulo HASHNUM. */
int
charHash (widechar c)
{
  return (int) ((unsigned long int) c % HASHNUM);
}
/* Look up a character (m == 0) or a dot pattern (m != 0) in the table being
 * compiled.  Characters and dots live in separate hash tables because the
 * same value may occur in both.  Returns the entry, or NULL when c has not
 * been added. */
static TranslationTableCharacter *
compile_findCharOrDots (widechar c, int m)
{
  const unsigned long int slot = (unsigned long int) c % HASHNUM;
  TranslationTableOffset bucket =
    (m == 0) ? table->characters[slot] : table->dots[slot];
  while (bucket != 0)
    {
      TranslationTableCharacter *candidate =
	(TranslationTableCharacter *) & table->ruleArea[bucket];
      if (candidate->realchar == c)
	return candidate;
      bucket = candidate->next;
    }
  return NULL;
}
/* Placeholder entries that definedCharOrDots returns, after reporting an
 * error, when a character (noChar) or a dot pattern (noDots) is missing
 * from the table. */
static TranslationTableCharacter noChar = { 0, 0, 0, CTC_Space, 32, 32, 32 };
static TranslationTableCharacter noDots =
{ 0, 0, 0, CTC_Space, B16, B16, B16 };
/* Defined further down; needed by definedCharOrDots. */
static char *unknownDots (widechar dots);
/* Like compile_findCharOrDots, but never returns NULL: when c is missing an
 * error is reported and the noChar (m == 0) or noDots (m != 0) placeholder
 * is returned instead. */
static TranslationTableCharacter *
definedCharOrDots (FileInfo * nested, widechar c, int m)
{
  TranslationTableCharacter *found = compile_findCharOrDots (c, m);
  if (found != NULL)
    return found;
  if (m != 0)
    {
      compileError (nested,
		    "cell %s should be defined at this point but is not",
		    unknownDots (c));
      return &noDots;
    }
  compileError (nested,
		"character %s should be defined at this point but is not",
		showString (&c, 1));
  return &noChar;
}
/* Return the table entry for character (m == 0) or dot pattern (m != 0) c,
 * creating a zeroed entry at the end of its hash chain when it does not
 * exist yet.  Returns NULL only when no table space could be allocated. */
static TranslationTableCharacter *
addCharOrDots (FileInfo * nested, widechar c, int m)
{
  TranslationTableCharacter *entry;
  TranslationTableCharacter *tail;
  TranslationTableOffset chainHead;
  TranslationTableOffset offset;
  unsigned long int slot;
  entry = compile_findCharOrDots (c, m);
  if (entry)
    return entry;
  if (!allocateSpaceInTable (nested, &offset, sizeof (*entry)))
    return NULL;
  entry = (TranslationTableCharacter *) & table->ruleArea[offset];
  memset (entry, 0, sizeof (*entry));
  entry->realchar = c;
  slot = (unsigned long int) c % HASHNUM;
  chainHead = (m == 0) ? table->characters[slot] : table->dots[slot];
  if (chainHead)
    {
      /* Walk to the end of the existing chain and append there. */
      tail = (TranslationTableCharacter *) & table->ruleArea[chainHead];
      while (tail->next)
	tail = (TranslationTableCharacter *) & table->ruleArea[tail->next];
      tail->next = offset;
    }
  else if (m == 0)
    table->characters[slot] = offset;
  else
    table->dots[slot] = offset;
  return entry;
}
/* Look up c in the character-to-dots map (m == 0) or the dots-to-character
 * map (m != 0).  Returns the mapping entry, or NULL when there is none. */
static CharOrDots *
getCharOrDots (widechar c, int m)
{
  const unsigned long int slot = (unsigned long int) c % HASHNUM;
  TranslationTableOffset bucket =
    (m == 0) ? table->charToDots[slot] : table->dotsToChar[slot];
  while (bucket != 0)
    {
      CharOrDots *candidate = (CharOrDots *) & table->ruleArea[bucket];
      if (candidate->lookFor == c)
	return candidate;
      bucket = candidate->next;
    }
  return NULL;
}
/* Return the dot pattern mapped to character c, or B16 when c has no
 * mapping. */
widechar
getDotsForChar (widechar c)
{
  const CharOrDots *mapping = getCharOrDots (c, 0);
  if (mapping == NULL)
    return B16;
  return mapping->found;
}
/* Return the character mapped to dot pattern d, or a space when d has no
 * mapping. */
widechar
getCharFromDots (widechar d)
{
  const CharOrDots *mapping = getCharOrDots (d, 1);
  if (mapping == NULL)
    return ' ';
  return mapping->found;
}
/* Record that character c corresponds to dot pattern d, in both directions.
 * A direction that already has an entry for its key is left untouched.
 * Returns 1 on success, 0 when table space could not be allocated. */
static int
putCharAndDots (FileInfo * nested, widechar c, widechar d)
{
  TranslationTableOffset chainHead;
  TranslationTableOffset offset;
  CharOrDots *entry;
  CharOrDots *tail;
  unsigned long int slot;
  /* character -> dots */
  if (getCharOrDots (c, 0) == NULL)
    {
      if (!allocateSpaceInTable (nested, &offset, sizeof (*entry)))
	return 0;
      entry = (CharOrDots *) & table->ruleArea[offset];
      entry->next = 0;
      entry->lookFor = c;
      entry->found = d;
      slot = (unsigned long int) c % HASHNUM;
      chainHead = table->charToDots[slot];
      if (chainHead)
	{
	  tail = (CharOrDots *) & table->ruleArea[chainHead];
	  while (tail->next)
	    tail = (CharOrDots *) & table->ruleArea[tail->next];
	  tail->next = offset;
	}
      else
	table->charToDots[slot] = offset;
    }
  /* dots -> character */
  if (getCharOrDots (d, 1) == NULL)
    {
      if (!allocateSpaceInTable (nested, &offset, sizeof (*entry)))
	return 0;
      entry = (CharOrDots *) & table->ruleArea[offset];
      entry->next = 0;
      entry->lookFor = d;
      entry->found = c;
      slot = (unsigned long int) d % HASHNUM;
      chainHead = table->dotsToChar[slot];
      if (chainHead)
	{
	  tail = (CharOrDots *) & table->ruleArea[chainHead];
	  while (tail->next)
	    tail = (CharOrDots *) & table->ruleArea[tail->next];
	  tail->next = offset;
	}
      else
	table->dotsToChar[slot] = offset;
    }
  return 1;
}
/* Print out dot numbers: returns a static string of the form \<dots>/,
 * with one character (1-9, A-F) per dot set in dots.  The string is
 * overwritten by the next call. */
static char *
unknownDots (widechar dots)
{
  static char buffer[20];
  const unsigned int dotBits[] = {
    B1, B2, B3, B4, B5, B6, B7, B8, B9, B10, B11, B12, B13, B14, B15
  };
  const char dotNames[] = "123456789ABCDEF";
  int used = 0;
  int which;
  buffer[used++] = '\\';
  for (which = 0; which < 15; which++)
    if (dots & dotBits[which])
      buffer[used++] = dotNames[which];
  buffer[used++] = '/';
  buffer[used] = 0;
  return buffer;
}
/* The rule most recently created by addRule and its offset in the table. */
static TranslationTableOffset newRuleOffset = 0;
static TranslationTableRule *newRule = NULL;
/* Check that all characters (and, for most opcodes, all dot patterns) of
 * newRule are defined by character-definition opcodes.  Every missing one
 * is reported through compileError.  Returns 1 when nothing is missing or
 * the opcode is exempt from the check, 0 otherwise. */
static int
charactersDefined (FileInfo * nested)
{
  int allDefined = 1;
  int pos;
  int dotsEnd;
  /* Opcodes that are exempt from the check altogether. */
  if (newRule->opcode >= CTO_Space && newRule->opcode <= CTO_LitDigit)
    return 1;
  if (newRule->opcode == CTO_SwapDd || newRule->opcode == CTO_Replace
      || newRule->opcode == CTO_MultInd || newRule->opcode == CTO_Repeated)
    return 1;
  if (newRule->opcode >= CTO_Context && newRule->opcode <= CTO_Pass4
      && newRule->opcode != CTO_Correct)
    return 1;
  for (pos = 0; pos < newRule->charslen; pos++)
    {
      if (compile_findCharOrDots (newRule->charsdots[pos], 0))
	continue;
      compileError (nested, "Character %s is not defined", showString
		    (&newRule->charsdots[pos], 1));
      allDefined = 0;
    }
  /* For these opcodes the dots part is not checked. */
  if (newRule->opcode == CTO_Correct || newRule->opcode == CTO_NoBreak
      || newRule->opcode == CTO_SwapCc || newRule->opcode == CTO_SwapCd)
    return allDefined;
  dotsEnd = newRule->charslen + newRule->dotslen;
  for (pos = newRule->charslen; pos < dotsEnd; pos++)
    {
      if (compile_findCharOrDots (newRule->charsdots[pos], 1))
	continue;
      compileError (nested, "Dot pattern %s is not defined.",
		    unknownDots (newRule->charsdots[pos]));
      allDefined = 0;
    }
  return allDefined;
}
/* Direction switches read by addRule: when nofor is set the rule is not
 * linked for forward translation, when noback is set it is not linked for
 * back-translation. */
static int noback = 0;
static int nofor = 0;
/*The following functions are
called by addRule to handle various
* cases.*/
/* Link newRule into the rule chain of its single character for forward
 * translation. */
static void
add_0_single (FileInfo * nested)
{
/*direction = 0, newRule->charslen = 1*/
TranslationTableRule *currentRule;
TranslationTableOffset *currentOffsetPtr;
TranslationTableCharacter *character;
int m = 0;
/* compdots and comp6 rules are not linked here. */
if (newRule->opcode == CTO_CompDots || newRule->opcode == CTO_Comp6)
return;
/* For pass2..pass4 the first entry is looked up as a dot pattern. */
if (newRule->opcode >= CTO_Pass2 && newRule->opcode <= CTO_Pass4)
m = 1;
character = definedCharOrDots (nested, newRule->charsdots[0], m);
/* A letter used by a word or largesign rule is appended to the noLetsign
 * list while there is room. */
if (m != 1 && character->attributes & CTC_Letter && (newRule->opcode
==
CTO_WholeWord
|| newRule->opcode ==
CTO_LargeSign))
{
if (table->noLetsignCount < LETSIGNSIZE)
table->noLetsign[table->noLetsignCount++] = newRule->charsdots[0];
}
/* Opcodes from space up to (not including) uplow define the character. */
if (newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow)
character->definitionRule = newRuleOffset;
/* Find the insertion point: stop at a rule without characters, or at the
 * first definition rule when the new rule is not itself a definition. */
currentOffsetPtr = &character->otherRules;
while (*currentOffsetPtr)
{
currentRule = (TranslationTableRule *)
& table->ruleArea[*currentOffsetPtr];
if (currentRule->charslen == 0)
break;
if (currentRule->opcode >= CTO_Space && currentRule->opcode < CTO_UpLow)
if (!(newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow))
break;
currentOffsetPtr = &currentRule->charsnext;
}
/* Splice the new rule in front of the rule found above. */
newRule->charsnext = *currentOffsetPtr;
*currentOffsetPtr = newRuleOffset;
}
/* direction = 0, newRule->charslen > 1: link newRule into the forward hash
 * chain selected by its first two characters.  The new rule goes in front
 * of the first rule that has fewer characters, or that has the same number
 * of characters and is an "always" rule while the new rule is not. */
static void
add_0_multiple ()
{
  TranslationTableOffset *link =
    &table->forRules[stringHash (&newRule->charsdots[0])];
  while (*link)
    {
      TranslationTableRule *existing =
	(TranslationTableRule *) & table->ruleArea[*link];
      if (newRule->charslen > existing->charslen)
	break;
      if (newRule->charslen == existing->charslen
	  && existing->opcode == CTO_Always
	  && newRule->opcode != CTO_Always)
	break;
      link = &existing->charsnext;
    }
  newRule->charsnext = *link;
  *link = newRuleOffset;
}
/* Link newRule into the rule chain of its single dot pattern for
 * back-translation. */
static void
add_1_single (FileInfo * nested)
{
/*direction = 1, newRule->dotslen = 1*/
TranslationTableRule *currentRule;
TranslationTableOffset *currentOffsetPtr;
TranslationTableCharacter *dots;
/* These opcodes, and single-character "always" rules, are not linked for
 * back-translation. */
if (newRule->opcode == CTO_NoBreak || newRule->opcode == CTO_SwapCc ||
(newRule->opcode >= CTO_Context
&&
newRule->opcode <= CTO_Pass4)
|| newRule->opcode == CTO_Repeated || (newRule->opcode == CTO_Always
&& newRule->charslen == 1))
return; /*too ambiguous */
/* The dots start right after the characters in charsdots. */
dots = definedCharOrDots (nested, newRule->charsdots[newRule->charslen], 1);
/* Opcodes from space up to (not including) uplow define the cell. */
if (newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow)
dots->definitionRule = newRuleOffset;
/* Find the insertion point: stop at a rule with fewer characters or no
 * dots, or at the first definition rule when the new rule is not itself a
 * definition. */
currentOffsetPtr = &dots->otherRules;
while (*currentOffsetPtr)
{
currentRule = (TranslationTableRule *)
& table->ruleArea[*currentOffsetPtr];
if (newRule->charslen > currentRule->charslen ||
currentRule->dotslen == 0)
break;
if (currentRule->opcode >= CTO_Space && currentRule->opcode < CTO_UpLow)
if (!(newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow))
break;
currentOffsetPtr = &currentRule->dotsnext;
}
/* Splice the new rule in front of the rule found above. */
newRule->dotsnext = *currentOffsetPtr;
*currentOffsetPtr = newRuleOffset;
}
/* direction = 1, newRule->dotslen > 1: link newRule into the backward hash
 * chain selected by its first two dot cells.  nobreak, swapcc and
 * context..pass4 rules are not linked.  The new rule goes in front of the
 * first rule whose combined chars + dots length is smaller, or equal when
 * that rule is an "always" rule and the new one is not. */
static void
add_1_multiple ()
{
  TranslationTableOffset *link;
  int newLength;
  if (newRule->opcode == CTO_NoBreak || newRule->opcode == CTO_SwapCc
      || (newRule->opcode >= CTO_Context && newRule->opcode <= CTO_Pass4))
    return;
  link =
    &table->backRules[stringHash (&newRule->charsdots[newRule->charslen])];
  newLength = newRule->dotslen + newRule->charslen;
  while (*link)
    {
      TranslationTableRule *existing =
	(TranslationTableRule *) & table->ruleArea[*link];
      int existingLength = existing->dotslen + existing->charslen;
      if (newLength > existingLength)
	break;
      if (existingLength == newLength
	  && existing->opcode == CTO_Always
	  && newRule->opcode != CTO_Always)
	break;
      link = &existing->dotsnext;
    }
  newRule->dotsnext = *link;
  *link = newRuleOffset;
}
/* Append newRule to the end of the charsnext-linked chain whose head offset
 * is stored at offsetPtr. */
static void
makeRuleChain (TranslationTableOffset * offsetPtr)
{
  TranslationTableOffset *link = offsetPtr;
  while (*link != 0)
    link = &((TranslationTableRule *) & table->ruleArea[*link])->charsnext;
  newRule->charsnext = *link;
  *link = newRuleOffset;
}
/* Append newRule to the attribOrSwapRules chain that belongs to its opcode
 * (correct, context, pass2, pass3, pass4 use slots 0..4).  Returns 1 on
 * success and 0 for any other opcode. */
static int
addPassRule (FileInfo * nested)
{
  int slot;
  switch (newRule->opcode)
    {
    case CTO_Correct:
      slot = 0;
      break;
    case CTO_Context:
      slot = 1;
      break;
    case CTO_Pass2:
      slot = 2;
      break;
    case CTO_Pass3:
      slot = 3;
      break;
    case CTO_Pass4:
      slot = 4;
      break;
    default:
      return 0;
    }
  makeRuleChain (&table->attribOrSwapRules[slot]);
  return 1;
}
/* Create a rule in the table from the given opcode, characters, dots and
 * before/after attributes, and link it into the forward and/or backward
 * lookup chains.  ruleChars and ruleDots may each be NULL.  The new rule is
 * left in the file-level newRule / newRuleOffset.
 * Returns 1 on success, 0 when space could not be allocated or
 * charactersDefined rejected the rule. */
static int
addRule
(FileInfo * nested,
TranslationTableOpcode opcode,
CharsString * ruleChars,
CharsString * ruleDots,
TranslationTableCharacterAttributes after,
TranslationTableCharacterAttributes before)
{
/*Add a rule to the table, using the hash function to find the start of
* chains and chaining both the chars and dots strings */
/* Size of the fixed part of a rule; the actual chars and dots are added
 * below. */
int ruleSize = sizeof (TranslationTableRule) - (DEFAULTRULESIZE * CHARSIZE);
int direction = 0; /*0 = forward translation; 1 = bacward */
if (ruleChars)
ruleSize += CHARSIZE * ruleChars->length;
if (ruleDots)
ruleSize += CHARSIZE * ruleDots->length;
if (!allocateSpaceInTable (nested, &newRuleOffset, ruleSize))
return 0;
newRule = (TranslationTableRule *) & table->ruleArea[newRuleOffset];
newRule->opcode = opcode;
newRule->after = after;
newRule->before = before;
/* charsdots holds the characters first, immediately followed by the
 * dots. */
if (ruleChars)
memcpy (&newRule->charsdots[0], &ruleChars->chars[0],
CHARSIZE * (newRule->charslen = ruleChars->length));
else
newRule->charslen = 0;
if (ruleDots)
memcpy (&newRule->charsdots[newRule->charslen],
&ruleDots->chars[0], CHARSIZE * (newRule->dotslen =
ruleDots->length));
else
newRule->dotslen = 0;
if (!charactersDefined (nested))
return 0;
/*link new rule into table. */
/* Swap rules are stored but not linked into any chain here. */
if (opcode == CTO_SwapCc || opcode == CTO_SwapCd || opcode == CTO_SwapDd)
return 1;
/* context..pass4 rules without characters go on their own chains. */
if (opcode >= CTO_Context && opcode <= CTO_Pass4 && newRule->charslen == 0)
return addPassRule (nested);
/* Skip the forward pass when there is nothing to match on or nofor is
 * set. */
if (newRule->charslen == 0 || nofor)
direction = 1;
while (direction < 2)
{
if (direction == 0 && newRule->charslen == 1)
add_0_single (nested);
else if (direction == 0 && newRule->charslen > 1)
add_0_multiple ();
else if (direction == 1 && newRule->dotslen == 1 && !noback)
add_1_single (nested);
else if (direction == 1 && newRule->dotslen > 1 && !noback)
add_1_multiple ();
else
{
}
direction++;
/* Without dots there is nothing to link for back-translation. */
if (newRule->dotslen == 0)
direction = 2;
}
return 1;
}
/* Find a character class, whether predefined or user-defined, by name.
 * Returns the class, or NULL when no class has that name. */
static const struct CharacterClass *
findCharacterClass (const CharsString * name)
{
  const struct CharacterClass *candidate;
  for (candidate = characterClasses; candidate; candidate = candidate->next)
    {
      if (name->length != candidate->length)
	continue;
      if (memcmp (&name->chars[0], candidate->name,
		  CHARSIZE * name->length) == 0)
	return candidate;
    }
  return NULL;
}
/* Define a character class, whether predefined or user-defined.  The class
 * receives the next free attribute bit and is put at the head of the class
 * list.  Returns the new class, or NULL (after reporting "character class
 * table overflow.") when no attribute bit is left. */
static struct CharacterClass *
addCharacterClass (FileInfo * nested, const widechar * name, int length)
{
  if (characterClassAttribute)
    {
      /* The struct already contains room for one name character. */
      struct CharacterClass *fresh =
	malloc (sizeof (*fresh) + CHARSIZE * (length - 1));
      if (fresh)
	{
	  memset (fresh, 0, sizeof (*fresh));
	  memcpy (fresh->name, name, CHARSIZE * (fresh->length = length));
	  fresh->attribute = characterClassAttribute;
	  characterClassAttribute <<= 1;
	  fresh->next = characterClasses;
	  characterClasses = fresh;
	  return fresh;
	}
      outOfMemory ();
    }
  compileError (nested, "character class table overflow.");
  return NULL;
}
static void
deallocateCharacterClasses ()
{
while (characterClasses)
{
struct CharacterClass *class = characterClasses;
characterClasses = characterClasses->next;
if (class)
free (class);
}
}
/* Allocate memory for predefined character classes: reset the class list
 * and the attribute counter, then register every name of
 * characterClassNames in order.  Returns 1 on success; on failure the
 * classes added so far are freed and 0 is returned. */
static int
allocateCharacterClasses ()
{
  const char **namePtr;
  characterClasses = NULL;
  characterClassAttribute = 1;
  for (namePtr = characterClassNames; *namePtr != NULL; namePtr++)
    {
      widechar wname[MAXSTRING];
      int length = strlen (*namePtr);
      int pos;
      /* Widen the ASCII name to widechar. */
      for (pos = 0; pos < length; pos++)
	wname[pos] = (widechar) (*namePtr)[pos];
      if (!addCharacterClass (NULL, wname, length))
	{
	  deallocateCharacterClasses ();
	  return 0;
	}
    }
  return 1;
}
/* Map an opcode token to its opcode number.  The search starts at the
 * opcode found by the previous call and wraps around.  Returns CTO_None,
 * after reporting an error, when the token is not an opcode name. */
static TranslationTableOpcode
getOpcode (FileInfo * nested, const CharsString * token)
{
  static TranslationTableOpcode lastOpcode = 0;
  TranslationTableOpcode candidate = lastOpcode;
  for (;;)
    {
      /* Compare lengths first; only then compare the names. */
      if (token->length == opcodeLengths[candidate]
	  && eqasc2uni ((unsigned char *) opcodeNames[candidate],
			&token->chars[0], token->length))
	{
	  lastOpcode = candidate;
	  return candidate;
	}
      candidate++;
      if (candidate >= CTO_None)
	candidate = 0;
      if (candidate == lastOpcode)
	break;
    }
  compileError (nested, "opcode %s not defined.", showString
		(&token->chars[0], token->length));
  return CTO_None;
}
/* Used by tools such as lou_debug: map an opcode name, compared without
 * regard to case, to its opcode number.  The search starts at the opcode
 * found by the previous call and wraps around.  Returns CTO_None when the
 * name is unknown. */
TranslationTableOpcode
findOpcodeNumber (const char *toFind)
{
  static TranslationTableOpcode lastOpcode = 0;
  TranslationTableOpcode candidate = lastOpcode;
  int length = strlen (toFind);
  for (;;)
    {
      if (length == opcodeLengths[candidate]
	  && strcasecmp (toFind, opcodeNames[candidate]) == 0)
	{
	  lastOpcode = candidate;
	  return candidate;
	}
      candidate++;
      if (candidate >= CTO_None)
	candidate = 0;
      if (candidate == lastOpcode)
	break;
    }
  return CTO_None;
}
/* Used by tools such as lou_debug: return the name of opcode.  For a value
 * outside the opcode range its decimal number is returned instead, held in
 * the shared scratchBuf. */
const char *
findOpcodeName (TranslationTableOpcode opcode)
{
  if (opcode >= 0 && opcode < CTO_None)
    return opcodeNames[opcode];
  sprintf (scratchBuf, "%d", opcode);
  return scratchBuf;
}
/* Convert `length` hexadecimal digits (either case) to a widechar.
 * digits[0] is the most significant digit. On an invalid digit a compile
 * error is reported and (widechar) 0xffffffff is returned. */
static widechar
hexValue (FileInfo * nested, const widechar * digits, int length)
{
  unsigned int binaryValue = 0;
  int pos = 0;
  while (pos < length)
    {
      const widechar digit = digits[pos];
      unsigned int nibble = 0;
      if (digit >= '0' && digit <= '9')
	nibble = digit - '0';
      else if (digit >= 'a' && digit <= 'f')
	nibble = digit - 'a' + 10;
      else if (digit >= 'A' && digit <= 'F')
	nibble = digit - 'A' + 10;
      else
	{
	  compileError (nested, "invalid %d-digit hexadecimal number",
			length);
	  return (widechar) 0xffffffff;
	}
      /* Place the nibble according to its distance from the last digit. */
      binaryValue |= nibble << (4 * (length - 1 - pos));
      pos++;
    }
  return (widechar) binaryValue;
}
#define MAXBYTES 7
static int first0Bit[MAXBYTES] = { 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0XFE };
/* Convert a raw token into the characters it denotes.
 *
 * Each element of token->chars is treated as one byte (only the low 8 bits
 * of the lead byte are used). Bytes below 128 are copied as-is, except that
 * a backslash introduces an escape sequence:
 *   \\ \e \f \n \r \s \t \v \w \"   single characters
 *   \xHHHH                          4 hex digits
 *   \yHHHHH, \zHHHHHHHH             5 / 8 hex digits (32-bit builds only)
 * Bytes of 128 and above start a UTF-8 sequence that is decoded into one
 * character; with CHARSIZE == 2 values above 0xffff are clamped to 0xffff.
 *
 * The result is written to result->chars / result->length. Output stops
 * once MAXSTRING characters have been produced. The function always
 * returns 1; problems are reported through compileError/compileWarning.
 */
static int
parseChars (FileInfo * nested, CharsString * result, CharsString * token)
{
  int in = 0;
  int out = 0;
  int lastOutSize = 0;
  int lastIn;
  unsigned int ch = 0;
  int numBytes = 0;
  unsigned int utf32 = 0;
  int k;
  while (in < token->length)
    {
      ch = token->chars[in++] & 0xff;
      if (ch < 128)
	{
	  if (ch == '\\')
	    {			/* escape sequence */
	      switch (ch = token->chars[in])
		{
		case '\\':
		  break;
		case 'e':
		  ch = 0x1b;
		  break;
		case 'f':
		  ch = 12;
		  break;
		case 'n':
		  ch = 10;
		  break;
		case 'r':
		  ch = 13;
		  break;
		case 's':
		  ch = ' ';
		  break;
		case 't':
		  ch = 9;
		  break;
		case 'v':
		  /* Vertical tab is ASCII 11 (0x0B); the previous value 22
		   * was SYN. */
		  ch = 11;
		  break;
		case 'w':
		  ch = ENDSEGMENT;
		  break;
		case 34:
		  ch = QUOTESUB;
		  break;
		case 'X':
		case 'x':
		  /* Only decode when all 4 digits are inside the token;
		   * otherwise the letter itself is emitted. */
		  if (token->length - in > 4)
		    {
		      ch = hexValue (nested, &token->chars[in + 1], 4);
		      in += 4;
		    }
		  break;
		case 'y':
		case 'Y':
		  if (CHARSIZE == 2)
		    {
		    not32:
		      compileError (nested,
				    "liblouis has not been compiled for 32-bit Unicode");
		      break;
		    }
		  if (token->length - in > 5)
		    {
		      ch = hexValue (nested, &token->chars[in + 1], 5);
		      in += 5;
		    }
		  break;
		case 'z':
		case 'Z':
		  if (CHARSIZE == 2)
		    goto not32;
		  if (token->length - in > 8)
		    {
		      ch = hexValue (nested, &token->chars[in + 1], 8);
		      in += 8;
		    }
		  break;
		default:
		  compileError (nested, "invalid escape sequence '\\%c'", ch);
		  break;
		}
	      in++;
	    }
	  result->chars[out++] = (widechar) ch;
	  if (out >= MAXSTRING)
	    {
	      result->length = out;
	      return 1;
	    }
	  continue;
	}
      /* Multi-byte UTF-8: remember where this character started so the
       * output can be rolled back if it does not fit. */
      lastOutSize = out;
      lastIn = in;
      /* The number of leading 1 bits in the lead byte gives the number of
       * continuation bytes. */
      for (numBytes = MAXBYTES - 1; numBytes > 0; numBytes--)
	if (ch >= first0Bit[numBytes])
	  break;
      utf32 = ch & (0XFF - first0Bit[numBytes]);
      for (k = 0; k < numBytes; k++)
	{
	  /* Never read continuation bytes beyond the end of the token. */
	  if (in >= MAXSTRING || in >= token->length)
	    break;
	  /* Keep room for the fallback write below and for the decoded
	   * character stored after this loop. */
	  if (out >= MAXSTRING - 1)
	    {
	      result->length = lastOutSize;
	      return 1;
	    }
	  if (token->chars[in] < 128 || (token->chars[in] & 0x0040))
	    {
	      /* NOTE(review): this fallback copies the element after the
	       * lead byte and keeps looping; kept as in the original. */
	      compileWarning (nested, "invalid UTF-8. Assuming Latin-1.");
	      result->chars[out++] = token->chars[lastIn];
	      in = lastIn + 1;
	      continue;
	    }
	  utf32 = (utf32 << 6) + (token->chars[in++] & 0x3f);
	}
      if (CHARSIZE == 2 && utf32 > 0xffff)
	utf32 = 0xffff;
      result->chars[out++] = (widechar) utf32;
      if (out >= MAXSTRING)
	{
	  result->length = lastOutSize;
	  return 1;
	}
    }
  result->length = out;
  return 1;
}
/* Parse an external (char) character string with parseChars.
 *
 * inString   NUL-terminated input; at most MAXSTRING - 1 bytes are used.
 * outString  receives the parsed characters; it is NOT NUL-terminated and
 *            must be large enough for the result.
 *
 * Returns the number of characters written, or 0 if errorCount is non-zero
 * after parsing (errorCount is then reset to 0).
 */
int
extParseChars (const char *inString, widechar * outString)
{
  CharsString wideIn;
  CharsString result;
  int k;
  /* Stop at MAXSTRING - 1 so the terminator below stays inside
   * wideIn.chars. Go through unsigned char so bytes >= 0x80 are not
   * sign-extended where plain char is signed. */
  for (k = 0; k < MAXSTRING - 1 && inString[k]; k++)
    wideIn.chars[k] = (unsigned char) inString[k];
  wideIn.chars[k] = 0;
  wideIn.length = k;
  parseChars (NULL, &result, &wideIn);
  if (errorCount)
    {
      errorCount = 0;
      return 0;
    }
  for (k = 0; k < result.length; k++)
    outString[k] = result.chars[k];
  return result.length;
}
/* Parse a dot-pattern token into braille cells.
 *
 * The token is a list of cells separated by '-'. Each cell is either the
 * single character '0' (blank) or a run of dot characters '1'-'9',
 * 'a'-'f' (either case), each of which may appear only once per cell.
 * Every stored cell has B16 OR'ed in. The cells are written to
 * cells->chars and their count to cells->length.
 *
 * Returns 1 on success. On a malformed token a compile error is reported
 * and 0 is returned.
 */
static int
parseDots (FileInfo * nested, CharsString * cells, const CharsString * token)
{
/* Collect the dot patterns */
widechar cell = 0; /* accumulates the dots of the current cell */
int cellCount = 0;
int index;
int start = 0; /* index of the first character of the current cell */
for (index = 0; index < token->length; index++)
{
/* True once at least one character of the current cell has been seen */
int started = index != start;
widechar character = token->chars[index];
switch (character)
{ /* OR dots together to make up a braille cell */
{
/* The inner block only scopes `dot`; the case labels inside it still
 * belong to the enclosing switch. */
int dot;
case '1':
dot = B1;
goto haveDot;
case '2':
dot = B2;
goto haveDot;
case '3':
dot = B3;
goto haveDot;
case '4':
dot = B4;
goto haveDot;
case '5':
dot = B5;
goto haveDot;
case '6':
dot = B6;
goto haveDot;
case '7':
dot = B7;
goto haveDot;
case '8':
dot = B8;
goto haveDot;
case '9':
dot = B9;
goto haveDot;
case 'a':
case 'A':
dot = B10;
goto haveDot;
case 'b':
case 'B':
dot = B11;
goto haveDot;
case 'c':
case 'C':
dot = B12;
goto haveDot;
case 'd':
case 'D':
dot = B13;
goto haveDot;
case 'e':
case 'E':
dot = B14;
goto haveDot;
case 'f':
case 'F':
dot = B15;
haveDot:
/* A started cell with no dots set began with '0'; a dot may not follow
 * it. */
if (started && !cell)
goto invalid;
if (cell & dot)
{
compileError (nested, "dot specified more than once.");
return 0;
}
cell |= dot;
break;
}
case '0': /* blank cell: only valid as the first character of a cell */
if (started)
goto invalid;
break;
case '-': /* got all dots for this cell */
if (!started)
{
compileError (nested, "missing cell specification.");
return 0;
}
cells->chars[cellCount++] = cell | B16;
cell = 0;
start = index + 1;
break;
default:
invalid:
compileError (nested, "invalid dot number %s.", showString
(&character, 1));
return 0;
}
}
/* Nothing after the last '-' (or an empty token): no final cell */
if (index == start)
{
compileError (nested, "missing cell specification.");
return 0;
}
cells->chars[cellCount++] = cell | B16; /* last cell */
cells->length = cellCount;
return 1;
}
/* Parse an external (char) dot-pattern string with parseDots.
 *
 * inString   NUL-terminated input; at most MAXSTRING - 1 bytes are used.
 * outString  receives the parsed cells followed by a 0 terminator, so it
 *            must have room for one element more than the result.
 *
 * Returns the number of cells written, or 0 if errorCount is non-zero
 * after parsing (errorCount is then reset to 0).
 */
int
extParseDots (const char *inString, widechar * outString)
{
  CharsString wideIn;
  CharsString result;
  int k;
  /* Stop at MAXSTRING - 1 so the terminator below stays inside
   * wideIn.chars. Go through unsigned char so bytes >= 0x80 are not
   * sign-extended where plain char is signed. */
  for (k = 0; k < MAXSTRING - 1 && inString[k]; k++)
    wideIn.chars[k] = (unsigned char) inString[k];
  wideIn.chars[k] = 0;
  wideIn.length = k;
  parseDots (NULL, &result, &wideIn);
  if (errorCount)
    {
      errorCount = 0;
      return 0;
    }
  for (k = 0; k < result.length; k++)
    outString[k] = result.chars[k];
  outString[k] = 0;
  return result.length;
}
/* Read the next token and parse it as a character string into
 * `characters`. Returns 1 when both steps succeed, 0 otherwise. */
static int
getCharacters (FileInfo * nested, CharsString * characters)
{
  CharsString token;
  if (!getToken (nested, &token, "characters"))
    return 0;
  if (!parseChars (nested, characters, &token))
    return 0;
  return 1;
}
/* Read the characters operand of a rule and parse it into ruleChars.
 * Returns 1 when both steps succeed, 0 otherwise. */
static int
getRuleCharsText (FileInfo * nested, CharsString * ruleChars)
{
  CharsString token;
  if (!getToken (nested, &token, "Characters operand"))
    return 0;
  if (!parseChars (nested, ruleChars, &token))
    return 0;
  return 1;
}
/* Read the next token and parse it as text (not as a dot pattern) into
 * ruleDots. Returns 1 when both steps succeed, 0 otherwise. */
static int
getRuleDotsText (FileInfo * nested, CharsString * ruleDots)
{
  CharsString token;
  if (!getToken (nested, &token, "characters"))
    return 0;
  if (!parseChars (nested, ruleDots, &token))
    return 0;
  return 1;
}
/* Interpret the dots operand of a rule.
 * A lone '=' yields an empty ruleDots; anything else is parsed as a dot
 * pattern. Returns 1 on success, 0 if no token could be read or the
 * pattern is invalid. */
static int
getRuleDotsPattern (FileInfo * nested, CharsString * ruleDots)
{
  CharsString token;
  if (!getToken (nested, &token, "Dots operand"))
    return 0;
  if (token.length == 1 && token.chars[0] == '=')
    {
      ruleDots->length = 0;
      return 1;
    }
  return parseDots (nested, ruleDots, &token) ? 1 : 0;
}
/* Read a character class name and look it up.
 * On success *class points at the matching class and 1 is returned. If no
 * token is available 0 is returned; if the name is unknown a compile error
 * is reported, *class is left NULL and 0 is returned. */
static int
getCharacterClass (FileInfo * nested, const struct CharacterClass **class)
{
  CharsString token;
  if (!getToken (nested, &token, "character class name"))
    return 0;
  *class = findCharacterClass (&token);
  if (*class)
    return 1;
  compileError (nested, "character class not defined.");
  return 0;
}
/* Forward declaration; the definition is not in this part of the file. */
static int includeFile (FileInfo * nested, CharsString * includedFile);
/* One node of the singly linked list that associates a rule name with the
 * offset of its rule. Nodes are allocated with extra room after the
 * struct so that `name` can hold `length` characters (see addRuleName). */
struct RuleName
{
struct RuleName *next;
TranslationTableOffset ruleOffset;
widechar length;
widechar name[1];
};
/* Head of the rule-name list; new entries are pushed at the front. */
static struct RuleName *ruleNames = NULL;
/* Return the rule offset registered under `name`, or 0 if no entry of the
 * ruleNames list has the same length and the same characters. */
static TranslationTableOffset
findRuleName (const CharsString * name)
{
  const struct RuleName *entry;
  for (entry = ruleNames; entry; entry = entry->next)
    {
      if (name->length != entry->length)
	continue;
      if (memcmp (&name->chars[0], entry->name,
		  CHARSIZE * name->length) == 0)
	return entry->ruleOffset;
    }
  return 0;
}
/* Register `name` for the rule at newRuleOffset.
 *
 * A node is allocated and pushed at the front of the ruleNames list. Every
 * character of the name must have the CTC_Letter attribute; otherwise a
 * compile error is reported, the node is freed and 0 is returned.
 * Returns 1 on success. outOfMemory() is called if the allocation fails.
 */
static int
addRuleName (FileInfo * nested, CharsString * name)
{
  int k;
  struct RuleName *nameRule;
  /* The struct already contains one name element, hence length - 1. */
  if (!(nameRule = malloc (sizeof (*nameRule) + CHARSIZE *
			   (name->length - 1))))
    {
      compileError (nested, "not enough memory");
      outOfMemory ();
    }
  memset (nameRule, 0, sizeof (*nameRule));
  for (k = 0; k < name->length; k++)
    {
      TranslationTableCharacter *ch = definedCharOrDots
	(nested, name->chars[k],
	 0);
      if (!(ch->attributes & CTC_Letter))
	{
	  compileError (nested, "a name may contain only letters");
	  /* The node has not been linked into the list yet, so it must be
	   * released here or it leaks. */
	  free (nameRule);
	  return 0;
	}
      nameRule->name[k] = name->chars[k];
    }
  nameRule->length = name->length;
  nameRule->ruleOffset = newRuleOffset;
  nameRule->next = ruleNames;
  ruleNames = nameRule;
  return 1;
}
static void
deallocateRuleNames ()
{
while (ruleNames)
{
struct RuleName *nameRule = ruleNames;
ruleNames = ruleNames->next;
if (nameRule)
free (nameRule);
}
}
/* Compile a comma-separated list of dot patterns for a swap rule.
 * Each comma-delimited group of `source` is parsed with parseDots and
 * appended to `dest` as: (number of cells + 1), then the cells themselves.
 * Returns 1 on success, 0 as soon as one group fails to parse. */
static int
compileSwapDots (FileInfo * nested, CharsString * source, CharsString * dest)
{
  int pos;
  int cell;
  CharsString group;
  CharsString groupCells;
  dest->length = 0;
  group.length = 0;
  /* Run one step past the last character so the final group is flushed. */
  for (pos = 0; pos <= source->length; pos++)
    {
      if (pos != source->length && source->chars[pos] != ',')
	{
	  group.chars[group.length++] = source->chars[pos];
	  continue;
	}
      if (!parseDots (nested, &groupCells, &group))
	return 0;
      dest->chars[dest->length++] = groupCells.length + 1;
      for (cell = 0; cell < groupCells.length; cell++)
	dest->chars[dest->length++] = groupCells.chars[cell];
      group.length = 0;
    }
  return 1;
}
/* Compile a swap opcode: name, matches and replacements operands.
 * The matches are characters for CTO_SwapCc and CTO_SwapCd and dot groups
 * otherwise; the replacements are characters only for CTO_SwapCc. The rule
 * is added first and then registered under its name. Returns 1 on success
 * and 0 on the first failure. */
static int
compileSwap (FileInfo * nested, TranslationTableOpcode opcode)
{
  CharsString ruleChars;
  CharsString ruleDots;
  CharsString name;
  CharsString matches;
  CharsString replacements;
  const int matchesAreChars = (opcode == CTO_SwapCc || opcode == CTO_SwapCd);
  const int replacementsAreChars = (opcode == CTO_SwapCc);

  if (!getToken (nested, &name, "name operand")
      || !getToken (nested, &matches, "matches operand")
      || !getToken (nested, &replacements, "replacements operand"))
    return 0;

  if (matchesAreChars)
    {
      if (!parseChars (nested, &ruleChars, &matches))
	return 0;
    }
  else if (!compileSwapDots (nested, &matches, &ruleChars))
    return 0;

  if (replacementsAreChars)
    {
      if (!parseChars (nested, &ruleDots, &replacements))
	return 0;
    }
  else if (!compileSwapDots (nested, &replacements, &ruleDots))
    return 0;

  if (!addRule (nested, opcode, &ruleChars, &ruleDots, 0, 0))
    return 0;
  return addRuleName (nested, &name) ? 1 : 0;
}
/* Convert the leading run of decimal digits in `source` to a number.
 * The value is stored in *dest (0 when there are no digits) and the number
 * of digits consumed is returned. */
static int
getNumber (widechar * source, widechar * dest)
{
  int consumed;
  *dest = 0;
  for (consumed = 0; source[consumed] >= '0' && source[consumed] <= '9';
       consumed++)
    *dest = 10 * *dest + (source[consumed] - '0');
  return consumed;
}
/* Start of multipass compiler */
/* File-scope working state shared by the pass* helper functions. */
static CharsString passRuleChars;
/* Its chars array serves as the instruction buffer (see passInstructions) */
static CharsString passRuleDots;
/* Most recent string, dot pattern or name produced by the tokenizer */
static CharsString passHoldString;
/* The operand text being scanned */
static CharsString passLine;
/* Current scan position in passLine */
static int passLinepos;
/* Position at which the most recently scanned script token started */
static int passPrevLinepos;
/* Most recent number produced by passGetNumber */
static widechar passHoldNumber;
/* Emphasis bits collected by passGetEmphasis */
static widechar passEmphasis;
/* Attribute bits collected by passGetAttributes */
static TranslationTableCharacterAttributes passAttributes;
/* File context passed to compileError by the pass* helpers */
static FileInfo *passNested;
/* Opcode currently being compiled; also appended to pass variable names */
static TranslationTableOpcode passOpcode;
/* Output buffer for compiled instructions */
static widechar *passInstructions;
/* Instruction counter: next free index in passInstructions */
static int passIC;
/* Collect a run of attribute characters starting at passLinepos.
 * Each recognised character ORs its attribute into passAttributes
 * (pass_any sets it to 0xffffffff) and advances passLinepos. Scanning stops
 * at the first character that is not an attribute code. Returns 1 when at
 * least one attribute bit was set; otherwise reports "Missing attribute",
 * moves passLinepos back by one and returns 0. */
static int
passGetAttributes ()
{
  passAttributes = 0;
  /* `continue` advances to the next character; falling out of the switch
   * through `default` ends the scan. */
  for (;; passLinepos++)
    {
      switch (passLine.chars[passLinepos])
	{
	case pass_any:
	  passAttributes = 0xffffffff;
	  continue;
	case pass_digit:
	  passAttributes |= CTC_Digit;
	  continue;
	case pass_litDigit:
	  passAttributes |= CTC_LitDigit;
	  continue;
	case pass_letter:
	  passAttributes |= CTC_Letter;
	  continue;
	case pass_math:
	  passAttributes |= CTC_Math;
	  continue;
	case pass_punctuation:
	  passAttributes |= CTC_Punctuation;
	  continue;
	case pass_sign:
	  passAttributes |= CTC_Sign;
	  continue;
	case pass_space:
	  passAttributes |= CTC_Space;
	  continue;
	case pass_uppercase:
	  passAttributes |= CTC_UpperCase;
	  continue;
	case pass_lowercase:
	  passAttributes |= CTC_LowerCase;
	  continue;
	case pass_class1:
	  passAttributes |= CTC_Class1;
	  continue;
	case pass_class2:
	  passAttributes |= CTC_Class2;
	  continue;
	case pass_class3:
	  passAttributes |= CTC_Class3;
	  continue;
	case pass_class4:
	  passAttributes |= CTC_Class4;
	  continue;
	default:
	  break;
	}
      break;
    }
  if (passAttributes)
    return 1;
  compileError (passNested, "Missing attribute");
  passLinepos--;
  return 0;
}
/* Collect a run of emphasis letters.
 * passLinepos is advanced by one first, then each of 'i', 'b', 'u', 'c'
 * ORs italic, bold, underline or computer_braille into passEmphasis.
 * Scanning stops at the first other character. Returns 1 when at least one
 * emphasis bit was set; otherwise reports an error, moves passLinepos back
 * by one and returns 0. */
static int
passGetEmphasis ()
{
  passLinepos++;
  passEmphasis = 0;
  /* `continue` advances to the next character; falling out of the switch
   * through `default` ends the scan. */
  for (;; passLinepos++)
    {
      switch (passLine.chars[passLinepos])
	{
	case 'i':
	  passEmphasis |= italic;
	  continue;
	case 'b':
	  passEmphasis |= bold;
	  continue;
	case 'u':
	  passEmphasis |= underline;
	  continue;
	case 'c':
	  passEmphasis |= computer_braille;
	  continue;
	default:
	  break;
	}
      break;
    }
  if (passEmphasis)
    return 1;
  compileError (passNested, "emphasis indicators expected");
  passLinepos--;
  return 0;
}
static int
passGetDots ()
{
CharsString collectDots;
collectDots.length = 0;
while (passLinepos < passLine.length && (passLine.chars[passLinepos]
== '-'
|| (passLine.chars[passLinepos] >=
'0'
&& passLine.
chars[passLinepos] <= '9')
||
((passLine.
chars[passLinepos] | 32) >= 'a'
&& (passLine.
chars[passLinepos] | 32) <=
'f')))
collectDots.chars[collectDots.length++] = passLine.chars[passLinepos++];
if (!parseDots (passNested, &passHoldString, &collectDots))
return 0;
return 1;
}
/* Read a quoted string from passLine into passHoldString.
 *
 * Scanning starts at passLinepos (just after the opening quote) and stops
 * at the closing double quote (34), which is consumed. QUOTESUB characters
 * are turned back into double quotes. passHoldString is zero-terminated.
 *
 * Returns 1 on success. If the end of the line or a zero character is
 * reached first, "unterminated string" is reported and 0 is returned.
 */
static int
passGetString ()
{
  passHoldString.length = 0;
  while (1)
    {
      /* passLine is not guaranteed to be zero-terminated, so bound the
       * scan by its length as well; otherwise stale characters from an
       * earlier, longer line could be read. */
      if (passLinepos >= passLine.length || !passLine.chars[passLinepos])
	{
	  compileError (passNested, "unterminated string");
	  return 0;
	}
      if (passLine.chars[passLinepos] == 34)
	break;
      if (passLine.chars[passLinepos] == QUOTESUB)
	passHoldString.chars[passHoldString.length++] = 34;
      else
	passHoldString.chars[passHoldString.length++] =
	  passLine.chars[passLinepos];
      passLinepos++;
    }
  passHoldString.chars[passHoldString.length] = 0;
  passLinepos++;
  return 1;
}
/* Convert the run of decimal digits at passLinepos to a number.
 * The value is stored in passHoldNumber (0 when there are no digits) and
 * passLinepos is advanced past the digits. Always returns 1. */
static int
passGetNumber ()
{
  passHoldNumber = 0;
  /* Bound the scan by the line length: passLine is not guaranteed to be
   * zero-terminated, so digits left over from an earlier, longer line must
   * not be picked up. */
  while (passLinepos < passLine.length
	 && passLine.chars[passLinepos] >= '0'
	 && passLine.chars[passLinepos] <= '9')
    passHoldNumber =
      10 * passHoldNumber + (passLine.chars[passLinepos++] - '0');
  return 1;
}
static int
passGetName ()
{
TranslationTableCharacterAttributes attr;
passHoldString.length = 0;
do
{
attr = definedCharOrDots (passNested, passLine.chars[passLinepos],
0)->attributes;
if (passHoldString.length == 0)
{
if (!(attr & CTC_Letter))
{
passLinepos++;
continue;
}
}
if (!(attr & CTC_Letter))
break;
passHoldString.chars[passHoldString.length++] =
passLine.chars[passLinepos];
passLinepos++;
}
while (passLinepos < passLine.length);
return 1;
}
static int
passIsKeyword (const char *token)
{
int k;
int length = strlen (token);
int ch = passLine.chars[passLinepos + length + 1];
if (((ch | 32) >= 'a' && (ch | 32) <= 'z') || (ch >= '0' && ch <= '9'))
return 0;
for (k = 0; k < length && passLine.chars[passLinepos + k + 1]
== (widechar) token[k]; k++);
if (k == length)
{
passLinepos += length + 1;
return 1;
}
return 0;
}
/* One node of the singly linked list of names defined in pass scripts.
 * Nodes are allocated with extra room after the struct so that `name` can
 * hold `length` characters; the stored name is the user-visible name with
 * the current pass opcode appended (see passAddName). */
struct PassName
{
struct PassName *next;
/* Number associated with the name */
int varnum;
/* Number of characters in name, including the appended opcode */
widechar length;
widechar name[1];
};
/* Head of the pass-name list; new entries are pushed at the front. */
static struct PassName *passNames = NULL;
/* Look up a name defined for the current pass opcode.
 *
 * Names are stored with the defining opcode appended as an extra
 * character, so the lookup key is built the same way. Returns the number
 * stored for the name. If the name is unknown, "name not found" is
 * reported and 0 is returned.
 */
static int
passFindName (const CharsString * name)
{
  const struct PassName *curname = passNames;
  CharsString augmentedName;
  for (augmentedName.length = 0; augmentedName.length < name->length;
       augmentedName.length++)
    augmentedName.chars[augmentedName.length] =
      name->chars[augmentedName.length];
  augmentedName.chars[augmentedName.length++] = passOpcode;
  while (curname)
    {
      /* Compare the whole augmented name. Comparing only name->length
       * characters left out the trailing opcode, so a name defined under
       * one opcode matched under any other. */
      if ((augmentedName.length == curname->length) &&
	  (memcmp
	   (&augmentedName.chars[0], curname->name,
	    CHARSIZE * augmentedName.length) == 0))
	return curname->varnum;
      curname = curname->next;
    }
  compileError (passNested, "name not found");
  return 0;
}
/* Register `name` with number `var` for the current pass opcode.
 * The stored name is `name` with passOpcode appended as one extra
 * character. The new node is pushed at the front of passNames. Always
 * returns 1; outOfMemory() is called if the allocation fails. */
static int
passAddName (CharsString * name, int var)
{
  struct PassName *entry;
  CharsString augmentedName;
  int pos;

  /* Build the key: the name followed by the opcode. */
  augmentedName.length = 0;
  while (augmentedName.length < name->length)
    {
      augmentedName.chars[augmentedName.length] =
	name->chars[augmentedName.length];
      augmentedName.length++;
    }
  augmentedName.chars[augmentedName.length++] = passOpcode;

  /* The struct already contains one name element, hence length - 1. */
  entry = malloc (sizeof (*entry) + CHARSIZE * (augmentedName.length - 1));
  if (!entry)
    outOfMemory ();
  memset (entry, 0, sizeof (*entry));
  for (pos = 0; pos < augmentedName.length; pos++)
    entry->name[pos] = augmentedName.chars[pos];
  entry->length = augmentedName.length;
  entry->varnum = var;
  entry->next = passNames;
  passNames = entry;
  return 1;
}
/* Return the next token of the pass script in passLine.
 *
 * passPrevLinepos is set to the start of every token examined, so a caller
 * can restore passLinepos to it to push the token back. Characters <= 32
 * are skipped. Results are left in the shared state:
 *   pass_string, pass_dots, pass_nameFound -> passHoldString
 *   pass_numberFound                       -> passHoldNumber
 * A '#' starts a comment running to the end of the line. Keywords are
 * recognised by their first letter plus passIsKeyword on the remainder;
 * any other word is returned as pass_nameFound.
 *
 * Returns pass_noMoreTokens at the end of the line and pass_invalidToken
 * for malformed input.
 */
static pass_Codes
passGetScriptToken ()
{
  while (passLinepos < passLine.length)
    {
      passPrevLinepos = passLinepos;
      switch (passLine.chars[passLinepos])
	{
	case '\"':
	  passLinepos++;
	  if (passGetString ())
	    return pass_string;
	  return pass_invalidToken;
	case '@':
	  passLinepos++;
	  if (passGetDots ())
	    return pass_dots;
	  return pass_invalidToken;
	case '#':		/*comment */
	  passLinepos = passLine.length + 1;
	  return pass_noMoreTokens;
	case '!':
	  if (passLine.chars[passLinepos + 1] == '=')
	    {
	      passLinepos += 2;
	      return pass_noteq;
	    }
	  passLinepos++;
	  return pass_not;
	case '-':
	  passLinepos++;
	  return pass_hyphen;
	case '=':
	  passLinepos++;
	  return pass_eq;
	case '<':
	  passLinepos++;
	  if (passLine.chars[passLinepos] == '=')
	    {
	      passLinepos++;
	      return pass_lteq;
	    }
	  return pass_lt;
	case '>':
	  passLinepos++;
	  if (passLine.chars[passLinepos] == '=')
	    {
	      passLinepos++;
	      return pass_gteq;
	    }
	  return pass_gt;
	case '+':
	  passLinepos++;
	  return pass_plus;
	case '(':
	  passLinepos++;
	  return pass_leftParen;
	case ')':
	  passLinepos++;
	  return pass_rightParen;
	case ',':
	  passLinepos++;
	  return pass_comma;
	case '&':
	  /* Look at the following character. This used to read
	   * "passLinepos = 1", which reset the scan position instead of
	   * peeking ahead. */
	  if (passLine.chars[passLinepos + 1] == '&')
	    {
	      passLinepos += 2;
	      return pass_and;
	    }
	  return pass_invalidToken;
	case '|':
	  if (passLine.chars[passLinepos + 1] == '|')
	    {
	      passLinepos += 2;
	      return pass_or;
	    }
	  return pass_invalidToken;
	case 'a':
	  if (passIsKeyword ("ttr"))
	    return pass_attributes;
	  passGetName ();
	  return pass_nameFound;
	case 'b':
	  if (passIsKeyword ("ack"))
	    return pass_lookback;
	  if (passIsKeyword ("ool"))
	    return pass_boolean;
	  passGetName ();
	  return pass_nameFound;
	case 'c':
	  if (passIsKeyword ("lass"))
	    return pass_class;
	  passGetName ();
	  return pass_nameFound;
	case 'd':
	  if (passIsKeyword ("ef"))
	    return pass_define;
	  passGetName ();
	  return pass_nameFound;
	case 'e':
	  if (passIsKeyword ("mph"))
	    return pass_emphasis;
	  passGetName ();
	  return pass_nameFound;
	case 'f':
	  if (passIsKeyword ("ind"))
	    return pass_search;
	  if (passIsKeyword ("irst"))
	    return pass_first;
	  passGetName ();
	  return pass_nameFound;
	case 'g':
	  if (passIsKeyword ("roup"))
	    return pass_group;
	  passGetName ();
	  return pass_nameFound;
	case 'i':
	  if (passIsKeyword ("f"))
	    return pass_if;
	  passGetName ();
	  return pass_nameFound;
	case 'l':
	  if (passIsKeyword ("ast"))
	    return pass_last;
	  passGetName ();
	  return pass_nameFound;
	case 'm':
	  if (passIsKeyword ("ark"))
	    return pass_mark;
	  passGetName ();
	  return pass_nameFound;
	case 'r':
	  /* Longer keywords first: "rep" is a prefix of the others. */
	  if (passIsKeyword ("epgroup"))
	    return pass_repGroup;
	  if (passIsKeyword ("epcopy"))
	    return pass_copy;
	  if (passIsKeyword ("epomit"))
	    return pass_omit;
	  if (passIsKeyword ("ep"))
	    return pass_replace;
	  passGetName ();
	  return pass_nameFound;
	case 's':
	  if (passIsKeyword ("cript"))
	    return pass_script;
	  if (passIsKeyword ("wap"))
	    return pass_swap;
	  passGetName ();
	  return pass_nameFound;
	case 't':
	  if (passIsKeyword ("hen"))
	    return pass_then;
	  passGetName ();
	  return pass_nameFound;
	default:
	  /* Skip blanks and control characters. */
	  if (passLine.chars[passLinepos] <= 32)
	    {
	      passLinepos++;
	      break;
	    }
	  if (passLine.chars[passLinepos] >= '0'
	      && passLine.chars[passLinepos] <= '9')
	    {
	      passGetNumber ();
	      return pass_numberFound;
	    }
	  else
	    {
	      if (!passGetName ())
		return pass_invalidToken;
	      else
		return pass_nameFound;
	    }
	}
    }
  return pass_noMoreTokens;
}
/* Consume the next script token and require it to be '('.
 * Returns 1 if it is; otherwise reports an error and returns 0. */
static int
passIsLeftParen ()
{
  if (passGetScriptToken () == pass_leftParen)
    return 1;
  compileError (passNested, "'(' expected");
  return 0;
}
/* Consume the next script token and require it to be a name.
 * Returns 1 if it is; otherwise reports an error and returns 0. */
static int
passIsName ()
{
  if (passGetScriptToken () == pass_nameFound)
    return 1;
  compileError (passNested, "a name expected");
  return 0;
}
/* Consume the next script token and require it to be ','.
 * Returns 1 if it is; otherwise reports an error and returns 0. */
static int
passIsComma ()
{
  if (passGetScriptToken () == pass_comma)
    return 1;
  compileError (passNested, "',' expected");
  return 0;
}
/* Consume the next script token and require it to be a number.
 * Returns 1 if it is; otherwise reports an error and returns 0. */
static int
passIsNumber ()
{
  if (passGetScriptToken () == pass_numberFound)
    return 1;
  compileError (passNested, "a number expected");
  return 0;
}
/* Consume the next script token and require it to be ')'.
 * Returns 1 if it is; otherwise reports an error and returns 0. */
static int
passIsRightParen ()
{
  if (passGetScriptToken () == pass_rightParen)
    return 1;
  compileError (passNested, "')' expected");
  return 0;
}
/* Parse the optional range that ends a parenthesised operand and append
 * two numbers to the instruction buffer.
 *   ")"        -> 1, 1
 *   ",n)"      -> n, n
 *   ",n,m)"    -> n, m
 * Returns 1 on success; on malformed input an error is reported and 0 is
 * returned. */
static int
passGetRange ()
{
  pass_Codes token = passGetScriptToken ();
  if (token == pass_rightParen)
    {
      /* No range given: default to exactly one. */
      passInstructions[passIC++] = 1;
      passInstructions[passIC++] = 1;
      return 1;
    }
  if (token != pass_comma)
    {
      compileError (passNested, "invalid range");
      return 0;
    }
  if (!passIsNumber ())
    return 0;
  passInstructions[passIC++] = passHoldNumber;
  token = passGetScriptToken ();
  if (token == pass_rightParen)
    {
      /* Single number: it is stored a second time. */
      passInstructions[passIC++] = passHoldNumber;
      return 1;
    }
  if (token != pass_comma)
    {
      compileError (passNested, "invalid range");
      return 0;
    }
  if (!passIsNumber ())
    return 0;
  passInstructions[passIC++] = passHoldNumber;
  return passIsRightParen () != 0;
}
/* Emit an attributes instruction: the pass_attributes code, the bits of
 * passAttributes above bit 15, its low 16 bits, and then the range parsed
 * by passGetRange. Returns 1 on success, 0 if the range is invalid. */
static int
passInsertAttributes ()
{
  passInstructions[passIC++] = pass_attributes;
  passInstructions[passIC++] = passAttributes >> 16;
  passInstructions[passIC++] = passAttributes & 0xffff;
  return passGetRange () != 0;
}
static int
compilePassOpcode (FileInfo * nested, TranslationTableOpcode opcode)
{
/*Compile the operands of a pass opcode */
TranslationTableCharacterAttributes after = 0;
TranslationTableCharacterAttributes before = 0;
widechar passSubOp;
const struct CharacterClass *class;
TranslationTableOffset ruleOffset = 0;
TranslationTableRule *rule = NULL;
int k;
int kk = 0;
pass_Codes passCode;
int endTest = 0;
int isScript = 1;
passInstructions = passRuleDots.chars;
passIC = 0; /*Instruction counter */
passRuleChars.length = 0;
passNested = nested;
passOpcode = opcode;
/* passHoldString and passLine are static variables declared
* previously.*/
passLinepos = 0;
passHoldString.length = 0;
for (k = nested->linepos; k < nested->linelen; k++)
passHoldString.chars[passHoldString.length++] = nested->line[k];
if (!eqasc2uni ((unsigned char *) "script", passHoldString.chars, 6))
{
isScript = 0;
#define SEPCHAR 0x0001
for (k = 0; k < passHoldString.length && passHoldString.chars[k] > 32;
k++);
if (k < passHoldString.length)
passHoldString.chars[k] = SEPCHAR;
else
{
compileError (passNested, "Invalid multipass operands");
return 0;
}
}
parseChars (passNested, &passLine, &passHoldString);
if (isScript)
{
int more = 1;
passCode = passGetScriptToken ();
if (passCode != pass_script)
{
compileError (passNested, "Invalid multipass statement");
return 0;
}
/* Declaratives */
while (more)
{
passCode = passGetScriptToken ();
switch (passCode)
{
case pass_define:
if (!passIsLeftParen ())
return 0;
if (!passIsName ())
return 0;
if (!passIsComma ())
return 0;
if (!passIsNumber ())
return 0;
if (!passIsRightParen ())
return 0;
passAddName (&passHoldString, passHoldNumber);
break;
case pass_if:
more = 0;
break;
default:
compileError (passNested,
"invalid definition in declarative part");
return 0;
}
}
/* if part */
more = 1;
while (more)
{
passCode = passGetScriptToken ();
passSubOp = passCode;
switch (passCode)
{
case pass_not:
passInstructions[passIC++] = pass_not;
break;
case pass_first:
passInstructions[passIC++] = pass_first;
break;
case pass_last:
passInstructions[passIC++] = pass_last;
break;
case pass_search:
passInstructions[passIC++] = pass_search;
break;
case pass_string:
if (opcode != CTO_Context && opcode != CTO_Correct)
{
compileError (passNested,
"Character strings can only be used with the context and correct opcodes.");
return 0;
}
passInstructions[passIC++] = pass_string;
goto ifDoCharsDots;
case pass_dots:
if (passOpcode == CTO_Correct || passOpcode == CTO_Context)
{
compileError (passNested,
"dot patterns cannot be specified in the if part\
of the correct or context opcodes");
return 0;
}
passInstructions[passIC++] = pass_dots;
ifDoCharsDots:
passInstructions[passIC++] = passHoldString.length;
for (kk = 0; kk < passHoldString.length; kk++)
passInstructions[passIC++] = passHoldString.chars[kk];
break;
case pass_attributes:
if (!passIsLeftParen ())
return 0;
if (!passGetAttributes ())
return 0;
if (!passInsertAttributes ())
return 0;
break;
case pass_emphasis:
if (!passIsLeftParen ())
return 0;
if (!passGetEmphasis ())
return 0;
/*Right parenthis handled by subfunctiion */
break;
case pass_lookback:
passInstructions[passIC++] = pass_lookback;
passCode = passGetScriptToken ();
if (passCode != pass_leftParen)
{
passInstructions[passIC++] = 1;
passLinepos = passPrevLinepos;
break;
}
if (!passIsNumber ())
return 0;
if (!passIsRightParen ())
return 0;
passInstructions[passIC] = passHoldNumber;
break;
case pass_group: