/* blob: 8415d5722651901579d1c1a51cafd82c516b7404 (source-viewer residue: [file] [log] [blame]) */
/* ----------------------------------------------------------------------- *
*
* Copyright 1996-2020 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following
* conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ----------------------------------------------------------------------- */
/*
* preproc.c macro preprocessor for the Netwide Assembler
*/
/* Typical flow of text through preproc
*
* pp_getline gets tokenized lines, either
*
* from a macro expansion
*
* or
* {
* read_line gets raw text from stdmacpos, or predef, or current input file
* tokenize converts to tokens
* }
*
* expand_mmac_params is used to expand %1 etc., unless a macro is being
* defined or a false conditional is being processed
* (%0, %1, %+1, %-1, %%foo)
*
* do_directive checks for directives
*
* expand_smacro is used to expand single line macros
*
* expand_mmacro is used to expand multi-line macros
*
* detoken is used to convert the line back to text
*/
#include "compiler.h"
#include "nctype.h"
#include "nasm.h"
#include "nasmlib.h"
#include "error.h"
#include "preproc.h"
#include "hashtbl.h"
#include "quote.h"
#include "stdscan.h"
#include "eval.h"
#include "tokens.h"
#include "tables.h"
#include "listing.h"
/*
* Preprocessor execution options that can be controlled by %pragma or
* other directives. This structure is initialized to zero on each
* pass; this *must* reflect the default initial state.
*/
/* The single file-scope instance, ppopt, holds the currently active options */
static struct pp_opts {
bool noaliases; /* NOTE(review): presumably disables smacro alias handling -- confirm at the use sites */
bool sane_empty_expansion; /* NOTE(review): exact semantics are not visible in this part of the file */
} ppopt;
/*
 * Short names for the structures defined below, declared up front
 * so that the structures can point at one another.
 */
typedef struct SMacro SMacro;
typedef struct MMacro MMacro;
typedef struct MMacroInvocation MMacroInvocation;
typedef struct Context Context;
typedef struct Token Token;
typedef struct Line Line;
typedef struct Include Include;
typedef struct Cond Cond;
/*
* This is the internal form which we break input lines up into.
* Typically stored in linked lists.
*
* Note that `type' serves a double meaning: TOK_SMAC_START_PARAMS is
* not necessarily used as-is, but is also used to encode the number
* and expansion type of substituted parameter. So in the definition
*
* %define a(x,=y) ( (x) & ~(y) )
*
* the token representing `x' will have its type changed to
* tok_smac_param(0) but the one representing `y' will be
* tok_smac_param(1); see the accessor functions below.
*
* TOK_INTERNAL_STRING is a string which has been unquoted, but should
* be treated as if it was a quoted string. The code is free to change
* one into the other at will. TOK_NAKED_STRING is a text token which
* should be treated as a string, but which MUST NOT be turned into a
* quoted string. TOK_INTERNAL_STRINGs can contain any character,
* including NUL, but TOK_NAKED_STRING must be a valid C string.
*/
/*
 * Preprocessor token types.  The numeric values are assigned
 * implicitly, so the order of the enumerators matters: every value
 * at or above TOK_SMAC_START_PARAMS encodes an smacro parameter.
 */
enum pp_token_type {
    TOK_NONE = 0,
    TOK_WHITESPACE,
    TOK_COMMENT,
    TOK_CORRUPT,            /* Token text modified in an unsafe manner, now bogus */
    TOK_BLOCK,              /* Storage block pointer, not a real token */
    TOK_ID,
    TOK_PREPROC_ID,
    TOK_MMACRO_PARAM,
    TOK_LOCAL_SYMBOL,
    TOK_LOCAL_MACRO,
    TOK_ENVIRON,
    TOK_STRING,
    TOK_NUMBER,
    TOK_FLOAT,
    TOK_OTHER,
    TOK_INTERNAL_STRING,
    TOK_NAKED_STRING,
    TOK_PREPROC_Q,
    TOK_PREPROC_QQ,
    TOK_PASTE,              /* %+ */
    TOK_COND_COMMA,         /* %, */
    TOK_INDIRECT,           /* %[...] */
    TOK_XDEF_PARAM,         /* Used during %xdefine processing */
    TOK_SMAC_START_PARAMS,  /* MUST BE LAST IN THE LIST!!! */
    TOK_MAX = INT_MAX       /* Keep compiler from reducing the range */
};
/*
 * Encode smacro parameter number `param' as a token type by
 * offsetting it from TOK_SMAC_START_PARAMS.
 */
static inline enum pp_token_type tok_smac_param(int param)
{
    const int encoded = TOK_SMAC_START_PARAMS + param;

    return encoded;
}
/*
 * Inverse of tok_smac_param(): recover the parameter number from a
 * token type.  Only meaningful when is_smac_param(toktype) is true.
 */
static int smac_nparam(enum pp_token_type toktype)
{
    const int index = toktype - TOK_SMAC_START_PARAMS;

    return index;
}
/*
 * True if the token type encodes an smacro parameter, i.e. it is
 * not below TOK_SMAC_START_PARAMS.
 */
static bool is_smac_param(enum pp_token_type toktype)
{
    return !(toktype < TOK_SMAC_START_PARAMS);
}
/* Unsigned mask with only bit number x set */
#define PP_CONCAT_MASK(x) ((unsigned int)1 << (x))

/*
 * A pair of bit masks, one for the head and one for the tail of a
 * token sequence.
 */
struct tokseq_match {
    int mask_head, mask_tail;
};
/*
* This is tuned so struct Token should be 64 bytes on 64-bit
* systems and 32 bytes on 32-bit systems. It enables them
* to be nicely cache aligned, and the text to still be kept
* inline for nearly all tokens.
*
* We prohibit tokens of length > MAX_TEXT even though
* length here is an unsigned int; this avoids problems
* if the length is passed through an interface with type "int",
* and is absurdly large anyway.
*
* For the text mode, in pointer mode the pointer is stored at the end
* of the union and the pad field is cleared. This allows short tokens
* to be unconditionally tested for by only looking at the first text
* bytes and not examining the type or len fields.
*/
/* Longest text kept inline in the token; the arrays below add one byte for the terminating NUL */
#define INLINE_TEXT (7*sizeof(char *)-sizeof(enum pp_token_type)-sizeof(unsigned int)-1)
/* Longest permitted token text; enforced by tok_check_len() */
#define MAX_TEXT (INT_MAX-2)
struct Token {
Token *next; /* Next token in the list, or NULL */
enum pp_token_type type;
unsigned int len; /* Text length in bytes, not counting the terminating NUL */
union {
char a[INLINE_TEXT+1]; /* Inline text, used when len <= INLINE_TEXT */
struct {
char pad[INLINE_TEXT+1 - sizeof(char *)]; /* Kept zero in pointer mode */
char *ptr; /* Out-of-line text, used when len > INLINE_TEXT */
} p;
} text;
};
/*
* Note on the storage of both SMacro and MMacros: the hash table
* indexes them case-insensitively, and we then have to go through a
* linked list of potential case aliases (and, for MMacros, parameter
* ranges); this is to preserve the matching semantics of the earlier
* code. If the number of case aliases for a specific macro is a
* performance issue, you may want to reconsider your coding style.
*/
/*
 * Function called to obtain the expansion of an smacro; it receives
 * the macro, the parameter token lists and the parameter count, and
 * returns a token list.
 */
typedef Token *(*ExpandSMacro)(const SMacro *s, Token **params, int nparams);
/*
* Store the definition of a single-line macro.
*/
/*
 * Per-parameter flags for single-line macro parameters.  Each flag
 * other than SPARM_PLAIN is a distinct bit, so they can be or'ed.
 */
enum sparmflags {
    SPARM_PLAIN   = 0,
    SPARM_EVAL    = 1 << 0,     /* Evaluate as a numeric expression (=) */
    SPARM_STR     = 1 << 1,     /* Convert to quoted string ($) */
    SPARM_NOSTRIP = 1 << 2,     /* Don't strip braces (!) */
    SPARM_GREEDY  = 1 << 3      /* Greedy final parameter (+) */
};
/* Description of one formal parameter of a single-line macro */
struct smac_param {
Token name; /* Embedded by value; its text may still be out-of-line (see free_smacro_members()) */
enum sparmflags flags;
};
/* Definition of one single-line macro */
struct SMacro {
SMacro *next; /* MUST BE FIRST - clear_smacro() wipes everything that follows it */
char *name; /* Released by free_smacro_members() */
Token *expansion; /* Token list, released by free_smacro_members() */
ExpandSMacro expand;
intorptr expandpvt; /* NOTE(review): presumably private data for `expand' -- confirm */
struct smac_param *params; /* Array of nparam entries, or NULL */
int nparam;
bool greedy;
bool casesense;
bool in_progress;
bool alias; /* This is an alias macro */
};
/*
* "No listing" flags. Inside a loop (%rep..%endrep) we may have
* macro listing suppressed with .nolist, but we still need to
* update line numbers for error messages and debug information...
* unless we are nested inside an actual .nolist macro.
*/
/* "No listing" flag bits; the two can be combined */
enum nolist_flags {
    NL_LIST = 1 << 0,   /* Suppress list output */
    NL_LINE = 1 << 1    /* Don't update line information */
};
/*
* Store the definition of a multi-line macro. This is also used to
* store the interiors of `%rep...%endrep' blocks, which are
* effectively self-re-invoking multi-line macros which simply
* don't have a name or bother to appear in the hash tables. %rep
* blocks are signified by having a NULL `name' field.
*
* In a MMacro describing a `%rep' block, the `in_progress' field
* isn't merely boolean, but gives the number of repeats left to
* run.
*
* The `next' field is used for storing MMacros in hash tables; the
* `next_active' field is for stacking them on istk entries.
*
* When a MMacro is being expanded, `params', `iline', `nparam',
* `paramlen', `rotate' and `unique' are local to the invocation.
*/
/*
* Expansion stack. Note that .mmac can point back to the macro itself,
* whereas .mstk cannot.
*/
/* A pair of pointers into the stack of macro expansions */
struct mstk {
MMacro *mstk; /* Any expansion, real macro or not */
MMacro *mmac; /* Highest level actual mmacro */
};
/*
 * Definition of a multi-line macro or a %rep block; the fields from
 * `params' onwards include the per-invocation state.
 */
struct MMacro {
MMacro *next; /* Next macro on the same hash chain */
#if 0
MMacroInvocation *prev; /* previous invocation */
#endif
char *name; /* NULL for a %rep block */
int nparam_min, nparam_max;
enum nolist_flags nolist; /* is this macro listing-inhibited? */
bool casesense;
bool plus; /* is the last parameter greedy? */
bool capture_label; /* macro definition has %00; capture label */
int32_t in_progress; /* is this macro currently being expanded? */
int32_t max_depth; /* maximum number of recursive expansions allowed */
Token *dlist; /* All defaults as one list */
Token **defaults; /* Parameter default pointers */
int ndefs; /* number of default parameters */
Line *expansion; /* Body lines, pushed as a stack and therefore in reverse order */
struct mstk mstk; /* Macro expansion stack */
struct mstk dstk; /* Macro definitions stack */
Token **params; /* actual parameters */
Token *iline; /* invocation line */
struct src_location where; /* location of definition */
unsigned int nparam, rotate;
char *iname; /* name invoked as */
int *paramlen;
uint64_t unique;
uint64_t condcnt; /* number of if blocks... */
};
/*
 * Store the definition of a multi-line macro, as defined in a
 * previous recursive macro expansion.
 *
 * This structure is currently compiled out (#if 0); only its
 * typedef is still declared.
 */
#if 0
struct MMacroInvocation {
MMacroInvocation *prev; /* previous invocation */
Token **params; /* actual parameters */
Token *iline; /* invocation line */
unsigned int nparam, rotate;
int *paramlen;
uint64_t unique;
uint64_t condcnt;
};
#endif
/*
 * The context stack is composed of a linked list of these; the
 * top of the stack is the file-scope variable `cstk'.
 */
struct Context {
Context *next; /* Next context down the stack */
const char *name; /* Released in ctx_pop() */
struct hash_table localmac; /* smacro table belonging to this context; released in ctx_pop() */
uint64_t number;
unsigned int depth;
};
/*
 * Return a read-only pointer to the text of a token, wherever it is
 * stored: out-of-line for long tokens, inline otherwise.
 */
static inline const char *tok_text(const struct Token *t)
{
    if (t->len > INLINE_TEXT)
        return t->text.p.ptr;

    return t->text.a;
}
/*
 * Writable counterpart of tok_text().  The caller may overwrite the
 * text, but MUST NOT change its length in either direction, and may
 * not fake a shorter string by padding it with null characters.
 */
static inline char *tok_text_buf(struct Token *t)
{
    if (t->len > INLINE_TEXT)
        return t->text.p.ptr;

    return t->text.a;
}
/*
 * Validate a token text length: anything above MAX_TEXT is reported
 * through nasm_fatal().  The length is returned narrowed to the type
 * of the Token `len' field.
 */
static inline unsigned int tok_check_len(size_t len)
{
    const bool too_long = len > MAX_TEXT;

    if (unlikely(too_long))
        nasm_fatal("impossibly large token");

    return len;
}
/*
 * True if two tokens carry byte-for-byte identical text; the token
 * types are not compared.
 */
static inline bool tok_text_match(const struct Token *a, const struct Token *b)
{
    if (a->len != b->len)
        return false;

    return memcmp(tok_text(a), tok_text(b), a->len) == 0;
}
/* True if two tokens have both the same type and the same text */
static inline unused_func bool
tok_match(const struct Token *a, const struct Token *b)
{
    if (a->type != b->type)
        return false;

    return tok_text_match(a, b);
}
/*
 * strlen() variant useful for set_text() and its variants: the scan
 * stops at MAX_TEXT+1 bytes, which is already enough for
 * tok_check_len() to reject the string.
 */
static size_t tok_strlen(const char *str)
{
    const size_t limit = MAX_TEXT+1;

    return strnlen(str, limit);
}
/*
 * Replace the text of a token with a copy of `len' bytes taken from
 * `text'; a length that is not already known should be obtained with
 * tok_strlen().  Any out-of-line buffer previously held by the token
 * is released.  Returns the token.
 */
static Token *set_text(struct Token *t, const char *text, size_t len)
{
    char *dst;

    /* Drop the old out-of-line text, then start from an all-zero union */
    if (t->len > INLINE_TEXT)
        nasm_free(t->text.p.ptr);
    nasm_zero(t->text);

    len = tok_check_len(len);
    t->len = len;

    if (len > INLINE_TEXT) {
        dst = nasm_malloc(len+1);
        t->text.p.ptr = dst;
    } else {
        dst = t->text.a;
    }

    memcpy(dst, text, len);
    dst[len] = '\0';

    return t;
}
/*
 * Set the text field to the existing pre-allocated string, either
 * taking over or freeing the allocation in the process.
 *
 * `text' must be an allocation that may be handed to nasm_free() and
 * must have room for len+1 bytes, since a terminating NUL is written
 * at text[len].  If the text is longer than INLINE_TEXT the token
 * takes ownership of the buffer; otherwise the bytes are copied
 * inline and the buffer is freed here.  Either way the caller must
 * not use `text' afterwards.  Returns the token.
 *
 * `len' is a size_t, like the one of set_text(), so that an oversized
 * length reaches tok_check_len() intact instead of being silently
 * truncated at the call.
 */
static Token *set_text_free(struct Token *t, char *text, size_t len)
{
    char *textp;

    /* Release the out-of-line text currently held by the token, if any */
    if (t->len > INLINE_TEXT)
        nasm_free(t->text.p.ptr);

    nasm_zero(t->text);

    t->len = len = tok_check_len(len);
    if (len > INLINE_TEXT) {
        textp = t->text.p.ptr = text;
    } else {
        textp = memcpy(t->text.a, text, len);
        nasm_free(text);
    }
    textp[len] = '\0';

    return t;
}
/*
 * Return a freshly nasm_malloc()'ed copy of the token text,
 * including the terminating NUL.
 */
static char *dup_text(const struct Token *t)
{
    const size_t nbytes = t->len + 1;
    char *copy = nasm_malloc(nbytes);

    memcpy(copy, tok_text(t), nbytes);
    return copy;
}
/*
* Multi-line macro definitions are stored as a linked list of
* these, which is essentially a container to allow several linked
* lists of Tokens.
*
* Note that in this module, linked lists are treated as stacks
* wherever possible. For this reason, Lines are _pushed_ on to the
* `expansion' field in MMacro structures, so that the linked list,
* if walked, would give the macro lines in reverse order; this
* means that we can walk the list when expanding a macro, and thus
* push the lines on to the `expansion' field in _istk_ in reverse
* order (so that when popped back off they are in the right
* order). It may seem cockeyed, and it relies on my design having
* an even number of steps in, but it works...
*
* Some of these structures, rather than being actual lines, are
* markers delimiting the end of the expansion of a given macro.
* This is for use in the cycle-tracking and %rep-handling code.
* Such structures have `finishes' non-NULL, and `first' NULL. All
* others have `finishes' NULL, but `first' may still be NULL if
* the line is blank.
*/
/* One line of tokens, or an end-of-expansion marker (finishes != NULL, first == NULL) */
struct Line {
Line *next;
MMacro *finishes; /* Non-NULL only for end-of-expansion markers */
Token *first; /* First token of the line; NULL for a blank line or a marker */
struct src_location where; /* Where defined */
};
/*
* To handle an arbitrary level of file inclusion, we maintain a
* stack (ie linked list) of these things.
*
* Note: when we issue a message for a continuation line, we want to
* issue it for the actual *start* of the continuation line. This means
* we need to remember how many lines to skip over for the next one.
*/
/* One level of the input stack; the top of the stack is `istk' */
struct Include {
Include *next;
FILE *fp; /* Input file; when NULL, read_line() reads from the standard macro set instead */
Cond *conds; /* Conditional stack for this inclusion level */
Line *expansion; /* Pending lines, used as a stack */
uint64_t nolist; /* Listing inhibit counter */
uint64_t noline; /* Line number update inhibit counter */
struct mstk mstk;
struct src_location where; /* Filename and current line number */
int32_t lineinc; /* Increment given by %line */
int32_t lineskip; /* Accounting for passed continuation lines */
};
/*
* File real name hash, so we don't have to re-search the include
* path for every pass (and potentially more than that if a file
* is used more than once.)
*/
/* NOTE(review): this has external linkage, unlike the other tables in this file -- confirm whether that is intended */
struct hash_table FileHash;
/*
* Counters to trap on insane macro recursion or processing.
* Note: for smacros these count *down*, for mmacros they count *up*.
*/
/* One set of runaway-expansion counters */
struct deadman {
int64_t total; /* Total number of macros/tokens */
int64_t levels; /* Descent depth across all macros */
bool triggered; /* Already triggered, no need for error msg */
};
/* Separate counters for single-line and for multi-line macros */
static struct deadman smacro_deadman, mmacro_deadman;
/*
* Conditional assembly: we maintain a separate stack of these for
* each level of file inclusion. (The only reason we keep the
* stacks separate is to ensure that a stray `%endif' in a file
* included from within the true branch of a `%if' won't terminate
* it and cause confusion: instead, rightly, it'll cause an error.)
*/
/* State of one conditional construct; see emitting() for the states that produce output */
enum cond_state {
/*
 * These states are for use just after %if or %elif: IF_TRUE
 * means the condition has evaluated to truth so we are
 * currently emitting, whereas IF_FALSE means we are not
 * currently emitting but will start doing so if a %else comes
 * up. In these states, all directives are admissible: %elif,
 * %else and %endif. (And of course %if.)
 */
COND_IF_TRUE, COND_IF_FALSE,
/*
 * These states come up after a %else: ELSE_TRUE means we're
 * emitting, and ELSE_FALSE means we're not. In ELSE_* states,
 * any %elif or %else will cause an error.
 */
COND_ELSE_TRUE, COND_ELSE_FALSE,
/*
 * These states mean that we're not emitting now, and also that
 * nothing until %endif will be emitted at all. COND_DONE is
 * used when we've had our moment of emission
 * and have now started seeing %elifs. COND_NEVER is used when
 * the condition construct in question is contained within a
 * non-emitting branch of a larger condition construct,
 * or if there is an error.
 */
COND_DONE, COND_NEVER
};
/* One entry of a conditional stack (see the `conds' field of struct Include) */
struct Cond {
Cond *next;
enum cond_state state;
};
/*
 * True for the two states in which we are currently emitting.
 * The argument is evaluated twice, so it must not have side effects.
 */
#define emitting(x) ( (x) == COND_IF_TRUE || (x) == COND_ELSE_TRUE )
/*
 * These defines are used as the possible return values for
 * do_directive.
 */
#define NO_DIRECTIVE_FOUND 0
#define DIRECTIVE_FOUND 1
/*
* Condition codes. Note that we use c_ prefix not C_ because C_ is
* used in nasm.h for the "real" condition codes. At _this_ level,
* we treat CXZ and ECXZ as condition codes, albeit non-invertible
* ones, so we need a different enum...
*/
/*
 * Condition code names, in alphabetical order.  The position of each
 * name is the value of the matching enum pp_conds constant below.
 */
static const char * const conditions[] = {
    "a",   "ae",  "b",   "be",  "c",   "cxz", "e",   "ecxz",
    "g",   "ge",  "l",   "le",  "na",  "nae", "nb",  "nbe",
    "nc",  "ne",  "ng",  "nge", "nl",  "nle", "no",  "np",
    "ns",  "nz",  "o",   "p",   "pe",  "po",  "rcxz", "s",
    "z"
};

/* Indices into conditions[] and inverse_ccs[]; c_none is "no condition" */
enum pp_conds {
    c_A,   c_AE,  c_B,   c_BE,  c_C,   c_CXZ, c_E,   c_ECXZ,
    c_G,   c_GE,  c_L,   c_LE,  c_NA,  c_NAE, c_NB,  c_NBE,
    c_NC,  c_NE,  c_NG,  c_NGE, c_NL,  c_NLE, c_NO,  c_NP,
    c_NS,  c_NZ,  c_O,   c_P,   c_PE,  c_PO,  c_RCXZ, c_S,
    c_Z,
    c_none = -1
};

/*
 * Inverse of each condition code, indexed by enum pp_conds; the
 * non-invertible codes (cxz, ecxz, rcxz) map to c_none.
 */
static const enum pp_conds inverse_ccs[] = {
    /* a    ae    b     be    c  */  c_NA,   c_NAE,  c_NB,  c_NBE, c_NC,
    /* cxz  e     ecxz           */  c_none, c_NE,   c_none,
    /* g    ge    l     le       */  c_NG,   c_NGE,  c_NL,  c_NLE,
    /* na   nae   nb    nbe   nc */  c_A,    c_AE,   c_B,   c_BE,  c_C,
    /* ne   ng    nge   nl   nle */  c_E,    c_G,    c_GE,  c_L,   c_LE,
    /* no   np    ns    nz       */  c_O,    c_P,    c_S,   c_Z,
    /* o    p     pe    po       */  c_NO,   c_NP,   c_PO,  c_PE,
    /* rcxz s     z              */  c_none, c_NS,   c_NZ
};
/*
* Directive names.
*/
/*
 * Returns 1 if the directive is part of a conditional construct,
 * i.e. one accepted by PP_IS_COND(), or %else, or %endif; 0 otherwise.
 */
static int is_condition(enum preproc_token arg)
{
    if (PP_IS_COND(arg))
        return 1;

    return (arg == PP_ELSE) || (arg == PP_ENDIF);
}
/* For TASM compatibility we need to be able to recognise TASM compatible
* conditional compilation directives. Using the NASM pre-processor does
* not work, so we look for them specifically from the following list and
* then jam in the equivalent NASM directive into the input stream.
*/
/* Indices into tasm_directives[] */
enum {
    TM_ARG,
    TM_ELIF,
    TM_ELSE,
    TM_ENDIF,
    TM_IF,
    TM_IFDEF,
    TM_IFDIFI,
    TM_IFNDEF,
    TM_INCLUDE,
    TM_LOCAL
};

/*
 * TASM directive names.  The table is searched with a binary search
 * in check_tasm_directive(), so it must stay sorted.
 */
static const char * const tasm_directives[] = {
    "arg",      /* TM_ARG */
    "elif",     /* TM_ELIF */
    "else",     /* TM_ELSE */
    "endif",    /* TM_ENDIF */
    "if",       /* TM_IF */
    "ifdef",    /* TM_IFDEF */
    "ifdifi",   /* TM_IFDIFI */
    "ifndef",   /* TM_IFNDEF */
    "include",  /* TM_INCLUDE */
    "local"     /* TM_LOCAL */
};
/*
 * NOTE(review): the next four look like the stack-frame settings
 * behind the TASM-style argument/local directives -- confirm at the
 * use sites, which are not in this part of the file.
 */
static int StackSize = 4;
static const char *StackPointer = "ebp";
static int ArgOffset = 8;
static int LocalOffset = 0;
static Context *cstk; /* Top of the context stack */
static Include *istk; /* Top of the input stack */
static const struct strlist *ipath_list;
static struct strlist *deplist;
static uint64_t unique; /* unique identifier numbers */
static Line *predef = NULL; /* Lines pushed onto the input once the standard macros run out */
static bool do_predef; /* True while `predef' still has to be pushed; see line_from_stdmac() */
static enum preproc_mode pp_mode;
/*
 * The current set of multi-line macros we have defined.
 */
static struct hash_table mmacros;
/*
 * The current set of single-line macros we have defined.
 */
static struct hash_table smacros;
/*
 * The multi-line macro we are currently defining, or the %rep
 * block we are currently reading, if any.
 */
static MMacro *defining;
/* NOTE(review): presumably nesting depths seen while reading a definition -- confirm */
static uint64_t nested_mac_count;
static uint64_t nested_rep_count;
/*
 * The number of macro parameters to allocate space for at a time.
 */
#define PARAM_DELTA 16
/*
 * The standard macro set: defined in macros.c in a set of arrays.
 * This gives our position in any macro set, while we are processing it.
 * The stdmacset is an array of such macro sets.
 */
static macros_t *stdmacpos; /* Read position; NULL when nothing is left to read */
static macros_t **stdmacnext; /* Next entry to take once the current set ends */
static macros_t *stdmacros[8];
static macros_t *extrastdmac;
/*
 * Map of which %use packages have been loaded
 */
static bool *use_loaded;
/*
 * Forward declarations of functions that are used before they are
 * defined.
 */
static void pp_add_stdmac(macros_t *macros);
static Token *expand_mmac_params(Token * tline);
static Token *expand_smacro(Token * tline);
static Token *expand_id(Token * tline);
static Context *get_ctx(const char *name, const char **namep);
static Token *make_tok_num(Token *next, int64_t val);
static Token *make_tok_qstr(Token *next, const char *str);
static Token *make_tok_qstr_len(Token *next, const char *str, size_t len);
static Token *make_tok_char(Token *next, char op);
static Token *new_Token(Token * next, enum pp_token_type type,
const char *text, size_t txtlen);
static Token *new_Token_free(Token * next, enum pp_token_type type,
char *text, size_t txtlen);
static Token *dup_Token(Token *next, const Token *src);
static Token *new_White(Token *next);
static Token *delete_Token(Token *t);
static Token *steal_Token(Token *dst, Token *src);
static const struct use_package *
get_use_pkg(Token *t, const char *dname, const char **name);
static void mark_smac_params(Token *tline, const SMacro *tmpl,
enum pp_token_type type);
/* NULL-safe token type test: false when x is NULL */
static inline bool tok_type(const Token *x, enum pp_token_type t)
{
    if (!x)
        return false;

    return x->type == t;
}
/* True if x is a whitespace token; false for NULL */
static inline bool tok_white(const Token *x)
{
    const bool white = tok_type(x, TOK_WHITESPACE);

    return white;
}
/*
 * Return the first token at or after x that is not whitespace, or
 * NULL if the list runs out.  The list itself is left untouched.
 */
static inline Token *skip_white(Token *x)
{
    Token *cur;

    for (cur = x; tok_white(cur); cur = cur->next)
        ;

    return cur;
}
/*
 * Delete the whitespace tokens at the front of the list x and return
 * whatever delete_Token() handed back for the last one (x itself if
 * it was not whitespace to begin with).
 */
static Token *zap_white(Token *x)
{
    Token *cur;

    for (cur = x; tok_white(cur); cur = delete_Token(cur))
        ;

    return cur;
}
/*
 * Single special character tests. The use of & rather than && is
 * intentional; it tells the compiler that it is safe to access
 * text.a[1] unconditionally; hopefully a smart compiler should turn
 * it into a 16-bit memory reference.
 *
 * tok_is() is true if x is non-NULL and its text is exactly the
 * single character c.
 */
static inline bool tok_is(const Token *x, char c)
{
    if (!x)
        return false;

    return (x->text.a[0] == c) & (x->text.a[1] == '\0');
}
/*
 * True if x is any kind of token other than the single character c;
 * false if x is NULL.  The non-short-circuit & is deliberate.
 */
static inline bool tok_isnt(const Token *x, char c)
{
    if (!x)
        return false;

    return !((x->text.a[0] == c) & (x->text.a[1] == '\0'));
}
/*
 * Unquote a TOK_STRING token in place, turning it into a
 * TOK_INTERNAL_STRING, and return a pointer to the resulting text.
 * A token of any other type is left alone and its text is returned.
 *
 * If unquoting makes an out-of-line text short enough, it is moved
 * into the inline buffer and the old allocation is freed.
 */
static const char *unquote_token(Token *t)
{
    char *heap;

    if (t->type != TOK_STRING)
        return tok_text(t);

    t->type = TOK_INTERNAL_STRING;

    if (t->len <= INLINE_TEXT) {
        /* Already inline; it can only get shorter */
        t->len = nasm_unquote(t->text.a, NULL);
        return t->text.a;
    }

    heap = t->text.p.ptr;
    t->len = nasm_unquote(heap, NULL);
    if (t->len > INLINE_TEXT)
        return heap;            /* Still too long for the inline buffer */

    /* `heap' keeps the pointer alive across the wipe of the inline area */
    nasm_zero(t->text.a);
    memcpy(t->text.a, heap, t->len);
    nasm_free(heap);
    return t->text.a;
}
/*
 * Same as unquote_token(), but the unquoting is done by
 * nasm_unquote_cstr(), which errors out if the resulting string
 * contains unacceptable control characters.
 */
static const char *unquote_token_cstr(Token *t)
{
    char *heap;

    if (t->type != TOK_STRING)
        return tok_text(t);

    t->type = TOK_INTERNAL_STRING;

    if (t->len <= INLINE_TEXT) {
        /* Already inline; it can only get shorter */
        t->len = nasm_unquote_cstr(t->text.a, NULL);
        return t->text.a;
    }

    heap = t->text.p.ptr;
    t->len = nasm_unquote_cstr(heap, NULL);
    if (t->len > INLINE_TEXT)
        return heap;            /* Still too long for the inline buffer */

    /* `heap' keeps the pointer alive across the wipe of the inline area */
    nasm_zero(t->text.a);
    memcpy(t->text.a, heap, t->len);
    nasm_free(heap);
    return t->text.a;
}
/*
 * Convert a TOK_INTERNAL_STRING token to a quoted TOK_STRING token.
 * Any other token, and NULL, is returned unchanged.
 */
static Token *quote_any_token(Token *t);
static inline unused_func
Token *quote_token(Token *t)
{
    /*
     * This has to be a test of the token *type*.  tok_is() compares
     * the token text against a single character, so passing it a
     * token type constant never selected the internal strings.
     */
    if (likely(!tok_type(t, TOK_INTERNAL_STRING)))
        return t;

    return quote_any_token(t);
}
/*
 * Convert *any* kind of token to a quoted TOK_STRING token: the text
 * is run through nasm_quote() and the result replaces the old text.
 * Returns the token.
 */
static Token *quote_any_token(Token *t)
{
    size_t qlen = t->len;
    char *quoted = nasm_quote(tok_text(t), &qlen);

    t->type = TOK_STRING;
    return set_text_free(t, quoted, qlen);
}
/*
 * Reverse a list of tokens in place and return the new head (the
 * old last token), or NULL for an empty list.
 */
static Token *reverse_tokens(Token *t)
{
    Token *reversed = NULL;
    Token *cur, *rest;

    for (cur = t; cur; cur = rest) {
        rest = cur->next;
        cur->next = reversed;   /* Move `cur' to the front of the result */
        reversed = cur;
    }

    return reversed;
}
/*
 * getenv() variant operating on an input token.
 *
 * The variable name is taken from the token text: a TOK_ENVIRON
 * token has its leading %! skipped and may be followed by a quoted
 * name, a TOK_STRING is unquoted, and TOK_INTERNAL_STRING,
 * TOK_NAKED_STRING and TOK_ID are used as they are.
 *
 * Returns the value from getenv().  If the variable does not exist
 * the result is "" (after a warning) when `warn' is set, and NULL
 * otherwise.  NULL is also returned for a NULL token or a token of
 * any other type.
 */
static const char *pp_getenv(const Token *t, bool warn)
{
    const char *txt;
    const char *v;
    char *buf = NULL;
    bool is_string = false;

    /* The token must be tested before its text is looked at */
    if (!t)
        return NULL;

    txt = tok_text(t);

    switch (t->type) {
    case TOK_ENVIRON:
        txt += 2;               /* Skip leading %! */
        is_string = nasm_isquote(*txt);
        break;

    case TOK_STRING:
        is_string = true;
        break;

    case TOK_INTERNAL_STRING:
    case TOK_NAKED_STRING:
    case TOK_ID:
        is_string = false;
        break;

    default:
        return NULL;
    }

    if (is_string) {
        /* Unquote a private copy; the token itself is const */
        buf = nasm_strdup(txt);
        nasm_unquote_cstr(buf, NULL);
        txt = buf;
    }

    v = getenv(txt);
    if (warn && !v) {
        /*!
         *!environment [on] nonexistent environment variable
         *!  warns if a nonexistent environment variable
         *!  is accessed using the \c{%!} preprocessor
         *!  construct (see \k{getenv}.)  Such environment
         *!  variables are treated as empty (with this
         *!  warning issued) starting in NASM 2.15;
         *!  earlier versions of NASM would treat this as
         *!  an error.
         */
        nasm_warn(WARN_ENVIRONMENT, "nonexistent environment variable `%s'", txt);
        v = "";
    }

    if (buf)
        nasm_free(buf);

    return v;
}
/*
 * Handle TASM specific directives, which do not contain a % in
 * front of them. We do it here because I could not find any other
 * place to do it for the moment, and it is a hack (ideally it would
 * be nice to be able to use the NASM pre-processor to do it).
 *
 * If the first word of the line is one of tasm_directives[]
 * (compared case-insensitively), the line is replaced by a newly
 * allocated copy with a '%' in front, the old buffer is freed and
 * the new one is returned.  The copy starts at the first word, so
 * whatever nasm_skip_spaces() skipped is not carried over.
 * Otherwise the original buffer is returned with its contents
 * unchanged.
 */
static char *check_tasm_directive(char *line)
{
int32_t i, j, k, m, len;
char *p, *q, *oldline, oldchar;
p = nasm_skip_spaces(line);
/* Binary search for the directive name */
/* i and j are exclusive bounds: the candidates lie strictly between them */
i = -1;
j = ARRAY_SIZE(tasm_directives);
q = nasm_skip_word(p);
len = q - p;
if (len) {
/* Temporarily terminate the first word so that it can be compared */
oldchar = p[len];
p[len] = 0;
while (j - i > 1) {
k = (j + i) / 2;
m = nasm_stricmp(p, tasm_directives[k]);
if (m == 0) {
/* We have found a directive, so jam a % in front of it
* so that NASM will then recognise it as one of its own.
*/
p[len] = oldchar;
/* From here on len is the length of the rest of the line */
len = strlen(p);
oldline = line;
/* One byte for the '%' and one for the terminating NUL */
line = nasm_malloc(len + 2);
line[0] = '%';
if (k == TM_IFDIFI) {
/*
* NASM does not recognise IFDIFI, so we convert
* it to %if 0. This is not used in NASM
* compatible code, but does need to parse for the
* TASM macro package.
*/
strcpy(line + 1, "if 0");
} else {
/* len + 1 so that the terminating NUL is copied too */
memcpy(line + 1, p, len + 1);
}
nasm_free(oldline);
return line;
} else if (m < 0) {
j = k;
} else
i = k;
}
/* Not a TASM directive: undo the temporary termination */
p[len] = oldchar;
}
return line;
}
/*
 * The pre-preprocessing stage.  In TASM compatible mode the line is
 * passed through check_tasm_directive(), which may replace it with a
 * new buffer; in any other mode the line is returned untouched.
 */
static inline char *prepreproc(char *line)
{
    if (unlikely(tasm_compatible_mode))
        line = check_tasm_directive(line);

    return line;
}
/*
 * Free a linked list of tokens, one delete_Token() call at a time.
 * A NULL list is fine.
 */
static void free_tlist(Token * list)
{
    Token *t = list;

    while (t != NULL)
        t = delete_Token(t);
}
/*
 * Free a linked list of lines, together with the token list that
 * hangs off each of them.
 */
static void free_llist(Line * list)
{
    Line *cur = list;

    while (cur) {
        Line *after = cur->next;    /* Fetched before `cur' is freed */

        free_tlist(cur->first);
        nasm_free(cur);
        cur = after;
    }
}
/*
 * Free an array of `nlists' linked lists of tokens, and then the
 * array itself.
 */
static void free_tlist_array(Token **array, size_t nlists)
{
    size_t i;

    for (i = 0; i < nlists; i++)
        free_tlist(array[i]);

    nasm_free(array);
}
/*
 * Duplicate a linked list of tokens and return the copy.
 *
 * If `tailp' is not NULL, *tailp must point at the link where the
 * copy is to be attached.  The copy is stored there, and *tailp is
 * advanced to the `next' link of the last token copied, ready for
 * further appending.
 *
 * When the source list is empty nothing was appended, so *tailp is
 * left where it was; this is the same behaviour as dup_tlistn().
 * Advancing it in that case would make it point at a local variable
 * of this function.
 */
static Token *dup_tlist(const Token *list, Token ***tailp)
{
    Token *newlist = NULL;
    Token **tailpp = &newlist;
    const Token *t;

    list_for_each(t, list) {
        Token *nt;
        *tailpp = nt = dup_Token(NULL, t);
        tailpp = &nt->next;
    }

    if (tailp) {
        **tailp = newlist;
        if (newlist)
            *tailp = tailpp;
    }

    return newlist;
}
/*
 * Duplicate at most `cnt' tokens from the front of a linked list and
 * return the copy.
 *
 * If `tailp' is not NULL, the copy is stored at **tailp; *tailp is
 * advanced to the `next' link of the last token copied, but only if
 * at least one token was copied.
 */
static Token *dup_tlistn(const Token *list, size_t cnt, Token ***tailp)
{
    Token *copy = NULL;
    Token **link = &copy;
    const Token *src;

    for (src = list; src && cnt > 0; src = src->next, cnt--) {
        Token *dup = dup_Token(NULL, src);

        *link = dup;
        link = &dup->next;
    }

    if (tailp) {
        **tailp = copy;
        if (copy)
            *tailp = link;
    }

    return copy;
}
/*
 * Duplicate a linked list of tokens in reverse order: each copy is
 * created with the result so far as its `next', starting from
 * `tail'.  Returns the head of the result, which is `tail' itself
 * for an empty list.
 */
static Token *dup_tlist_reverse(const Token *list, Token *tail)
{
    Token *head = tail;
    const Token *src;

    for (src = list; src; src = src->next)
        head = dup_Token(head, src);

    return head;
}
/*
 * Free an MMacro: its expansion lines, its default parameter list
 * and pointer array, its name, and finally the structure itself.
 */
static void free_mmacro(MMacro * m)
{
    /* The body */
    free_llist(m->expansion);

    /* The parameter defaults: the token list and the pointer array */
    free_tlist(m->dlist);
    nasm_free(m->defaults);

    nasm_free(m->name);
    nasm_free(m);
}
/*
 * Release everything an SMacro owns (parameter names, the parameter
 * array, the name and the expansion) without freeing or clearing
 * the SMacro structure itself.
 */
static void free_smacro_members(SMacro *s)
{
    if (s->params) {
        int idx = 0;

        while (idx < s->nparam) {
            Token *pname = &s->params[idx++].name;

            /* Only long names have a separate allocation */
            if (pname->len > INLINE_TEXT)
                nasm_free(pname->text.p.ptr);
        }
        nasm_free(s->params);
    }

    nasm_free(s->name);
    free_tlist(s->expansion);
}
/*
 * Release the members of an SMacro and reset it to all zeroes, but
 * keep the structure itself and its place in the list: the `next'
 * pointer, which is the first member, is preserved.
 */
static void clear_smacro(SMacro *s)
{
    void *after_next = &s->next + 1;
    const size_t nbytes = sizeof *s - sizeof s->next;

    free_smacro_members(s);

    /* Wipe everything except the next pointer */
    memset(after_next, 0, nbytes);
}
/*
 * Free an SMacro: everything it owns, then the structure itself.
 * The macro is not unlinked from any list here.
 */
static void free_smacro(SMacro *s)
{
free_smacro_members(s);
nasm_free(s);
}
/*
 * Selects which kinds of macros are to be cleared.  The three basic
 * values are single bits; the ALL values are combinations of them.
 */
enum clear_what {
    CLEAR_NONE      = 0,
    CLEAR_DEFINE    = 1 << 0,   /* Clear smacros */
    CLEAR_DEFALIAS  = 1 << 1,   /* Clear smacro aliases */
    CLEAR_MMACRO    = 1 << 2,
    CLEAR_ALLDEFINE = CLEAR_DEFINE | CLEAR_DEFALIAS,
    CLEAR_ALL       = CLEAR_ALLDEFINE | CLEAR_MMACRO
};
/*
 * Remove from a single-line macro table every macro selected by
 * `what': CLEAR_DEFINE selects the ordinary macros and
 * CLEAR_DEFALIAS the alias macros.  The macros that are not selected
 * stay in the table.  If nothing at all is left afterwards, the
 * table and its keys are freed.
 */
static void clear_smacro_table(struct hash_table *smt, enum clear_what what)
{
    struct hash_iterator it;
    const struct hash_node *np;
    bool empty = true;

    /*
     * Walk the hash table and clear out anything we don't want
     */
    hash_for_each(smt, it, np) {
        SMacro *tmp;
        SMacro *s = np->data;
        /* The link that points at the entry currently being examined */
        SMacro **head = (SMacro **)&np->data;

        list_for_each_safe(s, tmp, s) {
            /* s->alias + 1 is CLEAR_DEFINE or CLEAR_DEFALIAS */
            if (what & ((enum clear_what)s->alias + 1)) {
                *head = s->next;
                free_smacro(s);
            } else {
                /*
                 * This entry is kept, so the link to update from now
                 * on is its own next pointer.  Without this step,
                 * removing a later entry would rewrite the start of
                 * the chain and drop the entries kept before it.
                 */
                head = &s->next;
                empty = false;
            }
        }
    }

    /*
     * Free the hash table and keys if and only if it is now empty.
     * Note: we cannot free keys even for an empty list above, as that
     * mucks up the hash algorithm.
     */
    if (empty)
        hash_free_all(smt, true);
}
/*
 * Remove all single-line macros, aliases included, from a table;
 * this leaves the table empty, so clear_smacro_table() frees it too.
 */
static void free_smacro_table(struct hash_table *smt)
{
clear_smacro_table(smt, CLEAR_ALLDEFINE);
}
/*
 * Free a multi-line macro table: for every node its key and the
 * whole chain of macros stored under it, then the table itself.
 */
static void free_mmacro_table(struct hash_table *mmt)
{
    struct hash_iterator it;
    const struct hash_node *np;

    hash_for_each(mmt, it, np) {
        MMacro *m = np->data;

        nasm_free((void *)np->key);

        while (m) {
            MMacro *after = m->next;    /* Fetched before `m' is freed */

            free_mmacro(m);
            m = after;
        }
    }

    hash_free(mmt);
}
/* Free both global macro tables, `smacros' and `mmacros' */
static void free_macros(void)
{
free_smacro_table(&smacros);
free_mmacro_table(&mmacros);
}
/*
 * Initialize the hash tables.  The body is empty: at present there
 * is nothing to do here.
 */
static void init_macros(void)
{
}
/*
* Pop the context stack.
*/
static void ctx_pop(void)
{
Context *c = cstk;
cstk = cstk->next;
free_smacro_table(&c->localmac);
nasm_free((char *)c->name);
nasm_free(c);
}
/*
 * Look a key up in the hash index with hash_findib() and add it if
 * it is not there yet.  A new entry gets its own copy of the key and
 * a NULL data pointer.  Returns the pointer to the data pointer of
 * the existing or new entry.
 */
static void **
hash_findi_add(struct hash_table *hash, const char *str)
{
    struct hash_insert hi;
    const size_t keylen = strlen(str) + 1;      /* Includes the NUL */
    void **slot = hash_findib(hash, str, keylen, &hi);

    if (!slot) {
        char *key = nasm_malloc(keylen); /* Use a more efficient allocator here? */

        memcpy(key, str, keylen);
        slot = hash_add(&hi, key, NULL);
    }

    return slot;
}
/*
 * Like hash_findi(), but returns the data element itself instead of
 * a pointer to it, or NULL if the key is not found.  Only for pure
 * lookups, which is why there is no insertion argument.
 */
static void *
hash_findix(struct hash_table *hash, const char *str)
{
    void **slot = hash_findi(hash, str, NULL);

    if (!slot)
        return NULL;

    return *slot;
}
/*
 * Read a line from the standard macro set; if there are none left,
 * return NULL.
 *
 * The line is stored in a compact form which is expanded here into a
 * newly allocated, NUL-terminated string.  When the line read was
 * the last one of its macro set, the read position moves on to the
 * next set; when there is no next set and do_predef is set, the
 * `predef' lines are pushed onto the expansion stack of the current
 * input instead.
 */
static char *line_from_stdmac(void)
{
unsigned char c;
const unsigned char *p = stdmacpos;
char *line, *q;
size_t len = 0;
if (!stdmacpos)
return NULL;
/*
* 32-126 is ASCII, 127 is end of line, 128-31 are directives
* (allowed to wrap around) corresponding to PP_* tokens 0-159.
*/
/* First pass: work out how long the expanded line is */
while ((c = *p++) != 127) {
uint8_t ndir = c - 128;
if (ndir < 256-96)
len += pp_directives_len[ndir] + 1;
else
len++;
}
line = nasm_malloc(len + 1);
q = line;
/* Second pass: expand; a directive byte becomes its name followed by a space */
while ((c = *stdmacpos++) != 127) {
uint8_t ndir = c - 128;
if (ndir < 256-96) {
memcpy(q, pp_directives[ndir], pp_directives_len[ndir]);
q += pp_directives_len[ndir];
*q++ = ' ';
} else {
*q++ = c;
}
}
/* Both passes stop just after the same terminator */
stdmacpos = p;
*q = '\0';
/* A second end-of-line byte in a row marks the end of the macro set */
if (*stdmacpos == 127) {
/* This was the last of this particular macro set */
stdmacpos = NULL;
if (*stdmacnext) {
stdmacpos = *stdmacnext++;
} else if (do_predef) {
Line *pd, *l;
/*
* Nasty hack: here we push the contents of
* `predef' on to the top-level expansion stack,
* since this is the most convenient way to
* implement the pre-include and pre-define
* features.
*/
list_for_each(pd, predef) {
nasm_new(l);
l->next = istk->expansion;
l->first = dup_tlist(pd->first, NULL);
l->finishes = NULL;
istk->expansion = l;
}
/* Make sure this is done only once */
do_predef = false;
}
}
return line;
}
/*
 * Read a line from a file. Return NULL on end of file.
 *
 * The result is a newly allocated, NUL-terminated buffer without the
 * line ending.  A line ends at "\n", "\r", "\r\n", a ^Z character or
 * the end of the file.  A backslash directly in front of a line
 * ending continues the line: the backslash and the line ending are
 * both dropped and reading carries on with the next physical line.
 *
 * The line number of the current input (istk) is advanced before
 * reading, and the continuation lines consumed are accounted for in
 * istk->lineskip, to be applied on the next call.
 */
static char *line_from_file(FILE *f)
{
int c;
unsigned int size, next;
const unsigned int delta = 512; /* Allocation step of the buffer */
const unsigned int pad = 8; /* The buffer grows when fewer than this many bytes are left */
bool cont = false; /* True between a continuation backslash and its line ending */
char *buffer, *p;
istk->where.lineno += istk->lineskip + istk->lineinc;
src_set_linnum(istk->where.lineno);
istk->lineskip = 0;
size = delta;
p = buffer = nasm_malloc(size);
do {
c = fgetc(f);
switch (c) {
case EOF:
/* Nothing was read at all: this is the real end of the file */
if (p == buffer) {
nasm_free(buffer);
return NULL;
}
/* Otherwise the end of the file ends an unterminated last line */
c = 0;
break;
case '\r':
/* Treat "\r\n" as a single line ending */
next = fgetc(f);
if (next != '\n')
ungetc(next, f);
if (cont) {
cont = false;
continue;
}
c = 0;
break;
case '\n':
if (cont) {
cont = false;
continue;
}
c = 0;
break;
case 032: /* ^Z = legacy MS-DOS end of file mark */
c = 0;
break;
case '\\':
/* Peek at the next character without consuming it */
next = fgetc(f);
ungetc(next, f);
if (next == '\r' || next == '\n') {
cont = true;
istk->lineskip += istk->lineinc;
continue;
}
break;
}
if (p >= (buffer + size - pad)) {
buffer = nasm_realloc(buffer, size + delta);
/* The buffer may have moved; p was at this offset */
p = buffer + size - pad;
size += delta;
}
*p++ = c;
/* Storing the terminating 0 is what ends the loop */
} while (c);
return buffer;
}
/*
 * Fetch the next raw input line regardless of where it comes from:
 * the file attached to the top of the input stack if there is one,
 * otherwise the standard macro set.  Unless listing is suppressed
 * for this input, the line is also passed to the list formatter.
 * Returns NULL when the source has no more lines.
 */
static char *read_line(void)
{
    FILE *fp = istk->fp;
    char *text = fp ? line_from_file(fp) : line_from_stdmac();

    if (text && !istk->nolist)
        lfmt->line(LIST_READ, istk->where.lineno, text);

    return text;
}
/*
 * Tokenize a line of text. This is a very simple process since we
 * don't need to parse the value out of e.g. numeric tokens: we
 * simply split one string into many.
 *
 * Returns the head of a newly built, singly linked token list (NULL
 * for an empty line).  Runs of whitespace become a single whitespace
 * token; comments, and whitespace directly before a comment or the
 * end of the line, produce no token at all.
 *
 * Within the loop, `line' is the start of the current token, `p' is
 * the scan position (where the next token starts) and `ep', when
 * set, is the end of the token text if that differs from `p'.
 */
static Token *tokenize(const char *line)
{
enum pp_token_type type;
Token *list = NULL;
Token *t, **tail = &list;
while (*line) {
const char *p = line;
const char *ep = NULL; /* End of token, for trimming the end */
size_t toklen;
char firstchar = *p; /* Can be used to override the first char */
if (*p == '%') {
/*
* Preprocessor construct; find the end of the token.
* Classification is handled later, because %{...} can be
* used to create any preprocessor token.
*/
p++;
if (*p == '+' && !nasm_isdigit(p[1])) {
/* Paste token */
p++;
} else if (nasm_isdigit(*p) ||
((*p == '-' || *p == '+') && nasm_isdigit(p[1]))) {
/* %N, %+N or %-N: optional sign followed by digits */
do {
p++;
}
while (nasm_isdigit(*p));
} else if (*p == '{' || *p == '[') {
/* %{...} or %[...] */
char firstchar = *p;
char endchar = *p + 2; /* } or ] */
int lvl = 1;
line += (*p++ == '{'); /* Skip { but not [ (yet) */
/* Scan to the matching close, honouring nesting and quoted strings */
while (lvl) {
if (*p == firstchar) {
lvl++;
} else if (*p == endchar) {
lvl--;
} else if (nasm_isquote(*p)) {
p = nasm_skip_string(p);
}
/*
* *p can have been advanced to a null character by
* nasm_skip_string()
*/
if (!*p) {
nasm_warn(WARN_OTHER, "unterminated %%%c construct",
firstchar);
break;
}
p++;
}
ep = lvl ? p : p-1; /* Terminal character not part of token */
} else if (*p == '?') {
/* %? or %?? */
p++;
if (*p == '?')
p++;
} else if (*p == '!') {
/* Environment variable reference */
p++;
if (nasm_isidchar(*p)) {
do {
p++;
}
while (nasm_isidchar(*p));
} else if (nasm_isquote(*p)) {
p = nasm_skip_string(p);
if (*p)
p++;
else
nasm_nonfatalf(ERR_PASS1, "unterminated %%! string");
} else {
/* %! without anything else... */
}
} else if (*p == ',') {
/* Conditional comma */
p++;
} else if (nasm_isidchar(*p) ||
((*p == '%' || *p == '$') && nasm_isidchar(p[1]))) {
/* Identifier or some sort */
do {
p++;
}
while (nasm_isidchar(*p));
} else if (*p == '%') {
/* %% operator */
p++;
}
if (!ep)
ep = p;
toklen = ep - line;
/* Classify here, to handle %{...} correctly */
if (toklen < 2) {
type = TOK_OTHER; /* % operator */
} else {
/* The character right after the leading % (or {) selects the type */
char c0 = line[1];
switch (c0) {
case '+':
type = (toklen == 2) ? TOK_PASTE : TOK_MMACRO_PARAM;
break;
case '-':
type = TOK_MMACRO_PARAM;
break;
case '?':
if (toklen == 2)
type = TOK_PREPROC_Q;
else if (toklen == 3 && line[2] == '?')
type = TOK_PREPROC_QQ;
else
type = TOK_PREPROC_ID;
break;
case '!':
type = (toklen == 2) ? TOK_OTHER : TOK_ENVIRON;
break;
case '%':
type = (toklen == 2) ? TOK_OTHER : TOK_LOCAL_SYMBOL;
break;
case '$':
type = (toklen == 2) ? TOK_OTHER : TOK_LOCAL_MACRO;
break;
case '[':
line += 2; /* Skip %[ */
firstchar = *line; /* Don't clobber */
toklen -= 2;
type = TOK_INDIRECT;
break;
case ',':
type = (toklen == 2) ? TOK_COND_COMMA : TOK_PREPROC_ID;
break;
case '\'':
case '\"':
case '`':
/* %{'string'} */
type = TOK_PREPROC_ID;
break;
case ':':
type = TOK_MMACRO_PARAM; /* %{:..} */
break;
default:
if (nasm_isdigit(c0))
type = TOK_MMACRO_PARAM;
else if (nasm_isidchar(c0) || toklen > 2)
type = TOK_PREPROC_ID;
else
type = TOK_OTHER;
break;
}
}
} else if (nasm_isidstart(*p) || (*p == '$' && nasm_isidstart(p[1]))) {
/*
* An identifier. This includes the ? operator, which is
* treated as a keyword, not as a special character
* operator
*/
type = TOK_ID;
while (nasm_isidchar(*++p))
;
} else if (nasm_isquote(*p)) {
/*
* A string token.
*/
type = TOK_STRING;
p = nasm_skip_string(p);
if (*p) {
p++;
} else {
nasm_warn(WARN_OTHER, "unterminated string");
/* Handling unterminated strings by UNV */
/* type = -1; */
}
} else if (p[0] == '$' && p[1] == '$') {
type = TOK_OTHER; /* TOKEN_BASE */
p += 2;
} else if (nasm_isnumstart(*p)) {
bool is_hex = false;
bool is_float = false;
bool has_e = false;
char c;
/*
* A numeric token.
*/
if (*p == '$') {
p++;
is_hex = true;
}
/* Consume characters until one cannot belong to the number */
for (;;) {
c = *p++;
if (!is_hex && (c == 'e' || c == 'E')) {
has_e = true;
if (*p == '+' || *p == '-') {
/*
* e can only be followed by +/- if it is either a
* prefixed hex number or a floating-point number
*/
p++;
is_float = true;
}
} else if (c == 'H' || c == 'h' || c == 'X' || c == 'x') {
is_hex = true;
} else if (c == 'P' || c == 'p') {
is_float = true;
if (*p == '+' || *p == '-')
p++;
} else if (nasm_isnumchar(c))
; /* just advance */
else if (c == '.') {
/*
* we need to deal with consequences of the legacy
* parser, like "1.nolist" being two tokens
* (TOK_NUMBER, TOK_ID) here; at least give it
* a shot for now. In the future, we probably need
* a flex-based scanner with proper pattern matching
* to do it as well as it can be done. Nothing in
* the world is going to help the person who wants
* 0x123.p16 interpreted as two tokens, though.
*/
const char *r = p;
while (*r == '_')
r++;
if (nasm_isdigit(*r) || (is_hex && nasm_isxdigit(*r)) ||
(!is_hex && (*r == 'e' || *r == 'E')) ||
(*r == 'p' || *r == 'P')) {
p = r;
is_float = true;
} else
break; /* Terminate the token */
} else
break;
}
p--; /* Point to first character beyond number */
if (p == line+1 && *line == '$') {
type = TOK_OTHER; /* TOKEN_HERE */
} else {
if (has_e && !is_hex) {
/* 1e13 is floating-point, but 1e13h is not */
is_float = true;
}
type = is_float ? TOK_FLOAT : TOK_NUMBER;
}
} else if (nasm_isspace(*p)) {
type = TOK_WHITESPACE;
p = nasm_skip_spaces(p);
/*
* Whitespace just before end-of-line is discarded by
* pretending it's a comment; whitespace just before a
* comment gets lumped into the comment.
*/
if (!*p || *p == ';') {
type = TOK_COMMENT;
while (*p)
p++;
}
} else if (*p == ';') {
/* A comment runs to the end of the line */
type = TOK_COMMENT;
while (*p)
p++;
} else {
/*
* Anything else is an operator of some kind. We check
* for all the double-character operators (>>, <<, //,
* %%, <=, >=, ==, !=, <>, &&, ||, ^^) and the triple-
* character operators (<<<, >>>, <=>) but anything
* else is a single-character operator.
*/
type = TOK_OTHER;
switch (*p++) {
case '>':
if (*p == '>') {
p++;
if (*p == '>')
p++;
} else if (*p == '=') {
p++;
}
break;
case '<':
if (*p == '<') {
p++;
if (*p == '<')
p++;
} else if (*p == '=') {
p++;
if (*p == '>')
p++;
} else if (*p == '>') {
p++;
}
break;
case '!':
if (*p == '=')
p++;
break;
case '/':
case '=':
case '&':
case '|':
case '^':
/* These operators can be doubled but nothing else */
if (*p == p[-1])
p++;
break;
default:
break;
}
}
/* Append the token to the list; comments are dropped entirely */
if (type == TOK_WHITESPACE) {
*tail = t = new_White(NULL);
tail = &t->next;
} else if (type != TOK_COMMENT) {
if (!ep)
ep = p;
*tail = t = new_Token(NULL, type, line, ep - line);
*tok_text_buf(t) = firstchar; /* E.g. %{foo} -> {foo -> %foo */
tail = &t->next;
}
line = p;
}
return list;
}
/*
* Tokens are allocated in blocks to improve speed. Set the blocksize
* to 0 to use regular nasm_malloc(); this is useful for debugging.
*
* alloc_Token() returns a zero-initialized token structure.
*/
#define TOKEN_BLOCKSIZE 4096
#if TOKEN_BLOCKSIZE
static Token *freeTokens = NULL;    /* List of tokens available for reuse */
static Token *tokenblocks = NULL;   /* List of all blocks allocated so far */

/*
 * Hand out a token: reuse one from the free list when possible,
 * otherwise allocate a whole new block of TOKEN_BLOCKSIZE tokens.
 */
static Token *alloc_Token(void)
{
    Token *tok = freeTokens;

    if (unlikely(!tok)) {
        Token *blk;
        size_t k = 2;

        nasm_newn(blk, TOKEN_BLOCKSIZE);

        /*
         * Entry 0 of every block carries no data; it only chains
         * the block allocations together.
         */
        blk[0].type = TOK_BLOCK;
        blk[0].next = tokenblocks;
        tokenblocks = blk;

        /*
         * Thread entries 2 and up onto the free list.  The final
         * entry's next pointer is left exactly as allocated.
         */
        while (k < TOKEN_BLOCKSIZE - 1) {
            blk[k].next = &blk[k+1];
            k++;
        }
        freeTokens = &blk[2];

        /* Entry 1 is the first usable token: give it to the caller */
        return &blk[1];
    }

    /* Fast path: unlink the head of the free list */
    freeTokens = tok->next;
    tok->next = NULL;
    return tok;
}
/*
 * Wipe a token and put it back on the free list.  Returns the token
 * that followed it, so lists can be torn down in a loop.
 */
static Token *delete_Token(Token *t)
{
    Token * const following = t->next;

    nasm_zero(*t);

    /* Push onto the head of the free list */
    t->next = freeTokens;
    freeTokens = t;

    return following;
}
static void delete_Blocks(void)
{
Token *block, *blocktmp;
list_for_each_safe(block, blocktmp, tokenblocks)
nasm_free(block);
freeTokens = tokenblocks = NULL;
}
#else
/*
 * Debugging allocator, used when TOKEN_BLOCKSIZE is 0: every token
 * is an individual heap allocation made with nasm_new().
 */
static inline Token *alloc_Token(void)
{
    Token *t;

    /*
     * nasm_new() is given the pointer variable itself, as at its
     * other call sites in this file; passing *t does not allocate
     * a Token for t to point at.
     */
    nasm_new(t);
    return t;
}
/*
 * Debugging allocator: free a single token and return the token
 * that followed it.
 */
static Token *delete_Token(Token *t)
{
    Token * const following = t->next;

    nasm_free(t);

    return following;
}
/*
 * Debugging allocator: tokens are freed individually, so there are
 * no blocks to release.
 */
static inline void delete_Blocks(void)
{
    return;
}
#endif
/*
 * Create a new Token and return a pointer to it, with the type,
 * text and next pointer filled in.
 *
 * - TOK_WHITESPACE tokens always get a single blank as their text;
 *   text and txtlen are ignored.
 * - If text is non-empty and txtlen is 0, the length is measured.
 * - If text is NULL, a buffer of txtlen characters is reserved but
 *   not filled; see the comment in the body for the caller's duties.
 */
static Token *new_Token(Token * next, enum pp_token_type type,
                        const char *text, size_t txtlen)
{
    Token *tok = alloc_Token();

    tok->next = next;
    tok->type = type;

    if (type == TOK_WHITESPACE) {
        tok->len = 1;
        tok->text.a[0] = ' ';
        return tok;
    }

    if (!txtlen && text && text[0])
        txtlen = tok_strlen(text);

    tok->len = tok_check_len(txtlen);

    if (!text) {
        /*
         * Allocate a buffer but do not fill it.  The caller can
         * fill in text, but must not change the length.  The filled
         * in text must be exactly txtlen once the buffer is filled
         * and before the token is added to any line lists.
         */
        if (txtlen > INLINE_TEXT)
            tok->text.p.ptr = nasm_zalloc(txtlen+1);
        return tok;
    }

    {
        char *dest;

        /* Short text lives inside the token; longer text on the heap */
        if (txtlen > INLINE_TEXT)
            dest = (tok->text.p.ptr = nasm_malloc(txtlen+1));
        else
            dest = tok->text.a;

        memcpy(dest, text, txtlen);
        dest[txtlen] = '\0';    /* In case we needed malloc() */
    }

    return tok;
}
/*
 * Variant of new_Token() for text that is handed over to the new
 * token: the buffer is either adopted by the token or copied and
 * freed.  Unlike new_Token(), this function MUST be called with a
 * valid text pointer and the correct txtlen.
 */
static Token *new_Token_free(Token * next, enum pp_token_type type,
                             char *text, size_t txtlen)
{
    Token *tok = alloc_Token();

    tok->next = next;
    tok->type = type;
    tok->len = tok_check_len(txtlen);

    if (txtlen > INLINE_TEXT) {
        /* Too long for inline storage: adopt the caller's buffer */
        tok->text.p.ptr = text;
        return tok;
    }

    /* Fits inline: copy it in and dispose of the caller's buffer */
    memcpy(tok->text.a, text, txtlen);
    free(text);
    return tok;
}
/*
 * Make a copy of the token src, linked in front of next.  Text that
 * is stored out of line is duplicated, so the copy does not share a
 * buffer with the original.
 */
static Token *dup_Token(Token *next, const Token *src)
{
    Token *copy = alloc_Token();

    memcpy(copy, src, sizeof *copy);
    copy->next = next;

    if (copy->len > INLINE_TEXT) {
        /* Replace the copied pointer with a private buffer */
        copy->text.p.ptr = nasm_malloc(copy->len + 1);
        memcpy(copy->text.p.ptr, src->text.p.ptr, copy->len + 1);
    }

    return copy;
}
/*
 * Create a whitespace token (a single blank) linked in front of next.
 */
static Token *new_White(Token *next)
{
    Token *ws = alloc_Token();

    ws->type = TOK_WHITESPACE;
    ws->next = next;
    ws->text.a[0] = ' ';
    ws->len = 1;

    return ws;
}
/*
 * Move the content of src into dst.  Both tokens keep their own
 * next pointers; everything else in dst is overwritten and src is
 * left cleared.  In contrast to dup_Token(), nothing is duplicated:
 * a text pointer allocation, if any, simply changes owner.
 * Returns dst.
 */
static Token *steal_Token(Token *dst, Token *src)
{
    /* The leading pointer-sized field (the next pointer) is skipped */
    const size_t skip = sizeof(Token *);
    const size_t payload = sizeof(Token) - skip;
    char *to = (char *)dst + skip;
    char *from = (char *)src + skip;

    memcpy(to, from, payload);      /* Transfer the content */
    memset(from, 0, payload);       /* Clear the donor token */

    return dst;
}
/*
 * Turn a list of tokens back into a text line, returned in a newly
 * allocated buffer.  The list itself is modified: environment
 * variable references are replaced by their values.
 *
 * If expand_locals is set, context-local identifiers of the form
 * "%$*xxx" are also rewritten as ..@ctxnum.xxx
 */
static char *detoken(Token * tlist, bool expand_locals)
{
    Token *t;
    char *line, *out;
    int len = 0;

    /* First pass: rewrite tokens where needed and add up the length */
    list_for_each(t, tlist) {
        if (t->type == TOK_ENVIRON) {
            const char *v = pp_getenv(t, true);

            set_text(t, v, tok_strlen(v));
            t->type = TOK_NAKED_STRING;
        } else if (t->type == TOK_LOCAL_MACRO ||
                   t->type == TOK_LOCAL_SYMBOL) {
            if (expand_locals) {
                const char *tail;
                Context *ctx = get_ctx(tok_text(t), &tail);

                if (ctx) {
                    char *renamed =
                        nasm_asprintf("..@%"PRIu64".%s", ctx->number, tail);

                    set_text_free(t, renamed, nasm_last_string_len());
                    t->type = TOK_ID;
                }
            }
        } else if (t->type == TOK_INDIRECT) {
            /*
             * Not seen when emitting to the assembler, but possible
             * when producing output for some of the list options.
             * The token text does not include the brackets, so
             * reserve room for them.
             */
            len += 3;           /* %[] */
        }

        if (debug_level(2)) {
            unsigned int t_len = t->len;
            unsigned int s_len = tok_strlen(tok_text(t));

            if (t_len != s_len) {
                nasm_panic("assertion failed: token \"%s\" type %u len %u has t->len %u\n",
                           tok_text(t), t->type, s_len, t_len);
                t->len = s_len;
            }
        }

        len += t->len;
    }

    /* Second pass: concatenate the token texts */
    out = line = nasm_malloc(len + 1);

    list_for_each(t, tlist) {
        const bool indirect = (t->type == TOK_INDIRECT);

        if (indirect) {
            *out++ = '%';
            *out++ = '[';
        }
        out = mempcpy(out, tok_text(t), t->len);
        if (indirect)
            *out++ = ']';
    }
    *out = '\0';

    return line;
}
/*
* A scanner, suitable for use by the expression evaluator, which
* operates on a line of Tokens. Expects a pointer to a pointer to
* the first token in the line to be passed in as its private_data
* field.
*
* FIX: This really needs to be unified with stdscan.
*/
/* Scanner state passed to ppscan() as its private_data */
struct ppscan {
/* Next token to be examined; set to NULL once the scan has ended */
Token *tptr;
/*
* Number of tokens ppscan() may still consume; the scan ends when
* this reaches zero.  Callers in this file pass -1, which never
* counts down to zero in practice.
*/
int ntokens;
};
/*
 * Scanner callback: fetch the next significant token from the
 * struct ppscan passed as private_data, fill in *tokval and return
 * the token type.  Whitespace and comments are skipped; TOKEN_EOS is
 * returned when the tokens run out.
 */
static int ppscan(void *private_data, struct tokenval *tokval)
{
    struct ppscan *pps = private_data;
    Token *tok;
    const char *txt;

    /* Advance until something other than blanks/comments turns up */
    for (;;) {
        tok = pps->tptr;
        if (!pps->ntokens || !tok) {
            pps->tptr = NULL;
            pps->ntokens = 0;
            return tokval->t_type = TOKEN_EOS;
        }

        pps->ntokens--;
        pps->tptr = tok->next;

        if (tok->type != TOK_WHITESPACE && tok->type != TOK_COMMENT)
            break;
    }

    txt = tok_text(tok);
    tokval->t_charptr = (char *)txt; /* Fix this */

    /* $, $$ and $-prefixed identifiers */
    if (txt[0] == '$') {
        if (txt[1] == '\0')
            return tokval->t_type = TOKEN_HERE;

        if (txt[1] == '$' && txt[2] == '\0')
            return tokval->t_type = TOKEN_BASE;

        if (tok->type == TOK_ID) {
            tokval->t_charptr++;        /* Step over the $ */
            return tokval->t_type = TOKEN_ID;
        }
    }

    switch (tok->type) {
    case TOK_NUMBER:
    {
        bool rn_error;

        tokval->t_integer = readnum(txt, &rn_error);
        return tokval->t_type = rn_error ? TOKEN_ERRNUM : TOKEN_NUM;
    }

    case TOK_FLOAT:
        return tokval->t_type = TOKEN_FLOAT;

    case TOK_STRING:
        tokval->t_charptr = (char *)unquote_token(tok);
        tokval->t_inttwo = tok->len;
        return tokval->t_type = TOKEN_STR;

    case TOK_ID:
        break;

    default:
        /* A single character stands for itself */
        if (tok->len == 1)
            return tokval->t_type = txt[0];
        break;
    }

    /* Identifiers and multi-character tokens go through the hash */
    return nasm_token_hash(txt, tokval);
}
/*
 * Parse a boolean option value.  Accepted forms:
 *
 * 1. An expression (true if nonzero)
 * 2. The keywords true, on, yes for true
 * 3. The keywords false, off, no for false
 * 4. An empty line, for true
 *
 * On error, return defval (usually the previous value).  This covers
 * both an expression that fails to evaluate and one that is not a
 * constant.
 */
static bool pp_get_boolean_option(Token *tline, bool defval)
{
    /* Even indices are the false keywords, odd indices the true ones */
    static const char * const noyes[] = {
        "no", "yes",
        "false", "true",
        "off", "on"
    };
    struct ppscan pps;
    struct tokenval tokval;
    expr *evalresult;

    tline = skip_white(tline);
    if (!tline)
        return true;            /* Empty line */

    if (tline->type == TOK_ID) {
        size_t i;
        const char *txt = tok_text(tline);

        for (i = 0; i < ARRAY_SIZE(noyes); i++)
            if (!nasm_stricmp(txt, noyes[i]))
                return i & 1;
    }

    /* Not a keyword: treat the rest of the line as an expression */
    pps.tptr = tline;
    pps.ntokens = -1;
    tokval.t_type = TOKEN_INVALID;
    evalresult = evaluate(ppscan, &pps, &tokval, NULL, true, NULL);

    if (!evalresult)
        return defval;          /* Evaluation failed: keep old value */

    if (tokval.t_type)
        nasm_warn(WARN_OTHER, "trailing garbage after expression ignored");
    if (!is_really_simple(evalresult)) {
        nasm_nonfatal("boolean flag expression must be a constant");
        return defval;
    }

    return reloc_value(evalresult) != 0;
}
/*
 * Compare a string against the name of an existing macro.  Thin
 * wrapper that picks strcmp (when `casesense' is set) or
 * nasm_stricmp (when it is not); the result is that function's.
 */
static int mstrcmp(const char *p, const char *q, bool casesense)
{
    if (casesense)
        return strcmp(p, q);

    return nasm_stricmp(p, q);
}
/*
 * Compare the first `l' bytes of two buffers.  Thin wrapper that
 * picks memcmp (when `casesense' is set) or nasm_memicmp (when it
 * is not); the result is that function's.
 */
static int mmemcmp(const char *p, const char *q, size_t l, bool casesense)
{
    if (casesense)
        return memcmp(p, q, l);

    return nasm_memicmp(p, q, l);
}
/*
 * Return the Context structure associated with a %$ token. Return
 * NULL, having _already_ reported an error condition, if the
 * context stack isn't deep enough for the supplied number of $
 * signs.  NULL is also returned, without any error, when `name' is
 * NULL or does not start with %$.
 *
 * If "namep" is non-NULL, set it to the pointer to the macro name
 * tail, i.e. the part beyond %$...  When NULL is returned, *namep is
 * the unmodified `name'.
 */
static Context *get_ctx(const char *name, const char **namep)
{
    const char *tail;
    Context *ctx;
    int depth = 0;

    if (namep)
        *namep = name;

    if (!name || name[0] != '%' || name[1] != '$')
        return NULL;

    if (!cstk) {
        nasm_nonfatal("`%s': context stack is empty", name);
        return NULL;
    }

    /* Each extra $ after the initial %$ goes one context further down */
    tail = name + 2;
    ctx = cstk;
    while (ctx && *tail == '$') {
        tail++;
        depth++;
        ctx = ctx->next;
    }

    if (!ctx) {
        /*
         * Report the token as written, like the "stack is empty"
         * message above, rather than the partially consumed tail.
         */
        nasm_nonfatal("`%s': context stack is only"
                      " %d level%s deep", name, depth,
                      (depth == 1 ? "" : "s"));
        return NULL;
    }

    if (namep)
        *namep = tail;

    return ctx;
}
/*
* Open an include file. This routine must always return a valid
* file pointer if it returns - it's responsible for throwing an
* ERR_FATAL and bombing out completely if not. It should also try
* the include path one by one until it finds the file or reaches
* the end of the path.
*
* Note: for INC_PROBE the function returns NULL at all times;
* instead look at the path stored through found_path, which is
* non-NULL if the file was found.
*/
/* How inc_fopen() and inc_fopen_search() should treat the file */
enum incopen_mode {
INC_NEEDED, /* File must exist */
INC_OPTIONAL, /* Missing is OK */
INC_PROBE /* Only an existence probe */
};
/*
 * Conduct a full pathname search for `file': first with an empty
 * prefix, then with each entry of the include path list in turn.
 *
 * On success *slpath receives the newly allocated full pathname;
 * the return value is the open file, or NULL for INC_PROBE, which
 * only tests for existence.  On failure *slpath is set to NULL and
 * NULL is returned.
 */
static FILE *inc_fopen_search(const char *file, char **slpath,
                              enum incopen_mode omode, enum file_flags fmode)
{
    const struct strlist_entry *dir = strlist_head(ipath_list);
    const char *prefix = "";

    for (;;) {
        char *candidate = nasm_catfile(prefix, file);
        FILE *fp = NULL;
        bool hit;

        if (omode == INC_PROBE) {
            hit = nasm_file_exists(candidate);
        } else {
            fp = nasm_open_read(candidate, fmode);
            hit = (fp != NULL);
        }

        if (hit) {
            *slpath = candidate;
            return fp;
        }

        nasm_free(candidate);

        if (!dir) {
            /* Include path exhausted */
            *slpath = NULL;
            return NULL;
        }

        prefix = dir->str;
        dir = dir->next;
    }
}
/*
 * Open a file, or test for the presence of one (depending on omode),
 * considering the include path.
 *
 * The result of the path search, positive or negative, is cached in
 * FileHash under the name as written, so the search is done only
 * once per name.
 *
 * file       - file name as written in the source
 * dhead      - dependency list which the file is added to
 * found_path - if non-NULL, receives the resolved path, or NULL if
 *              the file was not found
 * omode      - INC_NEEDED reports an error if the file cannot be
 *              opened; INC_PROBE never opens the file
 * fmode      - flags passed on to nasm_open_read()
 *
 * Returns the open file, or NULL if it was not opened.
 */
static FILE *inc_fopen(const char *file,
                       struct strlist *dhead,
                       const char **found_path,
                       enum incopen_mode omode,
                       enum file_flags fmode)
{
    struct hash_insert hi;
    void **hp;
    char *path;
    FILE *fp = NULL;

    hp = hash_find(&FileHash, file, &hi);
    if (hp) {
        /* Seen before: reuse the cached result */
        path = *hp;
    } else {
        /* Need to do the actual path search */
        fp = inc_fopen_search(file, &path, omode, fmode);

        /* Positive or negative result */
        hash_add(&hi, nasm_strdup(file), path);
    }

    /*
     * Add the file to the dependency list.  Record the resolved
     * path when there is one, whether the result came from the cache
     * or from a fresh search, so both cases list the same name.
     */
    if (path || omode != INC_NEEDED)
        strlist_add(dhead, path ? path : file);

    /* A cached hit has not been opened yet */
    if (path && !fp && omode != INC_PROBE)
        fp = nasm_open_read(path, fmode);

    if (omode == INC_NEEDED && !fp) {
        if (!path)
            errno = ENOENT;

        nasm_nonfatal("unable to open include file `%s': %s",
                      file, strerror(errno));
    }

    if (found_path)
        *found_path = path;

    return fp;
}
/*
 * Public entry point for opening an include or input file.  Meant
 * for modules that were given a file:lineno pair and need to read
 * the file again (e.g. the CodeView debug backend).  The file is
 * treated as optional and no dependency list or found path is
 * passed to inc_fopen().  Returns NULL on failure.
 */
FILE *pp_input_fopen(const char *filename, enum file_flags mode)
{
    FILE *fp = inc_fopen(filename, NULL, NULL, INC_OPTIONAL, mode);

    return fp;
}
/*
 * Determine if we should warn on defining a single-line macro of
 * name `name', with `nparam' parameters. If nparam is 0 or -1, will
 * return true if _any_ single-line macro of that name is defined.
 * Otherwise, will return true if a single-line macro with either
 * `nparam' or no parameters is defined.
 *
 * If a macro with precisely the right number of parameters is
 * defined, or nparam is -1, the address of the definition structure
 * will be returned in `defn'; otherwise NULL will be returned. If `defn'
 * is NULL, no action will be taken regarding its contents, and no
 * error will occur.
 *
 * The lookup is done in the local macro table of `ctx', or in the
 * global table if `ctx' is NULL.  A matching alias is returned as
 * such only if `find_alias' is set; otherwise it is followed to the
 * name it expands to, or skipped if aliases are disabled.
 *
 * Note that this is also called with nparam zero to resolve
 * `ifdef'.
 */
static bool
smacro_defined(Context *ctx, const char *name, int nparam, SMacro **defn,
               bool nocase, bool find_alias)
{
    struct hash_table *smtbl = ctx ? &ctx->localmac : &smacros;
    SMacro *m;

restart:
    for (m = (SMacro *) hash_findix(smtbl, name); m; m = m->next) {
        /* Name must match ... */
        if (mstrcmp(m->name, name, m->casesense && nocase))
            continue;

        /* ... and so must the parameter count */
        if (!(nparam <= 0 || m->nparam == 0 || nparam == m->nparam ||
              (m->greedy && nparam >= m->nparam-1)))
            continue;

        if (m->alias && !find_alias) {
            /*
             * With aliases disabled, ignore this entry and move on
             * to the next candidate.  The for loop advances m, so
             * this cannot spin on the same entry forever.
             */
            if (ppopt.noaliases)
                continue;

            /* Otherwise look up what the alias stands for */
            name = tok_text(m->expansion);
            goto restart;
        }

        if (defn)
            *defn = (nparam == m->nparam || nparam == -1) ? m : NULL;

        return true;
    }

    return false;
}
/*
 * Parse a macro parameter count, which should be a natural number
 * in the range [0; INT_MAX].
 *
 * Returns the count.  A value outside the range is reported as an
 * error and 0 is returned instead; a string that readnum() cannot
 * parse is reported as an error as well.
 */
static int read_param_count(const char *str)
{
    int64_t value;
    bool err;

    /*
     * Keep the full-width result until the range check has been
     * done: narrowing to int first would make the upper bound test
     * always false and silently truncate large values.
     */
    value = readnum(str, &err);
    if (value < 0 || value > INT_MAX) {
        value = 0;
        nasm_nonfatal("parameter count `%s' is out of bounds [%d; %d]",
                      str, 0, INT_MAX);
    } else if (err) {
        nasm_nonfatal("unable to parse parameter count `%s'", str);
    }

    return (int)value;
}
/*
 * Count and mark off the parameters in a multi-line macro call.
 * This is called both from within the multi-line macro expansion
 * code, and also to mark off the default parameters when provided
 * in a %macro definition line.
 *
 * Note that we need space in the params array for parameter 0 being
 * a possible captured label as well as the final NULL.
 *
 * On return, *paramsp is a newly allocated array in which entries
 * 1..nparam point to the first token of each parameter and entry
 * nparam+1 is NULL; entry 0 is not set here.  *nparamp receives the
 * number of parameters.  The token list itself is not modified.
 *
 * Returns a pointer to the pointer to a terminal comma if present;
 * used to drop an empty terminal argument for legacy reasons.
 */
static Token **count_mmac_params(Token *tline, int *nparamp, Token ***paramsp)
{
int paramsize;
int nparam = 0;
Token *t;
Token **comma = NULL, **maybe_comma = NULL;
Token **params;
paramsize = PARAM_DELTA;
nasm_newn(params, paramsize);
t = skip_white(tline);
if (t) {
while (true) {
/* Need two slots for captured label and NULL */
if (unlikely(nparam+2 >= paramsize)) {
paramsize += PARAM_DELTA;
params = nasm_realloc(params, sizeof(*params) * paramsize);
}
/* Parameters are numbered from 1; t is NULL for an empty last one */
params[++nparam] = t;
if (tok_is(t, '{')) {
int brace = 1;
comma = NULL; /* Non-empty parameter */
/* Find the matching closing brace, allowing for nesting */
while (brace && (t = t->next)) {
brace += tok_is(t, '{');
brace -= tok_is(t, '}');
}
if (t) {
/*
* Now we've found the closing brace, look further
* for the comma.
*/
t = skip_white(t->next);
if (tok_isnt(t, ','))
nasm_nonfatal("braces do not enclose all of macro parameter");
} else {
nasm_nonfatal("expecting closing brace in macro parameter");
}
}
/* Advance to the next comma */
/* NOTE(review): t can be NULL here, so this takes &t->next of a NULL t - confirm that is intended */
maybe_comma = &t->next;
while (tok_isnt(t, ',')) {
if (!tok_white(t))
comma = NULL; /* Non-empty parameter */
maybe_comma = &t->next;
t = t->next;
}
if (!t)
break; /* End of string, no comma */
comma = maybe_comma; /* Point to comma pointer */
t = skip_white(t->next); /* Eat the comma and whitespace */
}
}
params[nparam+1] = NULL;
*paramsp = params;
*nparamp = nparam;
return comma;
}
/*
 * Determine whether one of the various `if' conditions is true or
 * not.
 *
 * tline is the token list following the directive and ct is the
 * directive itself; PP_COND(ct) selects the kind of test and
 * PP_COND_NEGATIVE(ct) inverts its result.
 *
 * Returns COND_IF_TRUE or COND_IF_FALSE according to the outcome of
 * the test, or COND_NEVER if the condition is malformed or cannot be
 * evaluated.
 *
 * We must free the tline we get passed; this is done on every exit
 * path.
 */
static enum cond_state if_condition(Token * tline, enum preproc_token ct)
{
    bool j;
    Token *t, *tt, *origline;
    struct ppscan pps;
    struct tokenval tokval;
    expr *evalresult;
    enum pp_token_type needtype;
    const char *dname = pp_directives[ct];
    bool casesense = true;
    enum preproc_token cond = PP_COND(ct);

    origline = tline;

    switch (cond) {
    case PP_IFCTX:
        /* True if any listed name matches the current context name */
        j = false;              /* have we matched yet? */
        while (true) {
            tline = skip_white(tline);
            if (!tline)
                break;
            if (tline->type != TOK_ID) {
                nasm_nonfatal("`%s' expects context identifiers",
                              dname);
                goto fail;
            }
            if (cstk && cstk->name && !nasm_stricmp(tok_text(tline), cstk->name))
                j = true;
            tline = tline->next;
        }
        break;

    case PP_IFDEF:
    case PP_IFDEFALIAS:
    {
        /* True if any listed single-line macro (or alias) is defined */
        bool alias = cond == PP_IFDEFALIAS;
        SMacro *smac;
        Context *ctx;
        const char *mname;

        j = false;              /* have we matched yet? */
        while (tline) {
            tline = skip_white(tline);
            if (!tline || (tline->type != TOK_ID &&
                           tline->type != TOK_LOCAL_MACRO)) {
                nasm_nonfatal("`%s' expects macro identifiers",
                              dname);
                goto fail;
            }

            mname = tok_text(tline);
            ctx = get_ctx(mname, &mname);
            if (smacro_defined(ctx, mname, -1, &smac, true, alias) && smac
                && smac->alias == alias) {
                j = true;
                break;
            }
            tline = tline->next;
        }
        break;
    }

    case PP_IFENV:
        /* True if any listed environment variable exists */
        tline = expand_smacro(tline);
        j = false;              /* have we matched yet? */
        while (tline) {
            tline = skip_white(tline);
            if (!tline || (tline->type != TOK_ID &&
                           tline->type != TOK_STRING &&
                           tline->type != TOK_INTERNAL_STRING &&
                           tline->type != TOK_ENVIRON)) {
                nasm_nonfatal("`%s' expects environment variable names",
                              dname);
                goto fail;
            }

            j |= !!pp_getenv(tline, false);
            tline = tline->next;
        }
        break;

    case PP_IFIDNI:
        casesense = false;
        /* fall through */
    case PP_IFIDN:
        /* True if the token sequences before and after the comma match */
        tline = expand_smacro(tline);
        t = tt = tline;
        while (tok_isnt(tt, ','))
            tt = tt->next;
        if (!tt) {
            nasm_nonfatal("`%s' expects two comma-separated arguments",
                          dname);
            goto fail;
        }
        tt = tt->next;
        j = true;               /* assume equality unless proved not */
        while (tok_isnt(t, ',') && tt) {
            unsigned int l1, l2;
            const char *t1, *t2;

            if (tok_is(tt, ',')) {
                nasm_nonfatal("`%s': more than one comma on line",
                              dname);
                goto fail;
            }
            /* Whitespace on either side is not significant */
            if (t->type == TOK_WHITESPACE) {
                t = t->next;
                continue;
            }
            if (tt->type == TOK_WHITESPACE) {
                tt = tt->next;
                continue;
            }
            if (tt->type != t->type) {
                j = false;      /* found mismatching tokens */
                break;
            }

            t1 = unquote_token(t);
            t2 = unquote_token(tt);
            l1 = t->len;
            l2 = tt->len;

            if (l1 != l2 || mmemcmp(t1, t2, l1, casesense)) {
                j = false;
                break;
            }

            t = t->next;
            tt = tt->next;
        }
        if (!tok_is(t, ',') || tt)
            j = false;          /* trailing gunk on one end or other */
        break;

    case PP_IFMACRO:
    {
        /*
         * True if a multi-line macro of the given name exists whose
         * parameter count range overlaps the requested one.
         */
        bool found = false;
        MMacro searching, *mmac;

        tline = skip_white(tline);
        tline = expand_id(tline);
        if (!tok_type(tline, TOK_ID)) {
            nasm_nonfatal("`%s' expects a macro name", dname);
            goto fail;
        }
        nasm_zero(searching);
        searching.name = dup_text(tline);
        searching.casesense = true;
        searching.nparam_min = 0;
        searching.nparam_max = INT_MAX;
        tline = expand_smacro(tline->next);
        tline = skip_white(tline);
        if (!tline) {
        } else if (!tok_type(tline, TOK_NUMBER)) {
            nasm_nonfatal("`%s' expects a parameter count or nothing",
                          dname);
        } else {
            searching.nparam_min = searching.nparam_max =
                read_param_count(tok_text(tline));
        }
        /* Optional "-max" or "-*" upper bound */
        if (tline && tok_is(tline->next, '-')) {
            tline = tline->next->next;
            if (tok_is(tline, '*'))
                searching.nparam_max = INT_MAX;
            else if (!tok_type(tline, TOK_NUMBER))
                nasm_nonfatal("`%s' expects a parameter count after `-'",
                              dname);
            else {
                searching.nparam_max = read_param_count(tok_text(tline));
                if (searching.nparam_min > searching.nparam_max) {
                    nasm_nonfatal("minimum parameter count exceeds maximum");
                    searching.nparam_max = searching.nparam_min;
                }
            }
        }
        /* Optional trailing "+" */
        if (tline && tok_is(tline->next, '+')) {
            tline = tline->next;
            searching.plus = true;
        }
        mmac = (MMacro *) hash_findix(&mmacros, searching.name);
        while (mmac) {
            if (!strcmp(mmac->name, searching.name) &&
                (mmac->nparam_min <= searching.nparam_max
                 || searching.plus)
                && (searching.nparam_min <= mmac->nparam_max
                    || mmac->plus)) {
                found = true;
                break;
            }
            mmac = mmac->next;
        }
        if (tline && tline->next)
            nasm_warn(WARN_OTHER, "trailing garbage after %%ifmacro ignored");
        nasm_free(searching.name);
        j = found;
        break;
    }

    case PP_IFID:
        needtype = TOK_ID;
        goto iftype;
    case PP_IFNUM:
        needtype = TOK_NUMBER;
        goto iftype;
    case PP_IFSTR:
        needtype = TOK_STRING;
        goto iftype;

iftype:
        /* True if the first significant token has the wanted type */
        t = tline = expand_smacro(tline);

        /* Skip whitespace, and for numbers also leading signs */
        while (tok_white(t) ||
               (needtype == TOK_NUMBER && (tok_is(t, '-') | tok_is(t, '+'))))
            t = t->next;

        j = tok_type(t, needtype);
        break;

    case PP_IFTOKEN:
        /* True if the line consists of exactly one token */
        tline = expand_smacro(tline);
        t = skip_white(tline);

        j = false;
        if (t) {
            t = skip_white(t->next); /* Skip the actual token + whitespace */
            j = !t;
        }
        break;

    case PP_IFEMPTY:
        tline = expand_smacro(tline);
        t = skip_white(tline);
        j = !t;                 /* Should be empty */
        break;

    case PP_IF:
        /* True if the expression evaluates to a nonzero constant */
        pps.tptr = tline = expand_smacro(tline);
        pps.ntokens = -1;
        tokval.t_type = TOKEN_INVALID;
        evalresult = evaluate(ppscan, &pps, &tokval, NULL, true, NULL);
        if (!evalresult) {
            /*
             * Evaluation failed.  Leave through the common failure
             * path so that the token list is freed and a proper
             * cond_state is returned, as on every other error.
             */
            goto fail;
        }
        if (tokval.t_type)
            nasm_warn(WARN_OTHER, "trailing garbage after expression ignored");
        if (!is_simple(evalresult)) {
            nasm_nonfatal("non-constant value given to `%s'",
                          dname);
            goto fail;
        }
        j = reloc_value(evalresult) != 0;
        break;

    case PP_IFUSING:
    case PP_IFUSABLE:
    {
        /* True if the package exists (and, for IFUSING, is loaded) */
        const struct use_package *pkg;
        const char *name;

        pkg = get_use_pkg(tline, dname, &name);
        if (!name)
            goto fail;

        j = pkg && ((cond == PP_IFUSABLE) | use_loaded[pkg->index]);
        break;
    }

    default:
        nasm_nonfatal("unknown preprocessor directive `%s'", dname);
        goto fail;
    }

    free_tlist(origline);
    return (j ^ PP_COND_NEGATIVE(ct)) ? COND_IF_TRUE : COND_IF_FALSE;

fail:
    free_tlist(origline);
    return COND_NEVER;
}
/*
 * The default expansion routine for a single-line macro: the result
 * is simply a duplicate of the macro's expansion list.  The
 * parameters are not used.
 */
static Token *
smacro_expand_default(const SMacro *s, Token **params, int nparams)
{
    Token *copy;

    (void)nparams;
    (void)params;

    copy = dup_tlist(s->expansion, NULL);
    return copy;
}
/*
* Emit a macro defintion or undef to the listing file, if
* desired. This is similar to detoken(), but it handles the reverse
* expansion list, does not expand %! or local variable tokens, and
* does some special handling for macro parameters.
*/
static void
list_smacro_def(enum preproc_token op, const Context *ctx, const SMacro *m)
{
Token *t;
size_t namelen, size;
char *def, *p;
char *context_prefix = NULL;
size_t context_len;
namelen = strlen(m->name);
size = namelen + 2; /* Include room for space after name + NUL */
if (ctx) {
int context_depth = cstk->depth - ctx->depth + 1;
context_prefix =
nasm_asprintf("[%s::%"PRIu64"] %%%-*s",
ctx->name ? ctx->name : "",
ctx->number, context_depth, "");
context_len = nasm_last_string_len();
memset(context_prefix + context_len - context_depth,
'$', context_depth);
size += context_len;
}
list_for_each(t, m->expansion)
size += t->len;