/* blob: dc79518d5c62b80048107cfe31f05ec528ba7b62 [file] [log] [blame] */
/* **********************************************************
* Copyright (c) 2011-2014 Google, Inc. All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2003-2007 Determina Corp. */
/* Copyright (c) 2001-2003 Massachusetts Institute of Technology */
/* Copyright (c) 2000-2001 Hewlett-Packard Company */
/* file "decode_private.h" */
#ifndef DECODE_PRIVATE_H
#define DECODE_PRIVATE_H
/* Non-public prefix constants.
* These are used only in the decoding tables. We decode the
* information into the operands.
* For encoding these properties are specified in the operands,
* with our encoder auto-adding the appropriate prefixes.
*/
/* The numbering picks up right after PREFIX_SEG_GS, which is 0x40. */
#define PREFIX_DATA            0x00000080
#define PREFIX_ADDR            0x00000100
#define PREFIX_REX_W           0x00000200
#define PREFIX_REX_R           0x00000400
#define PREFIX_REX_X           0x00000800
#define PREFIX_REX_B           0x00001000
/* 0x40: only matters for SPL...SDL vs AH..BH */
#define PREFIX_REX_GENERAL     0x00002000
/* Union of all of the rex bits defined above. */
#define PREFIX_REX_ALL \
    (PREFIX_REX_W | PREFIX_REX_R | PREFIX_REX_X | PREFIX_REX_B | PREFIX_REX_GENERAL)
/* Data-size, address-size and rex bits taken together. */
#define PREFIX_SIZE_SPECIFIERS (PREFIX_DATA | PREFIX_ADDR | PREFIX_REX_ALL)

/* The prefix is encoded into the opcodes, so these two bits are unused
 * outside of the decode tables.
 */
#define PREFIX_REP             0x00004000 /* 0xf3 */
#define PREFIX_REPNE           0x00008000 /* 0xf2 */

/* PREFIX_VEX_2B and PREFIX_VEX_3B are only used during the initial decode, so
 * if space runs out they could be replaced with a byte value compare.
 */
#define PREFIX_VEX_2B          0x00010000
#define PREFIX_VEX_3B          0x00020000
#define PREFIX_VEX_L           0x00040000
/* Also only used during the initial decode. */
#define PREFIX_XOP             0x00080000

/* Branch hints show up as segment modifiers. */
#define SEG_JCC_NOT_TAKEN      SEG_CS
#define SEG_JCC_TAKEN          SEG_DS
/* XXX:
* o get rid of sI? I sign-extend all the Ib's on decoding, many are
* actually sign-extended that aren't marked sIb
* o had to add size to immed_int and base_disp, don't have it for indir_reg!
* that's fine for encoding, aren't duplicate instrs differing on size of
* indir reg data, but should set it properly for later IR passes...
* o rewrite emit.c so does emit_inst only once (instr_length() does it!)
*/
/* Flag bits kept in the least-significant byte of instr_info_t's opcode
 * field.  The "instr_info_t.opcode" layout comment in this file describes how
 * they combine with the opcode bytes; listed here in increasing bit order.
 */
enum {
    /* top bit of the low nibble: the opcode has 3 bytes, the first being an
     * implied 0x0f
     */
    OPCODE_THREEBYTES = 0x00000008,
    /* high nibble */
    OPCODE_TWOBYTES   = 0x00000010, /* opcode has 2 bytes */
    OPCODE_REG        = 0x00000020, /* opcode has /n */
    OPCODE_MODRM      = 0x00000040, /* opcode based on entire modrm; with
                                     * REQUIRES_VEX it instead means vex.W
                                     * must be set */
    OPCODE_SUFFIX     = 0x00000080, /* opcode based on suffix byte; with
                                     * REQUIRES_VEX it instead means vex.L
                                     * must be set */
};
/* instr_info_t: each table entry is one of these
* For reading all bytes of instruction, only need to know:
* 1) prefixes + opcode boundary
* 2) whether to read modrm byte, from modrm get sib and disp
* 3) whether to read immed bytes (types A, I, sI, J, and O)
* The rest of the types are for interpretation only
*
* We have room for 2 destinations and 3 sources
* Instrs with 2 dests include push, pop, edx:eax, xchg, rep lods
* Exceptions:
* pusha = 8 sources
* popa = 8 destinations
* enter = 3 dests, 4 srcs (incl. 2 immeds)
* cpuid = 4 dests
* cmpxchg8b = 3 dests, 5 srcs
* movs = 3 dests, rest of string instrs w/ rep/repne have 3+ dests
* Represent exceptions as sequence of instrs?
* All 5 exceptions have only one form, can use list field to
* point into separate table that holds types of extra operands.
*
* Separate immed field and only 2 srcs?
* enter has 2 immeds, all other instrs have <=1
* there are a number of instrs that need 3 srcs even ignoring immeds!
* (pusha,shld,shrd,cmpxchg,cmpxchg8b,rep outs,rep cmps, rep scas)
* => no separate immed field
*
* FIXMEs:
* lea = computes addr, doesn't touch mem! how encode?
* in & out, ins & outs: are I/O ports in memory?!?
* Should we model fp stack changes?!?
* All i_eSP really read 0x4(esp) or some other offset, depending on
* if take esp value before or after instrs, etc.
* punpck*, pshuf*: say "reads entire x-bits, only uses half of them"
* table is inconsistent in this: for punpck* it gives a memory size of x/2
* (b/c Intel's table gives that), for pshuf* (there are others too I think)
* gives size x (not present in Intel's table)
*/
/* instr_info_t.opcode: split into bytes
* 0th (ms) = prefix byte, if byte 3's 1st nibble's bit 3 and bit 4 are both NOT set;
* modrm byte, if byte 3's 1st nibble's bit 3 IS set.
* suffix byte, if byte 3's 1st nibble's bit 4 IS set.
* 1st = 1st byte of opcode
* 2nd = 2nd byte of opcode (if there are 2)
* 3rd (ls) = split into nibbles
* 1st nibble (ms) = if bit 1 (OPCODE_TWOBYTES) set, opcode has 2 bytes
* if bit 2 (OPCODE_REG) set, opcode has /n
* if bit 3 (OPCODE_MODRM) set, opcode based on entire modrm
* that modrm is stored as the byte 0.
* if REQUIRES_VEX then this bit instead means that
* this instruction must have vex.W set.
* if bit 4 (OPCODE_SUFFIX) set, opcode based on suffix byte
* that byte is stored as the byte 0
* if REQUIRES_VEX then this bit instead means that
* this instruction must have vex.L set.
* XXX: so we do not support an instr that has an opcode
* dependent on both a prefix and the entire modrm or suffix!
* 2nd nibble (ls) = bits 1-3 hold /n for OPCODE_REG
* if bit 4 (OPCODE_THREEBYTES) is set, the opcode has
* 3 bytes, with the first being an implied 0x0f (so
* the 2nd byte is stored as "1st" and 3rd as "2nd").
*/
/* instr_info_t.code:
* + for PREFIX: one of the PREFIX_ constants, or SEG_ constant
* + for EXTENSION and *_EXT: index into extensions table
* + for OP_: pointer to next entry of that opcode
* + may also point to extra operand table
*/
/* classification of instruction bytes up to modrm/disp/immed
* these constants are used for instr_info_t.type field
*
* Only INVALID has an explicit value (OP_LAST + 1); every later constant is
* numbered implicitly from it, so inserting or reordering entries renumbers
* everything that follows.  A type field holds either one of these constants
* or a value from the OP_ enum (assuming OP_LAST is the largest OP_ value, as
* its name suggests, the two ranges cannot collide).
*/
enum {
/* not a valid opcode */
INVALID = OP_LAST + 1,
/* prefix byte */
PREFIX,
/* 0x0f = two-byte escape code */
ESCAPE,
/* floating point instruction escape code */
FLOAT_EXT,
/* opcode extension via reg field of modrm */
EXTENSION,
/* 2-byte instructions differing by presence of 0xf3/0x66/0xf2 prefixes */
PREFIX_EXT,
/* (rep prefix +) 1-byte-opcode string instruction */
REP_EXT,
/* (repne prefix +) 1-byte-opcode string instruction */
REPNE_EXT,
/* 2-byte instructions differing by mod bits of modrm */
MOD_EXT,
/* 2-byte instructions differing by rm bits of modrm */
RM_EXT,
/* 2-byte instructions whose opcode also depends on a suffix byte */
SUFFIX_EXT,
/* instructions that vary based on whether in 64-bit mode or not */
X64_EXT,
/* 3-byte opcodes beginning 0x0f 0x38 (SSSE3 and SSE4) */
ESCAPE_3BYTE_38,
/* 3-byte opcodes beginning 0x0f 0x3a (SSE4) */
ESCAPE_3BYTE_3a,
/* instructions differing if a rex.b prefix is present */
REX_B_EXT,
/* instructions differing if a rex.w prefix is present */
REX_W_EXT,
/* instructions differing based on whether part of a vex prefix */
VEX_PREFIX_EXT,
/* instructions differing based on whether vex-encoded */
VEX_EXT,
/* instructions differing based on whether vex-encoded and vex.L */
VEX_L_EXT,
/* instructions differing based on vex.W */
VEX_W_EXT,
/* instructions differing based on whether part of an xop prefix */
XOP_PREFIX_EXT,
/* xop opcode map 8 */
XOP_8_EXT,
/* xop opcode map 9 */
XOP_9_EXT,
/* xop opcode map 10 */
XOP_A_EXT,
/* else, from OP_ enum */
};
/* Flags for instr_info_t's modrm/extra-operands field.  That field is only a
 * ushort, so every flag here has to fit in 16 bits.
 */
#define HAS_MODRM              0x0001 /* else, no modrm */
#define HAS_EXTRA_OPERANDS     0x0002 /* else, <= 2 dsts, <= 3 srcs */
/* Only meaningful together with HAS_EXTRA_OPERANDS: the code field points to
 * the next instr_info_t.
 */
#define EXTRAS_IN_CODE_FIELD   0x0004

/* Opcodes that are invalid in a particular mode are marked with one of these
 * flags, rather than being split out into little tables of 32-bit vs
 * OP_INVALID.
 */
#define X86_INVALID            0x0008
#define X64_INVALID            0x0010

/* Lets decoding avoid a single-valid-entry subtable in prefix_extensions.
 * Never needed for encoding.
 */
#define REQUIRES_PREFIX        0x0020

/* The instr must be encoded using vex.  Without this flag, the instr is
 * invalid if encoded using vex.
 */
#define REQUIRES_VEX           0x0040

/* The instr must be encoded using a rex.w prefix.  This could be expanded to
 * cover other rex flags by combining with OPCODE_* flags, as REQUIRES_VEX
 * does today.
 */
#define REQUIRES_REX           0x0080

/* The instr must be encoded with VEX.L=0; with VEX.L=1 it is an invalid
 * instr.  Avoids creating a ton of vex_L_extensions entries.
 */
#define REQUIRES_VEX_L_0       0x0100

/* The instr must be encoded with VEX.L=1; with VEX.L=0 it is an invalid
 * instr.  Avoids creating a ton of vex_L_extensions entries.
 * OPCODE_SUFFIX for REQUIRES_VEX means the same thing for encoding.
 */
#define REQUIRES_VEX_L_1       0x0200

/* Predicated via a jcc condition code */
#define HAS_PRED_CC            0x0400
/* Predicated via something complex */
#define HAS_PRED_COMPLEX       0x0800
/* Per-instruction working state shared by the decoder and the encoder:
* opcode and prefix bits, the modrm/sib/displacement fields, immediates, and
* pc bookkeeping.  Some fields are used only when decoding and others only
* when encoding, as noted on the individual fields.
*/
struct _decode_info_t {
uint opcode;
/* Holds address and data size prefixes, as well as the prefixes
* that are shared as-is with instr_t (PREFIX_SIGNIFICANT).
* We assume we're in the default mode (32-bit or 64-bit,
* depending on our build) and that the address and data size
* prefixes can be treated as absolute.
*/
uint prefixes;
reg_id_t seg_override; /* REG enum of seg, REG_NULL if none */
/* modrm info */
/* NOTE(review): mod, reg and rm look like the three fields split out of the
* modrm byte, and scale, index and base like the fields of the sib byte
* (valid when has_sib is set) -- confirm against decode.c.
*/
byte modrm;
byte mod;
byte reg;
byte rm;
bool has_sib;
byte scale;
byte index;
byte base;
bool has_disp;
int disp;
/* immed info */
opnd_size_t size_immed;
opnd_size_t size_immed2;
/* three single-bit flags describing how the immediate is to be treated */
bool immed_pc_relativize:1;
bool immed_subtract_length:1;
bool immed_pc_rel_offs:1;
ushort immed_shift;
ptr_int_t immed;
ptr_int_t immed2; /* this additional field could be 32-bit on all platforms */
/* These fields are only used when decoding rip-relative data refs */
byte *start_pc;
byte *final_pc;
uint len;
/* This field is only used when encoding rip-relative data refs.
* To save space we could make it a union with disp.
*/
byte *disp_abs;
#ifdef X64
/* Since the mode when an instr_t is involved is per-instr rather than
* per-dcontext we have our own field here instead of passing dcontext around.
* It's up to the caller to set this field to match either the instr_t
* or the dcontext_t field.
* This field exists only in X64 builds; it is read through the X64_MODE()
* macro.
*/
bool x86_mode;
#endif
/* PR 302353: support decoding as though somewhere else */
byte *orig_pc;
/* these 3 prefixes may be part of opcode */
bool data_prefix;
bool rep_prefix;
bool repne_prefix;
byte vex_vvvv; /* vvvv bits for extra operand */
bool vex_encoded;
/* for instr_t* target encoding */
ptr_int_t cur_note;
bool has_instr_opnds;
};
/* N.B.: if you change the type enum, change the string names for
* them, kept in encode.c
*/
/* operand types have 2 parts, type and size */
/* The enumerators below have no explicit values: they count up from
* TYPE_NONE == 0, so inserting or reordering entries renumbers everything
* after the change.  TYPE_BEYOND_LAST_ENUM is the final entry and therefore
* one greater than the last real type.
*/
enum {
/* operand types */
TYPE_NONE, /* must be 0 for invalid_instr */
TYPE_A, /* immediate that is absolute address */
TYPE_B, /* vex.vvvv field selects general-purpose register */
TYPE_C, /* reg of modrm selects control reg */
TYPE_D, /* reg of modrm selects debug reg */
TYPE_E, /* modrm selects reg or mem addr */
/* I don't use type F, I have eflags info in separate field */
TYPE_G, /* reg of modrm selects register */
TYPE_H, /* vex.vvvv field selects xmm/ymm register */
TYPE_I, /* immediate */
TYPE_J, /* immediate that is relative offset of EIP */
TYPE_L, /* top 4 bits of 8-bit immed select xmm/ymm register */
TYPE_M, /* modrm selects mem addr */
TYPE_O, /* immediate that is memory offset */
TYPE_P, /* reg of modrm selects MMX */
TYPE_Q, /* modrm selects MMX or mem addr */
TYPE_R, /* modrm selects register */
TYPE_S, /* reg of modrm selects segment register */
TYPE_V, /* reg of modrm selects XMM */
TYPE_W, /* modrm selects XMM or mem addr */
TYPE_X, /* DS:(RE)(E)SI */
TYPE_Y, /* ES:(RE)(E)DI */
TYPE_P_MODRM, /* == Intel 'N': modrm selects MMX */
TYPE_V_MODRM, /* == Intel 'U': modrm selects XMM */
TYPE_1, /* NOTE(review): presumably the implicit constant 1 operand -- confirm */
TYPE_FLOATCONST, /* NOTE(review): presumably an implicit floating-point
* constant operand -- confirm */
TYPE_XLAT, /* DS:(RE)(E)BX+AL */
TYPE_MASKMOVQ, /* DS:(RE)(E)DI */
TYPE_FLOATMEM, /* NOTE(review): presumably a floating-point memory operand
* -- confirm */
TYPE_VSIB, /* modrm selects mem addr with required VSIB */
TYPE_REG, /* hardcoded register */
TYPE_XREG, /* hardcoded register, default 32/64 bits depending on mode */
TYPE_VAR_REG, /* hardcoded register, default 32 bits, but can be
* 16 w/ data prefix or 64 w/ rex.w: equivalent of Intel 'v'
* == like OPSZ_4_rex8_short2 */
TYPE_VARZ_REG, /* hardcoded register, default 32 bits, but can be
* 16 w/ data prefix: equivalent of Intel 'z'
* == like OPSZ_4_short2 */
TYPE_VAR_XREG, /* hardcoded register, default 32/64 bits depending on mode,
* but can be 16 w/ data prefix: equivalent of Intel 'd64'
* == like OPSZ_4x8_short2 */
TYPE_VAR_REGX, /* hardcoded register, default 32 bits, but can be 64 w/ rex.w:
* equivalent of Intel 'y' == like OPSZ_4_rex8 */
TYPE_VAR_ADDR_XREG, /* hardcoded register, default 32/64 bits depending on mode,
* but can be 16/32 w/ addr prefix: equivalent of Intel 'd64'
* NOTE(review): 'd64' is also cited for TYPE_VAR_XREG above,
* which varies by data prefix rather than addr prefix --
* confirm which Intel size code applies here */
/* For x64 extensions (Intel '+r.') where rex.r can select an extended
* register (r8-r15): we could try to add a flag that modifies the above
* register types, but we'd have to stick it inside some stolen bits. For
* simplicity, we just make each combination a separate type:
*/
TYPE_REG_EX, /* like TYPE_REG but extendable. used for mov_imm 8-bit immed */
TYPE_VAR_REG_EX, /* like TYPE_VAR_REG (OPSZ_4_rex8_short2) but extendable.
* used for xchg and mov_imm 'v' immed. */
TYPE_VAR_XREG_EX, /* like TYPE_VAR_XREG (OPSZ_4x8_short2) but extendable.
* used for pop and push. */
TYPE_VAR_REGX_EX, /* like TYPE_VAR_REGX but extendable. used for bswap. */
/* NOTE(review): the TYPE_INDIR_* entries below appear to be memory operands
* reached through a register; the two that follow carry no description in
* this header -- confirm their meaning in decode.c.
*/
TYPE_INDIR_E,
TYPE_INDIR_REG,
TYPE_INDIR_VAR_XREG, /* indirected register that varies (by addr prefix),
* with a base of 32/64 depending on the mode;
* indirected size varies with data prefix */
TYPE_INDIR_VAR_REG, /* indirected register that varies (by addr prefix),
* with a base of 32/64;
* indirected size varies with data and rex prefixes */
TYPE_INDIR_VAR_XIREG, /* indirected register that varies (by addr prefix),
* with a base of 32/64 depending on the mode;
* indirected size varies w/ data prefix, except 64-bit Intel */
TYPE_INDIR_VAR_XREG_OFFS_1, /* TYPE_INDIR_VAR_XREG but with an offset of
* -1 * size */
TYPE_INDIR_VAR_XREG_OFFS_8, /* TYPE_INDIR_VAR_XREG but with an offset of
* -8 * size and a size of 8 stack slots */
TYPE_INDIR_VAR_XREG_OFFS_N, /* TYPE_INDIR_VAR_XREG but with an offset of
* -N * size and a size to match: i.e., it
* varies based on other operands */
TYPE_INDIR_VAR_XIREG_OFFS_1, /* TYPE_INDIR_VAR_XIREG but with an offset of
* -1 * size */
TYPE_INDIR_VAR_REG_OFFS_2, /* TYPE_INDIR_VAR_REG but with an offset of
* -2 * size and a size of 2 stack slots */
/* we have to encode the memory size into the type b/c we use the size
* to store the base reg: but since most base regs are xsp we could
* encode that into the type and store the size in the size field
*/
TYPE_INDIR_VAR_XREG_SIZEx8, /* TYPE_INDIR_VAR_XREG but with a size of
* 8 * regular size */
TYPE_INDIR_VAR_REG_SIZEx2, /* TYPE_INDIR_VAR_REG but with a size of
* 2 * regular size */
TYPE_INDIR_VAR_REG_SIZEx3x5, /* TYPE_INDIR_VAR_REG but with a size of
* 3 * regular size for 32-bit, 5 * regular
* size for 64-bit */
/* when adding new types, update type_names[] in encode.c */
TYPE_BEYOND_LAST_ENUM,
};
/* Assemble a modrm byte from its three fields: rm goes in the low bits, reg
 * is shifted up by 3 and mod by 6, and the combination is cast to byte.
 * The arguments are not masked, so the caller must pass in-range values.
 */
#define MODRM_BYTE(mod, reg, rm) ((byte) ((rm) | ((reg) << 3) | ((mod) << 6)))

/* for decode_info_t: yields !di->x86_mode or false, as selected by
 * IF_X64_ELSE (presumably the first in X64 builds, which are the only ones
 * where the x86_mode field exists, and false otherwise)
 */
#define X64_MODE(di) IF_X64_ELSE(!(di)->x86_mode, false)
/* in decode.c, not exported to non-x86 files */
/* NOTE(review): only the prototypes are visible from this header; the
* one-line descriptions below are read off the names and signatures and
* should be confirmed against the definitions in decode.c.
*/
/* Takes one of the TYPE_ operand type values; presumably reports whether it
* is one of the TYPE_INDIR_* types.
*/
bool optype_is_indir_reg(int optype);
opnd_size_t resolve_var_reg_size(opnd_size_t sz, bool is_reg);
/* The decode_info_t is an input only: its x86_mode and prefixes are read. */
opnd_size_t resolve_variable_size(decode_info_t *di/*IN: x86_mode, prefixes*/,
opnd_size_t sz, bool is_reg);
/* Variant that takes a dcontext_t and the prefix bits directly instead of a
* decode_info_t.
*/
opnd_size_t resolve_variable_size_dc(dcontext_t *dcontext,
uint prefixes, opnd_size_t sz, bool is_reg);
/* Also takes in reg8 for TYPE_REG_EX mov_imm */
/* The default_64, can_grow and extendable parameters are wrapped in
* _IF_X64(), so presumably they exist only in X64 builds.
*/
reg_id_t resolve_var_reg(decode_info_t *di/*IN: x86_mode, prefixes*/,
reg_id_t reg32, bool addr, bool can_shrink
_IF_X64(bool default_64) _IF_X64(bool can_grow)
_IF_X64(bool extendable));
opnd_size_t resolve_addr_size(decode_info_t *di/*IN: x86_mode, prefixes*/);
/* The next two take a TYPE_ operand type value; presumably they give the
* memory size and the offset factor documented on the TYPE_INDIR_VAR_*
* enumerators.
*/
opnd_size_t indir_var_reg_size(decode_info_t *di, int optype);
int indir_var_reg_offs_factor(int optype);
opnd_size_t expand_subreg_size(opnd_size_t sz);
dr_pred_type_t decode_predicate_from_instr_info(uint opcode, const instr_info_t *info);
/* exported tables */
/* Only declarations appear here; the table definitions are in another file.
* NOTE(review): the table names parallel the classification constants used
* for instr_info_t.type (EXTENSION, PREFIX_EXT, MOD_EXT, ...), and the fixed
* second dimension of each two-dimensional table is presumably the number of
* alternatives per entry (e.g. 8 for a 3-bit modrm field, 2 for a single
* bit) -- confirm against the table definitions.
*/
extern const instr_info_t first_byte[];
extern const instr_info_t second_byte[];
extern const instr_info_t extensions[][8];
extern const instr_info_t prefix_extensions[][8];
extern const instr_info_t mod_extensions[][2];
extern const instr_info_t rm_extensions[][8];
extern const instr_info_t x64_extensions[][2];
extern const instr_info_t rex_b_extensions[][2];
extern const instr_info_t rex_w_extensions[][2];
extern const instr_info_t vex_prefix_extensions[][2];
extern const instr_info_t vex_extensions[][2];
extern const instr_info_t vex_L_extensions[][3];
extern const instr_info_t vex_W_extensions[][2];
/* 256-entry byte tables; presumably each maps an opcode byte to an index into
* the matching instr_info_t table declared next to it -- confirm.
*/
extern const byte third_byte_38_index[256];
extern const byte third_byte_3a_index[256];
extern const instr_info_t third_byte_38[];
extern const instr_info_t third_byte_3a[];
extern const instr_info_t rep_extensions[][4];
extern const instr_info_t repne_extensions[][6];
extern const instr_info_t float_low_modrm[];
extern const instr_info_t float_high_modrm[][64];
extern const byte suffix_index[256];
extern const instr_info_t suffix_extensions[];
extern const instr_info_t extra_operands[];
extern const byte xop_8_index[256];
extern const byte xop_9_index[256];
extern const byte xop_a_index[256];
extern const instr_info_t xop_prefix_extensions[][2];
extern const instr_info_t xop_extensions[];
/* table that translates opcode enums into pointers into decoding tables */
extern const instr_info_t * const op_instr[];
#endif /* DECODE_PRIVATE_H */