blob: ec6191cd0c2e929f483d08e7472748747e1ffabe [file] [log] [blame]
/* **********************************************************
* Copyright (c) 2011-2014 Google, Inc. All rights reserved.
* Copyright (c) 2001-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2003-2007 Determina Corp. */
/* Copyright (c) 2001-2003 Massachusetts Institute of Technology */
/* Copyright (c) 2001 Hewlett-Packard Company */
/* decode_fast.c -- a partial but fast x86 decoder */
#include "../globals.h"
#include "decode_fast.h"
#include "../link.h"
#include "arch.h"
#include "instr.h"
#include "instr_create.h"
#include "decode.h"
#include "decode_private.h"
#include "disassemble.h"
#ifdef DEBUG
/* case 10450: give messages to clients */
# undef ASSERT_TRUNCATE
# undef ASSERT_BITFIELD_TRUNCATE
# undef ASSERT_NOT_REACHED
# define ASSERT_TRUNCATE DO_NOT_USE_ASSERT_USE_CLIENT_ASSERT_INSTEAD
# define ASSERT_BITFIELD_TRUNCATE DO_NOT_USE_ASSERT_USE_CLIENT_ASSERT_INSTEAD
# define ASSERT_NOT_REACHED DO_NOT_USE_ASSERT_USE_CLIENT_ASSERT_INSTEAD
#endif
/* This file contains tables and functions that help decode x86
   instructions just enough to determine the length of each
   instruction. All code below is based on tables in the ``Intel
   Architecture Software Developer's Manual,'' Volume 2: Instruction
   Set Reference, 1999.
   This decoder assumes that we are running in 32-bit, flat-address mode.
*/
/* NOTE that all of the tables in this file are indexed by the (primary
or secondary) opcode byte. The upper opcode nibble defines the rows,
starting with 0 at the top. The lower opcode nibble defines the
columns, starting with 0 at left. */
/* Data table for fixed part of an x86 instruction. The table is
indexed by the 1st (primary) opcode byte. Zero entries are
reserved opcodes. */
static const byte fixed_length[256] = {
1,1,1,1, 2,5,1,1, 1,1,1,1, 2,5,1,1, /* 0 */
1,1,1,1, 2,5,1,1, 1,1,1,1, 2,5,1,1, /* 1 */
1,1,1,1, 2,5,1,1, 1,1,1,1, 2,5,1,1, /* 2 */
1,1,1,1, 2,5,1,1, 1,1,1,1, 2,5,1,1, /* 3 */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, /* 4 */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, /* 5 */
1,1,1,1, 1,1,1,1, 5,5,2,2, 1,1,1,1, /* 6 */
2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2, /* 7 */
2,5,2,2, 1,1,1,1, 1,1,1,1, 1,1,1,1, /* 8 */
1,1,1,1, 1,1,1,1, 1,1,7,1, 1,1,1,1, /* 9 */
5,5,5,5, 1,1,1,1, 2,5,1,1, 1,1,1,1, /* A */
2,2,2,2, 2,2,2,2, 5,5,5,5, 5,5,5,5, /* B */
2,2,3,1, 1,1,2,5, 4,1,3,1, 1,2,1,1, /* C */
1,1,1,1, 2,2,1,1, 1,1,1,1, 1,1,1,1, /* D */
2,2,2,2, 2,2,2,2, 5,5,7,2, 1,1,1,1, /* E */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1 /* F */
/* f6 and f7 OP_test immeds are handled specially in decode_sizeof() */
};
/* Data table for fixed immediate part of an x86 instruction that
depends upon the existence of an operand-size byte. The table is
indexed by the 1st (primary) opcode byte. Entries with non-zero
values indicate opcodes with a variable-length immediate field. We
use this table if we've seen a operand-size prefix byte to adjust
the fixed_length from dword to word.
*/
static const signed char immed_adjustment[256] = {
0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, /* 0 */
0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, /* 1 */
0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, /* 2 */
0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, /* 3 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5 */
0, 0, 0, 0, 0, 0, 0, 0, -2,-2, 0, 0, 0, 0, 0, 0, /* 6 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7 */
0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 8 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, /* 9 */
0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, /* A */
0, 0, 0, 0, 0, 0, 0, 0, -2,-2,-2,-2, -2,-2,-2,-2, /* B */
0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, /* C */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D */
0, 0, 0, 0, 0, 0, 0, 0, -2,-2,-2,-2, 0, 0, 0, 0, /* E */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* F */
};
#ifdef X64
/* for x64 Intel, Jz is always a 64-bit addr ("f64" in Intel table) */
static const signed char immed_adjustment_intel64[256] = {
0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, /* 0 */
0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, /* 1 */
0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, /* 2 */
0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, /* 3 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5 */
0, 0, 0, 0, 0, 0, 0, 0, -2,-2, 0, 0, 0, 0, 0, 0, /* 6 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7 */
0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 8 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, /* 9 */
0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, /* A */
0, 0, 0, 0, 0, 0, 0, 0, -2,-2,-2,-2, -2,-2,-2,-2, /* B */
0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, /* C */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2,-2, 0, 0, 0, 0, /* E */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* F */
};
#endif
/* Data table for fixed immediate part of an x86 instruction that
* depends upon the existence of an address-size byte. The table is
* indexed by the 1st (primary) opcode byte.
* The value here is doubled for x64 mode.
*/
static const signed char disp_adjustment[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 8 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 9 */
-2,-2,-2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* F */
};
#ifdef X64
/* Data table for immediate adjustments that only apply when
* in x64 mode. We fit two types of adjustments in here:
* default-size adjustments (positive numbers) and rex.w-prefix-based
* adjustments (negative numbers, to be made positive when applied).
*/
static const char x64_adjustment[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 8 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 9 */
4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A */
0, 0, 0, 0, 0, 0, 0, 0, -4,-4,-4,-4, -4,-4,-4,-4, /* B */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* F */
};
#endif
/* Prototypes for the functions that calculate the variable
* part of the x86 instruction length. */
static int sizeof_modrm(dcontext_t *dcontext, byte *pc, bool addr16
_IF_X64(byte **rip_rel_pc));
static int sizeof_fp_op(dcontext_t *dcontext, byte *pc, bool addr16
_IF_X64(byte **rip_rel_pc));
static int sizeof_escape(dcontext_t *dcontext, byte *pc, bool addr16
_IF_X64(byte **rip_rel_pc));
static int sizeof_3byte_38(dcontext_t *dcontext, byte *pc, bool addr16, bool vex
_IF_X64(byte **rip_rel_pc));
static int sizeof_3byte_3a(dcontext_t *dcontext, byte *pc, bool addr16
_IF_X64(byte **rip_rel_pc));
enum {
VARLEN_NONE,
VARLEN_MODRM,
VARLEN_FP_OP,
VARLEN_ESCAPE, /* 2-byte opcodes */
VARLEN_3BYTE_38_ESCAPE, /* 3-byte opcodes 0f 38 */
VARLEN_3BYTE_3A_ESCAPE, /* 3-byte opcodes 0f 3a */
};
/* Some macros to make the following table look better. */
#define m VARLEN_MODRM
#define f VARLEN_FP_OP
#define e VARLEN_ESCAPE
/* Data table indicating what function to use to calculate
the variable part of the x86 instruction. This table
is indexed by the primary opcode. */
static const byte variable_length[256] = {
m,m,m,m, 0,0,0,0, m,m,m,m, 0,0,0,e, /* 0 */
m,m,m,m, 0,0,0,0, m,m,m,m, 0,0,0,0, /* 1 */
m,m,m,m, 0,0,0,0, m,m,m,m, 0,0,0,0, /* 2 */
m,m,m,m, 0,0,0,0, m,m,m,m, 0,0,0,0, /* 3 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 4 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 5 */
0,0,m,m, 0,0,0,0, 0,m,0,m, 0,0,0,0, /* 6 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 7 */
m,m,m,m, m,m,m,m, m,m,m,m, m,m,m,m, /* 8 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 9 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* A */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* B */
m,m,0,0, m,m,m,m, 0,0,0,0, 0,0,0,0, /* C */
m,m,m,m, 0,0,0,0, f,f,f,f, f,f,f,f, /* D */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* E */
0,0,0,0, 0,0,m,m, 0,0,0,0, 0,0,m,m /* F */
};
/* eliminate the macros */
#undef m
#undef f
#undef e
/* Data table for the additional fixed part of a two-byte opcode.
* This table is indexed by the 2nd opcode byte. Zero entries are
* reserved/bad opcodes.
* N.B.: none of these (except IA32_ON_IA64) need adjustment
* for data16 or addr16.
*/
static const byte escape_fixed_length[256] = {
1,1,1,1, 0,1,1,1, 1,1,0,1, 0,1,1,2, /* 0 */ /* 0f0f has extra suffix opcode byte */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, /* 1 */
1,1,1,1, 0,0,0,0, 1,1,1,1, 1,1,1,1, /* 2 */
1,1,1,1, 1,1,0,1, 1,0,1,0, 0,0,0,0, /* 3 */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, /* 4 */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, /* 5 */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, /* 6 */
2,2,2,2, 1,1,1,1, 1,1,0,0, 1,1,1,1, /* 7 */
5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, /* 8 */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, /* 9 */
1,1,1,1, 2,1,0,0, 1,1,1,1, 2,1,1,1, /* A */
#ifdef IA32_ON_IA64
/* change is the 5, could also be 3 depending on which mode we are */
/* FIXME : no modrm byte so the standard variable thing won't work */
/* (need a escape_disp_adjustment table) */
1,1,1,1, 1,1,1,1, 5,1,2,1, 1,1,1,1, /* B */
#else
1,1,1,1, 1,1,1,1, 1,1,2,1, 1,1,1,1, /* B */
#endif
1,1,2,1, 2,2,2,1, 1,1,1,1, 1,1,1,1, /* C */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, /* D */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, /* E */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,0 /* F */
/* 0f78 has immeds depending on prefixes: handled in decode_sizeof() */
};
/* Some macros to make the following table look better. */
#define m VARLEN_MODRM
#define e1 VARLEN_3BYTE_38_ESCAPE
#define e2 VARLEN_3BYTE_3A_ESCAPE
/* Data table indicating what function to use to calculate
 * the variable part of the escaped x86 instruction. This table
 * is indexed by the 2nd opcode byte.  The e1/e2 entries at 0f38/0f3a
 * divert into the three-byte opcode maps.
 */
static const byte escape_variable_length[256] = {
    m,m,m,m, 0,0,0,0, 0,0,0,0, 0,m,0,m, /* 0 */
    m,m,m,m, m,m,m,m, m,m,m,m, m,m,m,m, /* 1 */
    m,m,m,m, 0,0,0,0, m,m,m,m, m,m,m,m, /* 2 */
    0,0,0,0, 0,0,0,0, e1,0,e2,0, 0,0,0,0, /* 3 */
    m,m,m,m, m,m,m,m, m,m,m,m, m,m,m,m, /* 4 */
    m,m,m,m, m,m,m,m, m,m,m,m, m,m,m,m, /* 5 */
    m,m,m,m, m,m,m,m, m,m,m,m, m,m,m,m, /* 6 */
    m,m,m,m, m,m,m,0, m,m,0,0, m,m,m,m, /* 7 */
    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 8 */
    m,m,m,m, m,m,m,m, m,m,m,m, m,m,m,m, /* 9 */
    0,0,0,m, m,m,0,0, 0,0,0,m, m,m,m,m, /* A */
#ifdef IA32_ON_IA64
    m,m,m,m, m,m,m,m, 0,0,m,m, m,m,m,m, /* B */
#else
    m,m,m,m, m,m,m,m, m,0,m,m, m,m,m,m, /* B */
#endif
    m,m,m,m, m,m,m,m, 0,0,0,0, 0,0,0,0, /* C */
    m,m,m,m, m,m,m,m, m,m,m,m, m,m,m,m, /* D */
    m,m,m,m, m,m,m,m, m,m,m,m, m,m,m,m, /* E */
    m,m,m,m, m,m,m,m, m,m,m,m, m,m,m,0  /* F */
};
/* eliminate the macros.
 * NOTE: this previously did "#undef e" (a name not defined in this
 * region), leaving e1 and e2 defined and polluting the macro namespace
 * for the rest of the file; undef the names actually defined above.
 */
#undef m
#undef e1
#undef e2
/* Data table for the additional fixed part of a three-byte opcode 0f 38.
* This table is indexed by the 3rd opcode byte. Zero entries are
* reserved/bad opcodes.
* N.B.: ALL of these have modrm bytes, and NONE of these need adjustment for data16
* or addr16.
*/
#if 0 /* to be robust wrt future additions we assume all entries are 1 */
static const byte threebyte_38_fixed_length[256] = {
1,1,1,1, 1,1,1,1, 1,1,1,1, 0,0,0,0, /* 0 */
1,0,0,0, 1,1,0,1, 0,0,0,0, 1,1,1,0, /* 1 */
1,1,1,1, 1,1,0,0, 1,1,1,1, 0,0,0,0, /* 2 */
1,1,1,1, 1,1,0,1, 1,1,1,1, 1,1,1,1, /* 3 */
1,1,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 4 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 5 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 6 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 7 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 8 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 9 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* A */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* B */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* C */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* D */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* E */
1,1,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 /* F */
};
#endif
/* Three-byte 0f 3a: all are assumed to have a 1-byte immediate as well! */
#if 0 /* to be robust wrt future additions we assume all entries are 1 */
static const byte threebyte_3a_fixed_length[256] = {
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,1, /* 0 */
0,0,0,0, 1,1,1,1, 1,1,1,1, 1,1,1,0, /* 1 */
1,1,1,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 2 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 3 */
0,1,1,1, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 4 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 5 */
1,1,1,1, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 6 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 7 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 8 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 9 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* A */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* B */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* C */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* D */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* E */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 /* F */
};
#endif
/* Extra size when vex-encoded (from immeds) */
static const byte threebyte_38_vex_extra[256] = {
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 0 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 1 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 2 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 3 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 4 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 5 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 6 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 7 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 8 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 9 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* A */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* B */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* C */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* D */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* E */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 /* F */
};
/* XOP.0x08 is assumed to always have an immed byte */
/* Extra size for XOP opcode 0x09 (from immeds) */
static const byte xop_9_extra[256] = {
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 0 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 1 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 2 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 3 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 4 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 5 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 6 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 7 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 8 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 9 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* A */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* B */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* C */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* D */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* E */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 /* F */
};
/* Extra size for XOP opcode 0x0a (from immeds) */
static const byte xop_a_extra[256] = {
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 0 */
4,0,4,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 1 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 2 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 3 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 4 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 5 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 6 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 7 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 8 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 9 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* A */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* B */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* C */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* D */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* E */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 /* F */
};
/* Returns the length of the instruction at pc.
* If num_prefixes is non-NULL, returns the number of prefix bytes.
* If rip_rel_pos is non-NULL, returns the offset into the instruction
* of a rip-relative addressing displacement (for data only: ignores
* control-transfer relative addressing), or 0 if none.
* May return 0 size for certain invalid instructions
*/
int
decode_sizeof(dcontext_t *dcontext, byte *start_pc, int *num_prefixes
_IF_X64(uint *rip_rel_pos))
{
byte *pc = start_pc;
uint opc = (uint)*pc;
int sz = 0;
ushort varlen;
bool word_operands = false; /* data16 */
bool qword_operands = false; /* rex.w */
bool addr16 = false; /* really "addr32" for x64 mode */
bool found_prefix = true;
bool rep_prefix = false;
byte reg_opcode; /* reg_opcode field of modrm byte */
#ifdef X64
byte *rip_rel_pc = NULL;
#endif
/* Check for prefix byte(s) */
while (found_prefix) {
/* NOTE - rex prefixes must come after all other prefixes (including
* prefixes that are part of the opcode xref PR 271878). We match
* read_instruction() in considering pre-prefix rex bytes as part of
* the following instr, event when ignored, rather then treating them
* as invalid. This in effect nops improperly placed rex prefixes which
* (xref PR 241563 and Intel Manual 2A 2.2.1) is the correct thing to do.
* Rex prefixes are 0x40-0x4f; >=0x48 has rex.w bit set.
*/
if (X64_MODE_DC(dcontext) && opc >= REX_PREFIX_BASE_OPCODE &&
opc <= (REX_PREFIX_BASE_OPCODE | REX_PREFIX_ALL_OPFLAGS)) {
if (opc >= (REX_PREFIX_BASE_OPCODE | REX_PREFIX_W_OPFLAG)) {
qword_operands = true;
if (word_operands)
word_operands = false; /* rex.w trumps data16 */
} /* else, doesn't affect instr size */
opc = (uint)*(++pc);
sz += 1;
} else {
switch (opc) {
case 0x66: /* operand size */
/* rex.w before other prefixes is a nop */
if (qword_operands)
qword_operands = false;
word_operands = true;
opc = (uint)*(++pc);
sz += 1;
break;
case 0xf2: case 0xf3: /* REP */
rep_prefix = true;
/* fall through */
case 0xf0: /* LOCK */
case 0x64: case 0x65: /* segment overrides */
case 0x26: case 0x36:
case 0x2e: case 0x3e:
opc = (uint)*(++pc);
sz += 1;
break;
case 0x67:
addr16 = true;
opc = (uint)*(++pc);
sz += 1;
/* up to caller to check for addr prefix! */
break;
case 0xc4:
case 0xc5: {
/* If 64-bit mode or mod selects for register, this is vex */
if (X64_MODE_DC(dcontext) || TESTALL(MODRM_BYTE(3, 0, 0), *(pc+1))) {
/* Assumptions:
* - no vex-encoded instr size differs based on vex.w,
* so we don't bother to set qword_operands
* - no vex-encoded instr size differs based on prefixes,
* so we don't bother to decode vex.pp
*/
bool vex3 = (opc == 0xc4);
byte vex_mm = 0;
opc = (uint)*(++pc); /* 2nd vex prefix byte */
sz += 1;
if (vex3) {
vex_mm = (byte) (opc & 0x1f);
opc = (uint)*(++pc); /* 3rd vex prefix byte */
sz += 1;
}
opc = (uint)*(++pc); /* 1st opcode byte */
sz += 1;
if (num_prefixes != NULL)
*num_prefixes = sz;
/* no prefixes after vex + already did full size, so goto end */
if (!vex3 || (vex3 && (vex_mm == 1))) {
sz += sizeof_escape(dcontext, pc, addr16
_IF_X64(&rip_rel_pc));
goto decode_sizeof_done;
} else if (vex_mm == 2) {
sz += sizeof_3byte_38(dcontext, pc - 1, addr16, true
_IF_X64(&rip_rel_pc));
goto decode_sizeof_done;
} else if (vex_mm == 3) {
sz += sizeof_3byte_3a(dcontext, pc - 1, addr16
_IF_X64(&rip_rel_pc));
goto decode_sizeof_done;
}
} else
found_prefix = false;
break;
}
case 0x8f: {
/* If XOP.map_select < 8, this is not XOP but instead OP_pop */
byte map_select = *(pc+1) & 0x1f;
if (map_select >= 0x8) {
/* we have the same assumptions as for vex, that no instr size
* differs vased on vex.w or vex.pp
*/
pc += 3; /* skip all 3 xop prefix bytes */
sz += 3;
opc = (uint)*pc; /* opcode byte */
sz += 1;
if (num_prefixes != NULL)
*num_prefixes = sz;
/* all have modrm */
sz += sizeof_modrm(dcontext, pc+1, addr16 _IF_X64(&rip_rel_pc));
if (map_select == 0x8) {
/* these always have an immediate byte */
sz += 1;
} else if (map_select == 0x9)
sz += xop_9_extra[opc];
else if (map_select == 0xa)
sz += xop_a_extra[opc];
else {
ASSERT_CURIOSITY(false && "unknown XOP map_select");
/* to try to handle future ISA additions we don't abort */
}
/* no prefixes after xop + already did full size, so goto end */
goto decode_sizeof_done;
} else
found_prefix = false;
break;
}
default:
found_prefix = false;
}
}
}
if (num_prefixes != NULL)
*num_prefixes = sz;
if (word_operands) {
#ifdef X64
/* for x64 Intel, always 64-bit addr ("f64" in Intel table)
* FIXME: what about 2-byte jcc?
*/
if (X64_MODE_DC(dcontext) && proc_get_vendor() == VENDOR_INTEL)
sz += immed_adjustment_intel64[opc];
else
#endif
sz += immed_adjustment[opc]; /* no adjustment for 2-byte escapes */
}
if (addr16) { /* no adjustment for 2-byte escapes */
if (X64_MODE_DC(dcontext)) /* from 64 bits down to 32 bits */
sz += 2*disp_adjustment[opc];
else /* from 32 bits down to 16 bits */
sz += disp_adjustment[opc];
}
#ifdef X64
if (X64_MODE_DC(dcontext)) {
int adj64 = x64_adjustment[opc];
if (adj64 > 0) /* default size adjustment */
sz += adj64;
else if (qword_operands)
sz += -adj64; /* negative indicates prefix, not default, adjust */
/* else, no adjustment */
}
#endif
/* opc now really points to opcode */
sz += fixed_length[opc];
varlen = variable_length[opc];
/* for a valid instr, sz must be > 0 here, but we don't want to assert
* since we need graceful failure
*/
if (varlen == VARLEN_MODRM)
sz += sizeof_modrm(dcontext, pc+1, addr16 _IF_X64(&rip_rel_pc));
else if (varlen == VARLEN_ESCAPE) {
sz += sizeof_escape(dcontext, pc+1, addr16 _IF_X64(&rip_rel_pc));
/* special case: Intel and AMD added size-differing prefix-dependent instrs! */
if (*(pc+1) == 0x78) {
/* XXX: if have rex.w prefix we clear word_operands: is that legal combo? */
if (word_operands || rep_prefix) {
/* extrq, insertq: 2 1-byte immeds */
sz += 2;
} /* else, vmread, w/ no immeds */
}
} else if (varlen == VARLEN_FP_OP)
sz += sizeof_fp_op(dcontext, pc+1, addr16 _IF_X64(&rip_rel_pc));
else
CLIENT_ASSERT(varlen == VARLEN_NONE, "internal decoding error");
/* special case that doesn't fit the mold (of course one had to exist) */
reg_opcode = (byte) (((*(pc + 1)) & 0x38) >> 3);
if (opc == 0xf6 && reg_opcode == 0) {
sz += 1; /* TEST Eb,ib -- add size of immediate */
} else if (opc == 0xf7 && reg_opcode == 0) {
if (word_operands)
sz += 2; /* TEST Ew,iw -- add size of immediate */
else
sz += 4; /* TEST El,il -- add size of immediate */
}
decode_sizeof_done:
#ifdef X64
if (rip_rel_pos != NULL) {
if (rip_rel_pc != NULL) {
CLIENT_ASSERT(X64_MODE_DC(dcontext),
"decode_sizeof: invalid non-x64 rip_rel instr");
CLIENT_ASSERT(CHECK_TRUNCATE_TYPE_uint(rip_rel_pc - start_pc),
"decode_sizeof: unknown rip_rel instr type");
*rip_rel_pos = (uint) (rip_rel_pc - start_pc);
} else
*rip_rel_pos = 0;
}
#endif
return sz;
}
/* Computes the size of a 0f 38 three-byte instruction starting at the
 * 0x38 byte (pc): 3rd opcode byte + modrm/sib/disp + any vex immeds.
 */
static int
sizeof_3byte_38(dcontext_t *dcontext, byte *pc, bool addr16, bool vex
                _IF_X64(byte **rip_rel_pc))
{
    /* Advance past the 0x38 byte to the 3rd opcode byte. */
    uint opcode3 = *(++pc);
    /* So far all 3-byte instrs have modrm bytes.  To be robust for
     * future additions we do not consult threebyte_38_fixed_length[]
     * and instead assume a fixed length of 1 (the opcode byte).
     */
    int total = 1 + sizeof_modrm(dcontext, pc + 1, addr16 _IF_X64(rip_rel_pc));
    if (vex)
        total += threebyte_38_vex_extra[opcode3];
    return total;
}
/* Computes the size of a 0f 3a three-byte instruction starting at the
 * 0x3a byte (pc): 3rd opcode byte + modrm/sib/disp + 1-byte immed.
 */
static int
sizeof_3byte_3a(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_rel_pc))
{
    /* Advance past the 0x3a byte to the 3rd opcode byte. */
    pc++;
    /* So far all 0f 3a 3-byte instrs have modrm bytes and 1-byte immeds.
     * To be robust for future additions we do not consult
     * threebyte_3a_fixed_length[] and instead assume that shape.
     */
    int modrm_sz = sizeof_modrm(dcontext, pc + 1, addr16 _IF_X64(rip_rel_pc));
    return modrm_sz + 2; /* +1 opcode byte, +1 immed byte */
}
/* Two-byte opcode map (Tables A-4 and A-5). You use this routine
* when you have identified the primary opcode as 0x0f. You pass this
* routine the next byte to determine the number of extra bytes in the
* entire instruction.
* May return 0 size for certain invalid instructions.
*/
/* Two-byte opcode map (Tables A-4 and A-5): pc points at the byte
 * after the 0x0f escape.  Returns the remaining size of the
 * instruction (2nd opcode byte + fixed immeds + variable part).
 * May return 0 size for certain invalid instructions.
 */
static int
sizeof_escape(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_rel_pc))
{
    uint opcode2 = (uint)*pc;
    int fixed = escape_fixed_length[opcode2];
    ushort varlen = escape_variable_length[opcode2];
    /* for a valid instr, fixed must be > 0 here, but we don't want to
     * assert since we need graceful failure
     */
    switch (varlen) {
    case VARLEN_MODRM:
        return fixed + sizeof_modrm(dcontext, pc + 1, addr16 _IF_X64(rip_rel_pc));
    case VARLEN_3BYTE_38_ESCAPE:
        return fixed + sizeof_3byte_38(dcontext, pc, addr16, false _IF_X64(rip_rel_pc));
    case VARLEN_3BYTE_3A_ESCAPE:
        return fixed + sizeof_3byte_3a(dcontext, pc, addr16 _IF_X64(rip_rel_pc));
    default:
        CLIENT_ASSERT(varlen == VARLEN_NONE, "internal decoding error");
        return fixed;
    }
}
/* 32-bit addressing forms with the ModR/M Byte (Table 2-2). You call
* this routine with the byte following the primary opcode byte when you
* know that the operation's next byte is a ModR/M byte. This routine
* passes back the size of the Eaddr specification in bytes based on the
* following encoding of Table 2-2.
*
* Mod R/M
* 0 1 2 3 4 5 6 7
* 0 1 1 1 1 * 5 1 1
* 1 2 2 2 2 3 2 2 2
* 2 5 5 5 5 6 5 5 5
* 3 1 1 1 1 1 1 1 1
* where (*) is 6 if base==5 and 2 otherwise.
*/
/* Returns the size in bytes of the modrm byte plus its sib byte and
 * displacement, per the encodings of Table 2-2 (see the comment above).
 * pc points at the modrm byte.  In x64 mode, mod==0/r_m==5 is
 * rip-relative with a 4-byte disp immediately after modrm; that disp's
 * address is reported through rip_rel_pc when requested.
 */
static int
sizeof_modrm(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_rel_pc))
{
    uint modrm = (uint)*pc;
    uint mod = modrm >> 6;
    int r_m = modrm & 0x7;
    int size;
    uint sib;
#ifdef X64
    if (rip_rel_pc != NULL && X64_MODE_DC(dcontext) && mod == 0 && r_m == 5) {
        *rip_rel_pc = pc + 1; /* no sib: next 4 bytes are disp */
    }
#endif
    if (addr16 && !X64_MODE_DC(dcontext)) {
        /* 16-bit addressing: no sib byte exists. */
        switch (mod) {
        case 0: return (r_m == 6) ? 3 : 1; /* disp16 special case, else just modrm */
        case 1: return 2;                  /* modrm + disp8 */
        case 2: return 3;                  /* modrm + disp16 */
        default: return 1;                 /* mod == 3: just modrm */
        }
    }
    /* for x64, addr16 simply truncates the computed address: there is
     * no change in disp sizes */
    if (mod == 3)
        return 1; /* register operand: just modrm */
    /* memory operand: modrm + disp8 (mod 1) or modrm + disp32 (mod 2,
     * or mod 0 w/ r_m 5) */
    size = (mod == 1) ? 2 : ((mod == 2 || r_m == 5) ? 5 : 1);
    if (r_m == 4) {
        size += 1; /* adjust for sib byte */
        sib = (uint)(*(pc + 1));
        if ((sib & 0x7) == 5 && mod == 0)
            size += 4; /* disp32(,index,s) */
    }
    return size;
}
/* General floating-point instruction formats (Table B-22). You use
* this routine when you have identified the primary opcode as one in
* the range 0xb8 through 0xbf. You pass this routine the next byte
* to determine the number of extra bytes in the entire
* instruction. */
/* Size of the bytes following an fp opcode (0xd8-0xdf); pc points at
 * the byte after the primary opcode.
 */
static int
sizeof_fp_op(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_rel_pc))
{
    byte next = *pc;
    /* Above 0xbf the entire ModR/M byte is an opcode extension: no
     * memory operand, so just that one byte. */
    if (next > 0xbf)
        return 1;
    /* Otherwise the fp opcode is in the reg/opcode field and a normal
     * modrm memory reference follows. */
    return sizeof_modrm(dcontext, pc, addr16 _IF_X64(rip_rel_pc));
}
/* Table indicating "interesting" instructions, i.e., ones we
* would like to decode. Currently these are control-transfer
* instructions and interrupts.
* This table is indexed by the 1st (primary) opcode byte.
* A 0 indicates we are not interested, a 1 that we are.
* A 2 indicates a second opcode byte exists, a 3 indicates an opcode
* extension is present in the modrm byte.
*/
static const byte interesting[256] = {
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,2, /* 0 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 1 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 2 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 3 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 4 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 5 */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 6 */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, /* 7 */ /* jcc_short */
0,0,0,0, 0,0,0,0, 0,0,0,0, 1,0,1,0, /* 8 */ /* mov_seg */
0,0,0,0, 0,0,0,0, 0,0,1,0, 0,0,0,0, /* 9 */ /* call_far */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* A */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* B */
0,0,1,1, 0,0,0,0, 0,0,1,1, 1,1,1,1, /* C */ /* ret*, int* */
0,0,0,0, 0,0,0,0, 0,3,0,0, 0,3,0,0, /* D */ /* fnstenv, fnsave */
1,1,1,1, 0,0,0,0, 1,1,1,1, 0,0,0,0, /* E */ /* loop*, call, jmp* */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,3, /* F */
};
/* Table indicating operations on the lower 6 eflags (CF,PF,AF,ZF,SF,OF)
* This table is indexed by the 1st (primary) opcode byte.
* We use the eflags constants from instr.h.
* We ignore writing some of the 6 as a conservative simplification.
* Also note that for some groups we assign values to invalid opcodes
* just for simplicity
*/
/* Shorthand for building the per-opcode eflags tables below:
 *   x  = no arith-eflags effect
 *   R* = reads the named flag(s), W* = writes, B* = both reads and writes
 */
#define x 0
#define RC EFLAGS_READ_CF
#define RP EFLAGS_READ_PF
#define RZ EFLAGS_READ_ZF
#define RS EFLAGS_READ_SF
#define RO EFLAGS_READ_OF
#define R6 EFLAGS_READ_6
#define RB (EFLAGS_READ_CF|EFLAGS_READ_ZF)
#define RL (EFLAGS_READ_SF|EFLAGS_READ_OF)
#define RE (EFLAGS_READ_SF|EFLAGS_READ_OF|EFLAGS_READ_ZF)
#define R5O (EFLAGS_READ_6 & (~EFLAGS_READ_OF))
#define WC EFLAGS_WRITE_CF
#define WZ EFLAGS_WRITE_ZF
#define W6 EFLAGS_WRITE_6
#define W5 (EFLAGS_WRITE_6 & (~EFLAGS_WRITE_CF))
#define W5O (EFLAGS_WRITE_6 & (~EFLAGS_WRITE_OF))
#define BC (EFLAGS_WRITE_6|EFLAGS_READ_CF)
#define BA (EFLAGS_WRITE_6|EFLAGS_READ_AF)
#define BD (EFLAGS_WRITE_6|EFLAGS_READ_CF|EFLAGS_READ_AF)
#define B6 (EFLAGS_WRITE_6|EFLAGS_READ_6)
/* Two sentinel values (distinct from any real EFLAGS_* bitmask):
 * E = first byte is the 0x0f escape, consult escape_eflags_6 on byte 2;
 * S = effect depends on more than the opcode byte (e.g. modrm reg field),
 *     resolved by the EFLAGS_6_SPECIAL case analysis in decode_cti.
 */
#define EFLAGS_6_ESCAPE -1
#define EFLAGS_6_SPECIAL -2
#define E EFLAGS_6_ESCAPE
#define S EFLAGS_6_SPECIAL
/* Indexed by the first opcode byte: arith-eflags effects for one-byte opcodes. */
static const int eflags_6[256] = {
    W6,W6,W6,W6, W6,W6, x, x, W6,W6,W6,W6, W6,W6, x, E, /* 0 */
    BC,BC,BC,BC, BC,BC, x, x, BC,BC,BC,BC, BC,BC, x, x, /* 1 */
    W6,W6,W6,W6, W6,W6, x,BD, W6,W6,W6,W6, W6,W6, x,BD, /* 2 */
    W6,W6,W6,W6, W6,W6, x,BA, W6,W6,W6,W6, W6,W6, x,BA, /* 3 */
    W5,W5,W5,W5, W5,W5,W5,W5, W5,W5,W5,W5, W5,W5,W5,W5, /* 4 */
    x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, /* 5 */
    x, x, x,WZ, x, x, x, x, x,W6, x,W6, x, x, x, x, /* 6 */
    RO,RO,RC,RC, RZ,RZ,RB,RB, RS,RS,RP,RP, RL,RL,RE,RE, /* 7 */
    S, S, S, S, W6,W6, x, x, x, x, x, x, x, x, x, x, /* 8 */
    x, x, x, x, x, x, x, x, x, x, x, x, R6,W6,W5O,R5O, /* 9 */
    x, x, x, x, x, x,W6,W6, W6,W6, x, x, x, x,W6,W6, /* A */
    x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, /* B */
    S, S, x, x, x, x, x, x, x, x, x, x, R6,R6,R6,W6, /* C */
    S, S, S, S, W6,W6, x, x, x, x, S, S, x, x, x, S, /* D */
    RZ,RZ, x, x, x, x, x, x, x, x, x, x, x, x, x, x, /* E */
    x, x, x, x, x,WC, S, S, WC,WC, x, x, x, x, S, S, /* F */
};
/* Same as eflags_6 table, but for 2nd byte of 0x0f extension opcodes
 */
static const int escape_eflags_6[256] = {
    x, x,WZ,WZ, x, x, x, x, x, x, x, x, x, x, x, x, /* 0 */
    x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, /* 1 */
    W6,W6,W6,W6, x, x, x, x, x, x, x, x, x, x,W6,W6, /* 2 */
    x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, /* 3 */
    RO,RO,RC,RC, RZ,RZ,RB,RB, RS,RS,RP,RP, RL,RL,RE,RE, /* 4 */
    x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, /* 5 */
    x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, /* 6 */
    x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, /* 7 */
    RO,RO,RC,RC, RZ,RZ,RB,RB, RS,RS,RP,RP, RL,RL,RE,RE, /* 8 */
    RO,RO,RC,RC, RZ,RZ,RB,RB, RS,RS,RP,RP, RL,RL,RE,RE, /* 9 */
    x, x, x,W6, W6,W6, x, x, x, x,W6,W6, W6,W6, x,W6, /* A */
    W6,W6, x,W6, x, x, x, x, x, x,W6,W6, W6,W6, x, x, /* B */
    W6,W6, x, x, x, x, x,WZ, x, x, x, x, x, x, x, x, /* C */
    x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, /* D */
    x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, /* E */
    x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, /* F */
};
/* The shorthand macros are local to these tables: remove them so they
 * cannot leak into the rest of the file. */
#undef x
#undef RC
#undef RP
#undef RZ
#undef RS
#undef RO
#undef R6
#undef RB
#undef RL
#undef RE
#undef R5O
#undef WC
#undef WZ
#undef W6
#undef W5
#undef W5O
#undef BC
#undef BA
#undef BD
#undef B6
#undef E
#undef S
/* This routine converts a signed 8-bit offset into a target pc. The
 * formal parameter pc should point to the beginning of the branch
 * instruction containing the offset and having length len in bytes.
 * The x86 architecture calculates offsets from the beginning of the
 * instruction following the branch. */
static app_pc
convert_8bit_offset(byte *pc, byte offset, uint len)
{
    /* Sign-extend the raw byte arithmetically rather than via the old
     * "(int)(offset << 24) >> 24" trick: left-shifting a value into the
     * sign bit of a signed int is undefined behavior in C, and the
     * subsequent right shift of a negative value is only
     * implementation-defined.  Values >= 0x80 are negative displacements.
     */
    int disp = (offset < 0x80) ? (int)offset : (int)offset - 256;
    return ((app_pc)pc) + disp + len;
}
/* Decodes only enough of the instruction at address pc to determine
 * its size, its effects on the 6 arithmetic eflags, and whether it is
 * a control-transfer instruction. If it is, the operands fields of
 * instr are filled in. If not, only the raw bits fields of instr are
 * filled in. This corresponds to a Level 3 decoding for control
 * transfer instructions but a Level 1 decoding plus arithmetic eflags
 * information for all other instructions.
 *
 * Fills in the PREFIX_SEG_GS and PREFIX_SEG_FS prefix flags for all instrs.
 * Does NOT fill in any other prefix flags unless this is a cti instr
 * and the flags affect the instr.
 *
 * Assumes that instr is already initialized, but uses the x86/x64 mode
 * for the current thread rather than that set in instr.
 * If caller is re-using same instr struct over multiple decodings,
 * should call instr_reset or instr_reuse.
 * Returns the address of the byte following the instruction.
 * Returns NULL on decoding an invalid instr and sets opcode to OP_INVALID.
 */
byte *
decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
{
    byte byte0, byte1;
    byte *start_pc = pc;
    /* find and remember the instruction and its size */
    int prefixes;
    /* next two needed for eflags analysis */
    int eflags;
    int i;
    byte modrm = 0; /* used only for EFLAGS_6_SPECIAL */
#ifdef X64
    /* PR 251479: we need to know about all rip-relative addresses.
     * Since change/setting raw bits invalidates, we must set this
     * on every return. */
    uint rip_rel_pos;
#endif
    int sz = decode_sizeof(dcontext, pc, &prefixes _IF_X64(&rip_rel_pos));
    if (sz == 0) {
        /* invalid instruction! */
        instr_set_opcode(instr, OP_INVALID);
        return NULL;
    }
    instr_set_opcode(instr, OP_UNDECODED);
    IF_X64(instr_set_x86_mode(instr, get_x86_mode(dcontext)));
    /* we call instr_set_raw_bits on every return from here, not up
     * front, because any instr_set_src, instr_set_dst, or
     * instr_set_opcode will kill original bits state */
    /* Fill in SEG_FS and SEG_GS override prefixes, ignore rest for now.
     * We rely on having these set during bb building.
     * FIXME - could be done in decode_sizeof which is already walking these
     * bytes, but would need to complicate its interface and prefixes are
     * fairly rare to begin with. */
    if (prefixes > 0) {
        for (i = 0; i < prefixes; i++, pc++) {
            switch (*pc) {
            case FS_SEG_OPCODE:
                instr_set_prefix_flag(instr, PREFIX_SEG_FS);
                break;
            case GS_SEG_OPCODE:
                instr_set_prefix_flag(instr, PREFIX_SEG_GS);
                break;
            default:
                break;
            }
        }
    }
    /* pc now points at the first opcode byte (past any prefixes) */
    byte0 = *pc;
    byte1 = *(pc + 1);
    /* eflags analysis
     * we do this even if -unsafe_ignore_eflags b/c it doesn't cost that
     * much and we can use the analysis to detect any bb that reads a flag
     * prior to writing it
     */
    eflags = eflags_6[byte0];
    if (eflags == EFLAGS_6_ESCAPE) {
        /* 0x0f two-byte opcode: byte1 selects the entry */
        eflags = escape_eflags_6[byte1];
        if (eflags == EFLAGS_6_SPECIAL)
            modrm = *(pc + 2);
    } else if (eflags == EFLAGS_6_SPECIAL) {
        modrm = byte1;
    }
    if (eflags == EFLAGS_6_SPECIAL) {
        /* a number of cases exist beyond the ability of 2 tables
         * to distinguish
         */
        int opc_ext = (modrm >> 3) & 7; /* middle 3 bits */
        if (byte0 <= 0x84) {
            /* group 1* (80-83): all W6 except /2,/3=B */
            if (opc_ext == 2 || opc_ext == 3)
                eflags = EFLAGS_WRITE_6|EFLAGS_READ_CF;
            else
                eflags = EFLAGS_WRITE_6;
        } else if (byte0 <= 0xd3) {
            /* group 2* (c0,c1,d0-d3): /0,/1=WC|WO, /2,/3=WC|WO|RC, /4,/5,/7=W6 */
            if (opc_ext == 0 || opc_ext == 1)
                eflags = EFLAGS_WRITE_CF|EFLAGS_WRITE_OF;
            else if (opc_ext == 2 || opc_ext == 3)
                eflags = EFLAGS_WRITE_CF|EFLAGS_WRITE_OF|EFLAGS_READ_CF;
            else if (opc_ext == 4 || opc_ext == 5 || opc_ext == 7)
                eflags = EFLAGS_WRITE_6;
            else
                eflags = 0;
        } else if (byte0 <= 0xdf) {
            /* floats: dac0-dadf and dbc0-dbdf = RC|RP|RZ */
            if ((byte0 == 0xda || byte0 == 0xdb) &&
                modrm >= 0xc0 && modrm <= 0xdf)
                eflags = EFLAGS_READ_CF|EFLAGS_READ_PF|EFLAGS_READ_ZF;
            /* floats: dbe8-dbf7 and dfe8-dff7 = WC|WP|WZ */
            else if ((byte0 == 0xdb || byte0 == 0xdf) &&
                     modrm >= 0xe8 && modrm <= 0xf7)
                eflags = EFLAGS_WRITE_CF|EFLAGS_WRITE_PF|EFLAGS_WRITE_ZF;
            else
                eflags = 0;
        } else if (byte0 <= 0xf7) {
            /* group 3a (f6) & 3b (f7): all W except /2 (OP_not) */
            if (opc_ext == 2)
                eflags = 0;
            else
                eflags = EFLAGS_WRITE_6;
        } else {
            /* group 4 (fe) & 5 (ff): /0,/1=W5 */
            if (opc_ext == 0 || opc_ext == 1)
                eflags = EFLAGS_WRITE_6 & (~EFLAGS_WRITE_CF);
            else
                eflags = 0;
        }
    }
    instr->eflags = eflags;
    instr_set_arith_flags_valid(instr, true);
    /* non-"interesting" opcodes get only a Level 1 decode: raw bits + eflags */
    if (interesting[byte0] == 0) {
        /* assumption: opcode already OP_UNDECODED */
        /* assumption: operands are already marked invalid (instr was reset) */
        instr_set_raw_bits(instr, start_pc, sz);
        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
        return (start_pc + sz);
    }
    /* FIXME: would further "interesting" table produce any noticeable
     * performance improvement?
     */
    if (prefixes > 0) {
        /* prefixes are rare on ctis
         * rather than handle them all here, just do full decode
         * FIXME: if we start to see more and more jcc branch hints we
         * may change our minds here! This is case 211206/6749.
         */
        if (decode(dcontext, start_pc, instr) == NULL)
            return NULL;
        else
            return (start_pc + sz);
    }
#ifdef FOOL_CPUID
    /* for fooling program into thinking hardware is different than it is */
    if (byte0==0x0f && byte1==0xa2) { /* cpuid */
        instr_set_opcode(instr, OP_cpuid);
        /* don't bother to set dsts/srcs */
        instr_set_operands_valid(instr, false);
        instr_set_raw_bits(instr, start_pc, sz);
        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
        return (start_pc + sz);
    }
#endif
    /* prefixes won't make a difference for 8-bit-offset jumps */
    if (byte0 == 0xeb) { /* jmp_short */
        app_pc tgt = convert_8bit_offset(pc, byte1, 2);
        instr_set_opcode(instr, OP_jmp_short);
        instr_set_num_opnds(dcontext, instr, 0, 1);
        instr_set_target(instr, opnd_create_pc(tgt));
        instr_set_raw_bits(instr, start_pc, sz);
        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
        return (pc + 2);
    }
    if ((byte0 & 0xf0) == 0x70) { /* jcc_short */
        /* 2-byte pc-relative jumps with an 8-bit displacement */
        app_pc tgt = convert_8bit_offset(pc, byte1, 2);
        /* Set the instr's opcode field. Relies on special ordering
         * in opcode enum. */
        instr_set_opcode(instr, OP_jo_short + (byte0 & 0x0f));
        /* calculate the branch's target address */
        instr_set_num_opnds(dcontext, instr, 0, 1);
        instr_set_target(instr, opnd_create_pc(tgt));
        instr_set_raw_bits(instr, start_pc, sz);
        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
        return (pc + 2);
    }
    if (byte0 == 0xe8) { /* call */
        int offset = *((int *)(pc + 1));
        app_pc tgt = pc + offset + 5;
        instr_set_opcode(instr, OP_call);
        instr_set_num_opnds(dcontext, instr, 2, 2);
        instr_set_target(instr, opnd_create_pc(tgt));
        instr_set_src(instr, 1, opnd_create_reg(REG_XSP));
        instr_set_dst(instr, 0, opnd_create_reg(REG_XSP));
        instr_set_dst(instr, 1, opnd_create_base_disp
                      (REG_XSP, REG_NULL, 0, 0,
                       resolve_variable_size_dc(dcontext, 0, OPSZ_call, false)));
        instr_set_raw_bits(instr, start_pc, sz);
        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
        return (pc + 5);
    }
    if (byte0 == 0xe9) { /* jmp */
        int offset = *((int *)(pc + 1));
        app_pc tgt = pc + offset + 5;
        instr_set_opcode(instr, OP_jmp);
        instr_set_num_opnds(dcontext, instr, 0, 1);
        instr_set_target(instr, opnd_create_pc(tgt));
        instr_set_raw_bits(instr, start_pc, sz);
        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
        return (pc + 5);
    }
    if ((byte0 == 0x0f) && ((byte1 & 0xf0) == 0x80)) { /* jcc */
        /* 6-byte pc-relative jumps with a 32-bit displacement */
        /* calculate the branch's target address */
        int offset = *((int *)(pc + 2));
        app_pc tgt = pc + offset + 6;
        /* Set the instr's opcode field. Relies on special ordering
         * in opcode enum. */
        instr_set_opcode(instr, OP_jo + (byte1 & 0x0f));
        instr_set_num_opnds(dcontext, instr, 0, 1);
        instr_set_target(instr, opnd_create_pc(tgt));
        instr_set_raw_bits(instr, start_pc, sz);
        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
        return (pc + 6);
    }
    if (byte0 == 0xff) { /* check for indirect calls/branches */
        /* dispatch based on bits 5,4,3 in mod_rm byte */
        uint opc = (byte1 >> 3) & 0x7;
        if (opc >= 2 && opc <= 5) {
            /* this is an indirect jump or call */
            /* we care about the operands and prefixes, so just do the full decode
             */
            if (decode(dcontext, start_pc, instr) == NULL)
                return NULL;
            else
                return (start_pc + sz);
        }
        /* otherwise it wasn't an indirect branch so continue */
    }
    if ((byte0 & 0xf0) == 0xc0) { /* check for returns */
        byte nibble1 = (byte) (byte0 & 0x0f);
        switch (nibble1) {
        case 2: /* ret w/ 2-byte immed */
        case 0xa: /* far ret w/ 2-byte immed */
            /* we bailed out to decode() earlier if any prefixes */
            CLIENT_ASSERT(prefixes == 0, "decode_cti: internal prefix error");
            instr_set_opcode(instr, nibble1 == 2 ? OP_ret : OP_ret_far);
            instr_set_num_opnds(dcontext, instr, 1, 3);
            instr_set_dst(instr, 0, opnd_create_reg(REG_XSP));
            instr_set_src(instr, 0,
                          opnd_create_immed_int(*((short*)(pc+1)), OPSZ_2));
            instr_set_src(instr, 1, opnd_create_reg(REG_XSP));
            instr_set_src(instr, 2, opnd_create_base_disp
                          (REG_XSP, REG_NULL, 0, 0,
                           resolve_variable_size_dc(dcontext, 0,
                                                    nibble1 == 2 ? OPSZ_ret :
                                                    OPSZ_REXVARSTACK, false)));
            instr_set_raw_bits(instr, start_pc, sz);
            IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
            return (pc + 3);
        case 3: /* ret w/ no immed */
            instr_set_opcode(instr, OP_ret);
            instr_set_raw_bits(instr, start_pc, sz);
            IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
            /* we don't set any operands and leave to an up-decode for that */
            return (pc + 1);
        case 0xb: /* far ret w/ no immed */
            instr_set_opcode(instr, OP_ret_far);
            instr_set_raw_bits(instr, start_pc, sz);
            IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
            /* we don't set any operands and leave to an up-decode for that */
            return (pc + 1);
        }
        /* otherwise it wasn't a return so continue */
    }
    if ((byte0 & 0xf0) == 0xe0) { /* check for a funny 8-bit branch */
        byte nibble1 = (byte) (byte0 & 0x0f);
        /* determine the opcode */
        if (nibble1 == 0) { /* loopne */
            instr_set_opcode(instr, OP_loopne);
        } else if (nibble1 == 1) { /* loope */
            instr_set_opcode(instr, OP_loope);
        } else if (nibble1 == 2) { /* loop */
            instr_set_opcode(instr, OP_loop);
        } else if (nibble1 == 3) { /* jecxz */
            instr_set_opcode(instr, OP_jecxz);
        } else if (nibble1 == 10) { /* jmp_far */
            /* we need prefix info (data size controls immediate offset size),
             * this is rare so go ahead and do full decode
             */
            if (decode(dcontext, start_pc, instr) == NULL)
                return NULL;
            else
                return (start_pc + sz);
        }
        if (instr_opcode_valid(instr)) {
            /* calculate the branch's target address */
            app_pc tgt = convert_8bit_offset(pc, byte1, 2);
            /* all (except jmp far) use ecx as a source */
            instr_set_num_opnds(dcontext, instr, 0, 2);
            /* if we made it here, no addr prefix, so REG_XCX not REG_ECX or REG_CX */
            CLIENT_ASSERT(prefixes == 0, "decoding internal inconsistency");
            instr_set_src(instr, 1, opnd_create_reg(REG_XCX));
            instr_set_target(instr, opnd_create_pc(tgt));
            instr_set_raw_bits(instr, start_pc, sz);
            IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
            return (pc + 2);
        }
        /* otherwise it wasn't a funny 8-bit cbr so continue */
    }
    if (byte0 == 0x9a) { /* check for far-absolute calls */
        /* we need prefix info, this is rare so we do a full decode
         */
        if (decode(dcontext, start_pc, instr) == NULL)
            return NULL;
        else
            return (start_pc + sz);
    }
    /* both win32 and linux want to know about interrupts */
    if (byte0 == 0xcd) { /* int */
        instr_set_opcode(instr, OP_int);
        instr_set_num_opnds(dcontext, instr, 2, 2);
        instr_set_dst(instr, 0, opnd_create_reg(REG_XSP));
        instr_set_dst(instr, 1,
                      opnd_create_base_disp(REG_XSP, REG_NULL, 0, 0, OPSZ_4));
        instr_set_src(instr, 0, opnd_create_immed_int((char)byte1, OPSZ_1));
        instr_set_src(instr, 1, opnd_create_reg(REG_XSP));
        instr_set_raw_bits(instr, start_pc, sz);
        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
        return (pc + 2);
    }
    /* sys{enter,exit,call,ret} */
    if (byte0 == 0x0f &&
        (byte1 == 0x34 || byte1 == 0x35 || byte1 == 0x05 || byte1 == 0x07)) {
        if (byte1 == 0x34) { /* 0f 34 == sysenter */
            instr_set_opcode(instr, OP_sysenter);
            instr_set_num_opnds(dcontext, instr, 1, 0);
            instr_set_dst(instr, 0, opnd_create_reg(REG_XSP));
        } else if (byte1 == 0x35) { /* 0f 35 == sysexit
                                     * (was erroneously a duplicate 0x34 test,
                                     * leaving sysexit undecoded) */
            instr_set_opcode(instr, OP_sysexit);
            instr_set_num_opnds(dcontext, instr, 1, 0);
            instr_set_dst(instr, 0, opnd_create_reg(REG_XSP));
        } else if (byte1 == 0x05) { /* 0f 05 == syscall */
            instr_set_opcode(instr, OP_syscall);
            instr_set_num_opnds(dcontext, instr, 1, 0);
            instr_set_dst(instr, 0, opnd_create_reg(REG_XCX));
        } else if (byte1 == 0x07) { /* 0f 07 == sysret */
            instr_set_opcode(instr, OP_sysret);
            instr_set_num_opnds(dcontext, instr, 0, 0);
        }
        instr_set_raw_bits(instr, start_pc, sz);
        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
        return (pc + 2);
    }
    /* iret */
    if (byte0 == 0xcf) {
        instr_set_opcode(instr, OP_iret);
        instr_set_raw_bits(instr, start_pc, sz);
        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
        return (pc + 1);
    }
#ifdef UNIX
    /* mov_seg instruction detection for i#107: mangling seg update/query. */
    if (INTERNAL_OPTION(mangle_app_seg) && (byte0 == 0x8c || byte0 == 0x8e)) {
        instr_set_opcode(instr, OP_mov_seg);
        instr_set_raw_bits(instr, start_pc, sz);
        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
        return (start_pc + sz);
    }
#endif
    /* i#698: we must intercept floating point instruction pointer saves.
     * Rare enough that we do a full decode on an opcode match.
     */
    if ((byte0 == 0xdd && ((byte1 >> 3) & 0x7) == 6) /* dd /6 == OP_fnsave */ ||
        (byte0 == 0xd9 && ((byte1 >> 3) & 0x7) == 6) /* d9 /6 == OP_fnstenv */) {
        if (decode(dcontext, start_pc, instr) == NULL)
            return NULL;
        else
            return (start_pc + sz);
    } else if (byte0 == 0x0f && byte1 == 0xae) {
        int opc_ext;
        byte byte3 = *(pc + 2);
        opc_ext = (byte3 >> 3) & 0x7;
        /* Only the state-saving /reg extensions need a full decode; other
         * 0f ae forms (ldmxcsr, fences, etc.) fall through to the fast
         * Level 1 path below.  (Previously the decode ran unconditionally
         * for every 0f ae opcode due to an empty if body.) */
        if (opc_ext == 0 || /* 0f ae /0 == OP_fxsave */
            opc_ext == 4 || /* 0f ae /4 == OP_xsave */
            opc_ext == 6) { /* 0f ae /6 == OP_xsaveopt */
            if (decode(dcontext, start_pc, instr) == NULL)
                return NULL;
            else
                return (start_pc + sz);
        }
    }
    /* all non-pc-relative instructions */
    /* assumption: opcode already OP_UNDECODED */
    instr_set_raw_bits(instr, start_pc, sz);
    IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
    /* assumption: operands are already marked invalid (instr was reset) */
    return (start_pc + sz);
}
/* Computes the address of the instruction following the one at pc.
 * Returns NULL if the bytes at pc do not decode to a valid instruction.
 */
byte *
decode_next_pc(dcontext_t *dcontext, byte *pc)
{
    int num_bytes = decode_sizeof(dcontext, pc, NULL _IF_X64(NULL));
    return (num_bytes == 0) ? NULL : (pc + num_bytes);
}
/* Decodes the size of the instruction at address pc and points instr
 * at the raw bits for the instruction.
 * This corresponds to a Level 1 decoding.
 * Assumes that instr is already initialized, but uses the x86/x64 mode
 * for the current thread rather than that set in instr.
 * If caller is re-using same instr struct over multiple decodings,
 * should call instr_reset or instr_reuse.
 * Returns the address of the next byte after the decoded instruction.
 * Returns NULL on decoding an invalid instr and sets opcode to OP_INVALID.
 */
byte *
decode_raw(dcontext_t *dcontext, byte *pc, instr_t *instr)
{
    int num_bytes = decode_sizeof(dcontext, pc, NULL _IF_X64(NULL));
    IF_X64(instr_set_x86_mode(instr, get_x86_mode(dcontext)));
    if (num_bytes == 0) {
        /* not a decodable instruction */
        instr_set_opcode(instr, OP_INVALID);
        return NULL;
    }
    /* Level 1: record raw bits only; opcode stays undecoded and the
     * operands remain invalid (instr was reset by the caller). */
    instr_set_opcode(instr, OP_UNDECODED);
    instr_set_raw_bits(instr, pc, num_bytes);
    return pc + num_bytes;
}