blob: d316e4e421e51a2eab47c2df94ec3c253b92773b [file] [log] [blame]
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "x86_decode.h"
namespace playground {
#if defined(__x86_64__) || defined(__i386__)
// Decodes one x86 instruction far enough to determine its length.
//
// Parameters:
//   ip          In/out. On entry *ip addresses the first byte of the
//               instruction (prefixes included). On return *ip has been
//               advanced past every byte that was recognized as part of it.
//   is64bit     Selects 64-bit decoding rules (REX prefixes are honored, the
//               default address size is 8 bytes). Otherwise 32-bit rules are
//               used and 0x40-0x4F are treated as ordinary opcodes.
//   has_prefix  Optional out. Set to true if at least one prefix byte (legacy
//               or REX) preceded the opcode.
//   rex_ptr     Optional out. Receives the address of the REX prefix that
//               applies to the instruction, or 0 if there is none.
//   mod_rm_ptr  Optional out. Receives the address of the ModR/M byte, or 0
//               if the instruction has none.
//   sib_ptr     Optional out. Receives the address of the SIB byte, or 0 if
//               the instruction has none.
//   is_group    Optional out. Set to true if the opcode is one whose
//               operation is selected by the "reg" field of ModR/M.
//
// Returns the opcode: the opcode byte itself for one-byte opcodes, or
// 0x0F00 | second_byte for two-byte (0x0F-escaped) opcodes.
//
// Opcodes that the tables do not describe (type 0) are not an error; only
// their prefixes and opcode byte(s) are consumed.
unsigned short next_inst(const char **ip, bool is64bit, bool *has_prefix,
                         char **rex_ptr, char **mod_rm_ptr, char **sib_ptr,
                         bool *is_group) {
  // Bit layout of the entries in opcode_types[] and group_table[]. Bit 0 is
  // never tested by the code below; it appears to exist only so that
  // supported opcodes without any operand bytes still get a non-zero entry
  // (a zero entry means "unsupported").
  enum {
    BYTE_OP     = (1<<1), // 0x02  With IMM: the immediate is a single byte
    IMM         = (1<<2), // 0x04  Immediate of operand size follows
    IMM_BYTE    = (2<<2), // 0x08  One byte immediate follows
    MEM_ABS     = (3<<2), // 0x0C  Absolute address of address size follows
    MODE_MASK   = (7<<2), // 0x1C  Extracts IMM / IMM_BYTE / MEM_ABS
    MOD_RM      = (1<<5), // 0x20  ModR/M byte follows the opcode
    STACK       = (1<<6), // 0x40  Operand size is forced to 8 in 64-bit mode
    GROUP       = (1<<7), // 0x80  Low 7 bits are an offset into group_table[]
    GROUP_MASK  = 0x7F,
  };

  // Entries 0-255 describe one-byte opcodes, entries 256-511 describe the
  // two-byte opcodes that follow a 0x0F escape byte.
  static const unsigned char opcode_types[512] = {
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x00 - 0x07
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x00, // 0x08 - 0x0F
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x10 - 0x17
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x18 - 0x1F
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x20 - 0x27
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x28 - 0x2F
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x30 - 0x37
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x38 - 0x3F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x40 - 0x47
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x48 - 0x4F
    0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x50 - 0x57
    0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x58 - 0x5F
    0x01, 0x01, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0x60 - 0x67
    0x45, 0x25, 0x49, 0x29, 0x03, 0x01, 0x03, 0x01, // 0x68 - 0x6F
    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x70 - 0x77
    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x78 - 0x7F
    0x27, 0x25, 0x27, 0x29, 0x23, 0x21, 0x23, 0x21, // 0x80 - 0x87
    0x23, 0x21, 0x23, 0x21, 0x21, 0x21, 0x21, 0x80, // 0x88 - 0x8F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x90 - 0x97
    0x01, 0x01, 0x05, 0x01, 0x41, 0x41, 0x01, 0x01, // 0x98 - 0x9F
    0x0F, 0x0D, 0x0F, 0x0D, 0x03, 0x01, 0x03, 0x01, // 0xA0 - 0xA7
    0x09, 0x05, 0x03, 0x01, 0x03, 0x01, 0x03, 0x01, // 0xA8 - 0xAF
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // 0xB0 - 0xB7
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0xB8 - 0xBF
    0x27, 0x29, 0x01, 0x01, 0x21, 0x21, 0x27, 0x25, // 0xC0 - 0xC7
    0x01, 0x01, 0x01, 0x01, 0x01, 0x09, 0x01, 0x01, // 0xC8 - 0xCF
    0x23, 0x21, 0x23, 0x21, 0x09, 0x09, 0x01, 0x01, // 0xD0 - 0xD7
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xD8 - 0xDF
    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0xE0 - 0xE7
    0x05, 0x05, 0x05, 0x09, 0x03, 0x01, 0x03, 0x01, // 0xE8 - 0xEF
    0x00, 0x01, 0x00, 0x00, 0x01, 0x01, 0x88, 0x90, // 0xF0 - 0xF7
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x98, 0xA0, // 0xF8 - 0xFF
    0x00, 0xA8, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, // 0xF00 - 0xF07
    0x01, 0x01, 0x00, 0x01, 0x00, 0x21, 0x01, 0x00, // 0xF08 - 0xF0F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF10 - 0xF17
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF18 - 0xF1F
    0x21, 0x21, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0xF20 - 0xF27
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF28 - 0xF2F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, // 0xF30 - 0xF37
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF38 - 0xF3F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF40 - 0xF47
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF48 - 0xF4F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF50 - 0xF57
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF58 - 0xF5F
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF60 - 0xF67
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF68 - 0xF6F
    0x21, 0x00, 0x00, 0x00, 0x21, 0x21, 0x21, 0x00, // 0xF70 - 0xF77
    0x21, 0x21, 0x00, 0x00, 0x21, 0x21, 0x21, 0x21, // 0xF78 - 0xF7F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF80 - 0xF87
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF88 - 0xF8F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF90 - 0xF97
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF98 - 0xF9F
    0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x00, 0x00, // 0xFA0 - 0xFA7
    0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x21, 0x21, // 0xFA8 - 0xFAF
    0x23, 0x21, 0x00, 0x21, 0x00, 0x00, 0x23, 0x21, // 0xFB0 - 0xFB7
    0x21, 0x00, 0x29, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFB8 - 0xFBF
    0x21, 0x21, 0x00, 0x21, 0x00, 0x00, 0x00, 0x21, // 0xFC0 - 0xFC7
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xFC8 - 0xFCF
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD0 - 0xFD7
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD8 - 0xFDF
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE0 - 0xFE7
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE8 - 0xFEF
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF0 - 0xFF7
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF8 - 0xFFF
  };

  // Eight entries per row, selected by the "reg" field of ModR/M. A GROUP
  // entry in opcode_types[] carries the offset of its row in its low 7 bits.
  static const unsigned char group_table[56] = {
    0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 1A
    0x27, 0x27, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, // Group 3 (Byte)
    0x25, 0x25, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // Group 3
    0x23, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 4
    0x21, 0x21, 0x61, 0x21, 0x61, 0x21, 0x61, 0x00, // Group 5
    0x00, 0x00, 0x21, 0x21, 0x21, 0x00, 0x21, 0x23, // Group 7
    0x21, 0x00, 0x00, 0x21, 0x21, 0x00, 0x21, 0x00, // Group 7 (Alternate)
  };

  const unsigned char *insn_ptr = reinterpret_cast<const unsigned char *>(*ip);
  int operand_width = 4;
  int address_width = is64bit ? 8 : 4;
  unsigned char byte, rex = 0;
  bool found_prefix = false;

  // The optional location outputs default to "not present".
  if (rex_ptr) {
    *rex_ptr = 0;
  }
  if (mod_rm_ptr) {
    *mod_rm_ptr = 0;
  }
  if (sib_ptr) {
    *sib_ptr = 0;
  }

  // Consume prefixes. The loop ends with "byte" holding the first opcode
  // byte and "insn_ptr" addressing the byte after it.
  for (;; ++insn_ptr) {
    switch (byte = *insn_ptr) {
      case 0x66: // Operand width prefix
        // Prefixes are idempotent: a repeated 0x66 must not switch back to
        // the default operand size.
        operand_width = 2;
        break;
      case 0x67: // Address width prefix
        address_width = is64bit ? 4 : 2;
        break;
      case 0x26: // Segment selector prefixes
      case 0x2e:
      case 0x36:
      case 0x3e:
      case 0x64:
      case 0x65:
      case 0xF0: // LOCK
      case 0xF2: // REPNE
      case 0xF3: // REP
        break;
      case 0x40: case 0x41: case 0x42: case 0x43: // 64 bit REX prefixes
      case 0x44: case 0x45: case 0x46: case 0x47:
      case 0x48: case 0x49: case 0x4A: case 0x4B:
      case 0x4C: case 0x4D: case 0x4E: case 0x4F:
        if (is64bit) {
          if (rex_ptr) {
            *rex_ptr = const_cast<char *>(
                reinterpret_cast<const char *>(insn_ptr));
          }
          rex = byte;
          found_prefix = true;
          continue;
        }
        // In 32-bit mode these bytes are regular opcodes.
        // fall through
      default:
        ++insn_ptr;
        goto no_more_prefixes;
    }
    // Only legacy prefixes get here. A REX prefix is only effective if it
    // immediately precedes the opcode, so any REX seen earlier is discarded,
    // including the location reported to the caller.
    rex = 0;
    if (rex_ptr) {
      *rex_ptr = 0;
    }
    found_prefix = true;
  }

no_more_prefixes:
  if (has_prefix) {
    *has_prefix = found_prefix;
  }
  if (rex & REX_W) {
    operand_width = 8;
  }

  // Look up the opcode, following the 0x0F escape to the second table half.
  unsigned short insn = byte;
  unsigned int idx = 0;
  if (byte == 0x0F) {
    byte = *insn_ptr++;
    insn = (insn << 8) | byte;
    idx = 256;
  }
  unsigned char type = opcode_types[idx + byte];

  bool found_group = false;
  unsigned char mod_rm = 0;
  if (type & GROUP) {
    // The real type depends on the "reg" field of ModR/M. The byte is only
    // peeked at here; it is consumed by the MOD_RM handling below.
    found_group = true;
    mod_rm = *insn_ptr;
    if (mod_rm_ptr) {
      *mod_rm_ptr = const_cast<char *>(
          reinterpret_cast<const char *>(insn_ptr));
    }
    unsigned char group = (type & GROUP_MASK) + ((mod_rm >> 3) & 0x7);
    // The row at offset 40 (Group 7) has an alternate row for register
    // operands (mod == 3), stored directly after it.
    if ((type & GROUP_MASK) == 40 && (mod_rm >> 6) == 3) {
      group += 8;
    }
    type = group_table[group];
  }

  // A zero type marks an instruction that is not decoded any further. We
  // know that we still don't decode some of the more obscure instructions,
  // but for all practical purposes that doesn't matter. Compilers are
  // unlikely to output them, and even if we encounter hand-coded assembly,
  // we will soon synchronize to the instruction stream again.
  if (type) {
    if (is64bit && (type & STACK)) {
      operand_width = 8;
    }

    if (type & MOD_RM) {
      if (mod_rm_ptr) {
        *mod_rm_ptr = const_cast<char *>(
            reinterpret_cast<const char *>(insn_ptr));
      }
      mod_rm = *insn_ptr++;
      const int mod = (mod_rm >> 6) & 0x3;
      // Only the low three bits matter for the addressing form; REX.B is
      // not decoded for the "SIB follows" and "disp32 only" special cases.
      const int rm = mod_rm & 0x7;
      if (mod != 3) {
        int disp_bytes = 0;
        if (address_width == 2) {
          // 16-bit addressing: no SIB; mod 0 with r/m 6 is a bare disp16.
          if (mod == 1) {
            disp_bytes = 1;
          } else if (mod == 2 || rm == 6) {
            disp_bytes = 2;
          }
        } else {
          bool disp32_base = (rm == 5);
          if (rm == 4) {
            if (sib_ptr) {
              *sib_ptr = const_cast<char *>(
                  reinterpret_cast<const char *>(insn_ptr));
            }
            const unsigned char sib = *insn_ptr++;
            // A SIB base of 5 with mod 0 means "disp32 instead of a base".
            disp32_base = ((sib & 0x7) == 5);
          }
          if (mod == 1) {
            disp_bytes = 1;
          } else if (mod == 2 || disp32_base) {
            disp_bytes = 4;
          }
        }
        insn_ptr += disp_bytes;
      }
    }

    // Operand bytes that the type tables cannot express.
    switch (insn) {
      case 0xC8: // ENTER (imm16 followed by imm8)
        insn_ptr++;
        // fall through
      case 0x9A: // CALL (far)
      case 0xC2: // RET (near)
      case 0xCA: // LRET
      case 0xEA: // JMP (far)
        insn_ptr += 2;
        break;
      case 0xF80: case 0xF81: case 0xF82: case 0xF83: // Jcc (rel)
      case 0xF84: case 0xF85: case 0xF86: case 0xF87:
      case 0xF88: case 0xF89: case 0xF8A: case 0xF8B:
      case 0xF8C: case 0xF8D: case 0xF8E: case 0xF8F:
        // The displacement never exceeds 32 bits, even with REX.W.
        insn_ptr += (operand_width == 8) ? 4 : operand_width;
        break;
    }

    // Immediate operand or absolute address.
    switch (type & MODE_MASK) {
      case IMM:
        if (type & BYTE_OP) {
          insn_ptr++;
        } else if (insn >= 0xB8 && insn <= 0xBF) {
          // MOV reg, imm is the one form that takes a full 64-bit immediate.
          insn_ptr += operand_width;
        } else {
          // All other 64-bit operations use a sign-extended imm32.
          insn_ptr += (operand_width == 8) ? 4 : operand_width;
        }
        break;
      case IMM_BYTE:
        insn_ptr++;
        break;
      case MEM_ABS:
        insn_ptr += address_width;
        break;
    }
  }

  if (is_group) {
    *is_group = found_group;
  }
  *ip = reinterpret_cast<const char *>(insn_ptr);
  return insn;
}
#endif
} // namespace