blob: c32c0e83e9be8d53983d94de68fa79f93f019c29 [file] [log] [blame]
// Copyright 2012 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// A class that attempts to disassemble a function.
#ifndef SYZYGY_CORE_DISASSEMBLER_H_
#define SYZYGY_CORE_DISASSEMBLER_H_
#include <stdint.h>
#include <set>
#include "base/callback.h"
#include "syzygy/core/address.h"
#include "syzygy/core/address_space.h"
#include "distorm.h" // NOLINT
namespace core {
class Disassembler {
public:
typedef std::set<AbsoluteAddress> AddressSet;
typedef core::AddressSpace<AbsoluteAddress, size_t, uint8_t> VisitedSpace;
enum CallbackDirective {
// Indicates that the disassembler should continue.
kDirectiveContinue,
// Indicates that the disassembler should terminate its current
// path in the walk, and continue at the next unvisited location.
kDirectiveTerminatePath,
// Indicates that the disassembler should halt all disassembly.
kDirectiveTerminateWalk,
// Indicate that the disassembler should terminate with an error.
kDirectiveAbort
};
// The instruction callback is invoked for each instruction the disassembler
// encounters. The callback receives three parameters:
// 1. const Disassembler& disasm the disassembler.
// 2. const _DInst& inst the current instruction.
// Returns a CallbackDirective telling the disassembler how to proceed.
typedef base::Callback<CallbackDirective(const Disassembler&,
const _DInst&)>
InstructionCallback;
enum WalkResult {
// Error during walk - e.g. function is not in our PEImageFile
// or the section is not code, or the OnInstruction callback indicated an
// error status.
kWalkError,
// Walk was successful and complete.
kWalkSuccess,
// Walk was incomplete, e.g. it encountered a computed branch or
// similar, so may not have traversed every branch of the function.
kWalkIncomplete,
// Walk was terminated.
kWalkTerminated,
};
// These flag values are passed to OnEndInstructionRun.
enum ControlFlowFlag {
// The instruction run ends with an explicit termination of control flow.
kControlFlowTerminates,
// The instruction implicitly flows into the next instruction run.
kControlFlowContinues,
};
Disassembler(const uint8_t* code,
size_t code_size,
AbsoluteAddress code_addr,
const InstructionCallback& on_instruction);
Disassembler(const uint8_t* code,
size_t code_size,
AbsoluteAddress code_addr,
const AddressSet& entry_points,
const InstructionCallback& on_instruction);
virtual ~Disassembler();
// Add addr to unvisited set.
// @returns true iff addr is unvisited.
// @pre IsInCode(addr, 1).
bool Unvisited(AbsoluteAddress addr);
// Attempts to walk function from known entry points.
// Invokes callback for every instruction as it's encountered.
// @returns the results of the walk.
// @note the instructions may be encountered in any order, as the
// disassembler follows the code's control flow.
virtual WalkResult Walk();
// @name Accessors.
// @{
const uint8_t* code() const { return code_; }
size_t code_size() const { return code_size_; }
const AbsoluteAddress code_addr() const { return code_addr_; }
const AddressSet& unvisited() const { return unvisited_; }
const VisitedSpace& visited() const { return visited_; }
size_t disassembled_bytes() const { return disassembled_bytes_; }
// @}
protected:
// Called every time a basic instruction is hit.
// @param addr is the address of the branch instruction itself.
// @param inst is the disassembled instruction data.
// @returns kWalkContinue on success or kWalkError on failure.
virtual CallbackDirective OnInstruction(AbsoluteAddress addr,
const _DInst& inst);
// Called every time a branch instruction is hit.
// @param addr is the address of the branch instruction itself.
// @param inst is the disassembled instruction data.
// @param dest is the destination address of the branch instruction.
// @returns kWalkContinue on success or kWalkError on failure.
virtual CallbackDirective OnBranchInstruction(AbsoluteAddress addr,
const _DInst& inst,
AbsoluteAddress dest);
// Called every time disassembly is started from a new address. Will be
// called at least once if unvisited_ is non-empty.
// @param start_address denotes the beginning of the instruction run.
// @returns kWalkContinue on success or kWalkError on failure.
virtual CallbackDirective OnStartInstructionRun(
AbsoluteAddress start_address);
// Called on every disassembled instruction.
// @param addr is the address of the instruction that terminates the run.
// @param inst is the terminating instruction.
// @param control_flow a flag denoting whether control flow terminates
// for this instruction run, or flows into the next instruction run.
// @returns kWalkContinue on success or kWalkError on failure.
virtual CallbackDirective OnEndInstructionRun(AbsoluteAddress addr,
const _DInst& inst,
ControlFlowFlag control_flow);
// Called when disassembly is complete and no further entry points remain
// to disassemble from.
// @returns kWalkContinue on success or kWalkError on failure.
virtual CallbackDirective OnDisassemblyComplete();
// Wrapper function to handle invoking both the internal and external
// OnInstruction() callbacks.
// @param addr is the address of the current instruction.
// @param inst is the instruction.
CallbackDirective NotifyOnInstruction(AbsoluteAddress addr,
const _DInst& inst);
// @returns true iff the range [addr ... addr + len) is in the function.
bool IsInBlock(AbsoluteAddress addr) const;
// The code we refer to.
const uint8_t* code_;
const size_t code_size_;
// The original address of the first byte of code_.
const AbsoluteAddress code_addr_;
// Invoke this callback on every instruction.
InstructionCallback on_instruction_;
// Unvisited instruction locations before and during a walk.
// This is seeded by the code entry point(s), and will also contain
// branch targets during disassembly.
AddressSet unvisited_;
// Each visited instruction is stored as a range in this space.
VisitedSpace visited_;
// Number of bytes disassembled to this point during walk.
size_t disassembled_bytes_;
};
} // namespace core
#endif // SYZYGY_CORE_DISASSEMBLER_H_