// Copyright 2012 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Implementation of basic block decomposer.
//
// TODO(rogerm): Refactor this to just do a straight disassembly of all bytes
// (up to the start of embedded data: jump and case tables) to a list of
// instructions, then chop up the instruction list into basic blocks in
// a second pass, splicing the instructions into the basic-block instruction
// lists and generating successors.
#include "syzygy/block_graph/basic_block_decomposer.h"
#include <algorithm>
#include <vector>
#include "base/logging.h"
#include "base/stringprintf.h"
#include "syzygy/block_graph/basic_block.h"
#include "syzygy/block_graph/basic_block_subgraph.h"
#include "syzygy/block_graph/block_graph.h"
#include "syzygy/block_graph/block_util.h"
#include "mnemonics.h" // NOLINT
namespace block_graph {
namespace {
using block_graph::BasicBlock;
using block_graph::BasicBlockReference;
using block_graph::BasicBlockReferrer;
using block_graph::BasicBlockSubGraph;
using block_graph::BlockGraph;
using block_graph::Instruction;
using block_graph::Successor;
using core::Disassembler;
typedef BlockGraph::Block Block;
typedef BlockGraph::Offset Offset;
typedef BlockGraph::Size Size;
typedef core::AddressSpace<Offset, size_t, BasicBlock*> BBAddressSpace;
typedef BBAddressSpace::Range Range;
typedef BBAddressSpace::RangeMap RangeMap;
typedef BBAddressSpace::RangeMapConstIter RangeMapConstIter;
typedef BBAddressSpace::RangeMapIter RangeMapIter;
// We use a (somewhat) arbitrary value as the disassembly address for a block
// so we can tell the difference between a reference to the beginning of the
// block (offset=0) and a null address.
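// For example, under this convention a reference to offset 0x10 within the
// block is modelled by the fabricated address 65536 + 0x10, which is
// distinguishable from an absent (null) address.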
const size_t kDisassemblyAddress = 65536;
// Look up the reference made from an instruction's byte range within the
// given block. The reference should start AFTER the instruction starts
// and there should be exactly 1 reference in the byte range.
// Returns true if the reference was found, false otherwise.
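// For example, a 5-byte PC-relative CALL at offset 10 would typically carry a
// single 4-byte reference at offset 11; querying offset 10 with size 5
// returns that reference.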
bool GetReferenceOfInstructionAt(const Block* block,
Offset instr_offset,
Size instr_size,
BlockGraph::Reference* ref) {
DCHECK(block != NULL);
DCHECK_LE(0, instr_offset);
DCHECK_LT(0U, instr_size);
DCHECK(ref != NULL);
// Find the first reference following the instruction offset.
Block::ReferenceMap::const_iterator ref_iter =
block->references().upper_bound(instr_offset);
// If no reference is found then we're done.
if (ref_iter == block->references().end())
return false;
// If the reference occurs outside the instruction then we're done.
Offset next_instr_offset = instr_offset + instr_size;
if (ref_iter->first >= next_instr_offset)
return false;
// Otherwise, the reference should fit into the instruction.
CHECK_LE(static_cast<size_t>(next_instr_offset),
ref_iter->first + ref_iter->second.size());
// And it should be the only reference in the instruction.
if (ref_iter != block->references().begin()) {
Block::ReferenceMap::const_iterator prev_iter = ref_iter;
--prev_iter;
CHECK_GE(static_cast<size_t>(instr_offset),
prev_iter->first + prev_iter->second.size());
}
Block::ReferenceMap::const_iterator next_iter = ref_iter;
++next_iter;
CHECK(next_iter == block->references().end() ||
next_iter->first >= next_instr_offset);
*ref = ref_iter->second;
return true;
}
// Transfer instructions from original to tail, starting with the instruction
// that begins at offset.
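// For example, given instructions of sizes {2, 3, 5}, an offset of 5 splits
// the list after the second instruction; an offset of 4 falls inside the
// second instruction, so the split fails and false is returned.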
bool SplitInstructionListAt(Offset offset,
BasicBlock::Instructions* original,
BasicBlock::Instructions* tail) {
DCHECK(original != NULL);
DCHECK(tail != NULL && tail->empty());
BasicBlock::Instructions::iterator it(original->begin());
while (offset > 0 && it != original->end()) {
offset -= it->size();
++it;
}
// Did we terminate at an instruction boundary?
if (offset != 0)
return false;
tail->splice(tail->end(), *original, it, original->end());
return true;
}
} // namespace
BasicBlockDecomposer::BasicBlockDecomposer(const BlockGraph::Block* block,
BasicBlockSubGraph* subgraph)
: Disassembler(block->data(),
block->size(),
AbsoluteAddress(kDisassemblyAddress),
Disassembler::InstructionCallback()),
block_(block),
subgraph_(subgraph),
current_block_start_(0),
check_decomposition_results_(true) {
// TODO(rogerm): Once we're certain this is stable for all input binaries
// turn on check_decomposition_results_ by default only ifndef NDEBUG.
DCHECK(block != NULL);
DCHECK(block->type() == BlockGraph::CODE_BLOCK);
DCHECK(CodeBlockAttributesAreBasicBlockSafe(block));
DCHECK(subgraph != NULL);
}
bool BasicBlockDecomposer::Decompose() {
DCHECK(subgraph_->basic_blocks().empty());
DCHECK(subgraph_->block_descriptions().empty());
DCHECK(original_address_space_.empty());
subgraph_->set_original_block(block_);
InitUnvisitedAndJumpTargets();
WalkResult result = Walk();
if (result != Disassembler::kWalkSuccess &&
result != Disassembler::kWalkIncomplete) {
return false;
}
typedef BasicBlockSubGraph::BlockDescription BlockDescription;
subgraph_->block_descriptions().push_back(BlockDescription());
BlockDescription& desc = subgraph_->block_descriptions().back();
desc.name = block_->name();
desc.type = block_->type();
desc.alignment = block_->alignment();
desc.attributes = block_->attributes();
desc.section = block_->section();
Offset offset = 0;
RangeMapConstIter it = original_address_space_.begin();
for (; it != original_address_space_.end(); ++it) {
DCHECK_EQ(it->first.start(), offset);
desc.basic_block_order.push_back(it->second);
offset += it->first.size();
}
return true;
}
BasicBlockDecomposer::SourceRange BasicBlockDecomposer::GetSourceRange(
Offset offset, Size size) {
// Find the source range for the original bytes. We may not have a data
// range for bytes that were synthesized in other transformations. As a
// rule, however, there should be a covering data range for each instruction
// or successor that relates back to the original image.
const Block::SourceRanges::RangePair* range_pair =
block_->source_ranges().FindRangePair(offset, size);
// Return an empty range if we found nothing.
if (range_pair == NULL)
return SourceRange();
const Block::DataRange& data_range = range_pair->first;
const Block::SourceRange& source_range = range_pair->second;
if (offset == data_range.start() && size == data_range.size()) {
// We match a data range exactly, so let's use the entire
// matching source range.
return source_range;
}
// The data range doesn't match exactly, so let's slice the corresponding
// source range. The assumption here is that no transformation will ever
// slice the data or source ranges for an instruction, so we should always
// have covering data and source ranges.
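// For example, if the covering data range spans offsets [8, 16) and maps to
// source addresses [S, S + 8), then a query for offset 10 and size 2 yields
// the source range [S + 2, S + 4).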
DCHECK_GE(offset, data_range.start());
DCHECK_LE(offset + size, data_range.start() + data_range.size());
Offset start_offs = offset - data_range.start();
return SourceRange(source_range.start() + start_offs, size);
}
bool BasicBlockDecomposer::FindBasicBlock(Offset offset,
BasicBlock** basic_block,
Range* range) const {
DCHECK_LE(0, offset);
DCHECK(basic_block != NULL);
DCHECK(range != NULL);
DCHECK(subgraph_->original_block() != NULL);
DCHECK_GT(subgraph_->original_block()->size(), static_cast<size_t>(offset));
RangeMapConstIter bb_iter =
original_address_space_.FindFirstIntersection(Range(offset, 1));
if (bb_iter == original_address_space_.end())
return false;
*basic_block = bb_iter->second;
*range = bb_iter->first;
return true;
}
BasicBlock* BasicBlockDecomposer::GetBasicBlockAt(Offset offset) const {
DCHECK_LE(0, offset);
DCHECK(subgraph_->original_block() != NULL);
DCHECK_GT(subgraph_->original_block()->size(), static_cast<size_t>(offset));
BasicBlock* bb = NULL;
Range range;
CHECK(FindBasicBlock(offset, &bb, &range));
DCHECK(bb != NULL);
DCHECK_EQ(offset, range.start());
return bb;
}
void BasicBlockDecomposer::InitUnvisitedAndJumpTargets() {
jump_targets_.clear();
// We initialize our jump_targets_ and unvisited sets to the set of
// referenced code locations. This covers all locations which are
// externally referenced, as well as those that are internally referenced
// via a branching instruction or jump table.
BlockGraph::Block::ReferrerSet::const_iterator ref_iter =
block_->referrers().begin();
for (; ref_iter != block_->referrers().end(); ++ref_iter) {
BlockGraph::Reference ref;
bool found = ref_iter->first->GetReference(ref_iter->second, &ref);
DCHECK(found);
DCHECK_EQ(block_, ref.referenced());
DCHECK_LE(0, ref.base());
DCHECK_LT(static_cast<size_t>(ref.base()), block_->size());
DCHECK_EQ(ref.base(), ref.offset());
// Look for the first label past the reference, then back up to the previous
// label if we can.
BlockGraph::Block::LabelMap::const_iterator label_iter =
block_->labels().upper_bound(ref.base());
if (label_iter != block_->labels().begin())
--label_iter;
// If there is no previous label, or it is not a data label, then this is
// a safe jump target.
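// For example, if the block carries a code label at offset 0 and a data
// label at offset 16, a referrer targeting offset 20 resolves to the data
// label at 16 and is therefore not treated as a jump target.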
if (label_iter == block_->labels().end() ||
label_iter->first > ref.offset() ||
!label_iter->second.has_attributes(BlockGraph::DATA_LABEL)) {
AbsoluteAddress addr(code_addr_ + ref.base());
Unvisited(addr);
jump_targets_.insert(addr);
}
}
}
Disassembler::CallbackDirective BasicBlockDecomposer::OnInstruction(
AbsoluteAddress addr, const _DInst& inst) {
Offset offset = addr - code_addr_;
// If this instruction has run into known data, then we have a problem in
// the decomposer.
BlockGraph::Label label;
CHECK(!block_->GetLabel(offset, &label) ||
!label.has_attributes(BlockGraph::DATA_LABEL))
<< "Disassembling into data at offset " << offset << " of "
<< block_->name() << ".";
VLOG(3) << "Disassembled " << GET_MNEMONIC_NAME(inst.opcode)
<< " instruction (" << static_cast<int>(inst.size)
<< " bytes) at offset " << offset << ".";
Instruction::SourceRange source_range = GetSourceRange(offset, inst.size);
current_instructions_.push_back(
Instruction(inst, source_range, inst.size, code_ + offset));
if (label.IsValid())
current_instructions_.back().set_label(label);
// If continuing this basic-block would disassemble into known data then
// end the current basic-block.
if (block_->GetLabel(offset + inst.size, &label) &&
label.has_attributes(BlockGraph::DATA_LABEL)) {
return kDirectiveTerminatePath;
}
// If this instruction is a call to a non-returning function, then this is
// essentially a control flow operation, and we need to end this basic block.
// We'll schedule the disassembly of any instructions which follow it as
// a separate basic block, and mark that basic block as unreachable in a
// post pass.
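// (In distorm terms, O_PC denotes a direct, PC-relative call target and
// O_DISP denotes an indirect call through an absolute memory operand, such
// as a call through the import table.)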
if (META_GET_FC(inst.meta) == FC_CALL &&
(inst.ops[0].type == O_PC || inst.ops[0].type == O_DISP)) {
BlockGraph::Reference ref;
bool found = GetReferenceOfInstructionAt(block_, offset, inst.size, &ref);
CHECK(found);
if (Instruction::CallsNonReturningFunction(inst, ref.referenced(),
ref.offset())) {
Unvisited(addr + inst.size);
return kDirectiveTerminatePath;
}
}
return kDirectiveContinue;
}
Disassembler::CallbackDirective BasicBlockDecomposer::OnBranchInstruction(
AbsoluteAddress addr, const _DInst& inst, AbsoluteAddress dest) {
// Note: Both addr and dest are fabricated addresses. The code_addr_ has
// been selected such that addr will never be 0; similarly, dest should
// only be 0 for control flow instructions having no explicit destination.
// Do not use dest to resolve the destination, instead find the
// corresponding reference in the byte range of the original instruction.
// The branch instruction should have already been appended to the
// instruction list.
DCHECK_EQ(0, ::memcmp(&current_instructions_.back().representation(),
&inst,
sizeof(inst)));
// Compute the in-block offset to the successor instruction.
Offset succ_offs = addr - code_addr_;
// Make sure we understand the branching condition. If we don't, then there's
// an instruction we have failed to consider.
Successor::Condition condition = Successor::OpCodeToCondition(inst.opcode);
CHECK_NE(Successor::kInvalidCondition, condition)
<< "Received unknown condition for branch instruction: "
<< GET_MNEMONIC_NAME(inst.opcode) << ".";
// If this is a conditional branch add the inverse conditional successor to
// represent the fall-through. If we don't understand the inverse, then
// there's an instruction we have failed to consider.
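// For example, a JNZ branch yields a taken successor with the not-equal
// condition (added below, once the branch target is resolved) and a
// synthesized fall-through successor with the inverted, equal, condition.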
if (META_GET_FC(inst.meta) == FC_CND_BRANCH) {
Successor::Condition inverse_condition =
Successor::InvertCondition(condition);
CHECK_NE(Successor::kInvalidCondition, inverse_condition)
<< "Non-invertible condition seen for branch instruction: "
<< GET_MNEMONIC_NAME(inst.opcode) << ".";
// Create an (unresolved) successor pointing to the next instruction.
BasicBlockReference ref(BlockGraph::PC_RELATIVE_REF,
1, // The size is irrelevant in successors.
const_cast<Block*>(block_),
(addr + inst.size - code_addr_),
(addr + inst.size - code_addr_));
current_successors_.push_front(
Successor(inverse_condition, ref, 0));
jump_targets_.insert(addr + inst.size);
}
// Note that some control flow instructions have no explicit target (for
// example, RET, SYS* and computed branches); for these dest will be 0.
// We do not explicitly model these with successor relationships. Instead,
// we leave the instruction (and its corresponding references, in the case
// of computed jumps) intact and move on.
if (dest.value() != 0) {
// Take the last instruction out of the instruction list, we'll represent
// it as a successor instead.
Instruction succ_instr = current_instructions_.back();
current_instructions_.pop_back();
DCHECK_EQ(inst.size, succ_instr.size());
// Figure out where the branch is going by finding the reference that's
// inside the instruction's byte range.
BlockGraph::Reference ref;
bool found = GetReferenceOfInstructionAt(
block_, succ_offs, succ_instr.size(), &ref);
// If a reference was found, prefer its destination information
// to the information conveyed by the bytes in the instruction.
if (!found) {
Offset target_offset = dest - code_addr_;
ref = BlockGraph::Reference(BlockGraph::PC_RELATIVE_REF,
1, // Size is irrelevant in successors.
const_cast<Block*>(block_),
target_offset,
target_offset);
} else {
dest = AbsoluteAddress(kDisassemblyAddress + ref.offset());
}
// Create the successor.
BasicBlockReference bb_ref(
ref.type(), ref.size(), ref.referenced(), ref.offset(), ref.base());
Successor succ(condition, bb_ref, succ_instr.size());
if (ref.referenced() == block_)
jump_targets_.insert(dest);
// Copy the source range and label, if any.
succ.set_source_range(succ_instr.source_range());
succ.set_label(succ_instr.label());
current_successors_.push_front(succ);
}
// This marks the end of a basic block. Note that the disassembler will
// handle ending the instruction run and beginning a new one for the next
// basic block (including the branch-not-taken arc).
return kDirectiveContinue;
}
// Called every time disassembly is started from a new address. Will be
// called for at least every address in unvisited_.
Disassembler::CallbackDirective BasicBlockDecomposer::OnStartInstructionRun(
AbsoluteAddress start_address) {
// The address of the beginning of the current basic block.
current_block_start_ = start_address;
DCHECK(current_instructions_.empty());
DCHECK(current_successors_.empty());
return kDirectiveContinue;
}
// Called when a walk from a given entry point has terminated.
Disassembler::CallbackDirective BasicBlockDecomposer::OnEndInstructionRun(
AbsoluteAddress addr, const _DInst& inst, ControlFlowFlag control_flow) {
// If an otherwise straight run of instructions is split because it crosses
// a basic block boundary we need to set up the implicit control flow arc
// here.
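// For example, if a straight-line run reaches an address that is also a jump
// target, the run ends here and a synthesized successor (condition always
// true, instruction_size of 0) is added to carry flow into the next
// basic-block.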
if (control_flow == kControlFlowContinues) {
DCHECK(current_successors_.empty());
DCHECK(!current_instructions_.empty());
DCHECK(!current_instructions_.back().IsImplicitControlFlow());
BasicBlockReference ref(BlockGraph::PC_RELATIVE_REF,
1, // Size is immaterial in successors.
const_cast<Block*>(block_),
(addr + inst.size) - code_addr_,
(addr + inst.size) - code_addr_);
current_successors_.push_front(
Successor(Successor::kConditionTrue, ref, 0));
}
// We have reached the end of the current walk or we handled a conditional
// branch. Let's mark this as the end of a basic block.
size_t basic_block_size = addr - current_block_start_ + inst.size;
DCHECK_LT(0U, basic_block_size);
if (!InsertBasicBlockRange(current_block_start_,
basic_block_size,
BasicBlock::BASIC_CODE_BLOCK)) {
return kDirectiveAbort;
}
return kDirectiveContinue;
}
Disassembler::CallbackDirective BasicBlockDecomposer::OnDisassemblyComplete() {
// Split code blocks at branch targets.
if (!SplitCodeBlocksAtBranchTargets()) {
LOG(ERROR) << "Failed to split code blocks at branch targets.";
return kDirectiveAbort;
}
// By this point, we should have basic blocks for all visited code.
CheckAllJumpTargetsStartABasicCodeBlock();
// Demarcate the data basic blocks. There should be no overlap with code.
if (!FillInDataBlocks()) {
LOG(ERROR) << "Failed to fill in data basic-block ranges.";
return kDirectiveAbort;
}
// We may not have covered some ranges of the macro block. For all such
// ranges, build basic blocks and mark them as padding. This might
// include unreachable code in unoptimized input binaries.
if (!FillInPaddingBlocks()) {
LOG(ERROR) << "Failed to fill in padding basic-block ranges.";
return kDirectiveAbort;
}
// We should now have contiguous block ranges that cover every byte in the
// macro block. Verify that this is so.
CheckHasCompleteBasicBlockCoverage();
// We should have propagated all of the labels in the original block into
// the basic-block subgraph.
CheckAllLabelsArePreserved();
// Populate the referrers in the basic block data structures by copying
// them from the original source block.
if (!CopyExternalReferrers()) {
LOG(ERROR) << "Failed to populate basic-block referrers.";
return kDirectiveAbort;
}
// Populate the references in the basic block data structures by copying
// them from the original source block. This does not handle the successor
// references.
if (!CopyReferences()) {
LOG(ERROR) << "Failed to populate basic-block references.";
return kDirectiveAbort;
}
// Wire up the basic-block successors. These are not handled by
// CopyReferences(), above.
if (!ResolveSuccessors()) {
LOG(ERROR) << "Failed to resolve basic-block successors.";
return kDirectiveAbort;
}
// All the control flow we have derived should be valid.
CheckAllControlFlowIsValid();
// ... and we're done.
return kDirectiveContinue;
}
void BasicBlockDecomposer::CheckAllJumpTargetsStartABasicCodeBlock() const {
if (!check_decomposition_results_)
return;
AddressSet::const_iterator addr_iter(jump_targets_.begin());
for (; addr_iter != jump_targets_.end(); ++addr_iter) {
// The target basic-block should be a code basic-block.
BasicBlock* target_bb = GetBasicBlockAt(*addr_iter - code_addr_);
CHECK(target_bb != NULL);
CHECK_EQ(BasicBlock::BASIC_CODE_BLOCK, target_bb->type());
}
}
void BasicBlockDecomposer::CheckHasCompleteBasicBlockCoverage() const {
if (!check_decomposition_results_)
return;
// Walk through the basic-block address space.
Offset next_start = 0;
RangeMapConstIter it(original_address_space_.begin());
for (; it != original_address_space_.end(); ++it) {
CHECK_EQ(it->first.start(), next_start);
CHECK_EQ(it->first.start(), it->second->offset());
BasicDataBlock* data_block = BasicDataBlock::Cast(it->second);
if (data_block != NULL) {
// Data block's size should match the address segment exactly.
CHECK_EQ(it->first.size(), data_block->size());
}
BasicCodeBlock* code_block = BasicCodeBlock::Cast(it->second);
if (code_block != NULL) {
// Code blocks do not account for their trailing successor instructions, so
// add those sizes back in.
BasicCodeBlock::Successors::const_iterator succ_it(
code_block->successors().begin());
Size block_size = code_block->GetInstructionSize();
for (; succ_it != code_block->successors().end(); ++succ_it)
block_size += succ_it->instruction_size();
CHECK_GE(it->first.size(), block_size);
}
next_start += it->first.size();
}
// At this point, if there were no gaps, next_start will be the same as the
// full size of the block we're decomposing.
CHECK_EQ(code_size_, static_cast<size_t>(next_start));
}
void BasicBlockDecomposer::CheckAllControlFlowIsValid() const {
if (!check_decomposition_results_)
return;
// Check that the subgraph is valid. This will make sure that the
// instructions and successors generally make sense.
CHECK(subgraph_->IsValid());
// The only thing left to check is that synthesized flow-through
// successors refer to the adjacent basic-blocks.
RangeMapConstIter it(original_address_space_.begin());
for (; it != original_address_space_.end(); ++it) {
const BasicCodeBlock* bb = BasicCodeBlock::Cast(it->second);
if (bb == NULL)
continue;
const BasicBlock::Instructions& instructions = bb->instructions();
const BasicBlock::Successors& successors = bb->successors();
// There may be at most 2 successors.
switch (successors.size()) {
case 0:
break;
case 1:
// If the successor is synthesized, then flow is from this basic-block
// to the next adjacent one.
if (successors.back().instruction_size() == 0) {
RangeMapConstIter next(it);
++next;
CHECK(next != original_address_space_.end());
CHECK_EQ(successors.back().reference().basic_block(), next->second);
}
break;
case 2: {
// Exactly one of the successors should have been synthesized.
bool front_synthesized = successors.front().instruction_size() == 0;
bool back_synthesized = successors.back().instruction_size() == 0;
CHECK_NE(front_synthesized, back_synthesized);
// The synthesized successor flows from this basic-block to the next
// adjacent one.
const Successor& synthesized =
front_synthesized ? successors.front() : successors.back();
RangeMapConstIter next(it);
++next;
CHECK(next != original_address_space_.end());
CHECK_EQ(synthesized.reference().basic_block(), next->second);
break;
}
default:
NOTREACHED();
}
}
}
void BasicBlockDecomposer::CheckAllLabelsArePreserved() const {
if (!check_decomposition_results_)
return;
const Block* original_block = subgraph_->original_block();
if (original_block == NULL)
return;
const Block::LabelMap original_labels = original_block->labels();
if (original_labels.empty())
return;
// A map to track which labels (by offset) have been found in the subgraph.
std::map<Offset, bool> labels_found;
// Initialize the map of labels found in the subgraph.
Block::LabelMap::const_iterator label_iter = original_labels.begin();
for (; label_iter != original_labels.end(); ++label_iter)
labels_found.insert(std::make_pair(label_iter->first, false));
// Walk through the subgraph and mark all of the labels found.
BasicBlockSubGraph::BBCollection::const_iterator bb_iter =
subgraph_->basic_blocks().begin();
for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
// Account for labels attached to basic-blocks.
const BasicDataBlock* data_block = BasicDataBlock::Cast(*bb_iter);
if (data_block != NULL && data_block->has_label()) {
BlockGraph::Label label;
CHECK(original_block->GetLabel(data_block->offset(), &label));
CHECK(data_block->label() == label);
labels_found[data_block->offset()] = true;
}
const BasicCodeBlock* code_block = BasicCodeBlock::Cast(*bb_iter);
if (code_block != NULL) {
// Account for labels attached to instructions.
BasicBlock::Instructions::const_iterator inst_iter =
code_block->instructions().begin();
Offset inst_offset = code_block->offset();
for (; inst_iter != code_block->instructions().end(); ++inst_iter) {
const Instruction& inst = *inst_iter;
if (inst.has_label()) {
BlockGraph::Label label;
CHECK(original_block->GetLabel(inst_offset, &label));
CHECK(inst.label() == label);
labels_found[inst_offset] = true;
}
inst_offset += inst.size();
}
// Account for labels attached to successors.
BasicBlock::Successors::const_iterator succ_iter =
code_block->successors().begin();
for (; succ_iter != code_block->successors().end(); ++succ_iter) {
const Successor& succ = *succ_iter;
if (succ.has_label()) {
BlockGraph::Label label;
CHECK_NE(0U, succ.instruction_size());
CHECK(original_block->GetLabel(inst_offset, &label));
CHECK(succ.label() == label);
labels_found[inst_offset] = true;
}
inst_offset += succ.instruction_size();
}
}
}
// labels_found should have exactly as many entries as the original label
// map; if a label had been recorded at an unexpected offset, the map would
// have grown.
CHECK_EQ(original_labels.size(), labels_found.size());
// Make sure all of the items in labels_found have been set to true.
std::map<Offset, bool>::const_iterator found_iter = labels_found.begin();
for (; found_iter != labels_found.end(); ++found_iter) {
CHECK(found_iter->second);
}
}
bool BasicBlockDecomposer::InsertBasicBlockRange(AbsoluteAddress addr,
size_t size,
BasicBlockType type) {
DCHECK(type == BasicBlock::BASIC_CODE_BLOCK || current_instructions_.empty());
DCHECK(type == BasicBlock::BASIC_CODE_BLOCK || current_successors_.empty());
BasicBlock::Offset offset = addr - code_addr_;
DCHECK_LE(0, offset);
// Find or create a name for this basic block. Reserve the label, if any,
// to propagate to the basic block if there are no instructions in the
// block to carry the label(s).
BlockGraph::Label label;
std::string basic_block_name;
if (block_->GetLabel(offset, &label)) {
basic_block_name = label.ToString();
} else {
basic_block_name =
base::StringPrintf("<anonymous-%04X-%s>",
addr.value(),
BasicBlock::BasicBlockTypeToString(type));
}
// Pre-flight address space insertion to make sure there's no
// pre-existing conflicting range.
Range byte_range(offset, size);
if (original_address_space_.FindFirstIntersection(byte_range) !=
original_address_space_.end()) {
LOG(ERROR) << "Attempted to insert overlapping basic block.";
return false;
}
if (type == BasicBlock::BASIC_CODE_BLOCK) {
// Create the code block.
BasicCodeBlock* code_block = subgraph_->AddBasicCodeBlock(basic_block_name);
if (code_block == NULL)
return false;
CHECK(original_address_space_.Insert(byte_range, code_block));
// Populate code basic-block with instructions and successors.
code_block->set_offset(offset);
code_block->instructions().swap(current_instructions_);
code_block->successors().swap(current_successors_);
} else {
DCHECK(type == BasicBlock::BASIC_DATA_BLOCK ||
type == BasicBlock::BASIC_PADDING_BLOCK);
// Create the data block.
BasicDataBlock* data_block = subgraph_->AddBasicDataBlock(
basic_block_name, type, size, code_ + offset);
if (data_block == NULL)
return false;
CHECK(original_address_space_.Insert(byte_range, data_block));
// Capture the source range (if any) for the data block.
data_block->set_source_range(GetSourceRange(offset, size));
// Data basic-blocks carry their labels at the head of the basic blocks.
// A padding basic-block might also be labeled if the block contains
// unreachable code (for example, INT3 or NOP instructions following a call
// to a non-returning function).
data_block->set_offset(offset);
data_block->set_label(label);
}
return true;
}
bool BasicBlockDecomposer::SplitCodeBlocksAtBranchTargets() {
// TODO(rogerm): Refactor the basic-block splitting inner-function to the
// BasicBlockSubGraph.
AddressSet::const_iterator jump_target_iter(jump_targets_.begin());
for (; jump_target_iter != jump_targets_.end(); ++jump_target_iter) {
// Resolve the target basic-block.
Offset target_offset = *jump_target_iter - code_addr_;
BasicBlock* target_bb = NULL;
Range target_bb_range;
CHECK(FindBasicBlock(target_offset, &target_bb, &target_bb_range));
// If we're jumping to the start of a basic block, there isn't any work
// to do.
if (target_offset == target_bb_range.start())
continue;
// The target must be a code block.
BasicCodeBlock* target_code_block = BasicCodeBlock::Cast(target_bb);
CHECK(target_code_block != NULL);
// Otherwise, we have found a basic-block that we need to split.
// Let's contract the range the original occupies in the basic-block
// address space, then add a second block at the target offset.
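// For example, a basic-block covering offsets [0, 10) with a jump target at
// offset 6 becomes two blocks: the original block keeps range [0, 6), its
// leading instructions and a synthesized flow-through successor, while a new
// block at [6, 10) receives the trailing instructions and the original
// successors.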
size_t left_split_size = target_offset - target_bb_range.start();
bool removed = original_address_space_.Remove(target_bb_range);
DCHECK(removed);
Range left_split_range(target_bb_range.start(), left_split_size);
bool inserted =
original_address_space_.Insert(left_split_range, target_code_block);
DCHECK(inserted);
// Now we split target_bb_range into two new ranges and replace it with the
// two new entries.
// Slice the trailing half of the instructions and the successors
// off the block.
DCHECK(current_instructions_.empty());
DCHECK(current_successors_.empty());
bool split = SplitInstructionListAt(left_split_size,
&target_code_block->instructions(),
&current_instructions_);
DCHECK(split);
target_code_block->successors().swap(current_successors_);
// Set-up the flow-through successor for the first "half".
BasicBlockReference ref(BlockGraph::PC_RELATIVE_REF,
1, // Size is immaterial in successors.
const_cast<Block*>(block_),
target_offset,
target_offset);
target_code_block->successors().push_back(
Successor(Successor::kConditionTrue, ref, 0));
// Create the basic-block representing the second "half".
if (!InsertBasicBlockRange(code_addr_ + target_offset,
target_bb_range.size() - left_split_size,
target_code_block->type())) {
LOG(ERROR) << "Failed to insert second half of split block.";
return false;
}
}
return true;
}
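// Creates a data basic-block for each range that starts at a DATA_LABEL and
// extends to the next label (of any kind) or to the end of the block. For
// example, with data labels at offsets 100 and 120 in a 128-byte block (and
// no other labels past offset 100), data basic-blocks are created for
// [100, 120) and [120, 128).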
bool BasicBlockDecomposer::FillInDataBlocks() {
BlockGraph::Block::LabelMap::const_iterator iter = block_->labels().begin();
BlockGraph::Block::LabelMap::const_iterator end = block_->labels().end();
for (; iter != end; ++iter) {
if (!iter->second.has_attributes(BlockGraph::DATA_LABEL))
continue;
BlockGraph::Block::LabelMap::const_iterator next = iter;
++next;
BlockGraph::Offset bb_start = iter->first;
BlockGraph::Offset bb_end = (next == end) ? block_->size() : next->first;
size_t bb_size = bb_end - bb_start;
AbsoluteAddress bb_addr(code_addr_ + bb_start);
if (!InsertBasicBlockRange(bb_addr, bb_size, BasicBlock::BASIC_DATA_BLOCK))
return false;
}
return true;
}
bool BasicBlockDecomposer::FillInPaddingBlocks() {
// Add an initial interstitial if needed.
size_t interstitial_size = original_address_space_.empty() ?
code_size_ : original_address_space_.begin()->first.start();
DCHECK_LE(0U, interstitial_size);
if (interstitial_size > 0) {
if (!InsertBasicBlockRange(code_addr_,
interstitial_size,
BasicBlock::BASIC_PADDING_BLOCK)) {
LOG(ERROR) << "Failed to insert initial padding block at 0";
return false;
}
}
// Handle all remaining gaps, including the end.
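// For example, if basic-blocks so far cover [0, 10) and [14, 20) of a
// 24-byte block, padding blocks are created for the gaps [10, 14) and
// [20, 24).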
RangeMapConstIter curr_range = original_address_space_.begin();
for (; curr_range != original_address_space_.end(); ++curr_range) {
RangeMapConstIter next_range = curr_range;
++next_range;
AbsoluteAddress curr_range_end =
code_addr_ + curr_range->first.start() + curr_range->first.size();
interstitial_size = 0;
if (next_range == original_address_space_.end()) {
DCHECK_LE(curr_range_end, code_addr_ + code_size_);
interstitial_size = code_addr_ + code_size_ - curr_range_end;
} else {
DCHECK_LE(curr_range_end, code_addr_ + next_range->first.start());
interstitial_size =
code_addr_ + next_range->first.start() - curr_range_end;
}
if (interstitial_size > 0) {
if (!InsertBasicBlockRange(curr_range_end,
interstitial_size,
BasicBlock::BASIC_PADDING_BLOCK)) {
LOG(ERROR) << "Failed to insert padding block at "
<< curr_range_end.value();
return false;
}
}
}
return true;
}
bool BasicBlockDecomposer::CopyExternalReferrers() {
const BlockGraph::Block::ReferrerSet& referrers = block_->referrers();
BlockGraph::Block::ReferrerSet::const_iterator iter = referrers.begin();
for (; iter != referrers.end(); ++iter) {
// Find the reference this referrer record describes.
const BlockGraph::Block* referrer = iter->first;
DCHECK(referrer != NULL);
// We only care about external referrers.
if (referrer == block_)
continue;
// This is an external referrer. Find the reference in the referring block.
Offset source_offset = iter->second;
BlockGraph::Reference reference;
bool found = referrer->GetReference(source_offset, &reference);
DCHECK(found);
// Find the basic block the reference refers to. It can only have an
// offset that's different from the base if it's not a code block.
BasicBlock* target_bb = GetBasicBlockAt(reference.base());
DCHECK(target_bb != NULL);
DCHECK(reference.base() == reference.offset() ||
target_bb->type() != BasicBlock::BASIC_CODE_BLOCK);
// Insert the referrer into the target bb's referrer set. Note that there
// is no corresponding reference update to the referring block. The
// target bb will track these so a BlockBuilder can properly update
// the referrers when merging a subgraph back into the block-graph.
bool inserted = target_bb->referrers().insert(
BasicBlockReferrer(referrer, source_offset)).second;
DCHECK(inserted);
}
return true;
}
bool BasicBlockDecomposer::CopyReferences(
Offset item_offset, Size item_size, BasicBlockReferenceMap* refs) {
DCHECK_LE(0, item_offset);
DCHECK_LT(0U, item_size);
DCHECK(refs != NULL);
// Figure out the bounds of item.
BlockGraph::Offset end_offset = item_offset + item_size;
// Get iterators encompassing all references within the bounds of item.
BlockGraph::Block::ReferenceMap::const_iterator ref_iter =
block_->references().lower_bound(item_offset);
BlockGraph::Block::ReferenceMap::const_iterator end_iter =
block_->references().lower_bound(end_offset);
for (; ref_iter != end_iter; ++ref_iter) {
// Calculate the local offset of this reference within item.
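// (For example, a reference at block offset 34 inside an item spanning
// [32, 38) has a local offset of 2.)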
BlockGraph::Offset local_offset = ref_iter->first - item_offset;
const BlockGraph::Reference& reference = ref_iter->second;
// We expect long references for everything except flow control.
CHECK_EQ(4U, reference.size());
DCHECK_LE(local_offset + reference.size(), static_cast<Size>(end_offset));
if (reference.referenced() != block_) {
// For external references, we can directly reference the other block.
bool inserted = refs->insert(std::make_pair(
local_offset,
BasicBlockReference(reference.type(), reference.size(),
reference.referenced(), reference.offset(),
reference.base()))).second;
DCHECK(inserted);
} else {
// For intra block_ references, find the corresponding basic block in
// the basic block address space.
BasicBlock* target_bb = GetBasicBlockAt(reference.base());
DCHECK(target_bb != NULL);
// Create target basic-block relative values for the base and offset.
CHECK_EQ(reference.offset(), reference.base());
// Insert a reference to the target basic block.
bool inserted = refs->insert(std::make_pair(
local_offset,
BasicBlockReference(reference.type(),
reference.size(),
target_bb))).second;
DCHECK(inserted);
}
}
return true;
}
bool BasicBlockDecomposer::CopyReferences() {
// Copy the references for the source range of each basic-block (by
// instruction for code basic-blocks). External referrers and successors are
// handled in separate passes.
BasicBlockSubGraph::BBCollection::iterator bb_iter =
subgraph_->basic_blocks().begin();
for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
BasicCodeBlock* code_block = BasicCodeBlock::Cast(*bb_iter);
if (code_block != NULL) {
DCHECK_EQ(BasicBlock::BASIC_CODE_BLOCK, code_block->type());
Offset inst_offset = code_block->offset();
BasicBlock::Instructions::iterator inst_iter =
code_block->instructions().begin();
for (; inst_iter != code_block->instructions().end(); ++inst_iter) {
if (!CopyReferences(inst_offset,
inst_iter->size(),
&inst_iter->references())) {
return false;
}
inst_offset += inst_iter->size();
}
}
BasicDataBlock* data_block = BasicDataBlock::Cast(*bb_iter);
if (data_block != NULL) {
DCHECK_NE(BasicBlock::BASIC_CODE_BLOCK, data_block->type());
if (!CopyReferences(data_block->offset(),
data_block->size(),
&data_block->references())) {
return false;
}
}
}
return true;
}
bool BasicBlockDecomposer::ResolveSuccessors() {
BasicBlockSubGraph::BBCollection::iterator bb_iter =
subgraph_->basic_blocks().begin();
for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
// Only code basic-blocks have successors and instructions.
BasicCodeBlock* code_block = BasicCodeBlock::Cast(*bb_iter);
if (code_block == NULL)
continue;
BasicBlock::Successors::iterator succ_iter =
code_block->successors().begin();
BasicBlock::Successors::iterator succ_iter_end =
code_block->successors().end();
for (; succ_iter != succ_iter_end; ++succ_iter) {
if (succ_iter->reference().block() != block_)
continue;
// Find the basic block the successor references.
BasicBlock* target_code_block =
GetBasicBlockAt(succ_iter->reference().offset());
DCHECK(target_code_block != NULL);
// We transform all successor branches into 4-byte pc-relative targets.
succ_iter->set_reference(
BasicBlockReference(
BlockGraph::PC_RELATIVE_REF, 4, target_code_block));
DCHECK(succ_iter->reference().IsValid());
}
}
return true;
}
} // namespace block_graph