blob: 345ca893e07f271dcb2078f139ef715341c27d4e [file] [log] [blame]
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "courgette/assembly_program.h"
#include <memory.h>
#include <algorithm>
#include <map>
#include <set>
#include <sstream>
#include <vector>
#include "base/logging.h"
#include "courgette/courgette.h"
#include "courgette/encoded_program.h"
namespace courgette {
// Opcodes of simple assembly language
enum OP {
ORIGIN, // ORIGIN <rva> - set current address for assembly.
MAKERELOCS, // Generates a base relocation table.
DEFBYTE, // DEFBYTE <value> - emit a byte literal.
REL32, // REL32 <label> - emit a rel32 encoded reference to 'label'.
ABS32, // REL32 <label> - emit am abs32 encoded reference to 'label'.
LAST_OP
};
// Base class for instructions. Because we have so many instructions we want to
// keep them as small as possible. For this reason we avoid virtual functions.
//
class Instruction {
public:
OP op() const { return static_cast<OP>(op_); }
protected:
explicit Instruction(OP op) : op_(op), info_(0) {}
Instruction(OP op, unsigned int info) : op_(op), info_(info) {}
uint32 op_ : 4; // A few bits to store the OP code.
uint32 info_ : 28; // Remaining bits in first word available to subclass.
private:
DISALLOW_COPY_AND_ASSIGN(Instruction);
};
namespace {
// Sets the current address for the emitting instructions.
class OriginInstruction : public Instruction {
public:
explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {}
RVA origin_rva() const { return rva_; }
private:
RVA rva_;
};
// Emits an entire base relocation table.
class MakeRelocsInstruction : public Instruction {
public:
MakeRelocsInstruction() : Instruction(MAKERELOCS) {}
};
// Emits a single byte.
class ByteInstruction : public Instruction {
public:
explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {}
uint8 byte_value() const { return info_; }
};
// A ABS32 to REL32 instruction emits a reference to a label's address.
class InstructionWithLabel : public Instruction {
public:
InstructionWithLabel(OP op, Label* label)
: Instruction(op, 0), label_(label) {
if (label == NULL) NOTREACHED();
}
Label* label() const { return label_; }
private:
Label* label_;
};
} // namespace
AssemblyProgram::AssemblyProgram()
: byte_instruction_cache_(NULL),
image_base_(0) {
}
static void DeleteContainedLabels(const RVAToLabel& labels) {
for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p)
delete p->second;
}
AssemblyProgram::~AssemblyProgram() {
for (size_t i = 0; i < instructions_.size(); ++i) {
Instruction* instruction = instructions_[i];
if (instruction->op() != DEFBYTE) // Will be in byte_instruction_cache_.
delete instruction;
}
if (byte_instruction_cache_) {
for (size_t i = 0; i < 256; ++i)
delete byte_instruction_cache_[i];
delete[] byte_instruction_cache_;
}
DeleteContainedLabels(rel32_labels_);
DeleteContainedLabels(abs32_labels_);
}
void AssemblyProgram::EmitMakeRelocsInstruction() {
Emit(new MakeRelocsInstruction());
}
void AssemblyProgram::EmitOriginInstruction(RVA rva) {
Emit(new OriginInstruction(rva));
}
void AssemblyProgram::EmitByteInstruction(uint8 byte) {
Emit(GetByteInstruction(byte));
}
void AssemblyProgram::EmitRel32(Label* label) {
Emit(new InstructionWithLabel(REL32, label));
}
void AssemblyProgram::EmitAbs32(Label* label) {
Emit(new InstructionWithLabel(ABS32, label));
}
Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) {
return FindLabel(rva, &abs32_labels_);
}
Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) {
return FindLabel(rva, &rel32_labels_);
}
void AssemblyProgram::DefaultAssignIndexes() {
DefaultAssignIndexes(&abs32_labels_);
DefaultAssignIndexes(&rel32_labels_);
}
void AssemblyProgram::UnassignIndexes() {
UnassignIndexes(&abs32_labels_);
UnassignIndexes(&rel32_labels_);
}
void AssemblyProgram::AssignRemainingIndexes() {
AssignRemainingIndexes(&abs32_labels_);
AssignRemainingIndexes(&rel32_labels_);
}
Label* AssemblyProgram::InstructionAbs32Label(
const Instruction* instruction) const {
if (instruction->op() == ABS32)
return static_cast<const InstructionWithLabel*>(instruction)->label();
return NULL;
}
Label* AssemblyProgram::InstructionRel32Label(
const Instruction* instruction) const {
if (instruction->op() == REL32)
return static_cast<const InstructionWithLabel*>(instruction)->label();
return NULL;
}
Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) {
Label*& slot = (*labels)[rva];
if (slot == 0) {
slot = new Label(rva);
}
return slot;
}
void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) {
for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
Label* current = p->second;
current->index_ = Label::kNoIndex;
}
}
// DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
// address order.
//
void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) {
int index = 0;
for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
Label* current = p->second;
if (current->index_ != Label::kNoIndex)
NOTREACHED();
current->index_ = index;
++index;
}
}
// AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
// yet assigned an index.
//
void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) {
// An address table compresses best when each index is associated with an
// address that is slight larger than the previous index.
// First see which indexes have not been used. The 'available' vector could
// grow even bigger, but the number of addresses is a better starting size
// than empty.
std::vector<bool> available(labels->size(), true);
int used = 0;
for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
int index = p->second->index_;
if (index != Label::kNoIndex) {
while (static_cast<size_t>(index) >= available.size())
available.push_back(true);
available.at(index) = false;
++used;
}
}
VLOG(1) << used << " of " << labels->size() << " labels pre-assigned";
// Are there any unused labels that happen to be adjacent following a used
// label?
//
int fill_forward_count = 0;
Label* prev = 0;
for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
Label* current = p->second;
if (current->index_ == Label::kNoIndex) {
int index = 0;
if (prev && prev->index_ != Label::kNoIndex)
index = prev->index_ + 1;
if (index < static_cast<int>(available.size()) && available.at(index)) {
current->index_ = index;
available.at(index) = false;
++fill_forward_count;
}
}
prev = current;
}
// Are there any unused labels that happen to be adjacent preceeding a used
// label?
//
int fill_backward_count = 0;
prev = 0;
for (RVAToLabel::reverse_iterator p = labels->rbegin();
p != labels->rend();
++p) {
Label* current = p->second;
if (current->index_ == Label::kNoIndex) {
int prev_index;
if (prev)
prev_index = prev->index_;
else
prev_index = static_cast<uint32>(available.size());
if (prev_index != 0 &&
prev_index != Label::kNoIndex &&
available.at(prev_index - 1)) {
current->index_ = prev_index - 1;
available.at(current->index_) = false;
++fill_backward_count;
}
}
prev = current;
}
// Fill in any remaining indexes
int fill_infill_count = 0;
int index = 0;
for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
Label* current = p->second;
if (current->index_ == Label::kNoIndex) {
while (!available.at(index)) {
++index;
}
current->index_ = index;
available.at(index) = false;
++index;
++fill_infill_count;
}
}
VLOG(1) << " fill forward " << fill_forward_count
<< " backward " << fill_backward_count
<< " infill " << fill_infill_count;
}
typedef void (EncodedProgram::*DefineLabelMethod)(int index, RVA value);
static void DefineLabels(const RVAToLabel& labels,
EncodedProgram* encoded_format,
DefineLabelMethod define_label) {
for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p) {
Label* label = p->second;
(encoded_format->*define_label)(label->index_, label->rva_);
}
}
EncodedProgram* AssemblyProgram::Encode() const {
EncodedProgram* encoded = new EncodedProgram();
encoded->set_image_base(image_base_);
DefineLabels(abs32_labels_, encoded, &EncodedProgram::DefineAbs32Label);
DefineLabels(rel32_labels_, encoded, &EncodedProgram::DefineRel32Label);
encoded->EndLabels();
for (size_t i = 0; i < instructions_.size(); ++i) {
Instruction* instruction = instructions_[i];
switch (instruction->op()) {
case ORIGIN: {
OriginInstruction* org = static_cast<OriginInstruction*>(instruction);
encoded->AddOrigin(org->origin_rva());
break;
}
case DEFBYTE: {
uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value();
encoded->AddCopy(1, &b);
break;
}
case REL32: {
Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
encoded->AddRel32(label->index_);
break;
}
case ABS32: {
Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
encoded->AddAbs32(label->index_);
break;
}
case MAKERELOCS: {
encoded->AddMakeRelocs();
break;
}
default: {
NOTREACHED() << "Unknown Insn OP kind";
}
}
}
return encoded;
}
Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) {
if (!byte_instruction_cache_) {
byte_instruction_cache_ = new Instruction*[256];
for (int i = 0; i < 256; ++i) {
byte_instruction_cache_[i] = new ByteInstruction(static_cast<uint8>(i));
}
}
return byte_instruction_cache_[byte];
}
////////////////////////////////////////////////////////////////////////////////
Status Encode(AssemblyProgram* program, EncodedProgram** output) {
*output = NULL;
EncodedProgram *encoded = program->Encode();
if (encoded) {
*output = encoded;
return C_OK;
} else {
return C_GENERAL_ERROR;
}
}
} // namespace courgette