// blob: 23198cce8f46f7b8b9a2156b1a695bf150ada827 [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/zucchini/disassembler_dex.h"
#include <cmath>
#include <set>
#include <utility>
#include "base/logging.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/stringprintf.h"
#include "components/zucchini/buffer_source.h"
#include "components/zucchini/buffer_view.h"
namespace zucchini {
namespace {
// Size of a Dalvik instruction unit. Need to cast to signed int because
// sizeof() gives size_t, which dominates when operated on ptrdiff_t, then
// wreaks havoc for base::checked_cast<int16_t>().
constexpr int kInstrUnitSize = static_cast<int>(sizeof(uint16_t));
// Buffer for ReadDexHeader() to optionally return results.
// The members are assigned positionally by the brace-initialization at the end
// of ReadDexHeader(), so their declaration order must not change.
struct ReadDexHeaderResults {
// Reader constructed over the candidate image, in whatever state it was left
// after the header pointer was obtained from it.
BufferSource source;
// Header pointer as returned by BufferSource::GetPointer(). Presumably points
// into the image rather than at a copy -- confirm against BufferSource.
const dex::HeaderItem* header;
// Version number decoded from the three ASCII digits in the header magic.
int dex_version;
};
// Checks whether |image| starts with a plausible DEX header. When it does and
// |opt_results| is non-null, the data extracted here is stored in
// |*opt_results| so the caller can continue parsing without redoing the work.
bool ReadDexHeader(ConstBufferView image, ReadDexHeaderResults* opt_results) {
  // Keep this cheap: it may be invoked many times.
  BufferSource source(image);
  const dex::HeaderItem* header = source.GetPointer<dex::HeaderItem>();
  if (!header)
    return false;

  // Expected magic: "dex\n", three version digits, then a terminating NUL.
  const auto& magic = header->magic;
  const bool has_dex_magic = magic[0] == 'd' && magic[1] == 'e' &&
                             magic[2] == 'x' && magic[3] == '\n' &&
                             magic[7] == '\0';
  if (!has_dex_magic)
    return false;

  // The magic matches, so more detailed tests are worthwhile. Decode the
  // decimal version stored in magic[4..6].
  int dex_version = 0;
  for (int pos = 4; pos < 7; ++pos) {
    if (!isdigit(magic[pos]))
      return false;
    dex_version = dex_version * 10 + (magic[pos] - '0');
  }

  // Only versions 35 and 37 are accepted.
  const bool is_supported_version = dex_version == 35 || dex_version == 37;
  if (!is_supported_version)
    return false;

  // The declared file size must fit in |image| and be able to hold a header,
  // and the map list may not start inside the header.
  const bool has_sane_sizes = header->file_size <= image.size() &&
                              header->file_size >= sizeof(dex::HeaderItem) &&
                              header->map_off >= sizeof(dex::HeaderItem);
  if (!has_sane_sizes)
    return false;

  if (opt_results)
    *opt_results = {source, header, dex_version};
  return true;
}
} // namespace
/******** CodeItemParser ********/

// A parser to extract successive code items from a DEX image whose header has
// been parsed. Usage: construct with the image, call Init() once with the code
// map item, then call GetNext() once per expected code item.
class CodeItemParser {
 public:
  using size_type = BufferSource::size_type;

  explicit CodeItemParser(ConstBufferView image) : image_(image) {}

  // Initializes the parser so that reading starts |code_map_item.offset| bytes
  // into the image. Returns true on success and false on error.
  bool Init(const dex::MapItem& code_map_item) {
    // Sanity check to quickly fail if |code_map_item.offset| or
    // |code_map_item.size| is too large. This is a heuristic because code item
    // sizes need to be parsed (sizeof(dex::CodeItem) is a lower bound).
    if (!image_.covers_array(code_map_item.offset, code_map_item.size,
                             sizeof(dex::CodeItem))) {
      return false;
    }
    source_ = std::move(BufferSource(image_).Skip(code_map_item.offset));
    return true;
  }

  // Extracts the header of the next code item, and skips the variable-length
  // data. Returns the offset of the code item if successful. Otherwise returns
  // kInvalidOffset, and thereafter the parser becomes invalid. For reference,
  // here's a pseudo-struct of a complete code item:
  //
  // struct code_item {
  //   // 4-byte aligned here.
  //   // 16-byte header defined (dex::CodeItem).
  //   uint16_t registers_size;
  //   uint16_t ins_size;
  //   uint16_t outs_size;
  //   uint16_t tries_size;
  //   uint32_t debug_info_off;
  //   uint32_t insns_size;
  //
  //   // Variable-length data follow.
  //   uint16_t insns[insns_size];  // Instruction bytes.
  //   uint16_t padding[(tries_size > 0 && insns_size % 2 == 1) ? 1 : 0];
  //
  //   if (tries_size > 0) {
  //     // 4-byte aligned here.
  //     struct try_item {  // dex::TryItem.
  //       uint32_t start_addr;
  //       uint16_t insn_count;
  //       uint16_t handler_off;
  //     } tries[tries_size];
  //
  //     struct encoded_catch_handler_list {
  //       uleb128 handlers_size;
  //       struct encoded_catch_handler {
  //         sleb128 encoded_catch_handler_size;
  //         struct encoded_type_addr_pair {
  //           uleb128 type_idx;
  //           uleb128 addr;
  //         } handlers[abs(encoded_catch_handler_size)];
  //         if (encoded_catch_handler_size <= 0) {
  //           uleb128 catch_all_addr;
  //         }
  //       } handlers_list[handlers_size];
  //     } handlers_group;  // Confusingly called "handlers" in DEX doc.
  //   }
  //
  //   // Padding to 4-bytes align next code_item *only if more exist*.
  // }
  offset_t GetNext() {
    // Read header CodeItem.
    if (!source_.AlignOn(image_, 4U))
      return kInvalidOffset;
    const offset_t code_item_offset =
        base::checked_cast<offset_t>(source_.begin() - image_.begin());
    const auto* code_item = source_.GetPointer<const dex::CodeItem>();
    if (!code_item)
      return kInvalidOffset;
    DCHECK_EQ(0U, code_item_offset % 4U);

    // Skip instruction bytes.
    if (!source_.GetArray<uint16_t>(code_item->insns_size))
      return kInvalidOffset;
    // Skip padding if present.
    if (code_item->tries_size > 0 && !source_.AlignOn(image_, 4U))
      return kInvalidOffset;

    // Skip tries[] and handlers_group to arrive at the next code item. Parsing
    // is nontrivial due to use of uleb128 / sleb128.
    if (code_item->tries_size > 0) {
      // Skip (try_item) tries[].
      if (!source_.GetArray<dex::TryItem>(code_item->tries_size))
        return kInvalidOffset;

      // Skip handlers_group.
      uint32_t handlers_size = 0;
      if (!source_.GetUleb128(&handlers_size))
        return kInvalidOffset;
      // Sanity check to quickly reject excessively large |handlers_size|.
      if (source_.Remaining() < static_cast<size_type>(handlers_size))
        return kInvalidOffset;

      // Skip (encoded_catch_handler) handlers_list[].
      for (uint32_t k = 0; k < handlers_size; ++k) {
        int32_t encoded_catch_handler_size = 0;
        if (!source_.GetSleb128(&encoded_catch_handler_size))
          return kInvalidOffset;

        // Take the magnitude in 64-bit arithmetic. The value is decoded from
        // untrusted input, and std::abs() on the most negative int32_t would
        // be undefined behavior since the result is not representable.
        const int64_t wide_size = encoded_catch_handler_size;
        const size_type abs_size =
            static_cast<size_type>(wide_size < 0 ? -wide_size : wide_size);
        if (source_.Remaining() < abs_size)  // Sanity check.
          return kInvalidOffset;

        // Skip (encoded_type_addr_pair) handlers[].
        for (size_type j = 0; j < abs_size; ++j) {
          if (!source_.SkipLeb128() || !source_.SkipLeb128())
            return kInvalidOffset;
        }
        // Skip catch_all_addr, which is present only for non-positive sizes.
        if (encoded_catch_handler_size <= 0) {
          if (!source_.SkipLeb128())
            return kInvalidOffset;
        }
      }
    }
    // Success! |code_item->insns_size| is validated, but its content is still
    // considered unsafe and requires validation.
    return code_item_offset;
  }

  // Given |code_item_offset| that points to the start of a valid code item in
  // |image|, returns |insns| bytes as ConstBufferView. Validity is a
  // precondition: it is only DCHECKed here, not verified at runtime.
  static ConstBufferView GetCodeItemInsns(ConstBufferView image,
                                          offset_t code_item_offset) {
    BufferSource source(BufferSource(image).Skip(code_item_offset));
    const auto* code_item = source.GetPointer<const dex::CodeItem>();
    DCHECK(code_item);
    // |insns_size| counts 16-bit instruction units; convert to bytes.
    BufferRegion insns{0, code_item->insns_size * kInstrUnitSize};
    DCHECK(source.covers(insns));
    return source[insns];
  }

 private:
  // The whole DEX image; used for bounds checks and offset computation.
  ConstBufferView image_;
  // Read cursor over |image_|; set by Init() and advanced by GetNext().
  BufferSource source_;
};
/******** DisassemblerDex ********/
// NOTE(review): the meaning of the argument 4 is defined by the Disassembler
// base-class constructor, which is not visible in this file -- confirm there.
DisassemblerDex::DisassemblerDex() : Disassembler(4) {}
// Defaulted: this file adds no explicit cleanup of its own.
DisassemblerDex::~DisassemblerDex() = default;
// static.
// Returns whether |image| passes the DEX header checks in ReadDexHeader().
bool DisassemblerDex::QuickDetect(ConstBufferView image) {
  // Detection only: the extracted header data is not needed here.
  ReadDexHeaderResults* const no_results = nullptr;
  return ReadDexHeader(image, no_results);
}
// Identifies this disassembler's executable type; always kExeTypeDex.
ExecutableType DisassemblerDex::GetExeType() const {
return kExeTypeDex;
}
// Returns a human-readable type description that embeds |dex_version_|.
std::string DisassemblerDex::GetExeTypeString() const {
  std::string description =
      base::StringPrintf("DEX (version %d)", dex_version_);
  return description;
}
std::vector<ReferenceGroup> DisassemblerDex::MakeReferenceGroups() const {
return {};
}
// Stores |image| as the image to disassemble, then parses its header. Returns
// the result of ParseHeader().
bool DisassemblerDex::Parse(ConstBufferView image) {
  image_ = image;
  const bool header_parsed = ParseHeader();
  return header_parsed;
}
bool DisassemblerDex::ParseHeader() {
ReadDexHeaderResults results;
if (!ReadDexHeader(image_, &results))
return false;
header_ = results.header;
dex_version_ = results.dex_version;
BufferSource source = results.source;
// DEX header contains file size, so use it to resize |image_| right away.
image_.shrink(header_->file_size);
// Read map list. This is not a fixed-size array, so instead of reading
// MapList directly, read |MapList::size| first, then visit elements in
// |MapList::list|.
static_assert(
offsetof(dex::MapList, list) == sizeof(decltype(dex::MapList::size)),
"MapList size error.");
source = std::move(BufferSource(image_).Skip(header_->map_off));
decltype(dex::MapList::size) list_size = 0;
if (!source.GetValue(&list_size) || list_size > dex::kMaxItemListSize)
return false;
const auto* item_list = source.GetArray<const dex::MapItem>(list_size);
if (!item_list)
return false;
// Read and validate map list, ensuring that required item types are present.
std::set<uint16_t> required_item_types = {
dex::kTypeStringIdItem, dex::kTypeTypeIdItem, dex::kTypeFieldIdItem,
dex::kTypeMethodIdItem, dex::kTypeCodeItem};
for (offset_t i = 0; i < list_size; ++i) {
const dex::MapItem* item = &item_list[i];
// Sanity check to reject unreasonably large |item->size|.
// TODO(huangs): Implement a more stringent check.
if (!image_.covers({item->offset, item->size}))
return false;
if (!map_item_map_.insert(std::make_pair(item->type, item)).second)
return false; // A given type must appear at most once.
required_item_types.erase(item->type);
}
if (!required_item_types.empty())
return false;
// Make local copies of main map items.
string_map_item_ = *map_item_map_[dex::kTypeStringIdItem];
type_map_item_ = *map_item_map_[dex::kTypeTypeIdItem];
field_map_item_ = *map_item_map_[dex::kTypeFieldIdItem];
method_map_item_ = *map_item_map_[dex::kTypeMethodIdItem];
code_map_item_ = *map_item_map_[dex::kTypeCodeItem];
// Iteratively extract variable-length code items blocks. Any failure would
// indicate invalid DEX. Success indicates that no structural problem is
// found. However, contained instructions still need validation on use.
CodeItemParser code_item_parser(image_);
if (!code_item_parser.Init(code_map_item_))
return false;
code_item_offsets_.resize(code_map_item_.size);
for (size_t i = 0; i < code_map_item_.size; ++i) {
const offset_t code_item_offset = code_item_parser.GetNext();
if (code_item_offset == kInvalidOffset)
return false;
code_item_offsets_[i] = code_item_offset;
}
return true;
}
} // namespace zucchini