// blob: 15e86fd7b22c195b4eb36e6674d7bf0df3347a95 [file] [log] [blame]
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "syzygy/pe/pe_transform_policy.h"
namespace pe {
namespace {
// Allows BlockGraph to be named in this file without its block_graph::
// qualifier.
using block_graph::BlockGraph;
// The size, in bytes, of a core::AbsoluteAddress. The reference validators
// in this file compare reference sizes against this value wherever their
// comments speak of pointer-sized ("4-byte") references.
const size_t kPointerSize = sizeof(core::AbsoluteAddress);
// Determines whether |code_block| carries a label with the DATA_LABEL
// attribute at exactly |offset|.
// @param code_block the block whose label map is consulted.
// @param offset the offset to look up in the label map.
// @returns true iff a label exists at |offset| and it has DATA_LABEL set.
bool HasDataLabel(const BlockGraph::Block* code_block,
                  BlockGraph::Offset offset) {
  const BlockGraph::Block::LabelMap& labels = code_block->labels();
  const BlockGraph::Block::LabelMap::const_iterator found =
      labels.find(offset);

  // A missing label and a non-data label are both "no data label here".
  return found != labels.end() &&
         found->second.has_attributes(BlockGraph::DATA_LABEL);
}
// Validates a reference from the code portion of a block to the code portion
// of the same block.
// @param code_block the block containing the reference (not inspected).
// @param ref the reference to validate.
// @returns true iff |ref| is direct and is either a PC-relative reference of
//     size 1 or kPointerSize, or an absolute reference of size kPointerSize.
bool IsValidSelfReferenceCodeToCode(
    const BlockGraph::Block* code_block,
    const BlockGraph::Reference& ref) {
  // Indirect references are never acceptable, whatever their type.
  if (!ref.IsDirect())
    return false;

  // PC-relative references may be short (1 byte) or pointer sized.
  if (ref.type() == BlockGraph::PC_RELATIVE_REF)
    return ref.size() == 1 || ref.size() == kPointerSize;

  // Absolute references must be pointer sized.
  if (ref.type() == BlockGraph::ABSOLUTE_REF)
    return ref.size() == kPointerSize;

  // Every other reference type is rejected.
  return false;
}
// Validates a reference from the code portion of a block to the data portion
// of the same block.
// @param code_block the block containing the reference; its labels are
//     searched for a data label at the referenced offset.
// @param ref the reference to validate.
// @returns true iff |ref| is a direct, pointer-sized absolute reference whose
//     offset lands on a data label of |code_block|.
bool IsValidSelfReferenceCodeToData(
    const BlockGraph::Block* code_block,
    const BlockGraph::Reference& ref) {
  if (ref.type() != BlockGraph::ABSOLUTE_REF)
    return false;
  if (ref.size() != kPointerSize)
    return false;
  if (!ref.IsDirect())
    return false;

  // The target must be explicitly labelled as data.
  return HasDataLabel(code_block, ref.offset());
}
// Validates a reference from the data portion of a block to the code portion
// of the same block.
// @param code_block the block containing the reference (not inspected).
// @param ref the reference to validate.
// @returns true iff |ref| is a direct, pointer-sized absolute reference.
bool IsValidSelfReferenceDataToCode(
    const BlockGraph::Block* code_block,
    const BlockGraph::Reference& ref) {
  return ref.type() == BlockGraph::ABSOLUTE_REF &&
         ref.size() == kPointerSize &&
         ref.IsDirect();
}
// Validates a reference from the data portion of a block to the data portion
// of the same block. Such references show up in 'meta' case tables: one case
// table selects among a handful of other case tables, and the selected table
// then drives a second round of dispatch. Very complex conditional code, such
// as that generated by gtest and gmock, produces a lot of these.
// @param code_block the block containing the reference (not inspected).
// @param ref the reference to validate.
// @returns true iff |ref| is a direct, pointer-sized absolute reference.
bool IsValidSelfReferenceDataToData(
    const BlockGraph::Block* code_block,
    const BlockGraph::Reference& ref) {
  if (ref.type() != BlockGraph::ABSOLUTE_REF)
    return false;
  if (ref.size() != kPointerSize)
    return false;
  return ref.IsDirect();
}
// Validates a reference that originates in another code block and targets
// the code portion of this block.
// @param code_block the referenced block (not inspected).
// @param ref the reference to validate.
// @returns true iff |ref| is an absolute or PC-relative reference that is
//     pointer sized, direct, and points at offset 0.
bool IsValidExternalReferenceCodeBlockToCode(
    const BlockGraph::Block* code_block,
    const BlockGraph::Reference& ref) {
  const bool type_is_acceptable =
      ref.type() == BlockGraph::ABSOLUTE_REF ||
      ref.type() == BlockGraph::PC_RELATIVE_REF;
  if (!type_is_acceptable)
    return false;

  // Only the very start of the block may be targeted from outside.
  return ref.size() == kPointerSize && ref.offset() == 0 && ref.IsDirect();
}
// Validates a reference that originates in a non-code block and targets the
// code portion of this block.
// @param code_block the referenced block (not inspected).
// @param ref the reference to validate.
// @returns true iff |ref| is an absolute or relative (as used by PE
//     structures) reference that is pointer sized, direct, and points at
//     offset 0.
bool IsValidExternalReferenceDataBlockToCode(
    const BlockGraph::Block* code_block,
    const BlockGraph::Reference& ref) {
  switch (ref.type()) {
    case BlockGraph::ABSOLUTE_REF:
    case BlockGraph::RELATIVE_REF:
      // Acceptable reference types; continue on to the remaining checks.
      break;
    default:
      return false;
  }

  // Only the very start of the block may be targeted from outside.
  return ref.size() == kPointerSize && ref.offset() == 0 && ref.IsDirect();
}
} // namespace
// Constructs a policy with a freshly default-constructed block result cache
// and with inline assembly disallowed.
PETransformPolicy::PETransformPolicy()
    : block_result_cache_(new BlockResultCache()),
      allow_inline_assembly_(false) {
}
// Reports whether |block| may be basic-block decomposed. Non-code blocks are
// never decomposable. Results for code blocks are memoized by block id so
// that the (expensive) inspection of a block is performed at most once.
// @param block the block to evaluate; must not be NULL.
// @returns true iff |block| is a code block that passes
//     CodeBlockIsSafeToBasicBlockDecompose.
bool PETransformPolicy::BlockIsSafeToBasicBlockDecompose(
    const BlockGraph::Block* block) const {
  DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), block);

  // Only code blocks are candidates; these rejections are not cached.
  if (block->type() != BlockGraph::CODE_BLOCK)
    return false;

  const BlockResultCache::const_iterator cached =
      block_result_cache_->find(block->id());
  if (cached == block_result_cache_->end()) {
    // Cache miss: run the full analysis and remember the verdict.
    const bool is_safe = CodeBlockIsSafeToBasicBlockDecompose(block);
    block_result_cache_->insert(std::make_pair(block->id(), is_safe));
    return is_safe;
  }

  return cached->second;
}
// Reports whether |reference|, which lives in |referrer|, may be redirected.
// The current implementation accepts every reference.
// @param referrer the block containing the reference; must not be NULL.
// @param reference the reference under consideration (currently unused).
// @returns true, unconditionally.
bool PETransformPolicy::ReferenceIsSafeToRedirect(
    const BlockGraph::Block* referrer,
    const BlockGraph::Reference& reference) const {
  DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), referrer);

  // TODO(chrisha): Move IsUnsafeReference here!
  return true;
}
// Runs the full (uncached) battery of safety checks on a code block.
// @param code_block the block to evaluate; must be a non-NULL code block.
// @returns true if the block has the BUILT_BY_SYZYGY attribute, or if it
//     passes every one of the attribute, private symbol, layout, reference
//     and referrer checks; false otherwise.
bool PETransformPolicy::CodeBlockIsSafeToBasicBlockDecompose(
    const BlockGraph::Block* code_block) const {
  DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);
  DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());

  // Blocks produced by our own toolchain are trusted outright and skip all
  // of the remaining checks.
  if (code_block->attributes() & BlockGraph::BUILT_BY_SYZYGY)
    return true;

  // The checks run in this order and stop at the first failure.
  return CodeBlockAttributesAreBasicBlockSafe(code_block,
                                              allow_inline_assembly_) &&
         CodeBlockHasPrivateSymbols(code_block) &&
         CodeBlockLayoutIsClConsistent(code_block) &&
         CodeBlockReferencesAreClConsistent(code_block) &&
         CodeBlockReferrersAreClConsistent(code_block);
}
// Reports whether |code_block| has at least one label carrying an attribute
// other than PUBLIC_SYMBOL_LABEL.
// @param code_block the block whose labels are inspected; must not be NULL.
// @returns true iff some label has any attribute bit set besides
//     PUBLIC_SYMBOL_LABEL. A block with no labels yields false.
bool PETransformPolicy::CodeBlockHasPrivateSymbols(
    const BlockGraph::Block* code_block) {
  // Guard the dereferences below, matching the other static checkers in this
  // file.
  DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);

  BlockGraph::Block::LabelMap::const_iterator it = code_block->labels().begin();
  for (; it != code_block->labels().end(); ++it) {
    // Mask out the public symbol bit; anything left over is another
    // attribute on this label.
    if (it->second.attributes() & ~BlockGraph::PUBLIC_SYMBOL_LABEL)
      return true;
  }

  return false;
}
// Checks that the attributes of |code_block| do not preclude basic-block
// decomposition.
// @param code_block the block to evaluate; must be a non-NULL code block.
// @param allow_inline_assembly if true, HAS_INLINE_ASSEMBLY is not treated as
//     a disqualifying attribute.
// @returns true if the block has BUILT_BY_SYZYGY set, or if it has none of
//     the disqualifying attributes; false otherwise.
bool PETransformPolicy::CodeBlockAttributesAreBasicBlockSafe(
    const BlockGraph::Block* code_block,
    bool allow_inline_assembly) {
  DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);
  DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());

  // If the code_block was built by our toolchain it's inherently safe. This
  // attribute is used to whitelist a block.
  if (code_block->attributes() & BlockGraph::BUILT_BY_SYZYGY)
    return true;

  // Any of the following attributes make it unsafe to basic-block
  // decompose the code_block.
  static const BlockGraph::BlockAttributes kDefaultInvalidAttributes =
      BlockGraph::GAP_BLOCK |
      BlockGraph::PADDING_BLOCK |
      BlockGraph::HAS_INLINE_ASSEMBLY |
      BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER |
      BlockGraph::HAS_EXCEPTION_HANDLING;

  BlockGraph::BlockAttributes invalid_attributes = kDefaultInvalidAttributes;
  if (allow_inline_assembly) {
    // Explicitly clear the bit rather than toggling it with XOR, so that this
    // can never turn the bit on should the default mask above change.
    invalid_attributes &= ~static_cast<BlockGraph::BlockAttributes>(
        BlockGraph::HAS_INLINE_ASSEMBLY);
  }

  return (code_block->attributes() & invalid_attributes) == 0;
}
// Checks that the label layout of |code_block| follows the supported 'code
// first, data second' arrangement.
// @param code_block the block to evaluate; must be a non-NULL code block.
// @returns false if the block has no labels, if any label lies at or beyond
//     the end of the block (other than a label whose only attribute is
//     DEBUG_END_LABEL), or if a data label precedes a non-data label; true
//     otherwise.
bool PETransformPolicy::CodeBlockLayoutIsClConsistent(
    const BlockGraph::Block* code_block) {
  DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);
  DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());

  // If there are no labels then this is not a valid block.
  if (code_block->labels().empty())
    return false;

  // TODO(chrisha): Ensure that there is a code label at offset zero. If there
  //     is none then this is not a valid CL-produced code block. This breaks
  //     a bunch of code and unittests, as we may insert code that violates
  //     this principle. Changes to the basic-block builder are required before
  //     putting this in place.

  // The block size does not change during the walk, so convert it once.
  const BlockGraph::Offset block_end =
      static_cast<BlockGraph::Offset>(code_block->size());

  // Iterate over all labels in reverse order. We want to make sure there are
  // no invalid labels, and that all data labels are at the tail end of the
  // block (no non-data label may come after a data label).
  BlockGraph::Block::LabelMap::const_reverse_iterator it =
      code_block->labels().rbegin();
  bool saw_non_data_label = false;
  for (; it != code_block->labels().rend(); ++it) {
    const BlockGraph::Label& label = it->second;

    // No labels should be beyond the end of the block.
    if (it->first >= block_end) {
      // Except for a solo debug-end label, which can come after the block if
      // there is no post-amble.
      if (label.attributes() == BlockGraph::DEBUG_END_LABEL)
        continue;
      return false;
    }

    // From here on the label is known to lie inside the block, so data
    // labels need no further bounds check.
    if (!label.has_attributes(BlockGraph::DATA_LABEL)) {
      // Remember that a non-data label was seen. No further data labels
      // should be encountered.
      saw_non_data_label = true;
      continue;
    }

    // A data label that precedes (in block order) a non-data label violates
    // the 'code first, data second' layout requirement.
    if (saw_non_data_label)
      return false;
  }

  return true;
}
// Checks the outgoing references of |code_block|.
// @param code_block the block to evaluate; must be a non-NULL code block.
// @returns false if any outgoing reference targets a code block and is not
//     direct; true otherwise. References to non-code blocks are not
//     inspected.
bool PETransformPolicy::CodeBlockReferencesAreClConsistent(
    const BlockGraph::Block* code_block) {
  DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);
  DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());

  typedef BlockGraph::Block::ReferenceMap ReferenceMap;
  const ReferenceMap& outgoing = code_block->references();

  for (ReferenceMap::const_iterator it = outgoing.begin();
       it != outgoing.end(); ++it) {
    const BlockGraph::Reference& ref = it->second;

    // References to data are always safe, so skip them.
    if (ref.referenced()->type() != BlockGraph::CODE_BLOCK)
      continue;

    // References to code blocks must be direct.
    if (!ref.IsDirect())
      return false;
  }

  return true;
}
// Checks the incoming references (referrers) of |code_block|. The block is
// split into a code portion and a data portion at the first data label (or
// at the end of the block if there is none); this split is only meaningful
// if the block's layout has already been validated.
// @param code_block the block to evaluate; must be a non-NULL code block.
// @returns true iff every referrer passes the validator matching its origin
//     (self code, self data, external code block, external other block) and
//     its target (code or data), no external referrer targets the data
//     portion, and every data label is the target of at least one
//     self-reference.
bool PETransformPolicy::CodeBlockReferrersAreClConsistent(
    const BlockGraph::Block* code_block) {
  DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);
  DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());

  typedef BlockGraph::Block::LabelMap LabelMap;
  typedef BlockGraph::Block::ReferrerSet ReferrerSet;

  // Gather the offsets of all data labels. Each is expected to be referenced
  // from within the block; offsets are removed as references to them are
  // found.
  std::set<BlockGraph::Offset> unreferenced_data_labels;
  const LabelMap& labels = code_block->labels();
  for (LabelMap::const_iterator label_it = labels.begin();
       label_it != labels.end(); ++label_it) {
    if (label_it->second.has_attributes(BlockGraph::DATA_LABEL))
      unreferenced_data_labels.insert(label_it->first);
  }

  // The first data label (the set is ordered) marks where data begins. With
  // no data labels the whole block is code.
  const BlockGraph::Offset start_of_data = unreferenced_data_labels.empty() ?
      static_cast<BlockGraph::Offset>(code_block->size()) :
      *unreferenced_data_labels.begin();

  // Visit each referrer. Fetching the reference behind each one requires a
  // lookup in the referring block, hence O(n log n) overall.
  const ReferrerSet& referrers = code_block->referrers();
  for (ReferrerSet::const_iterator referrer_it = referrers.begin();
       referrer_it != referrers.end(); ++referrer_it) {
    const BlockGraph::Block* referring_block = referrer_it->first;
    const BlockGraph::Offset referring_offset = referrer_it->second;

    // Get the reference associated with this referrer.
    BlockGraph::Reference ref;
    CHECK(referring_block->GetReference(referring_offset, &ref));

    const bool targets_data = !(ref.offset() < start_of_data);

    if (referring_block != code_block) {
      // External referrer. Neither code nor data blocks should ever hold a
      // pointer to data internal to a code block.
      if (targets_data)
        return false;

      const bool external_ok =
          referring_block->type() == BlockGraph::CODE_BLOCK ?
              IsValidExternalReferenceCodeBlockToCode(code_block, ref) :
              IsValidExternalReferenceDataBlockToCode(code_block, ref);
      if (!external_ok)
        return false;
      continue;
    }

    // Self-reference: pick the validator from the (origin, target) pair.
    const bool from_code = referring_offset < start_of_data;
    bool self_ok = false;
    if (from_code) {
      self_ok = targets_data ?
          IsValidSelfReferenceCodeToData(code_block, ref) :
          IsValidSelfReferenceCodeToCode(code_block, ref);
    } else {
      self_ok = targets_data ?
          IsValidSelfReferenceDataToData(code_block, ref) :
          IsValidSelfReferenceDataToCode(code_block, ref);
    }
    if (!self_ok)
      return false;

    // Mark the data label (if any at this offset) as having been seen.
    if (targets_data)
      unreferenced_data_labels.erase(ref.offset());
  }

  // Leftover data labels that were never referenced mean the block is not
  // consistent with CL.EXE compiled code.
  return unreferenced_data_labels.empty();
}
} // namespace pe