| // Copyright 2013 Google Inc. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "syzygy/pe/pe_transform_policy.h" |
| |
| namespace pe { |
| |
| namespace { |
| |
| using block_graph::BlockGraph; |
| |
| const size_t kPointerSize = sizeof(core::AbsoluteAddress); |
| |
| // Returns true if there is a data label at the given offset, |
| // false otherwise. |
| bool HasDataLabel(const BlockGraph::Block* code_block, |
| BlockGraph::Offset offset) { |
| BlockGraph::Block::LabelMap::const_iterator label_it = |
| code_block->labels().find(offset); |
| if (label_it == code_block->labels().end()) |
| return false; |
| if (!label_it->second.has_attributes(BlockGraph::DATA_LABEL)) |
| return false; |
| return true; |
| } |
| |
| bool IsValidSelfReferenceCodeToCode( |
| const BlockGraph::Block* code_block, |
| const BlockGraph::Reference& ref) { |
| // These references must be direct. They may be 1- or 4-byte PC-relative refs, |
| // or 4-byte absolute refs. |
| if (!ref.IsDirect()) |
| return false; |
| |
| switch (ref.type()) { |
| case BlockGraph::PC_RELATIVE_REF: { |
| if (ref.size() != 1 && ref.size() != kPointerSize) |
| return false; |
| break; |
| } |
| |
| case BlockGraph::ABSOLUTE_REF: { |
| if (ref.size() != kPointerSize) |
| return false; |
| break; |
| } |
| |
| default: { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| bool IsValidSelfReferenceCodeToData( |
| const BlockGraph::Block* code_block, |
| const BlockGraph::Reference& ref) { |
| // Must be direct 4-byte absolute references to a data label. |
| if (ref.type() != BlockGraph::ABSOLUTE_REF || |
| ref.size() != kPointerSize || |
| !ref.IsDirect() || |
| !HasDataLabel(code_block, ref.offset())) { |
| return false; |
| } |
| return true; |
| } |
| |
| bool IsValidSelfReferenceDataToCode( |
| const BlockGraph::Block* code_block, |
| const BlockGraph::Reference& ref) { |
| // Must be 4-byte direct absolute references. |
| if (ref.type() != BlockGraph::ABSOLUTE_REF || ref.size() != kPointerSize || |
| !ref.IsDirect()) { |
| return false; |
| } |
| return true; |
| } |
| |
| bool IsValidSelfReferenceDataToData( |
| const BlockGraph::Block* code_block, |
| const BlockGraph::Reference& ref) { |
| // Must be 4-byte direct absolute references. We see this in 'meta' case |
| // tables, where there will be one case table that is used to select among a |
| // handful of case tables, and then the selected case table will be used for |
| // the second round of logic. This happens a lot in very complex conditional |
| // code like that generated by gtest and gmock. |
| if (ref.type() != BlockGraph::ABSOLUTE_REF || ref.size() != kPointerSize || |
| !ref.IsDirect()) { |
| return false; |
| } |
| return true; |
| } |
| |
| bool IsValidExternalReferenceCodeBlockToCode( |
| const BlockGraph::Block* code_block, |
| const BlockGraph::Reference& ref) { |
| // Must be direct 4-byte absolute or pc-rel reference to offset 0. |
| if (ref.type() != BlockGraph::ABSOLUTE_REF && |
| ref.type() != BlockGraph::PC_RELATIVE_REF) { |
| return false; |
| } |
| if (ref.size() != kPointerSize || ref.offset() != 0 || !ref.IsDirect()) |
| return false; |
| return true; |
| } |
| |
| bool IsValidExternalReferenceDataBlockToCode( |
| const BlockGraph::Block* code_block, |
| const BlockGraph::Reference& ref) { |
| // Must be direct 4-byte absolute or relative (PE structures) pointer to |
| // offset 0. |
| if (ref.type() != BlockGraph::ABSOLUTE_REF && |
| ref.type() != BlockGraph::RELATIVE_REF) { |
| return false; |
| } |
| if (ref.size() != kPointerSize || ref.offset() != 0 || !ref.IsDirect()) |
| return false; |
| return true; |
| } |
| |
| } // namespace |
| |
| PETransformPolicy::PETransformPolicy() |
| : block_result_cache_(new BlockResultCache()), |
| allow_inline_assembly_(false) { |
| } |
| |
| bool PETransformPolicy::BlockIsSafeToBasicBlockDecompose( |
| const BlockGraph::Block* block) const { |
| DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), block); |
| |
| if (block->type() != BlockGraph::CODE_BLOCK) |
| return false; |
| |
| // Look for a cached result. This prevents repeated (expensive) calculations |
| // and inspections over the block. |
| BlockResultCache::const_iterator it = block_result_cache_->find( |
| block->id()); |
| if (it != block_result_cache_->end()) |
| return it->second; |
| |
| bool result = CodeBlockIsSafeToBasicBlockDecompose(block); |
| block_result_cache_->insert(std::make_pair(block->id(), result)); |
| return result; |
| } |
| |
| bool PETransformPolicy::ReferenceIsSafeToRedirect( |
| const BlockGraph::Block* referrer, |
| const BlockGraph::Reference& reference) const { |
| DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), referrer); |
| // TODO(chrisha): Move IsUnsafeReference here! |
| return true; |
| } |
| |
| bool PETransformPolicy::CodeBlockIsSafeToBasicBlockDecompose( |
| const BlockGraph::Block* code_block) const { |
| DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block); |
| DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type()); |
| |
| // If the code_block was built by our toolchain it's inherently safe. |
| if (code_block->attributes() & BlockGraph::BUILT_BY_SYZYGY) |
| return true; |
| |
| if (!CodeBlockAttributesAreBasicBlockSafe(code_block, allow_inline_assembly_)) |
| return false; |
| if (!CodeBlockHasPrivateSymbols(code_block)) |
| return false; |
| if (!CodeBlockLayoutIsClConsistent(code_block)) |
| return false; |
| if (!CodeBlockReferencesAreClConsistent(code_block)) |
| return false; |
| if (!CodeBlockReferrersAreClConsistent(code_block)) |
| return false; |
| |
| return true; |
| } |
| |
| bool PETransformPolicy::CodeBlockHasPrivateSymbols( |
| const BlockGraph::Block* code_block) { |
| BlockGraph::Block::LabelMap::const_iterator it = code_block->labels().begin(); |
| for (; it != code_block->labels().end(); ++it) { |
| if (it->second.attributes() & ~BlockGraph::PUBLIC_SYMBOL_LABEL) |
| return true; |
| } |
| return false; |
| } |
| |
| bool PETransformPolicy::CodeBlockAttributesAreBasicBlockSafe( |
| const BlockGraph::Block* code_block, |
| bool allow_inline_assembly) { |
| DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block); |
| DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type()); |
| |
| // If the code_block was built by our toolchain it's inherently safe. This |
| // attribute is used to whitelist a block. |
| if (code_block->attributes() & BlockGraph::BUILT_BY_SYZYGY) |
| return true; |
| |
| // Any of the following attributes make it unsafe to basic-block |
| // decompose the code code_block. |
| static const BlockGraph::BlockAttributes kDefaultInvalidAttributes = |
| BlockGraph::GAP_BLOCK | |
| BlockGraph::PADDING_BLOCK | |
| BlockGraph::HAS_INLINE_ASSEMBLY | |
| BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER | |
| BlockGraph::HAS_EXCEPTION_HANDLING; |
| |
| BlockGraph::BlockAttributes invalid_attributes = kDefaultInvalidAttributes; |
| if (allow_inline_assembly) |
| invalid_attributes ^= BlockGraph::HAS_INLINE_ASSEMBLY; |
| |
| if (code_block->attributes() & invalid_attributes) |
| return false; |
| |
| return true; |
| } |
| |
| bool PETransformPolicy::CodeBlockLayoutIsClConsistent( |
| const BlockGraph::Block* code_block) { |
| DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block); |
| DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type()); |
| |
| // If there are no labels then this is not a valid block. |
| if (code_block->labels().empty()) |
| return false; |
| |
| // TODO(chrisha): Ensure that there is a code label at offset zero. If there |
| // is none then this is not a valid CL-produced code block. This breaks |
| // a bunch of code and unittests, as we may insert code that violates |
| // this principle. Changes to the basic-block builder are required before |
| // putting this in place. |
| |
| // Iterate over all labels in reverse order, looking at the labels. We want |
| // to make sure there are no invalid labels, and that all data labels are |
| // at the tail end of the block (no non-data label may come after a data |
| // label). |
| BlockGraph::Block::LabelMap::const_reverse_iterator it = |
| code_block->labels().rbegin(); |
| bool saw_non_data_label = false; |
| for (; it != code_block->labels().rend(); ++it) { |
| const BlockGraph::Label& label = it->second; |
| |
| // No labels should be beyond the end of the block. |
| if (it->first >= static_cast<BlockGraph::Offset>(code_block->size())) { |
| // Except for a solo debug-end label, which can come after the block if |
| // there is no post-amble. |
| if (label.attributes() == BlockGraph::DEBUG_END_LABEL) |
| continue; |
| return false; |
| } |
| |
| if (label.has_attributes(BlockGraph::DATA_LABEL)) { |
| // There should never be data labels beyond the end of the block. |
| if (it->first >= static_cast<BlockGraph::Offset>(code_block->size())) |
| return false; |
| |
| // If a non-data label was already encountered, and now there's another |
| // data label then bail: the block does not respect the 'code first, |
| // data second' supported layout requirement. |
| if (saw_non_data_label) |
| return false; |
| } else { |
| // Remember that a non-data label was seen. No further data labels should |
| // be encountered. |
| saw_non_data_label = true; |
| } |
| } |
| |
| return true; |
| } |
| |
| bool PETransformPolicy::CodeBlockReferencesAreClConsistent( |
| const BlockGraph::Block* code_block) { |
| DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block); |
| DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type()); |
| |
| // Iterate over the outgoing references from this code_block. |
| BlockGraph::Block::ReferenceMap::const_iterator ref_it = |
| code_block->references().begin(); |
| for (; ref_it != code_block->references().end(); ++ref_it) { |
| // References to data are always safe so we don't inspect them. |
| if (ref_it->second.referenced()->type() == BlockGraph::CODE_BLOCK) { |
| // References to code blocks must be direct. |
| if (!ref_it->second.IsDirect()) |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| bool PETransformPolicy::CodeBlockReferrersAreClConsistent( |
| const BlockGraph::Block* code_block) { |
| DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block); |
| DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type()); |
| |
| // We expect all data labels to be referenced internally by the code block. |
| std::set<BlockGraph::Offset> data_label_offsets; |
| BlockGraph::Block::LabelMap::const_iterator label_it = |
| code_block->labels().begin(); |
| for (; label_it != code_block->labels().end(); ++label_it) { |
| if (label_it->second.has_attributes(BlockGraph::DATA_LABEL)) |
| data_label_offsets.insert(label_it->first); |
| } |
| |
| // Determine the transition point that divides code from data. This is only |
| // a valid calculation if the layout has been checked and is valid. |
| BlockGraph::Offset start_of_data = code_block->size(); |
| if (!data_label_offsets.empty()) |
| start_of_data = *data_label_offsets.begin(); |
| |
| // Iterate through the referrers. Since we have to look up back-references |
| // this is O(n log n). |
| BlockGraph::Block::ReferrerSet::const_iterator ref_it = |
| code_block->referrers().begin(); |
| for (; ref_it != code_block->referrers().end(); ++ref_it) { |
| // Get the reference associated with this referrer. |
| BlockGraph::Reference ref; |
| CHECK(ref_it->first->GetReference(ref_it->second, &ref)); |
| |
| if (ref_it->first == code_block) { // Self-reference. |
| if (ref_it->second < start_of_data) { // From code |
| if (ref.offset() < start_of_data) { // To code. |
| if (!IsValidSelfReferenceCodeToCode(code_block, ref)) |
| return false; |
| } else { // To data. |
| if (!IsValidSelfReferenceCodeToData(code_block, ref)) |
| return false; |
| // Mark the data label as having been seen. |
| data_label_offsets.erase(ref.offset()); |
| } |
| } else { // From data. |
| if (ref.offset() < start_of_data) { // To code. |
| if (!IsValidSelfReferenceDataToCode(code_block, ref)) |
| return false; |
| } else { // To data. |
| if (!IsValidSelfReferenceDataToData(code_block, ref)) |
| return false; |
| // Mark the data label as having been seen. |
| data_label_offsets.erase(ref.offset()); |
| } |
| } |
| } else { // External. |
| if (ref_it->first->type() == BlockGraph::CODE_BLOCK) { // From code. |
| if (ref.offset() < start_of_data) { // To code. |
| if (!IsValidExternalReferenceCodeBlockToCode(code_block, ref)) |
| return false; |
| } else { // To data. |
| // No code block should ever have a pointer to data internal to |
| // a code block. |
| return false; |
| } |
| } else { // From data. |
| if (ref.offset() < start_of_data) { // To code. |
| if (!IsValidExternalReferenceDataBlockToCode(code_block, ref)) |
| return false; |
| } else { // To data. |
| // No data block should ever have a pointer to data internal to |
| // a code block. |
| return false; |
| } |
| } |
| } |
| } |
| |
| // If there are leftover data labels that have not been referenced then we |
| // are not consistent with CL.EXE compiled code. |
| if (!data_label_offsets.empty()) |
| return false; |
| |
| return true; |
| } |
| |
| } // namespace pe |