// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// The COFF image layout builder does a lot of things (maybe too much for
// its own good):
// - Assign an address to each block.
// - Create and add a relocation block for each section, and remove old ones.
// - Fix file offset pointers in section contents, headers, and tables.
//
// Since these tasks are very dependent on internals of COFF, which are
// missing from our intermediate representation (the block graph and
// associated metadata), they must rely on additional data structures and
// book-keeping. Hence they are all collected in this single step instead
// of existing as distinct transforms.
//
// New relocation blocks need to be bound to specific sections, and that
// link is not represented in the section info we have in the block
// graph. It is probably not useful there as it would only be needed during
// patching of COFF headers, and useless with PE.
//
// Old relocation block removal could be done in a separate transform, but
// is image-layout-dependent and hence does not classify as a block graph
// transform.
//
// Fixing references could alternatively be done in the file writer. Most
// header fields need to be patched (or their reference updated) during
// image laying out, though. Also, relocation references need to be handled
// with the creation of the new relocation tables (and do not require
// patching). Here we choose to handle all references in the image layout
// builder instead of spreading the task across classes.

#include "syzygy/pe/coff_image_layout_builder.h"

#include <vector>

#include "base/auto_reset.h"
#include "base/strings/string_util.h"
#include "syzygy/block_graph/typed_block.h"
#include "syzygy/common/align.h"
#include "syzygy/pe/coff_utils.h"
#include "syzygy/pe/pe_utils.h"

namespace pe {
namespace {

using block_graph::BlockGraph;
using block_graph::ConstTypedBlock;
using block_graph::OrderedBlockGraph;
using block_graph::TypedBlock;
using core::FileOffsetAddress;

// A temporary vector holding relocation entries, while building new
// relocation blocks.
typedef std::vector<IMAGE_RELOCATION> RelocVector;

// A map from references to symbol indexes, in order to translate references
// to relocations, which are symbol-based.
typedef std::map<std::pair<BlockGraph::Block*, BlockGraph::Offset>, size_t>
    SymbolMap;

// A map from section IDs to their (new) position in the resulting layout.
typedef std::map<BlockGraph::SectionId, size_t> SectionIndexMap;

// Microsoft specifications recommend 4-byte alignment for object files.
const size_t kFileAlignment = 4;

// The name of the new relocation blocks, generated from references.
const char kNewRelocsBlockName[] = "<refs>";

// Retrieve the COFF relocation type corresponding to the specified
// reference type and size.
//
// @param ref_type the reference type.
// @param ref_size the reference size.
// @param coff_reloc_type where to put the resulting COFF relocation type.
// @returns true on success, false on failure.
bool GetCoffRelocationType(BlockGraph::ReferenceType ref_type,
                           BlockGraph::Size ref_size,
                           uint16_t* coff_reloc_type) {
  switch (ref_type) {
    case BlockGraph::RELOC_ABSOLUTE_REF:
      DCHECK_EQ(sizeof(uint32_t), ref_size);
      *coff_reloc_type = IMAGE_REL_I386_DIR32;
      return true;
    case BlockGraph::RELOC_RELATIVE_REF:
      DCHECK_EQ(sizeof(uint32_t), ref_size);
      *coff_reloc_type = IMAGE_REL_I386_DIR32NB;
      return true;
    case BlockGraph::RELOC_SECTION_REF:
      DCHECK_EQ(sizeof(uint16_t), ref_size);
      *coff_reloc_type = IMAGE_REL_I386_SECTION;
      return true;
    case BlockGraph::RELOC_SECTION_OFFSET_REF:
      if (ref_size == sizeof(uint32_t)) {
         *coff_reloc_type = IMAGE_REL_I386_SECREL;
      } else {
        DCHECK_EQ(1u, ref_size);
        *coff_reloc_type = IMAGE_REL_I386_SECREL7;
      }
      return true;
    case BlockGraph::RELOC_PC_RELATIVE_REF:
      DCHECK_EQ(sizeof(uint32_t), ref_size);
      *coff_reloc_type = IMAGE_REL_I386_REL32;
      return true;
    default:
      LOG(ERROR) << "Unexpected reference type.";
      return false;
  }
}

// Write a reference value at the specified location. Write the full value
// for non-relocation references, or the additional offset only for
// relocation references.
//
// @tparam ValueType the type of data to write.
// @param ref the reference to write.
// @param block_offset the offset within @p block to alter.
// @param block the block to alter.
template <typename ValueType>
bool WriteReferenceValue(BlockGraph::Reference ref,
                         BlockGraph::Offset block_offset,
                         BlockGraph::Block* block) {
  DCHECK_EQ(sizeof(ValueType), ref.size());
  TypedBlock<ValueType> value;
  if (!value.Init(block_offset, block)) {
    LOG(ERROR) << "Unable to cast reference.";
    return false;
  }
  if ((ref.type() & BlockGraph::RELOC_REF_BIT) != 0) {
    *value = ref.offset() - ref.base();
  } else {
    *value = ref.offset();
  }
  return true;
}

// For each relocation reference in @p block, add a COFF relocation to the
// specified vector.
//
// @param block the block whose references are to be translated to
//     relocations.
// @param symbol_map the symbol map to use to match references with symbols.
// @param relocs vector to which the relocations are to be added.
// @returns true on success, false on failure.
bool AddRelocs(const BlockGraph::Block& block,
               const SymbolMap& symbol_map,
               RelocVector* relocs) {
  DCHECK(relocs != NULL);

  BlockGraph::Block::ReferenceMap::const_iterator it =
      block.references().begin();
  for (; it != block.references().end(); ++it) {
    // Skip non-relocation references.
    if ((it->second.type() & BlockGraph::RELOC_REF_BIT) == 0)
      continue;

    IMAGE_RELOCATION reloc = {};

    // Sections constructed by this class all have zero base RVA, so the
    // virtual address is just the offset.
    reloc.VirtualAddress = it->first;

    SymbolMap::const_iterator symbol_it =
        symbol_map.find(std::make_pair(it->second.referenced(),
                                       it->second.base()));
    if (symbol_it == symbol_map.end()) {
      LOG(ERROR) << "Missing COFF symbol for reference within a section block; "
                 << "cannot translate to relocation.";
      return false;
    }
    reloc.SymbolTableIndex = static_cast<DWORD>(symbol_it->second);
    if (!GetCoffRelocationType(it->second.type(), it->second.size(),
                               &reloc.Type))
      return false;

    relocs->push_back(reloc);
  }
  return true;
}

}  // namespace

CoffImageLayoutBuilder::CoffImageLayoutBuilder(ImageLayout* image_layout)
    : PECoffImageLayoutBuilder(image_layout),
      headers_block_(NULL),
      symbols_block_(NULL),
      strings_block_(NULL) {
  PECoffImageLayoutBuilder::Init(kFileAlignment, kFileAlignment);
}

bool CoffImageLayoutBuilder::LayoutImage(
    const OrderedBlockGraph& ordered_graph) {
  DCHECK_EQ(image_layout_->blocks.graph(), ordered_graph.block_graph());

  BlockGraph::Block* headers_block = NULL;
  BlockGraph::Block* symbols_block = NULL;
  BlockGraph::Block* strings_block = NULL;

  if (!FindCoffSpecialBlocks(image_layout_->blocks.graph(),
                             &headers_block, &symbols_block, &strings_block)) {
    LOG(ERROR) << "Block graph is missing some COFF special blocks. "
               << "Not a COFF block graph?";
    return false;
  }
  DCHECK(headers_block != NULL);
  DCHECK(symbols_block != NULL);
  DCHECK(strings_block != NULL);

  DCHECK(headers_block_ == NULL);
  DCHECK(symbols_block_ == NULL);
  DCHECK(strings_block_ == NULL);
  base::AutoReset<BlockGraph::Block*> auto_reset_headers_block(
      &headers_block_, headers_block);
  base::AutoReset<BlockGraph::Block*> auto_reset_symbols_block(
      &symbols_block_, symbols_block);
  base::AutoReset<BlockGraph::Block*> auto_reset_strings_block(
      &strings_block_, strings_block);

  if (!LayoutHeaders())
    return false;

  if (!LayoutSectionBlocks(ordered_graph))
    return false;

  if (!LayoutSymbolAndStringTables(ordered_graph))
    return false;

  if (!RemoveOldRelocBlocks())
    return false;

  return true;
}

bool CoffImageLayoutBuilder::LayoutHeaders() {
  DCHECK(headers_block_ != NULL);
  DCHECK_EQ(0u, image_layout_->blocks.address_space_impl().size());
  DCHECK_EQ(0u, image_layout_->sections.size());

  if (IsValidDosHeaderBlock(headers_block_)) {
    LOG(ERROR) << "Found DOS header in purported COFF file.";
    return false;
  }

  // Lay out headers as the block in the image layout.
  DCHECK_EQ(0u, cursor_.value());
  if (!LayoutBlockImpl(headers_block_))
    return false;

  return true;
}

bool CoffImageLayoutBuilder::LayoutSectionBlocks(
    const OrderedBlockGraph& ordered_graph) {
  DCHECK(headers_block_ != NULL);
  DCHECK(symbols_block_ != NULL);
  DCHECK_LT(0u, cursor_.value());
  DCHECK_EQ(0u, image_layout_->sections.size());

  // Fetch pointers to headers.
  ConstTypedBlock<IMAGE_FILE_HEADER> file_header;
  if (!file_header.Init(0, headers_block_)) {
    LOG(ERROR) << "Unable to cast file header.";
    return false;
  }
  TypedBlock<IMAGE_SECTION_HEADER> section_headers;
  if (!section_headers.Init(sizeof(IMAGE_FILE_HEADER), headers_block_)) {
    LOG(ERROR) << "Unable to cast section headers.";
    return false;
  }
  if (file_header->NumberOfSections != section_headers.ElementCount()) {
    LOG(ERROR) << "File header section count does not agree with "
               << "element count in headers block ("
               << file_header->NumberOfSections
               << " vs " << section_headers.ElementCount() << ").";
    return false;
  }

  // Fetch pointer to symbols.
  ConstTypedBlock<IMAGE_SYMBOL> symbols;
  if (!symbols.Init(0, symbols_block_)) {
    LOG(ERROR) << "Unable to cast symbol table.";
    return false;
  }
  size_t num_symbols = symbols.ElementCount();

  // Collect symbol information for relocations.
  SymbolMap symbol_map;
  BlockGraph::Block::ReferenceMap::const_iterator it =
      symbols_block_->references().begin();
  for (size_t i = 0; i < num_symbols; i += 1 + symbols[i].NumberOfAuxSymbols) {
    if (it != symbols_block_->references().end()) {
      size_t ref_symbol_index = it->first / sizeof(IMAGE_SYMBOL);
      DCHECK_LE(i, ref_symbol_index);
      DCHECK_GT(num_symbols, ref_symbol_index);

      if (i == ref_symbol_index) {
        // Resolved (referenced) symbol. We override previously inserted
        // symbols for the same reference; this gives priority to actual
        // symbols at offset zero, rather than section definition symbols.
        DCHECK_LT(0, symbols[i].SectionNumber);
        std::pair<BlockGraph::Block*, BlockGraph::Offset> ref_pair =
          std::make_pair(it->second.referenced(), it->second.base());
        symbol_map.insert(std::make_pair(ref_pair, i)).first->second = i;

        // Skip any other references for this symbol or its auxiliary
        // symbols.
        size_t next_index = i + 1 + symbols[i].NumberOfAuxSymbols;
        do {
          ++it;
        } while (it != symbols_block_->references().end() &&
                 it->first / sizeof(IMAGE_SYMBOL) < next_index);

        continue;
      }
    }

    // External or misc (unreferenced), that lies between references.
    DCHECK_GE(0, symbols[i].SectionNumber);
    std::pair<BlockGraph::Block*, BlockGraph::Offset> ref_pair =
        std::make_pair(symbols_block_, i * sizeof(symbols[i]));
    symbol_map.insert(std::make_pair(ref_pair, i));
  }
  DCHECK(it == symbols_block_->references().end());

  // Lay out section and relocation blocks.
  OrderedBlockGraph::SectionList::const_iterator section_it =
      ordered_graph.ordered_sections().begin();
  OrderedBlockGraph::SectionList::const_iterator section_end =
      ordered_graph.ordered_sections().end();
  size_t section_index = 0;
  for (; section_it != section_end; ++section_it) {
    BlockGraph::Section* section = (*section_it)->section();
    DCHECK(section != NULL);

    // Fill in common section header information.
    if (section_index >= file_header->NumberOfSections) {
      LOG(ERROR) << "Not enough space in headers block for "
                 << "so many sections (" << section_index << ").";
      return false;
    }
    IMAGE_SECTION_HEADER* header = &section_headers[section_index];

    std::memset(header, 0, sizeof(*header));
    std::strncpy(reinterpret_cast<char*>(header->Name),
                 section->name().c_str(),
                 arraysize(header->Name));
    header->Characteristics = section->characteristics();

    // Handle section data.
    if (!OpenSection(*section))
      return false;

    FileOffsetAddress section_start(cursor_.value());
    RelocVector relocs;

    // Lay out section blocks and collect relocations.
    OrderedBlockGraph::BlockList::const_iterator block_it =
        (*section_it)->ordered_blocks().begin();
    OrderedBlockGraph::BlockList::const_iterator block_end =
        (*section_it)->ordered_blocks().end();
    for (; block_it != block_end; ++block_it) {
      BlockGraph::Block* block = *block_it;
      DCHECK(block != NULL);
      DCHECK(block->type() == BlockGraph::CODE_BLOCK ||
             (block->attributes() &
              (BlockGraph::SECTION_CONTRIB | BlockGraph::COFF_BSS)) != 0);

      // Fix references.
      BlockGraph::Block::ReferenceMap::const_iterator ref_it =
          block->references().begin();
      for (; ref_it != block->references().end(); ++ref_it) {
        // Section blocks should only have relocations and function-relative
        // file pointers, represented as section offsets, thanks to
        // function-level linking.
        BlockGraph::Reference ref(ref_it->second);
        if ((ref.type() & BlockGraph::RELOC_REF_BIT) == 0 &&
            ref.type() != BlockGraph::SECTION_OFFSET_REF) {
            LOG(ERROR) << "Unexpected reference type " << ref.type()
                       << " in section " << section_index << ".";
            return false;
        }

        switch (ref.size()) {
          case sizeof(uint32_t):
            if (!WriteReferenceValue<uint32_t>(ref, ref_it->first, block))
              return false;
            break;
          case sizeof(uint16_t):
            if (!WriteReferenceValue<uint16_t>(ref, ref_it->first, block))
              return false;
            break;
          case sizeof(uint8_t):
            // TODO(chrisha): This is really a special 7-bit relocation; we do
            // not touch these, for now.
            break;
          default:
            LOG(ERROR) << "Unsupported relocation value size ("
                       << ref.size() << ").";
            return false;
        }
      }

      // Lay out and collect relocations.
      if (!LayoutBlock(block))
        return false;
      if (!AddRelocs(*block, symbol_map, &relocs))
        return false;
    }

    if (!CloseSection())
      return false;

    // Fix section header. We use section_index - 1, as the value has been
    // incremented already.
    const ImageLayout::SectionInfo& info =
        image_layout_->sections[section_index];
    if ((section->characteristics() & IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0) {
      // Normal section.
      header->PointerToRawData = section_start.value();
      header->SizeOfRawData = static_cast<DWORD>(info.data_size);
    } else {
      // BSS section. The COFF specifications state that SizeOfRawData
      // should be set to zero, but MSVC sets it to the size of the
      // uninitialized data.
      header->SizeOfRawData = static_cast<DWORD>(info.size);
    }
    DCHECK_EQ(header->Characteristics, info.characteristics);

    // Lay out relocations, if necessary.
    if (relocs.size() != 0) {
      size_t relocs_size = relocs.size() * sizeof(relocs[0]);
      BlockGraph::Block* relocs_block =
          image_layout_->blocks.graph()->AddBlock(BlockGraph::DATA_BLOCK,
                                                  relocs_size,
                                                  kNewRelocsBlockName);
      DCHECK(relocs_block != NULL);
      relocs_block->set_attribute(BlockGraph::COFF_RELOC_DATA);
      if (relocs_block->CopyData(relocs_size, &relocs[0]) == NULL)
        return false;

      // Fix relocation information in header.
      header->PointerToRelocations = cursor_.value();
      header->NumberOfRelocations = static_cast<WORD>(relocs.size());

      // Lay out the relocation block outside of the section.
      if (!LayoutBlockImpl(relocs_block))
        return false;
    }

    ++section_index;
  }

  if (section_index < file_header->NumberOfSections) {
    LOG(ERROR) << "Missing sections from ordered block graph ("
               << file_header->NumberOfSections << " expected vs "
               << section_index << " found).";
    return false;
  }

  return true;
}

bool CoffImageLayoutBuilder::LayoutSymbolAndStringTables(
    const OrderedBlockGraph& ordered_graph) {
  DCHECK(headers_block_ != NULL);
  DCHECK(symbols_block_ != NULL);
  DCHECK(strings_block_ != NULL);

  TypedBlock<IMAGE_FILE_HEADER> file_header;
  if (!file_header.Init(0, headers_block_)) {
    LOG(ERROR) << "Unable to cast file header.";
    return false;
  }

  TypedBlock<IMAGE_SYMBOL> symbols;
  if (!symbols.Init(0, symbols_block_)) {
    LOG(ERROR) << "Unable to cast symbol table.";
    return false;
  }

  file_header->PointerToSymbolTable = cursor_.value();
  file_header->NumberOfSymbols = static_cast<DWORD>(symbols.ElementCount());

  // Lay out the blocks.
  if (!LayoutBlockImpl(symbols_block_))
    return false;
  if (!LayoutBlockImpl(strings_block_))
    return false;

  // Compute the section index map, used to remap symbol section references.
  SectionIndexMap section_index_map;
  OrderedBlockGraph::SectionList::const_iterator section_it =
      ordered_graph.ordered_sections().begin();
  OrderedBlockGraph::SectionList::const_iterator section_end =
      ordered_graph.ordered_sections().end();
  size_t section_index = 0;
  for (; section_it != section_end; ++section_it) {
    BlockGraph::Section* section = (*section_it)->section();
    DCHECK(section != NULL);
    section_index_map.insert(std::make_pair(section->id(), section_index));
    ++section_index;
  }

  // Fix references.
  BlockGraph::Block::ReferenceMap::const_iterator it =
      symbols_block_->references().begin();
  for (; it != symbols_block_->references().end(); ++it) {
    size_t symbol_index = it->first / sizeof(IMAGE_SYMBOL);
    DCHECK_GT(file_header->NumberOfSymbols, symbol_index);

    switch (it->second.type()) {
      case BlockGraph::SECTION_REF: {
        DCHECK_EQ(2u, it->second.size());
        TypedBlock<uint16_t> section_number;
        if (!section_number.Init(it->first, symbols_block_)) {
          LOG(ERROR) << "Unable to cast reference.";
          return false;
        }

        SectionIndexMap::iterator section_index_it =
            section_index_map.find(it->second.referenced()->section());
        if (section_index_it == section_index_map.end()) {
          LOG(ERROR) << "Reference to unmapped section.";
          return false;
        }
        *section_number = static_cast<uint16_t>(section_index_it->second + 1);
        DCHECK_EQ(*section_number, symbols[symbol_index].SectionNumber);
        break;
      }

      case BlockGraph::SECTION_OFFSET_REF: {
        DCHECK_EQ(4u, it->second.size());
        TypedBlock<uint32_t> value;
        if (!value.Init(it->first, symbols_block_)) {
          LOG(ERROR) << "Unable to cast reference.";
          return false;
        }

        *value = it->second.offset();
        DCHECK_EQ(static_cast<size_t>(it->second.offset()),
                  symbols[symbol_index].Value);
        break;
      }

      default:
        LOG(ERROR) << "Unexpected reference type " << it->second.type()
                   << " in symbol table.";
        return false;
    }
  }

  return true;
}

bool CoffImageLayoutBuilder::RemoveOldRelocBlocks() {
  // Find blocks not mapped in the image layout, and ensure they are (old)
  // COFF relocation blocks; if not, that is an error.
  //
  // Relocation blocks found during this pass do not include new relocation
  // blocks (which must have been inserted into the image layout).
  BlockGraph::BlockMap& blocks =
      image_layout_->blocks.graph()->blocks_mutable();
  std::vector<BlockGraph::Block*> blocks_to_remove;

  BlockGraph::BlockMap::iterator it = blocks.begin();
  for (; it != blocks.end(); ++it) {
    if (!image_layout_->blocks.ContainsBlock(&it->second)) {
      if ((it->second.attributes() & BlockGraph::COFF_RELOC_DATA) == 0) {
        LOG(ERROR) << "Found unmapped block \"" << it->second.name()
                   << "\" in block graph; "
                   << "originally mapped at address " << it->second.addr()
                   << ".";
        return false;
      }
      blocks_to_remove.push_back(&it->second);
    }
  }

  // Remove old relocation blocks from the block graph.
  std::vector<BlockGraph::Block*>::iterator it_to_remove =
      blocks_to_remove.begin();
  for (; it_to_remove != blocks_to_remove.end(); ++it_to_remove) {
    if (!image_layout_->blocks.graph()->RemoveBlock(*it_to_remove)) {
      LOG(ERROR) << "Unable to remove block with ID " << (*it_to_remove)->id()
                 << " from the block graph.";
    }
  }

  DCHECK_EQ(image_layout_->blocks.size(),
            image_layout_->blocks.graph()->blocks().size());

  return true;
}

}  // namespace pe
