blob: e274c9d36463e6a57ab02da7bd639d924be17294 [file] [log] [blame]
// Copyright 2011 The Goma Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "elf_parser.h"
#ifdef __linux__
#include <elf.h>
#endif
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sstream>
#include <string>
#include <vector>
#include <glog/logging.h>
#include <glog/stl_logging.h>
#include "scoped_fd.h"
using std::string;
namespace devtools_goma {
template <typename Ehdr, typename Phdr, typename Shdr, typename Dyn>
class ElfParserImpl : public ElfParser {
public:
ElfParserImpl(const string& filename, ScopedFd&& fd,
const char elfIdent[EI_NIDENT])
: ElfParser(),
filename_(filename),
fd_(std::move(fd)),
valid_(false),
use_program_header_(true),
dynamic_phdr_(nullptr),
strtab_shdr_(nullptr),
dynamic_shdr_(nullptr),
text_offset_(0) {
VLOG(1) << "Elf:" << filename;
memset(&ehdr_, 0, sizeof ehdr_);
memcpy(ehdr_.e_ident, elfIdent, EI_NIDENT);
int elf_class = elfIdent[EI_CLASS];
VLOG(1) << "elf_class=" << elf_class;
int elf_data = elfIdent[EI_DATA];
VLOG(1) << "elf_data=" << elf_data;
valid_ = (memcmp(elfIdent, ELFMAG, SELFMAG) == 0);
if (valid_) {
valid_ = elfIdent[EI_DATA] == ELFDATA2LSB;
LOG_IF(ERROR, !valid_) << "unsupported data encoding:"
<< elfIdent[EI_DATA];
}
CheckIdent();
}
~ElfParserImpl() override {
for (size_t i = 0; i < phdrs_.size(); ++i)
delete phdrs_[i];
for (size_t i = 0; i < shdrs_.size(); ++i)
delete shdrs_[i];
}
bool valid() const override { return valid_; }
void UseProgramHeader(bool use_program_header) override {
use_program_header_ = use_program_header;
}
bool ReadDynamicNeeded(std::vector<string>* needed) override {
VLOG(1) << "ReadDynamicNeeded:" << filename_;
if (!valid_) {
LOG(ERROR) << "not valid:" << filename_;
return false;
}
if (!ReadEhdr()) {
return false;
}
if (use_program_header_) {
if (!ReadPhdrs()) {
return false;
}
if (!ReadDynamicSegment()) {
return false;
}
} else {
if (!ReadShdrs()) {
return false;
}
if (!ReadDynamicSection()) {
return false;
}
}
if (!ReadDtStrtab()) {
return false;
}
if (dyntab_.empty()) {
LOG(ERROR) << "empty dyntab? " << filename_;
return false;
}
if (dt_strtab_.empty()) {
LOG(ERROR) << "empty dt_strtab? " << filename_;
return false;
}
ReadStringEntryInDynamic(DT_NEEDED, needed);
return true;
}
bool ReadDynamicNeededAndRpath(std::vector<string>* needed,
std::vector<string>* rpath) override {
if (!ReadDynamicNeeded(needed))
return false;
ReadStringEntryInDynamic(DT_RUNPATH, rpath);
// A loader checks DT_RPATH if and only if there are no DT_RUNPATH.
if (rpath->empty()) {
ReadStringEntryInDynamic(DT_RPATH, rpath);
}
return true;
}
private:
void CheckIdent();
bool ReadEhdr() {
if (!valid_)
return false;
if (read(fd_.fd(), reinterpret_cast<char*>(&ehdr_) + EI_NIDENT,
sizeof(Ehdr) - EI_NIDENT) != (sizeof(Ehdr) - EI_NIDENT)) {
PLOG(ERROR) << "read ehdr:" << filename_;
valid_ = false;
return false;
}
VLOG(1) << DumpEhdr(ehdr_);
return true;
}
bool ReadPhdrs() {
if (!valid_)
return false;
if (lseek(fd_.fd(), ehdr_.e_phoff, SEEK_SET) == static_cast<off_t>(-1)) {
PLOG(ERROR) << "seek phoff:" << ehdr_.e_phoff << " " << filename_;
valid_ = false;
return false;
}
for (int i = 0; i < ehdr_.e_phnum; ++i) {
Phdr* phdr = new Phdr;
if (read(fd_.fd(), reinterpret_cast<char*>(phdr), sizeof(Phdr)) !=
sizeof(Phdr)) {
PLOG(ERROR) << "read phdr:" << i << " " << filename_;
valid_ = false;
return false;
}
phdrs_.push_back(phdr);
VLOG(1) << i << ":" << DumpPhdr(*phdr);
switch (phdr->p_type) {
case PT_DYNAMIC:
LOG_IF(ERROR, dynamic_phdr_ != nullptr)
<< filename_ << " PT_DYNAMIC "
<< DumpPhdr(*dynamic_phdr_) << " " << DumpPhdr(*phdr);
dynamic_phdr_ = phdr;
break;
case PT_LOAD:
// The first segment, which contains dynstr, is being mapped
// in non-zero address. Update text_offset_ to adjust the
// offset of dynstr later.
if (phdr->p_offset == 0 && phdr->p_vaddr) {
LOG_IF(ERROR, ehdr_.e_type != ET_EXEC)
<< "Non zero vaddr for non EXEC ELF (" << ehdr_.e_type
<< "): " << DumpPhdr(*phdr);
text_offset_ = phdr->p_vaddr;
}
break;
default:
break;
}
}
return valid_;
}
bool ReadShdrs() {
if (!valid_)
return false;
if (lseek(fd_.fd(), ehdr_.e_shoff, SEEK_SET) == static_cast<off_t>(-1)) {
PLOG(ERROR) << "seek shoff:" << ehdr_.e_shoff << " " << filename_;
valid_ = false;
return false;
}
for (int i = 0; i < ehdr_.e_shnum; ++i) {
Shdr* shdr = new Shdr;
if (read(fd_.fd(), reinterpret_cast<char*>(shdr), sizeof(Shdr)) !=
sizeof(Shdr)) {
PLOG(ERROR) << "read shdr:" << i << " " << filename_;
valid_ = false;
return false;
}
shdrs_.push_back(shdr);
VLOG(1) << i << ":" << DumpShdr(*shdr);
// TODO: This cannot handle ET_EXEC as this doesn't
// update text_offset_.
switch (shdr->sh_type) {
case SHT_STRTAB:
// May have several STRTAB. Last one is ok?
strtab_shdr_ = shdr;
break;
case SHT_DYNAMIC:
LOG_IF(ERROR, dynamic_shdr_ != nullptr)
<< filename_ << " SHT_DYNAMIC "
<< DumpShdr(*dynamic_shdr_) << " " << DumpShdr(*shdr);
dynamic_shdr_ = shdr;
break;
default: break;
}
}
if (strtab_shdr_ != nullptr)
ReadStrtab();
return valid_;
}
bool ReadStrtab() {
if (!valid_)
return false;
if (strtab_shdr_ == nullptr)
return false;
VLOG(1) << "strtab:" << DumpShdr(*strtab_shdr_);
return ReadSectionData(*strtab_shdr_, &strtab_);
}
bool ReadDynamicSegment() {
if (!valid_)
return false;
if (dynamic_phdr_ == nullptr)
return false;
VLOG(1) << "dynamic:" << DumpPhdr(*dynamic_phdr_);
return ReadSegmentData(*dynamic_phdr_, &dyntab_);
}
bool ReadDynamicSection() {
if (!valid_)
return false;
if (dynamic_shdr_ == nullptr)
return false;
VLOG(1) << "dynamic:" << DumpShdr(*dynamic_shdr_);
return ReadSectionData(*dynamic_shdr_, &dyntab_);
}
bool ReadSegmentData(const Phdr& phdr, string* data) {
VLOG(1) << "read:" << DumpPhdr(phdr);
return ReadFromFile(phdr.p_offset, phdr.p_filesz, data);
}
bool ReadSectionData(const Shdr& shdr, string* data) {
VLOG(1) << "read:" << DumpShdr(shdr);
return ReadFromFile(shdr.sh_offset, shdr.sh_size, data);
}
bool ReadFromFile(off_t offset, size_t size, string* data) {
if (!valid_)
return false;
if (lseek(fd_.fd(), offset, SEEK_SET) == static_cast<off_t>(-1)) {
PLOG(ERROR) << "seek:" << offset << " " << filename_;
valid_ = false;
return false;
}
data->resize(size);
if (read(fd_.fd(), const_cast<char*>(data->data()), size) !=
static_cast<ssize_t>(size)) {
PLOG(ERROR) << "read data:" << size << " " << filename_;
valid_ = false;
return false;
}
return true;
}
bool ReadDtStrtab() {
if (!valid_)
return false;
if (dyntab_.empty())
return false;
off_t off = 0;
size_t size = 0;
for (size_t pos = 0; pos < dyntab_.size(); pos += sizeof(Dyn)) {
const Dyn* dyn = reinterpret_cast<const Dyn*>(dyntab_.data() + pos);
VLOG(2) << DumpDyn(*dyn);
if (dyn->d_tag == DT_STRTAB)
off = dyn->d_un.d_ptr - text_offset_;
else if (dyn->d_tag == DT_STRSZ)
size = dyn->d_un.d_val;
}
VLOG(1) << "dt_strtab: off=" << off << " size=" << size;
return ReadFromFile(off, size, &dt_strtab_);
}
void ReadStringEntryInDynamic(int type, std::vector<string>* out) {
for (size_t pos = 0; pos < dyntab_.size(); pos += sizeof(Dyn)) {
const Dyn* dyn = reinterpret_cast<const Dyn*>(dyntab_.data() + pos);
if (dyn->d_tag == type) {
if (dyn->d_un.d_val > dt_strtab_.size()) {
LOG(ERROR) << "out of range dt_strtab:" << dyn->d_un.d_val
<< " dt_strtab.size=" << dt_strtab_.size();
continue;
}
out->push_back(dt_strtab_.data() + dyn->d_un.d_val);
}
}
}
string DumpEhdr(const Ehdr& ehdr) {
std::stringstream ss;
ss << "Elf:";
ss << " type:" << ehdr.e_type;
ss << " machine:" << ehdr.e_machine;
ss << " version:" << ehdr.e_version;
ss << " entry:" << ehdr.e_entry;
ss << " phoff:" << ehdr.e_phoff;
ss << " shoff:" << ehdr.e_shoff;
ss << " flags:" << ehdr.e_flags;
ss << " ehsize:" << ehdr.e_ehsize;
ss << " phentsize:" << ehdr.e_phentsize;
ss << " phnum:" << ehdr.e_phnum;
ss << " shentsize:" << ehdr.e_shentsize;
ss << " shnum:" << ehdr.e_shnum;
ss << " shstrndx:" << ehdr.e_shstrndx;
return ss.str();
}
string DumpPhdr(const Phdr& phdr) {
std::stringstream ss;
ss << "Program:";
ss << " type:" << phdr.p_type;
ss << " offset:" << phdr.p_offset;
ss << " vaddr:" << phdr.p_vaddr;
ss << " paddr:" << phdr.p_paddr;
ss << " filesz:" << phdr.p_filesz;
ss << " memsz:" << phdr.p_memsz;
ss << " flags:" << phdr.p_flags;
ss << " align:" << phdr.p_align;
return ss.str();
}
string DumpShdr(const Shdr& shdr) {
std::stringstream ss;
ss << "Section:";
ss << " name:" << shdr.sh_name;
if (shdr.sh_name < strtab_.size()) {
ss << "'" << (strtab_.data() + shdr.sh_name) << "'";
}
ss << " type:" << shdr.sh_type;
ss << " flag:" << shdr.sh_flags;
ss << " addr:" << shdr.sh_offset;
ss << " offset:" << shdr.sh_size;
ss << " size:" << shdr.sh_size;
ss << " link:" << shdr.sh_link;
ss << " info:" << shdr.sh_info;
ss << " addralign:" << shdr.sh_addralign;
ss << " entsize:" << shdr.sh_entsize;
return ss.str();
}
string DumpDyn(const Dyn& dyn) {
std::stringstream ss;
ss << "Dyn:";
ss << " tag:" << dyn.d_tag;
ss << " val:" << dyn.d_un.d_val << " ptr:" << dyn.d_un.d_ptr;
return ss.str();
}
const string filename_;
ScopedFd fd_;
bool valid_;
bool use_program_header_;
Ehdr ehdr_;
std::vector<Phdr*> phdrs_;
Phdr* dynamic_phdr_;
std::vector<Shdr*> shdrs_;
Shdr* strtab_shdr_;
string strtab_;
Shdr* dynamic_shdr_;
string dyntab_;
string dt_strtab_;
size_t text_offset_;
DISALLOW_COPY_AND_ASSIGN(ElfParserImpl);
};
template<>
void ElfParserImpl<Elf32_Ehdr,
Elf32_Phdr, Elf32_Shdr, Elf32_Dyn>::CheckIdent() {
if (valid_) {
valid_ = (ehdr_.e_ident[EI_CLASS] == ELFCLASS32);
LOG_IF(ERROR, !valid_) << "not elf class32";
}
}
template<>
void ElfParserImpl<Elf64_Ehdr,
Elf64_Phdr, Elf64_Shdr, Elf64_Dyn>::CheckIdent() {
if (valid_) {
valid_ = (ehdr_.e_ident[EI_CLASS] = ELFCLASS64);
LOG_IF(ERROR, !valid_) << "not elf class64";
}
}
static ScopedFd OpenElf(const string& filename, char *elfIdent) {
ScopedFd fd(ScopedFd::OpenForRead(filename));
if (!fd.valid()) {
PLOG(WARNING) << "open:" << filename;
return ScopedFd();
}
if (read(fd.fd(), elfIdent, EI_NIDENT) != EI_NIDENT) {
PLOG(WARNING) << "read elf ident:" << filename;
return ScopedFd();
}
if (memcmp(elfIdent, ELFMAG, SELFMAG) != 0) {
LOG(WARNING) << "not elf: " << filename
<< " ident:" << string(elfIdent, SELFMAG);
return ScopedFd();
}
return fd;
}
/* static */
std::unique_ptr<ElfParser> ElfParser::NewElfParser(const string& filename) {
char elfIdent[EI_NIDENT];
ScopedFd fd(OpenElf(filename.c_str(), elfIdent));
if (!fd.valid()) {
PLOG(ERROR) << "open elf:" << filename;
return nullptr;
}
switch (elfIdent[EI_CLASS]) {
case ELFCLASS32:
return std::unique_ptr<ElfParser>(
new ElfParserImpl<Elf32_Ehdr, Elf32_Phdr, Elf32_Shdr, Elf32_Dyn>(
filename, std::move(fd), elfIdent));
case ELFCLASS64:
return std::unique_ptr<ElfParser>(
new ElfParserImpl<Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr, Elf64_Dyn>(
filename, std::move(fd), elfIdent));
default:
LOG(ERROR) << "Unknown elf class:" << elfIdent[EI_CLASS];
return nullptr;
}
}
/* static */
bool ElfParser::IsElf(const string& filename) {
char elfIdent[EI_NIDENT];
ScopedFd fd(OpenElf(filename.c_str(), elfIdent));
return fd.valid();
}
} // namespace devtools_goma
#ifdef TEST
#include <cstdlib>
#include <iostream>
using devtools_goma::ElfParser;
int main(int argc, char* argv[]) {
if (argc != 2) {
std::cout << "Usage: " << argv[0] << " <filename>" << std::endl;
exit(EXIT_FAILURE);
}
google::InitGoogleLogging(argv[0]);
std::unique_ptr<ElfParser> elf = ElfParser::NewElfParser(argv[1]);
CHECK(elf != nullptr);
CHECK(elf->valid());
std::vector<string> needed, rpath;
if (!elf->ReadDynamicNeededAndRpath(&needed, &rpath)) {
LOG(FATAL) << "ReadDynamicNeededAndRpath";
}
for (const auto& it : needed) {
std::cout << "NEEDED:" << it << std::endl;
}
for (const auto& it : rpath) {
std::cout << "RPATH:" << it << std::endl;
}
exit(0);
}
#endif