blob: 258c0b520a3d4f6ecfcb9f864d9bf9668679fcf7 [file] [log] [blame]
//===-- MachODump.cpp - Object file dumping utility for llvm --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the MachO-specific dumper for llvm-objdump.
//
//===----------------------------------------------------------------------===//
#include "llvm-objdump.h"
#include "llvm-c/Disassembler.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Config/config.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MachO.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstring>
#include <system_error>
#if HAVE_CXXABI_H
#include <cxxabi.h>
#endif
using namespace llvm;
using namespace object;
// Command line options controlling the Mach-O specific dumping modes.
// Options defined with an llvm:: qualifier are definitions of variables that
// are declared in namespace llvm (presumably in llvm-objdump.h) so other parts
// of the tool can query them; the static ones are private to this file.
static cl::opt<bool>
    UseDbg("g",
           cl::desc("Print line information from debug info if available"));
static cl::opt<std::string> DSYMFile("dsym",
                                     cl::desc("Use .dSYM file for debug info"));
static cl::opt<bool> FullLeadingAddr("full-leading-addr",
                                     cl::desc("Print full leading address"));
static cl::opt<bool> NoLeadingAddr("no-leading-addr",
                                   cl::desc("Print no leading address"));
cl::opt<bool> llvm::UniversalHeaders("universal-headers",
                                     cl::desc("Print Mach-O universal headers "
                                              "(requires -macho)"));
cl::opt<bool>
    llvm::ArchiveHeaders("archive-headers",
                         cl::desc("Print archive headers for Mach-O archives "
                                  "(requires -macho)"));
cl::opt<bool>
    ArchiveMemberOffsets("archive-member-offsets",
                         cl::desc("Print the offset to each archive member for "
                                  "Mach-O archives (requires -macho and "
                                  "-archive-headers)"));
cl::opt<bool>
    llvm::IndirectSymbols("indirect-symbols",
                          cl::desc("Print indirect symbol table for Mach-O "
                                   "objects (requires -macho)"));
cl::opt<bool>
    llvm::DataInCode("data-in-code",
                     cl::desc("Print the data in code table for Mach-O objects "
                              "(requires -macho)"));
cl::opt<bool>
    llvm::LinkOptHints("link-opt-hints",
                       cl::desc("Print the linker optimization hints for "
                                "Mach-O objects (requires -macho)"));
cl::opt<bool>
    llvm::InfoPlist("info-plist",
                    cl::desc("Print the info plist section as strings for "
                             "Mach-O objects (requires -macho)"));
cl::opt<bool>
    llvm::DylibsUsed("dylibs-used",
                     cl::desc("Print the shared libraries used for linked "
                              "Mach-O files (requires -macho)"));
cl::opt<bool>
    llvm::DylibId("dylib-id",
                  cl::desc("Print the shared library's id for the dylib Mach-O "
                           "file (requires -macho)"));
cl::opt<bool>
    llvm::NonVerbose("non-verbose",
                     cl::desc("Print the info for Mach-O objects in "
                              "non-verbose or numeric form (requires -macho)"));
cl::opt<bool>
    llvm::ObjcMetaData("objc-meta-data",
                       cl::desc("Print the Objective-C runtime meta data for "
                                "Mach-O files (requires -macho)"));
// The help text previously lacked its closing parenthesis.
cl::opt<std::string> llvm::DisSymName(
    "dis-symname",
    cl::desc("disassemble just this symbol's instructions (requires -macho)"));
// The help text previously read "do not symbolic operands" (missing verb).
static cl::opt<bool> NoSymbolicOperands(
    "no-symbolic-operands",
    cl::desc(
        "do not use symbolic operands when disassembling (requires -macho)"));
static cl::list<std::string>
    ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"),
              cl::ZeroOrMore);
bool ArchAll = false;
// Triple string used for the Thumb target; filled in by GetTarget().
static std::string ThumbTripleName;
// Looks up the disassembler target for MachOObj.
//
// If the global TripleName is empty it is derived (together with
// ThumbTripleName) from the object's architecture; McpuDefault is passed
// through to MachOObjectFile::getArch().  When a Thumb triple name is set,
// *ThumbTarget receives the result of looking up that triple.
//
// Returns the primary target on success.  On failure an error naming the
// triple that could not be resolved is written to errs() and nullptr is
// returned.
static const Target *GetTarget(const MachOObjectFile *MachOObj,
                               const char **McpuDefault,
                               const Target **ThumbTarget) {
  // Figure out the target triple.
  if (TripleName.empty()) {
    llvm::Triple ThumbTriple;
    llvm::Triple TT = MachOObj->getArch(McpuDefault, &ThumbTriple);
    TripleName = TT.str();
    ThumbTripleName = ThumbTriple.str();
  }

  // Get the target specific parser.
  std::string Error;
  const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
  if (TheTarget && ThumbTripleName.empty())
    return TheTarget;

  *ThumbTarget = TargetRegistry::lookupTarget(ThumbTripleName, Error);
  // Both lookups must succeed.  Previously a failed primary lookup combined
  // with a successful Thumb lookup returned nullptr without any diagnostic.
  if (TheTarget && *ThumbTarget)
    return TheTarget;

  errs() << "llvm-objdump: error: unable to get target for '";
  if (!TheTarget)
    errs() << TripleName;
  else
    errs() << ThumbTripleName;
  errs() << "', see --version and --triple.\n";
  return nullptr;
}
struct SymbolSorter {
bool operator()(const SymbolRef &A, const SymbolRef &B) {
uint64_t AAddr = (A.getType() != SymbolRef::ST_Function) ? 0 : A.getValue();
uint64_t BAddr = (B.getType() != SymbolRef::ST_Function) ? 0 : B.getValue();
return AAddr < BAddr;
}
};
// Types for the sorted data in code table that is built before disassembly
// and the predicate function to sort them.
// Each entry pairs a 64-bit key (the value compared against the PC by
// compareDiceTableEntries) with the DiceRef describing the table entry.
typedef std::pair<uint64_t, DiceRef> DiceTableEntry;
typedef std::vector<DiceTableEntry> DiceTable;
typedef DiceTable::iterator dice_table_iterator;
// Search predicate for the data in code table.  The entry being tested is i
// and the PC being disassembled is carried in j.first.  One table entry may
// span many bytes, so the match succeeds when the PC lies in the half-open
// range [i.first, i.first + Length), where Length is read from i's DiceRef.
// Returns true on a match and false otherwise.
static bool compareDiceTableEntries(const DiceTableEntry &i,
                                    const DiceTableEntry &j) {
  uint16_t Length;
  i.second.getLength(Length);
  const uint64_t Begin = i.first;
  const uint64_t PC = j.first;
  if (PC < Begin)
    return false;
  return PC < Begin + Length;
}
// Prints one item of a data in code region as an assembler data directive.
//
// bytes points at the item, Length is the number of bytes remaining in the
// region and Kind is the MachO::DICE_KIND_* value of the region.  For
// DICE_KIND_DATA and unknown kinds the widest of .long/.short/.byte that fits
// in Length is used; the jump table kinds use their fixed element size.
// Unless NoShowRawInsn is set the raw bytes are dumped first.
//
// Returns the number of bytes consumed (1, 2 or 4).
//
// NOTE(review): the jump table cases read their fixed element size without
// consulting Length; callers are assumed to pass enough bytes.
static uint64_t DumpDataInCode(const uint8_t *bytes, uint64_t Length,
                               unsigned short Kind) {
  // Values are little-endian; build them in unsigned arithmetic so a set top
  // bit in the high byte never shifts into the sign bit of an int.
  auto Read16 = [bytes]() -> uint32_t {
    return static_cast<uint32_t>(bytes[1]) << 8 | bytes[0];
  };
  auto Read32 = [bytes]() -> uint32_t {
    return static_cast<uint32_t>(bytes[3]) << 24 |
           static_cast<uint32_t>(bytes[2]) << 16 |
           static_cast<uint32_t>(bytes[1]) << 8 | bytes[0];
  };

  uint32_t Value, Size = 1;
  switch (Kind) {
  default:
  case MachO::DICE_KIND_DATA:
    if (Length >= 4) {
      if (!NoShowRawInsn)
        dumpBytes(makeArrayRef(bytes, 4), outs());
      Value = Read32();
      outs() << "\t.long " << Value;
      Size = 4;
    } else if (Length >= 2) {
      if (!NoShowRawInsn)
        dumpBytes(makeArrayRef(bytes, 2), outs());
      Value = Read16();
      outs() << "\t.short " << Value;
      Size = 2;
    } else {
      // Only a single byte is available here; dumping two (as the code used
      // to) reads past the region and shows a byte that is not part of it.
      if (!NoShowRawInsn)
        dumpBytes(makeArrayRef(bytes, 1), outs());
      Value = bytes[0];
      outs() << "\t.byte " << Value;
      Size = 1;
    }
    if (Kind == MachO::DICE_KIND_DATA)
      outs() << "\t@ KIND_DATA\n";
    else
      outs() << "\t@ data in code kind = " << Kind << "\n";
    break;
  case MachO::DICE_KIND_JUMP_TABLE8:
    if (!NoShowRawInsn)
      dumpBytes(makeArrayRef(bytes, 1), outs());
    Value = bytes[0];
    outs() << "\t.byte " << format("%3u", Value) << "\t@ KIND_JUMP_TABLE8\n";
    Size = 1;
    break;
  case MachO::DICE_KIND_JUMP_TABLE16:
    if (!NoShowRawInsn)
      dumpBytes(makeArrayRef(bytes, 2), outs());
    Value = Read16();
    outs() << "\t.short " << format("%5u", Value & 0xffff)
           << "\t@ KIND_JUMP_TABLE16\n";
    Size = 2;
    break;
  case MachO::DICE_KIND_JUMP_TABLE32:
  case MachO::DICE_KIND_ABS_JUMP_TABLE32:
    if (!NoShowRawInsn)
      dumpBytes(makeArrayRef(bytes, 4), outs());
    Value = Read32();
    outs() << "\t.long " << Value;
    if (Kind == MachO::DICE_KIND_JUMP_TABLE32)
      outs() << "\t@ KIND_JUMP_TABLE32\n";
    else
      outs() << "\t@ KIND_ABS_JUMP_TABLE32\n";
    Size = 4;
    break;
  }
  return Size;
}
// Gathers the information needed before disassembling MachOObj.
//
//  - Symbols receives every symbol whose name does not start with "ltmp".
//  - Sections receives every section of the object.
//  - FoundFns receives the ULEB128 values stored in the LC_FUNCTION_STARTS
//    data, if that load command is present.
//  - BaseSegmentAddress is set to the vmaddr of the first segment that is not
//    named __PAGEZERO; it is left untouched if there is no such segment.
//
// A failure to read a symbol name is fatal.
static void getSectionsAndSymbols(MachOObjectFile *MachOObj,
                                  std::vector<SectionRef> &Sections,
                                  std::vector<SymbolRef> &Symbols,
                                  SmallVectorImpl<uint64_t> &FoundFns,
                                  uint64_t &BaseSegmentAddress) {
  for (const SymbolRef &Symbol : MachOObj->symbols()) {
    ErrorOr<StringRef> SymName = Symbol.getName();
    if (std::error_code EC = SymName.getError())
      report_fatal_error(EC.message());
    if (!SymName->startswith("ltmp"))
      Symbols.push_back(Symbol);
  }

  for (const SectionRef &Section : MachOObj->sections())
    Sections.push_back(Section);

  bool BaseSegmentAddressSet = false;
  // Records the address of the first non-__PAGEZERO segment seen.
  auto NoteSegment = [&](StringRef SegName, uint64_t VMAddr) {
    if (!BaseSegmentAddressSet && SegName != "__PAGEZERO") {
      BaseSegmentAddressSet = true;
      BaseSegmentAddress = VMAddr;
    }
  };

  for (const auto &Command : MachOObj->load_commands()) {
    if (Command.C.cmd == MachO::LC_FUNCTION_STARTS) {
      // We found a function starts segment, parse the addresses for later
      // consumption.
      MachO::linkedit_data_command LLC =
          MachOObj->getLinkeditDataLoadCommand(Command);
      MachOObj->ReadULEB128s(LLC.dataoff, FoundFns);
    } else if (Command.C.cmd == MachO::LC_SEGMENT) {
      MachO::segment_command SLC = MachOObj->getSegmentLoadCommand(Command);
      NoteSegment(SLC.segname, SLC.vmaddr);
    } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
      // 64-bit objects describe their segments with LC_SEGMENT_64; without
      // this case the base address was never set for them.
      MachO::segment_command_64 SLC =
          MachOObj->getSegment64LoadCommand(Command);
      NoteSegment(SLC.segname, SLC.vmaddr);
    }
  }
}
// Prints the slice of the indirect symbol table that belongs to one section.
//
// n is the index of the section's first entry in the indirect symbol table,
// count the number of entries the section holds, stride the size in bytes of
// each entry in the section and addr the section's address.  When verbose is
// set the symbol name is printed next to each symbol index.  Entries that
// would lie beyond the end of the table are diagnosed and not printed.
static void PrintIndirectSymbolTable(MachOObjectFile *O, bool verbose,
                                     uint32_t n, uint32_t count,
                                     uint32_t stride, uint64_t addr) {
  const MachO::dysymtab_command Dysymtab = O->getDysymtabLoadCommand();
  const uint32_t nindirectsyms = Dysymtab.nindirectsyms;
  // Index arithmetic is done in 64 bits so that large reserved1/count values
  // from the file cannot wrap around and look like they are in range.
  const uint64_t First = n;
  if (n > nindirectsyms)
    outs() << " (entries start past the end of the indirect symbol "
              "table) (reserved1 field greater than the table size)";
  else if (First + count > nindirectsyms)
    outs() << " (entries extends past the end of the indirect symbol "
              "table)";
  outs() << "\n";

  const uint32_t cputype = O->getHeader().cputype;
  const bool Is64Bit = (cputype & MachO::CPU_ARCH_ABI64) != 0;
  if (Is64Bit)
    outs() << "address index";
  else
    outs() << "address index";
  if (verbose)
    outs() << " name\n";
  else
    outs() << "\n";

  for (uint32_t j = 0; j < count && First + j < nindirectsyms; j++) {
    const uint64_t EntryAddr = addr + static_cast<uint64_t>(j) * stride;
    if (Is64Bit)
      outs() << format("0x%016" PRIx64, EntryAddr) << " ";
    else
      // PRIx32 consumes a 32-bit argument, so narrow explicitly instead of
      // handing the formatter a 64-bit value.
      outs() << format("0x%08" PRIx32, static_cast<uint32_t>(EntryAddr))
             << " ";

    const uint32_t indirect_symbol =
        O->getIndirectSymbolTableEntry(Dysymtab, n + j);
    if (indirect_symbol == MachO::INDIRECT_SYMBOL_LOCAL) {
      outs() << "LOCAL\n";
      continue;
    }
    if (indirect_symbol ==
        (MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS)) {
      outs() << "LOCAL ABSOLUTE\n";
      continue;
    }
    if (indirect_symbol == MachO::INDIRECT_SYMBOL_ABS) {
      outs() << "ABSOLUTE\n";
      continue;
    }

    outs() << format("%5u ", indirect_symbol);
    if (verbose) {
      MachO::symtab_command Symtab = O->getSymtabLoadCommand();
      if (indirect_symbol < Symtab.nsyms) {
        symbol_iterator Sym = O->getSymbolByIndex(indirect_symbol);
        SymbolRef Symbol = *Sym;
        ErrorOr<StringRef> SymName = Symbol.getName();
        if (std::error_code EC = SymName.getError())
          report_fatal_error(EC.message());
        outs() << *SymName;
      } else {
        // The index does not refer to an entry of the symbol table.
        outs() << "?";
      }
    }
    outs() << "\n";
  }
}
// Walks every section of every LC_SEGMENT / LC_SEGMENT_64 load command and,
// for the section types that own indirect symbol table entries (symbol
// pointers and symbol stubs), prints a header followed by those entries via
// PrintIndirectSymbolTable().  For stubs the per-entry size comes from the
// section's reserved2 field; for pointer sections it is 8 (64-bit) or 4
// (32-bit).  Stub sections whose reserved2 is zero are reported and skipped.
static void PrintIndirectSymbols(MachOObjectFile *O, bool verbose) {
  // True for the section types that have indirect symbol table entries.
  auto UsesIndirectSymbols = [](uint32_t Type) {
    switch (Type) {
    case MachO::S_NON_LAZY_SYMBOL_POINTERS:
    case MachO::S_LAZY_SYMBOL_POINTERS:
    case MachO::S_LAZY_DYLIB_SYMBOL_POINTERS:
    case MachO::S_THREAD_LOCAL_VARIABLE_POINTERS:
    case MachO::S_SYMBOL_STUBS:
      return true;
    default:
      return false;
    }
  };

  for (const auto &Load : O->load_commands()) {
    if (Load.C.cmd == MachO::LC_SEGMENT_64) {
      MachO::segment_command_64 Seg = O->getSegment64LoadCommand(Load);
      for (unsigned J = 0; J < Seg.nsects; ++J) {
        MachO::section_64 Sec = O->getSection64(Load, J);
        const uint32_t Type = Sec.flags & MachO::SECTION_TYPE;
        if (!UsesIndirectSymbols(Type))
          continue;
        const uint32_t stride =
            (Type == MachO::S_SYMBOL_STUBS) ? Sec.reserved2 : 8;
        if (stride == 0) {
          outs() << "Can't print indirect symbols for (" << Sec.segname << ","
                 << Sec.sectname << ") "
                 << "(size of stubs in reserved2 field is zero)\n";
          continue;
        }
        uint32_t count = Sec.size / stride;
        outs() << "Indirect symbols for (" << Sec.segname << ","
               << Sec.sectname << ") " << count << " entries";
        PrintIndirectSymbolTable(O, verbose, Sec.reserved1, count, stride,
                                 Sec.addr);
      }
    } else if (Load.C.cmd == MachO::LC_SEGMENT) {
      MachO::segment_command Seg = O->getSegmentLoadCommand(Load);
      for (unsigned J = 0; J < Seg.nsects; ++J) {
        MachO::section Sec = O->getSection(Load, J);
        const uint32_t Type = Sec.flags & MachO::SECTION_TYPE;
        if (!UsesIndirectSymbols(Type))
          continue;
        const uint32_t stride =
            (Type == MachO::S_SYMBOL_STUBS) ? Sec.reserved2 : 4;
        if (stride == 0) {
          outs() << "Can't print indirect symbols for (" << Sec.segname << ","
                 << Sec.sectname << ") "
                 << "(size of stubs in reserved2 field is zero)\n";
          continue;
        }
        uint32_t count = Sec.size / stride;
        outs() << "Indirect symbols for (" << Sec.segname << ","
               << Sec.sectname << ") " << count << " entries";
        PrintIndirectSymbolTable(O, verbose, Sec.reserved1, count, stride,
                                 Sec.addr);
      }
    }
  }
}
// Prints the data in code table of O: a header with the entry count (computed
// from the load command's datasize) followed by one line per entry giving its
// offset, length and kind.  With verbose set, known kinds are printed by name;
// unknown kinds, and all kinds when not verbose, are printed in hex.
static void PrintDataInCodeTable(MachOObjectFile *O, bool verbose) {
  MachO::linkedit_data_command DIC = O->getDataInCodeLoadCommand();
  uint32_t nentries = DIC.datasize / sizeof(struct MachO::data_in_code_entry);
  outs() << "Data in code table (" << nentries << " entries)\n";
  outs() << "offset length kind\n";

  const dice_iterator End = O->end_dices();
  for (dice_iterator It = O->begin_dices(); It != End; ++It) {
    uint32_t Offset;
    It->getOffset(Offset);
    outs() << format("0x%08" PRIx32, Offset) << " ";

    uint16_t Length;
    It->getLength(Length);
    outs() << format("%6u", Length) << " ";

    uint16_t Kind;
    It->getKind(Kind);

    // Symbolic name of the kind, or nullptr when it has to be shown in hex.
    const char *KindName = nullptr;
    if (verbose) {
      switch (Kind) {
      case MachO::DICE_KIND_DATA:
        KindName = "DATA";
        break;
      case MachO::DICE_KIND_JUMP_TABLE8:
        KindName = "JUMP_TABLE8";
        break;
      case MachO::DICE_KIND_JUMP_TABLE16:
        KindName = "JUMP_TABLE16";
        break;
      case MachO::DICE_KIND_JUMP_TABLE32:
        KindName = "JUMP_TABLE32";
        break;
      case MachO::DICE_KIND_ABS_JUMP_TABLE32:
        KindName = "ABS_JUMP_TABLE32";
        break;
      default:
        break;
      }
    }
    if (KindName)
      outs() << KindName;
    else
      outs() << format("0x%04" PRIx32, Kind);
    outs() << "\n";
  }
}
// Prints the linker optimization hints of O.  The hint data is a stream of
// ULEB128 values: an identifier, an argument count, then that many argument
// values, repeated until the datasize recorded in the load command is used
// up.  Printing stops as soon as the read position reaches the end of the
// data, even in the middle of a record.
static void PrintLinkOptHints(MachOObjectFile *O) {
  // Names of the identifiers 1..8, indexed by identifier - 1.
  static const char *const HintNames[] = {
      "AdrpAdrp\n",   "AdrpLdr\n",       "AdrpAddLdr\n", "AdrpLdrGotLdr\n",
      "AdrpAddStr\n", "AdrpLdrGotStr\n", "AdrpAdd\n",    "AdrpLdrGot\n"};

  MachO::linkedit_data_command LohLC = O->getLinkOptHintsLoadCommand();
  const char *loh = O->getData().substr(LohLC.dataoff, 1).data();
  const uint32_t nloh = LohLC.datasize;
  outs() << "Linker optimiztion hints (" << nloh << " total bytes)\n";

  uint32_t Pos = 0;
  while (Pos < nloh) {
    unsigned Consumed;
    uint64_t identifier =
        decodeULEB128(reinterpret_cast<const uint8_t *>(loh + Pos), &Consumed);
    Pos += Consumed;
    outs() << " identifier " << identifier << " ";
    if (Pos >= nloh)
      return;

    if (identifier >= 1 && identifier <= 8)
      outs() << HintNames[identifier - 1];
    else
      outs() << "Unknown identifier value\n";

    uint64_t narguments =
        decodeULEB128(reinterpret_cast<const uint8_t *>(loh + Pos), &Consumed);
    Pos += Consumed;
    outs() << " narguments " << narguments << "\n";
    if (Pos >= nloh)
      return;

    for (uint32_t Arg = 0; Arg < narguments; Arg++) {
      uint64_t value = decodeULEB128(
          reinterpret_cast<const uint8_t *>(loh + Pos), &Consumed);
      Pos += Consumed;
      outs() << "\tvalue " << format("0x%" PRIx64, value) << "\n";
      if (Pos >= nloh)
        return;
    }
  }
}
// Prints the dynamic libraries named by the load commands of O.
//
// With JustId set only the LC_ID_DYLIB name is printed.  Otherwise the id and
// every dylib load command (load, weak, reexport, lazy and upward) is printed
// with its compatibility and current versions, which are decoded from their
// packed form as major.minor.patch (16, 8 and 8 bits).  A name offset that
// is not smaller than the command size is reported together with the index
// of the offending load command.
static void PrintDylibs(MachOObjectFile *O, bool JustId) {
  unsigned Index = 0;
  for (const auto &Load : O->load_commands()) {
    // Count every load command so the index printed in a diagnostic is the
    // command's position in the file.  It used to advance only when a bad
    // offset was reported, which made the number a count of earlier errors.
    const unsigned CommandIndex = Index++;

    const bool IsId = Load.C.cmd == MachO::LC_ID_DYLIB;
    const bool IsUsedDylib = Load.C.cmd == MachO::LC_LOAD_DYLIB ||
                             Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB ||
                             Load.C.cmd == MachO::LC_REEXPORT_DYLIB ||
                             Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB ||
                             Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB;
    if (!(IsId || (!JustId && IsUsedDylib)))
      continue;

    MachO::dylib_command dl = O->getDylibIDLoadCommand(Load);
    if (dl.dylib.name < dl.cmdsize) {
      // The name is stored inside the load command at offset dylib.name.
      const char *p = (const char *)(Load.Ptr) + dl.dylib.name;
      if (JustId)
        outs() << p << "\n";
      else {
        outs() << "\t" << p;
        outs() << " (compatibility version "
               << ((dl.dylib.compatibility_version >> 16) & 0xffff) << "."
               << ((dl.dylib.compatibility_version >> 8) & 0xff) << "."
               << (dl.dylib.compatibility_version & 0xff) << ",";
        outs() << " current version "
               << ((dl.dylib.current_version >> 16) & 0xffff) << "."
               << ((dl.dylib.current_version >> 8) & 0xff) << "."
               << (dl.dylib.current_version & 0xff) << ")\n";
      }
    } else {
      outs() << "\tBad offset (" << dl.dylib.name << ") for name of ";
      if (Load.C.cmd == MachO::LC_ID_DYLIB)
        outs() << "LC_ID_DYLIB ";
      else if (Load.C.cmd == MachO::LC_LOAD_DYLIB)
        outs() << "LC_LOAD_DYLIB ";
      else if (Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB)
        outs() << "LC_LOAD_WEAK_DYLIB ";
      else if (Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB)
        outs() << "LC_LAZY_LOAD_DYLIB ";
      else if (Load.C.cmd == MachO::LC_REEXPORT_DYLIB)
        outs() << "LC_REEXPORT_DYLIB ";
      else if (Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB)
        outs() << "LC_LOAD_UPWARD_DYLIB ";
      else
        outs() << "LC_??? ";
      outs() << "command " << CommandIndex << "\n";
    }
  }
}
// Maps a symbol's address to its name.
typedef DenseMap<uint64_t, StringRef> SymbolAddressMap;

// Fills *AddrMap with address -> name entries for the function, data and
// "other" symbols of O, skipping symbols whose name starts with ".objc".
// When several symbols share an address the one visited last wins.  A
// failure to read a symbol name is fatal.
static void CreateSymbolAddressMap(MachOObjectFile *O,
                                   SymbolAddressMap *AddrMap) {
  for (const SymbolRef &Sym : O->symbols()) {
    const SymbolRef::Type Kind = Sym.getType();
    const bool Wanted = Kind == SymbolRef::ST_Function ||
                        Kind == SymbolRef::ST_Data ||
                        Kind == SymbolRef::ST_Other;
    if (!Wanted)
      continue;

    const uint64_t Addr = Sym.getValue();
    ErrorOr<StringRef> NameOrErr = Sym.getName();
    if (std::error_code EC = NameOrErr.getError())
      report_fatal_error(EC.message());

    const StringRef Name = *NameOrErr;
    if (Name.startswith(".objc"))
      continue;
    (*AddrMap)[Addr] = Name;
  }
}
// Given an address that might be that of a symbol, and the map built by
// CreateSymbolAddressMap(), returns the name recorded for that address or
// nullptr when the map has no (non-empty) name for it.
static const char *GuessSymbolName(uint64_t value, SymbolAddressMap *AddrMap) {
  // A DenseMap can't look up some values (presumably the two keys it
  // reserves internally), so those are never treated as symbol addresses.
  const bool Unsearchable =
      value == 0xffffffffffffffffULL || value == 0xfffffffffffffffeULL;
  if (Unsearchable)
    return nullptr;

  StringRef name = AddrMap->lookup(value);
  return name.empty() ? nullptr : name.data();
}
// Writes the single character c to outs() in escaped form.  The character is
// handed over as a NUL-terminated one-character string.
static void DumpCstringChar(const char c) {
  const char Text[2] = {c, '\0'};
  outs().write_escaped(Text);
}
// Prints the contents of a C string literal section, one string per line.
// sect points at the sect_size bytes of the section, whose address is
// sect_addr.  When print_addresses is set each string is preceded by its
// address (16 hex digits for 64-bit objects, 8 otherwise).  A final string
// that is not NUL-terminated is printed without a trailing newline.
static void DumpCstringSection(MachOObjectFile *O, const char *sect,
                               uint32_t sect_size, uint64_t sect_addr,
                               bool print_addresses) {
  uint32_t Pos = 0;
  while (Pos < sect_size) {
    if (print_addresses) {
      if (O->is64Bit())
        outs() << format("%016" PRIx64, sect_addr + Pos) << " ";
      else
        outs() << format("%08" PRIx64, sect_addr + Pos) << " ";
    }

    // Emit the characters of this string up to its terminator.
    while (Pos < sect_size && sect[Pos] != '\0') {
      DumpCstringChar(sect[Pos]);
      Pos++;
    }

    // Still inside the section means the scan stopped on a terminator.
    if (Pos < sect_size)
      outs() << "\n";

    // Step over the terminator to the start of the next string.
    Pos++;
  }
}
// Prints one 4-byte literal: its raw bits l in hex followed by either the
// value f in scientific notation or, when all exponent bits are set, a
// description of the infinity or NaN the bits encode.
static void DumpLiteral4(uint32_t l, float f) {
  outs() << format("0x%08" PRIx32, l);

  const uint32_t ExponentMask = 0x7f800000;
  if ((l & ExponentMask) != ExponentMask) {
    outs() << format(" (%.16e)\n", f);
    return;
  }

  const char *Description;
  if (l == 0x7f800000)
    Description = " (+Infinity)\n";
  else if (l == 0xff800000)
    Description = " (-Infinity)\n";
  else if ((l & 0x00400000) == 0x00400000)
    Description = " (non-signaling Not-a-Number)\n";
  else
    Description = " (signaling Not-a-Number)\n";
  outs() << Description;
}
// Prints a 4-byte literal section as a sequence of floats, one per line via
// DumpLiteral4().  Each element is read both as a float and as its raw 32-bit
// pattern and byte-swapped when the object's endianness differs from the
// host's.  When print_addresses is set each line starts with the element's
// address.
static void DumpLiteral4Section(MachOObjectFile *O, const char *sect,
                                uint32_t sect_size, uint64_t sect_addr,
                                bool print_addresses) {
  for (uint32_t Off = 0; Off < sect_size; Off += sizeof(float)) {
    if (print_addresses) {
      const uint64_t Addr = sect_addr + Off;
      if (O->is64Bit())
        outs() << format("%016" PRIx64, Addr) << " ";
      else
        outs() << format("%08" PRIx64, Addr) << " ";
    }

    const char *Src = sect + Off;
    float AsFloat;
    uint32_t AsBits;
    memcpy(&AsFloat, Src, sizeof(float));
    memcpy(&AsBits, Src, sizeof(uint32_t));
    if (O->isLittleEndian() != sys::IsLittleEndianHost) {
      sys::swapByteOrder(AsFloat);
      sys::swapByteOrder(AsBits);
    }
    DumpLiteral4(AsBits, AsFloat);
  }
}
// Prints one 8-byte literal: its two 32-bit words l0 and l1 in hex (in the
// order they appear in the file) followed by either the value d in scientific
// notation or a description of the infinity or NaN the bits encode.  Which
// word holds the high half depends on the endianness of O.
static void DumpLiteral8(MachOObjectFile *O, uint32_t l0, uint32_t l1,
                         double d) {
  outs() << format("0x%08" PRIx32, l0) << " " << format("0x%08" PRIx32, l1);

  const bool LittleEndian = O->isLittleEndian();
  const uint32_t Hi = LittleEndian ? l1 : l0;
  const uint32_t Lo = LittleEndian ? l0 : l1;

  // Hi is the high word, so this is equivalent to if(isfinite(d))
  if ((Hi & 0x7ff00000) != 0x7ff00000) {
    outs() << format(" (%.16e)\n", d);
    return;
  }

  const char *Description;
  if (Hi == 0x7ff00000 && Lo == 0)
    Description = " (+Infinity)\n";
  else if (Hi == 0xfff00000 && Lo == 0)
    Description = " (-Infinity)\n";
  else if ((Hi & 0x00080000) == 0x00080000)
    Description = " (non-signaling Not-a-Number)\n";
  else
    Description = " (signaling Not-a-Number)\n";
  outs() << Description;
}
// Prints an 8-byte literal section as a sequence of doubles, one per line via
// DumpLiteral8().  Each element is read both as a double and as two 32-bit
// words, all byte-swapped when the object's endianness differs from the
// host's.  When print_addresses is set each line starts with the element's
// address.
static void DumpLiteral8Section(MachOObjectFile *O, const char *sect,
                                uint32_t sect_size, uint64_t sect_addr,
                                bool print_addresses) {
  for (uint32_t Off = 0; Off < sect_size; Off += sizeof(double)) {
    if (print_addresses) {
      const uint64_t Addr = sect_addr + Off;
      if (O->is64Bit())
        outs() << format("%016" PRIx64, Addr) << " ";
      else
        outs() << format("%08" PRIx64, Addr) << " ";
    }

    const char *Src = sect + Off;
    double AsDouble;
    uint32_t Word0, Word1;
    memcpy(&AsDouble, Src, sizeof(double));
    memcpy(&Word0, Src, sizeof(uint32_t));
    memcpy(&Word1, Src + sizeof(uint32_t), sizeof(uint32_t));
    if (O->isLittleEndian() != sys::IsLittleEndianHost) {
      sys::swapByteOrder(AsDouble);
      sys::swapByteOrder(Word0);
      sys::swapByteOrder(Word1);
    }
    DumpLiteral8(O, Word0, Word1, AsDouble);
  }
}
// Prints one 16-byte literal as its four 32-bit words in hex, separated by
// spaces and terminated by a newline.
static void DumpLiteral16(uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3) {
  outs() << format("0x%08" PRIx32, l0) << " " << format("0x%08" PRIx32, l1)
         << " " << format("0x%08" PRIx32, l2) << " "
         << format("0x%08" PRIx32, l3) << "\n";
}
// Prints a 16-byte literal section, one literal per line via DumpLiteral16().
// Each literal is read as four 32-bit words that are byte-swapped when the
// object's endianness differs from the host's.  When print_addresses is set
// each line starts with the literal's address.
static void DumpLiteral16Section(MachOObjectFile *O, const char *sect,
                                 uint32_t sect_size, uint64_t sect_addr,
                                 bool print_addresses) {
  for (uint32_t Off = 0; Off < sect_size; Off += 16) {
    if (print_addresses) {
      const uint64_t Addr = sect_addr + Off;
      if (O->is64Bit())
        outs() << format("%016" PRIx64, Addr) << " ";
      else
        outs() << format("%08" PRIx64, Addr) << " ";
    }

    uint32_t Words[4];
    for (unsigned W = 0; W < 4; ++W)
      memcpy(&Words[W], sect + Off + W * sizeof(uint32_t), sizeof(uint32_t));
    if (O->isLittleEndian() != sys::IsLittleEndianHost) {
      for (unsigned W = 0; W < 4; ++W)
        sys::swapByteOrder(Words[W]);
    }
    DumpLiteral16(Words[0], Words[1], Words[2], Words[3]);
  }
}
// Prints the contents of a literal pointer section.
//
// Section is the literal pointer section itself; sect, sect_size and
// sect_addr give its contents, size and address.  Every pointer in the
// section (8 bytes for 64-bit objects, 4 otherwise) is resolved and printed
// on its own line, optionally preceded by its address:
//  - if an external relocation entry exists at the pointer's offset, the
//    name of the symbol it refers to is printed;
//  - otherwise, if the pointer lands inside one of the object's literal
//    sections (cstring, 4-, 8- or 16-byte literals), the segment and section
//    name and the literal it points to are printed;
//  - otherwise the pointer value is printed with a note that it is not in a
//    literal section.
static void DumpLiteralPointerSection(MachOObjectFile *O,
                                      const SectionRef &Section,
                                      const char *sect, uint32_t sect_size,
                                      uint64_t sect_addr,
                                      bool print_addresses) {
  // Reads the section type out of the 32- or 64-bit section header.
  auto SectionTypeOf = [O](DataRefImpl Ref) -> uint32_t {
    if (O->is64Bit()) {
      const MachO::section_64 Sec = O->getSection64(Ref);
      return Sec.flags & MachO::SECTION_TYPE;
    }
    const MachO::section Sec = O->getSection(Ref);
    return Sec.flags & MachO::SECTION_TYPE;
  };

  // Collect the literal sections in this Mach-O file.
  std::vector<SectionRef> LiteralSections;
  for (const SectionRef &Candidate : O->sections()) {
    uint32_t section_type = SectionTypeOf(Candidate.getRawDataRefImpl());
    if (section_type == MachO::S_CSTRING_LITERALS ||
        section_type == MachO::S_4BYTE_LITERALS ||
        section_type == MachO::S_8BYTE_LITERALS ||
        section_type == MachO::S_16BYTE_LITERALS)
      LiteralSections.push_back(Candidate);
  }

  // Set the size of the literal pointer.
  uint32_t lp_size = O->is64Bit() ? 8 : 4;

  // Collect the external relocation symbols for the literal pointers.
  std::vector<std::pair<uint64_t, SymbolRef>> Relocs;
  for (const RelocationRef &Reloc : Section.relocations()) {
    DataRefImpl Rel = Reloc.getRawDataRefImpl();
    MachO::any_relocation_info RE = O->getRelocation(Rel);
    if (O->getPlainRelocationExternal(RE)) {
      uint64_t RelocOffset = Reloc.getOffset();
      symbol_iterator RelocSym = Reloc.getSymbol();
      Relocs.push_back(std::make_pair(RelocOffset, *RelocSym));
    }
  }
  array_pod_sort(Relocs.begin(), Relocs.end());

  const bool NeedSwap = O->isLittleEndian() != sys::IsLittleEndianHost;

  // Dump each literal pointer.
  for (uint32_t i = 0; i < sect_size; i += lp_size) {
    if (print_addresses) {
      if (O->is64Bit())
        outs() << format("%016" PRIx64, sect_addr + i) << " ";
      else
        outs() << format("%08" PRIx64, sect_addr + i) << " ";
    }
    uint64_t lp;
    if (O->is64Bit()) {
      memcpy(&lp, sect + i, sizeof(uint64_t));
      if (NeedSwap)
        sys::swapByteOrder(lp);
    } else {
      uint32_t li;
      memcpy(&li, sect + i, sizeof(uint32_t));
      if (NeedSwap)
        sys::swapByteOrder(li);
      lp = li;
    }

    // First look for an external relocation entry for this literal pointer.
    auto Reloc = std::find_if(
        Relocs.begin(), Relocs.end(),
        [&](const std::pair<uint64_t, SymbolRef> &P) { return P.first == i; });
    if (Reloc != Relocs.end()) {
      symbol_iterator RelocSym = Reloc->second;
      ErrorOr<StringRef> SymName = RelocSym->getName();
      if (std::error_code EC = SymName.getError())
        report_fatal_error(EC.message());
      outs() << "external relocation entry for symbol:" << *SymName << "\n";
      continue;
    }

    // For local references see what the section the literal pointer points to.
    auto Sect = std::find_if(LiteralSections.begin(), LiteralSections.end(),
                             [&](const SectionRef &R) {
                               return lp >= R.getAddress() &&
                                      lp < R.getAddress() + R.getSize();
                             });
    if (Sect == LiteralSections.end()) {
      outs() << format("0x%" PRIx64, lp) << " (not in a literal section)\n";
      continue;
    }

    uint64_t SectAddress = Sect->getAddress();
    uint64_t SectSize = Sect->getSize();

    StringRef SectName;
    Sect->getName(SectName);
    DataRefImpl Ref = Sect->getRawDataRefImpl();
    StringRef SegmentName = O->getSectionFinalSegmentName(Ref);
    outs() << SegmentName << ":" << SectName << ":";

    uint32_t section_type = SectionTypeOf(Ref);

    StringRef BytesStr;
    Sect->getContents(BytesStr);
    const char *Contents = reinterpret_cast<const char *>(BytesStr.data());
    // Start of the literal inside the section it was found in.
    const char *Literal = Contents + (lp - SectAddress);

    switch (section_type) {
    case MachO::S_CSTRING_LITERALS:
      for (uint64_t Pos = lp - SectAddress;
           Pos < SectSize && Contents[Pos] != '\0'; Pos++) {
        DumpCstringChar(Contents[Pos]);
      }
      outs() << "\n";
      break;
    case MachO::S_4BYTE_LITERALS: {
      float f;
      memcpy(&f, Literal, sizeof(float));
      uint32_t l;
      memcpy(&l, Literal, sizeof(uint32_t));
      if (NeedSwap) {
        sys::swapByteOrder(f);
        sys::swapByteOrder(l);
      }
      DumpLiteral4(l, f);
      break;
    }
    case MachO::S_8BYTE_LITERALS: {
      double d;
      memcpy(&d, Literal, sizeof(double));
      uint32_t l0, l1;
      memcpy(&l0, Literal, sizeof(uint32_t));
      memcpy(&l1, Literal + sizeof(uint32_t), sizeof(uint32_t));
      if (NeedSwap) {
        // Swap the double that is about to be printed.  This used to swap
        // the unrelated (and uninitialized) float of the 4-byte case, which
        // left d in the wrong byte order.
        sys::swapByteOrder(d);
        sys::swapByteOrder(l0);
        sys::swapByteOrder(l1);
      }
      DumpLiteral8(O, l0, l1, d);
      break;
    }
    case MachO::S_16BYTE_LITERALS: {
      uint32_t l0, l1, l2, l3;
      memcpy(&l0, Literal, sizeof(uint32_t));
      memcpy(&l1, Literal + sizeof(uint32_t), sizeof(uint32_t));
      memcpy(&l2, Literal + 2 * sizeof(uint32_t), sizeof(uint32_t));
      memcpy(&l3, Literal + 3 * sizeof(uint32_t), sizeof(uint32_t));
      if (NeedSwap) {
        sys::swapByteOrder(l0);
        sys::swapByteOrder(l1);
        sys::swapByteOrder(l2);
        sys::swapByteOrder(l3);
      }
      DumpLiteral16(l0, l1, l2, l3);
      break;
    }
    }
  }
}
// Prints a section of module initializer or terminator function pointers.
//
// sect points at the sect_size bytes of the section and sect_addr is its
// address.  Each pointer (8 bytes for 64-bit objects, 4 otherwise) is printed
// as "<address of the pointer> <pointer value>", byte-swapped when the
// object's endianness differs from the host's.  When verbose is set and
// AddrMap has a symbol at the pointed-to address, its name is appended.
static void DumpInitTermPointerSection(MachOObjectFile *O, const char *sect,
                                       uint32_t sect_size, uint64_t sect_addr,
                                       SymbolAddressMap *AddrMap,
                                       bool verbose) {
  const uint32_t stride =
      (O->is64Bit()) ? sizeof(uint64_t) : sizeof(uint32_t);
  for (uint32_t i = 0; i < sect_size; i += stride) {
    const char *SymbolName = nullptr;
    // i is already a byte offset (it advances by stride), so the address of
    // this pointer is sect_addr + i.  Multiplying by stride again, as was
    // done before, printed a wrong address for every entry after the first.
    const uint64_t EntryAddr = sect_addr + i;
    if (O->is64Bit()) {
      outs() << format("0x%016" PRIx64, EntryAddr) << " ";
      uint64_t pointer_value;
      memcpy(&pointer_value, sect + i, stride);
      if (O->isLittleEndian() != sys::IsLittleEndianHost)
        sys::swapByteOrder(pointer_value);
      outs() << format("0x%016" PRIx64, pointer_value);
      if (verbose)
        SymbolName = GuessSymbolName(pointer_value, AddrMap);
    } else {
      outs() << format("0x%08" PRIx64, EntryAddr) << " ";
      uint32_t pointer_value;
      memcpy(&pointer_value, sect + i, stride);
      if (O->isLittleEndian() != sys::IsLittleEndianHost)
        sys::swapByteOrder(pointer_value);
      outs() << format("0x%08" PRIx32, pointer_value);
      if (verbose)
        SymbolName = GuessSymbolName(pointer_value, AddrMap);
    }
    if (SymbolName)
      outs() << " " << SymbolName;
    outs() << "\n";
  }
}
// Prints the size bytes at sect as a hex dump, with each line prefixed by
// its address starting at addr (16 hex digits for 64-bit objects, 8
// otherwise).
//
// For i386 and x86_64 objects the dump is byte oriented, 16 bytes per line.
// For every other cpu type it is word oriented: up to four 32-bit words per
// line, byte-swapped when the object's endianness differs from the host's,
// with any bytes left over after the last full word printed individually.
static void DumpRawSectionContents(MachOObjectFile *O, const char *sect,
                                   uint32_t size, uint64_t addr) {
  uint32_t cputype = O->getHeader().cputype;
  if (cputype == MachO::CPU_TYPE_I386 || cputype == MachO::CPU_TYPE_X86_64) {
    uint32_t j;
    for (uint32_t i = 0; i < size; i += j, addr += j) {
      if (O->is64Bit())
        outs() << format("%016" PRIx64, addr) << "\t";
      else
        outs() << format("%08" PRIx64, addr) << "\t";
      for (j = 0; j < 16 && i + j < size; j++) {
        uint8_t byte_word = *(sect + i + j);
        outs() << format("%02" PRIx32, (uint32_t)byte_word) << " ";
      }
      outs() << "\n";
    }
  } else {
    uint32_t j;
    for (uint32_t i = 0; i < size; i += j, addr += j) {
      if (O->is64Bit())
        outs() << format("%016" PRIx64, addr) << "\t";
      else
        // Print the section address, not the host pointer to the contents.
        outs() << format("%08" PRIx64, addr) << "\t";
      for (j = 0; j < 4 * sizeof(int32_t) && i + j < size;
           j += sizeof(int32_t)) {
        // "<=" so a word that ends exactly at the end of the section is
        // still printed as a word rather than as separate bytes.
        if (i + j + sizeof(int32_t) <= size) {
          uint32_t long_word;
          memcpy(&long_word, sect + i + j, sizeof(int32_t));
          if (O->isLittleEndian() != sys::IsLittleEndianHost)
            sys::swapByteOrder(long_word);
          outs() << format("%08" PRIx32, long_word) << " ";
        } else {
          // Fewer than four bytes remain; print each one.  The index must
          // include k, otherwise the same byte is printed repeatedly.
          for (uint32_t k = 0; i + j + k < size; k++) {
            uint8_t byte_word = *(sect + i + j + k);
            outs() << format("%02" PRIx32, (uint32_t)byte_word) << " ";
          }
        }
      }
      outs() << "\n";
    }
  }
}
// Forward declarations of helpers that DumpSectionContents() calls before
// their definitions appear in this file.

// Called by DumpSectionContents() for sections flagged as containing
// instructions, passing the segment and section name being dumped.
static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
                             StringRef DisSegName, StringRef DisSectName);
// Called by DumpSectionContents() for the (__OBJC,__protocol) section.
// Note that addr is declared as a 32-bit value here.
static void DumpProtocolSection(MachOObjectFile *O, const char *sect,
                                uint32_t size, uint32_t addr);
// Prints the contents of every section selected by the FilterSections list.
//
// Each filter is either "segname,sectname" or just "sectname" (which matches
// that section name in any segment).  For every matching section a heading
// is printed, followed by the contents.  When verbose is set the contents
// are interpreted: instruction sections are disassembled, the
// (__TEXT,__info_plist) section is printed as text, (__OBJC,__protocol) goes
// to DumpProtocolSection(), and everything else is formatted according to
// its section type.  Without verbose the contents are dumped raw, except for
// zerofill sections, which have no contents in the file.
static void DumpSectionContents(StringRef Filename, MachOObjectFile *O,
                                bool verbose) {
  SymbolAddressMap AddrMap;
  if (verbose)
    CreateSymbolAddressMap(O, &AddrMap);

  for (unsigned FilterIdx = 0; FilterIdx < FilterSections.size();
       ++FilterIdx) {
    StringRef DumpSection = FilterSections[FilterIdx];
    // Split "seg,sect"; with no (or an empty) second part the whole filter
    // is taken to be the section name.
    std::pair<StringRef, StringRef> Parts = DumpSection.split(',');
    const bool HasSegment = Parts.second.size() != 0;
    StringRef DumpSegName = HasSegment ? Parts.first : StringRef("");
    StringRef DumpSectName = HasSegment ? Parts.second : Parts.first;

    for (const SectionRef &Section : O->sections()) {
      StringRef SectName;
      Section.getName(SectName);
      DataRefImpl Ref = Section.getRawDataRefImpl();
      StringRef SegName = O->getSectionFinalSegmentName(Ref);

      const bool SegmentMatches =
          DumpSegName.empty() || SegName == DumpSegName;
      if (!SegmentMatches || SectName != DumpSectName)
        continue;

      uint32_t section_flags;
      if (O->is64Bit()) {
        const MachO::section_64 Sec = O->getSection64(Ref);
        section_flags = Sec.flags;
      } else {
        const MachO::section Sec = O->getSection(Ref);
        section_flags = Sec.flags;
      }
      const uint32_t section_type = section_flags & MachO::SECTION_TYPE;

      StringRef BytesStr;
      Section.getContents(BytesStr);
      const char *sect = reinterpret_cast<const char *>(BytesStr.data());
      uint32_t sect_size = BytesStr.size();
      uint64_t sect_addr = Section.getAddress();

      outs() << "Contents of (" << SegName << "," << SectName
             << ") section\n";

      if (!verbose) {
        if (section_type == MachO::S_ZEROFILL)
          outs() << "zerofill section and has no contents in the file\n";
        else
          DumpRawSectionContents(O, sect, sect_size, sect_addr);
        continue;
      }

      // Verbose: sections with dedicated printers come first.
      if ((section_flags & MachO::S_ATTR_PURE_INSTRUCTIONS) ||
          (section_flags & MachO::S_ATTR_SOME_INSTRUCTIONS)) {
        DisassembleMachO(Filename, O, SegName, SectName);
        continue;
      }
      if (SegName == "__TEXT" && SectName == "__info_plist") {
        outs() << sect;
        continue;
      }
      if (SegName == "__OBJC" && SectName == "__protocol") {
        DumpProtocolSection(O, sect, sect_size, sect_addr);
        continue;
      }

      // Otherwise the section type decides how the bytes are formatted.
      const bool ShowAddresses = !NoLeadingAddr;
      switch (section_type) {
      case MachO::S_REGULAR:
        DumpRawSectionContents(O, sect, sect_size, sect_addr);
        break;
      case MachO::S_ZEROFILL:
        outs() << "zerofill section and has no contents in the file\n";
        break;
      case MachO::S_CSTRING_LITERALS:
        DumpCstringSection(O, sect, sect_size, sect_addr, ShowAddresses);
        break;
      case MachO::S_4BYTE_LITERALS:
        DumpLiteral4Section(O, sect, sect_size, sect_addr, ShowAddresses);
        break;
      case MachO::S_8BYTE_LITERALS:
        DumpLiteral8Section(O, sect, sect_size, sect_addr, ShowAddresses);
        break;
      case MachO::S_16BYTE_LITERALS:
        DumpLiteral16Section(O, sect, sect_size, sect_addr, ShowAddresses);
        break;
      case MachO::S_LITERAL_POINTERS:
        DumpLiteralPointerSection(O, Section, sect, sect_size, sect_addr,
                                  ShowAddresses);
        break;
      case MachO::S_MOD_INIT_FUNC_POINTERS:
      case MachO::S_MOD_TERM_FUNC_POINTERS:
        DumpInitTermPointerSection(O, sect, sect_size, sect_addr, &AddrMap,
                                   verbose);
        break;
      default:
        outs() << "Unknown section type ("
               << format("0x%08" PRIx32, section_type) << ")\n";
        DumpRawSectionContents(O, sect, sect_size, sect_addr);
        break;
      }
    }
  }
}
// DumpInfoPlistSectionContents() prints the first (__TEXT,__info_plist)
// section found in the Mach-O file O, preceded by a "Contents of ..." header.
// Nothing is printed if the file has no such section. Filename is unused.
static void DumpInfoPlistSectionContents(StringRef Filename,
                                         MachOObjectFile *O) {
  for (const SectionRef &Section : O->sections()) {
    StringRef SectName;
    Section.getName(SectName);
    DataRefImpl Ref = Section.getRawDataRefImpl();
    StringRef SegName = O->getSectionFinalSegmentName(Ref);
    if (SegName == "__TEXT" && SectName == "__info_plist") {
      outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
      StringRef BytesStr;
      Section.getContents(BytesStr);
      // The section is not guaranteed to be NUL terminated, so print at most
      // the section's own bytes (stopping at the first NUL, if any) rather
      // than streaming the data pointer as a C string.
      outs() << BytesStr.substr(0, BytesStr.find('\0'));
      return;
    }
  }
}
// checkMachOAndArchFlags() checks whether the ObjectFile is a Mach-O file and,
// if it is and a list of architecture flags was specified (without "all"),
// makes sure the file's architecture is one of those requested. If it is not,
// an error is printed for each requested architecture and false is returned.
// In every other case (not Mach-O, -arch all, or no -arch flags) it returns
// true.
static bool checkMachOAndArchFlags(ObjectFile *O, StringRef Filename) {
  MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(O);
  if (!MachOOF || ArchAll || ArchFlags.size() == 0)
    return true;

  // Derive the architecture of this file from its (32- or 64-bit) header.
  Triple T;
  if (MachOOF->is64Bit()) {
    MachO::mach_header_64 H_64 = MachOOF->getHeader64();
    T = MachOObjectFile::getArch(H_64.cputype, H_64.cpusubtype);
  } else {
    MachO::mach_header H = MachOOF->getHeader();
    T = MachOObjectFile::getArch(H.cputype, H.cpusubtype);
  }

  // The file is acceptable if it matches any of the requested architectures.
  // Every flag has to be examined, not just the first one.
  for (unsigned i = 0; i < ArchFlags.size(); ++i) {
    if (ArchFlags[i] == T.getArchName())
      return true;
  }

  // No match: report each architecture that was asked for.
  for (unsigned i = 0; i < ArchFlags.size(); ++i) {
    errs() << "llvm-objdump: file: " + Filename + " does not contain "
           << "architecture: " + ArchFlags[i] + "\n";
  }
  return false;
}
static void printObjcMetaData(MachOObjectFile *O, bool verbose);
// ProcessMachO() handles one opened Mach-O file, which may be an archive
// member and/or a slice of a universal file. It prints the file name banner
// (when one of the banner-producing options is on) and then runs each printer
// selected on the command line, in a fixed order.
static void ProcessMachO(StringRef Filename, MachOObjectFile *MachOOF,
                         StringRef ArchiveMemberName = StringRef(),
                         StringRef ArchitectureName = StringRef()) {
  const bool Verbose = !NonVerbose;
  const bool HaveSectionFilter = FilterSections.size() != 0;

  // The banner is only printed when some processing is done on the Mach-O
  // file itself; it is not printed for things like the UniversalHeaders or
  // ArchiveHeaders output.
  const bool WantBanner = Disassemble || PrivateHeaders || ExportsTrie ||
                          Rebase || Bind || LazyBind || WeakBind ||
                          IndirectSymbols || DataInCode || LinkOptHints ||
                          DylibsUsed || DylibId || ObjcMetaData ||
                          HaveSectionFilter;
  if (WantBanner) {
    raw_ostream &OS = outs();
    OS << Filename;
    if (!ArchiveMemberName.empty())
      OS << '(' << ArchiveMemberName << ')';
    if (!ArchitectureName.empty())
      OS << " (architecture " << ArchitectureName << ")";
    OS << ":\n";
  }

  // Code and tables.
  if (Disassemble)
    DisassembleMachO(Filename, MachOOF, "__TEXT", "__text");
  if (IndirectSymbols)
    PrintIndirectSymbols(MachOOF, Verbose);
  if (DataInCode)
    PrintDataInCodeTable(MachOOF, Verbose);
  if (LinkOptHints)
    PrintLinkOptHints(MachOOF);
  if (Relocations)
    PrintRelocations(MachOOF);

  // Sections.
  if (SectionHeaders)
    PrintSectionHeaders(MachOOF);
  if (SectionContents)
    PrintSectionContents(MachOOF);
  if (HaveSectionFilter)
    DumpSectionContents(Filename, MachOOF, Verbose);
  if (InfoPlist)
    DumpInfoPlistSectionContents(Filename, MachOOF);

  // Dylibs, symbols and unwind info.
  if (DylibsUsed)
    PrintDylibs(MachOOF, false);
  if (DylibId)
    PrintDylibs(MachOOF, true);
  if (SymbolTable)
    PrintSymbolTable(MachOOF);
  if (UnwindInfo)
    printMachOUnwindInfo(MachOOF);

  // Headers and Objective-C meta data.
  if (PrivateHeaders) {
    printMachOFileHeader(MachOOF);
    printMachOLoadCommands(MachOOF);
  }
  if (FirstPrivateHeader)
    printMachOFileHeader(MachOOF);
  if (ObjcMetaData)
    printObjcMetaData(MachOOF, Verbose);

  // Dyld info.
  if (ExportsTrie)
    printExportsTrie(MachOOF);
  if (Rebase)
    printRebaseTable(MachOOF);
  if (Bind)
    printBindTable(MachOOF);
  if (LazyBind)
    printLazyBindTable(MachOOF);
  if (WeakBind)
    printWeakBindTable(MachOOF);
}
// printUnknownCPUType() is the fallback used when printing fat headers for a
// CPU that has no symbolic name: both values are printed numerically, in
// parentheses.
static void printUnknownCPUType(uint32_t cputype, uint32_t cpusubtype) {
  raw_ostream &OS = outs();
  OS << " cputype (" << cputype << ")\n"
     << " cpusubtype (" << cpusubtype << ")\n";
}
// printCPUType() helps print_fat_headers by printing the cputype and
// cpusubtype, symbolically for the combinations it knows about. Any other
// combination is printed numerically through printUnknownCPUType().
static void printCPUType(uint32_t cputype, uint32_t cpusubtype) {
  // Resolve the symbolic names first; SubtypeName stays null unless both the
  // CPU type and the subtype are recognized.
  const char *TypeName = nullptr;
  const char *SubtypeName = nullptr;

  switch (cputype) {
  case MachO::CPU_TYPE_I386:
    TypeName = "CPU_TYPE_I386";
    switch (cpusubtype) {
    case MachO::CPU_SUBTYPE_I386_ALL:
      SubtypeName = "CPU_SUBTYPE_I386_ALL";
      break;
    default:
      break;
    }
    break;
  case MachO::CPU_TYPE_X86_64:
    TypeName = "CPU_TYPE_X86_64";
    switch (cpusubtype) {
    case MachO::CPU_SUBTYPE_X86_64_ALL:
      SubtypeName = "CPU_SUBTYPE_X86_64_ALL";
      break;
    case MachO::CPU_SUBTYPE_X86_64_H:
      SubtypeName = "CPU_SUBTYPE_X86_64_H";
      break;
    default:
      break;
    }
    break;
  case MachO::CPU_TYPE_ARM:
    TypeName = "CPU_TYPE_ARM";
    switch (cpusubtype) {
    case MachO::CPU_SUBTYPE_ARM_ALL:
      SubtypeName = "CPU_SUBTYPE_ARM_ALL";
      break;
    case MachO::CPU_SUBTYPE_ARM_V4T:
      SubtypeName = "CPU_SUBTYPE_ARM_V4T";
      break;
    case MachO::CPU_SUBTYPE_ARM_V5TEJ:
      SubtypeName = "CPU_SUBTYPE_ARM_V5TEJ";
      break;
    case MachO::CPU_SUBTYPE_ARM_XSCALE:
      SubtypeName = "CPU_SUBTYPE_ARM_XSCALE";
      break;
    case MachO::CPU_SUBTYPE_ARM_V6:
      SubtypeName = "CPU_SUBTYPE_ARM_V6";
      break;
    case MachO::CPU_SUBTYPE_ARM_V6M:
      SubtypeName = "CPU_SUBTYPE_ARM_V6M";
      break;
    case MachO::CPU_SUBTYPE_ARM_V7:
      SubtypeName = "CPU_SUBTYPE_ARM_V7";
      break;
    case MachO::CPU_SUBTYPE_ARM_V7EM:
      SubtypeName = "CPU_SUBTYPE_ARM_V7EM";
      break;
    case MachO::CPU_SUBTYPE_ARM_V7K:
      SubtypeName = "CPU_SUBTYPE_ARM_V7K";
      break;
    case MachO::CPU_SUBTYPE_ARM_V7M:
      SubtypeName = "CPU_SUBTYPE_ARM_V7M";
      break;
    case MachO::CPU_SUBTYPE_ARM_V7S:
      SubtypeName = "CPU_SUBTYPE_ARM_V7S";
      break;
    default:
      break;
    }
    break;
  case MachO::CPU_TYPE_ARM64:
    TypeName = "CPU_TYPE_ARM64";
    // Only this CPU type strips the capability bits before matching.
    switch (cpusubtype & ~MachO::CPU_SUBTYPE_MASK) {
    case MachO::CPU_SUBTYPE_ARM64_ALL:
      SubtypeName = "CPU_SUBTYPE_ARM64_ALL";
      break;
    default:
      break;
    }
    break;
  default:
    break;
  }

  if (SubtypeName == nullptr) {
    // Unknown CPU type, or known type with an unknown subtype: print the
    // caller's values untouched.
    printUnknownCPUType(cputype, cpusubtype);
    return;
  }

  outs() << " cputype " << TypeName << "\n";
  outs() << " cpusubtype " << SubtypeName << "\n";
}
// printMachOUniversalHeaders() prints the fat header of the universal binary
// UB followed by one entry per architecture slice. When verbose is true the
// magic, CPU type/subtype and capabilities are printed symbolically where
// known, otherwise numerically. Values that look malformed (no architectures,
// table or slice past the end of the file, misaligned offset, duplicate
// architecture) are annotated in the output rather than treated as errors.
static void printMachOUniversalHeaders(const object::MachOUniversalBinary *UB,
                                       bool verbose) {
  outs() << "Fat headers\n";
  if (verbose)
    outs() << "fat_magic FAT_MAGIC\n";
  else
    outs() << "fat_magic " << format("0x%" PRIx32, MachO::FAT_MAGIC) << "\n";

  uint32_t nfat_arch = UB->getNumberOfObjects();
  StringRef Buf = UB->getData();
  uint64_t size = Buf.size();
  // Computed in 64 bits so a huge nfat_arch cannot wrap on 32-bit hosts.
  uint64_t big_size =
      sizeof(struct MachO::fat_header) +
      static_cast<uint64_t>(nfat_arch) * sizeof(struct MachO::fat_arch);
  outs() << "nfat_arch " << nfat_arch;
  if (nfat_arch == 0)
    outs() << " (malformed, contains zero architecture types)\n";
  else if (big_size > size)
    outs() << " (malformed, architectures past end of file)\n";
  else
    outs() << "\n";

  for (uint32_t i = 0; i < nfat_arch; ++i) {
    MachOUniversalBinary::ObjectForArch OFA(UB, i);
    uint32_t cputype = OFA.getCPUType();
    uint32_t cpusubtype = OFA.getCPUSubType();
    // Widen the slice fields up front: the values come straight from the file
    // and 32-bit arithmetic on them can wrap or shift out of range.
    const uint64_t slice_offset = OFA.getOffset();
    const uint64_t slice_size = OFA.getSize();
    const uint32_t slice_align = OFA.getAlign();
    // 2^align is only representable for align < 64.
    const bool align_fits = slice_align < 64;
    const uint64_t align_value =
        align_fits ? (static_cast<uint64_t>(1) << slice_align) : 0;

    outs() << "architecture ";
    // Flag this slice if an earlier slice has the same CPU type and subtype
    // (ignoring the capability bits).
    for (uint32_t j = 0; j < i; j++) {
      MachOUniversalBinary::ObjectForArch other_OFA(UB, j);
      uint32_t other_cputype = other_OFA.getCPUType();
      uint32_t other_cpusubtype = other_OFA.getCPUSubType();
      if (cputype != 0 && cpusubtype != 0 && cputype == other_cputype &&
          (cpusubtype & ~MachO::CPU_SUBTYPE_MASK) ==
              (other_cpusubtype & ~MachO::CPU_SUBTYPE_MASK)) {
        outs() << "(illegal duplicate architecture) ";
        break;
      }
    }
    if (verbose) {
      outs() << OFA.getArchTypeName() << "\n";
      printCPUType(cputype, cpusubtype & ~MachO::CPU_SUBTYPE_MASK);
    } else {
      outs() << i << "\n";
      outs() << " cputype " << cputype << "\n";
      outs() << " cpusubtype " << (cpusubtype & ~MachO::CPU_SUBTYPE_MASK)
             << "\n";
    }
    if (verbose &&
        (cpusubtype & MachO::CPU_SUBTYPE_MASK) == MachO::CPU_SUBTYPE_LIB64)
      outs() << " capabilities CPU_SUBTYPE_LIB64\n";
    else
      outs() << " capabilities "
             << format("0x%" PRIx32,
                       (cpusubtype & MachO::CPU_SUBTYPE_MASK) >> 24) << "\n";

    outs() << " offset " << slice_offset;
    if (slice_offset > size)
      outs() << " (past end of file)";
    // With an alignment too large to represent, only offset 0 is aligned.
    const bool misaligned = align_fits ? (slice_offset % align_value != 0)
                                       : (slice_offset != 0);
    if (misaligned)
      outs() << " (not aligned on it's alignment (2^" << slice_align << ")";
    outs() << "\n";

    outs() << " size " << slice_size;
    // 64-bit sum: offset + size of a malformed slice may exceed 32 bits.
    big_size = slice_offset + slice_size;
    if (big_size > size)
      outs() << " (past end of file)";
    outs() << "\n";

    outs() << " align 2^" << slice_align << " (";
    if (align_fits)
      outs() << align_value;
    else
      outs() << "too large";
    outs() << ")\n";
  }
}
// printArchiveChild() prints one line describing the archive member C: its
// access mode, uid/gid, raw size, last-modified time and name, preceded by the
// member's offset when print_offset is true. When verbose is true the mode is
// shown as an "rwx" string, the time is formatted with ctime(3) and the member
// name from getName() is used (falling back to the raw name on error);
// otherwise the mode is printed in octal and the raw time and raw name are
// printed. A failure to read the member size is a fatal error.
static void printArchiveChild(const Archive::Child &C, bool verbose,
                              bool print_offset) {
  if (print_offset)
    outs() << C.getChildOffset() << "\t";
  sys::fs::perms Mode = C.getAccessMode();
  if (verbose) {
    // FIXME: this first dash, "-", is for (Mode & S_IFMT) == S_IFREG.
    // But there is nothing in sys::fs::perms for S_IFMT or S_IFREG.
    outs() << "-";
    outs() << ((Mode & sys::fs::owner_read) ? "r" : "-");
    outs() << ((Mode & sys::fs::owner_write) ? "w" : "-");
    outs() << ((Mode & sys::fs::owner_exe) ? "x" : "-");
    outs() << ((Mode & sys::fs::group_read) ? "r" : "-");
    outs() << ((Mode & sys::fs::group_write) ? "w" : "-");
    outs() << ((Mode & sys::fs::group_exe) ? "x" : "-");
    outs() << ((Mode & sys::fs::others_read) ? "r" : "-");
    outs() << ((Mode & sys::fs::others_write) ? "w" : "-");
    outs() << ((Mode & sys::fs::others_exe) ? "x" : "-");
  } else {
    outs() << format("0%o ", Mode);
  }

  // The ids and the size are unsigned, so use unsigned conversions; large
  // values must not be printed as negative numbers.
  unsigned UID = C.getUID();
  outs() << format("%3u/", UID);
  unsigned GID = C.getGID();
  outs() << format("%-3u ", GID);
  ErrorOr<uint64_t> Size = C.getRawSize();
  if (std::error_code EC = Size.getError())
    report_fatal_error(EC.message());
  outs() << format("%5" PRIu64, Size.get()) << " ";

  StringRef RawLastModified = C.getRawLastModified();
  if (verbose) {
    unsigned Seconds;
    if (RawLastModified.getAsInteger(10, Seconds))
      // Not a decimal number: show the offending raw field inside the quotes.
      outs() << "(date: \"" << RawLastModified
             << "\" contains non-decimal chars) ";
    else {
      // Since ctime(3) returns a 26 character string of the form:
      // "Sun Sep 16 01:03:52 1973\n\0"
      // just print 24 characters.
      time_t t = Seconds;
      outs() << format("%.24s ", ctime(&t));
    }
  } else {
    outs() << RawLastModified << " ";
  }

  if (verbose) {
    ErrorOr<StringRef> NameOrErr = C.getName();
    if (NameOrErr.getError()) {
      StringRef RawName = C.getRawName();
      outs() << RawName << "\n";
    } else {
      StringRef Name = NameOrErr.get();
      outs() << Name << "\n";
    }
  } else {
    StringRef RawName = C.getRawName();
    outs() << RawName << "\n";
  }
}
// printArchiveHeaders() prints one header line per member of archive A by
// handing each child to printArchiveChild(). The children are enumerated with
// child_begin(false). An error while iterating is fatal.
static void printArchiveHeaders(Archive *A, bool verbose, bool print_offset) {
  Archive::child_iterator Member = A->child_begin(false);
  Archive::child_iterator End = A->child_end();
  while (Member != End) {
    if (std::error_code EC = Member->getError())
      report_fatal_error(EC.message());
    printArchiveChild(**Member, verbose, print_offset);
    ++Member;
  }
}
// ParseInputMachO() parses the named Mach-O file in Filename and handles the
// -arch flags selecting just those slices as specified by them and also parses
// archive files. Then for each individual Mach-O file ProcessMachO() is
// called to process the file based on the command line options.
void llvm::ParseInputMachO(StringRef Filename) {
// Check for -arch all and verifiy the -arch flags are valid.
for (unsigned i = 0; i < ArchFlags.size(); ++i) {
if (ArchFlags[i] == "all") {
ArchAll = true;
} else {
if (!MachOObjectFile::isValidArch(ArchFlags[i])) {
errs() << "llvm-objdump: Unknown architecture named '" + ArchFlags[i] +
"'for the -arch option\n";
return;
}
}
}
// Attempt to open the binary.
ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(Filename);
if (std::error_code EC = BinaryOrErr.getError())
report_error(Filename, EC);
Binary &Bin = *BinaryOrErr.get().getBinary();
if (Archive *A = dyn_cast<Archive>(&Bin)) {
outs() << "Archive : " << Filename << "\n";
if (ArchiveHeaders)
printArchiveHeaders(A, !NonVerbose, ArchiveMemberOffsets);
for (Archive::child_iterator I = A->child_begin(), E = A->child_end();
I != E; ++I) {
if (std::error_code EC = I->getError())
report_error(Filename, EC);
auto &C = I->get();
ErrorOr<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (ChildOrErr.getError())
continue;
if (MachOObjectFile *O = dyn_cast<MachOObjectFile>(&*ChildOrErr.get())) {
if (!checkMachOAndArchFlags(O, Filename))
return;
ProcessMachO(Filename, O, O->getFileName());
}
}
return;
}
if (UniversalHeaders) {
if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin))
printMachOUniversalHeaders(UB, !NonVerbose);
}
if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin)) {
// If we have a list of architecture flags specified dump only those.
if (!ArchAll && ArchFlags.size() != 0) {
// Look for a slice in the universal binary that matches each ArchFlag.
bool ArchFound;
for (unsigned i = 0; i < ArchFlags.size(); ++i) {
ArchFound = false;
for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
E = UB->end_objects();
I != E; ++I) {
if (ArchFlags[i] == I->getArchTypeName()) {
ArchFound = true;
ErrorOr<std::unique_ptr<ObjectFile>> ObjOrErr =
I->getAsObjectFile();
std::string ArchitectureName = "";
if (ArchFlags.size() > 1)
ArchitectureName = I->getArchTypeName();
if (ObjOrErr) {
ObjectFile &O = *ObjOrErr.get();
if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&O))
ProcessMachO(Filename, MachOOF, "", ArchitectureName);
} else if (ErrorOr<std::unique_ptr<Archive>> AOrErr =
I->getAsArchive()) {
std::unique_ptr<Archive> &A = *AOrErr;
outs() << "Archive : " << Filename;
if (!ArchitectureName.empty())
outs() << " (architecture " << ArchitectureName << ")";
outs() << "\n";
if (ArchiveHeaders)
printArchiveHeaders(A.get(), !NonVerbose, ArchiveMemberOffsets);
for (Archive::child_iterator AI = A->child_begin(),
AE = A->child_end();
AI != AE; ++AI) {
if (std::error_code EC = AI->getError())
report_error(Filename, EC);
auto &C = AI->get();
ErrorOr<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (ChildOrErr.getError())
continue;
if (MachOObjectFile *O =
dyn_cast<MachOObjectFile>(&*ChildOrErr.get()))
ProcessMachO(Filename, O, O->getFileName(), ArchitectureName);
}
}
}
}
if (!ArchFound) {
errs() << "llvm-objdump: file: " + Filename + " does not contain "
<< "architecture: " + ArchFlags[i] + "\n";
return;
}
}
return;
}
// No architecture flags were specified so if this contains a slice that
// matches the host architecture dump only that.
if (!ArchAll) {
for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
E = UB->end_objects();
I != E; ++I) {
if (MachOObjectFile::getHostArch().getArchName() ==
I->getArchTypeName()) {
ErrorOr<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
std::string ArchiveName;
ArchiveName.clear();
if (ObjOrErr) {
ObjectFile &O = *ObjOrErr.get();
if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&O))
ProcessMachO(Filename, MachOOF);
} else if (ErrorOr<std::unique_ptr<Archive>> AOrErr =
I->getAsArchive()) {
std::unique_ptr<Archive> &A = *AOrErr;
outs() << "Archive : " << Filename << "\n";
if (ArchiveHeaders)
printArchiveHeaders(A.get(), !NonVerbose, ArchiveMemberOffsets);
for (Archive::child_iterator AI = A->child_begin(),
AE = A->child_end();
AI != AE; ++AI) {
if (std::error_code EC = AI->getError())
report_error(Filename, EC);
auto &C = AI->get();
ErrorOr<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (ChildOrErr.getError())
continue;
if (MachOObjectFile *O =
dyn_cast<MachOObjectFile>(&*ChildOrErr.get()))
ProcessMachO(Filename, O, O->getFileName());
}
}
return;
}
}
}
// Either all architectures have been specified or none have been specified
// and this does not contain the host architecture so dump all the slices.
bool moreThanOneArch = UB->getNumberOfObjects() > 1;
for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
E = UB->end_objects();
I != E; ++I) {
ErrorOr<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
std::string ArchitectureName = "";
if (moreThanOneArch)
ArchitectureName = I->getArchTypeName();
if (ObjOrErr) {
ObjectFile &Obj = *ObjOrErr.get();
if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&Obj))
ProcessMachO(Filename, MachOOF, "", ArchitectureName);
} else if (ErrorOr<std::unique_ptr<Archive>> AOrErr = I->getAsArchive()) {
std::unique_ptr<Archive> &A = *AOrErr;
outs() << "Archive : " << Filename;
if (!ArchitectureName.empty())
outs() << " (architecture " << ArchitectureName << ")";
outs() << "\n";
if (ArchiveHeaders)
printArchiveHeaders(A.get(), !NonVerbose, ArchiveMemberOffsets);
for (Archive::child_iterator AI = A->child_begin(), AE = A->child_end();
AI != AE; ++AI) {
if (std::error_code EC = AI->getError())
report_error(Filename, EC);
auto &C = AI->get();
ErrorOr<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (ChildOrErr.getError())
continue;
if (MachOObjectFile *O =
dyn_cast<MachOObjectFile>(&*ChildOrErr.get())) {
if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(O))
ProcessMachO(Filename, MachOOF, MachOOF->getFileName(),
ArchitectureName);
}
}
}
}
return;
}
if (ObjectFile *O = dyn_cast<ObjectFile>(&Bin)) {
if (!checkMachOAndArchFlags(O, Filename))
return;
if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&*O)) {
ProcessMachO(Filename, MachOOF);
} else
errs() << "llvm-objdump: '" << Filename << "': "
<< "Object is not a Mach-O file type.\n";
return;
}
llvm_unreachable("Input object can't be invalid at this point");
}
// One bind table entry: a 64-bit value paired with a C string.
using BindInfoEntry = std::pair<uint64_t, const char *>;
// A bind table is a sequence of such entries.
using BindTable = std::vector<BindInfoEntry>;
// Mutable iterator over a bind table.
using bind_table_iterator = BindTable::iterator;
// The block of info used by the Symbolizer call backs. A pointer to it is
// what SymbolizerGetOpInfo() receives as its DisInfo argument.
struct DisassembleInfo {
// When false SymbolizerGetOpInfo() returns no symbolic information.
bool verbose;
// The Mach-O file being disassembled; queried for its arch, header and
// relocation entries.
MachOObjectFile *O;
// The section whose address and relocations SymbolizerGetOpInfo() consults.
SectionRef S;
// Passed to GuessSymbolName() to turn an address into a symbol name.
SymbolAddressMap *AddrMap;
// NOTE(review): presumably all the sections of O -- not used in the code
// visible here, confirm against the other call backs.
std::vector<SectionRef> *Sections;
// NOTE(review): the next four look like Objective-C / C++ name state kept
// between call backs (class and selector names, a built method string and a
// demangled name) -- ownership is not visible here, confirm.
const char *class_name;
const char *selector_name;
char *method;
char *demangled_name;
// NOTE(review): presumably the address and encoding of an arm64 adrp
// instruction remembered for a following instruction -- confirm.
uint64_t adrp_addr;
uint32_t adrp_inst;
// NOTE(review): presumably a table of bind entries (see BindTable) -- who
// creates and frees it is not visible here.
BindTable *bindtable;
// NOTE(review): meaning not visible in this part of the file.
uint32_t depth;
};
// SymbolizerGetOpInfo() is the operand information call back function.
// This is called to get the symbolic information for operand(s) of an
// instruction when it is being done. This routine does this from
// the relocation information, symbol table, etc. That block of information
// is a pointer to the struct DisassembleInfo that was passed when the
// disassembler context was created and is passed back to here when
// called back by the disassembler for instruction operands that could have
// relocation information. The address of the instruction containing operand is
// at the Pc parameter. The immediate value the operand has is passed in
// op_info->Value and is at Offset past the start of the instruction and has a
// byte Size of 1, 2 or 4. The symbolic information is returned in TagBuf as the
// LLVMOpInfo1 struct defined in the header "llvm-c/Disassembler.h" as symbol
// names and addends of the symbolic expression to add for the operand. The
// value of TagType is currently 1 (for the LLVMOpInfo1 struct). If symbolic
// information is returned then this function returns 1 else it returns 0.
static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
uint64_t Size, int TagType, void *TagBuf) {
struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo;
struct LLVMOpInfo1 *op_info = (struct LLVMOpInfo1 *)TagBuf;
uint64_t value = op_info->Value;
// Make sure all fields returned are zero if we don't set them.
memset((void *)op_info, '\0', sizeof(struct LLVMOpInfo1));
op_info->Value = value;
// If the TagType is not the value 1 which it code knows about or if no
// verbose symbolic information is wanted then just return 0, indicating no
// information is being returned.
if (TagType != 1 || !info->verbose)
return 0;
unsigned int Arch = info->O->getArch();
if (Arch == Triple::x86) {
if (Size != 1 && Size != 2 && Size != 4 && Size != 0)
return 0;
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
// TODO:
// Search the external relocation entries of a fully linked image
// (if any) for an entry that matches this segment offset.
// uint32_t seg_offset = (Pc + Offset);
return 0;
}
// In MH_OBJECT filetypes search the section's relocation entries (if any)
// for an entry for this section offset.
uint32_t sect_addr = info->S.getAddress();
uint32_t sect_offset = (Pc + Offset) - sect_addr;
bool reloc_found = false;
DataRefImpl Rel;
MachO::any_relocation_info RE;
bool isExtern = false;
SymbolRef Symbol;
bool r_scattered = false;
uint32_t r_value, pair_r_value, r_type;
for (const RelocationRef &Reloc : info->S.relocations()) {
uint64_t RelocOffset = Reloc.getOffset();
if (RelocOffset == sect_offset) {
Rel = Reloc.getRawDataRefImpl();
RE = info->O->getRelocation(Rel);
r_type = info->O->getAnyRelocationType(RE);
r_scattered = info->O->isRelocationScattered(RE);
if (r_scattered) {
r_value = info->O->getScatteredRelocationValue(RE);
if (r_type == MachO::GENERIC_RELOC_SECTDIFF ||
r_type == MachO::GENERIC_RELOC_LOCAL_SECTDIFF) {
DataRefImpl RelNext = Rel;
info->O->moveRelocationNext(RelNext);
MachO::any_relocation_info RENext;
RENext = info->O->getRelocation(RelNext);
if (info->O->isRelocationScattered(RENext))
pair_r_value = info->O->getScatteredRelocationValue(RENext);
else
return 0;
}
} else {
isExtern = info->O->getPlainRelocationExternal(RE);
if (isExtern) {
symbol_iterator RelocSym = Reloc.getSymbol();
Symbol = *RelocSym;
}
}
reloc_found = true;
break;
}
}
if (reloc_found && isExtern) {
ErrorOr<StringRef> SymName = Symbol.getName();
if (std::error_code EC = SymName.getError())
report_fatal_error(EC.message());
const char *name = SymName->data();
op_info->AddSymbol.Present = 1;
op_info->AddSymbol.Name = name;
// For i386 extern relocation entries the value in the instruction is
// the offset from the symbol, and value is already set in op_info->Value.
return 1;
}
if (reloc_found && (r_type == MachO::GENERIC_RELOC_SECTDIFF ||
r_type == MachO::GENERIC_RELOC_LOCAL_SECTDIFF)) {
const char *add = GuessSymbolName(r_value, info->AddrMap);
const char *sub = GuessSymbolName(pair_r_value, info->AddrMap);
uint32_t offset = value - (r_value - pair_r_value);
op_info->AddSymbol.Present = 1;
if (add != nullptr)
op_info->AddSymbol.Name = add;
else
op_info->AddSymbol.Value = r_value;
op_info->SubtractSymbol.Present = 1;
if (sub != nullptr)
op_info->SubtractSymbol.Name = sub;
else
op_info->SubtractSymbol.Value = pair_r_value;
op_info->Value = offset;
return 1;
}
return 0;
}
if (Arch == Triple::x86_64) {
if (Size != 1 && Size != 2 && Size != 4 && Size != 0)
return 0;
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
// TODO:
// Search the external relocation entries of a fully linked image
// (if any) for an entry that matches this segment offset.
// uint64_t seg_offset = (Pc + Offset);
return 0;
}
// In MH_OBJECT filetypes search the section's relocation entries (if any)
// for an entry for this section offset.
uint64_t sect_addr = info->S.getAddress();
uint64_t sect_offset = (Pc + Offset) - sect_addr;
bool reloc_found = false;
DataRefImpl Rel;
MachO::any_relocation_info RE;
bool isExtern = false;
SymbolRef Symbol;
for (const RelocationRef &Reloc : info->S.relocations()) {
uint64_t RelocOffset = Reloc.getOffset();
if (RelocOffset == sect_offset) {
Rel = Reloc.getRawDataRefImpl();
RE = info->O->getRelocation(Rel);
// NOTE: Scattered relocations don't exist on x86_64.
isExtern = info->O->getPlainRelocationExternal(RE);
if (isExtern) {
symbol_iterator RelocSym = Reloc.getSymbol();
Symbol = *RelocSym;
}
reloc_found = true;
break;
}
}
if (reloc_found && isExtern) {
// The Value passed in will be adjusted by the Pc if the instruction
// adds the Pc. But for x86_64 external relocation entries the Value
// is the offset from the external symbol.
if (info->O->getAnyRelocationPCRel(RE))
op_info->Value -= Pc + Offset + Size;
ErrorOr<StringRef> SymName = Symbol.getName();
if (std::error_code EC = SymName.getError())
report_fatal_error(EC.message());
const char *name = SymName->data();
unsigned Type = info->O->getAnyRelocationType(RE);
if (Type == MachO::X86_64_RELOC_SUBTRACTOR) {
DataRefImpl RelNext = Rel;
info->O->moveRelocationNext(RelNext);
MachO::any_relocation_info RENext = info->O->getRelocation(RelNext);
unsigned TypeNext = info->O->getAnyRelocationType(RENext);
bool isExternNext = info->O->getPlainRelocationExternal(RENext);
unsigned SymbolNum = info->O->getPlainRelocationSymbolNum(RENext);
if (TypeNext == MachO::X86_64_RELOC_UNSIGNED && isExternNext) {
op_info->SubtractSymbol.Present = 1;
op_info->SubtractSymbol.Name = name;
symbol_iterator RelocSymNext = info->O->getSymbolByIndex(SymbolNum);
Symbol = *RelocSymNext;
ErrorOr<StringRef> SymNameNext = Symbol.getName();
if (std::error_code EC = SymNameNext.getError())
report_fatal_error(EC.message());
name = SymNameNext->data();
}
}
// TODO: add the VariantKinds to op_info->VariantKind for relocation types
// like: X86_64_RELOC_TLV, X86_64_RELOC_GOT_LOAD and X86_64_RELOC_GOT.
op_info->AddSymbol.Present = 1;
op_info->AddSymbol.Name = name;
return 1;
}
return 0;
}
if (Arch == Triple::arm) {
if (Offset != 0 || (Size != 4 && Size != 2))
return 0;
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
// TODO:
// Search the external relocation entries of a fully linked image
// (if any) for an entry that matches this segment offset.
// uint32_t seg_offset = (Pc + Offset);
return 0;
}
// In MH_OBJECT filetypes search the section's relocation entries (if any)
// for an entry for this section offset.
uint32_t sect_addr = info->S.getAddress();
uint32_t sect_offset = (Pc + Offset) - sect_addr;
DataRefImpl Rel;
MachO::any_relocation_info RE;
bool isExtern = false;
SymbolRef Symbol;
bool r_scattered = false;
uint32_t r_value, pair_r_value, r_type, r_length, other_half;
auto Reloc =
std::find_if(info->S.relocations().begin(), info->S.relocations().end(),
[&](const RelocationRef &Reloc) {
uint64_t RelocOffset = Reloc.getOffset();
return RelocOffset == sect_offset;
});
if (Reloc == info->S.relocations().end())
return 0;
Rel = Reloc->getRawDataRefImpl();
RE = info->O->getRelocation(Rel);
r_length = info->O->getAnyRelocationLength(RE);
r_scattered = info->O->isRelocationScattered(RE);
if (r_scattered) {
r_value = info->O->getScatteredRelocationValue(RE);
r_type = info->O->getScatteredRelocationType(RE);
} else {
r_type = info->O->getAnyRelocationType(RE);
isExtern = info->O->getPlainRelocationExternal(RE);
if (isExtern) {
symbol_iterator RelocSym = Reloc->getSymbol();
Symbol = *RelocSym;
}
}
if (r_type == MachO::ARM_RELOC_HALF ||
r_type == MachO::ARM_RELOC_SECTDIFF ||
r_type == MachO::ARM_RELOC_LOCAL_SECTDIFF ||
r_type == MachO::ARM_RELOC_HALF_SECTDIFF) {
DataRefImpl RelNext = Rel;
info->O->moveRelocationNext(RelNext);
MachO::any_relocation_info RENext;
RENext = info->O->getRelocation(RelNext);
other_half = info->O->getAnyRelocationAddress(RENext) & 0xffff;
if (info->O->isRelocationScattered(RENext))
pair_r_value = info->O->getScatteredRelocationValue(RENext);
}
if (isExtern) {
ErrorOr<StringRef> SymName = Symbol.getName();
if (std::error_code EC = SymName.getError())
report_fatal_error(EC.message());
const char *name = SymName->data();
op_info->AddSymbol.Present = 1;
op_info->AddSymbol.Name = name;
switch (r_type) {
case MachO::ARM_RELOC_HALF:
if ((r_length & 0x1) == 1) {
op_info->Value = value << 16 | other_half;
op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_HI16;
} else {
op_info->Value = other_half << 16 | value;
op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_LO16;
}
break;
default:
break;
}
return 1;
}
// If we have a branch that is not an external relocation entry then
// return 0 so the code in tryAddingSymbolicOperand() can use the
// SymbolLookUp call back with the branch target address to look up the
// symbol and possiblity add an annotation for a symbol stub.
if (isExtern == 0 && (r_type == MachO::ARM_RELOC_BR24 ||
r_type == MachO::ARM_THUMB_RELOC_BR22))
return 0;
uint32_t offset = 0;
if (r_type == MachO::ARM_RELOC_HALF ||
r_type == MachO::ARM_RELOC_HALF_SECTDIFF) {
if ((r_length & 0x1) == 1)
value = value << 16 | other_half;
else
value = other_half << 16 | value;
}
if (r_scattered && (r_type != MachO::ARM_RELOC_HALF &&
r_type != MachO::ARM_RELOC_HALF_SECTDIFF)) {
offset = value - r_value;
value = r_value;
}
if (r_type == MachO::ARM_RELOC_HALF_SECTDIFF) {
if ((r_length & 0x1) == 1)
op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_HI16;
else
op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_LO16;
const char *add = GuessSymbolName(r_value, info->AddrMap);
const char *sub = GuessSymbolName(pair_r_value, info->AddrMap);
int32_t offset = value - (r_value - pair_r_value);
op_info->AddSymbol.Present = 1;
if (add != nullptr)
op_info->AddSymbol.Name = add;
else
op_info->AddSymbol.Value = r_value;
op_info->SubtractSymbol.Present = 1;
if (sub != nullptr)
op_info->SubtractSymbol.Name = sub;
else
op_info->SubtractSymbol.Value = pair_r_value;
op_info->Value = offset;
return 1;
}
op_info->AddSymbol.Present = 1;
op_info->Value = offset;
if (r_type == MachO::ARM_RELOC_HALF) {
if ((r_length & 0x1) == 1)
op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_HI16;
else
op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_LO16;
}
const char *add = GuessSymbolName(value, info->AddrMap);
if (add != nullptr) {
op_info->AddSymbol.Name = add;
return 1;
}
op_info->AddSymbol.Value = value;
return 1;
}
if (Arch == Triple::aarch64) {
if (Offset != 0 || Size != 4)
return 0;
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
// TODO:
// Search the external relocation entries of a fully linked image
// (if any) for an entry that matches this segment offset.
// uint64_t seg_offset = (Pc + Offset);
return 0;
}
// In MH_OBJECT filetypes search the section's relocation entries (if any)
// for an entry for this section offset.
uint64_t sect_addr = info->S.getAddress();
uint64_t sect_offset = (Pc + Offset) - sect_addr;
auto Reloc =
std::find_if(info->S.relocations().begin(), info->S.relocations().end(),
[&](const RelocationRef &Reloc) {
uint64_t RelocOffset = Reloc.getOffset();
return RelocOffset == sect_offset;
});
if (Reloc == info->S.relocations().end())
return 0;
DataRefImpl Rel = Reloc->getRawDataRefImpl();
MachO::any_relocation_info RE = info->O->getRelocation(Rel);
uint32_t r_type = info->O->getAnyRelocationType(RE);
if (r_type == MachO::ARM64_RELOC_ADDEND) {
DataRefImpl RelNext = Rel;
info->O->moveRelocationNext(RelNext);
MachO::any_relocation_info RENext = info->O->getRelocation(RelNext);
if (value == 0) {
value = info->O->getPlainRelocationSymbolNum(RENext);
op_info->Value = value;
}
}
// NOTE: Scattered relocations don't exist on arm64.
if (!info->O->getPlainRelocationExternal(RE))
return 0;
ErrorOr<StringRef> SymName = Reloc->getSymbol()->getName();
if (std::error_code EC = SymName.getError())
report_fatal_error(EC.message());
const char *name = SymName->data();
op_info->AddSymbol.Present = 1;
op_info->AddSymbol.Name = name;
switch (r_type) {
case MachO::ARM64_RELOC_PAGE21:
/* @page */
op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_PAGE;
break;
case MachO::ARM64_RELOC_PAGEOFF12:
/* @pageoff */
op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_PAGEOFF;
break;
case MachO::ARM64_RELOC_GOT_LOAD_PAGE21:
/* @gotpage */
op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_GOTPAGE;
break;
case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12:
/* @gotpageoff */
op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF;
break;
case MachO::ARM64_RELOC_TLVP_LOAD_PAGE21:
/* @tvlppage is not implemented in llvm-mc */
op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_TLVP;
break;
case MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
/* @tvlppageoff is not implemented in llvm-mc */
op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_TLVOFF;
break;
default:
case MachO::ARM64_RELOC_BRANCH26:
op_info->VariantKind = LLVMDisassembler_VariantKind_None;
break;
}
return 1;
}
return 0;
}
// GuessCstringPointer is passed the address of what might be a pointer to a
// literal string in a cstring section. If that address is in a cstring section
// it returns a pointer to that string. Else it returns nullptr.
static const char *GuessCstringPointer(uint64_t ReferenceValue,
struct DisassembleInfo *info) {
for (const auto &Load : info->O->load_commands()) {
if (Load.C.cmd == MachO::LC_SEGMENT_64) {
MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load);
for (unsigned J = 0; J < Seg.nsects; ++J) {
MachO::section_64 Sec = info->O->getSection64(Load, J);
uint32_t section_type = Sec.flags & MachO::SECTION_TYPE;
if (section_type == MachO::S_CSTRING_LITERALS &&
ReferenceValue >= Sec.addr &&
ReferenceValue < Sec.addr + Sec.size) {
uint64_t sect_offset = ReferenceValue - Sec.addr;
uint64_t object_offset = Sec.offset + sect_offset;
StringRef MachOContents = info->O->getData();
uint64_t object_size = MachOContents.size();
const char *object_addr = (const char *)MachOContents.data();
if (object_offset < object_size) {
const char *name = object_addr + object_offset;
return name;
} else {
return nullptr;
}
}
}
} else if (Load.C.cmd == MachO::LC_SEGMENT) {
MachO::segment_command Seg = info->O->getSegmentLoadCommand(Load);
for (unsigned J = 0; J < Seg.nsects; ++J) {
MachO::section Sec = info->O->getSection(Load, J);
uint32_t section_type = Sec.flags & MachO::SECTION_TYPE;
if (section_type == MachO::S_CSTRING_LITERALS &&
ReferenceValue >= Sec.addr &&
ReferenceValue < Sec.addr + Sec.size) {
uint64_t sect_offset = ReferenceValue - Sec.addr;
uint64_t object_offset = Sec.offset + sect_offset;
StringRef MachOContents = info->O->getData();
uint64_t object_size = MachOContents.size();
const char *object_addr = (const char *)MachOContents.data();
if (object_offset < object_size) {
const char *name = object_addr + object_offset;
return name;
} else {
return nullptr;
}
}
}
}
}
return nullptr;
}
// GuessIndirectSymbol returns the name of the indirect symbol for the
// ReferenceValue passed in or nullptr. This is used when ReferenceValue maybe
// an address of a symbol stub or a lazy or non-lazy pointer to associate the
// symbol name being referenced by the stub or pointer.
static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
struct DisassembleInfo *info) {
MachO::dysymtab_command Dysymtab = info->O->getDysymtabLoadCommand();
MachO::symtab_command Symtab = info->O->getSymtabLoadCommand();
for (const auto &Load : info->O->load_commands()) {
if (Load.C.cmd == MachO::LC_SEGMENT_64) {
MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load);
for (unsigned J = 0; J < Seg.nsects; ++J) {
MachO::section_64 Sec = info->O->getSection64(Load, J);
uint32_t section_type = Sec.flags & MachO::SECTION_TYPE;
if ((section_type == MachO::S_NON_LAZY_SYMBOL_POINTERS ||
section_type == MachO::S_LAZY_SYMBOL_POINTERS ||
section_type == MachO::S_LAZY_DYLIB_SYMBOL_POINTERS ||
section_type == MachO::S_THREAD_LOCAL_VARIABLE_POINTERS ||
section_type == MachO::S_SYMBOL_STUBS) &&
ReferenceValue >= Sec.addr &&
ReferenceValue < Sec.addr + Sec.size) {
uint32_t stride;
if (section_type == MachO::S_SYMBOL_STUBS)
stride = Sec.reserved2;
else
stride = 8;
if (stride == 0)
return nullptr;
uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride;
if (index < Dysymtab.nindirectsyms) {
uint32_t indirect_symbol =
info->O->getIndirectSymbolTableEntry(Dysymtab, index);
if (indirect_symbol < Symtab.nsyms) {
symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
SymbolRef Symbol = *Sym;
ErrorOr<StringRef> SymName = Symbol.getName();
if (std::error_code EC = SymName.getError())
report_fatal_error(EC.message());
const char *name = SymName->data();
return name;
}
}
}
}
} else if (Load.C.cmd == MachO::LC_SEGMENT) {
MachO::segment_command Seg = info->O->getSegmentLoadCommand(Load);
for (unsigned J = 0; J < Seg.nsects; ++J) {
MachO::section Sec = info->O->getSection(Load, J);
uint32_t section_type = Sec.flags & MachO::SECTION_TYPE;
if ((section_type == MachO::S_NON_LAZY_SYMBOL_POINTERS ||
section_type == MachO::S_LAZY_SYMBOL_POINTERS ||
section_type == MachO::S_LAZY_DYLIB_SYMBOL_POINTERS ||
section_type == MachO::S_THREAD_LOCAL_VARIABLE_POINTERS ||
section_type == MachO::S_SYMBOL_STUBS) &&
ReferenceValue >= Sec.addr &&
ReferenceValue < Sec.addr + Sec.size) {
uint32_t stride;
if (section_type == MachO::S_SYMBOL_STUBS)
stride = Sec.reserved2;
else
stride = 4;
if (stride == 0)
return nullptr;
uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride;
if (index < Dysymtab.nindirectsyms) {
uint32_t indirect_symbol =
info->O->getIndirectSymbolTableEntry(Dysymtab, index);
if (indirect_symbol < Symtab.nsyms) {
symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
SymbolRef Symbol = *Sym;
ErrorOr<StringRef> SymName = Symbol.getName();
if (std::error_code EC = SymName.getError())
report_fatal_error(EC.message());
const char *name = SymName->data();
return name;
}
}
}
}
}
}
return nullptr;
}
// method_reference() is called passing it the ReferenceName that might be
// a reference it to an Objective-C method call. If so then it allocates and
// assembles a method call string with the values last seen and saved in
// the DisassembleInfo's class_name and selector_name fields. This is saved
// into the method field of the info and any previous string is free'ed.
// Then the class_name field in the info is set to nullptr. The method call
// string is set into ReferenceName and ReferenceType is set to
// LLVMDisassembler_ReferenceType_Out_Objc_Message. If this not a method call
// then both ReferenceType and ReferenceName are left unchanged.
static void method_reference(struct DisassembleInfo *info,
uint64_t *ReferenceType,
const char **ReferenceName) {
unsigned int Arch = info->O->getArch();
if (*ReferenceName != nullptr) {
if (strcmp(*ReferenceName, "_objc_msgSend") == 0) {
if (info->selector_name != nullptr) {
if (info->method != nullptr)
free(info->method);
if (info->class_name != nullptr) {
info->method = (char *)malloc(5 + strlen(info->class_name) +
strlen(info->selector_name));
if (info->method != nullptr) {
strcpy(info->method, "+[");
strcat(info->method, info->class_name);
strcat(info->method, " ");
strcat(info->method, info->selector_name);
strcat(info->method, "]");
*ReferenceName = info->method;
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message;
}
} else {
info->method = (char *)malloc(9 + strlen(info->selector_name));
if (info->method != nullptr) {
if (Arch == Triple::x86_64)
strcpy(info->method, "-[%rdi ");
else if (Arch == Triple::aarch64)
strcpy(info->method, "-[x0 ");
else
strcpy(info->method, "-[r? ");
strcat(info->method, info->selector_name);
strcat(info->method, "]");
*ReferenceName = info->method;
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message;
}
}
info->class_name = nullptr;
}
} else if (strcmp(*ReferenceName, "_objc_msgSendSuper2") == 0) {
if (info->selector_name != nullptr) {
if (info->method != nullptr)
free(info->method);
info->method = (char *)malloc(17 + strlen(info->selector_name));
if (info->method != nullptr) {
if (Arch == Triple::x86_64)
strcpy(info->method, "-[[%rdi super] ");
else if (Arch == Triple::aarch64)
strcpy(info->method, "-[[x0 super] ");
else
strcpy(info->method, "-[[r? super] ");
strcat(info->method, info->selector_name);
strcat(info->method, "]");
*ReferenceName = info->method;
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message;
}
info->class_name = nullptr;
}
}
}
}
// GuessPointerPointer() is passed the address of what might be a pointer to
// a reference to an Objective-C class, selector, message ref or cfstring.
// The sections of each LC_SEGMENT_64 load command are searched for one named
// __objc_selrefs, __objc_classrefs, __objc_superrefs, __objc_msgrefs or
// __cfstring whose address range contains ReferenceValue. For the first such
// section the 8-byte pointer stored at the corresponding file offset is read
// (byte swapped if the object's endianness differs from the host's) and
// returned, and one of the booleans is set:
//   selref    for __objc_selrefs
//   classref  for __objc_classrefs and __objc_superrefs
//   msgref    for __objc_msgrefs when a second pointer 8 bytes further on is
//             available; that second pointer is the value returned
//   cfstring  for __cfstring
// A __objc_msgrefs address with no room for the second pointer returns the
// first pointer with no boolean set. If no section matches, or the pointer
// does not lie wholly inside the object file's data, zero is returned and all
// the booleans are false.
static uint64_t GuessPointerPointer(uint64_t ReferenceValue,
                                    struct DisassembleInfo *info,
                                    bool &classref, bool &selref, bool &msgref,
                                    bool &cfstring) {
  classref = false;
  selref = false;
  msgref = false;
  cfstring = false;
  for (const auto &Load : info->O->load_commands()) {
    if (Load.C.cmd == MachO::LC_SEGMENT_64) {
      MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load);
      for (unsigned J = 0; J < Seg.nsects; ++J) {
        MachO::section_64 Sec = info->O->getSection64(Load, J);
        const bool IsSelRefs =
            strncmp(Sec.sectname, "__objc_selrefs", 16) == 0;
        const bool IsClassRefs =
            strncmp(Sec.sectname, "__objc_classrefs", 16) == 0 ||
            strncmp(Sec.sectname, "__objc_superrefs", 16) == 0;
        const bool IsMsgRefs =
            strncmp(Sec.sectname, "__objc_msgrefs", 16) == 0;
        const bool IsCFString = strncmp(Sec.sectname, "__cfstring", 16) == 0;
        if (!(IsSelRefs || IsClassRefs || IsMsgRefs || IsCFString))
          continue;
        if (ReferenceValue < Sec.addr || ReferenceValue >= Sec.addr + Sec.size)
          continue;

        uint64_t sect_offset = ReferenceValue - Sec.addr;
        uint64_t object_offset = Sec.offset + sect_offset;
        StringRef MachOContents = info->O->getData();
        uint64_t object_size = MachOContents.size();
        const char *object_addr = (const char *)MachOContents.data();
        // The whole 8-byte pointer has to be inside the file, not just its
        // first byte, or the memcpy below would read past the end of the
        // object's data for a truncated or malformed file.
        if (object_offset >= object_size ||
            object_size - object_offset < sizeof(uint64_t))
          return 0;

        uint64_t pointer_value;
        memcpy(&pointer_value, object_addr + object_offset, sizeof(uint64_t));
        if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
          sys::swapByteOrder(pointer_value);
        if (IsSelRefs)
          selref = true;
        else if (IsClassRefs)
          classref = true;
        else if (IsMsgRefs && ReferenceValue + 8 < Sec.addr + Sec.size &&
                 object_size - object_offset >= 2 * sizeof(uint64_t)) {
          // The pointer of interest in a message ref is the second one; it
          // must be inside both the section and the file to be read.
          msgref = true;
          memcpy(&pointer_value, object_addr + object_offset + 8,
                 sizeof(uint64_t));
          if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
            sys::swapByteOrder(pointer_value);
        } else if (IsCFString)
          cfstring = true;
        return pointer_value;
      }
    }
    // TODO: Look for LC_SEGMENT for 32-bit Mach-O files.
  }
  return 0;
}
// get_pointer_64 returns a pointer to the bytes in the object file at the
// Address from a section in the Mach-O file. And indirectly returns the
// offset into the section, number of bytes left in the section past the offset
// and which section is was being referenced. If the Address is not in a
// section nullptr is returned.
static const char *get_pointer_64(uint64_t Address, uint32_t &offset,
uint32_t &left, SectionRef &S,
DisassembleInfo *info,
bool objc_only = false) {
offset = 0;
left = 0;
S = SectionRef();
for (unsigned SectIdx = 0; SectIdx != info->Sections->size(); SectIdx++) {
uint64_t SectAddress = ((*(info->Sections))[SectIdx]).getAddress();
uint64_t SectSize = ((*(info->Sections))[SectIdx]).getSize();
if (SectSize == 0)
continue;
if (objc_only) {
StringRef SectName;
((*(info->Sections))[SectIdx]).getName(SectName);
DataRefImpl Ref = ((*(info->Sections))[SectIdx]).getRawDataRefImpl();
StringRef SegName = info->O->getSectionFinalSegmentName(Ref);
if (SegName != "__OBJC" && SectName != "__cstring")
continue;
}
if (Address >= SectAddress && Address < SectAddress + SectSize) {
S = (*(info->Sections))[SectIdx];
offset = Address - SectAddress;
left = SectSize - offset;
StringRef SectContents;
((*(info->Sections))[SectIdx]).getContents(SectContents);
return SectContents.data() + offset;
}
}
return nullptr;
}
// get_pointer_32() is the 32-bit address form of get_pointer_64(): the
// address is widened to 64 bits and every other argument is passed through
// unchanged, so the result and the values returned indirectly are those of
// get_pointer_64().
static const char *get_pointer_32(uint32_t Address, uint32_t &offset,
                                  uint32_t &left, SectionRef &S,
                                  DisassembleInfo *info,
                                  bool objc_only = false) {
  const uint64_t WideAddress = Address;
  return get_pointer_64(WideAddress, offset, left, S, info, objc_only);
}
// get_symbol_64() returns the name of a symbol (or nullptr) and the address of
// the symbol indirectly through n_value. Based on the relocation information
// for the specified section offset in the specified section reference.
// If no relocation information is found and a non-zero ReferenceValue for the
// symbol is passed, look up that address in the info's AddrMap.
static const char *get_symbol_64(uint32_t sect_offset, SectionRef S,
DisassembleInfo *info, uint64_t &n_value,
uint64_t ReferenceValue = 0) {
n_value = 0;
if (!info->verbose)
return nullptr;
// See if there is an external relocation entry at the sect_offset.
bool reloc_found = false;
DataRefImpl Rel;
MachO::any_relocation_info RE;
bool isExtern = false;
SymbolRef Symbol;
for (const RelocationRef &Reloc : S.relocations()) {
uint64_t RelocOffset = Reloc.getOffset();
if (RelocOffset == sect_offset) {
Rel = Reloc.getRawDataRefImpl();
RE = info->O->getRelocation(Rel);
if (info->O->isRelocationScattered(RE))
continue;
isExtern = info->O->getPlainRelocationExternal(RE);
if (isExtern) {
symbol_iterator RelocSym = Reloc.getSymbol();
Symbol = *RelocSym;
}
reloc_found = true;
break;
}
}
// If there is an external relocation entry for a symbol in this section
// at this section_offset then use that symbol's value for the n_value
// and return its name.
const char *SymbolName = nullptr;
if (reloc_found && isExtern) {
n_value = Symbol.getValue();
ErrorOr<StringRef>