[Courgette] Add more checks in ELF parsing to fix fuzzer failure.

Recently Clusterfuzz discovered a minimal ELF file that triggers CHECK
failure. This CL ports some Zucchini ELF parsing checks to Courgette
to fix these, and add more. Failing these check just causes potential
image to be not identified as ELF. Details:
* Require string table section to exist (e_shstrndx != SHN_UNDEF == 0).
* Require non-SHT_NOBITS sections to lie with image.
* Require program segments to lie within image.
* NB: "Lie within image" uses tentative image, before UpdateLength()
  is applied to shrink image.
* Require string table section to have type SHT_STRTAB.
  * This prevents loophole to the section within image check.
  * This allows CHECK() to be added to SectionBody() per TODO.
* Require e_ident[{EI_CLASS, EI_DATA, EI_VERSION}] to fixed values
  (32-bit little-endian).

Bug: 934142
Change-Id: I9687d2cbdcbfb957ddd55d0f6a40aea857071d74
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1518806
Reviewed-by: Samuel Huang <huangs@chromium.org>
Reviewed-by: Etienne Pierre-Doray <etiennep@chromium.org>
Reviewed-by: Will Harris <wfh@chromium.org>
Commit-Queue: Samuel Huang <huangs@chromium.org>
Cr-Original-Commit-Position: refs/heads/master@{#640473}
Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src
Cr-Mirrored-Commit: 4db877c713b6cf149d6e85e6924f5d449c4dd279
diff --git a/disassembler.h b/disassembler.h
index 5b1bb8d..78a4005 100644
--- a/disassembler.h
+++ b/disassembler.h
@@ -115,7 +115,12 @@
   bool Good();
   bool Bad(const char *reason);
 
-  // Returns true if the array lies within our memory region.
+  // Returns true if the given range lies within our memory region.
+  bool IsRangeInBounds(size_t offset, size_t size) {
+    return offset <= length() && size <= length() - offset;
+  }
+
+  // Returns true if the given array lies within our memory region.
   bool IsArrayInBounds(size_t offset, size_t elements, size_t element_size) {
     return offset <= length() && elements <= (length() - offset) / element_size;
   }
diff --git a/disassembler_elf_32.cc b/disassembler_elf_32.cc
index fac661c..e585208 100644
--- a/disassembler_elf_32.cc
+++ b/disassembler_elf_32.cc
@@ -101,28 +101,36 @@
 
   header_ = reinterpret_cast<const Elf32_Ehdr*>(start());
 
-  // Have magic for ELF header?
-  if (header_->e_ident[0] != 0x7f ||
-      header_->e_ident[1] != 'E' ||
-      header_->e_ident[2] != 'L' ||
-      header_->e_ident[3] != 'F')
-    return Bad("No Magic Number");
+  // Perform DisassemblerElf32::QuickDetect() checks (with error messages).
 
-  if (header_->e_type != ET_EXEC &&
-      header_->e_type != ET_DYN)
+  // Have magic for ELF header?
+  if (header_->e_ident[EI_MAG0] != 0x7F || header_->e_ident[EI_MAG1] != 'E' ||
+      header_->e_ident[EI_MAG2] != 'L' || header_->e_ident[EI_MAG3] != 'F') {
+    return Bad("No Magic Number");
+  }
+
+  if (header_->e_ident[EI_CLASS] != ELFCLASS32 ||
+      header_->e_ident[EI_DATA] != ELFDATA2LSB ||
+      header_->e_machine != ElfEM()) {
+    return Bad("Not a supported architecture");
+  }
+
+  if (header_->e_type != ET_EXEC && header_->e_type != ET_DYN)
     return Bad("Not an executable file or shared library");
 
-  if (header_->e_machine != ElfEM())
-    return Bad("Not a supported architecture");
-
-  if (header_->e_version != 1)
+  if (header_->e_version != 1 || header_->e_ident[EI_VERSION] != 1)
     return Bad("Unknown file version");
 
   if (header_->e_shentsize != sizeof(Elf32_Shdr))
     return Bad("Unexpected section header size");
 
-  if (!IsArrayInBounds(header_->e_shoff, header_->e_shnum, sizeof(Elf32_Shdr)))
+  // Perform more complex checks, while extracting data.
+
+  if (header_->e_shoff < sizeof(Elf32_Ehdr) ||
+      !IsArrayInBounds(header_->e_shoff, header_->e_shnum,
+                       sizeof(Elf32_Shdr))) {
     return Bad("Out of bounds section header table");
+  }
 
   // Extract |section_header_table_|, ordered by section id.
   const Elf32_Shdr* section_header_table_raw =
@@ -131,34 +139,41 @@
   section_header_table_size_ = header_->e_shnum;
   section_header_table_.assign(section_header_table_raw,
       section_header_table_raw + section_header_table_size_);
-
-  // TODO(huangs): Validate offsets of all section headers.
-
+  if (!CheckSectionRanges())
+    return Bad("Out of bound section");
   section_header_file_offset_order_ =
       GetSectionHeaderFileOffsetOrder(section_header_table_);
-
-  if (!IsArrayInBounds(header_->e_phoff, header_->e_phnum, sizeof(Elf32_Phdr)))
+  if (header_->e_phoff < sizeof(Elf32_Ehdr) ||
+      !IsArrayInBounds(header_->e_phoff, header_->e_phnum,
+                       sizeof(Elf32_Phdr))) {
     return Bad("Out of bounds program header table");
+  }
 
+  // Extract |program_header_table_|.
+  program_header_table_size_ = header_->e_phnum;
   program_header_table_ = reinterpret_cast<const Elf32_Phdr*>(
       FileOffsetToPointer(header_->e_phoff));
-  program_header_table_size_ = header_->e_phnum;
+  if (!CheckProgramSegmentRanges())
+    return Bad("Out of bound segment");
 
+  // Extract |default_string_section_|.
   Elf32_Half string_section_id = header_->e_shstrndx;
+  if (string_section_id == SHN_UNDEF)
+    return Bad("Missing string section");
   if (string_section_id >= header_->e_shnum)
     return Bad("Out of bounds string section index");
-
+  if (SectionHeader(string_section_id)->sh_type != SHT_STRTAB)
+    return Bad("Invalid string section");
+  default_string_section_size_ = SectionHeader(string_section_id)->sh_size;
   default_string_section_ =
       reinterpret_cast<const char*>(SectionBody(string_section_id));
-  default_string_section_size_ = SectionHeader(string_section_id)->sh_size;
   // String section may be empty. If nonempty, then last byte must be null.
   if (default_string_section_size_ > 0) {
     if (default_string_section_[default_string_section_size_ - 1] != '\0')
       return Bad("String section does not terminate");
   }
 
-  if (!UpdateLength())
-    return Bad("Out of bounds section or segment");
+  UpdateLength();
 
   return Good();
 }
@@ -195,15 +210,17 @@
   const Elf32_Ehdr* header = reinterpret_cast<const Elf32_Ehdr*>(start);
 
   // Have magic for ELF header?
-  if (header->e_ident[0] != 0x7f || header->e_ident[1] != 'E' ||
-      header->e_ident[2] != 'L' || header->e_ident[3] != 'F')
+  if (header->e_ident[EI_MAG0] != 0x7F || header->e_ident[EI_MAG1] != 'E' ||
+      header->e_ident[EI_MAG2] != 'L' || header->e_ident[EI_MAG3] != 'F') {
     return false;
-
+  }
+  if (header->e_ident[EI_CLASS] != ELFCLASS32 ||
+      header->e_ident[EI_DATA] != ELFDATA2LSB || header->e_machine != elf_em) {
+    return false;
+  }
   if (header->e_type != ET_EXEC && header->e_type != ET_DYN)
     return false;
-  if (header->e_machine != elf_em)
-    return false;
-  if (header->e_version != 1)
+  if (header->e_version != 1 || header->e_ident[EI_VERSION] != 1)
     return false;
   if (header->e_shentsize != sizeof(Elf32_Shdr))
     return false;
@@ -211,32 +228,47 @@
   return true;
 }
 
-bool DisassemblerElf32::UpdateLength() {
-  Elf32_Off result = 0;
-
-  // Find the end of the last section
+bool DisassemblerElf32::CheckSectionRanges() {
   for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
        ++section_id) {
     const Elf32_Shdr* section_header = SectionHeader(section_id);
+    if (section_header->sh_type == SHT_NOBITS)  // E.g., .bss.
+      continue;
+    if (!IsRangeInBounds(section_header->sh_offset, section_header->sh_size))
+      return false;
+  }
+  return true;
+}
 
+bool DisassemblerElf32::CheckProgramSegmentRanges() {
+  for (Elf32_Half segment_id = 0; segment_id < ProgramSegmentHeaderCount();
+       ++segment_id) {
+    const Elf32_Phdr* segment_header = ProgramSegmentHeader(segment_id);
+    if (!IsRangeInBounds(segment_header->p_offset, segment_header->p_filesz))
+      return false;
+  }
+  return true;
+}
+
+void DisassemblerElf32::UpdateLength() {
+  Elf32_Off result = 0;
+
+  // Find the end of the last section.
+  for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
+       ++section_id) {
+    const Elf32_Shdr* section_header = SectionHeader(section_id);
     if (section_header->sh_type == SHT_NOBITS)
       continue;
-
-    if (!IsArrayInBounds(section_header->sh_offset, section_header->sh_size, 1))
-      return false;
-
+    DCHECK(IsRangeInBounds(section_header->sh_offset, section_header->sh_size));
     Elf32_Off section_end = section_header->sh_offset + section_header->sh_size;
     result = std::max(result, section_end);
   }
 
-  // Find the end of the last segment
+  // Find the end of the last segment.
   for (Elf32_Half segment_id = 0; segment_id < ProgramSegmentHeaderCount();
        ++segment_id) {
     const Elf32_Phdr* segment_header = ProgramSegmentHeader(segment_id);
-
-    if (!IsArrayInBounds(segment_header->p_offset, segment_header->p_filesz, 1))
-      return false;
-
+    DCHECK(IsRangeInBounds(segment_header->p_offset, segment_header->p_filesz));
     Elf32_Off segment_end = segment_header->p_offset + segment_header->p_filesz;
     result = std::max(result, segment_end);
   }
@@ -250,7 +282,6 @@
   result = std::max(result, segment_table_end);
 
   ReduceLength(result);
-  return true;
 }
 
 CheckBool DisassemblerElf32::SectionName(const Elf32_Shdr& shdr,
diff --git a/disassembler_elf_32.h b/disassembler_elf_32.h
index a75773d..fba67f1 100644
--- a/disassembler_elf_32.h
+++ b/disassembler_elf_32.h
@@ -129,7 +129,13 @@
                           size_t length,
                           e_machine_values elf_em);
 
-  bool UpdateLength();
+  // Returns whether all non-SHT_NOBITS sections lie within image.
+  bool CheckSectionRanges();
+
+  // Returns whether all program segments lie within image.
+  bool CheckProgramSegmentRanges();
+
+  void UpdateLength();
 
   // Misc Section Helpers
 
@@ -143,8 +149,9 @@
   }
 
   const uint8_t* SectionBody(Elf32_Half id) const {
-    // TODO(huangs): Assert that section does not have SHT_NOBITS.
-    return FileOffsetToPointer(SectionHeader(id)->sh_offset);
+    const Elf32_Shdr* section_header = SectionHeader(id);
+    DCHECK(section_header->sh_type != SHT_NOBITS);
+    return FileOffsetToPointer(section_header->sh_offset);
   }
 
   // Gets the |name| of section |shdr|. Returns true on success.
diff --git a/types_elf.h b/types_elf.h
index a98fd8b..d45a10e 100644
--- a/types_elf.h
+++ b/types_elf.h
@@ -36,6 +36,35 @@
   Elf32_Half     e_shstrndx;
 };
 
+// Indexes for header->e_ident[].
+enum e_ident_indexes {
+  EI_MAG0 = 0,        // File identification.
+  EI_MAG1 = 1,        // File identification.
+  EI_MAG2 = 2,        // File identification.
+  EI_MAG3 = 3,        // File identification.
+  EI_CLASS = 4,       // File class.
+  EI_DATA = 5,        // Data encoding.
+  EI_VERSION = 6,     // File version.
+  EI_OSABI = 7,       // Operating system/ABI identification.
+  EI_ABIVERSION = 8,  // ABI version.
+  EI_PAD = 9,         // Start of padding bytes.
+  EI_NIDENT = 16      // Size of e_ident[].
+};
+
+// Values for header->e_ident[EI_CLASS].
+enum e_ident_class_values {
+  ELFCLASSNONE = 0,  // Invalid class.
+  ELFCLASS32 = 1,    // 32-bit objects.
+  ELFCLASS64 = 2     // 64-bit objects.
+};
+
+// Values for header->e_ident[EI_DATA].
+enum e_ident_data_values {
+  ELFDATANONE = 0,  // Unknown data format.
+  ELFDATA2LSB = 1,  // Two's complement, little-endian.
+  ELFDATA2MSB = 2,  // Two's complement, big-endian.
+};
+
 // values for header->e_type
 enum e_type_values {
   ET_NONE = 0,  // No file type
@@ -56,6 +85,8 @@
   // Other values skipped
 };
 
+enum { SHN_UNDEF = 0 };
+
 // A section header in the section header table
 struct Elf32_Shdr {
   Elf32_Word   sh_name;