components/zucchini/image_utils.h - chromium/src - Git at Google

 // Copyright 2017 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
 #define COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_

 #include <stddef.h>
 #include <stdint.h>

 #include <optional>
 #include <string>

 #include "base/format_macros.h"
 #include "base/numerics/safe_conversions.h"
 #include "base/strings/stringprintf.h"
 #include "components/zucchini/buffer_view.h"
 #include "components/zucchini/typed_value.h"

 namespace zucchini {

 // offset_t is used to describe an offset in an image.
 // Files bigger than 4GB are not supported.
 using offset_t = uint32_t;
 // Divide by 2 since label marking uses the most significant bit.
 constexpr offset_t kOffsetBound = static_cast<offset_t>(-1) / 2;
 // Use 0xFFFFFFF*E*, since 0xFFFFFFF*F* is a sentinel value for Dex references.
 constexpr offset_t kInvalidOffset = static_cast<offset_t>(-2);

 // key_t is used to identify an offset in a table.
 using key_t = uint32_t;

 enum Bitness : uint8_t {
   // The numerical values are intended to simplify WidthOf() below.
   kBit32 = 4,
   kBit64 = 8
 };

 inline uint32_t WidthOf(Bitness bitness) {
   return static_cast<uint32_t>(bitness);
 }

 // Used to uniquely identify a reference type.
 // Strongly typed objects are used to avoid ambiguitees with PoolTag.
 struct TypeTag : public TypedValue<TypeTag, uint8_t> {
   // inheriting constructor:
   using TypedValue<TypeTag, uint8_t>::TypedValue;
 };

 // Used to uniquely identify a pool.
 struct PoolTag : public TypedValue<PoolTag, uint8_t> {
   // inheriting constructor:
   using TypedValue<PoolTag, uint8_t>::TypedValue;
 };

 constexpr TypeTag kNoTypeTag(0xFF);  // Typically used to identify raw data.
 constexpr PoolTag kNoPoolTag(0xFF);

 // Specification of references in an image file.
 struct ReferenceTypeTraits {
   constexpr ReferenceTypeTraits(offset_t width_in,
                                 TypeTag type_tag_in,
                                 PoolTag pool_tag_in)
       : width(width_in), type_tag(type_tag_in), pool_tag(pool_tag_in) {}

   // |width| specifies number of bytes covered by the reference's binary
   // encoding.
   const offset_t width;
   // |type_tag| identifies the reference type being described.
   const TypeTag type_tag;
   // |pool_tag| identifies the pool this type belongs to.
   const PoolTag pool_tag;
 };

 // There is no need to store |type| because references of the same type are
 // always aggregated into the same container, and so during iteration we'd have
 // |type| already.
 struct Reference {
   offset_t location;
   offset_t target;
 };

 inline bool operator==(const Reference& a, const Reference& b) {
   return a.location == b.location && a.target == b.target;
 }

 // Interface for extracting References through member function GetNext().
 // This is used by Disassemblers to extract references from an image file.
 // Typically, a Reader lazily extracts values and does not hold any storage.
 class ReferenceReader {
  public:
   virtual ~ReferenceReader() = default;

   // Returns the next available Reference, or nullopt_t if exhausted.
   // Extracted References must be ordered by their location in the image.
   virtual std::optional<Reference> GetNext() = 0;
 };

 // Interface for writing References through member function
 // PutNext(reference). This is used by Disassemblers to write new References
 // in the image file.
 class ReferenceWriter {
  public:
   virtual ~ReferenceWriter() = default;

   // Writes |reference| in the underlying image file. This operation always
   // succeeds.
   virtual void PutNext(Reference reference) = 0;
 };

 // References encoding may be quite complex in some architectures (e.g., ARM),
 // requiring bit-level manipulation. In general, bits in a reference body fall
 // under 2 categories:
 // * Operation bits: Instruction op code, conditionals, or structural data.
 // * Payload bits: Actual target data of the reference. These may be absolute,
 //   or be displacements relative to instruction pointer / program counter.
 // During patch application,
 //   Old reference bytes = {old operation, old payload},
 // is transformed to
 //   New reference bytes = {new operation, new payload}.
 // New image bytes are written by three sources:
 //   (1) Direct copy from old image to new image for matched blocks.
 //   (2) Bytewise diff correction.
 //   (3) Dedicated reference target correction.
 //
 // For references whose operation and payload bits are stored in easily
 // separable bytes (e.g., rel32 reference in X86), (2) can exclude payload bits.
 // So during patch application, (1) naively copies everything, (2) fixes
 // operation bytes only, and (3) fixes payload bytes only.
 //
 // For architectures with references whose operation and payload bits may mix
 // within shared bytes (e.g., ARM rel32), a dilemma arises:
 // * (2) cannot ignores shared bytes, since otherwise new operation bits would
 //   not properly transfer.
 // * Having (2) always overwrite these bytes would reduce the benefits of
 //   reference correction, since references are likely to change.
 //
 // Our solution applies a hybrid approach: For each matching old / new reference
 // pair, define:
 //   Mixed reference bytes = {new operation, old payload},
 //
 // During patch generation, we compute bytewise correction from old reference
 // bytes to the mixed reference bytes. So during patch application, (2) only
 // corrects operation bit changes (and skips if they don't change), and (3)
 // overwrites old payload bits to new payload bits.

 // Interface for mixed reference byte generation. This base class
 // serves as a stub. Architectures whose references store operation bits and
 // payload bits can share common bytes (e.g., ARM rel32) should override this.
 class ReferenceMixer {
  public:
   virtual ~ReferenceMixer() = default;

   // Computes mixed reference bytes by combining (a) "payload bits" from an
   // "old" reference at |old_offset| with (b) "operation bits" from a "new"
   // reference at |new_offset|. Returns the result as ConstBufferView, which is
   // valid only until the next call to Mix().
   virtual ConstBufferView Mix(offset_t old_offset, offset_t new_offset) = 0;
 };

 // An Equivalence is a block of length |length| that approximately match in
 // |old_image| at an offset of |src_offset| and in |new_image| at an offset of
 // |dst_offset|.
 struct Equivalence {
   offset_t src_offset;
   offset_t dst_offset;
   offset_t length;

   offset_t src_end() const { return src_offset + length; }
   offset_t dst_end() const { return dst_offset + length; }
 };

 inline bool operator==(const Equivalence& a, const Equivalence& b) {
   return a.src_offset == b.src_offset && a.dst_offset == b.dst_offset &&
          a.length == b.length;
 }

 // Same as Equivalence, but with a similarity score. This is only used when
 // generating the patch.
 struct EquivalenceCandidate {
   Equivalence eq;
   double similarity;
 };

 template <size_t N>
 inline constexpr uint32_t ExeTypeToUint32(const char (&exe_type)[N]) {
   static_assert(N == 5, "Expected ExeType of length 4 + 1 null byte.");
   return (exe_type[3] << 24) | (exe_type[2] << 16) | (exe_type[1] << 8) |
          exe_type[0];
 }

 // Enumerations for supported executables. Values in this enum must be distinct.
 // Once present, values should never be altered or removed to ensure backwards
 // compatibility and patch type collision avoidance.
 enum ExecutableType : uint32_t {
   kExeTypeUnknown = UINT32_MAX,
   kExeTypeNoOp = ExeTypeToUint32("NoOp"),
   kExeTypeWin32X86 = ExeTypeToUint32("Px86"),
   kExeTypeWin32X64 = ExeTypeToUint32("Px64"),
   kExeTypeElfX86 = ExeTypeToUint32("Ex86"),
   kExeTypeElfX64 = ExeTypeToUint32("Ex64"),
   kExeTypeElfAArch32 = ExeTypeToUint32("EA32"),
   kExeTypeElfAArch64 = ExeTypeToUint32("EA64"),
   kExeTypeDex = ExeTypeToUint32("DEX "),
   kExeTypeZtf = ExeTypeToUint32("ZTF "),
 };

 constexpr ExecutableType CastToExecutableType(uint32_t possible_exe_type) {
   switch (static_cast<ExecutableType>(possible_exe_type)) {
     case kExeTypeNoOp:        // Falls through.
     case kExeTypeWin32X86:    // Falls through.
     case kExeTypeWin32X64:    // Falls through.
     case kExeTypeElfX86:      // Falls through.
     case kExeTypeElfX64:      // Falls through.
     case kExeTypeElfAArch32:  // Falls through.
     case kExeTypeElfAArch64:  // Falls through.
     case kExeTypeDex:         // Falls through.
     case kExeTypeZtf:         // Falls through.
     case kExeTypeUnknown:
       return static_cast<ExecutableType>(possible_exe_type);
     default:
       return kExeTypeUnknown;
   }
 }

 inline std::string CastExecutableTypeToString(ExecutableType exe_type) {
   uint32_t v = static_cast<uint32_t>(exe_type);
   char result[] = {static_cast<char>(v), static_cast<char>(v >> 8),
                    static_cast<char>(v >> 16), static_cast<char>(v >> 24), 0};
   return result;
 }

 // A region in an image with associated executable type |exe_type|. If
 // |exe_type == kExeTypeNoOp|, then the Element represents a region of raw data.
 struct Element : public BufferRegion {
   Element() = default;
   constexpr Element(const BufferRegion& region_in, ExecutableType exe_type_in)
       : BufferRegion(region_in), exe_type(exe_type_in) {}
   constexpr explicit Element(const BufferRegion& region_in)
       : BufferRegion(region_in), exe_type(kExeTypeNoOp) {}

   // Similar to lo() and hi(), but returns values in offset_t.
   offset_t BeginOffset() const { return base::checked_cast<offset_t>(lo()); }
   offset_t EndOffset() const { return base::checked_cast<offset_t>(hi()); }

   BufferRegion region() const { return {offset, size}; }

   friend bool operator==(const Element& a, const Element& b) {
     return a.exe_type == b.exe_type && a.offset == b.offset && a.size == b.size;
   }

   ExecutableType exe_type;
 };

 // A matched pair of Elements.
 struct ElementMatch {
   bool IsValid() const { return old_element.exe_type == new_element.exe_type; }
   ExecutableType exe_type() const { return old_element.exe_type; }

   // Represents match as "#+#=#+#", where "#" denotes the integers:
   //   [offset in "old", size in "old", offset in "new", size in "new"].
   // Note that element type is omitted.
   std::string ToString() const {
     return base::StringPrintf("%" PRIuS "+%" PRIuS "=%" PRIuS "+%" PRIuS "",
                               old_element.offset, old_element.size,
                               new_element.offset, new_element.size);
   }

   Element old_element;
   Element new_element;
 };

 }  // namespace zucchini

 #endif  // COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
	// Copyright 2017 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
	#define COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_

	#include <stddef.h>
	#include <stdint.h>

	#include <optional>
	#include <string>

	#include "base/format_macros.h"
	#include "base/numerics/safe_conversions.h"
	#include "base/strings/stringprintf.h"
	#include "components/zucchini/buffer_view.h"
	#include "components/zucchini/typed_value.h"

	namespace zucchini {

	// offset_t is used to describe an offset in an image.
	// Files bigger than 4GB are not supported.
	using offset_t = uint32_t;
	// Divide by 2 since label marking uses the most significant bit.
	constexpr offset_t kOffsetBound = static_cast<offset_t>(-1) / 2;
	// Use 0xFFFFFFFE, since 0xFFFFFFFF is a sentinel value for Dex references.
	constexpr offset_t kInvalidOffset = static_cast<offset_t>(-2);

	// key_t is used to identify an offset in a table.
	using key_t = uint32_t;

	enum Bitness : uint8_t {
	// The numerical values are intended to simplify WidthOf() below.
	kBit32 = 4,
	kBit64 = 8
	};

	inline uint32_t WidthOf(Bitness bitness) {
	return static_cast<uint32_t>(bitness);
	}

	// Used to uniquely identify a reference type.
	// Strongly typed objects are used to avoid ambiguitees with PoolTag.
	struct TypeTag : public TypedValue<TypeTag, uint8_t> {
	// inheriting constructor:
	using TypedValue<TypeTag, uint8_t>::TypedValue;
	};

	// Used to uniquely identify a pool.
	struct PoolTag : public TypedValue<PoolTag, uint8_t> {
	// inheriting constructor:
	using TypedValue<PoolTag, uint8_t>::TypedValue;
	};

	constexpr TypeTag kNoTypeTag(0xFF); // Typically used to identify raw data.
	constexpr PoolTag kNoPoolTag(0xFF);

	// Specification of references in an image file.
	struct ReferenceTypeTraits {
	constexpr ReferenceTypeTraits(offset_t width_in,
	TypeTag type_tag_in,
	PoolTag pool_tag_in)
	: width(width_in), type_tag(type_tag_in), pool_tag(pool_tag_in) {}

	// \|width\| specifies number of bytes covered by the reference's binary
	// encoding.
	const offset_t width;
	// \|type_tag\| identifies the reference type being described.
	const TypeTag type_tag;
	// \|pool_tag\| identifies the pool this type belongs to.
	const PoolTag pool_tag;
	};

	// There is no need to store \|type\| because references of the same type are
	// always aggregated into the same container, and so during iteration we'd have
	// \|type\| already.
	struct Reference {
	offset_t location;
	offset_t target;
	};

	inline bool operator==(const Reference& a, const Reference& b) {
	return a.location == b.location && a.target == b.target;
	}

	// Interface for extracting References through member function GetNext().
	// This is used by Disassemblers to extract references from an image file.
	// Typically, a Reader lazily extracts values and does not hold any storage.
	class ReferenceReader {
	public:
	virtual ~ReferenceReader() = default;

	// Returns the next available Reference, or nullopt_t if exhausted.
	// Extracted References must be ordered by their location in the image.
	virtual std::optional<Reference> GetNext() = 0;
	};

	// Interface for writing References through member function
	// PutNext(reference). This is used by Disassemblers to write new References
	// in the image file.
	class ReferenceWriter {
	public:
	virtual ~ReferenceWriter() = default;

	// Writes \|reference\| in the underlying image file. This operation always
	// succeeds.
	virtual void PutNext(Reference reference) = 0;
	};

	// References encoding may be quite complex in some architectures (e.g., ARM),
	// requiring bit-level manipulation. In general, bits in a reference body fall
	// under 2 categories:
	// * Operation bits: Instruction op code, conditionals, or structural data.
	// * Payload bits: Actual target data of the reference. These may be absolute,
	// or be displacements relative to instruction pointer / program counter.
	// During patch application,
	// Old reference bytes = {old operation, old payload},
	// is transformed to
	// New reference bytes = {new operation, new payload}.
	// New image bytes are written by three sources:
	// (1) Direct copy from old image to new image for matched blocks.
	// (2) Bytewise diff correction.
	// (3) Dedicated reference target correction.
	//
	// For references whose operation and payload bits are stored in easily
	// separable bytes (e.g., rel32 reference in X86), (2) can exclude payload bits.
	// So during patch application, (1) naively copies everything, (2) fixes
	// operation bytes only, and (3) fixes payload bytes only.
	//
	// For architectures with references whose operation and payload bits may mix
	// within shared bytes (e.g., ARM rel32), a dilemma arises:
	// * (2) cannot ignores shared bytes, since otherwise new operation bits would
	// not properly transfer.
	// * Having (2) always overwrite these bytes would reduce the benefits of
	// reference correction, since references are likely to change.
	//
	// Our solution applies a hybrid approach: For each matching old / new reference
	// pair, define:
	// Mixed reference bytes = {new operation, old payload},
	//
	// During patch generation, we compute bytewise correction from old reference
	// bytes to the mixed reference bytes. So during patch application, (2) only
	// corrects operation bit changes (and skips if they don't change), and (3)
	// overwrites old payload bits to new payload bits.

	// Interface for mixed reference byte generation. This base class
	// serves as a stub. Architectures whose references store operation bits and
	// payload bits can share common bytes (e.g., ARM rel32) should override this.
	class ReferenceMixer {
	public:
	virtual ~ReferenceMixer() = default;

	// Computes mixed reference bytes by combining (a) "payload bits" from an
	// "old" reference at \|old_offset\| with (b) "operation bits" from a "new"
	// reference at \|new_offset\|. Returns the result as ConstBufferView, which is
	// valid only until the next call to Mix().
	virtual ConstBufferView Mix(offset_t old_offset, offset_t new_offset) = 0;
	};

	// An Equivalence is a block of length \|length\| that approximately match in
	// \|old_image\| at an offset of \|src_offset\| and in \|new_image\| at an offset of
	// \|dst_offset\|.
	struct Equivalence {
	offset_t src_offset;
	offset_t dst_offset;
	offset_t length;

	offset_t src_end() const { return src_offset + length; }
	offset_t dst_end() const { return dst_offset + length; }
	};

	inline bool operator==(const Equivalence& a, const Equivalence& b) {
	return a.src_offset == b.src_offset && a.dst_offset == b.dst_offset &&
	a.length == b.length;
	}

	// Same as Equivalence, but with a similarity score. This is only used when
	// generating the patch.
	struct EquivalenceCandidate {
	Equivalence eq;
	double similarity;
	};

	template <size_t N>
	inline constexpr uint32_t ExeTypeToUint32(const char (&exe_type)[N]) {
	static_assert(N == 5, "Expected ExeType of length 4 + 1 null byte.");
	return (exe_type[3] << 24) \| (exe_type[2] << 16) \| (exe_type[1] << 8) \|
	exe_type[0];
	}

	// Enumerations for supported executables. Values in this enum must be distinct.
	// Once present, values should never be altered or removed to ensure backwards
	// compatibility and patch type collision avoidance.
	enum ExecutableType : uint32_t {
	kExeTypeUnknown = UINT32_MAX,
	kExeTypeNoOp = ExeTypeToUint32("NoOp"),
	kExeTypeWin32X86 = ExeTypeToUint32("Px86"),
	kExeTypeWin32X64 = ExeTypeToUint32("Px64"),
	kExeTypeElfX86 = ExeTypeToUint32("Ex86"),
	kExeTypeElfX64 = ExeTypeToUint32("Ex64"),
	kExeTypeElfAArch32 = ExeTypeToUint32("EA32"),
	kExeTypeElfAArch64 = ExeTypeToUint32("EA64"),
	kExeTypeDex = ExeTypeToUint32("DEX "),
	kExeTypeZtf = ExeTypeToUint32("ZTF "),
	};

	constexpr ExecutableType CastToExecutableType(uint32_t possible_exe_type) {
	switch (static_cast<ExecutableType>(possible_exe_type)) {
	case kExeTypeNoOp: // Falls through.
	case kExeTypeWin32X86: // Falls through.
	case kExeTypeWin32X64: // Falls through.
	case kExeTypeElfX86: // Falls through.
	case kExeTypeElfX64: // Falls through.
	case kExeTypeElfAArch32: // Falls through.
	case kExeTypeElfAArch64: // Falls through.
	case kExeTypeDex: // Falls through.
	case kExeTypeZtf: // Falls through.
	case kExeTypeUnknown:
	return static_cast<ExecutableType>(possible_exe_type);
	default:
	return kExeTypeUnknown;
	}
	}

	inline std::string CastExecutableTypeToString(ExecutableType exe_type) {
	uint32_t v = static_cast<uint32_t>(exe_type);
	char result[] = {static_cast<char>(v), static_cast<char>(v >> 8),
	static_cast<char>(v >> 16), static_cast<char>(v >> 24), 0};
	return result;
	}

	// A region in an image with associated executable type \|exe_type\|. If
	// \|exe_type == kExeTypeNoOp\|, then the Element represents a region of raw data.
	struct Element : public BufferRegion {
	Element() = default;
	constexpr Element(const BufferRegion& region_in, ExecutableType exe_type_in)
	: BufferRegion(region_in), exe_type(exe_type_in) {}
	constexpr explicit Element(const BufferRegion& region_in)
	: BufferRegion(region_in), exe_type(kExeTypeNoOp) {}

	// Similar to lo() and hi(), but returns values in offset_t.
	offset_t BeginOffset() const { return base::checked_cast<offset_t>(lo()); }
	offset_t EndOffset() const { return base::checked_cast<offset_t>(hi()); }

	BufferRegion region() const { return {offset, size}; }

	friend bool operator==(const Element& a, const Element& b) {
	return a.exe_type == b.exe_type && a.offset == b.offset && a.size == b.size;
	}

	ExecutableType exe_type;
	};

	// A matched pair of Elements.
	struct ElementMatch {
	bool IsValid() const { return old_element.exe_type == new_element.exe_type; }
	ExecutableType exe_type() const { return old_element.exe_type; }

	// Represents match as "#+#=#+#", where "#" denotes the integers:
	// [offset in "old", size in "old", offset in "new", size in "new"].
	// Note that element type is omitted.
	std::string ToString() const {
	return base::StringPrintf("%" PRIuS "+%" PRIuS "=%" PRIuS "+%" PRIuS "",
	old_element.offset, old_element.size,
	new_element.offset, new_element.size);
	}

	Element old_element;
	Element new_element;
	};

	} // namespace zucchini

	#endif // COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_