Internal change

PiperOrigin-RevId: 468547234
diff --git a/byte_array_mutator.cc b/byte_array_mutator.cc
index c68e6e9..b401e18 100644
--- a/byte_array_mutator.cc
+++ b/byte_array_mutator.cc
@@ -25,6 +25,21 @@
 
 namespace centipede {
 
+size_t ByteArrayMutator::RoundUpToAdd(size_t curr_size, size_t to_add) {
+  const size_t remainder = (curr_size + to_add) % size_alignment_;
+  return (remainder == 0) ? to_add : (to_add + size_alignment_ - remainder);
+}
+
+size_t ByteArrayMutator::RoundDownToRemove(size_t curr_size, size_t to_remove) {
+  if (curr_size <= size_alignment_) return 0;
+  if (to_remove >= curr_size) return curr_size - size_alignment_;
+
+  size_t result_size = curr_size - to_remove;
+  result_size -= (result_size % size_alignment_);
+  to_remove = curr_size - result_size;
+  return (result_size == 0) ? to_remove - size_alignment_ : to_remove;
+}
+
 bool ByteArrayMutator::Mutate(ByteArray &data) {
   return ApplyOneOf<3>(
       {&ByteArrayMutator::MutateSameSize, &ByteArrayMutator::MutateIncreaseSize,
@@ -77,6 +92,10 @@
   // Don't insert too many bytes at once.
   const size_t kMaxInsertSize = 20;
   size_t num_new_bytes = rng_() % kMaxInsertSize + 1;
+  num_new_bytes = RoundUpToAdd(data.size(), num_new_bytes);
+  if (num_new_bytes > kMaxInsertSize) {
+    num_new_bytes -= size_alignment_;
+  }
   // There are N+1 positions to insert something into an array of N.
   size_t pos = rng_() % (data.size() + 1);
   // Fixed array to avoid memory allocation.
@@ -88,10 +107,12 @@
 }
 
 bool ByteArrayMutator::EraseBytes(ByteArray &data) {
-  if (data.size() <= 1) return false;
+  if (data.size() <= size_alignment_) return false;
   // Ok to erase a sizable chunk since small inputs are good (if they
   // produce good features).
   size_t num_bytes_to_erase = rng_() % (data.size() / 2) + 1;
+  num_bytes_to_erase = RoundDownToRemove(data.size(), num_bytes_to_erase);
+  if (num_bytes_to_erase == 0) return false;
   size_t pos = rng_() % (data.size() - num_bytes_to_erase + 1);
   data.erase(data.begin() + pos, data.begin() + pos + num_bytes_to_erase);
   return true;
@@ -125,9 +146,14 @@
 
 void ByteArrayMutator::CrossOverInsert(ByteArray &data,
                                        const ByteArray &other) {
+  if ((data.size() % size_alignment_) + other.size() < size_alignment_) return;
   // insert other[first:first+size] at data[pos]
-  size_t first = rng_() % other.size();
-  size_t size = 1 + rng_() % (other.size() - first);
+  size_t size = 1 + rng_() % other.size();
+  size = RoundUpToAdd(data.size(), size);
+  if (size > other.size()) {
+    size -= size_alignment_;
+  }
+  size_t first = rng_() % (other.size() - size + 1);
   size_t pos = rng_() % (data.size() + 1);
   data.insert(data.begin() + pos, other.begin() + first,
               other.begin() + first + size);
diff --git a/byte_array_mutator.h b/byte_array_mutator.h
index 24b402c..4d340f7 100644
--- a/byte_array_mutator.h
+++ b/byte_array_mutator.h
@@ -100,7 +100,17 @@
   // Erases random bytes.
   bool EraseBytes(ByteArray &data);
 
+  // Set size alignment for mutants with modified sizes. Some mutators do not
+  // change input size, but mutators that insert or erase bytes will produce
+  // mutants with aligned sizes (if possible).
+  void set_size_alignment(size_t size_alignment) {
+    size_alignment_ = size_alignment;
+  }
+
  private:
+  FRIEND_TEST(ByteArrayMutator, RoundUpToAddCorrectly);
+  FRIEND_TEST(ByteArrayMutator, RoundDownToRemoveCorrectly);
+
   // Applies fn[random_index] to data, returns true if a mutation happened.
   template <size_t kArraySize>
   bool ApplyOneOf(const std::array<Fn, kArraySize> &fn, ByteArray &data) {
@@ -113,6 +123,31 @@
     return false;  // May still happen periodically.
   }
 
+  // Given a current size and a number of bytes to add, returns the number of
+  // bytes that should be added for the resulting size to be properly aligned.
+  //
+  // If the original to_add would result in an unaligned input size, we round up
+  // to the next larger aligned size.
+  size_t RoundUpToAdd(size_t curr_size, size_t to_add);
+
+  // Given a current size and a number of bytes to remove, returns the number of
+  // bytes that should be removed for the resulting size to be properly aligned.
+  //
+  // If the original to_remove would result in an unaligned input size, we
+  // round down to the next smaller aligned size.
+  //
+  // However, we never return a number of bytes to remove that would result in a
+  // 0 size. In this case, the resulting size will be the smaller of
+  // curr_size and size_alignment_.
+  size_t RoundDownToRemove(size_t curr_size, size_t to_remove);
+
+  // Size alignment in bytes to generate mutants.
+  //
+  // For example, if size_alignment_ is 1, generated mutants can have any
+  // number of bytes. If size_alignment_ is 4, generated mutants will have sizes
+  // that are 4-byte aligned.
+  size_t size_alignment_ = 1;
+
   Rng rng_;
   std::vector<ByteArray> dictionary_;
 };
diff --git a/byte_array_mutator_test.cc b/byte_array_mutator_test.cc
index 5e1f2db..a4bd904 100644
--- a/byte_array_mutator_test.cc
+++ b/byte_array_mutator_test.cc
@@ -24,6 +24,46 @@
 
 namespace centipede {
 
+// Tests that when alignment is not 1 byte, adding bytes to an input will result
+// in a size-aligned mutant (even if the input is not size-aligned).
+//
+// Note: This test cannot be in an anonymous namespace due to the FRIEND_TEST in
+// ByteArrayMutator.
+TEST(ByteArrayMutator, RoundUpToAddCorrectly) {
+  ByteArrayMutator mutator(/*seed=*/1);
+  mutator.set_size_alignment(4);
+
+  EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/0, /*to_add=*/0), 0);
+  EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/4, /*to_add=*/0), 0);
+  EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/4, /*to_add=*/3), 4);
+  EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/5, /*to_add=*/0), 3);
+  EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/5, /*to_add=*/2), 3);
+  EXPECT_EQ(mutator.RoundUpToAdd(/*curr_size=*/5, /*to_add=*/18), 19);
+}
+
+// Tests that when alignment is not 1 byte, removing bytes from an input will
+// result in a size-aligned mutant (even if the input is not size-aligned).
+//
+// Note: This test cannot be in an anonymous namespace due to the FRIEND_TEST in
+// ByteArrayMutator.
+TEST(ByteArrayMutator, RoundDownToRemoveCorrectly) {
+  ByteArrayMutator mutator(/*seed=*/1);
+  mutator.set_size_alignment(4);
+
+  EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/0, /*to_remove=*/0), 0);
+  EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/0, /*to_remove=*/1), 0);
+  EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/1, /*to_remove=*/0), 0);
+  EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/1, /*to_remove=*/1), 0);
+  EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/4, /*to_remove=*/0), 0);
+  EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/4, /*to_remove=*/3), 0);
+  EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/5, /*to_remove=*/0), 1);
+  EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/5, /*to_remove=*/2), 1);
+  EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/7, /*to_remove=*/2), 3);
+  EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/23, /*to_remove=*/4), 7);
+  EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/23, /*to_remove=*/20), 19);
+  EXPECT_EQ(mutator.RoundDownToRemove(/*curr_size=*/23, /*to_remove=*/24), 19);
+}
+
 namespace {
 
 // Tests that two mutators seeded with different rng seeds produce different
@@ -50,9 +90,11 @@
 void TestMutatorFn(ByteArrayMutator::Fn fn, const ByteArray &seed,
                    const std::vector<ByteArray> &expected_mutants,
                    const std::vector<ByteArray> &unexpected_mutants,
+                   size_t size_alignment = 1,
                    const std::vector<ByteArray> &dictionary = {},
                    size_t num_iterations = 100000000) {
   ByteArrayMutator mutator(1);
+  mutator.set_size_alignment(size_alignment);
   mutator.AddToDictionary(dictionary);
   absl::flat_hash_set<ByteArray> expected(expected_mutants.begin(),
                                           expected_mutants.end());
@@ -133,6 +175,26 @@
                 });
 }
 
+TEST(ByteArrayMutator, InsertBytesWithAlignment) {
+  TestMutatorFn(&ByteArrayMutator::InsertBytes, {0, 1, 2},
+                /*expected_mutants=*/
+                {
+                    {0, 1, 2, 3},
+                    {0, 3, 1, 2},
+                    {3, 0, 1, 2},
+                },
+                /*unexpected_mutants=*/
+                {
+                    {0, 1},
+                    {0, 1, 2},
+                    {0, 1, 2, 3, 4},
+                    {0, 3, 1, 4, 2},
+                    {0, 3, 4, 1, 2},
+                    {3, 4, 0, 1, 2},
+                },
+                /*size_alignment=*/4);
+}
+
 // Currently, same as for InsertBytes. Will change in future as we add more
 // mutators.
 TEST(ByteArrayMutator, MutateIncreaseSize) {
@@ -153,6 +215,25 @@
                 });
 }
 
+TEST(ByteArrayMutator, MutateIncreaseSizeWithAlignment) {
+  TestMutatorFn(&ByteArrayMutator::MutateIncreaseSize, {0, 1, 2},
+                /*expected_mutants=*/
+                {
+                    {0, 1, 2, 3},
+                    {0, 3, 1, 2},
+                    {3, 0, 1, 2},
+                },
+                /*unexpected_mutants=*/
+                {
+                    {0, 1},
+                    {0, 1, 2, 3, 4},
+                    {0, 3, 1, 4, 2},
+                    {0, 3, 4, 1, 2},
+                    {3, 4, 0, 1, 2},
+                },
+                /*size_alignment=*/4);
+}
+
 TEST(ByteArrayMutator, EraseBytes) {
   TestMutatorFn(&ByteArrayMutator::EraseBytes, {0, 1, 2, 3},
                 /*expected_mutants=*/
@@ -173,6 +254,46 @@
                 });
 }
 
+TEST(ByteArrayMutator, EraseBytesWithAlignment) {
+  TestMutatorFn(&ByteArrayMutator::EraseBytes, {0, 1, 2, 3},
+                /*expected_mutants=*/
+                {
+                    {0, 1, 2, 3},
+                },
+                /*unexpected_mutants=*/
+                {
+                    {0, 1, 2},
+                    {0, 1, 3},
+                    {0, 2, 3},
+                    {1, 2, 3},
+                    {0, 1},
+                    {0, 3},
+                    {2, 3},
+                    {0},
+                    {1},
+                    {2},
+                },
+                /*size_alignment=*/4);
+  TestMutatorFn(&ByteArrayMutator::EraseBytes, {0, 1, 2, 3, 4},
+                /*expected_mutants=*/
+                {
+                    {0, 1, 2, 3},
+                    {1, 2, 3, 4},
+                    {0, 1, 3, 4},
+                },
+                /*unexpected_mutants=*/
+                {
+                    {0, 1, 2, 3, 4},
+                    {0, 1, 2},
+                    {0, 1, 3},
+                    {0, 2, 3},
+                    {1, 2, 3},
+                    {0, 1},
+                    {0},
+                },
+                /*size_alignment=*/4);
+}
+
 // Currently, same as EraseBytes. Will change in future as we add more mutators.
 TEST(ByteArrayMutator, MutateDecreaseSize) {
   TestMutatorFn(&ByteArrayMutator::MutateDecreaseSize, {0, 1, 2, 3},
@@ -194,6 +315,46 @@
                 });
 }
 
+TEST(ByteArrayMutator, MutateDecreaseSizeWithAlignment) {
+  TestMutatorFn(&ByteArrayMutator::MutateDecreaseSize, {0, 1, 2, 3},
+                /*expected_mutants=*/
+                {
+                    {0, 1, 2, 3},
+                },
+                /*unexpected_mutants=*/
+                {
+                    {0, 1, 2},
+                    {0, 1, 3},
+                    {0, 2, 3},
+                    {1, 2, 3},
+                    {0, 1},
+                    {0, 3},
+                    {2, 3},
+                    {0},
+                    {1},
+                    {2},
+                },
+                /*size_alignment=*/4);
+  TestMutatorFn(&ByteArrayMutator::MutateDecreaseSize, {0, 1, 2, 3, 4},
+                /*expected_mutants=*/
+                {
+                    {0, 1, 2, 3},
+                    {1, 2, 3, 4},
+                    {0, 1, 3, 4},
+                },
+                /*unexpected_mutants=*/
+                {
+                    {0, 1, 2, 3, 4},
+                    {0, 1, 2},
+                    {0, 1, 3},
+                    {0, 2, 3},
+                    {1, 2, 3},
+                    {0, 1},
+                    {0},
+                },
+                /*size_alignment=*/4);
+}
+
 // Tests that MutateSameSize will eventually produce all possible mutants of
 // size 1 and 2. Also tests some of the 3-byte mutants.
 TEST(ByteArrayMutator, MutateSameSize) {
@@ -276,6 +437,7 @@
                     {6, 2, 3, 4, 5},
                     {1, 2, 3, 4, 0},
                 },
+                /*size_alignment=*/1,
                 /*dictionary=*/
                 {
                     {7, 8, 9},
@@ -301,6 +463,7 @@
                     {1, 2, 3, 7, 8},
                     {7, 8, 1, 2, 3},
                 },
+                /*size_alignment=*/1,
                 /*dictionary=*/
                 {
                     {4, 5},
@@ -313,8 +476,10 @@
 // and so we can test for all possible expected mutants.
 void TestCrossOver(void (ByteArrayMutator::*fn)(ByteArray &, const ByteArray &),
                    const ByteArray &seed, const ByteArray &other,
-                   const std::vector<ByteArray> &all_possible_mutants) {
+                   const std::vector<ByteArray> &all_possible_mutants,
+                   size_t size_alignment = 1) {
   ByteArrayMutator mutator(1);
+  mutator.set_size_alignment(size_alignment);
   absl::flat_hash_set<ByteArray> expected(all_possible_mutants.begin(),
                                           all_possible_mutants.end());
   absl::flat_hash_set<ByteArray> found;
@@ -369,6 +534,61 @@
                 });
 }
 
+TEST(ByteArrayMutator, CrossOverInsertWithAlignment) {
+  TestCrossOver(&ByteArrayMutator::CrossOverInsert, {1}, {2},
+                /*all_possible_mutants=*/
+                {
+                    {1},
+                },
+                /*size_alignment=*/4);
+  TestCrossOver(&ByteArrayMutator::CrossOverInsert, {1, 2}, {3, 4},
+                /*all_possible_mutants=*/
+                {
+                    {1, 2, 3, 4},
+                    {1, 3, 4, 2},
+                    {3, 4, 1, 2},
+                },
+                /*size_alignment=*/4);
+  TestCrossOver(&ByteArrayMutator::CrossOverInsert, {1, 2}, {3, 4, 5},
+                /*all_possible_mutants=*/
+                {
+                    {1, 2, 3, 4},
+                    {1, 3, 4, 2},
+                    {3, 4, 1, 2},
+                    {1, 2, 4, 5},
+                    {1, 4, 5, 2},
+                    {4, 5, 1, 2},
+                },
+                /*size_alignment=*/4);
+  TestCrossOver(&ByteArrayMutator::CrossOverInsert, {1, 2, 3, 4, 5}, {6},
+                /*all_possible_mutants=*/
+                {
+                    {1, 2, 3, 4, 5},
+                },
+                /*size_alignment=*/4);
+  TestCrossOver(&ByteArrayMutator::CrossOverInsert, {1, 2, 3}, {4, 5, 6, 7},
+                /*all_possible_mutants=*/
+                {
+                    {4, 1, 2, 3},
+                    {5, 1, 2, 3},
+                    {6, 1, 2, 3},
+                    {7, 1, 2, 3},
+                    {1, 4, 2, 3},
+                    {1, 5, 2, 3},
+                    {1, 6, 2, 3},
+                    {1, 7, 2, 3},
+                    {1, 2, 4, 3},
+                    {1, 2, 5, 3},
+                    {1, 2, 6, 3},
+                    {1, 2, 7, 3},
+                    {1, 2, 3, 4},
+                    {1, 2, 3, 5},
+                    {1, 2, 3, 6},
+                    {1, 2, 3, 7},
+                },
+                /*size_alignment=*/4);
+}
+
 TEST(ByteArrayMutator, CrossOverOverwrite) {
   TestCrossOver(&ByteArrayMutator::CrossOverOverwrite, {1}, {2},
                 /*all_possible_mutants=*/
@@ -424,8 +644,8 @@
 
 TEST(ByteArrayMutator, CrossOver) {
   // Most of CrossOver is tested above in CrossOverOverwrite/CrossOverInsert.
-  // Here just test one set of inputs to ensure CrossOver calls the other
-  // two functions correctly.
+  // Here just test one set of inputs to ensure CrossOver calls the other two
+  // functions correctly.
   TestCrossOver(&ByteArrayMutator::CrossOver, {1, 2}, {3, 4},
                 /*all_possible_mutants=*/
                 {
@@ -464,6 +684,58 @@
   EXPECT_LT(num_failed_generic, kNumIter / 1000);
 }
 
+TEST(ByteArrayMutator, MutateManyWithAlignedInputs) {
+  constexpr size_t kSizeAlignment = 4;
+  ByteArrayMutator mutator(/*seed=*/1);
+  mutator.set_size_alignment(kSizeAlignment);
+  constexpr size_t kNumMutantsToGenerate = 10000;
+  std::vector<ByteArray> mutants;
+
+  // If all inputs are aligned, expect all generated mutants to be aligned.
+  const std::vector<ByteArray> aligned_inputs = {
+      {0, 1, 2, 3},
+      {0, 1, 2, 3, 4, 5, 6, 7},
+      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
+  };
+  mutator.MutateMany(aligned_inputs, kNumMutantsToGenerate,
+                     /*crossover_level=*/50, mutants);
+  EXPECT_EQ(mutants.size(), kNumMutantsToGenerate);
+  for (const ByteArray &mutant : mutants) {
+    EXPECT_EQ(mutant.size() % kSizeAlignment, 0);
+  }
+}
+
+TEST(ByteArrayMutator, MutateManyWithUnalignedInputs) {
+  constexpr size_t kSizeAlignment = 4;
+  ByteArrayMutator mutator(/*seed=*/1);
+  mutator.set_size_alignment(kSizeAlignment);
+  constexpr size_t kNumMutantsToGenerate = 10000;
+  std::vector<ByteArray> mutants;
+
+  // If there are unaligned inputs, most mutants should be aligned, but the ones
+  // that are unaligned should be the same size as the unaligned inputs (as they
+  // resulted from mutators that did not change the size of the inputs).
+  const std::vector<ByteArray> unaligned_inputs = {
+      {0},
+      {0, 1},
+      {0, 1, 2},
+      {0, 1, 2, 3, 4},
+      {0, 1, 2, 3, 4, 5},
+      {0, 1, 2, 3, 4, 5, 6},
+      {0, 1, 2, 3, 4, 5, 6, 7, 8},
+      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
+  };
+  mutator.MutateMany(unaligned_inputs, kNumMutantsToGenerate,
+                     /*crossover_level=*/50, mutants);
+  EXPECT_EQ(mutants.size(), kNumMutantsToGenerate);
+  for (const ByteArray &mutant : mutants) {
+    if (mutant.size() % kSizeAlignment != 0) {
+      EXPECT_LE(mutant.size(), 11);
+    }
+  }
+}
+
 }  // namespace
 
 }  // namespace centipede
diff --git a/defs.h b/defs.h
index 54c5d35..471e60d 100644
--- a/defs.h
+++ b/defs.h
@@ -27,6 +27,10 @@
 
 using ByteArray = std::vector<uint8_t>;
 
+// Macro used to allow tests to access protected or private members of a class.
+#define FRIEND_TEST(test_case_name, test_name) \
+  friend class test_case_name##_##test_name##_Test
+
 }  // namespace centipede
 
 #endif  // THIRD_PARTY_CENTIPEDE_DEFS_H_