src/blockhash_test.cc - external/github.com/google/open-vcdiff - Git at Google

 // Copyright 2008 Google Inc.
 // Author: Lincoln Smith
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include <config.h>
 #include "blockhash.h"
 #include <limits.h>  // INT_MIN
 #include <string.h>  // memcpy, memcmp, strlen
 #include <iostream>
 #include <memory>  // auto_ptr
 #include "encodetable.h"
 #include "rolling_hash.h"
 #include "testing.h"

 namespace open_vcdiff {

 const int kBlockSize = BlockHash::kBlockSize;

 class BlockHashTest : public testing::Test {
  protected:
   static const int kTimingTestSize = 1 << 21;  // 2M
   static const int kTimingTestIterations = 32;

   BlockHashTest() {
     dh_.reset(BlockHash::CreateDictionaryHash(sample_text,
                                               strlen(sample_text)));
     th_.reset(BlockHash::CreateTargetHash(sample_text, strlen(sample_text), 0));
     EXPECT_TRUE(dh_.get() != NULL);
     EXPECT_TRUE(th_.get() != NULL);
   }

   // BlockHashTest is a friend to BlockHash.  Expose the protected functions
   // that will be tested by the children of BlockHashTest.
   static bool BlockContentsMatch(const char* block1, const char* block2) {
     return BlockHash::BlockContentsMatch(block1, block2);
   }

   int FirstMatchingBlock(const BlockHash& block_hash,
                          uint32_t hash_value,
                          const char* block_ptr) const {
     return block_hash.FirstMatchingBlock(hash_value, block_ptr);
   }

   int NextMatchingBlock(const BlockHash& block_hash,
                         int block_number,
                         const char* block_ptr) const {
     return block_hash.NextMatchingBlock(block_number, block_ptr);
   }

   static int MatchingBytesToLeft(const char* source_match_start,
                                  const char* target_match_start,
                                  int max_bytes) {
     return BlockHash::MatchingBytesToLeft(source_match_start,
                                           target_match_start,
                                           max_bytes);
   }

   static int MatchingBytesToRight(const char* source_match_end,
                                   const char* target_match_end,
                                   int max_bytes) {
     return BlockHash::MatchingBytesToRight(source_match_end,
                                            target_match_end,
                                            max_bytes);
   }

   static int StringLengthAsInt(const char* s) {
     return static_cast<int>(strlen(s));
   }

   void InitBlocksToDifferAtNthByte(int n) {
     CHECK(n < kBlockSize);
     memset(compare_buffer_1_, 0xBE, kTimingTestSize);
     memset(compare_buffer_2_, 0xBE, kTimingTestSize);
     for (int index = n; index < kTimingTestSize; index += kBlockSize) {
       compare_buffer_1_[index] = 0x00;
       compare_buffer_2_[index] = 0x01;
     }
   }

   void TestAndPrintTimesForCompareFunctions(bool should_be_identical);

   void TimingTestForBlocksThatDifferAtByte(int n) {
     InitBlocksToDifferAtNthByte(n);
     std::cout << "Comparing blocks that differ at byte " << n << std::endl;
     TestAndPrintTimesForCompareFunctions(false);
   }

   // Copy sample_text_without_spaces and search_string_without_spaces
   // into newly allocated sample_text and search_string buffers,
   // but pad them with space characters so that every character
   // in sample_text_without_spaces matches (kBlockSize - 1)
   // space characters in sample_text, followed by that character.
   // For example:
   // Since sample_text_without_spaces begins "The only thing"...,
   // if kBlockSize is 4, then 3 space characters will be inserted
   // between each letter of sample_text, as follows:
   // "   T   h   e       o   n   l   y       t   h   i   n   g"...
   // This makes testing simpler, because finding a kBlockSize-byte match
   // between the sample text and search string only depends on the
   // trailing letter in each block.
   static void MakeEachLetterABlock(const char* string_without_spaces,
                                    const char** result) {
     const size_t length_without_spaces = strlen(string_without_spaces);
     char* padded_text = new char[(kBlockSize * length_without_spaces) + 1];
     memset(padded_text, ' ', kBlockSize * length_without_spaces);
     char* padded_text_ptr = padded_text + (kBlockSize - 1);
     for (size_t i = 0; i < length_without_spaces; ++i) {
       *padded_text_ptr = string_without_spaces[i];
       padded_text_ptr += kBlockSize;
     }
     padded_text[kBlockSize * length_without_spaces] = '\0';
     *result = padded_text;
   }

   static void SetUpTestCase() {
     MakeEachLetterABlock(sample_text_without_spaces, &sample_text);
     MakeEachLetterABlock(search_string_without_spaces, &search_string);
     MakeEachLetterABlock(search_string_altered_without_spaces,
                          &search_string_altered);
     MakeEachLetterABlock(search_to_end_without_spaces, &search_to_end_string);
     MakeEachLetterABlock(search_to_beginning_without_spaces,
                          &search_to_beginning_string);
     MakeEachLetterABlock(sample_text_many_matches_without_spaces,
                          &sample_text_many_matches);
     MakeEachLetterABlock(search_string_many_matches_without_spaces,
                          &search_string_many_matches);
     MakeEachLetterABlock("y", &test_string_y);
     MakeEachLetterABlock("e", &test_string_e);
     char* new_test_string_unaligned_e = new char[kBlockSize];
     memset(new_test_string_unaligned_e, ' ', kBlockSize);
     new_test_string_unaligned_e[kBlockSize - 2] = 'e';
     test_string_unaligned_e = new_test_string_unaligned_e;
     char* new_test_string_all_Qs = new char[kBlockSize];
     memset(new_test_string_all_Qs, 'Q', kBlockSize);
     test_string_all_Qs = new_test_string_all_Qs;
     hashed_y = RollingHash<kBlockSize>::Hash(test_string_y);
     hashed_e = RollingHash<kBlockSize>::Hash(test_string_e);
     hashed_f =
         RollingHash<kBlockSize>::Hash(&search_string[index_of_f_in_fearsome]);
     hashed_unaligned_e = RollingHash<kBlockSize>::Hash(test_string_unaligned_e);
     hashed_all_Qs = RollingHash<kBlockSize>::Hash(test_string_all_Qs);
   }

   static void TearDownTestCase() {
     delete[] sample_text;
     delete[] search_string;
     delete[] search_string_altered;
     delete[] search_to_end_string;
     delete[] search_to_beginning_string;
     delete[] sample_text_many_matches;
     delete[] search_string_many_matches;
     delete[] test_string_y;
     delete[] test_string_e;
     delete[] test_string_unaligned_e;
     delete[] test_string_all_Qs;
   }

   // Each block in the sample text and search string is kBlockSize bytes long,
   // and consists of (kBlockSize - 1) space characters
   // followed by a single letter of text.

   // Block numbers of certain characters within the sample text:
   // All six occurrences of "e", in order.
   static const int block_of_first_e = 2;
   static const int block_of_second_e = 16;
   static const int block_of_third_e = 21;
   static const int block_of_fourth_e = 27;
   static const int block_of_fifth_e = 35;
   static const int block_of_sixth_e = 42;

   static const int block_of_y_in_only = 7;
   // The block number is multiplied by kBlockSize to arrive at the
   // index, which points to the (kBlockSize - 1) space characters before
   // the letter specified.
   // Indices of certain characters within the sample text.
   static const int index_of_first_e = block_of_first_e * kBlockSize;
   static const int index_of_fourth_e = block_of_fourth_e * kBlockSize;
   static const int index_of_sixth_e = block_of_sixth_e * kBlockSize;
   static const int index_of_y_in_only = block_of_y_in_only * kBlockSize;
   static const int index_of_space_before_fear_is_fear = 25 * kBlockSize;
   static const int index_of_longest_match_ear_is_fear = 27 * kBlockSize;
   static const int index_of_i_in_fear_is_fear = 31 * kBlockSize;
   static const int index_of_space_before_fear_itself = 33 * kBlockSize;
   static const int index_of_space_before_itself = 38 * kBlockSize;
   static const int index_of_ababc = 4 * kBlockSize;

   // Indices of certain characters within the search strings.
   static const int index_of_second_w_in_what_we = 5 * kBlockSize;
   static const int index_of_second_e_in_what_we_hear = 9 * kBlockSize;
   static const int index_of_f_in_fearsome = 16 * kBlockSize;
   static const int index_of_space_in_eat_itself =  12 * kBlockSize;
   static const int index_of_i_in_itself = 13 * kBlockSize;
   static const int index_of_t_in_use_the = 4 * kBlockSize;
   static const int index_of_o_in_online = 8 * kBlockSize;

   static const char sample_text_without_spaces[];
   static const char search_string_without_spaces[];
   static const char search_string_altered_without_spaces[];
   static const char search_to_end_without_spaces[];
   static const char search_to_beginning_without_spaces[];
   static const char sample_text_many_matches_without_spaces[];
   static const char search_string_many_matches_without_spaces[];

   static const char* sample_text;
   static const char* search_string;
   static const char* search_string_altered;
   static const char* search_to_end_string;
   static const char* search_to_beginning_string;
   static const char* sample_text_many_matches;
   static const char* search_string_many_matches;

   static const char* test_string_y;
   static const char* test_string_e;
   static const char* test_string_all_Qs;
   static const char* test_string_unaligned_e;

   static uint32_t hashed_y;
   static uint32_t hashed_e;
   static uint32_t hashed_f;
   static uint32_t hashed_unaligned_e;
   static uint32_t hashed_all_Qs;

   // Boost scoped_ptr, if available, could be used instead of std::auto_ptr.
   std::auto_ptr<const BlockHash> dh_;  // hash table is populated at startup
   std::auto_ptr<BlockHash> th_;  // hash table not populated;
                                 // used to test incremental adds

   BlockHash::Match best_match_;
   char* compare_buffer_1_;
   char* compare_buffer_2_;
   int prime_result_;
 };

 #ifdef GTEST_HAS_DEATH_TEST
 typedef BlockHashTest BlockHashDeathTest;
 #endif  // GTEST_HAS_DEATH_TEST

 // The C++ standard requires a separate definition of these static const values,
 // even though their initializers are given within the class definition.
 const int BlockHashTest::block_of_first_e;
 const int BlockHashTest::block_of_second_e;
 const int BlockHashTest::block_of_third_e;
 const int BlockHashTest::block_of_fourth_e;
 const int BlockHashTest::block_of_fifth_e;
 const int BlockHashTest::block_of_sixth_e;
 const int BlockHashTest::block_of_y_in_only;
 const int BlockHashTest::index_of_first_e;
 const int BlockHashTest::index_of_fourth_e;
 const int BlockHashTest::index_of_sixth_e;
 const int BlockHashTest::index_of_y_in_only;
 const int BlockHashTest::index_of_space_before_fear_is_fear;
 const int BlockHashTest::index_of_longest_match_ear_is_fear;
 const int BlockHashTest::index_of_i_in_fear_is_fear;
 const int BlockHashTest::index_of_space_before_fear_itself;
 const int BlockHashTest::index_of_space_before_itself;
 const int BlockHashTest::index_of_ababc;
 const int BlockHashTest::index_of_second_w_in_what_we;
 const int BlockHashTest::index_of_second_e_in_what_we_hear;
 const int BlockHashTest::index_of_f_in_fearsome;
 const int BlockHashTest::index_of_space_in_eat_itself;
 const int BlockHashTest::index_of_i_in_itself;
 const int BlockHashTest::index_of_t_in_use_the;
 const int BlockHashTest::index_of_o_in_online;

 const char BlockHashTest::sample_text_without_spaces[] =
     "The only thing we have to fear is fear itself";

 const char BlockHashTest::search_string_without_spaces[] =
     "What we hear is fearsome";

 const char BlockHashTest::search_string_altered_without_spaces[] =
     "Vhat ve hear is fearsomm";

 const char BlockHashTest::search_to_end_without_spaces[] =
     "Pop will eat itself, eventually";

 const char BlockHashTest::search_to_beginning_without_spaces[] =
     "Use The online dictionary";

 const char BlockHashTest::sample_text_many_matches_without_spaces[] =
     "ababababcab";

 const char BlockHashTest::search_string_many_matches_without_spaces[] =
     "ababc";

 const char* BlockHashTest::sample_text = NULL;
 const char* BlockHashTest::search_string = NULL;
 const char* BlockHashTest::search_string_altered = NULL;
 const char* BlockHashTest::search_to_end_string = NULL;
 const char* BlockHashTest::search_to_beginning_string = NULL;
 const char* BlockHashTest::sample_text_many_matches = NULL;
 const char* BlockHashTest::search_string_many_matches = NULL;

 const char* BlockHashTest::test_string_y = NULL;
 const char* BlockHashTest::test_string_e = NULL;
 const char* BlockHashTest::test_string_unaligned_e = NULL;
 const char* BlockHashTest::test_string_all_Qs = NULL;

 uint32_t BlockHashTest::hashed_y = 0;
 uint32_t BlockHashTest::hashed_e = 0;
 uint32_t BlockHashTest::hashed_f = 0;
 uint32_t BlockHashTest::hashed_unaligned_e = 0;
 uint32_t BlockHashTest::hashed_all_Qs = 0;

 void BlockHashTest::TestAndPrintTimesForCompareFunctions(
     bool should_be_identical) {
   CHECK(compare_buffer_1_ != NULL);
   CHECK(compare_buffer_2_ != NULL);
   // Prime the memory cache.
   prime_result_ =
       memcmp(compare_buffer_1_, compare_buffer_2_, kTimingTestSize);
   const char* const block1_limit =
       &compare_buffer_1_[kTimingTestSize - kBlockSize];
   int block_compare_words_result = 0;
   CycleTimer block_compare_words_timer;
   block_compare_words_timer.Start();
   for (int i = 0; i < kTimingTestIterations; ++i) {
     const char* block1 = compare_buffer_1_;
     const char* block2 = compare_buffer_2_;
     while (block1 < block1_limit) {
       if (!BlockHash::BlockCompareWords(block1, block2)) {
         ++block_compare_words_result;
       }
       block1 += kBlockSize;
       block2 += kBlockSize;
     }
   }
   block_compare_words_timer.Stop();
   double time_for_block_compare_words =
       static_cast<double>(block_compare_words_timer.GetInUsec())
       / ((kTimingTestSize / kBlockSize) * kTimingTestIterations);
   int block_contents_match_result = 0;
   CycleTimer block_contents_match_timer;
   block_contents_match_timer.Start();
   for (int i = 0; i < kTimingTestIterations; ++i) {
     const char* block1 = compare_buffer_1_;
     const char* block2 = compare_buffer_2_;
     while (block1 < block1_limit) {
       if (!BlockHash::BlockContentsMatch(block1, block2)) {
         ++block_contents_match_result;
       }
       block1 += kBlockSize;
       block2 += kBlockSize;
     }
   }
   block_contents_match_timer.Stop();
   double time_for_block_contents_match =
       static_cast<double>(block_contents_match_timer.GetInUsec())
       / ((kTimingTestSize / kBlockSize) * kTimingTestIterations);
   EXPECT_EQ(block_contents_match_result, block_compare_words_result);
   if (should_be_identical) {
     CHECK_EQ(0, block_compare_words_result);
   } else {
     CHECK_GT(block_compare_words_result, 0);
   }
   std::cout << "BlockHash::BlockCompareWords: "
             << time_for_block_compare_words << " us per operation" << std::endl;
   std::cout << "BlockHash::BlockContentsMatch: "
             << time_for_block_contents_match << " us per operation"
             << std::endl;
   if (time_for_block_compare_words > 0) {
     double percent_change =
         (((time_for_block_contents_match - time_for_block_compare_words)
           / time_for_block_compare_words) * 100.0);
     if (percent_change >= 0.0) {
       std::cout << "BlockContentsMatch is " << percent_change << "%"
                 << " SLOWER than BlockCompareWords" << std::endl;
     } else {
       std::cout << "BlockContentsMatch is " << (-percent_change) << "%"
                 << " FASTER than BlockCompareWords" << std::endl;
     }
   }
 #if defined(NDEBUG) && !defined(VCDIFF_USE_BLOCK_COMPARE_WORDS)
   // Only check timings for optimized build.  There's plenty of margin: this
   // check will fail only if BlockContentsMatch is at least twice as slow as
   // BlockCompareWords.
   EXPECT_GT(time_for_block_compare_words * 2.0, time_for_block_contents_match);
 #endif  // NDEBUG && !VCDIFF_USE_BLOCK_COMPARE_WORDS
 }

 // The two strings passed to BlockHash::MatchingBytesToLeft do have matching
 // characters -- in fact, they're the same string -- but since max_bytes is zero
 // or negative, BlockHash::MatchingBytesToLeft should not read from the strings
 // and should return 0.
 TEST_F(BlockHashTest, MaxBytesZeroDoesNothing) {
   EXPECT_EQ(0, MatchingBytesToLeft(
       &search_string[index_of_f_in_fearsome],
       &search_string[index_of_f_in_fearsome],
       0));
   EXPECT_EQ(0, MatchingBytesToRight(
       &search_string[index_of_f_in_fearsome],
       &search_string[index_of_f_in_fearsome],
       0));
 }

 TEST_F(BlockHashTest, MaxBytesNegativeDoesNothing) {
   EXPECT_EQ(0, MatchingBytesToLeft(
       &search_string[index_of_f_in_fearsome],
       &search_string[index_of_f_in_fearsome],
       -1));
   EXPECT_EQ(0, MatchingBytesToLeft(
       &search_string[index_of_f_in_fearsome],
       &search_string[index_of_f_in_fearsome],
       INT_MIN));
   EXPECT_EQ(0, MatchingBytesToRight(
       &search_string[index_of_f_in_fearsome],
       &search_string[index_of_f_in_fearsome],
       -1));
   EXPECT_EQ(0, MatchingBytesToRight(
       &search_string[index_of_f_in_fearsome],
       &search_string[index_of_f_in_fearsome],
       INT_MIN));
 }

 TEST_F(BlockHashTest, MaxBytesOneMatch) {
   EXPECT_EQ(1, MatchingBytesToLeft(
       &search_string[index_of_f_in_fearsome],
       &search_string[index_of_f_in_fearsome],
       1));
   EXPECT_EQ(1, MatchingBytesToRight(
       &search_string[index_of_f_in_fearsome],
       &search_string[index_of_f_in_fearsome],
       1));
 }

 TEST_F(BlockHashTest, MaxBytesOneNoMatch) {
   EXPECT_EQ(0, MatchingBytesToLeft(
       &search_string[index_of_f_in_fearsome],
       &search_string[index_of_second_e_in_what_we_hear],
       1));
   EXPECT_EQ(0, MatchingBytesToRight(
       &search_string[index_of_f_in_fearsome],
       &search_string[index_of_second_e_in_what_we_hear - 1],
       1));
 }

 TEST_F(BlockHashTest, LeftLimitedByMaxBytes) {
   // The number of bytes that match between the original "we hear is fearsome"
   // and the altered "ve hear is fearsome".
   const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
   const int max_bytes = expected_length - 1;
   EXPECT_EQ(max_bytes, MatchingBytesToLeft(
       &search_string[index_of_f_in_fearsome],
       &search_string_altered[index_of_f_in_fearsome],
       max_bytes));
 }

 TEST_F(BlockHashTest, LeftNotLimited) {
   // The number of bytes that match between the original "we hear is fearsome"
   // and the altered "ve hear is fearsome".
   const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
   const int max_bytes = expected_length + 1;
   EXPECT_EQ(expected_length, MatchingBytesToLeft(
       &search_string[index_of_f_in_fearsome],
       &search_string_altered[index_of_f_in_fearsome],
       max_bytes));
   EXPECT_EQ(expected_length, MatchingBytesToLeft(
       &search_string[index_of_f_in_fearsome],
       &search_string_altered[index_of_f_in_fearsome],
       INT_MAX));
 }

 TEST_F(BlockHashTest, RightLimitedByMaxBytes) {
   // The number of bytes that match between the original "fearsome"
   // and the altered "fearsomm".
   const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
                               + (kBlockSize - 1);  // spacing between letters
   const int max_bytes = expected_length - 1;
   EXPECT_EQ(max_bytes, MatchingBytesToRight(
       &search_string[index_of_f_in_fearsome],
       &search_string_altered[index_of_f_in_fearsome],
       max_bytes));
 }

 TEST_F(BlockHashTest, RightNotLimited) {
   // The number of bytes that match between the original "we hear is fearsome"
   // and the altered "ve hear is fearsome".
   const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
                               + (kBlockSize - 1);  // spacing between letters
   const int max_bytes = expected_length + 1;
   EXPECT_EQ(expected_length, MatchingBytesToRight(
       &search_string[index_of_f_in_fearsome],
       &search_string_altered[index_of_f_in_fearsome],
       max_bytes));
   EXPECT_EQ(expected_length, MatchingBytesToRight(
       &search_string[index_of_f_in_fearsome],
       &search_string_altered[index_of_f_in_fearsome],
       INT_MAX));
 }

 // If this test fails in a non-x86 or non-gcc environment, consider adding
 // -DVCDIFF_USE_BLOCK_COMPARE_WORDS to AM_CXXFLAGS in Makefile.am and
 // Makefile.in, and reconstructing the Makefile.  That will cause blockhash.cc
 // to use a special implementation (BlockCompareWords) to compare blocks
 // rather than using standard memcmp.
 TEST_F(BlockHashTest, BlockContentsMatchIsAsFastAsBlockCompareWords) {
   compare_buffer_1_ = new char[kTimingTestSize];
   compare_buffer_2_ = new char[kTimingTestSize];

   // The value 0xBE is arbitrarily chosen.  First test with identical contents
   // in the buffers, so that the comparison functions cannot short-circuit
   // and will return true.
   memset(compare_buffer_1_, 0xBE, kTimingTestSize);
   memset(compare_buffer_2_, 0xBE, kTimingTestSize);
   std::cout << "Comparing "
             << (kTimingTestSize / kBlockSize) << " identical values:"
             << std::endl;
   TestAndPrintTimesForCompareFunctions(true);

   // Now change one value in the middle of one buffer, so that the contents
   // are no longer the same.
   compare_buffer_1_[kTimingTestSize / 2] = 0x00;
   std::cout << "Comparing "
             << ((kTimingTestSize / kBlockSize) - 1) << " identical values"
             << " and one mismatch:" << std::endl;
   TestAndPrintTimesForCompareFunctions(false);

   // Set one of the bytes of each block to differ so that
   // none of the compare operations will return true, and run timing tests.
   // In practice, BlockHash::BlockContentsMatch will only be called
   // for two blocks whose hash values match, and the two most important
   // cases are: (1) the blocks are identical, or (2) none of their bytes match.
   TimingTestForBlocksThatDifferAtByte(0);
   TimingTestForBlocksThatDifferAtByte(1);
   TimingTestForBlocksThatDifferAtByte(kBlockSize / 2);
   TimingTestForBlocksThatDifferAtByte(kBlockSize - 1);

   delete[] compare_buffer_1_;
   delete[] compare_buffer_2_;
 }

 TEST_F(BlockHashTest, FindFailsBeforeHashing) {
   EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
 }

 TEST_F(BlockHashTest, HashOneFindOne) {
   for (int i = 0; i <= index_of_y_in_only; ++i) {
     th_->AddOneIndexHash(i, RollingHash<kBlockSize>::Hash(&sample_text[i]));
   }
   EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*th_, hashed_y,
                                                    test_string_y));
   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_y_in_only, test_string_y));
 }

 TEST_F(BlockHashTest, HashAllFindOne) {
   EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*dh_, hashed_y,
                                                    test_string_y));
   EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_y_in_only, test_string_y));
 }

 TEST_F(BlockHashTest, NonMatchingTextNotFound) {
   EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_all_Qs, test_string_all_Qs));
 }

 // Search for unaligned text.  The test string is contained in the
 // sample text (unlike the non-matching string in NonMatchingTextNotFound,
 // above), but it is not aligned on a block boundary.  FindMatchingBlock
 // will only work if the test string is aligned on a block boundary.
 //
 //    "   T   h   e       o   n   l   y"
 //              ^^^^ Here is the test string
 //
 TEST_F(BlockHashTest, UnalignedTextNotFound) {
   EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_unaligned_e,
                                    test_string_unaligned_e));
 }

 TEST_F(BlockHashTest, FindSixMatches) {
   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*dh_, block_of_first_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*dh_, block_of_second_e,
                                                 test_string_e));
   EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*dh_, block_of_third_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*dh_, block_of_fourth_e,
                                                 test_string_e));
   EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*dh_, block_of_fifth_e,
                                                 test_string_e));
   EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_sixth_e, test_string_e));

   // Starting over gives same result
   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
                                                  test_string_e));
 }

 TEST_F(BlockHashTest, AddRangeFindThreeMatches) {
   // Add hash values only for those characters before the fourth instance
   // of "e" in the sample text.  Tests that the ending index
   // of AddAllBlocksThroughIndex() is not inclusive: only three matches
   // for "e" should be found.
   th_->AddAllBlocksThroughIndex(index_of_fourth_e);
   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
                                                 test_string_e));
   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));

   // Starting over gives same result
   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                  test_string_e));
 }

 // Try indices that are not even multiples of the block size.
 // Add three ranges and verify the results after each
 // call to AddAllBlocksThroughIndex().
 TEST_F(BlockHashTest, AddRangeWithUnalignedIndices) {
   th_->AddAllBlocksThroughIndex(index_of_first_e + 1);
   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                  test_string_e));
   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_first_e, test_string_e));

   // Starting over gives same result
   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                  test_string_e));

   // Add the second range to expand the result set
   th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3);
   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
                                                 test_string_e));
   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));

   // Starting over gives same result
   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                  test_string_e));

   // Add the third range to expand the result set
   th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);

   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
                                                 test_string_e));
   EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
                                                  test_string_e));
   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));

   // Starting over gives same result
   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                  test_string_e));
 }

 #ifdef GTEST_HAS_DEATH_TEST
 TEST_F(BlockHashDeathTest, AddingRangesInDescendingOrderNoEffect) {
   th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);

   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
                                                 test_string_e));
   EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
                                                  test_string_e));
   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));

   // Starting over gives same result
   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                  test_string_e));

   // These calls will produce DFATAL error messages, and will do nothing,
   // since the ranges have already been added.
   EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3),
                      "<");
   EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_first_e + 1),
                      "<");

   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
                                                 test_string_e));
   EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
                                                  test_string_e));
   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));

   // Starting over gives same result
   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                  test_string_e));
 }
 #endif  // GTEST_HAS_DEATH_TEST

 TEST_F(BlockHashTest, AddEntireRangeFindSixMatches) {
   th_->AddAllBlocksThroughIndex(StringLengthAsInt(sample_text));
   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
                                                 test_string_e));
   EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
                                                  test_string_e));
   EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*th_, block_of_fourth_e,
                                                 test_string_e));
   EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*th_, block_of_fifth_e,
                                                 test_string_e));
   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_sixth_e, test_string_e));

   // Starting over gives same result
   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
                                                  test_string_e));
 }

 TEST_F(BlockHashTest, ZeroSizeSourceAccepted) {
   BlockHash zero_sized_hash(sample_text, 0, 0);
   EXPECT_EQ(true, zero_sized_hash.Init(true));
   EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
 }

 #ifdef GTEST_HAS_DEATH_TEST
 TEST_F(BlockHashDeathTest, BadNextMatchingBlockReturnsNoMatch) {
   EXPECT_DEBUG_DEATH(EXPECT_EQ(-1, NextMatchingBlock(*dh_, 0xFFFFFFFE, "    ")),
                      "invalid");
 }

 TEST_F(BlockHashDeathTest, CallingInitTwiceIsIllegal) {
   BlockHash bh(sample_text, strlen(sample_text), 0);
   EXPECT_TRUE(bh.Init(false));
   EXPECT_DEBUG_DEATH(EXPECT_FALSE(bh.Init(false)), "twice");
 }

 TEST_F(BlockHashDeathTest, CallingAddBlockBeforeInitIsIllegal) {
   BlockHash bh(sample_text, strlen(sample_text), 0);
   EXPECT_DEBUG_DEATH(bh.AddAllBlocksThroughIndex(index_of_first_e),
                      "called before");
 }

 TEST_F(BlockHashDeathTest, AddAllBlocksThroughIndexOutOfRange) {
   EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(strlen(sample_text) + 1),
                      "higher than end");
 }
 #endif  // GTEST_HAS_DEATH_TEST

 TEST_F(BlockHashTest, UnknownFingerprintReturnsNoMatch) {
   EXPECT_EQ(-1, FirstMatchingBlock(*dh_, 0xFAFAFAFA, "FAFA"));
 }

 TEST_F(BlockHashTest, FindBestMatch) {
   dh_->FindBestMatch(hashed_f,
                      &search_string[index_of_f_in_fearsome],
                      search_string,
                      strlen(search_string),
                      &best_match_);
   EXPECT_EQ(index_of_longest_match_ear_is_fear, best_match_.source_offset());
   EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
   // The match includes the spaces after the final character,
   // which is why (kBlockSize - 1) is added to the expected best size.
   EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
             best_match_.size());
 }

 TEST_F(BlockHashTest, FindBestMatchWithStartingOffset) {
   BlockHash th2(sample_text, strlen(sample_text), 0x10000);
   th2.Init(true);  // hash all blocks
   th2.FindBestMatch(hashed_f,
                     &search_string[index_of_f_in_fearsome],
                     search_string,
                     strlen(search_string),
                     &best_match_);
   // Offset should begin with dictionary_size
   EXPECT_EQ(0x10000 + (index_of_longest_match_ear_is_fear),
             best_match_.source_offset());
   EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
   // The match includes the spaces after the final character,
   // which is why (kBlockSize - 1) is added to the expected best size.
   EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
             best_match_.size());
 }

 TEST_F(BlockHashTest, BestMatchReachesEndOfDictionary) {
   // Hash the "i" in "fear itself"
   uint32_t hash_value = RollingHash<kBlockSize>::Hash(
       &search_to_end_string[index_of_i_in_itself]);
   dh_->FindBestMatch(hash_value,
                      &search_to_end_string[index_of_i_in_itself],
                      search_to_end_string,
                      strlen(search_to_end_string),
                      &best_match_);
   EXPECT_EQ(index_of_space_before_itself, best_match_.source_offset());
   EXPECT_EQ(index_of_space_in_eat_itself, best_match_.target_offset());
   EXPECT_EQ(strlen(" itself") * kBlockSize, best_match_.size());
 }

 TEST_F(BlockHashTest, BestMatchReachesStartOfDictionary) {
   // Hash the "i" in "fear itself"
   uint32_t hash_value = RollingHash<kBlockSize>::Hash(
       &search_to_beginning_string[index_of_o_in_online]);
   dh_->FindBestMatch(hash_value,
                      &search_to_beginning_string[index_of_o_in_online],
                      search_to_beginning_string,
                      strlen(search_to_beginning_string),
                      &best_match_);
   EXPECT_EQ(0, best_match_.source_offset());  // beginning of dictionary
   EXPECT_EQ(index_of_t_in_use_the, best_match_.target_offset());
   // The match includes the spaces after the final character,
   // which is why (kBlockSize - 1) is added to the expected best size.
   EXPECT_EQ((strlen("The onl") * kBlockSize) + (kBlockSize - 1),
             best_match_.size());
 }

 TEST_F(BlockHashTest, BestMatchWithManyMatches) {
   BlockHash many_matches_hash(sample_text_many_matches,
                               strlen(sample_text_many_matches),
                               0);
   EXPECT_TRUE(many_matches_hash.Init(true));
   // Hash the "   a" at the beginning of the search string "ababc"
   uint32_t hash_value =
       RollingHash<kBlockSize>::Hash(search_string_many_matches);
   many_matches_hash.FindBestMatch(hash_value,
                                   search_string_many_matches,
                                   search_string_many_matches,
                                   strlen(search_string_many_matches),
                                   &best_match_);
   EXPECT_EQ(index_of_ababc, best_match_.source_offset());
   EXPECT_EQ(0, best_match_.target_offset());
   EXPECT_EQ(strlen(search_string_many_matches), best_match_.size());
 }

 TEST_F(BlockHashTest, HashCollisionFindsNoMatch) {
   char* collision_search_string = new char[strlen(search_string) + 1];
   memcpy(collision_search_string, search_string, strlen(search_string) + 1);
   char* fearsome_location = &collision_search_string[index_of_f_in_fearsome];

   // Tweak the collision string so that it has the same hash value
   // but different text.  The last four characters of the search string
   // should be "   f", and the bytes given below have the same hash value
   // as those characters.
   CHECK_GE(kBlockSize, 4);
   fearsome_location[kBlockSize - 4] = 0x84;
   fearsome_location[kBlockSize - 3] = 0xF1;
   fearsome_location[kBlockSize - 2] = 0x51;
   fearsome_location[kBlockSize - 1] = 0x00;
   EXPECT_EQ(hashed_f, RollingHash<kBlockSize>::Hash(fearsome_location));
   EXPECT_NE(0, memcmp(&search_string[index_of_f_in_fearsome],
                       fearsome_location,
                       kBlockSize));
   // No match should be found this time.
   dh_->FindBestMatch(hashed_f,
       fearsome_location,
       collision_search_string,
       strlen(search_string),  // since collision_search_string has embedded \0
       &best_match_);
   EXPECT_EQ(-1, best_match_.source_offset());
   EXPECT_EQ(-1, best_match_.target_offset());
   EXPECT_EQ(0U, best_match_.size());
   delete[] collision_search_string;
 }

 // If the footprint passed to FindBestMatch does not actually match
 // the search string, it should not find any matches.
 TEST_F(BlockHashTest, WrongFootprintFindsNoMatch) {
   dh_->FindBestMatch(hashed_e,  // Using hashed value of "e" instead of "f"!
                      &search_string[index_of_f_in_fearsome],
                      search_string,
                      strlen(search_string),
                      &best_match_);
   EXPECT_EQ(-1, best_match_.source_offset());
   EXPECT_EQ(-1, best_match_.target_offset());
   EXPECT_EQ(0U, best_match_.size());
 }

 // Use a dictionary containing 1M copies of the letter 'Q',
 // and target data that also contains 1M Qs.  If FindBestMatch
 // is not throttled to find a maximum number of matches, this
 // will take a very long time -- several seconds at least.
 // If this test appears to hang, it is because the throttling code
 // (see BlockHash::kMaxMatchesToCheck for details) is not working.
 TEST_F(BlockHashTest, SearchStringFindsTooManyMatches) {
   const int kTestSize = 1 << 20;  // 1M
   char* huge_dictionary = new char[kTestSize];
   memset(huge_dictionary, 'Q', kTestSize);
   BlockHash huge_bh(huge_dictionary, kTestSize, 0);
   EXPECT_TRUE(huge_bh.Init(/* populate_hash_table = */ true));
   char* huge_target = new char[kTestSize];
   memset(huge_target, 'Q', kTestSize);
   CycleTimer timer;
   timer.Start();
   huge_bh.FindBestMatch(hashed_all_Qs,
                         huge_target + (kTestSize / 2),  // middle of target
                         huge_target,
                         kTestSize,
                         &best_match_);
   timer.Stop();
   double elapsed_time_in_us = static_cast<double>(timer.GetInUsec());
   std::cout << "Time to search for best match with 1M matches: "
             << elapsed_time_in_us << " us" << std::endl;
   // All blocks match the candidate block.  FindBestMatch should have checked
   // a certain number of matches before giving up.  The best match
   // should include at least half the source and target, since the candidate
   // block was in the middle of the target data.
   EXPECT_GT((kTestSize / 2), best_match_.source_offset());
   EXPECT_GT((kTestSize / 2), best_match_.target_offset());
   EXPECT_LT(static_cast<size_t>(kTestSize / 2), best_match_.size());
   EXPECT_GT(5000000, elapsed_time_in_us);  // < 5 seconds
 #ifdef NDEBUG
   EXPECT_GT(1000000, elapsed_time_in_us);  // < 1 second
 #endif  // NDEBUG
   delete[] huge_target;
   delete[] huge_dictionary;
 }

 #ifdef GTEST_HAS_DEATH_TEST
 TEST_F(BlockHashDeathTest, AddTooManyBlocks) {
   for (int i = 0; i < StringLengthAsInt(sample_text_without_spaces); ++i) {
     th_->AddOneIndexHash(i * kBlockSize, hashed_e);
   }
   // Didn't expect another block to be added
   EXPECT_DEBUG_DEATH(th_->AddOneIndexHash(StringLengthAsInt(sample_text),
                                           hashed_e),
                      "AddBlock");
 }
 #endif  // GTEST_HAS_DEATH_TEST

 }  //  namespace open_vcdiff