| // Copyright 2008 The open-vcdiff Authors. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include <config.h> |
| #include "blockhash.h" |
| #include <limits.h> // INT_MIN |
| #include <string.h> // memcpy, memcmp, strlen |
| #include <iostream> |
| #include "google/encodetable.h" |
| #include "rolling_hash.h" |
| #include "testing.h" |
| #include "unique_ptr.h" // auto_ptr, unique_ptr |
| |
| namespace open_vcdiff { |
| |
| const int kBlockSize = BlockHash::kBlockSize; |
| |
| class BlockHashTest : public testing::Test { |
| protected: |
| static const int kTimingTestSize = 1 << 21; // 2M |
| static const int kTimingTestIterations = 32; |
| |
| BlockHashTest() { |
| dh_.reset(BlockHash::CreateDictionaryHash(sample_text, |
| strlen(sample_text))); |
| th_.reset(BlockHash::CreateTargetHash(sample_text, strlen(sample_text), 0)); |
| EXPECT_TRUE(dh_.get() != NULL); |
| EXPECT_TRUE(th_.get() != NULL); |
| } |
| |
| // BlockHashTest is a friend to BlockHash. Expose the protected functions |
| // that will be tested by the children of BlockHashTest. |
| static bool BlockContentsMatch(const char* block1, const char* block2) { |
| return BlockHash::BlockContentsMatch(block1, block2); |
| } |
| |
| int FirstMatchingBlock(const BlockHash& block_hash, |
| uint32_t hash_value, |
| const char* block_ptr) const { |
| return block_hash.FirstMatchingBlock(hash_value, block_ptr); |
| } |
| |
| int NextMatchingBlock(const BlockHash& block_hash, |
| int block_number, |
| const char* block_ptr) const { |
| return block_hash.NextMatchingBlock(block_number, block_ptr); |
| } |
| |
| static int MatchingBytesToLeft(const char* source_match_start, |
| const char* target_match_start, |
| int max_bytes) { |
| return BlockHash::MatchingBytesToLeft(source_match_start, |
| target_match_start, |
| max_bytes); |
| } |
| |
| static int MatchingBytesToRight(const char* source_match_end, |
| const char* target_match_end, |
| int max_bytes) { |
| return BlockHash::MatchingBytesToRight(source_match_end, |
| target_match_end, |
| max_bytes); |
| } |
| |
| static int StringLengthAsInt(const char* s) { |
| return static_cast<int>(strlen(s)); |
| } |
| |
| void InitBlocksToDifferAtNthByte(int n) { |
| CHECK(n < kBlockSize); |
| memset(compare_buffer_1_, 0xBE, kTimingTestSize); |
| memset(compare_buffer_2_, 0xBE, kTimingTestSize); |
| for (int index = n; index < kTimingTestSize; index += kBlockSize) { |
| compare_buffer_1_[index] = 0x00; |
| compare_buffer_2_[index] = 0x01; |
| } |
| } |
| |
| void TestAndPrintTimesForCompareFunctions(bool should_be_identical); |
| |
| void TimingTestForBlocksThatDifferAtByte(int n) { |
| InitBlocksToDifferAtNthByte(n); |
| std::cout << "Comparing blocks that differ at byte " << n << std::endl; |
| TestAndPrintTimesForCompareFunctions(false); |
| } |
| |
| // Copy sample_text_without_spaces and search_string_without_spaces |
| // into newly allocated sample_text and search_string buffers, |
| // but pad them with space characters so that every character |
| // in sample_text_without_spaces matches (kBlockSize - 1) |
| // space characters in sample_text, followed by that character. |
| // For example: |
| // Since sample_text_without_spaces begins "The only thing"..., |
| // if kBlockSize is 4, then 3 space characters will be inserted |
| // between each letter of sample_text, as follows: |
| // " T h e o n l y t h i n g"... |
| // This makes testing simpler, because finding a kBlockSize-byte match |
| // between the sample text and search string only depends on the |
| // trailing letter in each block. |
| static void MakeEachLetterABlock(const char* string_without_spaces, |
| const char** result) { |
| const size_t length_without_spaces = strlen(string_without_spaces); |
| char* padded_text = new char[(kBlockSize * length_without_spaces) + 1]; |
| memset(padded_text, ' ', kBlockSize * length_without_spaces); |
| char* padded_text_ptr = padded_text + (kBlockSize - 1); |
| for (size_t i = 0; i < length_without_spaces; ++i) { |
| *padded_text_ptr = string_without_spaces[i]; |
| padded_text_ptr += kBlockSize; |
| } |
| padded_text[kBlockSize * length_without_spaces] = '\0'; |
| *result = padded_text; |
| } |
| |
| static void SetUpTestCase() { |
| MakeEachLetterABlock(sample_text_without_spaces, &sample_text); |
| MakeEachLetterABlock(search_string_without_spaces, &search_string); |
| MakeEachLetterABlock(search_string_altered_without_spaces, |
| &search_string_altered); |
| MakeEachLetterABlock(search_to_end_without_spaces, &search_to_end_string); |
| MakeEachLetterABlock(search_to_beginning_without_spaces, |
| &search_to_beginning_string); |
| MakeEachLetterABlock(sample_text_many_matches_without_spaces, |
| &sample_text_many_matches); |
| MakeEachLetterABlock(search_string_many_matches_without_spaces, |
| &search_string_many_matches); |
| MakeEachLetterABlock("y", &test_string_y); |
| MakeEachLetterABlock("e", &test_string_e); |
| char* new_test_string_unaligned_e = new char[kBlockSize]; |
| memset(new_test_string_unaligned_e, ' ', kBlockSize); |
| new_test_string_unaligned_e[kBlockSize - 2] = 'e'; |
| test_string_unaligned_e = new_test_string_unaligned_e; |
| char* new_test_string_all_Qs = new char[kBlockSize]; |
| memset(new_test_string_all_Qs, 'Q', kBlockSize); |
| test_string_all_Qs = new_test_string_all_Qs; |
| hashed_y = RollingHash<kBlockSize>::Hash(test_string_y); |
| hashed_e = RollingHash<kBlockSize>::Hash(test_string_e); |
| hashed_f = |
| RollingHash<kBlockSize>::Hash(&search_string[index_of_f_in_fearsome]); |
| hashed_unaligned_e = RollingHash<kBlockSize>::Hash(test_string_unaligned_e); |
| hashed_all_Qs = RollingHash<kBlockSize>::Hash(test_string_all_Qs); |
| } |
| |
| static void TearDownTestCase() { |
| delete[] sample_text; |
| delete[] search_string; |
| delete[] search_string_altered; |
| delete[] search_to_end_string; |
| delete[] search_to_beginning_string; |
| delete[] sample_text_many_matches; |
| delete[] search_string_many_matches; |
| delete[] test_string_y; |
| delete[] test_string_e; |
| delete[] test_string_unaligned_e; |
| delete[] test_string_all_Qs; |
| } |
| |
| // Each block in the sample text and search string is kBlockSize bytes long, |
| // and consists of (kBlockSize - 1) space characters |
| // followed by a single letter of text. |
| |
| // Block numbers of certain characters within the sample text: |
| // All six occurrences of "e", in order. |
| static const int block_of_first_e = 2; |
| static const int block_of_second_e = 16; |
| static const int block_of_third_e = 21; |
| static const int block_of_fourth_e = 27; |
| static const int block_of_fifth_e = 35; |
| static const int block_of_sixth_e = 42; |
| |
| static const int block_of_y_in_only = 7; |
| // The block number is multiplied by kBlockSize to arrive at the |
| // index, which points to the (kBlockSize - 1) space characters before |
| // the letter specified. |
| // Indices of certain characters within the sample text. |
| static const int index_of_first_e = block_of_first_e * kBlockSize; |
| static const int index_of_fourth_e = block_of_fourth_e * kBlockSize; |
| static const int index_of_sixth_e = block_of_sixth_e * kBlockSize; |
| static const int index_of_y_in_only = block_of_y_in_only * kBlockSize; |
| static const int index_of_space_before_fear_is_fear = 25 * kBlockSize; |
| static const int index_of_longest_match_ear_is_fear = 27 * kBlockSize; |
| static const int index_of_i_in_fear_is_fear = 31 * kBlockSize; |
| static const int index_of_space_before_fear_itself = 33 * kBlockSize; |
| static const int index_of_space_before_itself = 38 * kBlockSize; |
| static const int index_of_ababc = 4 * kBlockSize; |
| |
| // Indices of certain characters within the search strings. |
| static const int index_of_second_w_in_what_we = 5 * kBlockSize; |
| static const int index_of_second_e_in_what_we_hear = 9 * kBlockSize; |
| static const int index_of_f_in_fearsome = 16 * kBlockSize; |
| static const int index_of_space_in_eat_itself = 12 * kBlockSize; |
| static const int index_of_i_in_itself = 13 * kBlockSize; |
| static const int index_of_t_in_use_the = 4 * kBlockSize; |
| static const int index_of_o_in_online = 8 * kBlockSize; |
| |
| static const char sample_text_without_spaces[]; |
| static const char search_string_without_spaces[]; |
| static const char search_string_altered_without_spaces[]; |
| static const char search_to_end_without_spaces[]; |
| static const char search_to_beginning_without_spaces[]; |
| static const char sample_text_many_matches_without_spaces[]; |
| static const char search_string_many_matches_without_spaces[]; |
| |
| static const char* sample_text; |
| static const char* search_string; |
| static const char* search_string_altered; |
| static const char* search_to_end_string; |
| static const char* search_to_beginning_string; |
| static const char* sample_text_many_matches; |
| static const char* search_string_many_matches; |
| |
| static const char* test_string_y; |
| static const char* test_string_e; |
| static const char* test_string_all_Qs; |
| static const char* test_string_unaligned_e; |
| |
| static uint32_t hashed_y; |
| static uint32_t hashed_e; |
| static uint32_t hashed_f; |
| static uint32_t hashed_unaligned_e; |
| static uint32_t hashed_all_Qs; |
| |
| UNIQUE_PTR<const BlockHash> dh_; // hash table is populated at startup |
| UNIQUE_PTR<BlockHash> th_; // hash table not populated; |
| // used to test incremental adds |
| |
| BlockHash::Match best_match_; |
| char* compare_buffer_1_; |
| char* compare_buffer_2_; |
| int prime_result_; |
| }; |
| |
| #ifdef GTEST_HAS_DEATH_TEST |
| typedef BlockHashTest BlockHashDeathTest; |
| #endif // GTEST_HAS_DEATH_TEST |
| |
| // The C++ standard requires a separate definition of these static const values, |
| // even though their initializers are given within the class definition. |
| const int BlockHashTest::block_of_first_e; |
| const int BlockHashTest::block_of_second_e; |
| const int BlockHashTest::block_of_third_e; |
| const int BlockHashTest::block_of_fourth_e; |
| const int BlockHashTest::block_of_fifth_e; |
| const int BlockHashTest::block_of_sixth_e; |
| const int BlockHashTest::block_of_y_in_only; |
| const int BlockHashTest::index_of_first_e; |
| const int BlockHashTest::index_of_fourth_e; |
| const int BlockHashTest::index_of_sixth_e; |
| const int BlockHashTest::index_of_y_in_only; |
| const int BlockHashTest::index_of_space_before_fear_is_fear; |
| const int BlockHashTest::index_of_longest_match_ear_is_fear; |
| const int BlockHashTest::index_of_i_in_fear_is_fear; |
| const int BlockHashTest::index_of_space_before_fear_itself; |
| const int BlockHashTest::index_of_space_before_itself; |
| const int BlockHashTest::index_of_ababc; |
| const int BlockHashTest::index_of_second_w_in_what_we; |
| const int BlockHashTest::index_of_second_e_in_what_we_hear; |
| const int BlockHashTest::index_of_f_in_fearsome; |
| const int BlockHashTest::index_of_space_in_eat_itself; |
| const int BlockHashTest::index_of_i_in_itself; |
| const int BlockHashTest::index_of_t_in_use_the; |
| const int BlockHashTest::index_of_o_in_online; |
| |
| const char BlockHashTest::sample_text_without_spaces[] = |
| "The only thing we have to fear is fear itself"; |
| |
| const char BlockHashTest::search_string_without_spaces[] = |
| "What we hear is fearsome"; |
| |
| const char BlockHashTest::search_string_altered_without_spaces[] = |
| "Vhat ve hear is fearsomm"; |
| |
| const char BlockHashTest::search_to_end_without_spaces[] = |
| "Pop will eat itself, eventually"; |
| |
| const char BlockHashTest::search_to_beginning_without_spaces[] = |
| "Use The online dictionary"; |
| |
| const char BlockHashTest::sample_text_many_matches_without_spaces[] = |
| "ababababcab"; |
| |
| const char BlockHashTest::search_string_many_matches_without_spaces[] = |
| "ababc"; |
| |
| const char* BlockHashTest::sample_text = NULL; |
| const char* BlockHashTest::search_string = NULL; |
| const char* BlockHashTest::search_string_altered = NULL; |
| const char* BlockHashTest::search_to_end_string = NULL; |
| const char* BlockHashTest::search_to_beginning_string = NULL; |
| const char* BlockHashTest::sample_text_many_matches = NULL; |
| const char* BlockHashTest::search_string_many_matches = NULL; |
| |
| const char* BlockHashTest::test_string_y = NULL; |
| const char* BlockHashTest::test_string_e = NULL; |
| const char* BlockHashTest::test_string_unaligned_e = NULL; |
| const char* BlockHashTest::test_string_all_Qs = NULL; |
| |
| uint32_t BlockHashTest::hashed_y = 0; |
| uint32_t BlockHashTest::hashed_e = 0; |
| uint32_t BlockHashTest::hashed_f = 0; |
| uint32_t BlockHashTest::hashed_unaligned_e = 0; |
| uint32_t BlockHashTest::hashed_all_Qs = 0; |
| |
| void BlockHashTest::TestAndPrintTimesForCompareFunctions( |
| bool should_be_identical) { |
| CHECK(compare_buffer_1_ != NULL); |
| CHECK(compare_buffer_2_ != NULL); |
| // Prime the memory cache. |
| prime_result_ = |
| memcmp(compare_buffer_1_, compare_buffer_2_, kTimingTestSize); |
| const char* const block1_limit = |
| &compare_buffer_1_[kTimingTestSize - kBlockSize]; |
| int block_compare_words_result = 0; |
| CycleTimer block_compare_words_timer; |
| block_compare_words_timer.Start(); |
| for (int i = 0; i < kTimingTestIterations; ++i) { |
| const char* block1 = compare_buffer_1_; |
| const char* block2 = compare_buffer_2_; |
| while (block1 < block1_limit) { |
| if (!BlockHash::BlockCompareWords(block1, block2)) { |
| ++block_compare_words_result; |
| } |
| block1 += kBlockSize; |
| block2 += kBlockSize; |
| } |
| } |
| block_compare_words_timer.Stop(); |
| double time_for_block_compare_words = |
| static_cast<double>(block_compare_words_timer.GetInUsec()) |
| / ((kTimingTestSize / kBlockSize) * kTimingTestIterations); |
| int block_contents_match_result = 0; |
| CycleTimer block_contents_match_timer; |
| block_contents_match_timer.Start(); |
| for (int i = 0; i < kTimingTestIterations; ++i) { |
| const char* block1 = compare_buffer_1_; |
| const char* block2 = compare_buffer_2_; |
| while (block1 < block1_limit) { |
| if (!BlockHash::BlockContentsMatch(block1, block2)) { |
| ++block_contents_match_result; |
| } |
| block1 += kBlockSize; |
| block2 += kBlockSize; |
| } |
| } |
| block_contents_match_timer.Stop(); |
| double time_for_block_contents_match = |
| static_cast<double>(block_contents_match_timer.GetInUsec()) |
| / ((kTimingTestSize / kBlockSize) * kTimingTestIterations); |
| EXPECT_EQ(block_contents_match_result, block_compare_words_result); |
| if (should_be_identical) { |
| CHECK_EQ(0, block_compare_words_result); |
| } else { |
| CHECK_GT(block_compare_words_result, 0); |
| } |
| std::cout << "BlockHash::BlockCompareWords: " |
| << time_for_block_compare_words << " us per operation" << std::endl; |
| std::cout << "BlockHash::BlockContentsMatch: " |
| << time_for_block_contents_match << " us per operation" |
| << std::endl; |
| if (time_for_block_compare_words > 0) { |
| double percent_change = |
| (((time_for_block_contents_match - time_for_block_compare_words) |
| / time_for_block_compare_words) * 100.0); |
| if (percent_change >= 0.0) { |
| std::cout << "BlockContentsMatch is " << percent_change << "%" |
| << " SLOWER than BlockCompareWords" << std::endl; |
| } else { |
| std::cout << "BlockContentsMatch is " << (-percent_change) << "%" |
| << " FASTER than BlockCompareWords" << std::endl; |
| } |
| } |
| #if defined(NDEBUG) && !defined(VCDIFF_USE_BLOCK_COMPARE_WORDS) |
| // Only check timings for optimized build. There's plenty of margin: this |
| // check will fail only if BlockContentsMatch is at least twice as slow as |
| // BlockCompareWords. |
| EXPECT_GT(time_for_block_compare_words * 2.0, time_for_block_contents_match); |
| #endif // NDEBUG && !VCDIFF_USE_BLOCK_COMPARE_WORDS |
| } |
| |
| // The two strings passed to BlockHash::MatchingBytesToLeft do have matching |
| // characters -- in fact, they're the same string -- but since max_bytes is zero |
| // or negative, BlockHash::MatchingBytesToLeft should not read from the strings |
| // and should return 0. |
| TEST_F(BlockHashTest, MaxBytesZeroDoesNothing) { |
| EXPECT_EQ(0, MatchingBytesToLeft( |
| &search_string[index_of_f_in_fearsome], |
| &search_string[index_of_f_in_fearsome], |
| 0)); |
| EXPECT_EQ(0, MatchingBytesToRight( |
| &search_string[index_of_f_in_fearsome], |
| &search_string[index_of_f_in_fearsome], |
| 0)); |
| } |
| |
| TEST_F(BlockHashTest, MaxBytesNegativeDoesNothing) { |
| EXPECT_EQ(0, MatchingBytesToLeft( |
| &search_string[index_of_f_in_fearsome], |
| &search_string[index_of_f_in_fearsome], |
| -1)); |
| EXPECT_EQ(0, MatchingBytesToLeft( |
| &search_string[index_of_f_in_fearsome], |
| &search_string[index_of_f_in_fearsome], |
| INT_MIN)); |
| EXPECT_EQ(0, MatchingBytesToRight( |
| &search_string[index_of_f_in_fearsome], |
| &search_string[index_of_f_in_fearsome], |
| -1)); |
| EXPECT_EQ(0, MatchingBytesToRight( |
| &search_string[index_of_f_in_fearsome], |
| &search_string[index_of_f_in_fearsome], |
| INT_MIN)); |
| } |
| |
| TEST_F(BlockHashTest, MaxBytesOneMatch) { |
| EXPECT_EQ(1, MatchingBytesToLeft( |
| &search_string[index_of_f_in_fearsome], |
| &search_string[index_of_f_in_fearsome], |
| 1)); |
| EXPECT_EQ(1, MatchingBytesToRight( |
| &search_string[index_of_f_in_fearsome], |
| &search_string[index_of_f_in_fearsome], |
| 1)); |
| } |
| |
| TEST_F(BlockHashTest, MaxBytesOneNoMatch) { |
| EXPECT_EQ(0, MatchingBytesToLeft( |
| &search_string[index_of_f_in_fearsome], |
| &search_string[index_of_second_e_in_what_we_hear], |
| 1)); |
| EXPECT_EQ(0, MatchingBytesToRight( |
| &search_string[index_of_f_in_fearsome], |
| &search_string[index_of_second_e_in_what_we_hear - 1], |
| 1)); |
| } |
| |
| TEST_F(BlockHashTest, LeftLimitedByMaxBytes) { |
| // The number of bytes that match between the original "we hear is fearsome" |
| // and the altered "ve hear is fearsome". |
| const int expected_length = kBlockSize * StringLengthAsInt("e hear is "); |
| const int max_bytes = expected_length - 1; |
| EXPECT_EQ(max_bytes, MatchingBytesToLeft( |
| &search_string[index_of_f_in_fearsome], |
| &search_string_altered[index_of_f_in_fearsome], |
| max_bytes)); |
| } |
| |
| TEST_F(BlockHashTest, LeftNotLimited) { |
| // The number of bytes that match between the original "we hear is fearsome" |
| // and the altered "ve hear is fearsome". |
| const int expected_length = kBlockSize * StringLengthAsInt("e hear is "); |
| const int max_bytes = expected_length + 1; |
| EXPECT_EQ(expected_length, MatchingBytesToLeft( |
| &search_string[index_of_f_in_fearsome], |
| &search_string_altered[index_of_f_in_fearsome], |
| max_bytes)); |
| EXPECT_EQ(expected_length, MatchingBytesToLeft( |
| &search_string[index_of_f_in_fearsome], |
| &search_string_altered[index_of_f_in_fearsome], |
| INT_MAX)); |
| } |
| |
| TEST_F(BlockHashTest, RightLimitedByMaxBytes) { |
| // The number of bytes that match between the original "fearsome" |
| // and the altered "fearsomm". |
| const int expected_length = (kBlockSize * StringLengthAsInt("fearsom")) |
| + (kBlockSize - 1); // spacing between letters |
| const int max_bytes = expected_length - 1; |
| EXPECT_EQ(max_bytes, MatchingBytesToRight( |
| &search_string[index_of_f_in_fearsome], |
| &search_string_altered[index_of_f_in_fearsome], |
| max_bytes)); |
| } |
| |
| TEST_F(BlockHashTest, RightNotLimited) { |
| // The number of bytes that match between the original "we hear is fearsome" |
| // and the altered "ve hear is fearsome". |
| const int expected_length = (kBlockSize * StringLengthAsInt("fearsom")) |
| + (kBlockSize - 1); // spacing between letters |
| const int max_bytes = expected_length + 1; |
| EXPECT_EQ(expected_length, MatchingBytesToRight( |
| &search_string[index_of_f_in_fearsome], |
| &search_string_altered[index_of_f_in_fearsome], |
| max_bytes)); |
| EXPECT_EQ(expected_length, MatchingBytesToRight( |
| &search_string[index_of_f_in_fearsome], |
| &search_string_altered[index_of_f_in_fearsome], |
| INT_MAX)); |
| } |
| |
| // If this test fails in a non-x86 or non-gcc environment, consider adding |
| // -DVCDIFF_USE_BLOCK_COMPARE_WORDS to AM_CXXFLAGS in Makefile.am and |
| // Makefile.in, and reconstructing the Makefile. That will cause blockhash.cc |
| // to use a special implementation (BlockCompareWords) to compare blocks |
| // rather than using standard memcmp. |
| TEST_F(BlockHashTest, BlockContentsMatchIsAsFastAsBlockCompareWords) { |
| compare_buffer_1_ = new char[kTimingTestSize]; |
| compare_buffer_2_ = new char[kTimingTestSize]; |
| |
| // The value 0xBE is arbitrarily chosen. First test with identical contents |
| // in the buffers, so that the comparison functions cannot short-circuit |
| // and will return true. |
| memset(compare_buffer_1_, 0xBE, kTimingTestSize); |
| memset(compare_buffer_2_, 0xBE, kTimingTestSize); |
| std::cout << "Comparing " |
| << (kTimingTestSize / kBlockSize) << " identical values:" |
| << std::endl; |
| TestAndPrintTimesForCompareFunctions(true); |
| |
| // Now change one value in the middle of one buffer, so that the contents |
| // are no longer the same. |
| compare_buffer_1_[kTimingTestSize / 2] = 0x00; |
| std::cout << "Comparing " |
| << ((kTimingTestSize / kBlockSize) - 1) << " identical values" |
| << " and one mismatch:" << std::endl; |
| TestAndPrintTimesForCompareFunctions(false); |
| |
| // Set one of the bytes of each block to differ so that |
| // none of the compare operations will return true, and run timing tests. |
| // In practice, BlockHash::BlockContentsMatch will only be called |
| // for two blocks whose hash values match, and the two most important |
| // cases are: (1) the blocks are identical, or (2) none of their bytes match. |
| TimingTestForBlocksThatDifferAtByte(0); |
| TimingTestForBlocksThatDifferAtByte(1); |
| TimingTestForBlocksThatDifferAtByte(kBlockSize / 2); |
| TimingTestForBlocksThatDifferAtByte(kBlockSize - 1); |
| |
| delete[] compare_buffer_1_; |
| delete[] compare_buffer_2_; |
| } |
| |
| TEST_F(BlockHashTest, FindFailsBeforeHashing) { |
| EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y)); |
| } |
| |
| TEST_F(BlockHashTest, HashOneFindOne) { |
| for (int i = 0; i <= index_of_y_in_only; ++i) { |
| th_->AddOneIndexHash(i, RollingHash<kBlockSize>::Hash(&sample_text[i])); |
| } |
| EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*th_, hashed_y, |
| test_string_y)); |
| EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_y_in_only, test_string_y)); |
| } |
| |
| TEST_F(BlockHashTest, HashAllFindOne) { |
| EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*dh_, hashed_y, |
| test_string_y)); |
| EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_y_in_only, test_string_y)); |
| } |
| |
| TEST_F(BlockHashTest, NonMatchingTextNotFound) { |
| EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_all_Qs, test_string_all_Qs)); |
| } |
| |
| // Search for unaligned text. The test string is contained in the |
| // sample text (unlike the non-matching string in NonMatchingTextNotFound, |
| // above), but it is not aligned on a block boundary. FindMatchingBlock |
| // will only work if the test string is aligned on a block boundary. |
| // |
| // " T h e o n l y" |
| // ^^^^ Here is the test string |
| // |
| TEST_F(BlockHashTest, UnalignedTextNotFound) { |
| EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_unaligned_e, |
| test_string_unaligned_e)); |
| } |
| |
| TEST_F(BlockHashTest, FindSixMatches) { |
| EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_second_e, NextMatchingBlock(*dh_, block_of_first_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_third_e, NextMatchingBlock(*dh_, block_of_second_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*dh_, block_of_third_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*dh_, block_of_fourth_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*dh_, block_of_fifth_e, |
| test_string_e)); |
| EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_sixth_e, test_string_e)); |
| |
| // Starting over gives same result |
| EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e, |
| test_string_e)); |
| } |
| |
| TEST_F(BlockHashTest, AddRangeFindThreeMatches) { |
| // Add hash values only for those characters before the fourth instance |
| // of "e" in the sample text. Tests that the ending index |
| // of AddAllBlocksThroughIndex() is not inclusive: only three matches |
| // for "e" should be found. |
| th_->AddAllBlocksThroughIndex(index_of_fourth_e); |
| EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, |
| test_string_e)); |
| EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); |
| |
| // Starting over gives same result |
| EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| test_string_e)); |
| } |
| |
| // Try indices that are not even multiples of the block size. |
| // Add three ranges and verify the results after each |
| // call to AddAllBlocksThroughIndex(). |
| TEST_F(BlockHashTest, AddRangeWithUnalignedIndices) { |
| th_->AddAllBlocksThroughIndex(index_of_first_e + 1); |
| EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| test_string_e)); |
| EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_first_e, test_string_e)); |
| |
| // Starting over gives same result |
| EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| test_string_e)); |
| |
| // Add the second range to expand the result set |
| th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3); |
| EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, |
| test_string_e)); |
| EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); |
| |
| // Starting over gives same result |
| EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| test_string_e)); |
| |
| // Add the third range to expand the result set |
| th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1); |
| |
| EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, |
| test_string_e)); |
| EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); |
| |
| // Starting over gives same result |
| EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| test_string_e)); |
| } |
| |
#ifdef GTEST_HAS_DEATH_TEST
// After a range has been added, asking for a range that ends earlier must
// not change which blocks can be found.
TEST_F(BlockHashDeathTest, AddingRangesInDescendingOrderNoEffect) {
  const int e_blocks[] = {
    block_of_first_e, block_of_second_e, block_of_third_e, block_of_fourth_e
  };
  const int e_count = static_cast<int>(sizeof(e_blocks) / sizeof(e_blocks[0]));

  th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);

  EXPECT_EQ(e_blocks[0], FirstMatchingBlock(*th_, hashed_e, test_string_e));
  for (int i = 1; i < e_count; ++i) {
    EXPECT_EQ(e_blocks[i],
              NextMatchingBlock(*th_, e_blocks[i - 1], test_string_e));
  }
  EXPECT_EQ(-1, NextMatchingBlock(*th_, e_blocks[e_count - 1], test_string_e));

  // Starting over gives same result
  EXPECT_EQ(e_blocks[0], FirstMatchingBlock(*th_, hashed_e, test_string_e));

  // These calls will produce DFATAL error messages, and will do nothing,
  // since the ranges have already been added.
  EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3),
                     "<");
  EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_first_e + 1),
                     "<");

  // The same four blocks, and no others, must still be found.
  EXPECT_EQ(e_blocks[0], FirstMatchingBlock(*th_, hashed_e, test_string_e));
  for (int i = 1; i < e_count; ++i) {
    EXPECT_EQ(e_blocks[i],
              NextMatchingBlock(*th_, e_blocks[i - 1], test_string_e));
  }
  EXPECT_EQ(-1, NextMatchingBlock(*th_, e_blocks[e_count - 1], test_string_e));

  // Starting over gives same result
  EXPECT_EQ(e_blocks[0], FirstMatchingBlock(*th_, hashed_e, test_string_e));
}
#endif  // GTEST_HAS_DEATH_TEST
| |
| TEST_F(BlockHashTest, AddEntireRangeFindSixMatches) { |
| th_->AddAllBlocksThroughIndex(StringLengthAsInt(sample_text)); |
| EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*th_, block_of_fourth_e, |
| test_string_e)); |
| EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*th_, block_of_fifth_e, |
| test_string_e)); |
| EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_sixth_e, test_string_e)); |
| |
| // Starting over gives same result |
| EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| test_string_e)); |
| } |
| |
| TEST_F(BlockHashTest, ZeroSizeSourceAccepted) { |
| BlockHash zero_sized_hash(sample_text, 0, 0); |
| EXPECT_EQ(true, zero_sized_hash.Init(true)); |
| EXPECT_EQ(-1, FirstMatchingBlock(zero_sized_hash, hashed_y, test_string_y)); |
| } |
| |
| TEST_F(BlockHashTest, NullSource) { |
| BlockHash null_source_hash(NULL, 0, 0); |
| EXPECT_EQ(true, null_source_hash.Init(true)); |
| EXPECT_EQ(-1, FirstMatchingBlock(null_source_hash, hashed_y, test_string_y)); |
| } |
| |
| #ifdef GTEST_HAS_DEATH_TEST |
| TEST_F(BlockHashDeathTest, BadNextMatchingBlockReturnsNoMatch) { |
| EXPECT_DEBUG_DEATH(EXPECT_EQ(-1, NextMatchingBlock(*dh_, 0xFFFFFFFE, " ")), |
| "invalid"); |
| } |
| |
| TEST_F(BlockHashDeathTest, CallingInitTwiceIsIllegal) { |
| BlockHash bh(sample_text, strlen(sample_text), 0); |
| EXPECT_TRUE(bh.Init(false)); |
| EXPECT_DEBUG_DEATH(EXPECT_FALSE(bh.Init(false)), "twice"); |
| } |
| |
| TEST_F(BlockHashDeathTest, CallingAddBlockBeforeInitIsIllegal) { |
| BlockHash bh(sample_text, strlen(sample_text), 0); |
| EXPECT_DEBUG_DEATH(bh.AddAllBlocksThroughIndex(index_of_first_e), |
| "called before"); |
| } |
| |
| TEST_F(BlockHashDeathTest, AddAllBlocksThroughIndexOutOfRange) { |
| EXPECT_DEBUG_DEATH( |
| th_->AddAllBlocksThroughIndex(static_cast<int>(strlen(sample_text) + 1)), |
| "higher than end"); |
| } |
| #endif // GTEST_HAS_DEATH_TEST |
| |
| TEST_F(BlockHashTest, UnknownFingerprintReturnsNoMatch) { |
| EXPECT_EQ(-1, FirstMatchingBlock(*dh_, 0xFAFAFAFA, "FAFA")); |
| } |
| |
| TEST_F(BlockHashTest, FindBestMatch) { |
| dh_->FindBestMatch(hashed_f, |
| &search_string[index_of_f_in_fearsome], |
| search_string, |
| strlen(search_string), |
| &best_match_); |
| EXPECT_EQ(index_of_longest_match_ear_is_fear, best_match_.source_offset()); |
| EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset()); |
| // The match includes the spaces after the final character, |
| // which is why (kBlockSize - 1) is added to the expected best size. |
| EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1), |
| best_match_.size()); |
| } |
| |
| TEST_F(BlockHashTest, FindBestMatchWithStartingOffset) { |
| BlockHash th2(sample_text, strlen(sample_text), 0x10000); |
| th2.Init(true); // hash all blocks |
| th2.FindBestMatch(hashed_f, |
| &search_string[index_of_f_in_fearsome], |
| search_string, |
| strlen(search_string), |
| &best_match_); |
| // Offset should begin with dictionary_size |
| EXPECT_EQ(0x10000 + (index_of_longest_match_ear_is_fear), |
| best_match_.source_offset()); |
| EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset()); |
| // The match includes the spaces after the final character, |
| // which is why (kBlockSize - 1) is added to the expected best size. |
| EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1), |
| best_match_.size()); |
| } |
| |
| TEST_F(BlockHashTest, BestMatchReachesEndOfDictionary) { |
| // Hash the "i" in "fear itself" |
| uint32_t hash_value = RollingHash<kBlockSize>::Hash( |
| &search_to_end_string[index_of_i_in_itself]); |
| dh_->FindBestMatch(hash_value, |
| &search_to_end_string[index_of_i_in_itself], |
| search_to_end_string, |
| strlen(search_to_end_string), |
| &best_match_); |
| EXPECT_EQ(index_of_space_before_itself, best_match_.source_offset()); |
| EXPECT_EQ(index_of_space_in_eat_itself, best_match_.target_offset()); |
| EXPECT_EQ(strlen(" itself") * kBlockSize, best_match_.size()); |
| } |
| |
| TEST_F(BlockHashTest, BestMatchReachesStartOfDictionary) { |
| // Hash the "i" in "fear itself" |
| uint32_t hash_value = RollingHash<kBlockSize>::Hash( |
| &search_to_beginning_string[index_of_o_in_online]); |
| dh_->FindBestMatch(hash_value, |
| &search_to_beginning_string[index_of_o_in_online], |
| search_to_beginning_string, |
| strlen(search_to_beginning_string), |
| &best_match_); |
| EXPECT_EQ(0, best_match_.source_offset()); // beginning of dictionary |
| EXPECT_EQ(index_of_t_in_use_the, best_match_.target_offset()); |
| // The match includes the spaces after the final character, |
| // which is why (kBlockSize - 1) is added to the expected best size. |
| EXPECT_EQ((strlen("The onl") * kBlockSize) + (kBlockSize - 1), |
| best_match_.size()); |
| } |
| |
| TEST_F(BlockHashTest, BestMatchWithManyMatches) { |
| BlockHash many_matches_hash(sample_text_many_matches, |
| strlen(sample_text_many_matches), |
| 0); |
| EXPECT_TRUE(many_matches_hash.Init(true)); |
| // Hash the " a" at the beginning of the search string "ababc" |
| uint32_t hash_value = |
| RollingHash<kBlockSize>::Hash(search_string_many_matches); |
| many_matches_hash.FindBestMatch(hash_value, |
| search_string_many_matches, |
| search_string_many_matches, |
| strlen(search_string_many_matches), |
| &best_match_); |
| EXPECT_EQ(index_of_ababc, best_match_.source_offset()); |
| EXPECT_EQ(0, best_match_.target_offset()); |
| EXPECT_EQ(strlen(search_string_many_matches), best_match_.size()); |
| } |
| |
| TEST_F(BlockHashTest, HashCollisionFindsNoMatch) { |
| char* collision_search_string = new char[strlen(search_string) + 1]; |
| memcpy(collision_search_string, search_string, strlen(search_string) + 1); |
| char* fearsome_location = &collision_search_string[index_of_f_in_fearsome]; |
| |
| // Tweak the collision string so that it has the same hash value |
| // but different text. The last four characters of the search string |
| // should be " f", and the bytes given below have the same hash value |
| // as those characters. |
| CHECK_GE(kBlockSize, 4); |
| fearsome_location[kBlockSize - 4] = 0x84; |
| fearsome_location[kBlockSize - 3] = 0xF1; |
| fearsome_location[kBlockSize - 2] = 0x51; |
| fearsome_location[kBlockSize - 1] = 0x00; |
| EXPECT_EQ(hashed_f, RollingHash<kBlockSize>::Hash(fearsome_location)); |
| EXPECT_NE(0, memcmp(&search_string[index_of_f_in_fearsome], |
| fearsome_location, |
| kBlockSize)); |
| // No match should be found this time. |
| dh_->FindBestMatch(hashed_f, |
| fearsome_location, |
| collision_search_string, |
| strlen(search_string), // since collision_search_string has embedded \0 |
| &best_match_); |
| EXPECT_EQ(-1, best_match_.source_offset()); |
| EXPECT_EQ(-1, best_match_.target_offset()); |
| EXPECT_EQ(0U, best_match_.size()); |
| delete[] collision_search_string; |
| } |
| |
| // If the footprint passed to FindBestMatch does not actually match |
| // the search string, it should not find any matches. |
| TEST_F(BlockHashTest, WrongFootprintFindsNoMatch) { |
| dh_->FindBestMatch(hashed_e, // Using hashed value of "e" instead of "f"! |
| &search_string[index_of_f_in_fearsome], |
| search_string, |
| strlen(search_string), |
| &best_match_); |
| EXPECT_EQ(-1, best_match_.source_offset()); |
| EXPECT_EQ(-1, best_match_.target_offset()); |
| EXPECT_EQ(0U, best_match_.size()); |
| } |
| |
| // Use a dictionary containing 1M copies of the letter 'Q', |
| // and target data that also contains 1M Qs. If FindBestMatch |
| // is not throttled to find a maximum number of matches, this |
| // will take a very long time -- several seconds at least. |
| // If this test appears to hang, it is because the throttling code |
| // (see BlockHash::kMaxMatchesToCheck for details) is not working. |
| TEST_F(BlockHashTest, SearchStringFindsTooManyMatches) { |
| const int kTestSize = 1 << 20; // 1M |
| char* huge_dictionary = new char[kTestSize]; |
| memset(huge_dictionary, 'Q', kTestSize); |
| BlockHash huge_bh(huge_dictionary, kTestSize, 0); |
| EXPECT_TRUE(huge_bh.Init(/* populate_hash_table = */ true)); |
| char* huge_target = new char[kTestSize]; |
| memset(huge_target, 'Q', kTestSize); |
| CycleTimer timer; |
| timer.Start(); |
| huge_bh.FindBestMatch(hashed_all_Qs, |
| huge_target + (kTestSize / 2), // middle of target |
| huge_target, |
| kTestSize, |
| &best_match_); |
| timer.Stop(); |
| double elapsed_time_in_us = static_cast<double>(timer.GetInUsec()); |
| std::cout << "Time to search for best match with 1M matches: " |
| << elapsed_time_in_us << " us" << std::endl; |
| // All blocks match the candidate block. FindBestMatch should have checked |
| // a certain number of matches before giving up. The best match |
| // should include at least half the source and target, since the candidate |
| // block was in the middle of the target data. |
| EXPECT_GT((kTestSize / 2), best_match_.source_offset()); |
| EXPECT_GT((kTestSize / 2), best_match_.target_offset()); |
| EXPECT_LT(static_cast<size_t>(kTestSize / 2), best_match_.size()); |
| EXPECT_GT(5000000, elapsed_time_in_us); // < 5 seconds |
| #ifdef NDEBUG |
| EXPECT_GT(1000000, elapsed_time_in_us); // < 1 second |
| #endif // NDEBUG |
| delete[] huge_target; |
| delete[] huge_dictionary; |
| } |
| |
| #ifdef GTEST_HAS_DEATH_TEST |
| TEST_F(BlockHashDeathTest, AddTooManyBlocks) { |
| for (int i = 0; i < StringLengthAsInt(sample_text_without_spaces); ++i) { |
| th_->AddOneIndexHash(i * kBlockSize, hashed_e); |
| } |
| // Didn't expect another block to be added |
| EXPECT_DEBUG_DEATH(th_->AddOneIndexHash(StringLengthAsInt(sample_text), |
| hashed_e), |
| "AddBlock"); |
| } |
| #endif // GTEST_HAS_DEATH_TEST |
| |
| } // namespace open_vcdiff |