// base/i18n/streaming_utf8_validator_unittest.cc - chromium/src - Git at Google

 // Copyright 2014 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "base/i18n/streaming_utf8_validator.h"

 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>

 #include <string>

 #include "base/macros.h"
 #include "base/strings/string_piece.h"
 #include "testing/gtest/include/gtest/gtest.h"

 // Define BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST to verify that this class
 // accepts exactly the same set of 4-byte strings as ICU-based validation. This
 // tests every possible 4-byte string, so it is too slow to run routinely on
 // low-powered machines.
 //
 // #define BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST

 #ifdef BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST

 #include "base/bind.h"
 #include "base/location.h"
 #include "base/logging.h"
 #include "base/memory/ref_counted.h"
 #include "base/strings/string_util.h"
 #include "base/strings/stringprintf.h"
 #include "base/strings/utf_string_conversion_utils.h"
 #include "base/synchronization/lock.h"
 #include "base/task_scheduler/post_task.h"
 #include "base/task_scheduler/task_scheduler.h"
 #include "third_party/icu/source/common/unicode/utf8.h"

 #endif  // BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST

 namespace base {
 namespace {

 // Avoid having to qualify the enum values in the tests.
 const StreamingUtf8Validator::State VALID_ENDPOINT =
     StreamingUtf8Validator::VALID_ENDPOINT;
 const StreamingUtf8Validator::State VALID_MIDPOINT =
     StreamingUtf8Validator::VALID_MIDPOINT;
 const StreamingUtf8Validator::State INVALID = StreamingUtf8Validator::INVALID;

 #ifdef BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST

 // Number of consecutive 32-bit values covered by one TestRange() task. It
 // divides 2^32 evenly, so a start offset advanced by this amount wraps back
 // to exactly zero after the last chunk.
 const uint32_t kThoroughTestChunkSize = 1u << 24;

 // Fixture for the exhaustive comparison against ICU-based validation: 32-bit
 // values are turned into 4-byte strings and both validators must agree.
 class StreamingUtf8ValidatorThoroughTest : public ::testing::Test {
  protected:
   StreamingUtf8ValidatorThoroughTest()
       : tasks_dispatched_(0), tasks_finished_(0) {}

   // Reference check: decodes |src| with ICU's U8_NEXT and returns true iff
   // every decoded value passes base::IsValidCodepoint(). Follows the logic of
   // base::IsStringUTF8, except that non-characters count as valid and the
   // input is a raw buffer rather than a string.
   static bool IsStringUtf8(const char* src, int32_t src_len) {
     for (int32_t offset = 0; offset < src_len;) {
       int32_t decoded;
       U8_NEXT(src, offset, src_len, decoded);  // Advances |offset|.
       if (!base::IsValidCodepoint(decoded))
         return false;
     }
     return true;
   }

   // Copies the in-memory bytes of |n| into a 4-byte buffer and expects
   // IsStringUtf8() and StreamingUtf8Validator to agree on whether that buffer
   // is complete, valid UTF-8.
   void TestNumber(uint32_t n) const {
     char bytes[sizeof n];
     memcpy(bytes, &n, sizeof n);
     StreamingUtf8Validator validator;
     EXPECT_EQ(IsStringUtf8(bytes, sizeof n),
               validator.AddBytes(bytes, sizeof n) == VALID_ENDPOINT)
         << "Difference of opinion for \""
         << base::StringPrintf("\\x%02X\\x%02X\\x%02X\\x%02X",
                               bytes[0] & 0xFF,
                               bytes[1] & 0xFF,
                               bytes[2] & 0xFF,
                               bytes[3] & 0xFF)
         << "\"";
   }

  public:
   // Runs TestNumber() on the |size| consecutive values starting at |begin|
   // (with unsigned wrap-around), then counts the task as finished and logs
   // progress while holding |lock_|. Presumably public so that the test body
   // can bind it as a task.
   void TestRange(uint32_t begin, uint32_t size) {
     uint32_t value = begin;
     for (uint32_t remaining = size; remaining != 0; --remaining, ++value)
       TestNumber(value);

     base::AutoLock al(lock_);
     ++tasks_finished_;
     LOG(INFO) << tasks_finished_ << " / " << tasks_dispatched_
               << " tasks done\n";
   }

  protected:
   // Guards the two task counters below.
   base::Lock lock_;
   int tasks_dispatched_;
   int tasks_finished_;
 };

 // Checks every 32-bit value as a 4-byte string, split into ranges of
 // kThoroughTestChunkSize values that are posted as scheduler tasks.
 TEST_F(StreamingUtf8ValidatorThoroughTest, TestEverything) {
   base::TaskScheduler::CreateAndStartWithDefaultParams(
       "StreamingUtf8ValidatorThoroughTest");
   {
     // |lock_| is held for the whole dispatch loop. TestRange() takes the same
     // lock before touching the counters, so no task logs progress until
     // |tasks_dispatched_| has reached its final value.
     base::AutoLock al(lock_);
     uint32_t chunk_start = 0;
     for (;;) {
       base::PostTaskWithTraits(
           FROM_HERE, {base::TaskShutdownBehavior::BLOCK_SHUTDOWN},
           base::BindOnce(&StreamingUtf8ValidatorThoroughTest::TestRange,
                          base::Unretained(this), chunk_start,
                          kThoroughTestChunkSize));
       ++tasks_dispatched_;
       chunk_start += kThoroughTestChunkSize;
       // Unsigned wrap-around to zero means the last chunk has been posted.
       if (chunk_start == 0)
         break;
     }
   }
   base::TaskScheduler::GetInstance()->Shutdown();
   base::TaskScheduler::GetInstance()->JoinForTesting();
   base::TaskScheduler::SetInstance(nullptr);
 }

 #endif  // BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST

 // These valid and invalid UTF-8 sequences are based on the tests from
 // base/strings/string_util_unittest.cc

 // Every entry of |valid| must encode exactly one code point: the truncated
 // test inputs are produced by taking the non-empty proper prefixes of these
 // strings.
 const char* const valid[] = {
     "\r",                // U+000D
     "\n",                // U+000A
     "a",                 // U+0061
     "\xc2\x81",          // U+0081, 2 bytes
     "\xe1\x80\xbf",      // U+103F, 3 bytes
     "\xf1\x80\xa0\xbf",  // U+4083F, 4 bytes
     "\xef\xbb\xbf",      // U+FEFF, the UTF-8 BOM
 };

 // One-past-the-end pointer for |valid|; (valid, valid_end) is passed to the
 // test helpers as an iterator range.
 const char* const* const valid_end = &valid[0] + arraysize(valid);

 // Byte sequences that the tests expect to be classified as INVALID.
 const char* const invalid[] = {
     // always invalid bytes
     "\xc0", "\xc1",
     "\xf5", "\xf6", "\xf7",
     "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff",
     // surrogate code points (U+D800, U+D80F, U+DFFF). The middle entry used
     // to read "\xed\x0a\x8f", whose second byte is a newline rather than a
     // continuation byte, so it never exercised the surrogate check.
     "\xed\xa0\x80", "\xed\xa0\x8f", "\xed\xbf\xbf",
     //
     // overlong sequences
     "\xc0\x80",                  // U+0000
     "\xc1\x80",                  // U+0040 "@"
     "\xc1\x81",                  // U+0041 "A"
     "\xe0\x80\x80",              // U+0000
     "\xe0\x82\x80",              // U+0080
     "\xe0\x9f\xbf",              // U+07ff
     "\xf0\x80\x80\x8D",          // U+000D
     "\xf0\x80\x82\x91",          // U+0091
     "\xf0\x80\xa0\x80",          // U+0800
     "\xf0\x8f\xbb\xbf",          // U+FEFF (BOM)
     "\xf8\x80\x80\x80\xbf",      // U+003F
     "\xfc\x80\x80\x80\xa0\xa5",  // U+0825
     //
     // Beyond U+10FFFF
     "\xf4\x90\x80\x80",          // U+110000
     "\xf8\xa0\xbf\x80\xbf",      // 5 bytes
     "\xfc\x9c\xbf\x80\xbf\x80",  // 6 bytes
     //
     // BOMs in UTF-16(BE|LE)
     "\xfe\xff", "\xff\xfe",
 };

 // One-past-the-end pointer for |invalid|; (invalid, invalid_end) is passed
 // to the test helpers as an iterator range.
 const char* const* const invalid_end = &invalid[0] + arraysize(invalid);

 // Iterates over every truncated form of the entries of |valid|: for each
 // entry it yields the prefixes of length 1 up to (but excluding) the full
 // length. Single-byte entries therefore contribute nothing.
 class PartialIterator {
  public:
   // Default construction yields the first position, i.e. the equivalent of
   // begin().
   PartialIterator() : index_(0), prefix_length_(0) { Advance(); }
   // No destructor, copy constructor or assignment operator is declared; this
   // is a value type and the compiler-generated ones are used.

   // The past-the-end position that Advance() stops at.
   static PartialIterator end() { return PartialIterator(arraysize(valid), 1); }

   PartialIterator& operator++() {
     Advance();
     return *this;
   }

   // The current prefix, viewed directly inside the |valid| entry.
   base::StringPiece operator*() const {
     const char* const sequence = valid[index_];
     return base::StringPiece(sequence, prefix_length_);
   }

   bool operator==(const PartialIterator& rhs) const {
     if (index_ != rhs.index_)
       return false;
     return prefix_length_ == rhs.prefix_length_;
   }

   bool operator!=(const PartialIterator& rhs) const { return !(rhs == *this); }

  private:
   // Used only by end().
   PartialIterator(size_t index, size_t prefix_length)
       : index_(index), prefix_length_(prefix_length) {}

   // Moves to the next proper prefix, stepping over entries as needed.
   void Advance() {
     const size_t kNumSequences = arraysize(valid);
     // First try a longer prefix of the current entry.
     if (index_ < kNumSequences && prefix_length_ < strlen(valid[index_]))
       ++prefix_length_;
     // A prefix as long as the entry is not partial: go to the next entry's
     // one-byte prefix, repeating while that too is the entire entry.
     while (index_ < kNumSequences) {
       if (prefix_length_ != strlen(valid[index_]))
         break;
       ++index_;
       prefix_length_ = 1;
     }
   }

   // Position of the current sequence within the |valid| array.
   size_t index_;
   // Number of leading bytes of that sequence to expose.
   size_t prefix_length_;
 };

 // A test fixture for tests which test one UTF-8 sequence (or invalid
 // byte sequence) at a time.
 class StreamingUtf8ValidatorSingleSequenceTest : public ::testing::Test {
  protected:
   // Iterator must be convertible when de-referenced to StringPiece.
   template <typename Iterator>
   void CheckRange(Iterator begin,
                   Iterator end,
                   StreamingUtf8Validator::State expected) {
     for (Iterator it = begin; it != end; ++it) {
       StreamingUtf8Validator validator;
       base::StringPiece sequence = *it;
       EXPECT_EQ(expected,
                 validator.AddBytes(sequence.data(), sequence.size()))
           << "Failed for \"" << sequence << "\"";
     }
   }

   // Adding input a byte at a time should make absolutely no difference.
   template <typename Iterator>
   void CheckRangeByteAtATime(Iterator begin,
                              Iterator end,
                              StreamingUtf8Validator::State expected) {
     for (Iterator it = begin; it != end; ++it) {
       StreamingUtf8Validator validator;
       base::StringPiece sequence = *it;
       StreamingUtf8Validator::State state = VALID_ENDPOINT;
       for (base::StringPiece::const_iterator cit = sequence.begin();
            cit != sequence.end();
            ++cit) {
         state = validator.AddBytes(&*cit, 1);
       }
       EXPECT_EQ(expected, state) << "Failed for \"" << sequence << "\"";
     }
   }
 };

 // Fixture for tests that feed two byte sequences back to back.
 class StreamingUtf8ValidatorDoubleSequenceTest : public ::testing::Test {
  protected:
   // For every pair (a, b) with a from [begin1, end1) and b from
   // [begin2, end2), adds a and then b to the validator and expects the state
   // returned for b to be |expected|.
   template <typename Iterator1, typename Iterator2>
   void CheckCombinations(Iterator1 begin1,
                          Iterator1 end1,
                          Iterator2 begin2,
                          Iterator2 end2,
                          StreamingUtf8Validator::State expected) {
     // One validator serves all pairs; it is Reset() after each of them.
     StreamingUtf8Validator validator;
     for (; begin1 != end1; ++begin1) {
       const base::StringPiece first = *begin1;
       for (Iterator2 second_it = begin2; second_it != end2; ++second_it) {
         const base::StringPiece second = *second_it;
         validator.AddBytes(first.data(), first.size());
         const StreamingUtf8Validator::State actual =
             validator.AddBytes(second.data(), second.size());
         EXPECT_EQ(expected, actual)
             << "Failed for \"" << first << second << "\"";
         validator.Reset();
       }
     }
   }
 };

 // Adding zero bytes to a fresh validator is expected to give VALID_ENDPOINT.
 TEST(StreamingUtf8ValidatorTest, NothingIsValid) {
   static const char kNothing[] = "";
   StreamingUtf8Validator validator;
   EXPECT_EQ(VALID_ENDPOINT, validator.AddBytes(kNothing, 0));
 }

 // The entries of |valid| must be non-empty and are measured with strlen(),
 // so |valid| cannot be used to test the NUL character '\0'. NUL therefore
 // gets its own test.
 TEST(StreamingUtf8ValidatorTest, NulIsValid) {
   static const char kNul[] = "\x00";
   StreamingUtf8Validator validator;
   EXPECT_EQ(VALID_ENDPOINT, validator.AddBytes(kNul, 1));
 }

 // Basic sanity check on plain ASCII before the more elaborate tests.
 TEST(StreamingUtf8ValidatorTest, HelloWorld) {
   static const char kHelloWorld[] = "Hello, World!";
   const size_t length = strlen(kHelloWorld);
   StreamingUtf8Validator validator;
   EXPECT_EQ(VALID_ENDPOINT, validator.AddBytes(kHelloWorld, length));
 }

 // INVALID is expected to persist for further input until Reset() is called.
 TEST(StreamingUtf8ValidatorTest, ResetWorks) {
   static const char kBadByte[] = "\xC0";
   static const char kAscii[] = "a";
   StreamingUtf8Validator validator;
   EXPECT_EQ(INVALID, validator.AddBytes(kBadByte, 1));
   EXPECT_EQ(INVALID, validator.AddBytes(kAscii, 1));
   validator.Reset();
   EXPECT_EQ(VALID_ENDPOINT, validator.AddBytes(kAscii, 1));
 }

 // Whole sequences, added in one call.
 TEST_F(StreamingUtf8ValidatorSingleSequenceTest, Valid) {
   CheckRange(&valid[0], valid_end, VALID_ENDPOINT);
 }

 // Truncated sequences, added in one call.
 TEST_F(StreamingUtf8ValidatorSingleSequenceTest, Partial) {
   const PartialIterator first;
   const PartialIterator last = PartialIterator::end();
   CheckRange(first, last, VALID_MIDPOINT);
 }

 // Invalid sequences, added in one call.
 TEST_F(StreamingUtf8ValidatorSingleSequenceTest, Invalid) {
   CheckRange(&invalid[0], invalid_end, INVALID);
 }

 // The same three data sets again, added one byte per call.
 TEST_F(StreamingUtf8ValidatorSingleSequenceTest, ValidByByte) {
   CheckRangeByteAtATime(&valid[0], valid_end, VALID_ENDPOINT);
 }

 TEST_F(StreamingUtf8ValidatorSingleSequenceTest, PartialByByte) {
   const PartialIterator first;
   const PartialIterator last = PartialIterator::end();
   CheckRangeByteAtATime(first, last, VALID_MIDPOINT);
 }

 TEST_F(StreamingUtf8ValidatorSingleSequenceTest, InvalidByByte) {
   CheckRangeByteAtATime(&invalid[0], invalid_end, INVALID);
 }

 // Each test below concatenates every element of one data set with every
 // element of another; the test name states the expected outcome.
 TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, ValidPlusValidIsValid) {
   CheckCombinations(&valid[0], valid_end, &valid[0], valid_end,
                     VALID_ENDPOINT);
 }

 TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, ValidPlusPartialIsPartial) {
   const PartialIterator partial_begin;
   const PartialIterator partial_end = PartialIterator::end();
   CheckCombinations(&valid[0], valid_end, partial_begin, partial_end,
                     VALID_MIDPOINT);
 }

 TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, PartialPlusValidIsInvalid) {
   const PartialIterator partial_begin;
   const PartialIterator partial_end = PartialIterator::end();
   CheckCombinations(partial_begin, partial_end, &valid[0], valid_end,
                     INVALID);
 }

 TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, PartialPlusPartialIsInvalid) {
   const PartialIterator partial_begin;
   const PartialIterator partial_end = PartialIterator::end();
   CheckCombinations(partial_begin, partial_end, partial_begin, partial_end,
                     INVALID);
 }

 TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, ValidPlusInvalidIsInvalid) {
   CheckCombinations(&valid[0], valid_end, &invalid[0], invalid_end, INVALID);
 }

 TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, InvalidPlusValidIsInvalid) {
   CheckCombinations(&invalid[0], invalid_end, &valid[0], valid_end, INVALID);
 }

 TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, InvalidPlusInvalidIsInvalid) {
   CheckCombinations(&invalid[0], invalid_end, &invalid[0], invalid_end,
                     INVALID);
 }

 TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, InvalidPlusPartialIsInvalid) {
   const PartialIterator partial_begin;
   const PartialIterator partial_end = PartialIterator::end();
   CheckCombinations(&invalid[0], invalid_end, partial_begin, partial_end,
                     INVALID);
 }

 TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, PartialPlusInvalidIsInvalid) {
   const PartialIterator partial_begin;
   const PartialIterator partial_end = PartialIterator::end();
   CheckCombinations(partial_begin, partial_end, &invalid[0], invalid_end,
                     INVALID);
 }

 // Tests for the one-shot StreamingUtf8Validator::Validate() helper.
 TEST(StreamingUtf8ValidatorValidateTest, EmptyIsValid) {
   const std::string empty;
   EXPECT_TRUE(StreamingUtf8Validator::Validate(empty));
 }

 TEST(StreamingUtf8ValidatorValidateTest, SimpleValidCase) {
   const std::string two_byte_sequence("\xc2\x81");
   EXPECT_TRUE(StreamingUtf8Validator::Validate(two_byte_sequence));
 }

 TEST(StreamingUtf8ValidatorValidateTest, SimpleInvalidCase) {
   const std::string overlong_nul("\xc0\x80");
   EXPECT_FALSE(StreamingUtf8Validator::Validate(overlong_nul));
 }

 // A sequence cut off after its lead byte is expected to be rejected.
 TEST(StreamingUtf8ValidatorValidateTest, TruncatedIsInvalid) {
   const std::string lead_byte_only("\xc2");
   EXPECT_FALSE(StreamingUtf8Validator::Validate(lead_byte_only));
 }

 }  // namespace
 }  // namespace base
	// Copyright 2014 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "base/i18n/streaming_utf8_validator.h"

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#include <string>

	#include "base/macros.h"
	#include "base/strings/string_piece.h"
	#include "testing/gtest/include/gtest/gtest.h"

	// Define BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST to verify that this class
	// accepts exactly the same set of 4-byte strings as ICU-based validation. This
	// tests every possible 4-byte string, so it is too slow to run routinely on
	// low-powered machines.
	//
	// #define BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST

	#ifdef BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST

	#include "base/bind.h"
	#include "base/location.h"
	#include "base/logging.h"
	#include "base/memory/ref_counted.h"
	#include "base/strings/string_util.h"
	#include "base/strings/stringprintf.h"
	#include "base/strings/utf_string_conversion_utils.h"
	#include "base/synchronization/lock.h"
	#include "base/task_scheduler/post_task.h"
	#include "base/task_scheduler/task_scheduler.h"
	#include "third_party/icu/source/common/unicode/utf8.h"

	#endif // BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST

	namespace base {
	namespace {

	// Avoid having to qualify the enum values in the tests.
	const StreamingUtf8Validator::State VALID_ENDPOINT =
	StreamingUtf8Validator::VALID_ENDPOINT;
	const StreamingUtf8Validator::State VALID_MIDPOINT =
	StreamingUtf8Validator::VALID_MIDPOINT;
	const StreamingUtf8Validator::State INVALID = StreamingUtf8Validator::INVALID;

	#ifdef BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST

	// Number of consecutive 32-bit values covered by one TestRange() task. It
	// divides 2^32 evenly, so a start offset advanced by this amount wraps back
	// to exactly zero after the last chunk.
	const uint32_t kThoroughTestChunkSize = 1u << 24;

	// Fixture for the exhaustive comparison against ICU-based validation: 32-bit
	// values are turned into 4-byte strings and both validators must agree.
	class StreamingUtf8ValidatorThoroughTest : public ::testing::Test {
	 protected:
	  StreamingUtf8ValidatorThoroughTest()
	      : tasks_dispatched_(0), tasks_finished_(0) {}

	  // Reference check: decodes |src| with ICU's U8_NEXT and returns true iff
	  // every decoded value passes base::IsValidCodepoint(). Follows the logic of
	  // base::IsStringUTF8, except that non-characters count as valid and the
	  // input is a raw buffer rather than a string.
	  static bool IsStringUtf8(const char* src, int32_t src_len) {
	    for (int32_t offset = 0; offset < src_len;) {
	      int32_t decoded;
	      U8_NEXT(src, offset, src_len, decoded);  // Advances |offset|.
	      if (!base::IsValidCodepoint(decoded))
	        return false;
	    }
	    return true;
	  }

	  // Copies the in-memory bytes of |n| into a 4-byte buffer and expects
	  // IsStringUtf8() and StreamingUtf8Validator to agree on whether that buffer
	  // is complete, valid UTF-8.
	  void TestNumber(uint32_t n) const {
	    char bytes[sizeof n];
	    memcpy(bytes, &n, sizeof n);
	    StreamingUtf8Validator validator;
	    EXPECT_EQ(IsStringUtf8(bytes, sizeof n),
	              validator.AddBytes(bytes, sizeof n) == VALID_ENDPOINT)
	        << "Difference of opinion for \""
	        << base::StringPrintf("\\x%02X\\x%02X\\x%02X\\x%02X",
	                              bytes[0] & 0xFF,
	                              bytes[1] & 0xFF,
	                              bytes[2] & 0xFF,
	                              bytes[3] & 0xFF)
	        << "\"";
	  }

	 public:
	  // Runs TestNumber() on the |size| consecutive values starting at |begin|
	  // (with unsigned wrap-around), then counts the task as finished and logs
	  // progress while holding |lock_|. Presumably public so that the test body
	  // can bind it as a task.
	  void TestRange(uint32_t begin, uint32_t size) {
	    uint32_t value = begin;
	    for (uint32_t remaining = size; remaining != 0; --remaining, ++value)
	      TestNumber(value);

	    base::AutoLock al(lock_);
	    ++tasks_finished_;
	    LOG(INFO) << tasks_finished_ << " / " << tasks_dispatched_
	              << " tasks done\n";
	  }

	 protected:
	  // Guards the two task counters below.
	  base::Lock lock_;
	  int tasks_dispatched_;
	  int tasks_finished_;
	};

	// Checks every 32-bit value as a 4-byte string, split into ranges of
	// kThoroughTestChunkSize values that are posted as scheduler tasks.
	TEST_F(StreamingUtf8ValidatorThoroughTest, TestEverything) {
	  base::TaskScheduler::CreateAndStartWithDefaultParams(
	      "StreamingUtf8ValidatorThoroughTest");
	  {
	    // |lock_| is held for the whole dispatch loop. TestRange() takes the same
	    // lock before touching the counters, so no task logs progress until
	    // |tasks_dispatched_| has reached its final value.
	    base::AutoLock al(lock_);
	    uint32_t chunk_start = 0;
	    for (;;) {
	      base::PostTaskWithTraits(
	          FROM_HERE, {base::TaskShutdownBehavior::BLOCK_SHUTDOWN},
	          base::BindOnce(&StreamingUtf8ValidatorThoroughTest::TestRange,
	                         base::Unretained(this), chunk_start,
	                         kThoroughTestChunkSize));
	      ++tasks_dispatched_;
	      chunk_start += kThoroughTestChunkSize;
	      // Unsigned wrap-around to zero means the last chunk has been posted.
	      if (chunk_start == 0)
	        break;
	    }
	  }
	  base::TaskScheduler::GetInstance()->Shutdown();
	  base::TaskScheduler::GetInstance()->JoinForTesting();
	  base::TaskScheduler::SetInstance(nullptr);
	}

	#endif // BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST

	// These valid and invalid UTF-8 sequences are based on the tests from
	// base/strings/string_util_unittest.cc

	// Every entry of |valid| must encode exactly one code point: the truncated
	// test inputs are produced by taking the non-empty proper prefixes of these
	// strings.
	const char* const valid[] = {
	    "\r",                // U+000D
	    "\n",                // U+000A
	    "a",                 // U+0061
	    "\xc2\x81",          // U+0081, 2 bytes
	    "\xe1\x80\xbf",      // U+103F, 3 bytes
	    "\xf1\x80\xa0\xbf",  // U+4083F, 4 bytes
	    "\xef\xbb\xbf",      // U+FEFF, the UTF-8 BOM
	};

	// One-past-the-end pointer for |valid|; (valid, valid_end) is passed to the
	// test helpers as an iterator range.
	const char* const* const valid_end = &valid[0] + arraysize(valid);

	// Byte sequences that the tests expect to be classified as INVALID.
	const char* const invalid[] = {
	    // always invalid bytes
	    "\xc0", "\xc1",
	    "\xf5", "\xf6", "\xf7",
	    "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff",
	    // surrogate code points (U+D800, U+D80F, U+DFFF). The middle entry used
	    // to read "\xed\x0a\x8f", whose second byte is a newline rather than a
	    // continuation byte, so it never exercised the surrogate check.
	    "\xed\xa0\x80", "\xed\xa0\x8f", "\xed\xbf\xbf",
	    //
	    // overlong sequences
	    "\xc0\x80",                  // U+0000
	    "\xc1\x80",                  // U+0040 "@"
	    "\xc1\x81",                  // U+0041 "A"
	    "\xe0\x80\x80",              // U+0000
	    "\xe0\x82\x80",              // U+0080
	    "\xe0\x9f\xbf",              // U+07ff
	    "\xf0\x80\x80\x8D",          // U+000D
	    "\xf0\x80\x82\x91",          // U+0091
	    "\xf0\x80\xa0\x80",          // U+0800
	    "\xf0\x8f\xbb\xbf",          // U+FEFF (BOM)
	    "\xf8\x80\x80\x80\xbf",      // U+003F
	    "\xfc\x80\x80\x80\xa0\xa5",  // U+0825
	    //
	    // Beyond U+10FFFF
	    "\xf4\x90\x80\x80",          // U+110000
	    "\xf8\xa0\xbf\x80\xbf",      // 5 bytes
	    "\xfc\x9c\xbf\x80\xbf\x80",  // 6 bytes
	    //
	    // BOMs in UTF-16(BE|LE)
	    "\xfe\xff", "\xff\xfe",
	};

	// One-past-the-end pointer for |invalid|; (invalid, invalid_end) is passed
	// to the test helpers as an iterator range.
	const char* const* const invalid_end = &invalid[0] + arraysize(invalid);

	// Iterates over every truncated form of the entries of |valid|: for each
	// entry it yields the prefixes of length 1 up to (but excluding) the full
	// length. Single-byte entries therefore contribute nothing.
	class PartialIterator {
	 public:
	  // Default construction yields the first position, i.e. the equivalent of
	  // begin().
	  PartialIterator() : index_(0), prefix_length_(0) { Advance(); }
	  // No destructor, copy constructor or assignment operator is declared; this
	  // is a value type and the compiler-generated ones are used.

	  // The past-the-end position that Advance() stops at.
	  static PartialIterator end() { return PartialIterator(arraysize(valid), 1); }

	  PartialIterator& operator++() {
	    Advance();
	    return *this;
	  }

	  // The current prefix, viewed directly inside the |valid| entry.
	  base::StringPiece operator*() const {
	    const char* const sequence = valid[index_];
	    return base::StringPiece(sequence, prefix_length_);
	  }

	  bool operator==(const PartialIterator& rhs) const {
	    if (index_ != rhs.index_)
	      return false;
	    return prefix_length_ == rhs.prefix_length_;
	  }

	  bool operator!=(const PartialIterator& rhs) const { return !(rhs == *this); }

	 private:
	  // Used only by end().
	  PartialIterator(size_t index, size_t prefix_length)
	      : index_(index), prefix_length_(prefix_length) {}

	  // Moves to the next proper prefix, stepping over entries as needed.
	  void Advance() {
	    const size_t kNumSequences = arraysize(valid);
	    // First try a longer prefix of the current entry.
	    if (index_ < kNumSequences && prefix_length_ < strlen(valid[index_]))
	      ++prefix_length_;
	    // A prefix as long as the entry is not partial: go to the next entry's
	    // one-byte prefix, repeating while that too is the entire entry.
	    while (index_ < kNumSequences) {
	      if (prefix_length_ != strlen(valid[index_]))
	        break;
	      ++index_;
	      prefix_length_ = 1;
	    }
	  }

	  // Position of the current sequence within the |valid| array.
	  size_t index_;
	  // Number of leading bytes of that sequence to expose.
	  size_t prefix_length_;
	};

	// A test fixture for tests which test one UTF-8 sequence (or invalid
	// byte sequence) at a time.
	class StreamingUtf8ValidatorSingleSequenceTest : public ::testing::Test {
	protected:
	// Iterator must be convertible when de-referenced to StringPiece.
	template <typename Iterator>
	void CheckRange(Iterator begin,
	Iterator end,
	StreamingUtf8Validator::State expected) {
	for (Iterator it = begin; it != end; ++it) {
	StreamingUtf8Validator validator;
	base::StringPiece sequence = *it;
	EXPECT_EQ(expected,
	validator.AddBytes(sequence.data(), sequence.size()))
	<< "Failed for \"" << sequence << "\"";
	}
	}

	// Adding input a byte at a time should make absolutely no difference.
	template <typename Iterator>
	void CheckRangeByteAtATime(Iterator begin,
	Iterator end,
	StreamingUtf8Validator::State expected) {
	for (Iterator it = begin; it != end; ++it) {
	StreamingUtf8Validator validator;
	base::StringPiece sequence = *it;
	StreamingUtf8Validator::State state = VALID_ENDPOINT;
	for (base::StringPiece::const_iterator cit = sequence.begin();
	cit != sequence.end();
	++cit) {
	state = validator.AddBytes(&*cit, 1);
	}
	EXPECT_EQ(expected, state) << "Failed for \"" << sequence << "\"";
	}
	}
	};

	// Fixture for tests that feed two byte sequences back to back.
	class StreamingUtf8ValidatorDoubleSequenceTest : public ::testing::Test {
	 protected:
	  // For every pair (a, b) with a from [begin1, end1) and b from
	  // [begin2, end2), adds a and then b to the validator and expects the state
	  // returned for b to be |expected|.
	  template <typename Iterator1, typename Iterator2>
	  void CheckCombinations(Iterator1 begin1,
	                         Iterator1 end1,
	                         Iterator2 begin2,
	                         Iterator2 end2,
	                         StreamingUtf8Validator::State expected) {
	    // One validator serves all pairs; it is Reset() after each of them.
	    StreamingUtf8Validator validator;
	    for (; begin1 != end1; ++begin1) {
	      const base::StringPiece first = *begin1;
	      for (Iterator2 second_it = begin2; second_it != end2; ++second_it) {
	        const base::StringPiece second = *second_it;
	        validator.AddBytes(first.data(), first.size());
	        const StreamingUtf8Validator::State actual =
	            validator.AddBytes(second.data(), second.size());
	        EXPECT_EQ(expected, actual)
	            << "Failed for \"" << first << second << "\"";
	        validator.Reset();
	      }
	    }
	  }
	};

	// Adding zero bytes to a fresh validator is expected to give VALID_ENDPOINT.
	TEST(StreamingUtf8ValidatorTest, NothingIsValid) {
	  static const char kNothing[] = "";
	  StreamingUtf8Validator validator;
	  EXPECT_EQ(VALID_ENDPOINT, validator.AddBytes(kNothing, 0));
	}

	// The entries of |valid| must be non-empty and are measured with strlen(),
	// so |valid| cannot be used to test the NUL character '\0'. NUL therefore
	// gets its own test.
	TEST(StreamingUtf8ValidatorTest, NulIsValid) {
	  static const char kNul[] = "\x00";
	  StreamingUtf8Validator validator;
	  EXPECT_EQ(VALID_ENDPOINT, validator.AddBytes(kNul, 1));
	}

	// Basic sanity check on plain ASCII before the more elaborate tests.
	TEST(StreamingUtf8ValidatorTest, HelloWorld) {
	  static const char kHelloWorld[] = "Hello, World!";
	  const size_t length = strlen(kHelloWorld);
	  StreamingUtf8Validator validator;
	  EXPECT_EQ(VALID_ENDPOINT, validator.AddBytes(kHelloWorld, length));
	}

	// INVALID is expected to persist for further input until Reset() is called.
	TEST(StreamingUtf8ValidatorTest, ResetWorks) {
	  static const char kBadByte[] = "\xC0";
	  static const char kAscii[] = "a";
	  StreamingUtf8Validator validator;
	  EXPECT_EQ(INVALID, validator.AddBytes(kBadByte, 1));
	  EXPECT_EQ(INVALID, validator.AddBytes(kAscii, 1));
	  validator.Reset();
	  EXPECT_EQ(VALID_ENDPOINT, validator.AddBytes(kAscii, 1));
	}

	// Whole sequences, added in one call.
	TEST_F(StreamingUtf8ValidatorSingleSequenceTest, Valid) {
	  CheckRange(&valid[0], valid_end, VALID_ENDPOINT);
	}

	// Truncated sequences, added in one call.
	TEST_F(StreamingUtf8ValidatorSingleSequenceTest, Partial) {
	  const PartialIterator first;
	  const PartialIterator last = PartialIterator::end();
	  CheckRange(first, last, VALID_MIDPOINT);
	}

	// Invalid sequences, added in one call.
	TEST_F(StreamingUtf8ValidatorSingleSequenceTest, Invalid) {
	  CheckRange(&invalid[0], invalid_end, INVALID);
	}

	// The same three data sets again, added one byte per call.
	TEST_F(StreamingUtf8ValidatorSingleSequenceTest, ValidByByte) {
	  CheckRangeByteAtATime(&valid[0], valid_end, VALID_ENDPOINT);
	}

	TEST_F(StreamingUtf8ValidatorSingleSequenceTest, PartialByByte) {
	  const PartialIterator first;
	  const PartialIterator last = PartialIterator::end();
	  CheckRangeByteAtATime(first, last, VALID_MIDPOINT);
	}

	TEST_F(StreamingUtf8ValidatorSingleSequenceTest, InvalidByByte) {
	  CheckRangeByteAtATime(&invalid[0], invalid_end, INVALID);
	}

	// Each test below concatenates every element of one data set with every
	// element of another; the test name states the expected outcome.
	TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, ValidPlusValidIsValid) {
	  CheckCombinations(&valid[0], valid_end, &valid[0], valid_end,
	                    VALID_ENDPOINT);
	}

	TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, ValidPlusPartialIsPartial) {
	  const PartialIterator partial_begin;
	  const PartialIterator partial_end = PartialIterator::end();
	  CheckCombinations(&valid[0], valid_end, partial_begin, partial_end,
	                    VALID_MIDPOINT);
	}

	TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, PartialPlusValidIsInvalid) {
	  const PartialIterator partial_begin;
	  const PartialIterator partial_end = PartialIterator::end();
	  CheckCombinations(partial_begin, partial_end, &valid[0], valid_end,
	                    INVALID);
	}

	TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, PartialPlusPartialIsInvalid) {
	  const PartialIterator partial_begin;
	  const PartialIterator partial_end = PartialIterator::end();
	  CheckCombinations(partial_begin, partial_end, partial_begin, partial_end,
	                    INVALID);
	}

	TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, ValidPlusInvalidIsInvalid) {
	  CheckCombinations(&valid[0], valid_end, &invalid[0], invalid_end, INVALID);
	}

	TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, InvalidPlusValidIsInvalid) {
	  CheckCombinations(&invalid[0], invalid_end, &valid[0], valid_end, INVALID);
	}

	TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, InvalidPlusInvalidIsInvalid) {
	  CheckCombinations(&invalid[0], invalid_end, &invalid[0], invalid_end,
	                    INVALID);
	}

	TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, InvalidPlusPartialIsInvalid) {
	  const PartialIterator partial_begin;
	  const PartialIterator partial_end = PartialIterator::end();
	  CheckCombinations(&invalid[0], invalid_end, partial_begin, partial_end,
	                    INVALID);
	}

	TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, PartialPlusInvalidIsInvalid) {
	  const PartialIterator partial_begin;
	  const PartialIterator partial_end = PartialIterator::end();
	  CheckCombinations(partial_begin, partial_end, &invalid[0], invalid_end,
	                    INVALID);
	}

	// Tests for the one-shot StreamingUtf8Validator::Validate() helper.
	TEST(StreamingUtf8ValidatorValidateTest, EmptyIsValid) {
	  const std::string empty;
	  EXPECT_TRUE(StreamingUtf8Validator::Validate(empty));
	}

	TEST(StreamingUtf8ValidatorValidateTest, SimpleValidCase) {
	  const std::string two_byte_sequence("\xc2\x81");
	  EXPECT_TRUE(StreamingUtf8Validator::Validate(two_byte_sequence));
	}

	TEST(StreamingUtf8ValidatorValidateTest, SimpleInvalidCase) {
	  const std::string overlong_nul("\xc0\x80");
	  EXPECT_FALSE(StreamingUtf8Validator::Validate(overlong_nul));
	}

	// A sequence cut off after its lead byte is expected to be rejected.
	TEST(StreamingUtf8ValidatorValidateTest, TruncatedIsInvalid) {
	  const std::string lead_byte_only("\xc2");
	  EXPECT_FALSE(StreamingUtf8Validator::Validate(lead_byte_only));
	}

	} // namespace
	} // namespace base