// net/base/url_unescape_iterator_unittest.cc - chromium/src - Git at Google

 // Copyright 2025 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "net/base/url_unescape_iterator.h"

 #include <iterator>
 #include <limits>
 #include <ranges>
 #include <string_view>
 #include <utility>

 #include "base/containers/span.h"
 #include "base/containers/to_vector.h"
 #include "base/strings/escape.h"
 #include "base/strings/strcat.h"
 #include "base/strings/stringprintf.h"
 #include "net/base/url_util.h"
 #include "testing/gtest/include/gtest/gtest.h"
 #include "third_party/fuzztest/src/fuzztest/fuzztest.h"

 namespace net {

 namespace {

 // Compile-time check that UrlUnescapeIterator models std::forward_iterator.
 static_assert(std::forward_iterator<UrlUnescapeIterator>);

 // A single test case. Does not own the referenced strings: every member is a
 // std::string_view, so the viewed character data must outlive the Case.
 struct Case {
   // Text handed to the code under test.
   std::string_view input;
   // Output the test expects for `input`.
   std::string_view expected_output;
   // Label streamed into the failure message to identify the case.
   std::string_view description;
 };

 // Runtime-built counterpart of Case that stores its own copies of the three
 // strings. Explicitly convertible to Case; the resulting Case only views this
 // object's strings, so it must not outlive the OwningCase.
 struct OwningCase {
   std::string input;
   std::string expected_output;
   std::string description;

   // Copies each view into the corresponding owned string.
   OwningCase(std::string_view in,
              std::string_view expected,
              std::string_view desc)
       : input{in}, expected_output{expected}, description{desc} {}

   // Produces a non-owning Case whose members view this object's strings.
   explicit operator Case() const {
     Case view;
     view.input = input;
     view.expected_output = expected_output;
     view.description = description;
     return view;
   }
 };

 // Helper shared by most tests: collects everything produced by iterating
 // MakeUrlUnescapeRange(in) into a std::string.
 std::string UnescapeToString(std::string_view in) {
   auto decoded = MakeUrlUnescapeRange(in);
   // The returned range type must be usable with forward-range algorithms.
   static_assert(std::ranges::forward_range<decltype(decoded)>);
   std::string result;
   result.assign(std::ranges::begin(decoded), std::ranges::end(decoded));
   return result;
 }

 // Checks every case in `cases`: the unescaped input must equal the expected
 // output. The case description is attached to any failure.
 void TestCases(base::span<const Case> cases) {
   for (const Case& test_case : cases) {
     EXPECT_EQ(UnescapeToString(test_case.input), test_case.expected_output)
         << test_case.description;
   }
 }

 // Overload for OwningCase: converts each element to a non-owning Case and
 // forwards to the span<const Case> overload. `cases` outlives the call, so
 // the views stay valid.
 void TestCases(base::span<const OwningCase> cases) {
   std::vector<Case> views;
   views.reserve(cases.size());
   for (const OwningCase& owning : cases) {
     views.push_back(static_cast<Case>(owning));
   }
   TestCases(views);
 }

 // Runs `cases` after converting each input to percent-encoded form with
 // base::EscapeNonASCII() (non-ASCII characters become %xx). Expected output
 // and description are carried over unchanged.
 void EncodeThenTestCases(base::span<const Case> cases) {
   std::vector<OwningCase> escaped_cases;
   escaped_cases.reserve(cases.size());
   for (const Case& original : cases) {
     escaped_cases.emplace_back(base::EscapeNonASCII(original.input),
                                original.expected_output,
                                original.description);
   }
   TestCases(escaped_cases);
 }

 // Two default-constructed iterators compare equal, both in a constant
 // expression and at runtime.
 TEST(UrlUnescapeIteratorTest, DefaultConstructor) {
   constexpr UrlUnescapeIterator first;
   constexpr UrlUnescapeIterator second;
   static_assert(first == second);
   EXPECT_EQ(first, second);
 }

 // Exercises copy construction, copy assignment and (in)equality.
 TEST(UrlUnescapeIteratorTest, CopyAndAssignAndEquality) {
   auto [first, last] = MakeUrlUnescapeRange("walk");
   // The two iterators of a non-empty range differ.
   EXPECT_NE(first, last);

   // Assignment from a non-const iterator.
   last = first;
   EXPECT_EQ(first, last);

   // Copy construction from a non-const iterator.
   const UrlUnescapeIterator copy_of_first = first;
   EXPECT_EQ(first, copy_of_first);

   // Copy construction from a const iterator.
   const UrlUnescapeIterator copy_of_copy = copy_of_first;
   EXPECT_EQ(copy_of_first, copy_of_copy);

   // Assignment from a const iterator.
   last = copy_of_copy;
   EXPECT_EQ(last, copy_of_copy);
 }

 // Post-increment returns the old position and advances the iterator.
 TEST(UrlUnescapeIteratorTest, PostIncrement) {
   auto [cursor, last] = MakeUrlUnescapeRange("a");
   const UrlUnescapeIterator before = cursor;
   // `cursor++` yields the pre-increment value...
   EXPECT_EQ(before, cursor++);
   // ...while `cursor` itself moved on, reaching the end of the one-byte input.
   EXPECT_NE(before, cursor);
   EXPECT_EQ(cursor, last);
 }

 // Well-formed ASCII inputs: plain text, '+' (expected to come out as a
 // space), and valid %xx escapes including control characters.
 TEST(UrlUnescapeIteratorTest, GoodAscii) {
   // One-byte view holding NUL; the explicit length keeps the byte from being
   // treated as a terminator.
   static constexpr std::string_view kNul("\0", 1u);
   static constexpr Case cases[] = {
       {"", "", "empty"},
       {"a", "a", "one letter"},
       {"word", "word", "multiple letters"},
       {"two words", "two words", "space"},
       {"two+words", "two words", "plus"},
       {"two%20words", "two words", "escaped space"},
       {"%2b", "+", "escaped plus"},
       {"%2B", "+", "escaped plus, uppercase hex"},
       {"++", "  ", "double plus"},
       {"+%20+", "   ", "plus, escaped space, plus"},
       {"%61b", "ab", "escaped start"},
       {"a%62", "ab", "escaped end"},
       {"%00", kNul, "escaped nul byte"},
       {"line%0a", "line\x0a", "escaped newline"},
       // 0x7D is '}', not DEL; the DEL control code is 0x7F and gets its own
       // row below.
       {"l%7D", "l\x7d", "escaped right curly bracket"},
       {"l%7F", "l\x7f", "escaped del control code"},
   };
   TestCases(cases);
 }

 // Malformed percent sequences are expected to pass through literally, while
 // '+' handling and any later valid escape still apply.
 TEST(UrlUnescapeIteratorTest, BadPercentEncoding) {
   static constexpr Case kMalformed[] = {
       {"%", "%", "percent at end of string"},
       {"%2", "%2", "not followed by two characters"},
       {"%g1", "%g1", "first character not hex"},
       {"%1 ", "%1 ", "second character not hex"},
       {"%+20", "% 20", "first character is plus"},
       {"% 20", "% 20", "first character is space"},
       {"%1%20", "%1 ", "second character is percent"},
       {"%%34%31", "%41", "no double expansion"},
   };
   TestCases(kMalformed);
 }

 // Valid UTF-8 sequences, one per encoded length plus a noncharacter. Input
 // and expected output are identical in every row. Also reused as the seed
 // data for the mixed-encoding, truncation and corruption tests.
 static constexpr Case kGoodUtf8[] = {
     {"\xc2\xa5", "\xc2\xa5", "two bytes"},                   // U+00A5
     {"\xef\xbf\xa5", "\xef\xbf\xa5", "three bytes"},         // U+FFE5
     {"\xf0\x9f\x86\x91", "\xf0\x9f\x86\x91", "four bytes"},  // U+1F191
     {"\xef\xb7\x90", "\xef\xb7\x90", "non-character"},       // U+FDD0
 };

 // Runs the kGoodUtf8 cases as written, i.e. with raw (unescaped) bytes.
 TEST(UrlUnescapeIteratorTest, GoodUtf8) {
   TestCases(kGoodUtf8);
 }

 // Runs the kGoodUtf8 cases with their inputs percent-encoded by
 // EncodeThenTestCases(); the expected outputs are unchanged.
 TEST(UrlUnescapeIteratorTest, GoodUtf8Encoded) {
   EncodeThenTestCases(kGoodUtf8);
 }

 // Verifies that mixing encoded and unencoded bytes in a single character
 // works. For every kGoodUtf8 input and every byte position, exactly that one
 // byte is percent-encoded; the expected output is unchanged.
 TEST(UrlUnescapeIteratorTest, GoodUtf8MixedEncoded) {
   std::vector<OwningCase> encoded;
   // Not the correct size, just an estimate to reduce resizes.
   encoded.reserve(std::size(kGoodUtf8) * 2);
   for (const auto [input, expected, description] : kGoodUtf8) {
     // size_t matches both input.size() and the %zu conversion below.
     for (size_t byte_to_encode = 0; byte_to_encode < input.size();
          ++byte_to_encode) {
       const std::string encoded_byte =
           base::EscapeNonASCII(input.substr(byte_to_encode, 1));
       const std::string encoded_input =
           base::StrCat({input.substr(0, byte_to_encode), encoded_byte,
                         input.substr(byte_to_encode + 1)});
       encoded.emplace_back(encoded_input, expected,
                            base::StringPrintf("%s, encoded byte %zu",
                                               description, byte_to_encode));
     }
   }

   TestCases(encoded);
 }

 // UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER. A macro rather than a
 // constant so that it can be concatenated with adjacent string literals.
 #define REPLACEMENT_CHAR "\xef\xbf\xbd"

 // One to six consecutive replacement characters, used as expected outputs.
 constexpr char kReplacementChar[] = REPLACEMENT_CHAR;
 constexpr char kReplacementCharx2[] = REPLACEMENT_CHAR REPLACEMENT_CHAR;
 constexpr char kReplacementCharx3[] =
     REPLACEMENT_CHAR REPLACEMENT_CHAR REPLACEMENT_CHAR;
 constexpr char kReplacementCharx4[] =
     REPLACEMENT_CHAR REPLACEMENT_CHAR REPLACEMENT_CHAR REPLACEMENT_CHAR;
 constexpr char kReplacementCharx5[] = REPLACEMENT_CHAR REPLACEMENT_CHAR
     REPLACEMENT_CHAR REPLACEMENT_CHAR REPLACEMENT_CHAR;
 constexpr char kReplacementCharx6[] = REPLACEMENT_CHAR REPLACEMENT_CHAR
     REPLACEMENT_CHAR REPLACEMENT_CHAR REPLACEMENT_CHAR REPLACEMENT_CHAR;

 std::string ReplacementCharNTimes(size_t n) {
   const std::vector<std::string_view> to_concat(n, kReplacementChar);
   return base::StrCat(to_concat);
 }

 // Every proper, non-empty prefix of each kGoodUtf8 input, both raw and
 // percent-encoded, must decode to a single replacement character.
 TEST(UrlUnescapeIteratorTest, TruncatedUtf8) {
   std::vector<OwningCase> truncated;
   truncated.reserve(std::size(kGoodUtf8) * 4);
   for (const auto [input, expected, description] : kGoodUtf8) {
     // size_t matches both input.size() and the %zu conversions below.
     for (size_t truncate_pos = 1; truncate_pos < input.size();
          ++truncate_pos) {
       const std::string truncated_input(input.substr(0, truncate_pos));
       // We expect one replacement character per UTF-8 start byte, regardless
       // of length.
       truncated.emplace_back(truncated_input, kReplacementChar,
                              base::StringPrintf("%s, truncated to length %zu",
                                                 description, truncate_pos));
       truncated.emplace_back(
           base::EscapeNonASCII(truncated_input), kReplacementChar,
           base::StringPrintf("%s, truncated to length %zu, encoded",
                              description, truncate_pos));
     }
   }
   TestCases(truncated);
 }

 // Replaces one byte of each kGoodUtf8 input with 'X', wraps the result in
 // '-' markers, and checks the output for both the raw and the
 // percent-encoded form.
 TEST(UrlUnescapeIteratorTest, CorruptedUtf8) {
   std::vector<OwningCase> corrupted;
   corrupted.reserve(std::size(kGoodUtf8) * 4);
   for (const auto [input, expected, description] : kGoodUtf8) {
     // size_t matches input.size() and the %zu conversions below, and keeps
     // the size arithmetic unsigned throughout.
     for (size_t corrupt_byte = 0; corrupt_byte < input.size();
          ++corrupt_byte) {
       const std::string corrupted_input =
           base::StrCat({"-", input.substr(0, corrupt_byte), "X",
                         input.substr(corrupt_byte + 1), "-"});
       // A valid initial sequence will be replaced with a single replacement
       // character. Unexpected continuation bytes will be replaced with one
       // replacement character each.
       const std::string expected_output = base::StrCat(
           {"-", corrupt_byte > 0 ? kReplacementChar : "", "X",
            ReplacementCharNTimes(input.size() - corrupt_byte - 1), "-"});
       corrupted.emplace_back(corrupted_input, expected_output,
                              base::StringPrintf("%s, with byte %zu corrupted",
                                                 description, corrupt_byte));
       corrupted.emplace_back(
           base::EscapeNonASCII(corrupted_input), expected_output,
           base::StringPrintf("%s, with byte %zu corrupted, encoded",
                              description, corrupt_byte));
     }
   }
   TestCases(corrupted);
 }

 // Invalid UTF-8 inputs paired with the replacement-character output each is
 // expected to produce. In most rows every input byte maps to one replacement
 // character; the last two rows cover a stray byte between valid characters
 // and a start byte followed by another start byte.
 constexpr Case kBadUtf8[] = {
     {"\xC0\x80", kReplacementCharx2,
      "Overlong encoding of U+0000 (null). 0xC0 is never a valid start."},
     {"\xC1\xBF", kReplacementCharx2,
      "Overlong encoding of U+007F. 0xC1 is never a valid start."},
     {"\xE0\x80\x80", kReplacementCharx3,
      "Overlong encoding of U+0000 (null) as 3 bytes."},
     {"\xE0\x9F\xBF", kReplacementCharx3,
      "Overlong encoding of U+07FF as 3 bytes (should be 2)."},
     {"\xF0\x80\x80\x80", kReplacementCharx4,
      "Overlong encoding of U+0000 (null) as 4 bytes."},
     {"\xF0\x8F\xBF\xBF", kReplacementCharx4,
      "Overlong encoding of U+FFFF as 4 bytes (should be 3)."},
     {"\xED\xA0\x80", kReplacementCharx3,
      "Invalid surrogate half U+D800 (start of surrogate range)"},
     {"\xED\xBF\xBF", kReplacementCharx3,
      "Invalid surrogate half U+DFFF (end of surrogate range)"},
     {"\xED\xA0\x81\xED\xB0\x80", kReplacementCharx6,
      "Incorrectly encoded surrogate pair"},
     {"\xF4\x90\x80\x80", kReplacementCharx4,
      "Invalid code point U+110000 (beyond Unicode max U+10FFFF)"},
     {"\xF5\x80\x80\x80", kReplacementCharx4,
      "Invalid start byte 0xF5 (would encode > U+10FFFF)"},
     {"\xF8\x80\x80\x80\x80", kReplacementCharx5,
      "Invalid start byte 0xF8 (formerly 5-byte sequence)"},
     {"\xFC\x80\x80\x80\x80\x80", kReplacementCharx6,
      "Invalid start byte 0xFC (formerly 6-byte sequence)"},
     {"\xFE", kReplacementChar, "Invalid byte 0xFE (never used)"},
     {"\xFF", kReplacementChar, "Invalid byte 0xFF (never used)"},
     {"\xc2\xa5\xc1\xc2\xa5", "\xc2\xa5" REPLACEMENT_CHAR "\xc2\xa5",
      "Valid followed by invalid followed by valid"},
     {"\xE2\xE2", kReplacementCharx2, "Overshort with error"},
 };

 // Runs the kBadUtf8 cases as written, i.e. with raw (unescaped) bytes.
 TEST(UrlUnescapeIteratorTest, OtherBadUtf8) {
   TestCases(kBadUtf8);
 }

 // Runs the kBadUtf8 cases with their inputs percent-encoded by
 // EncodeThenTestCases(); the expected outputs are unchanged.
 TEST(UrlUnescapeIteratorTest, OtherBadUtf8Encoded) {
   EncodeThenTestCases(kBadUtf8);
 }

 // Property used by the exhaustive tests and the fuzz test: iterating the
 // unescape range must give the same bytes as UnescapePercentEncodedUrl().
 void SameOutputAsUnescapePercentEncodedUrl(std::string_view input) {
   const auto from_iterator = UnescapeToString(input);
   const auto from_reference = UnescapePercentEncodedUrl(input);
   EXPECT_EQ(from_iterator, from_reference);
 }

 // Checks every possible one-byte input against UnescapePercentEncodedUrl().
 TEST(UrlUnescapeIteratorTest, OneByteSameAsUnescapePercentEncodedUrl) {
   // The counter is an `int` so it can step past the largest `char` value
   // without overflowing.
   constexpr int kFirst = std::numeric_limits<char>::min();
   constexpr int kLast = std::numeric_limits<char>::max();
   for (int value = kFirst; value <= kLast; ++value) {
     const char byte = static_cast<char>(value);
     SameOutputAsUnescapePercentEncodedUrl(std::string_view(&byte, 1u));
   }
 }

 // Same comparison for every byte value written as a lowercase "%xx" escape.
 TEST(UrlUnescapeIteratorTest, OneByteSameAsUnescapePercentEncodedUrlEncoded) {
   for (int value = 0x00; value < 0x100; ++value) {
     const std::string escaped = base::StringPrintf("%%%02x", value);
     SameOutputAsUnescapePercentEncodedUrl(escaped);
   }
 }

 // Registers SameOutputAsUnescapePercentEncodedUrl() as a fuzz test property.
 FUZZ_TEST(UrlUnescapeIteratorTest, SameOutputAsUnescapePercentEncodedUrl);

 // EqualsAfterUrlDecoding() must report every string as equal to itself,
 // including malformed escapes and invalid UTF-8.
 TEST(UrlUnescapeIteratorTest, TrivialSelfEquals) {
   static constexpr const char* kPlainInputs[] = {"",  "a", "word", " ",
                                                  "+", "%", "%2",   "%20"};
   for (const char* input : kPlainInputs) {
     EXPECT_TRUE(EqualsAfterUrlDecoding(input, input)) << input;
   }

   // Only the `input` member of each table row is used here.
   const auto expect_self_equals = [](base::span<const Case> cases) {
     for (const Case& test_case : cases) {
       EXPECT_TRUE(EqualsAfterUrlDecoding(test_case.input, test_case.input))
           << test_case.description;
     }
   };
   expect_self_equals(kGoodUtf8);
   expect_self_equals(kBadUtf8);
 }

 // A string and its base::EscapeAllExceptUnreserved() form must compare equal
 // after decoding, whichever one is passed first.
 TEST(UrlUnescapeIteratorTest, EqualsAfterEscaping) {
   const auto expect_equals_after_escaping =
       [](base::span<const Case> cases) {
         for (const Case& test_case : cases) {
           const std::string_view raw = test_case.input;
           const auto escaped = base::EscapeAllExceptUnreserved(raw);
           EXPECT_TRUE(EqualsAfterUrlDecoding(raw, escaped))
               << test_case.description;
           EXPECT_TRUE(EqualsAfterUrlDecoding(escaped, raw))
               << test_case.description << ", backwards";
         }
       };
   expect_equals_after_escaping(kGoodUtf8);
   expect_equals_after_escaping(kBadUtf8);
 }

 // Two non-owning strings that the tests compare with EqualsAfterUrlDecoding().
 struct StringPair {
   std::string_view a;
   std::string_view b;
 };

 // Pairs that are spelled differently but must compare equal after decoding.
 TEST(UrlUnescapeIteratorTest, InterestinglyEqual) {
   static constexpr StringPair kEqualPairs[] = {
       {" ", "+"},
       {"+", "%20"},
       {"%", "%25"},
       {"%2a", "%2A"},
       {"%c2%A5", "%C2%a5"},
       {"%c2\xa5", "\xc2%a5"},
       {"%c0", "%c1"},     // both become replacement character
       {"%c2", "%ef%bf"},  // both are truncated UTF-8 codepoints
   };
   for (const StringPair& pair : kEqualPairs) {
     EXPECT_TRUE(EqualsAfterUrlDecoding(pair.a, pair.b))
         << "(\"" << pair.a << "\", \"" << pair.b << "\")";
   }
 }

 // Pairs that must stay different after decoding: case differences in literal
 // text, different code points, and malformed escapes that differ only in case.
 TEST(UrlUnescapeIteratorTest, Unequal) {
   static constexpr StringPair kDifferentPairs[] = {
       {"", "%00"},
       {"abc", "ABC"},
       {"\xc2\xa5", "\xc2\xa6"},
       {"%c2%a5", "%c2%a6"},
       {"%a", "%A"},
       {"%2g", "%2G"},
       {"%00a", "%00A"},
   };
   for (const StringPair& pair : kDifferentPairs) {
     EXPECT_FALSE(EqualsAfterUrlDecoding(pair.a, pair.b))
         << "(\"" << pair.a << "\", \"" << pair.b << "\")";
   }
 }

 #undef REPLACEMENT_CHAR

 }  // namespace

 }  // namespace net
	// Copyright 2025 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "net/base/url_unescape_iterator.h"

	#include <iterator>
	#include <limits>
	#include <ranges>
	#include <string_view>
	#include <utility>

	#include "base/containers/span.h"
	#include "base/containers/to_vector.h"
	#include "base/strings/escape.h"
	#include "base/strings/strcat.h"
	#include "base/strings/stringprintf.h"
	#include "net/base/url_util.h"
	#include "testing/gtest/include/gtest/gtest.h"
	#include "third_party/fuzztest/src/fuzztest/fuzztest.h"

	namespace net {

	namespace {

	// Compile-time check that UrlUnescapeIterator models std::forward_iterator.
	static_assert(std::forward_iterator<UrlUnescapeIterator>);

	// A single test case. Does not own the referenced strings: every member is a
	// std::string_view, so the viewed character data must outlive the Case.
	struct Case {
	// Text handed to the code under test.
	std::string_view input;
	// Output the test expects for `input`.
	std::string_view expected_output;
	// Label streamed into the failure message to identify the case.
	std::string_view description;
	};

	// Runtime-built counterpart of Case that stores its own copies of the three
	// strings. Explicitly convertible to Case; the resulting Case only views this
	// object's strings, so it must not outlive the OwningCase.
	struct OwningCase {
	  std::string input;
	  std::string expected_output;
	  std::string description;

	  // Copies each view into the corresponding owned string.
	  OwningCase(std::string_view in,
	             std::string_view expected,
	             std::string_view desc)
	      : input{in}, expected_output{expected}, description{desc} {}

	  // Produces a non-owning Case whose members view this object's strings.
	  explicit operator Case() const {
	    Case view;
	    view.input = input;
	    view.expected_output = expected_output;
	    view.description = description;
	    return view;
	  }
	};

	// Helper shared by most tests: collects everything produced by iterating
	// MakeUrlUnescapeRange(in) into a std::string.
	std::string UnescapeToString(std::string_view in) {
	  auto decoded = MakeUrlUnescapeRange(in);
	  // The returned range type must be usable with forward-range algorithms.
	  static_assert(std::ranges::forward_range<decltype(decoded)>);
	  std::string result;
	  result.assign(std::ranges::begin(decoded), std::ranges::end(decoded));
	  return result;
	}

	// Checks every case in `cases`: the unescaped input must equal the expected
	// output. The case description is attached to any failure.
	void TestCases(base::span<const Case> cases) {
	  for (const Case& test_case : cases) {
	    EXPECT_EQ(UnescapeToString(test_case.input), test_case.expected_output)
	        << test_case.description;
	  }
	}

	// Overload for OwningCase: converts each element to a non-owning Case and
	// forwards to the span<const Case> overload. `cases` outlives the call, so
	// the views stay valid.
	void TestCases(base::span<const OwningCase> cases) {
	  std::vector<Case> views;
	  views.reserve(cases.size());
	  for (const OwningCase& owning : cases) {
	    views.push_back(static_cast<Case>(owning));
	  }
	  TestCases(views);
	}

	// Runs `cases` after converting each input to percent-encoded form with
	// base::EscapeNonASCII() (non-ASCII characters become %xx). Expected output
	// and description are carried over unchanged.
	void EncodeThenTestCases(base::span<const Case> cases) {
	  std::vector<OwningCase> escaped_cases;
	  escaped_cases.reserve(cases.size());
	  for (const Case& original : cases) {
	    escaped_cases.emplace_back(base::EscapeNonASCII(original.input),
	                               original.expected_output,
	                               original.description);
	  }
	  TestCases(escaped_cases);
	}

	// Two default-constructed iterators compare equal, both in a constant
	// expression and at runtime.
	TEST(UrlUnescapeIteratorTest, DefaultConstructor) {
	  constexpr UrlUnescapeIterator first;
	  constexpr UrlUnescapeIterator second;
	  static_assert(first == second);
	  EXPECT_EQ(first, second);
	}

	// Exercises copy construction, copy assignment and (in)equality.
	TEST(UrlUnescapeIteratorTest, CopyAndAssignAndEquality) {
	  auto [first, last] = MakeUrlUnescapeRange("walk");
	  // The two iterators of a non-empty range differ.
	  EXPECT_NE(first, last);

	  // Assignment from a non-const iterator.
	  last = first;
	  EXPECT_EQ(first, last);

	  // Copy construction from a non-const iterator.
	  const UrlUnescapeIterator copy_of_first = first;
	  EXPECT_EQ(first, copy_of_first);

	  // Copy construction from a const iterator.
	  const UrlUnescapeIterator copy_of_copy = copy_of_first;
	  EXPECT_EQ(copy_of_first, copy_of_copy);

	  // Assignment from a const iterator.
	  last = copy_of_copy;
	  EXPECT_EQ(last, copy_of_copy);
	}

	// Post-increment returns the old position and advances the iterator.
	TEST(UrlUnescapeIteratorTest, PostIncrement) {
	  auto [cursor, last] = MakeUrlUnescapeRange("a");
	  const UrlUnescapeIterator before = cursor;
	  // `cursor++` yields the pre-increment value...
	  EXPECT_EQ(before, cursor++);
	  // ...while `cursor` itself moved on, reaching the end of the one-byte input.
	  EXPECT_NE(before, cursor);
	  EXPECT_EQ(cursor, last);
	}

	// Well-formed ASCII inputs: plain text, '+' (expected to come out as a
	// space), and valid %xx escapes including control characters.
	TEST(UrlUnescapeIteratorTest, GoodAscii) {
	  // One-byte view holding NUL; the explicit length keeps the byte from being
	  // treated as a terminator.
	  static constexpr std::string_view kNul("\0", 1u);
	  static constexpr Case cases[] = {
	      {"", "", "empty"},
	      {"a", "a", "one letter"},
	      {"word", "word", "multiple letters"},
	      {"two words", "two words", "space"},
	      {"two+words", "two words", "plus"},
	      {"two%20words", "two words", "escaped space"},
	      {"%2b", "+", "escaped plus"},
	      {"%2B", "+", "escaped plus, uppercase hex"},
	      // Each '+' and each %20 yields one space, so these expect exactly two
	      // and three spaces respectively.
	      {"++", "  ", "double plus"},
	      {"+%20+", "   ", "plus, escaped space, plus"},
	      {"%61b", "ab", "escaped start"},
	      {"a%62", "ab", "escaped end"},
	      {"%00", kNul, "escaped nul byte"},
	      {"line%0a", "line\x0a", "escaped newline"},
	      // 0x7D is '}', not DEL; the DEL control code is 0x7F and gets its own
	      // row below.
	      {"l%7D", "l\x7d", "escaped right curly bracket"},
	      {"l%7F", "l\x7f", "escaped del control code"},
	  };
	  TestCases(cases);
	}

	// Malformed percent sequences are expected to pass through literally, while
	// '+' handling and any later valid escape still apply.
	TEST(UrlUnescapeIteratorTest, BadPercentEncoding) {
	  static constexpr Case kMalformed[] = {
	      {"%", "%", "percent at end of string"},
	      {"%2", "%2", "not followed by two characters"},
	      {"%g1", "%g1", "first character not hex"},
	      {"%1 ", "%1 ", "second character not hex"},
	      {"%+20", "% 20", "first character is plus"},
	      {"% 20", "% 20", "first character is space"},
	      {"%1%20", "%1 ", "second character is percent"},
	      {"%%34%31", "%41", "no double expansion"},
	  };
	  TestCases(kMalformed);
	}

	// Valid UTF-8 sequences, one per encoded length plus a noncharacter. Input
	// and expected output are identical in every row. Also reused as the seed
	// data for the mixed-encoding, truncation and corruption tests.
	static constexpr Case kGoodUtf8[] = {
	{"\xc2\xa5", "\xc2\xa5", "two bytes"}, // U+00A5
	{"\xef\xbf\xa5", "\xef\xbf\xa5", "three bytes"}, // U+FFE5
	{"\xf0\x9f\x86\x91", "\xf0\x9f\x86\x91", "four bytes"}, // U+1F191
	{"\xef\xb7\x90", "\xef\xb7\x90", "non-character"}, // U+FDD0
	};

	// Runs the kGoodUtf8 cases as written, i.e. with raw (unescaped) bytes.
	TEST(UrlUnescapeIteratorTest, GoodUtf8) {
	TestCases(kGoodUtf8);
	}

	// Runs the kGoodUtf8 cases with their inputs percent-encoded by
	// EncodeThenTestCases(); the expected outputs are unchanged.
	TEST(UrlUnescapeIteratorTest, GoodUtf8Encoded) {
	EncodeThenTestCases(kGoodUtf8);
	}

	// Verifies that mixing encoded and unencoded bytes in a single character
	// works. For every kGoodUtf8 input and every byte position, exactly that one
	// byte is percent-encoded; the expected output is unchanged.
	TEST(UrlUnescapeIteratorTest, GoodUtf8MixedEncoded) {
	  std::vector<OwningCase> encoded;
	  // Not the correct size, just an estimate to reduce resizes.
	  encoded.reserve(std::size(kGoodUtf8) * 2);
	  for (const auto [input, expected, description] : kGoodUtf8) {
	    // size_t matches both input.size() and the %zu conversion below.
	    for (size_t byte_to_encode = 0; byte_to_encode < input.size();
	         ++byte_to_encode) {
	      const std::string encoded_byte =
	          base::EscapeNonASCII(input.substr(byte_to_encode, 1));
	      const std::string encoded_input =
	          base::StrCat({input.substr(0, byte_to_encode), encoded_byte,
	                        input.substr(byte_to_encode + 1)});
	      encoded.emplace_back(encoded_input, expected,
	                           base::StringPrintf("%s, encoded byte %zu",
	                                              description, byte_to_encode));
	    }
	  }

	  TestCases(encoded);
	}

	// UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER. A macro rather than a
	// constant so that it can be concatenated with adjacent string literals.
	#define REPLACEMENT_CHAR "\xef\xbf\xbd"

	// One to six consecutive replacement characters, used as expected outputs.
	constexpr char kReplacementChar[] = REPLACEMENT_CHAR;
	constexpr char kReplacementCharx2[] = REPLACEMENT_CHAR REPLACEMENT_CHAR;
	constexpr char kReplacementCharx3[] =
	REPLACEMENT_CHAR REPLACEMENT_CHAR REPLACEMENT_CHAR;
	constexpr char kReplacementCharx4[] =
	REPLACEMENT_CHAR REPLACEMENT_CHAR REPLACEMENT_CHAR REPLACEMENT_CHAR;
	constexpr char kReplacementCharx5[] = REPLACEMENT_CHAR REPLACEMENT_CHAR
	REPLACEMENT_CHAR REPLACEMENT_CHAR REPLACEMENT_CHAR;
	constexpr char kReplacementCharx6[] = REPLACEMENT_CHAR REPLACEMENT_CHAR
	REPLACEMENT_CHAR REPLACEMENT_CHAR REPLACEMENT_CHAR REPLACEMENT_CHAR;

	std::string ReplacementCharNTimes(size_t n) {
	const std::vector<std::string_view> to_concat(n, kReplacementChar);
	return base::StrCat(to_concat);
	}

	// Every proper, non-empty prefix of each kGoodUtf8 input, both raw and
	// percent-encoded, must decode to a single replacement character.
	TEST(UrlUnescapeIteratorTest, TruncatedUtf8) {
	  std::vector<OwningCase> truncated;
	  truncated.reserve(std::size(kGoodUtf8) * 4);
	  for (const auto [input, expected, description] : kGoodUtf8) {
	    // size_t matches both input.size() and the %zu conversions below.
	    for (size_t truncate_pos = 1; truncate_pos < input.size();
	         ++truncate_pos) {
	      const std::string truncated_input(input.substr(0, truncate_pos));
	      // We expect one replacement character per UTF-8 start byte, regardless
	      // of length.
	      truncated.emplace_back(truncated_input, kReplacementChar,
	                             base::StringPrintf("%s, truncated to length %zu",
	                                                description, truncate_pos));
	      truncated.emplace_back(
	          base::EscapeNonASCII(truncated_input), kReplacementChar,
	          base::StringPrintf("%s, truncated to length %zu, encoded",
	                             description, truncate_pos));
	    }
	  }
	  TestCases(truncated);
	}

	// Replaces one byte of each kGoodUtf8 input with 'X', wraps the result in
	// '-' markers, and checks the output for both the raw and the
	// percent-encoded form.
	TEST(UrlUnescapeIteratorTest, CorruptedUtf8) {
	  std::vector<OwningCase> corrupted;
	  corrupted.reserve(std::size(kGoodUtf8) * 4);
	  for (const auto [input, expected, description] : kGoodUtf8) {
	    // size_t matches input.size() and the %zu conversions below, and keeps
	    // the size arithmetic unsigned throughout.
	    for (size_t corrupt_byte = 0; corrupt_byte < input.size();
	         ++corrupt_byte) {
	      const std::string corrupted_input =
	          base::StrCat({"-", input.substr(0, corrupt_byte), "X",
	                        input.substr(corrupt_byte + 1), "-"});
	      // A valid initial sequence will be replaced with a single replacement
	      // character. Unexpected continuation bytes will be replaced with one
	      // replacement character each.
	      const std::string expected_output = base::StrCat(
	          {"-", corrupt_byte > 0 ? kReplacementChar : "", "X",
	           ReplacementCharNTimes(input.size() - corrupt_byte - 1), "-"});
	      corrupted.emplace_back(corrupted_input, expected_output,
	                             base::StringPrintf("%s, with byte %zu corrupted",
	                                                description, corrupt_byte));
	      corrupted.emplace_back(
	          base::EscapeNonASCII(corrupted_input), expected_output,
	          base::StringPrintf("%s, with byte %zu corrupted, encoded",
	                             description, corrupt_byte));
	    }
	  }
	  TestCases(corrupted);
	}

	// Invalid UTF-8 inputs paired with the replacement-character output each is
	// expected to produce. In most rows every input byte maps to one replacement
	// character; the last two rows cover a stray byte between valid characters
	// and a start byte followed by another start byte.
	constexpr Case kBadUtf8[] = {
	{"\xC0\x80", kReplacementCharx2,
	"Overlong encoding of U+0000 (null). 0xC0 is never a valid start."},
	{"\xC1\xBF", kReplacementCharx2,
	"Overlong encoding of U+007F. 0xC1 is never a valid start."},
	{"\xE0\x80\x80", kReplacementCharx3,
	"Overlong encoding of U+0000 (null) as 3 bytes."},
	{"\xE0\x9F\xBF", kReplacementCharx3,
	"Overlong encoding of U+07FF as 3 bytes (should be 2)."},
	{"\xF0\x80\x80\x80", kReplacementCharx4,
	"Overlong encoding of U+0000 (null) as 4 bytes."},
	{"\xF0\x8F\xBF\xBF", kReplacementCharx4,
	"Overlong encoding of U+FFFF as 4 bytes (should be 3)."},
	{"\xED\xA0\x80", kReplacementCharx3,
	"Invalid surrogate half U+D800 (start of surrogate range)"},
	{"\xED\xBF\xBF", kReplacementCharx3,
	"Invalid surrogate half U+DFFF (end of surrogate range)"},
	{"\xED\xA0\x81\xED\xB0\x80", kReplacementCharx6,
	"Incorrectly encoded surrogate pair"},
	{"\xF4\x90\x80\x80", kReplacementCharx4,
	"Invalid code point U+110000 (beyond Unicode max U+10FFFF)"},
	{"\xF5\x80\x80\x80", kReplacementCharx4,
	"Invalid start byte 0xF5 (would encode > U+10FFFF)"},
	{"\xF8\x80\x80\x80\x80", kReplacementCharx5,
	"Invalid start byte 0xF8 (formerly 5-byte sequence)"},
	{"\xFC\x80\x80\x80\x80\x80", kReplacementCharx6,
	"Invalid start byte 0xFC (formerly 6-byte sequence)"},
	{"\xFE", kReplacementChar, "Invalid byte 0xFE (never used)"},
	{"\xFF", kReplacementChar, "Invalid byte 0xFF (never used)"},
	{"\xc2\xa5\xc1\xc2\xa5", "\xc2\xa5" REPLACEMENT_CHAR "\xc2\xa5",
	"Valid followed by invalid followed by valid"},
	{"\xE2\xE2", kReplacementCharx2, "Overshort with error"},
	};

	// Runs the kBadUtf8 cases as written, i.e. with raw (unescaped) bytes.
	TEST(UrlUnescapeIteratorTest, OtherBadUtf8) {
	TestCases(kBadUtf8);
	}

	// Runs the kBadUtf8 cases with their inputs percent-encoded by
	// EncodeThenTestCases(); the expected outputs are unchanged.
	TEST(UrlUnescapeIteratorTest, OtherBadUtf8Encoded) {
	EncodeThenTestCases(kBadUtf8);
	}

	// Property used by the exhaustive tests and the fuzz test: iterating the
	// unescape range must give the same bytes as UnescapePercentEncodedUrl().
	void SameOutputAsUnescapePercentEncodedUrl(std::string_view input) {
	  const auto from_iterator = UnescapeToString(input);
	  const auto from_reference = UnescapePercentEncodedUrl(input);
	  EXPECT_EQ(from_iterator, from_reference);
	}

	// Checks every possible one-byte input against UnescapePercentEncodedUrl().
	TEST(UrlUnescapeIteratorTest, OneByteSameAsUnescapePercentEncodedUrl) {
	  // The counter is an `int` so it can step past the largest `char` value
	  // without overflowing.
	  constexpr int kFirst = std::numeric_limits<char>::min();
	  constexpr int kLast = std::numeric_limits<char>::max();
	  for (int value = kFirst; value <= kLast; ++value) {
	    const char byte = static_cast<char>(value);
	    SameOutputAsUnescapePercentEncodedUrl(std::string_view(&byte, 1u));
	  }
	}

	// Same comparison for every byte value written as a lowercase "%xx" escape.
	TEST(UrlUnescapeIteratorTest, OneByteSameAsUnescapePercentEncodedUrlEncoded) {
	  for (int value = 0x00; value < 0x100; ++value) {
	    const std::string escaped = base::StringPrintf("%%%02x", value);
	    SameOutputAsUnescapePercentEncodedUrl(escaped);
	  }
	}

	// Registers SameOutputAsUnescapePercentEncodedUrl() as a fuzz test property.
	FUZZ_TEST(UrlUnescapeIteratorTest, SameOutputAsUnescapePercentEncodedUrl);

	// EqualsAfterUrlDecoding() must report every string as equal to itself,
	// including malformed escapes and invalid UTF-8.
	TEST(UrlUnescapeIteratorTest, TrivialSelfEquals) {
	  static constexpr const char* kPlainInputs[] = {"",  "a", "word", " ",
	                                                 "+", "%", "%2",   "%20"};
	  for (const char* input : kPlainInputs) {
	    EXPECT_TRUE(EqualsAfterUrlDecoding(input, input)) << input;
	  }

	  // Only the `input` member of each table row is used here.
	  const auto expect_self_equals = [](base::span<const Case> cases) {
	    for (const Case& test_case : cases) {
	      EXPECT_TRUE(EqualsAfterUrlDecoding(test_case.input, test_case.input))
	          << test_case.description;
	    }
	  };
	  expect_self_equals(kGoodUtf8);
	  expect_self_equals(kBadUtf8);
	}

	// A string and its base::EscapeAllExceptUnreserved() form must compare equal
	// after decoding, whichever one is passed first.
	TEST(UrlUnescapeIteratorTest, EqualsAfterEscaping) {
	  const auto expect_equals_after_escaping =
	      [](base::span<const Case> cases) {
	        for (const Case& test_case : cases) {
	          const std::string_view raw = test_case.input;
	          const auto escaped = base::EscapeAllExceptUnreserved(raw);
	          EXPECT_TRUE(EqualsAfterUrlDecoding(raw, escaped))
	              << test_case.description;
	          EXPECT_TRUE(EqualsAfterUrlDecoding(escaped, raw))
	              << test_case.description << ", backwards";
	        }
	      };
	  expect_equals_after_escaping(kGoodUtf8);
	  expect_equals_after_escaping(kBadUtf8);
	}

	// Two non-owning strings that the tests compare with EqualsAfterUrlDecoding().
	struct StringPair {
	std::string_view a;
	std::string_view b;
	};

	// Pairs that are spelled differently but must compare equal after decoding.
	TEST(UrlUnescapeIteratorTest, InterestinglyEqual) {
	  static constexpr StringPair kEqualPairs[] = {
	      {" ", "+"},
	      {"+", "%20"},
	      {"%", "%25"},
	      {"%2a", "%2A"},
	      {"%c2%A5", "%C2%a5"},
	      {"%c2\xa5", "\xc2%a5"},
	      {"%c0", "%c1"},     // both become replacement character
	      {"%c2", "%ef%bf"},  // both are truncated UTF-8 codepoints
	  };
	  for (const StringPair& pair : kEqualPairs) {
	    EXPECT_TRUE(EqualsAfterUrlDecoding(pair.a, pair.b))
	        << "(\"" << pair.a << "\", \"" << pair.b << "\")";
	  }
	}

	// Pairs that must stay different after decoding: case differences in literal
	// text, different code points, and malformed escapes that differ only in case.
	TEST(UrlUnescapeIteratorTest, Unequal) {
	  static constexpr StringPair kDifferentPairs[] = {
	      {"", "%00"},
	      {"abc", "ABC"},
	      {"\xc2\xa5", "\xc2\xa6"},
	      {"%c2%a5", "%c2%a6"},
	      {"%a", "%A"},
	      {"%2g", "%2G"},
	      {"%00a", "%00A"},
	  };
	  for (const StringPair& pair : kDifferentPairs) {
	    EXPECT_FALSE(EqualsAfterUrlDecoding(pair.a, pair.b))
	        << "(\"" << pair.a << "\", \"" << pair.b << "\")";
	  }
	}

	#undef REPLACEMENT_CHAR

	} // namespace

	} // namespace net