Reland "Prevent unsafe narrowing: base/strings"
This reverts commit 224425dc19627040de412f578767d3e53e076e7e.
Bug: 1292951
Change-Id: I82336d78a30a164c629e7d9018c23cc60b1833ab
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3708189
Reviewed-by: danakj <danakj@chromium.org>
Owners-Override: danakj <danakj@chromium.org>
Commit-Queue: danakj <danakj@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1015046}
NOKEYCHECK=True
GitOrigin-RevId: 8bb45c228957681d85d0c445ef6bcb0fca3f9bcc
diff --git a/url_canon_etc.cc b/url_canon_etc.cc
index 851926d..53b71ea 100644
--- a/url_canon_etc.cc
+++ b/url_canon_etc.cc
@@ -101,7 +101,7 @@
const Component& scheme,
CanonOutput* output,
Component* out_scheme) {
- if (scheme.len <= 0) {
+ if (!scheme.is_nonempty()) {
// Scheme is unspecified or empty, convert to empty by appending a colon.
*out_scheme = Component(output->length(), 0);
output->push_back(':');
@@ -117,12 +117,13 @@
// FindAndCompareScheme, which could cause some security checks on
// schemes to be incorrect.
bool success = true;
- int end = scheme.end();
- for (int i = scheme.begin; i < end; i++) {
+ size_t begin = static_cast<size_t>(scheme.begin);
+ size_t end = static_cast<size_t>(scheme.end());
+ for (size_t i = begin; i < end; i++) {
UCHAR ch = static_cast<UCHAR>(spec[i]);
char replacement = 0;
if (ch < 0x80) {
- if (i == scheme.begin) {
+ if (i == begin) {
// Need to do a special check for the first letter of the scheme.
if (IsSchemeFirstChar(static_cast<unsigned char>(ch)))
replacement = kSchemeCanonical[ch];
@@ -179,8 +180,9 @@
out_username->begin = output->length();
if (username.len > 0) {
// This will escape characters not valid for the username.
- AppendStringOfType(&username_spec[username.begin], username.len,
- CHAR_USERINFO, output);
+ AppendStringOfType(&username_spec[username.begin],
+ static_cast<size_t>(username.len), CHAR_USERINFO,
+ output);
}
out_username->len = output->length() - out_username->begin;
@@ -189,8 +191,9 @@
if (password.len > 0) {
output->push_back(':');
out_password->begin = output->length();
- AppendStringOfType(&password_spec[password.begin], password.len,
- CHAR_USERINFO, output);
+ AppendStringOfType(&password_spec[password.begin],
+ static_cast<size_t>(password.len), CHAR_USERINFO,
+ output);
out_password->len = output->length() - out_password->begin;
} else {
*out_password = Component();
@@ -223,7 +226,8 @@
// what the error was, and mark the URL as invalid by returning false.
output->push_back(':');
out_port->begin = output->length();
- AppendInvalidNarrowString(spec, port.begin, port.end(), output);
+ AppendInvalidNarrowString(spec, static_cast<size_t>(port.begin),
+ static_cast<size_t>(port.end()), output);
out_port->len = output->length() - out_port->begin;
return false;
}
@@ -285,7 +289,7 @@
const Component& ref,
CanonOutput* output,
Component* out_ref) {
- if (ref.len < 0) {
+ if (!ref.is_valid()) {
// Common case of no ref.
*out_ref = Component();
return;
@@ -297,8 +301,8 @@
out_ref->begin = output->length();
// Now iterate through all the characters, converting to UTF-8 and validating.
- int end = ref.end();
- for (int i = ref.begin; i < end; i++) {
+ size_t end = static_cast<size_t>(ref.end());
+ for (size_t i = static_cast<size_t>(ref.begin); i < end; i++) {
UCHAR current_char = static_cast<UCHAR>(spec[i]);
if (current_char < 0x80) {
if (kShouldEscapeCharInFragment[current_char])
diff --git a/url_canon_host.cc b/url_canon_host.cc
index c2cd9d1..5f7bf71 100644
--- a/url_canon_host.cc
+++ b/url_canon_host.cc
@@ -123,15 +123,15 @@
// |*has_non_ascii| flag.
//
// The return value indicates if the output is a potentially valid host name.
-template<typename INCHAR, typename OUTCHAR>
+template <typename INCHAR, typename OUTCHAR>
bool DoSimpleHost(const INCHAR* host,
- int host_len,
+ size_t host_len,
CanonOutputT<OUTCHAR>* output,
bool* has_non_ascii) {
*has_non_ascii = false;
bool success = true;
- for (int i = 0; i < host_len; ++i) {
+ for (size_t i = 0; i < host_len; ++i) {
unsigned int source = host[i];
if (source == '%') {
// Unescape first, if possible.
@@ -175,7 +175,7 @@
}
// Canonicalizes a host that requires IDN conversion. Returns true on success
-bool DoIDNHost(const char16_t* src, int src_len, CanonOutput* output) {
+bool DoIDNHost(const char16_t* src, size_t src_len, CanonOutput* output) {
int original_output_len = output->length(); // So we can rewind below.
// We need to escape URL before doing IDN conversion, since punicode strings
@@ -202,8 +202,8 @@
// unescaping. Although we unescaped everything before this function call, if
// somebody does %00 as fullwidth, ICU will convert this to ASCII.
bool success = DoSimpleHost(wide_output.data(),
- wide_output.length(),
- output, &has_non_ascii);
+ static_cast<size_t>(wide_output.length()), output,
+ &has_non_ascii);
if (has_non_ascii) {
// ICU generated something that DoSimpleHost didn't think looked like
// ASCII. This is quite rare, but ICU might convert some characters to
@@ -220,7 +220,8 @@
// ASCII isn't strictly necessary, but DoSimpleHost handles this case
// anyway so we handle it/
output->set_length(original_output_len);
- AppendInvalidNarrowString(wide_output.data(), 0, wide_output.length(),
+ AppendInvalidNarrowString(wide_output.data(), 0,
+ static_cast<size_t>(wide_output.length()),
output);
return false;
}
@@ -230,8 +231,11 @@
// 8-bit convert host to its ASCII version: this converts the UTF-8 input to
// UTF-16. The has_escaped flag should be set if the input string requires
// unescaping.
-bool DoComplexHost(const char* host, int host_len,
- bool has_non_ascii, bool has_escaped, CanonOutput* output) {
+bool DoComplexHost(const char* host,
+ size_t host_len,
+ bool has_non_ascii,
+ bool has_escaped,
+ CanonOutput* output) {
// Save the current position in the output. We may write stuff and rewind it
// below, so we need to know where to rewind to.
int begin_length = output->length();
@@ -239,7 +243,7 @@
// Points to the UTF-8 data we want to convert. This will either be the
// input or the unescaped version written to |*output| if necessary.
const char* utf8_source;
- int utf8_source_len;
+ size_t utf8_source_len;
bool are_all_escaped_valid = true;
if (has_escaped) {
// Unescape before converting to UTF-16 for IDN. We write this into the
@@ -264,7 +268,7 @@
// Save the pointer into the data was just converted (it may be appended to
// other data in the output buffer).
utf8_source = &output->data()[begin_length];
- utf8_source_len = output->length() - begin_length;
+ utf8_source_len = static_cast<size_t>(output->length() - begin_length);
} else {
// We don't need to unescape, use input for IDNization later. (We know the
// input has non-ASCII, or the simple version would have been called
@@ -280,17 +284,18 @@
if (!ConvertUTF8ToUTF16(utf8_source, utf8_source_len, &utf16)) {
// In this error case, the input may or may not be the output.
StackBuffer utf8;
- for (int i = 0; i < utf8_source_len; i++)
+ for (size_t i = 0; i < utf8_source_len; i++)
utf8.push_back(utf8_source[i]);
output->set_length(begin_length);
- AppendInvalidNarrowString(utf8.data(), 0, utf8.length(), output);
+ AppendInvalidNarrowString(utf8.data(), 0,
+ static_cast<size_t>(utf8.length()), output);
return false;
}
output->set_length(begin_length);
// This will call DoSimpleHost which will do normal ASCII canonicalization
// and also check for IP addresses in the outpt.
- return DoIDNHost(utf16.data(), utf16.length(), output) &&
+ return DoIDNHost(utf16.data(), static_cast<size_t>(utf16.length()), output) &&
are_all_escaped_valid;
}
@@ -298,7 +303,7 @@
// the backend, so we just pass through. The has_escaped flag should be set if
// the input string requires unescaping.
bool DoComplexHost(const char16_t* host,
- int host_len,
+ size_t host_len,
bool has_non_ascii,
bool has_escaped,
CanonOutput* output) {
@@ -319,8 +324,8 @@
// Once we convert to UTF-8, we can use the 8-bit version of the complex
// host handling code above.
- return DoComplexHost(utf8.data(), utf8.length(), has_non_ascii,
- has_escaped, output);
+ return DoComplexHost(utf8.data(), static_cast<size_t>(utf8.length()),
+ has_non_ascii, has_escaped, output);
}
// No unescaping necessary, we can safely pass the input to ICU. This
@@ -334,16 +339,18 @@
bool DoHostSubstring(const CHAR* spec,
const Component& host,
CanonOutput* output) {
+ DCHECK(host.is_valid());
+
bool has_non_ascii, has_escaped;
ScanHostname<CHAR, UCHAR>(spec, host, &has_non_ascii, &has_escaped);
if (has_non_ascii || has_escaped) {
- return DoComplexHost(&spec[host.begin], host.len, has_non_ascii,
- has_escaped, output);
+ return DoComplexHost(&spec[host.begin], static_cast<size_t>(host.len),
+ has_non_ascii, has_escaped, output);
}
- const bool success =
- DoSimpleHost(&spec[host.begin], host.len, output, &has_non_ascii);
+ const bool success = DoSimpleHost(
+ &spec[host.begin], static_cast<size_t>(host.len), output, &has_non_ascii);
DCHECK(!has_non_ascii);
return success;
}
@@ -353,7 +360,7 @@
const Component& host,
CanonOutput* output,
CanonHostInfo* host_info) {
- if (host.len <= 0) {
+ if (!host.is_nonempty()) {
// Empty hosts don't need anything.
host_info->family = CanonHostInfo::NEUTRAL;
host_info->out_host = Component();
diff --git a/url_canon_internal.cc b/url_canon_internal.cc
index 48a1e74..192feb9 100644
--- a/url_canon_internal.cc
+++ b/url_canon_internal.cc
@@ -11,17 +11,19 @@
#include <cstdio>
#include <string>
+#include "base/numerics/safe_conversions.h"
#include "base/strings/utf_string_conversion_utils.h"
namespace url {
namespace {
-template<typename CHAR, typename UCHAR>
-void DoAppendStringOfType(const CHAR* source, int length,
+template <typename CHAR, typename UCHAR>
+void DoAppendStringOfType(const CHAR* source,
+ size_t length,
SharedCharTypes type,
CanonOutput* output) {
- for (int i = 0; i < length; i++) {
+ for (size_t i = 0; i < length; i++) {
if (static_cast<UCHAR>(source[i]) >= 0x80) {
// ReadChar will fill the code point with kUnicodeReplacementCharacter
// when the input is invalid, which is what we want.
@@ -41,10 +43,12 @@
// This function assumes the input values are all contained in 8-bit,
// although it allows any type. Returns true if input is valid, false if not.
-template<typename CHAR, typename UCHAR>
-void DoAppendInvalidNarrowString(const CHAR* spec, int begin, int end,
+template <typename CHAR, typename UCHAR>
+void DoAppendInvalidNarrowString(const CHAR* spec,
+ size_t begin,
+ size_t end,
CanonOutput* output) {
- for (int i = begin; i < end; i++) {
+ for (size_t i = begin; i < end; i++) {
UCHAR uch = static_cast<UCHAR>(spec[i]);
if (uch >= 0x80) {
// Handle UTF-8/16 encodings. This call will correctly handle the error
@@ -98,7 +102,8 @@
// Convert to UTF-8.
dest_component->begin = utf8_buffer->length();
success = ConvertUTF16ToUTF8(&override_source[override_component.begin],
- override_component.len, utf8_buffer);
+ static_cast<size_t>(override_component.len),
+ utf8_buffer);
dest_component->len = utf8_buffer->length() - dest_component->begin;
}
}
@@ -235,26 +240,24 @@
const base_icu::UChar32 kUnicodeReplacementCharacter = 0xfffd;
-void AppendStringOfType(const char* source, int length,
+void AppendStringOfType(const char* source,
+ size_t length,
SharedCharTypes type,
CanonOutput* output) {
DoAppendStringOfType<char, unsigned char>(source, length, type, output);
}
void AppendStringOfType(const char16_t* source,
- int length,
+ size_t length,
SharedCharTypes type,
CanonOutput* output) {
DoAppendStringOfType<char16_t, char16_t>(source, length, type, output);
}
bool ReadUTFChar(const char* str,
- int* begin,
- int length,
+ size_t* begin,
+ size_t length,
base_icu::UChar32* code_point_out) {
- // This depends on ints and int32s being the same thing. If they're not, it
- // will fail to compile.
- // TODO(mmenke): This should probably be fixed.
if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
!base::IsValidCharacter(*code_point_out)) {
*code_point_out = kUnicodeReplacementCharacter;
@@ -264,12 +267,9 @@
}
bool ReadUTFChar(const char16_t* str,
- int* begin,
- int length,
+ size_t* begin,
+ size_t length,
base_icu::UChar32* code_point_out) {
- // This depends on ints and int32s being the same thing. If they're not, it
- // will fail to compile.
- // TODO(mmenke): This should probably be fixed.
if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
!base::IsValidCharacter(*code_point_out)) {
*code_point_out = kUnicodeReplacementCharacter;
@@ -278,23 +278,25 @@
return true;
}
-void AppendInvalidNarrowString(const char* spec, int begin, int end,
+void AppendInvalidNarrowString(const char* spec,
+ size_t begin,
+ size_t end,
CanonOutput* output) {
DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
}
void AppendInvalidNarrowString(const char16_t* spec,
- int begin,
- int end,
+ size_t begin,
+ size_t end,
CanonOutput* output) {
DoAppendInvalidNarrowString<char16_t, char16_t>(spec, begin, end, output);
}
bool ConvertUTF16ToUTF8(const char16_t* input,
- int input_len,
+ size_t input_len,
CanonOutput* output) {
bool success = true;
- for (int i = 0; i < input_len; i++) {
+ for (size_t i = 0; i < input_len; i++) {
base_icu::UChar32 code_point;
success &= ReadUTFChar(input, &i, input_len, &code_point);
AppendUTF8Value(code_point, output);
@@ -303,10 +305,10 @@
}
bool ConvertUTF8ToUTF16(const char* input,
- int input_len,
+ size_t input_len,
CanonOutputT<char16_t>* output) {
bool success = true;
- for (int i = 0; i < input_len; i++) {
+ for (size_t i = 0; i < input_len; i++) {
base_icu::UChar32 code_point;
success &= ReadUTFChar(input, &i, input_len, &code_point);
AppendUTF16Value(code_point, output);
diff --git a/url_canon_internal.h b/url_canon_internal.h
index def4636..ec5b61c 100644
--- a/url_canon_internal.h
+++ b/url_canon_internal.h
@@ -77,11 +77,12 @@
// Appends the given string to the output, escaping characters that do not
// match the given |type| in SharedCharTypes.
-void AppendStringOfType(const char* source, int length,
+void AppendStringOfType(const char* source,
+ size_t length,
SharedCharTypes type,
CanonOutput* output);
void AppendStringOfType(const char16_t* source,
- int length,
+ size_t length,
SharedCharTypes type,
CanonOutput* output);
@@ -107,8 +108,8 @@
// Indicates if the given character is a dot or dot equivalent, returning the
// number of characters taken by it. This will be one for a literal dot, 3 for
// an escaped dot. If the character is not a dot, this will return 0.
-template<typename CHAR>
-inline int IsDot(const CHAR* spec, int offset, int end) {
+template <typename CHAR>
+inline size_t IsDot(const CHAR* spec, size_t offset, size_t end) {
if (spec[offset] == '.') {
return 1;
} else if (spec[offset] == '%' && offset + 3 <= end &&
@@ -154,8 +155,8 @@
// (for a single-byte ASCII character, it will not be changed).
COMPONENT_EXPORT(URL)
bool ReadUTFChar(const char* str,
- int* begin,
- int length,
+ size_t* begin,
+ size_t length,
base_icu::UChar32* code_point_out);
// Generic To-UTF-8 converter. This will call the given append method for each
@@ -231,8 +232,8 @@
// (for a single-16-bit-word character, it will not be changed).
COMPONENT_EXPORT(URL)
bool ReadUTFChar(const char16_t* str,
- int* begin,
- int length,
+ size_t* begin,
+ size_t length,
base_icu::UChar32* code_point_out);
// Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.
@@ -268,8 +269,8 @@
// Assumes that ch[begin] is within range in the array, but does not assume
// that any following characters are.
inline bool AppendUTF8EscapedChar(const char16_t* str,
- int* begin,
- int length,
+ size_t* begin,
+ size_t length,
CanonOutput* output) {
// UTF-16 input. ReadUTFChar will handle invalid characters for us and give
// us the kUnicodeReplacementCharacter, so we don't have to do special
@@ -281,7 +282,9 @@
}
// Handles UTF-8 input. See the wide version above for usage.
-inline bool AppendUTF8EscapedChar(const char* str, int* begin, int length,
+inline bool AppendUTF8EscapedChar(const char* str,
+ size_t* begin,
+ size_t length,
CanonOutput* output) {
// ReadUTF8Char will handle invalid characters for us and give us the
// kUnicodeReplacementCharacter, so we don't have to do special checking
@@ -308,8 +311,10 @@
return c <= 255;
}
-template<typename CHAR>
-inline bool DecodeEscaped(const CHAR* spec, int* begin, int end,
+template <typename CHAR>
+inline bool DecodeEscaped(const CHAR* spec,
+ size_t* begin,
+ size_t end,
unsigned char* unescaped_value) {
if (*begin + 3 > end ||
!Is8BitChar(spec[*begin + 1]) || !Is8BitChar(spec[*begin + 2])) {
@@ -338,11 +343,13 @@
// This is used in error cases to append invalid output so that it looks
// approximately correct. Non-error cases should not call this function since
// the escaping rules are not guaranteed!
-void AppendInvalidNarrowString(const char* spec, int begin, int end,
+void AppendInvalidNarrowString(const char* spec,
+ size_t begin,
+ size_t end,
CanonOutput* output);
void AppendInvalidNarrowString(const char16_t* spec,
- int begin,
- int end,
+ size_t begin,
+ size_t end,
CanonOutput* output);
// Misc canonicalization helpers ----------------------------------------------
@@ -357,11 +364,11 @@
// normal.
COMPONENT_EXPORT(URL)
bool ConvertUTF16ToUTF8(const char16_t* input,
- int input_len,
+ size_t input_len,
CanonOutput* output);
COMPONENT_EXPORT(URL)
bool ConvertUTF8ToUTF16(const char* input,
- int input_len,
+ size_t input_len,
CanonOutputT<char16_t>* output);
// Converts from UTF-16 to 8-bit using the character set converter. If the
diff --git a/url_canon_mailtourl.cc b/url_canon_mailtourl.cc
index f4fe2b4..ff62bea 100644
--- a/url_canon_mailtourl.cc
+++ b/url_canon_mailtourl.cc
@@ -57,8 +57,8 @@
// Copy the path using path URL's more lax escaping rules.
// We convert to UTF-8 and escape non-ASCII, but leave most
// ASCII characters alone.
- int end = parsed.path.end();
- for (int i = parsed.path.begin; i < end; ++i) {
+ size_t end = static_cast<size_t>(parsed.path.end());
+ for (size_t i = static_cast<size_t>(parsed.path.begin); i < end; ++i) {
UCHAR uch = static_cast<UCHAR>(source.path[i]);
if (ShouldEncodeMailboxCharacter<UCHAR>(uch))
success &= AppendUTF8EscapedChar(source.path, &i, end, output);
diff --git a/url_canon_path.cc b/url_canon_path.cc
index d6fb64b..f50e107 100644
--- a/url_canon_path.cc
+++ b/url_canon_path.cc
@@ -101,9 +101,11 @@
// If the input is "../foo", |after_dot| = 1, |end| = 6, and
// at the end, |*consumed_len| = 2 for the "./" this function consumed. The
// original dot length should be handled by the caller.
-template<typename CHAR>
-DotDisposition ClassifyAfterDot(const CHAR* spec, int after_dot,
- int end, int* consumed_len) {
+template <typename CHAR>
+DotDisposition ClassifyAfterDot(const CHAR* spec,
+ size_t after_dot,
+ size_t end,
+ size_t* consumed_len) {
if (after_dot == end) {
// Single dot at the end.
*consumed_len = 0;
@@ -115,9 +117,9 @@
return DIRECTORY_CUR;
}
- int second_dot_len = IsDot(spec, after_dot, end);
+ size_t second_dot_len = IsDot(spec, after_dot, end);
if (second_dot_len) {
- int after_second_dot = after_dot + second_dot_len;
+ size_t after_second_dot = after_dot + second_dot_len;
if (after_second_dot == end) {
// Double dot at the end.
*consumed_len = second_dot_len;
@@ -193,10 +195,10 @@
// ends with a '%' followed by one or two characters, and the '%' is the one
// pointed to by |last_invalid_percent_index|. The last character in the string
// was just unescaped.
-template<typename CHAR>
+template <typename CHAR>
void CheckForNestedEscapes(const CHAR* spec,
- int next_input_index,
- int input_len,
+ size_t next_input_index,
+ size_t input_len,
int last_invalid_percent_index,
CanonOutput* output) {
const int length = output->length();
@@ -218,9 +220,10 @@
}
// Now output ends like "%cc". Try to unescape this.
- int begin = last_invalid_percent_index;
+ size_t begin = static_cast<size_t>(last_invalid_percent_index);
unsigned char temp;
- if (DecodeEscaped(output->data(), &begin, output->length(), &temp)) {
+ if (DecodeEscaped(output->data(), &begin,
+ static_cast<size_t>(output->length()), &temp)) {
// New escape sequence found. Overwrite the characters following the '%'
// with "25", and push_back() the one or two characters that were following
// the '%' when we were called.
@@ -252,7 +255,10 @@
const Component& path,
int path_begin_in_output,
CanonOutput* output) {
- int end = path.end();
+ if (!path.is_nonempty())
+ return true;
+
+ size_t end = static_cast<size_t>(path.end());
// We use this variable to minimize the amount of work done when unescaping --
// we'll only call CheckForNestedEscapes() when this points at one of the last
@@ -260,7 +266,7 @@
int last_invalid_percent_index = INT_MIN;
bool success = true;
- for (int i = path.begin; i < end; i++) {
+ for (size_t i = static_cast<size_t>(path.begin); i < end; i++) {
DCHECK_LT(last_invalid_percent_index, output->length());
UCHAR uch = static_cast<UCHAR>(spec[i]);
if (sizeof(CHAR) > 1 && uch >= 0x80) {
@@ -276,7 +282,7 @@
unsigned char flags = kPathCharLookup[out_ch];
if (flags & SPECIAL) {
// Needs special handling of some sort.
- int dotlen;
+ size_t dotlen;
if ((dotlen = IsDot(spec, i, end)) > 0) {
// See if this dot was preceded by a slash in the output.
//
@@ -287,7 +293,7 @@
if (output->length() > path_begin_in_output &&
output->at(output->length() - 1) == '/') {
// Slash followed by a dot, check to see if this is means relative
- int consumed_len;
+ size_t consumed_len;
switch (ClassifyAfterDot<CHAR>(spec, i + dotlen, end,
&consumed_len)) {
case NOT_A_DIRECTORY:
diff --git a/url_canon_pathurl.cc b/url_canon_pathurl.cc
index e726cfb..d8d65f3 100644
--- a/url_canon_pathurl.cc
+++ b/url_canon_pathurl.cc
@@ -32,8 +32,8 @@
// https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
// https://url.spec.whatwg.org/#c0-control-percent-encode-set
new_component->begin = output->length();
- int end = component.end();
- for (int i = component.begin; i < end; i++) {
+ size_t end = static_cast<size_t>(component.end());
+ for (size_t i = static_cast<size_t>(component.begin); i < end; i++) {
UCHAR uch = static_cast<UCHAR>(source[i]);
if (uch < 0x20 || uch > 0x7E)
AppendUTF8EscapedChar(source, &i, end, output);
diff --git a/url_canon_query.cc b/url_canon_query.cc
index b3a1118..d23b45f 100644
--- a/url_canon_query.cc
+++ b/url_canon_query.cc
@@ -72,10 +72,12 @@
const Component& query,
CharsetConverter* converter,
CanonOutput* output) {
+ DCHECK(query.is_valid());
// This function will replace any misencoded values with the invalid
// character. This is what we want so we don't have to check for error.
RawCanonOutputW<1024> utf16;
- ConvertUTF8ToUTF16(&spec[query.begin], query.len, &utf16);
+ ConvertUTF8ToUTF16(&spec[query.begin], static_cast<size_t>(query.len),
+ &utf16);
converter->ConvertFromUTF16(utf16.data(), utf16.length(), output);
}
@@ -86,7 +88,9 @@
const Component& query,
CharsetConverter* converter,
CanonOutput* output) {
- converter->ConvertFromUTF16(&spec[query.begin], query.len, output);
+ DCHECK(query.is_valid());
+ converter->ConvertFromUTF16(&spec[query.begin],
+ static_cast<size_t>(query.len), output);
}
template<typename CHAR, typename UCHAR>
@@ -109,7 +113,8 @@
} else {
// No converter, do our own UTF-8 conversion.
- AppendStringOfType(&spec[query.begin], query.len, CHAR_QUERY, output);
+ AppendStringOfType(&spec[query.begin], static_cast<size_t>(query.len),
+ CHAR_QUERY, output);
}
}
}
diff --git a/url_canon_unittest.cc b/url_canon_unittest.cc
index f6ac9d4..96aa55a 100644
--- a/url_canon_unittest.cc
+++ b/url_canon_unittest.cc
@@ -173,9 +173,9 @@
out_str.clear();
StdStringCanonOutput output(&out_str);
- int input_len = static_cast<int>(strlen(utf_cases[i].input8));
+ size_t input_len = strlen(utf_cases[i].input8);
bool success = true;
- for (int ch = 0; ch < input_len; ch++) {
+ for (size_t ch = 0; ch < input_len; ch++) {
success &= AppendUTF8EscapedChar(utf_cases[i].input8, &ch, input_len,
&output);
}
@@ -189,9 +189,9 @@
std::u16string input_str(
test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
- int input_len = static_cast<int>(input_str.length());
+ size_t input_len = input_str.length();
bool success = true;
- for (int ch = 0; ch < input_len; ch++) {
+ for (size_t ch = 0; ch < input_len; ch++) {
success &= AppendUTF8EscapedChar(input_str.c_str(), &ch, input_len,
&output);
}
diff --git a/url_util.cc b/url_util.cc
index 1f0de84..cd8e2e1 100644
--- a/url_util.cc
+++ b/url_util.cc
@@ -811,11 +811,15 @@
int length,
DecodeURLMode mode,
CanonOutputW* output) {
+ if (length <= 0)
+ return;
+
STACK_UNINITIALIZED RawCanonOutputT<char> unescaped_chars;
- for (int i = 0; i < length; i++) {
+ size_t length_size_t = static_cast<size_t>(length);
+ for (size_t i = 0; i < length_size_t; i++) {
if (input[i] == '%') {
unsigned char ch;
- if (DecodeEscaped(input, &i, length, &ch)) {
+ if (DecodeEscaped(input, &i, length_size_t, &ch)) {
unescaped_chars.push_back(ch);
} else {
// Invalid escape sequence, copy the percent literal.
@@ -830,18 +834,20 @@
int output_initial_length = output->length();
// Convert that 8-bit to UTF-16. It's not clear IE does this at all to
// JavaScript URLs, but Firefox and Safari do.
- for (int i = 0; i < unescaped_chars.length(); i++) {
- unsigned char uch = static_cast<unsigned char>(unescaped_chars.at(i));
+ size_t unescaped_length = static_cast<size_t>(unescaped_chars.length());
+ for (size_t i = 0; i < unescaped_length; i++) {
+ unsigned char uch =
+ static_cast<unsigned char>(unescaped_chars.at(static_cast<int>(i)));
if (uch < 0x80) {
// Non-UTF-8, just append directly
output->push_back(uch);
} else {
// next_ch will point to the last character of the decoded
// character.
- int next_character = i;
+ size_t next_character = i;
base_icu::UChar32 code_point;
- if (ReadUTFChar(unescaped_chars.data(), &next_character,
- unescaped_chars.length(), &code_point)) {
+ if (ReadUTFChar(unescaped_chars.data(), &next_character, unescaped_length,
+ &code_point)) {
// Valid UTF-8 character, convert to UTF-16.
AppendUTF16Value(code_point, output);
i = next_character;