Fix an out-of-bounds read in UTF-16/32 charset detection.
Add the missing buffer length checks before the BOM bytes are inspected.
Upstream bug: http://bugs.icu-project.org/trac/ticket/10318
While I'm at it, I'm adding two patch files (patches/ubrk.patch and patches/utext.patch)
that I forgot to include in earlier check-ins (http://crrev.com/121777, http://crrev.com/158118).
BUG=275803
TEST=SyzyASAN does not complain any more after this version of ICU is rolled.
R=tsepez@chromium.org
Review URL: https://codereview.chromium.org/22911033
git-svn-id: http://src.chromium.org/svn/trunk/deps/third_party/icu46@219032 4ff67af0-8c30-449e-8e8b-ad334ec8d88c
diff --git a/README.chromium b/README.chromium
index 4cabef4..76acd4b 100644
--- a/README.chromium
+++ b/README.chromium
@@ -250,3 +250,10 @@
24. Fix compilation errors on VS2012.
- patches/vs2012.patch
+
+25. Fix a buffer overflow in UTF-16/32 detection.
+ - patches/csetdet.patch
+ - upstream bug: http://bugs.icu-project.org/trac/ticket/10318
+
+
+
diff --git a/patches/csetdet.patch b/patches/csetdet.patch
new file mode 100644
index 0000000..73df253
--- /dev/null
+++ b/patches/csetdet.patch
@@ -0,0 +1,35 @@
+Index: source/i18n/csrucode.cpp
+===================================================================
+--- source/i18n/csrucode.cpp (revision 214189)
++++ source/i18n/csrucode.cpp (working copy)
+@@ -31,8 +31,9 @@
+ int32_t CharsetRecog_UTF_16_BE::match(InputText* textIn)
+ {
+ const uint8_t *input = textIn->fRawInput;
++ int32_t length = textIn->fRawLength;
+
+- if (input[0] == 0xFE && input[1] == 0xFF) {
++ if (length >=2 && input[0] == 0xFE && input[1] == 0xFF) {
+ return 100;
+ }
+
+@@ -53,8 +54,9 @@
+ int32_t CharsetRecog_UTF_16_LE::match(InputText* textIn)
+ {
+ const uint8_t *input = textIn->fRawInput;
++ int32_t length = textIn->fRawLength;
+
+- if (input[0] == 0xFF && input[1] == 0xFE && (input[2] != 0x00 || input[3] != 0x00)) {
++ if (length >= 4 && input[0] == 0xFF && input[1] == 0xFE && (input[2] != 0x00 || input[3] != 0x00)) {
+ return 100;
+ }
+
+@@ -76,7 +78,7 @@
+ bool hasBOM = FALSE;
+ int32_t confidence = 0;
+
+- if (getChar(input, 0) == 0x0000FEFFUL) {
++ if (limit > 0 && getChar(input, 0) == 0x0000FEFFUL) {
+ hasBOM = TRUE;
+ }
+
diff --git a/patches/ubrk.patch b/patches/ubrk.patch
new file mode 100644
index 0000000..51213fd
--- /dev/null
+++ b/patches/ubrk.patch
@@ -0,0 +1,32 @@
+Index: source/common/ubrk.cpp
+===================================================================
+--- source/common/ubrk.cpp (revision 120256)
++++ source/common/ubrk.cpp (working copy)
+@@ -166,6 +166,13 @@
+ int32_t textLength,
+ UErrorCode* status)
+ {
++ if (bi == NULL) {
++ if (U_SUCCESS(*status)) {
++ *status = U_ILLEGAL_ARGUMENT_ERROR;
++ }
++ return;
++ }
++
+ BreakIterator *brit = (BreakIterator *)bi;
+ UText ut = UTEXT_INITIALIZER;
+ utext_openUChars(&ut, text, textLength, status);
+@@ -181,6 +188,13 @@
+ UText *text,
+ UErrorCode *status)
+ {
++ if (bi == NULL) {
++ if (U_SUCCESS(*status)) {
++ *status = U_ILLEGAL_ARGUMENT_ERROR;
++ }
++ return;
++ }
++
+ RuleBasedBreakIterator *brit = (RuleBasedBreakIterator *)bi;
+ brit->RuleBasedBreakIterator::setText(text, *status);
+ }
diff --git a/patches/utext.patch b/patches/utext.patch
new file mode 100644
index 0000000..d92347f
--- /dev/null
+++ b/patches/utext.patch
@@ -0,0 +1,76 @@
+Index: test/cintltst/utexttst.c
+===================================================================
+--- test/cintltst/utexttst.c (revision 29355)
++++ test/cintltst/utexttst.c (revision 29356)
+@@ -1,6 +1,6 @@
+ /********************************************************************
+ * COPYRIGHT:
+- * Copyright (c) 2005-2009, International Business Machines Corporation and
++ * Copyright (c) 2005-2011, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+ /*
+@@ -210,6 +210,10 @@
+ UChar uString[] = {0x41, 0x42, 0x43, 0};
+ UChar buf[100];
+ int32_t i;
++ /* Test pinning of input bounds */
++ UChar uString2[] = {0x41, 0x42, 0x43, 0x44, 0x45,
++ 0x46, 0x47, 0x48, 0x49, 0x4A, 0};
++ UChar * uString2Ptr = uString2 + 5;
+
+ status = U_ZERO_ERROR;
+ uta = utext_openUChars(NULL, uString, -1, &status);
+@@ -228,6 +232,20 @@
+ i = u_strcmp(uString, buf);
+ TEST_ASSERT(i == 0);
+ utext_close(uta);
++
++ /* Test pinning of input bounds */
++ status = U_ZERO_ERROR;
++ uta = utext_openUChars(NULL, uString2Ptr, -1, &status);
++ TEST_SUCCESS(status);
++
++ status = U_ZERO_ERROR;
++ memset(buf, 0, sizeof(buf));
++ i = utext_extract(uta, -3, 20, buf, 100, &status);
++ TEST_SUCCESS(status);
++ TEST_ASSERT(i == u_strlen(uString2Ptr));
++ i = u_strcmp(uString2Ptr, buf);
++ TEST_ASSERT(i == 0);
++ utext_close(uta);
+ }
+
+ {
+Index: common/utext.cpp
+===================================================================
+--- common/utext.cpp (revision 29355)
++++ common/utext.cpp (revision 29356)
+@@ -1,7 +1,7 @@
+ /*
+ *******************************************************************************
+ *
+-* Copyright (C) 2005-2010, International Business Machines
++* Copyright (C) 2005-2011, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ *******************************************************************************
+@@ -2846,7 +2846,6 @@
+ return 0;
+ }
+
+- const UChar *s=(const UChar *)ut->context;
+ int32_t si, di;
+
+ int32_t start32;
+@@ -2856,8 +2855,8 @@
+ // Pins 'start' to the length of the string, if it came in out-of-bounds.
+ // Snaps 'start' to the beginning of a code point.
+ ucstrTextAccess(ut, start, TRUE);
+- U_ASSERT(start <= INT32_MAX);
+- start32 = (int32_t)start;
++ const UChar *s=ut->chunkContents;
++ start32 = ut->chunkOffset;
+
+ int32_t strLength=(int32_t)ut->a;
+ if (strLength >= 0) {
diff --git a/source/i18n/csrucode.cpp b/source/i18n/csrucode.cpp
index 99a76d8..3789fa9 100644
--- a/source/i18n/csrucode.cpp
+++ b/source/i18n/csrucode.cpp
@@ -31,8 +31,9 @@
int32_t CharsetRecog_UTF_16_BE::match(InputText* textIn)
{
const uint8_t *input = textIn->fRawInput;
+ int32_t length = textIn->fRawLength;
- if (input[0] == 0xFE && input[1] == 0xFF) {
+ if (length >=2 && input[0] == 0xFE && input[1] == 0xFF) {
return 100;
}
@@ -53,8 +54,9 @@
int32_t CharsetRecog_UTF_16_LE::match(InputText* textIn)
{
const uint8_t *input = textIn->fRawInput;
+ int32_t length = textIn->fRawLength;
- if (input[0] == 0xFF && input[1] == 0xFE && (input[2] != 0x00 || input[3] != 0x00)) {
+ if (length >= 4 && input[0] == 0xFF && input[1] == 0xFE && (input[2] != 0x00 || input[3] != 0x00)) {
return 100;
}
@@ -76,7 +78,7 @@
bool hasBOM = FALSE;
int32_t confidence = 0;
- if (getChar(input, 0) == 0x0000FEFFUL) {
+ if (limit > 0 && getChar(input, 0) == 0x0000FEFFUL) {
hasBOM = TRUE;
}