Fix for bug 81753: do not read past the end of Unicode strings

BUG=81753
TEST=unit_tests


Review URL: http://codereview.chromium.org/7891051

git-svn-id: http://src.chromium.org/svn/trunk/deps/third_party/icu46@101167 4ff67af0-8c30-449e-8e8b-ad334ec8d88c
diff --git a/README.chromium b/README.chromium
index 052028f..6d473fe 100644
--- a/README.chromium
+++ b/README.chromium
@@ -169,3 +169,7 @@
 16. Apply the upstream patch for Korean search collator support (ICU 4.6.1).
    - patches/search_collation.patch
    - upstream bug: http://bugs.icu-project.org/trac/ticket/8290
+
+17. Fix a use-of-uninitialized-memory bug in regular expression matching
+   - patches/rematch.patch
+   - upstream bug: http://bugs.icu-project.org/trac/ticket/8824
diff --git a/patches/rematch.patch b/patches/rematch.patch
new file mode 100644
index 0000000..d8fb961
--- /dev/null
+++ b/patches/rematch.patch
@@ -0,0 +1,60 @@
+Index: source/i18n/rematch.cpp
+===================================================================
+--- source/i18n/rematch.cpp	(revision 98343)
++++ source/i18n/rematch.cpp	(working copy)
+@@ -5598,6 +5598,7 @@
+                     const UChar *foldChars = NULL;
+                     int32_t foldOffset, foldLength;
+                     UChar32 c;
++                    UBool c_is_valid = FALSE;
+                     
+                     #ifdef REGEX_SMART_BACKTRACKING
+                     int32_t originalInputIdx = fp->fInputIdx;
+@@ -5607,23 +5608,29 @@
+                     foldOffset = foldLength = 0;
+ 
+                     while (patternChars < patternEnd && success) {
+-                        if(foldOffset < foldLength) {
+-                            U16_NEXT_UNSAFE(foldChars, foldOffset, c);
+-                        } else {
+-                            U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
+-                            foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT);
+-                            if(foldLength >= 0) {
+-                                if(foldLength <= UCASE_MAX_STRING_LENGTH) {   // !!!: Does not correctly handle chars that fold to 0-length strings
+-                                    foldOffset = 0;
+-                                    U16_NEXT_UNSAFE(foldChars, foldOffset, c);
+-                                } else {
+-                                    c = foldLength;
+-                                    foldLength = foldOffset; // to avoid reading chars from the folding buffer
++                        if (fp->fInputIdx < fActiveLimit) {  // don't read past end of string
++                            if(foldOffset < foldLength) {
++                                U16_NEXT_UNSAFE(foldChars, foldOffset, c);
++                                c_is_valid = TRUE;
++                            } else {
++                                // test pre-condition of U16_NEXT: i < length
++                                U_ASSERT(fp->fInputIdx < fActiveLimit);
++                                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
++                                c_is_valid = TRUE;
++                                foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT);
++                                if(foldLength >= 0) {
++                                    if(foldLength <= UCASE_MAX_STRING_LENGTH) {   // !!!: Does not correctly handle chars that fold to 0-length strings
++                                        foldOffset = 0;
++                                        U16_NEXT_UNSAFE(foldChars, foldOffset, c);
++                                    } else {
++                                        c = foldLength;
++                                        foldLength = foldOffset; // to avoid reading chars from the folding buffer
++                                    }
+                                 }
+                             }
+                         }
+                         
+-                        if (fp->fInputIdx <= fActiveLimit) {
++                        if (fp->fInputIdx <= fActiveLimit && c_is_valid) {
+                             if (U_IS_BMP(c)) {
+                                 success = (*patternChars == c);
+                                 patternChars += 1;
+@@ -6070,4 +6077,3 @@
+ U_NAMESPACE_END
+ 
+ #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
+-
diff --git a/source/i18n/rematch.cpp b/source/i18n/rematch.cpp
index dda8a1e..f365bd9 100644
--- a/source/i18n/rematch.cpp
+++ b/source/i18n/rematch.cpp
@@ -5598,6 +5598,7 @@
                     const UChar *foldChars = NULL;
                     int32_t foldOffset, foldLength;
                     UChar32 c;
+                    UBool c_is_valid = FALSE;
                     
                     #ifdef REGEX_SMART_BACKTRACKING
                     int32_t originalInputIdx = fp->fInputIdx;
@@ -5607,23 +5608,29 @@
                     foldOffset = foldLength = 0;
 
                     while (patternChars < patternEnd && success) {
-                        if(foldOffset < foldLength) {
-                            U16_NEXT_UNSAFE(foldChars, foldOffset, c);
-                        } else {
-                            U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
-                            foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT);
-                            if(foldLength >= 0) {
-                                if(foldLength <= UCASE_MAX_STRING_LENGTH) {   // !!!: Does not correctly handle chars that fold to 0-length strings
-                                    foldOffset = 0;
-                                    U16_NEXT_UNSAFE(foldChars, foldOffset, c);
-                                } else {
-                                    c = foldLength;
-                                    foldLength = foldOffset; // to avoid reading chars from the folding buffer
+                        if (fp->fInputIdx < fActiveLimit) {  // don't read past end of string
+                            if(foldOffset < foldLength) {
+                                U16_NEXT_UNSAFE(foldChars, foldOffset, c);
+                                c_is_valid = TRUE;
+                            } else {
+                                // test pre-condition of U16_NEXT: i < length
+                                U_ASSERT(fp->fInputIdx < fActiveLimit);
+                                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
+                                c_is_valid = TRUE;
+                                foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT);
+                                if(foldLength >= 0) {
+                                    if(foldLength <= UCASE_MAX_STRING_LENGTH) {   // !!!: Does not correctly handle chars that fold to 0-length strings
+                                        foldOffset = 0;
+                                        U16_NEXT_UNSAFE(foldChars, foldOffset, c);
+                                    } else {
+                                        c = foldLength;
+                                        foldLength = foldOffset; // to avoid reading chars from the folding buffer
+                                    }
                                 }
                             }
                         }
                         
-                        if (fp->fInputIdx <= fActiveLimit) {
+                        if (fp->fInputIdx <= fActiveLimit && c_is_valid) {
                             if (U_IS_BMP(c)) {
                                 success = (*patternChars == c);
                                 patternChars += 1;
@@ -6070,4 +6077,3 @@
 U_NAMESPACE_END
 
 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
-