Fix for bug 81753: do not read past the end of Unicode strings
BUG=81753
TEST=unit_tests
Review URL: http://codereview.chromium.org/7891051
git-svn-id: http://src.chromium.org/svn/trunk/deps/third_party/icu46@101167 4ff67af0-8c30-449e-8e8b-ad334ec8d88c
diff --git a/README.chromium b/README.chromium
index 052028f..6d473fe 100644
--- a/README.chromium
+++ b/README.chromium
@@ -169,3 +169,7 @@
16. Apply the upstream patch for Korean search collator support (ICU 4.6.1).
- patches/search_collation.patch
- upstream bug: http://bugs.icu-project.org/trac/ticket/8290
+
+17. Fix a use-of-uninitialized-memory bug in regular expression matching
+ - patches/rematch.patch
+ - upstream bug: http://bugs.icu-project.org/trac/ticket/8824
diff --git a/patches/rematch.patch b/patches/rematch.patch
new file mode 100644
index 0000000..d8fb961
--- /dev/null
+++ b/patches/rematch.patch
@@ -0,0 +1,60 @@
+Index: source/i18n/rematch.cpp
+===================================================================
+--- source/i18n/rematch.cpp (revision 98343)
++++ source/i18n/rematch.cpp (working copy)
+@@ -5598,6 +5598,7 @@
+ const UChar *foldChars = NULL;
+ int32_t foldOffset, foldLength;
+ UChar32 c;
++ UBool c_is_valid = FALSE;
+
+ #ifdef REGEX_SMART_BACKTRACKING
+ int32_t originalInputIdx = fp->fInputIdx;
+@@ -5607,23 +5608,29 @@
+ foldOffset = foldLength = 0;
+
+ while (patternChars < patternEnd && success) {
+- if(foldOffset < foldLength) {
+- U16_NEXT_UNSAFE(foldChars, foldOffset, c);
+- } else {
+- U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
+- foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT);
+- if(foldLength >= 0) {
+- if(foldLength <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle chars that fold to 0-length strings
+- foldOffset = 0;
+- U16_NEXT_UNSAFE(foldChars, foldOffset, c);
+- } else {
+- c = foldLength;
+- foldLength = foldOffset; // to avoid reading chars from the folding buffer
++ if (fp->fInputIdx < fActiveLimit) { // don't read past end of string
++ if(foldOffset < foldLength) {
++ U16_NEXT_UNSAFE(foldChars, foldOffset, c);
++ c_is_valid = TRUE;
++ } else {
++ // test pre-condition of U16_NEXT: i < length
++ U_ASSERT(fp->fInputIdx < fActiveLimit);
++ U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
++ c_is_valid = TRUE;
++ foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT);
++ if(foldLength >= 0) {
++ if(foldLength <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle chars that fold to 0-length strings
++ foldOffset = 0;
++ U16_NEXT_UNSAFE(foldChars, foldOffset, c);
++ } else {
++ c = foldLength;
++ foldLength = foldOffset; // to avoid reading chars from the folding buffer
++ }
+ }
+ }
+ }
+
+- if (fp->fInputIdx <= fActiveLimit) {
++ if (fp->fInputIdx <= fActiveLimit && c_is_valid) {
+ if (U_IS_BMP(c)) {
+ success = (*patternChars == c);
+ patternChars += 1;
+@@ -6070,4 +6077,3 @@
+ U_NAMESPACE_END
+
+ #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
+-
diff --git a/source/i18n/rematch.cpp b/source/i18n/rematch.cpp
index dda8a1e..f365bd9 100644
--- a/source/i18n/rematch.cpp
+++ b/source/i18n/rematch.cpp
@@ -5598,6 +5598,7 @@
const UChar *foldChars = NULL;
int32_t foldOffset, foldLength;
UChar32 c;
+ UBool c_is_valid = FALSE;
#ifdef REGEX_SMART_BACKTRACKING
int32_t originalInputIdx = fp->fInputIdx;
@@ -5607,23 +5608,29 @@
foldOffset = foldLength = 0;
while (patternChars < patternEnd && success) {
- if(foldOffset < foldLength) {
- U16_NEXT_UNSAFE(foldChars, foldOffset, c);
- } else {
- U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
- foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT);
- if(foldLength >= 0) {
- if(foldLength <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle chars that fold to 0-length strings
- foldOffset = 0;
- U16_NEXT_UNSAFE(foldChars, foldOffset, c);
- } else {
- c = foldLength;
- foldLength = foldOffset; // to avoid reading chars from the folding buffer
+ if (fp->fInputIdx < fActiveLimit) { // don't read past end of string
+ if(foldOffset < foldLength) {
+ U16_NEXT_UNSAFE(foldChars, foldOffset, c);
+ c_is_valid = TRUE;
+ } else {
+ // test pre-condition of U16_NEXT: i < length
+ U_ASSERT(fp->fInputIdx < fActiveLimit);
+ U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
+ c_is_valid = TRUE;
+ foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT);
+ if(foldLength >= 0) {
+ if(foldLength <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle chars that fold to 0-length strings
+ foldOffset = 0;
+ U16_NEXT_UNSAFE(foldChars, foldOffset, c);
+ } else {
+ c = foldLength;
+ foldLength = foldOffset; // to avoid reading chars from the folding buffer
+ }
}
}
}
- if (fp->fInputIdx <= fActiveLimit) {
+ if (fp->fInputIdx <= fActiveLimit && c_is_valid) {
if (U_IS_BMP(c)) {
success = (*patternChars == c);
patternChars += 1;
@@ -6070,4 +6077,3 @@
U_NAMESPACE_END
#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
-