ES6 unicode escapes, part 2: Regexps. Allows \u{xxxxx} in regexps. Behind the --harmony-unicode flag. Part 1 is here: https://codereview.chromium.org/716423002 BUG=v8:3648 LOG=N Review URL: https://codereview.chromium.org/788043005 Cr-Commit-Position: refs/heads/master@{#26018}

commit: 2305cfb4e2b196918095d3404267772e93916616 [log] [tgz]
author: marja <marja@chromium.org> Mon Jan 12 09:50:15 2015
committer: Commit bot <commit-bot@chromium.org> Mon Jan 12 09:50:34 2015
tree: 2079bc35689d6df7e11cd4a28bc41959eb5076d7
parent: f9a22a5e587cbd1d8da7424543b9b90118eea18e [diff]
diff --git a/src/bootstrapper.cc b/src/bootstrapper.cc
index 7105eb2..5b616af 100644
--- a/src/bootstrapper.cc
+++ b/src/bootstrapper.cc

@@ -1625,7 +1625,6 @@
 EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_proxies)
 EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_templates)
 EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_sloppy)
-EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_unicode)
 
 void Genesis::InitializeGlobal_harmony_regexps() {
   Handle<JSObject> builtins(native_context()->builtins());
@@ -1639,6 +1638,18 @@
 }
 
 
+void Genesis::InitializeGlobal_harmony_unicode() {
+  Handle<JSObject> builtins(native_context()->builtins());
+
+  Handle<HeapObject> flag(FLAG_harmony_unicode ? heap()->true_value()
+                                               : heap()->false_value());
+  PropertyAttributes attributes =
+      static_cast<PropertyAttributes>(DONT_DELETE | READ_ONLY);
+  Runtime::DefineObjectProperty(builtins, factory()->harmony_unicode_string(),
+                                flag, attributes).Assert();
+}
+
+
 Handle<JSFunction> Genesis::InstallInternalArray(
     Handle<JSBuiltinsObject> builtins,
     const char* name,

diff --git a/src/heap/heap.h b/src/heap/heap.h
index e6ccf2e..6aeaff8 100644
--- a/src/heap/heap.h
+++ b/src/heap/heap.h

@@ -227,7 +227,9 @@
   V(ignore_case_string, "ignoreCase")                      \
   V(multiline_string, "multiline")                         \
   V(sticky_string, "sticky")                               \
+  V(unicode_string, "unicode")                             \
   V(harmony_regexps_string, "harmony_regexps")             \
+  V(harmony_unicode_string, "harmony_unicode")             \
   V(input_string, "input")                                 \
   V(index_string, "index")                                 \
   V(last_index_string, "lastIndex")                        \

diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index 81ad080..a5b9fb5 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc

@@ -154,7 +154,7 @@
   RegExpCompileData parse_result;
   FlatStringReader reader(isolate, pattern);
   if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
-                                 &parse_result, &zone)) {
+                                 flags.is_unicode(), &parse_result, &zone)) {
     // Throw an exception if we fail to parse the pattern.
     return ThrowRegExpException(re,
                                 pattern,
@@ -401,8 +401,7 @@
   RegExpCompileData compile_data;
   FlatStringReader reader(isolate, pattern);
   if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
-                                 &compile_data,
-                                 &zone)) {
+                                 flags.is_unicode(), &compile_data, &zone)) {
     // Throw an exception if we fail to parse the pattern.
     // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
     USE(ThrowRegExpException(re,

diff --git a/src/mirror-debugger.js b/src/mirror-debugger.js
index be068ee..a27a790c 100644
--- a/src/mirror-debugger.js
+++ b/src/mirror-debugger.js

@@ -1274,6 +1274,15 @@
 };
 
 
+/**
+ * Returns whether this regular expression has the unicode (u) flag set.
+ * @return {boolean} Value of the unicode flag
+ */
+RegExpMirror.prototype.unicode = function() {
+  return this.value_.unicode;
+};
+
+
 RegExpMirror.prototype.toText = function() {
   // Simpel to text which is used when on specialization in subclass.
   return "/" + this.source() + "/";

diff --git a/src/objects.h b/src/objects.h
index ccc802d..7eeedfa 100644
--- a/src/objects.h
+++ b/src/objects.h

@@ -7902,7 +7902,8 @@
     GLOBAL = 1,
     IGNORE_CASE = 2,
     MULTILINE = 4,
-    STICKY = 8
+    STICKY = 8,
+    UNICODE_ESCAPES = 16
   };
 
   class Flags {
@@ -7912,6 +7913,7 @@
     bool is_ignore_case() { return (value_ & IGNORE_CASE) != 0; }
     bool is_multiline() { return (value_ & MULTILINE) != 0; }
     bool is_sticky() { return (value_ & STICKY) != 0; }
+    bool is_unicode() { return (value_ & UNICODE_ESCAPES) != 0; }
     uint32_t value() { return value_; }
    private:
     uint32_t value_;

diff --git a/src/parser.cc b/src/parser.cc
index bfdeaa3..3f7ce4d 100644
--- a/src/parser.cc
+++ b/src/parser.cc

@@ -4278,10 +4278,8 @@
 // Regular expressions
 
 
-RegExpParser::RegExpParser(FlatStringReader* in,
-                           Handle<String>* error,
-                           bool multiline,
-                           Zone* zone)
+RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
+                           bool multiline, bool unicode, Zone* zone)
     : isolate_(zone->isolate()),
       zone_(zone),
       error_(error),
@@ -4292,6 +4290,7 @@
       capture_count_(0),
       has_more_(true),
       multiline_(multiline),
+      unicode_(unicode),
       simple_(false),
       contains_anchor_(false),
       is_scanned_for_captures_(false),
@@ -4348,6 +4347,13 @@
 }
 
 
+bool RegExpParser::IsSyntaxCharacter(uc32 c) {
+  return c == '^' || c == '$' || c == '\\' || c == '.' || c == '*' ||
+         c == '+' || c == '?' || c == '(' || c == ')' || c == '[' || c == ']' ||
+         c == '{' || c == '}' || c == '|';
+}
+
+
 RegExpTree* RegExpParser::ReportError(Vector<const char> message) {
   failed_ = true;
   *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked();
@@ -4564,9 +4570,15 @@
         }
         uc32 first_digit = Next();
         if (first_digit == '8' || first_digit == '9') {
-          // Treat as identity escape
-          builder->AddCharacter(first_digit);
-          Advance(2);
+          // If the 'u' flag is present, only syntax characters can be escaped,
+          // no other identity escapes are allowed. If the 'u' flag is not
+          // present, all identity escapes are allowed.
+          if (!FLAG_harmony_unicode || !unicode_) {
+            builder->AddCharacter(first_digit);
+            Advance(2);
+          } else {
+            return ReportError(CStrVector("Invalid escape"));
+          }
           break;
         }
       }
@@ -4622,25 +4634,41 @@
         uc32 value;
         if (ParseHexEscape(2, &value)) {
           builder->AddCharacter(value);
-        } else {
+        } else if (!FLAG_harmony_unicode || !unicode_) {
           builder->AddCharacter('x');
+        } else {
+          // If the 'u' flag is present, invalid escapes are not treated as
+          // identity escapes.
+          return ReportError(CStrVector("Invalid escape"));
         }
         break;
       }
       case 'u': {
         Advance(2);
         uc32 value;
-        if (ParseHexEscape(4, &value)) {
+        if (ParseUnicodeEscape(&value)) {
           builder->AddCharacter(value);
-        } else {
+        } else if (!FLAG_harmony_unicode || !unicode_) {
           builder->AddCharacter('u');
+        } else {
+          // If the 'u' flag is present, invalid escapes are not treated as
+          // identity escapes.
+          return ReportError(CStrVector("Invalid unicode escape"));
         }
         break;
       }
       default:
-        // Identity escape.
-        builder->AddCharacter(Next());
-        Advance(2);
+        Advance();
+        // If the 'u' flag is present, only syntax characters can be escaped, no
+        // other identity escapes are allowed. If the 'u' flag is not present,
+        // all identity escapes are allowed.
+        if (!FLAG_harmony_unicode || !unicode_ ||
+            IsSyntaxCharacter(current())) {
+          builder->AddCharacter(current());
+          Advance();
+        } else {
+          return ReportError(CStrVector("Invalid escape"));
+        }
         break;
       }
       break;
@@ -4883,11 +4911,10 @@
 }
 
 
-bool RegExpParser::ParseHexEscape(int length, uc32 *value) {
+bool RegExpParser::ParseHexEscape(int length, uc32* value) {
   int start = position();
   uc32 val = 0;
-  bool done = false;
-  for (int i = 0; !done; i++) {
+  for (int i = 0; i < length; ++i) {
     uc32 c = current();
     int d = HexValue(c);
     if (d < 0) {
@@ -4896,15 +4923,52 @@
     }
     val = val * 16 + d;
     Advance();
-    if (i == length - 1) {
-      done = true;
-    }
   }
   *value = val;
   return true;
 }
 
 
+bool RegExpParser::ParseUnicodeEscape(uc32* value) {
+  // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are
+  // allowed). In the latter case, the number of hex digits between { } is
+  // arbitrary. \ and u have already been read.
+  if (current() == '{' && FLAG_harmony_unicode && unicode_) {
+    int start = position();
+    Advance();
+    if (ParseUnlimitedLengthHexNumber(0x10ffff, value)) {
+      if (current() == '}') {
+        Advance();
+        return true;
+      }
+    }
+    Reset(start);
+    return false;
+  }
+  // \u but no {, or \u{...} escapes not allowed.
+  return ParseHexEscape(4, value);
+}
+
+
+bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) {
+  uc32 x = 0;
+  int d = HexValue(current());
+  if (d < 0) {
+    return false;
+  }
+  while (d >= 0) {
+    x = x * 16 + d;
+    if (x > max_value) {
+      return false;
+    }
+    Advance();
+    d = HexValue(current());
+  }
+  *value = x;
+  return true;
+}
+
+
 uc32 RegExpParser::ParseClassCharacterEscape() {
   DCHECK(current() == '\\');
   DCHECK(has_next() && !IsSpecialClassEscape(Next()));
@@ -4959,27 +5023,41 @@
       if (ParseHexEscape(2, &value)) {
         return value;
       }
-      // If \x is not followed by a two-digit hexadecimal, treat it
-      // as an identity escape.
-      return 'x';
+      if (!FLAG_harmony_unicode || !unicode_) {
+        // If \x is not followed by a two-digit hexadecimal, treat it
+        // as an identity escape.
+        return 'x';
+      }
+      // If the 'u' flag is present, invalid escapes are not treated as
+      // identity escapes.
+      ReportError(CStrVector("Invalid escape"));
+      return 0;
     }
     case 'u': {
       Advance();
       uc32 value;
-      if (ParseHexEscape(4, &value)) {
+      if (ParseUnicodeEscape(&value)) {
         return value;
       }
-      // If \u is not followed by a four-digit hexadecimal, treat it
-      // as an identity escape.
-      return 'u';
+      if (!FLAG_harmony_unicode || !unicode_) {
+        return 'u';
+      }
+      // If the 'u' flag is present, invalid escapes are not treated as
+      // identity escapes.
+      ReportError(CStrVector("Invalid unicode escape"));
+      return 0;
     }
     default: {
-      // Extended identity escape. We accept any character that hasn't
-      // been matched by a more specific case, not just the subset required
-      // by the ECMAScript specification.
       uc32 result = current();
-      Advance();
-      return result;
+      // If the 'u' flag is present, only syntax characters can be escaped, no
+      // other identity escapes are allowed. If the 'u' flag is not present, all
+      // identity escapes are allowed.
+      if (!FLAG_harmony_unicode || !unicode_ || IsSyntaxCharacter(result)) {
+        Advance();
+        return result;
+      }
+      ReportError(CStrVector("Invalid escape"));
+      return 0;
     }
   }
   return 0;
@@ -5085,12 +5163,11 @@
 // ----------------------------------------------------------------------------
 // The Parser interface.
 
-bool RegExpParser::ParseRegExp(FlatStringReader* input,
-                               bool multiline,
-                               RegExpCompileData* result,
+bool RegExpParser::ParseRegExp(FlatStringReader* input, bool multiline,
+                               bool unicode, RegExpCompileData* result,
                                Zone* zone) {
   DCHECK(result != NULL);
-  RegExpParser parser(input, &result->error, multiline, zone);
+  RegExpParser parser(input, &result->error, multiline, unicode, zone);
   RegExpTree* tree = parser.ParsePattern();
   if (parser.failed()) {
     DCHECK(tree == NULL);

diff --git a/src/parser.h b/src/parser.h
index 219f1c4..518b443 100644
--- a/src/parser.h
+++ b/src/parser.h

@@ -222,15 +222,11 @@
 
 class RegExpParser BASE_EMBEDDED {
  public:
-  RegExpParser(FlatStringReader* in,
-               Handle<String>* error,
-               bool multiline_mode,
-               Zone* zone);
+  RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode,
+               bool unicode, Zone* zone);
 
-  static bool ParseRegExp(FlatStringReader* input,
-                          bool multiline,
-                          RegExpCompileData* result,
-                          Zone* zone);
+  static bool ParseRegExp(FlatStringReader* input, bool multiline, bool unicode,
+                          RegExpCompileData* result, Zone* zone);
 
   RegExpTree* ParsePattern();
   RegExpTree* ParseDisjunction();
@@ -248,6 +244,8 @@
   // Checks whether the following is a length-digit hexadecimal number,
   // and sets the value if it is.
   bool ParseHexEscape(int length, uc32* value);
+  bool ParseUnicodeEscape(uc32* value);
+  bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value);
 
   uc32 ParseOctalLiteral();
 
@@ -272,6 +270,8 @@
   int position() { return next_pos_ - 1; }
   bool failed() { return failed_; }
 
+  static bool IsSyntaxCharacter(uc32 c);
+
   static const int kMaxCaptures = 1 << 16;
   static const uc32 kEndMarker = (1 << 21);
 
@@ -338,6 +338,7 @@
   int capture_count_;
   bool has_more_;
   bool multiline_;
+  bool unicode_;
   bool simple_;
   bool contains_anchor_;
   bool is_scanned_for_captures_;

diff --git a/src/regexp.js b/src/regexp.js
index e1eac76..98d0394 100644
--- a/src/regexp.js
+++ b/src/regexp.js

@@ -22,6 +22,8 @@
     flags = (pattern.global ? 'g' : '')
         + (pattern.ignoreCase ? 'i' : '')
         + (pattern.multiline ? 'm' : '');
+    if (harmony_unicode)
+        flags += (pattern.unicode ? 'u' : '');
     if (harmony_regexps)
         flags += (pattern.sticky ? 'y' : '');
     pattern = pattern.source;
@@ -235,6 +237,7 @@
   if (this.global) result += 'g';
   if (this.ignoreCase) result += 'i';
   if (this.multiline) result += 'm';
+  if (harmony_unicode && this.unicode) result += 'u';
   if (harmony_regexps && this.sticky) result += 'y';
   return result;
 }

diff --git a/src/runtime/runtime-regexp.cc b/src/runtime/runtime-regexp.cc
index be9adff..e2cf202 100644
--- a/src/runtime/runtime-regexp.cc
+++ b/src/runtime/runtime-regexp.cc

@@ -805,7 +805,7 @@
   uint32_t value = JSRegExp::NONE;
   int length = flags->length();
   // A longer flags string cannot be valid.
-  if (length > 4) return JSRegExp::Flags(0);
+  if (length > 5) return JSRegExp::Flags(0);
   for (int i = 0; i < length; i++) {
     uint32_t flag = JSRegExp::NONE;
     switch (flags->Get(i)) {
@@ -818,6 +818,10 @@
       case 'm':
         flag = JSRegExp::MULTILINE;
         break;
+      case 'u':
+        if (!FLAG_harmony_unicode) return JSRegExp::Flags(0);
+        flag = JSRegExp::UNICODE_ESCAPES;
+        break;
       case 'y':
         if (!FLAG_harmony_regexps) return JSRegExp::Flags(0);
         flag = JSRegExp::STICKY;
@@ -859,10 +863,12 @@
   Handle<Object> ignore_case = factory->ToBoolean(flags.is_ignore_case());
   Handle<Object> multiline = factory->ToBoolean(flags.is_multiline());
   Handle<Object> sticky = factory->ToBoolean(flags.is_sticky());
+  Handle<Object> unicode = factory->ToBoolean(flags.is_unicode());
 
   Map* map = regexp->map();
   Object* constructor = map->constructor();
-  if (!FLAG_harmony_regexps && constructor->IsJSFunction() &&
+  if (!FLAG_harmony_regexps && !FLAG_harmony_unicode &&
+      constructor->IsJSFunction() &&
       JSFunction::cast(constructor)->initial_map() == map) {
     // If we still have the original map, set in-object properties directly.
     // Both true and false are immovable immortal objects so no need for write
@@ -896,6 +902,10 @@
       JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->sticky_string(),
                                                sticky, final).Check();
     }
+    if (FLAG_harmony_unicode) {
+      JSObject::SetOwnPropertyIgnoreAttributes(
+          regexp, factory->unicode_string(), unicode, final).Check();
+    }
     JSObject::SetOwnPropertyIgnoreAttributes(
         regexp, factory->last_index_string(), zero, writable).Check();
   }

diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc
index 4a572c8..4ca5c36 100644
--- a/test/cctest/test-regexp.cc
+++ b/test/cctest/test-regexp.cc

@@ -89,8 +89,8 @@
   Zone zone(CcTest::i_isolate());
   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
   RegExpCompileData result;
-  return v8::internal::RegExpParser::ParseRegExp(
-      &reader, false, &result, &zone);
+  return v8::internal::RegExpParser::ParseRegExp(&reader, false, false, &result,
+                                                 &zone);
 }
 
 
@@ -99,8 +99,8 @@
   Zone zone(CcTest::i_isolate());
   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
   RegExpCompileData result;
-  CHECK(v8::internal::RegExpParser::ParseRegExp(
-      &reader, false, &result, &zone));
+  CHECK(v8::internal::RegExpParser::ParseRegExp(&reader, false, false, &result,
+                                                &zone));
   CHECK(result.tree != NULL);
   CHECK(result.error.is_null());
   std::ostringstream os;
@@ -114,8 +114,8 @@
   Zone zone(CcTest::i_isolate());
   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
   RegExpCompileData result;
-  CHECK(v8::internal::RegExpParser::ParseRegExp(
-      &reader, false, &result, &zone));
+  CHECK(v8::internal::RegExpParser::ParseRegExp(&reader, false, false, &result,
+                                                &zone));
   CHECK(result.tree != NULL);
   CHECK(result.error.is_null());
   return result.simple;
@@ -132,8 +132,8 @@
   Zone zone(CcTest::i_isolate());
   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
   RegExpCompileData result;
-  CHECK(v8::internal::RegExpParser::ParseRegExp(
-      &reader, false, &result, &zone));
+  CHECK(v8::internal::RegExpParser::ParseRegExp(&reader, false, false, &result,
+                                                &zone));
   CHECK(result.tree != NULL);
   CHECK(result.error.is_null());
   int min_match = result.tree->min_match();
@@ -405,8 +405,8 @@
   Zone zone(CcTest::i_isolate());
   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
   RegExpCompileData result;
-  CHECK(!v8::internal::RegExpParser::ParseRegExp(
-      &reader, false, &result, &zone));
+  CHECK(!v8::internal::RegExpParser::ParseRegExp(&reader, false, false, &result,
+                                                 &zone));
   CHECK(result.tree == NULL);
   CHECK(!result.error.is_null());
   SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
@@ -497,16 +497,17 @@
 }
 
 
-static RegExpNode* Compile(const char* input, bool multiline, bool is_one_byte,
-                           Zone* zone) {
+static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
+                           bool is_one_byte, Zone* zone) {
   Isolate* isolate = CcTest::i_isolate();
   FlatStringReader reader(isolate, CStrVector(input));
   RegExpCompileData compile_data;
-  if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline,
+  if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline, unicode,
                                                &compile_data, zone))
     return NULL;
-  Handle<String> pattern = isolate->factory()->
-      NewStringFromUtf8(CStrVector(input)).ToHandleChecked();
+  Handle<String> pattern = isolate->factory()
+                               ->NewStringFromUtf8(CStrVector(input))
+                               .ToHandleChecked();
   Handle<String> sample_subject =
       isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
   RegExpEngine::Compile(&compile_data, false, false, multiline, false, pattern,
@@ -515,11 +516,11 @@
 }
 
 
-static void Execute(const char* input, bool multiline, bool is_one_byte,
-                    bool dot_output = false) {
+static void Execute(const char* input, bool multiline, bool unicode,
+                    bool is_one_byte, bool dot_output = false) {
   v8::HandleScope scope(CcTest::isolate());
   Zone zone(CcTest::i_isolate());
-  RegExpNode* node = Compile(input, multiline, is_one_byte, &zone);
+  RegExpNode* node = Compile(input, multiline, unicode, is_one_byte, &zone);
   USE(node);
 #ifdef DEBUG
   if (dot_output) {

diff --git a/test/mjsunit/harmony/regexp-flags.js b/test/mjsunit/harmony/regexp-flags.js
index 475fda4..bf44d2c 100644
--- a/test/mjsunit/harmony/regexp-flags.js
+++ b/test/mjsunit/harmony/regexp-flags.js

@@ -2,19 +2,14 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-// Flags: --harmony-regexps
+// Flags: --harmony-regexps --harmony-unicode
 
 RegExp.prototype.flags = 'setter should be undefined';
 
 assertEquals('', RegExp('').flags);
 assertEquals('', /./.flags);
-assertEquals('gimy', RegExp('', 'ygmi').flags);
-assertEquals('gimy', /foo/ymig.flags);
-
-// TODO(dslomov): When support for the `u` flag is added, uncomment the first
-// line below and remove the second line.
-//assertEquals(RegExp('', 'yumig').flags, 'gimuy');
-assertThrows(function() { RegExp('', 'yumig').flags; }, SyntaxError);
+assertEquals('gimuy', RegExp('', 'yugmi').flags);
+assertEquals('gimuy', /foo/yumig.flags);
 
 var descriptor = Object.getOwnPropertyDescriptor(RegExp.prototype, 'flags');
 assertTrue(descriptor.configurable);

diff --git a/test/mjsunit/harmony/unicode-escapes-in-regexps.js b/test/mjsunit/harmony/unicode-escapes-in-regexps.js
new file mode 100644
index 0000000..41abcb7
--- /dev/null
+++ b/test/mjsunit/harmony/unicode-escapes-in-regexps.js

@@ -0,0 +1,212 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// ES6 extends the \uxxxx escape and also allows \u{xxxxx}.
+
+// Flags: --harmony-unicode --harmony-regexps
+
+function testRegexpHelper(r) {
+  assertTrue(r.test("foo"));
+  assertTrue(r.test("boo"));
+  assertFalse(r.test("moo"));
+}
+
+
+(function TestUnicodeEscapes() {
+  testRegexpHelper(/(\u0066|\u0062)oo/);
+  testRegexpHelper(/(\u0066|\u0062)oo/u);
+  testRegexpHelper(/(\u{0066}|\u{0062})oo/u);
+  testRegexpHelper(/(\u{66}|\u{000062})oo/u);
+
+  // Note that we need \\ inside a string, otherwise it's interpreted as a
+  // unicode escape inside a string.
+  testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo"));
+  testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo", "u"));
+  testRegexpHelper(new RegExp("(\\u{0066}|\\u{0062})oo", "u"));
+  testRegexpHelper(new RegExp("(\\u{66}|\\u{000062})oo", "u"));
+
+  // Though, unicode escapes via strings should work too.
+  testRegexpHelper(new RegExp("(\u0066|\u0062)oo"));
+  testRegexpHelper(new RegExp("(\u0066|\u0062)oo", "u"));
+  testRegexpHelper(new RegExp("(\u{0066}|\u{0062})oo", "u"));
+  testRegexpHelper(new RegExp("(\u{66}|\u{000062})oo", "u"));
+})();
+
+
+(function TestUnicodeEscapesInCharacterClasses() {
+  testRegexpHelper(/[\u0062-\u0066]oo/);
+  testRegexpHelper(/[\u0062-\u0066]oo/u);
+  testRegexpHelper(/[\u{0062}-\u{0066}]oo/u);
+  testRegexpHelper(/[\u{62}-\u{00000066}]oo/u);
+
+  // Note that we need \\ inside a string, otherwise it's interpreted as a
+  // unicode escape inside a string.
+  testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo"));
+  testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo", "u"));
+  testRegexpHelper(new RegExp("[\\u{0062}-\\u{0066}]oo", "u"));
+  testRegexpHelper(new RegExp("[\\u{62}-\\u{00000066}]oo", "u"));
+
+  // Though, unicode escapes via strings should work too.
+  testRegexpHelper(new RegExp("[\u0062-\u0066]oo"));
+  testRegexpHelper(new RegExp("[\u0062-\u0066]oo", "u"));
+  testRegexpHelper(new RegExp("[\u{0062}-\u{0066}]oo", "u"));
+  testRegexpHelper(new RegExp("[\u{62}-\u{00000066}]oo", "u"));
+})();
+
+
+(function TestBraceEscapesWithoutUnicodeFlag() {
+  // \u followed by illegal escape will be parsed as u. {x} will be the
+  // character count.
+  function helper1(r) {
+    assertFalse(r.test("fbar"));
+    assertFalse(r.test("fubar"));
+    assertTrue(r.test("fuubar"));
+    assertFalse(r.test("fuuubar"));
+  }
+  helper1(/f\u{2}bar/);
+  helper1(new RegExp("f\\u{2}bar"));
+
+  function helper2(r) {
+    assertFalse(r.test("fbar"));
+    assertTrue(r.test("fubar"));
+    assertTrue(r.test("fuubar"));
+    assertFalse(r.test("fuuubar"));
+  }
+
+  helper2(/f\u{1,2}bar/);
+  helper2(new RegExp("f\\u{1,2}bar"));
+
+  function helper3(r) {
+    assertTrue(r.test("u"));
+    assertTrue(r.test("{"));
+    assertTrue(r.test("2"));
+    assertTrue(r.test("}"));
+    assertFalse(r.test("q"));
+    assertFalse(r.test("("));
+    assertFalse(r.test(")"));
+  }
+  helper3(/[\u{2}]/);
+  helper3(new RegExp("[\\u{2}]"));
+})();
+
+
+(function TestInvalidEscapes() {
+  // Without the u flag, invalid unicode escapes and other invalid escapes are
+  // treated as identity escapes.
+  function helper1(r) {
+    assertTrue(r.test("firstuxz89second"));
+  }
+  helper1(/first\u\x\z\8\9second/);
+  helper1(new RegExp("first\\u\\x\\z\\8\\9second"));
+
+  function helper2(r) {
+    assertTrue(r.test("u"));
+    assertTrue(r.test("x"));
+    assertTrue(r.test("z"));
+    assertTrue(r.test("8"));
+    assertTrue(r.test("9"));
+    assertFalse(r.test("q"));
+    assertFalse(r.test("7"));
+  }
+  helper2(/[\u\x\z\8\9]/);
+  helper2(new RegExp("[\\u\\x\\z\\8\\9]"));
+
+  // However, with the u flag, these are treated as invalid escapes.
+  assertThrows("/\\u/u", SyntaxError);
+  assertThrows("/\\u12/u", SyntaxError);
+  assertThrows("/\\ufoo/u", SyntaxError);
+  assertThrows("/\\x/u", SyntaxError);
+  assertThrows("/\\xfoo/u", SyntaxError);
+  assertThrows("/\\z/u", SyntaxError);
+  assertThrows("/\\8/u", SyntaxError);
+  assertThrows("/\\9/u", SyntaxError);
+
+  assertThrows("new RegExp('\\\\u', 'u')", SyntaxError);
+  assertThrows("new RegExp('\\\\u12', 'u')", SyntaxError);
+  assertThrows("new RegExp('\\\\ufoo', 'u')", SyntaxError);
+  assertThrows("new RegExp('\\\\x', 'u')", SyntaxError);
+  assertThrows("new RegExp('\\\\xfoo', 'u')", SyntaxError);
+  assertThrows("new RegExp('\\\\z', 'u')", SyntaxError);
+  assertThrows("new RegExp('\\\\8', 'u')", SyntaxError);
+  assertThrows("new RegExp('\\\\9', 'u')", SyntaxError);
+})();
+
+
+(function TestTooBigHexEscape() {
+  // The hex number inside \u{} has a maximum value.
+  /\u{10ffff}/u
+  new RegExp("\\u{10ffff}", "u")
+  assertThrows("/\\u{110000}/u", SyntaxError);
+  assertThrows("new RegExp('\\\\u{110000}', 'u')", SyntaxError);
+
+  // Without the u flag, they're of course fine ({x} is the count).
+  /\u{110000}/
+  new RegExp("\\u{110000}")
+})();
+
+
+(function TestSyntaxEscapes() {
+  // Syntax escapes work the same with or without the u flag.
+  function helper(r) {
+    assertTrue(r.test("foo[bar"));
+    assertFalse(r.test("foo]bar"));
+  }
+  helper(/foo\[bar/);
+  helper(new RegExp("foo\\[bar"));
+  helper(/foo\[bar/u);
+  helper(new RegExp("foo\\[bar", "u"));
+})();
+
+
+(function TestUnicodeSurrogates() {
+  // U+10E6D corresponds to the surrogate pair [U+D803, U+DE6D].
+  function helper(r) {
+    assertTrue(r.test("foo\u{10e6d}bar"));
+  }
+  helper(/foo\ud803\ude6dbar/u);
+  helper(new RegExp("foo\\ud803\\ude6dbar", "u"));
+})();
+
+
+(function AllFlags() {
+  // Test that we can pass all possible regexp flags and they work properly.
+  function helper1(r) {
+    assertTrue(r.global);
+    assertTrue(r.ignoreCase);
+    assertTrue(r.multiline);
+    assertTrue(r.sticky);
+    assertTrue(r.unicode);
+  }
+
+  helper1(/foo/gimyu);
+  helper1(new RegExp("foo", "gimyu"));
+
+  function helper2(r) {
+    assertFalse(r.global);
+    assertFalse(r.ignoreCase);
+    assertFalse(r.multiline);
+    assertFalse(r.sticky);
+    assertFalse(r.unicode);
+  }
+
+  helper2(/foo/);
+  helper2(new RegExp("foo"));
+})();
+
+
+(function DuplicatedFlags() {
+  // Test that duplicating the u flag is not allowed.
+  assertThrows("/foo/ugu");
+  assertThrows("new RegExp('foo', 'ugu')");
+})();
+
+
+(function ToString() {
+  // Test that the u flag is included in the string representation of regexps.
+  function helper(r) {
+    assertEquals(r.toString(), "/foo/u");
+  }
+  helper(/foo/u);
+  helper(new RegExp("foo", "u"));
+})();

diff --git a/test/mjsunit/mirror-regexp.js b/test/mjsunit/mirror-regexp.js
index d6a9d71..0ea0742 100644
--- a/test/mjsunit/mirror-regexp.js
+++ b/test/mjsunit/mirror-regexp.js

@@ -25,7 +25,7 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-// Flags: --expose-debug-as debug
+// Flags: --expose-debug-as debug --harmony-unicode
 // Test the mirror object for regular expression values
 
 var all_attributes = debug.PropertyAttribute.ReadOnly |
@@ -36,6 +36,7 @@
   'global': all_attributes,
   'ignoreCase': all_attributes,
   'multiline': all_attributes,
+  'unicode' : all_attributes,
   'lastIndex': debug.PropertyAttribute.DontEnum | debug.PropertyAttribute.DontDelete
 };
 
@@ -108,3 +109,4 @@
 testRegExpMirror(/[abc]/);
 testRegExpMirror(/[\r\n]/g);
 testRegExpMirror(/a*b/gmi);
+testRegExpMirror(/(\u{0066}|\u{0062})oo/u);
commit	2305cfb4e2b196918095d3404267772e93916616	[log] [tgz]
author	marja <marja@chromium.org>	Mon Jan 12 09:50:15 2015
committer	Commit bot <commit-bot@chromium.org>	Mon Jan 12 09:50:34 2015
tree	2079bc35689d6df7e11cd4a28bc41959eb5076d7
parent	f9a22a5e587cbd1d8da7424543b9b90118eea18e [diff]