ES6 unicode escapes, part 2: Regexps.
Allows \u{xxxxx} in regexps. Behind the --harmony-unicode flag.
Part 1 is here: https://codereview.chromium.org/716423002
BUG=v8:3648
LOG=N
Review URL: https://codereview.chromium.org/788043005
Cr-Commit-Position: refs/heads/master@{#26018}
diff --git a/src/bootstrapper.cc b/src/bootstrapper.cc
index 7105eb2..5b616af 100644
--- a/src/bootstrapper.cc
+++ b/src/bootstrapper.cc
@@ -1625,7 +1625,6 @@
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_proxies)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_templates)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_sloppy)
-EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_unicode)
void Genesis::InitializeGlobal_harmony_regexps() {
Handle<JSObject> builtins(native_context()->builtins());
@@ -1639,6 +1638,18 @@
}
+void Genesis::InitializeGlobal_harmony_unicode() {
+ Handle<JSObject> builtins(native_context()->builtins());
+
+ Handle<HeapObject> flag(FLAG_harmony_unicode ? heap()->true_value()
+ : heap()->false_value());
+ PropertyAttributes attributes =
+ static_cast<PropertyAttributes>(DONT_DELETE | READ_ONLY);
+ Runtime::DefineObjectProperty(builtins, factory()->harmony_unicode_string(),
+ flag, attributes).Assert();
+}
+
+
Handle<JSFunction> Genesis::InstallInternalArray(
Handle<JSBuiltinsObject> builtins,
const char* name,
diff --git a/src/heap/heap.h b/src/heap/heap.h
index e6ccf2e..6aeaff8 100644
--- a/src/heap/heap.h
+++ b/src/heap/heap.h
@@ -227,7 +227,9 @@
V(ignore_case_string, "ignoreCase") \
V(multiline_string, "multiline") \
V(sticky_string, "sticky") \
+ V(unicode_string, "unicode") \
V(harmony_regexps_string, "harmony_regexps") \
+ V(harmony_unicode_string, "harmony_unicode") \
V(input_string, "input") \
V(index_string, "index") \
V(last_index_string, "lastIndex") \
diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index 81ad080..a5b9fb5 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -154,7 +154,7 @@
RegExpCompileData parse_result;
FlatStringReader reader(isolate, pattern);
if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
- &parse_result, &zone)) {
+ flags.is_unicode(), &parse_result, &zone)) {
// Throw an exception if we fail to parse the pattern.
return ThrowRegExpException(re,
pattern,
@@ -401,8 +401,7 @@
RegExpCompileData compile_data;
FlatStringReader reader(isolate, pattern);
if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
- &compile_data,
- &zone)) {
+ flags.is_unicode(), &compile_data, &zone)) {
// Throw an exception if we fail to parse the pattern.
// THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
USE(ThrowRegExpException(re,
diff --git a/src/mirror-debugger.js b/src/mirror-debugger.js
index be068ee..a27a790c 100644
--- a/src/mirror-debugger.js
+++ b/src/mirror-debugger.js
@@ -1274,6 +1274,15 @@
};
+/**
+ * Returns whether this regular expression has the unicode (u) flag set.
+ * @return {boolean} Value of the unicode flag
+ */
+RegExpMirror.prototype.unicode = function() {
+ return this.value_.unicode;
+};
+
+
RegExpMirror.prototype.toText = function() {
// Simpel to text which is used when on specialization in subclass.
return "/" + this.source() + "/";
diff --git a/src/objects.h b/src/objects.h
index ccc802d..7eeedfa 100644
--- a/src/objects.h
+++ b/src/objects.h
@@ -7902,7 +7902,8 @@
GLOBAL = 1,
IGNORE_CASE = 2,
MULTILINE = 4,
- STICKY = 8
+ STICKY = 8,
+ UNICODE_ESCAPES = 16
};
class Flags {
@@ -7912,6 +7913,7 @@
bool is_ignore_case() { return (value_ & IGNORE_CASE) != 0; }
bool is_multiline() { return (value_ & MULTILINE) != 0; }
bool is_sticky() { return (value_ & STICKY) != 0; }
+ bool is_unicode() { return (value_ & UNICODE_ESCAPES) != 0; }
uint32_t value() { return value_; }
private:
uint32_t value_;
diff --git a/src/parser.cc b/src/parser.cc
index bfdeaa3..3f7ce4d 100644
--- a/src/parser.cc
+++ b/src/parser.cc
@@ -4278,10 +4278,8 @@
// Regular expressions
-RegExpParser::RegExpParser(FlatStringReader* in,
- Handle<String>* error,
- bool multiline,
- Zone* zone)
+RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
+ bool multiline, bool unicode, Zone* zone)
: isolate_(zone->isolate()),
zone_(zone),
error_(error),
@@ -4292,6 +4290,7 @@
capture_count_(0),
has_more_(true),
multiline_(multiline),
+ unicode_(unicode),
simple_(false),
contains_anchor_(false),
is_scanned_for_captures_(false),
@@ -4348,6 +4347,13 @@
}
+bool RegExpParser::IsSyntaxCharacter(uc32 c) {
+ return c == '^' || c == '$' || c == '\\' || c == '.' || c == '*' ||
+ c == '+' || c == '?' || c == '(' || c == ')' || c == '[' || c == ']' ||
+ c == '{' || c == '}' || c == '|';
+}
+
+
RegExpTree* RegExpParser::ReportError(Vector<const char> message) {
failed_ = true;
*error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked();
@@ -4564,9 +4570,15 @@
}
uc32 first_digit = Next();
if (first_digit == '8' || first_digit == '9') {
- // Treat as identity escape
- builder->AddCharacter(first_digit);
- Advance(2);
+ // If the 'u' flag is present, only syntax characters can be escaped,
+ // no other identity escapes are allowed. If the 'u' flag is not
+ // present, all identity escapes are allowed.
+ if (!FLAG_harmony_unicode || !unicode_) {
+ builder->AddCharacter(first_digit);
+ Advance(2);
+ } else {
+ return ReportError(CStrVector("Invalid escape"));
+ }
break;
}
}
@@ -4622,25 +4634,41 @@
uc32 value;
if (ParseHexEscape(2, &value)) {
builder->AddCharacter(value);
- } else {
+ } else if (!FLAG_harmony_unicode || !unicode_) {
builder->AddCharacter('x');
+ } else {
+ // If the 'u' flag is present, invalid escapes are not treated as
+ // identity escapes.
+ return ReportError(CStrVector("Invalid escape"));
}
break;
}
case 'u': {
Advance(2);
uc32 value;
- if (ParseHexEscape(4, &value)) {
+ if (ParseUnicodeEscape(&value)) {
builder->AddCharacter(value);
- } else {
+ } else if (!FLAG_harmony_unicode || !unicode_) {
builder->AddCharacter('u');
+ } else {
+ // If the 'u' flag is present, invalid escapes are not treated as
+ // identity escapes.
+ return ReportError(CStrVector("Invalid unicode escape"));
}
break;
}
default:
- // Identity escape.
- builder->AddCharacter(Next());
- Advance(2);
+ Advance();
+ // If the 'u' flag is present, only syntax characters can be escaped, no
+ // other identity escapes are allowed. If the 'u' flag is not present,
+ // all identity escapes are allowed.
+ if (!FLAG_harmony_unicode || !unicode_ ||
+ IsSyntaxCharacter(current())) {
+ builder->AddCharacter(current());
+ Advance();
+ } else {
+ return ReportError(CStrVector("Invalid escape"));
+ }
break;
}
break;
@@ -4883,11 +4911,10 @@
}
-bool RegExpParser::ParseHexEscape(int length, uc32 *value) {
+bool RegExpParser::ParseHexEscape(int length, uc32* value) {
int start = position();
uc32 val = 0;
- bool done = false;
- for (int i = 0; !done; i++) {
+ for (int i = 0; i < length; ++i) {
uc32 c = current();
int d = HexValue(c);
if (d < 0) {
@@ -4896,15 +4923,52 @@
}
val = val * 16 + d;
Advance();
- if (i == length - 1) {
- done = true;
- }
}
*value = val;
return true;
}
+bool RegExpParser::ParseUnicodeEscape(uc32* value) {
+ // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are
+ // allowed). In the latter case, the number of hex digits between { } is
+ // arbitrary. \ and u have already been read.
+ if (current() == '{' && FLAG_harmony_unicode && unicode_) {
+ int start = position();
+ Advance();
+ if (ParseUnlimitedLengthHexNumber(0x10ffff, value)) {
+ if (current() == '}') {
+ Advance();
+ return true;
+ }
+ }
+ Reset(start);
+ return false;
+ }
+ // \u but no {, or \u{...} escapes not allowed.
+ return ParseHexEscape(4, value);
+}
+
+
+bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) {
+ uc32 x = 0;
+ int d = HexValue(current());
+ if (d < 0) {
+ return false;
+ }
+ while (d >= 0) {
+ x = x * 16 + d;
+ if (x > max_value) {
+ return false;
+ }
+ Advance();
+ d = HexValue(current());
+ }
+ *value = x;
+ return true;
+}
+
+
uc32 RegExpParser::ParseClassCharacterEscape() {
DCHECK(current() == '\\');
DCHECK(has_next() && !IsSpecialClassEscape(Next()));
@@ -4959,27 +5023,41 @@
if (ParseHexEscape(2, &value)) {
return value;
}
- // If \x is not followed by a two-digit hexadecimal, treat it
- // as an identity escape.
- return 'x';
+ if (!FLAG_harmony_unicode || !unicode_) {
+ // If \x is not followed by a two-digit hexadecimal, treat it
+ // as an identity escape.
+ return 'x';
+ }
+ // If the 'u' flag is present, invalid escapes are not treated as
+ // identity escapes.
+ ReportError(CStrVector("Invalid escape"));
+ return 0;
}
case 'u': {
Advance();
uc32 value;
- if (ParseHexEscape(4, &value)) {
+ if (ParseUnicodeEscape(&value)) {
return value;
}
- // If \u is not followed by a four-digit hexadecimal, treat it
- // as an identity escape.
- return 'u';
+ if (!FLAG_harmony_unicode || !unicode_) {
+ return 'u';
+ }
+ // If the 'u' flag is present, invalid escapes are not treated as
+ // identity escapes.
+ ReportError(CStrVector("Invalid unicode escape"));
+ return 0;
}
default: {
- // Extended identity escape. We accept any character that hasn't
- // been matched by a more specific case, not just the subset required
- // by the ECMAScript specification.
uc32 result = current();
- Advance();
- return result;
+ // If the 'u' flag is present, only syntax characters can be escaped, no
+ // other identity escapes are allowed. If the 'u' flag is not present, all
+ // identity escapes are allowed.
+ if (!FLAG_harmony_unicode || !unicode_ || IsSyntaxCharacter(result)) {
+ Advance();
+ return result;
+ }
+ ReportError(CStrVector("Invalid escape"));
+ return 0;
}
}
return 0;
@@ -5085,12 +5163,11 @@
// ----------------------------------------------------------------------------
// The Parser interface.
-bool RegExpParser::ParseRegExp(FlatStringReader* input,
- bool multiline,
- RegExpCompileData* result,
+bool RegExpParser::ParseRegExp(FlatStringReader* input, bool multiline,
+ bool unicode, RegExpCompileData* result,
Zone* zone) {
DCHECK(result != NULL);
- RegExpParser parser(input, &result->error, multiline, zone);
+ RegExpParser parser(input, &result->error, multiline, unicode, zone);
RegExpTree* tree = parser.ParsePattern();
if (parser.failed()) {
DCHECK(tree == NULL);
diff --git a/src/parser.h b/src/parser.h
index 219f1c4..518b443 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -222,15 +222,11 @@
class RegExpParser BASE_EMBEDDED {
public:
- RegExpParser(FlatStringReader* in,
- Handle<String>* error,
- bool multiline_mode,
- Zone* zone);
+ RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode,
+ bool unicode, Zone* zone);
- static bool ParseRegExp(FlatStringReader* input,
- bool multiline,
- RegExpCompileData* result,
- Zone* zone);
+ static bool ParseRegExp(FlatStringReader* input, bool multiline, bool unicode,
+ RegExpCompileData* result, Zone* zone);
RegExpTree* ParsePattern();
RegExpTree* ParseDisjunction();
@@ -248,6 +244,8 @@
// Checks whether the following is a length-digit hexadecimal number,
// and sets the value if it is.
bool ParseHexEscape(int length, uc32* value);
+ bool ParseUnicodeEscape(uc32* value);
+ bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value);
uc32 ParseOctalLiteral();
@@ -272,6 +270,8 @@
int position() { return next_pos_ - 1; }
bool failed() { return failed_; }
+ static bool IsSyntaxCharacter(uc32 c);
+
static const int kMaxCaptures = 1 << 16;
static const uc32 kEndMarker = (1 << 21);
@@ -338,6 +338,7 @@
int capture_count_;
bool has_more_;
bool multiline_;
+ bool unicode_;
bool simple_;
bool contains_anchor_;
bool is_scanned_for_captures_;
diff --git a/src/regexp.js b/src/regexp.js
index e1eac76..98d0394 100644
--- a/src/regexp.js
+++ b/src/regexp.js
@@ -22,6 +22,8 @@
flags = (pattern.global ? 'g' : '')
+ (pattern.ignoreCase ? 'i' : '')
+ (pattern.multiline ? 'm' : '');
+ if (harmony_unicode)
+ flags += (pattern.unicode ? 'u' : '');
if (harmony_regexps)
flags += (pattern.sticky ? 'y' : '');
pattern = pattern.source;
@@ -235,6 +237,7 @@
if (this.global) result += 'g';
if (this.ignoreCase) result += 'i';
if (this.multiline) result += 'm';
+ if (harmony_unicode && this.unicode) result += 'u';
if (harmony_regexps && this.sticky) result += 'y';
return result;
}
diff --git a/src/runtime/runtime-regexp.cc b/src/runtime/runtime-regexp.cc
index be9adff..e2cf202 100644
--- a/src/runtime/runtime-regexp.cc
+++ b/src/runtime/runtime-regexp.cc
@@ -805,7 +805,7 @@
uint32_t value = JSRegExp::NONE;
int length = flags->length();
// A longer flags string cannot be valid.
- if (length > 4) return JSRegExp::Flags(0);
+ if (length > 5) return JSRegExp::Flags(0);
for (int i = 0; i < length; i++) {
uint32_t flag = JSRegExp::NONE;
switch (flags->Get(i)) {
@@ -818,6 +818,10 @@
case 'm':
flag = JSRegExp::MULTILINE;
break;
+ case 'u':
+ if (!FLAG_harmony_unicode) return JSRegExp::Flags(0);
+ flag = JSRegExp::UNICODE_ESCAPES;
+ break;
case 'y':
if (!FLAG_harmony_regexps) return JSRegExp::Flags(0);
flag = JSRegExp::STICKY;
@@ -859,10 +863,12 @@
Handle<Object> ignore_case = factory->ToBoolean(flags.is_ignore_case());
Handle<Object> multiline = factory->ToBoolean(flags.is_multiline());
Handle<Object> sticky = factory->ToBoolean(flags.is_sticky());
+ Handle<Object> unicode = factory->ToBoolean(flags.is_unicode());
Map* map = regexp->map();
Object* constructor = map->constructor();
- if (!FLAG_harmony_regexps && constructor->IsJSFunction() &&
+ if (!FLAG_harmony_regexps && !FLAG_harmony_unicode &&
+ constructor->IsJSFunction() &&
JSFunction::cast(constructor)->initial_map() == map) {
// If we still have the original map, set in-object properties directly.
// Both true and false are immovable immortal objects so no need for write
@@ -896,6 +902,10 @@
JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->sticky_string(),
sticky, final).Check();
}
+ if (FLAG_harmony_unicode) {
+ JSObject::SetOwnPropertyIgnoreAttributes(
+ regexp, factory->unicode_string(), unicode, final).Check();
+ }
JSObject::SetOwnPropertyIgnoreAttributes(
regexp, factory->last_index_string(), zero, writable).Check();
}
diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc
index 4a572c8..4ca5c36 100644
--- a/test/cctest/test-regexp.cc
+++ b/test/cctest/test-regexp.cc
@@ -89,8 +89,8 @@
Zone zone(CcTest::i_isolate());
FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
RegExpCompileData result;
- return v8::internal::RegExpParser::ParseRegExp(
- &reader, false, &result, &zone);
+ return v8::internal::RegExpParser::ParseRegExp(&reader, false, false, &result,
+ &zone);
}
@@ -99,8 +99,8 @@
Zone zone(CcTest::i_isolate());
FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
RegExpCompileData result;
- CHECK(v8::internal::RegExpParser::ParseRegExp(
- &reader, false, &result, &zone));
+ CHECK(v8::internal::RegExpParser::ParseRegExp(&reader, false, false, &result,
+ &zone));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
std::ostringstream os;
@@ -114,8 +114,8 @@
Zone zone(CcTest::i_isolate());
FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
RegExpCompileData result;
- CHECK(v8::internal::RegExpParser::ParseRegExp(
- &reader, false, &result, &zone));
+ CHECK(v8::internal::RegExpParser::ParseRegExp(&reader, false, false, &result,
+ &zone));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
return result.simple;
@@ -132,8 +132,8 @@
Zone zone(CcTest::i_isolate());
FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
RegExpCompileData result;
- CHECK(v8::internal::RegExpParser::ParseRegExp(
- &reader, false, &result, &zone));
+ CHECK(v8::internal::RegExpParser::ParseRegExp(&reader, false, false, &result,
+ &zone));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
int min_match = result.tree->min_match();
@@ -405,8 +405,8 @@
Zone zone(CcTest::i_isolate());
FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
RegExpCompileData result;
- CHECK(!v8::internal::RegExpParser::ParseRegExp(
- &reader, false, &result, &zone));
+ CHECK(!v8::internal::RegExpParser::ParseRegExp(&reader, false, false, &result,
+ &zone));
CHECK(result.tree == NULL);
CHECK(!result.error.is_null());
SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
@@ -497,16 +497,17 @@
}
-static RegExpNode* Compile(const char* input, bool multiline, bool is_one_byte,
- Zone* zone) {
+static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
+ bool is_one_byte, Zone* zone) {
Isolate* isolate = CcTest::i_isolate();
FlatStringReader reader(isolate, CStrVector(input));
RegExpCompileData compile_data;
- if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline,
+ if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline, unicode,
&compile_data, zone))
return NULL;
- Handle<String> pattern = isolate->factory()->
- NewStringFromUtf8(CStrVector(input)).ToHandleChecked();
+ Handle<String> pattern = isolate->factory()
+ ->NewStringFromUtf8(CStrVector(input))
+ .ToHandleChecked();
Handle<String> sample_subject =
isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
RegExpEngine::Compile(&compile_data, false, false, multiline, false, pattern,
@@ -515,11 +516,11 @@
}
-static void Execute(const char* input, bool multiline, bool is_one_byte,
- bool dot_output = false) {
+static void Execute(const char* input, bool multiline, bool unicode,
+ bool is_one_byte, bool dot_output = false) {
v8::HandleScope scope(CcTest::isolate());
Zone zone(CcTest::i_isolate());
- RegExpNode* node = Compile(input, multiline, is_one_byte, &zone);
+ RegExpNode* node = Compile(input, multiline, unicode, is_one_byte, &zone);
USE(node);
#ifdef DEBUG
if (dot_output) {
diff --git a/test/mjsunit/harmony/regexp-flags.js b/test/mjsunit/harmony/regexp-flags.js
index 475fda4..bf44d2c 100644
--- a/test/mjsunit/harmony/regexp-flags.js
+++ b/test/mjsunit/harmony/regexp-flags.js
@@ -2,19 +2,14 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-// Flags: --harmony-regexps
+// Flags: --harmony-regexps --harmony-unicode
RegExp.prototype.flags = 'setter should be undefined';
assertEquals('', RegExp('').flags);
assertEquals('', /./.flags);
-assertEquals('gimy', RegExp('', 'ygmi').flags);
-assertEquals('gimy', /foo/ymig.flags);
-
-// TODO(dslomov): When support for the `u` flag is added, uncomment the first
-// line below and remove the second line.
-//assertEquals(RegExp('', 'yumig').flags, 'gimuy');
-assertThrows(function() { RegExp('', 'yumig').flags; }, SyntaxError);
+assertEquals('gimuy', RegExp('', 'yugmi').flags);
+assertEquals('gimuy', /foo/yumig.flags);
var descriptor = Object.getOwnPropertyDescriptor(RegExp.prototype, 'flags');
assertTrue(descriptor.configurable);
diff --git a/test/mjsunit/harmony/unicode-escapes-in-regexps.js b/test/mjsunit/harmony/unicode-escapes-in-regexps.js
new file mode 100644
index 0000000..41abcb7
--- /dev/null
+++ b/test/mjsunit/harmony/unicode-escapes-in-regexps.js
@@ -0,0 +1,212 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// ES6 extends the \uxxxx escape and also allows \u{xxxxx}.
+
+// Flags: --harmony-unicode --harmony-regexps
+
+function testRegexpHelper(r) {
+ assertTrue(r.test("foo"));
+ assertTrue(r.test("boo"));
+ assertFalse(r.test("moo"));
+}
+
+
+(function TestUnicodeEscapes() {
+ testRegexpHelper(/(\u0066|\u0062)oo/);
+ testRegexpHelper(/(\u0066|\u0062)oo/u);
+ testRegexpHelper(/(\u{0066}|\u{0062})oo/u);
+ testRegexpHelper(/(\u{66}|\u{000062})oo/u);
+
+ // Note that we need \\ inside a string, otherwise it's interpreted as a
+ // unicode escape inside a string.
+ testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo"));
+ testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo", "u"));
+ testRegexpHelper(new RegExp("(\\u{0066}|\\u{0062})oo", "u"));
+ testRegexpHelper(new RegExp("(\\u{66}|\\u{000062})oo", "u"));
+
+ // Though, unicode escapes via strings should work too.
+ testRegexpHelper(new RegExp("(\u0066|\u0062)oo"));
+ testRegexpHelper(new RegExp("(\u0066|\u0062)oo", "u"));
+ testRegexpHelper(new RegExp("(\u{0066}|\u{0062})oo", "u"));
+ testRegexpHelper(new RegExp("(\u{66}|\u{000062})oo", "u"));
+})();
+
+
+(function TestUnicodeEscapesInCharacterClasses() {
+ testRegexpHelper(/[\u0062-\u0066]oo/);
+ testRegexpHelper(/[\u0062-\u0066]oo/u);
+ testRegexpHelper(/[\u{0062}-\u{0066}]oo/u);
+ testRegexpHelper(/[\u{62}-\u{00000066}]oo/u);
+
+ // Note that we need \\ inside a string, otherwise it's interpreted as a
+ // unicode escape inside a string.
+ testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo"));
+ testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo", "u"));
+ testRegexpHelper(new RegExp("[\\u{0062}-\\u{0066}]oo", "u"));
+ testRegexpHelper(new RegExp("[\\u{62}-\\u{00000066}]oo", "u"));
+
+ // Though, unicode escapes via strings should work too.
+ testRegexpHelper(new RegExp("[\u0062-\u0066]oo"));
+ testRegexpHelper(new RegExp("[\u0062-\u0066]oo", "u"));
+ testRegexpHelper(new RegExp("[\u{0062}-\u{0066}]oo", "u"));
+ testRegexpHelper(new RegExp("[\u{62}-\u{00000066}]oo", "u"));
+})();
+
+
+(function TestBraceEscapesWithoutUnicodeFlag() {
+ // \u followed by illegal escape will be parsed as u. {x} will be the
+ // character count.
+ function helper1(r) {
+ assertFalse(r.test("fbar"));
+ assertFalse(r.test("fubar"));
+ assertTrue(r.test("fuubar"));
+ assertFalse(r.test("fuuubar"));
+ }
+ helper1(/f\u{2}bar/);
+ helper1(new RegExp("f\\u{2}bar"));
+
+ function helper2(r) {
+ assertFalse(r.test("fbar"));
+ assertTrue(r.test("fubar"));
+ assertTrue(r.test("fuubar"));
+ assertFalse(r.test("fuuubar"));
+ }
+
+ helper2(/f\u{1,2}bar/);
+ helper2(new RegExp("f\\u{1,2}bar"));
+
+ function helper3(r) {
+ assertTrue(r.test("u"));
+ assertTrue(r.test("{"));
+ assertTrue(r.test("2"));
+ assertTrue(r.test("}"));
+ assertFalse(r.test("q"));
+ assertFalse(r.test("("));
+ assertFalse(r.test(")"));
+ }
+ helper3(/[\u{2}]/);
+ helper3(new RegExp("[\\u{2}]"));
+})();
+
+
+(function TestInvalidEscapes() {
+ // Without the u flag, invalid unicode escapes and other invalid escapes are
+ // treated as identity escapes.
+ function helper1(r) {
+ assertTrue(r.test("firstuxz89second"));
+ }
+ helper1(/first\u\x\z\8\9second/);
+ helper1(new RegExp("first\\u\\x\\z\\8\\9second"));
+
+ function helper2(r) {
+ assertTrue(r.test("u"));
+ assertTrue(r.test("x"));
+ assertTrue(r.test("z"));
+ assertTrue(r.test("8"));
+ assertTrue(r.test("9"));
+ assertFalse(r.test("q"));
+ assertFalse(r.test("7"));
+ }
+ helper2(/[\u\x\z\8\9]/);
+ helper2(new RegExp("[\\u\\x\\z\\8\\9]"));
+
+ // However, with the u flag, these are treated as invalid escapes.
+ assertThrows("/\\u/u", SyntaxError);
+ assertThrows("/\\u12/u", SyntaxError);
+ assertThrows("/\\ufoo/u", SyntaxError);
+ assertThrows("/\\x/u", SyntaxError);
+ assertThrows("/\\xfoo/u", SyntaxError);
+ assertThrows("/\\z/u", SyntaxError);
+ assertThrows("/\\8/u", SyntaxError);
+ assertThrows("/\\9/u", SyntaxError);
+
+ assertThrows("new RegExp('\\\\u', 'u')", SyntaxError);
+ assertThrows("new RegExp('\\\\u12', 'u')", SyntaxError);
+ assertThrows("new RegExp('\\\\ufoo', 'u')", SyntaxError);
+ assertThrows("new RegExp('\\\\x', 'u')", SyntaxError);
+ assertThrows("new RegExp('\\\\xfoo', 'u')", SyntaxError);
+ assertThrows("new RegExp('\\\\z', 'u')", SyntaxError);
+ assertThrows("new RegExp('\\\\8', 'u')", SyntaxError);
+ assertThrows("new RegExp('\\\\9', 'u')", SyntaxError);
+})();
+
+
+(function TestTooBigHexEscape() {
+ // The hex number inside \u{} has a maximum value.
+ /\u{10ffff}/u
+ new RegExp("\\u{10ffff}", "u")
+ assertThrows("/\\u{110000}/u", SyntaxError);
+ assertThrows("new RegExp('\\\\u{110000}', 'u')", SyntaxError);
+
+ // Without the u flag, they're of course fine ({x} is the count).
+ /\u{110000}/
+ new RegExp("\\u{110000}")
+})();
+
+
+(function TestSyntaxEscapes() {
+ // Syntax escapes work the same with or without the u flag.
+ function helper(r) {
+ assertTrue(r.test("foo[bar"));
+ assertFalse(r.test("foo]bar"));
+ }
+ helper(/foo\[bar/);
+ helper(new RegExp("foo\\[bar"));
+ helper(/foo\[bar/u);
+ helper(new RegExp("foo\\[bar", "u"));
+})();
+
+
+(function TestUnicodeSurrogates() {
+ // U+10E6D corresponds to the surrogate pair [U+D803, U+DE6D].
+ function helper(r) {
+ assertTrue(r.test("foo\u{10e6d}bar"));
+ }
+ helper(/foo\ud803\ude6dbar/u);
+ helper(new RegExp("foo\\ud803\\ude6dbar", "u"));
+})();
+
+
+(function AllFlags() {
+ // Test that we can pass all possible regexp flags and they work properly.
+ function helper1(r) {
+ assertTrue(r.global);
+ assertTrue(r.ignoreCase);
+ assertTrue(r.multiline);
+ assertTrue(r.sticky);
+ assertTrue(r.unicode);
+ }
+
+ helper1(/foo/gimyu);
+ helper1(new RegExp("foo", "gimyu"));
+
+ function helper2(r) {
+ assertFalse(r.global);
+ assertFalse(r.ignoreCase);
+ assertFalse(r.multiline);
+ assertFalse(r.sticky);
+ assertFalse(r.unicode);
+ }
+
+ helper2(/foo/);
+ helper2(new RegExp("foo"));
+})();
+
+
+(function DuplicatedFlags() {
+ // Test that duplicating the u flag is not allowed.
+ assertThrows("/foo/ugu");
+ assertThrows("new RegExp('foo', 'ugu')");
+})();
+
+
+(function ToString() {
+ // Test that the u flag is included in the string representation of regexps.
+ function helper(r) {
+ assertEquals(r.toString(), "/foo/u");
+ }
+ helper(/foo/u);
+ helper(new RegExp("foo", "u"));
+})();
diff --git a/test/mjsunit/mirror-regexp.js b/test/mjsunit/mirror-regexp.js
index d6a9d71..0ea0742 100644
--- a/test/mjsunit/mirror-regexp.js
+++ b/test/mjsunit/mirror-regexp.js
@@ -25,7 +25,7 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// Flags: --expose-debug-as debug
+// Flags: --expose-debug-as debug --harmony-unicode
// Test the mirror object for regular expression values
var all_attributes = debug.PropertyAttribute.ReadOnly |
@@ -36,6 +36,7 @@
'global': all_attributes,
'ignoreCase': all_attributes,
'multiline': all_attributes,
+ 'unicode' : all_attributes,
'lastIndex': debug.PropertyAttribute.DontEnum | debug.PropertyAttribute.DontDelete
};
@@ -108,3 +109,4 @@
testRegExpMirror(/[abc]/);
testRegExpMirror(/[\r\n]/g);
testRegExpMirror(/a*b/gmi);
+testRegExpMirror(/(\u{0066}|\u{0062})oo/u);