test/mjsunit/cyrillic.js - v8/v8.git - Git at Google

 // Copyright 2009 the V8 project authors. All rights reserved.
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
 //
 //     * Redistributions of source code must retain the above copyright
 //       notice, this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above
 //       copyright notice, this list of conditions and the following
 //       disclaimer in the documentation and/or other materials provided
 //       with the distribution.
 //     * Neither the name of Google Inc. nor the names of its
 //       contributors may be used to endorse or promote products derived
 //       from this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 // Test Unicode character ranges in regexps.


 // Cyrillic.
 var cyrillic = {
   FIRST: "\u0410",   // A
   first: "\u0430",   // a
   LAST: "\u042f",    // YA
   last: "\u044f",    // ya
   MIDDLE: "\u0427",  // CHE
   middle: "\u0447",   // che
   // Actually no characters are between the cases in Cyrillic.
   BetweenCases: false};

 var SIGMA = "\u03a3";
 var sigma = "\u03c3";
 var alternative_sigma = "\u03c2";

 // Greek.
 var greek = {
   FIRST: "\u0391",     // ALPHA
   first: "\u03b1",     // alpha
   LAST: "\u03a9",      // OMEGA
   last: "\u03c9",      // omega
   MIDDLE: SIGMA,       // SIGMA
   middle: sigma,       // sigma
   // Epsilon acute is between ALPHA-OMEGA and alpha-omega, ie it
   // is between OMEGA and alpha.
   BetweenCases: "\u03ad"};


 function Range(from, to, flags) {
   return new RegExp("[" + from + "-" + to + "]", flags);
 }

 // Test Cyrillic and Greek separately.
 for (var lang = 0; lang < 2; lang++) {
   var chars = (lang == 0) ? cyrillic : greek;

   for (var i = 0; i < 2; i++) {
     var lc = (i == 0);  // Lower case.
     var first = lc ? chars.first : chars.FIRST;
     var middle = lc ? chars.middle : chars.MIDDLE;
     var last = lc ? chars.last : chars.LAST;
     var first_other_case = lc ? chars.FIRST : chars.first;
     var middle_other_case = lc ? chars.MIDDLE : chars.middle;
     var last_other_case = lc ? chars.LAST : chars.last;

     assertTrue(Range(first, last).test(first), 1);
     assertTrue(Range(first, last).test(middle), 2);
     assertTrue(Range(first, last).test(last), 3);

     assertFalse(Range(first, last).test(first_other_case), 4);
     assertFalse(Range(first, last).test(middle_other_case), 5);
     assertFalse(Range(first, last).test(last_other_case), 6);

     assertTrue(Range(first, last, "i").test(first), 7);
     assertTrue(Range(first, last, "i").test(middle), 8);
     assertTrue(Range(first, last, "i").test(last), 9);

     assertTrue(Range(first, last, "i").test(first_other_case), 10);
     assertTrue(Range(first, last, "i").test(middle_other_case), 11);
     assertTrue(Range(first, last, "i").test(last_other_case), 12);

     if (chars.BetweenCases) {
       assertFalse(Range(first, last).test(chars.BetweenCases), 13);
       assertFalse(Range(first, last, "i").test(chars.BetweenCases), 14);
     }
   }
   if (chars.BetweenCases) {
     assertTrue(Range(chars.FIRST, chars.last).test(chars.BetweenCases), 15);
     assertTrue(Range(chars.FIRST, chars.last, "i").test(chars.BetweenCases), 16);
   }
 }

 // Test range that covers both greek and cyrillic characters.
 for (key in greek) {
   assertTrue(Range(greek.FIRST, cyrillic.last).test(greek[key]), 17 + key);
   if (cyrillic[key]) {
     assertTrue(Range(greek.FIRST, cyrillic.last).test(cyrillic[key]), 18 + key);
   }
 }

 for (var i = 0; i < 2; i++) {
   var ignore_case = (i == 0);
   var flag = ignore_case ? "i" : "";
   assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.first), 19);
   assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.middle), 20);
   assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.last), 21);

   assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.FIRST), 22);
   assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.MIDDLE), 23);
   assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.LAST), 24);

   // A range that covers the lower case greek letters and the upper case cyrillic
   // letters.
   assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.FIRST), 25);
   assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.MIDDLE), 26);
   assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.LAST), 27);

   assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.first), 28);
   assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.middle), 29);
   assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.last), 30);
 }


 // Sigma is special because there are two lower case versions of the same upper
 // case character.  JS requires that case independence means that you should
 // convert everything to upper case, so the two sigma variants are equal to each
 // other in a case independt comparison.
 for (var i = 0; i < 2; i++) {
   var simple = (i != 0);
   var name = simple ? "" : "[]";
   var regex = simple ? SIGMA : "[" + SIGMA + "]";

   assertFalse(new RegExp(regex).test(sigma), 31 + name);
   assertFalse(new RegExp(regex).test(alternative_sigma), 32 + name);
   assertTrue(new RegExp(regex).test(SIGMA), 33 + name);

   assertTrue(new RegExp(regex, "i").test(sigma), 34 + name);
   // JSC and Tracemonkey fail this one.
   assertTrue(new RegExp(regex, "i").test(alternative_sigma), 35 + name);
   assertTrue(new RegExp(regex, "i").test(SIGMA), 36 + name);

   regex = simple ? sigma : "[" + sigma + "]";

   assertTrue(new RegExp(regex).test(sigma), 41 + name);
   assertFalse(new RegExp(regex).test(alternative_sigma), 42 + name);
   assertFalse(new RegExp(regex).test(SIGMA), 43 + name);

   assertTrue(new RegExp(regex, "i").test(sigma), 44 + name);
   // JSC and Tracemonkey fail this one.
   assertTrue(new RegExp(regex, "i").test(alternative_sigma), 45 + name);
   assertTrue(new RegExp(regex, "i").test(SIGMA), 46 + name);

   regex = simple ? alternative_sigma : "[" + alternative_sigma + "]";

   assertFalse(new RegExp(regex).test(sigma), 51 + name);
   assertTrue(new RegExp(regex).test(alternative_sigma), 52 + name);
   assertFalse(new RegExp(regex).test(SIGMA), 53 + name);

   // JSC and Tracemonkey fail this one.
   assertTrue(new RegExp(regex, "i").test(sigma), 54 + name);
   assertTrue(new RegExp(regex, "i").test(alternative_sigma), 55 + name);
   // JSC and Tracemonkey fail this one.
   assertTrue(new RegExp(regex, "i").test(SIGMA), 56 + name);
 }


 for (var add_non_ascii_character_to_subject = 0;
      add_non_ascii_character_to_subject < 2;
      add_non_ascii_character_to_subject++) {
   var suffix = add_non_ascii_character_to_subject ? "\ufffe" : "";
   // A range that covers both ASCII and non-ASCII.
   for (var i = 0; i < 2; i++) {
     var full = (i != 0);
     var mixed = full ? "[a-\uffff]" : "[a-" + cyrillic.LAST + "]";
     var f = full ? "f" : "c";
     for (var j = 0; j < 2; j++) {
       var ignore_case = (j == 0);
       var flag = ignore_case ? "i" : "";
       var re = new RegExp(mixed, flag);
       var expected =
           ignore_case || (full && !!add_non_ascii_character_to_subject);
       assertEquals(expected, re.test("A" + suffix), 58 + flag + f);
       assertTrue(re.test("a" + suffix), 59 + flag + f);
       assertTrue(re.test("~" + suffix), 60 + flag + f);
       assertTrue(re.test(cyrillic.MIDDLE), 61 + flag + f);
       assertEquals(ignore_case || full, re.test(cyrillic.middle), 62 + flag + f);
     }
   }
 }
	// Copyright 2009 the V8 project authors. All rights reserved.
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are
	// met:
	//
	// * Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	// * Redistributions in binary form must reproduce the above
	// copyright notice, this list of conditions and the following
	// disclaimer in the documentation and/or other materials provided
	// with the distribution.
	// * Neither the name of Google Inc. nor the names of its
	// contributors may be used to endorse or promote products derived
	// from this software without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	// Test Unicode character ranges in regexps.


	// Cyrillic.
	var cyrillic = {
	FIRST: "\u0410", // A
	first: "\u0430", // a
	LAST: "\u042f", // YA
	last: "\u044f", // ya
	MIDDLE: "\u0427", // CHE
	middle: "\u0447", // che
	// Actually no characters are between the cases in Cyrillic.
	BetweenCases: false};

	var SIGMA = "\u03a3";
	var sigma = "\u03c3";
	var alternative_sigma = "\u03c2";

	// Greek.
	var greek = {
	FIRST: "\u0391", // ALPHA
	first: "\u03b1", // alpha
	LAST: "\u03a9", // OMEGA
	last: "\u03c9", // omega
	MIDDLE: SIGMA, // SIGMA
	middle: sigma, // sigma
	// Epsilon acute is between ALPHA-OMEGA and alpha-omega, ie it
	// is between OMEGA and alpha.
	BetweenCases: "\u03ad"};


	function Range(from, to, flags) {
	return new RegExp("[" + from + "-" + to + "]", flags);
	}

	// Test Cyrillic and Greek separately.
	for (var lang = 0; lang < 2; lang++) {
	var chars = (lang == 0) ? cyrillic : greek;

	for (var i = 0; i < 2; i++) {
	var lc = (i == 0); // Lower case.
	var first = lc ? chars.first : chars.FIRST;
	var middle = lc ? chars.middle : chars.MIDDLE;
	var last = lc ? chars.last : chars.LAST;
	var first_other_case = lc ? chars.FIRST : chars.first;
	var middle_other_case = lc ? chars.MIDDLE : chars.middle;
	var last_other_case = lc ? chars.LAST : chars.last;

	assertTrue(Range(first, last).test(first), 1);
	assertTrue(Range(first, last).test(middle), 2);
	assertTrue(Range(first, last).test(last), 3);

	assertFalse(Range(first, last).test(first_other_case), 4);
	assertFalse(Range(first, last).test(middle_other_case), 5);
	assertFalse(Range(first, last).test(last_other_case), 6);

	assertTrue(Range(first, last, "i").test(first), 7);
	assertTrue(Range(first, last, "i").test(middle), 8);
	assertTrue(Range(first, last, "i").test(last), 9);

	assertTrue(Range(first, last, "i").test(first_other_case), 10);
	assertTrue(Range(first, last, "i").test(middle_other_case), 11);
	assertTrue(Range(first, last, "i").test(last_other_case), 12);

	if (chars.BetweenCases) {
	assertFalse(Range(first, last).test(chars.BetweenCases), 13);
	assertFalse(Range(first, last, "i").test(chars.BetweenCases), 14);
	}
	}
	if (chars.BetweenCases) {
	assertTrue(Range(chars.FIRST, chars.last).test(chars.BetweenCases), 15);
	assertTrue(Range(chars.FIRST, chars.last, "i").test(chars.BetweenCases), 16);
	}
	}

	// Test range that covers both greek and cyrillic characters.
	for (key in greek) {
	assertTrue(Range(greek.FIRST, cyrillic.last).test(greek[key]), 17 + key);
	if (cyrillic[key]) {
	assertTrue(Range(greek.FIRST, cyrillic.last).test(cyrillic[key]), 18 + key);
	}
	}

	for (var i = 0; i < 2; i++) {
	var ignore_case = (i == 0);
	var flag = ignore_case ? "i" : "";
	assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.first), 19);
	assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.middle), 20);
	assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.last), 21);

	assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.FIRST), 22);
	assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.MIDDLE), 23);
	assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.LAST), 24);

	// A range that covers the lower case greek letters and the upper case cyrillic
	// letters.
	assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.FIRST), 25);
	assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.MIDDLE), 26);
	assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.LAST), 27);

	assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.first), 28);
	assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.middle), 29);
	assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.last), 30);
	}


	// Sigma is special because there are two lower case versions of the same upper
	// case character. JS requires that case independence means that you should
	// convert everything to upper case, so the two sigma variants are equal to each
	// other in a case independt comparison.
	for (var i = 0; i < 2; i++) {
	var simple = (i != 0);
	var name = simple ? "" : "[]";
	var regex = simple ? SIGMA : "[" + SIGMA + "]";

	assertFalse(new RegExp(regex).test(sigma), 31 + name);
	assertFalse(new RegExp(regex).test(alternative_sigma), 32 + name);
	assertTrue(new RegExp(regex).test(SIGMA), 33 + name);

	assertTrue(new RegExp(regex, "i").test(sigma), 34 + name);
	// JSC and Tracemonkey fail this one.
	assertTrue(new RegExp(regex, "i").test(alternative_sigma), 35 + name);
	assertTrue(new RegExp(regex, "i").test(SIGMA), 36 + name);

	regex = simple ? sigma : "[" + sigma + "]";

	assertTrue(new RegExp(regex).test(sigma), 41 + name);
	assertFalse(new RegExp(regex).test(alternative_sigma), 42 + name);
	assertFalse(new RegExp(regex).test(SIGMA), 43 + name);

	assertTrue(new RegExp(regex, "i").test(sigma), 44 + name);
	// JSC and Tracemonkey fail this one.
	assertTrue(new RegExp(regex, "i").test(alternative_sigma), 45 + name);
	assertTrue(new RegExp(regex, "i").test(SIGMA), 46 + name);

	regex = simple ? alternative_sigma : "[" + alternative_sigma + "]";

	assertFalse(new RegExp(regex).test(sigma), 51 + name);
	assertTrue(new RegExp(regex).test(alternative_sigma), 52 + name);
	assertFalse(new RegExp(regex).test(SIGMA), 53 + name);

	// JSC and Tracemonkey fail this one.
	assertTrue(new RegExp(regex, "i").test(sigma), 54 + name);
	assertTrue(new RegExp(regex, "i").test(alternative_sigma), 55 + name);
	// JSC and Tracemonkey fail this one.
	assertTrue(new RegExp(regex, "i").test(SIGMA), 56 + name);
	}


	for (var add_non_ascii_character_to_subject = 0;
	add_non_ascii_character_to_subject < 2;
	add_non_ascii_character_to_subject++) {
	var suffix = add_non_ascii_character_to_subject ? "\ufffe" : "";
	// A range that covers both ASCII and non-ASCII.
	for (var i = 0; i < 2; i++) {
	var full = (i != 0);
	var mixed = full ? "[a-\uffff]" : "[a-" + cyrillic.LAST + "]";
	var f = full ? "f" : "c";
	for (var j = 0; j < 2; j++) {
	var ignore_case = (j == 0);
	var flag = ignore_case ? "i" : "";
	var re = new RegExp(mixed, flag);
	var expected =
	ignore_case \|\| (full && !!add_non_ascii_character_to_subject);
	assertEquals(expected, re.test("A" + suffix), 58 + flag + f);
	assertTrue(re.test("a" + suffix), 59 + flag + f);
	assertTrue(re.test("~" + suffix), 60 + flag + f);
	assertTrue(re.test(cyrillic.MIDDLE), 61 + flag + f);
	assertEquals(ignore_case \|\| full, re.test(cyrillic.middle), 62 + flag + f);
	}
	}
	}