// blob: ee8ecfaea8e0582448b864b9b2733f58b03769ac [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.harmony.regex.tests.java.util.regex;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import junit.framework.TestCase;
/**
 * Tests simple Pattern compilation and Matcher methods: Unicode category
 * classes, capturing groups and back references, repetition quantifiers and
 * the {@code ^} anchor.
 */
public class Pattern2Test extends TestCase {

    /**
     * Checks {@code \p{..}} and {@code \P{..}} against sample characters for
     * the two-letter Unicode general categories.
     *
     * One letter codes: L, M, N, P, S, Z, C.
     * Two letter codes (from unicode.org):
     * Lu Ll Lt Lm Lo, Mn Mc Me, Nd Nl No, Pc Pd Ps Pe Pi Pf Po,
     * Sm Sc Sk So, Zs Zl Zp, Cc Cf Cs Co Cn.
     * See java.lang.Character and the Unicode standard for the complete list.
     */
    public void testUnicodeCategories() throws PatternSyntaxException {
        // TODO Test \p{L}
        // TODO Test \p{N}
        // TODO add more tests per category

        // Other
        //{"Cc", "\u0000", "-\u0041"},
        testCategory("Cf", "\u202B");
        testCategory("Co", "\uE000");
        testCategory("Cs", "\uD800");

        // Letters
        testCategory("Ll", "a", "b", "x", "y", "z", "-A", "-Z");
        testCategory("Lm", "\u02B9");
        testCategory("Lu", "B", "C", "-c");
        testCategory("Lo", "\u05E2");
        testCategory("Lt", "\u01C5");

        // Marks
        testCategory("Mc", "\u0903");
        testCategory("Me", "\u0488");
        testCategory("Mn", "\u0300");

        // Numbers
        testCategory("Nd", "\u0030");
        testCategory("Nl", "\u2164");
        testCategory("No", "\u0BF0");

        // Punctuation
        // NOTE(review): the Pc and Pi cases were already disabled; presumably
        // the sample characters do not belong to those categories on the
        // target runtime -- confirm before re-enabling.
        // testCategory("Pc", "\u30FB");
        testCategory("Pd", "\u2015");
        testCategory("Pe", "\u207E");
        testCategory("Po", "\u00B7");
        testCategory("Ps", "\u0F3C");

        // Symbols
        testCategory("Sc", "\u20A0");
        testCategory("Sk", "\u00B8");
        testCategory("Sm", "\u002B");
        testCategory("So", "\u0B70");

        // Separators
        testCategory("Zl", "\u2028");
        // testCategory("Pi", "\u200C");
        testCategory("Zp", "\u2029");
    }

    /**
     * Asserts membership (or non-membership) of each sample in a Unicode
     * category, using both the positive {@code \p} and the negated {@code \P}
     * form of the property class.
     *
     * @param cat category name placed between the braces, e.g. "Lu"
     * @param matches sample strings; a sample written as '-' followed by at
     *        least one character is a negative sample: the text after the
     *        '-' must NOT be in the category. A lone "-" is taken literally
     *        as the hyphen character, so it can be used as a positive sample.
     */
    private void testCategory(String cat, String... matches) {
        String pat = "\\p{" + cat + "}";
        String npat = "\\P{" + cat + "}";
        Pattern p = Pattern.compile(pat);
        Pattern pn = Pattern.compile(npat);
        for (String sample : matches) {
            // Only treat '-' as a marker when something follows it; otherwise
            // the sample would collapse to the empty string.
            boolean invert = sample.length() > 1 && sample.startsWith("-");
            String t = invert ? sample.substring(1) : sample;
            if (invert) {
                // negative case: \p must reject, \P must accept
                assertFalse("expected '" + t + "' to not be matched "
                        + "by pattern '" + pat + "'", p.matcher(t).matches());
                assertTrue("expected '" + t + "' to "
                        + "be matched by pattern '" + npat + "'", pn.matcher(t).matches());
            } else {
                // positive case: \p must accept, \P must reject
                assertTrue("expected '" + t + "' to be matched "
                        + "by pattern '" + pat + "'", p.matcher(t).matches());
                assertFalse("expected '" + t + "' to "
                        + "not be matched by pattern '" + npat + "'", pn.matcher(t).matches());
            }
        }
    }

    /**
     * Checks group counting and group contents for capturing and
     * non-capturing groups, and the handling of numeric back references.
     */
    public void testCapturingGroups() throws PatternSyntaxException {
        Pattern p;
        Matcher m;

        // Test simple capturing groups
        p = Pattern.compile("(a+)b");
        m = p.matcher("aaaaaaaab");
        assertTrue(m.matches());
        assertEquals(1, m.groupCount());
        assertEquals("aaaaaaaa", m.group(1));

        // Nested, repeated groups: a repeated group reports its last iteration
        p = Pattern.compile("((an)+)((as)+)");
        m = p.matcher("ananas");
        assertTrue(m.matches());
        assertEquals(4, m.groupCount());
        assertEquals("ananas", m.group(0));
        assertEquals("anan", m.group(1));
        assertEquals("an", m.group(2));
        assertEquals("as", m.group(3));
        assertEquals("as", m.group(4));

        // Test grouping without capture (?:...)
        p = Pattern.compile("(?:(?:an)+)(as)");
        m = p.matcher("ananas");
        assertTrue(m.matches());
        assertEquals(1, m.groupCount());
        assertEquals("as", m.group(1));
        try {
            m.group(2);
            fail("expected IndexOutOfBoundsException");
        } catch (IndexOutOfBoundsException expected) {
            // expected: only one capturing group exists
        }

        // Test combination of grouping and capture
        // TODO
        // Test \<num> sequence with capturing and non-capturing groups
        // TODO

        // Test \<num> with <num> out of range
        p = Pattern.compile("((an)+)as\\1");
        m = p.matcher("ananasanan");
        assertTrue(m.matches());
        try {
            // NOTE(review): rejecting a reference to a non-existent group at
            // compile time may be implementation-specific -- confirm on the
            // target runtime.
            Pattern.compile("((an)+)as\\4");
            fail("expected PatternSyntaxException");
        } catch (PatternSyntaxException expected) {
            // expected: the pattern has only two capturing groups
        }
    }

    /**
     * Checks the quantifiers {@code ?}, {@code *}, {@code +} and the exact
     * count form {@code {n}} applied to a group, using full-input matching.
     */
    public void testRepeats() {
        Pattern p;

        // Test ?
        p = Pattern.compile("(abc)?c");
        assertTrue(p.matcher("abcc").matches());
        assertTrue(p.matcher("c").matches());
        assertFalse(p.matcher("cc").matches());
        assertFalse(p.matcher("abcabcc").matches());

        // Test *
        p = Pattern.compile("(abc)*c");
        assertTrue(p.matcher("abcc").matches());
        assertTrue(p.matcher("c").matches());
        assertFalse(p.matcher("cc").matches());
        assertTrue(p.matcher("abcabcc").matches());

        // Test +
        p = Pattern.compile("(abc)+c");
        assertTrue(p.matcher("abcc").matches());
        assertFalse(p.matcher("c").matches());
        assertFalse(p.matcher("cc").matches());
        assertTrue(p.matcher("abcabcc").matches());

        // Test {<num>}, including 0, 1 and more
        p = Pattern.compile("(abc){0}c");
        assertFalse(p.matcher("abcc").matches());
        assertTrue(p.matcher("c").matches());

        p = Pattern.compile("(abc){1}c");
        assertTrue(p.matcher("abcc").matches());
        assertFalse(p.matcher("c").matches());
        assertFalse(p.matcher("abcabcc").matches());

        p = Pattern.compile("(abc){2}c");
        assertFalse(p.matcher("abcc").matches());
        assertFalse(p.matcher("c").matches());
        assertFalse(p.matcher("cc").matches());
        assertTrue(p.matcher("abcabcc").matches());

        // Test {<num>,}, including 0, 1 and more
        // TODO
        // Test {<n1>,<n2>}, with n1 < n2, n1 = n2 and n1 > n2 (illegal?)
        // TODO
    }

    /**
     * Checks that {@code ^} matches after a line terminator only when
     * {@link Pattern#MULTILINE} is set. The remaining anchors and the
     * lookaround constructs are still to be covered (see TODO list).
     */
    public void testAnchors() throws PatternSyntaxException {
        Pattern p;

        // Test ^, default and MULTILINE
        p = Pattern.compile("^abc\\n^abc", Pattern.MULTILINE);
        assertTrue(p.matcher("abc\nabc").matches());

        // Without MULTILINE the second ^ cannot match in mid-input
        p = Pattern.compile("^abc\\n^abc");
        assertFalse(p.matcher("abc\nabc").matches());

        // TODO Test $, default and MULTILINE
        // TODO Test \b (word boundary)
        // TODO Test \B (not a word boundary)
        // TODO Test \A (beginning of string)
        // TODO Test \Z (end of string)
        // TODO Test \z (end of string)
        // TODO Test \G
        // TODO Test positive lookahead using (?=...)
        // TODO Test negative lookahead using (?!...)
        // TODO Test positive lookbehind using (?<=...)
        // TODO Test negative lookbehind using (?<!...)
    }
}