| /* Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.harmony.regex.tests.java.util.regex; |
| |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| import java.util.regex.PatternSyntaxException; |
| |
| import junit.framework.TestCase; |
| |
| /** |
| * Tests simple Pattern compilation and Matcher methods |
| * |
| */ |
| public class Pattern2Test extends TestCase { |
| |
| public void testUnicodeCategories() throws PatternSyntaxException { |
| // Test Unicode categories using \p and \P |
| // One letter codes: L, M, N, P, S, Z, C |
| // Two letter codes: Lu, Nd, Sc, Sm, ... |
| // See java.lang.Character and Unicode standard for complete list |
| // TODO |
| // Test \p{L} |
| // TODO |
| |
| // Test \p{N} |
| // TODO |
| |
| // Test two letter codes: |
| // From unicode.org: |
| // Lu |
| // Ll |
| // Lt |
| // Lm |
| // Lo |
| // Mn |
| // Mc |
| // Me |
| // Nd |
| // Nl |
| // No |
| // Pc |
| // Pd |
| // Ps |
| // Pe |
| // Pi |
| // Pf |
| // Po |
| // Sm |
| // Sc |
| // Sk |
| // So |
| // Zs |
| // Zl |
| // Zp |
| // Cc |
| // Cf |
| // Cs |
| // Co |
| // Cn |
| |
| // TODO add more tests per category |
| //{"Cc", "\u0000", "-\u0041"}, |
| testCategory("Cf", "\u202B"); |
| testCategory("Co", "\uE000"); |
| testCategory("Cs", "\uD800"); |
| testCategory("Ll", "a", "b", "x", "y", "z", "-A", "-Z"); |
| testCategory("Lm", "\u02B9"); |
| testCategory("Lu", "B", "C", "-c"); |
| testCategory("Lo", "\u05E2"); |
| testCategory("Lt", "\u01C5"); |
| testCategory("Mc", "\u0903"); |
| testCategory("Me", "\u0488"); |
| testCategory("Mn", "\u0300"); |
| testCategory("Nd", "\u0030"); |
| testCategory("Nl", "\u2164"); |
| testCategory("No", "\u0BF0"); |
| // testCategory("Pc", "\u30FB"); |
| testCategory("Pd", "\u2015"); |
| testCategory("Pe", "\u207E"); |
| testCategory("Po", "\u00B7"); |
| testCategory("Ps", "\u0F3C"); |
| testCategory("Sc", "\u20A0"); |
| testCategory("Sk", "\u00B8"); |
| testCategory("Sm", "\u002B"); |
| testCategory("So", "\u0B70"); |
| testCategory("Zl", "\u2028"); |
| // testCategory("Pi", "\u200C"); |
| testCategory("Zp", "\u2029"); |
| } |
| |
| private void testCategory(String cat, String... matches) { |
| String pa = "{"+cat+"}"; |
| String pat = "\\p"+pa; |
| String npat = "\\P"+pa; |
| Pattern p = Pattern.compile(pat); |
| Pattern pn = Pattern.compile(npat); |
| for (int j = 0; j < matches.length; j++) { |
| String t = matches[j]; |
| boolean invert = t.startsWith("-"); |
| if (invert) { |
| // test negative case, expected to fail |
| t = t.substring(1); |
| assertFalse("expected '"+t+"' to not be matched " + |
| "by pattern '"+pat, p.matcher(t).matches()); |
| assertTrue("expected '"+t+"' to " + |
| "be matched by pattern '"+npat, pn.matcher(t).matches()); |
| } else { |
| assertTrue("expected '"+t+"' to be matched " + |
| "by pattern '"+pat, p.matcher(t).matches()); |
| assertFalse("expected '"+t+"' to " + |
| "not be matched by pattern '"+npat, pn.matcher(t).matches()); |
| } |
| } |
| } |
| |
| public void testCapturingGroups() throws PatternSyntaxException { |
| Pattern p; |
| Matcher m; |
| |
| // Test simple capturing groups |
| p = Pattern.compile("(a+)b"); |
| m = p.matcher("aaaaaaaab"); |
| assertTrue(m.matches()); |
| assertEquals(1, m.groupCount()); |
| assertEquals("aaaaaaaa", m.group(1)); |
| |
| p = Pattern.compile("((an)+)((as)+)"); |
| m = p.matcher("ananas"); |
| assertTrue(m.matches()); |
| assertEquals(4, m.groupCount()); |
| assertEquals("ananas", m.group(0)); |
| assertEquals("anan", m.group(1)); |
| assertEquals("an", m.group(2)); |
| assertEquals("as", m.group(3)); |
| assertEquals("as", m.group(4)); |
| |
| // Test grouping without capture (?:...) |
| p = Pattern.compile("(?:(?:an)+)(as)"); |
| m = p.matcher("ananas"); |
| assertTrue(m.matches()); |
| assertEquals(1, m.groupCount()); |
| assertEquals("as", m.group(1)); |
| try { |
| m.group(2); |
| fail("expected IndexOutOfBoundsException"); |
| } catch (IndexOutOfBoundsException ioobe) { |
| // expected |
| } |
| |
| // Test combination of grouping and capture |
| // TODO |
| |
| // Test \<num> sequence with capturing and non-capturing groups |
| // TODO |
| |
| // Test \<num> with <num> out of range |
| p = Pattern.compile("((an)+)as\\1"); |
| m = p.matcher("ananasanan"); |
| assertTrue(m.matches()); |
| |
| try { |
| p = Pattern.compile("((an)+)as\\4"); |
| fail("expected PatternSyntaxException"); |
| } catch (PatternSyntaxException pse) { |
| // expected |
| } |
| |
| } |
| public void testRepeats() { |
| Pattern p; |
| Matcher m; |
| |
| // Test ? |
| p = Pattern.compile("(abc)?c"); |
| m = p.matcher("abcc"); |
| assertTrue(m.matches()); |
| m = p.matcher("c"); |
| assertTrue(m.matches()); |
| m = p.matcher("cc"); |
| assertFalse(m.matches()); |
| m = p.matcher("abcabcc"); |
| assertFalse(m.matches()); |
| |
| // Test * |
| p = Pattern.compile("(abc)*c"); |
| m = p.matcher("abcc"); |
| assertTrue(m.matches()); |
| m = p.matcher("c"); |
| assertTrue(m.matches()); |
| m = p.matcher("cc"); |
| assertFalse(m.matches()); |
| m = p.matcher("abcabcc"); |
| assertTrue(m.matches()); |
| |
| // Test + |
| p = Pattern.compile("(abc)+c"); |
| m = p.matcher("abcc"); |
| assertTrue(m.matches()); |
| m = p.matcher("c"); |
| assertFalse(m.matches()); |
| m = p.matcher("cc"); |
| assertFalse(m.matches()); |
| m = p.matcher("abcabcc"); |
| assertTrue(m.matches()); |
| |
| // Test {<num>}, including 0, 1 and more |
| p = Pattern.compile("(abc){0}c"); |
| m = p.matcher("abcc"); |
| assertFalse(m.matches()); |
| m = p.matcher("c"); |
| assertTrue(m.matches()); |
| |
| p = Pattern.compile("(abc){1}c"); |
| m = p.matcher("abcc"); |
| assertTrue(m.matches()); |
| m = p.matcher("c"); |
| assertFalse(m.matches()); |
| m = p.matcher("abcabcc"); |
| assertFalse(m.matches()); |
| |
| p = Pattern.compile("(abc){2}c"); |
| m = p.matcher("abcc"); |
| assertFalse(m.matches()); |
| m = p.matcher("c"); |
| assertFalse(m.matches()); |
| m = p.matcher("cc"); |
| assertFalse(m.matches()); |
| m = p.matcher("abcabcc"); |
| assertTrue(m.matches()); |
| |
| // Test {<num>,}, including 0, 1 and more |
| // TODO |
| |
| // Test {<n1>,<n2>}, with n1 < n2, n1 = n2 and n1 > n2 (illegal?) |
| // TODO |
| } |
| public void testAnchors() throws PatternSyntaxException { |
| Pattern p; |
| Matcher m; |
| |
| // Test ^, default and MULTILINE |
| p = Pattern.compile("^abc\\n^abc", Pattern.MULTILINE); |
| m = p.matcher("abc\nabc"); |
| assertTrue(m.matches()); |
| |
| p = Pattern.compile("^abc\\n^abc"); |
| m = p.matcher("abc\nabc"); |
| assertFalse(m.matches()); |
| |
| // Test $, default and MULTILINE |
| // TODO |
| |
| // Test \b (word boundary) |
| // TODO |
| |
| // Test \B (not a word boundary) |
| // TODO |
| |
| // Test \A (beginning of string) |
| // TODO |
| |
| // Test \Z (end of string) |
| // TODO |
| |
| // Test \z (end of string) |
| // TODO |
| |
| // Test \G |
| // TODO |
| |
| // Test positive lookahead using (?=...) |
| // TODO |
| |
| // Test negative lookahead using (?!...) |
| // TODO |
| |
| // Test positive lookbehind using (?<=...) |
| // TODO |
| |
| // Test negative lookbehind using (?<!...) |
| // TODO |
| } |
| } |