Squash *+, *?, +*, +?, ?* and ?+.
Change-Id: Ied7dd7fb2e718cdf397b4dc4167ab622a9dbdc01
Reviewed-on: https://code-review.googlesource.com/11132
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/parse.cc b/re2/parse.cc
index 6d24016..0729b75 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -486,6 +486,17 @@
if (op == stacktop_->op() && fl == stacktop_->parse_flags())
return true;
+ // Squash *+, *?, +*, +?, ?* and ?+. They all squash to *, so because
+ // op is a repeat, we just have to check that stacktop_->op() is too,
+ // then adjust stacktop_.
+ if ((stacktop_->op() == kRegexpStar ||
+ stacktop_->op() == kRegexpPlus ||
+ stacktop_->op() == kRegexpQuest) &&
+ fl == stacktop_->parse_flags()) {
+ stacktop_->op_ = kRegexpStar;
+ return true;
+ }
+
Regexp* re = new Regexp(op, fl);
re->AllocSub(1);
re->down_ = stacktop_->down_;
diff --git a/re2/regexp.cc b/re2/regexp.cc
index 950c985..8c5fe59 100644
--- a/re2/regexp.cc
+++ b/re2/regexp.cc
@@ -190,31 +190,41 @@
return re;
}
-Regexp* Regexp::Plus(Regexp* sub, ParseFlags flags) {
- if (sub->op() == kRegexpPlus && sub->parse_flags() == flags)
+Regexp* Regexp::StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags) {
+ // Squash **, ++ and ??.
+ if (op == sub->op() && flags == sub->parse_flags())
return sub;
- Regexp* re = new Regexp(kRegexpPlus, flags);
+
+ // Squash *+, *?, +*, +?, ?* and ?+. They all squash to *, so because
+ // op is Star/Plus/Quest, we just have to check that sub->op() is too,
+ // then rewrite sub.
+ if ((sub->op() == kRegexpStar ||
+ sub->op() == kRegexpPlus ||
+ sub->op() == kRegexpQuest) &&
+ flags == sub->parse_flags()) {
+ Regexp* re = new Regexp(kRegexpStar, flags);
+ re->AllocSub(1);
+ re->sub()[0] = sub->sub()[0]->Incref();
+ sub->Decref(); // We didn't consume the reference after all.
+ return re;
+ }
+
+ Regexp* re = new Regexp(op, flags);
re->AllocSub(1);
re->sub()[0] = sub;
return re;
}
+Regexp* Regexp::Plus(Regexp* sub, ParseFlags flags) {
+ return StarPlusOrQuest(kRegexpPlus, sub, flags);
+}
+
Regexp* Regexp::Star(Regexp* sub, ParseFlags flags) {
- if (sub->op() == kRegexpStar && sub->parse_flags() == flags)
- return sub;
- Regexp* re = new Regexp(kRegexpStar, flags);
- re->AllocSub(1);
- re->sub()[0] = sub;
- return re;
+ return StarPlusOrQuest(kRegexpStar, sub, flags);
}
Regexp* Regexp::Quest(Regexp* sub, ParseFlags flags) {
- if (sub->op() == kRegexpQuest && sub->parse_flags() == flags)
- return sub;
- Regexp* re = new Regexp(kRegexpQuest, flags);
- re->AllocSub(1);
- re->sub()[0] = sub;
- return re;
+ return StarPlusOrQuest(kRegexpQuest, sub, flags);
}
Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
diff --git a/re2/regexp.h b/re2/regexp.h
index a11fb3a..9b5d98d 100644
--- a/re2/regexp.h
+++ b/re2/regexp.h
@@ -460,6 +460,10 @@
// Computes whether Regexp is already simple.
bool ComputeSimple();
+ // Constructor that generates a Star, Plus or Quest,
+ // squashing the pair if sub is also a Star, Plus or Quest.
+ static Regexp* StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags);
+
// Constructor that generates a concatenation or alternation,
// enforcing the limit on the number of subexpressions for
// a particular Regexp.
diff --git a/re2/testing/parse_test.cc b/re2/testing/parse_test.cc
index 9a13e18..ec80fc4 100644
--- a/re2/testing/parse_test.cc
+++ b/re2/testing/parse_test.cc
@@ -115,10 +115,16 @@
{ "ab|cd", "alt{str{ab}str{cd}}" },
{ "a(b|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" },
- // Test squashing of **, ++ and ??.
+ // Test squashing of **, ++, ?? et cetera.
{ "(?:(?:a)*)*", "star{lit{a}}" },
{ "(?:(?:a)+)+", "plus{lit{a}}" },
{ "(?:(?:a)?)?", "que{lit{a}}" },
+ { "(?:(?:a)*)+", "star{lit{a}}" },
+ { "(?:(?:a)*)?", "star{lit{a}}" },
+ { "(?:(?:a)+)*", "star{lit{a}}" },
+ { "(?:(?:a)+)?", "star{lit{a}}" },
+ { "(?:(?:a)?)*", "star{lit{a}}" },
+ { "(?:(?:a)?)+", "star{lit{a}}" },
// Test flattening.
{ "(?:a)", "lit{a}" },
diff --git a/re2/testing/simplify_test.cc b/re2/testing/simplify_test.cc
index 33f8a8c..6bdb2af 100644
--- a/re2/testing/simplify_test.cc
+++ b/re2/testing/simplify_test.cc
@@ -231,6 +231,17 @@
// capture. The new capture must have the cap of the old capture.
// Failure to copy it results in cap=0 -> ToString() logs a fatal error.
{ "(a*aab)", "(aa+b)" },
+
+ // Test squashing of **, ++, ?? et cetera.
+ { "(?:(?:a){0,}){0,}", "a*" },
+ { "(?:(?:a){1,}){1,}", "a+" },
+ { "(?:(?:a){0,1}){0,1}", "a?" },
+ { "(?:(?:a){0,}){1,}", "a*" },
+ { "(?:(?:a){0,}){0,1}", "a*" },
+ { "(?:(?:a){1,}){0,}", "a*" },
+ { "(?:(?:a){1,}){0,1}", "a*" },
+ { "(?:(?:a){0,1}){0,}", "a*" },
+ { "(?:(?:a){0,1}){1,}", "a*" },
};
TEST(TestSimplify, SimpleRegexps) {