|  | #![allow(warnings)] | 
|  |  | 
|  | // This module defines an internal builder that encapsulates all interaction | 
|  | // with meta::Regex construction, and then 4 public API builders that wrap | 
|  | // around it. The docs are essentially repeated on each of the 4 public | 
|  | // builders, with tweaks to the examples as needed. | 
|  | // | 
|  | // The reason why there are so many builders is partially because of a misstep | 
|  | // in the initial API design: the builder constructor takes in the pattern | 
|  | // strings instead of using the `build` method to accept the pattern strings. | 
|  | // This means `new` has a different signature for each builder. It probably | 
|  | // would have been nicer to use one builder with `fn new()`, and then add | 
|  | // `build(pat)` and `build_many(pats)` constructors. | 
|  | // | 
|  | // The other reason is because I think the `bytes` module should probably | 
|  | // have its own builder type. That way, it is completely isolated from the | 
|  | // top-level API. | 
|  | // | 
|  | // If I could do it again, I'd probably have a `regex::Builder` and a | 
|  | // `regex::bytes::Builder`. Each would have `build` and `build_set` (or | 
|  | // `build_many`) methods for constructing a single pattern `Regex` and a | 
|  | // multi-pattern `RegexSet`, respectively. | 
|  |  | 
|  | use alloc::{ | 
|  | string::{String, ToString}, | 
|  | sync::Arc, | 
|  | vec, | 
|  | vec::Vec, | 
|  | }; | 
|  |  | 
|  | use regex_automata::{ | 
|  | meta, nfa::thompson::WhichCaptures, util::syntax, MatchKind, | 
|  | }; | 
|  |  | 
|  | use crate::error::Error; | 
|  |  | 
|  | /// A builder for constructing a `Regex`, `bytes::Regex`, `RegexSet` or a | 
|  | /// `bytes::RegexSet`. | 
|  | /// | 
|  | /// This is essentially the implementation of the four different builder types | 
|  | /// in the public API: `RegexBuilder`, `bytes::RegexBuilder`, `RegexSetBuilder` | 
|  | /// and `bytes::RegexSetBuilder`. | 
|  | #[derive(Clone, Debug)] | 
|  | struct Builder { | 
|  | pats: Vec<String>, | 
|  | metac: meta::Config, | 
|  | syntaxc: syntax::Config, | 
|  | } | 
|  |  | 
|  | impl Default for Builder { | 
|  | fn default() -> Builder { | 
|  | let metac = meta::Config::new() | 
|  | .nfa_size_limit(Some(10 * (1 << 20))) | 
|  | .hybrid_cache_capacity(2 * (1 << 20)); | 
|  | Builder { pats: vec![], metac, syntaxc: syntax::Config::default() } | 
|  | } | 
|  | } | 
|  |  | 
|  | impl Builder { | 
|  | fn new<I, S>(patterns: I) -> Builder | 
|  | where | 
|  | S: AsRef<str>, | 
|  | I: IntoIterator<Item = S>, | 
|  | { | 
|  | let mut b = Builder::default(); | 
|  | b.pats.extend(patterns.into_iter().map(|p| p.as_ref().to_string())); | 
|  | b | 
|  | } | 
|  |  | 
|  | fn build_one_string(&self) -> Result<crate::Regex, Error> { | 
|  | assert_eq!(1, self.pats.len()); | 
|  | let metac = self | 
|  | .metac | 
|  | .clone() | 
|  | .match_kind(MatchKind::LeftmostFirst) | 
|  | .utf8_empty(true); | 
|  | let syntaxc = self.syntaxc.clone().utf8(true); | 
|  | let pattern = Arc::from(self.pats[0].as_str()); | 
|  | meta::Builder::new() | 
|  | .configure(metac) | 
|  | .syntax(syntaxc) | 
|  | .build(&pattern) | 
|  | .map(|meta| crate::Regex { meta, pattern }) | 
|  | .map_err(Error::from_meta_build_error) | 
|  | } | 
|  |  | 
|  | fn build_one_bytes(&self) -> Result<crate::bytes::Regex, Error> { | 
|  | assert_eq!(1, self.pats.len()); | 
|  | let metac = self | 
|  | .metac | 
|  | .clone() | 
|  | .match_kind(MatchKind::LeftmostFirst) | 
|  | .utf8_empty(false); | 
|  | let syntaxc = self.syntaxc.clone().utf8(false); | 
|  | let pattern = Arc::from(self.pats[0].as_str()); | 
|  | meta::Builder::new() | 
|  | .configure(metac) | 
|  | .syntax(syntaxc) | 
|  | .build(&pattern) | 
|  | .map(|meta| crate::bytes::Regex { meta, pattern }) | 
|  | .map_err(Error::from_meta_build_error) | 
|  | } | 
|  |  | 
|  | fn build_many_string(&self) -> Result<crate::RegexSet, Error> { | 
|  | let metac = self | 
|  | .metac | 
|  | .clone() | 
|  | .match_kind(MatchKind::All) | 
|  | .utf8_empty(true) | 
|  | .which_captures(WhichCaptures::None); | 
|  | let syntaxc = self.syntaxc.clone().utf8(true); | 
|  | let patterns = Arc::from(self.pats.as_slice()); | 
|  | meta::Builder::new() | 
|  | .configure(metac) | 
|  | .syntax(syntaxc) | 
|  | .build_many(&patterns) | 
|  | .map(|meta| crate::RegexSet { meta, patterns }) | 
|  | .map_err(Error::from_meta_build_error) | 
|  | } | 
|  |  | 
|  | fn build_many_bytes(&self) -> Result<crate::bytes::RegexSet, Error> { | 
|  | let metac = self | 
|  | .metac | 
|  | .clone() | 
|  | .match_kind(MatchKind::All) | 
|  | .utf8_empty(false) | 
|  | .which_captures(WhichCaptures::None); | 
|  | let syntaxc = self.syntaxc.clone().utf8(false); | 
|  | let patterns = Arc::from(self.pats.as_slice()); | 
|  | meta::Builder::new() | 
|  | .configure(metac) | 
|  | .syntax(syntaxc) | 
|  | .build_many(&patterns) | 
|  | .map(|meta| crate::bytes::RegexSet { meta, patterns }) | 
|  | .map_err(Error::from_meta_build_error) | 
|  | } | 
|  |  | 
|  | fn case_insensitive(&mut self, yes: bool) -> &mut Builder { | 
|  | self.syntaxc = self.syntaxc.case_insensitive(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | fn multi_line(&mut self, yes: bool) -> &mut Builder { | 
|  | self.syntaxc = self.syntaxc.multi_line(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | fn dot_matches_new_line(&mut self, yes: bool) -> &mut Builder { | 
|  | self.syntaxc = self.syntaxc.dot_matches_new_line(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | fn crlf(&mut self, yes: bool) -> &mut Builder { | 
|  | self.syntaxc = self.syntaxc.crlf(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | fn line_terminator(&mut self, byte: u8) -> &mut Builder { | 
|  | self.metac = self.metac.clone().line_terminator(byte); | 
|  | self.syntaxc = self.syntaxc.line_terminator(byte); | 
|  | self | 
|  | } | 
|  |  | 
|  | fn swap_greed(&mut self, yes: bool) -> &mut Builder { | 
|  | self.syntaxc = self.syntaxc.swap_greed(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | fn ignore_whitespace(&mut self, yes: bool) -> &mut Builder { | 
|  | self.syntaxc = self.syntaxc.ignore_whitespace(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | fn unicode(&mut self, yes: bool) -> &mut Builder { | 
|  | self.syntaxc = self.syntaxc.unicode(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | fn octal(&mut self, yes: bool) -> &mut Builder { | 
|  | self.syntaxc = self.syntaxc.octal(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | fn size_limit(&mut self, limit: usize) -> &mut Builder { | 
|  | self.metac = self.metac.clone().nfa_size_limit(Some(limit)); | 
|  | self | 
|  | } | 
|  |  | 
|  | fn dfa_size_limit(&mut self, limit: usize) -> &mut Builder { | 
|  | self.metac = self.metac.clone().hybrid_cache_capacity(limit); | 
|  | self | 
|  | } | 
|  |  | 
|  | fn nest_limit(&mut self, limit: u32) -> &mut Builder { | 
|  | self.syntaxc = self.syntaxc.nest_limit(limit); | 
|  | self | 
|  | } | 
|  | } | 
|  |  | 
|  | pub(crate) mod string { | 
|  | use crate::{error::Error, Regex, RegexSet}; | 
|  |  | 
|  | use super::Builder; | 
|  |  | 
|  | /// A configurable builder for a [`Regex`]. | 
|  | /// | 
|  | /// This builder can be used to programmatically set flags such as `i` | 
|  | /// (case insensitive) and `x` (for verbose mode). This builder can also be | 
|  | /// used to configure things like the line terminator and a size limit on | 
|  | /// the compiled regular expression. | 
|  | #[derive(Clone, Debug)] | 
|  | pub struct RegexBuilder { | 
|  | builder: Builder, | 
|  | } | 
|  |  | 
|  | impl RegexBuilder { | 
|  | /// Create a new builder with a default configuration for the given | 
|  | /// pattern. | 
|  | /// | 
|  | /// If the pattern is invalid or exceeds the configured size limits, | 
|  | /// then an error will be returned when [`RegexBuilder::build`] is | 
|  | /// called. | 
|  | pub fn new(pattern: &str) -> RegexBuilder { | 
|  | RegexBuilder { builder: Builder::new([pattern]) } | 
|  | } | 
|  |  | 
|  | /// Compiles the pattern given to `RegexBuilder::new` with the | 
|  | /// configuration set on this builder. | 
|  | /// | 
|  | /// If the pattern isn't a valid regex or if a configured size limit | 
|  | /// was exceeded, then an error is returned. | 
|  | pub fn build(&self) -> Result<Regex, Error> { | 
|  | self.builder.build_one_string() | 
|  | } | 
|  |  | 
|  | /// This configures Unicode mode for the entire pattern. | 
|  | /// | 
|  | /// Enabling Unicode mode does a number of things: | 
|  | /// | 
|  | /// * Most fundamentally, it causes the fundamental atom of matching | 
|  | /// to be a single codepoint. When Unicode mode is disabled, it's a | 
|  | /// single byte. For example, when Unicode mode is enabled, `.` will | 
|  | /// match `💩` once, whereas it will match 4 times when Unicode mode | 
|  | /// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.) | 
|  | /// * Case insensitive matching uses Unicode simple case folding rules. | 
|  | /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are | 
|  | /// available. | 
|  | /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and | 
|  | /// `\d`. | 
|  | /// * The word boundary assertions, `\b` and `\B`, use the Unicode | 
|  | /// definition of a word character. | 
|  | /// | 
|  | /// Note that if Unicode mode is disabled, then the regex will fail to | 
|  | /// compile if it could match invalid UTF-8. For example, when Unicode | 
|  | /// mode is disabled, then since `.` matches any byte (except for | 
|  | /// `\n`), then it can match invalid UTF-8 and thus building a regex | 
|  | /// from it will fail. Another example is `\w` and `\W`. Since `\w` can | 
|  | /// only match ASCII bytes when Unicode mode is disabled, it's allowed. | 
|  | /// But `\W` can match more than ASCII bytes, including invalid UTF-8, | 
|  | /// and so it is not allowed. This restriction can be lifted only by | 
|  | /// using a [`bytes::Regex`](crate::bytes::Regex). | 
|  | /// | 
|  | /// For more details on the Unicode support in this crate, see the | 
|  | /// [Unicode section](crate#unicode) in this crate's top-level | 
|  | /// documentation. | 
|  | /// | 
|  | /// The default for this is `true`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"\w") | 
|  | ///     .unicode(false) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// // Normally greek letters would be included in \w, but since | 
|  | /// // Unicode mode is disabled, it only matches ASCII letters. | 
|  | /// assert!(!re.is_match("δ")); | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"s") | 
|  | ///     .case_insensitive(true) | 
|  | ///     .unicode(false) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// // Normally 'ſ' is included when searching for 's' case | 
|  | /// // insensitively due to Unicode's simple case folding rules. But | 
|  | /// // when Unicode mode is disabled, only ASCII case insensitive rules | 
|  | /// // are used. | 
|  | /// assert!(!re.is_match("ſ")); | 
|  | /// ``` | 
|  | pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder { | 
|  | self.builder.unicode(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures whether to enable case insensitive matching for the | 
|  | /// entire pattern. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `i` | 
|  | /// in the pattern. For example, `(?i:foo)` matches `foo` case | 
|  | /// insensitively while `(?-i:foo)` matches `foo` case sensitively. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"foo(?-i:bar)quux") | 
|  | ///     .case_insensitive(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match("FoObarQuUx")); | 
|  | /// // Even though case insensitive matching is enabled in the builder, | 
|  | /// // it can be locally disabled within the pattern. In this case, | 
|  | /// // `bar` is matched case sensitively. | 
|  | /// assert!(!re.is_match("fooBARquux")); | 
|  | /// ``` | 
|  | pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder { | 
|  | self.builder.case_insensitive(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures multi-line mode for the entire pattern. | 
|  | /// | 
|  | /// Enabling multi-line mode changes the behavior of the `^` and `$` | 
|  | /// anchor assertions. Instead of only matching at the beginning and | 
|  | /// end of a haystack, respectively, multi-line mode causes them to | 
|  | /// match at the beginning and end of a line *in addition* to the | 
|  | /// beginning and end of a haystack. More precisely, `^` will match at | 
|  | /// the position immediately following a `\n` and `$` will match at the | 
|  | /// position immediately preceding a `\n`. | 
|  | /// | 
|  | /// The behavior of this option can be impacted by other settings too: | 
|  | /// | 
|  | /// * The [`RegexBuilder::line_terminator`] option changes `\n` above | 
|  | /// to any ASCII byte. | 
|  | /// * The [`RegexBuilder::crlf`] option changes the line terminator to | 
|  | /// be either `\r` or `\n`, but never at the position between a `\r` | 
|  | /// and `\n`. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `m` in | 
|  | /// the pattern. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"^foo$") | 
|  | ///     .multi_line(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert_eq!(Some(1..4), re.find("\nfoo\n").map(|m| m.range())); | 
|  | /// ``` | 
|  | pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder { | 
|  | self.builder.multi_line(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures dot-matches-new-line mode for the entire pattern. | 
|  | /// | 
|  | /// Perhaps surprisingly, the default behavior for `.` is not to match | 
|  | /// any character, but rather, to match any character except for the | 
|  | /// line terminator (which is `\n` by default). When this mode is | 
|  | /// enabled, the behavior changes such that `.` truly matches any | 
|  | /// character. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `s` in | 
|  | /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent | 
|  | /// regexes. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"foo.bar") | 
|  | ///     .dot_matches_new_line(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// let hay = "foo\nbar"; | 
|  | /// assert_eq!(Some("foo\nbar"), re.find(hay).map(|m| m.as_str())); | 
|  | /// ``` | 
|  | pub fn dot_matches_new_line( | 
|  | &mut self, | 
|  | yes: bool, | 
|  | ) -> &mut RegexBuilder { | 
|  | self.builder.dot_matches_new_line(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures CRLF mode for the entire pattern. | 
|  | /// | 
|  | /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for | 
|  | /// short) and `\n` ("line feed" or LF for short) are treated as line | 
|  | /// terminators. This results in the following: | 
|  | /// | 
|  | /// * Unless dot-matches-new-line mode is enabled, `.` will now match | 
|  | /// any character except for `\n` and `\r`. | 
|  | /// * When multi-line mode is enabled, `^` will match immediately | 
|  | /// following a `\n` or a `\r`. Similarly, `$` will match immediately | 
|  | /// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match | 
|  | /// between `\r` and `\n`. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `R` in | 
|  | /// the pattern. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"^foo$") | 
|  | ///     .multi_line(true) | 
|  | ///     .crlf(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// let hay = "\r\nfoo\r\n"; | 
|  | /// // If CRLF mode weren't enabled here, then '$' wouldn't match | 
|  | /// // immediately after 'foo', and thus no match would be found. | 
|  | /// assert_eq!(Some("foo"), re.find(hay).map(|m| m.as_str())); | 
|  | /// ``` | 
|  | /// | 
|  | /// This example demonstrates that `^` will never match at a position | 
|  | /// between `\r` and `\n`. (`$` will similarly not match between a `\r` | 
|  | /// and a `\n`.) | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"^") | 
|  | ///     .multi_line(true) | 
|  | ///     .crlf(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// let hay = "\r\n\r\n"; | 
|  | /// let ranges: Vec<_> = re.find_iter(hay).map(|m| m.range()).collect(); | 
|  | /// assert_eq!(ranges, vec![0..0, 2..2, 4..4]); | 
|  | /// ``` | 
|  | pub fn crlf(&mut self, yes: bool) -> &mut RegexBuilder { | 
|  | self.builder.crlf(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Configures the line terminator to be used by the regex. | 
|  | /// | 
|  | /// The line terminator is relevant in two ways for a particular regex: | 
|  | /// | 
|  | /// * When dot-matches-new-line mode is *not* enabled (the default), | 
|  | /// then `.` will match any character except for the configured line | 
|  | /// terminator. | 
|  | /// * When multi-line mode is enabled (not the default), then `^` and | 
|  | /// `$` will match immediately after and before, respectively, a line | 
|  | /// terminator. | 
|  | /// | 
|  | /// In both cases, if CRLF mode is enabled in a particular context, | 
|  | /// then it takes precedence over any configured line terminator. | 
|  | /// | 
|  | /// This option cannot be configured from within the pattern. | 
|  | /// | 
|  | /// The default line terminator is `\n`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// This shows how to treat the NUL byte as a line terminator. This can | 
|  | /// be a useful heuristic when searching binary data. | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"^foo$") | 
|  | ///     .multi_line(true) | 
|  | ///     .line_terminator(b'\x00') | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// let hay = "\x00foo\x00"; | 
|  | /// assert_eq!(Some(1..4), re.find(hay).map(|m| m.range())); | 
|  | /// ``` | 
|  | /// | 
|  | /// This example shows that the behavior of `.` is impacted by this | 
|  | /// setting as well: | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r".") | 
|  | ///     .line_terminator(b'\x00') | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match("\n")); | 
|  | /// assert!(!re.is_match("\x00")); | 
|  | /// ``` | 
|  | /// | 
|  | /// This shows that building a regex will fail if the byte given | 
|  | /// is not ASCII and the pattern could result in matching invalid | 
|  | /// UTF-8. This is because any singular non-ASCII byte is not valid | 
|  | /// UTF-8, and it is not permitted for a [`Regex`] to match invalid | 
|  | /// UTF-8. (It is permissible to use a non-ASCII byte when building a | 
|  | /// [`bytes::Regex`](crate::bytes::Regex).) | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexBuilder; | 
|  | /// | 
|  | /// assert!(RegexBuilder::new(r".").line_terminator(0x80).build().is_err()); | 
|  | /// // Note that using a non-ASCII byte isn't enough on its own to | 
|  | /// // cause regex compilation to fail. You actually have to make use | 
|  | /// // of it in the regex in a way that leads to matching invalid | 
|  | /// // UTF-8. If you don't, then regex compilation will succeed! | 
|  | /// assert!(RegexBuilder::new(r"a").line_terminator(0x80).build().is_ok()); | 
|  | /// ``` | 
|  | pub fn line_terminator(&mut self, byte: u8) -> &mut RegexBuilder { | 
|  | self.builder.line_terminator(byte); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures swap-greed mode for the entire pattern. | 
|  | /// | 
|  | /// When swap-greed mode is enabled, patterns like `a+` will become | 
|  | /// non-greedy and patterns like `a+?` will become greedy. In other | 
|  | /// words, the meanings of `a+` and `a+?` are switched. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `U` in | 
|  | /// the pattern. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"a+") | 
|  | ///     .swap_greed(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert_eq!(Some("a"), re.find("aaa").map(|m| m.as_str())); | 
|  | /// ``` | 
|  | pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder { | 
|  | self.builder.swap_greed(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures verbose mode for the entire pattern. | 
|  | /// | 
|  | /// When enabled, whitespace will be treated as insignificant in the | 
|  | /// pattern and `#` can be used to start a comment until the next new | 
|  | /// line. | 
|  | /// | 
|  | /// Normally, in most places in a pattern, whitespace is treated | 
|  | /// literally. For example ` +` will match one or more ASCII whitespace | 
|  | /// characters. | 
|  | /// | 
|  | /// When verbose mode is enabled, `\#` can be used to match a literal | 
|  | /// `#` and `\ ` can be used to match a literal ASCII whitespace | 
|  | /// character. | 
|  | /// | 
|  | /// Verbose mode is useful for permitting regexes to be formatted and | 
|  | /// broken up more nicely. This may make them more easily readable. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `x` in | 
|  | /// the pattern. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexBuilder; | 
|  | /// | 
|  | /// let pat = r" | 
|  | ///     \b | 
|  | ///     (?<first>\p{Uppercase}\w*)  # always start with uppercase letter | 
|  | ///     [\s--\n]+                   # whitespace should separate names | 
|  | ///     (?: # middle name can be an initial! | 
|  | ///         (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*)) | 
|  | ///         [\s--\n]+ | 
|  | ///     )? | 
|  | ///     (?<last>\p{Uppercase}\w*) | 
|  | ///     \b | 
|  | /// "; | 
|  | /// let re = RegexBuilder::new(pat) | 
|  | ///     .ignore_whitespace(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// | 
|  | /// let caps = re.captures("Harry Potter").unwrap(); | 
|  | /// assert_eq!("Harry", &caps["first"]); | 
|  | /// assert_eq!("Potter", &caps["last"]); | 
|  | /// | 
|  | /// let caps = re.captures("Harry J. Potter").unwrap(); | 
|  | /// assert_eq!("Harry", &caps["first"]); | 
|  | /// // Since a middle name/initial isn't required for an overall match, | 
|  | /// // we can't assume that 'initial' or 'middle' will be populated! | 
|  | /// assert_eq!(Some("J"), caps.name("initial").map(|m| m.as_str())); | 
|  | /// assert_eq!(None, caps.name("middle").map(|m| m.as_str())); | 
|  | /// assert_eq!("Potter", &caps["last"]); | 
|  | /// | 
|  | /// let caps = re.captures("Harry James Potter").unwrap(); | 
|  | /// assert_eq!("Harry", &caps["first"]); | 
|  | /// // Since a middle name/initial isn't required for an overall match, | 
|  | /// // we can't assume that 'initial' or 'middle' will be populated! | 
|  | /// assert_eq!(None, caps.name("initial").map(|m| m.as_str())); | 
|  | /// assert_eq!(Some("James"), caps.name("middle").map(|m| m.as_str())); | 
|  | /// assert_eq!("Potter", &caps["last"]); | 
|  | /// ``` | 
|  | pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder { | 
|  | self.builder.ignore_whitespace(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures octal mode for the entire pattern. | 
|  | /// | 
|  | /// Octal syntax is a little-known way of uttering Unicode codepoints | 
|  | /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all | 
|  | /// equivalent patterns, where the last example shows octal syntax. | 
|  | /// | 
|  | /// While supporting octal syntax isn't in and of itself a problem, | 
|  | /// it does make good error messages harder. That is, in PCRE based | 
|  | /// regex engines, syntax like `\1` invokes a backreference, which is | 
|  | /// explicitly unsupported by this library. However, many users expect | 
|  | /// backreferences to be supported. Therefore, when octal support | 
|  | /// is disabled, the error message will explicitly mention that | 
|  | /// backreferences aren't supported. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexBuilder; | 
|  | /// | 
|  | /// // Normally this pattern would not compile, with an error message | 
|  | /// // about backreferences not being supported. But with octal mode | 
|  | /// // enabled, octal escape sequences work. | 
|  | /// let re = RegexBuilder::new(r"\141") | 
|  | ///     .octal(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match("a")); | 
|  | /// ``` | 
|  | pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder { | 
|  | self.builder.octal(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Sets the approximate size limit, in bytes, of the compiled regex. | 
|  | /// | 
|  | /// This roughly corresponds to the amount of heap memory, in | 
|  | /// bytes, occupied by a single regex. If the regex would otherwise | 
|  | /// approximately exceed this limit, then compiling that regex will | 
|  | /// fail. | 
|  | /// | 
|  | /// The main utility of a method like this is to avoid compiling | 
|  | /// regexes that use an unexpected amount of resources, such as | 
|  | /// time and memory. Even if the memory usage of a large regex is | 
|  | /// acceptable, its search time may not be. Namely, worst case time | 
|  | /// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and | 
|  | /// `n ~ len(haystack)`. That is, search time depends, in part, on the | 
|  | /// size of the compiled regex. This means that putting a limit on the | 
|  | /// size of the regex limits how much a regex can impact search time. | 
|  | /// | 
|  | /// For more information about regex size limits, see the section on | 
|  | /// [untrusted inputs](crate#untrusted-input) in the top-level crate | 
|  | /// documentation. | 
|  | /// | 
|  | /// The default for this is some reasonable number that permits most | 
|  | /// patterns to compile successfully. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041 | 
|  | /// use regex::RegexBuilder; | 
|  | /// | 
|  | /// // It may surprise you how big some seemingly small patterns can | 
|  | /// // be! Since \w is Unicode aware, this generates a regex that can | 
|  | /// // match approximately 140,000 distinct codepoints. | 
|  | /// assert!(RegexBuilder::new(r"\w").size_limit(45_000).build().is_err()); | 
|  | /// ``` | 
|  | pub fn size_limit(&mut self, bytes: usize) -> &mut RegexBuilder { | 
|  | self.builder.size_limit(bytes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Set the approximate capacity, in bytes, of the cache of transitions | 
|  | /// used by the lazy DFA. | 
|  | /// | 
|  | /// While the lazy DFA isn't always used, it tends to be the most | 
|  | /// commonly used regex engine in default configurations. It tends to | 
|  | /// adopt the performance profile of a fully built DFA, but without the | 
|  | /// downside of taking worst case exponential time to build. | 
|  | /// | 
|  | /// The downside is that it needs to keep a cache of transitions and | 
|  | /// states that are built while running a search, and this cache | 
|  | /// can fill up. When it fills up, the cache will reset itself. Any | 
|  | /// previously generated states and transitions will then need to be | 
|  | /// re-generated. If this happens too many times, then this library | 
|  | /// will bail out of using the lazy DFA and switch to a different regex | 
|  | /// engine. | 
|  | /// | 
|  | /// If your regex provokes this particular downside of the lazy DFA, | 
|  | /// then it may be beneficial to increase its cache capacity. This will | 
|  | /// potentially reduce the frequency of cache resetting (ideally to | 
|  | /// `0`). While it won't fix all potential performance problems with | 
|  | /// the lazy DFA, increasing the cache capacity does fix some. | 
|  | /// | 
|  | /// There is no easy way to determine, a priori, whether increasing | 
|  | /// this cache capacity will help. In general, the larger your regex, | 
|  | /// the more cache it's likely to use. But that isn't an ironclad rule. | 
|  | /// For example, a regex like `[01]*1[01]{N}` would normally produce a | 
|  | /// fully built DFA that is exponential in size with respect to `N`. | 
|  | /// The lazy DFA will prevent exponential space blow-up, but its cache | 
|  | /// is likely to fill up, even when it's large and even for smallish | 
|  | /// values of `N`. | 
|  | /// | 
|  | /// If you aren't sure whether this helps or not, it is sensible to | 
|  | /// set this to some arbitrarily large number in testing, such as | 
|  | /// `usize::MAX`. Namely, this represents the amount of capacity that | 
|  | /// *may* be used. It's probably not a good idea to use `usize::MAX` in | 
|  | /// production though, since it implies there are no controls on heap | 
|  | /// memory used by this library during a search. In effect, set it to | 
|  | /// whatever you're willing to allocate for a single regex search. | 
|  | pub fn dfa_size_limit(&mut self, bytes: usize) -> &mut RegexBuilder { | 
|  | self.builder.dfa_size_limit(bytes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Set the nesting limit for this parser. | 
|  | /// | 
|  | /// The nesting limit controls how deep the abstract syntax tree is | 
|  | /// allowed to be. If the AST exceeds the given limit (e.g., with too | 
|  | /// many nested groups), then an error is returned by the parser. | 
|  | /// | 
|  | /// The purpose of this limit is to act as a heuristic to prevent stack | 
|  | /// overflow for consumers that do structural induction on an AST using | 
|  | /// explicit recursion. While this crate never does this (instead using | 
|  | /// constant stack space and moving the call stack to the heap), other | 
|  | /// crates may. | 
|  | /// | 
|  | /// This limit is not checked until the entire AST is parsed. | 
|  | /// Therefore, if callers want to put a limit on the amount of heap | 
|  | /// space used, then they should impose a limit on the length, in | 
|  | /// bytes, of the concrete pattern string. In particular, this is | 
|  | /// viable since this parser implementation will limit itself to heap | 
|  | /// space proportional to the length of the pattern string. See also | 
|  | /// the [untrusted inputs](crate#untrusted-input) section in the | 
|  | /// top-level crate documentation for more information about this. | 
|  | /// | 
|  | /// Note that a nest limit of `0` will return a nest limit error for | 
|  | /// most patterns but not all. For example, a nest limit of `0` permits | 
|  | /// `a` but not `ab`, since `ab` requires an explicit concatenation, | 
|  | /// which results in a nest depth of `1`. In general, a nest limit is | 
|  | /// not something that manifests in an obvious way in the concrete | 
|  | /// syntax, therefore, it should not be used in a granular way. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexBuilder; | 
|  | /// | 
|  | /// assert!(RegexBuilder::new(r"a").nest_limit(0).build().is_ok()); | 
|  | /// assert!(RegexBuilder::new(r"ab").nest_limit(0).build().is_err()); | 
|  | /// ``` | 
|  | pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder { | 
|  | self.builder.nest_limit(limit); | 
|  | self | 
|  | } | 
|  | } | 
|  |  | 
|  | /// A configurable builder for a [`RegexSet`]. | 
|  | /// | 
|  | /// This builder can be used to programmatically set flags such as | 
|  | /// `i` (case insensitive) and `x` (for verbose mode). This builder | 
|  | /// can also be used to configure things like the line terminator | 
|  | /// and a size limit on the compiled regular expression. | 
|  | #[derive(Clone, Debug)] | 
|  | pub struct RegexSetBuilder { | 
|  | builder: Builder, | 
|  | } | 
|  |  | 
|  | impl RegexSetBuilder { | 
|  | /// Create a new builder with a default configuration for the given | 
|  | /// patterns. | 
|  | /// | 
|  | /// If the patterns are invalid or exceed the configured size limits, | 
|  | /// then an error will be returned when [`RegexSetBuilder::build`] is | 
|  | /// called. | 
|  | pub fn new<I, S>(patterns: I) -> RegexSetBuilder | 
|  | where | 
|  | I: IntoIterator<Item = S>, | 
|  | S: AsRef<str>, | 
|  | { | 
|  | RegexSetBuilder { builder: Builder::new(patterns) } | 
|  | } | 
|  |  | 
|  | /// Compiles the patterns given to `RegexSetBuilder::new` with the | 
|  | /// configuration set on this builder. | 
|  | /// | 
|  | /// If the patterns aren't valid regexes or if a configured size limit | 
|  | /// was exceeded, then an error is returned. | 
|  | pub fn build(&self) -> Result<RegexSet, Error> { | 
|  | self.builder.build_many_string() | 
|  | } | 
|  |  | 
|  | /// This configures Unicode mode for the all of the patterns. | 
|  | /// | 
|  | /// Enabling Unicode mode does a number of things: | 
|  | /// | 
|  | /// * Most fundamentally, it causes the fundamental atom of matching | 
|  | /// to be a single codepoint. When Unicode mode is disabled, it's a | 
|  | /// single byte. For example, when Unicode mode is enabled, `.` will | 
|  | /// match `💩` once, where as it will match 4 times when Unicode mode | 
|  | /// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.) | 
|  | /// * Case insensitive matching uses Unicode simple case folding rules. | 
|  | /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are | 
|  | /// available. | 
|  | /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and | 
|  | /// `\d`. | 
|  | /// * The word boundary assertions, `\b` and `\B`, use the Unicode | 
|  | /// definition of a word character. | 
|  | /// | 
|  | /// Note that if Unicode mode is disabled, then the regex will fail to | 
|  | /// compile if it could match invalid UTF-8. For example, when Unicode | 
|  | /// mode is disabled, then since `.` matches any byte (except for | 
|  | /// `\n`), then it can match invalid UTF-8 and thus building a regex | 
|  | /// from it will fail. Another example is `\w` and `\W`. Since `\w` can | 
|  | /// only match ASCII bytes when Unicode mode is disabled, it's allowed. | 
|  | /// But `\W` can match more than ASCII bytes, including invalid UTF-8, | 
|  | /// and so it is not allowed. This restriction can be lifted only by | 
|  | /// using a [`bytes::RegexSet`](crate::bytes::RegexSet). | 
|  | /// | 
|  | /// For more details on the Unicode support in this crate, see the | 
|  | /// [Unicode section](crate#unicode) in this crate's top-level | 
|  | /// documentation. | 
|  | /// | 
|  | /// The default for this is `true`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"\w"]) | 
|  | ///     .unicode(false) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// // Normally greek letters would be included in \w, but since | 
|  | /// // Unicode mode is disabled, it only matches ASCII letters. | 
|  | /// assert!(!re.is_match("δ")); | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"s"]) | 
|  | ///     .case_insensitive(true) | 
|  | ///     .unicode(false) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// // Normally 'Å¿' is included when searching for 's' case | 
|  | /// // insensitively due to Unicode's simple case folding rules. But | 
|  | /// // when Unicode mode is disabled, only ASCII case insensitive rules | 
|  | /// // are used. | 
|  | /// assert!(!re.is_match("Å¿")); | 
|  | /// ``` | 
|  | pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder { | 
|  | self.builder.unicode(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures whether to enable case insensitive matching for all | 
|  | /// of the patterns. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `i` | 
|  | /// in the pattern. For example, `(?i:foo)` matches `foo` case | 
|  | /// insensitively while `(?-i:foo)` matches `foo` case sensitively. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"foo(?-i:bar)quux"]) | 
|  | ///     .case_insensitive(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match("FoObarQuUx")); | 
|  | /// // Even though case insensitive matching is enabled in the builder, | 
|  | /// // it can be locally disabled within the pattern. In this case, | 
|  | /// // `bar` is matched case sensitively. | 
|  | /// assert!(!re.is_match("fooBARquux")); | 
|  | /// ``` | 
|  | pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexSetBuilder { | 
|  | self.builder.case_insensitive(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures multi-line mode for all of the patterns. | 
|  | /// | 
|  | /// Enabling multi-line mode changes the behavior of the `^` and `$` | 
|  | /// anchor assertions. Instead of only matching at the beginning and | 
|  | /// end of a haystack, respectively, multi-line mode causes them to | 
|  | /// match at the beginning and end of a line *in addition* to the | 
|  | /// beginning and end of a haystack. More precisely, `^` will match at | 
|  | /// the position immediately following a `\n` and `$` will match at the | 
|  | /// position immediately preceding a `\n`. | 
|  | /// | 
|  | /// The behavior of this option can be impacted by other settings too: | 
|  | /// | 
|  | /// * The [`RegexSetBuilder::line_terminator`] option changes `\n` | 
|  | /// above to any ASCII byte. | 
|  | /// * The [`RegexSetBuilder::crlf`] option changes the line terminator | 
|  | /// to be either `\r` or `\n`, but never at the position between a `\r` | 
|  | /// and `\n`. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `m` in | 
|  | /// the pattern. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"^foo$"]) | 
|  | ///     .multi_line(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match("\nfoo\n")); | 
|  | /// ``` | 
|  | pub fn multi_line(&mut self, yes: bool) -> &mut RegexSetBuilder { | 
|  | self.builder.multi_line(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures dot-matches-new-line mode for the entire pattern. | 
|  | /// | 
|  | /// Perhaps surprisingly, the default behavior for `.` is not to match | 
|  | /// any character, but rather, to match any character except for the | 
|  | /// line terminator (which is `\n` by default). When this mode is | 
|  | /// enabled, the behavior changes such that `.` truly matches any | 
|  | /// character. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `s` in | 
|  | /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent | 
|  | /// regexes. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"foo.bar"]) | 
|  | ///     .dot_matches_new_line(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// let hay = "foo\nbar"; | 
|  | /// assert!(re.is_match(hay)); | 
|  | /// ``` | 
|  | pub fn dot_matches_new_line( | 
|  | &mut self, | 
|  | yes: bool, | 
|  | ) -> &mut RegexSetBuilder { | 
|  | self.builder.dot_matches_new_line(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures CRLF mode for all of the patterns. | 
|  | /// | 
|  | /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for | 
|  | /// short) and `\n` ("line feed" or LF for short) are treated as line | 
|  | /// terminators. This results in the following: | 
|  | /// | 
|  | /// * Unless dot-matches-new-line mode is enabled, `.` will now match | 
|  | /// any character except for `\n` and `\r`. | 
|  | /// * When multi-line mode is enabled, `^` will match immediately | 
|  | /// following a `\n` or a `\r`. Similarly, `$` will match immediately | 
|  | /// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match | 
|  | /// between `\r` and `\n`. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `R` in | 
|  | /// the pattern. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"^foo$"]) | 
|  | ///     .multi_line(true) | 
|  | ///     .crlf(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// let hay = "\r\nfoo\r\n"; | 
|  | /// // If CRLF mode weren't enabled here, then '$' wouldn't match | 
|  | /// // immediately after 'foo', and thus no match would be found. | 
|  | /// assert!(re.is_match(hay)); | 
|  | /// ``` | 
|  | /// | 
|  | /// This example demonstrates that `^` will never match at a position | 
|  | /// between `\r` and `\n`. (`$` will similarly not match between a `\r` | 
|  | /// and a `\n`.) | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"^\n"]) | 
|  | ///     .multi_line(true) | 
|  | ///     .crlf(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(!re.is_match("\r\n")); | 
|  | /// ``` | 
|  | pub fn crlf(&mut self, yes: bool) -> &mut RegexSetBuilder { | 
|  | self.builder.crlf(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Configures the line terminator to be used by the regex. | 
|  | /// | 
|  | /// The line terminator is relevant in two ways for a particular regex: | 
|  | /// | 
|  | /// * When dot-matches-new-line mode is *not* enabled (the default), | 
|  | /// then `.` will match any character except for the configured line | 
|  | /// terminator. | 
|  | /// * When multi-line mode is enabled (not the default), then `^` and | 
|  | /// `$` will match immediately after and before, respectively, a line | 
|  | /// terminator. | 
|  | /// | 
|  | /// In both cases, if CRLF mode is enabled in a particular context, | 
|  | /// then it takes precedence over any configured line terminator. | 
|  | /// | 
|  | /// This option cannot be configured from within the pattern. | 
|  | /// | 
|  | /// The default line terminator is `\n`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// This shows how to treat the NUL byte as a line terminator. This can | 
|  | /// be a useful heuristic when searching binary data. | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"^foo$"]) | 
|  | ///     .multi_line(true) | 
|  | ///     .line_terminator(b'\x00') | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// let hay = "\x00foo\x00"; | 
|  | /// assert!(re.is_match(hay)); | 
|  | /// ``` | 
|  | /// | 
|  | /// This example shows that the behavior of `.` is impacted by this | 
|  | /// setting as well: | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"."]) | 
|  | ///     .line_terminator(b'\x00') | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match("\n")); | 
|  | /// assert!(!re.is_match("\x00")); | 
|  | /// ``` | 
|  | /// | 
|  | /// This shows that building a regex will fail if the byte given | 
|  | /// is not ASCII and the pattern could result in matching invalid | 
|  | /// UTF-8. This is because any singular non-ASCII byte is not valid | 
|  | /// UTF-8, and it is not permitted for a [`RegexSet`] to match invalid | 
|  | /// UTF-8. (It is permissible to use a non-ASCII byte when building a | 
|  | /// [`bytes::RegexSet`](crate::bytes::RegexSet).) | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexSetBuilder; | 
|  | /// | 
|  | /// assert!( | 
|  | ///     RegexSetBuilder::new([r"."]) | 
|  | ///         .line_terminator(0x80) | 
|  | ///         .build() | 
|  | ///         .is_err() | 
|  | /// ); | 
|  | /// // Note that using a non-ASCII byte isn't enough on its own to | 
|  | /// // cause regex compilation to fail. You actually have to make use | 
|  | /// // of it in the regex in a way that leads to matching invalid | 
|  | /// // UTF-8. If you don't, then regex compilation will succeed! | 
|  | /// assert!( | 
|  | ///     RegexSetBuilder::new([r"a"]) | 
|  | ///         .line_terminator(0x80) | 
|  | ///         .build() | 
|  | ///         .is_ok() | 
|  | /// ); | 
|  | /// ``` | 
|  | pub fn line_terminator(&mut self, byte: u8) -> &mut RegexSetBuilder { | 
|  | self.builder.line_terminator(byte); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures swap-greed mode for all of the patterns. | 
|  | /// | 
|  | /// When swap-greed mode is enabled, patterns like `a+` will become | 
|  | /// non-greedy and patterns like `a+?` will become greedy. In other | 
|  | /// words, the meanings of `a+` and `a+?` are switched. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `U` in | 
|  | /// the pattern. | 
|  | /// | 
|  | /// Note that this is generally not useful for a `RegexSet` since a | 
|  | /// `RegexSet` can only report whether a pattern matches or not. Since | 
|  | /// greediness never impacts whether a match is found or not (only the | 
|  | /// offsets of the match), it follows that whether parts of a pattern | 
|  | /// are greedy or not doesn't matter for a `RegexSet`. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | pub fn swap_greed(&mut self, yes: bool) -> &mut RegexSetBuilder { | 
|  | self.builder.swap_greed(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures verbose mode for all of the patterns. | 
|  | /// | 
|  | /// When enabled, whitespace will treated as insignifcant in the | 
|  | /// pattern and `#` can be used to start a comment until the next new | 
|  | /// line. | 
|  | /// | 
|  | /// Normally, in most places in a pattern, whitespace is treated | 
|  | /// literally. For example ` +` will match one or more ASCII whitespace | 
|  | /// characters. | 
|  | /// | 
|  | /// When verbose mode is enabled, `\#` can be used to match a literal | 
|  | /// `#` and `\ ` can be used to match a literal ASCII whitespace | 
|  | /// character. | 
|  | /// | 
|  | /// Verbose mode is useful for permitting regexes to be formatted and | 
|  | /// broken up more nicely. This may make them more easily readable. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `x` in | 
|  | /// the pattern. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexSetBuilder; | 
|  | /// | 
|  | /// let pat = r" | 
|  | ///     \b | 
|  | ///     (?<first>\p{Uppercase}\w*)  # always start with uppercase letter | 
|  | ///     [\s--\n]+                   # whitespace should separate names | 
|  | ///     (?: # middle name can be an initial! | 
|  | ///         (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*)) | 
|  | ///         [\s--\n]+ | 
|  | ///     )? | 
|  | ///     (?<last>\p{Uppercase}\w*) | 
|  | ///     \b | 
|  | /// "; | 
|  | /// let re = RegexSetBuilder::new([pat]) | 
|  | ///     .ignore_whitespace(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match("Harry Potter")); | 
|  | /// assert!(re.is_match("Harry J. Potter")); | 
|  | /// assert!(re.is_match("Harry James Potter")); | 
|  | /// assert!(!re.is_match("harry J. Potter")); | 
|  | /// ``` | 
|  | pub fn ignore_whitespace( | 
|  | &mut self, | 
|  | yes: bool, | 
|  | ) -> &mut RegexSetBuilder { | 
|  | self.builder.ignore_whitespace(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures octal mode for all of the patterns. | 
|  | /// | 
|  | /// Octal syntax is a little-known way of uttering Unicode codepoints | 
|  | /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all | 
|  | /// equivalent patterns, where the last example shows octal syntax. | 
|  | /// | 
|  | /// While supporting octal syntax isn't in and of itself a problem, | 
|  | /// it does make good error messages harder. That is, in PCRE based | 
|  | /// regex engines, syntax like `\1` invokes a backreference, which is | 
|  | /// explicitly unsupported this library. However, many users expect | 
|  | /// backreferences to be supported. Therefore, when octal support | 
|  | /// is disabled, the error message will explicitly mention that | 
|  | /// backreferences aren't supported. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexSetBuilder; | 
|  | /// | 
|  | /// // Normally this pattern would not compile, with an error message | 
|  | /// // about backreferences not being supported. But with octal mode | 
|  | /// // enabled, octal escape sequences work. | 
|  | /// let re = RegexSetBuilder::new([r"\141"]) | 
|  | ///     .octal(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match("a")); | 
|  | /// ``` | 
|  | pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder { | 
|  | self.builder.octal(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Sets the approximate size limit, in bytes, of the compiled regex. | 
|  | /// | 
|  | /// This roughly corresponds to the number of heap memory, in | 
|  | /// bytes, occupied by a single regex. If the regex would otherwise | 
|  | /// approximately exceed this limit, then compiling that regex will | 
|  | /// fail. | 
|  | /// | 
|  | /// The main utility of a method like this is to avoid compiling | 
|  | /// regexes that use an unexpected amount of resources, such as | 
|  | /// time and memory. Even if the memory usage of a large regex is | 
|  | /// acceptable, its search time may not be. Namely, worst case time | 
|  | /// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and | 
|  | /// `n ~ len(haystack)`. That is, search time depends, in part, on the | 
|  | /// size of the compiled regex. This means that putting a limit on the | 
|  | /// size of the regex limits how much a regex can impact search time. | 
|  | /// | 
|  | /// For more information about regex size limits, see the section on | 
|  | /// [untrusted inputs](crate#untrusted-input) in the top-level crate | 
|  | /// documentation. | 
|  | /// | 
|  | /// The default for this is some reasonable number that permits most | 
|  | /// patterns to compile successfully. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041 | 
|  | /// use regex::RegexSetBuilder; | 
|  | /// | 
|  | /// // It may surprise you how big some seemingly small patterns can | 
|  | /// // be! Since \w is Unicode aware, this generates a regex that can | 
|  | /// // match approximately 140,000 distinct codepoints. | 
|  | /// assert!( | 
|  | ///     RegexSetBuilder::new([r"\w"]) | 
|  | ///         .size_limit(45_000) | 
|  | ///         .build() | 
|  | ///         .is_err() | 
|  | /// ); | 
|  | /// ``` | 
|  | pub fn size_limit(&mut self, bytes: usize) -> &mut RegexSetBuilder { | 
|  | self.builder.size_limit(bytes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Set the approximate capacity, in bytes, of the cache of transitions | 
|  | /// used by the lazy DFA. | 
|  | /// | 
|  | /// While the lazy DFA isn't always used, in tends to be the most | 
|  | /// commonly use regex engine in default configurations. It tends to | 
|  | /// adopt the performance profile of a fully build DFA, but without the | 
|  | /// downside of taking worst case exponential time to build. | 
|  | /// | 
|  | /// The downside is that it needs to keep a cache of transitions and | 
|  | /// states that are built while running a search, and this cache | 
|  | /// can fill up. When it fills up, the cache will reset itself. Any | 
|  | /// previously generated states and transitions will then need to be | 
|  | /// re-generated. If this happens too many times, then this library | 
|  | /// will bail out of using the lazy DFA and switch to a different regex | 
|  | /// engine. | 
|  | /// | 
|  | /// If your regex provokes this particular downside of the lazy DFA, | 
|  | /// then it may be beneficial to increase its cache capacity. This will | 
|  | /// potentially reduce the frequency of cache resetting (ideally to | 
|  | /// `0`). While it won't fix all potential performance problems with | 
|  | /// the lazy DFA, increasing the cache capacity does fix some. | 
|  | /// | 
|  | /// There is no easy way to determine, a priori, whether increasing | 
|  | /// this cache capacity will help. In general, the larger your regex, | 
|  | /// the more cache it's likely to use. But that isn't an ironclad rule. | 
|  | /// For example, a regex like `[01]*1[01]{N}` would normally produce a | 
|  | /// fully build DFA that is exponential in size with respect to `N`. | 
|  | /// The lazy DFA will prevent exponential space blow-up, but it cache | 
|  | /// is likely to fill up, even when it's large and even for smallish | 
|  | /// values of `N`. | 
|  | /// | 
|  | /// If you aren't sure whether this helps or not, it is sensible to | 
|  | /// set this to some arbitrarily large number in testing, such as | 
|  | /// `usize::MAX`. Namely, this represents the amount of capacity that | 
|  | /// *may* be used. It's probably not a good idea to use `usize::MAX` in | 
|  | /// production though, since it implies there are no controls on heap | 
|  | /// memory used by this library during a search. In effect, set it to | 
|  | /// whatever you're willing to allocate for a single regex search. | 
|  | pub fn dfa_size_limit( | 
|  | &mut self, | 
|  | bytes: usize, | 
|  | ) -> &mut RegexSetBuilder { | 
|  | self.builder.dfa_size_limit(bytes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Set the nesting limit for this parser. | 
|  | /// | 
|  | /// The nesting limit controls how deep the abstract syntax tree is | 
|  | /// allowed to be. If the AST exceeds the given limit (e.g., with too | 
|  | /// many nested groups), then an error is returned by the parser. | 
|  | /// | 
|  | /// The purpose of this limit is to act as a heuristic to prevent stack | 
|  | /// overflow for consumers that do structural induction on an AST using | 
|  | /// explicit recursion. While this crate never does this (instead using | 
|  | /// constant stack space and moving the call stack to the heap), other | 
|  | /// crates may. | 
|  | /// | 
|  | /// This limit is not checked until the entire AST is parsed. | 
|  | /// Therefore, if callers want to put a limit on the amount of heap | 
|  | /// space used, then they should impose a limit on the length, in | 
|  | /// bytes, of the concrete pattern string. In particular, this is | 
|  | /// viable since this parser implementation will limit itself to heap | 
|  | /// space proportional to the length of the pattern string. See also | 
|  | /// the [untrusted inputs](crate#untrusted-input) section in the | 
|  | /// top-level crate documentation for more information about this. | 
|  | /// | 
|  | /// Note that a nest limit of `0` will return a nest limit error for | 
|  | /// most patterns but not all. For example, a nest limit of `0` permits | 
|  | /// `a` but not `ab`, since `ab` requires an explicit concatenation, | 
|  | /// which results in a nest depth of `1`. In general, a nest limit is | 
|  | /// not something that manifests in an obvious way in the concrete | 
|  | /// syntax, therefore, it should not be used in a granular way. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::RegexSetBuilder; | 
|  | /// | 
|  | /// assert!(RegexSetBuilder::new([r"a"]).nest_limit(0).build().is_ok()); | 
|  | /// assert!(RegexSetBuilder::new([r"ab"]).nest_limit(0).build().is_err()); | 
|  | /// ``` | 
|  | pub fn nest_limit(&mut self, limit: u32) -> &mut RegexSetBuilder { | 
|  | self.builder.nest_limit(limit); | 
|  | self | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | pub(crate) mod bytes { | 
|  | use crate::{ | 
|  | bytes::{Regex, RegexSet}, | 
|  | error::Error, | 
|  | }; | 
|  |  | 
|  | use super::Builder; | 
|  |  | 
|  | /// A configurable builder for a [`Regex`]. | 
|  | /// | 
|  | /// This builder can be used to programmatically set flags such as `i` | 
|  | /// (case insensitive) and `x` (for verbose mode). This builder can also be | 
|  | /// used to configure things like the line terminator and a size limit on | 
|  | /// the compiled regular expression. | 
|  | #[derive(Clone, Debug)] | 
|  | pub struct RegexBuilder { | 
|  | builder: Builder, | 
|  | } | 
|  |  | 
|  | impl RegexBuilder { | 
|  | /// Create a new builder with a default configuration for the given | 
|  | /// pattern. | 
|  | /// | 
|  | /// If the pattern is invalid or exceeds the configured size limits, | 
|  | /// then an error will be returned when [`RegexBuilder::build`] is | 
|  | /// called. | 
|  | pub fn new(pattern: &str) -> RegexBuilder { | 
|  | RegexBuilder { builder: Builder::new([pattern]) } | 
|  | } | 
|  |  | 
|  | /// Compiles the pattern given to `RegexBuilder::new` with the | 
|  | /// configuration set on this builder. | 
|  | /// | 
|  | /// If the pattern isn't a valid regex or if a configured size limit | 
|  | /// was exceeded, then an error is returned. | 
|  | pub fn build(&self) -> Result<Regex, Error> { | 
|  | self.builder.build_one_bytes() | 
|  | } | 
|  |  | 
|  | /// This configures Unicode mode for the entire pattern. | 
|  | /// | 
|  | /// Enabling Unicode mode does a number of things: | 
|  | /// | 
|  | /// * Most fundamentally, it causes the fundamental atom of matching | 
|  | /// to be a single codepoint. When Unicode mode is disabled, it's a | 
|  | /// single byte. For example, when Unicode mode is enabled, `.` will | 
|  | /// match `💩` once, where as it will match 4 times when Unicode mode | 
|  | /// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.) | 
|  | /// * Case insensitive matching uses Unicode simple case folding rules. | 
|  | /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are | 
|  | /// available. | 
|  | /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and | 
|  | /// `\d`. | 
|  | /// * The word boundary assertions, `\b` and `\B`, use the Unicode | 
|  | /// definition of a word character. | 
|  | /// | 
|  | /// Note that unlike the top-level `Regex` for searching `&str`, it | 
|  | /// is permitted to disable Unicode mode even if the resulting pattern | 
|  | /// could match invalid UTF-8. For example, `(?-u:.)` is not a valid | 
|  | /// pattern for a top-level `Regex`, but is valid for a `bytes::Regex`. | 
|  | /// | 
|  | /// For more details on the Unicode support in this crate, see the | 
|  | /// [Unicode section](crate#unicode) in this crate's top-level | 
|  | /// documentation. | 
|  | /// | 
|  | /// The default for this is `true`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"\w") | 
|  | ///     .unicode(false) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// // Normally greek letters would be included in \w, but since | 
|  | /// // Unicode mode is disabled, it only matches ASCII letters. | 
|  | /// assert!(!re.is_match("δ".as_bytes())); | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"s") | 
|  | ///     .case_insensitive(true) | 
|  | ///     .unicode(false) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// // Normally 'Å¿' is included when searching for 's' case | 
|  | /// // insensitively due to Unicode's simple case folding rules. But | 
|  | /// // when Unicode mode is disabled, only ASCII case insensitive rules | 
|  | /// // are used. | 
|  | /// assert!(!re.is_match("Å¿".as_bytes())); | 
|  | /// ``` | 
|  | /// | 
|  | /// Since this builder is for constructing a [`bytes::Regex`](Regex), | 
|  | /// one can disable Unicode mode even if it would match invalid UTF-8: | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r".") | 
|  | ///     .unicode(false) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// // Normally greek letters would be included in \w, but since | 
|  | /// // Unicode mode is disabled, it only matches ASCII letters. | 
|  | /// assert!(re.is_match(b"\xFF")); | 
|  | /// ``` | 
|  | pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder { | 
|  | self.builder.unicode(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures whether to enable case insensitive matching for the | 
|  | /// entire pattern. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `i` | 
|  | /// in the pattern. For example, `(?i:foo)` matches `foo` case | 
|  | /// insensitively while `(?-i:foo)` matches `foo` case sensitively. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"foo(?-i:bar)quux") | 
|  | ///     .case_insensitive(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match(b"FoObarQuUx")); | 
|  | /// // Even though case insensitive matching is enabled in the builder, | 
|  | /// // it can be locally disabled within the pattern. In this case, | 
|  | /// // `bar` is matched case sensitively. | 
|  | /// assert!(!re.is_match(b"fooBARquux")); | 
|  | /// ``` | 
|  | pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder { | 
|  | self.builder.case_insensitive(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures multi-line mode for the entire pattern. | 
|  | /// | 
|  | /// Enabling multi-line mode changes the behavior of the `^` and `$` | 
|  | /// anchor assertions. Instead of only matching at the beginning and | 
|  | /// end of a haystack, respectively, multi-line mode causes them to | 
|  | /// match at the beginning and end of a line *in addition* to the | 
|  | /// beginning and end of a haystack. More precisely, `^` will match at | 
|  | /// the position immediately following a `\n` and `$` will match at the | 
|  | /// position immediately preceding a `\n`. | 
|  | /// | 
|  | /// The behavior of this option can be impacted by other settings too: | 
|  | /// | 
|  | /// * The [`RegexBuilder::line_terminator`] option changes `\n` above | 
|  | /// to any ASCII byte. | 
|  | /// * The [`RegexBuilder::crlf`] option changes the line terminator to | 
|  | /// be either `\r` or `\n`, but never at the position between a `\r` | 
|  | /// and `\n`. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `m` in | 
|  | /// the pattern. For example, `(?m:^foo$)` enables it for `^foo$` only. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"^foo$") | 
|  | ///     .multi_line(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert_eq!(Some(1..4), re.find(b"\nfoo\n").map(|m| m.range())); | 
|  | /// ``` | 
|  | pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder { | 
|  | self.builder.multi_line(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures dot-matches-new-line mode for the entire pattern. | 
|  | /// | 
|  | /// Perhaps surprisingly, the default behavior for `.` is not to match | 
|  | /// any character, but rather, to match any character except for the | 
|  | /// line terminator (which is `\n` by default, but can be changed via | 
|  | /// [`RegexBuilder::line_terminator`]). When this mode is enabled, the | 
|  | /// behavior changes such that `.` truly matches any character. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `s` in | 
|  | /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent | 
|  | /// regexes. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"foo.bar") | 
|  | ///     .dot_matches_new_line(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// let hay = b"foo\nbar"; | 
|  | /// assert_eq!(Some(&b"foo\nbar"[..]), re.find(hay).map(|m| m.as_bytes())); | 
|  | /// ``` | 
|  | pub fn dot_matches_new_line( | 
|  | &mut self, | 
|  | yes: bool, | 
|  | ) -> &mut RegexBuilder { | 
|  | self.builder.dot_matches_new_line(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures CRLF mode for the entire pattern. | 
|  | /// | 
|  | /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for | 
|  | /// short) and `\n` ("line feed" or LF for short) are treated as line | 
|  | /// terminators. This results in the following: | 
|  | /// | 
|  | /// * Unless dot-matches-new-line mode is enabled, `.` will now match | 
|  | /// any character except for `\n` and `\r`. | 
|  | /// * When multi-line mode is enabled, `^` will match immediately | 
|  | /// following a `\n` or a `\r`. Similarly, `$` will match immediately | 
|  | /// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match | 
|  | /// between `\r` and `\n`. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `R` in | 
|  | /// the pattern. For example, `(?mR:^foo$)` enables multi-line and CRLF modes. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"^foo$") | 
|  | ///     .multi_line(true) | 
|  | ///     .crlf(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// let hay = b"\r\nfoo\r\n"; | 
|  | /// // If CRLF mode weren't enabled here, then '$' wouldn't match | 
|  | /// // immediately after 'foo', and thus no match would be found. | 
|  | /// assert_eq!(Some(&b"foo"[..]), re.find(hay).map(|m| m.as_bytes())); | 
|  | /// ``` | 
|  | /// | 
|  | /// This example demonstrates that `^` will never match at a position | 
|  | /// between `\r` and `\n`. (`$` will similarly not match between a `\r` | 
|  | /// and a `\n`.) | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"^") | 
|  | ///     .multi_line(true) | 
|  | ///     .crlf(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// let hay = b"\r\n\r\n"; | 
|  | /// let ranges: Vec<_> = re.find_iter(hay).map(|m| m.range()).collect(); | 
|  | /// assert_eq!(ranges, vec![0..0, 2..2, 4..4]); | 
|  | /// ``` | 
|  | pub fn crlf(&mut self, yes: bool) -> &mut RegexBuilder { | 
|  | self.builder.crlf(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Configures the line terminator to be used by the regex. | 
|  | /// | 
|  | /// The line terminator is relevant in two ways for a particular regex: | 
|  | /// | 
|  | /// * When dot-matches-new-line mode is *not* enabled (the default), | 
|  | /// then `.` will match any character except for the configured line | 
|  | /// terminator. | 
|  | /// * When multi-line mode is enabled (not the default), then `^` and | 
|  | /// `$` will match immediately after and before, respectively, a line | 
|  | /// terminator. | 
|  | /// | 
|  | /// In both cases, if CRLF mode ([`RegexBuilder::crlf`]) is enabled in a | 
|  | /// particular context, it takes precedence over any configured terminator. | 
|  | /// | 
|  | /// This option cannot be configured from within the pattern. | 
|  | /// | 
|  | /// The default line terminator is `\n`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// This shows how to treat the NUL byte as a line terminator. This can | 
|  | /// be a useful heuristic when searching binary data. | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"^foo$") | 
|  | ///     .multi_line(true) | 
|  | ///     .line_terminator(b'\x00') | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// let hay = b"\x00foo\x00"; | 
|  | /// assert_eq!(Some(1..4), re.find(hay).map(|m| m.range())); | 
|  | /// ``` | 
|  | /// | 
|  | /// This example shows that the behavior of `.` is impacted by this | 
|  | /// setting as well: | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r".") | 
|  | ///     .line_terminator(b'\x00') | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match(b"\n")); | 
|  | /// assert!(!re.is_match(b"\x00")); | 
|  | /// ``` | 
|  | /// | 
|  | /// This shows that building a regex will work even when the byte | 
|  | /// given is not ASCII. This is unlike the top-level `Regex` API where | 
|  | /// matching invalid UTF-8 is not allowed. | 
|  | /// | 
|  | /// Note though that you must disable Unicode mode. This is required | 
|  | /// because Unicode mode requires matching one codepoint at a time, | 
|  | /// and there is no way to match a non-ASCII byte as if it were a | 
|  | /// codepoint. | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexBuilder; | 
|  | /// | 
|  | /// assert!( | 
|  | ///     RegexBuilder::new(r".") | 
|  | ///         .unicode(false) | 
|  | ///         .line_terminator(0x80) | 
|  | ///         .build() | 
|  | ///         .is_ok(), | 
|  | /// ); | 
|  | /// ``` | 
|  | pub fn line_terminator(&mut self, byte: u8) -> &mut RegexBuilder { | 
|  | self.builder.line_terminator(byte); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures swap-greed mode for the entire pattern. | 
|  | /// | 
|  | /// When swap-greed mode is enabled, patterns like `a+` will become | 
|  | /// non-greedy and patterns like `a+?` will become greedy. In other | 
|  | /// words, the meanings of `a+` and `a+?` are switched. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `U` in | 
|  | /// the pattern. For example, `(?U:a+)` is equivalent to `a+?`. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexBuilder; | 
|  | /// | 
|  | /// let re = RegexBuilder::new(r"a+") | 
|  | ///     .swap_greed(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert_eq!(Some(&b"a"[..]), re.find(b"aaa").map(|m| m.as_bytes())); | 
|  | /// ``` | 
|  | pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder { | 
|  | self.builder.swap_greed(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures verbose mode for the entire pattern. | 
|  | /// | 
|  | /// When enabled, whitespace will be treated as insignificant in the | 
|  | /// pattern and `#` can be used to start a comment until the next new | 
|  | /// line. | 
|  | /// | 
|  | /// Normally, in most places in a pattern, whitespace is treated | 
|  | /// literally. For example ` +` will match one or more ASCII whitespace | 
|  | /// characters. | 
|  | /// | 
|  | /// When verbose mode is enabled, `\#` can be used to match a literal | 
|  | /// `#` and `\ ` can be used to match a literal ASCII whitespace | 
|  | /// character. | 
|  | /// | 
|  | /// Verbose mode is useful for permitting regexes to be formatted and | 
|  | /// broken up more nicely. This may make them more easily readable. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `x` in | 
|  | /// the pattern. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexBuilder; | 
|  | /// | 
|  | /// let pat = r" | 
|  | ///     \b | 
|  | ///     (?<first>\p{Uppercase}\w*)  # always start with uppercase letter | 
|  | ///     [\s--\n]+                   # whitespace should separate names | 
|  | ///     (?: # middle name can be an initial! | 
|  | ///         (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*)) | 
|  | ///         [\s--\n]+ | 
|  | ///     )? | 
|  | ///     (?<last>\p{Uppercase}\w*) | 
|  | ///     \b | 
|  | /// "; | 
|  | /// let re = RegexBuilder::new(pat) | 
|  | ///     .ignore_whitespace(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// | 
|  | /// let caps = re.captures(b"Harry Potter").unwrap(); | 
|  | /// assert_eq!(&b"Harry"[..], &caps["first"]); | 
|  | /// assert_eq!(&b"Potter"[..], &caps["last"]); | 
|  | /// | 
|  | /// let caps = re.captures(b"Harry J. Potter").unwrap(); | 
|  | /// assert_eq!(&b"Harry"[..], &caps["first"]); | 
|  | /// // Since a middle name/initial isn't required for an overall match, | 
|  | /// // we can't assume that 'initial' or 'middle' will be populated! | 
|  | /// assert_eq!( | 
|  | ///     Some(&b"J"[..]), | 
|  | ///     caps.name("initial").map(|m| m.as_bytes()), | 
|  | /// ); | 
|  | /// assert_eq!(None, caps.name("middle").map(|m| m.as_bytes())); | 
|  | /// assert_eq!(&b"Potter"[..], &caps["last"]); | 
|  | /// | 
|  | /// let caps = re.captures(b"Harry James Potter").unwrap(); | 
|  | /// assert_eq!(&b"Harry"[..], &caps["first"]); | 
|  | /// // Since a middle name/initial isn't required for an overall match, | 
|  | /// // we can't assume that 'initial' or 'middle' will be populated! | 
|  | /// assert_eq!(None, caps.name("initial").map(|m| m.as_bytes())); | 
|  | /// assert_eq!( | 
|  | ///     Some(&b"James"[..]), | 
|  | ///     caps.name("middle").map(|m| m.as_bytes()), | 
|  | /// ); | 
|  | /// assert_eq!(&b"Potter"[..], &caps["last"]); | 
|  | /// ``` | 
|  | pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder { | 
|  | self.builder.ignore_whitespace(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures octal mode for the entire pattern. | 
|  | /// | 
|  | /// Octal syntax is a little-known way of uttering Unicode codepoints | 
|  | /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all | 
|  | /// equivalent patterns, where the last example shows octal syntax. | 
|  | /// | 
|  | /// While supporting octal syntax isn't in and of itself a problem, | 
|  | /// it does make good error messages harder. That is, in PCRE based | 
|  | /// regex engines, syntax like `\1` invokes a backreference, which is | 
|  | /// explicitly unsupported by this library. However, many users expect | 
|  | /// backreferences to be supported. Therefore, when octal support | 
|  | /// is disabled, the error message will explicitly mention that | 
|  | /// backreferences aren't supported. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexBuilder; | 
|  | /// | 
|  | /// // Normally this pattern would not compile, with an error message | 
|  | /// // about backreferences not being supported. But with octal mode | 
|  | /// // enabled, octal escape sequences work. | 
|  | /// let re = RegexBuilder::new(r"\141") | 
|  | ///     .octal(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match(b"a")); | 
|  | /// ``` | 
|  | pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder { | 
|  | self.builder.octal(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Sets the approximate size limit, in bytes, of the compiled regex. | 
|  | /// | 
|  | /// This roughly corresponds to the amount of heap memory, in | 
|  | /// bytes, occupied by a single regex. If the regex would otherwise | 
|  | /// approximately exceed this limit, then compiling that regex will | 
|  | /// fail. | 
|  | /// | 
|  | /// The main utility of a method like this is to avoid compiling | 
|  | /// regexes that use an unexpected amount of resources, such as | 
|  | /// time and memory. Even if the memory usage of a large regex is | 
|  | /// acceptable, its search time may not be. Namely, worst case time | 
|  | /// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and | 
|  | /// `n ~ len(haystack)`. That is, search time depends, in part, on the | 
|  | /// size of the compiled regex. This means that putting a limit on the | 
|  | /// size of the regex limits how much a regex can impact search time. | 
|  | /// | 
|  | /// For more information about regex size limits, see the section on | 
|  | /// [untrusted inputs](crate#untrusted-input) in the top-level crate | 
|  | /// documentation. | 
|  | /// | 
|  | /// The default for this is some reasonable number that permits most | 
|  | /// patterns to compile successfully. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041 | 
|  | /// use regex::bytes::RegexBuilder; | 
|  | /// | 
|  | /// // It may surprise you how big some seemingly small patterns can | 
|  | /// // be! Since \w is Unicode aware, this generates a regex that can | 
|  | /// // match approximately 140,000 distinct codepoints. | 
|  | /// assert!(RegexBuilder::new(r"\w").size_limit(45_000).build().is_err()); | 
|  | /// ``` | 
|  | pub fn size_limit(&mut self, bytes: usize) -> &mut RegexBuilder { | 
|  | self.builder.size_limit(bytes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Sets the approximate capacity, in bytes, of the cache of transitions | 
|  | /// used by the lazy DFA. | 
|  | /// | 
|  | /// While the lazy DFA isn't always used, it tends to be the most | 
|  | /// commonly used regex engine in default configurations. It tends to | 
|  | /// adopt the performance profile of a fully built DFA, but without the | 
|  | /// downside of taking worst case exponential time to build. | 
|  | /// | 
|  | /// The downside is that it needs to keep a cache of transitions and | 
|  | /// states that are built while running a search, and this cache | 
|  | /// can fill up. When it fills up, the cache will reset itself. Any | 
|  | /// previously generated states and transitions will then need to be | 
|  | /// re-generated. If this happens too many times, then this library | 
|  | /// will bail out of using the lazy DFA and switch to a different regex | 
|  | /// engine. | 
|  | /// | 
|  | /// If your regex provokes this particular downside of the lazy DFA, | 
|  | /// then it may be beneficial to increase its cache capacity. This will | 
|  | /// potentially reduce the frequency of cache resetting (ideally to | 
|  | /// `0`). While it won't fix all potential performance problems with | 
|  | /// the lazy DFA, increasing the cache capacity does fix some. | 
|  | /// | 
|  | /// There is no easy way to determine, a priori, whether increasing | 
|  | /// this cache capacity will help. In general, the larger your regex, | 
|  | /// the more cache it's likely to use. But that isn't an ironclad rule. | 
|  | /// For example, a regex like `[01]*1[01]{N}` would normally produce a | 
|  | /// fully built DFA that is exponential in size with respect to `N`. | 
|  | /// The lazy DFA will prevent exponential space blow-up, but its cache | 
|  | /// is likely to fill up, even when it's large and even for smallish | 
|  | /// values of `N`. | 
|  | /// | 
|  | /// If you aren't sure whether this helps or not, it is sensible to | 
|  | /// set this to some arbitrarily large number in testing, such as | 
|  | /// `usize::MAX`. Namely, this represents the amount of capacity that | 
|  | /// *may* be used. It's probably not a good idea to use `usize::MAX` in | 
|  | /// production though, since it implies there are no controls on heap | 
|  | /// memory used by this library during a search. In effect, set it to | 
|  | /// whatever you're willing to allocate for a single regex search. | 
|  | pub fn dfa_size_limit(&mut self, bytes: usize) -> &mut RegexBuilder { | 
|  | self.builder.dfa_size_limit(bytes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Sets the nesting limit for this parser. | 
|  | /// | 
|  | /// The nesting limit controls how deep the abstract syntax tree is | 
|  | /// allowed to be. If the AST exceeds the given limit (e.g., with too | 
|  | /// many nested groups), then an error is returned by the parser. | 
|  | /// | 
|  | /// The purpose of this limit is to act as a heuristic to prevent stack | 
|  | /// overflow for consumers that do structural induction on an AST using | 
|  | /// explicit recursion. While this crate never does this (instead using | 
|  | /// constant stack space and moving the call stack to the heap), other | 
|  | /// crates may. | 
|  | /// | 
|  | /// This limit is not checked until the entire AST is parsed. | 
|  | /// Therefore, if callers want to put a limit on the amount of heap | 
|  | /// space used, then they should impose a limit on the length, in | 
|  | /// bytes, of the concrete pattern string. In particular, this is | 
|  | /// viable since this parser implementation will limit itself to heap | 
|  | /// space proportional to the length of the pattern string. See also | 
|  | /// the [untrusted inputs](crate#untrusted-input) section in the | 
|  | /// top-level crate documentation for more information about this. | 
|  | /// | 
|  | /// Note that a nest limit of `0` will return a nest limit error for | 
|  | /// most patterns but not all. For example, a nest limit of `0` permits | 
|  | /// `a` but not `ab`, since `ab` requires an explicit concatenation, | 
|  | /// which results in a nest depth of `1`. In general, a nest limit is | 
|  | /// not something that manifests in an obvious way in the concrete | 
|  | /// syntax, therefore, it should not be used in a granular way. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexBuilder; | 
|  | /// | 
|  | /// assert!(RegexBuilder::new(r"a").nest_limit(0).build().is_ok()); | 
|  | /// assert!(RegexBuilder::new(r"ab").nest_limit(0).build().is_err()); | 
|  | /// ``` | 
|  | pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder { | 
|  | self.builder.nest_limit(limit); | 
|  | self | 
|  | } | 
|  | } | 
|  |  | 
|  | /// A configurable builder for a [`RegexSet`]. | 
|  | /// | 
|  | /// This builder can be used to programmatically set flags such as `i` | 
|  | /// (for case insensitive mode) and `x` (for verbose mode). It can also be | 
|  | /// used to configure things like the line terminator and a size limit on | 
|  | /// the compiled regular expression. | 
|  | #[derive(Clone, Debug)] | 
|  | pub struct RegexSetBuilder { | 
|  | builder: Builder, | 
|  | } | 
|  |  | 
|  | impl RegexSetBuilder { | 
|  | /// Creates a new builder with a default configuration for the given | 
|  | /// patterns, each of which may be any type implementing `AsRef<str>`. | 
|  | /// | 
|  | /// If the patterns are invalid or exceed the configured size limits, | 
|  | /// then an error will be returned when [`RegexSetBuilder::build`] is | 
|  | /// called. | 
|  | pub fn new<I, S>(patterns: I) -> RegexSetBuilder | 
|  | where | 
|  | I: IntoIterator<Item = S>, | 
|  | S: AsRef<str>, | 
|  | { | 
|  | RegexSetBuilder { builder: Builder::new(patterns) } | 
|  | } | 
|  |  | 
|  | /// Compiles the patterns given to [`RegexSetBuilder::new`] with the | 
|  | /// configuration set on this builder. | 
|  | /// | 
|  | /// If the patterns aren't valid regexes or if a configured size limit | 
|  | /// was exceeded, then an error is returned. | 
|  | pub fn build(&self) -> Result<RegexSet, Error> { | 
|  | self.builder.build_many_bytes() | 
|  | } | 
|  |  | 
|  | /// This configures Unicode mode for all of the patterns. | 
|  | /// | 
|  | /// Enabling Unicode mode does a number of things: | 
|  | /// | 
|  | /// * Most fundamentally, it causes the fundamental atom of matching | 
|  | /// to be a single codepoint. When Unicode mode is disabled, it's a | 
|  | /// single byte. For example, when Unicode mode is enabled, `.` will | 
|  | /// match `💩` once, where as it will match 4 times when Unicode mode | 
|  | /// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.) | 
|  | /// * Case insensitive matching uses Unicode simple case folding rules. | 
|  | /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are | 
|  | /// available. | 
|  | /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and | 
|  | /// `\d`. | 
|  | /// * The word boundary assertions, `\b` and `\B`, use the Unicode | 
|  | /// definition of a word character. | 
|  | /// | 
|  | /// Note that unlike the top-level `RegexSet` for searching `&str`, | 
|  | /// it is permitted to disable Unicode mode even if the resulting | 
|  | /// pattern could match invalid UTF-8. For example, `(?-u:.)` is not | 
|  | /// a valid pattern for a top-level `RegexSet`, but is valid for a | 
|  | /// `bytes::RegexSet`. | 
|  | /// | 
|  | /// For more details on the Unicode support in this crate, see the | 
|  | /// [Unicode section](crate#unicode) in this crate's top-level | 
|  | /// documentation. | 
|  | /// | 
|  | /// The default for this is `true`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"\w"]) | 
|  | ///     .unicode(false) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// // Normally greek letters would be included in \w, but since | 
|  | /// // Unicode mode is disabled, it only matches ASCII letters. | 
|  | /// assert!(!re.is_match("δ".as_bytes())); | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"s"]) | 
|  | ///     .case_insensitive(true) | 
|  | ///     .unicode(false) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// // Normally 'ſ' is included when searching for 's' case | 
|  | /// // insensitively due to Unicode's simple case folding rules. But | 
|  | /// // when Unicode mode is disabled, only ASCII case insensitive rules | 
|  | /// // are used. | 
|  | /// assert!(!re.is_match("ſ".as_bytes())); | 
|  | /// ``` | 
|  | /// | 
|  | /// Since this builder is for constructing a | 
|  | /// [`bytes::RegexSet`](RegexSet), one can disable Unicode mode even if | 
|  | /// it would match invalid UTF-8: | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"."]) | 
|  | ///     .unicode(false) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// // Since Unicode mode is disabled, `.` matches any single byte other | 
|  | /// // than the line terminator, including bytes that are invalid UTF-8. | 
|  | /// assert!(re.is_match(b"\xFF")); | 
|  | /// ``` | 
|  | pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder { | 
|  | self.builder.unicode(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures whether to enable case insensitive matching for all | 
|  | /// of the patterns. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `i` | 
|  | /// in a pattern. For example, `(?i:foo)` matches `foo` case | 
|  | /// insensitively while `(?-i:foo)` matches `foo` case sensitively. | 
|  | /// | 
|  | /// The default for this is `false`, i.e., matching is case sensitive. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"foo(?-i:bar)quux"]) | 
|  | ///     .case_insensitive(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match(b"FoObarQuUx")); | 
|  | /// // Even though case insensitive matching is enabled in the builder, | 
|  | /// // it can be locally disabled within the pattern. In this case, | 
|  | /// // `bar` is matched case sensitively. | 
|  | /// assert!(!re.is_match(b"fooBARquux")); | 
|  | /// ``` | 
|  | pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexSetBuilder { | 
|  | self.builder.case_insensitive(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures multi-line mode for all of the patterns. | 
|  | /// | 
|  | /// Enabling multi-line mode changes the behavior of the `^` and `$` | 
|  | /// anchor assertions. Instead of only matching at the beginning and | 
|  | /// end of a haystack, respectively, multi-line mode causes them to | 
|  | /// match at the beginning and end of a line *in addition* to the | 
|  | /// beginning and end of a haystack. More precisely, `^` will match at | 
|  | /// the position immediately following a `\n` and `$` will match at the | 
|  | /// position immediately preceding a `\n`. | 
|  | /// | 
|  | /// The behavior of this option can be impacted by other settings too: | 
|  | /// | 
|  | /// * The [`RegexSetBuilder::line_terminator`] option changes `\n` | 
|  | /// above to any ASCII byte. | 
|  | /// * The [`RegexSetBuilder::crlf`] option changes the line terminator | 
|  | /// to be either `\r` or `\n`, but never at the position between a `\r` | 
|  | /// and `\n`. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `m` in | 
|  | /// a pattern. For example, `(?m:^foo$)` enables it for `^foo$` only. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"^foo$"]) | 
|  | ///     .multi_line(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match(b"\nfoo\n")); | 
|  | /// ``` | 
|  | pub fn multi_line(&mut self, yes: bool) -> &mut RegexSetBuilder { | 
|  | self.builder.multi_line(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures dot-matches-new-line mode for all of the patterns. | 
|  | /// | 
|  | /// Perhaps surprisingly, the default behavior for `.` is not to match | 
|  | /// any character, but rather, to match any character except for the | 
|  | /// line terminator (which is `\n` by default, but can be changed via | 
|  | /// [`RegexSetBuilder::line_terminator`]). When this mode is enabled, the | 
|  | /// behavior changes such that `.` truly matches any character. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `s` in | 
|  | /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent | 
|  | /// regexes. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"foo.bar"]) | 
|  | ///     .dot_matches_new_line(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// let hay = b"foo\nbar"; | 
|  | /// assert!(re.is_match(hay)); | 
|  | /// ``` | 
|  | pub fn dot_matches_new_line( | 
|  | &mut self, | 
|  | yes: bool, | 
|  | ) -> &mut RegexSetBuilder { | 
|  | self.builder.dot_matches_new_line(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures CRLF mode for all of the patterns. | 
|  | /// | 
|  | /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for | 
|  | /// short) and `\n` ("line feed" or LF for short) are treated as line | 
|  | /// terminators. This results in the following: | 
|  | /// | 
|  | /// * Unless dot-matches-new-line mode is enabled, `.` will now match | 
|  | /// any character except for `\n` and `\r`. | 
|  | /// * When multi-line mode is enabled, `^` will match immediately | 
|  | /// following a `\n` or a `\r`. Similarly, `$` will match immediately | 
|  | /// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match | 
|  | /// between `\r` and `\n`. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `R` in | 
|  | /// a pattern. For example, `(?mR:^foo$)` enables multi-line and CRLF modes. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"^foo$"]) | 
|  | ///     .multi_line(true) | 
|  | ///     .crlf(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// let hay = b"\r\nfoo\r\n"; | 
|  | /// // If CRLF mode weren't enabled here, then '$' wouldn't match | 
|  | /// // immediately after 'foo', and thus no match would be found. | 
|  | /// assert!(re.is_match(hay)); | 
|  | /// ``` | 
|  | /// | 
|  | /// This example demonstrates that `^` will never match at a position | 
|  | /// between `\r` and `\n`. (`$` will similarly not match between a `\r` | 
|  | /// and a `\n`.) | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"^\n"]) | 
|  | ///     .multi_line(true) | 
|  | ///     .crlf(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(!re.is_match(b"\r\n")); | 
|  | /// ``` | 
|  | pub fn crlf(&mut self, yes: bool) -> &mut RegexSetBuilder { | 
|  | self.builder.crlf(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Configures the line terminator to be used by the regex. | 
|  | /// | 
|  | /// The line terminator is relevant in two ways for a particular regex: | 
|  | /// | 
|  | /// * When dot-matches-new-line mode is *not* enabled (the default), | 
|  | /// then `.` will match any character except for the configured line | 
|  | /// terminator. | 
|  | /// * When multi-line mode is enabled (not the default), then `^` and | 
|  | /// `$` will match immediately after and before, respectively, a line | 
|  | /// terminator. | 
|  | /// | 
|  | /// In both cases, if CRLF mode ([`RegexSetBuilder::crlf`]) is enabled in a | 
|  | /// particular context, it takes precedence over any configured terminator. | 
|  | /// | 
|  | /// This option cannot be configured from within the pattern. | 
|  | /// | 
|  | /// The default line terminator is `\n`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// This shows how to treat the NUL byte as a line terminator. This can | 
|  | /// be a useful heuristic when searching binary data. | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"^foo$"]) | 
|  | ///     .multi_line(true) | 
|  | ///     .line_terminator(b'\x00') | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// let hay = b"\x00foo\x00"; | 
|  | /// assert!(re.is_match(hay)); | 
|  | /// ``` | 
|  | /// | 
|  | /// This example shows that the behavior of `.` is impacted by this | 
|  | /// setting as well: | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexSetBuilder; | 
|  | /// | 
|  | /// let re = RegexSetBuilder::new([r"."]) | 
|  | ///     .line_terminator(b'\x00') | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match(b"\n")); | 
|  | /// assert!(!re.is_match(b"\x00")); | 
|  | /// ``` | 
|  | /// | 
|  | /// This shows that building a regex will work even when the byte given | 
|  | /// is not ASCII. This is unlike the top-level `RegexSet` API where | 
|  | /// matching invalid UTF-8 is not allowed. | 
|  | /// | 
|  | /// Note though that you must disable Unicode mode. This is required | 
|  | /// because Unicode mode requires matching one codepoint at a time, | 
|  | /// and there is no way to match a non-ASCII byte as if it were a | 
|  | /// codepoint. | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexSetBuilder; | 
|  | /// | 
|  | /// assert!( | 
|  | ///     RegexSetBuilder::new([r"."]) | 
|  | ///         .unicode(false) | 
|  | ///         .line_terminator(0x80) | 
|  | ///         .build() | 
|  | ///         .is_ok(), | 
|  | /// ); | 
|  | /// ``` | 
|  | pub fn line_terminator(&mut self, byte: u8) -> &mut RegexSetBuilder { | 
|  | self.builder.line_terminator(byte); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures swap-greed mode for all of the patterns. | 
|  | /// | 
|  | /// When swap-greed mode is enabled, patterns like `a+` will become | 
|  | /// non-greedy and patterns like `a+?` will become greedy. In other | 
|  | /// words, the meanings of `a+` and `a+?` are switched. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `U` in | 
|  | /// the pattern. | 
|  | /// | 
|  | /// Note that this is generally not useful for a `RegexSet`, since a | 
|  | /// `RegexSet` can only report whether a pattern matches or not. | 
|  | /// Greediness only affects the offsets of a match, never whether a | 
|  | /// match is found, so whether parts of a pattern are greedy or not | 
|  | /// doesn't matter for a `RegexSet`. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | pub fn swap_greed(&mut self, yes: bool) -> &mut RegexSetBuilder { | 
|  | self.builder.swap_greed(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures verbose mode for all of the patterns. | 
|  | /// | 
|  | /// When enabled, whitespace will be treated as insignificant in the | 
|  | /// pattern and `#` can be used to start a comment until the next new | 
|  | /// line. | 
|  | /// | 
|  | /// Normally, in most places in a pattern, whitespace is treated | 
|  | /// literally. For example ` +` will match one or more ASCII whitespace | 
|  | /// characters. | 
|  | /// | 
|  | /// When verbose mode is enabled, `\#` can be used to match a literal | 
|  | /// `#` and `\ ` can be used to match a literal ASCII whitespace | 
|  | /// character. | 
|  | /// | 
|  | /// Verbose mode is useful for permitting regexes to be formatted and | 
|  | /// broken up more nicely. This may make them more easily readable. | 
|  | /// | 
|  | /// This setting can also be configured using the inline flag `x` in | 
|  | /// the pattern. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexSetBuilder; | 
|  | /// | 
|  | /// let pat = r" | 
|  | ///     \b | 
|  | ///     (?<first>\p{Uppercase}\w*)  # always start with uppercase letter | 
|  | ///     [\s--\n]+                   # whitespace should separate names | 
|  | ///     (?: # middle name can be an initial! | 
|  | ///         (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*)) | 
|  | ///         [\s--\n]+ | 
|  | ///     )? | 
|  | ///     (?<last>\p{Uppercase}\w*) | 
|  | ///     \b | 
|  | /// "; | 
|  | /// let re = RegexSetBuilder::new([pat]) | 
|  | ///     .ignore_whitespace(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match(b"Harry Potter")); | 
|  | /// assert!(re.is_match(b"Harry J. Potter")); | 
|  | /// assert!(re.is_match(b"Harry James Potter")); | 
|  | /// assert!(!re.is_match(b"harry J. Potter")); | 
|  | /// ``` | 
|  | pub fn ignore_whitespace( | 
|  | &mut self, | 
|  | yes: bool, | 
|  | ) -> &mut RegexSetBuilder { | 
|  | self.builder.ignore_whitespace(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// This configures octal mode for all of the patterns. | 
|  | /// | 
|  | /// Octal syntax is a little-known way of uttering Unicode codepoints | 
|  | /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all | 
|  | /// equivalent patterns, where the last example shows octal syntax. | 
|  | /// | 
|  | /// While supporting octal syntax isn't in and of itself a problem, | 
|  | /// it does make good error messages harder. That is, in PCRE based | 
|  | /// regex engines, syntax like `\1` invokes a backreference, which is | 
|  | /// explicitly unsupported in this library. However, many users expect | 
|  | /// backreferences to be supported. Therefore, when octal support | 
|  | /// is disabled, the error message will explicitly mention that | 
|  | /// backreferences aren't supported. | 
|  | /// | 
|  | /// The default for this is `false`. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexSetBuilder; | 
|  | /// | 
|  | /// // Normally this pattern would not compile, with an error message | 
|  | /// // about backreferences not being supported. But with octal mode | 
|  | /// // enabled, octal escape sequences work. | 
|  | /// let re = RegexSetBuilder::new([r"\141"]) | 
|  | ///     .octal(true) | 
|  | ///     .build() | 
|  | ///     .unwrap(); | 
|  | /// assert!(re.is_match(b"a")); | 
|  | /// ``` | 
|  | pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder { | 
|  | self.builder.octal(yes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Sets the approximate size limit, in bytes, of the compiled regex. | 
|  | /// | 
|  | /// This roughly corresponds to the amount of heap memory, in | 
|  | /// bytes, occupied by a single regex. If the regex would otherwise | 
|  | /// approximately exceed this limit, then compiling that regex will | 
|  | /// fail. | 
|  | /// | 
|  | /// The main utility of a method like this is to avoid compiling | 
|  | /// regexes that use an unexpected amount of resources, such as | 
|  | /// time and memory. Even if the memory usage of a large regex is | 
|  | /// acceptable, its search time may not be. Namely, worst case time | 
|  | /// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and | 
|  | /// `n ~ len(haystack)`. That is, search time depends, in part, on the | 
|  | /// size of the compiled regex. This means that putting a limit on the | 
|  | /// size of the regex limits how much a regex can impact search time. | 
|  | /// | 
|  | /// For more information about regex size limits, see the section on | 
|  | /// [untrusted inputs](crate#untrusted-input) in the top-level crate | 
|  | /// documentation. | 
|  | /// | 
|  | /// The default for this is some reasonable number that permits most | 
|  | /// patterns to compile successfully. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041 | 
|  | /// use regex::bytes::RegexSetBuilder; | 
|  | /// | 
|  | /// // It may surprise you how big some seemingly small patterns can | 
|  | /// // be! Since \w is Unicode aware, this generates a regex that can | 
|  | /// // match approximately 140,000 distinct codepoints. | 
|  | /// assert!( | 
|  | ///     RegexSetBuilder::new([r"\w"]) | 
|  | ///         .size_limit(45_000) | 
|  | ///         .build() | 
|  | ///         .is_err() | 
|  | /// ); | 
|  | /// ``` | 
|  | pub fn size_limit(&mut self, bytes: usize) -> &mut RegexSetBuilder { | 
|  | self.builder.size_limit(bytes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Set the approximate capacity, in bytes, of the cache of transitions | 
|  | /// used by the lazy DFA. | 
|  | /// | 
|  | /// While the lazy DFA isn't always used, it tends to be the most | 
|  | /// commonly used regex engine in default configurations. It tends to | 
|  | /// adopt the performance profile of a fully built DFA, but without the | 
|  | /// downside of taking worst case exponential time to build. | 
|  | /// | 
|  | /// The downside is that it needs to keep a cache of transitions and | 
|  | /// states that are built while running a search, and this cache | 
|  | /// can fill up. When it fills up, the cache will reset itself. Any | 
|  | /// previously generated states and transitions will then need to be | 
|  | /// re-generated. If this happens too many times, then this library | 
|  | /// will bail out of using the lazy DFA and switch to a different regex | 
|  | /// engine. | 
|  | /// | 
|  | /// If your regex provokes this particular downside of the lazy DFA, | 
|  | /// then it may be beneficial to increase its cache capacity. This will | 
|  | /// potentially reduce the frequency of cache resetting (ideally to | 
|  | /// `0`). While it won't fix all potential performance problems with | 
|  | /// the lazy DFA, increasing the cache capacity does fix some. | 
|  | /// | 
|  | /// There is no easy way to determine, a priori, whether increasing | 
|  | /// this cache capacity will help. In general, the larger your regex, | 
|  | /// the more cache it's likely to use. But that isn't an ironclad rule. | 
|  | /// For example, a regex like `[01]*1[01]{N}` would normally produce a | 
|  | /// fully built DFA that is exponential in size with respect to `N`. | 
|  | /// The lazy DFA will prevent exponential space blow-up, but its cache | 
|  | /// is likely to fill up, even when it's large and even for smallish | 
|  | /// values of `N`. | 
|  | /// | 
|  | /// If you aren't sure whether this helps or not, it is sensible to | 
|  | /// set this to some arbitrarily large number in testing, such as | 
|  | /// `usize::MAX`. Namely, this represents the amount of capacity that | 
|  | /// *may* be used. It's probably not a good idea to use `usize::MAX` in | 
|  | /// production though, since it implies there are no controls on heap | 
|  | /// memory used by this library during a search. In effect, set it to | 
|  | /// whatever you're willing to allocate for a single regex search. | 
|  | pub fn dfa_size_limit( | 
|  | &mut self, | 
|  | bytes: usize, | 
|  | ) -> &mut RegexSetBuilder { | 
|  | self.builder.dfa_size_limit(bytes); | 
|  | self | 
|  | } | 
|  |  | 
|  | /// Set the nesting limit used when parsing all of the patterns. | 
|  | /// | 
|  | /// The nesting limit controls how deep the abstract syntax tree is | 
|  | /// allowed to be. If the AST exceeds the given limit (e.g., with too | 
|  | /// many nested groups), then an error is returned by the parser. | 
|  | /// | 
|  | /// The purpose of this limit is to act as a heuristic to prevent stack | 
|  | /// overflow for consumers that do structural induction on an AST using | 
|  | /// explicit recursion. While this crate never does this (instead using | 
|  | /// constant stack space and moving the call stack to the heap), other | 
|  | /// crates may. | 
|  | /// | 
|  | /// This limit is not checked until the entire AST is parsed. | 
|  | /// Therefore, if callers want to put a limit on the amount of heap | 
|  | /// space used, then they should impose a limit on the length, in | 
|  | /// bytes, of the concrete pattern string. In particular, this is | 
|  | /// viable since this parser implementation will limit itself to heap | 
|  | /// space proportional to the length of the pattern string. See also | 
|  | /// the [untrusted inputs](crate#untrusted-input) section in the | 
|  | /// top-level crate documentation for more information about this. | 
|  | /// | 
|  | /// Note that a nest limit of `0` will return a nest limit error for | 
|  | /// most patterns but not all. For example, a nest limit of `0` permits | 
|  | /// `a` but not `ab`, since `ab` requires an explicit concatenation, | 
|  | /// which results in a nest depth of `1`. In general, a nest limit is | 
|  | /// not something that manifests in an obvious way in the concrete | 
|  | /// syntax, therefore, it should not be used in a granular way. | 
|  | /// | 
|  | /// # Example | 
|  | /// | 
|  | /// ``` | 
|  | /// use regex::bytes::RegexSetBuilder; | 
|  | /// | 
|  | /// assert!(RegexSetBuilder::new([r"a"]).nest_limit(0).build().is_ok()); | 
|  | /// assert!(RegexSetBuilder::new([r"ab"]).nest_limit(0).build().is_err()); | 
|  | /// ``` | 
|  | pub fn nest_limit(&mut self, limit: u32) -> &mut RegexSetBuilder { | 
|  | self.builder.nest_limit(limit); | 
|  | self | 
|  | } | 
|  | } | 
|  | } |