summary refs log tree commit diff stats
path: root/vendor/regex-syntax/src/ast
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-19 09:25:53 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-19 09:25:53 +0000
commit: 73e0a5b7696ea019ba35b89f38fc8e7b285d99cb (patch)
tree: 0d2e175af6f114cb50a675bec0bc76e12e1bceb4 /vendor/regex-syntax/src/ast
parent: Adding upstream version 1.75.0+dfsg1. (diff)
download: rustc-upstream.tar.xz
rustc-upstream.zip
Adding upstream version 1.76.0+dfsg1. [upstream/1.76.0+dfsg1] [upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/regex-syntax/src/ast')
-rw-r--r-- vendor/regex-syntax/src/ast/mod.rs 440
-rw-r--r-- vendor/regex-syntax/src/ast/parse.rs 576
-rw-r--r-- vendor/regex-syntax/src/ast/print.rs 20
-rw-r--r-- vendor/regex-syntax/src/ast/visitor.rs 17
4 files changed, 794 insertions, 259 deletions
diff --git a/vendor/regex-syntax/src/ast/mod.rs b/vendor/regex-syntax/src/ast/mod.rs
index a95b1c873..6a77ee134 100644
--- a/vendor/regex-syntax/src/ast/mod.rs
+++ b/vendor/regex-syntax/src/ast/mod.rs
@@ -20,6 +20,7 @@ mod visitor;
/// valid Unicode property name. That particular error is reported when
/// translating an AST to the high-level intermediate representation (`HIR`).
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Error {
/// The kind of error.
kind: ErrorKind,
@@ -70,6 +71,7 @@ impl Error {
/// new variant is not considered a breaking change.
#[non_exhaustive]
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum ErrorKind {
/// The capturing group limit was exceeded.
///
@@ -160,6 +162,18 @@ pub enum ErrorKind {
/// `(?i)*`. It is, however, possible to create a repetition operating on
/// an empty sub-expression. For example, `()*` is still considered valid.
RepetitionMissing,
+ /// The special word boundary syntax, `\b{something}`, was used, but
+ /// either EOF without `}` was seen, or an invalid character in the
+ /// braces was seen.
+ SpecialWordBoundaryUnclosed,
+ /// The special word boundary syntax, `\b{something}`, was used, but
+ /// `something` was not recognized as a valid word boundary kind.
+ SpecialWordBoundaryUnrecognized,
+ /// The syntax `\b{` was observed, but afterwards the end of the pattern
+ /// was observed without being able to tell whether it was meant to be a
+ /// bounded repetition on the `\b` or the beginning of a special word
+ /// boundary assertion.
+ SpecialWordOrRepetitionUnexpectedEof,
/// The Unicode class is not valid. This typically occurs when a `\p` is
/// followed by something other than a `{`.
UnicodeClassInvalid,
@@ -258,6 +272,29 @@ impl core::fmt::Display for ErrorKind {
RepetitionMissing => {
write!(f, "repetition operator missing expression")
}
+ SpecialWordBoundaryUnclosed => {
+ write!(
+ f,
+ "special word boundary assertion is either \
+ unclosed or contains an invalid character",
+ )
+ }
+ SpecialWordBoundaryUnrecognized => {
+ write!(
+ f,
+ "unrecognized special word boundary assertion, \
+ valid choices are: start, end, start-half \
+ or end-half",
+ )
+ }
+ SpecialWordOrRepetitionUnexpectedEof => {
+ write!(
+ f,
+ "found either the beginning of a special word \
+ boundary or a bounded repetition on a \\b with \
+ an opening brace, but no closing brace",
+ )
+ }
UnicodeClassInvalid => {
write!(f, "invalid Unicode character class")
}
@@ -278,6 +315,7 @@ impl core::fmt::Display for ErrorKind {
/// All span positions are absolute byte offsets that can be used on the
/// original regular expression that was parsed.
#[derive(Clone, Copy, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Span {
/// The start byte offset.
pub start: Position,
@@ -308,6 +346,7 @@ impl PartialOrd for Span {
/// A position encodes one half of a span, and includes the byte offset, line
/// number and column number.
#[derive(Clone, Copy, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Position {
/// The absolute offset of this position, starting at `0` from the
/// beginning of the regular expression pattern string.
@@ -396,6 +435,7 @@ impl Position {
/// comment contains a span of precisely where it occurred in the original
/// regular expression.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct WithComments {
/// The actual ast.
pub ast: Ast,
@@ -408,6 +448,7 @@ pub struct WithComments {
/// A regular expression can only contain comments when the `x` flag is
/// enabled.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Comment {
/// The span of this comment, including the beginning `#` and ending `\n`.
pub span: Span,
@@ -424,31 +465,97 @@ pub struct Comment {
/// This type defines its own destructor that uses constant stack space and
/// heap space proportional to the size of the `Ast`.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Ast {
/// An empty regex that matches everything.
- Empty(Span),
+ Empty(Box<Span>),
/// A set of flags, e.g., `(?is)`.
- Flags(SetFlags),
+ Flags(Box<SetFlags>),
/// A single character literal, which includes escape sequences.
- Literal(Literal),
+ Literal(Box<Literal>),
/// The "any character" class.
- Dot(Span),
+ Dot(Box<Span>),
/// A single zero-width assertion.
- Assertion(Assertion),
- /// A single character class. This includes all forms of character classes
- /// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`.
- Class(Class),
+ Assertion(Box<Assertion>),
+ /// A single Unicode character class, e.g., `\pL` or `\p{Greek}`.
+ ClassUnicode(Box<ClassUnicode>),
+ /// A single perl character class, e.g., `\d` or `\W`.
+ ClassPerl(Box<ClassPerl>),
+ /// A single bracketed character class set, which may contain zero or more
+ /// character ranges and/or zero or more nested classes. e.g.,
+ /// `[a-zA-Z\pL]`.
+ ClassBracketed(Box<ClassBracketed>),
/// A repetition operator applied to an arbitrary regular expression.
- Repetition(Repetition),
+ Repetition(Box<Repetition>),
/// A grouped regular expression.
- Group(Group),
+ Group(Box<Group>),
/// An alternation of regular expressions.
- Alternation(Alternation),
+ Alternation(Box<Alternation>),
/// A concatenation of regular expressions.
- Concat(Concat),
+ Concat(Box<Concat>),
}
impl Ast {
+ /// Create an "empty" AST item.
+ pub fn empty(span: Span) -> Ast {
+ Ast::Empty(Box::new(span))
+ }
+
+ /// Create a "flags" AST item.
+ pub fn flags(e: SetFlags) -> Ast {
+ Ast::Flags(Box::new(e))
+ }
+
+ /// Create a "literal" AST item.
+ pub fn literal(e: Literal) -> Ast {
+ Ast::Literal(Box::new(e))
+ }
+
+ /// Create a "dot" AST item.
+ pub fn dot(span: Span) -> Ast {
+ Ast::Dot(Box::new(span))
+ }
+
+ /// Create an "assertion" AST item.
+ pub fn assertion(e: Assertion) -> Ast {
+ Ast::Assertion(Box::new(e))
+ }
+
+ /// Create a "Unicode class" AST item.
+ pub fn class_unicode(e: ClassUnicode) -> Ast {
+ Ast::ClassUnicode(Box::new(e))
+ }
+
+ /// Create a "Perl class" AST item.
+ pub fn class_perl(e: ClassPerl) -> Ast {
+ Ast::ClassPerl(Box::new(e))
+ }
+
+ /// Create a "bracketed class" AST item.
+ pub fn class_bracketed(e: ClassBracketed) -> Ast {
+ Ast::ClassBracketed(Box::new(e))
+ }
+
+ /// Create a "repetition" AST item.
+ pub fn repetition(e: Repetition) -> Ast {
+ Ast::Repetition(Box::new(e))
+ }
+
+ /// Create a "group" AST item.
+ pub fn group(e: Group) -> Ast {
+ Ast::Group(Box::new(e))
+ }
+
+ /// Create an "alternation" AST item.
+ pub fn alternation(e: Alternation) -> Ast {
+ Ast::Alternation(Box::new(e))
+ }
+
+ /// Create a "concat" AST item.
+ pub fn concat(e: Concat) -> Ast {
+ Ast::Concat(Box::new(e))
+ }
+
/// Return the span of this abstract syntax tree.
pub fn span(&self) -> &Span {
match *self {
@@ -457,7 +564,9 @@ impl Ast {
Ast::Literal(ref x) => &x.span,
Ast::Dot(ref span) => span,
Ast::Assertion(ref x) => &x.span,
- Ast::Class(ref x) => x.span(),
+ Ast::ClassUnicode(ref x) => &x.span,
+ Ast::ClassPerl(ref x) => &x.span,
+ Ast::ClassBracketed(ref x) => &x.span,
Ast::Repetition(ref x) => &x.span,
Ast::Group(ref x) => &x.span,
Ast::Alternation(ref x) => &x.span,
@@ -481,8 +590,10 @@ impl Ast {
| Ast::Flags(_)
| Ast::Literal(_)
| Ast::Dot(_)
- | Ast::Assertion(_) => false,
- Ast::Class(_)
+ | Ast::Assertion(_)
+ | Ast::ClassUnicode(_)
+ | Ast::ClassPerl(_) => false,
+ Ast::ClassBracketed(_)
| Ast::Repetition(_)
| Ast::Group(_)
| Ast::Alternation(_)
@@ -508,6 +619,7 @@ impl core::fmt::Display for Ast {
/// An alternation of regular expressions.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Alternation {
/// The span of this alternation.
pub span: Span,
@@ -518,20 +630,21 @@ pub struct Alternation {
impl Alternation {
/// Return this alternation as an AST.
///
- /// If this alternation contains zero ASTs, then Ast::Empty is
- /// returned. If this alternation contains exactly 1 AST, then the
- /// corresponding AST is returned. Otherwise, Ast::Alternation is returned.
+ /// If this alternation contains zero ASTs, then `Ast::empty` is returned.
+ /// If this alternation contains exactly 1 AST, then the corresponding AST
+ /// is returned. Otherwise, `Ast::alternation` is returned.
pub fn into_ast(mut self) -> Ast {
match self.asts.len() {
- 0 => Ast::Empty(self.span),
+ 0 => Ast::empty(self.span),
1 => self.asts.pop().unwrap(),
- _ => Ast::Alternation(self),
+ _ => Ast::alternation(self),
}
}
}
/// A concatenation of regular expressions.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Concat {
/// The span of this concatenation.
pub span: Span,
@@ -542,14 +655,14 @@ pub struct Concat {
impl Concat {
/// Return this concatenation as an AST.
///
- /// If this concatenation contains zero ASTs, then Ast::Empty is
- /// returned. If this concatenation contains exactly 1 AST, then the
- /// corresponding AST is returned. Otherwise, Ast::Concat is returned.
+ /// If this concatenation contains zero ASTs, then `Ast::empty` is returned.
+ /// If this concatenation contains exactly 1 AST, then the corresponding AST
+ /// is returned. Otherwise, `Ast::concat` is returned.
pub fn into_ast(mut self) -> Ast {
match self.asts.len() {
- 0 => Ast::Empty(self.span),
+ 0 => Ast::empty(self.span),
1 => self.asts.pop().unwrap(),
- _ => Ast::Concat(self),
+ _ => Ast::concat(self),
}
}
}
@@ -560,6 +673,7 @@ impl Concat {
/// represented in their literal form, e.g., `a` or in their escaped form,
/// e.g., `\x61`.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Literal {
/// The span of this literal.
pub span: Span,
@@ -584,6 +698,7 @@ impl Literal {
/// The kind of a single literal expression.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum LiteralKind {
/// The literal is written verbatim, e.g., `a` or `☃`.
Verbatim,
@@ -613,6 +728,7 @@ pub enum LiteralKind {
/// A special literal is a special escape sequence recognized by the regex
/// parser, e.g., `\f` or `\n`.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum SpecialLiteralKind {
/// Bell, spelled `\a` (`\x07`).
Bell,
@@ -637,6 +753,7 @@ pub enum SpecialLiteralKind {
/// differ when used without brackets in the number of hex digits that must
/// follow.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum HexLiteralKind {
/// A `\x` prefix. When used without brackets, this form is limited to
/// two digits.
@@ -662,32 +779,9 @@ impl HexLiteralKind {
}
}
-/// A single character class expression.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub enum Class {
- /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
- Unicode(ClassUnicode),
- /// A perl character class, e.g., `\d` or `\W`.
- Perl(ClassPerl),
- /// A bracketed character class set, which may contain zero or more
- /// character ranges and/or zero or more nested classes. e.g.,
- /// `[a-zA-Z\pL]`.
- Bracketed(ClassBracketed),
-}
-
-impl Class {
- /// Return the span of this character class.
- pub fn span(&self) -> &Span {
- match *self {
- Class::Perl(ref x) => &x.span,
- Class::Unicode(ref x) => &x.span,
- Class::Bracketed(ref x) => &x.span,
- }
- }
-}
-
/// A Perl character class.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct ClassPerl {
/// The span of this class.
pub span: Span,
@@ -700,6 +794,7 @@ pub struct ClassPerl {
/// The available Perl character classes.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum ClassPerlKind {
/// Decimal numbers.
Digit,
@@ -711,6 +806,7 @@ pub enum ClassPerlKind {
/// An ASCII character class.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct ClassAscii {
/// The span of this class.
pub span: Span,
@@ -723,6 +819,7 @@ pub struct ClassAscii {
/// The available ASCII character classes.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum ClassAsciiKind {
/// `[0-9A-Za-z]`
Alnum,
@@ -786,6 +883,7 @@ impl ClassAsciiKind {
/// A Unicode character class.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct ClassUnicode {
/// The span of this class.
pub span: Span,
@@ -838,8 +936,156 @@ pub enum ClassUnicodeKind {
},
}
+#[cfg(feature = "arbitrary")]
+impl arbitrary::Arbitrary<'_> for ClassUnicodeKind {
+ fn arbitrary(
+ u: &mut arbitrary::Unstructured,
+ ) -> arbitrary::Result<ClassUnicodeKind> {
+ #[cfg(any(
+ feature = "unicode-age",
+ feature = "unicode-bool",
+ feature = "unicode-gencat",
+ feature = "unicode-perl",
+ feature = "unicode-script",
+ feature = "unicode-segment",
+ ))]
+ {
+ use alloc::string::ToString;
+
+ use super::unicode_tables::{
+ property_names::PROPERTY_NAMES,
+ property_values::PROPERTY_VALUES,
+ };
+
+ match u.choose_index(3)? {
+ 0 => {
+ let all = PROPERTY_VALUES
+ .iter()
+ .flat_map(|e| e.1.iter())
+ .filter(|(name, _)| name.len() == 1)
+ .count();
+ let idx = u.choose_index(all)?;
+ let value = PROPERTY_VALUES
+ .iter()
+ .flat_map(|e| e.1.iter())
+ .take(idx + 1)
+ .last()
+ .unwrap()
+ .0
+ .chars()
+ .next()
+ .unwrap();
+ Ok(ClassUnicodeKind::OneLetter(value))
+ }
+ 1 => {
+ let all = PROPERTY_VALUES
+ .iter()
+ .map(|e| e.1.len())
+ .sum::<usize>()
+ + PROPERTY_NAMES.len();
+ let idx = u.choose_index(all)?;
+ let name = PROPERTY_VALUES
+ .iter()
+ .flat_map(|e| e.1.iter())
+ .chain(PROPERTY_NAMES)
+ .map(|(_, e)| e)
+ .take(idx + 1)
+ .last()
+ .unwrap();
+ Ok(ClassUnicodeKind::Named(name.to_string()))
+ }
+ 2 => {
+ let all = PROPERTY_VALUES
+ .iter()
+ .map(|e| e.1.len())
+ .sum::<usize>();
+ let idx = u.choose_index(all)?;
+ let (prop, value) = PROPERTY_VALUES
+ .iter()
+ .flat_map(|e| {
+ e.1.iter().map(|(_, value)| (e.0, value))
+ })
+ .take(idx + 1)
+ .last()
+ .unwrap();
+ Ok(ClassUnicodeKind::NamedValue {
+ op: u.arbitrary()?,
+ name: prop.to_string(),
+ value: value.to_string(),
+ })
+ }
+ _ => unreachable!("index chosen is impossible"),
+ }
+ }
+ #[cfg(not(any(
+ feature = "unicode-age",
+ feature = "unicode-bool",
+ feature = "unicode-gencat",
+ feature = "unicode-perl",
+ feature = "unicode-script",
+ feature = "unicode-segment",
+ )))]
+ {
+ match u.choose_index(3)? {
+ 0 => Ok(ClassUnicodeKind::OneLetter(u.arbitrary()?)),
+ 1 => Ok(ClassUnicodeKind::Named(u.arbitrary()?)),
+ 2 => Ok(ClassUnicodeKind::NamedValue {
+ op: u.arbitrary()?,
+ name: u.arbitrary()?,
+ value: u.arbitrary()?,
+ }),
+ _ => unreachable!("index chosen is impossible"),
+ }
+ }
+ }
+
+ fn size_hint(depth: usize) -> (usize, Option<usize>) {
+ #[cfg(any(
+ feature = "unicode-age",
+ feature = "unicode-bool",
+ feature = "unicode-gencat",
+ feature = "unicode-perl",
+ feature = "unicode-script",
+ feature = "unicode-segment",
+ ))]
+ {
+ arbitrary::size_hint::and_all(&[
+ usize::size_hint(depth),
+ usize::size_hint(depth),
+ arbitrary::size_hint::or(
+ (0, Some(0)),
+ ClassUnicodeOpKind::size_hint(depth),
+ ),
+ ])
+ }
+ #[cfg(not(any(
+ feature = "unicode-age",
+ feature = "unicode-bool",
+ feature = "unicode-gencat",
+ feature = "unicode-perl",
+ feature = "unicode-script",
+ feature = "unicode-segment",
+ )))]
+ {
+ arbitrary::size_hint::and(
+ usize::size_hint(depth),
+ arbitrary::size_hint::or_all(&[
+ char::size_hint(depth),
+ String::size_hint(depth),
+ arbitrary::size_hint::and_all(&[
+ String::size_hint(depth),
+ String::size_hint(depth),
+ ClassUnicodeOpKind::size_hint(depth),
+ ]),
+ ]),
+ )
+ }
+ }
+}
+
/// The type of op used in a Unicode character class.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum ClassUnicodeOpKind {
/// A property set to a specific value, e.g., `\p{scx=Katakana}`.
Equal,
@@ -862,6 +1108,7 @@ impl ClassUnicodeOpKind {
/// A bracketed character class, e.g., `[a-z0-9]`.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct ClassBracketed {
/// The span of this class.
pub span: Span,
@@ -880,6 +1127,7 @@ pub struct ClassBracketed {
/// items (literals, ranges, other bracketed classes) or a tree of binary set
/// operations.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum ClassSet {
/// An item, which can be a single literal, range, nested character class
/// or a union of items.
@@ -913,6 +1161,7 @@ impl ClassSet {
/// A single component of a character class set.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum ClassSetItem {
/// An empty item.
///
@@ -956,6 +1205,7 @@ impl ClassSetItem {
/// A single character class range in a set.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct ClassSetRange {
/// The span of this range.
pub span: Span,
@@ -977,6 +1227,7 @@ impl ClassSetRange {
/// A union of items inside a character class set.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct ClassSetUnion {
/// The span of the items in this operation. e.g., the `a-z0-9` in
/// `[^a-z0-9]`
@@ -1021,6 +1272,7 @@ impl ClassSetUnion {
/// A Unicode character class set operation.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct ClassSetBinaryOp {
/// The span of this operation. e.g., the `a-z--[h-p]` in `[a-z--h-p]`.
pub span: Span,
@@ -1038,6 +1290,7 @@ pub struct ClassSetBinaryOp {
/// explicit union operator. Concatenation inside a character class corresponds
/// to the union operation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum ClassSetBinaryOpKind {
/// The intersection of two sets, e.g., `\pN&&[a-z]`.
Intersection,
@@ -1051,6 +1304,7 @@ pub enum ClassSetBinaryOpKind {
/// A single zero-width assertion.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Assertion {
/// The span of this assertion.
pub span: Span,
@@ -1060,6 +1314,7 @@ pub struct Assertion {
/// An assertion kind.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum AssertionKind {
/// `^`
StartLine,
@@ -1073,10 +1328,23 @@ pub enum AssertionKind {
WordBoundary,
/// `\B`
NotWordBoundary,
+ /// `\b{start}`
+ WordBoundaryStart,
+ /// `\b{end}`
+ WordBoundaryEnd,
+ /// `\<` (alias for `\b{start}`)
+ WordBoundaryStartAngle,
+ /// `\>` (alias for `\b{end}`)
+ WordBoundaryEndAngle,
+ /// `\b{start-half}`
+ WordBoundaryStartHalf,
+ /// `\b{end-half}`
+ WordBoundaryEndHalf,
}
/// A repetition operation applied to a regular expression.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Repetition {
/// The span of this operation.
pub span: Span,
@@ -1090,6 +1358,7 @@ pub struct Repetition {
/// The repetition operator itself.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct RepetitionOp {
/// The span of this operator. This includes things like `+`, `*?` and
/// `{m,n}`.
@@ -1100,6 +1369,7 @@ pub struct RepetitionOp {
/// The kind of a repetition operator.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum RepetitionKind {
/// `?`
ZeroOrOne,
@@ -1113,6 +1383,7 @@ pub enum RepetitionKind {
/// A range repetition operator.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum RepetitionRange {
/// `{m}`
Exactly(u32),
@@ -1142,6 +1413,7 @@ impl RepetitionRange {
/// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
/// `(?is:a)`.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Group {
/// The span of this group.
pub span: Span,
@@ -1183,6 +1455,7 @@ impl Group {
/// The kind of a group.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum GroupKind {
/// `(a)`
CaptureIndex(u32),
@@ -1211,8 +1484,38 @@ pub struct CaptureName {
pub index: u32,
}
+#[cfg(feature = "arbitrary")]
+impl arbitrary::Arbitrary<'_> for CaptureName {
+ fn arbitrary(
+ u: &mut arbitrary::Unstructured,
+ ) -> arbitrary::Result<CaptureName> {
+ let len = u.arbitrary_len::<char>()?;
+ if len == 0 {
+ return Err(arbitrary::Error::NotEnoughData);
+ }
+ let mut name: String = String::new();
+ for _ in 0..len {
+ let ch: char = u.arbitrary()?;
+ let cp = u32::from(ch);
+ let ascii_letter_offset = u8::try_from(cp % 26).unwrap();
+ let ascii_letter = b'a' + ascii_letter_offset;
+ name.push(char::from(ascii_letter));
+ }
+ Ok(CaptureName { span: u.arbitrary()?, name, index: u.arbitrary()? })
+ }
+
+ fn size_hint(depth: usize) -> (usize, Option<usize>) {
+ arbitrary::size_hint::and_all(&[
+ Span::size_hint(depth),
+ usize::size_hint(depth),
+ u32::size_hint(depth),
+ ])
+ }
+}
+
/// A group of flags that is not applied to a particular regular expression.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct SetFlags {
/// The span of these flags, including the grouping parentheses.
pub span: Span,
@@ -1224,6 +1527,7 @@ pub struct SetFlags {
///
/// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Flags {
/// The span of this group of flags.
pub span: Span,
@@ -1276,6 +1580,7 @@ impl Flags {
/// A single item in a group of flags.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct FlagsItem {
/// The span of this item.
pub span: Span,
@@ -1285,6 +1590,7 @@ pub struct FlagsItem {
/// The kind of an item in a group of flags.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum FlagsItemKind {
/// A negation operator applied to all subsequent flags in the enclosing
/// group.
@@ -1305,6 +1611,7 @@ impl FlagsItemKind {
/// A single flag.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Flag {
/// `i`
CaseInsensitive,
@@ -1334,8 +1641,10 @@ impl Drop for Ast {
| Ast::Literal(_)
| Ast::Dot(_)
| Ast::Assertion(_)
- // Classes are recursive, so they get their own Drop impl.
- | Ast::Class(_) => return,
+ | Ast::ClassUnicode(_)
+ | Ast::ClassPerl(_)
+ // Bracketed classes are recursive, so they get their own Drop impl.
+ | Ast::ClassBracketed(_) => return,
Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
Ast::Group(ref x) if !x.ast.has_subexprs() => return,
Ast::Alternation(ref x) if x.asts.is_empty() => return,
@@ -1344,7 +1653,7 @@ impl Drop for Ast {
}
let empty_span = || Span::splat(Position::new(0, 0, 0));
- let empty_ast = || Ast::Empty(empty_span());
+ let empty_ast = || Ast::empty(empty_span());
let mut stack = vec![mem::replace(self, empty_ast())];
while let Some(mut ast) = stack.pop() {
match ast {
@@ -1353,8 +1662,11 @@ impl Drop for Ast {
| Ast::Literal(_)
| Ast::Dot(_)
| Ast::Assertion(_)
- // Classes are recursive, so they get their own Drop impl.
- | Ast::Class(_) => {}
+ | Ast::ClassUnicode(_)
+ | Ast::ClassPerl(_)
+ // Bracketed classes are recursive, so they get their own Drop
+ // impl.
+ | Ast::ClassBracketed(_) => {}
Ast::Repetition(ref mut x) => {
stack.push(mem::replace(&mut x.ast, empty_ast()));
}
@@ -1447,9 +1759,9 @@ mod tests {
let run = || {
let span = || Span::splat(Position::new(0, 0, 0));
- let mut ast = Ast::Empty(span());
+ let mut ast = Ast::empty(span());
for i in 0..200 {
- ast = Ast::Group(Group {
+ ast = Ast::group(Group {
span: span(),
kind: GroupKind::CaptureIndex(i),
ast: Box::new(ast),
@@ -1478,4 +1790,20 @@ mod tests {
.join()
.unwrap();
}
+
+ // This tests that our `Ast` has a reasonable size. This isn't a hard rule
+ // and it can be increased if given a good enough reason. But this test
+ // exists because the size of `Ast` was at one point over 200 bytes on a
+ // 64-bit target. Wow.
+ #[test]
+ fn ast_size() {
+ let max = 2 * core::mem::size_of::<usize>();
+ let size = core::mem::size_of::<Ast>();
+ assert!(
+ size <= max,
+ "Ast size of {} bytes is bigger than suggested max {}",
+ size,
+ max
+ );
+ }
}
diff --git a/vendor/regex-syntax/src/ast/parse.rs b/vendor/regex-syntax/src/ast/parse.rs
index 9cf64e9ec..593b14fbc 100644
--- a/vendor/regex-syntax/src/ast/parse.rs
+++ b/vendor/regex-syntax/src/ast/parse.rs
@@ -53,11 +53,11 @@ impl Primitive {
/// Convert this primitive into a proper AST.
fn into_ast(self) -> Ast {
match self {
- Primitive::Literal(lit) => Ast::Literal(lit),
- Primitive::Assertion(assert) => Ast::Assertion(assert),
- Primitive::Dot(span) => Ast::Dot(span),
- Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
- Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
+ Primitive::Literal(lit) => Ast::literal(lit),
+ Primitive::Assertion(assert) => Ast::assertion(assert),
+ Primitive::Dot(span) => Ast::dot(span),
+ Primitive::Perl(cls) => Ast::class_perl(cls),
+ Primitive::Unicode(cls) => Ast::class_unicode(cls),
}
}
@@ -383,7 +383,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
/// Return a reference to the pattern being parsed.
fn pattern(&self) -> &str {
- self.pattern.borrow()
+ self.pattern
}
/// Create a new error with the given span and error type.
@@ -691,7 +691,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
self.parser().ignore_whitespace.set(v);
}
- concat.asts.push(Ast::Flags(set));
+ concat.asts.push(Ast::flags(set));
Ok(concat)
}
Either::Right(group) => {
@@ -764,7 +764,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
group.ast = Box::new(group_concat.into_ast());
}
}
- prior_concat.asts.push(Ast::Group(group));
+ prior_concat.asts.push(Ast::group(group));
Ok(prior_concat)
}
@@ -783,7 +783,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
Some(GroupState::Alternation(mut alt)) => {
alt.span.end = self.pos();
alt.asts.push(concat.into_ast());
- Ok(Ast::Alternation(alt))
+ Ok(Ast::alternation(alt))
}
Some(GroupState::Group { group, .. }) => {
return Err(
@@ -850,7 +850,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
fn pop_class(
&self,
nested_union: ast::ClassSetUnion,
- ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
+ ) -> Result<Either<ast::ClassSetUnion, ast::ClassBracketed>> {
assert_eq!(self.char(), ']');
let item = ast::ClassSet::Item(nested_union.into_item());
@@ -882,7 +882,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
set.span.end = self.pos();
set.kind = prevset;
if stack.is_empty() {
- Ok(Either::Right(ast::Class::Bracketed(set)))
+ Ok(Either::Right(set))
} else {
union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
Ok(Either::Left(union))
@@ -976,7 +976,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
'|' => concat = self.push_alternate(concat)?,
'[' => {
let class = self.parse_set_class()?;
- concat.asts.push(Ast::Class(class));
+ concat.asts.push(Ast::class_bracketed(class));
}
'?' => {
concat = self.parse_uncounted_repetition(
@@ -1057,7 +1057,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
greedy = false;
self.bump();
}
- concat.asts.push(Ast::Repetition(ast::Repetition {
+ concat.asts.push(Ast::repetition(ast::Repetition {
span: ast.span().with_end(self.pos()),
op: ast::RepetitionOp {
span: Span::new(op_start, self.pos()),
@@ -1159,7 +1159,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
);
}
- concat.asts.push(Ast::Repetition(ast::Repetition {
+ concat.asts.push(Ast::repetition(ast::Repetition {
span: ast.span().with_end(self.pos()),
op: ast::RepetitionOp {
span: op_span,
@@ -1212,7 +1212,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
Ok(Either::Right(ast::Group {
span: open_span,
kind: ast::GroupKind::CaptureName { starts_with_p, name },
- ast: Box::new(Ast::Empty(self.span())),
+ ast: Box::new(Ast::empty(self.span())),
}))
} else if self.bump_if("?") {
if self.is_eof() {
@@ -1241,7 +1241,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
Ok(Either::Right(ast::Group {
span: open_span,
kind: ast::GroupKind::NonCapturing(flags),
- ast: Box::new(Ast::Empty(self.span())),
+ ast: Box::new(Ast::empty(self.span())),
}))
}
} else {
@@ -1249,7 +1249,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
Ok(Either::Right(ast::Group {
span: open_span,
kind: ast::GroupKind::CaptureIndex(capture_index),
- ast: Box::new(Ast::Empty(self.span())),
+ ast: Box::new(Ast::empty(self.span())),
}))
}
}
@@ -1528,18 +1528,115 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
span,
kind: ast::AssertionKind::EndText,
})),
- 'b' => Ok(Primitive::Assertion(ast::Assertion {
- span,
- kind: ast::AssertionKind::WordBoundary,
- })),
+ 'b' => {
+ let mut wb = ast::Assertion {
+ span,
+ kind: ast::AssertionKind::WordBoundary,
+ };
+ // After a \b, we "try" to parse things like \b{start} for
+ // special word boundary assertions.
+ if !self.is_eof() && self.char() == '{' {
+ if let Some(kind) =
+ self.maybe_parse_special_word_boundary(start)?
+ {
+ wb.kind = kind;
+ wb.span.end = self.pos();
+ }
+ }
+ Ok(Primitive::Assertion(wb))
+ }
'B' => Ok(Primitive::Assertion(ast::Assertion {
span,
kind: ast::AssertionKind::NotWordBoundary,
})),
+ '<' => Ok(Primitive::Assertion(ast::Assertion {
+ span,
+ kind: ast::AssertionKind::WordBoundaryStartAngle,
+ })),
+ '>' => Ok(Primitive::Assertion(ast::Assertion {
+ span,
+ kind: ast::AssertionKind::WordBoundaryEndAngle,
+ })),
_ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
}
}
+ /// Attempt to parse a specialty word boundary. That is, `\b{start}`,
+ /// `\b{end}`, `\b{start-half}` or `\b{end-half}`.
+ ///
+ /// This is similar to `maybe_parse_ascii_class` in that, in most cases,
+ /// if it fails it will just return `None` with no error. This is done
+ /// because `\b{5}` is a valid expression and we want to let that be parsed
+ /// by the existing counted repetition parsing code. (I thought about just
+ /// invoking the counted repetition code from here, but it seemed a little
+ /// ham-fisted.)
+ ///
+ /// Unlike `maybe_parse_ascii_class` though, this can return an error:
+ /// EOF right after the `{`, or, once we know it isn't a counted
+ /// repetition, a missing `}` or an unrecognized boundary name.
+ ///
+ /// This assumes the parser is positioned at a `{` immediately following
+ /// a `\b`. When `None` is returned, the parser is returned to the position
+ /// at which it started: pointing at a `{`.
+ ///
+ /// The position given should correspond to the start of the `\b`.
+ fn maybe_parse_special_word_boundary(
+ &self,
+ wb_start: Position,
+ ) -> Result<Option<ast::AssertionKind>> {
+ assert_eq!(self.char(), '{');
+
+ let is_valid_char = |c| match c {
+ 'A'..='Z' | 'a'..='z' | '-' => true,
+ _ => false,
+ };
+ let start = self.pos(); // position of the opening '{'
+ if !self.bump_and_bump_space() {
+ return Err(self.error(
+ Span::new(wb_start, self.pos()),
+ ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
+ ));
+ }
+ let start_contents = self.pos(); // first char after the '{'
+ // This is one of the critical bits: if the first non-whitespace
+ // character isn't in [-A-Za-z] (i.e., this can't be a special word
+ // boundary), then we bail and let the counted repetition parser deal
+ // with this.
+ if !is_valid_char(self.char()) {
+ self.parser().pos.set(start); // rewind so we point at the '{' again
+ return Ok(None);
+ }
+
+ // Collect chars while they are in [-A-Za-z]; the next char must be '}'.
+ let mut scratch = self.parser().scratch.borrow_mut();
+ scratch.clear();
+ while !self.is_eof() && is_valid_char(self.char()) {
+ scratch.push(self.char());
+ self.bump_and_bump_space();
+ }
+ if self.is_eof() || self.char() != '}' {
+ return Err(self.error(
+ Span::new(start, self.pos()),
+ ast::ErrorKind::SpecialWordBoundaryUnclosed,
+ ));
+ }
+ let end = self.pos(); // position of the closing '}'
+ self.bump();
+ let kind = match scratch.as_str() {
+ "start" => ast::AssertionKind::WordBoundaryStart,
+ "end" => ast::AssertionKind::WordBoundaryEnd,
+ "start-half" => ast::AssertionKind::WordBoundaryStartHalf,
+ "end-half" => ast::AssertionKind::WordBoundaryEndHalf,
+ _ => {
+ return Err(self.error(
+ Span::new(start_contents, end),
+ ast::ErrorKind::SpecialWordBoundaryUnrecognized,
+ ))
+ }
+ };
+ Ok(Some(kind))
+ }
+
/// Parse an octal representation of a Unicode codepoint up to 3 digits
/// long. This expects the parser to be positioned at the first octal
/// digit and advances the parser to the first character immediately
@@ -1743,7 +1840,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
/// is successful, then the parser is advanced to the position immediately
/// following the closing `]`.
#[inline(never)]
- fn parse_set_class(&self) -> Result<ast::Class> {
+ fn parse_set_class(&self) -> Result<ast::ClassBracketed> {
assert_eq!(self.char(), '[');
let mut union =
@@ -1967,9 +2064,9 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
// because parsing cannot fail with any interesting error. For example,
// in order to use an ASCII character class, it must be enclosed in
// double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
- // of it as "ASCII character characters have the syntax `[:NAME:]`
- // which can only appear within character brackets." This means that
- // things like `[[:lower:]A]` are legal constructs.
+ // of it as "ASCII character classes have the syntax `[:NAME:]` which
+ // can only appear within character brackets." This means that things
+ // like `[[:lower:]A]` are legal constructs.
//
// However, if one types an incorrect ASCII character class, e.g.,
// `[[:loower:]]`, then we treat that as a normal nested character
@@ -2189,12 +2286,12 @@ impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
| Ast::Literal(_)
| Ast::Dot(_)
| Ast::Assertion(_)
- | Ast::Class(ast::Class::Unicode(_))
- | Ast::Class(ast::Class::Perl(_)) => {
+ | Ast::ClassUnicode(_)
+ | Ast::ClassPerl(_) => {
// These are all base cases, so we don't increment depth.
return Ok(());
}
- Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
+ Ast::ClassBracketed(ref x) => &x.span,
Ast::Repetition(ref x) => &x.span,
Ast::Group(ref x) => &x.span,
Ast::Alternation(ref x) => &x.span,
@@ -2210,12 +2307,12 @@ impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
| Ast::Literal(_)
| Ast::Dot(_)
| Ast::Assertion(_)
- | Ast::Class(ast::Class::Unicode(_))
- | Ast::Class(ast::Class::Perl(_)) => {
+ | Ast::ClassUnicode(_)
+ | Ast::ClassPerl(_) => {
// These are all base cases, so we don't decrement depth.
Ok(())
}
- Ast::Class(ast::Class::Bracketed(_))
+ Ast::ClassBracketed(_)
| Ast::Repetition(_)
| Ast::Group(_)
| Ast::Alternation(_)
@@ -2426,12 +2523,12 @@ mod tests {
/// Create a meta literal starting at the given position.
fn meta_lit(c: char, span: Span) -> Ast {
- Ast::Literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c })
+ Ast::literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c })
}
/// Create a verbatim literal with the given span.
fn lit_with(c: char, span: Span) -> Ast {
- Ast::Literal(ast::Literal {
+ Ast::literal(ast::Literal {
span,
kind: ast::LiteralKind::Verbatim,
c,
@@ -2445,17 +2542,17 @@ mod tests {
/// Create a concatenation with the given span.
fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
- Ast::Concat(ast::Concat { span, asts })
+ Ast::concat(ast::Concat { span, asts })
}
/// Create an alternation with the given span.
fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
- Ast::Alternation(ast::Alternation { span: span(range), asts })
+ Ast::alternation(ast::Alternation { span: span(range), asts })
}
/// Create a capturing group with the given span.
fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
- Ast::Group(ast::Group {
+ Ast::group(ast::Group {
span: span(range),
kind: ast::GroupKind::CaptureIndex(index),
ast: Box::new(ast),
@@ -2488,7 +2585,7 @@ mod tests {
},
);
}
- Ast::Flags(ast::SetFlags {
+ Ast::flags(ast::SetFlags {
span: span_range(pat, range.clone()),
flags: ast::Flags {
span: span_range(pat, (range.start + 2)..(range.end - 1)),
@@ -2502,7 +2599,7 @@ mod tests {
// A nest limit of 0 still allows some types of regexes.
assert_eq!(
parser_nest_limit("", 0).parse(),
- Ok(Ast::Empty(span(0..0)))
+ Ok(Ast::empty(span(0..0)))
);
assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));
@@ -2516,7 +2613,7 @@ mod tests {
);
assert_eq!(
parser_nest_limit("a+", 1).parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..2),
op: ast::RepetitionOp {
span: span(1..2),
@@ -2542,14 +2639,14 @@ mod tests {
);
assert_eq!(
parser_nest_limit("a+*", 2).parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..3),
op: ast::RepetitionOp {
span: span(2..3),
kind: ast::RepetitionKind::ZeroOrMore,
},
greedy: true,
- ast: Box::new(Ast::Repetition(ast::Repetition {
+ ast: Box::new(Ast::repetition(ast::Repetition {
span: span(0..2),
op: ast::RepetitionOp {
span: span(1..2),
@@ -2606,7 +2703,7 @@ mod tests {
);
assert_eq!(
parser_nest_limit("[a]", 1).parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..3),
negated: false,
kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
@@ -2616,7 +2713,7 @@ mod tests {
c: 'a',
}
)),
- })))
+ }))
);
assert_eq!(
parser_nest_limit("[ab]", 1).parse().unwrap_err(),
@@ -2776,7 +2873,7 @@ bar
vec![
lit_with('a', span_range(pat, 0..1)),
lit_with(' ', span_range(pat, 1..2)),
- Ast::Group(ast::Group {
+ Ast::group(ast::Group {
span: span_range(pat, 2..9),
kind: ast::GroupKind::NonCapturing(ast::Flags {
span: span_range(pat, 4..5),
@@ -2803,7 +2900,7 @@ bar
span_range(pat, 0..pat.len()),
vec![
flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
- Ast::Group(ast::Group {
+ Ast::group(ast::Group {
span: span_range(pat, 4..pat.len()),
kind: ast::GroupKind::CaptureName {
starts_with_p: true,
@@ -2825,7 +2922,7 @@ bar
span_range(pat, 0..pat.len()),
vec![
flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
- Ast::Group(ast::Group {
+ Ast::group(ast::Group {
span: span_range(pat, 4..pat.len()),
kind: ast::GroupKind::CaptureIndex(1),
ast: Box::new(lit_with('a', span_range(pat, 7..8))),
@@ -2840,7 +2937,7 @@ bar
span_range(pat, 0..pat.len()),
vec![
flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
- Ast::Group(ast::Group {
+ Ast::group(ast::Group {
span: span_range(pat, 4..pat.len()),
kind: ast::GroupKind::NonCapturing(ast::Flags {
span: span_range(pat, 8..8),
@@ -2858,7 +2955,7 @@ bar
span_range(pat, 0..pat.len()),
vec![
flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
- Ast::Literal(ast::Literal {
+ Ast::literal(ast::Literal {
span: span(4..13),
kind: ast::LiteralKind::HexBrace(
ast::HexLiteralKind::X
@@ -2877,7 +2974,7 @@ bar
span_range(pat, 0..pat.len()),
vec![
flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
- Ast::Literal(ast::Literal {
+ Ast::literal(ast::Literal {
span: span_range(pat, 4..6),
kind: ast::LiteralKind::Superfluous,
c: ' ',
@@ -2895,9 +2992,9 @@ bar
Ok(concat_with(
span_range(pat, 0..3),
vec![
- Ast::Dot(span_range(pat, 0..1)),
+ Ast::dot(span_range(pat, 0..1)),
lit_with('\n', span_range(pat, 1..2)),
- Ast::Dot(span_range(pat, 2..3)),
+ Ast::dot(span_range(pat, 2..3)),
]
))
);
@@ -2933,7 +3030,7 @@ bar
fn parse_uncounted_repetition() {
assert_eq!(
parser(r"a*").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..2),
op: ast::RepetitionOp {
span: span(1..2),
@@ -2945,7 +3042,7 @@ bar
);
assert_eq!(
parser(r"a+").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..2),
op: ast::RepetitionOp {
span: span(1..2),
@@ -2958,7 +3055,7 @@ bar
assert_eq!(
parser(r"a?").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..2),
op: ast::RepetitionOp {
span: span(1..2),
@@ -2970,7 +3067,7 @@ bar
);
assert_eq!(
parser(r"a??").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..3),
op: ast::RepetitionOp {
span: span(1..3),
@@ -2982,7 +3079,7 @@ bar
);
assert_eq!(
parser(r"a?").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..2),
op: ast::RepetitionOp {
span: span(1..2),
@@ -2997,7 +3094,7 @@ bar
Ok(concat(
0..3,
vec![
- Ast::Repetition(ast::Repetition {
+ Ast::repetition(ast::Repetition {
span: span(0..2),
op: ast::RepetitionOp {
span: span(1..2),
@@ -3015,7 +3112,7 @@ bar
Ok(concat(
0..4,
vec![
- Ast::Repetition(ast::Repetition {
+ Ast::repetition(ast::Repetition {
span: span(0..3),
op: ast::RepetitionOp {
span: span(1..3),
@@ -3034,7 +3131,7 @@ bar
0..3,
vec![
lit('a', 0),
- Ast::Repetition(ast::Repetition {
+ Ast::repetition(ast::Repetition {
span: span(1..3),
op: ast::RepetitionOp {
span: span(2..3),
@@ -3048,7 +3145,7 @@ bar
);
assert_eq!(
parser(r"(ab)?").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..5),
op: ast::RepetitionOp {
span: span(4..5),
@@ -3067,8 +3164,8 @@ bar
Ok(alt(
0..3,
vec![
- Ast::Empty(span(0..0)),
- Ast::Repetition(ast::Repetition {
+ Ast::empty(span(0..0)),
+ Ast::repetition(ast::Repetition {
span: span(1..3),
op: ast::RepetitionOp {
span: span(2..3),
@@ -3157,7 +3254,7 @@ bar
fn parse_counted_repetition() {
assert_eq!(
parser(r"a{5}").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..4),
op: ast::RepetitionOp {
span: span(1..4),
@@ -3171,7 +3268,7 @@ bar
);
assert_eq!(
parser(r"a{5,}").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..5),
op: ast::RepetitionOp {
span: span(1..5),
@@ -3185,7 +3282,7 @@ bar
);
assert_eq!(
parser(r"a{5,9}").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..6),
op: ast::RepetitionOp {
span: span(1..6),
@@ -3199,7 +3296,7 @@ bar
);
assert_eq!(
parser(r"a{5}?").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..5),
op: ast::RepetitionOp {
span: span(1..5),
@@ -3217,7 +3314,7 @@ bar
0..5,
vec![
lit('a', 0),
- Ast::Repetition(ast::Repetition {
+ Ast::repetition(ast::Repetition {
span: span(1..5),
op: ast::RepetitionOp {
span: span(2..5),
@@ -3237,7 +3334,7 @@ bar
0..6,
vec![
lit('a', 0),
- Ast::Repetition(ast::Repetition {
+ Ast::repetition(ast::Repetition {
span: span(1..5),
op: ast::RepetitionOp {
span: span(2..5),
@@ -3255,7 +3352,7 @@ bar
assert_eq!(
parser(r"a{ 5 }").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..6),
op: ast::RepetitionOp {
span: span(1..6),
@@ -3269,7 +3366,7 @@ bar
);
assert_eq!(
parser(r"a{ 5 , 9 }").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..10),
op: ast::RepetitionOp {
span: span(1..10),
@@ -3283,7 +3380,7 @@ bar
);
assert_eq!(
parser_ignore_whitespace(r"a{5,9} ?").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..8),
op: ast::RepetitionOp {
span: span(1..8),
@@ -3295,6 +3392,23 @@ bar
ast: Box::new(lit('a', 0)),
}))
);
+ assert_eq!(
+ parser(r"\b{5,9}").parse(),
+ Ok(Ast::repetition(ast::Repetition {
+ span: span(0..7),
+ op: ast::RepetitionOp {
+ span: span(2..7),
+ kind: ast::RepetitionKind::Range(
+ ast::RepetitionRange::Bounded(5, 9)
+ ),
+ },
+ greedy: true,
+ ast: Box::new(Ast::assertion(ast::Assertion {
+ span: span(0..2),
+ kind: ast::AssertionKind::WordBoundary,
+ })),
+ }))
+ );
assert_eq!(
parser(r"(?i){0}").parse().unwrap_err(),
@@ -3414,7 +3528,7 @@ bar
fn parse_alternate() {
assert_eq!(
parser(r"a|b").parse(),
- Ok(Ast::Alternation(ast::Alternation {
+ Ok(Ast::alternation(ast::Alternation {
span: span(0..3),
asts: vec![lit('a', 0), lit('b', 2)],
}))
@@ -3424,7 +3538,7 @@ bar
Ok(group(
0..5,
1,
- Ast::Alternation(ast::Alternation {
+ Ast::alternation(ast::Alternation {
span: span(1..4),
asts: vec![lit('a', 1), lit('b', 3)],
})
@@ -3433,14 +3547,14 @@ bar
assert_eq!(
parser(r"a|b|c").parse(),
- Ok(Ast::Alternation(ast::Alternation {
+ Ok(Ast::alternation(ast::Alternation {
span: span(0..5),
asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
}))
);
assert_eq!(
parser(r"ax|by|cz").parse(),
- Ok(Ast::Alternation(ast::Alternation {
+ Ok(Ast::alternation(ast::Alternation {
span: span(0..8),
asts: vec![
concat(0..2, vec![lit('a', 0), lit('x', 1)]),
@@ -3454,7 +3568,7 @@ bar
Ok(group(
0..10,
1,
- Ast::Alternation(ast::Alternation {
+ Ast::alternation(ast::Alternation {
span: span(1..9),
asts: vec![
concat(1..3, vec![lit('a', 1), lit('x', 2)]),
@@ -3503,7 +3617,7 @@ bar
parser(r"|").parse(),
Ok(alt(
0..1,
- vec![Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),]
+ vec![Ast::empty(span(0..0)), Ast::empty(span(1..1)),]
))
);
assert_eq!(
@@ -3511,19 +3625,19 @@ bar
Ok(alt(
0..2,
vec![
- Ast::Empty(span(0..0)),
- Ast::Empty(span(1..1)),
- Ast::Empty(span(2..2)),
+ Ast::empty(span(0..0)),
+ Ast::empty(span(1..1)),
+ Ast::empty(span(2..2)),
]
))
);
assert_eq!(
parser(r"a|").parse(),
- Ok(alt(0..2, vec![lit('a', 0), Ast::Empty(span(2..2)),]))
+ Ok(alt(0..2, vec![lit('a', 0), Ast::empty(span(2..2)),]))
);
assert_eq!(
parser(r"|a").parse(),
- Ok(alt(0..2, vec![Ast::Empty(span(0..0)), lit('a', 1),]))
+ Ok(alt(0..2, vec![Ast::empty(span(0..0)), lit('a', 1),]))
);
assert_eq!(
@@ -3533,7 +3647,7 @@ bar
1,
alt(
1..2,
- vec![Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),]
+ vec![Ast::empty(span(1..1)), Ast::empty(span(2..2)),]
)
))
);
@@ -3542,7 +3656,7 @@ bar
Ok(group(
0..4,
1,
- alt(1..3, vec![lit('a', 1), Ast::Empty(span(3..3)),])
+ alt(1..3, vec![lit('a', 1), Ast::empty(span(3..3)),])
))
);
assert_eq!(
@@ -3550,7 +3664,7 @@ bar
Ok(group(
0..4,
1,
- alt(1..3, vec![Ast::Empty(span(1..1)), lit('a', 2),])
+ alt(1..3, vec![Ast::empty(span(1..1)), lit('a', 2),])
))
);
@@ -3606,7 +3720,7 @@ bar
fn parse_group() {
assert_eq!(
parser("(?i)").parse(),
- Ok(Ast::Flags(ast::SetFlags {
+ Ok(Ast::flags(ast::SetFlags {
span: span(0..4),
flags: ast::Flags {
span: span(2..3),
@@ -3621,7 +3735,7 @@ bar
);
assert_eq!(
parser("(?iU)").parse(),
- Ok(Ast::Flags(ast::SetFlags {
+ Ok(Ast::flags(ast::SetFlags {
span: span(0..5),
flags: ast::Flags {
span: span(2..4),
@@ -3644,7 +3758,7 @@ bar
);
assert_eq!(
parser("(?i-U)").parse(),
- Ok(Ast::Flags(ast::SetFlags {
+ Ok(Ast::flags(ast::SetFlags {
span: span(0..6),
flags: ast::Flags {
span: span(2..5),
@@ -3672,15 +3786,15 @@ bar
assert_eq!(
parser("()").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..2),
kind: ast::GroupKind::CaptureIndex(1),
- ast: Box::new(Ast::Empty(span(1..1))),
+ ast: Box::new(Ast::empty(span(1..1))),
}))
);
assert_eq!(
parser("(a)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..3),
kind: ast::GroupKind::CaptureIndex(1),
ast: Box::new(lit('a', 1)),
@@ -3688,20 +3802,20 @@ bar
);
assert_eq!(
parser("(())").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..4),
kind: ast::GroupKind::CaptureIndex(1),
- ast: Box::new(Ast::Group(ast::Group {
+ ast: Box::new(Ast::group(ast::Group {
span: span(1..3),
kind: ast::GroupKind::CaptureIndex(2),
- ast: Box::new(Ast::Empty(span(2..2))),
+ ast: Box::new(Ast::empty(span(2..2))),
})),
}))
);
assert_eq!(
parser("(?:a)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..5),
kind: ast::GroupKind::NonCapturing(ast::Flags {
span: span(2..2),
@@ -3713,7 +3827,7 @@ bar
assert_eq!(
parser("(?i:a)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..6),
kind: ast::GroupKind::NonCapturing(ast::Flags {
span: span(2..3),
@@ -3729,7 +3843,7 @@ bar
);
assert_eq!(
parser("(?i-U:a)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..8),
kind: ast::GroupKind::NonCapturing(ast::Flags {
span: span(2..5),
@@ -3818,7 +3932,7 @@ bar
fn parse_capture_name() {
assert_eq!(
parser("(?<a>z)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..7),
kind: ast::GroupKind::CaptureName {
starts_with_p: false,
@@ -3833,7 +3947,7 @@ bar
);
assert_eq!(
parser("(?P<a>z)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..8),
kind: ast::GroupKind::CaptureName {
starts_with_p: true,
@@ -3848,7 +3962,7 @@ bar
);
assert_eq!(
parser("(?P<abc>z)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..10),
kind: ast::GroupKind::CaptureName {
starts_with_p: true,
@@ -3864,7 +3978,7 @@ bar
assert_eq!(
parser("(?P<a_1>z)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..10),
kind: ast::GroupKind::CaptureName {
starts_with_p: true,
@@ -3880,7 +3994,7 @@ bar
assert_eq!(
parser("(?P<a.1>z)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..10),
kind: ast::GroupKind::CaptureName {
starts_with_p: true,
@@ -3896,7 +4010,7 @@ bar
assert_eq!(
parser("(?P<a[1]>z)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..11),
kind: ast::GroupKind::CaptureName {
starts_with_p: true,
@@ -3912,7 +4026,7 @@ bar
assert_eq!(
parser("(?P<a¾>)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: Span::new(
Position::new(0, 1, 1),
Position::new(9, 1, 9),
@@ -3928,7 +4042,7 @@ bar
index: 1,
}
},
- ast: Box::new(Ast::Empty(Span::new(
+ ast: Box::new(Ast::empty(Span::new(
Position::new(8, 1, 8),
Position::new(8, 1, 8),
))),
@@ -3936,7 +4050,7 @@ bar
);
assert_eq!(
parser("(?P<名字>)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: Span::new(
Position::new(0, 1, 1),
Position::new(12, 1, 9),
@@ -3952,7 +4066,7 @@ bar
index: 1,
}
},
- ast: Box::new(Ast::Empty(Span::new(
+ ast: Box::new(Ast::empty(Span::new(
Position::new(11, 1, 8),
Position::new(11, 1, 8),
))),
@@ -4382,6 +4496,48 @@ bar
}))
);
assert_eq!(
+ parser(r"\b{start}").parse_primitive(),
+ Ok(Primitive::Assertion(ast::Assertion {
+ span: span(0..9),
+ kind: ast::AssertionKind::WordBoundaryStart,
+ }))
+ );
+ assert_eq!(
+ parser(r"\b{end}").parse_primitive(),
+ Ok(Primitive::Assertion(ast::Assertion {
+ span: span(0..7),
+ kind: ast::AssertionKind::WordBoundaryEnd,
+ }))
+ );
+ assert_eq!(
+ parser(r"\b{start-half}").parse_primitive(),
+ Ok(Primitive::Assertion(ast::Assertion {
+ span: span(0..14),
+ kind: ast::AssertionKind::WordBoundaryStartHalf,
+ }))
+ );
+ assert_eq!(
+ parser(r"\b{end-half}").parse_primitive(),
+ Ok(Primitive::Assertion(ast::Assertion {
+ span: span(0..12),
+ kind: ast::AssertionKind::WordBoundaryEndHalf,
+ }))
+ );
+ assert_eq!(
+ parser(r"\<").parse_primitive(),
+ Ok(Primitive::Assertion(ast::Assertion {
+ span: span(0..2),
+ kind: ast::AssertionKind::WordBoundaryStartAngle,
+ }))
+ );
+ assert_eq!(
+ parser(r"\>").parse_primitive(),
+ Ok(Primitive::Assertion(ast::Assertion {
+ span: span(0..2),
+ kind: ast::AssertionKind::WordBoundaryEndAngle,
+ }))
+ );
+ assert_eq!(
parser(r"\B").parse_primitive(),
Ok(Primitive::Assertion(ast::Assertion {
span: span(0..2),
@@ -4418,20 +4574,60 @@ bar
kind: ast::ErrorKind::EscapeUnrecognized,
}
);
- // But also, < and > are banned, so that we may evolve them into
- // start/end word boundary assertions. (Not sure if we will...)
+
+ // Starting a special word boundary without any non-whitespace chars
+ // after the brace makes it ambiguous whether the user meant to write
+ // a counted repetition (probably not?) or an actual special word
+ // boundary assertion.
assert_eq!(
- parser(r"\<").parse_escape().unwrap_err(),
+ parser(r"\b{").parse_escape().unwrap_err(),
TestError {
- span: span(0..2),
- kind: ast::ErrorKind::EscapeUnrecognized,
+ span: span(0..3),
+ kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
}
);
assert_eq!(
- parser(r"\>").parse_escape().unwrap_err(),
+ parser_ignore_whitespace(r"\b{ ").parse_escape().unwrap_err(),
TestError {
- span: span(0..2),
- kind: ast::ErrorKind::EscapeUnrecognized,
+ span: span(0..4),
+ kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
+ }
+ );
+ // When 'x' is not enabled, the space is seen as a non-[-A-Za-z] char,
+ // and thus causes the parser to treat it as a counted repetition.
+ assert_eq!(
+ parser(r"\b{ ").parse().unwrap_err(),
+ TestError {
+ span: span(4..4),
+ kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
+ }
+ );
+ // In this case, we got some valid chars that make it look like the
+ // user is writing one of the special word boundary assertions, but
+ // the closing brace is missing.
+ assert_eq!(
+ parser(r"\b{foo").parse_escape().unwrap_err(),
+ TestError {
+ span: span(2..6),
+ kind: ast::ErrorKind::SpecialWordBoundaryUnclosed,
+ }
+ );
+ // We get the same error as above, except it is provoked by seeing a
+ // char that we know is invalid before seeing a closing brace.
+ assert_eq!(
+ parser(r"\b{foo!}").parse_escape().unwrap_err(),
+ TestError {
+ span: span(2..6),
+ kind: ast::ErrorKind::SpecialWordBoundaryUnclosed,
+ }
+ );
+ // And this one occurs when, syntactically, everything looks okay, but
+ // we don't use a valid spelling of a word boundary assertion.
+ assert_eq!(
+ parser(r"\b{foo}").parse_escape().unwrap_err(),
+ TestError {
+ span: span(3..6),
+ kind: ast::ErrorKind::SpecialWordBoundaryUnrecognized,
}
);
@@ -4494,15 +4690,15 @@ bar
);
assert_eq!(
parser_octal(r"\778").parse(),
- Ok(Ast::Concat(ast::Concat {
+ Ok(Ast::concat(ast::Concat {
span: span(0..4),
asts: vec![
- Ast::Literal(ast::Literal {
+ Ast::literal(ast::Literal {
span: span(0..3),
kind: ast::LiteralKind::Octal,
c: '?',
}),
- Ast::Literal(ast::Literal {
+ Ast::literal(ast::Literal {
span: span(3..4),
kind: ast::LiteralKind::Verbatim,
c: '8',
@@ -4512,15 +4708,15 @@ bar
);
assert_eq!(
parser_octal(r"\7777").parse(),
- Ok(Ast::Concat(ast::Concat {
+ Ok(Ast::concat(ast::Concat {
span: span(0..5),
asts: vec![
- Ast::Literal(ast::Literal {
+ Ast::literal(ast::Literal {
span: span(0..4),
kind: ast::LiteralKind::Octal,
c: '\u{01FF}',
}),
- Ast::Literal(ast::Literal {
+ Ast::literal(ast::Literal {
span: span(4..5),
kind: ast::LiteralKind::Verbatim,
c: '7',
@@ -4965,15 +5161,15 @@ bar
assert_eq!(
parser("[[:alnum:]]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..11),
negated: false,
kind: itemset(item_ascii(alnum(span(1..10), false))),
- })))
+ }))
);
assert_eq!(
parser("[[[:alnum:]]]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..13),
negated: false,
kind: itemset(item_bracket(ast::ClassBracketed {
@@ -4981,11 +5177,11 @@ bar
negated: false,
kind: itemset(item_ascii(alnum(span(2..11), false))),
})),
- })))
+ }))
);
assert_eq!(
parser("[[:alnum:]&&[:lower:]]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..22),
negated: false,
kind: intersection(
@@ -4993,11 +5189,11 @@ bar
itemset(item_ascii(alnum(span(1..10), false))),
itemset(item_ascii(lower(span(12..21), false))),
),
- })))
+ }))
);
assert_eq!(
parser("[[:alnum:]--[:lower:]]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..22),
negated: false,
kind: difference(
@@ -5005,11 +5201,11 @@ bar
itemset(item_ascii(alnum(span(1..10), false))),
itemset(item_ascii(lower(span(12..21), false))),
),
- })))
+ }))
);
assert_eq!(
parser("[[:alnum:]~~[:lower:]]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..22),
negated: false,
kind: symdifference(
@@ -5017,20 +5213,20 @@ bar
itemset(item_ascii(alnum(span(1..10), false))),
itemset(item_ascii(lower(span(12..21), false))),
),
- })))
+ }))
);
assert_eq!(
parser("[a]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..3),
negated: false,
kind: itemset(lit(span(1..2), 'a')),
- })))
+ }))
);
assert_eq!(
parser(r"[a\]]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..5),
negated: false,
kind: union(
@@ -5044,11 +5240,11 @@ bar
}),
]
),
- })))
+ }))
);
assert_eq!(
parser(r"[a\-z]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..6),
negated: false,
kind: union(
@@ -5063,44 +5259,44 @@ bar
lit(span(4..5), 'z'),
]
),
- })))
+ }))
);
assert_eq!(
parser("[ab]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..4),
negated: false,
kind: union(
span(1..3),
vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
),
- })))
+ }))
);
assert_eq!(
parser("[a-]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..4),
negated: false,
kind: union(
span(1..3),
vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
),
- })))
+ }))
);
assert_eq!(
parser("[-a]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..4),
negated: false,
kind: union(
span(1..3),
vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
),
- })))
+ }))
);
assert_eq!(
parser(r"[\pL]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..5),
negated: false,
kind: itemset(item_unicode(ast::ClassUnicode {
@@ -5108,11 +5304,11 @@ bar
negated: false,
kind: ast::ClassUnicodeKind::OneLetter('L'),
})),
- })))
+ }))
);
assert_eq!(
parser(r"[\w]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..4),
negated: false,
kind: itemset(item_perl(ast::ClassPerl {
@@ -5120,11 +5316,11 @@ bar
kind: ast::ClassPerlKind::Word,
negated: false,
})),
- })))
+ }))
);
assert_eq!(
parser(r"[a\wz]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..6),
negated: false,
kind: union(
@@ -5139,20 +5335,20 @@ bar
lit(span(4..5), 'z'),
]
),
- })))
+ }))
);
assert_eq!(
parser("[a-z]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..5),
negated: false,
kind: itemset(range(span(1..4), 'a', 'z')),
- })))
+ }))
);
assert_eq!(
parser("[a-cx-z]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..8),
negated: false,
kind: union(
@@ -5162,11 +5358,11 @@ bar
range(span(4..7), 'x', 'z'),
]
),
- })))
+ }))
);
assert_eq!(
parser(r"[\w&&a-cx-z]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..12),
negated: false,
kind: intersection(
@@ -5184,11 +5380,11 @@ bar
]
),
),
- })))
+ }))
);
assert_eq!(
parser(r"[a-cx-z&&\w]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..12),
negated: false,
kind: intersection(
@@ -5206,11 +5402,11 @@ bar
negated: false,
})),
),
- })))
+ }))
);
assert_eq!(
parser(r"[a--b--c]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..9),
negated: false,
kind: difference(
@@ -5222,11 +5418,11 @@ bar
),
itemset(lit(span(7..8), 'c')),
),
- })))
+ }))
);
assert_eq!(
parser(r"[a~~b~~c]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..9),
negated: false,
kind: symdifference(
@@ -5238,11 +5434,11 @@ bar
),
itemset(lit(span(7..8), 'c')),
),
- })))
+ }))
);
assert_eq!(
parser(r"[\^&&^]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..7),
negated: false,
kind: intersection(
@@ -5254,11 +5450,11 @@ bar
})),
itemset(lit(span(5..6), '^')),
),
- })))
+ }))
);
assert_eq!(
parser(r"[\&&&&]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..7),
negated: false,
kind: intersection(
@@ -5270,11 +5466,11 @@ bar
})),
itemset(lit(span(5..6), '&')),
),
- })))
+ }))
);
assert_eq!(
parser(r"[&&&&]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..6),
negated: false,
kind: intersection(
@@ -5286,13 +5482,13 @@ bar
),
itemset(empty(span(5..5))),
),
- })))
+ }))
);
let pat = "[☃-⛄]";
assert_eq!(
parser(pat).parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span_range(pat, 0..9),
negated: false,
kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
@@ -5308,20 +5504,20 @@ bar
c: '⛄',
},
})),
- })))
+ }))
);
assert_eq!(
parser(r"[]]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..3),
negated: false,
kind: itemset(lit(span(1..2), ']')),
- })))
+ }))
);
assert_eq!(
parser(r"[]\[]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..5),
negated: false,
kind: union(
@@ -5335,14 +5531,14 @@ bar
}),
]
),
- })))
+ }))
);
assert_eq!(
parser(r"[\[]]").parse(),
Ok(concat(
0..5,
vec![
- Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ast::class_bracketed(ast::ClassBracketed {
span: span(0..4),
negated: false,
kind: itemset(ast::ClassSetItem::Literal(
@@ -5352,8 +5548,8 @@ bar
c: '[',
}
)),
- })),
- Ast::Literal(ast::Literal {
+ }),
+ Ast::literal(ast::Literal {
span: span(4..5),
kind: ast::LiteralKind::Verbatim,
c: ']',
@@ -5914,15 +6110,15 @@ bar
assert_eq!(
parser(r"\pNz").parse(),
- Ok(Ast::Concat(ast::Concat {
+ Ok(Ast::concat(ast::Concat {
span: span(0..4),
asts: vec![
- Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
+ Ast::class_unicode(ast::ClassUnicode {
span: span(0..3),
negated: false,
kind: ast::ClassUnicodeKind::OneLetter('N'),
- })),
- Ast::Literal(ast::Literal {
+ }),
+ Ast::literal(ast::Literal {
span: span(3..4),
kind: ast::LiteralKind::Verbatim,
c: 'z',
@@ -5932,15 +6128,15 @@ bar
);
assert_eq!(
parser(r"\p{Greek}z").parse(),
- Ok(Ast::Concat(ast::Concat {
+ Ok(Ast::concat(ast::Concat {
span: span(0..10),
asts: vec![
- Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
+ Ast::class_unicode(ast::ClassUnicode {
span: span(0..9),
negated: false,
kind: ast::ClassUnicodeKind::Named(s("Greek")),
- })),
- Ast::Literal(ast::Literal {
+ }),
+ Ast::literal(ast::Literal {
span: span(9..10),
kind: ast::LiteralKind::Verbatim,
c: 'z',
@@ -6017,23 +6213,23 @@ bar
assert_eq!(
parser(r"\d").parse(),
- Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl {
+ Ok(Ast::class_perl(ast::ClassPerl {
span: span(0..2),
kind: ast::ClassPerlKind::Digit,
negated: false,
- })))
+ }))
);
assert_eq!(
parser(r"\dz").parse(),
- Ok(Ast::Concat(ast::Concat {
+ Ok(Ast::concat(ast::Concat {
span: span(0..3),
asts: vec![
- Ast::Class(ast::Class::Perl(ast::ClassPerl {
+ Ast::class_perl(ast::ClassPerl {
span: span(0..2),
kind: ast::ClassPerlKind::Digit,
negated: false,
- })),
- Ast::Literal(ast::Literal {
+ }),
+ Ast::literal(ast::Literal {
span: span(2..3),
kind: ast::LiteralKind::Verbatim,
c: 'z',
diff --git a/vendor/regex-syntax/src/ast/print.rs b/vendor/regex-syntax/src/ast/print.rs
index 86a87e143..1ceb3c7fa 100644
--- a/vendor/regex-syntax/src/ast/print.rs
+++ b/vendor/regex-syntax/src/ast/print.rs
@@ -80,27 +80,21 @@ impl<W: fmt::Write> Visitor for Writer<W> {
fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
match *ast {
Ast::Group(ref x) => self.fmt_group_pre(x),
- Ast::Class(ast::Class::Bracketed(ref x)) => {
- self.fmt_class_bracketed_pre(x)
- }
+ Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_pre(x),
_ => Ok(()),
}
}
fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
- use crate::ast::Class;
-
match *ast {
Ast::Empty(_) => Ok(()),
Ast::Flags(ref x) => self.fmt_set_flags(x),
Ast::Literal(ref x) => self.fmt_literal(x),
Ast::Dot(_) => self.wtr.write_str("."),
Ast::Assertion(ref x) => self.fmt_assertion(x),
- Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x),
- Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x),
- Ast::Class(Class::Bracketed(ref x)) => {
- self.fmt_class_bracketed_post(x)
- }
+ Ast::ClassPerl(ref x) => self.fmt_class_perl(x),
+ Ast::ClassUnicode(ref x) => self.fmt_class_unicode(x),
+ Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_post(x),
Ast::Repetition(ref x) => self.fmt_repetition(x),
Ast::Group(ref x) => self.fmt_group_post(x),
Ast::Alternation(_) => Ok(()),
@@ -267,6 +261,12 @@ impl<W: fmt::Write> Writer<W> {
EndText => self.wtr.write_str(r"\z"),
WordBoundary => self.wtr.write_str(r"\b"),
NotWordBoundary => self.wtr.write_str(r"\B"),
+ WordBoundaryStart => self.wtr.write_str(r"\b{start}"),
+ WordBoundaryEnd => self.wtr.write_str(r"\b{end}"),
+ WordBoundaryStartAngle => self.wtr.write_str(r"\<"),
+ WordBoundaryEndAngle => self.wtr.write_str(r"\>"),
+ WordBoundaryStartHalf => self.wtr.write_str(r"\b{start-half}"),
+ WordBoundaryEndHalf => self.wtr.write_str(r"\b{end-half}"),
}
}
diff --git a/vendor/regex-syntax/src/ast/visitor.rs b/vendor/regex-syntax/src/ast/visitor.rs
index ab136739e..c1bb24d97 100644
--- a/vendor/regex-syntax/src/ast/visitor.rs
+++ b/vendor/regex-syntax/src/ast/visitor.rs
@@ -48,6 +48,11 @@ pub trait Visitor {
Ok(())
}
+ /// This method is called between child nodes of a concatenation.
+ fn visit_concat_in(&mut self) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
/// This method is called on every [`ClassSetItem`](ast::ClassSetItem)
/// before descending into child nodes.
fn visit_class_set_item_pre(
@@ -228,8 +233,14 @@ impl<'a> HeapVisitor<'a> {
// If this is a concat/alternate, then we might have additional
// inductive steps to process.
if let Some(x) = self.pop(frame) {
- if let Frame::Alternation { .. } = x {
- visitor.visit_alternation_in()?;
+ match x {
+ Frame::Alternation { .. } => {
+ visitor.visit_alternation_in()?;
+ }
+ Frame::Concat { .. } => {
+ visitor.visit_concat_in()?;
+ }
+ _ => {}
}
ast = x.child();
self.stack.push((post_ast, x));
@@ -253,7 +264,7 @@ impl<'a> HeapVisitor<'a> {
visitor: &mut V,
) -> Result<Option<Frame<'a>>, V::Err> {
Ok(match *ast {
- Ast::Class(ast::Class::Bracketed(ref x)) => {
+ Ast::ClassBracketed(ref x) => {
self.visit_class(x, visitor)?;
None
}