author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-30 03:59:35 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-30 03:59:35 +0000
commit    d1b2d29528b7794b41e66fc2136e395a02f8529b
tree      a4a17504b260206dec3cf55b2dca82929a348ac2 /compiler/rustc_parse/src/parser/mod.rs
parent    Releasing progress-linux version 1.72.1+dfsg1-1~progress7.99u1.
Merging upstream version 1.73.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_parse/src/parser/mod.rs')
-rw-r--r--  compiler/rustc_parse/src/parser/mod.rs  240
1 file changed, 80 insertions(+), 160 deletions(-)
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index c23420661..77c59bb38 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -24,12 +24,11 @@ use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
use rustc_ast::util::case::Case;
use rustc_ast::AttrId;
use rustc_ast::DUMMY_NODE_ID;
-use rustc_ast::{self as ast, AnonConst, AttrStyle, Const, DelimArgs, Extern};
-use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, MacDelimiter, Mutability, StrLit};
+use rustc_ast::{self as ast, AnonConst, Const, DelimArgs, Extern};
+use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, Mutability, StrLit};
use rustc_ast::{HasAttrs, HasTokens, Unsafe, Visibility, VisibilityKind};
use rustc_ast_pretty::pprust;
use rustc_data_structures::fx::FxHashMap;
-use rustc_data_structures::sync::Ordering;
use rustc_errors::PResult;
use rustc_errors::{
Applicability, DiagnosticBuilder, ErrorGuaranteed, FatalError, IntoDiagnostic, MultiSpan,
@@ -38,7 +37,7 @@ use rustc_session::parse::ParseSess;
use rustc_span::source_map::{Span, DUMMY_SP};
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use std::ops::Range;
-use std::{cmp, mem, slice};
+use std::{mem, slice};
use thin_vec::ThinVec;
use tracing::debug;
@@ -135,10 +134,24 @@ pub struct Parser<'a> {
pub capture_cfg: bool,
restrictions: Restrictions,
expected_tokens: Vec<TokenType>,
- // Important: This must only be advanced from `bump` to ensure that
- // `token_cursor.num_next_calls` is updated properly.
token_cursor: TokenCursor,
- desugar_doc_comments: bool,
+ // The number of calls to `bump`, i.e. the position in the token stream.
+ num_bump_calls: usize,
+ // During parsing we may sometimes need to 'unglue' a glued token into two
+ // component tokens (e.g. '>>' into '>' and '>'), so the parser can consume
+ // them one at a time. This process bypasses the normal capturing mechanism
+ // (e.g. `num_bump_calls` will not be incremented), since the 'unglued'
+ // tokens do not exist in the original `TokenStream`.
+ //
+ // If we end up consuming both unglued tokens, this is not an issue. We'll
+ // end up capturing the single 'glued' token.
+ //
+ // However, sometimes we may want to capture just the first 'unglued'
+ // token. For example, capturing the `Vec<u8>` in `Option<Vec<u8>>`
+ // requires us to unglue the trailing `>>` token. The `break_last_token`
+ // field is used to track this token. It gets appended to the captured
+ // stream when we evaluate a `LazyAttrTokenStream`.
+ break_last_token: bool,
/// This field is used to keep track of how many left angle brackets we have seen. This is
/// required in order to detect extra leading left angle brackets (`<` characters) and error
/// appropriately.
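The ungluing mechanism documented above can be sketched in isolation. Everything below is invented for illustration (`Tok` stands in for rustc's `TokenKind`; `unglue` is not a real parser method):

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum Tok {
        Gt,  // `>`
        Shr, // `>>`, a "glued" token
    }

    // Split a glued token into the component consumed now plus the leftover half.
    fn unglue(tok: Tok) -> Option<(Tok, Tok)> {
        match tok {
            Tok::Shr => Some((Tok::Gt, Tok::Gt)),
            _ => None,
        }
    }

    fn main() {
        // Parsing `Option<Vec<u8>>`: the trailing `>>` must close two angle
        // brackets, so it is consumed as two separate `>` tokens.
        let (now, leftover) = unglue(Tok::Shr).unwrap();
        assert_eq!((now, leftover), (Tok::Gt, Tok::Gt));
    }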
@@ -162,7 +175,7 @@ pub struct Parser<'a> {
// This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
// it doesn't unintentionally get bigger.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-rustc_data_structures::static_assert_size!(Parser<'_>, 272);
+rustc_data_structures::static_assert_size!(Parser<'_>, 264);
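The assertion above only exists to catch accidental size growth of `Parser` at compile time. A rough stable-Rust equivalent of the same trick, assuming nothing beyond the standard library (`static_assert_size!` itself is internal to rustc):

    // Fails to compile if the size ever drifts, much like `static_assert_size!`.
    const _: () = assert!(std::mem::size_of::<Option<u64>>() == 16);

    fn main() {}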
/// Stores span information about a closure.
#[derive(Clone)]
@@ -224,64 +237,29 @@ struct TokenCursor {
// tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
// because it's the outermost token stream which never has delimiters.
stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,
-
- desugar_doc_comments: bool,
-
- // Counts the number of calls to `{,inlined_}next`.
- num_next_calls: usize,
-
- // During parsing, we may sometimes need to 'unglue' a
- // glued token into two component tokens
- // (e.g. '>>' into '>' and '>), so that the parser
- // can consume them one at a time. This process
- // bypasses the normal capturing mechanism
- // (e.g. `num_next_calls` will not be incremented),
- // since the 'unglued' tokens due not exist in
- // the original `TokenStream`.
- //
- // If we end up consuming both unglued tokens,
- // then this is not an issue - we'll end up
- // capturing the single 'glued' token.
- //
- // However, in certain circumstances, we may
- // want to capture just the first 'unglued' token.
- // For example, capturing the `Vec<u8>`
- // in `Option<Vec<u8>>` requires us to unglue
- // the trailing `>>` token. The `break_last_token`
- // field is used to track this token - it gets
- // appended to the captured stream when
- // we evaluate a `LazyAttrTokenStream`.
- break_last_token: bool,
}
impl TokenCursor {
- fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
- self.inlined_next(desugar_doc_comments)
+ fn next(&mut self) -> (Token, Spacing) {
+ self.inlined_next()
}
/// This always-inlined version should only be used on hot code paths.
#[inline(always)]
- fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
+ fn inlined_next(&mut self) -> (Token, Spacing) {
loop {
- // FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will
- // need to, whereupon the `delim != Delimiter::Invisible` conditions below can be
- // removed.
+ // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
+ // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
+ // below can be removed.
if let Some(tree) = self.tree_cursor.next_ref() {
match tree {
- &TokenTree::Token(ref token, spacing) => match (desugar_doc_comments, token) {
- (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => {
- let desugared = self.desugar(attr_style, data, span);
- self.tree_cursor.replace_prev_and_rewind(desugared);
- // Continue to get the first token of the desugared doc comment.
- }
- _ => {
- debug_assert!(!matches!(
- token.kind,
- token::OpenDelim(_) | token::CloseDelim(_)
- ));
- return (token.clone(), spacing);
- }
- },
+ &TokenTree::Token(ref token, spacing) => {
+ debug_assert!(!matches!(
+ token.kind,
+ token::OpenDelim(_) | token::CloseDelim(_)
+ ));
+ return (token.clone(), spacing);
+ }
&TokenTree::Delimited(sp, delim, ref tts) => {
let trees = tts.clone().into_trees();
self.stack.push((mem::replace(&mut self.tree_cursor, trees), delim, sp));
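The `Delimited` arm above descends into a nested stream by pushing the current cursor onto `stack`; the matching pop happens when the inner stream runs out. A toy model of that stack-driven walk, with invented `Tree` and `flatten` standing in for real token trees:

    enum Tree {
        Token(char),
        Delimited(Vec<Tree>),
    }

    // Flatten a tree of tokens: push the parent iterator when descending
    // into a delimited group, pop it when the group is exhausted.
    fn flatten(trees: Vec<Tree>) -> Vec<char> {
        let mut out = Vec::new();
        let mut cursor = trees.into_iter();
        let mut stack = Vec::new();
        loop {
            match cursor.next() {
                Some(Tree::Token(c)) => out.push(c),
                Some(Tree::Delimited(inner)) => {
                    // Descend: remember where we were, like `self.stack.push(...)`.
                    stack.push(std::mem::replace(&mut cursor, inner.into_iter()));
                }
                None => match stack.pop() {
                    Some(parent) => cursor = parent, // ascend to the parent stream
                    None => return out,              // outermost stream exhausted
                },
            }
        }
    }

    fn main() {
        let trees = vec![
            Tree::Token('a'),
            Tree::Delimited(vec![Tree::Token('b'), Tree::Token('c')]),
            Tree::Token('d'),
        ];
        assert_eq!(flatten(trees), vec!['a', 'b', 'c', 'd']);
    }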
@@ -304,52 +282,6 @@ impl TokenCursor {
}
}
}
-
- // Desugar a doc comment into something like `#[doc = r"foo"]`.
- fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
- // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
- // required to wrap the text. E.g.
- // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
- // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
- // - `abc "##d##"` is wrapped as `r###"abc "##d##""###` (num_of_hashes = 3)
- let mut num_of_hashes = 0;
- let mut count = 0;
- for ch in data.as_str().chars() {
- count = match ch {
- '"' => 1,
- '#' if count > 0 => count + 1,
- _ => 0,
- };
- num_of_hashes = cmp::max(num_of_hashes, count);
- }
-
- // `/// foo` becomes `doc = r"foo"`.
- let delim_span = DelimSpan::from_single(span);
- let body = TokenTree::Delimited(
- delim_span,
- Delimiter::Bracket,
- [
- TokenTree::token_alone(token::Ident(sym::doc, false), span),
- TokenTree::token_alone(token::Eq, span),
- TokenTree::token_alone(
- TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
- span,
- ),
- ]
- .into_iter()
- .collect::<TokenStream>(),
- );
-
- if attr_style == AttrStyle::Inner {
- vec![
- TokenTree::token_alone(token::Pound, span),
- TokenTree::token_alone(token::Not, span),
- body,
- ]
- } else {
- vec![TokenTree::token_alone(token::Pound, span), body]
- }
- }
}
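The deleted hash-counting loop is self-contained enough to run on its own. Here is a standalone sketch (the `num_hashes_needed` name is invented) that reproduces the examples from the removed comment:

    use std::cmp;

    // Minimum number of `#`s needed to wrap `data` in a raw string literal:
    // track the length of each `"#*` run and keep the maximum.
    fn num_hashes_needed(data: &str) -> u32 {
        let mut num_of_hashes = 0;
        let mut count = 0;
        for ch in data.chars() {
            count = match ch {
                '"' => 1,
                '#' if count > 0 => count + 1,
                _ => 0,
            };
            num_of_hashes = cmp::max(num_of_hashes, count);
        }
        num_of_hashes
    }

    fn main() {
        assert_eq!(num_hashes_needed("abc d"), 0);              // r"abc d"
        assert_eq!(num_hashes_needed(r#"abc "d""#), 1);         // r#"abc "d""#
        assert_eq!(num_hashes_needed(r###"abc "##d##""###), 3); // r###"abc "##d##""###
    }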
#[derive(Debug, Clone, PartialEq)]
@@ -368,7 +300,7 @@ impl TokenType {
fn to_string(&self) -> String {
match self {
TokenType::Token(t) => format!("`{}`", pprust::token_kind_to_string(t)),
- TokenType::Keyword(kw) => format!("`{}`", kw),
+ TokenType::Keyword(kw) => format!("`{kw}`"),
TokenType::Operator => "an operator".to_string(),
TokenType::Lifetime => "lifetime".to_string(),
TokenType::Ident => "identifier".to_string(),
@@ -438,14 +370,13 @@ pub(super) fn token_descr(token: &Token) -> String {
TokenDescription::DocComment => "doc comment",
});
- if let Some(kind) = kind { format!("{} `{}`", kind, name) } else { format!("`{}`", name) }
+ if let Some(kind) = kind { format!("{kind} `{name}`") } else { format!("`{name}`") }
}
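Several hunks in this diff are the same mechanical modernization: positional `format!` arguments become inline captured identifiers. The two spellings produce identical output:

    fn main() {
        let kind = "keyword";
        let name = "fn";
        assert_eq!(format!("{} `{}`", kind, name), format!("{kind} `{name}`"));
    }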
impl<'a> Parser<'a> {
pub fn new(
sess: &'a ParseSess,
- tokens: TokenStream,
- desugar_doc_comments: bool,
+ stream: TokenStream,
subparser_name: Option<&'static str>,
) -> Self {
let mut parser = Parser {
@@ -456,14 +387,9 @@ impl<'a> Parser<'a> {
capture_cfg: false,
restrictions: Restrictions::empty(),
expected_tokens: Vec::new(),
- token_cursor: TokenCursor {
- tree_cursor: tokens.into_trees(),
- stack: Vec::new(),
- num_next_calls: 0,
- desugar_doc_comments,
- break_last_token: false,
- },
- desugar_doc_comments,
+ token_cursor: TokenCursor { tree_cursor: stream.into_trees(), stack: Vec::new() },
+ num_bump_calls: 0,
+ break_last_token: false,
unmatched_angle_bracket_count: 0,
max_angle_bracket_count: 0,
last_unexpected_token_span: None,
@@ -766,7 +692,7 @@ impl<'a> Parser<'a> {
// If we consume any additional tokens, then this token
// is not needed (we'll capture the entire 'glued' token),
// and `bump` will set this field to `false`
- self.token_cursor.break_last_token = true;
+ self.break_last_token = true;
// Use the spacing of the glued token as the spacing
// of the unglued second token.
self.bump_with((Token::new(second, second_span), self.token_spacing));
@@ -923,7 +849,7 @@ impl<'a> Parser<'a> {
expect_err
.span_suggestion_short(
sp,
- format!("missing `{}`", token_str),
+ format!("missing `{token_str}`"),
token_str,
Applicability::MaybeIncorrect,
)
@@ -1107,12 +1033,12 @@ impl<'a> Parser<'a> {
pub fn bump(&mut self) {
// Note: destructuring here would give nicer code, but it was found in #96210 to be slower
// than `.0`/`.1` access.
- let mut next = self.token_cursor.inlined_next(self.desugar_doc_comments);
- self.token_cursor.num_next_calls += 1;
+ let mut next = self.token_cursor.inlined_next();
+ self.num_bump_calls += 1;
// We've retrieved a token from the underlying
// cursor, so we no longer need to worry about
// an unglued token. See `break_and_eat` for more details
- self.token_cursor.break_last_token = false;
+ self.break_last_token = false;
if next.0.span.is_dummy() {
// Tweak the location for better diagnostics, but keep syntactic context intact.
let fallback_span = self.token.span;
@@ -1126,38 +1052,53 @@ impl<'a> Parser<'a> {
}
/// Look-ahead `dist` tokens of `self.token` and get access to that token there.
- /// When `dist == 0` then the current token is looked at.
+ /// When `dist == 0` then the current token is looked at. `Eof` will be
+ /// returned if the look-ahead is any distance past the end of the tokens.
pub fn look_ahead<R>(&self, dist: usize, looker: impl FnOnce(&Token) -> R) -> R {
if dist == 0 {
return looker(&self.token);
}
- let tree_cursor = &self.token_cursor.tree_cursor;
if let Some(&(_, delim, span)) = self.token_cursor.stack.last()
&& delim != Delimiter::Invisible
{
+ // We are not in the outermost token stream, and the token stream
+ // we are in has non-skipped delimiters. Look for skipped
+ // delimiters in the lookahead range.
+ let tree_cursor = &self.token_cursor.tree_cursor;
let all_normal = (0..dist).all(|i| {
let token = tree_cursor.look_ahead(i);
!matches!(token, Some(TokenTree::Delimited(_, Delimiter::Invisible, _)))
});
if all_normal {
+ // There were no skipped delimiters. Do lookahead by plain indexing.
return match tree_cursor.look_ahead(dist - 1) {
- Some(tree) => match tree {
- TokenTree::Token(token, _) => looker(token),
- TokenTree::Delimited(dspan, delim, _) => {
- looker(&Token::new(token::OpenDelim(*delim), dspan.open))
+ Some(tree) => {
+ // Indexing stayed within the current token stream.
+ match tree {
+ TokenTree::Token(token, _) => looker(token),
+ TokenTree::Delimited(dspan, delim, _) => {
+ looker(&Token::new(token::OpenDelim(*delim), dspan.open))
+ }
}
- },
- None => looker(&Token::new(token::CloseDelim(delim), span.close)),
+ }
+ None => {
+ // Indexing went past the end of the current token
+ // stream. Use the close delimiter, no matter how far
+ // ahead `dist` went.
+ looker(&Token::new(token::CloseDelim(delim), span.close))
+ }
};
}
}
+ // We are in a more complex case. Just clone the token cursor and use
+ // `next`, skipping delimiters as necessary. Slow but simple.
let mut cursor = self.token_cursor.clone();
let mut i = 0;
let mut token = Token::dummy();
while i < dist {
- token = cursor.next(/* desugar_doc_comments */ false).0;
+ token = cursor.next().0;
if matches!(
token.kind,
token::OpenDelim(Delimiter::Invisible) | token::CloseDelim(Delimiter::Invisible)
@@ -1166,7 +1107,7 @@ impl<'a> Parser<'a> {
}
i += 1;
}
- return looker(&token);
+ looker(&token)
}
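The fallback path above amounts to "clone the cursor and step the clone forward, leaving the parser untouched". The same idea in miniature, over any cloneable iterator (a sketch, not the parser's API):

    // Advance a clone `dist` positions; the original cursor is untouched.
    fn look_ahead<I: Iterator + Clone>(cursor: &I, dist: usize) -> Option<I::Item> {
        cursor.clone().nth(dist)
    }

    fn main() {
        let cursor = [10, 20, 30].into_iter();
        assert_eq!(look_ahead(&cursor, 2), Some(30));
        assert_eq!(look_ahead(&cursor, 9), None); // past the end, like returning `Eof`
    }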
/// Returns whether any of the given keywords are `dist` tokens ahead of the current one.
@@ -1210,7 +1151,8 @@ impl<'a> Parser<'a> {
fn parse_constness_(&mut self, case: Case, is_closure: bool) -> Const {
// Avoid const blocks and const closures being parsed as const items
if (self.check_const_closure() == is_closure)
- && self.look_ahead(1, |t| t != &token::OpenDelim(Delimiter::Brace))
+ && !self
+ .look_ahead(1, |t| *t == token::OpenDelim(Delimiter::Brace) || t.is_whole_block())
&& self.eat_keyword_case(kw::Const, case)
{
Const::Yes(self.prev_token.uninterpolated_span())
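For context, these are the `const` forms that the lookahead separates; a compilable illustration (inline `const` blocks are stable only since Rust 1.79, later than the 1.73 toolchain this diff targets):

    const X: u8 = 0;          // const item: `const` followed by an identifier
    const fn f() -> i32 { 1 } // const fn: `const` followed by `fn`

    fn g() -> i32 {
        // const block: `const` followed by `{`, which is why the lookahead
        // above refuses to eat the keyword as a constness qualifier.
        const { 1 + 1 }
    }

    fn main() {
        assert_eq!(X as i32 + f() + g(), 3);
    }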
@@ -1288,10 +1230,10 @@ impl<'a> Parser<'a> {
|| self.check(&token::OpenDelim(Delimiter::Brace));
delimited.then(|| {
- // We've confirmed above that there is a delimiter so unwrapping is OK.
- let TokenTree::Delimited(dspan, delim, tokens) = self.parse_token_tree() else { unreachable!() };
-
- DelimArgs { dspan, delim: MacDelimiter::from_token(delim).unwrap(), tokens }
+ let TokenTree::Delimited(dspan, delim, tokens) = self.parse_token_tree() else {
+ unreachable!()
+ };
+ DelimArgs { dspan, delim, tokens }
})
}
@@ -1307,12 +1249,11 @@ impl<'a> Parser<'a> {
}
/// Parses a single token tree from the input.
- pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
+ pub fn parse_token_tree(&mut self) -> TokenTree {
match self.token.kind {
token::OpenDelim(..) => {
// Grab the tokens within the delimiters.
- let tree_cursor = &self.token_cursor.tree_cursor;
- let stream = tree_cursor.stream.clone();
+ let stream = self.token_cursor.tree_cursor.stream.clone();
let (_, delim, span) = *self.token_cursor.stack.last().unwrap();
// Advance the token cursor through the entire delimited
@@ -1343,15 +1284,6 @@ impl<'a> Parser<'a> {
}
}
- /// Parses a stream of tokens into a list of `TokenTree`s, up to EOF.
- pub fn parse_all_token_trees(&mut self) -> PResult<'a, Vec<TokenTree>> {
- let mut tts = Vec::new();
- while self.token != token::Eof {
- tts.push(self.parse_token_tree());
- }
- Ok(tts)
- }
-
pub fn parse_tokens(&mut self) -> TokenStream {
let mut result = Vec::new();
loop {
@@ -1511,7 +1443,7 @@ impl<'a> Parser<'a> {
}
pub fn approx_token_stream_pos(&self) -> usize {
- self.token_cursor.num_next_calls
+ self.num_bump_calls
}
}
@@ -1537,18 +1469,6 @@ pub(crate) fn make_unclosed_delims_error(
Some(err)
}
-pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedDelim>, sess: &ParseSess) {
- let _ = sess.reached_eof.fetch_or(
- unclosed_delims.iter().any(|unmatched_delim| unmatched_delim.found_delim.is_none()),
- Ordering::Relaxed,
- );
- for unmatched in unclosed_delims.drain(..) {
- if let Some(mut e) = make_unclosed_delims_error(unmatched, sess) {
- e.emit();
- }
- }
-}
-
/// A helper struct used when building an `AttrTokenStream` from
/// a `LazyAttrTokenStream`. Both delimited and non-delimited tokens
/// are stored as `FlatToken::Token`. A vector of `FlatToken`s
@@ -1571,7 +1491,7 @@ pub enum FlatToken {
}
#[derive(Debug)]
-pub enum NtOrTt {
+pub enum ParseNtResult {
Nt(Nonterminal),
Tt(TokenTree),
}
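A hedged sketch of consuming the renamed type. The enum below is a toy mirror of `ParseNtResult` so the example runs standalone; the real variants hold `Nonterminal` and `TokenTree`:

    #[derive(Debug)]
    enum ParseNtResult {
        Nt(String), // stands in for rustc's `Nonterminal`
        Tt(String), // stands in for rustc's `TokenTree`
    }

    fn describe(result: &ParseNtResult) -> &'static str {
        match result {
            ParseNtResult::Nt(_) => "an interpolated nonterminal",
            ParseNtResult::Tt(_) => "a plain token tree",
        }
    }

    fn main() {
        assert_eq!(describe(&ParseNtResult::Tt("`fn`".into())), "a plain token tree");
    }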