author     Daniel Baumann <daniel.baumann@progress-linux.org>    2024-05-30 03:59:35 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>    2024-05-30 03:59:35 +0000
commit     d1b2d29528b7794b41e66fc2136e395a02f8529b (patch)
tree       a4a17504b260206dec3cf55b2dca82929a348ac2 /compiler/rustc_parse/src/parser/mod.rs
parent     Releasing progress-linux version 1.72.1+dfsg1-1~progress7.99u1. (diff)
download   rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.tar.xz
           rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.zip
Merging upstream version 1.73.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_parse/src/parser/mod.rs')
-rw-r--r--    compiler/rustc_parse/src/parser/mod.rs    240
1 file changed, 80 insertions, 160 deletions
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index c23420661..77c59bb38 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -24,12 +24,11 @@ use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
 use rustc_ast::util::case::Case;
 use rustc_ast::AttrId;
 use rustc_ast::DUMMY_NODE_ID;
-use rustc_ast::{self as ast, AnonConst, AttrStyle, Const, DelimArgs, Extern};
-use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, MacDelimiter, Mutability, StrLit};
+use rustc_ast::{self as ast, AnonConst, Const, DelimArgs, Extern};
+use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, Mutability, StrLit};
 use rustc_ast::{HasAttrs, HasTokens, Unsafe, Visibility, VisibilityKind};
 use rustc_ast_pretty::pprust;
 use rustc_data_structures::fx::FxHashMap;
-use rustc_data_structures::sync::Ordering;
 use rustc_errors::PResult;
 use rustc_errors::{
     Applicability, DiagnosticBuilder, ErrorGuaranteed, FatalError, IntoDiagnostic, MultiSpan,
@@ -38,7 +37,7 @@ use rustc_session::parse::ParseSess;
 use rustc_span::source_map::{Span, DUMMY_SP};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use std::ops::Range;
-use std::{cmp, mem, slice};
+use std::{mem, slice};
 use thin_vec::ThinVec;
 use tracing::debug;
 
@@ -135,10 +134,24 @@ pub struct Parser<'a> {
     pub capture_cfg: bool,
     restrictions: Restrictions,
     expected_tokens: Vec<TokenType>,
-    // Important: This must only be advanced from `bump` to ensure that
-    // `token_cursor.num_next_calls` is updated properly.
     token_cursor: TokenCursor,
-    desugar_doc_comments: bool,
+    // The number of calls to `bump`, i.e. the position in the token stream.
+    num_bump_calls: usize,
+    // During parsing we may sometimes need to 'unglue' a glued token into two
+    // component tokens (e.g. '>>' into '>' and '>), so the parser can consume
+    // them one at a time. This process bypasses the normal capturing mechanism
+    // (e.g. `num_bump_calls` will not be incremented), since the 'unglued'
+    // tokens due not exist in the original `TokenStream`.
+    //
+    // If we end up consuming both unglued tokens, this is not an issue. We'll
+    // end up capturing the single 'glued' token.
+    //
+    // However, sometimes we may want to capture just the first 'unglued'
+    // token. For example, capturing the `Vec<u8>` in `Option<Vec<u8>>`
+    // requires us to unglue the trailing `>>` token. The `break_last_token`
+    // field is used to track this token. It gets appended to the captured
+    // stream when we evaluate a `LazyAttrTokenStream`.
+    break_last_token: bool,
     /// This field is used to keep track of how many left angle brackets we have seen. This is
     /// required in order to detect extra leading left angle brackets (`<` characters) and error
     /// appropriately.
@@ -162,7 +175,7 @@ pub struct Parser<'a> {
 // This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
 // it doesn't unintentionally get bigger.
 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-rustc_data_structures::static_assert_size!(Parser<'_>, 272);
+rustc_data_structures::static_assert_size!(Parser<'_>, 264);
 
 /// Stores span information about a closure.
 #[derive(Clone)]
@@ -224,64 +237,29 @@ struct TokenCursor {
     // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
     // because it's the outermost token stream which never has delimiters.
     stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,
-
-    desugar_doc_comments: bool,
-
-    // Counts the number of calls to `{,inlined_}next`.
-    num_next_calls: usize,
-
-    // During parsing, we may sometimes need to 'unglue' a
-    // glued token into two component tokens
-    // (e.g. '>>' into '>' and '>), so that the parser
-    // can consume them one at a time. This process
-    // bypasses the normal capturing mechanism
-    // (e.g. `num_next_calls` will not be incremented),
-    // since the 'unglued' tokens due not exist in
-    // the original `TokenStream`.
-    //
-    // If we end up consuming both unglued tokens,
-    // then this is not an issue - we'll end up
-    // capturing the single 'glued' token.
-    //
-    // However, in certain circumstances, we may
-    // want to capture just the first 'unglued' token.
-    // For example, capturing the `Vec<u8>`
-    // in `Option<Vec<u8>>` requires us to unglue
-    // the trailing `>>` token. The `break_last_token`
-    // field is used to track this token - it gets
-    // appended to the captured stream when
-    // we evaluate a `LazyAttrTokenStream`.
-    break_last_token: bool,
 }
 
 impl TokenCursor {
-    fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
-        self.inlined_next(desugar_doc_comments)
+    fn next(&mut self) -> (Token, Spacing) {
+        self.inlined_next()
     }
 
     /// This always-inlined version should only be used on hot code paths.
     #[inline(always)]
-    fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
+    fn inlined_next(&mut self) -> (Token, Spacing) {
         loop {
-            // FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will
-            // need to, whereupon the `delim != Delimiter::Invisible` conditions below can be
-            // removed.
+            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
+            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
+            // below can be removed.
             if let Some(tree) = self.tree_cursor.next_ref() {
                 match tree {
-                    &TokenTree::Token(ref token, spacing) => match (desugar_doc_comments, token) {
-                        (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => {
-                            let desugared = self.desugar(attr_style, data, span);
-                            self.tree_cursor.replace_prev_and_rewind(desugared);
-                            // Continue to get the first token of the desugared doc comment.
-                        }
-                        _ => {
-                            debug_assert!(!matches!(
-                                token.kind,
-                                token::OpenDelim(_) | token::CloseDelim(_)
-                            ));
-                            return (token.clone(), spacing);
-                        }
-                    },
+                    &TokenTree::Token(ref token, spacing) => {
+                        debug_assert!(!matches!(
+                            token.kind,
+                            token::OpenDelim(_) | token::CloseDelim(_)
+                        ));
+                        return (token.clone(), spacing);
+                    }
                     &TokenTree::Delimited(sp, delim, ref tts) => {
                         let trees = tts.clone().into_trees();
                         self.stack.push((mem::replace(&mut self.tree_cursor, trees), delim, sp));
@@ -304,52 +282,6 @@ impl TokenCursor {
             }
         }
     }
-
-    // Desugar a doc comment into something like `#[doc = r"foo"]`.
-    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
-        // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
-        // required to wrap the text. E.g.
-        // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
-        // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
-        // - `abc "##d##"` is wrapped as `r###"abc ##"d"##"###` (num_of_hashes = 3)
-        let mut num_of_hashes = 0;
-        let mut count = 0;
-        for ch in data.as_str().chars() {
-            count = match ch {
-                '"' => 1,
-                '#' if count > 0 => count + 1,
-                _ => 0,
-            };
-            num_of_hashes = cmp::max(num_of_hashes, count);
-        }
-
-        // `/// foo` becomes `doc = r"foo"`.
-        let delim_span = DelimSpan::from_single(span);
-        let body = TokenTree::Delimited(
-            delim_span,
-            Delimiter::Bracket,
-            [
-                TokenTree::token_alone(token::Ident(sym::doc, false), span),
-                TokenTree::token_alone(token::Eq, span),
-                TokenTree::token_alone(
-                    TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
-                    span,
-                ),
-            ]
-            .into_iter()
-            .collect::<TokenStream>(),
-        );
-
-        if attr_style == AttrStyle::Inner {
-            vec![
-                TokenTree::token_alone(token::Pound, span),
-                TokenTree::token_alone(token::Not, span),
-                body,
-            ]
-        } else {
-            vec![TokenTree::token_alone(token::Pound, span), body]
-        }
-    }
 }
 
 #[derive(Debug, Clone, PartialEq)]
@@ -368,7 +300,7 @@ impl TokenType {
     fn to_string(&self) -> String {
         match self {
             TokenType::Token(t) => format!("`{}`", pprust::token_kind_to_string(t)),
-            TokenType::Keyword(kw) => format!("`{}`", kw),
+            TokenType::Keyword(kw) => format!("`{kw}`"),
             TokenType::Operator => "an operator".to_string(),
             TokenType::Lifetime => "lifetime".to_string(),
             TokenType::Ident => "identifier".to_string(),
@@ -438,14 +370,13 @@ pub(super) fn token_descr(token: &Token) -> String {
         TokenDescription::DocComment => "doc comment",
     });
 
-    if let Some(kind) = kind { format!("{} `{}`", kind, name) } else { format!("`{}`", name) }
+    if let Some(kind) = kind { format!("{kind} `{name}`") } else { format!("`{name}`") }
 }
 
 impl<'a> Parser<'a> {
     pub fn new(
         sess: &'a ParseSess,
-        tokens: TokenStream,
-        desugar_doc_comments: bool,
+        stream: TokenStream,
         subparser_name: Option<&'static str>,
     ) -> Self {
         let mut parser = Parser {
@@ -456,14 +387,9 @@ impl<'a> Parser<'a> {
             capture_cfg: false,
             restrictions: Restrictions::empty(),
             expected_tokens: Vec::new(),
-            token_cursor: TokenCursor {
-                tree_cursor: tokens.into_trees(),
-                stack: Vec::new(),
-                num_next_calls: 0,
-                desugar_doc_comments,
-                break_last_token: false,
-            },
-            desugar_doc_comments,
+            token_cursor: TokenCursor { tree_cursor: stream.into_trees(), stack: Vec::new() },
+            num_bump_calls: 0,
+            break_last_token: false,
             unmatched_angle_bracket_count: 0,
             max_angle_bracket_count: 0,
             last_unexpected_token_span: None,
@@ -766,7 +692,7 @@ impl<'a> Parser<'a> {
                 // If we consume any additional tokens, then this token
                 // is not needed (we'll capture the entire 'glued' token),
                 // and `bump` will set this field to `None`
-                self.token_cursor.break_last_token = true;
+                self.break_last_token = true;
                 // Use the spacing of the glued token as the spacing
                 // of the unglued second token.
                 self.bump_with((Token::new(second, second_span), self.token_spacing));
@@ -923,7 +849,7 @@ impl<'a> Parser<'a> {
                    expect_err
                        .span_suggestion_short(
                            sp,
-                            format!("missing `{}`", token_str),
+                            format!("missing `{token_str}`"),
                            token_str,
                            Applicability::MaybeIncorrect,
                        )
@@ -1107,12 +1033,12 @@ impl<'a> Parser<'a> {
     pub fn bump(&mut self) {
         // Note: destructuring here would give nicer code, but it was found in #96210 to be slower
         // than `.0`/`.1` access.
-        let mut next = self.token_cursor.inlined_next(self.desugar_doc_comments);
-        self.token_cursor.num_next_calls += 1;
+        let mut next = self.token_cursor.inlined_next();
+        self.num_bump_calls += 1;
         // We've retrieved an token from the underlying
         // cursor, so we no longer need to worry about
         // an unglued token. See `break_and_eat` for more details
-        self.token_cursor.break_last_token = false;
+        self.break_last_token = false;
         if next.0.span.is_dummy() {
             // Tweak the location for better diagnostics, but keep syntactic context intact.
             let fallback_span = self.token.span;
@@ -1126,38 +1052,53 @@ impl<'a> Parser<'a> {
     }
 
     /// Look-ahead `dist` tokens of `self.token` and get access to that token there.
-    /// When `dist == 0` then the current token is looked at.
+    /// When `dist == 0` then the current token is looked at. `Eof` will be
+    /// returned if the look-ahead is any distance past the end of the tokens.
     pub fn look_ahead<R>(&self, dist: usize, looker: impl FnOnce(&Token) -> R) -> R {
         if dist == 0 {
             return looker(&self.token);
         }
 
-        let tree_cursor = &self.token_cursor.tree_cursor;
         if let Some(&(_, delim, span)) = self.token_cursor.stack.last()
             && delim != Delimiter::Invisible
         {
+            // We are not in the outermost token stream, and the token stream
+            // we are in has non-skipped delimiters. Look for skipped
+            // delimiters in the lookahead range.
+            let tree_cursor = &self.token_cursor.tree_cursor;
             let all_normal = (0..dist).all(|i| {
                 let token = tree_cursor.look_ahead(i);
                 !matches!(token, Some(TokenTree::Delimited(_, Delimiter::Invisible, _)))
             });
             if all_normal {
+                // There were no skipped delimiters. Do lookahead by plain indexing.
                 return match tree_cursor.look_ahead(dist - 1) {
-                    Some(tree) => match tree {
-                        TokenTree::Token(token, _) => looker(token),
-                        TokenTree::Delimited(dspan, delim, _) => {
-                            looker(&Token::new(token::OpenDelim(*delim), dspan.open))
+                    Some(tree) => {
+                        // Indexing stayed within the current token stream.
+                        match tree {
+                            TokenTree::Token(token, _) => looker(token),
+                            TokenTree::Delimited(dspan, delim, _) => {
+                                looker(&Token::new(token::OpenDelim(*delim), dspan.open))
+                            }
                         }
-                    },
-                    None => looker(&Token::new(token::CloseDelim(delim), span.close)),
+                    }
+                    None => {
+                        // Indexing went past the end of the current token
+                        // stream. Use the close delimiter, no matter how far
+                        // ahead `dist` went.
+                        looker(&Token::new(token::CloseDelim(delim), span.close))
+                    }
                 };
             }
         }
 
+        // We are in a more complex case. Just clone the token cursor and use
+        // `next`, skipping delimiters as necessary. Slow but simple.
         let mut cursor = self.token_cursor.clone();
         let mut i = 0;
         let mut token = Token::dummy();
         while i < dist {
-            token = cursor.next(/* desugar_doc_comments */ false).0;
+            token = cursor.next().0;
             if matches!(
                 token.kind,
                 token::OpenDelim(Delimiter::Invisible) | token::CloseDelim(Delimiter::Invisible)
@@ -1166,7 +1107,7 @@ impl<'a> Parser<'a> {
             }
             i += 1;
         }
-        return looker(&token);
+        looker(&token)
     }
 
     /// Returns whether any of the given keywords are `dist` tokens ahead of the current one.
@@ -1210,7 +1151,8 @@ impl<'a> Parser<'a> {
     fn parse_constness_(&mut self, case: Case, is_closure: bool) -> Const {
         // Avoid const blocks and const closures to be parsed as const items
         if (self.check_const_closure() == is_closure)
-            && self.look_ahead(1, |t| t != &token::OpenDelim(Delimiter::Brace))
+            && !self
+                .look_ahead(1, |t| *t == token::OpenDelim(Delimiter::Brace) || t.is_whole_block())
             && self.eat_keyword_case(kw::Const, case)
         {
             Const::Yes(self.prev_token.uninterpolated_span())
@@ -1288,10 +1230,10 @@ impl<'a> Parser<'a> {
             || self.check(&token::OpenDelim(Delimiter::Brace));
 
         delimited.then(|| {
-            // We've confirmed above that there is a delimiter so unwrapping is OK.
-            let TokenTree::Delimited(dspan, delim, tokens) = self.parse_token_tree() else { unreachable!() };
-
-            DelimArgs { dspan, delim: MacDelimiter::from_token(delim).unwrap(), tokens }
+            let TokenTree::Delimited(dspan, delim, tokens) = self.parse_token_tree() else {
+                unreachable!()
+            };
+            DelimArgs { dspan, delim, tokens }
         })
     }
 
@@ -1307,12 +1249,11 @@ impl<'a> Parser<'a> {
     }
 
     /// Parses a single token tree from the input.
-    pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
+    pub fn parse_token_tree(&mut self) -> TokenTree {
         match self.token.kind {
             token::OpenDelim(..) => {
                 // Grab the tokens within the delimiters.
-                let tree_cursor = &self.token_cursor.tree_cursor;
-                let stream = tree_cursor.stream.clone();
+                let stream = self.token_cursor.tree_cursor.stream.clone();
                 let (_, delim, span) = *self.token_cursor.stack.last().unwrap();
 
                 // Advance the token cursor through the entire delimited
@@ -1343,15 +1284,6 @@ impl<'a> Parser<'a> {
         }
     }
 
-    /// Parses a stream of tokens into a list of `TokenTree`s, up to EOF.
-    pub fn parse_all_token_trees(&mut self) -> PResult<'a, Vec<TokenTree>> {
-        let mut tts = Vec::new();
-        while self.token != token::Eof {
-            tts.push(self.parse_token_tree());
-        }
-        Ok(tts)
-    }
-
     pub fn parse_tokens(&mut self) -> TokenStream {
         let mut result = Vec::new();
         loop {
@@ -1511,7 +1443,7 @@ impl<'a> Parser<'a> {
     }
 
     pub fn approx_token_stream_pos(&self) -> usize {
-        self.token_cursor.num_next_calls
+        self.num_bump_calls
     }
 }
 
@@ -1537,18 +1469,6 @@ pub(crate) fn make_unclosed_delims_error(
     Some(err)
 }
 
-pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedDelim>, sess: &ParseSess) {
-    let _ = sess.reached_eof.fetch_or(
-        unclosed_delims.iter().any(|unmatched_delim| unmatched_delim.found_delim.is_none()),
-        Ordering::Relaxed,
-    );
-    for unmatched in unclosed_delims.drain(..) {
-        if let Some(mut e) = make_unclosed_delims_error(unmatched, sess) {
-            e.emit();
-        }
-    }
-}
-
 /// A helper struct used when building an `AttrTokenStream` from
 /// a `LazyAttrTokenStream`. Both delimiter and non-delimited tokens
 /// are stored as `FlatToken::Token`. A vector of `FlatToken`s
@@ -1571,7 +1491,7 @@ pub enum FlatToken {
 }
 
 #[derive(Debug)]
-pub enum NtOrTt {
+pub enum ParseNtResult {
     Nt(Nonterminal),
     Tt(TokenTree),
 }
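The new `break_last_token` comment in the `Parser` struct above describes token "ungluing": a compound token such as `>>` is split into two `>` tokens so that, for example, the nested generics in `Option<Vec<u8>>` can be closed one bracket at a time. A much-simplified, self-contained sketch of the idea follows; the `Tok` type and `unglue` function are illustrative stand-ins, not rustc's real types.

// A toy model of the 'ungluing' described in the `break_last_token` comment:
// a glued `>>` token is split into two `>` tokens so a parser can consume
// them one at a time.
#[derive(Debug, PartialEq, Clone, Copy)]
enum Tok {
    Gt,  // `>`
    Shr, // `>>`, a single glued token produced by the lexer
}

// Split a glued token into its two component tokens, if it has any.
fn unglue(t: Tok) -> Option<(Tok, Tok)> {
    match t {
        Tok::Shr => Some((Tok::Gt, Tok::Gt)),
        Tok::Gt => None,
    }
}

fn main() {
    // Parsing `Option<Vec<u8>>`: the trailing `>>` must be eaten as `>` `>`.
    let trailing = Tok::Shr;
    if let Some((first, second)) = unglue(trailing) {
        // The parser consumes `first` immediately and leaves `second` as the
        // current token. If a captured token range ends between the two, a
        // flag like `break_last_token` has to remember that the original `>>`
        // was split so the captured stream can be fixed up later.
        assert_eq!((first, second), (Tok::Gt, Tok::Gt));
    }
}

In the real parser the split happens in `break_and_eat` (the hunk touching `self.break_last_token = true` above), and the flag is consulted when a `LazyAttrTokenStream` is evaluated.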
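The `static_assert_size!(Parser<'_>, 272)` assertion above drops to 264 once the fields are reshuffled. As a hedged illustration of how such a compile-time size check can be written in plain Rust (an approximation only; the real macro lives in `rustc_data_structures` and may differ in detail):

// An approximate stand-in for `rustc_data_structures::static_assert_size!`:
// the array types on both sides must have the same length, so compilation
// fails whenever `size_of::<$ty>()` differs from the expected `$size`.
macro_rules! static_assert_size {
    ($ty:ty, $size:expr) => {
        const _: [(); $size] = [(); ::std::mem::size_of::<$ty>()];
    };
}

#[allow(dead_code)]
struct Example {
    a: u64,
    b: u64,
}

// Compilation fails if `Example` is not exactly 16 bytes on the target.
static_assert_size!(Example, 16);

fn main() {}

Checks like this catch accidental size regressions in types that are cloned on hot paths, which is exactly why the diff has to update the constant when `Parser` shrinks.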
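The deleted `TokenCursor::desugar` turned a doc comment into an equivalent `#[doc = r"..."]` attribute, computing the minimum number of `#` characters needed to wrap the comment text in a raw string literal. A standalone sketch of that counting loop, lifted from the removed code for illustration (the function name `min_raw_string_hashes` is ours):

// Minimum number of `#`s needed so `data` can be wrapped in a raw string
// literal, mirroring the loop in the deleted `TokenCursor::desugar`: track
// runs of `#` that immediately follow a `"` and keep the longest one.
fn min_raw_string_hashes(data: &str) -> usize {
    let mut num_of_hashes = 0;
    let mut count = 0;
    for ch in data.chars() {
        count = match ch {
            '"' => 1,
            '#' if count > 0 => count + 1,
            _ => 0,
        };
        num_of_hashes = std::cmp::max(num_of_hashes, count);
    }
    num_of_hashes
}

fn main() {
    assert_eq!(min_raw_string_hashes("abc d"), 0);              // r"abc d"
    assert_eq!(min_raw_string_hashes(r#"abc "d""#), 1);         // r#"abc "d""#
    assert_eq!(min_raw_string_hashes(r###"abc "##d##""###), 3); // needs three `#`s
}

The 1.73 parser no longer performs this desugaring inside `TokenCursor`; the hunks above remove the helper together with the `desugar_doc_comments` flag.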