author     Daniel Baumann <daniel.baumann@progress-linux.org>    2024-05-30 03:59:35 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>    2024-05-30 03:59:35 +0000
commit     d1b2d29528b7794b41e66fc2136e395a02f8529b (patch)
tree       a4a17504b260206dec3cf55b2dca82929a348ac2 /compiler/rustc_parse/src/parser/mod.rs
parent     Releasing progress-linux version 1.72.1+dfsg1-1~progress7.99u1. (diff)
download   rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.tar.xz
           rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.zip
Merging upstream version 1.73.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_parse/src/parser/mod.rs')
-rw-r--r--    compiler/rustc_parse/src/parser/mod.rs    240
1 file changed, 80 insertions, 160 deletions
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index c23420661..77c59bb38 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -24,12 +24,11 @@ use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
 use rustc_ast::util::case::Case;
 use rustc_ast::AttrId;
 use rustc_ast::DUMMY_NODE_ID;
-use rustc_ast::{self as ast, AnonConst, AttrStyle, Const, DelimArgs, Extern};
-use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, MacDelimiter, Mutability, StrLit};
+use rustc_ast::{self as ast, AnonConst, Const, DelimArgs, Extern};
+use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, Mutability, StrLit};
 use rustc_ast::{HasAttrs, HasTokens, Unsafe, Visibility, VisibilityKind};
 use rustc_ast_pretty::pprust;
 use rustc_data_structures::fx::FxHashMap;
-use rustc_data_structures::sync::Ordering;
 use rustc_errors::PResult;
 use rustc_errors::{
     Applicability, DiagnosticBuilder, ErrorGuaranteed, FatalError, IntoDiagnostic, MultiSpan,
@@ -38,7 +37,7 @@ use rustc_session::parse::ParseSess;
 use rustc_span::source_map::{Span, DUMMY_SP};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use std::ops::Range;
-use std::{cmp, mem, slice};
+use std::{mem, slice};
 use thin_vec::ThinVec;
 use tracing::debug;
 
@@ -135,10 +134,24 @@ pub struct Parser<'a> {
     pub capture_cfg: bool,
     restrictions: Restrictions,
     expected_tokens: Vec<TokenType>,
-    // Important: This must only be advanced from `bump` to ensure that
-    // `token_cursor.num_next_calls` is updated properly.
     token_cursor: TokenCursor,
-    desugar_doc_comments: bool,
+    // The number of calls to `bump`, i.e. the position in the token stream.
+    num_bump_calls: usize,
+    // During parsing we may sometimes need to 'unglue' a glued token into two
+    // component tokens (e.g. '>>' into '>' and '>), so the parser can consume
+    // them one at a time. This process bypasses the normal capturing mechanism
+    // (e.g. `num_bump_calls` will not be incremented), since the 'unglued'
+    // tokens due not exist in the original `TokenStream`.
+    //
+    // If we end up consuming both unglued tokens, this is not an issue. We'll
+    // end up capturing the single 'glued' token.
+    //
+    // However, sometimes we may want to capture just the first 'unglued'
+    // token. For example, capturing the `Vec<u8>` in `Option<Vec<u8>>`
+    // requires us to unglue the trailing `>>` token. The `break_last_token`
+    // field is used to track this token. It gets appended to the captured
+    // stream when we evaluate a `LazyAttrTokenStream`.
+    break_last_token: bool,
     /// This field is used to keep track of how many left angle brackets we have seen. This is
     /// required in order to detect extra leading left angle brackets (`<` characters) and error
     /// appropriately.
@@ -162,7 +175,7 @@ pub struct Parser<'a> {
 // This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
 // it doesn't unintentionally get bigger.
 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-rustc_data_structures::static_assert_size!(Parser<'_>, 272);
+rustc_data_structures::static_assert_size!(Parser<'_>, 264);
 
 /// Stores span information about a closure.
 #[derive(Clone)]
@@ -224,64 +237,29 @@ struct TokenCursor {
     // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
     // because it's the outermost token stream which never has delimiters.
     stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,
-
-    desugar_doc_comments: bool,
-
-    // Counts the number of calls to `{,inlined_}next`.
-    num_next_calls: usize,
-
-    // During parsing, we may sometimes need to 'unglue' a
-    // glued token into two component tokens
-    // (e.g. '>>' into '>' and '>), so that the parser
-    // can consume them one at a time. This process
-    // bypasses the normal capturing mechanism
-    // (e.g. `num_next_calls` will not be incremented),
-    // since the 'unglued' tokens due not exist in
-    // the original `TokenStream`.
-    //
-    // If we end up consuming both unglued tokens,
-    // then this is not an issue - we'll end up
-    // capturing the single 'glued' token.
-    //
-    // However, in certain circumstances, we may
-    // want to capture just the first 'unglued' token.
-    // For example, capturing the `Vec<u8>`
-    // in `Option<Vec<u8>>` requires us to unglue
-    // the trailing `>>` token. The `break_last_token`
-    // field is used to track this token - it gets
-    // appended to the captured stream when
-    // we evaluate a `LazyAttrTokenStream`.
-    break_last_token: bool,
 }
 
 impl TokenCursor {
-    fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
-        self.inlined_next(desugar_doc_comments)
+    fn next(&mut self) -> (Token, Spacing) {
+        self.inlined_next()
     }
 
     /// This always-inlined version should only be used on hot code paths.
     #[inline(always)]
-    fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
+    fn inlined_next(&mut self) -> (Token, Spacing) {
         loop {
-            // FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will
-            // need to, whereupon the `delim != Delimiter::Invisible` conditions below can be
-            // removed.
+            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
+            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
+            // below can be removed.
             if let Some(tree) = self.tree_cursor.next_ref() {
                 match tree {
-                    &TokenTree::Token(ref token, spacing) => match (desugar_doc_comments, token) {
-                        (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => {
-                            let desugared = self.desugar(attr_style, data, span);
-                            self.tree_cursor.replace_prev_and_rewind(desugared);
-                            // Continue to get the first token of the desugared doc comment.
-                        }
-                        _ => {
-                            debug_assert!(!matches!(
-                                token.kind,
-                                token::OpenDelim(_) | token::CloseDelim(_)
-                            ));
-                            return (token.clone(), spacing);
-                        }
-                    },
+                    &TokenTree::Token(ref token, spacing) => {
+                        debug_assert!(!matches!(
+                            token.kind,
+                            token::OpenDelim(_) | token::CloseDelim(_)
+                        ));
+                        return (token.clone(), spacing);
+                    }
                     &TokenTree::Delimited(sp, delim, ref tts) => {
                         let trees = tts.clone().into_trees();
                         self.stack.push((mem::replace(&mut self.tree_cursor, trees), delim, sp));
@@ -304,52 +282,6 @@ impl TokenCursor {
             }
         }
     }
-
-    // Desugar a doc comment into something like `#[doc = r"foo"]`.
-    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
-        // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
-        // required to wrap the text. E.g.
-        // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
-        // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
-        // - `abc "##d##"` is wrapped as `r###"abc ##"d"##"###` (num_of_hashes = 3)
-        let mut num_of_hashes = 0;
-        let mut count = 0;
-        for ch in data.as_str().chars() {
-            count = match ch {
-                '"' => 1,
-                '#' if count > 0 => count + 1,
-                _ => 0,
-            };
-            num_of_hashes = cmp::max(num_of_hashes, count);
-        }
-
-        // `/// foo` becomes `doc = r"foo"`.
-        let delim_span = DelimSpan::from_single(span);
-        let body = TokenTree::Delimited(
-            delim_span,
-            Delimiter::Bracket,
-            [
-                TokenTree::token_alone(token::Ident(sym::doc, false), span),
-                TokenTree::token_alone(token::Eq, span),
-                TokenTree::token_alone(
-                    TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
-                    span,
-                ),
-            ]
-            .into_iter()
-            .collect::<TokenStream>(),
-        );
-
-        if attr_style == AttrStyle::Inner {
-            vec![
-                TokenTree::token_alone(token::Pound, span),
-                TokenTree::token_alone(token::Not, span),
-                body,
-            ]
-        } else {
-            vec![TokenTree::token_alone(token::Pound, span), body]
-        }
-    }
 }
 
 #[derive(Debug, Clone, PartialEq)]
@@ -368,7 +300,7 @@ impl TokenType {
     fn to_string(&self) -> String {
         match self {
             TokenType::Token(t) => format!("`{}`", pprust::token_kind_to_string(t)),
-            TokenType::Keyword(kw) => format!("`{}`", kw),
+            TokenType::Keyword(kw) => format!("`{kw}`"),
             TokenType::Operator => "an operator".to_string(),
             TokenType::Lifetime => "lifetime".to_string(),
             TokenType::Ident => "identifier".to_string(),
@@ -438,14 +370,13 @@ pub(super) fn token_descr(token: &Token) -> String {
         TokenDescription::DocComment => "doc comment",
     });
 
-    if let Some(kind) = kind { format!("{} `{}`", kind, name) } else { format!("`{}`", name) }
+    if let Some(kind) = kind { format!("{kind} `{name}`") } else { format!("`{name}`") }
 }
 
 impl<'a> Parser<'a> {
     pub fn new(
         sess: &'a ParseSess,
-        tokens: TokenStream,
-        desugar_doc_comments: bool,
+        stream: TokenStream,
         subparser_name: Option<&'static str>,
     ) -> Self {
         let mut parser = Parser {
@@ -456,14 +387,9 @@ impl<'a> Parser<'a> {
             capture_cfg: false,
             restrictions: Restrictions::empty(),
             expected_tokens: Vec::new(),
-            token_cursor: TokenCursor {
-                tree_cursor: tokens.into_trees(),
-                stack: Vec::new(),
-                num_next_calls: 0,
-                desugar_doc_comments,
-                break_last_token: false,
-            },
-            desugar_doc_comments,
+            token_cursor: TokenCursor { tree_cursor: stream.into_trees(), stack: Vec::new() },
+            num_bump_calls: 0,
+            break_last_token: false,
             unmatched_angle_bracket_count: 0,
             max_angle_bracket_count: 0,
             last_unexpected_token_span: None,
@@ -766,7 +692,7 @@ impl<'a> Parser<'a> {
                 // If we consume any additional tokens, then this token
                 // is not needed (we'll capture the entire 'glued' token),
                 // and `bump` will set this field to `None`
-                self.token_cursor.break_last_token = true;
+                self.break_last_token = true;
                 // Use the spacing of the glued token as the spacing
                 // of the unglued second token.
                 self.bump_with((Token::new(second, second_span), self.token_spacing));
@@ -923,7 +849,7 @@ impl<'a> Parser<'a> {
                    expect_err
                        .span_suggestion_short(
                            sp,
-                            format!("missing `{}`", token_str),
+                            format!("missing `{token_str}`"),
                            token_str,
                            Applicability::MaybeIncorrect,
                        )
@@ -1107,12 +1033,12 @@ impl<'a> Parser<'a> {
     pub fn bump(&mut self) {
         // Note: destructuring here would give nicer code, but it was found in #96210 to be slower
         // than `.0`/`.1` access.
-        let mut next = self.token_cursor.inlined_next(self.desugar_doc_comments);
-        self.token_cursor.num_next_calls += 1;
+        let mut next = self.token_cursor.inlined_next();
+        self.num_bump_calls += 1;
         // We've retrieved an token from the underlying
         // cursor, so we no longer need to worry about
         // an unglued token. See `break_and_eat` for more details
-        self.token_cursor.break_last_token = false;
+        self.break_last_token = false;
         if next.0.span.is_dummy() {
             // Tweak the location for better diagnostics, but keep syntactic context intact.
             let fallback_span = self.token.span;
@@ -1126,38 +1052,53 @@ impl<'a> Parser<'a> {
     }
 
     /// Look-ahead `dist` tokens of `self.token` and get access to that token there.
-    /// When `dist == 0` then the current token is looked at.
+    /// When `dist == 0` then the current token is looked at. `Eof` will be
+    /// returned if the look-ahead is any distance past the end of the tokens.
     pub fn look_ahead<R>(&self, dist: usize, looker: impl FnOnce(&Token) -> R) -> R {
         if dist == 0 {
             return looker(&self.token);
         }
 
-        let tree_cursor = &self.token_cursor.tree_cursor;
         if let Some(&(_, delim, span)) = self.token_cursor.stack.last()
             && delim != Delimiter::Invisible
         {
+            // We are not in the outermost token stream, and the token stream
+            // we are in has non-skipped delimiters. Look for skipped
+            // delimiters in the lookahead range.
+            let tree_cursor = &self.token_cursor.tree_cursor;
             let all_normal = (0..dist).all(|i| {
                 let token = tree_cursor.look_ahead(i);
                 !matches!(token, Some(TokenTree::Delimited(_, Delimiter::Invisible, _)))
             });
             if all_normal {
+                // There were no skipped delimiters. Do lookahead by plain indexing.
                 return match tree_cursor.look_ahead(dist - 1) {
-                    Some(tree) => match tree {
-                        TokenTree::Token(token, _) => looker(token),
-                        TokenTree::Delimited(dspan, delim, _) => {
-                            looker(&Token::new(token::OpenDelim(*delim), dspan.open))
+                    Some(tree) => {
+                        // Indexing stayed within the current token stream.
+                        match tree {
+                            TokenTree::Token(token, _) => looker(token),
+                            TokenTree::Delimited(dspan, delim, _) => {
+                                looker(&Token::new(token::OpenDelim(*delim), dspan.open))
+                            }
                         }
-                    },
-                    None => looker(&Token::new(token::CloseDelim(delim), span.close)),
+                    }
+                    None => {
+                        // Indexing went past the end of the current token
+                        // stream. Use the close delimiter, no matter how far
+                        // ahead `dist` went.
+                        looker(&Token::new(token::CloseDelim(delim), span.close))
+                    }
                 };
             }
         }
 
+        // We are in a more complex case. Just clone the token cursor and use
+        // `next`, skipping delimiters as necessary. Slow but simple.
         let mut cursor = self.token_cursor.clone();
         let mut i = 0;
         let mut token = Token::dummy();
         while i < dist {
-            token = cursor.next(/* desugar_doc_comments */ false).0;
+            token = cursor.next().0;
             if matches!(
                 token.kind,
                 token::OpenDelim(Delimiter::Invisible) | token::CloseDelim(Delimiter::Invisible)
@@ -1166,7 +1107,7 @@ impl<'a> Parser<'a> {
             }
             i += 1;
         }
-        return looker(&token);
+        looker(&token)
     }
 
     /// Returns whether any of the given keywords are `dist` tokens ahead of the current one.
@@ -1210,7 +1151,8 @@ impl<'a> Parser<'a> {
     fn parse_constness_(&mut self, case: Case, is_closure: bool) -> Const {
         // Avoid const blocks and const closures to be parsed as const items
         if (self.check_const_closure() == is_closure)
-            && self.look_ahead(1, |t| t != &token::OpenDelim(Delimiter::Brace))
+            && !self
+                .look_ahead(1, |t| *t == token::OpenDelim(Delimiter::Brace) || t.is_whole_block())
             && self.eat_keyword_case(kw::Const, case)
         {
             Const::Yes(self.prev_token.uninterpolated_span())
@@ -1288,10 +1230,10 @@ impl<'a> Parser<'a> {
             || self.check(&token::OpenDelim(Delimiter::Brace));
 
         delimited.then(|| {
-            // We've confirmed above that there is a delimiter so unwrapping is OK.
-            let TokenTree::Delimited(dspan, delim, tokens) = self.parse_token_tree() else { unreachable!() };
-
-            DelimArgs { dspan, delim: MacDelimiter::from_token(delim).unwrap(), tokens }
+            let TokenTree::Delimited(dspan, delim, tokens) = self.parse_token_tree() else {
+                unreachable!()
+            };
+            DelimArgs { dspan, delim, tokens }
         })
     }
 
@@ -1307,12 +1249,11 @@ impl<'a> Parser<'a> {
     }
 
     /// Parses a single token tree from the input.
-    pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
+    pub fn parse_token_tree(&mut self) -> TokenTree {
         match self.token.kind {
             token::OpenDelim(..) => {
                 // Grab the tokens within the delimiters.
-                let tree_cursor = &self.token_cursor.tree_cursor;
-                let stream = tree_cursor.stream.clone();
+                let stream = self.token_cursor.tree_cursor.stream.clone();
                 let (_, delim, span) = *self.token_cursor.stack.last().unwrap();
 
                 // Advance the token cursor through the entire delimited
@@ -1343,15 +1284,6 @@ impl<'a> Parser<'a> {
         }
     }
 
-    /// Parses a stream of tokens into a list of `TokenTree`s, up to EOF.
-    pub fn parse_all_token_trees(&mut self) -> PResult<'a, Vec<TokenTree>> {
-        let mut tts = Vec::new();
-        while self.token != token::Eof {
-            tts.push(self.parse_token_tree());
-        }
-        Ok(tts)
-    }
-
     pub fn parse_tokens(&mut self) -> TokenStream {
         let mut result = Vec::new();
         loop {
@@ -1511,7 +1443,7 @@ impl<'a> Parser<'a> {
     }
 
     pub fn approx_token_stream_pos(&self) -> usize {
-        self.token_cursor.num_next_calls
+        self.num_bump_calls
     }
 }
 
@@ -1537,18 +1469,6 @@ pub(crate) fn make_unclosed_delims_error(
     Some(err)
 }
 
-pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedDelim>, sess: &ParseSess) {
-    let _ = sess.reached_eof.fetch_or(
-        unclosed_delims.iter().any(|unmatched_delim| unmatched_delim.found_delim.is_none()),
-        Ordering::Relaxed,
-    );
-    for unmatched in unclosed_delims.drain(..) {
-        if let Some(mut e) = make_unclosed_delims_error(unmatched, sess) {
-            e.emit();
-        }
-    }
-}
-
 /// A helper struct used when building an `AttrTokenStream` from
 /// a `LazyAttrTokenStream`. Both delimiter and non-delimited tokens
 /// are stored as `FlatToken::Token`. A vector of `FlatToken`s
@@ -1571,7 +1491,7 @@ pub enum FlatToken {
 }
 
 #[derive(Debug)]
-pub enum NtOrTt {
+pub enum ParseNtResult {
     Nt(Nonterminal),
     Tt(TokenTree),
 }
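The new `break_last_token` comment in the `Parser` struct above describes token "ungluing": a compound token such as `>>` is split into two `>` tokens so that, for example, the nested generics in `Option<Vec<u8>>` can be closed one bracket at a time. A much-simplified, self-contained sketch of the idea follows; the `Tok` type and `unglue` function are illustrative stand-ins, not rustc's real types.

// A toy model of the 'ungluing' described in the `break_last_token` comment:
// a glued `>>` token is split into two `>` tokens so a parser can consume
// them one at a time.
#[derive(Debug, PartialEq, Clone, Copy)]
enum Tok {
    Gt,  // `>`
    Shr, // `>>`, a single glued token produced by the lexer
}

// Split a glued token into its two component tokens, if it has any.
fn unglue(t: Tok) -> Option<(Tok, Tok)> {
    match t {
        Tok::Shr => Some((Tok::Gt, Tok::Gt)),
        Tok::Gt => None,
    }
}

fn main() {
    // Parsing `Option<Vec<u8>>`: the trailing `>>` must be eaten as `>` `>`.
    let trailing = Tok::Shr;
    if let Some((first, second)) = unglue(trailing) {
        // The parser consumes `first` immediately and leaves `second` as the
        // current token. If a captured token range ends between the two, a
        // flag like `break_last_token` has to remember that the original `>>`
        // was split so the captured stream can be fixed up later.
        assert_eq!((first, second), (Tok::Gt, Tok::Gt));
    }
}

In the real parser the split happens in `break_and_eat` (the hunk touching `self.break_last_token = true` above), and the flag is consulted when a `LazyAttrTokenStream` is evaluated.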
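The `static_assert_size!(Parser<'_>, 272)` assertion above drops to 264 once the fields are reshuffled. As a hedged illustration of how such a compile-time size check can be written in plain Rust (an approximation only; the real macro lives in `rustc_data_structures` and may differ in detail):

// An approximate stand-in for `rustc_data_structures::static_assert_size!`:
// the array types on both sides must have the same length, so compilation
// fails whenever `size_of::<$ty>()` differs from the expected `$size`.
macro_rules! static_assert_size {
    ($ty:ty, $size:expr) => {
        const _: [(); $size] = [(); ::std::mem::size_of::<$ty>()];
    };
}

#[allow(dead_code)]
struct Example {
    a: u64,
    b: u64,
}

// Compilation fails if `Example` is not exactly 16 bytes on the target.
static_assert_size!(Example, 16);

fn main() {}

Checks like this catch accidental size regressions in types that are cloned on hot paths, which is exactly why the diff has to update the constant when `Parser` shrinks.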
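The deleted `TokenCursor::desugar` turned a doc comment into an equivalent `#[doc = r"..."]` attribute, computing the minimum number of `#` characters needed to wrap the comment text in a raw string literal. A standalone sketch of that counting loop, lifted from the removed code for illustration (the function name `min_raw_string_hashes` is ours):

// Minimum number of `#`s needed so `data` can be wrapped in a raw string
// literal, mirroring the loop in the deleted `TokenCursor::desugar`: track
// runs of `#` that immediately follow a `"` and keep the longest one.
fn min_raw_string_hashes(data: &str) -> usize {
    let mut num_of_hashes = 0;
    let mut count = 0;
    for ch in data.chars() {
        count = match ch {
            '"' => 1,
            '#' if count > 0 => count + 1,
            _ => 0,
        };
        num_of_hashes = std::cmp::max(num_of_hashes, count);
    }
    num_of_hashes
}

fn main() {
    assert_eq!(min_raw_string_hashes("abc d"), 0);              // r"abc d"
    assert_eq!(min_raw_string_hashes(r#"abc "d""#), 1);         // r#"abc "d""#
    assert_eq!(min_raw_string_hashes(r###"abc "##d##""###), 3); // needs three `#`s
}

The 1.73 parser no longer performs this desugaring inside `TokenCursor`; the hunks above remove the helper together with the `desugar_doc_comments` flag.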