From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- compiler/rustc_parse/Cargo.toml | 22 + compiler/rustc_parse/src/lexer/mod.rs | 717 +++++ compiler/rustc_parse/src/lexer/tokentrees.rs | 296 ++ .../src/lexer/unescape_error_reporting.rs | 381 +++ compiler/rustc_parse/src/lexer/unicode_chars.rs | 386 +++ compiler/rustc_parse/src/lib.rs | 291 ++ compiler/rustc_parse/src/parser/attr.rs | 444 +++ compiler/rustc_parse/src/parser/attr_wrapper.rs | 464 +++ compiler/rustc_parse/src/parser/diagnostics.rs | 2740 ++++++++++++++++ compiler/rustc_parse/src/parser/expr.rs | 3288 ++++++++++++++++++++ compiler/rustc_parse/src/parser/generics.rs | 350 +++ compiler/rustc_parse/src/parser/item.rs | 2426 +++++++++++++++ compiler/rustc_parse/src/parser/mod.rs | 1481 +++++++++ compiler/rustc_parse/src/parser/nonterminal.rs | 203 ++ compiler/rustc_parse/src/parser/pat.rs | 1151 +++++++ compiler/rustc_parse/src/parser/path.rs | 754 +++++ compiler/rustc_parse/src/parser/stmt.rs | 648 ++++ compiler/rustc_parse/src/parser/ty.rs | 891 ++++++ compiler/rustc_parse/src/validate_attr.rs | 200 ++ 19 files changed, 17133 insertions(+) create mode 100644 compiler/rustc_parse/Cargo.toml create mode 100644 compiler/rustc_parse/src/lexer/mod.rs create mode 100644 compiler/rustc_parse/src/lexer/tokentrees.rs create mode 100644 compiler/rustc_parse/src/lexer/unescape_error_reporting.rs create mode 100644 compiler/rustc_parse/src/lexer/unicode_chars.rs create mode 100644 compiler/rustc_parse/src/lib.rs create mode 100644 compiler/rustc_parse/src/parser/attr.rs create mode 100644 compiler/rustc_parse/src/parser/attr_wrapper.rs create mode 100644 compiler/rustc_parse/src/parser/diagnostics.rs create mode 100644 compiler/rustc_parse/src/parser/expr.rs create mode 100644 compiler/rustc_parse/src/parser/generics.rs create mode 100644 compiler/rustc_parse/src/parser/item.rs create mode 100644 compiler/rustc_parse/src/parser/mod.rs create mode 100644 compiler/rustc_parse/src/parser/nonterminal.rs create mode 100644 compiler/rustc_parse/src/parser/pat.rs create mode 100644 compiler/rustc_parse/src/parser/path.rs create mode 100644 compiler/rustc_parse/src/parser/stmt.rs create mode 100644 compiler/rustc_parse/src/parser/ty.rs create mode 100644 compiler/rustc_parse/src/validate_attr.rs (limited to 'compiler/rustc_parse') diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml new file mode 100644 index 000000000..c6ca260e9 --- /dev/null +++ b/compiler/rustc_parse/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "rustc_parse" +version = "0.0.0" +edition = "2021" + +[lib] +doctest = false + +[dependencies] +bitflags = "1.0" +tracing = "0.1" +rustc_ast_pretty = { path = "../rustc_ast_pretty" } +rustc_data_structures = { path = "../rustc_data_structures" } +rustc_feature = { path = "../rustc_feature" } +rustc_lexer = { path = "../rustc_lexer" } +rustc_macros = { path = "../rustc_macros" } +rustc_errors = { path = "../rustc_errors" } +rustc_session = { path = "../rustc_session" } +rustc_span = { path = "../rustc_span" } +rustc_ast = { path = "../rustc_ast" } +unicode-normalization = "0.1.11" +unicode-width = "0.1.4" diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs new file mode 100644 index 000000000..848e142e5 --- /dev/null +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -0,0 +1,717 @@ +use crate::lexer::unicode_chars::UNICODE_ARRAY; +use rustc_ast::ast::{self, AttrStyle}; +use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind}; +use rustc_ast::tokenstream::{Spacing, TokenStream}; +use rustc_ast::util::unicode::contains_text_flow_control_chars; +use rustc_errors::{error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult}; +use rustc_lexer::unescape::{self, Mode}; +use rustc_lexer::{Base, DocStyle, RawStrError}; +use rustc_session::lint::builtin::{ + RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT, +}; +use rustc_session::lint::BuiltinLintDiagnostics; +use rustc_session::parse::ParseSess; +use rustc_span::symbol::{sym, Symbol}; +use rustc_span::{edition::Edition, BytePos, Pos, Span}; + +use tracing::debug; + +mod tokentrees; +mod unescape_error_reporting; +mod unicode_chars; + +use unescape_error_reporting::{emit_unescape_error, escaped_char}; + +// This type is used a lot. Make sure it doesn't unintentionally get bigger. +// +// This assertion is in this crate, rather than in `rustc_lexer`, because that +// crate cannot depend on `rustc_data_structures`. +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +rustc_data_structures::static_assert_size!(rustc_lexer::Token, 12); + +#[derive(Clone, Debug)] +pub struct UnmatchedBrace { + pub expected_delim: Delimiter, + pub found_delim: Option, + pub found_span: Span, + pub unclosed_span: Option, + pub candidate_span: Option, +} + +pub(crate) fn parse_token_trees<'a>( + sess: &'a ParseSess, + src: &'a str, + start_pos: BytePos, + override_span: Option, +) -> (PResult<'a, TokenStream>, Vec) { + StringReader { sess, start_pos, pos: start_pos, src, override_span }.into_token_trees() +} + +struct StringReader<'a> { + sess: &'a ParseSess, + /// Initial position, read-only. + start_pos: BytePos, + /// The absolute offset within the source_map of the current character. + pos: BytePos, + /// Source text to tokenize. + src: &'a str, + override_span: Option, +} + +impl<'a> StringReader<'a> { + fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span { + self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi)) + } + + /// Returns the next token, and info about preceding whitespace, if any. + fn next_token(&mut self) -> (Spacing, Token) { + let mut spacing = Spacing::Joint; + + // Skip `#!` at the start of the file + if self.pos == self.start_pos + && let Some(shebang_len) = rustc_lexer::strip_shebang(self.src) + { + self.pos = self.pos + BytePos::from_usize(shebang_len); + spacing = Spacing::Alone; + } + + // Skip trivial (whitespace & comments) tokens + loop { + let start_src_index = self.src_index(self.pos); + let text: &str = &self.src[start_src_index..]; + + if text.is_empty() { + let span = self.mk_sp(self.pos, self.pos); + return (spacing, Token::new(token::Eof, span)); + } + + let token = rustc_lexer::first_token(text); + + let start = self.pos; + self.pos = self.pos + BytePos(token.len); + + debug!("next_token: {:?}({:?})", token.kind, self.str_from(start)); + + match self.cook_lexer_token(token.kind, start) { + Some(kind) => { + let span = self.mk_sp(start, self.pos); + return (spacing, Token::new(kind, span)); + } + None => spacing = Spacing::Alone, + } + } + } + + /// Report a fatal lexical error with a given span. + fn fatal_span(&self, sp: Span, m: &str) -> ! { + self.sess.span_diagnostic.span_fatal(sp, m) + } + + /// Report a lexical error with a given span. + fn err_span(&self, sp: Span, m: &str) { + self.sess.span_diagnostic.struct_span_err(sp, m).emit(); + } + + /// Report a fatal error spanning [`from_pos`, `to_pos`). + fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! { + self.fatal_span(self.mk_sp(from_pos, to_pos), m) + } + + /// Report a lexical error spanning [`from_pos`, `to_pos`). + fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) { + self.err_span(self.mk_sp(from_pos, to_pos), m) + } + + fn struct_fatal_span_char( + &self, + from_pos: BytePos, + to_pos: BytePos, + m: &str, + c: char, + ) -> DiagnosticBuilder<'a, !> { + self.sess + .span_diagnostic + .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c))) + } + + fn struct_err_span_char( + &self, + from_pos: BytePos, + to_pos: BytePos, + m: &str, + c: char, + ) -> DiagnosticBuilder<'a, ErrorGuaranteed> { + self.sess + .span_diagnostic + .struct_span_err(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c))) + } + + /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly + /// complain about it. + fn lint_unicode_text_flow(&self, start: BytePos) { + // Opening delimiter of the length 2 is not included into the comment text. + let content_start = start + BytePos(2); + let content = self.str_from(content_start); + if contains_text_flow_control_chars(content) { + let span = self.mk_sp(start, self.pos); + self.sess.buffer_lint_with_diagnostic( + &TEXT_DIRECTION_CODEPOINT_IN_COMMENT, + span, + ast::CRATE_NODE_ID, + "unicode codepoint changing visible direction of text present in comment", + BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()), + ); + } + } + + /// Turns simple `rustc_lexer::TokenKind` enum into a rich + /// `rustc_ast::TokenKind`. This turns strings into interned + /// symbols and runs additional validation. + fn cook_lexer_token(&self, token: rustc_lexer::TokenKind, start: BytePos) -> Option { + Some(match token { + rustc_lexer::TokenKind::LineComment { doc_style } => { + // Skip non-doc comments + let Some(doc_style) = doc_style else { + self.lint_unicode_text_flow(start); + return None; + }; + + // Opening delimiter of the length 3 is not included into the symbol. + let content_start = start + BytePos(3); + let content = self.str_from(content_start); + self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style) + } + rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => { + if !terminated { + self.report_unterminated_block_comment(start, doc_style); + } + + // Skip non-doc comments + let Some(doc_style) = doc_style else { + self.lint_unicode_text_flow(start); + return None; + }; + + // Opening delimiter of the length 3 and closing delimiter of the length 2 + // are not included into the symbol. + let content_start = start + BytePos(3); + let content_end = self.pos - BytePos(if terminated { 2 } else { 0 }); + let content = self.str_from_to(content_start, content_end); + self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style) + } + rustc_lexer::TokenKind::Whitespace => return None, + rustc_lexer::TokenKind::Ident + | rustc_lexer::TokenKind::RawIdent + | rustc_lexer::TokenKind::UnknownPrefix => { + let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent; + let is_unknown_prefix = token == rustc_lexer::TokenKind::UnknownPrefix; + let mut ident_start = start; + if is_raw_ident { + ident_start = ident_start + BytePos(2); + } + if is_unknown_prefix { + self.report_unknown_prefix(start); + } + let sym = nfc_normalize(self.str_from(ident_start)); + let span = self.mk_sp(start, self.pos); + self.sess.symbol_gallery.insert(sym, span); + if is_raw_ident { + if !sym.can_be_raw() { + self.err_span(span, &format!("`{}` cannot be a raw identifier", sym)); + } + self.sess.raw_identifier_spans.borrow_mut().push(span); + } + token::Ident(sym, is_raw_ident) + } + rustc_lexer::TokenKind::InvalidIdent + // Do not recover an identifier with emoji if the codepoint is a confusable + // with a recoverable substitution token, like `➖`. + if !UNICODE_ARRAY + .iter() + .any(|&(c, _, _)| { + let sym = self.str_from(start); + sym.chars().count() == 1 && c == sym.chars().next().unwrap() + }) + => + { + let sym = nfc_normalize(self.str_from(start)); + let span = self.mk_sp(start, self.pos); + self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default().push(span); + token::Ident(sym, false) + } + rustc_lexer::TokenKind::Literal { kind, suffix_start } => { + let suffix_start = start + BytePos(suffix_start); + let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind); + let suffix = if suffix_start < self.pos { + let string = self.str_from(suffix_start); + if string == "_" { + self.sess + .span_diagnostic + .struct_span_warn( + self.mk_sp(suffix_start, self.pos), + "underscore literal suffix is not allowed", + ) + .warn( + "this was previously accepted by the compiler but is \ + being phased out; it will become a hard error in \ + a future release!", + ) + .note( + "see issue #42326 \ + \ + for more information", + ) + .emit(); + None + } else { + Some(Symbol::intern(string)) + } + } else { + None + }; + token::Literal(token::Lit { kind, symbol, suffix }) + } + rustc_lexer::TokenKind::Lifetime { starts_with_number } => { + // Include the leading `'` in the real identifier, for macro + // expansion purposes. See #12512 for the gory details of why + // this is necessary. + let lifetime_name = self.str_from(start); + if starts_with_number { + self.err_span_(start, self.pos, "lifetimes cannot start with a number"); + } + let ident = Symbol::intern(lifetime_name); + token::Lifetime(ident) + } + rustc_lexer::TokenKind::Semi => token::Semi, + rustc_lexer::TokenKind::Comma => token::Comma, + rustc_lexer::TokenKind::Dot => token::Dot, + rustc_lexer::TokenKind::OpenParen => token::OpenDelim(Delimiter::Parenthesis), + rustc_lexer::TokenKind::CloseParen => token::CloseDelim(Delimiter::Parenthesis), + rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(Delimiter::Brace), + rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(Delimiter::Brace), + rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(Delimiter::Bracket), + rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(Delimiter::Bracket), + rustc_lexer::TokenKind::At => token::At, + rustc_lexer::TokenKind::Pound => token::Pound, + rustc_lexer::TokenKind::Tilde => token::Tilde, + rustc_lexer::TokenKind::Question => token::Question, + rustc_lexer::TokenKind::Colon => token::Colon, + rustc_lexer::TokenKind::Dollar => token::Dollar, + rustc_lexer::TokenKind::Eq => token::Eq, + rustc_lexer::TokenKind::Bang => token::Not, + rustc_lexer::TokenKind::Lt => token::Lt, + rustc_lexer::TokenKind::Gt => token::Gt, + rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus), + rustc_lexer::TokenKind::And => token::BinOp(token::And), + rustc_lexer::TokenKind::Or => token::BinOp(token::Or), + rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus), + rustc_lexer::TokenKind::Star => token::BinOp(token::Star), + rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash), + rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret), + rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent), + + rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => { + let c = self.str_from(start).chars().next().unwrap(); + let mut err = + self.struct_err_span_char(start, self.pos, "unknown start of token", c); + // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, + // instead of keeping a table in `check_for_substitution`into the token. Ideally, + // this should be inside `rustc_lexer`. However, we should first remove compound + // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it, + // as there will be less overall work to do this way. + let token = unicode_chars::check_for_substitution(self, start, c, &mut err); + if c == '\x00' { + err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used"); + } + err.emit(); + token? + } + }) + } + + fn cook_doc_comment( + &self, + content_start: BytePos, + content: &str, + comment_kind: CommentKind, + doc_style: DocStyle, + ) -> TokenKind { + if content.contains('\r') { + for (idx, _) in content.char_indices().filter(|&(_, c)| c == '\r') { + self.err_span_( + content_start + BytePos(idx as u32), + content_start + BytePos(idx as u32 + 1), + match comment_kind { + CommentKind::Line => "bare CR not allowed in doc-comment", + CommentKind::Block => "bare CR not allowed in block doc-comment", + }, + ); + } + } + + let attr_style = match doc_style { + DocStyle::Outer => AttrStyle::Outer, + DocStyle::Inner => AttrStyle::Inner, + }; + + token::DocComment(comment_kind, attr_style, Symbol::intern(content)) + } + + fn cook_lexer_literal( + &self, + start: BytePos, + suffix_start: BytePos, + kind: rustc_lexer::LiteralKind, + ) -> (token::LitKind, Symbol) { + // prefix means `"` or `br"` or `r###"`, ... + let (lit_kind, mode, prefix_len, postfix_len) = match kind { + rustc_lexer::LiteralKind::Char { terminated } => { + if !terminated { + self.sess.span_diagnostic.span_fatal_with_code( + self.mk_sp(start, suffix_start), + "unterminated character literal", + error_code!(E0762), + ) + } + (token::Char, Mode::Char, 1, 1) // ' ' + } + rustc_lexer::LiteralKind::Byte { terminated } => { + if !terminated { + self.sess.span_diagnostic.span_fatal_with_code( + self.mk_sp(start + BytePos(1), suffix_start), + "unterminated byte constant", + error_code!(E0763), + ) + } + (token::Byte, Mode::Byte, 2, 1) // b' ' + } + rustc_lexer::LiteralKind::Str { terminated } => { + if !terminated { + self.sess.span_diagnostic.span_fatal_with_code( + self.mk_sp(start, suffix_start), + "unterminated double quote string", + error_code!(E0765), + ) + } + (token::Str, Mode::Str, 1, 1) // " " + } + rustc_lexer::LiteralKind::ByteStr { terminated } => { + if !terminated { + self.sess.span_diagnostic.span_fatal_with_code( + self.mk_sp(start + BytePos(1), suffix_start), + "unterminated double quote byte string", + error_code!(E0766), + ) + } + (token::ByteStr, Mode::ByteStr, 2, 1) // b" " + } + rustc_lexer::LiteralKind::RawStr { n_hashes } => { + if let Some(n_hashes) = n_hashes { + let n = u32::from(n_hashes); + (token::StrRaw(n_hashes), Mode::RawStr, 2 + n, 1 + n) // r##" "## + } else { + self.report_raw_str_error(start, 1); + } + } + rustc_lexer::LiteralKind::RawByteStr { n_hashes } => { + if let Some(n_hashes) = n_hashes { + let n = u32::from(n_hashes); + (token::ByteStrRaw(n_hashes), Mode::RawByteStr, 3 + n, 1 + n) // br##" "## + } else { + self.report_raw_str_error(start, 2); + } + } + rustc_lexer::LiteralKind::Int { base, empty_int } => { + return if empty_int { + self.sess + .span_diagnostic + .struct_span_err_with_code( + self.mk_sp(start, suffix_start), + "no valid digits found for number", + error_code!(E0768), + ) + .emit(); + (token::Integer, sym::integer(0)) + } else { + self.validate_int_literal(base, start, suffix_start); + (token::Integer, self.symbol_from_to(start, suffix_start)) + }; + } + rustc_lexer::LiteralKind::Float { base, empty_exponent } => { + if empty_exponent { + self.err_span_(start, self.pos, "expected at least one digit in exponent"); + } + + match base { + Base::Hexadecimal => self.err_span_( + start, + suffix_start, + "hexadecimal float literal is not supported", + ), + Base::Octal => { + self.err_span_(start, suffix_start, "octal float literal is not supported") + } + Base::Binary => { + self.err_span_(start, suffix_start, "binary float literal is not supported") + } + _ => (), + } + + let id = self.symbol_from_to(start, suffix_start); + return (token::Float, id); + } + }; + let content_start = start + BytePos(prefix_len); + let content_end = suffix_start - BytePos(postfix_len); + let id = self.symbol_from_to(content_start, content_end); + self.validate_literal_escape(mode, content_start, content_end, prefix_len, postfix_len); + (lit_kind, id) + } + + #[inline] + fn src_index(&self, pos: BytePos) -> usize { + (pos - self.start_pos).to_usize() + } + + /// Slice of the source text from `start` up to but excluding `self.pos`, + /// meaning the slice does not include the character `self.ch`. + fn str_from(&self, start: BytePos) -> &str { + self.str_from_to(start, self.pos) + } + + /// As symbol_from, with an explicit endpoint. + fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol { + debug!("taking an ident from {:?} to {:?}", start, end); + Symbol::intern(self.str_from_to(start, end)) + } + + /// Slice of the source text spanning from `start` up to but excluding `end`. + fn str_from_to(&self, start: BytePos, end: BytePos) -> &str { + &self.src[self.src_index(start)..self.src_index(end)] + } + + fn report_raw_str_error(&self, start: BytePos, prefix_len: u32) -> ! { + match rustc_lexer::validate_raw_str(self.str_from(start), prefix_len) { + Err(RawStrError::InvalidStarter { bad_char }) => { + self.report_non_started_raw_string(start, bad_char) + } + Err(RawStrError::NoTerminator { expected, found, possible_terminator_offset }) => self + .report_unterminated_raw_string(start, expected, possible_terminator_offset, found), + Err(RawStrError::TooManyDelimiters { found }) => { + self.report_too_many_hashes(start, found) + } + Ok(()) => panic!("no error found for supposedly invalid raw string literal"), + } + } + + fn report_non_started_raw_string(&self, start: BytePos, bad_char: char) -> ! { + self.struct_fatal_span_char( + start, + self.pos, + "found invalid character; only `#` is allowed in raw string delimitation", + bad_char, + ) + .emit() + } + + fn report_unterminated_raw_string( + &self, + start: BytePos, + n_hashes: u32, + possible_offset: Option, + found_terminators: u32, + ) -> ! { + let mut err = self.sess.span_diagnostic.struct_span_fatal_with_code( + self.mk_sp(start, start), + "unterminated raw string", + error_code!(E0748), + ); + + err.span_label(self.mk_sp(start, start), "unterminated raw string"); + + if n_hashes > 0 { + err.note(&format!( + "this raw string should be terminated with `\"{}`", + "#".repeat(n_hashes as usize) + )); + } + + if let Some(possible_offset) = possible_offset { + let lo = start + BytePos(possible_offset as u32); + let hi = lo + BytePos(found_terminators as u32); + let span = self.mk_sp(lo, hi); + err.span_suggestion( + span, + "consider terminating the string here", + "#".repeat(n_hashes as usize), + Applicability::MaybeIncorrect, + ); + } + + err.emit() + } + + fn report_unterminated_block_comment(&self, start: BytePos, doc_style: Option) { + let msg = match doc_style { + Some(_) => "unterminated block doc-comment", + None => "unterminated block comment", + }; + let last_bpos = self.pos; + let mut err = self.sess.span_diagnostic.struct_span_fatal_with_code( + self.mk_sp(start, last_bpos), + msg, + error_code!(E0758), + ); + let mut nested_block_comment_open_idxs = vec![]; + let mut last_nested_block_comment_idxs = None; + let mut content_chars = self.str_from(start).char_indices().peekable(); + + while let Some((idx, current_char)) = content_chars.next() { + match content_chars.peek() { + Some((_, '*')) if current_char == '/' => { + nested_block_comment_open_idxs.push(idx); + } + Some((_, '/')) if current_char == '*' => { + last_nested_block_comment_idxs = + nested_block_comment_open_idxs.pop().map(|open_idx| (open_idx, idx)); + } + _ => {} + }; + } + + if let Some((nested_open_idx, nested_close_idx)) = last_nested_block_comment_idxs { + err.span_label(self.mk_sp(start, start + BytePos(2)), msg) + .span_label( + self.mk_sp( + start + BytePos(nested_open_idx as u32), + start + BytePos(nested_open_idx as u32 + 2), + ), + "...as last nested comment starts here, maybe you want to close this instead?", + ) + .span_label( + self.mk_sp( + start + BytePos(nested_close_idx as u32), + start + BytePos(nested_close_idx as u32 + 2), + ), + "...and last nested comment terminates here.", + ); + } + + err.emit(); + } + + // RFC 3101 introduced the idea of (reserved) prefixes. As of Rust 2021, + // using a (unknown) prefix is an error. In earlier editions, however, they + // only result in a (allowed by default) lint, and are treated as regular + // identifier tokens. + fn report_unknown_prefix(&self, start: BytePos) { + let prefix_span = self.mk_sp(start, self.pos); + let prefix_str = self.str_from_to(start, self.pos); + let msg = format!("prefix `{}` is unknown", prefix_str); + + let expn_data = prefix_span.ctxt().outer_expn_data(); + + if expn_data.edition >= Edition::Edition2021 { + // In Rust 2021, this is a hard error. + let mut err = self.sess.span_diagnostic.struct_span_err(prefix_span, &msg); + err.span_label(prefix_span, "unknown prefix"); + if prefix_str == "rb" { + err.span_suggestion_verbose( + prefix_span, + "use `br` for a raw byte string", + "br", + Applicability::MaybeIncorrect, + ); + } else if expn_data.is_root() { + err.span_suggestion_verbose( + prefix_span.shrink_to_hi(), + "consider inserting whitespace here", + " ", + Applicability::MaybeIncorrect, + ); + } + err.note("prefixed identifiers and literals are reserved since Rust 2021"); + err.emit(); + } else { + // Before Rust 2021, only emit a lint for migration. + self.sess.buffer_lint_with_diagnostic( + &RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, + prefix_span, + ast::CRATE_NODE_ID, + &msg, + BuiltinLintDiagnostics::ReservedPrefix(prefix_span), + ); + } + } + + fn report_too_many_hashes(&self, start: BytePos, found: u32) -> ! { + self.fatal_span_( + start, + self.pos, + &format!( + "too many `#` symbols: raw strings may be delimited \ + by up to 255 `#` symbols, but found {}", + found + ), + ) + } + + fn validate_literal_escape( + &self, + mode: Mode, + content_start: BytePos, + content_end: BytePos, + prefix_len: u32, + postfix_len: u32, + ) { + let lit_content = self.str_from_to(content_start, content_end); + unescape::unescape_literal(lit_content, mode, &mut |range, result| { + // Here we only check for errors. The actual unescaping is done later. + if let Err(err) = result { + let span_with_quotes = self + .mk_sp(content_start - BytePos(prefix_len), content_end + BytePos(postfix_len)); + let (start, end) = (range.start as u32, range.end as u32); + let lo = content_start + BytePos(start); + let hi = lo + BytePos(end - start); + let span = self.mk_sp(lo, hi); + emit_unescape_error( + &self.sess.span_diagnostic, + lit_content, + span_with_quotes, + span, + mode, + range, + err, + ); + } + }); + } + + fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) { + let base = match base { + Base::Binary => 2, + Base::Octal => 8, + _ => return, + }; + let s = self.str_from_to(content_start + BytePos(2), content_end); + for (idx, c) in s.char_indices() { + let idx = idx as u32; + if c != '_' && c.to_digit(base).is_none() { + let lo = content_start + BytePos(2 + idx); + let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32); + self.err_span_(lo, hi, &format!("invalid digit for a base {} literal", base)); + } + } + } +} + +pub fn nfc_normalize(string: &str) -> Symbol { + use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization}; + match is_nfc_quick(string.chars()) { + IsNormalized::Yes => Symbol::intern(string), + _ => { + let normalized_str: String = string.chars().nfc().collect(); + Symbol::intern(&normalized_str) + } + } +} diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs new file mode 100644 index 000000000..aa70912dc --- /dev/null +++ b/compiler/rustc_parse/src/lexer/tokentrees.rs @@ -0,0 +1,296 @@ +use super::{StringReader, UnmatchedBrace}; + +use rustc_ast::token::{self, Delimiter, Token}; +use rustc_ast::tokenstream::{DelimSpan, Spacing, TokenStream, TokenTree}; +use rustc_ast_pretty::pprust::token_to_string; +use rustc_data_structures::fx::FxHashMap; +use rustc_errors::PResult; +use rustc_span::Span; + +impl<'a> StringReader<'a> { + pub(super) fn into_token_trees(self) -> (PResult<'a, TokenStream>, Vec) { + let mut tt_reader = TokenTreesReader { + string_reader: self, + token: Token::dummy(), + open_braces: Vec::new(), + unmatched_braces: Vec::new(), + matching_delim_spans: Vec::new(), + last_unclosed_found_span: None, + last_delim_empty_block_spans: FxHashMap::default(), + matching_block_spans: Vec::new(), + }; + let res = tt_reader.parse_all_token_trees(); + (res, tt_reader.unmatched_braces) + } +} + +struct TokenTreesReader<'a> { + string_reader: StringReader<'a>, + token: Token, + /// Stack of open delimiters and their spans. Used for error message. + open_braces: Vec<(Delimiter, Span)>, + unmatched_braces: Vec, + /// The type and spans for all braces + /// + /// Used only for error recovery when arriving to EOF with mismatched braces. + matching_delim_spans: Vec<(Delimiter, Span, Span)>, + last_unclosed_found_span: Option, + /// Collect empty block spans that might have been auto-inserted by editors. + last_delim_empty_block_spans: FxHashMap, + /// Collect the spans of braces (Open, Close). Used only + /// for detecting if blocks are empty and only braces. + matching_block_spans: Vec<(Span, Span)>, +} + +impl<'a> TokenTreesReader<'a> { + // Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`. + fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> { + let mut buf = TokenStreamBuilder::default(); + + self.bump(); + while self.token != token::Eof { + buf.push(self.parse_token_tree()?); + } + + Ok(buf.into_token_stream()) + } + + // Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`. + fn parse_token_trees_until_close_delim(&mut self) -> TokenStream { + let mut buf = TokenStreamBuilder::default(); + loop { + if let token::CloseDelim(..) = self.token.kind { + return buf.into_token_stream(); + } + + match self.parse_token_tree() { + Ok(tree) => buf.push(tree), + Err(mut e) => { + e.emit(); + return buf.into_token_stream(); + } + } + } + } + + fn parse_token_tree(&mut self) -> PResult<'a, TokenTree> { + let sm = self.string_reader.sess.source_map(); + + match self.token.kind { + token::Eof => { + let msg = "this file contains an unclosed delimiter"; + let mut err = + self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg); + for &(_, sp) in &self.open_braces { + err.span_label(sp, "unclosed delimiter"); + self.unmatched_braces.push(UnmatchedBrace { + expected_delim: Delimiter::Brace, + found_delim: None, + found_span: self.token.span, + unclosed_span: Some(sp), + candidate_span: None, + }); + } + + if let Some((delim, _)) = self.open_braces.last() { + if let Some((_, open_sp, close_sp)) = + self.matching_delim_spans.iter().find(|(d, open_sp, close_sp)| { + if let Some(close_padding) = sm.span_to_margin(*close_sp) { + if let Some(open_padding) = sm.span_to_margin(*open_sp) { + return delim == d && close_padding != open_padding; + } + } + false + }) + // these are in reverse order as they get inserted on close, but + { + // we want the last open/first close + err.span_label(*open_sp, "this delimiter might not be properly closed..."); + err.span_label( + *close_sp, + "...as it matches this but it has different indentation", + ); + } + } + Err(err) + } + token::OpenDelim(delim) => { + // The span for beginning of the delimited section + let pre_span = self.token.span; + + // Parse the open delimiter. + self.open_braces.push((delim, self.token.span)); + self.bump(); + + // Parse the token trees within the delimiters. + // We stop at any delimiter so we can try to recover if the user + // uses an incorrect delimiter. + let tts = self.parse_token_trees_until_close_delim(); + + // Expand to cover the entire delimited token tree + let delim_span = DelimSpan::from_pair(pre_span, self.token.span); + + match self.token.kind { + // Correct delimiter. + token::CloseDelim(d) if d == delim => { + let (open_brace, open_brace_span) = self.open_braces.pop().unwrap(); + let close_brace_span = self.token.span; + + if tts.is_empty() { + let empty_block_span = open_brace_span.to(close_brace_span); + if !sm.is_multiline(empty_block_span) { + // Only track if the block is in the form of `{}`, otherwise it is + // likely that it was written on purpose. + self.last_delim_empty_block_spans.insert(delim, empty_block_span); + } + } + + //only add braces + if let (Delimiter::Brace, Delimiter::Brace) = (open_brace, delim) { + self.matching_block_spans.push((open_brace_span, close_brace_span)); + } + + if self.open_braces.is_empty() { + // Clear up these spans to avoid suggesting them as we've found + // properly matched delimiters so far for an entire block. + self.matching_delim_spans.clear(); + } else { + self.matching_delim_spans.push(( + open_brace, + open_brace_span, + close_brace_span, + )); + } + // Parse the closing delimiter. + self.bump(); + } + // Incorrect delimiter. + token::CloseDelim(other) => { + let mut unclosed_delimiter = None; + let mut candidate = None; + + if self.last_unclosed_found_span != Some(self.token.span) { + // do not complain about the same unclosed delimiter multiple times + self.last_unclosed_found_span = Some(self.token.span); + // This is a conservative error: only report the last unclosed + // delimiter. The previous unclosed delimiters could actually be + // closed! The parser just hasn't gotten to them yet. + if let Some(&(_, sp)) = self.open_braces.last() { + unclosed_delimiter = Some(sp); + }; + if let Some(current_padding) = sm.span_to_margin(self.token.span) { + for (brace, brace_span) in &self.open_braces { + if let Some(padding) = sm.span_to_margin(*brace_span) { + // high likelihood of these two corresponding + if current_padding == padding && brace == &other { + candidate = Some(*brace_span); + } + } + } + } + let (tok, _) = self.open_braces.pop().unwrap(); + self.unmatched_braces.push(UnmatchedBrace { + expected_delim: tok, + found_delim: Some(other), + found_span: self.token.span, + unclosed_span: unclosed_delimiter, + candidate_span: candidate, + }); + } else { + self.open_braces.pop(); + } + + // If the incorrect delimiter matches an earlier opening + // delimiter, then don't consume it (it can be used to + // close the earlier one). Otherwise, consume it. + // E.g., we try to recover from: + // fn foo() { + // bar(baz( + // } // Incorrect delimiter but matches the earlier `{` + if !self.open_braces.iter().any(|&(b, _)| b == other) { + self.bump(); + } + } + token::Eof => { + // Silently recover, the EOF token will be seen again + // and an error emitted then. Thus we don't pop from + // self.open_braces here. + } + _ => {} + } + + Ok(TokenTree::Delimited(delim_span, delim, tts)) + } + token::CloseDelim(delim) => { + // An unexpected closing delimiter (i.e., there is no + // matching opening delimiter). + let token_str = token_to_string(&self.token); + let msg = format!("unexpected closing delimiter: `{}`", token_str); + let mut err = + self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, &msg); + + // Braces are added at the end, so the last element is the biggest block + if let Some(parent) = self.matching_block_spans.last() { + if let Some(span) = self.last_delim_empty_block_spans.remove(&delim) { + // Check if the (empty block) is in the last properly closed block + if (parent.0.to(parent.1)).contains(span) { + err.span_label( + span, + "block is empty, you might have not meant to close it", + ); + } else { + err.span_label(parent.0, "this opening brace..."); + + err.span_label(parent.1, "...matches this closing brace"); + } + } else { + err.span_label(parent.0, "this opening brace..."); + + err.span_label(parent.1, "...matches this closing brace"); + } + } + + err.span_label(self.token.span, "unexpected closing delimiter"); + Err(err) + } + _ => { + let tok = self.token.take(); + let mut spacing = self.bump(); + if !self.token.is_op() { + spacing = Spacing::Alone; + } + Ok(TokenTree::Token(tok, spacing)) + } + } + } + + fn bump(&mut self) -> Spacing { + let (spacing, token) = self.string_reader.next_token(); + self.token = token; + spacing + } +} + +#[derive(Default)] +struct TokenStreamBuilder { + buf: Vec, +} + +impl TokenStreamBuilder { + #[inline(always)] + fn push(&mut self, tree: TokenTree) { + if let Some(TokenTree::Token(prev_token, Spacing::Joint)) = self.buf.last() + && let TokenTree::Token(token, joint) = &tree + && let Some(glued) = prev_token.glue(token) + { + self.buf.pop(); + self.buf.push(TokenTree::Token(glued, *joint)); + } else { + self.buf.push(tree) + } + } + + fn into_token_stream(self) -> TokenStream { + TokenStream::new(self.buf) + } +} diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs new file mode 100644 index 000000000..273827864 --- /dev/null +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -0,0 +1,381 @@ +//! Utilities for rendering escape sequence errors as diagnostics. + +use std::iter::once; +use std::ops::Range; + +use rustc_errors::{pluralize, Applicability, Handler}; +use rustc_lexer::unescape::{EscapeError, Mode}; +use rustc_span::{BytePos, Span}; + +pub(crate) fn emit_unescape_error( + handler: &Handler, + // interior part of the literal, without quotes + lit: &str, + // full span of the literal, including quotes + span_with_quotes: Span, + // interior span of the literal, without quotes + span: Span, + mode: Mode, + // range of the error inside `lit` + range: Range, + error: EscapeError, +) { + tracing::debug!( + "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}", + lit, + span_with_quotes, + mode, + range, + error + ); + let last_char = || { + let c = lit[range.clone()].chars().rev().next().unwrap(); + let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32)); + (c, span) + }; + match error { + EscapeError::LoneSurrogateUnicodeEscape => { + handler + .struct_span_err(span, "invalid unicode character escape") + .span_label(span, "invalid escape") + .help("unicode escape must not be a surrogate") + .emit(); + } + EscapeError::OutOfRangeUnicodeEscape => { + handler + .struct_span_err(span, "invalid unicode character escape") + .span_label(span, "invalid escape") + .help("unicode escape must be at most 10FFFF") + .emit(); + } + EscapeError::MoreThanOneChar => { + use unicode_normalization::{char::is_combining_mark, UnicodeNormalization}; + + let mut has_help = false; + let mut handler = handler.struct_span_err( + span_with_quotes, + "character literal may only contain one codepoint", + ); + + if lit.chars().skip(1).all(|c| is_combining_mark(c)) { + let escaped_marks = + lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::>(); + handler.span_note( + span, + &format!( + "this `{}` is followed by the combining mark{} `{}`", + lit.chars().next().unwrap(), + pluralize!(escaped_marks.len()), + escaped_marks.join(""), + ), + ); + let normalized = lit.nfc().to_string(); + if normalized.chars().count() == 1 { + has_help = true; + handler.span_suggestion( + span, + &format!( + "consider using the normalized form `{}` of this character", + normalized.chars().next().unwrap().escape_default() + ), + normalized, + Applicability::MachineApplicable, + ); + } + } else { + let printable: Vec = lit + .chars() + .filter(|&x| { + unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0 + && !x.is_whitespace() + }) + .collect(); + + if let [ch] = printable.as_slice() { + has_help = true; + + handler.span_note( + span, + &format!( + "there are non-printing characters, the full sequence is `{}`", + lit.escape_default(), + ), + ); + + handler.span_suggestion( + span, + "consider removing the non-printing characters", + ch, + Applicability::MaybeIncorrect, + ); + } + } + + if !has_help { + let (prefix, msg) = if mode.is_bytes() { + ("b", "if you meant to write a byte string literal, use double quotes") + } else { + ("", "if you meant to write a `str` literal, use double quotes") + }; + + handler.span_suggestion( + span_with_quotes, + msg, + format!("{}\"{}\"", prefix, lit), + Applicability::MachineApplicable, + ); + } + + handler.emit(); + } + EscapeError::EscapeOnlyChar => { + let (c, char_span) = last_char(); + + let msg = if mode.is_bytes() { + "byte constant must be escaped" + } else { + "character constant must be escaped" + }; + handler + .struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c))) + .span_suggestion( + char_span, + "escape the character", + c.escape_default(), + Applicability::MachineApplicable, + ) + .emit(); + } + EscapeError::BareCarriageReturn => { + let msg = if mode.in_double_quotes() { + "bare CR not allowed in string, use `\\r` instead" + } else { + "character constant must be escaped: `\\r`" + }; + handler + .struct_span_err(span, msg) + .span_suggestion( + span, + "escape the character", + "\\r", + Applicability::MachineApplicable, + ) + .emit(); + } + EscapeError::BareCarriageReturnInRawString => { + assert!(mode.in_double_quotes()); + let msg = "bare CR not allowed in raw string"; + handler.span_err(span, msg); + } + EscapeError::InvalidEscape => { + let (c, span) = last_char(); + + let label = + if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" }; + let ec = escaped_char(c); + let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec)); + diag.span_label(span, label); + if c == '{' || c == '}' && !mode.is_bytes() { + diag.help( + "if used in a formatting string, curly braces are escaped with `{{` and `}}`", + ); + } else if c == '\r' { + diag.help( + "this is an isolated carriage return; consider checking your editor and \ + version control settings", + ); + } else { + if !mode.is_bytes() { + diag.span_suggestion( + span_with_quotes, + "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal", + format!("r\"{}\"", lit), + Applicability::MaybeIncorrect, + ); + } + + diag.help( + "for more information, visit \ + ", + ); + } + diag.emit(); + } + EscapeError::TooShortHexEscape => { + handler.span_err(span, "numeric character escape is too short"); + } + EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => { + let (c, span) = last_char(); + + let msg = if error == EscapeError::InvalidCharInHexEscape { + "invalid character in numeric character escape" + } else { + "invalid character in unicode escape" + }; + let c = escaped_char(c); + + handler + .struct_span_err(span, &format!("{}: `{}`", msg, c)) + .span_label(span, msg) + .emit(); + } + EscapeError::NonAsciiCharInByte => { + assert!(mode.is_bytes()); + let (c, span) = last_char(); + let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant"); + let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { + format!(" but is {:?}", c) + } else { + String::new() + }; + err.span_label(span, &format!("byte constant must be ASCII{}", postfix)); + if (c as u32) <= 0xFF { + err.span_suggestion( + span, + &format!( + "if you meant to use the unicode code point for {:?}, use a \\xHH escape", + c + ), + format!("\\x{:X}", c as u32), + Applicability::MaybeIncorrect, + ); + } else if matches!(mode, Mode::Byte) { + err.span_label(span, "this multibyte character does not fit into a single byte"); + } else if matches!(mode, Mode::ByteStr) { + let mut utf8 = String::new(); + utf8.push(c); + err.span_suggestion( + span, + &format!( + "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes", + c + ), + utf8.as_bytes() + .iter() + .map(|b: &u8| format!("\\x{:X}", *b)) + .fold("".to_string(), |a, c| a + &c), + Applicability::MaybeIncorrect, + ); + } + err.emit(); + } + EscapeError::NonAsciiCharInByteString => { + assert!(mode.is_bytes()); + let (c, span) = last_char(); + let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { + format!(" but is {:?}", c) + } else { + String::new() + }; + handler + .struct_span_err(span, "raw byte string must be ASCII") + .span_label(span, &format!("must be ASCII{}", postfix)) + .emit(); + } + EscapeError::OutOfRangeHexEscape => { + handler + .struct_span_err(span, "out of range hex escape") + .span_label(span, "must be a character in the range [\\x00-\\x7f]") + .emit(); + } + EscapeError::LeadingUnderscoreUnicodeEscape => { + let (c, span) = last_char(); + let msg = "invalid start of unicode escape"; + handler + .struct_span_err(span, &format!("{}: `{}`", msg, c)) + .span_label(span, msg) + .emit(); + } + EscapeError::OverlongUnicodeEscape => { + handler + .struct_span_err(span, "overlong unicode escape") + .span_label(span, "must have at most 6 hex digits") + .emit(); + } + EscapeError::UnclosedUnicodeEscape => { + handler + .struct_span_err(span, "unterminated unicode escape") + .span_label(span, "missing a closing `}`") + .span_suggestion_verbose( + span.shrink_to_hi(), + "terminate the unicode escape", + "}", + Applicability::MaybeIncorrect, + ) + .emit(); + } + EscapeError::NoBraceInUnicodeEscape => { + let msg = "incorrect unicode escape sequence"; + let mut diag = handler.struct_span_err(span, msg); + + let mut suggestion = "\\u{".to_owned(); + let mut suggestion_len = 0; + let (c, char_span) = last_char(); + let chars = once(c).chain(lit[range.end..].chars()); + for c in chars.take(6).take_while(|c| c.is_digit(16)) { + suggestion.push(c); + suggestion_len += c.len_utf8(); + } + + if suggestion_len > 0 { + suggestion.push('}'); + let hi = char_span.lo() + BytePos(suggestion_len as u32); + diag.span_suggestion( + span.with_hi(hi), + "format of unicode escape sequences uses braces", + suggestion, + Applicability::MaybeIncorrect, + ); + } else { + diag.span_label(span, msg); + diag.help("format of unicode escape sequences is `\\u{...}`"); + } + + diag.emit(); + } + EscapeError::UnicodeEscapeInByte => { + let msg = "unicode escape in byte string"; + handler + .struct_span_err(span, msg) + .span_label(span, msg) + .help("unicode escape sequences cannot be used as a byte or in a byte string") + .emit(); + } + EscapeError::EmptyUnicodeEscape => { + handler + .struct_span_err(span, "empty unicode escape") + .span_label(span, "this escape must have at least 1 hex digit") + .emit(); + } + EscapeError::ZeroChars => { + let msg = "empty character literal"; + handler.struct_span_err(span, msg).span_label(span, msg).emit(); + } + EscapeError::LoneSlash => { + let msg = "invalid trailing slash in literal"; + handler.struct_span_err(span, msg).span_label(span, msg).emit(); + } + EscapeError::UnskippedWhitespaceWarning => { + let (c, char_span) = last_char(); + let msg = + format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode()); + handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit(); + } + EscapeError::MultipleSkippedLinesWarning => { + let msg = "multiple lines skipped by escaped newline"; + let bottom_msg = "skipping everything up to and including this point"; + handler.struct_span_warn(span, msg).span_label(span, bottom_msg).emit(); + } + } +} + +/// Pushes a character to a message string for error reporting +pub(crate) fn escaped_char(c: char) -> String { + match c { + '\u{20}'..='\u{7e}' => { + // Don't escape \, ' or " for user-facing messages + c.to_string() + } + _ => c.escape_default().to_string(), + } +} diff --git a/compiler/rustc_parse/src/lexer/unicode_chars.rs b/compiler/rustc_parse/src/lexer/unicode_chars.rs new file mode 100644 index 000000000..2c68cc589 --- /dev/null +++ b/compiler/rustc_parse/src/lexer/unicode_chars.rs @@ -0,0 +1,386 @@ +// Characters and their corresponding confusables were collected from +// https://www.unicode.org/Public/security/10.0.0/confusables.txt + +use super::StringReader; +use crate::token::{self, Delimiter}; +use rustc_errors::{Applicability, Diagnostic}; +use rustc_span::{symbol::kw, BytePos, Pos, Span}; + +#[rustfmt::skip] // for line breaks +pub(crate) const UNICODE_ARRAY: &[(char, &str, char)] = &[ + ('
', "Line Separator", ' '), + ('
', "Paragraph Separator", ' '), + (' ', "Ogham Space mark", ' '), + (' ', "En Quad", ' '), + (' ', "Em Quad", ' '), + (' ', "En Space", ' '), + (' ', "Em Space", ' '), + (' ', "Three-Per-Em Space", ' '), + (' ', "Four-Per-Em Space", ' '), + (' ', "Six-Per-Em Space", ' '), + (' ', "Punctuation Space", ' '), + (' ', "Thin Space", ' '), + (' ', "Hair Space", ' '), + (' ', "Medium Mathematical Space", ' '), + (' ', "No-Break Space", ' '), + (' ', "Figure Space", ' '), + (' ', "Narrow No-Break Space", ' '), + (' ', "Ideographic Space", ' '), + + ('ߺ', "Nko Lajanyalan", '_'), + ('﹍', "Dashed Low Line", '_'), + ('﹎', "Centreline Low Line", '_'), + ('﹏', "Wavy Low Line", '_'), + ('_', "Fullwidth Low Line", '_'), + + ('‐', "Hyphen", '-'), + ('‑', "Non-Breaking Hyphen", '-'), + ('‒', "Figure Dash", '-'), + ('–', "En Dash", '-'), + ('—', "Em Dash", '-'), + ('﹘', "Small Em Dash", '-'), + ('۔', "Arabic Full Stop", '-'), + ('⁃', "Hyphen Bullet", '-'), + ('˗', "Modifier Letter Minus Sign", '-'), + ('−', "Minus Sign", '-'), + ('➖', "Heavy Minus Sign", '-'), + ('Ⲻ', "Coptic Letter Dialect-P Ni", '-'), + ('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'), + ('-', "Fullwidth Hyphen-Minus", '-'), + ('―', "Horizontal Bar", '-'), + ('─', "Box Drawings Light Horizontal", '-'), + ('━', "Box Drawings Heavy Horizontal", '-'), + ('㇐', "CJK Stroke H", '-'), + ('ꟷ', "Latin Epigraphic Letter Sideways I", '-'), + ('ᅳ', "Hangul Jungseong Eu", '-'), + ('ㅡ', "Hangul Letter Eu", '-'), + ('一', "CJK Unified Ideograph-4E00", '-'), + ('⼀', "Kangxi Radical One", '-'), + + ('؍', "Arabic Date Separator", ','), + ('٫', "Arabic Decimal Separator", ','), + ('‚', "Single Low-9 Quotation Mark", ','), + ('¸', "Cedilla", ','), + ('ꓹ', "Lisu Letter Tone Na Po", ','), + (',', "Fullwidth Comma", ','), + + (';', "Greek Question Mark", ';'), + (';', "Fullwidth Semicolon", ';'), + ('︔', "Presentation Form For Vertical Semicolon", ';'), + + ('ः', "Devanagari Sign Visarga", ':'), + ('ઃ', "Gujarati Sign Visarga", ':'), + (':', "Fullwidth Colon", ':'), + ('։', "Armenian Full Stop", ':'), + ('܃', "Syriac Supralinear Colon", ':'), + ('܄', "Syriac Sublinear Colon", ':'), + ('᛬', "Runic Multiple Punctuation", ':'), + ('︰', "Presentation Form For Vertical Two Dot Leader", ':'), + ('᠃', "Mongolian Full Stop", ':'), + ('᠉', "Mongolian Manchu Full Stop", ':'), + ('⁚', "Two Dot Punctuation", ':'), + ('׃', "Hebrew Punctuation Sof Pasuq", ':'), + ('˸', "Modifier Letter Raised Colon", ':'), + ('꞉', "Modifier Letter Colon", ':'), + ('∶', "Ratio", ':'), + ('ː', "Modifier Letter Triangular Colon", ':'), + ('ꓽ', "Lisu Letter Tone Mya Jeu", ':'), + ('︓', "Presentation Form For Vertical Colon", ':'), + + ('!', "Fullwidth Exclamation Mark", '!'), + ('ǃ', "Latin Letter Retroflex Click", '!'), + ('ⵑ', "Tifinagh Letter Tuareg Yang", '!'), + ('︕', "Presentation Form For Vertical Exclamation Mark", '!'), + + ('ʔ', "Latin Letter Glottal Stop", '?'), + ('Ɂ', "Latin Capital Letter Glottal Stop", '?'), + ('ॽ', "Devanagari Letter Glottal Stop", '?'), + ('Ꭾ', "Cherokee Letter He", '?'), + ('ꛫ', "Bamum Letter Ntuu", '?'), + ('?', "Fullwidth Question Mark", '?'), + ('︖', "Presentation Form For Vertical Question Mark", '?'), + + ('𝅭', "Musical Symbol Combining Augmentation Dot", '.'), + ('․', "One Dot Leader", '.'), + ('܁', "Syriac Supralinear Full Stop", '.'), + ('܂', "Syriac Sublinear Full Stop", '.'), + ('꘎', "Vai Full Stop", '.'), + ('𐩐', "Kharoshthi Punctuation Dot", '.'), + ('٠', "Arabic-Indic Digit Zero", '.'), + ('۰', "Extended Arabic-Indic Digit Zero", '.'), + ('ꓸ', "Lisu Letter Tone Mya Ti", '.'), + ('·', "Middle Dot", '.'), + ('・', "Katakana Middle Dot", '.'), + ('・', "Halfwidth Katakana Middle Dot", '.'), + ('᛫', "Runic Single Punctuation", '.'), + ('·', "Greek Ano Teleia", '.'), + ('⸱', "Word Separator Middle Dot", '.'), + ('𐄁', "Aegean Word Separator Dot", '.'), + ('•', "Bullet", '.'), + ('‧', "Hyphenation Point", '.'), + ('∙', "Bullet Operator", '.'), + ('⋅', "Dot Operator", '.'), + ('ꞏ', "Latin Letter Sinological Dot", '.'), + ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'), + ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'), + ('.', "Fullwidth Full Stop", '.'), + ('。', "Ideographic Full Stop", '.'), + ('︒', "Presentation Form For Vertical Ideographic Full Stop", '.'), + + ('՝', "Armenian Comma", '\''), + (''', "Fullwidth Apostrophe", '\''), + ('‘', "Left Single Quotation Mark", '\''), + ('’', "Right Single Quotation Mark", '\''), + ('‛', "Single High-Reversed-9 Quotation Mark", '\''), + ('′', "Prime", '\''), + ('‵', "Reversed Prime", '\''), + ('՚', "Armenian Apostrophe", '\''), + ('׳', "Hebrew Punctuation Geresh", '\''), + ('`', "Grave Accent", '\''), + ('`', "Greek Varia", '\''), + ('`', "Fullwidth Grave Accent", '\''), + ('´', "Acute Accent", '\''), + ('΄', "Greek Tonos", '\''), + ('´', "Greek Oxia", '\''), + ('᾽', "Greek Koronis", '\''), + ('᾿', "Greek Psili", '\''), + ('῾', "Greek Dasia", '\''), + ('ʹ', "Modifier Letter Prime", '\''), + ('ʹ', "Greek Numeral Sign", '\''), + ('ˈ', "Modifier Letter Vertical Line", '\''), + ('ˊ', "Modifier Letter Acute Accent", '\''), + ('ˋ', "Modifier Letter Grave Accent", '\''), + ('˴', "Modifier Letter Middle Grave Accent", '\''), + ('ʻ', "Modifier Letter Turned Comma", '\''), + ('ʽ', "Modifier Letter Reversed Comma", '\''), + ('ʼ', "Modifier Letter Apostrophe", '\''), + ('ʾ', "Modifier Letter Right Half Ring", '\''), + ('ꞌ', "Latin Small Letter Saltillo", '\''), + ('י', "Hebrew Letter Yod", '\''), + ('ߴ', "Nko High Tone Apostrophe", '\''), + ('ߵ', "Nko Low Tone Apostrophe", '\''), + ('ᑊ', "Canadian Syllabics West-Cree P", '\''), + ('ᛌ', "Runic Letter Short-Twig-Sol S", '\''), + ('𖽑', "Miao Sign Aspiration", '\''), + ('𖽒', "Miao Sign Reformed Voicing", '\''), + + ('᳓', "Vedic Sign Nihshvasa", '"'), + ('"', "Fullwidth Quotation Mark", '"'), + ('“', "Left Double Quotation Mark", '"'), + ('”', "Right Double Quotation Mark", '"'), + ('‟', "Double High-Reversed-9 Quotation Mark", '"'), + ('″', "Double Prime", '"'), + ('‶', "Reversed Double Prime", '"'), + ('〃', "Ditto Mark", '"'), + ('״', "Hebrew Punctuation Gershayim", '"'), + ('˝', "Double Acute Accent", '"'), + ('ʺ', "Modifier Letter Double Prime", '"'), + ('˶', "Modifier Letter Middle Double Acute Accent", '"'), + ('˵', "Modifier Letter Middle Double Grave Accent", '"'), + ('ˮ', "Modifier Letter Double Apostrophe", '"'), + ('ײ', "Hebrew Ligature Yiddish Double Yod", '"'), + ('❞', "Heavy Double Comma Quotation Mark Ornament", '"'), + ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'), + + ('(', "Fullwidth Left Parenthesis", '('), + ('❨', "Medium Left Parenthesis Ornament", '('), + ('﴾', "Ornate Left Parenthesis", '('), + + (')', "Fullwidth Right Parenthesis", ')'), + ('❩', "Medium Right Parenthesis Ornament", ')'), + ('﴿', "Ornate Right Parenthesis", ')'), + + ('[', "Fullwidth Left Square Bracket", '['), + ('❲', "Light Left Tortoise Shell Bracket Ornament", '['), + ('「', "Left Corner Bracket", '['), + ('『', "Left White Corner Bracket", '['), + ('【', "Left Black Lenticular Bracket", '['), + ('〔', "Left Tortoise Shell Bracket", '['), + ('〖', "Left White Lenticular Bracket", '['), + ('〘', "Left White Tortoise Shell Bracket", '['), + ('〚', "Left White Square Bracket", '['), + + (']', "Fullwidth Right Square Bracket", ']'), + ('❳', "Light Right Tortoise Shell Bracket Ornament", ']'), + ('」', "Right Corner Bracket", ']'), + ('』', "Right White Corner Bracket", ']'), + ('】', "Right Black Lenticular Bracket", ']'), + ('〕', "Right Tortoise Shell Bracket", ']'), + ('〗', "Right White Lenticular Bracket", ']'), + ('〙', "Right White Tortoise Shell Bracket", ']'), + ('〛', "Right White Square Bracket", ']'), + + ('❴', "Medium Left Curly Bracket Ornament", '{'), + ('𝄔', "Musical Symbol Brace", '{'), + ('{', "Fullwidth Left Curly Bracket", '{'), + + ('❵', "Medium Right Curly Bracket Ornament", '}'), + ('}', "Fullwidth Right Curly Bracket", '}'), + + ('⁎', "Low Asterisk", '*'), + ('٭', "Arabic Five Pointed Star", '*'), + ('∗', "Asterisk Operator", '*'), + ('𐌟', "Old Italic Letter Ess", '*'), + ('*', "Fullwidth Asterisk", '*'), + + ('᜵', "Philippine Single Punctuation", '/'), + ('⁁', "Caret Insertion Point", '/'), + ('∕', "Division Slash", '/'), + ('⁄', "Fraction Slash", '/'), + ('╱', "Box Drawings Light Diagonal Upper Right To Lower Left", '/'), + ('⟋', "Mathematical Rising Diagonal", '/'), + ('⧸', "Big Solidus", '/'), + ('𝈺', "Greek Instrumental Notation Symbol-47", '/'), + ('㇓', "CJK Stroke Sp", '/'), + ('〳', "Vertical Kana Repeat Mark Upper Half", '/'), + ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'), + ('ノ', "Katakana Letter No", '/'), + ('丿', "CJK Unified Ideograph-4E3F", '/'), + ('⼃', "Kangxi Radical Slash", '/'), + ('/', "Fullwidth Solidus", '/'), + + ('\', "Fullwidth Reverse Solidus", '\\'), + ('﹨', "Small Reverse Solidus", '\\'), + ('∖', "Set Minus", '\\'), + ('⟍', "Mathematical Falling Diagonal", '\\'), + ('⧵', "Reverse Solidus Operator", '\\'), + ('⧹', "Big Reverse Solidus", '\\'), + ('⧹', "Greek Vocal Notation Symbol-16", '\\'), + ('⧹', "Greek Instrumental Symbol-48", '\\'), + ('㇔', "CJK Stroke D", '\\'), + ('丶', "CJK Unified Ideograph-4E36", '\\'), + ('⼂', "Kangxi Radical Dot", '\\'), + ('、', "Ideographic Comma", '\\'), + ('ヽ', "Katakana Iteration Mark", '\\'), + + ('ꝸ', "Latin Small Letter Um", '&'), + ('&', "Fullwidth Ampersand", '&'), + + ('᛭', "Runic Cross Punctuation", '+'), + ('➕', "Heavy Plus Sign", '+'), + ('𐊛', "Lycian Letter H", '+'), + ('﬩', "Hebrew Letter Alternative Plus Sign", '+'), + ('+', "Fullwidth Plus Sign", '+'), + + ('‹', "Single Left-Pointing Angle Quotation Mark", '<'), + ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'), + ('˂', "Modifier Letter Left Arrowhead", '<'), + ('𝈶', "Greek Instrumental Symbol-40", '<'), + ('ᐸ', "Canadian Syllabics Pa", '<'), + ('ᚲ', "Runic Letter Kauna", '<'), + ('❬', "Medium Left-Pointing Angle Bracket Ornament", '<'), + ('⟨', "Mathematical Left Angle Bracket", '<'), + ('〈', "Left-Pointing Angle Bracket", '<'), + ('〈', "Left Angle Bracket", '<'), + ('㇛', "CJK Stroke Pd", '<'), + ('く', "Hiragana Letter Ku", '<'), + ('𡿨', "CJK Unified Ideograph-21FE8", '<'), + ('《', "Left Double Angle Bracket", '<'), + ('<', "Fullwidth Less-Than Sign", '<'), + + ('᐀', "Canadian Syllabics Hyphen", '='), + ('⹀', "Double Hyphen", '='), + ('゠', "Katakana-Hiragana Double Hyphen", '='), + ('꓿', "Lisu Punctuation Full Stop", '='), + ('=', "Fullwidth Equals Sign", '='), + + ('›', "Single Right-Pointing Angle Quotation Mark", '>'), + ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'), + ('˃', "Modifier Letter Right Arrowhead", '>'), + ('𝈷', "Greek Instrumental Symbol-42", '>'), + ('ᐳ', "Canadian Syllabics Po", '>'), + ('𖼿', "Miao Letter Archaic Zza", '>'), + ('❭', "Medium Right-Pointing Angle Bracket Ornament", '>'), + ('⟩', "Mathematical Right Angle Bracket", '>'), + ('〉', "Right-Pointing Angle Bracket", '>'), + ('〉', "Right Angle Bracket", '>'), + ('》', "Right Double Angle Bracket", '>'), + ('>', "Fullwidth Greater-Than Sign", '>'), +]; + +// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, instead of +// keeping the substitution token in this table. Ideally, this should be inside `rustc_lexer`. +// However, we should first remove compound tokens like `<<` from `rustc_lexer`, and then add +// fancier error recovery to it, as there will be less overall work to do this way. +const ASCII_ARRAY: &[(char, &str, Option)] = &[ + (' ', "Space", None), + ('_', "Underscore", Some(token::Ident(kw::Underscore, false))), + ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))), + (',', "Comma", Some(token::Comma)), + (';', "Semicolon", Some(token::Semi)), + (':', "Colon", Some(token::Colon)), + ('!', "Exclamation Mark", Some(token::Not)), + ('?', "Question Mark", Some(token::Question)), + ('.', "Period", Some(token::Dot)), + ('(', "Left Parenthesis", Some(token::OpenDelim(Delimiter::Parenthesis))), + (')', "Right Parenthesis", Some(token::CloseDelim(Delimiter::Parenthesis))), + ('[', "Left Square Bracket", Some(token::OpenDelim(Delimiter::Bracket))), + (']', "Right Square Bracket", Some(token::CloseDelim(Delimiter::Bracket))), + ('{', "Left Curly Brace", Some(token::OpenDelim(Delimiter::Brace))), + ('}', "Right Curly Brace", Some(token::CloseDelim(Delimiter::Brace))), + ('*', "Asterisk", Some(token::BinOp(token::Star))), + ('/', "Slash", Some(token::BinOp(token::Slash))), + ('\\', "Backslash", None), + ('&', "Ampersand", Some(token::BinOp(token::And))), + ('+', "Plus Sign", Some(token::BinOp(token::Plus))), + ('<', "Less-Than Sign", Some(token::Lt)), + ('=', "Equals Sign", Some(token::Eq)), + ('>', "Greater-Than Sign", Some(token::Gt)), + // FIXME: Literals are already lexed by this point, so we can't recover gracefully just by + // spitting the correct token out. + ('\'', "Single Quote", None), + ('"', "Quotation Mark", None), +]; + +pub(super) fn check_for_substitution<'a>( + reader: &StringReader<'a>, + pos: BytePos, + ch: char, + err: &mut Diagnostic, +) -> Option { + let &(_u_char, u_name, ascii_char) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch)?; + + let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8())); + + let Some((_ascii_char, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) else { + let msg = format!("substitution character not found for '{}'", ch); + reader.sess.span_diagnostic.span_bug_no_panic(span, &msg); + return None; + }; + + // special help suggestion for "directed" double quotes + if let Some(s) = peek_delimited(&reader.src[reader.src_index(pos)..], '“', '”') { + let msg = format!( + "Unicode characters '“' (Left Double Quotation Mark) and \ + '”' (Right Double Quotation Mark) look like '{}' ({}), but are not", + ascii_char, ascii_name + ); + err.span_suggestion( + Span::with_root_ctxt( + pos, + pos + Pos::from_usize('“'.len_utf8() + s.len() + '”'.len_utf8()), + ), + &msg, + format!("\"{}\"", s), + Applicability::MaybeIncorrect, + ); + } else { + let msg = format!( + "Unicode character '{}' ({}) looks like '{}' ({}), but it is not", + ch, u_name, ascii_char, ascii_name + ); + err.span_suggestion(span, &msg, ascii_char, Applicability::MaybeIncorrect); + } + token.clone() +} + +/// Extract string if found at current position with given delimiters +fn peek_delimited(text: &str, from_ch: char, to_ch: char) -> Option<&str> { + let mut chars = text.chars(); + let first_char = chars.next()?; + if first_char != from_ch { + return None; + } + let last_char_idx = chars.as_str().find(to_ch)?; + Some(&chars.as_str()[..last_char_idx]) +} diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs new file mode 100644 index 000000000..8c087c65c --- /dev/null +++ b/compiler/rustc_parse/src/lib.rs @@ -0,0 +1,291 @@ +//! The main parser interface. + +#![feature(array_windows)] +#![feature(box_patterns)] +#![feature(if_let_guard)] +#![feature(let_chains)] +#![feature(let_else)] +#![feature(never_type)] +#![feature(rustc_attrs)] +#![recursion_limit = "256"] + +#[macro_use] +extern crate tracing; + +use rustc_ast as ast; +use rustc_ast::token; +use rustc_ast::tokenstream::TokenStream; +use rustc_ast::Attribute; +use rustc_ast::{AttrItem, MetaItem}; +use rustc_ast_pretty::pprust; +use rustc_data_structures::sync::Lrc; +use rustc_errors::{Applicability, Diagnostic, FatalError, Level, PResult}; +use rustc_session::parse::ParseSess; +use rustc_span::{FileName, SourceFile, Span}; + +use std::path::Path; + +pub const MACRO_ARGUMENTS: Option<&str> = Some("macro arguments"); + +#[macro_use] +pub mod parser; +use parser::{emit_unclosed_delims, make_unclosed_delims_error, Parser}; +pub mod lexer; +pub mod validate_attr; + +// A bunch of utility functions of the form `parse__from_` +// where includes crate, expr, item, stmt, tts, and one that +// uses a HOF to parse anything, and includes file and +// `source_str`. + +/// A variant of 'panictry!' that works on a Vec instead of a single DiagnosticBuilder. +macro_rules! panictry_buffer { + ($handler:expr, $e:expr) => {{ + use rustc_errors::FatalError; + use std::result::Result::{Err, Ok}; + match $e { + Ok(e) => e, + Err(errs) => { + for mut e in errs { + $handler.emit_diagnostic(&mut e); + } + FatalError.raise() + } + } + }}; +} + +pub fn parse_crate_from_file<'a>(input: &Path, sess: &'a ParseSess) -> PResult<'a, ast::Crate> { + let mut parser = new_parser_from_file(sess, input, None); + parser.parse_crate_mod() +} + +pub fn parse_crate_attrs_from_file<'a>( + input: &Path, + sess: &'a ParseSess, +) -> PResult<'a, Vec> { + let mut parser = new_parser_from_file(sess, input, None); + parser.parse_inner_attributes() +} + +pub fn parse_crate_from_source_str( + name: FileName, + source: String, + sess: &ParseSess, +) -> PResult<'_, ast::Crate> { + new_parser_from_source_str(sess, name, source).parse_crate_mod() +} + +pub fn parse_crate_attrs_from_source_str( + name: FileName, + source: String, + sess: &ParseSess, +) -> PResult<'_, Vec> { + new_parser_from_source_str(sess, name, source).parse_inner_attributes() +} + +pub fn parse_stream_from_source_str( + name: FileName, + source: String, + sess: &ParseSess, + override_span: Option, +) -> TokenStream { + let (stream, mut errors) = + source_file_to_stream(sess, sess.source_map().new_source_file(name, source), override_span); + emit_unclosed_delims(&mut errors, &sess); + stream +} + +/// Creates a new parser from a source string. +pub fn new_parser_from_source_str(sess: &ParseSess, name: FileName, source: String) -> Parser<'_> { + panictry_buffer!(&sess.span_diagnostic, maybe_new_parser_from_source_str(sess, name, source)) +} + +/// Creates a new parser from a source string. Returns any buffered errors from lexing the initial +/// token stream. +pub fn maybe_new_parser_from_source_str( + sess: &ParseSess, + name: FileName, + source: String, +) -> Result, Vec> { + maybe_source_file_to_parser(sess, sess.source_map().new_source_file(name, source)) +} + +/// Creates a new parser, handling errors as appropriate if the file doesn't exist. +/// If a span is given, that is used on an error as the source of the problem. +pub fn new_parser_from_file<'a>(sess: &'a ParseSess, path: &Path, sp: Option) -> Parser<'a> { + source_file_to_parser(sess, file_to_source_file(sess, path, sp)) +} + +/// Given a session and a `source_file`, returns a parser. +fn source_file_to_parser(sess: &ParseSess, source_file: Lrc) -> Parser<'_> { + panictry_buffer!(&sess.span_diagnostic, maybe_source_file_to_parser(sess, source_file)) +} + +/// Given a session and a `source_file`, return a parser. Returns any buffered errors from lexing the +/// initial token stream. +fn maybe_source_file_to_parser( + sess: &ParseSess, + source_file: Lrc, +) -> Result, Vec> { + let end_pos = source_file.end_pos; + let (stream, unclosed_delims) = maybe_file_to_stream(sess, source_file, None)?; + let mut parser = stream_to_parser(sess, stream, None); + parser.unclosed_delims = unclosed_delims; + if parser.token == token::Eof { + parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt(), None); + } + + Ok(parser) +} + +// Base abstractions + +/// Given a session and a path and an optional span (for error reporting), +/// add the path to the session's source_map and return the new source_file or +/// error when a file can't be read. +fn try_file_to_source_file( + sess: &ParseSess, + path: &Path, + spanopt: Option, +) -> Result, Diagnostic> { + sess.source_map().load_file(path).map_err(|e| { + let msg = format!("couldn't read {}: {}", path.display(), e); + let mut diag = Diagnostic::new(Level::Fatal, &msg); + if let Some(sp) = spanopt { + diag.set_span(sp); + } + diag + }) +} + +/// Given a session and a path and an optional span (for error reporting), +/// adds the path to the session's `source_map` and returns the new `source_file`. +fn file_to_source_file(sess: &ParseSess, path: &Path, spanopt: Option) -> Lrc { + match try_file_to_source_file(sess, path, spanopt) { + Ok(source_file) => source_file, + Err(mut d) => { + sess.span_diagnostic.emit_diagnostic(&mut d); + FatalError.raise(); + } + } +} + +/// Given a `source_file`, produces a sequence of token trees. +pub fn source_file_to_stream( + sess: &ParseSess, + source_file: Lrc, + override_span: Option, +) -> (TokenStream, Vec) { + panictry_buffer!(&sess.span_diagnostic, maybe_file_to_stream(sess, source_file, override_span)) +} + +/// Given a source file, produces a sequence of token trees. Returns any buffered errors from +/// parsing the token stream. +pub fn maybe_file_to_stream( + sess: &ParseSess, + source_file: Lrc, + override_span: Option, +) -> Result<(TokenStream, Vec), Vec> { + let src = source_file.src.as_ref().unwrap_or_else(|| { + sess.span_diagnostic.bug(&format!( + "cannot lex `source_file` without source: {}", + sess.source_map().filename_for_diagnostics(&source_file.name) + )); + }); + + let (token_trees, unmatched_braces) = + lexer::parse_token_trees(sess, src.as_str(), source_file.start_pos, override_span); + + match token_trees { + Ok(stream) => Ok((stream, unmatched_braces)), + Err(err) => { + let mut buffer = Vec::with_capacity(1); + err.buffer(&mut buffer); + // Not using `emit_unclosed_delims` to use `db.buffer` + for unmatched in unmatched_braces { + if let Some(err) = make_unclosed_delims_error(unmatched, &sess) { + err.buffer(&mut buffer); + } + } + Err(buffer) + } + } +} + +/// Given a stream and the `ParseSess`, produces a parser. +pub fn stream_to_parser<'a>( + sess: &'a ParseSess, + stream: TokenStream, + subparser_name: Option<&'static str>, +) -> Parser<'a> { + Parser::new(sess, stream, false, subparser_name) +} + +/// Runs the given subparser `f` on the tokens of the given `attr`'s item. +pub fn parse_in<'a, T>( + sess: &'a ParseSess, + tts: TokenStream, + name: &'static str, + mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>, +) -> PResult<'a, T> { + let mut parser = Parser::new(sess, tts, false, Some(name)); + let result = f(&mut parser)?; + if parser.token != token::Eof { + parser.unexpected()?; + } + Ok(result) +} + +pub fn fake_token_stream_for_item(sess: &ParseSess, item: &ast::Item) -> TokenStream { + let source = pprust::item_to_string(item); + let filename = FileName::macro_expansion_source_code(&source); + parse_stream_from_source_str(filename, source, sess, Some(item.span)) +} + +pub fn fake_token_stream_for_crate(sess: &ParseSess, krate: &ast::Crate) -> TokenStream { + let source = pprust::crate_to_string_for_macros(krate); + let filename = FileName::macro_expansion_source_code(&source); + parse_stream_from_source_str(filename, source, sess, Some(krate.spans.inner_span)) +} + +pub fn parse_cfg_attr( + attr: &Attribute, + parse_sess: &ParseSess, +) -> Option<(MetaItem, Vec<(AttrItem, Span)>)> { + match attr.get_normal_item().args { + ast::MacArgs::Delimited(dspan, delim, ref tts) if !tts.is_empty() => { + let msg = "wrong `cfg_attr` delimiters"; + crate::validate_attr::check_meta_bad_delim(parse_sess, dspan, delim, msg); + match parse_in(parse_sess, tts.clone(), "`cfg_attr` input", |p| p.parse_cfg_attr()) { + Ok(r) => return Some(r), + Err(mut e) => { + e.help(&format!("the valid syntax is `{}`", CFG_ATTR_GRAMMAR_HELP)) + .note(CFG_ATTR_NOTE_REF) + .emit(); + } + } + } + _ => error_malformed_cfg_attr_missing(attr.span, parse_sess), + } + None +} + +const CFG_ATTR_GRAMMAR_HELP: &str = "#[cfg_attr(condition, attribute, other_attribute, ...)]"; +const CFG_ATTR_NOTE_REF: &str = "for more information, visit \ + "; + +fn error_malformed_cfg_attr_missing(span: Span, parse_sess: &ParseSess) { + parse_sess + .span_diagnostic + .struct_span_err(span, "malformed `cfg_attr` attribute input") + .span_suggestion( + span, + "missing condition and attribute", + CFG_ATTR_GRAMMAR_HELP, + Applicability::HasPlaceholders, + ) + .note(CFG_ATTR_NOTE_REF) + .emit(); +} diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs new file mode 100644 index 000000000..acdbddf40 --- /dev/null +++ b/compiler/rustc_parse/src/parser/attr.rs @@ -0,0 +1,444 @@ +use super::{AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, PathStyle}; +use rustc_ast as ast; +use rustc_ast::attr; +use rustc_ast::token::{self, Delimiter, Nonterminal}; +use rustc_ast_pretty::pprust; +use rustc_errors::{error_code, Diagnostic, PResult}; +use rustc_span::{sym, BytePos, Span}; +use std::convert::TryInto; + +use tracing::debug; + +// Public for rustfmt usage +#[derive(Debug)] +pub enum InnerAttrPolicy<'a> { + Permitted, + Forbidden { reason: &'a str, saw_doc_comment: bool, prev_outer_attr_sp: Option }, +} + +const DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG: &str = "an inner attribute is not \ + permitted in this context"; + +pub(super) const DEFAULT_INNER_ATTR_FORBIDDEN: InnerAttrPolicy<'_> = InnerAttrPolicy::Forbidden { + reason: DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG, + saw_doc_comment: false, + prev_outer_attr_sp: None, +}; + +enum OuterAttributeType { + DocComment, + DocBlockComment, + Attribute, +} + +impl<'a> Parser<'a> { + /// Parses attributes that appear before an item. + pub(super) fn parse_outer_attributes(&mut self) -> PResult<'a, AttrWrapper> { + let mut outer_attrs: Vec = Vec::new(); + let mut just_parsed_doc_comment = false; + let start_pos = self.token_cursor.num_next_calls; + loop { + let attr = if self.check(&token::Pound) { + let prev_outer_attr_sp = outer_attrs.last().map(|attr| attr.span); + + let inner_error_reason = if just_parsed_doc_comment { + "an inner attribute is not permitted following an outer doc comment" + } else if prev_outer_attr_sp.is_some() { + "an inner attribute is not permitted following an outer attribute" + } else { + DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG + }; + let inner_parse_policy = InnerAttrPolicy::Forbidden { + reason: inner_error_reason, + saw_doc_comment: just_parsed_doc_comment, + prev_outer_attr_sp, + }; + just_parsed_doc_comment = false; + Some(self.parse_attribute(inner_parse_policy)?) + } else if let token::DocComment(comment_kind, attr_style, data) = self.token.kind { + if attr_style != ast::AttrStyle::Outer { + let span = self.token.span; + let mut err = self.sess.span_diagnostic.struct_span_err_with_code( + span, + "expected outer doc comment", + error_code!(E0753), + ); + if let Some(replacement_span) = self.annotate_following_item_if_applicable( + &mut err, + span, + match comment_kind { + token::CommentKind::Line => OuterAttributeType::DocComment, + token::CommentKind::Block => OuterAttributeType::DocBlockComment, + }, + ) { + err.note( + "inner doc comments like this (starting with `//!` or `/*!`) can \ + only appear before items", + ); + err.span_suggestion_verbose( + replacement_span, + "you might have meant to write a regular comment", + "", + rustc_errors::Applicability::MachineApplicable, + ); + } + err.emit(); + } + self.bump(); + just_parsed_doc_comment = true; + // Always make an outer attribute - this allows us to recover from a misplaced + // inner attribute. + Some(attr::mk_doc_comment( + comment_kind, + ast::AttrStyle::Outer, + data, + self.prev_token.span, + )) + } else { + None + }; + + if let Some(attr) = attr { + if attr.style == ast::AttrStyle::Outer { + outer_attrs.push(attr); + } + } else { + break; + } + } + Ok(AttrWrapper::new(outer_attrs.into(), start_pos)) + } + + /// Matches `attribute = # ! [ meta_item ]`. + /// `inner_parse_policy` prescribes how to handle inner attributes. + // Public for rustfmt usage. + pub fn parse_attribute( + &mut self, + inner_parse_policy: InnerAttrPolicy<'_>, + ) -> PResult<'a, ast::Attribute> { + debug!( + "parse_attribute: inner_parse_policy={:?} self.token={:?}", + inner_parse_policy, self.token + ); + let lo = self.token.span; + // Attributes can't have attributes of their own [Editor's note: not with that attitude] + self.collect_tokens_no_attrs(|this| { + if this.eat(&token::Pound) { + let style = if this.eat(&token::Not) { + ast::AttrStyle::Inner + } else { + ast::AttrStyle::Outer + }; + + this.expect(&token::OpenDelim(Delimiter::Bracket))?; + let item = this.parse_attr_item(false)?; + this.expect(&token::CloseDelim(Delimiter::Bracket))?; + let attr_sp = lo.to(this.prev_token.span); + + // Emit error if inner attribute is encountered and forbidden. + if style == ast::AttrStyle::Inner { + this.error_on_forbidden_inner_attr(attr_sp, inner_parse_policy); + } + + Ok(attr::mk_attr_from_item(item, None, style, attr_sp)) + } else { + let token_str = pprust::token_to_string(&this.token); + let msg = &format!("expected `#`, found `{token_str}`"); + Err(this.struct_span_err(this.token.span, msg)) + } + }) + } + + fn annotate_following_item_if_applicable( + &self, + err: &mut Diagnostic, + span: Span, + attr_type: OuterAttributeType, + ) -> Option { + let mut snapshot = self.create_snapshot_for_diagnostic(); + let lo = span.lo() + + BytePos(match attr_type { + OuterAttributeType::Attribute => 1, + _ => 2, + }); + let hi = lo + BytePos(1); + let replacement_span = span.with_lo(lo).with_hi(hi); + if let OuterAttributeType::DocBlockComment | OuterAttributeType::DocComment = attr_type { + snapshot.bump(); + } + loop { + // skip any other attributes, we want the item + if snapshot.token.kind == token::Pound { + if let Err(err) = snapshot.parse_attribute(InnerAttrPolicy::Permitted) { + err.cancel(); + return Some(replacement_span); + } + } else { + break; + } + } + match snapshot.parse_item_common( + AttrWrapper::empty(), + true, + false, + FnParseMode { req_name: |_| true, req_body: true }, + ForceCollect::No, + ) { + Ok(Some(item)) => { + let attr_name = match attr_type { + OuterAttributeType::Attribute => "attribute", + _ => "doc comment", + }; + err.span_label( + item.span, + &format!("the inner {} doesn't annotate this {}", attr_name, item.kind.descr()), + ); + err.span_suggestion_verbose( + replacement_span, + &format!( + "to annotate the {}, change the {} from inner to outer style", + item.kind.descr(), + attr_name + ), + match attr_type { + OuterAttributeType::Attribute => "", + OuterAttributeType::DocBlockComment => "*", + OuterAttributeType::DocComment => "/", + }, + rustc_errors::Applicability::MachineApplicable, + ); + return None; + } + Err(item_err) => { + item_err.cancel(); + } + Ok(None) => {} + } + Some(replacement_span) + } + + pub(super) fn error_on_forbidden_inner_attr(&self, attr_sp: Span, policy: InnerAttrPolicy<'_>) { + if let InnerAttrPolicy::Forbidden { reason, saw_doc_comment, prev_outer_attr_sp } = policy { + let prev_outer_attr_note = + if saw_doc_comment { "previous doc comment" } else { "previous outer attribute" }; + + let mut diag = self.struct_span_err(attr_sp, reason); + + if let Some(prev_outer_attr_sp) = prev_outer_attr_sp { + diag.span_label(attr_sp, "not permitted following an outer attribute") + .span_label(prev_outer_attr_sp, prev_outer_attr_note); + } + + diag.note( + "inner attributes, like `#![no_std]`, annotate the item enclosing them, and \ + are usually found at the beginning of source files", + ); + if self + .annotate_following_item_if_applicable( + &mut diag, + attr_sp, + OuterAttributeType::Attribute, + ) + .is_some() + { + diag.note("outer attributes, like `#[test]`, annotate the item following them"); + }; + diag.emit(); + } + } + + /// Parses an inner part of an attribute (the path and following tokens). + /// The tokens must be either a delimited token stream, or empty token stream, + /// or the "legacy" key-value form. + /// PATH `(` TOKEN_STREAM `)` + /// PATH `[` TOKEN_STREAM `]` + /// PATH `{` TOKEN_STREAM `}` + /// PATH + /// PATH `=` UNSUFFIXED_LIT + /// The delimiters or `=` are still put into the resulting token stream. + pub fn parse_attr_item(&mut self, capture_tokens: bool) -> PResult<'a, ast::AttrItem> { + let item = match self.token.kind { + token::Interpolated(ref nt) => match **nt { + Nonterminal::NtMeta(ref item) => Some(item.clone().into_inner()), + _ => None, + }, + _ => None, + }; + Ok(if let Some(item) = item { + self.bump(); + item + } else { + let do_parse = |this: &mut Self| { + let path = this.parse_path(PathStyle::Mod)?; + let args = this.parse_attr_args()?; + Ok(ast::AttrItem { path, args, tokens: None }) + }; + // Attr items don't have attributes + if capture_tokens { self.collect_tokens_no_attrs(do_parse) } else { do_parse(self) }? + }) + } + + /// Parses attributes that appear after the opening of an item. These should + /// be preceded by an exclamation mark, but we accept and warn about one + /// terminated by a semicolon. + /// + /// Matches `inner_attrs*`. + pub(crate) fn parse_inner_attributes(&mut self) -> PResult<'a, Vec> { + let mut attrs: Vec = vec![]; + loop { + let start_pos: u32 = self.token_cursor.num_next_calls.try_into().unwrap(); + // Only try to parse if it is an inner attribute (has `!`). + let attr = if self.check(&token::Pound) && self.look_ahead(1, |t| t == &token::Not) { + Some(self.parse_attribute(InnerAttrPolicy::Permitted)?) + } else if let token::DocComment(comment_kind, attr_style, data) = self.token.kind { + if attr_style == ast::AttrStyle::Inner { + self.bump(); + Some(attr::mk_doc_comment(comment_kind, attr_style, data, self.prev_token.span)) + } else { + None + } + } else { + None + }; + if let Some(attr) = attr { + let end_pos: u32 = self.token_cursor.num_next_calls.try_into().unwrap(); + // If we are currently capturing tokens, mark the location of this inner attribute. + // If capturing ends up creating a `LazyTokenStream`, we will include + // this replace range with it, removing the inner attribute from the final + // `AttrAnnotatedTokenStream`. Inner attributes are stored in the parsed AST note. + // During macro expansion, they are selectively inserted back into the + // token stream (the first inner attribute is removed each time we invoke the + // corresponding macro). + let range = start_pos..end_pos; + if let Capturing::Yes = self.capture_state.capturing { + self.capture_state.inner_attr_ranges.insert(attr.id, (range, vec![])); + } + attrs.push(attr); + } else { + break; + } + } + Ok(attrs) + } + + pub(crate) fn parse_unsuffixed_lit(&mut self) -> PResult<'a, ast::Lit> { + let lit = self.parse_lit()?; + debug!("checking if {:?} is unusuffixed", lit); + + if !lit.kind.is_unsuffixed() { + self.struct_span_err(lit.span, "suffixed literals are not allowed in attributes") + .help( + "instead of using a suffixed literal (`1u8`, `1.0f32`, etc.), \ + use an unsuffixed version (`1`, `1.0`, etc.)", + ) + .emit(); + } + + Ok(lit) + } + + /// Parses `cfg_attr(pred, attr_item_list)` where `attr_item_list` is comma-delimited. + pub fn parse_cfg_attr(&mut self) -> PResult<'a, (ast::MetaItem, Vec<(ast::AttrItem, Span)>)> { + let cfg_predicate = self.parse_meta_item()?; + self.expect(&token::Comma)?; + + // Presumably, the majority of the time there will only be one attr. + let mut expanded_attrs = Vec::with_capacity(1); + while self.token.kind != token::Eof { + let lo = self.token.span; + let item = self.parse_attr_item(true)?; + expanded_attrs.push((item, lo.to(self.prev_token.span))); + if !self.eat(&token::Comma) { + break; + } + } + + Ok((cfg_predicate, expanded_attrs)) + } + + /// Matches `COMMASEP(meta_item_inner)`. + pub(crate) fn parse_meta_seq_top(&mut self) -> PResult<'a, Vec> { + // Presumably, the majority of the time there will only be one attr. + let mut nmis = Vec::with_capacity(1); + while self.token.kind != token::Eof { + nmis.push(self.parse_meta_item_inner()?); + if !self.eat(&token::Comma) { + break; + } + } + Ok(nmis) + } + + /// Matches the following grammar (per RFC 1559). + /// ```ebnf + /// meta_item : PATH ( '=' UNSUFFIXED_LIT | '(' meta_item_inner? ')' )? ; + /// meta_item_inner : (meta_item | UNSUFFIXED_LIT) (',' meta_item_inner)? ; + /// ``` + pub fn parse_meta_item(&mut self) -> PResult<'a, ast::MetaItem> { + let nt_meta = match self.token.kind { + token::Interpolated(ref nt) => match **nt { + token::NtMeta(ref e) => Some(e.clone()), + _ => None, + }, + _ => None, + }; + + if let Some(item) = nt_meta { + return match item.meta(item.path.span) { + Some(meta) => { + self.bump(); + Ok(meta) + } + None => self.unexpected(), + }; + } + + let lo = self.token.span; + let path = self.parse_path(PathStyle::Mod)?; + let kind = self.parse_meta_item_kind()?; + let span = lo.to(self.prev_token.span); + Ok(ast::MetaItem { path, kind, span }) + } + + pub(crate) fn parse_meta_item_kind(&mut self) -> PResult<'a, ast::MetaItemKind> { + Ok(if self.eat(&token::Eq) { + ast::MetaItemKind::NameValue(self.parse_unsuffixed_lit()?) + } else if self.check(&token::OpenDelim(Delimiter::Parenthesis)) { + // Matches `meta_seq = ( COMMASEP(meta_item_inner) )`. + let (list, _) = self.parse_paren_comma_seq(|p| p.parse_meta_item_inner())?; + ast::MetaItemKind::List(list) + } else { + ast::MetaItemKind::Word + }) + } + + /// Matches `meta_item_inner : (meta_item | UNSUFFIXED_LIT) ;`. + fn parse_meta_item_inner(&mut self) -> PResult<'a, ast::NestedMetaItem> { + match self.parse_unsuffixed_lit() { + Ok(lit) => return Ok(ast::NestedMetaItem::Literal(lit)), + Err(err) => err.cancel(), + } + + match self.parse_meta_item() { + Ok(mi) => return Ok(ast::NestedMetaItem::MetaItem(mi)), + Err(err) => err.cancel(), + } + + let found = pprust::token_to_string(&self.token); + let msg = format!("expected unsuffixed literal or identifier, found `{found}`"); + Err(self.struct_span_err(self.token.span, &msg)) + } +} + +pub fn maybe_needs_tokens(attrs: &[ast::Attribute]) -> bool { + // One of the attributes may either itself be a macro, + // or expand to macro attributes (`cfg_attr`). + attrs.iter().any(|attr| { + if attr.is_doc_comment() { + return false; + } + attr.ident().map_or(true, |ident| { + ident.name == sym::cfg_attr || !rustc_feature::is_builtin_attr_name(ident.name) + }) + }) +} diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs new file mode 100644 index 000000000..6c750ff42 --- /dev/null +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -0,0 +1,464 @@ +use super::{Capturing, FlatToken, ForceCollect, Parser, ReplaceRange, TokenCursor, TrailingToken}; +use rustc_ast::token::{self, Delimiter, Token, TokenKind}; +use rustc_ast::tokenstream::{AttrAnnotatedTokenStream, AttributesData, CreateTokenStream}; +use rustc_ast::tokenstream::{AttrAnnotatedTokenTree, DelimSpan, LazyTokenStream, Spacing}; +use rustc_ast::{self as ast}; +use rustc_ast::{AttrVec, Attribute, HasAttrs, HasTokens}; +use rustc_errors::PResult; +use rustc_span::{sym, Span}; + +use std::convert::TryInto; +use std::ops::Range; + +/// A wrapper type to ensure that the parser handles outer attributes correctly. +/// When we parse outer attributes, we need to ensure that we capture tokens +/// for the attribute target. This allows us to perform cfg-expansion on +/// a token stream before we invoke a derive proc-macro. +/// +/// This wrapper prevents direct access to the underlying `Vec`. +/// Parsing code can only get access to the underlying attributes +/// by passing an `AttrWrapper` to `collect_tokens_trailing_tokens`. +/// This makes it difficult to accidentally construct an AST node +/// (which stores a `Vec`) without first collecting tokens. +/// +/// This struct has its own module, to ensure that the parser code +/// cannot directly access the `attrs` field +#[derive(Debug, Clone)] +pub struct AttrWrapper { + attrs: AttrVec, + // The start of the outer attributes in the token cursor. + // This allows us to create a `ReplaceRange` for the entire attribute + // target, including outer attributes. + start_pos: usize, +} + +// This struct is passed around very frequently, +// so make sure it doesn't accidentally get larger +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +rustc_data_structures::static_assert_size!(AttrWrapper, 16); + +impl AttrWrapper { + pub(super) fn new(attrs: AttrVec, start_pos: usize) -> AttrWrapper { + AttrWrapper { attrs, start_pos } + } + pub fn empty() -> AttrWrapper { + AttrWrapper { attrs: AttrVec::new(), start_pos: usize::MAX } + } + // FIXME: Delay span bug here? + pub(crate) fn take_for_recovery(self) -> AttrVec { + self.attrs + } + + // FIXME: require passing an NT to prevent misuse of this method + pub(crate) fn prepend_to_nt_inner(self, attrs: &mut Vec) { + let mut self_attrs: Vec<_> = self.attrs.into(); + std::mem::swap(attrs, &mut self_attrs); + attrs.extend(self_attrs); + } + + pub fn is_empty(&self) -> bool { + self.attrs.is_empty() + } + + pub fn maybe_needs_tokens(&self) -> bool { + crate::parser::attr::maybe_needs_tokens(&self.attrs) + } +} + +/// Returns `true` if `attrs` contains a `cfg` or `cfg_attr` attribute +fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool { + // NOTE: Builtin attributes like `cfg` and `cfg_attr` cannot be renamed via imports. + // Therefore, the absence of a literal `cfg` or `cfg_attr` guarantees that + // we don't need to do any eager expansion. + attrs.iter().any(|attr| { + attr.ident().map_or(false, |ident| ident.name == sym::cfg || ident.name == sym::cfg_attr) + }) +} + +// Produces a `TokenStream` on-demand. Using `cursor_snapshot` +// and `num_calls`, we can reconstruct the `TokenStream` seen +// by the callback. This allows us to avoid producing a `TokenStream` +// if it is never needed - for example, a captured `macro_rules!` +// argument that is never passed to a proc macro. +// In practice token stream creation happens rarely compared to +// calls to `collect_tokens` (see some statistics in #78736), +// so we are doing as little up-front work as possible. +// +// This also makes `Parser` very cheap to clone, since +// there is no intermediate collection buffer to clone. +#[derive(Clone)] +struct LazyTokenStreamImpl { + start_token: (Token, Spacing), + cursor_snapshot: TokenCursor, + num_calls: usize, + break_last_token: bool, + replace_ranges: Box<[ReplaceRange]>, +} + +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +rustc_data_structures::static_assert_size!(LazyTokenStreamImpl, 144); + +impl CreateTokenStream for LazyTokenStreamImpl { + fn create_token_stream(&self) -> AttrAnnotatedTokenStream { + // The token produced by the final call to `{,inlined_}next` was not + // actually consumed by the callback. The combination of chaining the + // initial token and using `take` produces the desired result - we + // produce an empty `TokenStream` if no calls were made, and omit the + // final token otherwise. + let mut cursor_snapshot = self.cursor_snapshot.clone(); + let tokens = + std::iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1)) + .chain((0..self.num_calls).map(|_| { + let token = cursor_snapshot.next(cursor_snapshot.desugar_doc_comments); + (FlatToken::Token(token.0), token.1) + })) + .take(self.num_calls); + + if !self.replace_ranges.is_empty() { + let mut tokens: Vec<_> = tokens.collect(); + let mut replace_ranges = self.replace_ranges.clone(); + replace_ranges.sort_by_key(|(range, _)| range.start); + + #[cfg(debug_assertions)] + { + for [(range, tokens), (next_range, next_tokens)] in replace_ranges.array_windows() { + assert!( + range.end <= next_range.start || range.end >= next_range.end, + "Replace ranges should either be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})", + range, + tokens, + next_range, + next_tokens, + ); + } + } + + // Process the replace ranges, starting from the highest start + // position and working our way back. If have tokens like: + // + // `#[cfg(FALSE)]` struct Foo { #[cfg(FALSE)] field: bool }` + // + // Then we will generate replace ranges for both + // the `#[cfg(FALSE)] field: bool` and the entire + // `#[cfg(FALSE)]` struct Foo { #[cfg(FALSE)] field: bool }` + // + // By starting processing from the replace range with the greatest + // start position, we ensure that any replace range which encloses + // another replace range will capture the *replaced* tokens for the inner + // range, not the original tokens. + for (range, new_tokens) in replace_ranges.iter().rev() { + assert!(!range.is_empty(), "Cannot replace an empty range: {:?}", range); + // Replace ranges are only allowed to decrease the number of tokens. + assert!( + range.len() >= new_tokens.len(), + "Range {:?} has greater len than {:?}", + range, + new_tokens + ); + + // Replace any removed tokens with `FlatToken::Empty`. + // This keeps the total length of `tokens` constant throughout the + // replacement process, allowing us to use all of the `ReplaceRanges` entries + // without adjusting indices. + let filler = std::iter::repeat((FlatToken::Empty, Spacing::Alone)) + .take(range.len() - new_tokens.len()); + + tokens.splice( + (range.start as usize)..(range.end as usize), + new_tokens.clone().into_iter().chain(filler), + ); + } + make_token_stream(tokens.into_iter(), self.break_last_token) + } else { + make_token_stream(tokens, self.break_last_token) + } + } +} + +impl<'a> Parser<'a> { + /// Records all tokens consumed by the provided callback, + /// including the current token. These tokens are collected + /// into a `LazyTokenStream`, and returned along with the result + /// of the callback. + /// + /// Note: If your callback consumes an opening delimiter + /// (including the case where you call `collect_tokens` + /// when the current token is an opening delimiter), + /// you must also consume the corresponding closing delimiter. + /// + /// That is, you can consume + /// `something ([{ }])` or `([{}])`, but not `([{}]` + /// + /// This restriction shouldn't be an issue in practice, + /// since this function is used to record the tokens for + /// a parsed AST item, which always has matching delimiters. + pub fn collect_tokens_trailing_token( + &mut self, + attrs: AttrWrapper, + force_collect: ForceCollect, + f: impl FnOnce(&mut Self, Vec) -> PResult<'a, (R, TrailingToken)>, + ) -> PResult<'a, R> { + // We only bail out when nothing could possibly observe the collected tokens: + // 1. We cannot be force collecting tokens (since force-collecting requires tokens + // by definition + if matches!(force_collect, ForceCollect::No) + // None of our outer attributes can require tokens (e.g. a proc-macro) + && !attrs.maybe_needs_tokens() + // If our target supports custom inner attributes, then we cannot bail + // out early, since we may need to capture tokens for a custom inner attribute + // invocation. + && !R::SUPPORTS_CUSTOM_INNER_ATTRS + // Never bail out early in `capture_cfg` mode, since there might be `#[cfg]` + // or `#[cfg_attr]` attributes. + && !self.capture_cfg + { + return Ok(f(self, attrs.attrs.into())?.0); + } + + let start_token = (self.token.clone(), self.token_spacing); + let cursor_snapshot = self.token_cursor.clone(); + + let has_outer_attrs = !attrs.attrs.is_empty(); + let prev_capturing = std::mem::replace(&mut self.capture_state.capturing, Capturing::Yes); + let replace_ranges_start = self.capture_state.replace_ranges.len(); + + let ret = f(self, attrs.attrs.into()); + + self.capture_state.capturing = prev_capturing; + + let (mut ret, trailing) = ret?; + + // When we're not in `capture-cfg` mode, then bail out early if: + // 1. Our target doesn't support tokens at all (e.g we're parsing an `NtIdent`) + // so there's nothing for us to do. + // 2. Our target already has tokens set (e.g. we've parsed something + // like `#[my_attr] $item`. The actual parsing code takes care of prepending + // any attributes to the nonterminal, so we don't need to modify the + // already captured tokens. + // Note that this check is independent of `force_collect`- if we already + // have tokens, or can't even store them, then there's never a need to + // force collection of new tokens. + if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) { + return Ok(ret); + } + + // This is very similar to the bail out check at the start of this function. + // Now that we've parsed an AST node, we have more information available. + if matches!(force_collect, ForceCollect::No) + // We now have inner attributes available, so this check is more precise + // than `attrs.maybe_needs_tokens()` at the start of the function. + // As a result, we don't need to check `R::SUPPORTS_CUSTOM_INNER_ATTRS` + && !crate::parser::attr::maybe_needs_tokens(ret.attrs()) + // Subtle: We call `has_cfg_or_cfg_attr` with the attrs from `ret`. + // This ensures that we consider inner attributes (e.g. `#![cfg]`), + // which require us to have tokens available + // We also call `has_cfg_or_cfg_attr` at the beginning of this function, + // but we only bail out if there's no possibility of inner attributes + // (!R::SUPPORTS_CUSTOM_INNER_ATTRS) + // We only capture about `#[cfg]` or `#[cfg_attr]` in `capture_cfg` + // mode - during normal parsing, we don't need any special capturing + // for those attributes, since they're builtin. + && !(self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs())) + { + return Ok(ret); + } + + let mut inner_attr_replace_ranges = Vec::new(); + // Take the captured ranges for any inner attributes that we parsed. + for inner_attr in ret.attrs().iter().filter(|a| a.style == ast::AttrStyle::Inner) { + if let Some(attr_range) = self.capture_state.inner_attr_ranges.remove(&inner_attr.id) { + inner_attr_replace_ranges.push(attr_range); + } else { + self.sess + .span_diagnostic + .delay_span_bug(inner_attr.span, "Missing token range for attribute"); + } + } + + let replace_ranges_end = self.capture_state.replace_ranges.len(); + + let cursor_snapshot_next_calls = cursor_snapshot.num_next_calls; + let mut end_pos = self.token_cursor.num_next_calls; + + // Capture a trailing token if requested by the callback 'f' + match trailing { + TrailingToken::None => {} + TrailingToken::Semi => { + assert_eq!(self.token.kind, token::Semi); + end_pos += 1; + } + TrailingToken::MaybeComma => { + if self.token.kind == token::Comma { + end_pos += 1; + } + } + } + + // If we 'broke' the last token (e.g. breaking a '>>' token to two '>' tokens), + // then extend the range of captured tokens to include it, since the parser + // was not actually bumped past it. When the `LazyTokenStream` gets converted + // into an `AttrAnnotatedTokenStream`, we will create the proper token. + if self.token_cursor.break_last_token { + assert_eq!( + trailing, + TrailingToken::None, + "Cannot set `break_last_token` and have trailing token" + ); + end_pos += 1; + } + + let num_calls = end_pos - cursor_snapshot_next_calls; + + // If we have no attributes, then we will never need to + // use any replace ranges. + let replace_ranges: Box<[ReplaceRange]> = if ret.attrs().is_empty() && !self.capture_cfg { + Box::new([]) + } else { + // Grab any replace ranges that occur *inside* the current AST node. + // We will perform the actual replacement when we convert the `LazyTokenStream` + // to an `AttrAnnotatedTokenStream` + let start_calls: u32 = cursor_snapshot_next_calls.try_into().unwrap(); + self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end] + .iter() + .cloned() + .chain(inner_attr_replace_ranges.clone().into_iter()) + .map(|(range, tokens)| { + ((range.start - start_calls)..(range.end - start_calls), tokens) + }) + .collect() + }; + + let tokens = LazyTokenStream::new(LazyTokenStreamImpl { + start_token, + num_calls, + cursor_snapshot, + break_last_token: self.token_cursor.break_last_token, + replace_ranges, + }); + + // If we support tokens at all + if let Some(target_tokens) = ret.tokens_mut() { + if target_tokens.is_none() { + // Store se our newly captured tokens into the AST node + *target_tokens = Some(tokens.clone()); + } + } + + let final_attrs = ret.attrs(); + + // If `capture_cfg` is set and we're inside a recursive call to + // `collect_tokens_trailing_token`, then we need to register a replace range + // if we have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager cfg-expansion + // on the captured token stream. + if self.capture_cfg + && matches!(self.capture_state.capturing, Capturing::Yes) + && has_cfg_or_cfg_attr(&final_attrs) + { + let attr_data = AttributesData { attrs: final_attrs.to_vec().into(), tokens }; + + // Replace the entire AST node that we just parsed, including attributes, + // with a `FlatToken::AttrTarget`. If this AST node is inside an item + // that has `#[derive]`, then this will allow us to cfg-expand this + // AST node. + let start_pos = + if has_outer_attrs { attrs.start_pos } else { cursor_snapshot_next_calls }; + let new_tokens = vec![(FlatToken::AttrTarget(attr_data), Spacing::Alone)]; + + assert!( + !self.token_cursor.break_last_token, + "Should not have unglued last token with cfg attr" + ); + let range: Range = (start_pos.try_into().unwrap())..(end_pos.try_into().unwrap()); + self.capture_state.replace_ranges.push((range, new_tokens)); + self.capture_state.replace_ranges.extend(inner_attr_replace_ranges); + } + + // Only clear our `replace_ranges` when we're finished capturing entirely. + if matches!(self.capture_state.capturing, Capturing::No) { + self.capture_state.replace_ranges.clear(); + // We don't clear `inner_attr_ranges`, as doing so repeatedly + // had a measurable performance impact. Most inner attributes that + // we insert will get removed - when we drop the parser, we'll free + // up the memory used by any attributes that we didn't remove from the map. + } + Ok(ret) + } +} + +/// Converts a flattened iterator of tokens (including open and close delimiter tokens) +/// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair +/// of open and close delims. +fn make_token_stream( + mut iter: impl Iterator, + break_last_token: bool, +) -> AttrAnnotatedTokenStream { + #[derive(Debug)] + struct FrameData { + // This is `None` for the first frame, `Some` for all others. + open_delim_sp: Option<(Delimiter, Span)>, + inner: Vec<(AttrAnnotatedTokenTree, Spacing)>, + } + let mut stack = vec![FrameData { open_delim_sp: None, inner: vec![] }]; + let mut token_and_spacing = iter.next(); + while let Some((token, spacing)) = token_and_spacing { + match token { + FlatToken::Token(Token { kind: TokenKind::OpenDelim(delim), span }) => { + stack.push(FrameData { open_delim_sp: Some((delim, span)), inner: vec![] }); + } + FlatToken::Token(Token { kind: TokenKind::CloseDelim(delim), span }) => { + let frame_data = stack + .pop() + .unwrap_or_else(|| panic!("Token stack was empty for token: {:?}", token)); + + let (open_delim, open_sp) = frame_data.open_delim_sp.unwrap(); + assert_eq!( + open_delim, delim, + "Mismatched open/close delims: open={:?} close={:?}", + open_delim, span + ); + let dspan = DelimSpan::from_pair(open_sp, span); + let stream = AttrAnnotatedTokenStream::new(frame_data.inner); + let delimited = AttrAnnotatedTokenTree::Delimited(dspan, delim, stream); + stack + .last_mut() + .unwrap_or_else(|| { + panic!("Bottom token frame is missing for token: {:?}", token) + }) + .inner + .push((delimited, Spacing::Alone)); + } + FlatToken::Token(token) => stack + .last_mut() + .expect("Bottom token frame is missing!") + .inner + .push((AttrAnnotatedTokenTree::Token(token), spacing)), + FlatToken::AttrTarget(data) => stack + .last_mut() + .expect("Bottom token frame is missing!") + .inner + .push((AttrAnnotatedTokenTree::Attributes(data), spacing)), + FlatToken::Empty => {} + } + token_and_spacing = iter.next(); + } + let mut final_buf = stack.pop().expect("Missing final buf!"); + if break_last_token { + let (last_token, spacing) = final_buf.inner.pop().unwrap(); + if let AttrAnnotatedTokenTree::Token(last_token) = last_token { + let unglued_first = last_token.kind.break_two_token_op().unwrap().0; + + // An 'unglued' token is always two ASCII characters + let mut first_span = last_token.span.shrink_to_lo(); + first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1)); + + final_buf.inner.push(( + AttrAnnotatedTokenTree::Token(Token::new(unglued_first, first_span)), + spacing, + )); + } else { + panic!("Unexpected last token {:?}", last_token) + } + } + assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack); + AttrAnnotatedTokenStream::new(final_buf.inner) +} diff --git a/compiler/rustc_parse/src/parser/diagnostics.rs b/compiler/rustc_parse/src/parser/diagnostics.rs new file mode 100644 index 000000000..a2155ac1d --- /dev/null +++ b/compiler/rustc_parse/src/parser/diagnostics.rs @@ -0,0 +1,2740 @@ +use super::pat::Expected; +use super::{ + BlockMode, CommaRecoveryMode, Parser, PathStyle, Restrictions, SemiColonMode, SeqSep, + TokenExpectType, TokenType, +}; + +use crate::lexer::UnmatchedBrace; +use rustc_ast as ast; +use rustc_ast::ptr::P; +use rustc_ast::token::{self, Delimiter, Lit, LitKind, TokenKind}; +use rustc_ast::util::parser::AssocOp; +use rustc_ast::{ + AngleBracketedArg, AngleBracketedArgs, AnonConst, AttrVec, BinOpKind, BindingMode, Block, + BlockCheckMode, Expr, ExprKind, GenericArg, Generics, Item, ItemKind, Mutability, Param, Pat, + PatKind, Path, PathSegment, QSelf, Ty, TyKind, +}; +use rustc_ast_pretty::pprust; +use rustc_data_structures::fx::FxHashSet; +use rustc_errors::{ + fluent, Applicability, DiagnosticBuilder, DiagnosticMessage, Handler, MultiSpan, PResult, +}; +use rustc_errors::{pluralize, struct_span_err, Diagnostic, EmissionGuarantee, ErrorGuaranteed}; +use rustc_macros::{SessionDiagnostic, SessionSubdiagnostic}; +use rustc_span::source_map::Spanned; +use rustc_span::symbol::{kw, Ident}; +use rustc_span::{Span, SpanSnippetError, DUMMY_SP}; +use std::ops::{Deref, DerefMut}; + +use std::mem::take; + +use crate::parser; +use tracing::{debug, trace}; + +const TURBOFISH_SUGGESTION_STR: &str = + "use `::<...>` instead of `<...>` to specify lifetime, type, or const arguments"; + +/// Creates a placeholder argument. +pub(super) fn dummy_arg(ident: Ident) -> Param { + let pat = P(Pat { + id: ast::DUMMY_NODE_ID, + kind: PatKind::Ident(BindingMode::ByValue(Mutability::Not), ident, None), + span: ident.span, + tokens: None, + }); + let ty = Ty { kind: TyKind::Err, span: ident.span, id: ast::DUMMY_NODE_ID, tokens: None }; + Param { + attrs: AttrVec::default(), + id: ast::DUMMY_NODE_ID, + pat, + span: ident.span, + ty: P(ty), + is_placeholder: false, + } +} + +pub enum Error { + UselessDocComment, +} + +impl Error { + fn span_err( + self, + sp: impl Into, + handler: &Handler, + ) -> DiagnosticBuilder<'_, ErrorGuaranteed> { + match self { + Error::UselessDocComment => { + let mut err = struct_span_err!( + handler, + sp, + E0585, + "found a documentation comment that doesn't document anything", + ); + err.help( + "doc comments must come before what they document, maybe a comment was \ + intended with `//`?", + ); + err + } + } + } +} + +pub(super) trait RecoverQPath: Sized + 'static { + const PATH_STYLE: PathStyle = PathStyle::Expr; + fn to_ty(&self) -> Option>; + fn recovered(qself: Option, path: ast::Path) -> Self; +} + +impl RecoverQPath for Ty { + const PATH_STYLE: PathStyle = PathStyle::Type; + fn to_ty(&self) -> Option> { + Some(P(self.clone())) + } + fn recovered(qself: Option, path: ast::Path) -> Self { + Self { + span: path.span, + kind: TyKind::Path(qself, path), + id: ast::DUMMY_NODE_ID, + tokens: None, + } + } +} + +impl RecoverQPath for Pat { + fn to_ty(&self) -> Option> { + self.to_ty() + } + fn recovered(qself: Option, path: ast::Path) -> Self { + Self { + span: path.span, + kind: PatKind::Path(qself, path), + id: ast::DUMMY_NODE_ID, + tokens: None, + } + } +} + +impl RecoverQPath for Expr { + fn to_ty(&self) -> Option> { + self.to_ty() + } + fn recovered(qself: Option, path: ast::Path) -> Self { + Self { + span: path.span, + kind: ExprKind::Path(qself, path), + attrs: AttrVec::new(), + id: ast::DUMMY_NODE_ID, + tokens: None, + } + } +} + +/// Control whether the closing delimiter should be consumed when calling `Parser::consume_block`. +pub(crate) enum ConsumeClosingDelim { + Yes, + No, +} + +#[derive(Clone, Copy)] +pub enum AttemptLocalParseRecovery { + Yes, + No, +} + +impl AttemptLocalParseRecovery { + pub fn yes(&self) -> bool { + match self { + AttemptLocalParseRecovery::Yes => true, + AttemptLocalParseRecovery::No => false, + } + } + + pub fn no(&self) -> bool { + match self { + AttemptLocalParseRecovery::Yes => false, + AttemptLocalParseRecovery::No => true, + } + } +} + +/// Information for emitting suggestions and recovering from +/// C-style `i++`, `--i`, etc. +#[derive(Debug, Copy, Clone)] +struct IncDecRecovery { + /// Is this increment/decrement its own statement? + standalone: IsStandalone, + /// Is this an increment or decrement? + op: IncOrDec, + /// Is this pre- or postfix? + fixity: UnaryFixity, +} + +/// Is an increment or decrement expression its own statement? +#[derive(Debug, Copy, Clone)] +enum IsStandalone { + /// It's standalone, i.e., its own statement. + Standalone, + /// It's a subexpression, i.e., *not* standalone. + Subexpr, + /// It's maybe standalone; we're not sure. + Maybe, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum IncOrDec { + Inc, + // FIXME: `i--` recovery isn't implemented yet + #[allow(dead_code)] + Dec, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum UnaryFixity { + Pre, + Post, +} + +impl IncOrDec { + fn chr(&self) -> char { + match self { + Self::Inc => '+', + Self::Dec => '-', + } + } + + fn name(&self) -> &'static str { + match self { + Self::Inc => "increment", + Self::Dec => "decrement", + } + } +} + +impl std::fmt::Display for UnaryFixity { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Pre => write!(f, "prefix"), + Self::Post => write!(f, "postfix"), + } + } +} + +struct MultiSugg { + msg: String, + patches: Vec<(Span, String)>, + applicability: Applicability, +} + +impl MultiSugg { + fn emit(self, err: &mut DiagnosticBuilder<'_, G>) { + err.multipart_suggestion(&self.msg, self.patches, self.applicability); + } + + /// Overrides individual messages and applicabilities. + fn emit_many( + err: &mut DiagnosticBuilder<'_, G>, + msg: &str, + applicability: Applicability, + suggestions: impl Iterator, + ) { + err.multipart_suggestions(msg, suggestions.map(|s| s.patches), applicability); + } +} + +#[derive(SessionDiagnostic)] +#[error(parser::maybe_report_ambiguous_plus)] +struct AmbiguousPlus { + pub sum_ty: String, + #[primary_span] + #[suggestion(code = "({sum_ty})")] + pub span: Span, +} + +#[derive(SessionDiagnostic)] +#[error(parser::maybe_recover_from_bad_type_plus, code = "E0178")] +struct BadTypePlus { + pub ty: String, + #[primary_span] + pub span: Span, + #[subdiagnostic] + pub sub: BadTypePlusSub, +} + +#[derive(SessionSubdiagnostic)] +pub enum BadTypePlusSub { + #[suggestion( + parser::add_paren, + code = "{sum_with_parens}", + applicability = "machine-applicable" + )] + AddParen { + sum_with_parens: String, + #[primary_span] + span: Span, + }, + #[label(parser::forgot_paren)] + ForgotParen { + #[primary_span] + span: Span, + }, + #[label(parser::expect_path)] + ExpectPath { + #[primary_span] + span: Span, + }, +} + +#[derive(SessionDiagnostic)] +#[error(parser::maybe_recover_from_bad_qpath_stage_2)] +struct BadQPathStage2 { + #[primary_span] + #[suggestion(applicability = "maybe-incorrect")] + span: Span, + ty: String, +} + +#[derive(SessionDiagnostic)] +#[error(parser::incorrect_semicolon)] +struct IncorrectSemicolon<'a> { + #[primary_span] + #[suggestion_short(applicability = "machine-applicable")] + span: Span, + #[help] + opt_help: Option<()>, + name: &'a str, +} + +#[derive(SessionDiagnostic)] +#[error(parser::incorrect_use_of_await)] +struct IncorrectUseOfAwait { + #[primary_span] + #[suggestion(parser::parentheses_suggestion, applicability = "machine-applicable")] + span: Span, +} + +#[derive(SessionDiagnostic)] +#[error(parser::incorrect_use_of_await)] +struct IncorrectAwait { + #[primary_span] + span: Span, + #[suggestion(parser::postfix_suggestion, code = "{expr}.await{question_mark}")] + sugg_span: (Span, Applicability), + expr: String, + question_mark: &'static str, +} + +#[derive(SessionDiagnostic)] +#[error(parser::in_in_typo)] +struct InInTypo { + #[primary_span] + span: Span, + #[suggestion(applicability = "machine-applicable")] + sugg_span: Span, +} + +// SnapshotParser is used to create a snapshot of the parser +// without causing duplicate errors being emitted when the `Parser` +// is dropped. +pub struct SnapshotParser<'a> { + parser: Parser<'a>, + unclosed_delims: Vec, +} + +impl<'a> Deref for SnapshotParser<'a> { + type Target = Parser<'a>; + + fn deref(&self) -> &Self::Target { + &self.parser + } +} + +impl<'a> DerefMut for SnapshotParser<'a> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.parser + } +} + +impl<'a> Parser<'a> { + #[rustc_lint_diagnostics] + pub(super) fn span_err>( + &self, + sp: S, + err: Error, + ) -> DiagnosticBuilder<'a, ErrorGuaranteed> { + err.span_err(sp, self.diagnostic()) + } + + #[rustc_lint_diagnostics] + pub fn struct_span_err>( + &self, + sp: S, + m: impl Into, + ) -> DiagnosticBuilder<'a, ErrorGuaranteed> { + self.sess.span_diagnostic.struct_span_err(sp, m) + } + + pub fn span_bug>(&self, sp: S, m: impl Into) -> ! { + self.sess.span_diagnostic.span_bug(sp, m) + } + + pub(super) fn diagnostic(&self) -> &'a Handler { + &self.sess.span_diagnostic + } + + /// Replace `self` with `snapshot.parser` and extend `unclosed_delims` with `snapshot.unclosed_delims`. + /// This is to avoid losing unclosed delims errors `create_snapshot_for_diagnostic` clears. + pub(super) fn restore_snapshot(&mut self, snapshot: SnapshotParser<'a>) { + *self = snapshot.parser; + self.unclosed_delims.extend(snapshot.unclosed_delims.clone()); + } + + pub fn unclosed_delims(&self) -> &[UnmatchedBrace] { + &self.unclosed_delims + } + + /// Create a snapshot of the `Parser`. + pub fn create_snapshot_for_diagnostic(&self) -> SnapshotParser<'a> { + let mut snapshot = self.clone(); + let unclosed_delims = self.unclosed_delims.clone(); + // Clear `unclosed_delims` in snapshot to avoid + // duplicate errors being emitted when the `Parser` + // is dropped (which may or may not happen, depending + // if the parsing the snapshot is created for is successful) + snapshot.unclosed_delims.clear(); + SnapshotParser { parser: snapshot, unclosed_delims } + } + + pub(super) fn span_to_snippet(&self, span: Span) -> Result { + self.sess.source_map().span_to_snippet(span) + } + + pub(super) fn expected_ident_found(&self) -> DiagnosticBuilder<'a, ErrorGuaranteed> { + let mut err = self.struct_span_err( + self.token.span, + &format!("expected identifier, found {}", super::token_descr(&self.token)), + ); + let valid_follow = &[ + TokenKind::Eq, + TokenKind::Colon, + TokenKind::Comma, + TokenKind::Semi, + TokenKind::ModSep, + TokenKind::OpenDelim(Delimiter::Brace), + TokenKind::OpenDelim(Delimiter::Parenthesis), + TokenKind::CloseDelim(Delimiter::Brace), + TokenKind::CloseDelim(Delimiter::Parenthesis), + ]; + match self.token.ident() { + Some((ident, false)) + if ident.is_raw_guess() + && self.look_ahead(1, |t| valid_follow.contains(&t.kind)) => + { + err.span_suggestion_verbose( + ident.span.shrink_to_lo(), + &format!("escape `{}` to use it as an identifier", ident.name), + "r#", + Applicability::MaybeIncorrect, + ); + } + _ => {} + } + if let Some(token_descr) = super::token_descr_opt(&self.token) { + err.span_label(self.token.span, format!("expected identifier, found {}", token_descr)); + } else { + err.span_label(self.token.span, "expected identifier"); + if self.token == token::Comma && self.look_ahead(1, |t| t.is_ident()) { + err.span_suggestion( + self.token.span, + "remove this comma", + "", + Applicability::MachineApplicable, + ); + } + } + err + } + + pub(super) fn expected_one_of_not_found( + &mut self, + edible: &[TokenKind], + inedible: &[TokenKind], + ) -> PResult<'a, bool /* recovered */> { + debug!("expected_one_of_not_found(edible: {:?}, inedible: {:?})", edible, inedible); + fn tokens_to_string(tokens: &[TokenType]) -> String { + let mut i = tokens.iter(); + // This might be a sign we need a connect method on `Iterator`. + let b = i.next().map_or_else(String::new, |t| t.to_string()); + i.enumerate().fold(b, |mut b, (i, a)| { + if tokens.len() > 2 && i == tokens.len() - 2 { + b.push_str(", or "); + } else if tokens.len() == 2 && i == tokens.len() - 2 { + b.push_str(" or "); + } else { + b.push_str(", "); + } + b.push_str(&a.to_string()); + b + }) + } + + let mut expected = edible + .iter() + .map(|x| TokenType::Token(x.clone())) + .chain(inedible.iter().map(|x| TokenType::Token(x.clone()))) + .chain(self.expected_tokens.iter().cloned()) + .filter_map(|token| { + // filter out suggestions which suggest the same token which was found and deemed incorrect + fn is_ident_eq_keyword(found: &TokenKind, expected: &TokenType) -> bool { + if let TokenKind::Ident(current_sym, _) = found { + if let TokenType::Keyword(suggested_sym) = expected { + return current_sym == suggested_sym; + } + } + false + } + if token != parser::TokenType::Token(self.token.kind.clone()) { + let eq = is_ident_eq_keyword(&self.token.kind, &token); + // if the suggestion is a keyword and the found token is an ident, + // the content of which are equal to the suggestion's content, + // we can remove that suggestion (see the return None statement below) + + // if this isn't the case however, and the suggestion is a token the + // content of which is the same as the found token's, we remove it as well + if !eq { + if let TokenType::Token(kind) = &token { + if kind == &self.token.kind { + return None; + } + } + return Some(token); + } + } + return None; + }) + .collect::>(); + expected.sort_by_cached_key(|x| x.to_string()); + expected.dedup(); + + let sm = self.sess.source_map(); + let msg = format!("expected `;`, found {}", super::token_descr(&self.token)); + let appl = Applicability::MachineApplicable; + if expected.contains(&TokenType::Token(token::Semi)) { + if self.token.span == DUMMY_SP || self.prev_token.span == DUMMY_SP { + // Likely inside a macro, can't provide meaningful suggestions. + } else if !sm.is_multiline(self.prev_token.span.until(self.token.span)) { + // The current token is in the same line as the prior token, not recoverable. + } else if [token::Comma, token::Colon].contains(&self.token.kind) + && self.prev_token.kind == token::CloseDelim(Delimiter::Parenthesis) + { + // Likely typo: The current token is on a new line and is expected to be + // `.`, `;`, `?`, or an operator after a close delimiter token. + // + // let a = std::process::Command::new("echo") + // .arg("1") + // ,arg("2") + // ^ + // https://github.com/rust-lang/rust/issues/72253 + } else if self.look_ahead(1, |t| { + t == &token::CloseDelim(Delimiter::Brace) + || t.can_begin_expr() && t.kind != token::Colon + }) && [token::Comma, token::Colon].contains(&self.token.kind) + { + // Likely typo: `,` → `;` or `:` → `;`. This is triggered if the current token is + // either `,` or `:`, and the next token could either start a new statement or is a + // block close. For example: + // + // let x = 32: + // let y = 42; + self.bump(); + let sp = self.prev_token.span; + self.struct_span_err(sp, &msg) + .span_suggestion_short(sp, "change this to `;`", ";", appl) + .emit(); + return Ok(true); + } else if self.look_ahead(0, |t| { + t == &token::CloseDelim(Delimiter::Brace) + || (t.can_begin_expr() && t != &token::Semi && t != &token::Pound) + // Avoid triggering with too many trailing `#` in raw string. + || (sm.is_multiline( + self.prev_token.span.shrink_to_hi().until(self.token.span.shrink_to_lo()) + ) && t == &token::Pound) + }) && !expected.contains(&TokenType::Token(token::Comma)) + { + // Missing semicolon typo. This is triggered if the next token could either start a + // new statement or is a block close. For example: + // + // let x = 32 + // let y = 42; + let sp = self.prev_token.span.shrink_to_hi(); + self.struct_span_err(sp, &msg) + .span_label(self.token.span, "unexpected token") + .span_suggestion_short(sp, "add `;` here", ";", appl) + .emit(); + return Ok(true); + } + } + + let expect = tokens_to_string(&expected); + let actual = super::token_descr(&self.token); + let (msg_exp, (label_sp, label_exp)) = if expected.len() > 1 { + let short_expect = if expected.len() > 6 { + format!("{} possible tokens", expected.len()) + } else { + expect.clone() + }; + ( + format!("expected one of {expect}, found {actual}"), + (self.prev_token.span.shrink_to_hi(), format!("expected one of {short_expect}")), + ) + } else if expected.is_empty() { + ( + format!("unexpected token: {}", actual), + (self.prev_token.span, "unexpected token after this".to_string()), + ) + } else { + ( + format!("expected {expect}, found {actual}"), + (self.prev_token.span.shrink_to_hi(), format!("expected {expect}")), + ) + }; + self.last_unexpected_token_span = Some(self.token.span); + let mut err = self.struct_span_err(self.token.span, &msg_exp); + + if let TokenKind::Ident(symbol, _) = &self.prev_token.kind { + if symbol.as_str() == "public" { + err.span_suggestion_short( + self.prev_token.span, + "write `pub` instead of `public` to make the item public", + "pub", + appl, + ); + } + } + + // Add suggestion for a missing closing angle bracket if '>' is included in expected_tokens + // there are unclosed angle brackets + if self.unmatched_angle_bracket_count > 0 + && self.token.kind == TokenKind::Eq + && expected.iter().any(|tok| matches!(tok, TokenType::Token(TokenKind::Gt))) + { + err.span_label(self.prev_token.span, "maybe try to close unmatched angle bracket"); + } + + let sp = if self.token == token::Eof { + // This is EOF; don't want to point at the following char, but rather the last token. + self.prev_token.span + } else { + label_sp + }; + match self.recover_closing_delimiter( + &expected + .iter() + .filter_map(|tt| match tt { + TokenType::Token(t) => Some(t.clone()), + _ => None, + }) + .collect::>(), + err, + ) { + Err(e) => err = e, + Ok(recovered) => { + return Ok(recovered); + } + } + + if self.check_too_many_raw_str_terminators(&mut err) { + if expected.contains(&TokenType::Token(token::Semi)) && self.eat(&token::Semi) { + err.emit(); + return Ok(true); + } else { + return Err(err); + } + } + + if self.prev_token.span == DUMMY_SP { + // Account for macro context where the previous span might not be + // available to avoid incorrect output (#54841). + err.span_label(self.token.span, label_exp); + } else if !sm.is_multiline(self.token.span.shrink_to_hi().until(sp.shrink_to_lo())) { + // When the spans are in the same line, it means that the only content between + // them is whitespace, point at the found token in that case: + // + // X | () => { syntax error }; + // | ^^^^^ expected one of 8 possible tokens here + // + // instead of having: + // + // X | () => { syntax error }; + // | -^^^^^ unexpected token + // | | + // | expected one of 8 possible tokens here + err.span_label(self.token.span, label_exp); + } else { + err.span_label(sp, label_exp); + err.span_label(self.token.span, "unexpected token"); + } + self.maybe_annotate_with_ascription(&mut err, false); + Err(err) + } + + fn check_too_many_raw_str_terminators(&mut self, err: &mut Diagnostic) -> bool { + let sm = self.sess.source_map(); + match (&self.prev_token.kind, &self.token.kind) { + ( + TokenKind::Literal(Lit { + kind: LitKind::StrRaw(n_hashes) | LitKind::ByteStrRaw(n_hashes), + .. + }), + TokenKind::Pound, + ) if !sm.is_multiline( + self.prev_token.span.shrink_to_hi().until(self.token.span.shrink_to_lo()), + ) => + { + let n_hashes: u8 = *n_hashes; + err.set_primary_message("too many `#` when terminating raw string"); + let str_span = self.prev_token.span; + let mut span = self.token.span; + let mut count = 0; + while self.token.kind == TokenKind::Pound + && !sm.is_multiline(span.shrink_to_hi().until(self.token.span.shrink_to_lo())) + { + span = span.with_hi(self.token.span.hi()); + self.bump(); + count += 1; + } + err.set_span(span); + err.span_suggestion( + span, + &format!("remove the extra `#`{}", pluralize!(count)), + "", + Applicability::MachineApplicable, + ); + err.span_label( + str_span, + &format!("this raw string started with {n_hashes} `#`{}", pluralize!(n_hashes)), + ); + true + } + _ => false, + } + } + + pub fn maybe_suggest_struct_literal( + &mut self, + lo: Span, + s: BlockCheckMode, + ) -> Option>> { + if self.token.is_ident() && self.look_ahead(1, |t| t == &token::Colon) { + // We might be having a struct literal where people forgot to include the path: + // fn foo() -> Foo { + // field: value, + // } + let mut snapshot = self.create_snapshot_for_diagnostic(); + let path = + Path { segments: vec![], span: self.prev_token.span.shrink_to_lo(), tokens: None }; + let struct_expr = snapshot.parse_struct_expr(None, path, AttrVec::new(), false); + let block_tail = self.parse_block_tail(lo, s, AttemptLocalParseRecovery::No); + return Some(match (struct_expr, block_tail) { + (Ok(expr), Err(mut err)) => { + // We have encountered the following: + // fn foo() -> Foo { + // field: value, + // } + // Suggest: + // fn foo() -> Foo { Path { + // field: value, + // } } + err.delay_as_bug(); + self.struct_span_err( + expr.span, + fluent::parser::struct_literal_body_without_path, + ) + .multipart_suggestion( + fluent::parser::suggestion, + vec![ + (expr.span.shrink_to_lo(), "{ SomeStruct ".to_string()), + (expr.span.shrink_to_hi(), " }".to_string()), + ], + Applicability::MaybeIncorrect, + ) + .emit(); + self.restore_snapshot(snapshot); + let mut tail = self.mk_block( + vec![self.mk_stmt_err(expr.span)], + s, + lo.to(self.prev_token.span), + ); + tail.could_be_bare_literal = true; + Ok(tail) + } + (Err(err), Ok(tail)) => { + // We have a block tail that contains a somehow valid type ascription expr. + err.cancel(); + Ok(tail) + } + (Err(snapshot_err), Err(err)) => { + // We don't know what went wrong, emit the normal error. + snapshot_err.cancel(); + self.consume_block(Delimiter::Brace, ConsumeClosingDelim::Yes); + Err(err) + } + (Ok(_), Ok(mut tail)) => { + tail.could_be_bare_literal = true; + Ok(tail) + } + }); + } + None + } + + pub fn maybe_annotate_with_ascription( + &mut self, + err: &mut Diagnostic, + maybe_expected_semicolon: bool, + ) { + if let Some((sp, likely_path)) = self.last_type_ascription.take() { + let sm = self.sess.source_map(); + let next_pos = sm.lookup_char_pos(self.token.span.lo()); + let op_pos = sm.lookup_char_pos(sp.hi()); + + let allow_unstable = self.sess.unstable_features.is_nightly_build(); + + if likely_path { + err.span_suggestion( + sp, + "maybe write a path separator here", + "::", + if allow_unstable { + Applicability::MaybeIncorrect + } else { + Applicability::MachineApplicable + }, + ); + self.sess.type_ascription_path_suggestions.borrow_mut().insert(sp); + } else if op_pos.line != next_pos.line && maybe_expected_semicolon { + err.span_suggestion( + sp, + "try using a semicolon", + ";", + Applicability::MaybeIncorrect, + ); + } else if allow_unstable { + err.span_label(sp, "tried to parse a type due to this type ascription"); + } else { + err.span_label(sp, "tried to parse a type due to this"); + } + if allow_unstable { + // Give extra information about type ascription only if it's a nightly compiler. + err.note( + "`#![feature(type_ascription)]` lets you annotate an expression with a type: \ + `: `", + ); + if !likely_path { + // Avoid giving too much info when it was likely an unrelated typo. + err.note( + "see issue #23416 \ + for more information", + ); + } + } + } + } + + /// Eats and discards tokens until one of `kets` is encountered. Respects token trees, + /// passes through any errors encountered. Used for error recovery. + pub(super) fn eat_to_tokens(&mut self, kets: &[&TokenKind]) { + if let Err(err) = + self.parse_seq_to_before_tokens(kets, SeqSep::none(), TokenExpectType::Expect, |p| { + Ok(p.parse_token_tree()) + }) + { + err.cancel(); + } + } + + /// This function checks if there are trailing angle brackets and produces + /// a diagnostic to suggest removing them. + /// + /// ```ignore (diagnostic) + /// let _ = [1, 2, 3].into_iter().collect::>>>(); + /// ^^ help: remove extra angle brackets + /// ``` + /// + /// If `true` is returned, then trailing brackets were recovered, tokens were consumed + /// up until one of the tokens in 'end' was encountered, and an error was emitted. + pub(super) fn check_trailing_angle_brackets( + &mut self, + segment: &PathSegment, + end: &[&TokenKind], + ) -> bool { + // This function is intended to be invoked after parsing a path segment where there are two + // cases: + // + // 1. A specific token is expected after the path segment. + // eg. `x.foo(`, `x.foo::(` (parenthesis - method call), + // `Foo::`, or `Foo::::` (mod sep - continued path). + // 2. No specific token is expected after the path segment. + // eg. `x.foo` (field access) + // + // This function is called after parsing `.foo` and before parsing the token `end` (if + // present). This includes any angle bracket arguments, such as `.foo::` or + // `Foo::`. + + // We only care about trailing angle brackets if we previously parsed angle bracket + // arguments. This helps stop us incorrectly suggesting that extra angle brackets be + // removed in this case: + // + // `x.foo >> (3)` (where `x.foo` is a `u32` for example) + // + // This case is particularly tricky as we won't notice it just looking at the tokens - + // it will appear the same (in terms of upcoming tokens) as below (since the `::` will + // have already been parsed): + // + // `x.foo::>>(3)` + let parsed_angle_bracket_args = + segment.args.as_ref().map_or(false, |args| args.is_angle_bracketed()); + + debug!( + "check_trailing_angle_brackets: parsed_angle_bracket_args={:?}", + parsed_angle_bracket_args, + ); + if !parsed_angle_bracket_args { + return false; + } + + // Keep the span at the start so we can highlight the sequence of `>` characters to be + // removed. + let lo = self.token.span; + + // We need to look-ahead to see if we have `>` characters without moving the cursor forward + // (since we might have the field access case and the characters we're eating are + // actual operators and not trailing characters - ie `x.foo >> 3`). + let mut position = 0; + + // We can encounter `>` or `>>` tokens in any order, so we need to keep track of how + // many of each (so we can correctly pluralize our error messages) and continue to + // advance. + let mut number_of_shr = 0; + let mut number_of_gt = 0; + while self.look_ahead(position, |t| { + trace!("check_trailing_angle_brackets: t={:?}", t); + if *t == token::BinOp(token::BinOpToken::Shr) { + number_of_shr += 1; + true + } else if *t == token::Gt { + number_of_gt += 1; + true + } else { + false + } + }) { + position += 1; + } + + // If we didn't find any trailing `>` characters, then we have nothing to error about. + debug!( + "check_trailing_angle_brackets: number_of_gt={:?} number_of_shr={:?}", + number_of_gt, number_of_shr, + ); + if number_of_gt < 1 && number_of_shr < 1 { + return false; + } + + // Finally, double check that we have our end token as otherwise this is the + // second case. + if self.look_ahead(position, |t| { + trace!("check_trailing_angle_brackets: t={:?}", t); + end.contains(&&t.kind) + }) { + // Eat from where we started until the end token so that parsing can continue + // as if we didn't have those extra angle brackets. + self.eat_to_tokens(end); + let span = lo.until(self.token.span); + + let total_num_of_gt = number_of_gt + number_of_shr * 2; + self.struct_span_err( + span, + &format!("unmatched angle bracket{}", pluralize!(total_num_of_gt)), + ) + .span_suggestion( + span, + &format!("remove extra angle bracket{}", pluralize!(total_num_of_gt)), + "", + Applicability::MachineApplicable, + ) + .emit(); + return true; + } + false + } + + /// Check if a method call with an intended turbofish has been written without surrounding + /// angle brackets. + pub(super) fn check_turbofish_missing_angle_brackets(&mut self, segment: &mut PathSegment) { + if token::ModSep == self.token.kind && segment.args.is_none() { + let snapshot = self.create_snapshot_for_diagnostic(); + self.bump(); + let lo = self.token.span; + match self.parse_angle_args(None) { + Ok(args) => { + let span = lo.to(self.prev_token.span); + // Detect trailing `>` like in `x.collect::Vec<_>>()`. + let mut trailing_span = self.prev_token.span.shrink_to_hi(); + while self.token.kind == token::BinOp(token::Shr) + || self.token.kind == token::Gt + { + trailing_span = trailing_span.to(self.token.span); + self.bump(); + } + if self.token.kind == token::OpenDelim(Delimiter::Parenthesis) { + // Recover from bad turbofish: `foo.collect::Vec<_>()`. + let args = AngleBracketedArgs { args, span }.into(); + segment.args = args; + + self.struct_span_err( + span, + "generic parameters without surrounding angle brackets", + ) + .multipart_suggestion( + "surround the type parameters with angle brackets", + vec![ + (span.shrink_to_lo(), "<".to_string()), + (trailing_span, ">".to_string()), + ], + Applicability::MachineApplicable, + ) + .emit(); + } else { + // This doesn't look like an invalid turbofish, can't recover parse state. + self.restore_snapshot(snapshot); + } + } + Err(err) => { + // We couldn't parse generic parameters, unlikely to be a turbofish. Rely on + // generic parse error instead. + err.cancel(); + self.restore_snapshot(snapshot); + } + } + } + } + + /// When writing a turbofish with multiple type parameters missing the leading `::`, we will + /// encounter a parse error when encountering the first `,`. + pub(super) fn check_mistyped_turbofish_with_multiple_type_params( + &mut self, + mut e: DiagnosticBuilder<'a, ErrorGuaranteed>, + expr: &mut P, + ) -> PResult<'a, ()> { + if let ExprKind::Binary(binop, _, _) = &expr.kind + && let ast::BinOpKind::Lt = binop.node + && self.eat(&token::Comma) + { + let x = self.parse_seq_to_before_end( + &token::Gt, + SeqSep::trailing_allowed(token::Comma), + |p| p.parse_generic_arg(None), + ); + match x { + Ok((_, _, false)) => { + if self.eat(&token::Gt) { + e.span_suggestion_verbose( + binop.span.shrink_to_lo(), + TURBOFISH_SUGGESTION_STR, + "::", + Applicability::MaybeIncorrect, + ) + .emit(); + match self.parse_expr() { + Ok(_) => { + *expr = + self.mk_expr_err(expr.span.to(self.prev_token.span)); + return Ok(()); + } + Err(err) => { + *expr = self.mk_expr_err(expr.span); + err.cancel(); + } + } + } + } + Err(err) => { + err.cancel(); + } + _ => {} + } + } + Err(e) + } + + /// Check to see if a pair of chained operators looks like an attempt at chained comparison, + /// e.g. `1 < x <= 3`. If so, suggest either splitting the comparison into two, or + /// parenthesising the leftmost comparison. + fn attempt_chained_comparison_suggestion( + &mut self, + err: &mut Diagnostic, + inner_op: &Expr, + outer_op: &Spanned, + ) -> bool /* advanced the cursor */ { + if let ExprKind::Binary(op, ref l1, ref r1) = inner_op.kind { + if let ExprKind::Field(_, ident) = l1.kind + && ident.as_str().parse::().is_err() + && !matches!(r1.kind, ExprKind::Lit(_)) + { + // The parser has encountered `foo.bar y > z` and friends. + (BinOpKind::Gt, AssocOp::Greater | AssocOp::GreaterEqual) | + (BinOpKind::Ge, AssocOp::GreaterEqual | AssocOp::Greater) => { + let expr_to_str = |e: &Expr| { + self.span_to_snippet(e.span) + .unwrap_or_else(|_| pprust::expr_to_string(&e)) + }; + err.span_suggestion_verbose( + inner_op.span.shrink_to_hi(), + "split the comparison into two", + format!(" && {}", expr_to_str(&r1)), + Applicability::MaybeIncorrect, + ); + false // Keep the current parse behavior, where the AST is `(x < y) < z`. + } + // `x == y < z` + (BinOpKind::Eq, AssocOp::Less | AssocOp::LessEqual | AssocOp::Greater | AssocOp::GreaterEqual) => { + // Consume `z`/outer-op-rhs. + let snapshot = self.create_snapshot_for_diagnostic(); + match self.parse_expr() { + Ok(r2) => { + // We are sure that outer-op-rhs could be consumed, the suggestion is + // likely correct. + enclose(r1.span, r2.span); + true + } + Err(expr_err) => { + expr_err.cancel(); + self.restore_snapshot(snapshot); + false + } + } + } + // `x > y == z` + (BinOpKind::Lt | BinOpKind::Le | BinOpKind::Gt | BinOpKind::Ge, AssocOp::Equal) => { + let snapshot = self.create_snapshot_for_diagnostic(); + // At this point it is always valid to enclose the lhs in parentheses, no + // further checks are necessary. + match self.parse_expr() { + Ok(_) => { + enclose(l1.span, r1.span); + true + } + Err(expr_err) => { + expr_err.cancel(); + self.restore_snapshot(snapshot); + false + } + } + } + _ => false, + }; + } + false + } + + /// Produces an error if comparison operators are chained (RFC #558). + /// We only need to check the LHS, not the RHS, because all comparison ops have same + /// precedence (see `fn precedence`) and are left-associative (see `fn fixity`). + /// + /// This can also be hit if someone incorrectly writes `foo()` when they should have used + /// the turbofish (`foo::()`) syntax. We attempt some heuristic recovery if that is the + /// case. + /// + /// Keep in mind that given that `outer_op.is_comparison()` holds and comparison ops are left + /// associative we can infer that we have: + /// + /// ```text + /// outer_op + /// / \ + /// inner_op r2 + /// / \ + /// l1 r1 + /// ``` + pub(super) fn check_no_chained_comparison( + &mut self, + inner_op: &Expr, + outer_op: &Spanned, + ) -> PResult<'a, Option>> { + debug_assert!( + outer_op.node.is_comparison(), + "check_no_chained_comparison: {:?} is not comparison", + outer_op.node, + ); + + let mk_err_expr = + |this: &Self, span| Ok(Some(this.mk_expr(span, ExprKind::Err, AttrVec::new()))); + + match inner_op.kind { + ExprKind::Binary(op, ref l1, ref r1) if op.node.is_comparison() => { + let mut err = self.struct_span_err( + vec![op.span, self.prev_token.span], + "comparison operators cannot be chained", + ); + + let suggest = |err: &mut Diagnostic| { + err.span_suggestion_verbose( + op.span.shrink_to_lo(), + TURBOFISH_SUGGESTION_STR, + "::", + Applicability::MaybeIncorrect, + ); + }; + + // Include `<` to provide this recommendation even in a case like + // `Foo>>` + if op.node == BinOpKind::Lt && outer_op.node == AssocOp::Less + || outer_op.node == AssocOp::Greater + { + if outer_op.node == AssocOp::Less { + let snapshot = self.create_snapshot_for_diagnostic(); + self.bump(); + // So far we have parsed `foo(` or `foo< bar >::`, so we rewind the + // parser and bail out. + self.restore_snapshot(snapshot); + } + } + return if token::ModSep == self.token.kind { + // We have some certainty that this was a bad turbofish at this point. + // `foo< bar >::` + suggest(&mut err); + + let snapshot = self.create_snapshot_for_diagnostic(); + self.bump(); // `::` + + // Consume the rest of the likely `foo::new()` or return at `foo`. + match self.parse_expr() { + Ok(_) => { + // 99% certain that the suggestion is correct, continue parsing. + err.emit(); + // FIXME: actually check that the two expressions in the binop are + // paths and resynthesize new fn call expression instead of using + // `ExprKind::Err` placeholder. + mk_err_expr(self, inner_op.span.to(self.prev_token.span)) + } + Err(expr_err) => { + expr_err.cancel(); + // Not entirely sure now, but we bubble the error up with the + // suggestion. + self.restore_snapshot(snapshot); + Err(err) + } + } + } else if token::OpenDelim(Delimiter::Parenthesis) == self.token.kind { + // We have high certainty that this was a bad turbofish at this point. + // `foo< bar >(` + suggest(&mut err); + // Consume the fn call arguments. + match self.consume_fn_args() { + Err(()) => Err(err), + Ok(()) => { + err.emit(); + // FIXME: actually check that the two expressions in the binop are + // paths and resynthesize new fn call expression instead of using + // `ExprKind::Err` placeholder. + mk_err_expr(self, inner_op.span.to(self.prev_token.span)) + } + } + } else { + if !matches!(l1.kind, ExprKind::Lit(_)) + && !matches!(r1.kind, ExprKind::Lit(_)) + { + // All we know is that this is `foo < bar >` and *nothing* else. Try to + // be helpful, but don't attempt to recover. + err.help(TURBOFISH_SUGGESTION_STR); + err.help("or use `(...)` if you meant to specify fn arguments"); + } + + // If it looks like a genuine attempt to chain operators (as opposed to a + // misformatted turbofish, for instance), suggest a correct form. + if self.attempt_chained_comparison_suggestion(&mut err, inner_op, outer_op) + { + err.emit(); + mk_err_expr(self, inner_op.span.to(self.prev_token.span)) + } else { + // These cases cause too many knock-down errors, bail out (#61329). + Err(err) + } + }; + } + let recover = + self.attempt_chained_comparison_suggestion(&mut err, inner_op, outer_op); + err.emit(); + if recover { + return mk_err_expr(self, inner_op.span.to(self.prev_token.span)); + } + } + _ => {} + } + Ok(None) + } + + fn consume_fn_args(&mut self) -> Result<(), ()> { + let snapshot = self.create_snapshot_for_diagnostic(); + self.bump(); // `(` + + // Consume the fn call arguments. + let modifiers = [ + (token::OpenDelim(Delimiter::Parenthesis), 1), + (token::CloseDelim(Delimiter::Parenthesis), -1), + ]; + self.consume_tts(1, &modifiers); + + if self.token.kind == token::Eof { + // Not entirely sure that what we consumed were fn arguments, rollback. + self.restore_snapshot(snapshot); + Err(()) + } else { + // 99% certain that the suggestion is correct, continue parsing. + Ok(()) + } + } + + pub(super) fn maybe_report_ambiguous_plus(&mut self, impl_dyn_multi: bool, ty: &Ty) { + if impl_dyn_multi { + self.sess.emit_err(AmbiguousPlus { sum_ty: pprust::ty_to_string(&ty), span: ty.span }); + } + } + + /// Swift lets users write `Ty?` to mean `Option`. Parse the construct and recover from it. + pub(super) fn maybe_recover_from_question_mark(&mut self, ty: P) -> P { + if self.token == token::Question { + self.bump(); + self.struct_span_err(self.prev_token.span, "invalid `?` in type") + .span_label(self.prev_token.span, "`?` is only allowed on expressions, not types") + .multipart_suggestion( + "if you meant to express that the type might not contain a value, use the `Option` wrapper type", + vec![ + (ty.span.shrink_to_lo(), "Option<".to_string()), + (self.prev_token.span, ">".to_string()), + ], + Applicability::MachineApplicable, + ) + .emit(); + self.mk_ty(ty.span.to(self.prev_token.span), TyKind::Err) + } else { + ty + } + } + + pub(super) fn maybe_recover_from_bad_type_plus(&mut self, ty: &Ty) -> PResult<'a, ()> { + // Do not add `+` to expected tokens. + if !self.token.is_like_plus() { + return Ok(()); + } + + self.bump(); // `+` + let bounds = self.parse_generic_bounds(None)?; + let sum_span = ty.span.to(self.prev_token.span); + + let sub = match ty.kind { + TyKind::Rptr(ref lifetime, ref mut_ty) => { + let sum_with_parens = pprust::to_string(|s| { + s.s.word("&"); + s.print_opt_lifetime(lifetime); + s.print_mutability(mut_ty.mutbl, false); + s.popen(); + s.print_type(&mut_ty.ty); + if !bounds.is_empty() { + s.word(" + "); + s.print_type_bounds(&bounds); + } + s.pclose() + }); + + BadTypePlusSub::AddParen { sum_with_parens, span: sum_span } + } + TyKind::Ptr(..) | TyKind::BareFn(..) => BadTypePlusSub::ForgotParen { span: sum_span }, + _ => BadTypePlusSub::ExpectPath { span: sum_span }, + }; + + self.sess.emit_err(BadTypePlus { ty: pprust::ty_to_string(ty), span: sum_span, sub }); + + Ok(()) + } + + pub(super) fn recover_from_prefix_increment( + &mut self, + operand_expr: P, + op_span: Span, + prev_is_semi: bool, + ) -> PResult<'a, P> { + let standalone = + if prev_is_semi { IsStandalone::Standalone } else { IsStandalone::Subexpr }; + let kind = IncDecRecovery { standalone, op: IncOrDec::Inc, fixity: UnaryFixity::Pre }; + + self.recover_from_inc_dec(operand_expr, kind, op_span) + } + + pub(super) fn recover_from_postfix_increment( + &mut self, + operand_expr: P, + op_span: Span, + ) -> PResult<'a, P> { + let kind = IncDecRecovery { + standalone: IsStandalone::Maybe, + op: IncOrDec::Inc, + fixity: UnaryFixity::Post, + }; + + self.recover_from_inc_dec(operand_expr, kind, op_span) + } + + fn recover_from_inc_dec( + &mut self, + base: P, + kind: IncDecRecovery, + op_span: Span, + ) -> PResult<'a, P> { + let mut err = self.struct_span_err( + op_span, + &format!("Rust has no {} {} operator", kind.fixity, kind.op.name()), + ); + err.span_label(op_span, &format!("not a valid {} operator", kind.fixity)); + + let help_base_case = |mut err: DiagnosticBuilder<'_, _>, base| { + err.help(&format!("use `{}= 1` instead", kind.op.chr())); + err.emit(); + Ok(base) + }; + + // (pre, post) + let spans = match kind.fixity { + UnaryFixity::Pre => (op_span, base.span.shrink_to_hi()), + UnaryFixity::Post => (base.span.shrink_to_lo(), op_span), + }; + + match kind.standalone { + IsStandalone::Standalone => self.inc_dec_standalone_suggest(kind, spans).emit(&mut err), + IsStandalone::Subexpr => { + let Ok(base_src) = self.span_to_snippet(base.span) + else { return help_base_case(err, base) }; + match kind.fixity { + UnaryFixity::Pre => { + self.prefix_inc_dec_suggest(base_src, kind, spans).emit(&mut err) + } + UnaryFixity::Post => { + self.postfix_inc_dec_suggest(base_src, kind, spans).emit(&mut err) + } + } + } + IsStandalone::Maybe => { + let Ok(base_src) = self.span_to_snippet(base.span) + else { return help_base_case(err, base) }; + let sugg1 = match kind.fixity { + UnaryFixity::Pre => self.prefix_inc_dec_suggest(base_src, kind, spans), + UnaryFixity::Post => self.postfix_inc_dec_suggest(base_src, kind, spans), + }; + let sugg2 = self.inc_dec_standalone_suggest(kind, spans); + MultiSugg::emit_many( + &mut err, + "use `+= 1` instead", + Applicability::Unspecified, + [sugg1, sugg2].into_iter(), + ) + } + } + Err(err) + } + + fn prefix_inc_dec_suggest( + &mut self, + base_src: String, + kind: IncDecRecovery, + (pre_span, post_span): (Span, Span), + ) -> MultiSugg { + MultiSugg { + msg: format!("use `{}= 1` instead", kind.op.chr()), + patches: vec![ + (pre_span, "{ ".to_string()), + (post_span, format!(" {}= 1; {} }}", kind.op.chr(), base_src)), + ], + applicability: Applicability::MachineApplicable, + } + } + + fn postfix_inc_dec_suggest( + &mut self, + base_src: String, + kind: IncDecRecovery, + (pre_span, post_span): (Span, Span), + ) -> MultiSugg { + let tmp_var = if base_src.trim() == "tmp" { "tmp_" } else { "tmp" }; + MultiSugg { + msg: format!("use `{}= 1` instead", kind.op.chr()), + patches: vec![ + (pre_span, format!("{{ let {} = ", tmp_var)), + (post_span, format!("; {} {}= 1; {} }}", base_src, kind.op.chr(), tmp_var)), + ], + applicability: Applicability::HasPlaceholders, + } + } + + fn inc_dec_standalone_suggest( + &mut self, + kind: IncDecRecovery, + (pre_span, post_span): (Span, Span), + ) -> MultiSugg { + MultiSugg { + msg: format!("use `{}= 1` instead", kind.op.chr()), + patches: vec![(pre_span, String::new()), (post_span, format!(" {}= 1", kind.op.chr()))], + applicability: Applicability::MachineApplicable, + } + } + + /// Tries to recover from associated item paths like `[T]::AssocItem` / `(T, U)::AssocItem`. + /// Attempts to convert the base expression/pattern/type into a type, parses the `::AssocItem` + /// tail, and combines them into a `::AssocItem` expression/pattern/type. + pub(super) fn maybe_recover_from_bad_qpath( + &mut self, + base: P, + ) -> PResult<'a, P> { + // Do not add `::` to expected tokens. + if self.token == token::ModSep { + if let Some(ty) = base.to_ty() { + return self.maybe_recover_from_bad_qpath_stage_2(ty.span, ty); + } + } + Ok(base) + } + + /// Given an already parsed `Ty`, parses the `::AssocItem` tail and + /// combines them into a `::AssocItem` expression/pattern/type. + pub(super) fn maybe_recover_from_bad_qpath_stage_2( + &mut self, + ty_span: Span, + ty: P, + ) -> PResult<'a, P> { + self.expect(&token::ModSep)?; + + let mut path = ast::Path { segments: Vec::new(), span: DUMMY_SP, tokens: None }; + self.parse_path_segments(&mut path.segments, T::PATH_STYLE, None)?; + path.span = ty_span.to(self.prev_token.span); + + let ty_str = self.span_to_snippet(ty_span).unwrap_or_else(|_| pprust::ty_to_string(&ty)); + self.sess.emit_err(BadQPathStage2 { + span: path.span, + ty: format!("<{}>::{}", ty_str, pprust::path_to_string(&path)), + }); + + let path_span = ty_span.shrink_to_hi(); // Use an empty path since `position == 0`. + Ok(P(T::recovered(Some(QSelf { ty, path_span, position: 0 }), path))) + } + + pub fn maybe_consume_incorrect_semicolon(&mut self, items: &[P]) -> bool { + if self.token.kind == TokenKind::Semi { + self.bump(); + + let mut err = + IncorrectSemicolon { span: self.prev_token.span, opt_help: None, name: "" }; + + if !items.is_empty() { + let previous_item = &items[items.len() - 1]; + let previous_item_kind_name = match previous_item.kind { + // Say "braced struct" because tuple-structs and + // braceless-empty-struct declarations do take a semicolon. + ItemKind::Struct(..) => Some("braced struct"), + ItemKind::Enum(..) => Some("enum"), + ItemKind::Trait(..) => Some("trait"), + ItemKind::Union(..) => Some("union"), + _ => None, + }; + if let Some(name) = previous_item_kind_name { + err.opt_help = Some(()); + err.name = name; + } + } + self.sess.emit_err(err); + true + } else { + false + } + } + + /// Creates a `DiagnosticBuilder` for an unexpected token `t` and tries to recover if it is a + /// closing delimiter. + pub(super) fn unexpected_try_recover( + &mut self, + t: &TokenKind, + ) -> PResult<'a, bool /* recovered */> { + let token_str = pprust::token_kind_to_string(t); + let this_token_str = super::token_descr(&self.token); + let (prev_sp, sp) = match (&self.token.kind, self.subparser_name) { + // Point at the end of the macro call when reaching end of macro arguments. + (token::Eof, Some(_)) => { + let sp = self.sess.source_map().next_point(self.prev_token.span); + (sp, sp) + } + // We don't want to point at the following span after DUMMY_SP. + // This happens when the parser finds an empty TokenStream. + _ if self.prev_token.span == DUMMY_SP => (self.token.span, self.token.span), + // EOF, don't want to point at the following char, but rather the last token. + (token::Eof, None) => (self.prev_token.span, self.token.span), + _ => (self.prev_token.span.shrink_to_hi(), self.token.span), + }; + let msg = format!( + "expected `{}`, found {}", + token_str, + match (&self.token.kind, self.subparser_name) { + (token::Eof, Some(origin)) => format!("end of {origin}"), + _ => this_token_str, + }, + ); + let mut err = self.struct_span_err(sp, &msg); + let label_exp = format!("expected `{token_str}`"); + match self.recover_closing_delimiter(&[t.clone()], err) { + Err(e) => err = e, + Ok(recovered) => { + return Ok(recovered); + } + } + let sm = self.sess.source_map(); + if !sm.is_multiline(prev_sp.until(sp)) { + // When the spans are in the same line, it means that the only content + // between them is whitespace, point only at the found token. + err.span_label(sp, label_exp); + } else { + err.span_label(prev_sp, label_exp); + err.span_label(sp, "unexpected token"); + } + Err(err) + } + + pub(super) fn expect_semi(&mut self) -> PResult<'a, ()> { + if self.eat(&token::Semi) { + return Ok(()); + } + self.expect(&token::Semi).map(drop) // Error unconditionally + } + + /// Consumes alternative await syntaxes like `await!()`, `await `, + /// `await? `, `await()`, and `await { }`. + pub(super) fn recover_incorrect_await_syntax( + &mut self, + lo: Span, + await_sp: Span, + attrs: AttrVec, + ) -> PResult<'a, P> { + let (hi, expr, is_question) = if self.token == token::Not { + // Handle `await!()`. + self.recover_await_macro()? + } else { + self.recover_await_prefix(await_sp)? + }; + let sp = self.error_on_incorrect_await(lo, hi, &expr, is_question); + let kind = match expr.kind { + // Avoid knock-down errors as we don't know whether to interpret this as `foo().await?` + // or `foo()?.await` (the very reason we went with postfix syntax 😅). + ExprKind::Try(_) => ExprKind::Err, + _ => ExprKind::Await(expr), + }; + let expr = self.mk_expr(lo.to(sp), kind, attrs); + self.maybe_recover_from_bad_qpath(expr) + } + + fn recover_await_macro(&mut self) -> PResult<'a, (Span, P, bool)> { + self.expect(&token::Not)?; + self.expect(&token::OpenDelim(Delimiter::Parenthesis))?; + let expr = self.parse_expr()?; + self.expect(&token::CloseDelim(Delimiter::Parenthesis))?; + Ok((self.prev_token.span, expr, false)) + } + + fn recover_await_prefix(&mut self, await_sp: Span) -> PResult<'a, (Span, P, bool)> { + let is_question = self.eat(&token::Question); // Handle `await? `. + let expr = if self.token == token::OpenDelim(Delimiter::Brace) { + // Handle `await { }`. + // This needs to be handled separately from the next arm to avoid + // interpreting `await { }?` as `?.await`. + self.parse_block_expr(None, self.token.span, BlockCheckMode::Default, AttrVec::new()) + } else { + self.parse_expr() + } + .map_err(|mut err| { + err.span_label(await_sp, "while parsing this incorrect await expression"); + err + })?; + Ok((expr.span, expr, is_question)) + } + + fn error_on_incorrect_await(&self, lo: Span, hi: Span, expr: &Expr, is_question: bool) -> Span { + let span = lo.to(hi); + let applicability = match expr.kind { + ExprKind::Try(_) => Applicability::MaybeIncorrect, // `await ?` + _ => Applicability::MachineApplicable, + }; + + self.sess.emit_err(IncorrectAwait { + span, + sugg_span: (span, applicability), + expr: self.span_to_snippet(expr.span).unwrap_or_else(|_| pprust::expr_to_string(&expr)), + question_mark: if is_question { "?" } else { "" }, + }); + + span + } + + /// If encountering `future.await()`, consumes and emits an error. + pub(super) fn recover_from_await_method_call(&mut self) { + if self.token == token::OpenDelim(Delimiter::Parenthesis) + && self.look_ahead(1, |t| t == &token::CloseDelim(Delimiter::Parenthesis)) + { + // future.await() + let lo = self.token.span; + self.bump(); // ( + let span = lo.to(self.token.span); + self.bump(); // ) + + self.sess.emit_err(IncorrectUseOfAwait { span }); + } + } + + pub(super) fn try_macro_suggestion(&mut self) -> PResult<'a, P> { + let is_try = self.token.is_keyword(kw::Try); + let is_questionmark = self.look_ahead(1, |t| t == &token::Not); //check for ! + let is_open = self.look_ahead(2, |t| t == &token::OpenDelim(Delimiter::Parenthesis)); //check for ( + + if is_try && is_questionmark && is_open { + let lo = self.token.span; + self.bump(); //remove try + self.bump(); //remove ! + let try_span = lo.to(self.token.span); //we take the try!( span + self.bump(); //remove ( + let is_empty = self.token == token::CloseDelim(Delimiter::Parenthesis); //check if the block is empty + self.consume_block(Delimiter::Parenthesis, ConsumeClosingDelim::No); //eat the block + let hi = self.token.span; + self.bump(); //remove ) + let mut err = self.struct_span_err(lo.to(hi), "use of deprecated `try` macro"); + err.note("in the 2018 edition `try` is a reserved keyword, and the `try!()` macro is deprecated"); + let prefix = if is_empty { "" } else { "alternatively, " }; + if !is_empty { + err.multipart_suggestion( + "you can use the `?` operator instead", + vec![(try_span, "".to_owned()), (hi, "?".to_owned())], + Applicability::MachineApplicable, + ); + } + err.span_suggestion(lo.shrink_to_lo(), &format!("{prefix}you can still access the deprecated `try!()` macro using the \"raw identifier\" syntax"), "r#", Applicability::MachineApplicable); + err.emit(); + Ok(self.mk_expr_err(lo.to(hi))) + } else { + Err(self.expected_expression_found()) // The user isn't trying to invoke the try! macro + } + } + + /// Recovers a situation like `for ( $pat in $expr )` + /// and suggest writing `for $pat in $expr` instead. + /// + /// This should be called before parsing the `$block`. + pub(super) fn recover_parens_around_for_head( + &mut self, + pat: P, + begin_paren: Option, + ) -> P { + match (&self.token.kind, begin_paren) { + (token::CloseDelim(Delimiter::Parenthesis), Some(begin_par_sp)) => { + self.bump(); + + self.struct_span_err( + MultiSpan::from_spans(vec![begin_par_sp, self.prev_token.span]), + "unexpected parentheses surrounding `for` loop head", + ) + .multipart_suggestion( + "remove parentheses in `for` loop", + vec![(begin_par_sp, String::new()), (self.prev_token.span, String::new())], + // With e.g. `for (x) in y)` this would replace `(x) in y)` + // with `x) in y)` which is syntactically invalid. + // However, this is prevented before we get here. + Applicability::MachineApplicable, + ) + .emit(); + + // Unwrap `(pat)` into `pat` to avoid the `unused_parens` lint. + pat.and_then(|pat| match pat.kind { + PatKind::Paren(pat) => pat, + _ => P(pat), + }) + } + _ => pat, + } + } + + pub(super) fn could_ascription_be_path(&self, node: &ast::ExprKind) -> bool { + (self.token == token::Lt && // `foo:` + } + + pub(super) fn recover_seq_parse_error( + &mut self, + delim: Delimiter, + lo: Span, + result: PResult<'a, P>, + ) -> P { + match result { + Ok(x) => x, + Err(mut err) => { + err.emit(); + // Recover from parse error, callers expect the closing delim to be consumed. + self.consume_block(delim, ConsumeClosingDelim::Yes); + self.mk_expr(lo.to(self.prev_token.span), ExprKind::Err, AttrVec::new()) + } + } + } + + pub(super) fn recover_closing_delimiter( + &mut self, + tokens: &[TokenKind], + mut err: DiagnosticBuilder<'a, ErrorGuaranteed>, + ) -> PResult<'a, bool> { + let mut pos = None; + // We want to use the last closing delim that would apply. + for (i, unmatched) in self.unclosed_delims.iter().enumerate().rev() { + if tokens.contains(&token::CloseDelim(unmatched.expected_delim)) + && Some(self.token.span) > unmatched.unclosed_span + { + pos = Some(i); + } + } + match pos { + Some(pos) => { + // Recover and assume that the detected unclosed delimiter was meant for + // this location. Emit the diagnostic and act as if the delimiter was + // present for the parser's sake. + + // Don't attempt to recover from this unclosed delimiter more than once. + let unmatched = self.unclosed_delims.remove(pos); + let delim = TokenType::Token(token::CloseDelim(unmatched.expected_delim)); + if unmatched.found_delim.is_none() { + // We encountered `Eof`, set this fact here to avoid complaining about missing + // `fn main()` when we found place to suggest the closing brace. + *self.sess.reached_eof.borrow_mut() = true; + } + + // We want to suggest the inclusion of the closing delimiter where it makes + // the most sense, which is immediately after the last token: + // + // {foo(bar {}} + // ^ ^ + // | | + // | help: `)` may belong here + // | + // unclosed delimiter + if let Some(sp) = unmatched.unclosed_span { + let mut primary_span: Vec = + err.span.primary_spans().iter().cloned().collect(); + primary_span.push(sp); + let mut primary_span: MultiSpan = primary_span.into(); + for span_label in err.span.span_labels() { + if let Some(label) = span_label.label { + primary_span.push_span_label(span_label.span, label); + } + } + err.set_span(primary_span); + err.span_label(sp, "unclosed delimiter"); + } + // Backticks should be removed to apply suggestions. + let mut delim = delim.to_string(); + delim.retain(|c| c != '`'); + err.span_suggestion_short( + self.prev_token.span.shrink_to_hi(), + &format!("`{delim}` may belong here"), + delim, + Applicability::MaybeIncorrect, + ); + if unmatched.found_delim.is_none() { + // Encountered `Eof` when lexing blocks. Do not recover here to avoid knockdown + // errors which would be emitted elsewhere in the parser and let other error + // recovery consume the rest of the file. + Err(err) + } else { + err.emit(); + self.expected_tokens.clear(); // Reduce the number of errors. + Ok(true) + } + } + _ => Err(err), + } + } + + /// Eats tokens until we can be relatively sure we reached the end of the + /// statement. This is something of a best-effort heuristic. + /// + /// We terminate when we find an unmatched `}` (without consuming it). + pub(super) fn recover_stmt(&mut self) { + self.recover_stmt_(SemiColonMode::Ignore, BlockMode::Ignore) + } + + /// If `break_on_semi` is `Break`, then we will stop consuming tokens after + /// finding (and consuming) a `;` outside of `{}` or `[]` (note that this is + /// approximate -- it can mean we break too early due to macros, but that + /// should only lead to sub-optimal recovery, not inaccurate parsing). + /// + /// If `break_on_block` is `Break`, then we will stop consuming tokens + /// after finding (and consuming) a brace-delimited block. + pub(super) fn recover_stmt_( + &mut self, + break_on_semi: SemiColonMode, + break_on_block: BlockMode, + ) { + let mut brace_depth = 0; + let mut bracket_depth = 0; + let mut in_block = false; + debug!("recover_stmt_ enter loop (semi={:?}, block={:?})", break_on_semi, break_on_block); + loop { + debug!("recover_stmt_ loop {:?}", self.token); + match self.token.kind { + token::OpenDelim(Delimiter::Brace) => { + brace_depth += 1; + self.bump(); + if break_on_block == BlockMode::Break && brace_depth == 1 && bracket_depth == 0 + { + in_block = true; + } + } + token::OpenDelim(Delimiter::Bracket) => { + bracket_depth += 1; + self.bump(); + } + token::CloseDelim(Delimiter::Brace) => { + if brace_depth == 0 { + debug!("recover_stmt_ return - close delim {:?}", self.token); + break; + } + brace_depth -= 1; + self.bump(); + if in_block && bracket_depth == 0 && brace_depth == 0 { + debug!("recover_stmt_ return - block end {:?}", self.token); + break; + } + } + token::CloseDelim(Delimiter::Bracket) => { + bracket_depth -= 1; + if bracket_depth < 0 { + bracket_depth = 0; + } + self.bump(); + } + token::Eof => { + debug!("recover_stmt_ return - Eof"); + break; + } + token::Semi => { + self.bump(); + if break_on_semi == SemiColonMode::Break + && brace_depth == 0 + && bracket_depth == 0 + { + debug!("recover_stmt_ return - Semi"); + break; + } + } + token::Comma + if break_on_semi == SemiColonMode::Comma + && brace_depth == 0 + && bracket_depth == 0 => + { + debug!("recover_stmt_ return - Semi"); + break; + } + _ => self.bump(), + } + } + } + + pub(super) fn check_for_for_in_in_typo(&mut self, in_span: Span) { + if self.eat_keyword(kw::In) { + // a common typo: `for _ in in bar {}` + self.sess.emit_err(InInTypo { + span: self.prev_token.span, + sugg_span: in_span.until(self.prev_token.span), + }); + } + } + + pub(super) fn eat_incorrect_doc_comment_for_param_type(&mut self) { + if let token::DocComment(..) = self.token.kind { + self.struct_span_err( + self.token.span, + "documentation comments cannot be applied to a function parameter's type", + ) + .span_label(self.token.span, "doc comments are not allowed here") + .emit(); + self.bump(); + } else if self.token == token::Pound + && self.look_ahead(1, |t| *t == token::OpenDelim(Delimiter::Bracket)) + { + let lo = self.token.span; + // Skip every token until next possible arg. + while self.token != token::CloseDelim(Delimiter::Bracket) { + self.bump(); + } + let sp = lo.to(self.token.span); + self.bump(); + self.struct_span_err(sp, "attributes cannot be applied to a function parameter's type") + .span_label(sp, "attributes are not allowed here") + .emit(); + } + } + + pub(super) fn parameter_without_type( + &mut self, + err: &mut Diagnostic, + pat: P, + require_name: bool, + first_param: bool, + ) -> Option { + // If we find a pattern followed by an identifier, it could be an (incorrect) + // C-style parameter declaration. + if self.check_ident() + && self.look_ahead(1, |t| { + *t == token::Comma || *t == token::CloseDelim(Delimiter::Parenthesis) + }) + { + // `fn foo(String s) {}` + let ident = self.parse_ident().unwrap(); + let span = pat.span.with_hi(ident.span.hi()); + + err.span_suggestion( + span, + "declare the type after the parameter binding", + ": ", + Applicability::HasPlaceholders, + ); + return Some(ident); + } else if require_name + && (self.token == token::Comma + || self.token == token::Lt + || self.token == token::CloseDelim(Delimiter::Parenthesis)) + { + let rfc_note = "anonymous parameters are removed in the 2018 edition (see RFC 1685)"; + + let (ident, self_sugg, param_sugg, type_sugg, self_span, param_span, type_span) = + match pat.kind { + PatKind::Ident(_, ident, _) => ( + ident, + "self: ", + ": TypeName".to_string(), + "_: ", + pat.span.shrink_to_lo(), + pat.span.shrink_to_hi(), + pat.span.shrink_to_lo(), + ), + // Also catches `fn foo(&a)`. + PatKind::Ref(ref inner_pat, mutab) + if matches!(inner_pat.clone().into_inner().kind, PatKind::Ident(..)) => + { + match inner_pat.clone().into_inner().kind { + PatKind::Ident(_, ident, _) => { + let mutab = mutab.prefix_str(); + ( + ident, + "self: ", + format!("{ident}: &{mutab}TypeName"), + "_: ", + pat.span.shrink_to_lo(), + pat.span, + pat.span.shrink_to_lo(), + ) + } + _ => unreachable!(), + } + } + _ => { + // Otherwise, try to get a type and emit a suggestion. + if let Some(ty) = pat.to_ty() { + err.span_suggestion_verbose( + pat.span, + "explicitly ignore the parameter name", + format!("_: {}", pprust::ty_to_string(&ty)), + Applicability::MachineApplicable, + ); + err.note(rfc_note); + } + + return None; + } + }; + + // `fn foo(a, b) {}`, `fn foo(a, b) {}` or `fn foo(usize, usize) {}` + if first_param { + err.span_suggestion( + self_span, + "if this is a `self` type, give it a parameter name", + self_sugg, + Applicability::MaybeIncorrect, + ); + } + // Avoid suggesting that `fn foo(HashMap)` is fixed with a change to + // `fn foo(HashMap: TypeName)`. + if self.token != token::Lt { + err.span_suggestion( + param_span, + "if this is a parameter name, give it a type", + param_sugg, + Applicability::HasPlaceholders, + ); + } + err.span_suggestion( + type_span, + "if this is a type, explicitly ignore the parameter name", + type_sugg, + Applicability::MachineApplicable, + ); + err.note(rfc_note); + + // Don't attempt to recover by using the `X` in `X` as the parameter name. + return if self.token == token::Lt { None } else { Some(ident) }; + } + None + } + + pub(super) fn recover_arg_parse(&mut self) -> PResult<'a, (P, P)> { + let pat = self.parse_pat_no_top_alt(Some("argument name"))?; + self.expect(&token::Colon)?; + let ty = self.parse_ty()?; + + struct_span_err!( + self.diagnostic(), + pat.span, + E0642, + "patterns aren't allowed in methods without bodies", + ) + .span_suggestion_short( + pat.span, + "give this argument a name or use an underscore to ignore it", + "_", + Applicability::MachineApplicable, + ) + .emit(); + + // Pretend the pattern is `_`, to avoid duplicate errors from AST validation. + let pat = + P(Pat { kind: PatKind::Wild, span: pat.span, id: ast::DUMMY_NODE_ID, tokens: None }); + Ok((pat, ty)) + } + + pub(super) fn recover_bad_self_param(&mut self, mut param: Param) -> PResult<'a, Param> { + let sp = param.pat.span; + param.ty.kind = TyKind::Err; + self.struct_span_err(sp, "unexpected `self` parameter in function") + .span_label(sp, "must be the first parameter of an associated function") + .emit(); + Ok(param) + } + + pub(super) fn consume_block(&mut self, delim: Delimiter, consume_close: ConsumeClosingDelim) { + let mut brace_depth = 0; + loop { + if self.eat(&token::OpenDelim(delim)) { + brace_depth += 1; + } else if self.check(&token::CloseDelim(delim)) { + if brace_depth == 0 { + if let ConsumeClosingDelim::Yes = consume_close { + // Some of the callers of this method expect to be able to parse the + // closing delimiter themselves, so we leave it alone. Otherwise we advance + // the parser. + self.bump(); + } + return; + } else { + self.bump(); + brace_depth -= 1; + continue; + } + } else if self.token == token::Eof { + return; + } else { + self.bump(); + } + } + } + + pub(super) fn expected_expression_found(&self) -> DiagnosticBuilder<'a, ErrorGuaranteed> { + let (span, msg) = match (&self.token.kind, self.subparser_name) { + (&token::Eof, Some(origin)) => { + let sp = self.sess.source_map().next_point(self.prev_token.span); + (sp, format!("expected expression, found end of {origin}")) + } + _ => ( + self.token.span, + format!("expected expression, found {}", super::token_descr(&self.token),), + ), + }; + let mut err = self.struct_span_err(span, &msg); + let sp = self.sess.source_map().start_point(self.token.span); + if let Some(sp) = self.sess.ambiguous_block_expr_parse.borrow().get(&sp) { + self.sess.expr_parentheses_needed(&mut err, *sp); + } + err.span_label(span, "expected expression"); + err + } + + fn consume_tts( + &mut self, + mut acc: i64, // `i64` because malformed code can have more closing delims than opening. + // Not using `FxHashMap` due to `token::TokenKind: !Eq + !Hash`. + modifier: &[(token::TokenKind, i64)], + ) { + while acc > 0 { + if let Some((_, val)) = modifier.iter().find(|(t, _)| *t == self.token.kind) { + acc += *val; + } + if self.token.kind == token::Eof { + break; + } + self.bump(); + } + } + + /// Replace duplicated recovered parameters with `_` pattern to avoid unnecessary errors. + /// + /// This is necessary because at this point we don't know whether we parsed a function with + /// anonymous parameters or a function with names but no types. In order to minimize + /// unnecessary errors, we assume the parameters are in the shape of `fn foo(a, b, c)` where + /// the parameters are *names* (so we don't emit errors about not being able to find `b` in + /// the local scope), but if we find the same name multiple times, like in `fn foo(i8, i8)`, + /// we deduplicate them to not complain about duplicated parameter names. + pub(super) fn deduplicate_recovered_params_names(&self, fn_inputs: &mut Vec) { + let mut seen_inputs = FxHashSet::default(); + for input in fn_inputs.iter_mut() { + let opt_ident = if let (PatKind::Ident(_, ident, _), TyKind::Err) = + (&input.pat.kind, &input.ty.kind) + { + Some(*ident) + } else { + None + }; + if let Some(ident) = opt_ident { + if seen_inputs.contains(&ident) { + input.pat.kind = PatKind::Wild; + } + seen_inputs.insert(ident); + } + } + } + + /// Handle encountering a symbol in a generic argument list that is not a `,` or `>`. In this + /// case, we emit an error and try to suggest enclosing a const argument in braces if it looks + /// like the user has forgotten them. + pub fn handle_ambiguous_unbraced_const_arg( + &mut self, + args: &mut Vec, + ) -> PResult<'a, bool> { + // If we haven't encountered a closing `>`, then the argument is malformed. + // It's likely that the user has written a const expression without enclosing it + // in braces, so we try to recover here. + let arg = args.pop().unwrap(); + // FIXME: for some reason using `unexpected` or `expected_one_of_not_found` has + // adverse side-effects to subsequent errors and seems to advance the parser. + // We are causing this error here exclusively in case that a `const` expression + // could be recovered from the current parser state, even if followed by more + // arguments after a comma. + let mut err = self.struct_span_err( + self.token.span, + &format!("expected one of `,` or `>`, found {}", super::token_descr(&self.token)), + ); + err.span_label(self.token.span, "expected one of `,` or `>`"); + match self.recover_const_arg(arg.span(), err) { + Ok(arg) => { + args.push(AngleBracketedArg::Arg(arg)); + if self.eat(&token::Comma) { + return Ok(true); // Continue + } + } + Err(mut err) => { + args.push(arg); + // We will emit a more generic error later. + err.delay_as_bug(); + } + } + return Ok(false); // Don't continue. + } + + /// Attempt to parse a generic const argument that has not been enclosed in braces. + /// There are a limited number of expressions that are permitted without being encoded + /// in braces: + /// - Literals. + /// - Single-segment paths (i.e. standalone generic const parameters). + /// All other expressions that can be parsed will emit an error suggesting the expression be + /// wrapped in braces. + pub fn handle_unambiguous_unbraced_const_arg(&mut self) -> PResult<'a, P> { + let start = self.token.span; + let expr = self.parse_expr_res(Restrictions::CONST_EXPR, None).map_err(|mut err| { + err.span_label( + start.shrink_to_lo(), + "while parsing a const generic argument starting here", + ); + err + })?; + if !self.expr_is_valid_const_arg(&expr) { + self.struct_span_err( + expr.span, + "expressions must be enclosed in braces to be used as const generic \ + arguments", + ) + .multipart_suggestion( + "enclose the `const` expression in braces", + vec![ + (expr.span.shrink_to_lo(), "{ ".to_string()), + (expr.span.shrink_to_hi(), " }".to_string()), + ], + Applicability::MachineApplicable, + ) + .emit(); + } + Ok(expr) + } + + fn recover_const_param_decl(&mut self, ty_generics: Option<&Generics>) -> Option { + let snapshot = self.create_snapshot_for_diagnostic(); + let param = match self.parse_const_param(vec![]) { + Ok(param) => param, + Err(err) => { + err.cancel(); + self.restore_snapshot(snapshot); + return None; + } + }; + let mut err = + self.struct_span_err(param.span(), "unexpected `const` parameter declaration"); + err.span_label(param.span(), "expected a `const` expression, not a parameter declaration"); + if let (Some(generics), Ok(snippet)) = + (ty_generics, self.sess.source_map().span_to_snippet(param.span())) + { + let (span, sugg) = match &generics.params[..] { + [] => (generics.span, format!("<{snippet}>")), + [.., generic] => (generic.span().shrink_to_hi(), format!(", {snippet}")), + }; + err.multipart_suggestion( + "`const` parameters must be declared for the `impl`", + vec![(span, sugg), (param.span(), param.ident.to_string())], + Applicability::MachineApplicable, + ); + } + let value = self.mk_expr_err(param.span()); + err.emit(); + Some(GenericArg::Const(AnonConst { id: ast::DUMMY_NODE_ID, value })) + } + + pub fn recover_const_param_declaration( + &mut self, + ty_generics: Option<&Generics>, + ) -> PResult<'a, Option> { + // We have to check for a few different cases. + if let Some(arg) = self.recover_const_param_decl(ty_generics) { + return Ok(Some(arg)); + } + + // We haven't consumed `const` yet. + let start = self.token.span; + self.bump(); // `const` + + // Detect and recover from the old, pre-RFC2000 syntax for const generics. + let mut err = self + .struct_span_err(start, "expected lifetime, type, or constant, found keyword `const`"); + if self.check_const_arg() { + err.span_suggestion_verbose( + start.until(self.token.span), + "the `const` keyword is only needed in the definition of the type", + "", + Applicability::MaybeIncorrect, + ); + err.emit(); + Ok(Some(GenericArg::Const(self.parse_const_arg()?))) + } else { + let after_kw_const = self.token.span; + self.recover_const_arg(after_kw_const, err).map(Some) + } + } + + /// Try to recover from possible generic const argument without `{` and `}`. + /// + /// When encountering code like `foo::< bar + 3 >` or `foo::< bar - baz >` we suggest + /// `foo::<{ bar + 3 }>` and `foo::<{ bar - baz }>`, respectively. We only provide a suggestion + /// if we think that that the resulting expression would be well formed. + pub fn recover_const_arg( + &mut self, + start: Span, + mut err: DiagnosticBuilder<'a, ErrorGuaranteed>, + ) -> PResult<'a, GenericArg> { + let is_op_or_dot = AssocOp::from_token(&self.token) + .and_then(|op| { + if let AssocOp::Greater + | AssocOp::Less + | AssocOp::ShiftRight + | AssocOp::GreaterEqual + // Don't recover from `foo::`, because this could be an attempt to + // assign a value to a defaulted generic parameter. + | AssocOp::Assign + | AssocOp::AssignOp(_) = op + { + None + } else { + Some(op) + } + }) + .is_some() + || self.token.kind == TokenKind::Dot; + // This will be true when a trait object type `Foo +` or a path which was a `const fn` with + // type params has been parsed. + let was_op = + matches!(self.prev_token.kind, token::BinOp(token::Plus | token::Shr) | token::Gt); + if !is_op_or_dot && !was_op { + // We perform these checks and early return to avoid taking a snapshot unnecessarily. + return Err(err); + } + let snapshot = self.create_snapshot_for_diagnostic(); + if is_op_or_dot { + self.bump(); + } + match self.parse_expr_res(Restrictions::CONST_EXPR, None) { + Ok(expr) => { + // Find a mistake like `MyTrait`. + if token::EqEq == snapshot.token.kind { + err.span_suggestion( + snapshot.token.span, + "if you meant to use an associated type binding, replace `==` with `=`", + "=", + Applicability::MaybeIncorrect, + ); + let value = self.mk_expr_err(start.to(expr.span)); + err.emit(); + return Ok(GenericArg::Const(AnonConst { id: ast::DUMMY_NODE_ID, value })); + } else if token::Colon == snapshot.token.kind + && expr.span.lo() == snapshot.token.span.hi() + && matches!(expr.kind, ExprKind::Path(..)) + { + // Find a mistake like "foo::var:A". + err.span_suggestion( + snapshot.token.span, + "write a path separator here", + "::", + Applicability::MaybeIncorrect, + ); + err.emit(); + return Ok(GenericArg::Type(self.mk_ty(start.to(expr.span), TyKind::Err))); + } else if token::Comma == self.token.kind || self.token.kind.should_end_const_arg() + { + // Avoid the following output by checking that we consumed a full const arg: + // help: expressions must be enclosed in braces to be used as const generic + // arguments + // | + // LL | let sr: Vec<{ (u32, _, _) = vec![] }; + // | ^ ^ + return Ok(self.dummy_const_arg_needs_braces(err, start.to(expr.span))); + } + } + Err(err) => { + err.cancel(); + } + } + self.restore_snapshot(snapshot); + Err(err) + } + + /// Creates a dummy const argument, and reports that the expression must be enclosed in braces + pub fn dummy_const_arg_needs_braces( + &self, + mut err: DiagnosticBuilder<'a, ErrorGuaranteed>, + span: Span, + ) -> GenericArg { + err.multipart_suggestion( + "expressions must be enclosed in braces to be used as const generic \ + arguments", + vec![(span.shrink_to_lo(), "{ ".to_string()), (span.shrink_to_hi(), " }".to_string())], + Applicability::MaybeIncorrect, + ); + let value = self.mk_expr_err(span); + err.emit(); + GenericArg::Const(AnonConst { id: ast::DUMMY_NODE_ID, value }) + } + + /// Get the diagnostics for the cases where `move async` is found. + /// + /// `move_async_span` starts at the 'm' of the move keyword and ends with the 'c' of the async keyword + pub(super) fn incorrect_move_async_order_found( + &self, + move_async_span: Span, + ) -> DiagnosticBuilder<'a, ErrorGuaranteed> { + let mut err = + self.struct_span_err(move_async_span, "the order of `move` and `async` is incorrect"); + err.span_suggestion_verbose( + move_async_span, + "try switching the order", + "async move", + Applicability::MaybeIncorrect, + ); + err + } + + /// Some special error handling for the "top-level" patterns in a match arm, + /// `for` loop, `let`, &c. (in contrast to subpatterns within such). + pub(crate) fn maybe_recover_colon_colon_in_pat_typo( + &mut self, + mut first_pat: P, + expected: Expected, + ) -> P { + if token::Colon != self.token.kind { + return first_pat; + } + if !matches!(first_pat.kind, PatKind::Ident(_, _, None) | PatKind::Path(..)) + || !self.look_ahead(1, |token| token.is_ident() && !token.is_reserved_ident()) + { + return first_pat; + } + // The pattern looks like it might be a path with a `::` -> `:` typo: + // `match foo { bar:baz => {} }` + let span = self.token.span; + // We only emit "unexpected `:`" error here if we can successfully parse the + // whole pattern correctly in that case. + let snapshot = self.create_snapshot_for_diagnostic(); + + // Create error for "unexpected `:`". + match self.expected_one_of_not_found(&[], &[]) { + Err(mut err) => { + self.bump(); // Skip the `:`. + match self.parse_pat_no_top_alt(expected) { + Err(inner_err) => { + // Carry on as if we had not done anything, callers will emit a + // reasonable error. + inner_err.cancel(); + err.cancel(); + self.restore_snapshot(snapshot); + } + Ok(mut pat) => { + // We've parsed the rest of the pattern. + let new_span = first_pat.span.to(pat.span); + let mut show_sugg = false; + // Try to construct a recovered pattern. + match &mut pat.kind { + PatKind::Struct(qself @ None, path, ..) + | PatKind::TupleStruct(qself @ None, path, _) + | PatKind::Path(qself @ None, path) => match &first_pat.kind { + PatKind::Ident(_, ident, _) => { + path.segments.insert(0, PathSegment::from_ident(*ident)); + path.span = new_span; + show_sugg = true; + first_pat = pat; + } + PatKind::Path(old_qself, old_path) => { + path.segments = old_path + .segments + .iter() + .cloned() + .chain(take(&mut path.segments)) + .collect(); + path.span = new_span; + *qself = old_qself.clone(); + first_pat = pat; + show_sugg = true; + } + _ => {} + }, + PatKind::Ident(BindingMode::ByValue(Mutability::Not), ident, None) => { + match &first_pat.kind { + PatKind::Ident(_, old_ident, _) => { + let path = PatKind::Path( + None, + Path { + span: new_span, + segments: vec![ + PathSegment::from_ident(*old_ident), + PathSegment::from_ident(*ident), + ], + tokens: None, + }, + ); + first_pat = self.mk_pat(new_span, path); + show_sugg = true; + } + PatKind::Path(old_qself, old_path) => { + let mut segments = old_path.segments.clone(); + segments.push(PathSegment::from_ident(*ident)); + let path = PatKind::Path( + old_qself.clone(), + Path { span: new_span, segments, tokens: None }, + ); + first_pat = self.mk_pat(new_span, path); + show_sugg = true; + } + _ => {} + } + } + _ => {} + } + if show_sugg { + err.span_suggestion( + span, + "maybe write a path separator here", + "::", + Applicability::MaybeIncorrect, + ); + } else { + first_pat = self.mk_pat(new_span, PatKind::Wild); + } + err.emit(); + } + } + } + _ => { + // Carry on as if we had not done anything. This should be unreachable. + self.restore_snapshot(snapshot); + } + }; + first_pat + } + + pub(crate) fn maybe_recover_unexpected_block_label(&mut self) -> bool { + let Some(label) = self.eat_label().filter(|_| { + self.eat(&token::Colon) && self.token.kind == token::OpenDelim(Delimiter::Brace) + }) else { + return false; + }; + let span = label.ident.span.to(self.prev_token.span); + let mut err = self.struct_span_err(span, "block label not supported here"); + err.span_label(span, "not supported here"); + err.tool_only_span_suggestion( + label.ident.span.until(self.token.span), + "remove this block label", + "", + Applicability::MachineApplicable, + ); + err.emit(); + true + } + + /// Some special error handling for the "top-level" patterns in a match arm, + /// `for` loop, `let`, &c. (in contrast to subpatterns within such). + pub(crate) fn maybe_recover_unexpected_comma( + &mut self, + lo: Span, + rt: CommaRecoveryMode, + ) -> PResult<'a, ()> { + if self.token != token::Comma { + return Ok(()); + } + + // An unexpected comma after a top-level pattern is a clue that the + // user (perhaps more accustomed to some other language) forgot the + // parentheses in what should have been a tuple pattern; return a + // suggestion-enhanced error here rather than choking on the comma later. + let comma_span = self.token.span; + self.bump(); + if let Err(err) = self.skip_pat_list() { + // We didn't expect this to work anyway; we just wanted to advance to the + // end of the comma-sequence so we know the span to suggest parenthesizing. + err.cancel(); + } + let seq_span = lo.to(self.prev_token.span); + let mut err = self.struct_span_err(comma_span, "unexpected `,` in pattern"); + if let Ok(seq_snippet) = self.span_to_snippet(seq_span) { + err.multipart_suggestion( + &format!( + "try adding parentheses to match on a tuple{}", + if let CommaRecoveryMode::LikelyTuple = rt { "" } else { "..." }, + ), + vec![ + (seq_span.shrink_to_lo(), "(".to_string()), + (seq_span.shrink_to_hi(), ")".to_string()), + ], + Applicability::MachineApplicable, + ); + if let CommaRecoveryMode::EitherTupleOrPipe = rt { + err.span_suggestion( + seq_span, + "...or a vertical bar to match on multiple alternatives", + seq_snippet.replace(',', " |"), + Applicability::MachineApplicable, + ); + } + } + Err(err) + } + + pub(crate) fn maybe_recover_bounds_doubled_colon(&mut self, ty: &Ty) -> PResult<'a, ()> { + let TyKind::Path(qself, path) = &ty.kind else { return Ok(()) }; + let qself_position = qself.as_ref().map(|qself| qself.position); + for (i, segments) in path.segments.windows(2).enumerate() { + if qself_position.map(|pos| i < pos).unwrap_or(false) { + continue; + } + if let [a, b] = segments { + let (a_span, b_span) = (a.span(), b.span()); + let between_span = a_span.shrink_to_hi().to(b_span.shrink_to_lo()); + if self.span_to_snippet(between_span).as_ref().map(|a| &a[..]) == Ok(":: ") { + let mut err = self.struct_span_err( + path.span.shrink_to_hi(), + "expected `:` followed by trait or lifetime", + ); + err.span_suggestion( + between_span, + "use single colon", + ": ", + Applicability::MachineApplicable, + ); + return Err(err); + } + } + } + Ok(()) + } + + /// Parse and throw away a parenthesized comma separated + /// sequence of patterns until `)` is reached. + fn skip_pat_list(&mut self) -> PResult<'a, ()> { + while !self.check(&token::CloseDelim(Delimiter::Parenthesis)) { + self.parse_pat_no_top_alt(None)?; + if !self.eat(&token::Comma) { + return Ok(()); + } + } + Ok(()) + } +} diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs new file mode 100644 index 000000000..0719a0ef0 --- /dev/null +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -0,0 +1,3288 @@ +use super::diagnostics::SnapshotParser; +use super::pat::{CommaRecoveryMode, RecoverColon, RecoverComma, PARAM_EXPECTED}; +use super::ty::{AllowPlus, RecoverQPath, RecoverReturnSign}; +use super::{ + AttrWrapper, BlockMode, ClosureSpans, ForceCollect, Parser, PathStyle, Restrictions, + SemiColonMode, SeqSep, TokenExpectType, TokenType, TrailingToken, +}; +use crate::maybe_recover_from_interpolated_ty_qpath; + +use core::mem; +use rustc_ast::ptr::P; +use rustc_ast::token::{self, Delimiter, Token, TokenKind}; +use rustc_ast::tokenstream::Spacing; +use rustc_ast::util::classify; +use rustc_ast::util::literal::LitError; +use rustc_ast::util::parser::{prec_let_scrutinee_needs_par, AssocOp, Fixity}; +use rustc_ast::visit::Visitor; +use rustc_ast::{self as ast, AttrStyle, AttrVec, CaptureBy, ExprField, Lit, UnOp, DUMMY_NODE_ID}; +use rustc_ast::{AnonConst, BinOp, BinOpKind, FnDecl, FnRetTy, MacCall, Param, Ty, TyKind}; +use rustc_ast::{Arm, Async, BlockCheckMode, Expr, ExprKind, Label, Movability, RangeLimits}; +use rustc_ast::{ClosureBinder, StmtKind}; +use rustc_ast_pretty::pprust; +use rustc_data_structures::thin_vec::ThinVec; +use rustc_errors::{Applicability, Diagnostic, DiagnosticBuilder, ErrorGuaranteed, PResult}; +use rustc_session::lint::builtin::BREAK_WITH_LABEL_AND_LOOP; +use rustc_session::lint::BuiltinLintDiagnostics; +use rustc_span::source_map::{self, Span, Spanned}; +use rustc_span::symbol::{kw, sym, Ident, Symbol}; +use rustc_span::{BytePos, Pos}; + +/// Possibly accepts an `token::Interpolated` expression (a pre-parsed expression +/// dropped into the token stream, which happens while parsing the result of +/// macro expansion). Placement of these is not as complex as I feared it would +/// be. The important thing is to make sure that lookahead doesn't balk at +/// `token::Interpolated` tokens. +macro_rules! maybe_whole_expr { + ($p:expr) => { + if let token::Interpolated(nt) = &$p.token.kind { + match &**nt { + token::NtExpr(e) | token::NtLiteral(e) => { + let e = e.clone(); + $p.bump(); + return Ok(e); + } + token::NtPath(path) => { + let path = (**path).clone(); + $p.bump(); + return Ok($p.mk_expr( + $p.prev_token.span, + ExprKind::Path(None, path), + AttrVec::new(), + )); + } + token::NtBlock(block) => { + let block = block.clone(); + $p.bump(); + return Ok($p.mk_expr( + $p.prev_token.span, + ExprKind::Block(block, None), + AttrVec::new(), + )); + } + _ => {} + }; + } + }; +} + +#[derive(Debug)] +pub(super) enum LhsExpr { + NotYetParsed, + AttributesParsed(AttrWrapper), + AlreadyParsed(P), +} + +impl From> for LhsExpr { + /// Converts `Some(attrs)` into `LhsExpr::AttributesParsed(attrs)` + /// and `None` into `LhsExpr::NotYetParsed`. + /// + /// This conversion does not allocate. + fn from(o: Option) -> Self { + if let Some(attrs) = o { LhsExpr::AttributesParsed(attrs) } else { LhsExpr::NotYetParsed } + } +} + +impl From> for LhsExpr { + /// Converts the `expr: P` into `LhsExpr::AlreadyParsed(expr)`. + /// + /// This conversion does not allocate. + fn from(expr: P) -> Self { + LhsExpr::AlreadyParsed(expr) + } +} + +impl<'a> Parser<'a> { + /// Parses an expression. + #[inline] + pub fn parse_expr(&mut self) -> PResult<'a, P> { + self.current_closure.take(); + + self.parse_expr_res(Restrictions::empty(), None) + } + + /// Parses an expression, forcing tokens to be collected + pub fn parse_expr_force_collect(&mut self) -> PResult<'a, P> { + self.collect_tokens_no_attrs(|this| this.parse_expr()) + } + + pub fn parse_anon_const_expr(&mut self) -> PResult<'a, AnonConst> { + self.parse_expr().map(|value| AnonConst { id: DUMMY_NODE_ID, value }) + } + + fn parse_expr_catch_underscore(&mut self) -> PResult<'a, P> { + match self.parse_expr() { + Ok(expr) => Ok(expr), + Err(mut err) => match self.token.ident() { + Some((Ident { name: kw::Underscore, .. }, false)) + if self.look_ahead(1, |t| t == &token::Comma) => + { + // Special-case handling of `foo(_, _, _)` + err.emit(); + self.bump(); + Ok(self.mk_expr(self.prev_token.span, ExprKind::Err, AttrVec::new())) + } + _ => Err(err), + }, + } + } + + /// Parses a sequence of expressions delimited by parentheses. + fn parse_paren_expr_seq(&mut self) -> PResult<'a, Vec>> { + self.parse_paren_comma_seq(|p| p.parse_expr_catch_underscore()).map(|(r, _)| r) + } + + /// Parses an expression, subject to the given restrictions. + #[inline] + pub(super) fn parse_expr_res( + &mut self, + r: Restrictions, + already_parsed_attrs: Option, + ) -> PResult<'a, P> { + self.with_res(r, |this| this.parse_assoc_expr(already_parsed_attrs)) + } + + /// Parses an associative expression. + /// + /// This parses an expression accounting for associativity and precedence of the operators in + /// the expression. + #[inline] + fn parse_assoc_expr( + &mut self, + already_parsed_attrs: Option, + ) -> PResult<'a, P> { + self.parse_assoc_expr_with(0, already_parsed_attrs.into()) + } + + /// Parses an associative expression with operators of at least `min_prec` precedence. + pub(super) fn parse_assoc_expr_with( + &mut self, + min_prec: usize, + lhs: LhsExpr, + ) -> PResult<'a, P> { + let mut lhs = if let LhsExpr::AlreadyParsed(expr) = lhs { + expr + } else { + let attrs = match lhs { + LhsExpr::AttributesParsed(attrs) => Some(attrs), + _ => None, + }; + if [token::DotDot, token::DotDotDot, token::DotDotEq].contains(&self.token.kind) { + return self.parse_prefix_range_expr(attrs); + } else { + self.parse_prefix_expr(attrs)? + } + }; + let last_type_ascription_set = self.last_type_ascription.is_some(); + + if !self.should_continue_as_assoc_expr(&lhs) { + self.last_type_ascription = None; + return Ok(lhs); + } + + self.expected_tokens.push(TokenType::Operator); + while let Some(op) = self.check_assoc_op() { + // Adjust the span for interpolated LHS to point to the `$lhs` token + // and not to what it refers to. + let lhs_span = match self.prev_token.kind { + TokenKind::Interpolated(..) => self.prev_token.span, + _ => lhs.span, + }; + + let cur_op_span = self.token.span; + let restrictions = if op.node.is_assign_like() { + self.restrictions & Restrictions::NO_STRUCT_LITERAL + } else { + self.restrictions + }; + let prec = op.node.precedence(); + if prec < min_prec { + break; + } + // Check for deprecated `...` syntax + if self.token == token::DotDotDot && op.node == AssocOp::DotDotEq { + self.err_dotdotdot_syntax(self.token.span); + } + + if self.token == token::LArrow { + self.err_larrow_operator(self.token.span); + } + + self.bump(); + if op.node.is_comparison() { + if let Some(expr) = self.check_no_chained_comparison(&lhs, &op)? { + return Ok(expr); + } + } + + // Look for JS' `===` and `!==` and recover + if (op.node == AssocOp::Equal || op.node == AssocOp::NotEqual) + && self.token.kind == token::Eq + && self.prev_token.span.hi() == self.token.span.lo() + { + let sp = op.span.to(self.token.span); + let sugg = match op.node { + AssocOp::Equal => "==", + AssocOp::NotEqual => "!=", + _ => unreachable!(), + }; + self.struct_span_err(sp, &format!("invalid comparison operator `{sugg}=`")) + .span_suggestion_short( + sp, + &format!("`{s}=` is not a valid comparison operator, use `{s}`", s = sugg), + sugg, + Applicability::MachineApplicable, + ) + .emit(); + self.bump(); + } + + // Look for PHP's `<>` and recover + if op.node == AssocOp::Less + && self.token.kind == token::Gt + && self.prev_token.span.hi() == self.token.span.lo() + { + let sp = op.span.to(self.token.span); + self.struct_span_err(sp, "invalid comparison operator `<>`") + .span_suggestion_short( + sp, + "`<>` is not a valid comparison operator, use `!=`", + "!=", + Applicability::MachineApplicable, + ) + .emit(); + self.bump(); + } + + // Look for C++'s `<=>` and recover + if op.node == AssocOp::LessEqual + && self.token.kind == token::Gt + && self.prev_token.span.hi() == self.token.span.lo() + { + let sp = op.span.to(self.token.span); + self.struct_span_err(sp, "invalid comparison operator `<=>`") + .span_label( + sp, + "`<=>` is not a valid comparison operator, use `std::cmp::Ordering`", + ) + .emit(); + self.bump(); + } + + if self.prev_token == token::BinOp(token::Plus) + && self.token == token::BinOp(token::Plus) + && self.prev_token.span.between(self.token.span).is_empty() + { + let op_span = self.prev_token.span.to(self.token.span); + // Eat the second `+` + self.bump(); + lhs = self.recover_from_postfix_increment(lhs, op_span)?; + continue; + } + + let op = op.node; + // Special cases: + if op == AssocOp::As { + lhs = self.parse_assoc_op_cast(lhs, lhs_span, ExprKind::Cast)?; + continue; + } else if op == AssocOp::Colon { + lhs = self.parse_assoc_op_ascribe(lhs, lhs_span)?; + continue; + } else if op == AssocOp::DotDot || op == AssocOp::DotDotEq { + // If we didn't have to handle `x..`/`x..=`, it would be pretty easy to + // generalise it to the Fixity::None code. + lhs = self.parse_range_expr(prec, lhs, op, cur_op_span)?; + break; + } + + let fixity = op.fixity(); + let prec_adjustment = match fixity { + Fixity::Right => 0, + Fixity::Left => 1, + // We currently have no non-associative operators that are not handled above by + // the special cases. The code is here only for future convenience. + Fixity::None => 1, + }; + let rhs = self.with_res(restrictions - Restrictions::STMT_EXPR, |this| { + this.parse_assoc_expr_with(prec + prec_adjustment, LhsExpr::NotYetParsed) + })?; + + let span = self.mk_expr_sp(&lhs, lhs_span, rhs.span); + lhs = match op { + AssocOp::Add + | AssocOp::Subtract + | AssocOp::Multiply + | AssocOp::Divide + | AssocOp::Modulus + | AssocOp::LAnd + | AssocOp::LOr + | AssocOp::BitXor + | AssocOp::BitAnd + | AssocOp::BitOr + | AssocOp::ShiftLeft + | AssocOp::ShiftRight + | AssocOp::Equal + | AssocOp::Less + | AssocOp::LessEqual + | AssocOp::NotEqual + | AssocOp::Greater + | AssocOp::GreaterEqual => { + let ast_op = op.to_ast_binop().unwrap(); + let binary = self.mk_binary(source_map::respan(cur_op_span, ast_op), lhs, rhs); + self.mk_expr(span, binary, AttrVec::new()) + } + AssocOp::Assign => { + self.mk_expr(span, ExprKind::Assign(lhs, rhs, cur_op_span), AttrVec::new()) + } + AssocOp::AssignOp(k) => { + let aop = match k { + token::Plus => BinOpKind::Add, + token::Minus => BinOpKind::Sub, + token::Star => BinOpKind::Mul, + token::Slash => BinOpKind::Div, + token::Percent => BinOpKind::Rem, + token::Caret => BinOpKind::BitXor, + token::And => BinOpKind::BitAnd, + token::Or => BinOpKind::BitOr, + token::Shl => BinOpKind::Shl, + token::Shr => BinOpKind::Shr, + }; + let aopexpr = self.mk_assign_op(source_map::respan(cur_op_span, aop), lhs, rhs); + self.mk_expr(span, aopexpr, AttrVec::new()) + } + AssocOp::As | AssocOp::Colon | AssocOp::DotDot | AssocOp::DotDotEq => { + self.span_bug(span, "AssocOp should have been handled by special case") + } + }; + + if let Fixity::None = fixity { + break; + } + } + if last_type_ascription_set { + self.last_type_ascription = None; + } + Ok(lhs) + } + + fn should_continue_as_assoc_expr(&mut self, lhs: &Expr) -> bool { + match (self.expr_is_complete(lhs), AssocOp::from_token(&self.token)) { + // Semi-statement forms are odd: + // See https://github.com/rust-lang/rust/issues/29071 + (true, None) => false, + (false, _) => true, // Continue parsing the expression. + // An exhaustive check is done in the following block, but these are checked first + // because they *are* ambiguous but also reasonable looking incorrect syntax, so we + // want to keep their span info to improve diagnostics in these cases in a later stage. + (true, Some(AssocOp::Multiply)) | // `{ 42 } *foo = bar;` or `{ 42 } * 3` + (true, Some(AssocOp::Subtract)) | // `{ 42 } -5` + (true, Some(AssocOp::Add)) // `{ 42 } + 42 + // If the next token is a keyword, then the tokens above *are* unambiguously incorrect: + // `if x { a } else { b } && if y { c } else { d }` + if !self.look_ahead(1, |t| t.is_used_keyword()) => { + // These cases are ambiguous and can't be identified in the parser alone. + let sp = self.sess.source_map().start_point(self.token.span); + self.sess.ambiguous_block_expr_parse.borrow_mut().insert(sp, lhs.span); + false + } + (true, Some(AssocOp::LAnd)) | + (true, Some(AssocOp::LOr)) | + (true, Some(AssocOp::BitOr)) => { + // `{ 42 } &&x` (#61475) or `{ 42 } && if x { 1 } else { 0 }`. Separated from the + // above due to #74233. + // These cases are ambiguous and can't be identified in the parser alone. + // + // Bitwise AND is left out because guessing intent is hard. We can make + // suggestions based on the assumption that double-refs are rarely intentional, + // and closures are distinct enough that they don't get mixed up with their + // return value. + let sp = self.sess.source_map().start_point(self.token.span); + self.sess.ambiguous_block_expr_parse.borrow_mut().insert(sp, lhs.span); + false + } + (true, Some(ref op)) if !op.can_continue_expr_unambiguously() => false, + (true, Some(_)) => { + self.error_found_expr_would_be_stmt(lhs); + true + } + } + } + + /// We've found an expression that would be parsed as a statement, + /// but the next token implies this should be parsed as an expression. + /// For example: `if let Some(x) = x { x } else { 0 } / 2`. + fn error_found_expr_would_be_stmt(&self, lhs: &Expr) { + let mut err = self.struct_span_err( + self.token.span, + &format!("expected expression, found `{}`", pprust::token_to_string(&self.token),), + ); + err.span_label(self.token.span, "expected expression"); + self.sess.expr_parentheses_needed(&mut err, lhs.span); + err.emit(); + } + + /// Possibly translate the current token to an associative operator. + /// The method does not advance the current token. + /// + /// Also performs recovery for `and` / `or` which are mistaken for `&&` and `||` respectively. + fn check_assoc_op(&self) -> Option> { + let (op, span) = match (AssocOp::from_token(&self.token), self.token.ident()) { + // When parsing const expressions, stop parsing when encountering `>`. + ( + Some( + AssocOp::ShiftRight + | AssocOp::Greater + | AssocOp::GreaterEqual + | AssocOp::AssignOp(token::BinOpToken::Shr), + ), + _, + ) if self.restrictions.contains(Restrictions::CONST_EXPR) => { + return None; + } + (Some(op), _) => (op, self.token.span), + (None, Some((Ident { name: sym::and, span }, false))) => { + self.error_bad_logical_op("and", "&&", "conjunction"); + (AssocOp::LAnd, span) + } + (None, Some((Ident { name: sym::or, span }, false))) => { + self.error_bad_logical_op("or", "||", "disjunction"); + (AssocOp::LOr, span) + } + _ => return None, + }; + Some(source_map::respan(span, op)) + } + + /// Error on `and` and `or` suggesting `&&` and `||` respectively. + fn error_bad_logical_op(&self, bad: &str, good: &str, english: &str) { + self.struct_span_err(self.token.span, &format!("`{bad}` is not a logical operator")) + .span_suggestion_short( + self.token.span, + &format!("use `{good}` to perform logical {english}"), + good, + Applicability::MachineApplicable, + ) + .note("unlike in e.g., python and PHP, `&&` and `||` are used for logical operators") + .emit(); + } + + /// Checks if this expression is a successfully parsed statement. + fn expr_is_complete(&self, e: &Expr) -> bool { + self.restrictions.contains(Restrictions::STMT_EXPR) + && !classify::expr_requires_semi_to_be_stmt(e) + } + + /// Parses `x..y`, `x..=y`, and `x..`/`x..=`. + /// The other two variants are handled in `parse_prefix_range_expr` below. + fn parse_range_expr( + &mut self, + prec: usize, + lhs: P, + op: AssocOp, + cur_op_span: Span, + ) -> PResult<'a, P> { + let rhs = if self.is_at_start_of_range_notation_rhs() { + Some(self.parse_assoc_expr_with(prec + 1, LhsExpr::NotYetParsed)?) + } else { + None + }; + let rhs_span = rhs.as_ref().map_or(cur_op_span, |x| x.span); + let span = self.mk_expr_sp(&lhs, lhs.span, rhs_span); + let limits = + if op == AssocOp::DotDot { RangeLimits::HalfOpen } else { RangeLimits::Closed }; + let range = self.mk_range(Some(lhs), rhs, limits); + Ok(self.mk_expr(span, range, AttrVec::new())) + } + + fn is_at_start_of_range_notation_rhs(&self) -> bool { + if self.token.can_begin_expr() { + // Parse `for i in 1.. { }` as infinite loop, not as `for i in (1..{})`. + if self.token == token::OpenDelim(Delimiter::Brace) { + return !self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL); + } + true + } else { + false + } + } + + /// Parses prefix-forms of range notation: `..expr`, `..`, `..=expr`. + fn parse_prefix_range_expr(&mut self, attrs: Option) -> PResult<'a, P> { + // Check for deprecated `...` syntax. + if self.token == token::DotDotDot { + self.err_dotdotdot_syntax(self.token.span); + } + + debug_assert!( + [token::DotDot, token::DotDotDot, token::DotDotEq].contains(&self.token.kind), + "parse_prefix_range_expr: token {:?} is not DotDot/DotDotEq", + self.token + ); + + let limits = match self.token.kind { + token::DotDot => RangeLimits::HalfOpen, + _ => RangeLimits::Closed, + }; + let op = AssocOp::from_token(&self.token); + // FIXME: `parse_prefix_range_expr` is called when the current + // token is `DotDot`, `DotDotDot`, or `DotDotEq`. If we haven't already + // parsed attributes, then trying to parse them here will always fail. + // We should figure out how we want attributes on range expressions to work. + let attrs = self.parse_or_use_outer_attributes(attrs)?; + self.collect_tokens_for_expr(attrs, |this, attrs| { + let lo = this.token.span; + this.bump(); + let (span, opt_end) = if this.is_at_start_of_range_notation_rhs() { + // RHS must be parsed with more associativity than the dots. + this.parse_assoc_expr_with(op.unwrap().precedence() + 1, LhsExpr::NotYetParsed) + .map(|x| (lo.to(x.span), Some(x)))? + } else { + (lo, None) + }; + let range = this.mk_range(None, opt_end, limits); + Ok(this.mk_expr(span, range, attrs.into())) + }) + } + + /// Parses a prefix-unary-operator expr. + fn parse_prefix_expr(&mut self, attrs: Option) -> PResult<'a, P> { + let attrs = self.parse_or_use_outer_attributes(attrs)?; + let lo = self.token.span; + + macro_rules! make_it { + ($this:ident, $attrs:expr, |this, _| $body:expr) => { + $this.collect_tokens_for_expr($attrs, |$this, attrs| { + let (hi, ex) = $body?; + Ok($this.mk_expr(lo.to(hi), ex, attrs.into())) + }) + }; + } + + let this = self; + + // Note: when adding new unary operators, don't forget to adjust TokenKind::can_begin_expr() + match this.token.uninterpolate().kind { + token::Not => make_it!(this, attrs, |this, _| this.parse_unary_expr(lo, UnOp::Not)), // `!expr` + token::Tilde => make_it!(this, attrs, |this, _| this.recover_tilde_expr(lo)), // `~expr` + token::BinOp(token::Minus) => { + make_it!(this, attrs, |this, _| this.parse_unary_expr(lo, UnOp::Neg)) + } // `-expr` + token::BinOp(token::Star) => { + make_it!(this, attrs, |this, _| this.parse_unary_expr(lo, UnOp::Deref)) + } // `*expr` + token::BinOp(token::And) | token::AndAnd => { + make_it!(this, attrs, |this, _| this.parse_borrow_expr(lo)) + } + token::BinOp(token::Plus) if this.look_ahead(1, |tok| tok.is_numeric_lit()) => { + let mut err = this.struct_span_err(lo, "leading `+` is not supported"); + err.span_label(lo, "unexpected `+`"); + + // a block on the LHS might have been intended to be an expression instead + if let Some(sp) = this.sess.ambiguous_block_expr_parse.borrow().get(&lo) { + this.sess.expr_parentheses_needed(&mut err, *sp); + } else { + err.span_suggestion_verbose( + lo, + "try removing the `+`", + "", + Applicability::MachineApplicable, + ); + } + err.emit(); + + this.bump(); + this.parse_prefix_expr(None) + } // `+expr` + // Recover from `++x`: + token::BinOp(token::Plus) + if this.look_ahead(1, |t| *t == token::BinOp(token::Plus)) => + { + let prev_is_semi = this.prev_token == token::Semi; + let pre_span = this.token.span.to(this.look_ahead(1, |t| t.span)); + // Eat both `+`s. + this.bump(); + this.bump(); + + let operand_expr = this.parse_dot_or_call_expr(Default::default())?; + this.recover_from_prefix_increment(operand_expr, pre_span, prev_is_semi) + } + token::Ident(..) if this.token.is_keyword(kw::Box) => { + make_it!(this, attrs, |this, _| this.parse_box_expr(lo)) + } + token::Ident(..) if this.is_mistaken_not_ident_negation() => { + make_it!(this, attrs, |this, _| this.recover_not_expr(lo)) + } + _ => return this.parse_dot_or_call_expr(Some(attrs)), + } + } + + fn parse_prefix_expr_common(&mut self, lo: Span) -> PResult<'a, (Span, P)> { + self.bump(); + let expr = self.parse_prefix_expr(None); + let (span, expr) = self.interpolated_or_expr_span(expr)?; + Ok((lo.to(span), expr)) + } + + fn parse_unary_expr(&mut self, lo: Span, op: UnOp) -> PResult<'a, (Span, ExprKind)> { + let (span, expr) = self.parse_prefix_expr_common(lo)?; + Ok((span, self.mk_unary(op, expr))) + } + + // Recover on `!` suggesting for bitwise negation instead. + fn recover_tilde_expr(&mut self, lo: Span) -> PResult<'a, (Span, ExprKind)> { + self.struct_span_err(lo, "`~` cannot be used as a unary operator") + .span_suggestion_short( + lo, + "use `!` to perform bitwise not", + "!", + Applicability::MachineApplicable, + ) + .emit(); + + self.parse_unary_expr(lo, UnOp::Not) + } + + /// Parse `box expr`. + fn parse_box_expr(&mut self, lo: Span) -> PResult<'a, (Span, ExprKind)> { + let (span, expr) = self.parse_prefix_expr_common(lo)?; + self.sess.gated_spans.gate(sym::box_syntax, span); + Ok((span, ExprKind::Box(expr))) + } + + fn is_mistaken_not_ident_negation(&self) -> bool { + let token_cannot_continue_expr = |t: &Token| match t.uninterpolate().kind { + // These tokens can start an expression after `!`, but + // can't continue an expression after an ident + token::Ident(name, is_raw) => token::ident_can_begin_expr(name, t.span, is_raw), + token::Literal(..) | token::Pound => true, + _ => t.is_whole_expr(), + }; + self.token.is_ident_named(sym::not) && self.look_ahead(1, token_cannot_continue_expr) + } + + /// Recover on `not expr` in favor of `!expr`. + fn recover_not_expr(&mut self, lo: Span) -> PResult<'a, (Span, ExprKind)> { + // Emit the error... + let not_token = self.look_ahead(1, |t| t.clone()); + self.struct_span_err( + not_token.span, + &format!("unexpected {} after identifier", super::token_descr(¬_token)), + ) + .span_suggestion_short( + // Span the `not` plus trailing whitespace to avoid + // trailing whitespace after the `!` in our suggestion + self.sess.source_map().span_until_non_whitespace(lo.to(not_token.span)), + "use `!` to perform logical negation", + "!", + Applicability::MachineApplicable, + ) + .emit(); + + // ...and recover! + self.parse_unary_expr(lo, UnOp::Not) + } + + /// Returns the span of expr, if it was not interpolated or the span of the interpolated token. + fn interpolated_or_expr_span( + &self, + expr: PResult<'a, P>, + ) -> PResult<'a, (Span, P)> { + expr.map(|e| { + ( + match self.prev_token.kind { + TokenKind::Interpolated(..) => self.prev_token.span, + _ => e.span, + }, + e, + ) + }) + } + + fn parse_assoc_op_cast( + &mut self, + lhs: P, + lhs_span: Span, + expr_kind: fn(P, P) -> ExprKind, + ) -> PResult<'a, P> { + let mk_expr = |this: &mut Self, lhs: P, rhs: P| { + this.mk_expr( + this.mk_expr_sp(&lhs, lhs_span, rhs.span), + expr_kind(lhs, rhs), + AttrVec::new(), + ) + }; + + // Save the state of the parser before parsing type normally, in case there is a + // LessThan comparison after this cast. + let parser_snapshot_before_type = self.clone(); + let cast_expr = match self.parse_as_cast_ty() { + Ok(rhs) => mk_expr(self, lhs, rhs), + Err(type_err) => { + // Rewind to before attempting to parse the type with generics, to recover + // from situations like `x as usize < y` in which we first tried to parse + // `usize < y` as a type with generic arguments. + let parser_snapshot_after_type = mem::replace(self, parser_snapshot_before_type); + + // Check for typo of `'a: loop { break 'a }` with a missing `'`. + match (&lhs.kind, &self.token.kind) { + ( + // `foo: ` + ExprKind::Path(None, ast::Path { segments, .. }), + TokenKind::Ident(kw::For | kw::Loop | kw::While, false), + ) if segments.len() == 1 => { + let snapshot = self.create_snapshot_for_diagnostic(); + let label = Label { + ident: Ident::from_str_and_span( + &format!("'{}", segments[0].ident), + segments[0].ident.span, + ), + }; + match self.parse_labeled_expr(label, AttrVec::new(), false) { + Ok(expr) => { + type_err.cancel(); + self.struct_span_err(label.ident.span, "malformed loop label") + .span_suggestion( + label.ident.span, + "use the correct loop label format", + label.ident, + Applicability::MachineApplicable, + ) + .emit(); + return Ok(expr); + } + Err(err) => { + err.cancel(); + self.restore_snapshot(snapshot); + } + } + } + _ => {} + } + + match self.parse_path(PathStyle::Expr) { + Ok(path) => { + let (op_noun, op_verb) = match self.token.kind { + token::Lt => ("comparison", "comparing"), + token::BinOp(token::Shl) => ("shift", "shifting"), + _ => { + // We can end up here even without `<` being the next token, for + // example because `parse_ty_no_plus` returns `Err` on keywords, + // but `parse_path` returns `Ok` on them due to error recovery. + // Return original error and parser state. + *self = parser_snapshot_after_type; + return Err(type_err); + } + }; + + // Successfully parsed the type path leaving a `<` yet to parse. + type_err.cancel(); + + // Report non-fatal diagnostics, keep `x as usize` as an expression + // in AST and continue parsing. + let msg = format!( + "`<` is interpreted as a start of generic arguments for `{}`, not a {}", + pprust::path_to_string(&path), + op_noun, + ); + let span_after_type = parser_snapshot_after_type.token.span; + let expr = + mk_expr(self, lhs, self.mk_ty(path.span, TyKind::Path(None, path))); + + self.struct_span_err(self.token.span, &msg) + .span_label( + self.look_ahead(1, |t| t.span).to(span_after_type), + "interpreted as generic arguments", + ) + .span_label(self.token.span, format!("not interpreted as {op_noun}")) + .multipart_suggestion( + &format!("try {op_verb} the cast value"), + vec![ + (expr.span.shrink_to_lo(), "(".to_string()), + (expr.span.shrink_to_hi(), ")".to_string()), + ], + Applicability::MachineApplicable, + ) + .emit(); + + expr + } + Err(path_err) => { + // Couldn't parse as a path, return original error and parser state. + path_err.cancel(); + *self = parser_snapshot_after_type; + return Err(type_err); + } + } + } + }; + + self.parse_and_disallow_postfix_after_cast(cast_expr) + } + + /// Parses a postfix operators such as `.`, `?`, or index (`[]`) after a cast, + /// then emits an error and returns the newly parsed tree. + /// The resulting parse tree for `&x as T[0]` has a precedence of `((&x) as T)[0]`. + fn parse_and_disallow_postfix_after_cast( + &mut self, + cast_expr: P, + ) -> PResult<'a, P> { + let span = cast_expr.span; + let (cast_kind, maybe_ascription_span) = + if let ExprKind::Type(ascripted_expr, _) = &cast_expr.kind { + ("type ascription", Some(ascripted_expr.span.shrink_to_hi().with_hi(span.hi()))) + } else { + ("cast", None) + }; + + // Save the memory location of expr before parsing any following postfix operators. + // This will be compared with the memory location of the output expression. + // If they different we can assume we parsed another expression because the existing expression is not reallocated. + let addr_before = &*cast_expr as *const _ as usize; + let with_postfix = self.parse_dot_or_call_expr_with_(cast_expr, span)?; + let changed = addr_before != &*with_postfix as *const _ as usize; + + // Check if an illegal postfix operator has been added after the cast. + // If the resulting expression is not a cast, or has a different memory location, it is an illegal postfix operator. + if !matches!(with_postfix.kind, ExprKind::Cast(_, _) | ExprKind::Type(_, _)) || changed { + let msg = format!( + "{cast_kind} cannot be followed by {}", + match with_postfix.kind { + ExprKind::Index(_, _) => "indexing", + ExprKind::Try(_) => "`?`", + ExprKind::Field(_, _) => "a field access", + ExprKind::MethodCall(_, _, _) => "a method call", + ExprKind::Call(_, _) => "a function call", + ExprKind::Await(_) => "`.await`", + ExprKind::Err => return Ok(with_postfix), + _ => unreachable!("parse_dot_or_call_expr_with_ shouldn't produce this"), + } + ); + let mut err = self.struct_span_err(span, &msg); + + let suggest_parens = |err: &mut DiagnosticBuilder<'_, _>| { + let suggestions = vec![ + (span.shrink_to_lo(), "(".to_string()), + (span.shrink_to_hi(), ")".to_string()), + ]; + err.multipart_suggestion( + "try surrounding the expression in parentheses", + suggestions, + Applicability::MachineApplicable, + ); + }; + + // If type ascription is "likely an error", the user will already be getting a useful + // help message, and doesn't need a second. + if self.last_type_ascription.map_or(false, |last_ascription| last_ascription.1) { + self.maybe_annotate_with_ascription(&mut err, false); + } else if let Some(ascription_span) = maybe_ascription_span { + let is_nightly = self.sess.unstable_features.is_nightly_build(); + if is_nightly { + suggest_parens(&mut err); + } + err.span_suggestion( + ascription_span, + &format!( + "{}remove the type ascription", + if is_nightly { "alternatively, " } else { "" } + ), + "", + if is_nightly { + Applicability::MaybeIncorrect + } else { + Applicability::MachineApplicable + }, + ); + } else { + suggest_parens(&mut err); + } + err.emit(); + }; + Ok(with_postfix) + } + + fn parse_assoc_op_ascribe(&mut self, lhs: P, lhs_span: Span) -> PResult<'a, P> { + let maybe_path = self.could_ascription_be_path(&lhs.kind); + self.last_type_ascription = Some((self.prev_token.span, maybe_path)); + let lhs = self.parse_assoc_op_cast(lhs, lhs_span, ExprKind::Type)?; + self.sess.gated_spans.gate(sym::type_ascription, lhs.span); + Ok(lhs) + } + + /// Parse `& mut? ` or `& raw [ const | mut ] `. + fn parse_borrow_expr(&mut self, lo: Span) -> PResult<'a, (Span, ExprKind)> { + self.expect_and()?; + let has_lifetime = self.token.is_lifetime() && self.look_ahead(1, |t| t != &token::Colon); + let lifetime = has_lifetime.then(|| self.expect_lifetime()); // For recovery, see below. + let (borrow_kind, mutbl) = self.parse_borrow_modifiers(lo); + let expr = self.parse_prefix_expr(None); + let (hi, expr) = self.interpolated_or_expr_span(expr)?; + let span = lo.to(hi); + if let Some(lt) = lifetime { + self.error_remove_borrow_lifetime(span, lt.ident.span); + } + Ok((span, ExprKind::AddrOf(borrow_kind, mutbl, expr))) + } + + fn error_remove_borrow_lifetime(&self, span: Span, lt_span: Span) { + self.struct_span_err(span, "borrow expressions cannot be annotated with lifetimes") + .span_label(lt_span, "annotated with lifetime here") + .span_suggestion( + lt_span, + "remove the lifetime annotation", + "", + Applicability::MachineApplicable, + ) + .emit(); + } + + /// Parse `mut?` or `raw [ const | mut ]`. + fn parse_borrow_modifiers(&mut self, lo: Span) -> (ast::BorrowKind, ast::Mutability) { + if self.check_keyword(kw::Raw) && self.look_ahead(1, Token::is_mutability) { + // `raw [ const | mut ]`. + let found_raw = self.eat_keyword(kw::Raw); + assert!(found_raw); + let mutability = self.parse_const_or_mut().unwrap(); + self.sess.gated_spans.gate(sym::raw_ref_op, lo.to(self.prev_token.span)); + (ast::BorrowKind::Raw, mutability) + } else { + // `mut?` + (ast::BorrowKind::Ref, self.parse_mutability()) + } + } + + /// Parses `a.b` or `a(13)` or `a[4]` or just `a`. + fn parse_dot_or_call_expr(&mut self, attrs: Option) -> PResult<'a, P> { + let attrs = self.parse_or_use_outer_attributes(attrs)?; + self.collect_tokens_for_expr(attrs, |this, attrs| { + let base = this.parse_bottom_expr(); + let (span, base) = this.interpolated_or_expr_span(base)?; + this.parse_dot_or_call_expr_with(base, span, attrs) + }) + } + + pub(super) fn parse_dot_or_call_expr_with( + &mut self, + e0: P, + lo: Span, + mut attrs: Vec, + ) -> PResult<'a, P> { + // Stitch the list of outer attributes onto the return value. + // A little bit ugly, but the best way given the current code + // structure + self.parse_dot_or_call_expr_with_(e0, lo).map(|expr| { + expr.map(|mut expr| { + attrs.extend::>(expr.attrs.into()); + expr.attrs = attrs.into(); + expr + }) + }) + } + + fn parse_dot_or_call_expr_with_(&mut self, mut e: P, lo: Span) -> PResult<'a, P> { + loop { + let has_question = if self.prev_token.kind == TokenKind::Ident(kw::Return, false) { + // we are using noexpect here because we don't expect a `?` directly after a `return` + // which could be suggested otherwise + self.eat_noexpect(&token::Question) + } else { + self.eat(&token::Question) + }; + if has_question { + // `expr?` + e = self.mk_expr(lo.to(self.prev_token.span), ExprKind::Try(e), AttrVec::new()); + continue; + } + let has_dot = if self.prev_token.kind == TokenKind::Ident(kw::Return, false) { + // we are using noexpect here because we don't expect a `.` directly after a `return` + // which could be suggested otherwise + self.eat_noexpect(&token::Dot) + } else { + self.eat(&token::Dot) + }; + if has_dot { + // expr.f + e = self.parse_dot_suffix_expr(lo, e)?; + continue; + } + if self.expr_is_complete(&e) { + return Ok(e); + } + e = match self.token.kind { + token::OpenDelim(Delimiter::Parenthesis) => self.parse_fn_call_expr(lo, e), + token::OpenDelim(Delimiter::Bracket) => self.parse_index_expr(lo, e)?, + _ => return Ok(e), + } + } + } + + fn look_ahead_type_ascription_as_field(&mut self) -> bool { + self.look_ahead(1, |t| t.is_ident()) + && self.look_ahead(2, |t| t == &token::Colon) + && self.look_ahead(3, |t| t.can_begin_expr()) + } + + fn parse_dot_suffix_expr(&mut self, lo: Span, base: P) -> PResult<'a, P> { + match self.token.uninterpolate().kind { + token::Ident(..) => self.parse_dot_suffix(base, lo), + token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) => { + Ok(self.parse_tuple_field_access_expr(lo, base, symbol, suffix, None)) + } + token::Literal(token::Lit { kind: token::Float, symbol, suffix }) => { + Ok(self.parse_tuple_field_access_expr_float(lo, base, symbol, suffix)) + } + _ => { + self.error_unexpected_after_dot(); + Ok(base) + } + } + } + + fn error_unexpected_after_dot(&self) { + // FIXME Could factor this out into non_fatal_unexpected or something. + let actual = pprust::token_to_string(&self.token); + self.struct_span_err(self.token.span, &format!("unexpected token: `{actual}`")).emit(); + } + + // We need an identifier or integer, but the next token is a float. + // Break the float into components to extract the identifier or integer. + // FIXME: With current `TokenCursor` it's hard to break tokens into more than 2 + // parts unless those parts are processed immediately. `TokenCursor` should either + // support pushing "future tokens" (would be also helpful to `break_and_eat`), or + // we should break everything including floats into more basic proc-macro style + // tokens in the lexer (probably preferable). + fn parse_tuple_field_access_expr_float( + &mut self, + lo: Span, + base: P, + float: Symbol, + suffix: Option, + ) -> P { + #[derive(Debug)] + enum FloatComponent { + IdentLike(String), + Punct(char), + } + use FloatComponent::*; + + let float_str = float.as_str(); + let mut components = Vec::new(); + let mut ident_like = String::new(); + for c in float_str.chars() { + if c == '_' || c.is_ascii_alphanumeric() { + ident_like.push(c); + } else if matches!(c, '.' | '+' | '-') { + if !ident_like.is_empty() { + components.push(IdentLike(mem::take(&mut ident_like))); + } + components.push(Punct(c)); + } else { + panic!("unexpected character in a float token: {:?}", c) + } + } + if !ident_like.is_empty() { + components.push(IdentLike(ident_like)); + } + + // With proc macros the span can refer to anything, the source may be too short, + // or too long, or non-ASCII. It only makes sense to break our span into components + // if its underlying text is identical to our float literal. + let span = self.token.span; + let can_take_span_apart = + || self.span_to_snippet(span).as_deref() == Ok(float_str).as_deref(); + + match &*components { + // 1e2 + [IdentLike(i)] => { + self.parse_tuple_field_access_expr(lo, base, Symbol::intern(&i), suffix, None) + } + // 1. + [IdentLike(i), Punct('.')] => { + let (ident_span, dot_span) = if can_take_span_apart() { + let (span, ident_len) = (span.data(), BytePos::from_usize(i.len())); + let ident_span = span.with_hi(span.lo + ident_len); + let dot_span = span.with_lo(span.lo + ident_len); + (ident_span, dot_span) + } else { + (span, span) + }; + assert!(suffix.is_none()); + let symbol = Symbol::intern(&i); + self.token = Token::new(token::Ident(symbol, false), ident_span); + let next_token = (Token::new(token::Dot, dot_span), self.token_spacing); + self.parse_tuple_field_access_expr(lo, base, symbol, None, Some(next_token)) + } + // 1.2 | 1.2e3 + [IdentLike(i1), Punct('.'), IdentLike(i2)] => { + let (ident1_span, dot_span, ident2_span) = if can_take_span_apart() { + let (span, ident1_len) = (span.data(), BytePos::from_usize(i1.len())); + let ident1_span = span.with_hi(span.lo + ident1_len); + let dot_span = span + .with_lo(span.lo + ident1_len) + .with_hi(span.lo + ident1_len + BytePos(1)); + let ident2_span = self.token.span.with_lo(span.lo + ident1_len + BytePos(1)); + (ident1_span, dot_span, ident2_span) + } else { + (span, span, span) + }; + let symbol1 = Symbol::intern(&i1); + self.token = Token::new(token::Ident(symbol1, false), ident1_span); + // This needs to be `Spacing::Alone` to prevent regressions. + // See issue #76399 and PR #76285 for more details + let next_token1 = (Token::new(token::Dot, dot_span), Spacing::Alone); + let base1 = + self.parse_tuple_field_access_expr(lo, base, symbol1, None, Some(next_token1)); + let symbol2 = Symbol::intern(&i2); + let next_token2 = Token::new(token::Ident(symbol2, false), ident2_span); + self.bump_with((next_token2, self.token_spacing)); // `.` + self.parse_tuple_field_access_expr(lo, base1, symbol2, suffix, None) + } + // 1e+ | 1e- (recovered) + [IdentLike(_), Punct('+' | '-')] | + // 1e+2 | 1e-2 + [IdentLike(_), Punct('+' | '-'), IdentLike(_)] | + // 1.2e+ | 1.2e- + [IdentLike(_), Punct('.'), IdentLike(_), Punct('+' | '-')] | + // 1.2e+3 | 1.2e-3 + [IdentLike(_), Punct('.'), IdentLike(_), Punct('+' | '-'), IdentLike(_)] => { + // See the FIXME about `TokenCursor` above. + self.error_unexpected_after_dot(); + base + } + _ => panic!("unexpected components in a float token: {:?}", components), + } + } + + fn parse_tuple_field_access_expr( + &mut self, + lo: Span, + base: P, + field: Symbol, + suffix: Option, + next_token: Option<(Token, Spacing)>, + ) -> P { + match next_token { + Some(next_token) => self.bump_with(next_token), + None => self.bump(), + } + let span = self.prev_token.span; + let field = ExprKind::Field(base, Ident::new(field, span)); + self.expect_no_suffix(span, "a tuple index", suffix); + self.mk_expr(lo.to(span), field, AttrVec::new()) + } + + /// Parse a function call expression, `expr(...)`. + fn parse_fn_call_expr(&mut self, lo: Span, fun: P) -> P { + let snapshot = if self.token.kind == token::OpenDelim(Delimiter::Parenthesis) + && self.look_ahead_type_ascription_as_field() + { + Some((self.create_snapshot_for_diagnostic(), fun.kind.clone())) + } else { + None + }; + let open_paren = self.token.span; + + let mut seq = self.parse_paren_expr_seq().map(|args| { + self.mk_expr(lo.to(self.prev_token.span), self.mk_call(fun, args), AttrVec::new()) + }); + if let Some(expr) = + self.maybe_recover_struct_lit_bad_delims(lo, open_paren, &mut seq, snapshot) + { + return expr; + } + self.recover_seq_parse_error(Delimiter::Parenthesis, lo, seq) + } + + /// If we encounter a parser state that looks like the user has written a `struct` literal with + /// parentheses instead of braces, recover the parser state and provide suggestions. + #[instrument(skip(self, seq, snapshot), level = "trace")] + fn maybe_recover_struct_lit_bad_delims( + &mut self, + lo: Span, + open_paren: Span, + seq: &mut PResult<'a, P>, + snapshot: Option<(SnapshotParser<'a>, ExprKind)>, + ) -> Option> { + match (seq.as_mut(), snapshot) { + (Err(err), Some((mut snapshot, ExprKind::Path(None, path)))) => { + let name = pprust::path_to_string(&path); + snapshot.bump(); // `(` + match snapshot.parse_struct_fields(path, false, Delimiter::Parenthesis) { + Ok((fields, ..)) + if snapshot.eat(&token::CloseDelim(Delimiter::Parenthesis)) => + { + // We are certain we have `Enum::Foo(a: 3, b: 4)`, suggest + // `Enum::Foo { a: 3, b: 4 }` or `Enum::Foo(3, 4)`. + self.restore_snapshot(snapshot); + let close_paren = self.prev_token.span; + let span = lo.to(self.prev_token.span); + if !fields.is_empty() { + let replacement_err = self.struct_span_err( + span, + "invalid `struct` delimiters or `fn` call arguments", + ); + mem::replace(err, replacement_err).cancel(); + + err.multipart_suggestion( + &format!("if `{name}` is a struct, use braces as delimiters"), + vec![ + (open_paren, " { ".to_string()), + (close_paren, " }".to_string()), + ], + Applicability::MaybeIncorrect, + ); + err.multipart_suggestion( + &format!("if `{name}` is a function, use the arguments directly"), + fields + .into_iter() + .map(|field| (field.span.until(field.expr.span), String::new())) + .collect(), + Applicability::MaybeIncorrect, + ); + err.emit(); + } else { + err.emit(); + } + return Some(self.mk_expr_err(span)); + } + Ok(_) => {} + Err(mut err) => { + err.emit(); + } + } + } + _ => {} + } + None + } + + /// Parse an indexing expression `expr[...]`. + fn parse_index_expr(&mut self, lo: Span, base: P) -> PResult<'a, P> { + self.bump(); // `[` + let index = self.parse_expr()?; + self.expect(&token::CloseDelim(Delimiter::Bracket))?; + Ok(self.mk_expr(lo.to(self.prev_token.span), self.mk_index(base, index), AttrVec::new())) + } + + /// Assuming we have just parsed `.`, continue parsing into an expression. + fn parse_dot_suffix(&mut self, self_arg: P, lo: Span) -> PResult<'a, P> { + if self.token.uninterpolated_span().rust_2018() && self.eat_keyword(kw::Await) { + return Ok(self.mk_await_expr(self_arg, lo)); + } + + let fn_span_lo = self.token.span; + let mut segment = self.parse_path_segment(PathStyle::Expr, None)?; + self.check_trailing_angle_brackets(&segment, &[&token::OpenDelim(Delimiter::Parenthesis)]); + self.check_turbofish_missing_angle_brackets(&mut segment); + + if self.check(&token::OpenDelim(Delimiter::Parenthesis)) { + // Method call `expr.f()` + let mut args = self.parse_paren_expr_seq()?; + args.insert(0, self_arg); + + let fn_span = fn_span_lo.to(self.prev_token.span); + let span = lo.to(self.prev_token.span); + Ok(self.mk_expr(span, ExprKind::MethodCall(segment, args, fn_span), AttrVec::new())) + } else { + // Field access `expr.f` + if let Some(args) = segment.args { + self.struct_span_err( + args.span(), + "field expressions cannot have generic arguments", + ) + .emit(); + } + + let span = lo.to(self.prev_token.span); + Ok(self.mk_expr(span, ExprKind::Field(self_arg, segment.ident), AttrVec::new())) + } + } + + /// At the bottom (top?) of the precedence hierarchy, + /// Parses things like parenthesized exprs, macros, `return`, etc. + /// + /// N.B., this does not parse outer attributes, and is private because it only works + /// correctly if called from `parse_dot_or_call_expr()`. + fn parse_bottom_expr(&mut self) -> PResult<'a, P> { + maybe_recover_from_interpolated_ty_qpath!(self, true); + maybe_whole_expr!(self); + + // Outer attributes are already parsed and will be + // added to the return value after the fact. + // + // Therefore, prevent sub-parser from parsing + // attributes by giving them an empty "already-parsed" list. + let attrs = AttrVec::new(); + + // Note: when adding new syntax here, don't forget to adjust `TokenKind::can_begin_expr()`. + let lo = self.token.span; + if let token::Literal(_) = self.token.kind { + // This match arm is a special-case of the `_` match arm below and + // could be removed without changing functionality, but it's faster + // to have it here, especially for programs with large constants. + self.parse_lit_expr(attrs) + } else if self.check(&token::OpenDelim(Delimiter::Parenthesis)) { + self.parse_tuple_parens_expr(attrs) + } else if self.check(&token::OpenDelim(Delimiter::Brace)) { + self.parse_block_expr(None, lo, BlockCheckMode::Default, attrs) + } else if self.check(&token::BinOp(token::Or)) || self.check(&token::OrOr) { + self.parse_closure_expr(attrs).map_err(|mut err| { + // If the input is something like `if a { 1 } else { 2 } | if a { 3 } else { 4 }` + // then suggest parens around the lhs. + if let Some(sp) = self.sess.ambiguous_block_expr_parse.borrow().get(&lo) { + self.sess.expr_parentheses_needed(&mut err, *sp); + } + err + }) + } else if self.check(&token::OpenDelim(Delimiter::Bracket)) { + self.parse_array_or_repeat_expr(attrs, Delimiter::Bracket) + } else if self.check_path() { + self.parse_path_start_expr(attrs) + } else if self.check_keyword(kw::Move) || self.check_keyword(kw::Static) { + self.parse_closure_expr(attrs) + } else if self.eat_keyword(kw::If) { + self.parse_if_expr(attrs) + } else if self.check_keyword(kw::For) { + if self.choose_generics_over_qpath(1) { + self.parse_closure_expr(attrs) + } else { + assert!(self.eat_keyword(kw::For)); + self.parse_for_expr(None, self.prev_token.span, attrs) + } + } else if self.eat_keyword(kw::While) { + self.parse_while_expr(None, self.prev_token.span, attrs) + } else if let Some(label) = self.eat_label() { + self.parse_labeled_expr(label, attrs, true) + } else if self.eat_keyword(kw::Loop) { + let sp = self.prev_token.span; + self.parse_loop_expr(None, self.prev_token.span, attrs).map_err(|mut err| { + err.span_label(sp, "while parsing this `loop` expression"); + err + }) + } else if self.eat_keyword(kw::Continue) { + let kind = ExprKind::Continue(self.eat_label()); + Ok(self.mk_expr(lo.to(self.prev_token.span), kind, attrs)) + } else if self.eat_keyword(kw::Match) { + let match_sp = self.prev_token.span; + self.parse_match_expr(attrs).map_err(|mut err| { + err.span_label(match_sp, "while parsing this `match` expression"); + err + }) + } else if self.eat_keyword(kw::Unsafe) { + let sp = self.prev_token.span; + self.parse_block_expr(None, lo, BlockCheckMode::Unsafe(ast::UserProvided), attrs) + .map_err(|mut err| { + err.span_label(sp, "while parsing this `unsafe` expression"); + err + }) + } else if self.check_inline_const(0) { + self.parse_const_block(lo.to(self.token.span), false) + } else if self.is_do_catch_block() { + self.recover_do_catch(attrs) + } else if self.is_try_block() { + self.expect_keyword(kw::Try)?; + self.parse_try_block(lo, attrs) + } else if self.eat_keyword(kw::Return) { + self.parse_return_expr(attrs) + } else if self.eat_keyword(kw::Break) { + self.parse_break_expr(attrs) + } else if self.eat_keyword(kw::Yield) { + self.parse_yield_expr(attrs) + } else if self.is_do_yeet() { + self.parse_yeet_expr(attrs) + } else if self.check_keyword(kw::Let) { + self.parse_let_expr(attrs) + } else if self.eat_keyword(kw::Underscore) { + Ok(self.mk_expr(self.prev_token.span, ExprKind::Underscore, attrs)) + } else if !self.unclosed_delims.is_empty() && self.check(&token::Semi) { + // Don't complain about bare semicolons after unclosed braces + // recovery in order to keep the error count down. Fixing the + // delimiters will possibly also fix the bare semicolon found in + // expression context. For example, silence the following error: + // + // error: expected expression, found `;` + // --> file.rs:2:13 + // | + // 2 | foo(bar(; + // | ^ expected expression + self.bump(); + Ok(self.mk_expr_err(self.token.span)) + } else if self.token.uninterpolated_span().rust_2018() { + // `Span::rust_2018()` is somewhat expensive; don't get it repeatedly. + if self.check_keyword(kw::Async) { + if self.is_async_block() { + // Check for `async {` and `async move {`. + self.parse_async_block(attrs) + } else { + self.parse_closure_expr(attrs) + } + } else if self.eat_keyword(kw::Await) { + self.recover_incorrect_await_syntax(lo, self.prev_token.span, attrs) + } else { + self.parse_lit_expr(attrs) + } + } else { + self.parse_lit_expr(attrs) + } + } + + fn parse_lit_expr(&mut self, attrs: AttrVec) -> PResult<'a, P> { + let lo = self.token.span; + match self.parse_opt_lit() { + Some(literal) => { + let expr = self.mk_expr(lo.to(self.prev_token.span), ExprKind::Lit(literal), attrs); + self.maybe_recover_from_bad_qpath(expr) + } + None => self.try_macro_suggestion(), + } + } + + fn parse_tuple_parens_expr(&mut self, attrs: AttrVec) -> PResult<'a, P> { + let lo = self.token.span; + self.expect(&token::OpenDelim(Delimiter::Parenthesis))?; + let (es, trailing_comma) = match self.parse_seq_to_end( + &token::CloseDelim(Delimiter::Parenthesis), + SeqSep::trailing_allowed(token::Comma), + |p| p.parse_expr_catch_underscore(), + ) { + Ok(x) => x, + Err(err) => { + return Ok(self.recover_seq_parse_error(Delimiter::Parenthesis, lo, Err(err))); + } + }; + let kind = if es.len() == 1 && !trailing_comma { + // `(e)` is parenthesized `e`. + ExprKind::Paren(es.into_iter().next().unwrap()) + } else { + // `(e,)` is a tuple with only one field, `e`. + ExprKind::Tup(es) + }; + let expr = self.mk_expr(lo.to(self.prev_token.span), kind, attrs); + self.maybe_recover_from_bad_qpath(expr) + } + + fn parse_array_or_repeat_expr( + &mut self, + attrs: AttrVec, + close_delim: Delimiter, + ) -> PResult<'a, P> { + let lo = self.token.span; + self.bump(); // `[` or other open delim + + let close = &token::CloseDelim(close_delim); + let kind = if self.eat(close) { + // Empty vector + ExprKind::Array(Vec::new()) + } else { + // Non-empty vector + let first_expr = self.parse_expr()?; + if self.eat(&token::Semi) { + // Repeating array syntax: `[ 0; 512 ]` + let count = self.parse_anon_const_expr()?; + self.expect(close)?; + ExprKind::Repeat(first_expr, count) + } else if self.eat(&token::Comma) { + // Vector with two or more elements. + let sep = SeqSep::trailing_allowed(token::Comma); + let (remaining_exprs, _) = self.parse_seq_to_end(close, sep, |p| p.parse_expr())?; + let mut exprs = vec![first_expr]; + exprs.extend(remaining_exprs); + ExprKind::Array(exprs) + } else { + // Vector with one element + self.expect(close)?; + ExprKind::Array(vec![first_expr]) + } + }; + let expr = self.mk_expr(lo.to(self.prev_token.span), kind, attrs); + self.maybe_recover_from_bad_qpath(expr) + } + + fn parse_path_start_expr(&mut self, attrs: AttrVec) -> PResult<'a, P> { + let (qself, path) = if self.eat_lt() { + let (qself, path) = self.parse_qpath(PathStyle::Expr)?; + (Some(qself), path) + } else { + (None, self.parse_path(PathStyle::Expr)?) + }; + let lo = path.span; + + // `!`, as an operator, is prefix, so we know this isn't that. + let (hi, kind) = if self.eat(&token::Not) { + // MACRO INVOCATION expression + if qself.is_some() { + self.struct_span_err(path.span, "macros cannot use qualified paths").emit(); + } + let mac = MacCall { + path, + args: self.parse_mac_args()?, + prior_type_ascription: self.last_type_ascription, + }; + (self.prev_token.span, ExprKind::MacCall(mac)) + } else if self.check(&token::OpenDelim(Delimiter::Brace)) { + if let Some(expr) = self.maybe_parse_struct_expr(qself.as_ref(), &path, &attrs) { + if qself.is_some() { + self.sess.gated_spans.gate(sym::more_qualified_paths, path.span); + } + return expr; + } else { + (path.span, ExprKind::Path(qself, path)) + } + } else { + (path.span, ExprKind::Path(qself, path)) + }; + + let expr = self.mk_expr(lo.to(hi), kind, attrs); + self.maybe_recover_from_bad_qpath(expr) + } + + /// Parse `'label: $expr`. The label is already parsed. + fn parse_labeled_expr( + &mut self, + label: Label, + attrs: AttrVec, + mut consume_colon: bool, + ) -> PResult<'a, P> { + let lo = label.ident.span; + let label = Some(label); + let ate_colon = self.eat(&token::Colon); + let expr = if self.eat_keyword(kw::While) { + self.parse_while_expr(label, lo, attrs) + } else if self.eat_keyword(kw::For) { + self.parse_for_expr(label, lo, attrs) + } else if self.eat_keyword(kw::Loop) { + self.parse_loop_expr(label, lo, attrs) + } else if self.check_noexpect(&token::OpenDelim(Delimiter::Brace)) + || self.token.is_whole_block() + { + self.parse_block_expr(label, lo, BlockCheckMode::Default, attrs) + } else if !ate_colon + && (self.check_noexpect(&TokenKind::Comma) || self.check_noexpect(&TokenKind::Gt)) + { + // We're probably inside of a `Path<'a>` that needs a turbofish + let msg = "expected `while`, `for`, `loop` or `{` after a label"; + self.struct_span_err(self.token.span, msg).span_label(self.token.span, msg).emit(); + consume_colon = false; + Ok(self.mk_expr_err(lo)) + } else { + let msg = "expected `while`, `for`, `loop` or `{` after a label"; + + let mut err = self.struct_span_err(self.token.span, msg); + err.span_label(self.token.span, msg); + + // Continue as an expression in an effort to recover on `'label: non_block_expr`. + let expr = self.parse_expr().map(|expr| { + let span = expr.span; + + let found_labeled_breaks = { + struct FindLabeledBreaksVisitor(bool); + + impl<'ast> Visitor<'ast> for FindLabeledBreaksVisitor { + fn visit_expr_post(&mut self, ex: &'ast Expr) { + if let ExprKind::Break(Some(_label), _) = ex.kind { + self.0 = true; + } + } + } + + let mut vis = FindLabeledBreaksVisitor(false); + vis.visit_expr(&expr); + vis.0 + }; + + // Suggestion involves adding a (as of time of writing this, unstable) labeled block. + // + // If there are no breaks that may use this label, suggest removing the label and + // recover to the unmodified expression. + if !found_labeled_breaks { + let msg = "consider removing the label"; + err.span_suggestion_verbose( + lo.until(span), + msg, + "", + Applicability::MachineApplicable, + ); + + return expr; + } + + let sugg_msg = "consider enclosing expression in a block"; + let suggestions = vec![ + (span.shrink_to_lo(), "{ ".to_owned()), + (span.shrink_to_hi(), " }".to_owned()), + ]; + + err.multipart_suggestion_verbose( + sugg_msg, + suggestions, + Applicability::MachineApplicable, + ); + + // Replace `'label: non_block_expr` with `'label: {non_block_expr}` in order to supress future errors about `break 'label`. + let stmt = self.mk_stmt(span, StmtKind::Expr(expr)); + let blk = self.mk_block(vec![stmt], BlockCheckMode::Default, span); + self.mk_expr(span, ExprKind::Block(blk, label), ThinVec::new()) + }); + + err.emit(); + expr + }?; + + if !ate_colon && consume_colon { + self.error_labeled_expr_must_be_followed_by_colon(lo, expr.span); + } + + Ok(expr) + } + + fn error_labeled_expr_must_be_followed_by_colon(&self, lo: Span, span: Span) { + self.struct_span_err(span, "labeled expression must be followed by `:`") + .span_label(lo, "the label") + .span_suggestion_short( + lo.shrink_to_hi(), + "add `:` after the label", + ": ", + Applicability::MachineApplicable, + ) + .note("labels are used before loops and blocks, allowing e.g., `break 'label` to them") + .emit(); + } + + /// Recover on the syntax `do catch { ... }` suggesting `try { ... }` instead. + fn recover_do_catch(&mut self, attrs: AttrVec) -> PResult<'a, P> { + let lo = self.token.span; + + self.bump(); // `do` + self.bump(); // `catch` + + let span_dc = lo.to(self.prev_token.span); + self.struct_span_err(span_dc, "found removed `do catch` syntax") + .span_suggestion( + span_dc, + "replace with the new syntax", + "try", + Applicability::MachineApplicable, + ) + .note("following RFC #2388, the new non-placeholder syntax is `try`") + .emit(); + + self.parse_try_block(lo, attrs) + } + + /// Parse an expression if the token can begin one. + fn parse_expr_opt(&mut self) -> PResult<'a, Option>> { + Ok(if self.token.can_begin_expr() { Some(self.parse_expr()?) } else { None }) + } + + /// Parse `"return" expr?`. + fn parse_return_expr(&mut self, attrs: AttrVec) -> PResult<'a, P> { + let lo = self.prev_token.span; + let kind = ExprKind::Ret(self.parse_expr_opt()?); + let expr = self.mk_expr(lo.to(self.prev_token.span), kind, attrs); + self.maybe_recover_from_bad_qpath(expr) + } + + /// Parse `"do" "yeet" expr?`. + fn parse_yeet_expr(&mut self, attrs: AttrVec) -> PResult<'a, P> { + let lo = self.token.span; + + self.bump(); // `do` + self.bump(); // `yeet` + + let kind = ExprKind::Yeet(self.parse_expr_opt()?); + + let span = lo.to(self.prev_token.span); + self.sess.gated_spans.gate(sym::yeet_expr, span); + let expr = self.mk_expr(span, kind, attrs); + self.maybe_recover_from_bad_qpath(expr) + } + + /// Parse `"break" (('label (:? expr)?) | expr?)` with `"break"` token already eaten. + /// If the label is followed immediately by a `:` token, the label and `:` are + /// parsed as part of the expression (i.e. a labeled loop). The language team has + /// decided in #87026 to require parentheses as a visual aid to avoid confusion if + /// the break expression of an unlabeled break is a labeled loop (as in + /// `break 'lbl: loop {}`); a labeled break with an unlabeled loop as its value + /// expression only gets a warning for compatibility reasons; and a labeled break + /// with a labeled loop does not even get a warning because there is no ambiguity. + fn parse_break_expr(&mut self, attrs: AttrVec) -> PResult<'a, P> { + let lo = self.prev_token.span; + let mut label = self.eat_label(); + let kind = if label.is_some() && self.token == token::Colon { + // The value expression can be a labeled loop, see issue #86948, e.g.: + // `loop { break 'label: loop { break 'label 42; }; }` + let lexpr = self.parse_labeled_expr(label.take().unwrap(), AttrVec::new(), true)?; + self.struct_span_err( + lexpr.span, + "parentheses are required around this expression to avoid confusion with a labeled break expression", + ) + .multipart_suggestion( + "wrap the expression in parentheses", + vec![ + (lexpr.span.shrink_to_lo(), "(".to_string()), + (lexpr.span.shrink_to_hi(), ")".to_string()), + ], + Applicability::MachineApplicable, + ) + .emit(); + Some(lexpr) + } else if self.token != token::OpenDelim(Delimiter::Brace) + || !self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL) + { + let expr = self.parse_expr_opt()?; + if let Some(ref expr) = expr { + if label.is_some() + && matches!( + expr.kind, + ExprKind::While(_, _, None) + | ExprKind::ForLoop(_, _, _, None) + | ExprKind::Loop(_, None) + | ExprKind::Block(_, None) + ) + { + self.sess.buffer_lint_with_diagnostic( + BREAK_WITH_LABEL_AND_LOOP, + lo.to(expr.span), + ast::CRATE_NODE_ID, + "this labeled break expression is easy to confuse with an unlabeled break with a labeled value expression", + BuiltinLintDiagnostics::BreakWithLabelAndLoop(expr.span), + ); + } + } + expr + } else { + None + }; + let expr = self.mk_expr(lo.to(self.prev_token.span), ExprKind::Break(label, kind), attrs); + self.maybe_recover_from_bad_qpath(expr) + } + + /// Parse `"yield" expr?`. + fn parse_yield_expr(&mut self, attrs: AttrVec) -> PResult<'a, P> { + let lo = self.prev_token.span; + let kind = ExprKind::Yield(self.parse_expr_opt()?); + let span = lo.to(self.prev_token.span); + self.sess.gated_spans.gate(sym::generators, span); + let expr = self.mk_expr(span, kind, attrs); + self.maybe_recover_from_bad_qpath(expr) + } + + /// Returns a string literal if the next token is a string literal. + /// In case of error returns `Some(lit)` if the next token is a literal with a wrong kind, + /// and returns `None` if the next token is not literal at all. + pub fn parse_str_lit(&mut self) -> Result> { + match self.parse_opt_lit() { + Some(lit) => match lit.kind { + ast::LitKind::Str(symbol_unescaped, style) => Ok(ast::StrLit { + style, + symbol: lit.token.symbol, + suffix: lit.token.suffix, + span: lit.span, + symbol_unescaped, + }), + _ => Err(Some(lit)), + }, + None => Err(None), + } + } + + pub(super) fn parse_lit(&mut self) -> PResult<'a, Lit> { + self.parse_opt_lit().ok_or_else(|| { + if let token::Interpolated(inner) = &self.token.kind { + let expr = match inner.as_ref() { + token::NtExpr(expr) => Some(expr), + token::NtLiteral(expr) => Some(expr), + _ => None, + }; + if let Some(expr) = expr { + if matches!(expr.kind, ExprKind::Err) { + let mut err = self + .diagnostic() + .struct_span_err(self.token.span, "invalid interpolated expression"); + err.downgrade_to_delayed_bug(); + return err; + } + } + } + let msg = format!("unexpected token: {}", super::token_descr(&self.token)); + self.struct_span_err(self.token.span, &msg) + }) + } + + /// Matches `lit = true | false | token_lit`. + /// Returns `None` if the next token is not a literal. + pub(super) fn parse_opt_lit(&mut self) -> Option { + let mut recovered = None; + if self.token == token::Dot { + // Attempt to recover `.4` as `0.4`. We don't currently have any syntax where + // dot would follow an optional literal, so we do this unconditionally. + recovered = self.look_ahead(1, |next_token| { + if let token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) = + next_token.kind + { + if self.token.span.hi() == next_token.span.lo() { + let s = String::from("0.") + symbol.as_str(); + let kind = TokenKind::lit(token::Float, Symbol::intern(&s), suffix); + return Some(Token::new(kind, self.token.span.to(next_token.span))); + } + } + None + }); + if let Some(token) = &recovered { + self.bump(); + self.error_float_lits_must_have_int_part(&token); + } + } + + let token = recovered.as_ref().unwrap_or(&self.token); + match Lit::from_token(token) { + Ok(lit) => { + self.bump(); + Some(lit) + } + Err(LitError::NotLiteral) => None, + Err(err) => { + let span = token.span; + let token::Literal(lit) = token.kind else { + unreachable!(); + }; + self.bump(); + self.report_lit_error(err, lit, span); + // Pack possible quotes and prefixes from the original literal into + // the error literal's symbol so they can be pretty-printed faithfully. + let suffixless_lit = token::Lit::new(lit.kind, lit.symbol, None); + let symbol = Symbol::intern(&suffixless_lit.to_string()); + let lit = token::Lit::new(token::Err, symbol, lit.suffix); + Some(Lit::from_lit_token(lit, span).unwrap_or_else(|_| unreachable!())) + } + } + } + + fn error_float_lits_must_have_int_part(&self, token: &Token) { + self.struct_span_err(token.span, "float literals must have an integer part") + .span_suggestion( + token.span, + "must have an integer part", + pprust::token_to_string(token), + Applicability::MachineApplicable, + ) + .emit(); + } + + fn report_lit_error(&self, err: LitError, lit: token::Lit, span: Span) { + // Checks if `s` looks like i32 or u1234 etc. + fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { + s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) + } + + // Try to lowercase the prefix if it's a valid base prefix. + fn fix_base_capitalisation(s: &str) -> Option { + if let Some(stripped) = s.strip_prefix('B') { + Some(format!("0b{stripped}")) + } else if let Some(stripped) = s.strip_prefix('O') { + Some(format!("0o{stripped}")) + } else if let Some(stripped) = s.strip_prefix('X') { + Some(format!("0x{stripped}")) + } else { + None + } + } + + let token::Lit { kind, suffix, .. } = lit; + match err { + // `NotLiteral` is not an error by itself, so we don't report + // it and give the parser opportunity to try something else. + LitError::NotLiteral => {} + // `LexerError` *is* an error, but it was already reported + // by lexer, so here we don't report it the second time. + LitError::LexerError => {} + LitError::InvalidSuffix => { + self.expect_no_suffix( + span, + &format!("{} {} literal", kind.article(), kind.descr()), + suffix, + ); + } + LitError::InvalidIntSuffix => { + let suf = suffix.expect("suffix error with no suffix"); + let suf = suf.as_str(); + if looks_like_width_suffix(&['i', 'u'], &suf) { + // If it looks like a width, try to be helpful. + let msg = format!("invalid width `{}` for integer literal", &suf[1..]); + self.struct_span_err(span, &msg) + .help("valid widths are 8, 16, 32, 64 and 128") + .emit(); + } else if let Some(fixed) = fix_base_capitalisation(suf) { + let msg = "invalid base prefix for number literal"; + + self.struct_span_err(span, msg) + .note("base prefixes (`0xff`, `0b1010`, `0o755`) are lowercase") + .span_suggestion( + span, + "try making the prefix lowercase", + fixed, + Applicability::MaybeIncorrect, + ) + .emit(); + } else { + let msg = format!("invalid suffix `{suf}` for number literal"); + self.struct_span_err(span, &msg) + .span_label(span, format!("invalid suffix `{suf}`")) + .help("the suffix must be one of the numeric types (`u32`, `isize`, `f32`, etc.)") + .emit(); + } + } + LitError::InvalidFloatSuffix => { + let suf = suffix.expect("suffix error with no suffix"); + let suf = suf.as_str(); + if looks_like_width_suffix(&['f'], suf) { + // If it looks like a width, try to be helpful. + let msg = format!("invalid width `{}` for float literal", &suf[1..]); + self.struct_span_err(span, &msg).help("valid widths are 32 and 64").emit(); + } else { + let msg = format!("invalid suffix `{suf}` for float literal"); + self.struct_span_err(span, &msg) + .span_label(span, format!("invalid suffix `{suf}`")) + .help("valid suffixes are `f32` and `f64`") + .emit(); + } + } + LitError::NonDecimalFloat(base) => { + let descr = match base { + 16 => "hexadecimal", + 8 => "octal", + 2 => "binary", + _ => unreachable!(), + }; + self.struct_span_err(span, &format!("{descr} float literal is not supported")) + .span_label(span, "not supported") + .emit(); + } + LitError::IntTooLarge => { + self.struct_span_err(span, "integer literal is too large").emit(); + } + } + } + + pub(super) fn expect_no_suffix(&self, sp: Span, kind: &str, suffix: Option) { + if let Some(suf) = suffix { + let mut err = if kind == "a tuple index" + && [sym::i32, sym::u32, sym::isize, sym::usize].contains(&suf) + { + // #59553: warn instead of reject out of hand to allow the fix to percolate + // through the ecosystem when people fix their macros + let mut err = self + .sess + .span_diagnostic + .struct_span_warn(sp, &format!("suffixes on {kind} are invalid")); + err.note(&format!( + "`{}` is *temporarily* accepted on tuple index fields as it was \ + incorrectly accepted on stable for a few releases", + suf, + )); + err.help( + "on proc macros, you'll want to use `syn::Index::from` or \ + `proc_macro::Literal::*_unsuffixed` for code that will desugar \ + to tuple field access", + ); + err.note( + "see issue #60210 \ + for more information", + ); + err + } else { + self.struct_span_err(sp, &format!("suffixes on {kind} are invalid")) + .forget_guarantee() + }; + err.span_label(sp, format!("invalid suffix `{suf}`")); + err.emit(); + } + } + + /// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`). + /// Keep this in sync with `Token::can_begin_literal_maybe_minus`. + pub fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P> { + maybe_whole_expr!(self); + + let lo = self.token.span; + let minus_present = self.eat(&token::BinOp(token::Minus)); + let lit = self.parse_lit()?; + let expr = self.mk_expr(lit.span, ExprKind::Lit(lit), AttrVec::new()); + + if minus_present { + Ok(self.mk_expr( + lo.to(self.prev_token.span), + self.mk_unary(UnOp::Neg, expr), + AttrVec::new(), + )) + } else { + Ok(expr) + } + } + + fn is_array_like_block(&mut self) -> bool { + self.look_ahead(1, |t| matches!(t.kind, TokenKind::Ident(..) | TokenKind::Literal(_))) + && self.look_ahead(2, |t| t == &token::Comma) + && self.look_ahead(3, |t| t.can_begin_expr()) + } + + /// Emits a suggestion if it looks like the user meant an array but + /// accidentally used braces, causing the code to be interpreted as a block + /// expression. + fn maybe_suggest_brackets_instead_of_braces( + &mut self, + lo: Span, + attrs: AttrVec, + ) -> Option> { + let mut snapshot = self.create_snapshot_for_diagnostic(); + match snapshot.parse_array_or_repeat_expr(attrs, Delimiter::Brace) { + Ok(arr) => { + let hi = snapshot.prev_token.span; + self.struct_span_err(arr.span, "this is a block expression, not an array") + .multipart_suggestion( + "to make an array, use square brackets instead of curly braces", + vec![(lo, "[".to_owned()), (hi, "]".to_owned())], + Applicability::MaybeIncorrect, + ) + .emit(); + + self.restore_snapshot(snapshot); + Some(self.mk_expr_err(arr.span)) + } + Err(e) => { + e.cancel(); + None + } + } + } + + /// Parses a block or unsafe block. + pub(super) fn parse_block_expr( + &mut self, + opt_label: Option