From e0192f48c9a19aadec88742de2647de7d16b2a6c Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sun, 5 May 2024 12:23:37 -0700 Subject: [PATCH 1/3] compiler: Privatize `Parser::current_closure` This was added as pub in 2021 and remains only privately used in 2024! --- compiler/rustc_parse/src/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 7486da33b2173..ec025b44b7b10 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -170,7 +170,7 @@ pub struct Parser<'a> { capture_state: CaptureState, /// This allows us to recover when the user forget to add braces around /// multiple statements in the closure body. - pub current_closure: Option<ClosureSpans>, + current_closure: Option<ClosureSpans>, /// Whether the parser is allowed to do recovery. /// This is disabled when parsing macro arguments, see #103534 pub recovery: Recovery, From c70290da0a27a4873823c0381ac698c6e7ed8b65 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sun, 5 May 2024 12:44:40 -0700 Subject: [PATCH 2/3] compiler: derive Debug in parser It's annoying to debug the parser if you have to stop every five seconds to add a Debug impl. --- compiler/rustc_ast/src/tokenstream.rs | 2 +- compiler/rustc_parse/src/parser/mod.rs | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index aadcfa7fed594..8e80161af1bf4 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -706,7 +706,7 @@ impl<'t> Iterator for RefTokenTreeCursor<'t> { /// involve associated types) for getting individual elements, or /// `RefTokenTreeCursor` if you really want an `Iterator`, e.g. in a `for` /// loop.
-#[derive(Clone)] +#[derive(Clone, Debug)] pub struct TokenTreeCursor { pub stream: TokenStream, index: usize, diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index ec025b44b7b10..2f688c60765a5 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -19,6 +19,7 @@ pub(crate) use item::FnParseMode; pub use pat::{CommaRecoveryMode, RecoverColon, RecoverComma}; pub use path::PathStyle; +use core::fmt; use rustc_ast::ptr::P; use rustc_ast::token::{self, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::{AttributesData, DelimSpacing, DelimSpan, Spacing}; @@ -46,7 +47,7 @@ use crate::errors::{ }; bitflags::bitflags! { - #[derive(Clone, Copy)] + #[derive(Clone, Copy, Debug)] struct Restrictions: u8 { const STMT_EXPR = 1 << 0; const NO_STRUCT_LITERAL = 1 << 1; @@ -72,7 +73,7 @@ enum BlockMode { /// Whether or not we should force collection of tokens for an AST node, /// regardless of whether or not it has attributes -#[derive(Clone, Copy, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq)] pub enum ForceCollect { Yes, No, @@ -120,7 +121,7 @@ macro_rules! maybe_recover_from_interpolated_ty_qpath { }; } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub enum Recovery { Allowed, Forbidden, @@ -182,7 +183,7 @@ pub struct Parser<'a> { rustc_data_structures::static_assert_size!(Parser<'_>, 264); /// Stores span information about a closure. -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct ClosureSpans { pub whole_closure: Span, pub closing_pipe: Span, @@ -211,7 +212,7 @@ pub type ReplaceRange = (Range, Vec<(FlatToken, Spacing)>); /// Controls how we capture tokens. Capturing can be expensive, /// so we try to avoid performing capturing in cases where /// we will never need an `AttrTokenStream`. -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub enum Capturing { /// We aren't performing any capturing - this is the default mode. 
No, @@ -219,7 +220,7 @@ pub enum Capturing { Yes, } -#[derive(Clone)] +#[derive(Clone, Debug)] struct CaptureState { capturing: Capturing, replace_ranges: Vec<ReplaceRange>, @@ -230,7 +231,7 @@ struct CaptureState { /// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b) /// use this type to emit them as a linear sequence. But a linear sequence is /// what the parser expects, for the most part. -#[derive(Clone)] +#[derive(Clone, Debug)] struct TokenCursor { // Cursor for the current (innermost) token stream. The delimiters for this // token stream are found in `self.stack.last()`; when that is `None` then @@ -335,6 +336,7 @@ enum TokenExpectType { } /// A sequence separator. +#[derive(Debug)] struct SeqSep { /// The separator token. sep: Option<TokenKind>, @@ -352,6 +354,7 @@ impl SeqSep { } } +#[derive(Debug)] pub enum FollowedByType { Yes, No, @@ -376,7 +379,7 @@ pub enum Trailing { Yes, } -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum TokenDescription { ReservedIdentifier, Keyword, From 5e67a3783c89ebb1f611e0351c9a36a579e49cec Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sun, 5 May 2024 15:41:00 -0700 Subject: [PATCH 3/3] compiler: add `Parser::debug_lookahead` I tried debugging a parser-related issue but found it annoying to not be able to easily peek into the Parser's token stream. Add a convenience fn that offers an opinionated view into the parser, but one that is useful for answering basic questions about parser state.
---
 compiler/rustc_parse/src/lib.rs        |  1 +
 compiler/rustc_parse/src/parser/mod.rs | 49 ++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs
index a46372d368f42..0f973dfcd7969 100644
--- a/compiler/rustc_parse/src/lib.rs
+++ b/compiler/rustc_parse/src/lib.rs
@@ -5,6 +5,7 @@
 #![allow(rustc::untranslatable_diagnostic)]
 #![feature(array_windows)]
 #![feature(box_patterns)]
+#![feature(debug_closure_helpers)]
 #![feature(if_let_guard)]
 #![feature(iter_intersperse)]
 #![feature(let_chains)]
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index 2f688c60765a5..381250bd7d770 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -1537,6 +1537,55 @@ impl<'a> Parser<'a> {
         })
     }
 
+    /// Returns a debug view of the parser's token stream, looking ahead at
+    /// most `lookahead` tokens.
+    ///
+    /// The view prints `prev_token` in full, then only the `TokenKind`s of the
+    /// upcoming tokens, stopping after the first `Eof`.
+    pub fn debug_lookahead(&self, lookahead: usize) -> impl fmt::Debug + '_ {
+        struct DebugParser<'dbg> {
+            parser: &'dbg Parser<'dbg>,
+            lookahead: usize,
+        }
+
+        impl fmt::Debug for DebugParser<'_> {
+            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+                let Self { parser, lookahead } = self;
+                let mut dbg_fmt = f.debug_struct("Parser"); // or at least, one view of
+
+                // we don't need N spans, but we want at least one, so print all of prev_token
+                dbg_fmt.field("prev_token", &parser.prev_token);
+                // make it easier to peek farther ahead by taking TokenKinds only until EOF
+                let tokens = (0..*lookahead)
+                    .map(|i| parser.look_ahead(i, |tok| tok.kind.clone()))
+                    .scan(parser.prev_token == TokenKind::Eof, |eof, tok| {
+                        // `scan` ends the sequence at the first `None`, so keep yielding
+                        // until EOF has been seen; the EOF token itself is still included
+                        if *eof {
+                            return None;
+                        }
+                        *eof = tok == TokenKind::Eof;
+                        Some(tok)
+                    });
+                dbg_fmt.field_with("tokens", |field| field.debug_list().entries(tokens).finish());
+                dbg_fmt.field("approx_token_stream_pos", &parser.num_bump_calls);
+
+                // some fields are interesting for certain values, as they relate to macro parsing
+                if let Some(subparser) = parser.subparser_name {
+                    dbg_fmt.field("subparser_name", &subparser);
+                }
+                if let Recovery::Forbidden = parser.recovery {
+                    dbg_fmt.field("recovery", &parser.recovery);
+                }
+
+                // imply there's "more to know" than this view
+                dbg_fmt.finish_non_exhaustive()
+            }
+        }
+
+        DebugParser { parser: self, lookahead }
+    }
+
     pub fn clear_expected_tokens(&mut self) {
         self.expected_tokens.clear();
     }