From 061312cf8f3762a2ca07b938748a32aa8a1159b1 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Wed, 4 Sep 2024 15:32:12 +0200 Subject: [PATCH] Document wtf is going on with parsing macros --- ptx_parser/src/ast.rs | 15 ++++++++ ptx_parser/src/lib.rs | 69 +++++++++++++++++++++++++++++++----- ptx_parser_macros/src/lib.rs | 2 +- 3 files changed, 77 insertions(+), 9 deletions(-) diff --git a/ptx_parser/src/ast.rs b/ptx_parser/src/ast.rs index ad44ab7a..d0dc303c 100644 --- a/ptx_parser/src/ast.rs +++ b/ptx_parser/src/ast.rs @@ -13,6 +13,21 @@ pub enum Statement { Block(Vec>), } +// We define the instruction enum through the macro instead of normally, because of how +// we use this type in the compiler. Each instruction can be logically split into two parts: +// properties that define instruction semantics (e.g. is memory load volatile?) that don't change +// during compilation and arguments (e.g. memory load source and destination) that evolve during +// compilation. To support compilation passes we need to be able to visit (and change) every +// argument in a generic way. This macro has visibility over all the fields. Consequently, we use it +// to generate visitor functions. There are three functions to support three different semantics: +// visit-by-ref, visit-by-mutable-ref, visit-and-map. In a previous version of the compiler it was +// done by hand and was very limiting (we supported only visit-and-map). +// The visitor must implement appropriate visitor trait defined below this macro. For convenience, +// we implemented visitors for some corresponding FnMut(...) types. +// Properties in this macro are used to encode information about the instruction arguments (what +// Rust type is used for it post-parsing, what PTX type does it expect, what PTX address space does +// it expect, etc.). +// This information is then available to a visitor. 
ptx_parser_macros::generate_instruction_type!( pub enum Instruction { Mov { diff --git a/ptx_parser/src/lib.rs b/ptx_parser/src/lib.rs index ed2cf2ae..f842ace6 100644 --- a/ptx_parser/src/lib.rs +++ b/ptx_parser/src/lib.rs @@ -1335,14 +1335,6 @@ fn empty_call<'input>( } } -// Modifiers are turned into arguments to the blocks, with type: -// * If it is an alternative: -// * If it is mandatory then its type is Foo (as defined by the relevant rule) -// * If it is optional then its type is Option -// * Otherwise: -// * If it is mandatory then it is skipped -// * If it is optional then its type is `bool` - type ParsedOperandStr<'input> = ast::ParsedOperand<&'input str>; #[derive(Clone, PartialEq, Default, Debug, Display)] @@ -1351,6 +1343,67 @@ pub struct TokenError(std::ops::Range); impl std::error::Error for TokenError {} +// This macro is responsible for generating parser code for instruction parser. +// Instruction parsing is by far the most complex part of parsing PTX code: +// * There are tens of instruction kinds, each with slightly different parsing rules +// * After parsing, each instruction needs to do some early validation and generate a specific, +// strongly-typed object. We want strong-typing because we have a single PTX parser frontend, but +// there can be multiple different code emitter backends +// * Most importantly, instruction modifiers can come in any order, so e.g. both +// `ld.relaxed.global.u32 a, b` and `ld.global.relaxed.u32 a, b` are equally valid. This makes +// classic parsing generators fail: if we tried to generate parsing rules that cover every possible +// ordering we'd need thousands of rules. This is not a purely theoretical problem. NVCC and Clang +// will always emit modifiers in the correct order, but people who write inline assembly usually +// get it wrong (even first party developers) +// +// This macro exists purely to generate repetitive code for parsing each instruction. 
It is +// _not_ self-contained and is _not_ general-purpose: it relies on certain types and functions from +// the enclosing module +// +// derive_parser!(...) input is split into three parts: +// * Token type definition +// * Partial enums +// * Parsing definitions +// +// Token type definition: +// This is the enum type that will be used by the instruction parser. For every instruction and +// modifier, derive_parser!(...) will add appropriate variant into this type. So e.g. if there is a +// rule for `bar.sync` then those two variants will be appended to the Token enum: +// #[token("bar")] Bar, +// #[token(".sync")] DotSync, +// +// Partial enums: +// With proper annotations, derive_parser!(...) parsing definitions are able to interpret +// instruction modifiers as variants of a single enum type. So e.g. for definitions `ld.u32` and +// `ld.u64` the macro can generate `enum ScalarType { U32, U64 }`. The problem is that for some +// (but not all) of those generated enum types we want to add some attributes and additional +// variants. In order to do so, you need to define this enum and derive_parser!(...) will append to +// the type instead of creating a new type. This is sort of replacement for partial classes known +// from C# +// +// Parsing definitions: +// Parsing definitions consist of a list of patterns and rules: +// * Pattern consists of: +// * Opcode: `ld` +// * Modifiers, always start with a dot: `.global`, `.relaxed`. Optionals are enclosed in braces +// * Arguments: `a`, `b`. Optionals are enclosed in braces +// * Code block: => { }. Code blocks implicitly take all modifiers and arguments +// as parameters. 
All modifiers and arguments are passed to the code block: +// * If it is an alternative (as defined in rules list later): +// * If it is mandatory then its type is Foo (as defined by the relevant rule) +// * If it is optional then its type is Option<Foo> +// * Otherwise: +// * If it is mandatory then it is skipped +// * If it is optional then its type is `bool` +// * List of rules. They are associated with the preceding patterns (until different opcode or +// different rules). Rules are used to resolve modifiers. There are two types of rules: +// * Normal rule: `.foobar: FoobarEnum => { .a, .b, .c }`. This means that instead of `.foobar` we +// expect one of `.a`, `.b`, `.c` and will emit value FoobarEnum::DotA, FoobarEnum::DotB, +// FoobarEnum::DotC appropriately +// * Type-only rule: `FoobarEnum => { .a, .b, .c }` this means that all the occurrences of `.a` will +// emit FoobarEnum::DotA to the code block. This helps to avoid copy-paste errors +// Additionally, you can opt out from the usual parsing rule generation with a special `<=` pattern. +// See `call` instruction to see it in action derive_parser!( #[derive(Logos, PartialEq, Eq, Debug, Clone, Copy)] #[logos(skip r"(?:\s+)|(?://[^\n\r]*[\n\r]*)|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)")] diff --git a/ptx_parser_macros/src/lib.rs b/ptx_parser_macros/src/lib.rs index a2f8396f..5f47fac7 100644 --- a/ptx_parser_macros/src/lib.rs +++ b/ptx_parser_macros/src/lib.rs @@ -16,7 +16,7 @@ use syn::{ // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#packed-integer-data-types #[rustfmt::skip] static POSTFIX_MODIFIERS: &[&str] = &[ - ".v2", ".v4", + ".v2", ".v4", ".v8", ".s8", ".s16", ".s16x2", ".s32", ".s64", ".u8", ".u16", ".u16x2", ".u32", ".u64", ".f16", ".f16x2", ".f32", ".f64",