Skip to content

Commit

Permalink
Reduce scope of keywords during lexing
Browse files Browse the repository at this point in the history
This means keywords from one section (e.g. "name", which is a keyword only in the model section) may be used as plain identifiers in other sections.
  • Loading branch information
asmaloney committed Jan 20, 2024
1 parent 411d148 commit 37ddf2e
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 47 deletions.
13 changes: 13 additions & 0 deletions amod/amod_config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,3 +457,16 @@ func Example_proceduralFieldUnrecognized() {
// Output:
// ERROR: unrecognized option "foo" in procedural config (line 6, col 15)
}

// Tests that we can use a keyword from one section as an id in another.
// "name" is a keyword only in the model section, so it should be accepted
// as a plain identifier (a chunk slot name) inside the config section.
func Example_keywordInDifferentSection() {
// generateToStdout compiles the amod source and prints any errors to
// stdout (see Example_proceduralFieldUnrecognized); the empty "Output:"
// below asserts that no error was produced.
generateToStdout(`
~~ model ~~
name: Test
~~ config ~~
chunks { [name: first last] }
~~ init ~~
~~ productions ~~`)

// Output:
}
116 changes: 88 additions & 28 deletions amod/lex.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package amod
import (
"fmt"
"io"
"slices"
"strings"
"unicode"
"unicode/utf8"
Expand Down Expand Up @@ -99,18 +100,31 @@ type lexeme struct {
pos int // position within the line
}

// sectionType is used to keep track of what section we are lexing.
// We use this to limit the scope of keywords: an identifier only counts
// as a keyword if it is a keyword for the section currently being lexed.
type sectionType int

// One value per amod section header, in file order.
const (
sectionModel sectionType = iota // "~~ model ~~" (also the initial lexer state)
sectionConfig // "~~ config ~~"
sectionInit // "~~ init ~~"
sectionProduction // "~~ productions ~~"
)

// lexer_amod tracks our lexing and provides a channel to emit lexemes.
//
// NOTE(review): the scraped diff interleaved the pre-change and post-change
// field lists (start/pos/width/lexemes appeared twice, plus the removed
// "keywords" map). This is the reconstructed post-change struct: the keyword
// map is gone — keyword lookup is now per-section via lookupKeyword.
type lexer_amod struct {
	name           string // used only for error reports
	input          string // the string being scanned.
	line           int    // the line number
	lastNewlinePos int

	start   int         // start position of this lexeme (offset from beginning of file)
	pos     int         // current position in the input (offset from beginning of file)
	width   int         // width of last rune read from input
	lexemes chan lexeme // channel of scanned lexemes

	inSectionHeader bool        // state: switch currentSection based on ~~ section headers
	currentSection  sectionType // which section are we lexing? used to switch out keywords
	inPattern       bool        // state: a pattern - delimited by [] is lexed specially
}

// stateFn is used to move through the lexing states
Expand All @@ -122,26 +136,40 @@ const (
commentDelim = "//"
)

// keywordsModel are only keywords for the model section
var keywordsModel []string = []string{
	"authors",
	"description",
	"examples",
	"name",
}

// keywordsConfig are only keywords for the config section
var keywordsConfig []string = []string{
	"chunks",
	"gactar",
	"modules",
}

// keywordsInit are only keywords for the init section
var keywordsInit []string = []string{
	"similar",
}

// keywordsProductions are only keywords for the productions section
var keywordsProductions []string = []string{
"and",
"any",
"authors",
"buffer_state",
"chunks",
"clear",
"description",
"do",
"examples",
"gactar",
"match",
"module_state",
"modules",
"name",
"nil",
"print",
"recall",
"set",
"similar",
"stop",
"to",
"when",
Expand Down Expand Up @@ -185,17 +213,14 @@ func lex(filename string, data string) *lexer_amod {
cleanData(&data)

l := &lexer_amod{
name: filename,
input: data,
line: 1,
lastNewlinePos: 1, // start @ 1 so first line gets 0 (see emit())
lexemes: make(chan lexeme),
keywords: make(map[string]bool),
inPattern: false,
}

for _, v := range keywords {
l.keywords[v] = true
name: filename,
input: data,
line: 1,
lastNewlinePos: 1, // start @ 1 so first line gets 0 (see emit())
lexemes: make(chan lexeme),
currentSection: sectionModel,
inSectionHeader: false,
inPattern: false,
}

go l.run()
Expand Down Expand Up @@ -252,9 +277,20 @@ func (l *lexer_amod) next() rune {
return r
}

// lookupKeyword reports whether "id" is a keyword in the section we are
// currently lexing, so a keyword from one section may be used as a plain
// identifier in another.
//
// NOTE(review): the scraped diff retained the removed map-based body
// ("v, ok := l.keywords[id]") ahead of the new switch, which both made the
// switch unreachable and referenced a field that no longer exists. This is
// the reconstructed post-change body.
func (l *lexer_amod) lookupKeyword(id string) bool {
	switch l.currentSection {
	case sectionModel:
		return slices.Contains(keywordsModel, id)
	case sectionConfig:
		return slices.Contains(keywordsConfig, id)
	case sectionInit:
		return slices.Contains(keywordsInit, id)
	case sectionProduction:
		return slices.Contains(keywordsProductions, id)
	}

	// unreachable with the current sectionType values
	return false
}

// skip over the pending input before this point
Expand Down Expand Up @@ -428,6 +464,7 @@ func lexStart(l *lexer_amod) stateFn {
if l.nextIs('~') {
l.next()
l.emit(lexemeSectionDelim)
l.inSectionHeader = !l.inSectionHeader
} else {
l.emit(lexemeChar)
}
Expand Down Expand Up @@ -495,9 +532,32 @@ func lexIdentifier(l *lexer_amod) stateFn {
l.next()
}

id := l.input[l.start:l.pos]
isKeyword := false

// If we are in a section header, then change our current section
if l.inSectionHeader {
switch id {
case "model":
l.currentSection = sectionModel
case "config":
l.currentSection = sectionConfig
case "init":
l.currentSection = sectionInit
case "productions":
l.currentSection = sectionProduction
default:
return l.errorf("unrecognized section")
}

// these are keywords in this context
isKeyword = true
} else {
isKeyword = l.lookupKeyword(id)
}

// Perhaps not the best way to do this.
// I'm sure there's a char-by-char way we could implement which would be faster.
isKeyword := l.lookupKeyword(l.input[l.start:l.pos])
switch {
case isKeyword:
l.emit(lexemeKeyword)
Expand Down
9 changes: 9 additions & 0 deletions amod/lex_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ func TestInvalidSection(t *testing.T) {
if token.Type != lexer.TokenType(lexemeSectionDelim) {
t.Errorf("expected to lex '%s' as section delimiter (%d) - got type %d", token.Value, lexemeSectionDelim, token.Type)
}

expected := "ERROR on line 1 at position 9: unrecognized section"

token, err = l.Next()
if err == nil {
t.Errorf("expected error: %q", expected)
} else if err.Error() != expected {
t.Errorf("expected error: %q but got %q", expected, err.Error())
}
}

func TestUnterminatedQuote(t *testing.T) {
Expand Down
38 changes: 19 additions & 19 deletions amod/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,26 +25,26 @@ import (
// paste in the generated EBNF above, click "Convert" and then click "View Diagram"

type amodFile struct {
ModelHeader string `parser:"'~~':SectionDelim 'model' '~~':SectionDelim"`
ModelHeader string `parser:"'~~':SectionDelim 'model':Keyword '~~':SectionDelim"`
Model *modelSection `parser:"@@"`

ConfigHeader string `parser:"'~~':SectionDelim 'config' '~~':SectionDelim"`
ConfigHeader string `parser:"'~~':SectionDelim 'config':Keyword '~~':SectionDelim"`
Config *configSection `parser:"(@@)?"`

InitHeader string `parser:"'~~':SectionDelim 'init' '~~':SectionDelim"`
InitHeader string `parser:"'~~':SectionDelim 'init':Keyword '~~':SectionDelim"`
Init *initSection `parser:"(@@)?"`

ProductionsHeader string `parser:"'~~':SectionDelim 'productions' '~~':SectionDelim"`
ProductionsHeader string `parser:"'~~':SectionDelim 'productions':Keyword '~~':SectionDelim"`
Productions *productionSection `parser:"(@@)?"`

Tokens []lexer.Token
}

// modelSection holds the "~~ model ~~" section. Its option names are model
// section keywords, matched with an explicit :Keyword token type.
// (Reconstructed: the scraped diff showed both old and new tag lines,
// duplicating every field; only the post-change lines are kept.)
type modelSection struct {
	Name        string     `parser:"'name':Keyword ':' (@String|@Ident)"`
	Description string     `parser:"('description':Keyword ':' @String)?"`
	Authors     []string   `parser:"('authors':Keyword '{' @String* '}')?"`
	Examples    []*pattern `parser:"('examples':Keyword '{' @@* '}')?"`

	Tokens []lexer.Token
}
Expand Down Expand Up @@ -165,7 +165,7 @@ type field struct {
}

type gactarConfig struct {
GactarFields []*field `parser:"'gactar' '{' @@* '}'"`
GactarFields []*field `parser:"'gactar':Keyword '{' @@* '}'"`

Tokens []lexer.Token
}
Expand All @@ -178,7 +178,7 @@ type module struct {
}

// moduleConfig holds the "modules { ... }" block of the config section;
// 'modules' is a config-section keyword, hence :Keyword.
// (Reconstructed: the scrape duplicated the field with its pre-change tag.)
type moduleConfig struct {
	Modules []*module `parser:"'modules':Keyword '{' @@* '}'"`

	Tokens []lexer.Token
}
Expand All @@ -193,7 +193,7 @@ type chunkDecl struct {
}

// chunkConfig holds the "chunks { ... }" block of the config section;
// 'chunks' is a config-section keyword, hence :Keyword.
// (Reconstructed: the scrape duplicated the field with its pre-change tag.)
type chunkConfig struct {
	ChunkDecls []*chunkDecl `parser:"'chunks':Keyword '{' @@* '}'"`

	Tokens []lexer.Token
}
Expand Down Expand Up @@ -347,19 +347,19 @@ type matchItem struct {
}

// match is the "match { ... }" half of a production; 'match' is a
// productions-section keyword, hence :Keyword.
// (Reconstructed: the scrape duplicated the field with its pre-change tag.)
type match struct {
	Items []*matchItem `parser:"'match':Keyword '{' @@+ '}'"`

	Tokens []lexer.Token
}

type clearStatement struct {
BufferNames []string `parser:"'clear' ( @Ident ','? )+"`
BufferNames []string `parser:"'clear':Keyword ( @Ident ','? )+"`

Tokens []lexer.Token
}

// printStatement is a "print <args>" statement in a production's do block;
// 'print' is a productions-section keyword, hence :Keyword.
// (Reconstructed: the scrape duplicated the field with its pre-change tag.)
type printStatement struct {
	Args []*printArg `parser:"'print':Keyword ( @@ ','? )*"`

	Tokens []lexer.Token
}
Expand All @@ -381,17 +381,17 @@ type withClause struct {
}

// recallStatement is a "recall <pattern> [with ...]" statement; 'recall'
// is a productions-section keyword, hence :Keyword.
// (Reconstructed: the scrape duplicated Pattern with its pre-change tag.)
type recallStatement struct {
	Pattern *pattern    `parser:"'recall':Keyword @@"`
	With    *withClause `parser:"@@?"`

	Tokens []lexer.Token
}

type setStatement struct {
Set string `parser:"'set'"` // not used, but must be visible for parse to work
Set string `parser:"'set':Keyword"` // not used, but must be visible for parse to work
BufferRef bufferRef `parser:"@@"`

To string `parser:"'to'"` // not used, but must be visible for parse to work
To string `parser:"'to':Keyword"` // not used, but must be visible for parse to work
Value *setArg `parser:"( @@"`
Pattern *pattern `parser:"| @@)"`

Expand All @@ -415,15 +415,15 @@ type statement struct {
}

type do struct {
Do string `parser:"'do'"` // not used, but must be visible for parse to work
Do string `parser:"'do':Keyword"` // not used, but must be visible for parse to work
Statements *[]*statement `parser:"'{' @@+ '}'"`

Tokens []lexer.Token
}

type production struct {
Name string `parser:"@Ident '{'"`
Description *string `parser:"('description' ':' @String)?"`
Description *string `parser:"('description':Keyword ':' @String)?"`
Match *match `parser:"@@"`
Do *do `parser:"@@"`
End string `parser:"'}'"` // not used, but must be visible for parse to work
Expand Down

0 comments on commit 37ddf2e

Please sign in to comment.