-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse.rkt
183 lines (160 loc) · 5.19 KB
/
parse.rkt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#lang racket
; TODO:
; - Remove `char-except` in place of something a bit more flexible.
; - Allow `seq` to join AST nodes using "construction decorator" functions.
; - Merge `wrap` and `unwrap` into one function, this will depend on
; work on passing AST construction decorators.
(provide (struct-out emp-node)
(struct-out lit-node)
(struct-out wrap-node)
(struct-out seq-node)
define-parser
lit
char-except
alt
alt*
seq
seq*
opt
emp
plus
star
wrap
unwrap
parse
star-foldr
star-foldl
flatten-ast
; TODO: Remove this provide.
(struct-out result))
; ----------------------------------------
; ----- Context Free Grammar Library -----
; ----------------------------------------
; A AST is one of:
; - #<emp-node>
; - #<lit-node String>
; - #<wrap-node Symbol AST>
; - #<seq-node Symbol AST AST>
(struct emp-node () #:transparent)
(struct lit-node (value) #:transparent)
(struct wrap-node (label value) #:transparent)
(struct seq-node (label left right) #:transparent)
; A Result is one of:
; - 'error
; - (result AST String)
(struct result (node remaining) #:transparent)
; A Parser is a [String -> Result]
; Parser -> Parser : Transformation
; Makes things lazy.
(define-syntax-rule (define-parser name body)
(define (name s)
(body s)))
; String -> Parser
(define (lit str)
(lambda (s)
(let ([len (string-length str)])
(if (and (>= (string-length s) len)
(string=? (substring s 0 len) str))
(result (lit-node str) (substring s len))
'error))))
; String ... -> Parser
(define (char-except . chars)
(lambda (s)
(if (and (>= (string-length s) 1)
(not (member (substring s 0 1) chars)))
(result (lit-node (substring s 0 1)) (substring s 1 (string-length s)))
'error)))
; Parser Parser -> Parser
(define (alt p1 p2)
(lambda (s)
(let ([r1 (p1 s)])
(if (symbol? r1) (p2 s) r1))))
; Parser Parser ... -> Parser
(define (alt* parser . parsers)
(lambda (s)
((if (empty? parsers)
parser
(alt parser (apply alt* parsers))) s)))
; Symbol Parser Parser -> Parser
(define (seq label p1 p2)
(lambda (s)
(let ([r1 (p1 s)])
(if (symbol? r1)
'error
(let ([r2 (p2 (result-remaining r1))])
(if (symbol? r2)
'error
(result (seq-node label (result-node r1) (result-node r2))
(result-remaining r2))))))))
; Symbol Parser Parser ... -> Parser
(define (seq* label parser . parsers)
(lambda (s)
((if (empty? parsers)
parser
(seq label parser (apply seq* label parsers))) s)))
; Parser -> Parser
(define (opt parser)
(lambda (s)
(let ([r (parser s)])
(if (symbol? r) (result (emp-node) s) r))))
; -> Parser
(define (emp)
(lambda (s)
(result (emp-node) s)))
; Parser -> Parser
(define (plus parser)
(lambda (s)
((seq 'rep parser (star parser)) s)))
; Parser -> Parser
(define (star parser)
(lambda (s)
((alt (seq 'rep parser (lambda (s) (if (string=? s "")
(result (emp-node) "")
((star parser) s))))
(emp)) s)))
; Symbol Parser Parser Parser -> Parser
(define (wrap label l b r)
(lambda (s)
(let ([r ((seq* 'wrap l b r) s)])
(if (symbol? r)
'error
(result (wrap-node label
(seq-node-left (seq-node-right (result-node r))))
(result-remaining r))))))
; Parser Parser Parser -> Parser
(define (unwrap l b r)
(lambda (s)
(let ([r ((seq* 'wrap l b r) s)])
(if (symbol? r)
'error
(result (seq-node-left (seq-node-right (result-node r)))
(result-remaining r))))))
; Parser String -> AST
(define (parse parser str)
(let ([r (parser str)])
(cond [(symbol? r)
(error "bad input, got error")]
[(> (string-length (result-remaining r)) 0)
(error "bad input, had leftover text" (result-remaining r))]
[else
(result-node r)])))
; (AST X -> X) X (seq Symbol AST AST) -> X
(define (star-foldr op base ast)
(if (and (seq-node? ast)
(symbol=? (seq-node-label ast) 'rep))
(op (seq-node-left ast) (star-foldr op base (seq-node-right ast)))
base))
; (AST X -> X) X (seq Symbol AST AST) -> X
(define (star-foldl op base ast)
(if (and (seq-node? ast)
(symbol=? (seq-node-label ast) 'rep))
(star-foldl op (op (seq-node-left ast) base) (seq-node-right ast))
base))
; AST -> String
(define (flatten-ast ast)
(cond [(emp-node? ast) ""]
[(lit-node? ast) (lit-node-value ast)]
[(wrap-node? ast) (flatten-ast (wrap-node-value ast))]
[(seq-node? ast) (string-append (flatten-ast (seq-node-left ast))
(flatten-ast (seq-node-right ast)))]
[else (error "bad input: given" ast)]))