-
Notifications
You must be signed in to change notification settings - Fork 11
/
cparser.lua
1725 lines (1574 loc) Β· 57.9 KB
/
cparser.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
-- Copyright (c) Facebook, Inc. and its affiliates.
-- This source code is licensed under the MIT license found in the
-- LICENSE file in the root directory of this source tree.
--
-- Lua module to preprocess and parse C declarations.
-- (Leon Bottou, 2015)
-- @r-lyeh: del lcdecl module
-- @r-lyeh: add cppString method
-- @r-lyeh: add __COUNTER__ macro
-- @r-lyeh: del empty #line directives from output
-- standard libs
local string = require 'string'
local coroutine = require 'coroutine'
local table = require 'table'
local io = require 'io'
-- Lua 5.1 to 5.3 compatibility
local unpack = unpack or table.unpack
-- Debugging
-- When DEBUG is set, try to load the optional 'strict' module.
-- The require is wrapped in pcall so that a missing module is ignored.
local DEBUG = true
if DEBUG then pcall(require,'strict') end
-- luacheck: globals cparser
-- luacheck: ignore 43 4/ti 4/li
-- luacheck: ignore 212/.*_
-- luacheck: ignore 211/is[A-Z].* 211/Type
-- luacheck: ignore 542
---------------------------------------------------
---------------------------------------------------
---------------------------------------------------
-- ALL UGLY HACKS SHOULD BE HERE
-- Sometimes we cannot find system include files but need to know at
-- least things about them. For instance, certain system include files
-- define alternate forms for keywords.
local knownIncludeQuirks = {
   -- Each entry maps a bracketed header name to an array of
   -- preprocessor lines defining the alternate keyword spellings.
   ["<complex.h>"] = { -- c99
      "#ifndef complex",
      "# define complex _Complex",
      "#endif",
   },
   ["<stdbool.h>"] = { -- c99
      "#ifndef bool",
      "# define bool _Bool",
      "#endif",
   },
   ["<stdalign.h>"] = { -- c11
      "#ifndef alignof",
      "# define alignof _Alignof",
      "#endif",
      "#ifndef alignas",
      "# define alignas _Alignas",
      "#endif",
   },
   ["<stdnoreturn.h>"] = { -- c11
      "#ifndef noreturn",
      "# define noreturn _Noreturn",
      "#endif",
   },
   ["<threads.h>"] = { -- c11
      "#ifndef thread_local",
      "# define thread_local _Thread_local",
      "#endif",
   },
   ["<iso646.h>"] = { -- c++
      "#define and &&",
      "#define and_eq &=",
      "#define bitand &",
      "#define bitor |",
      "#define compl ~",
      "#define not !",
      "#define not_eq !=",
      "#define or ||",
      "#define or_eq |=",
      "#define xor ^",
      "#define xor_eq ^=",
   },
}
---------------------------------------------------
---------------------------------------------------
---------------------------------------------------
-- TAGGED TABLES
-- Utilities to produce and print tagged tables.
-- The tag name is simply the contents of table key <tag>.
-- Function <newTag> returns a node constructor
--
-- Example:
--
-- > Foo = newTag('Foo')
-- > Bar = newTag('Bar')
--
-- > print( Foo{const=true,next=Bar{name="Hello"}} )
-- Foo{next=Bar{name="Hello"},const=true}
--
-- > print( Bar{name="hi!", Foo{1}, Foo{2}, Foo{3}} )
-- Bar{Foo{1},Foo{2},Foo{3},name="hi!"}
local function newTag(tag)
   -- Returns a constructor that stores <tag> in field 'tag' of its table
   -- argument and installs a metatable whose __tostring renders the
   -- table as Tag{...}.

   -- renders one value stored inside a node
   local function render(x)
      local kind = type(x)
      if kind == 'string' then
         -- %q may emit a backslash followed by a real newline;
         -- display it as the two characters \n instead.
         return (string.format("%q", x):gsub("\\\n", "\\n"))
      end
      if kind == 'table' and not getmetatable(x) then
         return "{..}" -- untagged tables are not expanded
      end
      return tostring(x)
   end

   -- the printing function
   local function nodeToString(self)
      local head = string.format("%s{", self.tag or "Node")
      local parts = {}
      -- length of the leading run of true positional entries
      local seqlen = 0
      for i = 1, #self do
         if not self[i] then break end
         seqlen = i
      end
      for i = 1, seqlen do
         parts[#parts + 1] = render(self[i])
      end
      -- remaining keys, in whatever order pairs() visits them
      for k, v in pairs(self) do
         local kt = type(k)
         if kt == 'number' then
            if k < 1 or k > seqlen then
               parts[#parts + 1] = string.format("[%s]=%s", k, render(v))
            end
         elseif kt ~= 'string' then
            parts.extra = true -- remembered and shown as "..."
         elseif k:find("^_") and type(v) == 'table' then
            parts[#parts + 1] = string.format("%s={..}", k) -- hidden
         elseif k ~= 'tag' then
            parts[#parts + 1] = string.format("%s=%s", k, render(v))
         end
      end
      if parts.extra then
         parts[#parts + 1] = "..."
      end
      return head .. table.concat(parts, ',') .. '}'
   end

   -- the constructor
   return function(t) -- must be followed by a table constructor
      local node = t or {}
      assert(type(node) == 'table')
      setmetatable(node, { __tostring = nodeToString })
      node.tag = tag
      return node
   end
end
-- hack to print any table: print(Node(nn))
local Node = newTag(nil) -- luacheck: ignore 211
---------------------------------------------------
---------------------------------------------------
---------------------------------------------------
-- UTILITIES
-- Many functions below have an optional argument 'options' which is
-- simply an array of compiler-like options that are specified in the
-- toplevel call and passed to nearly all functions. Because it
-- provides a good communication channel across the code components,
-- many named fields are also used for multiple purposes. The
-- following function is called at the beginning of the user facing
-- functions to make a copy of the user provided option array and
-- setup some of these fields.
local function copyOptions(options)
   -- Returns a fresh option table: the array part of <options>, the
   -- named fields sizeof/alignof, a reverse hash mapping each option
   -- to its index, and the dialect flags derived from -std=...
   options = options or {}
   assert(type(options)=='table')
   local noptions = {
      sizeof = options.sizeof,   -- not used yet
      alignof = options.alignof, -- not used yet
      hash = {},
   }
   -- single pass: copy options, fill the reverse hash, track the dialect
   local dialect = 'gnu99'
   for i, opt in ipairs(options) do
      noptions[i] = opt
      noptions.hash[opt] = i
      if opt:find("^%-std=%s*[^%s]") then
         dialect = opt:match("^%-std=%s*(.-)%s*$")
      end
   end
   -- dialect flags hold the match position (a true value) or nil
   local isGnu = dialect:find("^gnu")
   local is99 = dialect:find("9[9x]$")
   local is11 = dialect:find("1[1x]$")
   noptions.dialect = dialect
   noptions.dialectGnu = isGnu
   noptions.dialect99 = is99
   noptions.dialect11 = is11
   -- ansi means none of the above
   noptions.dialectAnsi = not (isGnu or is99 or is11)
   return noptions
end
-- Tests whether option <opt> has been given.
-- Returns the entry stored for <opt> in options.hash (copyOptions
-- stores the option index there), or a false value otherwise.
-- Option tables without a hash are tolerated only when they are
-- marked <silent>.
local function hasOption(options, opt)
   assert(options)
   local hash = options.hash
   assert(options.silent or hash)
   if not hash then
      return hash
   end
   return hash[opt]
end
-- Generic functions for error messages
-- xmessage formats <message> with the extra arguments, prefixes it
-- with the location <lineno>, and reports it. <err> is either
-- 'warning' or 'error'. With options.silent, nothing is printed and
-- errors are raised with the full message. Otherwise the message is
-- printed (warnings are muted by -w and promoted by -Werror) and
-- errors raise "cparser: aborted".
local function xmessage(err, options, lineno, message, ...)
   local msg = string.format("cparser: (%s) ",lineno) .. string.format(message,...)
   local fatal = (err == 'error')
   if options.silent then
      if fatal then error(msg, 0) end
      return
   end
   if err == 'warning' and hasOption(options, "-Werror") then
      fatal = true
   end
   if fatal or not hasOption(options, "-w") then
      print(msg)
   end
   if fatal then
      error("cparser: aborted",0)
   end
end
-- Reports a warning at location <lineno> (see xmessage).
local function xwarning(options, lineno, message, ...)
   return xmessage('warning', options, lineno, message, ...)
end
-- Reports an error at location <lineno> (see xmessage).
local function xerror(options, lineno, message, ...)
   return xmessage('error', options, lineno, message, ...)
end
-- Calls xerror with the remaining arguments (options, lineno,
-- message, ...) when <cond> is false or nil.
local function xassert(cond, ...)
   if cond then return end
   xerror(...)
end
-- Prints a debugging message: two tabs, the location <lineno> in
-- brackets, then <message> formatted with the extra arguments.
local function xdebug(lineno,message,...)
   local where = string.format("\t\t[%s] ", lineno)
   print(where .. string.format(message,...))
end
-- Nil-safe max: a missing argument is replaced by the other one.
local function max(a,b)
   if not a then a = b end
   if not b then b = a end
   if a > b then
      return a
   end
   return b
end
-- Deep table comparison
-- (not very efficient, no loop detection)
-- Two values match when they are equal, or when both are tables
-- whose entries match recursively in both directions.
local function tableCompare(a,b)
   if a == b then
      return true
   end
   if type(a) ~= 'table' or type(b) ~= 'table' then
      return false
   end
   -- every entry of a must match the entry of b with the same key
   for key, va in pairs(a) do
      if not tableCompare(va, b[key]) then return false end
   end
   -- and vice versa, to catch keys present only in b
   for key, vb in pairs(b) do
      if not tableCompare(a[key], vb) then return false end
   end
   return true
end
-- Concatenate two possibly null arrays.
-- When one argument is missing the other one is returned as is;
-- otherwise a new array holding the elements of both is returned.
local function tableAppend(a1, a2)
   if not a1 then return a2 end
   if not a2 then return a1 end
   local joined, count = {}, 0
   for _, src in ipairs({ a1, a2 }) do
      for _, v in ipairs(src) do
         count = count + 1
         joined[count] = v
      end
   end
   return joined
end
-- Concatenate strings from table (skipping non-string content.)
-- Optional argument <sep> is inserted between the retained strings and
-- optional argument <i> is the index of the first entry of <a> to
-- consider. They default to "" and 1, like the matching arguments of
-- table.concat. Code in this file already calls tableConcat(ls, nil, 2)
-- and relies on the start index being honored; it used to be ignored,
-- so the first entry was included a second time.
local function tableConcat(a, sep, i)
   local first = i or 1
   local b = {}
   for k,v in ipairs(a) do
      if k >= first and type(v) == 'string' then b[1+#b]=v end
   end
   return table.concat(b, sep)
end
-- Evaluate a lua expression, return nil on error.
-- The chunk is handed to load() through a string.gmatch iterator
-- rather than as a plain string. On success all values returned by
-- the chunk are returned; compile or runtime errors return nothing.
local function evalLuaExpression(s)
   assert(type(s)=='string')
   local chunk = load(string.gmatch(s,".*"))
   if not chunk then
      return -- did not compile
   end
   -- strips the pcall status and drops the message on failure
   local function results(ok, ...)
      if not ok then return end
      return ...
   end
   return results(pcall(chunk))
end
-- Bitwise manipulations
-- try lua53 operators otherwise revert to iterative version
-- The operator-based version is kept inside a string and compiled
-- through evalLuaExpression, which returns nil when the chunk fails
-- to compile; <bit> is then nil and the fallback below is installed.
-- In the string version, lshift with a negative <a> and a negative
-- <b> is computed on the complement of <a> and complemented back.
local bit = evalLuaExpression([[
local bit = {}
function bit.bnot(a) return ~a end
function bit.bor(a,b) return a | b end
function bit.band(a,b) return a & b end
function bit.bxor(a,b) return a ~ b end
function bit.lshift(a,b) return a < 0 and b < 0 and ~((~a) << b) or a << b end
return bit
]])
if not bit then
-- Arithmetic fallback: bitwise or computed one bit at a time.
-- r accumulates the result, c is the weight of the current bit and
-- d is its negation, used for the sign once both operands have
-- been reduced to 0 or -1.
local function bor(a,b)
local r, c, d = 0, 1, -1
while a > 0 or b > 0 or a < -1 or b < -1 do
if a % 2 > 0 or b % 2 > 0 then r = r + c end
a, b, c, d = math.floor(a / 2), math.floor(b / 2), c * 2, d * 2 end
if a < 0 or b < 0 then r = r + d end
return r end
bit = {}
-- the other operations are derived from bor and the complement -1-a
function bit.bnot(a) return -1-a end
function bit.bor(a,b) return bor(a,b) end
function bit.band(a,b) return -1-bor(-1-a,-1-b) end
function bit.bxor(a,b) return bor(-1-bor(a,-1-b),-1-bor(-1-a,b)) end
-- a negative <b> scales down, floor rounds toward minus infinity
function bit.lshift(a,b) return math.floor(a * 2 ^ b) end
end
-- Coroutine helpers.
-- This code uses many coroutines that yield lines or tokens.
-- All functions that can yield take an options table as first argument.
-- Wrap a coroutine f into an iterator
-- The options and all the extra arguments are passed
-- to the coroutine when it starts. Together with the
-- above calling convention, this lets us specify
-- coroutine pipelines (see example in function "cpp".)
-- The coroutine is created and primed immediately, but <f> itself
-- only starts running on the first call of the returned iterator.
-- Errors raised inside the coroutine are re-raised by the iterator.
local function wrap(options, f, ...)
   local co = coroutine.create(function(...)
      coroutine.yield(nil) -- park until the iterator is first called
      f(...)
   end)
   coroutine.resume(co, options, ...)
   -- strips the resume status, re-raising failures unchanged
   local function check(ok, ...)
      if ok then
         return ...
      end
      local message = ...
      error(message, 0)
   end
   return function()
      if coroutine.status(co) == 'dead' then
         return
      end
      return check(coroutine.resume(co))
   end
end
-- Collect coroutine outputs into an array
-- The options and the extra arguments are passed to the coroutine.
-- Only the first value of each yield is kept; collection stops at
-- the first nil.
local function callAndCollect(options, f, ...) -- Bell Labs nostalgia
   local nextValue = wrap(options, f, ...)
   local collected, count = {}, 0
   local value = nextValue()
   while value ~= nil do
      count = count + 1
      collected[count] = value
      value = nextValue()
   end
   return collected
end
-- Yields all outputs from iterator iter.
-- Argument options is ignored.
-- The final result of iter (whose first value is false or nil) is
-- yielded as well before the function returns.
local function yieldFromIterator(options_, iter)
   -- yields everything it receives and reports the first value
   local function forward(first, ...)
      coroutine.yield(first, ...)
      return first
   end
   repeat
      local first = forward(iter())
   until not first
end
-- Yields all values from array <arr>.
-- The values are yielded one after the other, in index order, up to
-- the first missing entry. Every yield is augmented with all extra
-- arguments passed to the function.
-- Argument options is ignored.
local function yieldFromArray(options_, arr, ...)
   local index = 1
   local value = arr[index]
   while value ~= nil do
      coroutine.yield(value, ...)
      index = index + 1
      value = arr[index]
   end
end
---------------------------------------------------
---------------------------------------------------
---------------------------------------------------
-- INITIAL PREPROCESSING
-- A routine that pulls lines from a line iterator
-- and yields each of them together with a location string
-- made of the optional prefix, a colon, and the line number
-- (counted from one).
-- Argument options is ignored.
-- Lua provides good line iterators such as:
-- io.lines(filename) filedesc:lines() str:gmatch("[^\n]+")
local function yieldLines(options_,lineIterator,prefix)
   local where = prefix or ""
   assert(type(where)=='string')
   local lineno = 0
   for text in lineIterator do
      lineno = lineno + 1
      local location = string.format("%s:%d", where, lineno)
      coroutine.yield(text, location)
   end
end
-- A routine that obtains lines from coroutine <lines>,
-- joins lines terminated by a backslash, and yields the
-- resulting lines. The coroutine is initialized with
-- argument <options> and all extra arguments.
-- A joined line keeps the location of its first physical line.
-- Reference: https://gcc.gnu.org/onlinedocs/cpp/Initial-processing.html (3)
local function joinLines(options, lines, ...)
   local nextLine = wrap(options, lines, ...)
   local s, n = nextLine()
   while s ~= nil do
      -- keep gluing while the line ends with a backslash
      -- (possibly followed by spaces)
      while type(s) == 'string' and s:find("\\%s*$") do
         local continuation = nextLine() or ""
         s = s:gsub("\\%s*$", "") .. continuation
      end
      coroutine.yield(s, n)
      s, n = nextLine()
   end
end
-- A routine that obtains lines from coroutine <lines>, eliminates the
-- comments and yields the resulting lines. The coroutine is
-- initialized with argument <options> and all extra arguments.
-- A "//" comment is cut off; a "/* */" comment is replaced by a single
-- space. When a comment spans several lines, the text following its
-- end is appended to the line where it started, and the result is
-- yielded with the location of that first line.
-- Reference: https://gcc.gnu.org/onlinedocs/cpp/Initial-processing.html (4)
local function eliminateComments(options, lines, ...)
local lineIterator = wrap(options, lines, ...)
local s,n = lineIterator()
while type(s) == 'string' do
-- inString is false outside literals, otherwise the byte of the
-- quote character that opened the current literal
local inString = false
-- q scans the characters of interest: quotes, backslash and slash
local q = s:find("[\'\"\\/]", 1)
while q ~= nil do
if hasOption(options,"-d:comments") then
xdebug(n, "comment: [%s][%s] %s",s:sub(1,q-1),s:sub(q),inString)
end
local c = s:byte(q)
if inString then
if c == 92 then -- \ : skip the escaped character
q = q + 1
elseif c == inString then -- matching closing quote
inString = false
end
else
if c == 34 or c == 39 then -- " or '
inString = c
elseif c == 47 and s:byte(q+1) == 47 then -- "//"
s = s:sub(1,q-1)
elseif c == 47 and s:byte(q+1) == 42 then -- "/*"
local p = s:find("%*/",q+2)
if p ~= nil then
-- comment closed on the same line
s = s:sub(1,q-1) .. " " .. s:sub(p+2)
else
-- pull lines until one contains the closing "*/"
s = s:sub(1,q-1)
local ss,pp
repeat
ss = lineIterator()
xassert(ss ~= nil, options, n, "Unterminated comment")
pp = ss:find("%*/")
until pp
s = s .. " " .. ss:sub(pp+2)
end
end
end
-- resume scanning after the current position
q = s:find("[\'\"\\/]", q+1)
end
coroutine.yield(s, n)
s, n = lineIterator()
end
end
---------------------------------------------------
---------------------------------------------------
---------------------------------------------------
-- TOKENIZER
-- Identifiers that are reserved keywords.
local keywordTable = {
------ Standard keywords
"auto", "break", "case", "char", "const", "continue", "default", "do",
"double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
"long", "register", "return", "short", "signed", "sizeof", "static", "struct",
"switch", "typedef", "union", "unsigned", "void", "volatile", "while",
------ Nonstandard or dialect specific keywords do not belong here
------ because the main function of this table is to say which
------ identifiers cannot be variable names.
}
-- Punctuators recognized by the tokenizer.
local punctuatorTable = {
"+", "-", "*", "/", "%", "&", "|", "^", ">>", "<<", "~",
"=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", ">>=", "<<=",
"(", ")", "[", "]", "{", "}", "++", "--",
"==", "!=", ">=", "<=", ">", "<", "&&", "||", "!",
".", "->", "*", "&", "?", ":", "::", "->*", ".*", ";", ",",
"#", "##", "...", "@", "\\" -- preprocessor stuff
}
-- keywordHash[name] is true for every keyword
local keywordHash = {}
for _,v in ipairs(keywordTable) do
keywordHash[v] = true
end
-- punctuatorHash has two kinds of keys:
--   punctuatorHash[str] is true for every punctuator string, and
--   punctuatorHash[byte] is the length of the longest punctuator
--   whose first byte is <byte>.
local punctuatorHash = {}
for _,v in ipairs(punctuatorTable) do
local l = v:len()
local b = v:byte()
punctuatorHash[v] = true
punctuatorHash[b] = max(l,punctuatorHash[b])
end
-- The following functions test the types of the tokens returned by the tokenizer.
-- They should not be applied to arbitrary strings.
-- True when <tok> is a string starting with a whitespace character.
local function isSpace(tok)
   if type(tok) ~= 'string' then
      return false
   end
   return tok:find("^%s") ~= nil
end
-- Subtype of space: true when <tok> is a string starting with a newline.
local function isNewline(tok)
   if type(tok) ~= 'string' then
      return false
   end
   return tok:find("^\n") ~= nil
end
-- True when <tok> is a string starting with a digit or a dot.
-- Only the first character is examined.
local function isNumber(tok)
   if type(tok) ~= 'string' then
      return false
   end
   return tok:find("^[.0-9]") ~= nil
end
-- True when <tok> is a string starting with a single or double quote.
local function isString(tok)
   return type(tok) == 'string' and tok:find("^[\'\"]") ~= nil
end
-- Tests whether <tok> looks like a header name: either a string
-- starting with a double quote, or a string starting with '<' and
-- ending with '>'. Returns a match position (true value) on success,
-- nil for other strings, and false for non-strings.
local function isHeaderName(tok)
   if type(tok) ~= 'string' then
      return false
   end
   local quoted = tok:find("^\"")
   if quoted then
      return quoted
   end
   if not tok:find("^<") then
      return nil
   end
   return (tok:find(">$"))
end
-- True when <tok> is a string with an entry in punctuatorHash.
local function isPunctuator(tok)
   if type(tok) ~= 'string' then
      return false
   end
   return punctuatorHash[tok] ~= nil
end
-- True when <tok> is a string starting with a letter, an underscore
-- or a dollar sign.
local function isIdentifier(tok)
   if type(tok) ~= 'string' then
      return false
   end
   return tok:find("^[A-Za-z_$]") ~= nil
end
-- Subtype of identifier: true when <tok> has an entry in keywordHash.
local function isKeyword(tok)
   local entry = keywordHash[tok]
   return entry ~= nil
end
-- Subtype of identifier: an identifier that is not a keyword.
local function isName(tok)
   if not isIdentifier(tok) then
      return false
   end
   return not keywordHash[tok]
end
-- Magic tokens are used to mark macro expansion boundaries (see expandMacros.)
-- Any true value that is not a string counts as a magic token;
-- nil and false are returned unchanged.
local function isMagic(tok)
   if not tok then
      return tok
   end
   return type(tok) ~= 'string'
end
-- Treats magic token as space.
local function isBlank(tok)
   if isMagic(tok) then
      return true
   end
   return isSpace(tok)
end
-- The tokenizeLine() function takes a line, splits it into tokens,
-- and yields tokens and locations. The number tokens are the weird
-- preprocessor numbers defined by ansi c. The string tokens include
-- character constants and angle-bracket delimited strings occurring
-- after an include directive. Every line begins with a newline
-- token giving the proper indentation. All subsequent spaces
-- are reduced to a single space character.
-- Arguments: <s> is the line (or an array of precomputed tokens),
-- <n> is the location yielded with every token, and a true
-- <notNewline> suppresses the initial newline token.
-- Lines containing only whitespace yield nothing.
local function tokenizeLine(options, s, n, notNewline)
-- little optimization for multiline macros
-- s may be an array of precomputed tokens
if type(s) == 'table' then
return yieldFromArray(options, s, n)
end
-- normal operation
assert(type(s) == 'string')
-- p is the current scanning position, initially the first non-space
-- character (nil when there is none)
local p = s:find("[^%s]")
-- produce a newline token
-- (a newline character followed by the leading whitespace of the line)
if p and not notNewline then
local r = '\n' .. s:sub(1,p-1)
coroutine.yield(r, n)
end
-- produce one token
-- returns the token and its location, or nil at the end of the line
local function token()
local b, l, r
if hasOption(options, "-d:tokenize") then
xdebug(n, "[%s][%s]",s:sub(1,p-1),s:sub(p))
end
-- space
-- (a run of whitespace becomes one " " token; trailing whitespace
-- ends the line)
l = s:find("[^%s]", p)
if l == nil then
return nil
elseif l > p then
p = l
return " ", n
end
-- identifier
r = s:match("^[a-zA-Z_$][a-zA-Z0-9_$]*", p)
if r ~= nil then
p = p + r:len()
return r, n
end
-- preprocessor numbers
r = s:match("^%.?[0-9][0-9a-zA-Z._]*", p)
if r ~= nil then
l = r:len()
-- a sign directly following e, E, p or P belongs to the number
while r:find("[eEpP]$") and s:find("^[-+]", p+l) do
r = r .. s:match("^[-+][0-9a-zA-Z._]*", p+l)
l = r:len()
end
p = p + l
return r, n
end
-- angle-delimited strings in include directives
b = s:byte(p)
if b == 60 and s:find("^%s*#%s*include") then
r = s:match("^<[^>]+>", p)
if r ~= nil then
p = p + r:len()
return r, n
end
end
-- punctuator
-- punctuatorHash[b] is the longest punctuator length for first byte b;
-- candidates are tried from the longest to the shortest
l = punctuatorHash[b]
if l ~= nil then
while l > 0 do
r = s:sub(p,p+l-1)
if punctuatorHash[r] then
p = p + l
return r, n
end
l = l - 1
end
end
-- string
if b == 34 or b == 39 then -- quotes
local q = p
repeat
-- look for the next quote or backslash
q = s:find("[\'\"\\]", q + 1)
l = s:byte(q)
xassert(q ~= nil, options, n, "Unterminated string or character constant")
if l == 92 then
-- backslash: skip the escaped character
q = q + 1
end
until l == b
r = s:sub(p,q)
p = q + 1
return r, n
end
-- other stuff (we prefer to signal an error here)
xerror(options, n,"Unrecognized character (%s)", s:sub(p))
end
-- loop
if p then
for tok,tokn in token do
coroutine.yield(tok, tokn)
end
end
end
-- Obtains lines and their locations from coroutine <lines>
-- and yields the tokens of each line by means of tokenizeLine.
-- The coroutine is initialized with argument <options> and all
-- extra arguments.
local function tokenize(options, lines, ...)
   local nextLine = wrap(options, lines, ...)
   local s, n = nextLine()
   while s ~= nil do
      tokenizeLine(options, s, n)
      s, n = nextLine()
   end
end
---------------------------------------------------
---------------------------------------------------
---------------------------------------------------
-- PREPROCESSING
-- Preprocessing is performed by two coroutines. The first one
-- processes all the preprocessor directives and yields the remaining
-- lines. The second one processes tokens from the remaining lines and
-- performs macro expansions. Both take a table of macro definitions as
-- argument. The first one writes into the table and the second one
-- reads from it.
--
-- Each macro definition is an array of tokens (for a single line
-- macro) or a table whose entry <"lines"> contains an array of arrays
-- of tokens (#defmacro). If the macro takes arguments, the entry
-- <"args"> contains a list of argument names. If the macro is
-- recursive (#defrecmacro), the entry <recursive> is set.
-- Alternatively, the macro definition may be a function called at
-- macro-expansion time. This provides for complicated situations.
-- forward declarations
-- Both names are introduced here as empty placeholder functions so
-- that the local names exist before their definitions; expandMacros
-- is assigned its actual body below.
local function expandMacros() end
local function processDirectives() end
-- Starting with the second coroutine which takes a token producing
-- coroutine and yields the preprocessed tokens. Argument macros is
-- the macro definition table.
-- The standard mandates that the result of a macro-expansion must be
-- scanned for further macro invocations whose argument list possibly
-- consume tokens that follow the macro-expansion. This means that one
-- cannot recursively call expandMacros but one must prepend the
-- macro-expansion in front of the remaining tokens. The standard also
-- mandates that the result of any macro-expansion must be marked to
-- prevent recursive invocation of the macro that generated it,
-- whether when expanding macro arguments or expanding the macro
-- itself. We achieve this by bracketing every macro-expansion with
-- magic tokens that track which macro definitions must be disabled.
-- These magic tokens are removed later in the coroutines
-- <filterSpaces> or <preprocessedLines>.
expandMacros = function(options, macros, tokens, ...)
-- basic iterator
local ti = wrap(options, tokens, ...)
-- prepending tokens in front of the token stream
-- <prepend> is used as a stack: the entry at the end is read first
local prepend = {}
local function prependToken(s,n)
table.insert(prepend,{s,n}) end
-- every token of iterator <pti> is inserted at the same position so
-- that the tokens are later read back in the order <pti> produced them
local function prependTokens(pti)
local pos = 1+#prepend
for s,n in pti do table.insert(prepend,pos,{s,n}) end end
-- reads prepended tokens first, then the underlying token stream
local ti = function()
if #prepend > 0 then return unpack(table.remove(prepend))
else return ti() end end
-- iterator that handles magic tokens to update macro definition table
-- 'push' installs a new table layered on top of the current one in
-- which macro <symb> is disabled; 'pop' returns to the table below.
-- The magic tokens themselves are yielded to the consumer.
local ti = function()
local s,n = ti()
while type(s) == 'table' do
if s.tag == 'push' then
local nmacros = {}
setmetatable(nmacros, {__index=macros})
if s.symb then nmacros[s.symb] = false end
macros = nmacros
elseif s.tag == 'pop' then
local mt = getmetatable(macros)
if mt and mt.__index then macros = mt.__index end
end
coroutine.yield(s,n)
s,n = ti()
end
return s,n
end
-- redefine ti() to ensure tok,n remain up-to-date
local tok,n = ti()
local ti = function() tok,n=ti() return tok,n end
-- collect one macro arguments into an array
-- stop when reaching a closing parenthesis or a comma
-- (commas do not stop the collection when <varargs> is set;
-- <count> tracks the nesting depth of parentheses)
local function collectArgument(ti, varargs)
local count = 0
local tokens = {}
ti()
-- skip leading spaces
while isSpace(tok) do
tok = ti()
end
while tok do
if tok == ')' and count == 0 then
break
elseif tok == ')' then
count = count - 1
elseif tok == '(' then
count = count + 1
elseif tok == ',' and count == 0 and not varargs then
break
end
if isSpace(tok) then tok = " " end
tokens[1+#tokens] = tok
tok = ti()
end
-- drop a trailing space
if #tokens > 0 and isSpace(tokens[#tokens]) then
tokens[#tokens] = nil
end
return tokens
end
-- collects all macro arguments
-- The result maps each argument name to its macro-expanded tokens;
-- entry [0] maps each name to the unexpanded tokens. A missing
-- variadic argument is recorded as { negComma=true }.
local function collectArguments(ti,def,ntok,nn)
local args = def.args
local nargs = { [0]={} }
if #args == 0 then ti() end
for _,name in ipairs(args) do
if tok == ')' and name == "__VA_ARGS__" then
nargs[0][name] = { negComma=true }
nargs[name] = { negComma=true }
else
xassert(tok=='(' or tok==',', options, nn, "not enough arguments for macro '%s'", ntok)
local arg = collectArgument(ti, name == "__VA_ARGS__")
nargs[0][name] = arg
nargs[name] = callAndCollect(options, expandMacros, macros, yieldFromArray, arg, nn)
end
end
if def.nva then -- named variadic argument (implies dialectGnu)
nargs[def.nva] = nargs["__VA_ARGS__"]
nargs[0][def.nva] = nargs[0]["__VA_ARGS__"]
end
xassert(tok, options, nn, "unterminated arguments for macro '%s'", ntok)
xassert(tok==')', options, nn, "too many arguments for macro '%s'", ntok)
return nargs
end
-- coroutine that substitute the macro arguments
-- and stringification and concatenation are handled here
-- <def> is the array of definition tokens, <nargs> the argument
-- table built by collectArguments, and <n> the location attached to
-- the yielded tokens.
local function substituteArguments(options, def, nargs, n, inDirective)
local uargs = nargs[0] or nargs -- unexpanded argument values
if inDirective then nargs = uargs end -- use unexpanded arguments in directives
-- prepare loop
-- i is the current token, j and k the next two non-blank tokens
local i,j,k = 1,1,1
while def[i] do
if isBlank(def[i]) then
-- copy blanks
coroutine.yield(def[i], n)
else
-- positions j and k on next non-space tokens
local function updateJandK()
if j <= i then j=i
repeat j=j+1 until def[j] == nil or not isBlank(def[j]) end
if k <= j then k=j
repeat k=k+1 until def[k] == nil or not isBlank(def[k]) end
end
updateJandK()
-- alternatives
if def[i]=='#' and def[j] and nargs[def[j]] then
-- stringification (with the weird quoting rules)
local v = { '\"' }
for _,t in ipairs(uargs[def[j]]) do
if type(t)=='string' then
if t:find("^%s+$") then t = ' ' end
if t:find("^[\'\"]") then t = string.format("%q", t):sub(2,-2) end
v[1+#v] = t end end
v[1+#v] = '\"'
coroutine.yield(tableConcat(v), n)
i = j
elseif def.nva and def[i]==',' and def[j]=='##' and def[k]==def.nva then
-- named variadic macro argument with ## to signal negative comma (gcc crap)
if nargs[def.nva].negComma then i=i+1 end
while i < j do coroutine.yield(def[i], n) ; i=i+1 end
elseif def[i]==',' and def[j]=='__VA_ARGS__' and def[k]==')' then
-- __VA_ARGS__ with implied negative comma semantics
if nargs[def[j]].negComma then i=i+1 end
while i < j do coroutine.yield(def[i], n) ; i=i+1 end
i = j-1
elseif def[j]=='##' and def[k] and not inDirective then
-- concatenation
-- the operands (unexpanded arguments or plain tokens) are glued
-- into one string which is tokenized again
local u = {}
local function addToU(s)
if nargs[s] then for _,v in ipairs(uargs[s]) do u[1+#u] = v end
else u[1+#u]=s end end
addToU(def[i])
while def[j] == '##' and def[k] do
addToU(def[k])
i = k
updateJandK()
end
tokenizeLine(options, tableConcat(u), n, true)
elseif nargs[def[i]] then
-- substitution
yieldFromArray(options, nargs[def[i]], n)
else
-- copy
coroutine.yield(def[i], n)
end
end
i = i + 1
end
end
-- main loop
local newline, directive = true, false
while tok ~= nil do
-- detects directive lines: a '#' appearing first on a line marks
-- the rest of that line, whose tokens are passed through unexpanded
if newline and tok == '#' then
newline, directive = false, true
elseif not isBlank(tok) then
newline = false
elseif isNewline(tok) then
newline, directive = true, false
end
-- process code
local def = macros[tok]
if not def or directive then
-- not a macro
coroutine.yield(tok, n)
elseif type(def) == 'function' then
-- magic macro
def(ti,tok,n)
elseif def.args == nil then
-- object-like macro
-- (pushed in reverse: the 'push' token is read first and the
-- 'pop' token last)
prependToken({tag='pop'},n)
prependTokens(wrap(options, substituteArguments, def, {}, n))
prependToken({tag='push', symb=tok},n)
else
-- function-like macro
local ntok, nn = tok,n
local spc = false
ti()
if isSpace(tok) then spc=true ti() end
if (tok ~= '(') then
-- no argument list: the name is not a macro invocation
coroutine.yield(ntok, nn)
if spc then coroutine.yield(' ', n) end
if tok then prependToken(tok,n) end
else
local nargs = collectArguments(ti,def,ntok,nn)
if def.lines == nil then
-- single-line function-like macro
prependToken({tag='pop'},n)
prependTokens(wrap(options, substituteArguments, def, nargs, nn))
prependToken({tag='push', symb=ntok},nn)
else
-- multi-line function-like macro
-- (def.lines alternates token arrays and their locations)
local lines = def.lines
-- a coroutine that yields the macro definition
local function yieldMacroLines()
local count = 0
for i=1,#lines,2 do
local ls,ln = lines[i], lines[i+1]
-- are we possibly in a cpp directive
local dir = false
if ls[2] and ls[2]:find('^#') then
dir = isIdentifier(ls[3]) and ls[3] or ls[4]
end
if dir and nargs[dir] then
dir = false -- leading stringification
elseif dir == 'defmacro' then
count = count + 1 -- entering a multiline macro
elseif dir == 'endmacro' then
count = count - 1 -- leaving a multiline macro
end
dir = dir or count > 0
-- substitute
ls = callAndCollect(options,substituteArguments,ls,nargs,ln,dir)
-- compute lines (optimize speed by passing body lines as tokens)
local j=1
while isBlank(ls[j]) do j=j+1 end
if ls[j] and ls[j]:find("^#") then -- but not directives
ls = ls[1]:sub(2) .. tableConcat(ls, nil, 2)
end
coroutine.yield(ls,ln)
end
end
-- recursively reenters preprocessing subroutines in order to handle
-- preprocessor directives located inside the macro expansion. As a result
-- we cannot expand macro invocations that extend beyond the macro-expansion.
local nmacros = {}
setmetatable(nmacros,{__index=macros})
if not def.recursive then nmacros[ntok]=false end
if not def.recursive then coroutine.yield({tag='push',symb=ntok}) end
expandMacros(options, nmacros, tokenize, processDirectives, nmacros, yieldMacroLines)
if not def.recursive then coroutine.yield({tag='pop'}) end
end
end
end
ti()
end
end
-- Processing conditional directive requires evaluating conditions
-- This function takes an iterator on preprocessed expression tokens
-- and computes the value. This does not handle defined(X) expressions.
-- Optional argument resolver is a function that takes an identifier
-- name and returns a value. Otherwise zero is assumed.
local function evaluateCppExpression(options, tokenIterator, n, resolver)
-- redefine token iterator to skip spaces and update tok
local tok
local function ti()
repeat tok = tokenIterator()
until not isBlank(tok) return tok
end
-- operator tables
local unaryOps = {
["!"] = function(v) return v == 0 and 1 or 0 end,
["~"] = function(v) return bit.bnot(v) end,
["+"] = function(v) return v end,
["-"] = function(v) return -v end,
["L"] = function(v) return v end
}
local binaryOps = {
["*"] = function(a,b) return a * b end,
["/"] = function(a,b) xassert(b~=0,options,n,"division by zero"); return math.floor(a / b) end,
["%"] = function(a,b) xassert(b~=0,options,n,"division by zero"); return a % b end,
["+"] = function(a,b) return a + b end,
["-"] = function(a,b) return a - b end,
[">>"] = function(a,b) return bit.lshift(a, -b) end,
["<<"] = function(a,b) return bit.lshift(a, b) end,
[">="] = function(a,b) return a >= b and 1 or 0 end,
["<="] = function(a,b) return a <= b and 1 or 0 end,
[">"] = function(a,b) return a > b and 1 or 0 end,
["<"] = function(a,b) return a < b and 1 or 0 end,
["=="] = function(a,b) return a == b and 1 or 0 end,
["!="] = function(a,b) return a ~= b and 1 or 0 end,
["&"] = function(a,b) return bit.band(a,b) end,
["^"] = function(a,b) return bit.bxor(a,b) end,
["|"] = function(a,b) return bit.bor(a,b) end,
["&&"] = function(a,b) return (a ~= 0 and b ~= 0) and 1 or 0 end,
["||"] = function(a,b) return (a ~= 0 or b ~= 0) and 1 or 0 end,
}
local binaryPrec = {
["*"] = 1, ["/"] = 1, ["%"] = 1,
["+"] = 2, ["-"] = 2,
[">>"] = 3, ["<<"] = 3,
[">="] = 4, ["<="] = 4, ["<"] = 4, [">"] = 4,
["=="] = 5, ["!="] = 5,
["&"] = 6, ["^"] = 7, ["|"] = 8,
["&&"] = 9, ["||"] = 10
}