-
Notifications
You must be signed in to change notification settings - Fork 0
/
CalcLex.cpp
149 lines (135 loc) · 3.75 KB
/
CalcLex.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#include "CalcLex.h"
// Creates a lexer with no input attached: `source` stays null until
// tokenizeString() or tokenizeFile() installs a stream, and `currToken`
// starts out empty.
CalcLex::CalcLex()
    : source(nullptr),
      currToken("")
{}
// Nothing to release here: tokenizeString() and tokenizeFile() dispose of
// their input stream before they return, and the token list they build is
// returned to the caller rather than stored in the lexer.
CalcLex::~CalcLex() = default;
// Tokenizes the text held in `data`.
//
// Returns whatever tokenize() returns: a heap-allocated list of symbols whose
// last entry is the EOFSY symbol, or a null pointer when a lexical error was
// reported. Nothing in this file deletes the returned list, so the caller is
// expected to do so.
list<Symbol>* CalcLex::tokenizeString(string data) {
    // The stream is a local object, so it is destroyed on every exit path
    // (including an exception thrown while tokenizing) without manual delete.
    istringstream input(data);
    source = &input;

    list<Symbol>* tokens = nullptr;
    try {
        tokens = tokenize();
    }
    catch (...) {
        // Never leave `source` pointing at a stream that is about to die.
        source = nullptr;
        throw;
    }

    source = nullptr;
    return tokens;
}
// Tokenizes the contents of the file at `filePath`.
//
// If the file cannot be opened, an error message is written to cout and a
// null pointer is returned. Otherwise returns whatever tokenize() returns: a
// heap-allocated list of symbols whose last entry is the EOFSY symbol, or a
// null pointer when a lexical error was reported. Nothing in this file deletes
// the returned list, so the caller is expected to do so.
list<Symbol>* CalcLex::tokenizeFile(string filePath) {
    // The stream is a local object: its destructor closes the file on every
    // exit path, so no explicit close()/delete is required.
    ifstream file(filePath);
    if (file.fail()) {
        cout << "Error: Could not open file " << filePath << endl;
        return nullptr;
    }

    source = &file;

    list<Symbol>* tokens = nullptr;
    try {
        tokens = tokenize();
    }
    catch (...) {
        // Never leave `source` pointing at a stream that is about to die.
        source = nullptr;
        throw;
    }

    source = nullptr;
    return tokens;
}
// Drains the current `source` stream into a freshly allocated list of symbols.
//
// gettoken() is called repeatedly; each call starts with an empty `currToken`.
// Tokens reported as COMMENT (which gettoken() also uses for whitespace) are
// dropped. The EOFSY token is appended and ends the scan.
//
// Returns the list on success. On a lexical error a message is written to
// cout, the partially built list is freed, and a null pointer is returned.
list<Symbol>* CalcLex::tokenize() {
    list<Symbol>* tokens = new list<Symbol>; // Holds every token returned from gettoken()

    while (true) {
        currToken = ""; // each token accumulates its text from scratch

        const TOKEN tok = gettoken();

        if (tok == TOKEN::COMMENT) // Comments are not added to the list
            continue;

        if (tok == TOKEN::ERROR) {
            cout << "Lex Error: Unrecognized symbol \"" << currToken << "\"" << endl;
            delete tokens; // the caller only sees NULL, so release the partial list here
            return nullptr;
        }

        tokens->push_back(Symbol(tok, currToken));

        if (tok == TOKEN::EOFSY) // When we encounter this, we're done
            break;
    }

    return tokens;
}
// Consumes one character from `source`, appends it to `currToken`, and
// returns it. Upper-case ASCII letters are folded to lower case first, so the
// stored token text is always lower case.
//
// Returns 0 without touching `currToken` when no character could be read,
// i.e. at end of input or after any other stream failure.
char CalcLex::getNext() {
    const int raw = source->get();

    // fail() is set both when get() hits end of input and when the stream
    // breaks for another reason; checking only eof() would let the EOF
    // sentinel through as a bogus character in the latter case.
    if (source->fail())
        return 0;

    char next = static_cast<char>(raw);
    if (next >= 'A' && next <= 'Z') // make letters lowercase
        next += ('a' - 'A');

    currToken += next;
    return next;
}
// Returns the next character of `source` without consuming it and without
// adding it to `currToken`. Upper-case ASCII letters are folded to lower case,
// the same way getNext() folds them, so look-ahead decisions agree with the
// character that getNext() will subsequently deliver.
//
// When nothing is left to read, peek() yields its end-of-file value, which is
// returned here converted to char.
char CalcLex::peekNext() {
    char next = static_cast<char>(source->peek());
    if (next >= 'A' && next <= 'Z')
        next += ('a' - 'A'); // add (not subtract) the offset to reach lower case
    return next;
}
// Logic path for when a '/' has just been consumed.
//
// If the next character is not '*', the slash is a division operator and
// DIVOP is returned. Otherwise the slash opens a comment, which is consumed up
// to and including the first of:
//   - a '*' immediately followed by '/',
//   - a newline,
//   - the end of the input.
// COMMENT is returned in all three cases.
TOKEN CalcLex::slashPath() {
    if (peekNext() != '*') // its a division symbol
        return TOKEN::DIVOP;

    getNext(); // consume the '*' that opens the comment

    while (true) {
        const char next = getNext();

        // Stop when the input is exhausted (or the stream broke); without
        // this an unterminated comment would spin forever.
        if (!source->good())
            break;

        // A newline ends the comment; whatever follows belongs to the next
        // token and must not be consumed here.
        if (next == '\n')
            break;

        if (next == '*' && peekNext() == '/') {
            getNext(); // consume the closing '/'
            break;
        }
    }

    return TOKEN::COMMENT;
}
// Logic path for a token whose first character is a digit.
//
// Keeps consuming characters while they are digits, additionally accepting a
// single '.' (the first one seen). Scanning stops, without consuming, at the
// first character that is neither. Always returns NUMCONST; the text of the
// number is accumulated in currToken by getNext().
TOKEN CalcLex::numPath() {
    bool seenPoint = false; // becomes true once the one permitted '.' is consumed

    for (;;) {
        const char ahead = peekNext();
        const bool isDigit = (ahead >= '0' && ahead <= '9');

        if (!isDigit) {
            // Only the first decimal point may continue the number.
            if (seenPoint || ahead != '.')
                break;
            seenPoint = true;
        }

        getNext();
    }

    return TOKEN::NUMCONST;
}
// Logic path for a token whose first character is a letter.
//
// Consumes the following run of lower-case letters, digits and underscores
// (as reported by peekNext()), then classifies the accumulated currToken:
// "read" -> READSY, "write" -> WRITESY, anything else -> ID.
TOKEN CalcLex::letterPath() {
    for (;;) {
        const char ahead = peekNext();
        const bool isLetter = (ahead >= 'a' && ahead <= 'z');
        const bool isDigit = (ahead >= '0' && ahead <= '9');

        if (!(isLetter || isDigit || ahead == '_'))
            break;

        getNext();
    }

    if (currToken == "read")
        return TOKEN::READSY;
    if (currToken == "write")
        return TOKEN::WRITESY;
    return TOKEN::ID;
}
// Reads the next token from `source`.
//
// Returns the token's type; its text is accumulated in currToken by the
// getNext() calls made here and in the helper paths.
//   - space, newline and tab are reported as COMMENT, which tokenize() discards
//   - a digit starts a numeric constant, a letter starts an identifier/keyword
//   - ':' is only valid as the first half of ":="
//   - the 0 returned by getNext() at end of input yields EOFSY
//   - anything else yields ERROR, which makes tokenize() return a null pointer
TOKEN CalcLex::gettoken() {
    const char start = getNext();

    if (start >= '0' && start <= '9')
        return numPath();
    if (start >= 'a' && start <= 'z')
        return letterPath();

    switch (start) {
    case ' ':
    case '\n':
    case '\t':
        return TOKEN::COMMENT;
    case '(':
        return TOKEN::LPAREN;
    case ')':
        return TOKEN::RPAREN;
    case '+':
        return TOKEN::ADDOP;
    case '-':
        return TOKEN::SUBOP;
    case '*':
        return TOKEN::MULTOP;
    case '/':
        return slashPath();
    case ':':
        if (peekNext() == '=') {
            getNext();
            return TOKEN::ASSIGNOP;
        }
        return TOKEN::ERROR;
    case '\0': // end of file
        // Only the exact 0 sentinel means end of input. Where char is signed,
        // bytes >= 0x80 are negative and must be reported as unrecognized
        // symbols rather than silently ending the scan.
        return TOKEN::EOFSY;
    default: // for any unrecognized symbols
        return TOKEN::ERROR;
    }
}