-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlexicalAnalysis.cpp
More file actions
150 lines (132 loc) · 3.36 KB
/
Copy pathlexicalAnalysis.cpp
File metadata and controls
150 lines (132 loc) · 3.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <map>
#include <memory>
#include <string>
#include <vector>
//===----------------------------------------------------------------------===//
// Lexer
//===----------------------------------------------------------------------===//
// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
// of these for known things.
enum Token {
  // End of input.
  tok_eof = -1,

  // Lexemes that carry a payload in one of the lexer's global variables.
  tok_VARIABLE = -2, // identifier that is not a reserved word
  tok_INTEGER = -3,  // run of decimal digits
  tok_TEXT = -4,     // double-quoted text

  // Operators.
  tok_ASSIGN_SYMBOL = -5, // ":="

  // Reserved words; each enumerator is named after its spelling.
  tok_FUNC = -6,
  tok_PRINT = -7,
  tok_RETURN = -8,
  tok_CONTINUE = -9,

  // Conditional: IF ... THEN ... ELSE ... FI
  tok_IF = -10,
  tok_THEN = -11,
  tok_ELSE = -12,
  tok_FI = -13,

  // Loop: WHILE ... DO ... DONE
  tok_WHILE = -14,
  tok_DO = -15,
  tok_DONE = -16,

  // Declaration keyword.
  tok_VAR = -17,
};
// Payload of the most recent token; written by gettok().
static std::string IdentifierStr; // Spelling of the last identifier or keyword (tok_VARIABLE, tok_FUNC, ...)
static double NumVal; // Value of the last tok_INTEGER, converted with strtod
static std::string StrVal; // Contents of the last tok_TEXT, without the surrounding quotes
/// gettok - Return the next token from standard input.
///
/// Whitespace and line comments (introduced by `//` or `#`) are skipped.
/// Returns one of the negative Token values for a recognised lexeme, or the
/// character itself (as a non-negative int) for anything else.
///
/// Side effects:
///   - IdentifierStr holds the spelling when a keyword token or tok_VARIABLE
///     is returned;
///   - NumVal holds the converted value when tok_INTEGER is returned;
///   - StrVal holds the text between the quotes when tok_TEXT is returned.
///
/// One character of lookahead is kept across calls in a function-local
/// static, so the function is not reentrant.
static int gettok() {
  // Reserved words and the token each one maps to.
  struct Keyword {
    const char *Spelling;
    int Tok;
  };
  static const Keyword Keywords[] = {
      {"FUNC", tok_FUNC},     {"PRINT", tok_PRINT}, {"RETURN", tok_RETURN},
      {"CONTINUE", tok_CONTINUE}, {"IF", tok_IF},   {"THEN", tok_THEN},
      {"ELSE", tok_ELSE},     {"FI", tok_FI},       {"WHILE", tok_WHILE},
      {"DO", tok_DO},         {"DONE", tok_DONE},   {"VAR", tok_VAR},
  };

  static int LastChar = ' ';

  // Skip whitespace and comments. This is a loop rather than a recursive
  // call so that many consecutive comment lines cannot grow the stack.
  for (;;) {
    while (isspace(LastChar))
      LastChar = getchar();

    if (LastChar == '/') {
      // Parenthesisation matters here: the character read must be stored,
      // not the result of comparing it with '/'.
      LastChar = getchar();
      if (LastChar != '/')
        return '/'; // A lone '/' is an ordinary character token.
    } else if (LastChar != '#') {
      break; // Not a comment: LastChar starts a real token (or is EOF).
    }

    // Inside a comment: discard everything up to the end of the line, and
    // stop at EOF so an unterminated last line cannot loop forever.
    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r')
      LastChar = getchar();
  }

  if (LastChar == ':') { // assign_symbol ":="
    LastChar = getchar();
    if (LastChar == '=') {
      LastChar = getchar();
      return tok_ASSIGN_SYMBOL;
    }
    // A ':' on its own is returned as a plain character instead of being
    // dropped; LastChar already holds the character that followed it.
    return ':';
  }

  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
    IdentifierStr = static_cast<char>(LastChar);
    while (isalnum((LastChar = getchar())))
      IdentifierStr += static_cast<char>(LastChar);

    for (const Keyword &KW : Keywords)
      if (IdentifierStr == KW.Spelling)
        return KW.Tok;
    return tok_VARIABLE;
  }

  if (isdigit(LastChar)) { // Number: [0-9]+
    std::string NumStr;
    do {
      NumStr += static_cast<char>(LastChar);
      LastChar = getchar();
    } while (isdigit(LastChar));
    NumVal = strtod(NumStr.c_str(), nullptr);
    return tok_INTEGER;
  }

  if (LastChar == '\"') { // Text: "..." (no escape sequences)
    StrVal.clear();
    // Test before appending so that an empty literal "" yields an empty
    // StrVal, and stop at EOF so an unterminated literal cannot hang.
    while ((LastChar = getchar()) != '\"' && LastChar != EOF)
      StrVal += static_cast<char>(LastChar);
    if (LastChar == '\"')
      LastChar = getchar(); // Eat the closing quote.
    // On EOF the text collected so far is returned and LastChar stays EOF,
    // so the next call reports tok_eof.
    return tok_TEXT;
  }

  // Check for end of file. Don't eat the EOF.
  if (LastChar == EOF)
    return tok_eof;

  // Otherwise, just return the character as its ascii value.
  const int ThisChar = LastChar;
  LastChar = getchar();
  return ThisChar;
}
int main() {
int tok;
while (1) {
tok = gettok();
printf("%d\n",tok);
}
}