-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscanner.go
More file actions
226 lines (211 loc) · 4.94 KB
/
scanner.go
File metadata and controls
226 lines (211 loc) · 4.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
package lexer
import (
"bufio"
"bytes"
"io"
)
// type Token int
// NOTE(review): the Token type declaration above is commented out in this
// file; presumably Token is declared elsewhere in the package — confirm.
//
// Preliminary token kinds produced by lexical analysis. Some of them can be
// refined by a later tokenize step into tokens with more precise meaning.
const (
// Special markers.
T_ILLEGAL Token = iota
T_EOF
WS // whitespace; NOTE(review): Scan in this file skips whitespace and never returns WS
// Regular typed data.
T_IDENT // identifier; keywords are not distinguished at this stage and fall into this same class
T_INTEGER // integer
T_FLOAT // floating-point number
T_STRING // string; NOTE(review): Scan in this file never returns T_STRING
// Other markers.
T_ASTERISK // *
T_COMMA // ,
T_LEFT_PARENTHESIS // (
T_RIGHT_PARENTHESIS // )
T_SEMICOLON // ;
T_EQUAL // =
T_ANGLE_LEFT // <
T_ANGLE_LEFT_EQUAL // <=
T_ANGLE_RIGHT_EQUAL // >=
T_ANGLE_RIGHT // >
T_NOT_EQUAL // <> or != ; NOTE(review): Scan in this file only emits it for "<>", because checkCharType classifies '!' as illegal
T_POINT // .
)
// State is a state of the state machine that Scan runs while assembling one
// token.
type State int // state of the state machine
const (
STATE_INIT State = iota // start of a token; nothing has been buffered yet
STATE_INTEGER // one or more digits have been read
STATE_POINT // a '.' was read directly after the digits of an integer
STATE_FRACTION // at least one digit has been read after the '.'
STATE_IDENT // a letter has been read; letters, digits and '_' may follow
STATE_ANGLE_LEFT // a '<' has been read; the next rune decides between <, <= and <>
STATE_ANGLE_RIGHT // a '>' has been read; the next rune decides between > and >=
STATE_END // terminal state; no code visible in this file ever assigns it
)
// CharType is the category that checkCharType assigns to a single rune.
type CharType int // category of a single character
const (
NUM CharType = iota // ASCII digit '0'..'9'
CHAR // ASCII letter 'a'..'z' or 'A'..'Z'
SPECIAL_SYMBOL // one of . * , ( ) ; = < > or the eof marker
ILLEGAL_SYMBOL // any rune that matches none of the other categories
SPACE // space, tab, '\n' or '\r'
UNDERLINE // '_'
)
// eof is the marker rune (NUL) that stands for the end of the reader.
// A literal NUL rune in the input is therefore indistinguishable from it.
var eof = '\x00'
// InputScanner reads runes from a buffered reader and groups them into
// preliminary tokens (see Scan).
type InputScanner struct {
r *bufio.Reader // buffered source of runes; set by NewScanner
// apostropne is initialised to false by NewScanner and is not read by any
// code visible in this file.
// NOTE(review): the name looks like a misspelling of "apostrophe" and was
// presumably meant to track being inside a quoted string — confirm.
apostropne bool
}
// NewScanner returns an InputScanner that reads from r through a
// bufio.Reader.
func NewScanner(r io.Reader) *InputScanner {
	s := new(InputScanner) // apostropne starts at its zero value, false
	s.r = bufio.NewReader(r)
	return s
}
// Scan reads runes from the input and assembles the next preliminary token,
// returning its kind together with its literal text.
//
// Whitespace in front of a token is skipped. The rune that terminates a
// multi-rune token (integer, float, identifier, "<", ">") is pushed back so
// that the following call starts with it; this includes illegal runes, which
// are then reported by that following call instead of discarding the token
// collected so far. At end of input Scan returns (T_EOF, "").
//
// T_ILLEGAL is returned, with the offending rune as the literal, when a rune
// cannot start a token (an underscore or an illegal symbol) or when the '.'
// after an integer is not followed by a digit; in the latter case the
// offending rune is consumed. Scan never produces WS or T_STRING.
func (s *InputScanner) Scan() (tok Token, lit string) {
	var buf bytes.Buffer // literal text of the token being assembled
	state := STATE_INIT

	// The loop has no exit condition: every state returns once it sees a
	// rune that cannot extend the current token, and eof is such a rune in
	// every state.
	for ch := s.read(); ; ch = s.read() {
		kind := checkCharType(ch)

		switch state {
		case STATE_INIT:
			switch kind {
			case NUM:
				buf.WriteRune(ch)
				state = STATE_INTEGER
			case CHAR:
				buf.WriteRune(ch)
				state = STATE_IDENT
			case SPACE:
				// Skip whitespace between tokens.
			case SPECIAL_SYMBOL:
				switch ch {
				case eof:
					return T_EOF, ""
				case '.':
					return T_POINT, string(ch)
				case '*':
					return T_ASTERISK, string(ch)
				case ',':
					return T_COMMA, string(ch)
				case '(':
					return T_LEFT_PARENTHESIS, string(ch)
				case ')':
					return T_RIGHT_PARENTHESIS, string(ch)
				case ';':
					return T_SEMICOLON, string(ch)
				case '=':
					return T_EQUAL, string(ch)
				case '<':
					buf.WriteRune(ch)
					state = STATE_ANGLE_LEFT
				case '>':
					buf.WriteRune(ch)
					state = STATE_ANGLE_RIGHT
				}
			default:
				// UNDERLINE and ILLEGAL_SYMBOL cannot start a token.
				return T_ILLEGAL, string(ch)
			}

		case STATE_INTEGER:
			switch {
			case kind == NUM:
				buf.WriteRune(ch)
			case ch == '.':
				buf.WriteRune(ch)
				state = STATE_POINT
			default:
				// Any other rune ends the integer and belongs to the
				// next token.
				s.unread()
				return T_INTEGER, buf.String()
			}

		case STATE_POINT:
			// "123." must be followed by at least one digit.
			if kind != NUM {
				return T_ILLEGAL, string(ch)
			}
			buf.WriteRune(ch)
			state = STATE_FRACTION

		case STATE_FRACTION:
			if kind != NUM {
				s.unread()
				return T_FLOAT, buf.String()
			}
			buf.WriteRune(ch)

		case STATE_IDENT:
			switch kind {
			case NUM, CHAR, UNDERLINE:
				buf.WriteRune(ch)
			default:
				s.unread()
				return T_IDENT, buf.String()
			}

		case STATE_ANGLE_LEFT:
			switch ch {
			case '=':
				return T_ANGLE_LEFT_EQUAL, "<="
			case '>':
				return T_NOT_EQUAL, "<>"
			}
			// Every other rune, whatever its category, ends a bare "<"
			// and is left for the next call.
			s.unread()
			return T_ANGLE_LEFT, buf.String()

		case STATE_ANGLE_RIGHT:
			if ch == '=' {
				return T_ANGLE_RIGHT_EQUAL, ">="
			}
			// Every other rune, whatever its category, ends a bare ">"
			// and is left for the next call.
			s.unread()
			return T_ANGLE_RIGHT, buf.String()

		default:
			// No transition above leads to any other state; report it
			// rather than spin if one is ever introduced.
			return T_ILLEGAL, string(ch)
		}
	}
}
// read returns the next rune from the buffered reader.
// If ReadRune reports any error (io.EOF included) it returns eof instead.
func (s *InputScanner) read() rune {
	if r, _, err := s.r.ReadRune(); err == nil {
		return r
	}
	return eof
}
// unread pushes the most recently read rune back onto the reader so that the
// next read returns it again.
func (s *InputScanner) unread() {
	// The error from UnreadRune is deliberately discarded.
	// NOTE(review): presumably the only failure in practice is calling this
	// after read hit end of input, where there is nothing to push back —
	// confirm against the bufio.Reader documentation.
	_ = s.r.UnreadRune()
}
// checkCharType classifies a single rune:
//
//	ASCII letter                      -> CHAR
//	ASCII digit                       -> NUM
//	space, tab, '\n', '\r'            -> SPACE
//	. * , ( ) ; = < > and eof         -> SPECIAL_SYMBOL
//	'_'                               -> UNDERLINE
//	anything else                     -> ILLEGAL_SYMBOL
func checkCharType(ch rune) CharType {
	// Range-based categories first.
	switch {
	case 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z':
		return CHAR
	case '0' <= ch && ch <= '9':
		return NUM
	}

	// Then the categories defined by an explicit set of runes.
	switch ch {
	case ' ', '\t', '\n', '\r':
		return SPACE
	case '.', '*', ',', '(', ')', ';', '=', '<', '>', eof:
		return SPECIAL_SYMBOL
	case '_':
		return UNDERLINE
	}
	return ILLEGAL_SYMBOL
}