-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathlexer.go
More file actions
304 lines (244 loc) · 6 KB
/
lexer.go
File metadata and controls
304 lines (244 loc) · 6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
package imapsrv
import (
"bufio"
"bytes"
"fmt"
"net/textproto"
"strconv"
)
// lexer is responsible for reading input, and making sense of it.
// It reads one line at a time via the textproto reader and exposes
// byte-level cursor operations plus token-boundary bookkeeping so the
// parser can rewind within the current line.
type lexer struct {
	// Line based reader; yields input one CRLF-terminated line at a time
	reader *textproto.Reader
	// The current line (without its trailing CRLF)
	line []byte
	// The index to the current character within line
	idx int
	// The start of tokens, used for rewinding to the previous token.
	// Cleared on every newLine() — rewinding cannot cross line boundaries.
	tokens []int
}
// Ascii codes used by the lexer when classifying input bytes
const (
	endOfInput       = 0x00 // NUL, used as an end-of-input sentinel
	cr               = 0x0d // '\r'
	lf               = 0x0a // '\n'
	space            = 0x20 // ' '
	doubleQuote      = 0x22 // '"'
	plus             = 0x2b // '+'
	zero             = 0x30 // '0'
	nine             = 0x39 // '9'
	leftCurly        = 0x7b // '{'
	rightCurly       = 0x7d // '}'
	leftParenthesis  = 0x28 // '('
	rightParenthesis = 0x29 // ')'
	rightBracket     = 0x5d // ']'
	percent          = 0x25 // '%'
	asterisk         = 0x2a // '*'
	backslash        = 0x5c // '\\'
)
// astringExceptionsChar is a list of chars that are not present in the
// astring charset; nonquoted() stops consuming when it meets one of these.
var astringExceptionsChar = []byte{
	space,
	leftParenthesis,
	rightParenthesis,
	percent,
	asterisk,
	backslash,
	leftCurly,
}
// tagExceptionsChar is a list of chars that are not present in the tag
// charset; it is the astring exception set plus '+' (which would clash
// with the continuation-request marker).
var tagExceptionsChar = []byte{
	space,
	leftParenthesis,
	rightParenthesis,
	percent,
	asterisk,
	backslash,
	leftCurly,
	plus,
}
// listMailboxExceptionsChar is a list of chars that are not present in the
// list-mailbox charset. Note that '%' and '*' are allowed here (they are
// LIST wildcards), unlike in the astring set.
var listMailboxExceptionsChar = []byte{
	space,
	leftParenthesis,
	rightParenthesis,
	rightBracket,
	backslash,
	leftCurly,
}
// createLexer creates a partially initialised IMAP lexer over the given
// buffered reader. The lexer has no current line yet, so lexer.newLine()
// must be the first call made on it.
func createLexer(in *bufio.Reader) *lexer {
	ret := &lexer{
		reader: textproto.NewReader(in),
	}
	return ret
}
//-------- IMAP tokens ---------------------------------------------------------
// astring treats the input as an IMAP astring: leading spaces are skipped,
// a token boundary is recorded, and the remainder is read as a bare,
// quoted or literal string.
func (l *lexer) astring() (bool, string) {
	l.skipSpace()
	l.startToken()

	return l.generalString("ASTRING", astringExceptionsChar)
}
// tag treats the input as a tag string: leading spaces are skipped,
// a token boundary is recorded, and a bare string is read using the
// tag exception set (tags may not be quoted or literal).
func (l *lexer) tag() (bool, string) {
	l.skipSpace()
	l.startToken()

	return l.nonquoted("TAG", tagExceptionsChar)
}
// listMailbox treats the input as a list-mailbox argument: leading spaces
// are skipped, a token boundary is recorded, and the remainder is read as
// a bare, quoted or literal string using the list-mailbox exception set.
func (l *lexer) listMailbox() (bool, string) {
	l.skipSpace()
	l.startToken()

	return l.generalString("LIST-MAILBOX", listMailboxExceptionsChar)
}
//-------- IMAP token helper functions -----------------------------------------
// generalString handles a string that can be bare, a literal or quoted.
// The first character selects the form: '"' starts a quoted string,
// '{' starts a length-tagged literal, anything else is read bare.
func (l *lexer) generalString(name string, exceptions []byte) (bool, string) {
	first := l.current()

	if first == doubleQuote {
		l.consume() // step past the opening quote
		return true, l.qstring()
	}

	if first == leftCurly {
		l.consume() // step past the opening brace
		return true, l.literal()
	}

	return l.nonquoted(name, exceptions)
}
// qstring reads a quoted string, assuming the opening quote has already
// been consumed. Backslash escapes the following byte; a bare CR or LF
// inside the quotes is a parse error (raised via panic). The closing
// quote is consumed but not returned.
func (l *lexer) qstring() string {
	out := make([]byte, 0, 16)

	// Collect bytes until the closing double quote
	for b := l.current(); b != doubleQuote; b = l.consume() {
		switch b {
		case cr, lf:
			panic(parseError(fmt.Sprintf(
				"Unexpected character %q in quoted string", b)))
		case backslash:
			// The escaped byte is taken verbatim
			out = append(out, l.consume())
		default:
			out = append(out, b)
		}
	}

	// Step past the closing quote
	l.consume()

	return string(out)
}
// literal parses a length-tagged literal, assuming the opening '{' has
// already been consumed. The digits before '}' give the byte count; the
// literal content starts on the following line. A non-digit in the length
// or an unparsable count is a parse error (raised via panic).
// TODO: send a continuation request after the first line is read
func (l *lexer) literal() string {
	// Accumulate the decimal digits of the length, up to the closing brace
	digits := make([]byte, 0, 8)
	for b := l.current(); b != rightCurly; b = l.consume() {
		if b < zero || b > nine {
			panic(parseError(fmt.Sprintf(
				"Unexpected character %q in literal length", b)))
		}
		digits = append(digits, b)
	}

	// Convert the digit run into an integer count
	count, err := strconv.ParseInt(string(digits), 10, 32)
	if err != nil {
		panic(parseError(err.Error()))
	}

	// The literal content begins on the next line
	l.newLine()

	// Nothing to read for an empty (or invalid) length
	if count <= 0 {
		return ""
	}

	// Read exactly count bytes, crossing line boundaries as needed
	out := make([]byte, 0, count)
	b := l.current()
	for {
		out = append(out, b)
		count--
		if count == 0 {
			break
		}
		b = l.consumeAll()
	}

	return string(out)
}
// nonquoted reads a bare (non-quoted) string: a run of printable,
// non-space ASCII bytes that are not in the given exception set.
// Returns false with an empty string when no byte qualifies.
func (l *lexer) nonquoted(name string, exceptions []byte) (bool, string) {
	out := make([]byte, 0, 16)

	// Accept printable non-space ASCII, stopping at any excepted byte
	for b := l.current(); b > space && b < 0x7f; b = l.consume() {
		if bytes.IndexByte(exceptions, b) != -1 {
			break
		}
		out = append(out, b)
	}

	// At least one byte must have been accepted
	if len(out) == 0 {
		return false, ""
	}

	return true, string(out)
}
//-------- Low level lexer functions -------------------------------------------
// consume advances the cursor by one byte and returns the new current
// byte. It never crosses a line boundary: at (or past) the last byte of
// the line it leaves the cursor in place and reports a linefeed.
func (l *lexer) consume() byte {
	// At the end of the line there is nothing left to advance onto
	if l.idx >= len(l.line)-1 {
		return lf
	}

	l.idx++
	return l.current()
}
// consumeAll advances the cursor by one byte and returns the new current
// byte. Unlike consume, it crosses line boundaries: at the end of the
// line it reads the next line and returns its first byte. Note that the
// CRLF separator itself is never returned.
func (l *lexer) consumeAll() byte {
	if l.idx >= len(l.line)-1 {
		// Exhausted this line — move on to the next one
		l.newLine()
	} else {
		l.idx++
	}
	return l.current()
}
// current returns the byte under the cursor without advancing.
// Panics (index out of range) if the current line is empty.
func (l *lexer) current() byte {
	return l.line[l.idx]
}
// newLine reads the next line from the underlying reader and makes it the
// current line, resetting the cursor to its first byte. A read error is a
// parse error (raised via panic).
func (l *lexer) newLine() {
	next, err := l.reader.ReadLineBytes()
	if err != nil {
		panic(parseError(err.Error()))
	}

	// Reset all positional state — token boundaries recorded on the
	// previous line are no longer valid, so rewinding cannot cross lines
	l.line = next
	l.idx = 0
	l.tokens = make([]int, 0, 8)
}
// skipSpace advances the cursor past any run of space bytes.
// It loops on the byte returned by consume (not on current) so that the
// end-of-line linefeed sentinel terminates the loop even when the line
// ends in spaces.
func (l *lexer) skipSpace() {
	for b := l.current(); b == space; b = l.consume() {
	}
}
// startToken records the current cursor position as the start of a new
// token, so pushBack can later rewind to it.
func (l *lexer) startToken() {
	l.tokens = append(l.tokens, l.idx)
}
// pushBack rewinds the cursor to the start of the most recently recorded
// token and pops that record. Panics if no token start has been recorded
// on the current line.
func (l *lexer) pushBack() {
	top := len(l.tokens) - 1
	l.idx = l.tokens[top]
	l.tokens = l.tokens[:top]
}