// File: bashParser.h
// parse.h Stan Eisenstat (09/10/17)
//
// Header file for command line parser used in Parse
//
// Bash version based on left-associative parse tree
#ifndef PARSE_INCLUDED
#define PARSE_INCLUDED // parse.h has been #include-d
// A token is
//
// (1) a maximal, contiguous, nonempty sequence of nonwhitespace characters
// other than the metacharacters <, >, ;, &, |, (, and ) [a SIMPLE token];
//
// (2) a redirection symbol (<, <<, >, >>, 2>, 2>>, or &>);
//
// (3) a pipeline symbol (|);
//
// (4) a command operator (&& or ||);
//
// (5) a command terminator (; or &);
//
// (6) a left or right parenthesis (used to group commands).
// Tokens are chained into a headless linked list: a list is referred to by a
// pointer to its first token.  All storage is allocated by malloc() /
// realloc().  Each token records its type using the symbolic constants
// defined below.
typedef struct token token;

struct token {
    char  *text;    // Text of the token (if SIMPLE)
    int    type;    // Type of the token (one of the symbolic constants)
    token *next;    // Token that follows this one in the linked list
};
// Break the string LINE into a headless linked list of typed tokens and
// return a pointer to the first token (or NULL if none were found or an
// error was detected).
//
// Note that a NULL return covers both "no tokens" and "error"; the return
// value alone does not tell the two cases apart.
//
// NOTE(review): LINE is not const-qualified; whether tokenize() modifies it
// cannot be determined from this header -- confirm against the implementation.
// NOTE(review): the returned list is presumably released with freeList() --
// confirm.
token *tokenize (char *line);
// Print the list of tokens LIST, where LIST points to the first token.
//
// NOTE(review): the output stream and format are set by the implementation,
// which is not visible from this header.
void dumpList (token *list);
// Free the list of tokens LIST, where LIST points to the first token.
//
// NOTE(review): presumably releases every token in the list together with its
// text, and presumably accepts the NULL that tokenize() may return -- confirm
// against the implementation.
void freeList (token *list);
/////////////////////////////////////////////////////////////////////////////
// Type codes shared by tokenize() and parse().  Each value is written out
// explicitly so that the numbering is visible at a glance.
enum {
    // Codes assigned to tokens by tokenize() et al.
    SIMPLE      =  0,   // Maximal contiguous sequence ... (as above)
    RED_IN      =  1,   // <    Redirect stdin to file
    RED_IN_HERE =  2,   // <<   Redirect stdin to HERE document
    RED_OUT     =  3,   // >    Redirect stdout to file
    RED_OUT_APP =  4,   // >>   Append stdout to file
    RED_OUT_ERR =  5,   // &>   Redirect stdout and stderr to file (UNUSED)
    RED_ERR     =  6,   // 2>   Redirect stderr to file (UNUSED)
    RED_ERR_APP =  7,   // 2>>  Append stderr to file (UNUSED)
    PIPE        =  8,   // |
    SEP_AND     =  9,   // &&
    SEP_OR      = 10,   // ||
    SEP_END     = 11,   // ;
    SEP_BG      = 12,   // &
    PAR_LEFT    = 13,   // (
    PAR_RIGHT   = 14,   // )

    // Further codes used by parse()
    NONE        = 15,   // Nontoken: Did not find a token
    ERROR       = 16,   // Nontoken: Encountered an error
    SUBCMD      = 17    // Nontoken: CMD struct for subcommand
};
// String containing all metacharacters that terminate SIMPLE tokens:
// <, >, ;, &, |, (, and ).  Whitespace also ends a SIMPLE token (see the
// definition of a token above) but is not part of this string.
#define METACHAR "<>;&|()"
// String containing every character that may appear in a variable name:
// upper-case letters, lower-case letters, underscore, and decimal digits.
// The pieces are adjacent string literals, which the compiler joins into a
// single string.
#define VARCHR "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
               "abcdefghijklmnopqrstuvwxyz" \
               "_"                          \
               "0123456789"
// Macro that checks whether the token type TYPE is a redirection symbol
// (<, <<, >, >>, 2>, 2>>, or &>).  Evaluates to 1 if it is and to 0 otherwise.
//
// Every use of TYPE is parenthesized so that an expression argument (e.g., a
// conditional expression) is compared as a whole rather than being split up
// by operator precedence.  TYPE is still evaluated more than once, so do not
// pass an argument with side effects (e.g., RED_OP(t++)).
#define RED_OP(type) ((type) == RED_IN      || (type) == RED_IN_HERE || \
                      (type) == RED_OUT     || (type) == RED_OUT_APP || \
                      (type) == RED_ERR     || (type) == RED_ERR_APP || \
                      (type) == RED_OUT_ERR)
/////////////////////////////////////////////////////////////////////////////
// The syntax for a command is
//
// <local> = VARIABLE=VALUE
// <red_op> = < / << / > / >> / 2> / 2>> / &>
// <redirect> = <red_op> FILENAME
// <prefix> = <local> / <redirect> / <prefix> <local> / <prefix> <redirect>
// <suffix> = SIMPLE / <redirect> / <suffix> SIMPLE / <suffix> <redirect>
// <redList> = <redirect> / <redList> <redirect>
// <simple> = SIMPLE / <prefix> SIMPLE / SIMPLE <suffix>
// / <prefix> SIMPLE <suffix>
// <subcmd> = (<command>) / <prefix> (<command>) / (<command>) <redList>
// / <prefix> (<command>) <redList>
// <stage> = <simple> / <subcmd>
// <pipeline> = <stage> / <pipeline> | <stage>
// <and-or> = <pipeline> / <and-or> && <pipeline> / <and-or> || <pipeline>
// <sequence> = <and-or> / <sequence> ; <and-or> / <sequence> & <and-or>
// <command> = <sequence> / <sequence> ; / <sequence> &
//
// Note that FILENAME = SIMPLE.
//
// A command is represented as a tree of CMD structs containing its <simple>
// commands and the "operators" | (= PIPE), && (= SEP_AND), || (= SEP_OR),
// ; (= SEP_END), & (= SEP_BG), and SUBCMD. The command tree is determined
// by (but is not equal to) the parse tree in the above grammar.
//
// The tree for a <simple> is a single struct of type SIMPLE that specifies its
// arguments (argc, argv[]); its local variables (nLocal, locVar[], locVal[]);
// and whether and where to redirect its standard input (fromType, fromFile),
// its standard output (toType, toFile), and its standard error (errType,
// errFile). The left and right children are NULL.
//
// The tree for a <stage> is either the tree for a <simple> or a CMD struct of
// type SUBCMD (which may have local variables and redirection) whose left
// child is the tree representing a <command> and whose right child is NULL.
// Note that I/O redirection is associated with a <stage> (i.e., a <simple> or
// <subcmd>), but not with a <pipeline> (redirection for the first/last stage
// is associated with the stage, not the pipeline).
//
// The tree for a <pipeline> is either the tree for a <stage> or a CMD struct
// of type PIPE whose right child is a tree representing the last <stage> and
// whose left child is the tree representing the rest of the <pipeline>.
//
// The tree for an <and-or> is either the tree for a <pipeline> or a CMD
// struct of type && (= SEP_AND) or || (= SEP_OR) whose left child is a tree
// representing an <and-or> and whose right child is a tree representing a
// <pipeline>.
//
// The tree for a <sequence> is either the tree for an <and-or> or a CMD
// struct of type ; (= SEP_END) or & (= SEP_BG) whose left child is a tree
// representing a <sequence> and whose right child is a tree representing an
// <and-or>.
//
// The tree for a <command> is either the tree for a <sequence> or a CMD
// struct of type ; (= SEP_END) or & (= SEP_BG) whose left child is the tree
// representing a <sequence> and whose right child is NULL.
// Examples (where A, B, C, D, and E are <simple>):                       //
//                                                                        //
//   Expression                                Tree                       //
//                                                                        //
//   < A B | C | D | E > F                     PIPE                       //
//                                            /    \                      //
//                                         PIPE     E >F                  //
//                                        /    \                          //
//                                     PIPE     D                         //
//                                    /    \                              //
//                                <A B      C                             //
//                                                                        //
//   A && B || C && D                          &&                         //
//                                            /  \                        //
//                                          ||    D                       //
//                                         /  \                           //
//                                       &&    C                          //
//                                      /  \                              //
//                                     A    B                             //
//                                                                        //
//   A ; B & C ; D || E ;                      ;                          //
//                                            /                           //
//                                           ;                            //
//                                         /   \                          //
//                                        &     ||                        //
//                                       / \   /  \                       //
//                                      ;   C D    E                      //
//                                     / \                                //
//                                    A   B                               //
//                                                                        //
//   (A ; B &) | (C || D) && E                 &&                         //
//                                            /  \                        //
//                                        PIPE    E                       //
//                                       /    \                           //
//                                    SUB      SUB                        //
//                                   /        /                           //
//                                  &        ||                           //
//                                 /        /  \                          //
//                                ;        C    D                         //
//                               / \                                      //
//                              A   B                                     //
// One node of a command tree.  A node of type SIMPLE holds a <simple> command
// and has no children; the other node types act as "operators" whose operands
// are the left and right subtrees.
typedef struct cmd CMD;

struct cmd {
    int    type;        // Kind of node: SIMPLE, PIPE, SEP_AND, SEP_OR,
                        //   SEP_END, SEP_BG, or SUBCMD; NONE by default

    int    argc;        // How many command-line arguments there are
    char **argv;        // Argument vector (null-terminated), or NULL

    int    nLocal;      // How many local variable assignments there are
    char **locVar;      // Names of the local variables and ...
    char **locVal;      //   ... values assigned to them when the command runs

    int    fromType;    // stdin redirection: RED_IN (<) or RED_IN_HERE (<<);
                        //   NONE by default
    char  *fromFile;    // Name of file for stdin, or contents of the here
                        //   document; NULL by default

    int    toType;      // stdout redirection: RED_OUT (>) or RED_OUT_APP (>>);
                        //   NONE by default
    char  *toFile;      // Name of file for stdout; NULL by default

    int    errType;     // Unused for this project.
    char  *errFile;     // Unused for this project.

    CMD   *left;        // Left subtree; NULL by default
    CMD   *right;       // Right subtree; NULL by default
};
// Note: In a <stage> with a HERE document, fromFile should point to a string
// containing the lines in that document.
//
// Note: In a <stage> with &> (= RED_OUT_ERR) redirection, toType and errType
// should be RED_OUT_ERR, toFile should point to the filename, and errFile
// should be NULL.
// Allocate, initialize, and return a pointer to an empty command structure.
//
// NOTE(review): "empty" presumably means every field holds the default named
// in the struct cmd field comments (NONE for the type fields, NULL for the
// pointers) -- confirm against the implementation.
// NOTE(review): behavior when allocation fails is not specified here.
CMD *mallocCMD (void);
// Print the command data structure EXEC as a tree whose root is at level
// LEVEL.
//
// NOTE(review): LEVEL presumably controls how far the output is indented and
// grows by one per level of the tree -- confirm against the implementation.
void dumpTree (CMD *exec, int level);
// Free the command structure CMD.
//
// NOTE(review): presumably also frees the subtrees and the strings and arrays
// that CMD points to, and presumably accepts the NULL that parse() may
// return -- confirm against the implementation.
void freeCMD (CMD *cmd);
// Parse a token list into a command structure and return a pointer to
// that structure (NULL if errors found).  TOK points to the first token of
// the list.
//
// NOTE(review): whether parse() takes ownership of the token list (or of the
// token text) is not stated here -- confirm before freeing the list.
CMD *parse (token *tok);
#endif