-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.c
More file actions
67 lines (55 loc) · 2 KB
/
parser.c
File metadata and controls
67 lines (55 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#include "parser.h"
#include <regex.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#define BLOCK 4096
// Return the smaller of the two given ints.
static inline int min(int x, int y) {
    if (y <= x) {
        return y;
    }
    return x;
}
// Table of the words extracted from the line most recently scanned by
// next_word(): up to BLOCK words, each holding at most BLOCK - 1 characters
// plus a null terminator. These limits are generally ample for English text.
// The contents are overwritten every time next_word() scans a new line, and
// the pointers next_word() returns point directly into this table.
// Note: this is BLOCK * BLOCK bytes (16 MiB with BLOCK == 4096) of static storage.
static char words[BLOCK][BLOCK];
// Return the next word from `infile` that matches `word_regex`, or NULL when
// no further line can be read from the stream (fgets() fails: end of file or
// a read error).
//
// Input is consumed one line at a time. When the previous batch of words has
// been handed out, lines are read until one yields at least one match; every
// match on that line is copied into the file-level `words` table, and the
// following calls return those entries in order.
//
// infile:     stream to read lines from.
// word_regex: compiled POSIX regex describing a word. Match offsets are
//             needed, so it must not have been compiled with REG_NOSUB.
//
// The returned pointer refers to static storage: it must not be freed, and
// its contents are overwritten once a later call scans a new line. All state
// lives in static variables, so it is shared by every caller and every
// stream; switching streams in the middle of a batch still returns the
// remaining words of the old batch first.
//
// Limitation: fgets() reads at most BLOCK - 1 characters at a time, so a word
// that straddles that boundary on a very long line is returned in two pieces.
char *next_word(FILE *infile, regex_t *word_regex) {
    static uint32_t index = 0; // Slot in words[] to hand out on this call.
    static uint32_t count = 0; // Number of valid slots from the current line.
    static char buffer[BLOCK]; // Line currently being scanned.

    // index == 0 means the previous batch is exhausted (or nothing has been
    // read yet), so find the next line that contains at least one word.
    if (!index) {
        uint32_t matches = 0;

        while (!matches) {
            // No more lines: nothing left to match.
            if (!fgets(buffer, BLOCK, infile)) {
                return NULL;
            }

            // Harvest matches from this line until words[] is full.
            char *cursor = buffer;
            while (matches < BLOCK) {
                regmatch_t match;

                // Past the first character we are no longer at the start of
                // the line, so tell regexec() that '^' must not match here.
                int eflags = (cursor == buffer) ? 0 : REG_NOTBOL;
                if (regexec(word_regex, cursor, 1, &match, eflags) != 0) {
                    break;
                }
                // Defensive: no usable offsets were reported.
                if (match.rm_so < 0) {
                    break;
                }

                // Offsets are relative to cursor, not to buffer.
                int start = (int) match.rm_so;
                int end = (int) match.rm_eo;
                int length = end - start;

                // An empty match would leave the cursor where it is and fill
                // the table with empty words; step over one character instead.
                if (length == 0) {
                    if (cursor[end] == '\0') {
                        break;
                    }
                    cursor += end + 1;
                    continue;
                }

                // Clamp so the terminator always lands inside the slot.
                int copied = min(length, BLOCK - 1);
                memcpy(words[matches], cursor + start, (size_t) copied);
                words[matches][copied] = '\0';

                cursor += end;
                matches += 1;
            }
        }

        // Remember how many slots of words[] are valid for this batch.
        count = matches;
    }

    // Hand out the current slot; wrapping back to 0 triggers a refill on the
    // next call.
    char *word = words[index];
    index = (index + 1) % count;
    return word;
}