Skip to content

Commit e4af64e

Browse files
committed
added regular expression filter
1 parent 2e9d655 commit e4af64e

5 files changed

Lines changed: 69 additions & 21 deletions

File tree

build.zig

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,20 @@ pub fn build(b: *std.Build) void {
44
const target = b.standardTargetOptions(.{});
55
const optimize = b.standardOptimizeOption(.{});
66

7+
const mvzr = b.dependency("mvzr", .{
8+
.target = target,
9+
.optimize = optimize,
10+
});
11+
712
const exe = b.addExecutable(.{
813
.name = "csvcut",
914
.root_module = b.createModule(.{
1015
.root_source_file = b.path("src/main.zig"),
1116
.target = target,
1217
.optimize = optimize,
18+
.imports = &.{
19+
.{ .name = "mvzr", .module = mvzr.module("mvzr") },
20+
},
1321
}),
1422
});
1523

build.zig.zon

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,12 @@
33
.version = "0.1.1-beta",
44
.fingerprint = 0x8035f4e75082840a,
55

6-
.dependencies = .{},
6+
.dependencies = .{
7+
.mvzr = .{
8+
.url = "https://github.com/mnemnion/mvzr/archive/refs/tags/v0.3.7.tar.gz",
9+
.hash = "mvzr-0.3.7-ZSOky5FtAQB2VrFQPNbXHQCFJxWTMAYEK7ljYEaMR6jt",
10+
},
11+
},
712

813
.paths = .{
914
"build.zig",

src/USAGE.txt

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,19 @@ Options:
8181
but removes given columns from output
8282

8383
--filter [filter,..] list of filters that are applied
84-
argument field1=value1,field2=value2,....
84+
argument field1=value,field2~regex,....
85+
only lines where all fields match are output
86+
87+
field=value => the value has to match exactly
88+
field~regex => the regular expression has to match
89+
8590
can be used multiple times, if any filter matches
8691
the line will be output
8792

8893
--filterOut [filter,...] list of negative filters that are applied
8994
inverse of --filter
9095
ignores lines that match any filter
91-
is applied befroe --filter
96+
is applied before --filter
9297

9398
--trim removes space & tab characters from start & end
9499
--unique only output unique lines

src/exitCode.zig

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ pub const ExitCode = enum(u8) {
1212
includeAndExcludeTogether,
1313
countAndUniqueAreExclusive,
1414
extraLfWithoutLength,
15+
regexCompileError,
1516

1617
couldNotOpenInputFile,
1718
couldNotOpenOutputFile,
@@ -34,7 +35,7 @@ pub const ExitCode = enum(u8) {
3435
.includeAndExcludeTogether => return "--include and --exclude cannot be used together",
3536
.extraLfWithoutLength => return "--extraLF and --extraCRLF are only used for fixed field processing with --lengths",
3637
.countAndUniqueAreExclusive => return "--count and --unique are exclusive, use either, not both at the same time",
37-
38+
.regexCompileError => return "invalid regex '{s}' for field '{s}'",
3839
.couldNotOpenInputFile => return "could not open input file '{s}' reason: {}",
3940
.couldNotOpenOutputFile => return "could not open output file '{s}' reason: {}",
4041
.couldNotReadHeader => return "could not read header from file '{s}'",

src/options.zig

Lines changed: 46 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
const std = @import("std");
22
const CsvLine = @import("FieldReader/CsvLine.zig");
3+
const ExitCode = @import("exitCode.zig").ExitCode;
4+
const mvzr = @import("mvzr");
35

46
pub const OutputFormat = enum {
57
csv,
@@ -23,7 +25,8 @@ const Selection = union(enum) {
2325
const OptionError = error{
2426
NoSuchField,
2527
NoHeader,
26-
MoreThanOneEqualInFilter,
28+
NoEqualOrTildeInFilter,
29+
RegexCompileError,
2730
};
2831

2932
pub const Options = struct {
@@ -202,14 +205,21 @@ pub const Options = struct {
202205
}
203206
};
204207

208+
const FilterType = enum { equal, regex };
209+
const FilterValue = struct {
210+
value: []const u8,
211+
type: FilterType,
212+
regex: ?mvzr.Regex,
213+
};
214+
205215
pub const Filter = struct {
206216
selectionList: SelectionList,
207-
values: std.array_list.Managed([]const u8),
217+
values: std.array_list.Managed(FilterValue),
208218

209219
pub fn init(allocator: std.mem.Allocator) !Filter {
210220
return .{
211221
.selectionList = try SelectionList.init(allocator),
212-
.values = std.array_list.Managed([]const u8).init(allocator),
222+
.values = std.array_list.Managed(FilterValue).init(allocator),
213223
};
214224
}
215225

@@ -219,18 +229,31 @@ pub const Filter = struct {
219229
}
220230

221231
pub fn append(self: *Filter, filter: []const u8) !void {
222-
var it = std.mem.splitScalar(u8, filter, '=');
223-
if (it.next()) |field| {
224-
try self.selectionList.append(field);
225-
}
226-
if (it.next()) |value| {
227-
try self.values.append(value);
232+
var eqlPos: usize = 0;
233+
var filterType: FilterType = undefined;
234+
235+
if (std.mem.indexOfScalar(u8, filter, '=')) |pos| {
236+
eqlPos = pos;
237+
filterType = .equal;
238+
} else if (std.mem.indexOfScalar(u8, filter, '~')) |pos| {
239+
eqlPos = pos;
240+
filterType = .regex;
228241
} else {
229-
try self.values.append("");
242+
return OptionError.NoEqualOrTildeInFilter;
230243
}
231-
if (it.next()) |_| {
232-
return OptionError.MoreThanOneEqualInFilter;
244+
245+
const field = filter[0..eqlPos];
246+
try self.selectionList.append(field);
247+
248+
const value = filter[eqlPos + 1 ..];
249+
var regex: ?mvzr.Regex = null;
250+
if (filterType == .regex) {
251+
regex = mvzr.compile(value);
252+
if (regex == null) {
253+
ExitCode.regexCompileError.printErrorAndExit(.{ value, field });
254+
}
233255
}
256+
try self.values.append(.{ .value = value, .type = filterType, .regex = regex });
234257
}
235258

236259
pub fn calculateIndices(self: *Filter, header: ?[][]const u8) !void {
@@ -239,8 +262,14 @@ pub const Filter = struct {
239262

240263
pub fn matches(self: *const Filter, fields: [][]const u8) bool {
241264
for (self.selectionList.indices.?, 0..) |fieldIndex, i| {
242-
if (!std.mem.eql(u8, fields[fieldIndex], self.values.items[i])) {
243-
return false;
265+
const value = self.values.items[i];
266+
switch (value.type) {
267+
.equal => if (!std.mem.eql(u8, fields[fieldIndex], value.value)) {
268+
return false;
269+
},
270+
.regex => if (!value.regex.?.isMatch(fields[fieldIndex])) {
271+
return false;
272+
},
244273
}
245274
}
246275
return true;
@@ -304,9 +333,9 @@ const SelectionList = struct {
304333
return minusPos;
305334
}
306335

307-
fn addRange(list: *std.array_list.Managed(Selection), field: []const u8, miusPos: usize) !void {
308-
if (toNumber(field[0..miusPos])) |from| {
309-
if (toNumber(field[miusPos + 1 ..])) |to| {
336+
fn addRange(list: *std.array_list.Managed(Selection), field: []const u8, minusPos: usize) !void {
337+
if (toNumber(field[0..minusPos])) |from| {
338+
if (toNumber(field[minusPos + 1 ..])) |to| {
310339
for (from..to + 1) |index| {
311340
try list.append(.{ .index = index - 1 });
312341
}

0 commit comments

Comments
 (0)