Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 48 additions & 27 deletions src/strings.zig
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ pub const string = struct {
var i: usize = 0;
while (i < m): (i += 1) {
border[i+1] = border[i];
while (border[i+1] > -1 and needle[usize(border[i+1])] != needle[i]) {
border[i+1] = border[usize(border[i+1])];
while (border[i+1] > -1 and needle[@intCast(usize, border[i+1])] != needle[i]) {
border[i+1] = border[@intCast(usize, border[i+1])];
}
border[i+1]+=1;
}
Expand All @@ -87,27 +87,26 @@ pub const string = struct {
const max_found = self.buffer.len / needle.len;

var results = try self.allocator.alloc(usize, max_found);
defer self.allocator.free(results);
var n = self.buffer.len;
var seen: i64 = 0;
var j: usize = 0;
var found: usize = 0;

while (j < n): (j += 1) {
while (seen > -1 and needle[usize(seen)] != self.buffer[j]) {
seen = border[usize(seen)];
while (seen > -1 and needle[@intCast(usize, seen)] != self.buffer[j]) {
seen = border[@intCast(usize, seen)];
}
seen+=1;
if (seen == i64(m)) {
if (seen == @intCast(i64, m)) {
found += 1;
results[found-1] = j-m+1;
seen = border[m];
}
}
results = try self.allocator.realloc(usize, results, found);
return results;
return mem_dupn(self.allocator, usize, results, found);
}


// compute the levenshtein edit distance to another string
pub fn levenshtein(self: *const string, other: []const u8) !usize {
var prevrow = try self.allocator.alloc(usize, other.len+1);
Expand Down Expand Up @@ -139,7 +138,7 @@ pub const string = struct {
if (self.buffer[i-1] == other[j-1]) {
currrow[j] = prevrow[j-1];
} else {
currrow[j] = @inlineCall(min, prevrow[j]+1,
currrow[j] = min(prevrow[j] + 1,
currrow[j-1]+1,
prevrow[j-1]+1);
}
Expand All @@ -153,7 +152,7 @@ pub const string = struct {
pub fn replace(self: *string, before: []const u8, after: []const u8) !void {
var indices = try self.kmp(before);
if (indices.len == 0) return;
var diff = i128(before.len) - i128(after.len);
var diff = @intCast(i128, before.len) - @intCast(i128, after.len);
// var it = indices.iterator();
var new_size: usize = 0;
if (diff == 0) { // no need to resize buffer
Expand All @@ -163,9 +162,9 @@ pub const string = struct {
return;
} else if (diff < 0) { // grow buffer
diff = diff * -1;
new_size = self.buffer.len + (indices.len*usize(diff));
new_size = self.buffer.len + (indices.len * @intCast(usize, diff));
} else { // shrink buffer
new_size = self.buffer.len - (indices.len*usize(diff));
new_size = self.buffer.len - (indices.len * @intCast(usize, diff));
}
var new_buff = try self.allocator.alloc(u8, new_size);
var i: usize = 0;
Expand All @@ -176,7 +175,7 @@ pub const string = struct {
new_buff[j] = self.buffer[i];
i += 1;
j += 1;
} else {
} else {
mem.copy(u8, new_buff[j..j+after.len], after);
i += before.len;
j += after.len;
Expand All @@ -200,7 +199,7 @@ pub const string = struct {
// Lower-cases the buffer in place: each byte c that lies within
// [ascii_upper_start, ascii_upper_end] is overwritten with
// ascii_lower[upper_map(c)]; every other byte is left as it is.
// NOTE(review): this text is a rendered diff, so the two assignments below are
// the old (@inlineCall) and new (direct call) spelling of the same statement.
pub fn lower(self: *const string) void {
for (self.buffer) |c, i| {
if (ascii_upper_start <= c and c <= ascii_upper_end) {
self.buffer[i] = ascii_lower[@inlineCall(upper_map, c)];
self.buffer[i] = ascii_lower[upper_map(c)];
}
}
}
Expand All @@ -209,7 +208,7 @@ pub const string = struct {
// Upper-cases the buffer in place: each byte c that lies within
// [ascii_lower_start, ascii_lower_end] is overwritten with
// ascii_upper[lower_map(c)]; every other byte is left as it is.
// NOTE(review): this text is a rendered diff, so the two assignments below are
// the old (@inlineCall) and new (direct call) spelling of the same statement.
pub fn upper(self: *const string) void {
for (self.buffer) |c, i| {
if (ascii_lower_start <= c and c <= ascii_lower_end) {
self.buffer[i] = ascii_upper[@inlineCall(lower_map, c)];
self.buffer[i] = ascii_upper[lower_map(c)];
}
}
}
Expand All @@ -218,18 +217,18 @@ pub const string = struct {
// Swaps the case of the buffer in place: bytes in the lower-case range are
// replaced via ascii_upper[lower_map(c)], bytes in the upper-case range via
// ascii_lower[upper_map(c)]; bytes in neither range are left as they are.
// NOTE(review): this text is a rendered diff, so each branch shows the old
// (@inlineCall) assignment followed by its new (direct call) replacement.
pub fn swapcase(self: *const string) void {
for (self.buffer) |c, i| {
if (ascii_lower_start <= c and c <= ascii_lower_end) {
self.buffer[i] = ascii_upper[@inlineCall(lower_map, c)];
self.buffer[i] = ascii_upper[lower_map(c)];
} else if (ascii_upper_start <= c and c <= ascii_upper_end) {
self.buffer[i] = ascii_lower[@inlineCall(upper_map, c)];
self.buffer[i] = ascii_lower[upper_map(c)];
}
}
}

// Appends `other` to the end of this string's buffer.
// Returns immediately when `other` is empty. Otherwise the buffer is
// reallocated through self.allocator to hold self.size() + other.len bytes and
// `other` is copied in starting at the previous buffer length.
// Any error from realloc is propagated to the caller.
// NOTE(review): this text is a rendered diff, so the first realloc call
// (typed, three-argument form) is the old line and the second is its replacement.
pub fn concat(self: *string, other: []const u8) !void {
if (other.len == 0) return;
// remember where the existing contents end before the buffer is replaced
const orig_len = self.buffer.len;
self.buffer = try self.allocator.realloc(u8, self.buffer,
self.size() + other.len);
self.buffer = try self.allocator.realloc(self.buffer, @sizeOf(u8) *
(self.size() + other.len));
mem.copy(u8, self.buffer[orig_len..], other);
}

Expand Down Expand Up @@ -317,7 +316,7 @@ pub const string = struct {
// split the string by a specified separator, returning
// an ArrayList of []u8.
pub fn split_to_u8(self: *const string, sep: []const u8) ![][]const u8 {
var indices = try @inlineCall(self.find_all, sep);
var indices = try self.find_all(sep);

var results = try self.allocator.alloc([]const u8, indices.len+1);
var i: usize = 0;
Expand Down Expand Up @@ -362,19 +361,20 @@ pub const string = struct {

// Collects the index of every ' ' byte in the buffer, in ascending order.
// A scratch slice of buffer.len entries is allocated first (the most matches
// there can be); `i` counts how many entries have been filled.
// NOTE(review): this text is a rendered diff. The old version shrank the
// scratch slice with realloc and returned it; the new version frees the scratch
// slice via `defer` and returns a copy of its first `i` entries from mem_dupn.
pub fn single_space_indices(self: *const string) ![]usize {
var results = try self.allocator.alloc(usize, self.buffer.len);
defer self.allocator.free(results);
var i: usize = 0;
for (self.buffer) |c, j| {
if (c == ' ') {
results[i] = j;
i += 1;
}
}
results = try self.allocator.realloc(usize, results, i);
return results[0..];
return mem_dupn(self.allocator, usize, results, i);
}

pub fn all_space_indices(self: *const string) ![]usize {
var results = try self.allocator.alloc(usize, self.buffer.len);
defer self.allocator.free(results);
var i: usize = 0;
for (self.buffer) |c, j| {
switch (c) {
Expand All @@ -386,20 +386,19 @@ pub const string = struct {
else => continue,
}
}
results = try self.allocator.realloc(usize, results, i);
return results;
return mem_dupn(self.allocator, usize, results, i);
}
};

// Returns the offset of `c` from ascii_upper_start; callers in this file use
// the result as an index into ascii_lower.
// NOTE(review): rendered diff — the plain `fn` line is the old signature and
// the `inline fn` line is its replacement.
fn upper_map(c: u8) usize {
inline fn upper_map(c: u8) usize {
return c - ascii_upper_start;
}

// Returns the offset of `c` from ascii_lower_start; callers in this file use
// the result as an index into ascii_upper.
// NOTE(review): rendered diff — the plain `fn` line is the old signature and
// the `inline fn` line is its replacement.
fn lower_map(c: u8) usize {
inline fn lower_map(c: u8) usize {
return c - ascii_lower_start;
}

fn min(x: usize, y: usize, z: usize) usize {
inline fn min(x: usize, y: usize, z: usize) usize {
var result = x;
if (y < result) {
result = y;
Expand All @@ -408,3 +407,25 @@ fn min(x: usize, y: usize, z: usize) usize {
}
return result;
}

/// Copies at most `n` elements from `source` into the front of `dest`.
///
/// The number of elements copied is the smaller of `source.len` and `n`;
/// elements of `dest` beyond that count are left untouched.
///
/// Returns the whole `dest` slice (not only the copied prefix), which is
/// convenient for chaining. Returns `error.OutOfBounds`, without writing
/// anything, when `dest` is too short to hold the elements to be copied.
pub inline fn mem_copyn(comptime T: type, dest: []T, source: []const T, n: usize) error{OutOfBounds}![]T {
    // Never read past the end of `source`, even when the caller asks for more.
    const limit = if (source.len < n) source.len else n;
    if (limit > dest.len) {
        return error.OutOfBounds;
    }
    var idx: usize = 0;
    while (idx < limit) : (idx += 1) {
        dest[idx] = source[idx];
    }
    return dest;
}

/// Allocates a new slice with `allocator` and fills it with a copy of the
/// first `n` elements of `source`.
///
/// When `source` holds fewer than `n` elements, all of `source` is copied, so
/// the result has length equal to the smaller of `source.len` and `n`.
/// This function does not free the returned slice; that is left to the caller.
///
/// Returns `error.OutOfMemory` if the allocation fails. `error.OutOfBounds`
/// is part of the error set only because the copy step is delegated to
/// `mem_copyn`.
pub inline fn mem_dupn(allocator: *Allocator, comptime T: type, source: []const T, n: usize) error{OutOfBounds,OutOfMemory}![]T {
    const limit = if (source.len < n) source.len else n;
    const dest = try allocator.alloc(T, limit);
    // Release the fresh allocation if the copy step reports an error.
    errdefer allocator.free(dest);
    return mem_copyn(T, dest, source, limit);
}

/// Allocates a copy of every element of `source` using `allocator`.
///
/// Delegates to `mem_dupn` with the element count set to `source.len` and
/// forwards its result or error unchanged.
pub inline fn mem_dup(allocator: *Allocator, comptime T: type, source: []const T) error{OutOfBounds,OutOfMemory}![]T {
    const count = source.len;
    return mem_dupn(allocator, T, source, count);
}
15 changes: 7 additions & 8 deletions test/split_bench.zig
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
const string = @import("../src/strings.zig").string;
const time = std.os.time;
const string = @import("strings").string;
const time = @import("std").time;
const Timer = time.Timer;
const io = @import("std").io;
const std = @import("std");
const warn = @import("std").debug.warn;

// Reads the file at `path` into memory obtained from std.heap.c_allocator and
// returns the bytes; any error from the read call is propagated.
// NOTE(review): rendered diff — the io.readFileAlloc line is the old call and
// the std.fs.Dir.readFileAlloc line replaces it. The trailing 2 * 1000 * 1000
// argument is presumably a size limit in bytes — confirm against the std docs.
fn read_file(path: []const u8) ![]u8 {
var allocator = std.heap.c_allocator;
return try io.readFileAlloc(allocator, path);
return try std.fs.Dir.readFileAlloc(std.fs.cwd(), allocator, path, 2 * 1000 * 1000);
}

pub fn main() !void {
Expand All @@ -28,8 +28,7 @@ pub fn main() !void {
}
const end = timer.read();


warn("\nlen: {}\n", results[0]);
const elapsed_s = f64(end - start) / time.ns_per_s;
warn("\nelapsed seconds: {.3}\n\n", elapsed_s);
}
warn("\nlen: {}\n", .{results[0]});
const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
warn("\nelapsed seconds: {:.3}\n\n", .{elapsed_s});
}
16 changes: 7 additions & 9 deletions test/test.zig
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const strings = @import("../src/strings.zig");
const strings = @import("strings");
const string = strings.string;
const warn = @import("std").debug.warn;
const mem = @import("std").mem;
Expand Down Expand Up @@ -67,16 +67,16 @@ test "strings.upper_lower" {
test "strings.edit_distance" {
// levenshtein edit distance
var s3 = try string.init("apple");
assert((try s3.levenshtein("snapple")) == usize(2));
assert((try s3.levenshtein("snapple")) == @intCast(usize, 2));

var s4 = try string.init("book");
assert((try s4.levenshtein("burn")) == usize(3));
assert((try s4.levenshtein("burn")) == @intCast(usize, 3));

var s5 = try string.init("pencil");
assert((try s5.levenshtein("telephone")) == usize(8));
assert((try s5.levenshtein("telephone")) == @intCast(usize, 8));

var s6 = try string.init("flowers");
assert((try s6.levenshtein("wolf")) == usize(6));
assert((try s6.levenshtein("wolf")) == @intCast(usize, 6));
}

test "strings.replace" {
Expand Down Expand Up @@ -160,8 +160,6 @@ test "strings.split" {
assert(moby_full_split.len == 192865);
}



var moby =
\\Call me Ishmael. Some years ago—never mind how long precisely—having little or
\\no money in my purse, and nothing particular to interest me on shore, I thought
Expand All @@ -181,5 +179,5 @@ var moby =

// Reads the file at `path` into memory obtained from std.heap.c_allocator and
// returns the bytes; any error from the read call is propagated.
// NOTE(review): rendered diff — the io.readFileAlloc line and the first closing
// brace are the old text; the std.fs.Dir.readFileAlloc line and the final brace
// replace them. The trailing 10 * 1000 * 1000 argument is presumably a size
// limit in bytes — confirm against the std docs.
fn read_file(path: []const u8) ![]u8 {
var allocator = std.heap.c_allocator;
return try io.readFileAlloc(allocator, path);
}
return try std.fs.Dir.readFileAlloc(std.fs.cwd(), allocator, path, 10 * 1000 * 1000);
}