Skip to content

Commit a3f3f02

Browse files
authored
Merge pull request #14 from marselester/lookup-cache
Lookup cache
2 parents 9f656e5 + 202a3e2 commit a3f3f02

14 files changed

Lines changed: 701 additions & 227 deletions

File tree

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
run: zig build test
2626
- name: Run lookup example
2727
run: zig build example_lookup
28-
- name: Run within example
29-
run: zig build example_within
28+
- name: Run scan example
29+
run: zig build example_scan
3030
- name: Run inspect example
3131
run: zig build example_inspect

README.md

Lines changed: 154 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,37 @@ var db = try maxminddb.Reader.mmap(allocator, db_path, .{ .ipv4_index_first_n_bi
5454
defer db.close();
5555
```
5656

57-
Use `ArenaAllocator` for best performance, see [benchmarks](./benchmarks/).
57+
Each `lookup` result owns an arena with all decoded allocations.
58+
Call `deinit()` to free it or use `ArenaAllocator` with `reset()`,
59+
see [benchmarks](./benchmarks/lookup.zig).
60+
61+
```zig
62+
if (try db.lookup(maxminddb.geolite2.City, allocator, ip, .{})) |result| {
63+
defer result.deinit();
64+
std.debug.print("{f} {s}\n", .{ result.network, result.value.city.names.?.get("en").? });
65+
}
66+
67+
var arena = std.heap.ArenaAllocator.init(allocator);
68+
defer arena.deinit();
69+
70+
const arena_allocator = arena.allocator();
71+
for (ips) |ip| {
72+
if (try db.lookup(maxminddb.geolite2.City, arena_allocator, ip, .{})) |result| {
73+
std.debug.print("{f} {s}\n", .{ result.network, result.value.city.names.?.get("en").? });
74+
}
75+
76+
_ = arena.reset(.retain_capacity);
77+
}
78+
```
5879

5980
If you don't need all the fields, use `.only` to decode only the top-level fields you want.
6081

6182
```zig
6283
const fields = &.{ "city", "country" };
63-
const city = try db.lookup(allocator, maxminddb.geolite2.City, ip, .{ .only = fields });
84+
if (try db.lookup(maxminddb.geolite2.City, allocator, ip, .{ .only = fields })) |result| {
85+
defer result.deinit();
86+
std.debug.print("{f} {s}\n", .{ result.network, result.value.city.names.?.get("en").? });
87+
}
6488
```
6589

6690
Alternatively, define your own struct with only the fields you need.
@@ -74,27 +98,42 @@ const MyCity = struct {
7498
} = .{},
7599
};
76100
77-
const city = try db.lookup(allocator, MyCity, ip, .{});
101+
if (try db.lookup(MyCity, allocator, ip, .{})) |result| {
102+
defer result.deinit();
103+
std.debug.print("{s}\n", .{result.value.city.names.en});
104+
}
78105
```
79106

80107
Use `any.Value` to decode any record without knowing the schema.
81108

82109
```zig
83-
const result = try db.lookup(allocator, maxminddb.any.Value, ip, .{ .only = fields });
84-
if (result) |r| {
110+
if (try db.lookup(maxminddb.any.Value, allocator, ip, .{ .only = fields })) |result| {
111+
defer result.deinit();
85112
// Formats as compact JSON.
86-
std.debug.print("{f}\n", .{r.value});
113+
std.debug.print("{f}\n", .{result.value});
114+
}
115+
```
116+
117+
Use `lookupWithCache` to skip decoding when different IPs resolve to the same record.
118+
The cache owns decoded memory, so results don't need to be individually freed.
119+
120+
```zig
121+
var cache = try maxminddb.Cache(maxminddb.geolite2.City).init(allocator, .{});
122+
defer cache.deinit();
123+
124+
if (try db.lookupWithCache(maxminddb.geolite2.City, &cache, ip, .{})) |result| {
125+
std.debug.print("{f} {s}\n", .{ result.network, result.value.city.names.?.get("en").? });
87126
}
88127
```
89128

90129
Here are reference results on Apple M2 Pro (1M random IPv4 lookups against GeoLite2-City
91130
with `ipv4_index_first_n_bits = 16`):
92131

93-
| Benchmark | All fields | Filtered (city) |
94-
|--- |--- |--- |
95-
| `geolite2.City` | ~1,284,000 | ~1,348,000 |
96-
| `MyCity` | ~1,383,000 | |
97-
| `any.Value` | ~1,254,000 | ~1,349,000 |
132+
| Type | Default | `.only` | `Cache` |
133+
|--- |--- |--- |--- |
134+
| `geolite2.City` | ~1,420,000 | ~1,348,000 | ~1,565,000 |
135+
| `MyCity` | ~1,383,000 | | |
136+
| `any.Value` | ~1,254,000 | ~1,349,000 | |
98137

99138
<details>
100139

@@ -140,6 +179,30 @@ Lookups Per Second (avg):1315870.3443053183
140179

141180
<details>
142181

182+
<summary>geolite2.City with Cache</summary>
183+
184+
```sh
185+
$ for i in $(seq 1 10); do
186+
zig build benchmark_lookup_cache -Doptimize=ReleaseFast -- GeoLite2-City.mmdb 1000000 \
187+
2>&1 | grep 'Lookups Per Second'
188+
done
189+
190+
Lookups Per Second (avg):1493822.3908664712
191+
Lookups Per Second (avg):1503051.0049070602
192+
Lookups Per Second (avg):1499514.437731375
193+
Lookups Per Second (avg):1491749.9700251492
194+
Lookups Per Second (avg):1449924.9391983037
195+
Lookups Per Second (avg):1396100.6211600688
196+
Lookups Per Second (avg):1465750.9875955326
197+
Lookups Per Second (avg):1515611.9396877384
198+
Lookups Per Second (avg):1485235.6423035355
199+
Lookups Per Second (avg):1439334.222943596
200+
```
201+
202+
</details>
203+
204+
<details>
205+
143206
<summary>MyCity</summary>
144207

145208
```sh
@@ -203,3 +266,83 @@ Lookups Per Second (avg):1315986.2950186788
203266
```
204267

205268
</details>
269+
270+
Use `scan` to iterate over all networks in the database.
271+
272+
```zig
273+
var it = try db.scan(maxminddb.any.Value, allocator, maxminddb.Network.all_ipv6, .{});
274+
275+
while (try it.next()) |item| {
276+
defer item.deinit();
277+
std.debug.print("{f} {f}\n", .{ item.network, item.value });
278+
}
279+
```
280+
281+
Use `scanWithCache` to avoid re-decoding networks that share the same record.
282+
The cache owns decoded memory, so results don't need to be individually freed.
283+
284+
```zig
285+
var cache = try maxminddb.Cache(maxminddb.any.Value).init(allocator, .{});
286+
defer cache.deinit();
287+
288+
var it = try db.scanWithCache(maxminddb.any.Value, &cache, maxminddb.Network.all_ipv6, .{});
289+
290+
while (try it.next()) |item| {
291+
std.debug.print("{f} {f}\n", .{ item.network, item.value });
292+
}
293+
```
294+
295+
Here are reference results on Apple M2 Pro (full GeoLite2-City scan using `any.Value`):
296+
297+
| Mode | Records/sec |
298+
|--- |--- |
299+
| Default | ~1,295,000 |
300+
| `Cache` | ~2,930,000 |
301+
302+
<details>
303+
304+
<summary>no cache (any.Value)</summary>
305+
306+
```sh
307+
$ for i in $(seq 1 10); do
308+
zig build benchmark_scan -Doptimize=ReleaseFast -- GeoLite2-City.mmdb \
309+
2>&1 | grep 'Records Per Second'
310+
done
311+
312+
Records Per Second: 1216758.945145436
313+
Records Per Second: 1238440.9772222256
314+
Records Per Second: 1234710.6362391203
315+
Records Per Second: 1229527.4688849829
316+
Records Per Second: 1243478.3908140333
317+
Records Per Second: 1226863.3718734735
318+
Records Per Second: 1240073.3248202254
319+
Records Per Second: 1247541.1528026997
320+
Records Per Second: 1230510.441029532
321+
Records Per Second: 1246311.587919839
322+
```
323+
324+
</details>
325+
326+
<details>
327+
328+
<summary>cache (any.Value)</summary>
329+
330+
```sh
331+
$ for i in $(seq 1 10); do
332+
zig build benchmark_scan_cache -Doptimize=ReleaseFast -- GeoLite2-City.mmdb \
333+
2>&1 | grep 'Records Per Second'
334+
done
335+
336+
Records Per Second: 2847560.3756875996
337+
Records Per Second: 2925388.867798729
338+
Records Per Second: 2919203.9046571665
339+
Records Per Second: 2814410.555872645
340+
Records Per Second: 2933972.04386147
341+
Records Per Second: 2900700.06160036
342+
Records Per Second: 2922279.338699886
343+
Records Per Second: 2862525.847598088
344+
Records Per Second: 2916760.542913819
345+
Records Per Second: 2908245.98918392
346+
```
347+
348+
</details>

benchmarks/inspect.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ pub fn main() !void {
5959
const ip = std.net.Address.initIp4(ip_bytes, 0);
6060

6161
const result = db.lookup(
62-
arena_allocator,
6362
maxminddb.any.Value,
63+
arena_allocator,
6464
ip,
6565
.{ .only = fields },
6666
) catch |err| {

benchmarks/lookup.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ pub fn main() !void {
5959
const ip = std.net.Address.initIp4(ip_bytes, 0);
6060

6161
const result = db.lookup(
62-
arena_allocator,
6362
maxminddb.geolite2.City,
63+
arena_allocator,
6464
ip,
6565
.{ .only = fields },
6666
) catch |err| {

benchmarks/lookup_cache.zig

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
const std = @import("std");
const maxminddb = @import("maxminddb");

const default_db_path: []const u8 = "GeoLite2-City.mmdb";
const default_num_lookups: u64 = 1_000_000;
/// Maximum number of comma-separated field names accepted in the third argument.
const max_mmdb_fields = 32;

/// Benchmarks random IPv4 lookups that go through `lookupWithCache`.
///
/// Positional arguments (all optional):
///   1. database path (defaults to `default_db_path`),
///   2. number of lookups (defaults to `default_num_lookups`),
///   3. comma-separated list of field names passed as the `.only` option.
///
/// Returns `error.TooManyFields` when more than `max_mmdb_fields` names are given.
/// Progress and results are printed with `std.debug.print`.
pub fn main() !void {
    const allocator = std.heap.smp_allocator;

    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);

    var db_path: []const u8 = default_db_path;
    var num_lookups = default_num_lookups;
    // The buffer backing `fields` is declared at function scope because the slice
    // is still read inside the lookup loop below; a buffer local to the `if` block
    // would be out of scope by then.
    var field_buffer: [max_mmdb_fields][]const u8 = undefined;
    var fields: ?[]const []const u8 = null;
    if (args.len > 1) db_path = args[1];
    if (args.len > 2) num_lookups = try std.fmt.parseUnsigned(u64, args[2], 10);
    if (args.len > 3) {
        var it = std.mem.splitScalar(u8, args[3], ',');
        var i: usize = 0;
        while (it.next()) |part| : (i += 1) {
            // Reject oversized lists instead of indexing past the buffer.
            if (i == field_buffer.len) return error.TooManyFields;
            field_buffer[i] = part;
        }

        // The parts point into `args`, which is freed only when `main` returns.
        fields = field_buffer[0..i];
    }

    std.debug.print("Benchmarking with:\n", .{});
    std.debug.print("  Database: {s}\n", .{db_path});
    std.debug.print("  Lookups:  {d}\n", .{num_lookups});
    std.debug.print("Opening database...\n", .{});

    var open_timer = try std.time.Timer.start();
    var db = try maxminddb.Reader.mmap(allocator, db_path, .{ .ipv4_index_first_n_bits = 16 });
    defer db.close();
    const open_time_ms = @as(f64, @floatFromInt(open_timer.read())) /
        @as(f64, @floatFromInt(std.time.ns_per_ms));
    std.debug.print("Database opened successfully in {d} ms. Type: {s}\n", .{
        open_time_ms,
        db.metadata.database_type,
    });

    var cache = try maxminddb.Cache(maxminddb.geolite2.City).init(allocator, .{});
    defer cache.deinit();

    std.debug.print("Starting benchmark...\n", .{});
    var timer = try std.time.Timer.start();
    var not_found_count: u64 = 0;
    var lookup_errors: u64 = 0;
    var ip_bytes: [4]u8 = undefined;

    for (0..num_lookups) |_| {
        std.crypto.random.bytes(&ip_bytes);
        const ip = std.net.Address.initIp4(ip_bytes, 0);

        // Failed lookups are counted and reported rather than aborting the run.
        const result = db.lookupWithCache(
            maxminddb.geolite2.City,
            &cache,
            ip,
            .{ .only = fields },
        ) catch |err| {
            std.debug.print("! Lookup error for IP {any}: {any}\n", .{ ip, err });
            lookup_errors += 1;
            continue;
        };
        if (result == null) {
            not_found_count += 1;
        }
    }

    const elapsed_ns = timer.read();
    const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) /
        @as(f64, @floatFromInt(std.time.ns_per_s));
    // The rate is based on attempted lookups, including misses and errors.
    const lookups_per_second = if (elapsed_s > 0)
        @as(f64, @floatFromInt(num_lookups)) / elapsed_s
    else
        0.0;
    const successful_lookups = num_lookups - not_found_count - lookup_errors;

    std.debug.print("\n--- Benchmark Finished ---\n", .{});
    std.debug.print("Total Lookups Attempted: {d}\n", .{num_lookups});
    std.debug.print("Successful Lookups:      {d}\n", .{successful_lookups});
    std.debug.print("IPs Not Found:           {d}\n", .{not_found_count});
    std.debug.print("Lookup Errors:           {d}\n", .{lookup_errors});
    std.debug.print("Elapsed Time:            {d} s\n", .{elapsed_s});
    std.debug.print("Lookups Per Second (avg):{d}\n", .{lookups_per_second});
}

benchmarks/mycity.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ pub fn main() !void {
5353
const ip = std.net.Address.initIp4(ip_bytes, 0);
5454

5555
const result = db.lookup(
56-
arena_allocator,
5756
MyCity,
57+
arena_allocator,
5858
ip,
5959
.{},
6060
) catch |err| {

benchmarks/scan.zig

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
const std = @import("std");
const maxminddb = @import("maxminddb");

const default_db_path: []const u8 = "GeoLite2-City.mmdb";

/// Benchmarks an uncached `scan` over the database, decoding each item as `any.Value`.
///
/// The only (optional) positional argument is the database path; it defaults to
/// `default_db_path`. Progress and results are printed with `std.debug.print`.
pub fn main() !void {
    const gpa = std.heap.smp_allocator;

    const argv = try std.process.argsAlloc(gpa);
    defer std.process.argsFree(gpa, argv);

    const db_path: []const u8 = if (argv.len > 1) argv[1] else default_db_path;

    std.debug.print("Benchmarking with:\n", .{});
    std.debug.print("  Database: {s}\n", .{db_path});
    std.debug.print("Opening database...\n", .{});

    var open_timer = try std.time.Timer.start();
    var db = try maxminddb.Reader.mmap(gpa, db_path, .{});
    defer db.close();
    const open_ns: f64 = @floatFromInt(open_timer.read());
    const ns_per_ms: f64 = @floatFromInt(std.time.ns_per_ms);
    std.debug.print("Database opened successfully in {d} ms. Type: {s}\n", .{
        open_ns / ns_per_ms,
        db.metadata.database_type,
    });

    // Start from the network constant that matches the database's IP version.
    const whole_space = if (db.metadata.ip_version == 4)
        maxminddb.Network.all_ipv4
    else
        maxminddb.Network.all_ipv6;

    std.debug.print("Starting benchmark...\n", .{});
    var timer = try std.time.Timer.start();

    var it = try db.scan(maxminddb.any.Value, gpa, whole_space, .{});

    // Each yielded item is released immediately; only the count is kept.
    var record_count: usize = 0;
    while (try it.next()) |item| : (record_count += 1) {
        item.deinit();
    }

    const scan_ns: f64 = @floatFromInt(timer.read());
    const ns_per_s: f64 = @floatFromInt(std.time.ns_per_s);
    const elapsed_s = scan_ns / ns_per_s;

    const scanned: f64 = @floatFromInt(record_count);
    const records_per_second = if (elapsed_s > 0) scanned / elapsed_s else 0.0;

    std.debug.print("\n--- Benchmark Finished ---\n", .{});
    std.debug.print("Records:            {d}\n", .{record_count});
    std.debug.print("Elapsed Time:       {d} s\n", .{elapsed_s});
    std.debug.print("Records Per Second: {d}\n", .{records_per_second});
}

0 commit comments

Comments
 (0)