diff --git a/README.md b/README.md
index c74ba40..1aa59f6 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ It's based on [maxminddb-rust](https://github.com/oschwald/maxminddb-rust) imple
You must create a copy if you wish to continue using the string when the database is closed.
You'll need [MaxMind-DB/test-data](https://github.com/maxmind/MaxMind-DB/tree/main/test-data)
-to run tests/examples and `GeoLite2-City.mmdb` to run the benchmark.
+to run tests/examples and `GeoLite2-City.mmdb` to run the benchmarks.
```sh
$ git submodule update --init
@@ -45,9 +45,18 @@ See [examples](./examples/).
## Suggestions
+If you have a long-lived `Reader`, build the IPv4 index with `.ipv4_index_first_n_bits` to speed up lookups.
+The recommended value is 16 (~320KB, fits in L2 cache; ~1-4ms to build when the file is warm,
+~10-120ms on first open due to page faults), or 12 (~20KB) for constrained devices.
+
+```zig
+var db = try maxminddb.Reader.mmap(allocator, db_path, .{ .ipv4_index_first_n_bits = 16 });
+defer db.close();
+```
+
Use `ArenaAllocator` for best performance, see [benchmarks](./benchmarks/).
-If you don't need all the fields, use `Options.only` to decode only the top-level fields you want.
+If you don't need all the fields, use `.only` to decode only the top-level fields you want.
```zig
const fields = &.{ "city", "country" };
@@ -78,13 +87,14 @@ if (result) |r| {
}
```
-Here are reference results on Apple M2 Pro (1M random IPv4 lookups against GeoLite2-City):
+Here are reference results on Apple M2 Pro (1M random IPv4 lookups against GeoLite2-City
+with `ipv4_index_first_n_bits = 16`):
| Benchmark | All fields | Filtered (city) |
|--- |--- |--- |
-| `geolite2.City` | ~1,189,000 | ~1,245,000 |
-| `MyCity` | ~1,228,000 | — |
-| `any.Value` | ~1,150,000 | ~1,234,000 |
+| `geolite2.City` | ~1,284,000 | ~1,348,000 |
+| `MyCity` | ~1,383,000 | — |
+| `any.Value` | ~1,254,000 | ~1,349,000 |
@@ -103,27 +113,51 @@ $ for i in $(seq 1 10); do
2>&1 | grep 'Lookups Per Second'
done
-Lookups Per Second (avg):939020.9936331962
-Lookups Per Second (avg):1202068.1587479531
-Lookups Per Second (avg):1226191.8873913633
-Lookups Per Second (avg):1190260.5152708234
-Lookups Per Second (avg):1187237.1418382763
-Lookups Per Second (avg):1180139.664667138
-Lookups Per Second (avg):1184298.3951793911
-Lookups Per Second (avg):1172927.7709424824
-Lookups Per Second (avg):1192207.8482477544
-Lookups Per Second (avg):1182672.4879777646
+Lookups Per Second (avg):1181277.2875127245
+Lookups Per Second (avg):1298229.636700173
+Lookups Per Second (avg):1284580.6443966748
+Lookups Per Second (avg):1293284.3402910086
+Lookups Per Second (avg):1285891.7841541092
+Lookups Per Second (avg):1283654.9587741245
+Lookups Per Second (avg):1287798.220295312
+Lookups Per Second (avg):1291991.2632139924
+Lookups Per Second (avg):1282363.8582417285
+Lookups Per Second (avg):1246191.3914272592
---
-Lookups Per Second (avg):1255008.2012150432
-Lookups Per Second (avg):1244663.9575842023
-Lookups Per Second (avg):1255868.10809833
-Lookups Per Second (avg):1244955.1445213587
-Lookups Per Second (avg):1221882.1368531892
-Lookups Per Second (avg):1255099.9559031925
-Lookups Per Second (avg):1251926.597665689
-Lookups Per Second (avg):1221997.1083589145
-Lookups Per Second (avg):1186516.0167055523
-Lookups Per Second (avg):1226974.481844842
+Lookups Per Second (avg):1323980.8070552205
+Lookups Per Second (avg):1351732.5910886768
+Lookups Per Second (avg):1351039.987754606
+Lookups Per Second (avg):1348480.894738865
+Lookups Per Second (avg):1357111.6649975393
+Lookups Per Second (avg):1348661.0150208646
+Lookups Per Second (avg):1357781.4722981465
+Lookups Per Second (avg):1356498.714039219
+Lookups Per Second (avg):1346452.11429767
+Lookups Per Second (avg):1315870.3443053183
+```
+
+</details>
+
+<details>
+
+<summary>MyCity</summary>
+
+```sh
+$ for i in $(seq 1 10); do
+ zig build benchmark_mycity -Doptimize=ReleaseFast -- GeoLite2-City.mmdb 1000000 \
+ 2>&1 | grep 'Lookups Per Second'
+ done
+
+Lookups Per Second (avg):1405912.7999428671
+Lookups Per Second (avg):1376923.8357458028
+Lookups Per Second (avg):1372073.1321839818
+Lookups Per Second (avg):1378707.359082014
+Lookups Per Second (avg):1395492.1172529764
+Lookups Per Second (avg):1394880.1743390427
+Lookups Per Second (avg):1390645.867575583
+Lookups Per Second (avg):1373588.0075019994
+Lookups Per Second (avg):1372678.8857965483
+Lookups Per Second (avg):1387958.9236387985
```
@@ -145,27 +179,27 @@ $ for i in $(seq 1 10); do
2>&1 | grep 'Lookups Per Second'
done
-Lookups Per Second (avg):975677.3396010846
-Lookups Per Second (avg):1140100.8142809793
-Lookups Per Second (avg):1148647.9154542664
-Lookups Per Second (avg):1159945.4593645008
-Lookups Per Second (avg):1146155.6701547962
-Lookups Per Second (avg):1152253.0540916577
-Lookups Per Second (avg):1168908.0392599553
-Lookups Per Second (avg):1138716.2824329527
-Lookups Per Second (avg):1150480.114967662
-Lookups Per Second (avg):1161504.7700823087
+Lookups Per Second (avg):1249814.6118740842
+Lookups Per Second (avg):1225988.817449499
+Lookups Per Second (avg):1264197.1313154744
+Lookups Per Second (avg):1270859.3015692532
+Lookups Per Second (avg):1261325.321815331
+Lookups Per Second (avg):1269464.4605490116
+Lookups Per Second (avg):1260642.9131866288
+Lookups Per Second (avg):1248199.6670115339
+Lookups Per Second (avg):1259984.7888336368
+Lookups Per Second (avg):1227344.2469651096
---
-Lookups Per Second (avg):1232606.0656379322
-Lookups Per Second (avg):1234686.4799143772
-Lookups Per Second (avg):1081398.2429103954
-Lookups Per Second (avg):1243047.4800630722
-Lookups Per Second (avg):1217435.2550309
-Lookups Per Second (avg):1237809.9577944186
-Lookups Per Second (avg):1232356.3798965935
-Lookups Per Second (avg):1242459.8219555076
-Lookups Per Second (avg):1213491.9682358333
-Lookups Per Second (avg):1241524.1410712942
+Lookups Per Second (avg):1366697.6894286321
+Lookups Per Second (avg):1359936.8717304142
+Lookups Per Second (avg):1350500.9773859177
+Lookups Per Second (avg):1345155.3802565804
+Lookups Per Second (avg):1354979.4314596548
+Lookups Per Second (avg):1363058.6900699302
+Lookups Per Second (avg):1351386.2025057953
+Lookups Per Second (avg):1360068.193819238
+Lookups Per Second (avg):1342324.820976454
+Lookups Per Second (avg):1315986.2950186788
```
diff --git a/benchmarks/inspect.zig b/benchmarks/inspect.zig
index bb07aec..597778d 100644
--- a/benchmarks/inspect.zig
+++ b/benchmarks/inspect.zig
@@ -35,8 +35,8 @@ pub fn main() !void {
std.debug.print("Opening database...\n", .{});
var open_timer = try std.time.Timer.start();
- var db = try maxminddb.Reader.mmap(allocator, db_path);
- defer db.unmap();
+ var db = try maxminddb.Reader.mmap(allocator, db_path, .{ .ipv4_index_first_n_bits = 16 });
+ defer db.close();
const open_time_ms = @as(f64, @floatFromInt(open_timer.read())) /
@as(f64, @floatFromInt(std.time.ns_per_ms));
std.debug.print("Database opened successfully in {d} ms. Type: {s}\n", .{
diff --git a/benchmarks/lookup.zig b/benchmarks/lookup.zig
index 64ebf4d..f260157 100644
--- a/benchmarks/lookup.zig
+++ b/benchmarks/lookup.zig
@@ -35,8 +35,8 @@ pub fn main() !void {
std.debug.print("Opening database...\n", .{});
var open_timer = try std.time.Timer.start();
- var db = try maxminddb.Reader.mmap(allocator, db_path);
- defer db.unmap();
+ var db = try maxminddb.Reader.mmap(allocator, db_path, .{ .ipv4_index_first_n_bits = 16 });
+ defer db.close();
const open_time_ms = @as(f64, @floatFromInt(open_timer.read())) /
@as(f64, @floatFromInt(std.time.ns_per_ms));
std.debug.print("Database opened successfully in {d} ms. Type: {s}\n", .{
diff --git a/benchmarks/mycity.zig b/benchmarks/mycity.zig
new file mode 100644
index 0000000..558ce7a
--- /dev/null
+++ b/benchmarks/mycity.zig
@@ -0,0 +1,89 @@
+const std = @import("std");
+const maxminddb = @import("maxminddb");
+
+const default_db_path: []const u8 = "GeoLite2-City.mmdb";
+const default_num_lookups: u64 = 1_000_000;
+
+const MyCity = struct {
+ city: struct {
+ names: struct {
+ en: []const u8 = "",
+ } = .{},
+ } = .{},
+};
+
+/// Benchmarks random IPv4 lookups decoded into `MyCity` and prints the throughput.
+pub fn main() !void {
+    const allocator = std.heap.smp_allocator;
+
+    const args = try std.process.argsAlloc(allocator);
+    defer std.process.argsFree(allocator, args);
+
+    var db_path: []const u8 = default_db_path;
+    var num_lookups = default_num_lookups;
+    if (args.len > 1) db_path = args[1];
+    if (args.len > 2) num_lookups = try std.fmt.parseUnsigned(u64, args[2], 10);
+
+    std.debug.print("Benchmarking with:\n", .{});
+    std.debug.print(" Database: {s}\n", .{db_path});
+    std.debug.print(" Lookups: {d}\n", .{num_lookups});
+    std.debug.print("Opening database...\n", .{});
+
+    var open_timer = try std.time.Timer.start();
+    var db = try maxminddb.Reader.mmap(allocator, db_path, .{ .ipv4_index_first_n_bits = 16 });
+    defer db.close();
+    const open_time_ms = @as(f64, @floatFromInt(open_timer.read())) /
+        @as(f64, @floatFromInt(std.time.ns_per_ms));
+    std.debug.print("Database opened successfully in {d} ms. Type: {s}\n", .{
+        open_time_ms,
+        db.metadata.database_type,
+    });
+
+    var arena = std.heap.ArenaAllocator.init(allocator);
+    defer arena.deinit();
+    const arena_allocator = arena.allocator();
+
+    std.debug.print("Starting benchmark...\n", .{});
+    var timer = try std.time.Timer.start();
+    var not_found_count: u64 = 0;
+    var lookup_errors: u64 = 0;
+    var ip_bytes: [4]u8 = undefined;
+
+    for (0..num_lookups) |_| {
+        // Reset on every iteration (also on `continue`) so failed lookups cannot pile up allocations.
+        defer _ = arena.reset(.retain_capacity);
+
+        std.crypto.random.bytes(&ip_bytes);
+        const ip = std.net.Address.initIp4(ip_bytes, 0);
+        const result = db.lookup(
+            arena_allocator,
+            MyCity,
+            ip,
+            .{},
+        ) catch |err| {
+            std.debug.print("! Lookup error for IP {any}: {any}\n", .{ ip, err });
+            lookup_errors += 1;
+            continue;
+        };
+        if (result == null) {
+            not_found_count += 1;
+        }
+    }
+
+    const elapsed_ns = timer.read();
+    const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) /
+        @as(f64, @floatFromInt(std.time.ns_per_s));
+    const lookups_per_second = if (elapsed_s > 0)
+        @as(f64, @floatFromInt(num_lookups)) / elapsed_s
+    else
+        0.0;
+    const successful_lookups = num_lookups - not_found_count - lookup_errors;
+
+    std.debug.print("\n--- Benchmark Finished ---\n", .{});
+    std.debug.print("Total Lookups Attempted: {d}\n", .{num_lookups});
+    std.debug.print("Successful Lookups: {d}\n", .{successful_lookups});
+    std.debug.print("IPs Not Found: {d}\n", .{not_found_count});
+    std.debug.print("Lookup Errors: {d}\n", .{lookup_errors});
+    std.debug.print("Elapsed Time: {d} s\n", .{elapsed_s});
+    std.debug.print("Lookups Per Second (avg):{d}\n", .{lookups_per_second});
+}
diff --git a/build.zig b/build.zig
index 04cd66b..fc3a1dd 100644
--- a/build.zig
+++ b/build.zig
@@ -30,6 +30,7 @@ pub fn build(b: *std.Build) void {
.{ .file = "examples/within.zig", .name = "example_within" },
.{ .file = "examples/inspect.zig", .name = "example_inspect" },
.{ .file = "benchmarks/lookup.zig", .name = "benchmark_lookup" },
+ .{ .file = "benchmarks/mycity.zig", .name = "benchmark_mycity" },
.{ .file = "benchmarks/inspect.zig", .name = "benchmark_inspect" },
};
diff --git a/examples/inspect.zig b/examples/inspect.zig
index 502e2ff..8d2f970 100644
--- a/examples/inspect.zig
+++ b/examples/inspect.zig
@@ -11,8 +11,8 @@ pub fn main() !void {
const db_path = if (args.len > 1) args[1] else "test-data/test-data/GeoIP2-City-Test.mmdb";
const ip = if (args.len > 2) args[2] else "89.160.20.128";
- var db = try maxminddb.Reader.mmap(allocator, db_path);
- defer db.unmap();
+ var db = try maxminddb.Reader.mmap(allocator, db_path, .{});
+ defer db.close();
const result = try db.lookup(
allocator,
diff --git a/examples/lookup.zig b/examples/lookup.zig
index 39d04c3..a36c155 100644
--- a/examples/lookup.zig
+++ b/examples/lookup.zig
@@ -2,16 +2,14 @@ const std = @import("std");
const maxminddb = @import("maxminddb");
const db_path = "test-data/test-data/GeoIP2-City-Test.mmdb";
-// We expect a DB file not larger than 1 GB.
-const max_db_size: usize = 1024 * 1024 * 1024;
pub fn main() !void {
var gpa: std.heap.DebugAllocator(.{}) = .init;
const allocator = gpa.allocator();
defer _ = gpa.detectLeaks();
- var db = try maxminddb.Reader.open(allocator, db_path, max_db_size);
- defer db.close(allocator);
+ var db = try maxminddb.Reader.open(allocator, db_path, .{});
+ defer db.close();
// Note, for better performance use arena allocator and reset it after calling lookup().
// You won't need to call city.deinit() in that case.
diff --git a/examples/within.zig b/examples/within.zig
index 77fa9a2..42c3815 100644
--- a/examples/within.zig
+++ b/examples/within.zig
@@ -8,8 +8,8 @@ pub fn main() !void {
const allocator = gpa.allocator();
defer _ = gpa.detectLeaks();
- var db = try maxminddb.Reader.mmap(allocator, db_path);
- defer db.unmap();
+ var db = try maxminddb.Reader.mmap(allocator, db_path, .{});
+ defer db.close();
const network = if (db.metadata.ip_version == 4)
maxminddb.Network.all_ipv4
@@ -22,7 +22,6 @@ pub fn main() !void {
// The iterator owns the values; each next() call invalidates the previous item.
var n: usize = 0;
while (try it.next()) |item| {
-
const continent = item.value.continent.code;
const country = item.value.country.iso_code;
var city: []const u8 = "";
diff --git a/src/maxminddb.zig b/src/maxminddb.zig
index 83f773e..8db5363 100644
--- a/src/maxminddb.zig
+++ b/src/maxminddb.zig
@@ -15,6 +15,7 @@ pub const Result = reader.Result;
pub const Metadata = reader.Metadata;
pub const Iterator = reader.Iterator;
pub const Network = net.Network;
+pub const Options = reader.Options;
pub const LookupOptions = reader.LookupOptions;
pub const WithinOptions = reader.WithinOptions;
pub const Map = collection.Map;
@@ -138,8 +139,9 @@ test "GeoLite2 Country" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoLite2-Country-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
try expectEqual(DatabaseType.geolite_country, DatabaseType.new(db.metadata.database_type));
@@ -191,8 +193,9 @@ test "GeoLite2 City" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoLite2-City-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
try expectEqual(DatabaseType.geolite_city, DatabaseType.new(db.metadata.database_type));
@@ -262,8 +265,9 @@ test "GeoLite2 ASN" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoLite2-ASN-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
try expectEqual(DatabaseType.geolite_asn, DatabaseType.new(db.metadata.database_type));
@@ -286,8 +290,9 @@ test "GeoIP2 Country" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoIP2-Country-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
try expectEqual(DatabaseType.geoip_country, DatabaseType.new(db.metadata.database_type));
@@ -341,8 +346,9 @@ test "GeoIP2 Country RepresentedCountry" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoIP2-Country-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
const ip = try std.net.Address.parseIp("202.196.224.0", 0);
const got = (try db.lookup(allocator, geoip2.Country, ip, .{})).?;
@@ -366,8 +372,9 @@ test "GeoIP2 City" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoIP2-City-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
try expectEqual(DatabaseType.geoip_city, DatabaseType.new(db.metadata.database_type));
@@ -450,8 +457,9 @@ test "GeoIP2 Enterprise" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoIP2-Enterprise-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
try expectEqual(DatabaseType.geoip_enterprise, DatabaseType.new(db.metadata.database_type));
@@ -549,8 +557,9 @@ test "GeoIP2 ISP" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoIP2-ISP-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
try expectEqual(DatabaseType.geoip_isp, DatabaseType.new(db.metadata.database_type));
@@ -573,8 +582,9 @@ test "GeoIP2 Connection-Type" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoIP2-Connection-Type-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
try expectEqual(DatabaseType.geoip_connection_type, DatabaseType.new(db.metadata.database_type));
@@ -592,8 +602,9 @@ test "GeoIP2 Anonymous-IP" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoIP2-Anonymous-IP-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
try expectEqual(DatabaseType.geoip_anonymous_ip, DatabaseType.new(db.metadata.database_type));
@@ -616,8 +627,9 @@ test "GeoIP Anonymous-Plus" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoIP-Anonymous-Plus-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
try expectEqual(DatabaseType.geoip_anonymous_plus, DatabaseType.new(db.metadata.database_type));
@@ -639,8 +651,9 @@ test "GeoIP2 DensityIncome" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoIP2-DensityIncome-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
try expectEqual(DatabaseType.geoip_densityincome, DatabaseType.new(db.metadata.database_type));
@@ -659,8 +672,9 @@ test "GeoIP2 Domain" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoIP2-Domain-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
try expectEqual(DatabaseType.geoip_domain, DatabaseType.new(db.metadata.database_type));
@@ -678,8 +692,9 @@ test "GeoIP2 IP-Risk" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoIP2-IP-Risk-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
try expectEqual(DatabaseType.geoip_ip_risk, DatabaseType.new(db.metadata.database_type));
@@ -715,8 +730,9 @@ test "GeoIP2 Static-IP-Score" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoIP2-Static-IP-Score-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
try expectEqual(DatabaseType.geoip_static_ip_score, DatabaseType.new(db.metadata.database_type));
@@ -734,8 +750,9 @@ test "GeoIP2 User-Count" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoIP2-User-Count-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
try expectEqual(DatabaseType.geoip_user_count, DatabaseType.new(db.metadata.database_type));
@@ -754,8 +771,9 @@ test "lookup with field name filtering" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoLite2-City-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
const ip = try std.net.Address.parseIp("89.160.20.128", 0);
@@ -783,8 +801,9 @@ test "lookup with custom record" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoLite2-City-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
const MyCity = struct {
city: struct {
@@ -807,8 +826,9 @@ test "lookup with any.Value" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoLite2-City-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
const ip = try std.net.Address.parseIp("89.160.20.128", 0);
const got = (try db.lookup(allocator, any.Value, ip, .{})).?;
@@ -829,8 +849,9 @@ test "lookup with any.Value and field name filtering" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoLite2-City-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
const ip = try std.net.Address.parseIp("89.160.20.128", 0);
const got = (try db.lookup(
@@ -857,8 +878,9 @@ test "within returns all networks" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoLite2-City-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
var it = try db.within(allocator, geolite2.City, net.Network.all_ipv6, .{});
defer it.deinit();
@@ -873,8 +895,9 @@ test "within yields record when query prefix is narrower than record network" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoLite2-ASN-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
// 89.160.20.0/24 is inside the /17 record.
// The iterator must still yield it even though the data record is found
@@ -900,8 +923,9 @@ test "within yields record when start node is a data pointer" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
const network = try net.Network.parse("0.0.0.0/0");
var it = try db.within(allocator, any.Value, network, .{});
@@ -919,8 +943,9 @@ test "reject IPv6 on IPv4-only database" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/MaxMind-DB-test-ipv4-32.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
const network = try net.Network.parse("::/0");
const it = db.within(allocator, any.Value, network, .{});
@@ -935,8 +960,9 @@ test "within skips empty records" {
var db = try Reader.mmap(
allocator,
"test-data/test-data/GeoIP2-Anonymous-IP-Test.mmdb",
+ .{},
);
- defer db.unmap();
+ defer db.close();
// All records including empty.
{
diff --git a/src/net.zig b/src/net.zig
index df4967b..4e50f05 100644
--- a/src/net.zig
+++ b/src/net.zig
@@ -150,22 +150,22 @@ pub const IP = union(enum) {
.v4 => |b| {
// Combines IP bytes into a big-endian u32, e.g.,
// 89.160.20.128 = 89 << 24 | 160 << 16 | 20 << 8 | 128
- const ipAsNumber = std.mem.readInt(u32, &b, .big);
+ const ip_as_number = std.mem.readInt(u32, &b, .big);
const ones: u32 = std.math.maxInt(u32);
const bitmask = if (prefix_len == 0) 0 else ones << @intCast(32 - prefix_len);
var out: [4]u8 = undefined;
- std.mem.writeInt(u32, &out, ipAsNumber & bitmask, .big);
+ std.mem.writeInt(u32, &out, ip_as_number & bitmask, .big);
return .{ .v4 = out };
},
.v6 => |b| {
- const ipAsNumber = std.mem.readInt(u128, &b, .big);
+ const ip_as_number = std.mem.readInt(u128, &b, .big);
const ones: u128 = std.math.maxInt(u128);
const bitmask = if (prefix_len == 0) 0 else ones << @intCast(128 - prefix_len);
var out: [16]u8 = undefined;
- std.mem.writeInt(u128, &out, ipAsNumber & bitmask, .big);
+ std.mem.writeInt(u128, &out, ip_as_number & bitmask, .big);
return .{ .v6 = out };
},
diff --git a/src/reader.zig b/src/reader.zig
index 2b2e400..3abd49b 100644
--- a/src/reader.zig
+++ b/src/reader.zig
@@ -11,6 +11,7 @@ pub const ReadError = error{
CorruptedTree,
UnknownRecordSize,
InvalidPrefixLen,
+ IndexAlreadyBuilt,
IPv6AddressInIPv4Database,
};
@@ -32,6 +33,31 @@ pub const Metadata = struct {
const data_section_separator_size = 16;
+// Maximum db size for Reader.open().
+// 64-bit: 20GB covers ~2.3B nodes (record_size=32) with ~2GB data section.
+// 32-bit: 2GB matches the user-space address limit.
+const max_db_size: usize = if (@sizeOf(usize) >= 8)
+ 20 * 1024 * 1024 * 1024
+else
+ 2 * 1024 * 1024 * 1024;
+
+pub const Options = struct {
+ /// Number of leading IPv4 address bits to pre-compute into a flat index that
+ /// speeds up lookup(); the within() iterator does not use the index.
+ ///
+ /// It adds a one-time build cost of ~1-4ms and uses 5 bytes per slot for 2^N slots.
+ /// The first open is slower (~10-120ms) because page faults load the tree from disk.
+ /// Best suited for long-lived Readers with many lookups.
+ ///
+ /// Sparse databases such as Anonymous-IP or ISP benefit more (~70%-140%)
+ /// because tree traversal dominates, whereas dense databases (City, Enterprise)
+ /// benefit less (~12%-18%) because record decoding is the bottleneck.
+ ///
+ /// The recommended value is 16 (~320KB, fits L2 cache), or 12 (~20KB) for constrained devices.
+ /// 0 disables the index; values above 24 fail with ReadError.InvalidPrefixLen.
+ ipv4_index_first_n_bits: u8 = 0,
+};
+
pub const LookupOptions = struct {
only: ?[]const []const u8 = null,
};
@@ -42,23 +68,31 @@ pub const WithinOptions = struct {
};
pub const Reader = struct {
+ metadata: Metadata,
src: []const u8,
offset: usize,
ipv4_start: usize,
- metadata: Metadata,
- metadata_arena: std.heap.ArenaAllocator,
-
- // Loads a MaxMind DB file into memory.
- pub fn open(allocator: std.mem.Allocator, path: []const u8, max_db_size: usize) !Reader {
- var f = try std.fs.cwd().openFile(path, .{});
- defer f.close();
-
- const src = try f.readToEndAlloc(allocator, max_db_size);
- errdefer allocator.free(src);
-
- var metadata_arena = std.heap.ArenaAllocator.init(allocator);
- errdefer metadata_arena.deinit();
- const metadata = try decodeMetadata(metadata_arena.allocator(), src);
+ // ipv4_index is a flat array of tree node IDs and data offsets
+ // for fast lookup of IPv4 addresses by their first N bits.
+ // Instead of traversing the tree bit by bit from the root,
+ // the first N levels are pre-computed into a direct-access array.
+ ipv4_index_first_n_bits: u8,
+ ipv4_index: ?[]u32,
+ // ipv4_index_prefix_len stores the prefix length at which
+ // each terminal was reached during the index construction.
+ // This lets us return the correct prefix length
+ // without re-traversing the tree for terminal nodes in the index.
+ ipv4_index_prefix_len: ?[]u8,
+ is_mapped: bool,
+ arena: *std.heap.ArenaAllocator,
+
+ fn init(arena: *std.heap.ArenaAllocator, src: []const u8, options: Options) !Reader {
+ const metadata = try decodeMetadata(arena.allocator(), src);
+
+ switch (metadata.record_size) {
+ 24, 28, 32 => {},
+ else => return ReadError.UnknownRecordSize,
+ }
const search_tree_size = try std.math.mul(
usize,
@@ -71,68 +105,76 @@ pub const Reader = struct {
}
var r = Reader{
+ .metadata = metadata,
.src = src,
.offset = data_offset,
.ipv4_start = 0,
- .metadata = metadata,
- .metadata_arena = metadata_arena,
+ .ipv4_index_first_n_bits = options.ipv4_index_first_n_bits,
+ .ipv4_index = null,
+ .ipv4_index_prefix_len = null,
+ .is_mapped = false,
+ .arena = arena,
};
- r.ipv4_start = try r.findIPv4Start();
+ r.setIPv4Start();
+
+ if (r.ipv4_index_first_n_bits > 0) {
+ try r.buildIPv4Index();
+ }
return r;
}
- // Frees the memory occupied by the DB file.
- // From this point all the DB records are unusable because their fields were backed by the same memory.
- // Note, the records still have to be deinited since they might contain arrays or maps.
- pub fn close(self: *Reader, allocator: std.mem.Allocator) void {
- self.metadata_arena.deinit();
- allocator.free(self.src);
+    /// Reads the whole MaxMind DB file at `path` into memory owned by the returned Reader.
+    /// Release it with close().
+    pub fn open(allocator: std.mem.Allocator, path: []const u8, options: Options) !Reader {
+        var file = try std.fs.cwd().openFile(path, .{});
+        defer file.close();
+
+        // The arena is heap-allocated because the Reader stores a pointer to it.
+        const arena = try allocator.create(std.heap.ArenaAllocator);
+        errdefer allocator.destroy(arena);
+        arena.* = std.heap.ArenaAllocator.init(allocator);
+        errdefer arena.deinit();
+
+        // The file contents are allocated from the arena, so they are freed along with it.
+        const src = try file.readToEndAlloc(arena.allocator(), max_db_size);
+        return try init(arena, src, options);
+    }
- // Maps a MaxMind DB file into memory.
- pub fn mmap(allocator: std.mem.Allocator, path: []const u8) !Reader {
+ /// Maps a MaxMind DB file into memory.
+ pub fn mmap(allocator: std.mem.Allocator, path: []const u8, options: Options) !Reader {
const src = try memorymap.map(path);
errdefer memorymap.unmap(src);
- var metadata_arena = std.heap.ArenaAllocator.init(allocator);
- errdefer metadata_arena.deinit();
- const metadata = try decodeMetadata(metadata_arena.allocator(), src);
-
- const search_tree_size = try std.math.mul(
- usize,
- metadata.node_count,
- metadata.record_size / 4,
- );
- const data_offset = search_tree_size + data_section_separator_size;
- if (data_offset > src.len) {
- return ReadError.CorruptedTree;
+ const arena = try allocator.create(std.heap.ArenaAllocator);
+ errdefer {
+ arena.deinit();
+ allocator.destroy(arena);
}
+ arena.* = std.heap.ArenaAllocator.init(allocator);
- var r = Reader{
- .src = src,
- .offset = data_offset,
- .ipv4_start = 0,
- .metadata = metadata,
- .metadata_arena = metadata_arena,
- };
-
- r.ipv4_start = try r.findIPv4Start();
+ var r = try init(arena, src, options);
+ r.is_mapped = true;
return r;
}
- // Unmaps the DB file.
- // From this point all the DB records are unusable because their fields were backed by the same memory.
- // Note, the records still have to be deinited since they might contain arrays or maps.
- pub fn unmap(self: *Reader) void {
- self.metadata_arena.deinit();
- memorymap.unmap(self.src);
+    /// Releases everything the Reader owns: its arena and, for mmap(), the file mapping.
+    /// From this point all the DB records are unusable because their fields were backed by the same memory.
+    /// Note, the records still have to be deinited since they might contain arrays or maps.
+    pub fn close(self: *Reader) void {
+        const arena = self.arena;
+        // Grab the parent allocator first: it is read from the arena that is about to be torn down.
+        const parent = arena.child_allocator;
+        arena.deinit();
+        parent.destroy(arena);
+
+        if (self.is_mapped) memorymap.unmap(self.src);
+    }
- // Looks up a value by an IP address.
- // The returned Result owns an arena with all decoded allocations.
+ /// Looks up a value by an IP address.
+ /// The returned Result owns an arena with all decoded allocations.
pub fn lookup(
self: *Reader,
allocator: std.mem.Allocator,
@@ -145,13 +187,22 @@ pub const Reader = struct {
return ReadError.IPv6AddressInIPv4Database;
}
- const pointer, const prefix_len = try self.findAddressInTree(ip);
+ var pointer: usize = 0;
+ var prefix_len: usize = 0;
+ if (self.ipv4_index != null and ip == .v4) {
+ pointer, prefix_len = try self.findAddressInTreeWithIndex(ip);
+ } else {
+ const start_node = self.startNode(ip.bitCount());
+ pointer, prefix_len = try self.findAddressInTree(ip, start_node, 0);
+ }
+
if (pointer == 0) {
return null;
}
var arena = std.heap.ArenaAllocator.init(allocator);
errdefer arena.deinit();
+
const value = try self.resolveDataPointerAndDecode(
arena.allocator(),
T,
@@ -166,7 +217,7 @@ pub const Reader = struct {
};
}
- // Iterates over blocks of IP networks.
+ /// Iterates over blocks of IP networks.
pub fn within(
self: *Reader,
allocator: std.mem.Allocator,
@@ -197,7 +248,7 @@ pub const Reader = struct {
var depth: usize = 0;
if (node < node_count) {
while (depth < prefix_len) {
- node = try self.readNode(node, ip_bytes.bitAt(depth));
+ node = self.readNode(node, ip_bytes.bitAt(depth));
depth += 1;
if (node >= node_count) {
break;
@@ -240,6 +291,66 @@ pub const Reader = struct {
return try d.decodeRecord(allocator, Metadata, null);
}
+    // Builds the IPv4 index: allocates both arrays from the reader's arena and fills them.
+    fn buildIPv4Index(self: *Reader) !void {
+        const n_bits = self.ipv4_index_first_n_bits;
+        if (n_bits > 24) return ReadError.InvalidPrefixLen;
+        if (self.ipv4_index != null) return ReadError.IndexAlreadyBuilt;
+
+        const slots = std.math.shl(usize, 1, n_bits);
+        const index_allocator = self.arena.allocator();
+
+        // Allocate both arrays before publishing either, so a failed
+        // allocation leaves the reader without a partial index.
+        const nodes = try index_allocator.alloc(u32, slots);
+        const prefix_lens = try index_allocator.alloc(u8, slots);
+        self.ipv4_index = nodes;
+        self.ipv4_index_prefix_len = prefix_lens;
+        self.populateIndex(self.ipv4_start, 0, slots, 0);
+    }
+
+    // Fills index slots [start, start + count) for the subtree rooted at `node`,
+    // which was reached after consuming `bit_depth` bits of the IPv4 prefix.
+    //
+    // A slot number is the N-bit address prefix read as a binary integer,
+    // for example, slot 0000 covers all IPs starting with 0000.
+    // Within any range, the prefixes whose next bit is 0 therefore form
+    // the lower half and the prefixes whose next bit is 1 form the upper half,
+    // so each step hands the lower half to the left (0-bit) child
+    // and the upper half to the right (1-bit) child.
+    //
+    // Recursion stops when the range is down to a single slot (depth N reached)
+    // or when `node` is terminal (node >= node_count: a data pointer or not-found).
+    // A terminal node reached before depth N fills its whole range because
+    // every IP prefix in that range resolves to the same record.
+    // The depth is stored alongside so lookups can report the matching prefix length.
+    fn populateIndex(
+        self: *Reader,
+        node: usize,
+        start: usize,
+        count: usize,
+        bit_depth: usize,
+    ) void {
+        const is_search_node = node < self.metadata.node_count;
+        if (is_search_node and count != 1) {
+            // Split the range between the two children.
+            const half = count / 2;
+            const upper_start = start + half;
+            const next_depth = bit_depth + 1;
+
+            self.populateIndex(self.readNode(node, 0), start, half, next_depth);
+            self.populateIndex(self.readNode(node, 1), upper_start, half, next_depth);
+            return;
+        }
+
+        // Leaf of the index: stamp the whole range with this node and its depth.
+        const nodes = self.ipv4_index.?[start .. start + count];
+        const prefix_lens = self.ipv4_index_prefix_len.?[start .. start + count];
+
+        @memset(nodes, @as(u32, @intCast(node)));
+        @memset(prefix_lens, @as(u8, @intCast(bit_depth)));
+    }
+
fn resolveDataPointerAndDecode(
self: *Reader,
allocator: std.mem.Allocator,
@@ -282,20 +393,47 @@ pub const Reader = struct {
return d.isEmptyMap();
}
- fn findAddressInTree(self: *Reader, ip: net.IP) !struct { usize, usize } {
- const bit_count = ip.bitCount();
- var node = self.startNode(bit_count);
+    // Resolves an IPv4 address through the pre-computed index.
+    // The top N address bits select an index slot directly, which replaces
+    // the first N bit-by-bit steps of the tree walk.
+    fn findAddressInTreeWithIndex(self: *Reader, ip: net.IP) !struct { usize, usize } {
+        const n_bits = self.ipv4_index_first_n_bits;
+        const address = std.mem.readInt(u32, &ip.v4, .big);
+        const slot = std.math.shr(usize, address, 32 - n_bits);
+        const node: usize = self.ipv4_index.?[slot];
+        const node_count: usize = self.metadata.node_count;
+
+        // Still inside the search tree: resume the regular walk at bit N of the address.
+        if (node < node_count) {
+            return try self.findAddressInTree(ip, node, n_bits);
+        }
+
+        // The walk already ended within the first N bits, so report the prefix
+        // length recorded at build time; a pointer of 0 means "not found".
+        const prefix_len: usize = self.ipv4_index_prefix_len.?[slot];
+        const pointer = if (node == node_count) 0 else node;
+        return .{ pointer, prefix_len };
+    }
+
+ fn findAddressInTree(
+ self: *Reader,
+ ip: net.IP,
+ start_node: usize,
+ start_bit: usize,
+ ) !struct { usize, usize } {
+ const stop_bit = ip.bitCount();
const node_count: usize = self.metadata.node_count;
- var prefix_len = bit_count;
- for (0..bit_count) |i| {
+ var node = start_node;
+ var prefix_len = stop_bit;
+ for (start_bit..stop_bit) |i| {
if (node >= node_count) {
prefix_len = i;
break;
}
- node = try self.readNode(node, ip.bitAt(i));
+ node = self.readNode(node, ip.bitAt(i));
}
if (node == node_count) {
@@ -313,26 +451,25 @@ pub const Reader = struct {
return if (length == 128) 0 else self.ipv4_start;
}
- fn findIPv4Start(self: *Reader) !usize {
+ fn setIPv4Start(self: *Reader) void {
if (self.metadata.ip_version != 6) {
- return 0;
+ return;
}
+ const node_count: usize = self.metadata.node_count;
+
// We are looking up an IPv4 address in an IPv6 tree.
// Skip over the first 96 nodes.
var node: usize = 0;
- for (0..96) |_| {
- if (node >= self.metadata.node_count) {
- break;
- }
-
- node = try self.readNode(node, 0);
+ var i: usize = 0;
+ while (i < 96 and node < node_count) : (i += 1) {
+ node = self.readNode(node, 0);
}
- return node;
+ self.ipv4_start = node;
}
- fn readNode(self: *Reader, node_number: usize, index: usize) !usize {
+ fn readNode(self: *Reader, node_number: usize, index: usize) usize {
const src = self.src;
const base_offset: usize = node_number * self.metadata.record_size / 4;
@@ -356,7 +493,7 @@ pub const Reader = struct {
const offset = base_offset + index * 4;
return decoder.toUsize(src[offset .. offset + 4], 0);
},
- else => ReadError.UnknownRecordSize,
+ else => unreachable,
};
}
@@ -530,7 +667,7 @@ pub fn Iterator(T: type) type {
};
} else if (current.node < self.node_count) {
// In order traversal of the children on the right (1-bit).
- var node = try reader.readNode(current.node, 1);
+ var node = reader.readNode(current.node, 1);
var right_ip_bytes = current.ip_bytes;
if (current.prefix_len < bit_count) {
@@ -548,7 +685,7 @@ pub fn Iterator(T: type) type {
});
// In order traversal of the children on the left (0-bit).
- node = try reader.readNode(current.node, 0);
+ node = reader.readNode(current.node, 0);
self.stack.appendAssumeCapacity(WithinNode{
.node = node,
.ip_bytes = current.ip_bytes,