Skip to content
Open
29 changes: 15 additions & 14 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,23 @@ categories = ["algorithms", "data-structures"]

[dependencies]
bitpacking = "0.9.2"
bytecheck = { version = "~0.6.8", default-features = false, optional = true }
num = "0.4.1"
rkyv = { version = "0.7.42", features = ["validation", "strict"], optional = true }
wyhash = "0.5.0"
rkyv = { version = "0.8", optional = true }
wyhash = "0.6"

[dev-dependencies]
bitvec = "1.0.1"
criterion = { version = "0.5.1", features = ["html_reports"] }
criterion = { version = "0.7", features = ["html_reports"] }
paste = "1.0.14"
proptest = "1.4.0"
rand = "0.8.5"
rand_chacha = "0.3.1"
rkyv = { version = "0.7.42", features = ["validation", "strict"] }
rustc-hash = "2"
test-case = "3.3.1"

[features]
default = []
rkyv_derive = ["rkyv", "bytecheck"]
rkyv_derive = ["rkyv"]

[[bench]]
name = "rank"
Expand All @@ -44,6 +43,11 @@ name = "mphf"
harness = false
required-features = ["rkyv_derive"]

[[bench]]
name = "map"
harness = false
required-features = ["rkyv_derive"]

[[bench]]
name = "map_with_dict"
harness = false
Expand All @@ -59,11 +63,8 @@ name = "set"
harness = false
required-features = ["rkyv_derive"]

[profile.bench]
debug = true

[profile.release]
codegen-units = 1
debug = true
lto = "fat"
opt-level = 3
# [profile.release]
# codegen-units = 1
# debug = true
# lto = "fat"
# opt-level = 3
107 changes: 107 additions & 0 deletions benches/map.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
use std::{collections::HashMap, env, hint::black_box, time::Instant};

use entropy_map::{ArchivedMap, Map};

use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;

/// Compares point-lookup throughput of `entropy_map::Map` against `HashMap`,
/// both in memory and through their rkyv-archived representations.
///
/// The workload is controlled by two environment variables:
/// - `N`: number of random `(u64, u32)` entries generated (default 1,000,000);
/// - `QN`: number of keys looked up per iteration (default 1,000,000), clamped
///   to the number of distinct keys actually present in the generated map.
///
/// `Map` construction time and rkyv serialization time are printed once to stdout.
///
/// # Panics
///
/// Panics if `N` or `QN` is set but is not a valid `usize`, or if building,
/// serializing or accessing any of the maps fails.
///
/// Benchmark results for N = 1M:
///
/// Map construction took: 13.64868523s
///
/// Map/HashMap get
/// time: [18.350 ms 18.518 ms 18.707 ms]
/// thrpt: [53.455 Melem/s 54.001 Melem/s 54.496 Melem/s]
///
/// Map/entropy get
/// time: [37.033 ms 37.293 ms 37.613 ms]
/// thrpt: [26.587 Melem/s 26.815 Melem/s 27.003 Melem/s]
///
/// Map/HashMap archived get
/// time: [37.152 ms 37.373 ms 37.712 ms]
/// thrpt: [26.517 Melem/s 26.757 Melem/s 26.917 Melem/s]
///
/// Map rkyv serialization took: 4.447392ms
///
/// Map/entropy archived get
/// time: [40.613 ms 41.039 ms 41.563 ms]
/// thrpt: [24.060 Melem/s 24.367 Melem/s 24.623 Melem/s]
pub fn benchmark(c: &mut Criterion) {
    let n = env_usize("N", 1_000_000);
    let requested_queries = env_usize("QN", 1_000_000);

    // Fixed seed so every run benchmarks the same key/value set.
    let mut rng = ChaCha8Rng::seed_from_u64(123);

    let original_map: HashMap<u64, u32> = (0..n)
        .map(|_| {
            let key = rng.gen::<u64>();
            let value = rng.gen::<u32>();
            (key, value)
        })
        .collect();

    // Every benchmark loop iterates `original_map.keys().take(query_n)`, which can
    // never yield more keys than the map holds. Clamp the count so the reported
    // throughput matches the number of lookups that are really performed.
    let query_n = requested_queries.min(original_map.len());

    // Built with a different hasher so its memory order differs from the iteration
    // order of `original_map`; lookups then exercise random access.
    let hash_map: HashMap<u64, u32, rustc_hash::FxBuildHasher> =
        original_map.iter().map(|(&key, &value)| (key, value)).collect();

    let t0 = Instant::now();
    let map: Map<u64, u32, 64, 12, u16> = Map::from_iter_with_params(original_map.clone(), 2.4).unwrap();
    println!("Map construction took: {:?}", t0.elapsed());

    let mut group = c.benchmark_group("Map");
    group.throughput(Throughput::Elements(query_n as u64));

    group.bench_function("HashMap get", |b| {
        b.iter(|| {
            for key in original_map.keys().take(query_n) {
                black_box(hash_map.get(key).unwrap());
            }
        });
    });

    group.bench_function("entropy get", |b| {
        b.iter(|| {
            for key in original_map.keys().take(query_n) {
                black_box(map.get(key).unwrap());
            }
        });
    });

    let rkyv_bytes = rkyv::to_bytes::<rkyv::rancor::Error>(&hash_map).unwrap();
    let rkyv_hash_map = rkyv::access::<
        rkyv::collections::swiss_table::map::ArchivedHashMap<u64, u32>,
        rkyv::rancor::Error,
    >(&rkyv_bytes)
    .unwrap();

    group.bench_function("HashMap archived get", |b| {
        b.iter(|| {
            for key in original_map.keys().take(query_n) {
                black_box(rkyv_hash_map.get(key).unwrap());
            }
        });
    });

    // Shadowing keeps the previous buffer alive, so `rkyv_hash_map` stays valid.
    let t0 = Instant::now();
    let rkyv_bytes = rkyv::to_bytes::<rkyv::rancor::Error>(&map).unwrap();
    println!("Map rkyv serialization took: {:?}", t0.elapsed());

    let rkyv_map = rkyv::access::<ArchivedMap<u64, u32, 64, 12, u16>, rkyv::rancor::Error>(&rkyv_bytes).unwrap();

    group.bench_function("entropy archived get", |b| {
        b.iter(|| {
            for key in original_map.keys().take(query_n) {
                black_box(rkyv_map.get(key).unwrap());
            }
        });
    });

    group.finish();
}

/// Reads a `usize` from the environment variable `name`, returning `default`
/// when the variable cannot be read (unset or not valid Unicode).
///
/// Panics with a message naming the variable when it is set to something that
/// does not parse as a `usize`.
fn env_usize(name: &str, default: usize) -> usize {
    match env::var(name) {
        Ok(raw) => raw.parse().unwrap_or_else(|err| {
            panic!(
                "environment variable {} must be a non-negative integer, got {:?}: {}",
                name, raw, err
            )
        }),
        Err(_) => default,
    }
}

// Declares the Criterion benchmark group `benches`: its only target is the
// `benchmark` function above, run with Criterion's default configuration.
criterion_group! {
name = benches;
config = Criterion::default();
targets = benchmark,
}
// Generates the entry point that runs the `benches` group.
criterion_main!(benches);
64 changes: 38 additions & 26 deletions benches/map_with_dict.rs
Original file line number Diff line number Diff line change
@@ -1,67 +1,79 @@
use std::collections::HashMap;
use std::env;
use std::time::Instant;
use std::{collections::HashMap, env, hint::black_box, time::Instant};

use entropy_map::MapWithDict;
use entropy_map::{ArchivedMapWithDict, MapWithDict};

use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;

/// Benchmark results for N = 1M:
///
/// map generation took: 55.309498ms
/// map_with_dict construction took: 1.411034205s
/// map_with_dict rkyv serialization took: 8.233451ms
/// MapWithDict construction took: 1.103815976s
///
/// # map_with_dict/get
/// time: [75.423 ms 75.814 ms 76.304 ms]
/// thrpt: [13.106 Melem/s 13.190 Melem/s 13.259 Melem/s]
/// MapWithDict/HashMap get
/// time: [18.856 ms 18.921 ms 18.994 ms]
/// thrpt: [52.650 Melem/s 52.850 Melem/s 53.033 Melem/s]
///
/// # map_with_dict/get-rkyv
/// time: [74.267 ms 74.681 ms 75.225 ms]
/// thrpt: [13.293 Melem/s 13.390 Melem/s 13.465 Melem/s]
/// MapWithDict/entropy get
/// time: [45.107 ms 45.406 ms 45.728 ms]
/// thrpt: [21.868 Melem/s 22.023 Melem/s 22.170 Melem/s]
///
/// MapWithDict rkyv serialization took: 2.496905ms
///
/// MapWithDict/entropy archived get
/// time: [40.738 ms 41.139 ms 41.575 ms]
/// thrpt: [24.053 Melem/s 24.308 Melem/s 24.547 Melem/s]
pub fn benchmark(c: &mut Criterion) {
let n: usize = env::var("N").unwrap_or("1000000".to_string()).parse().unwrap();
let query_n: usize = env::var("QN").unwrap_or("1000000".to_string()).parse().unwrap();

let mut rng = ChaCha8Rng::seed_from_u64(123);

let t0 = Instant::now();
let original_map: HashMap<u64, u32> = (0..n)
.map(|_| {
let key = rng.gen::<u64>();
let value = rng.gen_range(1..=10);
// let value = rng.gen_range(1..=10);
let value = rng.gen::<u32>();
(key, value)
})
.collect();
println!("map generation took: {:?}", t0.elapsed());

// created with another hasher so the memory order is different to check random access
let hash_map: HashMap<u64, u32, rustc_hash::FxBuildHasher> = HashMap::from_iter(original_map.clone());

let t0 = Instant::now();
let map = MapWithDict::try_from(original_map.clone()).expect("failed to build map");
println!("map_with_dict construction took: {:?}", t0.elapsed());
println!("MapWithDict construction took: {:?}", t0.elapsed());

let mut group = c.benchmark_group("map_with_dict");
let mut group = c.benchmark_group("MapWithDict");
group.throughput(Throughput::Elements(query_n as u64));

group.bench_function("get", |b| {
group.bench_function("HashMap get", |b| {
b.iter(|| {
for key in original_map.keys().take(query_n) {
black_box(hash_map.get(key).unwrap());
}
});
});

group.bench_function("entropy get", |b| {
b.iter(|| {
for key in original_map.keys().take(query_n) {
map.get(black_box(key)).unwrap();
black_box(map.get(key).unwrap());
}
});
});

let t0 = Instant::now();
let rkyv_bytes = rkyv::to_bytes::<_, 1024>(&map).unwrap();
println!("map_with_dict rkyv serialization took: {:?}", t0.elapsed());
let rkyv_bytes = rkyv::to_bytes::<rkyv::rancor::Error>(&map).unwrap();
println!("MapWithDict rkyv serialization took: {:?}", t0.elapsed());

let rkyv_map = rkyv::check_archived_root::<MapWithDict<u64, u32>>(&rkyv_bytes).unwrap();
let rkyv_map = rkyv::access::<ArchivedMapWithDict<u64, u32>, rkyv::rancor::Error>(&rkyv_bytes).unwrap();

group.bench_function("get-rkyv", |b| {
group.bench_function("entropy archived get", |b| {
b.iter(|| {
for key in original_map.keys().take(query_n) {
rkyv_map.get(black_box(key)).unwrap();
black_box(rkyv_map.get(key).unwrap());
}
});
});
Expand Down
44 changes: 20 additions & 24 deletions benches/map_with_dict_bitpacked.rs
Original file line number Diff line number Diff line change
@@ -1,33 +1,30 @@
use std::collections::HashMap;
use std::env;
use std::time::Instant;
use std::{collections::HashMap, env, hint::black_box, time::Instant};

use entropy_map::MapWithDictBitpacked;
use entropy_map::{ArchivedMapWithDictBitpacked, MapWithDictBitpacked};

use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;

/// Benchmark results for N = 1M:
///
/// map generation took: 199.621887ms
/// map_with_dict_bitpacked construction took: 2.36439657s
/// map_with_dict_bitpacked rkyv serialization took: 20.455775ms
/// MapWithDictBitpacked construction took: 1.530962829s
///
/// # map_with_dict_bitpacked/get_values
/// time: [169.36 ms 170.24 ms 171.06 ms]
/// thrpt: [5.8459 Melem/s 5.8740 Melem/s 5.9044 Melem/s]
/// MapWithDictBitpacked/get_values
/// time: [95.556 ms 96.288 ms 97.068 ms]
/// thrpt: [10.302 Melem/s 10.385 Melem/s 10.465 Melem/s]
///
/// # map_with_dict_bitpacked/get_values-rkyv
/// time: [167.92 ms 168.82 ms 169.65 ms]
/// thrpt: [5.8946 Melem/s 5.9233 Melem/s 5.9553 Melem/s]
/// MapWithDictBitpacked rkyv serialization took: 4.85859ms
///
/// MapWithDictBitpacked/archived get_values
/// time: [79.066 ms 79.977 ms 81.002 ms]
/// thrpt: [12.345 Melem/s 12.504 Melem/s 12.648 Melem/s]
pub fn benchmark(c: &mut Criterion) {
let n: usize = env::var("N").unwrap_or("1000000".to_string()).parse().unwrap();
let query_n: usize = env::var("QN").unwrap_or("1000000".to_string()).parse().unwrap();

let mut rng = ChaCha8Rng::seed_from_u64(123);

let t0 = Instant::now();
let mut values_buf = vec![0; 10];
let original_map: HashMap<u64, Vec<u32>> = (0..n)
.map(|_| {
Expand All @@ -36,33 +33,32 @@ pub fn benchmark(c: &mut Criterion) {
(key, value)
})
.collect();
println!("map generation took: {:?}", t0.elapsed());

let t0 = Instant::now();
let map = MapWithDictBitpacked::try_from(original_map.clone()).expect("failed to build map");
println!("map_with_dict_bitpacked construction took: {:?}", t0.elapsed());
println!("MapWithDictBitpacked construction took: {:?}", t0.elapsed());

let mut group = c.benchmark_group("map_with_dict_bitpacked");
let mut group = c.benchmark_group("MapWithDictBitpacked");
group.throughput(Throughput::Elements(query_n as u64));

group.bench_function("get_values", |b| {
b.iter(|| {
for key in original_map.keys().take(query_n) {
map.get_values(black_box(key), &mut values_buf);
black_box(map.get_values(key, &mut values_buf));
}
});
});

let t0 = Instant::now();
let rkyv_bytes = rkyv::to_bytes::<_, 1024>(&map).unwrap();
println!("map_with_dict_bitpacked rkyv serialization took: {:?}", t0.elapsed());
let rkyv_bytes = rkyv::to_bytes::<rkyv::rancor::Error>(&map).unwrap();
println!("MapWithDictBitpacked rkyv serialization took: {:?}", t0.elapsed());

let rkyv_map = rkyv::check_archived_root::<MapWithDictBitpacked<u64>>(&rkyv_bytes).unwrap();
let rkyv_map = rkyv::access::<ArchivedMapWithDictBitpacked<u64>, rkyv::rancor::Error>(&rkyv_bytes).unwrap();

group.bench_function("get-rkyv", |b| {
group.bench_function("archived get_values", |b| {
b.iter(|| {
for key in original_map.keys().take(query_n) {
rkyv_map.get_values(black_box(key), &mut values_buf);
black_box(rkyv_map.get_values(key, &mut values_buf));
}
});
});
Expand Down
Loading