Merged
23 commits
5c47027
added `into_bit_vec` to RsVec (#31)
Cydhra May 11, 2025
4078e88
add example to into_bit_vec
Cydhra May 11, 2025
a075a5b
add `into_parentheses_vec` to `BpTree` (#31)
Cydhra May 11, 2025
0b39146
add `must_use` to `into_parentheses_vec`
Cydhra May 11, 2025
84c6af9
reduced branching in get_bits, improving performance of various data …
Cydhra May 11, 2025
6238ba5
same branching reduction in BitVec::get_bits_unchecked
Cydhra May 11, 2025
7c444de
Quality-of-life API improvements.
arnsholt May 12, 2025
93d45e4
Change From implementations to use methods on their types.
arnsholt May 12, 2025
b571822
revert From<WaveletMatrix> implementation for BitVec
Cydhra May 12, 2025
952efa2
implement From<BpTree> for RsVec
Cydhra May 12, 2025
97ccf05
specify returned bit order of `BitVec::get_bits`
Cydhra May 12, 2025
14eb4cc
add test case for `BitVec::extend`
Cydhra May 12, 2025
752936c
ensure extend only reallocates once, update its documentation
Cydhra May 12, 2025
cd23966
add error section to `split_at` documentation
Cydhra May 12, 2025
7e2b2c7
fix: split_at_unchecked doesn't drop second part from self
Cydhra May 12, 2025
9269caf
fix: split_at_unchecked copies wrong partial limb at the start of the…
Cydhra May 12, 2025
69756b2
fix: if we split in the last limb, or in a single-limb vector, we cou…
Cydhra May 12, 2025
dce7512
add test case for split_at
Cydhra May 12, 2025
90b1558
add test for the checked version of split_at
Cydhra May 12, 2025
b800b03
Fix split_at implementation, and add more tests for splitting.
arnsholt May 13, 2025
0a7dcd5
Early return in split if second half is empty.
arnsholt May 13, 2025
63ef123
cleanup tests, add test for empty vector
Cydhra May 15, 2025
8e8c60f
slight doc adjustment
Cydhra May 15, 2025
41 changes: 37 additions & 4 deletions src/bit_vec/fast_rs_vec/mod.rs
@@ -373,16 +373,43 @@ impl RsVec {
pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 {
debug_assert!(len <= WORD_SIZE);
let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE);
- if pos % WORD_SIZE + len == WORD_SIZE {
- partial_word
- } else if pos % WORD_SIZE + len < WORD_SIZE {
- partial_word & ((1 << (len % WORD_SIZE)) - 1)
+ if pos % WORD_SIZE + len <= WORD_SIZE {
+ partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
} else {
(partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE)))
& 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
}
}
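The reduced-branching version above relies on `checked_shl` to build the bit mask for every `len` up to and including 64 in a single expression. A small standalone sketch of that behavior (not part of the crate, just an illustration of the expression used in `get_bits_unchecked`):

```rust
/// Build a mask with the `len` lowest bits set; valid for 0 <= len <= 64.
fn low_mask(len: u32) -> u64 {
    // For len < 64 this is (1 << len) - 1; for len == 64 the shift overflows,
    // `checked_shl` returns None, and 0u64.wrapping_sub(1) yields all ones.
    1u64.checked_shl(len).unwrap_or(0).wrapping_sub(1)
}

fn main() {
    assert_eq!(low_mask(0), 0);
    assert_eq!(low_mask(3), 0b111);
    assert_eq!(low_mask(64), u64::MAX);
}
```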

/// Convert the `RsVec` into a [`BitVec`].
/// This consumes the `RsVec` and discards all metadata.
/// Since [`RsVec`]s are inherently immutable, this conversion is the only way to modify the
/// underlying data.
///
/// # Example
/// ```rust
/// use vers_vecs::{BitVec, RsVec};
///
/// let mut bit_vec = BitVec::new();
/// bit_vec.append_word(u64::MAX);
///
/// let rs_vec = RsVec::from_bit_vec(bit_vec);
/// assert_eq!(rs_vec.rank1(64), 64);
///
/// let mut bit_vec = rs_vec.into_bit_vec();
/// bit_vec.flip_bit(32);
/// let rs_vec = RsVec::from_bit_vec(bit_vec);
/// assert_eq!(rs_vec.rank1(64), 63);
/// assert_eq!(rs_vec.select0(0), 32);
/// ```
#[must_use]
pub fn into_bit_vec(self) -> BitVec {
BitVec {
data: self.data,
len: self.len,
}
}

/// Check if two `RsVec`s are equal. For sparse vectors (either sparsely filled with 1-bits or
/// 0-bits), this is faster than comparing the vectors bit by bit.
/// Choose the value of `ZERO` depending on which bits are more sparse.
@@ -515,6 +542,12 @@ impl From<BitVec> for RsVec {
}
}

impl From<RsVec> for BitVec {
fn from(value: RsVec) -> Self {
value.into_bit_vec()
}
}
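A short usage sketch of the new `From<RsVec>` conversion; it mirrors the `into_bit_vec` example above and is not taken from the crate's test suite:

```rust
use vers_vecs::{BitVec, RsVec};

let mut bits = BitVec::new();
bits.append_word(u64::MAX);

let rs = RsVec::from_bit_vec(bits);
assert_eq!(rs.rank1(64), 64);

// `From<RsVec> for BitVec` forwards to `into_bit_vec`, so both spellings are equivalent.
let bits: BitVec = rs.into();
assert_eq!(bits.len(), 64);
```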

// iter code in here to keep it more organized
mod iter;
// select code in here to keep it more organized
124 changes: 120 additions & 4 deletions src/bit_vec/mod.rs
@@ -736,6 +756,26 @@ impl BitVec {
self.len += len;
}

/// Append the bits of another bit vector to the end of this vector.
/// If the length of this vector is not a multiple of 64 bits, the appended limbs are
/// shifted so that they line up with the partially filled last limb.
/// This function is guaranteed to reallocate the underlying vector at most once.
pub fn extend_bitvec(&mut self, other: &Self) {
// reserve space for the new bits, ensuring at most one re-allocation
self.data
.reserve((self.len + other.len).div_ceil(WORD_SIZE) - self.data.len());

let full_limbs = other.len() / WORD_SIZE;
for i in 0..full_limbs {
self.append_bits(other.data[i], WORD_SIZE);
}

let partial_bits = other.len % WORD_SIZE;
if partial_bits > 0 {
self.append_bits(other.data[full_limbs], partial_bits);
}
}
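A minimal usage sketch of `extend_bitvec`, assuming `append_bits`, `append_word`, `len`, and `get_bits` are public as they appear elsewhere in this diff:

```rust
use vers_vecs::BitVec;

let mut left = BitVec::new();
left.append_bits(0b101, 3); // 3 bits, so `left` ends in a partial limb

let mut right = BitVec::new();
right.append_word(u64::MAX);

left.extend_bitvec(&right);
assert_eq!(left.len(), 67);
// The appended bits were re-aligned across the partial limb boundary.
assert_eq!(left.get_bits(3, 8), Some(0xFF));
```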

/// Return the length of the bit vector. The length is measured in bits.
#[must_use]
pub fn len(&self) -> usize {
Expand Down Expand Up @@ -912,6 +932,9 @@ impl BitVec {
/// If the position at the end of the query is larger than the length of the vector,
/// None is returned (even if the query partially overlaps with the vector).
/// If the length of the query is larger than 64, None is returned.
///
/// The bit at `pos` is the least significant bit of the return value, which is
/// limited to `len` bits.
#[must_use]
pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> {
if len > WORD_SIZE || len == 0 {
Expand Down Expand Up @@ -948,10 +971,8 @@ impl BitVec {
pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 {
debug_assert!(len <= WORD_SIZE);
let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE);
- if pos % WORD_SIZE + len == WORD_SIZE {
- partial_word
- } else if pos % WORD_SIZE + len < WORD_SIZE {
- partial_word & ((1 << (len % WORD_SIZE)) - 1)
+ if pos % WORD_SIZE + len <= WORD_SIZE {
+ partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
} else {
(partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE)))
& 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
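To make the documented bit order of `get_bits` concrete, a brief sketch using the public `append_bits`/`get_bits` pair (values chosen purely for illustration):

```rust
use vers_vecs::BitVec;

let mut v = BitVec::new();
v.append_bits(0b1101, 4); // bit 0 = 1, bit 1 = 0, bit 2 = 1, bit 3 = 1

// The bit at `pos` ends up in the least significant position of the result.
assert_eq!(v.get_bits(0, 4), Some(0b1101));
assert_eq!(v.get_bits(1, 3), Some(0b110));
assert_eq!(v.get_bits(2, 2), Some(0b11));
```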
@@ -1194,6 +1215,85 @@ impl BitVec {
pub fn heap_size(&self) -> usize {
self.data.len() * size_of::<u64>()
}

/// Split the vector in two at the specified index. The left half contains bits `0..at` and the
/// right half the remaining bits `at..`. If the split index is larger than the length of the
/// vector, the vector is returned unmodified in an `Err` variant.
///
/// # Errors
/// If the index is out of bounds, the function will return an error
/// containing the original vector.
///
/// See also: [`split_at_unchecked`]
pub fn split_at(self, at: usize) -> Result<(Self, Self), Self> {
if at > self.len {
Err(self)
} else {
Ok(self.split_at_unchecked(at))
}
}

/// Split the vector in two at the specified index. The left half contains bits `0..at` and the
/// right half the remaining bits `at..`.
///
/// # Panics
/// If the index is larger than the length of the vector, the function will panic or run
/// out of memory.
/// Use [`split_at`] to properly handle this case.
#[must_use]
pub fn split_at_unchecked(mut self, at: usize) -> (Self, Self) {
let other_len = self.len - at;
let mut other = Self::with_capacity(other_len);

if other_len == 0 {
return (self, other);
}

let first_limb = at / WORD_SIZE;
let last_limb = self.len / WORD_SIZE;

// First, we figure out the number of bits from the first limb to retain in this vector:
let leading_partial = at % WORD_SIZE;

// If the split point is in the last limb (which is then only partially filled), first_limb
// and last_limb are equal, and the other half is simply the last limb shifted right by the
// number of bits retained in this vector, i.e. its low other_len bits.
if first_limb == last_limb {
other.append_bits_unchecked(self.data[first_limb] >> leading_partial, other_len);
} else {
// Otherwise, some range n..last_limb should be copied in their entirety to the other half,
// with n=first_limb+1 if the split point is inside the first limb (leading_partial > 0), or
// n=first_limb if the entire first limb belongs in the other half.
let full_limbs = if leading_partial > 0 {
// If the split point is inside the first limb, we also have to remember to copy over
// the trailing bits to the new vector.
other.append_bits_unchecked(
self.data[first_limb] >> leading_partial,
WORD_SIZE - leading_partial,
);
first_limb + 1..last_limb
} else {
first_limb..last_limb
};

// Copy over any full limbs.
for i in full_limbs {
other.append_bits_unchecked(self.data[i], WORD_SIZE);
}

// Finally, if the vector has a partially filled last limb, we need to put those bits
// in the other half.
let trailing_partial = self.len % WORD_SIZE;
if trailing_partial > 0 {
other.append_bits_unchecked(self.data[last_limb], trailing_partial);
}
}

// remove the copied bits from the original vector
self.drop_last(other_len);

(self, other)
}
}
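A brief usage sketch of the new splitting API (the values are illustrative, not taken from the crate's tests):

```rust
use vers_vecs::BitVec;

let mut v = BitVec::new();
v.append_word(u64::MAX);
v.append_bits(0, 1); // 65 bits, so a split at 64 touches the partial last limb

let (left, right) = v.split_at(64).expect("64 is within bounds");
assert_eq!(left.len(), 64);
assert_eq!(right.len(), 1);
assert_eq!(right.get_bits(0, 1), Some(0));

// An out-of-bounds index hands the original vector back in the `Err` variant.
assert!(left.split_at(100).is_err());
```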

impl_vector_iterator! { BitVec, BitVecIter, BitVecRefIter }
@@ -1218,6 +1318,22 @@ impl From<Vec<u64>> for BitVec {
}
}

impl Extend<BitVec> for BitVec {
fn extend<T: IntoIterator<Item = BitVec>>(&mut self, iter: T) {
for v in iter {
self.extend_bitvec(&v)
}
}
}

impl<'t> Extend<&'t BitVec> for BitVec {
fn extend<T: IntoIterator<Item = &'t BitVec>>(&mut self, iter: T) {
for v in iter {
self.extend_bitvec(v)
}
}
}
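And a sketch of the new `Extend` implementations, which simply forward to `extend_bitvec` (hypothetical input vectors):

```rust
use vers_vecs::BitVec;

let mut acc = BitVec::new();

let mut a = BitVec::new();
a.append_bits(0b01, 2); // bits: 1, 0

let mut b = BitVec::new();
b.append_bits(0b1, 1); // bit: 1

// `Extend<BitVec>` consumes the vectors; `Extend<&BitVec>` borrows them instead.
acc.extend([a, b]);
assert_eq!(acc.len(), 3);
assert_eq!(acc.get_bits(0, 3), Some(0b101));
```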

/// Create a new bit vector from u64 values.
/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
/// The function will append the bits of each element to the bit vector in the order they are