Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 132 additions & 7 deletions arrow-buffer/src/buffer/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,18 @@ impl Buffer {
/// Returns the offset, in bytes, of `Self::ptr` to `Self::data`
///
/// self.ptr and self.data can be different after slicing or advancing the buffer.
///
/// to check if the buffer whether sliced, you can call [`Self::is_sliced`]
pub fn ptr_offset(&self) -> usize {
// Safety: `ptr` is always in bounds of `data`.
unsafe { self.ptr.offset_from(self.data.ptr().as_ptr()) as usize }
}

/// Returns whether the buffer is sliced (does not point to entire original data)
pub fn is_sliced(&self) -> bool {
self.length != self.data.len()
}

/// Returns the pointer to the start of the buffer without the offset.
pub fn data_ptr(&self) -> NonNull<u8> {
self.data.ptr()
Expand Down Expand Up @@ -358,10 +365,12 @@ impl Buffer {
UnalignedBitChunk::new(self.as_slice(), offset, len).count_ones()
}

/// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared.
/// Returns `Err` if this is shared or its allocation is from an external source or
/// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared or sliced.
/// Returns `Err` if this is shared or the buffer is sliced (you can check if sliced by calling [`Self::is_sliced`]) or its allocation is from an external source or
/// it is not allocated with alignment [`ALIGNMENT`]
///
/// If you want to still get MutableBuffer regardless of sliced you can use [`Self::into_mutable_unsliced`]
///
/// # Example: Creating a [`MutableBuffer`] from a [`Buffer`]
/// ```
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
Expand All @@ -381,14 +390,56 @@ impl Buffer {
///
/// [`ALIGNMENT`]: crate::alloc::ALIGNMENT
pub fn into_mutable(self) -> Result<MutableBuffer, Self> {
// Disallow converting sliced buffer into mutable to avoid pitfall of doing a roundtrip from sliced owned buffer will result in a different length:
// ```
// fn example(owned_sliced_buffer: Buffer) -> Buffer {
// let og_len = owned_sliced_buffer.len();
// let roundtrip = Buffer::from(owned_sliced_buffer.into_mutable().unwrap());
// assert_eq!(og_len, roundtrip.len()); // <-- this will have different length for sliced buffer, causing confusion
// }
// ```
if self.is_sliced() {
return Err(self);
}
self.into_mutable_unsliced()
}

/// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared.
/// This is the same as [`Self::into_mutable`] but allow sliced buffer and will the entire unsliced data as [`MutableBuffer`]
///
/// when the buffer is sliced, the returned [`MutableBuffer`] will contain the **entire** unsliced data, so calling `Buffer::from` on the resulting [`MutableBuffer`] from this function
/// will be the unsliced buffer
/// ```
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
/// let buffer: Buffer = Buffer::from(&[1u8, 2, 3, 4][..]);
/// let sliced: Buffer = buffer.slice_with_length(1, 2);
/// drop(buffer); // <- keep `sliced` the only reference
///
/// assert_eq!(sliced.as_slice(), &[2, 3]);
/// let back = Buffer::from(sliced.into_mutable_unsliced().unwrap());
/// assert_eq!(back.as_slice(), &[1, 2, 3, 4]);
/// ```
///
/// # Example: Creating a [`MutableBuffer`] from a [`Buffer`]
/// ```
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
/// let buffer: Buffer = Buffer::from(&[1u8, 2, 3, 4][..]);
/// let sliced: Buffer = buffer.slice_with_length(1, 2);
/// drop(buffer); // <- keep `sliced` the only reference
/// assert_eq!(sliced.as_slice(), &[2, 3]);
///
/// let mutable = sliced.into_mutable_unsliced().unwrap(); // <- this will succeed as the buffer is not shared and is not sliced
/// assert_eq!(mutable.as_slice(), &[1, 2, 3, 4]);
///
/// let buffer_back = Buffer::from(mutable);
/// assert_eq!(buffer_back.as_slice(), &[1, 2, 3, 4]);
/// ```
pub fn into_mutable_unsliced(self) -> Result<MutableBuffer, Self> {
let ptr = self.ptr;
let length = self.length;

Arc::try_unwrap(self.data)
.and_then(|bytes| {
// The pointer of underlying buffer should not be offset.
assert_eq!(ptr, bytes.ptr().as_ptr());
MutableBuffer::from_bytes(bytes).map_err(Arc::new)
})
.and_then(|bytes| MutableBuffer::from_bytes(bytes).map_err(Arc::new))
.map_err(|bytes| Buffer {
data: bytes,
ptr,
Expand Down Expand Up @@ -1080,4 +1131,78 @@ mod tests {
drop(capture);
assert_eq!(buffer2.strong_count(), 1);
}

#[test]
fn test_is_sliced() {
let buffer = Buffer::from(&[1, 2, 3, 4]);
assert!(!buffer.is_sliced());
assert!(!buffer.clone().is_sliced());
{
let mut advanced = buffer.clone();
advanced.advance(0);
assert!(!advanced.is_sliced());
}
{
let mut advanced = buffer.clone();
advanced.advance(1);
assert!(advanced.is_sliced());
}

assert!(!buffer.slice(0).is_sliced());
assert!(buffer.slice(1).is_sliced());

assert!(buffer.slice_with_length(1, 3).is_sliced());
assert!(!buffer.slice_with_length(0, 4).is_sliced());
assert!(buffer.slice_with_length(0, 3).is_sliced());
assert!(buffer.slice_with_length(0, 0).is_sliced());
}

#[test]
fn into_mutable_should_return_error_for_sliced_owned_buffer() {
let original_buffer_data = [1_u8, 2, 3, 4, 5, 6, 7, 8];
for (slice_from, slice_length) in [
(0, 0),
(original_buffer_data.len(), 0),
(2, 4),
(2, original_buffer_data.len() - 2),
] {
let buffer = Buffer::from(original_buffer_data);
let sliced = buffer.slice_with_length(slice_from, slice_length);
drop(buffer); // Keep only 1 owner

assert!(sliced.is_sliced());
sliced
.into_mutable()
.expect_err("should not convert sliced buffer");
}
}

#[test]
fn into_mutable_unsliced_should_return_the_entire_data_regardless_of_slicing() {
let original_buffer_data = [1_u8, 2, 3, 4, 5, 6, 7, 8];
for (slice_from, slice_length) in [
(0, 0),
(0, original_buffer_data.len()),
(original_buffer_data.len(), 0),
(2, 4),
(2, original_buffer_data.len() - 2),
] {
let buffer = Buffer::from(original_buffer_data);
let original_buffer_len = buffer.len();
let original_data_ptr = buffer.data_ptr();
let sliced = buffer.slice_with_length(slice_from, slice_length);
drop(buffer); // Keep only 1 owner

let mutable = sliced
.into_mutable_unsliced()
.expect("should convert to mutable");
assert_eq!(mutable.len(), original_buffer_len);
let new_buffer = Buffer::from(mutable);
assert_eq!(new_buffer.data_ptr(), original_data_ptr);
assert_eq!(new_buffer.len(), original_buffer_len);
assert!(!new_buffer.is_sliced());

assert_eq!(new_buffer.as_slice(), &original_buffer_data);
}
}
}
Loading