diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index fc001afe5a07..b61e06c8a7e6 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -106,11 +106,18 @@ impl Buffer { /// Returns the offset, in bytes, of `Self::ptr` to `Self::data` /// /// self.ptr and self.data can be different after slicing or advancing the buffer. + /// + /// to check if the buffer whether sliced, you can call [`Self::is_sliced`] pub fn ptr_offset(&self) -> usize { // Safety: `ptr` is always in bounds of `data`. unsafe { self.ptr.offset_from(self.data.ptr().as_ptr()) as usize } } + /// Returns whether the buffer is sliced (does not point to entire original data) + pub fn is_sliced(&self) -> bool { + self.length != self.data.len() + } + /// Returns the pointer to the start of the buffer without the offset. pub fn data_ptr(&self) -> NonNull { self.data.ptr() @@ -358,10 +365,12 @@ impl Buffer { UnalignedBitChunk::new(self.as_slice(), offset, len).count_ones() } - /// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared. - /// Returns `Err` if this is shared or its allocation is from an external source or + /// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared or sliced. + /// Returns `Err` if this is shared or the buffer is sliced (you can check if sliced by calling [`Self::is_sliced`]) or its allocation is from an external source or /// it is not allocated with alignment [`ALIGNMENT`] /// + /// If you want to still get MutableBuffer regardless of sliced you can use [`Self::into_mutable_unsliced`] + /// /// # Example: Creating a [`MutableBuffer`] from a [`Buffer`] /// ``` /// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; @@ -381,14 +390,56 @@ impl Buffer { /// /// [`ALIGNMENT`]: crate::alloc::ALIGNMENT pub fn into_mutable(self) -> Result { + // Disallow converting sliced buffer into mutable to avoid pitfall of doing a roundtrip from sliced owned buffer will result in a different length: + // ``` + // fn example(owned_sliced_buffer: Buffer) -> Buffer { + // let og_len = owned_sliced_buffer.len(); + // let roundtrip = Buffer::from(owned_sliced_buffer.into_mutable().unwrap()); + // assert_eq!(og_len, roundtrip.len()); // <-- this will have different length for sliced buffer, causing confusion + // } + // ``` + if self.is_sliced() { + return Err(self); + } + self.into_mutable_unsliced() + } + + /// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared. + /// This is the same as [`Self::into_mutable`] but allow sliced buffer and will the entire unsliced data as [`MutableBuffer`] + /// + /// when the buffer is sliced, the returned [`MutableBuffer`] will contain the **entire** unsliced data, so calling `Buffer::from` on the resulting [`MutableBuffer`] from this function + /// will be the unsliced buffer + /// ``` + /// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; + /// let buffer: Buffer = Buffer::from(&[1u8, 2, 3, 4][..]); + /// let sliced: Buffer = buffer.slice_with_length(1, 2); + /// drop(buffer); // <- keep `sliced` the only reference + /// + /// assert_eq!(sliced.as_slice(), &[2, 3]); + /// let back = Buffer::from(sliced.into_mutable_unsliced().unwrap()); + /// assert_eq!(back.as_slice(), &[1, 2, 3, 4]); + /// ``` + /// + /// # Example: Creating a [`MutableBuffer`] from a [`Buffer`] + /// ``` + /// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; + /// let buffer: Buffer = Buffer::from(&[1u8, 2, 3, 4][..]); + /// let sliced: Buffer = buffer.slice_with_length(1, 2); + /// drop(buffer); // <- keep `sliced` the only reference + /// assert_eq!(sliced.as_slice(), &[2, 3]); + /// + /// let mutable = sliced.into_mutable_unsliced().unwrap(); // <- this will succeed as the buffer is not shared and is not sliced + /// assert_eq!(mutable.as_slice(), &[1, 2, 3, 4]); + /// + /// let buffer_back = Buffer::from(mutable); + /// assert_eq!(buffer_back.as_slice(), &[1, 2, 3, 4]); + /// ``` + pub fn into_mutable_unsliced(self) -> Result { let ptr = self.ptr; let length = self.length; + Arc::try_unwrap(self.data) - .and_then(|bytes| { - // The pointer of underlying buffer should not be offset. - assert_eq!(ptr, bytes.ptr().as_ptr()); - MutableBuffer::from_bytes(bytes).map_err(Arc::new) - }) + .and_then(|bytes| MutableBuffer::from_bytes(bytes).map_err(Arc::new)) .map_err(|bytes| Buffer { data: bytes, ptr, @@ -1080,4 +1131,78 @@ mod tests { drop(capture); assert_eq!(buffer2.strong_count(), 1); } + + #[test] + fn test_is_sliced() { + let buffer = Buffer::from(&[1, 2, 3, 4]); + assert!(!buffer.is_sliced()); + assert!(!buffer.clone().is_sliced()); + { + let mut advanced = buffer.clone(); + advanced.advance(0); + assert!(!advanced.is_sliced()); + } + { + let mut advanced = buffer.clone(); + advanced.advance(1); + assert!(advanced.is_sliced()); + } + + assert!(!buffer.slice(0).is_sliced()); + assert!(buffer.slice(1).is_sliced()); + + assert!(buffer.slice_with_length(1, 3).is_sliced()); + assert!(!buffer.slice_with_length(0, 4).is_sliced()); + assert!(buffer.slice_with_length(0, 3).is_sliced()); + assert!(buffer.slice_with_length(0, 0).is_sliced()); + } + + #[test] + fn into_mutable_should_return_error_for_sliced_owned_buffer() { + let original_buffer_data = [1_u8, 2, 3, 4, 5, 6, 7, 8]; + for (slice_from, slice_length) in [ + (0, 0), + (original_buffer_data.len(), 0), + (2, 4), + (2, original_buffer_data.len() - 2), + ] { + let buffer = Buffer::from(original_buffer_data); + let sliced = buffer.slice_with_length(slice_from, slice_length); + drop(buffer); // Keep only 1 owner + + assert!(sliced.is_sliced()); + sliced + .into_mutable() + .expect_err("should not convert sliced buffer"); + } + } + + #[test] + fn into_mutable_unsliced_should_return_the_entire_data_regardless_of_slicing() { + let original_buffer_data = [1_u8, 2, 3, 4, 5, 6, 7, 8]; + for (slice_from, slice_length) in [ + (0, 0), + (0, original_buffer_data.len()), + (original_buffer_data.len(), 0), + (2, 4), + (2, original_buffer_data.len() - 2), + ] { + let buffer = Buffer::from(original_buffer_data); + let original_buffer_len = buffer.len(); + let original_data_ptr = buffer.data_ptr(); + let sliced = buffer.slice_with_length(slice_from, slice_length); + drop(buffer); // Keep only 1 owner + + let mutable = sliced + .into_mutable_unsliced() + .expect("should convert to mutable"); + assert_eq!(mutable.len(), original_buffer_len); + let new_buffer = Buffer::from(mutable); + assert_eq!(new_buffer.data_ptr(), original_data_ptr); + assert_eq!(new_buffer.len(), original_buffer_len); + assert!(!new_buffer.is_sliced()); + + assert_eq!(new_buffer.as_slice(), &original_buffer_data); + } + } }