From 180774562677bd500df8149b65c79475acd5cb14 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 11 Jun 2026 17:38:04 +0300 Subject: [PATCH 1/5] fix: disallow sliced buffer regardless of sliced direction in `Buffer::into_mutable` --- arrow-buffer/src/buffer/immutable.rs | 148 ++++++++++++++++++++++++--- 1 file changed, 136 insertions(+), 12 deletions(-) diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index fc001afe5a07..0d4ba99e648c 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -106,11 +106,18 @@ impl Buffer { /// Returns the offset, in bytes, of `Self::ptr` to `Self::data` /// /// self.ptr and self.data can be different after slicing or advancing the buffer. + /// + /// to check if the buffer whether sliced, you can call [`Self::is_sliced`] pub fn ptr_offset(&self) -> usize { // Safety: `ptr` is always in bounds of `data`. unsafe { self.ptr.offset_from(self.data.ptr().as_ptr()) as usize } } + /// Returns whether the buffer is sliced (does not point to entire original data) + pub fn is_sliced(&self) -> bool { + self.length != self.data.len() + } + /// Returns the pointer to the start of the buffer without the offset. pub fn data_ptr(&self) -> NonNull { self.data.ptr() @@ -358,10 +365,12 @@ impl Buffer { UnalignedBitChunk::new(self.as_slice(), offset, len).count_ones() } - /// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared. - /// Returns `Err` if this is shared or its allocation is from an external source or + /// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared or sliced. + /// Returns `Err` if this is shared or the buffer is sliced (you can check if sliced by calling [`Self::is_sliced`]) or its allocation is from an external source or /// it is not allocated with alignment [`ALIGNMENT`] /// + /// If you want to still get MutableBuffer regardless of sliced you can use [`Self::into_mutable_unsliced`] + /// /// # Example: Creating a [`MutableBuffer`] from a [`Buffer`] /// ``` /// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; @@ -381,19 +390,62 @@ impl Buffer { /// /// [`ALIGNMENT`]: crate::alloc::ALIGNMENT pub fn into_mutable(self) -> Result { + // Disallow converting sliced buffer into mutable to avoid pitfall of doing a roundtrip from sliced owned buffer will result in a different length: + // ``` + // fn example(owned_sliced_buffer: Buffer) -> Buffer { + // let og_len = owned_sliced_buffer.len(); + // let roundtrip = Buffer::from(owned_sliced_buffer.into_mutable().unwrap()); + // assert_eq!(og_len, roundtrip.len()); // <-- this will have different length for sliced buffer, causing confusion + // } + // ``` + if self.is_sliced() { + return Err(self) + } + // SAFETY: we validated that the buffer is not sliced + self.into_mutable_unsliced() + } + + /// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared. + /// This is the same as [`Self::into_mutable`] but allow sliced buffer and will the entire unsliced data as [`MutableBuffer`] + /// + /// when the buffer is sliced, the returned [`MutableBuffer`] will contain the **entire** unsliced data, so calling `Buffer::from` on the resulting [`MutableBuffer`] from this function + /// will be the unsliced buffer + /// ``` + /// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; + /// let buffer: Buffer = Buffer::from(&[1u8, 2, 3, 4][..]); + /// let sliced: Buffer = buffer.slice_with_length(1, 2); + /// drop(buffer) // <- keep `sliced` the only reference + /// + /// assert_eq!(sliced.as_slice(), &[2, 3]); + /// let back = Buffer::from(sliced.into_mutable_unsliced().unwrap()) + /// assert_eq!(back.as_slice(), &[1, 2, 3, 4]); + /// ``` + /// + /// # Example: Creating a [`MutableBuffer`] from a [`Buffer`] + /// ``` + /// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; + /// let buffer: Buffer = Buffer::from(&[1u8, 2, 3, 4][..]); + /// let sliced: Buffer = buffer.slice_with_length(1, 2); + /// drop(buffer) // <- keep `sliced` the only reference + /// assert_eq!(sliced.as_slice(), &[2, 3]); + /// + /// let mutable = sliced.into_mutable_unsliced().unwrap(); // <- this will succeed as the buffer is not shared and is not sliced + /// assert_eq!(mutable.as_slice(), &[1, 2, 3, 4]); + /// + /// let buffer_back = Buffer::from(mutable); + /// assert_eq!(buffer.as_slice(), &[1, 2, 3, 4]); + /// ``` + pub fn into_mutable_unsliced(self) -> Result { let ptr = self.ptr; let length = self.length; + Arc::try_unwrap(self.data) - .and_then(|bytes| { - // The pointer of underlying buffer should not be offset. - assert_eq!(ptr, bytes.ptr().as_ptr()); - MutableBuffer::from_bytes(bytes).map_err(Arc::new) - }) - .map_err(|bytes| Buffer { - data: bytes, - ptr, - length, - }) + .and_then(|bytes| MutableBuffer::from_bytes(bytes).map_err(Arc::new)) + .map_err(|bytes| Buffer { + data: bytes, + ptr, + length, + }) } /// Converts self into a `Vec`, if possible. @@ -1080,4 +1132,76 @@ mod tests { drop(capture); assert_eq!(buffer2.strong_count(), 1); } + + #[test] + fn test_is_sliced() { + let buffer = Buffer::from(&[1, 2, 3, 4]); + assert!(!buffer.is_sliced()); + assert!(!buffer.clone().is_sliced()); + { + let mut advanced = buffer.clone(); + advanced.advance(0); + assert!(!advanced.is_sliced()); + } + { + let mut advanced = buffer.clone(); + advanced.advance(1); + assert!(advanced.is_sliced()); + } + + assert!(!buffer.slice(0).is_sliced()); + assert!(buffer.slice(1).is_sliced()); + + assert!(buffer.slice_with_length(1, 3).is_sliced()); + assert!(!buffer.slice_with_length(0, 4).is_sliced()); + assert!(buffer.slice_with_length(0, 3).is_sliced()); + assert!(buffer.slice_with_length(0, 0).is_sliced()); + } + + #[test] + fn into_mutable_should_return_error_for_sliced_owned_buffer() { + let original_buffer_data = [1_u8, 2, 3, 4, 5, 6, 7, 8]; + for (slice_from, slice_length) in [ + (0, 0), + (original_buffer_data.len(), 0), + (2, 4), + (2, original_buffer_data.len() - 2), + ] { + let buffer = Buffer::from(original_buffer_data); + let original_buffer_len = buffer.len(); + let original_data_ptr = buffer.data_ptr(); + let sliced = buffer.slice_with_length(slice_from, slice_length); + drop(buffer); // Keep only 1 owner + + assert!(buffer.is_sliced()); + sliced.into_mutable().expect_err("should not convert sliced buffer"); + } + } + + #[test] + fn into_mutable_unsliced_should_return_the_entire_data_regardless_of_slicing() { + let original_buffer_data = [1_u8, 2, 3, 4, 5, 6, 7, 8]; + for (slice_from, slice_length) in [ + (0, 0), + (0, original_buffer_data.len()), + (original_buffer_data.len(), 0), + (2, 4), + (2, original_buffer_data.len() - 2), + ] { + let buffer = Buffer::from(original_buffer_data); + let original_buffer_len = buffer.len(); + let original_data_ptr = buffer.data_ptr(); + let sliced = buffer.slice_with_length(slice_from, slice_length); + drop(buffer); // Keep only 1 owner + + let mutable = sliced.into_mutable_unsliced().expect("should convert to mutable"); + assert_eq!(mutable.len(), original_buffer_len); + let new_buffer = Buffer::from(mutable); + assert_eq!(new_buffer.data_ptr(), original_data_ptr); + assert_eq!(new_buffer.len(), original_buffer_len); + assert!(!new_buffer.is_sliced()); + + assert_eq!(new_buffer.as_slice(), &original_buffer_data); + } + } } From b147a0bed141e6b087369fe055e1a3843b28b572 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 11 Jun 2026 17:49:28 +0300 Subject: [PATCH 2/5] lint + format --- arrow-buffer/src/buffer/immutable.rs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index 0d4ba99e648c..b7e916d467c1 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -399,7 +399,7 @@ impl Buffer { // } // ``` if self.is_sliced() { - return Err(self) + return Err(self); } // SAFETY: we validated that the buffer is not sliced self.into_mutable_unsliced() @@ -440,12 +440,12 @@ impl Buffer { let length = self.length; Arc::try_unwrap(self.data) - .and_then(|bytes| MutableBuffer::from_bytes(bytes).map_err(Arc::new)) - .map_err(|bytes| Buffer { - data: bytes, - ptr, - length, - }) + .and_then(|bytes| MutableBuffer::from_bytes(bytes).map_err(Arc::new)) + .map_err(|bytes| Buffer { + data: bytes, + ptr, + length, + }) } /// Converts self into a `Vec`, if possible. @@ -1174,7 +1174,9 @@ mod tests { drop(buffer); // Keep only 1 owner assert!(buffer.is_sliced()); - sliced.into_mutable().expect_err("should not convert sliced buffer"); + sliced + .into_mutable() + .expect_err("should not convert sliced buffer"); } } @@ -1194,7 +1196,9 @@ mod tests { let sliced = buffer.slice_with_length(slice_from, slice_length); drop(buffer); // Keep only 1 owner - let mutable = sliced.into_mutable_unsliced().expect("should convert to mutable"); + let mutable = sliced + .into_mutable_unsliced() + .expect("should convert to mutable"); assert_eq!(mutable.len(), original_buffer_len); let new_buffer = Buffer::from(mutable); assert_eq!(new_buffer.data_ptr(), original_data_ptr); From a10d7c9106b3df812eab687ebeb31030970a0ee4 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 11 Jun 2026 17:51:10 +0300 Subject: [PATCH 3/5] remove comment --- arrow-buffer/src/buffer/immutable.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index b7e916d467c1..f3eaa5e36962 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -401,7 +401,6 @@ impl Buffer { if self.is_sliced() { return Err(self); } - // SAFETY: we validated that the buffer is not sliced self.into_mutable_unsliced() } From ed6bfe9521838a237396821aa0999a49d1994688 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 11 Jun 2026 18:11:43 +0300 Subject: [PATCH 4/5] format and lint --- arrow-buffer/src/buffer/immutable.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index f3eaa5e36962..6fabd5b9bdb4 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -1167,12 +1167,10 @@ mod tests { (2, original_buffer_data.len() - 2), ] { let buffer = Buffer::from(original_buffer_data); - let original_buffer_len = buffer.len(); - let original_data_ptr = buffer.data_ptr(); let sliced = buffer.slice_with_length(slice_from, slice_length); drop(buffer); // Keep only 1 owner - assert!(buffer.is_sliced()); + assert!(sliced.is_sliced()); sliced .into_mutable() .expect_err("should not convert sliced buffer"); From cb382dfff4b2532df6c01266b4044195bbf0bc7d Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 11 Jun 2026 19:00:43 +0300 Subject: [PATCH 5/5] fix doc --- arrow-buffer/src/buffer/immutable.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index 6fabd5b9bdb4..b61e06c8a7e6 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -413,10 +413,10 @@ impl Buffer { /// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; /// let buffer: Buffer = Buffer::from(&[1u8, 2, 3, 4][..]); /// let sliced: Buffer = buffer.slice_with_length(1, 2); - /// drop(buffer) // <- keep `sliced` the only reference + /// drop(buffer); // <- keep `sliced` the only reference /// /// assert_eq!(sliced.as_slice(), &[2, 3]); - /// let back = Buffer::from(sliced.into_mutable_unsliced().unwrap()) + /// let back = Buffer::from(sliced.into_mutable_unsliced().unwrap()); /// assert_eq!(back.as_slice(), &[1, 2, 3, 4]); /// ``` /// @@ -425,14 +425,14 @@ impl Buffer { /// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; /// let buffer: Buffer = Buffer::from(&[1u8, 2, 3, 4][..]); /// let sliced: Buffer = buffer.slice_with_length(1, 2); - /// drop(buffer) // <- keep `sliced` the only reference + /// drop(buffer); // <- keep `sliced` the only reference /// assert_eq!(sliced.as_slice(), &[2, 3]); /// /// let mutable = sliced.into_mutable_unsliced().unwrap(); // <- this will succeed as the buffer is not shared and is not sliced /// assert_eq!(mutable.as_slice(), &[1, 2, 3, 4]); /// /// let buffer_back = Buffer::from(mutable); - /// assert_eq!(buffer.as_slice(), &[1, 2, 3, 4]); + /// assert_eq!(buffer_back.as_slice(), &[1, 2, 3, 4]); /// ``` pub fn into_mutable_unsliced(self) -> Result { let ptr = self.ptr;