From 3efc97240efe0b56bd8de8a6921d4c0a0096749b Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 11 Jun 2026 13:57:12 +0300 Subject: [PATCH 1/4] wip --- arrow-buffer/src/buffer/immutable.rs | 47 ++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index fc001afe5a07..12ad671c1cc3 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -106,11 +106,18 @@ impl Buffer { /// Returns the offset, in bytes, of `Self::ptr` to `Self::data` /// /// self.ptr and self.data can be different after slicing or advancing the buffer. + /// + /// to check if the buffer is sliced you can call [`Self::is_sliced`] pub fn ptr_offset(&self) -> usize { // Safety: `ptr` is always in bounds of `data`. unsafe { self.ptr.offset_from(self.data.ptr().as_ptr()) as usize } } + /// Returns whether the buffer is sliced + pub fn is_sliced(&self) -> bool { + self.ptr != self.data.as_ptr() || self.length != self.data.len() + } + /// Returns the pointer to the start of the buffer without the offset. pub fn data_ptr(&self) -> NonNull { self.data.ptr() @@ -362,6 +369,10 @@ impl Buffer { /// Returns `Err` if this is shared or its allocation is from an external source or /// it is not allocated with alignment [`ALIGNMENT`] /// + /// If the buffer is sliced, the returned [`MutableBuffer`] will be a view of the original buffer. + /// + /// you can check if the buffer is sliced by + /// /// # Example: Creating a [`MutableBuffer`] from a [`Buffer`] /// ``` /// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; @@ -382,18 +393,20 @@ impl Buffer { /// [`ALIGNMENT`]: crate::alloc::ALIGNMENT pub fn into_mutable(self) -> Result { let ptr = self.ptr; + let data_ptr = self.data_ptr(); let length = self.length; + Arc::try_unwrap(self.data) - .and_then(|bytes| { - // The pointer of underlying buffer should not be offset. - assert_eq!(ptr, bytes.ptr().as_ptr()); - MutableBuffer::from_bytes(bytes).map_err(Arc::new) - }) - .map_err(|bytes| Buffer { - data: bytes, - ptr, - length, - }) + .and_then(|bytes| { + // The pointer of underlying buffer should be the same + assert_eq!(data_ptr, bytes.ptr()); + MutableBuffer::from_bytes(bytes).map_err(Arc::new) + }) + .map_err(|bytes| Buffer { + data: bytes, + ptr, + length, + }) } /// Converts self into a `Vec`, if possible. @@ -1080,4 +1093,18 @@ mod tests { drop(capture); assert_eq!(buffer2.strong_count(), 1); } + + // test cases + // | offset | ptr_length | data_length | result | + // | ------ | ---------- | ----------- | ------ | + // | 0 | 10 | 10 | false | + // | 0 | 8 | 10 | true | + // | 1 | 8 | 10 | true | + // | 1 | 9 | 10 | true | + // + // + #[test] + fn test_is_sliced_should_return_false() { + + } } From b0a5fb1d459ea0ec09bc87b9b13633363a5d1d1d Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 11 Jun 2026 14:29:54 +0300 Subject: [PATCH 2/4] feat(buffer): allow `Buffer::into_mutable` to return for sliced buffer from the middle and add `Buffer::is_sliced` helper before, `Buffer::into_mutable` would panic if you pass it `Buffer` that was offseted from the underlying bytes, but it would be ok for sliced `Buffer` that start at the same position but ends before the data itself --- arrow-buffer/src/buffer/immutable.rs | 63 +++++++++++++++++++++------- 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index 12ad671c1cc3..4aa1b00e0a66 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -113,9 +113,9 @@ impl Buffer { unsafe { self.ptr.offset_from(self.data.ptr().as_ptr()) as usize } } - /// Returns whether the buffer is sliced + /// Returns whether the buffer is sliced (does not point to entire original data) pub fn is_sliced(&self) -> bool { - self.ptr != self.data.as_ptr() || self.length != self.data.len() + self.length != self.data.len() } /// Returns the pointer to the start of the buffer without the offset. @@ -369,9 +369,10 @@ impl Buffer { /// Returns `Err` if this is shared or its allocation is from an external source or /// it is not allocated with alignment [`ALIGNMENT`] /// - /// If the buffer is sliced, the returned [`MutableBuffer`] will be a view of the original buffer. + /// If the buffer is sliced, the returned [`MutableBuffer`] will be a view of the original buffer + /// (include values outside the sliced data). /// - /// you can check if the buffer is sliced by + /// you can check if the buffer is sliced by calling [`Self::is_sliced`] /// /// # Example: Creating a [`MutableBuffer`] from a [`Buffer`] /// ``` @@ -1094,17 +1095,49 @@ mod tests { assert_eq!(buffer2.strong_count(), 1); } - // test cases - // | offset | ptr_length | data_length | result | - // | ------ | ---------- | ----------- | ------ | - // | 0 | 10 | 10 | false | - // | 0 | 8 | 10 | true | - // | 1 | 8 | 10 | true | - // | 1 | 9 | 10 | true | - // - // #[test] - fn test_is_sliced_should_return_false() { - + fn test_is_sliced() { + let buffer = Buffer::from(&[1, 2, 3, 4]); + assert!(!buffer.is_sliced()); + assert!(!buffer.clone().is_sliced()); + { + let mut advanced = buffer.clone(); + advanced.advance(0); + assert!(!advanced.is_sliced()); + } + { + let mut advanced = buffer.clone(); + advanced.advance(1); + assert!(advanced.is_sliced()); + } + + assert!(!buffer.slice(0).is_sliced()); + assert!(buffer.slice(1).is_sliced()); + + assert!(buffer.slice_with_length(1, 3).is_sliced()); + assert!(!buffer.slice_with_length(0, 4).is_sliced()); + assert!(buffer.slice_with_length(0, 3).is_sliced()); + assert!(buffer.slice_with_length(0, 0).is_sliced()); + } + + #[test] + fn into_mutable_should_return_the_entire_data_regardless_of_slicing() { + let original_buffer_data = [1_u8, 2, 3, 4, 5, 6, 7, 8]; + for (slice_from, slice_length) in [(0, 0), (0, original_buffer_data.len()), (original_buffer_data.len(), 0), (2, 4), (2, original_buffer_data.len() - 2)] { + let buffer = Buffer::from(original_buffer_data.clone()); + let original_buffer_len = buffer.len(); + let original_data_ptr = buffer.data_ptr(); + let sliced = buffer.slice_with_length(slice_from, slice_length); + drop(buffer); // Keep only 1 owner + + let mutable = sliced.into_mutable().expect("should convert to mutable"); + assert_eq!(mutable.len(), original_buffer_len); + let new_buffer = Buffer::from(mutable); + assert_eq!(new_buffer.data_ptr(), original_data_ptr); + assert_eq!(new_buffer.len(), original_buffer_len); + assert!(!new_buffer.is_sliced()); + + assert_eq!(new_buffer.as_slice(), &original_buffer_data); + } } } From 8c2ec6ad1ba98894831f116d6e9d50bfec85aa18 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 11 Jun 2026 14:38:26 +0300 Subject: [PATCH 3/4] format and lint --- arrow-buffer/src/buffer/immutable.rs | 108 ++++++++++++++------------- 1 file changed, 57 insertions(+), 51 deletions(-) diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index 4aa1b00e0a66..56e3c0a4c9b8 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -113,7 +113,7 @@ impl Buffer { unsafe { self.ptr.offset_from(self.data.ptr().as_ptr()) as usize } } - /// Returns whether the buffer is sliced (does not point to entire original data) + /// Returns whether the buffer is sliced (does not point to entire original data) pub fn is_sliced(&self) -> bool { self.length != self.data.len() } @@ -398,16 +398,16 @@ impl Buffer { let length = self.length; Arc::try_unwrap(self.data) - .and_then(|bytes| { - // The pointer of underlying buffer should be the same - assert_eq!(data_ptr, bytes.ptr()); - MutableBuffer::from_bytes(bytes).map_err(Arc::new) - }) - .map_err(|bytes| Buffer { - data: bytes, - ptr, - length, - }) + .and_then(|bytes| { + // The pointer of underlying buffer should be the same + assert_eq!(data_ptr, bytes.ptr()); + MutableBuffer::from_bytes(bytes).map_err(Arc::new) + }) + .map_err(|bytes| Buffer { + data: bytes, + ptr, + length, + }) } /// Converts self into a `Vec`, if possible. @@ -1097,47 +1097,53 @@ mod tests { #[test] fn test_is_sliced() { - let buffer = Buffer::from(&[1, 2, 3, 4]); - assert!(!buffer.is_sliced()); - assert!(!buffer.clone().is_sliced()); - { - let mut advanced = buffer.clone(); - advanced.advance(0); - assert!(!advanced.is_sliced()); - } - { - let mut advanced = buffer.clone(); - advanced.advance(1); - assert!(advanced.is_sliced()); - } - - assert!(!buffer.slice(0).is_sliced()); - assert!(buffer.slice(1).is_sliced()); - - assert!(buffer.slice_with_length(1, 3).is_sliced()); - assert!(!buffer.slice_with_length(0, 4).is_sliced()); - assert!(buffer.slice_with_length(0, 3).is_sliced()); - assert!(buffer.slice_with_length(0, 0).is_sliced()); - } - + let buffer = Buffer::from(&[1, 2, 3, 4]); + assert!(!buffer.is_sliced()); + assert!(!buffer.clone().is_sliced()); + { + let mut advanced = buffer.clone(); + advanced.advance(0); + assert!(!advanced.is_sliced()); + } + { + let mut advanced = buffer.clone(); + advanced.advance(1); + assert!(advanced.is_sliced()); + } + + assert!(!buffer.slice(0).is_sliced()); + assert!(buffer.slice(1).is_sliced()); + + assert!(buffer.slice_with_length(1, 3).is_sliced()); + assert!(!buffer.slice_with_length(0, 4).is_sliced()); + assert!(buffer.slice_with_length(0, 3).is_sliced()); + assert!(buffer.slice_with_length(0, 0).is_sliced()); + } + #[test] fn into_mutable_should_return_the_entire_data_regardless_of_slicing() { - let original_buffer_data = [1_u8, 2, 3, 4, 5, 6, 7, 8]; - for (slice_from, slice_length) in [(0, 0), (0, original_buffer_data.len()), (original_buffer_data.len(), 0), (2, 4), (2, original_buffer_data.len() - 2)] { - let buffer = Buffer::from(original_buffer_data.clone()); - let original_buffer_len = buffer.len(); - let original_data_ptr = buffer.data_ptr(); - let sliced = buffer.slice_with_length(slice_from, slice_length); - drop(buffer); // Keep only 1 owner - - let mutable = sliced.into_mutable().expect("should convert to mutable"); - assert_eq!(mutable.len(), original_buffer_len); - let new_buffer = Buffer::from(mutable); - assert_eq!(new_buffer.data_ptr(), original_data_ptr); - assert_eq!(new_buffer.len(), original_buffer_len); - assert!(!new_buffer.is_sliced()); - - assert_eq!(new_buffer.as_slice(), &original_buffer_data); - } + let original_buffer_data = [1_u8, 2, 3, 4, 5, 6, 7, 8]; + for (slice_from, slice_length) in [ + (0, 0), + (0, original_buffer_data.len()), + (original_buffer_data.len(), 0), + (2, 4), + (2, original_buffer_data.len() - 2), + ] { + let buffer = Buffer::from(original_buffer_data.clone()); + let original_buffer_len = buffer.len(); + let original_data_ptr = buffer.data_ptr(); + let sliced = buffer.slice_with_length(slice_from, slice_length); + drop(buffer); // Keep only 1 owner + + let mutable = sliced.into_mutable().expect("should convert to mutable"); + assert_eq!(mutable.len(), original_buffer_len); + let new_buffer = Buffer::from(mutable); + assert_eq!(new_buffer.data_ptr(), original_data_ptr); + assert_eq!(new_buffer.len(), original_buffer_len); + assert!(!new_buffer.is_sliced()); + + assert_eq!(new_buffer.as_slice(), &original_buffer_data); + } } } From 173dba5471a17e4944b7468842f819a06e8ff85f Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 11 Jun 2026 14:41:26 +0300 Subject: [PATCH 4/4] remove clone --- arrow-buffer/src/buffer/immutable.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index 56e3c0a4c9b8..0bbe3781585e 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -1130,7 +1130,7 @@ mod tests { (2, 4), (2, original_buffer_data.len() - 2), ] { - let buffer = Buffer::from(original_buffer_data.clone()); + let buffer = Buffer::from(original_buffer_data); let original_buffer_len = buffer.len(); let original_data_ptr = buffer.data_ptr(); let sliced = buffer.slice_with_length(slice_from, slice_length);