feat: add OffsetBuffer::subtract to allow to shift offsets by value#10120
feat: add OffsetBuffer::subtract to allow to shift offsets by value#10120rluvaton wants to merge 9 commits into
OffsetBuffer::subtract to allow to shift offsets by value#10120Conversation
| /// [`LargeStringArray`]: crate::array::LargeStringArray | ||
| pub trait OffsetSizeTrait: | ||
| ArrowNativeType + std::ops::AddAssign + Integer + num_traits::CheckedAdd | ||
| ArrowNativeType + std::ops::AddAssign + Integer + num_traits::CheckedAdd + num_traits::CheckedSub |
There was a problem hiding this comment.
this is sealed since ArrowNativeType is sealed, so it is safe to add this here
| // Replace this test with test that assert a reuse after PR #10118 is merged | ||
| #[test] | ||
| fn for_sliced_unshared_buffer_shift_should_not_reuse_buffer() { | ||
| // Underlying [0, 3, 6, 9, 12]; slice -> view [3, 6, 9]. | ||
| let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![1, 3, 6, 9, 12])); | ||
| let sliced = offsets.slice(1, 2); // [3, 6, 9] | ||
| drop(offsets); // uniquely owned | ||
| assert_eq!(sliced.as_ref(), &[3, 6, 9]); | ||
|
|
||
| let ptr_before = sliced.as_ptr(); | ||
| let result = sliced.subtract(1); | ||
|
|
||
| assert_ne!( | ||
| ptr_before, | ||
| result.as_ptr(), | ||
| "should not be reused until #10118 is merged" | ||
| ); | ||
|
|
||
| assert_eq!(result.as_ref(), &[2, 5, 8]); | ||
| } |
There was a problem hiding this comment.
After pr #10118 is merged we should replace this test with the following test that verify reuse
(this test need some cleanup but the idea is the same)
#[test]
fn for_sliced_unshared_buffer_shift_should_reuse_buffer_but_only_modify_the_data_in_slice() {
// Underlying [0, 3, 6, 9, 12]; slice -> view [3, 6, 9].
let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![1, 3, 6, 9, 12]));
let sliced = offsets.slice(1, 2); // [3, 6, 9]
drop(offsets); // uniquely owned -> eligible for in-place reuse
assert_eq!(sliced.as_ref(), &[3, 6, 9]);
let ptr_before = sliced.as_ptr();
let result = sliced.subtract(1);
// Reuses the allocation...
assert_eq!(
ptr_before,
result.as_ptr(),
"uniquely-owned sliced buffer should be mutated in place"
);
// ...but only the slice is shifted, and the length stays the slice length.
assert_eq!(result.as_ref(), &[2, 5, 8]);
assert_eq!(result.len(), 3);
let underlying_buffer = result.inner().inner();
// data should be shifted by 1
assert_eq!(underlying_buffer.ptr_offset(), std::mem::size_of::<i32>());
let original_value_ptr = unsafe {underlying_buffer.data_ptr().as_ptr() as *mut i32};
let value_in_ptr = unsafe {*original_value_ptr};
assert_eq!(value_in_ptr, 1);
let last_value_ptr = unsafe {(underlying_buffer.data_ptr().as_ptr() as *mut i32).add(5)};
assert_eq!(unsafe {*last_value_ptr}, 12);
}
alamb
left a comment
There was a problem hiding this comment.
Thanks @rluvaton
The PR says
When dealing with sliced variable size arrays it is common to subtract the offset to shift them to 0, so this is a helper for that
But this PR doesn't have any uses of this API - are there places in the arrow crate that subtract the offsets that we could update to use this API (and thus get additional test coverage)?
|
And I want to optimize cast to only cast visible list values which would require me to shift the offsets there are many more cases, but did not find in arrow uses that still keep the offset buffer |
Which issue does this PR close?
N/A
Rationale for this change
When dealing with sliced variable size arrays it is common to subtract the offset to shift them to 0, so this is a helper for that
What changes are included in this PR?
added
OffsetBuffer::subtractmethodAre these changes tested?
yes
Are there any user-facing changes?
new function