-
Notifications
You must be signed in to change notification settings - Fork 127
Description
Fuzzing Crash Report
Analysis
Crash Location: vortex-array/src/vtable/dyn_.rs:execute
Error Message:
to_canonical should succeed in fuzz test:
Executed canonical dtype mismatch for Encoding<vortex_array::arrays::masked::vtable::MaskedVTable>
Stack Trace:
3: execute<vortex_array::arrays::masked::vtable::MaskedVTable>
at ./vortex-array/src/vtable/dyn_.rs:152:13
4: execute
at ./vortex-array/src/executor.rs:195:14
5: execute<alloc::sync::Arc<dyn vortex_array::array::Array, alloc::alloc::Global>>
at ./vortex-array/src/executor.rs:40:9
6: execute
at ./vortex-array/src/columnar.rs:105:27
7: execute
at ./vortex-array/src/canonical.rs:487:18
8: execute<vortex_array::canonical::Canonical>
at ./vortex-array/src/executor.rs:40:9
9: to_canonical<vortex_array::arrays::masked::vtable::MaskedVTable>
at ./vortex-array/src/array/mod.rs:609:14
Root Cause: The MaskedArray execute function is producing a canonical array with a dtype that doesn't match the input MaskedArray's dtype. This violates the debug assertion at vortex-array/src/vtable/dyn_.rs:152-156 which verifies that after execution, the result dtype must match the input dtype.
Based on the fuzzer debug output, the sequence is:
- Start with a ChunkedArray containing VarBinArray and VarBinViewArray chunks (both Utf8, NonNullable)
- Compress it (still NonNullable)
- Apply a mask operation, creating a MaskedArray with Nullable dtype wrapping the compressed data
- Try to convert to canonical - this triggers the crash
The dtype mismatch is introduced either when the mask operation constructs the MaskedArray or when that array is executed to canonical form. The MaskedArray's execute implementation (vortex-array/src/arrays/masked/vtable/mod.rs:110-129) calls mask_validity_canonical, which is expected to preserve the dtype, yet the canonical result's dtype does not match the MaskedArray's declared dtype.
Debug Output
FuzzArrayAction {
array: ChunkedArray {
dtype: Utf8(
NonNullable,
),
len: 3,
chunk_offsets: PrimitiveArray {
dtype: Primitive(
U64,
NonNullable,
),
buffer: BufferHandle(
Host(
Buffer<u8> {
length: 24,
alignment: Alignment(
8,
),
as_slice: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...],
},
),
),
validity: NonNullable,
stats_set: ArrayStats {
inner: RwLock {
data: StatsSet {
values: [],
},
},
},
},
chunks: [
VarBinArray {
dtype: Utf8(
NonNullable,
),
bytes: BufferHandle(
Host(
Buffer<u8> {
length: 0,
alignment: Alignment(
1,
),
as_slice: [],
},
),
),
offsets: PrimitiveArray {
dtype: Primitive(
U32,
NonNullable,
),
buffer: BufferHandle(
Host(
Buffer<u8> {
length: 4,
alignment: Alignment(
4,
),
as_slice: [0, 0, 0, 0],
},
),
),
validity: NonNullable,
stats_set: ArrayStats {
inner: RwLock {
data: StatsSet {
values: [
(
IsSorted,
Exact(
ScalarValue(
Bool(
true,
),
),
),
),
],
},
},
},
},
validity: NonNullable,
stats_set: ArrayStats {
inner: RwLock {
data: StatsSet {
values: [],
},
},
},
},
VarBinViewArray {
dtype: Utf8(
NonNullable,
),
buffers: [],
views: BufferHandle(
Host(
Buffer<u8> {
length: 48,
alignment: Alignment(
16,
),
as_slice: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...],
},
),
),
validity: NonNullable,
stats_set: ArrayStats {
inner: RwLock {
data: StatsSet {
values: [],
},
},
},
},
],
stats_set: ArrayStats {
inner: RwLock {
data: StatsSet {
values: [],
},
},
},
},
actions: [
(
Compress(
Default,
),
Array(
ChunkedArray {
dtype: Utf8(
NonNullable,
),
len: 3,
chunk_offsets: PrimitiveArray {
dtype: Primitive(
U64,
NonNullable,
),
buffer: BufferHandle(
Host(
Buffer<u8> {
length: 24,
alignment: Alignment(
8,
),
as_slice: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...],
},
),
),
validity: NonNullable,
stats_set: ArrayStats {
inner: RwLock {
data: StatsSet {
values: [],
},
},
},
},
chunks: [
VarBinArray {
dtype: Utf8(
NonNullable,
),
bytes: BufferHandle(
Host(
Buffer<u8> {
length: 0,
alignment: Alignment(
1,
),
as_slice: [],
},
),
),
offsets: PrimitiveArray {
dtype: Primitive(
U32,
NonNullable,
),
buffer: BufferHandle(
Host(
Buffer<u8> {
length: 4,
alignment: Alignment(
4,
),
as_slice: [0, 0, 0, 0],
},
),
),
validity: NonNullable,
stats_set: ArrayStats {
inner: RwLock {
data: StatsSet {
values: [
(
IsSorted,
Exact(
ScalarValue(
Bool(
true,
),
),
),
),
],
},
},
},
},
validity: NonNullable,
stats_set: ArrayStats {
inner: RwLock {
data: StatsSet {
values: [],
},
},
},
},
VarBinViewArray {
dtype: Utf8(
NonNullable,
),
buffers: [],
views: BufferHandle(
Host(
Buffer<u8> {
length: 48,
alignment: Alignment(
16,
),
as_slice: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...],
},
),
),
validity: NonNullable,
stats_set: ArrayStats {
inner: RwLock {
data: StatsSet {
values: [],
},
},
},
},
],
stats_set: ArrayStats {
inner: RwLock {
data: StatsSet {
values: [],
},
},
},
},
),
),
(
Mask(
Values(
MaskValues {
buffer: BitBuffer {
buffer: Buffer<u8> {
length: 1,
alignment: Alignment(
1,
),
as_slice: [4],
},
offset: 0,
len: 3,
},
indices: OnceLock(
<uninit>,
),
slices: OnceLock(
<uninit>,
),
true_count: 1,
density: 0.3333333333333333,
},
),
),
Array(
VarBinViewArray {
dtype: Utf8(
Nullable,
),
buffers: [],
views: BufferHandle(
Host(
Buffer<u8> {
length: 48,
alignment: Alignment(
16,
),
as_slice: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...],
},
),
),
validity: Array(
BoolArray {
dtype: Bool(
NonNullable,
),
bits: BufferHandle(
Host(
Buffer<u8> {
length: 1,
alignment: Alignment(
1,
),
as_slice: [251],
},
),
),
offset: 0,
len: 3,
validity: NonNullable,
stats_set: ArrayStats {
inner: RwLock {
data: StatsSet {
values: [],
},
},
},
},
),
stats_set: ArrayStats {
inner: RwLock {
data: StatsSet {
values: [],
},
},
},
},
),
),
(
ScalarAt(
[
0,
],
),
ScalarVec(
[
Scalar {
dtype: Utf8(
Nullable,
),
value: ScalarValue(
BufferString(
BufferString {
string: "",
},
),
),
},
],
),
),
(
Compress(
Compact,
),
Array(
VarBinViewArray {
dtype: Utf8(
Nullable,
),
buffers: [],
views: BufferHandle(
Host(
Buffer<u8> {
length: 48,
alignment: Alignment(
16,
),
as_slice: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...],
},
),
),
validity: Array(
BoolArray {
dtype: Bool(
NonNullable,
),
bits: BufferHandle(
Host(
Buffer<u8> {
length: 1,
alignment: Alignment(
1,
),
as_slice: [251],
},
),
),
offset: 0,
len: 3,
validity: NonNullable,
stats_set: ArrayStats {
inner: RwLock {
data: StatsSet {
values: [],
},
},
},
},
),
stats_set: ArrayStats {
inner: RwLock {
data: StatsSet {
values: [],
},
},
},
},
),
),
],
}
Summary
- Target: array_ops
- Crash File: crash-caec2ffa2e36c2363c142c55a4176bef24a9ef3c
- Branch: develop
- Commit: db7811a
- Crash Artifact: https://github.com/spiraldb/vortex/actions/runs//artifacts/
Reproduction
- Download the crash artifact:
  - Direct download: https://github.com/spiraldb/vortex/actions/runs//artifacts/
  - Or find fuzz-artifacts at: https://github.com/spiraldb/vortex/actions/runs/
  - Extract the zip file
- Reproduce locally:
  # The artifact contains array_ops/crash-caec2ffa2e36c2363c142c55a4176bef24a9ef3c
  cargo +nightly fuzz run -D --sanitizer=none array_ops array_ops/crash-caec2ffa2e36c2363c142c55a4176bef24a9ef3c -- -rss_limit_mb=0
- Get full backtrace:
  RUST_BACKTRACE=full cargo +nightly fuzz run -D --sanitizer=none array_ops array_ops/crash-caec2ffa2e36c2363c142c55a4176bef24a9ef3c -- -rss_limit_mb=0

Auto-created by fuzzing workflow with Claude analysis