From 9b8ed37d21d82041d84a7baff0193c04e4350d11 Mon Sep 17 00:00:00 2001 From: erezrokah Date: Mon, 23 Mar 2026 17:37:25 +0000 Subject: [PATCH 1/2] feat(parquet): Support int32 to uint64 in reverseTransformArray Add reverseTransformFromInt32 to handle int32 to uint64 widening, mirroring the existing uint32 to uint64 support. --- parquet/read.go | 19 +++++++++++ parquet/read_test.go | 81 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) diff --git a/parquet/read.go b/parquet/read.go index d087b8a..69b76b1 100644 --- a/parquet/read.go +++ b/parquet/read.go @@ -68,6 +68,8 @@ func reverseTransformArray(dt arrow.DataType, arr arrow.Array) arrow.Array { return reverseTransformTime64(dt.(*arrow.Time64Type), arr) case *array.Date32: return reverseTransformFromDate32(dt, arr) + case *array.Int32: + return reverseTransformFromInt32(dt, arr) case *array.Uint32: return reverseTransformFromUint32(dt, arr) case *array.Struct: @@ -102,6 +104,23 @@ func reverseTransformArray(dt arrow.DataType, arr arrow.Array) arrow.Array { } } +func reverseTransformFromInt32(dt arrow.DataType, arr *array.Int32) arrow.Array { + switch dt { + case arrow.PrimitiveTypes.Uint64: + builder := array.NewUint64Builder(memory.DefaultAllocator) + for i := 0; i < arr.Len(); i++ { + if arr.IsNull(i) { + builder.AppendNull() + continue + } + builder.Append(uint64(arr.Value(i))) + } + return builder.NewArray() + default: + panic(fmt.Errorf("unsupported conversion from %s to %s", arr.DataType(), dt)) + } +} + func reverseTransformFromUint32(dt arrow.DataType, arr *array.Uint32) arrow.Array { switch dt { case arrow.PrimitiveTypes.Uint64: diff --git a/parquet/read_test.go b/parquet/read_test.go index b27b30f..789e924 100644 --- a/parquet/read_test.go +++ b/parquet/read_test.go @@ -89,3 +89,84 @@ func TestReverseTransformArray_Uint32ToUint64_ListOf(t *testing.T) { require.Equal(t, uint64(2), values.Value(1)) require.Equal(t, uint64(3), values.Value(2)) } + +func TestReverseTransformArray_Int32ToUint64(t *testing.T) { + builder := array.NewInt32Builder(memory.DefaultAllocator) + defer builder.Release() + + builder.Append(0) + builder.Append(42) + builder.AppendNull() + builder.Append(2147483647) // max int32 + + arr := builder.NewArray() + defer arr.Release() + + result := reverseTransformArray(arrow.PrimitiveTypes.Uint64, arr) + defer result.Release() + + require.Equal(t, arrow.PrimitiveTypes.Uint64, result.DataType()) + require.Equal(t, 4, result.Len()) + + u64 := result.(*array.Uint64) + require.Equal(t, uint64(0), u64.Value(0)) + require.False(t, u64.IsNull(0)) + + require.Equal(t, uint64(42), u64.Value(1)) + require.False(t, u64.IsNull(1)) + + require.True(t, u64.IsNull(2)) + + require.Equal(t, uint64(2147483647), u64.Value(3)) + require.False(t, u64.IsNull(3)) +} + +func TestReverseTransformArray_Int32ToUint64_Empty(t *testing.T) { + builder := array.NewInt32Builder(memory.DefaultAllocator) + defer builder.Release() + + arr := builder.NewArray() + defer arr.Release() + + result := reverseTransformArray(arrow.PrimitiveTypes.Uint64, arr) + defer result.Release() + + require.Equal(t, arrow.PrimitiveTypes.Uint64, result.DataType()) + require.Equal(t, 0, result.Len()) +} + +func TestReverseTransformArray_Int32ToUint64_ListOf(t *testing.T) { + bldr := array.NewListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int32) + defer bldr.Release() + + vb := bldr.ValueBuilder().(*array.Int32Builder) + + bldr.Append(true) + vb.Append(1) + vb.Append(2) + + bldr.Append(true) + vb.Append(3) + + bldr.AppendNull() + + arr := bldr.NewArray() + defer arr.Release() + + targetDt := arrow.ListOf(arrow.PrimitiveTypes.Uint64) + result := reverseTransformArray(targetDt, arr) + defer result.Release() + + require.True(t, arrow.TypeEqual(targetDt, result.DataType())) + require.Equal(t, 3, result.Len()) + + listArr := result.(*array.List) + require.False(t, listArr.IsNull(0)) + require.False(t, listArr.IsNull(1)) + require.True(t, listArr.IsNull(2)) + + values := listArr.ListValues().(*array.Uint64) + require.Equal(t, uint64(1), values.Value(0)) + require.Equal(t, uint64(2), values.Value(1)) + require.Equal(t, uint64(3), values.Value(2)) +} From cc2970a8f8cf68f0a0541f5fdb632a405f2b96e9 Mon Sep 17 00:00:00 2001 From: erezrokah Date: Mon, 23 Mar 2026 17:39:54 +0000 Subject: [PATCH 2/2] fix(parquet): Panic on negative int32 values when converting to uint64 --- parquet/read.go | 6 +++++- parquet/read_test.go | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/parquet/read.go b/parquet/read.go index 69b76b1..c4a19f9 100644 --- a/parquet/read.go +++ b/parquet/read.go @@ -113,7 +113,11 @@ func reverseTransformFromInt32(dt arrow.DataType, arr *array.Int32) arrow.Array builder.AppendNull() continue } - builder.Append(uint64(arr.Value(i))) + v := arr.Value(i) + if v < 0 { + panic(fmt.Errorf("negative int32 value %d at index %d cannot be converted to uint64", v, i)) + } + builder.Append(uint64(v)) } return builder.NewArray() default: diff --git a/parquet/read_test.go b/parquet/read_test.go index 789e924..e990ae8 100644 --- a/parquet/read_test.go +++ b/parquet/read_test.go @@ -170,3 +170,17 @@ func TestReverseTransformArray_Int32ToUint64_ListOf(t *testing.T) { require.Equal(t, uint64(2), values.Value(1)) require.Equal(t, uint64(3), values.Value(2)) } + +func TestReverseTransformArray_Int32ToUint64_NegativePanics(t *testing.T) { + builder := array.NewInt32Builder(memory.DefaultAllocator) + defer builder.Release() + + builder.Append(-1) + + arr := builder.NewArray() + defer arr.Release() + + require.PanicsWithError(t, "negative int32 value -1 at index 0 cannot be converted to uint64", func() { + reverseTransformArray(arrow.PrimitiveTypes.Uint64, arr) + }) +}