-
Notifications
You must be signed in to change notification settings - Fork 29.2k
[SPARK-57100][SQL] Add columnar (ColumnVector) support for nanosecond timestamp types #56198
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
2912125
68fa774
e46c1f2
7a10e70
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,6 +33,7 @@ | |
| import org.apache.spark.sql.vectorized.ColumnarMap; | ||
| import org.apache.spark.unsafe.array.ByteArrayMethods; | ||
| import org.apache.spark.unsafe.types.CalendarInterval; | ||
| import org.apache.spark.unsafe.types.TimestampNanosVal; | ||
| import org.apache.spark.unsafe.types.UTF8String; | ||
|
|
||
| /** | ||
|
|
@@ -493,6 +494,16 @@ public void putInterval(int rowId, CalendarInterval value) { | |
| getChild(2).putLong(rowId, value.microseconds); | ||
| } | ||
|
|
||
| public void putTimestampNTZNanos(int rowId, TimestampNanosVal value) { | ||
| getChild(0).putLong(rowId, value.epochMicros); | ||
| getChild(1).putShort(rowId, value.nanosWithinMicro); | ||
| } | ||
|
|
||
| public void putTimestampLTZNanos(int rowId, TimestampNanosVal value) { | ||
| getChild(0).putLong(rowId, value.epochMicros); | ||
| getChild(1).putShort(rowId, value.nanosWithinMicro); | ||
| } | ||
|
|
||
| @Override | ||
| public UTF8String getUTF8String(int rowId) { | ||
| if (isNullAt(rowId)) return null; | ||
|
|
@@ -751,7 +762,10 @@ public final int appendStruct(boolean isNull) { | |
| putNull(elementsAppended); | ||
| elementsAppended++; | ||
| for (WritableColumnVector c: childColumns) { | ||
| if (c.type instanceof StructType || c.type instanceof VariantType) { | ||
| if (c.type instanceof StructType || c.type instanceof VariantType | ||
| || c.type instanceof CalendarIntervalType | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Adding `CalendarIntervalType` to this condition goes beyond the nanosecond timestamp types. The fix is correct, but:
Up to you whether to split out into a separate commit or keep bundled. |
||
| || c.type instanceof TimestampNTZNanosType | ||
| || c.type instanceof TimestampLTZNanosType) { | ||
| c.appendStruct(true); | ||
| } else { | ||
| c.appendNull(); | ||
|
|
@@ -1056,6 +1070,11 @@ protected WritableColumnVector(int capacity, DataType dataType) { | |
| this.childColumns[0] = reserveNewColumn(capacity, DataTypes.IntegerType); | ||
| this.childColumns[1] = reserveNewColumn(capacity, DataTypes.IntegerType); | ||
| this.childColumns[2] = reserveNewColumn(capacity, DataTypes.LongType); | ||
| } else if (type instanceof TimestampNTZNanosType || type instanceof TimestampLTZNanosType) { | ||
| // Two columns. EpochMicros as Long. NanosWithinMicro as Short. | ||
| this.childColumns = new WritableColumnVector[2]; | ||
| this.childColumns[0] = reserveNewColumn(capacity, DataTypes.LongType); | ||
| this.childColumns[1] = reserveNewColumn(capacity, DataTypes.ShortType); | ||
| } else if (type instanceof VariantType) { | ||
| this.childColumns = new WritableColumnVector[2]; | ||
| this.childColumns[0] = reserveNewColumn(capacity, DataTypes.BinaryType); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,7 +28,7 @@ import org.apache.spark.sql.execution.columnar.compression.ColumnBuilderHelper | |
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.types._ | ||
| import org.apache.spark.sql.vectorized.ColumnarArray | ||
| import org.apache.spark.unsafe.types.{UTF8String, VariantVal} | ||
| import org.apache.spark.unsafe.types.{TimestampNanosVal, UTF8String, VariantVal} | ||
| import org.apache.spark.util.ArrayImplicits._ | ||
|
|
||
| class ColumnVectorSuite extends SparkFunSuite with SQLHelper { | ||
|
|
@@ -379,6 +379,50 @@ class ColumnVectorSuite extends SparkFunSuite with SQLHelper { | |
| } | ||
| } | ||
|
|
||
| testVectors("timestamp_ntz_nanos", 10, TimestampNTZNanosType(9)) { testVector => | ||
| val values = (0 until 10).map(i => TimestampNanosVal.fromParts(i * 1000L, i.toShort)) | ||
| values.foreach { v => | ||
| testVector.putNotNull(testVector.elementsAppended) | ||
| testVector.putTimestampNTZNanos(testVector.elementsAppended, v) | ||
| testVector.elementsAppended += 1 | ||
| } | ||
| values.zipWithIndex.foreach { case (v, i) => | ||
| assert(testVector.getTimestampNTZNanos(i) === v) | ||
| } | ||
| testVector.putNull(0) | ||
| assert(testVector.isNullAt(0)) | ||
| } | ||
|
|
||
| testVectors("timestamp_ltz_nanos", 10, TimestampLTZNanosType(9)) { testVector => | ||
| val values = (0 until 10).map(i => TimestampNanosVal.fromParts(i * 1000L, i.toShort)) | ||
| values.foreach { v => | ||
| testVector.putNotNull(testVector.elementsAppended) | ||
| testVector.putTimestampLTZNanos(testVector.elementsAppended, v) | ||
| testVector.elementsAppended += 1 | ||
| } | ||
| values.zipWithIndex.foreach { case (v, i) => | ||
| assert(testVector.getTimestampLTZNanos(i) === v) | ||
| } | ||
| testVector.putNull(0) | ||
| assert(testVector.isNullAt(0)) | ||
| } | ||
|
|
||
| testVectors("mutable ColumnarRow with TimestampNTZNanosType", 5, | ||
| TimestampNTZNanosType(9)) { testVector => | ||
| val mutableRow = new MutableColumnarRow(Array(testVector)) | ||
| val values = (0 until 5).map(i => TimestampNanosVal.fromParts(i * 100L, i.toShort)) | ||
| values.zipWithIndex.foreach { case (v, i) => | ||
| mutableRow.rowId = i | ||
| mutableRow.setTimestampNTZNanos(0, v) | ||
| } | ||
| values.zipWithIndex.foreach { case (v, i) => | ||
| mutableRow.rowId = i | ||
| assert(mutableRow.getTimestampNTZNanos(0) === v) | ||
| assert(mutableRow.get(0, TimestampNTZNanosType(9)) === v) | ||
| assert(mutableRow.copy().get(0, TimestampNTZNanosType(9)) === v) | ||
| } | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The PR adds a |
||
|
|
||
| testVectors("mutable ColumnarRow with TimestampNTZType", 10, TimestampNTZType) { testVector => | ||
| val mutableRow = new MutableColumnarRow(Array(testVector)) | ||
| (0 until 10).foreach { i => | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The two `else if` branches are identical. `appendValue` below at line 178 already collapses both nanos types into one condition with `||`. Suggest the same here: