From 353c13bb069ffdf928f7494f6208dc5ef6512993 Mon Sep 17 00:00:00 2001 From: Javier de la Torre Date: Sat, 14 Mar 2026 09:52:53 +0100 Subject: [PATCH] feat: add Arrow-native geospatial serialization support (SPARK-54232) Add geospatialAsArrow field (field 6) to TSparkArrowTypes Thrift struct and expose it as WithArrowNativeGeospatial() ConnOption. When enabled, geometry/geography columns are returned as Struct instead of EWKT strings, enabling efficient binary geometry transfer over Arrow without string parsing. This is opt-in (defaults to false) and requires Databricks Runtime with SPARK-54232 support. Changes: - cli_service.go: Add GeospatialAsArrow field 6 to TSparkArrowTypes - config.go: Add UseArrowNativeGeospatial to ArrowConfig - connection.go: Wire through to TExecuteStatementReq - connector.go: Add WithArrowNativeGeospatial() ConnOption --- connection.go | 1 + connector.go | 9 ++++++ internal/cli_service/cli_service.go | 44 +++++++++++++++++++++++++++++ internal/config/config.go | 17 +++++++---- 4 files changed, 66 insertions(+), 5 deletions(-) diff --git a/connection.go b/connection.go index c297d5bd..90ea1349 100644 --- a/connection.go +++ b/connection.go @@ -315,6 +315,7 @@ func (c *conn) executeStatement(ctx context.Context, query string, args []driver TimestampAsArrow: &c.cfg.UseArrowNativeTimestamp, ComplexTypesAsArrow: &c.cfg.UseArrowNativeComplexTypes, IntervalTypesAsArrow: &c.cfg.UseArrowNativeIntervalTypes, + GeospatialAsArrow: &c.cfg.UseArrowNativeGeospatial, } } diff --git a/connector.go b/connector.go index 1f77ac3f..3aba77d5 100644 --- a/connector.go +++ b/connector.go @@ -276,6 +276,15 @@ func WithCloudFetch(useCloudFetch bool) ConnOption { } } +// WithArrowNativeGeospatial enables Arrow-native geospatial serialization. +// When true, geometry/geography columns are returned as Struct +// instead of EWKT strings. Requires Databricks Runtime with SPARK-54232 support. +func WithArrowNativeGeospatial(enable bool) ConnOption { + return func(c *config.Config) { + c.UseArrowNativeGeospatial = enable + } +} + // WithMaxDownloadThreads sets up maximum download threads for cloud fetch. Default is 10. func WithMaxDownloadThreads(numThreads int) ConnOption { return func(c *config.Config) { diff --git a/internal/cli_service/cli_service.go b/internal/cli_service/cli_service.go index 71952c69..c9cc6c99 100644 --- a/internal/cli_service/cli_service.go +++ b/internal/cli_service/cli_service.go @@ -11450,6 +11450,7 @@ type TSparkArrowTypes struct { ComplexTypesAsArrow *bool `thrift:"complexTypesAsArrow,3" db:"complexTypesAsArrow" json:"complexTypesAsArrow,omitempty"` IntervalTypesAsArrow *bool `thrift:"intervalTypesAsArrow,4" db:"intervalTypesAsArrow" json:"intervalTypesAsArrow,omitempty"` NullTypeAsArrow *bool `thrift:"nullTypeAsArrow,5" db:"nullTypeAsArrow" json:"nullTypeAsArrow,omitempty"` + GeospatialAsArrow *bool `thrift:"geospatialAsArrow,6" db:"geospatialAsArrow" json:"geospatialAsArrow,omitempty"` } func NewTSparkArrowTypes() *TSparkArrowTypes { @@ -11511,6 +11512,17 @@ func (p *TSparkArrowTypes) IsSetNullTypeAsArrow() bool { return p.NullTypeAsArrow != nil } +var TSparkArrowTypes_GeospatialAsArrow_DEFAULT bool +func (p *TSparkArrowTypes) GetGeospatialAsArrow() bool { + if !p.IsSetGeospatialAsArrow() { + return TSparkArrowTypes_GeospatialAsArrow_DEFAULT + } +return *p.GeospatialAsArrow +} +func (p *TSparkArrowTypes) IsSetGeospatialAsArrow() bool { + return p.GeospatialAsArrow != nil +} + func (p *TSparkArrowTypes) Read(ctx context.Context, iprot thrift.TProtocol) error { if _, err := iprot.ReadStructBegin(ctx); err != nil { return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) @@ -11574,6 +11586,16 @@ func (p *TSparkArrowTypes) Read(ctx context.Context, iprot thrift.TProtocol) err return err } } + case 6: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField6(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } default: if err := iprot.Skip(ctx, fieldTypeId); err != nil { return err @@ -11634,6 +11656,15 @@ func (p *TSparkArrowTypes) ReadField5(ctx context.Context, iprot thrift.TProtoc return nil } +func (p *TSparkArrowTypes) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 6: ", err) +} else { + p.GeospatialAsArrow = &v +} + return nil +} + func (p *TSparkArrowTypes) Write(ctx context.Context, oprot thrift.TProtocol) error { if err := oprot.WriteStructBegin(ctx, "TSparkArrowTypes"); err != nil { return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } @@ -11643,6 +11674,7 @@ func (p *TSparkArrowTypes) Write(ctx context.Context, oprot thrift.TProtocol) er if err := p.writeField3(ctx, oprot); err != nil { return err } if err := p.writeField4(ctx, oprot); err != nil { return err } if err := p.writeField5(ctx, oprot); err != nil { return err } + if err := p.writeField6(ctx, oprot); err != nil { return err } } if err := oprot.WriteFieldStop(ctx); err != nil { return thrift.PrependError("write field stop error: ", err) } @@ -11711,6 +11743,18 @@ func (p *TSparkArrowTypes) writeField5(ctx context.Context, oprot thrift.TProtoc return err } +func (p *TSparkArrowTypes) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { + if p.IsSetGeospatialAsArrow() { + if err := oprot.WriteFieldBegin(ctx, "geospatialAsArrow", thrift.BOOL, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:geospatialAsArrow: ", p), err) } + if err := oprot.WriteBool(ctx, bool(*p.GeospatialAsArrow)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.geospatialAsArrow (6) field write error: ", p), err) } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:geospatialAsArrow: ", p), err) } + } + return err +} + func (p *TSparkArrowTypes) Equals(other *TSparkArrowTypes) bool { if p == other { return true diff --git a/internal/config/config.go b/internal/config/config.go index e13cb98f..a774702c 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -452,6 +452,12 @@ type ArrowConfig struct { // the following are currently not supported UseArrowNativeComplexTypes bool UseArrowNativeIntervalTypes bool + + // UseArrowNativeGeospatial enables Arrow-native geospatial serialization. + // When true, the server returns geometry/geography columns as + // Struct instead of EWKT strings. + // Requires Databricks Runtime with SPARK-54232 support. + UseArrowNativeGeospatial bool } func (ucfg ArrowConfig) WithDefaults() ArrowConfig { @@ -465,11 +471,12 @@ func (ucfg ArrowConfig) WithDefaults() ArrowConfig { // DeepCopy returns a true deep copy of UserConfig func (arrowConfig ArrowConfig) DeepCopy() ArrowConfig { return ArrowConfig{ - UseArrowBatches: arrowConfig.UseArrowBatches, - UseArrowNativeDecimal: arrowConfig.UseArrowNativeDecimal, - UseArrowNativeTimestamp: arrowConfig.UseArrowNativeTimestamp, - UseArrowNativeComplexTypes: arrowConfig.UseArrowNativeComplexTypes, - UseArrowNativeIntervalTypes: arrowConfig.UseArrowNativeIntervalTypes, + UseArrowBatches: arrowConfig.UseArrowBatches, + UseArrowNativeDecimal: arrowConfig.UseArrowNativeDecimal, + UseArrowNativeTimestamp: arrowConfig.UseArrowNativeTimestamp, + UseArrowNativeComplexTypes: arrowConfig.UseArrowNativeComplexTypes, + UseArrowNativeIntervalTypes: arrowConfig.UseArrowNativeIntervalTypes, + UseArrowNativeGeospatial: arrowConfig.UseArrowNativeGeospatial, } }