Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ class VeloxParquetWriterInjects extends VeloxFormatWriterInjects {
// i.e., compression, block size, block rows.
val sparkOptions = new mutable.HashMap[String, String]()
sparkOptions.put(SQLConf.PARQUET_COMPRESSION.key, compressionCodec)
sparkOptions.put(
SQLConf.PARQUET_WRITE_LEGACY_FORMAT.key,
SQLConf.get.writeLegacyParquetFormat.toString)
val blockSize = options.getOrElse(
GlutenConfig.PARQUET_BLOCK_SIZE,
GlutenConfig.get.columnarParquetWriteBlockSize.toString)
Expand Down
4 changes: 4 additions & 0 deletions cpp/core/config/GlutenConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ const std::string kParquetWriterVersion = "parquet.writer.version";

const std::string kParquetCompressionCodec = "spark.sql.parquet.compression.codec";

/// Spark configuration key `spark.sql.parquet.writeLegacyFormat`.
///
/// The constant is named after the Velox option it drives,
/// `WriterOptions::storeDecimalAsInteger`, but the mapping is inverted: a value of
/// "true" (compared case-insensitively) sets `storeDecimalAsInteger` to false, while
/// any other value, or a missing key, sets it to true.
const std::string kParquetStoreDecimalAsInteger = "spark.sql.parquet.writeLegacyFormat";

const std::string kColumnarToRowMemoryThreshold = "spark.gluten.sql.columnarToRowMemoryThreshold";

const std::string kUGIUserName = "spark.gluten.ugi.username";
Expand Down
6 changes: 6 additions & 0 deletions cpp/velox/utils/VeloxWriterUtils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ std::unique_ptr<WriterOptions> makeParquetWriteOption(const std::unordered_map<s
}
auto writeOption = std::make_unique<WriterOptions>();
writeOption->parquetWriteTimestampUnit = TimestampPrecision::kMicroseconds /*micro*/;
bool writeLegacyParquetFormat = false;
if (auto it = sparkConfs.find(kParquetStoreDecimalAsInteger); it != sparkConfs.end()) {
writeLegacyParquetFormat = boost::iequals(it->second, "true");
}
// Spark's legacy Parquet format writes decimals as FIXED_LEN_BYTE_ARRAY, so integer-backed
// (INT32/INT64) decimal storage is enabled only when writeLegacyParquetFormat is false.
writeOption->storeDecimalAsInteger = !writeLegacyParquetFormat;
auto compressionCodec = CompressionKind::CompressionKind_SNAPPY;
if (auto it = sparkConfs.find(kParquetCompressionCodec); it != sparkConfs.end()) {
auto compressionCodecStr = it->second;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,7 @@ object GlutenConfig extends ConfigRegistry {
DEBUG_ENABLED.key,
// datasource config
SPARK_SQL_PARQUET_COMPRESSION_CODEC,
PARQUET_WRITE_LEGACY_FORMAT.key,
// datasource config end
GlutenCoreConfig.COLUMNAR_OVERHEAD_SIZE_IN_BYTES.key,
GlutenCoreConfig.COLUMNAR_OFFHEAP_SIZE_IN_BYTES.key,
Expand Down
Loading