From 64f42ff8ea882cf668991ab705b3c314b3e18a3a Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Mon, 25 May 2026 17:00:39 -0700 Subject: [PATCH] Enable parquet prefetch --- .../operator/persistent/reader/parquet/parquet_reader.h | 4 +--- .../operator/persistent/reader/parquet/thrift_tools.h | 2 +- .../operator/persistent/reader/parquet/parquet_reader.cpp | 8 +++++++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/include/processor/operator/persistent/reader/parquet/parquet_reader.h b/src/include/processor/operator/persistent/reader/parquet/parquet_reader.h index c0a2cedd5f..f8731bfd47 100644 --- a/src/include/processor/operator/persistent/reader/parquet/parquet_reader.h +++ b/src/include/processor/operator/persistent/reader/parquet/parquet_reader.h @@ -32,9 +32,7 @@ struct ParquetReaderScanState { ResizeableBuffer defineBuf; ResizeableBuffer repeatBuf; - // TODO(Ziyi): We currently only support reading from local file system, thus the prefetch - // mode is disabled by default. Add this back when we support remote file system. - bool prefetchMode = false; + bool prefetchMode = true; bool currentGroupPrefetched = false; }; diff --git a/src/include/processor/operator/persistent/reader/parquet/thrift_tools.h b/src/include/processor/operator/persistent/reader/parquet/thrift_tools.h index 8dc298c99a..54fe992cd0 100644 --- a/src/include/processor/operator/persistent/reader/parquet/thrift_tools.h +++ b/src/include/processor/operator/persistent/reader/parquet/thrift_tools.h @@ -69,7 +69,7 @@ struct ReadAheadBuffer { auto new_start = std::min(existing_head->location, new_read_head.location); auto new_length = - std::min(existing_head->GetEnd(), new_read_head.GetEnd()) - new_start; + std::max(existing_head->GetEnd(), new_read_head.GetEnd()) - new_start; existing_head->location = new_start; existing_head->size = new_length; return; diff --git a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp index 0972426319..05bb3911ef 100644 --- a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +++ b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp @@ -36,7 +36,7 @@ void ParquetReader::initializeScan(ParquetReaderScanState& state, state.groupOffset = 0; state.groupIdxList = std::move(groups_to_read); if (!state.fileInfo || state.fileInfo->path != filePath) { - state.prefetchMode = false; + state.prefetchMode = true; state.fileInfo = vfs->openFile(filePath, common::FileOpenFlags(FileFlags::READ_ONLY), context); } @@ -69,6 +69,9 @@ bool ParquetReader::scanInternal(ParquetReaderScanState& state, DataChunk& resul uint64_t toScanCompressedBytes = 0; for (auto colIdx = 0u; colIdx < result.getNumValueVectors(); colIdx++) { + if (!columnSkips.empty() && columnSkips[colIdx]) { + continue; + } prepareRowGroupBuffer(state, colIdx); auto fileColIdx = colIdx; @@ -104,6 +107,9 @@ bool ParquetReader::scanInternal(ParquetReaderScanState& state, DataChunk& resul } else { // Prefetch column-wise. for (auto colIdx = 0u; colIdx < result.getNumValueVectors(); colIdx++) { + if (!columnSkips.empty() && columnSkips[colIdx]) { + continue; + } auto fileColIdx = colIdx; auto rootReader = dynamic_cast_checked(state.rootReader.get());