From 3512d9949344a450d02f77b898a60103b1153002 Mon Sep 17 00:00:00 2001
From: OmBiradar
Date: Fri, 15 May 2026 07:07:35 +0530
Subject: [PATCH] Improve parquet reading using multi threads

Signed-off-by: OmBiradar
---
 cpp/src/parquet/arrow/reader.cc | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc
index a60af69aec9f..36b264be70d9 100644
--- a/cpp/src/parquet/arrow/reader.cc
+++ b/cpp/src/parquet/arrow/reader.cc
@@ -735,10 +735,9 @@ class PARQUET_NO_EXPORT StructReader : public ColumnReaderImpl {
   bool IsOrHasRepeatedChild() const final { return has_repeated_child_; }
 
   Status LoadBatch(int64_t records_to_read) override {
-    for (const std::unique_ptr<ColumnReaderImpl>& reader : children_) {
-      RETURN_NOT_OK(reader->LoadBatch(records_to_read));
-    }
-    return Status::OK();
+    return ::arrow::internal::OptionalParallelFor(
+        ctx_->reader_properties->use_threads(), static_cast<int>(children_.size()),
+        [&](int i) { return children_[i]->LoadBatch(records_to_read); });
   }
   Status BuildArray(int64_t length_upper_bound,
                     std::shared_ptr<ChunkedArray>* out) override;
@@ -825,10 +824,17 @@ Status StructReader::BuildArray(int64_t length_upper_bound,
   END_PARQUET_CATCH_EXCEPTIONS
 
   // Gather children arrays and def levels
-  for (auto& child : children_) {
-    std::shared_ptr<ChunkedArray> field;
-    RETURN_NOT_OK(child->BuildArray(validity_io.values_read, &field));
-    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayData> array_data, ChunksToSingle(*field));
+  const int num_children = static_cast<int>(children_.size());
+  std::vector<std::shared_ptr<ChunkedArray>> chunked_fields(num_children);
+
+  RETURN_NOT_OK(::arrow::internal::OptionalParallelFor(
+      ctx_->reader_properties->use_threads(), num_children, [&](int i) {
+        return children_[i]->BuildArray(validity_io.values_read, &chunked_fields[i]);
+      }));
+  children_array_data.reserve(num_children);
+  for (int i = 0; i < num_children; ++i) {
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayData> array_data,
+                          ChunksToSingle(*chunked_fields[i]));
     children_array_data.push_back(std::move(array_data));
   }