diff --git a/.github/workflows/deploy_development_cookbooks.yml b/.github/workflows/deploy_development_cookbooks.yml index a8addeb7..260ef075 100644 --- a/.github/workflows/deploy_development_cookbooks.yml +++ b/.github/workflows/deploy_development_cookbooks.yml @@ -66,6 +66,8 @@ jobs: run: echo ${CONDA_PREFIX} - name: Build cookbook + env: + ARROW_NIGHTLY: 1 run: make cpp - name: Upload cpp book diff --git a/.github/workflows/test_arrow_nightly_cookbook.yml b/.github/workflows/test_arrow_nightly_cookbook.yml index 5421d0f0..b6b0b986 100644 --- a/.github/workflows/test_arrow_nightly_cookbook.yml +++ b/.github/workflows/test_arrow_nightly_cookbook.yml @@ -48,6 +48,8 @@ jobs: test_cpp_dev: name: "Test C++ Cookbook on Arrow Nightlies" runs-on: ubuntu-latest + env: + ARROW_NIGHTLY: 1 defaults: run: shell: bash -l {0} diff --git a/Makefile b/Makefile index 12654353..06ceff99 100644 --- a/Makefile +++ b/Makefile @@ -88,7 +88,7 @@ cpptest: @echo ">>> Running C++ Tests/Snippets <<<\n" rm -rf cpp/recipe-test-build mkdir cpp/recipe-test-build - cd cpp/recipe-test-build && cmake ../code -DCMAKE_BUILD_TYPE=Release && cmake --build . && ctest --output-on-failure -j 1 + cd cpp/recipe-test-build && cmake ../code -G Ninja -DCMAKE_BUILD_TYPE=Release && cmake --build . && ctest --output-on-failure -j 1 mkdir -p cpp/build cp cpp/recipe-test-build/recipes_out.arrow cpp/build diff --git a/cpp/CONTRIBUTING.md b/cpp/CONTRIBUTING.md index a82b37c8..2b9d8493 100644 --- a/cpp/CONTRIBUTING.md +++ b/cpp/CONTRIBUTING.md @@ -95,7 +95,7 @@ output block when the recipe is rendered into the cookbook. ## Referencing Arrow C++ Documentation The Arrow project has its own documentation for the C++ implementation that -is hosted at https://arrow.apache.org/docs/cpp/index.html. Fortunately, +is hosted at <https://arrow.apache.org/docs/cpp/index.html>. Fortunately, this documentation is also built with Sphinx and so we can use the extension `intersphinx` to reference sections of this documentation. 
To do so simply write a standard Sphinx reference like so: @@ -121,6 +121,7 @@ cmake build. For example: ``` mkdir cpp/code/build cd cpp/code/build +# Optional: Run `export ARROW_NIGHTLY=1` to build Arrow from git. cmake ../code -DCMAKE_BUILD_TYPE=Debug cmake --build . ctest diff --git a/cpp/code/CMakeLists.txt b/cpp/code/CMakeLists.txt index 46a15e97..7c9890fe 100644 --- a/cpp/code/CMakeLists.txt +++ b/cpp/code/CMakeLists.txt @@ -18,23 +18,66 @@ cmake_minimum_required(VERSION 3.19) project(arrow-cookbook) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libstdc++") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libstdc++") endif() # Add Arrow and other required packages -find_package(Arrow REQUIRED) -if(NOT ${ARROW_VERSION} VERSION_GREATER "9.0.0") - get_filename_component(ARROW_CMAKE_BASE_DIR ${Arrow_CONFIG} DIRECTORY) - list(INSERT CMAKE_MODULE_PATH 0 ${ARROW_CMAKE_BASE_DIR}) +if(DEFINED ENV{ARROW_NIGHTLY}) + set(CMAKE_BUILD_TYPE Debug) + set(ARROW_BUILD_SHARED True) + set(ARROW_DEPENDENCY_SOURCE "AUTO") + set(ARROW_ENABLE_THREADING ON) + set(ARROW_SIMD_LEVEL NONE) # macOS-specific workaround + + set(ARROW_WITH_SNAPPY ON) + + set(ARROW_ACERO ON) + set(ARROW_COMPUTE ON) + set(ARROW_DATASET ON) + set(ARROW_FILESYSTEM ON) + set(ARROW_FLIGHT ON) + set(ARROW_IPC ON) + set(ARROW_PARQUET ON) + + include(FetchContent) + + FetchContent_Declare(Arrow + GIT_REPOSITORY https://github.com/apache/arrow.git + GIT_TAG main + GIT_SHALLOW TRUE SOURCE_SUBDIR cpp + OVERRIDE_FIND_PACKAGE + ) + + FetchContent_MakeAvailable(Arrow) + + # These are some Linux-only things the FetchContent build needs in order + # to compile + file(INSTALL "${arrow_BINARY_DIR}/src/arrow/util/config.h" + DESTINATION "${arrow_SOURCE_DIR}/cpp/src/arrow/util") + file(INSTALL "${arrow_BINARY_DIR}/src/parquet/parquet_version.h" + DESTINATION "${arrow_SOURCE_DIR}/cpp/src/parquet") + target_include_directories( 
+ arrow_shared + SYSTEM INTERFACE "$<BUILD_INTERFACE:${arrow_SOURCE_DIR}/cpp/src>" + ) + # Force FetchContent Arrow headers to the front of every target's include + # list so they take priority over any system Arrow headers added transitively + # (e.g. /opt/homebrew/include from GTest::gtest). Without this the recipe + # executables compile against the older installed Arrow headers but link + # against the FetchContent Arrow runtime, causing ABI mismatches. + include_directories(BEFORE SYSTEM "${arrow_SOURCE_DIR}/cpp/src") + +else() + find_package(Arrow REQUIRED) + find_package(ArrowDataset REQUIRED) + find_package(ArrowFlight REQUIRED) + find_package(Parquet REQUIRED) endif() -find_package(ArrowDataset REQUIRED) -find_package(ArrowFlight REQUIRED) -find_package(Parquet REQUIRED) if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set(CMAKE_CXX_CLANG_TIDY "clang-tidy") + set(CMAKE_CXX_CLANG_TIDY "clang-tidy") endif() # Create test targets @@ -44,31 +87,36 @@ find_package(GTest REQUIRED) include(GoogleTest) function(RECIPE TARGET) - add_executable( + add_executable( ${TARGET} ${TARGET}.cc common.cc main.cc ) - if(TARGET Arrow::arrow_shared) - target_link_libraries( + if(TARGET Arrow::arrow_shared) + target_link_libraries( ${TARGET} ArrowDataset::arrow_dataset_shared ArrowFlight::arrow_flight_shared GTest::gtest ) - else() - target_link_libraries(parquet_shared INTERFACE arrow_shared) - target_link_libraries(arrow_dataset_shared INTERFACE parquet_shared) - target_link_libraries(arrow_flight_shared INTERFACE arrow_shared) - target_link_libraries(${TARGET} arrow_dataset_shared arrow_flight_shared GTest::gtest) + else() + target_link_libraries(parquet_shared INTERFACE arrow_shared) + target_link_libraries(arrow_dataset_shared INTERFACE parquet_shared) + target_link_libraries(arrow_flight_shared INTERFACE arrow_shared) + target_link_libraries(${TARGET} arrow_dataset_shared arrow_flight_shared GTest::gtest) + endif() + if (MSVC) + target_compile_options(${TARGET} PRIVATE /W4 /WX) + else () + 
target_compile_options(${TARGET} PRIVATE -Wall -Wextra -Wpedantic -Werror) + # _Nullable/_Nonnull nullability annotations in absl macros trigger + # -Wnullability-extension under -Wpedantic; this is Clang-only. + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + target_compile_options(${TARGET} PRIVATE -Wno-nullability-extension) endif() - if (MSVC) - target_compile_options(${TARGET} PRIVATE /W4 /WX) - else () - target_compile_options(${TARGET} PRIVATE -Wall -Wextra -Wpedantic -Werror) - endif () + endif () - gtest_discover_tests(${TARGET}) + gtest_discover_tests(${TARGET}) endfunction() recipe(basic_arrow) @@ -76,10 +124,9 @@ recipe(creating_arrow_objects) recipe(datasets) recipe(flight) - # Add protobuf to flight -find_package(gRPC CONFIG REQUIRED) find_package(Threads) +find_package(gRPC CONFIG REQUIRED) set(PROTO_FILES protos/helloworld.proto diff --git a/cpp/code/common.h b/cpp/code/common.h index 756ca818..018ee8ad 100644 --- a/cpp/code/common.h +++ b/cpp/code/common.h @@ -18,30 +18,11 @@ #ifndef ARROW_COOKBOOK_COMMON_H #define ARROW_COOKBOOK_COMMON_H -#include -#include +#include #include #include -#define ARROW_STRINGIFY(x) #x -#define ARROW_CONCAT(x, y) x##y - -#define ARROW_ASSIGN_OR_RAISE_NAME(x, y) ARROW_CONCAT(x, y) - -#define ASSERT_OK(expr) \ - for (const ::arrow::Status _st = ::arrow::ToStatus((expr)); !_st.ok();) \ - FAIL() << "'" ARROW_STRINGIFY(expr) "' failed with " << _st.ToString() - -#define ASSIGN_OR_HANDLE_ERROR_IMPL(handle_error, status_name, lhs, rexpr) \ - auto&& status_name = (rexpr); \ - handle_error(status_name.status()); \ - lhs = std::move(status_name).ValueOrDie(); - -#define ASSERT_OK_AND_ASSIGN(lhs, rexpr) \ - ASSIGN_OR_HANDLE_ERROR_IMPL( \ - ASSERT_OK, ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), lhs, rexpr); - inline std::stringstream rout; void StartRecipe(const std::string& recipe_name); diff --git a/cpp/code/datasets.cc b/cpp/code/datasets.cc index 8f0ba8ba..3329fde1 100644 --- a/cpp/code/datasets.cc +++ 
b/cpp/code/datasets.cc @@ -87,10 +87,15 @@ class DatasetReadingTest : public ::testing::Test { fs->OpenInputFile(airquality_path)); std::unique_ptr<parquet::ParquetFileReader> parquet_reader = parquet::ParquetFileReader::Open(file); - ARROW_ASSIGN_OR_RAISE(auto reader, parquet::arrow::FileReader::Make( - arrow::default_memory_pool(), std::move(parquet_reader))); + ARROW_ASSIGN_OR_RAISE(auto reader, + parquet::arrow::FileReader::Make(arrow::default_memory_pool(), + std::move(parquet_reader))); std::shared_ptr<arrow::Table> table; +#if ARROW_VERSION_MAJOR >= 24 + ARROW_ASSIGN_OR_RAISE(table, reader->ReadTable()); +#else ARROW_RETURN_NOT_OK(reader->ReadTable(&table)); +#endif return table; } diff --git a/cpp/code/flight.cc b/cpp/code/flight.cc index 7cd03174..038f43e7 100644 --- a/cpp/code/flight.cc +++ b/cpp/code/flight.cc @@ -97,7 +97,11 @@ class ParquetStorageService : public arrow::flight::FlightServerBase { parquet::arrow::OpenFile(std::move(input), arrow::default_memory_pool())); std::shared_ptr<arrow::Table> table; +#if ARROW_VERSION_MAJOR >= 24 + ARROW_ASSIGN_OR_RAISE(table, reader->ReadTable()); +#else ARROW_RETURN_NOT_OK(reader->ReadTable(&table)); +#endif // Note that we can't directly pass TableBatchReader to // RecordBatchStream because TableBatchReader keeps a non-owning // reference to the underlying Table, which would then get freed @@ -148,7 +152,7 @@ class ParquetStorageService : public arrow::flight::FlightServerBase { endpoint.ticket.ticket = file_info.base_name(); arrow::flight::Location location; ARROW_ASSIGN_OR_RAISE(location, - arrow::flight::Location::ForGrpcTcp("localhost", port())); + arrow::flight::Location::ForGrpcTcp("localhost", port())); endpoint.locations.push_back(location); int64_t total_records = reader->parquet_reader()->metadata()->num_rows(); @@ -197,7 +201,7 @@ arrow::Status TestPutGetDelete() { arrow::flight::Location server_location; ARROW_ASSIGN_OR_RAISE(server_location, - arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0)); + arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0)); 
arrow::flight::FlightServerOptions options(server_location); auto server = std::unique_ptr( @@ -209,7 +213,7 @@ arrow::Status TestPutGetDelete() { StartRecipe("ParquetStorageService::Connect"); arrow::flight::Location location; ARROW_ASSIGN_OR_RAISE(location, - arrow::flight::Location::ForGrpcTcp("localhost", server->port())); + arrow::flight::Location::ForGrpcTcp("localhost", server->port())); std::unique_ptr client; ARROW_ASSIGN_OR_RAISE(client, arrow::flight::FlightClient::Connect(location)); @@ -315,7 +319,7 @@ arrow::Status TestClientOptions() { arrow::flight::Location server_location; ARROW_ASSIGN_OR_RAISE(server_location, - arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0)); + arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0)); arrow::flight::FlightServerOptions options(server_location); auto server = std::unique_ptr( @@ -329,12 +333,12 @@ arrow::Status TestClientOptions() { arrow::flight::Location location; ARROW_ASSIGN_OR_RAISE(location, - arrow::flight::Location::ForGrpcTcp("localhost", server->port())); + arrow::flight::Location::ForGrpcTcp("localhost", server->port())); std::unique_ptr client; // pass client_options into Connect() ARROW_ASSIGN_OR_RAISE(client, - arrow::flight::FlightClient::Connect(location, client_options)); + arrow::flight::FlightClient::Connect(location, client_options)); rout << "Connected to " << location.ToString() << std::endl; EndRecipe("TestClientOptions::Connect"); @@ -352,7 +356,7 @@ arrow::Status TestCustomGrpcImpl() { StartRecipe("CustomGrpcImpl::StartServer"); arrow::flight::Location server_location; ARROW_ASSIGN_OR_RAISE(server_location, - arrow::flight::Location::ForGrpcTcp("0.0.0.0", 5000)); + arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0)); arrow::flight::FlightServerOptions options(server_location); auto server = std::unique_ptr( @@ -372,8 +376,8 @@ arrow::Status TestCustomGrpcImpl() { EndRecipe("CustomGrpcImpl::StartServer"); StartRecipe("CustomGrpcImpl::CreateClient"); - auto client_channel = - 
grpc::CreateChannel("0.0.0.0:5000", grpc::InsecureChannelCredentials()); + auto client_channel = grpc::CreateChannel("0.0.0.0:" + std::to_string(server->port()), + grpc::InsecureChannelCredentials()); auto stub = HelloWorldService::NewStub(client_channel); diff --git a/cpp/code/main.cc b/cpp/code/main.cc index 3fbe3eaa..e32e9223 100644 --- a/cpp/code/main.cc +++ b/cpp/code/main.cc @@ -19,12 +19,17 @@ #include +#include #include #include "gtest/gtest.h" #include "common.h" int main(int argc, char** argv) { + if (!arrow::compute::Initialize().ok()) { + std::cerr << "Failed to initialize Arrow compute functions" << std::endl; + return -1; + } testing::InitGoogleTest(&argc, argv); int retval = RUN_ALL_TESTS(); if (retval == 0 && HasRecipeOutput()) { diff --git a/cpp/dev.yml b/cpp/dev.yml index d461f87e..90eda6eb 100644 --- a/cpp/dev.yml +++ b/cpp/dev.yml @@ -16,15 +16,22 @@ name: cookbook-cpp-dev channels: - - arrow-nightlies - conda-forge dependencies: - python=3.10 - compilers - - arrow-nightlies::libarrow + - cmake + - ninja - sphinx - gtest - gmock - - arrow-nightlies::pyarrow - clang-tools - zlib + - grpc-cpp + - protobuf + - abseil-cpp + - c-ares + - re2 + - thrift-cpp + - rapidjson + - snappy