Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/deploy_development_cookbooks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ jobs:
run:
echo ${CONDA_PREFIX}
- name: Build cookbook
  # Step-level `env` must be a mapping of NAME: value pairs. Writing it as a
  # YAML sequence ("- ARROW_NIGHTLY: 1") makes the workflow file invalid.
  # ARROW_NIGHTLY only needs to be defined for the C++ CMake build to fetch
  # and build Arrow from git instead of using an installed package.
  env:
    ARROW_NIGHTLY: 1
  run:
    make cpp
- name: Upload cpp book
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/test_arrow_nightly_cookbook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ jobs:
test_cpp_dev:
name: "Test C++ Cookbook on Arrow Nightlies"
runs-on: ubuntu-latest
env:
ARROW_NIGHTLY: 1
defaults:
run:
shell: bash -l {0}
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ cpptest:
@echo ">>> Running C++ Tests/Snippets <<<\n"
rm -rf cpp/recipe-test-build
mkdir cpp/recipe-test-build
cd cpp/recipe-test-build && cmake ../code -DCMAKE_BUILD_TYPE=Release && cmake --build . && ctest --output-on-failure -j 1
cd cpp/recipe-test-build && cmake ../code -G Ninja -DCMAKE_BUILD_TYPE=Release && cmake --build . && ctest --output-on-failure -j 1
mkdir -p cpp/build
cp cpp/recipe-test-build/recipes_out.arrow cpp/build

Expand Down
3 changes: 2 additions & 1 deletion cpp/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ output block when the recipe is rendered into the cookbook.
## Referencing Arrow C++ Documentation

The Arrow project has its own documentation for the C++ implementation that
is hosted at https://arrow.apache.org/docs/cpp/index.html. Fortunately,
is hosted at <https://arrow.apache.org/docs/cpp/index.html>. Fortunately,
this documentation is also built with Sphinx and so we can use the extension
`intersphinx` to reference sections of this documentation. To do so simply
write a standard Sphinx reference like so:
Expand All @@ -121,6 +121,7 @@ cmake build. For example:
```
mkdir cpp/code/build
cd cpp/code/build
# Optional: Run `export ARROW_NIGHTLY=1` to build Arrow from git.
cmake .. -DCMAKE_BUILD_TYPE=Debug
cmake --build .
ctest
Expand Down
99 changes: 73 additions & 26 deletions cpp/code/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,66 @@
cmake_minimum_required(VERSION 3.19)
project(arrow-cookbook)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD 20)
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libstdc++")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libstdc++")
endif()

# Add Arrow and other required packages
find_package(Arrow REQUIRED)
if(NOT ${ARROW_VERSION} VERSION_GREATER "9.0.0")
get_filename_component(ARROW_CMAKE_BASE_DIR ${Arrow_CONFIG} DIRECTORY)
list(INSERT CMAKE_MODULE_PATH 0 ${ARROW_CMAKE_BASE_DIR})
# Select where Arrow C++ comes from. If the ARROW_NIGHTLY environment variable
# is defined when CMake configures (its value is not inspected), Arrow is
# fetched from git and built as part of this project. Otherwise the installed
# Arrow, ArrowDataset, ArrowFlight and Parquet packages are located with
# find_package().
if(DEFINED ENV{ARROW_NIGHTLY})
# NOTE(review): this plain set() looks like it shadows any
# -DCMAKE_BUILD_TYPE=... passed on the command line, so the nightly build
# would always be Debug -- confirm that is intended.
set(CMAKE_BUILD_TYPE Debug)
# The ARROW_* variables below are set before the Arrow sources are added;
# presumably they are consumed as build options by Arrow's own CMake files
# (their exact meaning is defined there, not in this file).
set(ARROW_BUILD_SHARED True)
set(ARROW_DEPENDENCY_SOURCE "AUTO")
set(ARROW_ENABLE_THREADING ON)
set(ARROW_SIMD_LEVEL NONE) # macOS-specific workaround

set(ARROW_WITH_SNAPPY ON)

# Arrow components requested for the fetched build.
set(ARROW_ACERO ON)
set(ARROW_COMPUTE ON)
set(ARROW_DATASET ON)
set(ARROW_FILESYSTEM ON)
set(ARROW_FLIGHT ON)
set(ARROW_IPC ON)
set(ARROW_PARQUET ON)

include(FetchContent)

# Shallow-clone the `main` branch of apache/arrow and use its `cpp`
# subdirectory as the CMake source directory.
# NOTE(review): OVERRIDE_FIND_PACKAGE was added to FetchContent in CMake 3.24,
# while this file declares cmake_minimum_required(VERSION 3.19) -- confirm the
# minimum version, or guard this branch on CMAKE_VERSION.
FetchContent_Declare(Arrow
GIT_REPOSITORY https://github.com/apache/arrow.git
GIT_TAG main
GIT_SHALLOW TRUE SOURCE_SUBDIR cpp
OVERRIDE_FIND_PACKAGE
)

FetchContent_MakeAvailable(Arrow)

# These are some Linux-only things the FetchContent build needs in order
# to compile.
# The two file(INSTALL) calls run at configure time and copy config.h and
# parquet_version.h from Arrow's binary directory into the fetched source tree
# under cpp/src, which is the directory added to the include paths below.
file(INSTALL "${arrow_BINARY_DIR}/src/arrow/util/config.h"
DESTINATION "${arrow_SOURCE_DIR}/cpp/src/arrow/util")
file(INSTALL "${arrow_BINARY_DIR}/src/parquet/parquet_version.h"
DESTINATION "${arrow_SOURCE_DIR}/cpp/src/parquet")
# Expose the fetched cpp/src directory as a SYSTEM include directory to
# anything that links arrow_shared from the build tree.
target_include_directories(
arrow_shared
SYSTEM INTERFACE "$<BUILD_INTERFACE:${arrow_SOURCE_DIR}/cpp/src>"
)
# Force FetchContent Arrow headers to the front of every target's include
# list so they take priority over any system Arrow headers added transitively
# (e.g. /opt/homebrew/include from GTest::gtest). Without this the recipe
# executables compile against the older installed Arrow headers but link
# against the FetchContent Arrow runtime, causing ABI mismatches.
include_directories(BEFORE SYSTEM "${arrow_SOURCE_DIR}/cpp/src")

else()
# Default path: use an already-installed Arrow; every package is REQUIRED, so
# configuration fails if any of them cannot be found.
find_package(Arrow REQUIRED)
find_package(ArrowDataset REQUIRED)
find_package(ArrowFlight REQUIRED)
find_package(Parquet REQUIRED)
endif()
find_package(ArrowDataset REQUIRED)
find_package(ArrowFlight REQUIRED)
find_package(Parquet REQUIRED)

if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(CMAKE_CXX_CLANG_TIDY "clang-tidy")
set(CMAKE_CXX_CLANG_TIDY "clang-tidy")
endif()

# Create test targets
Expand All @@ -44,42 +87,46 @@ find_package(GTest REQUIRED)
include(GoogleTest)

function(RECIPE TARGET)
add_executable(
add_executable(
${TARGET}
${TARGET}.cc
common.cc
main.cc
)
if(TARGET Arrow::arrow_shared)
target_link_libraries(
if(TARGET Arrow::arrow_shared)
target_link_libraries(
${TARGET}
ArrowDataset::arrow_dataset_shared
ArrowFlight::arrow_flight_shared GTest::gtest
)
else()
target_link_libraries(parquet_shared INTERFACE arrow_shared)
target_link_libraries(arrow_dataset_shared INTERFACE parquet_shared)
target_link_libraries(arrow_flight_shared INTERFACE arrow_shared)
target_link_libraries(${TARGET} arrow_dataset_shared arrow_flight_shared GTest::gtest)
else()
target_link_libraries(parquet_shared INTERFACE arrow_shared)
target_link_libraries(arrow_dataset_shared INTERFACE parquet_shared)
target_link_libraries(arrow_flight_shared INTERFACE arrow_shared)
target_link_libraries(${TARGET} arrow_dataset_shared arrow_flight_shared GTest::gtest)
endif()
if (MSVC)
target_compile_options(${TARGET} PRIVATE /W4 /WX)
else ()
target_compile_options(${TARGET} PRIVATE -Wall -Wextra -Wpedantic -Werror)
# _Nullable/_Nonnull nullability annotations in absl macros trigger
# -Wnullability-extension under -Wpedantic; this is Clang-only.
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
target_compile_options(${TARGET} PRIVATE -Wno-nullability-extension)
endif()
if (MSVC)
target_compile_options(${TARGET} PRIVATE /W4 /WX)
else ()
target_compile_options(${TARGET} PRIVATE -Wall -Wextra -Wpedantic -Werror)
endif ()
endif ()

gtest_discover_tests(${TARGET})
gtest_discover_tests(${TARGET})
endfunction()

recipe(basic_arrow)
recipe(creating_arrow_objects)
recipe(datasets)
recipe(flight)


# Add protobuf to flight
find_package(gRPC CONFIG REQUIRED)
find_package(Threads)
find_package(gRPC CONFIG REQUIRED)

set(PROTO_FILES
protos/helloworld.proto
Expand Down
21 changes: 1 addition & 20 deletions cpp/code/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,30 +18,11 @@
#ifndef ARROW_COOKBOOK_COMMON_H
#define ARROW_COOKBOOK_COMMON_H

#include <arrow/result.h>
#include <arrow/status.h>
#include <arrow/testing/gtest_util.h>

#include <sstream>
#include <string>

#define ARROW_STRINGIFY(x) #x
#define ARROW_CONCAT(x, y) x##y

#define ARROW_ASSIGN_OR_RAISE_NAME(x, y) ARROW_CONCAT(x, y)

#define ASSERT_OK(expr) \
for (const ::arrow::Status _st = ::arrow::ToStatus((expr)); !_st.ok();) \
FAIL() << "'" ARROW_STRINGIFY(expr) "' failed with " << _st.ToString()

#define ASSIGN_OR_HANDLE_ERROR_IMPL(handle_error, status_name, lhs, rexpr) \
auto&& status_name = (rexpr); \
handle_error(status_name.status()); \
lhs = std::move(status_name).ValueOrDie();

#define ASSERT_OK_AND_ASSIGN(lhs, rexpr) \
ASSIGN_OR_HANDLE_ERROR_IMPL( \
ASSERT_OK, ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), lhs, rexpr);

inline std::stringstream rout;

void StartRecipe(const std::string& recipe_name);
Expand Down
9 changes: 7 additions & 2 deletions cpp/code/datasets.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,15 @@ class DatasetReadingTest : public ::testing::Test {
fs->OpenInputFile(airquality_path));
std::unique_ptr<parquet::ParquetFileReader> parquet_reader =
parquet::ParquetFileReader::Open(file);
ARROW_ASSIGN_OR_RAISE(auto reader, parquet::arrow::FileReader::Make(
arrow::default_memory_pool(), std::move(parquet_reader)));
ARROW_ASSIGN_OR_RAISE(auto reader,
parquet::arrow::FileReader::Make(arrow::default_memory_pool(),
std::move(parquet_reader)));
std::shared_ptr<arrow::Table> table;
#if ARROW_VERSION_MAJOR >= 24
ARROW_ASSIGN_OR_RAISE(table, reader->ReadTable());
#else
ARROW_RETURN_NOT_OK(reader->ReadTable(&table));
#endif
return table;
}

Expand Down
22 changes: 13 additions & 9 deletions cpp/code/flight.cc
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,11 @@ class ParquetStorageService : public arrow::flight::FlightServerBase {
parquet::arrow::OpenFile(std::move(input), arrow::default_memory_pool()));

std::shared_ptr<arrow::Table> table;
#if ARROW_VERSION_MAJOR >= 24
ARROW_ASSIGN_OR_RAISE(table, reader->ReadTable());
#else
ARROW_RETURN_NOT_OK(reader->ReadTable(&table));
#endif
// Note that we can't directly pass TableBatchReader to
// RecordBatchStream because TableBatchReader keeps a non-owning
// reference to the underlying Table, which would then get freed
Expand Down Expand Up @@ -148,7 +152,7 @@ class ParquetStorageService : public arrow::flight::FlightServerBase {
endpoint.ticket.ticket = file_info.base_name();
arrow::flight::Location location;
ARROW_ASSIGN_OR_RAISE(location,
arrow::flight::Location::ForGrpcTcp("localhost", port()));
arrow::flight::Location::ForGrpcTcp("localhost", port()));
endpoint.locations.push_back(location);

int64_t total_records = reader->parquet_reader()->metadata()->num_rows();
Expand Down Expand Up @@ -197,7 +201,7 @@ arrow::Status TestPutGetDelete() {

arrow::flight::Location server_location;
ARROW_ASSIGN_OR_RAISE(server_location,
arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0));
arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0));

arrow::flight::FlightServerOptions options(server_location);
auto server = std::unique_ptr<arrow::flight::FlightServerBase>(
Expand All @@ -209,7 +213,7 @@ arrow::Status TestPutGetDelete() {
StartRecipe("ParquetStorageService::Connect");
arrow::flight::Location location;
ARROW_ASSIGN_OR_RAISE(location,
arrow::flight::Location::ForGrpcTcp("localhost", server->port()));
arrow::flight::Location::ForGrpcTcp("localhost", server->port()));

std::unique_ptr<arrow::flight::FlightClient> client;
ARROW_ASSIGN_OR_RAISE(client, arrow::flight::FlightClient::Connect(location));
Expand Down Expand Up @@ -315,7 +319,7 @@ arrow::Status TestClientOptions() {

arrow::flight::Location server_location;
ARROW_ASSIGN_OR_RAISE(server_location,
arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0));
arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0));

arrow::flight::FlightServerOptions options(server_location);
auto server = std::unique_ptr<arrow::flight::FlightServerBase>(
Expand All @@ -329,12 +333,12 @@ arrow::Status TestClientOptions() {

arrow::flight::Location location;
ARROW_ASSIGN_OR_RAISE(location,
arrow::flight::Location::ForGrpcTcp("localhost", server->port()));
arrow::flight::Location::ForGrpcTcp("localhost", server->port()));

std::unique_ptr<arrow::flight::FlightClient> client;
// pass client_options into Connect()
ARROW_ASSIGN_OR_RAISE(client,
arrow::flight::FlightClient::Connect(location, client_options));
arrow::flight::FlightClient::Connect(location, client_options));
rout << "Connected to " << location.ToString() << std::endl;
EndRecipe("TestClientOptions::Connect");

Expand All @@ -352,7 +356,7 @@ arrow::Status TestCustomGrpcImpl() {
StartRecipe("CustomGrpcImpl::StartServer");
arrow::flight::Location server_location;
ARROW_ASSIGN_OR_RAISE(server_location,
arrow::flight::Location::ForGrpcTcp("0.0.0.0", 5000));
arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0));

arrow::flight::FlightServerOptions options(server_location);
auto server = std::unique_ptr<arrow::flight::FlightServerBase>(
Expand All @@ -372,8 +376,8 @@ arrow::Status TestCustomGrpcImpl() {
EndRecipe("CustomGrpcImpl::StartServer");

StartRecipe("CustomGrpcImpl::CreateClient");
auto client_channel =
grpc::CreateChannel("0.0.0.0:5000", grpc::InsecureChannelCredentials());
auto client_channel = grpc::CreateChannel("0.0.0.0:" + std::to_string(server->port()),
grpc::InsecureChannelCredentials());

auto stub = HelloWorldService::NewStub(client_channel);

Expand Down
5 changes: 5 additions & 0 deletions cpp/code/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,17 @@

#include <filesystem>

#include <arrow/compute/api.h>
#include <arrow/status.h>
#include "gtest/gtest.h"

#include "common.h"

int main(int argc, char** argv) {
if (!arrow::compute::Initialize().ok()) {
std::cerr << "Failed to initialize Arrow compute functions" << std::endl;
return -1;
}
testing::InitGoogleTest(&argc, argv);
int retval = RUN_ALL_TESTS();
if (retval == 0 && HasRecipeOutput()) {
Expand Down
13 changes: 10 additions & 3 deletions cpp/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,22 @@

name: cookbook-cpp-dev
channels:
- arrow-nightlies
- conda-forge
dependencies:
- python=3.10
- compilers
- arrow-nightlies::libarrow
- cmake
- ninja
- sphinx
- gtest
- gmock
- arrow-nightlies::pyarrow
- clang-tools
- zlib
- grpc-cpp
- protobuf
- abseil-cpp
- c-ares
- re2
- thrift-cpp
- rapidjson
- snappy
Loading