From 216c6f42592438517352661da7b223891324ffcb Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Fri, 28 Feb 2025 08:31:00 +0000 Subject: [PATCH 01/21] Separate windows and ubuntu --- .github/workflows/build_test.yml | 38 +++++++++++++++++++++---- scripts/find_vs.ps1 | 49 ++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 6 deletions(-) create mode 100644 scripts/find_vs.ps1 diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index e69559ea..6fb9b307 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -27,23 +27,49 @@ jobs: run: git diff --exit-code - name: Checking code style run: ./scripts/run-clang-tidy.sh - build: + + ubuntu-build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 with: submodules: recursive + - name: Setup Ubuntu + run: ./scripts/setup-ubuntu.sh + + - name: Build run: | - ./scripts/setup-ubuntu.sh mkdir build - - name: Run cmake - run: | cmake --version cmake -Bbuild -GNinja -DCMAKE_BUILD_TYPE=Debug -DBUILD_TZ_LIB=ON - - name: Build - run: ninja -C build + ninja -C build + - name: Test run: ctest --test-dir build --output-on-failure --timeout 30 + windows-build: + runs-on: windows-latest + + steps: + - uses: actions/checkout@v3 + with: + submodules: recursive + + - name: Set up JDK 11 + uses: actions/setup-java@v3 + with: + distribution: 'temurin' + java-version: '11' + + - name: Build + run: | + ./scripts/find_vs.ps1 + mkdir build + cmake --version + cmake -Bbuild -GNinja -DCMAKE_BUILD_TYPE=Debug -DBUILD_TZ_LIB=ON + ninja -C build + + - name: Test + run: ctest --test-dir build --output-on-failure --timeout 30 \ No newline at end of file diff --git a/scripts/find_vs.ps1 b/scripts/find_vs.ps1 new file mode 100644 index 00000000..0751bf12 --- /dev/null +++ b/scripts/find_vs.ps1 @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: Apache-2.0 +# Find and enter a Visual Studio development environment. +# Required to use Ninja instead of msbuild on our build agents. +function Enter-VsDevEnv { + [CmdletBinding()] + param( + [Parameter()] + [switch]$Prerelease, + [Parameter()] + [string]$architecture = "x64" + ) + + $ErrorActionPreference = 'Stop' + + if ($null -eq (Get-InstalledModule -name 'VSSetup' -ErrorAction SilentlyContinue)) { + Install-Module -Name 'VSSetup' -Scope CurrentUser -SkipPublisherCheck -Force + } + Import-Module -Name 'VSSetup' + + Write-Verbose 'Searching for VC++ instances' + $vsinfo = ` + Get-VSSetupInstance -All -Prerelease:$Prerelease ` + | Select-VSSetupInstance ` + -Latest -Product * ` + -Require 'Microsoft.VisualStudio.Component.VC.Tools.x86.x64' + + $vspath = $vsinfo.InstallationPath + + switch ($env:PROCESSOR_ARCHITECTURE) { + "amd64" { $hostarch = "x64" } + "x86" { $hostarch = "x86" } + "arm64" { $hostarch = "arm64" } + default { throw "Unknown architecture: $switch" } + } + + $devShellModule = "$vspath\Common7\Tools\Microsoft.VisualStudio.DevShell.dll" + + Import-Module -Global -Name $devShellModule + + Write-Verbose 'Setting up environment variables' + Enter-VsDevShell -VsInstanceId $vsinfo.InstanceId -SkipAutomaticLocation ` + -devCmdArguments "-arch=$architecture -host_arch=$hostarch" + + Set-Item -Force -path "Env:\Platform" -Value $architecture + + remove-Module Microsoft.VisualStudio.DevShell, VSSetup +} + +Enter-VsDevEnv From f3f6de194815eec4476d0f4ac8ca438146455191 Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Fri, 28 Feb 2025 09:06:37 +0000 Subject: [PATCH 02/21] add GTest fix --- CMakeLists.txt | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9fae3ba9..29ea6442 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,29 @@ file(DOWNLOAD https://www.antlr.org/download/antlr-4.13.2-complete.jar "${ANTLR_EXECUTABLE_DIR}/antlr.jar") set(ANTLR_EXECUTABLE "${ANTLR_EXECUTABLE_DIR}/antlr.jar") +if(MSVC) + # ------------------------------------------------------------------------------ + # googlegtest include fix + # ------------------------------------------------------------------------------ + # For unknown reasons, the googletest dependency within ANTLR4 (antlr4->googletest) + # applies incorrect assumptions about include path inside the gmock project. + # This only happens when building with MSVC. + # We can hot-fix this by injecting correct paths here. + function(fix_gtest_include TARGET) + target_include_directories(${TARGET} PUBLIC + $ + $ + $ + $ + $ + ) + endfunction() + set(gtest_erroneous_targets gmock gmock_main) + foreach(target ${gtest_erroneous_targets}) + fix_gtest_include(${target}) + endforeach() +endif() + # Local files come first. include_directories(include) include_directories(src) From 8bb9c889ddc6cf5f0881486c5e8ccfdbbec0507d Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Fri, 28 Feb 2025 09:10:10 +0000 Subject: [PATCH 03/21] cmake format? --- CMakeLists.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 29ea6442..cd5cde6e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,17 +60,17 @@ if(MSVC) # This only happens when building with MSVC. # We can hot-fix this by injecting correct paths here. function(fix_gtest_include TARGET) - target_include_directories(${TARGET} PUBLIC - $ - $ - $ - $ - $ - ) + target_include_directories( + ${TARGET} + PUBLIC $ + $ + $ + $ + $) endfunction() set(gtest_erroneous_targets gmock gmock_main) foreach(target ${gtest_erroneous_targets}) - fix_gtest_include(${target}) + fix_gtest_include(${target}) endforeach() endif() From b4b4b901c9b83d9b26c1b6a15d6319cfa6cc9378 Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Fri, 28 Feb 2025 09:16:15 +0000 Subject: [PATCH 04/21] disable antlr tests --- CMakeLists.txt | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cd5cde6e..2e1d4b86 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,7 @@ set(CMAKE_CXX_STANDARD 17) add_definitions(-DANTLR4CPP_STATIC) # using /MD flag for antlr4_runtime (for Visual C++ compilers only) set(ANTLR4_WITH_STATIC_CRT OFF) +set(ANTLR4_BUILD_CPP_TESTS OFF) set(ANTLR4_TAG 4.13.2) set(ANTLR4_ZIP_REPOSITORY https://github.com/antlr/antlr4/archive/refs/tags/${ANTLR4_TAG}.zip) @@ -51,29 +52,6 @@ file(DOWNLOAD https://www.antlr.org/download/antlr-4.13.2-complete.jar "${ANTLR_EXECUTABLE_DIR}/antlr.jar") set(ANTLR_EXECUTABLE "${ANTLR_EXECUTABLE_DIR}/antlr.jar") -if(MSVC) - # ------------------------------------------------------------------------------ - # googlegtest include fix - # ------------------------------------------------------------------------------ - # For unknown reasons, the googletest dependency within ANTLR4 (antlr4->googletest) - # applies incorrect assumptions about include path inside the gmock project. - # This only happens when building with MSVC. - # We can hot-fix this by injecting correct paths here. - function(fix_gtest_include TARGET) - target_include_directories( - ${TARGET} - PUBLIC $ - $ - $ - $ - $) - endfunction() - set(gtest_erroneous_targets gmock gmock_main) - foreach(target ${gtest_erroneous_targets}) - fix_gtest_include(${target}) - endforeach() -endif() - # Local files come first. include_directories(include) include_directories(src) From 8535d29f214b45410f7eca5501b71111d739dc2b Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Fri, 28 Feb 2025 09:19:51 +0000 Subject: [PATCH 05/21] msvc fix --- third_party/CMakeLists.txt | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index f3247590..886a3531 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -31,6 +31,28 @@ if(NOT ${GTEST_FOUND}) OVERRIDE_FIND_PACKAGE) fetchcontent_makeavailable(GTest) endif() +if(MSVC) + # ------------------------------------------------------------------------------ + # gtest MSVC fix + # ------------------------------------------------------------------------------ + # For some reason, googletest has include path issues when built with MSVC. + # Specifically, this seems like some incorrect assumptions about include paths + # inside the gmock project. + # We can fix this by injecting the include paths here. + function(fix_gtest_include TARGET) + target_include_directories( + ${TARGET} + PUBLIC $ + $ + $ + $ + $) + endfunction() + set(gtest_erroneous_targets gmock gmock_main) + foreach(target ${gtest_erroneous_targets}) + fix_gtest_include(${target}) + endforeach() +endif() set(PREVIOUS_BUILD_TESTING ${BUILD_TESTING}) set(BUILD_TESTING OFF) From 837ca3a19725eed459cc508cc184a7516f7a5ec4 Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Fri, 28 Feb 2025 09:31:48 +0000 Subject: [PATCH 06/21] formatting --- third_party/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index 886a3531..6e1f74ea 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -43,10 +43,10 @@ if(MSVC) target_include_directories( ${TARGET} PUBLIC $ - $ - $ - $ - $) + $ + $ + $ + $) endfunction() set(gtest_erroneous_targets gmock gmock_main) foreach(target ${gtest_erroneous_targets}) From 6186597afa747a57d4c66777057b86f825cad4ec Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Fri, 28 Feb 2025 09:47:11 +0000 Subject: [PATCH 07/21] antlr inherit cxx standard --- third_party/antlr4/cmake/ExternalAntlr4Cpp.cmake | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/third_party/antlr4/cmake/ExternalAntlr4Cpp.cmake b/third_party/antlr4/cmake/ExternalAntlr4Cpp.cmake index 19ac84ce..d5724b32 100644 --- a/third_party/antlr4/cmake/ExternalAntlr4Cpp.cmake +++ b/third_party/antlr4/cmake/ExternalAntlr4Cpp.cmake @@ -100,8 +100,7 @@ if(ANTLR4_ZIP_REPOSITORY) -DDISABLE_WARNINGS:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - # -DCMAKE_CXX_STANDARD:STRING=17 # if desired, compile the runtime with a different C++ standard - # -DCMAKE_CXX_STANDARD:STRING=${CMAKE_CXX_STANDARD} # alternatively, compile the runtime with the same C++ standard as the outer project + -DCMAKE_CXX_STANDARD:STRING=${CMAKE_CXX_STANDARD} INSTALL_COMMAND "" EXCLUDE_FROM_ALL 1) else() @@ -122,8 +121,7 @@ else() -DDISABLE_WARNINGS:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - # -DCMAKE_CXX_STANDARD:STRING=17 # if desired, compile the runtime with a different C++ standard - # -DCMAKE_CXX_STANDARD:STRING=${CMAKE_CXX_STANDARD} # alternatively, compile the runtime with the same C++ standard as the outer project + -DCMAKE_CXX_STANDARD:STRING=${CMAKE_CXX_STANDARD} INSTALL_COMMAND "" EXCLUDE_FROM_ALL 1) endif() From 277fceb9a08994949fa139f212a2e31a688023d1 Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Mon, 3 Mar 2025 08:37:19 +0000 Subject: [PATCH 08/21] Use most recent antlr commit --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2e1d4b86..70e89002 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,9 +42,9 @@ add_definitions(-DANTLR4CPP_STATIC) # using /MD flag for antlr4_runtime (for Visual C++ compilers only) set(ANTLR4_WITH_STATIC_CRT OFF) set(ANTLR4_BUILD_CPP_TESTS OFF) -set(ANTLR4_TAG 4.13.2) -set(ANTLR4_ZIP_REPOSITORY - https://github.com/antlr/antlr4/archive/refs/tags/${ANTLR4_TAG}.zip) +# Note: df4d68c adds a fix for MSVC compilers. No release has been made since; +# latest release was 4.13.2. Revert back to a tag once 4.13.3 is released. +set(ANTLR4_TAG df4d68c09cdef73e023b8838a8bc7ca4dff1d1de) include(ExternalAntlr4Cpp) include_directories(${ANTLR4_INCLUDE_DIRS}) set(ANTLR_EXECUTABLE_DIR ${CMAKE_CURRENT_BINARY_DIR}) From 0c5710e3ef51e68b6eb423d61404e92640b860bf Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Mon, 3 Mar 2025 08:49:23 +0000 Subject: [PATCH 09/21] Bump protobuf version --- third_party/protobuf.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/protobuf.cmake b/third_party/protobuf.cmake index 4cf786f3..b6199286 100644 --- a/third_party/protobuf.cmake +++ b/third_party/protobuf.cmake @@ -12,7 +12,7 @@ FetchContent_Declare(GTest ) FetchContent_Declare(Protobuf GIT_REPOSITORY https://github.com/protocolbuffers/protobuf.git - GIT_TAG v28.2 + GIT_TAG v29.3 SYSTEM OVERRIDE_FIND_PACKAGE ) From 040011b80fa2baf51e9836995ef0a7d4a1df77d0 Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Mon, 3 Mar 2025 09:04:19 +0000 Subject: [PATCH 10/21] flag fixes --- third_party/protobuf.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/third_party/protobuf.cmake b/third_party/protobuf.cmake index b6199286..a8b6ff7d 100644 --- a/third_party/protobuf.cmake +++ b/third_party/protobuf.cmake @@ -20,6 +20,8 @@ FetchContent_Declare(Protobuf # Disable warnings for dependency targets. set(protobuf_BUILD_TESTS OFF CACHE INTERNAL "") if(MSVC) + set(protobuf_MSVC_STATIC_RUNTIME OFF) + set(gtest_force_shared_crt ON) add_compile_options("/W0") else() add_compile_options("-w") From 1939bfbac5e14edc2d7364c094b7efbabdf6ce32 Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Mon, 3 Mar 2025 12:02:14 +0100 Subject: [PATCH 11/21] I/O fixes Windows needs to a-priori know how to open a file (binary or text). If not, text-mode is assumed, which then will translate line endings `\n->\r\n`. This, in turn, will break the protobuf loader. This in turn means that windows can't rely on `loadPlan`s format detection, in case the underlying file is a binary file. This change adds adds a `forceBinary` flag to `loadPlan`, which must be used on Windows when providing binary files. `loadPlan` is used a few different places, and this doesn't pipe that argument in everywhere. Fixes have been added to an extent s.t., tests pass. --- include/substrait/common/Io.h | 5 ++++- src/substrait/common/Io.cpp | 6 ++++-- src/substrait/common/tests/IoTest.cpp | 11 ++++++++++- .../textplan/converter/LoadBinary.cpp | 15 +++++++++++---- src/substrait/textplan/converter/LoadBinary.h | 7 +++++-- .../textplan/converter/SaveBinary.cpp | 18 ++++++------------ 6 files changed, 40 insertions(+), 22 deletions(-) diff --git a/include/substrait/common/Io.h b/include/substrait/common/Io.h index a53697d8..2227fc09 100644 --- a/include/substrait/common/Io.h +++ b/include/substrait/common/Io.h @@ -30,12 +30,15 @@ enum class PlanFileFormat { * amount of memory that it consumed on disk. * * \param input_filename The filename containing the plan to convert. + * \param force_binary If true, the plan will be opened as a binary file. + * Required on Windows to avoid text mode line-ending translation. * \return If loading was successful, returns a plan. If loading was not * successful this is a status containing a list of parse errors in the status's * message. */ absl::StatusOr<::substrait::proto::Plan> loadPlan( - std::string_view input_filename); + std::string_view input_filename, + bool force_binary = false); /* * \brief Writes the provided plan to disk. diff --git a/src/substrait/common/Io.cpp b/src/substrait/common/Io.cpp index a90ff853..fde3422f 100644 --- a/src/substrait/common/Io.cpp +++ b/src/substrait/common/Io.cpp @@ -37,8 +37,10 @@ PlanFileFormat detectFormat(std::string_view content) { } // namespace absl::StatusOr<::substrait::proto::Plan> loadPlan( - std::string_view input_filename) { - auto contentOrError = textplan::readFromFile(input_filename.data()); + std::string_view input_filename, + bool forceBinary) { + auto contentOrError = + textplan::readFromFile(input_filename.data(), forceBinary); if (!contentOrError.ok()) { return contentOrError.status(); } diff --git a/src/substrait/common/tests/IoTest.cpp b/src/substrait/common/tests/IoTest.cpp index e97ea063..7d0b0bea 100644 --- a/src/substrait/common/tests/IoTest.cpp +++ b/src/substrait/common/tests/IoTest.cpp @@ -52,7 +52,8 @@ class SaveAndLoadTestFixture : public ::testing::TestWithParam { std::filesystem::path("my_temp_dir")) .string(); - if (!std::filesystem::create_directory(testFileDirectory_)) { + std::filesystem::create_directory(testFileDirectory_); + if (!std::filesystem::exists(testFileDirectory_)) { ASSERT_TRUE(false) << "Failed to create temporary directory."; testFileDirectory_.clear(); } @@ -87,7 +88,15 @@ TEST_P(SaveAndLoadTestFixture, SaveAndLoad) { auto status = ::io::substrait::savePlan(plan, tempFilename, encoding); ASSERT_TRUE(status.ok()) << "Save failed.\n" << status; +#ifdef _WIN32 + // Windows cannot rely on io::substrait::loadPlan to detect the file format, + // since it needs to a-priori specify how the file should be loaded. + bool forceBinary = encoding == PlanFileFormat::kBinary; + auto result = ::io::substrait::loadPlan(tempFilename, forceBinary); +#else auto result = ::io::substrait::loadPlan(tempFilename); +#endif + ASSERT_TRUE(result.ok()) << "Load failed.\n" << result.status(); ASSERT_THAT( *result, diff --git a/src/substrait/textplan/converter/LoadBinary.cpp b/src/substrait/textplan/converter/LoadBinary.cpp index 6691b507..c464587b 100644 --- a/src/substrait/textplan/converter/LoadBinary.cpp +++ b/src/substrait/textplan/converter/LoadBinary.cpp @@ -42,9 +42,16 @@ class StringErrorCollector : public google::protobuf::io::ErrorCollector { } // namespace -absl::StatusOr readFromFile(std::string_view msgPath) { - std::ifstream textFile(std::string{msgPath}); - if (textFile.fail()) { +absl::StatusOr readFromFile( + std::string_view msgPath, + bool forceBinary) { + std::ifstream file; + if (forceBinary) + file.open(std::string{msgPath}, std::ios::binary); + else + file.open(std::string{msgPath}, std::ios::in); + + if (file.fail()) { auto currDir = std::filesystem::current_path().string(); return absl::ErrnoToStatus( errno, @@ -52,7 +59,7 @@ absl::StatusOr readFromFile(std::string_view msgPath) { "Failed to open file {} when running in {}", msgPath, currDir)); } std::stringstream buffer; - buffer << textFile.rdbuf(); + buffer << file.rdbuf(); return buffer.str(); } diff --git a/src/substrait/textplan/converter/LoadBinary.h b/src/substrait/textplan/converter/LoadBinary.h index a4d4e38f..8bad9166 100644 --- a/src/substrait/textplan/converter/LoadBinary.h +++ b/src/substrait/textplan/converter/LoadBinary.h @@ -12,8 +12,11 @@ class Plan; namespace io::substrait::textplan { -// Read the contents of a file from disk. -absl::StatusOr readFromFile(std::string_view msgPath); +// Read the contents of a file from disk. 'forceBinary' enables file reading in +// binary mode. +absl::StatusOr readFromFile( + std::string_view msgPath, + bool forceBinary = false); // Reads a plan from a json-encoded text proto. // Returns a list of errors if the file cannot be parsed. diff --git a/src/substrait/textplan/converter/SaveBinary.cpp b/src/substrait/textplan/converter/SaveBinary.cpp index 165ef534..f5603e9f 100644 --- a/src/substrait/textplan/converter/SaveBinary.cpp +++ b/src/substrait/textplan/converter/SaveBinary.cpp @@ -27,24 +27,18 @@ namespace io::substrait::textplan { absl::Status savePlanToBinary( const ::substrait::proto::Plan& plan, std::string_view output_filename) { - int outputFileDescriptor = - creat(std::string{output_filename}.c_str(), S_IREAD | S_IWRITE); - if (outputFileDescriptor == -1) { - return absl::ErrnoToStatus( - errno, + // Open file in binary mode and get its file descriptor + std::ofstream of(std::string{output_filename}, std::ios::binary); + if (!of) { + return absl::InternalError( fmt::format("Failed to open file {} for writing", output_filename)); } - auto stream = - new google::protobuf::io::FileOutputStream(outputFileDescriptor); - if (!plan.SerializeToZeroCopyStream(stream)) { + if (!plan.SerializeToOstream(&of)) { return ::absl::UnknownError("Failed to write plan to stream."); } - if (!stream->Close()) { - return absl::AbortedError("Failed to close file descriptor."); - } - delete stream; + of.close(); return absl::OkStatus(); } From af8a2289f02bcb10926964a435595e5c73c5eccf Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Mon, 3 Mar 2025 12:06:28 +0100 Subject: [PATCH 12/21] Fix incorrect set_difference usage --- .../textplan/tests/ParseResultMatchers.cpp | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/substrait/textplan/tests/ParseResultMatchers.cpp b/src/substrait/textplan/tests/ParseResultMatchers.cpp index 5801ae2b..7fe12bfd 100644 --- a/src/substrait/textplan/tests/ParseResultMatchers.cpp +++ b/src/substrait/textplan/tests/ParseResultMatchers.cpp @@ -149,16 +149,20 @@ class HasSymbolsMatcher { bool MatchAndExplain(const ParseResult& result, std::ostream* listener) const { - auto actualSymbols = symbolNames(result.getSymbolTable().getSymbols()); + // Note: Need set or sorted vector for set_difference. + auto actualSymbolsSorted = + symbolNames(result.getSymbolTable().getSymbols()); + std::sort(actualSymbolsSorted.begin(), actualSymbolsSorted.end()); + std::vector extraSymbols; + auto expectedSymbolsSorted = expectedSymbols_; + std::sort(expectedSymbolsSorted.begin(), expectedSymbolsSorted.end()); if (listener != nullptr) { - std::vector extraSymbols(actualSymbols.size()); auto end = std::set_difference( - actualSymbols.begin(), - actualSymbols.end(), - expectedSymbols_.begin(), - expectedSymbols_.end(), - extraSymbols.begin()); - extraSymbols.resize(end - extraSymbols.begin()); + actualSymbolsSorted.begin(), + actualSymbolsSorted.end(), + expectedSymbolsSorted.begin(), + expectedSymbolsSorted.end(), + std::back_inserter(extraSymbols)); if (!extraSymbols.empty()) { *listener << std::endl << " with extra symbols: "; for (const auto& symbol : extraSymbols) { @@ -166,14 +170,13 @@ class HasSymbolsMatcher { } } - std::vector missingSymbols(expectedSymbols_.size()); + std::vector missingSymbols; end = std::set_difference( - expectedSymbols_.begin(), - expectedSymbols_.end(), - actualSymbols.begin(), - actualSymbols.end(), - missingSymbols.begin()); - missingSymbols.resize(end - missingSymbols.begin()); + expectedSymbolsSorted.begin(), + expectedSymbolsSorted.end(), + actualSymbolsSorted.begin(), + actualSymbolsSorted.end(), + std::back_inserter(missingSymbols)); if (!missingSymbols.empty()) { if (!extraSymbols.empty()) { *listener << ", and missing symbols: "; @@ -185,7 +188,7 @@ class HasSymbolsMatcher { } } } - return actualSymbols == expectedSymbols_; + return actualSymbolsSorted == expectedSymbolsSorted; } void DescribeTo(std::ostream* os) const { From a1c5e722776ef1340f6de57f097266e248a4c770 Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Mon, 3 Mar 2025 13:07:50 +0100 Subject: [PATCH 13/21] Always load binary --- include/substrait/common/Io.h | 5 +---- src/substrait/common/Io.cpp | 6 ++---- src/substrait/common/tests/IoTest.cpp | 9 --------- src/substrait/textplan/converter/LoadBinary.cpp | 11 ++--------- src/substrait/textplan/converter/LoadBinary.h | 4 +--- 5 files changed, 6 insertions(+), 29 deletions(-) diff --git a/include/substrait/common/Io.h b/include/substrait/common/Io.h index 2227fc09..a53697d8 100644 --- a/include/substrait/common/Io.h +++ b/include/substrait/common/Io.h @@ -30,15 +30,12 @@ enum class PlanFileFormat { * amount of memory that it consumed on disk. * * \param input_filename The filename containing the plan to convert. - * \param force_binary If true, the plan will be opened as a binary file. - * Required on Windows to avoid text mode line-ending translation. * \return If loading was successful, returns a plan. If loading was not * successful this is a status containing a list of parse errors in the status's * message. */ absl::StatusOr<::substrait::proto::Plan> loadPlan( - std::string_view input_filename, - bool force_binary = false); + std::string_view input_filename); /* * \brief Writes the provided plan to disk. diff --git a/src/substrait/common/Io.cpp b/src/substrait/common/Io.cpp index fde3422f..a90ff853 100644 --- a/src/substrait/common/Io.cpp +++ b/src/substrait/common/Io.cpp @@ -37,10 +37,8 @@ PlanFileFormat detectFormat(std::string_view content) { } // namespace absl::StatusOr<::substrait::proto::Plan> loadPlan( - std::string_view input_filename, - bool forceBinary) { - auto contentOrError = - textplan::readFromFile(input_filename.data(), forceBinary); + std::string_view input_filename) { + auto contentOrError = textplan::readFromFile(input_filename.data()); if (!contentOrError.ok()) { return contentOrError.status(); } diff --git a/src/substrait/common/tests/IoTest.cpp b/src/substrait/common/tests/IoTest.cpp index 7d0b0bea..3f5a2593 100644 --- a/src/substrait/common/tests/IoTest.cpp +++ b/src/substrait/common/tests/IoTest.cpp @@ -87,16 +87,7 @@ TEST_P(SaveAndLoadTestFixture, SaveAndLoad) { read->mutable_named_table()->add_names("table_name"); auto status = ::io::substrait::savePlan(plan, tempFilename, encoding); ASSERT_TRUE(status.ok()) << "Save failed.\n" << status; - -#ifdef _WIN32 - // Windows cannot rely on io::substrait::loadPlan to detect the file format, - // since it needs to a-priori specify how the file should be loaded. - bool forceBinary = encoding == PlanFileFormat::kBinary; - auto result = ::io::substrait::loadPlan(tempFilename, forceBinary); -#else auto result = ::io::substrait::loadPlan(tempFilename); -#endif - ASSERT_TRUE(result.ok()) << "Load failed.\n" << result.status(); ASSERT_THAT( *result, diff --git a/src/substrait/textplan/converter/LoadBinary.cpp b/src/substrait/textplan/converter/LoadBinary.cpp index c464587b..f1d8696b 100644 --- a/src/substrait/textplan/converter/LoadBinary.cpp +++ b/src/substrait/textplan/converter/LoadBinary.cpp @@ -42,15 +42,8 @@ class StringErrorCollector : public google::protobuf::io::ErrorCollector { } // namespace -absl::StatusOr readFromFile( - std::string_view msgPath, - bool forceBinary) { - std::ifstream file; - if (forceBinary) - file.open(std::string{msgPath}, std::ios::binary); - else - file.open(std::string{msgPath}, std::ios::in); - +absl::StatusOr readFromFile(std::string_view msgPath) { + std::ifstream file(std::string{msgPath}, std::ios::binary); if (file.fail()) { auto currDir = std::filesystem::current_path().string(); return absl::ErrnoToStatus( diff --git a/src/substrait/textplan/converter/LoadBinary.h b/src/substrait/textplan/converter/LoadBinary.h index 8bad9166..c501af1d 100644 --- a/src/substrait/textplan/converter/LoadBinary.h +++ b/src/substrait/textplan/converter/LoadBinary.h @@ -14,9 +14,7 @@ namespace io::substrait::textplan { // Read the contents of a file from disk. 'forceBinary' enables file reading in // binary mode. -absl::StatusOr readFromFile( - std::string_view msgPath, - bool forceBinary = false); +absl::StatusOr readFromFile(std::string_view msgPath); // Reads a plan from a json-encoded text proto. // Returns a list of errors if the file cannot be parsed. From 2cdf91042973ff058c70054b33fae605fff64591 Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Mon, 3 Mar 2025 13:14:08 +0100 Subject: [PATCH 14/21] Copy planloader.dll to planloader test dir --- export/planloader/tests/CMakeLists.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/export/planloader/tests/CMakeLists.txt b/export/planloader/tests/CMakeLists.txt index 4115612b..d5d89ebe 100644 --- a/export/planloader/tests/CMakeLists.txt +++ b/export/planloader/tests/CMakeLists.txt @@ -32,3 +32,14 @@ add_custom_command( message( STATUS "test data will be here: ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data" ) + +# For Windows; copy planloader dll to the test executable directory so that it +# can be found during test execution +if(WIN32) + add_custom_command( + TARGET planloader_test + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + $ + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests") +endif() From f4a706e9433eb5f55c743f48da71d99eb609b8ab Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Mon, 3 Mar 2025 13:44:09 +0100 Subject: [PATCH 15/21] Portable function lookup test --- src/substrait/function/tests/FunctionLookupTest.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/substrait/function/tests/FunctionLookupTest.cpp b/src/substrait/function/tests/FunctionLookupTest.cpp index 8bbff887..5460cdb0 100644 --- a/src/substrait/function/tests/FunctionLookupTest.cpp +++ b/src/substrait/function/tests/FunctionLookupTest.cpp @@ -2,6 +2,7 @@ #include +#include #include #include "substrait/function/FunctionLookup.h" @@ -12,9 +13,10 @@ class FunctionLookupTest : public ::testing::Test { protected: static std::string getExtensionAbsolutePath() { const std::string absolutePath = __FILE__; - auto const pos = absolutePath.find_last_of('/'); - return absolutePath.substr(0, pos) + - "/../../../../third_party/substrait/extensions/"; + std::filesystem::path path(absolutePath); + std::filesystem::path parentDir = path.parent_path(); + return (parentDir / "../../../../third_party/substrait/extensions/") + .string(); } void SetUp() override { From 56f3619938430e92e763bbbd0b1b6d31c19c7188 Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Mon, 3 Mar 2025 13:57:10 +0100 Subject: [PATCH 16/21] disable text format tests on windows --- src/substrait/common/tests/IoTest.cpp | 20 ++++++++++++++----- .../parser/tests/TextPlanParserTest.cpp | 11 ++++++++-- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/substrait/common/tests/IoTest.cpp b/src/substrait/common/tests/IoTest.cpp index 3f5a2593..e3bd33a4 100644 --- a/src/substrait/common/tests/IoTest.cpp +++ b/src/substrait/common/tests/IoTest.cpp @@ -109,14 +109,24 @@ TEST_P(SaveAndLoadTestFixture, SaveAndLoad) { })"))); } +static auto getFormats() { + return testing::Values( + PlanFileFormat::kBinary, + PlanFileFormat::kJson, + PlanFileFormat::kProtoText + +#ifndef _WIN32 + // Text format is currently not supported on Windows + , + PlanFileFormat::kText +#endif + ); +} + INSTANTIATE_TEST_SUITE_P( SaveAndLoadTests, SaveAndLoadTestFixture, - testing::Values( - PlanFileFormat::kBinary, - PlanFileFormat::kJson, - PlanFileFormat::kProtoText, - PlanFileFormat::kText), + getFormats(), [](const testing::TestParamInfo& info) { return planFileEncodingToString(info.param); }); diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index 461dbd64..ecbfdf72 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -27,7 +27,14 @@ class TestCase { ::testing::Matcher expectedMatch; }; -class TextPlanParserTestFixture : public ::testing::TestWithParam {}; +class TextPlanParserTestFixture : public ::testing::TestWithParam { + void SetUp() override { +#if defined(_WIN32) || defined(_WIN64) + GTEST_SKIP() << "Skipping textplanparser test on Windows."; +#endif + ::testing::TestWithParam::SetUp(); + } +}; std::vector getTestCases() { static std::vector cases = { @@ -1301,7 +1308,7 @@ std::vector getTestCases() { return cases; } -TEST(TextPlanParser, LoadFromFile) { +TEST_P(TextPlanParserTestFixture, LoadFromFile) { auto stream = loadTextFile("data/provided_sample1.splan"); ASSERT_TRUE(stream.has_value()) << "Test input file missing."; auto result = parseStream(&*stream); From 8efd44ac28b9e1f844af86c8524278bb4d780f57 Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Mon, 3 Mar 2025 13:59:29 +0100 Subject: [PATCH 17/21] format --- export/planloader/tests/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/export/planloader/tests/CMakeLists.txt b/export/planloader/tests/CMakeLists.txt index d5d89ebe..809258be 100644 --- a/export/planloader/tests/CMakeLists.txt +++ b/export/planloader/tests/CMakeLists.txt @@ -39,7 +39,6 @@ if(WIN32) add_custom_command( TARGET planloader_test POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy - $ + COMMAND ${CMAKE_COMMAND} -E copy $ "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests") endif() From 2d69be3ff704103a539c266a8aa942a529832819 Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Mon, 3 Mar 2025 14:03:29 +0100 Subject: [PATCH 18/21] NFC --- src/substrait/textplan/converter/LoadBinary.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/substrait/textplan/converter/LoadBinary.h b/src/substrait/textplan/converter/LoadBinary.h index c501af1d..a4d4e38f 100644 --- a/src/substrait/textplan/converter/LoadBinary.h +++ b/src/substrait/textplan/converter/LoadBinary.h @@ -12,8 +12,7 @@ class Plan; namespace io::substrait::textplan { -// Read the contents of a file from disk. 'forceBinary' enables file reading in -// binary mode. +// Read the contents of a file from disk. absl::StatusOr readFromFile(std::string_view msgPath); // Reads a plan from a json-encoded text proto. From 010c188ce65d2ad626ae07b4042f4ae5473aacd1 Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Mon, 3 Mar 2025 14:04:00 +0100 Subject: [PATCH 19/21] newline --- .github/workflows/build_test.yml | 2 +- export/planloader/tests/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index d98ff756..e61ec1bf 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -72,4 +72,4 @@ jobs: ninja -C build - name: Test - run: ctest --test-dir build --output-on-failure --timeout 30 \ No newline at end of file + run: ctest --test-dir build --output-on-failure --timeout 30 diff --git a/export/planloader/tests/CMakeLists.txt b/export/planloader/tests/CMakeLists.txt index 809258be..bb59fcf7 100644 --- a/export/planloader/tests/CMakeLists.txt +++ b/export/planloader/tests/CMakeLists.txt @@ -34,7 +34,7 @@ message( ) # For Windows; copy planloader dll to the test executable directory so that it -# can be found during test execution +# can be found during test execution. if(WIN32) add_custom_command( TARGET planloader_test From 03b4794bb4e0bf8fe1af293310a7e25a7732d6bc Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Mon, 3 Mar 2025 15:31:35 +0100 Subject: [PATCH 20/21] retrigger CI --- export/planloader/tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/export/planloader/tests/CMakeLists.txt b/export/planloader/tests/CMakeLists.txt index bb59fcf7..b44dbdf8 100644 --- a/export/planloader/tests/CMakeLists.txt +++ b/export/planloader/tests/CMakeLists.txt @@ -33,7 +33,7 @@ message( STATUS "test data will be here: ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data" ) -# For Windows; copy planloader dll to the test executable directory so that it +# For Windows: copy planloader dll to the test executable directory so that it # can be found during test execution. if(WIN32) add_custom_command( From acd684187c5ffd54800858a15300e3ebdba0470c Mon Sep 17 00:00:00 2001 From: Morten Borup Petersen Date: Mon, 3 Mar 2025 15:32:34 +0100 Subject: [PATCH 21/21] rename pipeline --- .github/workflows/build_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index e61ec1bf..686e9135 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -1,5 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -name: Ubuntu Build & Test +name: Build & Test on: push: