From 7447a0f0269e05a8f60dbcf45fc5cee333caa29c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 26 May 2026 13:59:21 +0200 Subject: [PATCH 01/11] GH-46369: [C++] Add key-value pairs to the FileSystemFactory interface --- cpp/src/arrow/filesystem/filesystem.cc | 28 +++++++--- cpp/src/arrow/filesystem/filesystem.h | 68 +++++++++++++++++++++++- cpp/src/arrow/filesystem/localfs_test.cc | 19 +++++-- cpp/src/arrow/filesystem/s3fs.cc | 8 +-- cpp/src/arrow/testing/examplefs.cc | 5 +- 5 files changed, 111 insertions(+), 17 deletions(-) diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index 8281bed7ce16..75b9616ff00f 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -893,10 +893,10 @@ Status LoadFileSystemFactories(const char* libpath) { namespace { -Result> FileSystemFromUriReal(const Uri& uri, - const std::string& uri_string, - const io::IOContext& io_context, - std::string* out_path) { +Result> FileSystemFromUriReal( + const Uri& uri, const std::string& uri_string, + const std::vector>& options, + const io::IOContext& io_context, std::string* out_path) { const auto scheme = uri.scheme(); { @@ -904,7 +904,7 @@ Result> FileSystemFromUriReal(const Uri& uri, auto* factory, FileSystemFactoryRegistry::GetInstance()->FactoryForScheme(scheme)); if (factory != nullptr) { - return factory->function(uri, io_context, out_path); + return factory->function(uri, options, io_context, out_path); } } @@ -962,14 +962,28 @@ Result> FileSystemFromUriReal(const Uri& uri, Result> FileSystemFromUri(const std::string& uri_string, std::string* out_path) { - return FileSystemFromUri(uri_string, io::default_io_context(), out_path); + return FileSystemFromUri(uri_string, /*options=*/{}, io::default_io_context(), + out_path); +} + +Result> FileSystemFromUri( + const std::string& uri_string, + const std::vector>& options, std::string* out_path) { + return FileSystemFromUri(uri_string, options, io::default_io_context(), out_path); } Result> FileSystemFromUri(const std::string& uri_string, const io::IOContext& io_context, std::string* out_path) { + return FileSystemFromUri(uri_string, /*options=*/{}, io_context, out_path); +} + +Result> FileSystemFromUri( + const std::string& uri_string, + const std::vector>& options, + const io::IOContext& io_context, std::string* out_path) { ARROW_ASSIGN_OR_RAISE(auto fsuri, ParseFileSystemUri(uri_string)); - return FileSystemFromUriReal(fsuri, uri_string, io_context, out_path); + return FileSystemFromUriReal(fsuri, uri_string, options, io_context, out_path); } Result> FileSystemFromUriOrPath(const std::string& uri_string, diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 3a47eb62f524..5c4fc3c42c9e 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -28,6 +29,7 @@ #include "arrow/filesystem/type_fwd.h" #include "arrow/io/interfaces.h" +#include "arrow/result.h" #include "arrow/type_fwd.h" #include "arrow/util/compare.h" #include "arrow/util/macros.h" @@ -359,11 +361,34 @@ class ARROW_EXPORT FileSystem struct FileSystemFactory { std::function>( - const Uri& uri, const io::IOContext& io_context, std::string* out_path)> + const Uri& uri, const std::vector>& options, + const io::IOContext& io_context, std::string* out_path)> function; std::string_view file; int line; + /// Construct from an options-aware factory function. + FileSystemFactory(std::function>( + const Uri&, const std::vector>&, + const io::IOContext&, std::string*)> + fn, + std::string_view file, int line) + : function(std::move(fn)), file(file), line(line) {} + + /// Construct from a non-options aware factory function maintaing source compatibility + /// with existing factories. + FileSystemFactory(std::function>( + const Uri&, const io::IOContext&, std::string*)> + fn, + std::string_view file, int line) + : function([fn = std::move(fn)]( + const Uri& uri, + const std::vector>& /*ignored*/, + const io::IOContext& ctx, + std::string* out_path) { return fn(uri, ctx, out_path); }), + file(file), + line(line) {} + bool operator==(const FileSystemFactory& other) const { // In the case where libarrow is linked statically both to the executable and to a // dynamically loaded filesystem implementation library, the library contains a @@ -547,6 +572,26 @@ ARROW_EXPORT Result> FileSystemFromUri(const std::string& uri, std::string* out_path = NULLPTR); +/// \brief Create a new FileSystem by URI with extended backend-specific filesystem +/// options +/// +/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3", +/// "gs" and "gcs". +/// +/// Support for other schemes can be added using RegisterFileSystemFactory. +/// +/// \param[in] uri the URI to give access to +/// \param[in] options a list of backend-specific filesystem options +/// Each option is a (name, value) pair. +/// The expected type is specific to the backend and +/// option name. +/// \param[out] out_path (optional) Path inside the filesystem. +/// \return out_fs FileSystem instance. +ARROW_EXPORT +Result> FileSystemFromUri( + const std::string& uri, const std::vector>& options, + std::string* out_path = NULLPTR); + /// \brief Create a new FileSystem by URI with a custom IO context /// /// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3", @@ -563,6 +608,27 @@ Result> FileSystemFromUri(const std::string& uri, const io::IOContext& io_context, std::string* out_path = NULLPTR); +/// \brief Create a new FileSystem by URI with a custom IO context with backend-specific +/// filesystem options +/// +/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3", +/// "gs" and "gcs". +/// +/// Support for other schemes can be added using RegisterFileSystemFactory. +/// +/// \param[in] uri a URI-based path, ex: file:///some/local/path +/// \param[in] options a list of backend-specific filesystem options +/// Each option is a (name, value) pair. +/// The expected type is specific to the backend and +/// option name. +/// \param[in] io_context an IOContext which will be associated with the filesystem +/// \param[out] out_path (optional) Path inside the filesystem. +/// \return out_fs FileSystem instance. +ARROW_EXPORT +Result> FileSystemFromUri( + const std::string& uri, const std::vector>& options, + const io::IOContext& io_context, std::string* out_path = NULLPTR); + /// \brief Create a new FileSystem by URI /// /// Support for other schemes can be added using RegisterFileSystemFactory. diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index 2e91783c92dc..c63f4bfbf1cd 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -171,10 +171,21 @@ TEST(FileSystemFromUri, RuntimeRegisteredFactory) { } FileSystemRegistrar kSegfaultFileSystemModule[]{ - ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}), - ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}), - ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}), -}; + ARROW_REGISTER_FILESYSTEM( + "segfault", + std::function>( + const Uri&, const io::IOContext&, std::string*)>(nullptr), + {}), + ARROW_REGISTER_FILESYSTEM( + "segfault", + std::function>( + const Uri&, const io::IOContext&, std::string*)>(nullptr), + {}), + ARROW_REGISTER_FILESYSTEM( + "segfault", + std::function>( + const Uri&, const io::IOContext&, std::string*)>(nullptr), + {})}; TEST(FileSystemFromUri, LinkedRegisteredFactoryNameCollision) { // Since multiple registrars are defined in this translation unit which all // register factories for the 'segfault' scheme, using that scheme in FileSystemFromUri diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 0c15f6f18444..d5eb31a16458 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -3603,11 +3603,13 @@ Result ResolveS3BucketRegion(const std::string& bucket) { auto kS3FileSystemModule = ARROW_REGISTER_FILESYSTEM( "s3", - [](const arrow::util::Uri& uri, const io::IOContext& io_context, + [](const arrow::util::Uri& uri, + const std::vector>& options, + const io::IOContext& io_context, std::string* out_path) -> Result> { RETURN_NOT_OK(EnsureS3Initialized()); - ARROW_ASSIGN_OR_RAISE(auto options, S3Options::FromUri(uri, out_path)); - return S3FileSystem::Make(options, io_context); + ARROW_ASSIGN_OR_RAISE(auto s3_options, S3Options::FromUri(uri, out_path)); + return S3FileSystem::Make(s3_options, io_context); }, [] { DCHECK_OK(EnsureS3Finalized()); }); diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index 5c9d5f9d9071..eab26349e2ef 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -26,12 +26,13 @@ namespace arrow::fs { auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM( "example", - [](const Uri& uri, const io::IOContext& io_context, + [](const Uri& uri, const std::vector>& options, + const io::IOContext& io_context, std::string* out_path) -> Result> { constexpr std::string_view kScheme = "example"; EXPECT_EQ(uri.scheme(), kScheme); auto local_uri = "file" + uri.ToString().substr(kScheme.size()); - return FileSystemFromUri(local_uri, io_context, out_path); + return FileSystemFromUri(local_uri, options, io_context, out_path); }, {}); From ce6aed2125777acb514ba1b171cae3c636b72370 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 8 Jun 2026 11:52:02 +0200 Subject: [PATCH 02/11] Add test for extra options being passed and improve documentation --- cpp/src/arrow/filesystem/filesystem.h | 10 +++++++++- cpp/src/arrow/filesystem/localfs_test.cc | 11 ++++++++++- cpp/src/arrow/testing/examplefs.cc | 23 ++++++++++++++++++++++- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 5c4fc3c42c9e..617d061251a1 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -375,7 +375,7 @@ struct FileSystemFactory { std::string_view file, int line) : function(std::move(fn)), file(file), line(line) {} - /// Construct from a non-options aware factory function maintaing source compatibility + /// Construct from a non-options aware factory function maintaining source compatibility /// with existing factories. FileSystemFactory(std::function>( const Uri&, const io::IOContext&, std::string*)> @@ -585,6 +585,10 @@ Result> FileSystemFromUri(const std::string& uri, /// Each option is a (name, value) pair. /// The expected type is specific to the backend and /// option name. +/// Options are only forwarded to schemes dispatched through a +/// registered FileSystemFactory (currently "s3" and any scheme +/// added via RegisterFileSystemFactory). They are ignored by the +/// built-in schemes. /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. ARROW_EXPORT @@ -621,6 +625,10 @@ Result> FileSystemFromUri(const std::string& uri, /// Each option is a (name, value) pair. /// The expected type is specific to the backend and /// option name. +/// Options are only forwarded to schemes dispatched through a +/// registered FileSystemFactory (currently "s3" and any scheme +/// added via RegisterFileSystemFactory). They are ignored by the +/// built-in schemes. /// \param[in] io_context an IOContext which will be associated with the filesystem /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index c63f4bfbf1cd..47cb9e8adbb4 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include @@ -144,10 +145,18 @@ TEST(FileSystemFromUri, LoadedRegisteredFactory) { EXPECT_THAT(FileSystemFromUri("example:///hey/yo", &path), Raises(StatusCode::Invalid)); EXPECT_THAT(LoadFileSystemFactories(ARROW_FILESYSTEM_EXAMPLE_LIBPATH), Ok()); - ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFromUri("example:///hey/yo", &path)); EXPECT_EQ(path, "/hey/yo"); EXPECT_EQ(fs->type_name(), "local"); + + // Validate extra options are forwarded to the factory. + std::vector> options{ + {"example_option_string", std::string("example_value")}, + {"example_option_int", 42}, + }; + ASSERT_OK_AND_ASSIGN(fs, FileSystemFromUri("example:///hey/yo", options, &path)); + EXPECT_EQ(path, "/hey/yo/example_value/42"); + EXPECT_EQ(fs->type_name(), "local"); } TEST(FileSystemFromUri, RuntimeRegisteredFactory) { diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index eab26349e2ef..2a1d07a8c4f6 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +#include +#include + #include "arrow/filesystem/filesystem.h" #include "arrow/filesystem/filesystem_library.h" #include "arrow/result.h" @@ -32,7 +35,25 @@ auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM( constexpr std::string_view kScheme = "example"; EXPECT_EQ(uri.scheme(), kScheme); auto local_uri = "file" + uri.ToString().substr(kScheme.size()); - return FileSystemFromUri(local_uri, options, io_context, out_path); + ARROW_ASSIGN_OR_RAISE(auto fs, + FileSystemFromUri(local_uri, options, io_context, out_path)); + for (const auto& [key, value] : options) { + EXPECT_TRUE(value.has_value()); + if (key == "example_option_string") { + EXPECT_TRUE(value.type() == typeid(std::string)); + if (out_path != nullptr) { + *out_path += "/" + std::any_cast(value); + } + } else if (key == "example_option_int") { + EXPECT_TRUE(value.type() == typeid(int)); + if (out_path != nullptr) { + *out_path += "/" + std::to_string(std::any_cast(value)); + } + } else { + ADD_FAILURE() << "Unexpected option: " << key; + } + } + return fs; }, {}); From 426ad3b42948e7b5a0c5b43ae889d997d59b858a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 8 Jun 2026 12:45:15 +0200 Subject: [PATCH 03/11] Reject non-empty options, add tests to validate it and add missing plumbing for ARROW_S3_MODULE to CI --- ci/scripts/cpp_build.sh | 2 ++ cpp/src/arrow/filesystem/filesystem.cc | 4 ++++ cpp/src/arrow/filesystem/filesystem_test.cc | 6 ++++++ cpp/src/arrow/filesystem/s3fs.cc | 5 +++++ cpp/src/arrow/filesystem/s3fs_module_test.cc | 8 ++++++++ 5 files changed, 25 insertions(+) diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 1e2f3e8f8f1a..3d9b2ba72d41 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -70,6 +70,7 @@ if [ "${ARROW_ENABLE_THREADING:-ON}" = "OFF" ]; then ARROW_JEMALLOC=OFF ARROW_MIMALLOC=OFF ARROW_S3=OFF + ARROW_S3_MODULE=OFF ARROW_WITH_OPENTELEMETRY=OFF fi @@ -229,6 +230,7 @@ else -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ -DARROW_S3=${ARROW_S3:-OFF} \ + -DARROW_S3_MODULE=${ARROW_S3_MODULE:-OFF} \ -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \ -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \ -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \ diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index 75b9616ff00f..354c2d156b29 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -907,6 +907,10 @@ Result> FileSystemFromUriReal( return factory->function(uri, options, io_context, out_path); } } + if (!options.empty()) { + return Status::NotImplemented("Filesystem options are not supported yet for scheme '", + scheme, "', got ", options.size(), " option(s)"); + } if (scheme == "abfs" || scheme == "abfss") { #ifdef ARROW_AZURE diff --git a/cpp/src/arrow/filesystem/filesystem_test.cc b/cpp/src/arrow/filesystem/filesystem_test.cc index 5072c3a8c25b..c874af96a667 100644 --- a/cpp/src/arrow/filesystem/filesystem_test.cc +++ b/cpp/src/arrow/filesystem/filesystem_test.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include @@ -640,6 +641,11 @@ TEST_F(TestMockFS, FileSystemFromUri) { Invalid, ::testing::HasSubstr("syntax error at character ' ' (position 12)"), FileSystemFromUri("mock:/folder name/bar", &path)); CheckDirs({}); + std::vector> options{{"some_option", 1}}; + EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, + ::testing::HasSubstr("options are not supported"), + FileSystemFromUri("mock:///foo/bar", options, &path)); + CheckDirs({}); } //////////////////////////////////////////////////////////////////////////// diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index d5eb31a16458..639e3037f395 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -3607,6 +3607,11 @@ auto kS3FileSystemModule = ARROW_REGISTER_FILESYSTEM( const std::vector>& options, const io::IOContext& io_context, std::string* out_path) -> Result> { + if (!options.empty()) { + return Status::NotImplemented( + "S3 filesystem factory options are not supported yet, got: ", options.size(), + " options"); + } RETURN_NOT_OK(EnsureS3Initialized()); ARROW_ASSIGN_OR_RAISE(auto s3_options, S3Options::FromUri(uri, out_path)); return S3FileSystem::Make(s3_options, io_context); diff --git a/cpp/src/arrow/filesystem/s3fs_module_test.cc b/cpp/src/arrow/filesystem/s3fs_module_test.cc index 987a8979b271..345562ac64d9 100644 --- a/cpp/src/arrow/filesystem/s3fs_module_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_module_test.cc @@ -16,6 +16,7 @@ // under the License. #include +#include #include #include #include @@ -82,4 +83,11 @@ TEST(S3Test, FromUri) { "&allow_bucket_creation=0&allow_bucket_deletion=0"); } +TEST(S3Test, FromUriRejectsOptions) { + std::vector> options{{"some_option", 1}}; + EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, + ::testing::HasSubstr("options are not supported"), + FileSystemFromUri("s3://bucket/key", options)); +} + } // namespace arrow::fs From 20069f4762ed921f0f2b83d69114b433bcdb4d0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 8 Jun 2026 13:21:24 +0200 Subject: [PATCH 04/11] Fix CI and minor improvement on docstring --- cpp/src/arrow/filesystem/CMakeLists.txt | 2 +- cpp/src/arrow/filesystem/filesystem.h | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index e6330df42603..ee46f4d256ce 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -128,7 +128,7 @@ if(ARROW_S3) endif() endif() - if(ARROW_S3_MODULE) + if(ARROW_S3_MODULE AND ARROW_BUILD_TESTS) add_arrow_test(s3fs_module_test SOURCES s3fs_module_test.cc diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 617d061251a1..2f334edbc498 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -585,10 +585,11 @@ Result> FileSystemFromUri(const std::string& uri, /// Each option is a (name, value) pair. /// The expected type is specific to the backend and /// option name. -/// Options are only forwarded to schemes dispatched through a -/// registered FileSystemFactory (currently "s3" and any scheme -/// added via RegisterFileSystemFactory). They are ignored by the -/// built-in schemes. +/// Options are forwarded to schemes dispatched through a registered +/// FileSystemFactory. A registered factory that does not support options +/// (currently "s3") returns NotImplemented for non-empty options. +/// Schemes not handled by a registered factory will also return +/// NotImplemented for non-empty options. /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. ARROW_EXPORT @@ -625,10 +626,11 @@ Result> FileSystemFromUri(const std::string& uri, /// Each option is a (name, value) pair. /// The expected type is specific to the backend and /// option name. -/// Options are only forwarded to schemes dispatched through a -/// registered FileSystemFactory (currently "s3" and any scheme -/// added via RegisterFileSystemFactory). They are ignored by the -/// built-in schemes. +/// Options are forwarded to schemes dispatched through a registered +/// FileSystemFactory. A registered factory that does not support options +/// (currently "s3") returns NotImplemented for non-empty options. +/// Schemes not handled by a registered factory will also return +/// NotImplemented for non-empty options. /// \param[in] io_context an IOContext which will be associated with the filesystem /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. From ce9fd8a007ff38f4622948c690d461e99ca2e21b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 9 Jun 2026 12:47:48 +0200 Subject: [PATCH 05/11] Add test for typed option to validate std::any_cast cross DSO --- cpp/src/arrow/filesystem/localfs_test.cc | 11 ++++++++++- cpp/src/arrow/testing/examplefs.cc | 10 ++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index 47cb9e8adbb4..bff1a73acda6 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -30,6 +30,7 @@ #include "arrow/filesystem/path_util.h" #include "arrow/filesystem/test_util.h" #include "arrow/filesystem/util_internal.h" +#include "arrow/testing/examplefs.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" #include "arrow/util/io_util.h" @@ -84,6 +85,12 @@ Result> FSFromUriOrPath(const std::string& uri, //////////////////////////////////////////////////////////////////////////// // Registered FileSystemFactory tests +struct ConcreteTypedOption : ExampleTypedOption { + explicit ConcreteTypedOption(int value) : value_(value) {} + int value() const override { return value_; } + int value_; +}; + class SlowFileSystemPublicProps : public SlowFileSystem { public: SlowFileSystemPublicProps(std::shared_ptr base_fs, double average_latency, @@ -153,9 +160,11 @@ TEST(FileSystemFromUri, LoadedRegisteredFactory) { std::vector> options{ {"example_option_string", std::string("example_value")}, {"example_option_int", 42}, + {"example_typed_option", + std::shared_ptr(std::make_shared(12345))}, }; ASSERT_OK_AND_ASSIGN(fs, FileSystemFromUri("example:///hey/yo", options, &path)); - EXPECT_EQ(path, "/hey/yo/example_value/42"); + EXPECT_EQ(path, "/hey/yo/example_value/42/12345"); EXPECT_EQ(fs->type_name(), "local"); } diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index 2a1d07a8c4f6..819f12a2abd7 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -21,6 +21,7 @@ #include "arrow/filesystem/filesystem.h" #include "arrow/filesystem/filesystem_library.h" #include "arrow/result.h" +#include "arrow/testing/examplefs.h" #include "arrow/util/uri.h" #include @@ -49,6 +50,15 @@ auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM( if (out_path != nullptr) { *out_path += "/" + std::to_string(std::any_cast(value)); } + } else if (key == "example_typed_option") { + if (const auto* opt = + std::any_cast>(&value)) { + if (out_path != nullptr) { + *out_path += "/" + std::to_string((*opt)->value()); + } + } else if (out_path != nullptr) { + *out_path += "/typed_cast_failed"; + } } else { ADD_FAILURE() << "Unexpected option: " << key; } From 94142b282d953da89174a9c5c4cc43c7be51f957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 9 Jun 2026 12:52:23 +0200 Subject: [PATCH 06/11] Add missing new header file from commit :) --- cpp/src/arrow/testing/examplefs.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 cpp/src/arrow/testing/examplefs.h diff --git a/cpp/src/arrow/testing/examplefs.h b/cpp/src/arrow/testing/examplefs.h new file mode 100644 index 000000000000..e04b84c982ad --- /dev/null +++ b/cpp/src/arrow/testing/examplefs.h @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +namespace arrow::fs { + +class ExampleTypedOption { + public: + virtual ~ExampleTypedOption() = default; + virtual int value() const = 0; +}; + +} // namespace arrow::fs From ddac9dfdf39b43b122933218840d08602f370d2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 9 Jun 2026 15:09:22 +0200 Subject: [PATCH 07/11] Several review fixes, rename to FileSystemFromUriAndOptions instead of FileSystemFromUri, do not silently ignore non-empty options if underlying FS does not support them and use FileSystemFactoryOptions instead of std::vector> everywhere --- cpp/src/arrow/filesystem/filesystem.cc | 24 ++++++++--------- cpp/src/arrow/filesystem/filesystem.h | 27 ++++++++++++-------- cpp/src/arrow/filesystem/filesystem_test.cc | 8 +++--- cpp/src/arrow/filesystem/localfs_test.cc | 5 ++-- cpp/src/arrow/filesystem/s3fs.cc | 3 +-- cpp/src/arrow/filesystem/s3fs_module_test.cc | 8 +++--- cpp/src/arrow/testing/examplefs.cc | 5 ++-- 7 files changed, 43 insertions(+), 37 deletions(-) diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index 354c2d156b29..f92336e004ea 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -895,8 +895,8 @@ namespace { Result> FileSystemFromUriReal( const Uri& uri, const std::string& uri_string, - const std::vector>& options, - const io::IOContext& io_context, std::string* out_path) { + const FileSystemFactoryOptions& options, const io::IOContext& io_context, + std::string* out_path) { const auto scheme = uri.scheme(); { @@ -966,25 +966,25 @@ Result> FileSystemFromUriReal( Result> FileSystemFromUri(const std::string& uri_string, std::string* out_path) { - return FileSystemFromUri(uri_string, /*options=*/{}, io::default_io_context(), - out_path); + return FileSystemFromUriAndOptions(uri_string, /*options=*/{}, io::default_io_context(), + out_path); } -Result> FileSystemFromUri( - const std::string& uri_string, - const std::vector>& options, std::string* out_path) { - return FileSystemFromUri(uri_string, options, io::default_io_context(), out_path); +Result> FileSystemFromUriAndOptions( + const std::string& uri_string, const FileSystemFactoryOptions& options, + std::string* out_path) { + return FileSystemFromUriAndOptions(uri_string, options, io::default_io_context(), + out_path); } Result> FileSystemFromUri(const std::string& uri_string, const io::IOContext& io_context, std::string* out_path) { - return FileSystemFromUri(uri_string, /*options=*/{}, io_context, out_path); + return FileSystemFromUriAndOptions(uri_string, /*options=*/{}, io_context, out_path); } -Result> FileSystemFromUri( - const std::string& uri_string, - const std::vector>& options, +Result> FileSystemFromUriAndOptions( + const std::string& uri_string, const FileSystemFactoryOptions& options, const io::IOContext& io_context, std::string* out_path) { ARROW_ASSIGN_OR_RAISE(auto fsuri, ParseFileSystemUri(uri_string)); return FileSystemFromUriReal(fsuri, uri_string, options, io_context, out_path); diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 2f334edbc498..97a1a68a094a 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -359,9 +359,11 @@ class ARROW_EXPORT FileSystem bool default_async_is_sync_ = true; }; +using FileSystemFactoryOptions = std::vector>; + struct FileSystemFactory { std::function>( - const Uri& uri, const std::vector>& options, + const Uri& uri, const FileSystemFactoryOptions& options, const io::IOContext& io_context, std::string* out_path)> function; std::string_view file; @@ -369,8 +371,8 @@ struct FileSystemFactory { /// Construct from an options-aware factory function. FileSystemFactory(std::function>( - const Uri&, const std::vector>&, - const io::IOContext&, std::string*)> + const Uri&, const FileSystemFactoryOptions&, const io::IOContext&, + std::string*)> fn, std::string_view file, int line) : function(std::move(fn)), file(file), line(line) {} @@ -382,10 +384,15 @@ struct FileSystemFactory { fn, std::string_view file, int line) : function([fn = std::move(fn)]( - const Uri& uri, - const std::vector>& /*ignored*/, + const Uri& uri, const FileSystemFactoryOptions& options, const io::IOContext& ctx, - std::string* out_path) { return fn(uri, ctx, out_path); }), + std::string* out_path) -> Result> { + if (!options.empty()) { + return Status::NotImplemented( + "This filesystem does not support additional options"); + } + return fn(uri, ctx, out_path); + }), file(file), line(line) {} @@ -593,8 +600,8 @@ Result> FileSystemFromUri(const std::string& uri, /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. ARROW_EXPORT -Result> FileSystemFromUri( - const std::string& uri, const std::vector>& options, +Result> FileSystemFromUriAndOptions( + const std::string& uri, const FileSystemFactoryOptions& options, std::string* out_path = NULLPTR); /// \brief Create a new FileSystem by URI with a custom IO context @@ -635,8 +642,8 @@ Result> FileSystemFromUri(const std::string& uri, /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. ARROW_EXPORT -Result> FileSystemFromUri( - const std::string& uri, const std::vector>& options, +Result> FileSystemFromUriAndOptions( + const std::string& uri, const FileSystemFactoryOptions& options, const io::IOContext& io_context, std::string* out_path = NULLPTR); /// \brief Create a new FileSystem by URI diff --git a/cpp/src/arrow/filesystem/filesystem_test.cc b/cpp/src/arrow/filesystem/filesystem_test.cc index c874af96a667..10a3922e36ac 100644 --- a/cpp/src/arrow/filesystem/filesystem_test.cc +++ b/cpp/src/arrow/filesystem/filesystem_test.cc @@ -641,10 +641,10 @@ TEST_F(TestMockFS, FileSystemFromUri) { Invalid, ::testing::HasSubstr("syntax error at character ' ' (position 12)"), FileSystemFromUri("mock:/folder name/bar", &path)); CheckDirs({}); - std::vector> options{{"some_option", 1}}; - EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, - ::testing::HasSubstr("options are not supported"), - FileSystemFromUri("mock:///foo/bar", options, &path)); + FileSystemFactoryOptions options{{"some_option", 1}}; + EXPECT_RAISES_WITH_MESSAGE_THAT( + NotImplemented, ::testing::HasSubstr("options are not supported"), + FileSystemFromUriAndOptions("mock:///foo/bar", options, &path)); CheckDirs({}); } diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index bff1a73acda6..2dc6fe97d595 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -157,13 +157,14 @@ TEST(FileSystemFromUri, LoadedRegisteredFactory) { EXPECT_EQ(fs->type_name(), "local"); // Validate extra options are forwarded to the factory. - std::vector> options{ + FileSystemFactoryOptions options{ {"example_option_string", std::string("example_value")}, {"example_option_int", 42}, {"example_typed_option", std::shared_ptr(std::make_shared(12345))}, }; - ASSERT_OK_AND_ASSIGN(fs, FileSystemFromUri("example:///hey/yo", options, &path)); + ASSERT_OK_AND_ASSIGN(fs, + FileSystemFromUriAndOptions("example:///hey/yo", options, &path)); EXPECT_EQ(path, "/hey/yo/example_value/42/12345"); EXPECT_EQ(fs->type_name(), "local"); } diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 639e3037f395..1a488157cb05 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -3603,8 +3603,7 @@ Result ResolveS3BucketRegion(const std::string& bucket) { auto kS3FileSystemModule = ARROW_REGISTER_FILESYSTEM( "s3", - [](const arrow::util::Uri& uri, - const std::vector>& options, + [](const arrow::util::Uri& uri, const FileSystemFactoryOptions& options, const io::IOContext& io_context, std::string* out_path) -> Result> { if (!options.empty()) { diff --git a/cpp/src/arrow/filesystem/s3fs_module_test.cc b/cpp/src/arrow/filesystem/s3fs_module_test.cc index 345562ac64d9..c7b208192d06 100644 --- a/cpp/src/arrow/filesystem/s3fs_module_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_module_test.cc @@ -84,10 +84,10 @@ TEST(S3Test, FromUri) { } TEST(S3Test, FromUriRejectsOptions) { - std::vector> options{{"some_option", 1}}; - EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, - ::testing::HasSubstr("options are not supported"), - FileSystemFromUri("s3://bucket/key", options)); + FileSystemFactoryOptions options{{"some_option", 1}}; + EXPECT_RAISES_WITH_MESSAGE_THAT( + NotImplemented, ::testing::HasSubstr("options are not supported"), + FileSystemFromUriAndOptions("s3://bucket/key", options)); } } // namespace arrow::fs diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index 819f12a2abd7..cbe087a6d280 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -30,14 +30,13 @@ namespace arrow::fs { auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM( "example", - [](const Uri& uri, const std::vector>& options, + [](const Uri& uri, const FileSystemFactoryOptions& options, const io::IOContext& io_context, std::string* out_path) -> Result> { constexpr std::string_view kScheme = "example"; EXPECT_EQ(uri.scheme(), kScheme); auto local_uri = "file" + uri.ToString().substr(kScheme.size()); - ARROW_ASSIGN_OR_RAISE(auto fs, - FileSystemFromUri(local_uri, options, io_context, out_path)); + ARROW_ASSIGN_OR_RAISE(auto fs, FileSystemFromUri(local_uri, io_context, out_path)); for (const auto& [key, value] : options) { EXPECT_TRUE(value.has_value()); if (key == "example_option_string") { From eafbcd17d34a1859adbe3bb4fa30e46c7f47e0e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 9 Jun 2026 15:25:28 +0200 Subject: [PATCH 08/11] A couple more review fixes to make copilot happy --- cpp/src/arrow/filesystem/filesystem.h | 15 ++++++--------- cpp/src/arrow/testing/examplefs.cc | 14 ++++++++------ 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 97a1a68a094a..639c0ba8d4c3 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -593,10 +593,9 @@ Result> FileSystemFromUri(const std::string& uri, /// The expected type is specific to the backend and /// option name. /// Options are forwarded to schemes dispatched through a registered -/// FileSystemFactory. A registered factory that does not support options -/// (currently "s3") returns NotImplemented for non-empty options. -/// Schemes not handled by a registered factory will also return -/// NotImplemented for non-empty options. +/// FileSystemFactory. Non-empty options return NotImplemented for a registered +/// FileSystemFactory that does not support them or for schemes not handled by +/// a registered factory. /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. ARROW_EXPORT @@ -634,11 +633,9 @@ Result> FileSystemFromUri(const std::string& uri, /// The expected type is specific to the backend and /// option name. /// Options are forwarded to schemes dispatched through a registered -/// FileSystemFactory. A registered factory that does not support options -/// (currently "s3") returns NotImplemented for non-empty options. -/// Schemes not handled by a registered factory will also return -/// NotImplemented for non-empty options. -/// \param[in] io_context an IOContext which will be associated with the filesystem +/// FileSystemFactory. Non-empty options return NotImplemented for a registered +/// FileSystemFactory that does not support them or for schemes not handled by +/// a registered factory. /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. ARROW_EXPORT diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index cbe087a6d280..08fa9326e210 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -40,14 +40,16 @@ auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM( for (const auto& [key, value] : options) { EXPECT_TRUE(value.has_value()); if (key == "example_option_string") { - EXPECT_TRUE(value.type() == typeid(std::string)); - if (out_path != nullptr) { - *out_path += "/" + std::any_cast(value); + if (const auto* s = std::any_cast(&value)) { + if (out_path != nullptr) *out_path += "/" + *s; + } else { + ADD_FAILURE() << "example_option_string has wrong type"; } } else if (key == "example_option_int") { - EXPECT_TRUE(value.type() == typeid(int)); - if (out_path != nullptr) { - *out_path += "/" + std::to_string(std::any_cast(value)); + if (const auto* i = std::any_cast(&value)) { + if (out_path != nullptr) *out_path += "/" + std::to_string(*i); + } else { + ADD_FAILURE() << "example_option_int has wrong type"; } } else if (key == "example_typed_option") { if (const auto* opt = From 5c898f480a0fa2590e89049a8d56506c95ab7b92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 9 Jun 2026 15:38:09 +0200 Subject: [PATCH 09/11] Re-add wrongly removed doxygen param due to copy&paste error --- cpp/src/arrow/filesystem/filesystem.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 639c0ba8d4c3..250dfdc82148 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -636,6 +636,7 @@ Result> FileSystemFromUri(const std::string& uri, /// FileSystemFactory. Non-empty options return NotImplemented for a registered /// FileSystemFactory that does not support them or for schemes not handled by /// a registered factory. +/// \param[in] io_context an IOContext which will be associated with the filesystem /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. ARROW_EXPORT From c9c887cc0692810dd90feac87eb131a16dbaee51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 9 Jun 2026 16:27:37 +0200 Subject: [PATCH 10/11] Remove unnecessary include, update a couple of error messages, re-org tests for FileSystemFromUriAndOptions and add a new one to test old behaviour --- cpp/src/arrow/filesystem/filesystem.h | 3 +- cpp/src/arrow/filesystem/localfs_test.cc | 52 ++++++++++++++++++------ cpp/src/arrow/filesystem/s3fs.cc | 2 +- cpp/src/arrow/testing/examplefs.cc | 1 - 4 files changed, 43 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 250dfdc82148..a0478763a463 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -389,7 +389,8 @@ struct FileSystemFactory { std::string* out_path) -> Result> { if (!options.empty()) { return Status::NotImplemented( - "This filesystem does not support additional options"); + "Filesystem factory does not support additional options, got ", + options.size(), " option(s)"); } return fn(uri, ctx, out_path); }), diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index 2dc6fe97d595..212d91989659 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -155,18 +155,6 @@ TEST(FileSystemFromUri, LoadedRegisteredFactory) { ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFromUri("example:///hey/yo", &path)); EXPECT_EQ(path, "/hey/yo"); EXPECT_EQ(fs->type_name(), "local"); - - // Validate extra options are forwarded to the factory. - FileSystemFactoryOptions options{ - {"example_option_string", std::string("example_value")}, - {"example_option_int", 42}, - {"example_typed_option", - std::shared_ptr(std::make_shared(12345))}, - }; - ASSERT_OK_AND_ASSIGN(fs, - FileSystemFromUriAndOptions("example:///hey/yo", options, &path)); - EXPECT_EQ(path, "/hey/yo/example_value/42/12345"); - EXPECT_EQ(fs->type_name(), "local"); } TEST(FileSystemFromUri, RuntimeRegisteredFactory) { @@ -215,6 +203,46 @@ TEST(FileSystemFromUri, LinkedRegisteredFactoryNameCollision) { // other schemes are not affected by the collision EXPECT_THAT(FileSystemFromUri("slowfile:///hey/yo", &path), Ok()); } + +TEST(FileSystemFromUriAndOptions, LoadedRegisteredFactory) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Emscripten dynamic library testing disabled"; +#endif + std::string path; + EXPECT_THAT(LoadFileSystemFactories(ARROW_FILESYSTEM_EXAMPLE_LIBPATH), Ok()); + // Validate extra options are forwarded to the factory. + FileSystemFactoryOptions options{ + {"example_option_string", std::string("example_value")}, + {"example_option_int", 42}, + {"example_typed_option", + std::shared_ptr(std::make_shared(12345))}, + }; + ASSERT_OK_AND_ASSIGN(auto fs, + FileSystemFromUriAndOptions("example:///hey/yo", options, &path)); + EXPECT_EQ(path, "/hey/yo/example_value/42/12345"); + EXPECT_EQ(fs->type_name(), "local"); +} + +TEST(FileSystemFromUriAndOptions, RuntimeRegisteredFactory) { + std::string path; + EXPECT_THAT(FileSystemFromUriAndOptions("slowfile3:///hey/yo", {}, &path), + Raises(StatusCode::Invalid)); + + EXPECT_THAT( + RegisterFileSystemFactory("slowfile3", {SlowFileSystemFactory, __FILE__, __LINE__}), + Ok()); + + ASSERT_OK_AND_ASSIGN(auto fs, + FileSystemFromUriAndOptions("slowfile3:///hey/yo", {}, &path)); + EXPECT_EQ(path, "/hey/yo"); + EXPECT_EQ(fs->type_name(), "slow"); + + // Validate that legacy (3-arg) factories reject non-empty options. + FileSystemFactoryOptions unsupported{{"some_option", 1}}; + EXPECT_THAT(FileSystemFromUriAndOptions("slowfile3:///hey/yo", unsupported, &path), + Raises(StatusCode::NotImplemented)); +} + //////////////////////////////////////////////////////////////////////////// // Misc tests diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 1a488157cb05..515d8d252881 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -3609,7 +3609,7 @@ auto kS3FileSystemModule = ARROW_REGISTER_FILESYSTEM( if (!options.empty()) { return Status::NotImplemented( "S3 filesystem factory options are not supported yet, got: ", options.size(), - " options"); + " option(s)"); } RETURN_NOT_OK(EnsureS3Initialized()); ARROW_ASSIGN_OR_RAISE(auto s3_options, S3Options::FromUri(uri, out_path)); diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index 08fa9326e210..da4651acdc50 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -16,7 +16,6 @@ // under the License. #include -#include #include "arrow/filesystem/filesystem.h" #include "arrow/filesystem/filesystem_library.h" From 31ee32cc27d870944924217e83b4f5482c044063 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 8 Jun 2026 17:46:44 +0200 Subject: [PATCH 11/11] GH-50126: [C++] Make S3Filesystem consume key-value options Fix linting after rebase Add s3 fs module test for FromUriAndOptionsCredentials and refactor S3Options::FromUri and S3Options::FromUriAndOptions Add tests for FromUriAndOptions credentials Add S3RetryStrategy to options Add option for default_metadata Minor fix --- cpp/src/arrow/filesystem/s3fs.cc | 128 ++++++++++++++----- cpp/src/arrow/filesystem/s3fs.h | 18 +++ cpp/src/arrow/filesystem/s3fs_module_test.cc | 47 ++++++- cpp/src/arrow/filesystem/s3fs_test.cc | 86 +++++++++++++ 4 files changed, 248 insertions(+), 31 deletions(-) diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 515d8d252881..b2a3471ece6f 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -327,7 +327,68 @@ S3Options S3Options::FromAssumeRoleWithWebIdentity() { } Result S3Options::FromUri(const Uri& uri, std::string* out_path) { - S3Options options; + return FromUriAndOptions(uri, FileSystemFactoryOptions{}, out_path); +} + +Result S3Options::FromUri(const std::string& uri_string, + std::string* out_path) { + Uri uri; + RETURN_NOT_OK(uri.Parse(uri_string)); + return FromUri(uri, out_path); +} + +namespace { + +template +Result GetOption(const std::string& key, const std::any& value) { + if (const auto* v = std::any_cast(&value)) { + return *v; + } + return Status::Invalid("S3 filesystem option '", key, "' has the wrong type"); +} + +template +Result> GetConstSharedPtrOption(const std::string& key, + const std::any& value) { + if (const auto* v = std::any_cast>(&value)) return *v; + if (const auto* v = std::any_cast>(&value)) return *v; + return Status::Invalid("S3 filesystem option '", key, "' has the wrong type"); +} + +} // namespace + +Result S3Options::FromUriAndOptions(const ::arrow::util::Uri& uri, + const FileSystemFactoryOptions& options, + std::string* out_path) { + std::optional access_key, secret_key, session_token; + std::shared_ptr retry_strategy; + std::shared_ptr default_metadata; + for (const auto& [key, value] : options) { + if (key == "access_key") { + ARROW_ASSIGN_OR_RAISE(access_key, GetOption(key, value)); + } else if (key == "secret_key") { + ARROW_ASSIGN_OR_RAISE(secret_key, GetOption(key, value)); + } else if (key == "session_token") { + ARROW_ASSIGN_OR_RAISE(session_token, GetOption(key, value)); + } else if (key == "retry_strategy") { + ARROW_ASSIGN_OR_RAISE(retry_strategy, + GetOption>(key, value)); + } else if (key == "default_metadata") { + ARROW_ASSIGN_OR_RAISE(default_metadata, + GetConstSharedPtrOption(key, value)); + } else { + return Status::Invalid("Unexpected option for S3 filesystem: '", key, "'"); + } + } + + if (access_key.has_value() != secret_key.has_value()) { + return Status::Invalid( + "Both 'access_key' and 'secret_key' must be provided together"); + } + if (session_token.has_value() && !access_key.has_value()) { + return Status::Invalid("'session_token' requires 'access_key' and 'secret_key'"); + } + S3Options s3_options; const auto bucket = uri.host(); auto path = uri.path(); @@ -355,68 +416,81 @@ Result S3Options::FromUri(const Uri& uri, std::string* out_path) { options_map.emplace(kv.first, kv.second); } - const auto username = uri.username(); - if (!username.empty()) { - options.ConfigureAccessKey(username, uri.password()); + if (access_key.has_value()) { + s3_options.ConfigureAccessKey(*access_key, *secret_key, session_token.value_or("")); } else { - options.ConfigureDefaultCredentials(); + const auto username = uri.username(); + if (!username.empty()) { + s3_options.ConfigureAccessKey(username, uri.password()); + } else { + s3_options.ConfigureDefaultCredentials(); + } } + // Prefer AWS service-specific endpoint url auto s3_endpoint_env = arrow::internal::GetEnvVar(kAwsEndpointUrlS3EnvVar); if (s3_endpoint_env.ok()) { - options.endpoint_override = *s3_endpoint_env; + s3_options.endpoint_override = *s3_endpoint_env; } else { auto endpoint_env = arrow::internal::GetEnvVar(kAwsEndpointUrlEnvVar); if (endpoint_env.ok()) { - options.endpoint_override = *endpoint_env; + s3_options.endpoint_override = *endpoint_env; } } bool region_set = false; for (const auto& kv : options_map) { if (kv.first == "region") { - options.region = kv.second; + s3_options.region = kv.second; region_set = true; } else if (kv.first == "scheme") { - options.scheme = kv.second; + s3_options.scheme = kv.second; } else if (kv.first == "endpoint_override") { - options.endpoint_override = kv.second; + s3_options.endpoint_override = kv.second; } else if (kv.first == "allow_delayed_open") { - ARROW_ASSIGN_OR_RAISE(options.allow_delayed_open, + ARROW_ASSIGN_OR_RAISE(s3_options.allow_delayed_open, ::arrow::internal::ParseBoolean(kv.second)); } else if (kv.first == "allow_bucket_creation") { - ARROW_ASSIGN_OR_RAISE(options.allow_bucket_creation, + ARROW_ASSIGN_OR_RAISE(s3_options.allow_bucket_creation, ::arrow::internal::ParseBoolean(kv.second)); } else if (kv.first == "allow_bucket_deletion") { - ARROW_ASSIGN_OR_RAISE(options.allow_bucket_deletion, + ARROW_ASSIGN_OR_RAISE(s3_options.allow_bucket_deletion, ::arrow::internal::ParseBoolean(kv.second)); } else if (kv.first == "tls_ca_file_path") { - options.tls_ca_file_path = kv.second; + s3_options.tls_ca_file_path = kv.second; } else if (kv.first == "tls_ca_dir_path") { - options.tls_ca_dir_path = kv.second; + s3_options.tls_ca_dir_path = kv.second; } else if (kv.first == "tls_verify_certificates") { - ARROW_ASSIGN_OR_RAISE(options.tls_verify_certificates, + ARROW_ASSIGN_OR_RAISE(s3_options.tls_verify_certificates, ::arrow::internal::ParseBoolean(kv.second)); } else if (kv.first == "smart_defaults") { - options.smart_defaults = kv.second; + s3_options.smart_defaults = kv.second; } else { return Status::Invalid("Unexpected query parameter in S3 URI: '", kv.first, "'"); } } - if (!region_set && !bucket.empty() && options.endpoint_override.empty()) { + if (retry_strategy) { + s3_options.retry_strategy = std::move(retry_strategy); + } + if (default_metadata) { + s3_options.default_metadata = std::move(default_metadata); + } + + if (!region_set && !bucket.empty() && s3_options.endpoint_override.empty()) { // XXX Should we use a dedicated resolver with the given credentials? - ARROW_ASSIGN_OR_RAISE(options.region, ResolveS3BucketRegion(bucket)); + ARROW_ASSIGN_OR_RAISE(s3_options.region, ResolveS3BucketRegion(bucket)); } - return options; + return s3_options; } -Result S3Options::FromUri(const std::string& uri_string, - std::string* out_path) { +Result S3Options::FromUriAndOptions(const std::string& uri_string, + const FileSystemFactoryOptions& options, + std::string* out_path) { Uri uri; RETURN_NOT_OK(uri.Parse(uri_string)); - return FromUri(uri, out_path); + return FromUriAndOptions(uri, options, out_path); } bool S3Options::Equals(const S3Options& other) const { @@ -3606,13 +3680,9 @@ auto kS3FileSystemModule = ARROW_REGISTER_FILESYSTEM( [](const arrow::util::Uri& uri, const FileSystemFactoryOptions& options, const io::IOContext& io_context, std::string* out_path) -> Result> { - if (!options.empty()) { - return Status::NotImplemented( - "S3 filesystem factory options are not supported yet, got: ", options.size(), - " option(s)"); - } RETURN_NOT_OK(EnsureS3Initialized()); - ARROW_ASSIGN_OR_RAISE(auto s3_options, S3Options::FromUri(uri, out_path)); + ARROW_ASSIGN_OR_RAISE(auto s3_options, + S3Options::FromUriAndOptions(uri, options, out_path)); return S3FileSystem::Make(s3_options, io_context); }, [] { DCHECK_OK(EnsureS3Finalized()); }); diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h index 158d70a93fce..b727715a62b4 100644 --- a/cpp/src/arrow/filesystem/s3fs.h +++ b/cpp/src/arrow/filesystem/s3fs.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -290,6 +291,23 @@ struct ARROW_EXPORT S3Options { std::string* out_path = NULLPTR); static Result FromUri(const std::string& uri, std::string* out_path = NULLPTR); + + /// Equivalent to FromUri() with specific backend options that can't be represented + /// on the URI or are better kept out of it (such as credentials). + /// Each option is a (name, value) pair. Recognized keys: + /// - "access_key" (std::string) + /// - "secret_key" (std::string) + /// - "session_token" (std::string) + /// - "retry_strategy" (std::shared_ptr) + /// - "default_metadata" (std::shared_ptr) + /// Options take precedence over the URI; unknown keys or invalid values return + /// Status::Invalid. + static Result FromUriAndOptions(const ::arrow::util::Uri& uri, + const FileSystemFactoryOptions& options, + std::string* out_path = NULLPTR); + static Result FromUriAndOptions(const std::string& uri, + const FileSystemFactoryOptions& options, + std::string* out_path = NULLPTR); }; /// S3-backed FileSystem implementation. diff --git a/cpp/src/arrow/filesystem/s3fs_module_test.cc b/cpp/src/arrow/filesystem/s3fs_module_test.cc index c7b208192d06..f056b7fe55f0 100644 --- a/cpp/src/arrow/filesystem/s3fs_module_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_module_test.cc @@ -83,10 +83,53 @@ TEST(S3Test, FromUri) { "&allow_bucket_creation=0&allow_bucket_deletion=0"); } -TEST(S3Test, FromUriRejectsOptions) { +TEST(S3Test, FromUriAndOptionsCredentials) { + ASSERT_OK_AND_ASSIGN(auto minio, GetMinioEnv()->GetOneServer()); + std::string path; + FileSystemFactoryOptions options{ + {"access_key", std::string(minio->access_key())}, + {"secret_key", std::string(minio->secret_key())}, + }; + // Credentials supplied via options, NOT in the URI. + ASSERT_OK_AND_ASSIGN( + auto fs, + FileSystemFromUriAndOptions("s3://bucket/somedir/subdir/subfile", options, &path)); + // They crossed the module boundary and were applied -> reflected in MakeUri. + EXPECT_EQ(fs->MakeUri("/" + path), + "s3://minio:miniopass@bucket/somedir/subdir/subfile" + "?region=us-east-1&scheme=https&endpoint_override=" + "&allow_bucket_creation=0&allow_bucket_deletion=0"); +} + +namespace { +class NoopRetryStrategy : public S3RetryStrategy { + public: + bool ShouldRetry(const AWSErrorDetail&, int64_t) override { return false; } + int64_t CalculateDelayBeforeNextRetry(const AWSErrorDetail&, int64_t) override { + return 0; + } +}; +} // namespace + +TEST(S3Test, FromUriAndOptionsRetryStrategy) { + ASSERT_OK_AND_ASSIGN(auto minio, GetMinioEnv()->GetOneServer()); + FileSystemFactoryOptions options{ + {"access_key", std::string(minio->access_key())}, + {"secret_key", std::string(minio->secret_key())}, + {"retry_strategy", + std::shared_ptr(std::make_shared())}, + }; + std::string path; + ASSERT_OK_AND_ASSIGN( + auto fs, + FileSystemFromUriAndOptions("s3://bucket/somedir/subdir/subfile", options, &path)); + ASSERT_NE(fs, nullptr); +} + +TEST(S3Test, FromUriRejectsUnknownOptions) { FileSystemFactoryOptions options{{"some_option", 1}}; EXPECT_RAISES_WITH_MESSAGE_THAT( - NotImplemented, ::testing::HasSubstr("options are not supported"), + Invalid, ::testing::HasSubstr("Unexpected option"), FileSystemFromUriAndOptions("s3://bucket/key", options)); } diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc index 6be20603a8f3..9e1a6bc8ce85 100644 --- a/cpp/src/arrow/filesystem/s3fs_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_test.cc @@ -398,6 +398,92 @@ TEST_F(S3OptionsTest, FromAccessKey) { ASSERT_EQ(options.GetSessionToken(), "token"); } +TEST_F(S3OptionsTest, FromUriAndOptionsCredentials) { + std::string path; + S3Options options; + FileSystemFactoryOptions kv{ + {"access_key", std::string("ak")}, + {"secret_key", std::string("sk")}, + }; + ASSERT_OK_AND_ASSIGN(options, S3Options::FromUriAndOptions("s3://", kv, &path)); + ASSERT_EQ(options.GetAccessKey(), "ak"); + ASSERT_EQ(options.GetSecretKey(), "sk"); + ASSERT_EQ(options.GetSessionToken(), ""); + + kv.push_back({"session_token", std::string("tok")}); + ASSERT_OK_AND_ASSIGN(options, S3Options::FromUriAndOptions("s3://", kv, &path)); + ASSERT_EQ(options.GetAccessKey(), "ak"); + ASSERT_EQ(options.GetSecretKey(), "sk"); + ASSERT_EQ(options.GetSessionToken(), "tok"); + + // Failure scenarios + // Pairing is correct + ASSERT_THAT( + S3Options::FromUriAndOptions("s3://", {{"access_key", std::string("ak")}}, &path), + Raises(StatusCode::Invalid, ::testing::HasSubstr("must be provided together"))); + ASSERT_THAT( + S3Options::FromUriAndOptions("s3://", {{"session_token", std::string("tok")}}, + &path), + Raises(StatusCode::Invalid, ::testing::HasSubstr("session_token' requires"))); + // unknown option key + ASSERT_THAT(S3Options::FromUriAndOptions("s3://", {{"bogus", std::string("x")}}, &path), + Raises(StatusCode::Invalid, ::testing::HasSubstr("Unexpected option"))); + // const char* is rejected. Options must be an explicit std::string + ASSERT_THAT(S3Options::FromUriAndOptions("s3://", {{"access_key", "42"}}, &path), + Raises(StatusCode::Invalid, ::testing::HasSubstr("wrong type"))); +} + +TEST_F(S3OptionsTest, FromUriAndOptionsCredentialPrecedence) { + std::string path; + S3Options options; + // options override URI userinfo when not empty + FileSystemFactoryOptions kv{ + {"access_key", std::string("opt_access_key")}, + {"secret_key", std::string("opt_secret_key")}, + }; + ASSERT_OK_AND_ASSIGN( + options, + S3Options::FromUriAndOptions( + "s3://uri_access_key:uri_secret_key@mybucket?region=us-east-1", kv, &path)); + ASSERT_EQ(options.GetAccessKey(), "opt_access_key"); + ASSERT_EQ(options.GetSecretKey(), "opt_secret_key"); + + ASSERT_OK_AND_ASSIGN( + options, + S3Options::FromUriAndOptions( + "s3://uri_access_key:uri_secret_key@mybucket?region=us-east-1", {}, &path)); + ASSERT_EQ(options.GetAccessKey(), "uri_access_key"); + ASSERT_EQ(options.GetSecretKey(), "uri_secret_key"); +} + +TEST_F(S3OptionsTest, FromUriAndOptionsRetryStrategy) { + std::string path; + S3Options options; + auto retry = S3RetryStrategy::GetAwsDefaultRetryStrategy(/*max_attempts=*/3); + FileSystemFactoryOptions kv{{"retry_strategy", retry}}; + ASSERT_OK_AND_ASSIGN(options, S3Options::FromUriAndOptions("s3://", kv, &path)); + // The exact typed object crossed through std::any unchanged. + ASSERT_EQ(options.retry_strategy, retry); + + // wrong type is rejected, same as the credential keys + ASSERT_THAT(S3Options::FromUriAndOptions("s3://", + {{"retry_strategy", std::string("x")}}, &path), + Raises(StatusCode::Invalid, ::testing::HasSubstr("wrong type"))); +} + +TEST_F(S3OptionsTest, FromUriAndOptionsDefaultMetadata) { + std::string path; + S3Options options; + auto metadata = KeyValueMetadata::Make({"Content-Type"}, {"x-arrow/test"}); + FileSystemFactoryOptions kv{{"default_metadata", metadata}}; + ASSERT_OK_AND_ASSIGN(options, S3Options::FromUriAndOptions("s3://", kv, &path)); + ASSERT_EQ(options.default_metadata, metadata); + + ASSERT_THAT(S3Options::FromUriAndOptions( + "s3://", {{"default_metadata", std::string("x")}}, &path), + Raises(StatusCode::Invalid, ::testing::HasSubstr("wrong type"))); +} + TEST_F(S3OptionsTest, FromAssumeRole) { S3Options options;