From 7447a0f0269e05a8f60dbcf45fc5cee333caa29c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 26 May 2026 13:59:21 +0200 Subject: [PATCH 01/11] GH-46369: [C++] Add key-value pairs to the FileSystemFactory interface --- cpp/src/arrow/filesystem/filesystem.cc | 28 +++++++--- cpp/src/arrow/filesystem/filesystem.h | 68 +++++++++++++++++++++++- cpp/src/arrow/filesystem/localfs_test.cc | 19 +++++-- cpp/src/arrow/filesystem/s3fs.cc | 8 +-- cpp/src/arrow/testing/examplefs.cc | 5 +- 5 files changed, 111 insertions(+), 17 deletions(-) diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index 8281bed7ce16..75b9616ff00f 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -893,10 +893,10 @@ Status LoadFileSystemFactories(const char* libpath) { namespace { -Result> FileSystemFromUriReal(const Uri& uri, - const std::string& uri_string, - const io::IOContext& io_context, - std::string* out_path) { +Result> FileSystemFromUriReal( + const Uri& uri, const std::string& uri_string, + const std::vector>& options, + const io::IOContext& io_context, std::string* out_path) { const auto scheme = uri.scheme(); { @@ -904,7 +904,7 @@ Result> FileSystemFromUriReal(const Uri& uri, auto* factory, FileSystemFactoryRegistry::GetInstance()->FactoryForScheme(scheme)); if (factory != nullptr) { - return factory->function(uri, io_context, out_path); + return factory->function(uri, options, io_context, out_path); } } @@ -962,14 +962,28 @@ Result> FileSystemFromUriReal(const Uri& uri, Result> FileSystemFromUri(const std::string& uri_string, std::string* out_path) { - return FileSystemFromUri(uri_string, io::default_io_context(), out_path); + return FileSystemFromUri(uri_string, /*options=*/{}, io::default_io_context(), + out_path); +} + +Result> FileSystemFromUri( + const std::string& uri_string, + const std::vector>& options, std::string* out_path) { + return FileSystemFromUri(uri_string, options, io::default_io_context(), out_path); } Result> FileSystemFromUri(const std::string& uri_string, const io::IOContext& io_context, std::string* out_path) { + return FileSystemFromUri(uri_string, /*options=*/{}, io_context, out_path); +} + +Result> FileSystemFromUri( + const std::string& uri_string, + const std::vector>& options, + const io::IOContext& io_context, std::string* out_path) { ARROW_ASSIGN_OR_RAISE(auto fsuri, ParseFileSystemUri(uri_string)); - return FileSystemFromUriReal(fsuri, uri_string, io_context, out_path); + return FileSystemFromUriReal(fsuri, uri_string, options, io_context, out_path); } Result> FileSystemFromUriOrPath(const std::string& uri_string, diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 3a47eb62f524..5c4fc3c42c9e 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -28,6 +29,7 @@ #include "arrow/filesystem/type_fwd.h" #include "arrow/io/interfaces.h" +#include "arrow/result.h" #include "arrow/type_fwd.h" #include "arrow/util/compare.h" #include "arrow/util/macros.h" @@ -359,11 +361,34 @@ class ARROW_EXPORT FileSystem struct FileSystemFactory { std::function>( - const Uri& uri, const io::IOContext& io_context, std::string* out_path)> + const Uri& uri, const std::vector>& options, + const io::IOContext& io_context, std::string* out_path)> function; std::string_view file; int line; + /// Construct from an options-aware factory function. + FileSystemFactory(std::function>( + const Uri&, const std::vector>&, + const io::IOContext&, std::string*)> + fn, + std::string_view file, int line) + : function(std::move(fn)), file(file), line(line) {} + + /// Construct from a non-options aware factory function maintaing source compatibility + /// with existing factories. + FileSystemFactory(std::function>( + const Uri&, const io::IOContext&, std::string*)> + fn, + std::string_view file, int line) + : function([fn = std::move(fn)]( + const Uri& uri, + const std::vector>& /*ignored*/, + const io::IOContext& ctx, + std::string* out_path) { return fn(uri, ctx, out_path); }), + file(file), + line(line) {} + bool operator==(const FileSystemFactory& other) const { // In the case where libarrow is linked statically both to the executable and to a // dynamically loaded filesystem implementation library, the library contains a @@ -547,6 +572,26 @@ ARROW_EXPORT Result> FileSystemFromUri(const std::string& uri, std::string* out_path = NULLPTR); +/// \brief Create a new FileSystem by URI with extended backend-specific filesystem +/// options +/// +/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3", +/// "gs" and "gcs". +/// +/// Support for other schemes can be added using RegisterFileSystemFactory. +/// +/// \param[in] uri the URI to give access to +/// \param[in] options a list of backend-specific filesystem options +/// Each option is a (name, value) pair. +/// The expected type is specific to the backend and +/// option name. +/// \param[out] out_path (optional) Path inside the filesystem. +/// \return out_fs FileSystem instance. +ARROW_EXPORT +Result> FileSystemFromUri( + const std::string& uri, const std::vector>& options, + std::string* out_path = NULLPTR); + /// \brief Create a new FileSystem by URI with a custom IO context /// /// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3", @@ -563,6 +608,27 @@ Result> FileSystemFromUri(const std::string& uri, const io::IOContext& io_context, std::string* out_path = NULLPTR); +/// \brief Create a new FileSystem by URI with a custom IO context with backend-specific +/// filesystem options +/// +/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3", +/// "gs" and "gcs". +/// +/// Support for other schemes can be added using RegisterFileSystemFactory. +/// +/// \param[in] uri a URI-based path, ex: file:///some/local/path +/// \param[in] options a list of backend-specific filesystem options +/// Each option is a (name, value) pair. +/// The expected type is specific to the backend and +/// option name. +/// \param[in] io_context an IOContext which will be associated with the filesystem +/// \param[out] out_path (optional) Path inside the filesystem. +/// \return out_fs FileSystem instance. +ARROW_EXPORT +Result> FileSystemFromUri( + const std::string& uri, const std::vector>& options, + const io::IOContext& io_context, std::string* out_path = NULLPTR); + /// \brief Create a new FileSystem by URI /// /// Support for other schemes can be added using RegisterFileSystemFactory. diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index 2e91783c92dc..c63f4bfbf1cd 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -171,10 +171,21 @@ TEST(FileSystemFromUri, RuntimeRegisteredFactory) { } FileSystemRegistrar kSegfaultFileSystemModule[]{ - ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}), - ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}), - ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}), -}; + ARROW_REGISTER_FILESYSTEM( + "segfault", + std::function>( + const Uri&, const io::IOContext&, std::string*)>(nullptr), + {}), + ARROW_REGISTER_FILESYSTEM( + "segfault", + std::function>( + const Uri&, const io::IOContext&, std::string*)>(nullptr), + {}), + ARROW_REGISTER_FILESYSTEM( + "segfault", + std::function>( + const Uri&, const io::IOContext&, std::string*)>(nullptr), + {})}; TEST(FileSystemFromUri, LinkedRegisteredFactoryNameCollision) { // Since multiple registrars are defined in this translation unit which all // register factories for the 'segfault' scheme, using that scheme in FileSystemFromUri diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 0c15f6f18444..d5eb31a16458 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -3603,11 +3603,13 @@ Result ResolveS3BucketRegion(const std::string& bucket) { auto kS3FileSystemModule = ARROW_REGISTER_FILESYSTEM( "s3", - [](const arrow::util::Uri& uri, const io::IOContext& io_context, + [](const arrow::util::Uri& uri, + const std::vector>& options, + const io::IOContext& io_context, std::string* out_path) -> Result> { RETURN_NOT_OK(EnsureS3Initialized()); - ARROW_ASSIGN_OR_RAISE(auto options, S3Options::FromUri(uri, out_path)); - return S3FileSystem::Make(options, io_context); + ARROW_ASSIGN_OR_RAISE(auto s3_options, S3Options::FromUri(uri, out_path)); + return S3FileSystem::Make(s3_options, io_context); }, [] { DCHECK_OK(EnsureS3Finalized()); }); diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index 5c9d5f9d9071..eab26349e2ef 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -26,12 +26,13 @@ namespace arrow::fs { auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM( "example", - [](const Uri& uri, const io::IOContext& io_context, + [](const Uri& uri, const std::vector>& options, + const io::IOContext& io_context, std::string* out_path) -> Result> { constexpr std::string_view kScheme = "example"; EXPECT_EQ(uri.scheme(), kScheme); auto local_uri = "file" + uri.ToString().substr(kScheme.size()); - return FileSystemFromUri(local_uri, io_context, out_path); + return FileSystemFromUri(local_uri, options, io_context, out_path); }, {}); From ce6aed2125777acb514ba1b171cae3c636b72370 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 8 Jun 2026 11:52:02 +0200 Subject: [PATCH 02/11] Add test for extra options being passed and improve documentation --- cpp/src/arrow/filesystem/filesystem.h | 10 +++++++++- cpp/src/arrow/filesystem/localfs_test.cc | 11 ++++++++++- cpp/src/arrow/testing/examplefs.cc | 23 ++++++++++++++++++++++- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 5c4fc3c42c9e..617d061251a1 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -375,7 +375,7 @@ struct FileSystemFactory { std::string_view file, int line) : function(std::move(fn)), file(file), line(line) {} - /// Construct from a non-options aware factory function maintaing source compatibility + /// Construct from a non-options aware factory function maintaining source compatibility /// with existing factories. FileSystemFactory(std::function>( const Uri&, const io::IOContext&, std::string*)> @@ -585,6 +585,10 @@ Result> FileSystemFromUri(const std::string& uri, /// Each option is a (name, value) pair. /// The expected type is specific to the backend and /// option name. +/// Options are only forwarded to schemes dispatched through a +/// registered FileSystemFactory (currently "s3" and any scheme +/// added via RegisterFileSystemFactory). They are ignored by the +/// built-in schemes. /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. ARROW_EXPORT @@ -621,6 +625,10 @@ Result> FileSystemFromUri(const std::string& uri, /// Each option is a (name, value) pair. /// The expected type is specific to the backend and /// option name. +/// Options are only forwarded to schemes dispatched through a +/// registered FileSystemFactory (currently "s3" and any scheme +/// added via RegisterFileSystemFactory). They are ignored by the +/// built-in schemes. /// \param[in] io_context an IOContext which will be associated with the filesystem /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index c63f4bfbf1cd..47cb9e8adbb4 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include @@ -144,10 +145,18 @@ TEST(FileSystemFromUri, LoadedRegisteredFactory) { EXPECT_THAT(FileSystemFromUri("example:///hey/yo", &path), Raises(StatusCode::Invalid)); EXPECT_THAT(LoadFileSystemFactories(ARROW_FILESYSTEM_EXAMPLE_LIBPATH), Ok()); - ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFromUri("example:///hey/yo", &path)); EXPECT_EQ(path, "/hey/yo"); EXPECT_EQ(fs->type_name(), "local"); + + // Validate extra options are forwarded to the factory. + std::vector> options{ + {"example_option_string", std::string("example_value")}, + {"example_option_int", 42}, + }; + ASSERT_OK_AND_ASSIGN(fs, FileSystemFromUri("example:///hey/yo", options, &path)); + EXPECT_EQ(path, "/hey/yo/example_value/42"); + EXPECT_EQ(fs->type_name(), "local"); } TEST(FileSystemFromUri, RuntimeRegisteredFactory) { diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index eab26349e2ef..2a1d07a8c4f6 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +#include +#include + #include "arrow/filesystem/filesystem.h" #include "arrow/filesystem/filesystem_library.h" #include "arrow/result.h" @@ -32,7 +35,25 @@ auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM( constexpr std::string_view kScheme = "example"; EXPECT_EQ(uri.scheme(), kScheme); auto local_uri = "file" + uri.ToString().substr(kScheme.size()); - return FileSystemFromUri(local_uri, options, io_context, out_path); + ARROW_ASSIGN_OR_RAISE(auto fs, + FileSystemFromUri(local_uri, options, io_context, out_path)); + for (const auto& [key, value] : options) { + EXPECT_TRUE(value.has_value()); + if (key == "example_option_string") { + EXPECT_TRUE(value.type() == typeid(std::string)); + if (out_path != nullptr) { + *out_path += "/" + std::any_cast(value); + } + } else if (key == "example_option_int") { + EXPECT_TRUE(value.type() == typeid(int)); + if (out_path != nullptr) { + *out_path += "/" + std::to_string(std::any_cast(value)); + } + } else { + ADD_FAILURE() << "Unexpected option: " << key; + } + } + return fs; }, {}); From 426ad3b42948e7b5a0c5b43ae889d997d59b858a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 8 Jun 2026 12:45:15 +0200 Subject: [PATCH 03/11] Reject non-empty options, add tests to validate it and add missing plumbing for ARROW_S3_MODULE to CI --- ci/scripts/cpp_build.sh | 2 ++ cpp/src/arrow/filesystem/filesystem.cc | 4 ++++ cpp/src/arrow/filesystem/filesystem_test.cc | 6 ++++++ cpp/src/arrow/filesystem/s3fs.cc | 5 +++++ cpp/src/arrow/filesystem/s3fs_module_test.cc | 8 ++++++++ 5 files changed, 25 insertions(+) diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 1e2f3e8f8f1a..3d9b2ba72d41 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -70,6 +70,7 @@ if [ "${ARROW_ENABLE_THREADING:-ON}" = "OFF" ]; then ARROW_JEMALLOC=OFF ARROW_MIMALLOC=OFF ARROW_S3=OFF + ARROW_S3_MODULE=OFF ARROW_WITH_OPENTELEMETRY=OFF fi @@ -229,6 +230,7 @@ else -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ -DARROW_S3=${ARROW_S3:-OFF} \ + -DARROW_S3_MODULE=${ARROW_S3_MODULE:-OFF} \ -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \ -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \ -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \ diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index 75b9616ff00f..354c2d156b29 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -907,6 +907,10 @@ Result> FileSystemFromUriReal( return factory->function(uri, options, io_context, out_path); } } + if (!options.empty()) { + return Status::NotImplemented("Filesystem options are not supported yet for scheme '", + scheme, "', got ", options.size(), " option(s)"); + } if (scheme == "abfs" || scheme == "abfss") { #ifdef ARROW_AZURE diff --git a/cpp/src/arrow/filesystem/filesystem_test.cc b/cpp/src/arrow/filesystem/filesystem_test.cc index 5072c3a8c25b..c874af96a667 100644 --- a/cpp/src/arrow/filesystem/filesystem_test.cc +++ b/cpp/src/arrow/filesystem/filesystem_test.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include @@ -640,6 +641,11 @@ TEST_F(TestMockFS, FileSystemFromUri) { Invalid, ::testing::HasSubstr("syntax error at character ' ' (position 12)"), FileSystemFromUri("mock:/folder name/bar", &path)); CheckDirs({}); + std::vector> options{{"some_option", 1}}; + EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, + ::testing::HasSubstr("options are not supported"), + FileSystemFromUri("mock:///foo/bar", options, &path)); + CheckDirs({}); } //////////////////////////////////////////////////////////////////////////// diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index d5eb31a16458..639e3037f395 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -3607,6 +3607,11 @@ auto kS3FileSystemModule = ARROW_REGISTER_FILESYSTEM( const std::vector>& options, const io::IOContext& io_context, std::string* out_path) -> Result> { + if (!options.empty()) { + return Status::NotImplemented( + "S3 filesystem factory options are not supported yet, got: ", options.size(), + " options"); + } RETURN_NOT_OK(EnsureS3Initialized()); ARROW_ASSIGN_OR_RAISE(auto s3_options, S3Options::FromUri(uri, out_path)); return S3FileSystem::Make(s3_options, io_context); diff --git a/cpp/src/arrow/filesystem/s3fs_module_test.cc b/cpp/src/arrow/filesystem/s3fs_module_test.cc index 987a8979b271..345562ac64d9 100644 --- a/cpp/src/arrow/filesystem/s3fs_module_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_module_test.cc @@ -16,6 +16,7 @@ // under the License. #include +#include #include #include #include @@ -82,4 +83,11 @@ TEST(S3Test, FromUri) { "&allow_bucket_creation=0&allow_bucket_deletion=0"); } +TEST(S3Test, FromUriRejectsOptions) { + std::vector> options{{"some_option", 1}}; + EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, + ::testing::HasSubstr("options are not supported"), + FileSystemFromUri("s3://bucket/key", options)); +} + } // namespace arrow::fs From 20069f4762ed921f0f2b83d69114b433bcdb4d0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 8 Jun 2026 13:21:24 +0200 Subject: [PATCH 04/11] Fix CI and minor improvement on docstring --- cpp/src/arrow/filesystem/CMakeLists.txt | 2 +- cpp/src/arrow/filesystem/filesystem.h | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index e6330df42603..ee46f4d256ce 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -128,7 +128,7 @@ if(ARROW_S3) endif() endif() - if(ARROW_S3_MODULE) + if(ARROW_S3_MODULE AND ARROW_BUILD_TESTS) add_arrow_test(s3fs_module_test SOURCES s3fs_module_test.cc diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 617d061251a1..2f334edbc498 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -585,10 +585,11 @@ Result> FileSystemFromUri(const std::string& uri, /// Each option is a (name, value) pair. /// The expected type is specific to the backend and /// option name. -/// Options are only forwarded to schemes dispatched through a -/// registered FileSystemFactory (currently "s3" and any scheme -/// added via RegisterFileSystemFactory). They are ignored by the -/// built-in schemes. +/// Options are forwarded to schemes dispatched through a registered +/// FileSystemFactory. A registered factory that does not support options +/// (currently "s3") returns NotImplemented for non-empty options. +/// Schemes not handled by a registered factory will also return +/// NotImplemented for non-empty options. /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. ARROW_EXPORT @@ -625,10 +626,11 @@ Result> FileSystemFromUri(const std::string& uri, /// Each option is a (name, value) pair. /// The expected type is specific to the backend and /// option name. -/// Options are only forwarded to schemes dispatched through a -/// registered FileSystemFactory (currently "s3" and any scheme -/// added via RegisterFileSystemFactory). They are ignored by the -/// built-in schemes. +/// Options are forwarded to schemes dispatched through a registered +/// FileSystemFactory. A registered factory that does not support options +/// (currently "s3") returns NotImplemented for non-empty options. +/// Schemes not handled by a registered factory will also return +/// NotImplemented for non-empty options. /// \param[in] io_context an IOContext which will be associated with the filesystem /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. From ce9fd8a007ff38f4622948c690d461e99ca2e21b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 9 Jun 2026 12:47:48 +0200 Subject: [PATCH 05/11] Add test for typed option to validate std::any_cast cross DSO --- cpp/src/arrow/filesystem/localfs_test.cc | 11 ++++++++++- cpp/src/arrow/testing/examplefs.cc | 10 ++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index 47cb9e8adbb4..bff1a73acda6 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -30,6 +30,7 @@ #include "arrow/filesystem/path_util.h" #include "arrow/filesystem/test_util.h" #include "arrow/filesystem/util_internal.h" +#include "arrow/testing/examplefs.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" #include "arrow/util/io_util.h" @@ -84,6 +85,12 @@ Result> FSFromUriOrPath(const std::string& uri, //////////////////////////////////////////////////////////////////////////// // Registered FileSystemFactory tests +struct ConcreteTypedOption : ExampleTypedOption { + explicit ConcreteTypedOption(int value) : value_(value) {} + int value() const override { return value_; } + int value_; +}; + class SlowFileSystemPublicProps : public SlowFileSystem { public: SlowFileSystemPublicProps(std::shared_ptr base_fs, double average_latency, @@ -153,9 +160,11 @@ TEST(FileSystemFromUri, LoadedRegisteredFactory) { std::vector> options{ {"example_option_string", std::string("example_value")}, {"example_option_int", 42}, + {"example_typed_option", + std::shared_ptr(std::make_shared(12345))}, }; ASSERT_OK_AND_ASSIGN(fs, FileSystemFromUri("example:///hey/yo", options, &path)); - EXPECT_EQ(path, "/hey/yo/example_value/42"); + EXPECT_EQ(path, "/hey/yo/example_value/42/12345"); EXPECT_EQ(fs->type_name(), "local"); } diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index 2a1d07a8c4f6..819f12a2abd7 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -21,6 +21,7 @@ #include "arrow/filesystem/filesystem.h" #include "arrow/filesystem/filesystem_library.h" #include "arrow/result.h" +#include "arrow/testing/examplefs.h" #include "arrow/util/uri.h" #include @@ -49,6 +50,15 @@ auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM( if (out_path != nullptr) { *out_path += "/" + std::to_string(std::any_cast(value)); } + } else if (key == "example_typed_option") { + if (const auto* opt = + std::any_cast>(&value)) { + if (out_path != nullptr) { + *out_path += "/" + std::to_string((*opt)->value()); + } + } else if (out_path != nullptr) { + *out_path += "/typed_cast_failed"; + } } else { ADD_FAILURE() << "Unexpected option: " << key; } From 94142b282d953da89174a9c5c4cc43c7be51f957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 9 Jun 2026 12:52:23 +0200 Subject: [PATCH 06/11] Add missing new header file from commit :) --- cpp/src/arrow/testing/examplefs.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 cpp/src/arrow/testing/examplefs.h diff --git a/cpp/src/arrow/testing/examplefs.h b/cpp/src/arrow/testing/examplefs.h new file mode 100644 index 000000000000..e04b84c982ad --- /dev/null +++ b/cpp/src/arrow/testing/examplefs.h @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +namespace arrow::fs { + +class ExampleTypedOption { + public: + virtual ~ExampleTypedOption() = default; + virtual int value() const = 0; +}; + +} // namespace arrow::fs From ddac9dfdf39b43b122933218840d08602f370d2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 9 Jun 2026 15:09:22 +0200 Subject: [PATCH 07/11] Several review fixes, rename to FileSystemFromUriAndOptions instead of FileSystemFromUri, do not silently ignore non-empty options if underlying FS does not support them and use FileSystemFactoryOptions instead of std::vector> everywhere --- cpp/src/arrow/filesystem/filesystem.cc | 24 ++++++++--------- cpp/src/arrow/filesystem/filesystem.h | 27 ++++++++++++-------- cpp/src/arrow/filesystem/filesystem_test.cc | 8 +++--- cpp/src/arrow/filesystem/localfs_test.cc | 5 ++-- cpp/src/arrow/filesystem/s3fs.cc | 3 +-- cpp/src/arrow/filesystem/s3fs_module_test.cc | 8 +++--- cpp/src/arrow/testing/examplefs.cc | 5 ++-- 7 files changed, 43 insertions(+), 37 deletions(-) diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index 354c2d156b29..f92336e004ea 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -895,8 +895,8 @@ namespace { Result> FileSystemFromUriReal( const Uri& uri, const std::string& uri_string, - const std::vector>& options, - const io::IOContext& io_context, std::string* out_path) { + const FileSystemFactoryOptions& options, const io::IOContext& io_context, + std::string* out_path) { const auto scheme = uri.scheme(); { @@ -966,25 +966,25 @@ Result> FileSystemFromUriReal( Result> FileSystemFromUri(const std::string& uri_string, std::string* out_path) { - return FileSystemFromUri(uri_string, /*options=*/{}, io::default_io_context(), - out_path); + return FileSystemFromUriAndOptions(uri_string, /*options=*/{}, io::default_io_context(), + out_path); } -Result> FileSystemFromUri( - const std::string& uri_string, - const std::vector>& options, std::string* out_path) { - return FileSystemFromUri(uri_string, options, io::default_io_context(), out_path); +Result> FileSystemFromUriAndOptions( + const std::string& uri_string, const FileSystemFactoryOptions& options, + std::string* out_path) { + return FileSystemFromUriAndOptions(uri_string, options, io::default_io_context(), + out_path); } Result> FileSystemFromUri(const std::string& uri_string, const io::IOContext& io_context, std::string* out_path) { - return FileSystemFromUri(uri_string, /*options=*/{}, io_context, out_path); + return FileSystemFromUriAndOptions(uri_string, /*options=*/{}, io_context, out_path); } -Result> FileSystemFromUri( - const std::string& uri_string, - const std::vector>& options, +Result> FileSystemFromUriAndOptions( + const std::string& uri_string, const FileSystemFactoryOptions& options, const io::IOContext& io_context, std::string* out_path) { ARROW_ASSIGN_OR_RAISE(auto fsuri, ParseFileSystemUri(uri_string)); return FileSystemFromUriReal(fsuri, uri_string, options, io_context, out_path); diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 2f334edbc498..97a1a68a094a 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -359,9 +359,11 @@ class ARROW_EXPORT FileSystem bool default_async_is_sync_ = true; }; +using FileSystemFactoryOptions = std::vector>; + struct FileSystemFactory { std::function>( - const Uri& uri, const std::vector>& options, + const Uri& uri, const FileSystemFactoryOptions& options, const io::IOContext& io_context, std::string* out_path)> function; std::string_view file; @@ -369,8 +371,8 @@ struct FileSystemFactory { /// Construct from an options-aware factory function. FileSystemFactory(std::function>( - const Uri&, const std::vector>&, - const io::IOContext&, std::string*)> + const Uri&, const FileSystemFactoryOptions&, const io::IOContext&, + std::string*)> fn, std::string_view file, int line) : function(std::move(fn)), file(file), line(line) {} @@ -382,10 +384,15 @@ struct FileSystemFactory { fn, std::string_view file, int line) : function([fn = std::move(fn)]( - const Uri& uri, - const std::vector>& /*ignored*/, + const Uri& uri, const FileSystemFactoryOptions& options, const io::IOContext& ctx, - std::string* out_path) { return fn(uri, ctx, out_path); }), + std::string* out_path) -> Result> { + if (!options.empty()) { + return Status::NotImplemented( + "This filesystem does not support additional options"); + } + return fn(uri, ctx, out_path); + }), file(file), line(line) {} @@ -593,8 +600,8 @@ Result> FileSystemFromUri(const std::string& uri, /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. ARROW_EXPORT -Result> FileSystemFromUri( - const std::string& uri, const std::vector>& options, +Result> FileSystemFromUriAndOptions( + const std::string& uri, const FileSystemFactoryOptions& options, std::string* out_path = NULLPTR); /// \brief Create a new FileSystem by URI with a custom IO context @@ -635,8 +642,8 @@ Result> FileSystemFromUri(const std::string& uri, /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. ARROW_EXPORT -Result> FileSystemFromUri( - const std::string& uri, const std::vector>& options, +Result> FileSystemFromUriAndOptions( + const std::string& uri, const FileSystemFactoryOptions& options, const io::IOContext& io_context, std::string* out_path = NULLPTR); /// \brief Create a new FileSystem by URI diff --git a/cpp/src/arrow/filesystem/filesystem_test.cc b/cpp/src/arrow/filesystem/filesystem_test.cc index c874af96a667..10a3922e36ac 100644 --- a/cpp/src/arrow/filesystem/filesystem_test.cc +++ b/cpp/src/arrow/filesystem/filesystem_test.cc @@ -641,10 +641,10 @@ TEST_F(TestMockFS, FileSystemFromUri) { Invalid, ::testing::HasSubstr("syntax error at character ' ' (position 12)"), FileSystemFromUri("mock:/folder name/bar", &path)); CheckDirs({}); - std::vector> options{{"some_option", 1}}; - EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, - ::testing::HasSubstr("options are not supported"), - FileSystemFromUri("mock:///foo/bar", options, &path)); + FileSystemFactoryOptions options{{"some_option", 1}}; + EXPECT_RAISES_WITH_MESSAGE_THAT( + NotImplemented, ::testing::HasSubstr("options are not supported"), + FileSystemFromUriAndOptions("mock:///foo/bar", options, &path)); CheckDirs({}); } diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index bff1a73acda6..2dc6fe97d595 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -157,13 +157,14 @@ TEST(FileSystemFromUri, LoadedRegisteredFactory) { EXPECT_EQ(fs->type_name(), "local"); // Validate extra options are forwarded to the factory. - std::vector> options{ + FileSystemFactoryOptions options{ {"example_option_string", std::string("example_value")}, {"example_option_int", 42}, {"example_typed_option", std::shared_ptr(std::make_shared(12345))}, }; - ASSERT_OK_AND_ASSIGN(fs, FileSystemFromUri("example:///hey/yo", options, &path)); + ASSERT_OK_AND_ASSIGN(fs, + FileSystemFromUriAndOptions("example:///hey/yo", options, &path)); EXPECT_EQ(path, "/hey/yo/example_value/42/12345"); EXPECT_EQ(fs->type_name(), "local"); } diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 639e3037f395..1a488157cb05 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -3603,8 +3603,7 @@ Result ResolveS3BucketRegion(const std::string& bucket) { auto kS3FileSystemModule = ARROW_REGISTER_FILESYSTEM( "s3", - [](const arrow::util::Uri& uri, - const std::vector>& options, + [](const arrow::util::Uri& uri, const FileSystemFactoryOptions& options, const io::IOContext& io_context, std::string* out_path) -> Result> { if (!options.empty()) { diff --git a/cpp/src/arrow/filesystem/s3fs_module_test.cc b/cpp/src/arrow/filesystem/s3fs_module_test.cc index 345562ac64d9..c7b208192d06 100644 --- a/cpp/src/arrow/filesystem/s3fs_module_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_module_test.cc @@ -84,10 +84,10 @@ TEST(S3Test, FromUri) { } TEST(S3Test, FromUriRejectsOptions) { - std::vector> options{{"some_option", 1}}; - EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, - ::testing::HasSubstr("options are not supported"), - FileSystemFromUri("s3://bucket/key", options)); + FileSystemFactoryOptions options{{"some_option", 1}}; + EXPECT_RAISES_WITH_MESSAGE_THAT( + NotImplemented, ::testing::HasSubstr("options are not supported"), + FileSystemFromUriAndOptions("s3://bucket/key", options)); } } // namespace arrow::fs diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index 819f12a2abd7..cbe087a6d280 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -30,14 +30,13 @@ namespace arrow::fs { auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM( "example", - [](const Uri& uri, const std::vector>& options, + [](const Uri& uri, const FileSystemFactoryOptions& options, const io::IOContext& io_context, std::string* out_path) -> Result> { constexpr std::string_view kScheme = "example"; EXPECT_EQ(uri.scheme(), kScheme); auto local_uri = "file" + uri.ToString().substr(kScheme.size()); - ARROW_ASSIGN_OR_RAISE(auto fs, - FileSystemFromUri(local_uri, options, io_context, out_path)); + ARROW_ASSIGN_OR_RAISE(auto fs, FileSystemFromUri(local_uri, io_context, out_path)); for (const auto& [key, value] : options) { EXPECT_TRUE(value.has_value()); if (key == "example_option_string") { From eafbcd17d34a1859adbe3bb4fa30e46c7f47e0e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 9 Jun 2026 15:25:28 +0200 Subject: [PATCH 08/11] A couple more review fixes to make copilot happy --- cpp/src/arrow/filesystem/filesystem.h | 15 ++++++--------- cpp/src/arrow/testing/examplefs.cc | 14 ++++++++------ 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 97a1a68a094a..639c0ba8d4c3 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -593,10 +593,9 @@ Result> FileSystemFromUri(const std::string& uri, /// The expected type is specific to the backend and /// option name. /// Options are forwarded to schemes dispatched through a registered -/// FileSystemFactory. A registered factory that does not support options -/// (currently "s3") returns NotImplemented for non-empty options. -/// Schemes not handled by a registered factory will also return -/// NotImplemented for non-empty options. +/// FileSystemFactory. Non-empty options return NotImplemented for a registered +/// FileSystemFactory that does not support them or for schemes not handled by +/// a registered factory. /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. ARROW_EXPORT @@ -634,11 +633,9 @@ Result> FileSystemFromUri(const std::string& uri, /// The expected type is specific to the backend and /// option name. /// Options are forwarded to schemes dispatched through a registered -/// FileSystemFactory. A registered factory that does not support options -/// (currently "s3") returns NotImplemented for non-empty options. -/// Schemes not handled by a registered factory will also return -/// NotImplemented for non-empty options. -/// \param[in] io_context an IOContext which will be associated with the filesystem +/// FileSystemFactory. Non-empty options return NotImplemented for a registered +/// FileSystemFactory that does not support them or for schemes not handled by +/// a registered factory. /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. ARROW_EXPORT diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index cbe087a6d280..08fa9326e210 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -40,14 +40,16 @@ auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM( for (const auto& [key, value] : options) { EXPECT_TRUE(value.has_value()); if (key == "example_option_string") { - EXPECT_TRUE(value.type() == typeid(std::string)); - if (out_path != nullptr) { - *out_path += "/" + std::any_cast(value); + if (const auto* s = std::any_cast(&value)) { + if (out_path != nullptr) *out_path += "/" + *s; + } else { + ADD_FAILURE() << "example_option_string has wrong type"; } } else if (key == "example_option_int") { - EXPECT_TRUE(value.type() == typeid(int)); - if (out_path != nullptr) { - *out_path += "/" + std::to_string(std::any_cast(value)); + if (const auto* i = std::any_cast(&value)) { + if (out_path != nullptr) *out_path += "/" + std::to_string(*i); + } else { + ADD_FAILURE() << "example_option_int has wrong type"; } } else if (key == "example_typed_option") { if (const auto* opt = From 5c898f480a0fa2590e89049a8d56506c95ab7b92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 9 Jun 2026 15:38:09 +0200 Subject: [PATCH 09/11] Re-add wrongly removed doxygen param due to copy&paste error --- cpp/src/arrow/filesystem/filesystem.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 639c0ba8d4c3..250dfdc82148 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -636,6 +636,7 @@ Result> FileSystemFromUri(const std::string& uri, /// FileSystemFactory. Non-empty options return NotImplemented for a registered /// FileSystemFactory that does not support them or for schemes not handled by /// a registered factory. +/// \param[in] io_context an IOContext which will be associated with the filesystem /// \param[out] out_path (optional) Path inside the filesystem. /// \return out_fs FileSystem instance. ARROW_EXPORT From c9c887cc0692810dd90feac87eb131a16dbaee51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 9 Jun 2026 16:27:37 +0200 Subject: [PATCH 10/11] Remove unnecessary include, update a couple of error messages, re-org tests for FileSystemFromUriAndOptions and add a new one to test old behaviour --- cpp/src/arrow/filesystem/filesystem.h | 3 +- cpp/src/arrow/filesystem/localfs_test.cc | 52 ++++++++++++++++++------ cpp/src/arrow/filesystem/s3fs.cc | 2 +- cpp/src/arrow/testing/examplefs.cc | 1 - 4 files changed, 43 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 250dfdc82148..a0478763a463 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -389,7 +389,8 @@ struct FileSystemFactory { std::string* out_path) -> Result> { if (!options.empty()) { return Status::NotImplemented( - "This filesystem does not support additional options"); + "Filesystem factory does not support additional options, got ", + options.size(), " option(s)"); } return fn(uri, ctx, out_path); }), diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index 2dc6fe97d595..212d91989659 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -155,18 +155,6 @@ TEST(FileSystemFromUri, LoadedRegisteredFactory) { ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFromUri("example:///hey/yo", &path)); EXPECT_EQ(path, "/hey/yo"); EXPECT_EQ(fs->type_name(), "local"); - - // Validate extra options are forwarded to the factory. - FileSystemFactoryOptions options{ - {"example_option_string", std::string("example_value")}, - {"example_option_int", 42}, - {"example_typed_option", - std::shared_ptr(std::make_shared(12345))}, - }; - ASSERT_OK_AND_ASSIGN(fs, - FileSystemFromUriAndOptions("example:///hey/yo", options, &path)); - EXPECT_EQ(path, "/hey/yo/example_value/42/12345"); - EXPECT_EQ(fs->type_name(), "local"); } TEST(FileSystemFromUri, RuntimeRegisteredFactory) { @@ -215,6 +203,46 @@ TEST(FileSystemFromUri, LinkedRegisteredFactoryNameCollision) { // other schemes are not affected by the collision EXPECT_THAT(FileSystemFromUri("slowfile:///hey/yo", &path), Ok()); } + +TEST(FileSystemFromUriAndOptions, LoadedRegisteredFactory) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Emscripten dynamic library testing disabled"; +#endif + std::string path; + EXPECT_THAT(LoadFileSystemFactories(ARROW_FILESYSTEM_EXAMPLE_LIBPATH), Ok()); + // Validate extra options are forwarded to the factory. + FileSystemFactoryOptions options{ + {"example_option_string", std::string("example_value")}, + {"example_option_int", 42}, + {"example_typed_option", + std::shared_ptr(std::make_shared(12345))}, + }; + ASSERT_OK_AND_ASSIGN(auto fs, + FileSystemFromUriAndOptions("example:///hey/yo", options, &path)); + EXPECT_EQ(path, "/hey/yo/example_value/42/12345"); + EXPECT_EQ(fs->type_name(), "local"); +} + +TEST(FileSystemFromUriAndOptions, RuntimeRegisteredFactory) { + std::string path; + EXPECT_THAT(FileSystemFromUriAndOptions("slowfile3:///hey/yo", {}, &path), + Raises(StatusCode::Invalid)); + + EXPECT_THAT( + RegisterFileSystemFactory("slowfile3", {SlowFileSystemFactory, __FILE__, __LINE__}), + Ok()); + + ASSERT_OK_AND_ASSIGN(auto fs, + FileSystemFromUriAndOptions("slowfile3:///hey/yo", {}, &path)); + EXPECT_EQ(path, "/hey/yo"); + EXPECT_EQ(fs->type_name(), "slow"); + + // Validate that legacy (3-arg) factories reject non-empty options. + FileSystemFactoryOptions unsupported{{"some_option", 1}}; + EXPECT_THAT(FileSystemFromUriAndOptions("slowfile3:///hey/yo", unsupported, &path), + Raises(StatusCode::NotImplemented)); +} + //////////////////////////////////////////////////////////////////////////// // Misc tests diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 1a488157cb05..515d8d252881 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -3609,7 +3609,7 @@ auto kS3FileSystemModule = ARROW_REGISTER_FILESYSTEM( if (!options.empty()) { return Status::NotImplemented( "S3 filesystem factory options are not supported yet, got: ", options.size(), - " options"); + " option(s)"); } RETURN_NOT_OK(EnsureS3Initialized()); ARROW_ASSIGN_OR_RAISE(auto s3_options, S3Options::FromUri(uri, out_path)); diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index 08fa9326e210..da4651acdc50 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -16,7 +16,6 @@ // under the License. #include -#include #include "arrow/filesystem/filesystem.h" #include "arrow/filesystem/filesystem_library.h" From acfc93977623148ca495decf6ef31c0a5a0d32d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 10 Jun 2026 16:27:26 +0200 Subject: [PATCH 11/11] Add binding validations for passing std::any around --- c_glib/arrow-glib/file-system.cpp | 36 ++++++++++++++++ c_glib/arrow-glib/file-system.h | 7 ++++ c_glib/test/test-mock-file-system.rb | 7 ++++ cpp/src/arrow/filesystem/filesystem.cc | 17 ++++++++ cpp/src/arrow/filesystem/filesystem.h | 12 ++++++ python/pyarrow/_fs.pyx | 18 ++++++++ python/pyarrow/fs.py | 1 + python/pyarrow/includes/common.pxd | 1 + python/pyarrow/includes/libarrow_fs.pxd | 6 +++ python/pyarrow/tests/test_fs.py | 9 +++- r/R/arrowExports.R | 20 +++++---- r/src/arrowExports.cpp | 55 +++++++++++++++---------- r/src/filesystem.cpp | 11 +++++ r/tests/testthat/test-filesystem.R | 7 ++++ 14 files changed, 176 insertions(+), 31 deletions(-) diff --git a/c_glib/arrow-glib/file-system.cpp b/c_glib/arrow-glib/file-system.cpp index 9ba494e40595..0c8bc206bb01 100644 --- a/c_glib/arrow-glib/file-system.cpp +++ b/c_glib/arrow-glib/file-system.cpp @@ -640,6 +640,42 @@ garrow_file_system_create(const gchar *uri, GError **error) } } +/** + * garrow_file_system_example_accept_options: + * @string_value: A string option value. + * @int_value: An integer option value. + * @typed_value: An integer used to build a typed option value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * This is a showcase demonstrating that backend-specific options, stored + * as `std::any` in #arrow::fs::FileSystemFactoryOptions, can be built from + * the C GLib bindings and consumed by Arrow C++. + * + * Returns: (nullable) (transfer full): A string describing the options + * received by Arrow C++, or %NULL on error. + * + * Since: 25.0.0 + */ +gchar * +garrow_file_system_example_accept_options(const gchar *string_value, + gint int_value, + gint typed_value, + GError **error) +{ + arrow::fs::FileSystemFactoryOptions options = { + {"example_option_string", std::any{std::string(string_value)}}, + {"example_option_int", std::any{static_cast(int_value)}}, + {"example_option_typed_var", + std::any{arrow::fs::ExampleOption(static_cast(typed_value))}}, + }; + auto result = arrow::fs::ExampleAcceptOptions(options); + if (garrow::check(error, result, "[file-system][example-accept-options]")) { + return g_strdup(result->c_str()); + } else { + return NULL; + } +} + /** * garrow_file_system_get_type_name: * @file_system: A #GArrowFileSystem. diff --git a/c_glib/arrow-glib/file-system.h b/c_glib/arrow-glib/file-system.h index 9a903c6af68c..9600f17f3df6 100644 --- a/c_glib/arrow-glib/file-system.h +++ b/c_glib/arrow-glib/file-system.h @@ -100,6 +100,13 @@ struct _GArrowFileSystemClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_25_0 +gchar * +garrow_file_system_example_accept_options(const gchar *string_value, + gint int_value, + gint typed_value, + GError **error); + GARROW_AVAILABLE_IN_3_0 GArrowFileSystem * garrow_file_system_create(const gchar *uri, GError **error); diff --git a/c_glib/test/test-mock-file-system.rb b/c_glib/test/test-mock-file-system.rb index c6148d6990ee..5ac783084d8b 100644 --- a/c_glib/test/test-mock-file-system.rb +++ b/c_glib/test/test-mock-file-system.rb @@ -27,4 +27,11 @@ def setup def test_type_name assert_equal("mock", @fs.type_name) end + + def test_example_accept_options + assert_equal("example_option_string=str(hi);" + + "example_option_int=int(42);" + + "example_option_typed_var=typed(7);", + Arrow::FileSystem.example_accept_options("hi", 42, 7)) + end end diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index f92336e004ea..d5ec64768e59 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -964,6 +964,23 @@ Result> FileSystemFromUriReal( } // namespace +Result ExampleAcceptOptions(const FileSystemFactoryOptions& options) { + if (options.empty()) return Status::Invalid("no options"); + std::string out; + for (const auto& [key, value] : options) { + if (const auto* s = std::any_cast(&value)) { + out += key + "=str(" + *s + ");"; + } else if (const auto* i = std::any_cast(&value)) { + out += key + "=int(" + std::to_string(*i) + ");"; + } else if (const auto* t = std::any_cast(&value)) { + out += key + "=typed(" + std::to_string(t->value()) + ");"; + } else { + return Status::Invalid("option '", key, "' has unsupported type"); + } + } + return out; +} + Result> FileSystemFromUri(const std::string& uri_string, std::string* out_path) { return FileSystemFromUriAndOptions(uri_string, /*options=*/{}, io::default_io_context(), diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index a0478763a463..2c95fdc5fc97 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -361,6 +361,18 @@ class ARROW_EXPORT FileSystem using FileSystemFactoryOptions = std::vector>; +class ARROW_EXPORT ExampleOption { + public: + explicit ExampleOption(int value) : value_(value) {} + int value() const { return value_; } + + private: + int value_; +}; + +ARROW_EXPORT +Result ExampleAcceptOptions(const FileSystemFactoryOptions& options); + struct FileSystemFactory { std::function>( const Uri& uri, const FileSystemFactoryOptions& options, diff --git a/python/pyarrow/_fs.pyx b/python/pyarrow/_fs.pyx index 0739b6acba32..255d4a720342 100644 --- a/python/pyarrow/_fs.pyx +++ b/python/pyarrow/_fs.pyx @@ -1651,3 +1651,21 @@ def _copy_files_selector(FileSystem source_fs, FileSelector source_sel, destination_fs.unwrap(), c_destination_base_dir, c_default_io_context(), chunk_size, use_threads, )) + + +def _example_accept_options(str value, int value_int, int value_typed): + cdef: + CFSFileSystemFactoryOptions options + pair[c_string, c_any] entry + c_string result + entry.first = tobytes("example_option_string") + entry.second = tobytes(value) + options.push_back(entry) + entry.first = tobytes("example_option_int") + entry.second = value_int + options.push_back(entry) + entry.first = tobytes("example_option_typed_var") + entry.second = CExampleOption(value_typed) + options.push_back(entry) + result = GetResultValue(CExampleAcceptOptions(options)) + return frombytes(result) diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py index 670ccaaf2455..6e81ff980aec 100644 --- a/python/pyarrow/fs.py +++ b/python/pyarrow/fs.py @@ -33,6 +33,7 @@ PyFileSystem, _copy_files, _copy_files_selector, + _example_accept_options ) # For backward compatibility. diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd index f97f2ff1c473..160584684700 100644 --- a/python/pyarrow/includes/common.pxd +++ b/python/pyarrow/includes/common.pxd @@ -20,6 +20,7 @@ from libc.stdint cimport * from libcpp cimport bool as c_bool, nullptr +from libcpp.any cimport any as c_any from libcpp.functional cimport function from libcpp.memory cimport (shared_ptr, unique_ptr, make_shared, static_pointer_cast, dynamic_pointer_cast) diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd index d18dc2d2bde1..432128ac7837 100644 --- a/python/pyarrow/includes/libarrow_fs.pxd +++ b/python/pyarrow/includes/libarrow_fs.pxd @@ -29,6 +29,12 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil: CFileType_File "arrow::fs::FileType::File" CFileType_Directory "arrow::fs::FileType::Directory" + ctypedef vector[pair[c_string, c_any]] CFSFileSystemFactoryOptions "arrow::fs::FileSystemFactoryOptions" + CResult[c_string] CExampleAcceptOptions "arrow::fs::ExampleAcceptOptions"(const CFSFileSystemFactoryOptions& options) + cdef cppclass CExampleOption "arrow::fs::ExampleOption": + CExampleOption(int value) + int value() const + cdef cppclass CFileInfo "arrow::fs::FileInfo": CFileInfo() CFileInfo(CFileInfo) diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index 5bf1950c0654..d3450a8d67a5 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -36,7 +36,7 @@ from pyarrow.fs import (FileType, FileInfo, FileSelector, FileSystem, LocalFileSystem, SubTreeFileSystem, _MockFileSystem, FileSystemHandler, PyFileSystem, FSSpecHandler, - copy_files) + copy_files, _example_accept_options) from pyarrow.util import find_free_port @@ -2285,3 +2285,10 @@ def test_huggingface_filesystem_from_uri(): expected_fs = PyFileSystem(FSSpecHandler(HfFileSystem())) assert fs == expected_fs assert path == "datasets/stanfordnlp/imdb/plain_text/train-00000-of-00001.parquet" + + +def test_example_accept_options(): + result = _example_accept_options("hi", 42, 7) + expected = "example_option_string=str(hi);example_option_int=int(42);" + \ + "example_option_typed_var=typed(7);" + assert result == expected diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 52274d29f0d9..d1cdc4f43093 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -1248,14 +1248,6 @@ Field__Equals <- function(field, other, check_metadata) { .Call(`_arrow_Field__Equals`, field, other, check_metadata) } -Field__nullable <- function(field) { - .Call(`_arrow_Field__nullable`, field) -} - -Field__type <- function(field) { - .Call(`_arrow_Field__type`, field) -} - Field__HasMetadata <- function(field) { .Call(`_arrow_Field__HasMetadata`, field) } @@ -1272,6 +1264,14 @@ Field__RemoveMetadata <- function(field) { .Call(`_arrow_Field__RemoveMetadata`, field) } +Field__nullable <- function(field) { + .Call(`_arrow_Field__nullable`, field) +} + +Field__type <- function(field) { + .Call(`_arrow_Field__type`, field) +} + fs___FileInfo__type <- function(x) { .Call(`_arrow_fs___FileInfo__type`, x) } @@ -1384,6 +1384,10 @@ fs___FileSystem__type_name <- function(file_system) { .Call(`_arrow_fs___FileSystem__type_name`, file_system) } +fs___example_accept_options <- function(string_value, int_value, typed_value) { + .Call(`_arrow_fs___example_accept_options`, string_value, int_value, typed_value) +} + fs___SubTreeFileSystem__create <- function(base_path, base_fs) { .Call(`_arrow_fs___SubTreeFileSystem__create`, base_path, base_fs) } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 5482c8679f68..4f33b82975f2 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -3238,22 +3238,6 @@ BEGIN_CPP11 END_CPP11 } // field.cpp -bool Field__nullable(const std::shared_ptr& field); -extern "C" SEXP _arrow_Field__nullable(SEXP field_sexp){ -BEGIN_CPP11 - arrow::r::Input&>::type field(field_sexp); - return cpp11::as_sexp(Field__nullable(field)); -END_CPP11 -} -// field.cpp -std::shared_ptr Field__type(const std::shared_ptr& field); -extern "C" SEXP _arrow_Field__type(SEXP field_sexp){ -BEGIN_CPP11 - arrow::r::Input&>::type field(field_sexp); - return cpp11::as_sexp(Field__type(field)); -END_CPP11 -} -// field.cpp bool Field__HasMetadata(const std::shared_ptr& field); extern "C" SEXP _arrow_Field__HasMetadata(SEXP field_sexp){ BEGIN_CPP11 @@ -3286,6 +3270,22 @@ BEGIN_CPP11 return cpp11::as_sexp(Field__RemoveMetadata(field)); END_CPP11 } +// field.cpp +bool Field__nullable(const std::shared_ptr& field); +extern "C" SEXP _arrow_Field__nullable(SEXP field_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type field(field_sexp); + return cpp11::as_sexp(Field__nullable(field)); +END_CPP11 +} +// field.cpp +std::shared_ptr Field__type(const std::shared_ptr& field); +extern "C" SEXP _arrow_Field__type(SEXP field_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type field(field_sexp); + return cpp11::as_sexp(Field__type(field)); +END_CPP11 +} // filesystem.cpp fs::FileType fs___FileInfo__type(const std::shared_ptr& x); extern "C" SEXP _arrow_fs___FileInfo__type(SEXP x_sexp){ @@ -3544,6 +3544,16 @@ BEGIN_CPP11 END_CPP11 } // filesystem.cpp +std::string fs___example_accept_options(const std::string& string_value, int int_value, int typed_value); +extern "C" SEXP _arrow_fs___example_accept_options(SEXP string_value_sexp, SEXP int_value_sexp, SEXP typed_value_sexp){ +BEGIN_CPP11 + arrow::r::Input::type string_value(string_value_sexp); + arrow::r::Input::type int_value(int_value_sexp); + arrow::r::Input::type typed_value(typed_value_sexp); + return cpp11::as_sexp(fs___example_accept_options(string_value, int_value, typed_value)); +END_CPP11 +} +// filesystem.cpp std::shared_ptr fs___SubTreeFileSystem__create(const std::string& base_path, const std::shared_ptr& base_fs); extern "C" SEXP _arrow_fs___SubTreeFileSystem__create(SEXP base_path_sexp, SEXP base_fs_sexp){ BEGIN_CPP11 @@ -5878,10 +5888,10 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_compute__GetFunctionNames", (DL_FUNC) &_arrow_compute__GetFunctionNames, 0}, { "_arrow_compute__Initialize", (DL_FUNC) &_arrow_compute__Initialize, 0}, { "_arrow_RegisterScalarUDF", (DL_FUNC) &_arrow_RegisterScalarUDF, 2}, - { "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, - { "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, - { "_arrow_set_timezone_database", (DL_FUNC) &_arrow_set_timezone_database, 1}, - { "_arrow_csv___WriteOptions__initialize", (DL_FUNC) &_arrow_csv___WriteOptions__initialize, 1}, + { "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, + { "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, + { "_arrow_set_timezone_database", (DL_FUNC) &_arrow_set_timezone_database, 1}, + { "_arrow_csv___WriteOptions__initialize", (DL_FUNC) &_arrow_csv___WriteOptions__initialize, 1}, { "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, { "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, { "_arrow_csv___ReadOptions__column_names", (DL_FUNC) &_arrow_csv___ReadOptions__column_names, 1}, @@ -6054,12 +6064,12 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1}, { "_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1}, { "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 3}, - { "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, - { "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1}, { "_arrow_Field__HasMetadata", (DL_FUNC) &_arrow_Field__HasMetadata, 1}, { "_arrow_Field__metadata", (DL_FUNC) &_arrow_Field__metadata, 1}, { "_arrow_Field__WithMetadata", (DL_FUNC) &_arrow_Field__WithMetadata, 2}, { "_arrow_Field__RemoveMetadata", (DL_FUNC) &_arrow_Field__RemoveMetadata, 1}, + { "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, + { "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1}, { "_arrow_fs___FileInfo__type", (DL_FUNC) &_arrow_fs___FileInfo__type, 1}, { "_arrow_fs___FileInfo__set_type", (DL_FUNC) &_arrow_fs___FileInfo__set_type, 2}, { "_arrow_fs___FileInfo__path", (DL_FUNC) &_arrow_fs___FileInfo__path, 1}, @@ -6088,6 +6098,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_fs___FileSystem__OpenOutputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenOutputStream, 2}, { "_arrow_fs___FileSystem__OpenAppendStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenAppendStream, 2}, { "_arrow_fs___FileSystem__type_name", (DL_FUNC) &_arrow_fs___FileSystem__type_name, 1}, + { "_arrow_fs___example_accept_options", (DL_FUNC) &_arrow_fs___example_accept_options, 3}, { "_arrow_fs___SubTreeFileSystem__create", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__create, 2}, { "_arrow_fs___SubTreeFileSystem__base_fs", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_fs, 1}, { "_arrow_fs___SubTreeFileSystem__base_path", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_path, 1}, diff --git a/r/src/filesystem.cpp b/r/src/filesystem.cpp index 82cf99514d8c..f9550e5386b3 100644 --- a/r/src/filesystem.cpp +++ b/r/src/filesystem.cpp @@ -238,6 +238,17 @@ std::string fs___FileSystem__type_name( return file_system->type_name(); } +// [[arrow::export]] +std::string fs___example_accept_options(const std::string& string_value, int int_value, + int typed_value) { + fs::FileSystemFactoryOptions options = { + {"example_option_string", std::any{string_value}}, + {"example_option_int", std::any{int_value}}, + {"example_option_typed_var", std::any{fs::ExampleOption(typed_value)}}, + }; + return ValueOrStop(fs::ExampleAcceptOptions(options)); +} + // [[arrow::export]] std::shared_ptr fs___SubTreeFileSystem__create( const std::string& base_path, const std::shared_ptr& base_fs) { diff --git a/r/tests/testthat/test-filesystem.R b/r/tests/testthat/test-filesystem.R index 08715584467e..a69a6a67e6ee 100644 --- a/r/tests/testthat/test-filesystem.R +++ b/r/tests/testthat/test-filesystem.R @@ -202,3 +202,10 @@ test_that("gs_bucket", { ) expect_identical(bucket$base_path, "arrow-datasets/") }) + +test_that("ExampleAcceptOptions round-trips std::any options", { + expect_equal( + fs___example_accept_options("hi", 42L, 7L), + "example_option_string=str(hi);example_option_int=int(42);example_option_typed_var=typed(7);" + ) +})