diff --git a/c_glib/arrow-glib/file-system.cpp b/c_glib/arrow-glib/file-system.cpp index 9ba494e40595..0c8bc206bb01 100644 --- a/c_glib/arrow-glib/file-system.cpp +++ b/c_glib/arrow-glib/file-system.cpp @@ -640,6 +640,42 @@ garrow_file_system_create(const gchar *uri, GError **error) } } +/** + * garrow_file_system_example_accept_options: + * @string_value: A string option value. + * @int_value: An integer option value. + * @typed_value: An integer used to build a typed option value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * This is a showcase demonstrating that backend-specific options, stored + * as `std::any` in #arrow::fs::FileSystemFactoryOptions, can be built from + * the C GLib bindings and consumed by Arrow C++. + * + * Returns: (nullable) (transfer full): A string describing the options + * received by Arrow C++, or %NULL on error. + * + * Since: 25.0.0 + */ +gchar * +garrow_file_system_example_accept_options(const gchar *string_value, + gint int_value, + gint typed_value, + GError **error) +{ + arrow::fs::FileSystemFactoryOptions options = { + {"example_option_string", std::any{std::string(string_value)}}, + {"example_option_int", std::any{static_cast(int_value)}}, + {"example_option_typed_var", + std::any{arrow::fs::ExampleOption(static_cast(typed_value))}}, + }; + auto result = arrow::fs::ExampleAcceptOptions(options); + if (garrow::check(error, result, "[file-system][example-accept-options]")) { + return g_strdup(result->c_str()); + } else { + return NULL; + } +} + /** * garrow_file_system_get_type_name: * @file_system: A #GArrowFileSystem. diff --git a/c_glib/arrow-glib/file-system.h b/c_glib/arrow-glib/file-system.h index 9a903c6af68c..9600f17f3df6 100644 --- a/c_glib/arrow-glib/file-system.h +++ b/c_glib/arrow-glib/file-system.h @@ -100,6 +100,13 @@ struct _GArrowFileSystemClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_25_0 +gchar * +garrow_file_system_example_accept_options(const gchar *string_value, + gint int_value, + gint typed_value, + GError **error); + GARROW_AVAILABLE_IN_3_0 GArrowFileSystem * garrow_file_system_create(const gchar *uri, GError **error); diff --git a/c_glib/test/test-mock-file-system.rb b/c_glib/test/test-mock-file-system.rb index c6148d6990ee..5ac783084d8b 100644 --- a/c_glib/test/test-mock-file-system.rb +++ b/c_glib/test/test-mock-file-system.rb @@ -27,4 +27,11 @@ def setup def test_type_name assert_equal("mock", @fs.type_name) end + + def test_example_accept_options + assert_equal("example_option_string=str(hi);" + + "example_option_int=int(42);" + + "example_option_typed_var=typed(7);", + Arrow::FileSystem.example_accept_options("hi", 42, 7)) + end end diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 1e2f3e8f8f1a..3d9b2ba72d41 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -70,6 +70,7 @@ if [ "${ARROW_ENABLE_THREADING:-ON}" = "OFF" ]; then ARROW_JEMALLOC=OFF ARROW_MIMALLOC=OFF ARROW_S3=OFF + ARROW_S3_MODULE=OFF ARROW_WITH_OPENTELEMETRY=OFF fi @@ -229,6 +230,7 @@ else -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ -DARROW_S3=${ARROW_S3:-OFF} \ + -DARROW_S3_MODULE=${ARROW_S3_MODULE:-OFF} \ -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \ -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \ -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \ diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index e6330df42603..ee46f4d256ce 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -128,7 +128,7 @@ if(ARROW_S3) endif() endif() - if(ARROW_S3_MODULE) + if(ARROW_S3_MODULE AND ARROW_BUILD_TESTS) add_arrow_test(s3fs_module_test SOURCES s3fs_module_test.cc diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index 8281bed7ce16..d5ec64768e59 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -893,10 +893,10 @@ Status LoadFileSystemFactories(const char* libpath) { namespace { -Result> FileSystemFromUriReal(const Uri& uri, - const std::string& uri_string, - const io::IOContext& io_context, - std::string* out_path) { +Result> FileSystemFromUriReal( + const Uri& uri, const std::string& uri_string, + const FileSystemFactoryOptions& options, const io::IOContext& io_context, + std::string* out_path) { const auto scheme = uri.scheme(); { @@ -904,9 +904,13 @@ Result> FileSystemFromUriReal(const Uri& uri, auto* factory, FileSystemFactoryRegistry::GetInstance()->FactoryForScheme(scheme)); if (factory != nullptr) { - return factory->function(uri, io_context, out_path); + return factory->function(uri, options, io_context, out_path); } } + if (!options.empty()) { + return Status::NotImplemented("Filesystem options are not supported yet for scheme '", + scheme, "', got ", options.size(), " option(s)"); + } if (scheme == "abfs" || scheme == "abfss") { #ifdef ARROW_AZURE @@ -960,16 +964,47 @@ Result> FileSystemFromUriReal(const Uri& uri, } // namespace +Result ExampleAcceptOptions(const FileSystemFactoryOptions& options) { + if (options.empty()) return Status::Invalid("no options"); + std::string out; + for (const auto& [key, value] : options) { + if (const auto* s = std::any_cast(&value)) { + out += key + "=str(" + *s + ");"; + } else if (const auto* i = std::any_cast(&value)) { + out += key + "=int(" + std::to_string(*i) + ");"; + } else if (const auto* t = std::any_cast(&value)) { + out += key + "=typed(" + std::to_string(t->value()) + ");"; + } else { + return Status::Invalid("option '", key, "' has unsupported type"); + } + } + return out; +} + Result> FileSystemFromUri(const std::string& uri_string, std::string* out_path) { - return FileSystemFromUri(uri_string, io::default_io_context(), out_path); + return FileSystemFromUriAndOptions(uri_string, /*options=*/{}, io::default_io_context(), + out_path); +} + +Result> FileSystemFromUriAndOptions( + const std::string& uri_string, const FileSystemFactoryOptions& options, + std::string* out_path) { + return FileSystemFromUriAndOptions(uri_string, options, io::default_io_context(), + out_path); } Result> FileSystemFromUri(const std::string& uri_string, const io::IOContext& io_context, std::string* out_path) { + return FileSystemFromUriAndOptions(uri_string, /*options=*/{}, io_context, out_path); +} + +Result> FileSystemFromUriAndOptions( + const std::string& uri_string, const FileSystemFactoryOptions& options, + const io::IOContext& io_context, std::string* out_path) { ARROW_ASSIGN_OR_RAISE(auto fsuri, ParseFileSystemUri(uri_string)); - return FileSystemFromUriReal(fsuri, uri_string, io_context, out_path); + return FileSystemFromUriReal(fsuri, uri_string, options, io_context, out_path); } Result> FileSystemFromUriOrPath(const std::string& uri_string, diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 3a47eb62f524..2c95fdc5fc97 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -28,6 +29,7 @@ #include "arrow/filesystem/type_fwd.h" #include "arrow/io/interfaces.h" +#include "arrow/result.h" #include "arrow/type_fwd.h" #include "arrow/util/compare.h" #include "arrow/util/macros.h" @@ -357,13 +359,56 @@ class ARROW_EXPORT FileSystem bool default_async_is_sync_ = true; }; +using FileSystemFactoryOptions = std::vector>; + +class ARROW_EXPORT ExampleOption { + public: + explicit ExampleOption(int value) : value_(value) {} + int value() const { return value_; } + + private: + int value_; +}; + +ARROW_EXPORT +Result ExampleAcceptOptions(const FileSystemFactoryOptions& options); + struct FileSystemFactory { std::function>( - const Uri& uri, const io::IOContext& io_context, std::string* out_path)> + const Uri& uri, const FileSystemFactoryOptions& options, + const io::IOContext& io_context, std::string* out_path)> function; std::string_view file; int line; + /// Construct from an options-aware factory function. + FileSystemFactory(std::function>( + const Uri&, const FileSystemFactoryOptions&, const io::IOContext&, + std::string*)> + fn, + std::string_view file, int line) + : function(std::move(fn)), file(file), line(line) {} + + /// Construct from a non-options aware factory function maintaining source compatibility + /// with existing factories. + FileSystemFactory(std::function>( + const Uri&, const io::IOContext&, std::string*)> + fn, + std::string_view file, int line) + : function([fn = std::move(fn)]( + const Uri& uri, const FileSystemFactoryOptions& options, + const io::IOContext& ctx, + std::string* out_path) -> Result> { + if (!options.empty()) { + return Status::NotImplemented( + "Filesystem factory does not support additional options, got ", + options.size(), " option(s)"); + } + return fn(uri, ctx, out_path); + }), + file(file), + line(line) {} + bool operator==(const FileSystemFactory& other) const { // In the case where libarrow is linked statically both to the executable and to a // dynamically loaded filesystem implementation library, the library contains a @@ -547,6 +592,30 @@ ARROW_EXPORT Result> FileSystemFromUri(const std::string& uri, std::string* out_path = NULLPTR); +/// \brief Create a new FileSystem by URI with extended backend-specific filesystem +/// options +/// +/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3", +/// "gs" and "gcs". +/// +/// Support for other schemes can be added using RegisterFileSystemFactory. +/// +/// \param[in] uri the URI to give access to +/// \param[in] options a list of backend-specific filesystem options +/// Each option is a (name, value) pair. +/// The expected type is specific to the backend and +/// option name. +/// Options are forwarded to schemes dispatched through a registered +/// FileSystemFactory. Non-empty options return NotImplemented for a registered +/// FileSystemFactory that does not support them or for schemes not handled by +/// a registered factory. +/// \param[out] out_path (optional) Path inside the filesystem. +/// \return out_fs FileSystem instance. +ARROW_EXPORT +Result> FileSystemFromUriAndOptions( + const std::string& uri, const FileSystemFactoryOptions& options, + std::string* out_path = NULLPTR); + /// \brief Create a new FileSystem by URI with a custom IO context /// /// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3", @@ -563,6 +632,31 @@ Result> FileSystemFromUri(const std::string& uri, const io::IOContext& io_context, std::string* out_path = NULLPTR); +/// \brief Create a new FileSystem by URI with a custom IO context with backend-specific +/// filesystem options +/// +/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3", +/// "gs" and "gcs". +/// +/// Support for other schemes can be added using RegisterFileSystemFactory. +/// +/// \param[in] uri a URI-based path, ex: file:///some/local/path +/// \param[in] options a list of backend-specific filesystem options +/// Each option is a (name, value) pair. +/// The expected type is specific to the backend and +/// option name. +/// Options are forwarded to schemes dispatched through a registered +/// FileSystemFactory. Non-empty options return NotImplemented for a registered +/// FileSystemFactory that does not support them or for schemes not handled by +/// a registered factory. +/// \param[in] io_context an IOContext which will be associated with the filesystem +/// \param[out] out_path (optional) Path inside the filesystem. +/// \return out_fs FileSystem instance. +ARROW_EXPORT +Result> FileSystemFromUriAndOptions( + const std::string& uri, const FileSystemFactoryOptions& options, + const io::IOContext& io_context, std::string* out_path = NULLPTR); + /// \brief Create a new FileSystem by URI /// /// Support for other schemes can be added using RegisterFileSystemFactory. diff --git a/cpp/src/arrow/filesystem/filesystem_test.cc b/cpp/src/arrow/filesystem/filesystem_test.cc index 5072c3a8c25b..10a3922e36ac 100644 --- a/cpp/src/arrow/filesystem/filesystem_test.cc +++ b/cpp/src/arrow/filesystem/filesystem_test.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include @@ -640,6 +641,11 @@ TEST_F(TestMockFS, FileSystemFromUri) { Invalid, ::testing::HasSubstr("syntax error at character ' ' (position 12)"), FileSystemFromUri("mock:/folder name/bar", &path)); CheckDirs({}); + FileSystemFactoryOptions options{{"some_option", 1}}; + EXPECT_RAISES_WITH_MESSAGE_THAT( + NotImplemented, ::testing::HasSubstr("options are not supported"), + FileSystemFromUriAndOptions("mock:///foo/bar", options, &path)); + CheckDirs({}); } //////////////////////////////////////////////////////////////////////////// diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index 2e91783c92dc..212d91989659 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include @@ -29,6 +30,7 @@ #include "arrow/filesystem/path_util.h" #include "arrow/filesystem/test_util.h" #include "arrow/filesystem/util_internal.h" +#include "arrow/testing/examplefs.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" #include "arrow/util/io_util.h" @@ -83,6 +85,12 @@ Result> FSFromUriOrPath(const std::string& uri, //////////////////////////////////////////////////////////////////////////// // Registered FileSystemFactory tests +struct ConcreteTypedOption : ExampleTypedOption { + explicit ConcreteTypedOption(int value) : value_(value) {} + int value() const override { return value_; } + int value_; +}; + class SlowFileSystemPublicProps : public SlowFileSystem { public: SlowFileSystemPublicProps(std::shared_ptr base_fs, double average_latency, @@ -144,7 +152,6 @@ TEST(FileSystemFromUri, LoadedRegisteredFactory) { EXPECT_THAT(FileSystemFromUri("example:///hey/yo", &path), Raises(StatusCode::Invalid)); EXPECT_THAT(LoadFileSystemFactories(ARROW_FILESYSTEM_EXAMPLE_LIBPATH), Ok()); - ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFromUri("example:///hey/yo", &path)); EXPECT_EQ(path, "/hey/yo"); EXPECT_EQ(fs->type_name(), "local"); @@ -171,10 +178,21 @@ TEST(FileSystemFromUri, RuntimeRegisteredFactory) { } FileSystemRegistrar kSegfaultFileSystemModule[]{ - ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}), - ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}), - ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}), -}; + ARROW_REGISTER_FILESYSTEM( + "segfault", + std::function>( + const Uri&, const io::IOContext&, std::string*)>(nullptr), + {}), + ARROW_REGISTER_FILESYSTEM( + "segfault", + std::function>( + const Uri&, const io::IOContext&, std::string*)>(nullptr), + {}), + ARROW_REGISTER_FILESYSTEM( + "segfault", + std::function>( + const Uri&, const io::IOContext&, std::string*)>(nullptr), + {})}; TEST(FileSystemFromUri, LinkedRegisteredFactoryNameCollision) { // Since multiple registrars are defined in this translation unit which all // register factories for the 'segfault' scheme, using that scheme in FileSystemFromUri @@ -185,6 +203,46 @@ TEST(FileSystemFromUri, LinkedRegisteredFactoryNameCollision) { // other schemes are not affected by the collision EXPECT_THAT(FileSystemFromUri("slowfile:///hey/yo", &path), Ok()); } + +TEST(FileSystemFromUriAndOptions, LoadedRegisteredFactory) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Emscripten dynamic library testing disabled"; +#endif + std::string path; + EXPECT_THAT(LoadFileSystemFactories(ARROW_FILESYSTEM_EXAMPLE_LIBPATH), Ok()); + // Validate extra options are forwarded to the factory. + FileSystemFactoryOptions options{ + {"example_option_string", std::string("example_value")}, + {"example_option_int", 42}, + {"example_typed_option", + std::shared_ptr(std::make_shared(12345))}, + }; + ASSERT_OK_AND_ASSIGN(auto fs, + FileSystemFromUriAndOptions("example:///hey/yo", options, &path)); + EXPECT_EQ(path, "/hey/yo/example_value/42/12345"); + EXPECT_EQ(fs->type_name(), "local"); +} + +TEST(FileSystemFromUriAndOptions, RuntimeRegisteredFactory) { + std::string path; + EXPECT_THAT(FileSystemFromUriAndOptions("slowfile3:///hey/yo", {}, &path), + Raises(StatusCode::Invalid)); + + EXPECT_THAT( + RegisterFileSystemFactory("slowfile3", {SlowFileSystemFactory, __FILE__, __LINE__}), + Ok()); + + ASSERT_OK_AND_ASSIGN(auto fs, + FileSystemFromUriAndOptions("slowfile3:///hey/yo", {}, &path)); + EXPECT_EQ(path, "/hey/yo"); + EXPECT_EQ(fs->type_name(), "slow"); + + // Validate that legacy (3-arg) factories reject non-empty options. + FileSystemFactoryOptions unsupported{{"some_option", 1}}; + EXPECT_THAT(FileSystemFromUriAndOptions("slowfile3:///hey/yo", unsupported, &path), + Raises(StatusCode::NotImplemented)); +} + //////////////////////////////////////////////////////////////////////////// // Misc tests diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 0c15f6f18444..515d8d252881 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -3603,11 +3603,17 @@ Result ResolveS3BucketRegion(const std::string& bucket) { auto kS3FileSystemModule = ARROW_REGISTER_FILESYSTEM( "s3", - [](const arrow::util::Uri& uri, const io::IOContext& io_context, + [](const arrow::util::Uri& uri, const FileSystemFactoryOptions& options, + const io::IOContext& io_context, std::string* out_path) -> Result> { + if (!options.empty()) { + return Status::NotImplemented( + "S3 filesystem factory options are not supported yet, got: ", options.size(), + " option(s)"); + } RETURN_NOT_OK(EnsureS3Initialized()); - ARROW_ASSIGN_OR_RAISE(auto options, S3Options::FromUri(uri, out_path)); - return S3FileSystem::Make(options, io_context); + ARROW_ASSIGN_OR_RAISE(auto s3_options, S3Options::FromUri(uri, out_path)); + return S3FileSystem::Make(s3_options, io_context); }, [] { DCHECK_OK(EnsureS3Finalized()); }); diff --git a/cpp/src/arrow/filesystem/s3fs_module_test.cc b/cpp/src/arrow/filesystem/s3fs_module_test.cc index 987a8979b271..c7b208192d06 100644 --- a/cpp/src/arrow/filesystem/s3fs_module_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_module_test.cc @@ -16,6 +16,7 @@ // under the License. #include +#include #include #include #include @@ -82,4 +83,11 @@ TEST(S3Test, FromUri) { "&allow_bucket_creation=0&allow_bucket_deletion=0"); } +TEST(S3Test, FromUriRejectsOptions) { + FileSystemFactoryOptions options{{"some_option", 1}}; + EXPECT_RAISES_WITH_MESSAGE_THAT( + NotImplemented, ::testing::HasSubstr("options are not supported"), + FileSystemFromUriAndOptions("s3://bucket/key", options)); +} + } // namespace arrow::fs diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index 5c9d5f9d9071..da4651acdc50 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -15,9 +15,12 @@ // specific language governing permissions and limitations // under the License. +#include + #include "arrow/filesystem/filesystem.h" #include "arrow/filesystem/filesystem_library.h" #include "arrow/result.h" +#include "arrow/testing/examplefs.h" #include "arrow/util/uri.h" #include @@ -26,12 +29,41 @@ namespace arrow::fs { auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM( "example", - [](const Uri& uri, const io::IOContext& io_context, + [](const Uri& uri, const FileSystemFactoryOptions& options, + const io::IOContext& io_context, std::string* out_path) -> Result> { constexpr std::string_view kScheme = "example"; EXPECT_EQ(uri.scheme(), kScheme); auto local_uri = "file" + uri.ToString().substr(kScheme.size()); - return FileSystemFromUri(local_uri, io_context, out_path); + ARROW_ASSIGN_OR_RAISE(auto fs, FileSystemFromUri(local_uri, io_context, out_path)); + for (const auto& [key, value] : options) { + EXPECT_TRUE(value.has_value()); + if (key == "example_option_string") { + if (const auto* s = std::any_cast(&value)) { + if (out_path != nullptr) *out_path += "/" + *s; + } else { + ADD_FAILURE() << "example_option_string has wrong type"; + } + } else if (key == "example_option_int") { + if (const auto* i = std::any_cast(&value)) { + if (out_path != nullptr) *out_path += "/" + std::to_string(*i); + } else { + ADD_FAILURE() << "example_option_int has wrong type"; + } + } else if (key == "example_typed_option") { + if (const auto* opt = + std::any_cast>(&value)) { + if (out_path != nullptr) { + *out_path += "/" + std::to_string((*opt)->value()); + } + } else if (out_path != nullptr) { + *out_path += "/typed_cast_failed"; + } + } else { + ADD_FAILURE() << "Unexpected option: " << key; + } + } + return fs; }, {}); diff --git a/cpp/src/arrow/testing/examplefs.h b/cpp/src/arrow/testing/examplefs.h new file mode 100644 index 000000000000..e04b84c982ad --- /dev/null +++ b/cpp/src/arrow/testing/examplefs.h @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +namespace arrow::fs { + +class ExampleTypedOption { + public: + virtual ~ExampleTypedOption() = default; + virtual int value() const = 0; +}; + +} // namespace arrow::fs diff --git a/python/pyarrow/_fs.pyx b/python/pyarrow/_fs.pyx index 0739b6acba32..255d4a720342 100644 --- a/python/pyarrow/_fs.pyx +++ b/python/pyarrow/_fs.pyx @@ -1651,3 +1651,21 @@ def _copy_files_selector(FileSystem source_fs, FileSelector source_sel, destination_fs.unwrap(), c_destination_base_dir, c_default_io_context(), chunk_size, use_threads, )) + + +def _example_accept_options(str value, int value_int, int value_typed): + cdef: + CFSFileSystemFactoryOptions options + pair[c_string, c_any] entry + c_string result + entry.first = tobytes("example_option_string") + entry.second = tobytes(value) + options.push_back(entry) + entry.first = tobytes("example_option_int") + entry.second = value_int + options.push_back(entry) + entry.first = tobytes("example_option_typed_var") + entry.second = CExampleOption(value_typed) + options.push_back(entry) + result = GetResultValue(CExampleAcceptOptions(options)) + return frombytes(result) diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py index 670ccaaf2455..6e81ff980aec 100644 --- a/python/pyarrow/fs.py +++ b/python/pyarrow/fs.py @@ -33,6 +33,7 @@ PyFileSystem, _copy_files, _copy_files_selector, + _example_accept_options ) # For backward compatibility. diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd index f97f2ff1c473..160584684700 100644 --- a/python/pyarrow/includes/common.pxd +++ b/python/pyarrow/includes/common.pxd @@ -20,6 +20,7 @@ from libc.stdint cimport * from libcpp cimport bool as c_bool, nullptr +from libcpp.any cimport any as c_any from libcpp.functional cimport function from libcpp.memory cimport (shared_ptr, unique_ptr, make_shared, static_pointer_cast, dynamic_pointer_cast) diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd index d18dc2d2bde1..432128ac7837 100644 --- a/python/pyarrow/includes/libarrow_fs.pxd +++ b/python/pyarrow/includes/libarrow_fs.pxd @@ -29,6 +29,12 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil: CFileType_File "arrow::fs::FileType::File" CFileType_Directory "arrow::fs::FileType::Directory" + ctypedef vector[pair[c_string, c_any]] CFSFileSystemFactoryOptions "arrow::fs::FileSystemFactoryOptions" + CResult[c_string] CExampleAcceptOptions "arrow::fs::ExampleAcceptOptions"(const CFSFileSystemFactoryOptions& options) + cdef cppclass CExampleOption "arrow::fs::ExampleOption": + CExampleOption(int value) + int value() const + cdef cppclass CFileInfo "arrow::fs::FileInfo": CFileInfo() CFileInfo(CFileInfo) diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index 5bf1950c0654..d3450a8d67a5 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -36,7 +36,7 @@ from pyarrow.fs import (FileType, FileInfo, FileSelector, FileSystem, LocalFileSystem, SubTreeFileSystem, _MockFileSystem, FileSystemHandler, PyFileSystem, FSSpecHandler, - copy_files) + copy_files, _example_accept_options) from pyarrow.util import find_free_port @@ -2285,3 +2285,10 @@ def test_huggingface_filesystem_from_uri(): expected_fs = PyFileSystem(FSSpecHandler(HfFileSystem())) assert fs == expected_fs assert path == "datasets/stanfordnlp/imdb/plain_text/train-00000-of-00001.parquet" + + +def test_example_accept_options(): + result = _example_accept_options("hi", 42, 7) + expected = "example_option_string=str(hi);example_option_int=int(42);" + \ + "example_option_typed_var=typed(7);" + assert result == expected diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 52274d29f0d9..d1cdc4f43093 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -1248,14 +1248,6 @@ Field__Equals <- function(field, other, check_metadata) { .Call(`_arrow_Field__Equals`, field, other, check_metadata) } -Field__nullable <- function(field) { - .Call(`_arrow_Field__nullable`, field) -} - -Field__type <- function(field) { - .Call(`_arrow_Field__type`, field) -} - Field__HasMetadata <- function(field) { .Call(`_arrow_Field__HasMetadata`, field) } @@ -1272,6 +1264,14 @@ Field__RemoveMetadata <- function(field) { .Call(`_arrow_Field__RemoveMetadata`, field) } +Field__nullable <- function(field) { + .Call(`_arrow_Field__nullable`, field) +} + +Field__type <- function(field) { + .Call(`_arrow_Field__type`, field) +} + fs___FileInfo__type <- function(x) { .Call(`_arrow_fs___FileInfo__type`, x) } @@ -1384,6 +1384,10 @@ fs___FileSystem__type_name <- function(file_system) { .Call(`_arrow_fs___FileSystem__type_name`, file_system) } +fs___example_accept_options <- function(string_value, int_value, typed_value) { + .Call(`_arrow_fs___example_accept_options`, string_value, int_value, typed_value) +} + fs___SubTreeFileSystem__create <- function(base_path, base_fs) { .Call(`_arrow_fs___SubTreeFileSystem__create`, base_path, base_fs) } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 5482c8679f68..4f33b82975f2 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -3238,22 +3238,6 @@ BEGIN_CPP11 END_CPP11 } // field.cpp -bool Field__nullable(const std::shared_ptr& field); -extern "C" SEXP _arrow_Field__nullable(SEXP field_sexp){ -BEGIN_CPP11 - arrow::r::Input&>::type field(field_sexp); - return cpp11::as_sexp(Field__nullable(field)); -END_CPP11 -} -// field.cpp -std::shared_ptr Field__type(const std::shared_ptr& field); -extern "C" SEXP _arrow_Field__type(SEXP field_sexp){ -BEGIN_CPP11 - arrow::r::Input&>::type field(field_sexp); - return cpp11::as_sexp(Field__type(field)); -END_CPP11 -} -// field.cpp bool Field__HasMetadata(const std::shared_ptr& field); extern "C" SEXP _arrow_Field__HasMetadata(SEXP field_sexp){ BEGIN_CPP11 @@ -3286,6 +3270,22 @@ BEGIN_CPP11 return cpp11::as_sexp(Field__RemoveMetadata(field)); END_CPP11 } +// field.cpp +bool Field__nullable(const std::shared_ptr& field); +extern "C" SEXP _arrow_Field__nullable(SEXP field_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type field(field_sexp); + return cpp11::as_sexp(Field__nullable(field)); +END_CPP11 +} +// field.cpp +std::shared_ptr Field__type(const std::shared_ptr& field); +extern "C" SEXP _arrow_Field__type(SEXP field_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type field(field_sexp); + return cpp11::as_sexp(Field__type(field)); +END_CPP11 +} // filesystem.cpp fs::FileType fs___FileInfo__type(const std::shared_ptr& x); extern "C" SEXP _arrow_fs___FileInfo__type(SEXP x_sexp){ @@ -3544,6 +3544,16 @@ BEGIN_CPP11 END_CPP11 } // filesystem.cpp +std::string fs___example_accept_options(const std::string& string_value, int int_value, int typed_value); +extern "C" SEXP _arrow_fs___example_accept_options(SEXP string_value_sexp, SEXP int_value_sexp, SEXP typed_value_sexp){ +BEGIN_CPP11 + arrow::r::Input::type string_value(string_value_sexp); + arrow::r::Input::type int_value(int_value_sexp); + arrow::r::Input::type typed_value(typed_value_sexp); + return cpp11::as_sexp(fs___example_accept_options(string_value, int_value, typed_value)); +END_CPP11 +} +// filesystem.cpp std::shared_ptr fs___SubTreeFileSystem__create(const std::string& base_path, const std::shared_ptr& base_fs); extern "C" SEXP _arrow_fs___SubTreeFileSystem__create(SEXP base_path_sexp, SEXP base_fs_sexp){ BEGIN_CPP11 @@ -5878,10 +5888,10 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_compute__GetFunctionNames", (DL_FUNC) &_arrow_compute__GetFunctionNames, 0}, { "_arrow_compute__Initialize", (DL_FUNC) &_arrow_compute__Initialize, 0}, { "_arrow_RegisterScalarUDF", (DL_FUNC) &_arrow_RegisterScalarUDF, 2}, - { "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, - { "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, - { "_arrow_set_timezone_database", (DL_FUNC) &_arrow_set_timezone_database, 1}, - { "_arrow_csv___WriteOptions__initialize", (DL_FUNC) &_arrow_csv___WriteOptions__initialize, 1}, + { "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, + { "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, + { "_arrow_set_timezone_database", (DL_FUNC) &_arrow_set_timezone_database, 1}, + { "_arrow_csv___WriteOptions__initialize", (DL_FUNC) &_arrow_csv___WriteOptions__initialize, 1}, { "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, { "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, { "_arrow_csv___ReadOptions__column_names", (DL_FUNC) &_arrow_csv___ReadOptions__column_names, 1}, @@ -6054,12 +6064,12 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1}, { "_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1}, { "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 3}, - { "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, - { "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1}, { "_arrow_Field__HasMetadata", (DL_FUNC) &_arrow_Field__HasMetadata, 1}, { "_arrow_Field__metadata", (DL_FUNC) &_arrow_Field__metadata, 1}, { "_arrow_Field__WithMetadata", (DL_FUNC) &_arrow_Field__WithMetadata, 2}, { "_arrow_Field__RemoveMetadata", (DL_FUNC) &_arrow_Field__RemoveMetadata, 1}, + { "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, + { "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1}, { "_arrow_fs___FileInfo__type", (DL_FUNC) &_arrow_fs___FileInfo__type, 1}, { "_arrow_fs___FileInfo__set_type", (DL_FUNC) &_arrow_fs___FileInfo__set_type, 2}, { "_arrow_fs___FileInfo__path", (DL_FUNC) &_arrow_fs___FileInfo__path, 1}, @@ -6088,6 +6098,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_fs___FileSystem__OpenOutputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenOutputStream, 2}, { "_arrow_fs___FileSystem__OpenAppendStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenAppendStream, 2}, { "_arrow_fs___FileSystem__type_name", (DL_FUNC) &_arrow_fs___FileSystem__type_name, 1}, + { "_arrow_fs___example_accept_options", (DL_FUNC) &_arrow_fs___example_accept_options, 3}, { "_arrow_fs___SubTreeFileSystem__create", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__create, 2}, { "_arrow_fs___SubTreeFileSystem__base_fs", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_fs, 1}, { "_arrow_fs___SubTreeFileSystem__base_path", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_path, 1}, diff --git a/r/src/filesystem.cpp b/r/src/filesystem.cpp index 82cf99514d8c..f9550e5386b3 100644 --- a/r/src/filesystem.cpp +++ b/r/src/filesystem.cpp @@ -238,6 +238,17 @@ std::string fs___FileSystem__type_name( return file_system->type_name(); } +// [[arrow::export]] +std::string fs___example_accept_options(const std::string& string_value, int int_value, + int typed_value) { + fs::FileSystemFactoryOptions options = { + {"example_option_string", std::any{string_value}}, + {"example_option_int", std::any{int_value}}, + {"example_option_typed_var", std::any{fs::ExampleOption(typed_value)}}, + }; + return ValueOrStop(fs::ExampleAcceptOptions(options)); +} + // [[arrow::export]] std::shared_ptr fs___SubTreeFileSystem__create( const std::string& base_path, const std::shared_ptr& base_fs) { diff --git a/r/tests/testthat/test-filesystem.R b/r/tests/testthat/test-filesystem.R index 08715584467e..a69a6a67e6ee 100644 --- a/r/tests/testthat/test-filesystem.R +++ b/r/tests/testthat/test-filesystem.R @@ -202,3 +202,10 @@ test_that("gs_bucket", { ) expect_identical(bucket$base_path, "arrow-datasets/") }) + +test_that("ExampleAcceptOptions round-trips std::any options", { + expect_equal( + fs___example_accept_options("hi", 42L, 7L), + "example_option_string=str(hi);example_option_int=int(42);example_option_typed_var=typed(7);" + ) +})