Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions c_glib/arrow-glib/file-system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,42 @@ garrow_file_system_create(const gchar *uri, GError **error)
}
}

/**
 * garrow_file_system_example_accept_options:
 * @string_value: A string option value. Must not be %NULL.
 * @int_value: An integer option value.
 * @typed_value: An integer used to build a typed option value.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * This is a showcase demonstrating that backend-specific options, stored
 * as `std::any` in #arrow::fs::FileSystemFactoryOptions, can be built from
 * the C GLib bindings and consumed by Arrow C++.
 *
 * The three arguments are passed to Arrow C++ under the option names
 * `example_option_string`, `example_option_int` and
 * `example_option_typed_var`, in that order.
 *
 * Returns: (nullable) (transfer full): A string describing the options
 *   received by Arrow C++, or %NULL on error. Free it with g_free() when
 *   no longer needed.
 *
 * Since: 25.0.0
 */
gchar *
garrow_file_system_example_accept_options(const gchar *string_value,
                                          gint int_value,
                                          gint typed_value,
                                          GError **error)
{
  // Constructing std::string from a null pointer is undefined behavior, so
  // reject it up front and report it through @error like any other failure.
  if (string_value == NULL) {
    garrow::check(error,
                  arrow::Status::Invalid("string_value must not be NULL"),
                  "[file-system][example-accept-options]");
    return NULL;
  }

  // Each value is wrapped in std::any with a distinct C++ type (std::string,
  // int, arrow::fs::ExampleOption).
  arrow::fs::FileSystemFactoryOptions options = {
    {"example_option_string", std::any{std::string(string_value)}},
    {"example_option_int", std::any{static_cast<int>(int_value)}},
    {"example_option_typed_var",
     std::any{arrow::fs::ExampleOption(static_cast<int>(typed_value))}},
  };
  auto result = arrow::fs::ExampleAcceptOptions(options);
  if (garrow::check(error, result, "[file-system][example-accept-options]")) {
    // Hand the caller its own copy; the C++ string dies with `result`.
    return g_strdup(result->c_str());
  } else {
    return NULL;
  }
}

/**
* garrow_file_system_get_type_name:
* @file_system: A #GArrowFileSystem.
Expand Down
7 changes: 7 additions & 0 deletions c_glib/arrow-glib/file-system.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ struct _GArrowFileSystemClass
GObjectClass parent_class;
};

GARROW_AVAILABLE_IN_25_0
gchar *
garrow_file_system_example_accept_options(const gchar *string_value,
gint int_value,
gint typed_value,
GError **error);

GARROW_AVAILABLE_IN_3_0
GArrowFileSystem *
garrow_file_system_create(const gchar *uri, GError **error);
Expand Down
7 changes: 7 additions & 0 deletions c_glib/test/test-mock-file-system.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,11 @@ def setup
def test_type_name
assert_equal("mock", @fs.type_name)
end

def test_example_accept_options
  # One "name=kind(value);" entry is expected per option, in the order the
  # values are passed to the binding.
  expected = [
    "example_option_string=str(hi);",
    "example_option_int=int(42);",
    "example_option_typed_var=typed(7);",
  ].join
  actual = Arrow::FileSystem.example_accept_options("hi", 42, 7)
  assert_equal(expected, actual)
end
end
2 changes: 2 additions & 0 deletions ci/scripts/cpp_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ if [ "${ARROW_ENABLE_THREADING:-ON}" = "OFF" ]; then
ARROW_JEMALLOC=OFF
ARROW_MIMALLOC=OFF
ARROW_S3=OFF
ARROW_S3_MODULE=OFF
ARROW_WITH_OPENTELEMETRY=OFF
fi

Expand Down Expand Up @@ -229,6 +230,7 @@ else
-DARROW_PARQUET=${ARROW_PARQUET:-OFF} \
-DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \
-DARROW_S3=${ARROW_S3:-OFF} \
-DARROW_S3_MODULE=${ARROW_S3_MODULE:-OFF} \
-DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \
-DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \
-DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/filesystem/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ if(ARROW_S3)
endif()
endif()

if(ARROW_S3_MODULE)
if(ARROW_S3_MODULE AND ARROW_BUILD_TESTS)
add_arrow_test(s3fs_module_test
SOURCES
s3fs_module_test.cc
Expand Down
49 changes: 42 additions & 7 deletions cpp/src/arrow/filesystem/filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -893,20 +893,24 @@ Status LoadFileSystemFactories(const char* libpath) {

namespace {

Result<std::shared_ptr<FileSystem>> FileSystemFromUriReal(const Uri& uri,
const std::string& uri_string,
const io::IOContext& io_context,
std::string* out_path) {
Result<std::shared_ptr<FileSystem>> FileSystemFromUriReal(
const Uri& uri, const std::string& uri_string,
const FileSystemFactoryOptions& options, const io::IOContext& io_context,
std::string* out_path) {
const auto scheme = uri.scheme();

{
ARROW_ASSIGN_OR_RAISE(
auto* factory,
FileSystemFactoryRegistry::GetInstance()->FactoryForScheme(scheme));
if (factory != nullptr) {
return factory->function(uri, io_context, out_path);
return factory->function(uri, options, io_context, out_path);
}
}
if (!options.empty()) {
return Status::NotImplemented("Filesystem options are not supported yet for scheme '",
scheme, "', got ", options.size(), " option(s)");
}

if (scheme == "abfs" || scheme == "abfss") {
#ifdef ARROW_AZURE
Expand Down Expand Up @@ -960,16 +964,47 @@ Result<std::shared_ptr<FileSystem>> FileSystemFromUriReal(const Uri& uri,

} // namespace

/// \brief Describe the given options as a string
///
/// Each option is rendered as "<name>=<kind>(<value>);" where <kind> is "str"
/// for std::string, "int" for int and "typed" for ExampleOption values.
/// Entries are concatenated in the order they appear in `options`.
///
/// \param[in] options the (name, value) pairs to describe
/// \return the concatenated description, or Invalid if `options` is empty or
/// holds a value of any other type
Result<std::string> ExampleAcceptOptions(const FileSystemFactoryOptions& options) {
  if (options.empty()) {
    return Status::Invalid("no options");
  }

  std::string description;
  for (const auto& option : options) {
    const auto& name = option.first;
    const auto& payload = option.second;

    // The pointer form of std::any_cast yields nullptr on a type mismatch
    // instead of throwing, so each supported type can be probed in turn.
    if (const auto* text = std::any_cast<std::string>(&payload)) {
      description.append(name).append("=str(").append(*text).append(");");
      continue;
    }
    if (const auto* number = std::any_cast<int>(&payload)) {
      description.append(name)
          .append("=int(")
          .append(std::to_string(*number))
          .append(");");
      continue;
    }
    if (const auto* typed = std::any_cast<ExampleOption>(&payload)) {
      description.append(name)
          .append("=typed(")
          .append(std::to_string(typed->value()))
          .append(");");
      continue;
    }
    return Status::Invalid("option '", name, "' has unsupported type");
  }
  return description;
}

/// Create a FileSystem from a URI using the default IO context and no
/// backend-specific options.
///
/// Forwards to FileSystemFromUriAndOptions() with an empty option list.
Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri_string,
                                                      std::string* out_path) {
  // Only the options-aware forwarding call is kept; the superseded
  // `return FileSystemFromUri(...)` that preceded it made this one unreachable.
  return FileSystemFromUriAndOptions(uri_string, /*options=*/{}, io::default_io_context(),
                                     out_path);
}

/// Create a FileSystem from a URI and backend-specific options, using the
/// default IO context.
Result<std::shared_ptr<FileSystem>> FileSystemFromUriAndOptions(
    const std::string& uri_string, const FileSystemFactoryOptions& options,
    std::string* out_path) {
  // Delegate to the overload taking an explicit IO context.
  const auto& io_context = io::default_io_context();
  return FileSystemFromUriAndOptions(uri_string, options, io_context, out_path);
}

/// Create a FileSystem from a URI with a caller-supplied IO context and no
/// backend-specific options.
Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri_string,
                                                      const io::IOContext& io_context,
                                                      std::string* out_path) {
  // An empty option list selects the plain, options-free behavior.
  const FileSystemFactoryOptions no_options{};
  return FileSystemFromUriAndOptions(uri_string, no_options, io_context, out_path);
}

/// Create a FileSystem from a URI, backend-specific options and an IO context.
///
/// The URI string is parsed first; a parse failure is returned to the caller.
/// The parsed URI, the original string, the options and the IO context are
/// then handed to FileSystemFromUriReal().
Result<std::shared_ptr<FileSystem>> FileSystemFromUriAndOptions(
    const std::string& uri_string, const FileSystemFactoryOptions& options,
    const io::IOContext& io_context, std::string* out_path) {
  ARROW_ASSIGN_OR_RAISE(auto fsuri, ParseFileSystemUri(uri_string));
  // Only the call that passes `options` is kept; the superseded 4-argument
  // call no longer matches FileSystemFromUriReal's signature.
  return FileSystemFromUriReal(fsuri, uri_string, options, io_context, out_path);
}

Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(const std::string& uri_string,
Expand Down
96 changes: 95 additions & 1 deletion cpp/src/arrow/filesystem/filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#pragma once

#include <any>
#include <chrono>
#include <cstdint>
#include <functional>
Expand All @@ -28,6 +29,7 @@

#include "arrow/filesystem/type_fwd.h"
#include "arrow/io/interfaces.h"
#include "arrow/result.h"
#include "arrow/type_fwd.h"
#include "arrow/util/compare.h"
#include "arrow/util/macros.h"
Expand Down Expand Up @@ -357,13 +359,56 @@ class ARROW_EXPORT FileSystem
bool default_async_is_sync_ = true;
};

/// \brief Backend-specific filesystem options as an ordered list of
/// (name, value) pairs
///
/// Values are type-erased with std::any, so a consumer has to probe each value
/// for the concrete type it expects.
using FileSystemFactoryOptions = std::vector<std::pair<std::string, std::any>>;

class ARROW_EXPORT ExampleOption {
public:
explicit ExampleOption(int value) : value_(value) {}
Comment on lines +362 to +366
int value() const { return value_; }

private:
int value_;
};

ARROW_EXPORT
Result<std::string> ExampleAcceptOptions(const FileSystemFactoryOptions& options);

struct FileSystemFactory {
std::function<Result<std::shared_ptr<FileSystem>>(
const Uri& uri, const io::IOContext& io_context, std::string* out_path)>
const Uri& uri, const FileSystemFactoryOptions& options,
const io::IOContext& io_context, std::string* out_path)>
function;
std::string_view file;
int line;

/// Construct from an options-aware factory function.
FileSystemFactory(std::function<Result<std::shared_ptr<FileSystem>>(
const Uri&, const FileSystemFactoryOptions&, const io::IOContext&,
std::string*)>
fn,
std::string_view file, int line)
: function(std::move(fn)), file(file), line(line) {}

/// Construct from a non-options aware factory function maintaining source compatibility
/// with existing factories.
FileSystemFactory(std::function<Result<std::shared_ptr<FileSystem>>(
const Uri&, const io::IOContext&, std::string*)>
fn,
std::string_view file, int line)
: function([fn = std::move(fn)](
const Uri& uri, const FileSystemFactoryOptions& options,
const io::IOContext& ctx,
std::string* out_path) -> Result<std::shared_ptr<FileSystem>> {
if (!options.empty()) {
return Status::NotImplemented(
"Filesystem factory does not support additional options, got ",
options.size(), " option(s)");
}
return fn(uri, ctx, out_path);
}),
file(file),
line(line) {}

bool operator==(const FileSystemFactory& other) const {
// In the case where libarrow is linked statically both to the executable and to a
// dynamically loaded filesystem implementation library, the library contains a
Expand Down Expand Up @@ -547,6 +592,30 @@ ARROW_EXPORT
Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
std::string* out_path = NULLPTR);

/// \brief Create a new FileSystem by URI with extended backend-specific filesystem
/// options
///
/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3",
/// "gs" and "gcs".
///
/// Support for other schemes can be added using RegisterFileSystemFactory.
///
/// \param[in] uri the URI to give access to
/// \param[in] options a list of backend-specific filesystem options.
///            Each option is a (name, value) pair; the expected value type is
///            specific to the backend and option name.
///            Options are forwarded only to schemes dispatched through a
///            registered FileSystemFactory. Non-empty options yield
///            NotImplemented if that factory does not support them, or if the
///            scheme is not handled by a registered factory.
/// \param[out] out_path (optional) Path inside the filesystem.
/// \return out_fs FileSystem instance.
ARROW_EXPORT
Result<std::shared_ptr<FileSystem>> FileSystemFromUriAndOptions(
const std::string& uri, const FileSystemFactoryOptions& options,
std::string* out_path = NULLPTR);

/// \brief Create a new FileSystem by URI with a custom IO context
///
/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3",
Expand All @@ -563,6 +632,31 @@ Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
const io::IOContext& io_context,
std::string* out_path = NULLPTR);

/// \brief Create a new FileSystem by URI with a custom IO context and
/// backend-specific filesystem options
///
/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3",
/// "gs" and "gcs".
///
/// Support for other schemes can be added using RegisterFileSystemFactory.
///
/// \param[in] uri a URI-based path, ex: file:///some/local/path
/// \param[in] options a list of backend-specific filesystem options.
///            Each option is a (name, value) pair; the expected value type is
///            specific to the backend and option name.
///            Options are forwarded only to schemes dispatched through a
///            registered FileSystemFactory. Non-empty options yield
///            NotImplemented if that factory does not support them, or if the
///            scheme is not handled by a registered factory.
/// \param[in] io_context an IOContext which will be associated with the filesystem
/// \param[out] out_path (optional) Path inside the filesystem.
/// \return out_fs FileSystem instance.
ARROW_EXPORT
Result<std::shared_ptr<FileSystem>> FileSystemFromUriAndOptions(
const std::string& uri, const FileSystemFactoryOptions& options,
const io::IOContext& io_context, std::string* out_path = NULLPTR);

/// \brief Create a new FileSystem by URI
///
/// Support for other schemes can be added using RegisterFileSystemFactory.
Expand Down
6 changes: 6 additions & 0 deletions cpp/src/arrow/filesystem/filesystem_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include <any>
#include <memory>
#include <string>
#include <utility>
Expand Down Expand Up @@ -640,6 +641,11 @@ TEST_F(TestMockFS, FileSystemFromUri) {
Invalid, ::testing::HasSubstr("syntax error at character ' ' (position 12)"),
FileSystemFromUri("mock:/folder name/bar", &path));
CheckDirs({});
FileSystemFactoryOptions options{{"some_option", 1}};
EXPECT_RAISES_WITH_MESSAGE_THAT(
NotImplemented, ::testing::HasSubstr("options are not supported"),
FileSystemFromUriAndOptions("mock:///foo/bar", options, &path));
CheckDirs({});
}

////////////////////////////////////////////////////////////////////////////
Expand Down
Loading
Loading