From 0b91c72c320e19c479fdb30f2afe691127ccd846 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 9 Oct 2025 21:44:22 -0300 Subject: [PATCH 1/7] feat: Add S3 storage backend Initial implementation of hb_store_s3 module providing S3-compatible storage support via erlcloud. Implements standard store interface with read, write, list, and resolve operations. --- rebar.config | 3 +- rebar.lock | 15 ++ src/hb_store_s3.erl | 602 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 619 insertions(+), 1 deletion(-) create mode 100644 src/hb_store_s3.erl diff --git a/rebar.config b/rebar.config index 80c1cd9ac..162f28cd9 100644 --- a/rebar.config +++ b/rebar.config @@ -128,7 +128,8 @@ {prometheus_httpd, "2.1.15"}, {prometheus, "6.0.3"}, {graphql, "0.17.1", {pkg, graphql_erl}}, - {luerl, "1.3.0"} + {luerl, "1.3.0"}, + {erlcloud, "3.8.3"} ]}. {shell, [ diff --git a/rebar.lock b/rebar.lock index abe0deb60..0c64dfb3e 100644 --- a/rebar.lock +++ b/rebar.lock @@ -4,15 +4,20 @@ {git,"https://github.com/ArweaveTeam/b64fast.git", {ref,"58f0502e49bf73b29d95c6d02460d1fb8d2a5273"}}, 0}, + {<<"base16">>,{pkg,<<"base16">>,<<"2.0.1">>},1}, {<<"cowboy">>,{pkg,<<"cowboy">>,<<"2.14.0">>},0}, {<<"cowlib">>,{pkg,<<"cowlib">>,<<"2.16.0">>},0}, {<<"ddskerl">>,{pkg,<<"ddskerl">>,<<"0.4.2">>},1}, + {<<"eini">>,{pkg,<<"eini">>,<<"1.2.9">>},1}, {<<"elmdb">>, {git,"https://github.com/twilson63/elmdb-rs.git", {ref,"90c8857cd4ccff341fbe415b96bc5703d17ff7f0"}}, 0}, + {<<"erlcloud">>,{pkg,<<"erlcloud">>,<<"3.8.3">>},0}, {<<"graphql">>,{pkg,<<"graphql_erl">>,<<"0.17.1">>},0}, {<<"gun">>,{pkg,<<"gun">>,<<"2.2.0">>},0}, + {<<"jsx">>,{pkg,<<"jsx">>,<<"2.11.0">>},1}, + {<<"lhttpc">>,{pkg,<<"lhttpc">>,<<"1.7.1">>},1}, {<<"luerl">>,{pkg,<<"luerl">>,<<"1.3.0">>},0}, {<<"prometheus">>,{pkg,<<"prometheus">>,<<"6.0.3">>},0}, {<<"prometheus_cowboy">>,{pkg,<<"prometheus_cowboy">>,<<"0.2.0">>},0}, @@ -21,11 +26,16 @@ [ {pkg_hash,[ {<<"accept">>, <<"CD6E34A2D7E28CA38B2D3CB233734CA0C221EFBC1F171F91FEC5F162CC2D18DA">>}, + {<<"base16">>, <<"F0549F732E03BE8124ED0D19FD5EE52146CC8BE24C48CBC3F23AB44B157F11A2">>}, {<<"cowboy">>, <<"565DCF221BA99B1255B0ADCEC24D2D8DBE79E46EC79F30F8373CCEADC6A41E2A">>}, {<<"cowlib">>, <<"54592074EBBBB92EE4746C8A8846E5605052F29309D3A873468D76CDF932076F">>}, {<<"ddskerl">>, <<"A51A90BE9AC9B36A94017670BED479C623B10CA9D4BDA1EDF3A0E48CAEEADA2A">>}, + {<<"eini">>, <<"FCC3CBD49BBDD9A1D9735C7365DAFFCD84481CCE81E6CB80537883AA44AC4895">>}, + {<<"erlcloud">>, <<"66DE6C7D37C6E688C7AE198D56B9CBE07B2CB80054B88A798437C533C3C7F418">>}, {<<"graphql">>, <<"EB59FCBB39F667DC1C78C950426278015C3423F7A6ED2A121D3DB8B1D2C5F8B4">>}, {<<"gun">>, <<"B8F6B7D417E277D4C2B0DC3C07DFDF892447B087F1CC1CAFF9C0F556B884E33D">>}, + {<<"jsx">>, <<"08154624050333919B4AC1B789667D5F4DB166DC50E190C4D778D1587F102EE0">>}, + {<<"lhttpc">>, <<"8522AF9877765C33318A3AE486BE69BC165E835D05C3334A8166FD7B318D446B">>}, {<<"luerl">>, <<"B56423DDB721432AB980B818FEECB84ADBAB115E2E11522CF94BCD0729CAA501">>}, {<<"prometheus">>, <<"95302236124C0F919163A7762BF7D2B171B919B6FF6148D26EB38A5D2DEF7B81">>}, {<<"prometheus_cowboy">>, <<"526F75D9850A9125496F78BCEECCA0F237BC7B403C976D44508543AE5967DAD9">>}, @@ -33,11 +43,16 @@ {<<"ranch">>, <<"25528F82BC8D7C6152C57666CA99EC716510FE0925CB188172F41CE93117B1B0">>}]}, {pkg_hash_ext,[ {<<"accept">>, <<"CA69388943F5DAD2E7232A5478F16086E3C872F48E32B88B378E1885A59F5649">>}, + {<<"base16">>, <<"06EA2D48343282E712160BA89F692B471DB8B36ABE8394F3445FF9032251D772">>}, {<<"cowboy">>, 
<<"EA99769574550FE8A83225C752E8A62780A586770EF408816B82B6FE6D46476B">>}, {<<"cowlib">>, <<"7F478D80D66B747344F0EA7708C187645CFCC08B11AA424632F78E25BF05DB51">>}, {<<"ddskerl">>, <<"63F907373D7E548151D584D4DA8A38928FD26EC9477B94C0FFAAD87D7CB69FE7">>}, + {<<"eini">>, <<"DA64AE8DB7C2F502E6F20CDF44CD3D9BE364412B87FF49FEBF282540F673DFCB">>}, + {<<"erlcloud">>, <<"6ED59CBD8816045765E3E76F22D08118766DF1BB1D24F5D90794C8505BCD6D44">>}, {<<"graphql">>, <<"4D0F08EC57EF0983E2596763900872B1AB7E94F8EE3817B9F67EEC911FF7C386">>}, {<<"gun">>, <<"76022700C64287FEB4DF93A1795CFF6741B83FB37415C40C34C38D2A4645261A">>}, + {<<"jsx">>, <<"EED26A0D04D217F9EECEFFFB89714452556CF90EB38F290A27A4D45B9988F8C0">>}, + {<<"lhttpc">>, <<"154EEB27692482B52BE86406DCD1D18A2405CAFCE0E8DAA4A1A7BFA7FE295896">>}, {<<"luerl">>, <<"6B3138AA829F0FBC4CD0F083F273B4030A2B6CE99155194A6DB8C67B2C3480A4">>}, {<<"prometheus">>, <<"53554ECADAC0354066801D514D1A244DD026175E4EE3A9A30192B71D530C8268">>}, {<<"prometheus_cowboy">>, <<"2C7EB12F4B970D91E3B47BAAD0F138F6ADC34E53EEB0AE18068FF0AFAB441B24">>}, diff --git a/src/hb_store_s3.erl b/src/hb_store_s3.erl new file mode 100644 index 000000000..fb78d2006 --- /dev/null +++ b/src/hb_store_s3.erl @@ -0,0 +1,602 @@ +%%%----------------------------------------------------------------------------- +%%% @doc S3-backed implementation of the HyperBEAM store behavior. +%%% This module provides persistent storage using Amazon S3 or compatible +%%% object storage services (MinIO, Wasabi, etc.). +%%% @end +%%%----------------------------------------------------------------------------- +-module(hb_store_s3). +-behaviour(hb_store). + +%% Store behavior callbacks +-export([start/1, stop/1, reset/1, scope/0, scope/1]). +-export([read/2, write/3, list/2, type/2]). +-export([make_group/2, make_link/3, resolve/2]). +-export([path/2, add_path/3]). + +%% Helper functions +-export([match/2]). + +-include("include/hb.hrl"). +-include_lib("erlcloud/include/erlcloud_aws.hrl"). + +%% Type definitions +-type opts() :: map(). +-type key() :: binary() | list(). +-type value() :: binary(). + +%% Configuration defaults +-define(DEFAULT_REGION, <<"us-east-1">>). +-define(DEFAULT_ENDPOINT, <<"https://s3.amazonaws.com">>). +-define(MAX_LINK_DEPTH, 100). +-define(LINK_MARKER, <<"link:">>). + +%%%----------------------------------------------------------------------------- +%%% Configuration and Initialization (Phase 2) +%%%----------------------------------------------------------------------------- + +%% @doc Initialize the S3 store connection. +%% This function is called when the store is first accessed. +%% It validates the configuration and tests the connection. +-spec start(opts()) -> ok | {error, term()}. +start(Opts) -> + try + % Step 1: Validate required configuration keys + ok = validate_config(Opts), + + % Step 2: Create erlcloud configuration + Config = make_erlcloud_config(Opts), + + % Step 3: Test bucket access + Bucket = maps:get(<<"bucket">>, Opts), + ok = test_bucket_access(Bucket, Config), + + % Step 4: Store configuration for later use + StoreRef = get_store_ref(Opts), + persistent_term:put(StoreRef, #{ + bucket => Bucket, + prefix => maps:get(<<"prefix">>, Opts, <<>>), + config => Config + }), + + ?event(store_s3, {started, {bucket, Bucket}}), + ok + catch + error:Reason -> + ?event(error, {s3_start_failed, {reason, Reason}}), + {error, Reason} + end. + +%% @doc Validate that all required configuration keys are present. 
+%% Required keys: bucket, access-key-id, secret-access-key +validate_config(Opts) -> + Required = [<<"bucket">>, <<"access-key-id">>, <<"secret-access-key">>], + Missing = [K || K <- Required, not maps:is_key(K, Opts)], + case Missing of + [] -> + ok; + _ -> + error({missing_config_keys, Missing}) + end. + +%% @doc Build erlcloud AWS configuration from our options. +make_erlcloud_config(Opts) -> + % Get configuration values with defaults + AccessKey = binary_to_list(maps:get(<<"access-key-id">>, Opts)), + SecretKey = binary_to_list(maps:get(<<"secret-access-key">>, Opts)), + Region = binary_to_list(maps:get(<<"region">>, Opts, ?DEFAULT_REGION)), + Endpoint = maps:get(<<"endpoint">>, Opts, ?DEFAULT_ENDPOINT), + + % Parse endpoint URL + {Scheme, Host, Port} = parse_endpoint(Endpoint), + + % Create base configuration + BaseConfig = erlcloud_s3:new(AccessKey, SecretKey, Host, Port), + + % Add additional settings + BaseConfig#aws_config{ + s3_scheme = Scheme, + s3_bucket_after_host = false, + s3_bucket_access_method = path, + aws_region = Region, + http_client = httpc + }. + +%% @doc Parse an endpoint URL into scheme, host, and port. +%% Example: "https://s3.amazonaws.com" -> {"https://", "s3.amazonaws.com", 443} +parse_endpoint(Endpoint) when is_binary(Endpoint) -> + parse_endpoint(binary_to_list(Endpoint)); +parse_endpoint(Endpoint) when is_list(Endpoint) -> + case string:split(Endpoint, "://") of + [Scheme, HostPort] -> + case string:split(HostPort, ":", trailing) of + [Host, PortStr] -> + Port = list_to_integer(PortStr), + {Scheme ++ "://", Host, Port}; + [Host] -> + % Default port based on scheme + DefaultPort = case Scheme of + "https" -> 443; + "http" -> 80 + end, + {Scheme ++ "://", Host, DefaultPort} + end; + [HostOnly] -> + % No scheme provided, assume HTTP + {"http://", HostOnly, 80} + end. + +%% @doc Test that we can access the configured bucket. +test_bucket_access(Bucket, Config) -> + BucketStr = binary_to_list(Bucket), + % Try to list objects with max-keys=1 to minimize data transfer + case erlcloud_s3:list_objects(BucketStr, [{max_keys, 1}], Config) of + L when is_list(L) -> + ok; + {error, {aws_error, {http_error, 404, _, _}}} -> + error({bucket_not_found, Bucket}); + {error, Reason} -> + error({bucket_access_failed, Reason}) + end. + +%% @doc Get a unique reference for this store instance. +get_store_ref(Opts) -> + Bucket = maps:get(<<"bucket">>, Opts), + Prefix = maps:get(<<"prefix">>, Opts, <<>>), + {?MODULE, Bucket, Prefix}. + +%% @doc Get stored configuration from persistent_term. +get_config(Opts) -> + StoreRef = get_store_ref(Opts), + case persistent_term:get(StoreRef, undefined) of + undefined -> + error(store_not_started); + Config -> + Config + end. + +%%%----------------------------------------------------------------------------- +%%% Core Read/Write Operations (Phase 3) +%%%----------------------------------------------------------------------------- + +%% @doc Write a value to a key in S3. +-spec write(opts(), key(), value()) -> ok | {error, term()}. 
+write(Opts, Key, Value) when is_list(Key) ->
+    % Convert list paths to binary
+    write(Opts, hb_store:join(Key), Value);
+write(Opts, Key, Value) ->
+    try
+        % Get stored configuration
+        #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts),
+
+        % Build full S3 key with prefix
+        FullKey = build_s3_key(Prefix, Key),
+
+        % Write to S3
+        BucketStr = binary_to_list(Bucket),
+        KeyStr = binary_to_list(FullKey),
+
+        ?event(store_s3, {write, {key, FullKey}, {size, byte_size(Value)}}),
+
+        case erlcloud_s3:put_object(BucketStr, KeyStr, Value, [], Config) of
+            L when is_list(L) ->
+                % Success - erlcloud returns a proplist
+                ok;
+            {error, Reason} ->
+                ?event(error, {s3_write_failed, {key, FullKey}, {reason, Reason}}),
+                {error, Reason}
+        end
+    catch
+        error:CatchReason ->
+            ?event(error, {s3_write_error, {key, Key}, {reason, CatchReason}}),
+            {error, CatchReason}
+    end.
+
+%% @doc Build full S3 key with optional prefix.
+build_s3_key(<<>>, Key) ->
+    hb_store:join(Key);
+build_s3_key(Prefix, Key) ->
+    Path = hb_store:join(Key),
+    % Ensure prefix ends with / for proper key namespacing
+    PrefixWithSlash = case binary:last(Prefix) of
+        $/ -> Prefix;
+        _ -> <<Prefix/binary, "/">>
+    end,
+    <<PrefixWithSlash/binary, Path/binary>>.
+
+%%%-----------------------------------------------------------------------------
+%%% Link Support (Phase 4)
+%%%-----------------------------------------------------------------------------
+
+%% @doc Read a value from S3, following links if necessary.
+-spec read(opts(), key()) -> {ok, value()} | not_found.
+read(Opts, Key) when is_list(Key) ->
+    read(Opts, hb_store:join(Key));
+read(Opts, Key) ->
+    read_with_links(Opts, Key, 0).
+
+%% Internal read that tracks link depth to prevent infinite loops
+read_with_links(_Opts, _Key, Depth) when Depth > ?MAX_LINK_DEPTH ->
+    ?event(error, {too_many_links, {depth, Depth}}),
+    not_found;
+read_with_links(Opts, Key, Depth) ->
+    % Read the key directly
+    case read_direct(Opts, Key) of
+        {ok, Value} ->
+            % Check if it's a link
+            case is_link(Value) of
+                {true, Target} ->
+                    ?event(store_s3, {follow_link, {from, Key}, {to, Target}}),
+                    read_with_links(Opts, Target, Depth + 1);
+                false ->
+                    {ok, Value}
+            end;
+        not_found ->
+            not_found
+    end.
+
+%% Direct read without link resolution
+read_direct(Opts, Key) ->
+    try
+        #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts),
+
+        FullKey = build_s3_key(Prefix, Key),
+        BucketStr = binary_to_list(Bucket),
+        KeyStr = binary_to_list(FullKey),
+
+        case erlcloud_s3:get_object(BucketStr, KeyStr, [], Config) of
+            L when is_list(L) ->
+                Content = proplists:get_value(content, L),
+                {ok, hb_util:bin(Content)};
+            {error, {aws_error, {http_error, 404, _, _}}} ->
+                not_found;
+            {error, _Reason} ->
+                not_found
+        end
+    catch
+        error:_CatchReason ->
+            not_found
+    end.
+
+%% @doc Create a symbolic link from New to Existing.
+%% Links are stored as values with "link:" prefix.
+-spec make_link(opts(), key(), key()) -> ok | {error, term()}.
+make_link(Opts, Existing, New) ->
+    % Convert to binary if needed
+    ExistingBin = hb_util:bin(hb_store:join(Existing)),
+
+    % Build link value with marker
+    LinkValue = <<?LINK_MARKER/binary, ExistingBin/binary>>,
+
+    ?event(store_s3, {make_link, {from, New}, {to, Existing}}),
+
+    write(Opts, New, LinkValue).
+
+%% @doc Check if a value is a link and extract the target.
+%% Returns {true, Target} or false.
+is_link(Value) ->
+    LinkPrefixSize = byte_size(?LINK_MARKER),
+    case byte_size(Value) > LinkPrefixSize of
+        true ->
+            case binary:part(Value, 0, LinkPrefixSize) of
+                ?LINK_MARKER ->
+                    Target = binary:part(Value, LinkPrefixSize,
+                                         byte_size(Value) - LinkPrefixSize),
+                    {true, Target};
+                _ ->
+                    false
+            end;
+        false ->
+            false
+    end.
+
+%%%-----------------------------------------------------------------------------
+%%% Groups and Listing (Phase 5)
+%%%-----------------------------------------------------------------------------
+
+%% @doc Create a group (virtual directory).
+%% In S3, directories don't really exist, so this is a no-op.
+%% Groups are detected by listing operations.
+-spec make_group(opts(), key()) -> ok.
+make_group(_Opts, _Path) ->
+    % S3 doesn't need explicit directory creation
+    % They exist implicitly when objects are stored with that prefix
+    ok.
+
+%% @doc List immediate children under a given path.
+%% Treats the path as a directory prefix.
+-spec list(opts(), key()) -> {ok, [binary()]} | {error, term()}.
+list(Opts, Path) when is_list(Path) ->
+    list(Opts, hb_store:join(Path));
+list(Opts, Path) ->
+    try
+        #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts),
+
+        % Check if Path is a link and resolve it
+        ResolvedPath = case read_direct(Opts, Path) of
+            {ok, Value} ->
+                case is_link(Value) of
+                    {true, Target} ->
+                        Target;
+                    false ->
+                        Path
+                end;
+            not_found ->
+                Path
+        end,
+
+        % Build S3 prefix for listing
+        FullPath = build_s3_key(Prefix, ResolvedPath),
+
+        % Ensure path ends with / for S3 listing
+        SearchPrefix = ensure_trailing_slash(FullPath),
+
+        BucketStr = binary_to_list(Bucket),
+        PrefixStr = binary_to_list(SearchPrefix),
+
+        ?event(store_s3, {list, {prefix, SearchPrefix}}),
+
+        % Use delimiter to get only immediate children
+        ListOpts = [{prefix, PrefixStr}, {delimiter, "/"}],
+
+        case erlcloud_s3:list_objects(BucketStr, ListOpts, Config) of
+            L when is_list(L) ->
+                Children = extract_children(SearchPrefix, L),
+                {ok, Children};
+            {error, _Reason} ->
+                {ok, []}
+        end
+    catch
+        error:Reason ->
+            ?event(error, {s3_list_error, {path, Path}, {reason, Reason}}),
+            {error, Reason}
+    end.
+
+%% @doc Ensure a path ends with / for S3 directory listing.
+ensure_trailing_slash(<<>>) ->
+    <<>>;
+ensure_trailing_slash(Path) ->
+    case binary:last(Path) of
+        $/ -> Path;
+        _ -> <<Path/binary, "/">>
+    end.
+
+%% @doc Extract immediate children from S3 list response.
+%% Returns only the child names, not full paths.
+%% Returns both objects (files) and common prefixes (directories), like file:list_dir().
+extract_children(Prefix, S3Response) ->
+    % Get regular objects (actual files)
+    Contents = proplists:get_value(contents, S3Response, []),
+
+    % Get common prefixes (subdirectories)
+    CommonPrefixes = proplists:get_value(common_prefixes, S3Response, []),
+
+    % Extract object names - only immediate children
+    Objects = lists:filtermap(
+        fun(Obj) ->
+            Key = list_to_binary(proplists:get_value(key, Obj, "")),
+            case strip_prefix(Prefix, Key) of
+                <<>> -> false;
+                Child ->
+                    % Only include if it's an immediate child (no / in name)
+                    case binary:match(Child, <<"/">>) of
+                        nomatch -> {true, Child};
+                        _ -> false
+                    end
+            end
+        end,
+        Contents
+    ),
+
+    % Extract directory names (common prefixes)
+    Dirs = lists:filtermap(
+        fun(P) ->
+            PrefixBin = list_to_binary(proplists:get_value(prefix, P, "")),
+            case strip_prefix(Prefix, PrefixBin) of
+                <<>> -> false;
+                Child ->
+                    % Remove trailing slash from directory name
+                    ChildName = case binary:last(Child) of
+                        $/ -> binary:part(Child, 0, byte_size(Child) - 1);
+                        _ -> Child
+                    end,
+                    {true, ChildName}
+            end
+        end,
+        CommonPrefixes
+    ),
+
+    % Return unique sorted list (both files and directories, like file:list_dir)
+    lists:usort(Objects ++ Dirs).
+
+%% @doc Remove a prefix from a binary if it matches.
+strip_prefix(Prefix, Bin) ->
+    PrefixLen = byte_size(Prefix),
+    case Bin of
+        <<Prefix:PrefixLen/binary, Rest/binary>> -> Rest;
+        _ -> Bin
+    end.
+
+%%%-----------------------------------------------------------------------------
+%%% Type Detection (Phase 6)
+%%%-----------------------------------------------------------------------------
+
+%% @doc Determine if a key represents a simple value or composite group.
+-spec type(opts(), key()) -> simple | composite | not_found.
+type(Opts, Key) when is_list(Key) ->
+    type(Opts, hb_store:join(Key));
+type(Opts, Key) ->
+    % Try to read the key directly
+    case read_direct(Opts, Key) of
+        {ok, Value} ->
+            % Check if it's a link and resolve it
+            case is_link(Value) of
+                {true, Target} ->
+                    % Recursively check the target's type
+                    type(Opts, Target);
+                false ->
+                    % It's a simple value
+                    simple
+            end;
+        not_found ->
+            % Check if it has children (is a composite/directory)
+            case has_children(Opts, Key) of
+                true -> composite;
+                false -> not_found
+            end
+    end.
+
+%% @doc Check if a path has any children (is a directory).
+has_children(Opts, Path) ->
+    #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts),
+
+    FullPath = build_s3_key(Prefix, Path),
+    SearchPrefix = ensure_trailing_slash(FullPath),
+
+    BucketStr = binary_to_list(Bucket),
+    PrefixStr = binary_to_list(SearchPrefix),
+
+    % List with max-keys=1 to check if anything exists
+    ListOpts = [{prefix, PrefixStr}, {max_keys, 1}],
+
+    case erlcloud_s3:list_objects(BucketStr, ListOpts, Config) of
+        L when is_list(L) ->
+            Contents = proplists:get_value(contents, L, []),
+            length(Contents) > 0;
+        _ ->
+            false
+    end.
+
+%%%-----------------------------------------------------------------------------
+%%% Path Resolution (Phase 7)
+%%%-----------------------------------------------------------------------------
+
+%% @doc Resolve any links in a path.
+%% Follows links in each path segment, including the last.
+-spec resolve(opts(), key()) -> binary().
+resolve(Opts, Path) when is_list(Path) ->
+    resolve(Opts, hb_store:join(Path));
+resolve(Opts, Path) when is_binary(Path) ->
+    Parts = binary:split(Path, <<"/">>, [global]),
+    case resolve_path_segments(Opts, Parts, 0) of
+        {ok, Resolved} -> Resolved;
+        {error, _} -> Path
+    end.
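+
+%% For illustration (a hypothetical layout, not part of the API): if the
+%% object at key <<"foo">> holds the value <<"link:bar">> and a value is
+%% stored at <<"bar/baz">>, then resolve(Opts, <<"foo/baz">>) walks the
+%% segments, swaps the accumulated <<"foo">> for its link target <<"bar">>,
+%% and returns <<"bar/baz">>, which read/2 can then fetch directly.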
+
+%% Internal path resolution that resolves all segments including the last
+resolve_path_segments(_Opts, _Path, Depth) when Depth > ?MAX_LINK_DEPTH ->
+    {error, too_many_redirects};
+resolve_path_segments(_Opts, [], _Depth) ->
+    {ok, <<>>};
+resolve_path_segments(Opts, Parts, Depth) ->
+    resolve_path_accumulate(Opts, Parts, <<>>, Depth).
+
+% Accumulator-based resolution
+resolve_path_accumulate(_Opts, [], Acc, _Depth) ->
+    {ok, Acc};
+resolve_path_accumulate(_Opts, _Parts, _Acc, Depth) when Depth > ?MAX_LINK_DEPTH ->
+    {error, too_many_redirects};
+resolve_path_accumulate(Opts, [Head|Tail], Acc, Depth) ->
+    % Build the current path segment
+    CurrentPath = case Acc of
+        <<>> -> Head;
+        _ -> <<Acc/binary, "/", Head/binary>>
+    end,
+
+    % Check if current path is a link
+    case read_direct(Opts, CurrentPath) of
+        {ok, Value} ->
+            case is_link(Value) of
+                {true, Target} ->
+                    % It's a link - replace accumulated path with target and continue
+                    resolve_path_accumulate(Opts, Tail, Target, Depth + 1);
+                false ->
+                    % It's a regular value, continue accumulating
+                    resolve_path_accumulate(Opts, Tail, CurrentPath, Depth)
+            end;
+        not_found ->
+            % Path segment doesn't exist as a link, continue accumulating
+            resolve_path_accumulate(Opts, Tail, CurrentPath, Depth)
+    end.
+
+%% @doc Convert path to canonical form.
+-spec path(opts(), key()) -> binary().
+path(_Opts, Path) ->
+    hb_store:join(Path).
+
+%% @doc Add two path components together.
+-spec add_path(opts(), key(), key()) -> list().
+add_path(_Opts, Path1, Path2) when is_list(Path1), is_list(Path2) ->
+    Path1 ++ Path2;
+add_path(_Opts, Path1, Path2) ->
+    P1 = case is_binary(Path1) of
+        true -> binary:split(Path1, <<"/">>, [global]);
+        false -> Path1
+    end,
+    P2 = case is_binary(Path2) of
+        true -> binary:split(Path2, <<"/">>, [global]);
+        false -> Path2
+    end,
+    P1 ++ P2.
+
+%%%-----------------------------------------------------------------------------
+%%% Remaining Functions (Phase 8)
+%%%-----------------------------------------------------------------------------
+
+%% @doc Stop the S3 store and clean up resources.
+-spec stop(opts()) -> ok.
+stop(Opts) ->
+    StoreRef = get_store_ref(Opts),
+    persistent_term:erase(StoreRef),
+    ok.
+
+%% @doc Reset the store by deleting all objects.
+%% Requires "dangerous_reset" => true for safety.
+-spec reset(opts()) -> ok | {error, term()}.
+reset(Opts) ->
+    case maps:get(<<"dangerous_reset">>, Opts, false) of
+        true ->
+            % Only proceed if explicitly confirmed
+            delete_all_objects(Opts);
+        false ->
+            {error, reset_not_confirmed}
+    end.
+
+delete_all_objects(Opts) ->
+    #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts),
+
+    BucketStr = binary_to_list(Bucket),
+    PrefixStr = binary_to_list(Prefix),
+
+    % List all objects with prefix
+    case erlcloud_s3:list_objects(BucketStr, [{prefix, PrefixStr}], Config) of
+        L when is_list(L) ->
+            Contents = proplists:get_value(contents, L, []),
+            Keys = [proplists:get_value(key, Obj) || Obj <- Contents],
+
+            % Delete all objects
+            case Keys of
+                [] -> ok;
+                _ ->
+                    erlcloud_s3:delete_objects(BucketStr, Keys, Config),
+                    ok
+            end;
+        _ ->
+            ok
+    end.
+
+%% @doc Return the scope of this store.
+%% Defaults to local to match filesystem behavior, but can be overridden in config.
+-spec scope() -> local.
+scope() -> local.
+
+-spec scope(opts()) -> local | remote.
+scope(#{ <<"scope">> := Scope }) -> Scope;
+scope(_Opts) -> scope().
+
+%% @doc Match keys based on a template.
+%% Simple implementation - just returns not_found for now.
+-spec match(opts(), map()) -> {ok, [binary()]} | not_found. +match(_Opts, _Template) -> + % This would require listing all objects and checking each one + % For MVP, we'll skip this feature + not_found. From 841b020a86c54e2b0c28e212d352cc07910d8679 Mon Sep 17 00:00:00 2001 From: Niko Storni Date: Sat, 11 Oct 2025 04:46:11 +0200 Subject: [PATCH 2/7] style: code cleanup fixed duplicate reads and writes --- src/hb_cache_control.erl | 14 +- src/hb_store_s3.erl | 314 ++++++++++++++++++++------------------- 2 files changed, 169 insertions(+), 159 deletions(-) diff --git a/src/hb_cache_control.erl b/src/hb_cache_control.erl index b5cbb5bb1..cdc9f0311 100644 --- a/src/hb_cache_control.erl +++ b/src/hb_cache_control.erl @@ -136,11 +136,13 @@ perform_cache_write(Base, Req, Res, Opts) -> hb_cache:write(Req, Opts), case Res of <<_/binary>> -> - hb_cache:write_binary( - hb_path:hashpath(Base, Req, Opts), - Res, - Opts - ); + Store = hb_opts:get(store, no_viable_store, Opts), + HP = hb_path:hashpath(Base, Req, Opts), + DataPath = <<"data/", (hb_path:hashpath(Res, Opts))/binary>>, + case hb_store:type(Store, DataPath) of + simple -> hb_store:make_link(Store, DataPath, HP); + _ -> hb_cache:write_binary(HP, Res, Opts) + end; Map when is_map(Map) -> hb_cache:write(Res, Opts); _ -> @@ -423,4 +425,4 @@ cache_message_result_test() -> {ok, Res3} = hb_ao:resolve(Base, Req, #{ cache_control => [<<"only-if-cached">>] }), ?event({res2, Res2}), ?event({res3, Res3}), - ?assertEqual(Res2, Res3). \ No newline at end of file + ?assertEqual(Res2, Res3). diff --git a/src/hb_store_s3.erl b/src/hb_store_s3.erl index fb78d2006..781152516 100644 --- a/src/hb_store_s3.erl +++ b/src/hb_store_s3.erl @@ -29,6 +29,9 @@ -define(DEFAULT_ENDPOINT, <<"https://s3.amazonaws.com">>). -define(MAX_LINK_DEPTH, 100). -define(LINK_MARKER, <<"link:">>). +%% Namespace for storing link objects separately to avoid file/prefix collisions +-define(LINKS_NS, <<"links/">>). +-define(GROUPS_NS, <<"groups/">>). %%%----------------------------------------------------------------------------- %%% Configuration and Initialization (Phase 2) @@ -39,31 +42,28 @@ %% It validates the configuration and tests the connection. -spec start(opts()) -> ok | {error, term()}. start(Opts) -> - try - % Step 1: Validate required configuration keys - ok = validate_config(Opts), - - % Step 2: Create erlcloud configuration + maybe + ok ?= validate_config(Opts), Config = make_erlcloud_config(Opts), - - % Step 3: Test bucket access Bucket = maps:get(<<"bucket">>, Opts), - ok = test_bucket_access(Bucket, Config), - - % Step 4: Store configuration for later use + ok ?= test_bucket_access(Bucket, Config), StoreRef = get_store_ref(Opts), - persistent_term:put(StoreRef, #{ + _ = persistent_term:put(StoreRef, #{ bucket => Bucket, prefix => maps:get(<<"prefix">>, Opts, <<>>), config => Config }), - ?event(store_s3, {started, {bucket, Bucket}}), - ok - catch - error:Reason -> - ?event(error, {s3_start_failed, {reason, Reason}}), - {error, Reason} + ?event(store_s3, {started, {bucket, Bucket}, {prefix, maps:get(<<"prefix">>, Opts, <<>>)}}), + {ok, #{ + module => ?MODULE, + bucket => Bucket, + prefix => maps:get(<<"prefix">>, Opts, <<>>) + }} + else + Error -> + ?event(error, {s3_start_failed, {reason, Error}}), + {error, Error} end. %% @doc Validate that all required configuration keys are present. @@ -80,19 +80,15 @@ validate_config(Opts) -> %% @doc Build erlcloud AWS configuration from our options. 
make_erlcloud_config(Opts) -> - % Get configuration values with defaults - AccessKey = binary_to_list(maps:get(<<"access-key-id">>, Opts)), - SecretKey = binary_to_list(maps:get(<<"secret-access-key">>, Opts)), - Region = binary_to_list(maps:get(<<"region">>, Opts, ?DEFAULT_REGION)), + AccessKey = hb_util:list(maps:get(<<"access-key-id">>, Opts)), + SecretKey = hb_util:list(maps:get(<<"secret-access-key">>, Opts)), + Region = hb_util:list(maps:get(<<"region">>, Opts, ?DEFAULT_REGION)), Endpoint = maps:get(<<"endpoint">>, Opts, ?DEFAULT_ENDPOINT), - % Parse endpoint URL {Scheme, Host, Port} = parse_endpoint(Endpoint), - % Create base configuration BaseConfig = erlcloud_s3:new(AccessKey, SecretKey, Host, Port), - % Add additional settings BaseConfig#aws_config{ s3_scheme = Scheme, s3_bucket_after_host = false, @@ -104,7 +100,7 @@ make_erlcloud_config(Opts) -> %% @doc Parse an endpoint URL into scheme, host, and port. %% Example: "https://s3.amazonaws.com" -> {"https://", "s3.amazonaws.com", 443} parse_endpoint(Endpoint) when is_binary(Endpoint) -> - parse_endpoint(binary_to_list(Endpoint)); + parse_endpoint(hb_util:list(Endpoint)); parse_endpoint(Endpoint) when is_list(Endpoint) -> case string:split(Endpoint, "://") of [Scheme, HostPort] -> @@ -113,7 +109,6 @@ parse_endpoint(Endpoint) when is_list(Endpoint) -> Port = list_to_integer(PortStr), {Scheme ++ "://", Host, Port}; [Host] -> - % Default port based on scheme DefaultPort = case Scheme of "https" -> 443; "http" -> 80 @@ -121,14 +116,12 @@ parse_endpoint(Endpoint) when is_list(Endpoint) -> {Scheme ++ "://", Host, DefaultPort} end; [HostOnly] -> - % No scheme provided, assume HTTP {"http://", HostOnly, 80} end. %% @doc Test that we can access the configured bucket. test_bucket_access(Bucket, Config) -> - BucketStr = binary_to_list(Bucket), - % Try to list objects with max-keys=1 to minimize data transfer + BucketStr = hb_util:list(Bucket), case erlcloud_s3:list_objects(BucketStr, [{max_keys, 1}], Config) of L when is_list(L) -> ok; @@ -140,8 +133,8 @@ test_bucket_access(Bucket, Config) -> %% @doc Get a unique reference for this store instance. get_store_ref(Opts) -> - Bucket = maps:get(<<"bucket">>, Opts), - Prefix = maps:get(<<"prefix">>, Opts, <<>>), + Bucket = hb_util:bin(maps:get(<<"bucket">>, Opts)), + Prefix = hb_util:bin(maps:get(<<"prefix">>, Opts, <<>>)), {?MODULE, Bucket, Prefix}. %% @doc Get stored configuration from persistent_term. @@ -161,34 +154,27 @@ get_config(Opts) -> %% @doc Write a value to a key in S3. -spec write(opts(), key(), value()) -> ok | {error, term()}. 
 write(Opts, Key, Value) when is_list(Key) ->
-    % Convert list paths to binary
     write(Opts, hb_store:join(Key), Value);
 write(Opts, Key, Value) ->
-    try
-        % Get stored configuration
+    maybe
         #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts),
-
-        % Build full S3 key with prefix
-        FullKey = build_s3_key(Prefix, Key),
-
-        % Write to S3
-        BucketStr = binary_to_list(Bucket),
-        KeyStr = binary_to_list(FullKey),
-
+        RelKey = hb_store:join(Key),
+        FullKey = case RelKey of
+            <<"data/", _/binary>> -> build_s3_key(Prefix, RelKey);
+            _ -> build_groups_key(Prefix, RelKey)
+        end,
+        BucketStr = hb_util:list(Bucket),
+        KeyStr = hb_util:list(FullKey),
         ?event(store_s3, {write, {key, FullKey}, {size, byte_size(Value)}}),
-
-        case erlcloud_s3:put_object(BucketStr, KeyStr, Value, [], Config) of
-            L when is_list(L) ->
-                % Success - erlcloud returns a proplist
-                ok;
-            {error, Reason} ->
-                ?event(error, {s3_write_failed, {key, FullKey}, {reason, Reason}}),
-                {error, Reason}
-        end
-    catch
-        error:CatchReason ->
-            ?event(error, {s3_write_error, {key, Key}, {reason, CatchReason}}),
-            {error, CatchReason}
+        ok ?= case erlcloud_s3:put_object(BucketStr, KeyStr, Value, [], Config) of
+            L when is_list(L) -> ok;
+            {error, Reason} -> error(Reason)
+        end,
+        ok
+    else
+        Error ->
+            ?event(error, {s3_write_error, {key, Key}, {reason, Error}}),
+            {error, Error}
     end.
 
 %% @doc Build full S3 key with optional prefix.
@@ -196,13 +182,34 @@ build_s3_key(<<>>, Key) ->
     hb_store:join(Key);
 build_s3_key(Prefix, Key) ->
     Path = hb_store:join(Key),
-    % Ensure prefix ends with / for proper key namespacing
     PrefixWithSlash = case binary:last(Prefix) of
         $/ -> Prefix;
         _ -> <<Prefix/binary, "/">>
     end,
     <<PrefixWithSlash/binary, Path/binary>>.
 
+%% @doc Build an S3 key under the links namespace for a logical key.
+build_links_key(<<>>, Key) ->
+    <<?LINKS_NS/binary, (hb_store:join(Key))/binary>>;
+build_links_key(Prefix, Key) ->
+    Path = hb_store:join(Key),
+    PrefixWithSlash = case binary:last(Prefix) of
+        $/ -> Prefix;
+        _ -> <<Prefix/binary, "/">>
+    end,
+    <<PrefixWithSlash/binary, ?LINKS_NS/binary, Path/binary>>.
+
+%% @doc Build an S3 key under the groups namespace for a logical key.
+build_groups_key(<<>>, Key) ->
+    <<?GROUPS_NS/binary, (hb_store:join(Key))/binary>>;
+build_groups_key(Prefix, Key) ->
+    Path = hb_store:join(Key),
+    PrefixWithSlash = case binary:last(Prefix) of
+        $/ -> Prefix;
+        _ -> <<Prefix/binary, "/">>
+    end,
+    <<PrefixWithSlash/binary, ?GROUPS_NS/binary, Path/binary>>.
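+
+%% For illustration (hypothetical keys, assuming the namespaces defined
+%% above): with a store prefix of <<"node1">>, a logical key maps to:
+%%   build_s3_key(<<"node1">>, <<"data/abc">>)   -> <<"node1/data/abc">>
+%%   build_links_key(<<"node1">>, <<"my-link">>) -> <<"node1/links/my-link">>
+%%   build_groups_key(<<"node1">>, <<"a/b">>)    -> <<"node1/groups/a/b">>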
+
 %%%-----------------------------------------------------------------------------
 %%% Link Support (Phase 4)
 %%%-----------------------------------------------------------------------------
@@ -219,13 +226,10 @@ read_with_links(_Opts, _Key, Depth) when Depth > ?MAX_LINK_DEPTH ->
     ?event(error, {too_many_links, {depth, Depth}}),
     not_found;
 read_with_links(Opts, Key, Depth) ->
-    % Read the key directly
     case read_direct(Opts, Key) of
         {ok, Value} ->
-            % Check if it's a link
             case is_link(Value) of
                 {true, Target} ->
-                    ?event(store_s3, {follow_link, {from, Key}, {to, Target}}),
                     read_with_links(Opts, Target, Depth + 1);
                 false ->
                     {ok, Value}
@@ -238,22 +242,34 @@ read_with_links(Opts, Key, Depth) ->
 read_direct(Opts, Key) ->
     try
         #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts),
-
-        FullKey = build_s3_key(Prefix, Key),
-        BucketStr = binary_to_list(Bucket),
-        KeyStr = binary_to_list(FullKey),
-
+        BucketStr = hb_util:list(Bucket),
+        {FullKey, IsDataKey} = case Key of
+            <<"data/", _/binary>> ->
+                {build_s3_key(Prefix, Key), true};
+            _ ->
+                HasSlash = (binary:match(hb_store:join(Key), <<"/">>) =/= nomatch),
+                NsKey = case HasSlash of
+                    true -> build_groups_key(Prefix, Key);
+                    false -> build_links_key(Prefix, Key)
+                end,
+                {NsKey, false}
+        end,
+        KeyStr = hb_util:list(FullKey),
         case erlcloud_s3:get_object(BucketStr, KeyStr, [], Config) of
             L when is_list(L) ->
                 Content = proplists:get_value(content, L),
                 {ok, hb_util:bin(Content)};
             {error, {aws_error, {http_error, 404, _, _}}} ->
                 not_found;
-            {error, _Reason} ->
+            {error, Reason} ->
+                case IsDataKey of
+                    false -> ?event(error, {s3_read_error, {key, Key}, {reason, Reason}});
+                    true -> ok
+                end,
                 not_found
         end
     catch
-        error:_CatchReason ->
+        _:_ ->
             not_found
     end.
 
 %% @doc Create a symbolic link from New to Existing.
 %% Links are stored as values with "link:" prefix.
 -spec make_link(opts(), key(), key()) -> ok | {error, term()}.
 make_link(Opts, Existing, New) ->
-    % Convert to binary if needed
     ExistingBin = hb_util:bin(hb_store:join(Existing)),
-
-    % Build link value with marker
     LinkValue = <<?LINK_MARKER/binary, ExistingBin/binary>>,
-
     ?event(store_s3, {make_link, {from, New}, {to, Existing}}),
-
-    write(Opts, New, LinkValue).
+    maybe
+        #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts),
+        BucketStr = hb_util:list(Bucket),
+        HasSlash = (binary:match(hb_store:join(New), <<"/">>) =/= nomatch),
+        NsKey = case HasSlash of
+            true -> build_groups_key(Prefix, New);
+            false -> build_links_key(Prefix, New)
+        end,
+        NsKeyStr = hb_util:list(NsKey),
+        _ = erlcloud_s3:put_object(BucketStr, NsKeyStr, LinkValue, [], Config),
+        ok
+    else
+        _ -> {error, make_link_failed}
+    end.
 
 %% @doc Check if a value is a link and extract the target.
 %% Returns {true, Target} or false.
@@ -298,8 +322,6 @@ is_link(Value) ->
 %% Groups are detected by listing operations.
 -spec make_group(opts(), key()) -> ok.
 make_group(_Opts, _Path) ->
-    % S3 doesn't need explicit directory creation
-    % They exist implicitly when objects are stored with that prefix
     ok.
 
 %% @doc List immediate children under a given path.
@@ -308,10 +330,8 @@ make_group(_Opts, _Path) -> list(Opts, Path) when is_list(Path) -> list(Opts, hb_store:join(Path)); list(Opts, Path) -> - try + maybe #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts), - - % Check if Path is a link and resolve it ResolvedPath = case read_direct(Opts, Path) of {ok, Value} -> case is_link(Value) of @@ -323,21 +343,11 @@ list(Opts, Path) -> not_found -> Path end, - - % Build S3 prefix for listing - FullPath = build_s3_key(Prefix, ResolvedPath), - - % Ensure path ends with / for S3 listing + FullPath = build_groups_key(Prefix, ResolvedPath), SearchPrefix = ensure_trailing_slash(FullPath), - - BucketStr = binary_to_list(Bucket), - PrefixStr = binary_to_list(SearchPrefix), - - ?event(store_s3, {list, {prefix, SearchPrefix}}), - - % Use delimiter to get only immediate children + BucketStr = hb_util:list(Bucket), + PrefixStr = hb_util:list(SearchPrefix), ListOpts = [{prefix, PrefixStr}, {delimiter, "/"}], - case erlcloud_s3:list_objects(BucketStr, ListOpts, Config) of L when is_list(L) -> Children = extract_children(SearchPrefix, L), @@ -345,8 +355,8 @@ list(Opts, Path) -> {error, _Reason} -> {ok, []} end - catch - error:Reason -> + else + Reason -> ?event(error, {s3_list_error, {path, Path}, {reason, Reason}}), {error, Reason} end. @@ -364,20 +374,14 @@ ensure_trailing_slash(Path) -> %% Returns only the child names, not full paths. %% Returns both objects (files) and common prefixes (directories), like file:list_dir(). extract_children(Prefix, S3Response) -> - % Get regular objects (actual files) Contents = proplists:get_value(contents, S3Response, []), - - % Get common prefixes (subdirectories) CommonPrefixes = proplists:get_value(common_prefixes, S3Response, []), - - % Extract object names - only immediate children Objects = lists:filtermap( fun(Obj) -> - Key = list_to_binary(proplists:get_value(key, Obj, "")), + Key = hb_util:bin(proplists:get_value(key, Obj, "")), case strip_prefix(Prefix, Key) of <<>> -> false; Child -> - % Only include if it's an immediate child (no / in name) case binary:match(Child, <<"/">>) of nomatch -> {true, Child}; _ -> false @@ -387,14 +391,12 @@ extract_children(Prefix, S3Response) -> Contents ), - % Extract directory names (common prefixes) Dirs = lists:filtermap( fun(P) -> - PrefixBin = list_to_binary(proplists:get_value(prefix, P, "")), + PrefixBin = hb_util:bin(proplists:get_value(prefix, P, "")), case strip_prefix(Prefix, PrefixBin) of <<>> -> false; Child -> - % Remove trailing slash from directory name ChildName = case binary:last(Child) of $/ -> binary:part(Child, 0, byte_size(Child) - 1); _ -> Child @@ -404,8 +406,6 @@ extract_children(Prefix, S3Response) -> end, CommonPrefixes ), - - % Return unique sorted list (both files and directories, like file:list_dir) lists:usort(Objects ++ Dirs). %% @doc Remove a prefix from a binary if it matches. 
@@ -425,45 +425,66 @@ strip_prefix(Prefix, Bin) -> type(Opts, Key) when is_list(Key) -> type(Opts, hb_store:join(Key)); type(Opts, Key) -> - % Try to read the key directly - case read_direct(Opts, Key) of - {ok, Value} -> - % Check if it's a link and resolve it - case is_link(Value) of - {true, Target} -> - % Recursively check the target's type - type(Opts, Target); + case Key of + <<"data/", _/binary>> -> + case head_exists(Opts, Key) of + true -> + simple; false -> - % It's a simple value - simple + not_found end; - not_found -> - % Check if it has children (is a composite/directory) - case has_children(Opts, Key) of - true -> composite; - false -> not_found + _ -> + case read_direct(Opts, Key) of + {ok, Value} -> + case is_link(Value) of + {true, Target} -> + type(Opts, Target); + false -> + simple + end; + not_found -> + case has_children(Opts, Key) of + true -> + composite; + false -> + not_found + end end end. +%% @doc HEAD check for object existence without downloading content +head_exists(Opts, Key) -> + try + #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts), + FullKey = build_s3_key(Prefix, Key), + BucketStr = hb_util:list(Bucket), + KeyStr = hb_util:list(FullKey), + case erlcloud_s3:head_object(BucketStr, KeyStr, [], Config) of + L when is_list(L) -> true; + _ -> false + end + catch + _:_ -> false + end. + %% @doc Check if a path has any children (is a directory). has_children(Opts, Path) -> - #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts), - - FullPath = build_s3_key(Prefix, Path), - SearchPrefix = ensure_trailing_slash(FullPath), - - BucketStr = binary_to_list(Bucket), - PrefixStr = binary_to_list(SearchPrefix), - - % List with max-keys=1 to check if anything exists - ListOpts = [{prefix, PrefixStr}, {max_keys, 1}], - - case erlcloud_s3:list_objects(BucketStr, ListOpts, Config) of - L when is_list(L) -> - Contents = proplists:get_value(contents, L, []), - length(Contents) > 0; - _ -> - false + try + #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts), + FullPath = build_groups_key(Prefix, Path), + SearchPrefix = ensure_trailing_slash(FullPath), + BucketStr = hb_util:list(Bucket), + PrefixStr = hb_util:list(SearchPrefix), + ListOpts = [{prefix, PrefixStr}, {max_keys, 1}], + case erlcloud_s3:list_objects(BucketStr, ListOpts, Config) of + L when is_list(L) -> + Contents = proplists:get_value(contents, L, []), + Contents =/= []; + _ -> + false + end + catch + _:_ -> false end. %%%----------------------------------------------------------------------------- @@ -490,31 +511,26 @@ resolve_path_segments(_Opts, [], _Depth) -> resolve_path_segments(Opts, Parts, Depth) -> resolve_path_accumulate(Opts, Parts, <<>>, Depth). 
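+
+%% For illustration (a hypothetical store layout): with a link
+%% <<"my-link">> -> <<"data/bar">> in the store, resolving <<"my-link/baz">>
+%% runs
+%%   resolve_path_accumulate(Opts, [<<"my-link">>, <<"baz">>], <<>>, 0),
+%% the first segment hits the link, the accumulator becomes <<"data/bar">>,
+%% and the final result is {ok, <<"data/bar/baz">>}.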
-% Accumulator-based resolution
 resolve_path_accumulate(_Opts, [], Acc, _Depth) ->
     {ok, Acc};
+resolve_path_accumulate(_Opts, FullPath = [<<"data">> | _], <<>>, _Depth) ->
+    {ok, hb_store:join(FullPath)};
 resolve_path_accumulate(_Opts, _Parts, _Acc, Depth) when Depth > ?MAX_LINK_DEPTH ->
     {error, too_many_redirects};
 resolve_path_accumulate(Opts, [Head|Tail], Acc, Depth) ->
-    % Build the current path segment
     CurrentPath = case Acc of
         <<>> -> Head;
         _ -> <<Acc/binary, "/", Head/binary>>
     end,
-
-    % Check if current path is a link
     case read_direct(Opts, CurrentPath) of
         {ok, Value} ->
             case is_link(Value) of
                 {true, Target} ->
-                    % It's a link - replace accumulated path with target and continue
                     resolve_path_accumulate(Opts, Tail, Target, Depth + 1);
                 false ->
-                    % It's a regular value, continue accumulating
                     resolve_path_accumulate(Opts, Tail, CurrentPath, Depth)
             end;
         not_found ->
-            % Path segment doesn't exist as a link, continue accumulating
            resolve_path_accumulate(Opts, Tail, CurrentPath, Depth)
     end.
 
@@ -555,7 +571,6 @@ stop(Opts) ->
 reset(Opts) ->
     case maps:get(<<"dangerous_reset">>, Opts, false) of
         true ->
-            % Only proceed if explicitly confirmed
             delete_all_objects(Opts);
         false ->
             {error, reset_not_confirmed}
@@ -563,17 +578,12 @@ reset(Opts) ->
 
 delete_all_objects(Opts) ->
     #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts),
-
-    BucketStr = binary_to_list(Bucket),
-    PrefixStr = binary_to_list(Prefix),
-
-    % List all objects with prefix
+    BucketStr = hb_util:list(Bucket),
+    PrefixStr = hb_util:list(Prefix),
     case erlcloud_s3:list_objects(BucketStr, [{prefix, PrefixStr}], Config) of
         L when is_list(L) ->
             Contents = proplists:get_value(contents, L, []),
             Keys = [proplists:get_value(key, Obj) || Obj <- Contents],
-
-            % Delete all objects
             case Keys of
                 [] -> ok;
                 _ ->
                     erlcloud_s3:delete_objects(BucketStr, Keys, Config),
                     ok
             end;
         _ ->
             ok
     end.
@@ -597,6 +607,4 @@ scope(_Opts) -> scope().
 %% Simple implementation - just returns not_found for now.
 -spec match(opts(), map()) -> {ok, [binary()]} | not_found.
 match(_Opts, _Template) ->
-    % This would require listing all objects and checking each one
-    % For MVP, we'll skip this feature
     not_found.

From a5a7bd652ebe22f1bc31b1588b9cc8d1a23ed795 Mon Sep 17 00:00:00 2001
From: speeddragon
Date: Tue, 14 Oct 2025 18:21:09 +0100
Subject: [PATCH 3/7] test: Add S3 store to test suites

Integrate hb_store_s3 into hb_cache and hb_store test suites.
Fix link resolution and reset functionality for S3 compatibility.
Skip match tests for S3 store (not supported).
---
 src/hb_cache.erl    |   8 +-
 src/hb_store.erl    |  11 +
 src/hb_store_s3.erl | 506 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 515 insertions(+), 10 deletions(-)

diff --git a/src/hb_cache.erl b/src/hb_cache.erl
index dc2efe08e..927050446 100644
--- a/src/hb_cache.erl
+++ b/src/hb_cache.erl
@@ -970,7 +970,9 @@ test_message_with_list(Store) ->
     {ok, RetrievedItem} = read(Path, Opts),
     ?assert(hb_message:match(Msg, RetrievedItem, strict, Opts)).
 
-test_match_message(Store) when map_get(<<"store-module">>, Store) =/= hb_store_lmdb ->
+test_match_message(Store) when map_get(<<"store-module">>, Store) =/= hb_store_lmdb ->
+    Module = map_get(<<"store-module">>, Store),
+    ?event({test_skip, {module, Module}}),
     skip;
 test_match_message(Store) ->
     hb_store:reset(Store),
@@ -998,6 +1000,8 @@ test_match_message(Store) ->
     ?assertEqual([ID2b], MatchedItems2).
test_match_linked_message(Store) when map_get(<<"store-module">>, Store) =/= hb_store_lmdb -> + Module = map_get(<<"store-module">>, Store), + ?event({test_skip, {module, Module}}), skip; test_match_linked_message(Store) -> hb_store:reset(Store), @@ -1024,6 +1028,8 @@ test_match_linked_message(Store) -> ). test_match_typed_message(Store) when map_get(<<"store-module">>, Store) =/= hb_store_lmdb -> + Module = map_get(<<"store-module">>, Store), + ?event({test_skip, {module, Module}}), skip; test_match_typed_message(Store) -> hb_store:reset(Store), diff --git a/src/hb_store.erl b/src/hb_store.erl index 5cb9e5527..822bb1edb 100644 --- a/src/hb_store.erl +++ b/src/hb_store.erl @@ -439,6 +439,17 @@ test_stores() -> <<"name">> => <<"cache-TEST/lru">> } ] + }, + %% Should S3 be under a feature flag? + (hb_test_utils:test_store(hb_store_s3))#{ + %% NOTE: To be tuned + <<"benchmark-scale">> => 0.005, + %% Default config + <<"bucket">> => <<"hb-s3">>, + <<"access-key-id">> => <<"niko">>, + <<"secret-access-key">> => <<"minio-niko-rocks">>, + <<"endpoint">> => <<"localhost:9000">>, + <<"dangerous_reset">> => true } ] ++ rocks_stores(). diff --git a/src/hb_store_s3.erl b/src/hb_store_s3.erl index 781152516..7ec8b3071 100644 --- a/src/hb_store_s3.erl +++ b/src/hb_store_s3.erl @@ -18,6 +18,7 @@ -include("include/hb.hrl"). -include_lib("erlcloud/include/erlcloud_aws.hrl"). +-include_lib("eunit/include/eunit.hrl"). %% Type definitions -type opts() :: map(). @@ -158,10 +159,19 @@ write(Opts, Key, Value) when is_list(Key) -> write(Opts, Key, Value) -> maybe #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts), + + HasSlash = (binary:match(hb_store:join(Key), <<"/">>) =/= nomatch), + RelKey = hb_store:join(Key), FullKey = case RelKey of <<"data/", _/binary>> -> build_s3_key(Prefix, RelKey); - _ -> build_groups_key(Prefix, RelKey) + _ -> + %% NOTE: This behaviour didn't match the one from read + %% (basic_test was failing), so I needed to add this logic. + case HasSlash of + true -> build_groups_key(Prefix, RelKey); + false -> build_links_key(Prefix, RelKey) + end end, BucketStr = hb_util:list(Bucket), KeyStr = hb_util:list(FullKey), @@ -216,26 +226,64 @@ build_groups_key(Prefix, Key) -> %% @doc Read a value from S3, following links if necessary. -spec read(opts(), key()) -> {ok, value()} | not_found. +% TODO: Remove, we don't need this match anymore. +read(_Opts, []) -> not_found; + read(Opts, Key) when is_list(Key) -> read(Opts, hb_store:join(Key)); read(Opts, Key) -> - read_with_links(Opts, Key, 0). + read_with_links(Opts, Key, 0, false). 
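+
+%% For illustration (mirroring link_fragment_test below): reading
+%% [<<"my-link">>, <<"baz">>] first misses as a direct key; the fallback
+%% below then retries the parent <<"my-link">> in link-search mode, finds
+%% that it resolves to <<"data/bar">>, and re-reads <<"data/bar/baz">>.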
 %% Internal read that tracks link depth to prevent infinite loops
-read_with_links(_Opts, _Key, Depth) when Depth > ?MAX_LINK_DEPTH ->
+read_with_links(_Opts, _Key, Depth, _IsSubFolderLinkSearchMode) when Depth > ?MAX_LINK_DEPTH ->
     ?event(error, {too_many_links, {depth, Depth}}),
     not_found;
-read_with_links(Opts, Key, Depth) ->
+read_with_links(Opts, Key, Depth, IsSubFolderLinkSearchMode) ->
+    ?event(store_s3, {read_key, Key}),
     case read_direct(Opts, Key) of
         {ok, Value} ->
             case is_link(Value) of
                 {true, Target} ->
-                    read_with_links(Opts, Target, Depth + 1);
+                    ?event(store_s3, {link_found, Target}),
+                    read_with_links(Opts, Target, Depth + 1, IsSubFolderLinkSearchMode);
                 false ->
                     {ok, Value}
             end;
         not_found ->
-            not_found
+            %% NOTE: We need to create a more complex test for deep linking
+            ?event(store_s3, {not_found,
+                {key, Key},
+                {depth, Depth},
+                {is_sub_folder_link_search_mode, IsSubFolderLinkSearchMode}}),
+            %% TODO: This Depth check might not be the best way to handle this
+            %% scenario, where we walk back up to find the link and then resolve
+            %% forward (attaching the end key back onto the proper path).
+            case IsSubFolderLinkSearchMode of
+                true ->
+                    {resolved_link_key, Key};
+                false ->
+                    case lists:reverse(binary:split(Key, <<"/">>, [global])) of
+                        [_] ->
+                            %% No parent path left to search
+                            not_found;
+                        [EndKey | ReversedNewKey] ->
+                            NewKey = lists:reverse(ReversedNewKey),
+                            ?event(store_s3, {check_new_key, NewKey}),
+                            case read_with_links(Opts, NewKey, Depth, true) of
+                                {ok, _Value} ->
+                                    %% NOTE: When the key has a value, what should
+                                    %% we do? We still need to resolve.
+                                    throw("Unhandled case when the previous key has information stored");
+                                {resolved_link_key, NewKey} ->
+                                    not_found;
+                                {resolved_link_key, Value} ->
+                                    ?event(store_s3, {{resolved_link_key, Value}, {end_key, EndKey}}),
+                                    read(Opts, <<Value/binary, "/", EndKey/binary>>);
+                                not_found ->
+                                    not_found
+                            end
+                    end
+            end
     end.
 
 %% Direct read without link resolution
@@ -321,7 +369,11 @@ is_link(Value) ->
 %% In S3, directories don't really exist, so this is a no-op.
 %% Groups are detected by listing operations.
 -spec make_group(opts(), key()) -> ok.
-make_group(_Opts, _Path) ->
+make_group(Opts, Path) ->
+    % NOTE: We need a way to just create an empty group.
+    % This marker object is also ignored when trying to retrieve the list of
+    % keys for a given path.
+    write(Opts, <<(hb_store:join(Path))/binary, "/empty_group">>, <<"group">>),
     ok.
 
 %% @doc List immediate children under a given path.
@@ -330,6 +382,7 @@ list(Opts, Path) when is_list(Path) ->
     list(Opts, hb_store:join(Path));
 list(Opts, Path) ->
+    UnwantedChildren = [<<"empty_group">>],
     maybe
         #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts),
         ResolvedPath = case read_direct(Opts, Path) of
             {ok, Value} ->
                 case is_link(Value) of
                     {true, Target} ->
                         Target;
                     false ->
                         Path
                 end;
             not_found ->
                 Path
         end,
         FullPath = build_groups_key(Prefix, ResolvedPath),
         SearchPrefix = ensure_trailing_slash(FullPath),
         BucketStr = hb_util:list(Bucket),
         PrefixStr = hb_util:list(SearchPrefix),
         ListOpts = [{prefix, PrefixStr}, {delimiter, "/"}],
         case erlcloud_s3:list_objects(BucketStr, ListOpts, Config) of
             L when is_list(L) ->
                 Children = extract_children(SearchPrefix, L),
-                {ok, Children};
+                {ok, Children -- UnwantedChildren};
             {error, _Reason} ->
                 {ok, []}
         end
@@ -587,7 +640,7 @@ delete_all_objects(Opts) ->
             case Keys of
                 [] -> ok;
                 _ ->
-                    erlcloud_s3:delete_objects(BucketStr, Keys, Config),
+                    erlcloud_s3:delete_objects_batch(BucketStr, Keys, Config),
                     ok
             end;
         _ ->
             ok
     end.
 
 -spec match(opts(), map()) -> {ok, [binary()]} | not_found.
 match(_Opts, _Template) ->
     not_found.
+
+%% @doc Test suite demonstrating basic store operations.
+%% +%% The following functions implement unit tests using EUnit to verify that +%% the S3 store implementation correctly handles various scenarios including +%% basic read/write operations, hierarchical listing, group creation, link +%% resolution, and type detection. + +default_test_opts() -> + #{<<"store-module">> => ?MODULE, + <<"bucket">> => <<"hb-s3">>, + <<"access-key-id">> => <<"niko">>, + <<"secret-access-key">> => <<"minio-niko-rocks">>, + <<"endpoint">> => <<"localhost:9000">>, + <<"dangerous_reset">> => true}. + +%% @doc Basic store test - verifies fundamental read/write functionality. +%% +%% This test creates a temporary database, writes a key-value pair, reads it +%% back to verify correctness, and cleans up by stopping the database. It +%% serves as a sanity check that the basic storage mechanism is working. + +basic_test() -> + StoreOpts = default_test_opts(), + start(StoreOpts), + reset(StoreOpts), + Value = <<"world">>, + Key = <<"hello">>, + ?event(s3_test, {{key, Key},{value, Value}}), + Res = write(StoreOpts, Key, Value), + ?assertEqual(ok, Res), + {ok, Response} = read(StoreOpts, Key), + ?assertEqual(Value, Response), + ok = stop(StoreOpts). + +%% @doc List test - verifies prefix-based key listing functionality. +%% +%% This test creates several keys with hierarchical names and verifies that +%% the list operation correctly returns only keys matching a specific prefix. +%% It demonstrates the directory-like navigation capabilities of the store. +list_test() -> + StoreOpts = default_test_opts(), + start(StoreOpts), + reset(StoreOpts), + ?assertEqual({ok, []}, list(StoreOpts, <<"colors">>)), + % Create immediate children under colors/ + write(StoreOpts, <<"colors/red">>, <<"1">>), + write(StoreOpts, <<"colors/blue">>, <<"2">>), + write(StoreOpts, <<"colors/green">>, <<"3">>), + % Create nested directories under colors/ - these should show up as immediate children + write(StoreOpts, <<"colors/multi/foo">>, <<"4">>), + write(StoreOpts, <<"colors/multi/bar">>, <<"5">>), + write(StoreOpts, <<"colors/primary/red">>, <<"6">>), + write(StoreOpts, <<"colors/primary/blue">>, <<"7">>), + write(StoreOpts, <<"colors/nested/deep/value">>, <<"8">>), + % Create other top-level directories + write(StoreOpts, <<"foo/bar">>, <<"baz">>), + write(StoreOpts, <<"beep/boop">>, <<"bam">>), + read(StoreOpts, <<"colors">>), + % Test listing colors/ - should return immediate children only + {ok, ListResult} = list(StoreOpts, <<"colors">>), + ?event({list_result, ListResult}), + % Expected: red, blue, green (files) + multi, primary, nested (directories) + % Should NOT include deeply nested items like foo, bar, deep, value + ExpectedChildren = [<<"blue">>, <<"green">>, <<"multi">>, <<"nested">>, <<"primary">>, <<"red">>], + ?assert(lists:all(fun(Key) -> lists:member(Key, ExpectedChildren) end, ListResult)), + % Test listing a nested directory - should only show immediate children + {ok, NestedListResult} = list(StoreOpts, <<"colors/multi">>), + ?event({nested_list_result, NestedListResult}), + ExpectedNestedChildren = [<<"bar">>, <<"foo">>], + ?assert(lists:all(fun(Key) -> lists:member(Key, ExpectedNestedChildren) end, NestedListResult)), + % Test listing a deeper nested directory + {ok, DeepListResult} = list(StoreOpts, <<"colors/nested">>), + ?event({deep_list_result, DeepListResult}), + ExpectedDeepChildren = [<<"deep">>], + ?assert(lists:all(fun(Key) -> lists:member(Key, ExpectedDeepChildren) end, DeepListResult)), + ok = stop(StoreOpts). 
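+
+%% For illustration (hypothetical object layout, assuming no store prefix
+%% is configured): the writes in list_test above land under the groups
+%% namespace, e.g.
+%%   <<"colors/red">>       -> "groups/colors/red"
+%%   <<"colors/multi/foo">> -> "groups/colors/multi/foo"
+%% so list(StoreOpts, <<"colors">>) scans the S3 prefix "groups/colors/"
+%% with delimiter "/" and returns only the immediate children.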
+
+%% @doc Group test - verifies group creation and type detection.
+%%
+%% This test creates a group entry and verifies that it is correctly identified
+%% as a composite type and cannot be read directly (like filesystem directories).
+group_test() ->
+    StoreOpts = default_test_opts(),
+    start(StoreOpts),
+    reset(StoreOpts),
+    make_group(StoreOpts, <<"colors">>),
+    % Groups should be detected as composite types
+    ?assertEqual(composite, type(StoreOpts, <<"colors">>)),
+    % Groups should not be readable directly (like directories in filesystem)
+    ?assertEqual(not_found, read(StoreOpts, <<"colors">>)).
+
+%% @doc Link test - verifies symbolic link creation and resolution.
+%%
+%% This test creates a regular key-value pair, creates a link pointing to it,
+%% and verifies that reading from the link location returns the original value.
+%% This demonstrates the transparent link resolution mechanism.
+link_test() ->
+    StoreOpts = default_test_opts(),
+    start(StoreOpts),
+    reset(StoreOpts),
+    write(StoreOpts, <<"foo/bar/baz">>, <<"Bam">>),
+    make_link(StoreOpts, <<"foo/bar/baz">>, <<"foo/beep/baz">>),
+    {ok, Result} = read(StoreOpts, <<"foo/beep/baz">>),
+    ?event({result, Result}),
+    ?assertEqual(<<"Bam">>, Result).
+
+link_fragment_test() ->
+    StoreOpts = default_test_opts(),
+    start(StoreOpts),
+    reset(StoreOpts),
+    ok = write(StoreOpts, [<<"data">>, <<"bar">>, <<"baz">>], <<"Bam">>),
+    ok = make_link(StoreOpts, [<<"data">>, <<"bar">>], <<"my-link">>),
+    {ok, Result} = read(StoreOpts, [<<"my-link">>, <<"baz">>]),
+    ?event({result, Result}),
+    ?assertEqual(<<"Bam">>, Result).
+
+%% @doc Type test - verifies type detection for both simple and composite entries.
+%%
+%% This test creates both a group (composite) entry and a regular (simple) entry,
+%% then verifies that the type detection function correctly identifies each one.
+%% This demonstrates the semantic classification system used by the store.
+type_test() ->
+    StoreOpts = default_test_opts(),
+    start(StoreOpts),
+    reset(StoreOpts),
+    % TODO: Make this line active and remove the write below
+    % that creates a child inside the group, when `make_group`
+    % code is improved.
+    %make_group(StoreOpts, <<"assets">>),
+    ok = write(StoreOpts, <<"assets/data">>, <<"value">>),
+    Type = type(StoreOpts, <<"assets">>),
+    ?event({type, Type}),
+    ?assertEqual(composite, Type),
+    write(StoreOpts, <<"assets/1">>, <<"bam">>),
+    Type2 = type(StoreOpts, <<"assets/1">>),
+    ?event({type2, Type2}),
+    ?assertEqual(simple, Type2).
+
+%% @doc Link key list test - verifies symbolic link creation using structured key paths.
+%%
+%% This test demonstrates the store's ability to handle complex key structures
+%% represented as lists of binary segments, and verifies that symbolic links
+%% work correctly when the target key is specified as a list rather than a
+%% flat binary string.
+%%
+%% The test creates a hierarchical key structure using a list format (which
+%% presumably gets converted to a path-like binary internally), creates a
+%% symbolic link pointing to that structured key, and verifies that link
+%% resolution works transparently to return the original value.
+%%
+%% This is particularly important for applications that organize data in
+%% hierarchical structures where keys represent nested paths or categories,
+%% and need to create shortcuts or aliases to deeply nested data.
+link_key_list_test() -> + StoreOpts = default_test_opts(), + start(StoreOpts), + reset(StoreOpts), + write(StoreOpts, [ <<"parent">>, <<"key">> ], <<"value">>), + make_link(StoreOpts, [ <<"parent">>, <<"key">> ], <<"my-link">>), + {ok, Result} = read(StoreOpts, <<"my-link">>), + ?event({result, Result}), + ?assertEqual(<<"value">>, Result). + +%% @doc Path traversal link test - verifies link resolution during path traversal. +%% +%% This test verifies that when reading a path as a list, intermediate path +%% segments that are links get resolved correctly. For example, if "link" +%% is a symbolic link to "group", then reading ["link", "key"] should +%% resolve to reading ["group", "key"]. +%% +%% This functionality enables transparent redirection at the directory level, +%% allowing reorganization of hierarchical data without breaking existing +%% access patterns. +path_traversal_link_test() -> + StoreOpts = default_test_opts(), + start(StoreOpts), + reset(StoreOpts), + % Create the actual data at group/key + write(StoreOpts, [<<"group">>, <<"key">>], <<"target-value">>), + % Create a link from "link" to "group" + make_link(StoreOpts, <<"group">>, <<"link">>), + % Reading via the link path should resolve to the target value + {ok, Result} = read(StoreOpts, [<<"link">>, <<"key">>]), + ?event({path_traversal_result, Result}), + ?assertEqual(<<"target-value">>, Result), + ok = stop(StoreOpts). + +%% @doc Test that matches the exact hb_store hierarchical test pattern +exact_hb_store_test() -> + StoreOpts = default_test_opts(), + start(StoreOpts), + % Follow exact same pattern as hb_store test + ?event(step1_make_group), + make_group(StoreOpts, <<"test-dir1">>), + ?event(step2_write_file), + write(StoreOpts, [<<"test-dir1">>, <<"test-file">>], <<"test-data">>), + ?event(step3_make_link), + make_link(StoreOpts, [<<"test-dir1">>], <<"test-link">>), + % Debug: test that the link behaves like the target (groups are unreadable) + ?event(step4_check_link), + LinkResult = read(StoreOpts, <<"test-link">>), + ?event({link_result, LinkResult}), + % Since test-dir1 is a group and groups are unreadable, the link should also be unreadable + ?assertEqual(not_found, LinkResult), + % Debug: test intermediate steps + ?event(step5_test_direct_read), + DirectResult = read(StoreOpts, <<"test-dir1/test-file">>), + ?event({direct_result, DirectResult}), + % This should work: reading via the link path + ?event(step6_test_link_read), + Result = read(StoreOpts, [<<"test-link">>, <<"test-file">>]), + ?event({final_result, Result}), + ?assertEqual({ok, <<"test-data">>}, Result), + ok = stop(StoreOpts). + +%% @doc Test cache-style usage through hb_store interface +cache_style_test() -> + hb:init(), + StoreOpts = default_test_opts(), + % Start the store + hb_store:start(StoreOpts), + reset(StoreOpts), + % Test writing through hb_store interface + ok = hb_store:write(StoreOpts, <<"test-key">>, <<"test-value">>), + % Test reading through hb_store interface + Result = hb_store:read(StoreOpts, <<"test-key">>), + ?event({cache_style_read_result, Result}), + ?assertEqual({ok, <<"test-value">>}, Result), + hb_store:stop(StoreOpts). + +%% @doc Test nested map storage with cache-like linking behavior +%% +%% This test demonstrates how to store a nested map structure where: +%% 1. Each value is stored at data/{hash_of_value} +%% 2. Links are created to compose the values back into the original map structure +%% 3. 
Reading the composed structure reconstructs the original nested map +nested_map_cache_test() -> + StoreOpts = default_test_opts(), + start(StoreOpts), + % Clean up any previous test data + reset(StoreOpts), + % Original nested map structure + OriginalMap = #{ + <<"target">> => <<"Foo">>, + <<"commitments">> => #{ + <<"key1">> => #{ + <<"alg">> => <<"rsa-pss-512">>, + <<"committer">> => <<"unique-id">> + }, + <<"key2">> => #{ + <<"alg">> => <<"hmac">>, + <<"commiter">> => <<"unique-id-2">> + } + }, + <<"other-key">> => #{ + <<"other-key-key">> => <<"other-key-value">> + } + }, + ?event({original_map, OriginalMap}), + % Step 1: Store each leaf value at data/{hash} + TargetValue = <<"Foo">>, + TargetHash = base64:encode(crypto:hash(sha256, TargetValue)), + write(StoreOpts, <<"data/", TargetHash/binary>>, TargetValue), + AlgValue1 = <<"rsa-pss-512">>, + AlgHash1 = base64:encode(crypto:hash(sha256, AlgValue1)), + write(StoreOpts, <<"data/", AlgHash1/binary>>, AlgValue1), + CommitterValue1 = <<"unique-id">>, + CommitterHash1 = base64:encode(crypto:hash(sha256, CommitterValue1)), + write(StoreOpts, <<"data/", CommitterHash1/binary>>, CommitterValue1), + AlgValue2 = <<"hmac">>, + AlgHash2 = base64:encode(crypto:hash(sha256, AlgValue2)), + write(StoreOpts, <<"data/", AlgHash2/binary>>, AlgValue2), + CommitterValue2 = <<"unique-id-2">>, + CommitterHash2 = base64:encode(crypto:hash(sha256, CommitterValue2)), + write(StoreOpts, <<"data/", CommitterHash2/binary>>, CommitterValue2), + OtherKeyValue = <<"other-key-value">>, + OtherKeyHash = base64:encode(crypto:hash(sha256, OtherKeyValue)), + write(StoreOpts, <<"data/", OtherKeyHash/binary>>, OtherKeyValue), + % Step 2: Create the nested structure with groups and links + % Create the root group + make_group(StoreOpts, <<"root">>), + % Create links for the root level keys + make_link(StoreOpts, <<"data/", TargetHash/binary>>, <<"root/target">>), + % Create the commitments subgroup + make_group(StoreOpts, <<"root/commitments">>), + % Create the key1 subgroup within commitments + make_group(StoreOpts, <<"root/commitments/key1">>), + make_link(StoreOpts, <<"data/", AlgHash1/binary>>, <<"root/commitments/key1/alg">>), + make_link(StoreOpts, <<"data/", CommitterHash1/binary>>, <<"root/commitments/key1/committer">>), + % Create the key2 subgroup within commitments + make_group(StoreOpts, <<"root/commitments/key2">>), + make_link(StoreOpts, <<"data/", AlgHash2/binary>>, <<"root/commitments/key2/alg">>), + make_link(StoreOpts, <<"data/", CommitterHash2/binary>>, <<"root/commitments/key2/commiter">>), + % Create the other-key subgroup + make_group(StoreOpts, <<"root/other-key">>), + make_link(StoreOpts, <<"data/", OtherKeyHash/binary>>, <<"root/other-key/other-key-key">>), + % Step 3: Test reading the structure back + % Verify the root is a composite + ?assertEqual(composite, type(StoreOpts, <<"root">>)), + % List the root contents + {ok, RootKeys} = list(StoreOpts, <<"root">>), + ?event({root_keys, RootKeys}), + ExpectedRootKeys = [<<"commitments">>, <<"other-key">>, <<"target">>], + ?assert(lists:all(fun(Key) -> lists:member(Key, ExpectedRootKeys) end, RootKeys)), + % Read the target directly + {ok, TargetValueRead} = read(StoreOpts, <<"root/target">>), + ?assertEqual(<<"Foo">>, TargetValueRead), + % Verify commitments is a composite + ?assertEqual(composite, type(StoreOpts, <<"root/commitments">>)), + % Verify other-key is a composite + ?assertEqual(composite, type(StoreOpts, <<"root/other-key">>)), + % Step 4: Test programmatic reconstruction of the nested map 
    ReconstructedMap = reconstruct_map(StoreOpts, <<"root">>),
    ?event({reconstructed_map, ReconstructedMap}),
    % Verify the reconstructed map matches the original structure
    ?assert(hb_message:match(OriginalMap, ReconstructedMap)),
    stop(StoreOpts).

%% Helper function to recursively reconstruct a map from the store
reconstruct_map(StoreOpts, Path) ->
    case type(StoreOpts, Path) of
        composite ->
            % This is a group, reconstruct it as a map
            {ok, ImmediateChildren} = list(StoreOpts, Path),
            % The list function now correctly returns only immediate children
            ?event({path, Path, immediate_children, ImmediateChildren}),
            maps:from_list([
                {Key, reconstruct_map(StoreOpts, <<Path/binary, "/", Key/binary>>)}
                || Key <- ImmediateChildren
            ]);
        simple ->
            % This is a simple value, read it directly
            {ok, Value} = read(StoreOpts, Path),
            Value;
        not_found ->
            % Path doesn't exist
            undefined
    end.

%% @doc Debug test to understand cache linking behavior
cache_debug_test() ->
    StoreOpts = default_test_opts(),
    start(StoreOpts),
    reset(StoreOpts),
    % Simulate what the cache does:
    % 1. Create a group for message ID
    MessageID = <<"test_message_123">>,
    make_group(StoreOpts, MessageID),
    % 2. Store a value at data/hash
    Value = <<"test_value">>,
    ValueHash = base64:encode(crypto:hash(sha256, Value)),
    DataPath = <<"data/", ValueHash/binary>>,
    write(StoreOpts, DataPath, Value),
    % 3. Calculate a key hashpath (simplified version)
    KeyHashPath = <<MessageID/binary, "/key_hash_abc">>,
    % 4. Create link from data path to key hash path
    make_link(StoreOpts, DataPath, KeyHashPath),
    % 5. Test what the cache would see:
    ?event(debug_cache_test, {step, check_message_type}),
    MsgType = type(StoreOpts, MessageID),
    ?event(debug_cache_test, {message_type, MsgType}),
    ?event(debug_cache_test, {step, list_message_contents}),
    {ok, Subkeys} = list(StoreOpts, MessageID),
    ?event(debug_cache_test, {message_subkeys, Subkeys}),
    ?event(debug_cache_test, {step, read_key_hashpath}),
    KeyHashResult = read(StoreOpts, KeyHashPath),
    ?event(debug_cache_test, {key_hash_read_result, KeyHashResult}),
    % 6. Test with path as list (what cache does):
    ?event(debug_cache_test, {step, read_path_as_list}),
    PathAsList = [MessageID, <<"key_hash_abc">>],
    PathAsListResult = read(StoreOpts, PathAsList),
    ?event(debug_cache_test, {path_as_list_result, PathAsListResult}),
    stop(StoreOpts).

%% @doc Isolated test focusing on the exact cache issue
isolated_type_debug_test() ->
    StoreOpts = default_test_opts(),
    start(StoreOpts),
    reset(StoreOpts),
    % Create the exact scenario from user's description:
    % 1. A message ID with nested structure
    MessageID = <<"message123">>,
    make_group(StoreOpts, MessageID),
    % 2. Create nested groups for "commitments" and "other-test-key"
    CommitmentsPath = <<MessageID/binary, "/commitments">>,
    OtherKeyPath = <<MessageID/binary, "/other-test-key">>,
    ?event(isolated_debug, {creating_nested_groups, CommitmentsPath, OtherKeyPath}),
    make_group(StoreOpts, CommitmentsPath),
    make_group(StoreOpts, OtherKeyPath),
    % 3. Add some actual data within those groups
    write(StoreOpts, <<CommitmentsPath/binary, "/key1">>, <<"signature_data_1">>),
    write(StoreOpts, <<OtherKeyPath/binary, "/nested">>, <<"nested_value">>),
    % 4.
Test type detection on the nested paths + ?event(isolated_debug, {testing_main_message_type}), + MainType = type(StoreOpts, MessageID), + ?event(isolated_debug, {main_message_type, MainType}), + ?event(isolated_debug, {testing_commitments_type}), + CommitmentsType = type(StoreOpts, CommitmentsPath), + ?event(isolated_debug, {commitments_type, CommitmentsType}), + ?event(isolated_debug, {testing_other_key_type}), + OtherKeyType = type(StoreOpts, OtherKeyPath), + ?event(isolated_debug, {other_key_type, OtherKeyType}), + % 5. Test what happens when reading these nested paths + ?event(isolated_debug, {reading_commitments_directly}), + CommitmentsResult = read(StoreOpts, CommitmentsPath), + ?event(isolated_debug, {commitments_read_result, CommitmentsResult}), + ?event(isolated_debug, {reading_other_key_directly}), + OtherKeyResult = read(StoreOpts, OtherKeyPath), + ?event(isolated_debug, {other_key_read_result, OtherKeyResult}), + stop(StoreOpts). + +%% @doc Test that list function resolves links correctly +list_with_link_test() -> + StoreOpts = default_test_opts(), + start(StoreOpts), + reset(StoreOpts), + % Create a group with some children + make_group(StoreOpts, <<"real-group">>), + write(StoreOpts, <<"real-group/child1">>, <<"value1">>), + write(StoreOpts, <<"real-group/child2">>, <<"value2">>), + write(StoreOpts, <<"real-group/child3">>, <<"value3">>), + % Create a link to the group + make_link(StoreOpts, <<"real-group">>, <<"link-to-group">>), + % List the real group to verify expected children + {ok, RealGroupChildren} = list(StoreOpts, <<"real-group">>), + ?event({real_group_children, RealGroupChildren}), + ExpectedChildren = [<<"child1">>, <<"child2">>, <<"child3">>], + ?assertEqual(ExpectedChildren, lists:sort(RealGroupChildren)), + % List via the link - should return the same children + {ok, LinkChildren} = list(StoreOpts, <<"link-to-group">>), + ?event({link_children, LinkChildren}), + ?assertEqual(ExpectedChildren, lists:sort(LinkChildren)), + stop(StoreOpts). + From 4d2edf17ac609c38c2ac936a21b29653d9a2357f Mon Sep 17 00:00:00 2001 From: speeddragon Date: Wed, 22 Oct 2025 18:39:00 +0100 Subject: [PATCH 4/7] impr: Replace hackney with gun for S3 HTTP client Remove hackney dependency in favor of gun for S3 operations. Improve error handling, add function documentation, and clean up code style throughout the S3 store module. --- README.md | 56 +++- rebar.config | 12 +- src/hb_cache.erl | 8 +- src/hb_cache_control.erl | 14 +- src/hb_store.erl | 29 +- src/hb_store_s3.erl | 563 +++++++++++++++++++++---------------- test/docker-compose-s3.yml | 27 ++ 7 files changed, 434 insertions(+), 275 deletions(-) create mode 100644 test/docker-compose-s3.yml diff --git a/README.md b/README.md index dc6bf80e1..0f5dd940b 100644 --- a/README.md +++ b/README.md @@ -87,9 +87,9 @@ HyperBEAM supports several optional build profiles that enable additional featur - `genesis_wasm`: Enables Genesis WebAssembly support - `rocksdb`: Enables RocksDB storage backend (adds RocksDB v1.8.0 dependency) +- `s3`: Enables S3 storage backend - `http3`: Enables HTTP/3 support via QUIC protocol - Using these profiles allows you to optimize HyperBEAM for your specific use case without adding unnecessary dependencies to the base installation. 
To start a shell with profiles: @@ -107,6 +107,11 @@ To create a release with profiles: ```bash # Create release with profiles rebar3 as rocksdb,genesis_wasm release + +# Run S3 integration tests (make sure you have docker-compose installed) +docker-compose -f test/docker-compose-s3.yml up -d +rebar3 as s3 eunit --module hb_store_s3 +docker-compose -f test/docker-compose-s3.yml down -d ``` Note: Profiles modify compile-time options that get baked into the release. Choose the profiles you need before starting HyperBEAM. @@ -266,6 +271,55 @@ schedule of another execution. Details on other devices found in the pre-loaded set can be located in their respective documentation. +## Testing + +### Running tests + +Specific tests can be ran with `--test` parameter: + +``` +rebar3 eunit --test hb_store_lmdb:link_fragment_test +``` + +All tests inside a module can be ran with `--module` parameter: + +``` +rebar3 eunit --module hb_store_lmdb +``` + +To run multiple module append a comma: + +``` +rebar3 eunit --module hb_store_lmdb,hb_store_lru +``` + +Some tests might need extra profiles to be enabled before running it: + +``` +rebar3 as s3 eunit --module hb_store_s3 +``` + +It might also need external dependencies like MinIO to be up and running: + +``` +rebar3 as s3 cmd docker_up, eunit --module hb_store_s3, cmd docker_down +``` + +To test generator tests, you need to add `-g` to the eunit command: + +``` +rebar3 eunit -g hb_cache:cache_suite_test_ +``` + +### Generating test coverage report + +Add `--cover` to the eunit command to generate test coverage data. +To generate the HTML report, run the command `cover`. + +``` +rebar3 as s3 eunit --module hb_store_s3 --cover, cover +``` + ## Documentation HyperBEAM uses [MkDocs](https://www.mkdocs.org/) with the [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/) theme to build its documentation site. All documentation source files are located in the `docs/` directory. diff --git a/rebar.config b/rebar.config index 162f28cd9..72be6ab92 100644 --- a/rebar.config +++ b/rebar.config @@ -41,6 +41,12 @@ {d, 'ENABLE_ROCKSDB', true} ]} ]}, + {s3, [ + {deps, [{erlcloud, {git, "https://github.com/erlcloud/erlcloud.git", {ref, "7322624227e12120d88c47a72a2ba0a032ace9f3"}}}]}, + {erl_opts, [ + {d, 'ENABLE_S3', true} + ]} + ]}, {http3, [ {deps, [ {quicer, {git, "https://github.com/emqx/quic.git", @@ -56,6 +62,9 @@ {add, cowboy, [{erl_opts, [{d, 'COWBOY_QUICER', 1}]}]}, {add, gun, [{erl_opts, [{d, 'GUN_QUICER', 1}]}]} ]} + ]}, + {test, [ + {deps, [{meck, "1.1.0"}]} ]} ]}. @@ -128,8 +137,7 @@ {prometheus_httpd, "2.1.15"}, {prometheus, "6.0.3"}, {graphql, "0.17.1", {pkg, graphql_erl}}, - {luerl, "1.3.0"}, - {erlcloud, "3.8.3"} + {luerl, "1.3.0"} ]}. {shell, [ diff --git a/src/hb_cache.erl b/src/hb_cache.erl index 927050446..dc2efe08e 100644 --- a/src/hb_cache.erl +++ b/src/hb_cache.erl @@ -970,9 +970,7 @@ test_message_with_list(Store) -> {ok, RetrievedItem} = read(Path, Opts), ?assert(hb_message:match(Msg, RetrievedItem, strict, Opts)). -test_match_message(Store) when map_get(<<"store-module">>, Store) =/= hb_store_lmdb -> - Module = map_get(<<"store-module">>, Store), - ?event({test_skip, {module, Module}}), +test_match_message(Store) when map_get(<<"store-module">>, Store) =/= hb_store_lmdb -> skip; test_match_message(Store) -> hb_store:reset(Store), @@ -1000,8 +998,6 @@ test_match_message(Store) -> ?assertEqual([ID2b], MatchedItems2). 
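The `hb_cache` hunks above and below trim the skip clauses down to a bare `skip` return, and the idiom they rely on is worth spelling out: a head guard diverts every store whose module is not `hb_store_lmdb` into a no-op clause, so the suite only runs the match tests where they are supported. A minimal sketch of the pattern, with an illustrative `run_if_lmdb/1` name that is not part of the codebase:

```erlang
%% Guard-skip idiom: the first clause matches any store map whose
%% <<"store-module">> is not hb_store_lmdb and returns `skip`; only
%% LMDB-backed stores reach the second clause with the real test body.
run_if_lmdb(Store) when map_get(<<"store-module">>, Store) =/= hb_store_lmdb ->
    skip;
run_if_lmdb(Store) ->
    {ran, maps:get(<<"store-module">>, Store)}.
```

`map_get/2` has been allowed in guards since OTP 21, which is what makes this dispatch-on-a-map-field clause possible without an explicit `case`.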
test_match_linked_message(Store) when map_get(<<"store-module">>, Store) =/= hb_store_lmdb -> - Module = map_get(<<"store-module">>, Store), - ?event({test_skip, {module, Module}}), skip; test_match_linked_message(Store) -> hb_store:reset(Store), @@ -1028,8 +1024,6 @@ test_match_linked_message(Store) -> ). test_match_typed_message(Store) when map_get(<<"store-module">>, Store) =/= hb_store_lmdb -> - Module = map_get(<<"store-module">>, Store), - ?event({test_skip, {module, Module}}), skip; test_match_typed_message(Store) -> hb_store:reset(Store), diff --git a/src/hb_cache_control.erl b/src/hb_cache_control.erl index cdc9f0311..b5cbb5bb1 100644 --- a/src/hb_cache_control.erl +++ b/src/hb_cache_control.erl @@ -136,13 +136,11 @@ perform_cache_write(Base, Req, Res, Opts) -> hb_cache:write(Req, Opts), case Res of <<_/binary>> -> - Store = hb_opts:get(store, no_viable_store, Opts), - HP = hb_path:hashpath(Base, Req, Opts), - DataPath = <<"data/", (hb_path:hashpath(Res, Opts))/binary>>, - case hb_store:type(Store, DataPath) of - simple -> hb_store:make_link(Store, DataPath, HP); - _ -> hb_cache:write_binary(HP, Res, Opts) - end; + hb_cache:write_binary( + hb_path:hashpath(Base, Req, Opts), + Res, + Opts + ); Map when is_map(Map) -> hb_cache:write(Res, Opts); _ -> @@ -425,4 +423,4 @@ cache_message_result_test() -> {ok, Res3} = hb_ao:resolve(Base, Req, #{ cache_control => [<<"only-if-cached">>] }), ?event({res2, Res2}), ?event({res3, Res3}), - ?assertEqual(Res2, Res3). + ?assertEqual(Res2, Res3). \ No newline at end of file diff --git a/src/hb_store.erl b/src/hb_store.erl index 822bb1edb..bbec1edc4 100644 --- a/src/hb_store.erl +++ b/src/hb_store.erl @@ -439,19 +439,8 @@ test_stores() -> <<"name">> => <<"cache-TEST/lru">> } ] - }, - %% Should S3 be under a feature flag? - (hb_test_utils:test_store(hb_store_s3))#{ - %% NOTE: To be tuned - <<"benchmark-scale">> => 0.005, - %% Default config - <<"bucket">> => <<"hb-s3">>, - <<"access-key-id">> => <<"niko">>, - <<"secret-access-key">> => <<"minio-niko-rocks">>, - <<"endpoint">> => <<"localhost:9000">>, - <<"dangerous_reset">> => true } - ] ++ rocks_stores(). + ] ++ rocks_stores() ++ s3_stores(). -ifdef(ENABLE_ROCKSDB). rocks_stores() -> @@ -464,16 +453,26 @@ rocks_stores() -> -else. rocks_stores() -> []. -endif. - +-ifdef(ENABLE_S3). +s3_stores() -> + [(hb_store_s3:default_test_opts())#{ + <<"benchmark-scale">> => 0.01 + }]. +-else. +s3_stores() -> []. +-endif. generate_test_suite(Suite) -> generate_test_suite(Suite, test_stores()). generate_test_suite(Suite, Stores) -> hb:init(), + application:ensure_all_started(hb), lists:map( fun(Store = #{<<"store-module">> := Mod}) -> {foreach, fun() -> - hb_store:start(Store) + hb_store:start(Store), + % If the test fails, the store isn't cleared. + hb_store:reset(Store) end, fun(_) -> hb_store:reset(Store) @@ -1064,4 +1063,4 @@ make_link_access_test() -> ReadResult = read(StoreList, SourceKey), ?event(testing, {read_linked_value, ReadResult}), ?assertEqual({ok, TestValue}, ReadResult), - ?assertEqual(ok, LinkResult). \ No newline at end of file + ?assertEqual(ok, LinkResult). diff --git a/src/hb_store_s3.erl b/src/hb_store_s3.erl index 7ec8b3071..a7dfa566a 100644 --- a/src/hb_store_s3.erl +++ b/src/hb_store_s3.erl @@ -12,13 +12,13 @@ -export([read/2, write/3, list/2, type/2]). -export([make_group/2, make_link/3, resolve/2]). -export([path/2, add_path/3]). +-export([default_test_opts/0, get_config/1]). %% Helper functions -export([match/2]). -include("include/hb.hrl"). 
-include_lib("erlcloud/include/erlcloud_aws.hrl"). --include_lib("eunit/include/eunit.hrl"). %% Type definitions -type opts() :: map(). @@ -28,12 +28,17 @@ %% Configuration defaults -define(DEFAULT_REGION, <<"us-east-1">>). -define(DEFAULT_ENDPOINT, <<"https://s3.amazonaws.com">>). --define(MAX_LINK_DEPTH, 100). +-define(DEFAULT_FORCE_PATH_STYLE, false). +-define(MAX_REDIRECTS, 100). % Only resolve 1000 links to data -define(LINK_MARKER, <<"link:">>). -%% Namespace for storing link objects separately to avoid file/prefix collisions +%% Namespace for storing link objects separately to avoid file collisions -define(LINKS_NS, <<"links/">>). -define(GROUPS_NS, <<"groups/">>). +-define(DEFAULT_RETRY_DELAY, 1000). % Wait for 1 second before retry. +-define(DEFAULT_RETRY_MODE, exp_backoff). +-define(DEFAULT_RETRIES, 5). % Retries for 5 times until it returns. +-define(DEFAULT_MAX_RETRY_DELAY, 300000). % Max 5 minutes waiting to retry. %%%----------------------------------------------------------------------------- %%% Configuration and Initialization (Phase 2) %%%----------------------------------------------------------------------------- @@ -45,21 +50,39 @@ start(Opts) -> maybe ok ?= validate_config(Opts), - Config = make_erlcloud_config(Opts), + AccessKey = hb_util:list(maps:get(<<"priv_access_key_id">>, Opts)), + SecretKey = hb_util:list(maps:get(<<"priv_secret_access_key">>, Opts)), + Region = hb_util:list(maps:get(<<"region">>, Opts, ?DEFAULT_REGION)), + Endpoint = maps:get(<<"endpoint">>, Opts, ?DEFAULT_ENDPOINT), Bucket = maps:get(<<"bucket">>, Opts), + ForcePathStyle = case maps:get(<<"force_path_style">>, Opts, ?DEFAULT_FORCE_PATH_STYLE) of + true -> path; + false -> auto + end, + #{ + scheme := Scheme, + host := Host, + port := Port + } ?= uri_string:parse(Endpoint), + BaseConfig = erlcloud_s3:new(AccessKey, SecretKey, hb_util:list(Host), Port), + Config = BaseConfig#aws_config{ + s3_scheme = hb_util:list(hb_util:list(Scheme) ++ "://"), + s3_bucket_after_host = false, + s3_bucket_access_method = ForcePathStyle, + aws_region = Region, + % Use `gun_pool` to define a connection pool. + http_client = httpc%fun gun_request/6 + }, ok ?= test_bucket_access(Bucket, Config), StoreRef = get_store_ref(Opts), - _ = persistent_term:put(StoreRef, #{ + ok ?= persistent_term:put(StoreRef, #{ bucket => Bucket, - prefix => maps:get(<<"prefix">>, Opts, <<>>), config => Config }), - - ?event(store_s3, {started, {bucket, Bucket}, {prefix, maps:get(<<"prefix">>, Opts, <<>>)}}), + ?event(store_s3, {started, {bucket, Bucket}}), {ok, #{ module => ?MODULE, - bucket => Bucket, - prefix => maps:get(<<"prefix">>, Opts, <<>>) + bucket => Bucket }} else Error -> @@ -67,10 +90,47 @@ start(Opts) -> {error, Error} end. + +%% Interface erlcloud_s3 with HB HTTP Client +gun_request(URL, Method, Headers, Body, Timeout, _Config) when is_atom(Method) -> + case uri_string:parse(URL) of + #{port := Port, scheme := Scheme, host := Host} = ParsedURL -> + Peer = uri_string:normalize(#{port => Port, scheme => Scheme, host => Host}), + HeadersMap = maps:from_list(Headers), + MethodBinary = string:uppercase(atom_to_binary(Method)), + Args = #{ + peer => Peer, + path => uri_string:normalize(maps:with([path, fragment, query], ParsedURL)), + method => MethodBinary, + headers => HeadersMap, + body => Body + }, + Opts = #{connect_timeout => Timeout}, + Response = hb_http_client:request(Args, Opts), + handle_gun_response(Response); + Reason -> + ?event(error, {parsing_url, {url, URL}, {reason, Reason}}), + {error, Reason} + end. 
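As the hunk above shows, erlcloud takes a pluggable HTTP client through the `http_client` field of `#aws_config{}`; `gun_request/6` adapts HyperBEAM's own client to the tuple shape erlcloud consumes, although this revision still leaves `httpc` active and keeps the gun path commented out. Below is a hedged sketch of the same adapter contract implemented over stock `httpc`, assuming the `{ok, {{Status, Reason}, Headers, Body}}` return convention that `handle_gun_response/1` below produces; the `httpc_client` name is illustrative only:

```erlang
%% Sketch of an erlcloud-compatible HTTP client callback using OTP's httpc.
%% Mirrors the six-argument shape of gun_request/6 above; the return tuple
%% follows the {ok, {{Status, Reason}, Headers, Body}} shape assumed here.
httpc_client(URL, Method, Headers, Body, Timeout, _AwsConfig) ->
    Request =
        case lists:member(Method, [get, head, delete, options]) of
            true  -> {URL, Headers};                                  % bodyless verbs
            false -> {URL, Headers, "application/octet-stream", Body} % put/post
        end,
    case httpc:request(Method, Request, [{timeout, Timeout}],
                       [{body_format, binary}]) of
        {ok, {{_Vsn, Status, Reason}, RespHeaders, RespBody}} ->
            {ok, {{Status, Reason}, RespHeaders, RespBody}};
        {error, _} = Error ->
            Error
    end.
```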
+ +handle_gun_response({ok, Status, ResponseHeaders, Body}) -> + {ok, {{Status, undefined}, header_str(ResponseHeaders), Body}}; + +handle_gun_response({error, _} = Error) -> + Error. + +header_str(Hdrs) -> + [{string:to_lower(to_list_string(K)), to_list_string(V)} || {K, V} <- Hdrs]. + +to_list_string(Val) when erlang:is_binary(Val) -> + erlang:binary_to_list(Val); +to_list_string(Val) when erlang:is_list(Val) -> + Val. + %% @doc Validate that all required configuration keys are present. -%% Required keys: bucket, access-key-id, secret-access-key +%% Required keys: bucket, priv_access_key_id, priv_secret_access_key validate_config(Opts) -> - Required = [<<"bucket">>, <<"access-key-id">>, <<"secret-access-key">>], + Required = [<<"bucket">>, <<"priv_access_key_id">>, <<"priv_secret_access_key">>], Missing = [K || K <- Required, not maps:is_key(K, Opts)], case Missing of [] -> @@ -79,64 +139,26 @@ validate_config(Opts) -> error({missing_config_keys, Missing}) end. -%% @doc Build erlcloud AWS configuration from our options. -make_erlcloud_config(Opts) -> - AccessKey = hb_util:list(maps:get(<<"access-key-id">>, Opts)), - SecretKey = hb_util:list(maps:get(<<"secret-access-key">>, Opts)), - Region = hb_util:list(maps:get(<<"region">>, Opts, ?DEFAULT_REGION)), - Endpoint = maps:get(<<"endpoint">>, Opts, ?DEFAULT_ENDPOINT), - - {Scheme, Host, Port} = parse_endpoint(Endpoint), - - BaseConfig = erlcloud_s3:new(AccessKey, SecretKey, Host, Port), - - BaseConfig#aws_config{ - s3_scheme = Scheme, - s3_bucket_after_host = false, - s3_bucket_access_method = path, - aws_region = Region, - http_client = httpc - }. - -%% @doc Parse an endpoint URL into scheme, host, and port. -%% Example: "https://s3.amazonaws.com" -> {"https://", "s3.amazonaws.com", 443} -parse_endpoint(Endpoint) when is_binary(Endpoint) -> - parse_endpoint(hb_util:list(Endpoint)); -parse_endpoint(Endpoint) when is_list(Endpoint) -> - case string:split(Endpoint, "://") of - [Scheme, HostPort] -> - case string:split(HostPort, ":", trailing) of - [Host, PortStr] -> - Port = list_to_integer(PortStr), - {Scheme ++ "://", Host, Port}; - [Host] -> - DefaultPort = case Scheme of - "https" -> 443; - "http" -> 80 - end, - {Scheme ++ "://", Host, DefaultPort} - end; - [HostOnly] -> - {"http://", HostOnly, 80} - end. - %% @doc Test that we can access the configured bucket. test_bucket_access(Bucket, Config) -> BucketStr = hb_util:list(Bucket), - case erlcloud_s3:list_objects(BucketStr, [{max_keys, 1}], Config) of - L when is_list(L) -> - ok; - {error, {aws_error, {http_error, 404, _, _}}} -> - error({bucket_not_found, Bucket}); - {error, Reason} -> - error({bucket_access_failed, Reason}) + try erlcloud_s3:list_objects(BucketStr, [{max_keys, 1}], Config) of + L when is_list(L) -> ok + catch + _Class:Reason:Stacktrace -> + case Reason of + {aws_error, {http_error, 404, _, _}} -> + error({bucket_not_found, Bucket}); + _ -> + ?event(error, {error, {reason, Reason}, {stacktrace, Stacktrace}}), + error({bucket_access_failed, Reason}) + end end. %% @doc Get a unique reference for this store instance. get_store_ref(Opts) -> Bucket = hb_util:bin(maps:get(<<"bucket">>, Opts)), - Prefix = hb_util:bin(maps:get(<<"prefix">>, Opts, <<>>)), - {?MODULE, Bucket, Prefix}. + {?MODULE, Bucket}. %% @doc Get stored configuration from persistent_term. get_config(Opts) -> @@ -153,72 +175,60 @@ get_config(Opts) -> %%%----------------------------------------------------------------------------- %% @doc Write a value to a key in S3. 
--spec write(opts(), key(), value()) -> ok | {error, term()}. +-spec write(opts(), key(), value()) -> ok. write(Opts, Key, Value) when is_list(Key) -> write(Opts, hb_store:join(Key), Value); -write(Opts, Key, Value) -> +write(Opts, Key, Value) when is_binary(Key) -> + RetryAttempts = maps:get(<<"retry-attempts">>, Opts, ?DEFAULT_RETRIES), + write(Opts, Key, Value, RetryAttempts). +write(_Opts, _Key, _Value, 0) -> + ok; +write(Opts, Key, Value, AttemptsRemaining) -> maybe - #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts), + #{bucket := Bucket, config := Config} = get_config(Opts), HasSlash = (binary:match(hb_store:join(Key), <<"/">>) =/= nomatch), RelKey = hb_store:join(Key), FullKey = case RelKey of - <<"data/", _/binary>> -> build_s3_key(Prefix, RelKey); - _ -> - %% NOTE: This behaviour didn't match the one from read - %% (basic_test was failing), so I needed to add this logic. + <<"data/", _/binary>> -> RelKey; + _ -> case HasSlash of - true -> build_groups_key(Prefix, RelKey); - false -> build_links_key(Prefix, RelKey) + true -> build_groups_key(RelKey); + false -> build_links_key(RelKey) end end, BucketStr = hb_util:list(Bucket), KeyStr = hb_util:list(FullKey), ?event(store_s3, {write, {key, FullKey}, {size, byte_size(Value)}}), - ok ?= case erlcloud_s3:put_object(BucketStr, KeyStr, Value, [], Config) of - L when is_list(L) -> ok; - {error, Reason} -> error(Reason) - end, + ok ?= try erlcloud_s3:put_object(BucketStr, KeyStr, Value, [], Config) of + L when is_list(L) -> ok + catch _Class:Reason -> Reason end, ok else Error -> + %% Store S3 retry mechanism ?event(error, {s3_write_error, {key, Key}, {reason, Error}}), - {error, Error} + MaxRetries = maps:get(<<"retry-attempts">>, Opts, ?DEFAULT_RETRIES), + MinRetryDelay = maps:get(<<"min-retry-delay">>, Opts, ?DEFAULT_RETRY_DELAY), + MaxRetryDelay = maps:get(<<"max-retry-delay">>, Opts, ?DEFAULT_MAX_RETRY_DELAY), + RetryTime = case maps:get(<<"retry-mode">>, Opts, ?DEFAULT_RETRY_MODE) of + exp_backoff -> + min(MinRetryDelay * round(math:pow(2, MaxRetries - AttemptsRemaining)), MaxRetryDelay); + _ -> MinRetryDelay + end, + ?event(store_s3, {retry_in, RetryTime}), + timer:sleep(RetryTime), + write(Opts, Key, Value, AttemptsRemaining - 1) end. -%% @doc Build full S3 key with optional prefix. -build_s3_key(<<>>, Key) -> - hb_store:join(Key); -build_s3_key(Prefix, Key) -> - Path = hb_store:join(Key), - PrefixWithSlash = case binary:last(Prefix) of - $/ -> Prefix; - _ -> <> - end, - <>. - %% @doc Build an S3 key under the links namespace for a logical key. -build_links_key(<<>>, Key) -> - <>; -build_links_key(Prefix, Key) -> - Path = hb_store:join(Key), - PrefixWithSlash = case binary:last(Prefix) of - $/ -> Prefix; - _ -> <> - end, - <>. +build_links_key(Key) -> + <>. %% @doc Build an S3 key under the groups namespace for a logical key. -build_groups_key(<<>>, Key) -> - <>; -build_groups_key(Prefix, Key) -> - Path = hb_store:join(Key), - PrefixWithSlash = case binary:last(Prefix) of - $/ -> Prefix; - _ -> <> - end, - <>. +build_groups_key(Key) -> + <>. %%%----------------------------------------------------------------------------- %%% Link Support (Phase 4) @@ -226,98 +236,86 @@ build_groups_key(Prefix, Key) -> %% @doc Read a value from S3, following links if necessary. -spec read(opts(), key()) -> {ok, value()} | not_found. -% TODO: Remove, we don't need this match anymore. 
-read(_Opts, []) -> not_found; - read(Opts, Key) when is_list(Key) -> read(Opts, hb_store:join(Key)); -read(Opts, Key) -> - read_with_links(Opts, Key, 0, false). - -%% Internal read that tracks link depth to prevent infinite loops -read_with_links(_Opts, _Key, Depth, _IsSubFolderLinkSearchMode) when Depth > ?MAX_LINK_DEPTH -> - ?event(error, {too_many_links, {depth, Depth}}), - not_found; -read_with_links(Opts, Key, Depth, IsSubFolderLinkSearchMode) -> - ?event(store_s3, {read_key, Key}), - case read_direct(Opts, Key) of +read(Opts, Key) when is_binary(Key) -> + % Try direct read first (fast path for non-link paths) + case read_with_links(Opts, Key) of {ok, Value} -> + {ok, Value}; + not_found -> + try + PathParts = binary:split(Key, <<"/">>, [global]), + case resolve_path_segments(Opts, PathParts) of + {ok, ResolvedPathParts} -> + ResolvedPathBin = to_path(ResolvedPathParts), + read_with_links(Opts, ResolvedPathBin); + {error, _} -> + not_found + end + catch + Class:Reason:Stacktrace -> + ?event(error, + { + resolve_path_links_failed, + {class, Class}, + {reason, Reason}, + {stacktrace, Stacktrace}, + {key, Key} + } + ), + % If link resolution fails, return not_found + not_found + end + end. + +read_with_links(Opts, Path) -> + case read_direct(Opts, Path) of + {ok, Value} -> + % Check if this value is actually a link to another key case is_link(Value) of - {true, Target} -> - ?event(store_s3, {link_found, Target}), - read_with_links(Opts, Target, Depth + 1, IsSubFolderLinkSearchMode); + {true, Link} -> + % Extract the target key and recursively resolve the link + read_with_links(Opts, Link); false -> {ok, Value} end; not_found -> - %% NOTE: We need to create a more complex test for deep linking - ?event(store_s3, {not_found, - {key, Key}, - {depth, Depth}, - {is_sub_folder_link_search_mode, IsSubFolderLinkSearchMode}}), - %% TODO: This Depth might not be the best case to handle this scenario - %% where we go back to try to find the link to then resolve forward - %% (attach the end key back to the proper path). - case IsSubFolderLinkSearchMode of - true -> - {resolved_link_key, Key}; - false -> - case lists:reverse(binary:split(Key, <<"/">>, [global])) of - [_] -> - %% Doesn't matter looking further down - not_found; - [EndKey | ReversedNewKey] -> - NewKey = lists:reverse(ReversedNewKey), - ?event(store_s3, {check_new_key, NewKey}), - case read_with_links(Opts, NewKey, Depth, true) of - {ok, _Value} -> - %% NOTE: When the key has a value, what should we do? We still - %% need to resolve - throw("Unhandled case when the previous key has information stored"); - {resolved_link_key, NewKey} -> - not_found; - {resolved_link_key, Value} -> - ?event(store_s3, {{resolved_link_key, Value}, {end_key, EndKey}}), - read(Opts, <>); - not_found -> - not_found - end - end - end + not_found end. +%% @doc Helper function to convert to a path +to_path(PathParts) when is_list(PathParts) -> + hb_util:bin(lists:join(<<"/">>, PathParts)); +to_path(Path) when is_binary(Path) -> + Path. 
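The link format underneath all of this is deliberately small: a link is an ordinary S3 object whose value is the `?LINK_MARKER` (`link:`) followed by the target key, which is why `read_with_links/2` can resolve chains simply by re-reading. A standalone sketch of the follow loop, with a `Lookup` fun standing in for `read_direct/2` and an explicit hop budget in place of `?MAX_LINK_DEPTH`; the `follow_links` name is illustrative:

```erlang
%% Follow "link:"-prefixed values until a plain value is reached.
%% Lookup is a fun(Key) -> {ok, Value} | not_found; Hops bounds chain length.
follow_links(_Lookup, _Key, 0) ->
    {error, too_many_redirects};
follow_links(Lookup, Key, Hops) ->
    case Lookup(Key) of
        {ok, <<"link:", Target/binary>>} ->
            follow_links(Lookup, Target, Hops - 1);  % chase one more hop
        {ok, Value} ->
            {ok, Value};
        not_found ->
            not_found
    end.
```

One consequence of the prefix scheme is that a stored value which legitimately begins with the bytes `link:` is indistinguishable from a link; the store accepts that trade-off for simplicity.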
+%% Direct read without link resolution
 read_direct(Opts, Key) ->
-    try
-        #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts),
-        BucketStr = hb_util:list(Bucket),
-        {FullKey, IsDataKey} = case Key of
-            <<"data/", _/binary>> ->
-                {build_s3_key(Prefix, Key), true};
-            _ ->
-                HasSlash = (binary:match(hb_store:join(Key), <<"/">>) =/= nomatch),
-                NsKey = case HasSlash of
-                    true -> build_groups_key(Prefix, Key);
-                    false -> build_links_key(Prefix, Key)
-                end,
-                {NsKey, false}
-        end,
-        KeyStr = hb_util:list(FullKey),
-        case erlcloud_s3:get_object(BucketStr, KeyStr, [], Config) of
-            L when is_list(L) ->
-                Content = proplists:get_value(content, L),
-                {ok, hb_util:bin(Content)};
-            {error, {aws_error, {http_error, 404, _, _}}} ->
-                not_found;
-            {error, Reason} ->
-                case IsDataKey of
-                    false -> ?event(error, {s3_read_error, {key, Key}, {reason, Reason}});
-                    true -> ok
-                end,
-                not_found
-        end
+    #{bucket := Bucket, config := Config} = get_config(Opts),
+    BucketStr = hb_util:list(Bucket),
+    FullKey = case Key of
+        <<"data/", _/binary>> ->
+            hb_store:join(Key);
+        _ ->
+            HasSlash = (binary:match(hb_store:join(Key), <<"/">>) =/= nomatch),
+            NsKey = case HasSlash of
+                true -> build_groups_key(Key);
+                false -> build_links_key(Key)
+            end,
+            NsKey
+    end,
+    KeyStr = hb_util:list(FullKey),
+    try erlcloud_s3:get_object(BucketStr, KeyStr, [], Config) of
+        L when is_list(L) ->
+            Content = proplists:get_value(content, L),
+            {ok, hb_util:bin(Content)}
     catch
-        _:_ ->
+        _:{aws_error, {http_error, 404, _, _}} ->
+            not_found;
+        _:Reason ->
+            ?event(error, {s3_read_error, {key, Key}, {reason, Reason}}),
+            %% To enable store chain fallback
             not_found
     end.

@@ -329,15 +327,17 @@ make_link(Opts, Existing, New) ->
     LinkValue = <<?LINK_MARKER/binary, ExistingBin/binary>>,
     ?event(store_s3, {make_link, {from, New}, {to, Existing}}),
     maybe
-        #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts),
+        #{bucket := Bucket, config := Config} = get_config(Opts),
         BucketStr = hb_util:list(Bucket),
         HasSlash = (binary:match(hb_store:join(New), <<"/">>) =/= nomatch),
         NsKey = case HasSlash of
-            true -> build_groups_key(Prefix, New);
-            false -> build_links_key(Prefix, New)
+            true -> build_groups_key(New);
+            false -> build_links_key(New)
         end,
         NsKeyStr = hb_util:list(NsKey),
-        _ = erlcloud_s3:put_object(BucketStr, NsKeyStr, LinkValue, [], Config),
+        ok ?= try erlcloud_s3:put_object(BucketStr, NsKeyStr, LinkValue, [], Config) of
+            L when is_list(L) -> ok
+        catch _Class:Reason -> {error, Reason} end,
         ok
     else
         _ -> {error, make_link_failed}
     end.

@@ -370,10 +370,8 @@ is_link(Value) ->
 %% Groups are detected by listing operations.
 -spec make_group(opts(), key()) -> ok.
 make_group(Opts, Path) ->
-    % NOTE: We need a way to just create an empty group.
-    % This is also ignored when tried to retrieve a list of
-    % keys for a given path.
-    write(Opts, <<Path/binary, "/empty_group">>, <<"group">>),
+    % We need to write a file so has_children can consider this a group.
+    write(Opts, <<Path/binary, "/empty_group">>, <<"empty">>),
     ok.

 %% @doc List immediate children under a given path.
@@ -382,9 +380,10 @@
 list(Opts, Path) when is_list(Path) ->
     list(Opts, hb_store:join(Path));
 list(Opts, Path) ->
+    %% See the make_group note about the empty_group sentinel.
UnwantedChildren = [<<"empty_group">>], maybe - #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts), + #{bucket := Bucket, config := Config} = get_config(Opts), ResolvedPath = case read_direct(Opts, Path) of {ok, Value} -> case is_link(Value) of @@ -393,20 +392,22 @@ list(Opts, Path) -> false -> Path end; - not_found -> + _ -> Path end, - FullPath = build_groups_key(Prefix, ResolvedPath), + FullPath = build_groups_key(ResolvedPath), SearchPrefix = ensure_trailing_slash(FullPath), BucketStr = hb_util:list(Bucket), PrefixStr = hb_util:list(SearchPrefix), ListOpts = [{prefix, PrefixStr}, {delimiter, "/"}], - case erlcloud_s3:list_objects(BucketStr, ListOpts, Config) of + ?event(store_s3, {list_opts, ListOpts}), + try erlcloud_s3:list_objects(BucketStr, ListOpts, Config) of L when is_list(L) -> Children = extract_children(SearchPrefix, L), - {ok, Children -- UnwantedChildren}; - {error, _Reason} -> - {ok, []} + {ok, Children -- UnwantedChildren} + catch + _Class:ErrorReason -> + {error, ErrorReason} end else Reason -> @@ -501,21 +502,20 @@ type(Opts, Key) -> composite; false -> not_found - end + end; + {error, _} -> + not_found end end. %% @doc HEAD check for object existence without downloading content head_exists(Opts, Key) -> try - #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts), - FullKey = build_s3_key(Prefix, Key), + #{bucket := Bucket, config := Config} = get_config(Opts), + FullKey = hb_store:join(Key), BucketStr = hb_util:list(Bucket), KeyStr = hb_util:list(FullKey), - case erlcloud_s3:head_object(BucketStr, KeyStr, [], Config) of - L when is_list(L) -> true; - _ -> false - end + is_list(erlcloud_s3:head_object(BucketStr, KeyStr, [], Config)) catch _:_ -> false end. @@ -523,8 +523,8 @@ head_exists(Opts, Key) -> %% @doc Check if a path has any children (is a directory). has_children(Opts, Path) -> try - #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts), - FullPath = build_groups_key(Prefix, Path), + #{bucket := Bucket, config := Config} = get_config(Opts), + FullPath = build_groups_key(Path), SearchPrefix = ensure_trailing_slash(FullPath), BucketStr = hb_util:list(Bucket), PrefixStr = hb_util:list(SearchPrefix), @@ -532,9 +532,7 @@ has_children(Opts, Path) -> case erlcloud_s3:list_objects(BucketStr, ListOpts, Config) of L when is_list(L) -> Contents = proplists:get_value(contents, L, []), - Contents =/= []; - _ -> - false + Contents =/= [] end catch _:_ -> false @@ -556,19 +554,22 @@ resolve(Opts, Path) when is_binary(Path) -> {error, _} -> Path end. +resolve_path_segments(Opts, Path) -> + resolve_path_segments(Opts, Path, 0). %% Internal path resolution that resolves all segments including the last -resolve_path_segments(_Opts, _Path, Depth) when Depth > ?MAX_LINK_DEPTH -> +resolve_path_segments(_Opts, _Path, Depth) when Depth > ?MAX_REDIRECTS -> {error, too_many_redirects}; resolve_path_segments(_Opts, [], _Depth) -> {ok, <<>>}; resolve_path_segments(Opts, Parts, Depth) -> resolve_path_accumulate(Opts, Parts, <<>>, Depth). 
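`resolve/2` and the helpers above resolve a path one segment at a time, so a link sitting at any intermediate prefix (say `["link", "key"]`, where `link` points at `group`) redirects the rest of the walk. A condensed sketch of the accumulate-and-substitute loop; it omits the `?MAX_REDIRECTS` budget and the `data/` fast path for brevity, takes an abstract `Lookup` fun in place of the store read, and the `resolve_segments` name is illustrative:

```erlang
%% Walk a path left to right; whenever the accumulated prefix resolves to a
%% "link:" value, substitute the link target and continue from there.
resolve_segments(Lookup, Segments) ->
    lists:foldl(
        fun(Seg, Acc) ->
            Candidate = case Acc of
                <<>> -> Seg;
                _    -> <<Acc/binary, "/", Seg/binary>>
            end,
            case Lookup(Candidate) of
                {ok, <<"link:", Target/binary>>} -> Target;  % redirect
                _ -> Candidate                               % keep walking
            end
        end,
        <<>>,
        Segments).
```

With that sketch, `resolve_segments(Lookup, [<<"link">>, <<"key">>])` first rewrites `<<"link">>` to `<<"group">>`, then returns `<<"group/key">>`.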
+%% Internal helper that accumulates the resolved path resolve_path_accumulate(_Opts, [], Acc, _Depth) -> {ok, Acc}; resolve_path_accumulate(_Opts, FullPath = [<<"data">> | _], <<>>, _Depth) -> {ok, hb_store:join(FullPath)}; -resolve_path_accumulate(_Opts, _Parts, _Acc, Depth) when Depth > ?MAX_LINK_DEPTH -> +resolve_path_accumulate(_Opts, _Parts, _Acc, Depth) when Depth > ?MAX_REDIRECTS -> {error, too_many_redirects}; resolve_path_accumulate(Opts, [Head|Tail], Acc, Depth) -> CurrentPath = case Acc of @@ -630,10 +631,9 @@ reset(Opts) -> end. delete_all_objects(Opts) -> - #{bucket := Bucket, prefix := Prefix, config := Config} = get_config(Opts), + #{bucket := Bucket, config := Config} = get_config(Opts), BucketStr = hb_util:list(Bucket), - PrefixStr = hb_util:list(Prefix), - case erlcloud_s3:list_objects(BucketStr, [{prefix, PrefixStr}], Config) of + try erlcloud_s3:list_objects(BucketStr, [], Config) of L when is_list(L) -> Contents = proplists:get_value(contents, L, []), Keys = [proplists:get_value(key, Obj) || Obj <- Contents], @@ -642,9 +642,9 @@ delete_all_objects(Opts) -> _ -> erlcloud_s3:delete_objects_batch(BucketStr, Keys, Config), ok - end; - _ -> - ok + end + catch _Class:Reason -> + {error, Reason} end. %% @doc Return the scope of this store. @@ -662,20 +662,29 @@ scope(_Opts) -> scope(). match(_Opts, _Template) -> not_found. -%% @doc Test suite demonstrating basic store operations. +%% @doc Integration test suite demonstrating basic store operations. %% -%% The following functions implement unit tests using EUnit to verify that +%% The following functions implement integration tests using EUnit to verify that %% the S3 store implementation correctly handles various scenarios including %% basic read/write operations, hierarchical listing, group creation, link %% resolution, and type detection. +%% +%% Be sure that minio io server is running before executing the integration tests. default_test_opts() -> - #{<<"store-module">> => ?MODULE, - <<"bucket">> => <<"hb-s3">>, - <<"access-key-id">> => <<"niko">>, - <<"secret-access-key">> => <<"minio-niko-rocks">>, - <<"endpoint">> => <<"localhost:9000">>, - <<"dangerous_reset">> => true}. + #{ + <<"store-module">> => ?MODULE, + <<"bucket">> => <<"hb-s3">>, + <<"priv_access_key_id">> => <<"minioadmin">>, + <<"priv_secret_access_key">> => <<"minioadmin">>, + <<"endpoint">> => <<"http://localhost:9000">>, + <<"dangerous_reset">> => true, + <<"force_path_style">> => true + }. + +-ifdef(ENABLE_S3). +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). %% @doc Basic store test - verifies fundamental read/write functionality. %% @@ -683,7 +692,11 @@ default_test_opts() -> %% back to verify correctness, and cleans up by stopping the database. It %% serves as a sanity check that the basic storage mechanism is working. +init() -> + application:ensure_all_started(hb). + basic_test() -> + init(), StoreOpts = default_test_opts(), start(StoreOpts), reset(StoreOpts), @@ -696,12 +709,69 @@ basic_test() -> ?assertEqual(Value, Response), ok = stop(StoreOpts). +not_found_test() -> + init(), + StoreOpts = default_test_opts(), + start(StoreOpts), + reset(StoreOpts), + NonExistingKey = <<"NonExistingKey">>, + Result = read(StoreOpts, NonExistingKey), + ?assertEqual(not_found, Result), + ok = stop(StoreOpts). + +bucket_not_found_test() -> + init(), + StoreOpts = (default_test_opts())#{<<"bucket">> => <<"invalid_bucket">>}, + ?assertError({bucket_access_failed, {aws_error, {http_error, 400, "Bad Request", _}}}, start(StoreOpts)), + ok = stop(StoreOpts). 
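The `failed_write_test` that follows pins down the retry arithmetic: `timer:tc` must report at least 3 seconds, because with `<<"retry-attempts">> => 2` and the default 1000 ms base delay, exponential backoff sleeps 1 s after the first failed `put_object` and 2 s after the second. The schedule implemented in the write path reduces to the sketch below (the `backoff_delays` helper is illustrative, not part of the module):

```erlang
%% Delay before retry N (0-based): min(MinDelay * 2^N, MaxDelay).
%% With the module defaults (1000 ms base, 300000 ms cap, 5 attempts) this
%% yields [1000, 2000, 4000, 8000, 16000]; with 2 attempts, [1000, 2000].
backoff_delays(MinDelay, MaxDelay, Attempts) ->
    [min(MinDelay * round(math:pow(2, N)), MaxDelay)
     || N <- lists:seq(0, Attempts - 1)].
```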
+ +failed_write_test() -> + init(), + StoreOpts = (default_test_opts())#{<<"retry-attempts">> => 2}, + start(StoreOpts), + reset(StoreOpts), + Key = <<"write_test">>, + Value = <<"value">>, + + ok = meck:new(erlcloud_s3, [unstick, passthrough]), + XMLBody = <<"">>, + ok = meck:expect(erlcloud_s3, put_object, fun(_, _, _, _, _) -> + error({aws_error,{http_error, 400, "Bad Request", XMLBody}}) end), + + {Time, Result} = timer:tc(fun() -> write(StoreOpts, Key, Value) end, second), + ?assertMatch(ok, Result), + ?assert(Time >= 3), + + ?assert(meck:called(erlcloud_s3, put_object, ['_', '_', '_', '_', '_'])), + ok = meck:unload(erlcloud_s3), + ok = stop(StoreOpts). + +failed_read_test() -> + init(), + StoreOpts = default_test_opts(), + start(StoreOpts), + reset(StoreOpts), + Key = <<"read_test">>, + + ok = meck:new(erlcloud_s3, [passthrough]), + XMLBody = <<"">>, + ok = meck:expect(erlcloud_s3, get_object, fun(_, _, _, _) -> + error({aws_error,{http_error, 400, "Bad Request", XMLBody}}) end), + + Result = read(StoreOpts, Key), + ?assertMatch(not_found, Result), + + ?assert(meck:called(erlcloud_s3, get_object, ['_', '_', '_', '_'])), + ok = meck:unload(erlcloud_s3), + ok = stop(StoreOpts). + %% @doc List test - verifies prefix-based key listing functionality. %% %% This test creates several keys with hierarchical names and verifies that %% the list operation correctly returns only keys matching a specific prefix. %% It demonstrates the directory-like navigation capabilities of the store. list_test() -> + init(), StoreOpts = default_test_opts(), start(StoreOpts), reset(StoreOpts), @@ -719,7 +789,7 @@ list_test() -> % Create other top-level directories write(StoreOpts, <<"foo/bar">>, <<"baz">>), write(StoreOpts, <<"beep/boop">>, <<"bam">>), - read(StoreOpts, <<"colors">>), + read(StoreOpts, <<"colors">>), % Test listing colors/ - should return immediate children only {ok, ListResult} = list(StoreOpts, <<"colors">>), ?event({list_result, ListResult}), @@ -741,9 +811,10 @@ list_test() -> %% @doc Group test - verifies group creation and type detection. %% -%% This test creates a group entry and verifies that it is correctly identified +%% This test creates a group entry and verifies that it is correctly identified %% as a composite type and cannot be read directly (like filesystem directories). group_test() -> + init(), StoreOpts = default_test_opts(), start(StoreOpts), reset(StoreOpts), @@ -759,6 +830,7 @@ group_test() -> %% and verifies that reading from the link location returns the original value. %% This demonstrates the transparent link resolution mechanism. link_test() -> + init(), StoreOpts = default_test_opts(), start(StoreOpts), reset(StoreOpts), @@ -769,6 +841,7 @@ link_test() -> ?assertEqual(<<"Bam">>, Result). link_fragment_test() -> + init(), StoreOpts = default_test_opts(), start(StoreOpts), reset(StoreOpts), @@ -784,14 +857,11 @@ link_fragment_test() -> %% then verifies that the type detection function correctly identifies each one. %% This demonstrates the semantic classification system used by the store. type_test() -> + init(), StoreOpts = default_test_opts(), start(StoreOpts), reset(StoreOpts), - % TODO: Make this line acive and remove the write bellow - % that create a children inside the file, when `make_group` - % code is improved. 
- %make_group(StoreOpts, <<"assets">>), - ok = write(StoreOpts, <<"assets/data">>, <<"value">>), + make_group(StoreOpts, <<"assets">>), Type = type(StoreOpts, <<"assets">>), ?event({type, Type}), ?assertEqual(composite, Type), @@ -816,6 +886,7 @@ type_test() -> %% hierarchical structures where keys represent nested paths or categories, %% and need to create shortcuts or aliases to deeply nested data. link_key_list_test() -> + init(), StoreOpts = default_test_opts(), start(StoreOpts), reset(StoreOpts), @@ -828,14 +899,15 @@ link_key_list_test() -> %% @doc Path traversal link test - verifies link resolution during path traversal. %% %% This test verifies that when reading a path as a list, intermediate path -%% segments that are links get resolved correctly. For example, if "link" -%% is a symbolic link to "group", then reading ["link", "key"] should +%% segments that are links get resolved correctly. For example, if "link" +%% is a symbolic link to "group", then reading ["link", "key"] should %% resolve to reading ["group", "key"]. %% %% This functionality enables transparent redirection at the directory level, %% allowing reorganization of hierarchical data without breaking existing %% access patterns. path_traversal_link_test() -> + init(), StoreOpts = default_test_opts(), start(StoreOpts), reset(StoreOpts), @@ -851,6 +923,7 @@ path_traversal_link_test() -> %% @doc Test that matches the exact hb_store hierarchical test pattern exact_hb_store_test() -> + init(), StoreOpts = default_test_opts(), start(StoreOpts), % Follow exact same pattern as hb_store test @@ -870,7 +943,7 @@ exact_hb_store_test() -> ?event(step5_test_direct_read), DirectResult = read(StoreOpts, <<"test-dir1/test-file">>), ?event({direct_result, DirectResult}), - % This should work: reading via the link path + % This should work: reading via the link path ?event(step6_test_link_read), Result = read(StoreOpts, [<<"test-link">>, <<"test-file">>]), ?event({final_result, Result}), @@ -879,12 +952,13 @@ exact_hb_store_test() -> %% @doc Test cache-style usage through hb_store interface cache_style_test() -> + init(), hb:init(), StoreOpts = default_test_opts(), % Start the store hb_store:start(StoreOpts), reset(StoreOpts), - % Test writing through hb_store interface + % Test writing through hb_store interface ok = hb_store:write(StoreOpts, <<"test-key">>, <<"test-value">>), % Test reading through hb_store interface Result = hb_store:read(StoreOpts, <<"test-key">>), @@ -895,10 +969,11 @@ cache_style_test() -> %% @doc Test nested map storage with cache-like linking behavior %% %% This test demonstrates how to store a nested map structure where: -%% 1. Each value is stored at data/{hash_of_value} +%% 1. Each value is stored at data/{hash_of_value} %% 2. Links are created to compose the values back into the original map structure %% 3. 
Reading the composed structure reconstructs the original nested map nested_map_cache_test() -> + init(), StoreOpts = default_test_opts(), start(StoreOpts), % Clean up any previous test data @@ -913,7 +988,7 @@ nested_map_cache_test() -> }, <<"key2">> => #{ <<"alg">> => <<"hmac">>, - <<"commiter">> => <<"unique-id-2">> + <<"commiter">> => <<"unique-id-2">> } }, <<"other-key">> => #{ @@ -971,7 +1046,7 @@ nested_map_cache_test() -> ?assertEqual(<<"Foo">>, TargetValueRead), % Verify commitments is a composite ?assertEqual(composite, type(StoreOpts, <<"root/commitments">>)), - % Verify other-key is a composite + % Verify other-key is a composite ?assertEqual(composite, type(StoreOpts, <<"root/other-key">>)), % Step 4: Test programmatic reconstruction of the nested map ReconstructedMap = reconstruct_map(StoreOpts, <<"root">>), @@ -1003,6 +1078,7 @@ reconstruct_map(StoreOpts, Path) -> %% @doc Debug test to understand cache linking behavior cache_debug_test() -> + init(), StoreOpts = default_test_opts(), start(StoreOpts), reset(StoreOpts), @@ -1038,6 +1114,7 @@ cache_debug_test() -> %% @doc Isolated test focusing on the exact cache issue isolated_type_debug_test() -> + init(), StoreOpts = default_test_opts(), start(StoreOpts), reset(StoreOpts), @@ -1075,6 +1152,7 @@ isolated_type_debug_test() -> %% @doc Test that list function resolves links correctly list_with_link_test() -> + init(), StoreOpts = default_test_opts(), start(StoreOpts), reset(StoreOpts), @@ -1095,4 +1173,5 @@ list_with_link_test() -> ?event({link_children, LinkChildren}), ?assertEqual(ExpectedChildren, lists:sort(LinkChildren)), stop(StoreOpts). - +-endif. +-endif. \ No newline at end of file diff --git a/test/docker-compose-s3.yml b/test/docker-compose-s3.yml new file mode 100644 index 000000000..acbb2dba8 --- /dev/null +++ b/test/docker-compose-s3.yml @@ -0,0 +1,27 @@ +volumes: + minio_data: + driver: local + +services: + minio: + image: quay.io/minio/minio + ports: + - "9000:9000" + - "9001:9001" + volumes: + - ~/minio/data:/data + environment: + - MINIO_ROOT_USER=minioadmin + - MINIO_ROOT_PASSWORD=minioadmin + command: server /data --console-address ":9001" + + mc: + image: quay.io/minio/minio + depends_on: + - minio + entrypoint: > + /bin/sh -c " + /usr/bin/mc rb --force minio/hb-s3; + /usr/bin/mc mb --quiet minio/hb-s3; + /usr/bin/mc policy set public minio/hb-s3; + " \ No newline at end of file From 0a1c1b424402076c0b36f21911563daea0f1f078 Mon Sep 17 00:00:00 2001 From: speeddragon Date: Wed, 29 Oct 2025 19:18:34 +0000 Subject: [PATCH 5/7] refactor: Simplify S3 store by removing links and groups Remove link and group abstractions from S3 store in favor of direct key-value operations. Fix boolean type handling (use binary instead of boolean ao-type). Fix HTTP request handling and cache bugs. --- README.md | 55 +---- rebar.config | 6 +- src/hb_store_s3.erl | 485 +++++++++++++++++++------------------ test/docker-compose-s3.yml | 2 +- 4 files changed, 260 insertions(+), 288 deletions(-) diff --git a/README.md b/README.md index 0f5dd940b..8b8c42f52 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,7 @@ HyperBEAM supports several optional build profiles that enable additional featur - `s3`: Enables S3 storage backend - `http3`: Enables HTTP/3 support via QUIC protocol + Using these profiles allows you to optimize HyperBEAM for your specific use case without adding unnecessary dependencies to the base installation. 
To start a shell with profiles: @@ -107,11 +108,6 @@ To create a release with profiles: ```bash # Create release with profiles rebar3 as rocksdb,genesis_wasm release - -# Run S3 integration tests (make sure you have docker-compose installed) -docker-compose -f test/docker-compose-s3.yml up -d -rebar3 as s3 eunit --module hb_store_s3 -docker-compose -f test/docker-compose-s3.yml down -d ``` Note: Profiles modify compile-time options that get baked into the release. Choose the profiles you need before starting HyperBEAM. @@ -271,55 +267,6 @@ schedule of another execution. Details on other devices found in the pre-loaded set can be located in their respective documentation. -## Testing - -### Running tests - -Specific tests can be ran with `--test` parameter: - -``` -rebar3 eunit --test hb_store_lmdb:link_fragment_test -``` - -All tests inside a module can be ran with `--module` parameter: - -``` -rebar3 eunit --module hb_store_lmdb -``` - -To run multiple module append a comma: - -``` -rebar3 eunit --module hb_store_lmdb,hb_store_lru -``` - -Some tests might need extra profiles to be enabled before running it: - -``` -rebar3 as s3 eunit --module hb_store_s3 -``` - -It might also need external dependencies like MinIO to be up and running: - -``` -rebar3 as s3 cmd docker_up, eunit --module hb_store_s3, cmd docker_down -``` - -To test generator tests, you need to add `-g` to the eunit command: - -``` -rebar3 eunit -g hb_cache:cache_suite_test_ -``` - -### Generating test coverage report - -Add `--cover` to the eunit command to generate test coverage data. -To generate the HTML report, run the command `cover`. - -``` -rebar3 as s3 eunit --module hb_store_s3 --cover, cover -``` - ## Documentation HyperBEAM uses [MkDocs](https://www.mkdocs.org/) with the [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/) theme to build its documentation site. All documentation source files are located in the `docs/` directory. diff --git a/rebar.config b/rebar.config index 72be6ab92..f83cc4b42 100644 --- a/rebar.config +++ b/rebar.config @@ -42,7 +42,11 @@ ]} ]}, {s3, [ - {deps, [{erlcloud, {git, "https://github.com/erlcloud/erlcloud.git", {ref, "7322624227e12120d88c47a72a2ba0a032ace9f3"}}}]}, + {deps, [ + {erlcloud, {git, "https://github.com/erlcloud/erlcloud.git", + {ref, "7322624227e12120d88c47a72a2ba0a032ace9f3"}} + } + ]}, {erl_opts, [ {d, 'ENABLE_S3', true} ]} diff --git a/src/hb_store_s3.erl b/src/hb_store_s3.erl index a7dfa566a..72d89b4c6 100644 --- a/src/hb_store_s3.erl +++ b/src/hb_store_s3.erl @@ -1,9 +1,22 @@ -%%%----------------------------------------------------------------------------- %%% @doc S3-backed implementation of the HyperBEAM store behavior. %%% This module provides persistent storage using Amazon S3 or compatible %%% object storage services (MinIO, Wasabi, etc.). +%%% +%%% To run tests enable the `s3` profile. +%%% +%%% ``` +%%% rebar3 as s3 eunit --module hb_store_s3 +%%% ``` +%%% +%%% It might also need external dependencies like MinIO to be up and running: +%%% +%%% ``` +%%% docker-compose -f test/docker-compose-s3.yml up -d +%%% rebar3 as s3 eunit --module hb_store_s3 +%%% docker-compose -f test/docker-compose-s3.yml down -d +%%% ``` +%%% %%% @end -%%%----------------------------------------------------------------------------- -module(hb_store_s3). -behaviour(hb_store). @@ -28,20 +41,18 @@ %% Configuration defaults -define(DEFAULT_REGION, <<"us-east-1">>). -define(DEFAULT_ENDPOINT, <<"https://s3.amazonaws.com">>). --define(DEFAULT_FORCE_PATH_STYLE, false). 
+-define(DEFAULT_FORCE_PATH_STYLE, <<"false">>). -define(MAX_REDIRECTS, 100). % Only resolve 1000 links to data -define(LINK_MARKER, <<"link:">>). %% Namespace for storing link objects separately to avoid file collisions -define(LINKS_NS, <<"links/">>). -define(GROUPS_NS, <<"groups/">>). +-define(CREATE_GROUP_KEY, <<"make_group">>). -define(DEFAULT_RETRY_DELAY, 1000). % Wait for 1 second before retry. -define(DEFAULT_RETRY_MODE, exp_backoff). -define(DEFAULT_RETRIES, 5). % Retries for 5 times until it returns. -define(DEFAULT_MAX_RETRY_DELAY, 300000). % Max 5 minutes waiting to retry. -%%%----------------------------------------------------------------------------- -%%% Configuration and Initialization (Phase 2) -%%%----------------------------------------------------------------------------- %% @doc Initialize the S3 store connection. %% This function is called when the store is first accessed. @@ -56,8 +67,8 @@ start(Opts) -> Endpoint = maps:get(<<"endpoint">>, Opts, ?DEFAULT_ENDPOINT), Bucket = maps:get(<<"bucket">>, Opts), ForcePathStyle = case maps:get(<<"force_path_style">>, Opts, ?DEFAULT_FORCE_PATH_STYLE) of - true -> path; - false -> auto + <<"true">> -> path; + <<"false">> -> auto end, #{ scheme := Scheme, @@ -71,40 +82,45 @@ start(Opts) -> s3_bucket_access_method = ForcePathStyle, aws_region = Region, % Use `gun_pool` to define a connection pool. - http_client = httpc%fun gun_request/6 + http_client = fun gun_request/6 }, ok ?= test_bucket_access(Bucket, Config), StoreRef = get_store_ref(Opts), - ok ?= persistent_term:put(StoreRef, #{ - bucket => Bucket, - config => Config - }), + ok ?= persistent_term:put( + StoreRef, + #{bucket => Bucket, config => Config} + ), ?event(store_s3, {started, {bucket, Bucket}}), - {ok, #{ - module => ?MODULE, - bucket => Bucket - }} + {ok, #{module => ?MODULE, bucket => Bucket}} else Error -> ?event(error, {s3_start_failed, {reason, Error}}), {error, Error} end. - -%% Interface erlcloud_s3 with HB HTTP Client +%% @doc Interface erlcloud_s3 with HB HTTP Client gun_request(URL, Method, Headers, Body, Timeout, _Config) when is_atom(Method) -> case uri_string:parse(URL) of #{port := Port, scheme := Scheme, host := Host} = ParsedURL -> - Peer = uri_string:normalize(#{port => Port, scheme => Scheme, host => Host}), + Peer = uri_string:normalize( + #{ + port => Port, + scheme => Scheme, + host => Host + } + ), + Path = uri_string:normalize( + maps:with([path, fragment, query], ParsedURL) + ), HeadersMap = maps:from_list(Headers), MethodBinary = string:uppercase(atom_to_binary(Method)), Args = #{ - peer => Peer, - path => uri_string:normalize(maps:with([path, fragment, query], ParsedURL)), - method => MethodBinary, - headers => HeadersMap, - body => Body - }, + peer => Peer, + path => Path, + method => MethodBinary, + headers => HeadersMap, + body => Body + }, Opts = #{connect_timeout => Timeout}, Response = hb_http_client:request(Args, Opts), handle_gun_response(Response); @@ -113,6 +129,7 @@ gun_request(URL, Method, Headers, Body, Timeout, _Config) when is_atom(Method) - {error, Reason} end. 
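Two details in the hunk above are easy to miss. `force_path_style` is now carried as a binary (`<<"true">>`/`<<"false">>`), matching how HyperBEAM option maps ship string values, and it selects between erlcloud's bucket access methods: `path` keeps the bucket in the URL path, which MinIO and most self-hosted endpoints require, while `auto` lets erlcloud fall back to virtual-hosted addressing. Shown in isolation (the `access_method` name is illustrative):

```erlang
%% Mapping used in start/1 above. For a GET of hb-s3/foo/bar the two
%% styles produce differently shaped URLs:
%%   path -> http://localhost:9000/hb-s3/foo/bar    (MinIO-friendly)
%%   auto -> https://hb-s3.s3.amazonaws.com/foo/bar (virtual-hosted)
access_method(<<"true">>)  -> path;
access_method(<<"false">>) -> auto.
```

The mapping is deliberately exhaustive over the two binaries: any other value fails to match during `start/1` rather than silently mis-addressing a bucket.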
+%% @doc Handle gun response and translate it to erlcloud_s3 expected types handle_gun_response({ok, Status, ResponseHeaders, Body}) -> {ok, {{Status, undefined}, header_str(ResponseHeaders), Body}}; @@ -145,12 +162,18 @@ test_bucket_access(Bucket, Config) -> try erlcloud_s3:list_objects(BucketStr, [{max_keys, 1}], Config) of L when is_list(L) -> ok catch - _Class:Reason:Stacktrace -> + Class:Reason:Stacktrace -> case Reason of {aws_error, {http_error, 404, _, _}} -> error({bucket_not_found, Bucket}); _ -> - ?event(error, {error, {reason, Reason}, {stacktrace, Stacktrace}}), + ?event(error, + {checking_bucket_access, + {class, Class}, + {reason, Reason}, + {stacktrace, Stacktrace} + } + ), error({bucket_access_failed, Reason}) end end. @@ -170,10 +193,6 @@ get_config(Opts) -> Config end. -%%%----------------------------------------------------------------------------- -%%% Core Read/Write Operations (Phase 3) -%%%----------------------------------------------------------------------------- - %% @doc Write a value to a key in S3. -spec write(opts(), key(), value()) -> ok. write(Opts, Key, Value) when is_list(Key) -> @@ -184,68 +203,67 @@ write(Opts, Key, Value) when is_binary(Key) -> write(_Opts, _Key, _Value, 0) -> ok; write(Opts, Key, Value, AttemptsRemaining) -> - maybe - #{bucket := Bucket, config := Config} = get_config(Opts), - - HasSlash = (binary:match(hb_store:join(Key), <<"/">>) =/= nomatch), - - RelKey = hb_store:join(Key), - FullKey = case RelKey of - <<"data/", _/binary>> -> RelKey; - _ -> - case HasSlash of - true -> build_groups_key(RelKey); - false -> build_links_key(RelKey) - end - end, - BucketStr = hb_util:list(Bucket), - KeyStr = hb_util:list(FullKey), - ?event(store_s3, {write, {key, FullKey}, {size, byte_size(Value)}}), - ok ?= try erlcloud_s3:put_object(BucketStr, KeyStr, Value, [], Config) of - L when is_list(L) -> ok - catch _Class:Reason -> Reason end, - ok - else - Error -> - %% Store S3 retry mechanism - ?event(error, {s3_write_error, {key, Key}, {reason, Error}}), - MaxRetries = maps:get(<<"retry-attempts">>, Opts, ?DEFAULT_RETRIES), - MinRetryDelay = maps:get(<<"min-retry-delay">>, Opts, ?DEFAULT_RETRY_DELAY), - MaxRetryDelay = maps:get(<<"max-retry-delay">>, Opts, ?DEFAULT_MAX_RETRY_DELAY), - RetryTime = case maps:get(<<"retry-mode">>, Opts, ?DEFAULT_RETRY_MODE) of - exp_backoff -> - min(MinRetryDelay * round(math:pow(2, MaxRetries - AttemptsRemaining)), MaxRetryDelay); - _ -> MinRetryDelay - end, - ?event(store_s3, {retry_in, RetryTime}), - timer:sleep(RetryTime), - write(Opts, Key, Value, AttemptsRemaining - 1) + #{bucket := Bucket, config := Config} = get_config(Opts), + BucketStr = hb_util:list(Bucket), + KeyStr = hb_util:list(Key), + ?event(store_s3, + {s3_write, + {key, Key}, + {size, byte_size(Value)}, + {value, Value} + } + ), + try erlcloud_s3:put_object(BucketStr, KeyStr, Value, [], Config) of + L when is_list(L) -> ok + catch + Class:Reason -> + ?event(error, + {s3_write_error, + {key, Key}, + {class, Class}, + {reason, Reason} + } + ), + wait_before_next_retry(Opts, AttemptsRemaining), + write(Opts, Key, Value, AttemptsRemaining - 1) end. -%% @doc Build an S3 key under the links namespace for a logical key. -build_links_key(Key) -> - <>. - -%% @doc Build an S3 key under the groups namespace for a logical key. -build_groups_key(Key) -> - <>. 
- -%%%----------------------------------------------------------------------------- -%%% Link Support (Phase 4) -%%%----------------------------------------------------------------------------- +%% @doc Retry logic +wait_before_next_retry(Opts, AttemptsRemaining) -> + MaxRetries = maps:get(<<"retry-attempts">>, Opts, ?DEFAULT_RETRIES), + MinRetryDelay = maps:get(<<"min-retry-delay">>, Opts, ?DEFAULT_RETRY_DELAY), + MaxRetryDelay = maps:get(<<"max-retry-delay">>, Opts, ?DEFAULT_MAX_RETRY_DELAY), + RetryTime = case maps:get(<<"retry-mode">>, Opts, ?DEFAULT_RETRY_MODE) of + exp_backoff -> + min( + MinRetryDelay * round(math:pow(2, MaxRetries - AttemptsRemaining)), + MaxRetryDelay + ); + _ -> + MinRetryDelay + end, + ?event(store_s3, {retry_in, RetryTime}), + timer:sleep(RetryTime). %% @doc Read a value from S3, following links if necessary. -spec read(opts(), key()) -> {ok, value()} | not_found. -read(Opts, Key) when is_list(Key) -> - read(Opts, hb_store:join(Key)); -read(Opts, Key) when is_binary(Key) -> +read(Opts, Key) -> + read(Opts, Key, false). + +-spec read(opts(), key(), boolean()) -> {ok, value()} | not_found | group. +read(Opts, Key, ReturnGroup) -> + NormalizedKey = case is_list(Key) of + true -> hb_store:join(Key); + false -> Key + end, % Try direct read first (fast path for non-link paths) - case read_with_links(Opts, Key) of + ?event(store_s3, {s3_read, {key, NormalizedKey}}), + Result = case read_with_links(Opts, NormalizedKey) of {ok, Value} -> {ok, Value}; - not_found -> + Value when Value == not_found orelse Value == group -> try - PathParts = binary:split(Key, <<"/">>, [global]), + PathParts = binary:split(NormalizedKey, <<"/">>, [global]), case resolve_path_segments(Opts, PathParts) of {ok, ResolvedPathParts} -> ResolvedPathBin = to_path(ResolvedPathParts), @@ -256,17 +274,25 @@ read(Opts, Key) when is_binary(Key) -> catch Class:Reason:Stacktrace -> ?event(error, - { - resolve_path_links_failed, + {resolve_path_links_failed, {class, Class}, {reason, Reason}, {stacktrace, Stacktrace}, - {key, Key} + {key, NormalizedKey} } ), % If link resolution fails, return not_found not_found end + end, + case Result of + group -> + case ReturnGroup of + true -> group; + false -> not_found + end; + _ -> + Result end. read_with_links(Opts, Path) -> @@ -281,7 +307,13 @@ read_with_links(Opts, Path) -> {ok, Value} end; not_found -> - not_found + %% Folders in S3 don't return a value + %% so we check for our group key. + GroupKey = create_make_group_key(Path), + case head_exists(Opts, GroupKey) of + true -> group; + false -> not_found + end end. %% @doc Helper function to convert to a path @@ -294,21 +326,11 @@ to_path(Path) when is_binary(Path) -> read_direct(Opts, Key) -> #{bucket := Bucket, config := Config} = get_config(Opts), BucketStr = hb_util:list(Bucket), - FullKey = case Key of - <<"data/", _/binary>> -> - hb_store:join(Key); - _ -> - HasSlash = (binary:match(hb_store:join(Key), <<"/">>) =/= nomatch), - NsKey = case HasSlash of - true -> build_groups_key(Key); - false -> build_links_key(Key) - end, - NsKey - end, - KeyStr = hb_util:list(FullKey), + KeyStr = hb_util:list(Key), + ?event(store_s3, {s3_read_direct, {key, Key}}), try erlcloud_s3:get_object(BucketStr, KeyStr, [], Config) of - L when is_list(L) -> - Content = proplists:get_value(content, L), + Response when is_list(Response) -> + Content = proplists:get_value(content, Response), {ok, hb_util:bin(Content)} catch _:{aws_error, {http_error, 404, _, _}} -> @@ -319,29 +341,61 @@ read_direct(Opts, Key) -> not_found end. 
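With the defaults above (1 s base delay, 5 attempts, 5 minute cap), the exp_backoff branch of wait_before_next_retry/2 yields delays of 1 s, 2 s, 4 s, 8 s and 16 s before the attempts run out. A small sketch mirroring the same formula, handy for checking alternative settings (the helper name is hypothetical):

    %% Compute the full exp_backoff schedule for a given configuration,
    %% mirroring the exp_backoff branch of wait_before_next_retry/2.
    backoff_schedule(MinDelay, MaxDelay, Retries) ->
        [
            min(MinDelay * round(math:pow(2, Retries - Remaining)), MaxDelay)
        ||
            Remaining <- lists:seq(Retries, 1, -1)
        ].

    %% backoff_schedule(1000, 300000, 5) =:= [1000, 2000, 4000, 8000, 16000]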
+%% @doc Ensure all parent groups exist for a given path.
+%%
+%% This function creates the necessary parent groups for a path, similar to
+%% how filesystem stores use ensure_dir. For example, if the path is
+%% "a/b/c/file", it will ensure groups "a", "a/b", and "a/b/c" exist.
+%%
+%% @param Opts Database configuration map
+%% @param Path The path whose parents should exist
+%% @returns ok
+-spec ensure_parent_groups(map(), binary()) -> ok.
+ensure_parent_groups(Opts, Path) ->
+    PathParts = binary:split(Path, <<"/">>, [global]),
+    case PathParts of
+        [_] ->
+            % Single segment, no parents to create
+            ok;
+        _ ->
+            % Multiple segments, create parent groups
+            ParentParts = lists:droplast(PathParts),
+            create_parent_groups(Opts, [], ParentParts)
+    end.
+
+%% @doc Helper function to recursively create parent groups.
+create_parent_groups(_Opts, _Current, []) ->
+    ok;
+create_parent_groups(Opts, Current, [Next | Rest]) ->
+    NewCurrent = Current ++ [Next],
+    GroupPath = to_path(NewCurrent),
+    GroupKey = create_make_group_key(GroupPath),
+    GroupPathValue = read_direct(Opts, GroupPath),
+    GroupKeyValue = read_direct(Opts, GroupKey),
+
+    % Only create group if it doesn't already exist.
+    case {GroupPathValue, GroupKeyValue} of
+        {not_found, not_found} ->
+            make_group(Opts, GroupPath);
+        {{ok, _}, _} ->
+            % Already exists, skip
+            ok;
+        {_, {ok, _}} ->
+            % Already exists, skip
+            ok
+    end,
+    create_parent_groups(Opts, NewCurrent, Rest).
+
 %% @doc Create a symbolic link from New to Existing.
 %% Links are stored as values with "link:" prefix.
--spec make_link(opts(), key(), key()) -> ok | {error, term()}.
+-spec make_link(opts(), key(), key()) -> ok.
 make_link(Opts, Existing, New) ->
-    ExistingBin = hb_util:bin(hb_store:join(Existing)),
-    LinkValue = <<?LINK_MARKER/binary, ExistingBin/binary>>,
+    ExistingBin = hb_store:join(Existing),
+    % Ensure parent groups exist for the new link path (like filesystem ensure_dir)
+    ensure_parent_groups(Opts, New),
     ?event(store_s3, {make_link, {from, New}, {to, Existing}}),
-    maybe
-        #{bucket := Bucket, config := Config} = get_config(Opts),
-        BucketStr = hb_util:list(Bucket),
-        HasSlash = (binary:match(hb_store:join(New), <<"/">>) =/= nomatch),
-        NsKey = case HasSlash of
-            true -> build_groups_key(New);
-            false -> build_links_key(New)
-        end,
-        NsKeyStr = hb_util:list(NsKey),
-        ok ?= try erlcloud_s3:put_object(BucketStr, NsKeyStr, LinkValue, [], Config) of
-            L when is_list(L) -> ok
-        catch _Class:Reason -> {error, Reason} end,
-        ok
-    else
-        _ -> {error, make_link_failed}
-    end.
+    LinkValue = <<?LINK_MARKER/binary, ExistingBin/binary>>,
+    write(Opts, New, LinkValue).

 %% @doc Check if a value is a link and extract the target.
 %% Returns {true, Target} or false.
@@ -351,8 +405,12 @@ is_link(Value) ->
         true ->
             case binary:part(Value, 0, LinkPrefixSize) of
                 ?LINK_MARKER ->
-                    Target = binary:part(Value, LinkPrefixSize,
-                        byte_size(Value) - LinkPrefixSize),
+                    Target =
+                        binary:part(
+                            Value,
+                            LinkPrefixSize,
+                            byte_size(Value) - LinkPrefixSize
+                        ),
                     {true, Target};
                 _ ->
                     false
@@ -361,18 +419,24 @@ is_link(Value) ->
             false
     end.

-%%%-----------------------------------------------------------------------------
-%%% Groups and Listing (Phase 5)
-%%%-----------------------------------------------------------------------------
-
 %% @doc Create a group (virtual directory).
 %% In S3, directories don't really exist, so this is a no-op.
 %% Groups are detected by listing operations.
 -spec make_group(opts(), key()) -> ok.
 make_group(Opts, Path) ->
-    % We need to write a file so that has_children can consider this a group.
-    write(Opts, <<Path/binary, "/empty_group">>, <<"empty">>),
-    ok.
+    GroupKey = create_make_group_key(Path),
+    delete_object(Opts, Path),
+    write(Opts, GroupKey, <<>>).
+
+create_make_group_key(Path) ->
+    PathSlashed = ensure_trailing_slash(Path),
+    <<PathSlashed/binary, ?CREATE_GROUP_KEY/binary>>.
+
+delete_object(Opts, Path) ->
+    #{bucket := Bucket, config := Config} = get_config(Opts),
+    BucketStr = hb_util:list(Bucket),
+    PathStr = hb_util:list(Path),
+    erlcloud_s3:delete_object(BucketStr, PathStr, Config).

 %% @doc List immediate children under a given path.
 %% Treats the path as a directory prefix.
@@ -381,38 +445,38 @@ list(Opts, Path) when is_list(Path) ->
     list(Opts, hb_store:join(Path));
 list(Opts, Path) ->
     %% Check make_group note.
-    UnwantedChildren = [<<"empty_group">>],
-    maybe
-        #{bucket := Bucket, config := Config} = get_config(Opts),
-        ResolvedPath = case read_direct(Opts, Path) of
-            {ok, Value} ->
-                case is_link(Value) of
-                    {true, Target} ->
-                        Target;
-                    false ->
-                        Path
-                end;
-            _ ->
-                Path
-        end,
-        FullPath = build_groups_key(ResolvedPath),
-        SearchPrefix = ensure_trailing_slash(FullPath),
-        BucketStr = hb_util:list(Bucket),
-        PrefixStr = hb_util:list(SearchPrefix),
-        ListOpts = [{prefix, PrefixStr}, {delimiter, "/"}],
-        ?event(store_s3, {list_opts, ListOpts}),
-        try erlcloud_s3:list_objects(BucketStr, ListOpts, Config) of
-            L when is_list(L) ->
-                Children = extract_children(SearchPrefix, L),
-                {ok, Children -- UnwantedChildren}
-        catch
-            _Class:ErrorReason ->
-                {error, ErrorReason}
-        end
-    else
-        Reason ->
-            ?event(error, {s3_list_error, {path, Path}, {reason, Reason}}),
-            {error, Reason}
+    RemoveChildren = [?CREATE_GROUP_KEY],
+    #{bucket := Bucket, config := Config} = get_config(Opts),
+    ResolvedPath = case read_direct(Opts, Path) of
+        {ok, Value} ->
+            case is_link(Value) of
+                {true, Target} -> Target;
+                false -> Path
+            end;
+        _ ->
+            Path
+    end,
+    FullPath = ResolvedPath,
+    SearchPrefix = ensure_trailing_slash(FullPath),
+    BucketStr = hb_util:list(Bucket),
+    PrefixStr = hb_util:list(SearchPrefix),
+    ListOpts = [{prefix, PrefixStr}, {delimiter, "/"}],
+    ?event(store_s3, {list_opts, {Opts, ListOpts}}),
+    try erlcloud_s3:list_objects(BucketStr, ListOpts, Config) of
+        L when is_list(L) ->
+            Children = extract_children(SearchPrefix, L),
+            {ok, Children -- RemoveChildren}
+    catch
+        Class:Reason2:Stacktrace ->
+            ?event(error,
+                {s3_error_listing,
+                    {path, Path},
+                    {class, Class},
+                    {reason, Reason2},
+                    {stacktrace, Stacktrace}
+                }
+            ),
+            {error, Reason2}
     end.

 %% @doc Ensure a path ends with / for S3 directory listing.
@@ -444,12 +508,12 @@ extract_children(Prefix, S3Response) ->
         end,
         Contents
     ),
-
     Dirs = lists:filtermap(
        fun(P) ->
            PrefixBin = hb_util:bin(proplists:get_value(prefix, P, "")),
            case strip_prefix(Prefix, PrefixBin) of
-                <<>> -> false;
+                <<>> ->
+                    false;
                Child ->
                    ChildName = case binary:last(Child) of
                        $/ -> binary:part(Child, 0, byte_size(Child) - 1);
@@ -470,78 +534,31 @@ strip_prefix(Prefix, Bin) ->
         _ -> Bin
     end.

-%%%-----------------------------------------------------------------------------
-%%% Type Detection (Phase 6)
-%%%-----------------------------------------------------------------------------
-
 %% @doc Determine if a key represents a simple value or composite group.
 -spec type(opts(), key()) -> simple | composite | not_found.
type(Opts, Key) when is_list(Key) -> type(Opts, hb_store:join(Key)); type(Opts, Key) -> - case Key of - <<"data/", _/binary>> -> - case head_exists(Opts, Key) of - true -> - simple; - false -> - not_found - end; - _ -> - case read_direct(Opts, Key) of - {ok, Value} -> - case is_link(Value) of - {true, Target} -> - type(Opts, Target); - false -> - simple - end; - not_found -> - case has_children(Opts, Key) of - true -> - composite; - false -> - not_found - end; - {error, _} -> - not_found - end + case read(Opts, Key, true) of + {ok, _Value} -> simple; + group -> composite; + not_found -> not_found end. %% @doc HEAD check for object existence without downloading content +head_exists(Opts, Key) when is_list(Key) -> + head_exists(Opts, hb_store:join(Key)); head_exists(Opts, Key) -> + #{bucket := Bucket, config := Config} = get_config(Opts), + BucketStr = hb_util:list(Bucket), + KeyStr = hb_util:list(Key), + ?event(store_s3, {head_exists, {key, Key}}), try - #{bucket := Bucket, config := Config} = get_config(Opts), - FullKey = hb_store:join(Key), - BucketStr = hb_util:list(Bucket), - KeyStr = hb_util:list(FullKey), is_list(erlcloud_s3:head_object(BucketStr, KeyStr, [], Config)) catch _:_ -> false end. -%% @doc Check if a path has any children (is a directory). -has_children(Opts, Path) -> - try - #{bucket := Bucket, config := Config} = get_config(Opts), - FullPath = build_groups_key(Path), - SearchPrefix = ensure_trailing_slash(FullPath), - BucketStr = hb_util:list(Bucket), - PrefixStr = hb_util:list(SearchPrefix), - ListOpts = [{prefix, PrefixStr}, {max_keys, 1}], - case erlcloud_s3:list_objects(BucketStr, ListOpts, Config) of - L when is_list(L) -> - Contents = proplists:get_value(contents, L, []), - Contents =/= [] - end - catch - _:_ -> false - end. - -%%%----------------------------------------------------------------------------- -%%% Path Resolution (Phase 7) -%%%----------------------------------------------------------------------------- - %% @doc Resolve any links in a path. %% Follows links in each path segment except the last. -spec resolve(opts(), key()) -> binary(). @@ -608,10 +625,6 @@ add_path(_Opts, Path1, Path2) -> end, P1 ++ P2. -%%%----------------------------------------------------------------------------- -%%% Remaining Functions (Phase 8) -%%%----------------------------------------------------------------------------- - %% @doc Stop the S3 store and clean up resources. -spec stop(opts()) -> ok. stop(Opts) -> @@ -634,8 +647,8 @@ delete_all_objects(Opts) -> #{bucket := Bucket, config := Config} = get_config(Opts), BucketStr = hb_util:list(Bucket), try erlcloud_s3:list_objects(BucketStr, [], Config) of - L when is_list(L) -> - Contents = proplists:get_value(contents, L, []), + Response when is_list(Response) -> + Contents = proplists:get_value(contents, Response, []), Keys = [proplists:get_value(key, Obj) || Obj <- Contents], case Keys of [] -> ok; @@ -643,7 +656,14 @@ delete_all_objects(Opts) -> erlcloud_s3:delete_objects_batch(BucketStr, Keys, Config), ok end - catch _Class:Reason -> + catch Class:Reason:Stacktrace -> + ?event(error, + {deleting_all_objects, + {class, Class}, + {reason, Reason}, + {stacktrace, Stacktrace} + } + ), {error, Reason} end. @@ -679,7 +699,7 @@ default_test_opts() -> <<"priv_secret_access_key">> => <<"minioadmin">>, <<"endpoint">> => <<"http://localhost:9000">>, <<"dangerous_reset">> => true, - <<"force_path_style">> => true + <<"force_path_style">> => <<"true">> }. -ifdef(ENABLE_S3). 
@@ -692,7 +712,8 @@ default_test_opts() -> %% back to verify correctness, and cleans up by stopping the database. It %% serves as a sanity check that the basic storage mechanism is working. -init() -> +init() -> + %% Needed to use the gun http client used by HB. application:ensure_all_started(hb). basic_test() -> @@ -722,7 +743,7 @@ not_found_test() -> bucket_not_found_test() -> init(), StoreOpts = (default_test_opts())#{<<"bucket">> => <<"invalid_bucket">>}, - ?assertError({bucket_access_failed, {aws_error, {http_error, 400, "Bad Request", _}}}, start(StoreOpts)), + ?assertError({bucket_access_failed, {aws_error, {http_error, 400, _, _}}}, start(StoreOpts)), ok = stop(StoreOpts). failed_write_test() -> diff --git a/test/docker-compose-s3.yml b/test/docker-compose-s3.yml index acbb2dba8..43a8e99f0 100644 --- a/test/docker-compose-s3.yml +++ b/test/docker-compose-s3.yml @@ -4,7 +4,7 @@ volumes: services: minio: - image: quay.io/minio/minio + image: quay.io/minio/minio:RELEASE.2025-09-07T16-13-09Z ports: - "9000:9000" - "9001:9001" From c95a6c1647ecd08fed10e9b76a5ad4850edc79f0 Mon Sep 17 00:00:00 2001 From: speeddragon Date: Fri, 31 Oct 2025 17:22:45 +0000 Subject: [PATCH 6/7] feat: Add sharding support to S3 store Add configurable sharding for S3 keys to distribute data across prefixes. Fix shard_key generation and path resolution with sharding. Ensure module compiles correctly without S3 profile. --- rebar.config | 2 +- rebar.lock | 15 -- src/hb_store_s3.erl | 376 ++++++++++++++++++++++++++++++-------------- 3 files changed, 259 insertions(+), 134 deletions(-) diff --git a/rebar.config b/rebar.config index f83cc4b42..14d055860 100644 --- a/rebar.config +++ b/rebar.config @@ -44,7 +44,7 @@ {s3, [ {deps, [ {erlcloud, {git, "https://github.com/erlcloud/erlcloud.git", - {ref, "7322624227e12120d88c47a72a2ba0a032ace9f3"}} + {ref, "fe58402b40b93a20176d12b4bc211ea6a5b0d915"}} } ]}, {erl_opts, [ diff --git a/rebar.lock b/rebar.lock index 0c64dfb3e..abe0deb60 100644 --- a/rebar.lock +++ b/rebar.lock @@ -4,20 +4,15 @@ {git,"https://github.com/ArweaveTeam/b64fast.git", {ref,"58f0502e49bf73b29d95c6d02460d1fb8d2a5273"}}, 0}, - {<<"base16">>,{pkg,<<"base16">>,<<"2.0.1">>},1}, {<<"cowboy">>,{pkg,<<"cowboy">>,<<"2.14.0">>},0}, {<<"cowlib">>,{pkg,<<"cowlib">>,<<"2.16.0">>},0}, {<<"ddskerl">>,{pkg,<<"ddskerl">>,<<"0.4.2">>},1}, - {<<"eini">>,{pkg,<<"eini">>,<<"1.2.9">>},1}, {<<"elmdb">>, {git,"https://github.com/twilson63/elmdb-rs.git", {ref,"90c8857cd4ccff341fbe415b96bc5703d17ff7f0"}}, 0}, - {<<"erlcloud">>,{pkg,<<"erlcloud">>,<<"3.8.3">>},0}, {<<"graphql">>,{pkg,<<"graphql_erl">>,<<"0.17.1">>},0}, {<<"gun">>,{pkg,<<"gun">>,<<"2.2.0">>},0}, - {<<"jsx">>,{pkg,<<"jsx">>,<<"2.11.0">>},1}, - {<<"lhttpc">>,{pkg,<<"lhttpc">>,<<"1.7.1">>},1}, {<<"luerl">>,{pkg,<<"luerl">>,<<"1.3.0">>},0}, {<<"prometheus">>,{pkg,<<"prometheus">>,<<"6.0.3">>},0}, {<<"prometheus_cowboy">>,{pkg,<<"prometheus_cowboy">>,<<"0.2.0">>},0}, @@ -26,16 +21,11 @@ [ {pkg_hash,[ {<<"accept">>, <<"CD6E34A2D7E28CA38B2D3CB233734CA0C221EFBC1F171F91FEC5F162CC2D18DA">>}, - {<<"base16">>, <<"F0549F732E03BE8124ED0D19FD5EE52146CC8BE24C48CBC3F23AB44B157F11A2">>}, {<<"cowboy">>, <<"565DCF221BA99B1255B0ADCEC24D2D8DBE79E46EC79F30F8373CCEADC6A41E2A">>}, {<<"cowlib">>, <<"54592074EBBBB92EE4746C8A8846E5605052F29309D3A873468D76CDF932076F">>}, {<<"ddskerl">>, <<"A51A90BE9AC9B36A94017670BED479C623B10CA9D4BDA1EDF3A0E48CAEEADA2A">>}, - {<<"eini">>, <<"FCC3CBD49BBDD9A1D9735C7365DAFFCD84481CCE81E6CB80537883AA44AC4895">>}, - {<<"erlcloud">>, 
<<"66DE6C7D37C6E688C7AE198D56B9CBE07B2CB80054B88A798437C533C3C7F418">>}, {<<"graphql">>, <<"EB59FCBB39F667DC1C78C950426278015C3423F7A6ED2A121D3DB8B1D2C5F8B4">>}, {<<"gun">>, <<"B8F6B7D417E277D4C2B0DC3C07DFDF892447B087F1CC1CAFF9C0F556B884E33D">>}, - {<<"jsx">>, <<"08154624050333919B4AC1B789667D5F4DB166DC50E190C4D778D1587F102EE0">>}, - {<<"lhttpc">>, <<"8522AF9877765C33318A3AE486BE69BC165E835D05C3334A8166FD7B318D446B">>}, {<<"luerl">>, <<"B56423DDB721432AB980B818FEECB84ADBAB115E2E11522CF94BCD0729CAA501">>}, {<<"prometheus">>, <<"95302236124C0F919163A7762BF7D2B171B919B6FF6148D26EB38A5D2DEF7B81">>}, {<<"prometheus_cowboy">>, <<"526F75D9850A9125496F78BCEECCA0F237BC7B403C976D44508543AE5967DAD9">>}, @@ -43,16 +33,11 @@ {<<"ranch">>, <<"25528F82BC8D7C6152C57666CA99EC716510FE0925CB188172F41CE93117B1B0">>}]}, {pkg_hash_ext,[ {<<"accept">>, <<"CA69388943F5DAD2E7232A5478F16086E3C872F48E32B88B378E1885A59F5649">>}, - {<<"base16">>, <<"06EA2D48343282E712160BA89F692B471DB8B36ABE8394F3445FF9032251D772">>}, {<<"cowboy">>, <<"EA99769574550FE8A83225C752E8A62780A586770EF408816B82B6FE6D46476B">>}, {<<"cowlib">>, <<"7F478D80D66B747344F0EA7708C187645CFCC08B11AA424632F78E25BF05DB51">>}, {<<"ddskerl">>, <<"63F907373D7E548151D584D4DA8A38928FD26EC9477B94C0FFAAD87D7CB69FE7">>}, - {<<"eini">>, <<"DA64AE8DB7C2F502E6F20CDF44CD3D9BE364412B87FF49FEBF282540F673DFCB">>}, - {<<"erlcloud">>, <<"6ED59CBD8816045765E3E76F22D08118766DF1BB1D24F5D90794C8505BCD6D44">>}, {<<"graphql">>, <<"4D0F08EC57EF0983E2596763900872B1AB7E94F8EE3817B9F67EEC911FF7C386">>}, {<<"gun">>, <<"76022700C64287FEB4DF93A1795CFF6741B83FB37415C40C34C38D2A4645261A">>}, - {<<"jsx">>, <<"EED26A0D04D217F9EECEFFFB89714452556CF90EB38F290A27A4D45B9988F8C0">>}, - {<<"lhttpc">>, <<"154EEB27692482B52BE86406DCD1D18A2405CAFCE0E8DAA4A1A7BFA7FE295896">>}, {<<"luerl">>, <<"6B3138AA829F0FBC4CD0F083F273B4030A2B6CE99155194A6DB8C67B2C3480A4">>}, {<<"prometheus">>, <<"53554ECADAC0354066801D514D1A244DD026175E4EE3A9A30192B71D530C8268">>}, {<<"prometheus_cowboy">>, <<"2C7EB12F4B970D91E3B47BAAD0F138F6ADC34E53EEB0AE18068FF0AFAB441B24">>}, diff --git a/src/hb_store_s3.erl b/src/hb_store_s3.erl index 72d89b4c6..643222c0d 100644 --- a/src/hb_store_s3.erl +++ b/src/hb_store_s3.erl @@ -18,8 +18,14 @@ %%% %%% @end -module(hb_store_s3). --behaviour(hb_store). +-ifndef(ENABLE_S3). +-export([start/1]). + +start(_) -> error(s3_profile_not_enabled). + +-else. +-behaviour(hb_store). %% Store behavior callbacks -export([start/1, stop/1, reset/1, scope/0, scope/1]). -export([read/2, write/3, list/2, type/2]). @@ -45,8 +51,6 @@ -define(MAX_REDIRECTS, 100). % Only resolve 1000 links to data -define(LINK_MARKER, <<"link:">>). %% Namespace for storing link objects separately to avoid file collisions --define(LINKS_NS, <<"links/">>). --define(GROUPS_NS, <<"groups/">>). -define(CREATE_GROUP_KEY, <<"make_group">>). -define(DEFAULT_RETRY_DELAY, 1000). % Wait for 1 second before retry. @@ -54,6 +58,8 @@ -define(DEFAULT_RETRIES, 5). % Retries for 5 times until it returns. -define(DEFAULT_MAX_RETRY_DELAY, 300000). % Max 5 minutes waiting to retry. +-define(SHARD_CUT, 4). + %% @doc Initialize the S3 store connection. %% This function is called when the store is first accessed. %% It validates the configuration and tests the connection. @@ -159,8 +165,8 @@ validate_config(Opts) -> %% @doc Test that we can access the configured bucket. 
test_bucket_access(Bucket, Config) -> BucketStr = hb_util:list(Bucket), - try erlcloud_s3:list_objects(BucketStr, [{max_keys, 1}], Config) of - L when is_list(L) -> ok + try erlcloud_s3:head_bucket(BucketStr, Config) of + Response when is_list(Response) -> ok catch Class:Reason:Stacktrace -> case Reason of @@ -200,36 +206,38 @@ write(Opts, Key, Value) when is_list(Key) -> write(Opts, Key, Value) when is_binary(Key) -> RetryAttempts = maps:get(<<"retry-attempts">>, Opts, ?DEFAULT_RETRIES), write(Opts, Key, Value, RetryAttempts). -write(_Opts, _Key, _Value, 0) -> +write(_Opts, Key, _Value, 0) -> + ?event(warning, {max_retries_reached_s3_write, {key, Key}}), ok; write(Opts, Key, Value, AttemptsRemaining) -> #{bucket := Bucket, config := Config} = get_config(Opts), BucketStr = hb_util:list(Bucket), - KeyStr = hb_util:list(Key), - ?event(store_s3, - {s3_write, - {key, Key}, - {size, byte_size(Value)}, + ShardedKey = shard_key(Key), + ShardedKeyStr = hb_util:list(ShardedKey), + ?event(store_s3, + {s3_write, + {key, Key}, + {size, byte_size(Value)}, {value, Value} } ), - try erlcloud_s3:put_object(BucketStr, KeyStr, Value, [], Config) of + try erlcloud_s3:put_object(BucketStr, ShardedKeyStr, Value, [], Config) of L when is_list(L) -> ok - catch - Class:Reason -> - ?event(error, - {s3_write_error, - {key, Key}, - {class, Class}, - {reason, Reason} - } - ), - wait_before_next_retry(Opts, AttemptsRemaining), - write(Opts, Key, Value, AttemptsRemaining - 1) + catch + Class:Reason -> + ?event(error, + {s3_write_error, + {key, Key}, + {class, Class}, + {reason, Reason} + } + ), + wait_before_next_retry(Opts, AttemptsRemaining), + write(Opts, Key, Value, AttemptsRemaining - 1) end. %% @doc Retry logic -wait_before_next_retry(Opts, AttemptsRemaining) -> +wait_before_next_retry(Opts, AttemptsRemaining) -> MaxRetries = maps:get(<<"retry-attempts">>, Opts, ?DEFAULT_RETRIES), MinRetryDelay = maps:get(<<"min-retry-delay">>, Opts, ?DEFAULT_RETRY_DELAY), MaxRetryDelay = maps:get(<<"max-retry-delay">>, Opts, ?DEFAULT_MAX_RETRY_DELAY), @@ -239,7 +247,7 @@ wait_before_next_retry(Opts, AttemptsRemaining) -> MinRetryDelay * round(math:pow(2, MaxRetries - AttemptsRemaining)), MaxRetryDelay ); - _ -> + _ -> MinRetryDelay end, ?event(store_s3, {retry_in, RetryTime}), @@ -254,7 +262,7 @@ read(Opts, Key) -> read(Opts, Key, ReturnGroup) -> NormalizedKey = case is_list(Key) of true -> hb_store:join(Key); - false -> Key + false -> Key end, % Try direct read first (fast path for non-link paths) ?event(store_s3, {s3_read, {key, NormalizedKey}}), @@ -307,7 +315,7 @@ read_with_links(Opts, Path) -> {ok, Value} end; not_found -> - %% Folders in S3 don't return a value + %% Folders in S3 don't return a value %% so we check for our group key. GroupKey = create_make_group_key(Path), case head_exists(Opts, GroupKey) of @@ -326,9 +334,10 @@ to_path(Path) when is_binary(Path) -> read_direct(Opts, Key) -> #{bucket := Bucket, config := Config} = get_config(Opts), BucketStr = hb_util:list(Bucket), - KeyStr = hb_util:list(Key), - ?event(store_s3, {s3_read_direct, {key, Key}}), - try erlcloud_s3:get_object(BucketStr, KeyStr, [], Config) of + ShardedKey = shard_key(Key), + ShardedKeyStr = hb_util:list(ShardedKey), + ?event(store_s3, {s3_read_direct, {key, Key}, {sharded_key, ShardedKey}}), + try erlcloud_s3:get_object(BucketStr, ShardedKeyStr, [], Config) of Response when is_list(Response) -> Content = proplists:get_value(content, Response), {ok, hb_util:bin(Content)} @@ -341,6 +350,36 @@ read_direct(Opts, Key) -> not_found end. 
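The sharding introduced below rewrites every key before it reaches S3 (put, get, head, delete and list all go through shard_key/1), splitting the first path segment at a fixed cut point so objects spread across many prefixes instead of piling up under a few. An illustrative mapping with the 4-byte ?SHARD_CUT used here:

    %% Logical key                          Stored S3 key (illustrative)
    %% <<"TWaabU6nSWpn7...">>           =>  <<"TWaa/bU6nSWpn7...">>
    %% <<"data/TWaabU6nSWpn7...">>      =>  <<"data/TWaa/bU6nSWpn7...">>
    %% <<"abc">>                        =>  error({invalid_key_for_sharding, _})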
+-spec shard_key(binary()) -> binary().
+shard_key(<<"data/", DataKey/binary>>) ->
+    ShardedKey = shard_key(DataKey),
+    <<"data/", ShardedKey/binary>>;
+shard_key(Key) when is_binary(Key) andalso byte_size(Key) > ?SHARD_CUT ->
+    % Only shard the first part of the path
+    case binary:split(Key, <<"/">>) of
+        [FirstKey, Rest] ->
+            ShardedKey = shard_key_inner(FirstKey),
+            <<ShardedKey/binary, "/", Rest/binary>>;
+        _ ->
+            shard_key_inner(Key)
+    end;
+shard_key(<<"data">>) -> <<"data">>;
+shard_key(Key) ->
+    ShardCut = integer_to_binary(?SHARD_CUT),
+    ?event(error,
+        {invalid_key_for_sharding,
+            {key, Key},
+            {reason,
+                <<"Should be a binary with min length of ",
+                    ShardCut/binary>>}
+        }
+    ),
+    error({invalid_key_for_sharding, Key}).
+
+shard_key_inner(Key) ->
+    Part1 = string:slice(Key, 0, ?SHARD_CUT),
+    Part2 = string:slice(Key, ?SHARD_CUT),
+    <<Part1/binary, "/", Part2/binary>>.
 %% @doc Ensure all parent groups exist for a given path.
 %%
 %% This function creates the necessary parent groups for a path, similar to
@@ -354,7 +393,7 @@ read_direct(Opts, Key) ->
 ensure_parent_groups(Opts, Path) ->
     PathParts = binary:split(Path, <<"/">>, [global]),
     case PathParts of
-        [_] -> 
+        [_] ->
             % Single segment, no parents to create
             ok;
         _ ->
@@ -432,11 +471,12 @@ create_make_group_key(Path) ->
     PathSlashed = ensure_trailing_slash(Path),
     <<PathSlashed/binary, ?CREATE_GROUP_KEY/binary>>.

-delete_object(Opts, Path) ->
+delete_object(Opts, Key) when is_binary(Key) ->
     #{bucket := Bucket, config := Config} = get_config(Opts),
     BucketStr = hb_util:list(Bucket),
-    PathStr = hb_util:list(Path),
-    erlcloud_s3:delete_object(BucketStr, PathStr, Config).
+    ShardedKey = shard_key(Key),
+    ShardedKeyStr = hb_util:list(ShardedKey),
+    erlcloud_s3:delete_object(BucketStr, ShardedKeyStr, Config).

 %% @doc List immediate children under a given path.
 %% Treats the path as a directory prefix.
@@ -458,18 +498,19 @@ list(Opts, Path) ->
         end,
     FullPath = ResolvedPath,
     SearchPrefix = ensure_trailing_slash(FullPath),
+    ShardedSearchPrefix = shard_key(SearchPrefix),
     BucketStr = hb_util:list(Bucket),
-    PrefixStr = hb_util:list(SearchPrefix),
-    ListOpts = [{prefix, PrefixStr}, {delimiter, "/"}],
-    ?event(store_s3, {list_opts, {Opts, ListOpts}}),
+    ShardedSearchPrefixStr = hb_util:list(ShardedSearchPrefix),
+    ListOpts = [{prefix, ShardedSearchPrefixStr}, {delimiter, "/"}],
+    ?event(store_s3, {list_opts, {list_opts, ListOpts}}),
     try erlcloud_s3:list_objects(BucketStr, ListOpts, Config) of
-        L when is_list(L) ->
-            Children = extract_children(SearchPrefix, L),
+        Response when is_list(Response) ->
+            Children = extract_children(ShardedSearchPrefix, Response),
             {ok, Children -- RemoveChildren}
     catch
         Class:Reason2:Stacktrace ->
-            ?event(error, 
-                {s3_error_listing, 
+            ?event(error,
+                {s3_error_listing,
                     {path, Path},
                     {class, Class},
                     {reason, Reason2},
@@ -512,7 +553,7 @@ extract_children(Prefix, S3Response) ->
         fun(P) ->
             PrefixBin = hb_util:bin(proplists:get_value(prefix, P, "")),
             case strip_prefix(Prefix, PrefixBin) of
-                <<>> -> 
+                <<>> ->
                     false;
                 Child ->
                     ChildName = case binary:last(Child) of
@@ -546,15 +587,14 @@ type(Opts, Key) ->
     end.
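With the group markers in place, type/2 reduces to a simple contract: a key that reads back a value (after link resolution) is simple, a key whose make_group marker exists is composite, and anything else is not_found. For example, assuming the writes performed in the tests further down:

    %% type(Opts, GroupKey)                              =:= composite
    %% type(Opts, <<GroupKey/binary, "/content-type">>)  =:= simple
    %% type(Opts, <<"never-written-key">>)               =:= not_found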
%% @doc HEAD check for object existence without downloading content -head_exists(Opts, Key) when is_list(Key) -> - head_exists(Opts, hb_store:join(Key)); -head_exists(Opts, Key) -> +head_exists(Opts, Key) when is_binary(Key) -> #{bucket := Bucket, config := Config} = get_config(Opts), BucketStr = hb_util:list(Bucket), - KeyStr = hb_util:list(Key), + ShardedKey = shard_key(Key), + ShardedKeyStr = hb_util:list(ShardedKey), ?event(store_s3, {head_exists, {key, Key}}), try - is_list(erlcloud_s3:head_object(BucketStr, KeyStr, [], Config)) + is_list(erlcloud_s3:head_object(BucketStr, ShardedKeyStr, [], Config)) catch _:_ -> false end. @@ -574,35 +614,39 @@ resolve(Opts, Path) when is_binary(Path) -> resolve_path_segments(Opts, Path) -> resolve_path_segments(Opts, Path, 0). %% Internal path resolution that resolves all segments including the last -resolve_path_segments(_Opts, _Path, Depth) when Depth > ?MAX_REDIRECTS -> +resolve_path_segments(_Opts, _Parts, Depth) when Depth > ?MAX_REDIRECTS -> {error, too_many_redirects}; -resolve_path_segments(_Opts, [], _Depth) -> - {ok, <<>>}; +resolve_path_segments(_Opts, [LastSegment], _Depth) -> + {ok, hb_store:path(LastSegment)}; resolve_path_segments(Opts, Parts, Depth) -> - resolve_path_accumulate(Opts, Parts, <<>>, Depth). + resolve_path_accumulate(Opts, Parts, [], Depth). %% Internal helper that accumulates the resolved path -resolve_path_accumulate(_Opts, [], Acc, _Depth) -> - {ok, Acc}; +resolve_path_accumulate(_Opts, [], AccPath, _Depth) -> + {ok, hb_store:path(lists:reverse(AccPath))}; resolve_path_accumulate(_Opts, FullPath = [<<"data">> | _], <<>>, _Depth) -> {ok, hb_store:join(FullPath)}; -resolve_path_accumulate(_Opts, _Parts, _Acc, Depth) when Depth > ?MAX_REDIRECTS -> - {error, too_many_redirects}; -resolve_path_accumulate(Opts, [Head|Tail], Acc, Depth) -> - CurrentPath = case Acc of - <<>> -> Head; - _ -> <> - end, - case read_direct(Opts, CurrentPath) of +resolve_path_accumulate(Opts, [Head|Tail], AccPath, Depth) -> + % Build the accumulated path so far + CurrentPath = lists:reverse([Head | AccPath]), + CurrentPathBin = to_path(CurrentPath), + % Check if the accumulated path (not just the segment) is a link + case read_direct(Opts, CurrentPathBin) of {ok, Value} -> case is_link(Value) of - {true, Target} -> - resolve_path_accumulate(Opts, Tail, Target, Depth + 1); + {true, Link} -> + % The accumulated path is a link! Resolve it + LinkSegments = binary:split(Link, <<"/">>, [global]), + % Replace the accumulated path with the link target and + % continue with remaining segments + NewPath = LinkSegments ++ Tail, + + resolve_path_segments(Opts, NewPath, Depth + 1); false -> - resolve_path_accumulate(Opts, Tail, CurrentPath, Depth) + resolve_path_accumulate(Opts, Tail, [Head | AccPath], Depth) end; not_found -> - resolve_path_accumulate(Opts, Tail, CurrentPath, Depth) + resolve_path_accumulate(Opts, Tail, [Head | AccPath], Depth) end. %% @doc Convert path to canonical form. @@ -651,16 +695,17 @@ delete_all_objects(Opts) -> Contents = proplists:get_value(contents, Response, []), Keys = [proplists:get_value(key, Obj) || Obj <- Contents], case Keys of - [] -> ok; + [] -> + ok; _ -> erlcloud_s3:delete_objects_batch(BucketStr, Keys, Config), ok end catch Class:Reason:Stacktrace -> - ?event(error, - {deleting_all_objects, - {class, Class}, - {reason, Reason}, + ?event(error, + {deleting_all_objects, + {class, Class}, + {reason, Reason}, {stacktrace, Stacktrace} } ), @@ -702,7 +747,6 @@ default_test_opts() -> <<"force_path_style">> => <<"true">> }. 
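To make the rewritten path resolution above concrete: when an accumulated path turns out to be a link, its target is re-split and resolution restarts with the remaining segments, bounded by ?MAX_REDIRECTS. A worked example under assumed bucket contents (keys chosen to clear the shard cut):

    %% Assumed logical contents:
    %%   <<"aaaaa">>       => <<"link:bbbbb">>
    %%   <<"bbbbb/ccccc">> => <<"Value">>
    %% Then:
    %%   resolve(Opts, <<"aaaaa/ccccc">>) =:= <<"bbbbb/ccccc">>
    %%   read(Opts, <<"aaaaa/ccccc">>)    =:= {ok, <<"Value">>}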
--ifdef(ENABLE_S3).
 -ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").
@@ -716,18 +760,42 @@ init() ->
     %% Needed to use the gun http client used by HB.
     application:ensure_all_started(hb).

-basic_test() ->
+sharding_test() ->
     init(),
     StoreOpts = default_test_opts(),
     start(StoreOpts),
     reset(StoreOpts),
-    Value = <<"world">>,
-    Key = <<"hello">>,
-    ?event(s3_test, {{key, Key},{value, Value}}),
-    Res = write(StoreOpts, Key, Value),
-    ?assertEqual(ok, Res),
+
+    Value = <<"value">>,
+    Key = <<"TWaabU6nSWpn7zPaiWSd9gQkfNwa1t3onrDRNB-bFiI">>,
+    %% Write
+    ok = write(StoreOpts, Key, Value),
+
+    %% Confirm sharding
+
+    #{bucket := Bucket, config := Config} = get_config(StoreOpts),
+    Result = erlcloud_s3:head_object(
+        hb_util:list(Bucket),
+        "TWaa/bU6nSWpn7zPaiWSd9gQkfNwa1t3onrDRNB-bFiI",
+        Config
+    ),
+    ?assert(is_list(Result)),
+
+    %% Read
     {ok, Response} = read(StoreOpts, Key),
     ?assertEqual(Value, Response),
+    %% Head
+    ?assert(head_exists(StoreOpts, Key)),
+    %% Delete
+    delete_object(StoreOpts, Key),
+    ?assertNot(head_exists(StoreOpts, Key)),
+    % Group
+    GroupKey = <<"UDgFxz7qUcB_TijjDfhUpXD3UGXpw8Xq6OrpoDiv3Y0">>,
+    make_group(StoreOpts, GroupKey),
+    write(StoreOpts, <<GroupKey/binary, "/content-type">>, <<"application/json">>),
+    %% List
+    R = list(StoreOpts, GroupKey),
+    ?assertEqual({ok,[<<"content-type">>]}, R),
     ok = stop(StoreOpts).

 not_found_test() ->
@@ -751,7 +819,7 @@ failed_write_test() ->
     StoreOpts = (default_test_opts())#{<<"retry-attempts">> => 2},
     start(StoreOpts),
     reset(StoreOpts),
-    Key = <<"write_test">>,
+    Key = hb_message:id(#{}),
     Value = <<"value">>,

     ok = meck:new(erlcloud_s3, [unstick, passthrough]),
@@ -772,7 +840,7 @@ failed_read_test() ->
     StoreOpts = default_test_opts(),
     start(StoreOpts),
     reset(StoreOpts),
-    Key = <<"read_test">>,
+    Key = hb_message:id(#{}),

     ok = meck:new(erlcloud_s3, [passthrough]),
     XMLBody = <<"">>,
@@ -796,35 +864,38 @@ list_test() ->
     StoreOpts = default_test_opts(),
     start(StoreOpts),
     reset(StoreOpts),
-    ?assertEqual({ok, []}, list(StoreOpts, <<"colors">>)),
+    GroupName = hb_message:id(#{key => <<"colors">>}),
+    ?assertEqual({ok, []}, list(StoreOpts, GroupName)),
     % Create immediate children under colors/
-    write(StoreOpts, <<"colors/red">>, <<"1">>),
-    write(StoreOpts, <<"colors/blue">>, <<"2">>),
-    write(StoreOpts, <<"colors/green">>, <<"3">>),
+    write(StoreOpts, <<GroupName/binary, "/red">>, <<"1">>),
+    write(StoreOpts, <<GroupName/binary, "/blue">>, <<"2">>),
+    write(StoreOpts, <<GroupName/binary, "/green">>, <<"3">>),
     % Create nested directories under colors/ - these should show up as immediate children
-    write(StoreOpts, <<"colors/multi/foo">>, <<"4">>),
-    write(StoreOpts, <<"colors/multi/bar">>, <<"5">>),
-    write(StoreOpts, <<"colors/primary/red">>, <<"6">>),
-    write(StoreOpts, <<"colors/primary/blue">>, <<"7">>),
-    write(StoreOpts, <<"colors/nested/deep/value">>, <<"8">>),
+    write(StoreOpts, <<GroupName/binary, "/multi/foo">>, <<"4">>),
+    write(StoreOpts, <<GroupName/binary, "/multi/bar">>, <<"5">>),
+    write(StoreOpts, <<GroupName/binary, "/primary/red">>, <<"6">>),
+    write(StoreOpts, <<GroupName/binary, "/primary/blue">>, <<"7">>),
+    write(StoreOpts, <<GroupName/binary, "/nested/deep/value">>, <<"8">>),
     % Create other top-level directories
-    write(StoreOpts, <<"foo/bar">>, <<"baz">>),
-    write(StoreOpts, <<"beep/boop">>, <<"bam">>),
-    read(StoreOpts, <<"colors">>),
+    FooKey = hb_message:id(#{key => <<"foobar">>}),
+    BeepKey = hb_message:id(#{key => <<"beepboop">>}),
+    write(StoreOpts, <<FooKey/binary, "/bar">>, <<"baz">>),
+    write(StoreOpts, <<BeepKey/binary, "/boop">>, <<"bam">>),
+    read(StoreOpts, GroupName),
     % Test listing colors/ - should return immediate children only
-    {ok, ListResult} = list(StoreOpts, <<"colors">>),
+    {ok, ListResult} = list(StoreOpts, GroupName),
     ?event({list_result, ListResult}),
     % Expected: red, blue, green (files) + multi, primary, nested (directories)
     % Should NOT include deeply nested items like foo, bar, deep, value
     ExpectedChildren = [<<"blue">>, <<"green">>, <<"multi">>, <<"nested">>, <<"primary">>, <<"red">>],
     ?assert(lists:all(fun(Key) -> lists:member(Key, ExpectedChildren) end, ListResult)),
     % Test listing a nested directory - should only show immediate children
-    {ok, NestedListResult} = list(StoreOpts, <<"colors/multi">>),
+    {ok, NestedListResult} = list(StoreOpts, <<GroupName/binary, "/multi">>),
     ?event({nested_list_result, NestedListResult}),
     ExpectedNestedChildren = [<<"bar">>, <<"foo">>],
     ?assert(lists:all(fun(Key) -> lists:member(Key, ExpectedNestedChildren) end, NestedListResult)),
     % Test listing a deeper nested directory
-    {ok, DeepListResult} = list(StoreOpts, <<"colors/nested">>),
+    {ok, DeepListResult} = list(StoreOpts, <<GroupName/binary, "/nested">>),
     ?event({deep_list_result, DeepListResult}),
     ExpectedDeepChildren = [<<"deep">>],
     ?assert(lists:all(fun(Key) -> lists:member(Key, ExpectedDeepChildren) end, DeepListResult)),
@@ -855,9 +926,9 @@ link_test() ->
     StoreOpts = default_test_opts(),
     start(StoreOpts),
     reset(StoreOpts),
-    write(StoreOpts, <<"foo/bar/baz">>, <<"Bam">>),
-    make_link(StoreOpts, <<"foo/bar/baz">>, <<"foo/beep/baz">>),
-    {ok, Result} = read(StoreOpts, <<"foo/beep/baz">>),
+    write(StoreOpts, <<"foooo/bar/baz">>, <<"Bam">>),
+    make_link(StoreOpts, <<"foooo/bar/baz">>, <<"foooo/beep/baz">>),
+    {ok, Result} = read(StoreOpts, <<"foooo/beep/baz">>),
     ?event({ result, Result}),
     ?assertEqual(<<"Bam">>, Result).

@@ -866,8 +937,8 @@ link_fragment_test() ->
     StoreOpts = default_test_opts(),
     start(StoreOpts),
     reset(StoreOpts),
-    ok = write(StoreOpts, [<<"data">>, <<"bar">>, <<"baz">>], <<"Bam">>),
-    ok = make_link(StoreOpts, [<<"data">>, <<"bar">>], <<"my-link">>),
+    ok = write(StoreOpts, [<<"data">>, <<"barrr">>, <<"baz">>], <<"Bam">>),
+    ok = make_link(StoreOpts, [<<"data">>, <<"barrr">>], <<"my-link">>),
     {ok, Result} = read(StoreOpts, [<<"my-link">>, <<"baz">>]),
     ?event({ result, Result}),
     ?assertEqual(<<"Bam">>, Result).

@@ -935,9 +1006,9 @@ path_traversal_link_test() ->
     % Create the actual data at group/key
     write(StoreOpts, [<<"group">>, <<"key">>], <<"target-value">>),
     % Create a link from "link" to "group"
-    make_link(StoreOpts, <<"group">>, <<"link">>),
+    make_link(StoreOpts, <<"group">>, <<"link1">>),
     % Reading via the link path should resolve to the target value
-    {ok, Result} = read(StoreOpts, [<<"link">>, <<"key">>]),
+    {ok, Result} = read(StoreOpts, [<<"link1">>, <<"key">>]),
     ?event({path_traversal_result, Result}),
     ?assertEqual(<<"target-value">>, Result),
     ok = stop(StoreOpts).
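As the link tests above illustrate, links and groups are both plain S3 objects: a link is a small object whose body is the link: marker followed by the target path, and a group is an empty make_group marker under the group's prefix (written by ensure_parent_groups via make_link). A sketch of the bucket layout produced by link_test, with keys shown before sharding is applied:

    %% After write(StoreOpts, <<"foooo/bar/baz">>, <<"Bam">>) and
    %% make_link(StoreOpts, <<"foooo/bar/baz">>, <<"foooo/beep/baz">>):
    %%   "foooo/bar/baz"          => <<"Bam">>
    %%   "foooo/beep/baz"         => <<"link:foooo/bar/baz">>
    %%   "foooo/make_group"       => <<>>
    %%   "foooo/beep/make_group"  => <<>>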
@@ -1038,39 +1109,39 @@ nested_map_cache_test() -> write(StoreOpts, <<"data/", OtherKeyHash/binary>>, OtherKeyValue), % Step 2: Create the nested structure with groups and links % Create the root group - make_group(StoreOpts, <<"root">>), + make_group(StoreOpts, <<"rooot">>), % Create links for the root level keys - make_link(StoreOpts, <<"data/", TargetHash/binary>>, <<"root/target">>), + make_link(StoreOpts, <<"data/", TargetHash/binary>>, <<"rooot/target">>), % Create the commitments subgroup - make_group(StoreOpts, <<"root/commitments">>), + make_group(StoreOpts, <<"rooot/commitments">>), % Create the key1 subgroup within commitments - make_group(StoreOpts, <<"root/commitments/key1">>), - make_link(StoreOpts, <<"data/", AlgHash1/binary>>, <<"root/commitments/key1/alg">>), - make_link(StoreOpts, <<"data/", CommitterHash1/binary>>, <<"root/commitments/key1/committer">>), + make_group(StoreOpts, <<"rooot/commitments/key1">>), + make_link(StoreOpts, <<"data/", AlgHash1/binary>>, <<"rooot/commitments/key1/alg">>), + make_link(StoreOpts, <<"data/", CommitterHash1/binary>>, <<"rooot/commitments/key1/committer">>), % Create the key2 subgroup within commitments - make_group(StoreOpts, <<"root/commitments/key2">>), - make_link(StoreOpts, <<"data/", AlgHash2/binary>>, <<"root/commitments/key2/alg">>), - make_link(StoreOpts, <<"data/", CommitterHash2/binary>>, <<"root/commitments/key2/commiter">>), + make_group(StoreOpts, <<"rooot/commitments/key2">>), + make_link(StoreOpts, <<"data/", AlgHash2/binary>>, <<"rooot/commitments/key2/alg">>), + make_link(StoreOpts, <<"data/", CommitterHash2/binary>>, <<"rooot/commitments/key2/commiter">>), % Create the other-key subgroup - make_group(StoreOpts, <<"root/other-key">>), - make_link(StoreOpts, <<"data/", OtherKeyHash/binary>>, <<"root/other-key/other-key-key">>), + make_group(StoreOpts, <<"rooot/other-key">>), + make_link(StoreOpts, <<"data/", OtherKeyHash/binary>>, <<"rooot/other-key/other-key-key">>), % Step 3: Test reading the structure back % Verify the root is a composite - ?assertEqual(composite, type(StoreOpts, <<"root">>)), + ?assertEqual(composite, type(StoreOpts, <<"rooot">>)), % List the root contents - {ok, RootKeys} = list(StoreOpts, <<"root">>), + {ok, RootKeys} = list(StoreOpts, <<"rooot">>), ?event({root_keys, RootKeys}), ExpectedRootKeys = [<<"commitments">>, <<"other-key">>, <<"target">>], ?assert(lists:all(fun(Key) -> lists:member(Key, ExpectedRootKeys) end, RootKeys)), % Read the target directly - {ok, TargetValueRead} = read(StoreOpts, <<"root/target">>), + {ok, TargetValueRead} = read(StoreOpts, <<"rooot/target">>), ?assertEqual(<<"Foo">>, TargetValueRead), % Verify commitments is a composite - ?assertEqual(composite, type(StoreOpts, <<"root/commitments">>)), + ?assertEqual(composite, type(StoreOpts, <<"rooot/commitments">>)), % Verify other-key is a composite - ?assertEqual(composite, type(StoreOpts, <<"root/other-key">>)), + ?assertEqual(composite, type(StoreOpts, <<"rooot/other-key">>)), % Step 4: Test programmatic reconstruction of the nested map - ReconstructedMap = reconstruct_map(StoreOpts, <<"root">>), + ReconstructedMap = reconstruct_map(StoreOpts, <<"rooot">>), ?event({reconstructed_map, ReconstructedMap}), % Verify the reconstructed map matches the original structure ?assert(hb_message:match(OriginalMap, ReconstructedMap)), @@ -1194,5 +1265,74 @@ list_with_link_test() -> ?event({link_children, LinkChildren}), ?assertEqual(ExpectedChildren, lists:sort(LinkChildren)), stop(StoreOpts). 
+
+resolve_below_max_test() ->
+    init(),
+    StoreOpts = default_test_opts(),
+
+    start(StoreOpts),
+    reset(StoreOpts),
+
+    GroupName = hb_message:id(#{}),
+    make_group(StoreOpts, GroupName),
+    Key1 = <<GroupName/binary, "/key1">>,
+    ok = write(StoreOpts, Key1, <<"Value">>),
+    LastLink = lists:foldl(
+        fun (ID, Link) ->
+            BinID = integer_to_binary(ID),
+            NewLink = <<"link_", BinID/binary>>,
+            make_link(StoreOpts, Link, NewLink),
+            NewLink
+        end,
+        GroupName,
+        lists:seq(1, 88)
+    ),
+    PathToResolve = <<LastLink/binary, "/key1">>,
+    Result = resolve(StoreOpts, PathToResolve),
+    % Return the resolved link path
+    ?assertEqual(Key1, Result).
+
+resolve_above_max_test() ->
+    init(),
+    StoreOpts = default_test_opts(),
+
+    start(StoreOpts),
+    reset(StoreOpts),
+
+    GroupName = hb_message:id(#{}),
+    make_group(StoreOpts, GroupName),
+    Key1 = <<GroupName/binary, "/key1">>,
+    ok = write(StoreOpts, Key1, <<"Value">>),
+    LastLink = lists:foldl(
+        fun (ID, Link) ->
+            BinID = integer_to_binary(ID),
+            NewLink = <<"link_", BinID/binary>>,
+            make_link(StoreOpts, Link, NewLink),
+            NewLink
+        end,
+        GroupName,
+        lists:seq(1, ?MAX_REDIRECTS + 1)
+    ),
+    PathToResolve = <<LastLink/binary, "/key1">>,
+    Result = resolve(StoreOpts, PathToResolve),
+    % Return the same path as given
+    ?assertEqual(PathToResolve, Result).
+
+shard_key_test() ->
+    InvalidKey1 = <<"data/xpto">>,
+    ?assertError({invalid_key_for_sharding, <<"xpto">>}, shard_key(InvalidKey1)),
+    InvalidKey2 = <<"xpto">>,
+    ?assertException(error, {invalid_key_for_sharding, InvalidKey2}, shard_key(InvalidKey2)),
+    Key1 = <<"data/xpto1">>,
+    ?assertEqual(<<"data/xpto/1">>, shard_key(Key1)),
+    Key2 = <<"xpto1">>,
+    ?assertEqual(<<"xpto/1">>, shard_key(Key2)),
+    %% Only first key is sharded.
+    Key3 = <<"xpto1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa">>,
+    ?assertEqual(<<"xpto/1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa">>, shard_key(Key3)),
+    Key4 = <<"data/xpto1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa">>,
+    ?assertEqual(<<"data/xpto/1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa">>, shard_key(Key4)),
+    ok.
+
 -endif.
 -endif.
\ No newline at end of file

From db9b0d73d3fe029526fd526d41648a04534fc246 Mon Sep 17 00:00:00 2001
From: speeddragon <speeddragon@users.noreply.github.com>
Date: Fri, 7 Nov 2025 17:24:02 +0000
Subject: [PATCH 7/7] fix: Configure erlcloud for S3 profile release

Add erlcloud to release dependencies under S3 profile.
Remove unused functions, improve write retry behavior, and clean up
minor code style issues.
---
 rebar.config        |   5 +-
 src/hb_store.erl    |   2 +-
 src/hb_store_s3.erl | 136 +++++++++++++++++++++++++-------------------
 3 files changed, 80 insertions(+), 63 deletions(-)

diff --git a/rebar.config b/rebar.config
index 14d055860..4dc40d1fe 100644
--- a/rebar.config
+++ b/rebar.config
@@ -43,13 +43,14 @@
 ]},
 {s3, [
     {deps, [
-        {erlcloud, {git, "https://github.com/erlcloud/erlcloud.git", 
+        {erlcloud, {git, "https://github.com/erlcloud/erlcloud.git",
             {ref, "fe58402b40b93a20176d12b4bc211ea6a5b0d915"}}
         }
     ]},
     {erl_opts, [
         {d, 'ENABLE_S3', true}
-    ]}
+    ]},
+    {relx, [{release, {'hb', "0.0.1"}, [hb, b64fast, cowboy, gun, luerl, prometheus, prometheus_cowboy, elmdb, erlcloud]}]}
 ]},
 {http3, [
     {deps, [
diff --git a/src/hb_store.erl b/src/hb_store.erl
index bbec1edc4..891ee8b1e 100644
--- a/src/hb_store.erl
+++ b/src/hb_store.erl
@@ -1063,4 +1063,4 @@ make_link_access_test() ->
     ReadResult = read(StoreList, SourceKey),
     ?event(testing, {read_linked_value, ReadResult}),
     ?assertEqual({ok, TestValue}, ReadResult),
-    ?assertEqual(ok, LinkResult).
+    ?assertEqual(ok, LinkResult).
\ No newline at end of file
diff --git a/src/hb_store_s3.erl b/src/hb_store_s3.erl
index 643222c0d..b0b907a00 100644
--- a/src/hb_store_s3.erl
+++ b/src/hb_store_s3.erl
@@ -21,9 +21,7 @@
 -ifndef(ENABLE_S3).
 -export([start/1]).
-
 start(_) -> error(s3_profile_not_enabled).
-
 -else.
 -behaviour(hb_store).
 %% Store behavior callbacks
@@ -33,9 +31,6 @@ start(_) -> error(s3_profile_not_enabled).
 -export([path/2, add_path/3]).
 -export([default_test_opts/0, get_config/1]).

-%% Helper functions
--export([match/2]).
-
 -include("include/hb.hrl").
 -include_lib("erlcloud/include/erlcloud_aws.hrl").

@@ -48,16 +43,16 @@ start(_) -> error(s3_profile_not_enabled).
 -define(DEFAULT_REGION, <<"us-east-1">>).
 -define(DEFAULT_ENDPOINT, <<"https://s3.amazonaws.com">>).
 -define(DEFAULT_FORCE_PATH_STYLE, <<"false">>).
--define(MAX_REDIRECTS, 100). % Only resolve up to 100 links to data
--define(LINK_MARKER, <<"link:">>).
-%% Namespace for storing link objects separately to avoid file collisions
--define(CREATE_GROUP_KEY, <<"make_group">>).
-
+-define(MAX_REDIRECTS, 100).
 -define(DEFAULT_RETRY_DELAY, 1000). % Wait for 1 second before retry.
 -define(DEFAULT_RETRY_MODE, exp_backoff).
 -define(DEFAULT_RETRIES, 5). % Retry up to 5 times before giving up.
 -define(DEFAULT_MAX_RETRY_DELAY, 300000). % Max 5 minutes waiting to retry.
-
+-define(LINK_MARKER, <<"link:">>).
+% Key that symbolizes a group, since directories don't exist in S3.
+-define(CREATE_GROUP_KEY, <<"make_group">>).
+% Split the first key in two: one part of 4 bytes, and the other with
+% the remainder.
 -define(SHARD_CUT, 4).

 %% @doc Initialize the S3 store connection.
@@ -87,7 +82,7 @@ start(Opts) ->
         s3_bucket_after_host = false,
         s3_bucket_access_method = ForcePathStyle,
         aws_region = Region,
-        % Use `gun_pool` to define a connection pool.
+        % Use `gun_pool` to define a connection pool. Default is `httpc`
         http_client = fun gun_request/6
     },
     ok ?= test_bucket_access(Bucket, Config),
@@ -200,7 +195,7 @@ get_config(Opts) ->
     end.

 %% @doc Write a value to a key in S3.
--spec write(opts(), key(), value()) -> ok.
+-spec write(opts(), key(), value()) -> ok | no_return().
 write(Opts, Key, Value) when is_list(Key) ->
     write(Opts, hb_store:join(Key), Value);
 write(Opts, Key, Value) when is_binary(Key) ->
@@ -208,7 +203,7 @@ write(Opts, Key, Value) when is_binary(Key) ->
 write(_Opts, Key, _Value, 0) ->
     ?event(warning, {max_retries_reached_s3_write, {key, Key}}),
-    ok;
+    erlang:error({max_retries_reached_s3_write, {key, Key}});
 write(Opts, Key, Value, AttemptsRemaining) ->
@@ -222,7 +217,7 @@ write(Opts, Key, Value, AttemptsRemaining) ->
     try erlcloud_s3:put_object(BucketStr, ShardedKeyStr, Value, [], Config) of
-        L when is_list(L) -> ok
+        Response when is_list(Response) -> ok
     catch
@@ -350,6 +345,15 @@ read_direct(Opts, Key) ->
             not_found
     end.

+%% @doc Shard the first key (excluding `data`) into 2 parts defined
+%% by ?SHARD_CUT:
+%% - String with ?SHARD_CUT length
+%% - String with the remaining length
+%%
+%% NOTE: Keys with byte size of 4 or less aren't supported (except `data`).
+%%
+%% > shard_key(<<"UDgFxz7qUcB_TijjDfhUpXD3UGXpw8Xq6OrpoDiv3Y0">>).
+%% <<"UDgF/xz7qUcB_TijjDfhUpXD3UGXpw8Xq6OrpoDiv3Y0">>
 -spec shard_key(binary()) -> binary().
 shard_key(<<"data/", DataKey/binary>>) ->
     ShardedKey = shard_key(DataKey),
@@ -363,6 +367,7 @@ shard_key(Key) when is_binary(Key) andalso byte_size(Key) > ?SHARD_CUT ->
         _ ->
             shard_key_inner(Key)
     end;
+%% `data` and `compute` are more specific prefixes that will not be sharded
 shard_key(<<"data">>) -> <<"data">>;
 shard_key(Key) ->
     ShardCut = integer_to_binary(?SHARD_CUT),
@@ -411,7 +416,6 @@ create_parent_groups(Opts, Current, [Next | Rest]) ->
     GroupKey = create_make_group_key(GroupPath),
     GroupPathValue = read_direct(Opts, GroupPath),
     GroupKeyValue = read_direct(Opts, GroupKey),
-
     % Only create group if it doesn't already exist.
     case {GroupPathValue, GroupKeyValue} of
@@ -459,8 +463,7 @@ is_link(Value) ->
     end.

 %% @doc Create a group (virtual directory).
-%% In S3, directories don't really exist, so this is a no-op.
-%% Groups are detected by listing operations.
+%% In S3, directories don't really exist. We create a file to represent them.
 -spec make_group(opts(), key()) -> ok.
 make_group(Opts, Path) ->
     GroupKey = create_make_group_key(Path),
@@ -594,7 +597,8 @@ head_exists(Opts, Key) when is_binary(Key) ->
     ShardedKeyStr = hb_util:list(ShardedKey),
     ?event(store_s3, {head_exists, {key, Key}}),
     try
-        is_list(erlcloud_s3:head_object(BucketStr, ShardedKeyStr, [], Config))
+        Response = erlcloud_s3:head_object(BucketStr, ShardedKeyStr, [], Config),
+        is_list(Response)
     catch
         _:_ -> false
     end.
@@ -640,7 +644,6 @@ resolve_path_accumulate(Opts, [Head|Tail], AccPath, Depth) ->
             % Replace the accumulated path with the link target and
             % continue with remaining segments
             NewPath = LinkSegments ++ Tail,
-
             resolve_path_segments(Opts, NewPath, Depth + 1);
         false ->
             resolve_path_accumulate(Opts, Tail, [Head | AccPath], Depth)
@@ -687,6 +690,7 @@ reset(Opts) ->
             {error, reset_not_confirmed}
     end.

+%% @doc Delete all objects from a bucket.
 delete_all_objects(Opts) ->
     #{bucket := Bucket, config := Config} = get_config(Opts),
     BucketStr = hb_util:list(Bucket),
@@ -721,12 +725,6 @@ scope() -> local.
 scope(#{ <<"scope">> := Scope }) -> Scope;
 scope(_Opts) -> scope().

-%% @doc Match keys based on a template.
-%% Simple implementation - just returns not_found for now.
--spec match(opts(), map()) -> {ok, [binary()]} | not_found.
-match(_Opts, _Template) ->
-    not_found.
-
 %% @doc Integration test suite demonstrating basic store operations.
 %%
 %% The following functions implement integration tests using EUnit to verify that
@@ -735,7 +733,6 @@ match(_Opts, _Template) ->
 %% resolution, and type detection.
 %%
 %% Be sure that the MinIO server is running before executing the integration tests.
-
 default_test_opts() ->
     #{
         <<"store-module">> => ?MODULE,
@@ -755,24 +752,21 @@ default_test_opts() ->
 %% This test creates a temporary database, writes a key-value pair, reads it
 %% back to verify correctness, and cleans up by stopping the database. It
 %% serves as a sanity check that the basic storage mechanism is working.
-
 init() ->
     %% Needed to use the gun http client used by HB.
     application:ensure_all_started(hb).

-sharding_test() ->
+%% @doc Testing default operations for basic store functions.
+simple_test() ->
     init(),
     StoreOpts = default_test_opts(),
     start(StoreOpts),
     reset(StoreOpts),
-
     Value = <<"value">>,
     Key = <<"TWaabU6nSWpn7zPaiWSd9gQkfNwa1t3onrDRNB-bFiI">>,
     %% Write
     ok = write(StoreOpts, Key, Value),
     %% Confirm sharding
-
     #{bucket := Bucket, config := Config} = get_config(StoreOpts),
     Result = erlcloud_s3:head_object(
         hb_util:list(Bucket),
         "TWaa/bU6nSWpn7zPaiWSd9gQkfNwa1t3onrDRNB-bFiI",
         Config
     ),
     ?assert(is_list(Result)),
-
     %% Read
     {ok, Response} = read(StoreOpts, Key),
     ?assertEqual(Value, Response),
     %% Head
     ?assert(head_exists(StoreOpts, Key)),
     %% Delete
     delete_object(StoreOpts, Key),
     ?assertNot(head_exists(StoreOpts, Key)),
     % Group
     GroupKey = <<"UDgFxz7qUcB_TijjDfhUpXD3UGXpw8Xq6OrpoDiv3Y0">>,
     make_group(StoreOpts, GroupKey),
-    write(StoreOpts, <<GroupKey/binary, "/content-type">>, <<"application/json">>),
+    Key2 = add_path(#{}, GroupKey, <<"content-type">>),
+    write(StoreOpts, Key2, <<"application/json">>),
     %% List
     R = list(StoreOpts, GroupKey),
     ?assertEqual({ok,[<<"content-type">>]}, R),
     ok = stop(StoreOpts).

+%% @doc Testing reading a value where the key doesn't exist.
 not_found_test() ->
     init(),
     StoreOpts = default_test_opts(),
@@ -808,12 +803,15 @@ not_found_test() ->
     ?assertEqual(not_found, Result),
     ok = stop(StoreOpts).

+%% @doc Testing starting the store with a bucket that doesn't exist.
 bucket_not_found_test() ->
     init(),
     StoreOpts = (default_test_opts())#{<<"bucket">> => <<"invalid_bucket">>},
     ?assertError({bucket_access_failed, {aws_error, {http_error, 400, _, _}}}, start(StoreOpts)),
     ok = stop(StoreOpts).

+%% @doc Test a failure in writing a key/value to the store. It should
+%% retry writing it.
 failed_write_test() ->
     init(),
     StoreOpts = (default_test_opts())#{<<"retry-attempts">> => 2},
     start(StoreOpts),
     reset(StoreOpts),
     Key = hb_message:id(#{}),
     Value = <<"value">>,
-
+    % Mock S3 dependency to return 400 error
     ok = meck:new(erlcloud_s3, [unstick, passthrough]),
     XMLBody = <<"">>,
     ok = meck:expect(erlcloud_s3, put_object, fun(_, _, _, _, _) ->
         error({aws_error,{http_error, 400, "Bad Request", XMLBody}})
     end),
-
-    {Time, Result} = timer:tc(fun() -> write(StoreOpts, Key, Value) end, second),
+    % Make sure it spends time retrying the write to the store
+    {Time, Result} = timer:tc(fun() ->
+            ?assertError(
+                {max_retries_reached_s3_write, {key, Key}},
+                write(StoreOpts, Key, Value)
+            )
+        end,
+        millisecond
+    ),
     ?assertMatch(ok, Result),
-    ?assert(Time >= 3),
-
+    ?assert(Time >= 3*?DEFAULT_RETRY_DELAY),
     ?assert(meck:called(erlcloud_s3, put_object, ['_', '_', '_', '_', '_'])),
+    % Unload and stop store
     ok = meck:unload(erlcloud_s3),
     ok = stop(StoreOpts).

+%% @doc Test a failure in reading a key from the S3 data source.
 failed_read_test() ->
     init(),
     StoreOpts = default_test_opts(),
     start(StoreOpts),
     reset(StoreOpts),
     Key = hb_message:id(#{}),
-
+    % Mock S3 dependency to return 400 error
     ok = meck:new(erlcloud_s3, [passthrough]),
     XMLBody = <<"">>,
     ok = meck:expect(erlcloud_s3, get_object, fun(_, _, _, _) ->
         error({aws_error,{http_error, 400, "Bad Request", XMLBody}})
     end),
-
+    % Read key and test result
     Result = read(StoreOpts, Key),
     ?assertMatch(not_found, Result),
-
     ?assert(meck:called(erlcloud_s3, get_object, ['_', '_', '_', '_'])),
+    % Unload and stop store
     ok = meck:unload(erlcloud_s3),
     ok = stop(StoreOpts).
@@ -1049,7 +1055,7 @@ cache_style_test() ->
     StoreOpts = default_test_opts(),
     % Start the store
     hb_store:start(StoreOpts),
-    reset(StoreOpts),
+    hb_store:reset(StoreOpts),
     % Test writing through hb_store interface
     ok = hb_store:write(StoreOpts, <<"test-key">>, <<"test-value">>),
     % Test reading through hb_store interface
@@ -1266,17 +1272,18 @@ list_with_link_test() ->
     ?assertEqual(ExpectedChildren, lists:sort(LinkChildren)),
     stop(StoreOpts).

-resolve_below_max_test() ->
+%% Test that links resolve below the maximum number of link redirections.
+resolve_below_max_redirect_test() ->
     init(),
     StoreOpts = default_test_opts(),
-
     start(StoreOpts),
     reset(StoreOpts),
-
+    % Create a group with a child
     GroupName = hb_message:id(#{}),
     make_group(StoreOpts, GroupName),
     Key1 = <<GroupName/binary, "/key1">>,
     ok = write(StoreOpts, Key1, <<"Value">>),
+    % Create a link chain (each link refers to the previous one)
     LastLink = lists:foldl(
         fun (ID, Link) ->
             BinID = integer_to_binary(ID),
             NewLink = <<"link_", BinID/binary>>,
             make_link(StoreOpts, Link, NewLink),
             NewLink
         end,
         GroupName,
-        lists:seq(1, 88)
+        lists:seq(1, ?MAX_REDIRECTS)
     ),
+    % Resolve link path
     PathToResolve = <<LastLink/binary, "/key1">>,
     Result = resolve(StoreOpts, PathToResolve),
     % Return the resolved link path
     ?assertEqual(Key1, Result).

-resolve_above_max_test() ->
+%% Test that the original path is returned when the number of redirections
+%% exceeds the maximum.
+resolve_above_max_redirect_test() ->
     init(),
     StoreOpts = default_test_opts(),
-
     start(StoreOpts),
     reset(StoreOpts),
-
+    % Create a group with a child
     GroupName = hb_message:id(#{}),
     make_group(StoreOpts, GroupName),
     Key1 = <<GroupName/binary, "/key1">>,
     ok = write(StoreOpts, Key1, <<"Value">>),
+    % Create a link chain (each link refers to the previous one)
     LastLink = lists:foldl(
         fun (ID, Link) ->
             BinID = integer_to_binary(ID),
             NewLink = <<"link_", BinID/binary>>,
             make_link(StoreOpts, Link, NewLink),
             NewLink
         end,
         GroupName,
         lists:seq(1, ?MAX_REDIRECTS + 1)
     ),
+    % Resolve link path
     PathToResolve = <<LastLink/binary, "/key1">>,
     Result = resolve(StoreOpts, PathToResolve),
     % Return the same path as given
     ?assertEqual(PathToResolve, Result).

-shard_key_test() ->
+%% @doc Test invalid keys, i.e. keys whose length is less than or equal
+%% to ?SHARD_CUT.
+invalid_sharded_key_test() ->
     InvalidKey1 = <<"data/xpto">>,
     ?assertError({invalid_key_for_sharding, <<"xpto">>}, shard_key(InvalidKey1)),
     InvalidKey2 = <<"xpto">>,
-    ?assertException(error, {invalid_key_for_sharding, InvalidKey2}, shard_key(InvalidKey2)),
+    ?assertException(error, {invalid_key_for_sharding, InvalidKey2}, shard_key(InvalidKey2)).
+
+%% @doc Shard valid keys.
+shard_key_test() ->
     Key1 = <<"data/xpto1">>,
     ?assertEqual(<<"data/xpto/1">>, shard_key(Key1)),
     Key2 = <<"xpto1">>,
-    ?assertEqual(<<"xpto/1">>, shard_key(Key2)),
-    %% Only first key is sharded.
-    Key3 = <<"xpto1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa">>,
-    ?assertEqual(<<"xpto/1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa">>, shard_key(Key3)),
-    Key4 = <<"data/xpto1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa">>,
-    ?assertEqual(<<"data/xpto/1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa">>, shard_key(Key4)),
-    ok.
-
+    ?assertEqual(<<"xpto/1">>, shard_key(Key2)).
+
+%% @doc Test that only the first key segment is sharded (the second when
+%% the key starts with `data/`).
+only_first_key_is_sharded_test() ->
+    Key1 = <<"xpto1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa">>,
+    ?assertEqual(<<"xpto/1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa">>, shard_key(Key1)),
+    Key2 = <<"data/xpto1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa">>,
+    ?assertEqual(<<"data/xpto/1/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa">>, shard_key(Key2)).
 -endif.
 -endif.
\ No newline at end of file
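For reference, the integration tests above assume the MinIO service from test/docker-compose-s3.yml is reachable on localhost:9000 with the minioadmin credentials from default_test_opts/0. A typical workflow (exact commands may vary with local tooling) is to bring the container up with `docker compose -f test/docker-compose-s3.yml up -d` and then run the suite under the s3 profile, e.g. `rebar3 as s3 eunit --module=hb_store_s3`, so that ENABLE_S3 is defined and erlcloud is included in the build.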