From 4f86502dfe0dcd803d7edf8dd5e3cdcd965267d0 Mon Sep 17 00:00:00 2001 From: Noah Date: Fri, 12 Sep 2025 14:49:31 -0700 Subject: [PATCH 1/8] fix HTTP port parsing, dial TLS correctly, follow 301 redirects --- src/hb_http_client.erl | 98 ++++++++++++++++++++++++++++++------------ 1 file changed, 70 insertions(+), 28 deletions(-) diff --git a/src/hb_http_client.erl b/src/hb_http_client.erl index 4df7e3ce9..cb1e8c859 100644 --- a/src/hb_http_client.erl +++ b/src/hb_http_client.erl @@ -105,15 +105,14 @@ httpc_req(Args, _, Opts) -> end. gun_req(Args, ReestablishedConnection, Opts) -> - StartTime = os:system_time(millisecond), - #{ peer := Peer, path := Path, method := Method } = Args, - Response = + StartTime = os:system_time(millisecond), + #{ peer := Peer, path := Path, method := Method } = Args, + Response = case catch gen_server:call(?MODULE, {get_connection, Args, Opts}, infinity) of {ok, PID} -> ar_rate_limiter:throttle(Peer, Path, Opts), case request(PID, Args, Opts) of - {error, Error} when Error == {shutdown, normal}; - Error == noproc -> + {error, Error} when Error == {shutdown, normal}; Error == noproc -> case ReestablishedConnection of true -> {error, client_error}; @@ -121,30 +120,41 @@ gun_req(Args, ReestablishedConnection, Opts) -> req(Args, true, Opts) end; Reply -> - Reply - end; + case Reply of + {_Ok, 301, RedirectRes, _} -> + handle_redirect( + Args, + ReestablishedConnection, + Opts, + RedirectRes, + Reply + ); + _ -> + Reply + end + end; {'EXIT', _} -> {error, client_error}; Error -> Error - end, - EndTime = os:system_time(millisecond), - %% Only log the metric for the top-level call to req/2 - not the recursive call - %% that happens when the connection is reestablished. - case ReestablishedConnection of - true -> - ok; - false -> - record_duration(#{ - <<"request-method">> => method_to_bin(Method), - <<"request-path">> => hb_util:bin(Path), - <<"status-class">> => get_status_class(Response), - <<"duration">> => EndTime - StartTime - }, - Opts - ) - end, - Response. + end, + EndTime = os:system_time(millisecond), + %% Only log the metric for the top-level call to req/2 - not the recursive call + %% that happens when the connection is reestablished. + case ReestablishedConnection of + true -> + ok; + false -> + record_duration(#{ + <<"request-method">> => method_to_bin(Method), + <<"request-path">> => hb_util:bin(Path), + <<"status-class">> => get_status_class(Response), + <<"duration">> => EndTime - StartTime + }, + Opts + ) + end, + Response. %% @doc Record the duration of the request in an async process. We write the %% data to prometheus if the application is enabled, as well as invoking the @@ -455,6 +465,32 @@ terminate(Reason, #state{ status_by_pid = StatusByPID }) -> %%% Private functions. %%% ================================================================== +handle_redirect(Args, ReestablishedConnection, Opts, Res, Reply) -> + case lists:keyfind(<<"location">>, 1, Res) of + false -> + % Server returned a 301 but no Location header, so we can't follow the redirect. + Reply; + {_LocationHeaderName, Location} -> + case uri_string:parse(Location) of + {error, _Reason, _Detail} -> + % Server returned a Location header but the URI was malformed. + Reply; + Parsed -> + #{ scheme := NewScheme, host := NewHost, path := NewPath } = Parsed, + NewPeer = lists:flatten( + io_lib:format( + "~s://~s~s", + [NewScheme, NewHost, NewPath] + ) + ), + NewArgs = Args#{ + peer := NewPeer, + path := NewPath + }, + gun_req(NewArgs, ReestablishedConnection, Opts) + end + end. + %% @doc Safe wrapper for prometheus_gauge:inc/2. inc_prometheus_gauge(Name) -> case application:get_application(prometheus) of @@ -481,7 +517,13 @@ inc_prometheus_counter(Name, Labels, Value) -> end. open_connection(#{ peer := Peer }, Opts) -> - {Host, Port} = parse_peer(Peer, Opts), + ParsedPeer = uri_string:parse(iolist_to_binary(Peer)), + #{ scheme := Scheme, host := Host } = ParsedPeer, + DefaultPort = case Scheme of + <<"https">> -> 443; + <<"http">> -> 80 + end, + Port = maps:get(port, ParsedPeer, DefaultPort), ?event(http_outbound, {parsed_peer, {peer, Peer}, {host, Host}, {port, Port}}), BaseGunOpts = #{ @@ -526,7 +568,7 @@ open_connection(#{ peer := Peer }, Opts) -> {transport, Transport} } ), - gun:open(Host, Port, GunOpts). + gun:open(hb_util:list(Host), Port, GunOpts). parse_peer(Peer, Opts) -> Parsed = uri_string:parse(Peer), @@ -755,4 +797,4 @@ get_status_class(Data) when is_binary(Data) -> get_status_class(Data) when is_atom(Data) -> atom_to_binary(Data); get_status_class(_) -> - <<"unknown">>. \ No newline at end of file + <<"unknown">>. From 10ee035515b66576bd07ab952fdbdf05d2ecb0a4 Mon Sep 17 00:00:00 2001 From: Noah Date: Mon, 15 Sep 2025 08:55:55 -0700 Subject: [PATCH 2/8] fix httpc port / scheme parsing --- src/hb_http_client.erl | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/src/hb_http_client.erl b/src/hb_http_client.erl index cb1e8c859..636dc1d75 100644 --- a/src/hb_http_client.erl +++ b/src/hb_http_client.erl @@ -35,11 +35,13 @@ httpc_req(Args, _, Opts) -> body := Body } = Args, ?event({httpc_req, Args}), - {Host, Port} = parse_peer(Peer, Opts), - Scheme = case Port of - 443 -> "https"; - _ -> "http" + ParsedPeer = uri_string:parse(iolist_to_binary(Peer)), + #{ scheme := Scheme, host := Host } = ParsedPeer, + DefaultPort = case Scheme of + <<"https">> -> 443; + <<"http">> -> 80 end, + Port = maps:get(port, ParsedPeer, DefaultPort), ?event(http_client, {httpc_req, {explicit, Args}}), URL = binary_to_list(iolist_to_binary([Scheme, "://", Host, ":", integer_to_binary(Port), Path])), FilteredHeaders = hb_maps:without([<<"content-type">>, <<"cookie">>], Headers, Opts), @@ -522,7 +524,7 @@ open_connection(#{ peer := Peer }, Opts) -> DefaultPort = case Scheme of <<"https">> -> 443; <<"http">> -> 80 - end, + end, Port = maps:get(port, ParsedPeer, DefaultPort), ?event(http_outbound, {parsed_peer, {peer, Peer}, {host, Host}, {port, Port}}), BaseGunOpts = @@ -570,21 +572,6 @@ open_connection(#{ peer := Peer }, Opts) -> ), gun:open(hb_util:list(Host), Port, GunOpts). -parse_peer(Peer, Opts) -> - Parsed = uri_string:parse(Peer), - case Parsed of - #{ host := Host, port := Port } -> - {hb_util:list(Host), Port}; - URI = #{ host := Host } -> - { - hb_util:list(Host), - case hb_maps:get(scheme, URI, undefined, Opts) of - <<"https">> -> 443; - _ -> hb_opts:get(port, 8734, Opts) - end - } - end. - reply_error([], _Reason) -> ok; reply_error([PendingRequest | PendingRequests], Reason) -> From 3f2df7f6bba650517fad759a64f4d100a129b44d Mon Sep 17 00:00:00 2001 From: Noah Date: Mon, 15 Sep 2025 09:23:03 -0700 Subject: [PATCH 3/8] http client: properly handle redirects which include an explicit port --- src/hb_http_client.erl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/hb_http_client.erl b/src/hb_http_client.erl index 636dc1d75..e9b286e08 100644 --- a/src/hb_http_client.erl +++ b/src/hb_http_client.erl @@ -470,7 +470,7 @@ terminate(Reason, #state{ status_by_pid = StatusByPID }) -> handle_redirect(Args, ReestablishedConnection, Opts, Res, Reply) -> case lists:keyfind(<<"location">>, 1, Res) of false -> - % Server returned a 301 but no Location header, so we can't follow the redirect. + % There's no Location header, so we can't follow the redirect. Reply; {_LocationHeaderName, Location} -> case uri_string:parse(Location) of @@ -479,10 +479,15 @@ handle_redirect(Args, ReestablishedConnection, Opts, Res, Reply) -> Reply; Parsed -> #{ scheme := NewScheme, host := NewHost, path := NewPath } = Parsed, + Port = maps:get(port, Parsed, undefined), + FormattedPort = case Port of + undefined -> ""; + _ -> lists:flatten(io_lib:format(":~i", [Port])) + end, NewPeer = lists:flatten( io_lib:format( - "~s://~s~s", - [NewScheme, NewHost, NewPath] + "~s://~s~s~s", + [NewScheme, NewHost, FormattedPort, NewPath] ) ), NewArgs = Args#{ From 6fccb674c58b6ecc5e571b15196380ee22c08ce2 Mon Sep 17 00:00:00 2001 From: Noah Date: Mon, 15 Sep 2025 10:06:48 -0700 Subject: [PATCH 4/8] http client: properly enable TLS over non-443 port --- src/hb_http_client.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hb_http_client.erl b/src/hb_http_client.erl index e9b286e08..bd86c3ede 100644 --- a/src/hb_http_client.erl +++ b/src/hb_http_client.erl @@ -552,8 +552,8 @@ open_connection(#{ peer := Peer }, Opts) -> ) }, Transport = - case Port of - 443 -> tls; + case Scheme of + <<"https">> -> tls; _ -> tcp end, DefaultProto = @@ -565,7 +565,7 @@ open_connection(#{ peer := Peer }, Opts) -> GunOpts = case Proto = hb_opts:get(protocol, DefaultProto, Opts) of http3 -> BaseGunOpts#{protocols => [http3], transport => quic}; - _ -> BaseGunOpts + _ -> BaseGunOpts#{transport => Transport} end, ?event(http_outbound, {gun_open, From 7522ca1f294504cde693bc55bd1380fbb138f155 Mon Sep 17 00:00:00 2001 From: Noah Date: Mon, 15 Sep 2025 10:10:14 -0700 Subject: [PATCH 5/8] http client: don't silently handle unexpected/malformed schemes --- src/hb_http_client.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hb_http_client.erl b/src/hb_http_client.erl index bd86c3ede..7411c1af1 100644 --- a/src/hb_http_client.erl +++ b/src/hb_http_client.erl @@ -554,7 +554,7 @@ open_connection(#{ peer := Peer }, Opts) -> Transport = case Scheme of <<"https">> -> tls; - _ -> tcp + <<"http">> -> tcp end, DefaultProto = case hb_features:http3() of From b64a58e24d734df7f3de06290171fab75482e285 Mon Sep 17 00:00:00 2001 From: Noah Date: Mon, 15 Sep 2025 12:04:14 -0700 Subject: [PATCH 6/8] http client: parameterize automatic redirects --- src/hb_http_client.erl | 7 +++++-- src/hb_opts.erl | 4 +++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/hb_http_client.erl b/src/hb_http_client.erl index 7411c1af1..9bdeff47b 100644 --- a/src/hb_http_client.erl +++ b/src/hb_http_client.erl @@ -80,9 +80,11 @@ httpc_req(Args, _, Opts) -> } end, ?event({http_client_outbound, Method, URL, Request}), + FollowRedirects = hb_maps:get(http_follow_redirects, Opts, true), + ReqOpts = [{autoredirect, FollowRedirects}], HTTPCOpts = [{full_result, true}, {body_format, binary}], StartTime = os:system_time(millisecond), - case httpc:request(Method, Request, [], HTTPCOpts) of + case httpc:request(Method, Request, ReqOpts, HTTPCOpts) of {ok, {{_, Status, _}, RawRespHeaders, RespBody}} -> EndTime = os:system_time(millisecond), RespHeaders = @@ -122,8 +124,9 @@ gun_req(Args, ReestablishedConnection, Opts) -> req(Args, true, Opts) end; Reply -> + FollowRedirects = hb_maps:get(http_follow_redirects, Opts, true), case Reply of - {_Ok, 301, RedirectRes, _} -> + {_Ok, 301, RedirectRes, _} when FollowRedirects -> handle_redirect( Args, ReestablishedConnection, diff --git a/src/hb_opts.erl b/src/hb_opts.erl index 6d262593b..4e0564eb0 100644 --- a/src/hb_opts.erl +++ b/src/hb_opts.erl @@ -107,6 +107,8 @@ default_message() -> %% What HTTP client should the node use? %% Options: gun, httpc http_client => gun, + %% Should the HTTP client automatically follow 3xx redirects? + http_follow_redirects => true, %% Scheduling mode: Determines when the SU should inform the recipient %% that an assignment has been scheduled for a message. %% Options: aggressive(!), local_confirmation, remote_confirmation, @@ -920,4 +922,4 @@ ensure_node_history_test() -> ] }, ?assertEqual({error, invalid_values}, ensure_node_history(InvalidItems, RequiredOpts)). --endif. \ No newline at end of file +-endif. From ae7b1f0e24aed2fffac38ab01e0dc08ef7f41fbf Mon Sep 17 00:00:00 2001 From: Noah Date: Mon, 15 Sep 2025 12:59:19 -0700 Subject: [PATCH 7/8] http client: parameterize and limit the maximum number of autoredirects --- src/hb_http_client.erl | 12 ++++++++---- src/hb_opts.erl | 3 +++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/hb_http_client.erl b/src/hb_http_client.erl index 9bdeff47b..c17eb672d 100644 --- a/src/hb_http_client.erl +++ b/src/hb_http_client.erl @@ -22,7 +22,10 @@ start_link(Opts) -> req(Args, Opts) -> req(Args, false, Opts). req(Args, ReestablishedConnection, Opts) -> case hb_opts:get(http_client, gun, Opts) of - gun -> gun_req(Args, ReestablishedConnection, Opts); + gun -> + MaxRedirects = hb_maps:get(gun_max_redirects, Opts, 5), + GunArgs = Args#{redirects_left => MaxRedirects}, + gun_req(GunArgs, ReestablishedConnection, Opts); httpc -> httpc_req(Args, ReestablishedConnection, Opts) end. @@ -110,7 +113,7 @@ httpc_req(Args, _, Opts) -> gun_req(Args, ReestablishedConnection, Opts) -> StartTime = os:system_time(millisecond), - #{ peer := Peer, path := Path, method := Method } = Args, + #{ peer := Peer, path := Path, method := Method, redirects_left := RedirectsLeft } = Args, Response = case catch gen_server:call(?MODULE, {get_connection, Args, Opts}, infinity) of {ok, PID} -> @@ -126,9 +129,10 @@ gun_req(Args, ReestablishedConnection, Opts) -> Reply -> FollowRedirects = hb_maps:get(http_follow_redirects, Opts, true), case Reply of - {_Ok, 301, RedirectRes, _} when FollowRedirects -> + {_Ok, 301, RedirectRes, _} when FollowRedirects, RedirectsLeft > 0 -> + RedirectArgs = Args#{ redirects_left := RedirectsLeft - 1 }, handle_redirect( - Args, + RedirectArgs, ReestablishedConnection, Opts, RedirectRes, diff --git a/src/hb_opts.erl b/src/hb_opts.erl index 4e0564eb0..0485b288b 100644 --- a/src/hb_opts.erl +++ b/src/hb_opts.erl @@ -109,6 +109,9 @@ default_message() -> http_client => gun, %% Should the HTTP client automatically follow 3xx redirects? http_follow_redirects => true, + %% For the gun HTTP client, to mitigate resource exhaustion attacks, what's + %% the maximum number of automatic 3xx redirects we'll allow? + gun_max_redirects => 5, %% Scheduling mode: Determines when the SU should inform the recipient %% that an assignment has been scheduled for a message. %% Options: aggressive(!), local_confirmation, remote_confirmation, From 1d92d635c69acf493081beb6e6dc680ca9fe2f98 Mon Sep 17 00:00:00 2001 From: Noah Date: Mon, 15 Sep 2025 13:41:58 -0700 Subject: [PATCH 8/8] http client: handle redirects for all pertinent 3xx responses --- src/hb_http_client.erl | 25 ++++++++++++------------- src/hb_opts.erl | 3 ++- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/hb_http_client.erl b/src/hb_http_client.erl index c17eb672d..317e4a740 100644 --- a/src/hb_http_client.erl +++ b/src/hb_http_client.erl @@ -126,20 +126,19 @@ gun_req(Args, ReestablishedConnection, Opts) -> false -> req(Args, true, Opts) end; - Reply -> + Reply = {_Ok, StatusCode, RedirectRes, _} -> FollowRedirects = hb_maps:get(http_follow_redirects, Opts, true), - case Reply of - {_Ok, 301, RedirectRes, _} when FollowRedirects, RedirectsLeft > 0 -> - RedirectArgs = Args#{ redirects_left := RedirectsLeft - 1 }, - handle_redirect( - RedirectArgs, - ReestablishedConnection, - Opts, - RedirectRes, - Reply - ); - _ -> - Reply + case lists:member(StatusCode, [301, 302, 307, 308]) of + true when FollowRedirects, RedirectsLeft > 0 -> + RedirectArgs = Args#{ redirects_left := RedirectsLeft - 1 }, + handle_redirect( + RedirectArgs, + ReestablishedConnection, + Opts, + RedirectRes, + Reply + ); + _ -> Reply end end; {'EXIT', _} -> diff --git a/src/hb_opts.erl b/src/hb_opts.erl index 0485b288b..b5d8619dc 100644 --- a/src/hb_opts.erl +++ b/src/hb_opts.erl @@ -110,7 +110,8 @@ default_message() -> %% Should the HTTP client automatically follow 3xx redirects? http_follow_redirects => true, %% For the gun HTTP client, to mitigate resource exhaustion attacks, what's - %% the maximum number of automatic 3xx redirects we'll allow? + %% the maximum number of automatic 3xx redirects we'll allow when + %% http_follow_redirects = true? gun_max_redirects => 5, %% Scheduling mode: Determines when the SU should inform the recipient %% that an assignment has been scheduled for a message.