From 2866b658ea0b4a5c3a6cb5b77617e953897a3d97 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 25 Oct 2022 14:59:38 +0300 Subject: [PATCH 01/10] Allow to set custom codec in client / server (#23) Which is reponsible for: * (de)serializing requests / responses * providing request / response metadata for events Also permit wider service / args models depending on chosen server / client codecs. Backport #22. --- src/woody.erl | 4 +- src/woody_client_codec.erl | 81 ++++++++++++++++++ src/woody_client_thrift_v2.erl | 64 +++++++------- src/woody_server_codec.erl | 104 +++++++++++++++++++++++ src/woody_server_thrift_handler.erl | 2 +- src/woody_server_thrift_v2.erl | 124 ++++++++++++++-------------- 6 files changed, 284 insertions(+), 95 deletions(-) create mode 100644 src/woody_client_codec.erl create mode 100644 src/woody_server_codec.erl diff --git a/src/woody.erl b/src/woody.erl index d817d38..afac9f4 100644 --- a/src/woody.erl +++ b/src/woody.erl @@ -34,10 +34,10 @@ %% Thrift -type service_name() :: atom(). --type service() :: {module(), service_name()}. +-type service() :: {module(), service_name()} | any(). -type context() :: woody_context:ctx(). -type func() :: atom(). --type args() :: tuple(). +-type args() :: tuple() | any(). -type request() :: {service(), func(), args()}. -type result() :: _. -type th_handler() :: {service(), handler(options())}. diff --git a/src/woody_client_codec.erl b/src/woody_client_codec.erl new file mode 100644 index 0000000..df3cd44 --- /dev/null +++ b/src/woody_client_codec.erl @@ -0,0 +1,81 @@ +-module(woody_client_codec). + +-type codec() :: module(). + +-export_type([codec/0]). + +-export([get_service_name/2]). +-export([get_rpc_type/3]). + +-export([write_call/6]). +-export([read_result/5]). + +-type buffer() :: iodata(). +-type service() :: woody:service(). +-type func() :: woody:func(). +-type args() :: woody:args(). +-type seqid() :: non_neg_integer(). + +-type result() :: + ok + | {reply, woody:result()} + | {exception, woody:result()}. + +-callback get_service_name(service()) -> + woody:service_name(). + +-callback get_rpc_type(service(), func()) -> + woody:rpc_type(). + +-callback write_call(buffer(), service(), func(), args(), seqid()) -> + {ok, buffer()} + | {error, _Reason}. + +-callback read_result(buffer(), service(), func(), seqid()) -> + {ok, result(), _Rest :: buffer()} + | {error, _Reason}. + +%% + +-spec get_service_name(codec(), service()) -> + woody:service_name(). +get_service_name(thrift_client_codec, {_Module, Service}) -> + Service; +get_service_name(Codec, Service) -> + Codec:get_service_name(Service). + +-spec get_rpc_type(codec(), service(), func()) -> + woody:rpc_type(). +get_rpc_type(thrift_client_codec, Service, Function) -> + woody_util:get_rpc_type(Service, Function); +get_rpc_type(Codec, Service, Function) -> + Codec:get_rpc_type(Service, Function). + +-spec write_call(codec(), buffer(), service(), func(), args(), seqid()) -> + {ok, buffer()} + | {error, _Reason}. +write_call(thrift_client_codec, Buffer, Service, Function, Args, SeqId) -> + thrift_client_codec:write_function_call( + Buffer, + thrift_strict_binary_codec, + Service, + Function, + Args, + SeqId + ); +write_call(Codec, Buffer, Service, Function, Args, SeqId) -> + Codec:write_call(Buffer, Service, Function, Args, SeqId). + +-spec read_result(codec(), buffer(), service(), func(), seqid()) -> + {ok, result(), _Rest :: buffer()} + | {error, _Reason}. +read_result(thrift_client_codec, Buffer, Service, Function, SeqId) -> + thrift_client_codec:read_function_result( + Buffer, + thrift_strict_binary_codec, + Service, + Function, + SeqId + ); +read_result(Codec, Buffer, Service, Function, SeqId) -> + Codec:read_result(Buffer, Service, Function, SeqId). diff --git a/src/woody_client_thrift_v2.erl b/src/woody_client_thrift_v2.erl index 360a13b..fa5cd35 100644 --- a/src/woody_client_thrift_v2.erl +++ b/src/woody_client_thrift_v2.erl @@ -16,6 +16,7 @@ event_handler := woody:ev_handlers(), transport_opts => transport_options(), resolver_opts => woody_resolver:options(), + codec => woody_client_codec:codec(), protocol => thrift, transport => http }. @@ -45,22 +46,24 @@ child_spec(Options) -> hackney_pool:child_spec(Name, maps:to_list(TransportOpts)). -spec call(woody:request(), options(), woody_state:st()) -> woody_client:result(). -call({Service = {_, ServiceName}, Function, Args}, Opts, WoodyState) -> +call({Service, Function, Args}, Opts, WoodyState) -> WoodyContext = woody_state:get_context(WoodyState), + ClientCodec = maps:get(codec, Opts, thrift_client_codec), WoodyState1 = woody_state:add_ev_meta( #{ - service => ServiceName, + service => woody_client_codec:get_service_name(ClientCodec, Service), service_schema => Service, function => Function, - type => woody_util:get_rpc_type(Service, Function), + type => woody_client_codec:get_rpc_type(ClientCodec, Service, Function), args => Args, + codec => ClientCodec, deadline => woody_context:get_deadline(WoodyContext), metadata => woody_context:get_meta(WoodyContext) }, WoodyState ), _ = log_event(?EV_CALL_SERVICE, WoodyState1, #{}), - do_call(Service, Function, Args, Opts, WoodyState1). + do_call(ClientCodec, Service, Function, Args, Opts, WoodyState1). %% %% Internal functions @@ -71,18 +74,25 @@ call({Service = {_, ServiceName}, Function, Args}, Opts, WoodyState) -> -type http_headers() :: [{binary(), binary()}]. -type header_parse_value() :: none | woody:http_header_val(). --define(CODEC, thrift_strict_binary_codec). -define(ERROR_RESP_BODY, <<"parse http response body error">>). -define(ERROR_RESP_HEADER, <<"parse http response headers error">>). -define(BAD_RESP_HEADER, <<"reason unknown due to bad ", ?HEADER_PREFIX/binary, "-error- headers">>). --define(SERVER_VIOLATION_ERROR, +-define(APPLICATION_EXCEPTION_ERROR, {external, result_unexpected, << "server violated thrift protocol: " "sent TApplicationException (unknown exception) with http code 200" >>} ). +-define(EXCESS_BODY_ERROR(Bytes, Result), + {external, result_unexpected, + genlib:format( + "server violated thrift protocol: excess ~p bytes in response: ~p", + [byte_size(Bytes), Result] + )} +). + -spec get_transport_opts(options()) -> woody_client_thrift_http_transport:transport_options(). get_transport_opts(Opts) -> maps:get(transport_opts, Opts, #{}). @@ -91,15 +101,15 @@ get_transport_opts(Opts) -> get_resolver_opts(Opts) -> maps:get(resolver_opts, Opts, #{}). --spec do_call(woody:service(), woody:func(), woody:args(), options(), woody_state:st()) -> woody_client:result(). -do_call(Service, Function, Args, Opts, WoodyState) -> - Buffer = ?CODEC:new(), +-spec do_call(module(), woody:service(), woody:func(), woody:args(), options(), woody_state:st()) -> + woody_client:result(). +do_call(Codec, Service, Function, Args, Opts, WoodyState) -> Result = - case thrift_client_codec:write_function_call(Buffer, ?CODEC, Service, Function, Args, 0) of + case woody_client_codec:write_call(Codec, <<>>, Service, Function, Args, 0) of {ok, Buffer1} -> case send_call(Buffer1, Opts, WoodyState) of {ok, Response} -> - handle_result(Service, Function, Response); + handle_result(Codec, Service, Function, Response); Error -> Error end; @@ -109,32 +119,28 @@ do_call(Service, Function, Args, Opts, WoodyState) -> log_result(Result, WoodyState), map_result(Result). --spec handle_result(woody:service(), woody:func(), binary()) -> _Result. -handle_result(Service, Function, Response) -> - Buffer = ?CODEC:new(Response), - case thrift_client_codec:read_function_result(Buffer, ?CODEC, Service, Function, 0) of - {ok, Result, Leftovers} -> - Bytes = ?CODEC:close(Leftovers), +-spec handle_result(module(), woody:service(), woody:func(), binary()) -> _Result. +handle_result(Codec, Service, Function, Response) -> + case woody_client_codec:read_result(Codec, Response, Service, Function, 0) of + {ok, Result, <<>>} -> case Result of - ok when Bytes == <<>> -> + ok -> {ok, ok}; - {reply, Reply} when Bytes == <<>> -> + {reply, Reply} -> {ok, Reply}; - {exception, #'TApplicationException'{}} when Bytes == <<>> -> - {error, {system, ?SERVER_VIOLATION_ERROR}}; - {exception, Exception} when Bytes == <<>> -> - {error, {business, Exception}}; - _ -> - {error, {excess_response_body, Bytes, Result}} + {exception, #'TApplicationException'{}} -> + {error, {system, ?APPLICATION_EXCEPTION_ERROR}}; + {exception, Exception} -> + {error, {business, Exception}} end; + {ok, Result, Leftovers} -> + {error, ?EXCESS_BODY_ERROR(Leftovers, Result)}; {error, _} = Error -> Error end. -%% erlfmt-ignore --spec send_call(?CODEC:buffer(), options(), woody_state:st()) -> - {ok, binary()} | {error, {system, _}}. - +-spec send_call(iodata(), options(), woody_state:st()) -> + {ok, iodata()} | {error, {system, _}}. send_call(Buffer, #{url := Url} = Opts, WoodyState) -> Context = woody_state:get_context(WoodyState), TransportOpts = get_transport_opts(Opts), diff --git a/src/woody_server_codec.erl b/src/woody_server_codec.erl new file mode 100644 index 0000000..836656a --- /dev/null +++ b/src/woody_server_codec.erl @@ -0,0 +1,104 @@ +-module(woody_server_codec). + +-type codec() :: module(). + +-export_type([codec/0]). +-export_type([invocation/0]). + +-export([get_service_name/2]). + +-export([read_call/3]). +-export([write_result/6]). + +-export([catch_business_exception/4]). + +-type buffer() :: iodata(). +-type service() :: woody:service(). +-type func() :: woody:func(). +-type args() :: woody:args(). +-type seqid() :: non_neg_integer(). + +-type invocation() :: {woody:rpc_type(), func(), args()}. + +-type result() :: + ok + | {reply, woody:result()} + | {exception, _Name, woody:result()}. + +-callback get_service_name(service()) -> + woody:service_name(). + +-callback read_call(buffer(), service()) -> + {ok, seqid(), invocation(), _Rest :: buffer()} + | {error, _Reason}. + +-callback write_result(buffer(), service(), func(), result(), seqid()) -> + {ok, buffer()} + | {error, _Reason}. + +%% + +-spec get_service_name(codec(), service()) -> + woody:service_name(). +get_service_name(thrift_processor_codec, {_Module, Service}) -> + Service; +get_service_name(Codec, Service) -> + Codec:get_service_name(Service). + +-spec read_call(codec(), buffer(), service()) -> + {ok, seqid(), invocation(), _Rest :: buffer()} + | {error, _Reason}. +read_call(thrift_processor_codec, Buffer, Service) -> + case thrift_processor_codec:read_function_call(Buffer, thrift_strict_binary_codec, Service) of + {ok, SeqId, {Type, Function, Args}, Rest} -> + RpcType = maps:get(Type, #{call => call, oneway => cast}), + {ok, SeqId, {RpcType, Function, Args}, Rest}; + {error, _} = Error -> + Error + end; +read_call(Codec, Buffer, Service) -> + Codec:read_call(Buffer, Service). + +-spec write_result(codec(), buffer(), service(), func(), result(), seqid()) -> + {ok, buffer()} + | {error, _Reason}. +write_result(thrift_processor_codec, Buffer, Service, Function, {exception, _Name, {Type, Exception}}, SeqId) -> + thrift_processor_codec:write_function_result( + Buffer, + thrift_strict_binary_codec, + Service, + Function, + {exception, Type, Exception}, + SeqId + ); +write_result(thrift_processor_codec, Buffer, Service, Function, Res, SeqId) -> + thrift_processor_codec:write_function_result( + Buffer, + thrift_strict_binary_codec, + Service, + Function, + Res, + SeqId + ); +write_result(Codec, Buffer, Service, Function, Res, SeqId) -> + Codec:write_result(Buffer, Service, Function, Res, SeqId). + +-spec catch_business_exception(codec(), service(), func(), _Exception) -> + {exception, _Name :: atom(), _TypedException} + | {error, _Reason}. +catch_business_exception(thrift_processor_codec, Service, Function, Exception) -> + case thrift_processor_codec:match_exception(Service, Function, Exception) of + {ok, Type} -> + {exception, get_exception_name(Type, Exception), {Type, Exception}}; + {error, _} = Error -> + Error + end; +catch_business_exception(_Codec, _Service, _Function, _Exception) -> + {error, noimpl}. + +-spec get_exception_name(_Type, woody:result()) -> + atom(). +get_exception_name({{struct, exception, {_Mod, Name}}, _}, _) -> + Name; +get_exception_name(_Type, Exception) -> + element(1, Exception). diff --git a/src/woody_server_thrift_handler.erl b/src/woody_server_thrift_handler.erl index 204fb04..781dfab 100644 --- a/src/woody_server_thrift_handler.erl +++ b/src/woody_server_thrift_handler.erl @@ -84,7 +84,7 @@ invoke_handler(State) -> handle_result(Result, Reply, MsgType). -spec handle_function(woody:handler(woody:options()), woody:func(), woody:args(), woody_state:st()) -> - {ok, woody:result()} | no_return(). + {ok, woody:result()} | {exception, _TypeName, woody:result()} | no_return(). handle_function(Handler, Function, Args, WoodyState) -> _ = woody_event_handler:handle_event(?EV_INVOKE_SERVICE_HANDLER, WoodyState, #{}), {Module, Opts} = woody_util:get_mod_opts(Handler), diff --git a/src/woody_server_thrift_v2.erl b/src/woody_server_thrift_v2.erl index d5c611b..a9c6e3c 100644 --- a/src/woody_server_thrift_v2.erl +++ b/src/woody_server_thrift_v2.erl @@ -71,9 +71,12 @@ -export_type([options/0]). +-type codec() :: woody_server_codec:codec(). + -type route_opts() :: #{ handlers := list(woody:http_handler(woody:th_handler())), event_handler := woody:ev_handlers(), + codec => codec(), protocol => thrift, transport => http, read_body_opts => read_body_opts(), @@ -99,6 +102,7 @@ -type state() :: #{ th_handler => woody:th_handler(), ev_handler := woody:ev_handlers(), + codec => module(), read_body_opts := read_body_opts(), handler_limits := handler_limits(), regexp_meta := re_mp(), @@ -213,6 +217,7 @@ get_route(_, Handler) -> init_state(Opts = #{}) -> #{ ev_handler => maps:get(event_handler, Opts), + codec => maps:get(codec, Opts, thrift_processor_codec), read_body_opts => maps:get(read_body_opts, Opts, #{}), handler_limits => maps:get(handler_limits, Opts, #{}), regexp_meta => compile_filter_meta() @@ -256,6 +261,7 @@ handle( Req, State = #{ url := Url, + codec := ProcessorCodec, woody_state := WoodyState, read_body_opts := ReadBodyOpts, th_handler := ThriftHandler @@ -265,7 +271,7 @@ handle( case get_body(Req, ReadBodyOpts) of {ok, Body, Req1} when byte_size(Body) > 0 -> _ = handle_event(?EV_SERVER_RECEIVE, WoodyState, #{url => Url, status => ok}), - handle_request(Body, ThriftHandler, WoodyState, Req1); + handle_request(ProcessorCodec, Body, ThriftHandler, WoodyState, Req1); {ok, <<>>, Req1} -> reply_client_error(400, <<"body empty">>, Req1, State) end, @@ -294,8 +300,6 @@ terminate(Reason, _Req, #{ev_handler := EvHandler} = Opts) -> %% init functions --define(CODEC, thrift_strict_binary_codec). - %% First perform basic http checks: method, content type, etc, %% then check woody related headers: IDs, deadline, meta. @@ -474,18 +478,15 @@ do_get_body(Body, Req, Opts) -> do_get_body(<>, Req1, Opts) end. --spec handle_request(woody:http_body(), woody:th_handler(), woody_state:st(), cowboy_req:req()) -> cowboy_req:req(). -handle_request(Body, ThriftHandler = {Service, _}, WoodyState, Req) -> +-spec handle_request(codec(), woody:http_body(), woody:th_handler(), woody_state:st(), cowboy_req:req()) -> + cowboy_req:req(). +handle_request(Codec, Body, ThriftHandler = {Service, _}, WoodyState, Req) -> ok = woody_monitor_h:set_event(?EV_SERVICE_HANDLER_RESULT, Req), - Buffer = ?CODEC:new(Body), - case thrift_processor_codec:read_function_call(Buffer, ?CODEC, Service) of - {ok, SeqId, Invocation, Leftovers} -> - case ?CODEC:close(Leftovers) of - <<>> -> - handle_invocation(SeqId, Invocation, ThriftHandler, Req, WoodyState); - Bytes -> - handle_decode_error({excess_response_body, Bytes, Invocation}, Req, WoodyState) - end; + case woody_server_codec:read_call(Codec, Body, Service) of + {ok, SeqId, Invocation, <<>>} -> + handle_invocation(Codec, SeqId, Invocation, ThriftHandler, Req, WoodyState); + {ok, _SeqId, Invocation, Leftovers} -> + handle_error(client_error({excess_request_bytes, Leftovers, Invocation}), Req, WoodyState); {error, Reason} -> handle_decode_error(Reason, Req, WoodyState) end. @@ -518,30 +519,31 @@ client_error({bad_function_name, FName}) -> client_error(Reason) -> {client, woody_util:to_binary([<<"thrift decode error: ">>, woody_error:format_details(Reason)])}. --spec handle_invocation(integer(), Invocation, woody:th_handler(), cowboy_req:req(), woody_state:st()) -> +-spec handle_invocation(codec(), integer(), Invocation, woody:th_handler(), cowboy_req:req(), woody_state:st()) -> cowboy_req:req() when - Invocation :: {call | oneway, woody:func(), woody:args()}. -handle_invocation(SeqId, {ReplyType, Function, Args}, {Service, Handler}, Req, WoodyState) -> - WoodyState1 = add_ev_meta(WoodyState, Service, ReplyType, Function, Args), - case ReplyType of + Invocation :: woody_server_codec:invocation(). +handle_invocation(Codec, SeqId, {RpcType, Function, Args}, {Service, Handler}, Req, WoodyState) -> + WoodyState1 = add_ev_meta(WoodyState, Codec, Service, RpcType, Function, Args), + case RpcType of call -> - Result = handle_call(Handler, Service, Function, Args, WoodyState1), - handle_result(Result, Service, Function, SeqId, Req, WoodyState1); - oneway -> + Result = handle_call(Codec, Handler, Service, Function, Args, WoodyState1), + handle_result(Codec, Result, Service, Function, SeqId, Req, WoodyState1); + cast -> Req1 = reply(200, Req, WoodyState1), - _Result = handle_call(Handler, Service, Function, Args, WoodyState1), + _Result = handle_call(Codec, Handler, Service, Function, Args, WoodyState1), Req1 end. -type call_result() :: ok | {reply, woody:result()} - | {exception, _TypeName, _Exception} + | {exception, _Name :: binary(), _Exception :: woody:result()} | {error, {system, woody_error:system_error()}}. --spec handle_call(woody:handler(_), woody:service(), woody:func(), woody:args(), woody_state:st()) -> call_result(). -handle_call(Handler, Service, Function, Args, WoodyState) -> +-spec handle_call(codec(), woody:handler(_), woody:service(), woody:func(), woody:args(), woody_state:st()) -> + call_result(). +handle_call(Codec, Handler, Service, Function, Args, WoodyState) -> try Result = call_handler(Handler, Function, Args, WoodyState), _ = handle_event( @@ -550,35 +552,42 @@ handle_call(Handler, Service, Function, Args, WoodyState) -> #{status => ok, result => Result} ), case Result of - {ok, ok} -> ok; - {ok, Reply} -> {reply, Reply} + {ok, Reply} -> + {reply, Reply}; + {exception, _Name, BusinessException} -> + _ = handle_event( + ?EV_SERVICE_HANDLER_RESULT, + WoodyState, + #{status => error, class => business, result => BusinessException} + ), + Result end catch throw:Exception:Stack -> - process_handler_throw(Exception, Stack, Service, Function, WoodyState); + process_handler_throw(Codec, Exception, Stack, Service, Function, WoodyState); Class:Reason:Stack -> process_handler_error(Class, Reason, Stack, WoodyState) end. -spec call_handler(woody:handler(_), woody:func(), woody:args(), woody_state:st()) -> - {ok, woody:result()} | no_return(). + {ok, woody:result()} | {exception, _Name, woody:result()} | no_return(). call_handler(Handler, Function, Args, WoodyState) -> woody_server_thrift_handler:handle_function(Handler, Function, Args, WoodyState). --spec process_handler_throw(_Exception, woody_error:stack(), woody:service(), woody:func(), woody_state:st()) -> - {exception, _TypeName, _Exception} +-spec process_handler_throw(codec(), _Ex, woody_error:stack(), woody:service(), woody:func(), woody_state:st()) -> + {exception, _Name, woody:result()} | {error, {system, woody_error:system_error()}}. -process_handler_throw(Exception, Stack, Service, Function, WoodyState) -> - case thrift_processor_codec:match_exception(Service, Function, Exception) of - {ok, TypeName} -> +process_handler_throw(Codec, ThrownException, Stack, Service, Function, WoodyState) -> + case woody_server_codec:catch_business_exception(Codec, Service, Function, ThrownException) of + {exception, _Name, _TypedException} = Result -> _ = handle_event( ?EV_SERVICE_HANDLER_RESULT, WoodyState, - #{status => error, class => business, result => Exception} + #{status => error, class => business, result => ThrownException} ), - {exception, TypeName, Exception}; + Result; {error, _} -> - process_handler_error(throw, Exception, Stack, WoodyState) + process_handler_error(throw, ThrownException, Stack, WoodyState) end. -spec process_handler_error(_Class :: atom(), _Reason, woody_error:stack(), woody_state:st()) -> @@ -599,35 +608,27 @@ process_handler_error(Class, Reason, Stack, WoodyState) -> Error = {internal, result_unexpected, format_unexpected_error(Class, Reason, Stack)}, {error, {system, Error}}. --spec handle_result(call_result(), woody:service(), woody:func(), integer(), Req, woody_state:st()) -> Req when +-spec handle_result(module(), call_result(), woody:service(), woody:func(), integer(), Req, woody_state:st()) -> + Req +when Req :: cowboy_req:req(). -handle_result(Res, Service, Function, SeqId, Req, WoodyState) when Res == ok; element(1, Res) == reply -> - Buffer = ?CODEC:new(), - case thrift_processor_codec:write_function_result(Buffer, ?CODEC, Service, Function, Res, SeqId) of - {ok, Buffer1} -> - Response = ?CODEC:close(Buffer1), +handle_result(Codec, Res, Service, Function, SeqId, Req, WoodyState) when Res == ok; element(1, Res) == reply -> + case woody_server_codec:write_result(Codec, <<>>, Service, Function, Res, SeqId) of + {ok, Response} -> reply(200, cowboy_req:set_resp_body(Response, Req), WoodyState); {error, Reason} -> handle_encode_error(Reason, Req, WoodyState) end; -handle_result(Res = {exception, TypeName, Exception}, Service, Function, SeqId, Req, WoodyState) -> - Buffer = ?CODEC:new(), - case thrift_processor_codec:write_function_result(Buffer, ?CODEC, Service, Function, Res, SeqId) of - {ok, Buffer1} -> - ExceptionName = get_exception_name(TypeName, Exception), - Response = ?CODEC:close(Buffer1), - handle_error({business, {ExceptionName, Response}}, Req, WoodyState); +handle_result(Codec, {exception, Name, _Exception} = Result, Service, Function, SeqId, Req, WoodyState) -> + case woody_server_codec:write_result(Codec, <<>>, Service, Function, Result, SeqId) of + {ok, Response} -> + handle_error({business, {genlib:to_binary(Name), Response}}, Req, WoodyState); {error, Reason} -> handle_encode_error(Reason, Req, WoodyState) end; -handle_result({error, Error}, _Service, _Function, _SeqId, Req, WoodyState) -> +handle_result(_Codec, {error, Error}, _Service, _Function, _SeqId, Req, WoodyState) -> handle_error(Error, Req, WoodyState). -get_exception_name({{struct, exception, {_Mod, Name}}, _}, _) -> - genlib:to_binary(Name); -get_exception_name(_TypeName, Exception) -> - genlib:to_binary(element(1, Exception)). - -spec handle_encode_error(_Reason, cowboy_req:req(), woody_state:st()) -> cowboy_req:req(). handle_encode_error(Reason, Req, WoodyState) -> _ = handle_event( @@ -641,22 +642,19 @@ handle_encode_error(Reason, Req, WoodyState) -> Error = {internal, result_unexpected, format_unexpected_error(error, Reason, [])}, handle_error({system, Error}, Req, WoodyState). -add_ev_meta(WoodyState, Service = {_, ServiceName}, ReplyType, Function, Args) -> +add_ev_meta(WoodyState, Codec, Service, RpcType, Function, Args) -> woody_state:add_ev_meta( #{ - service => ServiceName, + service => woody_server_codec:get_service_name(Codec, Service), service_schema => Service, function => Function, args => Args, - type => get_rpc_reply_type(ReplyType), + type => RpcType, deadline => woody_context:get_deadline(woody_state:get_context(WoodyState)) }, WoodyState ). -get_rpc_reply_type(oneway) -> cast; -get_rpc_reply_type(call) -> call. - format_unexpected_error(Class, Reason, Stack) -> woody_util:to_binary( [Class, ":", woody_error:format_details(Reason), " ", genlib_format:format_stacktrace(Stack)] From aa8f9d0cd2061c020f181cc4d58e724874dfa092 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 31 Oct 2022 17:30:00 +0300 Subject: [PATCH 02/10] Fixup TLS client options after resolving (#24) --- src/woody_client_thrift_http_transport.erl | 2 +- src/woody_client_thrift_v2.erl | 22 ++++++++++++++-- src/woody_resolver.erl | 19 +++++++++----- test/woody_ssl_SUITE.erl | 30 ++++++++++++++++++++-- test/woody_tests_SUITE.erl | 20 +++++++-------- 5 files changed, 72 insertions(+), 21 deletions(-) diff --git a/src/woody_client_thrift_http_transport.erl b/src/woody_client_thrift_http_transport.erl index 8c4046c..4ba8512 100644 --- a/src/woody_client_thrift_http_transport.erl +++ b/src/woody_client_thrift_http_transport.erl @@ -124,7 +124,7 @@ send(Url, Body, Options, ResOpts, WoodyState) -> % MSPF-416: We resolve url host to an ip here to prevent % reusing keep-alive connections to dead hosts case woody_resolver:resolve_url(Url, WoodyState, ResOpts) of - {ok, {OldUrl, NewUrl}} -> + {ok, {OldUrl, NewUrl}, _ConnectOpts} -> Headers = add_host_header(OldUrl, make_woody_headers(Context)), Options1 = set_defaults(Options), Options2 = set_timeouts(Options1, Context), diff --git a/src/woody_client_thrift_v2.erl b/src/woody_client_thrift_v2.erl index fa5cd35..383f933 100644 --- a/src/woody_client_thrift_v2.erl +++ b/src/woody_client_thrift_v2.erl @@ -154,11 +154,16 @@ send_call(Buffer, #{url := Url} = Opts, WoodyState) -> % MSPF-416: We resolve url host to an ip here to prevent % reusing keep-alive connections to dead hosts case woody_resolver:resolve_url(Url, WoodyState, ResolverOpts) of - {ok, {OldUrl, NewUrl}} -> + {ok, {OldUrl, NewUrl}, ConnectOpts} -> Headers = add_host_header(OldUrl, make_woody_headers(Context)), TransportOpts1 = set_defaults(TransportOpts), TransportOpts2 = set_timeouts(TransportOpts1, Context), - Result = hackney:request(post, NewUrl, Headers, Buffer, maps:to_list(TransportOpts2)), + % NOTE + % This is to ensure hackney won't try to resolve original hostname again in + % `set_tls_overrides/2`. + TransportOpts3 = append_connect_opts(TransportOpts2, ConnectOpts), + TransportOpts4 = set_tls_overrides(TransportOpts3, OldUrl), + Result = hackney:request(post, NewUrl, Headers, Buffer, maps:to_list(TransportOpts4)), handle_response(Result, WoodyState); {error, Reason} -> Error = {error, {resolve_failed, Reason}}, @@ -203,6 +208,19 @@ calc_timeouts(Timeout) -> T end. +append_connect_opts(Options, ConnectOpts) -> + Options#{connect_options => maps:get(connect_options, Options, []) ++ ConnectOpts}. + +set_tls_overrides(Options = #{ssl_options := _}, _OrigUrl) -> + Options; +set_tls_overrides(Options, #hackney_url{scheme = https, host = OrigHost}) -> + % NOTE + % Beware, we're abusing implementation details here. + SslOpts = hackney_connection:connect_options(hackney_ssl, OrigHost, maps:to_list(Options)), + Options#{ssl_options => SslOpts}; +set_tls_overrides(Options, #hackney_url{scheme = _}) -> + Options. + -spec make_woody_headers(woody_context:ctx()) -> http_headers(). make_woody_headers(Context) -> add_optional_headers(Context, [ diff --git a/src/woody_resolver.erl b/src/woody_resolver.erl index 96a655f..6468b82 100644 --- a/src/woody_resolver.erl +++ b/src/woody_resolver.erl @@ -36,13 +36,13 @@ %% -spec resolve_url(url(), woody_state:st()) -> - {ok, resolve_result()} + {ok, resolve_result(), [gen_tcp:connect_option()]} | {error, resolve_error()}. resolve_url(Url, WoodyState) -> resolve_url(Url, WoodyState, #{}). -spec resolve_url(url(), woody_state:st(), options()) -> - {ok, resolve_result()} + {ok, resolve_result(), [gen_tcp:connect_option()]} | {error, resolve_error()}. resolve_url(Url, WoodyState, Opts) when is_list(Url) -> resolve_url(unicode:characters_to_binary(Url), WoodyState, Opts); @@ -61,21 +61,28 @@ parse_url(Url) -> resolve_parsed_url(ParsedUrl = #hackney_url{}, WoodyState, Opts) -> case inet:parse_address(ParsedUrl#hackney_url.host) of % url host is already an ip, move on - {ok, _} -> {ok, {ParsedUrl, ParsedUrl}}; - {error, _} -> do_resolve_url(ParsedUrl, WoodyState, Opts) + {ok, IpAddr} -> + IpFamily = + case tuple_size(IpAddr) of + 4 -> inet; + 8 -> inet6 + end, + {ok, {ParsedUrl, ParsedUrl}, [IpFamily]}; + {error, _} -> + do_resolve_url(ParsedUrl, WoodyState, Opts) end. do_resolve_url(ParsedUrl, WoodyState, Opts) -> UnresolvedHost = ParsedUrl#hackney_url.host, _ = log_event(?EV_CLIENT_RESOLVE_BEGIN, WoodyState, #{host => UnresolvedHost}), case lookup_host(UnresolvedHost, Opts) of - {ok, {IpAddr, _} = AddrInfo} -> + {ok, {IpAddr, IpFamily} = AddrInfo} -> _ = log_event(?EV_CLIENT_RESOLVE_RESULT, WoodyState, #{ status => ok, host => UnresolvedHost, address => inet:ntoa(IpAddr) }), - {ok, {ParsedUrl, replace_host(ParsedUrl, AddrInfo)}}; + {ok, {ParsedUrl, replace_host(ParsedUrl, AddrInfo)}, [IpFamily]}; {error, Reason} -> _ = log_event(?EV_CLIENT_RESOLVE_RESULT, WoodyState, #{ status => error, diff --git a/test/woody_ssl_SUITE.erl b/test/woody_ssl_SUITE.erl index 1ea9336..2deb3f4 100644 --- a/test/woody_ssl_SUITE.erl +++ b/test/woody_ssl_SUITE.erl @@ -23,7 +23,8 @@ -export([ client_wo_cert_test/1, valid_client_cert_test/1, - invalid_client_cert_test/1 + invalid_client_cert_test/1, + valid_cert_external_server/1 ]). %% woody_server_thrift_handler callback @@ -58,7 +59,8 @@ all() -> [ {group, 'tlsv1.3'}, {group, 'tlsv1.2'}, - {group, 'tlsv1.1'} + {group, 'tlsv1.1'}, + valid_cert_external_server ]. -spec groups() -> [{group_name(), list(), [case_name()]}]. @@ -141,6 +143,30 @@ invalid_client_cert_test(C) -> {match, _} = re:run(Reason, <<"^{tls_alert,[\"\{]unknown[ _]ca.*$">>, []) end. +-spec valid_cert_external_server(config()) -> _. +valid_cert_external_server(_C) -> + % NOTE + % This testcase needs internet connectivity. + % This testcase relies on correct TLS setup and implementation on example.org servers. + Url = <<"https://example.org/just-testing-tls-nevermind">>, + Context = woody_context:new(to_binary(?FUNCTION_NAME)), + Service = {?THRIFT_DEFS, 'Weapons'}, + Options = #{ + url => Url, + event_handler => {woody_ct_event_h, {client, external}} + }, + try + _ = woody_client:call({Service, get_weapon, {<<"Example">>, <<>>}}, Options, Context), + error(unreachable) + catch + error:{woody_error, { + external, + result_unexpected, + <<"This server does not implement the woody protocol", _/binary>> + }} -> + ok + end. + -spec client_ssl_opts(atom()) -> [ssl:tls_client_option()]. client_ssl_opts('tlsv1.3') -> % NOTE diff --git a/test/woody_tests_SUITE.erl b/test/woody_tests_SUITE.erl index a081c38..a8e0f06 100644 --- a/test/woody_tests_SUITE.erl +++ b/test/woody_tests_SUITE.erl @@ -945,30 +945,30 @@ calls_with_cache(_) -> woody_resolver_inet(C) -> WoodyState = woody_state:new(client, woody_context:new(), ?MODULE), ok = inet_db:set_inet6(false), - {ok, ?RESPONSE(http, <<"127.0.0.1">>, <<"127.0.0.1">>, <<"/test">>)} = + {ok, ?RESPONSE(http, <<"127.0.0.1">>, <<"127.0.0.1">>, <<"/test">>), [inet]} = woody_resolver:resolve_url(<<"http://127.0.0.1/test">>, WoodyState), - {ok, ?RESPONSE(http, <<"localhost">>, <<"127.0.0.1:80">>, <<"/test">>)} = + {ok, ?RESPONSE(http, <<"localhost">>, <<"127.0.0.1:80">>, <<"/test">>), [inet]} = woody_resolver:resolve_url(<<"http://localhost/test">>, WoodyState), - {ok, ?RESPONSE(http, <<"localhost">>, <<"127.0.0.1:80">>, <<"/test?q=a">>)} = + {ok, ?RESPONSE(http, <<"localhost">>, <<"127.0.0.1:80">>, <<"/test?q=a">>), [inet]} = woody_resolver:resolve_url("http://localhost/test?q=a", WoodyState), - {ok, ?RESPONSE(https, <<"localhost:8080">>, <<"127.0.0.1:8080">>, <<"/test">>)} = + {ok, ?RESPONSE(https, <<"localhost:8080">>, <<"127.0.0.1:8080">>, <<"/test">>), [inet]} = woody_resolver:resolve_url(<<"https://localhost:8080/test">>, WoodyState), - {ok, ?RESPONSE(https, <<"localhost">>, <<"127.0.0.1:443">>, <<>>)} = + {ok, ?RESPONSE(https, <<"localhost">>, <<"127.0.0.1:443">>, <<>>), [inet]} = woody_resolver:resolve_url(<<"https://localhost">>, WoodyState), ok = inet_db:set_inet6(?config(env_inet6, C)). woody_resolver_inet6(C) -> WoodyState = woody_state:new(client, woody_context:new(), ?MODULE), ok = inet_db:set_inet6(true), - {ok, ?RESPONSE(http, <<"[::1]">>, <<"[::1]">>, <<"/test">>)} = + {ok, ?RESPONSE(http, <<"[::1]">>, <<"[::1]">>, <<"/test">>), [inet6]} = woody_resolver:resolve_url(<<"http://[::1]/test">>, WoodyState), - {ok, ?RESPONSE(http, <<"localhost">>, <<"[::1]:80">>, <<"/test">>)} = + {ok, ?RESPONSE(http, <<"localhost">>, <<"[::1]:80">>, <<"/test">>), [inet6]} = woody_resolver:resolve_url(<<"http://localhost/test">>, WoodyState), - {ok, ?RESPONSE(http, <<"localhost">>, <<"[::1]:80">>, <<"/test?q=a">>)} = + {ok, ?RESPONSE(http, <<"localhost">>, <<"[::1]:80">>, <<"/test?q=a">>), [inet6]} = woody_resolver:resolve_url("http://localhost/test?q=a", WoodyState), - {ok, ?RESPONSE(https, <<"localhost:8080">>, <<"[::1]:8080">>, <<"/test">>)} = + {ok, ?RESPONSE(https, <<"localhost:8080">>, <<"[::1]:8080">>, <<"/test">>), [inet6]} = woody_resolver:resolve_url(<<"https://localhost:8080/test">>, WoodyState), - {ok, ?RESPONSE(https, <<"localhost">>, <<"[::1]:443">>, <<>>)} = + {ok, ?RESPONSE(https, <<"localhost">>, <<"[::1]:443">>, <<>>), [inet6]} = woody_resolver:resolve_url(<<"https://localhost">>, WoodyState), ok = inet_db:set_inet6(?config(env_inet6, C)). From bc6ca1aa363dbb0606aabf3571cc9d5113a20bd9 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 24 Nov 2022 03:58:53 +0700 Subject: [PATCH 03/10] chore(deps): update valitydev/erlang-workflows action to v1.0.10 (#25) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .github/workflows/erlang-checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/erlang-checks.yml b/.github/workflows/erlang-checks.yml index d701276..93e14a4 100644 --- a/.github/workflows/erlang-checks.yml +++ b/.github/workflows/erlang-checks.yml @@ -30,7 +30,7 @@ jobs: run: name: Run checks needs: setup - uses: valitydev/erlang-workflows/.github/workflows/erlang-parallel-build.yml@v1.0.9 + uses: valitydev/erlang-workflows/.github/workflows/erlang-parallel-build.yml@v1.0.10 with: otp-version: ${{ needs.setup.outputs.otp-version }} rebar-version: ${{ needs.setup.outputs.rebar-version }} From 2e8126d939b9ce41d2558b9b9e22e4a17fae5b01 Mon Sep 17 00:00:00 2001 From: Vality Bot Date: Thu, 24 Nov 2022 04:03:56 +0700 Subject: [PATCH 04/10] =?UTF-8?q?=F0=9F=94=84=20Synced=20file(s)=20with=20?= =?UTF-8?q?valitydev/configurations=20(#5)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * πŸ”„ Created local '.github/workflows/basic-linters.yml' from remote 'workflows/base/basic-linters.yml' * πŸ”„ Synced local 'LICENSE' with remote 'LICENSE' * πŸ”„ Created local '.github/settings.yml' from remote '.github/settings.yml' * πŸ”„ Synced local '.github/settings.yml' with remote 'erlang/.github/settings.yml' Co-authored-by: Pavel Popov --- .github/settings.yml | 46 +++++++++++++++++++++++++++++ .github/workflows/basic-linters.yml | 10 +++++++ LICENSE | 2 +- 3 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 .github/settings.yml create mode 100644 .github/workflows/basic-linters.yml diff --git a/.github/settings.yml b/.github/settings.yml new file mode 100644 index 0000000..2b2979e --- /dev/null +++ b/.github/settings.yml @@ -0,0 +1,46 @@ +# These settings are synced to GitHub by https://probot.github.io/apps/settings/ +_extends: .github + +teams: + - name: devs + permission: push + - name: bots + permission: push + - name: maintainers + permission: push + - name: erlang + permission: maintain + +branches: + - name: "master" + # https://developer.github.com/v3/repos/branches/#update-branch-protection + # Branch Protection settings. Set to null to disable + protection: + required_pull_request_reviews: + required_approving_review_count: 1 + dismiss_stale_reviews: false + require_code_owner_reviews: true + dismissal_restrictions: {} + required_status_checks: + strict: true + checks: + - context: "Load .env" + app_id: 15368 + - context: "Run checks / Build" + app_id: 15368 + - context: "Run checks / Check" + app_id: 15368 + - context: "Run checks / Dialyze" + app_id: 15368 + - context: "Run checks / Test" + app_id: 15368 + - context: "codecov/patch" + app_id: 254 + - context: "codecov/project" + app_id: 254 + enforce_admins: false + required_conversation_resolution: false + required_linear_history: true + restrictions: null + allow_force_pushes: false + allow_deletions: false diff --git a/.github/workflows/basic-linters.yml b/.github/workflows/basic-linters.yml new file mode 100644 index 0000000..6114f14 --- /dev/null +++ b/.github/workflows/basic-linters.yml @@ -0,0 +1,10 @@ +name: Vality basic linters + +on: + pull_request: + branches: + - "*" + +jobs: + lint: + uses: valitydev/base-workflows/.github/workflows/basic-linters.yml@v1 diff --git a/LICENSE b/LICENSE index 2bb9ad2..d9a10c0 100644 --- a/LICENSE +++ b/LICENSE @@ -173,4 +173,4 @@ incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. - END OF TERMS AND CONDITIONS \ No newline at end of file + END OF TERMS AND CONDITIONS From 511e422a594e4525c4bac00c80f709dba0ccfa49 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 24 Nov 2022 04:07:13 +0700 Subject: [PATCH 05/10] chore(deps): update actions/checkout action to v3 (#7) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> Co-authored-by: Pavel Popov --- .github/workflows/erlang-checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/erlang-checks.yml b/.github/workflows/erlang-checks.yml index 93e14a4..300ca08 100644 --- a/.github/workflows/erlang-checks.yml +++ b/.github/workflows/erlang-checks.yml @@ -18,7 +18,7 @@ jobs: thrift-version: ${{ steps.thrift-version.outputs.version }} steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - run: grep -v '^#' .env >> $GITHUB_ENV - id: otp-version run: echo "::set-output name=version::$OTP_VERSION" From 5d46291a6bfcee0bae2a9346a7d927603a909249 Mon Sep 17 00:00:00 2001 From: Aleksey Kashapov Date: Wed, 4 Oct 2023 14:44:43 +0300 Subject: [PATCH 06/10] TD-686: Adopts opentelemetry API (#27) * Adds otel propagation to legacy thrift transport implementations * Complete otel propagation in pass-through testcase * Implements woody event handler for otel spans --- rebar.config | 8 +- rebar.lock | 8 ++ src/woody.app.src | 3 +- src/woody_client_thrift_http_transport.erl | 2 +- src/woody_client_thrift_v2.erl | 5 +- src/woody_event_handler_otel.erl | 100 +++++++++++++++++++++ src/woody_server_thrift_http_handler.erl | 1 + src/woody_server_thrift_v2.erl | 1 + src/woody_util.erl | 44 +++++++++ test/woody_ct_otel_collector.erl | 97 ++++++++++++++++++++ test/woody_tests_SUITE.erl | 71 ++++++++++++++- 11 files changed, 330 insertions(+), 10 deletions(-) create mode 100644 src/woody_event_handler_otel.erl create mode 100644 test/woody_ct_otel_collector.erl diff --git a/rebar.config b/rebar.config index 41578e7..749cd4b 100644 --- a/rebar.config +++ b/rebar.config @@ -32,7 +32,8 @@ {cache, "2.3.3"}, {thrift, {git, "https://github.com/valitydev/thrift_erlang.git", {branch, "master"}}}, {snowflake, {git, "https://github.com/valitydev/snowflake.git", {branch, "master"}}}, - {genlib, {git, "https://github.com/valitydev/genlib.git", {branch, "master"}}} + {genlib, {git, "https://github.com/valitydev/genlib.git", {branch, "master"}}}, + {opentelemetry_api, "1.2.1"} ]}. %% XRef checks @@ -87,10 +88,11 @@ {git, "https://github.com/valitydev/damsel.git", {ref, "3fa6f31db54b2ae781b27898ab4daf56bb36eb36"}}}, {mg_proto, {git, "https://github.com/valitydev/machinegun-proto.git", - {ref, "ebae56fe2b3e79e4eb34afc8cb55c9012ae989f8"}}} + {ref, "ebae56fe2b3e79e4eb34afc8cb55c9012ae989f8"}}}, + {opentelemetry, "1.3.0"} ]}, {dialyzer, [ - {plt_extra_apps, [how_are_you, eunit, proper, common_test, cth_readable]} + {plt_extra_apps, [how_are_you, eunit, proper, common_test, cth_readable, opentelemetry]} ]} ]} ]}. diff --git a/rebar.lock b/rebar.lock index 255b17c..103c490 100644 --- a/rebar.lock +++ b/rebar.lock @@ -12,6 +12,10 @@ {<<"idna">>,{pkg,<<"idna">>,<<"6.1.1">>},1}, {<<"metrics">>,{pkg,<<"metrics">>,<<"1.0.1">>},1}, {<<"mimerl">>,{pkg,<<"mimerl">>,<<"1.2.0">>},1}, + {<<"opentelemetry_api">>,{pkg,<<"opentelemetry_api">>,<<"1.2.1">>},0}, + {<<"opentelemetry_semantic_conventions">>, + {pkg,<<"opentelemetry_semantic_conventions">>,<<"0.2.0">>}, + 1}, {<<"parse_trans">>,{pkg,<<"parse_trans">>,<<"3.3.1">>},1}, {<<"ranch">>,{pkg,<<"ranch">>,<<"1.8.0">>},1}, {<<"snowflake">>, @@ -35,6 +39,8 @@ {<<"idna">>, <<"8A63070E9F7D0C62EB9D9FCB360A7DE382448200FBBD1B106CC96D3D8099DF8D">>}, {<<"metrics">>, <<"25F094DEA2CDA98213CECC3AEFF09E940299D950904393B2A29D191C346A8486">>}, {<<"mimerl">>, <<"67E2D3F571088D5CFD3E550C383094B47159F3EEE8FFA08E64106CDF5E981BE3">>}, + {<<"opentelemetry_api">>, <<"7B69ED4F40025C005DE0B74FCE8C0549625D59CB4DF12D15C32FE6DC5076FF42">>}, + {<<"opentelemetry_semantic_conventions">>, <<"B67FE459C2938FCAB341CB0951C44860C62347C005ACE1B50F8402576F241435">>}, {<<"parse_trans">>, <<"16328AB840CC09919BD10DAB29E431DA3AF9E9E7E7E6F0089DD5A2D2820011D8">>}, {<<"ranch">>, <<"8C7A100A139FD57F17327B6413E4167AC559FBC04CA7448E9BE9057311597A1D">>}, {<<"ssl_verify_fun">>, <<"CF344F5692C82D2CD7554F5EC8FD961548D4FD09E7D22F5B62482E5AEAEBD4B0">>}, @@ -49,6 +55,8 @@ {<<"idna">>, <<"92376EB7894412ED19AC475E4A86F7B413C1B9FBB5BD16DCCD57934157944CEA">>}, {<<"metrics">>, <<"69B09ADDDC4F74A40716AE54D140F93BEB0FB8978D8636EADED0C31B6F099F16">>}, {<<"mimerl">>, <<"F278585650AA581986264638EBF698F8BB19DF297F66AD91B18910DFC6E19323">>}, + {<<"opentelemetry_api">>, <<"6D7A27B7CAD2AD69A09CABF6670514CAFCEC717C8441BEB5C96322BAC3D05350">>}, + {<<"opentelemetry_semantic_conventions">>, <<"D61FA1F5639EE8668D74B527E6806E0503EFC55A42DB7B5F39939D84C07D6895">>}, {<<"parse_trans">>, <<"07CD9577885F56362D414E8C4C4E6BDF10D43A8767ABB92D24CBE8B24C54888B">>}, {<<"ranch">>, <<"49FBCFD3682FAB1F5D109351B61257676DA1A2FDBE295904176D5E521A2DDFE5">>}, {<<"ssl_verify_fun">>, <<"BDB0D2471F453C88FF3908E7686F86F9BE327D065CC1EC16FA4540197EA04680">>}, diff --git a/src/woody.app.src b/src/woody.app.src index 4dcb2f9..a24ac6f 100644 --- a/src/woody.app.src +++ b/src/woody.app.src @@ -11,7 +11,8 @@ hackney, thrift, gproc, - cache + cache, + opentelemetry_api ]}, {env, [ {enable_debug, false} diff --git a/src/woody_client_thrift_http_transport.erl b/src/woody_client_thrift_http_transport.erl index 4ba8512..404df9a 100644 --- a/src/woody_client_thrift_http_transport.erl +++ b/src/woody_client_thrift_http_transport.erl @@ -128,7 +128,7 @@ send(Url, Body, Options, ResOpts, WoodyState) -> Headers = add_host_header(OldUrl, make_woody_headers(Context)), Options1 = set_defaults(Options), Options2 = set_timeouts(Options1, Context), - HeaderList = maps:to_list(Headers), + HeaderList = otel_propagator_text_map:inject(maps:to_list(Headers)), Result = hackney:request(post, NewUrl, HeaderList, Body, maps:to_list(Options2)), transform_request_results(Result); {error, Reason} -> diff --git a/src/woody_client_thrift_v2.erl b/src/woody_client_thrift_v2.erl index 383f933..dfe4ba8 100644 --- a/src/woody_client_thrift_v2.erl +++ b/src/woody_client_thrift_v2.erl @@ -155,7 +155,8 @@ send_call(Buffer, #{url := Url} = Opts, WoodyState) -> % reusing keep-alive connections to dead hosts case woody_resolver:resolve_url(Url, WoodyState, ResolverOpts) of {ok, {OldUrl, NewUrl}, ConnectOpts} -> - Headers = add_host_header(OldUrl, make_woody_headers(Context)), + Headers0 = add_host_header(OldUrl, make_woody_headers(Context)), + Headers1 = otel_propagator_text_map:inject(Headers0), TransportOpts1 = set_defaults(TransportOpts), TransportOpts2 = set_timeouts(TransportOpts1, Context), % NOTE @@ -163,7 +164,7 @@ send_call(Buffer, #{url := Url} = Opts, WoodyState) -> % `set_tls_overrides/2`. TransportOpts3 = append_connect_opts(TransportOpts2, ConnectOpts), TransportOpts4 = set_tls_overrides(TransportOpts3, OldUrl), - Result = hackney:request(post, NewUrl, Headers, Buffer, maps:to_list(TransportOpts4)), + Result = hackney:request(post, NewUrl, Headers1, Buffer, maps:to_list(TransportOpts4)), handle_response(Result, WoodyState); {error, Reason} -> Error = {error, {resolve_failed, Reason}}, diff --git a/src/woody_event_handler_otel.erl b/src/woody_event_handler_otel.erl new file mode 100644 index 0000000..4307dac --- /dev/null +++ b/src/woody_event_handler_otel.erl @@ -0,0 +1,100 @@ +-module(woody_event_handler_otel). + +-include_lib("opentelemetry_api/include/opentelemetry.hrl"). +-include("woody_defs.hrl"). + +-behaviour(woody_event_handler). + +-export([handle_event/4]). + +-spec handle_event(Event, RpcId, Meta, Opts) -> ok when + Event :: woody_event_handler:event(), + RpcId :: woody:rpc_id() | undefined, + Meta :: woody_event_handler:event_meta(), + Opts :: woody:options(). + +-define(IS_SPAN_START(Event), Event =:= ?EV_CALL_SERVICE orelse Event =:= ?EV_INVOKE_SERVICE_HANDLER). +-define(IS_SPAN_END(Event), Event =:= ?EV_SERVICE_RESULT orelse Event =:= ?EV_SERVICE_HANDLER_RESULT). +-define(IS_CLIENT_INTERNAL(Event), + Event =:= ?EV_CLIENT_CACHE_HIT orelse + Event =:= ?EV_CLIENT_CACHE_MISS orelse + Event =:= ?EV_CLIENT_CACHE_UPDATE orelse + Event =:= ?EV_CLIENT_SEND orelse + Event =:= ?EV_CLIENT_RECEIVE +). + +%% Client events +handle_event(Event, RpcID, _Meta = #{url := Url}, _Opts) when ?IS_CLIENT_INTERNAL(Event) -> + with_span(otel_ctx:get_current(), mk_ref(RpcID), fun(SpanCtx) -> + _ = otel_span:add_event(SpanCtx, atom_to_binary(Event), #{url => Url}) + end); +%% Internal error handling +handle_event(?EV_INTERNAL_ERROR, RpcID, Meta = #{error := Error, class := Class, reason := Reason}, _Opts) -> + Stacktrace = maps:get(stack, Meta, []), + Details = io_lib:format("~ts: ~ts", [Error, Reason]), + with_span(otel_ctx:get_current(), mk_ref(RpcID), fun(SpanCtx) -> + _ = otel_span:record_exception(SpanCtx, genlib:define(Class, error), Details, Stacktrace, #{}), + otel_maybe_cleanup(Meta, SpanCtx) + end); +%% Registers span starts/ends for woody client calls and woody server function invocations. +handle_event(Event, RpcID, Meta, _Opts) when ?IS_SPAN_START(Event) -> + Tracer = opentelemetry:get_application_tracer(?MODULE), + span_start(Tracer, otel_ctx:get_current(), mk_ref(RpcID), mk_name(Meta), mk_opts(Event)); +handle_event(Event, RpcID, Meta, _Opts) when ?IS_SPAN_END(Event) -> + span_end(otel_ctx:get_current(), mk_ref(RpcID), fun(SpanCtx) -> + otel_maybe_erroneous_result(SpanCtx, Meta) + end); +handle_event(_Event, _RpcID, _Meta, _Opts) -> + ok. + +%% + +span_start(Tracer, Ctx, Key, SpanName, Opts) -> + SpanCtx = otel_tracer:start_span(Ctx, Tracer, SpanName, Opts), + Ctx1 = woody_util:span_stack_put(Key, SpanCtx, Ctx), + Ctx2 = otel_tracer:set_current_span(Ctx1, SpanCtx), + _ = otel_ctx:attach(Ctx2), + ok. + +span_end(Ctx, Key, OnBeforeEnd) -> + case woody_util:span_stack_pop(Key, Ctx) of + {error, notfound} -> + ok; + {ok, SpanCtx, ParentSpanCtx, Ctx1} -> + SpanCtx1 = OnBeforeEnd(SpanCtx), + _ = otel_span:end_span(SpanCtx1, undefined), + Ctx2 = otel_tracer:set_current_span(Ctx1, ParentSpanCtx), + _ = otel_ctx:attach(Ctx2), + ok + end. + +with_span(Ctx, Key, F) -> + SpanCtx = woody_util:span_stack_get(Key, Ctx, otel_tracer:current_span_ctx(Ctx)), + _ = F(SpanCtx), + ok. + +otel_maybe_cleanup(#{final := true}, SpanCtx) -> + _ = otel_span:end_span(SpanCtx, undefined), + otel_ctx:clear(), + ok; +otel_maybe_cleanup(_Meta, _SpanCtx) -> + ok. + +otel_maybe_erroneous_result(SpanCtx, Meta = #{status := error, result := Reason}) -> + Class = maps:get(except_class, Meta, error), + Stacktrace = maps:get(stack, Meta, []), + _ = otel_span:record_exception(SpanCtx, Class, Reason, Stacktrace, #{}), + SpanCtx; +otel_maybe_erroneous_result(SpanCtx, _Meta) -> + SpanCtx. + +mk_opts(?EV_CALL_SERVICE) -> + #{kind => ?SPAN_KIND_CLIENT}; +mk_opts(?EV_INVOKE_SERVICE_HANDLER) -> + #{kind => ?SPAN_KIND_SERVER}. + +mk_ref(#{span_id := WoodySpanId}) -> + WoodySpanId. + +mk_name(#{role := Role, service := Service, function := Function}) -> + woody_util:to_binary([Role, " ", Service, ":", Function]). diff --git a/src/woody_server_thrift_http_handler.erl b/src/woody_server_thrift_http_handler.erl index 824696c..897d9c7 100644 --- a/src/woody_server_thrift_http_handler.erl +++ b/src/woody_server_thrift_http_handler.erl @@ -393,6 +393,7 @@ check_deadline(Deadline, Req, State = #{url := Url, woody_state := WoodyState}) -spec check_metadata_headers(woody:http_headers(), cowboy_req:req(), state()) -> check_result(). check_metadata_headers(Headers, Req, State = #{woody_state := WoodyState, regexp_meta := ReMeta}) -> + _OtelCtx = otel_propagator_text_map:extract(maps:to_list(Headers)), WoodyState1 = set_metadata(find_metadata(Headers, ReMeta), WoodyState), {ok, Req, update_woody_state(State, WoodyState1, Req)}. diff --git a/src/woody_server_thrift_v2.erl b/src/woody_server_thrift_v2.erl index a9c6e3c..2caaf61 100644 --- a/src/woody_server_thrift_v2.erl +++ b/src/woody_server_thrift_v2.erl @@ -407,6 +407,7 @@ check_deadline(Deadline, Req, State = #{url := Url, woody_state := WoodyState}) -spec check_metadata_headers(woody:http_headers(), cowboy_req:req(), state()) -> check_result(). check_metadata_headers(Headers, Req, State = #{woody_state := WoodyState, regexp_meta := ReMeta}) -> + _OtelCtx = otel_propagator_text_map:extract(maps:to_list(Headers)), WoodyState1 = set_metadata(find_metadata(Headers, ReMeta), WoodyState), {ok, Req, update_woody_state(State, WoodyState1, Req)}. diff --git a/src/woody_util.erl b/src/woody_util.erl index 0172fd3..7c7e5fc 100644 --- a/src/woody_util.erl +++ b/src/woody_util.erl @@ -11,6 +11,12 @@ -define(DEFAULT_HANDLER_OPTS, undefined). +%% + +-export([span_stack_put/3]). +-export([span_stack_get/3]). +-export([span_stack_pop/2]). + %% %% Internal API %% @@ -51,3 +57,41 @@ get_rpc_type({Module, Service}, Function) -> -spec get_rpc_reply_type(_ThriftReplyType) -> woody:rpc_type(). get_rpc_reply_type(oneway_void) -> cast; get_rpc_reply_type(_) -> call. + +%% OTEL context span helpers +%% NOTE Those helpers are designed specifically to manage stacking spans during +%% woody client (or server) calls _inside_ one single process context. +%% Thus, use of process dictionary via `otel_ctx'. + +-define(OTEL_SPANS_STACK, 'spans_ctx_stack'). + +-type span_key() :: atom() | binary() | string(). +-type maybe_span_ctx() :: opentelemetry:span_ctx() | undefined. + +-spec span_stack_put(span_key(), opentelemetry:span_ctx(), otel_ctx:t()) -> otel_ctx:t(). +span_stack_put(Key, SpanCtx, Context) -> + Stack = otel_ctx:get_value(Context, ?OTEL_SPANS_STACK, []), + Entry = {Key, SpanCtx, otel_tracer:current_span_ctx(Context)}, + otel_ctx:set_value(Context, ?OTEL_SPANS_STACK, [Entry | Stack]). + +-spec span_stack_get(span_key(), otel_ctx:t(), maybe_span_ctx()) -> maybe_span_ctx(). +span_stack_get(Key, Context, Default) -> + Stack = otel_ctx:get_value(Context, ?OTEL_SPANS_STACK, []), + case lists:keyfind(Key, 1, Stack) of + false -> + Default; + {_Key, SpanCtx, _ParentSpanCtx} -> + SpanCtx + end. + +-spec span_stack_pop(span_key(), otel_ctx:t()) -> + {ok, opentelemetry:span_ctx(), maybe_span_ctx(), otel_ctx:t()} | {error, notfound}. +span_stack_pop(Key, Context) -> + Stack = otel_ctx:get_value(Context, ?OTEL_SPANS_STACK, []), + case lists:keytake(Key, 1, Stack) of + false -> + {error, notfound}; + {value, {_Key, SpanCtx, ParentSpanCtx}, Stack1} -> + Context1 = otel_ctx:set_value(Context, ?OTEL_SPANS_STACK, Stack1), + {ok, SpanCtx, ParentSpanCtx, Context1} + end. diff --git a/test/woody_ct_otel_collector.erl b/test/woody_ct_otel_collector.erl new file mode 100644 index 0000000..9617cb4 --- /dev/null +++ b/test/woody_ct_otel_collector.erl @@ -0,0 +1,97 @@ +-module(woody_ct_otel_collector). + +-behaviour(gen_server). + +-export([ + start_link/0, + get_trace/1, + get_traces/0 +]). + +-export([ + init/1, + handle_call/3, + handle_cast/2, + handle_info/2 +]). + +-include_lib("opentelemetry/include/otel_span.hrl"). + +-type span() :: #span{}. + +-type span_node() :: #{span := span(), children := [span_node()]}. + +-type trace() :: #{ + id := opentelemetry:trace_id(), + node := span_node() +}. + +% + +-spec start_link() -> {ok, pid()}. +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +-spec get_trace(opentelemetry:trace_id()) -> {ok, trace()} | {error, notfound}. +get_trace(TraceId) -> + gen_server:call(?MODULE, {trace, TraceId}). + +-spec get_traces() -> {ok, [trace()]}. +get_traces() -> + gen_server:call(?MODULE, traces). + +-spec init(_) -> {ok, _}. +init(_Opts) -> + {ok, #{}}. + +-spec handle_info(_, T) -> {noreply, T}. +handle_info({span, Span}, State0) -> + State1 = maps:update_with(Span#span.trace_id, fun(V) -> [Span | V] end, [Span], State0), + {noreply, State1}; +handle_info(_Msg, State) -> + {noreply, State}. + +-spec handle_call(_, _, T) -> {noreply, T}. +handle_call(traces, _From, State) -> + Result = maps:map(fun(TraceId, Spans) -> build_trace(TraceId, Spans) end, State), + {reply, maps:values(Result), State}; +handle_call({trace, TraceId}, _From, State) -> + Result = + case maps:get(TraceId, State, undefined) of + undefined -> {error, notfound}; + Spans -> {ok, build_trace(TraceId, Spans)} + end, + {reply, Result, State}; +handle_call(_Msg, _From, State) -> + {noreply, State}. + +-spec handle_cast(_, T) -> {noreply, T}. +handle_cast(_Msg, State) -> + {noreply, State}. + +% + +build_trace(TraceId, Spans0) -> + Spans1 = lists:sort(fun(#span{start_time = A}, #span{start_time = B}) -> A < B end, Spans0), + [RootSpan | _] = lists:filter( + fun + (#span{parent_span_id = undefined}) -> true; + (_) -> false + end, + Spans1 + ), + #{ + id => TraceId, + node => lists:foldl(fun(Span, RootNode) -> update_node(Span, RootNode) end, new_span_node(RootSpan), Spans1) + }. + +update_node( + Span = #span{parent_span_id = ParentId}, + SpanNode = #{span := #span{span_id = ParentId}, children := Children} +) -> + SpanNode#{children => [new_span_node(Span) | Children]}; +update_node(Span, SpanNode = #{children := Children}) -> + SpanNode#{children => lists:map(fun(Child) -> update_node(Span, Child) end, Children)}. + +new_span_node(Span) -> + #{span => Span, children => []}. diff --git a/test/woody_tests_SUITE.erl b/test/woody_tests_SUITE.erl index a8e0f06..6ffd9a7 100644 --- a/test/woody_tests_SUITE.erl +++ b/test/woody_tests_SUITE.erl @@ -3,6 +3,8 @@ -include_lib("common_test/include/ct.hrl"). -include_lib("stdlib/include/assert.hrl"). -include_lib("hackney/include/hackney_lib.hrl"). +-include_lib("opentelemetry_api/include/opentelemetry.hrl"). +-include_lib("opentelemetry/include/otel_span.hrl"). -include("woody_test_thrift.hrl"). -include("woody_defs.hrl"). @@ -111,6 +113,11 @@ -define(WEAPON_STACK_OVERFLOW, pos_out_of_boundaries). -define(BAD_POWERUP_REPLY, powerup_unknown). +-define(OTEL_SPAN(Name, Children), #{span := #span{name = Name}, children := Children}). +-define(OTEL_SPAN(Name, SpanAttributes, Children), #{ + span := #span{name = Name, attributes = SpanAttributes}, children := Children +}). + -type config() :: [{atom(), any()}]. -type case_name() :: atom(). -type group_name() :: atom(). @@ -293,7 +300,8 @@ init_per_suite(C) -> ]), {ok, Apps} = application:ensure_all_started(woody), {ok, HayApps} = application:ensure_all_started(how_are_you), - [{apps, HayApps ++ Apps} | C]. + {ok, OtelApps} = setup_opentelemetry(), + [{apps, OtelApps ++ HayApps ++ Apps} | C]. end_per_suite(C) -> % unset so it won't report metrics next suite @@ -352,6 +360,20 @@ init_per_group(Name, Config) -> end_per_group(_Name, _Config) -> ok. +setup_opentelemetry() -> + ok = application:set_env([ + {opentelemetry, [ + {text_map_propagators, [ + baggage, + trace_context + ]}, + {span_processor, simple}, + {traces_exporter, {otel_exporter_pid, woody_ct_otel_collector}} + ]} + ]), + ok = application:start(opentelemetry), + {ok, [opentelemetry]}. + start_tc_sup() -> supervisor:start_link({local, ?MODULE}, ?MODULE, []). @@ -673,8 +695,37 @@ call_pass_thru_ok_test(C) -> Armor = <<"AntiGrav Boots">>, Context = make_context(<<"call_pass_thru_ok">>), Expect = {ok, genlib_map:get(Armor, powerups())}, + Baggage = #{<<"service">> => <<"Powerups">>}, + SpanName = <<"test Powerups:proxy_get_powerup">>, + %% NOTE: `otel_baggage` uses `otel_ctx` that relies on __process dictionary__ + ok = otel_baggage:set(Baggage), + Tracer = opentelemetry:get_application_tracer(?MODULE), + %% NOTE: Starts span and sets its context as current span in __process dictionary__ + SpanCtx = otel_tracer:set_current_span(otel_tracer:start_span(Tracer, SpanName, #{})), Expect = call(Context, 'Powerups', proxy_get_powerup, {Armor, self_to_bin()}, C), - {ok, _} = receive_msg(Armor, Context). + %% NOTE: Ends span and puts context into __process dictionary__ + _ = otel_tracer:set_current_span(otel_span:end_span(SpanCtx, undefined)), + {ok, _} = receive_msg(Armor, Context), + {ok, #{node := Root}} = woody_ct_otel_collector:get_trace(SpanCtx#span_ctx.trace_id), + %% Prepare otel attributes to match against + ProxyAttrs = mk_otel_attributes(Baggage), + Attrs = mk_otel_attributes(Baggage#{<<"proxied">> => <<"true">>}), + ?assertMatch( + ?OTEL_SPAN(SpanName, [ + ?OTEL_SPAN(<<"client Powerups:proxy_get_powerup">>, [ + %% Upon invocation event baggage is expected to be put in span attributes + ?OTEL_SPAN(<<"server Powerups:proxy_get_powerup">>, ProxyAttrs, [ + %% New client starts call here + ?OTEL_SPAN(<<"client Powerups:get_powerup">>, [ + ?OTEL_SPAN(<<"server Powerups:get_powerup">>, Attrs, [ + %% Expect no child spans uphere + ]) + ]) + ]) + ]) + ]), + Root + ). call_pass_thru_except_test(C) -> Armor = <<"Shield Belt">>, @@ -985,7 +1036,7 @@ init(_) -> {ok, { {one_for_one, 1, 1}, - [] + [#{id => woody_ct_otel_collector, start => {woody_ct_otel_collector, start_link, []}}] }}. %% @@ -1015,6 +1066,8 @@ handle_function(ProxyGetPowerup, {Name, To}, Context, _Opts) when ProxyGetPowerup =:= proxy_get_powerup orelse ProxyGetPowerup =:= bad_proxy_get_powerup -> + %% NOTE: Merges baggage, requires #{binary() => binary()} + ok = otel_baggage:set(#{<<"proxied">> => <<"true">>}), % NOTE % Client may return `{exception, _}` tuple with some business level exception % here, yet handler expects us to `throw/1` them. This is expected here it @@ -1054,6 +1107,12 @@ handle_event( -> _ = handle_proxy_event(Event, Code, TraceId, ParentId), log_event(Event, RpcId, Meta); +%% Handle invocation +handle_event(Event = ?EV_INVOKE_SERVICE_HANDLER, RpcId, Meta, _) -> + log_event(Event, RpcId, Meta), + SpanCtx = otel_tracer:current_span_ctx(), + _ = otel_span:set_attributes(SpanCtx, maps:map(fun(_Key, {Value, _Metadata}) -> Value end, otel_baggage:get_all())), + ok; handle_event(Event, RpcId, Meta, _) -> log_event(Event, RpcId, Meta). @@ -1084,6 +1143,7 @@ handle_proxy_event(Event, Code, Descr) -> erlang:error(badarg, [Event, Code, Descr]). log_event(Event, RpcId, Meta) -> + ok = woody_event_handler_otel:handle_event(Event, RpcId, Meta, []), woody_ct_event_h:handle_event(Event, RpcId, Meta, []). %% @@ -1285,3 +1345,8 @@ handle_sleep(Context) -> BinTimer -> timer:sleep(binary_to_integer(BinTimer)) end. + +mk_otel_attributes(Attributes) -> + SpanAttributeCountLimit = otel_span_limits:attribute_count_limit(), + SpanAttributeValueLengthLimit = otel_span_limits:attribute_value_length_limit(), + otel_attributes:new(Attributes, SpanAttributeCountLimit, SpanAttributeValueLengthLimit). From 2cbe19998c073c545bf0f6b4b5211639c40b9925 Mon Sep 17 00:00:00 2001 From: Aleksey Kashapov Date: Tue, 19 Dec 2023 15:17:08 +0300 Subject: [PATCH 07/10] TD-788: Adds metrics collectors for `hackney` and `ranch` stats (#30) * Adds ranch active_connections metric * Adds collectors test checks * Adds explicit 'return' clause to label helper func * Rename collector modules for consistency * Adds event handler option for RPC events severity mapping * Adds events severity mapping description --- README.md | 107 ++++++++ rebar.config | 6 +- rebar.lock | 6 + src/woody_event_handler.erl | 282 ++++++++++++++++----- src/woody_event_handler_default.erl | 3 +- src/woody_hackney_prometheus_collector.erl | 83 ++++++ src/woody_ranch_prometheus_collector.erl | 83 ++++++ test/woody_tests_SUITE.erl | 86 +++++-- 8 files changed, 573 insertions(+), 83 deletions(-) create mode 100644 src/woody_hackney_prometheus_collector.erl create mode 100644 src/woody_ranch_prometheus_collector.erl diff --git a/README.md b/README.md index 318a622..2c7f8b4 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,80 @@ check_loan_limits(Limits, Context, N) -> ### Woody Event Handler Π˜Π½Ρ‚Π΅Ρ€Ρ„Π΅ΠΉΡ для получСния ΠΈ логирования событий RPC Π±ΠΈΠ±Π»ΠΈΠΎΡ‚Π΅ΠΊΠΈ. Π’Π°ΠΊΠΆΠ΅ содСрТит Π²ΡΠΏΠΎΠΌΠΎΠ³Π°Ρ‚Π΅Π»ΡŒΠ½Ρ‹Π΅ Ρ„ΡƒΠ½ΠΊΡ†ΠΈΠΈ для ΡƒΠ΄ΠΎΠ±Π½ΠΎΠ³ΠΎ форматирования событий. ΠŸΡ€ΠΈΠΌΠ΅Ρ€ Ρ€Π΅Π°Π»ΠΈΠ·Π°Ρ†ΠΈΠΈ _event handler_'Π° - [woody_event_handler_default.erl](src/woody_event_handler_default.erl). +Π§Π΅Ρ€Π΅Π· ΠΎΠΏΡ†ΠΈΠΈ ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚Ρ‡ΠΈΠΊΠ° ΠΌΠΎΠΆΠ½ΠΎ ΡΠΎΠΎΠ±Ρ‰ΠΈΡ‚ΡŒ ΠΏΠ°Ρ€Π°ΠΌΠ΅Ρ‚Ρ€Ρ‹ соотвСтствия событий RPC для уровня логирования: + +``` erlang +woody_event_handler_default:handle_event(Event, RpcId, Meta, #{ + formatter_opts => ..., + events_severity => #{ + ['call service'] => debug, + ... + } +}). +``` + +Π“Π΄Π΅ эти ΠΏΠ°Ρ€Π°ΠΌΠ΅Ρ‚Ρ€Ρ‹ ΠΈΠΌΠ΅ΡŽΡ‚ значСния ΠΏΠΎ ΡƒΠΌΠΎΠ»Ρ‡Π°Π½ΠΈΡŽ Π² ΡΠ»Π΅Π΄ΡƒΡŽΡ‰Π΅ΠΌ Π²ΠΈΠ΄Π΅: + +``` erlang +#{ + events_severity => #{ + %% ΠŸΠΎΠ³Ρ€Π°Π½ΠΈΡ‡Π½Ρ‹Π΅ события Ρ€Π°Π±ΠΎΡ‚Ρ‹ ΠΊΠ»ΠΈΠ΅Π½Ρ‚Π° + ['client begin'] => debug, + ['client end'] => debug, + + %% Начало Π²Ρ‹Π·ΠΎΠ²Π° сСрвиса, ΠΏΠ΅Ρ€Π΅Π΄ Ρ„ΠΎΡ€ΠΌΠΈΡ€ΠΎΠ²Π°Π½ΠΈΠ΅ΠΌ запроса + ['call service'] => info, + + %% Π Π΅Π·ΡƒΠ»ΡŒΡ‚Π°Ρ‚ Π²Ρ‹Π·ΠΎΠ²Π° сСрвиса Π½Π° ΠΊΠ»ΠΈΠ΅Π½Ρ‚Π΅ + ['service result'] => info, + ['service result', error] => error, + %% Π‘ΠΎΠ±Ρ‹Ρ‚ΠΈΠ΅ ΡΠΎΡΡ‚ΠΎΡΠ²ΡˆΠ΅Π³ΠΎΡΡ Π²Ρ‹Π·ΠΎΠ²Π° с Π²ΠΎΠ·Π²Ρ€Π°Ρ‰Ρ‘Π½Π½ΠΎΠΉ ошибкой Π² качСствС + %% Ρ€Π΅Π·ΡƒΠ»ΡŒΡ‚Π°Ρ‚Π° + ['service result', warning] => warning, + + %% ΠšΠ»ΠΈΠ΅Π½Ρ‚ΡΠΊΠΈΠ΅ события, Π²ΠΊΠ»ΡŽΡ‡Π°Ρ обнаруТСния хоста + ['client send'] => debug, + ['client resolve begin'] => debug, + ['client resolve result'] => debug, + ['client receive'] => debug, + ['client receive', error] => warning, + + %% НСпосрСдствСнныС события обслуТивания запроса сСрвСром + ['server receive'] => debug, + ['server send'] => debug, + ['server send', error] => warning, + + %% Начало обслуТивания Π²Ρ‹Π·ΠΎΠ²Π° Ρ„ΡƒΠ½ΠΊΡ†ΠΈΠΈ сСрвиса + ['invoke service handler'] => info, + + %% Π—Π°Π²Π΅Ρ€ΡˆΠ΅Π½ΠΈΠ΅ обслуТиваниС Π²Ρ‹Π·ΠΎΠ²Π° с Ρ€Π°Π·Π½Ρ‹ΠΌ ΠΈΡ‚ΠΎΠ³ΠΎΠΌ + ['service handler result'] => info, + ['service handler result', error, business] => info, + ['service handler result', error, system] => error, + %% ΠžΠ±ΡΠ»ΡƒΠΆΠΈΠ²Π°Π½ΠΈΠ΅ Π²Ρ‹Π·ΠΎΠ²Π° Π·Π°Π²Π΅Ρ€ΡˆΠΈΠ»ΠΎΡΡŒ ΠΏΠΎΠΏΡ€Π°Π²ΠΈΠΌΠΎΠΉ ошибкой; + %% ΠΏΠΎ ΠΊΡ€Π°ΠΉΠ½Π΅ΠΉ ΠΌΠ΅Ρ€Π΅ ΠΎΠ½Π° Π½Π΅ Π² Ρ€Π°ΠΌΠΊΠ°Ρ… бизнСс-Π»ΠΎΠ³ΠΈΠΊΠΈ Π½ΠΎ ΠΈ Π½Π΅ систСмноС + %% ΠΈΡΠΊΠ»ΡŽΡ‡Π΅Π½ΠΈΠ΅ + ['service handler result', warning] => warning, + + %% Бобытия ΠΊΠ΅ΡˆΠΈΡ€ΡƒΡŽΡ‰Π΅ΠΉ ΠΎΠ±Π΅Ρ€Ρ‚ΠΊΠΈ ΠΊΠ»ΠΈΠ΅Π½Ρ‚Π° + ['client cache begin'] => debug, + ['client cache end'] => debug, + ['client cache hit'] => info, + ['client cache miss'] => debug, + ['client cache update'] => debug, + ['client cache result'] => debug, + + %% Π’Π½ΡƒΡ‚Ρ€Π΅Π½Π½ΠΈΠ΅ ошибки с Ρ€Π°Π·Π½Ρ‹ΠΌ контСкстом/происхоТдСниСм + ['internal error', system] => error, + ['internal error', business] => warning, + %% Π‘ΠΎΠ±Ρ‹Ρ‚ΠΈΠ΅ трассировки Π½Π° ΡƒΡ€ΠΎΠ²Π½Π΅ woody, см. Π΄Π°Π»Π΅Π΅ + + ['trace event'] => debug + } +}. +``` + + ### Tracing МоТно динамичСски Π²ΠΊΠ»ΡŽΡ‡Π°Ρ‚ΡŒ ΠΈ Π²Ρ‹ΠΊΠ»ΡŽΡ‡Π°Ρ‚ΡŒ трассировку http запросов ΠΈ ΠΎΡ‚Π²Π΅Ρ‚ΠΎΠ². @@ -176,3 +250,36 @@ check_loan_limits(Limits, Context, N) -> application:set_env(woody, trace_http_server, true). application:unset_env(woody, trace_http_server). ``` + +### Prometheus metrics + +Π§Ρ‚ΠΎΠ±Ρ‹ ΠΎΡΡƒΡ‰Π΅ΡΡ‚Π²Π»ΡΡ‚ΡŒ экспорт ΠΌΠ΅Ρ‚Ρ€ΠΈΠΊ слСдуСт Π΄ΠΎΠ±Π°Π²ΠΈΡ‚ΡŒ [ΡΠΎΠΎΡ‚Π²Π΅Ρ‚ΡΡ‚Π²ΡƒΡŽΡ‰ΠΈΠΉ хэндлСр](https://github.com/deadtrickster/prometheus-cowboy#exporting-metrics-with-handlers) для cowboy-сСрвСра. + +``` erlang +{deps, [ + ... + {prometheus_cowboy, "0.1.8"} +]} +``` + +Для сбора сСрвСрных ΠΌΠ΅Ρ‚Ρ€ΠΈΠΊ Π½Π΅ΠΎΠ±Ρ…ΠΎΠ΄ΠΈΠΌΠΎ Π½Π° стартС прилоТСния ΠΎΠ±ΡŠΡΠ²ΠΈΡ‚ΡŒ ΠΈΡ… + +``` erlang +ok = woody_ranch_prometheus_collector:setup() +``` + +Если Π΄ΠΎΠΏΠΎΠ»Π½ΠΈΡ‚Π΅Π»ΡŒΠ½ΠΎ ΠΈΠ½Ρ‚Π΅Ρ€Π΅ΡΡƒΡŽΡ‚ всС ΠΌΠ΅Ρ‚Ρ€ΠΈΠΊΠΈ ковбоя Ρ‚ΠΎ ΠΌΠΎΠΆΠ½ΠΎ Π΄ΠΎΠ±Π°Π²ΠΈΡ‚ΡŒ [Ρ€Π΅Π°Π»ΠΈΠ·Π°Ρ†ΠΈΡŽ обсСрвСра ΠΈΠ· Π±ΠΈΠ±Π»ΠΈΠΎΡ‚Π΅ΠΊΠΈ](https://github.com/deadtrickster/prometheus-cowboy?tab=readme-ov-file#exporting-cowboy2-metrics). + +Для сбора клиСнтских ΠΌΠ΅Ρ‚Ρ€ΠΈΠΊ Π½Π΅ΠΎΠ±Ρ…ΠΎΠ΄ΠΈΠΌΠΎ Π½Π° стартС прилоТСния ΠΎΠ±ΡŠΡΠ²ΠΈΡ‚ΡŒ ΠΈΡ… + +``` erlang +ok = woody_hackney_prometheus_collector:setup() +``` + +Π­Ρ‚ΠΎ Π±ΡƒΠ΄Π΅Ρ‚ ΠΏΡƒΠ±Π»ΠΈΠΊΠΎΠ²Π°Ρ‚ΡŒ цСлочислСнныС значСния Π² шкалС 'woody_hackney_pool_usage' с ΠΌΠ΅Ρ‚ΠΊΠ°ΠΌΠΈ `pool` Π² качСствС названия ΠΏΡƒΠ»Π° ΠΈ `status` Π² качСствС ΠΏΠ°Ρ€Π°ΠΌΠ΅Ρ‚Ρ€Π° ΡΠΎΠΎΡ‚Π²Π΅Ρ‚ΡΡ‚Π²ΡƒΡŽΡ‰Π΅Π³ΠΎ значСния: + +- `in_use_count` -- ΠΈΡΠΏΠΎΠ»ΡŒΠ·ΡƒΠ΅ΠΌΡ‹Π΅ соСдинСния Π² ΠΏΡƒΠ»Π΅; +- `free_count` -- свободноС количСство Π² ΠΏΡƒΠ»Π΅; +- `queue_count` -- ΠΎΡ‡Π΅Ρ€Π΅Π΄ΡŒ Π·Π° свободными соСдинСнСиями + +**TODO** Π’ΠΎΠ·ΠΌΠΎΠΆΠ½ΠΎ стоит Ρ€Π°ΡΡΠΌΠΎΡ‚Ρ€Π΅Ρ‚ΡŒ ΠΏΡƒΠ±Π»ΠΈΠΊΠ°Ρ†ΠΈΡŽ ΠΌΠ΅Ρ‚Ρ€ΠΈΠΊ ΠΏΠΎ количСству исполняСмых запросов Π² ΠΎΠ±Ρ‰Π΅ΠΌ, с Ρ€Π°Π·Π±ΠΈΠ²ΠΊΠΎΠΉ ΠΏΠΎ хосту ΠΈ количСства Π½ΠΎΠ²Ρ‹Ρ… ΠΈ ΠΏΠ΅Ρ€Π΅ΠΈΡΠΏΠΎΠ»ΡŒΠ·ΡƒΠ΅ΠΌΡ‹Ρ… соСдининий Π² ΠΊΠ°ΠΆΠ΄ΠΎΠΌ ΠΈΠ· ΠΏΡƒΠ»ΠΎΠ². [Π₯Π°ΠΊΠ½ΠΈ это прСдоставляСт](https://github.com/benoitc/hackney/tree/1.18.0#metrics). diff --git a/rebar.config b/rebar.config index 749cd4b..5ae0b31 100644 --- a/rebar.config +++ b/rebar.config @@ -33,6 +33,7 @@ {thrift, {git, "https://github.com/valitydev/thrift_erlang.git", {branch, "master"}}}, {snowflake, {git, "https://github.com/valitydev/snowflake.git", {branch, "master"}}}, {genlib, {git, "https://github.com/valitydev/genlib.git", {branch, "master"}}}, + {prometheus, "4.8.1"}, {opentelemetry_api, "1.2.1"} ]}. @@ -81,9 +82,6 @@ {deps, [ {cth_readable, "1.4.9"}, {proper, "1.4.0"}, - {woody_api_hay, - {git, "https://github.com/valitydev/woody_api_hay.git", - {ref, "4c39134cddaa9bf6fb8db18e7030ae64f1efb3a9"}}}, {damsel, {git, "https://github.com/valitydev/damsel.git", {ref, "3fa6f31db54b2ae781b27898ab4daf56bb36eb36"}}}, {mg_proto, @@ -92,7 +90,7 @@ {opentelemetry, "1.3.0"} ]}, {dialyzer, [ - {plt_extra_apps, [how_are_you, eunit, proper, common_test, cth_readable, opentelemetry]} + {plt_extra_apps, [prometheus, eunit, proper, common_test, cth_readable, opentelemetry]} ]} ]} ]}. diff --git a/rebar.lock b/rebar.lock index 103c490..191d79b 100644 --- a/rebar.lock +++ b/rebar.lock @@ -17,6 +17,8 @@ {pkg,<<"opentelemetry_semantic_conventions">>,<<"0.2.0">>}, 1}, {<<"parse_trans">>,{pkg,<<"parse_trans">>,<<"3.3.1">>},1}, + {<<"prometheus">>,{pkg,<<"prometheus">>,<<"4.8.1">>},0}, + {<<"quantile_estimator">>,{pkg,<<"quantile_estimator">>,<<"0.2.1">>},1}, {<<"ranch">>,{pkg,<<"ranch">>,<<"1.8.0">>},1}, {<<"snowflake">>, {git,"https://github.com/valitydev/snowflake.git", @@ -42,6 +44,8 @@ {<<"opentelemetry_api">>, <<"7B69ED4F40025C005DE0B74FCE8C0549625D59CB4DF12D15C32FE6DC5076FF42">>}, {<<"opentelemetry_semantic_conventions">>, <<"B67FE459C2938FCAB341CB0951C44860C62347C005ACE1B50F8402576F241435">>}, {<<"parse_trans">>, <<"16328AB840CC09919BD10DAB29E431DA3AF9E9E7E7E6F0089DD5A2D2820011D8">>}, + {<<"prometheus">>, <<"FA76B152555273739C14B06F09F485CF6D5D301FE4E9D31B7FF803D26025D7A0">>}, + {<<"quantile_estimator">>, <<"EF50A361F11B5F26B5F16D0696E46A9E4661756492C981F7B2229EF42FF1CD15">>}, {<<"ranch">>, <<"8C7A100A139FD57F17327B6413E4167AC559FBC04CA7448E9BE9057311597A1D">>}, {<<"ssl_verify_fun">>, <<"CF344F5692C82D2CD7554F5EC8FD961548D4FD09E7D22F5B62482E5AEAEBD4B0">>}, {<<"unicode_util_compat">>, <<"BC84380C9AB48177092F43AC89E4DFA2C6D62B40B8BD132B1059ECC7232F9A78">>}]}, @@ -58,6 +62,8 @@ {<<"opentelemetry_api">>, <<"6D7A27B7CAD2AD69A09CABF6670514CAFCEC717C8441BEB5C96322BAC3D05350">>}, {<<"opentelemetry_semantic_conventions">>, <<"D61FA1F5639EE8668D74B527E6806E0503EFC55A42DB7B5F39939D84C07D6895">>}, {<<"parse_trans">>, <<"07CD9577885F56362D414E8C4C4E6BDF10D43A8767ABB92D24CBE8B24C54888B">>}, + {<<"prometheus">>, <<"6EDFBE928D271C7F657A6F2C46258738086584BD6CAE4A000B8B9A6009BA23A5">>}, + {<<"quantile_estimator">>, <<"282A8A323CA2A845C9E6F787D166348F776C1D4A41EDE63046D72D422E3DA946">>}, {<<"ranch">>, <<"49FBCFD3682FAB1F5D109351B61257676DA1A2FDBE295904176D5E521A2DDFE5">>}, {<<"ssl_verify_fun">>, <<"BDB0D2471F453C88FF3908E7686F86F9BE327D065CC1EC16FA4540197EA04680">>}, {<<"unicode_util_compat">>, <<"25EEE6D67DF61960CF6A794239566599B09E17E668D3700247BC498638152521">>}]} diff --git a/src/woody_event_handler.erl b/src/woody_event_handler.erl index f819d41..493eea4 100644 --- a/src/woody_event_handler.erl +++ b/src/woody_event_handler.erl @@ -4,6 +4,7 @@ -export([handle_event/3, handle_event/4]). -export([get_event_severity/2]). +-export([get_event_severity/3]). -export([format_event/3, format_event/4]). -export([format_meta/2, format_meta/3]). -export([format_rpc_id/1]). @@ -221,7 +222,8 @@ -export_type([meta/0]). -type options() :: #{ - formatter_opts => woody_event_formatter:opts() + formatter_opts => woody_event_formatter:opts(), + events_severity => #{nonempty_list(atom()) => severity()} }. -export_type([options/0]). @@ -379,68 +381,77 @@ format_event(UnknownEventType, Meta, _Opts) -> {" unknown woody event type '~s' with meta ~p", [UnknownEventType, Meta]}. -spec get_event_severity(event(), event_meta()) -> severity(). -get_event_severity(?EV_CLIENT_BEGIN, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_END, _Meta) -> - debug; -get_event_severity(?EV_CALL_SERVICE, _Meta) -> - info; -get_event_severity(?EV_SERVICE_RESULT, #{status := ok}) -> - info; -get_event_severity(?EV_SERVICE_RESULT, #{status := error, stack := _Stack}) -> - error; -get_event_severity(?EV_SERVICE_RESULT, #{status := error, result := _Error}) -> - warning; -get_event_severity(?EV_CLIENT_SEND, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_RESOLVE_RESULT, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_RECEIVE, #{status := ok}) -> - debug; -get_event_severity(?EV_CLIENT_RECEIVE, #{status := error}) -> - warning; -get_event_severity(?EV_SERVER_RECEIVE, _Meta) -> - debug; -get_event_severity(?EV_SERVER_SEND, #{status := ok}) -> - debug; -get_event_severity(?EV_SERVER_SEND, #{status := error}) -> - warning; -get_event_severity(?EV_INVOKE_SERVICE_HANDLER, _Meta) -> - info; -get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := ok}) -> - info; -get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := business}) -> - info; -get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := system, stack := _Stack}) -> - error; -get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := system, result := _Error}) -> - warning; -get_event_severity(?EV_CLIENT_CACHE_BEGIN, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_CACHE_END, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_CACHE_HIT, _Meta) -> - info; -get_event_severity(?EV_CLIENT_CACHE_MISS, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_CACHE_UPDATE, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_CACHE_RESULT, _Meta) -> - debug; -get_event_severity(?EV_INTERNAL_ERROR, #{error := _, class := _Class, stack := _Stack}) -> - error; -get_event_severity(?EV_INTERNAL_ERROR, #{error := _, reason := _Reason}) -> - warning; -get_event_severity(?EV_TRACE, _Meta) -> - debug; -get_event_severity(_UnknownEventType, _Meta) -> - warning. +get_event_severity(Event, Meta) -> + get_event_severity(Event, Meta, #{}). + +-spec get_event_severity(event(), event_meta(), options()) -> severity(). +get_event_severity(?EV_CLIENT_BEGIN = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_END = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CALL_SERVICE = Event, _Meta, Opts) -> + map_severity([Event], Opts, info); +get_event_severity(?EV_SERVICE_RESULT = Event, #{status := ok}, Opts) -> + map_severity([Event], Opts, info); +get_event_severity(?EV_SERVICE_RESULT = Event, #{status := error, stack := _Stack}, Opts) -> + map_severity([Event, error], Opts, error); +get_event_severity(?EV_SERVICE_RESULT = Event, #{status := error, result := _Error}, Opts) -> + map_severity([Event, warning], Opts, warning); +get_event_severity(?EV_CLIENT_SEND = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_RESOLVE_BEGIN = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_RESOLVE_RESULT = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_RECEIVE = Event, #{status := ok}, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_RECEIVE = Event, #{status := error}, Opts) -> + map_severity([Event, error], Opts, warning); +get_event_severity(?EV_SERVER_RECEIVE = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_SERVER_SEND = Event, #{status := ok}, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_SERVER_SEND = Event, #{status := error}, Opts) -> + map_severity([Event, error], Opts, warning); +get_event_severity(?EV_INVOKE_SERVICE_HANDLER = Event, _Meta, Opts) -> + map_severity([Event], Opts, info); +get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := ok}, Opts) -> + map_severity([Event], Opts, info); +get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := business}, Opts) -> + map_severity([Event, error, business], Opts, info); +get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := system, stack := _Stack}, Opts) -> + map_severity([Event, error, system], Opts, error); +get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := system, result := _Error}, Opts) -> + map_severity([Event, warning], Opts, warning); +get_event_severity(?EV_CLIENT_CACHE_BEGIN = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_CACHE_END = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_CACHE_HIT = Event, _Meta, Opts) -> + map_severity([Event], Opts, info); +get_event_severity(?EV_CLIENT_CACHE_MISS = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_CACHE_UPDATE = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_CACHE_RESULT = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_INTERNAL_ERROR = Event, #{error := _, class := _Class, stack := _Stack}, Opts) -> + map_severity([Event, system], Opts, error); +get_event_severity(?EV_INTERNAL_ERROR = Event, #{error := _, reason := _Reason}, Opts) -> + map_severity([Event, business], Opts, warning); +get_event_severity(?EV_TRACE = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(UnknownEventType, _Meta, Opts) -> + map_severity([UnknownEventType], Opts, warning). %% %% Internal functions %% +-spec map_severity(nonempty_list(atom()), options(), severity()) -> severity(). +map_severity(Code, Opts, Default) -> + EventsSeverity = maps:get(events_severity, Opts, #{}), + maps:get(Code, EventsSeverity, Default). + -spec format_service_request(map(), options()) -> msg(). format_service_request(#{service_schema := {Module, Service}, function := Function, args := Args}, Opts) -> woody_event_formatter:format_call(Module, Service, Function, Args, get_formatter_opts(Opts)). @@ -1582,4 +1593,157 @@ exception_test_() -> ) ]. +-spec event_severity_defaults_test_() -> _. +event_severity_defaults_test_() -> + Opts = #{events_severity => default_severity_map()}, + Meta = #{role => server}, + [ + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_BEGIN, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_END, Meta, Opts)), + + ?_assertEqual(info, get_event_severity(?EV_CALL_SERVICE, Meta, Opts)), + + ?_assertEqual(info, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => ok}, Opts)), + ?_assertEqual(error, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, stack => []}, Opts)), + ?_assertEqual(warning, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, result => {}}, Opts)), + + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_SEND, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_RESOLVE_RESULT, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => ok}, Opts)), + ?_assertEqual(warning, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => error}, Opts)), + + ?_assertEqual(debug, get_event_severity(?EV_SERVER_RECEIVE, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_SERVER_SEND, Meta#{status => ok}, Opts)), + ?_assertEqual(warning, get_event_severity(?EV_SERVER_SEND, Meta#{status => error}, Opts)), + + ?_assertEqual(info, get_event_severity(?EV_INVOKE_SERVICE_HANDLER, Meta, Opts)), + + ?_assertEqual(info, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => ok}, Opts)), + ?_assertEqual( + info, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => business}, Opts) + ), + ?_assertEqual( + error, + get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, stack => []}, Opts) + ), + ?_assertEqual( + warning, + get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, result => {}}, Opts) + ), + + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_BEGIN, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_END, Meta, Opts)), + ?_assertEqual(info, get_event_severity(?EV_CLIENT_CACHE_HIT, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_MISS, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_UPDATE, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_RESULT, Meta, Opts)), + + ?_assertEqual( + error, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, class => throw, stack => []}, Opts) + ), + ?_assertEqual(warning, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, reason => test}, Opts)), + + ?_assertEqual(debug, get_event_severity(?EV_TRACE, Meta, Opts)) + ]. + +-spec event_severity_all_info_test_() -> _. +event_severity_all_info_test_() -> + ExpectAll = info, + Opts = #{events_severity => replace_all_with(ExpectAll, default_severity_map())}, + Meta = #{role => server}, + [ + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_BEGIN, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_END, Meta, Opts)), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_CALL_SERVICE, Meta, Opts)), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => ok}, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, stack => []}, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, result => {}}, Opts)), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_SEND, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RESOLVE_RESULT, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => ok}, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => error}, Opts)), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_RECEIVE, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_SEND, Meta#{status => ok}, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_SEND, Meta#{status => error}, Opts)), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_INVOKE_SERVICE_HANDLER, Meta, Opts)), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => ok}, Opts)), + ?_assertEqual( + ExpectAll, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => business}, Opts) + ), + ?_assertEqual( + ExpectAll, + get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, stack => []}, Opts) + ), + ?_assertEqual( + ExpectAll, + get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, result => {}}, Opts) + ), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_BEGIN, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_END, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_HIT, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_MISS, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_UPDATE, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_RESULT, Meta, Opts)), + + ?_assertEqual( + ExpectAll, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, class => throw, stack => []}, Opts) + ), + ?_assertEqual(ExpectAll, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, reason => test}, Opts)), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_TRACE, Meta, Opts)) + ]. + +replace_all_with(NewSeverity, SeverityMap) -> + maps:map(fun(_K, _V) -> NewSeverity end, SeverityMap). + +default_severity_map() -> + #{ + ['client begin'] => debug, + ['client end'] => debug, + + ['call service'] => info, + + ['service result'] => info, + ['service result', error] => error, + ['service result', warning] => warning, + + ['client send'] => debug, + ['client resolve begin'] => debug, + ['client resolve result'] => debug, + ['client receive'] => debug, + ['client receive', error] => warning, + + ['server receive'] => debug, + ['server send'] => debug, + ['server send', error] => warning, + + ['invoke service handler'] => info, + + ['service handler result'] => info, + ['service handler result', error, business] => info, + ['service handler result', error, system] => error, + ['service handler result', warning] => warning, + + ['client cache begin'] => debug, + ['client cache end'] => debug, + ['client cache hit'] => info, + ['client cache miss'] => debug, + ['client cache update'] => debug, + ['client cache result'] => debug, + + ['internal error', system] => error, + ['internal error', business] => warning, + + ['trace event'] => debug + }. + -endif. diff --git a/src/woody_event_handler_default.erl b/src/woody_event_handler_default.erl index e7efdfc..9dcb6dd 100644 --- a/src/woody_event_handler_default.erl +++ b/src/woody_event_handler_default.erl @@ -22,7 +22,8 @@ handle_event(Event, RpcId, Meta, Opts) -> EHOpts = get_event_handler_opts(Opts), {Format, Msg} = woody_event_handler:format_event(Event, Meta, RpcId, EHOpts), - Function = get_logger_function(woody_event_handler:get_event_severity(Event, Meta)), + Severity = woody_event_handler:get_event_severity(Event, Meta, Opts), + Function = get_logger_function(Severity), _ = error_logger:Function(Format, Msg), ok. diff --git a/src/woody_hackney_prometheus_collector.erl b/src/woody_hackney_prometheus_collector.erl new file mode 100644 index 0000000..a2b26a0 --- /dev/null +++ b/src/woody_hackney_prometheus_collector.erl @@ -0,0 +1,83 @@ +-module(woody_hackney_prometheus_collector). + +-export([setup/0]). + +%% + +-behaviour(prometheus_collector). + +-export([collect_mf/2]). +-export([collect_metrics/2]). +-export([deregister_cleanup/1]). + +-define(POOL_USAGE, woody_hackney_pool_usage). + +%% Installation + +%% @doc Installs custom collector for hackney's pool metrics +-spec setup() -> ok. +setup() -> + prometheus_registry:register_collector(registry(), ?MODULE). + +%% Collector API + +-type data() :: [data_item()]. +-type data_item() :: {data_labels(), non_neg_integer()}. +-type data_labels() :: [{atom(), atom() | nonempty_string() | binary() | iolist()}]. +-type pool_stats() :: [{atom(), any()}]. + +-spec collect_mf(prometheus_registry:registry(), prometheus_collector:collect_mf_callback()) -> ok. +collect_mf(_Registry, Callback) -> + F = fun({Pool, _Pid}) -> + make_pool_data(Pool, get_pool_stats(Pool)) + end, + Data = lists:flatten(lists:map(F, get_hackney_pools())), + Callback(create_gauge(Data)). + +-spec collect_metrics(prometheus_metric:name(), data()) -> + prometheus_model:'Metric'() | [prometheus_model:'Metric'()]. +collect_metrics(_Name, Data) -> + [prometheus_model_helpers:gauge_metric(Labels, Value) || {Labels, Value} <- Data]. + +-spec deregister_cleanup(prometheus_registry:registry()) -> ok. +deregister_cleanup(_Registry) -> + %% Nothing to clean up + ok. + +%% Private + +registry() -> + default. + +get_pool_stats(Pool) -> + %% NOTE It looks like 'hackney_pool' table data can occasionally contain + %% dead pools + try + hackney_pool:get_stats(Pool) + catch + %% "Time to make the chimi-fuckin'-changas." + exit:{noproc, _Reason} -> + [] + end. + +-spec create_gauge(data()) -> prometheus_model:'MetricFamily'(). +create_gauge(Data) -> + Help = "Connection pool status by used, free and queued connections count", + prometheus_model_helpers:create_mf(?POOL_USAGE, Help, gauge, ?MODULE, Data). + +-spec make_pool_data(atom(), pool_stats()) -> data(). +make_pool_data(Pool, Stats0) -> + Stats1 = maps:with([in_use_count, free_count, queue_count], maps:from_list(Stats0)), + lists:foldl(fun({S, V}, Data) -> [make_data_item(Pool, S, V) | Data] end, [], maps:to_list(Stats1)). + +make_data_item(Pool, Status, Value) -> + Labels = [{pool, Pool}, {status, Status}], + {Labels, Value}. + +get_hackney_pools() -> + %% Shamelessly pasted from + %% https://github.com/soundtrackyourbrand/prometheus-hackney-collector + %% + %% Basically, we rely on not publicly exposed table containing started pools + %% under hackney's hood. + ets:tab2list(hackney_pool). diff --git a/src/woody_ranch_prometheus_collector.erl b/src/woody_ranch_prometheus_collector.erl new file mode 100644 index 0000000..38e7733 --- /dev/null +++ b/src/woody_ranch_prometheus_collector.erl @@ -0,0 +1,83 @@ +-module(woody_ranch_prometheus_collector). + +-export([setup/0]). + +%% + +-behaviour(prometheus_collector). + +-export([collect_mf/2]). +-export([collect_metrics/2]). +-export([deregister_cleanup/1]). + +-define(CONNECTIONS, woody_ranch_listener_connections). + +%% Installation + +-spec setup() -> ok. +setup() -> + prometheus_registry:register_collector(registry(), ?MODULE). + +%% Collector API + +-type data() :: [data_item()]. +-type data_item() :: {data_labels(), non_neg_integer()}. +-type data_labels() :: [{atom(), atom() | nonempty_string() | binary() | iolist()}]. +-type maybe_woody_server_ref() :: {module(), ID :: atom()} | ranch:ref(). +-type ranch_info() :: + [{maybe_woody_server_ref(), [{atom(), any()}]}] + | #{maybe_woody_server_ref() => #{atom() => any()}}. + +-spec collect_mf(prometheus_registry:registry(), prometheus_collector:collect_mf_callback()) -> ok. +collect_mf(_Registry, Callback) -> + F = fun({ListenerRef, ListenerInfo}) -> + make_listener_data(ListenerRef, ListenerInfo) + end, + Data = lists:flatten(lists:map(F, get_listeners_info())), + Callback(create_gauge(Data)). + +-spec collect_metrics(prometheus_metric:name(), data()) -> + prometheus_model:'Metric'() | [prometheus_model:'Metric'()]. +collect_metrics(_Name, Data) -> + [prometheus_model_helpers:gauge_metric(Labels, Value) || {Labels, Value} <- Data]. + +-spec deregister_cleanup(prometheus_registry:registry()) -> ok. +deregister_cleanup(_Registry) -> + %% Nothing to clean up + ok. + +%% Private + +registry() -> + default. + +-spec create_gauge(data()) -> prometheus_model:'MetricFamily'(). +create_gauge(Data) -> + Help = "Number of active connections", + prometheus_model_helpers:create_mf(?CONNECTIONS, Help, gauge, ?MODULE, Data). + +-spec make_listener_data(maybe_woody_server_ref(), #{atom() => any()}) -> data(). +make_listener_data(Ref, #{active_connections := V}) -> + Labels = [{listener, Ref}], + [{Labels, V}]; +make_listener_data(_Ref, _Info) -> + []. + +get_listeners_info() -> + lists:filter( + fun + ({_Ref, #{active_connections := _}}) -> true; + (_Else) -> false + end, + %% See https://ninenines.eu/docs/en/ranch/1.8/guide/listeners/#_obtaining_information_about_listeners + normalize_listeners_info(ranch:info()) + ). + +-dialyzer({no_match, normalize_listeners_info/1}). +-spec normalize_listeners_info(ranch_info()) -> [{maybe_woody_server_ref(), #{atom() => any()}}]. +%% Ranch v2 +normalize_listeners_info(#{} = Info) -> + maps:to_list(Info); +%% Ranch v1 +normalize_listeners_info(Info) -> + lists:map(fun({Ref, ListenerInfo}) -> {Ref, maps:from_list(ListenerInfo)} end, Info). diff --git a/test/woody_tests_SUITE.erl b/test/woody_tests_SUITE.erl index 6ffd9a7..79d1a5b 100644 --- a/test/woody_tests_SUITE.erl +++ b/test/woody_tests_SUITE.erl @@ -5,6 +5,7 @@ -include_lib("hackney/include/hackney_lib.hrl"). -include_lib("opentelemetry_api/include/opentelemetry.hrl"). -include_lib("opentelemetry/include/otel_span.hrl"). +-include_lib("prometheus/include/prometheus_model.hrl"). -include("woody_test_thrift.hrl"). -include("woody_defs.hrl"). @@ -280,6 +281,8 @@ cross_test_groups() -> } }. +-define(RANCH_REF, woody_ct). + %% %% starting/stopping %% @@ -287,21 +290,12 @@ init_per_suite(C) -> % dbg:tracer(), dbg:p(all, c), % dbg:tpl({woody_joint_workers, '_', '_'}, x), %%Apps = genlib_app:start_application_with(woody, [{trace_http_server, true}]), - application:set_env(hackney, mod_metrics, woody_client_metrics), - application:set_env(woody, woody_client_metrics_options, #{ - metric_key_mapping => #{ - [hackney, nb_requests] => [hackney, requests_in_process] - } - }), - application:set_env(how_are_you, metrics_handlers, [ - {woody_api_hay, #{ - interval => 1000 - }} - ]), {ok, Apps} = application:ensure_all_started(woody), - {ok, HayApps} = application:ensure_all_started(how_are_you), + {ok, MetricsApps} = application:ensure_all_started(prometheus), + ok = woody_ranch_prometheus_collector:setup(), + ok = woody_hackney_prometheus_collector:setup(), {ok, OtelApps} = setup_opentelemetry(), - [{apps, OtelApps ++ HayApps ++ Apps} | C]. + [{apps, OtelApps ++ MetricsApps ++ Apps} | C]. end_per_suite(C) -> % unset so it won't report metrics next suite @@ -334,21 +328,21 @@ init_per_testcase(find_multiple_pools_test, C) -> {ok, Sup} = start_tc_sup(), Pool1 = {swords, 15000, 100}, Pool2 = {shields, undefined, 50}, - ok = start_woody_server_with_pools(woody_ct, Sup, ['Weapons', 'Powerups'], [Pool1, Pool2], C), + ok = start_woody_server_with_pools(?RANCH_REF, Sup, ['Weapons', 'Powerups'], [Pool1, Pool2], C), [{sup, Sup} | C]; init_per_testcase(calls_with_cache, C) -> {ok, Sup} = start_tc_sup(), {ok, _} = start_caching_client(caching_client_ct, Sup), - {ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], C), + {ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], C), [{sup, Sup} | C]; init_per_testcase(server_handled_client_timeout_test, C) -> {ok, Sup} = start_tc_sup(), {ok, _} = supervisor:start_child(Sup, server_timeout_event_handler:child_spec()), - {ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], server_timeout_event_handler, C), + {ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], server_timeout_event_handler, C), [{sup, Sup} | C]; init_per_testcase(_, C) -> {ok, Sup} = start_tc_sup(), - {ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], C), + {ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], C), [{sup, Sup} | C]. init_per_group(woody_resolver, Config) -> @@ -381,7 +375,7 @@ start_error_server(TC, Sup) -> Code = get_fail_code(TC), Dispatch = cowboy_router:compile([{'_', [{?PATH_WEAPONS, ?MODULE, Code}]}]), Server = ranch:child_spec( - woody_ct, + ?RANCH_REF, ranch_tcp, [{ip, ?SERVER_IP}, {port, ?SERVER_PORT}], cowboy_clear, @@ -826,7 +820,18 @@ call_fail_w_no_headers(Id, Class, _Code) -> find_multiple_pools_test(_) -> true = is_pid(hackney_pool:find_pool(swords)), - true = is_pid(hackney_pool:find_pool(shields)). + true = is_pid(hackney_pool:find_pool(shields)), + MF = smuggle_mf_return_value(fun(F) -> woody_hackney_prometheus_collector:collect_mf(default, F) end), + %% We can't know order and values from hackney's pool info, but we can + %% expect that values for those pools must be provided. However exact number + %% of values can vary based on order of testcase execution and other + %% side-effects. + ValuesCount = length([swords, shields]) * length([queue_count, in_use_count, free_count]), + ?assertMatch( + #'MetricFamily'{type = 'GAUGE', name = <<"woody_hackney_pool_usage">>, metric = Values} when + length(Values) >= ValuesCount, + MF + ). call_thrift_multiplexed_test(_) -> Client = make_thrift_multiplexed_client( @@ -1229,6 +1234,7 @@ gun_test_basic(Id, Gun, Expect, C) -> catch Class:Reason -> ok end, + ok = assert_connections_metrics(), check_msg(Gun, Context). get_except({ok, _}) -> @@ -1350,3 +1356,45 @@ mk_otel_attributes(Attributes) -> SpanAttributeCountLimit = otel_span_limits:attribute_count_limit(), SpanAttributeValueLengthLimit = otel_span_limits:attribute_value_length_limit(), otel_attributes:new(Attributes, SpanAttributeCountLimit, SpanAttributeValueLengthLimit). + +assert_connections_metrics() -> + MF = smuggle_mf_return_value(fun(F) -> woody_ranch_prometheus_collector:collect_mf(default, F) end), + %% Sadly we can't match on listener ref since its an iodata representation of tuple. + ?assertMatch( + #'MetricFamily'{ + name = <<"woody_ranch_listener_connections">>, + type = 'GAUGE', + metric = [ + #'Metric'{ + label = [#'LabelPair'{name = <<"listener">>, value = _Ref}], + gauge = #'Gauge'{value = V} + } + ] + } when V >= 1, + MF + ), + %% NOTE See `prometheus_model_helpers:ensure_binary_or_string/1` + RefImpl1 = io_lib:format("~p", [{woody_server_thrift_http_handler, ?RANCH_REF}]), + RefImpl2 = io_lib:format("~p", [{woody_server_thrift_v2, ?RANCH_REF}]), + ?assertMatch( + Expected when Expected =:= RefImpl1 orelse Expected =:= RefImpl2, + get_metric_ref(hd(MF#'MetricFamily'.metric)) + ). + +get_metric_ref(#'Metric'{label = Labels}) -> + get_metric_ref_(Labels). + +get_metric_ref_([]) -> + undefined; +get_metric_ref_([#'LabelPair'{name = <<"listener">>, value = Ref} | _]) -> + Ref; +get_metric_ref_([_Label | Labels]) -> + get_metric_ref_(Labels). + +%% Collector module expects 'ok' return. Because of that dialyzer won't quit bitching. +smuggle_mf_return_value(Fun) -> + _ = Fun(fun(MF) -> + _ = erlang:put(smuggle, MF), + ok + end), + erlang:get(smuggle). From 14998c8e4752822e8aef19164edbb705360886ce Mon Sep 17 00:00:00 2001 From: Aleksey Kashapov Date: Tue, 19 Dec 2023 17:48:13 +0300 Subject: [PATCH 08/10] TD-788: Fixes broken prometheus collector return (#31) --- src/woody_hackney_prometheus_collector.erl | 3 ++- src/woody_ranch_prometheus_collector.erl | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/woody_hackney_prometheus_collector.erl b/src/woody_hackney_prometheus_collector.erl index a2b26a0..56dd06d 100644 --- a/src/woody_hackney_prometheus_collector.erl +++ b/src/woody_hackney_prometheus_collector.erl @@ -32,7 +32,8 @@ collect_mf(_Registry, Callback) -> make_pool_data(Pool, get_pool_stats(Pool)) end, Data = lists:flatten(lists:map(F, get_hackney_pools())), - Callback(create_gauge(Data)). + _ = Callback(create_gauge(Data)), + ok. -spec collect_metrics(prometheus_metric:name(), data()) -> prometheus_model:'Metric'() | [prometheus_model:'Metric'()]. diff --git a/src/woody_ranch_prometheus_collector.erl b/src/woody_ranch_prometheus_collector.erl index 38e7733..2ff92f8 100644 --- a/src/woody_ranch_prometheus_collector.erl +++ b/src/woody_ranch_prometheus_collector.erl @@ -34,7 +34,8 @@ collect_mf(_Registry, Callback) -> make_listener_data(ListenerRef, ListenerInfo) end, Data = lists:flatten(lists:map(F, get_listeners_info())), - Callback(create_gauge(Data)). + _ = Callback(create_gauge(Data)), + ok. -spec collect_metrics(prometheus_metric:name(), data()) -> prometheus_model:'Metric'() | [prometheus_model:'Metric'()]. From 3e2337a818086f33f0a1ede5d204aee7744c7c36 Mon Sep 17 00:00:00 2001 From: Aleksey Kashapov Date: Fri, 12 Jan 2024 12:55:44 +0300 Subject: [PATCH 09/10] TD-788: Adds prometheus support for hackney per host metrics (#32) --- src/woody_hackney_prometheus.erl | 134 +++++++++++++++++++++++++++++++ test/woody_tests_SUITE.erl | 3 +- 2 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 src/woody_hackney_prometheus.erl diff --git a/src/woody_hackney_prometheus.erl b/src/woody_hackney_prometheus.erl new file mode 100644 index 0000000..310a381 --- /dev/null +++ b/src/woody_hackney_prometheus.erl @@ -0,0 +1,134 @@ +%% Translates hackney metrics for prometheus. +%% See +%% https://github.com/benoitc/hackney/tree/1.18.0?tab=readme-ov-file#metrics +%% +%% For pool metrics see dedicated collector module +%% `woody_hackney_prometheus_collector`. +-module(woody_hackney_prometheus). + +-export([ + new/2, + delete/1, + increment_counter/1, + increment_counter/2, + decrement_counter/1, + decrement_counter/2, + update_histogram/2, + update_gauge/2, + update_meter/2 +]). + +-type name() :: any(). +-type metric() :: counter | histogram | gauge | meter. + +%% API + +-define(NB_REQUESTS, hackney_nb_requests). +-define(TOTAL_REQUESTS, hackney_total_requests). +-define(HOST_NB_REQUESTS, hackney_host_nb_requests). +-define(HOST_CONNECT_TIMEOUT, hackney_host_connect_timeout). +-define(HOST_CONNECT_ERROR, hackney_host_connect_error). +-define(HOST_NEW_CONNECTION, hackney_host_new_connection). +-define(HOST_REUSE_CONNECTION, hackney_host_reuse_connection). + +%% Ignore unsupported metric +-spec new(metric(), name()) -> ok. +%% Total counters +new(counter, [hackney, nb_requests]) -> + true = prometheus_gauge:declare([ + {name, ?NB_REQUESTS}, + {registry, registry()}, + {labels, []}, + {help, "Number of running requests."} + ]), + %% Per host counters, see + %% https://github.com/benoitc/hackney/tree/1.18.0?tab=readme-ov-file#metrics-per-hosts + %% NOTE Hackney won't call `metrics:new/3` for those counters + true = prometheus_gauge:declare([ + {name, ?HOST_NB_REQUESTS}, + {registry, registry()}, + {labels, [host]}, + {help, "Number of running requests."} + ]), + [ + true = prometheus_counter:declare([ + {name, Name}, + {registry, registry()}, + {labels, [host]}, + {help, Help} + ]) + || {Name, Help} <- [ + {?HOST_CONNECT_TIMEOUT, "Number of connect timeout."}, + {?HOST_CONNECT_ERROR, "Number of timeout errors."}, + {?HOST_NEW_CONNECTION, "Number of new pool connections per host."}, + {?HOST_REUSE_CONNECTION, "Number of reused pool connections per host."} + ] + ], + ok; +new(counter, [hackney, total_requests]) -> + true = prometheus_counter:declare([ + {name, ?TOTAL_REQUESTS}, + {registry, registry()}, + {labels, []}, + {help, "Total number of requests."} + ]), + ok; +new(_Type, _Name) -> + ok. + +-spec delete(name()) -> ok. +delete(_Name) -> + ok. + +-spec increment_counter(name()) -> ok. +increment_counter(Name) -> + increment_counter(Name, 1). + +-spec increment_counter(name(), pos_integer()) -> ok. +increment_counter([hackney, nb_requests], Value) -> + prometheus_gauge:inc(registry(), ?NB_REQUESTS, [], Value); +increment_counter([hackney, total_requests], Value) -> + prometheus_counter:inc(registry(), ?TOTAL_REQUESTS, [], Value); +increment_counter([hackney, Host, nb_requests], Value) -> + prometheus_gauge:inc(registry(), ?HOST_NB_REQUESTS, [{host, Host}], Value); +increment_counter([hackney, Host, connect_timeout], Value) -> + prometheus_counter:inc(registry(), ?HOST_CONNECT_TIMEOUT, [{host, Host}], Value); +increment_counter([hackney, Host, connect_error], Value) -> + prometheus_counter:inc(registry(), ?HOST_CONNECT_ERROR, [{host, Host}], Value); +increment_counter([hackney_pool, Host, new_connection], Value) -> + prometheus_counter:inc(registry(), ?HOST_NEW_CONNECTION, [{host, Host}], Value); +increment_counter([hackney_pool, Host, reuse_connection], Value) -> + prometheus_counter:inc(registry(), ?HOST_REUSE_CONNECTION, [{host, Host}], Value); +increment_counter(_Name, _Value) -> + ok. + +-spec decrement_counter(name()) -> ok. +decrement_counter(Name) -> + decrement_counter(Name, 1). + +-spec decrement_counter(name(), pos_integer()) -> ok. +decrement_counter([hackney, nb_requests], Value) -> + prometheus_gauge:dec(registry(), ?NB_REQUESTS, [], Value); +decrement_counter([hackney, Host, nb_requests], Value) -> + prometheus_gauge:dec(registry(), ?HOST_NB_REQUESTS, [{host, Host}], Value); +decrement_counter(_Name, _Value) -> + ok. + +-spec update_histogram(name(), fun(() -> ok) | number()) -> ok. +update_histogram(_Name, Fun) when is_function(Fun, 0) -> + Fun(); +update_histogram(_Name, _Value) -> + ok. + +-spec update_gauge(name(), number()) -> ok. +update_gauge(_Name, _Value) -> + ok. + +-spec update_meter(name(), number()) -> ok. +update_meter(_Name, _Value) -> + ok. + +%% + +registry() -> + default. diff --git a/test/woody_tests_SUITE.erl b/test/woody_tests_SUITE.erl index 79d1a5b..9de03d5 100644 --- a/test/woody_tests_SUITE.erl +++ b/test/woody_tests_SUITE.erl @@ -290,8 +290,9 @@ init_per_suite(C) -> % dbg:tracer(), dbg:p(all, c), % dbg:tpl({woody_joint_workers, '_', '_'}, x), %%Apps = genlib_app:start_application_with(woody, [{trace_http_server, true}]), - {ok, Apps} = application:ensure_all_started(woody), + ok = application:set_env(hackney, mod_metrics, woody_hackney_prometheus), {ok, MetricsApps} = application:ensure_all_started(prometheus), + {ok, Apps} = application:ensure_all_started(woody), ok = woody_ranch_prometheus_collector:setup(), ok = woody_hackney_prometheus_collector:setup(), {ok, OtelApps} = setup_opentelemetry(), From 81219ba5408e1c67f5eaed3c7e566ede42da88d4 Mon Sep 17 00:00:00 2001 From: Aleksey Kashapov Date: Tue, 16 Apr 2024 16:18:20 +0300 Subject: [PATCH 10/10] TD-788: Fixes 'host' labels values in hackney metrics (#34) * TD-788: Fixes 'host' labels values in hackney metrics * Temporarily disables codecov report upload --- .github/workflows/erlang-checks.yml | 1 + src/woody_hackney_prometheus.erl | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/erlang-checks.yml b/.github/workflows/erlang-checks.yml index 300ca08..f29549f 100644 --- a/.github/workflows/erlang-checks.yml +++ b/.github/workflows/erlang-checks.yml @@ -36,3 +36,4 @@ jobs: rebar-version: ${{ needs.setup.outputs.rebar-version }} use-thrift: true thrift-version: ${{ needs.setup.outputs.thrift-version }} + upload-coverage: false diff --git a/src/woody_hackney_prometheus.erl b/src/woody_hackney_prometheus.erl index 310a381..193b011 100644 --- a/src/woody_hackney_prometheus.erl +++ b/src/woody_hackney_prometheus.erl @@ -90,15 +90,15 @@ increment_counter([hackney, nb_requests], Value) -> increment_counter([hackney, total_requests], Value) -> prometheus_counter:inc(registry(), ?TOTAL_REQUESTS, [], Value); increment_counter([hackney, Host, nb_requests], Value) -> - prometheus_gauge:inc(registry(), ?HOST_NB_REQUESTS, [{host, Host}], Value); + prometheus_gauge:inc(registry(), ?HOST_NB_REQUESTS, [Host], Value); increment_counter([hackney, Host, connect_timeout], Value) -> - prometheus_counter:inc(registry(), ?HOST_CONNECT_TIMEOUT, [{host, Host}], Value); + prometheus_counter:inc(registry(), ?HOST_CONNECT_TIMEOUT, [Host], Value); increment_counter([hackney, Host, connect_error], Value) -> - prometheus_counter:inc(registry(), ?HOST_CONNECT_ERROR, [{host, Host}], Value); + prometheus_counter:inc(registry(), ?HOST_CONNECT_ERROR, [Host], Value); increment_counter([hackney_pool, Host, new_connection], Value) -> - prometheus_counter:inc(registry(), ?HOST_NEW_CONNECTION, [{host, Host}], Value); + prometheus_counter:inc(registry(), ?HOST_NEW_CONNECTION, [Host], Value); increment_counter([hackney_pool, Host, reuse_connection], Value) -> - prometheus_counter:inc(registry(), ?HOST_REUSE_CONNECTION, [{host, Host}], Value); + prometheus_counter:inc(registry(), ?HOST_REUSE_CONNECTION, [Host], Value); increment_counter(_Name, _Value) -> ok. @@ -110,7 +110,7 @@ decrement_counter(Name) -> decrement_counter([hackney, nb_requests], Value) -> prometheus_gauge:dec(registry(), ?NB_REQUESTS, [], Value); decrement_counter([hackney, Host, nb_requests], Value) -> - prometheus_gauge:dec(registry(), ?HOST_NB_REQUESTS, [{host, Host}], Value); + prometheus_gauge:dec(registry(), ?HOST_NB_REQUESTS, [Host], Value); decrement_counter(_Name, _Value) -> ok.