initial commit
This commit is contained in:
411
fluxer_gateway/src/telemetry/gateway_metrics_collector.erl
Normal file
411
fluxer_gateway/src/telemetry/gateway_metrics_collector.erl
Normal file
@@ -0,0 +1,411 @@
|
||||
%% Copyright (C) 2026 Fluxer Contributors
|
||||
%%
|
||||
%% This file is part of Fluxer.
|
||||
%%
|
||||
%% Fluxer is free software: you can redistribute it and/or modify
|
||||
%% it under the terms of the GNU Affero General Public License as published by
|
||||
%% the Free Software Foundation, either version 3 of the License, or
|
||||
%% (at your option) any later version.
|
||||
%%
|
||||
%% Fluxer is distributed in the hope that it will be useful,
|
||||
%% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
%% GNU Affero General Public License for more details.
|
||||
%%
|
||||
%% You should have received a copy of the GNU Affero General Public License
|
||||
%% along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
-module(gateway_metrics_collector).
|
||||
-behaviour(gen_server).
|
||||
|
||||
-export([start_link/0]).
|
||||
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).
|
||||
|
||||
-export([
|
||||
inc_connections/0,
|
||||
inc_disconnections/0,
|
||||
inc_heartbeat_success/0,
|
||||
inc_heartbeat_failure/0,
|
||||
inc_resume_success/0,
|
||||
inc_resume_failure/0,
|
||||
inc_identify_rate_limited/0,
|
||||
record_rpc_latency/1,
|
||||
inc_websocket_close/1
|
||||
]).
|
||||
|
||||
-type state() :: #{
|
||||
report_interval_ms := pos_integer(),
|
||||
timer_ref := reference() | undefined,
|
||||
connections := non_neg_integer(),
|
||||
disconnections := non_neg_integer(),
|
||||
heartbeat_success := non_neg_integer(),
|
||||
heartbeat_failure := non_neg_integer(),
|
||||
resume_success := non_neg_integer(),
|
||||
resume_failure := non_neg_integer(),
|
||||
identify_rate_limited := non_neg_integer(),
|
||||
rpc_latencies := [non_neg_integer()]
|
||||
}.
|
||||
|
||||
-define(DEFAULT_REPORT_INTERVAL_MS, 30000).
|
||||
|
||||
%% @doc Starts the collector as a locally registered singleton gen_server
%% named after this module.
-spec start_link() -> {ok, pid()} | {error, term()}.
start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
|
||||
|
||||
%% @doc Builds the initial state with every counter zeroed. When collection
%% is enabled (config flag not false and a metrics host configured), the
%% first periodic report tick is scheduled immediately.
-spec init(list()) -> {ok, state()}.
init([]) ->
    Enabled = get_enabled(),
    ReportInterval = get_report_interval(),
    State0 = #{
        report_interval_ms => ReportInterval,
        timer_ref => undefined,
        connections => 0,
        disconnections => 0,
        heartbeat_success => 0,
        heartbeat_failure => 0,
        resume_success => 0,
        resume_failure => 0,
        identify_rate_limited => 0,
        rpc_latencies => []
    },
    if
        Enabled ->
            logger:info("[gateway_metrics_collector] starting with ~p ms interval", [ReportInterval]),
            {ok, State0#{timer_ref := schedule_collection(ReportInterval)}};
        true ->
            logger:info("[gateway_metrics_collector] disabled"),
            {ok, State0}
    end.
|
||||
|
||||
%% @doc No synchronous API is exposed; any call is acknowledged with ok
%% so callers do not hang, and state is left untouched.
-spec handle_call(term(), gen_server:from(), state()) -> {reply, term(), state()}.
handle_call(_Request, _From, State) ->
    {reply, ok, State}.
|
||||
|
||||
%% @doc Applies fire-and-forget metric events to the in-memory counters.
%% Latency samples are kept newest-first and capped at 1000 entries;
%% simple increment events are routed through a dispatch table so each
%% counter key is named exactly once. Unknown messages are ignored.
-spec handle_cast(term(), state()) -> {noreply, state()}.
handle_cast({record_rpc_latency, LatencyMs}, #{rpc_latencies := Latencies} = State) ->
    Cap = 1000,
    Kept =
        case length(Latencies) >= Cap of
            true -> lists:sublist(Latencies, Cap - 1);
            false -> Latencies
        end,
    {noreply, State#{rpc_latencies := [LatencyMs | Kept]}};
handle_cast(Event, State) ->
    case counter_key(Event) of
        {ok, Key} ->
            {noreply, maps:update_with(Key, fun(N) -> N + 1 end, State)};
        error ->
            {noreply, State}
    end.

%% Maps an increment event atom onto the state key it bumps.
counter_key(inc_connections) -> {ok, connections};
counter_key(inc_disconnections) -> {ok, disconnections};
counter_key(inc_heartbeat_success) -> {ok, heartbeat_success};
counter_key(inc_heartbeat_failure) -> {ok, heartbeat_failure};
counter_key(inc_resume_success) -> {ok, resume_success};
counter_key(inc_resume_failure) -> {ok, resume_failure};
counter_key(inc_identify_rate_limited) -> {ok, identify_rate_limited};
counter_key(_) -> error.
|
||||
|
||||
%% @doc Periodic tick: flushes the current window of metrics, re-arms the
%% timer, and zeroes every per-window counter. Other messages are drained
%% silently so the mailbox cannot accumulate.
-spec handle_info(term(), state()) -> {noreply, state()}.
handle_info(collect_and_report, #{report_interval_ms := Interval} = State) ->
    collect_and_report_metrics(State),
    NextTimer = schedule_collection(Interval),
    Reset = maps:merge(State, #{
        timer_ref => NextTimer,
        connections => 0,
        disconnections => 0,
        heartbeat_success => 0,
        heartbeat_failure => 0,
        resume_success => 0,
        resume_failure => 0,
        identify_rate_limited => 0,
        rpc_latencies => []
    }),
    {noreply, Reset};
handle_info(_Info, State) ->
    {noreply, State}.
|
||||
|
||||
%% @doc Cancels the pending report timer (if one was armed) on shutdown.
-spec terminate(term(), state()) -> ok.
terminate(_Reason, #{timer_ref := undefined}) ->
    ok;
terminate(_Reason, #{timer_ref := TimerRef}) ->
    _ = erlang:cancel_timer(TimerRef),
    ok.
|
||||
|
||||
%% @doc Upgrade hook. Converts the legacy 11-element record-style state
%% tuple into the current map representation; any other state (already a
%% map, or an unrecognized shape) is passed through unchanged.
-spec code_change(term(), state() | tuple(), term()) -> {ok, state()}.
code_change(_OldVsn, OldState, _Extra) when
    is_tuple(OldState), tuple_size(OldState) =:= 11, element(1, OldState) =:= state
->
    Keys = [
        report_interval_ms,
        timer_ref,
        connections,
        disconnections,
        heartbeat_success,
        heartbeat_failure,
        resume_success,
        resume_failure,
        identify_rate_limited,
        rpc_latencies
    ],
    [state | Values] = tuple_to_list(OldState),
    {ok, maps:from_list(lists:zip(Keys, Values))};
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
|
||||
|
||||
%% @doc Arms a one-shot timer that delivers collect_and_report to this
%% process after IntervalMs; returns the timer reference so terminate/2
%% can cancel it.
-spec schedule_collection(pos_integer()) -> reference().
schedule_collection(IntervalMs) ->
    erlang:send_after(IntervalMs, self(), collect_and_report).
|
||||
|
||||
%% @doc Collection is on unless explicitly disabled via the
%% gateway_metrics_enabled config flag, and only if the metrics client
%% has a destination host configured.
-spec get_enabled() -> boolean().
get_enabled() ->
    fluxer_gateway_env:get(gateway_metrics_enabled) =/= false
        andalso metrics_client:is_enabled().
|
||||
|
||||
%% @doc Reads the report interval from config; any non-positive or
%% non-integer value falls back to the compiled-in default.
-spec get_report_interval() -> pos_integer().
get_report_interval() ->
    Configured = fluxer_gateway_env:get(gateway_metrics_report_interval_ms),
    if
        is_integer(Configured) andalso Configured > 0 -> Configured;
        true -> ?DEFAULT_REPORT_INTERVAL_MS
    end.
|
||||
|
||||
%% @doc Gathers every metric section (process counts, mailboxes, memory,
%% system, per-window events) into one flat gauge list and ships it as a
%% single batch; nothing is sent when the list is empty.
-spec collect_and_report_metrics(state()) -> ok.
collect_and_report_metrics(State) ->
    Sections = [
        collect_process_counts(),
        collect_mailbox_sizes(),
        collect_memory_stats(),
        collect_system_stats(),
        collect_event_metrics(State)
    ],
    case lists:append(Sections) of
        [] -> ok;
        Gauges -> metrics_client:batch(Gauges)
    end.
|
||||
|
||||
%% @doc Converts the per-window event counters into gauges and appends
%% the RPC latency distribution stats (empty when no samples were taken).
-spec collect_event_metrics(state()) -> [map()].
collect_event_metrics(#{
    rpc_latencies := RpcLatencies,
    connections := Connections,
    disconnections := Disconnections,
    heartbeat_success := HeartbeatSuccess,
    heartbeat_failure := HeartbeatFailure,
    resume_success := ResumeSuccess,
    resume_failure := ResumeFailure,
    identify_rate_limited := IdentifyRateLimited
}) ->
    Counters = [
        gauge(<<"gateway.websocket.connections">>, Connections),
        gauge(<<"gateway.websocket.disconnections">>, Disconnections),
        gauge(<<"gateway.heartbeat.success">>, HeartbeatSuccess),
        gauge(<<"gateway.heartbeat.failure">>, HeartbeatFailure),
        gauge(<<"gateway.resume.success">>, ResumeSuccess),
        gauge(<<"gateway.resume.failure">>, ResumeFailure),
        gauge(<<"gateway.identify.rate_limited">>, IdentifyRateLimited)
    ],
    Counters ++ calculate_latency_stats(RpcLatencies).
|
||||
|
||||
%% @doc Summarizes a window of latency samples as min/max/avg/percentile
%% gauges. An empty window yields no gauges at all.
-spec calculate_latency_stats([non_neg_integer()]) -> [map()].
calculate_latency_stats([]) ->
    [];
calculate_latency_stats(Latencies) ->
    [Min | _] = Sorted = lists:sort(Latencies),
    Count = length(Sorted),
    [
        gauge(<<"gateway.rpc.latency.avg">>, lists:sum(Sorted) / Count),
        gauge(<<"gateway.rpc.latency.min">>, Min),
        gauge(<<"gateway.rpc.latency.max">>, lists:last(Sorted)),
        gauge(<<"gateway.rpc.latency.p50">>, percentile(Sorted, 50)),
        gauge(<<"gateway.rpc.latency.p95">>, percentile(Sorted, 95)),
        gauge(<<"gateway.rpc.latency.p99">>, percentile(Sorted, 99)),
        gauge(<<"gateway.rpc.latency.count">>, Count)
    ].
|
||||
|
||||
%% @doc Nearest-rank percentile of an already-sorted, non-empty list.
%% The 1-based rank is rounded and then clamped into [1, length].
-spec percentile([number()], number()) -> number().
percentile(SortedList, Percent) ->
    Len = length(SortedList),
    Rank = round(Len * Percent / 100),
    lists:nth(min(Len, max(1, Rank)), SortedList).
|
||||
|
||||
%% @doc Polls each entity manager for its global count and emits one
%% gauge per manager (0 when a manager is unavailable).
-spec collect_process_counts() -> [map()].
collect_process_counts() ->
    [
        gauge(Metric, get_manager_count(Manager))
     || {Manager, Metric} <- [
            {session_manager, <<"gateway.sessions.count">>},
            {guild_manager, <<"gateway.guilds.count">>},
            {presence_manager, <<"gateway.presences.count">>},
            {call_manager, <<"gateway.calls.count">>}
        ]
    ].
|
||||
|
||||
%% @doc Fetches a global entity count from the named manager process,
%% accepting either a bare integer or an {ok, Integer} reply. Falls back
%% to 0 when the manager is down, the call times out (1s), or the reply
%% has an unexpected shape.
%% Uses try/of/catch instead of the old-style `catch Expr`, which loses
%% the stacktrace and conflates thrown values with ordinary returns.
-spec get_manager_count(atom()) -> non_neg_integer().
get_manager_count(Manager) ->
    try gen_server:call(Manager, get_global_count, 1000) of
        {ok, Count} when is_integer(Count) -> Count;
        Count when is_integer(Count) -> Count;
        _ -> 0
    catch
        _:_ -> 0
    end.
|
||||
|
||||
%% @doc Emits one gauge per live manager mailbox plus an aggregate total.
%% Each mailbox is sampled exactly once: the original version sampled
%% twice (once for the per-manager gauges, again for the total), doubling
%% the process_info work and allowing the total to disagree with the sum
%% of the individual gauges when a queue changed between samples.
-spec collect_mailbox_sizes() -> [map()].
collect_mailbox_sizes() ->
    Managers = [
        {session_manager, <<"gateway.mailbox.session_manager">>},
        {guild_manager, <<"gateway.mailbox.guild_manager">>},
        {presence_manager, <<"gateway.mailbox.presence_manager">>},
        {call_manager, <<"gateway.mailbox.call_manager">>},
        {push, <<"gateway.mailbox.push">>},
        {presence_cache, <<"gateway.mailbox.presence_cache">>},
        {presence_bus, <<"gateway.mailbox.presence_bus">>}
    ],
    Sampled = lists:filtermap(
        fun({Manager, MetricName}) ->
            case get_mailbox_size(Manager) of
                undefined -> false;
                Size -> {true, {MetricName, Size}}
            end
        end,
        Managers
    ),
    Total = lists:sum([Size || {_, Size} <- Sampled]),
    [
        gauge(<<"gateway.mailbox.total">>, Total)
        | [gauge(Name, Size) || {Name, Size} <- Sampled]
    ].
|
||||
|
||||
%% @doc Message-queue length of the process registered under Manager, or
%% undefined when the name is unregistered or the process just died.
-spec get_mailbox_size(atom()) -> non_neg_integer() | undefined.
get_mailbox_size(Manager) ->
    Pid = whereis(Manager),
    Info =
        case is_pid(Pid) of
            true -> erlang:process_info(Pid, message_queue_len);
            false -> undefined
        end,
    case Info of
        {message_queue_len, Size} -> Size;
        undefined -> undefined
    end.
|
||||
|
||||
%% @doc Memory gauges for the presence cache and push processes, followed
%% by the aggregated per-guild memory stats (which may be empty).
-spec collect_memory_stats() -> [map()].
collect_memory_stats() ->
    Fixed = [
        gauge(<<"gateway.memory.presence_cache">>, get_presence_cache_memory()),
        gauge(<<"gateway.memory.push">>, get_push_process_memory())
    ],
    Fixed ++ collect_guild_memory_stats().
|
||||
|
||||
%% @doc Aggregates memory usage across the sampled guild processes into
%% total/count/avg/max/min gauges. Returns [] when the stats collection
%% fails or there are no guild processes.
%% Improvements over the original: try/catch replaces the old-style
%% `catch Expr` (which conflates throws with values), and the non-empty
%% check is a `[_|_]` head match instead of an O(n) `length/1` guard.
-spec collect_guild_memory_stats() -> [map()].
collect_guild_memory_stats() ->
    try process_memory_stats:get_guild_memory_stats(10000) of
        [_ | _] = GuildStats ->
            Memories = [maps:get(memory, G, 0) || G <- GuildStats],
            TotalMemory = lists:sum(Memories),
            GuildCount = length(Memories),
            [
                gauge(<<"gateway.memory.guilds.total">>, TotalMemory),
                gauge(<<"gateway.memory.guilds.count">>, GuildCount),
                gauge(<<"gateway.memory.guilds.avg">>, TotalMemory / GuildCount),
                gauge(<<"gateway.memory.guilds.max">>, lists:max(Memories)),
                gauge(<<"gateway.memory.guilds.min">>, lists:min(Memories))
            ];
        _ ->
            []
    catch
        _:_ -> []
    end.
|
||||
|
||||
%% @doc Presence-cache memory in bytes, or 0 when the cache is down or
%% replies with an unexpected shape. try/catch replaces the old-style
%% `catch Expr`, which conflates thrown values with ordinary returns.
-spec get_presence_cache_memory() -> non_neg_integer().
get_presence_cache_memory() ->
    try presence_cache:get_memory_stats() of
        {ok, #{memory_bytes := Bytes}} -> Bytes;
        _ -> 0
    catch
        _:_ -> 0
    end.
|
||||
|
||||
%% @doc Heap memory (bytes) of the registered `push` process; 0 when the
%% name is unregistered or the process died mid-query.
-spec get_push_process_memory() -> non_neg_integer().
get_push_process_memory() ->
    case whereis(push) of
        Pid when is_pid(Pid) ->
            case erlang:process_info(Pid, memory) of
                {memory, Bytes} -> Bytes;
                undefined -> 0
            end;
        undefined ->
            0
    end.
|
||||
|
||||
%% @doc VM-wide gauges: total/process/system memory split plus the live
%% process count.
-spec collect_system_stats() -> [map()].
collect_system_stats() ->
    {Total, Processes, System} = get_memory_info(),
    [
        gauge(<<"gateway.memory.total">>, Total),
        gauge(<<"gateway.memory.processes">>, Processes),
        gauge(<<"gateway.memory.system">>, System),
        gauge(<<"gateway.process_count">>, erlang:system_info(process_count))
    ].
|
||||
|
||||
%% @doc Snapshot of erlang:memory/0 as a {Total, Processes, System}
%% triple; absent keys default to 0.
-spec get_memory_info() -> {non_neg_integer(), non_neg_integer(), non_neg_integer()}.
get_memory_info() ->
    MemData = erlang:memory(),
    Pick = fun(Key) -> proplists:get_value(Key, MemData, 0) end,
    {Pick(total), Pick(processes), Pick(system)}.
|
||||
|
||||
%% @doc Wraps a name/value pair in the internal gauge-metric map shape
%% (no dimensions) consumed by metrics_client:batch/1.
-spec gauge(binary(), number()) -> map().
gauge(Name, Value) ->
    #{type => gauge, name => Name, dimensions => #{}, value => Value}.
|
||||
|
||||
%% Public counter API. Each call is an asynchronous cast to the collector
%% process; the matching handle_cast clause bumps the counter (or records
%% the latency sample), so callers never block on metrics.

%% @doc Counts one accepted websocket connection.
-spec inc_connections() -> ok.
inc_connections() ->
    gen_server:cast(?MODULE, inc_connections).

%% @doc Counts one websocket disconnection.
-spec inc_disconnections() -> ok.
inc_disconnections() ->
    gen_server:cast(?MODULE, inc_disconnections).

%% @doc Counts one successful heartbeat.
-spec inc_heartbeat_success() -> ok.
inc_heartbeat_success() ->
    gen_server:cast(?MODULE, inc_heartbeat_success).

%% @doc Counts one failed heartbeat.
-spec inc_heartbeat_failure() -> ok.
inc_heartbeat_failure() ->
    gen_server:cast(?MODULE, inc_heartbeat_failure).

%% @doc Counts one successful session resume.
-spec inc_resume_success() -> ok.
inc_resume_success() ->
    gen_server:cast(?MODULE, inc_resume_success).

%% @doc Counts one failed session resume.
-spec inc_resume_failure() -> ok.
inc_resume_failure() ->
    gen_server:cast(?MODULE, inc_resume_failure).

%% @doc Counts one identify rejected by rate limiting.
-spec inc_identify_rate_limited() -> ok.
inc_identify_rate_limited() ->
    gen_server:cast(?MODULE, inc_identify_rate_limited).

%% @doc Records one RPC round-trip latency sample in milliseconds
%% (window capped at 1000 samples in handle_cast).
-spec record_rpc_latency(non_neg_integer()) -> ok.
record_rpc_latency(LatencyMs) ->
    gen_server:cast(?MODULE, {record_rpc_latency, LatencyMs}).
|
||||
|
||||
%% @doc Counts a websocket close tagged with its close reason. Unlike the
%% other counters this bypasses the periodic batch and goes straight
%% through the metrics client.
-spec inc_websocket_close(atom()) -> ok.
inc_websocket_close(Reason) ->
    metrics_client:counter(
        <<"gateway.websocket.close">>,
        #{<<"reason">> => atom_to_binary(Reason, utf8)}
    ).
|
||||
165
fluxer_gateway/src/telemetry/metrics_client.erl
Normal file
165
fluxer_gateway/src/telemetry/metrics_client.erl
Normal file
@@ -0,0 +1,165 @@
|
||||
%% Copyright (C) 2026 Fluxer Contributors
|
||||
%%
|
||||
%% This file is part of Fluxer.
|
||||
%%
|
||||
%% Fluxer is free software: you can redistribute it and/or modify
|
||||
%% it under the terms of the GNU Affero General Public License as published by
|
||||
%% the Free Software Foundation, either version 3 of the License, or
|
||||
%% (at your option) any later version.
|
||||
%%
|
||||
%% Fluxer is distributed in the hope that it will be useful,
|
||||
%% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
%% GNU Affero General Public License for more details.
|
||||
%%
|
||||
%% You should have received a copy of the GNU Affero General Public License
|
||||
%% along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
-module(metrics_client).
|
||||
|
||||
-export([
|
||||
counter/1,
|
||||
counter/2,
|
||||
gauge/2,
|
||||
gauge/3,
|
||||
histogram/2,
|
||||
histogram/3,
|
||||
crash/2,
|
||||
batch/1,
|
||||
is_enabled/0
|
||||
]).
|
||||
|
||||
%% @doc Increments the named counter by one with no dimensions.
-spec counter(binary()) -> ok.
counter(Name) ->
    counter(Name, #{}).
|
||||
|
||||
%% @doc Increments the named counter by one, tagged with Dimensions.
%% Delivery is best-effort and asynchronous.
-spec counter(binary(), map()) -> ok.
counter(Name, Dimensions) ->
    Payload = #{
        <<"name">> => Name,
        <<"dimensions">> => Dimensions,
        <<"value">> => 1
    },
    fire_and_forget(<<"/metrics/counter">>, Payload).
|
||||
|
||||
%% @doc Records a gauge value with no dimensions.
-spec gauge(binary(), number()) -> ok.
gauge(Name, Value) ->
    gauge(Name, #{}, Value).
|
||||
|
||||
%% @doc Records a gauge value tagged with Dimensions. Delivery is
%% best-effort and asynchronous.
-spec gauge(binary(), map(), number()) -> ok.
gauge(Name, Dimensions, Value) ->
    Payload = #{
        <<"name">> => Name,
        <<"dimensions">> => Dimensions,
        <<"value">> => Value
    },
    fire_and_forget(<<"/metrics/gauge">>, Payload).
|
||||
|
||||
%% @doc Records a histogram sample (milliseconds) with no dimensions.
-spec histogram(binary(), number()) -> ok.
histogram(Name, ValueMs) ->
    histogram(Name, #{}, ValueMs).
|
||||
|
||||
%% @doc Records a histogram sample (milliseconds) tagged with Dimensions.
%% Delivery is best-effort and asynchronous.
-spec histogram(binary(), map(), number()) -> ok.
histogram(Name, Dimensions, ValueMs) ->
    Payload = #{
        <<"name">> => Name,
        <<"dimensions">> => Dimensions,
        <<"value_ms">> => ValueMs
    },
    fire_and_forget(<<"/metrics/histogram">>, Payload).
|
||||
|
||||
%% @doc Reports a guild-process crash (guild id plus formatted
%% stacktrace) to the metrics backend. Delivery is best-effort.
-spec crash(binary(), binary()) -> ok.
crash(GuildId, Stacktrace) ->
    Payload = #{
        <<"guild_id">> => GuildId,
        <<"stacktrace">> => Stacktrace
    },
    fire_and_forget(<<"/metrics/crash">>, Payload).
|
||||
|
||||
%% @doc The client is enabled iff a non-empty metrics host (string or
%% binary) is configured.
-spec is_enabled() -> boolean().
is_enabled() ->
    Host = metrics_host(),
    (is_list(Host) andalso Host =/= "")
        orelse (is_binary(Host) andalso byte_size(Host) > 0).
|
||||
|
||||
%% Ships one metric payload from a detached process so callers never
%% block on HTTP; when no usable (non-empty) host is configured the
%% metric is silently dropped.
fire_and_forget(Path, Body) ->
    Host = metrics_host(),
    HostUsable =
        (is_list(Host) andalso Host =/= "")
            orelse (is_binary(Host) andalso byte_size(Host) > 0),
    case HostUsable of
        true ->
            spawn(fun() -> do_send(Host, Path, Body) end),
            ok;
        false ->
            ok
    end.
|
||||
|
||||
%% Entry point for a single metric send: starts the retry loop at attempt 0.
do_send(Host, Path, Body) ->
    do_send(Host, Path, Body, 0).
|
||||
|
||||
%% Posts Body as JSON to the metrics host, retrying once (MaxRetries = 1)
%% on a non-2xx response or a transport error, then logging and giving
%% up. The retry/give-up decision was previously duplicated verbatim in
%% both failure branches; it is factored into retry_or_log/5 so the two
%% paths cannot drift apart.
do_send(Host, Path, Body, Attempt) ->
    Url = iolist_to_binary(["http://", Host, Path]),
    Headers = [{<<"Content-Type">>, <<"application/json">>}],
    JsonBody = jsx:encode(Body),

    case
        hackney:request(post, Url, Headers, JsonBody, [
            {recv_timeout, 5000}, {connect_timeout, 2000}
        ])
    of
        {ok, StatusCode, _RespHeaders, ClientRef} when StatusCode >= 200, StatusCode < 300 ->
            %% Response body is irrelevant; skip it to release the connection.
            hackney:skip_body(ClientRef),
            ok;
        {ok, StatusCode, _RespHeaders, ClientRef} ->
            hackney:skip_body(ClientRef),
            retry_or_log(Host, Path, Body, Attempt, StatusCode);
        {error, Reason} ->
            retry_or_log(Host, Path, Body, Attempt, Reason)
    end.

%% Retries while attempts remain (single retry allowed); otherwise logs
%% the failure reason/status and gives up without raising.
retry_or_log(Host, Path, Body, Attempt, _Why) when Attempt < 1 ->
    do_send(Host, Path, Body, Attempt + 1);
retry_or_log(_Host, Path, _Body, Attempt, Why) ->
    logger:warning("Failed to send metric after ~p attempts: ~p ~s", [Attempt + 1, Why, Path]),
    ok.
|
||||
|
||||
%% @doc Ships a batch of metric maps from a detached process; with no
%% usable (non-empty) host configured the batch is silently dropped.
-spec batch([map()]) -> ok.
batch(Metrics) when is_list(Metrics) ->
    Host = metrics_host(),
    HostUsable =
        (is_list(Host) andalso Host =/= "")
            orelse (is_binary(Host) andalso byte_size(Host) > 0),
    case HostUsable of
        true ->
            spawn(fun() -> do_batch(Host, Metrics) end),
            ok;
        false ->
            ok
    end.
|
||||
|
||||
%% Splits the batch by metric type, converts each entry to its wire
%% shape, and posts the whole payload in one request.
do_batch(Host, Metrics) ->
    OfType = fun(Type) ->
        [M || M <- Metrics, maps:get(type, M, undefined) =:= Type]
    end,
    Body = #{
        <<"gauges">> => [format_gauge(M) || M <- OfType(gauge)],
        <<"counters">> => [format_counter(M) || M <- OfType(counter)],
        <<"histograms">> => [format_histogram(M) || M <- OfType(histogram)]
    },
    do_send(Host, <<"/metrics/batch">>, Body).
|
||||
|
||||
%% Converts an internal gauge map to its wire (binary-keyed) form.
format_gauge(#{name := N, dimensions := D, value := V}) ->
    #{<<"name">> => N, <<"dimensions">> => D, <<"value">> => V}.
|
||||
|
||||
%% Converts an internal counter map to its wire (binary-keyed) form.
format_counter(#{name := N, dimensions := D, value := V}) ->
    #{<<"name">> => N, <<"dimensions">> => D, <<"value">> => V}.
|
||||
|
||||
%% Converts an internal histogram map to its wire form; the sample goes
%% out under the value_ms key.
format_histogram(#{name := N, dimensions := D, value := V}) ->
    #{<<"name">> => N, <<"dimensions">> => D, <<"value_ms">> => V}.
|
||||
|
||||
%% Reads the metrics destination host from application config.
%% NOTE(review): callers accept a non-empty string or binary as "enabled"
%% and treat anything else as disabled — confirm fluxer_gateway_env
%% returns one of those shapes (or undefined).
metrics_host() ->
    fluxer_gateway_env:get(metrics_host).
|
||||
97
fluxer_gateway/src/telemetry/process_memory_stats.erl
Normal file
97
fluxer_gateway/src/telemetry/process_memory_stats.erl
Normal file
@@ -0,0 +1,97 @@
|
||||
%% Copyright (C) 2026 Fluxer Contributors
|
||||
%%
|
||||
%% This file is part of Fluxer.
|
||||
%%
|
||||
%% Fluxer is free software: you can redistribute it and/or modify
|
||||
%% it under the terms of the GNU Affero General Public License as published by
|
||||
%% the Free Software Foundation, either version 3 of the License, or
|
||||
%% (at your option) any later version.
|
||||
%%
|
||||
%% Fluxer is distributed in the hope that it will be useful,
|
||||
%% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
%% GNU Affero General Public License for more details.
|
||||
%%
|
||||
%% You should have received a copy of the GNU Affero General Public License
|
||||
%% along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
-module(process_memory_stats).
|
||||
|
||||
-export([get_guild_memory_stats/1]).
|
||||
|
||||
%% @doc Scans every process on the node, keeps the ones identified as
%% guild processes, and returns up to Limit info maps ordered by memory
%% usage, largest first.
get_guild_memory_stats(Limit) ->
    Infos = lists:filtermap(
        fun(Pid) ->
            case get_guild_process_info(Pid) of
                undefined -> false;
                Info -> {true, Info}
            end
        end,
        erlang:processes()
    ),
    ByMemoryDesc = fun(#{memory := A}, #{memory := B}) -> A >= B end,
    lists:sublist(lists:sort(ByMemoryDesc, Infos), Limit).
|
||||
|
||||
%% Builds an info map (id, name, icon, memory, member/session/presence
%% counts) for Pid if it is a guild process; undefined otherwise (dead
%% pid, non-guild module, or unreadable state). Refactored from one
%% four-level-deep case pyramid into small helpers, and the old-style
%% `catch sys:get_state/2` (which conflates thrown values with returns)
%% is now an explicit try/catch.
get_guild_process_info(Pid) ->
    case erlang:process_info(Pid, [registered_name, memory, initial_call, dictionary]) of
        undefined ->
            undefined;
        InfoList ->
            Memory = proplists:get_value(memory, InfoList, 0),
            case initial_module(InfoList) of
                guild -> guild_state_info(Pid, Memory);
                _ -> undefined
            end
    end.

%% Resolves the module a process was started with: prefer the
%% '$initial_call' dictionary entry (set by proc_lib), falling back to
%% the raw initial_call from process_info.
initial_module(InfoList) ->
    Dictionary = proplists:get_value(dictionary, InfoList, []),
    case lists:keyfind('$initial_call', 1, Dictionary) of
        {'$initial_call', {M, _, _}} ->
            M;
        _ ->
            case proplists:get_value(initial_call, InfoList) of
                {M, _, _} -> M;
                _ -> undefined
            end
    end.

%% Snapshots the guild gen_server state (100 ms budget) and converts it;
%% any failure (timeout, dead process, non-map state) yields undefined.
guild_state_info(Pid, Memory) ->
    try sys:get_state(Pid, 100) of
        State when is_map(State) -> build_guild_info(State, Memory);
        _ -> undefined
    catch
        _:_ -> undefined
    end.

%% Extracts the reported fields from a guild state map, tolerating any
%% missing key with a sensible default.
build_guild_info(State, Memory) ->
    Data = maps:get(data, State, #{}),
    Guild = maps:get(<<"guild">>, Data, #{}),
    #{
        guild_id => format_guild_id(maps:get(id, State, undefined)),
        guild_name => maps:get(<<"name">>, Guild, <<"Unknown">>),
        guild_icon => maps:get(<<"icon">>, Guild, null),
        memory => Memory,
        member_count => length(maps:get(<<"members">>, Data, [])),
        session_count => map_size(maps:get(sessions, State, #{})),
        presence_count => map_size(maps:get(presences, State, #{}))
    }.

%% Guild ids are emitted as binaries; a missing id becomes JSON null.
format_guild_id(undefined) -> null;
format_guild_id(Id) -> integer_to_binary(Id).
|
||||
664
fluxer_gateway/src/telemetry/process_registry.erl
Normal file
664
fluxer_gateway/src/telemetry/process_registry.erl
Normal file
@@ -0,0 +1,664 @@
|
||||
%% Copyright (C) 2026 Fluxer Contributors
|
||||
%%
|
||||
%% This file is part of Fluxer.
|
||||
%%
|
||||
%% Fluxer is free software: you can redistribute it and/or modify
|
||||
%% it under the terms of the GNU Affero General Public License as published by
|
||||
%% the Free Software Foundation, either version 3 of the License, or
|
||||
%% (at your option) any later version.
|
||||
%%
|
||||
%% Fluxer is distributed in the hope that it will be useful,
|
||||
%% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
%% GNU Affero General Public License for more details.
|
||||
%%
|
||||
%% You should have received a copy of the GNU Affero General Public License
|
||||
%% along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
-module(process_registry).
|
||||
|
||||
-export([
|
||||
build_process_name/2,
|
||||
register_and_monitor/3,
|
||||
lookup_or_monitor/3,
|
||||
safe_unregister/1,
|
||||
cleanup_on_down/2,
|
||||
get_count/1
|
||||
]).
|
||||
|
||||
-type process_id() :: integer() | binary() | string().
|
||||
-type process_prefix() :: atom() | string().
|
||||
-type process_map() :: #{term() => {pid(), reference()} | loading}.
|
||||
-type register_result() :: {ok, pid(), reference(), process_map()} | {error, term()}.
|
||||
-type lookup_result() :: {ok, pid(), reference(), process_map()} | {error, not_found}.
|
||||
|
||||
-export_type([process_id/0, process_prefix/0, process_map/0]).
|
||||
|
||||
%% Build a registered-name atom of the form <Prefix>_<Id>.
%% Prefix is an atom or a string; Id is an integer, binary, or string.
%% Strategy: first normalise Id to a string, then append it to the prefix,
%% so every Prefix/Id combination funnels through the two list clauses.
%% NOTE(review): this mints a new atom per id via list_to_atom/1 — atoms are
%% never garbage collected, so ids must come from a bounded, trusted set.
-spec build_process_name(process_prefix(), process_id()) -> atom().
build_process_name(Prefix, Id) when is_integer(Id) ->
    build_process_name(Prefix, integer_to_list(Id));
build_process_name(Prefix, Id) when is_binary(Id) ->
    build_process_name(Prefix, binary_to_list(Id));
build_process_name(Prefix, Id) when is_atom(Prefix), is_list(Id) ->
    list_to_atom(atom_to_list(Prefix) ++ "_" ++ Id);
build_process_name(Prefix, Id) when is_list(Prefix), is_list(Id) ->
    list_to_atom(Prefix ++ "_" ++ Id).
|
||||
|
||||
%% Register Pid under Name, monitor it, and record {Pid, Ref} in ProcessMap.
%% If Name is already taken (a registration race), the freshly spawned loser
%% Pid is stopped and the existing winner is adopted instead, so both racing
%% callers converge on the same pid.
-spec register_and_monitor(atom(), pid(), process_map()) -> register_result().
register_and_monitor(Name, Pid, ProcessMap) ->
    try
        %% register/2 raises error:badarg when Name is taken OR Pid is dead.
        register(Name, Pid),
        Ref = monitor(process, Pid),
        NewMap = maps:put(Name, {Pid, Ref}, ProcessMap),
        {ok, Pid, Ref, NewMap}
    catch
        error:badarg ->
            %% Lost the race (or Pid died before registration): shut the
            %% duplicate down. The old-style 'catch' swallows any stop error
            %% (e.g. Pid is not a gen_server, or is already dead).
            %% NOTE(review): for a pid that ignores system messages this call
            %% can block up to 5 s before timing out — confirm acceptable on
            %% this path.
            catch gen_server:stop(Pid, normal, 5000),
            case whereis(Name) of
                undefined ->
                    %% The winner itself died between our failed register and
                    %% this lookup; caller is expected to retry.
                    {error, registration_race_condition};
                ExistingPid ->
                    %% Adopt the winner: monitor it and record it under Name.
                    ExistingRef = monitor(process, ExistingPid),
                    ExistingMap = maps:put(Name, {ExistingPid, ExistingRef}, ProcessMap),
                    {ok, ExistingPid, ExistingRef, ExistingMap}
            end;
        Error:Reason ->
            %% Any other exception is reported to the caller as data.
            {error, {Error, Reason}}
    end.
|
||||
|
||||
%% Find a registered process by Name; when found, monitor it and store
%% {Pid, Ref} under Key in ProcessMap. Read-side companion to
%% register_and_monitor/3 — it never registers anything itself.
-spec lookup_or_monitor(atom(), term(), process_map()) -> lookup_result().
lookup_or_monitor(Name, Key, ProcessMap) ->
    monitor_if_found(whereis(Name), Key, ProcessMap).

%% Dispatch on the whereis/1 result: 'undefined' means no such registration.
monitor_if_found(undefined, _Key, _ProcessMap) ->
    {error, not_found};
monitor_if_found(Pid, Key, ProcessMap) ->
    MonRef = monitor(process, Pid),
    {ok, Pid, MonRef, ProcessMap#{Key => {Pid, MonRef}}}.
|
||||
|
||||
%% Unregister Name, treating "was not registered" as success.
%% unregister/1 returns 'true' on success and can only raise error:badarg
%% (unknown name), so that single exception is converted to ok. The previous
%% wildcard `_:_ -> ok` clause was unreachable dead code and an
%% error-swallowing anti-pattern; it has been removed so any genuine fault
%% (which cannot originate from unregister/1 itself) would surface.
-spec safe_unregister(atom()) -> ok.
safe_unregister(Name) ->
    try unregister(Name) of
        true -> ok
    catch
        error:badarg -> ok
    end.
|
||||
|
||||
%% Remove every {Pid, Ref} entry owned by the process that just went DOWN.
%% 'loading' placeholders carry no pid yet and are always retained.
-spec cleanup_on_down(pid(), process_map()) -> process_map().
cleanup_on_down(DeadPid, ProcessMap) ->
    maps:fold(
        fun(Key, Entry, Acc) ->
            case keep_entry(DeadPid, Entry) of
                true -> Acc#{Key => Entry};
                false -> Acc
            end
        end,
        #{},
        ProcessMap
    ).

%% An entry survives cleanup unless it references the dead pid.
keep_entry(_DeadPid, loading) -> true;
keep_entry(DeadPid, {Pid, _Ref}) -> Pid =/= DeadPid.
|
||||
|
||||
%% Count fully-registered entries, ignoring 'loading' placeholders.
%% Improvement: fold a counter directly over the map instead of materialising
%% an intermediate filtered map only to take its size — same result, no
%% throwaway allocation. A value that is neither 'loading' nor {Pid, Ref}
%% still crashes with function_clause, exactly as the filter fun did.
-spec get_count(process_map()) -> non_neg_integer().
get_count(ProcessMap) ->
    maps:fold(
        fun
            (_Key, loading, Count) -> Count;
            (_Key, {_Pid, _Ref}, Count) -> Count + 1
        end,
        0,
        ProcessMap
    ).
|
||||
|
||||
-ifdef(TEST).
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
%% --- build_process_name/2: one test per Prefix-type x Id-type combination ---

%% atom prefix + integer id.
build_process_name_integer_atom_test() ->
    ?assertEqual('guild_123456', build_process_name(guild, 123456)),
    ?assertEqual('channel_0', build_process_name(channel, 0)),
    ?assertEqual('voice_999', build_process_name(voice, 999)).

%% string prefix + integer id.
build_process_name_integer_string_test() ->
    ?assertEqual('channel_999', build_process_name("channel", 999)),
    ?assertEqual('guild_12345', build_process_name("guild", 12345)),
    ?assertEqual('voice_0', build_process_name("voice", 0)).

%% atom prefix + binary id.
build_process_name_binary_atom_test() ->
    ?assertEqual('guild_123456', build_process_name(guild, <<"123456">>)),
    ?assertEqual('voice_789', build_process_name(voice, <<"789">>)),
    ?assertEqual('channel_abc', build_process_name(channel, <<"abc">>)).

%% string prefix + binary id.
build_process_name_binary_string_test() ->
    ?assertEqual('voice_789', build_process_name("voice", <<"789">>)),
    ?assertEqual('guild_test', build_process_name("guild", <<"test">>)),
    ?assertEqual('channel_123', build_process_name("channel", <<"123">>)).

%% atom prefix + string id.
build_process_name_string_atom_test() ->
    ?assertEqual('guild_123456', build_process_name(guild, "123456")),
    ?assertEqual('channel_abc', build_process_name(channel, "abc")),
    ?assertEqual('voice_xyz', build_process_name(voice, "xyz")).

%% string prefix + string id.
build_process_name_string_string_test() ->
    ?assertEqual('channel_abc', build_process_name("channel", "abc")),
    ?assertEqual('guild_test', build_process_name("guild", "test")),
    ?assertEqual('voice_123', build_process_name("voice", "123")).

%% ids containing underscores/dashes pass straight through into the atom.
build_process_name_special_chars_test() ->
    ?assertEqual('guild_123_456', build_process_name(guild, "123_456")),
    ?assertEqual('channel_test-channel', build_process_name(channel, "test-channel")).
|
||||
|
||||
%% --- register_and_monitor/3 ---

%% Happy path: register + monitor succeed; map gains exactly one entry.
register_and_monitor_success_test() ->
    Name = test_process_reg_success,
    ProcessMap = #{},

    Pid = spawn(fun() -> timer:sleep(100) end),

    Result = register_and_monitor(Name, Pid, ProcessMap),

    ?assertMatch({ok, Pid, _Ref, _NewMap}, Result),
    {ok, ReturnedPid, Ref, NewMap} = Result,

    ?assertEqual(Pid, ReturnedPid),
    ?assertEqual(Pid, whereis(Name)),

    ?assertEqual(1, maps:size(NewMap)),
    ?assertEqual({Pid, Ref}, maps:get(Name, NewMap)),

    ?assert(is_reference(Ref)),

    unregister(Name).

%% Pre-existing entries in the map are preserved alongside the new one.
register_and_monitor_existing_map_test() ->
    Name = test_process_reg_existing,
    ExistingPid = list_to_pid("<0.100.0>"),
    ExistingRef = make_ref(),
    ProcessMap = #{other_process => {ExistingPid, ExistingRef}},

    Pid = spawn(fun() -> timer:sleep(100) end),

    {ok, _ReturnedPid, _Ref, NewMap} = register_and_monitor(Name, Pid, ProcessMap),

    ?assertEqual(2, maps:size(NewMap)),
    ?assert(maps:is_key(other_process, NewMap)),
    ?assert(maps:is_key(Name, NewMap)),

    unregister(Name).

%% Race: Name is already taken, so the loser pid is stopped and the
%% pre-registered winner is adopted (monitored + stored in the map).
register_and_monitor_race_condition_test() ->
    Name = test_process_race,
    ProcessMap = #{},

    WinnerPid = spawn(fun() -> timer:sleep(200) end),
    register(Name, WinnerPid),

    LoserPid = spawn(fun() -> timer:sleep(100) end),

    Result = register_and_monitor(Name, LoserPid, ProcessMap),

    ?assertMatch({ok, WinnerPid, _Ref, _NewMap}, Result),
    {ok, ReturnedPid, Ref, NewMap} = Result,

    ?assertEqual(WinnerPid, ReturnedPid),
    ?assertEqual(WinnerPid, whereis(Name)),

    %% The loser (sleeping 100 ms) must be gone by the time we check.
    timer:sleep(50),
    ?assertEqual(false, is_process_alive(LoserPid)),

    ?assertEqual({WinnerPid, Ref}, maps:get(Name, NewMap)),

    unregister(Name).

%% Name is free (previous owner is dead), so a fresh registration succeeds.
register_and_monitor_race_dead_test() ->
    Name = test_process_race_dead,
    ProcessMap = #{},

    DeadPid = spawn(fun() -> ok end),
    timer:sleep(10),
    ?assertEqual(false, is_process_alive(DeadPid)),

    NewPid = spawn(fun() -> timer:sleep(100) end),

    Result = register_and_monitor(Name, NewPid, ProcessMap),
    ?assertMatch({ok, NewPid, _Ref, _NewMap}, Result),

    catch unregister(Name).

%% Registering an already-dead pid: outcome depends on timing (register/2
%% raises badarg for dead pids), so both result shapes are accepted.
register_and_monitor_dead_process_test() ->
    Name = test_process_dead,
    ProcessMap = #{},

    DeadPid = spawn(fun() -> exit(normal) end),
    timer:sleep(10),
    ?assertEqual(false, is_process_alive(DeadPid)),

    Result = register_and_monitor(Name, DeadPid, ProcessMap),

    case Result of
        {ok, DeadPid, _Ref, _NewMap} ->
            ?assertEqual(DeadPid, whereis(Name)),
            catch unregister(Name);
        {error, _} ->
            ok
    end.

%% Five concurrent registrations under one Name: every caller gets either
%% the same winning pid or the documented race error — never a timeout.
register_and_monitor_concurrent_test_() ->
    {timeout, 10, fun() ->
        Name = test_process_concurrent,

        Parent = self(),
        Pids = [
            spawn(fun() ->
                Pid = spawn(fun() -> timer:sleep(200) end),
                Result = register_and_monitor(Name, Pid, #{}),
                Parent ! {self(), Result}
            end)
         || _ <- lists:seq(1, 5)
        ],

        %% Collect one reply per worker (selective receive keyed on worker pid).
        Results = [
            receive
                {P, R} -> R
            after 2000 -> timeout
            end
         || P <- Pids
        ],

        ?assertEqual(5, length(Results)),

        %% element/2 is a guard BIF, so a 'timeout' atom is filtered out
        %% (guard failure == false) rather than crashing the comprehension.
        SuccessResults = [R || R <- Results, element(1, R) =:= ok],
        RaceErrors = [R || R <- Results, R =:= {error, registration_race_condition}],
        Timeouts = [R || R <- Results, R =:= timeout],

        ?assertEqual(0, length(Timeouts)),

        ?assert(length(SuccessResults) >= 1),

        ?assertEqual(5, length(SuccessResults) + length(RaceErrors)),

        %% All successful callers must have converged on the same pid.
        case SuccessResults of
            [] ->
                ?assert(false);
            [{ok, FirstPid, _, _} | RestResults] ->
                AllSamePid = lists:all(
                    fun
                        ({ok, P, _, _}) -> P =:= FirstPid;
                        (_) -> false
                    end,
                    RestResults
                ),
                ?assert(AllSamePid)
        end,

        catch unregister(Name)
    end}.
|
||||
|
||||
%% --- lookup_or_monitor/3 ---

%% Happy path: registered process is found, monitored, and stored under Key.
lookup_or_monitor_success_test() ->
    Name = test_lookup_success,
    Key = test_key,
    ProcessMap = #{},

    Pid = spawn(fun() -> timer:sleep(200) end),
    register(Name, Pid),

    Result = lookup_or_monitor(Name, Key, ProcessMap),

    ?assertMatch({ok, Pid, _Ref, _NewMap}, Result),
    {ok, ReturnedPid, Ref, NewMap} = Result,

    ?assertEqual(Pid, ReturnedPid),
    ?assert(is_reference(Ref)),
    ?assertEqual({Pid, Ref}, maps:get(Key, NewMap)),

    unregister(Name).

%% No such registered name -> {error, not_found}, map untouched.
lookup_or_monitor_not_found_test() ->
    Name = test_lookup_not_found_99999,
    Key = test_key,
    ProcessMap = #{},

    Result = lookup_or_monitor(Name, Key, ProcessMap),
    ?assertEqual({error, not_found}, Result).

%% Pre-existing map entries survive a successful lookup.
lookup_or_monitor_existing_map_test() ->
    Name = test_lookup_existing,
    Key = new_key,
    ExistingPid = list_to_pid("<0.100.0>"),
    ExistingRef = make_ref(),
    ProcessMap = #{existing_key => {ExistingPid, ExistingRef}},

    Pid = spawn(fun() -> timer:sleep(200) end),
    register(Name, Pid),

    {ok, _ReturnedPid, _Ref, NewMap} = lookup_or_monitor(Name, Key, ProcessMap),

    ?assertEqual(2, maps:size(NewMap)),
    ?assert(maps:is_key(existing_key, NewMap)),
    ?assert(maps:is_key(Key, NewMap)),

    unregister(Name).

%% The entry is stored under the caller-chosen Key, not under Name.
lookup_or_monitor_different_key_test() ->
    Name = test_lookup_diff_key,
    Key = different_key_name,
    ProcessMap = #{},

    Pid = spawn(fun() -> timer:sleep(200) end),
    register(Name, Pid),

    {ok, _ReturnedPid, Ref, NewMap} = lookup_or_monitor(Name, Key, ProcessMap),

    ?assertEqual({Pid, Ref}, maps:get(Key, NewMap)),
    ?assertEqual(false, maps:is_key(Name, NewMap)),

    unregister(Name).

%% A dead process is auto-unregistered by the VM, so lookup reports not_found.
%% NOTE(review): spawn(fun() -> ok end) can exit before register/2 runs,
%% which would make register/2 itself raise badarg — potential flake.
lookup_or_monitor_dead_process_test() ->
    Name = test_lookup_dead,
    Key = test_key,
    ProcessMap = #{},

    Pid = spawn(fun() -> ok end),
    register(Name, Pid),
    timer:sleep(10),

    Result = lookup_or_monitor(Name, Key, ProcessMap),
    ?assertEqual({error, not_found}, Result).
|
||||
|
||||
%% --- safe_unregister/1 ---

%% Removing a live registration returns ok and clears the name.
safe_unregister_registered_test() ->
    Name = test_safe_unreg_registered,

    Pid = spawn(fun() -> timer:sleep(100) end),
    register(Name, Pid),

    ?assertEqual(Pid, whereis(Name)),
    ?assertEqual(ok, safe_unregister(Name)),
    ?assertEqual(undefined, whereis(Name)).

%% Unknown names are tolerated (badarg swallowed -> ok).
safe_unregister_unregistered_test() ->
    ?assertEqual(ok, safe_unregister(nonexistent_process_name_12345)).

%% Idempotent: repeated calls after the first removal still return ok.
safe_unregister_multiple_test() ->
    Name = test_safe_unreg_multiple,

    Pid = spawn(fun() -> timer:sleep(100) end),
    register(Name, Pid),

    ?assertEqual(ok, safe_unregister(Name)),
    ?assertEqual(ok, safe_unregister(Name)),
    ?assertEqual(ok, safe_unregister(Name)).

%% Odd-but-valid atoms (including the empty atom) also return ok.
safe_unregister_edge_cases_test() ->
    ?assertEqual(ok, safe_unregister(undefined_name_xyz)),
    ?assertEqual(ok, safe_unregister('some_random_name')),
    ?assertEqual(ok, safe_unregister('')).
|
||||
|
||||
%% --- cleanup_on_down/2 ---

%% Dead-pid entry is dropped; 'loading' placeholder and live entry survive.
cleanup_on_down_preserves_loading_test() ->
    DeadPid = list_to_pid("<0.100.0>"),
    AlivePid = list_to_pid("<0.101.0>"),
    Ref1 = make_ref(),
    Ref2 = make_ref(),

    Map = #{
        guild_1 => {DeadPid, Ref1},
        guild_2 => loading,
        guild_3 => {AlivePid, Ref2}
    },

    Result = cleanup_on_down(DeadPid, Map),

    ?assertEqual(2, maps:size(Result)),
    ?assertEqual(loading, maps:get(guild_2, Result)),
    ?assertEqual({AlivePid, Ref2}, maps:get(guild_3, Result)),
    ?assertEqual(false, maps:is_key(guild_1, Result)).

%% Every 'loading' placeholder survives, regardless of how many there are.
cleanup_on_down_multiple_loading_test() ->
    DeadPid = list_to_pid("<0.100.0>"),
    AlivePid = list_to_pid("<0.101.0>"),
    Ref1 = make_ref(),
    Ref2 = make_ref(),

    Map = #{
        guild_1 => {DeadPid, Ref1},
        guild_2 => loading,
        guild_3 => {AlivePid, Ref2},
        guild_4 => loading,
        guild_5 => loading
    },

    Result = cleanup_on_down(DeadPid, Map),

    ?assertEqual(4, maps:size(Result)),
    ?assertEqual(loading, maps:get(guild_2, Result)),
    ?assertEqual(loading, maps:get(guild_4, Result)),
    ?assertEqual(loading, maps:get(guild_5, Result)),
    ?assertEqual({AlivePid, Ref2}, maps:get(guild_3, Result)),
    ?assertEqual(false, maps:is_key(guild_1, Result)).

%% Only the entry referencing the dead pid is removed; others untouched.
cleanup_on_down_single_removal_test() ->
    DeadPid = list_to_pid("<0.100.0>"),
    AlivePid1 = list_to_pid("<0.101.0>"),
    AlivePid2 = list_to_pid("<0.102.0>"),
    Ref1 = make_ref(),
    Ref2 = make_ref(),
    Ref3 = make_ref(),

    Map = #{
        guild_1 => {AlivePid1, Ref1},
        guild_2 => {DeadPid, Ref2},
        guild_3 => {AlivePid2, Ref3}
    },

    Result = cleanup_on_down(DeadPid, Map),

    ?assertEqual(2, maps:size(Result)),
    ?assertEqual({AlivePid1, Ref1}, maps:get(guild_1, Result)),
    ?assertEqual({AlivePid2, Ref3}, maps:get(guild_3, Result)),
    ?assertEqual(false, maps:is_key(guild_2, Result)).

%% Empty map in, empty map out.
cleanup_on_down_empty_test() ->
    DeadPid = list_to_pid("<0.100.0>"),
    Result = cleanup_on_down(DeadPid, #{}),
    ?assertEqual(#{}, Result).

%% A map of only placeholders is returned unchanged.
cleanup_on_down_only_loading_test() ->
    DeadPid = list_to_pid("<0.100.0>"),
    Map = #{
        guild_1 => loading,
        guild_2 => loading
    },
    Result = cleanup_on_down(DeadPid, Map),
    ?assertEqual(Map, Result).

%% A pid absent from the map removes nothing.
cleanup_on_down_pid_not_found_test() ->
    DeadPid = list_to_pid("<0.100.0>"),
    AlivePid = list_to_pid("<0.101.0>"),
    Ref = make_ref(),

    Map = #{
        guild_1 => {AlivePid, Ref},
        guild_2 => loading
    },

    Result = cleanup_on_down(DeadPid, Map),
    ?assertEqual(Map, Result).

%% Multiple entries sharing the dead pid are all removed.
cleanup_on_down_duplicate_pids_test() ->
    DeadPid = list_to_pid("<0.100.0>"),
    Ref1 = make_ref(),
    Ref2 = make_ref(),

    Map = #{
        guild_1 => {DeadPid, Ref1},
        guild_2 => {DeadPid, Ref2}
    },

    Result = cleanup_on_down(DeadPid, Map),
    ?assertEqual(0, maps:size(Result)).
|
||||
|
||||
%% --- get_count/1: only {Pid, Ref} entries count, 'loading' is excluded ---

%% Mixed map: two registered, two loading -> 2.
get_count_mixed_test() ->
    Pid1 = list_to_pid("<0.100.0>"),
    Pid2 = list_to_pid("<0.101.0>"),
    Ref1 = make_ref(),
    Ref2 = make_ref(),

    Map = #{
        guild_1 => {Pid1, Ref1},
        guild_2 => loading,
        guild_3 => {Pid2, Ref2},
        guild_4 => loading
    },

    ?assertEqual(2, get_count(Map)).

%% Empty map -> 0.
get_count_empty_test() ->
    ?assertEqual(0, get_count(#{})).

%% Only placeholders -> 0.
get_count_only_loading_test() ->
    Map = #{
        guild_1 => loading,
        guild_2 => loading
    },
    ?assertEqual(0, get_count(Map)).

%% Only registered entries -> full map size.
get_count_only_processes_test() ->
    Pid1 = list_to_pid("<0.100.0>"),
    Pid2 = list_to_pid("<0.101.0>"),
    Pid3 = list_to_pid("<0.102.0>"),
    Ref1 = make_ref(),
    Ref2 = make_ref(),
    Ref3 = make_ref(),

    Map = #{
        guild_1 => {Pid1, Ref1},
        guild_2 => {Pid2, Ref2},
        guild_3 => {Pid3, Ref3}
    },

    ?assertEqual(3, get_count(Map)).

%% Single registered entry -> 1.
get_count_single_test() ->
    Pid = list_to_pid("<0.100.0>"),
    Ref = make_ref(),
    Map = #{guild_1 => {Pid, Ref}},
    ?assertEqual(1, get_count(Map)).

%% Single placeholder -> 0.
get_count_single_loading_test() ->
    Map = #{guild_1 => loading},
    ?assertEqual(0, get_count(Map)).
|
||||
|
||||
%% --- Integration: the exported functions composed end-to-end ---

%% Name building -> register -> loading placeholder -> lookup -> cleanup ->
%% unregister, checking get_count/1 at each step.
integration_full_lifecycle_test() ->
    Id = 12345,
    Name = build_process_name(guild, Id),
    ?assertEqual('guild_12345', Name),

    Pid = spawn(fun() -> timer:sleep(200) end),
    {ok, Pid, _Ref, Map1} = register_and_monitor(Name, Pid, #{}),
    ?assertEqual(1, get_count(Map1)),

    %% Placeholder entries do not affect the count.
    Map2 = maps:put(guild_67890, loading, Map1),
    ?assertEqual(1, get_count(Map2)),

    OtherName = build_process_name(channel, 67890),
    OtherPid = spawn(fun() -> timer:sleep(200) end),
    register(OtherName, OtherPid),
    {ok, OtherPid, _OtherRef, Map3} = lookup_or_monitor(OtherName, channel_67890, Map2),
    ?assertEqual(2, get_count(Map3)),

    %% Simulate Pid going down: its entry drops, placeholder survives.
    Map4 = cleanup_on_down(Pid, Map3),
    ?assertEqual(1, get_count(Map4)),
    ?assertEqual(loading, maps:get(guild_67890, Map4)),

    safe_unregister(Name),
    safe_unregister(OtherName),

    ?assertEqual(undefined, whereis(Name)),
    ?assertEqual(undefined, whereis(OtherName)).

%% The monitor set up by register_and_monitor/3 delivers a 'DOWN' message
%% when the process dies, and cleanup_on_down/2 then empties the map.
integration_process_death_test_() ->
    {timeout, 10, fun() ->
        Name = test_integration_death,

        Pid = spawn(fun() ->
            receive
                die -> exit(normal)
            after 100 -> exit(normal)
            end
        end),

        {ok, Pid, Ref, Map} = register_and_monitor(Name, Pid, #{}),
        ?assertEqual(1, get_count(Map)),

        Pid ! die,

        receive
            {'DOWN', Ref, process, Pid, _Reason} ->
                Map2 = cleanup_on_down(Pid, Map),
                ?assertEqual(0, get_count(Map2)),
                safe_unregister(Name)
        after 500 ->
            ?assert(false)
        end
    end}.

%% While a winner holds the name, every concurrent registration attempt
%% must resolve to that same winner pid.
integration_race_conditions_test_() ->
    {timeout, 10, fun() ->
        Name = test_integration_race,

        Parent = self(),

        FirstPid = spawn(fun() -> timer:sleep(300) end),
        {ok, FirstPid, _FirstRef, _Map1} = register_and_monitor(Name, FirstPid, #{}),

        Workers = [
            spawn(fun() ->
                NewPid = spawn(fun() -> timer:sleep(100) end),
                Result = register_and_monitor(Name, NewPid, #{}),
                Parent ! {register_result, Result}
            end)
         || _ <- lists:seq(1, 3)
        ],

        Results = [
            receive
                {register_result, R} -> R
            after 1000 -> timeout
            end
         || _ <- Workers
        ],
        AllGotFirstPid = lists:all(
            fun
                ({ok, P, _, _}) -> P =:= FirstPid;
                (_) -> false
            end,
            Results
        ),
        ?assert(AllGotFirstPid),

        safe_unregister(Name)
    end}.

%% Rapid register/unregister cycles under distinct names stay consistent.
integration_rapid_cycles_test_() ->
    {timeout, 10, fun() ->
        lists:foreach(
            fun(N) ->
                Name = list_to_atom("test_rapid_" ++ integer_to_list(N)),
                Pid = spawn(fun() -> timer:sleep(50) end),

                {ok, Pid, _Ref, Map} = register_and_monitor(Name, Pid, #{}),
                ?assertEqual(1, get_count(Map)),

                safe_unregister(Name),
                ?assertEqual(undefined, whereis(Name))
            end,
            lists:seq(1, 10)
        )
    end}.
|
||||
|
||||
-endif.
|
||||
Reference in New Issue
Block a user