Skip to content

Commit

Permalink
Make sure that TelemetryMetricsStatsD can be started if host can't be…
Browse files Browse the repository at this point in the history
… resolved

If the StatsD collector is not up for some reason and the application can't resolve
its hostname by calling `:inet.gethostbyname(host)`, the failed resolution prevents the entire application from starting.

This commit makes sure that TelemetryMetricsStatsD can still be started, in an errored state, when the host
can't be resolved. Whether it can recover afterwards depends on the `host_resolution_interval` option.

If `host_resolution_interval` isn't passed, TelemetryMetricsStatsD will not try to resolve the host again.
When it is passed, TelemetryMetricsStatsD tries to resolve the host again after `host_resolution_interval` milliseconds.
  • Loading branch information
mrcnkoba committed Apr 23, 2024
1 parent 70dc6dd commit 9bcf689
Showing 1 changed file with 93 additions and 45 deletions.
138 changes: 93 additions & 45 deletions lib/telemetry_metrics_statsd.ex
Original file line number Diff line number Diff line change
Expand Up @@ -425,35 +425,48 @@ defmodule TelemetryMetricsStatsd do
_ -> configure_host_resolution(options)
end

udps =
for _ <- 1..options.pool_size do
{:ok, udp} = UDP.open(udp_config)
{:udp, udp}
end
case udp_config do
%{host: _} = udp_config ->
udps =
for _ <- 1..options.pool_size do
{:ok, udp} = UDP.open(udp_config)
{:udp, udp}
end

pool_id = :ets.new(__MODULE__, [:bag, :protected, read_concurrency: true])
:ets.insert(pool_id, udps)

handler_ids =
EventHandler.attach(
metrics,
self(),
pool_id,
options.mtu,
options.prefix,
options.formatter,
options.global_tags
)
pool_id = :ets.new(__MODULE__, [:bag, :protected, read_concurrency: true])
:ets.insert(pool_id, udps)

handler_ids =
EventHandler.attach(
metrics,
self(),
pool_id,
options.mtu,
options.prefix,
options.formatter,
options.global_tags
)

{:ok,
%{
udp_config: udp_config,
handler_ids: handler_ids,
pool_id: pool_id,
host: options.host,
port: options.port,
host_resolution_interval: options.host_resolution_interval
}}
{:ok,
%{
udp_config: udp_config,
handler_ids: handler_ids,
pool_id: pool_id,
host: options.host,
port: options.port,
host_resolution_interval: options.host_resolution_interval
}}
_ ->
{:ok,
%{
udp_config: :error,
handler_ids: [],
pool_id: nil,
host: options.host,
port: options.port,
host_resolution_interval: options.host_resolution_interval
}}
end
end

@impl true
Expand Down Expand Up @@ -492,28 +505,42 @@ defmodule TelemetryMetricsStatsd do

@impl true
def handle_info(:resolve_host, state) do
%{host: host, udp_config: %{host: current_address}, host_resolution_interval: interval} =
%{host: host, udp_config: udp_config, host_resolution_interval: interval} =
state

new_state =
case :inet.gethostbyname(host) do
{:ok, hostent(h_addr_list: ips)} ->
if Enum.member?(ips, current_address) do
state
else
[new_address | _] = ips
update_host(state, new_address)
cond do
udp_config == :error ->
state

Enum.member?(ips, udp_config.host) ->
state

true ->
[new_address | _] = ips
update_host(state, new_address)
end

{:error, reason} ->
Logger.log(
@log_level_warning,
"Failed to resolve the hostname #{host}: #{inspect(reason)}. " <>
"Using the previously resolved address of #{:inet.ntoa(current_address)}."
)
end
case udp_config do
:error ->
Logger.log(
@log_level_warning,
"Failed to resolve the hostname #{host}: #{inspect(reason)}. " <>
"Previously resolved hostname was unsuccessful. The library will not send any metrics."
)
_ -> Logger.log(
@log_level_warning,
"Failed to resolve the hostname #{host}: #{inspect(reason)}. " <>
"Using the previously resolved address of #{:inet.ntoa(udp_config.host)}."
)
end

Process.send_after(self(), :resolve_host, interval)
Process.send_after(self(), :resolve_host, interval)
state
end

{:noreply, new_state}
end
Expand Down Expand Up @@ -548,18 +575,39 @@ defmodule TelemetryMetricsStatsd do
host_resolution_interval: interval
})
when is_integer(interval) do
{:ok, hostent(h_addr_list: [ip | _ips])} = :inet.gethostbyname(host, inet_address_family)
Process.send_after(self(), :resolve_host, interval)
%{host: ip, port: port, inet_address_family: inet_address_family}

case :inet.gethostbyname(host, inet_address_family) do
{:ok, hostent(h_addr_list: [ip | _ips])} ->
%{host: ip, port: port, inet_address_family: inet_address_family}

{:error, reason} ->
Logger.log(
@log_level_warning,
"Failed to resolve the hostname #{host}: #{inspect(reason)}. " <>
"Retrying to resolve it again in #{interval} milliseconds."
)
Process.send_after(self(), :resolve_host, interval)
:error
end

end

defp configure_host_resolution(%{
host: host,
port: port,
inet_address_family: inet_address_family
}) do
{:ok, hostent(h_addr_list: [ip | _ips])} = :inet.gethostbyname(host, inet_address_family)
%{host: ip, port: port, inet_address_family: inet_address_family}
case :inet.gethostbyname(host, inet_address_family) do
{:ok, hostent(h_addr_list: [ip | _ips])} ->
%{host: ip, port: port, inet_address_family: inet_address_family}

{:error, reason} ->
Logger.log(
@log_level_warning,
"Failed to resolve the hostname #{host}: #{inspect(reason)}. Metrics will not be sent at all."
)
:error
end
end

defp update_pool(pool_id, new_host, new_port) do
Expand Down

0 comments on commit 9bcf689

Please sign in to comment.