Skip to content

Commit

Permalink
Round the result of avg_over_time (#152)
Browse files Browse the repository at this point in the history
This is so that the severity label is always set. Otherwise, PagerDuty's API
fails to send the notification while the average is transitioning between
integers, which causes the AlertmanagerNotificationsFailed alert to
fire.
  • Loading branch information
facundofc authored Aug 9, 2024
1 parent f77fe02 commit 49eb7f0
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion lib/charms/nrpe_exporter/v0/nrpe_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,9 @@ def _generate_alert(self, relation, cmd, id, unit, nagios_host_context) -> dict:
return {
"alert": "{}NrpeAlert".format("".join([x.title() for x in cmd.split("_")])),
# Average over 5 minutes considering a 60-second scrape interval
"expr": f"avg_over_time(command_status{{juju_unit='{unit_label}',command='{cmd}'}}[15m]) > 1"
# We need to "round" so the severity label is always set. This is
# necessary for PagerDuty's dynamic notifications.
"expr": f"round(avg_over_time(command_status{{juju_unit='{unit_label}',command='{cmd}'}}[15m])) > 1"
+ f" or (absent_over_time(command_status{{juju_unit='{unit_label}',command='{cmd}'}}[10m]) == 1)"
+ f" or (absent_over_time(up{{juju_unit='{unit_label}'}}[10m]) == 1)",
"for": "0m",
Expand Down

0 comments on commit 49eb7f0

Please sign in to comment.