Skip to content

Commit

Permalink
Update muted to be a map[string][]string
Browse files Browse the repository at this point in the history
  • Loading branch information
grobinson-grafana committed Oct 20, 2023
1 parent d39d93c commit a31b20c
Show file tree
Hide file tree
Showing 11 changed files with 121 additions and 52 deletions.
4 changes: 2 additions & 2 deletions api/v2/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ func (api *API) getAlertsHandler(params alert_ops.GetAlertsParams) middleware.Re
continue
}

alert := AlertToOpenAPIAlert(a, api.getAlertStatus(a.Fingerprint()), receivers)
alert := AlertToOpenAPIAlert(a, api.getAlertStatus(a.Fingerprint()), receivers, nil)

res = append(res, alert)
}
Expand Down Expand Up @@ -415,7 +415,7 @@ func (api *API) getAlertGroupsHandler(params alertgroup_ops.GetAlertGroupsParams
fp := alert.Fingerprint()
receivers := allReceivers[fp]
status := api.getAlertStatus(fp)
apiAlert := AlertToOpenAPIAlert(alert, status, receivers)
apiAlert := AlertToOpenAPIAlert(alert, status, receivers, alertGroup)
ag.Alerts = append(ag.Alerts, apiAlert)
}
res = append(res, ag)
Expand Down
2 changes: 1 addition & 1 deletion api/v2/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ func TestAlertToOpenAPIAlert(t *testing.T) {
UpdatedAt: updated,
}
)
openAPIAlert := AlertToOpenAPIAlert(alert, types.AlertStatus{State: types.AlertStateActive}, receivers)
openAPIAlert := AlertToOpenAPIAlert(alert, types.AlertStatus{State: types.AlertStateActive}, receivers, nil)
require.Equal(t, &open_api_models.GettableAlert{
Annotations: open_api_models.LabelSet{},
Alert: open_api_models.Alert{
Expand Down
22 changes: 20 additions & 2 deletions api/v2/compat.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
prometheus_model "github.com/prometheus/common/model"

open_api_models "github.com/prometheus/alertmanager/api/v2/models"
"github.com/prometheus/alertmanager/dispatch"
"github.com/prometheus/alertmanager/silence/silencepb"
"github.com/prometheus/alertmanager/types"
)
Expand Down Expand Up @@ -117,7 +118,9 @@ func PostableSilenceToProto(s *open_api_models.PostableSilence) (*silencepb.Sile
}

// AlertToOpenAPIAlert converts internal alerts, alert types, and receivers to *open_api_models.GettableAlert.
func AlertToOpenAPIAlert(alert *types.Alert, status types.AlertStatus, receivers []string) *open_api_models.GettableAlert {
// The alert group is optional, and allows active and mute time intervals from all routes to be filtered to
// the route in use.
func AlertToOpenAPIAlert(alert *types.Alert, status types.AlertStatus, receivers []string, alertGroup *dispatch.AlertGroup) *open_api_models.GettableAlert {
startsAt := strfmt.DateTime(alert.StartsAt)
updatedAt := strfmt.DateTime(alert.UpdatedAt)
endsAt := strfmt.DateTime(alert.EndsAt)
Expand All @@ -127,6 +130,21 @@ func AlertToOpenAPIAlert(alert *types.Alert, status types.AlertStatus, receivers
apiReceivers = append(apiReceivers, &open_api_models.Receiver{Name: &receivers[i]})
}

// Collect the unique names of the active and mute time intervals that mute this alert.
seenMuted := make(map[string]struct{})
for groupKey, intervalNames := range status.MutedBy {
// If an optional alert group is present, filter to the route in use.
if alertGroup == nil || (alertGroup != nil && alertGroup.Key == groupKey) {
for _, intervalName := range intervalNames {
seenMuted[intervalName] = struct{}{}
}
}
}
mutedBy := make([]string, 0, len(seenMuted))
for k, _ := range seenMuted {

Check failure on line 144 in api/v2/compat.go

View workflow job for this annotation

GitHub Actions / lint

File is not `gofumpt`-ed with `-extra` (gofumpt)
mutedBy = append(mutedBy, k)
}

fp := alert.Fingerprint().String()
state := string(status.State)
aa := &open_api_models.GettableAlert{
Expand All @@ -144,7 +162,7 @@ func AlertToOpenAPIAlert(alert *types.Alert, status types.AlertStatus, receivers
State: &state,
SilencedBy: status.SilencedBy,
InhibitedBy: status.InhibitedBy,
MutedBy: status.MutedBy,
MutedBy: mutedBy,
},
}

Expand Down
2 changes: 1 addition & 1 deletion cmd/alertmanager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ func run() int {
timeIntervals[ti.Name] = ti.TimeIntervals
}

intervener := timeinterval.NewIntervener(timeIntervals)
intervener := timeinterval.NewIntervener(timeIntervals, marker)

inhibitor.Stop()
disp.Stop()
Expand Down
2 changes: 2 additions & 0 deletions dispatch/dispatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ type AlertGroup struct {
Alerts types.AlertSlice
Labels model.LabelSet
Receiver string
Key string
}

type AlertGroups []*AlertGroup
Expand Down Expand Up @@ -237,6 +238,7 @@ func (d *Dispatcher) Groups(routeFilter func(*Route) bool, alertFilter func(*typ
alertGroup := &AlertGroup{
Labels: ag.labels,
Receiver: receiver,
Key: ag.GroupKey(),
}

alerts := ag.alerts.List()
Expand Down
6 changes: 6 additions & 0 deletions dispatch/dispatch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,7 @@ route:
"alertname": "OtherAlert",
},
Receiver: "prod",
Key: "{}:{alertname=\"OtherAlert\"}",
},
&AlertGroup{
Alerts: []*types.Alert{inputAlerts[1]},
Expand All @@ -423,6 +424,7 @@ route:
"service": "api",
},
Receiver: "testing",
Key: "{}/{env=\"testing\"}:{alertname=\"TestingAlert\", service=\"api\"}",
},
&AlertGroup{
Alerts: []*types.Alert{inputAlerts[2], inputAlerts[3]},
Expand All @@ -432,6 +434,7 @@ route:
"cluster": "aa",
},
Receiver: "prod",
Key: "{}/{env=\"prod\"}:{alertname=\"HighErrorRate\", cluster=\"aa\", service=\"api\"}",
},
&AlertGroup{
Alerts: []*types.Alert{inputAlerts[4]},
Expand All @@ -441,6 +444,7 @@ route:
"cluster": "bb",
},
Receiver: "prod",
Key: "{}/{env=\"prod\"}:{alertname=\"HighErrorRate\", cluster=\"bb\", service=\"api\"}",
},
&AlertGroup{
Alerts: []*types.Alert{inputAlerts[5]},
Expand All @@ -450,6 +454,7 @@ route:
"cluster": "bb",
},
Receiver: "kafka",
Key: "{}/{kafka=\"yes\"}:{alertname=\"HighLatency\", cluster=\"bb\", service=\"db\"}",
},
&AlertGroup{
Alerts: []*types.Alert{inputAlerts[5]},
Expand All @@ -459,6 +464,7 @@ route:
"cluster": "bb",
},
Receiver: "prod",
Key: "{}/{env=\"prod\"}:{alertname=\"HighLatency\", cluster=\"bb\", service=\"db\"}",
},
}, alertGroups)
require.Equal(t, map[model.Fingerprint][]string{
Expand Down
32 changes: 15 additions & 17 deletions notify/notify.go
Original file line number Diff line number Diff line change
Expand Up @@ -882,28 +882,25 @@ func NewTimeMuteStage(m types.TimeMuter) *TimeMuteStage {
// TimeMuteStage is responsible for muting alerts whose route is within a mute time interval.
func (tms TimeMuteStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
muteTimeIntervalNames, ok := MuteTimeIntervalNames(ctx)
if !ok {
// Skip this stage if there are no mute timings.
if !ok || len(muteTimeIntervalNames) == 0 {
return ctx, alerts, nil
}

now, ok := Now(ctx)
if !ok {
return ctx, alerts, errors.New("missing now timestamp")
}

// Skip this stage if there are no mute timings.
if len(muteTimeIntervalNames) == 0 {
return ctx, alerts, nil
groupKey, ok := GroupKey(ctx)
if !ok {
return ctx, alerts, errors.New("group key missing")
}

muted, err := tms.muter.Mutes(muteTimeIntervalNames, now)
muted, err := tms.muter.Mutes(groupKey, alerts, muteTimeIntervalNames, now)
if err != nil {
return ctx, alerts, err
}
//for _, a := range alerts {
// If the alert is not muted then mutedBy is a nil slice. This will set the
// alerts back to active.
//tms.marker.SetMuted(a.Fingerprint(), mutedBy...)
//}
// If the current time is inside a mute time, all alerts are removed from the pipeline.
if muted {
level.Debug(l).Log("msg", "Notifications not sent, route is within mute time")
Expand All @@ -922,12 +919,8 @@ func NewTimeActiveStage(m types.TimeMuter) *TimeActiveStage {
// TimeActiveStage is responsible for muting alerts whose route is not in an active time.
func (tas TimeActiveStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
activeTimeIntervalNames, ok := ActiveTimeIntervalNames(ctx)
if !ok {
return ctx, alerts, nil
}

// if we don't have active time intervals at all it is always active.
if len(activeTimeIntervalNames) == 0 {
if !ok || len(activeTimeIntervalNames) == 0 {
return ctx, alerts, nil
}

Expand All @@ -936,13 +929,18 @@ func (tas TimeActiveStage) Exec(ctx context.Context, l log.Logger, alerts ...*ty
return ctx, alerts, errors.New("missing now timestamp")
}

muted, err := tas.muter.Mutes(activeTimeIntervalNames, now)
groupKey, ok := GroupKey(ctx)
if !ok {
return ctx, alerts, errors.New("group key missing")
}

active, err := tas.muter.Active(groupKey, alerts, activeTimeIntervalNames, now)
if err != nil {
return ctx, alerts, err
}

// If the current time is not inside an active time, all alerts are removed from the pipeline
if !muted {
if !active {
level.Debug(l).Log("msg", "Notifications not sent, route is not within active time")
return ctx, nil, nil
}
Expand Down
15 changes: 9 additions & 6 deletions notify/notify_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -841,8 +841,8 @@ func TestTimeMuteStage(t *testing.T) {
}

marker := types.NewMarker(prometheus.NewRegistry())
m := map[string][]timeinterval.TimeInterval{"test": intervals}
intervener := timeinterval.NewIntervener(m)
ti := map[string][]timeinterval.TimeInterval{"test": intervals}
intervener := timeinterval.NewIntervener(ti, marker)
stage := NewTimeMuteStage(intervener)

muted := []*types.Alert{}
Expand All @@ -861,6 +861,7 @@ func TestTimeMuteStage(t *testing.T) {
alerts := []*types.Alert{{Alert: a}}
ctx := context.Background()
ctx = WithNow(ctx, now)
ctx = WithGroupKey(ctx, a.Fingerprint().String())
ctx = WithActiveTimeIntervals(ctx, []string{})
ctx = WithMuteTimeIntervals(ctx, []string{"test"})

Expand Down Expand Up @@ -894,7 +895,7 @@ func TestTimeMuteStage(t *testing.T) {

func TestTimeActiveStage(t *testing.T) {
// Route mutes alerts inside business hours if it is an active time interval
muteIn := `
muteOut := `
---
- weekdays: ['monday:friday']
times:
Expand Down Expand Up @@ -935,12 +936,13 @@ func TestTimeActiveStage(t *testing.T) {
},
}
var intervals []timeinterval.TimeInterval
err := yaml.Unmarshal([]byte(muteIn), &intervals)
err := yaml.Unmarshal([]byte(muteOut), &intervals)
if err != nil {
t.Fatalf("Couldn't unmarshal time interval %s", err)
}
m := map[string][]timeinterval.TimeInterval{"test": intervals}
intervener := timeinterval.NewIntervener(m)
marker := types.NewMarker(prometheus.NewRegistry())
ti := map[string][]timeinterval.TimeInterval{"test": intervals}
intervener := timeinterval.NewIntervener(ti, marker)
stage := NewTimeActiveStage(intervener)

outAlerts := []*types.Alert{}
Expand All @@ -958,6 +960,7 @@ func TestTimeActiveStage(t *testing.T) {
alerts := []*types.Alert{{Alert: a}}
ctx := context.Background()
ctx = WithNow(ctx, now)
ctx = WithGroupKey(ctx, a.Fingerprint().String())
ctx = WithActiveTimeIntervals(ctx, []string{"test"})
ctx = WithMuteTimeIntervals(ctx, []string{})

Expand Down
40 changes: 33 additions & 7 deletions timeinterval/timeinterval.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,34 +25,60 @@ import (
"time"

"gopkg.in/yaml.v2"

"github.com/prometheus/alertmanager/types"
)

// Intervener determines whether a given time and active route time interval should mute outgoing notifications.
// It implements the TimeMuter interface.
type Intervener struct {
	// intervals holds the configured time intervals, keyed by time interval name.
	intervals map[string][]TimeInterval
	// marker is where Active and Mutes record, per group key and alert
	// fingerprint, the names of the time intervals muting an alert.
	marker types.Marker
}

// Active reports whether now falls inside at least one of the named active
// time intervals, and records the outcome on the marker for every alert in
// alerts under groupKey.
//
// When now is outside all of the named intervals, each alert is marked as
// muted by all of names. When now is inside at least one of them, each alert
// is marked with no names, so an alert that is currently active is not
// reported as muted by its active time intervals.
//
// If the intervals cannot be evaluated, the error from inIntervals is returned
// and the marker is not touched.
func (i *Intervener) Active(groupKey string, alerts []*types.Alert, names []string, now time.Time) (bool, error) {
	matches, err := i.inIntervals(names, now)
	if err != nil {
		return false, err
	}
	active := len(matches) > 0

	// The group is muted by this check only when it is outside every active
	// interval, and in that case it is muted by all of them. Otherwise pass
	// no names.
	// NOTE(review): this relies on SetMuted with no names resetting the alert
	// to not-muted for this group key; confirm against types.Marker.
	var mutedBy []string
	if !active {
		mutedBy = names
	}
	for _, a := range alerts {
		i.marker.SetMuted(groupKey, a.Fingerprint(), mutedBy...)
	}
	return active, nil
}

// Mutes reports whether now falls inside at least one of the named mute time
// intervals. Every alert in alerts is recorded on the marker under groupKey
// together with the names of the intervals that contain now; when none do,
// the alert is recorded with no names.
//
// If the intervals cannot be evaluated, the error from inIntervals is returned
// and the marker is not touched.
func (i *Intervener) Mutes(groupKey string, alerts []*types.Alert, names []string, now time.Time) (bool, error) {
	mutedBy, err := i.inIntervals(names, now)
	if err != nil {
		return false, err
	}

	for idx := range alerts {
		fp := alerts[idx].Fingerprint()
		i.marker.SetMuted(groupKey, fp, mutedBy...)
	}

	muted := len(mutedBy) != 0
	return muted, nil
}

func (i *Intervener) Mutes(names []string, now time.Time) (bool, error) {
func (i *Intervener) inIntervals(names []string, now time.Time) ([]string, error) {
var matches []string
for _, name := range names {
interval, ok := i.intervals[name]
if !ok {
return false, fmt.Errorf("time interval %s doesn't exist in config", name)
return nil, fmt.Errorf("time interval %s doesn't exist in config", name)
}

for _, ti := range interval {
if ti.ContainsTime(now.UTC()) {
return true, nil
matches = append(matches, name)
break
}
}
}

return false, nil
return matches, nil
}

func NewIntervener(ti map[string][]TimeInterval) *Intervener {
func NewIntervener(ti map[string][]TimeInterval, m types.Marker) *Intervener {
return &Intervener{
intervals: ti,
marker: m,
}
}

Expand Down
10 changes: 7 additions & 3 deletions timeinterval/timeinterval_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ import (
"testing"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"

"github.com/prometheus/alertmanager/types"
)

var timeIntervalTestCases = []struct {
Expand Down Expand Up @@ -686,7 +689,7 @@ func TestIntervener_Mutes(t *testing.T) {
var intervals []TimeInterval
err := yaml.Unmarshal([]byte(muteIn), &intervals)
require.NoError(t, err)
m := map[string][]TimeInterval{intervalName: intervals}
ti := map[string][]TimeInterval{intervalName: intervals}

tc := []struct {
name string
Expand Down Expand Up @@ -741,9 +744,10 @@ func TestIntervener_Mutes(t *testing.T) {
now, err := time.Parse(time.RFC822Z, tt.firedAt)
require.NoError(t, err)

intervener := NewIntervener(m)
m := types.NewMarker(prometheus.NewRegistry())
intervener := NewIntervener(ti, m)

expected, err := intervener.Mutes([]string{intervalName}, now)
expected, err := intervener.Mutes("", []*types.Alert{}, []string{intervalName}, now)
if err != nil {
require.Error(t, tt.err)
require.False(t, tt.expected)
Expand Down
Loading

0 comments on commit a31b20c

Please sign in to comment.