Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: validating prometheus target #1077

Merged
merged 15 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 42 additions & 4 deletions api/handler/triggers.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
"strings"
"time"

prometheus "github.com/prometheus/client_golang/api/prometheus/v1"
kissken marked this conversation as resolved.
Show resolved Hide resolved

"github.com/go-chi/chi"
"github.com/go-chi/render"
"github.com/moira-alert/moira"
Expand Down Expand Up @@ -150,10 +152,40 @@ func createTrigger(writer http.ResponseWriter, request *http.Request) {
}
}

func is4xxCode(statusCode int64) bool {
return statusCode >= 400 && statusCode < 500
}

func errorResponseOnPrometheusError(promErr *prometheus.Error) *api.ErrorResponse {
// In github.com/prometheus/client_golang/api/prometheus/v1 Error has field `Type`
// which can be used to understand "the reason" of error. There are some constants in the lib.
if promErr.Type == prometheus.ErrBadData {
return api.ErrorInvalidRequest(fmt.Errorf("invalid prometheus targets: %w", promErr))
}

// VictoriaMetrics also supports prometheus api, BUT puts status code into Error.Type.
// So we can't just use constants from prometheus api client lib.
statusCode, err := strconv.ParseInt(string(promErr.Type), 10, 64)
if err != nil {
return api.ErrorInternalServer(promErr)
}

codes4xxLeadTo500 := map[int64]struct{}{
http.StatusUnauthorized: {},
http.StatusForbidden: {},
}

if _, leadTo500 := codes4xxLeadTo500[statusCode]; is4xxCode(statusCode) && !leadTo500 {
return api.ErrorInvalidRequest(promErr)
}

return api.ErrorInternalServer(promErr)
}

func getTriggerFromRequest(request *http.Request) (*dto.Trigger, *api.ErrorResponse) {
trigger := &dto.Trigger{}
if err := render.Bind(request, trigger); err != nil {
switch err.(type) { // nolint:errorlint
switch typedErr := err.(type) { // nolint:errorlint
case local.ErrParseExpr, local.ErrEvalExpr, local.ErrUnknownFunction:
return nil, api.ErrorInvalidRequest(fmt.Errorf("invalid graphite targets: %s", err.Error()))
case expression.ErrInvalidExpression:
Expand All @@ -169,6 +201,8 @@ func getTriggerFromRequest(request *http.Request) (*dto.Trigger, *api.ErrorRespo
return nil, response
case *json.UnmarshalTypeError:
return nil, api.ErrorInvalidRequest(fmt.Errorf("invalid payload: %s", err.Error()))
case *prometheus.Error:
almostinf marked this conversation as resolved.
Show resolved Hide resolved
return nil, errorResponseOnPrometheusError(typedErr)
default:
return nil, api.ErrorInternalServer(err)
}
Expand Down Expand Up @@ -208,10 +242,14 @@ func triggerCheck(writer http.ResponseWriter, request *http.Request) {
response := dto.TriggerCheckResponse{}

if err := render.Bind(request, trigger); err != nil {
switch err.(type) { // nolint:errorlint
switch typedErr := err.(type) { // nolint:errorlint
case expression.ErrInvalidExpression, local.ErrParseExpr, local.ErrEvalExpr, local.ErrUnknownFunction:
// TODO write comment, why errors are ignored, it is not obvious.
// In getTriggerFromRequest these types of errors lead to 400.
// TODO: move ErrInvalidExpression to separate case

// These errors are skipped because if there are error from local source then it will be caught in
// dto.TargetVerification and will be explained in detail.
case *prometheus.Error:
render.Render(writer, request, errorResponseOnPrometheusError(typedErr)) //nolint
default:
render.Render(writer, request, api.ErrorInvalidRequest(err)) //nolint
return
Expand Down
115 changes: 115 additions & 0 deletions api/handler/triggers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ import (
"testing"
"time"

logging "github.com/moira-alert/moira/logging/zerolog_adapter"
"github.com/moira-alert/moira/metric_source/remote"

prometheus "github.com/prometheus/client_golang/api/prometheus/v1"

"github.com/moira-alert/moira"
"github.com/moira-alert/moira/api"
dataBase "github.com/moira-alert/moira/database"
Expand Down Expand Up @@ -139,6 +144,116 @@ func TestGetTriggerFromRequest(t *testing.T) {
So(err, ShouldHaveSameTypeAs, api.ErrorInvalidRequest(fmt.Errorf("")))
})
})

Convey("With incorrect targets errors", t, func() {
graphiteLocalSrc := mock_metric_source.NewMockMetricSource(mockCtrl)
graphiteRemoteSrc := mock_metric_source.NewMockMetricSource(mockCtrl)
prometheusSrc := mock_metric_source.NewMockMetricSource(mockCtrl)
allSourceProvider := metricSource.CreateTestMetricSourceProvider(graphiteLocalSrc, graphiteRemoteSrc, prometheusSrc)

graphiteLocalSrc.EXPECT().GetMetricsTTLSeconds().Return(int64(3600)).AnyTimes()
graphiteRemoteSrc.EXPECT().GetMetricsTTLSeconds().Return(int64(3600)).AnyTimes()
prometheusSrc.EXPECT().GetMetricsTTLSeconds().Return(int64(3600)).AnyTimes()

triggerWarnValue := 0.0
triggerErrorValue := 1.0
ttlState := moira.TTLState("NODATA")
triggerDTO := dto.Trigger{
TriggerModel: dto.TriggerModel{
ID: "test_id",
Name: "Test trigger",
Desc: new(string),
Targets: []string{"foo.bar"},
WarnValue: &triggerWarnValue,
ErrorValue: &triggerErrorValue,
TriggerType: "rising",
Tags: []string{"Normal", "DevOps", "DevOpsGraphite-duty"},
TTLState: &ttlState,
TTL: moira.DefaultTTL,
Schedule: &moira.ScheduleData{},
Expression: "",
Patterns: []string{},
ClusterId: moira.DefaultCluster,
MuteNewMetrics: false,
AloneMetrics: map[string]bool{},
CreatedAt: &time.Time{},
UpdatedAt: &time.Time{},
CreatedBy: "",
UpdatedBy: "anonymous",
},
}

Convey("for graphite remote", func() {
triggerDTO.TriggerSource = moira.GraphiteRemote
body, _ := json.Marshal(triggerDTO)

request := httptest.NewRequest(http.MethodPut, "/trigger", bytes.NewReader(body))
request.Header.Add("content-type", "application/json")
request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "metricSourceProvider", allSourceProvider))

testLogger, _ := logging.GetLogger("Test")

request = middleware.WithLogEntry(request, middleware.NewLogEntry(testLogger, request))

var returnedErr error = remote.ErrRemoteTriggerResponse{
InternalError: fmt.Errorf(""),
}

graphiteRemoteSrc.EXPECT().Fetch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).
Return(nil, returnedErr)

_, errRsp := getTriggerFromRequest(request)
So(errRsp, ShouldResemble, api.ErrorRemoteServerUnavailable(returnedErr))
})

Convey("for prometheus remote", func() {
triggerDTO.TriggerSource = moira.PrometheusRemote
body, _ := json.Marshal(triggerDTO)

Convey("with error type = bad_data got bad request", func() {
request := httptest.NewRequest(http.MethodPut, "/trigger", bytes.NewReader(body))
request.Header.Add("content-type", "application/json")
request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "metricSourceProvider", allSourceProvider))

var returnedErr error = &prometheus.Error{
Type: prometheus.ErrBadData,
}

prometheusSrc.EXPECT().Fetch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).
Return(nil, returnedErr)

_, errRsp := getTriggerFromRequest(request)
So(errRsp, ShouldResemble, api.ErrorInvalidRequest(fmt.Errorf("invalid prometheus targets: %w", returnedErr)))
})

Convey("with other types internal server error is returned", func() {
otherTypes := []prometheus.ErrorType{
prometheus.ErrBadResponse,
prometheus.ErrCanceled,
prometheus.ErrClient,
prometheus.ErrExec,
prometheus.ErrTimeout,
prometheus.ErrServer,
}

for _, errType := range otherTypes {
request := httptest.NewRequest(http.MethodPut, "/trigger", bytes.NewReader(body))
request.Header.Add("content-type", "application/json")
request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "metricSourceProvider", allSourceProvider))

var returnedErr error = &prometheus.Error{
Type: errType,
}

prometheusSrc.EXPECT().Fetch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).
Return(nil, returnedErr)

_, errRsp := getTriggerFromRequest(request)
So(errRsp, ShouldResemble, api.ErrorInternalServer(returnedErr))
}
})
})
})
}

func TestGetMetricTTLByTrigger(t *testing.T) {
Expand Down
Loading