Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(filter): Wildcard trigger compatibility with graphite-clickhouse #896

Closed
64 changes: 56 additions & 8 deletions filter/series_by_tag.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package filter

import (
"bytes"
"fmt"
"regexp"
"strings"

"github.com/moira-alert/moira"
)

var (
Expand Down Expand Up @@ -39,11 +42,17 @@ type TagSpec struct {
Value string
}

// transformWildcardToRegexpInSeriesByTag is used to convert regular expression from graphite regexp format
// to standard regexp parsable by the go regexp engine.
func transformWildcardToRegexpInSeriesByTag(input string) (string, bool) {
var isTransformed = false

result := input

if strings.Contains(result, "*") {
result = strings.ReplaceAll(result, "*", ".*")
isTransformed = true
}

for {
matchedWildcardIndexes := wildcardExprRegex.FindStringSubmatchIndex(result)
if len(matchedWildcardIndexes) != correctLengthOfMatchedWildcardIndexesSlice {
Expand All @@ -63,7 +72,7 @@ func transformWildcardToRegexpInSeriesByTag(input string) (string, bool) {
}

if isTransformed {
result += "$"
result = "^" + result + "$"
}

return result, isTransformed
Expand Down Expand Up @@ -151,10 +160,8 @@ func CreateMatchingHandlerForPattern(tagSpecs []TagSpec) (string, MatchingHandle

func createMatchingHandlerForOneTag(spec TagSpec) MatchingHandler {
var matchingHandlerCondition func(string) bool
allowMatchEmpty := false
switch spec.Operator {
case EqualOperator:
allowMatchEmpty = true
matchingHandlerCondition = func(value string) bool {
return value == spec.Value
}
Expand All @@ -163,13 +170,26 @@ func createMatchingHandlerForOneTag(spec TagSpec) MatchingHandler {
return value != spec.Value
}
case MatchOperator:
allowMatchEmpty = true
matchRegex := regexp.MustCompile("^" + spec.Value)
value := cleanAsterisks(spec.Value)
if !strings.HasPrefix(value, "^") {
value = ".*" + value
}
if !strings.HasSuffix(value, "$") {
value += ".*"
}
matchRegex := regexp.MustCompile(value)
matchingHandlerCondition = func(value string) bool {
return matchRegex.MatchString(value)
}
case NotMatchOperator:
matchRegex := regexp.MustCompile("^" + spec.Value)
value := cleanAsterisks(spec.Value)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is needed ? Replace * to .* needed only if full string is *. Also may be add more tests for *

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I replaced * with .* because in graphite-clickhouse, there is a greedy match. The SQL equivalent was something like %<string>% which basically means that both sides of the expressions are greedily matched.

Also, what we want when we query for cpu.machine*.loaded is not "zero or more characters of e" but any amount of characters after e. That is why it is needed for * to be changed to .* for this to work in regexp.

In the graphite-clickhouse source code, there is something like this too. Check https://github.com/go-graphite/graphite-clickhouse/blob/1db4b3d740c0f3e72ed8a3449603f4a016b5dda8/pkg/where/where.go#L64

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@msaf1980 I don't know if i missed any tests, I have tried the following special cases here: https://github.com/moira-alert/moira/pull/896/files#diff-4e74ec0972e1312934727b9a6b03635e81344c8d854205ea594290ad604bedc7

  • empty
  • single asterisks
  • asterisk after an asterisk
  • properly written asterisk should remain i.e.
    • .* => ..*
    • .* => .*
  • when regexp has been anchored with ^ and $
  • prefix and suffix asterisks
  • mixed (correct and incorrectly written) asterisks

Copy link
Member

@msaf1980 msaf1980 Aug 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In graphite-clickhouse seriesByTags * replaced to .* only in wildcards (it's not regular expression, but translated to regexp for check). Only one case whan * translated to .* in regexp, is a bad regexp *.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, add tests for regexp like a*b and a.*b for check why replacement * to .* is a bad thing.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

if !strings.HasPrefix(value, "^") {
value = ".*" + value
}
if !strings.HasSuffix(value, "$") {
value += ".*"
}
matchRegex := regexp.MustCompile(value)
matchingHandlerCondition = func(value string) bool {
return !matchRegex.MatchString(value)
}
Expand All @@ -187,6 +207,34 @@ func createMatchingHandlerForOneTag(spec TagSpec) MatchingHandler {
if value, found := labels[spec.Name]; found {
return matchingHandlerCondition(value)
}
return allowMatchEmpty && matchEmpty
return matchEmpty
}
}

// cleanAsterisks converts instances of "*" to ".*" wildcard match
func cleanAsterisks(s string) string {
// store `*` indices
positions := make([]int, 0)
for i := 0; i < len(s); i++ {
if s[i] == '*' {
positions = append(positions, i)
}
}
if len(positions) == 0 {
return s
}

b := moira.UnsafeStringToBytes(s)
var writer bytes.Buffer
writer.Grow(len(s) + len(positions))
writeIndex := 0
for _, i := range positions {
writer.Write(b[writeIndex:i])
if i == 0 || b[i-1] != '.' {
writer.WriteByte('.')
}
writeIndex = i
}
writer.Write(b[writeIndex:])
return writer.String()
}
Loading
Loading