Skip to content

Commit

Permalink
fix(cache): don't add entries multiple times in chains
Browse files Browse the repository at this point in the history
Also means we save a bit of time by not checking every single entry with
all caches.
  • Loading branch information
ThinkChaos committed Nov 11, 2023
1 parent 531044d commit bcf2bc9
Show file tree
Hide file tree
Showing 9 changed files with 80 additions and 71 deletions.
8 changes: 6 additions & 2 deletions cache/stringcache/chained_grouped_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,14 @@ type chainedGroupFactory struct {
cacheFactories []GroupFactory
}

func (c *chainedGroupFactory) AddEntry(entry string) {
func (c *chainedGroupFactory) AddEntry(entry string) bool {
for _, factory := range c.cacheFactories {
factory.AddEntry(entry)
if factory.AddEntry(entry) {
return true
}
}

return false
}

func (c *chainedGroupFactory) Count() int {
Expand Down
12 changes: 6 additions & 6 deletions cache/stringcache/chained_grouped_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ var _ = Describe("Chained grouped cache", func() {
})

It("factory has 4 elements (both caches)", func() {
Expect(factory.Count()).Should(BeNumerically("==", 4))
Expect(factory.Count()).Should(BeNumerically("==", 2))
})

It("should have element count of 4", func() {
factory.Finish()
Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 4))
Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 2))
})

It("should find strings", func() {
Expand Down Expand Up @@ -80,8 +80,8 @@ var _ = Describe("Chained grouped cache", func() {
})

It("should contain 4 elements in 2 groups", func() {
Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 4))
Expect(cache.ElementCount("group2")).Should(BeNumerically("==", 4))
Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 2))
Expect(cache.ElementCount("group2")).Should(BeNumerically("==", 2))
Expect(cache.Contains("g1", []string{"group1", "group2"})).Should(ConsistOf("group1"))
Expect(cache.Contains("g2", []string{"group1", "group2"})).Should(ConsistOf("group2"))
Expect(cache.Contains("both", []string{"group1", "group2"})).Should(ConsistOf("group1", "group2"))
Expand All @@ -92,8 +92,8 @@ var _ = Describe("Chained grouped cache", func() {
factory.AddEntry("newString")
factory.Finish()

Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 2))
Expect(cache.ElementCount("group2")).Should(BeNumerically("==", 4))
Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 1))
Expect(cache.ElementCount("group2")).Should(BeNumerically("==", 2))
Expect(cache.Contains("g1", []string{"group1", "group2"})).Should(BeEmpty())
Expect(cache.Contains("newString", []string{"group1", "group2"})).Should(ConsistOf("group1"))
Expect(cache.Contains("g2", []string{"group1", "group2"})).Should(ConsistOf("group2"))
Expand Down
2 changes: 1 addition & 1 deletion cache/stringcache/grouped_cache_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type GroupedStringCache interface {

type GroupFactory interface {
// AddEntry adds a new string to the factory to be added later to the cache groups.
AddEntry(entry string)
AddEntry(entry string) bool

// Count returns amount of processed string in the factory
Count() int
Expand Down
4 changes: 2 additions & 2 deletions cache/stringcache/in_memory_grouped_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ type inMemoryGroupFactory struct {
finishFn func(stringCache)
}

func (c *inMemoryGroupFactory) AddEntry(entry string) {
c.factory.addEntry(entry)
func (c *inMemoryGroupFactory) AddEntry(entry string) bool {
return c.factory.addEntry(entry)
}

func (c *inMemoryGroupFactory) Count() int {
Expand Down
33 changes: 9 additions & 24 deletions cache/stringcache/in_memory_grouped_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ var _ = Describe("In-Memory grouped cache", func() {
cache = stringcache.NewInMemoryGroupedStringCache()
factory = cache.Refresh("group1")

factory.AddEntry("string1")
factory.AddEntry("string2")
Expect(factory.AddEntry("string1")).Should(BeTrue())
Expect(factory.AddEntry("string2")).Should(BeTrue())
})

It("cache should still have 0 element, since finish was not executed", func() {
Expand All @@ -69,28 +69,13 @@ var _ = Describe("In-Memory grouped cache", func() {
Expect(cache.Contains("string2", []string{"group1", "someOtherGroup"})).Should(ConsistOf("group1"))
})
})
When("String grouped cache is used", func() {
BeforeEach(func() {
cache = stringcache.NewInMemoryGroupedStringCache()
factory = cache.Refresh("group1")

factory.AddEntry("string1")
factory.AddEntry("/string2/")
factory.Finish()
})

It("should ignore regex", func() {
Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 1))
Expect(cache.Contains("string1", []string{"group1"})).Should(ConsistOf("group1"))
})
})
When("Regex grouped cache is used", func() {
BeforeEach(func() {
cache = stringcache.NewInMemoryGroupedRegexCache()
factory = cache.Refresh("group1")

factory.AddEntry("string1")
factory.AddEntry("/string2/")
Expect(factory.AddEntry("string1")).Should(BeFalse())
Expect(factory.AddEntry("/string2/")).Should(BeTrue())
factory.Finish()
})

Expand All @@ -109,13 +94,13 @@ var _ = Describe("In-Memory grouped cache", func() {
cache = stringcache.NewInMemoryGroupedStringCache()
factory = cache.Refresh("group1")

factory.AddEntry("g1")
factory.AddEntry("both")
Expect(factory.AddEntry("g1")).Should(BeTrue())
Expect(factory.AddEntry("both")).Should(BeTrue())
factory.Finish()

factory = cache.Refresh("group2")
factory.AddEntry("g2")
factory.AddEntry("both")
Expect(factory.AddEntry("g2")).Should(BeTrue())
Expect(factory.AddEntry("both")).Should(BeTrue())
factory.Finish()
})

Expand All @@ -129,7 +114,7 @@ var _ = Describe("In-Memory grouped cache", func() {

It("Should replace group content on refresh", func() {
factory = cache.Refresh("group1")
factory.AddEntry("newString")
Expect(factory.AddEntry("newString")).Should(BeTrue())
factory.Finish()

Expect(cache.ElementCount("group1")).Should(BeNumerically("==", 1))
Expand Down
50 changes: 30 additions & 20 deletions cache/stringcache/string_caches.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ type stringCache interface {
}

type cacheFactory interface {
addEntry(entry string)
addEntry(entry string) bool
create() stringCache
count() int
}
Expand Down Expand Up @@ -86,7 +86,7 @@ func (s *stringCacheFactory) insertString(entry string) {
ix := sort.SearchStrings(bucket, normalized)

if !(ix < len(bucket) && bucket[ix] == normalized) {
// extent internal bucket
// extend internal bucket
bucket = append(s.getBucket(entryLen), "")

// move elements to make place for the insertion
Expand All @@ -98,15 +98,22 @@ func (s *stringCacheFactory) insertString(entry string) {
}
}

func (s *stringCacheFactory) addEntry(entry string) {
// skip empty strings and regex
if len(entry) > 0 && !isRegex(entry) {
s.cnt++
s.insertString(entry)
func (s *stringCacheFactory) addEntry(entry string) bool {
if len(entry) == 0 {
return true // invalid but handled
}

s.cnt++
s.insertString(entry)

return true
}

func (s *stringCacheFactory) create() stringCache {
if len(s.tmp) == 0 {
return nil
}

cache := make(stringMap, len(s.tmp))
for k, v := range s.tmp {
cache[k] = strings.Join(v, "")
Expand All @@ -117,10 +124,6 @@ func (s *stringCacheFactory) create() stringCache {
return cache
}

func isRegex(s string) bool {
return strings.HasPrefix(s, "/") && strings.HasSuffix(s, "/")
}

type regexCache []*regexp.Regexp

func (cache regexCache) elementCount() int {
Expand All @@ -143,17 +146,24 @@ type regexCacheFactory struct {
cache regexCache
}

func (r *regexCacheFactory) addEntry(entry string) {
if isRegex(entry) {
entry = strings.TrimSpace(entry[1 : len(entry)-1])
compile, err := regexp.Compile(entry)
func (r *regexCacheFactory) addEntry(entry string) bool {
if !strings.HasPrefix(entry, "/") || !strings.HasSuffix(entry, "/") {
return false
}

if err != nil {
log.Log().Warnf("invalid regex '%s'", entry)
} else {
r.cache = append(r.cache, compile)
}
// Trim slashes
entry = strings.TrimSpace(entry[1 : len(entry)-1])

compile, err := regexp.Compile(entry)
if err != nil {
log.Log().Warnf("invalid regex '%s'", entry)

return true // invalid but handled
}

r.cache = append(r.cache, compile)

return true
}

func (r *regexCacheFactory) count() int {
Expand Down
4 changes: 3 additions & 1 deletion cache/stringcache/string_caches_benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ func BenchmarkStringCache(b *testing.B) {
factory := newStringCacheFactory()

for _, s := range testdata {
factory.addEntry(s)
if !factory.addEntry(s) {
b.Fatalf("cache didn't insert value: %s", s)
}
}

factory.create()
Expand Down
28 changes: 17 additions & 11 deletions cache/stringcache/string_caches_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,16 @@ var _ = Describe("Caches", func() {
cache stringCache
factory cacheFactory
)

Describe("String StringCache", func() {
When("string StringCache was created", func() {
BeforeEach(func() {
factory = newStringCacheFactory()
factory.addEntry("google.com")
factory.addEntry("apple.com")
factory.addEntry("")
factory.addEntry("google.com")
factory.addEntry("APPLe.com")
Expect(factory.addEntry("google.com")).Should(BeTrue())
Expect(factory.addEntry("apple.com")).Should(BeTrue())
Expect(factory.addEntry("")).Should(BeTrue()) // invalid, but handled
Expect(factory.addEntry("google.com")).Should(BeTrue())
Expect(factory.addEntry("APPLe.com")).Should(BeTrue())

cache = factory.create()
})
Expand All @@ -29,13 +30,16 @@ var _ = Describe("Caches", func() {
Expect(cache.contains("www.google.com")).Should(BeFalse())
Expect(cache.contains("")).Should(BeFalse())
})

It("should match case-insensitive", func() {
Expect(cache.contains("aPPle.com")).Should(BeTrue())
Expect(cache.contains("google.COM")).Should(BeTrue())
Expect(cache.contains("www.google.com")).Should(BeFalse())
Expect(cache.contains("")).Should(BeFalse())
})

It("should return correct element count", func() {
Expect(factory.count()).Should(Equal(4))
Expect(cache.elementCount()).Should(Equal(2))
})
})
Expand All @@ -45,14 +49,15 @@ var _ = Describe("Caches", func() {
When("regex StringCache was created", func() {
BeforeEach(func() {
factory = newRegexCacheFactory()
factory.addEntry("/.*google.com/")
factory.addEntry("/^apple\\.(de|com)$/")
factory.addEntry("/amazon/")
// this is not a regex, will be ignored
factory.addEntry("/(wrongRegex/")
factory.addEntry("plaintext")
Expect(factory.addEntry("/.*google.com/")).Should(BeTrue())
Expect(factory.addEntry("/^apple\\.(de|com)$/")).Should(BeTrue())
Expect(factory.addEntry("/amazon/")).Should(BeTrue())
Expect(factory.addEntry("/(wrongRegex/")).Should(BeTrue()) // recognized as regex but ignored because invalid
Expect(factory.addEntry("plaintext")).Should(BeFalse())

cache = factory.create()
})

It("should match if one regex in StringCache matches string", func() {
Expect(cache.contains("google.com")).Should(BeTrue())
Expect(cache.contains("google.coma")).Should(BeTrue())
Expand All @@ -67,6 +72,7 @@ var _ = Describe("Caches", func() {
Expect(cache.contains("amazon.com")).Should(BeTrue())
Expect(cache.contains("myamazon.com")).Should(BeTrue())
})

It("should return correct element count", func() {
Expect(factory.count()).Should(Equal(3))
Expect(cache.elementCount()).Should(Equal(3))
Expand Down
10 changes: 6 additions & 4 deletions lists/list_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ func NewListCache(ctx context.Context,
) (*ListCache, error) {
c := &ListCache{
groupedCache: stringcache.NewChainedGroupedCache(
stringcache.NewInMemoryGroupedStringCache(),
stringcache.NewInMemoryGroupedRegexCache(),
stringcache.NewInMemoryGroupedStringCache(), // accepts all values, must be last
),

cfg: cfg,
Expand Down Expand Up @@ -168,9 +168,11 @@ func (b *ListCache) createCacheForGroup(

producers.GoConsume(func(ctx context.Context, ch <-chan string) error {
for host := range ch {
hasEntries = true

groupFactory.AddEntry(host)
if groupFactory.AddEntry(host) {
hasEntries = true
} else {
logger().WithField("host", host).Warn("no list cache was able to use host")
}
}

return nil
Expand Down

0 comments on commit bcf2bc9

Please sign in to comment.