Skip to content

Commit

Permalink
feat: add warning if more than 500 regexes are in use
Browse files: browse the repository at this point in the history
  • Loading branch information
ThinkChaos committed Nov 12, 2023
1 parent c183767 commit 267df49
Showing 1 changed file with 19 additions and 3 deletions.
22 changes: 19 additions & 3 deletions lists/list_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ import (
"github.com/ThinkChaos/parcour/jobgroup"
)

const groupProducersBufferCap = 1000
const (
// groupProducersBufferCap is a capacity value of 1000.
// NOTE(review): its use site is not visible in this excerpt; presumably it
// sizes the buffer shared by the per-group producers - confirm against the caller.
groupProducersBufferCap = 1000
// regexWarningThreshold is the number of regexes, summed over all groups,
// above which LogConfig emits a warning about high regex usage.
regexWarningThreshold = 500
)

// ListCacheType represents the type of cached list ENUM(
// blacklist // is a list with blocked domains
Expand All @@ -35,6 +38,7 @@ type Matcher interface {
// ListCache is a generic cache of strings divided into groups.
type ListCache struct {
groupedCache stringcache.GroupedStringCache
regexCache stringcache.GroupedStringCache

cfg config.SourceLoadingConfig
listType ListCacheType
Expand All @@ -44,12 +48,21 @@ type ListCache struct {

// LogConfig implements `config.Configurable`.
func (b *ListCache) LogConfig(logger *logrus.Entry) {
var total int
total := 0
regexes := 0

for group := range b.groupSources {
count := b.groupedCache.ElementCount(group)
logger.Infof("%s: %d entries", group, count)
total += count
regexes += b.regexCache.ElementCount(group)
}

if regexes > regexWarningThreshold {
logger.Warnf(
"REGEXES: %d !! High use of regexes is not recommended: they use a lot of memory and are very slow to search",
regexes,
)
}

logger.Infof("TOTAL: %d entries", total)
Expand All @@ -60,12 +73,15 @@ func NewListCache(ctx context.Context,
t ListCacheType, cfg config.SourceLoadingConfig,
groupSources map[string][]config.BytesSource, downloader FileDownloader,
) (*ListCache, error) {
regexCache := stringcache.NewInMemoryGroupedRegexCache()

c := &ListCache{
groupedCache: stringcache.NewChainedGroupedCache(
stringcache.NewInMemoryGroupedRegexCache(),
regexCache,
stringcache.NewInMemoryGroupedWildcardCache(), // must be after regex which can contain '*'
stringcache.NewInMemoryGroupedStringCache(), // accepts all values, must be last
),
regexCache: regexCache,

cfg: cfg,
listType: t,
Expand Down

0 comments on commit 267df49

Please sign in to comment.