forked from 0xERR0R/blocky
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
todo: feat(lists): add support for wildcard lists using a custom Trie
A couple other Trie implementations were tested but they use more memory and are slower. See PR #<FIXME> for details.
- Loading branch information
1 parent
7547e02
commit a4e993e
Showing
13 changed files
with
891,967 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,44 +1,194 @@ | ||
package stringcache | ||
|
||
import ( | ||
"math/rand" | ||
"context" | ||
"math" | ||
"os" | ||
"runtime" | ||
"runtime/debug" | ||
"testing" | ||
|
||
"github.com/0xERR0R/blocky/lists/parsers" | ||
) | ||
|
||
func BenchmarkStringCache(b *testing.B) { | ||
testdata := createTestdata(10_000) | ||
var ( | ||
// stringTestData holds the entries init loads from the plain list file. | ||
stringTestData []string | ||
// wildcardTestData holds the entries init loads from the wildcard list file (or derives from stringTestData). | ||
wildcardTestData []string | ||
|
||
// baseMemStats is the snapshot taken at the start of a benchmark; reportMemUsage subtracts its HeapAlloc. | ||
baseMemStats runtime.MemStats | ||
) | ||
|
||
// init loads the benchmark data sets into stringTestData and wildcardTestData. | ||
func init() { //nolint:gochecknoinits | ||
// If you update either list, make sure both are the same list version (see file header). | ||
stringTestData = loadTestdata("../../helpertest/data/oisd-big-plain.txt") | ||
|
||
// String and Wildcard benchmarks don't use the exact same data, | ||
// but since it's two versions of the same list it's closer to | ||
// the real world. | ||
// For the same data, a Trie uses slightly more memory but searches are much faster. | ||
if true { // change to false to benchmark both caches on the same data | ||
wildcardTestData = loadTestdata("../../helpertest/data/oisd-big-wildcard.txt") | ||
} else { | ||
// Derive the wildcard entries by prefixing every plain entry with "*.". | ||
wildcardTestData = make([]string, 0, len(stringTestData)) | ||
|
||
for _, domain := range stringTestData { | ||
wildcardTestData = append(wildcardTestData, "*."+domain) | ||
} | ||
} | ||
} | ||
|
||
// --- Cache Building --- | ||
// | ||
// Most memory efficient: Wildcard (blocky/trie) | ||
// Fastest: Wildcard (blocky/trie) | ||
// | ||
//nolint:lll | ||
// BenchmarkStringFactory-8 6 174 514 565 ns/op 11.81 fact_heap_MB 26.93 peak_heap_MB 67 621 648 B/op 1 304 allocs/op | ||
// BenchmarkWildcardFactory-8 18 59 718 953 ns/op 16.52 fact_heap_MB 16.52 peak_heap_MB 26 624 735 B/op 92 071 allocs/op | ||
|
||
// BenchmarkStringFactory measures building a cache from stringTestData with newStringCacheFactory. | ||
func BenchmarkStringFactory(b *testing.B) { | ||
benchmarkStringFactory(b, newStringCacheFactory) | ||
} | ||
|
||
// BenchmarkWildcardFactory measures building a cache from wildcardTestData with newWildcardCacheFactory. | ||
func BenchmarkWildcardFactory(b *testing.B) { | ||
benchmarkWildcardFactory(b, newWildcardCacheFactory) | ||
} | ||
|
||
// benchmarkStringFactory runs benchmarkFactory on stringTestData with the given factory constructor. | ||
func benchmarkStringFactory(b *testing.B, newFactory func() cacheFactory) { | ||
benchmarkFactory(b, stringTestData, newFactory) | ||
} | ||
|
||
// benchmarkWildcardFactory runs benchmarkFactory on wildcardTestData with the given factory constructor. | ||
func benchmarkWildcardFactory(b *testing.B, newFactory func() cacheFactory) { | ||
benchmarkFactory(b, wildcardTestData, newFactory) | ||
} | ||
|
||
func benchmarkFactory(b *testing.B, data []string, newFactory func() cacheFactory) { | ||
baseMemStats = readMemStats() | ||
|
||
b.ReportAllocs() | ||
|
||
var ( | ||
factory cacheFactory | ||
cache stringCache | ||
) | ||
|
||
for i := 0; i < b.N; i++ { | ||
factory := newStringCacheFactory() | ||
factory = newFactory() | ||
|
||
for _, s := range testdata { | ||
for _, s := range data { | ||
factory.addEntry(s) | ||
} | ||
|
||
factory.create() | ||
cache = factory.create() | ||
} | ||
|
||
b.StopTimer() | ||
reportMemUsage(b, "peak", factory, cache) | ||
reportMemUsage(b, "fact", factory) // cache will be GC'd | ||
} | ||
|
||
// --- Cache Querying --- | ||
// | ||
// Most memory efficient: Wildcard (blocky/trie) | ||
// Fastest: Wildcard (blocky/trie) | ||
// | ||
//nolint:lll | ||
// BenchmarkStringCache-8 6 203 026 476 ns/op 15.14 heap_MB 0 B/op 0 allocs/op | ||
// BenchmarkWildcardCache-8 38 30 201 633 ns/op 16.54 heap_MB 0 B/op 0 allocs/op | ||
|
||
// BenchmarkStringCache measures lookups in a cache built from stringTestData with newStringCacheFactory. | ||
func BenchmarkStringCache(b *testing.B) { | ||
benchmarkStringCache(b, newStringCacheFactory) | ||
} | ||
|
||
// BenchmarkWildcardCache measures lookups in a cache built from wildcardTestData with newWildcardCacheFactory. | ||
func BenchmarkWildcardCache(b *testing.B) { | ||
benchmarkWildcardCache(b, newWildcardCacheFactory) | ||
} | ||
|
||
// benchmarkStringCache runs benchmarkCache on stringTestData with the given factory constructor. | ||
func benchmarkStringCache(b *testing.B, newFactory func() cacheFactory) { | ||
benchmarkCache(b, stringTestData, newFactory) | ||
} | ||
|
||
// benchmarkWildcardCache runs benchmarkCache on wildcardTestData with the given factory constructor. | ||
func benchmarkWildcardCache(b *testing.B, newFactory func() cacheFactory) { | ||
benchmarkCache(b, wildcardTestData, newFactory) | ||
} | ||
|
||
// benchmarkCache measures lookups: it builds one cache from data before the timer is reset, | ||
// then checks on every iteration that each entry of data is found in the cache. | ||
// It finally reports the "cache_heap_MB" metric through reportMemUsage. | ||
func benchmarkCache(b *testing.B, data []string, newFactory func() cacheFactory) { | ||
// Baseline for the heap metric reported at the end. | ||
baseMemStats = readMemStats() | ||
|
||
factory := newFactory() | ||
|
||
for _, s := range data { | ||
factory.addEntry(s) | ||
} | ||
|
||
cache := factory.create() | ||
|
||
b.ReportAllocs() | ||
// Exclude the cache construction above from the measurement. | ||
b.ResetTimer() | ||
|
||
for i := 0; i < b.N; i++ { | ||
for _, s := range data { | ||
if !cache.contains(s) { | ||
b.Fatalf("cache is missing value that was previously inserted: %s", s) | ||
} | ||
} | ||
} | ||
|
||
b.StopTimer() | ||
// Passing cache keeps it allocated while the heap is measured. | ||
reportMemUsage(b, "cache", cache) | ||
} | ||
|
||
// --- | ||
|
||
// readMemStats returns a snapshot of the runtime memory statistics. | ||
// It first forces a garbage collection and asks the runtime to return memory to the OS, | ||
// presumably so that successive snapshots only differ by objects that are still reachable. | ||
func readMemStats() (res runtime.MemStats) { | ||
runtime.GC() | ||
debug.FreeOSMemory() | ||
|
||
runtime.ReadMemStats(&res) | ||
|
||
return res | ||
} | ||
|
||
func randString(n int) string { | ||
const charPool = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-." | ||
// toMeasure: keep these objects allocated during measuring | ||
func reportMemUsage(b *testing.B, prefix string, toMeasure ...any) { | ||
m := readMemStats() | ||
|
||
b := make([]byte, n) | ||
b.ReportMetric(toMB(m.HeapAlloc-baseMemStats.HeapAlloc), prefix+"_heap_MB") | ||
|
||
for i := range b { | ||
b[i] = charPool[rand.Intn(len(charPool))] | ||
// Forces Go to keep `toMeasure` and its contents allocated | ||
// You can tell it works because factory benchmarks have different values for both calls | ||
for i := range toMeasure { | ||
toMeasure[i] = nil | ||
} | ||
} | ||
|
||
// toMB converts a byte count to mebibytes.
//
// The value is first rounded to the nearest whole KiB, so the result is always
// a multiple of 1/1024 MB.
func toMB(b uint64) float64 {
	const (
		bytesInKB = float64(1024)
		kbInMB    = float64(1024)
	)

	kb := float64(b) / bytesInKB

	return math.Round(kb) / kbInMB
}
|
||
func createTestdata(count int) []string { | ||
var result []string | ||
func loadTestdata(path string) (res []string) { | ||
f, err := os.Open(path) | ||
if err != nil { | ||
panic(err) | ||
} | ||
defer f.Close() | ||
|
||
p := parsers.Hosts(f) | ||
|
||
err = parsers.ForEach[*parsers.HostsIterator](context.Background(), p, func(hosts *parsers.HostsIterator) error { | ||
return hosts.ForEach(func(host string) error { | ||
res = append(res, host) | ||
|
||
for i := 0; i < count; i++ { | ||
result = append(result, randString(8+rand.Intn(20))) | ||
return nil | ||
}) | ||
}) | ||
if err != nil { | ||
panic(err) | ||
} | ||
|
||
return result | ||
return res | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.