forked from tylertreat/BoomFilters
-
Notifications
You must be signed in to change notification settings - Fork 0
/
classic.go
121 lines (102 loc) · 3.17 KB
/
classic.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
package boom
import (
"hash"
"hash/fnv"
"math"
)
// BloomFilter implements a classic Bloom filter. A Bloom filter has a non-zero
// probability of false positives and a zero probability of false negatives.
type BloomFilter struct {
buckets *Buckets // filter data
hash hash.Hash64 // hash function (kernel for all k functions)
m uint // filter size
k uint // number of hash functions
count uint // number of items added
}
// NewBloomFilter creates a new Bloom filter optimized to store n items with a
// specified target false-positive rate.
func NewBloomFilter(n uint, fpRate float64) *BloomFilter {
m := OptimalM(n, fpRate)
return &BloomFilter{
buckets: NewBuckets(m, 1),
hash: fnv.New64(),
m: m,
k: OptimalK(fpRate),
}
}
// Capacity returns the Bloom filter capacity, m.
func (b *BloomFilter) Capacity() uint {
return b.m
}
// K returns the number of hash functions.
func (b *BloomFilter) K() uint {
return b.k
}
// Count returns the number of items added to the filter.
func (b *BloomFilter) Count() uint {
return b.count
}
// EstimatedFillRatio returns the current estimated ratio of set bits.
func (b *BloomFilter) EstimatedFillRatio() float64 {
return 1 - math.Exp((-float64(b.count)*float64(b.k))/float64(b.m))
}
// FillRatio returns the ratio of set bits.
func (b *BloomFilter) FillRatio() float64 {
sum := uint32(0)
for i := uint(0); i < b.buckets.Count(); i++ {
sum += b.buckets.Get(i)
}
return float64(sum) / float64(b.m)
}
// Test will test for membership of the data and returns true if it is a
// member, false if not. This is a probabilistic test, meaning there is a
// non-zero probability of false positives but a zero probability of false
// negatives.
func (b *BloomFilter) Test(data []byte) bool {
lower, upper := hashKernel(data, b.hash)
// If any of the K bits are not set, then it's not a member.
for i := uint(0); i < b.k; i++ {
if b.buckets.Get((uint(lower)+uint(upper)*i)%b.m) == 0 {
return false
}
}
return true
}
// Add will add the data to the Bloom filter. It returns the filter to allow
// for chaining.
func (b *BloomFilter) Add(data []byte) Filter {
lower, upper := hashKernel(data, b.hash)
// Set the K bits.
for i := uint(0); i < b.k; i++ {
b.buckets.Set((uint(lower)+uint(upper)*i)%b.m, 1)
}
b.count++
return b
}
// TestAndAdd is equivalent to calling Test followed by Add. It returns true if
// the data is a member, false if not.
func (b *BloomFilter) TestAndAdd(data []byte) bool {
lower, upper := hashKernel(data, b.hash)
member := true
// If any of the K bits are not set, then it's not a member.
for i := uint(0); i < b.k; i++ {
idx := (uint(lower) + uint(upper)*i) % b.m
if b.buckets.Get(idx) == 0 {
member = false
}
b.buckets.Set(idx, 1)
}
b.count++
return member
}
// Reset restores the Bloom filter to its original state. It returns the filter
// to allow for chaining.
func (b *BloomFilter) Reset() *BloomFilter {
b.buckets.Reset()
return b
}
// SetHash sets the hashing function used in the filter.
// For the effect on false positive rates see: https://github.com/tylertreat/BoomFilters/pull/1
func (b *BloomFilter) SetHash(h hash.Hash64) {
b.hash = h
}