diff --git a/internal/manifest/version.go b/internal/manifest/version.go index b584899dcd..6eb86a43f8 100644 --- a/internal/manifest/version.go +++ b/internal/manifest/version.go @@ -17,6 +17,7 @@ import ( "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble/internal/base" "github.com/cockroachdb/pebble/internal/invariants" + "github.com/cockroachdb/pebble/internal/overlap/overlapcache" "github.com/cockroachdb/pebble/sstable" ) @@ -287,6 +288,9 @@ type FileMetadata struct { // SyntheticSuffix overrides all suffixes in a table; used for some virtual tables. SyntheticSuffix sstable.SyntheticSuffix + + // OverlapCache is used to speed up overlap checks during ingestion. + OverlapCache overlapcache.C } // InternalKeyBounds returns the set of overall table bounds. diff --git a/internal/overlap/checker.go b/internal/overlap/checker.go index c219b553a1..326860d72c 100644 --- a/internal/overlap/checker.go +++ b/internal/overlap/checker.go @@ -8,6 +8,7 @@ package overlap import ( "context" + "slices" "github.com/cockroachdb/pebble/internal/base" "github.com/cockroachdb/pebble/internal/keyspan" @@ -23,7 +24,8 @@ type WithLSM [manifest.NumLevels]WithLevel type WithLevel struct { Result Kind // SplitFile can be set only when result is OnlyBoundary. If it is set, this - // file can be split to free up the range of interest. + // file can be split to free up the range of interest. SplitFile is not set + // for L0 (overlapping tables are allowed in L0). SplitFile *manifest.FileMetadata } @@ -89,6 +91,7 @@ func (c *Checker) LSMOverlap( } if res.Result == OnlyBoundary { result[0].Result = OnlyBoundary + // We don't set SplitFile for L0 (tables in L0 are allowed to overlap). } } for level := 1; level < manifest.NumLevels; level++ { @@ -135,11 +138,11 @@ func (c *Checker) LevelOverlap( return WithLevel{Result: Data}, nil } // We have a single file to look at; its boundaries enclose our region. 
- empty, err := c.EmptyRegion(ctx, region, file) + overlap, err := c.DataOverlapWithFile(ctx, region, file) if err != nil { return WithLevel{}, err } - if !empty { + if overlap { return WithLevel{Result: Data}, nil } return WithLevel{ @@ -148,116 +151,183 @@ func (c *Checker) LevelOverlap( }, nil } -// EmptyRegion returns true if the given region doesn't overlap with any keys or -// ranges in the given table. -func (c *Checker) EmptyRegion( +// DataOverlapWithFile returns true if the given region overlaps with any keys +// or spans in the given table. +func (c *Checker) DataOverlapWithFile( ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata, ) (bool, error) { - empty, err := c.emptyRegionPointsAndRangeDels(ctx, region, m) - if err != nil || !empty { - return empty, err + if overlap, ok := m.OverlapCache.CheckDataOverlap(c.cmp, region); ok { + return overlap, nil } - return c.emptyRegionRangeKeys(ctx, region, m) -} + // We want to check overlap with file, but we also want to update the cache + // with useful information. We try to find two data regions r1 and r2 with a + // space-in between; r1 ends before region.Start and r2 ends at or after + // region.Start. See overlapcache.C.ReportEmptyRegion(). + var r1, r2 base.UserKeyBounds -// emptyRegionPointsAndRangeDels returns true if the file doesn't contain any -// point keys or range del spans that overlap with region. 
-func (c *Checker) emptyRegionPointsAndRangeDels( - ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata, -) (bool, error) { - if !m.HasPointKeys { - return true, nil + if m.HasPointKeys { + lt, ge, err := c.pointKeysAroundKey(ctx, region.Start, m) + if err != nil { + return false, err + } + r1 = base.UserKeyBoundsInclusive(lt, lt) + r2 = base.UserKeyBoundsInclusive(ge, ge) + + if err := c.extendRegionsWithSpans(ctx, &r1, &r2, region.Start, m, manifest.KeyTypePoint); err != nil { + return false, err + } } - pointBounds := m.UserKeyBoundsByType(manifest.KeyTypePoint) - if !pointBounds.Overlaps(c.cmp, &region) { + if m.HasRangeKeys { + if err := c.extendRegionsWithSpans(ctx, &r1, &r2, region.Start, m, manifest.KeyTypeRange); err != nil { + return false, err + } + } + // If the regions now overlap or touch, it's all one big data region. + if r1.Start != nil && r2.Start != nil && c.cmp(r1.End.Key, r2.Start) >= 0 { + m.OverlapCache.ReportDataRegion(c.cmp, base.UserKeyBounds{ + Start: r1.Start, + End: r2.End, + }) return true, nil } + m.OverlapCache.ReportEmptyRegion(c.cmp, r1, r2) + // There is overlap iff we overlap with r2. + overlap := r2.Start != nil && region.End.IsUpperBoundFor(c.cmp, r2.Start) + return overlap, nil +} + +// pointKeysAroundKey returns two consecutive point keys: the greatest key that +// is < key and the smallest key that is >= key. If there is no such key, the +// corresponding return value is nil. Both lt and ge are nil if the file +// contains no point keys. 
+func (c *Checker) pointKeysAroundKey( + ctx context.Context, key []byte, m *manifest.FileMetadata, +) (lt, ge []byte, _ error) { + pointBounds := m.UserKeyBoundsByType(manifest.KeyTypePoint) + points, err := c.iteratorFactory.Points(ctx, m) - if err != nil { - return false, err + if points == nil || err != nil { + return nil, nil, err } - if points != nil { - defer points.Close() - var kv *base.InternalKV - if c.cmp(region.Start, pointBounds.Start) <= 0 { - kv = points.First() - } else { - kv = points.SeekGE(region.Start, base.SeekGEFlagsNone) + defer points.Close() + switch { + case c.cmp(key, pointBounds.Start) <= 0: + kv := points.First() + if kv != nil { + ge = slices.Clone(kv.K.UserKey) } - if kv == nil && points.Error() != nil { - return false, points.Error() + case c.cmp(key, pointBounds.End.Key) > 0: + kv := points.Last() + if kv != nil { + lt = slices.Clone(kv.K.UserKey) } - if kv != nil && region.End.IsUpperBoundForInternalKey(c.cmp, kv.K) { - // Found overlap. - return false, nil + default: + kv := points.SeekLT(key, base.SeekLTFlagsNone) + if kv != nil { + lt = slices.Clone(kv.K.UserKey) } - } - rangeDels, err := c.iteratorFactory.RangeDels(ctx, m) - if err != nil { - return false, err - } - if rangeDels != nil { - defer rangeDels.Close() - empty, err := c.emptyFragmentRegion(region, pointBounds.Start, rangeDels) - if err != nil || !empty { - return empty, err + if kv = points.Next(); kv != nil { + ge = slices.Clone(kv.K.UserKey) } } - // Found no overlap. - return true, nil + return lt, ge, points.Error() } -// emptyRegionRangeKeys returns true if the file doesn't contain any range key -// spans that overlap with region. 
-func (c *Checker) emptyRegionRangeKeys( - ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata, -) (bool, error) { - if !m.HasRangeKeys { - return true, nil - } - rangeKeyBounds := m.UserKeyBoundsByType(manifest.KeyTypeRange) - if !rangeKeyBounds.Overlaps(c.cmp, &region) { - return true, nil +// extendRegionsWithSpans opens a fragment iterator for either range dels or +// range keys (depending on keyType), finds the last span that ends before key +// and the following span, and extends/replaces regions r1 and r2. +func (c *Checker) extendRegionsWithSpans( + ctx context.Context, + r1, r2 *base.UserKeyBounds, + key []byte, + m *manifest.FileMetadata, + keyType manifest.KeyType, +) error { + var iter keyspan.FragmentIterator + var err error + if keyType == manifest.KeyTypePoint { + iter, err = c.iteratorFactory.RangeDels(ctx, m) + } else { + iter, err = c.iteratorFactory.RangeKeys(ctx, m) } - rangeKeys, err := c.iteratorFactory.RangeKeys(ctx, m) - if err != nil { - return false, err + if iter == nil || err != nil { + return err } - if rangeKeys != nil { - defer rangeKeys.Close() - empty, err := c.emptyFragmentRegion(region, rangeKeyBounds.Start, rangeKeys) - if err != nil || !empty { - return empty, err + defer iter.Close() + + fragmentBounds := m.UserKeyBoundsByType(keyType) + switch { + case c.cmp(key, fragmentBounds.Start) <= 0: + span, err := iter.First() + if err != nil { + return err + } + c.updateR2(r2, span) + + case !fragmentBounds.End.IsUpperBoundFor(c.cmp, key): + span, err := iter.Last() + if err != nil { + return err + } + c.updateR1(r1, span) + + default: + span, err := iter.SeekGE(key) + if err != nil { + return err } + c.updateR2(r2, span) + span, err = iter.Prev() + if err != nil { + return err + } + c.updateR1(r1, span) } - // Found no overlap. - return true, nil + return nil } -// emptyFragmentRegion returns true if the given iterator doesn't contain any -// spans that overlap with region. 
The fragmentLowerBounds is a known lower -// bound for all the spans. -func (c *Checker) emptyFragmentRegion( - region base.UserKeyBounds, fragmentLowerBound []byte, fragments keyspan.FragmentIterator, -) (bool, error) { - var span *keyspan.Span - var err error - if c.cmp(region.Start, fragmentLowerBound) <= 0 { - // This is an optimization: we know there are no spans before region.Start, - // so we can use First. - span, err = fragments.First() - } else { - span, err = fragments.SeekGE(region.Start) - } - if err != nil { - return false, err - } - if span != nil && span.Empty() { - return false, base.AssertionFailedf("fragment iterator produced empty span") +// updateR1 updates r1, the region of data that ends before a key of interest. +func (c *Checker) updateR1(r1 *base.UserKeyBounds, s *keyspan.Span) { + switch { + case s == nil: + + case r1.Start == nil || c.cmp(r1.End.Key, s.Start) < 0: + // Region completely to the right of r1. + *r1 = base.UserKeyBoundsEndExclusive(slices.Clone(s.Start), slices.Clone(s.End)) + + case c.cmp(s.End, r1.Start) < 0: + // Region completely to the left of r1, nothing to do. + + default: + // Regions are overlapping or touching. + if c.cmp(s.Start, r1.Start) < 0 { + r1.Start = slices.Clone(s.Start) + } + if c.cmp(r1.End.Key, s.End) < 0 { + r1.End = base.UserKeyExclusive(slices.Clone(s.End)) + } } - if span != nil && region.End.IsUpperBoundFor(c.cmp, span.Start) { - // Found overlap. - return false, nil +} + +// updateR2 updates r2, the region of data that ends at or after a key of interest. +func (c *Checker) updateR2(r2 *base.UserKeyBounds, s *keyspan.Span) { + switch { + case s == nil: + + case r2.Start == nil || c.cmp(s.End, r2.Start) < 0: + // Region completely to the left of r2. + *r2 = base.UserKeyBoundsEndExclusive(slices.Clone(s.Start), slices.Clone(s.End)) + + case c.cmp(r2.End.Key, s.Start) < 0: + // Region completely to the right of r2, nothing to do. + + default: + // Regions are overlapping or touching. 
+ if c.cmp(s.Start, r2.Start) < 0 { + r2.Start = slices.Clone(s.Start) + } + if c.cmp(r2.End.Key, s.End) < 0 { + r2.End = base.UserKeyExclusive(slices.Clone(s.End)) + } } - return true, nil } diff --git a/internal/overlap/checker_test.go b/internal/overlap/checker_test.go index 21a2fc539b..4fcf5ce1c4 100644 --- a/internal/overlap/checker_test.go +++ b/internal/overlap/checker_test.go @@ -16,14 +16,21 @@ import ( "github.com/cockroachdb/pebble/internal/base" "github.com/cockroachdb/pebble/internal/keyspan" "github.com/cockroachdb/pebble/internal/manifest" + "github.com/cockroachdb/pebble/internal/overlap/overlapcache" "github.com/stretchr/testify/require" ) func TestChecker(t *testing.T) { tables := newTestTables() byName := make(map[string]*manifest.FileMetadata) + clearCaches := func() { + for _, t := range tables.tables { + t.meta.OverlapCache = overlapcache.C{} + } + } datadriven.RunTest(t, "testdata/checker", func(t *testing.T, d *datadriven.TestData) string { + clearCaches() switch d.Cmd { case "define": tt := testTable{ @@ -99,10 +106,15 @@ func TestChecker(t *testing.T) { tables.tables[tt.meta] = tt case "overlap": + var withCache bool var metas []*manifest.FileMetadata lines := strings.Split(d.Input, "\n") for _, arg := range d.CmdArgs { name := arg.String() + if name == "with-cache" { + withCache = true + continue + } m := byName[name] if m == nil { d.Fatalf(t, "unknown table %q", name) @@ -113,6 +125,9 @@ func TestChecker(t *testing.T) { c := MakeChecker(bytes.Compare, tables) var buf strings.Builder for _, l := range lines { + if !withCache { + clearCaches() + } bounds := base.ParseUserKeyBounds(l) withLevel, err := c.LevelOverlap(context.Background(), bounds, levelMeta.Slice()) require.NoError(t, err) diff --git a/internal/overlap/overlapcache/cache.go b/internal/overlap/overlapcache/cache.go new file mode 100644 index 0000000000..8d297011e0 --- /dev/null +++ b/internal/overlap/overlapcache/cache.go @@ -0,0 +1,294 @@ +// Copyright 2024 The LevelDB-Go 
and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package overlapcache + +import ( + "fmt" + "sort" + "sync" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invariants" +) + +// C is a data structure that caches information about data regions in a file. +// It is used to speed up related overlap checks during ingestion. +// +// -- Implementation -- +// +// The cache maintains information about a small number of regions. A region +// corresponds to a user key interval (UserKeyBounds). We define three types of +// regions: +// - empty region: it is known that no keys or spans in the file overlap this +// region. +// - data region: corresponds to a key or span (or union of keys and spans) in +// the file. Any single key that falls inside this region has data overlap. +// - unknown region. +// +// We maintain a list of disjoint and sorted data regions, along with flags +// which indicate if the regions in-between are empty or unknown. The region +// before data region 0 refers to the entire start of the file up to data region +// 0. The region after data region n-1 refers to the entire end of the file +// starting from the end of data region n-1. +// +// See testdata/cache for some examples represented visually. +type C struct { + mu struct { + sync.Mutex + n int + dataRegions [cacheMaxEntries]base.UserKeyBounds + emptyBeforeRegion [cacheMaxEntries + 1]bool + } +} + +// cacheMaxEntries must be at least 4. +const cacheMaxEntries = 6 + +// maxKeySize prevents the cache from holding on to very large keys. It is a +// safety precaution. +const maxKeySize = 4096 + +// CheckDataOverlap tries to determine if the target region overlaps any data +// regions. 
+func (c *C) CheckDataOverlap(cmp base.Compare, target base.UserKeyBounds) (overlaps, ok bool) { + c.mu.Lock() + defer c.mu.Unlock() + n := c.mu.n + + // Find first region which ends after the start of the target region. + idx := sort.Search(n, func(i int) bool { + return c.mu.dataRegions[i].End.IsUpperBoundFor(cmp, target.Start) + }) + if idx < n && target.End.IsUpperBoundFor(cmp, c.mu.dataRegions[idx].Start) { + // target overlaps with a known data region. + return true, true + } + // The target region falls completely outside regions idx-1 and idx. + if c.mu.emptyBeforeRegion[idx] { + // The entire space between data regions idx-1 and idx is known to contain + // no data. + return false, true + } + // We don't know if there is data in the space between regions idx-1 and idx. + return false, false +} + +// ReportDataRegion informs the cache that the target region contains data. +// +// There is no assumption about the region being maximal (i.e. it could be part +// of a larger data region). +// +// Note that the cache will hold on to the region's key slices indefinitely. +// They should not be modified ever again by the caller. +func (c *C) ReportDataRegion(cmp base.Compare, region base.UserKeyBounds) { + if len(region.Start) > maxKeySize || len(region.End.Key) > maxKeySize { + return + } + + c.mu.Lock() + defer c.mu.Unlock() + if invariants.Enabled { + defer c.check(cmp) + } + c.insertRegion(cmp, region, allowLeftExtension|allowRightExtension) +} + +// ReportEmptyRegion informs the cache of an empty region, in-between two data +// regions r1 and r2. +// +// Unset regions are accepted and serve as "sentinels" representing the start or +// end of the file. Specifically: +// - if r1 is unset, the empty region is from the start of the file to the +// start of r2; +// - if r2 is unset, the empty region is from the end of r1 to the end of the +// file; +// - if both r1 and r2 are unset, the entire file is empty. 
+// +// There is no assumption about the regions being maximal (i.e. r1 could be part +// of a larger data region extending to the left, and r2 could be part of a +// larger data region extending to the right). +// +// Note that the cache will hold on to the regions' key slices indefinitely. +// They should not be modified ever again by the caller. +func (c *C) ReportEmptyRegion(cmp base.Compare, r1, r2 base.UserKeyBounds) { + if len(r1.Start) > maxKeySize || len(r1.End.Key) > maxKeySize || + len(r2.Start) > maxKeySize || len(r2.End.Key) > maxKeySize { + return + } + + c.mu.Lock() + defer c.mu.Unlock() + if invariants.Enabled { + defer c.check(cmp) + } + + switch { + case r1.Start == nil && r2.Start == nil: + // The entire file is empty, + c.assert(c.mu.n == 0) + c.mu.emptyBeforeRegion[0] = true + return + + case r1.Start == nil: + // We know there is only empty space before r2. + idx := c.insertRegion(cmp, r2, allowRightExtension) + c.assert(idx == 0) + c.mu.emptyBeforeRegion[0] = true + return + + case r2.Start == nil: + // We know there is only empty space after r1. + idx := c.insertRegion(cmp, r1, allowLeftExtension) + c.assert(idx == c.mu.n-1) + c.mu.emptyBeforeRegion[c.mu.n] = true + return + } + + // Find the first region that contains or ends right at r1.Start. + r1Idx := c.insertionPoint(cmp, r1) + r1Overlapping, r1, r1EmptyBefore, _ := c.checkOverlap(cmp, r1Idx, r1, allowLeftExtension) + r2Idx := r1Idx + r1Overlapping + + r2Overlapping, r2, _, r2EmptyAfter := c.checkOverlap(cmp, r2Idx, r2, allowRightExtension) + + newIdx := c.makeSpace(r1Idx, 2, r2Idx+r2Overlapping) + c.mu.dataRegions[newIdx] = r1 + c.mu.dataRegions[newIdx+1] = r2 + c.mu.emptyBeforeRegion[newIdx] = r1EmptyBefore + c.mu.emptyBeforeRegion[newIdx+1] = true + c.mu.emptyBeforeRegion[newIdx+2] = r2EmptyAfter +} + +// insertionPoint returns the first region that contains or ends right at Start. +// We allow an exclusive end bound "touching" the new region, because we can +// coalesce with it. 
+func (c *C) insertionPoint(cmp base.Compare, region base.UserKeyBounds) int { + return sort.Search(c.mu.n, func(i int) bool { + return cmp(c.mu.dataRegions[i].End.Key, region.Start) >= 0 + }) +} + +// insertRegion inserts a data region, evicting a region if necessary. Returns +// the index where it was inserted. +func (c *C) insertRegion( + cmp base.Compare, region base.UserKeyBounds, extension allowedExtension, +) (idx int) { + idx = c.insertionPoint(cmp, region) + overlapping, extendedRegion, emptyBefore, emptyAfter := c.checkOverlap(cmp, idx, region, extension) + idx = c.makeSpace(idx, 1, idx+overlapping) + c.mu.dataRegions[idx] = extendedRegion + c.mu.emptyBeforeRegion[idx] = emptyBefore + c.mu.emptyBeforeRegion[idx+1] = emptyAfter + return idx +} + +// allowedExtension represents in which direction it is legal for checkOverlap +// to extend a region; used for sanity checking. +type allowedExtension uint8 + +const ( + allowLeftExtension allowedExtension = 1 << iota + allowRightExtension +) + +// checkOverlap is called with idx pointing to the first region that +// ends after region.Start and returns the number of regions that overlap with +// (or touch) the target region. +func (c *C) checkOverlap( + cmp base.Compare, idx int, region base.UserKeyBounds, extension allowedExtension, +) (numOverlapping int, extendedRegion base.UserKeyBounds, emptyBefore, emptyAfter bool) { + for ; ; numOverlapping++ { + if idx+numOverlapping >= c.mu.n || cmp(region.End.Key, c.mu.dataRegions[idx+numOverlapping].Start) < 0 { + break + } + } + + // Extend the region if necessary. 
+ extendedRegion = region + if numOverlapping > 0 { + switch cmp(c.mu.dataRegions[idx].Start, region.Start) { + case -1: + c.assert(extension&allowLeftExtension != 0) + extendedRegion.Start = c.mu.dataRegions[idx].Start + fallthrough + case 0: + emptyBefore = c.mu.emptyBeforeRegion[idx] + } + + switch c.mu.dataRegions[idx+numOverlapping-1].End.CompareUpperBounds(cmp, region.End) { + case 1: + c.assert(extension&allowRightExtension != 0) + extendedRegion.End = c.mu.dataRegions[idx+numOverlapping-1].End + case 0: + emptyAfter = c.mu.emptyBeforeRegion[idx+numOverlapping] + } + } + return numOverlapping, extendedRegion, emptyBefore, emptyAfter +} + +// makeSpace is used to retain regions [0, keepLeftIdx) and [keepRightIdx, n) +// and leave space for regions in-between. +// +// When necessary, makeSpace evicts regions to make room for the new regions. +// +// Returns the index for the first new region (this equals keepLeftIdx when +// there is no eviction). +func (c *C) makeSpace(keepLeftIdx, newRegions, keepRightIdx int) (firstSpaceIdx int) { + start := 0 + end := c.mu.n + newLen := keepLeftIdx + newRegions + (c.mu.n - keepRightIdx) + for ; newLen > cacheMaxEntries; newLen-- { + // The result doesn't fit, so we have to evict a region. We choose to evict + // either the first or the last region, whichever keeps the new region(s) + // closer to the center. The reasoning is that we want to optimize for the + // case where we get repeated queries around the same region of interest. + if (keepLeftIdx - start) > (end - keepRightIdx) { + start++ + c.mu.emptyBeforeRegion[start] = false + } else { + end-- + c.mu.emptyBeforeRegion[end] = false + } + } + c.moveRegions(start, keepLeftIdx, 0) + c.moveRegions(keepRightIdx, end, keepLeftIdx-start+newRegions) + if newLen < c.mu.n { + // Clear the now unused regions so we don't hold on to key slices. 
+ clear(c.mu.dataRegions[newLen:c.mu.n]) + } + c.mu.n = newLen + return keepLeftIdx - start +} + +// moveRegions copies the regions [startIdx, endIdx) to +// [newStartIdx, newStartIdx+endIdx-startIdx). The emptyBeforeRegion flags for +// [startIdx, endIdx] are also copied. +func (c *C) moveRegions(startIdx, endIdx int, newStartIdx int) { + if startIdx >= endIdx || startIdx == newStartIdx { + return + } + copy(c.mu.dataRegions[newStartIdx:], c.mu.dataRegions[startIdx:endIdx]) + copy(c.mu.emptyBeforeRegion[newStartIdx:], c.mu.emptyBeforeRegion[startIdx:endIdx+1]) +} + +func (c *C) assert(cond bool) { + if !cond { + panic(errors.AssertionFailedf("overlapcache: conflicting information")) + } +} + +func (c *C) check(cmp base.Compare) { + for i := 0; i < c.mu.n; i++ { + r := &c.mu.dataRegions[i] + if !r.Valid(cmp) { + panic(fmt.Sprintf("invalid region %s", r)) + } + // Regions must not overlap or touch. + if i > 0 && cmp(c.mu.dataRegions[i-1].End.Key, r.Start) >= 0 { + panic(fmt.Sprintf("overlapping regions %s %s", c.mu.dataRegions[i-1], r)) + } + } +} diff --git a/internal/overlap/overlapcache/cache_test.go b/internal/overlap/overlapcache/cache_test.go new file mode 100644 index 0000000000..3fbd436528 --- /dev/null +++ b/internal/overlap/overlapcache/cache_test.go @@ -0,0 +1,244 @@ +// Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package overlapcache + +import ( + "fmt" + "math/rand" + "sort" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/datadriven/diagram" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/testkeys" + "github.com/stretchr/testify/require" +) + +func TestCacheDataDriven(t *testing.T) { + var c C + datadriven.RunTest(t, "testdata/cache", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "report": + var r1, r2 base.UserKeyBounds + lines := strings.Split(d.Input, "\n") + if lines[0] != "[]" { + r1 = base.ParseUserKeyBounds(lines[0]) + } + if lines[1] != "[]" { + r2 = base.ParseUserKeyBounds(lines[1]) + } + c.ReportEmptyRegion(cmp, r1, r2) + + case "report-data": + r := base.ParseUserKeyBounds(d.Input) + c.ReportDataRegion(cmp, r) + + case "reset": + c = C{} + + default: + d.Fatalf(t, "unknown command: %s", d.Cmd) + } + return toStr(&c) + }) +} + +var cmp = testkeys.Comparer.Compare + +// toStr returns a diagram of the current state of the cache. +// Each region is marked with a "*" for data regions, "-" for empty regions, and +// "?" for unknown regions. 
+func toStr(c *C) string { + var wb diagram.Whiteboard + const spacing = 5 + col := 0 + region := func(ch string) { + wb.Write(0, col, strings.Repeat(ch, spacing)) + col += spacing + } + ifElse := func(cond bool, a, b string) string { + if cond { + return a + } + return b + } + writeKey := func(key []byte) { + str := string(key) + wb.Write(1, col, "|") + wb.Write(2, col-(len(str)-1)/2, str) + } + + region(ifElse(c.mu.emptyBeforeRegion[0], "-", "?")) + for i, r := range c.mu.dataRegions[:c.mu.n] { + wb.Write(0, col, "*") + writeKey(r.Start) + col++ + if cmp(r.Start, r.End.Key) != 0 { + region("*") + wb.Write(0, col, ifElse(r.End.Kind == base.Exclusive, "|", "*")) + writeKey(r.End.Key) + col++ + } + region(ifElse(c.mu.emptyBeforeRegion[i+1], "-", "?")) + } + return wb.Indented(2) +} + +type region struct { + start int + end int + endKind base.BoundaryKind +} + +func (r region) UserKeyBounds() base.UserKeyBounds { + return base.UserKeyBounds{ + Start: keys[r.start], + End: base.UserKeyBoundary{ + Key: keys[r.end], + Kind: r.endKind, + }, + } +} + +func (r *region) SetRandKind() { + r.endKind = base.Inclusive + if r.start != r.end && rand.Intn(2) == 0 { + r.endKind = base.Exclusive + } +} + +func (r *region) MaybeTrimRightRand() { + if rand.Intn(2) == 0 { + return + } + oldEnd := r.end + r.end = randInRange(r.start, r.end+1) + if r.start == r.end { + r.endKind = base.Inclusive + } else if oldEnd > r.end || r.endKind == base.Inclusive { + r.SetRandKind() + } +} + +func (r *region) MaybeTrimLeftRand() { + if rand.Intn(2) == 0 { + return + } + if r.endKind == base.Inclusive { + r.start = randInRange(r.start, r.end+1) + } else { + r.start = randInRange(r.start, r.end) + } +} + +func TestCacheRandomized(t *testing.T) { + for n := 0; n < 100; n++ { + runRandomizedTest(t) + } +} + +func runRandomizedTest(t *testing.T) { + const debug = false + + // Generate data regions. 
+ numRegions := rand.Intn(20) + regions := make([]region, numRegions) + randKeys := rand.Perm(len(keys))[:numRegions+1] + sort.Ints(randKeys) + for i := range regions { + regions[i].start = randKeys[i] + regions[i].end = regions[i].start + if rand.Intn(4) > 0 { + regions[i].end = randInRange(regions[i].start, randKeys[i+1]) + } + regions[i].SetRandKind() + } + if debug { + fmt.Printf("Regions:") + for i := range regions { + fmt.Printf(" %s", regions[i].UserKeyBounds()) + } + } + c := &C{} + for j := 0; j < 100; j++ { + var knownRegion base.UserKeyBounds + if rand.Intn(4) == 0 && len(regions) > 0 { + r := regions[rand.Intn(len(regions))] + r.MaybeTrimLeftRand() + r.MaybeTrimRightRand() + knownRegion = r.UserKeyBounds() + if debug { + fmt.Printf("ReportDataRegion(%s)\n", r.UserKeyBounds()) + } + c.ReportDataRegion(cmp, r.UserKeyBounds()) + } else { + var r1, r2 base.UserKeyBounds + i := rand.Intn(len(regions)+1) - 1 + if i >= 0 { + r := regions[i] + r.MaybeTrimLeftRand() + r1 = r.UserKeyBounds() + knownRegion.Start = r1.Start + } + if i+1 < len(regions) { + r := regions[i+1] + r.MaybeTrimRightRand() + r2 = r.UserKeyBounds() + knownRegion.End = r2.End + } + if debug { + fmt.Printf("ReportEmptyRegion(%s, %s)\n", r1, r2) + } + c.ReportEmptyRegion(cmp, r1, r2) + } + if debug { + fmt.Printf("%s", toStr(c)) + } + + for j := 0; j < 100; j++ { + r := randRegion().UserKeyBounds() + + result, ok := c.CheckDataOverlap(cmp, r) + if !ok { + // The cache must be able to answer queries for any region that overlaps + // the knownRegion. + if (knownRegion.Start == nil || r.End.IsUpperBoundFor(cmp, knownRegion.Start)) && + (knownRegion.End.Key == nil || knownRegion.End.IsUpperBoundFor(cmp, r.Start)) { + t.Fatalf("cache should know if %s contains data", r) + } + continue + } + // Check the result. 
+ idx := sort.Search(len(regions), func(i int) bool { + return regions[i].UserKeyBounds().End.IsUpperBoundFor(cmp, r.Start) + }) + correct := idx < len(regions) && r.End.IsUpperBoundFor(cmp, keys[regions[idx].start]) + require.Equalf(t, correct, result, "incorrect ContainsData result for %s", r) + } + } +} + +// Returns a random integer in [start, end). +func randInRange(start, end int) int { + return start + rand.Intn(end-start) +} + +func randRegion() region { + var r region + r.start = randInRange(0, len(keys)) + r.end = randInRange(r.start, len(keys)) + r.SetRandKind() + return r +} + +var keys = func() [][]byte { + keys := make([][]byte, 100) + for i := range keys { + keys[i] = []byte(fmt.Sprintf("k%02d", i)) + } + return keys +}() diff --git a/internal/overlap/overlapcache/testdata/cache b/internal/overlap/overlapcache/testdata/cache new file mode 100644 index 0000000000..df56cca7dc --- /dev/null +++ b/internal/overlap/overlapcache/testdata/cache @@ -0,0 +1,185 @@ +reset +---- + ????? + +report +[] +[] +---- + ----- + +reset +---- + ????? + +report +[] +[a, a] +---- + -----*????? + | + a + +report +[c, c] +[d, d] +---- + -----*?????*-----*????? + | | | + a c d + +report +[u, v) +[] +---- + -----*?????*-----*?????******|----- + | | | | | + a c d u v + +report +[a1, a1] +[a2, a2] +---- + -----*?????*-----*?????*-----*?????******|----- + | | | | | | | + a a1 a2 c d u v + +report-data +[a2, a2] +---- + -----*?????*-----*?????*-----*?????******|----- + | | | | | | | + a a1 a2 c d u v + +# We should evict a (and the start region is now unknown). +report +[e, f] +[g, g] +---- + ?????*?????*-----*?????*******-----*?????******|----- + | | | | | | | | + a2 c d e f g u v + +# We should evict a2 and a3. +report +[h, h] +[i, i] +---- + ?????*?????*******-----*?????*-----*?????******|----- + | | | | | | | | + d e f g h i u v + + +# We should evict u and v (and the end region is now unknown). 
+report +[g00, g01) +[g10, g10] +---- + ?????*******-----*?????******|-----*?????*-----*????? + | | | | | | | | + e f g g00 g01 g10 h i + +report +[g20, g20] +[g21, g21] +---- + ?????******|-----*?????*-----*?????*-----*????? + | | | | | | | + g00 g01 g10 g20 g21 h i + +report +[g15, g15] +[g16, g16] +---- + ?????******|-----*?????*-----*?????*-----*????? + | | | | | | | + g00 g01 g10 g15 g16 g20 g21 + +## Test region coalescing. +reset +---- + ????? + +report-data +[a, b] +---- + ?????*******????? + | | + a b + +report-data +[c, d) +---- + ?????*******?????******|????? + | | | | + a b c d + +report-data +[e, f) +---- + ?????*******?????******|?????******|????? + | | | | | | + a b c d e f + +report-data +[g, h] +---- + ?????*******?????******|?????******|?????*******????? + | | | | | | | | + a b c d e f g h + +# Coalesce into [a, d). +report-data +[a, c1] +---- + ?????******|?????******|?????*******????? + | | | | | | + a d e f g h + +# We now coalesce regions into [e, h]. +report-data +[f, g) +---- + ?????******|?????*******????? + | | | | + a d e h + +report-data +[c2, e) +---- + ?????*******????? + | | + a h + +reset +---- + ????? + +report-data +[a, b] +---- + ?????*******????? + | | + a b + +report-data +[c, d) +---- + ?????*******?????******|????? + | | | | + a b c d + +report-data +[e, f) +---- + ?????*******?????******|?????******|????? + | | | | | | + a b c d e f + +report +[a5, b2] +[b8, e) +---- + ?????*******-----******|????? 
+ | | | | + a b2 b8 f diff --git a/internal/overlap/testdata/checker b/internal/overlap/testdata/checker index ea7e31f631..b2b71fc835 100644 --- a/internal/overlap/testdata/checker +++ b/internal/overlap/testdata/checker @@ -20,7 +20,7 @@ overlap t1 [a, b]: possible data overlap iterators opened: none [b1, b2]: boundary overlap, no data overlap (split file: t1) iterators opened: t1/points, t1/range-del [b, c]: possible data overlap iterators opened: none -[d, e]: possible data overlap iterators opened: t1/points +[d, e]: possible data overlap iterators opened: t1/points, t1/range-del [e, u): boundary overlap, no data overlap (split file: t1) iterators opened: t1/points, t1/range-del [e, u]: possible data overlap iterators opened: none [u, w]: possible data overlap iterators opened: none @@ -76,6 +76,26 @@ overlap t3 [j, l): possible data overlap iterators opened: none [k, l]: no overlap iterators opened: none +overlap t3 with-cache +[a, b) +[a, b] +[d, f] +[d, f] +[e, f] +[e, f] +[e, h) +[e, h] +[j, l) +---- +[a, b): no overlap iterators opened: none +[a, b]: possible data overlap iterators opened: none +[d, f]: possible data overlap iterators opened: t3/range-key +[d, f]: possible data overlap iterators opened: none +[e, f]: boundary overlap, no data overlap (split file: t3) iterators opened: t3/range-key +[e, f]: boundary overlap, no data overlap (split file: t3) iterators opened: none +[e, h): boundary overlap, no data overlap (split file: t3) iterators opened: none +[e, h]: possible data overlap iterators opened: none +[j, l): possible data overlap iterators opened: none define tBE points: @@ -109,11 +129,46 @@ overlap tBE tFM tMP [n, o] [a, z] ---- -[b1, b2]: boundary overlap, no data overlap (split file: tBE) iterators opened: tBE/points, tBE/range-del +[b1, b2]: boundary overlap, no data overlap (split file: tBE) iterators opened: tBE/points, tBE/range-del, tBE/range-key [c1, c2]: possible data overlap iterators opened: tBE/points, tBE/range-del, 
tBE/range-key [e, f): no overlap iterators opened: none [g1, g2]: boundary overlap, no data overlap (split file: tFM) iterators opened: tFM/points, tFM/range-del [j1, m1]: possible data overlap iterators opened: none [n1, n2]: boundary overlap, no data overlap (split file: tMP) iterators opened: tMP/points, tMP/range-del -[n, o]: possible data overlap iterators opened: tMP/points +[n, o]: possible data overlap iterators opened: tMP/points, tMP/range-del +[a, z]: possible data overlap iterators opened: none + +overlap tBE tFM tMP with-cache +[b1, b2] +[c1, c2] +[e, f) +[g1, g2] +[j1, m1] +[n1, n2] +[n, o] +[a, z] +[b1, b2] +[c1, c2] +[e, f) +[g1, g2] +[j1, m1] +[n1, n2] +[n, o] +[a, z] +---- +[b1, b2]: boundary overlap, no data overlap (split file: tBE) iterators opened: tBE/points, tBE/range-del, tBE/range-key +[c1, c2]: possible data overlap iterators opened: none +[e, f): no overlap iterators opened: none +[g1, g2]: boundary overlap, no data overlap (split file: tFM) iterators opened: tFM/points, tFM/range-del +[j1, m1]: possible data overlap iterators opened: none +[n1, n2]: boundary overlap, no data overlap (split file: tMP) iterators opened: tMP/points, tMP/range-del +[n, o]: possible data overlap iterators opened: none +[a, z]: possible data overlap iterators opened: none +[b1, b2]: boundary overlap, no data overlap (split file: tBE) iterators opened: none +[c1, c2]: possible data overlap iterators opened: none +[e, f): no overlap iterators opened: none +[g1, g2]: boundary overlap, no data overlap (split file: tFM) iterators opened: none +[j1, m1]: possible data overlap iterators opened: none +[n1, n2]: boundary overlap, no data overlap (split file: tMP) iterators opened: none +[n, o]: possible data overlap iterators opened: none [a, z]: possible data overlap iterators opened: none diff --git a/testdata/metrics b/testdata/metrics index 6776db1cd8..909a61794a 100644 --- a/testdata/metrics +++ b/testdata/metrics @@ -488,7 +488,7 @@ Virtual tables: 0 (0B) Local 
tables size: 4.3KB Compression types: snappy: 7 Block cache: 12 entries (1.9KB) hit rate: 9.1% -Table cache: 1 entries (760B) hit rate: 53.8% +Table cache: 1 entries (760B) hit rate: 57.1% Secondary cache: 0 entries (0B) hit rate: 0.0% Snapshots: 0 earliest seq num: 0 Table iters: 0 @@ -551,7 +551,7 @@ Virtual tables: 0 (0B) Local tables size: 6.1KB Compression types: snappy: 10 Block cache: 12 entries (1.9KB) hit rate: 9.1% -Table cache: 1 entries (760B) hit rate: 53.8% +Table cache: 1 entries (760B) hit rate: 57.1% Secondary cache: 0 entries (0B) hit rate: 0.0% Snapshots: 0 earliest seq num: 0 Table iters: 0 @@ -869,7 +869,7 @@ Virtual tables: 0 (0B) Local tables size: 0B Compression types: snappy: 2 Block cache: 6 entries (996B) hit rate: 0.0% -Table cache: 1 entries (760B) hit rate: 50.0% +Table cache: 1 entries (760B) hit rate: 66.7% Secondary cache: 0 entries (0B) hit rate: 0.0% Snapshots: 0 earliest seq num: 0 Table iters: 0 @@ -917,7 +917,7 @@ Virtual tables: 0 (0B) Local tables size: 589B Compression types: snappy: 3 Block cache: 6 entries (996B) hit rate: 0.0% -Table cache: 1 entries (760B) hit rate: 50.0% +Table cache: 1 entries (760B) hit rate: 66.7% Secondary cache: 0 entries (0B) hit rate: 0.0% Snapshots: 0 earliest seq num: 0 Table iters: 0