From b0b33cbfecf07a9b8a586f7c4f74bdd357dff17d Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Thu, 30 May 2024 11:20:11 -0700 Subject: [PATCH] overlap: use overlapcache We embed an overlap cache in `FileMetadata` and use it with the overlap checker. When we have to open a table, we now try to find a maximal empty region and the two surrounding data regions so that we can populate the cache with this information. The cache will be very useful for optimistic overlap checks which happen without blocking compactions, and might need to be retried. --- internal/manifest/version.go | 4 + internal/overlap/checker.go | 252 +++++++++++++++++++----------- internal/overlap/checker_test.go | 15 ++ internal/overlap/testdata/checker | 61 +++++++- testdata/metrics | 8 +- 5 files changed, 242 insertions(+), 98 deletions(-) diff --git a/internal/manifest/version.go b/internal/manifest/version.go index b584899dcd..6eb86a43f8 100644 --- a/internal/manifest/version.go +++ b/internal/manifest/version.go @@ -17,6 +17,7 @@ import ( "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble/internal/base" "github.com/cockroachdb/pebble/internal/invariants" + "github.com/cockroachdb/pebble/internal/overlap/overlapcache" "github.com/cockroachdb/pebble/sstable" ) @@ -287,6 +288,9 @@ type FileMetadata struct { // SyntheticSuffix overrides all suffixes in a table; used for some virtual tables. SyntheticSuffix sstable.SyntheticSuffix + + // OverlapCache is used to speed up overlap checks during ingestion. + OverlapCache overlapcache.C } // InternalKeyBounds returns the set of overall table bounds. diff --git a/internal/overlap/checker.go b/internal/overlap/checker.go index c219b553a1..326860d72c 100644 --- a/internal/overlap/checker.go +++ b/internal/overlap/checker.go @@ -8,6 +8,7 @@ package overlap import ( "context" + "slices" "github.com/cockroachdb/pebble/internal/base" "github.com/cockroachdb/pebble/internal/keyspan" @@ -23,7 +24,8 @@ type WithLSM [manifest.NumLevels]WithLevel type WithLevel struct { Result Kind // SplitFile can be set only when result is OnlyBoundary. If it is set, this - // file can be split to free up the range of interest. + // file can be split to free up the range of interest. SplitFile is not set + // for L0 (overlapping tables are allowed in L0). SplitFile *manifest.FileMetadata } @@ -89,6 +91,7 @@ func (c *Checker) LSMOverlap( } if res.Result == OnlyBoundary { result[0].Result = OnlyBoundary + // We don't set SplitFile for L0 (tables in L0 are allowed to overlap). } } for level := 1; level < manifest.NumLevels; level++ { @@ -135,11 +138,11 @@ func (c *Checker) LevelOverlap( return WithLevel{Result: Data}, nil } // We have a single file to look at; its boundaries enclose our region. - empty, err := c.EmptyRegion(ctx, region, file) + overlap, err := c.DataOverlapWithFile(ctx, region, file) if err != nil { return WithLevel{}, err } - if !empty { + if overlap { return WithLevel{Result: Data}, nil } return WithLevel{ @@ -148,116 +151,183 @@ func (c *Checker) LevelOverlap( }, nil } -// EmptyRegion returns true if the given region doesn't overlap with any keys or -// ranges in the given table. -func (c *Checker) EmptyRegion( +// DataOverlapWithFile returns true if the given region overlaps with any keys +// or spans in the given table. +func (c *Checker) DataOverlapWithFile( ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata, ) (bool, error) { - empty, err := c.emptyRegionPointsAndRangeDels(ctx, region, m) - if err != nil || !empty { - return empty, err + if overlap, ok := m.OverlapCache.CheckDataOverlap(c.cmp, region); ok { + return overlap, nil } - return c.emptyRegionRangeKeys(ctx, region, m) -} + // We want to check overlap with file, but we also want to update the cache + // with useful information. We try to find two data regions r1 and r2 with a + // space-in between; r1 ends before region.Start and r2 ends at or after + // region.Start. See overlapcache.C.ReportEmptyRegion(). + var r1, r2 base.UserKeyBounds -// emptyRegionPointsAndRangeDels returns true if the file doesn't contain any -// point keys or range del spans that overlap with region. -func (c *Checker) emptyRegionPointsAndRangeDels( - ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata, -) (bool, error) { - if !m.HasPointKeys { - return true, nil + if m.HasPointKeys { + lt, ge, err := c.pointKeysAroundKey(ctx, region.Start, m) + if err != nil { + return false, err + } + r1 = base.UserKeyBoundsInclusive(lt, lt) + r2 = base.UserKeyBoundsInclusive(ge, ge) + + if err := c.extendRegionsWithSpans(ctx, &r1, &r2, region.Start, m, manifest.KeyTypePoint); err != nil { + return false, err + } } - pointBounds := m.UserKeyBoundsByType(manifest.KeyTypePoint) - if !pointBounds.Overlaps(c.cmp, ®ion) { + if m.HasRangeKeys { + if err := c.extendRegionsWithSpans(ctx, &r1, &r2, region.Start, m, manifest.KeyTypeRange); err != nil { + return false, err + } + } + // If the regions now overlap or touch, it's all one big data region. + if r1.Start != nil && r2.Start != nil && c.cmp(r1.End.Key, r2.Start) >= 0 { + m.OverlapCache.ReportDataRegion(c.cmp, base.UserKeyBounds{ + Start: r1.Start, + End: r2.End, + }) return true, nil } + m.OverlapCache.ReportEmptyRegion(c.cmp, r1, r2) + // There is overlap iff we overlap with r2. + overlap := r2.Start != nil && region.End.IsUpperBoundFor(c.cmp, r2.Start) + return overlap, nil +} + +// pointKeysAroundKey returns two consecutive point keys: the greatest key that +// is < key and the smallest key that is >= key. If there is no such key, the +// corresponding return value is nil. Both lt and ge are nil if the file +// contains no point keys. +func (c *Checker) pointKeysAroundKey( + ctx context.Context, key []byte, m *manifest.FileMetadata, +) (lt, ge []byte, _ error) { + pointBounds := m.UserKeyBoundsByType(manifest.KeyTypePoint) + points, err := c.iteratorFactory.Points(ctx, m) - if err != nil { - return false, err + if points == nil || err != nil { + return nil, nil, err } - if points != nil { - defer points.Close() - var kv *base.InternalKV - if c.cmp(region.Start, pointBounds.Start) <= 0 { - kv = points.First() - } else { - kv = points.SeekGE(region.Start, base.SeekGEFlagsNone) + defer points.Close() + switch { + case c.cmp(key, pointBounds.Start) <= 0: + kv := points.First() + if kv != nil { + ge = slices.Clone(kv.K.UserKey) } - if kv == nil && points.Error() != nil { - return false, points.Error() + case c.cmp(key, pointBounds.End.Key) > 0: + kv := points.Last() + if kv != nil { + lt = slices.Clone(kv.K.UserKey) } - if kv != nil && region.End.IsUpperBoundForInternalKey(c.cmp, kv.K) { - // Found overlap. - return false, nil + default: + kv := points.SeekLT(key, base.SeekLTFlagsNone) + if kv != nil { + lt = slices.Clone(kv.K.UserKey) } - } - rangeDels, err := c.iteratorFactory.RangeDels(ctx, m) - if err != nil { - return false, err - } - if rangeDels != nil { - defer rangeDels.Close() - empty, err := c.emptyFragmentRegion(region, pointBounds.Start, rangeDels) - if err != nil || !empty { - return empty, err + if kv = points.Next(); kv != nil { + ge = slices.Clone(kv.K.UserKey) } } - // Found no overlap. - return true, nil + return lt, ge, points.Error() } -// emptyRegionRangeKeys returns true if the file doesn't contain any range key -// spans that overlap with region. -func (c *Checker) emptyRegionRangeKeys( - ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata, -) (bool, error) { - if !m.HasRangeKeys { - return true, nil - } - rangeKeyBounds := m.UserKeyBoundsByType(manifest.KeyTypeRange) - if !rangeKeyBounds.Overlaps(c.cmp, ®ion) { - return true, nil +// extendRegionsWithSpans opens a fragment iterator for either range dels or +// range keys (depending n keyType), finds the last span that ends before key +// and the following span, and extends/replaces regions r1 and r2. +func (c *Checker) extendRegionsWithSpans( + ctx context.Context, + r1, r2 *base.UserKeyBounds, + key []byte, + m *manifest.FileMetadata, + keyType manifest.KeyType, +) error { + var iter keyspan.FragmentIterator + var err error + if keyType == manifest.KeyTypePoint { + iter, err = c.iteratorFactory.RangeDels(ctx, m) + } else { + iter, err = c.iteratorFactory.RangeKeys(ctx, m) } - rangeKeys, err := c.iteratorFactory.RangeKeys(ctx, m) - if err != nil { - return false, err + if iter == nil || err != nil { + return err } - if rangeKeys != nil { - defer rangeKeys.Close() - empty, err := c.emptyFragmentRegion(region, rangeKeyBounds.Start, rangeKeys) - if err != nil || !empty { - return empty, err + defer iter.Close() + + fragmentBounds := m.UserKeyBoundsByType(keyType) + switch { + case c.cmp(key, fragmentBounds.Start) <= 0: + span, err := iter.First() + if err != nil { + return err + } + c.updateR2(r2, span) + + case !fragmentBounds.End.IsUpperBoundFor(c.cmp, key): + span, err := iter.Last() + if err != nil { + return err + } + c.updateR1(r1, span) + + default: + span, err := iter.SeekGE(key) + if err != nil { + return err } + c.updateR2(r2, span) + span, err = iter.Prev() + if err != nil { + return err + } + c.updateR1(r1, span) } - // Found no overlap. - return true, nil + return nil } -// emptyFragmentRegion returns true if the given iterator doesn't contain any -// spans that overlap with region. The fragmentLowerBounds is a known lower -// bound for all the spans. -func (c *Checker) emptyFragmentRegion( - region base.UserKeyBounds, fragmentLowerBound []byte, fragments keyspan.FragmentIterator, -) (bool, error) { - var span *keyspan.Span - var err error - if c.cmp(region.Start, fragmentLowerBound) <= 0 { - // This is an optimization: we know there are no spans before region.Start, - // so we can use First. - span, err = fragments.First() - } else { - span, err = fragments.SeekGE(region.Start) - } - if err != nil { - return false, err - } - if span != nil && span.Empty() { - return false, base.AssertionFailedf("fragment iterator produced empty span") +// updateR1 updates r1, the region of data that ends before a key of interest. +func (c *Checker) updateR1(r1 *base.UserKeyBounds, s *keyspan.Span) { + switch { + case s == nil: + + case r1.Start == nil || c.cmp(r1.End.Key, s.Start) < 0: + // Region completely to the right of r1. + *r1 = base.UserKeyBoundsEndExclusive(slices.Clone(s.Start), slices.Clone(s.End)) + + case c.cmp(s.End, r1.Start) < 0: + // Region completely to the left of r1, nothing to do. + + default: + // Regions are overlapping or touching. + if c.cmp(s.Start, r1.Start) < 0 { + r1.Start = slices.Clone(s.Start) + } + if c.cmp(r1.End.Key, s.End) < 0 { + r1.End = base.UserKeyExclusive(slices.Clone(s.End)) + } } - if span != nil && region.End.IsUpperBoundFor(c.cmp, span.Start) { - // Found overlap. - return false, nil +} + +// updateR2 updates r2, the region of data that ends before a key of interest. +func (c *Checker) updateR2(r2 *base.UserKeyBounds, s *keyspan.Span) { + switch { + case s == nil: + + case r2.Start == nil || c.cmp(s.End, r2.Start) < 0: + // Region completely to the left of r2. + *r2 = base.UserKeyBoundsEndExclusive(slices.Clone(s.Start), slices.Clone(s.End)) + + case c.cmp(r2.End.Key, s.Start) < 0: + // Region completely to the right of r2, nothing to do. + + default: + // Regions are overlapping or touching. + if c.cmp(s.Start, r2.Start) < 0 { + r2.Start = slices.Clone(s.Start) + } + if c.cmp(r2.End.Key, s.End) < 0 { + r2.End = base.UserKeyExclusive(slices.Clone(s.End)) + } } - return true, nil } diff --git a/internal/overlap/checker_test.go b/internal/overlap/checker_test.go index 21a2fc539b..4fcf5ce1c4 100644 --- a/internal/overlap/checker_test.go +++ b/internal/overlap/checker_test.go @@ -16,14 +16,21 @@ import ( "github.com/cockroachdb/pebble/internal/base" "github.com/cockroachdb/pebble/internal/keyspan" "github.com/cockroachdb/pebble/internal/manifest" + "github.com/cockroachdb/pebble/internal/overlap/overlapcache" "github.com/stretchr/testify/require" ) func TestChecker(t *testing.T) { tables := newTestTables() byName := make(map[string]*manifest.FileMetadata) + clearCaches := func() { + for _, t := range tables.tables { + t.meta.OverlapCache = overlapcache.C{} + } + } datadriven.RunTest(t, "testdata/checker", func(t *testing.T, d *datadriven.TestData) string { + clearCaches() switch d.Cmd { case "define": tt := testTable{ @@ -99,10 +106,15 @@ func TestChecker(t *testing.T) { tables.tables[tt.meta] = tt case "overlap": + var withCache bool var metas []*manifest.FileMetadata lines := strings.Split(d.Input, "\n") for _, arg := range d.CmdArgs { name := arg.String() + if name == "with-cache" { + withCache = true + continue + } m := byName[name] if m == nil { d.Fatalf(t, "unknown table %q", name) @@ -113,6 +125,9 @@ func TestChecker(t *testing.T) { c := MakeChecker(bytes.Compare, tables) var buf strings.Builder for _, l := range lines { + if !withCache { + clearCaches() + } bounds := base.ParseUserKeyBounds(l) withLevel, err := c.LevelOverlap(context.Background(), bounds, levelMeta.Slice()) require.NoError(t, err) diff --git a/internal/overlap/testdata/checker b/internal/overlap/testdata/checker index ea7e31f631..b2b71fc835 100644 --- a/internal/overlap/testdata/checker +++ b/internal/overlap/testdata/checker @@ -20,7 +20,7 @@ overlap t1 [a, b]: possible data overlap iterators opened: none [b1, b2]: boundary overlap, no data overlap (split file: t1) iterators opened: t1/points, t1/range-del [b, c]: possible data overlap iterators opened: none -[d, e]: possible data overlap iterators opened: t1/points +[d, e]: possible data overlap iterators opened: t1/points, t1/range-del [e, u): boundary overlap, no data overlap (split file: t1) iterators opened: t1/points, t1/range-del [e, u]: possible data overlap iterators opened: none [u, w]: possible data overlap iterators opened: none @@ -76,6 +76,26 @@ overlap t3 [j, l): possible data overlap iterators opened: none [k, l]: no overlap iterators opened: none +overlap t3 with-cache +[a, b) +[a, b] +[d, f] +[d, f] +[e, f] +[e, f] +[e, h) +[e, h] +[j, l) +---- +[a, b): no overlap iterators opened: none +[a, b]: possible data overlap iterators opened: none +[d, f]: possible data overlap iterators opened: t3/range-key +[d, f]: possible data overlap iterators opened: none +[e, f]: boundary overlap, no data overlap (split file: t3) iterators opened: t3/range-key +[e, f]: boundary overlap, no data overlap (split file: t3) iterators opened: none +[e, h): boundary overlap, no data overlap (split file: t3) iterators opened: none +[e, h]: possible data overlap iterators opened: none +[j, l): possible data overlap iterators opened: none define tBE points: @@ -109,11 +129,46 @@ overlap tBE tFM tMP [n, o] [a, z] ---- -[b1, b2]: boundary overlap, no data overlap (split file: tBE) iterators opened: tBE/points, tBE/range-del +[b1, b2]: boundary overlap, no data overlap (split file: tBE) iterators opened: tBE/points, tBE/range-del, tBE/range-key [c1, c2]: possible data overlap iterators opened: tBE/points, tBE/range-del, tBE/range-key [e, f): no overlap iterators opened: none [g1, g2]: boundary overlap, no data overlap (split file: tFM) iterators opened: tFM/points, tFM/range-del [j1, m1]: possible data overlap iterators opened: none [n1, n2]: boundary overlap, no data overlap (split file: tMP) iterators opened: tMP/points, tMP/range-del -[n, o]: possible data overlap iterators opened: tMP/points +[n, o]: possible data overlap iterators opened: tMP/points, tMP/range-del +[a, z]: possible data overlap iterators opened: none + +overlap tBE tFM tMP with-cache +[b1, b2] +[c1, c2] +[e, f) +[g1, g2] +[j1, m1] +[n1, n2] +[n, o] +[a, z] +[b1, b2] +[c1, c2] +[e, f) +[g1, g2] +[j1, m1] +[n1, n2] +[n, o] +[a, z] +---- +[b1, b2]: boundary overlap, no data overlap (split file: tBE) iterators opened: tBE/points, tBE/range-del, tBE/range-key +[c1, c2]: possible data overlap iterators opened: none +[e, f): no overlap iterators opened: none +[g1, g2]: boundary overlap, no data overlap (split file: tFM) iterators opened: tFM/points, tFM/range-del +[j1, m1]: possible data overlap iterators opened: none +[n1, n2]: boundary overlap, no data overlap (split file: tMP) iterators opened: tMP/points, tMP/range-del +[n, o]: possible data overlap iterators opened: none +[a, z]: possible data overlap iterators opened: none +[b1, b2]: boundary overlap, no data overlap (split file: tBE) iterators opened: none +[c1, c2]: possible data overlap iterators opened: none +[e, f): no overlap iterators opened: none +[g1, g2]: boundary overlap, no data overlap (split file: tFM) iterators opened: none +[j1, m1]: possible data overlap iterators opened: none +[n1, n2]: boundary overlap, no data overlap (split file: tMP) iterators opened: none +[n, o]: possible data overlap iterators opened: none [a, z]: possible data overlap iterators opened: none diff --git a/testdata/metrics b/testdata/metrics index 6776db1cd8..909a61794a 100644 --- a/testdata/metrics +++ b/testdata/metrics @@ -488,7 +488,7 @@ Virtual tables: 0 (0B) Local tables size: 4.3KB Compression types: snappy: 7 Block cache: 12 entries (1.9KB) hit rate: 9.1% -Table cache: 1 entries (760B) hit rate: 53.8% +Table cache: 1 entries (760B) hit rate: 57.1% Secondary cache: 0 entries (0B) hit rate: 0.0% Snapshots: 0 earliest seq num: 0 Table iters: 0 @@ -551,7 +551,7 @@ Virtual tables: 0 (0B) Local tables size: 6.1KB Compression types: snappy: 10 Block cache: 12 entries (1.9KB) hit rate: 9.1% -Table cache: 1 entries (760B) hit rate: 53.8% +Table cache: 1 entries (760B) hit rate: 57.1% Secondary cache: 0 entries (0B) hit rate: 0.0% Snapshots: 0 earliest seq num: 0 Table iters: 0 @@ -869,7 +869,7 @@ Virtual tables: 0 (0B) Local tables size: 0B Compression types: snappy: 2 Block cache: 6 entries (996B) hit rate: 0.0% -Table cache: 1 entries (760B) hit rate: 50.0% +Table cache: 1 entries (760B) hit rate: 66.7% Secondary cache: 0 entries (0B) hit rate: 0.0% Snapshots: 0 earliest seq num: 0 Table iters: 0 @@ -917,7 +917,7 @@ Virtual tables: 0 (0B) Local tables size: 589B Compression types: snappy: 3 Block cache: 6 entries (996B) hit rate: 0.0% -Table cache: 1 entries (760B) hit rate: 50.0% +Table cache: 1 entries (760B) hit rate: 66.7% Secondary cache: 0 entries (0B) hit rate: 0.0% Snapshots: 0 earliest seq num: 0 Table iters: 0