Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sstable: flush value blocks if 8MB are buffered #3188

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions sstable/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,16 @@ type WriterOptions struct {
// RequiredInPlaceValueBound mirrors
// Options.Experimental.RequiredInPlaceValueBound.
RequiredInPlaceValueBound UserKeyPrefixBound

// ValueBlockBufferLimit is the number of value blocks to buffer in-memory
// before flushing them to the underlying writer. Buffering these blocks and
// flushing them in groups, rather than interleaved block-by-block with data
// blocks, potentially improves locality of scans over data blocks in the
// presence of prefetching/read-ahead, page caching, etc.
//
// A value of 0 selects the default of max(8MB/BlockSize, 16) blocks, while a
// negative value disables buffering entirely.
ValueBlockBufferLimit int
}

func (o WriterOptions) ensureDefaults() WriterOptions {
Expand All @@ -288,6 +298,9 @@ func (o WriterOptions) ensureDefaults() WriterOptions {
if o.IndexBlockSize <= 0 {
o.IndexBlockSize = o.BlockSize
}
if o.ValueBlockBufferLimit == 0 {
o.ValueBlockBufferLimit = max(16, 8<<20/o.BlockSize)
}
if o.MergerName == "" {
o.MergerName = base.DefaultMerger.Name
}
Expand Down
315 changes: 315 additions & 0 deletions sstable/testdata/writer_value_blocks
Original file line number Diff line number Diff line change
Expand Up @@ -328,3 +328,318 @@ layout
787 version: 4
791 magic number: 0xf09faab3f09faab3
799 EOF

# Show a value block buffer limit of 2 causing value blocks to be flushed in
# groups of 2 in the middle of the file.
build block-size=8 buf-limit=2
blue@10.SET.20:blue10
blue@8.SET.18:blue8
blue@8.SET.16:blue8s
blue@6.SET.15:blue6isverylong
blue@6.SET.14:blue6isverylong1
blue@6.SET.13:blue6isverylong1
blue@6.SET.12:blue6isverylong1
blue@6.SET.11:blue6isverylong1
----
value-blocks: num-values 7, num-blocks: 6, size: 149

layout
----
0 data (33)
0 record (25 = 3 [0] + 15 + 7) [restart]
blue@10#20,1:blue10
25 [restart 0]
33 [trailer compression=none checksum=0x5fb0d551]
38 data (29)
38 record (21 = 3 [0] + 14 + 4) [restart]
blue@8#18,1:value handle {valueLen:5 blockNum:0 offsetInBlock:0}
59 [restart 38]
67 [trailer compression=none checksum=0x628e4a10]
72 data (29)
72 record (21 = 3 [0] + 14 + 4) [restart]
blue@8#16,1:value handle {valueLen:6 blockNum:0 offsetInBlock:5}
93 [restart 72]
101 [trailer compression=none checksum=0x4e65b9b6]
106 data (29)
106 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#15,1:value handle {valueLen:15 blockNum:1 offsetInBlock:0}
127 [restart 106]
135 [trailer compression=none checksum=0xc992640e]
140 value-block (11)
156 value-block (15)
176 data (29)
176 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#14,1:value handle {valueLen:16 blockNum:2 offsetInBlock:0}
197 [restart 176]
205 [trailer compression=none checksum=0x62a8bb33]
210 data (29)
210 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#13,1:value handle {valueLen:16 blockNum:3 offsetInBlock:0}
231 [restart 210]
239 [trailer compression=none checksum=0xc0ab3808]
244 value-block (16)
265 value-block (16)
286 data (29)
286 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#12,1:value handle {valueLen:16 blockNum:4 offsetInBlock:0}
307 [restart 286]
315 [trailer compression=none checksum=0xec7ee24d]
320 data (29)
320 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#11,1:value handle {valueLen:16 blockNum:5 offsetInBlock:0}
341 [restart 320]
349 [trailer compression=none checksum=0xcca2bad9]
354 index (28)
354 block:0/33 [restart]
374 [restart 354]
382 [trailer compression=none checksum=0x32b37f08]
387 index (27)
387 block:38/29 [restart]
406 [restart 387]
414 [trailer compression=none checksum=0x21d27815]
419 index (30)
419 block:72/29 [restart]
441 [restart 419]
449 [trailer compression=none checksum=0xba0b26fe]
454 index (27)
454 block:106/29 [restart]
473 [restart 454]
481 [trailer compression=none checksum=0xf2c4e3d7]
486 index (31)
486 block:176/29 [restart]
509 [restart 486]
517 [trailer compression=none checksum=0x1c2b03b0]
522 index (31)
522 block:210/29 [restart]
545 [restart 522]
553 [trailer compression=none checksum=0x5121de43]
558 index (31)
558 block:286/29 [restart]
581 [restart 558]
589 [trailer compression=none checksum=0x5670ba6d]
594 index (26)
594 block:320/29 [restart]
612 [restart 594]
620 [trailer compression=none checksum=0x5d0eec20]
625 top-index (151)
625 block:354/28 [restart]
646 block:387/27 [restart]
666 block:419/30 [restart]
689 block:454/27 [restart]
709 block:486/31 [restart]
732 block:522/31 [restart]
755 block:558/31 [restart]
778 block:594/26 [restart]
796 [restart 625]
800 [restart 646]
804 [restart 666]
808 [restart 689]
812 [restart 709]
816 [restart 732]
820 [restart 755]
824 [restart 778]
776 [trailer compression=snappy checksum=0x1a8319dc]
781 value-block (16)
802 value-block (16)
823 value-index (24)
852 properties (678)
852 obsolete-key (16) [restart]
868 pebble.num.value-blocks (27)
895 pebble.num.values.in.value-blocks (21)
916 pebble.value-blocks.size (22)
938 rocksdb.block.based.table.index.type (43)
981 rocksdb.block.based.table.prefix.filtering (20)
1001 rocksdb.block.based.table.whole.key.filtering (23)
1024 rocksdb.comparator (37)
1061 rocksdb.compression (16)
1077 rocksdb.compression_options (106)
1183 rocksdb.data.size (14)
1197 rocksdb.deleted.keys (15)
1212 rocksdb.external_sst_file.global_seqno (41)
1253 rocksdb.external_sst_file.version (14)
1267 rocksdb.filter.size (15)
1282 rocksdb.index.partitions (20)
1302 rocksdb.index.size (9)
1311 rocksdb.merge.operands (18)
1329 rocksdb.merge.operator (24)
1353 rocksdb.num.data.blocks (19)
1372 rocksdb.num.entries (11)
1383 rocksdb.num.range-deletions (19)
1402 rocksdb.prefix.extractor.name (31)
1433 rocksdb.property.collectors (34)
1467 rocksdb.raw.key.size (16)
1483 rocksdb.raw.value.size (14)
1497 rocksdb.top-level.index.size (25)
1522 [restart 852]
1530 [trailer compression=none checksum=0xe690121f]
1535 meta-index (64)
1535 pebble.value_index block:823/24 value-blocks-index-lengths: 1(num), 2(offset), 1(length) [restart]
1562 rocksdb.properties block:852/678 [restart]
1587 [restart 1535]
1591 [restart 1562]
1599 [trailer compression=none checksum=0x98d2a4dd]
1604 footer (53)
1604 checksum type: crc32c
1605 meta: offset=1535, length=64
1608 index: offset=625, length=151
1612 [padding]
1645 version: 4
1649 magic number: 0xf09faab3f09faab3
1657 EOF

# Show a value block buffer limit of 1 causing every value block to be flushed
# individually.
build block-size=8 buf-limit=1
blue@10.SET.20:blue10
blue@8.SET.18:blue8
blue@8.SET.16:blue8s
blue@6.SET.15:blue6isverylong
blue@6.SET.14:blue6isverylong1
blue@6.SET.13:blue6isverylong1
blue@6.SET.12:blue6isverylong1
blue@6.SET.11:blue6isverylong1
----
value-blocks: num-values 7, num-blocks: 6, size: 149


layout
----
0 data (33)
0 record (25 = 3 [0] + 15 + 7) [restart]
blue@10#20,1:blue10
25 [restart 0]
33 [trailer compression=none checksum=0x5fb0d551]
38 data (29)
38 record (21 = 3 [0] + 14 + 4) [restart]
blue@8#18,1:value handle {valueLen:5 blockNum:0 offsetInBlock:0}
59 [restart 38]
67 [trailer compression=none checksum=0x628e4a10]
72 data (29)
72 record (21 = 3 [0] + 14 + 4) [restart]
blue@8#16,1:value handle {valueLen:6 blockNum:0 offsetInBlock:5}
93 [restart 72]
101 [trailer compression=none checksum=0x4e65b9b6]
106 value-block (11)
122 data (29)
122 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#15,1:value handle {valueLen:15 blockNum:1 offsetInBlock:0}
143 [restart 122]
151 [trailer compression=none checksum=0xc992640e]
156 value-block (15)
176 data (29)
176 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#14,1:value handle {valueLen:16 blockNum:2 offsetInBlock:0}
197 [restart 176]
205 [trailer compression=none checksum=0x62a8bb33]
210 value-block (16)
231 data (29)
231 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#13,1:value handle {valueLen:16 blockNum:3 offsetInBlock:0}
252 [restart 231]
260 [trailer compression=none checksum=0xc0ab3808]
265 value-block (16)
286 data (29)
286 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#12,1:value handle {valueLen:16 blockNum:4 offsetInBlock:0}
307 [restart 286]
315 [trailer compression=none checksum=0xec7ee24d]
320 data (29)
320 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#11,1:value handle {valueLen:16 blockNum:5 offsetInBlock:0}
341 [restart 320]
349 [trailer compression=none checksum=0xcca2bad9]
354 index (28)
354 block:0/33 [restart]
374 [restart 354]
382 [trailer compression=none checksum=0x32b37f08]
387 index (27)
387 block:38/29 [restart]
406 [restart 387]
414 [trailer compression=none checksum=0x21d27815]
419 index (30)
419 block:72/29 [restart]
441 [restart 419]
449 [trailer compression=none checksum=0xba0b26fe]
454 index (27)
454 block:122/29 [restart]
473 [restart 454]
481 [trailer compression=none checksum=0xcd162eb6]
486 index (31)
486 block:176/29 [restart]
509 [restart 486]
517 [trailer compression=none checksum=0x1c2b03b0]
522 index (31)
522 block:231/29 [restart]
545 [restart 522]
553 [trailer compression=none checksum=0xa8453ba7]
558 index (31)
558 block:286/29 [restart]
581 [restart 558]
589 [trailer compression=none checksum=0x5670ba6d]
594 index (26)
594 block:320/29 [restart]
612 [restart 594]
620 [trailer compression=none checksum=0x5d0eec20]
625 top-index (151)
625 block:354/28 [restart]
646 block:387/27 [restart]
666 block:419/30 [restart]
689 block:454/27 [restart]
709 block:486/31 [restart]
732 block:522/31 [restart]
755 block:558/31 [restart]
778 block:594/26 [restart]
796 [restart 625]
800 [restart 646]
804 [restart 666]
808 [restart 689]
812 [restart 709]
816 [restart 732]
820 [restart 755]
824 [restart 778]
776 [trailer compression=snappy checksum=0x1a8319dc]
781 value-block (16)
802 value-block (16)
823 value-index (24)
852 properties (678)
852 obsolete-key (16) [restart]
868 pebble.num.value-blocks (27)
895 pebble.num.values.in.value-blocks (21)
916 pebble.value-blocks.size (22)
938 rocksdb.block.based.table.index.type (43)
981 rocksdb.block.based.table.prefix.filtering (20)
1001 rocksdb.block.based.table.whole.key.filtering (23)
1024 rocksdb.comparator (37)
1061 rocksdb.compression (16)
1077 rocksdb.compression_options (106)
1183 rocksdb.data.size (14)
1197 rocksdb.deleted.keys (15)
1212 rocksdb.external_sst_file.global_seqno (41)
1253 rocksdb.external_sst_file.version (14)
1267 rocksdb.filter.size (15)
1282 rocksdb.index.partitions (20)
1302 rocksdb.index.size (9)
1311 rocksdb.merge.operands (18)
1329 rocksdb.merge.operator (24)
1353 rocksdb.num.data.blocks (19)
1372 rocksdb.num.entries (11)
1383 rocksdb.num.range-deletions (19)
1402 rocksdb.prefix.extractor.name (31)
1433 rocksdb.property.collectors (34)
1467 rocksdb.raw.key.size (16)
1483 rocksdb.raw.value.size (14)
1497 rocksdb.top-level.index.size (25)
1522 [restart 852]
1530 [trailer compression=none checksum=0xe690121f]
1535 meta-index (64)
1535 pebble.value_index block:823/24 value-blocks-index-lengths: 1(num), 2(offset), 1(length) [restart]
1562 rocksdb.properties block:852/678 [restart]
1587 [restart 1535]
1591 [restart 1562]
1599 [trailer compression=none checksum=0x98d2a4dd]
1604 footer (53)
1604 checksum type: crc32c
1605 meta: offset=1535, length=64
1608 index: offset=625, length=151
1612 [padding]
1645 version: 4
1649 magic number: 0xf09faab3f09faab3
1657 EOF
Loading
Loading