From 1319d7d76ddb0b05056795b5804b317ce7d36847 Mon Sep 17 00:00:00 2001 From: Michael Deng Date: Wed, 20 Mar 2024 12:47:43 -0700 Subject: [PATCH 01/13] Cherry-pick: Wal debug 6.29.tikv (#2) Signed-off-by: Qi Xu Co-authored-by: Qi Xu --- db/db_impl/db_impl.cc | 17 +++++++++++++++-- db/db_impl/db_impl.h | 2 +- db/db_impl/db_impl_files.cc | 13 ++++++++++++- db/db_impl/db_impl_open.cc | 2 +- db/db_impl/db_impl_write.cc | 22 ++++++++++++++++++---- db/log_writer.cc | 1 + db/log_writer.h | 6 ++++++ 7 files changed, 54 insertions(+), 9 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 34aa65e90cf..ad7edaaed24 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -1471,13 +1471,19 @@ void DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir, for (auto it = logs_.begin(); it != logs_.end() && it->number <= up_to;) { auto& wal = *it; assert(wal.IsSyncing()); - + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Synced log %" PRIu64 " from logs_\n", wal.number); if (logs_.size() > 1) { if (immutable_db_options_.track_and_verify_wals_in_manifest && wal.GetPreSyncSize() > 0) { synced_wals->AddWal(wal.number, WalMetadata(wal.GetPreSyncSize())); } - logs_to_free_.push_back(wal.ReleaseWriter()); + auto writer = wal.ReleaseWriter(); + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "deleting log %" PRIu64 + " from logs_. Last Seq number of the WAL is %" PRIu64 "\n", + wal.number, writer->GetLastSequence()); + logs_to_free_.push_back(writer); it = logs_.erase(it); } else { wal.FinishSync(); @@ -1491,12 +1497,19 @@ void DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir, void DBImpl::MarkLogsNotSynced(uint64_t up_to) { log_write_mutex_.AssertHeld(); + uint64_t min_wal = 0; for (auto it = logs_.begin(); it != logs_.end() && it->number <= up_to; ++it) { auto& wal = *it; + if (min_wal == 0) { + min_wal = it->number; + } wal.FinishSync(); } log_sync_cv_.SignalAll(); + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "MarkLogsNotSynced from %" PRIu64 " to %" PRIu64 "\n", min_wal, + up_to); } SequenceNumber DBImpl::GetLatestSequenceNumber() const { diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 1f604168abf..30380712767 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1822,7 +1822,7 @@ class DBImpl : public DB { IOStatus WriteToWAL(const WriteBatch& merged_batch, log::Writer* log_writer, uint64_t* log_used, uint64_t* log_size, - LogFileNumberSize& log_file_number_size); + LogFileNumberSize& log_file_number_size, int caller_id); IOStatus WriteToWAL(const WriteThread::WriteGroup& write_group, log::Writer* log_writer, uint64_t* log_used, diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc index 1db50b476d2..d6289155dc7 100644 --- a/db/db_impl/db_impl_files.cc +++ b/db/db_impl/db_impl_files.cc @@ -293,6 +293,8 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force, earliest.number); log_recycle_files_.push_back(earliest.number); } else { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "deleting WAL log %" PRIu64 "\n", earliest.number); job_context->log_delete_files.push_back(earliest.number); } if (job_context->size_log_to_delete == 0) { @@ -317,7 +319,12 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force, // logs_ could have changed while we were waiting. continue; } - logs_to_free_.push_back(log.ReleaseWriter()); + auto writer = log.ReleaseWriter(); + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "deleting log %" PRIu64 + " from logs_, last seq number of WAL %" PRIu64 "\n", + log.number, writer->GetLastSequence()); + logs_to_free_.push_back(writer); logs_.pop_front(); } // Current log cannot be obsolete. @@ -491,6 +498,10 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) { // Close WALs before trying to delete them. for (const auto w : state.logs_to_free) { // TODO: maybe check the return value of Close. + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Close log %" PRIu64 + " from logs_, last Seq number in WAL %" PRIu64 "\n", + w->get_log_number(), w->GetLastSequence()); auto s = w->Close(); s.PermitUncheckedError(); } diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 28dceb3c0f1..ad5a1de3a99 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1735,7 +1735,7 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, assert(log_writer->get_log_number() == log_file_number_size.number); impl->mutex_.AssertHeld(); s = impl->WriteToWAL(empty_batch, log_writer, &log_used, &log_size, - log_file_number_size); + log_file_number_size, 0); if (s.ok()) { // Need to fsync, otherwise it might get lost after a power reset. s = impl->FlushWAL(false); diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index baf96db2324..fafd70861d5 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -1412,10 +1412,23 @@ WriteBatch* DBImpl::MergeBatch(const WriteThread::WriteGroup& write_group, IOStatus DBImpl::WriteToWAL(const WriteBatch& merged_batch, log::Writer* log_writer, uint64_t* log_used, uint64_t* log_size, - LogFileNumberSize& log_file_number_size) { + LogFileNumberSize& log_file_number_size, + int caller_id) { assert(log_size != nullptr); - + if (log_writer->file()->GetFileSize() == 0) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Start writing to WAL: [%" PRIu64 " ]", + log_writer->get_log_number()); + } + if (log_writer->get_log_number() != logs_.back().number) { + ROCKS_LOG_INFO( + immutable_db_options_.info_log, + "Not writing to latest WAL: [%" PRIu64 ", %" PRIu64 "] CallerId: %d", + log_writer->get_log_number(), logs_.back().number, caller_id); + } Slice log_entry = WriteBatchInternal::Contents(&merged_batch); + SequenceNumber seq = WriteBatchInternal::Sequence(&merged_batch); + log_writer->SetLastSequence(seq); *log_size = log_entry.size(); // When two_write_queues_ WriteToWAL has to be protected from concurretn calls // from the two queues anyway and log_write_mutex_ is already held. Otherwise @@ -1468,7 +1481,7 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group, uint64_t log_size; io_s = WriteToWAL(*merged_batch, log_writer, log_used, &log_size, - log_file_number_size); + log_file_number_size, 1); if (to_be_cached_state) { cached_recoverable_state_ = *to_be_cached_state; cached_recoverable_state_empty_ = false; @@ -1530,6 +1543,7 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group, stats->AddDBStats(InternalStats::kIntStatsWriteWithWal, write_with_wal); RecordTick(stats_, WRITE_WITH_WAL, write_with_wal); } + return io_s; } @@ -1569,7 +1583,7 @@ IOStatus DBImpl::ConcurrentWriteToWAL( uint64_t log_size; io_s = WriteToWAL(*merged_batch, log_writer, log_used, &log_size, - log_file_number_size); + log_file_number_size, 2); if (to_be_cached_state) { cached_recoverable_state_ = *to_be_cached_state; cached_recoverable_state_empty_ = false; diff --git a/db/log_writer.cc b/db/log_writer.cc index e2e596596aa..2d6a3ba1ba4 100644 --- a/db/log_writer.cc +++ b/db/log_writer.cc @@ -29,6 +29,7 @@ Writer::Writer(std::unique_ptr&& dest, uint64_t log_number, char t = static_cast(i); type_crc_[i] = crc32c::Value(&t, 1); } + last_seq_ = 0; } Writer::~Writer() { diff --git a/db/log_writer.h b/db/log_writer.h index 1a91b21994d..20db1abc4cc 100644 --- a/db/log_writer.h +++ b/db/log_writer.h @@ -15,6 +15,7 @@ #include "rocksdb/io_status.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" +#include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { @@ -92,11 +93,16 @@ class Writer { bool TEST_BufferIsEmpty(); + void SetLastSequence(SequenceNumber seq) { last_seq_ = seq; } + + SequenceNumber GetLastSequence() const { return last_seq_; } + private: std::unique_ptr dest_; size_t block_offset_; // Current offset in block uint64_t log_number_; bool recycle_log_files_; + SequenceNumber last_seq_; // crc32c values for all supported record types. These are // pre-computed to reduce the overhead of computing the crc of the From ce298c8608c33f54e536c989d838f62dc39bf01e Mon Sep 17 00:00:00 2001 From: rishabh_mittal Date: Wed, 3 Apr 2024 17:22:57 -0700 Subject: [PATCH 02/13] add logs --- db/db_impl/db_impl.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index ad7edaaed24..9af04909247 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -1478,6 +1478,12 @@ void DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir, wal.GetPreSyncSize() > 0) { synced_wals->AddWal(wal.number, WalMetadata(wal.GetPreSyncSize())); } + if (wal.GetPreSyncSize() != wal.writer->file()->GetFlushedSize()) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "size doesn't match log %" PRIu64 + " presync size %" PRIu64, "flushed size %" PRIu64 "\n", + wal.number, wal.GetPreSyncSize(), wal.writer->file()->GetFlushedSize()); + } auto writer = wal.ReleaseWriter(); ROCKS_LOG_INFO(immutable_db_options_.info_log, "deleting log %" PRIu64 From e9ab37bccfb118ac0034f5e8337b8423de9a4022 Mon Sep 17 00:00:00 2001 From: rishabh_mittal Date: Wed, 3 Apr 2024 17:33:45 -0700 Subject: [PATCH 03/13] fixed buils failure --- db/db_impl/db_impl.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 9af04909247..2345e908519 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -1481,7 +1481,7 @@ void DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir, if (wal.GetPreSyncSize() != wal.writer->file()->GetFlushedSize()) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "size doesn't match log %" PRIu64 - " presync size %" PRIu64, "flushed size %" PRIu64 "\n", + " presync size %" PRIu64 " flushed size %" PRIu64 "\n", wal.number, wal.GetPreSyncSize(), wal.writer->file()->GetFlushedSize()); } auto writer = wal.ReleaseWriter(); From d745e792f2b2e44421bdc01e37cb9238ca90f63c Mon Sep 17 00:00:00 2001 From: Rishabh Mittal Date: Tue, 26 Mar 2024 13:00:34 -0700 Subject: [PATCH 04/13] cherrypick : Ensure writes to WAL tail during FlushWAL(true /* sync */) will be synced (#8) Co-authored-by: Andrew Kryczka --- HISTORY.md | 3 +++ db/db_impl/db_impl.cc | 34 ++++++++++++++++---------------- db/db_write_test.cc | 45 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 17 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 9750cc91036..a5c4d42271e 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,8 @@ # Rocksdb Change Log ## Unreleased +### Bug Fixes +* Fixed bug where `FlushWAL(true /* sync */)` (used by `GetLiveFilesStorageInfo()`, which is used by checkpoint and backup) could cause parallel writes at the tail of a WAL file to never be synced. + ### Bug Fixes * Fix a race condition in WAL size tracking which is caused by an unsafe iterator access after container is changed. * Fix unprotected concurrent accesses to `WritableFileWriter::filesize_` by `DB::SyncWAL()` and `DB::Put()` in two write queue mode. diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 2345e908519..69b5b03b3cb 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -1471,33 +1471,33 @@ void DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir, for (auto it = logs_.begin(); it != logs_.end() && it->number <= up_to;) { auto& wal = *it; assert(wal.IsSyncing()); - ROCKS_LOG_INFO(immutable_db_options_.info_log, - "Synced log %" PRIu64 " from logs_\n", wal.number); - if (logs_.size() > 1) { + + if (wal.number < logs_.back().number) { + // Inactive WAL if (immutable_db_options_.track_and_verify_wals_in_manifest && wal.GetPreSyncSize() > 0) { synced_wals->AddWal(wal.number, WalMetadata(wal.GetPreSyncSize())); } - if (wal.GetPreSyncSize() != wal.writer->file()->GetFlushedSize()) { - ROCKS_LOG_INFO(immutable_db_options_.info_log, - "size doesn't match log %" PRIu64 - " presync size %" PRIu64 " flushed size %" PRIu64 "\n", - wal.number, wal.GetPreSyncSize(), wal.writer->file()->GetFlushedSize()); + if (wal.GetPreSyncSize() == wal.writer->file()->GetFlushedSize()) { + // Fully synced + logs_to_free_.push_back(wal.ReleaseWriter()); + it = logs_.erase(it); + } else { + assert(wal.GetPreSyncSize() < wal.writer->file()->GetFlushedSize()); + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "size doesn't match log %" PRIu64 + " presync size %" PRIu64 " flushed size %" PRIu64 "\n", + wal.number, wal.GetPreSyncSize(), wal.writer->file()->GetFlushedSize()); + wal.FinishSync(); + ++it; } - auto writer = wal.ReleaseWriter(); - ROCKS_LOG_INFO(immutable_db_options_.info_log, - "deleting log %" PRIu64 - " from logs_. Last Seq number of the WAL is %" PRIu64 "\n", - wal.number, writer->GetLastSequence()); - logs_to_free_.push_back(writer); - it = logs_.erase(it); } else { + assert(wal.number == logs_.back().number); + // Active WAL wal.FinishSync(); ++it; } } - assert(logs_.empty() || logs_[0].number > up_to || - (logs_.size() == 1 && !logs_[0].IsSyncing())); log_sync_cv_.SignalAll(); } diff --git a/db/db_write_test.cc b/db/db_write_test.cc index 3f53fe597e5..a450270e043 100644 --- a/db/db_write_test.cc +++ b/db/db_write_test.cc @@ -386,6 +386,51 @@ TEST_P(DBWriteTest, UnflushedPutRaceWithTrackedWalSync) { Close(); } +TEST_P(DBWriteTest, InactiveWalFullySyncedBeforeUntracked) { + // Repro bug where a WAL is appended and switched after + // `FlushWAL(true /* sync */)`'s sync finishes and before it untracks fully + // synced inactive logs. Previously such a WAL would be wrongly untracked + // so the final append would never be synced. + Options options = GetOptions(); + std::unique_ptr fault_env( + new FaultInjectionTestEnv(env_)); + options.env = fault_env.get(); + Reopen(options); + + ASSERT_OK(Put("key1", "val1")); + + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::SyncWAL:BeforeMarkLogsSynced:1", [this](void* /* arg */) { + ASSERT_OK(Put("key2", "val2")); + ASSERT_OK(dbfull()->TEST_SwitchMemtable()); + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + ASSERT_OK(db_->FlushWAL(true /* sync */)); + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); + + ASSERT_OK(Put("key3", "val3")); + + ASSERT_OK(db_->FlushWAL(true /* sync */)); + + Close(); + + // Simulate full loss of unsynced data. This should drop nothing since we did + // `FlushWAL(true /* sync */)` before `Close()`. + fault_env->DropUnsyncedFileData(); + + Reopen(options); + + ASSERT_EQ("val1", Get("key1")); + ASSERT_EQ("val2", Get("key2")); + ASSERT_EQ("val3", Get("key3")); + + // Need to close before `fault_env` goes out of scope. + Close(); +} + TEST_P(DBWriteTest, IOErrorOnWALWriteTriggersReadOnlyMode) { std::unique_ptr mock_env( new FaultInjectionTestEnv(env_)); From eda7792ff56a6b247ab4f8ef8bb1b1edfc3d6956 Mon Sep 17 00:00:00 2001 From: rishabh_mittal Date: Tue, 9 Apr 2024 11:43:32 -0700 Subject: [PATCH 05/13] added logs, thread id and thread name --- db/db_impl/db_impl.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 69b5b03b3cb..b30413a93c9 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -105,6 +105,7 @@ #include "util/stop_watch.h" #include "util/string_util.h" #include "utilities/trace/replayer_impl.h" +#include namespace ROCKSDB_NAMESPACE { @@ -1480,14 +1481,18 @@ void DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir, } if (wal.GetPreSyncSize() == wal.writer->file()->GetFlushedSize()) { // Fully synced + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "erasing log %" PRIu64 + " presync size %" PRIu64 " flushed size %" PRIu64 " thread id %" PRIu64 " thread name %s \n", + wal.number, wal.GetPreSyncSize(), wal.writer->file()->GetFlushedSize(), thread::get_id(), thread::get_name()); logs_to_free_.push_back(wal.ReleaseWriter()); it = logs_.erase(it); } else { assert(wal.GetPreSyncSize() < wal.writer->file()->GetFlushedSize()); ROCKS_LOG_INFO(immutable_db_options_.info_log, "size doesn't match log %" PRIu64 - " presync size %" PRIu64 " flushed size %" PRIu64 "\n", - wal.number, wal.GetPreSyncSize(), wal.writer->file()->GetFlushedSize()); + " presync size %" PRIu64 " flushed size %" PRIu64 " thread id %" PRIu64 " thread name %s \n", + wal.number, wal.GetPreSyncSize(), wal.writer->file()->GetFlushedSize(), thread::get_id(), thread::get_name()); wal.FinishSync(); ++it; } From a0f99e15bfe14216e11517b0f358d9b4ae352df6 Mon Sep 17 00:00:00 2001 From: rishabh_mittal Date: Tue, 9 Apr 2024 11:49:33 -0700 Subject: [PATCH 06/13] build fix --- db/db_impl/db_impl.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index b30413a93c9..21afcb37a4d 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -105,7 +105,7 @@ #include "util/stop_watch.h" #include "util/string_util.h" #include "utilities/trace/replayer_impl.h" -#include +#include namespace ROCKSDB_NAMESPACE { From f360ce9adbe94d183da16b8ab608479947b7d95f Mon Sep 17 00:00:00 2001 From: rishabh_mittal Date: Tue, 9 Apr 2024 12:00:13 -0700 Subject: [PATCH 07/13] build fix --- db/db_impl/db_impl.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 21afcb37a4d..a7e0c119045 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -1483,16 +1483,16 @@ void DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir, // Fully synced ROCKS_LOG_INFO(immutable_db_options_.info_log, "erasing log %" PRIu64 - " presync size %" PRIu64 " flushed size %" PRIu64 " thread id %" PRIu64 " thread name %s \n", - wal.number, wal.GetPreSyncSize(), wal.writer->file()->GetFlushedSize(), thread::get_id(), thread::get_name()); + " presync size %" PRIu64 " flushed size %" PRIu64 " thread id %u\n", + wal.number, wal.GetPreSyncSize(), wal.writer->file()->GetFlushedSize(), pthread_self()); logs_to_free_.push_back(wal.ReleaseWriter()); it = logs_.erase(it); } else { assert(wal.GetPreSyncSize() < wal.writer->file()->GetFlushedSize()); ROCKS_LOG_INFO(immutable_db_options_.info_log, "size doesn't match log %" PRIu64 - " presync size %" PRIu64 " flushed size %" PRIu64 " thread id %" PRIu64 " thread name %s \n", - wal.number, wal.GetPreSyncSize(), wal.writer->file()->GetFlushedSize(), thread::get_id(), thread::get_name()); + " presync size %" PRIu64 " flushed size %" PRIu64 " thread id %u \n", + wal.number, wal.GetPreSyncSize(), wal.writer->file()->GetFlushedSize(), pthread_self()); wal.FinishSync(); ++it; } From a565754010f9656a194095164892a85fa083cdf1 Mon Sep 17 00:00:00 2001 From: rishabh_mittal Date: Tue, 9 Apr 2024 12:07:21 -0700 Subject: [PATCH 08/13] build fix[A --- db/db_impl/db_impl.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index a7e0c119045..f1074c469ec 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -1483,7 +1483,7 @@ void DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir, // Fully synced ROCKS_LOG_INFO(immutable_db_options_.info_log, "erasing log %" PRIu64 - " presync size %" PRIu64 " flushed size %" PRIu64 " thread id %u\n", + " presync size %" PRIu64 " flushed size %" PRIu64 " thread id %" PRIu64 "\n", wal.number, wal.GetPreSyncSize(), wal.writer->file()->GetFlushedSize(), pthread_self()); logs_to_free_.push_back(wal.ReleaseWriter()); it = logs_.erase(it); @@ -1491,7 +1491,7 @@ void DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir, assert(wal.GetPreSyncSize() < wal.writer->file()->GetFlushedSize()); ROCKS_LOG_INFO(immutable_db_options_.info_log, "size doesn't match log %" PRIu64 - " presync size %" PRIu64 " flushed size %" PRIu64 " thread id %u \n", + " presync size %" PRIu64 " flushed size %" PRIu64 " thread id %" PRIu64 " \n", wal.number, wal.GetPreSyncSize(), wal.writer->file()->GetFlushedSize(), pthread_self()); wal.FinishSync(); ++it; From 5fbe9e4e4441ceaf2e505b56f2fefec6e6e5e95f Mon Sep 17 00:00:00 2001 From: rishabh_mittal Date: Tue, 9 Apr 2024 13:50:53 -0700 Subject: [PATCH 09/13] add thread ID --- db/db_impl/db_impl_write.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index fafd70861d5..236df487a03 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -2205,8 +2205,8 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) { } ROCKS_LOG_INFO(immutable_db_options_.info_log, "[%s] New memtable created with log file: #%" PRIu64 - ". Immutable memtables: %d.\n", - cfd->GetName().c_str(), new_log_number, num_imm_unflushed); + ". Immutable memtables: %d.thread id %" PRIu64 "\n", + cfd->GetName().c_str(), new_log_number, num_imm_unflushed, pthread_self()); mutex_.Lock(); if (recycle_log_number != 0) { // Since renaming the file is done outside DB mutex, we need to ensure From 51878d5067010171ffabd22920c7371eb2576426 Mon Sep 17 00:00:00 2001 From: rishabh_mittal Date: Tue, 9 Apr 2024 14:53:37 -0700 Subject: [PATCH 10/13] added logs in compaction and flushWAL --- db/db_impl/db_impl_compaction_flush.cc | 2 ++ db/db_impl/db_impl_write.cc | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index b886892f12b..1a7da1ba752 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -1040,6 +1040,8 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options, } if (s.ok() && flush_needed) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "flushing memtable thread id %" PRIu64 "\n", pthread_self()); FlushOptions fo; fo.allow_write_stall = options.allow_write_stall; if (immutable_db_options_.atomic_flush) { diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index 236df487a03..7b4af62893c 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -1508,11 +1508,31 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group, log_write_mutex_.Lock(); } + if (logs_.back().number != log_writer->get_log_number()) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "new log file added after last write %" PRIu64 "writer log number %" PRIu64 "thread id %" PRIu64 "\n", + logs_.back().number, log_writer->get_log_number(), pthread_self()); + } + bool found = false; for (auto& log : logs_) { io_s = log.writer->file()->Sync(immutable_db_options_.use_fsync); + if (logs_.back().number == log_writer->get_log_number()) { + found = true; + } if (!io_s.ok()) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "WAL sync failed with log number %" PRIu64 "writer log number %" PRIu64 "thread id %" PRIu64 "\n", + log.number, log_writer->get_log_number(), pthread_self()); break; } + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "WAL sync completed with log number %" PRIu64 " writer log number %" PRIu64 "thread id %" PRIu64 "\n", + log.number, log_writer->get_log_number(), pthread_self()); + } + + if (!found) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "write log file not found %" PRIu64 " thread id %" PRIu64 "\n", log_writer->get_log_number(), pthread_self()); } if (UNLIKELY(needs_locking)) { From f4857dff8db6af2acef3cf7defb973fa3c3cc319 Mon Sep 17 00:00:00 2001 From: rishabh_mittal Date: Wed, 10 Apr 2024 09:47:50 -0700 Subject: [PATCH 11/13] added file sequence number --- db/db_impl/db_impl_write.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index 7b4af62893c..8612b4c44b0 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -1526,13 +1526,15 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group, break; } ROCKS_LOG_INFO(immutable_db_options_.info_log, - "WAL sync completed with log number %" PRIu64 " writer log number %" PRIu64 "thread id %" PRIu64 "\n", - log.number, log_writer->get_log_number(), pthread_self()); + "WAL sync completed with flush log number %" PRIu64 " current writer log number %" PRIu64 "presync size %" PRIu64 + " flushed size %" PRIu64 "last sequence %" PRIu64 "thread id %" PRIu64 "\n", log.number, log_writer->get_log_number(), log.GetPreSyncSize(), + log.writer->file()->GetFlushedSize(), log_writer->GetLastSequence(), pthread_self()); } if (!found) { ROCKS_LOG_INFO(immutable_db_options_.info_log, - "write log file not found %" PRIu64 " thread id %" PRIu64 "\n", log_writer->get_log_number(), pthread_self()); + "write log file not found %" PRIu64 "flushed size %" PRIu64 "last sequence %" PRIu64 "thread id %" PRIu64 "\n", + log_writer->get_log_number(), log_writer->file()->GetFlushedSize(), log_writer->GetLastSequence(), pthread_self()); } if (UNLIKELY(needs_locking)) { From cf50f5192b0aaa1f839ff54eea43eff755897943 Mon Sep 17 00:00:00 2001 From: rishabh_mittal Date: Wed, 10 Apr 2024 10:22:43 -0700 Subject: [PATCH 12/13] added file sequence number --- db/db_impl/db_impl_write.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index 8612b4c44b0..0ca7a2fb844 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -1516,7 +1516,7 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group, bool found = false; for (auto& log : logs_) { io_s = log.writer->file()->Sync(immutable_db_options_.use_fsync); - if (logs_.back().number == log_writer->get_log_number()) { + if (log.number == log_writer->get_log_number()) { found = true; } if (!io_s.ok()) { From e0d7e463e24e4835190e74ba4cede75bd14e9e17 Mon Sep 17 00:00:00 2001 From: rishabh_mittal Date: Wed, 10 Apr 2024 10:28:59 -0700 Subject: [PATCH 13/13] added file sequence number --- db/db_impl/db_impl_write.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index 0ca7a2fb844..908bd90ad32 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -1528,7 +1528,7 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group, ROCKS_LOG_INFO(immutable_db_options_.info_log, "WAL sync completed with flush log number %" PRIu64 " current writer log number %" PRIu64 "presync size %" PRIu64 " flushed size %" PRIu64 "last sequence %" PRIu64 "thread id %" PRIu64 "\n", log.number, log_writer->get_log_number(), log.GetPreSyncSize(), - log.writer->file()->GetFlushedSize(), log_writer->GetLastSequence(), pthread_self()); + log.writer->file()->GetFlushedSize(), log.writer->GetLastSequence(), pthread_self()); } if (!found) {