From 0a38ec61632a818527c368f33c8e89ec8357ed67 Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Sun, 3 Nov 2024 04:41:40 +0700 Subject: [PATCH 01/15] fix(drive): uncommitted state if db transaction fails --- packages/rs-drive-abci/src/abci/handler/finalize_block.rs | 5 ++++- packages/rs-drive-abci/src/abci/handler/info.rs | 2 ++ packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs | 2 ++ packages/rs-drive-abci/src/abci/handler/process_proposal.rs | 2 ++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs index 9653391c7d..ca55077727 100644 --- a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs +++ b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs @@ -66,7 +66,10 @@ where )); } - app.commit_transaction(platform_version)?; + // TODO: do not commit on this block height + if block_height == 32326 { + app.commit_transaction(platform_version)?; + } app.platform() .committed_block_height_guard diff --git a/packages/rs-drive-abci/src/abci/handler/info.rs b/packages/rs-drive-abci/src/abci/handler/info.rs index dbb8501891..4ac04747bf 100644 --- a/packages/rs-drive-abci/src/abci/handler/info.rs +++ b/packages/rs-drive-abci/src/abci/handler/info.rs @@ -26,6 +26,8 @@ where .map(|app_hash| app_hash.to_vec()) .unwrap_or_default(); + // TODO: Check that Drive and Platform root hashes match except 32326 + let desired_protocol_version = DESIRED_PLATFORM_VERSION.protocol_version; let response = proto::ResponseInfo { diff --git a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs index 18252d0d45..9d5b4b7b3c 100644 --- a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs @@ -35,6 +35,8 @@ where let platform_state = app.platform().state.load(); + // TODO: Check that Drive and Platform root 
hashes match except 32327 + let last_committed_core_height = platform_state.last_committed_core_height(); let starting_platform_version = platform_state.current_platform_version()?; diff --git a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs index 5bf547e14a..440e60f753 100644 --- a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs @@ -179,6 +179,8 @@ where let platform_state = app.platform().state.load(); + // TODO: Check that Drive and Platform root hashes match except 32327 + let starting_platform_version = platform_state.current_platform_version()?; // Running the proposal executes all the state transitions for the block From e357b4cab581380dc79c49e45b10f38be1078c37 Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Sun, 3 Nov 2024 05:06:18 +0700 Subject: [PATCH 02/15] chore: verify app hash mismatch --- packages/rs-drive-abci/src/abci/error.rs | 9 ++++++ .../src/abci/handler/finalize_block.rs | 2 +- .../rs-drive-abci/src/abci/handler/info.rs | 29 +++++++++++++++---- .../src/abci/handler/prepare_proposal.rs | 26 ++++++++++++++++- .../src/abci/handler/process_proposal.rs | 26 ++++++++++++++++- 5 files changed, 84 insertions(+), 8 deletions(-) diff --git a/packages/rs-drive-abci/src/abci/error.rs b/packages/rs-drive-abci/src/abci/error.rs index 857321a16e..5e4a123140 100644 --- a/packages/rs-drive-abci/src/abci/error.rs +++ b/packages/rs-drive-abci/src/abci/error.rs @@ -90,4 +90,13 @@ pub enum AbciError { /// Generic with code should only be used in tests #[error("invalid state transition error: {0}")] InvalidStateTransition(#[from] ConsensusError), + + /// Drive storage root hash is not matching with app hash stored in PlatformState + #[error("drive and platform state app hash mismatch")] + AppHashMismatch { + /// Storage root hash + drive_storage_root_hash: [u8; 32], + /// App hash stored in PlatformState + 
platform_state_app_hash: [u8; 32], + }, } diff --git a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs index ca55077727..412610bc8d 100644 --- a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs +++ b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs @@ -66,7 +66,7 @@ where )); } - // TODO: do not commit on this block height + // TODO: document this if block_height == 32326 { app.commit_transaction(platform_version)?; } diff --git a/packages/rs-drive-abci/src/abci/handler/info.rs b/packages/rs-drive-abci/src/abci/handler/info.rs index 4ac04747bf..c443d524e1 100644 --- a/packages/rs-drive-abci/src/abci/handler/info.rs +++ b/packages/rs-drive-abci/src/abci/handler/info.rs @@ -21,12 +21,31 @@ where let platform_state = app.platform().state.load(); - let state_app_hash = platform_state + // Verify that Platform State corresponds to Drive commited state + let drive_storage_root_hash = platform_state .last_committed_block_app_hash() - .map(|app_hash| app_hash.to_vec()) .unwrap_or_default(); - // TODO: Check that Drive and Platform root hashes match except 32326 + let platform_state_app_hash = app + .platform() + .drive + .grove + .root_hash( + None, + &platform_state + .current_platform_version()? 
+ .drive + .grove_version, + ) + .unwrap()?; + + if drive_storage_root_hash != platform_state_app_hash { + return Err(AbciError::AppHashMismatch { + drive_storage_root_hash, + platform_state_app_hash, + } + .into()); + } let desired_protocol_version = DESIRED_PLATFORM_VERSION.protocol_version; @@ -35,7 +54,7 @@ where app_version: desired_protocol_version as u64, last_block_height: platform_state.last_committed_block_height() as i64, version: env!("CARGO_PKG_VERSION").to_string(), - last_block_app_hash: state_app_hash.clone(), + last_block_app_hash: platform_state_app_hash.to_vec(), }; tracing::debug!( @@ -43,7 +62,7 @@ where software_version = env!("CARGO_PKG_VERSION"), block_version = request.block_version, p2p_version = request.p2p_version, - app_hash = hex::encode(state_app_hash), + app_hash = hex::encode(platform_state_app_hash), height = platform_state.last_committed_block_height(), "Handshake with consensus engine", ); diff --git a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs index 9d5b4b7b3c..02dd0607c5 100644 --- a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs @@ -35,7 +35,31 @@ where let platform_state = app.platform().state.load(); - // TODO: Check that Drive and Platform root hashes match except 32327 + // Verify that Platform State corresponds to Drive commited state + let drive_storage_root_hash = platform_state + .last_committed_block_app_hash() + .unwrap_or_default(); + + let platform_state_app_hash = app + .platform() + .drive + .grove + .root_hash( + None, + &platform_state + .current_platform_version()? 
+ .drive + .grove_version, + ) + .unwrap()?; + + if drive_storage_root_hash != platform_state_app_hash { + return Err(AbciError::AppHashMismatch { + drive_storage_root_hash, + platform_state_app_hash, + } + .into()); + } let last_committed_core_height = platform_state.last_committed_core_height(); diff --git a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs index 440e60f753..2bf4cdb38f 100644 --- a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs @@ -179,7 +179,31 @@ where let platform_state = app.platform().state.load(); - // TODO: Check that Drive and Platform root hashes match except 32327 + // Verify that Platform State corresponds to Drive commited state + let drive_storage_root_hash = platform_state + .last_committed_block_app_hash() + .unwrap_or_default(); + + let platform_state_app_hash = app + .platform() + .drive + .grove + .root_hash( + None, + &platform_state + .current_platform_version()? 
+ .drive + .grove_version, + ) + .unwrap()?; + + if drive_storage_root_hash != platform_state_app_hash { + return Err(AbciError::AppHashMismatch { + drive_storage_root_hash, + platform_state_app_hash, + } + .into()); + } let starting_platform_version = platform_state.current_platform_version()?; From bc6d8d44659a15bff788f4a665239252a7097999 Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Sun, 3 Nov 2024 05:21:42 +0700 Subject: [PATCH 03/15] chore: ignore app hash validation for failed block --- .../src/abci/handler/finalize_block.rs | 4 +++- .../rs-drive-abci/src/abci/handler/info.rs | 22 +++++++++++++------ .../src/abci/handler/prepare_proposal.rs | 16 +++++++++----- .../src/abci/handler/process_proposal.rs | 16 +++++++++----- packages/rs-drive-abci/src/config.rs | 6 ----- 5 files changed, 40 insertions(+), 24 deletions(-) diff --git a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs index 412610bc8d..ef13fe2267 100644 --- a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs +++ b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs @@ -5,6 +5,7 @@ use crate::execution::types::block_execution_context::v0::BlockExecutionContextV use crate::platform_types::cleaned_abci_messages::finalized_block_cleaned_request::v0::FinalizeBlockCleanedRequest; use crate::platform_types::platform_state::v0::PlatformStateV0Methods; use crate::rpc::core::CoreRPCLike; +use dpp::dashcore::Network; use std::sync::atomic::Ordering; use tenderdash_abci::proto::abci as proto; @@ -67,7 +68,8 @@ where } // TODO: document this - if block_height == 32326 { + // TODO: verify that chain id is evo1 + if !(app.platform().config.network == Network::Dash && block_height == 32326) { app.commit_transaction(platform_version)?; } diff --git a/packages/rs-drive-abci/src/abci/handler/info.rs b/packages/rs-drive-abci/src/abci/handler/info.rs index c443d524e1..12468cecf9 100644 --- 
a/packages/rs-drive-abci/src/abci/handler/info.rs +++ b/packages/rs-drive-abci/src/abci/handler/info.rs @@ -3,6 +3,7 @@ use crate::abci::AbciError; use crate::error::Error; use crate::platform_types::platform_state::v0::PlatformStateV0Methods; use crate::rpc::core::CoreRPCLike; +use dpp::dashcore::Network; use dpp::version::DESIRED_PLATFORM_VERSION; use tenderdash_abci::proto::abci as proto; @@ -21,6 +22,8 @@ where let platform_state = app.platform().state.load(); + let last_block_height = platform_state.last_committed_block_height() as i64; + // Verify that Platform State corresponds to Drive commited state let drive_storage_root_hash = platform_state .last_committed_block_app_hash() @@ -39,12 +42,17 @@ where ) .unwrap()?; - if drive_storage_root_hash != platform_state_app_hash { - return Err(AbciError::AppHashMismatch { - drive_storage_root_hash, - platform_state_app_hash, + // TODO: Document this + // TODO: verify that chain id is evo1 + #[allow(clippy::collapsible_if)] + if !(app.platform().config.network == Network::Dash && last_block_height == 32326) { + if drive_storage_root_hash != platform_state_app_hash { + return Err(AbciError::AppHashMismatch { + drive_storage_root_hash, + platform_state_app_hash, + } + .into()); } - .into()); } let desired_protocol_version = DESIRED_PLATFORM_VERSION.protocol_version; @@ -52,7 +60,7 @@ where let response = proto::ResponseInfo { data: "".to_string(), app_version: desired_protocol_version as u64, - last_block_height: platform_state.last_committed_block_height() as i64, + last_block_height, version: env!("CARGO_PKG_VERSION").to_string(), last_block_app_hash: platform_state_app_hash.to_vec(), }; @@ -63,7 +71,7 @@ where block_version = request.block_version, p2p_version = request.p2p_version, app_hash = hex::encode(platform_state_app_hash), - height = platform_state.last_committed_block_height(), + last_block_height, "Handshake with consensus engine", ); diff --git 
a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs index 02dd0607c5..777d2b9c0f 100644 --- a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs @@ -11,6 +11,7 @@ use crate::platform_types::platform_state::v0::PlatformStateV0Methods; use crate::platform_types::state_transitions_processing_result::StateTransitionExecutionResult; use crate::rpc::core::CoreRPCLike; use dpp::dashcore::hashes::Hash; +use dpp::dashcore::Network; use dpp::version::TryIntoPlatformVersioned; use drive::grovedb_storage::Error::RocksDBError; use tenderdash_abci::proto::abci as proto; @@ -53,12 +54,17 @@ where ) .unwrap()?; - if drive_storage_root_hash != platform_state_app_hash { - return Err(AbciError::AppHashMismatch { - drive_storage_root_hash, - platform_state_app_hash, + // TODO: Document this + // TODO: verify that chain id is evo1 + #[allow(clippy::collapsible_if)] + if !(app.platform().config.network == Network::Dash && request.height == 32327) { + if drive_storage_root_hash != platform_state_app_hash { + return Err(AbciError::AppHashMismatch { + drive_storage_root_hash, + platform_state_app_hash, + } + .into()); } - .into()); } let last_committed_core_height = platform_state.last_committed_core_height(); diff --git a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs index 2bf4cdb38f..f2cb4d9e61 100644 --- a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs @@ -12,6 +12,7 @@ use crate::platform_types::block_execution_outcome; use crate::platform_types::platform_state::v0::PlatformStateV0Methods; use crate::platform_types::state_transitions_processing_result::StateTransitionExecutionResult; use crate::rpc::core::CoreRPCLike; +use dpp::dashcore::Network; use dpp::version::TryIntoPlatformVersioned; 
use drive::grovedb_storage::Error::RocksDBError; use tenderdash_abci::proto::abci as proto; @@ -197,12 +198,17 @@ where ) .unwrap()?; - if drive_storage_root_hash != platform_state_app_hash { - return Err(AbciError::AppHashMismatch { - drive_storage_root_hash, - platform_state_app_hash, + // TODO: Document this + // TODO: verify that chain id is evo1 + #[allow(clippy::collapsible_if)] + if !(app.platform().config.network == Network::Dash && request.height == 32327) { + if drive_storage_root_hash != platform_state_app_hash { + return Err(AbciError::AppHashMismatch { + drive_storage_root_hash, + platform_state_app_hash, + } + .into()); } - .into()); } let starting_platform_version = platform_state.current_platform_version()?; diff --git a/packages/rs-drive-abci/src/config.rs b/packages/rs-drive-abci/src/config.rs index 1e8f5f3c26..b2fd1d2297 100644 --- a/packages/rs-drive-abci/src/config.rs +++ b/packages/rs-drive-abci/src/config.rs @@ -233,8 +233,6 @@ struct PlatformConfigIntermediate { #[serde(flatten)] pub instant_lock: InstantLockConfig, pub block_spacing_ms: u64, - #[serde(default = "PlatformConfig::default_initial_protocol_version")] - pub initial_protocol_version: ProtocolVersion, pub db_path: PathBuf, #[serde(default)] pub rejections_path: Option, @@ -624,10 +622,6 @@ impl ExecutionConfig { } impl PlatformConfig { - fn default_initial_protocol_version() -> ProtocolVersion { - INITIAL_PROTOCOL_VERSION - } - fn default_network() -> Network { Network::Dash } From 4b812097e1d9a87b35a6b8bae2319f9f6f8614c9 Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Sun, 3 Nov 2024 17:23:54 +0700 Subject: [PATCH 04/15] chore: panic to restart drive --- packages/rs-drive-abci/src/abci/error.rs | 9 --------- .../src/abci/handler/finalize_block.rs | 13 +++++++++++-- .../rs-drive-abci/src/abci/handler/info.rs | 19 +++++++++++++------ .../src/abci/handler/prepare_proposal.rs | 19 +++++++++++++------ .../src/abci/handler/process_proposal.rs | 19 +++++++++++++------ 5 files changed, 
50 insertions(+), 29 deletions(-) diff --git a/packages/rs-drive-abci/src/abci/error.rs b/packages/rs-drive-abci/src/abci/error.rs index 5e4a123140..857321a16e 100644 --- a/packages/rs-drive-abci/src/abci/error.rs +++ b/packages/rs-drive-abci/src/abci/error.rs @@ -90,13 +90,4 @@ pub enum AbciError { /// Generic with code should only be used in tests #[error("invalid state transition error: {0}")] InvalidStateTransition(#[from] ConsensusError), - - /// Drive storage root hash is not matching with app hash stored in PlatformState - #[error("drive and platform state app hash mismatch")] - AppHashMismatch { - /// Storage root hash - drive_storage_root_hash: [u8; 32], - /// App hash stored in PlatformState - platform_state_app_hash: [u8; 32], - }, } diff --git a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs index ef13fe2267..3fb92640d9 100644 --- a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs +++ b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs @@ -67,10 +67,19 @@ where )); } - // TODO: document this + // We had a chain halt on mainnet on block 32326. Compaction happened + // and transaction.commit() returned an error. Due to a bug in tenderdash, + // validators just proceeded on next block without committing data but keeping + // updated cache. To keep consistency with mainnet chain we have to skip + // commit of this block now on. // TODO: verify that chain id is evo1 if !(app.platform().config.network == Network::Dash && block_height == 32326) { - app.commit_transaction(platform_version)?; + // This is simplified solution until we have a better way to handle + // We still have caches in memory that corresponds to the data that + // we weren't able to commit. 
Solution is to restart the Drive, so all caches + // will be restored from the disk and try to process this block again + app.commit_transaction(platform_version) + .expect("commit transaction"); } app.platform() diff --git a/packages/rs-drive-abci/src/abci/handler/info.rs b/packages/rs-drive-abci/src/abci/handler/info.rs index 12468cecf9..5aa6cdb130 100644 --- a/packages/rs-drive-abci/src/abci/handler/info.rs +++ b/packages/rs-drive-abci/src/abci/handler/info.rs @@ -42,16 +42,23 @@ where ) .unwrap()?; - // TODO: Document this + // We had a chain halt on mainnet on block 32326. Compaction happened + // and transaction.commit() returned an error. Due to a bug in tenderdash, + // validators just proceeded on next block without committing data but keeping + // updated cache. To keep consistency with mainnet chain we allow app hashes to be + // different for this block. // TODO: verify that chain id is evo1 #[allow(clippy::collapsible_if)] if !(app.platform().config.network == Network::Dash && last_block_height == 32326) { + // App hash in memory must be equal to app hash on disk if drive_storage_root_hash != platform_state_app_hash { - return Err(AbciError::AppHashMismatch { - drive_storage_root_hash, - platform_state_app_hash, - } - .into()); + // We panic because we can't recover from this situation. 
+ // Better to restart the Drive, so we might self-heal the node + // reloading state form the disk + panic!( + "drive and platform state app hash mismatch: drive_storage_root_hash: {:?}, platform_state_app_hash: {:?}", + drive_storage_root_hash, platform_state_app_hash + ); } } diff --git a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs index 777d2b9c0f..d630efdb1c 100644 --- a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs @@ -54,16 +54,23 @@ where ) .unwrap()?; - // TODO: Document this + // We had a chain halt on mainnet on block 32326. Compaction happened + // and transaction.commit() returned an error. Due to a bug in tenderdash, + // validators just proceeded on next block without committing data but keeping + // updated cache. To keep consistency with mainnet chain we allow app hashes to be + // different for this block. // TODO: verify that chain id is evo1 #[allow(clippy::collapsible_if)] if !(app.platform().config.network == Network::Dash && request.height == 32327) { + // App hash in memory must be equal to app hash on disk if drive_storage_root_hash != platform_state_app_hash { - return Err(AbciError::AppHashMismatch { - drive_storage_root_hash, - platform_state_app_hash, - } - .into()); + // We panic because we can't recover from this situation. 
+ // Better to restart the Drive, so we might self-heal the node + // reloading state form the disk + panic!( + "drive and platform state app hash mismatch: drive_storage_root_hash: {:?}, platform_state_app_hash: {:?}", + drive_storage_root_hash, platform_state_app_hash + ); } } diff --git a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs index f2cb4d9e61..acd137b241 100644 --- a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs @@ -198,16 +198,23 @@ where ) .unwrap()?; - // TODO: Document this + // We had a chain halt on mainnet on block 32326. Compaction happened + // and transaction.commit() returned an error. Due to a bug in tenderdash, + // validators just proceeded on next block without committing data but keeping + // updated cache. To keep consistency with mainnet chain we allow app hashes to be + // different for this block. // TODO: verify that chain id is evo1 #[allow(clippy::collapsible_if)] if !(app.platform().config.network == Network::Dash && request.height == 32327) { + // App hash in memory must be equal to app hash on disk if drive_storage_root_hash != platform_state_app_hash { - return Err(AbciError::AppHashMismatch { - drive_storage_root_hash, - platform_state_app_hash, - } - .into()); + // We panic because we can't recover from this situation. 
+ // Better to restart the Drive, so we might self-heal the node + // reloading state form the disk + panic!( + "drive and platform state app hash mismatch: drive_storage_root_hash: {:?}, platform_state_app_hash: {:?}", + drive_storage_root_hash, platform_state_app_hash + ); } } From acb0048763da050ed6fd3251b8347831953d0130 Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Sun, 3 Nov 2024 17:32:46 +0700 Subject: [PATCH 05/15] refactor: add intermediate variable to increase readability --- packages/rs-drive-abci/src/abci/handler/info.rs | 13 ++++++------- .../src/abci/handler/prepare_proposal.rs | 13 ++++++------- .../src/abci/handler/process_proposal.rs | 13 ++++++------- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/packages/rs-drive-abci/src/abci/handler/info.rs b/packages/rs-drive-abci/src/abci/handler/info.rs index 5aa6cdb130..0aa17eb57a 100644 --- a/packages/rs-drive-abci/src/abci/handler/info.rs +++ b/packages/rs-drive-abci/src/abci/handler/info.rs @@ -29,17 +29,16 @@ where .last_committed_block_app_hash() .unwrap_or_default(); + let grove_version = &platform_state + .current_platform_version()? + .drive + .grove_version; + let platform_state_app_hash = app .platform() .drive .grove - .root_hash( - None, - &platform_state - .current_platform_version()? - .drive - .grove_version, - ) + .root_hash(None, grove_version) .unwrap()?; // We had a chain halt on mainnet on block 32326. Compaction happened diff --git a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs index d630efdb1c..5d8a750b20 100644 --- a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs @@ -41,17 +41,16 @@ where .last_committed_block_app_hash() .unwrap_or_default(); + let grove_version = &platform_state + .current_platform_version()? 
+ .drive + .grove_version; + let platform_state_app_hash = app .platform() .drive .grove - .root_hash( - None, - &platform_state - .current_platform_version()? - .drive - .grove_version, - ) + .root_hash(None, grove_version) .unwrap()?; // We had a chain halt on mainnet on block 32326. Compaction happened diff --git a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs index acd137b241..e7276a4733 100644 --- a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs @@ -185,17 +185,16 @@ where .last_committed_block_app_hash() .unwrap_or_default(); + let grove_version = &platform_state + .current_platform_version()? + .drive + .grove_version; + let platform_state_app_hash = app .platform() .drive .grove - .root_hash( - None, - &platform_state - .current_platform_version()? - .drive - .grove_version, - ) + .root_hash(None, grove_version) .unwrap()?; // We had a chain halt on mainnet on block 32326. Compaction happened From 9a5c3f217fc00f5291c4a246153cc5d5bb98be72 Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Sun, 3 Nov 2024 18:05:01 +0700 Subject: [PATCH 06/15] chore: verify chain_id --- packages/rs-drive-abci/src/abci/handler/finalize_block.rs | 8 ++++++-- packages/rs-drive-abci/src/abci/handler/info.rs | 8 ++++++-- .../rs-drive-abci/src/abci/handler/prepare_proposal.rs | 8 ++++++-- .../rs-drive-abci/src/abci/handler/process_proposal.rs | 8 ++++++-- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs index 3fb92640d9..84d7c07cdb 100644 --- a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs +++ b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs @@ -72,8 +72,12 @@ where // validators just proceeded on next block without committing data but keeping // updated cache. 
To keep consistency with mainnet chain we have to skip // commit of this block now on. - // TODO: verify that chain id is evo1 - if !(app.platform().config.network == Network::Dash && block_height == 32326) { + let config = &app.platform().config; + + if !(app.platform().config.network == Network::Dash + && config.abci.chain_id == "evo1" + && block_height == 32326) + { // This is simplified solution until we have a better way to handle // We still have caches in memory that corresponds to the data that // we weren't able to commit. Solution is to restart the Drive, so all caches diff --git a/packages/rs-drive-abci/src/abci/handler/info.rs b/packages/rs-drive-abci/src/abci/handler/info.rs index 0aa17eb57a..6c1ca914ca 100644 --- a/packages/rs-drive-abci/src/abci/handler/info.rs +++ b/packages/rs-drive-abci/src/abci/handler/info.rs @@ -46,9 +46,13 @@ where // validators just proceeded on next block without committing data but keeping // updated cache. To keep consistency with mainnet chain we allow app hashes to be // different for this block. - // TODO: verify that chain id is evo1 + let config = &app.platform().config; + #[allow(clippy::collapsible_if)] - if !(app.platform().config.network == Network::Dash && last_block_height == 32326) { + if !(config.network == Network::Dash + && config.abci.chain_id == "evo1" + && last_block_height == 32326) + { // App hash in memory must be equal to app hash on disk if drive_storage_root_hash != platform_state_app_hash { // We panic because we can't recover from this situation. diff --git a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs index 5d8a750b20..6522ee8b5d 100644 --- a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs @@ -58,9 +58,13 @@ where // validators just proceeded on next block without committing data but keeping // updated cache. 
To keep consistency with mainnet chain we allow app hashes to be // different for this block. - // TODO: verify that chain id is evo1 + let config = &app.platform().config; + #[allow(clippy::collapsible_if)] - if !(app.platform().config.network == Network::Dash && request.height == 32327) { + if !(config.network == Network::Dash + && config.abci.chain_id == "evo1" + && request.height == 32327) + { // App hash in memory must be equal to app hash on disk if drive_storage_root_hash != platform_state_app_hash { // We panic because we can't recover from this situation. diff --git a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs index e7276a4733..8505084e85 100644 --- a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs @@ -202,9 +202,13 @@ where // validators just proceeded on next block without committing data but keeping // updated cache. To keep consistency with mainnet chain we allow app hashes to be // different for this block. - // TODO: verify that chain id is evo1 + let config = &app.platform().config; + #[allow(clippy::collapsible_if)] - if !(app.platform().config.network == Network::Dash && request.height == 32327) { + if !(app.platform().config.network == Network::Dash + && config.abci.chain_id == "evo1" + && request.height == 32327) + { // App hash in memory must be equal to app hash on disk if drive_storage_root_hash != platform_state_app_hash { // We panic because we can't recover from this situation. 
From 1589ef1279fbea16c67c2a3f897dc4d6366f7d41 Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Sun, 3 Nov 2024 18:05:20 +0700 Subject: [PATCH 07/15] fix: mixed up var names --- packages/rs-drive-abci/src/abci/handler/info.rs | 4 ++-- packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs | 4 ++-- packages/rs-drive-abci/src/abci/handler/process_proposal.rs | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/rs-drive-abci/src/abci/handler/info.rs b/packages/rs-drive-abci/src/abci/handler/info.rs index 6c1ca914ca..f8a9cd9dff 100644 --- a/packages/rs-drive-abci/src/abci/handler/info.rs +++ b/packages/rs-drive-abci/src/abci/handler/info.rs @@ -25,7 +25,7 @@ where let last_block_height = platform_state.last_committed_block_height() as i64; // Verify that Platform State corresponds to Drive commited state - let drive_storage_root_hash = platform_state + let platform_state_app_hash = platform_state .last_committed_block_app_hash() .unwrap_or_default(); @@ -34,7 +34,7 @@ where .drive .grove_version; - let platform_state_app_hash = app + let drive_storage_root_hash = app .platform() .drive .grove diff --git a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs index 6522ee8b5d..219b012355 100644 --- a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs @@ -37,7 +37,7 @@ where let platform_state = app.platform().state.load(); // Verify that Platform State corresponds to Drive commited state - let drive_storage_root_hash = platform_state + let platform_state_app_hash = platform_state .last_committed_block_app_hash() .unwrap_or_default(); @@ -46,7 +46,7 @@ where .drive .grove_version; - let platform_state_app_hash = app + let drive_storage_root_hash = app .platform() .drive .grove diff --git a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs 
b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs index 8505084e85..e20bb0a9ca 100644 --- a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs @@ -181,7 +181,7 @@ where let platform_state = app.platform().state.load(); // Verify that Platform State corresponds to Drive commited state - let drive_storage_root_hash = platform_state + let platform_state_app_hash = platform_state .last_committed_block_app_hash() .unwrap_or_default(); @@ -190,7 +190,7 @@ where .drive .grove_version; - let platform_state_app_hash = app + let drive_storage_root_hash = app .platform() .drive .grove From 2a3532b1496693bcbb6ca5c1e739686a6d7cd896 Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Sun, 3 Nov 2024 18:07:08 +0700 Subject: [PATCH 08/15] revert: bring back initial protocol version --- packages/rs-drive-abci/src/config.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/packages/rs-drive-abci/src/config.rs b/packages/rs-drive-abci/src/config.rs index b2fd1d2297..1e8f5f3c26 100644 --- a/packages/rs-drive-abci/src/config.rs +++ b/packages/rs-drive-abci/src/config.rs @@ -233,6 +233,8 @@ struct PlatformConfigIntermediate { #[serde(flatten)] pub instant_lock: InstantLockConfig, pub block_spacing_ms: u64, + #[serde(default = "PlatformConfig::default_initial_protocol_version")] + pub initial_protocol_version: ProtocolVersion, pub db_path: PathBuf, #[serde(default)] pub rejections_path: Option, @@ -622,6 +624,10 @@ impl ExecutionConfig { } impl PlatformConfig { + fn default_initial_protocol_version() -> ProtocolVersion { + INITIAL_PROTOCOL_VERSION + } + fn default_network() -> Network { Network::Dash } From 5ceb20233f7a2ea3c6df18e512a0a21be46102b2 Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Mon, 4 Nov 2024 09:47:51 +0700 Subject: [PATCH 09/15] chore: remove height conditions --- .../src/abci/handler/finalize_block.rs | 24 ++++---------- .../rs-drive-abci/src/abci/handler/info.rs | 31 
++++++------------- .../src/abci/handler/prepare_proposal.rs | 31 ++++++------------- .../src/abci/handler/process_proposal.rs | 31 ++++++------------- 4 files changed, 33 insertions(+), 84 deletions(-) diff --git a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs index 84d7c07cdb..5148257f3e 100644 --- a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs +++ b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs @@ -67,24 +67,12 @@ where )); } - // We had a chain halt on mainnet on block 32326. Compaction happened - // and transaction.commit() returned an error. Due to a bug in tenderdash, - // validators just proceeded on next block without committing data but keeping - // updated cache. To keep consistency with mainnet chain we have to skip - // commit of this block now on. - let config = &app.platform().config; - - if !(app.platform().config.network == Network::Dash - && config.abci.chain_id == "evo1" - && block_height == 32326) - { - // This is simplified solution until we have a better way to handle - // We still have caches in memory that corresponds to the data that - // we weren't able to commit. Solution is to restart the Drive, so all caches - // will be restored from the disk and try to process this block again - app.commit_transaction(platform_version) - .expect("commit transaction"); - } + // This is simplified solution until we have a better way to handle + // We still have caches in memory that corresponds to the data that + // we weren't able to commit. 
Solution is to restart the Drive, so all caches + // will be restored from the disk and try to process this block again + app.commit_transaction(platform_version) + .expect("commit transaction"); app.platform() .committed_block_height_guard diff --git a/packages/rs-drive-abci/src/abci/handler/info.rs b/packages/rs-drive-abci/src/abci/handler/info.rs index f8a9cd9dff..570fedb6ff 100644 --- a/packages/rs-drive-abci/src/abci/handler/info.rs +++ b/packages/rs-drive-abci/src/abci/handler/info.rs @@ -41,28 +41,15 @@ where .root_hash(None, grove_version) .unwrap()?; - // We had a chain halt on mainnet on block 32326. Compaction happened - // and transaction.commit() returned an error. Due to a bug in tenderdash, - // validators just proceeded on next block without committing data but keeping - // updated cache. To keep consistency with mainnet chain we allow app hashes to be - // different for this block. - let config = &app.platform().config; - - #[allow(clippy::collapsible_if)] - if !(config.network == Network::Dash - && config.abci.chain_id == "evo1" - && last_block_height == 32326) - { - // App hash in memory must be equal to app hash on disk - if drive_storage_root_hash != platform_state_app_hash { - // We panic because we can't recover from this situation. - // Better to restart the Drive, so we might self-heal the node - // reloading state form the disk - panic!( - "drive and platform state app hash mismatch: drive_storage_root_hash: {:?}, platform_state_app_hash: {:?}", - drive_storage_root_hash, platform_state_app_hash - ); - } + // App hash in memory must be equal to app hash on disk + if drive_storage_root_hash != platform_state_app_hash { + // We panic because we can't recover from this situation. 
+ // Better to restart the Drive, so we might self-heal the node + // reloading state form the disk + panic!( + "drive and platform state app hash mismatch: drive_storage_root_hash: {:?}, platform_state_app_hash: {:?}", + drive_storage_root_hash, platform_state_app_hash + ); } let desired_protocol_version = DESIRED_PLATFORM_VERSION.protocol_version; diff --git a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs index 219b012355..8048e1018b 100644 --- a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs @@ -53,28 +53,15 @@ where .root_hash(None, grove_version) .unwrap()?; - // We had a chain halt on mainnet on block 32326. Compaction happened - // and transaction.commit() returned an error. Due to a bug in tenderdash, - // validators just proceeded on next block without committing data but keeping - // updated cache. To keep consistency with mainnet chain we allow app hashes to be - // different for this block. - let config = &app.platform().config; - - #[allow(clippy::collapsible_if)] - if !(config.network == Network::Dash - && config.abci.chain_id == "evo1" - && request.height == 32327) - { - // App hash in memory must be equal to app hash on disk - if drive_storage_root_hash != platform_state_app_hash { - // We panic because we can't recover from this situation. - // Better to restart the Drive, so we might self-heal the node - // reloading state form the disk - panic!( - "drive and platform state app hash mismatch: drive_storage_root_hash: {:?}, platform_state_app_hash: {:?}", - drive_storage_root_hash, platform_state_app_hash - ); - } + // App hash in memory must be equal to app hash on disk + if drive_storage_root_hash != platform_state_app_hash { + // We panic because we can't recover from this situation. 
+ // Better to restart the Drive, so we might self-heal the node + // reloading state form the disk + panic!( + "drive and platform state app hash mismatch: drive_storage_root_hash: {:?}, platform_state_app_hash: {:?}", + drive_storage_root_hash, platform_state_app_hash + ); } let last_committed_core_height = platform_state.last_committed_core_height(); diff --git a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs index e20bb0a9ca..bff4352be0 100644 --- a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs @@ -197,28 +197,15 @@ where .root_hash(None, grove_version) .unwrap()?; - // We had a chain halt on mainnet on block 32326. Compaction happened - // and transaction.commit() returned an error. Due to a bug in tenderdash, - // validators just proceeded on next block without committing data but keeping - // updated cache. To keep consistency with mainnet chain we allow app hashes to be - // different for this block. - let config = &app.platform().config; - - #[allow(clippy::collapsible_if)] - if !(app.platform().config.network == Network::Dash - && config.abci.chain_id == "evo1" - && request.height == 32327) - { - // App hash in memory must be equal to app hash on disk - if drive_storage_root_hash != platform_state_app_hash { - // We panic because we can't recover from this situation. - // Better to restart the Drive, so we might self-heal the node - // reloading state form the disk - panic!( - "drive and platform state app hash mismatch: drive_storage_root_hash: {:?}, platform_state_app_hash: {:?}", - drive_storage_root_hash, platform_state_app_hash - ); - } + // App hash in memory must be equal to app hash on disk + if drive_storage_root_hash != platform_state_app_hash { + // We panic because we can't recover from this situation. 
+ // Better to restart the Drive, so we might self-heal the node + // reloading state form the disk + panic!( + "drive and platform state app hash mismatch: drive_storage_root_hash: {:?}, platform_state_app_hash: {:?}", + drive_storage_root_hash, platform_state_app_hash + ); } let starting_platform_version = platform_state.current_platform_version()?; From 677a452d4056139b0ed6f5c4da1361203b38811b Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Mon, 4 Nov 2024 15:19:04 +0700 Subject: [PATCH 10/15] chore: apply fix after 33000 --- .../src/abci/handler/finalize_block.rs | 13 +++++++------ packages/rs-drive-abci/src/abci/handler/info.rs | 13 +++++++------ .../src/abci/handler/prepare_proposal.rs | 13 +++++++------ .../src/abci/handler/process_proposal.rs | 13 +++++++------ 4 files changed, 28 insertions(+), 24 deletions(-) diff --git a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs index 84d7c07cdb..79af128c71 100644 --- a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs +++ b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs @@ -67,16 +67,17 @@ where )); } - // We had a chain halt on mainnet on block 32326. Compaction happened - // and transaction.commit() returned an error. Due to a bug in tenderdash, - // validators just proceeded on next block without committing data but keeping - // updated cache. To keep consistency with mainnet chain we have to skip - // commit of this block now on. + // We had a sequence of errors on mainnet started since block 32326. + // We got rocksdb transaction is busy error because of a bug (writing outside of transaction). + // Due to another bug in tenderdash, validators just proceeded on next block partially committing + // the state and updated cache. Fullnodes are stuck and proceeded after re-sync. + // To keep consistency with mainnet chain we enable this fix at + // the block when we consider state is consistent. 
let config = &app.platform().config; if !(app.platform().config.network == Network::Dash && config.abci.chain_id == "evo1" - && block_height == 32326) + && block_height < 33000) { // This is simplified solution until we have a better way to handle // We still have caches in memory that corresponds to the data that diff --git a/packages/rs-drive-abci/src/abci/handler/info.rs b/packages/rs-drive-abci/src/abci/handler/info.rs index f8a9cd9dff..1710e1e110 100644 --- a/packages/rs-drive-abci/src/abci/handler/info.rs +++ b/packages/rs-drive-abci/src/abci/handler/info.rs @@ -41,17 +41,18 @@ where .root_hash(None, grove_version) .unwrap()?; - // We had a chain halt on mainnet on block 32326. Compaction happened - // and transaction.commit() returned an error. Due to a bug in tenderdash, - // validators just proceeded on next block without committing data but keeping - // updated cache. To keep consistency with mainnet chain we allow app hashes to be - // different for this block. + // We had a sequence of errors on mainnet started since block 32326. + // We got rocksdb transaction is busy error because of a bug (writing outside of transaction). + // Due to another bug in tenderdash, validators just proceeded on next block partially committing + // the state and updated cache. Fullnodes are stuck and proceeded after re-sync. + // To keep consistency with mainnet chain we enable this fix at + // the block when we consider state is consistent. 
let config = &app.platform().config; #[allow(clippy::collapsible_if)] if !(config.network == Network::Dash && config.abci.chain_id == "evo1" - && last_block_height == 32326) + && last_block_height < 33000) { // App hash in memory must be equal to app hash on disk if drive_storage_root_hash != platform_state_app_hash { diff --git a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs index 219b012355..58cd7e6f9e 100644 --- a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs @@ -53,17 +53,18 @@ where .root_hash(None, grove_version) .unwrap()?; - // We had a chain halt on mainnet on block 32326. Compaction happened - // and transaction.commit() returned an error. Due to a bug in tenderdash, - // validators just proceeded on next block without committing data but keeping - // updated cache. To keep consistency with mainnet chain we allow app hashes to be - // different for this block. + // We had a sequence of errors on mainnet started since block 32326. + // We got rocksdb transaction is busy error because of a bug (writing outside of transaction). + // Due to another bug in tenderdash, validators just proceeded on next block partially committing + // the state and updated cache. Fullnodes are stuck and proceeded after re-sync. + // To keep consistency with mainnet chain we enable this fix at + // the block when we consider state is consistent. 
let config = &app.platform().config; #[allow(clippy::collapsible_if)] if !(config.network == Network::Dash && config.abci.chain_id == "evo1" - && request.height == 32327) + && request.height < 33000) { // App hash in memory must be equal to app hash on disk if drive_storage_root_hash != platform_state_app_hash { diff --git a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs index e20bb0a9ca..3e7a6474ac 100644 --- a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs @@ -197,17 +197,18 @@ where .root_hash(None, grove_version) .unwrap()?; - // We had a chain halt on mainnet on block 32326. Compaction happened - // and transaction.commit() returned an error. Due to a bug in tenderdash, - // validators just proceeded on next block without committing data but keeping - // updated cache. To keep consistency with mainnet chain we allow app hashes to be - // different for this block. + // We had a sequence of errors on mainnet started since block 32326. + // We got rocksdb transaction is busy error because of a bug (writing outside of transaction). + // Due to another bug in tenderdash, validators just proceeded on next block partially committing + // the state and updated cache. Fullnodes are stuck and proceeded after re-sync. + // To keep consistency with mainnet chain we enable this fix at + // the block when we consider state is consistent. 
let config = &app.platform().config; #[allow(clippy::collapsible_if)] if !(app.platform().config.network == Network::Dash && config.abci.chain_id == "evo1" - && request.height == 32327) + && request.height < 33000) { // App hash in memory must be equal to app hash on disk if drive_storage_root_hash != platform_state_app_hash { From 3eaf1a797c7535f0a9b92a900593505e6de3cbed Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Mon, 4 Nov 2024 15:29:19 +0700 Subject: [PATCH 11/15] docs: updated comments --- .../rs-drive-abci/src/abci/handler/finalize_block.rs | 12 ++++++------ packages/rs-drive-abci/src/abci/handler/info.rs | 12 ++++++------ .../src/abci/handler/prepare_proposal.rs | 12 ++++++------ .../src/abci/handler/process_proposal.rs | 12 ++++++------ 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs index 79af128c71..369b68c4f4 100644 --- a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs +++ b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs @@ -67,12 +67,12 @@ where )); } - // We had a sequence of errors on mainnet started since block 32326. - // We got rocksdb transaction is busy error because of a bug (writing outside of transaction). - // Due to another bug in tenderdash, validators just proceeded on next block partially committing - // the state and updated cache. Fullnodes are stuck and proceeded after re-sync. - // To keep consistency with mainnet chain we enable this fix at - // the block when we consider state is consistent. + // We had a sequence of errors on the mainnet started since block 32326. + // We got RocksDB's "transaction is busy" error because of a bug (#2309 ). + // Due to another bug in Tenderdash (https://github.com/dashpay/tenderdash/pull/966), + // validators just proceeded to the next block partially committing the state and updating the cache. 
+ // Full nodes are stuck and proceeded after re-sync. + // For the mainnet chain, we enable these fixes at the block when we consider the state is consistent. let config = &app.platform().config; if !(app.platform().config.network == Network::Dash diff --git a/packages/rs-drive-abci/src/abci/handler/info.rs b/packages/rs-drive-abci/src/abci/handler/info.rs index 1710e1e110..b6a43b9408 100644 --- a/packages/rs-drive-abci/src/abci/handler/info.rs +++ b/packages/rs-drive-abci/src/abci/handler/info.rs @@ -41,12 +41,12 @@ where .root_hash(None, grove_version) .unwrap()?; - // We had a sequence of errors on mainnet started since block 32326. - // We got rocksdb transaction is busy error because of a bug (writing outside of transaction). - // Due to another bug in tenderdash, validators just proceeded on next block partially committing - // the state and updated cache. Fullnodes are stuck and proceeded after re-sync. - // To keep consistency with mainnet chain we enable this fix at - // the block when we consider state is consistent. + // We had a sequence of errors on the mainnet started since block 32326. + // We got RocksDB's "transaction is busy" error because of a bug (#2309 ). + // Due to another bug in Tenderdash (https://github.com/dashpay/tenderdash/pull/966), + // validators just proceeded to the next block partially committing the state and updating the cache. + // Full nodes are stuck and proceeded after re-sync. + // For the mainnet chain, we enable these fixes at the block when we consider the state is consistent. 
let config = &app.platform().config; #[allow(clippy::collapsible_if)] diff --git a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs index 58cd7e6f9e..61785f7089 100644 --- a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs @@ -53,12 +53,12 @@ where .root_hash(None, grove_version) .unwrap()?; - // We had a sequence of errors on mainnet started since block 32326. - // We got rocksdb transaction is busy error because of a bug (writing outside of transaction). - // Due to another bug in tenderdash, validators just proceeded on next block partially committing - // the state and updated cache. Fullnodes are stuck and proceeded after re-sync. - // To keep consistency with mainnet chain we enable this fix at - // the block when we consider state is consistent. + // We had a sequence of errors on the mainnet started since block 32326. + // We got RocksDB's "transaction is busy" error because of a bug (#2309 ). + // Due to another bug in Tenderdash (https://github.com/dashpay/tenderdash/pull/966), + // validators just proceeded to the next block partially committing the state and updating the cache. + // Full nodes are stuck and proceeded after re-sync. + // For the mainnet chain, we enable these fixes at the block when we consider the state is consistent. let config = &app.platform().config; #[allow(clippy::collapsible_if)] diff --git a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs index 3e7a6474ac..950d779549 100644 --- a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs @@ -197,12 +197,12 @@ where .root_hash(None, grove_version) .unwrap()?; - // We had a sequence of errors on mainnet started since block 32326. 
- // We got rocksdb transaction is busy error because of a bug (writing outside of transaction). - // Due to another bug in tenderdash, validators just proceeded on next block partially committing - // the state and updated cache. Fullnodes are stuck and proceeded after re-sync. - // To keep consistency with mainnet chain we enable this fix at - // the block when we consider state is consistent. + // We had a sequence of errors on the mainnet started since block 32326. + // We got RocksDB's "transaction is busy" error because of a bug (#2309 ). + // Due to another bug in Tenderdash (https://github.com/dashpay/tenderdash/pull/966), + // validators just proceeded to the next block partially committing the state and updating the cache. + // Full nodes are stuck and proceeded after re-sync. + // For the mainnet chain, we enable these fixes at the block when we consider the state is consistent. let config = &app.platform().config; #[allow(clippy::collapsible_if)] From 6a98a45e1f7eade055050a94f2f8e1e74999e822 Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Mon, 4 Nov 2024 15:30:48 +0700 Subject: [PATCH 12/15] docs: updated comments --- packages/rs-drive-abci/src/abci/handler/finalize_block.rs | 2 +- packages/rs-drive-abci/src/abci/handler/info.rs | 2 +- packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs | 2 +- packages/rs-drive-abci/src/abci/handler/process_proposal.rs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs index 369b68c4f4..15308db836 100644 --- a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs +++ b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs @@ -68,7 +68,7 @@ where } // We had a sequence of errors on the mainnet started since block 32326. - // We got RocksDB's "transaction is busy" error because of a bug (#2309 ). 
+ // We got RocksDB's "transaction is busy" error because of a bug (https://github.com/dashpay/platform/pull/2309). // Due to another bug in Tenderdash (https://github.com/dashpay/tenderdash/pull/966), // validators just proceeded to the next block partially committing the state and updating the cache. // Full nodes are stuck and proceeded after re-sync. diff --git a/packages/rs-drive-abci/src/abci/handler/info.rs b/packages/rs-drive-abci/src/abci/handler/info.rs index b6a43b9408..9ac9d31626 100644 --- a/packages/rs-drive-abci/src/abci/handler/info.rs +++ b/packages/rs-drive-abci/src/abci/handler/info.rs @@ -42,7 +42,7 @@ where .unwrap()?; // We had a sequence of errors on the mainnet started since block 32326. - // We got RocksDB's "transaction is busy" error because of a bug (#2309 ). + // We got RocksDB's "transaction is busy" error because of a bug (https://github.com/dashpay/platform/pull/2309). // Due to another bug in Tenderdash (https://github.com/dashpay/tenderdash/pull/966), // validators just proceeded to the next block partially committing the state and updating the cache. // Full nodes are stuck and proceeded after re-sync. diff --git a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs index 61785f7089..61f58a0196 100644 --- a/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs @@ -54,7 +54,7 @@ where .unwrap()?; // We had a sequence of errors on the mainnet started since block 32326. - // We got RocksDB's "transaction is busy" error because of a bug (#2309 ). + // We got RocksDB's "transaction is busy" error because of a bug (https://github.com/dashpay/platform/pull/2309). // Due to another bug in Tenderdash (https://github.com/dashpay/tenderdash/pull/966), // validators just proceeded to the next block partially committing the state and updating the cache. 
// Full nodes are stuck and proceeded after re-sync. diff --git a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs index 950d779549..d40567d3db 100644 --- a/packages/rs-drive-abci/src/abci/handler/process_proposal.rs +++ b/packages/rs-drive-abci/src/abci/handler/process_proposal.rs @@ -198,7 +198,7 @@ where .unwrap()?; // We had a sequence of errors on the mainnet started since block 32326. - // We got RocksDB's "transaction is busy" error because of a bug (#2309 ). + // We got RocksDB's "transaction is busy" error because of a bug (https://github.com/dashpay/platform/pull/2309). // Due to another bug in Tenderdash (https://github.com/dashpay/tenderdash/pull/966), // validators just proceeded to the next block partially committing the state and updating the cache. // Full nodes are stuck and proceeded after re-sync. From 4d4a6b3a9b7d66d79a4792a5bd8edd0b99b341f3 Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Mon, 4 Nov 2024 15:38:33 +0700 Subject: [PATCH 13/15] fix: invalid condition --- .../rs-drive-abci/src/abci/handler/finalize_block.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs index 15308db836..30af515220 100644 --- a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs +++ b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs @@ -67,6 +67,8 @@ where )); } + let result = app.commit_transaction(platform_version); + // We had a sequence of errors on the mainnet started since block 32326. // We got RocksDB's "transaction is busy" error because of a bug (https://github.com/dashpay/platform/pull/2309). // Due to another bug in Tenderdash (https://github.com/dashpay/tenderdash/pull/966), @@ -75,16 +77,18 @@ where // For the mainnet chain, we enable these fixes at the block when we consider the state is consistent. 
let config = &app.platform().config; - if !(app.platform().config.network == Network::Dash + if app.platform().config.network == Network::Dash && config.abci.chain_id == "evo1" - && block_height < 33000) + && block_height < 33000 { + // Old behavior on mainnet below block 33000 + result?; + } else { // This is simplified solution until we have a better way to handle // We still have caches in memory that corresponds to the data that // we weren't able to commit. Solution is to restart the Drive, so all caches // will be restored from the disk and try to process this block again - app.commit_transaction(platform_version) - .expect("commit transaction"); + result.expect("commit transaction"); } app.platform() From ad2a2b8a86d25b1c136b7d0d6deafe058db6c072 Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Mon, 4 Nov 2024 15:43:19 +0700 Subject: [PATCH 14/15] docs: update comments --- .../rs-drive-abci/src/abci/handler/finalize_block.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs index 30af515220..852f85cc6b 100644 --- a/packages/rs-drive-abci/src/abci/handler/finalize_block.rs +++ b/packages/rs-drive-abci/src/abci/handler/finalize_block.rs @@ -84,10 +84,11 @@ where // Old behavior on mainnet below block 33000 result?; } else { - // This is simplified solution until we have a better way to handle - // We still have caches in memory that corresponds to the data that - // we weren't able to commit. Solution is to restart the Drive, so all caches - // will be restored from the disk and try to process this block again + // In case if transaction commit failed we still have caches in memory that + // corresponds to the data that we weren't able to commit. + // The simplified solution is to restart the Drive, so all caches + // will be restored from the disk and try to process this block again. 
+ // TODO: We need better handling of the "transaction is busy" error, with retry logic. result.expect("commit transaction"); } From 93e535d73262c5bc1f32fdaa2e67c5cbf3d58232 Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Mon, 4 Nov 2024 16:42:55 +0700 Subject: [PATCH 15/15] chore: kick off CI