From 4bf416291820e46ba7f5d7372d2f82142ffb2319 Mon Sep 17 00:00:00 2001 From: Justin Restivo Date: Fri, 18 Aug 2023 16:29:06 -0400 Subject: [PATCH 1/2] feat: improve testing harness --- testing/src/overall_safety_task.rs | 413 ++++++++++++++++------------- testing/src/test_builder.rs | 2 + testing/tests/libp2p.rs | 23 +- 3 files changed, 250 insertions(+), 188 deletions(-) diff --git a/testing/src/overall_safety_task.rs b/testing/src/overall_safety_task.rs index 8c4ffd04b7..e3af81b901 100644 --- a/testing/src/overall_safety_task.rs +++ b/testing/src/overall_safety_task.rs @@ -1,3 +1,4 @@ +use commit::Commitment; use hotshot_task::event_stream::EventStream; use std::{ collections::{hash_map::Entry, HashMap, HashSet}, @@ -19,22 +20,36 @@ use hotshot_task::{ }; use hotshot_types::{ certificate::QuorumCertificate, - data::{DeltasType, LeafType}, + data::{DeltasType, LeafBlock, LeafType}, error::RoundTimedoutState, event::{Event, EventType}, traits::node_implementation::NodeType, }; use snafu::Snafu; -use tracing::error; use crate::{test_launcher::TaskGenerator, test_runner::Node}; pub type StateAndBlock = (Vec, Vec); use super::GlobalTestEvent; +/// the status of a view +#[derive(Debug, Clone)] +pub enum ViewStatus { + /// success + Ok, + /// failure + Failed, + /// safety violation + Err(OverallSafetyTaskErr), + /// in progress + InProgress, +} + /// possible errors -#[derive(Snafu, Debug)] +#[derive(Snafu, Debug, Clone)] pub enum OverallSafetyTaskErr { + /// inconsistent txn nums + InconsistentTxnsNum { map: HashMap }, /// too many failed views TooManyFailures { /// expected number of failures @@ -80,17 +95,21 @@ pub struct RoundResult> { /// Nodes that failed to commit this round pub failed_nodes: HashMap>>>, - /// state of the majority of the nodes - pub agreed_state: Option, + /// whether or not the round succeeded (for a custom defn of succeeded) + pub status: ViewStatus, - /// block of the majority of the nodes - pub agreed_block: Option, + /// NOTE: technically a map is not needed + /// left one anyway for ease of viewing + /// leaf -> # entries decided on that leaf + pub leaf_map: HashMap, - /// leaf of the majority of the nodes - pub agreed_leaf: Option, + /// block -> # entries decided on that block + pub block_map: HashMap>, usize>, - /// whether or not the round succeeded (for a custom defn of succeeded) - pub success: bool, + /// state -> # entries decided on that state + pub state_map: HashMap<::MaybeState, usize>, + + pub num_txns_map: HashMap, } impl> Default for RoundResult { @@ -98,10 +117,11 @@ impl> Default for RoundResult< Self { success_nodes: Default::default(), failed_nodes: Default::default(), - agreed_leaf: Default::default(), - agreed_block: Default::default(), - agreed_state: Default::default(), - success: false, + leaf_map: Default::default(), + block_map: Default::default(), + state_map: Default::default(), + num_txns_map: Default::default(), + status: ViewStatus::InProgress, } } } @@ -160,130 +180,147 @@ impl> RoundCtx { } impl> RoundResult { - pub fn gen_leaves(&self) -> HashMap { - let mut leaves = HashMap::::new(); + /// insert into round result + pub fn insert_into_result( + &mut self, + idx: usize, + result: (Vec, QuorumCertificate), + maybe_block_size: Option, + ) -> Option { + self.success_nodes.insert(idx as u64, result.clone()); + + let maybe_leaf: Option = result.0.into_iter().last(); + if let Some(leaf) = maybe_leaf.clone() { + match self.leaf_map.entry(leaf.clone()) { + std::collections::hash_map::Entry::Occupied(mut o) => { + *o.get_mut() += 1; + } + std::collections::hash_map::Entry::Vacant(v) => { + v.insert(1); + } + } - for (leaf_vec, _) in self.success_nodes.values() { - let most_recent_leaf = leaf_vec.iter().last(); - if let Some(leaf) = most_recent_leaf { - match leaves.entry(leaf.clone()) { - std::collections::hash_map::Entry::Occupied(mut o) => { + let (state, block) = (leaf.get_state(), leaf.get_deltas()); + + match self.state_map.entry(state.clone()) { + std::collections::hash_map::Entry::Occupied(mut o) => { + *o.get_mut() += 1; + } + std::collections::hash_map::Entry::Vacant(v) => { + v.insert(1); + } + } + match self.block_map.entry(block.clone().block_commitment()) { + std::collections::hash_map::Entry::Occupied(mut o) => { + *o.get_mut() += 1; + } + std::collections::hash_map::Entry::Vacant(v) => { + v.insert(1); + } + } + + if let Some(num_txns) = maybe_block_size { + match self.num_txns_map.entry(num_txns) { + Entry::Occupied(mut o) => { *o.get_mut() += 1; } - std::collections::hash_map::Entry::Vacant(v) => { + Entry::Vacant(v) => { v.insert(1); } } } } - leaves - } - /// general leaf check - pub fn check_leaves(&mut self, threshold: usize) -> Result<(), OverallSafetyTaskErr> { - let leaves = self.gen_leaves(); - - for (leaf, num) in leaves { - if num > threshold { - self.agreed_leaf = Some(leaf); - self.success = true; - return Ok(()); - } - } - self.success = false; - Err(OverallSafetyTaskErr::MismatchedLeaf) + maybe_leaf } - /// sanity block and state check - pub fn check_blocks_and_state( + /// determines whether or not the round passes + /// also do a safety check + #[allow(clippy::too_many_arguments)] + pub fn update_status( &mut self, threshold: usize, - check_block: bool, + total_num_nodes: usize, + key: LEAF, + check_leaf: bool, check_state: bool, - ) -> Result<(), OverallSafetyTaskErr> { - let mut states = HashMap::<::MaybeState, usize>::new(); - let mut blocks = HashMap::<::DeltasType, usize>::new(); - let mut num_no_progress = 0; - for (_idx, leaves) in self.success_nodes.clone() { - let (state, block): StateAndBlock< - ::MaybeState, - ::DeltasType, - > = leaves - .0 - .iter() - .cloned() - .map(|leaf| (leaf.get_state(), leaf.get_deltas())) - .unzip(); - - if let (Some(most_recent_state), Some(most_recent_block)) = - (state.iter().last(), block.iter().last()) - { - match states.entry(most_recent_state.clone()) { - std::collections::hash_map::Entry::Occupied(mut o) => { - *o.get_mut() += 1; - } - std::collections::hash_map::Entry::Vacant(v) => { - v.insert(1); - } - } - match blocks.entry(most_recent_block.clone()) { - std::collections::hash_map::Entry::Occupied(mut o) => { - *o.get_mut() += 1; - } - std::collections::hash_map::Entry::Vacant(v) => { - v.insert(1); - } - } - } else { - num_no_progress += 1; - } + check_block: bool, + transaction_threshold: u64, + ) { + let num_decided = self.success_nodes.len(); + let num_failed = self.failed_nodes.len(); + let remaining_nodes = total_num_nodes - (num_decided + num_failed); + + if check_leaf && self.leaf_map.len() != 1 { + self.status = ViewStatus::Err(OverallSafetyTaskErr::MismatchedLeaf); + return; } - error!( - "states for this view {:#?}\nblocks for this view {:#?}", - states, blocks - ); - - error!( - "Number of nodes who made zero progress: {:#?}", - num_no_progress - ); - - let mut result_state = None; - let mut result_commitment = None; - - if check_state { - for (state, num_nodes) in states { - if num_nodes >= threshold { - result_state = Some(state.clone()); - self.agreed_state = Some(state); - self.success = true; - } + + if check_state && self.state_map.len() != 1 { + self.status = ViewStatus::Err(OverallSafetyTaskErr::InconsistentStates); + return; + } + + if check_block && self.block_map.len() != 1 { + self.status = ViewStatus::Err(OverallSafetyTaskErr::InconsistentBlocks); + return; + } + + if transaction_threshold >= 1 { + if self.num_txns_map.len() > 1 { + self.status = ViewStatus::Err(OverallSafetyTaskErr::InconsistentTxnsNum { + map: self.num_txns_map.clone(), + }); + return; } + if *self.num_txns_map.iter().last().unwrap().0 < transaction_threshold { + self.status = ViewStatus::Failed; + return; + } + } + + // check for success + if num_decided >= threshold { + // decide on if we've succeeded. + // if so, set state and return + // if not, return error + // if neither, continue through - if result_state.is_none() { - self.success = false; - return Err(OverallSafetyTaskErr::InconsistentStates); + let state_key = key.get_state(); + let block_key = key.get_deltas().block_commitment(); + + if *self.block_map.get(&block_key).unwrap() == threshold + && *self.state_map.get(&state_key).unwrap() == threshold + && *self.leaf_map.get(&key).unwrap() == threshold + { + self.status = ViewStatus::Ok; + return; } } - if check_block { - // Check if the block commitments are the same. - let mut consistent_block = None; - for (delta, _) in blocks.clone() { - let commitment = delta.block_commitment(); - if let Some(consistent_commitment) = result_commitment { - if commitment != consistent_commitment { - self.success = false; - error!("Inconsistent blocks, blocks: {:?}", blocks); - return Err(OverallSafetyTaskErr::InconsistentBlocks); + let is_success_possible = remaining_nodes + num_decided >= threshold; + if !is_success_possible { + self.status = ViewStatus::Failed; + } + } + + /// generate leaves + pub fn gen_leaves(&self) -> HashMap { + let mut leaves = HashMap::::new(); + + for (leaf_vec, _) in self.success_nodes.values() { + let most_recent_leaf = leaf_vec.iter().last(); + if let Some(leaf) = most_recent_leaf { + match leaves.entry(leaf.clone()) { + std::collections::hash_map::Entry::Occupied(mut o) => { + *o.get_mut() += 1; + } + std::collections::hash_map::Entry::Vacant(v) => { + v.insert(1); } } - result_commitment = Some(commitment); - consistent_block = Some(delta); } - self.success = true; - self.agreed_block = consistent_block; } - Ok(()) + leaves } } @@ -298,6 +335,11 @@ pub struct OverallSafetyPropertiesDescription { pub check_state: bool, /// whether or not to check the block pub check_block: bool, + /// whether or not to check that we have threshold amounts of transactions each block + /// if 0: don't check + /// if n > 0, check that at least n transactions are decided upon if such information + /// is available + pub transaction_threshold: u64, /// num of total rounds allowed to fail pub num_failed_views: usize, /// threshold calculator. Given number of live and total nodes, provide number of successes @@ -325,6 +367,7 @@ impl Default for OverallSafetyPropertiesDescription { check_state: true, check_block: true, num_failed_views: 10, + transaction_threshold: 0, // very strict threshold_calculator: Arc::new(|_num_live, num_total| 2 * num_total / 3 + 1), } @@ -340,25 +383,10 @@ impl OverallSafetyPropertiesDescription { check_leaf, check_state, check_block, - // TODO - // We can't exactly check that the transactions all match those submitted - // because of a type error. We ONLY have `MaybeState` and we need `State`. - // unless we specialize, this won't happen. - // so waiting on refactor for this - // code is below: - // - // ``` - // let next_state /* : Option<_> */ = { - // if let Some(last_leaf) = ctx.prior_round_results.iter().last() { - // if let Some(parent_state) = last_leaf.agreed_state { - // let mut block = ::StateType::next_block(Some(parent_state.clone())); - // } - // } - // }; - // ``` num_failed_views: num_failed_rounds_total, num_successful_views, threshold_calculator, + transaction_threshold, }: Self = self; Box::new(move |mut state, mut registry, test_event_stream| { @@ -411,22 +439,32 @@ impl OverallSafetyPropertiesDescription { let threshold_calculator = threshold_calculator.clone(); async move { let (idx, Event { view_number, event }) = msg; - match event { + let key = match event { EventType::Error { error } => { state.ctx.insert_error_to_context(view_number, error); + None } - EventType::Decide { leaf_chain, qc, .. } => { + EventType::Decide { + leaf_chain, + qc, + block_size: maybe_block_size, + } => { let paired_up = (leaf_chain.to_vec(), (*qc).clone()); match state.ctx.round_results.entry(view_number) { - Entry::Occupied(mut o) => { - o.get_mut().success_nodes.insert(idx as u64, paired_up); - } + Entry::Occupied(mut o) => o.get_mut().insert_into_result( + idx, + paired_up, + maybe_block_size, + ), Entry::Vacant(v) => { let mut round_result = RoundResult::default(); - round_result - .success_nodes - .insert(idx as u64, paired_up); + let key = round_result.insert_into_result( + idx, + paired_up, + maybe_block_size, + ); v.insert(round_result); + key } } } @@ -437,66 +475,69 @@ impl OverallSafetyPropertiesDescription { state: RoundTimedoutState::TestCollectRoundEventsTimedOut, }); state.ctx.insert_error_to_context(view_number, error); + None } _ => return (None, state), - } + }; // update view count let threshold = (threshold_calculator)(state.handles.len(), state.handles.len()); let view = state.ctx.round_results.get_mut(&view_number).unwrap(); - view.success = true; - if view.failed_nodes.len() > state.handles.len() - threshold { - // mark as failure - view.success = false; - state.ctx.failed_views.insert(view_number); - if state.ctx.failed_views.len() >= num_failed_rounds_total { - state - .test_event_stream - .publish(GlobalTestEvent::ShutDown) - .await; - return ( - Some(HotShotTaskCompleted::Error(Box::new( - OverallSafetyTaskErr::TooManyFailures { - got: state.ctx.failed_views.len(), - expected: num_failed_rounds_total, - }, - ))), - state, - ); - } - } - if view.success_nodes.len() >= threshold { - // TODO check for safety violation on leaves - if check_leaf { - let leaf_result = view.check_leaves(threshold); - if let Err(e) = leaf_result { + if let Some(key) = key { + view.update_status( + threshold, + state.handles.len(), + key, + check_leaf, + check_state, + check_block, + transaction_threshold, + ); + match view.status.clone() { + ViewStatus::Ok => { + state.ctx.successful_views.insert(view_number); + if state.ctx.successful_views.len() + >= self.num_successful_views + { + state + .test_event_stream + .publish(GlobalTestEvent::ShutDown) + .await; + return (Some(HotShotTaskCompleted::ShutDown), state); + } + return (None, state); + } + ViewStatus::Failed => { + state.ctx.failed_views.insert(view_number); + if state.ctx.failed_views.len() >= self.num_failed_views { + state + .test_event_stream + .publish(GlobalTestEvent::ShutDown) + .await; + return ( + Some(HotShotTaskCompleted::Error(Box::new( + OverallSafetyTaskErr::TooManyFailures { + got: state.ctx.failed_views.len(), + expected: num_failed_rounds_total, + }, + ))), + state, + ); + } + return (None, state); + } + ViewStatus::Err(e) => { return ( Some(HotShotTaskCompleted::Error(Box::new(e))), state, ); } - } - let block_state_result = view.check_blocks_and_state( - threshold, - check_block, - check_state, - ); - if let Err(e) = block_state_result { - return (Some(HotShotTaskCompleted::Error(Box::new(e))), state); - } - - // mark as success - view.success = true; - state.ctx.successful_views.insert(view_number); - if state.ctx.successful_views.len() >= num_successful_views { - state - .test_event_stream - .publish(GlobalTestEvent::ShutDown) - .await; - return (Some(HotShotTaskCompleted::ShutDown), state); + ViewStatus::InProgress => { + return (None, state); + } } } diff --git a/testing/src/test_builder.rs b/testing/src/test_builder.rs index bca99e4c42..359cd505b6 100644 --- a/testing/src/test_builder.rs +++ b/testing/src/test_builder.rs @@ -85,6 +85,7 @@ impl TestMetadata { check_state: true, check_block: true, num_failed_views: 15, + transaction_threshold: 0, threshold_calculator: Arc::new(|_active, total| (2 * total / 3 + 1)), }, timing_data: TimingData { @@ -108,6 +109,7 @@ impl TestMetadata { check_state: true, check_block: true, num_failed_views: 8, + transaction_threshold: 0, threshold_calculator: Arc::new(|_active, total| (2 * total / 3 + 1)), }, timing_data: TimingData { diff --git a/testing/tests/libp2p.rs b/testing/tests/libp2p.rs index e3afe4b8e4..45d0ec66c0 100644 --- a/testing/tests/libp2p.rs +++ b/testing/tests/libp2p.rs @@ -1,6 +1,10 @@ +use std::time::Duration; + use hotshot_testing::{ + completion_task::{CompletionTaskDescription, TimeBasedCompletionTaskDescription}, node_types::{SequencingLibp2pImpl, SequencingTestTypes}, - test_builder::TestMetadata, + overall_safety_task::OverallSafetyPropertiesDescription, + test_builder::{TestMetadata, TimingData}, }; use tracing::instrument; @@ -14,7 +18,22 @@ use tracing::instrument; async fn libp2p_network() { async_compatibility_layer::logging::setup_logging(); async_compatibility_layer::logging::setup_backtrace(); - let metadata = TestMetadata::default_multiple_rounds(); + let metadata = TestMetadata { + timing_data: TimingData { + start_delay: 12000000, + ..TimingData::default() + }, + overall_safety_properties: OverallSafetyPropertiesDescription { + check_leaf: true, + ..Default::default() + }, + completion_task_description: CompletionTaskDescription::TimeBasedCompletionTaskBuilder( + TimeBasedCompletionTaskDescription { + duration: Duration::new(240, 0), + }, + ), + ..TestMetadata::default_multiple_rounds() + }; metadata .gen_launcher::() From 8ea9c65416c2030b82cb82c36d2da129e3f85942 Mon Sep 17 00:00:00 2001 From: Justin Restivo Date: Fri, 18 Aug 2023 18:27:33 -0400 Subject: [PATCH 2/2] fix: increase fallback network timeout --- testing/tests/fallback_network.rs | 18 +++++++++++++++++- testing/tests/libp2p.rs | 6 +----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/testing/tests/fallback_network.rs b/testing/tests/fallback_network.rs index 105f71cf9b..b89422eb1e 100644 --- a/testing/tests/fallback_network.rs +++ b/testing/tests/fallback_network.rs @@ -1,5 +1,9 @@ +use std::time::Duration; + use hotshot_testing::{ + completion_task::{CompletionTaskDescription, TimeBasedCompletionTaskDescription}, node_types::{SequencingLibp2pImpl, SequencingTestTypes}, + overall_safety_task::OverallSafetyPropertiesDescription, test_builder::TestMetadata, }; use tracing::instrument; @@ -14,7 +18,19 @@ use tracing::instrument; async fn webserver_libp2p_network() { async_compatibility_layer::logging::setup_logging(); async_compatibility_layer::logging::setup_backtrace(); - let metadata = TestMetadata::default_multiple_rounds(); + let metadata = TestMetadata { + overall_safety_properties: OverallSafetyPropertiesDescription { + check_leaf: true, + ..Default::default() + }, + completion_task_description: CompletionTaskDescription::TimeBasedCompletionTaskBuilder( + TimeBasedCompletionTaskDescription { + duration: Duration::new(240, 0), + }, + ), + ..TestMetadata::default_multiple_rounds() + }; + metadata .gen_launcher::() .launch() diff --git a/testing/tests/libp2p.rs b/testing/tests/libp2p.rs index 45d0ec66c0..f9f0e177ba 100644 --- a/testing/tests/libp2p.rs +++ b/testing/tests/libp2p.rs @@ -4,7 +4,7 @@ use hotshot_testing::{ completion_task::{CompletionTaskDescription, TimeBasedCompletionTaskDescription}, node_types::{SequencingLibp2pImpl, SequencingTestTypes}, overall_safety_task::OverallSafetyPropertiesDescription, - test_builder::{TestMetadata, TimingData}, + test_builder::TestMetadata, }; use tracing::instrument; @@ -19,10 +19,6 @@ async fn libp2p_network() { async_compatibility_layer::logging::setup_logging(); async_compatibility_layer::logging::setup_backtrace(); let metadata = TestMetadata { - timing_data: TimingData { - start_delay: 12000000, - ..TimingData::default() - }, overall_safety_properties: OverallSafetyPropertiesDescription { check_leaf: true, ..Default::default()