Skip to content

Commit

Permalink
refreshes ContactInfo.outset before initializing validator (#3135)
Browse files Browse the repository at this point in the history
Nodes join gossip during the bootstrap process with a stub contact-info
which, in particular, has an invalid TVU socket address.

Once the bootstrap is done they re-join gossip a 2nd time with a fully
populated contact-info, but this contact-info has an outset timestamp
older than the 1st one because it was initiated earlier.

In v2.0 the outset timestamp determines which contact-info overrides the
other, so the v2.0 nodes refrain from updating their CRDS table with the
fully initialized contact-info.

The commit refreshes ContactInfo.outset before initializing the
validator so that it overrides the one pushed to the gossip by the
bootstrap stage.
  • Loading branch information
behzadnouri authored and willhickey committed Oct 12, 2024
1 parent cafb5ef commit d9f20e9
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 9 deletions.
2 changes: 1 addition & 1 deletion gossip/src/cluster_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -666,7 +666,7 @@ impl ClusterInfo {
*instance = NodeInstance::new(&mut thread_rng(), id, timestamp());
}
*self.keypair.write().unwrap() = new_keypair;
self.my_contact_info.write().unwrap().set_pubkey(id);
self.my_contact_info.write().unwrap().hot_swap_pubkey(id);

self.insert_self();
self.push_message(CrdsValue::new_signed(
Expand Down
19 changes: 12 additions & 7 deletions gossip/src/contact_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,11 +181,7 @@ impl ContactInfo {
Self {
pubkey,
wallclock,
outset: {
let now = SystemTime::now();
let elapsed = now.duration_since(UNIX_EPOCH).unwrap();
u64::try_from(elapsed.as_micros()).unwrap()
},
outset: get_node_outset(),
shred_version,
version: solana_version::Version::default(),
addrs: Vec::<IpAddr>::default(),
Expand All @@ -210,8 +206,11 @@ impl ContactInfo {
self.shred_version
}

pub fn set_pubkey(&mut self, pubkey: Pubkey) {
self.pubkey = pubkey
/// Replaces this contact-info's pubkey with `pubkey` and re-stamps its
/// `outset` with a fresh value from `get_node_outset()`.
pub fn hot_swap_pubkey(&mut self, pubkey: Pubkey) {
    // A fresh outset is required so that this node's contact-info
    // supersedes an older node's entry that carries the same pubkey.
    let refreshed_outset = get_node_outset();
    self.pubkey = pubkey;
    self.outset = refreshed_outset;
}

pub fn set_wallclock(&mut self, wallclock: u64) {
Expand Down Expand Up @@ -409,6 +408,12 @@ impl ContactInfo {
}
}

/// Returns the current system time as whole microseconds elapsed since the
/// UNIX epoch; this is the value stored in `ContactInfo.outset`.
///
/// # Panics
///
/// Panics if the system clock reads earlier than the UNIX epoch, or if the
/// microsecond count does not fit in a `u64`.
fn get_node_outset() -> u64 {
    let micros_since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_micros();
    // `as_micros` yields a u128; narrow with a checked conversion rather than
    // a silently truncating cast.
    u64::try_from(micros_since_epoch).unwrap()
}

impl<'de> Deserialize<'de> for ContactInfo {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
Expand Down
2 changes: 1 addition & 1 deletion turbine/src/cluster_nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,7 @@ pub fn make_test_cluster<R: Rng>(
.collect();
nodes.shuffle(rng);
let keypair = Arc::new(Keypair::new());
nodes[0].set_pubkey(keypair.pubkey());
nodes[0] = ContactInfo::new_localhost(&keypair.pubkey(), /*wallclock:*/ timestamp());
let this_node = nodes[0].clone();
let mut stakes: HashMap<Pubkey, u64> = nodes
.iter()
Expand Down
7 changes: 7 additions & 0 deletions validator/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1901,6 +1901,13 @@ pub fn main() {
return;
}

// Bootstrap code above pushes a contact-info with more recent timestamp to
// gossip. If the node is staked the contact-info lingers in gossip causing
// false duplicate nodes error.
// Below line refreshes the timestamp on contact-info so that it overrides
// the one pushed by bootstrap.
node.info.hot_swap_pubkey(identity_keypair.pubkey());

let validator = Validator::new(
node,
identity_keypair,
Expand Down

0 comments on commit d9f20e9

Please sign in to comment.