Skip to content

Commit

Permalink
Only primary with slots has the right to mark a node as failed
Browse files Browse the repository at this point in the history
In markNodeAsFailingIfNeeded we will count needed_quorum and failures,
needed_quorum is the half the cluster->size and plus one, and cluster-size
is the size of primary node which contain slots, but when counting
failures, we dit not check if primary has slots.

Signed-off-by: Binbin <[email protected]>
  • Loading branch information
enjoy-binbin committed Jun 12, 2024
1 parent e65b2d2 commit 4e25c45
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/cluster_legacy.c
Original file line number Diff line number Diff line change
Expand Up @@ -1868,7 +1868,7 @@ void markNodeAsFailingIfNeeded(clusterNode *node) {

failures = clusterNodeFailureReportsCount(node);
/* Also count myself as a voter if I'm a primary. */
if (clusterNodeIsPrimary(myself)) failures++;
if (clusterNodeIsPrimary(myself) && myself->numslots) failures++;
if (failures < needed_quorum) return; /* No weak agreement from primaries. */

serverLog(LL_NOTICE, "Marking node %.40s (%s) as failing (quorum reached).", node->name, node->human_nodename);
Expand Down Expand Up @@ -2091,7 +2091,7 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
if (node && node != myself) {
/* We already know this node.
Handle failure reports, only when the sender is a primary. */
if (sender && clusterNodeIsPrimary(sender)) {
if (sender && clusterNodeIsPrimary(sender) && sender->numslots) {
if (flags & (CLUSTER_NODE_FAIL | CLUSTER_NODE_PFAIL)) {
if (clusterNodeAddFailureReport(node, sender)) {
serverLog(LL_VERBOSE, "Node %.40s (%s) reported node %.40s (%s) as not reachable.",
Expand Down Expand Up @@ -4779,7 +4779,7 @@ void clusterCron(void) {
if (!(node->flags & (CLUSTER_NODE_PFAIL | CLUSTER_NODE_FAIL))) {
node->flags |= CLUSTER_NODE_PFAIL;
update_state = 1;
if (clusterNodeIsPrimary(myself) && server.cluster->size == 1) {
if (server.cluster->size == 1 && clusterNodeIsPrimary(myself) && myself->numslots) {
markNodeAsFailingIfNeeded(node);
} else {
serverLog(LL_DEBUG, "*** NODE %.40s possibly failing", node->name);
Expand Down
68 changes: 68 additions & 0 deletions tests/unit/cluster/failure-marking.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ start_cluster 1 1 {tags {external:skip cluster}} {
pause_process $replica1_pid

wait_node_marked_fail 0 $replica1_instance_id

resume_process $replica1_pid
}
}

Expand Down Expand Up @@ -49,5 +51,71 @@ start_cluster 2 1 {tags {external:skip cluster}} {
resume_process $primary2_pid

wait_node_marked_fail 0 $replica1_instance_id

resume_process $replica1_pid
}
}

set old_singledb $::singledb
set ::singledb 1

tags {external:skip tls:skip cluster} {
set base_conf [list cluster-enabled yes cluster-ping-interval 100 cluster-node-timeout 3000 save ""]
start_multiple_servers 5 [list overrides $base_conf] {
test "Only primary with slots has the right to mark a node as failed" {
set primary_host [srv 0 host]
set primary_port [srv 0 port]
set primary_pid [srv 0 pid]
set primary_id [R 0 CLUSTER MYID]
set replica_id [R 1 CLUSTER MYID]
set replica_pid [srv -1 pid]

# Meet others nodes.
R 1 CLUSTER MEET $primary_host $primary_port
R 2 CLUSTER MEET $primary_host $primary_port
R 3 CLUSTER MEET $primary_host $primary_port
R 4 CLUSTER MEET $primary_host $primary_port

# Build a single primary cluster.
cluster_allocate_slots 1 1
wait_for_cluster_propagation
R 1 CLUSTER REPLICATE $primary_id
wait_for_cluster_propagation
wait_for_cluster_state "ok"

# Pause the primary, marking the primary as pfail.
pause_process $primary_pid
wait_node_marked_pfail 1 $primary_id
wait_node_marked_pfail 2 $primary_id
wait_node_marked_pfail 3 $primary_id
wait_node_marked_pfail 4 $primary_id

# Pause the replica, marking the replica as pfail.
pause_process $replica_pid
wait_node_marked_pfail 2 $replica_id
wait_node_marked_pfail 3 $replica_id
wait_node_marked_pfail 4 $replica_id

# Resume the primary, marking the replica as fail.
resume_process $primary_pid
wait_node_marked_fail 0 $replica_id
wait_node_marked_fail 2 $replica_id
wait_node_marked_fail 3 $replica_id
wait_node_marked_fail 4 $replica_id

# Check if we got the right failure reports.
wait_for_condition 1000 50 {
[R 0 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 0 &&
[R 2 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 1 &&
[R 3 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 1 &&
[R 4 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 1
} else {
fail "Cluster COUNT-FAILURE-REPORTS is not right."
}

resume_process $replica_pid
}
}
}

set ::singledb $old_singledb

0 comments on commit 4e25c45

Please sign in to comment.