Only primary with slots has the right to mark a node as failed

In markNodeAsFailingIfNeeded we will count needed_quorum and failures, needed_quorum is the half the cluster->size and plus one, and cluster-size is the size of primary node which contain slots, but when counting failures, we dit not check if primary has slots. Signed-off-by: Binbin <[email protected]>
enjoy-binbin · Jun 12, 2024 · 4e25c45 · 4e25c45
1 parent e65b2d2
commit 4e25c45
Show file tree

Hide file tree

Showing 2 changed files with 71 additions and 3 deletions.
diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
@@ -1868,7 +1868,7 @@ void markNodeAsFailingIfNeeded(clusterNode *node) {
 
     failures = clusterNodeFailureReportsCount(node);
     /* Also count myself as a voter if I'm a primary. */
-    if (clusterNodeIsPrimary(myself)) failures++;
+    if (clusterNodeIsPrimary(myself) && myself->numslots) failures++;
     if (failures < needed_quorum) return; /* No weak agreement from primaries. */
 
     serverLog(LL_NOTICE, "Marking node %.40s (%s) as failing (quorum reached).", node->name, node->human_nodename);
@@ -2091,7 +2091,7 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
         if (node && node != myself) {
             /* We already know this node.
                Handle failure reports, only when the sender is a primary. */
-            if (sender && clusterNodeIsPrimary(sender)) {
+            if (sender && clusterNodeIsPrimary(sender) && sender->numslots) {
                 if (flags & (CLUSTER_NODE_FAIL | CLUSTER_NODE_PFAIL)) {
                     if (clusterNodeAddFailureReport(node, sender)) {
                         serverLog(LL_VERBOSE, "Node %.40s (%s) reported node %.40s (%s) as not reachable.",
@@ -4779,7 +4779,7 @@ void clusterCron(void) {
             if (!(node->flags & (CLUSTER_NODE_PFAIL | CLUSTER_NODE_FAIL))) {
                 node->flags |= CLUSTER_NODE_PFAIL;
                 update_state = 1;
-                if (clusterNodeIsPrimary(myself) && server.cluster->size == 1) {
+                if (server.cluster->size == 1 && clusterNodeIsPrimary(myself) && myself->numslots) {
                     markNodeAsFailingIfNeeded(node);
                 } else {
                     serverLog(LL_DEBUG, "*** NODE %.40s possibly failing", node->name);

diff --git a/tests/unit/cluster/failure-marking.tcl b/tests/unit/cluster/failure-marking.tcl
@@ -16,6 +16,8 @@ start_cluster 1 1 {tags {external:skip cluster}} {
         pause_process $replica1_pid
 
         wait_node_marked_fail 0 $replica1_instance_id
+
+        resume_process $replica1_pid
     }
 }
 
@@ -49,5 +51,71 @@ start_cluster 2 1 {tags {external:skip cluster}} {
         resume_process $primary2_pid
 
         wait_node_marked_fail 0 $replica1_instance_id
+
+        resume_process $replica1_pid
+    }
+}
+
+set old_singledb $::singledb
+set ::singledb 1
+
+tags {external:skip tls:skip cluster} {
+    set base_conf [list cluster-enabled yes cluster-ping-interval 100 cluster-node-timeout 3000 save ""]
+    start_multiple_servers 5 [list overrides $base_conf] {
+        test "Only primary with slots has the right to mark a node as failed" {
+            set primary_host [srv 0 host]
+            set primary_port [srv 0 port]
+            set primary_pid [srv 0 pid]
+            set primary_id [R 0 CLUSTER MYID]
+            set replica_id [R 1 CLUSTER MYID]
+            set replica_pid [srv -1 pid]
+
+            # Meet others nodes.
+            R 1 CLUSTER MEET $primary_host $primary_port
+            R 2 CLUSTER MEET $primary_host $primary_port
+            R 3 CLUSTER MEET $primary_host $primary_port
+            R 4 CLUSTER MEET $primary_host $primary_port
+
+            # Build a single primary cluster.
+            cluster_allocate_slots 1 1
+            wait_for_cluster_propagation
+            R 1 CLUSTER REPLICATE $primary_id
+            wait_for_cluster_propagation
+            wait_for_cluster_state "ok"
+
+            # Pause the primary, marking the primary as pfail.
+            pause_process $primary_pid
+            wait_node_marked_pfail 1 $primary_id
+            wait_node_marked_pfail 2 $primary_id
+            wait_node_marked_pfail 3 $primary_id
+            wait_node_marked_pfail 4 $primary_id
+
+            # Pause the replica, marking the replica as pfail.
+            pause_process $replica_pid
+            wait_node_marked_pfail 2 $replica_id
+            wait_node_marked_pfail 3 $replica_id
+            wait_node_marked_pfail 4 $replica_id
+
+            # Resume the primary, marking the replica as fail.
+            resume_process $primary_pid
+            wait_node_marked_fail 0 $replica_id
+            wait_node_marked_fail 2 $replica_id
+            wait_node_marked_fail 3 $replica_id
+            wait_node_marked_fail 4 $replica_id
+
+            # Check if we got the right failure reports.
+            wait_for_condition 1000 50 {
+                [R 0 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 0 &&
+                [R 2 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 1 &&
+                [R 3 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 1 &&
+                [R 4 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 1
+            } else {
+                fail "Cluster COUNT-FAILURE-REPORTS is not right."
+            }
+
+            resume_process $replica_pid
+        }
     }
 }
+
+set ::singledb $old_singledb