Print logs when the cluster state changes to fail or the fail reason …

…changes. This log allows us to easily distinguish between a lack of full slot coverage and a minority partition when the cluster fails. Sometimes it is not easy to see the minority partition from within healthy shards (both primary and replicas). Signed-off-by: Binbin <[email protected]>
enjoy-binbin · Oct 18, 2024 · c894fc0 · c894fc0
1 parent 701ab72
commit c894fc0
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 1 deletion.
diff --git a/src/cluster.h b/src/cluster.h
@@ -12,6 +12,11 @@
 #define CLUSTER_FAIL 1                                              /* The cluster can't work */
 #define CLUSTER_NAMELEN 40                                          /* sha1 hex length */
 
+/* Reason why the cluster state changes to fail. */
+#define CLUSTER_FAIL_NONE 0               /* No fail reason recorded (initial value / state evaluated as ok). */
+#define CLUSTER_FAIL_NOT_FULL_COVERAGE 1  /* Full coverage is required and a slot is unassigned or its node is FAIL. */
+#define CLUSTER_FAIL_MINORITY_PARTITION 2 /* Fewer primaries are reachable than the needed quorum. */
+
 /* Redirection errors returned by getNodeByQuery(). */
 #define CLUSTER_REDIR_NONE 0          /* Node can serve the request. */
 #define CLUSTER_REDIR_CROSS_SLOT 1    /* -CROSSSLOT request. */

diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
@@ -1082,6 +1082,7 @@ void clusterInit(void) {
     server.cluster->myself = NULL;
     server.cluster->currentEpoch = 0;
     server.cluster->state = CLUSTER_FAIL;
+    server.cluster->fail_reason = CLUSTER_FAIL_NONE;
     server.cluster->size = 0;
     server.cluster->todo_before_sleep = 0;
     server.cluster->nodes = dictCreate(&clusterNodesDictType);
@@ -5282,6 +5283,21 @@ void clusterCloseAllSlots(void) {
  * Cluster state evaluation function
  * -------------------------------------------------------------------------- */
 
+/* Log why the cluster is unable to work and remember the reason.
+ *
+ * 'reason' is one of the CLUSTER_FAIL_* codes. The matching explanation is
+ * logged at NOTICE level and the code is then stored in
+ * server.cluster->fail_reason, which clusterUpdateState() compares against
+ * to detect a change of reason while the state stays FAIL.
+ *
+ * Any code without a dedicated message (CLUSTER_FAIL_NONE included) is
+ * reported as "Unknown reason code." and is still stored. */
+void clusterLogWhyFail(int reason) {
+    const char *msg; /* Only ever points at string literals, hence const. */
+
+    switch (reason) {
+    case CLUSTER_FAIL_NOT_FULL_COVERAGE:
+        msg = "Detect there is at least a hash slot uncovered (no available node is serving it). "
+              "Please check the 'cluster-require-full-coverage' configuration option.";
+        break;
+    case CLUSTER_FAIL_MINORITY_PARTITION:
+        msg = "In a minority partition.";
+        break;
+    default:
+        msg = "Unknown reason code.";
+        break;
+    }
+
+    /* NOTE(review): the state change itself is logged at LL_WARNING by
+     * clusterUpdateState(); confirm that NOTICE is the intended level here. */
+    serverLog(LL_NOTICE, "Currently cluster unable to work: %s", msg);
+    server.cluster->fail_reason = reason;
+}
+
 /* The following are defines that are only used in the evaluation function
  * and are based on heuristics. Actually the main point about the rejoin and
  * writable delay is that they should be a few orders of magnitude larger
@@ -5291,7 +5307,7 @@ void clusterCloseAllSlots(void) {
 #define CLUSTER_WRITABLE_DELAY 2000
 
 void clusterUpdateState(void) {
-    int j, new_state;
+    int j, new_state, new_reason;
     int reachable_primaries = 0;
     static mstime_t among_minority_time;
     static mstime_t first_call_time = 0;
@@ -5312,12 +5328,14 @@ void clusterUpdateState(void) {
     /* Start assuming the state is OK. We'll turn it into FAIL if there
      * are the right conditions. */
     new_state = CLUSTER_OK;
+    new_reason = CLUSTER_FAIL_NONE;
 
     /* Check if all the slots are covered. */
     if (server.cluster_require_full_coverage) {
         for (j = 0; j < CLUSTER_SLOTS; j++) {
             if (server.cluster->slots[j] == NULL || server.cluster->slots[j]->flags & (CLUSTER_NODE_FAIL)) {
                 new_state = CLUSTER_FAIL;
+                new_reason = CLUSTER_FAIL_NOT_FULL_COVERAGE;
                 break;
             }
         }
@@ -5352,6 +5370,7 @@ void clusterUpdateState(void) {
 
         if (reachable_primaries < needed_quorum) {
             new_state = CLUSTER_FAIL;
+            new_reason = CLUSTER_FAIL_MINORITY_PARTITION;
             among_minority_time = mstime();
         }
     }
@@ -5375,7 +5394,13 @@ void clusterUpdateState(void) {
         serverLog(new_state == CLUSTER_OK ? LL_NOTICE : LL_WARNING, "Cluster state changed: %s",
                   new_state == CLUSTER_OK ? "ok" : "fail");
         server.cluster->state = new_state;
+
+        /* Cluster state changes from ok to fail, print a log. */
+        if (new_state == CLUSTER_FAIL) clusterLogWhyFail(new_reason);
     }
+
+    /* Cluster state is still fail, but the reason has changed, print a log. */
+    if (new_state == CLUSTER_FAIL && new_reason != server.cluster->fail_reason) clusterLogWhyFail(new_reason);
 }
 
 /* This function is called after the node startup in order to verify that data

diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h
@@ -368,6 +368,7 @@ struct clusterState {
     clusterNode *myself; /* This node */
     uint64_t currentEpoch;
     int state;              /* CLUSTER_OK, CLUSTER_FAIL, ... */
+    int fail_reason;        /* Why the cluster state changes to fail. */
     int size;               /* Num of primary nodes with at least one slot */
     dict *nodes;            /* Hash table of name -> clusterNode structures */
     dict *shards;           /* Hash table of shard_id -> list (of nodes) structures */