[DPE-4575][DPE-4886][DPE-4983] Add voting exclusions management #367

Closed
phvalguima wants to merge 44 commits

Commits (44)
52fcb0d  Rebase and add support for voting exclusions (phvalguima, Jul 8, 2024)
8802410  Add integration test for 3->1->3 scaling; plus fixes (phvalguima, Jul 8, 2024)
d360fe9  Add fix for scaling test (phvalguima, Jul 8, 2024)
05c828b  Fix issue with continuous_writes (phvalguima, Jul 8, 2024)
186dadd  Add rudimentary check for shard relocation (phvalguima, Jul 8, 2024)
784dd0f  Move to self.unit_name (phvalguima, Jul 8, 2024)
b4b9fde  Add safeguards at stopping for the app removal case (phvalguima, Jul 9, 2024)
b9becc8  Move the _get_nodes up in _stop_opensearch (phvalguima, Jul 9, 2024)
f22f75b  Focus remove-app test to only remove opensearch (phvalguima, Jul 9, 2024)
ce0c3e6  Add explain call to update status (phvalguima, Jul 10, 2024)
3e59a79  Add explain call to update status (phvalguima, Jul 10, 2024)
b3a9c11  Add explain call to update status (phvalguima, Jul 10, 2024)
1bc6555  Add cluster explain API (phvalguima, Jul 10, 2024)
ee49e33  Fix explain on node_lock (phvalguima, Jul 10, 2024)
5a9e006  Node lock testing -- move logging (phvalguima, Jul 10, 2024)
e633312  Add try/catch for the allocation (phvalguima, Jul 10, 2024)
fc148a5  Extend voting exclusion settling to scenarios: covers outage cases; f… (phvalguima, Jul 12, 2024)
68ca4f0  Add unit tests for exclusions logic (phvalguima, Jul 15, 2024)
8fa6f29  Add fixes to unit tests and removed commented code (phvalguima, Jul 15, 2024)
48c511e  Add fixes for integration tests and unit tests (phvalguima, Jul 15, 2024)
3b16370  Update helper_cluster.py (phvalguima, Jul 17, 2024)
4004ba5  Add retry to the lock (phvalguima, Jul 18, 2024)
6c2137c  Remove fix for dashboards, diff PR; add comments on update_status (phvalguima, Jul 18, 2024)
c45e7e9  Update helper_cluster.py (phvalguima, Jul 22, 2024)
4591f2b  Update helper_cluster.py (phvalguima, Jul 22, 2024)
6ff1ad9  Updates following review (phvalguima, Aug 6, 2024)
9e0e20d  Merge remote-tracking branch 'origin/main' into with-tests-DPE-4057-v… (phvalguima, Aug 6, 2024)
c3b71e7  fix elected_manager to return the Node object instead of the ID strin… (phvalguima, Aug 6, 2024)
3ebadee  Fixes for exclusions, locking and health (phvalguima, Aug 7, 2024)
f0e0fec  Extend to a larger runner (phvalguima, Aug 7, 2024)
bbca393  WIP: updating integration test_ha for 2 nodes (phvalguima, Aug 8, 2024)
adeef52  WIP(2): updating integration test_ha for 2 nodes (phvalguima, Aug 8, 2024)
3d74325  Merge remote-tracking branch 'origin' into with-tests-DPE-4057-voting… (phvalguima, Aug 8, 2024)
e14ae76  Fix scale up and down test (phvalguima, Aug 9, 2024)
b909c16  Update from main branch (phvalguima, Aug 12, 2024)
af9fa25  move from xlarge to large (phvalguima, Aug 12, 2024)
7c1595a  Add more info in the test_restart_db_process_node_with_elected_cm (phvalguima, Aug 12, 2024)
3851b2d  add more logging (phvalguima, Aug 12, 2024)
631b6a8  _on_update_status: remove the is_node_up() from the start and move it… (phvalguima, Aug 13, 2024)
8972bf3  Merge remote-tracking branch 'origin' into with-tests-DPE-4057-voting… (phvalguima, Aug 13, 2024)
ad524e6  Fix node exclusions to also check for service running, plus manually … (phvalguima, Aug 13, 2024)
77b1c67  remove pdb mentions to test_ha (phvalguima, Aug 13, 2024)
d84e1d5  Extend is_active to consider other cases such as an stopped process (phvalguima, Aug 14, 2024)
bd97709  Add missing rstrip() (phvalguima, Aug 14, 2024)
52 changes: 51 additions & 1 deletion lib/charms/opensearch/v0/helper_cluster.py
@@ -139,6 +139,54 @@ def nodes_by_role(nodes: List[Node]) -> Dict[str, List[Node]]:

return result

@staticmethod
def elected_manager(
opensearch: OpenSearchDistribution,
use_localhost: bool,
hosts: Optional[List[str]] = None,
) -> Optional[Node]:
"""Return the elected cluster manager node, or None if it cannot be determined."""
host: Optional[str] = None # defaults to current unit ip
alt_hosts: Optional[List[str]] = hosts
if not use_localhost and hosts:
host = hosts[0]
if len(hosts) >= 2:
alt_hosts = hosts[1:]

if use_localhost or host:
manager_id = opensearch.request(
"GET",
"/_cluster/state/cluster_manager_node",
host=host,
alt_hosts=alt_hosts,
retries=3,
)
if "cluster_manager_node" not in manager_id:
return None

response = opensearch.request(
"GET",
f"/_nodes/{manager_id['cluster_manager_node']}",
host=host,
alt_hosts=alt_hosts,
retries=3,
)
if "nodes" in response:
for id, obj in response["nodes"].items():
if id != manager_id["cluster_manager_node"]:
return None
node = Node(
name=obj["name"],
roles=obj["roles"],
ip=obj["ip"],
app=App(id=obj["attributes"]["app_id"]),
unit_number=int(obj["name"].split(".")[0].split("-")[-1]),
temperature=obj.get("attributes", {}).get("temp"),
)
return node

return None

@staticmethod
def nodes(
opensearch: OpenSearchDistribution,
@@ -149,7 +197,9 @@ def nodes(
host: Optional[str] = None # defaults to current unit ip
alt_hosts: Optional[List[str]] = hosts
if not use_localhost and hosts:
host, alt_hosts = hosts[0], hosts[1:]
host = hosts[0]
if len(hosts) >= 2:
alt_hosts = hosts[1:]

nodes: List[Node] = []
if use_localhost or host:
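
For reference, a minimal usage sketch of the new elected_manager helper. This is an illustration only: it assumes the static methods above belong to the ClusterTopology class in helper_cluster.py, that opensearch is the charm's OpenSearchDistribution client, and that alt_hosts is its usual list of reachable peer hosts; current_manager_name is a made-up name for the example.

from typing import List, Optional

from charms.opensearch.v0.helper_cluster import ClusterTopology


def current_manager_name(opensearch, alt_hosts: Optional[List[str]]) -> Optional[str]:
    """Return the node name of the elected cluster manager, or None if unresolved."""
    manager = ClusterTopology.elected_manager(
        opensearch,
        use_localhost=opensearch.is_node_up(),
        hosts=alt_hosts,
    )
    # elected_manager returns None during an election, an outage, or when the
    # reported manager id cannot be matched to an entry in /_nodes.
    return manager.name if manager else None
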
93 changes: 73 additions & 20 deletions lib/charms/opensearch/v0/opensearch_base_charm.py
@@ -64,7 +64,11 @@
from charms.opensearch.v0.opensearch_health import HealthColors, OpenSearchHealth
from charms.opensearch.v0.opensearch_internal_data import RelationDataStore, Scope
from charms.opensearch.v0.opensearch_locking import OpenSearchNodeLock
from charms.opensearch.v0.opensearch_nodes_exclusions import OpenSearchExclusions
from charms.opensearch.v0.opensearch_nodes_exclusions import (
OpenSearchExclusionError,
OpenSearchExclusionNodeNotRegisteredError,
OpenSearchExclusions,
)
from charms.opensearch.v0.opensearch_peer_clusters import (
OpenSearchPeerClustersManager,
OpenSearchProvidedRolesException,
@@ -436,7 +440,7 @@ def _on_peer_relation_changed(self, event: RelationChangedEvent):
if not (unit_data := event.relation.data.get(event.unit)):
return

self.opensearch_exclusions.cleanup()
self.opensearch_exclusions.allocation_cleanup()

if self.unit.is_leader() and unit_data.get("bootstrap_contributor"):
contributor_count = self.peers_data.get(Scope.APP, "bootstrap_contributors_count", 0)
@@ -517,7 +521,7 @@ def _on_opensearch_data_storage_detaching(self, _: StorageDetachingEvent): # no
# release lock
self.node_lock.release()

def _on_update_status(self, event: UpdateStatusEvent):
def _on_update_status(self, event: UpdateStatusEvent): # noqa: C901
"""On update status event.

We want to periodically check for the following:
@@ -533,23 +537,39 @@ def _on_update_status(self, event: UpdateStatusEvent):
self.status.set(BlockedStatus(" - ".join(missing_sys_reqs)))
return

# if node already shutdown - leave
if not self.opensearch.is_node_up():
return

# if there are exclusions to be removed
if self.unit.is_leader():
self.opensearch_exclusions.cleanup()
Review thread on this line:

Contributor: While before this cleanup was done on every update_status, now it is only done when the health is green. Is this on purpose?

Contributor Author: That is not the case: it is done in every case except HealthColors.UNKNOWN. We do defer the event if health is not green, but I put the call further down because the API needs to be responsive before configuring voting exclusions. If it is not responsive, we will get UNKNOWN anyway and retry later. I will add some comments to clarify that.

Contributor: Is there a reason why the shard allocation exclusion cleanup is postponed until later in the hook? As long as there is connectivity to a host, we should be able to clean up.

Contributor Author: Yes, the health checks below let anything pass unless the cluster is really in a bad state (i.e. UNKNOWN), so I moved the cleanup below these first health checks because it makes more sense there.

A simplified sketch of the resulting update-status ordering appears at the end of this hunk.


if (health := self.health.apply(wait_for_green_first=True)) not in [
HealthColors.GREEN,
HealthColors.IGNORE,
]:
] or not self.opensearch.is_node_up():
# Do not return right now!
# We must first check if we need to remove exclusions
event.defer()

# Unless it is unknown, in this case we can return and wait for the next run
if health == HealthColors.UNKNOWN:
return

# Execute allocations now, as we know the cluster is minimally healthy.
self.opensearch_exclusions.allocation_cleanup()
# Now, review voting exclusions, as we may have lost a unit due to an outage
if not self.is_any_voting_unit_stopping():
try:
self.opensearch_exclusions.settle_voting(
unit_is_stopping=False,
retry=False,
)
except (OpenSearchExclusionError, OpenSearchHttpError):
# Register the issue but do not act on it: let the next update status event
# retry the operation.
# We cannot assume or try to enforce the cluster to be in a healthy state
logger.warning("Failed to settle voting exclusions")

# if node already shutdown - leave
if not self.opensearch.is_node_up():
return

for relation in self.model.relations.get(ClientRelationName, []):
self.opensearch_provider.update_endpoints(relation)
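
To summarize the ordering discussed in the review thread above, here is a small, self-contained sketch of how the update-status flow ends up after this change. It is an approximation for illustration only; the FakeCharm stub below is hypothetical and not part of the charm.

from dataclasses import dataclass, field
from typing import List

GREEN, YELLOW, IGNORE, UNKNOWN = "green", "yellow", "ignore", "unknown"


@dataclass
class FakeCharm:
    """Hypothetical stand-in for the charm, just to make the control flow runnable."""

    health: str = UNKNOWN               # what health.apply(wait_for_green_first=True) reports
    node_up: bool = False               # what opensearch.is_node_up() reports
    voting_unit_stopping: bool = False  # True if any peer flagged "voting_unit_stopping"
    calls: List[str] = field(default_factory=list)

    def on_update_status(self) -> None:
        # 1. Health check first: the API must be responsive before touching exclusions.
        if self.health not in (GREEN, IGNORE) or not self.node_up:
            self.calls.append("defer")      # retry later, but keep going for now
            if self.health == UNKNOWN:      # API unreachable: nothing more to do this round
                return
        # 2. Cluster is minimally healthy: clean shard-allocation exclusions.
        self.calls.append("allocation_cleanup")
        # 3. Settle voting exclusions, unless a voting unit is in the middle of stopping.
        if not self.voting_unit_stopping:
            self.calls.append("settle_voting(unit_is_stopping=False)")
        # 4. Only now bail out if this node is already shut down.
        if not self.node_up:
            return
        self.calls.append("update_endpoints")


charm = FakeCharm(health=YELLOW, node_up=True)
charm.on_update_status()
print(charm.calls)
# ['defer', 'allocation_cleanup', 'settle_voting(unit_is_stopping=False)', 'update_endpoints']
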

@@ -584,6 +604,16 @@ def _on_config_changed(self, event: ConfigChangedEvent): # noqa C901
RelationJoinedEvent(event.handle, PeerRelationName, self.app, self.unit)
)

# Review voting exclusions as our IP has changed: we may be coming back from a network
# outage case.
# In this case, we should retry if the node is not found in the cluster
if not self.is_any_voting_unit_stopping():
try:
self.opensearch_exclusions.settle_voting(unit_is_stopping=False)
except (OpenSearchExclusionNodeNotRegisteredError, OpenSearchHttpError):
event.defer()
return

previous_deployment_desc = self.opensearch_peer_cm.deployment_desc()
if self.unit.is_leader():
# run peer cluster manager processing
@@ -741,6 +771,14 @@ def on_tls_relation_broken(self, _: RelationBrokenEvent):
# Otherwise, we block.
self.status.set(BlockedStatus(TLSRelationBrokenError))

def is_any_voting_unit_stopping(self) -> bool:
"""Check if any voting unit is stopping."""
rel = self.model.get_relation(PeerRelationName)
for unit in all_units(self):
if rel.data[unit].get("voting_unit_stopping") == "True":
return True
return False

def is_every_unit_marked_as_started(self) -> bool:
"""Check if every unit in the cluster is marked as started."""
rel = self.model.get_relation(PeerRelationName)
@@ -914,7 +952,9 @@ def _post_start_init(self, event: _StartOpenSearch): # noqa: C901
self._cleanup_bootstrap_conf_if_applies()

# Remove the exclusions that could not be removed when no units were online
self.opensearch_exclusions.delete_current()
if not self.is_any_voting_unit_stopping():
self.opensearch_exclusions.settle_voting(unit_is_stopping=False)
self.opensearch_exclusions.delete_allocations()

self.node_lock.release()

@@ -1006,34 +1046,47 @@ def _post_start_init(self, event: _StartOpenSearch): # noqa: C901
if self.opensearch_peer_cm.is_provider():
self.peer_cluster_provider.refresh_relation_data(event, can_defer=False)

def _stop_opensearch(self, *, restart=False) -> None:
def _stop_opensearch(self, *, restart=False) -> None: # noqa: C901
"""Stop OpenSearch if possible."""
self.status.set(WaitingStatus(ServiceIsStopping))

if "cluster_manager" in self.opensearch.roles or "voting_only" in self.opensearch.roles:
# Inform peers that this unit is stopping and it has a voting seat.
# This unit must be the only one managing the voting exclusions, as it may have to
# exclude itself from the voting while stopping.
self.peers_data.put(Scope.UNIT, "voting_unit_stopping", True)

nodes = []
if self.opensearch.is_node_up():
try:
nodes = self._get_nodes(True)
nodes = self._get_nodes(self.opensearch.is_node_up())
# do not add exclusions if it's the last unit to stop
# otherwise cluster manager election will be blocked when starting up again
# and re-using storage
if len(nodes) > 1:
# 1. Add current node to the voting + alloc exclusions
self.opensearch_exclusions.add_current(restart=restart)
if len(nodes) > 1 and not restart:
# 1. Add current node to the voting + alloc exclusions if not restarting
self.opensearch_exclusions.add_allocations()
except OpenSearchHttpError:
logger.debug("Failed to get online nodes, voting and alloc exclusions not added")
logger.debug("Failed to get online nodes, alloc exclusion not added")

# block until all primary shards are moved away from the unit that is stopping
self.health.wait_for_shards_relocation()

# 2. stop the service
# We should only run voting settle right before stop. We need to manage voting before
# stopping e.g. in case we are going 1->0 units.
# We MUST run settle_voting, even if other units are stopping as well.
self.opensearch_exclusions.settle_voting(unit_is_stopping=True)
self.opensearch.stop()

self.peers_data.delete(Scope.UNIT, "voting_unit_stopping")
self.peers_data.delete(Scope.UNIT, "started")
self.status.set(WaitingStatus(ServiceStopped))

# 3. Remove the exclusions
if not restart:
try:
self.opensearch_exclusions.delete_current()
self.opensearch_exclusions.delete_allocations()
except Exception:
# It is purposefully broad - as this can fail for HTTP reasons,
# or if the config wasn't set on disk etc. In any way, this operation is on
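
The comments in _stop_opensearch above describe a small coordination rule: the stopping unit sets a voting_unit_stopping flag in its peer databag so that only it touches voting exclusions while shutting down, and the other code paths (update-status, config-changed, post-start) skip settle_voting while any peer has the flag set. Below is a toy model of that flag, with plain dicts standing in for Juju relation databags; this is an assumption for illustration, not the charm's real peer-data API.

from typing import Dict

PeerData = Dict[str, Dict[str, str]]  # unit name -> that unit's peer databag


def mark_voting_unit_stopping(peer_data: PeerData, unit: str) -> None:
    """The stopping unit claims sole ownership of voting-exclusion changes."""
    peer_data.setdefault(unit, {})["voting_unit_stopping"] = "True"


def is_any_voting_unit_stopping(peer_data: PeerData) -> bool:
    """Other units must skip settle_voting() while this returns True."""
    return any(bag.get("voting_unit_stopping") == "True" for bag in peer_data.values())


peers: PeerData = {"opensearch/0": {}, "opensearch/1": {}}
mark_voting_unit_stopping(peers, "opensearch/1")    # unit 1 enters _stop_opensearch
assert is_any_voting_unit_stopping(peers)           # unit 0 now leaves voting exclusions alone
peers["opensearch/1"].pop("voting_unit_stopping")   # cleared once the service has stopped
assert not is_any_voting_unit_stopping(peers)
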
@@ -1175,7 +1228,7 @@ def _remove_data_role_from_dedicated_cm_if_needed( # noqa: C901
self.opensearch_config.remove_temporary_data_role()

# wait until data moves out completely
self.opensearch_exclusions.add_current()
self.opensearch_exclusions.add_allocations()

try:
for attempt in Retrying(stop=stop_after_attempt(3), wait=wait_fixed(0.5)):
@@ -1188,7 +1241,7 @@ def _remove_data_role_from_dedicated_cm_if_needed( # noqa: C901
raise Exception
return True
except RetryError:
self.opensearch_exclusions.delete_current()
self.opensearch_exclusions.delete_allocations()
event.defer()
return False

4 changes: 3 additions & 1 deletion lib/charms/opensearch/v0/opensearch_distro.py
@@ -398,9 +398,11 @@ def roles(self) -> List[str]:
"""Get the list of the roles assigned to this node."""
try:
nodes = self.request("GET", f"/_nodes/{self.node_id}", alt_hosts=self._charm.alt_hosts)
logger.debug(f"Request for node {self.node_id} returned: {nodes}")

return nodes["nodes"][self.node_id]["roles"]
except OpenSearchHttpError:
return self.config.load("opensearch.yml")["node.roles"]
return self.config.load("opensearch.yml").get("node.roles", [])

@property
def host(self) -> str:
32 changes: 27 additions & 5 deletions lib/charms/opensearch/v0/opensearch_locking.py
@@ -254,9 +254,28 @@ def acquired(self) -> bool: # noqa: C901
self._opensearch, use_localhost=host is not None, hosts=alt_hosts
)
)
try:
logger.debug(
"Current shard allocation status: %s",
self._opensearch.request(
"GET",
"/_cluster/allocation/explain?include_yes_decisions=true&include_disk_info=true",
payload={
"index": self.OPENSEARCH_INDEX,
"shard": 0,
"primary": "true",
},
),
)
except Exception:
logger.debug("Current shard allocation status: error to connect with API")
pass

except OpenSearchHttpError:
logger.exception("Error getting OpenSearch nodes")
return False
# If we are trying to acquire the lock at application removal, this condition
# will be eventually hit
return len(self.units) <= 1
logger.debug(f"[Node lock] Opensearch {online_nodes=}")
assert online_nodes > 0
try:
@@ -266,10 +285,10 @@ def acquired(self) -> bool: # noqa: C901
# if the node lock cannot be acquired, fall back to peer databag lock
# this avoids hitting deadlock situations in cases where
# the .charm_node_lock index is not available
if online_nodes <= 1:
if online_nodes <= 1 and self._charm.app.planned_units() > 1:
return self._peer.acquired
else:
return False
return self._charm.app.planned_units() > 1
# If online_nodes == 1, we should acquire the lock via the peer databag.
# If we acquired the lock via OpenSearch and this unit was stopping, we would be unable
# to release the OpenSearch lock. For example, when scaling to 0.
@@ -302,10 +321,13 @@ def acquired(self) -> bool: # noqa: C901
"to acquire lock"
)
return False
else:
elif self._charm.app.planned_units() > 1:
logger.exception("Error creating OpenSearch lock document")
# in this case, try to acquire peer databag lock as fallback
return self._peer.acquired
else:
# There is only one unit or less
return True
else:
# Ensure write was successful on all nodes
# "It is important to note that this setting [`wait_for_active_shards`] greatly
@@ -359,7 +381,7 @@ def acquired(self) -> bool: # noqa: C901
# If return value is True:
# - Lock granted in previous Juju event
# - OR, unit is leader & lock granted in this Juju event
return self._peer.acquired
return self._peer.acquired if self._charm.app.planned_units() > 1 else True

def release(self):
"""Release lock.
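
A recurring pattern in the locking changes above: whenever the .charm_node_lock index cannot be used, the fallback now depends on planned_units(). With one planned unit or fewer (scaling to zero, application removal) there is no peer left to arbitrate and waiting would deadlock, so the lock is granted outright; otherwise the peer-databag lock decides. A condensed sketch of that decision as a pure function follows; the function name and signature are illustrative, not the charm's API.

def fallback_lock_grant(planned_units: int, peer_lock_acquired: bool) -> bool:
    """Decide the node lock when the .charm_node_lock index is unavailable."""
    if planned_units <= 1:
        # Last unit standing (or app removal): waiting on peers would deadlock.
        return True
    # Otherwise defer to the peer-databag lock.
    return peer_lock_acquired


# Scaling the application to zero: the stopping unit still gets the lock.
assert fallback_lock_grant(planned_units=0, peer_lock_acquired=False) is True
# A 3-unit cluster with the lock index temporarily unavailable: peers decide.
assert fallback_lock_grant(planned_units=3, peer_lock_acquired=False) is False
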