Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix race condition that prevented election of new leader #352

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
195 changes: 81 additions & 114 deletions appuio/redis/templates/configmap-scripts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,31 +27,39 @@ data:
info "$HEADLESS_SERVICE has my IP: ${myip}"
return 0
}
# Asks the sentinel reachable through $REDIS_SERVICE for the address of the current master.
# Globals:   REDIS_SERVICE, REDIS_PASSWORD (only when usePassword is set), REDIS_TLS_ENABLED,
#            REDIS_TLS_CERT_FILE, REDIS_TLS_KEY_FILE, REDIS_TLS_CA_FILE (all read)
# Outputs:   the reply of "sentinel get-master-addr-by-name", folded onto one line, on stdout
get_sentinel_master_info() {
  # The command is assembled as an array so that a password or certificate path containing
  # whitespace or glob characters reaches redis-cli as exactly one argument.
  local -a sentinel_info_command
  sentinel_info_command=(redis-cli {{- if .Values.usePassword }} -a "$REDIS_PASSWORD" {{- end }} -h "$REDIS_SERVICE" -p {{ .Values.sentinel.service.sentinelPort }})
  if is_boolean_yes "$REDIS_TLS_ENABLED"; then
    sentinel_info_command+=(--tls --cert "${REDIS_TLS_CERT_FILE}" --key "${REDIS_TLS_KEY_FILE}" --cacert "${REDIS_TLS_CA_FILE}")
  fi
  sentinel_info_command+=(sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }})
  # The substitution is intentionally unquoted: word splitting joins the reply lines with
  # single spaces, which is the format callers split into an array.
  # shellcheck disable=SC2046,SC2005
  echo $("${sentinel_info_command[@]}")
}

{{- if and .Values.containerSecurityContext.runAsUser (eq (.Values.containerSecurityContext.runAsUser | int) 0) }}
useradd redis
chown -R redis {{ .Values.replica.persistence.path }}
{{- end }}

[[ -f $REDIS_PASSWORD_FILE ]] && export REDIS_PASSWORD="$(< "${REDIS_PASSWORD_FILE}")"
[[ -f $REDIS_MASTER_PASSWORD_FILE ]] && export REDIS_MASTER_PASSWORD="$(< "${REDIS_MASTER_PASSWORD_FILE}")"

HEADLESS_SERVICE="{{ template "redis.fullname" . }}-headless.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"
REDIS_SERVICE="{{ template "redis.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"

# Waits for DNS to add this ip to the service DNS entry
retry_while not_exists_dns_entry

export REDIS_REPLICATION_MODE="slave"
if [[ -z "$(getent ahosts "$HEADLESS_SERVICE" | grep -v "^$(hostname -i) ")" ]]; then
if [[ -z "$(getent ahosts "$HEADLESS_SERVICE" | grep -v "^${myip}")" ]]; then
# Only node available on the network, master by default
export REDIS_REPLICATION_MODE="master"
fi

{{- if and .Values.securityContext.runAsUser (eq (.Values.securityContext.runAsUser | int) 0) }}
useradd redis
chown -R redis {{ .Values.slave.persistence.path }}
{{- end }}

if [[ -n $REDIS_PASSWORD_FILE ]]; then
password_aux=`cat ${REDIS_PASSWORD_FILE}`
export REDIS_PASSWORD=$password_aux
fi

if [[ -n $REDIS_MASTER_PASSWORD_FILE ]]; then
password_aux=`cat ${REDIS_MASTER_PASSWORD_FILE}`
export REDIS_MASTER_PASSWORD=$password_aux
else
export REDIS_REPLICATION_MODE="slave"

# Fetches current master's host and port
REDIS_SENTINEL_INFO=($(get_sentinel_master_info))
REDIS_MASTER_HOST=${REDIS_SENTINEL_INFO[0]}
REDIS_MASTER_PORT_NUMBER=${REDIS_SENTINEL_INFO[1]}
fi

if [[ "$REDIS_REPLICATION_MODE" == "master" ]]; then
Expand All @@ -63,32 +71,6 @@ data:
if [[ ! -f /opt/bitnami/redis/etc/replica.conf ]];then
cp /opt/bitnami/redis/mounted-etc/replica.conf /opt/bitnami/redis/etc/replica.conf
fi

if is_boolean_yes "$REDIS_TLS_ENABLED"; then
sentinel_info_command="redis-cli {{- if .Values.usePassword }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_SERVICE -p {{ .Values.sentinel.port }} --tls --cert ${REDIS_TLS_CERT_FILE} --key ${REDIS_TLS_KEY_FILE} --cacert ${REDIS_TLS_CA_FILE} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
else
sentinel_info_command="redis-cli {{- if .Values.usePassword }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_SERVICE -p {{ .Values.sentinel.port }} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
fi
REDIS_SENTINEL_INFO=($($sentinel_info_command))
REDIS_MASTER_HOST=${REDIS_SENTINEL_INFO[0]}
REDIS_MASTER_PORT_NUMBER=${REDIS_SENTINEL_INFO[1]}


# Immediately attempt to connect to the reported master. If it doesn't exist, the connection attempt will either hang
# or fail with "port unreachable" and give no data. The liveness check will then time out waiting for the redis
# container to be ready and restart it. By then the new master will likely have been elected.
if is_boolean_yes "$REDIS_TLS_ENABLED"; then
sentinel_info_command="redis-cli {{- if .Values.usePassword }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_MASTER_HOST -p {{ .Values.sentinel.port }} --tls --cert ${REDIS_TLS_CERT_FILE} --key ${REDIS_TLS_KEY_FILE} --cacert ${REDIS_TLS_CA_FILE} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
else
sentinel_info_command="redis-cli {{- if .Values.usePassword }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_MASTER_HOST -p {{ .Values.sentinel.port }} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
fi

if [[ ! ($($sentinel_info_command)) ]]; then
# master doesn't actually exist, this probably means the remaining pods haven't elected a new one yet
# and are reporting the old one still. Once this happens the container will get stuck and never see the new
# master. We stop here to allow the container to not pass the liveness check and be restarted.
exit 1
fi
fi

if [[ ! -f /opt/bitnami/redis/etc/redis.conf ]];then
Expand Down Expand Up @@ -150,6 +132,13 @@ data:
. /opt/bitnami/scripts/libvalidations.sh
. /opt/bitnami/scripts/libfile.sh

# Address of this pod as reported by hostname -i
myip="$(hostname -i)"

# If there is more than one IP, use the first IPv4 address (empty when none is found)
if [[ "$myip" == *" "* ]]; then
  if [[ "$myip" =~ ([0-9]+\.)([0-9]+\.)([0-9]+\.)[0-9]+ ]]; then
    myip="${BASH_REMATCH[0]}"
  else
    myip=""
  fi
fi

sentinel_conf_set() {
local -r key="${1:?missing key}"
local value="${2:-}"
Expand All @@ -165,6 +154,10 @@ data:
# Appends one entry to the sentinel configuration file.
# Arguments: the words of the entry; they are written on one line joined by single spaces
# Outputs:   a newline, the entry and a trailing newline appended to sentinel.conf
sentinel_conf_add() {
  local -r conf_file="/opt/bitnami/redis-sentinel/etc/sentinel.conf"
  # Join the arguments with spaces regardless of the caller's IFS
  local IFS=' '
  printf '\n%s\n' "$*" >> "$conf_file"
}
# Deletes every line that starts with "<key>-" and every empty line from the sentinel configuration.
# Arguments: $1 - key prefix; it is inserted into a sed basic regular expression, so it must not
#                 contain "/" and regex metacharacters keep their meaning
#            $2 - file to rewrite (optional, defaults to /opt/bitnami/redis-sentinel/etc/sentinel.conf)
# Returns:   non-zero when sed or mv fails; the original file is then left in place
sentinel_conf_remove() {
  local -r key="${1:?missing key}"
  local -r conf="${2:-/opt/bitnami/redis-sentinel/etc/sentinel.conf}"
  # Double quotes are required here: inside single quotes the shell never expands the key and
  # sed would look for the literal text "$1-" instead.
  # The move only happens when sed succeeded, so a failed run cannot replace the configuration
  # with a truncated file.
  sed -e "/^${key}-/d" -e '/^$/d' "$conf" > "${conf}.tmp" &&
    mv "${conf}.tmp" "$conf"
}
# Derives an identifier from a host name: the SHA-1 hex digest of the name followed by a newline.
# Arguments: $1 - host name
# Outputs:   the second whitespace-separated field of the "openssl sha1" output (the digest)
host_id() {
  # printf is used instead of echo because echo would swallow values such as "-n" or "-e"
  # as options and hash the wrong input.
  printf '%s\n' "$1" | openssl sha1 | awk '{print $2}'
}
Expand All @@ -178,6 +171,14 @@ data:
info "$HEADLESS_SERVICE has my IP: ${myip}"
return 0
}
# Asks the sentinel reachable through $REDIS_SERVICE for the address of the current master.
# Globals:   REDIS_SERVICE, REDIS_PASSWORD (only when usePassword is set), REDIS_TLS_ENABLED,
#            REDIS_TLS_CERT_FILE, REDIS_TLS_KEY_FILE, REDIS_TLS_CA_FILE (all read)
# Outputs:   the reply of "sentinel get-master-addr-by-name", folded onto one line, on stdout
get_sentinel_master_info() {
  # The command is assembled as an array so that a password or certificate path containing
  # whitespace or glob characters reaches redis-cli as exactly one argument.
  local -a sentinel_info_command
  sentinel_info_command=(redis-cli {{- if .Values.usePassword }} -a "$REDIS_PASSWORD" {{- end }} -h "$REDIS_SERVICE" -p {{ .Values.sentinel.service.sentinelPort }})
  if is_boolean_yes "$REDIS_TLS_ENABLED"; then
    sentinel_info_command+=(--tls --cert "${REDIS_TLS_CERT_FILE}" --key "${REDIS_TLS_KEY_FILE}" --cacert "${REDIS_TLS_CA_FILE}")
  fi
  sentinel_info_command+=(sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }})
  # The substitution is intentionally unquoted: word splitting joins the reply lines with
  # single spaces, which is the format callers split into an array.
  # shellcheck disable=SC2046,SC2005
  echo $("${sentinel_info_command[@]}")
}

HEADLESS_SERVICE="{{ template "redis.fullname" . }}-headless.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"
REDIS_SERVICE="{{ template "redis.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"
Expand All @@ -187,93 +188,59 @@ data:
export REDIS_PASSWORD=$password_aux
fi

if [[ ! -f /opt/bitnami/redis-sentinel/etc/sentinel.conf ]]; then
cp /opt/bitnami/redis-sentinel/mounted-etc/sentinel.conf /opt/bitnami/redis-sentinel/etc/sentinel.conf
{{- if .Values.usePassword }}
printf "\nsentinel auth-pass %s %s" "{{ .Values.sentinel.masterSet }}" "$REDIS_PASSWORD" >> /opt/bitnami/redis-sentinel/etc/sentinel.conf
{{- if .Values.sentinel.usePassword }}
printf "\nrequirepass %s" "$REDIS_PASSWORD" >> /opt/bitnami/redis-sentinel/etc/sentinel.conf
{{- end }}
{{- end }}
{{- if .Values.sentinel.staticID }}
printf "\nsentinel myid %s" "$(host_id "$HOSTNAME")" >> /opt/bitnami/redis-sentinel/etc/sentinel.conf
{{- end }}
fi

export REDIS_REPLICATION_MODE="slave"
cp /opt/bitnami/redis-sentinel/mounted-etc/sentinel.conf /opt/bitnami/redis-sentinel/etc/sentinel.conf
{{- if .Values.usePassword }}
printf "\nsentinel auth-pass %s %s" "{{ .Values.sentinel.masterSet }}" "$REDIS_PASSWORD" >> /opt/bitnami/redis-sentinel/etc/sentinel.conf
{{- if .Values.sentinel.usePassword }}
printf "\nrequirepass %s" "$REDIS_PASSWORD" >> /opt/bitnami/redis-sentinel/etc/sentinel.conf
{{- end }}
{{- end }}
printf "\nsentinel myid %s" "$(host_id "$HOSTNAME")" >> /opt/bitnami/redis-sentinel/etc/sentinel.conf

# Waits for DNS to add this ip to the service DNS entry
retry_while not_exists_dns_entry

if [[ -z "$(getent ahosts "$HEADLESS_SERVICE" | grep -v "^$(hostname -i)")" ]]; then
if [[ -z "$(getent ahosts "$HEADLESS_SERVICE" | grep -v "^${myip}")" ]]; then
export REDIS_REPLICATION_MODE="master"
fi

# Clean sentinels from the current sentinel nodes
for node in $( getent ahosts "$HEADLESS_SERVICE" | grep -v "^$(hostname -i)" | cut -f 1 -d ' ' | uniq ); do
info "Cleaning sentinels in sentinel node: $node"
if is_boolean_yes "$REDIS_SENTINEL_TLS_ENABLED"; then
redis-cli {{- if .Values.usePassword }} -a $REDIS_PASSWORD {{- end }} -h $node -p {{ .Values.sentinel.port }} --tls --cert ${REDIS_SENTINEL_TLS_CERT_FILE} --key ${REDIS_SENTINEL_TLS_KEY_FILE} --cacert ${REDIS_SENTINEL_TLS_CA_FILE} sentinel reset "*"
else
redis-cli {{- if .Values.usePassword }} -a $REDIS_PASSWORD {{- end }} -h $node -p {{ .Values.sentinel.port }} sentinel reset "*"
fi
sleep {{ .Values.sentinel.cleanDelaySeconds }}
done
info "Sentinels clean up done"

if [[ "$REDIS_REPLICATION_MODE" == "master" ]]; then
REDIS_MASTER_HOST="$(hostname -i)"
REDIS_MASTER_HOST=${myip}
REDIS_MASTER_PORT_NUMBER="{{ .Values.redisPort }}"
else
if is_boolean_yes "$REDIS_SENTINEL_TLS_ENABLED"; then
sentinel_info_command="redis-cli {{- if .Values.usePassword }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_SERVICE -p {{ .Values.sentinel.port }} --tls --cert ${REDIS_SENTINEL_TLS_CERT_FILE} --key ${REDIS_SENTINEL_TLS_KEY_FILE} --cacert ${REDIS_SENTINEL_TLS_CA_FILE} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
else
sentinel_info_command="redis-cli {{- if .Values.usePassword }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_SERVICE -p {{ .Values.sentinel.port }} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
fi
REDIS_SENTINEL_INFO=($($sentinel_info_command))
export REDIS_REPLICATION_MODE="slave"

# Fetches current master's host and port
REDIS_SENTINEL_INFO=($(get_sentinel_master_info))
REDIS_MASTER_HOST=${REDIS_SENTINEL_INFO[0]}
REDIS_MASTER_PORT_NUMBER=${REDIS_SENTINEL_INFO[1]}
fi

# Immediately attempt to connect to the reported master. If it doesn't exist, the connection attempt will either hang
# or fail with "port unreachable" and give no data. The liveness check will then time out waiting for the sentinel
# container to be ready and restart it. By then the new master will likely have been elected.
if is_boolean_yes "$REDIS_SENTINEL_TLS_ENABLED"; then
sentinel_info_command="redis-cli {{- if .Values.usePassword }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_MASTER_HOST -p {{ .Values.sentinel.port }} --tls --cert ${REDIS_SENTINEL_TLS_CERT_FILE} --key ${REDIS_SENTINEL_TLS_KEY_FILE} --cacert ${REDIS_SENTINEL_TLS_CA_FILE} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
else
sentinel_info_command="redis-cli {{- if .Values.usePassword }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_MASTER_HOST -p {{ .Values.sentinel.port }} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
fi

if [[ ! ($($sentinel_info_command)) ]]; then
# master doesn't actually exist, this probably means the remaining pods haven't elected a new one yet
# and are reporting the old one still. Once this happens the container will get stuck and never see the new
# master. We stop here to allow the container to not pass the liveness check and be restarted.
exit 1
fi
fi
sentinel_conf_set "sentinel monitor" "{{ .Values.sentinel.masterSet }} "$REDIS_MASTER_HOST" "$REDIS_MASTER_PORT_NUMBER" {{ .Values.sentinel.quorum }}"

add_replica() {
if [[ "$1" != "$REDIS_MASTER_HOST" ]]; then
sentinel_conf_add "sentinel known-replica {{ .Values.sentinel.masterSet }} $1 {{ .Values.redisPort }}"
fi
# Records another sentinel in the sentinel configuration via sentinel_conf_add.
# Globals:   HOSTNAME (read)
# Arguments: $1 - host name of the other sentinel
#            $2 - IP address of that host
# Nothing is added when either argument is empty or when $1 equals this host's HOSTNAME.
add_known_sentinel() {
  # Declared local so that the caller's own "hostname" and "ip" variables are not overwritten
  local hostname="$1"
  local ip="$2"

  if [[ -n "$hostname" && -n "$ip" && "$hostname" != "$HOSTNAME" ]]; then
    sentinel_conf_add "sentinel known-sentinel {{ .Values.sentinel.masterSet }} $ip {{ .Values.sentinelPort }} $(host_id "$hostname")"
  fi
}
add_known_replica() {
ip="$1"

{{- if .Values.sentinel.staticID }}
# remove generated known sentinels and replicas
tmp="$(sed -e '/^sentinel known-/d' -e '/^$/d' /opt/bitnami/redis-sentinel/etc/sentinel.conf)"
echo "$tmp" > /opt/bitnami/redis-sentinel/etc/sentinel.conf

for node in $(seq 0 {{ .Values.cluster.slaveCount }}); do
NAME="{{ template "redis.fullname" . }}-node-$node"
IP="$(getent hosts "$NAME.$HEADLESS_SERVICE" | awk ' {print $1 }')"
if [[ "$NAME" != "$HOSTNAME" && -n "$IP" ]]; then
sentinel_conf_add "sentinel known-sentinel {{ .Values.sentinel.masterSet }} $IP {{ .Values.sentinel.port }} $(host_id "$NAME")"
add_replica "$IP"
fi
if [[ -n "$ip" && "$ip" != "$REDIS_MASTER_HOST" ]]; then
sentinel_conf_add "sentinel known-replica {{ .Values.sentinel.masterSet }} $ip {{ .Values.sentinelPort }}"
fi
}

# Add available hosts on the network as known replicas & sentinels
# Each candidate pod name is resolved through the headless service and handed to both helpers.
# NOTE(review): seq includes its upper bound, so this visits indices 0..replicaCount, one more
# than replicaCount pods; presumably the extra name resolves to an empty ip - confirm it is intended.
for node in $(seq 0 {{ .Values.sentinel.replicaCount }}); do
hostname="{{ template "common.names.fullname" . }}-node-$node"
# First field of the getent output is taken as the address of that pod
ip="$(getent hosts "$hostname.$HEADLESS_SERVICE" | awk '{ print $1 }')"

add_known_sentinel "$hostname" "$ip"
add_known_replica "$ip"
done
add_replica "$(hostname -i)"
{{- end }}


{{- if .Values.tls.enabled }}
ARGS=("--port" "0")
ARGS+=("--tls-port" "${REDIS_SENTINEL_TLS_PORT_NUMBER}")
Expand Down