-
Notifications
You must be signed in to change notification settings - Fork 1
/
lb_monitor.sh
156 lines (136 loc) · 5.79 KB
/
lb_monitor.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/bin/sh
# Load-balancer failover monitor.
# Pings both load-balancer instances in a loop and, when one of them stops
# answering, hands the virtual IP and the route over to the other one.

# ---- Instance settings: fill these in before running ----
# VIP     : secondary private IP that is assigned to the active LB's interface
# LB1_ID  : EC2 instance ID of the first load balancer
# LB2_ID  : EC2 instance ID of the second load balancer
# RT_ID   : ID of the route table whose route is pointed at the active LB
VIP=""
LB1_ID=""
LB2_ID=""
RT_ID=""

# Destination CIDR of the managed route. For an LVS load balancer this is
# normally 0.0.0.0/0, because all traffic must be routed back through it for
# masquerading.
REMOTE_RANGE='0.0.0.0/0'

# EC2 endpoint of the region this runs in,
# e.g. https://ec2.us-east-1.amazonaws.com
EC2_URL=""

# ---- Health-check tuning ----
Num_Pings='3'                  # value for ping -c on every check
Ping_Timeout='1'               # value for ping -W
Wait_Between_Pings='1'         # sleep (seconds) after a healthy check
Wait_for_Instance_Stop='60'    # sleep (seconds) between polls while stopping
Wait_for_Instance_Start='300'  # sleep (seconds) after starting an instance

# Pull in the default environment for the AWS API tools; this also lets them
# use the security credentials provided by the instance's EC2 role.
. /etc/profile.d/aws-apitools-common.sh
# Resolve, for each load balancer, the address that is pinged for health
# checks and the network interface that receives the VIP and the route.
#
# The EC2 role of the instance running this script must allow every API call
# the script makes: DescribeInstances, AssignPrivateIpAddresses, CreateRoute,
# ReplaceRoute, StartInstances and StopInstances. Example policy:
# {
#   "Statement": [
#     {
#       "Action": [
#         "ec2:DescribeInstances",
#         "ec2:AssignPrivateIpAddresses",
#         "ec2:CreateRoute",
#         "ec2:ReplaceRoute",
#         "ec2:StartInstances",
#         "ec2:StopInstances"
#       ],
#       "Effect": "Allow",
#       "Resource": "*"
#     }
#   ]
# }

# Describe each instance once and reuse the output for both lookups.
LB1_DESC=$(/opt/aws/bin/ec2-describe-instances "$LB1_ID" -U "$EC2_URL")
LB2_DESC=$(/opt/aws/bin/ec2-describe-instances "$LB2_ID" -U "$EC2_URL")

# Private IP: second tab-separated field of the first PRIVATEIPADDRESS line.
# (-F'\t' is the portable spelling; $'\t' is not available in plain sh.)
LB1_IP=$(printf '%s\n' "$LB1_DESC" | awk -F'\t' '/PRIVATEIPADDRESS/ { print $2; exit }')
LB2_IP=$(printf '%s\n' "$LB2_DESC" | awk -F'\t' '/PRIVATEIPADDRESS/ { print $2; exit }')

# ENI ID: second tab-separated field of the first line mentioning NIC
# (the eth0 interface, per the original comments).
ENI_LB1=$(printf '%s\n' "$LB1_DESC" | awk -F'\t' '/NIC/ { print $2; exit }')
ENI_LB2=$(printf '%s\n' "$LB2_DESC" | awk -F'\t' '/NIC/ { print $2; exit }')

unset LB1_DESC LB2_DESC

# Stop here if anything could not be resolved. With an empty address every
# ping fails, and the monitor would treat a healthy instance as dead and
# stop it; with an empty ENI the VIP/route calls cannot succeed.
if [ -z "$LB1_IP" ] || [ -z "$LB2_IP" ] || [ -z "$ENI_LB1" ] || [ -z "$ENI_LB2" ]; then
  echo "$(date) -- ERROR: instance lookup failed" \
    "(LB1_IP='$LB1_IP' LB2_IP='$LB2_IP' ENI_LB1='$ENI_LB1' ENI_LB2='$ENI_LB2');" \
    "check LB1_ID, LB2_ID, EC2_URL and the EC2 role permissions" >&2
  exit 1
fi

# Get alloc ID for EIP (only needed by the EIP-based alternative, disabled)
#EIP_ALLOC=`/opt/aws/bin/ec2-describe-addresses -U $EC2_URL | grep $EIP | awk -F$'\t' '{print $5;}'`
######################## Starting Script #######################
# Initial state: LB1 is made the active load balancer. It receives the VIP as
# a secondary private IP and the $REMOTE_RANGE route in $RT_ID is pointed at
# its network interface.
echo "$(date) -- Starting VPN monitor"

echo "$(date) -- Assigning VIP to LB1 ENI-1"
if ! /opt/aws/bin/ec2-assign-private-ip-addresses -n "$ENI_LB1" \
    --secondary-private-ip-address "$VIP" --allow-reassignment -U "$EC2_URL"; then
  # Keep going (as before), but make the failure visible.
  echo "$(date) -- WARNING: could not assign VIP $VIP to $ENI_LB1" >&2
fi
#/opt/aws/bin/ec2-associate-address -a $EIP_ALLOC -n $ENI_LB1 --allow-reassociation -U $EC2_URL

echo "$(date) -- Adding LB1 instance to $RT_ID default route on start"
if ! /opt/aws/bin/ec2-replace-route "$RT_ID" -r "$REMOTE_RANGE" -n "$ENI_LB1" -U "$EC2_URL"; then
  # replace-route failed, so the route may not exist yet: create it instead.
  if ! /opt/aws/bin/ec2-create-route "$RT_ID" -r "$REMOTE_RANGE" -n "$ENI_LB1" -U "$EC2_URL"; then
    echo "$(date) -- WARNING: could neither replace nor create route" \
      "$REMOTE_RANGE in $RT_ID" >&2
  fi
fi

# Tracks which load balancer currently owns the VIP and route: "LB1" or "LB2".
WHO_HAS_RT="LB1"
# ---------------------------------------------------------------------------
# Monitor loop and its helpers.
#
# Reads:  LB1_IP LB2_IP LB1_ID LB2_ID ENI_LB1 ENI_LB2 VIP RT_ID REMOTE_RANGE
#         EC2_URL Num_Pings Ping_Timeout Wait_Between_Pings
#         Wait_for_Instance_Stop Wait_for_Instance_Start
# Writes: WHO_HAS_RT ("LB1" or "LB2": current owner of the VIP and route)
#
# Plain sh has no local variables, so helper temporaries use an lbm_ prefix.
# ---------------------------------------------------------------------------

# lbm_log MESSAGE...
#   Print a timestamped line in the script's "<date> -- <message>" format.
lbm_log() {
  echo "$(date) -- $*"
}

# lbm_is_alive IP
#   Succeeds when at least one ping reply (a line containing "time=") came
#   back from IP.
lbm_is_alive() {
  lbm_replies=$(ping -c "$Num_Pings" -W "$Ping_Timeout" "$1" | grep -c 'time=')
  [ "$lbm_replies" != "0" ]
}

# lbm_fail_over FAILED_NAME SURVIVOR_NAME SURVIVOR_ENI
#   Move the VIP and the $REMOTE_RANGE route in $RT_ID to the survivor's
#   network interface. WHO_HAS_RT is updated only when both calls succeeded,
#   so a failed hand-over is retried on the caller's next pass.
lbm_fail_over() {
  lbm_moved=1

  lbm_log "$1 heartbeat failed, assigning VIP to $2 instance ENI-1"
  #/opt/aws/bin/ec2-associate-address -a $EIP_ALLOC -n <survivor ENI> --allow-reassociation -U $EC2_URL
  /opt/aws/bin/ec2-assign-private-ip-addresses -n "$3" \
    --secondary-private-ip-address "$VIP" --allow-reassignment -U "$EC2_URL" \
    || lbm_moved=0

  lbm_log "$1 heartbeat failed, $2 instance taking over $RT_ID route"
  # The route always lives in $RT_ID, whichever direction the failover goes.
  /opt/aws/bin/ec2-replace-route "$RT_ID" -r "$REMOTE_RANGE" -n "$3" -U "$EC2_URL" \
    || lbm_moved=0

  if [ "$lbm_moved" = "1" ]; then
    WHO_HAS_RT=$2
  else
    lbm_log "WARNING: failover from $1 to $2 incomplete, will retry" >&2
  fi
}

# lbm_recover NAME INSTANCE_ID PEER_NAME PEER_ENI
#   Handle an instance that failed its health check: fail over to the peer if
#   the failed instance owns the VIP/route, then stop the instance, wait until
#   it reports "stopped", start it again and give it time to boot.
lbm_recover() {
  lbm_stop_sent=0
  while :; do
    if [ "$WHO_HAS_RT" = "$1" ]; then
      lbm_fail_over "$1" "$3" "$4"
    fi

    # Instance state: sixth tab-separated field of the INSTANCE line.
    lbm_state=$(/opt/aws/bin/ec2-describe-instances "$2" -U "$EC2_URL" \
      | awk -F'\t' '/INSTANCE/ { print $6 }')

    if [ "$lbm_state" = "stopped" ]; then
      lbm_log "$1 instance stopped, starting it back up"
      /opt/aws/bin/ec2-start-instances "$2" -U "$EC2_URL"
      sleep "$Wait_for_Instance_Start"
      return 0
    fi

    # Request the stop only once, then keep polling until it takes effect.
    if [ "$lbm_stop_sent" = "0" ]; then
      lbm_log "$1 instance $lbm_state, attempting to stop for reboot"
      /opt/aws/bin/ec2-stop-instances "$2" -U "$EC2_URL"
      lbm_stop_sent=1
    fi
    sleep "$Wait_for_Instance_Stop"
  done
}

# Refuse to monitor without ping targets: with an empty address ping always
# fails, which would be treated as a dead instance and get it stopped.
if [ -z "$LB1_IP" ] || [ -z "$LB2_IP" ]; then
  lbm_log "ERROR: LB1_IP/LB2_IP not resolved, refusing to monitor" >&2
  exit 1
fi

# Main loop: check LB1, then LB2, forever. As in the original, the pause
# between rounds is taken only after a healthy LB2 check.
while :; do
  if ! lbm_is_alive "$LB1_IP"; then
    lbm_recover LB1 "$LB1_ID" LB2 "$ENI_LB2"
  fi

  if lbm_is_alive "$LB2_IP"; then
    sleep "$Wait_Between_Pings"
  else
    lbm_recover LB2 "$LB2_ID" LB1 "$ENI_LB1"
  fi
done