-
Notifications
You must be signed in to change notification settings - Fork 2
/
pingsweep.sh
183 lines (172 loc) · 6.68 KB
/
pingsweep.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/bin/bash
#
#
#
#
# Color variables and script-wide settings.
# tput prints an error on stderr when TERM is unset (e.g. when the script is
# started from cron in unattended mode). stderr is silenced so that such runs
# stay quiet; the affected variable is then simply left empty.
txtbld=$(tput bold 2>/dev/null)               # bold
bldred=${txtbld}$(tput setaf 1 2>/dev/null)   # bold red
bldgreen=${txtbld}$(tput setaf 2 2>/dev/null) # bold green
txtblu=$(tput setaf 4 2>/dev/null)            # blue
txtwhite=$(tput setaf 7 2>/dev/null)          # white
txtyellow=$(tput setaf 3 2>/dev/null)         # yellow
txtund=$(tput sgr 0 1 2>/dev/null)            # underline
txtrst=$(tput sgr0 2>/dev/null)               # reset all attributes
# ROW holds the table row currently being built by check_mode; TABLE starts as
# the header line and has one "\n"-separated row appended per instance.
ROW=" "
TABLE="| Node_Name | Instance_Address | Nova_Status | Connectivity"
# While this file exists (and is younger than EXPIRE seconds) the script exits
# immediately; it is created by node_instances_down.
KILLSWITCH=/root/dontrunovswatch
# Age in seconds after which an existing kill switch file is removed.
EXPIRE=3600
#######################################
# Send a message to syslog (tag "ovswatch") and print it on stdout.
# Arguments:
#   The message. Several arguments are joined with single spaces.
# Outputs:
#   The message followed by a newline on stdout.
#######################################
log ()
{
  # "$*" keeps the message as ONE word, so it is neither glob-expanded nor
  # re-split (an unquoted $@ would turn "*" into file names and collapse runs
  # of spaces). "--" stops logger from reading a leading "-" as an option.
  /usr/bin/logger -t ovswatch -- "$*"
  printf '%s\n' "$*"
}
#######################################
# Report that the network is unavailable on the current compute node, create
# the kill switch file and terminate the script.
# Globals:
#   NET_ID, SHORTNODE (read, used in the message)
#   KILLSWITCH (read, path of the file to create)
# Arguments:
#   None
# Returns:
#   Does not return; exits the script with status 0.
#######################################
node_instances_down ()
{
  log "NETWORK $NET_ID UNAVAILABLE on $SHORTNODE! Check or restart Open vSwitch and/or the Neutron plugin agent."
  # Quoted so that a path containing whitespace or glob characters is still
  # treated as a single file name.
  /usr/bin/touch -- "$KILLSWITCH"
  exit 0
}
#######################################
# "check" mode: for every nova-compute node, probe each instance port on the
# network from the network's DHCP namespace and print a human-readable report
# followed by a summary table.
# An instance counts as reachable when it answers ICMP, or when the namespace
# has no "incomplete" ARP entry for its address. Unreachable instances that
# nova does not report as ACTIVE are ignored.
# Globals:
#   NET_ID (read), INSTANCES (appended to), TABLE and ROW (written),
#   bldgreen, bldred, txtrst (read)
# Arguments:
#   None
# Outputs:
#   Per-node report and the final table on stdout.
# Returns:
#   Does not return; exits the script with status 0.
#######################################
check_mode ()
{
  local NODE SHORTNODE LINE INSTANCE_IP INSTANCE_STATE INSTANCE_CONNECT i
  local num_instances fail
  local green red reset

  # Resolve the colour sequences once instead of forking tput per instance.
  green=$(tput setaf 2)
  red=$(tput setaf 1)
  reset=$(tput sgr0)

  # Word splitting of the command output is intentional: one host per word.
  for NODE in $(nova-manage service list 2>/dev/null | awk '/nova-compute/ {print $2}')
  do
    num_instances=0
    fail=0
    SHORTNODE=${NODE%%.*}   # host name up to the first dot
    echo -e "\r\nNode:" "$NODE"
    echo -e "Network ID:" "$NET_ID"

    # Each word produced here has the form <port-id>"<ip-address>"}.
    for LINE in $(neutron port-list --network_id "$NET_ID" --binding:host_id "$NODE" --device_owner compute:nova | awk '/ip_address/ {print $2,$10}' | sed 's/ //')
    do
      num_instances=$((num_instances + 1))
      # Keep only the text between the first pair of double quotes.
      INSTANCE_IP=${LINE#*\"}
      INSTANCE_IP=${INSTANCE_IP%%\"*}
      INSTANCES+=("$INSTANCE_IP")

      # Attempt ICMP ping from the DHCP namespace.
      if ip netns exec "qdhcp-$NET_ID" ping -i .25 -c 2 -W 2 "$INSTANCE_IP" >/dev/null; then
        INSTANCE_STATE='ACTIVE'
        INSTANCE_CONNECT=${bldgreen}ICMP${txtrst}
        echo "$SHORTNODE instance $INSTANCE_IP ${green}OK${reset}"
      # ICMP failed: it could be dropped by a security group, or the instance
      # could be down. Look for an unresolved ARP entry in the DHCP namespace.
      # The address is matched literally together with the surrounding
      # parentheses printed by "arp -an", so 10.0.0.1 no longer also matches
      # 10.0.0.10 or 10.0.0.100.
      elif ip netns exec "qdhcp-$NET_ID" arp -an | grep -F -- "($INSTANCE_IP)" | grep -q incomplete; then
        # ICMP and ARP both failed. Check whether nova reports the instance as
        # ACTIVE (-w/-F: whole, literal address only).
        INSTANCE_STATE=$(nova list --all-tenants | grep -wF -- "$INSTANCE_IP" | cut -d '|' -f4 | sed 's/ //g')
        # Quoted comparison: an empty or multi-line state is treated as "not
        # ACTIVE" instead of raising a test syntax error.
        if [[ "$INSTANCE_STATE" == 'ACTIVE' ]]; then
          INSTANCE_CONNECT=${bldred}FAIL${txtrst}
          echo "$SHORTNODE instance $INSTANCE_IP ${red}FAILED${reset}. Instance is in ACTIVE state and unresponsive."
          fail=$((fail + 1))
        else
          INSTANCE_CONNECT='N/A'
          echo "$SHORTNODE instance $INSTANCE_IP FAILED. Instance not in ACTIVE state. Disregard."
          # Instances that are not ACTIVE do not count towards the node total.
          num_instances=$((num_instances - 1))
        fi
      else
        # No incomplete ARP entry for the address: treated as reachable.
        INSTANCE_STATE='ACTIVE'
        INSTANCE_CONNECT=${bldgreen}ARP${txtrst}
        echo "$SHORTNODE instance $INSTANCE_IP ${green}OK${reset} via ARP entry"
      fi

      ROW="| $SHORTNODE | $INSTANCE_IP | $INSTANCE_STATE | $INSTANCE_CONNECT"
      TABLE="$TABLE\n$ROW"
    done

    echo "Number of instances : $num_instances"
    echo "Number of unresponsive instances : $fail"

    # Every counted instance on this node is unresponsive. INSTANCES holds all
    # addresses collected so far (it is not reset per node), so re-ping them:
    # each address that still answers from the namespace produces the
    # "unavailable on this node" warning.
    if (( num_instances > 0 && fail == num_instances )); then
      for i in "${INSTANCES[@]}"
      do
        if ip netns exec "qdhcp-$NET_ID" ping -i .25 -c 2 -W 2 "$i" >/dev/null; then
          echo "${red}NETWORK $NET_ID UNAVAILABLE on $SHORTNODE! Check or restart Open vSwitch and/or the Neutron plugin agent.${reset}"
        fi
      done
    fi
  done

  #echo -e " \r\n== Pretty table output =="
  # echo -e expands the literal "\n" separators stored in TABLE.
  echo -e "$TABLE" | column -t -x
  exit 0
}
#######################################
# "unattended" mode: same probing as check mode but without the report. Only
# major problems are logged.
# An instance counts as unresponsive when it does not answer ICMP, the DHCP
# namespace has an "incomplete" ARP entry for it, and nova reports it ACTIVE.
# Globals:
#   NET_ID (read), INSTANCES (appended to)
# Arguments:
#   None
# Outputs:
#   Messages written through log / node_instances_down.
# Returns:
#   Does not return; exits the script with status 0 (directly, or through
#   node_instances_down, which also creates the kill switch file).
#######################################
unattended_mode ()
{
  # SHORTNODE is local but still visible to node_instances_down, which is
  # called from inside this function (bash uses dynamic scoping).
  local NODE SHORTNODE LINE INSTANCE_IP INSTANCE_STATE i
  local num_instances fail

  # Word splitting of the command output is intentional: one host per word.
  for NODE in $(nova-manage service list 2>/dev/null | awk '/nova-compute/ {print $2}')
  do
    num_instances=0
    fail=0
    SHORTNODE=${NODE%%.*}   # host name up to the first dot

    # Each word produced here has the form <port-id>"<ip-address>"}.
    for LINE in $(neutron port-list --network_id "$NET_ID" --binding:host_id "$NODE" --device_owner compute:nova | awk '/ip_address/ {print $2,$10}' | sed 's/ //')
    do
      num_instances=$((num_instances + 1))
      # Keep only the text between the first pair of double quotes.
      INSTANCE_IP=${LINE#*\"}
      INSTANCE_IP=${INSTANCE_IP%%\"*}
      INSTANCES+=("$INSTANCE_IP")

      # Attempt ICMP ping from the DHCP namespace; a reply means reachable.
      if ip netns exec "qdhcp-$NET_ID" ping -i .25 -c 2 -W 2 "$INSTANCE_IP" >/dev/null; then
        continue
      fi

      # ICMP failed: it could be dropped by a security group, or the instance
      # could be down. Without an incomplete ARP entry the instance is still
      # treated as reachable. The address is matched literally together with
      # the parentheses printed by "arp -an", so 10.0.0.1 no longer also
      # matches 10.0.0.10 or 10.0.0.100.
      if ! ip netns exec "qdhcp-$NET_ID" arp -an | grep -F -- "($INSTANCE_IP)" | grep -q incomplete; then
        continue
      fi

      # ICMP and ARP both failed. Check whether nova reports the instance as
      # ACTIVE (-w/-F: whole, literal address only).
      INSTANCE_STATE=$(nova list --all-tenants | grep -wF -- "$INSTANCE_IP" | cut -d '|' -f4 | sed 's/ //g')
      # Quoted comparison: an empty or multi-line state is treated as "not
      # ACTIVE" instead of raising a test syntax error.
      if [[ "$INSTANCE_STATE" == 'ACTIVE' ]]; then
        fail=$((fail + 1))
      else
        # Instances that are not ACTIVE do not count towards the node total.
        num_instances=$((num_instances - 1))
      fi
    done

    # Every counted instance on this node is unresponsive. INSTANCES holds all
    # addresses collected so far (it is not reset per node): if any of them
    # still answers, node_instances_down reports this node and exits;
    # otherwise a general warning is logged and the sweep continues.
    if (( num_instances > 0 && fail == num_instances )); then
      for i in "${INSTANCES[@]}"
      do
        if ip netns exec "qdhcp-$NET_ID" ping -i .25 -c 2 -W 2 "$i" >/dev/null; then
          node_instances_down
        fi
      done
      log "Possible network issue detected on $(hostname)! Check or restart Open vSwitch and/or the Neutron plugin agent."
    fi
  done
  exit 0
}
#
## Main
#
# Arguments:
#   $1 - mode, either "check" or "unattended"
#   $2 - text used to select the network from the "neutron net-list" output
#
if [ "$#" -lt 2 ]; then
  echo "Usage examples: "
  echo "# ./ovs-watch.sh unattended INSIDE_NET (log and print major errors)"
  echo "# ./ovs-watch.sh check INSIDE_NET (print output for human consumption)"
  exit 1
fi

# Reject an unknown mode up front instead of silently doing nothing.
case "$1" in
  check|unattended) ;;
  *)
    echo "Unknown mode '$1': expected 'check' or 'unattended'." >&2
    exit 1
    ;;
esac

# Honour the kill switch: while the file is younger than EXPIRE seconds the
# script does nothing; an older file is removed and the run continues.
if [[ -f "$KILLSWITCH" ]]; then
  FILEDATE=$(date -r "$KILLSWITCH" +%s)
  NOW=$(date +%s)
  DELTA=$(( NOW - FILEDATE ))
  if (( DELTA > EXPIRE )); then
    logger -t ovswatch "Removing expired kill switch ($KILLSWITCH) after $EXPIRE seconds"
    rm -f -- "$KILLSWITCH"
  else
    logger -t ovswatch "Kill switch $KILLSWITCH exists, exiting."
    exit 0
  fi
fi

# The OpenStack client commands below need the environment set up by this file.
source /root/openrc || {
  echo "Unable to source /root/openrc." >&2
  exit 1
}

# Select the network ID (second column) of the net-list row containing $2.
# -F matches $2 literally; "--" protects a value starting with "-".
NET_ID=$(neutron net-list | grep -F -- "$2" | awk '{print $2}')
INSTANCES=()

if [[ -z "$NET_ID" ]]; then
  # api_error is not defined anywhere in this file; call it only if something
  # else (e.g. the sourced openrc) provides it, then stop: nothing below can
  # work without a network ID.
  if declare -F api_error >/dev/null; then
    api_error
  fi
  echo "Unable to find a network matching '$2' (neutron net-list returned no match)." >&2
  exit 1
fi

# More than one matching row would put several IDs into NET_ID and break the
# qdhcp-<id> namespace name built from it.
if [[ "$NET_ID" == *$'\n'* ]]; then
  echo "'$2' matches more than one network; use a more specific name or the network ID." >&2
  exit 1
fi

case "$1" in
  check) check_mode ;;
  unattended) unattended_mode ;;
esac