-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_megasasctl
executable file
·184 lines (165 loc) · 5.8 KB
/
check_megasasctl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/bin/bash
#====================================================================================
# Variable Defs:
#====================================================================================
# Name this script was invoked as, with any leading directory stripped.
SCRIPT="${0##*/}"

# Command used to query the controller (run through sudo) and the file its
# report is written to before being parsed.
MEGASASCTL='sudo /opt/megactl/megasasctl'
TEMPRAIDSTATUS='/tmp/raidstatus.txt'

# State strings that are treated as healthy for arrays, disks and batteries.
NORMALRAIDSTATUS='optimal'
NORMALDISKSTATUS='online'
NORMALBBUSTATUS='good'

# Media error counts above MEDIATHRESHOLD are recorded as warnings, counts
# above MAXMEDIATHRESHOLD as critical.
MEDIATHRESHOLD=20
MAXMEDIATHRESHOLD=30

# Scratch values holding the state of the item currently being examined.
CURRENTRAIDSTATUS=''
CURRENTDISKSTATUS=''

# Collected findings, filled in by the check functions.
declare -a BADRAIDARRAYS=()
declare -a BADDISKARRAY=()
declare -a BADBBUARRAY=()
declare -a HIGH_MEDIAERRORLIST=()
declare -a MED_MEDIAERRORLIST=()

# Overall result and the message reported alongside it.
STATUS='OK'
MSG=''
#====================================================================================
# Function Defs:
#====================================================================================
# Function to Check Raid Arrays
#
# Reads the controller report saved in $TEMPRAIDSTATUS. Every line that
# mentions "RAID" and is not an adapter ("bios") line is treated as an array:
# its first space-separated field is the array name and its sixth field is the
# array state, which is compared against $NORMALRAIDSTATUS.
#
# Globals read:    TEMPRAIDSTATUS, NORMALRAIDSTATUS
# Globals written: CURRENTRAIDSTATUS (state of the last array examined),
#                  BADRAIDARRAYS (appended), STATUS, MSG
function check_raid_array_status() {
  local array_name

  # A single awk pass yields "name state" pairs. The separator "[ ]+" treats
  # any run of spaces as one delimiter, so column padding does not shift the
  # field numbers; a pattern like ' *' must not be used for this because it
  # also matches the empty string. A line that begins with a space has an
  # empty first field and is ignored.
  while read -r array_name CURRENTRAIDSTATUS; do
    # Without a sixth field there is no state to judge; skip the line.
    [[ -n "$CURRENTRAIDSTATUS" ]] || continue
    # If a RAID array is found to be less than optimal then record it.
    if [[ "$CURRENTRAIDSTATUS" != "$NORMALRAIDSTATUS" ]]; then
      BADRAIDARRAYS+=( "$array_name" )
    fi
  done < <(
    awk -F'[ ]+' '!/bios/ && /RAID/ && $1 != "" { print $1, $6 }' \
      "$TEMPRAIDSTATUS"
  )

  if (( ${#BADRAIDARRAYS[@]} > 0 )); then
    # Any degraded array makes the whole check CRITICAL.
    STATUS="CRITICAL"
    MSG="Degraded RAID ARRAY(s) found: ${BADRAIDARRAYS[*]}"
  else
    MSG="RAID ARRAYS OPTIMAL, "
  fi
}
# Function to Check All Disk Online Status - flags every disk whose status is
# not "online".
#
# Reads the controller report saved in $TEMPRAIDSTATUS. Every line that does
# not mention "bios", "RAID" or "row" is treated as a candidate disk line: its
# first space-separated field is the disk name and its seventh field is the
# disk state, which is compared against $NORMALDISKSTATUS.
#
# Globals read:    TEMPRAIDSTATUS, NORMALDISKSTATUS
# Globals written: CURRENTDISKSTATUS (state of the last disk examined),
#                  BADDISKARRAY (appended), STATUS, MSG
function check_all_disk_online_status() {
  local disk_name

  # A single awk pass yields "name state" pairs. The separator "[ ]+" treats
  # any run of spaces as one delimiter, so column padding does not shift the
  # field numbers; a pattern like ' *' must not be used for this because it
  # also matches the empty string. A line that begins with a space has an
  # empty first field and is ignored.
  while read -r disk_name CURRENTDISKSTATUS; do
    # Lines with fewer than seven fields carry no disk state; skip them rather
    # than reporting them as degraded disks.
    [[ -n "$CURRENTDISKSTATUS" ]] || continue
    if [[ "$CURRENTDISKSTATUS" != "$NORMALDISKSTATUS" ]]; then
      BADDISKARRAY+=( "$disk_name" )
    fi
  done < <(
    awk -F'[ ]+' '!/bios|RAID|row/ && $1 != "" { print $1, $7 }' \
      "$TEMPRAIDSTATUS"
  )

  if (( ${#BADDISKARRAY[@]} > 0 )); then
    # Any disk that is not online makes the whole check CRITICAL.
    STATUS="CRITICAL"
    MSG="Degraded Disks found: ${BADDISKARRAY[*]}"
  else
    MSG="${MSG} ALL DISKS ONLINE, "
  fi
}
# Function to check media error counts (M.E.C's) across all disks
#
# Reads the controller report saved in $TEMPRAIDSTATUS. Every line containing
# "errs:" that does not mention "row", "bios" or "RAID" is treated as a disk
# line. The disk name is the first space-separated field; the media error
# count is the leading token of the third colon-separated field.
#
#   count >  MAXMEDIATHRESHOLD                  -> HIGH_MEDIAERRORLIST, CRITICAL
#   MEDIATHRESHOLD < count <= MAXMEDIATHRESHOLD -> MED_MEDIAERRORLIST, WARNING
#
# Globals read:    TEMPRAIDSTATUS, MEDIATHRESHOLD, MAXMEDIATHRESHOLD
# Globals written: HIGH_MEDIAERRORLIST, MED_MEDIAERRORLIST (appended),
#                  STATUS, MSG
function check_all_media_error_status() {
  local disk_name error_count

  # A single awk pass yields "name count" pairs. "[ ]+" treats any run of
  # spaces as one delimiter; a line that begins with a space has an empty
  # first field and is ignored.
  while read -r disk_name error_count; do
    # Skip anything that is not a plain non-negative integer so the numeric
    # comparisons below cannot fail on malformed input.
    [[ "$error_count" =~ ^[0-9]+$ ]] || continue
    if [ "$error_count" -gt "$MAXMEDIATHRESHOLD" ]; then
      HIGH_MEDIAERRORLIST+=( "${disk_name}:${error_count}" )
    # No upper bound is needed here: counts above the maximum were handled by
    # the branch above, and a count exactly equal to the maximum must still be
    # reported as a warning instead of falling between the two bands.
    elif [ "$error_count" -gt "$MEDIATHRESHOLD" ]; then
      MED_MEDIAERRORLIST+=( "${disk_name}:${error_count}" )
    fi
  done < <(
    awk -F'[ ]+' '
      !/row|bios|RAID/ && /errs:/ && $1 != "" {
        split($0, part, ":")
        count = part[3]
        sub(/ .*/, "", count)
        print $1, count
      }
    ' "$TEMPRAIDSTATUS"
  )

  if (( ${#HIGH_MEDIAERRORLIST[@]} > 0 )); then
    # At least one disk is above the maximum threshold.
    STATUS="CRITICAL"
    MSG="High M.E.C. on: ${HIGH_MEDIAERRORLIST[*]}"
  elif (( ${#MED_MEDIAERRORLIST[@]} > 0 )); then
    # At least one disk is above the warning threshold.
    STATUS="WARNING"
    MSG="M.E.C. Warning on the following disks: ${MED_MEDIAERRORLIST[*]}"
  else
    MSG="${MSG} ALL DISKS OPTIMAL, "
  fi
}
# Function to check the BBU status across all adapters
#
# Counts the adapter lines (those containing "bios") in $TEMPRAIDSTATUS and,
# for adapters a0 .. a<count-1>, extracts the battery state: the seventh
# colon-separated field of the line(s) containing "a<N> ", cut at the first
# "/". Any adapter whose state differs from $NORMALBBUSTATUS is recorded.
#
# A bad BBU only changes MSG; STATUS is deliberately left untouched (the
# WARNING assignment below is commented out).
#
# Globals read:    TEMPRAIDSTATUS, NORMALBBUSTATUS
# Globals written: BADBBUARRAY (appended), MSG
function check_adapt_bbu_status() {
  local adapter_count adapter bbu_state

  adapter_count=$(grep -c "bios" "$TEMPRAIDSTATUS")

  for (( adapter = 0; adapter < adapter_count; adapter++ )); do
    # The value is delimited by ":" and "/", so space padding in the report
    # needs no normalisation before cutting; rewriting the spaces would only
    # risk altering the value being compared.
    bbu_state=$(grep "a${adapter} " "$TEMPRAIDSTATUS" | cut -d: -f7 | cut -d/ -f1)
    if [[ "$bbu_state" != "$NORMALBBUSTATUS" ]]; then
      BADBBUARRAY+=( "a${adapter}" )
    fi
  done

  if (( ${#BADBBUARRAY[@]} > 0 )); then
    # Set Status = Warning
    #STATUS="WARNING"
    MSG="Bad BBU(s) found: ${BADBBUARRAY[*]}"
  else
    MSG="${MSG} ALL BBU(s) OK"
  fi
}
# Function to check status as we go
#
# Called after each individual check. When STATUS has become CRITICAL or
# WARNING, prints "<STATUS>: <MSG>" and terminates the script with exit code
# 2 or 1 respectively. For any other STATUS it prints nothing and returns so
# the next check can run.
function check_status() {
  case "$STATUS" in
    CRITICAL)
      echo "$STATUS: $MSG"
      exit 2
      ;;
    WARNING)
      echo "$STATUS: $MSG"
      exit 1
      ;;
  esac
}
#====================================================================================
# MAIN:
#====================================================================================
#
#if [[ $UID -ne 0 ]]; then
# echo "$0 must be run as root"
# exit 1
#fi
# First, capture the megasasctl report in a private temp file.
# mktemp creates an unpredictably named file instead of a fixed, guessable
# path in /tmp, and the EXIT trap removes it on every exit path, including the
# early exits taken inside check_status.
if ! TEMPRAIDSTATUS=$(mktemp "${TMPDIR:-/tmp}/raidstatus.XXXXXX"); then
  echo "UNKNOWN: ${SCRIPT} could not create a temporary file. Please check"
  exit 3
fi
trap 'rm -f -- "$TEMPRAIDSTATUS"' EXIT

# MEGASASCTL holds the command together with its sudo prefix, so it is left
# unquoted on purpose to be split into separate words.
# shellcheck disable=SC2086
if ! ${MEGASASCTL} -v > "$TEMPRAIDSTATUS"; then
  # Exit code 3 is the plugin convention for UNKNOWN; 2 would be read by the
  # monitoring system as CRITICAL, contradicting the message.
  echo "UNKNOWN: Error running ${SCRIPT}. Please check"
  exit 3
fi

# Each check is followed by check_status, which prints the result and exits
# as soon as STATUS is WARNING or CRITICAL.

# Check the status of the RAID arrays
check_raid_array_status
check_status

# Check the status of each disk on every array
check_all_disk_online_status
check_status

# Check for media errors
check_all_media_error_status
check_status

# Check BBU status across all adapters
check_adapt_bbu_status
check_status

# Cleanup of the temp file is handled by the EXIT trap set above.

# Check final script status
if [ "$STATUS" = "OK" ]; then
  echo "$STATUS: $MSG"
  exit 0
fi