Compare commits
No commits in common. "75bad78233a4494f604a2b5f5ebfd60c34f20de2" and "ba70f3b737fcd175b2d646316f55e9c6a838fcf5" have entirely different histories.
75bad78233
...
ba70f3b737
16 changed files with 36 additions and 270 deletions
229
check_mdstat.sh
229
check_mdstat.sh
|
@ -1,204 +1,47 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
#
|
#
|
||||||
# Created by Sebastian Grewe, Jammicron Technology
|
# Created by Sebastian Grewe, Jammicron Technology
|
||||||
# Changes by Jasem Elayeb on 02.03.2016
|
# Changes By Jasem Elayeb on 02.03.2016
|
||||||
# Changes by Jonny007-MKD on 06.02.2020
|
|
||||||
# JE: add Physical Disks Name RAID_DISKS
|
# JE: add Physical Disks Name RAID_DISKS
|
||||||
# JE: add Physical Disks Status DISKS_STATUS
|
# JE: add Physical Disks Status DISKS_STATUS
|
||||||
# JE: add Array Names RAID_ARRAY
|
# JE: add Array Names RAID_ARRAY
|
||||||
# J007: Only check a single array
|
|
||||||
|
|
||||||
|
# Get count of raid arrays
|
||||||
|
RAID_DEVICES=`grep ^md -c /proc/mdstat`
|
||||||
|
|
||||||
if [ $# -lt 1 -o -z "$1" ]; then
|
# Get count of degraded arrays
|
||||||
echo "ERROR: pass raid name as argument"
|
RAID_STATUS=`grep "\[.*_.*\]" /proc/mdstat -c`
|
||||||
exit 1;
|
|
||||||
fi
|
|
||||||
while [[ $# -gt 0 ]]; do
|
|
||||||
case $1 in
|
|
||||||
-r|--raid) TARGET_RAID_NAME="$2";;
|
|
||||||
-?|--help)
|
|
||||||
echo "Check /proc/mdstat. Arguments:"
|
|
||||||
echo "--raid NAME: Raid name, e.g. md0"
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
shift
|
|
||||||
done
|
|
||||||
|
|
||||||
if [ -z "$TARGET_RAID_NAME" ]; then
|
# Is an array currently recovering, get percentage of recovery
|
||||||
echo "ERROR: pass --raid as argument"
|
RAID_RECOVER=`grep recovery /proc/mdstat | awk '{print $4}'`
|
||||||
exit 255
|
|
||||||
|
# Is an array currently resyncing, get percentage of resync
|
||||||
|
|
||||||
|
RAID_RESYNC=`grep resync /proc/mdstat | awk '{print $4}'`
|
||||||
|
|
||||||
|
RAID_ARRAY=`awk '/md[1-9]/{for (i=1;i<=NF;++i) if ($i~/md[1-2]/)print $i}' /proc/mdstat |xargs`
|
||||||
|
RAID_DISKS=`awk '/sd[a-z]/{for (i=1;i<=NF;++i) if ($i~/sd[a-z]/)print $i}' /proc/mdstat |xargs`
|
||||||
|
DISKS_STATUS=`grep algorithm /proc/mdstat|awk '{print $12}'`
|
||||||
|
|
||||||
|
# Check raid status
|
||||||
|
# RAID recovers --> Warning
|
||||||
|
if [[ $RAID_RECOVER ]]; then
|
||||||
|
STATUS="WARNING - Checked $RAID_DEVICES arrays $RAID_ARRAY, recovering : $RAID_RECOVER"
|
||||||
|
EXIT=1
|
||||||
|
# RAID resync --> Warning
|
||||||
|
elif [[ $RAID_RESYNC ]]; then
|
||||||
|
STATUS="WARNING - Checked $RAID_DEVICES arrays $RAID_ARRAY., resyncing : $RAID_RESYNC"
|
||||||
|
EXIT=1
|
||||||
|
# RAID ok
|
||||||
|
elif [[ $RAID_STATUS == "0" ]]; then
|
||||||
|
STATUS="OK - Checked $RAID_DEVICES arrays $RAID_ARRAY."
|
||||||
|
EXIT=0
|
||||||
|
# All else critical, better safe than sorry
|
||||||
|
else
|
||||||
|
STATUS="CRITICAL - Checked $RAID_DEVICES arrays $RAID_ARRAY, $RAID_STATUS have FAILED"
|
||||||
|
EXIT=2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
function log {
|
# Status and quit
|
||||||
local x=
|
echo -e "$STATUS \n Physical Disks: $RAID_DISKS Disks Status: $DISKS_STATUS "
|
||||||
#echo " > $@"
|
exit $EXIT
|
||||||
}
|
|
||||||
|
|
||||||
CURRENT_RAID= # string: name of the mdstat section the last processed line belongs to

#######################################
# Track which section of the mdstat listing the parser is currently in.
# Called once for every input line. A line that starts with a
# non-whitespace character and contains a colon opens a new section
# (e.g. "md0 : active raid1 ..."); its name becomes CURRENT_RAID.
# Any other line leaves CURRENT_RAID untouched.
# Globals:   CURRENT_RAID (written)
# Arguments: $1 - one line of the mdstat listing
#######################################
function setCurrentRaid {
  local line="$1"

  # The regex lives in a variable so the bracket expression is passed to the
  # matcher unmodified. The POSIX class is required here: "\s" inside a
  # bracket expression does not mean "whitespace", so with it indented lines
  # containing a colon (such as "bitmap: ...") were taken for section headers.
  local header_re='^[^[:space:]].+:.+'

  if [[ "$line" =~ $header_re ]]; then
    local name="${line%%:*}"                    # text before the first colon
    name="${name%"${name##*[![:space:]]}"}"     # strip trailing whitespace
    CURRENT_RAID="$name"
    log "SET CURRENT RAID"
  fi
  log "CURRENT RAID: $CURRENT_RAID"
}
|
|
||||||
|
|
||||||
STATE=   # first word after the colon of the header line, e.g. "active", "started", "inactive"
LEVEL=   # RAID level with the "raid" prefix removed, e.g. "5" for raid5
DEVICES= # member device names separated by single spaces

#######################################
# Parse the header line of a RAID section, e.g.
#   "md0 : active raid5 sda1[0] sdd1[2] sdb1[1]"
# into STATE, LEVEL and DEVICES.
# Globals:   STATE, LEVEL, DEVICES (all overwritten on every call)
# Arguments: $1 - the complete header line
#######################################
function setStateAndDevicesAndLevel {
  local line="$1"

  # Split into an array with read: unlike an unquoted "for x in $var" this
  # performs no pathname expansion, so a token such as "sda1[0]" can never be
  # replaced by a matching file name (e.g. "sda10") from the working directory.
  local -a words=()
  read -ra words <<<"${line#*: }"

  STATE="${words[0]}"
  LEVEL=
  DEVICES=

  local idx=1
  # A parenthesised marker may sit between state and level -- presumably
  # "(read-only)" / "(auto-read-only)"; it is skipped.
  if [[ "${words[idx]}" == "("* ]]; then
    idx=$((idx + 1))
  fi
  # A token containing "[" is already a member device, which means the header
  # carries no level word at all; LEVEL then stays empty.
  if [[ -n "${words[idx]}" && "${words[idx]}" != *"["* ]]; then
    LEVEL="${words[idx]#raid}" # drop the prefix only when it is really there
    idx=$((idx + 1))
  fi

  local dev
  for dev in "${words[@]:idx}"; do
    # "sdb1[1](F)" -> "sdb1": cut everything from the role index onwards
    DEVICES="${DEVICES:+$DEVICES }${dev%%\[*}"
  done

  log "STATE = $STATE"
  log "LEVEL = $LEVEL"
  log "DEVICES = $DEVICES"
}
|
|
||||||
|
|
||||||
SIZE_IN_BLOCKS="!" # int; "!" until a status line has been parsed
NUM_DEVICES=0      # int with number of devices in raid
BAD_DEVICES=0      # int with number of "_" in "[UUU_UU]"

#######################################
# Parse the first line after a RAID header line, e.g.
#   "      1465151808 blocks level 5, 64k chunk, algorithm 2 [4/3] [UUU_]"
# The first word is the size in blocks. When the last word is a member
# status map such as "[UUU_]", its length is the number of devices and the
# number of "_" characters is the number of bad devices.
# Globals:   SIZE_IN_BLOCKS, NUM_DEVICES, BAD_DEVICES (written)
# Arguments: $1 - the complete line
#######################################
function parseConfigStatusLine {
  local line="$1"

  # Word-split with read instead of cutting a fixed number of leading
  # characters, so the result does not depend on the exact indentation.
  local -a words=()
  read -ra words <<<"$line"
  if [[ ${#words[@]} -eq 0 ]]; then
    return 0 # blank line: nothing to parse, keep the previous values
  fi

  SIZE_IN_BLOCKS="${words[0]}"

  local last="${words[${#words[@]} - 1]}"
  local map_re='^\[([U_]+)\]$'
  if [[ "$last" =~ $map_re ]]; then
    local map="${BASH_REMATCH[1]}"
    NUM_DEVICES=${#map}          # one character per member device
    local failed="${map//U/}"    # remove all U, so only _ remain
    BAD_DEVICES=${#failed}
  else
    # No status map at the end of the line: do not derive device counts
    # from an unrelated trailing word.
    NUM_DEVICES=0
    BAD_DEVICES=0
  fi

  log "SIZE_IN_BLOCKS = $SIZE_IN_BLOCKS"
  log "NUM_DEVICES = $NUM_DEVICES"
  log "BAD_DEVICES = $BAD_DEVICES"
}
|
|
||||||
|
|
||||||
#######################################
# Dispatch the second line after a RAID header line to the matching parser.
# A line containing "bitmap" goes to parseBitmapLine, a line containing
# "recovery" goes to parseRecoveryLine; both checks are independent.
# Arguments: $1 - the complete line
#######################################
function parseBitmapOrRecoveryLine {
  local candidate="$1"

  case "$candidate" in
    *bitmap*) parseBitmapLine "$candidate" ;;
  esac

  case "$candidate" in
    *recovery*) parseRecoveryLine "$candidate" ;;
  esac
}
|
|
||||||
|
|
||||||
#######################################
# Placeholder for parsing a "bitmap: ..." line of a RAID section.
# Nothing is extracted from the line yet; the function exists so the
# dispatcher has a target for bitmap lines.
# Arguments: $1 - the complete line (currently ignored)
# Returns:   0
#######################################
function parseBitmapLine {
  # TODO: extract the bitmap information from "$1"
  : # explicit no-op: a Bash function body must contain at least one command
}
|
|
||||||
|
|
||||||
RECOVERY_PROGRESS= # float in percent, without the "%" sign; empty when not recovering
RECOVERY_INFO=     # finish and speed, e.g. "finish=127.5min speed=33440K/sec"

#######################################
# Parse a recovery progress line of a RAID section, e.g.
#   "[==>......]  recovery = 12.6% (37043392/292945152) finish=127.5min speed=33440K/sec"
# Lines that do not contain "recovery = " are ignored, so calling this
# function on a blank or unrelated line keeps an already parsed progress.
# Globals:   RECOVERY_PROGRESS, RECOVERY_INFO (written on a recovery line)
# Arguments: $1 - the complete line
#######################################
function parseRecoveryLine {
  local line="$1"

  # Without this guard the expansions below would fall through to the whole
  # line and overwrite RECOVERY_PROGRESS with text that is not a progress
  # value (an empty string for a blank line).
  if [[ "$line" != *"recovery = "* ]]; then
    return 0
  fi

  local after="${line##*recovery = }" # "12.6% (37043392/...) finish=..."
  RECOVERY_PROGRESS="${after%%\%*}"   # keep everything before the first "%"

  if [[ "$line" == *"finish="* ]]; then
    RECOVERY_INFO="finish=${line##*finish=}"
  else
    RECOVERY_INFO=
  fi

  log "RECOVERY_PROGRESS = $RECOVERY_PROGRESS"
  log "RECOVERY_INFO = $RECOVERY_INFO"
}
|
|
||||||
|
|
||||||
#######################################
# Print the one-line check result followed by performance data and
# terminate the script with the matching status.
# Globals (read): STATE, LEVEL, DEVICES, SIZE_IN_BLOCKS, NUM_DEVICES,
#                 BAD_DEVICES, RECOVERY_PROGRESS, RECOVERY_INFO
# Outputs:   "<RESULT>[: <info>] | <perfdata>" on stdout
# Exits:     0 for OK, 1 for WARNING, 2 for CRITICAL
#######################################
function printOutputAndExit {
  local result="OK"
  local info=""

  # Conditions are checked in order of increasing severity; a later match
  # overrides the result and appends its text to the info, separated by ". ".
  if [[ -n "$RECOVERY_PROGRESS" ]]; then
    result="WARNING"
    info="${info:+$info. }Recovering: $RECOVERY_PROGRESS $RECOVERY_INFO"
  fi

  if [[ "$STATE" != "active" && "$STATE" != "started" ]]; then
    result="CRITICAL"
    info="${info:+$info. }State is $STATE"
  fi

  if [[ "${BAD_DEVICES:-0}" -gt 0 ]]; then
    result="CRITICAL"
    # Report how many of the devices are missing, not the total device count.
    info="${info:+$info. }Missing $BAD_DEVICES of $NUM_DEVICES devices"
  fi

  local perfdata=" 'raid level'=$LEVEL"
  perfdata+=" 'size in blocks'=$SIZE_IN_BLOCKS"
  perfdata+=" 'num devices'=$NUM_DEVICES"
  perfdata+=" 'num bad devices'=$BAD_DEVICES;;1;0;$NUM_DEVICES"
  if [[ -n "$RECOVERY_PROGRESS" ]]; then
    perfdata+=" 'recovery progress'=$RECOVERY_PROGRESS%;0.1;;0;100"
  fi

  printf '%s' "$result"
  if [[ -n "$info" ]]; then
    printf ': %s' "$info"
  fi
  printf ' |%s\n' "$perfdata"

  case "$result" in
    OK) exit 0 ;;
    WARNING) exit 1 ;;
    CRITICAL) exit 2 ;;
    *) exit 3 ;; # not reachable with the values assigned above
  esac
}
|
|
||||||
|
|
||||||
|
|
||||||
# Number of lines of the target RAID section consumed so far
# (0 = the section has not been reached yet).
HAD_TARGET_RAID=0

# Walk through the mdstat listing line by line. While inside the section of
# the requested array, the n-th line of the section is handed to the parser
# responsible for it. The first line that belongs to another section ends
# parsing and prints the result.
# "|| [[ -n $line ]]" also processes a final line without trailing newline.
while IFS= read -r line || [[ -n "$line" ]]; do
  #echo " : $line"

  setCurrentRaid "$line"

  if [[ "$CURRENT_RAID" == "$TARGET_RAID_NAME" ]]; then
    case "$HAD_TARGET_RAID" in
      0) setStateAndDevicesAndLevel "$line" ;;
      1) parseConfigStatusLine "$line" ;;
      2) parseBitmapOrRecoveryLine "$line" ;;
      3) parseRecoveryLine "$line" ;;
    esac
    HAD_TARGET_RAID=$((HAD_TARGET_RAID + 1))
  elif [[ "$HAD_TARGET_RAID" -gt 0 ]]; then
    printOutputAndExit
  fi
done < /proc/mdstat

# End of input reached while still inside the target section (no further
# section followed it): the array was found, so report what was parsed
# instead of claiming it does not exist.
if [[ "$HAD_TARGET_RAID" -gt 0 ]]; then
  printOutputAndExit
fi

echo "CRITICAL: RAID $TARGET_RAID_NAME not found in mdstat"
exit 2
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +0,0 @@
|
||||||
Personalities : [raid6] [raid5] [raid4]
|
|
||||||
md0 : active raid5 sda1[0] sdd1[2] sdb1[1]
|
|
||||||
1465151808 blocks level 5, 64k chunk, algorithm 2 [4/3] [UUU_]
|
|
||||||
unused devices: <none>
|
|
|
@ -1 +0,0 @@
|
||||||
CRITICAL: Missing 1 of 4 devices | 'raid level'=5 'size in blocks'=1465151808 'num devices'=4 'num bad devices'=1;;1;0;4
|
|
|
@ -1,6 +0,0 @@
|
||||||
Personalities : [raid1]
|
|
||||||
md0 : active raid1 sdb1[1] sda1[0]
|
|
||||||
976430080 blocks super 1.2 [2/2] [UU]
|
|
||||||
bitmap: 0/8 pages [0KB], 65536KB chunk
|
|
||||||
|
|
||||||
unused devices: <none>
|
|
|
@ -1 +0,0 @@
|
||||||
OK | 'raid level'=1 'size in blocks'=976430080 'num devices'=2 'num bad devices'=0;;1;0;2
|
|
|
@ -1,6 +0,0 @@
|
||||||
Personalities : [raid1] [raid6] [raid5] [raid4]
|
|
||||||
md0 : active raid5 sde1[0] sdf1[4] sdb1[5] sdd1[2] sdc1[1]
|
|
||||||
1250241792 blocks super 1.2 level 5, 64k chunk, algorithm 2 [5/5] [UUUUU]
|
|
||||||
bitmap: 0/10 pages [0KB], 16384KB chunk
|
|
||||||
|
|
||||||
unused devices: <none>
|
|
|
@ -1 +0,0 @@
|
||||||
OK | 'raid level'=5 'size in blocks'=1250241792 'num devices'=5 'num bad devices'=0;;1;0;5
|
|
|
@ -1,14 +0,0 @@
|
||||||
Personalities : [raid1] [raid6] [raid5] [raid4]
|
|
||||||
md1 : active raid1 sdb2[1] sda2[0]
|
|
||||||
136448 blocks [2/2] [UU]
|
|
||||||
|
|
||||||
md2 : active raid1 sdb3[1] sda3[0]
|
|
||||||
129596288 blocks [2/2] [UU]
|
|
||||||
|
|
||||||
md0 : active raid5 sdl1[9] sdk1[8] sdj1[7] sdi1[6] sdh1[5] sdg1[4] sdf1[3] sde1[2] sdd1[1] sdc1[0]
|
|
||||||
1318680576 blocks level 5, 1024k chunk, algorithm 2 [10/10] [UUUUUUUUUU]
|
|
||||||
|
|
||||||
md3 : active raid1 sdb1[1] sda1[0]
|
|
||||||
16787776 blocks [2/2] [UU]
|
|
||||||
|
|
||||||
unused devices: <none>
|
|
|
@ -1 +0,0 @@
|
||||||
OK | 'raid level'=5 'size in blocks'=1318680576 'num devices'=10 'num bad devices'=0;;1;0;10
|
|
|
@ -1,7 +0,0 @@
|
||||||
Personalities : [linear] [raid0] [raid1] [raid5] [raid4] [raid6]
|
|
||||||
md0 : active raid6 sdf1[0] sde1[1] sdd1[2] sdc1[3] sdb1[4] sda1[5] hdb1[6]
|
|
||||||
1225557760 blocks level 6, 256k chunk, algorithm 2 [7/7] [UUUUUUU]
|
|
||||||
bitmap: 0/234 pages [0KB], 512KB chunk
|
|
||||||
|
|
||||||
unused devices: <none>
|
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
OK | 'raid level'=6 'size in blocks'=1225557760 'num devices'=7 'num bad devices'=0;;1;0;7
|
|
|
@ -1,5 +0,0 @@
|
||||||
Personalities : [raid1] [raid6] [raid5] [raid4]
|
|
||||||
md1 : active raid1 sdb2[1] sda2[0]
|
|
||||||
136448 blocks [2/2] [UU]
|
|
||||||
|
|
||||||
unused devices: <none>
|
|
|
@ -1 +0,0 @@
|
||||||
CRITICAL: RAID md0 not found in notExisting.input
|
|
|
@ -1,6 +0,0 @@
|
||||||
Personalities : [raid1] [raid6] [raid5] [raid4]
|
|
||||||
md0 : active raid5 sdh1[6] sdg1[4] sdf1[3] sde1[2] sdd1[1] sdc1[0]
|
|
||||||
1464725760 blocks level 5, 64k chunk, algorithm 2 [6/5] [UUUUUU]
|
|
||||||
[==>..................] recovery = 12.6% (37043392/292945152) finish=127.5min speed=33440K/sec
|
|
||||||
|
|
||||||
unused devices: <none>
|
|
|
@ -1 +0,0 @@
|
||||||
WARNING: Recovering: progress=12.6% finish=127.5min speed=33440K/sec | 'raid level'=5 'size in blocks'=1464725760 'num devices'=6 'num bad devices'=0;;1;0;6 'recovery progress'=12.6%;0;;0;100
|
|
22
test/run.sh
22
test/run.sh
|
@ -1,22 +0,0 @@
|
||||||
# Test runner: feeds every "<name>.input" file located next to this script to
# ../check_mdstat.sh (as "--raid md0 --input <file>") and compares the output
# with the content of "<name>.output".
# Exits 0 when every test matched, 1 otherwise.
dir="$(dirname -- "$0")"

ERROR=false
# Iterate with a glob instead of parsing "ls", so paths containing spaces or
# glob characters stay intact.
for f in "$dir"/*.input; do
  # Without any match the pattern itself is returned; skip it.
  [ -e "$f" ] || continue

  name="$(basename -- "$f" .input)"
  RESULT="$("$dir/../check_mdstat.sh" --raid md0 --input "$f")"
  EXPECTED="$(cat -- "$dir/$name.output")"
  if [ "$RESULT" != "$EXPECTED" ]; then
    echo "Error for test $name:"
    echo " Expected"
    echo " $EXPECTED"
    echo " and got"
    echo " $RESULT"
    ERROR=true
  fi
done

if [ "$ERROR" = true ]; then
  exit 1
else
  echo "Tests passed."
  exit 0
fi
|
|
Loading…
Reference in a new issue