2020-02-06 21:50:46 +01:00
|
|
|
#!/bin/bash
|
|
|
|
#
|
|
|
|
# Created by Sebastian Grewe, Jammicron Technology
|
2020-02-08 16:26:05 +01:00
|
|
|
# Changes by Jasem Elayeb on 02.03.2016
|
|
|
|
# Changes by Jonny007-MKD on 06.02.2020
|
2020-02-06 21:50:46 +01:00
|
|
|
# JE: add Physical Disks Name RAID_DISKS
|
|
|
|
# JE: add Physical Disks Status DISKS_STATUS
|
|
|
|
# JE: add Array Names RAID_ARRAY
|
2020-02-08 16:26:05 +01:00
|
|
|
# J007: Only check a single array
|
2020-02-06 21:50:46 +01:00
|
|
|
|
2020-02-08 16:26:05 +01:00
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Command line handling
#
# Recognised options:
#   -r, --raid NAME    name of the array to check (mandatory)
#   -i, --input FILE   file to parse instead of /proc/mdstat
#   -d, --debug        enable the debug output of log / logLine
#   -h, -?, --help     print a short usage text and exit 0
# Arguments that match none of the options are skipped silently.
# ---------------------------------------------------------------------------

# At least one non-empty argument is required.
if [[ $# -lt 1 || -z "$1" ]]; then
    echo "ERROR: pass raid name as argument"
    exit 1
fi

INPUT="/proc/mdstat" # file that is parsed; overridden by --input
DEBUG=false          # switched to true by --debug

while [[ $# -gt 0 ]]; do
    case "$1" in
        # Options with a value consume one extra positional parameter.
        -r|--raid)  TARGET_RAID_NAME="${2-}"; shift;;
        -i|--input) INPUT="${2-}"; shift;;
        -d|--debug) DEBUG=true;;
        # An unescaped '?' is a glob character in a case pattern and matches
        # ANY single character, so "-?" would swallow every unknown
        # two-character option. Escape it to match a literal question mark.
        -h|-\?|--help)
            echo "Check /proc/mdstat. Arguments:"
            echo "--raid NAME: Raid name, e.g. md0"
            echo "--input FILE: Read from this file. Default: /proc/mdstat"
            echo "--debug: Print debug output while parsing"
            exit 0
            ;;
    esac
    shift
done

# --raid is mandatory (an empty name is rejected as well).
if [[ -z "$TARGET_RAID_NAME" ]]; then
    echo "ERROR: pass --raid as argument"
    exit 255
fi
|
|
|
|
|
2020-02-08 16:26:05 +01:00
|
|
|
# Print a debug message prefixed with " > ".
# Globals:   DEBUG (read) - output is produced only if it is exactly "true".
#            The value is compared as a string instead of being executed as
#            a command, so an unset or empty DEBUG means "no debug output".
# Arguments: $@ - message words, joined with single spaces
# Outputs:   the message on stdout
function log {
    if [[ "${DEBUG-}" == true ]]; then
        printf ' > %s\n' "$*"
    fi
}
|
|
|
|
# Print an input line for debugging, prefixed with " : ".
# Globals:   DEBUG (read) - output is produced only if it is exactly "true".
#            The value is compared as a string instead of being executed as
#            a command, so an unset or empty DEBUG means "no debug output".
# Arguments: $@ - the line (or words) to print, joined with single spaces
# Outputs:   the line on stdout
function logLine {
    if [[ "${DEBUG-}" == true ]]; then
        printf ' : %s\n' "$*"
    fi
}
|
2020-02-09 14:40:31 +01:00
|
|
|
NL=$'\n' # newline character; not referenced in the visible part of this script

CURRENT_RAID= # string: name of the section the most recent header line started

# Track which section of the mdstat file the parser is currently in.
#
# Called once for every input line. A line that starts with a non-whitespace
# character and contains a colon (e.g. "md0 : active raid1 ...",
# "Personalities : [raid1]", "unused devices: <none>") starts a new section;
# the text before the colon, cut at its last space, becomes CURRENT_RAID.
# Indented or empty lines belong to the current section and leave
# CURRENT_RAID untouched.
#
# Globals:   CURRENT_RAID (written)
# Arguments: $1 - the input line
function setCurrentRaid {
    local line="$1"
    # bash matches with POSIX ERE, where "\s" inside a bracket expression
    # means "backslash or the letter s", not "whitespace". With [^\s] the
    # indented "bitmap: ..." line was mistaken for a new section header,
    # hence the explicit character class.
    local headerRe='^[^[:space:]].+:.+'

    if [[ $line =~ $headerRe ]]; then
        local beforeColon="${line%:*}" # drop the colon and everything after it
        CURRENT_RAID="${beforeColon% *}" # drop the blank in front of the colon
        log "SET CURRENT RAID"
    fi
    log "CURRENT RAID: $CURRENT_RAID"
}
|
|
|
|
|
|
|
|
STATE= # "active", "started", "inactive"
LEVEL= # personality without the "raid" prefix, e.g. "1"; empty if the line names none
declare -A DEVICES        # key = device index (number in brackets), value = device name
declare -A FAILED_DEVICES # key = device index, value = "true" if flagged (F), else "false"

# Parse the line that starts a RAID section, e.g.
#   md0 : active raid1 sdb1[1] sda1[0](F)
#   md1 : active (auto-read-only) raid5 sda[0] sdb[1] sdc[2]
#   md2 : inactive sdb1[0](S)
#
# Globals:   STATE, LEVEL, DEVICES, FAILED_DEVICES (all written; the two
#            arrays are emptied first)
# Arguments: $1 - the complete section header line
function setStateAndDevicesAndLevel {
    local line="$1"
    local rest="${line#*: }" # everything after the "name : " prefix
    local -a words=()
    local word devName devTail devIndex devFlags
    local pos=1 # index of the next word to interpret; words[0] is the state

    # read -a splits on whitespace WITHOUT pathname expansion. An unquoted
    # "for x in $rest" would treat entries like "sda1[0]" as glob patterns.
    read -r -a words <<< "$rest"

    STATE="${words[0]-}"
    LEVEL=""
    DEVICES=()
    FAILED_DEVICES=()

    # An optional "(read-only)" / "(auto-read-only)" marker follows the state.
    if [[ "${words[pos]-}" == \(* ]]; then
        ((pos++))
    fi

    # The personality (raid1, raid10, linear, ...) is missing for inactive
    # arrays; a word containing '[' is already a device entry.
    if [[ -n "${words[pos]-}" && "${words[pos]}" != *\[* ]]; then
        LEVEL="${words[pos]#raid}" # remove the raid prefix, keep other names intact
        ((pos++))
    fi

    for word in "${words[@]:pos}"; do
        # Device entries look like name[index] with optional (X) flags.
        [[ "$word" == *\[*\]* ]] || continue
        devName="${word%%\[*}"     # device name
        devTail="${word#*\[}"      # all after device name
        devIndex="${devTail%%\]*}" # device index
        devFlags="${devTail#*\]}"  # all after device index, e.g. "(F)" or "(S)"

        DEVICES[$devIndex]="$devName"
        # Only (F) marks a faulty device. Other flags such as (S) must not
        # be reported as failures.
        if [[ "$devFlags" == *"(F)"* ]]; then
            FAILED_DEVICES[$devIndex]=true
        else
            FAILED_DEVICES[$devIndex]=false
        fi
    done

    log "STATE = $STATE"
    log "LEVEL = $LEVEL"
    log "DEVICES = ${DEVICES[*]}"
}
|
|
|
|
|
2020-02-09 14:40:31 +01:00
|
|
|
SIZE_IN_BLOCKS="!"   # int
NUM_DEVICES=0        # int with total number of devices in raid
NUM_ACTIVE_DEVICES=0 # int with number of active devices in raid

# Parse the first line after the RAID section header, e.g.
#   1464725760 blocks level 5, 64k chunk, algorithm 2 [4/3] [UUU_]
#
# The first word becomes SIZE_IN_BLOCKS. The "[total/active]" pair supplies
# NUM_DEVICES and NUM_ACTIVE_DEVICES. Lines without such a pair (for example
# "2095104 blocks super 1.2 512k chunks") reset both counters to 0 instead
# of deriving numbers from an unrelated word.
#
# Globals:   SIZE_IN_BLOCKS, NUM_DEVICES, NUM_ACTIVE_DEVICES (written)
# Arguments: $1 - the line to parse (leading whitespace is ignored)
function parseConfigStatusLine {
    local line="$1"
    local countsRe='\[([0-9]+)/([0-9]+)\]'

    # Trim left with parameter expansion. Trimming via an unquoted
    # "$(echo $line)" would subject "[UU]" to pathname expansion.
    line="${line#"${line%%[![:space:]]*}"}"

    SIZE_IN_BLOCKS="${line%% *}"

    if [[ $line =~ $countsRe ]]; then
        NUM_DEVICES="${BASH_REMATCH[1]}"
        NUM_ACTIVE_DEVICES="${BASH_REMATCH[2]}"
    else
        NUM_DEVICES=0
        NUM_ACTIVE_DEVICES=0
    fi

    log "SIZE_IN_BLOCKS = $SIZE_IN_BLOCKS"
    log "NUM_DEVICES = $NUM_DEVICES"
    log "NUM_ACTIVE_DEVICES = $NUM_ACTIVE_DEVICES"
    # TODO: the remaining fields of the line are not evaluated
}
|
|
|
|
|
|
|
|
# Dispatcher for every line that follows the config/status line of the
# target RAID section.
# A line containing "bitmap" is handed to parseBitmapLine, a line containing
# "recovery" to parseRecoveryLine. A line containing both is handed to both;
# any other line is ignored.
# Arguments: $1 - the line to inspect
function parseBitmapOrRecoveryLine {
    local statusLine="$1"

    case "$statusLine" in
        *bitmap*) parseBitmapLine "$statusLine";;
    esac

    case "$statusLine" in
        *recovery*) parseRecoveryLine "$statusLine";;
    esac
}
|
|
|
|
|
|
|
|
# Placeholder for parsing the "bitmap: ..." line of a RAID section.
# Nothing is extracted yet; the function only succeeds.
# Arguments: $1 - the bitmap line (currently ignored)
# Returns:   0
function parseBitmapLine {
    # TODO: bitmap information is not evaluated yet
    return 0
}
|
|
|
|
|
|
|
|
RECOVERY_PROGRESS= # float in percent, without the % sign; empty if no recovery is running
RECOVERY_INFO=     # finish and speed, e.g. "finish=127.5min speed=33440K/sec"

# Parse a recovery progress line, e.g.
#   [==>.......]  recovery = 12.6% (37043392/292945152) finish=127.5min speed=33440K/sec
#
# The amount of blanks between "=" and the number varies ("= 12.6%",
# "=  2.6%", "=100.0%"), so the number is extracted with a regular
# expression instead of cutting at a fixed "recovery = " prefix.
#
# Globals:   RECOVERY_PROGRESS (written; empty if no percentage is found)
#            RECOVERY_INFO     (written; empty if the line has no "finish=")
# Arguments: $1 - the line to parse
function parseRecoveryLine {
    local line="$1"
    local progressRe='recovery[[:space:]]*=[[:space:]]*([0-9]+(\.[0-9]+)?)%'

    if [[ $line =~ $progressRe ]]; then
        RECOVERY_PROGRESS="${BASH_REMATCH[1]}"
    else
        RECOVERY_PROGRESS=""
    fi

    # Keep everything from the last "finish=" to the end of the line.
    if [[ "$line" == *finish=* ]]; then
        RECOVERY_INFO="finish=${line##*finish=}"
    else
        RECOVERY_INFO=""
    fi

    log "RECOVERY_PROGRESS = $RECOVERY_PROGRESS"
    log "RECOVERY_INFO = $RECOVERY_INFO"
}
|
|
|
|
|
|
|
|
# Print the plugin result line ("STATUS: text | perfdata") and exit.
#
# Globals read: STATE, LEVEL, DEVICES, FAILED_DEVICES, SIZE_IN_BLOCKS,
#               NUM_DEVICES, NUM_ACTIVE_DEVICES, RECOVERY_PROGRESS,
#               RECOVERY_INFO
# Outputs:      one line on stdout
# Exit status:  0 = OK, 1 = WARNING, 2 = CRITICAL
function printOutputAndExit {
    local result="OK"
    local info=""
    local numFailedDevices=0
    local numSpareDevices=0
    local devicePerfData=""
    local perfData=""
    local devIndex
    local -a devIndices=()

    # Fewer active devices than configured ones
    if [[ "$NUM_ACTIVE_DEVICES" -lt "$NUM_DEVICES" ]]; then
        result="CRITICAL"
        info="${info:+${info}. }Missing $((NUM_DEVICES - NUM_ACTIVE_DEVICES)) of $NUM_DEVICES devices"
    fi

    # A running recovery. NOTE(review): this replaces a CRITICAL set by the
    # check above with WARNING. The order is kept as it was - presumably a
    # rebuilding array is meant to be reported as WARNING; confirm.
    if [[ -n "$RECOVERY_PROGRESS" ]]; then
        result="WARNING"
        info="${info:+${info}. }Recovering: progress=$RECOVERY_PROGRESS% $RECOVERY_INFO"
    fi

    # Walk over the devices in numerical order of their index. One pass
    # collects both the status text and the per-device perfdata.
    if (( ${#DEVICES[@]} > 0 )); then
        mapfile -t devIndices < <(printf '%s\n' "${!DEVICES[@]}" | sort -n)
    fi
    for devIndex in "${devIndices[@]}"; do
        # An entry without a device name is counted as spare
        if [[ -z "${DEVICES[$devIndex]}" ]]; then
            ((numSpareDevices++))
            devicePerfData+=" 'device $devIndex: spare'=$devIndex"
            continue
        fi
        # Compare as string; the stored value must not be run as a command
        if [[ "${FAILED_DEVICES[$devIndex]-}" == true ]]; then
            result="CRITICAL"
            ((numFailedDevices++))
            info="${info:+${info}. }Device ${DEVICES[$devIndex]} failed"
            devicePerfData+=" 'device $devIndex: ${DEVICES[$devIndex]} failed'=$devIndex;;$devIndex"
            continue
        fi
        devicePerfData+=" 'device $devIndex: ${DEVICES[$devIndex]}'=$devIndex"
    done

    # Any state other than "active" / "started" is critical
    if [[ "$STATE" != "active" && "$STATE" != "started" ]]; then
        result="CRITICAL"
        info="${info:+${info}. }State is $STATE"
    fi

    perfData="'raid level'=$LEVEL"
    perfData+=" 'size in blocks'=$SIZE_IN_BLOCKS"
    perfData+=" 'num devices'=$NUM_DEVICES"
    perfData+=" 'num failed devices'=$numFailedDevices;;1;0;$NUM_DEVICES"
    perfData+=" 'num spare devices'=$numSpareDevices;;;0;$NUM_DEVICES"
    if [[ -n "$RECOVERY_PROGRESS" ]]; then
        perfData+=" 'recovery progress'=$RECOVERY_PROGRESS%;0;;0;100"
    fi
    perfData+="$devicePerfData"

    printf '%s | %s\n' "$result${info:+: $info}" "$perfData"

    case "$result" in
        OK)       exit 0;;
        WARNING)  exit 1;;
        CRITICAL) exit 2;;
    esac
}
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Main: read the input line by line and evaluate the section of the target
# RAID.
# ---------------------------------------------------------------------------

HAD_TARGET_RAID=0 # number of lines of the target RAID section consumed so far

if [[ ! -r "$INPUT" ]]; then
    echo "CRITICAL: cannot read $INPUT"
    exit 2
fi

# "|| [[ -n $line ]]" also processes a last line without trailing newline.
while IFS= read -r line || [[ -n "$line" ]]; do
    logLine "$line"

    setCurrentRaid "$line"

    if [[ "$CURRENT_RAID" == "$TARGET_RAID_NAME" ]]; then
        # The position inside the section decides how the line is parsed.
        case $HAD_TARGET_RAID in
            0) setStateAndDevicesAndLevel "$line";;
            1) parseConfigStatusLine "$line";;
            *) parseBitmapOrRecoveryLine "$line";;
        esac
        ((HAD_TARGET_RAID++))
    elif (( HAD_TARGET_RAID > 0 )); then
        # The next section started, so the target RAID is complete.
        printOutputAndExit
    fi
done < "$INPUT"

# The input ended while still inside the target section (no further section
# header followed). The array was found, so report it instead of falling
# through to "not found".
if (( HAD_TARGET_RAID > 0 )); then
    printOutputAndExit
fi

echo "CRITICAL: RAID $TARGET_RAID_NAME not found in $(basename -- "$INPUT")"
exit 2
|
|
|
|
|