check_mdstat.sh/check_mdstat.sh

217 lines
5.3 KiB
Bash
Raw Normal View History

#!/bin/bash
#
# Created by Sebastian Grewe, Jammicron Technology
2020-02-08 16:26:05 +01:00
# Changes by Jasem Elayeb on 02.03.2016
# Changes by Jonny007-MKD on 06.02.2020
# JE: add Physical Disks Name RAID_DISKS
# JE: add Physical Disks Status DISKS_STATUS
# JE: add Array Names RAID_ARRAY
2020-02-08 16:26:05 +01:00
# J007: Only check a single array
2020-02-08 16:26:05 +01:00
# ---------------------------------------------------------------------------
# Command line handling
#
#   -r, --raid NAME    name of the array to check, e.g. md0 (required)
#   -i, --input FILE   file to read instead of /proc/mdstat
#   -d, --debug        print debug output
#   -h, -?, --help     print usage and exit 0
#
# Unknown arguments are ignored.
# ---------------------------------------------------------------------------

# Without any argument there is nothing to check.
if [[ $# -lt 1 || -z "$1" ]]; then
  echo "ERROR: pass raid name as argument"
  exit 1
fi

INPUT="/proc/mdstat"
DEBUG=false

while [[ $# -gt 0 ]]; do
  case "$1" in
    -r|--raid)
      TARGET_RAID_NAME="$2"
      # Consume the value too, so it is never mistaken for an option.
      # Guarded because "shift" past the end of the list fails without shifting.
      if [[ $# -gt 1 ]]; then shift; fi
      ;;
    -i|--input)
      INPUT="$2"
      if [[ $# -gt 1 ]]; then shift; fi
      ;;
    -d|--debug)
      DEBUG=true
      ;;
    # '-?' is quoted: unquoted, "?" is a glob that matches ANY two-character
    # option, so a typo such as -x would print the help and exit 0 (= OK).
    -h|'-?'|--help)
      echo "Check /proc/mdstat. Arguments:"
      echo "--raid NAME: Raid name, e.g. md0"
      echo "--input FILE: Read from this file. Default: /proc/mdstat"
      echo "--debug: Print debug output"
      exit 0
      ;;
  esac
  shift
done

# The array name is mandatory.
if [[ -z "$TARGET_RAID_NAME" ]]; then
  echo "ERROR: pass --raid as argument"
  exit 255
fi
2020-02-08 16:26:05 +01:00
function log {
  # Print a debug message prefixed with " > " when debugging is enabled.
  # Globals:   DEBUG (read) - only the exact string "true" enables output
  # Arguments: the message; several arguments are joined with single spaces
  # Outputs:   the message on stdout
  #
  # DEBUG is compared as a string rather than executed as a command, so an
  # unset or unexpected value disables logging instead of enabling it.
  if [[ "$DEBUG" == true ]]; then
    printf '%s\n' " > $*"
  fi
}
function logLine {
  # Echo a raw input line prefixed with " : " when debugging is enabled.
  # Globals:   DEBUG (read) - only the exact string "true" enables output
  # Arguments: the line; several arguments are joined with single spaces
  # Outputs:   the line on stdout
  #
  # DEBUG is compared as a string rather than executed as a command, so an
  # unset or unexpected value disables logging instead of enabling it.
  if [[ "$DEBUG" == true ]]; then
    printf '%s\n' " : $*"
  fi
}
CURRENT_RAID= # string: name taken from the most recent header line
function setCurrentRaid {
  # Called for every input line, passed in $1.
  # A line that does not start with whitespace and contains a colon starts a
  # new section (e.g. "md0 : active raid1 ..."); its first word becomes
  # CURRENT_RAID. Any other line leaves CURRENT_RAID untouched.
  # Globals:   CURRENT_RAID (written)
  # Arguments: $1 - one line of the mdstat input
  local line="$1"
  # [[:space:]] is required here: inside a POSIX bracket expression "\s" means
  # "backslash or s", so "[^\s]" also accepted indented lines such as
  # "      bitmap: 0/8 pages ..." and treated them as a new array.
  local headerPattern='^[^[:space:]].+:.+'
  if [[ $line =~ $headerPattern ]]; then
    local beforeColon="${line%:*}"   # Remove ":" and everything after it
    CURRENT_RAID="${beforeColon% *}" # Cut at the last blank ("md0 " -> "md0")
    log "SET CURRENT RAID"
  fi
  log "CURRENT RAID: $CURRENT_RAID"
}
STATE= # "active", "started", "inactive"
LEVEL= # level word without its "raid" prefix (e.g. 1, 5, 10); empty if the line has none
DEVICES= # member device names separated by single spaces
function setStateAndDevicesAndLevel {
  # Called with the header line of the target array in $1, e.g.
  #   md0 : active raid1 sdb1[1] sda1[0]
  #   md1 : active (auto-read-only) raid5 sdc1[0] sdd1[1](F)
  #   md2 : inactive sdb1[1](S) sda1[0](S)
  # Globals:   STATE, LEVEL, DEVICES (all overwritten on every call)
  # Arguments: $1 - the complete header line
  local line="$1"
  local definition="${line#*: }" # Remove the "<name> : " prefix
  local -a words=()
  # read -a splits on blanks without glob-expanding tokens like "sda1[0]",
  # which an unquoted "for x in $var" would match against files in the cwd.
  IFS=$' \t' read -r -a words <<< "$definition"

  STATE="${words[0]}"
  LEVEL=
  DEVICES=

  local word
  for word in "${words[@]:1}"; do
    case "$word" in
      '('*')')
        # Flag such as "(auto-read-only)": neither a level nor a device
        ;;
      *'['*)
        # Device token "name[index]" with optional "(F)"/"(S)" suffix
        DEVICES="${DEVICES:+$DEVICES }${word%%\[*}"
        ;;
      *)
        # The level is the first plain word before any device; inactive
        # arrays have none, so LEVEL stays empty for them.
        if [[ -z "$LEVEL" && -z "$DEVICES" ]]; then
          LEVEL="${word#raid}" # Remove the raid prefix
        fi
        ;;
    esac
  done

  log "STATE = $STATE"
  log "LEVEL = $LEVEL"
  log "DEVICES = $DEVICES"
}
SIZE_IN_BLOCKS="!" # int
NUM_DEVICES=0 # int with number of devices in raid
BAD_DEVICES=0 # int with number of "_" in "[UUU_UU]"
function parseConfigStatusLine {
  # Called for the 1st line after the raid definition line, e.g.
  #   "      976630336 blocks super 1.2 [2/2] [UU]"
  # Globals:   SIZE_IN_BLOCKS - set to the first word of the line
  #            NUM_DEVICES    - number of flags in the trailing "[UU_]" word
  #            BAD_DEVICES    - number of "_" flags in that word
  # Arguments: $1 - the status line
  #
  # Lines without a trailing "[U_]" word (e.g. raid0: "... 512k chunks") yield
  # 0 devices / 0 bad devices instead of counting letters of an unrelated word.
  local -a words=()
  # read -a trims and splits without glob-expanding "[UU]" against the cwd
  IFS=$' \t' read -r -a words <<< "$1"

  SIZE_IN_BLOCKS="${words[0]}"
  NUM_DEVICES=0
  BAD_DEVICES=0

  local wordCount=${#words[@]}
  local lastWord=""
  if (( wordCount > 0 )); then
    lastWord="${words[wordCount-1]}"
  fi

  local statusPattern='^\[([U_]+)\]$'
  if [[ $lastWord =~ $statusPattern ]]; then
    local flags="${BASH_REMATCH[1]}"
    NUM_DEVICES=${#flags}   # one flag per device
    flags="${flags//U/}"    # Remove all U, so only _ remain
    BAD_DEVICES=${#flags}
  fi

  log "SIZE_IN_BLOCKS = $SIZE_IN_BLOCKS"
  log "NUM_DEVICES = $NUM_DEVICES"
  log "BAD_DEVICES = $BAD_DEVICES"
  # TODO (left open by the original author, unspecified)
}
function parseBitmapOrRecoveryLine {
  # Dispatcher for the lines that follow the config/status line of an array.
  # A line mentioning "bitmap" is handed to parseBitmapLine, a line mentioning
  # "recovery" to parseRecoveryLine; a line containing both goes to both, in
  # that order. Any other line is ignored.
  # Arguments: $1 - the line to classify
  local statusLine="$1"

  case "$statusLine" in
    *bitmap*) parseBitmapLine "$statusLine" ;;
  esac

  case "$statusLine" in
    *recovery*) parseRecoveryLine "$statusLine" ;;
  esac
}
function parseBitmapLine {
  # Placeholder: the bitmap line is recognised but nothing is extracted from
  # it yet. Succeeds without output and without touching any global.
  # TODO
  return 0
}
RECOVERY_PROGRESS= # float in percent, without the "%" sign
RECOVERY_INFO= # finish and speed
function parseRecoveryLine {
  # Called for the 2nd or 3rd line after the raid definition line, e.g.
  #   "  [==>......]  recovery = 12.6% (370/2929) finish=127.5min speed=33440K/sec"
  # Globals:   RECOVERY_PROGRESS - number between "recovery =" and "%"
  #            RECOVERY_INFO     - "finish=..." up to the end of the line, or
  #                                empty when the line has no "finish=" part
  # Arguments: $1 - the recovery line
  local line="$1"

  # The percentage is padded to a fixed width ("recovery =  2.6%",
  # "recovery =100.0%"), so cut right after "=" and drop all blanks instead of
  # relying on exactly one space being present.
  local afterLabel="${line##*recovery =}"
  local percent="${afterLabel%%%*}" # Keep everything before the first "%"
  RECOVERY_PROGRESS="${percent//[[:space:]]/}"

  if [[ "$line" == *finish=* ]]; then
    RECOVERY_INFO="finish=${line##*finish=}"
  else
    RECOVERY_INFO=""
  fi

  log "RECOVERY_PROGRESS = $RECOVERY_PROGRESS"
  log "RECOVERY_INFO = $RECOVERY_INFO"
}
function printOutputAndExit {
  # Print the plugin result line and terminate the script.
  # Globals (read): STATE, LEVEL, DEVICES, SIZE_IN_BLOCKS, NUM_DEVICES,
  #                 BAD_DEVICES, RECOVERY_PROGRESS, RECOVERY_INFO
  # Outputs: one line on stdout: "<RESULT>[: <info>] | <performance data>"
  # Exits:   0 for OK, 1 for WARNING, 2 for CRITICAL
  local result="OK"
  local info=""

  # A running recovery is a warning on its own ...
  if [[ -n "$RECOVERY_PROGRESS" ]]; then
    result="WARNING"
    info="Recovering: progress=$RECOVERY_PROGRESS% $RECOVERY_INFO"
  fi

  # ... while an array that is not running or has missing members is critical.
  if [[ "$STATE" != "active" && "$STATE" != "started" ]]; then
    result="CRITICAL"
    info="${info:+$info. }State is $STATE"
  fi
  # Quoted with a default so an empty value cannot break the test syntax
  if [ "${BAD_DEVICES:-0}" -gt 0 ] 2>/dev/null; then
    result="CRITICAL"
    info="${info:+$info. }Missing $BAD_DEVICES of $NUM_DEVICES devices"
  fi

  # Assemble the complete line first, then print it in one go.
  local output="$result"
  if [[ -n "$info" ]]; then
    output+=": $info"
  fi
  output+=" |"
  output+=" 'raid level'=$LEVEL"
  output+=" 'size in blocks'=$SIZE_IN_BLOCKS"
  output+=" 'num devices'=$NUM_DEVICES"
  output+=" 'num bad devices'=$BAD_DEVICES;;1;0;$NUM_DEVICES"
  if [[ -n "$RECOVERY_PROGRESS" ]]; then
    output+=" 'recovery progress'=$RECOVERY_PROGRESS%;0;;0;100"
  fi
  printf '%s\n' "$output"

  case $result in
    OK) exit 0 ;;
    WARNING) exit 1 ;;
    CRITICAL) exit 2 ;;
  esac
}
# ---------------------------------------------------------------------------
# Main: walk through the input line by line and collect the data of the
# target array; the result is printed as soon as its section has ended.
# ---------------------------------------------------------------------------
HAD_TARGET_RAID=0 # number of lines already consumed for the target array

if [[ ! -r "$INPUT" ]]; then
  echo "CRITICAL: cannot read $INPUT"
  exit 2
fi

# "|| [[ -n $line ]]" also processes a last line without trailing newline
while IFS= read -r line || [[ -n "$line" ]]; do
  logLine "$line"
  setCurrentRaid "$line"
  if [[ "$CURRENT_RAID" == "$TARGET_RAID_NAME" ]]; then
    # The position inside the section decides how the line is parsed
    case $HAD_TARGET_RAID in
      0) setStateAndDevicesAndLevel "$line" ;;
      1) parseConfigStatusLine "$line" ;;
      *) parseBitmapOrRecoveryLine "$line" ;;
    esac
    HAD_TARGET_RAID=$((HAD_TARGET_RAID + 1))
  elif [[ $HAD_TARGET_RAID -gt 0 ]]; then
    # First line of the next section: the target array is complete
    printOutputAndExit
  fi
done < "$INPUT"

# The input ended while still inside the target section (no following
# section such as "unused devices:"): report what was collected instead of
# claiming the array does not exist.
if [[ $HAD_TARGET_RAID -gt 0 ]]; then
  printOutputAndExit
fi

echo "CRITICAL: RAID $TARGET_RAID_NAME not found in $(basename -- "$INPUT")"
exit 2