#!/bin/bash
#
# Created by Sebastian Grewe, Jammicron Technology
# Changes by Jasem Elayeb on 02.03.2016
# Changes by Jonny007-MKD on 06.02.2020
# JE: add Physical Disks Name RAID_DISKS
# JE: add Physical Disks Status DISKS_STATUS
# JE: add Array Names RAID_ARRAY
# J007: Only check a single array
#
# Checks one software RAID array listed in /proc/mdstat and prints a single
# status line of the form
#     RESULT[: info] | 'label'=value ...
# Exit codes: 0 = OK, 1 = WARNING (array is recovering), 2 = CRITICAL
# (array not active/started, members missing, or array not found).
# Usage errors keep their historical exit codes (1 and 255).
#
# Usage: check_mdstat.sh --raid NAME      (e.g. --raid md0)

# Prints the help text to stdout.
function printUsage {
    echo "Check /proc/mdstat. Arguments:"
    echo "--raid NAME: Raid name, e.g. md0"
}

if [[ $# -lt 1 || -z "$1" ]]; then
    echo "ERROR: pass raid name as argument"
    exit 1
fi

# Initialised explicitly so that a variable of the same name inherited from
# the environment can never be mistaken for a command line argument.
TARGET_RAID_NAME=
while [[ $# -gt 0 ]]; do
    case "$1" in
        -r|--raid)
            TARGET_RAID_NAME="${2:-}"
            # Consume the option value too, so it is not re-interpreted as
            # an option on the next iteration.
            if [[ $# -gt 1 ]]; then shift; fi
            ;;
        -h|-\?|--help)
            printUsage
            # Stop here instead of falling through into the "--raid missing"
            # error; 3 so that a help invocation is never reported as OK.
            exit 3
            ;;
        *)
            # Unknown arguments are ignored.
            ;;
    esac
    shift
done

if [[ -z "$TARGET_RAID_NAME" ]]; then
    echo "ERROR: pass --raid as argument"
    exit 255
fi

# Debug helper. Disabled by default; uncomment the echo to trace the parser.
# Arguments: $@ - message
function log {
    :
    #echo " > $*"
}

CURRENT_RAID= # string: name of the section the current line belongs to

# Called for every line of the input.
# A line that does not start with whitespace and contains a colon opens a
# new section (e.g. "md0 : active raid1 ..."); CURRENT_RAID is set to the
# text before the first colon with trailing whitespace removed.
# All other lines (indented or blank) leave CURRENT_RAID untouched.
# Arguments: $1 - the line
# Globals:   CURRENT_RAID (written)
function setCurrentRaid {
    local line="$1"
    # Kept in a variable so the bracket expressions reach the regex engine
    # unmodified. [[:space:]] is used because "\s" is not a whitespace class
    # inside a bracket expression.
    local re_header='^[^[:space:]][^:]*:'
    if [[ $line =~ $re_header ]]; then
        local name="${line%%:*}"                  # text before the first colon
        name="${name%"${name##*[![:space:]]}"}"   # trim trailing whitespace
        CURRENT_RAID="$name"
        log "SET CURRENT RAID"
    fi
    log "CURRENT RAID: $CURRENT_RAID"
}

STATE=   # first word after the colon, e.g. "active", "started", "inactive"
LEVEL=   # level word with a leading "raid" removed, e.g. "1"; may stay empty
DEVICES= # member names separated by single spaces

# Called for the line that opens the target array's section, e.g.
#     md0 : active raid1 sdb1[1] sda1[0]
# Arguments: $1 - the complete line
# Globals:   STATE, LEVEL, DEVICES (written)
function setStateAndDevicesAndLevel {
    local line="$1"
    local rest="${line#*:}"   # everything after the first colon
    local -a words=()
    # read -a splits on whitespace without pathname expansion; member names
    # such as "sdb1[1]" are valid glob patterns and must not be expanded.
    read -r -a words <<< "$rest"

    STATE="${words[0]:-}"
    LEVEL=
    DEVICES=

    local word
    for word in "${words[@]:1}"; do
        case "$word" in
            '('*')')
                # Parenthesised flag word: neither a level nor a member.
                ;;
            *'['*)
                # Member entry "name[index]..." - keep only the name.
                DEVICES="${DEVICES:+$DEVICES }${word%%\[*}"
                ;;
            *)
                if [[ -z "$LEVEL" && -z "$DEVICES" ]]; then
                    # First plain word before any member is the level;
                    # only a literal "raid" prefix is removed.
                    LEVEL="${word#raid}"
                else
                    DEVICES="${DEVICES:+$DEVICES }$word"
                fi
                ;;
        esac
    done

    log "STATE = $STATE"
    log "LEVEL = $LEVEL"
    log "DEVICES = $DEVICES"
}

SIZE_IN_BLOCKS="U" # int; "U" (unknown) until a size has been parsed
NUM_DEVICES=0      # int: number of slots in the "[UU_]" member map
BAD_DEVICES=0      # int: number of "_" in the member map

# Called for the first line after the array definition line, e.g.
#     1953382400 blocks super 1.2 [2/2] [UU]
# Values are only overwritten when the corresponding pattern is present, so
# a line without a member map leaves NUM_DEVICES/BAD_DEVICES at 0.
# Arguments: $1 - the line
# Globals:   SIZE_IN_BLOCKS, NUM_DEVICES, BAD_DEVICES (written)
function parseConfigStatusLine {
    local line="$1"
    local re_size='^[[:space:]]*([0-9]+)[[:space:]]+blocks'
    local re_map='\[([U_]+)\][[:space:]]*$'

    if [[ $line =~ $re_size ]]; then
        SIZE_IN_BLOCKS="${BASH_REMATCH[1]}"
    fi

    if [[ $line =~ $re_map ]]; then
        local map="${BASH_REMATCH[1]}"
        NUM_DEVICES=${#map}        # one character per member slot
        local missing="${map//U/}" # drop all U so only "_" remain
        BAD_DEVICES=${#missing}
    fi

    log "SIZE_IN_BLOCKS = $SIZE_IN_BLOCKS"
    log "NUM_DEVICES = $NUM_DEVICES"
    log "BAD_DEVICES = $BAD_DEVICES"
}

# Called for every further line of the target array's section.
# Dispatches on the line's content; lines that mention neither "bitmap" nor
# "recovery" (including blank lines) are ignored.
# Arguments: $1 - the line
function parseBitmapOrRecoveryLine {
    if [[ "$1" == *bitmap* ]]; then
        parseBitmapLine "$1"
    fi
    if [[ "$1" == *recovery* ]]; then
        parseRecoveryLine "$1"
    fi
}

# Placeholder: bitmap lines are recognised but not evaluated yet.
# Arguments: $1 - the line
function parseBitmapLine {
    :
    # TODO
}

RECOVERY_PROGRESS= # float in percent, without the "%" sign
RECOVERY_INFO=     # "finish=..." and everything after it on the line

# Extracts the recovery progress from a line such as
#     [==>......]  recovery = 12.6% (370/2929) finish=127.5min speed=33440K/sec
# A line without a "recovery = N%" part leaves both globals untouched, so an
# unrelated line can never wipe a progress value parsed earlier.
# Arguments: $1 - the line
# Globals:   RECOVERY_PROGRESS, RECOVERY_INFO (written)
function parseRecoveryLine {
    local line="$1"
    local re_progress='recovery[[:space:]]*=[[:space:]]*([0-9]+(\.[0-9]+)?)%'

    if [[ ! $line =~ $re_progress ]]; then
        return 0
    fi
    RECOVERY_PROGRESS="${BASH_REMATCH[1]}"

    if [[ "$line" == *finish=* ]]; then
        RECOVERY_INFO="finish=${line##*finish=}"
    fi

    log "RECOVERY_PROGRESS = $RECOVERY_PROGRESS"
    log "RECOVERY_INFO = $RECOVERY_INFO"
}

# Prints the status line plus performance data and terminates the script.
# Globals read: STATE, LEVEL, SIZE_IN_BLOCKS, NUM_DEVICES, BAD_DEVICES,
#               RECOVERY_PROGRESS, RECOVERY_INFO
# Exit code: 0 for OK, 1 for WARNING, 2 for CRITICAL.
function printOutputAndExit {
    local result="OK"
    local info=""

    # The checks run in ascending severity so a later CRITICAL overrides an
    # earlier WARNING; the messages are joined with ". ".
    if [[ -n "$RECOVERY_PROGRESS" ]]; then
        result="WARNING"
        info="${info:+$info. }Recovering: $RECOVERY_PROGRESS $RECOVERY_INFO"
    fi

    if [[ "$STATE" != "active" && "$STATE" != "started" ]]; then
        result="CRITICAL"
        info="${info:+$info. }State is $STATE"
    fi

    if (( BAD_DEVICES > 0 )); then
        result="CRITICAL"
        # Report the number of missing members, not the total member count.
        info="${info:+$info. }Missing $BAD_DEVICES of $NUM_DEVICES devices"
    fi

    printf '%s' "$result"
    if [[ -n "$info" ]]; then
        printf ': %s' "$info"
    fi

    printf ' |'
    printf " 'raid level'=%s" "${LEVEL:-U}"
    printf " 'size in blocks'=%s" "$SIZE_IN_BLOCKS"
    printf " 'num devices'=%s" "$NUM_DEVICES"
    printf " 'num bad devices'=%s;;1;0;%s" "$BAD_DEVICES" "$NUM_DEVICES"
    if [[ -n "$RECOVERY_PROGRESS" ]]; then
        printf " 'recovery progress'=%s%%;0.1;;0;100" "$RECOVERY_PROGRESS"
    fi
    printf '\n'

    case "$result" in
        OK) exit 0 ;;
        WARNING) exit 1 ;;
        CRITICAL) exit 2 ;;
        *) exit 3 ;;
    esac
}


# Number of lines already consumed from the target array's section.
HAD_TARGET_RAID=0

# "|| [[ -n $line ]]" also processes a final line without trailing newline.
while IFS= read -r line || [[ -n "$line" ]]; do
    #echo " : $line"

    setCurrentRaid "$line"

    if [[ "$CURRENT_RAID" == "$TARGET_RAID_NAME" ]]; then
        case $HAD_TARGET_RAID in
            0) setStateAndDevicesAndLevel "$line" ;;
            1) parseConfigStatusLine "$line" ;;
            *) parseBitmapOrRecoveryLine "$line" ;;
        esac
        HAD_TARGET_RAID=$((HAD_TARGET_RAID + 1))
    elif (( HAD_TARGET_RAID > 0 )); then
        # The first line of the next section ends the target array's section.
        printOutputAndExit
    fi
done < /proc/mdstat

# The target array was the last section of the input.
if (( HAD_TARGET_RAID > 0 )); then
    printOutputAndExit
fi

echo "CRITICAL: RAID $TARGET_RAID_NAME not found in mdstat"
exit 2