#!/bin/bash
#
# Checks the status of a single md RAID array as listed in /proc/mdstat.
# Arguments: --raid NAME [--input FILE] [--debug]
#
# Created by Sebastian Grewe, Jammicron Technology
# Changes by Jasem Elayeb on 02.03.2016
# Changes by Jonny007-MKD on 06.02.2020
#
# JE: add Physical Disks Name RAID_DISKS
# JE: add Physical Disks Status DISKS_STATUS
# JE: add Array Names RAID_ARRAY
# J007: Only check a single array

if [[ $# -lt 1 || -z "$1" ]]; then
  echo "ERROR: pass raid name as argument"
  exit 1
fi

INPUT="/proc/mdstat"
DEBUG=false
while [[ $# -gt 0 ]]; do
  case "$1" in
    -r|--raid)
      TARGET_RAID_NAME="$2"
      # Consume the value as well, so it is not re-examined as an option
      # on the next iteration. Guarded so a trailing "--raid" cannot make
      # the shift fail.
      if [[ $# -gt 1 ]]; then shift; fi
      ;;
    -i|--input)
      INPUT="$2"
      if [[ $# -gt 1 ]]; then shift; fi
      ;;
    -d|--debug)
      DEBUG=true
      ;;
    # "-?" is a glob: it matches any two-character option not handled above
    # (for example -h), so unknown short options show the help text.
    -?|--help)
      echo "Check /proc/mdstat. Arguments:"
      echo "--raid NAME: Raid name, e.g. md0"
      echo "--input FILE: Read from this file. Default: /proc/mdstat"
      exit 0
      ;;
  esac
  shift
done

if [[ -z "$TARGET_RAID_NAME" ]]; then
  echo "ERROR: pass --raid as argument"
  exit 255
fi

# Prints a debug message (prefixed with " > ") when --debug was given.
function log {
  if [[ "$DEBUG" == true ]]; then
    echo " > $*"
  fi
}

# Prints a raw input line (prefixed with " : ") when --debug was given.
function logLine {
  if [[ "$DEBUG" == true ]]; then
    echo " : $*"
  fi
}

NL=$'\n'

CURRENT_RAID=      # string

# Called for every input line ($1).
# A line that starts in column 0 and contains a colon opens a new stanza;
# its first word is stored in CURRENT_RAID. Indented lines (block counts,
# recovery progress, "bitmap: ...") leave CURRENT_RAID untouched.
function setCurrentRaid {
  local line="$1"
  # [[:space:]] is used because "\s" inside a bracket expression means
  # "backslash or s", not whitespace; the pattern is kept in a variable so
  # the shell does not interpret the brackets.
  local -r header_re='^[^[:space:]].+:.+'
  if [[ "$line" =~ $header_re ]]; then
    # First word: everything before the first whitespace or colon.
    CURRENT_RAID="${line%%[[:space:]:]*}"
    log "SET CURRENT RAID"
  fi
  log "CURRENT RAID: $CURRENT_RAID"
}

STATE=    # "active", "started", "inactive"
LEVEL=    # personality without the "raid" prefix (e.g. 1, 5, 10); empty if the line has none
declare -A DEVICES         # dict of strings. key = index, value = dev name
declare -A FAILED_DEVICES  # dict of "true"/"false". key = index

# Called with the stanza header line of the target array in $1, e.g.
#   md0 : active raid1 sdb1[1] sda1[0](F)
# Sets STATE, LEVEL, DEVICES and FAILED_DEVICES.
function setStateAndDevicesAndLevel {
  local line="$1"
  local rest="${line#*: }"   # everything after "NAME : "
  local -a words=()
  # read -a splits on whitespace WITHOUT pathname expansion; an unquoted
  # "for x in $rest" would treat tokens like "sdb1[1]" as glob patterns.
  IFS=$' \t\n' read -r -a words <<< "$rest"

  STATE="${words[0]}"
  LEVEL=

  local word devName devRest devIndex devFlags
  for word in "${words[@]:1}"; do
    # State modifiers such as "(auto-read-only)" are neither level nor device.
    if [[ "$word" == \(* ]]; then
      continue
    fi
    # A token without "[index]" is the personality. Only the first one is
    # used, and only a literal "raid" prefix is stripped.
    if [[ "$word" != *\[*\]* ]]; then
      if [[ -z "$LEVEL" ]]; then
        LEVEL="${word#raid}"
      fi
      continue
    fi
    devName="${word%%\[*}"     # device name
    devRest="${word#*\[}"      # all after device name
    devIndex="${devRest%%]*}"  # device index
    devFlags="${devRest#*]}"   # flags after the index, e.g. "(F)" or "(S)"
    DEVICES[$devIndex]="$devName"
    # Only "(F)" marks a faulty device; other suffixes such as "(S)" (spare)
    # or "(W)" (write-mostly) are not failures.
    if [[ "$devFlags" == *"(F)"* ]]; then
      FAILED_DEVICES[$devIndex]=true
    else
      FAILED_DEVICES[$devIndex]=false
    fi
  done

  log "STATE = $STATE"
  log "LEVEL = $LEVEL"
  log "DEVICES = ${DEVICES[*]}"
}

SIZE_IN_BLOCKS="!"      # int
NUM_DEVICES=0           # int with total number of devices in raid
NUM_ACTIVE_DEVICES=0    # int with number of active devices in raid

# Called with the first line after the stanza header in $1, e.g.
#   104320 blocks super 1.2 [2/1] [U_]
# Sets SIZE_IN_BLOCKS (first word) and, when a "[total/active]" token is
# present, NUM_DEVICES and NUM_ACTIVE_DEVICES. Without such a token the
# counters keep their previous values.
function parseConfigStatusLine {
  local line="$1"
  local -a words=()
  # Split without globbing: "[UU]" or "[2/2]" must not be matched against
  # file names in the current directory.
  IFS=$' \t\n' read -r -a words <<< "$line"

  SIZE_IN_BLOCKS="${words[0]}"

  local -r counts_re='^\[([0-9]+)/([0-9]+)\]$'
  local word
  for word in "${words[@]}"; do
    if [[ "$word" =~ $counts_re ]]; then
      NUM_DEVICES="${BASH_REMATCH[1]}"
      NUM_ACTIVE_DEVICES="${BASH_REMATCH[2]}"
      break
    fi
  done

  log "SIZE_IN_BLOCKS = $SIZE_IN_BLOCKS"
  log "NUM_DEVICES = $NUM_DEVICES"
  log "NUM_ACTIVE_DEVICES = $NUM_ACTIVE_DEVICES"
  # TODO
}

# Called for every further line of the target stanza ($1).
# Dispatches to parseBitmapLine and/or parseRecoveryLine depending on
# whether the line contains "bitmap" or "recovery".
function parseBitmapOrRecoveryLine {
  if [[ "$1" == *bitmap* ]]; then
    parseBitmapLine "$1"
  fi
  if [[ "$1" == *recovery* ]]; then
    parseRecoveryLine "$1"
  fi
}

# Placeholder: bitmap lines are currently ignored.
function parseBitmapLine {
  :  # TODO
}
RECOVERY_PROGRESS=    # float in percent
RECOVERY_INFO=        # finish and speed

# Called with a line containing "recovery" in $1, e.g.
#   [=>...]  recovery = 12.6% (37043392/292945152) finish=127.5min speed=33440K/sec
# Sets RECOVERY_PROGRESS (number without the percent sign) and RECOVERY_INFO
# ("finish=..." up to the end of the line). A value is only stored when the
# corresponding part is actually present in the line.
function parseRecoveryLine {
  local line="$1"
  # Whitespace around "=" is optional so padded values ("recovery =  1.2%")
  # do not end up with a leading space in the stored number.
  local -r progress_re='recovery[[:space:]]*=[[:space:]]*([0-9]+(\.[0-9]+)?)%'
  if [[ "$line" =~ $progress_re ]]; then
    RECOVERY_PROGRESS="${BASH_REMATCH[1]}"
  fi
  if [[ "$line" == *finish=* ]]; then
    RECOVERY_INFO="finish=${line##*finish=}"
  fi
  log "RECOVERY_PROGESS = $RECOVERY_PROGRESS"
  log "RECOVERY_INFO = $RECOVERY_INFO"
}

# Prints one status line "<RESULT>[: info] | <perfdata>" built from
# STATE, LEVEL, DEVICES, FAILED_DEVICES, SIZE_IN_BLOCKS, NUM_DEVICES,
# NUM_ACTIVE_DEVICES, RECOVERY_PROGRESS and RECOVERY_INFO, then exits with
# 0 (OK), 1 (WARNING) or 2 (CRITICAL). Never returns.
function printOutputAndExit {
  local result="OK"
  local info=""
  local numFailedDevices=0
  local numSpareDevices=0
  local devIndex

  # Quoted operands with "[" keep this a plain integer comparison; the file
  # content is never evaluated as an arithmetic expression.
  if [ "$NUM_ACTIVE_DEVICES" -lt "$NUM_DEVICES" ]; then
    result="CRITICAL"
    info="${info:+${info}. }Missing $((NUM_DEVICES - NUM_ACTIVE_DEVICES)) of $NUM_DEVICES devices"
  fi

  if [[ -n "$RECOVERY_PROGRESS" ]]; then
    # NOTE(review): this replaces an earlier CRITICAL (missing devices) with
    # WARNING while a recovery is running. Kept as in the original; confirm
    # that the downgrade is intended.
    result="WARNING"
    info="${info:+${info}. }Recovering: progress=$RECOVERY_PROGRESS% $RECOVERY_INFO"
  fi

  # Device indices in numeric order. The guard avoids feeding an empty line
  # into the list when no devices were parsed.
  local -a devIndices=()
  if (( ${#DEVICES[@]} > 0 )); then
    mapfile -t devIndices < <(printf '%s\n' "${!DEVICES[@]}" | sort -n)
  fi

  for devIndex in "${devIndices[@]}"; do
    # An entry with an empty device name is counted as a spare slot.
    if [[ -z "${DEVICES[$devIndex]}" ]]; then
      numSpareDevices=$((numSpareDevices + 1))
      continue
    fi
    # Compare the flag as a string instead of executing it as a command.
    if [[ "${FAILED_DEVICES[$devIndex]}" == true ]]; then
      result="CRITICAL"
      numFailedDevices=$((numFailedDevices + 1))
      info="${info:+${info}. }Device ${DEVICES[$devIndex]} failed"
    fi
  done

  if [[ "$STATE" != "active" && "$STATE" != "started" ]]; then
    result="CRITICAL"
    info="${info:+${info}. }State is $STATE"
  fi

  # Assemble the whole line first and emit it with a single printf.
  local output="$result"
  if [[ -n "$info" ]]; then
    output+=": $info"
  fi
  output+=" |"
  output+=" 'raid level'=$LEVEL"
  output+=" 'size in blocks'=$SIZE_IN_BLOCKS"
  output+=" 'num devices'=$NUM_DEVICES"
  output+=" 'num failed devices'=$numFailedDevices;;1;0;$NUM_DEVICES"
  output+=" 'num spare devices'=$numSpareDevices;;;0;$NUM_DEVICES"
  if [[ -n "$RECOVERY_PROGRESS" ]]; then
    output+=" 'recovery progress'=$RECOVERY_PROGRESS%;0;;0;100"
  fi
  for devIndex in "${devIndices[@]}"; do
    if [[ -z "${DEVICES[$devIndex]}" ]]; then
      output+=" 'device $devIndex: spare'=$devIndex"
    elif [[ "${FAILED_DEVICES[$devIndex]}" == true ]]; then
      # No trailing quote here: an unbalanced "'" corrupts the perfdata.
      output+=" 'device $devIndex: ${DEVICES[$devIndex]} failed'=$devIndex;;$devIndex"
    else
      output+=" 'device $devIndex: ${DEVICES[$devIndex]}'=$devIndex"
    fi
  done
  printf '%s\n' "$output"

  case "$result" in
    OK) exit 0 ;;
    WARNING) exit 1 ;;
    CRITICAL) exit 2 ;;
  esac
}

# Main: walk through the input line by line. Lines belonging to the target
# stanza are dispatched by their position within it (0 = header line,
# 1 = block/status line, 2+ = bitmap/recovery lines). The first line that
# belongs to another stanza ends the target stanza and triggers the output.
if [[ ! -r "$INPUT" ]]; then
  echo "CRITICAL: cannot read $INPUT"
  exit 2
fi

HAD_TARGET_RAID=0
# "|| [[ -n $line ]]" also processes a final line without trailing newline.
while IFS= read -r line || [[ -n "$line" ]]; do
  logLine "$line"
  setCurrentRaid "$line"
  if [[ "$CURRENT_RAID" == "$TARGET_RAID_NAME" ]]; then
    case "$HAD_TARGET_RAID" in
      0) setStateAndDevicesAndLevel "$line" ;;
      1) parseConfigStatusLine "$line" ;;
      *) parseBitmapOrRecoveryLine "$line" ;;
    esac
    HAD_TARGET_RAID=$((HAD_TARGET_RAID + 1))
  elif [[ "$HAD_TARGET_RAID" -gt 0 ]]; then
    printOutputAndExit
  fi
done < "$INPUT"

# The target stanza was the last one in the file: no following stanza
# triggered the output inside the loop, so report it now.
if [[ "$HAD_TARGET_RAID" -gt 0 ]]; then
  printOutputAndExit
fi

echo "CRITICAL: RAID $TARGET_RAID_NAME not found in $(basename -- "$INPUT")"
exit 2