You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
200 lines
5.7 KiB
200 lines
5.7 KiB
#!/bin/bash
#
# Monitoring check for a single Linux md (software RAID) array. The array's
# attributes are read from its "md" directory below the sysfs block directory.
#
# Created by Sebastian Grewe, Jammicron Technology
# Changes by Jasem Elayeb on 02.03.2016
# Changes by Jonny007-MKD on 06.02.2020
# JE: add Physical Disks Name RAID_DISKS
# JE: add Physical Disks Status DISKS_STATUS
# JE: add Array Names RAID_ARRAY
# J007: Only check a single array
#
# Usage: <script> --raid NAME [--input DIR] [--debug]

# Refuse to run without any argument (or with an empty first argument).
if [[ $# -lt 1 || -z "$1" ]]; then
  echo "ERROR: pass raid name as argument"
  exit 1
fi

INPUT="/sys/block"   # base directory; narrowed to <INPUT>/<raid>/md below
DEBUG=false          # set to true by -d/--debug; consulted by log

# Options that take a value consume it with an extra shift inside their arm;
# the shift at the bottom of the loop consumes the option itself.
# Unrecognised arguments are skipped silently.
while [[ $# -gt 0 ]]; do
  case "$1" in
    -r|--raid)
      TARGET_RAID_NAME="$2"
      shift
      ;;
    -i|--input)
      INPUT="$2"
      shift
      ;;
    -d|--debug)
      DEBUG=true
      ;;
    # The question mark is escaped: a bare "-?" is a glob that matches every
    # two-character option and would turn typos into "help, exit 0".
    -h|-\?|--help)
      echo "Check a Linux md RAID array via sysfs. Arguments:"
      echo "--raid NAME: Raid name, e.g. md0"
      echo "--input DIR: Read from this sysfs block directory. Default: /sys/block"
      echo "--debug: Print the values read from sysfs"
      exit 0
      ;;
  esac
  shift
done

if [[ -z "$TARGET_RAID_NAME" ]]; then
  echo "ERROR: pass --raid as argument"
  exit 255
fi
INPUT="$INPUT/$TARGET_RAID_NAME/md"

# Without the md directory there is nothing to inspect.
if [[ ! -d "$INPUT" ]]; then
  echo "CRITICAL: RAID $TARGET_RAID_NAME not found in $INPUT"
  exit 2
fi
|
|
|
# Print a debug line, prefixed with " > ", to stdout.
# Nothing is printed unless DEBUG is exactly "true"; DEBUG is compared as a
# string rather than executed, so an empty or unset DEBUG keeps logging off.
# Globals:   DEBUG (read)
# Arguments: message words, joined with spaces
function log {
  if [[ "$DEBUG" == true ]]; then
    printf ' > %s\n' "$*"
  fi
}

NL=$'\n'
log "Reading from $INPUT"
|
|
|
# Global state describing the array. Everything starts empty/zero here.
STATE=                  # string
LEVEL=                  # raid level
SIZE_IN_BLOCKS="!"      # int; "!" is a placeholder until a value is read
CHUNK_SIZE_IN_BYTES=    # size in bytes for chunks
NUM_DEVICES=0           # int with total number of devices in raid
NUM_ACTIVE_DEVICES=0    # int with number of active devices in raid
NUM_SPARE_DEVICES=0     # int with number of spare devices in raid
SYNC_ACTION=            # "idle", "check" or "recover"
SYNC_PROGRESS=          # sync progress as read from sysfs
SYNC_SPEED=             # sync speed in K/s
declare -A DEVICE_STATE=()         # dict of strings. key = dev, value = state
declare -A FAULTY_DEVICES=()       # dict with only faulty devices. key = dev, value = true
declare -A TO_REPLACE_DEVICES=()   # dict of devices in raid that shall be replaced. key = dev, value = true
|
|
|
# Read the array's attributes from the sysfs "md" directory $INPUT into the
# script's global variables and classify every member device.
# Globals read:    INPUT
# Globals written: LEVEL, NUM_DEVICES, CHUNK_SIZE_IN_BYTES, SIZE_IN_BLOCKS,
#                  STATE, SYNC_ACTION, SYNC_PROGRESS, SYNC_SPEED,
#                  NUM_ACTIVE_DEVICES, NUM_SPARE_DEVICES, DEVICE_STATE,
#                  FAULTY_DEVICES, TO_REPLACE_DEVICES, and (only while a sync
#                  action is running) SYNC_NUM, SYNC_DEN, SYNC_PERCENT,
#                  BLOCK_SIZE, SYNC_REMAINING_S, SYNC_REMAINING_MIN
# Arguments:       none
function getDataFromSys {
  # https://mjmwired.net/kernel/Documentation/md.txt

  LEVEL=$(<"$INPUT/level")
  NUM_DEVICES=$(<"$INPUT/raid_disks")
  CHUNK_SIZE_IN_BYTES=$(<"$INPUT/chunk_size")
  SIZE_IN_BLOCKS=$(<"$INPUT/component_size")
  STATE=$(<"$INPUT/array_state")
  SYNC_ACTION=$(<"$INPUT/sync_action")
  if [[ "$SYNC_ACTION" != "idle" ]]; then
    SYNC_PROGRESS=$(<"$INPUT/sync_completed")
    SYNC_SPEED=$(<"$INPUT/sync_speed")
  fi

  log "LEVEL = $LEVEL"
  log "STATE = $STATE"
  log "CHUNK_SIZE_IN_BYTES = $CHUNK_SIZE_IN_BYTES"
  log "SIZE_IN_BLOCKS = $SIZE_IN_BLOCKS"
  log "SYNC_ACTION = $SYNC_ACTION"
  log "SYNC_PROGRESS = $SYNC_PROGRESS"
  log "SYNC_SPEED = $SYNC_SPEED"

  local devDir dev deviceName slot state recognized
  for devDir in "$INPUT"/dev-*/; do
    # An unmatched glob stays literal; skip it instead of reading bogus paths.
    [[ -d "$devDir" ]] || continue
    dev=${devDir%/}
    dev=${dev##*/}
    deviceName=${dev:4}   # strip the "dev-" prefix
    slot=$(<"$INPUT/$dev/slot")
    state=$(<"$INPUT/$dev/state")
    log "$dev [$slot] is '$state'"
    DEVICE_STATE[$deviceName]=$state

    # The state is a comma-separated flag list; wrapping it in commas lets
    # each flag be matched as a whole token ("replacement" must not match
    # "want_replacement").
    recognized=false

    # Role of the device: active, faulty or spare (first match wins).
    case ",$state," in
      *,in_sync,* | *,write_mostly,* | *,writemostly,*)
        NUM_ACTIVE_DEVICES=$((NUM_ACTIVE_DEVICES + 1))
        recognized=true
        ;;
      *,faulty,* | *,blocked,*)
        FAULTY_DEVICES[$deviceName]=true
        recognized=true
        ;;
      *,replacement,* | *,spare,*)
        NUM_SPARE_DEVICES=$((NUM_SPARE_DEVICES + 1))
        recognized=true
        ;;
    esac

    # Replacement hints are checked independently so that e.g.
    # "in_sync,want_replacement" is still reported. Both the underscore and
    # the dash spelling of write_error are accepted.
    case ",$state," in
      *,write_error,* | *,write-error,* | *,want_replacement,*)
        TO_REPLACE_DEVICES[$deviceName]=true
        recognized=true
        ;;
    esac

    # Anything without a known flag is treated as faulty.
    # NOTE(review): a state consisting only of "journal" would land here too;
    # confirm whether journal devices need their own handling.
    if [[ "$recognized" != true ]]; then
      FAULTY_DEVICES[$deviceName]=true
    fi
  done

  log "NUM_DEVICES = $NUM_DEVICES"
  log "NUM_ACTIVE_DEVICES = $NUM_ACTIVE_DEVICES"
  log "NUM_SPARE_DEVICES = $NUM_SPARE_DEVICES"
  log "FAULTY_DEVICES = ${FAULTY_DEVICES[*]}"
  log "TO_REPLACE_DEVICES = ${TO_REPLACE_DEVICES[*]}"

  if [[ "$SYNC_ACTION" != "idle" ]]; then
    # SYNC_PROGRESS is expected to look like "<done> / <total>".
    SYNC_NUM=${SYNC_PROGRESS% /*}
    log "'$SYNC_NUM'"
    SYNC_DEN=${SYNC_PROGRESS#*/ }
    log "'$SYNC_DEN'"

    # Values such as "none" are replaced by 0 so the arithmetic below
    # can neither fail nor divide by zero.
    [[ "$SYNC_NUM" =~ ^[0-9]+$ ]] || SYNC_NUM=0
    [[ "$SYNC_DEN" =~ ^[0-9]+$ ]] || SYNC_DEN=0
    [[ "$SYNC_SPEED" =~ ^[0-9]+$ ]] || SYNC_SPEED=0

    if (( SYNC_DEN > 0 )); then
      SYNC_PERCENT=$(( SYNC_NUM * 100 / SYNC_DEN ))
    else
      SYNC_PERCENT=0
    fi

    # INPUT ends in "/md"; the queue directory is its sibling.
    # NOTE(review): assumes sync_completed counts units of hw_sector_size
    # bytes - confirm against the kernel documentation.
    BLOCK_SIZE=$(<"${INPUT%/*}/queue/hw_sector_size")
    [[ "$BLOCK_SIZE" =~ ^[0-9]+$ ]] || BLOCK_SIZE=512

    if (( SYNC_SPEED > 0 )); then
      SYNC_REMAINING_S=$(( (SYNC_DEN - SYNC_NUM) * BLOCK_SIZE / 1024 / SYNC_SPEED ))
    else
      SYNC_REMAINING_S=0
    fi
    SYNC_REMAINING_MIN=$(( SYNC_REMAINING_S / 60 ))
  fi
}
|
|
|
# Print a one-line check result ("STATUS[: details] | perfdata") built from
# the global array state, then exit with the matching status code.
# Globals read: NUM_DEVICES, NUM_ACTIVE_DEVICES, NUM_SPARE_DEVICES, LEVEL,
#               SIZE_IN_BLOCKS, SYNC_ACTION, SYNC_PERCENT, SYNC_NUM, SYNC_DEN,
#               SYNC_SPEED, SYNC_REMAINING_S, SYNC_REMAINING_MIN,
#               DEVICE_STATE, FAULTY_DEVICES, TO_REPLACE_DEVICES
# Outputs:      the result line on stdout
# Exit status:  0 for OK, 1 for WARNING, 2 for CRITICAL
function printOutputAndExit {
  local result=""
  local info=""
  local dev

  # "${info:+$info. }" prepends the text collected so far plus ". " only when
  # something has already been collected.
  if [[ "$NUM_ACTIVE_DEVICES" -lt "$NUM_DEVICES" ]]; then
    result="CRITICAL"
    info="${info:+$info. }Missing $((NUM_DEVICES - NUM_ACTIVE_DEVICES)) of $NUM_DEVICES devices"
  fi

  if [[ ${#FAULTY_DEVICES[@]} -gt 0 ]]; then
    result="CRITICAL"
    info="${info:+$info. }These devices failed: ${!FAULTY_DEVICES[*]}"
  fi

  # From here on only escalate to WARNING if nothing worse was found.
  if [[ ${#TO_REPLACE_DEVICES[@]} -gt 0 ]]; then
    result=${result:-WARNING}
    info="${info:+$info. }These devices should be replaced: ${!TO_REPLACE_DEVICES[*]}"
  fi

  if [[ "$SYNC_ACTION" == "recover" ]]; then
    result=${result:-WARNING}
    info="${info:+$info. }Recovering: $SYNC_PERCENT%, remaining ${SYNC_REMAINING_MIN}min"
  fi
  # A running check is reported but does not change the status.
  if [[ "$SYNC_ACTION" == "check" ]]; then
    info="${info:+$info. }Checking: $SYNC_PERCENT%, remaining ${SYNC_REMAINING_MIN}min"
  fi

  result=${result:-OK}

  printf '%s' "$result"
  if [[ -n "$info" ]]; then
    printf '%s' ": $info"
  fi

  # Performance data follows the pipe character.
  printf '%s' " |"
  printf '%s' " 'raid level: $LEVEL'=0"
  printf '%s' " 'size in blocks'=$SIZE_IN_BLOCKS"
  printf '%s' " 'num devices'=$NUM_DEVICES"
  printf '%s' " 'num active devices'=$NUM_ACTIVE_DEVICES;;;0;$NUM_DEVICES"
  printf '%s' " 'num failed devices'=${#FAULTY_DEVICES[@]};;1;0;$NUM_DEVICES"
  printf '%s' " 'num spare devices'=$NUM_SPARE_DEVICES;;;0"
  if [[ "$SYNC_ACTION" != "idle" ]]; then
    printf '%s' " 'sync progress'=$SYNC_NUM;0;;0;$SYNC_DEN"
    printf '%s' " 'sync speed [1/s]'=${SYNC_SPEED}K"
    printf '%s' " 'sync remaining'=${SYNC_REMAINING_S}s"
  fi

  # One entry per device; the label carries the raw state plus annotations.
  for dev in "${!DEVICE_STATE[@]}"; do
    printf '%s' " 'dev $dev: ${DEVICE_STATE[$dev]}"
    if [[ -n "${FAULTY_DEVICES[$dev]}" ]]; then
      printf '%s' " faulty"
    fi
    if [[ -n "${TO_REPLACE_DEVICES[$dev]}" ]]; then
      printf '%s' " wants replacement"
    fi
    printf '%s' "'=0"
  done

  printf '\n'

  case "$result" in
    OK) exit 0 ;;
    WARNING) exit 1 ;;
    CRITICAL) exit 2 ;;
  esac
}
|
|
|
|
|
# Entry point: gather the array state, then print the result line and exit
# with the corresponding status code.
getDataFromSys
printOutputAndExit
|
|
|
|