iomonitor – wrapper script for ioping

InUncategorized
By Charles
May 14, 2017
Link to it on my github because formatting is screwed up here
This is a wrapper script for ioping. It can be run from a cron job (e.g. with https://healthchecks.io) or as an NRPE command for Nagios. Use --nagios-perfdata to generate perfdata for Nagios to consume.
I needed a way to track I/O latency on a VM hypervisor node (oVirt): one of three oVirt nodes kept reporting latency to storage, but it was the only one reporting it (and it was guaranteed not to be a config issue). I set this up in Nagios to run every minute with a count of 15 runs, which usually takes about 15 seconds.
This is what it looks like inside NagiosXI
#!/usr/bin/env bash

#
# Wrapper script for ioping. It can be run from a cron job or as an
# NRPE command for Nagios. Use --nagios-perfdata to generate perfdata
# for Nagios to consume.
#
# I needed a way to track I/O latency on a VM hypervisor node (ovirt)
# because the ovirt engine kept reporting latencies but it was the only one
# reporting it (and guaranteed not a config issue). I set this up in nagios 
# to run every minute and run for 15 runs which is usually ~15 seconds
#
#
# It is suggested to first get a baseline for what your system looks like by
# running the script with all zeros for crit/warn, then using the "raw data"
# line to work out the values you consider warning/critical. I used a
# count of 120 (2 minutes), then min/max/avg * 1.5 for warning and * 2.5 for critical.
#
#	* While running this I did the following on my home directory
#
#		while [ true ]; do ls -alhtrR $HOME; done
#
#	to generate some I/O without using DD, figured all the stat() calls would be
#	better geared towards real use 
#
#
# Example:
#
#	./iomonitor --directory /tmp --min-warn 0 --min-crit 0 --max-warn 0 --max-crit 0 --avg-warn 0 --avg-crit 0 --count 120
#


# Required external tools: ioping produces the measurements and bc is used
# for the floating-point arithmetic. Stop with exit status 254 when either
# one cannot be found.
command -v ioping >/dev/null 2>&1 || {
	echo "* ERROR: Cannot find ioping command"
	exit 254
}

command -v bc >/dev/null 2>&1 || {
	echo "* ERROR: Cannot find bc command"
	exit 254
}


# Print a diagnostic line, prefixed with "* ", when verbose mode is on.
# Globals:   dbg (read) - any non-empty value enables output; the -v/--verbose
#            option sets it to 1
# Arguments: the message words; several words are joined with single spaces
# Outputs:   "* <message>" on stdout when dbg is non-empty, otherwise nothing
# Returns:   0
debug_write() {
	# Quoted test: a dbg value containing whitespace must not break the check.
	if [ -z "${dbg:-}" ]; then
		return 0
	fi

	# Join the words with a space whatever IFS the caller currently has.
	local IFS=' '
	printf '* %s\n' "$*"
}


# Parse command-line options into the script's global variables.
# Globals written: min_warn, min_crit, max_warn, max_crit, avg_warn, avg_crit,
#                  count, directory, perfdata, dbg
# Arguments: the script's command-line words
# Returns:   0
#
# Options that take a value use the following word as that value; a value
# option given as the very last word leaves its variable empty.
setargs() {
	local consumed

	# Loop on the number of words left rather than on "$1" being non-empty, so
	# that an empty word does not silently end parsing and drop every option
	# that comes after it.
	while [ $# -gt 0 ]; do
		# Most options take a value, so by default two words are consumed.
		consumed=2

		case "$1" in
			--min-warn) min_warn=${2-} ;;
			--min-crit) min_crit=${2-} ;;
			--max-warn) max_warn=${2-} ;;
			--max-crit) max_crit=${2-} ;;
			--avg-warn) avg_warn=${2-} ;;
			--avg-crit) avg_crit=${2-} ;;
			-c | --count) count=${2-} ;;
			-d | --directory) directory=${2-} ;;
			--nagios-perfdata)
				perfdata=1
				consumed=1
				;;
			-v | --verbose)
				dbg=1
				consumed=1
				;;
			*)
				# Unrecognised words are ignored.
				consumed=1
				;;
		esac

		# Never shift past the end of the argument list.
		if [ "${consumed}" -gt $# ]; then
			consumed=$#
		fi
		shift "${consumed}"
	done

	return 0
}

# Parse the command line into the option variables.
setargs "$@"

# Verbose mode: show every option as it was parsed. This happens before the
# default below is applied, so an omitted count is shown as empty.
for opt_name in min_warn min_crit max_warn max_crit avg_warn avg_crit count directory; do
	debug_write "${opt_name}=${!opt_name}"
done

# Fall back to a count of 15 when none was given.
: "${count:=15}"

# Move in to the directory for ioping to run. "--" stops a directory name
# that begins with "-" from being read as an option to cd.
if ! cd -- "${directory}"; then
	echo "* ERROR: Failed to CD to ${directory} to run ioping test. Exiting"
	exit 254
fi

debug_write "Current directory - $(pwd)"

# Run ioping against the current directory and capture its output.
# count is quoted so that it always reaches ioping as the single argument of
# -c; left unquoted, a value containing whitespace or glob characters would be
# split or expanded into additional ioping arguments.
debug_write "Running ${count} times"
cmd=$(ioping -c "${count}" .)

# Only shown with --verbose
debug_write "output: ${cmd}"

# Pick out ioping's summary line (the one that starts with "min/avg/max/mdev").
line=$(grep "^min/avg/max/mdev" <<< "${cmd}")
debug_write "line: '${line}'"

# Without a summary line there is nothing to parse. Say so here instead of
# falling through to the unit conversion and reporting an empty unit.
if [ -z "${line}" ]; then
	echo "* ERROR: ioping output did not contain a min/avg/max/mdev line"
	exit 245
fi

# Split the summary into words. The positions used below expect the layout
#   min/avg/max/mdev = V U / V U / V U / V U
# so the (zero-based) words 2-3, 5-6, 8-9 and 11-12 are the value/unit pairs
# for min, avg, max and mdev. IFS is set for this one read only, so the
# shell's global IFS is never modified.
IFS=$' \t\n' read -r -a stat_words <<< "${line}"

# Converted values in seconds, stored in the order min, avg, max, mdev.
# A separate array index is not needed, so the user-supplied count is left
# untouched.
data=()

# TODO: Make what to convert to an argument.
# Everything is converted to seconds for now; people may want to monitor at
# ms level.
for pos in 2 5 8 11; do
	value=${stat_words[pos]:-}
	unit=${stat_words[pos + 1]:-}

	# Multiplier that turns one <unit> into seconds.
	# NOTE(review): "m" and "h" are accepted for minutes and hours; confirm
	# that these are the suffixes ioping actually prints.
	case "${unit}" in
		ns) conversion="0.000000001" ;;
		us) conversion="0.000001" ;;
		ms) conversion="0.001" ;;
		s) conversion="1" ;;
		m) conversion="60" ;;
		h) conversion="3600" ;;
		*)
			echo "* ERROR: Received unit we could not convert. Got ${unit}"
			exit 245
			;;
	esac

	debug_write "(${unit}) - ${value} * ${conversion}"
	# bc does the multiplication; awk then prints the result with %f.
	converted=$(echo "scale=6; ${value} * ${conversion}" | bc | awk '{printf "%f", $0}')

	data+=("${converted}")
done


min=${data[0]}
avg=${data[1]}
max=${data[2]}
mdev=${data[3]}
debug_write "Converted to seconds: $min / $avg / $max / $mdev"


# Result state for the threshold checks: flags recording whether a WARNING or
# a CRITICAL was raised, and the buffers that collect the message text and the
# perfdata string.
exit_warn=0 exit_crit=0
perfdataoutput="" output=""

# Add text to the end of the message buffer.
# Globals:   output (appended to)
# Arguments: the text; several words are joined with single spaces
append() {
	local IFS=' '
	output+="$*"
}

# Add one entry to the perfdata buffer, followed by a single space.
# Globals:   perfdataoutput (appended to)
# Arguments: the entry; several words are joined with single spaces
perfdata_append() {
	local IFS=' '
	perfdataoutput+="$* "
}

# Use bc to do float comparison.
# Arguments: a bc expression such as "0.5 > 0.25", as one or more words
# Outputs:   whatever bc prints for the expression; the callers below treat
#            an output of 1 as "the comparison holds"
# Returns:   bc's exit status
comp() {
	bc <<< "$*"
}

# Check one metric against its thresholds and record the outcome.
# Arguments: $1 - metric name (min, max or avg)
# Globals:   reads the variable named by $1, <name>_warn, <name>_crit and
#            directory; sets exit_warn / exit_crit to 1 when a threshold is
#            exceeded; adds text through append and perfdata_append
#
# Indirect expansion does the work: for the name "min", idx_warn holds the
# string "min_warn" and ${!idx_warn} expands to the value of min_warn.
check_metric() {
	local idx_name=$1
	local idx_warn="${idx_name}_warn"
	local idx_crit="${idx_name}_crit"
	local idx_inner_val="${!idx_name}"

	debug_write "${idx_inner_val} > ${!idx_warn}"
	debug_write "${idx_inner_val} > ${!idx_crit}"

	# Critical is tested first and warning only otherwise. A value that is
	# exactly equal to the critical threshold therefore still raises a
	# warning, and a warning threshold works without a critical one.
	# The results are compared as strings so that an empty answer from bc
	# (for example when a threshold was not given) is simply "not exceeded".
	if [ "$(comp "${idx_inner_val} > ${!idx_crit}")" = "1" ]; then
		append " * CRITICAL: '$directory' storage latency ${idx_name} response time ${idx_inner_val} > ${!idx_crit}\n"
		exit_crit=1
	elif [ "$(comp "${idx_inner_val} > ${!idx_warn}")" = "1" ]; then
		append " * WARNING: '$directory' storage latency ${idx_name} response time ${idx_inner_val} > ${!idx_warn}\n"
		exit_warn=1
	fi

	perfdata_append "${idx_name}=${idx_inner_val}"
}

# Iterate the fields we need; doing it this way avoids repeating the same
# checks three times.
for i in min max avg; do
	check_metric "$i"
done

# The raw ioping summary line is part of the message whatever the outcome.
append "raw data: ${line}"


# Pick the exit status: 2 when a CRITICAL was raised, otherwise 1 when a
# WARNING was raised, otherwise 0 (OK).
if [ "${exit_crit}" -eq 1 ]; then
	status=2
elif [ "${exit_warn}" -eq 1 ]; then
	status=1
else
	status=0
fi

if [ "${status}" -eq 0 ]; then
	# OK: prefix the message and strip every newline, so no line break is
	# written after it.
	echo -e "OK - ${directory} latency - ${output}" | tr -d '\n'
else
	# WARNING / CRITICAL: print the collected messages as they are.
	echo -e "${output}"
fi

# Perfdata is printed only when --nagios-perfdata was given.
if [ -n "${perfdata}" ]; then
	echo -e " | ${perfdataoutput}"
fi


exit "${status}"
Charles

Leave a Reply Cancel reply