SNMP based health check
Problem this snippet solves:
Allows SNMP-based health checking by combining multiple scripts, decoupling LTM from the SNMP checks themselves.
1. hc_status.sh: checks the CACHED status of a member
2. snmp_poller_mgr.sh: runs multiple pollers to check the members' status
3. snmp_poller.sh: a script to check the status of a single member
4. config file: a sample configuration file
5. Sample log rotate configuration
For more help... lahavs - at - emindsys.com
Code :
=========== Script #1: hc_status.sh =========== #!/bin/sh # API # ======================== dest_ip=$(echo $1 | sed 's/::ffff://'); dest_port=$2; svc_name=$3; pid=$$ status_file=/var/run/hc-${dest_ip}-${svc_name}.status log_file=/var/log/hc-status.log function write_log () { echo "$(date +%Y-%m-%d) $(date +%T) ${pid} ${svc_name} ${dest_ip} ${dest_port} $*" >> $log_file } if [ -f ${status_file} ]; then status=$(cat ${status_file} | cut -d ':' -f 2) else status=DOWN fi write_log ${status} echo ${status} ; echo UP exit 0 =========== Script #2: snmp_poller_mgr.sh =========== #!/bin/sh # API # ======================== svc_name=$1 conf_file=/usr/local/ixi/${svc_name} pid=$$ log_file=/var/log/hc-mgr.log pidfile=/var/run/hc-mgr-${svc_name}.pid snmp_poller=/usr/local/ixi/snmp_poller.sh function read_conf_param () { param=$1 write_log "Reading ${param} from ${conf_file}" # data=$(cat ${conf_file} | grep -v "#" | grep "${param}=" | cut -d "=" -f2) data=$(cat ${conf_file} | grep -v "#" | grep "${param}=" | awk -F '=' '{ print $2 }' ) echo "${data}" } function read_conf () { curr_checksum=$(md5sum ${conf_file}) if [ "${checksum}" = "${curr_checksum}" ]; then return fi checksum=${curr_checksum} oid=$(read_conf_param oid) write_log "oid=${oid}" expected_val=$(read_conf_param expected_val) write_log "expected_val=${expected_val}" nodes=$(read_conf_param nodes) write_log "nodes=${nodes}" community=$(read_conf_param community) write_log "community=${community}" interval=$(read_conf_param interval) write_log "interval=${interval}" } function write_log () { msg=$* echo "$(date +%Y-%m-%d) $(date +%T) ${pid} ${svc_name} ${msg}" >> $log_file } function init () { write_log "=== snmp poller manager started ===" if [ "${svc_name}" = "" ]; then write_log "svc_name not set" exit fi if [ "${conf_file}" = "" ] || ! 
[ -f "${conf_file}" ]; then write_log "conf_file ($conf_file) not set or not exist" exit fi if [ -f $pidfile ]; then write_log "${pidfile} exist" prev_pid=$(cat $pidfile) kill -0 ${prev_pid} > /dev/null 2>&1 err=$? if [ $err -eq 0 ]; then write_log "Aborting: PID $(cat $pidfile) is running, error code:${err}" exit 1 fi fi write_log "Setting PID file ${pidfile}" echo ${pid} > ${pidfile} } function set_sleep_time () { sleep_time=$(expr ${start_time} + ${interval} - $(date +%s)) } function main () { write_log "=== Starting main loop ===" while [ 1 ]; do read_conf; start_time=$(date +%s) for host in ${nodes}; do write_log "Polling started on" "host=${host}" ${snmp_poller} ${host} ${svc_name} ${expected_val} ${community} ${oid} & sleep 2 done set_sleep_time write_log "Going to sleep for ${sleep_time} sec." sleep ${sleep_time} done } init; main; =========== Script #3: snmp_poller.sh =========== #!/bin/sh # application API # -------------------- dest_ip=$1 svc_name=$2 expected_val=$3 community=$4 oid=$5 timeout=2 up=UP down=DOWN delay=3 pid=$$ pidfile=/var/run/hc-${dest_ip}-${svc_name}.pid status_file=/var/run/hc-${dest_ip}-${svc_name}.status state_file=/var/run/hc-${dest_ip}-${svc_name}.last_state log_file=/var/log/hc-${svc_name}.log function write_log () { echo "$(date +%Y-%m-%d) $(date +%T) ${pid} ${dest_ip}-${svc_name} $*" >> $log_file } function init () { write_log "monitor started" if [ -f $pidfile ]; then write_log "${pidfile} exist" prev_pid=$(cat $pidfile) kill -9 ${prev_pid} > /dev/null 2>&1 err=$? write_log "ERROR: PID $(cat $pidfile) killed, error code:${err}" fi echo ${pid} > $pidfile } function run_snmp_test () { cmd="snmpget -O qv -t ${timeout} -r 1 -v2c -c ${community} ${dest_ip}:161 ${oid}" result=$(${cmd}) err=$? 
if [ ${err} -ne 0 ]; then write_log "ERROR: snmpget existed with error code:${err}" status=${down} else if [ "${result}" = "${expected_val}" ]; then # write_log "OK - RESULT:${result} as expected" status=${up} else write_log "FAIL - RESULT:${result} != ${expected_val}" status=${down} fi fi } function response () { write_log "Current Status:${status} file:${status_file}" if [ "${status}" != "${last_state_value}" ]; then # Last State is different than status write_log "Status mismatch status:${status} last_state_value=${last_state_value} last_state_count:${last_state_count}" if [ ${last_state_count} -lt ${delay} ]; then # State change detected, using old state, increasing counter write_log "Status mismatch delaying status:${status}, using ${last_state_value}" count=$(expr ${last_state_count} + 1) echo $(date +%s):${last_state_value}:${count} > ${state_file} echo $(date +%s):${last_state_value} > ${status_file} else # Max delay reached, setting current status to be active write_log "Status mismatch max delay reached using, status:${status}" echo $(date +%s):${status}:0 > ${state_file} echo $(date +%s):${status} > ${status_file} fi else write_log "Status match status:${status} last_state_value=${last_state_value} last_state_count:${last_state_count}" echo $(date +%s):${status}:0 > ${state_file} echo $(date +%s):${status} > ${status_file} fi } function get_last_state () { if [ -e ${state_file} ]; then last_state_time=$(cat ${state_file} | cut -d ':' -f 1) last_state_value=$(cat ${state_file} | cut -d ':' -f 2) last_state_count=$(cat ${state_file} | cut -d ':' -f 3) else echo $(date +%s):${status}:0 > ${state_file} fi } function cleanup () { # write_log "Deleting ${pidfile}" rm -f ${pidfile} } function main () { init; run_snmp_test; get_last_state; response; cleanup; # write_log "=== monitor ended ===" exit 0 } main =========== Sample config file =========== # Configutation file for AIM TR # ---------------------------- oid=.1.3.6.1.4.1.15867.2000.3.5.2.1.2.1.0 
expected_val=1 nodes=172.18.20.10 172.18.20.20 172.18.20.30 172.18.20.40 community=public interval=26 =========== Log rotate configuation =========== /var/log/hc-*.log { rotate 3 daily }
Published Mar 18, 2015
Version 1.0
Lahav_Savir_532
Nimbostratus
Joined March 16, 2008
Lahav_Savir_532
Nimbostratus
Joined March 16, 2008
No Comments
Be the first to comment