For more information regarding the security incident at F5, the actions we are taking to address it, and our ongoing efforts to protect our customers, click here.

SNMP based health check

Problem this snippet solves:

Allows SNMP-based health checking, split across multiple scripts to decouple LTM from the SNMP checks

  1. hc_status.sh: checks the CACHED status of a member
  2. snmp_poller_mgr.sh: runs multiple pollers to check the members' status
  3. snmp_poller.sh: a script to check a member's status
  4. config file: a sample configuration file
  5. Sample log rotate configuration

For more help... lahavs - at - emindsys.com

Code :

=========== Script #1: hc_status.sh ===========
#!/bin/sh

# API
# ========================
# Positional arguments:
#   $1 - member address; a "::ffff:" (IPv4-mapped IPv6) marker is removed
#   $2 - member port (only used in log lines)
#   $3 - service name
svc_name="$3"
dest_port="$2"
dest_ip="$(echo $1 | sed -e 's/::ffff://')"
pid="$$"

# Log written by this script, and the cached status file it reads.
log_file="/var/log/hc-status.log"
status_file="/var/run/hc-${dest_ip}-${svc_name}.status"

# Append one timestamped line to ${log_file}.
# Line format: "<date> <time> <pid> <svc_name> <dest_ip> <dest_port> <message>"
# Globals:   log_file, pid, svc_name, dest_ip, dest_port (read)
# Arguments: $* - message words, joined with single spaces
write_log() {
  # A single date call keeps date and time consistent across midnight; the
  # quoted redirect target keeps a log path containing spaces usable.
  printf '%s\n' "$(date '+%Y-%m-%d %T') ${pid} ${svc_name} ${dest_ip} ${dest_port} $*" >> "${log_file}"
}


# Report the cached status for this member: log it and print it on stdout.
# The status file holds "<epoch>:<STATUS>" (the format snmp_poller.sh writes);
# only the second field is reported.
# NOTE(review): BIG-IP external monitors are commonly documented as treating
# any stdout output as "up" - confirm that printing DOWN is interpreted as
# intended by the monitor that calls this script.
if [ -f "${status_file}" ]; then
  status=$(cut -d ':' -f 2 < "${status_file}")
else
  # No cached result yet: report the member as DOWN.
  status=DOWN
fi
write_log "${status}"
echo "${status}"
# The stray "; echo UP" line that followed was a shell syntax error and
# aborted the script with a non-zero status; it has been removed.
exit 0

=========== Script #2: snmp_poller_mgr.sh ===========
#!/bin/sh

# API
# ========================
# $1 - service name; it is also the name of the config file under /usr/local/ixi.
svc_name="$1"
pid="$$"

# Files and helper script used by the manager.
conf_file="/usr/local/ixi/${svc_name}"
pidfile="/var/run/hc-mgr-${svc_name}.pid"
log_file="/var/log/hc-mgr.log"
snmp_poller="/usr/local/ixi/snmp_poller.sh"

# Print the value of one "name=value" setting from ${conf_file}.
# Globals:   conf_file (read)
# Arguments: $1 - parameter name
# Outputs:   the value on stdout (empty line when the parameter is absent)
read_conf_param() {
  param=$1
  write_log "Reading ${param} from ${conf_file}"
  # Lines containing '#' are skipped, as before. The name must match exactly
  # (leading blanks allowed), so "val" no longer matches "expected_val=", and
  # everything after the first '=' is kept, so values may contain '='.
  data=$(awk -v key="${param}" '
    /#/ { next }
    {
      sep = index($0, "=")
      if (sep == 0) next
      name = substr($0, 1, sep - 1)
      sub(/^[ \t]+/, "", name)
      if (name == key) print substr($0, sep + 1)
    }
  ' "${conf_file}")
  printf '%s\n' "${data}"
}

# (Re)load the polling settings from ${conf_file}.
# The md5sum output of the file is remembered in ${checksum}; when it has not
# changed since the previous call the settings are left untouched.
# Sets: oid, expected_val, nodes, community, interval (each value is logged).
read_conf() {
  curr_checksum=$(md5sum "${conf_file}")
  [ "${checksum}" != "${curr_checksum}" ] || return 0
  checksum=${curr_checksum}

  oid=$(read_conf_param oid)
  write_log "oid=${oid}"
  expected_val=$(read_conf_param expected_val)
  write_log "expected_val=${expected_val}"
  nodes=$(read_conf_param nodes)
  write_log "nodes=${nodes}"
  community=$(read_conf_param community)
  write_log "community=${community}"
  interval=$(read_conf_param interval)
  write_log "interval=${interval}"
}

# Append one timestamped line to ${log_file}.
# Line format: "<date> <time> <pid> <svc_name> <message>"
# Globals:   log_file, pid, svc_name (read); msg (written)
# Arguments: $* - message words, joined with single spaces
write_log() {
  msg=$*
  # A single date call keeps date and time consistent across midnight; the
  # quoted redirect target keeps a log path containing spaces usable.
  printf '%s\n' "$(date '+%Y-%m-%d %T') ${pid} ${svc_name} ${msg}" >> "${log_file}"
}

# Validate the environment and claim the manager's PID file.
# Globals: svc_name, conf_file, pidfile, pid (read); prev_pid, err (written)
# Exits 1 when the service name is empty, the config file is missing, or the
# PID recorded in ${pidfile} is still alive. The first two cases previously
# used a bare "exit", which reported success (status 0) to the caller.
init() {
  write_log "=== snmp poller manager started ==="
  if [ -z "${svc_name}" ]; then
    write_log "svc_name not set"
    exit 1
  fi

  if [ -z "${conf_file}" ] || ! [ -f "${conf_file}" ]; then
    write_log "conf_file ($conf_file) not set or not exist"
    exit 1
  fi

  if [ -f "${pidfile}" ]; then
    write_log "${pidfile} exist"
    prev_pid=$(cat "${pidfile}")
    # kill -0 delivers no signal; it only reports whether the PID exists.
    kill -0 "${prev_pid}" > /dev/null 2>&1
    err=$?
    if [ "${err}" -eq 0 ]; then
      write_log "Aborting: PID ${prev_pid} is running, error code:${err}"
      exit 1
    fi
  fi
  write_log "Setting PID file ${pidfile}"
  echo "${pid}" > "${pidfile}"
}

# Compute how long the main loop should sleep so that rounds start
# ${interval} seconds apart.
# Globals: start_time, interval (read); sleep_time (written)
set_sleep_time() {
  sleep_time=$(( start_time + interval - $(date +%s) ))
  # A round that took longer than the interval would yield a negative value,
  # which sleep rejects; start the next round immediately instead.
  if [ "${sleep_time}" -lt 0 ]; then
    sleep_time=0
  fi
}

# Manager main loop: never returns.
# Each round reloads the config if it changed, starts one background poller
# per host in ${nodes} (two seconds apart), then sleeps for the remainder of
# the interval as computed by set_sleep_time.
# Globals: nodes, svc_name, expected_val, community, oid, snmp_poller (read);
#          start_time, host (written)
main() {
  write_log "=== Starting main loop ==="
  while true; do
    read_conf
    start_time=$(date +%s)
    # ${nodes} is deliberately unquoted: it is a space-separated host list.
    for host in ${nodes}; do
      write_log "Polling started on" "host=${host}"
      # Quoted so an empty or space-containing config value cannot shift the
      # poller's positional parameters.
      "${snmp_poller}" "${host}" "${svc_name}" "${expected_val}" "${community}" "${oid}" &
      sleep 2
    done
    set_sleep_time
    write_log "Going to sleep for ${sleep_time} sec."
    sleep "${sleep_time}"
  done
}
# Entry point: validate and claim the PID file, then poll forever.
init
main

=========== Script #3: snmp_poller.sh ===========

#!/bin/sh

# application API
# --------------------
# Positional arguments, in the order the poller manager passes them.
dest_ip="$1"
svc_name="$2"
expected_val="$3"
community="$4"
oid="$5"

# Status words written to the status file.
up="UP"
down="DOWN"
# snmpget timeout in seconds.
timeout="2"
# Upper bound for the mismatch counter kept in the state file before a
# changed status is accepted.
delay="3"

pid="$$"

# Per member/service files under /var/run, and the per-service log.
pidfile="/var/run/hc-${dest_ip}-${svc_name}.pid"
status_file="/var/run/hc-${dest_ip}-${svc_name}.status"
state_file="/var/run/hc-${dest_ip}-${svc_name}.last_state"
log_file="/var/log/hc-${svc_name}.log"

# Append one timestamped line to ${log_file}.
# Line format: "<date> <time> <pid> <dest_ip>-<svc_name> <message>"
# Globals:   log_file, pid, dest_ip, svc_name (read)
# Arguments: $* - message words, joined with single spaces
write_log() {
  # A single date call keeps date and time consistent across midnight; the
  # quoted redirect target keeps a log path containing spaces usable.
  printf '%s\n' "$(date '+%Y-%m-%d %T') ${pid} ${dest_ip}-${svc_name} $*" >> "${log_file}"
}

# Take over the PID file for this member/service.
# If a PID file is already present, the process it names is sent SIGKILL and
# the outcome of kill is logged; this script's PID is then recorded.
# Globals: pidfile, pid (read); prev_pid, err (written)
init() {
  write_log "monitor started"

  if test -f "${pidfile}"; then
    write_log "${pidfile} exist"
    prev_pid=$(cat "${pidfile}")
    kill -9 ${prev_pid} > /dev/null 2>&1
    err=$?
    write_log "ERROR: PID ${prev_pid} killed, error code:${err}"
  fi

  printf '%s\n' "${pid}" > "${pidfile}"
}

# Query ${oid} on the member with snmpget and derive ${status}.
# Globals: timeout, community, dest_ip, oid, expected_val, up, down (read);
#          result, err, status (written)
# status is ${up} only when snmpget succeeds and its output equals
# ${expected_val}; otherwise it is ${down} and the reason is logged.
run_snmp_test() {
  # Invoked directly with quoted arguments (rather than through a word-split
  # command string) so a community string containing spaces or glob
  # characters is passed as a single argument.
  result=$(snmpget -O qv -t "${timeout}" -r 1 -v2c -c "${community}" "${dest_ip}:161" "${oid}")
  err=$?

  if [ "${err}" -ne 0 ]; then
    write_log "ERROR: snmpget exited with error code:${err}"
    status=${down}
  elif [ "${result}" = "${expected_val}" ]; then
    status=${up}
  else
    write_log "FAIL - RESULT:${result} != ${expected_val}"
    status=${down}
  fi
}

# Decide which status to publish and persist it.
# Globals: status, last_state_value, last_state_count, delay, state_file,
#          status_file (read); count (written)
# Files:   ${state_file}  <- "<epoch>:<status>:<count>"
#          ${status_file} <- "<epoch>:<status>"
# A status equal to the stored one is published with the counter reset to 0.
# A differing status is held back (the stored status is re-published and the
# counter incremented) while the counter is below ${delay}; once the counter
# reaches ${delay} the new status is published and the counter reset.
response() {
  write_log "Current Status:${status} file:${status_file}"

  # One timestamp so both files carry the same time.
  resp_now=$(date +%s)

  # With no previous state (first run) or an unreadable counter, the original
  # test "[ -lt N ]" raised an error and fell through to accepting the new
  # status. Keep that outcome, but reach it deliberately and without errors.
  resp_prev_count=${last_state_count}
  case "${resp_prev_count}" in
    '' | *[!0-9]*) resp_prev_count=${delay} ;;
  esac

  if [ "${status}" = "${last_state_value}" ]; then
    write_log "Status match status:${status} last_state_value=${last_state_value} last_state_count:${last_state_count}"
    resp_publish=${status}
    count=0
  else
    write_log "Status mismatch status:${status} last_state_value=${last_state_value} last_state_count:${last_state_count}"
    if [ -n "${last_state_value}" ] && [ "${resp_prev_count}" -lt "${delay}" ]; then
      # State change detected: keep the old state and count the mismatch.
      write_log "Status mismatch delaying status:${status}, using ${last_state_value}"
      resp_publish=${last_state_value}
      count=$(( resp_prev_count + 1 ))
    else
      # Max delay reached: the current status becomes the active one.
      write_log "Status mismatch max delay reached using, status:${status}"
      resp_publish=${status}
      count=0
    fi
  fi

  echo "${resp_now}:${resp_publish}:${count}" > "${state_file}"
  echo "${resp_now}:${resp_publish}" > "${status_file}"
}

# Load the previously stored state from ${state_file}
# ("<epoch>:<status>:<count>") into last_state_time, last_state_value and
# last_state_count.
# When the file does not exist it is created from the current ${status} with
# a zero counter, and the last_state_* variables are set to match; they were
# previously left unset in that case.
# Globals: state_file, status (read); last_state_* (written)
get_last_state() {
  if [ -e "${state_file}" ]; then
    # Read the file once so all three fields come from the same contents.
    IFS=: read -r last_state_time last_state_value last_state_count < "${state_file}"
  else
    last_state_time=$(date +%s)
    last_state_value=${status}
    last_state_count=0
    echo "${last_state_time}:${last_state_value}:${last_state_count}" > "${state_file}"
  fi
}


# Remove the PID file recorded for this run; a missing file is not an error.
# Globals: pidfile (read)
cleanup() {
  rm -f "${pidfile}"
}

# Run one complete poll: claim the PID file, query the member, load the
# previous state, publish the resulting status, remove the PID file.
# Always terminates the script with exit status 0.
main() {
  init
  run_snmp_test
  get_last_state
  response
  cleanup
  exit 0
}
# Entry point: perform a single poll; main ends the script with exit 0.
main

=========== Sample config file ===========

# Configuration file for AIM TR
# ----------------------------
oid=.1.3.6.1.4.1.15867.2000.3.5.2.1.2.1.0
expected_val=1
nodes=172.18.20.10 172.18.20.20 172.18.20.30 172.18.20.40
community=public
interval=26

=========== Log rotate configuration ===========
/var/log/hc-*.log {
    rotate 3
    daily
}
Published Mar 18, 2015
Version 1.0
No comments. Be the first to comment.