ISP Link Failover Script Problems

Hi,

I have an ISP link failover script that dates back to VyOS 1.1.8 and has been slightly modified to run on 1.2 and now on 1.3. Both links are active at the same time (no load balancing, because it causes problems with online banking), and a link is deactivated when its gateway becomes unreachable. The link is reactivated when the ISP gateway comes back up.

Since 1.3 I have some problems:

  1. Logger doesn’t record anything in the logs. I use a prefix to filter my log entries (“show log | grep ANV”), and nothing shows up.
  2. One ISP has a particular persistent problem: ping to the gateway works, but the link is non-functional, and therefore the script doesn’t deactivate the faulty link.
  3. Is there any better solution than my quite old script?

Thanks in advance.
Andrei

VyOS config commands:

set protocols static route 0.0.0.0/0 next-hop 'gw1.xxx.xxx.xxx' distance 15
set protocols static route 0.0.0.0/0 next-hop 'gw2.yyy.yyy.yyy' distance 100

set system task-scheduler task TASK-CHECK-GW executable path /config/scripts/anvcheckgw.sh
set system task-scheduler task TASK-CHECK-GW crontab-spec '* * * * *'

Script:

#!/bin/vbash
# /config/scripts/anvcheckgw.sh
#
# ISP gateway failover check. Pings each ISP gateway through its own
# interface and disables/enables the matching default route and DNS
# forwarder depending on the result.

# Load the VyOS script environment.
# NOTE(review): the configure/commit/set/delete commands used further down
# are presumably provided by this template - confirm on the target release.
source /opt/vyatta/etc/functions/script-template

# Re-execute the script under the 'vyattacfg' group when it was started
# with a different primary group.
if [ "$(id -g -n)" != 'vyattacfg' ] ; then
    # "$*" (not "$@") keeps the whole command line a single word for
    # 'sg -c'; "$@" inside a larger quoted string would split it into
    # several arguments as soon as the script is called with parameters.
    exec sg vyattacfg -c "/bin/vbash $(readlink -f "$0") $*"
fi

# ---------------------------------------------------------------------------
# Settings. Every value below is identical to the one the script used
# before; only the quoting was normalised and a dead assignment removed.
# ---------------------------------------------------------------------------

# Gateway IPs.
GW1="gw1.xxx.xxx.xxx"
GW2="gw2.yyy.yyy.yyy"

# Ethernet interfaces the gateways are reached through.
GW1ETH="eth2"
GW2ETH="eth3"

# DNS server IPs (one per ISP).
DNS1="dn1.xxx.xxx.xxx"
DNS2="dn2.yyy.yyy.yyy"


# VyOS commands to disable/enable the default routes.
# Adding the 'disable' leaf turns a next-hop off; deleting it turns it on.
GW1DIS="set protocols static route 0.0.0.0/0 next-hop ${GW1} disable"
GW1ENA="delete protocols static route 0.0.0.0/0 next-hop ${GW1} disable"
GW2DIS="set protocols static route 0.0.0.0/0 next-hop ${GW2} disable"
GW2ENA="delete protocols static route 0.0.0.0/0 next-hop ${GW2} disable"

# Ownership fix-up command; only referenced from commented-out lines.
# An earlier variant (labelled "V=1.1.1.8" in the previous revision) was:
#   sudo chown -R root:vyattacfg /opt/vyatta/config/active/
# It was always overwritten by the value below, so only this one is kept.
# V>=1.1.8
GWFIXP="sudo chown -R vyos:vyattacfg /opt/vyatta/config/active/"


# VyOS commands to disable/enable the DNS forwarders.
DNS1DIS="delete service dns forwarding name-server ${DNS1}"
DNS1ENA="set service dns forwarding name-server ${DNS1}"
DNS2DIS="delete service dns forwarding name-server ${DNS2}"
DNS2ENA="set service dns forwarding name-server ${DNS2}"


# Prefix put in front of every syslog message written through logger.
LOGPREFIX="ANVGWSTATUS: "

# Marker files that remember which gateway has already been handled.
CONFUSRD='/config/user-data/'
GW1DOWNFLAG="${CONFUSRD}gw1-down"
GW2DOWNFLAG="${CONFUSRD}gw2-down"
GWALLDOWNFL="${CONFUSRD}gw-all-down"

# Ping bookkeeping: failure counters, number of failed pings after which
# probing stops, and the pause (seconds) between two attempts.
FAILCOUNT1=0
FAILCOUNT2=0
MAXFAILCOUNT=5
PAUSEBWPINGS=3

# Timestamp of this run, used in messages and marker files.
NOW=$(date +'%Y-%m-%d %T')

# Probe both ISP gateways and store the outcome in FAILCOUNT1 / FAILCOUNT2.
# 0 means the gateway answered a ping; a non-zero value means every
# attempt failed.

#######################################
# Ping an address through a given interface until one ping succeeds or
# MAXFAILCOUNT pings in a row have failed.
# Globals:   MAXFAILCOUNT (read), PAUSEBWPINGS (read)
# Arguments: $1 - interface to send the ping from
#            $2 - address to ping
# Outputs:   number of failed pings on stdout (0 when a ping succeeded)
#######################################
count_ping_failures() {
    local iface=$1
    local target=$2
    local fails=0

    while true; do
        if ping -I "$iface" -c 1 "$target" >/dev/null 2>&1; then
            fails=0 # An earlier attempt may have failed; the link is OK now.
            break
        fi
        fails=$((fails + 1))
        if [ "$fails" -ge "$MAXFAILCOUNT" ]; then
            break
        fi
        sleep "$PAUSEBWPINGS" # Pause before the next attempt.
    done

    printf '%s\n' "$fails"
}

# Check gateway #1
FAILCOUNT1=$(count_ping_failures "$GW1ETH" "$GW1")

# Check gateway #2
FAILCOUNT2=$(count_ping_failures "$GW2ETH" "$GW2")

# React to the probe results.
#
# Each "down" event is handled once: a marker file is written the first
# time a failure is seen, and the log entry and configuration change are
# skipped while that marker exists.
#
# 'eval' is kept on purpose for the stored command strings: they start with
# set/delete, which have to be resolved when the line is executed.
# NOTE(review): assumes the sourced VyOS script-template supplies
# configure/commit/set/delete - confirm on the target release.
if [ "$FAILCOUNT1" -gt 0 ] && [ "$FAILCOUNT2" -gt 0 ]; then
    echo "Both gateways down, nothing to do $NOW"
    if [ ! -f "$GWALLDOWNFL" ]; then
        # tee creates the marker and stores the time of the outage in it.
        echo "$NOW" | tee "$GWALLDOWNFL"
        logger "${LOGPREFIX}: Both gateways are down at ${NOW}"
    fi
    # Stop here on EVERY run in which both gateways are down. Previously the
    # exit only happened when the marker had just been created, so the next
    # run fell through to the end of the script, where the marker is
    # deleted, and the outage was logged again on the run after that.
    exit 0

elif [ "$FAILCOUNT1" -gt 0 ]; then
    echo "Gateway #1 is down $NOW"
    if [ ! -f "$GW1DOWNFLAG" ]; then
        echo "$NOW" | tee "$GW1DOWNFLAG"
        logger "${LOGPREFIX}: Gateway #1 is down at ${NOW}"
        configure
        eval "$GW1DIS"
        eval "$DNS1DIS"
        commit
        # eval "$GWFIXP"
    fi

elif [ "$FAILCOUNT2" -gt 0 ]; then
    echo "Gateway #2 is down $NOW"
    if [ ! -f "$GW2DOWNFLAG" ]; then
        echo "$NOW" | tee "$GW2DOWNFLAG"
        logger "${LOGPREFIX}: Gateway #2 is down at ${NOW}"
        configure
        eval "$GW2DIS"
        eval "$DNS2DIS"
        commit
        # eval "$GWFIXP"
    fi

else
    # Both gateways answered; there is nothing to disable.
    :
fi


# Bring a gateway back into service once it answers again: drop its marker
# file and re-enable its default route and DNS forwarder.
#
# 'eval' is kept on purpose for the stored command strings: they start with
# set/delete, which have to be resolved when the line is executed.
# NOTE(review): assumes the sourced VyOS script-template supplies
# configure/commit/set/delete - confirm on the target release.
if [ "$FAILCOUNT1" -eq 0 ] && [ -f "$GW1DOWNFLAG" ]; then
    logger "${LOGPREFIX}: Gateway #1 is up at ${NOW}"
    sudo rm -f "$GW1DOWNFLAG" || true
    configure
    eval "$GW1ENA"
    eval "$DNS1ENA"
    commit
    # eval "$GWFIXP"
fi

if [ "$FAILCOUNT2" -eq 0 ] && [ -f "$GW2DOWNFLAG" ]; then
    logger "${LOGPREFIX}: Gateway #2 is up at ${NOW}"
    sudo rm -f "$GW2DOWNFLAG" || true
    configure
    eval "$GW2ENA"
    eval "$DNS2ENA"
    commit
    # eval "$GWFIXP"
fi

# Clear the "all gateways down" marker only when at least one gateway
# answers again. Removing it while both are still failing would make the
# outage look new on the following run and log it a second time.
if [ -f "$GWALLDOWNFL" ] && { [ "$FAILCOUNT1" -eq 0 ] || [ "$FAILCOUNT2" -eq 0 ]; }; then
    sudo rm -f "$GWALLDOWNFL" || true
fi

exit