I currently have 2 WAN connections, which both get their IPs through DHCP.
I only need failover, no load-balancing.
@marc_s provided me with some scripts he is using, for setting the failover routes, after DHCP lease changes.
Unfortunately I am having trouble, getting this set up.
WAN1 (main link): eth0
WAN2 (failover link): eth0.70
I set up my scripts accordingly:
/config/scripts/dhcp-client/pre-hooks.d/01-no-default-route
RUN="yes"
SCRIPTNAME="pre-hooks.d/01-no-default-route"
LOGFILE="/tmp/01-no-default-route.log"
NOW="$(date)"
echo -e "\n--- ${NOW} --- [ ENTER ${SCRIPTNAME} ]---\n" >> ${LOGFILE}
# Use FD 19 to capture the debug stream caused by "set -x":
exec 19>>${LOGFILE}
# Tell bash about it (there's nothing special about 19, its arbitrary)
export BASH_XTRACEFD=19
set -x
env >> ${LOGFILE}
# Setting new_routers to an empty string avoids the installation
# of the default routes and allows to properly setup failover rules.
# That applies only to eth0, DHCP WAN.
#
# See /config/scripts/setup-failover-routes.sh
# See /config/scripts/dhcp-client/post-hooks.d/01-failover
# See https://vyos.dev/T5724
if [ "$RUN" = "yes" ]; then
if [ "$interface" = "eth0.70" ]; then
case "$reason" in
BOUND|RENEW|REBIND|REBOOT)
export new_gw="$new_routers"
export old_gw="$old_routers"
new_routers=""
;;
EXPIRE|FAIL|STOP)
old_ip_address=""
old_routers=""
;;
esac
fi
fi
set +x
/config/scripts/dhcp-client/post-hooks.d/01-failover
RUN="yes"
SCRIPTNAME="post-hooks.d/01-failover"
LOGFILE="/tmp/01-failover.log"
NOW="$(date)"
echo -e "\n--- ${NOW} --- [ ENTER ${SCRIPTNAME} ]---\n" >> ${LOGFILE}
# Use FD 19 to capture the debug stream caused by "set -x":
exec 19>>${LOGFILE}
# Tell bash about it (there's nothing special about 19, its arbitrary)
export BASH_XTRACEFD=19
set -x
# Execute the script to configure the failover mechanism in case of a
# BOUND, RENEW, REBIND, REBOOT.
# That applies only to eth0, the DHCP WAN.
#
# See /config/scripts/setup-failover-routes.sh
# See /config/scripts/dhcp-client/pre-hooks.d/01-no-default-route
# See https://vyos.dev/T5724
if [ "$RUN" = "yes" ]; then
if [ "$interface" = "eth0.70" ]; then
case $reason in
BOUND|RENEW|REBIND|REBOOT)
sudo /config/scripts/setup-failover-routes.sh $old_gw $new_gw
;;
esac
fi
fi
set +x
echo -e "\n--- ${NOW} --- [ EXIT ${SCRIPTNAME} ]---\n" >> ${LOGFILE}
/config/scripts/dhcp-client/post-hooks.d/01-failover
#!/bin/vbash
if [ "$(id -g -n)" != 'vyattacfg' ] ; then
exec sg vyattacfg -c "/bin/vbash $(readlink -f $0) $1 $2"
fi
# Save arguments
OLD_GW="$1"
NEW_GW="$2"
source /opt/vyatta/etc/functions/script-template
SCRIPTNAME="sudo setup-failover-routes.sh"
LOGFILE="/tmp/failover.log"
DHCP_INT="eth0.70"
PPPOE_INT="eth0"
NEW_IP="$( ${vyos_op_scripts_dir}/interfaces.py show --raw --intf-name "${DHCP_INT}" | jq -r '.[].addr_info[] | select( .family == "inet" and .scope == "global" ) | .local' )"
PPPOE_IP="$( ${vyos_op_scripts_dir}/interfaces.py show --raw --intf-name "${PPPOE_INT}" | jq -r '.[].addr_info[] | select( .family == "inet" and .scope == "global" ) | .local' )"
PPPOE_GW="$( ${vyos_op_scripts_dir}/interfaces.py show --raw --intf-name "${PPPOE_INT}" | jq -r '.[].addr_info[] | select( .family == "inet" and .scope == "global" ) | .address' )"
function logit {
local NOW="$(date)"
echo -e "\n${NOW} [${SCRIPTNAME}] $*\n" >> ${LOGFILE}
}
# Use FD 19 to capture the debug stream caused by "set -x":
exec 19>>"${LOGFILE}"
# Tell bash about it (there's nothing special about 19, its arbitrary)
export BASH_XTRACEFD=19
logit "START"
logit "DHCP interface ${DHCP_INT}: OLD_GW=${OLD_GW}, NEW_GW=${NEW_GW}, NEW_IP=${NEW_IP}"
logit "PPPOE interface ${PPPOE_INT}: PPPOE_GW=${PPPOE_GW}, PPPOEIP=${PPPOE_IP}"
# Sanity checks, we need everything declared
if [[ -z "${OLD_GW}" ]] || [[ -z "${NEW_GW}" ]] || [[ -z "${NEW_IP}" ]] || [[ -z "${PPPOE_IP}" ]] || [[ -z "${PPPOE_GW}" ]]
then
logit "One or more variables are not defined, aborting."
else
configure
logit "executing VyOS protocol failover commands"
delete protocols failover route 0.0.0.0/0
set protocols failover route 0.0.0.0/0 next-hop ${NEW_GW} check target '1.1.1.1'
set protocols failover route 0.0.0.0/0 next-hop ${NEW_GW} check target '4.2.2.1'
set protocols failover route 0.0.0.0/0 next-hop ${NEW_GW} check timeout '5'
set protocols failover route 0.0.0.0/0 next-hop ${NEW_GW} check type 'icmp'
set protocols failover route 0.0.0.0/0 next-hop ${NEW_GW} interface "${DHCP_INT}"
set protocols failover route 0.0.0.0/0 next-hop ${NEW_GW} metric '254'
set protocols failover route 0.0.0.0/0 next-hop ${PPPOE_GW} check target '1.0.0.1'
set protocols failover route 0.0.0.0/0 next-hop ${PPPOE_GW} check target '4.2.2.2'
set protocols failover route 0.0.0.0/0 next-hop ${PPPOE_GW} check timeout '5'
set protocols failover route 0.0.0.0/0 next-hop ${PPPOE_GW} check type 'icmp'
set protocols failover route 0.0.0.0/0 next-hop ${PPPOE_GW} interface "${PPPOE_INT}"
set protocols failover route 0.0.0.0/0 next-hop ${PPPOE_GW} metric '1'
delete protocols static route 1.1.1.1/32
delete protocols static route 4.2.2.1/32
delete protocols static route 1.0.0.1/32
delete protocols static route 4.2.2.2/32
delete protocols static route ${OLD_GW}/32
# Set a static route for eth0's nexthop, otherwise it'll go out pppoe0 :-)
set protocols static route ${NEW_GW}/32 interface eth0
# Set static routes for the healthchecks
set protocols static route 1.1.1.1/32 next-hop ${NEW_GW} interface "${DHCP_INT}"
set protocols static route 4.2.2.1/32 next-hop ${NEW_GW} interface "${DHCP_INT}"
# You don't really need to specify a next-hop for a pppoe interface, so we leave it out
set protocols static route 1.0.0.1/32 interface "${PPPOE_INT}"
set protocols static route 4.2.2.2/32 interface "${PPPOE_INT}"
# PBR
delete policy local-route rule 10
delete policy local-route rule 20
set policy local-route rule 10 set table '124'
set policy local-route rule 10 source address "${NEW_IP}"
set policy local-route rule 20 set table '125'
set policy local-route rule 20 source address "${PPPOE_IP}"
# Static routing tables for PBR
delete protocols static table 124
delete protocols static table 125
# For some reason, for table 124, we need to use next-hop instead of dhcp-interface
set protocols static table 124 description 'Route traffic through cable (eth0)'
set protocols static table 124 route 0.0.0.0/0 next-hop "${NEW_GW}"
set protocols static table 125 description 'Route traffic through fiber (pppoe0)'
set protocols static table 125 route 0.0.0.0/0 interface "${PPPOE_INT}"
commit
logit "DONE"
fi
exit
in /tmp/faillover.log I get:
Fri Aug 2 21:45:26 CEST 2024 [sudo setup-failover-routes.sh] START
Fri Aug 2 21:45:26 CEST 2024 [sudo setup-failover-routes.sh] DHCP interface eth0.70: OLD_GW=149.xxx.xxx.xxx, NEW_GW=149.xxx.xxx.xxx, NEW_IP=
Fri Aug 2 21:45:26 CEST 2024 [sudo setup-failover-routes.sh] PPPOE interface eth0: PPPOE_GW=, PPPOEIP=
Fri Aug 2 21:45:26 CEST 2024 [sudo setup-failover-routes.sh] One or more variables are not defined, aborting.
I tried debugging it by running the interfaces.py myself and see whats going on in the JSON response there.
I get the following JSON, when running this script for eth0 (which is using the PPPOE_GW variable in the bash script):
{
"ifindex": 2,
"ifname": "eth0",
"flags": [
"BROADCAST",
"MULTICAST",
"UP",
"LOWER_UP"
],
"mtu": 1500,
"qdisc": "mq",
"operstate": "UP",
"group": "default",
"txqlen": 1000,
"link_type": "ether",
"address": "00:1b:21:38:50:3d",
"broadcast": "ff:ff:ff:ff:ff:ff",
"altnames": [
"enp0s16",
"ens16"
],
"addr_info": [
{
"family": "inet",
"local": "178.xxx.xxx.xxx",
"prefixlen": 30,
"broadcast": "178.xxx.xxx.xxx",
"scope": "global",
"dynamic": true,
"label": "eth0",
"valid_life_time": 391,
"preferred_life_time": 391
},
{
"family": "inet6",
"local": "fe80::21b:21ff:fe38:503d",
"prefixlen": 64,
"scope": "link",
"valid_life_time": 4294967295,
"preferred_life_time": 4294967295
}
],
"counters_last_clear": 0,
"description": "WAN1",
"stats": {
"rx_bytes": 29209287,
"rx_packets": 62705,
"rx_errors": 0,
"rx_dropped": 0,
"rx_over_errors": 0,
"multicast": 1425,
"tx_bytes": 14209189,
"tx_packets": 46174,
"tx_errors": 0,
"tx_dropped": 0,
"tx_carrier_errors": 0,
"collisions": 0
}
}
From my understanding, there should be a value called “address” inside of the “addr_info” block, but that doesnt exist, so probably, thats why there is no value provided for PPPOE_GW.
Were there changes to the interfaces.py script or something, why it doesn’t work like that anymore or is that an error on my side?
Could it be an issue, that my 2nd WAN is just a VLAN interface?