PBR Rules not working with failover on DHCP WAN

Hello,
I am running VyOS 1.5-rolling-202407241122 with the below configuration.
I have 2 WAN interfaces, with DHCP. I have configured the failover, but after doing this, I don’t get an outbound connection from the PBR rule. So connections to those set destination ports don’t work.
Also, weirdly, from my client I am not able to ping 1.1.1.1, but 8.8.8.8 works just fine. My client sits in the 192.168.1.0/24 subnet. Funnily enough, traceroute works though.

set firewall global-options state-policy established action 'accept'
set firewall global-options state-policy invalid action 'drop'
set firewall global-options state-policy related action 'accept'
set firewall group interface-group LAN interface 'eth1'
set firewall group interface-group WAN interface 'eth0'
set firewall group interface-group WAN interface 'eth0.70'
set firewall group interface-group WAN interface 'eth0.70,eth0'
set firewall group network-group NET-LAN-v4 network '192.168.1.0/24'
set firewall group network-group NET-LAN-v4 network '172.27.63.0/24'
set firewall ipv4 forward filter rule 100 action 'jump'
set firewall ipv4 forward filter rule 100 destination group network-group 'NET-LAN-v4'
set firewall ipv4 forward filter rule 100 inbound-interface group 'WAN'
set firewall ipv4 forward filter rule 100 jump-target 'OUTSIDE-IN'
set firewall ipv4 input filter default-action 'drop'
set firewall ipv4 input filter rule 20 action 'jump'
set firewall ipv4 input filter rule 20 destination port '22'
set firewall ipv4 input filter rule 20 jump-target 'vyos_mgmt'
set firewall ipv4 input filter rule 20 protocol 'tcp'
set firewall ipv4 input filter rule 30 action 'accept'
set firewall ipv4 input filter rule 30 icmp type-name 'echo-request'
set firewall ipv4 input filter rule 30 protocol 'icmp'
set firewall ipv4 input filter rule 30 state 'new'
set firewall ipv4 input filter rule 40 action 'accept'
set firewall ipv4 input filter rule 40 destination port '53'
set firewall ipv4 input filter rule 40 protocol 'tcp_udp'
set firewall ipv4 input filter rule 40 source group network-group 'NET-LAN-v4'
set firewall ipv4 input filter rule 50 action 'accept'
set firewall ipv4 input filter rule 50 source address '127.0.0.0/8'
set firewall ipv4 name OUTSIDE-IN default-action 'drop'
set firewall ipv4 name vyos_mgmt default-action 'return'
set firewall ipv4 name vyos_mgmt rule 15 action 'accept'
set firewall ipv4 name vyos_mgmt rule 15 inbound-interface group 'LAN'
set firewall ipv4 name vyos_mgmt rule 20 action 'drop'
set firewall ipv4 name vyos_mgmt rule 20 inbound-interface group 'WAN'
set interfaces ethernet eth0 address 'dhcp'
set interfaces ethernet eth0 description 'WAN1'
set interfaces ethernet eth0 dhcp-options default-route-distance '10'
set interfaces ethernet eth0 hw-id '00:1b:21:38:50:3d'
set interfaces ethernet eth0 offload gro
set interfaces ethernet eth0 offload gso
set interfaces ethernet eth0 offload sg
set interfaces ethernet eth0 offload tso
set interfaces ethernet eth0 vif 70 address 'dhcp'
set interfaces ethernet eth0 vif 70 description 'WAN2'
set interfaces ethernet eth0 vif 70 dhcp-options default-route-distance '200'
set interfaces ethernet eth1 address '192.168.1.1/24'
set interfaces ethernet eth1 description 'LAN'
set interfaces ethernet eth1 hw-id '00:1b:21:38:50:3c'
set interfaces ethernet eth1 offload gro
set interfaces ethernet eth1 offload gso
set interfaces ethernet eth1 offload sg
set interfaces ethernet eth1 offload tso
set interfaces ethernet eth1 vif 2763 address '172.27.63.1/24'
set interfaces ethernet eth1 vif 2763 description 'VLAN2763-IOT'
set interfaces loopback lo
set load-balancing wan flush-connections
set load-balancing wan interface-health eth0 failure-count '1'
set load-balancing wan interface-health eth0 nexthop 'dhcp'
set load-balancing wan interface-health eth0 success-count '1'
set load-balancing wan interface-health eth0 test 0 resp-time '3'
set load-balancing wan interface-health eth0 test 0 target '8.8.8.8'
set load-balancing wan interface-health eth0 test 0 ttl-limit '1'
set load-balancing wan interface-health eth0 test 0 type 'ping'
set load-balancing wan interface-health eth0.70 failure-count '1'
set load-balancing wan interface-health eth0.70 nexthop 'dhcp'
set load-balancing wan interface-health eth0.70 success-count '1'
set load-balancing wan interface-health eth0.70 test 0 resp-time '3'
set load-balancing wan interface-health eth0.70 test 0 target '1.1.1.1'
set load-balancing wan interface-health eth0.70 test 0 ttl-limit '1'
set load-balancing wan interface-health eth0.70 test 0 type 'ping'
set load-balancing wan rule 9 destination address '172.27.63.0/24'
set load-balancing wan rule 9 exclude
set load-balancing wan rule 9 inbound-interface 'eth1'
set load-balancing wan rule 9 source address '192.168.1.0/24'
set load-balancing wan rule 10 failover
set load-balancing wan rule 10 inbound-interface 'eth1'
set load-balancing wan rule 10 interface eth0 weight '100'
set load-balancing wan rule 10 interface eth0.70 weight '10'
set load-balancing wan rule 10 protocol 'all'
set nat source rule 100 outbound-interface name 'eth0'
set nat source rule 100 source group network-group 'NET-LAN-v4'
set nat source rule 100 translation address 'masquerade'
set nat source rule 110 outbound-interface name 'eth0.70'
set nat source rule 110 source group network-group 'NET-LAN-v4'
set nat source rule 110 translation address 'masquerade'
set policy route PBR interface 'eth1'
set policy route PBR rule 1 destination port '3724,1119,6012'
set policy route PBR rule 1 protocol 'tcp_udp'
set policy route PBR rule 1 set table '11'
set protocols static route 0.0.0.0/0 dhcp-interface 'eth0'
set protocols static route 1.1.1.1/32 dhcp-interface 'eth0.70'
set protocols static route 8.8.8.8/32 dhcp-interface 'eth0'
set protocols static table 10 route 0.0.0.0/0 dhcp-interface 'eth0'
set protocols static table 11 route 0.0.0.0/0 dhcp-interface 'eth0.70'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 lease '86400'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 option default-router '172.27.63.1'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 option domain-name 'vyos.iot'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 option name-server '172.27.63.1'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 range 0 start '172.27.63.50'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 range 0 stop '172.27.63.100'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 static-mapping iRobot ip-address '172.27.63.201'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 static-mapping iRobot mac '4c:b9:ea:39:53:72'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 static-mapping shelly-bad ip-address '172.27.63.205'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 static-mapping shelly-bad mac '48:55:19:d9:4c:31'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 static-mapping shelly-plug ip-address '172.27.63.204'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 static-mapping shelly-plug mac 'b0:b2:1c:19:fd:00'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 static-mapping shelly-schlaf ip-address '172.27.63.203'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 static-mapping shelly-schlaf mac '48:55:19:d9:9e:83'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 static-mapping sonoff-aqua ip-address '172.27.63.206'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 static-mapping sonoff-aqua mac '3c:e9:0e:8a:38:50'
set service dhcp-server shared-network-name IOT subnet 172.27.63.0/24 subnet-id '2'
set service dhcp-server shared-network-name LAN subnet 192.168.1.0/24 lease '86400'
set service dhcp-server shared-network-name LAN subnet 192.168.1.0/24 option default-router '192.168.1.1'
set service dhcp-server shared-network-name LAN subnet 192.168.1.0/24 option domain-name 'vyos.lan'
set service dhcp-server shared-network-name LAN subnet 192.168.1.0/24 option name-server '192.168.1.1'
set service dhcp-server shared-network-name LAN subnet 192.168.1.0/24 range 0 start '192.168.1.100'
set service dhcp-server shared-network-name LAN subnet 192.168.1.0/24 range 0 stop '192.168.1.200'
set service dhcp-server shared-network-name LAN subnet 192.168.1.0/24 static-mapping docker ip-address '192.168.1.220'
set service dhcp-server shared-network-name LAN subnet 192.168.1.0/24 static-mapping docker mac 'ba:66:cf:44:8a:34'
set service dhcp-server shared-network-name LAN subnet 192.168.1.0/24 static-mapping homeass ip-address '192.168.1.115'
set service dhcp-server shared-network-name LAN subnet 192.168.1.0/24 static-mapping homeass mac '02:c9:8b:b9:29:5f'
set service dhcp-server shared-network-name LAN subnet 192.168.1.0/24 static-mapping kiwork ip-address '192.168.1.90'
set service dhcp-server shared-network-name LAN subnet 192.168.1.0/24 static-mapping kiwork mac 'f8:b4:6a:87:ec:46'
set service dhcp-server shared-network-name LAN subnet 192.168.1.0/24 subnet-id '1'
set service dns forwarding allow-from '192.168.1.0/24'
set service dns forwarding allow-from '172.27.63.0/24'
set service dns forwarding cache-size '0'
set service dns forwarding listen-address '192.168.1.1'
set service dns forwarding listen-address '172.27.63.1'
set service ntp allow-client address '127.0.0.0/8'
set service ntp allow-client address '169.254.0.0/16'
set service ntp allow-client address '10.0.0.0/8'
set service ntp allow-client address '172.16.0.0/12'
set service ntp allow-client address '192.168.0.0/16'
set service ntp allow-client address '::1/128'
set service ntp allow-client address 'fe80::/10'
set service ntp allow-client address 'fc00::/7'
set service ntp server time1.vyos.net
set service ntp server time2.vyos.net
set service ntp server time3.vyos.net
set service ssh listen-address '192.168.1.1'
set service ssh port '22'
set system config-management commit-revisions '100'
set system console device ttyS0 speed '115200'
set system host-name 'vyos'
set system option keyboard-layout 'de'
set system syslog global facility all level 'info'
set system syslog global facility local7 level 'debug'
set system time-zone 'Europe/Vienna'

In my experience, PBR and WLB cannot peacefully coexist, because WLB uses special routing tables that negate the PBR rules.

Damn, okay. So I have to use the static failover protocol, with some janky scripts that set the gateways after a new DHCP lease is acquired. Or do you have any other approaches where I don’t lose my PBR functionality and still have failover?

Nope, what you describe is exactly what I do.

Do you also have 2 WAN Interfaces with DHCP?
If so, would you mind sharing your hooks and scripts?

I found a guide here on the forum that covers a single DHCP interface, but I found it a bit hard to adapt it to 2 DHCP interfaces.

I have one dhcp and one pppoe wan interface, but the scripts are easily adapted to your situation.

I’ll whip up a post tomorrow.

1 Like

I’d appreciate that, thanks Marc!

Here goes.

Firstly, all credit to @giuppo77 for the scripts. I merely made changes to their initial implementation to adapt it to my situation (DHCP WAN and PPPoE WAN).

My situation: eth0 is cable WAN (DHCP) and pppoe0 is fiber WAN. I want traffic to go out the fiber link and failover to the cable link. The cable link changes IP often but the fiber link is quite static.

Script /config/scripts/dhcp-client/pre-hooks.d/01-no-default-route makes sure the DHCP client does not set a default route:

# DHCP client pre-hook: stop the DHCP client from installing a default route
# for the DHCP WAN (eth0) so the failover setup can manage default routes.
#
# NOTE(review): this file is expected to be sourced by the DHCP client hook
# machinery with $interface, $reason, $new_routers and $old_routers already
# set -- confirm against the dhclient-script in use.
#
# See /config/scripts/setup-failover-routes.sh
# See /config/scripts/dhcp-client/post-hooks.d/01-failover
# See https://vyos.dev/T5724

RUN="yes"
SCRIPTNAME="pre-hooks.d/01-no-default-route"
LOGFILE="/tmp/01-no-default-route.log"
NOW="$(date)"

printf '\n--- %s --- [ ENTER %s ]---\n\n' "${NOW}" "${SCRIPTNAME}" >> "${LOGFILE}"

# Use FD 19 to capture the debug stream caused by "set -x":
exec 19>>"${LOGFILE}"

# Tell bash about it (there's nothing special about 19, it's arbitrary)
export BASH_XTRACEFD=19

#######################################
# Apply the "no default route" logic for the current lease event.
# Globals:
#   RUN, interface, reason (read)
#   new_routers, old_routers, old_ip_address (read and cleared)
#   new_gw, old_gw (exported copies of the router values for the post-hook)
# Returns: 0 always
#######################################
vyos_no_default_route_hook() {
    [ "$RUN" = "yes" ] || return 0
    # Only eth0, the DHCP WAN, is handled here.
    [ "$interface" = "eth0" ] || return 0

    case "$reason" in
        BOUND|RENEW|REBIND|REBOOT)
            # Keep the gateways for the post-hook, then blank new_routers:
            # an empty value avoids the installation of the default route.
            export new_gw="$new_routers"
            export old_gw="$old_routers"
            new_routers=""
            ;;

        EXPIRE|FAIL|STOP)
            old_ip_address=""
            old_routers=""
            ;;
    esac
    return 0
}

set -x
# Dump the hook environment into the log for debugging.
env >> "${LOGFILE}"
vyos_no_default_route_hook
set +x

printf '\n--- %s --- [ EXIT %s ]---\n\n' "${NOW}" "${SCRIPTNAME}" >> "${LOGFILE}"

Then, a post hook script in /config/scripts/dhcp-client/post-hooks.d/01-failover:

# DHCP client post-hook: after a BOUND, RENEW, REBIND or REBOOT on eth0 (the
# DHCP WAN), run the script that configures the failover mechanism.
#
# NOTE(review): $old_gw and $new_gw are expected to have been exported by the
# pre-hook listed below -- confirm both hooks run in the same shell.
#
# See /config/scripts/setup-failover-routes.sh
# See /config/scripts/dhcp-client/pre-hooks.d/01-no-default-route
# See https://vyos.dev/T5724

RUN="yes"
SCRIPTNAME="post-hooks.d/01-failover"
LOGFILE="/tmp/01-failover.log"
NOW="$(date)"

printf '\n--- %s --- [ ENTER %s ]---\n\n' "${NOW}" "${SCRIPTNAME}" >> "${LOGFILE}"

# Use FD 19 to capture the debug stream caused by "set -x":
exec 19>>"${LOGFILE}"

# Tell bash about it (there's nothing special about 19, it's arbitrary)
export BASH_XTRACEFD=19

#######################################
# Invoke the failover setup script for the current lease event.
# Globals: RUN, interface, reason, old_gw, new_gw (read)
# Returns: 0 when nothing had to be done, otherwise the status of the
#          setup script invocation
#######################################
vyos_failover_post_hook() {
    [ "$RUN" = "yes" ] || return 0
    [ "$interface" = "eth0" ] || return 0

    case "$reason" in
        BOUND|RENEW|REBIND|REBOOT)
            # Quote both gateways: an empty old gateway must stay argument 1
            # instead of letting the new gateway slide into its position.
            sudo /config/scripts/setup-failover-routes.sh "$old_gw" "$new_gw"
            ;;
    esac
}

set -x
vyos_failover_post_hook
set +x

printf '\n--- %s --- [ EXIT %s ]---\n\n' "${NOW}" "${SCRIPTNAME}" >> "${LOGFILE}"

The magic happens in /config/scripts/setup-failover-routes.sh:

#!/bin/vbash
#
# Usage: setup-failover-routes.sh OLD_GW NEW_GW
#
# Rebuilds the "protocols failover" default route, the host routes used by
# the health checks, and the local policy-routing rules/tables for a DHCP WAN
# (DHCP_INT) and a PPPoE WAN (PPPOE_INT).
#
#   OLD_GW  previous DHCP gateway; may be empty (e.g. on a first lease), in
#           which case no stale gateway host route is removed
#   NEW_GW  current DHCP gateway (required)
#
# Progress is appended to LOGFILE.

# Re-run under the vyattacfg group when not already in it. printf %q keeps
# every argument (including an empty one) in its own position when the
# command line is rebuilt for "sg -c".
if [ "$(id -g -n)" != 'vyattacfg' ] ; then
    exec sg vyattacfg -c "$(printf '%q ' /bin/vbash "$(readlink -f "$0")" "$1" "$2")"
fi

# Save arguments

OLD_GW="$1"
NEW_GW="$2"

# Must be sourced before any compound command that uses configure, set,
# delete or commit, so those commands are known when that code is parsed.
source /opt/vyatta/etc/functions/script-template

SCRIPTNAME="sudo setup-failover-routes.sh"
LOGFILE="/tmp/failover.log"
DHCP_INT="eth0"
PPPOE_INT="pppoe0"

# Print one field of the global IPv4 address entry of an interface.
# Arguments: $1 - interface name, $2 - addr_info field (e.g. local, address)
# Outputs:   the field value; nothing when the field is missing or null
function intf_inet_field {
    "${vyos_op_scripts_dir}/interfaces.py" show --raw --intf-name "$1" \
        | jq -r --arg field "$2" \
            '.[].addr_info[] | select( .family == "inet" and .scope == "global" ) | .[$field] // empty'
}

NEW_IP="$(intf_inet_field "${DHCP_INT}" local)"
PPPOE_IP="$(intf_inet_field "${PPPOE_INT}" local)"
PPPOE_GW="$(intf_inet_field "${PPPOE_INT}" address)"

# Append a timestamped message to LOGFILE.
function logit {
    local now
    now="$(date)"
    printf '\n%s [%s] %s\n\n' "${now}" "${SCRIPTNAME}" "$*" >> "${LOGFILE}"
}

# Use FD 19 to capture the debug stream caused by "set -x":
exec 19>>"${LOGFILE}"

# Tell bash about it (there's nothing special about 19, it's arbitrary)
export BASH_XTRACEFD=19

logit "START"

logit "DHCP interface ${DHCP_INT}: OLD_GW=${OLD_GW}, NEW_GW=${NEW_GW}, NEW_IP=${NEW_IP}"
logit "PPPOE interface ${PPPOE_INT}: PPPOE_GW=${PPPOE_GW}, PPPOEIP=${PPPOE_IP}"

# Sanity checks: everything except OLD_GW is required. OLD_GW is only used
# to clean up a previous gateway host route, so it may be empty.
if [[ -z "${NEW_GW}" ]] || [[ -z "${NEW_IP}" ]] || [[ -z "${PPPOE_IP}" ]] || [[ -z "${PPPOE_GW}" ]]
then
    logit "One or more variables are not defined, aborting."
else
    configure

    logit "executing VyOS protocol failover commands"

    delete protocols failover route 0.0.0.0/0

    # DHCP WAN next-hop, metric 254
    set protocols failover route 0.0.0.0/0 next-hop "${NEW_GW}" check target '1.1.1.1'
    set protocols failover route 0.0.0.0/0 next-hop "${NEW_GW}" check target '4.2.2.1'
    set protocols failover route 0.0.0.0/0 next-hop "${NEW_GW}" check timeout '5'
    set protocols failover route 0.0.0.0/0 next-hop "${NEW_GW}" check type 'icmp'
    set protocols failover route 0.0.0.0/0 next-hop "${NEW_GW}" interface "${DHCP_INT}"
    set protocols failover route 0.0.0.0/0 next-hop "${NEW_GW}" metric '254'

    # PPPoE WAN next-hop, metric 1
    set protocols failover route 0.0.0.0/0 next-hop "${PPPOE_GW}" check target '1.0.0.1'
    set protocols failover route 0.0.0.0/0 next-hop "${PPPOE_GW}" check target '4.2.2.2'
    set protocols failover route 0.0.0.0/0 next-hop "${PPPOE_GW}" check timeout '5'
    set protocols failover route 0.0.0.0/0 next-hop "${PPPOE_GW}" check type 'icmp'
    set protocols failover route 0.0.0.0/0 next-hop "${PPPOE_GW}" interface "${PPPOE_INT}"
    set protocols failover route 0.0.0.0/0 next-hop "${PPPOE_GW}" metric '1'

    delete protocols static route 1.1.1.1/32
    delete protocols static route 4.2.2.1/32
    delete protocols static route 1.0.0.1/32
    delete protocols static route 4.2.2.2/32
    # Without a previous gateway there is no stale host route to remove.
    if [[ -n "${OLD_GW}" ]]; then
        delete protocols static route "${OLD_GW}/32"
    fi

    # Set a static route for the DHCP WAN's nexthop, otherwise it'll go out pppoe0 :-)
    set protocols static route "${NEW_GW}/32" interface "${DHCP_INT}"

    # Set static routes for the healthchecks
    set protocols static route 1.1.1.1/32 next-hop "${NEW_GW}" interface "${DHCP_INT}"
    set protocols static route 4.2.2.1/32 next-hop "${NEW_GW}" interface "${DHCP_INT}"
    # You don't really need to specify a next-hop for a pppoe interface, so we leave it out
    set protocols static route 1.0.0.1/32 interface "${PPPOE_INT}"
    set protocols static route 4.2.2.2/32 interface "${PPPOE_INT}"

    # PBR
    delete policy local-route rule 10
    delete policy local-route rule 20
    set policy local-route rule 10 set table '124'
    set policy local-route rule 10 source address "${NEW_IP}"
    set policy local-route rule 20 set table '125'
    set policy local-route rule 20 source address "${PPPOE_IP}"

    # Static routing tables for PBR
    delete protocols static table 124
    delete protocols static table 125
    # For some reason, for table 124, we need to use next-hop instead of dhcp-interface
    set protocols static table 124 description 'Route traffic through cable (eth0)'
    set protocols static table 124 route 0.0.0.0/0 next-hop "${NEW_GW}"
    set protocols static table 125 description 'Route traffic through fiber (pppoe0)'
    set protocols static table 125 route 0.0.0.0/0 interface "${PPPOE_INT}"

    commit
    commit_rc=$?

    # Only report success when the commit actually went through.
    if [[ "${commit_rc}" -eq 0 ]]; then
        logit "DONE"
    else
        logit "commit failed (exit status ${commit_rc})"
    fi
fi

exit

In the original thread you can find some other neat tricks @giuppo77 used for conntrack clearing, mail notification etc.

You can adapt the scripts above to suit your situation.

Hope this helps.

2 Likes

Thank you very much! I will be testing this out in a few days.

This topic was automatically closed 2 days after the last reply. New replies are no longer allowed.