Hmm, I tried this, but no luck. Here is the configuration I applied, based on your example:
+ vpp {
+ interfaces {
+ bridge br10 {
+ member {
+ interface eth2 {
+ }
+ interface lo10 {
+ bvi
+ }
+ interface vxlan10 {
+ }
+ }
+ }
+ loopback lo10 {
+ kernel-interface "vpptun10"
+ }
+ vxlan vxlan10 {
+ remote "10.10.10.2"
+ source-address "10.10.10.1"
+ vni "10"
+ }
+ }
+ kernel-interfaces vpptun10 {
+ address "10.10.0.10/24"
+ }
+ settings {
+ interface eth1 {
+ driver "dpdk"
+ }
+ interface eth2 {
+ driver "dpdk"
+ }
+ }
+ }
Initialized commit-confirm; 5 minutes to confirm before reload
[ vpp ]
WARNING: offload option in eth1 settings is not supported by VPP interfaces. It will be ignored.
WARNING: ring-buffer option in eth1 settings is not supported by VPP interfaces. It will be ignored.
The cores spun up as expected, but there is no connectivity, and dmesg reports a lot of errors:
[14522.347560] page_pool_release_retry() stalled pool shutdown 1 inflight 14499 sec (this message repeats hundreds of times)
[18947.547157] IPv4: martian source 85.195.XXX.XXX (my IP) from 0.0.0.0, on dev eth1
[18947.547187] ll header: 00000000: 50 6b 4b 29 7f 7c 00 00 f7 f7 02 56 08 00
[26662.736708] mlx5_core 0000:01:00.1 eth2: Link down
[64267.182723] mlx5_core 0000:01:00.1 eth2: Link up
[76021.529987] mlx5_core 0000:01:00.1 eth2: Link down
[78457.354303] mlx5_core 0000:01:00.1 eth2: Link up
[88758.804499] Initializing XFRM netlink socket
[88760.837601] mlx5_core 0000:01:00.0 eth1: Error cqe on cqn 0x22, ci 0x0, qn 0x10be, opcode 0xd, syndrome 0x4, vendor syndrome 0x51
[88760.838153] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88760.838156] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88760.838159] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88760.838161] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 be 00 00 d8 d2
[88760.838164] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64
[88760.838168] 00000000: 00 00 00 0a 00 10 be 04 00 00 00 08 00 00 00 00
[88760.838170] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 00 00
[88760.838173] 00000020: f7 f7 02 56 50 6b 4b 29 7f 7c 08 00 45 00 00 54
[88760.838176] 00000030: 00 00 00 50 00 00 22 00 00 00 00 00 6f 88 40 52
[88760.838190] mlx5_core 0000:01:00.0 eth1: ERR CQE on SQ: 0x10be
[88761.065281] ------------[ cut here ]------------
[88761.065283] WARNING: CPU: 2 PID: 65910 at drivers/iommu/dma-iommu.c:1094 iommu_dma_unmap_page+0x74/0x90
[88761.065289] Modules linked in: xfrm_user xfrm_algo uio_pci_generic uio nf_conntrack_bridge nft_flow_offload nf_flow_table_inet nf_flow_table nft_nat nft_masq af_packet nft_ct nft_chain_nat nf_nat nf_tables nfnetlink_cthelper nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nfnetlink binfmt_misc intel_rapl_common crct10dif_pclmul crc32_pclmul ghash_clmulni_intel sha512_ssse3 sha256_ssse3 sha1_ssse3 aesni_intel virtio_console virtio_balloon crypto_simd cryptd iTCO_wdt pcspkr iTCO_vendor_support button evdev tcp_bbr sch_fq_codel mpls_iptunnel mpls_router ip_tunnel br_netfilter bridge stp llc vfio_pci vfio_pci_core irqbypass vfio_iommu_type1 vfio fuse efi_pstore configfs ip_tables x_tables autofs4 usb_storage ohci_hcd sd_mod squashfs lz4_decompress loop overlay ext4 crc16 mbcache jbd2 nls_cp437 vfat fat efivarfs nls_ascii mlx5_ib ib_uverbs ib_core hid_generic usbhid hid virtio_net net_failover failover ahci virtio_blk libahci libata virtio_pci virtio_pci_legacy_dev scsi_mod virtio_pci_modern_dev crc32c_intel scsi_common
[88761.065341] mlx5_core virtio ehci_pci virtio_ring i2c_i801 mlxfw pci_hyperv_intf uhci_hcd i2c_smbus lpc_ich ehci_hcd
[88761.065348] CPU: 2 PID: 65910 Comm: vpp_main Not tainted 6.6.79-vyos #1
[88761.065350] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[88761.065351] RIP: 0010:iommu_dma_unmap_page+0x74/0x90
[88761.065353] Code: 2b 48 3b 28 72 26 48 3b 68 08 73 20 4d 89 f8 44 89 f1 4c 89 ea 48 89 ee 48 89 df 5b 5d 41 5c 41 5d 41 5e 41 5f e9 2c e3 ae ff <0f> 0b 5b 5d 41 5c 41 5d 41 5e 41 5f e9 26 4a 6c 00 66 66 2e 0f 1f
[88761.065354] RSP: 0000:ffff9800c6d0bd18 EFLAGS: 00010246
[88761.065356] RAX: 0000000000000000 RBX: ffff899880f120c0 RCX: 0000000000000000
[88761.065357] RDX: 0000000000000000 RSI: ffff8999828cd000 RDI: 0000000000000000
[88761.065358] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000001
[88761.065358] R10: ffff9800c2a9b100 R11: ffff9800c2a9b100 R12: 000000006f884052
[88761.065359] R13: 0000000000000050 R14: 0000000000000001 R15: 0000000000000000
[88761.065362] FS: 00007f7fce0e5f40(0000) GS:ffff899befd00000(0000) knlGS:0000000000000000
[88761.065363] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[88761.065364] CR2: 00007f7f84a7dff8 CR3: 00000001e15e2000 CR4: 0000000000750ee0
[88761.065366] PKRU: 55555554
[88761.065366] Call Trace:
[88761.065371] <TASK>
[88761.065372] ? iommu_dma_unmap_page+0x74/0x90
[88761.065373] ? __warn+0x78/0x110
[88761.065378] ? iommu_dma_unmap_page+0x74/0x90
[88761.065379] ? report_bug+0x159/0x180
[88761.065383] ? handle_bug+0x58/0x90
[88761.065385] ? exc_invalid_op+0x13/0x60
[88761.065387] ? asm_exc_invalid_op+0x16/0x20
[88761.065391] ? iommu_dma_unmap_page+0x74/0x90
[88761.065392] ? iommu_dma_unmap_page+0x29/0x90
[88761.065394] mlx5e_poll_tx_cq+0x151/0x510 [mlx5_core]
[88761.065433] mlx5e_napi_poll+0x7d/0x710 [mlx5_core]
[88761.065458] ? srso_alias_return_thunk+0x5/0xfbef5
[88761.065460] __napi_poll+0x23/0x1a0
[88761.065463] net_rx_action+0x141/0x2c0
[88761.065465] handle_softirqs+0xd2/0x280
[88761.065467] __irq_exit_rcu+0x68/0x90
[88761.065468] common_interrupt+0x3c/0xa0
[88761.065471] asm_common_interrupt+0x22/0x40
[88761.065472] RIP: 0033:0x7f7fcf89b473
[88761.065473] Code: 4c 8b 5c 24 30 48 83 c4 48 eb a1 66 2e 0f 1f 84 00 00 00 00 00 66 90 64 48 8b 14 25 08 00 00 00 48 8b 05 48 fc 01 00 48 39 02 <75> 16 48 8b 07 48 c1 e0 04 48 8b 04 02 48 83 f8 ff 74 05 48 03 47
[88761.065475] RSP: 002b:00007f7f86cc2dd8 EFLAGS: 00000246
[88761.065476] RAX: 0000000000000003 RBX: 00007f7f8e0e5700 RCX: 0000000000000000
[88761.065477] RDX: 00007f7fce0e68e0 RSI: 0000000000000000 RDI: 00007f7fcf87af30
[88761.065477] RBP: 00012224361f9ed0 R08: 0000000000000024 R09: 00000000000008f3
[88761.065478] R10: 0000000000000000 R11: 0000000000000000 R12: 00007f7f8ebbe4c0
[88761.065479] R13: 00007f7f8e0e5700 R14: 00012224361f9ed0 R15: 000055c43980ec40
[88761.065481] </TASK>
[88761.065481] ---[ end trace 0000000000000000 ]---
[88761.065483] mlx5_core 0000:01:00.0 eth1: Error cqe on cqn 0x22, ci 0x4, qn 0x10be, opcode 0xd, syndrome 0x4, vendor syndrome 0x51
[88761.065784] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88761.065785] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88761.065786] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88761.065786] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 be 00 00 dc d2
[88761.065787] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64
[88761.065788] 00000000: 00 00 00 0a 00 10 be 04 00 00 00 08 00 00 00 00
[88761.065789] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 00 00
[88761.065790] 00000020: f7 f7 02 56 50 6b 4b 29 7f 7c 08 00 45 00 00 28
[88761.065791] 00000030: 00 00 00 24 00 00 22 00 00 00 00 00 6f 53 f0 52
[88761.065807] mlx5_core 0000:01:00.0 eth1: ERR CQE on SQ: 0x10be
[88761.106890] mlx5_core 0000:01:00.0 eth1: Error cqe on cqn 0x1d, ci 0x0, qn 0x10b9, opcode 0xd, syndrome 0x4, vendor syndrome 0x51
[88761.107189] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88761.107192] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88761.107195] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88761.107197] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 b9 00 00 df d2
[88761.107200] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64
[88761.107203] 00000000: 00 00 00 0a 00 10 b9 04 00 00 00 08 00 00 00 00
[88761.107205] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 00 00
[88761.107207] 00000020: f7 f7 02 56 50 6b 4b 29 7f 7c 08 00 45 00 00 28
[88761.107210] 00000030: 00 00 00 2a 00 00 22 00 00 00 00 00 6f 53 e0 52
[88761.107224] mlx5_core 0000:01:00.0 eth1: ERR CQE on SQ: 0x10b9
[88762.013802] mlx5_core 0000:01:00.0 defunct_eth1: renamed from eth1
[88763.071279] mlx5_core 0000:01:00.0 defunct_eth1: Link up
[88763.076888] mlx5_core 0000:01:00.0 defunct_eth1: Error cqe on cqn 0x18, ci 0x0, qn 0x10c8, opcode 0xd, syndrome 0x4, vendor syndrome 0x51
[88763.077207] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88763.077208] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88763.077209] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88763.077210] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 c8 00 00 ae d2
[88763.077211] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64
[88763.077212] 00000000: 00 00 00 0a 00 10 c8 04 00 00 00 08 00 00 00 00
[88763.077213] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 33 33
[88763.077214] 00000020: 00 00 00 16 50 6b 4b 29 7f 7c 86 dd 60 00 00 00
[88763.077215] 00000030: 00 00 00 98 00 00 22 00 00 00 00 00 6f 53 a0 14
[88763.082685] mlx5_core 0000:01:00.0 defunct_eth1: ERR CQE on SQ: 0x10c8
[88763.083367] tun: Universal TUN/TAP device driver, 1.6
[88763.103292] infiniband mlx5_0: dump_cqe:273:(pid 65950): WC error: 6, Message: memory bind operation error
[88763.103295] cqe_dump: 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88763.103297] cqe_dump: 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88763.103297] cqe_dump: 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88763.103298] cqe_dump: 00000030: 00 00 00 00 12 00 78 06 25 00 00 93 00 00 f7 d2
[88763.103326] ------------[ cut here ]------------
[88763.103327] WARNING: CPU: 2 PID: 65910 at drivers/infiniband/hw/mlx5/umr.c:333 mlx5r_umr_post_send_wait+0x32e/0x4f0 [mlx5_ib]
[88763.103339] Modules linked in: vhost_net vhost vhost_iotlb tap tun xfrm_user xfrm_algo uio_pci_generic uio nf_conntrack_bridge nft_flow_offload nf_flow_table_inet nf_flow_table nft_nat nft_masq af_packet nft_ct nft_chain_nat nf_nat nf_tables nfnetlink_cthelper nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nfnetlink binfmt_misc intel_rapl_common crct10dif_pclmul crc32_pclmul ghash_clmulni_intel sha512_ssse3 sha256_ssse3 sha1_ssse3 aesni_intel virtio_console virtio_balloon crypto_simd cryptd iTCO_wdt pcspkr iTCO_vendor_support button evdev tcp_bbr sch_fq_codel mpls_iptunnel mpls_router ip_tunnel br_netfilter bridge stp llc vfio_pci vfio_pci_core irqbypass vfio_iommu_type1 vfio fuse efi_pstore configfs ip_tables x_tables autofs4 usb_storage ohci_hcd sd_mod squashfs lz4_decompress loop overlay ext4 crc16 mbcache jbd2 nls_cp437 vfat fat efivarfs nls_ascii mlx5_ib ib_uverbs ib_core hid_generic usbhid hid virtio_net net_failover failover ahci virtio_blk libahci libata virtio_pci virtio_pci_legacy_dev scsi_mod
[88763.103381] virtio_pci_modern_dev crc32c_intel scsi_common mlx5_core virtio ehci_pci virtio_ring i2c_i801 mlxfw pci_hyperv_intf uhci_hcd i2c_smbus lpc_ich ehci_hcd
[88763.103389] CPU: 2 PID: 65910 Comm: vpp_main Tainted: G W 6.6.79-vyos #1
[88763.103391] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[88763.103391] RIP: 0010:mlx5r_umr_post_send_wait+0x32e/0x4f0 [mlx5_ib]
[88763.103398] Code: 48 89 ef e8 34 2c 98 d2 48 8d 7c 24 60 e8 6a 1b 98 d2 44 8b 4c 24 58 45 85 c9 74 6c 41 83 f9 05 0f 84 d8 fd ff ff 48 8b 1c 24 <0f> 0b 65 4c 8b 2c 25 80 df 02 00 4c 8d a3 08 05 00 00 45 8b 85 28
[88763.103400] RSP: 0018:ffff9800c6d0b978 EFLAGS: 00010202
[88763.103401] RAX: 0000000000000001 RBX: ffff899889642000 RCX: 0000000000000000
[88763.103402] RDX: 0000000000000001 RSI: 0000000055555554 RDI: ffff9800c6d0b9e0
[88763.103403] RBP: ffff899889642b28 R08: ffff899befd2f238 R09: 0000000000000006
[88763.103404] R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000080
[88763.103405] R13: ffff89988d3e6800 R14: ffff899889642b10 R15: 0000000000000000
[88763.103407] FS: 00007f7fce0e5f40(0000) GS:ffff899befd00000(0000) knlGS:0000000000000000
[88763.103408] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[88763.103409] CR2: 0000557e474645c0 CR3: 00000001e15e2000 CR4: 0000000000750ee0
[88763.103411] PKRU: 55555554
[88763.103411] Call Trace:
[88763.103413] <TASK>
[88763.103414] ? mlx5r_umr_post_send_wait+0x32e/0x4f0 [mlx5_ib]
[88763.103421] ? __warn+0x78/0x110
[88763.103425] ? mlx5r_umr_post_send_wait+0x32e/0x4f0 [mlx5_ib]
[88763.103432] ? report_bug+0x159/0x180
[88763.103436] ? handle_bug+0x58/0x90
[88763.103438] ? exc_invalid_op+0x13/0x60
[88763.103439] ? asm_exc_invalid_op+0x16/0x20
[88763.103443] ? mlx5r_umr_post_send_wait+0x32e/0x4f0 [mlx5_ib]
[88763.103449] ? mlx5r_umr_post_send_wait+0x316/0x4f0 [mlx5_ib]
[88763.103456] ? __pfx_mlx5r_umr_done+0x10/0x10 [mlx5_ib]
[88763.103462] mlx5r_umr_update_mr_pas+0x255/0x3c0 [mlx5_ib]
[88763.103470] ? __pfx_ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0x10/0x10 [ib_uverbs]
[88763.103477] create_real_mr+0x17c/0x1a0 [mlx5_ib]
[88763.103485] ? rdma_lookup_get_uobject+0x37/0x180 [ib_uverbs]
[88763.103490] ? __pfx_ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0x10/0x10 [ib_uverbs]
[88763.103495] ib_uverbs_reg_mr+0x16e/0x2a0 [ib_uverbs]
[88763.103501] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xbf/0x130 [ib_uverbs]
[88763.103507] ib_uverbs_cmd_verbs+0xbfa/0xca0 [ib_uverbs]
[88763.103512] ? srso_alias_return_thunk+0x5/0xfbef5
[88763.103514] ? __pfx_ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0x10/0x10 [ib_uverbs]
[88763.103518] ? srso_alias_return_thunk+0x5/0xfbef5
[88763.103522] ? srso_alias_return_thunk+0x5/0xfbef5
[88763.103523] ? blk_finish_plug+0x20/0x40
[88763.103525] ? srso_alias_return_thunk+0x5/0xfbef5
[88763.103527] ? do_madvise.part.0+0x561/0xc60
[88763.103530] ib_uverbs_ioctl+0x9f/0x110 [ib_uverbs]
[88763.103534] __x64_sys_ioctl+0x8b/0xc0
[88763.103537] do_syscall_64+0x34/0x80
[88763.103539] entry_SYSCALL_64_after_hwframe+0x78/0xe2
[88763.103541] RIP: 0033:0x7f7fce334d1b
[88763.103542] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1c 48 8b 44 24 18 64 48 2b 04 25 28 00 00
[88763.103544] RSP: 002b:00007f7f84a7e2d0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[88763.103545] RAX: ffffffffffffffda RBX: 00007f7f84a7e350 RCX: 00007f7fce334d1b
[88763.103546] RDX: 00007f7f84a7e370 RSI: 00000000c0181b01 RDI: 0000000000000013
[88763.103547] RBP: 0000000000000028 R08: 000055c45d130ae0 R09: 00007f7f84a7e388
[88763.103547] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f7f84a7e4b4
[88763.103548] R13: 000000000000000c R14: 00007f7f84a7e4c8 R15: 000055c45d130c20
[88763.103550] </TASK>
[88763.103550] ---[ end trace 0000000000000000 ]---
[88763.103551] infiniband mlx5_0: mlx5r_umr_post_send_wait:334:(pid 65910): reg umr failed (6). Trying to recover and resubmit the flushed WQEs
[88763.270693] mlx5_core 0000:01:00.1 defunct_eth2: renamed from eth2
[88763.407033] mlx5_core 0000:01:00.1 defunct_eth2: Link up
[88763.917606] mlx5_core 0000:01:00.0 defunct_eth1: Error cqe on cqn 0x1d, ci 0x0, qn 0x10cd, opcode 0xd, syndrome 0x4, vendor syndrome 0x51
[88763.917933] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88763.917936] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88763.917939] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88763.917941] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 cd 00 00 ab d2
[88763.917944] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64
[88763.917947] 00000000: 00 00 00 0a 00 10 cd 04 00 00 00 08 00 00 00 00
[88763.917949] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 33 33
[88763.917956] 00000020: 00 00 00 16 50 6b 4b 29 7f 7c 86 dd 60 00 00 00
[88763.917958] 00000030: 00 00 00 84 00 00 22 00 00 00 00 00 6f 4f f0 14
[88763.918276] mlx5_core 0000:01:00.0 defunct_eth1: ERR CQE on SQ: 0x10cd
[88764.001391] mlx5_core 0000:01:00.0 defunct_eth1: Error cqe on cqn 0x18, ci 0x2, qn 0x10c8, opcode 0xd, syndrome 0x4, vendor syndrome 0x51
[88764.001701] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88764.001703] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88764.001704] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88764.001704] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 c8 00 00 ac d2
[88764.001705] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64
[88764.001706] 00000000: 00 00 00 0a 00 10 c8 04 00 00 00 08 00 00 00 00
[88764.001707] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 33 33
[88764.001708] 00000020: ff 00 00 12 50 6b 4b 29 7f 7c 86 dd 60 00 00 00
[88764.001708] 00000030: 00 00 00 44 00 00 22 00 00 00 00 00 6f 53 ac 14
[88764.001743] mlx5_core 0000:01:00.0 defunct_eth1: ERR CQE on SQ: 0x10c8
[88764.008936] mlx5_core 0000:01:00.0 defunct_eth1: Error cqe on cqn 0x1d, ci 0x1, qn 0x10cd, opcode 0xd, syndrome 0x4, vendor syndrome 0x51
[88764.009245] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88764.009246] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88764.009247] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88764.009248] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 cd 00 00 aa d2
[88764.009249] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64
[88764.009250] 00000000: 00 00 00 0a 00 10 cd 04 00 00 00 08 00 00 00 00
[88764.009251] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 33 33
[88764.009251] 00000020: ff 29 7f 7c 50 6b 4b 29 7f 7c 86 dd 60 00 00 00
[88764.009252] 00000030: 00 00 00 44 00 00 22 00 00 00 00 00 6f 4f f5 94
[88764.009278] mlx5_core 0000:01:00.0 defunct_eth1: ERR CQE on SQ: 0x10cd
[88768.094171] mlx5_core 0000:01:00.0 defunct_eth1: Error cqe on cqn 0x27, ci 0x0, qn 0x10d7, opcode 0xd, syndrome 0x4, vendor syndrome 0x51
[88768.094492] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88768.094493] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88768.094494] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88768.094495] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 d7 00 00 b1 d2
[88768.094496] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64
[88768.094497] 00000000: 00 00 00 0a 00 10 d7 04 00 00 00 08 00 00 00 00
[88768.094498] 00000010: 00 00 00 00 c0 00 00 00 00 00 00 00 00 12 33 33
[88768.094499] 00000020: 00 01 00 02 50 6b 4b 29 7f 7c 86 dd 60 0b 40 53
[88768.094499] 00000030: 00 00 00 8a 00 00 22 00 00 00 00 00 6f 4f e9 1c
[88768.094509] mlx5_core 0000:01:00.0 defunct_eth1: ERR CQE on SQ: 0x10d7
[88768.158701] page_pool_release_retry() stalled pool shutdown 1 inflight 88750 sec
[88782.302244] mlx5_core 0000:01:00.0 defunct_eth1: Error cqe on cqn 0x22, ci 0x0, qn 0x10d2, opcode 0xd, syndrome 0x4, vendor syndrome 0x51
[88782.302561] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88782.302563] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88782.302564] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[88782.302564] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 d2 00 00 b4 d2
[88782.302565] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64
[88782.302567] 00000000: 00 00 00 0a 00 10 d2 04 00 00 00 08 00 00 00 00
[88782.302567] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 00 00
[88782.302568] 00000020: f7 f7 02 56 50 6b 4b 29 7f 7c 86 dd 60 00 00 00
[88782.302569] 00000030: 00 00 00 44 00 00 22 00 00 00 00 00 6b ff f0 14
[88782.302593] mlx5_core 0000:01:00.0 defunct_eth1: ERR CQE on SQ: 0x10d2
[88819.483169] page_pool_release_retry() stalled pool shutdown 206 inflight 60 sec
[88820.616111] mlx5_core 0000:01:00.0 defunct_eth1: Failed to get min RX wqes on Channel[1] RQN[0xc0004f] wq cur_sz(0) min_rx_wqes(128)
[88820.616117] mlx5_core 0000:01:00.0 defunct_eth1: RX timeout on channel: 1, ICOSQ: 0x10e7, RQ: 0xc0004f, CQ: 0x33
[88820.618402] mlx5_core 0000:01:00.0 defunct_eth1: EQ 0x8: Cons = 0x7a31d, irqn = 0x3a
[88820.640398] mlx5_core 0000:01:00.0 defunct_eth1: Failed to get min RX wqes on Channel[2] RQN[0xc00050] wq cur_sz(0) min_rx_wqes(128)
[88820.640406] mlx5_core 0000:01:00.0 defunct_eth1: RX timeout on channel: 2, ICOSQ: 0x10ec, RQ: 0xc00050, CQ: 0x38
[88820.662083] mlx5_core 0000:01:00.0 defunct_eth1: Failed to get min RX wqes on Channel[3] RQN[0xc00051] wq cur_sz(0) min_rx_wqes(128)
[88820.662090] mlx5_core 0000:01:00.0 defunct_eth1: RX timeout on channel: 3, ICOSQ: 0x10f1, RQ: 0xc00051, CQ: 0x3d
[88820.668110] mlx5_core 0000:01:00.0: free_4k:279:(pid 65878): page not found
[89121.542290] page_pool_release_retry() stalled pool shutdown 206 inflight 362 sec
[89128.615872] mlx5_core 0000:01:00.0: E-Switch: Unload vfs: mode(LEGACY), nvfs(0), necvfs(0), active vports(0)
[89128.618823] mlx5_core 0000:01:00.0: E-Switch: Disable: mode(LEGACY), nvfs(0), necvfs(0), active vports(0)
[89130.629664] page_pool_release_retry() stalled pool shutdown 1 inflight 89113 sec
[89180.889193] mlx5_core 0000:01:00.0: E-Switch: Disable: mode(LEGACY), nvfs(0), necvfs(0), active vports(0)
[89181.104693] mlx5_core 0000:01:00.0: E-Switch: cleanup
[89181.954119] page_pool_release_retry() stalled pool shutdown 206 inflight 422 sec
[89182.231397] mlx5_core 0000:01:00.0: mlx5_cmd_out_err:806:(pid 739): MANAGE_PAGES(0x108) op_mod(0x2) failed, status bad system state(0x4), syndrome (0xe8912), err(-5)
[89182.231787] mlx5_core 0000:01:00.0: reclaim_pages:558:(pid 739): failed reclaiming pages: err -5
[89182.232033] mlx5_core 0000:01:00.0: mlx5_reclaim_root_pages:698:(pid 739): reclaim_pages err (-5) func_id=0x0 ec_func=0x0
[89182.232036] ------------[ cut here ]------------
[89182.232036] FW pages counter is 28674 after reclaiming all pages
[89182.232066] WARNING: CPU: 0 PID: 739 at drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c:730 mlx5_reclaim_startup_pages+0x22a/0x290 [mlx5_core]
[89182.232108] Modules linked in: vhost_net vhost vhost_iotlb tap tun xfrm_user xfrm_algo uio_pci_generic uio nf_conntrack_bridge nft_flow_offload nf_flow_table_inet nf_flow_table nft_nat nft_masq af_packet nft_ct nft_chain_nat nf_nat nf_tables nfnetlink_cthelper nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nfnetlink binfmt_misc intel_rapl_common crct10dif_pclmul crc32_pclmul ghash_clmulni_intel sha512_ssse3 sha256_ssse3 sha1_ssse3 aesni_intel virtio_console virtio_balloon crypto_simd cryptd iTCO_wdt pcspkr iTCO_vendor_support button evdev tcp_bbr sch_fq_codel mpls_iptunnel mpls_router ip_tunnel br_netfilter bridge stp llc vfio_pci vfio_pci_core irqbypass vfio_iommu_type1 vfio fuse efi_pstore configfs ip_tables x_tables autofs4 usb_storage ohci_hcd sd_mod squashfs lz4_decompress loop overlay ext4 crc16 mbcache jbd2 nls_cp437 vfat fat efivarfs nls_ascii mlx5_ib ib_uverbs ib_core hid_generic usbhid hid virtio_net net_failover failover ahci virtio_blk libahci libata virtio_pci virtio_pci_legacy_dev scsi_mod
[89182.232158] virtio_pci_modern_dev crc32c_intel scsi_common mlx5_core virtio ehci_pci virtio_ring i2c_i801 mlxfw pci_hyperv_intf uhci_hcd i2c_smbus lpc_ich ehci_hcd
[89182.232167] CPU: 0 PID: 739 Comm: python3 Tainted: G W 6.6.79-vyos #1
[89182.232169] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[89182.232170] RIP: 0010:mlx5_reclaim_startup_pages+0x22a/0x290 [mlx5_core]
[89182.232193] Code: 05 00 00 e8 c8 8e a2 d2 e9 21 ff ff ff 0f 0b 41 8b b5 b0 07 00 00 85 f6 0f 84 6c ff ff ff 48 c7 c7 c0 b0 77 c0 e8 46 a2 46 d2 <0f> 0b 41 8b b5 b8 07 00 00 85 f6 0f 84 5e ff ff ff 48 c7 c7 f8 b0
[89182.232195] RSP: 0018:ffff9800c184fca0 EFLAGS: 00010286
[89182.232196] RAX: 0000000000000000 RBX: ffff8998811f4928 RCX: 0000000000000027
[89182.232197] RDX: ffff899befc1d4c8 RSI: 0000000000000001 RDI: ffff899befc1d4c0
[89182.232198] RBP: 0000000000000000 R08: 0000000000000000 R09: ffff9800c184fb38
[89182.232199] R10: 0000000000000003 R11: ffffffff93ebab08 R12: 0000000000000000
[89182.232199] R13: ffff8998811f41a0 R14: 0000000000000000 R15: 0000000000001388
[89182.232202] FS: 00007f3b10779040(0000) GS:ffff899befc00000(0000) knlGS:0000000000000000
[89182.232203] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[89182.232204] CR2: 0000558fdcb0ad70 CR3: 000000012b078000 CR4: 0000000000750ef0
[89182.232206] PKRU: 55555554
[89182.232206] Call Trace:
[89182.232209] <TASK>
[89182.232210] ? mlx5_reclaim_startup_pages+0x22a/0x290 [mlx5_core]
[89182.232232] ? __warn+0x78/0x110
[89182.232238] ? mlx5_reclaim_startup_pages+0x22a/0x290 [mlx5_core]
[89182.232262] ? report_bug+0x159/0x180
[89182.232265] ? srso_alias_return_thunk+0x5/0xfbef5
[89182.232267] ? prb_read_valid+0x12/0x20
[89182.232271] ? handle_bug+0x58/0x90
[89182.232273] ? exc_invalid_op+0x13/0x60
[89182.232274] ? asm_exc_invalid_op+0x16/0x20
[89182.232277] ? mlx5_reclaim_startup_pages+0x22a/0x290 [mlx5_core]
[89182.232300] mlx5_function_disable+0x20/0xc0 [mlx5_core]
[89182.232321] ? srso_alias_return_thunk+0x5/0xfbef5
[89182.232322] mlx5_uninit_one+0x7e/0xf0 [mlx5_core]
[89182.232344] remove_one+0x49/0xc0 [mlx5_core]
[89182.232365] pci_device_remove+0x36/0xa0
[89182.232369] device_release_driver_internal+0x196/0x200
[89182.232373] pci_stop_bus_device+0x67/0x90
[89182.232377] pci_stop_and_remove_bus_device_locked+0x11/0x20
[89182.232378] remove_store+0x74/0x90
[89182.232381] kernfs_fop_write_iter+0x103/0x1e0
[89182.232384] vfs_write+0x1da/0x3a0
[89182.232388] ksys_write+0x5e/0xe0
[89182.232390] do_syscall_64+0x34/0x80
[89182.232392] entry_SYSCALL_64_after_hwframe+0x78/0xe2
[89182.232394] RIP: 0033:0x7f3b1087233f
[89182.232396] Code: 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 f9 d4 f8 ff 48 8b 54 24 18 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 44 24 08 e8 4c d5 f8 ff 48
[89182.232397] RSP: 002b:00007fff1eaae410 EFLAGS: 00000293 ORIG_RAX: 0000000000000001
[89182.232398] RAX: ffffffffffffffda RBX: 0000000000a860d8 RCX: 00007f3b1087233f
[89182.232399] RDX: 0000000000000001 RSI: 0000000026c1b410 RDI: 0000000000000014
[89182.232400] RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000
[89182.232401] R10: 0000000000000001 R11: 0000000000000293 R12: 00007f3b10778fc0
[89182.232401] R13: 0000000000000014 R14: 0000000000a440c0 R15: 0000000000000000
[89182.232403] </TASK>
[89182.232404] ---[ end trace 0000000000000000 ]---