diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-09 08:39:25 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-09 08:39:25 -0700 |
| commit | a55f7f5f29b32c2c53cc291899cf9b0c25a07f7c (patch) | |
| tree | 5d760fb7bbf41cc1ace3ee31368c17d8b9eba942 /tools | |
| parent | 8b02520ec5f7b0d976e8bbc072242275acd472d0 (diff) | |
| parent | b4afe3fa76a88ee7d3d8802b43fde89aa02f8e0d (diff) | |
Merge tag 'net-7.0-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni:
"Including fixes from netfilter, IPsec and wireless. This is again
considerably bigger than the old average. No known outstanding
regressions.
Current release - regressions:
- net: increase IP_TUNNEL_RECURSION_LIMIT to 5
- eth: ice: fix PTP timestamping broken by SyncE code on E825C
Current release - new code bugs:
- eth: stmmac: dwmac-motorcomm: fix eFUSE MAC address read failure
Previous releases - regressions:
- core: fix cross-cache free of KFENCE-allocated skb head
- sched: act_csum: validate nested VLAN headers
- rxrpc: fix call removal to use RCU safe deletion
- xfrm:
- wait for RCU readers during policy netns exit
- fix refcount leak in xfrm_migrate_policy_find
- wifi: rt2x00usb: fix devres lifetime
- mptcp: fix slab-use-after-free in __inet_lookup_established
- ipvs: fix NULL deref in ip_vs_add_service error path
- eth:
- airoha: fix memory leak in airoha_qdma_rx_process()
- lan966x: fix use-after-free and leak in lan966x_fdma_reload()
Previous releases - always broken:
- ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data()
- ipv4: nexthop: avoid duplicate NHA_HW_STATS_ENABLE on nexthop group
dump
- bridge: guard local VLAN-0 FDB helpers against NULL vlan group
- xsk: tailroom reservation and MTU validation
- rxrpc:
- fix to request an ack if window is limited
- fix RESPONSE authenticator parser OOB read
- netfilter: nft_ct: fix use-after-free in timeout object destroy
- batman-adv: hold claim backbone gateways by reference
- eth:
- stmmac: fix PTP ref clock for Tegra234
- idpf: fix PREEMPT_RT raw/bh spinlock nesting for async VC handling
- ipa: fix GENERIC_CMD register field masks for IPA v5.0+"
* tag 'net-7.0-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (104 commits)
net: lan966x: fix use-after-free and leak in lan966x_fdma_reload()
net: lan966x: fix page pool leak in error paths
net: lan966x: fix page_pool error handling in lan966x_fdma_rx_alloc_page_pool()
nfc: pn533: allocate rx skb before consuming bytes
l2tp: Drop large packets with UDP encap
net: ipa: fix event ring index not programmed for IPA v5.0+
net: ipa: fix GENERIC_CMD register field masks for IPA v5.0+
MAINTAINERS: Add Prashanth as additional maintainer for amd-xgbe driver
devlink: Fix incorrect skb socket family dumping
af_unix: read UNIX_DIAG_VFS data under unix_state_lock
Revert "mptcp: add needs_id for netlink appending addr"
mptcp: fix slab-use-after-free in __inet_lookup_established
net: txgbe: leave space for null terminators on property_entry
net: ioam6: fix OOB and missing lock
rxrpc: proc: size address buffers for %pISpc output
rxrpc: only handle RESPONSE during service challenge
rxrpc: Fix buffer overread in rxgk_do_verify_authenticator()
rxrpc: Fix leak of rxgk context in rxgk_verify_response()
rxrpc: Fix integer overflow in rxgk_verify_response()
rxrpc: Fix missing error checks for rxkad encryption/decryption failure
...
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/test_xsk.c | 55 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/test_xsk.h | 23 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/xsk.c | 19 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/xsk_xdp_progs.c | 4 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/xskxceiver.c | 23 | ||||
| -rw-r--r-- | tools/testing/selftests/net/Makefile | 1 | ||||
| -rwxr-xr-x | tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh | 1 | ||||
| -rw-r--r-- | tools/testing/selftests/net/netfilter/nf_queue.c | 50 | ||||
| -rwxr-xr-x | tools/testing/selftests/net/netfilter/nft_queue.sh | 83 | ||||
| -rwxr-xr-x | tools/testing/selftests/net/srv6_iptunnel_cache.sh | 197 | ||||
| -rw-r--r-- | tools/testing/vsock/util.c | 8 |
11 files changed, 414 insertions, 50 deletions
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c index 7e38ec6e656b..7950c504ed28 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_xsk.c +++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c @@ -179,25 +179,6 @@ int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg); } -#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags" -static unsigned int get_max_skb_frags(void) -{ - unsigned int max_skb_frags = 0; - FILE *file; - - file = fopen(MAX_SKB_FRAGS_PATH, "r"); - if (!file) { - ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH); - return 0; - } - - if (fscanf(file, "%u", &max_skb_frags) != 1) - ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH); - - fclose(file); - return max_skb_frags; -} - static int set_ring_size(struct ifobject *ifobj) { int ret; @@ -1978,15 +1959,17 @@ int testapp_headroom(struct test_spec *test) int testapp_stats_rx_dropped(struct test_spec *test) { + u32 umem_tr = test->ifobj_tx->umem_tailroom; + if (test->mode == TEST_MODE_ZC) { ksft_print_msg("Can not run RX_DROPPED test for ZC mode\n"); return TEST_SKIP; } - if (pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0)) + if (pkt_stream_replace_half(test, (MIN_PKT_SIZE * 3) + umem_tr, 0)) return TEST_FAILURE; test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3; + XDP_PACKET_HEADROOM - (MIN_PKT_SIZE * 2) - umem_tr; if (pkt_stream_receive_half(test)) return TEST_FAILURE; test->ifobj_rx->validation_func = validate_rx_dropped; @@ -2242,11 +2225,7 @@ int testapp_too_many_frags(struct test_spec *test) if (test->mode == TEST_MODE_ZC) { max_frags = test->ifobj_tx->xdp_zc_max_segs; } else { - max_frags = get_max_skb_frags(); - if (!max_frags) { - ksft_print_msg("Can't get MAX_SKB_FRAGS from system, using default (17)\n"); - max_frags = 17; - } + max_frags = test->ifobj_tx->max_skb_frags; max_frags += 1; } @@ -2551,16 +2530,34 @@ int testapp_adjust_tail_shrink_mb(struct test_spec *test) int testapp_adjust_tail_grow(struct test_spec *test) { + if (test->mode == TEST_MODE_SKB) + return TEST_SKIP; + /* Grow by 4 bytes for testing purpose */ return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2); } int testapp_adjust_tail_grow_mb(struct test_spec *test) { + u32 grow_size; + + if (test->mode == TEST_MODE_SKB) + return TEST_SKIP; + + /* worst case scenario is when underlying setup will work on 3k + * buffers, let us account for it; given that we will use 6k as + * pkt_len, expect that it will be broken down to 2 descs each + * with 3k payload; + * + * 4k is truesize, 3k payload, 256 HR, 320 TR; + */ + grow_size = XSK_UMEM__MAX_FRAME_SIZE - + XSK_UMEM__LARGE_FRAME_SIZE - + XDP_PACKET_HEADROOM - + test->ifobj_tx->umem_tailroom; test->mtu = MAX_ETH_JUMBO_SIZE; - /* Grow by (frag_size - last_frag_Size) - 1 to stay inside the last fragment */ - return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1, - XSK_UMEM__LARGE_FRAME_SIZE * 2); + + return testapp_adjust_tail(test, grow_size, XSK_UMEM__LARGE_FRAME_SIZE * 2); } int testapp_tx_queue_consumer(struct test_spec *test) diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.h b/tools/testing/selftests/bpf/prog_tests/test_xsk.h index 8fc78a057de0..1ab8aee4ce56 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_xsk.h +++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.h @@ -31,6 +31,9 @@ #define SOCK_RECONF_CTR 10 #define USLEEP_MAX 10000 +#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags" +#define SMP_CACHE_BYTES_PATH "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size" + extern bool opt_verbose; #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0) @@ -45,6 +48,24 @@ static inline u64 ceil_u64(u64 a, u64 b) return (a + b - 1) / b; } +static inline unsigned int read_procfs_val(const char *path) +{ + unsigned int read_val = 0; + FILE *file; + + file = fopen(path, "r"); + if (!file) { + ksft_print_msg("Error opening %s\n", path); + return 0; + } + + if (fscanf(file, "%u", &read_val) != 1) + ksft_print_msg("Error reading %s\n", path); + + fclose(file); + return read_val; +} + /* Simple test */ enum test_mode { TEST_MODE_SKB, @@ -115,6 +136,8 @@ struct ifobject { int mtu; u32 bind_flags; u32 xdp_zc_max_segs; + u32 umem_tailroom; + u32 max_skb_frags; bool tx_on; bool rx_on; bool use_poll; diff --git a/tools/testing/selftests/bpf/prog_tests/xsk.c b/tools/testing/selftests/bpf/prog_tests/xsk.c index dd4c35c0e428..6e2f63ee2a6c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xsk.c +++ b/tools/testing/selftests/bpf/prog_tests/xsk.c @@ -62,6 +62,7 @@ int configure_ifobj(struct ifobject *tx, struct ifobject *rx) static void test_xsk(const struct test_spec *test_to_run, enum test_mode mode) { + u32 max_frags, umem_tailroom, cache_line_size; struct ifobject *ifobj_tx, *ifobj_rx; struct test_spec test; int ret; @@ -84,6 +85,24 @@ static void test_xsk(const struct test_spec *test_to_run, enum test_mode mode) ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending; } + cache_line_size = read_procfs_val(SMP_CACHE_BYTES_PATH); + if (!cache_line_size) + cache_line_size = 64; + + max_frags = read_procfs_val(MAX_SKB_FRAGS_PATH); + if (!max_frags) + max_frags = 17; + + ifobj_tx->max_skb_frags = max_frags; + ifobj_rx->max_skb_frags = max_frags; + + /* 48 bytes is a part of skb_shared_info w/o frags array; + * 16 bytes is sizeof(skb_frag_t) + */ + umem_tailroom = ALIGN(48 + (max_frags * 16), cache_line_size); + ifobj_tx->umem_tailroom = umem_tailroom; + ifobj_rx->umem_tailroom = umem_tailroom; + if (!ASSERT_OK(init_iface(ifobj_rx, worker_testapp_validate_rx), "init RX")) goto delete_rx; if (!ASSERT_OK(init_iface(ifobj_tx, worker_testapp_validate_tx), "init TX")) diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c index 683306db8594..023d8befd4ca 100644 --- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c +++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c @@ -26,8 +26,10 @@ SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp) SEC("xdp.frags") int xsk_xdp_drop(struct xdp_md *xdp) { + static unsigned int drop_idx; + /* Drop every other packet */ - if (idx++ % 2) + if (drop_idx++ % 2) return XDP_DROP; return bpf_redirect_map(&xsk, 0, XDP_DROP); diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 05b3cebc5ca9..7dad8556a722 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -80,6 +80,7 @@ #include <linux/mman.h> #include <linux/netdev.h> #include <linux/ethtool.h> +#include <linux/align.h> #include <arpa/inet.h> #include <net/if.h> #include <locale.h> @@ -333,6 +334,7 @@ static void print_tests(void) int main(int argc, char **argv) { const size_t total_tests = ARRAY_SIZE(tests) + ARRAY_SIZE(ci_skip_tests); + u32 cache_line_size, max_frags, umem_tailroom; struct pkt_stream *rx_pkt_stream_default; struct pkt_stream *tx_pkt_stream_default; struct ifobject *ifobj_tx, *ifobj_rx; @@ -354,6 +356,27 @@ int main(int argc, char **argv) setlocale(LC_ALL, ""); + cache_line_size = read_procfs_val(SMP_CACHE_BYTES_PATH); + if (!cache_line_size) { + ksft_print_msg("Can't get SMP_CACHE_BYTES from system, using default (64)\n"); + cache_line_size = 64; + } + + max_frags = read_procfs_val(MAX_SKB_FRAGS_PATH); + if (!max_frags) { + ksft_print_msg("Can't get MAX_SKB_FRAGS from system, using default (17)\n"); + max_frags = 17; + } + ifobj_tx->max_skb_frags = max_frags; + ifobj_rx->max_skb_frags = max_frags; + + /* 48 bytes is a part of skb_shared_info w/o frags array; + * 16 bytes is sizeof(skb_frag_t) + */ + umem_tailroom = ALIGN(48 + (max_frags * 16), cache_line_size); + ifobj_tx->umem_tailroom = umem_tailroom; + ifobj_rx->umem_tailroom = umem_tailroom; + parse_command_line(ifobj_tx, ifobj_rx, argc, argv); if (opt_print_tests) { diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 605c54c0e8a3..c709523c99c6 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -89,6 +89,7 @@ TEST_PROGS := \ srv6_end_x_next_csid_l3vpn_test.sh \ srv6_hencap_red_l3vpn_test.sh \ srv6_hl2encap_red_l2vpn_test.sh \ + srv6_iptunnel_cache.sh \ stress_reuseport_listen.sh \ tcp_fastopen_backup_key.sh \ test_bpf.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh index 72dfbeaf56b9..e8031f68200a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh @@ -414,6 +414,7 @@ vlmc_querier_intvl_test() bridge vlan add vid 10 dev br1 self pvid untagged ip link set dev $h1 master br1 ip link set dev br1 up + setup_wait_dev $h1 0 bridge vlan add vid 10 dev $h1 master bridge vlan global set vid 10 dev br1 mcast_snooping 1 mcast_querier 1 sleep 2 diff --git a/tools/testing/selftests/net/netfilter/nf_queue.c b/tools/testing/selftests/net/netfilter/nf_queue.c index 116c0ca0eabb..8bbec37f5356 100644 --- a/tools/testing/selftests/net/netfilter/nf_queue.c +++ b/tools/testing/selftests/net/netfilter/nf_queue.c @@ -19,6 +19,8 @@ struct options { bool count_packets; bool gso_enabled; bool failopen; + bool out_of_order; + bool bogus_verdict; int verbose; unsigned int queue_num; unsigned int timeout; @@ -31,7 +33,7 @@ static struct options opts; static void help(const char *p) { - printf("Usage: %s [-c|-v [-vv] ] [-o] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p); + printf("Usage: %s [-c|-v [-vv] ] [-o] [-O] [-b] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p); } static int parse_attr_cb(const struct nlattr *attr, void *data) @@ -275,7 +277,9 @@ static int mainloop(void) unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE; struct mnl_socket *nl; struct nlmsghdr *nlh; + uint32_t ooo_ids[16]; unsigned int portid; + int ooo_count = 0; char *buf; int ret; @@ -308,6 +312,9 @@ static int mainloop(void) ret = mnl_cb_run(buf, ret, 0, portid, queue_cb, NULL); if (ret < 0) { + /* bogus verdict mode will generate ENOENT error messages */ + if (opts.bogus_verdict && errno == ENOENT) + continue; perror("mnl_cb_run"); exit(EXIT_FAILURE); } @@ -316,10 +323,35 @@ static int mainloop(void) if (opts.delay_ms) sleep_ms(opts.delay_ms); - nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict); - if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { - perror("mnl_socket_sendto"); - exit(EXIT_FAILURE); + if (opts.bogus_verdict) { + for (int i = 0; i < 50; i++) { + nlh = nfq_build_verdict(buf, id + 0x7FFFFFFF + i, + opts.queue_num, opts.verdict); + mnl_socket_sendto(nl, nlh, nlh->nlmsg_len); + } + } + + if (opts.out_of_order) { + ooo_ids[ooo_count] = id; + if (ooo_count >= 15) { + for (ooo_count; ooo_count >= 0; ooo_count--) { + nlh = nfq_build_verdict(buf, ooo_ids[ooo_count], + opts.queue_num, opts.verdict); + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + exit(EXIT_FAILURE); + } + } + ooo_count = 0; + } else { + ooo_count++; + } + } else { + nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict); + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + exit(EXIT_FAILURE); + } } } @@ -332,7 +364,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "chvot:q:Q:d:G")) != -1) { + while ((c = getopt(argc, argv, "chvoObt:q:Q:d:G")) != -1) { switch (c) { case 'c': opts.count_packets = true; @@ -375,6 +407,12 @@ static void parse_opts(int argc, char **argv) case 'v': opts.verbose++; break; + case 'O': + opts.out_of_order = true; + break; + case 'b': + opts.bogus_verdict = true; + break; } } diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index ea766bdc5d04..d80390848e85 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -11,6 +11,7 @@ ret=0 timeout=5 SCTP_TEST_TIMEOUT=60 +STRESS_TEST_TIMEOUT=30 cleanup() { @@ -719,6 +720,74 @@ EOF fi } +check_tainted() +{ + local msg="$1" + + if [ "$tainted_then" -ne 0 ];then + return + fi + + read tainted_now < /proc/sys/kernel/tainted + if [ "$tainted_now" -eq 0 ];then + echo "PASS: $msg" + else + echo "TAINT: $msg" + dmesg + ret=1 + fi +} + +test_queue_stress() +{ + read tainted_then < /proc/sys/kernel/tainted + local i + + ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet t { + chain forward { + type filter hook forward priority 0; policy accept; + + queue flags bypass to numgen random mod 8 + } +} +EOF + timeout "$STRESS_TEST_TIMEOUT" ip netns exec "$ns2" \ + socat -u UDP-LISTEN:12345,fork,pf=ipv4 STDOUT > /dev/null & + + timeout "$STRESS_TEST_TIMEOUT" ip netns exec "$ns3" \ + socat -u UDP-LISTEN:12345,fork,pf=ipv4 STDOUT > /dev/null & + + for i in $(seq 0 7); do + ip netns exec "$nsrouter" timeout "$STRESS_TEST_TIMEOUT" \ + ./nf_queue -q $i -t 2 -O -b > /dev/null & + done + + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + ping -q -f 10.0.2.99 > /dev/null 2>&1 & + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + ping -q -f 10.0.3.99 > /dev/null 2>&1 & + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + ping -q -f "dead:2::99" > /dev/null 2>&1 & + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + ping -q -f "dead:3::99" > /dev/null 2>&1 & + + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12345 + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" 12345 + + for i in $(seq 1 4);do + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + socat -u STDIN UDP-DATAGRAM:10.0.2.99:12345 < /dev/zero > /dev/null & + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + socat -u STDIN UDP-DATAGRAM:10.0.3.99:12345 < /dev/zero > /dev/null & + done + + wait + + check_tainted "concurrent queueing" +} + test_queue_removal() { read tainted_then < /proc/sys/kernel/tainted @@ -742,18 +811,7 @@ EOF ip netns exec "$ns1" nft flush ruleset - if [ "$tainted_then" -ne 0 ];then - return - fi - - read tainted_now < /proc/sys/kernel/tainted - if [ "$tainted_now" -eq 0 ];then - echo "PASS: queue program exiting while packets queued" - else - echo "TAINT: queue program exiting while packets queued" - dmesg - ret=1 - fi + check_tainted "queue program exiting while packets queued" } ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null @@ -799,6 +857,7 @@ test_sctp_forward test_sctp_output test_udp_nat_race test_udp_gro_ct +test_queue_stress # should be last, adds vrf device in ns1 and changes routes test_icmp_vrf diff --git a/tools/testing/selftests/net/srv6_iptunnel_cache.sh b/tools/testing/selftests/net/srv6_iptunnel_cache.sh new file mode 100755 index 000000000000..62638ab679d9 --- /dev/null +++ b/tools/testing/selftests/net/srv6_iptunnel_cache.sh @@ -0,0 +1,197 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> + +# This test verifies that the seg6 lwtunnel does not share the dst_cache +# between the input (forwarding) and output (locally generated) paths. +# +# A shared dst_cache allows a forwarded packet to populate the cache and a +# subsequent locally generated packet to silently reuse that entry, bypassing +# its own route lookup. To expose this, the SID is made reachable only for +# forwarded traffic (via an ip rule matching iif) and blackholed for everything +# else. A local ping on ns_router must always hit the blackhole; +# if it succeeds after a forwarded packet has populated the +# cache, the bug is confirmed. +# +# Both forwarded and local packets are pinned to the same CPU with taskset, +# since dst_cache is per-cpu. +# +# +# +--------------------+ +--------------------+ +# | ns_src | | ns_dst | +# | | | | +# | veth-s0 | | veth-d0 | +# | fd00::1/64 | | fd01::2/64 | +# +-------+------------+ +----------+---------+ +# | | +# | +--------------------+ | +# | | ns_router | | +# | | | | +# +------------+ veth-r0 veth-r1 +--------------+ +# | fd00::2 fd01::1 | +# +--------------------+ +# +# +# ns_router: encap (main table) +# +---------+---------------------------------------+ +# | dst | action | +# +---------+---------------------------------------+ +# | cafe::1 | encap seg6 mode encap segs fc00::100 | +# +---------+---------------------------------------+ +# +# ns_router: post-encap SID resolution +# +-------+------------+----------------------------+ +# | table | dst | action | +# +-------+------------+----------------------------+ +# | 100 | fc00::100 | via fd01::2 dev veth-r1 | +# +-------+------------+----------------------------+ +# | main | fc00::100 | blackhole | +# +-------+------------+----------------------------+ +# +# ns_router: ip rule +# +------------------+------------------------------+ +# | match | action | +# +------------------+------------------------------+ +# | iif veth-r0 | lookup 100 | +# +------------------+------------------------------+ +# +# ns_dst: SRv6 decap (main table) +# +--------------+----------------------------------+ +# | SID | action | +# +--------------+----------------------------------+ +# | fc00::100 | End.DT6 table 255 (local) | +# +--------------+----------------------------------+ + +source lib.sh + +readonly SID="fc00::100" +readonly DEST="cafe::1" + +readonly SRC_MAC="02:00:00:00:00:01" +readonly RTR_R0_MAC="02:00:00:00:00:02" +readonly RTR_R1_MAC="02:00:00:00:00:03" +readonly DST_MAC="02:00:00:00:00:04" + +cleanup() +{ + cleanup_ns "${NS_SRC}" "${NS_RTR}" "${NS_DST}" +} + +check_prerequisites() +{ + if ! command -v ip &>/dev/null; then + echo "SKIP: ip tool not found" + exit "${ksft_skip}" + fi + + if ! command -v ping &>/dev/null; then + echo "SKIP: ping not found" + exit "${ksft_skip}" + fi + + if ! command -v sysctl &>/dev/null; then + echo "SKIP: sysctl not found" + exit "${ksft_skip}" + fi + + if ! command -v taskset &>/dev/null; then + echo "SKIP: taskset not found" + exit "${ksft_skip}" + fi +} + +setup() +{ + setup_ns NS_SRC NS_RTR NS_DST + + ip link add veth-s0 netns "${NS_SRC}" type veth \ + peer name veth-r0 netns "${NS_RTR}" + ip link add veth-r1 netns "${NS_RTR}" type veth \ + peer name veth-d0 netns "${NS_DST}" + + ip -n "${NS_SRC}" link set veth-s0 address "${SRC_MAC}" + ip -n "${NS_RTR}" link set veth-r0 address "${RTR_R0_MAC}" + ip -n "${NS_RTR}" link set veth-r1 address "${RTR_R1_MAC}" + ip -n "${NS_DST}" link set veth-d0 address "${DST_MAC}" + + # ns_src + ip -n "${NS_SRC}" link set veth-s0 up + ip -n "${NS_SRC}" addr add fd00::1/64 dev veth-s0 nodad + ip -n "${NS_SRC}" -6 route add "${DEST}"/128 via fd00::2 + + # ns_router + ip -n "${NS_RTR}" link set veth-r0 up + ip -n "${NS_RTR}" addr add fd00::2/64 dev veth-r0 nodad + ip -n "${NS_RTR}" link set veth-r1 up + ip -n "${NS_RTR}" addr add fd01::1/64 dev veth-r1 nodad + ip netns exec "${NS_RTR}" sysctl -qw net.ipv6.conf.all.forwarding=1 + + ip -n "${NS_RTR}" -6 route add "${DEST}"/128 \ + encap seg6 mode encap segs "${SID}" dev veth-r0 + ip -n "${NS_RTR}" -6 route add "${SID}"/128 table 100 \ + via fd01::2 dev veth-r1 + ip -n "${NS_RTR}" -6 route add blackhole "${SID}"/128 + ip -n "${NS_RTR}" -6 rule add iif veth-r0 lookup 100 + + # ns_dst + ip -n "${NS_DST}" link set veth-d0 up + ip -n "${NS_DST}" addr add fd01::2/64 dev veth-d0 nodad + ip -n "${NS_DST}" addr add "${DEST}"/128 dev lo nodad + ip -n "${NS_DST}" -6 route add "${SID}"/128 \ + encap seg6local action End.DT6 table 255 dev veth-d0 + ip -n "${NS_DST}" -6 route add fd00::/64 via fd01::1 + + # static neighbors + ip -n "${NS_SRC}" -6 neigh add fd00::2 dev veth-s0 \ + lladdr "${RTR_R0_MAC}" nud permanent + ip -n "${NS_RTR}" -6 neigh add fd00::1 dev veth-r0 \ + lladdr "${SRC_MAC}" nud permanent + ip -n "${NS_RTR}" -6 neigh add fd01::2 dev veth-r1 \ + lladdr "${DST_MAC}" nud permanent + ip -n "${NS_DST}" -6 neigh add fd01::1 dev veth-d0 \ + lladdr "${RTR_R1_MAC}" nud permanent +} + +test_cache_isolation() +{ + RET=0 + + # local ping with empty cache: must fail (SID is blackholed) + if ip netns exec "${NS_RTR}" taskset -c 0 \ + ping -c 1 -W 2 "${DEST}" &>/dev/null; then + echo "SKIP: local ping succeeded, topology broken" + exit "${ksft_skip}" + fi + + # forward from ns_src to populate the input cache + if ! ip netns exec "${NS_SRC}" taskset -c 0 \ + ping -c 1 -W 2 "${DEST}" &>/dev/null; then + echo "SKIP: forwarded ping failed, topology broken" + exit "${ksft_skip}" + fi + + # local ping again: must still fail; if the output path reuses + # the input cache, it bypasses the blackhole and the ping succeeds + if ip netns exec "${NS_RTR}" taskset -c 0 \ + ping -c 1 -W 2 "${DEST}" &>/dev/null; then + echo "FAIL: output path used dst cached by input path" + RET="${ksft_fail}" + else + echo "PASS: output path dst_cache is independent" + fi + + return "${RET}" +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +trap cleanup EXIT + +check_prerequisites +setup +test_cache_isolation +exit "${RET}" diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 9430ef5b8bc3..1fe1338c79cd 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -344,7 +344,9 @@ void send_buf(int fd, const void *buf, size_t len, int flags, ret = send(fd, buf + nwritten, len - nwritten, flags); timeout_check("send"); - if (ret == 0 || (ret < 0 && errno != EINTR)) + if (ret < 0 && errno == EINTR) + continue; + if (ret <= 0) break; nwritten += ret; @@ -396,7 +398,9 @@ void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret) ret = recv(fd, buf + nread, len - nread, flags); timeout_check("recv"); - if (ret == 0 || (ret < 0 && errno != EINTR)) + if (ret < 0 && errno == EINTR) + continue; + if (ret <= 0) break; nread += ret; |
