diff options
Diffstat (limited to 'tools/testing')
80 files changed, 2453 insertions, 799 deletions
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index fcd2f9bf78c9..558839e3c185 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -50,6 +50,7 @@ CONFIG_IPV6_SIT=y CONFIG_IPV6_TUNNEL=y CONFIG_KEYS=y CONFIG_LIRC=y +CONFIG_LIVEPATCH=y CONFIG_LWTUNNEL=y CONFIG_MODULE_SIG=y CONFIG_MODULE_SRCVERSION_ALL=y @@ -111,6 +112,8 @@ CONFIG_IP6_NF_FILTER=y CONFIG_NF_NAT=y CONFIG_PACKET=y CONFIG_RC_CORE=y +CONFIG_SAMPLES=y +CONFIG_SAMPLE_LIVEPATCH=m CONFIG_SECURITY=y CONFIG_SECURITYFS=y CONFIG_SYN_COOKIES=y diff --git a/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c new file mode 100644 index 000000000000..72aa5376c30e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include "testing_helpers.h" +#include "livepatch_trampoline.skel.h" + +static int load_livepatch(void) +{ + char path[4096]; + + /* CI will set KBUILD_OUTPUT */ + snprintf(path, sizeof(path), "%s/samples/livepatch/livepatch-sample.ko", + getenv("KBUILD_OUTPUT") ? : "../../../.."); + + return load_module(path, env_verbosity > VERBOSE_NONE); +} + +static void unload_livepatch(void) +{ + /* Disable the livepatch before unloading the module */ + system("echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled"); + + unload_module("livepatch_sample", env_verbosity > VERBOSE_NONE); +} + +static void read_proc_cmdline(void) +{ + char buf[4096]; + int fd, ret; + + fd = open("/proc/cmdline", O_RDONLY); + if (!ASSERT_OK_FD(fd, "open /proc/cmdline")) + return; + + ret = read(fd, buf, sizeof(buf)); + if (!ASSERT_GT(ret, 0, "read /proc/cmdline")) + goto out; + + ASSERT_OK(strncmp(buf, "this has been live patched", 26), "strncmp"); + +out: + close(fd); +} + +static void __test_livepatch_trampoline(bool fexit_first) +{ + struct livepatch_trampoline *skel = NULL; + int err; + + skel = livepatch_trampoline__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + goto out; + + skel->bss->my_pid = getpid(); + + if (!fexit_first) { + /* fentry program is loaded first by default */ + err = livepatch_trampoline__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + } else { + /* Manually load fexit program first. */ + skel->links.fexit_cmdline = bpf_program__attach(skel->progs.fexit_cmdline); + if (!ASSERT_OK_PTR(skel->links.fexit_cmdline, "attach_fexit")) + goto out; + + skel->links.fentry_cmdline = bpf_program__attach(skel->progs.fentry_cmdline); + if (!ASSERT_OK_PTR(skel->links.fentry_cmdline, "attach_fentry")) + goto out; + } + + read_proc_cmdline(); + + ASSERT_EQ(skel->bss->fentry_hit, 1, "fentry_hit"); + ASSERT_EQ(skel->bss->fexit_hit, 1, "fexit_hit"); +out: + livepatch_trampoline__destroy(skel); +} + +void test_livepatch_trampoline(void) +{ + int retry_cnt = 0; + +retry: + if (load_livepatch()) { + if (retry_cnt) { + ASSERT_OK(1, "load_livepatch"); + goto out; + } + /* + * Something else (previous run of the same test?) loaded + * the KLP module. Unload the KLP module and retry. + */ + unload_livepatch(); + retry_cnt++; + goto retry; + } + + if (test__start_subtest("fentry_first")) + __test_livepatch_trampoline(false); + + if (test__start_subtest("fexit_first")) + __test_livepatch_trampoline(true); +out: + unload_livepatch(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index f8eb7f9d4fd2..8fade8bdc451 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -6,11 +6,13 @@ #include <netinet/in.h> #include <test_progs.h> #include <unistd.h> +#include <errno.h> #include "cgroup_helpers.h" #include "network_helpers.h" #include "mptcp_sock.skel.h" #include "mptcpify.skel.h" #include "mptcp_subflow.skel.h" +#include "mptcp_sockmap.skel.h" #define NS_TEST "mptcp_ns" #define ADDR_1 "10.0.1.1" @@ -436,6 +438,142 @@ close_cgroup: close(cgroup_fd); } +/* Test sockmap on MPTCP server handling non-mp-capable clients. */ +static void test_sockmap_with_mptcp_fallback(struct mptcp_sockmap *skel) +{ + int listen_fd = -1, client_fd1 = -1, client_fd2 = -1; + int server_fd1 = -1, server_fd2 = -1, sent, recvd; + char snd[9] = "123456789"; + char rcv[10]; + + /* start server with MPTCP enabled */ + listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0); + if (!ASSERT_OK_FD(listen_fd, "sockmap-fb:start_mptcp_server")) + return; + + skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd)); + skel->bss->sk_index = 0; + /* create client without MPTCP enabled */ + client_fd1 = connect_to_fd_opts(listen_fd, NULL); + if (!ASSERT_OK_FD(client_fd1, "sockmap-fb:connect_to_fd")) + goto end; + + server_fd1 = accept(listen_fd, NULL, 0); + skel->bss->sk_index = 1; + client_fd2 = connect_to_fd_opts(listen_fd, NULL); + if (!ASSERT_OK_FD(client_fd2, "sockmap-fb:connect_to_fd")) + goto end; + + server_fd2 = accept(listen_fd, NULL, 0); + /* test normal redirect behavior: data sent by client_fd1 can be + * received by client_fd2 + */ + skel->bss->redirect_idx = 1; + sent = send(client_fd1, snd, sizeof(snd), 0); + if (!ASSERT_EQ(sent, sizeof(snd), "sockmap-fb:send(client_fd1)")) + goto end; + + /* try to recv more bytes to avoid truncation check */ + recvd = recv(client_fd2, rcv, sizeof(rcv), 0); + if (!ASSERT_EQ(recvd, sizeof(snd), "sockmap-fb:recv(client_fd2)")) + goto end; + +end: + if (client_fd1 >= 0) + close(client_fd1); + if (client_fd2 >= 0) + close(client_fd2); + if (server_fd1 >= 0) + close(server_fd1); + if (server_fd2 >= 0) + close(server_fd2); + close(listen_fd); +} + +/* Test sockmap rejection of MPTCP sockets - both server and client sides. */ +static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel) +{ + int listen_fd = -1, server_fd = -1, client_fd1 = -1; + int err, zero = 0; + + /* start server with MPTCP enabled */ + listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0); + if (!ASSERT_OK_FD(listen_fd, "start_mptcp_server")) + return; + + skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd)); + skel->bss->sk_index = 0; + /* create client with MPTCP enabled */ + client_fd1 = connect_to_fd(listen_fd, 0); + if (!ASSERT_OK_FD(client_fd1, "connect_to_fd client_fd1")) + goto end; + + /* bpf_sock_map_update() called from sockops should reject MPTCP sk */ + if (!ASSERT_EQ(skel->bss->helper_ret, -EOPNOTSUPP, "should reject")) + goto end; + + server_fd = accept(listen_fd, NULL, 0); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), + &zero, &server_fd, BPF_NOEXIST); + if (!ASSERT_EQ(err, -EOPNOTSUPP, "server should be disallowed")) + goto end; + + /* MPTCP client should also be disallowed */ + err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), + &zero, &client_fd1, BPF_NOEXIST); + if (!ASSERT_EQ(err, -EOPNOTSUPP, "client should be disallowed")) + goto end; +end: + if (client_fd1 >= 0) + close(client_fd1); + if (server_fd >= 0) + close(server_fd); + close(listen_fd); +} + +static void test_mptcp_sockmap(void) +{ + struct mptcp_sockmap *skel; + struct netns_obj *netns; + int cgroup_fd, err; + + cgroup_fd = test__join_cgroup("/mptcp_sockmap"); + if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_sockmap")) + return; + + skel = mptcp_sockmap__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_sockmap")) + goto close_cgroup; + + skel->links.mptcp_sockmap_inject = + bpf_program__attach_cgroup(skel->progs.mptcp_sockmap_inject, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.mptcp_sockmap_inject, "attach sockmap")) + goto skel_destroy; + + err = bpf_prog_attach(bpf_program__fd(skel->progs.mptcp_sockmap_redirect), + bpf_map__fd(skel->maps.sock_map), + BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream verdict")) + goto skel_destroy; + + netns = netns_new(NS_TEST, true); + if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_sockmap")) + goto skel_destroy; + + if (endpoint_init("subflow") < 0) + goto close_netns; + + test_sockmap_with_mptcp_fallback(skel); + test_sockmap_reject_mptcp(skel); + +close_netns: + netns_free(netns); +skel_destroy: + mptcp_sockmap__destroy(skel); +close_cgroup: + close(cgroup_fd); +} + void test_mptcp(void) { if (test__start_subtest("base")) @@ -444,4 +582,6 @@ void test_mptcp(void) test_mptcpify(); if (test__start_subtest("subflow")) test_subflow(); + if (test__start_subtest("sockmap")) + test_mptcp_sockmap(); } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c new file mode 100644 index 000000000000..c9efdd2a5b18 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "stacktrace_ips.skel.h" + +#ifdef __x86_64__ +static int check_stacktrace_ips(int fd, __u32 key, int cnt, ...) +{ + __u64 ips[PERF_MAX_STACK_DEPTH]; + struct ksyms *ksyms = NULL; + int i, err = 0; + va_list args; + + /* sorted by addr */ + ksyms = load_kallsyms_local(); + if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local")) + return -1; + + /* unlikely, but... */ + if (!ASSERT_LT(cnt, PERF_MAX_STACK_DEPTH, "check_max")) + return -1; + + err = bpf_map_lookup_elem(fd, &key, ips); + if (err) + goto out; + + /* + * Compare all symbols provided via arguments with stacktrace ips, + * and their related symbol addresses.t + */ + va_start(args, cnt); + + for (i = 0; i < cnt; i++) { + unsigned long val; + struct ksym *ksym; + + val = va_arg(args, unsigned long); + ksym = ksym_search_local(ksyms, ips[i]); + if (!ASSERT_OK_PTR(ksym, "ksym_search_local")) + break; + ASSERT_EQ(ksym->addr, val, "stack_cmp"); + } + + va_end(args); + +out: + free_kallsyms_local(ksyms); + return err; +} + +static void test_stacktrace_ips_kprobe_multi(bool retprobe) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, + .retprobe = retprobe + ); + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct stacktrace_ips *skel; + + skel = stacktrace_ips__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load")) + return; + + if (!skel->kconfig->CONFIG_UNWINDER_ORC) { + test__skip(); + goto cleanup; + } + + skel->links.kprobe_multi_test = bpf_program__attach_kprobe_multi_opts( + skel->progs.kprobe_multi_test, + "bpf_testmod_stacktrace_test", &opts); + if (!ASSERT_OK_PTR(skel->links.kprobe_multi_test, "bpf_program__attach_kprobe_multi_opts")) + goto cleanup; + + trigger_module_test_read(1); + + load_kallsyms(); + + check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4, + ksym_get_addr("bpf_testmod_stacktrace_test_3"), + ksym_get_addr("bpf_testmod_stacktrace_test_2"), + ksym_get_addr("bpf_testmod_stacktrace_test_1"), + ksym_get_addr("bpf_testmod_test_read")); + +cleanup: + stacktrace_ips__destroy(skel); +} + +static void test_stacktrace_ips_raw_tp(void) +{ + __u32 info_len = sizeof(struct bpf_prog_info); + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct bpf_prog_info info = {}; + struct stacktrace_ips *skel; + __u64 bpf_prog_ksym = 0; + int err; + + skel = stacktrace_ips__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load")) + return; + + if (!skel->kconfig->CONFIG_UNWINDER_ORC) { + test__skip(); + goto cleanup; + } + + skel->links.rawtp_test = bpf_program__attach_raw_tracepoint( + skel->progs.rawtp_test, + "bpf_testmod_test_read"); + if (!ASSERT_OK_PTR(skel->links.rawtp_test, "bpf_program__attach_raw_tracepoint")) + goto cleanup; + + /* get bpf program address */ + info.jited_ksyms = ptr_to_u64(&bpf_prog_ksym); + info.nr_jited_ksyms = 1; + err = bpf_prog_get_info_by_fd(bpf_program__fd(skel->progs.rawtp_test), + &info, &info_len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) + goto cleanup; + + trigger_module_test_read(1); + + load_kallsyms(); + + check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 2, + bpf_prog_ksym, + ksym_get_addr("bpf_trace_run2")); + +cleanup: + stacktrace_ips__destroy(skel); +} + +static void __test_stacktrace_ips(void) +{ + if (test__start_subtest("kprobe_multi")) + test_stacktrace_ips_kprobe_multi(false); + if (test__start_subtest("kretprobe_multi")) + test_stacktrace_ips_kprobe_multi(true); + if (test__start_subtest("raw_tp")) + test_stacktrace_ips_raw_tp(); +} +#else +static void __test_stacktrace_ips(void) +{ + test__skip(); +} +#endif + +void test_stacktrace_ips(void) +{ + __test_stacktrace_ips(); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c index 164640db3a29..b1e509b231cd 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -99,13 +99,13 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp, icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; - timer_expires = icsk->icsk_retransmit_timer.expires; + timer_expires = sp->tcp_retransmit_timer.expires; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = icsk->icsk_retransmit_timer.expires; - } else if (timer_pending(&sp->sk_timer)) { + timer_expires = sp->tcp_retransmit_timer.expires; + } else if (timer_pending(&icsk->icsk_keepalive_timer)) { timer_active = 2; - timer_expires = sp->sk_timer.expires; + timer_expires = icsk->icsk_keepalive_timer.expires; } else { timer_active = 0; timer_expires = bpf_jiffies64(); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c index 591c703f5032..dbc7166aee91 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c @@ -99,13 +99,13 @@ static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp, icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; - timer_expires = icsk->icsk_retransmit_timer.expires; + timer_expires = sp->tcp_retransmit_timer.expires; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = icsk->icsk_retransmit_timer.expires; - } else if (timer_pending(&sp->sk_timer)) { + timer_expires = sp->tcp_retransmit_timer.expires; + } else if (timer_pending(&icsk->icsk_keepalive_timer)) { timer_active = 2; - timer_expires = sp->sk_timer.expires; + timer_expires = icsk->icsk_keepalive_timer.expires; } else { timer_active = 0; timer_expires = bpf_jiffies64(); diff --git a/tools/testing/selftests/bpf/progs/iters_looping.c b/tools/testing/selftests/bpf/progs/iters_looping.c index 05fa5ce7fc59..d00fd570255a 100644 --- a/tools/testing/selftests/bpf/progs/iters_looping.c +++ b/tools/testing/selftests/bpf/progs/iters_looping.c @@ -161,3 +161,56 @@ int simplest_loop(void *ctx) return 0; } + +__used +static void iterator_with_diff_stack_depth(int x) +{ + struct bpf_iter_num iter; + + asm volatile ( + "if r1 == 42 goto 0f;" + "*(u64 *)(r10 - 128) = 0;" + "0:" + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "1:" + /* consume next item */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto 2f;" + "goto 1b;" + "2:" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common, "r6" + ); +} + +SEC("socket") +__success +__naked int widening_stack_size_bug(void *ctx) +{ + /* + * Depending on iterator_with_diff_stack_depth() parameter value, + * subprogram stack depth is either 8 or 128 bytes. Arrange values so + * that it is 128 on a first call and 8 on a second. This triggered a + * bug in verifier's widen_imprecise_scalars() logic. + */ + asm volatile ( + "r6 = 0;" + "r1 = 0;" + "1:" + "call iterator_with_diff_stack_depth;" + "r1 = 42;" + "r6 += 1;" + "if r6 < 2 goto 1b;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} diff --git a/tools/testing/selftests/bpf/progs/livepatch_trampoline.c b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c new file mode 100644 index 000000000000..15579d5bcd91 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +int fentry_hit; +int fexit_hit; +int my_pid; + +SEC("fentry/cmdline_proc_show") +int BPF_PROG(fentry_cmdline) +{ + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + fentry_hit = 1; + return 0; +} + +SEC("fexit/cmdline_proc_show") +int BPF_PROG(fexit_cmdline) +{ + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + fexit_hit = 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/mptcp_sockmap.c b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c new file mode 100644 index 000000000000..d4eef0cbadb9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bpf_tracing_net.h" + +char _license[] SEC("license") = "GPL"; + +int sk_index; +int redirect_idx; +int trace_port; +int helper_ret; +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 100); +} sock_map SEC(".maps"); + +SEC("sockops") +int mptcp_sockmap_inject(struct bpf_sock_ops *skops) +{ + struct bpf_sock *sk; + + /* only accept specified connection */ + if (skops->local_port != trace_port || + skops->op != BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) + return 1; + + sk = skops->sk; + if (!sk) + return 1; + + /* update sk handler */ + helper_ret = bpf_sock_map_update(skops, &sock_map, &sk_index, BPF_NOEXIST); + + return 1; +} + +SEC("sk_skb/stream_verdict") +int mptcp_sockmap_redirect(struct __sk_buff *skb) +{ + /* redirect skb to the sk under sock_map[redirect_idx] */ + return bpf_sk_redirect_map(skb, &sock_map, redirect_idx, 0); +} diff --git a/tools/testing/selftests/bpf/progs/stacktrace_ips.c b/tools/testing/selftests/bpf/progs/stacktrace_ips.c new file mode 100644 index 000000000000..a96c8150d7f5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/stacktrace_ips.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#ifndef PERF_MAX_STACK_DEPTH +#define PERF_MAX_STACK_DEPTH 127 +#endif + +typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH]; + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(max_entries, 16384); + __type(key, __u32); + __type(value, stack_trace_t); +} stackmap SEC(".maps"); + +extern bool CONFIG_UNWINDER_ORC __kconfig __weak; + +/* + * This function is here to have CONFIG_UNWINDER_ORC + * used and added to object BTF. + */ +int unused(void) +{ + return CONFIG_UNWINDER_ORC ? 0 : 1; +} + +__u32 stack_key; + +SEC("kprobe.multi") +int kprobe_multi_test(struct pt_regs *ctx) +{ + stack_key = bpf_get_stackid(ctx, &stackmap, 0); + return 0; +} + +SEC("raw_tp/bpf_testmod_test_read") +int rawtp_test(void *ctx) +{ + /* Skip ebpf program entry in the stack. */ + stack_key = bpf_get_stackid(ctx, &stackmap, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c index b4a0d0cc8ec8..3662515f0107 100644 --- a/tools/testing/selftests/bpf/progs/stream_fail.c +++ b/tools/testing/selftests/bpf/progs/stream_fail.c @@ -10,7 +10,7 @@ SEC("syscall") __failure __msg("Possibly NULL pointer passed") int stream_vprintk_null_arg(void *ctx) { - bpf_stream_vprintk(BPF_STDOUT, "", NULL, 0, NULL); + bpf_stream_vprintk_impl(BPF_STDOUT, "", NULL, 0, NULL); return 0; } @@ -18,7 +18,7 @@ SEC("syscall") __failure __msg("R3 type=scalar expected=") int stream_vprintk_scalar_arg(void *ctx) { - bpf_stream_vprintk(BPF_STDOUT, "", (void *)46, 0, NULL); + bpf_stream_vprintk_impl(BPF_STDOUT, "", (void *)46, 0, NULL); return 0; } @@ -26,7 +26,7 @@ SEC("syscall") __failure __msg("arg#1 doesn't point to a const string") int stream_vprintk_string_arg(void *ctx) { - bpf_stream_vprintk(BPF_STDOUT, ctx, NULL, 0, NULL); + bpf_stream_vprintk_impl(BPF_STDOUT, ctx, NULL, 0, NULL); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_work.c b/tools/testing/selftests/bpf/progs/task_work.c index 23217f06a3ec..663a80990f8f 100644 --- a/tools/testing/selftests/bpf/progs/task_work.c +++ b/tools/testing/selftests/bpf/progs/task_work.c @@ -66,7 +66,7 @@ int oncpu_hash_map(struct pt_regs *args) if (!work) return 0; - bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL); return 0; } @@ -80,7 +80,7 @@ int oncpu_array_map(struct pt_regs *args) work = bpf_map_lookup_elem(&arrmap, &key); if (!work) return 0; - bpf_task_work_schedule_signal(task, &work->tw, &arrmap, process_work, NULL); + bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, process_work, NULL); return 0; } @@ -102,6 +102,6 @@ int oncpu_lru_map(struct pt_regs *args) work = bpf_map_lookup_elem(&lrumap, &key); if (!work || work->data[0]) return 0; - bpf_task_work_schedule_resume(task, &work->tw, &lrumap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, &lrumap, process_work, NULL); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c index 77fe8f28facd..1270953fd092 100644 --- a/tools/testing/selftests/bpf/progs/task_work_fail.c +++ b/tools/testing/selftests/bpf/progs/task_work_fail.c @@ -53,7 +53,7 @@ int mismatch_map(struct pt_regs *args) work = bpf_map_lookup_elem(&arrmap, &key); if (!work) return 0; - bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL); return 0; } @@ -65,7 +65,7 @@ int no_map_task_work(struct pt_regs *args) struct bpf_task_work tw; task = bpf_get_current_task_btf(); - bpf_task_work_schedule_resume(task, &tw, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &tw, &hmap, process_work, NULL); return 0; } @@ -76,7 +76,7 @@ int task_work_null(struct pt_regs *args) struct task_struct *task; task = bpf_get_current_task_btf(); - bpf_task_work_schedule_resume(task, NULL, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, NULL, &hmap, process_work, NULL); return 0; } @@ -91,6 +91,6 @@ int map_null(struct pt_regs *args) work = bpf_map_lookup_elem(&arrmap, &key); if (!work) return 0; - bpf_task_work_schedule_resume(task, &work->tw, NULL, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, NULL, process_work, NULL); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_work_stress.c b/tools/testing/selftests/bpf/progs/task_work_stress.c index 90fca06fff56..55e555f7f41b 100644 --- a/tools/testing/selftests/bpf/progs/task_work_stress.c +++ b/tools/testing/selftests/bpf/progs/task_work_stress.c @@ -51,8 +51,8 @@ int schedule_task_work(void *ctx) if (!work) return 0; } - err = bpf_task_work_schedule_signal(bpf_get_current_task_btf(), &work->tw, &hmap, - process_work, NULL); + err = bpf_task_work_schedule_signal_impl(bpf_get_current_task_btf(), &work->tw, &hmap, + process_work, NULL); if (err) __sync_fetch_and_add(&schedule_error, 1); else diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index 8eeebaa951f0..1669a7eeda26 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -417,6 +417,30 @@ noinline int bpf_testmod_fentry_test11(u64 a, void *b, short c, int d, return a + (long)b + c + d + (long)e + f + g + h + i + j + k; } +noinline void bpf_testmod_stacktrace_test(void) +{ + /* used for stacktrace test as attach function */ + asm volatile (""); +} + +noinline void bpf_testmod_stacktrace_test_3(void) +{ + bpf_testmod_stacktrace_test(); + asm volatile (""); +} + +noinline void bpf_testmod_stacktrace_test_2(void) +{ + bpf_testmod_stacktrace_test_3(); + asm volatile (""); +} + +noinline void bpf_testmod_stacktrace_test_1(void) +{ + bpf_testmod_stacktrace_test_2(); + asm volatile (""); +} + int bpf_testmod_fentry_ok; noinline ssize_t @@ -497,6 +521,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, 21, 22, 23, 24, 25, 26) != 231) goto out; + bpf_testmod_stacktrace_test_1(); + bpf_testmod_fentry_ok = 1; out: return -EIO; /* always fail */ diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore index 585ecb4d5dc4..3633c7a3ed65 100644 --- a/tools/testing/selftests/drivers/net/.gitignore +++ b/tools/testing/selftests/drivers/net/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only +gro napi_id_helper psp_responder diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 33f4816216ec..f5c71d993750 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -6,10 +6,12 @@ TEST_INCLUDES := $(wildcard lib/py/*.py) \ ../../net/lib.sh \ TEST_GEN_FILES := \ + gro \ napi_id_helper \ # end of TEST_GEN_FILES TEST_PROGS := \ + gro.py \ hds.py \ napi_id.py \ napi_threaded.py \ diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/drivers/net/gro.c index cfc39f70635d..995b492f5bcb 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/drivers/net/gro.c @@ -57,7 +57,8 @@ #include <string.h> #include <unistd.h> -#include "../kselftest.h" +#include "../../kselftest.h" +#include "../../net/lib/ksft.h" #define DPORT 8000 #define SPORT 1500 @@ -1127,6 +1128,8 @@ static void gro_receiver(void) set_timeout(rxfd); bind_packetsocket(rxfd); + ksft_ready(); + memset(correct_payload, 0, sizeof(correct_payload)); if (strcmp(testname, "data") == 0) { diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py new file mode 100755 index 000000000000..ba83713bf7b5 --- /dev/null +++ b/tools/testing/selftests/drivers/net/gro.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +GRO (Generic Receive Offload) conformance tests. + +Validates that GRO coalescing works correctly by running the gro +binary in different configurations and checking for correct packet +coalescing behavior. + +Test cases: + - data: Data packets with same size/headers and correct seq numbers coalesce + - ack: Pure ACK packets do not coalesce + - flags: Packets with PSH, SYN, URG, RST flags do not coalesce + - tcp: Packets with incorrect checksum, non-consecutive seqno don't coalesce + - ip: Packets with different ECN, TTL, TOS, or IP options don't coalesce + - large: Packets larger than GRO_MAX_SIZE don't coalesce +""" + +import os +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import NetDrvEpEnv, KsftXfailEx +from lib.py import cmd, defer, bkg, ip +from lib.py import ksft_variants + + +def _resolve_dmac(cfg, ipver): + """ + Find the destination MAC address remote host should use to send packets + towards the local host. It may be a router / gateway address. + """ + + attr = "dmac" + ipver + # Cache the response across test cases + if hasattr(cfg, attr): + return getattr(cfg, attr) + + route = ip(f"-{ipver} route get {cfg.addr_v[ipver]}", + json=True, host=cfg.remote)[0] + gw = route.get("gateway") + # Local L2 segment, address directly + if not gw: + setattr(cfg, attr, cfg.dev['address']) + return getattr(cfg, attr) + + # ping to make sure neighbor is resolved, + # bind to an interface, for v6 the GW is likely link local + cmd(f"ping -c1 -W0 -I{cfg.remote_ifname} {gw}", host=cfg.remote) + + neigh = ip(f"neigh get {gw} dev {cfg.remote_ifname}", + json=True, host=cfg.remote)[0] + setattr(cfg, attr, neigh['lladdr']) + return getattr(cfg, attr) + + +def _write_defer_restore(cfg, path, val, defer_undo=False): + with open(path, "r", encoding="utf-8") as fp: + orig_val = fp.read().strip() + if str(val) == orig_val: + return + with open(path, "w", encoding="utf-8") as fp: + fp.write(val) + if defer_undo: + defer(_write_defer_restore, cfg, path, orig_val) + + +def _set_mtu_restore(dev, mtu, host): + if dev['mtu'] < mtu: + ip(f"link set dev {dev['ifname']} mtu {mtu}", host=host) + defer(ip, f"link set dev {dev['ifname']} mtu {dev['mtu']}", host=host) + + +def _setup(cfg, test_name): + """ Setup hardware loopback mode for GRO testing. """ + + if not hasattr(cfg, "bin_remote"): + cfg.bin_local = cfg.test_dir / "gro" + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + # "large" test needs at least 4k MTU + if test_name == "large": + _set_mtu_restore(cfg.dev, 4096, None) + _set_mtu_restore(cfg.remote_dev, 4096, cfg.remote) + + flush_path = f"/sys/class/net/{cfg.ifname}/gro_flush_timeout" + irq_path = f"/sys/class/net/{cfg.ifname}/napi_defer_hard_irqs" + + _write_defer_restore(cfg, flush_path, "200000", defer_undo=True) + _write_defer_restore(cfg, irq_path, "10", defer_undo=True) + + try: + # Disable TSO for local tests + cfg.require_nsim() # will raise KsftXfailEx if not running on nsim + + cmd(f"ethtool -K {cfg.ifname} gro on tso off") + cmd(f"ethtool -K {cfg.remote_ifname} gro on tso off", host=cfg.remote) + except KsftXfailEx: + pass + +def _gro_variants(): + """Generator that yields all combinations of protocol and test types.""" + + for protocol in ["ipv4", "ipv6", "ipip"]: + for test_name in ["data", "ack", "flags", "tcp", "ip", "large"]: + yield protocol, test_name + + +@ksft_variants(_gro_variants()) +def test(cfg, protocol, test_name): + """Run a single GRO test with retries.""" + + ipver = "6" if protocol[-1] == "6" else "4" + cfg.require_ipver(ipver) + + _setup(cfg, test_name) + + base_cmd_args = [ + f"--{protocol}", + f"--dmac {_resolve_dmac(cfg, ipver)}", + f"--smac {cfg.remote_dev['address']}", + f"--daddr {cfg.addr_v[ipver]}", + f"--saddr {cfg.remote_addr_v[ipver]}", + f"--test {test_name}", + "--verbose" + ] + base_args = " ".join(base_cmd_args) + + # Each test is run 6 times to deflake, because given the receive timing, + # not all packets that should coalesce will be considered in the same flow + # on every try. + max_retries = 6 + for attempt in range(max_retries): + rx_cmd = f"{cfg.bin_local} {base_args} --rx --iface {cfg.ifname}" + tx_cmd = f"{cfg.bin_remote} {base_args} --iface {cfg.remote_ifname}" + + fail_now = attempt >= max_retries - 1 + + with bkg(rx_cmd, ksft_ready=True, exit_wait=True, + fail=fail_now) as rx_proc: + cmd(tx_cmd, host=cfg.remote) + + if rx_proc.ret == 0: + return + + ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# ')) + ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# ')) + + if test_name == "large" and os.environ.get("KSFT_MACHINE_SLOW"): + ksft_pr(f"Ignoring {protocol}/{test_name} failure due to slow environment") + return + + ksft_pr(f"Attempt {attempt + 1}/{max_retries} failed, retrying...") + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEpEnv(__file__) as cfg: + ksft_run(cases=[test], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore index 6942bf575497..46540468a775 100644 --- a/tools/testing/selftests/drivers/net/hw/.gitignore +++ b/tools/testing/selftests/drivers/net/hw/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only iou-zcrx ncdevmem +toeplitz diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 8133d1a0051c..7c819fdfa107 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -1,6 +1,21 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_GEN_FILES = iou-zcrx +# Check if io_uring supports zero-copy receive +HAS_IOURING_ZCRX := $(shell \ + echo -e '#include <liburing.h>\n' \ + 'void *func = (void *)io_uring_register_ifq;\n' \ + 'int main() {return 0;}' | \ + $(CC) -luring -x c - -o /dev/null 2>&1 && echo y) + +ifeq ($(HAS_IOURING_ZCRX),y) +COND_GEN_FILES += iou-zcrx +else +$(warning excluding iouring tests, liburing not installed or too old) +endif + +TEST_GEN_FILES := \ + $(COND_GEN_FILES) \ +# end of TEST_GEN_FILES TEST_PROGS = \ csum.py \ @@ -21,6 +36,7 @@ TEST_PROGS = \ rss_ctx.py \ rss_flow_label.py \ rss_input_xfrm.py \ + toeplitz.py \ tso.py \ xsk_reconfig.py \ # @@ -38,7 +54,10 @@ TEST_INCLUDES := \ # # YNL files, must be before "include ..lib.mk" -YNL_GEN_FILES := ncdevmem +YNL_GEN_FILES := \ + ncdevmem \ + toeplitz \ +# end of YNL_GEN_FILES TEST_GEN_FILES += $(YNL_GEN_FILES) TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) @@ -54,4 +73,6 @@ include ../../../net/ynl.mk include ../../../net/bpf.mk +ifeq ($(HAS_IOURING_ZCRX),y) $(OUTPUT)/iou-zcrx: LDLIBS += -luring +endif diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index fb010a48a5a1..0c61debf86fb 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -25,7 +25,7 @@ try: fd_read_timeout, ip, rand_port, wait_port_listen, wait_file from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ - ksft_setup + ksft_setup, ksft_variants, KsftNamedVariant from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none from drivers.net.lib.py import GenerateTraffic, Remote @@ -40,7 +40,7 @@ try: "wait_port_listen", "wait_file", "KsftSkipEx", "KsftFailEx", "KsftXfailEx", "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", - "ksft_setup", + "ksft_setup", "ksft_variants", "KsftNamedVariant", "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt", "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", "ksft_not_none", "ksft_not_none", diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/drivers/net/hw/toeplitz.c index 9ba03164d73a..a4d04438c313 100644 --- a/tools/testing/selftests/net/toeplitz.c +++ b/tools/testing/selftests/drivers/net/hw/toeplitz.c @@ -52,7 +52,11 @@ #include <sys/types.h> #include <unistd.h> -#include "../kselftest.h" +#include <ynl.h> +#include "ethtool-user.h" + +#include "../../../kselftest.h" +#include "../../../net/lib/ksft.h" #define TOEPLITZ_KEY_MIN_LEN 40 #define TOEPLITZ_KEY_MAX_LEN 60 @@ -64,6 +68,7 @@ #define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2)) #define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */ +#define RSS_MAX_INDIR (1 << 16) #define RPS_MAX_CPUS 16UL /* must be a power of 2 */ @@ -101,6 +106,8 @@ struct ring_state { static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */ static int rps_silo_to_cpu[RPS_MAX_CPUS]; static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN]; +static unsigned int rss_indir_tbl[RSS_MAX_INDIR]; +static unsigned int rss_indir_tbl_size; static struct ring_state rings[RSS_MAX_CPUS]; static inline uint32_t toeplitz(const unsigned char *four_tuple, @@ -129,7 +136,12 @@ static inline uint32_t toeplitz(const unsigned char *four_tuple, /* Compare computed cpu with arrival cpu from packet_fanout_cpu */ static void verify_rss(uint32_t rx_hash, int cpu) { - int queue = rx_hash % cfg_num_queues; + int queue; + + if (rss_indir_tbl_size) + queue = rss_indir_tbl[rx_hash % rss_indir_tbl_size]; + else + queue = rx_hash % cfg_num_queues; log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]); if (rx_irq_cpus[queue] != cpu) { @@ -482,6 +494,56 @@ static void parse_rps_bitmap(const char *arg) rps_silo_to_cpu[cfg_num_rps_cpus++] = i; } +static void read_rss_dev_info_ynl(void) +{ + struct ethtool_rss_get_req *req; + struct ethtool_rss_get_rsp *rsp; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_ethtool_family, NULL); + if (!ys) + error(1, errno, "ynl_sock_create failed"); + + req = ethtool_rss_get_req_alloc(); + if (!req) + error(1, errno, "ethtool_rss_get_req_alloc failed"); + + ethtool_rss_get_req_set_header_dev_name(req, cfg_ifname); + + rsp = ethtool_rss_get(ys, req); + if (!rsp) + error(1, ys->err.code, "YNL: %s", ys->err.msg); + + if (!rsp->_len.hkey) + error(1, 0, "RSS key not available for %s", cfg_ifname); + + if (rsp->_len.hkey < TOEPLITZ_KEY_MIN_LEN || + rsp->_len.hkey > TOEPLITZ_KEY_MAX_LEN) + error(1, 0, "RSS key length %u out of bounds [%u, %u]", + rsp->_len.hkey, TOEPLITZ_KEY_MIN_LEN, + TOEPLITZ_KEY_MAX_LEN); + + memcpy(toeplitz_key, rsp->hkey, rsp->_len.hkey); + + if (rsp->_count.indir > RSS_MAX_INDIR) + error(1, 0, "RSS indirection table too large (%u > %u)", + rsp->_count.indir, RSS_MAX_INDIR); + + /* If indir table not available we'll fallback to simple modulo math */ + if (rsp->_count.indir) { + memcpy(rss_indir_tbl, rsp->indir, + rsp->_count.indir * sizeof(rss_indir_tbl[0])); + rss_indir_tbl_size = rsp->_count.indir; + + log_verbose("RSS indirection table size: %u\n", + rss_indir_tbl_size); + } + + ethtool_rss_get_rsp_free(rsp); + ethtool_rss_get_req_free(req); + ynl_sock_destroy(ys); +} + static void parse_opts(int argc, char **argv) { static struct option long_options[] = { @@ -550,7 +612,7 @@ static void parse_opts(int argc, char **argv) } if (!have_toeplitz) - error(1, 0, "Must supply rss key ('-k')"); + read_rss_dev_info_ynl(); num_cpus = get_nprocs(); if (num_cpus > RSS_MAX_CPUS) @@ -576,6 +638,10 @@ int main(int argc, char **argv) fd_sink = setup_sink(); setup_rings(); + + /* Signal to test framework that we're ready to receive */ + ksft_ready(); + process_rings(); cleanup_rings(); diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.py b/tools/testing/selftests/drivers/net/hw/toeplitz.py new file mode 100755 index 000000000000..d2db5ee9e358 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/toeplitz.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Toeplitz Rx hashing test: + - rxhash (the hash value calculation itself); + - RSS mapping from rxhash to rx queue; + - RPS mapping from rxhash to cpu. +""" + +import glob +import os +import socket +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import NetDrvEpEnv, EthtoolFamily, NetdevFamily +from lib.py import cmd, bkg, rand_port, defer +from lib.py import ksft_in +from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx, KsftFailEx + +# "define" for the ID of the Toeplitz hash function +ETH_RSS_HASH_TOP = 1 + + +def _check_rps_and_rfs_not_configured(cfg): + """Verify that RPS is not already configured.""" + + for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"): + with open(rps_file, "r", encoding="utf-8") as fp: + val = fp.read().strip() + if set(val) - {"0", ","}: + raise KsftSkipEx(f"RPS already configured on {rps_file}: {val}") + + rfs_file = "/proc/sys/net/core/rps_sock_flow_entries" + with open(rfs_file, "r", encoding="utf-8") as fp: + val = fp.read().strip() + if val != "0": + raise KsftSkipEx(f"RFS already configured {rfs_file}: {val}") + + +def _get_cpu_for_irq(irq): + with open(f"/proc/irq/{irq}/smp_affinity_list", "r", + encoding="utf-8") as fp: + data = fp.read().strip() + if "," in data or "-" in data: + raise KsftFailEx(f"IRQ{irq} is not mapped to a single core: {data}") + return int(data) + + +def _get_irq_cpus(cfg): + """ + Read the list of IRQs for the device Rx queues. + """ + queues = cfg.netnl.queue_get({"ifindex": cfg.ifindex}, dump=True) + napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True) + + # Remap into ID-based dicts + napis = {n["id"]: n for n in napis} + queues = {f"{q['type']}{q['id']}": q for q in queues} + + cpus = [] + for rx in range(9999): + name = f"rx{rx}" + if name not in queues: + break + cpus.append(_get_cpu_for_irq(napis[queues[name]["napi-id"]]["irq"])) + + return cpus + + +def _get_unused_cpus(cfg, count=2): + """ + Get CPUs that are not used by Rx queues. + Returns a list of at least 'count' CPU numbers. + """ + + # Get CPUs used by Rx queues + rx_cpus = set(_get_irq_cpus(cfg)) + + # Get total number of CPUs + num_cpus = os.cpu_count() + + # Find unused CPUs + unused_cpus = [cpu for cpu in range(num_cpus) if cpu not in rx_cpus] + + if len(unused_cpus) < count: + raise KsftSkipEx(f"Need at {count} CPUs not used by Rx queues, found {len(unused_cpus)}") + + return unused_cpus[:count] + + +def _configure_rps(cfg, rps_cpus): + """Configure RPS for all Rx queues.""" + + mask = 0 + for cpu in rps_cpus: + mask |= (1 << cpu) + mask = hex(mask)[2:] + + # Set RPS bitmap for all rx queues + for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"): + with open(rps_file, "w", encoding="utf-8") as fp: + fp.write(mask) + + return mask + + +def _send_traffic(cfg, proto_flag, ipver, port): + """Send 20 packets of requested type.""" + + # Determine protocol and IP version for socat + if proto_flag == "-u": + proto = "UDP" + else: + proto = "TCP" + + baddr = f"[{cfg.addr_v['6']}]" if ipver == "6" else cfg.addr_v["4"] + + # Run socat in a loop to send traffic periodically + # Use sh -c with a loop similar to toeplitz_client.sh + socat_cmd = f""" + for i in `seq 20`; do + echo "msg $i" | socat -{ipver} -t 0.1 - {proto}:{baddr}:{port}; + sleep 0.001; + done + """ + + cmd(socat_cmd, shell=True, host=cfg.remote) + + +def _test_variants(): + for grp in ["", "rss", "rps"]: + for l4 in ["tcp", "udp"]: + for l3 in ["4", "6"]: + name = f"{l4}_ipv{l3}" + if grp: + name = f"{grp}_{name}" + yield KsftNamedVariant(name, "-" + l4[0], l3, grp) + + +@ksft_variants(_test_variants()) +def test(cfg, proto_flag, ipver, grp): + """Run a single toeplitz test.""" + + cfg.require_ipver(ipver) + + # Check that rxhash is enabled + ksft_in("receive-hashing: on", cmd(f"ethtool -k {cfg.ifname}").stdout) + + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # Make sure NIC is configured to use Toeplitz hash, and no key xfrm. + if rss.get('hfunc') != ETH_RSS_HASH_TOP or rss.get('input-xfrm'): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hfunc": ETH_RSS_HASH_TOP, + "input-xfrm": {}}) + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex}, + "hfunc": rss.get('hfunc'), + "input-xfrm": rss.get('input-xfrm', {}) + }) + + port = rand_port(socket.SOCK_DGRAM) + + toeplitz_path = cfg.test_dir / "toeplitz" + rx_cmd = [ + str(toeplitz_path), + "-" + ipver, + proto_flag, + "-d", str(port), + "-i", cfg.ifname, + "-T", "4000", + "-s", + "-v" + ] + + if grp: + _check_rps_and_rfs_not_configured(cfg) + if grp == "rss": + irq_cpus = ",".join([str(x) for x in _get_irq_cpus(cfg)]) + rx_cmd += ["-C", irq_cpus] + ksft_pr(f"RSS using CPUs: {irq_cpus}") + elif grp == "rps": + # Get CPUs not used by Rx queues and configure them for RPS + rps_cpus = _get_unused_cpus(cfg, count=2) + rps_mask = _configure_rps(cfg, rps_cpus) + defer(_configure_rps, cfg, []) + rx_cmd += ["-r", rps_mask] + ksft_pr(f"RPS using CPUs: {rps_cpus}, mask: {rps_mask}") + + # Run rx in background, it will exit once it has seen enough packets + with bkg(" ".join(rx_cmd), ksft_ready=True, exit_wait=True) as rx_proc: + while rx_proc.proc.poll() is None: + _send_traffic(cfg, proto_flag, ipver, port) + + # Check rx result + ksft_pr("Receiver output:") + ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# ')) + if rx_proc.stderr: + ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# ')) + + +def main() -> None: + """Ksft boilerplate main.""" + + with NetDrvEpEnv(__file__) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + ksft_run(cases=[test], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index b0c6300150fb..d9d035634a31 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -25,7 +25,7 @@ try: fd_read_timeout, ip, rand_port, wait_port_listen, wait_file from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ - ksft_setup + ksft_setup, ksft_variants, KsftNamedVariant from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none @@ -38,7 +38,7 @@ try: "wait_port_listen", "wait_file", "KsftSkipEx", "KsftFailEx", "KsftXfailEx", "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", - "ksft_setup", + "ksft_setup", "ksft_variants", "KsftNamedVariant", "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt", "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", "ksft_not_none", "ksft_not_none"] diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 01be3d9b9720..8b644fd84ff2 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -168,6 +168,8 @@ class NetDrvEpEnv(NetDrvEnvBase): # resolve remote interface name self.remote_ifname = self.resolve_remote_ifc() + self.remote_dev = ip("-d link show dev " + self.remote_ifname, + host=self.remote, json=True)[0] self._required_cmd = {} diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index a3446b569976..2022f3061738 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -28,8 +28,6 @@ OUTPUT_FILE="/tmp/${TARGET}" # Check for basic system dependency and exit if not found check_for_dependencies -# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) -echo "6 5" > /proc/sys/kernel/printk # Remove the namespace, interfaces and netconsole target on exit trap cleanup EXIT @@ -39,6 +37,9 @@ do for IP_VERSION in "ipv6" "ipv4" do echo "Running with target mode: ${FORMAT} (${IP_VERSION})" + # Set current loglevel to KERN_INFO(6), and default to + # KERN_NOTICE(5) + echo "6 5" > /proc/sys/kernel/printk # Create one namespace and two interfaces set_network "${IP_VERSION}" # Create a dynamic target for netconsole diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netcons_overflow.sh index 29bad56448a2..06089643b771 100755 --- a/tools/testing/selftests/drivers/net/netcons_overflow.sh +++ b/tools/testing/selftests/drivers/net/netcons_overflow.sh @@ -15,7 +15,7 @@ SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh # This is coming from netconsole code. Check for it in drivers/net/netconsole.c -MAX_USERDATA_ITEMS=16 +MAX_USERDATA_ITEMS=256 # Function to create userdata entries function create_userdata_max_entries() { diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 030762b203d7..1b529ccaf050 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -3,7 +3,8 @@ lib_dir=$(dirname $0)/../../../net/forwarding -ALL_TESTS="fw_flash_test params_test regions_test reload_test \ +ALL_TESTS="fw_flash_test params_test \ + params_default_test regions_test reload_test \ netns_reload_test resource_test dev_info_test \ empty_reporter_test dummy_reporter_test rate_test" NUM_NETIFS=0 @@ -78,17 +79,28 @@ fw_flash_test() param_get() { local name=$1 + local attr=${2:-value} + local cmode=${3:-driverinit} cmd_jq "devlink dev param show $DL_HANDLE name $name -j" \ - '.[][][].values[] | select(.cmode == "driverinit").value' + '.[][][].values[] | select(.cmode == "'"$cmode"'").'"$attr" } param_set() { local name=$1 local value=$2 + local cmode=${3:-driverinit} - devlink dev param set $DL_HANDLE name $name cmode driverinit value $value + devlink dev param set $DL_HANDLE name $name cmode $cmode value $value +} + +param_set_default() +{ + local name=$1 + local cmode=${2:-driverinit} + + devlink dev param set $DL_HANDLE name $name default cmode $cmode } check_value() @@ -97,12 +109,18 @@ check_value() local phase_name=$2 local expected_param_value=$3 local expected_debugfs_value=$4 + local cmode=${5:-driverinit} local value + local attr="value" - value=$(param_get $name) - check_err $? "Failed to get $name param value" + if [[ "$phase_name" == *"default"* ]]; then + attr="default" + fi + + value=$(param_get $name $attr $cmode) + check_err $? "Failed to get $name param $attr" [ "$value" == "$expected_param_value" ] - check_err $? "Unexpected $phase_name $name param value" + check_err $? "Unexpected $phase_name $name param $attr" value=$(<$DEBUGFS_DIR/$name) check_err $? "Failed to get $name debugfs value" [ "$value" == "$expected_debugfs_value" ] @@ -135,6 +153,92 @@ params_test() log_test "params test" } +value_to_debugfs() +{ + local value=$1 + + case "$value" in + true) + echo "Y" + ;; + false) + echo "N" + ;; + *) + echo "$value" + ;; + esac +} + +test_default() +{ + local param_name=$1 + local new_value=$2 + local expected_default=$3 + local cmode=${4:-driverinit} + local default_debugfs + local new_debugfs + local expected_debugfs + + default_debugfs=$(value_to_debugfs $expected_default) + new_debugfs=$(value_to_debugfs $new_value) + + expected_debugfs=$default_debugfs + check_value $param_name initial-default $expected_default $expected_debugfs $cmode + + param_set $param_name $new_value $cmode + check_err $? "Failed to set $param_name to $new_value" + + expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$new_debugfs" || echo "$default_debugfs") + check_value $param_name post-set $new_value $expected_debugfs $cmode + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload device" + + expected_debugfs=$new_debugfs + check_value $param_name post-reload-new-value $new_value $expected_debugfs $cmode + + param_set_default $param_name $cmode + check_err $? "Failed to set $param_name to default" + + expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$default_debugfs" || echo "$new_debugfs") + check_value $param_name post-set-default $expected_default $expected_debugfs $cmode + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload device" + + expected_debugfs=$default_debugfs + check_value $param_name post-reload-default $expected_default $expected_debugfs $cmode +} + +params_default_test() +{ + RET=0 + + if ! devlink dev param help 2>&1 | grep -q "value VALUE | default"; then + echo "SKIP: devlink cli missing default feature" + return + fi + + # Remove side effects of previous tests. Use plain param_set, because + # param_set_default is a feature under test here. + param_set max_macs 32 driverinit + check_err $? "Failed to reset max_macs to default value" + param_set test1 true driverinit + check_err $? "Failed to reset test1 to default value" + param_set test2 1234 runtime + check_err $? "Failed to reset test2 to default value" + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload device for clean state" + + test_default max_macs 16 32 driverinit + test_default test1 false true driverinit + test_default test2 100 1234 runtime + + log_test "params default test" +} + check_region_size() { local name=$1 diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py index a148004e1c36..e54df158dfe9 100755 --- a/tools/testing/selftests/drivers/net/xdp.py +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -12,6 +12,7 @@ from dataclasses import dataclass from enum import Enum from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_ne, ksft_pr +from lib.py import KsftNamedVariant, ksft_variants from lib.py import KsftFailEx, NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import bkg, cmd, rand_port, wait_port_listen @@ -672,7 +673,18 @@ def test_xdp_native_adjst_head_shrnk_data(cfg): _validate_res(res, offset_lst, pkt_sz_lst) -def _test_xdp_native_ifc_stats(cfg, act): +@ksft_variants([ + KsftNamedVariant("pass", XDPAction.PASS), + KsftNamedVariant("drop", XDPAction.DROP), + KsftNamedVariant("tx", XDPAction.TX), +]) +def test_xdp_native_qstats(cfg, act): + """ + Send 1000 messages. Expect XDP action specified in @act. + Make sure the packets were counted to interface level qstats + (Rx, and Tx if act is TX). + """ + cfg.require_cmd("socat") bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) @@ -687,9 +699,12 @@ def _test_xdp_native_ifc_stats(cfg, act): "/dev/null" # Listener runs on "remote" in case of XDP_TX rx_host = cfg.remote if act == XDPAction.TX else None - # We want to spew 2000 packets quickly, bash seems to do a good enough job - tx_udp = f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \ - "for i in `seq 2000`; do echo a >&5; done; exec 5>&-" + # We want to spew 1000 packets quickly, bash seems to do a good enough job + # Each reopening of the socket gives us a differenot local port (for RSS) + tx_udp = "for _ in `seq 20`; do " \ + f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \ + "for i in `seq 50`; do echo a >&5; done; " \ + "exec 5>&-; done" cfg.wait_hw_stats_settle() # Qstats have more clearly defined semantics than rtnetlink. @@ -704,11 +719,11 @@ def _test_xdp_native_ifc_stats(cfg, act): cfg.wait_hw_stats_settle() after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] - ksft_ge(after['rx-packets'] - before['rx-packets'], 2000) + expected_pkts = 1000 + ksft_ge(after['rx-packets'] - before['rx-packets'], expected_pkts) if act == XDPAction.TX: - ksft_ge(after['tx-packets'] - before['tx-packets'], 2000) + ksft_ge(after['tx-packets'] - before['tx-packets'], expected_pkts) - expected_pkts = 2000 stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) ksft_eq(stats[XDPStats.RX.value], expected_pkts, "XDP RX stats mismatch") if act == XDPAction.TX: @@ -730,30 +745,6 @@ def _test_xdp_native_ifc_stats(cfg, act): ksft_ge(after['tx-packets'], before['tx-packets']) -def test_xdp_native_qstats_pass(cfg): - """ - Send 2000 messages, expect XDP_PASS, make sure the packets were counted - to interface level qstats (Rx). - """ - _test_xdp_native_ifc_stats(cfg, XDPAction.PASS) - - -def test_xdp_native_qstats_drop(cfg): - """ - Send 2000 messages, expect XDP_DROP, make sure the packets were counted - to interface level qstats (Rx). - """ - _test_xdp_native_ifc_stats(cfg, XDPAction.DROP) - - -def test_xdp_native_qstats_tx(cfg): - """ - Send 2000 messages, expect XDP_TX, make sure the packets were counted - to interface level qstats (Rx and Tx) - """ - _test_xdp_native_ifc_stats(cfg, XDPAction.TX) - - def main(): """ Main function to execute the XDP tests. @@ -778,9 +769,7 @@ def main(): test_xdp_native_adjst_tail_shrnk_data, test_xdp_native_adjst_head_grow_data, test_xdp_native_adjst_head_shrnk_data, - test_xdp_native_qstats_pass, - test_xdp_native_qstats_drop, - test_xdp_native_qstats_tx, + test_xdp_native_qstats, ], args=(cfg,)) ksft_exit() diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc index c62165fabd0c..cfa16aa1f39a 100644 --- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -20,6 +20,10 @@ sample_events() { echo 0 > tracing_on echo 0 > events/enable +# Clear functions caused by page cache; run sample_events twice +sample_events +sample_events + echo "Get the most frequently calling function" echo > trace sample_events diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 9e3be2ee7f1b..f917b4c4c943 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -1758,10 +1758,15 @@ int main(int argc, char *argv[]) uffd_test_ops = mem_type->mem_ops; uffd_test_case_ops = test->test_case_ops; - if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) + if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) { gopts.page_size = default_huge_page_size(); - else + if (gopts.page_size == 0) { + uffd_test_skip("huge page size is 0, feature missing?"); + continue; + } + } else { gopts.page_size = psize(); + } /* Ensure we have at least 2 pages */ gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2) @@ -1776,12 +1781,6 @@ int main(int argc, char *argv[]) continue; uffd_test_start("%s on %s", test->name, mem_type->name); - if ((mem_type->mem_flag == MEM_HUGETLB || - mem_type->mem_flag == MEM_HUGETLB_PRIVATE) && - (default_huge_page_size() == 0)) { - uffd_test_skip("huge page size is 0, feature missing?"); - continue; - } if (!uffd_feature_supported(test)) { uffd_test_skip("feature missing"); continue; diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 439101b518ee..6930fe926c58 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -4,10 +4,8 @@ bind_timewait bind_wildcard busy_poller cmsg_sender -diag_uid epoll_busy_poll fin_ack_lat -gro hwtstamp_config io_uring_zerocopy_tx ioam6_parser @@ -18,7 +16,6 @@ ipv6_flowlabel ipv6_flowlabel_mgr ipv6_fragmentation log.txt -msg_oob msg_zerocopy netlink-dumps nettest @@ -35,9 +32,6 @@ reuseport_bpf_numa reuseport_dualstack rxtimestamp sctp_hello -scm_inq -scm_pidfd -scm_rights sk_bind_sendto_listen sk_connect_zero_addr sk_so_peek_off @@ -56,7 +50,6 @@ tcp_port_share tfo timestamping tls -toeplitz tools tun txring_overwrite @@ -64,4 +57,3 @@ txtimestamp udpgso udpgso_bench_rx udpgso_bench_tx -unix_connect diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index b5127e968108..b66ba04f19d9 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -38,7 +38,6 @@ TEST_PROGS := \ fq_band_pktlimit.sh \ gre_gso.sh \ gre_ipv6_lladdr.sh \ - gro.sh \ icmp.sh \ icmp_redirect.sh \ io_uring_zerocopy_tx.sh \ @@ -121,8 +120,6 @@ TEST_PROGS := \ # end of TEST_PROGS TEST_PROGS_EXTENDED := \ - toeplitz.sh \ - toeplitz_client.sh \ xfrm_policy_add_speed.sh \ # end of TEST_PROGS_EXTENDED @@ -130,7 +127,6 @@ TEST_GEN_FILES := \ bind_bhash \ cmsg_sender \ fin_ack_lat \ - gro \ hwtstamp_config \ io_uring_zerocopy_tx \ ioam6_parser \ @@ -159,7 +155,6 @@ TEST_GEN_FILES := \ tcp_mmap \ tfo \ timestamping \ - toeplitz \ txring_overwrite \ txtimestamp \ udpgso \ @@ -193,8 +188,6 @@ TEST_FILES := \ in_netns.sh \ lib.sh \ settings \ - setup_loopback.sh \ - setup_veth.sh \ # end of TEST_FILES # YNL files, must be before "include ..lib.mk" diff --git a/tools/testing/selftests/net/af_unix/.gitignore b/tools/testing/selftests/net/af_unix/.gitignore new file mode 100644 index 000000000000..240b26740c9e --- /dev/null +++ b/tools/testing/selftests/net/af_unix/.gitignore @@ -0,0 +1,8 @@ +diag_uid +msg_oob +scm_inq +scm_pidfd +scm_rights +so_peek_off +unix_connect +unix_connreset diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index de805cbbdf69..3cd677b72072 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -6,7 +6,9 @@ TEST_GEN_PROGS := \ scm_inq \ scm_pidfd \ scm_rights \ + so_peek_off \ unix_connect \ + unix_connreset \ # end of TEST_GEN_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/so_peek_off.c b/tools/testing/selftests/net/af_unix/so_peek_off.c new file mode 100644 index 000000000000..86e7b0fb522d --- /dev/null +++ b/tools/testing/selftests/net/af_unix/so_peek_off.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include <stdlib.h> +#include <unistd.h> + +#include <sys/socket.h> + +#include "../../kselftest_harness.h" + +FIXTURE(so_peek_off) +{ + int fd[2]; /* 0: sender, 1: receiver */ +}; + +FIXTURE_VARIANT(so_peek_off) +{ + int type; +}; + +FIXTURE_VARIANT_ADD(so_peek_off, stream) +{ + .type = SOCK_STREAM, +}; + +FIXTURE_VARIANT_ADD(so_peek_off, dgram) +{ + .type = SOCK_DGRAM, +}; + +FIXTURE_VARIANT_ADD(so_peek_off, seqpacket) +{ + .type = SOCK_SEQPACKET, +}; + +FIXTURE_SETUP(so_peek_off) +{ + struct timeval timeout = { + .tv_sec = 5, + .tv_usec = 0, + }; + int ret; + + ret = socketpair(AF_UNIX, variant->type, 0, self->fd); + ASSERT_EQ(0, ret); + + ret = setsockopt(self->fd[1], SOL_SOCKET, SO_RCVTIMEO_NEW, + &timeout, sizeof(timeout)); + ASSERT_EQ(0, ret); + + ret = setsockopt(self->fd[1], SOL_SOCKET, SO_PEEK_OFF, + &(int){0}, sizeof(int)); + ASSERT_EQ(0, ret); +} + +FIXTURE_TEARDOWN(so_peek_off) +{ + close_range(self->fd[0], self->fd[1], 0); +} + +#define sendeq(fd, str, flags) \ + do { \ + int bytes, len = strlen(str); \ + \ + bytes = send(fd, str, len, flags); \ + ASSERT_EQ(len, bytes); \ + } while (0) + +#define recveq(fd, str, buflen, flags) \ + do { \ + char buf[(buflen) + 1] = {}; \ + int bytes; \ + \ + bytes = recv(fd, buf, buflen, flags); \ + ASSERT_NE(-1, bytes); \ + ASSERT_STREQ(str, buf); \ + } while (0) + +#define async \ + for (pid_t pid = (pid = fork(), \ + pid < 0 ? \ + __TH_LOG("Failed to start async {}"), \ + _metadata->exit_code = KSFT_FAIL, \ + __bail(1, _metadata), \ + 0xdead : \ + pid); \ + !pid; exit(0)) + +TEST_F(so_peek_off, single_chunk) +{ + sendeq(self->fd[0], "aaaabbbb", 0); + + recveq(self->fd[1], "aaaa", 4, MSG_PEEK); + recveq(self->fd[1], "bbbb", 100, MSG_PEEK); +} + +TEST_F(so_peek_off, two_chunks) +{ + sendeq(self->fd[0], "aaaa", 0); + sendeq(self->fd[0], "bbbb", 0); + + recveq(self->fd[1], "aaaa", 4, MSG_PEEK); + recveq(self->fd[1], "bbbb", 100, MSG_PEEK); +} + +TEST_F(so_peek_off, two_chunks_blocking) +{ + async { + usleep(1000); + sendeq(self->fd[0], "aaaa", 0); + } + + recveq(self->fd[1], "aaaa", 4, MSG_PEEK); + + async { + usleep(1000); + sendeq(self->fd[0], "bbbb", 0); + } + + /* goto again; -> goto redo; in unix_stream_read_generic(). */ + recveq(self->fd[1], "bbbb", 100, MSG_PEEK); +} + +TEST_F(so_peek_off, two_chunks_overlap) +{ + sendeq(self->fd[0], "aaaa", 0); + recveq(self->fd[1], "aa", 2, MSG_PEEK); + + sendeq(self->fd[0], "bbbb", 0); + + if (variant->type == SOCK_STREAM) { + /* SOCK_STREAM tries to fill the buffer. */ + recveq(self->fd[1], "aabb", 4, MSG_PEEK); + recveq(self->fd[1], "bb", 100, MSG_PEEK); + } else { + /* SOCK_DGRAM and SOCK_SEQPACKET returns at the skb boundary. */ + recveq(self->fd[1], "aa", 100, MSG_PEEK); + recveq(self->fd[1], "bbbb", 100, MSG_PEEK); + } +} + +TEST_F(so_peek_off, two_chunks_overlap_blocking) +{ + async { + usleep(1000); + sendeq(self->fd[0], "aaaa", 0); + } + + recveq(self->fd[1], "aa", 2, MSG_PEEK); + + async { + usleep(1000); + sendeq(self->fd[0], "bbbb", 0); + } + + /* Even SOCK_STREAM does not wait if at least one byte is read. */ + recveq(self->fd[1], "aa", 100, MSG_PEEK); + + recveq(self->fd[1], "bbbb", 100, MSG_PEEK); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/unix_connreset.c b/tools/testing/selftests/net/af_unix/unix_connreset.c new file mode 100644 index 000000000000..08c1de8f5a98 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/unix_connreset.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Selftest for AF_UNIX socket close and ECONNRESET behaviour. + * + * This test verifies: + * 1. SOCK_STREAM returns EOF when the peer closes normally. + * 2. SOCK_STREAM returns ECONNRESET if peer closes with unread data. + * 3. SOCK_SEQPACKET returns EOF when the peer closes normally. + * 4. SOCK_SEQPACKET returns ECONNRESET if the peer closes with unread data. + * 5. SOCK_DGRAM does not return ECONNRESET when the peer closes. + * + * These tests document the intended Linux behaviour. + * + */ + +#define _GNU_SOURCE +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <sys/socket.h> +#include <sys/un.h> +#include "../../kselftest_harness.h" + +#define SOCK_PATH "/tmp/af_unix_connreset.sock" + +static void remove_socket_file(void) +{ + unlink(SOCK_PATH); +} + +FIXTURE(unix_sock) +{ + int server; + int client; + int child; +}; + +FIXTURE_VARIANT(unix_sock) +{ + int socket_type; + const char *name; +}; + +FIXTURE_VARIANT_ADD(unix_sock, stream) { + .socket_type = SOCK_STREAM, + .name = "SOCK_STREAM", +}; + +FIXTURE_VARIANT_ADD(unix_sock, dgram) { + .socket_type = SOCK_DGRAM, + .name = "SOCK_DGRAM", +}; + +FIXTURE_VARIANT_ADD(unix_sock, seqpacket) { + .socket_type = SOCK_SEQPACKET, + .name = "SOCK_SEQPACKET", +}; + +FIXTURE_SETUP(unix_sock) +{ + struct sockaddr_un addr = {}; + int err; + + addr.sun_family = AF_UNIX; + strcpy(addr.sun_path, SOCK_PATH); + remove_socket_file(); + + self->server = socket(AF_UNIX, variant->socket_type, 0); + ASSERT_LT(-1, self->server); + + err = bind(self->server, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(0, err); + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + err = listen(self->server, 1); + ASSERT_EQ(0, err); + } + + self->client = socket(AF_UNIX, variant->socket_type | SOCK_NONBLOCK, 0); + ASSERT_LT(-1, self->client); + + err = connect(self->client, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(0, err); +} + +FIXTURE_TEARDOWN(unix_sock) +{ + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) + close(self->child); + + close(self->client); + close(self->server); + remove_socket_file(); +} + +/* Test 1: peer closes normally */ +TEST_F(unix_sock, eof) +{ + char buf[16] = {}; + ssize_t n; + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + self->child = accept(self->server, NULL, NULL); + ASSERT_LT(-1, self->child); + + close(self->child); + } else { + close(self->server); + } + + n = recv(self->client, buf, sizeof(buf), 0); + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + ASSERT_EQ(0, n); + } else { + ASSERT_EQ(-1, n); + ASSERT_EQ(EAGAIN, errno); + } +} + +/* Test 2: peer closes with unread data */ +TEST_F(unix_sock, reset_unread_behavior) +{ + char buf[16] = {}; + ssize_t n; + + /* Send data that will remain unread */ + send(self->client, "hello", 5, 0); + + if (variant->socket_type == SOCK_DGRAM) { + /* No real connection, just close the server */ + close(self->server); + } else { + self->child = accept(self->server, NULL, NULL); + ASSERT_LT(-1, self->child); + + /* Peer closes before client reads */ + close(self->child); + } + + n = recv(self->client, buf, sizeof(buf), 0); + ASSERT_EQ(-1, n); + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + ASSERT_EQ(ECONNRESET, errno); + } else { + ASSERT_EQ(EAGAIN, errno); + } +} + +/* Test 3: closing unaccepted (embryo) server socket should reset client. */ +TEST_F(unix_sock, reset_closed_embryo) +{ + char buf[16] = {}; + ssize_t n; + + if (variant->socket_type == SOCK_DGRAM) { + snprintf(_metadata->results->reason, + sizeof(_metadata->results->reason), + "Test only applies to SOCK_STREAM and SOCK_SEQPACKET"); + exit(KSFT_XFAIL); + } + + /* Close server without accept()ing */ + close(self->server); + + n = recv(self->client, buf, sizeof(buf), 0); + + ASSERT_EQ(-1, n); + ASSERT_EQ(ECONNRESET, errno); +} + +TEST_HARNESS_MAIN + diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index a94b73a53f72..a88f797c549a 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -11,7 +11,8 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \ ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \ ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \ - ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance" + ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance \ + fib6_ra_to_static" VERBOSE=0 PAUSE_ON_FAIL=no @@ -1476,6 +1477,68 @@ ipv6_route_metrics_test() route_cleanup } +fib6_ra_to_static() +{ + setup + + echo + echo "Fib6 route promotion from RA-learned to static test" + set -e + + # ra6 is required for the test. (ipv6toolkit) + if [ ! -x "$(command -v ra6)" ]; then + echo "SKIP: ra6 not found." + set +e + cleanup &> /dev/null + return + fi + + # Create a pair of veth devices to send a RA message from one + # device to another. + $IP link add veth1 type veth peer name veth2 + $IP link set dev veth1 up + $IP link set dev veth2 up + $IP -6 address add 2001:10::1/64 dev veth1 nodad + $IP -6 address add 2001:10::2/64 dev veth2 nodad + + # Make veth1 ready to receive RA messages. + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2 + + # Send a RA message with a prefix from veth2. + $NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60 + + # Wait for the RA message. + sleep 1 + + # systemd may mess up the test. Make sure that + # systemd-networkd.service and systemd-networkd.socket are stopped. + check_rt_num_clean 2 $($IP -6 route list|grep expires|wc -l) || return + + # Configure static address on the same prefix + $IP -6 address add 2001:12::dead/64 dev veth1 nodad + + # On-link route won't expire anymore, default route still owned by RA + check_rt_num 1 $($IP -6 route list |grep expires|wc -l) + + # Send a second RA message with a prefix from veth2. + $NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60 + sleep 1 + + # Expire is not back, on-link route is still static + check_rt_num 1 $($IP -6 route list |grep expires|wc -l) + + $IP -6 address del 2001:12::dead/64 dev veth1 nodad + + # Expire is back, on-link route is now owned by RA again + check_rt_num 2 $($IP -6 route list |grep expires|wc -l) + + log_test $ret 0 "ipv6 promote RA route to static" + + set +e + + cleanup &> /dev/null +} + # add route for a prefix, flushing any existing routes first # expected to be the first step of a test add_route() @@ -2798,6 +2861,7 @@ do ipv6_mpath_list) ipv6_mpath_list_test;; ipv4_mpath_balance) ipv4_mpath_balance_test;; ipv6_mpath_balance) ipv6_mpath_balance_test;; + fib6_ra_to_static) fib6_ra_to_static;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/lib_sh_test.sh b/tools/testing/selftests/net/forwarding/lib_sh_test.sh index ff2accccaf4d..b4eda6c6199e 100755 --- a/tools/testing/selftests/net/forwarding/lib_sh_test.sh +++ b/tools/testing/selftests/net/forwarding/lib_sh_test.sh @@ -30,6 +30,11 @@ tfail() do_test "tfail" false } +tfail2() +{ + do_test "tfail2" false +} + txfail() { FAIL_TO_XFAIL=yes do_test "txfail" false @@ -132,6 +137,8 @@ test_ret() ret_subtest $ksft_fail "tfail" txfail tfail ret_subtest $ksft_xfail "txfail" txfail txfail + + ret_subtest $ksft_fail "tfail2" tfail2 tfail } exit_status_tests_run() diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh deleted file mode 100755 index 4c5144c6f652..000000000000 --- a/tools/testing/selftests/net/gro.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -readonly SERVER_MAC="aa:00:00:00:00:02" -readonly CLIENT_MAC="aa:00:00:00:00:01" -readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large") -readonly PROTOS=("ipv4" "ipv6" "ipip") -dev="" -test="all" -proto="ipv4" - -run_test() { - local server_pid=0 - local exit_code=0 - local protocol=$1 - local test=$2 - local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \ - "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" ) - - setup_ns - # Each test is run 6 times to deflake, because given the receive timing, - # not all packets that should coalesce will be considered in the same flow - # on every try. - for tries in {1..6}; do - # Actual test starts here - ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ - 1>>log.txt & - server_pid=$! - sleep 0.5 # to allow for socket init - ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \ - 1>>log.txt - wait "${server_pid}" - exit_code=$? - if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \ - ${exit_code} -ne 0 ]]; then - echo "Ignoring errors due to slow environment" 1>&2 - exit_code=0 - fi - if [[ "${exit_code}" -eq 0 ]]; then - break; - fi - done - cleanup_ns - echo ${exit_code} -} - -run_all_tests() { - local failed_tests=() - for proto in "${PROTOS[@]}"; do - for test in "${TESTS[@]}"; do - echo "running test ${proto} ${test}" >&2 - exit_code=$(run_test $proto $test) - if [[ "${exit_code}" -ne 0 ]]; then - failed_tests+=("${proto}_${test}") - fi; - done; - done - if [[ ${#failed_tests[@]} -ne 0 ]]; then - echo "failed tests: ${failed_tests[*]}. \ - Please see log.txt for more logs" - exit 1 - else - echo "All Tests Succeeded!" - fi; -} - -usage() { - echo "Usage: $0 \ - [-i <DEV>] \ - [-t data|ack|flags|tcp|ip|large] \ - [-p <ipv4|ipv6>]" 1>&2; - exit 1; -} - -while getopts "i:t:p:" opt; do - case "${opt}" in - i) - dev="${OPTARG}" - ;; - t) - test="${OPTARG}" - ;; - p) - proto="${OPTARG}" - ;; - *) - usage - ;; - esac -done - -if [ -n "$dev" ]; then - source setup_loopback.sh -else - source setup_veth.sh -fi - -setup -trap cleanup EXIT -if [[ "${test}" == "all" ]]; then - run_all_tests -else - exit_code=$(run_test "${proto}" "${test}") - exit $exit_code -fi; diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index feba4ef69a54..f448bafb3f20 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -43,7 +43,7 @@ __ksft_status_merge() weights[$i]=$((weight++)) done - if [[ ${weights[$a]} > ${weights[$b]} ]]; then + if [[ ${weights[$a]} -ge ${weights[$b]} ]]; then echo "$a" return 0 else diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index ce795bc0a1af..5339f56329e1 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -8,6 +8,7 @@ CFLAGS += -I../../ TEST_FILES := \ ../../../../net/ynl \ ../../../../../Documentation/netlink/specs \ + ksft_setup_loopback.sh \ # end of TEST_FILES TEST_GEN_FILES := \ diff --git a/tools/testing/selftests/net/lib/ksft_setup_loopback.sh b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh new file mode 100755 index 000000000000..3defbb1919c5 --- /dev/null +++ b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Setup script for running ksft tests over a real interface in loopback mode. +# This scripts replaces the historical setup_loopback.sh. It puts +# a (presumably) real hardware interface into loopback mode, creates macvlan +# interfaces on top and places them in a network namespace for isolation. +# +# NETIF env variable must be exported to indicate the real target device. +# Note that the test will override NETIF with one of the macvlans, the +# actual ksft test will only see the macvlans. +# +# Example use: +# export NETIF=eth0 +# ./net/lib/ksft_setup_loopback.sh ./drivers/net/gro.py + +if [ -z "$NETIF" ]; then + echo "Error: NETIF variable not set" + exit 1 +fi +if ! [ -d "/sys/class/net/$NETIF" ]; then + echo "Error: Can't find $NETIF, invalid netdevice" + exit 1 +fi + +# Save original settings for cleanup +readonly FLUSH_PATH="/sys/class/net/${NETIF}/gro_flush_timeout" +readonly IRQ_PATH="/sys/class/net/${NETIF}/napi_defer_hard_irqs" +FLUSH_TIMEOUT="$(< "${FLUSH_PATH}")" +readonly FLUSH_TIMEOUT +HARD_IRQS="$(< "${IRQ_PATH}")" +readonly HARD_IRQS + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +readonly SERVER_NS +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +readonly CLIENT_NS +readonly SERVER_MAC="aa:00:00:00:00:02" +readonly CLIENT_MAC="aa:00:00:00:00:01" + +# ksft expects addresses to communicate with remote +export LOCAL_V6=2001:db8:1::1 +export REMOTE_V6=2001:db8:1::2 + +cleanup() { + local exit_code=$? + + echo "Cleaning up..." + + # Remove macvlan interfaces and namespaces + ip -netns "${SERVER_NS}" link del dev server 2>/dev/null || true + ip netns del "${SERVER_NS}" 2>/dev/null || true + ip -netns "${CLIENT_NS}" link del dev client 2>/dev/null || true + ip netns del "${CLIENT_NS}" 2>/dev/null || true + + # Disable loopback + ethtool -K "${NETIF}" loopback off 2>/dev/null || true + sleep 1 + + echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}" + echo "${HARD_IRQS}" >"${IRQ_PATH}" + + exit $exit_code +} + +trap cleanup EXIT INT TERM + +# Enable loopback mode +echo "Enabling loopback on ${NETIF}..." +ethtool -K "${NETIF}" loopback on || { + echo "Failed to enable loopback mode" + exit 1 +} +# The interface may need time to get carrier back, but selftests +# will wait for carrier, so no need to wait / sleep here. + +# Use timer on host to trigger the network stack +# Also disable device interrupt to not depend on NIC interrupt +# Reduce test flakiness caused by unexpected interrupts +echo 100000 >"${FLUSH_PATH}" +echo 50 >"${IRQ_PATH}" + +# Create server namespace with macvlan +ip netns add "${SERVER_NS}" +ip link add link "${NETIF}" dev server address "${SERVER_MAC}" type macvlan +ip link set dev server netns "${SERVER_NS}" +ip -netns "${SERVER_NS}" link set dev server up +ip -netns "${SERVER_NS}" addr add $LOCAL_V6/64 dev server +ip -netns "${SERVER_NS}" link set dev lo up + +# Create client namespace with macvlan +ip netns add "${CLIENT_NS}" +ip link add link "${NETIF}" dev client address "${CLIENT_MAC}" type macvlan +ip link set dev client netns "${CLIENT_NS}" +ip -netns "${CLIENT_NS}" link set dev client up +ip -netns "${CLIENT_NS}" addr add $REMOTE_V6/64 dev client +ip -netns "${CLIENT_NS}" link set dev lo up + +echo "Setup complete!" +echo " Device: ${NETIF}" +echo " Server NS: ${SERVER_NS}" +echo " Client NS: ${CLIENT_NS}" +echo "" + +# Setup environment variables for tests +export NETIF=server +export REMOTE_TYPE=netns +export REMOTE_ARGS="${CLIENT_NS}" + +# Run the command +ip netns exec "${SERVER_NS}" "$@" diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 97b7cf2b20eb..40f9ce307dd1 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -8,7 +8,8 @@ from .consts import KSRC from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \ ksft_ne, ksft_true, ksft_not_none, ksft_in, ksft_not_in, ksft_is, \ ksft_ge, ksft_gt, ksft_lt, ksft_raises, ksft_busy_wait, \ - ktap_result, ksft_disruptive, ksft_setup, ksft_run, ksft_exit + ktap_result, ksft_disruptive, ksft_setup, ksft_run, ksft_exit, \ + ksft_variants, KsftNamedVariant from .netns import NetNS, NetNSEnter from .nsim import NetdevSim, NetdevSimDev from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \ @@ -21,7 +22,7 @@ __all__ = ["KSRC", "ksft_ne", "ksft_true", "ksft_not_none", "ksft_in", "ksft_not_in", "ksft_is", "ksft_ge", "ksft_gt", "ksft_lt", "ksft_raises", "ksft_busy_wait", "ktap_result", "ksft_disruptive", "ksft_setup", - "ksft_run", "ksft_exit", + "ksft_run", "ksft_exit", "ksft_variants", "KsftNamedVariant", "NetNS", "NetNSEnter", "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer", "bpftool", "ip", "ethtool", "bpftrace", "rand_port", diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 83b1574f7719..ebd82940ee50 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -1,12 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 -import builtins import functools import inspect import signal import sys import time import traceback +from collections import namedtuple from .consts import KSFT_MAIN_NAME from .utils import global_defer_queue @@ -136,7 +136,7 @@ def ksft_busy_wait(cond, sleep=0.005, deadline=1, comment=""): time.sleep(sleep) -def ktap_result(ok, cnt=1, case="", comment=""): +def ktap_result(ok, cnt=1, case_name="", comment=""): global KSFT_RESULT_ALL KSFT_RESULT_ALL = KSFT_RESULT_ALL and ok @@ -146,8 +146,8 @@ def ktap_result(ok, cnt=1, case="", comment=""): res += "ok " res += str(cnt) + " " res += KSFT_MAIN_NAME - if case: - res += "." + str(case.__name__) + if case_name: + res += "." + case_name if comment: res += " # " + comment print(res, flush=True) @@ -163,7 +163,7 @@ def ksft_flush_defer(): entry = global_defer_queue.pop() try: entry.exec_only() - except: + except BaseException: ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!") tb = traceback.format_exc() for line in tb.strip().split('\n'): @@ -171,6 +171,10 @@ def ksft_flush_defer(): KSFT_RESULT = False +KsftCaseFunction = namedtuple("KsftCaseFunction", + ['name', 'original_func', 'variants']) + + def ksft_disruptive(func): """ Decorator that marks the test as disruptive (e.g. the test @@ -181,11 +185,47 @@ def ksft_disruptive(func): @functools.wraps(func) def wrapper(*args, **kwargs): if not KSFT_DISRUPTIVE: - raise KsftSkipEx(f"marked as disruptive") + raise KsftSkipEx("marked as disruptive") return func(*args, **kwargs) return wrapper +class KsftNamedVariant: + """ Named string name + argument list tuple for @ksft_variants """ + + def __init__(self, name, *params): + self.params = params + self.name = name or "_".join([str(x) for x in self.params]) + + +def ksft_variants(params): + """ + Decorator defining the sets of inputs for a test. + The parameters will be included in the name of the resulting sub-case. + Parameters can be either single object, tuple or a KsftNamedVariant. + The argument can be a list or a generator. + + Example: + + @ksft_variants([ + (1, "a"), + (2, "b"), + KsftNamedVariant("three", 3, "c"), + ]) + def my_case(cfg, a, b): + pass # ... + + ksft_run(cases=[my_case], args=(cfg, )) + + Will generate cases: + my_case.1_a + my_case.2_b + my_case.three + """ + + return lambda func: KsftCaseFunction(func.__name__, func, params) + + def ksft_setup(env): """ Setup test framework global state from the environment. @@ -199,7 +239,7 @@ def ksft_setup(env): return False try: return bool(int(value)) - except: + except Exception: raise Exception(f"failed to parse {name}") if "DISRUPTIVE" in env: @@ -220,9 +260,13 @@ def _ksft_intr(signum, frame): ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...") -def ksft_run(cases=None, globs=None, case_pfx=None, args=()): +def _ksft_generate_test_cases(cases, globs, case_pfx, args): + """Generate a flat list of (func, args, name) tuples""" + cases = cases or [] + test_cases = [] + # If using the globs method find all relevant functions if globs and case_pfx: for key, value in globs.items(): if not callable(value): @@ -232,6 +276,27 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases.append(value) break + for func in cases: + if isinstance(func, KsftCaseFunction): + # Parametrized test - create case for each param + for param in func.variants: + if not isinstance(param, KsftNamedVariant): + if not isinstance(param, tuple): + param = (param, ) + param = KsftNamedVariant(None, *param) + + test_cases.append((func.original_func, + (*args, *param.params), + func.name + "." + param.name)) + else: + test_cases.append((func, args, func.__name__)) + + return test_cases + + +def ksft_run(cases=None, globs=None, case_pfx=None, args=()): + test_cases = _ksft_generate_test_cases(cases, globs, case_pfx, args) + global term_cnt term_cnt = 0 prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr) @@ -239,19 +304,19 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} print("TAP version 13", flush=True) - print("1.." + str(len(cases)), flush=True) + print("1.." + str(len(test_cases)), flush=True) global KSFT_RESULT cnt = 0 stop = False - for case in cases: + for func, args, name in test_cases: KSFT_RESULT = True cnt += 1 comment = "" cnt_key = "" try: - case(*args) + func(*args) except KsftSkipEx as e: comment = "SKIP " + str(e) cnt_key = 'skip' @@ -273,7 +338,7 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): if not cnt_key: cnt_key = 'pass' if KSFT_RESULT else 'fail' - ktap_result(KSFT_RESULT, cnt, case, comment=comment) + ktap_result(KSFT_RESULT, cnt, name, comment=comment) totals[cnt_key] += 1 if stop: diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py index 1a8cbe9acc48..7c640ed64c0b 100644 --- a/tools/testing/selftests/net/lib/py/nsim.py +++ b/tools/testing/selftests/net/lib/py/nsim.py @@ -27,7 +27,7 @@ class NetdevSim: self.port_index = port_index self.ns = ns self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index) - ret = ip("-j link show dev %s" % ifname, ns=ns) + ret = ip("-d -j link show dev %s" % ifname, ns=ns) self.dev = json.loads(ret.stdout)[0] self.ifindex = self.dev["ifindex"] diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index cb40ecef9456..106ee1f2df86 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -32,7 +32,7 @@ class cmd: Use bkg() instead to run a command in the background. """ def __init__(self, comm, shell=None, fail=True, ns=None, background=False, - host=None, timeout=5, ksft_wait=None): + host=None, timeout=5, ksft_ready=None, ksft_wait=None): if ns: comm = f'ip netns exec {ns} ' + comm @@ -52,21 +52,25 @@ class cmd: # ksft_wait lets us wait for the background process to fully start, # we pass an FD to the child process, and wait for it to write back. # Similarly term_fd tells child it's time to exit. - pass_fds = () + pass_fds = [] env = os.environ.copy() if ksft_wait is not None: - rfd, ready_fd = os.pipe() wait_fd, self.ksft_term_fd = os.pipe() - pass_fds = (ready_fd, wait_fd, ) - env["KSFT_READY_FD"] = str(ready_fd) + pass_fds.append(wait_fd) env["KSFT_WAIT_FD"] = str(wait_fd) + ksft_ready = True # ksft_wait implies ready + if ksft_ready is not None: + rfd, ready_fd = os.pipe() + pass_fds.append(ready_fd) + env["KSFT_READY_FD"] = str(ready_fd) self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE, stderr=subprocess.PIPE, pass_fds=pass_fds, env=env) if ksft_wait is not None: - os.close(ready_fd) os.close(wait_fd) + if ksft_ready is not None: + os.close(ready_fd) msg = fd_read_timeout(rfd, ksft_wait) os.close(rfd) if not msg: @@ -116,10 +120,10 @@ class bkg(cmd): with bkg("my_binary", ksft_wait=5): """ def __init__(self, comm, shell=None, fail=None, ns=None, host=None, - exit_wait=False, ksft_wait=None): + exit_wait=False, ksft_ready=None, ksft_wait=None): super().__init__(comm, background=True, shell=shell, fail=fail, ns=ns, host=host, - ksft_wait=ksft_wait) + ksft_ready=ksft_ready, ksft_wait=ksft_wait) self.terminate = not exit_wait and not ksft_wait self._exit_wait = exit_wait self.check_fail = fail diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c index c368fc045f4b..64f05229ab24 100644 --- a/tools/testing/selftests/net/lib/xdp_native.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -332,7 +332,7 @@ static __u16 csum_fold_helper(__u32 csum) } static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset, - __u32 hdr_len) + unsigned long hdr_len) { char tmp_buff[MAX_ADJST_OFFSET]; __u32 buff_pos, udp_csum = 0; @@ -422,8 +422,9 @@ static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port) { struct udphdr *udph = NULL; __s32 *adjust_offset, *val; - __u32 key, hdr_len; + unsigned long hdr_len; void *offset_ptr; + __u32 key; __u8 tag; int ret; diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 9b7b93f8eb0c..a6447f7a31fe 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -375,81 +375,75 @@ do_transfer() local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}" local capopt="-i any -s 65535 -B 32768 ${capuser}" - ip netns exec ${listener_ns} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 & + ip netns exec ${listener_ns} tcpdump ${capopt} \ + -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 & local cappid_listener=$! - ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 & - local cappid_connector=$! + if [ ${listener_ns} != ${connector_ns} ]; then + ip netns exec ${connector_ns} tcpdump ${capopt} \ + -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 & + local cappid_connector=$! + fi sleep 1 fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat -n + mptcp_lib_nstat_init "${listener_ns}" if [ ${listener_ns} != ${connector_ns} ]; then - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat -n - fi - - local stat_synrx_last_l - local stat_ackrx_last_l - local stat_cookietx_last - local stat_cookierx_last - local stat_csum_err_s - local stat_csum_err_c - local stat_tcpfb_last_l - stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") - stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") - stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") - stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") - stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") - - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_args $local_addr < "$sin" > "$sout" & + mptcp_lib_nstat_init "${connector_ns}" + fi + + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_args $local_addr < "$sin" > "$sout" & local spid=$! mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" local start start=$(date +%s%3N) - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_args $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_args $connect_addr < "$cin" > "$cout" & local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \ + "${connector_ns}" "${port}" "${cpid}" "${spid}" & + local timeout_pid=$! + wait $cpid local retc=$? wait $spid local rets=$? + if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 + fi + local stop stop=$(date +%s%3N) if $capture; then sleep 1 kill ${cappid_listener} - kill ${cappid_connector} + if [ ${listener_ns} != ${connector_ns} ]; then + kill ${cappid_connector} + fi fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat | grep Tcp > /tmp/${listener_ns}.out + mptcp_lib_nstat_get "${listener_ns}" if [ ${listener_ns} != ${connector_ns} ]; then - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat | grep Tcp > /tmp/${connector_ns}.out + mptcp_lib_nstat_get "${connector_ns}" fi local duration duration=$((stop-start)) printf "(duration %05sms) " "${duration}" - if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" - mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ - "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" echo cat "$capout" @@ -463,38 +457,38 @@ do_transfer() rets=$? local extra="" - local stat_synrx_now_l - local stat_ackrx_now_l - local stat_cookietx_now - local stat_cookierx_now - local stat_ooo_now - local stat_tcpfb_now_l - stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") - stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") - stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") - stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") - - expect_synrx=$((stat_synrx_last_l)) - expect_ackrx=$((stat_ackrx_last_l)) + local stat_synrx + local stat_ackrx + local stat_cookietx + local stat_cookierx + local stat_ooo + local stat_tcpfb + stat_synrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + stat_ackrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + stat_cookietx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + stat_cookierx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + stat_ooo=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") + stat_tcpfb=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") + + expect_synrx=0 + expect_ackrx=0 cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies) cookies=${cookies##*=} if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then - expect_synrx=$((stat_synrx_last_l+connect_per_transfer)) - expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer)) + expect_synrx=${connect_per_transfer} + expect_ackrx=${connect_per_transfer} fi - if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then - mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \ + if [ ${stat_synrx} -lt ${expect_synrx} ]; then + mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx})" \ "than expected (${expect_synrx})" retc=1 fi - if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then - if [ ${stat_ooo_now} -eq 0 ]; then - mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \ + if [ ${stat_ackrx} -lt ${expect_ackrx} ]; then + if [ ${stat_ooo} -eq 0 ]; then + mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx})" \ "than expected (${expect_ackrx})" rets=1 else @@ -508,47 +502,45 @@ do_transfer() csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") - local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) - if [ $csum_err_s_nr -gt 0 ]; then - mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]" + if [ $csum_err_s -gt 0 ]; then + mptcp_lib_pr_fail "server got ${csum_err_s} data checksum error[s]" rets=1 fi - local csum_err_c_nr=$((csum_err_c - stat_csum_err_c)) - if [ $csum_err_c_nr -gt 0 ]; then - mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]" + if [ $csum_err_c -gt 0 ]; then + mptcp_lib_pr_fail "client got ${csum_err_c} data checksum error[s]" retc=1 fi fi - if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then + if [ ${stat_ooo} -eq 0 ] && [ ${stat_tcpfb} -gt 0 ]; then mptcp_lib_pr_fail "unexpected fallback to TCP" rets=1 fi if [ $cookies -eq 2 ];then - if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then + if [ $stat_cookietx -eq 0 ] ;then extra+=" WARN: CookieSent: did not advance" fi - if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then + if [ $stat_cookierx -eq 0 ] ;then extra+=" WARN: CookieRecv: did not advance" fi else - if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then + if [ $stat_cookietx -gt 0 ] ;then extra+=" WARN: CookieSent: changed" fi - if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then + if [ $stat_cookierx -gt 0 ] ;then extra+=" WARN: CookieRecv: changed" fi fi - if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then + if [ ${stat_synrx} -gt ${expect_synrx} ]; then extra+=" WARN: SYNRX: expect ${expect_synrx}," - extra+=" got ${stat_synrx_now_l} (probably retransmissions)" + extra+=" got ${stat_synrx} (probably retransmissions)" fi - if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then + if [ ${stat_ackrx} -gt ${expect_ackrx} ]; then extra+=" WARN: ACKRX: expect ${expect_ackrx}," - extra+=" got ${stat_ackrx_now_l} (probably retransmissions)" + extra+=" got ${stat_ackrx} (probably retransmissions)" fi if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f0efbf985625..b2e6e548f796 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -983,10 +983,8 @@ do_transfer() cond_start_capture ${listener_ns} - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat -n - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat -n + mptcp_lib_nstat_init "${listener_ns}" + mptcp_lib_nstat_init "${connector_ns}" local extra_args if [ $speed = "fast" ]; then @@ -1026,38 +1024,38 @@ do_transfer() if [ "$test_linkfail" -gt 1 ];then listener_in="${sinfail}" fi - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p ${port} -s ${srv_proto} \ - ${extra_srv_args} "${bind_addr}" < "${listener_in}" > "${sout}" & + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p ${port} -s ${srv_proto} \ + ${extra_srv_args} "${bind_addr}" < "${listener_in}" > "${sout}" & local spid=$! mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" extra_cl_args="$extra_args $extra_cl_args" if [ "$test_linkfail" -eq 0 ];then - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_cl_args $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr < "$cin" > "$cout" & elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then connector_in="${cinsent}" ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \ tee "$cinsent" | \ - timeout ${timeout_test} \ ip netns exec ${connector_ns} \ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ $extra_cl_args $connect_addr > "$cout" & else connector_in="${cinsent}" tee "$cinsent" < "$cinfail" | \ - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_cl_args $connect_addr > "$cout" & + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr > "$cout" & fi local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \ + "${connector_ns}" "${port}" "${cpid}" "${spid}" & + local timeout_pid=$! + pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr check_cestab $listener_ns $connector_ns @@ -1066,17 +1064,20 @@ do_transfer() wait $spid local rets=$? + if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 + fi + cond_stop_capture - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat | grep Tcp > /tmp/${listener_ns}.out - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat | grep Tcp > /tmp/${connector_ns}.out + mptcp_lib_nstat_get "${listener_ns}" + mptcp_lib_nstat_get "${connector_ns}" - if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then fail_test "client exit code $retc, server $rets" - mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ - "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" return 1 fi @@ -1151,12 +1152,20 @@ run_tests() do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} } +_dump_stats() +{ + local ns="${1}" + local side="${2}" + + mptcp_lib_print_err "${side} ns stats (${ns2})" + mptcp_lib_pr_nstat "${ns}" + echo +} + dump_stats() { - echo Server ns stats - ip netns exec $ns1 nstat -as | grep Tcp - echo Client ns stats - ip netns exec $ns2 nstat -as | grep Tcp + _dump_stats "${ns1}" "Server" + _dump_stats "${ns2}" "Client" } chk_csum_nr() @@ -3646,7 +3655,6 @@ fullmesh_tests() fastclose_tests() { if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then - MPTCP_LIB_SUBTEST_FLAKY=1 test_linkfail=1024 fastclose=client \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 @@ -3655,7 +3663,6 @@ fastclose_tests() fi if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then - MPTCP_LIB_SUBTEST_FLAKY=1 test_linkfail=1024 fastclose=server \ run_tests $ns1 $ns2 10.0.1.1 join_rst_nr=1 \ @@ -3952,7 +3959,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 2 2 - { test_linkfail=128 speed=5 \ + { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 @@ -3985,7 +3992,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { test_linkfail=128 speed=5 \ + { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 @@ -4013,7 +4020,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { test_linkfail=128 speed=5 \ + { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 @@ -4034,7 +4041,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { test_linkfail=128 speed=5 \ + { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 @@ -4058,7 +4065,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 - { test_linkfail=128 speed=5 \ + { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 @@ -4089,7 +4096,7 @@ endpoint_tests() pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - { test_linkfail=128 speed=slow \ + { timeout_test=120 test_linkfail=128 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -4116,7 +4123,7 @@ endpoint_tests() pm_nl_set_limits $ns2 0 3 pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - { test_linkfail=128 speed=5 \ + { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -4194,7 +4201,7 @@ endpoint_tests() # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal - { test_linkfail=128 speed=5 \ + { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -4203,38 +4210,45 @@ endpoint_tests() $ns1 10.0.2.1 id 1 flags signal chk_subflow_nr "before delete" 2 chk_mptcp_info subflows 1 subflows 1 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 1 pm_nl_del_endpoint $ns1 1 10.0.2.1 pm_nl_del_endpoint $ns1 2 224.0.0.1 sleep 0.5 chk_subflow_nr "after delete" 1 chk_mptcp_info subflows 0 subflows 0 + chk_mptcp_info add_addr_signal 0 add_addr_accepted 0 pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal wait_mpj $ns2 chk_subflow_nr "after re-add" 3 chk_mptcp_info subflows 2 subflows 2 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 pm_nl_del_endpoint $ns1 42 10.0.1.1 sleep 0.5 chk_subflow_nr "after delete ID 0" 2 chk_mptcp_info subflows 2 subflows 2 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal wait_mpj $ns2 chk_subflow_nr "after re-add ID 0" 3 chk_mptcp_info subflows 3 subflows 3 + chk_mptcp_info add_addr_signal 3 add_addr_accepted 2 pm_nl_del_endpoint $ns1 99 10.0.1.1 sleep 0.5 chk_subflow_nr "after re-delete ID 0" 2 chk_mptcp_info subflows 2 subflows 2 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal wait_mpj $ns2 chk_subflow_nr "after re-re-add ID 0" 3 chk_mptcp_info subflows 3 subflows 3 + chk_mptcp_info add_addr_signal 3 add_addr_accepted 2 mptcp_lib_kill_group_wait $tests_pid kill_events_pids @@ -4267,7 +4281,7 @@ endpoint_tests() # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow - { test_linkfail=128 speed=20 \ + { timeout_test=120 test_linkfail=128 speed=20 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index f4388900016a..5fea7e7df628 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -106,23 +106,32 @@ mptcp_lib_pr_info() { mptcp_lib_print_info "INFO: ${*}" } -# $1-2: listener/connector ns ; $3 port ; $4-5 listener/connector stat file +mptcp_lib_pr_nstat() { + local ns="${1}" + local hist="/tmp/${ns}.out" + + if [ -f "${hist}" ]; then + awk '$2 != 0 { print " "$0 }' "${hist}" + else + ip netns exec "${ns}" nstat -as | grep Tcp + fi +} + +# $1-2: listener/connector ns ; $3 port mptcp_lib_pr_err_stats() { local lns="${1}" local cns="${2}" local port="${3}" - local lstat="${4}" - local cstat="${5}" echo -en "${MPTCP_LIB_COLOR_RED}" { printf "\nnetns %s (listener) socket stat for %d:\n" "${lns}" "${port}" ip netns exec "${lns}" ss -Menitam -o "sport = :${port}" - cat "${lstat}" + mptcp_lib_pr_nstat "${lns}" printf "\nnetns %s (connector) socket stat for %d:\n" "${cns}" "${port}" ip netns exec "${cns}" ss -Menitam -o "dport = :${port}" - [ "${lstat}" != "${cstat}" ] && cat "${cstat}" + [ "${lns}" != "${cns}" ] && mptcp_lib_pr_nstat "${cns}" } 1>&2 echo -en "${MPTCP_LIB_COLOR_RESET}" } @@ -341,6 +350,19 @@ mptcp_lib_evts_get_info() { mptcp_lib_get_info_value "${1}" "^type:${3:-1}," } +mptcp_lib_wait_timeout() { + local timeout_test="${1}" + local listener_ns="${2}" + local connector_ns="${3}" + local port="${4}" + shift 4 # rest are PIDs + + sleep "${timeout_test}" + mptcp_lib_print_err "timeout" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" + kill "${@}" 2>/dev/null +} + # $1: PID mptcp_lib_kill_wait() { [ "${1}" -eq 0 ] && return 0 @@ -376,14 +398,36 @@ mptcp_lib_is_v6() { [ -z "${1##*:*}" ] } +mptcp_lib_nstat_init() { + local ns="${1}" + + rm -f "/tmp/${ns}."{nstat,out} + NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -n +} + +mptcp_lib_nstat_get() { + local ns="${1}" + + # filter out non-*TCP stats, and the rate (last column) + NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -sz | + grep -o ".*Tcp\S\+\s\+[0-9]\+" > "/tmp/${ns}.out" +} + # $1: ns, $2: MIB counter +# Get the counter from the history (mptcp_lib_nstat_{init,get}()) if available. +# If not, get the counter from nstat ignoring any history. mptcp_lib_get_counter() { local ns="${1}" local counter="${2}" + local hist="/tmp/${ns}.out" local count - count=$(ip netns exec "${ns}" nstat -asz "${counter}" | - awk 'NR==1 {next} {print $2}') + if [[ -s "${hist}" && "${counter}" == *"Tcp"* ]]; then + count=$(awk "/^${counter} / {print \$2; exit}" "${hist}") + else + count=$(ip netns exec "${ns}" nstat -asz "${counter}" | + awk 'NR==1 {next} {print $2}') + fi if [ -z "${count}" ]; then mptcp_lib_fail_if_expected_feature "${counter} counter" return 1 diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index f01989be6e9b..ab8bce06b262 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -169,41 +169,44 @@ do_transfer() cmsg+=",TCPINQ" fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat -n - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat -n - - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \ - ${local_addr} < "$sin" > "$sout" & + mptcp_lib_nstat_init "${listener_ns}" + mptcp_lib_nstat_init "${connector_ns}" + + ip netns exec ${listener_ns} \ + $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \ + ${local_addr} < "$sin" > "$sout" & local spid=$! - sleep 1 + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \ - $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} \ + $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \ + $connect_addr < "$cin" > "$cout" & local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \ + "${connector_ns}" "${port}" "${cpid}" "${spid}" & + local timeout_pid=$! + wait $cpid local retc=$? wait $spid local rets=$? - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat | grep Tcp > /tmp/${listener_ns}.out - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat | grep Tcp > /tmp/${connector_ns}.out + if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 + fi + + mptcp_lib_nstat_get "${listener_ns}" + mptcp_lib_nstat_get "${connector_ns}" print_title "Transfer ${ip:2}" - if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" - mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ - "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" mptcp_lib_result_fail "transfer ${ip}" diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 1903e8e84a31..806aaa7d2d61 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -155,48 +155,53 @@ do_transfer() sleep 1 fi - NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \ - nstat -n - NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \ - nstat -n - - timeout ${timeout_test} \ - ip netns exec ${ns3} \ - ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \ - 0.0.0.0 < "$sin" > "$sout" & + mptcp_lib_nstat_init "${ns3}" + mptcp_lib_nstat_init "${ns1}" + + ip netns exec ${ns3} \ + ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \ + 0.0.0.0 < "$sin" > "$sout" & local spid=$! mptcp_lib_wait_local_port_listen "${ns3}" "${port}" - timeout ${timeout_test} \ - ip netns exec ${ns1} \ - ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \ - 10.0.3.3 < "$cin" > "$cout" & + ip netns exec ${ns1} \ + ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \ + 10.0.3.3 < "$cin" > "$cout" & local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${ns3}" "${ns1}" "${port}" \ + "${cpid}" "${spid}" & + local timeout_pid=$! + wait $cpid local retc=$? wait $spid local rets=$? + if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 + fi + if $capture; then sleep 1 kill ${cappid_listener} kill ${cappid_connector} fi - NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \ - nstat | grep Tcp > /tmp/${ns3}.out - NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \ - nstat | grep Tcp > /tmp/${ns1}.out + mptcp_lib_nstat_get "${ns3}" + mptcp_lib_nstat_get "${ns1}" cmp $sin $cout > /dev/null 2>&1 local cmps=$? cmp $cin $sout > /dev/null 2>&1 local cmpc=$? - if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \ - [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then + if [ $retc -eq 0 ] && [ $rets -eq 0 ] && + [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && + [ $timeout_pid -eq 0 ]; then printf "%-16s" " max $max_time " mptcp_lib_pr_ok cat "$capout" @@ -204,8 +209,7 @@ do_transfer() fi mptcp_lib_pr_fail "client exit code $retc, server $rets" - mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}" \ - "/tmp/${ns3}.out" "/tmp/${ns1}.out" + mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}" ls -l $sin $cout ls -l $cin $sout diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 87323942cb8a..e9ae1806ab07 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -211,7 +211,8 @@ make_connection() ip netns exec "$ns1" \ ./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 & local server_pid=$! - sleep 0.5 + + mptcp_lib_wait_local_port_listen "${ns1}" "${port}" # Run the client, transfer $file and stay connected to the server # to conduct tests diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt index b2b2cdf27e20..454441e7ecff 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt @@ -1,6 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 // Test that we correctly skip zero-length IOVs. + +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt index a82c8899d36b..0a0700afdaa3 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt @@ -4,6 +4,8 @@ // send a packet with MSG_ZEROCOPY and receive the notification ID // repeat and verify IDs are consecutive +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt index c01915e7f4a1..df91675d2991 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt @@ -3,6 +3,8 @@ // // send multiple packets, then read one range of all notifications. +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt index 6509882932e9..2963cfcb14df 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Minimal client-side zerocopy test +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt index 2cd78755cb2a..ea0c2fa73c2d 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt @@ -7,6 +7,8 @@ // First send on a closed socket and wait for (absent) notification. // Then connect and send and verify that notification nr. is zero. +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt index 7671c20e01cf..4df978a9b82e 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt @@ -7,6 +7,9 @@ // fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR // is correctly fired only once, when EPOLLET is set. send another packet with // MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. + +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt index fadc480fdb7f..36b6edc4858c 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt @@ -8,6 +8,9 @@ // fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR // is correctly fired only once, when EPOLLET is set. send another packet with // MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. + +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt index 5bfa0d1d2f4a..1bea6f3b4558 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt @@ -8,6 +8,9 @@ // is correctly fired only once, when EPOLLONESHOT is set. send another packet // with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and // confirm that EPOLLERR is correctly set. + +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt index 4a73bbf46961..e27c21ff5d18 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt @@ -8,6 +8,8 @@ // one will have no data in the initial send. On return 0 the // zerocopy notification counter is not incremented. Verify this too. +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` // Send a FastOpen request, no cookie yet so no data in SYN diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt index 36086c5877ce..b1fa77c77dfa 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt @@ -4,6 +4,8 @@ // send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the // kernel returns the notification ID. +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207` diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt index 672f817faca0..2f5317d0a9fa 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt @@ -7,6 +7,8 @@ // because each iovec element becomes a frag // 3) the PSH bit is set on an skb when it runs out of fragments +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt index a9a1ac0aea4f..9d5272c6b207 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt @@ -4,6 +4,8 @@ // verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy // packets of all sizes, including the smallest payload, 1B. +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 163a084d525d..248c2b91fe42 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -8,6 +8,7 @@ ALL_TESTS=" kci_test_polrouting kci_test_route_get kci_test_addrlft + kci_test_addrlft_route_cleanup kci_test_promote_secondaries kci_test_tc kci_test_gre @@ -323,6 +324,25 @@ kci_test_addrlft() end_test "PASS: preferred_lft addresses have expired" } +kci_test_addrlft_route_cleanup() +{ + local ret=0 + local test_addr="2001:db8:99::1/64" + local test_prefix="2001:db8:99::/64" + + run_cmd ip -6 addr add $test_addr dev "$devdummy" valid_lft 300 preferred_lft 300 + run_cmd_grep "$test_prefix proto kernel" ip -6 route show dev "$devdummy" + run_cmd ip -6 addr del $test_addr dev "$devdummy" + run_cmd_grep_fail "$test_prefix" ip -6 route show dev "$devdummy" + + if [ $ret -ne 0 ]; then + end_test "FAIL: route not cleaned up when address with valid_lft deleted" + return 1 + fi + + end_test "PASS: route cleaned up when address with valid_lft deleted" +} + kci_test_promote_secondaries() { run_cmd ifconfig "$devdummy" diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh deleted file mode 100644 index 2070b57849de..000000000000 --- a/tools/testing/selftests/net/setup_loopback.sh +++ /dev/null @@ -1,120 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout" -readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs" -readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})" -readonly HARD_IRQS="$(< ${IRQ_PATH})" -readonly server_ns=$(mktemp -u server-XXXXXXXX) -readonly client_ns=$(mktemp -u client-XXXXXXXX) - -netdev_check_for_carrier() { - local -r dev="$1" - - for i in {1..5}; do - carrier="$(cat /sys/class/net/${dev}/carrier)" - if [[ "${carrier}" -ne 1 ]] ; then - echo "carrier not ready yet..." >&2 - sleep 1 - else - echo "carrier ready" >&2 - break - fi - done - echo "${carrier}" -} - -# Assumes that there is no existing ipvlan device on the physical device -setup_loopback_environment() { - local dev="$1" - - # Fail hard if cannot turn on loopback mode for current NIC - ethtool -K "${dev}" loopback on || exit 1 - sleep 1 - - # Check for the carrier - carrier=$(netdev_check_for_carrier ${dev}) - if [[ "${carrier}" -ne 1 ]] ; then - echo "setup_loopback_environment failed" - exit 1 - fi -} - -setup_macvlan_ns(){ - local -r link_dev="$1" - local -r ns_name="$2" - local -r ns_dev="$3" - local -r ns_mac="$4" - local -r addr="$5" - - ip link add link "${link_dev}" dev "${ns_dev}" \ - address "${ns_mac}" type macvlan - exit_code=$? - if [[ "${exit_code}" -ne 0 ]]; then - echo "setup_macvlan_ns failed" - exit $exit_code - fi - - [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - ip link set dev "${ns_dev}" netns "${ns_name}" - ip -netns "${ns_name}" link set dev "${ns_dev}" up - if [[ -n "${addr}" ]]; then - ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}" - fi - - sleep 1 -} - -cleanup_macvlan_ns(){ - while (( $# >= 2 )); do - ns_name="$1" - ns_dev="$2" - ip -netns "${ns_name}" link del dev "${ns_dev}" - ip netns del "${ns_name}" - shift 2 - done -} - -cleanup_loopback(){ - local -r dev="$1" - - ethtool -K "${dev}" loopback off - sleep 1 - - # Check for the carrier - carrier=$(netdev_check_for_carrier ${dev}) - if [[ "${carrier}" -ne 1 ]] ; then - echo "setup_loopback_environment failed" - exit 1 - fi -} - -setup_interrupt() { - # Use timer on host to trigger the network stack - # Also disable device interrupt to not depend on NIC interrupt - # Reduce test flakiness caused by unexpected interrupts - echo 100000 >"${FLUSH_PATH}" - echo 50 >"${IRQ_PATH}" -} - -setup_ns() { - # Set up server_ns namespace and client_ns namespace - setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}" - setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" -} - -cleanup_ns() { - cleanup_macvlan_ns ${server_ns} server ${client_ns} client -} - -setup() { - setup_loopback_environment "${dev}" - setup_interrupt -} - -cleanup() { - cleanup_loopback "${dev}" - - echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}" - echo "${HARD_IRQS}" >"${IRQ_PATH}" -} diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh deleted file mode 100644 index 152bf4c65747..000000000000 --- a/tools/testing/selftests/net/setup_veth.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -readonly server_ns=$(mktemp -u server-XXXXXXXX) -readonly client_ns=$(mktemp -u client-XXXXXXXX) - -setup_veth_ns() { - local -r link_dev="$1" - local -r ns_name="$2" - local -r ns_dev="$3" - local -r ns_mac="$4" - - [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - echo 200000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" - echo 1 > "/sys/class/net/${ns_dev}/napi_defer_hard_irqs" - ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535 - ip -netns "${ns_name}" link set dev "${ns_dev}" up - - ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off -} - -setup_ns() { - # Set up server_ns namespace and client_ns namespace - ip link add name server type veth peer name client - - setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}" - setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" -} - -cleanup_ns() { - local ns_name - - for ns_name in ${client_ns} ${server_ns}; do - [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}" - done -} - -setup() { - # no global init setup step needed - : -} - -cleanup() { - cleanup_ns -} diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 8457b7ccbc09..b76df1efc2ef 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -174,7 +174,7 @@ static int do_recv_errqueue_timeout(int fdt) msg.msg_controllen = sizeof(control); while (1) { - const char *reason; + const char *reason = NULL; ret = recvmsg(fdt, &msg, MSG_ERRQUEUE); if (ret == -1 && errno == EAGAIN) diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh deleted file mode 100755 index 8ff172f7bb1b..000000000000 --- a/tools/testing/selftests/net/toeplitz.sh +++ /dev/null @@ -1,199 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping -# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu -# ('-rps <rps_map>') -# -# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action, -# which is a driver-specific encoding. -# -# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \ -# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)] - -source setup_loopback.sh -readonly SERVER_IP4="192.168.1.200/24" -readonly SERVER_IP6="fda8::1/64" -readonly SERVER_MAC="aa:00:00:00:00:02" - -readonly CLIENT_IP4="192.168.1.100/24" -readonly CLIENT_IP6="fda8::2/64" -readonly CLIENT_MAC="aa:00:00:00:00:01" - -PORT=8000 -KEY="$(</proc/sys/net/core/netdev_rss_key)" -TEST_RSS=false -RPS_MAP="" -PROTO_FLAG="" -IP_FLAG="" -DEV="eth0" - -# Return the number of rxqs among which RSS is configured to spread packets. -# This is determined by reading the RSS indirection table using ethtool. -get_rss_cfg_num_rxqs() { - echo $(ethtool -x "${DEV}" | - grep -E [[:space:]]+[0-9]+:[[:space:]]+ | - cut -d: -f2- | - awk '{$1=$1};1' | - tr ' ' '\n' | - sort -u | - wc -l) -} - -# Return a list of the receive irq handler cpus. -# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc. -# Reads /sys/kernel/irq/ in order, so algorithm depends on -# irq_{rxq-0} < irq_{rxq-1}, etc. -get_rx_irq_cpus() { - CPUS="" - # sort so that irq 2 is read before irq 10 - SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V) - # Consider only as many queues as RSS actually uses. We assume that - # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1). - RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs) - RXQ_COUNT=0 - - for i in ${SORTED_IRQS} - do - [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break - # lookup relevant IRQs by action name - [[ -e "$i/actions" ]] || continue - cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue - irqname=$(<"$i/actions") - - # does the IRQ get called - irqcount=$(cat "$i/per_cpu_count" | tr -d '0,') - [[ -n "${irqcount}" ]] || continue - - # lookup CPU - irq=$(basename "$i") - cpu=$(cat "/proc/irq/$irq/smp_affinity_list") - - if [[ -z "${CPUS}" ]]; then - CPUS="${cpu}" - else - CPUS="${CPUS},${cpu}" - fi - RXQ_COUNT=$((RXQ_COUNT+1)) - done - - echo "${CPUS}" -} - -get_disable_rfs_cmd() { - echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;" -} - -get_set_rps_bitmaps_cmd() { - CMD="" - for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus - do - CMD="${CMD} echo $1 > ${i};" - done - - echo "${CMD}" -} - -get_disable_rps_cmd() { - echo "$(get_set_rps_bitmaps_cmd 0)" -} - -die() { - echo "$1" - exit 1 -} - -check_nic_rxhash_enabled() { - local -r pattern="receive-hashing:\ on" - - ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled" -} - -parse_opts() { - local prog=$0 - shift 1 - - while [[ "$1" =~ "-" ]]; do - if [[ "$1" = "-irq_prefix" ]]; then - shift - IRQ_PATTERN="^$1-[0-9]*$" - elif [[ "$1" = "-u" || "$1" = "-t" ]]; then - PROTO_FLAG="$1" - elif [[ "$1" = "-4" ]]; then - IP_FLAG="$1" - SERVER_IP="${SERVER_IP4}" - CLIENT_IP="${CLIENT_IP4}" - elif [[ "$1" = "-6" ]]; then - IP_FLAG="$1" - SERVER_IP="${SERVER_IP6}" - CLIENT_IP="${CLIENT_IP6}" - elif [[ "$1" = "-rss" ]]; then - TEST_RSS=true - elif [[ "$1" = "-rps" ]]; then - shift - RPS_MAP="$1" - elif [[ "$1" = "-i" ]]; then - shift - DEV="$1" - else - die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \ - [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]" - fi - shift - done -} - -setup() { - setup_loopback_environment "${DEV}" - - # Set up server_ns namespace and client_ns namespace - setup_macvlan_ns "${DEV}" $server_ns server \ - "${SERVER_MAC}" "${SERVER_IP}" - setup_macvlan_ns "${DEV}" $client_ns client \ - "${CLIENT_MAC}" "${CLIENT_IP}" -} - -cleanup() { - cleanup_macvlan_ns $server_ns server $client_ns client - cleanup_loopback "${DEV}" -} - -parse_opts $0 $@ - -setup -trap cleanup EXIT - -check_nic_rxhash_enabled - -# Actual test starts here -if [[ "${TEST_RSS}" = true ]]; then - # RPS/RFS must be disabled because they move packets between cpus, - # which breaks the PACKET_FANOUT_CPU identification of RSS decisions. - eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \ - ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ - -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ - -C "$(get_rx_irq_cpus)" -s -v & -elif [[ ! -z "${RPS_MAP}" ]]; then - eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \ - ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ - -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ - -r "0x${RPS_MAP}" -s -v & -else - ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ - -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v & -fi - -server_pid=$! - -ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \ - "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" & - -client_pid=$! - -wait "${server_pid}" -exit_code=$? -kill -9 "${client_pid}" -if [[ "${exit_code}" -eq 0 ]]; then - echo "Test Succeeded!" -fi -exit "${exit_code}" diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh deleted file mode 100755 index 2fef34f4aba1..000000000000 --- a/tools/testing/selftests/net/toeplitz_client.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# A simple program for generating traffic for the toeplitz test. -# -# This program sends packets periodically for, conservatively, 20 seconds. The -# intent is for the calling program to kill this program once it is no longer -# needed, rather than waiting for the 20 second expiration. - -send_traffic() { - expiration=$((SECONDS+20)) - while [[ "${SECONDS}" -lt "${expiration}" ]] - do - if [[ "${PROTO}" == "-u" ]]; then - echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}" - else - echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}" - fi - sleep 0.001 - done -} - -PROTO=$1 -IPVER=$2 -ADDR=$3 -PORT=$4 - -send_traffic diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index dae91eb97d69..bcc14688661d 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -217,7 +217,7 @@ static void print_timestamp_usr(void) static void print_timestamp(struct scm_timestamping *tss, int tstype, int tskey, int payload_len) { - const char *tsname; + const char *tsname = NULL; validate_key(tskey, tstype); diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c index 5288e768b207..68625362add2 100644 --- a/tools/testing/selftests/user_events/perf_test.c +++ b/tools/testing/selftests/user_events/perf_test.c @@ -236,7 +236,7 @@ TEST_F(user, perf_empty_events) { ASSERT_EQ(1 << reg.enable_bit, self->check); /* Ensure write shows up at correct offset */ - ASSERT_NE(-1, write(self->data_fd, ®.write_index, + ASSERT_NE(-1, write(self->data_fd, (void *)®.write_index, sizeof(reg.write_index))); val = (void *)(((char *)perf_page) + perf_page->data_offset); ASSERT_EQ(PERF_RECORD_SAMPLE, *val); diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h index 240409bf5f8a..69ec0c856481 100644 --- a/tools/testing/selftests/vfio/lib/include/vfio_util.h +++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h @@ -4,9 +4,12 @@ #include <fcntl.h> #include <string.h> -#include <linux/vfio.h> + +#include <uapi/linux/types.h> +#include <linux/iommufd.h> #include <linux/list.h> #include <linux/pci_regs.h> +#include <linux/vfio.h> #include "../../../kselftest.h" @@ -185,6 +188,13 @@ struct vfio_pci_device { struct vfio_pci_driver driver; }; +struct iova_allocator { + struct iommu_iova_range *ranges; + u32 nranges; + u32 range_idx; + u64 range_offset; +}; + /* * Return the BDF string of the device that the test should use. * @@ -206,6 +216,13 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_ void vfio_pci_device_cleanup(struct vfio_pci_device *device); void vfio_pci_device_reset(struct vfio_pci_device *device); +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device, + u32 *nranges); + +struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device); +void iova_allocator_cleanup(struct iova_allocator *allocator); +iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size); + int __vfio_pci_dma_map(struct vfio_pci_device *device, struct vfio_dma_region *region); int __vfio_pci_dma_unmap(struct vfio_pci_device *device, diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c index a381fd253aa7..b479a359da12 100644 --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c @@ -12,11 +12,12 @@ #include <sys/mman.h> #include <uapi/linux/types.h> +#include <linux/iommufd.h> #include <linux/limits.h> #include <linux/mman.h> +#include <linux/overflow.h> #include <linux/types.h> #include <linux/vfio.h> -#include <linux/iommufd.h> #include "../../../kselftest.h" #include <vfio_util.h> @@ -29,6 +30,249 @@ VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \ } while (0) +static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz, + u32 *cap_offset) +{ + struct vfio_info_cap_header *hdr; + + if (!*cap_offset) + return NULL; + + VFIO_ASSERT_LT(*cap_offset, bufsz); + VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr)); + + hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset); + *cap_offset = hdr->next; + + return hdr; +} + +static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info, + u16 cap_id) +{ + struct vfio_info_cap_header *hdr; + u32 cap_offset = info->cap_offset; + u32 max_depth; + u32 depth = 0; + + if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) + return NULL; + + if (cap_offset) + VFIO_ASSERT_GE(cap_offset, sizeof(*info)); + + max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr); + + while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) { + depth++; + VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n"); + + if (hdr->id == cap_id) + return hdr; + } + + return NULL; +} + +/* Return buffer including capability chain, if present. Free with free() */ +static struct vfio_iommu_type1_info *vfio_iommu_get_info(struct vfio_pci_device *device) +{ + struct vfio_iommu_type1_info *info; + + info = malloc(sizeof(*info)); + VFIO_ASSERT_NOT_NULL(info); + + *info = (struct vfio_iommu_type1_info) { + .argsz = sizeof(*info), + }; + + ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info); + VFIO_ASSERT_GE(info->argsz, sizeof(*info)); + + info = realloc(info, info->argsz); + VFIO_ASSERT_NOT_NULL(info); + + ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info); + VFIO_ASSERT_GE(info->argsz, sizeof(*info)); + + return info; +} + +/* + * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to + * report iommufd's iommu_iova_range. Free with free(). + */ +static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device, + u32 *nranges) +{ + struct vfio_iommu_type1_info_cap_iova_range *cap_range; + struct vfio_iommu_type1_info *info; + struct vfio_info_cap_header *hdr; + struct iommu_iova_range *ranges = NULL; + + info = vfio_iommu_get_info(device); + hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE); + VFIO_ASSERT_NOT_NULL(hdr); + + cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header); + VFIO_ASSERT_GT(cap_range->nr_iovas, 0); + + ranges = calloc(cap_range->nr_iovas, sizeof(*ranges)); + VFIO_ASSERT_NOT_NULL(ranges); + + for (u32 i = 0; i < cap_range->nr_iovas; i++) { + ranges[i] = (struct iommu_iova_range){ + .start = cap_range->iova_ranges[i].start, + .last = cap_range->iova_ranges[i].end, + }; + } + + *nranges = cap_range->nr_iovas; + + free(info); + return ranges; +} + +/* Return iova ranges of the device's IOAS. Free with free() */ +static struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device, + u32 *nranges) +{ + struct iommu_iova_range *ranges; + int ret; + + struct iommu_ioas_iova_ranges query = { + .size = sizeof(query), + .ioas_id = device->ioas_id, + }; + + ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query); + VFIO_ASSERT_EQ(ret, -1); + VFIO_ASSERT_EQ(errno, EMSGSIZE); + VFIO_ASSERT_GT(query.num_iovas, 0); + + ranges = calloc(query.num_iovas, sizeof(*ranges)); + VFIO_ASSERT_NOT_NULL(ranges); + + query.allowed_iovas = (uintptr_t)ranges; + + ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query); + *nranges = query.num_iovas; + + return ranges; +} + +static int iova_range_comp(const void *a, const void *b) +{ + const struct iommu_iova_range *ra = a, *rb = b; + + if (ra->start < rb->start) + return -1; + + if (ra->start > rb->start) + return 1; + + return 0; +} + +/* Return sorted IOVA ranges of the device. Free with free(). */ +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device, + u32 *nranges) +{ + struct iommu_iova_range *ranges; + + if (device->iommufd) + ranges = iommufd_iova_ranges(device, nranges); + else + ranges = vfio_iommu_iova_ranges(device, nranges); + + if (!ranges) + return NULL; + + VFIO_ASSERT_GT(*nranges, 0); + + /* Sort and check that ranges are sane and non-overlapping */ + qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp); + VFIO_ASSERT_LT(ranges[0].start, ranges[0].last); + + for (u32 i = 1; i < *nranges; i++) { + VFIO_ASSERT_LT(ranges[i].start, ranges[i].last); + VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start); + } + + return ranges; +} + +struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device) +{ + struct iova_allocator *allocator; + struct iommu_iova_range *ranges; + u32 nranges; + + ranges = vfio_pci_iova_ranges(device, &nranges); + VFIO_ASSERT_NOT_NULL(ranges); + + allocator = malloc(sizeof(*allocator)); + VFIO_ASSERT_NOT_NULL(allocator); + + *allocator = (struct iova_allocator){ + .ranges = ranges, + .nranges = nranges, + .range_idx = 0, + .range_offset = 0, + }; + + return allocator; +} + +void iova_allocator_cleanup(struct iova_allocator *allocator) +{ + free(allocator->ranges); + free(allocator); +} + +iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size) +{ + VFIO_ASSERT_GT(size, 0, "Invalid size arg, zero\n"); + VFIO_ASSERT_EQ(size & (size - 1), 0, "Invalid size arg, non-power-of-2\n"); + + for (;;) { + struct iommu_iova_range *range; + iova_t iova, last; + + VFIO_ASSERT_LT(allocator->range_idx, allocator->nranges, + "IOVA allocator out of space\n"); + + range = &allocator->ranges[allocator->range_idx]; + iova = range->start + allocator->range_offset; + + /* Check for sufficient space at the current offset */ + if (check_add_overflow(iova, size - 1, &last) || + last > range->last) + goto next_range; + + /* Align iova to size */ + iova = last & ~(size - 1); + + /* Check for sufficient space at the aligned iova */ + if (check_add_overflow(iova, size - 1, &last) || + last > range->last) + goto next_range; + + if (last == range->last) { + allocator->range_idx++; + allocator->range_offset = 0; + } else { + allocator->range_offset = last - range->start + 1; + } + + return iova; + +next_range: + allocator->range_idx++; + allocator->range_offset = 0; + } +} + iova_t __to_iova(struct vfio_pci_device *device, void *vaddr) { struct vfio_dma_region *region; diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c index 4f1ea79a200c..102603d4407d 100644 --- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c +++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c @@ -3,6 +3,8 @@ #include <sys/mman.h> #include <unistd.h> +#include <uapi/linux/types.h> +#include <linux/iommufd.h> #include <linux/limits.h> #include <linux/mman.h> #include <linux/sizes.h> @@ -93,6 +95,7 @@ static int iommu_mapping_get(const char *bdf, u64 iova, FIXTURE(vfio_dma_mapping_test) { struct vfio_pci_device *device; + struct iova_allocator *iova_allocator; }; FIXTURE_VARIANT(vfio_dma_mapping_test) { @@ -117,10 +120,12 @@ FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_1gb, SZ_1G, MAP_HUGETLB | FIXTURE_SETUP(vfio_dma_mapping_test) { self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode); + self->iova_allocator = iova_allocator_init(self->device); } FIXTURE_TEARDOWN(vfio_dma_mapping_test) { + iova_allocator_cleanup(self->iova_allocator); vfio_pci_device_cleanup(self->device); } @@ -142,7 +147,7 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap) else ASSERT_NE(region.vaddr, MAP_FAILED); - region.iova = (u64)region.vaddr; + region.iova = iova_allocator_alloc(self->iova_allocator, size); region.size = size; vfio_pci_dma_map(self->device, ®ion); @@ -219,7 +224,10 @@ FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(); FIXTURE_SETUP(vfio_dma_map_limit_test) { struct vfio_dma_region *region = &self->region; + struct iommu_iova_range *ranges; u64 region_size = getpagesize(); + iova_t last_iova; + u32 nranges; /* * Over-allocate mmap by double the size to provide enough backing vaddr @@ -232,8 +240,13 @@ FIXTURE_SETUP(vfio_dma_map_limit_test) MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); ASSERT_NE(region->vaddr, MAP_FAILED); - /* One page prior to the end of address space */ - region->iova = ~(iova_t)0 & ~(region_size - 1); + ranges = vfio_pci_iova_ranges(self->device, &nranges); + VFIO_ASSERT_NOT_NULL(ranges); + last_iova = ranges[nranges - 1].last; + free(ranges); + + /* One page prior to the last iova */ + region->iova = last_iova & ~(region_size - 1); region->size = region_size; } @@ -276,6 +289,7 @@ TEST_F(vfio_dma_map_limit_test, overflow) struct vfio_dma_region *region = &self->region; int rc; + region->iova = ~(iova_t)0 & ~(region->size - 1); region->size = self->mmap_size; rc = __vfio_pci_dma_map(self->device, region); diff --git a/tools/testing/selftests/vfio/vfio_pci_driver_test.c b/tools/testing/selftests/vfio/vfio_pci_driver_test.c index 2dbd70b7db62..f69eec8b928d 100644 --- a/tools/testing/selftests/vfio/vfio_pci_driver_test.c +++ b/tools/testing/selftests/vfio/vfio_pci_driver_test.c @@ -19,6 +19,7 @@ static const char *device_bdf; } while (0) static void region_setup(struct vfio_pci_device *device, + struct iova_allocator *iova_allocator, struct vfio_dma_region *region, u64 size) { const int flags = MAP_SHARED | MAP_ANONYMOUS; @@ -29,7 +30,7 @@ static void region_setup(struct vfio_pci_device *device, VFIO_ASSERT_NE(vaddr, MAP_FAILED); region->vaddr = vaddr; - region->iova = (u64)vaddr; + region->iova = iova_allocator_alloc(iova_allocator, size); region->size = size; vfio_pci_dma_map(device, region); @@ -44,6 +45,7 @@ static void region_teardown(struct vfio_pci_device *device, FIXTURE(vfio_pci_driver_test) { struct vfio_pci_device *device; + struct iova_allocator *iova_allocator; struct vfio_dma_region memcpy_region; void *vaddr; int msi_fd; @@ -72,14 +74,15 @@ FIXTURE_SETUP(vfio_pci_driver_test) struct vfio_pci_driver *driver; self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode); + self->iova_allocator = iova_allocator_init(self->device); driver = &self->device->driver; - region_setup(self->device, &self->memcpy_region, SZ_1G); - region_setup(self->device, &driver->region, SZ_2M); + region_setup(self->device, self->iova_allocator, &self->memcpy_region, SZ_1G); + region_setup(self->device, self->iova_allocator, &driver->region, SZ_2M); /* Any IOVA that doesn't overlap memcpy_region and driver->region. */ - self->unmapped_iova = 8UL * SZ_1G; + self->unmapped_iova = iova_allocator_alloc(self->iova_allocator, SZ_1G); vfio_pci_driver_init(self->device); self->msi_fd = self->device->msi_eventfds[driver->msi]; @@ -108,6 +111,7 @@ FIXTURE_TEARDOWN(vfio_pci_driver_test) region_teardown(self->device, &self->memcpy_region); region_teardown(self->device, &driver->region); + iova_allocator_cleanup(self->iova_allocator); vfio_pci_device_cleanup(self->device); } diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index d4517386e551..9e1250790f33 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -2015,6 +2015,11 @@ static void test_stream_transport_change_client(const struct test_opts *opts) exit(EXIT_FAILURE); } + /* Although setting SO_LINGER does not affect the original test + * for null-ptr-deref, it may trigger a lockdep warning. + */ + enable_so_linger(s, 1); + ret = connect(s, (struct sockaddr *)&sa, sizeof(sa)); /* The connect can fail due to signals coming from the thread, * or because the receiver connection queue is full. @@ -2352,7 +2357,7 @@ static struct test_case test_cases[] = { .run_server = test_stream_nolinger_server, }, { - .name = "SOCK_STREAM transport change null-ptr-deref", + .name = "SOCK_STREAM transport change null-ptr-deref, lockdep warn", .run_client = test_stream_transport_change_client, .run_server = test_stream_transport_change_server, }, |
