diff options
Diffstat (limited to 'tools/testing')
50 files changed, 1534 insertions, 208 deletions
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 418669927fb0..296516eecfd6 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -1523,6 +1523,23 @@ static void mock_companion(struct acpi_device *adev, struct device *dev) #define SZ_64G (SZ_32G * 2) #endif +static int cxl_mock_platform_device_add(struct platform_device *pdev, + struct platform_device **ppdev) +{ + int rc; + + if (ppdev) + *ppdev = pdev; + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + if (ppdev) + *ppdev = NULL; + } + + return rc; +} + static __init int cxl_rch_topo_init(void) { int rc, i; @@ -1537,13 +1554,10 @@ static __init int cxl_rch_topo_init(void) goto err_bridge; mock_companion(adev, &pdev->dev); - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_rch[i]); + if (rc) goto err_bridge; - } - cxl_rch[i] = pdev; mock_pci_bus[idx].bridge = &pdev->dev; rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, "firmware_node"); @@ -1595,13 +1609,10 @@ static __init int cxl_single_topo_init(void) goto err_bridge; mock_companion(adev, &pdev->dev); - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_hb_single[i]); + if (rc) goto err_bridge; - } - cxl_hb_single[i] = pdev; mock_pci_bus[i + NR_CXL_HOST_BRIDGES].bridge = &pdev->dev; rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, "physical_node"); @@ -1620,12 +1631,9 @@ static __init int cxl_single_topo_init(void) goto err_port; pdev->dev.parent = &bridge->dev; - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_root_single[i]); + if (rc) goto err_port; - } - cxl_root_single[i] = pdev; } for (i = 0; i < ARRAY_SIZE(cxl_swu_single); i++) { @@ -1638,12 +1646,9 @@ static __init int cxl_single_topo_init(void) goto err_uport; pdev->dev.parent = &root_port->dev; - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_swu_single[i]); + if (rc) goto err_uport; - } - cxl_swu_single[i] = pdev; } for (i = 0; i < ARRAY_SIZE(cxl_swd_single); i++) { @@ -1657,12 +1662,9 @@ static __init int cxl_single_topo_init(void) goto err_dport; pdev->dev.parent = &uport->dev; - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_swd_single[i]); + if (rc) goto err_dport; - } - cxl_swd_single[i] = pdev; } return 0; @@ -1735,12 +1737,9 @@ static int cxl_mem_init(void) pdev->dev.parent = &dport->dev; set_dev_node(&pdev->dev, i % 2); - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_mem[i]); + if (rc) goto err_mem; - } - cxl_mem[i] = pdev; } for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) { @@ -1753,12 +1752,9 @@ static int cxl_mem_init(void) pdev->dev.parent = &dport->dev; set_dev_node(&pdev->dev, i % 2); - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_mem_single[i]); + if (rc) goto err_single; - } - cxl_mem_single[i] = pdev; } for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++) { @@ -1772,12 +1768,9 @@ static int cxl_mem_init(void) pdev->dev.parent = &rch->dev; set_dev_node(&pdev->dev, i % 2); - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_rcd[i]); + if (rc) goto err_rcd; - } - cxl_rcd[i] = pdev; } return 0; @@ -1869,13 +1862,10 @@ static __init int cxl_test_init(void) goto err_bridge; mock_companion(adev, &pdev->dev); - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_host_bridge[i]); + if (rc) goto err_bridge; - } - cxl_host_bridge[i] = pdev; mock_pci_bus[i].bridge = &pdev->dev; rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, "physical_node"); @@ -1893,12 +1883,9 @@ static __init int cxl_test_init(void) goto err_port; pdev->dev.parent = &bridge->dev; - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_root_port[i]); + if (rc) goto err_port; - } - cxl_root_port[i] = pdev; } BUILD_BUG_ON(ARRAY_SIZE(cxl_switch_uport) != ARRAY_SIZE(cxl_root_port)); @@ -1911,12 +1898,9 @@ static __init int cxl_test_init(void) goto err_uport; pdev->dev.parent = &root_port->dev; - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_switch_uport[i]); + if (rc) goto err_uport; - } - cxl_switch_uport[i] = pdev; } for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++) { @@ -1929,12 +1913,9 @@ static __init int cxl_test_init(void) goto err_dport; pdev->dev.parent = &uport->dev; - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_switch_dport[i]); + if (rc) goto err_dport; - } - cxl_switch_dport[i] = pdev; } rc = cxl_single_topo_init(); @@ -1953,9 +1934,9 @@ static __init int cxl_test_init(void) acpi0017_mock.dev.bus = &platform_bus_type; cxl_acpi->dev.groups = cxl_acpi_groups; - rc = platform_device_add(cxl_acpi); + rc = cxl_mock_platform_device_add(cxl_acpi, NULL); if (rc) - goto err_root; + goto err_rch; rc = cxl_mem_init(); if (rc) diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c index 8cd298b78e44..04aaf4c9cf5e 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c @@ -14,7 +14,7 @@ static struct { const char *prog_name; int expected_runtime_err; } kfunc_dynptr_tests[] = { - {"dynptr_data_null", -EBADMSG}, + {"dynptr_data_null", -EINVAL}, }; static bool kfunc_not_supported; diff --git a/tools/testing/selftests/bpf/prog_tests/percpu_array_inner_map.c b/tools/testing/selftests/bpf/prog_tests/percpu_array_inner_map.c new file mode 100644 index 000000000000..2a8b2381306b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/percpu_array_inner_map.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +/* + * Test that replacing an inner percpu array map with one that has different + * max_entries is rejected. percpu_array_map_gen_lookup() inlines the + * template's index_mask, so allowing a smaller replacement would cause OOB. + */ +void test_percpu_array_inner_map(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts); + int outer_fd, tmpl_fd, good_fd, bad_fd, err; + int zero = 0; + + /* Create template: percpu array with 8 entries */ + tmpl_fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, "tmpl", + sizeof(int), sizeof(long), 8, NULL); + if (!ASSERT_OK_FD(tmpl_fd, "create_tmpl")) + return; + + /* Create outer array-of-maps using template */ + opts.inner_map_fd = tmpl_fd; + outer_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, "outer", + sizeof(int), sizeof(int), 1, &opts); + if (!ASSERT_OK_FD(outer_fd, "create_outer")) + goto close_tmpl; + + /* Insert template as initial inner map */ + err = bpf_map_update_elem(outer_fd, &zero, &tmpl_fd, 0); + if (!ASSERT_OK(err, "insert_tmpl")) + goto close_outer; + + /* Replacement with same max_entries should succeed */ + good_fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, "good", + sizeof(int), sizeof(long), 8, NULL); + if (!ASSERT_OK_FD(good_fd, "create_good")) + goto close_outer; + + err = bpf_map_update_elem(outer_fd, &zero, &good_fd, 0); + ASSERT_OK(err, "replace_same_max_entries"); + close(good_fd); + + /* Replacement with fewer max_entries must fail */ + bad_fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, "bad", + sizeof(int), sizeof(long), 2, NULL); + if (!ASSERT_OK_FD(bad_fd, "create_bad")) + goto close_outer; + + err = bpf_map_update_elem(outer_fd, &zero, &bad_fd, 0); + ASSERT_ERR(err, "replace_smaller_max_entries"); + close(bad_fd); + +close_outer: + close(outer_fd); +close_tmpl: + close(tmpl_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index b87e7f39e15a..6ed8e149e3d5 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -417,6 +417,107 @@ static void run_tests(int family, enum bpf_map_type map_type) close(map); } +/* + * Regression test for the KTLS + sockmap (verdict) reverse-order UAF. + * + * Vulnerable sequence: + * 1. Insert receiver socket into sockmap with BPF_SK_SKB_VERDICT program. + * sk->sk_data_ready becomes sk_psock_verdict_data_ready. + * 2. Configure TLS RX: tls_sw_strparser_arm() saves + * sk_psock_verdict_data_ready as rx_ctx->saved_data_ready. + * + * When data arrives, tls_rx_msg_ready() calls saved_data_ready() = + * sk_psock_verdict_data_ready(), which calls tcp_read_skb() and drains + * sk_receive_queue via __skb_unlink() without advancing copied_seq. + * tls_strp_msg_load() then finds the queue empty while tcp_inq() is still + * non-zero, hits WARN_ON_ONCE(!first), and leaves a dangling frag_list + * pointer that tls_decrypt_sg() walks — a use-after-free. + * + * The fix adds a tls_sw_has_ctx_rx() check to sk_psock_verdict_data_ready(), + * mirroring what sk_psock_strp_data_ready() already does: when a TLS RX + * context is present, defer to psock->saved_data_ready (sock_def_readable) + * instead of calling tcp_read_skb(), so TLS retains sole ownership of the + * receive queue. Data is then decrypted and returned correctly by + * tls_sw_recvmsg(). + */ +static void test_sockmap_ktls_verdict_with_tls_rx(int family, int sotype) +{ + struct tls12_crypto_info_aes_gcm_128 crypto_info = {}; + char send_buf[] = "hello ktls sockmap reverse order"; + char recv_buf[sizeof(send_buf)] = {}; + struct test_sockmap_ktls *skel; + int c = -1, p = -1, zero = 0; + int prog_fd, map_fd; + ssize_t n; + int err; + + skel = test_sockmap_ktls__open_and_load(); + if (!ASSERT_TRUE(skel, "open_and_load")) + return; + + err = create_pair(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_pair")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.prog_skb_verdict_pass); + map_fd = bpf_map__fd(skel->maps.sock_map_verdict); + + err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_SKB_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach sk_skb verdict")) + goto out; + + /* Step 1: configure TLS TX on sender (no sockmap involvement) */ + err = setsockopt(c, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP) client")) + goto out; + + crypto_info.info.version = TLS_1_2_VERSION; + crypto_info.info.cipher_type = TLS_CIPHER_AES_GCM_128; + memset(crypto_info.key, 0x01, sizeof(crypto_info.key)); + memset(crypto_info.salt, 0x02, sizeof(crypto_info.salt)); + + err = setsockopt(c, SOL_TLS, TLS_TX, &crypto_info, sizeof(crypto_info)); + if (!ASSERT_OK(err, "setsockopt(TLS_TX)")) + goto out; + + /* Step 2: insert receiver into sockmap BEFORE TLS RX */ + err = bpf_map_update_elem(map_fd, &zero, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto out; + + /* Step 3: configure TLS RX AFTER sockmap insertion */ + err = setsockopt(p, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP) server")) + goto out; + + err = setsockopt(p, SOL_TLS, TLS_RX, &crypto_info, sizeof(crypto_info)); + if (!ASSERT_OK(err, "setsockopt(TLS_RX)")) + goto out; + + /* + * A buggy kernel hits WARN_ON_ONCE in tls_strp_load_anchor_with_queue + * and may UAF in tls_decrypt_sg here. With the fix, + * sk_psock_verdict_data_ready defers to sock_def_readable and TLS + * decrypts the record normally. + */ + n = send(c, send_buf, sizeof(send_buf), 0); + if (!ASSERT_EQ(n, (ssize_t)sizeof(send_buf), "send")) + goto out; + + n = recv_timeout(p, recv_buf, sizeof(recv_buf), 0, 5); + if (!ASSERT_EQ(n, (ssize_t)sizeof(send_buf), "recv")) + goto out; + + ASSERT_OK(memcmp(send_buf, recv_buf, sizeof(send_buf)), "data integrity"); + +out: + if (c != -1) + close(c); + if (p != -1) + close(p); + test_sockmap_ktls__destroy(skel); +} + static void run_ktls_test(int family, int sotype) { if (test__start_subtest("tls simple offload")) @@ -429,6 +530,8 @@ static void run_ktls_test(int family, int sotype) test_sockmap_ktls_tx_no_buf(family, sotype, true); if (test__start_subtest("tls tx with pop")) test_sockmap_ktls_tx_pop(family, sotype); + if (test__start_subtest("tls verdict with tls rx")) + test_sockmap_ktls_verdict_with_tls_rx(family, sotype); } void test_sockmap_ktls(void) diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index a96b25ebff23..06cd24e37b3f 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -22,6 +22,7 @@ #include "verifier_bswap.skel.h" #include "verifier_btf_ctx_access.skel.h" #include "verifier_btf_unreliable_prog.skel.h" +#include "verifier_call_large_imm.skel.h" #include "verifier_cfg.skel.h" #include "verifier_cgroup_inv_retcode.skel.h" #include "verifier_cgroup_skb.skel.h" @@ -170,6 +171,7 @@ void test_verifier_bpf_trap(void) { RUN(verifier_bpf_trap); } void test_verifier_bswap(void) { RUN(verifier_bswap); } void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); } void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); } +void test_verifier_call_large_imm(void) { RUN(verifier_call_large_imm); } void test_verifier_cfg(void) { RUN(verifier_cfg); } void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); } void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); } diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 051e2b6f2694..ac44d60e5066 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -208,6 +208,28 @@ int reject_with_reference(void *ctx) return 0; } +__noinline int global_subprog_may_throw(struct __sk_buff *ctx) +{ + if (ctx->len) + bpf_throw(0); + return 0; +} + +SEC("?tc") +__failure __msg("Unreleased reference") +int reject_global_subprog_throw_with_reference(struct __sk_buff *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + if (ctx->protocol) + global_subprog_may_throw(ctx); + bpf_obj_drop(f); + return 0; +} + __noinline static int subprog_ref(struct __sk_buff *ctx) { struct foo *f; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c index 83df4919c224..facafeaf4620 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c @@ -17,6 +17,13 @@ struct { __type(value, int); } sock_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 2); + __type(key, int); + __type(value, int); +} sock_map_verdict SEC(".maps"); + SEC("sk_msg") int prog_sk_policy(struct sk_msg_md *msg) { @@ -38,3 +45,17 @@ int prog_sk_policy_redir(struct sk_msg_md *msg) bpf_msg_apply_bytes(msg, apply_bytes); return bpf_msg_redirect_map(msg, &sock_map, two, 0); } + +/* + * Verdict program for the reverse-order TLS/sockmap regression test. + * Returns SK_PASS so tcp_read_skb() drains the receive queue via + * sk_psock_verdict_recv() without calling tcp_eat_skb(), which is + * the precondition for the KTLS strparser frag_list UAF. + */ +SEC("sk_skb/verdict") +int prog_skb_verdict_pass(struct __sk_buff *skb) +{ + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_call_large_imm.c b/tools/testing/selftests/bpf/progs/verifier_call_large_imm.c new file mode 100644 index 000000000000..7998df07f6a6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_call_large_imm.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +int call_happened = 0; + +/* + * 32765 is the exact minimum number of padding instructions needed to + * trigger the verifier failure, because: + * 1. Counting the wrapper instructions around the padding block (one + * "r0=0" and two "exit" instructions), the actual jump distance + * evaluates to N + 3. + * 2. To overflow the s16 max bound (32767), we need N + 3 > 32767. + * Thus, N = 32765 is the exact minimum padding size required. + */ +static __attribute__((noinline)) void padding_subprog(void) +{ + asm volatile ( + "r0 = 0;" + ".rept 32765;" + "r0 += 0;" + ".endr;" + ::: __clobber_all); +} + +static __attribute__((noinline)) int target_subprog(void) +{ + /* Use volatile variable here to prevent optimization. */ + volatile int magic_ret = 3; + return magic_ret; +} + +SEC("syscall") +__success __retval(3) +int call_large_imm_test(void *ctx) +{ + /* + * Landing pad to handle call error on kernel without the fix, + * preventing kernel panic. + */ + asm volatile ( + "r0 = 0;" + ".rept 32768;" + "r0 += 0;" + ".endr;" + ::: __clobber_all); + + /* + * The call_happened variable is 1 only when the call insn wrongly + * go back to the landing pad above. + */ + if (call_happened == 1) { + /* Use volatile variable here to prevent optimization. */ + volatile int flag = -1; + return flag; + } + + call_happened = 1; + + padding_subprog(); + + return target_subprog(); +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c index 6a7295347e90..42f54936f4bb 100644 --- a/tools/testing/selftests/cgroup/lib/cgroup_util.c +++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c @@ -106,8 +106,9 @@ int cg_read_strcmp(const char *cgroup, const char *control, /* Handle the case of comparing against empty string */ if (!expected) return -1; - else - size = strlen(expected) + 1; + + /* needs size > 1, otherwise cg_read() reads 0 bytes */ + size = (expected[0] == '\0') ? 2 : strlen(expected) + 1; buf = malloc(size); if (!buf) diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh index 42a6628fb8bc..1c0444729e70 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh @@ -18,7 +18,7 @@ write_test() { echo "testing $interface $value" echo $value > $dir/$interface new=$(cat $dir/$interface) - [[ $value -ne $(cat $dir/$interface) ]] && { + [[ "$value" != "$new" ]] && { echo "$interface write $value failed: new:$new" exit 1 } diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index eeabd34bf083..12f59925500b 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -368,11 +368,15 @@ static int test_percpu_basic(const char *root) for (i = 0; i < 1000; i++) { child = cg_name_indexed(parent, "child", i); - if (!child) - return -1; + if (!child) { + ret = -1; + goto cleanup_children; + } - if (cg_create(child)) + if (cg_create(child)) { + free(child); goto cleanup_children; + } free(child); } diff --git a/tools/testing/selftests/drivers/net/shaper.py b/tools/testing/selftests/drivers/net/shaper.py index 11310f19bfa0..e39d270e688d 100755 --- a/tools/testing/selftests/drivers/net/shaper.py +++ b/tools/testing/selftests/drivers/net/shaper.py @@ -1,7 +1,10 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 -from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_true, KsftSkipEx +import errno + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, ksft_raises, ksft_true, KsftSkipEx from lib.py import EthtoolFamily, NetshaperFamily from lib.py import NetDrvEnv from lib.py import NlError @@ -438,6 +441,21 @@ def queue_update(cfg, nl_shaper) -> None: nl_shaper.delete({'ifindex': cfg.ifindex, 'handle': {'scope': 'queue', 'id': i}}) +def dup_leaves(cfg, nl_shaper) -> None: + """ Ensure that the kernel rejects duplicate leaves. """ + if not cfg.groups: + raise KsftSkipEx("device does not support node scope") + + with ksft_raises(NlError) as cm: + nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 0}}, + {'handle': {'scope': 'queue', 'id': 0}}], + 'handle': {'scope':'node'}, + 'metric': 'bps', + 'bw-max': 10000}) + ksft_eq(cm.exception.error, errno.EINVAL) + def main() -> None: with NetDrvEnv(__file__, queue_count=4) as cfg: cfg.queues = False @@ -453,7 +471,9 @@ def main() -> None: basic_groups, qgroups, delegation, - queue_update], args=(cfg, NetshaperFamily())) + dup_leaves, + queue_update], + args=(cfg, NetshaperFamily())) ksft_exit() diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index e5bbdb5bbdc3..4415c94b2866 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -41,10 +41,10 @@ #include <inttypes.h> #include <limits.h> #include <pthread.h> -#include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> +#include "kvm_syscalls.h" #include "kvm_util.h" #include "test_util.h" #include "memstress.h" diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index d6528c6f5e03..832ef4dfb99f 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -14,10 +14,10 @@ #include <linux/bitmap.h> #include <linux/falloc.h> #include <linux/sizes.h> -#include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> +#include "kvm_syscalls.h" #include "kvm_util.h" #include "numaif.h" #include "test_util.h" @@ -510,7 +510,12 @@ static void test_guest_memfd_guest(void) "Default VM type should support INIT_SHARED, supported flags = 0x%x", vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); - size = vm->page_size; + /* + * Use the max of the host or guest page size for all operations, as + * KVM requires guest_memfd files and memslots to be sized to multiples + * of the host page size. + */ + size = max_t(size_t, vm->page_size, page_size); fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_INIT_SHARED); vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0); @@ -519,7 +524,7 @@ static void test_guest_memfd_guest(void) memset(mem, 0xaa, size); kvm_munmap(mem, size); - virt_pg_map(vm, gpa, gpa); + virt_map(vm, gpa, gpa, size / vm->page_size); vcpu_args_set(vcpu, 2, gpa, size); vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/include/kvm_syscalls.h b/tools/testing/selftests/kvm/include/kvm_syscalls.h index 843c9904c46f..067a4c9cf452 100644 --- a/tools/testing/selftests/kvm/include/kvm_syscalls.h +++ b/tools/testing/selftests/kvm/include/kvm_syscalls.h @@ -2,8 +2,18 @@ #ifndef SELFTEST_KVM_SYSCALLS_H #define SELFTEST_KVM_SYSCALLS_H +/* + * Include both the kernel and libc versions of mman.h. The kernel provides + * the most up-to-date flags and definitions, while libc provides the syscall + * wrappers tests expect. + */ +#include <linux/mman.h> + +#include <sys/mman.h> #include <sys/syscall.h> +#include <test_util.h> + #define MAP_ARGS0(m,...) #define MAP_ARGS1(m,t,a,...) m(t,a) #define MAP_ARGS2(m,t,a,...) m(t,a), MAP_ARGS1(m,__VA_ARGS__) diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index d9b433b834f1..a56271c237ae 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -19,9 +19,9 @@ #include <errno.h> #include <unistd.h> #include <fcntl.h> -#include <sys/mman.h> #include "kselftest.h" +#include <linux/mman.h> #include <linux/types.h> #define msecs_to_usecs(msec) ((msec) * 1000ULL) diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index b49690658c60..8be0d09ecf0f 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -6,11 +6,14 @@ */ #include "test_util.h" -#include <execinfo.h> + #include <sys/syscall.h> #include "kselftest.h" +#ifdef __GLIBC__ +#include <execinfo.h> + /* Dumps the current stack trace to stderr. */ static void __attribute__((noinline)) test_dump_stack(void); static void test_dump_stack(void) @@ -57,6 +60,9 @@ static void test_dump_stack(void) system(cmd); #pragma GCC diagnostic pop } +#else +static void test_dump_stack(void) {} +#endif static pid_t _gettid(void) { diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index b689c4df4a01..1924a9895834 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -7,7 +7,7 @@ #include "test_util.h" -#include <bits/endian.h> +#include <endian.h> #include <linux/elf.h> #include "kvm_util.h" diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 2a76eca7029d..e08967ef7b7b 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -5,13 +5,13 @@ * Copyright (C) 2018, Google LLC. */ #include "test_util.h" +#include "kvm_syscalls.h" #include "kvm_util.h" #include "processor.h" #include "ucall_common.h" #include <assert.h> #include <sched.h> -#include <sys/mman.h> #include <sys/resource.h> #include <sys/types.h> #include <sys/stat.h> diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 3d02db371422..e977e979470f 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -15,7 +15,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <sys/mman.h> #include <time.h> #include <unistd.h> @@ -23,6 +22,7 @@ #include <linux/sizes.h> #include <test_util.h> +#include <kvm_syscalls.h> #include <kvm_util.h> #include <processor.h> #include <ucall_common.h> diff --git a/tools/testing/selftests/kvm/s390/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c index a9e5a01200b8..478381e6f84e 100644 --- a/tools/testing/selftests/kvm/s390/shared_zeropage_test.c +++ b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c @@ -4,11 +4,10 @@ * * Copyright (C) 2024, Red Hat, Inc. */ -#include <sys/mman.h> - #include <linux/fs.h> #include "test_util.h" +#include "kvm_syscalls.h" #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" diff --git a/tools/testing/selftests/kvm/s390/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c index 8054d2b178f0..d86179827a18 100644 --- a/tools/testing/selftests/kvm/s390/tprot.c +++ b/tools/testing/selftests/kvm/s390/tprot.c @@ -4,8 +4,8 @@ * * Copyright IBM Corp. 2021 */ -#include <sys/mman.h> #include "test_util.h" +#include "kvm_syscalls.h" #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 9b919a231c93..e639a9db51ee 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -8,11 +8,11 @@ #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> -#include <sys/mman.h> #include <linux/compiler.h> #include <test_util.h> +#include <kvm_syscalls.h> #include <kvm_util.h> #include <processor.h> diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index 7df2bc8eec02..76fcdd1fd3cb 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -220,6 +220,8 @@ static void check_steal_time_uapi(void) }; vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev); + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, 1, 0); + virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, 1); st_ipa = (ulong)ST_GPA_BASE | 1; ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); diff --git a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c index 404f0028e110..0c84c27ea584 100644 --- a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c +++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c @@ -137,6 +137,10 @@ static void run_apic_bus_clock_test(u64 apic_hz, u64 delay_ms, vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, NSEC_PER_SEC / apic_hz); + TEST_ASSERT_EQ(kvm_check_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS), 1); + TEST_ASSERT_EQ(vm_check_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS), + NSEC_PER_SEC / apic_hz); + vcpu = vm_vcpu_add(vm, 0, apic_guest_code); vcpu_args_set(vcpu, 2, apic_hz, delay_ms); diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index 788689497e92..77fb4c5d871b 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -986,6 +986,56 @@ TEST_F(hmm, migrate) } /* + * Migrate private file memory to device private memory. + */ +TEST_F(hmm, migrate_file_private) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + int fd; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + fd = hmm_create_file(size); + ASSERT_GE(fd, 0); + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = fd; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* * Migrate anonymous memory to device private memory and fault some of it back * to system memory, then try migrating the resulting mix of system and device * private memory to the device. diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index d8468451b3a3..c17b133a81d2 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -103,7 +103,7 @@ RUN_ALL=false RUN_DESTRUCTIVE=false TAP_PREFIX="# " -while getopts "aht:n" OPT; do +while getopts "aht:nd" OPT; do case ${OPT} in "a") RUN_ALL=true ;; "h") usage ;; diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh index e8031f68200a..ebdb4c790a5d 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh @@ -4,7 +4,7 @@ ALL_TESTS="vlmc_control_test vlmc_querier_test vlmc_igmp_mld_version_test \ vlmc_last_member_test vlmc_startup_query_test vlmc_membership_test \ vlmc_querier_intvl_test vlmc_query_intvl_test vlmc_query_response_intvl_test \ - vlmc_router_port_test vlmc_filtering_test" + vlmc_router_port_test vlmc_filtering_test vlmc_mcast_toggle_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -537,6 +537,34 @@ vlmc_filtering_test() log_test "Disable multicast vlan snooping when vlan filtering is disabled" } +vlmc_mcast_toggle_test() +{ + RET=0 + + ip link add name br1-mcast up type bridge mcast_snooping 1 mcast_querier 1 vlan_filtering 1 + ip link add name dummy1-mcast up master br1-mcast type dummy + + # Enabling per-VLAN multicast snooping should disable the per-port + # multicast context on "dummy1-mcast". + ip link set dev br1-mcast type bridge mcast_vlan_snooping 1 + + # Toggling multicast snooping on the bridge should not affect the + # per-port multicast context on "dummy1-mcast" given that per-VLAN + # multicast snooping is enabled. + ip link set dev br1-mcast type bridge mcast_snooping 0 + ip link set dev br1-mcast type bridge mcast_snooping 1 + + # If both the per-port and per-{port, VLAN} multicast contexts are + # enabled on "dummy1-mcast", removing it from the bridge will result + # in a splat. + ip link set dev dummy1-mcast nomaster + + log_test "Toggling mcast snooping with per-VLAN mcast snooping enabled" + + ip link del dev dummy1-mcast + ip link del dev br1-mcast +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index b2b99889942f..845c26dd01a9 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -273,8 +273,8 @@ setup() ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 &>/dev/null ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 &>/dev/null - ip -netns $ioam_node_alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null + ip -netns $ioam_node_alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null ip -netns $ioam_node_alpha link set veth0 up &>/dev/null ip -netns $ioam_node_alpha link set lo up &>/dev/null ip -netns $ioam_node_alpha route add 2001:db8:2::/64 \ diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c index 64f05229ab24..ded3f896e622 100644 --- a/tools/testing/selftests/net/lib/xdp_native.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -268,6 +268,17 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) return XDP_PASS; } +static __always_inline __u16 csum_fold_helper(__u32 csum) +{ + csum = (csum & 0xffff) + (csum >> 16); + return ~((csum & 0xffff) + (csum >> 16)); +} + +static __always_inline __u16 csum_fold_udp_helper(__u32 csum) +{ + return csum_fold_helper(csum) ? : 0xffff; +} + static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum) { void *data_end = (void *)(long)ctx->data_end; @@ -281,21 +292,22 @@ static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum) if (eth->h_proto == bpf_htons(ETH_P_IP)) { struct iphdr *iph = data + sizeof(*eth); - __u16 total_len; if (iph + 1 > (struct iphdr *)data_end) return NULL; - iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset); - udph = (void *)eth + sizeof(*iph) + sizeof(*eth); if (!udph || udph + 1 > (struct udphdr *)data_end) return NULL; - len_new = bpf_htons(bpf_ntohs(udph->len) + offset); + len = iph->tot_len; + len_new = bpf_htons(bpf_ntohs(len) + offset); + iph->tot_len = len_new; + iph->check = csum_fold_helper( + bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), ~((__u32)iph->check))); } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { struct ipv6hdr *ipv6h = data + sizeof(*eth); - __u16 payload_len; if (ipv6h + 1 > (struct ipv6hdr *)data_end) return NULL; @@ -304,33 +316,27 @@ static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum) if (!udph || udph + 1 > (struct udphdr *)data_end) return NULL; - *udp_csum = ~((__u32)udph->check); - len = ipv6h->payload_len; len_new = bpf_htons(bpf_ntohs(len) + offset); ipv6h->payload_len = len_new; - - *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, - sizeof(len_new), *udp_csum); - - len = udph->len; - len_new = bpf_htons(bpf_ntohs(udph->len) + offset); - *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, - sizeof(len_new), *udp_csum); } else { return NULL; } + len = udph->len; + len_new = bpf_htons(bpf_ntohs(len) + offset); + + *udp_csum = ~((__u32)udph->check); + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + udph->len = len_new; return udph; } -static __u16 csum_fold_helper(__u32 csum) -{ - return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff; -} - static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset, unsigned long hdr_len) { @@ -359,7 +365,7 @@ static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset, return -1; udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); - udph->check = (__u16)csum_fold_helper(udp_csum); + udph->check = (__u16)csum_fold_udp_helper(udp_csum); if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0) return -1; @@ -403,7 +409,7 @@ static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset) return -1; udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum); - udph->check = (__u16)csum_fold_helper(udp_csum); + udph->check = (__u16)csum_fold_udp_helper(udp_csum); buff_len = bpf_xdp_get_buff_len(ctx); @@ -484,8 +490,7 @@ static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len, return -1; udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); - - udph->check = (__u16)csum_fold_helper(udp_csum); + udph->check = (__u16)csum_fold_udp_helper(udp_csum); if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0) return -1; @@ -542,7 +547,7 @@ static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len, return -1; udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum); - udph->check = (__u16)csum_fold_helper(udp_csum); + udph->check = (__u16)csum_fold_udp_helper(udp_csum); if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0) return -1; diff --git a/tools/testing/selftests/net/link_netns.py b/tools/testing/selftests/net/link_netns.py index aab043c59d69..6d1f863b6262 100755 --- a/tools/testing/selftests/net/link_netns.py +++ b/tools/testing/selftests/net/link_netns.py @@ -3,13 +3,14 @@ import time -from lib.py import ksft_run, ksft_exit, ksft_true +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_true from lib.py import ip from lib.py import NetNS, NetNSEnter from lib.py import RtnlFamily LINK_NETNSID = 100 +LINK_NETNSID2 = 200 def test_event() -> None: @@ -32,6 +33,57 @@ def test_event() -> None: "Received unexpected link notification") +def test_event_all_nsid() -> None: + """NETLINK_LISTEN_ALL_NSID notifications: local events must not + carry nsid even with a self-referential mapping. Remote events + must carry the correct nsid.""" + + with NetNS() as ns1, NetNS() as ns2: + net1, net2 = str(ns1), str(ns2) + + with NetNSEnter(net1): + rtnl = RtnlFamily() + rtnl.ntf_listen_all_nsid() + rtnl.ntf_subscribe("rtnlgrp-link") + + # Case 1: no nsid assigned, local event, no nsid expected. + ip("link add dummy-lo type dummy", ns=net1) + + # Case 2: self-referential nsid, local event, still no nsid. + ip(f"netns set {net1} {LINK_NETNSID}", ns=net1) + ip("link add dummy-sr type dummy", ns=net1) + + # Case 3: remote event, nsid present. + ip(f"netns set {net2} {LINK_NETNSID2}", ns=net1) + ip("link add dummy-re type dummy", ns=net2) + + # Collect the three newlink events, ignoring unrelated noise. + events = {} + for msg in rtnl.poll_ntf(duration=1): + if msg['name'] == 'getlink': + ifname = msg['msg'].get('ifname') + if ifname in ('dummy-lo', 'dummy-sr', 'dummy-re'): + events[ifname] = msg + if len(events) == 3: + break + + ksft_true('dummy-lo' in events, "missing local event") + ksft_true(events['dummy-lo'].get('nsid') is None, + "local event without nsid should not carry nsid") + + ksft_true('dummy-sr' in events, "missing self-ref event") + ksft_true(events['dummy-sr'].get('nsid') is None, + "local event with self-ref nsid should not carry nsid") + + ksft_true('dummy-re' in events, "missing remote event") + ksft_eq(events['dummy-re'].get('nsid'), LINK_NETNSID2, + "remote event should carry nsid") + + ip("link del dummy-lo", ns=net1) + ip("link del dummy-sr", ns=net1) + ip("link del dummy-re", ns=net2) + + def validate_link_netns(netns, ifname, link_netnsid) -> bool: link_info = ip(f"-d link show dev {ifname}", ns=netns, json=True) if not link_info: @@ -133,7 +185,12 @@ def test_peer_net() -> None: def main() -> None: - ksft_run([test_event, test_link_net, test_peer_net]) + ksft_run([ + test_event, + test_event_all_nsid, + test_link_net, + test_peer_net, + ]) ksft_exit() diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index a6447f7a31fe..d158678fa6ab 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -401,7 +401,7 @@ do_transfer() mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" local start - start=$(date +%s%3N) + start=$(date +%s%N) ip netns exec ${connector_ns} \ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ $extra_args $connect_addr < "$cin" > "$cout" & @@ -423,7 +423,7 @@ do_transfer() fi local stop - stop=$(date +%s%3N) + stop=$(date +%s%N) if $capture; then sleep 1 @@ -439,7 +439,7 @@ do_transfer() fi local duration - duration=$((stop-start)) + duration=$(((stop-start) / 1000000)) printf "(duration %05sms) " "${duration}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index beec41f6662a..5acd12021e6e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1828,6 +1828,22 @@ chk_add_tx_nr() fi } +chk_add_drop_tx_nr() +{ + local drop_tx_nr=$1 + local count + + print_check "add addr tx drop" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTxDrop") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$drop_tx_nr" ]; then + fail_test "got $count ADD_ADDR drop[s] TX, expected $drop_tx_nr" + else + print_ok + fi +} + chk_rm_nr() { local rm_addr_nr=$1 @@ -3278,6 +3294,21 @@ add_addr_ports_tests() chk_mpc_endp_attempt ${retl} 1 fi + + # first signal address drops, second one still progresses + if reset "signal addr list progresses after tx drop"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 0 + ip netns exec $ns1 sysctl -q net.ipv4.tcp_timestamps=1 + ip netns exec $ns2 sysctl -q net.ipv4.tcp_timestamps=1 + + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal port 10100 + pm_nl_add_endpoint $ns1 dead:beef:3::1 flags signal + run_tests $ns1 $ns2 dead:beef:1::1 + chk_add_drop_tx_nr 1 + chk_add_tx_nr 1 1 + chk_add_nr 1 1 0 + fi } bind_tests() diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 989a5975dcea..5ef6033775c8 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -28,7 +28,7 @@ declare -rx MPTCP_LIB_AF_INET6=10 MPTCP_LIB_SUBTESTS=() MPTCP_LIB_SUBTESTS_DUPLICATED=0 MPTCP_LIB_SUBTEST_FLAKY=0 -MPTCP_LIB_SUBTESTS_LAST_TS_MS= +MPTCP_LIB_SUBTESTS_LAST_TS_NS= MPTCP_LIB_TEST_COUNTER=0 MPTCP_LIB_TEST_FORMAT="%02u %-50s" MPTCP_LIB_IP_MPTCP=0 @@ -236,7 +236,7 @@ mptcp_lib_kversion_ge() { } mptcp_lib_subtests_last_ts_reset() { - MPTCP_LIB_SUBTESTS_LAST_TS_MS="$(date +%s%3N)" + MPTCP_LIB_SUBTESTS_LAST_TS_NS="$(date +%s%N)" } mptcp_lib_subtests_last_ts_reset @@ -255,7 +255,7 @@ __mptcp_lib_result_check_duplicated() { __mptcp_lib_result_add() { local result="${1}" local time="time=" - local ts_prev_ms + local ts_prev_ns shift local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1)) @@ -265,9 +265,9 @@ __mptcp_lib_result_add() { # not to add two '#' [[ "${*}" != *"#"* ]] && time="# ${time}" - ts_prev_ms="${MPTCP_LIB_SUBTESTS_LAST_TS_MS}" + ts_prev_ns="${MPTCP_LIB_SUBTESTS_LAST_TS_NS}" mptcp_lib_subtests_last_ts_reset - time+="$((MPTCP_LIB_SUBTESTS_LAST_TS_MS - ts_prev_ms))ms" + time+="$(((MPTCP_LIB_SUBTESTS_LAST_TS_NS - ts_prev_ns) / 1000000))ms" MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*} ${time}") } diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index ee2d1a5254f8..d953ee218c0f 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -26,6 +26,7 @@ TEST_PROGS := \ nft_concat_range.sh \ nft_conntrack_helper.sh \ nft_fib.sh \ + nft_fib_nexthop.sh \ nft_flowtable.sh \ nft_interface_stress.sh \ nft_meta.sh \ diff --git a/tools/testing/selftests/net/netfilter/nft_fib_nexthop.sh b/tools/testing/selftests/net/netfilter/nft_fib_nexthop.sh new file mode 100755 index 000000000000..c4f203057382 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_fib_nexthop.sh @@ -0,0 +1,152 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2154 +# +# Exercise nft_fib6_eval()'s sibling/nh enumeration on three route shapes: +# 1) route via a single external nexthop (nhid) +# 2) route via an external nexthop group (nhid -> group, two members) +# 3) route via old-style multipath (nexthop ... nexthop ...) +# +# In each scenario the route's nexthop set contains veth0 (the iif of the +# test packet). nft_fib6_info_nh_uses_dev() must walk the set and report +# veth0 as a valid oif. For (2) and (3) the matching nexthop is the second +# member, so the walk has to traverse beyond the primary nh. +# +# After sending $PKTS ICMPv6 echo requests from ns1, check two counters on +# nsrouter: +# nf_ok -- `fib daddr . iif oif eq "veth0"` must equal $PKTS +# nf_bad -- `fib daddr . iif oif missing` must stay at 0 +# Both rules also match on iif veth0 and ip6 daddr dead:dead::/64 so that +# kernel-generated ND/MLD/RA traffic cannot pollute the counters. +# +# Topology similar to nft_fib.sh, without ns2; two dummy interfaces on +# nsrouter host extra nh devices: +# +# dead:1::99 dead:1::1 +# ns1 <----veth----> nsrouter --- dummy0 dead:2::1 +# \-- dummy1 dead:9::1 + +source lib.sh + +ret=0 +PKTS=3 + +checktool "nft --version" "run test without nft" +checktool "ip -V" "run test without iproute2" + +setup_ns nsrouter ns1 +trap cleanup_all_ns EXIT + +if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" \ + > /dev/null 2>&1; then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi + +ip -net "$ns1" link set lo up +ip -net "$ns1" link set eth0 up +ip -net "$ns1" -6 addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns1" -6 route add default via dead:1::1 + +ip -net "$nsrouter" link set lo up +ip -net "$nsrouter" link set veth0 up +ip -net "$nsrouter" -6 addr add dead:1::1/64 dev veth0 nodad + +if ! ip -net "$nsrouter" link add dummy0 type dummy 2>/dev/null; then + echo "SKIP: dummy netdev not available" + exit $ksft_skip +fi +ip -net "$nsrouter" link set dummy0 up +ip -net "$nsrouter" -6 addr add dead:2::1/64 dev dummy0 nodad + +ip -net "$nsrouter" link add dummy1 type dummy +ip -net "$nsrouter" link set dummy1 up +ip -net "$nsrouter" -6 addr add dead:9::1/64 dev dummy1 nodad + +ip netns exec "$nsrouter" sysctl -q net.ipv6.conf.all.forwarding=1 + +load_fib_rule() { + # filter on iif + daddr so the counters only see our test packets + ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table ip6 t { + counter nf_ok { } + counter nf_bad { } + chain c { + type filter hook prerouting priority 0; policy accept; + iif "veth0" ip6 daddr dead:dead::/64 fib daddr . iif oif eq "veth0" counter name nf_ok + iif "veth0" ip6 daddr dead:dead::/64 fib daddr . iif oif missing counter name nf_bad + } +} +EOF +} + +bad_counter() { + local counter=$1 + local expect=$2 + local tag=$3 + + echo "FAIL ($tag): counter $counter has unexpected value (expected \"$expect\")" 1>&2 + ip netns exec "$nsrouter" nft list counter ip6 t "$counter" 1>&2 +} + +run_scenario() { + local what="$1"; shift + # counter output format is "packets PACKET_NUM bytes BYTES_NUM"; + # we only care about the packet count + local expect_ok="packets $PKTS bytes" + local expect_bad="packets 0 bytes" + local lret=0 + + # reset route + nexthop state between scenarios + ip -net "$nsrouter" -6 route del dead:dead::/64 > /dev/null 2>&1 || true + ip -net "$nsrouter" nexthop flush > /dev/null 2>&1 || true + + # run the scenario function passed by the caller + "$@" || echo "WARN ($what): scenario setup returned non-zero" + + load_fib_rule || { echo "FAIL ($what): nft load"; ret=1; return; } + + # ping a daddr inside dead:dead::/64 so fib has to walk the nh set + ip netns exec "$ns1" ping -6 -c "$PKTS" -i 0.1 -W 1 dead:dead::1 \ + > /dev/null 2>&1 || true + + # verify the packets went through the expected fib path + if ! ip netns exec "$nsrouter" nft list counter ip6 t nf_ok | grep -q "$expect_ok"; then + bad_counter nf_ok "$expect_ok" "$what" + lret=1 + fi + if ! ip netns exec "$nsrouter" nft list counter ip6 t nf_bad | grep -q "$expect_bad"; then + bad_counter nf_bad "$expect_bad" "$what" + lret=1 + fi + + if [ $lret -eq 0 ]; then + echo "PASS: $what" + else + ret=1 + fi +} + +scenario_single_nh() { + ip -net "$nsrouter" nexthop add id 1 via dead:1::99 dev veth0 + ip -net "$nsrouter" -6 route add dead:dead::/64 nhid 1 +} +run_scenario "single external nexthop (nhid -> veth0)" scenario_single_nh + +scenario_nh_group() { + ip -net "$nsrouter" nexthop add id 1 via dead:2::2 dev dummy0 + ip -net "$nsrouter" nexthop add id 2 via dead:1::99 dev veth0 + ip -net "$nsrouter" nexthop add id 100 group 1/2 + ip -net "$nsrouter" -6 route add dead:dead::/64 nhid 100 +} +run_scenario "nexthop group (dummy0 + veth0)" scenario_nh_group + +scenario_old_multipath() { + ip -net "$nsrouter" -6 route add dead:dead::/64 \ + nexthop via dead:2::2 dev dummy0 \ + nexthop via dead:1::99 dev veth0 +} +run_scenario "old-style multipath (sibling on veth0)" scenario_old_multipath + +exit $ret diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh index af1532b4d2da..ec9a51bbf3c9 100755 --- a/tools/testing/selftests/net/ovpn/test-close-socket.sh +++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh @@ -53,7 +53,7 @@ ovpn_run_ping_traffic() { for p in $(seq 1 ${OVPN_NUM_PEERS}); do ovpn_cmd_ok "send ping traffic to peer ${p}" \ - ip netns exec ovpn_peer0 ping -qfc 500 -w 3 \ + ip netns exec ovpn_peer0 ping -qfc 100 -w 3 \ 5.5.5.$((p + 1)) done } diff --git a/tools/testing/selftests/net/ovpn/test-mark.sh b/tools/testing/selftests/net/ovpn/test-mark.sh index 5a8f47554286..7c1d56e9c525 100755 --- a/tools/testing/selftests/net/ovpn/test-mark.sh +++ b/tools/testing/selftests/net/ovpn/test-mark.sh @@ -66,7 +66,7 @@ ovpn_mark_run_baseline_traffic() { for p in $(seq 1 3); do ovpn_cmd_ok "send baseline traffic to peer ${p}" \ - ip netns exec ovpn_peer0 ping -qfc 500 -w 3 \ + ip netns exec ovpn_peer0 ping -qfc 100 -w 3 \ 5.5.5.$((p + 1)) done } @@ -101,7 +101,7 @@ ovpn_mark_verify_drop_traffic() { local total_count for p in $(seq 1 3); do - if ping_output=$(ip netns exec ovpn_peer0 ping -qfc 500 -w 1 \ + if ping_output=$(ip netns exec ovpn_peer0 ping -qfc 100 -w 1 \ 5.5.5.$((p + 1)) 2>&1); then printf '%s\n' "expected ping to peer ${p} to fail \ after nft drop rule" @@ -144,7 +144,7 @@ ovpn_mark_verify_traffic_recovery() { sleep 1 for p in $(seq 1 3); do ovpn_cmd_ok "send recovery traffic to peer ${p}" \ - ip netns exec ovpn_peer0 ping -qfc 500 -w 3 \ + ip netns exec ovpn_peer0 ping -qfc 100 -w 3 \ 5.5.5.$((p + 1)) done } diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh index c06e3135fbef..9b5610837032 100755 --- a/tools/testing/selftests/net/ovpn/test.sh +++ b/tools/testing/selftests/net/ovpn/test.sh @@ -110,7 +110,7 @@ ovpn_run_basic_traffic() { ovpn_run_lan_traffic() { ovpn_cmd_ok "ping LAN behind peer1" \ - ip netns exec ovpn_peer0 ping -qfc 500 -w 3 "${OVPN_LAN_IP}" + ip netns exec ovpn_peer0 ping -qfc 100 -w 3 "${OVPN_LAN_IP}" } ovpn_run_float_mode() { @@ -127,7 +127,7 @@ ovpn_run_float_mode() { for p in $(seq 1 ${OVPN_NUM_PEERS}); do peer_ns="ovpn_peer${p}" ovpn_cmd_ok "ping tunnel after float peer ${p}" \ - ip netns exec "${peer_ns}" ping -qfc 500 -w 3 5.5.5.1 + ip netns exec "${peer_ns}" ping -qfc 100 -w 3 5.5.5.1 done } diff --git a/tools/testing/selftests/net/rds/config b/tools/testing/selftests/net/rds/config index 97db7ecb892a..3d62d0c750a8 100644 --- a/tools/testing/selftests/net/rds/config +++ b/tools/testing/selftests/net/rds/config @@ -1,3 +1,4 @@ +CONFIG_MODULES=n CONFIG_NET_NS=y CONFIG_NET_SCH_NETEM=y CONFIG_RDS=y diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index c499953d4885..ace3a99023ed 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -24,6 +24,8 @@ ALL_TESTS=" kci_test_macsec kci_test_macsec_vlan kci_test_team_bridge_macvlan + kci_test_bridge_promisc_netlink + kci_test_bridge_promisc_sysfs kci_test_ipsec kci_test_ipsec_offload kci_test_fdb_get @@ -61,6 +63,14 @@ check_fail() fi } +sysfs_write() +{ + local val="$1" + local path="$2" + + echo "$val" > "$path" +} + run_cmd_common() { local cmd="$*" @@ -680,6 +690,59 @@ kci_test_team_bridge_macvlan() end_test "PASS: team_bridge_macvlan" } +# Test that changing bridge port flags via the netlink path does not sleep with +# the bridge spin lock held. +kci_test_bridge_promisc_netlink() +{ + local dummy="test_dummy1" + local bridge="test_br1" + local team="test_team1" + local ret=0 + + run_cmd ip link add $team up type team + run_cmd ip link add $bridge up type bridge vlan_filtering 1 + run_cmd ip link add $dummy up type dummy + run_cmd ip link set $dummy master $bridge + run_cmd ip link set $team master $bridge + + # This causes the bridge driver to sync all the static FDB entries to + # the team device (which supports unicast filtering) and remove it from + # promiscuous mode. The call to dev_set_promiscuity() can sleep due to + # Rx mode inlining, which is a problem if the bridge spin lock is held. + run_cmd bridge link set dev $dummy flood off learning off + + run_cmd ip link del $dummy + run_cmd ip link del $bridge + run_cmd ip link del $team + + end_test "PASS: bridge_promisc_netlink" +} + +# Same as kci_test_bridge_promisc_netlink(), but the flags are changed via the +# sysfs path. +kci_test_bridge_promisc_sysfs() +{ + local dummy="test_dummy1" + local bridge="test_br1" + local team="test_team1" + local ret=0 + + run_cmd ip link add $team up type team + run_cmd ip link add $bridge up type bridge vlan_filtering 1 + run_cmd ip link add $dummy up type dummy + run_cmd ip link set $dummy master $bridge + run_cmd ip link set $team master $bridge + + run_cmd sysfs_write 0 /sys/class/net/$dummy/brport/unicast_flood + run_cmd sysfs_write 0 /sys/class/net/$dummy/brport/learning + + run_cmd ip link del $dummy + run_cmd ip link del $bridge + run_cmd ip link del $team + + end_test "PASS: bridge_promisc_sysfs" +} + #------------------------------------------------------------------- # Example commands # ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \ diff --git a/tools/testing/selftests/rdma/rxe_ipv6.sh b/tools/testing/selftests/rdma/rxe_ipv6.sh index b7059bfd6d7c..32dad687a044 100755 --- a/tools/testing/selftests/rdma/rxe_ipv6.sh +++ b/tools/testing/selftests/rdma/rxe_ipv6.sh @@ -8,6 +8,8 @@ RXE_NAME="rxe6" PORT=4791 IP6_ADDR="2001:db8::1/64" +source "$(dirname "$0")/../kselftest/ktap_helpers.sh" + exec > /dev/null # Cleanup function to run on exit (even on failure) @@ -21,8 +23,8 @@ trap cleanup EXIT # 1. Prerequisites check for mod in tun veth rdma_rxe; do if ! modinfo "$mod" >/dev/null 2>&1; then - echo "Error: Kernel module '$mod' not found." - exit 1 + echo "SKIP: Kernel module '$mod' not found." >&2 + exit $KSFT_SKIP fi done diff --git a/tools/testing/selftests/rdma/rxe_rping_between_netns.sh b/tools/testing/selftests/rdma/rxe_rping_between_netns.sh index e5b876f58c6e..e7554fbb8951 100755 --- a/tools/testing/selftests/rdma/rxe_rping_between_netns.sh +++ b/tools/testing/selftests/rdma/rxe_rping_between_netns.sh @@ -8,6 +8,8 @@ IP_A="1.1.1.1" IP_B="1.1.1.2" PORT=4791 +source "$(dirname "$0")/../kselftest/ktap_helpers.sh" + exec > /dev/null # --- Cleanup Routine --- @@ -27,6 +29,11 @@ if [[ $EUID -ne 0 ]]; then exit 1 fi +if ! modinfo rdma_rxe >/dev/null 2>&1; then + echo "SKIP: Kernel module 'rdma_rxe' not found." >&2 + exit $KSFT_SKIP +fi + modprobe rdma_rxe || { echo "Failed to load rdma_rxe"; exit 1; } # --- Setup Network Topology --- diff --git a/tools/testing/selftests/rdma/rxe_socket_with_netns.sh b/tools/testing/selftests/rdma/rxe_socket_with_netns.sh index 002e5098f751..9478657c02c1 100755 --- a/tools/testing/selftests/rdma/rxe_socket_with_netns.sh +++ b/tools/testing/selftests/rdma/rxe_socket_with_netns.sh @@ -4,6 +4,8 @@ PORT=4791 MODS=("tun" "rdma_rxe") +source "$(dirname "$0")/../kselftest/ktap_helpers.sh" + exec > /dev/null # --- Helper: Cleanup Routine --- @@ -26,6 +28,10 @@ if [[ $EUID -ne 0 ]]; then fi for m in "${MODS[@]}"; do + if ! modinfo "$m" >/dev/null 2>&1; then + echo "SKIP: Kernel module '$m' not found." >&2 + exit $KSFT_SKIP + fi modprobe "$m" || { echo "Error: Failed to load $m"; exit 1; } done diff --git a/tools/testing/selftests/rdma/rxe_test_NETDEV_UNREGISTER.sh b/tools/testing/selftests/rdma/rxe_test_NETDEV_UNREGISTER.sh index 021ca451499d..8c18cea7535c 100755 --- a/tools/testing/selftests/rdma/rxe_test_NETDEV_UNREGISTER.sh +++ b/tools/testing/selftests/rdma/rxe_test_NETDEV_UNREGISTER.sh @@ -5,6 +5,8 @@ DEV_NAME="tun0" RXE_NAME="rxe0" RDMA_PORT=4791 +source "$(dirname "$0")/../kselftest/ktap_helpers.sh" + exec > /dev/null # --- Cleanup Routine --- @@ -19,8 +21,8 @@ trap cleanup EXIT # 1. Dependency Check if ! modinfo rdma_rxe >/dev/null 2>&1; then - echo "Error: rdma_rxe module not found." - exit 1 + echo "SKIP: rdma_rxe module not found." >&2 + exit $KSFT_SKIP fi modprobe rdma_rxe diff --git a/tools/testing/selftests/sched_ext/dequeue.c b/tools/testing/selftests/sched_ext/dequeue.c index 4e93262703ca..383d06e972a4 100644 --- a/tools/testing/selftests/sched_ext/dequeue.c +++ b/tools/testing/selftests/sched_ext/dequeue.c @@ -33,6 +33,7 @@ static void worker_fn(int id) /* Do some work to trigger scheduling events */ for (j = 0; j < 10000; j++) sum += j; + asm volatile("" : : "r"(sum)); /* Sleep to trigger dequeue */ usleep(1000 + (id * 100)); diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index b056eb966871..d0cad6571691 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -1144,6 +1144,620 @@ "teardown": [ "$TC qdisc del dev $DUMMY clsact" ] + }, + { + "id": "531c", + "name": "Redirect multiport: dummy egress -> dev1 ingress -> dummy egress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY clsact", + "$TC filter add dev $DUMMY egress protocol ip prio 10 matchall action mirred ingress redirect dev $DEV1 index 1", + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 2" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 3 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY clsact", + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { + "id": "b1d7", + "name": "Redirect singleport: dev1 ingress -> dev1 egress -> dev1 ingress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred egress redirect dev $DEV1 index 1" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 egress protocol ip prio 11 matchall action mirred ingress redirect dev $DEV1 index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "index": 1, + "stats": { + "packets": 3 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { + "id": "c66d", + "name": "Redirect multiport: dev1 ingress -> dummy ingress -> dev1 egress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred ingress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY ingress protocol ip prio 11 matchall action mirred egress redirect dev $DEV1 index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "aa99", + "name": "Redirect multiport: dev1 ingress -> dummy ingress -> dev1 ingress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred ingress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY ingress protocol ip prio 11 matchall action mirred ingress redirect dev $DEV1 index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 2, + "overlimits": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "37d7", + "name": "Redirect multiport: dev1 ingress -> dummy egress -> dev1 ingress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY egress protocol ip prio 11 matchall action mirred ingress redirect dev $DEV1 index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "index": 1, + "stats": { + "packets": 3 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "6d02", + "name": "Redirect multiport: dummy egress -> dev1 ingress -> dummy egress, different prios (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY clsact", + "$TC filter add dev $DUMMY egress protocol ip prio 10 matchall action mirred ingress redirect dev $DEV1 index 1", + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 11 matchall action mirred egress redirect dev $DUMMY index 2" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 3 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY clsact", + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { + "id": "8115", + "name": "Redirect multiport: dev1 ingress -> dummy ingress -> dummy egress -> dev1 egress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred ingress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact", + "$TC filter add dev $DUMMY ingress protocol ip prio 11 matchall action mirred egress redirect dev $DUMMY index 2" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY egress protocol ip prio 12 matchall action mirred egress redirect dev $DEV1 index 3", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "9eb3", + "name": "Redirect multiport: dev1 ingress -> dummy egress -> dev1 egress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY egress protocol ip prio 11 matchall action mirred egress redirect dev $DEV1 index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "d837", + "name": "Redirect multiport: dev1 ingress -> dummy egress -> dummy ingress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY egress protocol ip prio 11 matchall action mirred ingress redirect dev $DUMMY index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "2071", + "name": "Redirect singleport: dev1 ingress -> dev1 ingress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred ingress redirect dev $DEV1 index 1", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 1, + "overlimits": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { + "id": "0101", + "name": "Redirect singleport: dummy egress -> dummy ingress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY clsact", + "$TC filter add dev $DUMMY egress protocol ip prio 11 matchall action mirred ingress redirect dev $DUMMY index 1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "cf97", + "name": "Redirect multiport: dev1 ingress -> dummy ingress -> dummy egress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred ingress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY ingress protocol ip prio 11 matchall action mirred egress redirect dev $DUMMY index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] } - ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index b1f856cf62c1..82c38a13dfbf 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -702,6 +702,7 @@ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1", "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2", "ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true", "$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1", @@ -714,8 +715,8 @@ { "kind": "hfsc", "handle": "1:", - "bytes": 294, - "packets": 3 + "bytes": 392, + "packets": 4 } ], "matchCount": "1", @@ -1284,5 +1285,46 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "3a62", + "name": "Try to create a qlen underflow with QFQ/CBS", + "category": [ + "qdisc", + "qfq", + "cbs" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: qfq", + "$TC class add dev $DUMMY classid 1:1 parent 1: qfq", + "$TC class add dev $DUMMY classid 1:2 parent 1: qfq", + "$TC qdisc add dev $DUMMY handle 2: parent 1:1 cbs", + "$TC qdisc add dev $DUMMY handle 3: parent 2: netem delay 5000000000", + "$TC filter add dev $DUMMY parent 1: prio 1 u32 match ip dst 10.10.10.1 classid 1:1 action ok", + "$TC filter add dev $DUMMY parent 1: prio 2 u32 match ip dst 10.10.10.2 classid 1:2 action ok", + "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true", + "$IP l set $DUMMY down", + "$IP l set $DUMMY up", + "$TC qdisc replace dev $DUMMY handle 4: parent 2: pfifo" + ], + "cmdUnderTest": "ping -c 1 10.10.10.2 -W0.01 -I$DUMMY", + "expExitCode": "1", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:1", + "matchJSON": [ + { + "kind": "cbs", + "handle": "2:", + "bytes": 0, + "packets": 0 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json index 718d2df2aafa..472b672a600d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json @@ -338,84 +338,34 @@ ] }, { - "id": "d34d", - "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change root", - "category": ["qdisc", "netem"], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DUMMY root handle 1: netem limit 1", - "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1" - ], - "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: netem duplicate 50%", - "expExitCode": "2", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc netem", - "matchCount": "2", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1:0 root" - ] - }, - { - "id": "b33f", - "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change non-root", - "category": ["qdisc", "netem"], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DUMMY root handle 1: netem limit 1", - "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1" - ], - "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 2: netem duplicate 50%", - "expExitCode": "2", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc netem", - "matchCount": "2", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1:0 root" - ] - }, - { - "id": "cafe", - "name": "NETEM test qdisc duplication restriction in qdisc tree", - "category": ["qdisc", "netem"], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DUMMY root handle 1: netem limit 1 duplicate 100%" + "id": "8c17", + "name": "Test netem's recursive duplicate", + "category": [ + "qdisc", + "netem" ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1: handle 2: netem duplicate 100%", - "expExitCode": "2", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc netem", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1:0 root" - ] - }, - { - "id": "1337", - "name": "NETEM test qdisc duplication restriction in qdisc tree across branches", - "category": ["qdisc", "netem"], "plugins": { "requires": "nsPlugin" }, "setup": [ - "$TC qdisc add dev $DUMMY parent root handle 1:0 hfsc", - "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc rt m2 10Mbit", - "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem", - "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc rt m2 10Mbit" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", - "expExitCode": "2", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc netem", - "matchCount": "1", + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: netem limit 1000 duplicate 100%", + "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1000 duplicate 100%" + ], + "cmdUnderTest": "ping -c 1 10.10.11.11 -W 0.01", + "expExitCode": "1", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY root", + "matchJSON": [ + { + "kind": "netem", + "handle": "1:", + "bytes": 294, + "packets": 3 + } + ], "teardown": [ - "$TC qdisc del dev $DUMMY handle 1:0 root" + "$TC qdisc del dev $DUMMY handle 1: root" ] - } + } ] diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index fbd9b1e7342a..0b23c09daea5 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1735,6 +1735,17 @@ static int __cmd_dev_add(const struct dev_ctx *ctx) goto fail; } + /* + * The kernel may reduce nr_hw_queues (e.g. capped to nr_cpu_ids). + * Cap nthreads to the actual queue count to avoid creating extra + * handler threads that will hang during device removal. + * + * per_io_tasks mode is excluded: threads interleave across all + * queues so nthreads > nr_hw_queues is valid and intentional. + */ + if (!ctx->per_io_tasks && dev->nthreads > info->nr_hw_queues) + dev->nthreads = info->nr_hw_queues; + ret = ublk_start_daemon(ctx, dev); ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret); if (ret < 0) |
