diff options
Diffstat (limited to 'tools/testing/selftests/net')
20 files changed, 2011 insertions, 48 deletions
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 332f387615d7..227f9e067d25 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -41,6 +41,7 @@ TEST_PROGS += netns-name.sh TEST_PROGS += link_netns.py TEST_PROGS += nl_netdev.py TEST_PROGS += rtnetlink.py +TEST_PROGS += rtnetlink_notification.sh TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh diff --git a/tools/testing/selftests/net/bench/Makefile b/tools/testing/selftests/net/bench/Makefile new file mode 100644 index 000000000000..2546c45e42f7 --- /dev/null +++ b/tools/testing/selftests/net/bench/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_MODS_DIR := page_pool + +TEST_PROGS += test_bench_page_pool.sh + +include ../../lib.mk diff --git a/tools/testing/selftests/net/bench/page_pool/Makefile b/tools/testing/selftests/net/bench/page_pool/Makefile new file mode 100644 index 000000000000..0549a16ba275 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/Makefile @@ -0,0 +1,17 @@ +BENCH_PAGE_POOL_SIMPLE_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= /lib/modules/$(shell uname -r)/build + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +obj-m += bench_page_pool.o +bench_page_pool-y += bench_page_pool_simple.o time_bench.o + +all: + +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) clean diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c new file mode 100644 index 000000000000..f183d5e30dc6 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Benchmark module for page_pool. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/mutex.h> + +#include <linux/version.h> +#include <net/page_pool/helpers.h> + +#include <linux/interrupt.h> +#include <linux/limits.h> + +#include "time_bench.h" + +static int verbose = 1; +#define MY_POOL_SIZE 1024 + +static void _page_pool_put_page(struct page_pool *pool, struct page *page, + bool allow_direct) +{ + page_pool_put_page(pool, page, -1, allow_direct); +} + +/* Makes tests selectable. Useful for perf-record to analyze a single test. + * Hint: Bash shells support writing binary number like: $((2#101010) + * + * # modprobe bench_page_pool_simple run_flags=$((2#100)) + */ +static unsigned long run_flags = 0xFFFFFFFF; +module_param(run_flags, ulong, 0); +MODULE_PARM_DESC(run_flags, "Limit which bench test that runs"); + +/* Count the bit number from the enum */ +enum benchmark_bit { + bit_run_bench_baseline, + bit_run_bench_no_softirq01, + bit_run_bench_no_softirq02, + bit_run_bench_no_softirq03, +}; + +#define bit(b) (1 << (b)) +#define enabled(b) ((run_flags & (bit(b)))) + +/* notice time_bench is limited to U32_MAX nr loops */ +static unsigned long loops = 10000000; +module_param(loops, ulong, 0); +MODULE_PARM_DESC(loops, "Specify loops bench will run"); + +/* Timing at the nanosec level, we need to know the overhead + * introduced by the for loop itself + */ +static int time_bench_for_loop(struct time_bench_record *rec, void *data) +{ + uint64_t loops_cnt = 0; + int i; + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + loops_cnt++; + barrier(); /* avoid compiler to optimize this loop */ + } + time_bench_stop(rec, loops_cnt); + return loops_cnt; +} + +static int time_bench_atomic_inc(struct time_bench_record *rec, void *data) +{ + uint64_t loops_cnt = 0; + atomic_t cnt; + int i; + + atomic_set(&cnt, 0); + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + atomic_inc(&cnt); + barrier(); /* avoid compiler to optimize this loop */ + } + loops_cnt = atomic_read(&cnt); + time_bench_stop(rec, loops_cnt); + return loops_cnt; +} + +/* The ptr_ping in page_pool uses a spinlock. We need to know the minimum + * overhead of taking+releasing a spinlock, to know the cycles that can be saved + * by e.g. amortizing this via bulking. + */ +static int time_bench_lock(struct time_bench_record *rec, void *data) +{ + uint64_t loops_cnt = 0; + spinlock_t lock; + int i; + + spin_lock_init(&lock); + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + spin_lock(&lock); + loops_cnt++; + barrier(); /* avoid compiler to optimize this loop */ + spin_unlock(&lock); + } + time_bench_stop(rec, loops_cnt); + return loops_cnt; +} + +/* Helper for filling some page's into ptr_ring */ +static void pp_fill_ptr_ring(struct page_pool *pp, int elems) +{ + /* GFP_ATOMIC needed when under run softirq */ + gfp_t gfp_mask = GFP_ATOMIC; + struct page **array; + int i; + + array = kcalloc(elems, sizeof(struct page *), gfp_mask); + + for (i = 0; i < elems; i++) + array[i] = page_pool_alloc_pages(pp, gfp_mask); + for (i = 0; i < elems; i++) + _page_pool_put_page(pp, array[i], false); + + kfree(array); +} + +enum test_type { type_fast_path, type_ptr_ring, type_page_allocator }; + +/* Depends on compile optimizing this function */ +static int time_bench_page_pool(struct time_bench_record *rec, void *data, + enum test_type type, const char *func) +{ + uint64_t loops_cnt = 0; + gfp_t gfp_mask = GFP_ATOMIC; /* GFP_ATOMIC is not really needed */ + int i, err; + + struct page_pool *pp; + struct page *page; + + struct page_pool_params pp_params = { + .order = 0, + .flags = 0, + .pool_size = MY_POOL_SIZE, + .nid = NUMA_NO_NODE, + .dev = NULL, /* Only use for DMA mapping */ + .dma_dir = DMA_BIDIRECTIONAL, + }; + + pp = page_pool_create(&pp_params); + if (IS_ERR(pp)) { + err = PTR_ERR(pp); + pr_warn("%s: Error(%d) creating page_pool\n", func, err); + goto out; + } + pp_fill_ptr_ring(pp, 64); + + if (in_serving_softirq()) + pr_warn("%s(): in_serving_softirq fast-path\n", func); + else + pr_warn("%s(): Cannot use page_pool fast-path\n", func); + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + /* Common fast-path alloc that depend on in_serving_softirq() */ + page = page_pool_alloc_pages(pp, gfp_mask); + if (!page) + break; + loops_cnt++; + barrier(); /* avoid compiler to optimize this loop */ + + /* The benchmarks purpose it to test different return paths. + * Compiler should inline optimize other function calls out + */ + if (type == type_fast_path) { + /* Fast-path recycling e.g. XDP_DROP use-case */ + page_pool_recycle_direct(pp, page); + + } else if (type == type_ptr_ring) { + /* Normal return path */ + _page_pool_put_page(pp, page, false); + + } else if (type == type_page_allocator) { + /* Test if not pages are recycled, but instead + * returned back into systems page allocator + */ + get_page(page); /* cause no-recycling */ + _page_pool_put_page(pp, page, false); + put_page(page); + } else { + BUILD_BUG(); + } + } + time_bench_stop(rec, loops_cnt); +out: + page_pool_destroy(pp); + return loops_cnt; +} + +static int time_bench_page_pool01_fast_path(struct time_bench_record *rec, + void *data) +{ + return time_bench_page_pool(rec, data, type_fast_path, __func__); +} + +static int time_bench_page_pool02_ptr_ring(struct time_bench_record *rec, + void *data) +{ + return time_bench_page_pool(rec, data, type_ptr_ring, __func__); +} + +static int time_bench_page_pool03_slow(struct time_bench_record *rec, + void *data) +{ + return time_bench_page_pool(rec, data, type_page_allocator, __func__); +} + +static int run_benchmark_tests(void) +{ + uint32_t nr_loops = loops; + + /* Baseline tests */ + if (enabled(bit_run_bench_baseline)) { + time_bench_loop(nr_loops * 10, 0, "for_loop", NULL, + time_bench_for_loop); + time_bench_loop(nr_loops * 10, 0, "atomic_inc", NULL, + time_bench_atomic_inc); + time_bench_loop(nr_loops, 0, "lock", NULL, time_bench_lock); + } + + /* This test cannot activate correct code path, due to no-softirq ctx */ + if (enabled(bit_run_bench_no_softirq01)) + time_bench_loop(nr_loops, 0, "no-softirq-page_pool01", NULL, + time_bench_page_pool01_fast_path); + if (enabled(bit_run_bench_no_softirq02)) + time_bench_loop(nr_loops, 0, "no-softirq-page_pool02", NULL, + time_bench_page_pool02_ptr_ring); + if (enabled(bit_run_bench_no_softirq03)) + time_bench_loop(nr_loops, 0, "no-softirq-page_pool03", NULL, + time_bench_page_pool03_slow); + + return 0; +} + +static int __init bench_page_pool_simple_module_init(void) +{ + if (verbose) + pr_info("Loaded\n"); + + if (loops > U32_MAX) { + pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", loops, + U32_MAX); + return -ECHRNG; + } + + run_benchmark_tests(); + + return 0; +} +module_init(bench_page_pool_simple_module_init); + +static void __exit bench_page_pool_simple_module_exit(void) +{ + if (verbose) + pr_info("Unloaded\n"); +} +module_exit(bench_page_pool_simple_module_exit); + +MODULE_DESCRIPTION("Benchmark of page_pool simple cases"); +MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.c b/tools/testing/selftests/net/bench/page_pool/time_bench.c new file mode 100644 index 000000000000..073bb36ec5f2 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/time_bench.c @@ -0,0 +1,394 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Benchmarking code execution time inside the kernel + * + * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/time.h> + +#include <linux/perf_event.h> /* perf_event_create_kernel_counter() */ + +/* For concurrency testing */ +#include <linux/completion.h> +#include <linux/sched.h> +#include <linux/workqueue.h> +#include <linux/kthread.h> + +#include "time_bench.h" + +static int verbose = 1; + +/** TSC (Time-Stamp Counter) based ** + * See: linux/time_bench.h + * tsc_start_clock() and tsc_stop_clock() + */ + +/** Wall-clock based ** + */ + +/** PMU (Performance Monitor Unit) based ** + */ +#define PERF_FORMAT \ + (PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | \ + PERF_FORMAT_TOTAL_TIME_RUNNING) + +struct raw_perf_event { + uint64_t config; /* event */ + uint64_t config1; /* umask */ + struct perf_event *save; + char *desc; +}; + +/* if HT is enable a maximum of 4 events (5 if one is instructions + * retired can be specified, if HT is disabled a maximum of 8 (9 if + * one is instructions retired) can be specified. + * + * From Table 19-1. Architectural Performance Events + * Architectures Software Developer’s Manual Volume 3: System Programming + * Guide + */ +struct raw_perf_event perf_events[] = { + { 0x3c, 0x00, NULL, "Unhalted CPU Cycles" }, + { 0xc0, 0x00, NULL, "Instruction Retired" } +}; + +#define NUM_EVTS (ARRAY_SIZE(perf_events)) + +/* WARNING: PMU config is currently broken! + */ +bool time_bench_PMU_config(bool enable) +{ + int i; + struct perf_event_attr perf_conf; + struct perf_event *perf_event; + int cpu; + + preempt_disable(); + cpu = smp_processor_id(); + pr_info("DEBUG: cpu:%d\n", cpu); + preempt_enable(); + + memset(&perf_conf, 0, sizeof(struct perf_event_attr)); + perf_conf.type = PERF_TYPE_RAW; + perf_conf.size = sizeof(struct perf_event_attr); + perf_conf.read_format = PERF_FORMAT; + perf_conf.pinned = 1; + perf_conf.exclude_user = 1; /* No userspace events */ + perf_conf.exclude_kernel = 0; /* Only kernel events */ + + for (i = 0; i < NUM_EVTS; i++) { + perf_conf.disabled = enable; + //perf_conf.disabled = (i == 0) ? 1 : 0; + perf_conf.config = perf_events[i].config; + perf_conf.config1 = perf_events[i].config1; + if (verbose) + pr_info("%s() enable PMU counter: %s\n", + __func__, perf_events[i].desc); + perf_event = perf_event_create_kernel_counter(&perf_conf, cpu, + NULL /* task */, + NULL /* overflow_handler*/, + NULL /* context */); + if (perf_event) { + perf_events[i].save = perf_event; + pr_info("%s():DEBUG perf_event success\n", __func__); + + perf_event_enable(perf_event); + } else { + pr_info("%s():DEBUG perf_event is NULL\n", __func__); + } + } + + return true; +} + +/** Generic functions ** + */ + +/* Calculate stats, store results in record */ +bool time_bench_calc_stats(struct time_bench_record *rec) +{ +#define NANOSEC_PER_SEC 1000000000 /* 10^9 */ + uint64_t ns_per_call_tmp_rem = 0; + uint32_t ns_per_call_remainder = 0; + uint64_t pmc_ipc_tmp_rem = 0; + uint32_t pmc_ipc_remainder = 0; + uint32_t pmc_ipc_div = 0; + uint32_t invoked_cnt_precision = 0; + uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */ + + if (rec->flags & TIME_BENCH_LOOP) { + if (rec->invoked_cnt < 1000) { + pr_err("ERR: need more(>1000) loops(%llu) for timing\n", + rec->invoked_cnt); + return false; + } + if (rec->invoked_cnt > ((1ULL << 32) - 1)) { + /* div_u64_rem() can only support div with 32bit*/ + pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n", + rec->invoked_cnt); + return false; + } + invoked_cnt = (uint32_t)rec->invoked_cnt; + } + + /* TSC (Time-Stamp Counter) records */ + if (rec->flags & TIME_BENCH_TSC) { + rec->tsc_interval = rec->tsc_stop - rec->tsc_start; + if (rec->tsc_interval == 0) { + pr_err("ABORT: timing took ZERO TSC time\n"); + return false; + } + /* Calculate stats */ + if (rec->flags & TIME_BENCH_LOOP) + rec->tsc_cycles = rec->tsc_interval / invoked_cnt; + else + rec->tsc_cycles = rec->tsc_interval; + } + + /* Wall-clock time calc */ + if (rec->flags & TIME_BENCH_WALLCLOCK) { + rec->time_start = rec->ts_start.tv_nsec + + (NANOSEC_PER_SEC * rec->ts_start.tv_sec); + rec->time_stop = rec->ts_stop.tv_nsec + + (NANOSEC_PER_SEC * rec->ts_stop.tv_sec); + rec->time_interval = rec->time_stop - rec->time_start; + if (rec->time_interval == 0) { + pr_err("ABORT: timing took ZERO wallclock time\n"); + return false; + } + /* Calculate stats */ + /*** Division in kernel it tricky ***/ + /* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */ + /* remainder only correct because NANOSEC_PER_SEC is 10^9 */ + rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC, + &rec->time_sec_remainder); + //TODO: use existing struct timespec records instead of div? + + if (rec->flags & TIME_BENCH_LOOP) { + /*** Division in kernel it tricky ***/ + /* Orig: ns = ((double)time_interval / invoked_cnt); */ + /* First get quotient */ + rec->ns_per_call_quotient = + div_u64_rem(rec->time_interval, invoked_cnt, + &ns_per_call_remainder); + /* Now get decimals .xxx precision (incorrect roundup)*/ + ns_per_call_tmp_rem = ns_per_call_remainder; + invoked_cnt_precision = invoked_cnt / 1000; + if (invoked_cnt_precision > 0) { + rec->ns_per_call_decimal = + div_u64_rem(ns_per_call_tmp_rem, + invoked_cnt_precision, + &ns_per_call_remainder); + } + } + } + + /* Performance Monitor Unit (PMU) counters */ + if (rec->flags & TIME_BENCH_PMU) { + //FIXME: Overflow handling??? + rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start; + rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start; + + /* Calc Instruction Per Cycle (IPC) */ + /* First get quotient */ + rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, rec->pmc_clk, + &pmc_ipc_remainder); + /* Now get decimals .xxx precision (incorrect roundup)*/ + pmc_ipc_tmp_rem = pmc_ipc_remainder; + pmc_ipc_div = rec->pmc_clk / 1000; + if (pmc_ipc_div > 0) { + rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem, + pmc_ipc_div, + &pmc_ipc_remainder); + } + } + + return true; +} + +/* Generic function for invoking a loop function and calculating + * execution time stats. The function being called/timed is assumed + * to perform a tight loop, and update the timing record struct. + */ +bool time_bench_loop(uint32_t loops, int step, char *txt, void *data, + int (*func)(struct time_bench_record *record, void *data)) +{ + struct time_bench_record rec; + + /* Setup record */ + memset(&rec, 0, sizeof(rec)); /* zero func might not update all */ + rec.version_abi = 1; + rec.loops = loops; + rec.step = step; + rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK); + + /*** Loop function being timed ***/ + if (!func(&rec, data)) { + pr_err("ABORT: function being timed failed\n"); + return false; + } + + if (rec.invoked_cnt < loops) + pr_warn("WARNING: Invoke count(%llu) smaller than loops(%d)\n", + rec.invoked_cnt, loops); + + /* Calculate stats */ + time_bench_calc_stats(&rec); + + pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n", + txt, rec.tsc_cycles, rec.ns_per_call_quotient, + rec.ns_per_call_decimal, rec.step, rec.time_sec, + rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt, + rec.tsc_interval); + if (rec.flags & TIME_BENCH_PMU) + pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n", + txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient, + rec.pmc_ipc_decimal); + return true; +} + +/* Function getting invoked by kthread */ +static int invoke_test_on_cpu_func(void *private) +{ + struct time_bench_cpu *cpu = private; + struct time_bench_sync *sync = cpu->sync; + cpumask_t newmask = CPU_MASK_NONE; + void *data = cpu->data; + + /* Restrict CPU */ + cpumask_set_cpu(cpu->rec.cpu, &newmask); + set_cpus_allowed_ptr(current, &newmask); + + /* Synchronize start of concurrency test */ + atomic_inc(&sync->nr_tests_running); + wait_for_completion(&sync->start_event); + + /* Start benchmark function */ + if (!cpu->bench_func(&cpu->rec, data)) { + pr_err("ERROR: function being timed failed on CPU:%d(%d)\n", + cpu->rec.cpu, smp_processor_id()); + } else { + if (verbose) + pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu, + smp_processor_id()); + } + cpu->did_bench_run = true; + + /* End test */ + atomic_dec(&sync->nr_tests_running); + /* Wait for kthread_stop() telling us to stop */ + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + __set_current_state(TASK_RUNNING); + return 0; +} + +void time_bench_print_stats_cpumask(const char *desc, + struct time_bench_cpu *cpu_tasks, + const struct cpumask *mask) +{ + uint64_t average = 0; + int cpu; + int step = 0; + struct sum { + uint64_t tsc_cycles; + int records; + } sum = { 0 }; + + /* Get stats */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + struct time_bench_record *rec = &c->rec; + + /* Calculate stats */ + time_bench_calc_stats(rec); + + pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n", + desc, cpu, rec->tsc_cycles, rec->ns_per_call_quotient, + rec->ns_per_call_decimal, rec->step, rec->time_sec, + rec->time_sec_remainder, rec->time_interval, + rec->invoked_cnt, rec->tsc_interval); + + /* Collect average */ + sum.records++; + sum.tsc_cycles += rec->tsc_cycles; + step = rec->step; + } + + if (sum.records) /* avoid div-by-zero */ + average = sum.tsc_cycles / sum.records; + pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc, + average, sum.records, step); +} + +void time_bench_run_concurrent(uint32_t loops, int step, void *data, + const struct cpumask *mask, /* Support masking outsome CPUs*/ + struct time_bench_sync *sync, + struct time_bench_cpu *cpu_tasks, + int (*func)(struct time_bench_record *record, void *data)) +{ + int cpu, running = 0; + + if (verbose) // DEBUG + pr_warn("%s() Started on CPU:%d\n", __func__, + smp_processor_id()); + + /* Reset sync conditions */ + atomic_set(&sync->nr_tests_running, 0); + init_completion(&sync->start_event); + + /* Spawn off jobs on all CPUs */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + + running++; + c->sync = sync; /* Send sync variable along */ + c->data = data; /* Send opaque along */ + + /* Init benchmark record */ + memset(&c->rec, 0, sizeof(struct time_bench_record)); + c->rec.version_abi = 1; + c->rec.loops = loops; + c->rec.step = step; + c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | + TIME_BENCH_WALLCLOCK); + c->rec.cpu = cpu; + c->bench_func = func; + c->task = kthread_run(invoke_test_on_cpu_func, c, + "time_bench%d", cpu); + if (IS_ERR(c->task)) { + pr_err("%s(): Failed to start test func\n", __func__); + return; /* Argh, what about cleanup?! */ + } + } + + /* Wait until all processes are running */ + while (atomic_read(&sync->nr_tests_running) < running) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(10); + } + /* Kick off all CPU concurrently on completion event */ + complete_all(&sync->start_event); + + /* Wait for CPUs to finish */ + while (atomic_read(&sync->nr_tests_running)) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(10); + } + + /* Stop the kthreads */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + + kthread_stop(c->task); + } + + if (verbose) // DEBUG - happens often, finish on another CPU + pr_warn("%s() Finished on CPU:%d\n", __func__, + smp_processor_id()); +} diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.h b/tools/testing/selftests/net/bench/page_pool/time_bench.h new file mode 100644 index 000000000000..e113fcf341dc --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/time_bench.h @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Benchmarking code execution time inside the kernel + * + * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer + * for licensing details see kernel-base/COPYING + */ +#ifndef _LINUX_TIME_BENCH_H +#define _LINUX_TIME_BENCH_H + +/* Main structure used for recording a benchmark run */ +struct time_bench_record { + uint32_t version_abi; + uint32_t loops; /* Requested loop invocations */ + uint32_t step; /* option for e.g. bulk invocations */ + + uint32_t flags; /* Measurements types enabled */ +#define TIME_BENCH_LOOP BIT(0) +#define TIME_BENCH_TSC BIT(1) +#define TIME_BENCH_WALLCLOCK BIT(2) +#define TIME_BENCH_PMU BIT(3) + + uint32_t cpu; /* Used when embedded in time_bench_cpu */ + + /* Records */ + uint64_t invoked_cnt; /* Returned actual invocations */ + uint64_t tsc_start; + uint64_t tsc_stop; + struct timespec64 ts_start; + struct timespec64 ts_stop; + /* PMU counters for instruction and cycles + * instructions counter including pipelined instructions + */ + uint64_t pmc_inst_start; + uint64_t pmc_inst_stop; + /* CPU unhalted clock counter */ + uint64_t pmc_clk_start; + uint64_t pmc_clk_stop; + + /* Result records */ + uint64_t tsc_interval; + uint64_t time_start, time_stop, time_interval; /* in nanosec */ + uint64_t pmc_inst, pmc_clk; + + /* Derived result records */ + uint64_t tsc_cycles; // +decimal? + uint64_t ns_per_call_quotient, ns_per_call_decimal; + uint64_t time_sec; + uint32_t time_sec_remainder; + uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* inst per cycle */ +}; + +/* For synchronizing parallel CPUs to run concurrently */ +struct time_bench_sync { + atomic_t nr_tests_running; + struct completion start_event; +}; + +/* Keep track of CPUs executing our bench function. + * + * Embed a time_bench_record for storing info per cpu + */ +struct time_bench_cpu { + struct time_bench_record rec; + struct time_bench_sync *sync; /* back ptr */ + struct task_struct *task; + /* "data" opaque could have been placed in time_bench_sync, + * but to avoid any false sharing, place it per CPU + */ + void *data; + /* Support masking outsome CPUs, mark if it ran */ + bool did_bench_run; + /* int cpu; // note CPU stored in time_bench_record */ + int (*bench_func)(struct time_bench_record *record, void *data); +}; + +/* + * Below TSC assembler code is not compatible with other archs, and + * can also fail on guests if cpu-flags are not correct. + * + * The way TSC reading is used, many iterations, does not require as + * high accuracy as described below (in Intel Doc #324264). + * + * Considering changing to use get_cycles() (#include <asm/timex.h>). + */ + +/** TSC (Time-Stamp Counter) based ** + * Recommend reading, to understand details of reading TSC accurately: + * Intel Doc #324264, "How to Benchmark Code Execution Times on Intel" + * + * Consider getting exclusive ownership of CPU by using: + * unsigned long flags; + * preempt_disable(); + * raw_local_irq_save(flags); + * _your_code_ + * raw_local_irq_restore(flags); + * preempt_enable(); + * + * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx" + * RDTSC only change "%rax" and "%rdx" but + * CPUID clears the high 32-bits of all (rax/rbx/rcx/rdx) + */ +static __always_inline uint64_t tsc_start_clock(void) +{ + /* See: Intel Doc #324264 */ + unsigned int hi, lo; + + asm volatile("CPUID\n\t" + "RDTSC\n\t" + "mov %%edx, %0\n\t" + "mov %%eax, %1\n\t" + : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx"); + //FIXME: on 32bit use clobbered %eax + %edx + return ((uint64_t)lo) | (((uint64_t)hi) << 32); +} + +static __always_inline uint64_t tsc_stop_clock(void) +{ + /* See: Intel Doc #324264 */ + unsigned int hi, lo; + + asm volatile("RDTSCP\n\t" + "mov %%edx, %0\n\t" + "mov %%eax, %1\n\t" + "CPUID\n\t" + : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx"); + return ((uint64_t)lo) | (((uint64_t)hi) << 32); +} + +/** Wall-clock based ** + * + * use: getnstimeofday() + * getnstimeofday(&rec->ts_start); + * getnstimeofday(&rec->ts_stop); + * + * API changed see: Documentation/core-api/timekeeping.rst + * https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday + * + * We should instead use: ktime_get_real_ts64() is a direct + * replacement, but consider using monotonic time (ktime_get_ts64()) + * and/or a ktime_t based interface (ktime_get()/ktime_get_real()). + */ + +/** PMU (Performance Monitor Unit) based ** + * + * Needed for calculating: Instructions Per Cycle (IPC) + * - The IPC number tell how efficient the CPU pipelining were + */ +//lookup: perf_event_create_kernel_counter() + +bool time_bench_PMU_config(bool enable); + +/* Raw reading via rdpmc() using fixed counters + * + * From: https://github.com/andikleen/simple-pmu + */ +enum { + FIXED_SELECT = (1U << 30), /* == 0x40000000 */ + FIXED_INST_RETIRED_ANY = 0, + FIXED_CPU_CLK_UNHALTED_CORE = 1, + FIXED_CPU_CLK_UNHALTED_REF = 2, +}; + +static __always_inline unsigned int long long p_rdpmc(unsigned int in) +{ + unsigned int d, a; + + asm volatile("rdpmc" : "=d"(d), "=a"(a) : "c"(in) : "memory"); + return ((unsigned long long)d << 32) | a; +} + +/* These PMU counter needs to be enabled, but I don't have the + * configure code implemented. My current hack is running: + * sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko + */ +/* Reading all pipelined instruction */ +static __always_inline unsigned long long pmc_inst(void) +{ + return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY); +} + +/* Reading CPU clock cycles */ +static __always_inline unsigned long long pmc_clk(void) +{ + return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE); +} + +/* Raw reading via MSR rdmsr() is likely wrong + * FIXME: How can I know which raw MSR registers are conf for what? + */ +#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */ +#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */ +#define MSR_IA32_PCM2 0x400000C3 +static inline uint64_t msr_inst(unsigned long long *msr_result) +{ + return rdmsrq_safe(MSR_IA32_PCM0, msr_result); +} + +/** Generic functions ** + */ +bool time_bench_loop(uint32_t loops, int step, char *txt, void *data, + int (*func)(struct time_bench_record *rec, void *data)); +bool time_bench_calc_stats(struct time_bench_record *rec); + +void time_bench_run_concurrent(uint32_t loops, int step, void *data, + const struct cpumask *mask, /* Support masking outsome CPUs*/ + struct time_bench_sync *sync, struct time_bench_cpu *cpu_tasks, + int (*func)(struct time_bench_record *record, void *data)); +void time_bench_print_stats_cpumask(const char *desc, + struct time_bench_cpu *cpu_tasks, + const struct cpumask *mask); + +//FIXME: use rec->flags to select measurement, should be MACRO +static __always_inline void time_bench_start(struct time_bench_record *rec) +{ + //getnstimeofday(&rec->ts_start); + ktime_get_real_ts64(&rec->ts_start); + if (rec->flags & TIME_BENCH_PMU) { + rec->pmc_inst_start = pmc_inst(); + rec->pmc_clk_start = pmc_clk(); + } + rec->tsc_start = tsc_start_clock(); +} + +static __always_inline void time_bench_stop(struct time_bench_record *rec, + uint64_t invoked_cnt) +{ + rec->tsc_stop = tsc_stop_clock(); + if (rec->flags & TIME_BENCH_PMU) { + rec->pmc_inst_stop = pmc_inst(); + rec->pmc_clk_stop = pmc_clk(); + } + //getnstimeofday(&rec->ts_stop); + ktime_get_real_ts64(&rec->ts_stop); + rec->invoked_cnt = invoked_cnt; +} + +#endif /* _LINUX_TIME_BENCH_H */ diff --git a/tools/testing/selftests/net/bench/test_bench_page_pool.sh b/tools/testing/selftests/net/bench/test_bench_page_pool.sh new file mode 100755 index 000000000000..7b8b18cfedce --- /dev/null +++ b/tools/testing/selftests/net/bench/test_bench_page_pool.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# + +set -e + +DRIVER="./page_pool/bench_page_pool.ko" +result="" + +function run_test() +{ + rmmod "bench_page_pool.ko" || true + insmod $DRIVER > /dev/null 2>&1 + result=$(dmesg | tail -10) + echo "$result" + + echo + echo "Fast path results:" + echo "${result}" | grep -o -E "no-softirq-page_pool01 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" + + echo + echo "ptr_ring results:" + echo "${result}" | grep -o -E "no-softirq-page_pool02 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" + + echo + echo "slow path results:" + echo "${result}" | grep -o -E "no-softirq-page_pool03 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" +} + +run_test + +exit 0 diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 00bde7b6f39e..d7bb2e80e88c 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -102,6 +102,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ vxlan_bridge_1d_port_8472.sh \ vxlan_bridge_1d.sh \ vxlan_bridge_1q_ipv6.sh \ + vxlan_bridge_1q_mc_ul.sh \ vxlan_bridge_1q_port_8472_ipv6.sh \ vxlan_bridge_1q_port_8472.sh \ vxlan_bridge_1q.sh \ diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 508f3c700d71..83ee6a07e072 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -37,6 +37,7 @@ declare -A NETIFS=( : "${TEAMD:=teamd}" : "${MCD:=smcrouted}" : "${MC_CLI:=smcroutectl}" +: "${MCD_TABLE_NAME:=selftests}" # Constants for netdevice bring-up: # Default time in seconds to wait for an interface to come up before giving up @@ -1757,6 +1758,51 @@ mc_send() msend -g $groups -I $if_name -c 1 > /dev/null 2>&1 } +adf_mcd_start() +{ + local ifs=("$@") + + local table_name="$MCD_TABLE_NAME" + local smcroutedir + local pid + local if + local i + + check_command "$MCD" || return 1 + check_command "$MC_CLI" || return 1 + + smcroutedir=$(mktemp -d) + defer rm -rf "$smcroutedir" + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + echo "phyint ${NETIFS[p$i]} enable" >> \ + "$smcroutedir/$table_name.conf" + done + + for if in "${ifs[@]}"; do + if ! ip_link_has_flag "$if" MULTICAST; then + ip link set dev "$if" multicast on + defer ip link set dev "$if" multicast off + fi + + echo "phyint $if enable" >> \ + "$smcroutedir/$table_name.conf" + done + + "$MCD" -N -I "$table_name" -f "$smcroutedir/$table_name.conf" \ + -P "$smcroutedir/$table_name.pid" + busywait "$BUSYWAIT_TIMEOUT" test -e "$smcroutedir/$table_name.pid" + pid=$(cat "$smcroutedir/$table_name.pid") + defer kill_process "$pid" +} + +mc_cli() +{ + local table_name="$MCD_TABLE_NAME" + + "$MC_CLI" -I "$table_name" "$@" +} + start_ip_monitor() { local mtype=$1; shift diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh index 5a58b1ec8aef..83e52abdbc2e 100755 --- a/tools/testing/selftests/net/forwarding/router_multicast.sh +++ b/tools/testing/selftests/net/forwarding/router_multicast.sh @@ -33,10 +33,6 @@ NUM_NETIFS=6 source lib.sh source tc_common.sh -require_command $MCD -require_command $MC_CLI -table_name=selftests - h1_create() { simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64 @@ -149,25 +145,6 @@ router_destroy() ip link set dev $rp1 down } -start_mcd() -{ - SMCROUTEDIR="$(mktemp -d)" - - for ((i = 1; i <= $NUM_NETIFS; ++i)); do - echo "phyint ${NETIFS[p$i]} enable" >> \ - $SMCROUTEDIR/$table_name.conf - done - - $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \ - -P $SMCROUTEDIR/$table_name.pid -} - -kill_mcd() -{ - pkill $MCD - rm -rf $SMCROUTEDIR -} - setup_prepare() { h1=${NETIFS[p1]} @@ -179,7 +156,7 @@ setup_prepare() rp3=${NETIFS[p5]} h3=${NETIFS[p6]} - start_mcd + adf_mcd_start || exit "$EXIT_STATUS" vrf_prepare @@ -206,7 +183,7 @@ cleanup() vrf_cleanup - kill_mcd + defer_scopes_cleanup } create_mcast_sg() @@ -214,9 +191,9 @@ create_mcast_sg() local if_name=$1; shift local s_addr=$1; shift local mcast=$1; shift - local dest_ifs=${@} + local dest_ifs=("${@}") - $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs + mc_cli add "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}" } delete_mcast_sg() @@ -224,9 +201,9 @@ delete_mcast_sg() local if_name=$1; shift local s_addr=$1; shift local mcast=$1; shift - local dest_ifs=${@} + local dest_ifs=("${@}") - $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs + mc_cli remove "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}" } mcast_v4() diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh new file mode 100755 index 000000000000..7ec58b6b1128 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh @@ -0,0 +1,771 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------------------------+ +# | + $h1.10 + $h1.20 | +# | | 192.0.2.1/28 | 2001:db8:1::1/64 | +# | \________ ________/ | +# | \ / | +# | + $h1 H1 (vrf) | +# +-----------|-----------------------------+ +# | +# +-----------|----------------------------------------------------------------+ +# | +---------|--------------------------------------+ SWITCH (main vrf) | +# | | + $swp1 BR1 (802.1q) | | +# | | vid 10 20 | | +# | | | | +# | | + vx10 (vxlan) + vx20 (vxlan) | + lo10 (dummy) | +# | | local 192.0.2.100 local 2001:db8:4::1 | 192.0.2.100/28 | +# | | group 233.252.0.1 group ff0e::1:2:3 | 2001:db8:4::1/64 | +# | | id 1000 id 2000 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | +------------------------------------------------+ | +# | | +# | + $swp2 $swp3 + | +# | | 192.0.2.33/28 192.0.2.65/28 | | +# | | 2001:db8:2::1/64 2001:db8:3::1/64 | | +# | | | | +# +---|--------------------------------------------------------------------|---+ +# | | +# +---|--------------------------------+ +--------------------------------|---+ +# | | H2 (vrf) | | H3 (vrf) | | +# | +-|----------------------------+ | | +-----------------------------|-+ | +# | | + $h2 BR2 (802.1d) | | | | BR3 (802.1d) $h3 + | | +# | | | | | | | | +# | | + v1$h2 (veth) | | | | v1$h3 (veth) + | | +# | +-|----------------------------+ | | +-----------------------------|-+ | +# | | | | | | +# +---|--------------------------------+ +--------------------------------|---+ +# | | +# +---|--------------------------------+ +--------------------------------|---+ +# | + v2$h2 (veth) NS2 (netns) | | NS3 (netns) v2$h3 (veth) + | +# | 192.0.2.34/28 | | 192.0.2.66/28 | +# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 | +# | | | | +# | +--------------------------------+ | | +--------------------------------+ | +# | | BR1 (802.1q) | | | | BR1 (802.1q) | | +# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | group 233.252.0.1 dev v2$h2 | | | | group 233.252.0.1 dev v2$h3 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | | +# | | | | | | | | +# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | | +# | | local 2001:db8:2::2 | | | | local 2001:db8:3::2 | | +# | | group ff0e::1:2:3 dev v2$h2 | | | | group ff0e::1:2:3 dev v2$h3 | | +# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 10 20 | | | | | vid 10 20 | | +# | +--|-----------------------------+ | | +--|-----------------------------+ | +# | | | | | | +# | +--|-----------------------------+ | | +--|-----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | | +# | +--------------------------------+ | | +--------------------------------+ | +# +------------------------------------+ +------------------------------------+ +# +#shellcheck disable=SC2317 # SC doesn't see our uses of functions. + +: "${VXPORT:=4789}" +export VXPORT + +: "${GROUP4:=233.252.0.1}" +export GROUP4 + +: "${GROUP6:=ff0e::1:2:3}" +export GROUP6 + +: "${IPMR:=lo10}" + +ALL_TESTS=" + ipv4_nomcroute + ipv4_mcroute + ipv4_mcroute_changelink + ipv4_mcroute_starg + ipv4_mcroute_noroute + ipv4_mcroute_fdb + ipv4_mcroute_fdb_oif0 + ipv4_mcroute_fdb_oif0_sep + + ipv6_nomcroute + ipv6_mcroute + ipv6_mcroute_changelink + ipv6_mcroute_starg + ipv6_mcroute_noroute + ipv6_mcroute_fdb + ipv6_mcroute_fdb_oif0 + + ipv4_nomcroute_rx + ipv4_mcroute_rx + ipv4_mcroute_starg_rx + ipv4_mcroute_fdb_oif0_sep_rx + ipv4_mcroute_fdb_sep_rx + + ipv6_nomcroute_rx + ipv6_mcroute_rx + ipv6_mcroute_starg_rx + ipv6_mcroute_fdb_sep_rx +" + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init "$h1" + defer simple_if_fini "$h1" + + ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10 + ip_link_set_up "$h1.10" + ip_addr_add "$h1.10" 192.0.2.1/28 + + ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20 + ip_link_set_up "$h1.20" + ip_addr_add "$h1.20" 2001:db8:1::1/64 +} + +install_capture() +{ + local dev=$1; shift + + tc qdisc add dev "$dev" clsact + defer tc qdisc del dev "$dev" clsact + + tc filter add dev "$dev" ingress proto ip pref 104 \ + flower skip_hw ip_proto udp dst_port "$VXPORT" \ + action pass + defer tc filter del dev "$dev" ingress proto ip pref 104 + + tc filter add dev "$dev" ingress proto ipv6 pref 106 \ + flower skip_hw ip_proto udp dst_port "$VXPORT" \ + action pass + defer tc filter del dev "$dev" ingress proto ipv6 pref 106 +} + +h2_create() +{ + # $h2 + ip_link_set_up "$h2" + + # H2 + vrf_create "v$h2" + defer vrf_destroy "v$h2" + + ip_link_set_up "v$h2" + + # br2 + ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0 + ip_link_set_master br2 "v$h2" + ip_link_set_up br2 + + # $h2 + ip_link_set_master "$h2" br2 + install_capture "$h2" + + # v1$h2 + ip_link_set_up "v1$h2" + ip_link_set_master "v1$h2" br2 +} + +h3_create() +{ + # $h3 + ip_link_set_up "$h3" + + # H3 + vrf_create "v$h3" + defer vrf_destroy "v$h3" + + ip_link_set_up "v$h3" + + # br3 + ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0 + ip_link_set_master br3 "v$h3" + ip_link_set_up br3 + + # $h3 + ip_link_set_master "$h3" br3 + install_capture "$h3" + + # v1$h3 + ip_link_set_up "v1$h3" + ip_link_set_master "v1$h3" br3 +} + +switch_create() +{ + local swp1_mac + + # br1 + swp1_mac=$(mac_get "$swp1") + ip_link_add br1 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + ip_link_set_addr br1 "$swp1_mac" + ip_link_set_up br1 + + # A dummy to force the IPv6 OIF=0 test to install a suitable MC route on + # $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test. + ip_link_add "X$IPMR" up type dummy + + # IPMR + ip_link_add "$IPMR" up type dummy + ip_addr_add "$IPMR" 192.0.2.100/28 + ip_addr_add "$IPMR" 2001:db8:4::1/64 + + # $swp1 + ip_link_set_up "$swp1" + ip_link_set_master "$swp1" br1 + bridge_vlan_add vid 10 dev "$swp1" + bridge_vlan_add vid 20 dev "$swp1" + + # $swp2 + ip_link_set_up "$swp2" + ip_addr_add "$swp2" 192.0.2.33/28 + ip_addr_add "$swp2" 2001:db8:2::1/64 + + # $swp3 + ip_link_set_up "$swp3" + ip_addr_add "$swp3" 192.0.2.65/28 + ip_addr_add "$swp3" 2001:db8:3::1/64 +} + +vx_create() +{ + local name=$1; shift + local vid=$1; shift + + ip_link_add "$name" up type vxlan dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 16 \ + "$@" + ip_link_set_master "$name" br1 + bridge_vlan_add vid "$vid" dev "$name" pvid untagged +} +export -f vx_create + +vx_wait() +{ + # Wait for all the ARP, IGMP etc. noise to settle down so that the + # tunnel is clear for measurements. + sleep 10 +} + +vx10_create() +{ + vx_create vx10 10 id 1000 "$@" +} +export -f vx10_create + +vx20_create() +{ + vx_create vx20 20 id 2000 "$@" +} +export -f vx20_create + +vx10_create_wait() +{ + vx10_create "$@" + vx_wait +} + +vx20_create_wait() +{ + vx20_create "$@" + vx_wait +} + +ns_init_common() +{ + local ns=$1; shift + local if_in=$1; shift + local ipv4_in=$1; shift + local ipv6_in=$1; shift + local ipv4_host=$1; shift + local ipv6_host=$1; shift + + # v2$h2 / v2$h3 + ip_link_set_up "$if_in" + ip_addr_add "$if_in" "$ipv4_in" + ip_addr_add "$if_in" "$ipv6_in" + + # br1 + ip_link_add br1 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + ip_link_set_up br1 + + # vx10, vx20 + vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in" + vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in" + + # w1 + ip_link_add w1 type veth peer name w2 + ip_link_set_master w1 br1 + ip_link_set_up w1 + bridge_vlan_add vid 10 dev w1 + bridge_vlan_add vid 20 dev w1 + + # w2 + simple_if_init w2 + defer simple_if_fini w2 + + # w2.10 + ip_link_add w2.10 master vw2 link w2 type vlan id 10 + ip_link_set_up w2.10 + ip_addr_add w2.10 "$ipv4_host" + + # w2.20 + ip_link_add w2.20 master vw2 link w2 type vlan id 20 + ip_link_set_up w2.20 + ip_addr_add w2.20 "$ipv6_host" +} +export -f ns_init_common + +ns2_create() +{ + # NS2 + ip netns add ns2 + defer ip netns del ns2 + + # v2$h2 + ip link set dev "v2$h2" netns ns2 + defer ip -n ns2 link set dev "v2$h2" netns 1 + + in_ns ns2 \ + ns_init_common ns2 "v2$h2" \ + 192.0.2.34/28 2001:db8:2::2/64 \ + 192.0.2.3/28 2001:db8:1::3/64 +} + +ns3_create() +{ + # NS3 + ip netns add ns3 + defer ip netns del ns3 + + # v2$h3 + ip link set dev "v2$h3" netns ns3 + defer ip -n ns3 link set dev "v2$h3" netns 1 + + ip -n ns3 link set dev "v2$h3" up + + in_ns ns3 \ + ns_init_common ns3 "v2$h3" \ + 192.0.2.66/28 2001:db8:3::2/64 \ + 192.0.2.4/28 2001:db8:1::4/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + defer vrf_cleanup + + forwarding_enable + defer forwarding_restore + + ip_link_add "v1$h2" type veth peer name "v2$h2" + ip_link_add "v1$h3" type veth peer name "v2$h3" + + h1_create + h2_create + h3_create + switch_create + ns2_create + ns3_create +} + +adf_install_broken_sg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3" + defer mc_cli remove "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3" + + mc_cli add "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3" + defer mc_cli remove "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3" +} + +adf_install_rx() +{ + mc_cli add "$swp2" 0.0.0.0 "$GROUP4" "$IPMR" + defer mc_cli remove "$swp2" 0.0.0.0 "$GROUP4" lo10 + + mc_cli add "$swp3" 0.0.0.0 "$GROUP4" "$IPMR" + defer mc_cli remove "$swp3" 0.0.0.0 "$GROUP4" lo10 + + mc_cli add "$swp2" :: "$GROUP6" "$IPMR" + defer mc_cli remove "$swp2" :: "$GROUP6" lo10 + + mc_cli add "$swp3" :: "$GROUP6" "$IPMR" + defer mc_cli remove "$swp3" :: "$GROUP6" lo10 +} + +adf_install_sg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 192.0.2.33 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +adf_install_sg_sep() +{ + adf_mcd_start lo || exit "$EXIT_STATUS" + + mc_cli add lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + + mc_cli add lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" +} + +adf_install_sg_sep_rx() +{ + local lo=$1; shift + + adf_mcd_start "$IPMR" "$lo" || exit "$EXIT_STATUS" + + mc_cli add "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +adf_install_starg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$IPMR" :: "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" :: "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +do_packets_v4() +{ + local mac + + mac=$(mac_get "$h2") + "$MZ" "$h1" -Q 10 -c 10 -d 100msec -p 64 -a own -b "$mac" \ + -A 192.0.2.1 -B 192.0.2.2 -t udp sp=1234,dp=2345 -q +} + +do_packets_v6() +{ + local mac + + mac=$(mac_get "$h2") + "$MZ" -6 "$h1" -Q 20 -c 10 -d 100msec -p 64 -a own -b "$mac" \ + -A 2001:db8:1::1 -B 2001:db8:1::2 -t udp sp=1234,dp=2345 -q +} + +do_test() +{ + local ipv=$1; shift + local expect_h2=$1; shift + local expect_h3=$1; shift + local what=$1; shift + + local pref=$((100 + ipv)) + local t0_h2 + local t0_h3 + local t1_h2 + local t1_h3 + local d_h2 + local d_h3 + + RET=0 + + t0_h2=$(tc_rule_stats_get "$h2" "$pref" ingress) + t0_h3=$(tc_rule_stats_get "$h3" "$pref" ingress) + + "do_packets_v$ipv" + sleep 1 + + t1_h2=$(tc_rule_stats_get "$h2" "$pref" ingress) + t1_h3=$(tc_rule_stats_get "$h3" "$pref" ingress) + + d_h2=$((t1_h2 - t0_h2)) + d_h3=$((t1_h3 - t0_h3)) + + ((d_h2 == expect_h2)) + check_err $? "Expected $expect_h2 packets on H2, got $d_h2" + + ((d_h3 == expect_h3)) + check_err $? "Expected $expect_h3 packets on H3, got $d_h3" + + log_test "VXLAN MC flood $what" +} + +ipv4_do_test_rx() +{ + local h3_should_fail=$1; shift + local what=$1; shift + + RET=0 + + ping_do "$h1.10" 192.0.2.3 + check_err $? "H2 should respond" + + ping_do "$h1.10" 192.0.2.4 + check_err_fail "$h3_should_fail" $? "H3 responds" + + log_test "VXLAN MC flood $what" +} + +ipv6_do_test_rx() +{ + local h3_should_fail=$1; shift + local what=$1; shift + + RET=0 + + ping6_do "$h1.20" 2001:db8:1::3 + check_err $? "H2 should respond" + + ping6_do "$h1.20" 2001:db8:1::4 + check_err_fail "$h3_should_fail" $? "H3 responds" + + log_test "VXLAN MC flood $what" +} + +ipv4_nomcroute() +{ + # Install a misleading (S,G) rule to attempt to trick the system into + # pushing the packets elsewhere. + adf_install_broken_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$swp2" + do_test 4 10 0 "IPv4 nomcroute" +} + +ipv6_nomcroute() +{ + # Like for IPv4, install a misleading (S,G). + adf_install_broken_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$swp2" + do_test 6 10 0 "IPv6 nomcroute" +} + +ipv4_nomcroute_rx() +{ + vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2" + ipv4_do_test_rx 1 "IPv4 nomcroute ping" +} + +ipv6_nomcroute_rx() +{ + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2" + ipv6_do_test_rx 1 "IPv6 nomcroute ping" +} + +ipv4_mcroute() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + do_test 4 10 10 "IPv4 mcroute" +} + +ipv6_mcroute() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + do_test 6 10 10 "IPv6 mcroute" +} + +ipv4_mcroute_rx() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + ipv4_do_test_rx 0 "IPv4 mcroute ping" +} + +ipv6_mcroute_rx() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + ipv6_do_test_rx 0 "IPv6 mcroute ping" +} + +ipv4_mcroute_changelink() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" + ip link set dev vx10 type vxlan mcroute + sleep 1 + do_test 4 10 10 "IPv4 mcroute changelink" +} + +ipv6_mcroute_changelink() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + ip link set dev vx20 type vxlan mcroute + sleep 1 + do_test 6 10 10 "IPv6 mcroute changelink" +} + +ipv4_mcroute_starg() +{ + adf_install_starg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + do_test 4 10 10 "IPv4 mcroute (*,G)" +} + +ipv6_mcroute_starg() +{ + adf_install_starg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + do_test 6 10 10 "IPv6 mcroute (*,G)" +} + +ipv4_mcroute_starg_rx() +{ + adf_install_starg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping" +} + +ipv6_mcroute_starg_rx() +{ + adf_install_starg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping" +} + +ipv4_mcroute_noroute() +{ + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + do_test 4 0 0 "IPv4 mcroute, no route" +} + +ipv6_mcroute_noroute() +{ + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + do_test 6 0 0 "IPv6 mcroute, no route" +} + +ipv4_mcroute_fdb() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 dev "$IPMR" mcroute + bridge fdb add dev vx10 \ + 00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR" + do_test 4 10 10 "IPv4 mcroute FDB" +} + +ipv6_mcroute_fdb() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 dev "$IPMR" mcroute + bridge -6 fdb add dev vx20 \ + 00:00:00:00:00:00 self static dst "$GROUP6" via "$IPMR" + do_test 6 10 10 "IPv6 mcroute FDB" +} + +# Use FDB to configure VXLAN in a way where oif=0 for purposes of FIB lookup. +ipv4_mcroute_fdb_oif0() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + do_test 4 10 10 "IPv4 mcroute oif=0" +} + +ipv6_mcroute_fdb_oif0() +{ + # The IPv6 tunnel lookup does not fall back to selection by source + # address. Instead it just does a FIB match, and that would find one of + # the several ff00::/8 multicast routes -- each device has one. In order + # to reliably force the $IPMR device, add a /128 route for the + # destination group address. + ip -6 route add table local multicast "$GROUP6/128" dev "$IPMR" + defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR" + + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + bridge -6 fdb del dev vx20 00:00:00:00:00:00 + bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6" + do_test 6 10 10 "IPv6 mcroute oif=0" +} + +# In oif=0 test as above, have FIB lookup resolve to loopback instead of IPMR. +# This doesn't work with IPv6 -- a MC route on lo would be marked as RTF_REJECT. +ipv4_mcroute_fdb_oif0_sep() +{ + adf_install_sg_sep + + ip_addr_add lo 192.0.2.120/28 + vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0" +} + +ipv4_mcroute_fdb_oif0_sep_rx() +{ + adf_install_sg_sep_rx lo + + ip_addr_add lo 192.0.2.120/28 + vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping" +} + +ipv4_mcroute_fdb_sep_rx() +{ + adf_install_sg_sep_rx lo + + ip_addr_add lo 192.0.2.120/28 + vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add \ + dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo + ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX ping" +} + +ipv6_mcroute_fdb_sep_rx() +{ + adf_install_sg_sep_rx "X$IPMR" + + ip_addr_add "X$IPMR" 2001:db8:5::1/64 + vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute + bridge -6 fdb del dev vx20 00:00:00:00:00:00 + bridge -6 fdb add dev vx20 00:00:00:00:00:00 \ + self static dst "$GROUP6" via "X$IPMR" + ipv6_do_test_rx 0 "IPv6 mcroute TX!=RX ping" +} + +trap cleanup EXIT + +setup_prepare +setup_wait "$NUM_NETIFS" +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 006fdadcc4b9..ff0dbe23e8e0 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -547,13 +547,19 @@ ip_link_set_addr() defer ip link set dev "$name" address "$old_addr" } -ip_link_is_up() +ip_link_has_flag() { local name=$1; shift + local flag=$1; shift local state=$(ip -j link show "$name" | - jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"') - [[ $state == "UP" ]] + jq --arg flag "$flag" 'any(.[].flags.[]; . == $flag)') + [[ $state == true ]] +} + +ip_link_is_up() +{ + ip_link_has_flag "$1" UP } ip_link_set_up() diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index cd8a58097448..1f5227f3d64d 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -385,7 +385,7 @@ static int get_bind_to_device(int sd, char *name, size_t len) name[0] = '\0'; rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen); if (rc < 0) - log_err_errno("setsockopt(SO_BINDTODEVICE)"); + log_err_errno("getsockopt(SO_BINDTODEVICE)"); return rc; } @@ -535,7 +535,7 @@ static int set_freebind(int sd, int version) break; case AF_INET6: if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) { - log_err_errno("setsockopt(IPV6_FREEBIND"); + log_err_errno("setsockopt(IPV6_FREEBIND)"); rc = -1; } break; @@ -812,7 +812,7 @@ static int convert_addr(struct sock_args *args, const char *_str, sep++; if (str_to_uint(sep, 1, pfx_len_max, &args->prefix_len) != 0) { - fprintf(stderr, "Invalid port\n"); + fprintf(stderr, "Invalid prefix length\n"); return 1; } } else { @@ -1272,7 +1272,7 @@ static int msg_loop(int client, int sd, void *addr, socklen_t alen, } } - nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1; + nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1; while (1) { FD_ZERO(&rfds); FD_SET(sd, &rfds); @@ -1492,7 +1492,7 @@ static int lsock_init(struct sock_args *args) sd = socket(args->version, args->type, args->protocol); if (sd < 0) { log_err_errno("Error opening socket"); - return -1; + return -1; } if (set_reuseaddr(sd) != 0) @@ -1912,7 +1912,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) * waiting to be told when to continue */ if (read(fd, &buf, sizeof(buf)) <= 0) { - log_err_errno("Failed to read IPC status from status"); + log_err_errno("Failed to read IPC status from pipe"); return 1; } if (!buf) { diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index beaee5e4e2aa..c9109627a741 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -2,8 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 import time +from os import system from lib.py import ksft_run, ksft_exit, ksft_pr -from lib.py import ksft_eq, ksft_ge, ksft_busy_wait +from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait from lib.py import NetdevFamily, NetdevSimDev, ip @@ -34,6 +35,39 @@ def napi_list_check(nf) -> None: ksft_eq(len(napis), 100, comment=f"queue count after reset queue {q} mode {i}") +def dev_set_threaded(nf) -> None: + """ + Test that verifies various cases of napi threaded + set and unset at device level using sysfs. + """ + with NetdevSimDev(queue_count=2) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + # set threaded + system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is set for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_ne(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_ne(napi1.get('pid'), None) + + # unset threaded + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1.get('pid'), None) def nsim_rxq_reset_down(nf) -> None: """ @@ -122,7 +156,7 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, - nsim_rxq_reset_down], + dev_set_threaded, nsim_rxq_reset_down], args=(nf, )) ksft_exit() diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index ef8b25a606d8..c5b01e1bd4c7 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -39,11 +39,15 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then # xfail tests that are known flaky with dbg config, not fixable. # still run them for coverage (and expect 100% pass without dbg). declare -ar xfail_list=( + "tcp_blocking_blocking-connect.pkt" + "tcp_blocking_blocking-read.pkt" "tcp_eor_no-coalesce-retrans.pkt" "tcp_fast_recovery_prr-ss.*.pkt" + "tcp_sack_sack-route-refresh-ip-tos.pkt" "tcp_slow_start_slow-start-after-win-update.pkt" "tcp_timestamping.*.pkt" "tcp_user_timeout_user-timeout-probe.pkt" + "tcp_zerocopy_cl.*.pkt" "tcp_zerocopy_epoll_.*.pkt" "tcp_tcp_info_tcp-info-.*-limited.pkt" ) diff --git a/tools/testing/selftests/net/rtnetlink_notification.sh b/tools/testing/selftests/net/rtnetlink_notification.sh new file mode 100755 index 000000000000..3f9780232bd6 --- /dev/null +++ b/tools/testing/selftests/net/rtnetlink_notification.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking rtnetlink notification callpaths, and get as much +# coverage as possible. +# +# set -e + +ALL_TESTS=" + kci_test_mcast_addr_notification + kci_test_anycast_addr_notification +" + +source lib.sh +test_dev="test-dummy1" + +kci_test_mcast_addr_notification() +{ + RET=0 + local tmpfile + local monitor_pid + local match_result + + tmpfile=$(mktemp) + defer rm "$tmpfile" + + ip monitor maddr > $tmpfile & + monitor_pid=$! + defer kill_process "$monitor_pid" + + sleep 1 + + if [ ! -e "/proc/$monitor_pid" ]; then + RET=$ksft_skip + log_test "mcast addr notification: iproute2 too old" + return $RET + fi + + ip link add name "$test_dev" type dummy + check_err $? "failed to add dummy interface" + ip link set "$test_dev" up + check_err $? "failed to set dummy interface up" + ip link del dev "$test_dev" + check_err $? "Failed to delete dummy interface" + sleep 1 + + # There should be 4 line matches as follows. + # 13: test-dummy1 inet6 mcast ff02::1 scope global + # 13: test-dummy1 inet mcast 224.0.0.1 scope global + # Deleted 13: test-dummy1 inet mcast 224.0.0.1 scope global + # Deleted 13: test-dummy1 inet6 mcast ff02::1 scope global + match_result=$(grep -cE "$test_dev.*(224.0.0.1|ff02::1)" "$tmpfile") + if [ "$match_result" -ne 4 ]; then + RET=$ksft_fail + fi + log_test "mcast addr notification: Expected 4 matches, got $match_result" + return $RET +} + +kci_test_anycast_addr_notification() +{ + RET=0 + local tmpfile + local monitor_pid + local match_result + + tmpfile=$(mktemp) + defer rm "$tmpfile" + + ip monitor acaddress > "$tmpfile" & + monitor_pid=$! + defer kill_process "$monitor_pid" + sleep 1 + + if [ ! -e "/proc/$monitor_pid" ]; then + RET=$ksft_skip + log_test "anycast addr notification: iproute2 too old" + return "$RET" + fi + + ip link add name "$test_dev" type dummy + check_err $? "failed to add dummy interface" + ip link set "$test_dev" up + check_err $? "failed to set dummy interface up" + sysctl -qw net.ipv6.conf."$test_dev".forwarding=1 + ip link del dev "$test_dev" + check_err $? "Failed to delete dummy interface" + sleep 1 + + # There should be 2 line matches as follows. + # 9: dummy2 inet6 any fe80:: scope global + # Deleted 9: dummy2 inet6 any fe80:: scope global + match_result=$(grep -cE "$test_dev.*(fe80::)" "$tmpfile") + if [ "$match_result" -ne 2 ]; then + RET=$ksft_fail + fi + log_test "anycast addr notification: Expected 2 matches, got $match_result" + return "$RET" +} + +#check for needed privileges +if [ "$(id -u)" -ne 0 ];then + RET=$ksft_skip + log_test "need root privileges" + exit $RET +fi + +require_command ip + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh index 4b86040c58c6..bedf0ce885c2 100755 --- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh @@ -72,6 +72,9 @@ # Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in # the selftest network. # +# In addition, every router interface connecting rt-x to rt-y is assigned an +# IPv6 link-local address fe80::x:y/64. +# # Local SID/C-SID table # ===================== # @@ -521,6 +524,9 @@ setup_rt_networking() ip -netns "${nsname}" addr \ add "${net_prefix}::${rt}/64" dev "${devname}" nodad + ip -netns "${nsname}" addr \ + add "fe80::${rt}:${neigh}/64" dev "${devname}" nodad + ip -netns "${nsname}" link set "${devname}" up done @@ -609,6 +615,27 @@ set_end_x_nextcsid() nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}" } +set_end_x_ll_nextcsid() +{ + local rt="$1" + local adj="$2" + + eval nsname=\${$(get_rtname "${rt}")} + lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")" + nh6_ll_addr="fe80::${adj}:${rt}" + oifname="veth-rt-${rt}-${adj}" + + # enabled NEXT-C-SID SRv6 End.X behavior via an IPv6 link-local nexthop + # address (note that "dev" is the dummy dum0 device chosen for the sake + # of simplicity). + ip -netns "${nsname}" -6 route \ + replace "${lcnode_func_prefix}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.X nh6 "${nh6_ll_addr}" \ + oif "${oifname}" flavors next-csid lblen "${LCBLOCK_BLEN}" \ + nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}" +} + set_underlay_sids_reachability() { local rt="$1" @@ -1016,6 +1043,27 @@ host_vpn_tests() check_and_log_hs_ipv4_connectivity 1 2 check_and_log_hs_ipv4_connectivity 2 1 + + # Setup the adjacencies in the SRv6 aware routers using IPv6 link-local + # addresses. + # - rt-3 SRv6 End.X adjacency with rt-4 + # - rt-4 SRv6 End.X adjacency with rt-1 + set_end_x_ll_nextcsid 3 4 + set_end_x_ll_nextcsid 4 1 + + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local" + + check_and_log_hs_ipv6_connectivity 1 2 + check_and_log_hs_ipv6_connectivity 2 1 + + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local" + + check_and_log_hs_ipv4_connectivity 1 2 + check_and_log_hs_ipv4_connectivity 2 1 + + # Restore the previous adjacencies. + set_end_x_nextcsid 3 4 + set_end_x_nextcsid 4 1 } __nextcsid_end_x_behavior_test() diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c index f00245263b20..6478da6a71c3 100644 --- a/tools/testing/selftests/net/tcp_ao/seq-ext.c +++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Check that after SEQ number wrap-around: * 1. SEQ-extension has upper bytes set - * 2. TCP conneciton is alive and no TCPAOBad segments + * 2. TCP connection is alive and no TCPAOBad segments * In order to test (2), the test doesn't just adjust seq number for a queue * on a connected socket, but migrates it to another sk+port number, so * that there won't be any delayed packets that will fail to verify diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh index 6127a78ee988..8deacc565afa 100755 --- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh +++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh @@ -146,18 +146,17 @@ run_cmd() } check_hv_connectivity() { - ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null - sleep 1 - ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null + slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null + slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null return $? } check_vm_connectivity() { - run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" + slowwait 5 run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)" - run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" + slowwait 5 run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)" } diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index e9c2f71da207..ce34cb2e6e0b 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -275,7 +275,7 @@ setup_sym() # Wait for ip config to settle - sleep 2 + slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 } setup_asym() @@ -370,7 +370,7 @@ setup_asym() ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad # Wait for ip config to settle - sleep 2 + slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 } check_connectivity() |