20 files changed, 2011 insertions, 48 deletions
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 332f387615d7..227f9e067d25 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -41,6 +41,7 @@ TEST_PROGS += netns-name.sh
 TEST_PROGS += link_netns.py
 TEST_PROGS += nl_netdev.py
 TEST_PROGS += rtnetlink.py
+TEST_PROGS += rtnetlink_notification.sh
 TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
diff --git a/tools/testing/selftests/net/bench/Makefile b/tools/testing/selftests/net/bench/Makefile
new file mode 100644
index 000000000000..2546c45e42f7
--- /dev/null
+++ b/tools/testing/selftests/net/bench/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_MODS_DIR := page_pool
+
+TEST_PROGS += test_bench_page_pool.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/bench/page_pool/Makefile b/tools/testing/selftests/net/bench/page_pool/Makefile
new file mode 100644
index 000000000000..0549a16ba275
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/Makefile
@@ -0,0 +1,17 @@
+BENCH_PAGE_POOL_SIMPLE_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+obj-m	+= bench_page_pool.o
+bench_page_pool-y += bench_page_pool_simple.o time_bench.o
+
+all:
+	+$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) modules
+
+clean:
+	+$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) clean
diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
new file mode 100644
index 000000000000..f183d5e30dc6
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmark module for page_pool.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/mutex.h>
+
+#include <linux/version.h>
+#include <net/page_pool/helpers.h>
+
+#include <linux/interrupt.h>
+#include <linux/limits.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+#define MY_POOL_SIZE 1024
+
+static void _page_pool_put_page(struct page_pool *pool, struct page *page,
+				bool allow_direct)
+{
+	page_pool_put_page(pool, page, -1, allow_direct);
+}
+
+/* Makes tests selectable. Useful for perf-record to analyze a single test.
+ * Hint: Bash shells support writing binary number like: $((2#101010)
+ *
+ * # modprobe bench_page_pool_simple run_flags=$((2#100))
+ */
+static unsigned long run_flags = 0xFFFFFFFF;
+module_param(run_flags, ulong, 0);
+MODULE_PARM_DESC(run_flags, "Limit which bench test that runs");
+
+/* Count the bit number from the enum */
+enum benchmark_bit {
+	bit_run_bench_baseline,
+	bit_run_bench_no_softirq01,
+	bit_run_bench_no_softirq02,
+	bit_run_bench_no_softirq03,
+};
+
+#define bit(b)		(1 << (b))
+#define enabled(b)	((run_flags & (bit(b))))
+
+/* notice time_bench is limited to U32_MAX nr loops */
+static unsigned long loops = 10000000;
+module_param(loops, ulong, 0);
+MODULE_PARM_DESC(loops, "Specify loops bench will run");
+
+/* Timing at the nanosec level, we need to know the overhead
+ * introduced by the for loop itself
+ */
+static int time_bench_for_loop(struct time_bench_record *rec, void *data)
+{
+	uint64_t loops_cnt = 0;
+	int i;
+
+	time_bench_start(rec);
+	/** Loop to measure **/
+	for (i = 0; i < rec->loops; i++) {
+		loops_cnt++;
+		barrier(); /* avoid compiler to optimize this loop */
+	}
+	time_bench_stop(rec, loops_cnt);
+	return loops_cnt;
+}
+
+static int time_bench_atomic_inc(struct time_bench_record *rec, void *data)
+{
+	uint64_t loops_cnt = 0;
+	atomic_t cnt;
+	int i;
+
+	atomic_set(&cnt, 0);
+
+	time_bench_start(rec);
+	/** Loop to measure **/
+	for (i = 0; i < rec->loops; i++) {
+		atomic_inc(&cnt);
+		barrier(); /* avoid compiler to optimize this loop */
+	}
+	loops_cnt = atomic_read(&cnt);
+	time_bench_stop(rec, loops_cnt);
+	return loops_cnt;
+}
+
+/* The ptr_ping in page_pool uses a spinlock. We need to know the minimum
+ * overhead of taking+releasing a spinlock, to know the cycles that can be saved
+ * by e.g. amortizing this via bulking.
+ */
+static int time_bench_lock(struct time_bench_record *rec, void *data)
+{
+	uint64_t loops_cnt = 0;
+	spinlock_t lock;
+	int i;
+
+	spin_lock_init(&lock);
+
+	time_bench_start(rec);
+	/** Loop to measure **/
+	for (i = 0; i < rec->loops; i++) {
+		spin_lock(&lock);
+		loops_cnt++;
+		barrier(); /* avoid compiler to optimize this loop */
+		spin_unlock(&lock);
+	}
+	time_bench_stop(rec, loops_cnt);
+	return loops_cnt;
+}
+
+/* Helper for filling some page's into ptr_ring */
+static void pp_fill_ptr_ring(struct page_pool *pp, int elems)
+{
+	/* GFP_ATOMIC needed when under run softirq */
+	gfp_t gfp_mask = GFP_ATOMIC;
+	struct page **array;
+	int i;
+
+	array = kcalloc(elems, sizeof(struct page *), gfp_mask);
+
+	for (i = 0; i < elems; i++)
+		array[i] = page_pool_alloc_pages(pp, gfp_mask);
+	for (i = 0; i < elems; i++)
+		_page_pool_put_page(pp, array[i], false);
+
+	kfree(array);
+}
+
+enum test_type { type_fast_path, type_ptr_ring, type_page_allocator };
+
+/* Depends on compile optimizing this function */
+static int time_bench_page_pool(struct time_bench_record *rec, void *data,
+				enum test_type type, const char *func)
+{
+	uint64_t loops_cnt = 0;
+	gfp_t gfp_mask = GFP_ATOMIC; /* GFP_ATOMIC is not really needed */
+	int i, err;
+
+	struct page_pool *pp;
+	struct page *page;
+
+	struct page_pool_params pp_params = {
+		.order = 0,
+		.flags = 0,
+		.pool_size = MY_POOL_SIZE,
+		.nid = NUMA_NO_NODE,
+		.dev = NULL, /* Only use for DMA mapping */
+		.dma_dir = DMA_BIDIRECTIONAL,
+	};
+
+	pp = page_pool_create(&pp_params);
+	if (IS_ERR(pp)) {
+		err = PTR_ERR(pp);
+		pr_warn("%s: Error(%d) creating page_pool\n", func, err);
+		goto out;
+	}
+	pp_fill_ptr_ring(pp, 64);
+
+	if (in_serving_softirq())
+		pr_warn("%s(): in_serving_softirq fast-path\n", func);
+	else
+		pr_warn("%s(): Cannot use page_pool fast-path\n", func);
+
+	time_bench_start(rec);
+	/** Loop to measure **/
+	for (i = 0; i < rec->loops; i++) {
+		/* Common fast-path alloc that depend on in_serving_softirq() */
+		page = page_pool_alloc_pages(pp, gfp_mask);
+		if (!page)
+			break;
+		loops_cnt++;
+		barrier(); /* avoid compiler to optimize this loop */
+
+		/* The benchmarks purpose it to test different return paths.
+		 * Compiler should inline optimize other function calls out
+		 */
+		if (type == type_fast_path) {
+			/* Fast-path recycling e.g. XDP_DROP use-case */
+			page_pool_recycle_direct(pp, page);
+
+		} else if (type == type_ptr_ring) {
+			/* Normal return path */
+			_page_pool_put_page(pp, page, false);
+
+		} else if (type == type_page_allocator) {
+			/* Test if not pages are recycled, but instead
+			 * returned back into systems page allocator
+			 */
+			get_page(page); /* cause no-recycling */
+			_page_pool_put_page(pp, page, false);
+			put_page(page);
+		} else {
+			BUILD_BUG();
+		}
+	}
+	time_bench_stop(rec, loops_cnt);
+out:
+	page_pool_destroy(pp);
+	return loops_cnt;
+}
+
+static int time_bench_page_pool01_fast_path(struct time_bench_record *rec,
+					    void *data)
+{
+	return time_bench_page_pool(rec, data, type_fast_path, __func__);
+}
+
+static int time_bench_page_pool02_ptr_ring(struct time_bench_record *rec,
+					   void *data)
+{
+	return time_bench_page_pool(rec, data, type_ptr_ring, __func__);
+}
+
+static int time_bench_page_pool03_slow(struct time_bench_record *rec,
+				       void *data)
+{
+	return time_bench_page_pool(rec, data, type_page_allocator, __func__);
+}
+
+static int run_benchmark_tests(void)
+{
+	uint32_t nr_loops = loops;
+
+	/* Baseline tests */
+	if (enabled(bit_run_bench_baseline)) {
+		time_bench_loop(nr_loops * 10, 0, "for_loop", NULL,
+				time_bench_for_loop);
+		time_bench_loop(nr_loops * 10, 0, "atomic_inc", NULL,
+				time_bench_atomic_inc);
+		time_bench_loop(nr_loops, 0, "lock", NULL, time_bench_lock);
+	}
+
+	/* This test cannot activate correct code path, due to no-softirq ctx */
+	if (enabled(bit_run_bench_no_softirq01))
+		time_bench_loop(nr_loops, 0, "no-softirq-page_pool01", NULL,
+				time_bench_page_pool01_fast_path);
+	if (enabled(bit_run_bench_no_softirq02))
+		time_bench_loop(nr_loops, 0, "no-softirq-page_pool02", NULL,
+				time_bench_page_pool02_ptr_ring);
+	if (enabled(bit_run_bench_no_softirq03))
+		time_bench_loop(nr_loops, 0, "no-softirq-page_pool03", NULL,
+				time_bench_page_pool03_slow);
+
+	return 0;
+}
+
+static int __init bench_page_pool_simple_module_init(void)
+{
+	if (verbose)
+		pr_info("Loaded\n");
+
+	if (loops > U32_MAX) {
+		pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", loops,
+		       U32_MAX);
+		return -ECHRNG;
+	}
+
+	run_benchmark_tests();
+
+	return 0;
+}
+module_init(bench_page_pool_simple_module_init);
+
+static void __exit bench_page_pool_simple_module_exit(void)
+{
+	if (verbose)
+		pr_info("Unloaded\n");
+}
+module_exit(bench_page_pool_simple_module_exit);
+
+MODULE_DESCRIPTION("Benchmark of page_pool simple cases");
+MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.c b/tools/testing/selftests/net/bench/page_pool/time_bench.c
new file mode 100644
index 000000000000..073bb36ec5f2
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/time.h>
+
+#include <linux/perf_event.h> /* perf_event_create_kernel_counter() */
+
+/* For concurrency testing */
+#include <linux/completion.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+
+/** TSC (Time-Stamp Counter) based **
+ * See: linux/time_bench.h
+ *  tsc_start_clock() and tsc_stop_clock()
+ */
+
+/** Wall-clock based **
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ */
+#define PERF_FORMAT                                                            \
+	(PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | \
+	 PERF_FORMAT_TOTAL_TIME_RUNNING)
+
+struct raw_perf_event {
+	uint64_t config; /* event */
+	uint64_t config1; /* umask */
+	struct perf_event *save;
+	char *desc;
+};
+
+/* if HT is enable a maximum of 4 events (5 if one is instructions
+ * retired can be specified, if HT is disabled a maximum of 8 (9 if
+ * one is instructions retired) can be specified.
+ *
+ * From Table 19-1. Architectural Performance Events
+ * Architectures Software Developer’s Manual Volume 3: System Programming
+ * Guide
+ */
+struct raw_perf_event perf_events[] = {
+	{ 0x3c, 0x00, NULL, "Unhalted CPU Cycles" },
+	{ 0xc0, 0x00, NULL, "Instruction Retired" }
+};
+
+#define NUM_EVTS (ARRAY_SIZE(perf_events))
+
+/* WARNING: PMU config is currently broken!
+ */
+bool time_bench_PMU_config(bool enable)
+{
+	int i;
+	struct perf_event_attr perf_conf;
+	struct perf_event *perf_event;
+	int cpu;
+
+	preempt_disable();
+	cpu = smp_processor_id();
+	pr_info("DEBUG: cpu:%d\n", cpu);
+	preempt_enable();
+
+	memset(&perf_conf, 0, sizeof(struct perf_event_attr));
+	perf_conf.type           = PERF_TYPE_RAW;
+	perf_conf.size           = sizeof(struct perf_event_attr);
+	perf_conf.read_format    = PERF_FORMAT;
+	perf_conf.pinned         = 1;
+	perf_conf.exclude_user   = 1; /* No userspace events */
+	perf_conf.exclude_kernel = 0; /* Only kernel events */
+
+	for (i = 0; i < NUM_EVTS; i++) {
+		perf_conf.disabled = enable;
+		//perf_conf.disabled = (i == 0) ? 1 : 0;
+		perf_conf.config   = perf_events[i].config;
+		perf_conf.config1  = perf_events[i].config1;
+		if (verbose)
+			pr_info("%s() enable PMU counter: %s\n",
+				__func__, perf_events[i].desc);
+		perf_event = perf_event_create_kernel_counter(&perf_conf, cpu,
+							      NULL /* task */,
+							      NULL /* overflow_handler*/,
+							      NULL /* context */);
+		if (perf_event) {
+			perf_events[i].save = perf_event;
+			pr_info("%s():DEBUG perf_event success\n", __func__);
+
+			perf_event_enable(perf_event);
+		} else {
+			pr_info("%s():DEBUG perf_event is NULL\n", __func__);
+		}
+	}
+
+	return true;
+}
+
+/** Generic functions **
+ */
+
+/* Calculate stats, store results in record */
+bool time_bench_calc_stats(struct time_bench_record *rec)
+{
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+	uint64_t ns_per_call_tmp_rem = 0;
+	uint32_t ns_per_call_remainder = 0;
+	uint64_t pmc_ipc_tmp_rem = 0;
+	uint32_t pmc_ipc_remainder = 0;
+	uint32_t pmc_ipc_div = 0;
+	uint32_t invoked_cnt_precision = 0;
+	uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */
+
+	if (rec->flags & TIME_BENCH_LOOP) {
+		if (rec->invoked_cnt < 1000) {
+			pr_err("ERR: need more(>1000) loops(%llu) for timing\n",
+			       rec->invoked_cnt);
+			return false;
+		}
+		if (rec->invoked_cnt > ((1ULL << 32) - 1)) {
+			/* div_u64_rem() can only support div with 32bit*/
+			pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n",
+			       rec->invoked_cnt);
+			return false;
+		}
+		invoked_cnt = (uint32_t)rec->invoked_cnt;
+	}
+
+	/* TSC (Time-Stamp Counter) records */
+	if (rec->flags & TIME_BENCH_TSC) {
+		rec->tsc_interval = rec->tsc_stop - rec->tsc_start;
+		if (rec->tsc_interval == 0) {
+			pr_err("ABORT: timing took ZERO TSC time\n");
+			return false;
+		}
+		/* Calculate stats */
+		if (rec->flags & TIME_BENCH_LOOP)
+			rec->tsc_cycles = rec->tsc_interval / invoked_cnt;
+		else
+			rec->tsc_cycles = rec->tsc_interval;
+	}
+
+	/* Wall-clock time calc */
+	if (rec->flags & TIME_BENCH_WALLCLOCK) {
+		rec->time_start = rec->ts_start.tv_nsec +
+				  (NANOSEC_PER_SEC * rec->ts_start.tv_sec);
+		rec->time_stop = rec->ts_stop.tv_nsec +
+				 (NANOSEC_PER_SEC * rec->ts_stop.tv_sec);
+		rec->time_interval = rec->time_stop - rec->time_start;
+		if (rec->time_interval == 0) {
+			pr_err("ABORT: timing took ZERO wallclock time\n");
+			return false;
+		}
+		/* Calculate stats */
+		/*** Division in kernel it tricky ***/
+		/* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */
+		/* remainder only correct because NANOSEC_PER_SEC is 10^9 */
+		rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC,
+					    &rec->time_sec_remainder);
+		//TODO: use existing struct timespec records instead of div?
+
+		if (rec->flags & TIME_BENCH_LOOP) {
+			/*** Division in kernel it tricky ***/
+			/* Orig: ns = ((double)time_interval / invoked_cnt); */
+			/* First get quotient */
+			rec->ns_per_call_quotient =
+				div_u64_rem(rec->time_interval, invoked_cnt,
+					    &ns_per_call_remainder);
+			/* Now get decimals .xxx precision (incorrect roundup)*/
+			ns_per_call_tmp_rem = ns_per_call_remainder;
+			invoked_cnt_precision = invoked_cnt / 1000;
+			if (invoked_cnt_precision > 0) {
+				rec->ns_per_call_decimal =
+					div_u64_rem(ns_per_call_tmp_rem,
+						    invoked_cnt_precision,
+						    &ns_per_call_remainder);
+			}
+		}
+	}
+
+	/* Performance Monitor Unit (PMU) counters */
+	if (rec->flags & TIME_BENCH_PMU) {
+		//FIXME: Overflow handling???
+		rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start;
+		rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start;
+
+		/* Calc Instruction Per Cycle (IPC) */
+		/* First get quotient */
+		rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, rec->pmc_clk,
+						    &pmc_ipc_remainder);
+		/* Now get decimals .xxx precision (incorrect roundup)*/
+		pmc_ipc_tmp_rem = pmc_ipc_remainder;
+		pmc_ipc_div = rec->pmc_clk / 1000;
+		if (pmc_ipc_div > 0) {
+			rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem,
+							   pmc_ipc_div,
+							   &pmc_ipc_remainder);
+		}
+	}
+
+	return true;
+}
+
+/* Generic function for invoking a loop function and calculating
+ * execution time stats.  The function being called/timed is assumed
+ * to perform a tight loop, and update the timing record struct.
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+		     int (*func)(struct time_bench_record *record, void *data))
+{
+	struct time_bench_record rec;
+
+	/* Setup record */
+	memset(&rec, 0, sizeof(rec)); /* zero func might not update all */
+	rec.version_abi = 1;
+	rec.loops       = loops;
+	rec.step        = step;
+	rec.flags       = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK);
+
+	/*** Loop function being timed ***/
+	if (!func(&rec, data)) {
+		pr_err("ABORT: function being timed failed\n");
+		return false;
+	}
+
+	if (rec.invoked_cnt < loops)
+		pr_warn("WARNING: Invoke count(%llu) smaller than loops(%d)\n",
+			rec.invoked_cnt, loops);
+
+	/* Calculate stats */
+	time_bench_calc_stats(&rec);
+
+	pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+		txt, rec.tsc_cycles, rec.ns_per_call_quotient,
+		rec.ns_per_call_decimal, rec.step, rec.time_sec,
+		rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt,
+		rec.tsc_interval);
+	if (rec.flags & TIME_BENCH_PMU)
+		pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n",
+			txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient,
+			rec.pmc_ipc_decimal);
+	return true;
+}
+
+/* Function getting invoked by kthread */
+static int invoke_test_on_cpu_func(void *private)
+{
+	struct time_bench_cpu *cpu = private;
+	struct time_bench_sync *sync = cpu->sync;
+	cpumask_t newmask = CPU_MASK_NONE;
+	void *data = cpu->data;
+
+	/* Restrict CPU */
+	cpumask_set_cpu(cpu->rec.cpu, &newmask);
+	set_cpus_allowed_ptr(current, &newmask);
+
+	/* Synchronize start of concurrency test */
+	atomic_inc(&sync->nr_tests_running);
+	wait_for_completion(&sync->start_event);
+
+	/* Start benchmark function */
+	if (!cpu->bench_func(&cpu->rec, data)) {
+		pr_err("ERROR: function being timed failed on CPU:%d(%d)\n",
+		       cpu->rec.cpu, smp_processor_id());
+	} else {
+		if (verbose)
+			pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu,
+				smp_processor_id());
+	}
+	cpu->did_bench_run = true;
+
+	/* End test */
+	atomic_dec(&sync->nr_tests_running);
+	/*  Wait for kthread_stop() telling us to stop */
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
+void time_bench_print_stats_cpumask(const char *desc,
+				    struct time_bench_cpu *cpu_tasks,
+				    const struct cpumask *mask)
+{
+	uint64_t average = 0;
+	int cpu;
+	int step = 0;
+	struct sum {
+		uint64_t tsc_cycles;
+		int records;
+	} sum = { 0 };
+
+	/* Get stats */
+	for_each_cpu(cpu, mask) {
+		struct time_bench_cpu *c = &cpu_tasks[cpu];
+		struct time_bench_record *rec = &c->rec;
+
+		/* Calculate stats */
+		time_bench_calc_stats(rec);
+
+		pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+			desc, cpu, rec->tsc_cycles, rec->ns_per_call_quotient,
+			rec->ns_per_call_decimal, rec->step, rec->time_sec,
+			rec->time_sec_remainder, rec->time_interval,
+			rec->invoked_cnt, rec->tsc_interval);
+
+		/* Collect average */
+		sum.records++;
+		sum.tsc_cycles += rec->tsc_cycles;
+		step = rec->step;
+	}
+
+	if (sum.records) /* avoid div-by-zero */
+		average = sum.tsc_cycles / sum.records;
+	pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc,
+		average, sum.records, step);
+}
+
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+			       const struct cpumask *mask, /* Support masking outsome CPUs*/
+			       struct time_bench_sync *sync,
+			       struct time_bench_cpu *cpu_tasks,
+			       int (*func)(struct time_bench_record *record, void *data))
+{
+	int cpu, running = 0;
+
+	if (verbose) // DEBUG
+		pr_warn("%s() Started on CPU:%d\n", __func__,
+			smp_processor_id());
+
+	/* Reset sync conditions */
+	atomic_set(&sync->nr_tests_running, 0);
+	init_completion(&sync->start_event);
+
+	/* Spawn off jobs on all CPUs */
+	for_each_cpu(cpu, mask) {
+		struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+		running++;
+		c->sync = sync; /* Send sync variable along */
+		c->data = data; /* Send opaque along */
+
+		/* Init benchmark record */
+		memset(&c->rec, 0, sizeof(struct time_bench_record));
+		c->rec.version_abi = 1;
+		c->rec.loops       = loops;
+		c->rec.step        = step;
+		c->rec.flags       = (TIME_BENCH_LOOP | TIME_BENCH_TSC |
+				      TIME_BENCH_WALLCLOCK);
+		c->rec.cpu = cpu;
+		c->bench_func = func;
+		c->task = kthread_run(invoke_test_on_cpu_func, c,
+				      "time_bench%d", cpu);
+		if (IS_ERR(c->task)) {
+			pr_err("%s(): Failed to start test func\n", __func__);
+			return; /* Argh, what about cleanup?! */
+		}
+	}
+
+	/* Wait until all processes are running */
+	while (atomic_read(&sync->nr_tests_running) < running) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(10);
+	}
+	/* Kick off all CPU concurrently on completion event */
+	complete_all(&sync->start_event);
+
+	/* Wait for CPUs to finish */
+	while (atomic_read(&sync->nr_tests_running)) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(10);
+	}
+
+	/* Stop the kthreads */
+	for_each_cpu(cpu, mask) {
+		struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+		kthread_stop(c->task);
+	}
+
+	if (verbose) // DEBUG - happens often, finish on another CPU
+		pr_warn("%s() Finished on CPU:%d\n", __func__,
+			smp_processor_id());
+}
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.h b/tools/testing/selftests/net/bench/page_pool/time_bench.h
new file mode 100644
index 000000000000..e113fcf341dc
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ *  for licensing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_TIME_BENCH_H
+#define _LINUX_TIME_BENCH_H
+
+/* Main structure used for recording a benchmark run */
+struct time_bench_record {
+	uint32_t version_abi;
+	uint32_t loops;		/* Requested loop invocations */
+	uint32_t step;		/* option for e.g. bulk invocations */
+
+	uint32_t flags;		/* Measurements types enabled */
+#define TIME_BENCH_LOOP		BIT(0)
+#define TIME_BENCH_TSC		BIT(1)
+#define TIME_BENCH_WALLCLOCK	BIT(2)
+#define TIME_BENCH_PMU		BIT(3)
+
+	uint32_t cpu; /* Used when embedded in time_bench_cpu */
+
+	/* Records */
+	uint64_t invoked_cnt;	/* Returned actual invocations */
+	uint64_t tsc_start;
+	uint64_t tsc_stop;
+	struct timespec64 ts_start;
+	struct timespec64 ts_stop;
+	/* PMU counters for instruction and cycles
+	 * instructions counter including pipelined instructions
+	 */
+	uint64_t pmc_inst_start;
+	uint64_t pmc_inst_stop;
+	/* CPU unhalted clock counter */
+	uint64_t pmc_clk_start;
+	uint64_t pmc_clk_stop;
+
+	/* Result records */
+	uint64_t tsc_interval;
+	uint64_t time_start, time_stop, time_interval; /* in nanosec */
+	uint64_t pmc_inst, pmc_clk;
+
+	/* Derived result records */
+	uint64_t tsc_cycles; // +decimal?
+	uint64_t ns_per_call_quotient, ns_per_call_decimal;
+	uint64_t time_sec;
+	uint32_t time_sec_remainder;
+	uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* inst per cycle */
+};
+
+/* For synchronizing parallel CPUs to run concurrently */
+struct time_bench_sync {
+	atomic_t nr_tests_running;
+	struct completion start_event;
+};
+
+/* Keep track of CPUs executing our bench function.
+ *
+ * Embed a time_bench_record for storing info per cpu
+ */
+struct time_bench_cpu {
+	struct time_bench_record rec;
+	struct time_bench_sync *sync; /* back ptr */
+	struct task_struct *task;
+	/* "data" opaque could have been placed in time_bench_sync,
+	 * but to avoid any false sharing, place it per CPU
+	 */
+	void *data;
+	/* Support masking outsome CPUs, mark if it ran */
+	bool did_bench_run;
+	/* int cpu; // note CPU stored in time_bench_record */
+	int (*bench_func)(struct time_bench_record *record, void *data);
+};
+
+/*
+ * Below TSC assembler code is not compatible with other archs, and
+ * can also fail on guests if cpu-flags are not correct.
+ *
+ * The way TSC reading is used, many iterations, does not require as
+ * high accuracy as described below (in Intel Doc #324264).
+ *
+ * Considering changing to use get_cycles() (#include <asm/timex.h>).
+ */
+
+/** TSC (Time-Stamp Counter) based **
+ * Recommend reading, to understand details of reading TSC accurately:
+ *  Intel Doc #324264, "How to Benchmark Code Execution Times on Intel"
+ *
+ * Consider getting exclusive ownership of CPU by using:
+ *   unsigned long flags;
+ *   preempt_disable();
+ *   raw_local_irq_save(flags);
+ *   _your_code_
+ *   raw_local_irq_restore(flags);
+ *   preempt_enable();
+ *
+ * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx"
+ *  RDTSC only change "%rax" and "%rdx" but
+ *  CPUID clears the high 32-bits of all (rax/rbx/rcx/rdx)
+ */
+static __always_inline uint64_t tsc_start_clock(void)
+{
+	/* See: Intel Doc #324264 */
+	unsigned int hi, lo;
+
+	asm volatile("CPUID\n\t"
+		     "RDTSC\n\t"
+		     "mov %%edx, %0\n\t"
+		     "mov %%eax, %1\n\t"
+		     : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+	//FIXME: on 32bit use clobbered %eax + %edx
+	return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+static __always_inline uint64_t tsc_stop_clock(void)
+{
+	/* See: Intel Doc #324264 */
+	unsigned int hi, lo;
+
+	asm volatile("RDTSCP\n\t"
+		     "mov %%edx, %0\n\t"
+		     "mov %%eax, %1\n\t"
+		     "CPUID\n\t"
+		     : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+	return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+/** Wall-clock based **
+ *
+ * use: getnstimeofday()
+ *  getnstimeofday(&rec->ts_start);
+ *  getnstimeofday(&rec->ts_stop);
+ *
+ * API changed see: Documentation/core-api/timekeeping.rst
+ *  https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday
+ *
+ * We should instead use: ktime_get_real_ts64() is a direct
+ *  replacement, but consider using monotonic time (ktime_get_ts64())
+ *  and/or a ktime_t based interface (ktime_get()/ktime_get_real()).
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ *
+ * Needed for calculating: Instructions Per Cycle (IPC)
+ * - The IPC number tell how efficient the CPU pipelining were
+ */
+//lookup: perf_event_create_kernel_counter()
+
+bool time_bench_PMU_config(bool enable);
+
+/* Raw reading via rdpmc() using fixed counters
+ *
+ * From: https://github.com/andikleen/simple-pmu
+ */
+enum {
+	FIXED_SELECT = (1U << 30), /* == 0x40000000 */
+	FIXED_INST_RETIRED_ANY = 0,
+	FIXED_CPU_CLK_UNHALTED_CORE = 1,
+	FIXED_CPU_CLK_UNHALTED_REF = 2,
+};
+
+static __always_inline unsigned int long long p_rdpmc(unsigned int in)
+{
+	unsigned int d, a;
+
+	asm volatile("rdpmc" : "=d"(d), "=a"(a) : "c"(in) : "memory");
+	return ((unsigned long long)d << 32) | a;
+}
+
+/* These PMU counter needs to be enabled, but I don't have the
+ * configure code implemented.  My current hack is running:
+ *  sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko
+ */
+/* Reading all pipelined instruction */
+static __always_inline unsigned long long pmc_inst(void)
+{
+	return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY);
+}
+
+/* Reading CPU clock cycles */
+static __always_inline unsigned long long pmc_clk(void)
+{
+	return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE);
+}
+
+/* Raw reading via MSR rdmsr() is likely wrong
+ * FIXME: How can I know which raw MSR registers are conf for what?
+ */
+#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */
+#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */
+#define MSR_IA32_PCM2 0x400000C3
+static inline uint64_t msr_inst(unsigned long long *msr_result)
+{
+	return rdmsrq_safe(MSR_IA32_PCM0, msr_result);
+}
+
+/** Generic functions **
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+		     int (*func)(struct time_bench_record *rec, void *data));
+bool time_bench_calc_stats(struct time_bench_record *rec);
+
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+			       const struct cpumask *mask, /* Support masking outsome CPUs*/
+			       struct time_bench_sync *sync, struct time_bench_cpu *cpu_tasks,
+			       int (*func)(struct time_bench_record *record, void *data));
+void time_bench_print_stats_cpumask(const char *desc,
+				    struct time_bench_cpu *cpu_tasks,
+				    const struct cpumask *mask);
+
+//FIXME: use rec->flags to select measurement, should be MACRO
+static __always_inline void time_bench_start(struct time_bench_record *rec)
+{
+	//getnstimeofday(&rec->ts_start);
+	ktime_get_real_ts64(&rec->ts_start);
+	if (rec->flags & TIME_BENCH_PMU) {
+		rec->pmc_inst_start = pmc_inst();
+		rec->pmc_clk_start = pmc_clk();
+	}
+	rec->tsc_start = tsc_start_clock();
+}
+
+static __always_inline void time_bench_stop(struct time_bench_record *rec,
+					    uint64_t invoked_cnt)
+{
+	rec->tsc_stop = tsc_stop_clock();
+	if (rec->flags & TIME_BENCH_PMU) {
+		rec->pmc_inst_stop = pmc_inst();
+		rec->pmc_clk_stop = pmc_clk();
+	}
+	//getnstimeofday(&rec->ts_stop);
+	ktime_get_real_ts64(&rec->ts_stop);
+	rec->invoked_cnt = invoked_cnt;
+}
+
+#endif /* _LINUX_TIME_BENCH_H */
diff --git a/tools/testing/selftests/net/bench/test_bench_page_pool.sh b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
new file mode 100755
index 000000000000..7b8b18cfedce
--- /dev/null
+++ b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+set -e
+
+DRIVER="./page_pool/bench_page_pool.ko"
+result=""
+
+function run_test()
+{
+	rmmod "bench_page_pool.ko" || true
+	insmod $DRIVER > /dev/null 2>&1
+	result=$(dmesg | tail -10)
+	echo "$result"
+
+	echo
+	echo "Fast path results:"
+	echo "${result}" | grep -o -E "no-softirq-page_pool01 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+
+	echo
+	echo "ptr_ring results:"
+	echo "${result}" | grep -o -E "no-softirq-page_pool02 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+
+	echo
+	echo "slow path results:"
+	echo "${result}" | grep -o -E "no-softirq-page_pool03 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+}
+
+run_test
+
+exit 0
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 00bde7b6f39e..d7bb2e80e88c 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -102,6 +102,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
 	vxlan_bridge_1d_port_8472.sh \
 	vxlan_bridge_1d.sh \
 	vxlan_bridge_1q_ipv6.sh \
+	vxlan_bridge_1q_mc_ul.sh \
 	vxlan_bridge_1q_port_8472_ipv6.sh \
 	vxlan_bridge_1q_port_8472.sh \
 	vxlan_bridge_1q.sh \
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 508f3c700d71..83ee6a07e072 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -37,6 +37,7 @@ declare -A NETIFS=(
 : "${TEAMD:=teamd}"
 : "${MCD:=smcrouted}"
 : "${MC_CLI:=smcroutectl}"
+: "${MCD_TABLE_NAME:=selftests}"
 
 # Constants for netdevice bring-up:
 # Default time in seconds to wait for an interface to come up before giving up
@@ -1757,6 +1758,51 @@ mc_send()
 		msend -g $groups -I $if_name -c 1 > /dev/null 2>&1
 }
 
+adf_mcd_start()
+{
+	local ifs=("$@")
+
+	local table_name="$MCD_TABLE_NAME"
+	local smcroutedir
+	local pid
+	local if
+	local i
+
+	check_command "$MCD" || return 1
+	check_command "$MC_CLI" || return 1
+
+	smcroutedir=$(mktemp -d)
+	defer rm -rf "$smcroutedir"
+
+	for ((i = 1; i <= NUM_NETIFS; ++i)); do
+		echo "phyint ${NETIFS[p$i]} enable" >> \
+			"$smcroutedir/$table_name.conf"
+	done
+
+	for if in "${ifs[@]}"; do
+		if ! ip_link_has_flag "$if" MULTICAST; then
+			ip link set dev "$if" multicast on
+			defer ip link set dev "$if" multicast off
+		fi
+
+		echo "phyint $if enable" >> \
+			"$smcroutedir/$table_name.conf"
+	done
+
+	"$MCD" -N -I "$table_name" -f "$smcroutedir/$table_name.conf" \
+		-P "$smcroutedir/$table_name.pid"
+	busywait "$BUSYWAIT_TIMEOUT" test -e "$smcroutedir/$table_name.pid"
+	pid=$(cat "$smcroutedir/$table_name.pid")
+	defer kill_process "$pid"
+}
+
+mc_cli()
+{
+	local table_name="$MCD_TABLE_NAME"
+
+        "$MC_CLI" -I "$table_name" "$@"
+}
+
 start_ip_monitor()
 {
 	local mtype=$1; shift
diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh
index 5a58b1ec8aef..83e52abdbc2e 100755
--- a/tools/testing/selftests/net/forwarding/router_multicast.sh
+++ b/tools/testing/selftests/net/forwarding/router_multicast.sh
@@ -33,10 +33,6 @@ NUM_NETIFS=6
 source lib.sh
 source tc_common.sh
 
-require_command $MCD
-require_command $MC_CLI
-table_name=selftests
-
 h1_create()
 {
 	simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64
@@ -149,25 +145,6 @@ router_destroy()
 	ip link set dev $rp1 down
 }
 
-start_mcd()
-{
-	SMCROUTEDIR="$(mktemp -d)"
-
-	for ((i = 1; i <= $NUM_NETIFS; ++i)); do
-		echo "phyint ${NETIFS[p$i]} enable" >> \
-			$SMCROUTEDIR/$table_name.conf
-	done
-
-	$MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \
-		-P $SMCROUTEDIR/$table_name.pid
-}
-
-kill_mcd()
-{
-	pkill $MCD
-	rm -rf $SMCROUTEDIR
-}
-
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
@@ -179,7 +156,7 @@ setup_prepare()
 	rp3=${NETIFS[p5]}
 	h3=${NETIFS[p6]}
 
-	start_mcd
+	adf_mcd_start || exit "$EXIT_STATUS"
 
 	vrf_prepare
 
@@ -206,7 +183,7 @@ cleanup()
 
 	vrf_cleanup
 
-	kill_mcd
+	defer_scopes_cleanup
 }
 
 create_mcast_sg()
@@ -214,9 +191,9 @@ create_mcast_sg()
 	local if_name=$1; shift
 	local s_addr=$1; shift
 	local mcast=$1; shift
-	local dest_ifs=${@}
+	local dest_ifs=("${@}")
 
-	$MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs
+	mc_cli add "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
 }
 
 delete_mcast_sg()
@@ -224,9 +201,9 @@ delete_mcast_sg()
 	local if_name=$1; shift
 	local s_addr=$1; shift
 	local mcast=$1; shift
-	local dest_ifs=${@}
+	local dest_ifs=("${@}")
 
-        $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs
+        mc_cli remove "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
 }
 
 mcast_v4()
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
new file mode 100755
index 000000000000..7ec58b6b1128
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
@@ -0,0 +1,771 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------------------------+
+# | + $h1.10             + $h1.20           |
+# | | 192.0.2.1/28       | 2001:db8:1::1/64 |
+# | \________   ________/                   |
+# |          \ /                            |
+# |           + $h1                H1 (vrf) |
+# +-----------|-----------------------------+
+#             |
+# +-----------|----------------------------------------------------------------+
+# | +---------|--------------------------------------+       SWITCH (main vrf) |
+# | |         + $swp1                   BR1 (802.1q) |                         |
+# | |            vid 10 20                           |                         |
+# | |                                                |                         |
+# | |  + vx10 (vxlan)         + vx20 (vxlan)         |      + lo10 (dummy)     |
+# | |    local 192.0.2.100      local 2001:db8:4::1  |        192.0.2.100/28   |
+# | |    group 233.252.0.1      group ff0e::1:2:3    |        2001:db8:4::1/64 |
+# | |    id 1000                id 2000              |                         |
+# | |    vid 10 pvid untagged   vid 20 pvid untagged |                         |
+# | +------------------------------------------------+                         |
+# |                                                                            |
+# |   + $swp2                                                        $swp3 +   |
+# |   | 192.0.2.33/28                                        192.0.2.65/28 |   |
+# |   | 2001:db8:2::1/64                                  2001:db8:3::1/64 |   |
+# |   |                                                                    |   |
+# +---|--------------------------------------------------------------------|---+
+#     |                                                                    |
+# +---|--------------------------------+  +--------------------------------|---+
+# |   |                      H2 (vrf)  |  | H3 (vrf)                       |   |
+# | +-|----------------------------+   |  |  +-----------------------------|-+ |
+# | | + $h2           BR2 (802.1d) |   |  |  | BR3 (802.1d)            $h3 + | |
+# | |                              |   |  |  |                               | |
+# | | + v1$h2 (veth)               |   |  |  |                v1$h3 (veth) + | |
+# | +-|----------------------------+   |  |  +-----------------------------|-+ |
+# |   |                                |  |                                |   |
+# +---|--------------------------------+  +--------------------------------|---+
+#     |                                                                    |
+# +---|--------------------------------+  +--------------------------------|---+
+# |   + v2$h2 (veth)       NS2 (netns) |  | NS3 (netns)       v2$h3 (veth) +   |
+# |     192.0.2.34/28                  |  |                  192.0.2.66/28     |
+# |     2001:db8:2::2/64               |  |               2001:db8:3::2/64     |
+# |                                    |  |                                    |
+# | +--------------------------------+ |  | +--------------------------------+ |
+# | |                  BR1 (802.1q)  | |  | |                   BR1 (802.1q) | |
+# | |  + vx10 (vxlan)                | |  | |  + vx10 (vxlan)                | |
+# | |    local 192.0.2.34            | |  | |    local 192.0.2.50            | |
+# | |    group 233.252.0.1 dev v2$h2 | |  | |    group 233.252.0.1 dev v2$h3 | |
+# | |    id 1000 dstport $VXPORT     | |  | |    id 1000 dstport $VXPORT     | |
+# | |    vid 10 pvid untagged        | |  | |    vid 10 pvid untagged        | |
+# | |                                | |  | |                                | |
+# | |  + vx20 (vxlan)                | |  | |  + vx20 (vxlan)                | |
+# | |    local 2001:db8:2::2         | |  | |    local 2001:db8:3::2         | |
+# | |    group ff0e::1:2:3 dev v2$h2 | |  | |    group ff0e::1:2:3 dev v2$h3 | |
+# | |    id 2000 dstport $VXPORT     | |  | |    id 2000 dstport $VXPORT     | |
+# | |    vid 20 pvid untagged        | |  | |    vid 20 pvid untagged        | |
+# | |                                | |  | |                                | |
+# | |  + w1 (veth)                   | |  | |  + w1 (veth)                   | |
+# | |  | vid 10 20                   | |  | |  | vid 10 20                   | |
+# | +--|-----------------------------+ |  | +--|-----------------------------+ |
+# |    |                               |  |    |                               |
+# | +--|-----------------------------+ |  | +--|-----------------------------+ |
+# | |  + w2 (veth)        VW2 (vrf)  | |  | |  + w2 (veth)        VW2 (vrf)  | |
+# | |  |\                            | |  | |  |\                            | |
+# | |  | + w2.10                     | |  | |  | + w2.10                     | |
+# | |  |   192.0.2.3/28              | |  | |  |   192.0.2.4/28              | |
+# | |  |                             | |  | |  |                             | |
+# | |  + w2.20                       | |  | |  + w2.20                       | |
+# | |    2001:db8:1::3/64            | |  | |    2001:db8:1::4/64            | |
+# | +--------------------------------+ |  | +--------------------------------+ |
+# +------------------------------------+  +------------------------------------+
+#
+#shellcheck disable=SC2317 # SC doesn't see our uses of functions.
+
+: "${VXPORT:=4789}"
+export VXPORT
+
+: "${GROUP4:=233.252.0.1}"
+export GROUP4
+
+: "${GROUP6:=ff0e::1:2:3}"
+export GROUP6
+
+: "${IPMR:=lo10}"
+
+ALL_TESTS="
+	ipv4_nomcroute
+	ipv4_mcroute
+	ipv4_mcroute_changelink
+	ipv4_mcroute_starg
+	ipv4_mcroute_noroute
+	ipv4_mcroute_fdb
+	ipv4_mcroute_fdb_oif0
+	ipv4_mcroute_fdb_oif0_sep
+
+	ipv6_nomcroute
+	ipv6_mcroute
+	ipv6_mcroute_changelink
+	ipv6_mcroute_starg
+	ipv6_mcroute_noroute
+	ipv6_mcroute_fdb
+	ipv6_mcroute_fdb_oif0
+
+	ipv4_nomcroute_rx
+	ipv4_mcroute_rx
+	ipv4_mcroute_starg_rx
+	ipv4_mcroute_fdb_oif0_sep_rx
+	ipv4_mcroute_fdb_sep_rx
+
+	ipv6_nomcroute_rx
+	ipv6_mcroute_rx
+	ipv6_mcroute_starg_rx
+	ipv6_mcroute_fdb_sep_rx
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init "$h1"
+	defer simple_if_fini "$h1"
+
+	ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10
+	ip_link_set_up "$h1.10"
+	ip_addr_add "$h1.10" 192.0.2.1/28
+
+	ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20
+	ip_link_set_up "$h1.20"
+	ip_addr_add "$h1.20" 2001:db8:1::1/64
+}
+
+install_capture()
+{
+	local dev=$1; shift
+
+	tc qdisc add dev "$dev" clsact
+	defer tc qdisc del dev "$dev" clsact
+
+	tc filter add dev "$dev" ingress proto ip pref 104 \
+	   flower skip_hw ip_proto udp dst_port "$VXPORT" \
+	   action pass
+	defer tc filter del dev "$dev" ingress proto ip pref 104
+
+	tc filter add dev "$dev" ingress proto ipv6 pref 106 \
+	   flower skip_hw ip_proto udp dst_port "$VXPORT" \
+	   action pass
+	defer tc filter del dev "$dev" ingress proto ipv6 pref 106
+}
+
+h2_create()
+{
+	# $h2
+	ip_link_set_up "$h2"
+
+	# H2
+	vrf_create "v$h2"
+	defer vrf_destroy "v$h2"
+
+	ip_link_set_up "v$h2"
+
+	# br2
+	ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0
+	ip_link_set_master br2 "v$h2"
+	ip_link_set_up br2
+
+	# $h2
+	ip_link_set_master "$h2" br2
+	install_capture "$h2"
+
+	# v1$h2
+	ip_link_set_up "v1$h2"
+	ip_link_set_master "v1$h2" br2
+}
+
+h3_create()
+{
+	# $h3
+	ip_link_set_up "$h3"
+
+	# H3
+	vrf_create "v$h3"
+	defer vrf_destroy "v$h3"
+
+	ip_link_set_up "v$h3"
+
+	# br3
+	ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0
+	ip_link_set_master br3 "v$h3"
+	ip_link_set_up br3
+
+	# $h3
+	ip_link_set_master "$h3" br3
+	install_capture "$h3"
+
+	# v1$h3
+	ip_link_set_up "v1$h3"
+	ip_link_set_master "v1$h3" br3
+}
+
+switch_create()
+{
+	local swp1_mac
+
+	# br1
+	swp1_mac=$(mac_get "$swp1")
+	ip_link_add br1 type bridge vlan_filtering 1 \
+			    vlan_default_pvid 0 mcast_snooping 0
+	ip_link_set_addr br1 "$swp1_mac"
+	ip_link_set_up br1
+
+	# A dummy to force the IPv6 OIF=0 test to install a suitable MC route on
+	# $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test.
+	ip_link_add "X$IPMR" up type dummy
+
+	# IPMR
+	ip_link_add "$IPMR" up type dummy
+	ip_addr_add "$IPMR" 192.0.2.100/28
+	ip_addr_add "$IPMR" 2001:db8:4::1/64
+
+	# $swp1
+	ip_link_set_up "$swp1"
+	ip_link_set_master "$swp1" br1
+	bridge_vlan_add vid 10 dev "$swp1"
+	bridge_vlan_add vid 20 dev "$swp1"
+
+	# $swp2
+	ip_link_set_up "$swp2"
+	ip_addr_add "$swp2" 192.0.2.33/28
+	ip_addr_add "$swp2" 2001:db8:2::1/64
+
+	# $swp3
+	ip_link_set_up "$swp3"
+	ip_addr_add "$swp3" 192.0.2.65/28
+	ip_addr_add "$swp3" 2001:db8:3::1/64
+}
+
+vx_create()
+{
+	local name=$1; shift
+	local vid=$1; shift
+
+	ip_link_add "$name" up type vxlan dstport "$VXPORT" \
+		nolearning noudpcsum tos inherit ttl 16 \
+		"$@"
+	ip_link_set_master "$name" br1
+	bridge_vlan_add vid "$vid" dev "$name" pvid untagged
+}
+export -f vx_create
+
+vx_wait()
+{
+	# Wait for all the ARP, IGMP etc. noise to settle down so that the
+	# tunnel is clear for measurements.
+	sleep 10
+}
+
+vx10_create()
+{
+	vx_create vx10 10 id 1000 "$@"
+}
+export -f vx10_create
+
+vx20_create()
+{
+	vx_create vx20 20 id 2000 "$@"
+}
+export -f vx20_create
+
+vx10_create_wait()
+{
+	vx10_create "$@"
+	vx_wait
+}
+
+vx20_create_wait()
+{
+	vx20_create "$@"
+	vx_wait
+}
+
+ns_init_common()
+{
+	local ns=$1; shift
+	local if_in=$1; shift
+	local ipv4_in=$1; shift
+	local ipv6_in=$1; shift
+	local ipv4_host=$1; shift
+	local ipv6_host=$1; shift
+
+	# v2$h2 / v2$h3
+	ip_link_set_up "$if_in"
+	ip_addr_add "$if_in" "$ipv4_in"
+	ip_addr_add "$if_in" "$ipv6_in"
+
+	# br1
+	ip_link_add br1 type bridge vlan_filtering 1 \
+		    vlan_default_pvid 0 mcast_snooping 0
+	ip_link_set_up br1
+
+	# vx10, vx20
+	vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in"
+	vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in"
+
+	# w1
+	ip_link_add w1 type veth peer name w2
+	ip_link_set_master w1 br1
+	ip_link_set_up w1
+	bridge_vlan_add vid 10 dev w1
+	bridge_vlan_add vid 20 dev w1
+
+	# w2
+	simple_if_init w2
+	defer simple_if_fini w2
+
+	# w2.10
+	ip_link_add w2.10 master vw2 link w2 type vlan id 10
+	ip_link_set_up w2.10
+	ip_addr_add w2.10 "$ipv4_host"
+
+	# w2.20
+	ip_link_add w2.20 master vw2 link w2 type vlan id 20
+	ip_link_set_up w2.20
+	ip_addr_add w2.20 "$ipv6_host"
+}
+export -f ns_init_common
+
+ns2_create()
+{
+	# NS2
+	ip netns add ns2
+	defer ip netns del ns2
+
+	# v2$h2
+	ip link set dev "v2$h2" netns ns2
+	defer ip -n ns2 link set dev "v2$h2" netns 1
+
+	in_ns ns2 \
+	      ns_init_common ns2 "v2$h2" \
+			     192.0.2.34/28 2001:db8:2::2/64 \
+			     192.0.2.3/28  2001:db8:1::3/64
+}
+
+ns3_create()
+{
+	# NS3
+	ip netns add ns3
+	defer ip netns del ns3
+
+	# v2$h3
+	ip link set dev "v2$h3" netns ns3
+	defer ip -n ns3 link set dev "v2$h3" netns 1
+
+	ip -n ns3 link set dev "v2$h3" up
+
+	in_ns ns3 \
+	      ns_init_common ns3 "v2$h3" \
+			     192.0.2.66/28 2001:db8:3::2/64 \
+			     192.0.2.4/28  2001:db8:1::4/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	defer vrf_cleanup
+
+	forwarding_enable
+	defer forwarding_restore
+
+	ip_link_add "v1$h2" type veth peer name "v2$h2"
+	ip_link_add "v1$h3" type veth peer name "v2$h3"
+
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+	ns2_create
+	ns3_create
+}
+
+adf_install_broken_sg()
+{
+	adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+	mc_cli add "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+	defer mc_cli remove "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+
+	mc_cli add "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+	defer mc_cli remove "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+}
+
+adf_install_rx()
+{
+	mc_cli add "$swp2" 0.0.0.0 "$GROUP4" "$IPMR"
+	defer mc_cli remove "$swp2" 0.0.0.0 "$GROUP4" lo10
+
+	mc_cli add "$swp3" 0.0.0.0 "$GROUP4" "$IPMR"
+	defer mc_cli remove "$swp3" 0.0.0.0 "$GROUP4" lo10
+
+	mc_cli add "$swp2" :: "$GROUP6" "$IPMR"
+	defer mc_cli remove "$swp2" :: "$GROUP6" lo10
+
+	mc_cli add "$swp3" :: "$GROUP6" "$IPMR"
+	defer mc_cli remove "$swp3" :: "$GROUP6" lo10
+}
+
+adf_install_sg()
+{
+	adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+	mc_cli add "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3"
+	defer mc_cli remove "$IPMR" 192.0.2.33 "$GROUP4" "$swp2" "$swp3"
+
+	mc_cli add "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+	defer mc_cli remove "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+
+	adf_install_rx
+}
+
+adf_install_sg_sep()
+{
+	adf_mcd_start lo || exit "$EXIT_STATUS"
+
+	mc_cli add lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+	defer mc_cli remove lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+	mc_cli add lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+	defer mc_cli remove lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+}
+
+adf_install_sg_sep_rx()
+{
+	local lo=$1; shift
+
+	adf_mcd_start "$IPMR" "$lo" || exit "$EXIT_STATUS"
+
+	mc_cli add "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+	defer mc_cli remove "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+	mc_cli add "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+	defer mc_cli remove "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+
+	adf_install_rx
+}
+
+adf_install_starg()
+{
+	adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+	mc_cli add "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+	defer mc_cli remove "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+
+	mc_cli add "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+	defer mc_cli remove "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+
+	adf_install_rx
+}
+
+do_packets_v4()
+{
+	local mac
+
+	mac=$(mac_get "$h2")
+	"$MZ" "$h1" -Q 10 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+	    -A 192.0.2.1 -B 192.0.2.2 -t udp sp=1234,dp=2345 -q
+}
+
+do_packets_v6()
+{
+	local mac
+
+	mac=$(mac_get "$h2")
+	"$MZ" -6 "$h1" -Q 20 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+	    -A 2001:db8:1::1 -B 2001:db8:1::2 -t udp sp=1234,dp=2345 -q
+}
+
+do_test()
+{
+	local ipv=$1; shift
+	local expect_h2=$1; shift
+	local expect_h3=$1; shift
+	local what=$1; shift
+
+	local pref=$((100 + ipv))
+	local t0_h2
+	local t0_h3
+	local t1_h2
+	local t1_h3
+	local d_h2
+	local d_h3
+
+	RET=0
+
+	t0_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+	t0_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+	"do_packets_v$ipv"
+	sleep 1
+
+	t1_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+	t1_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+	d_h2=$((t1_h2 - t0_h2))
+	d_h3=$((t1_h3 - t0_h3))
+
+	((d_h2 == expect_h2))
+	check_err $? "Expected $expect_h2 packets on H2, got $d_h2"
+
+	((d_h3 == expect_h3))
+	check_err $? "Expected $expect_h3 packets on H3, got $d_h3"
+
+	log_test "VXLAN MC flood $what"
+}
+
+ipv4_do_test_rx()
+{
+	local h3_should_fail=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ping_do "$h1.10" 192.0.2.3
+	check_err $? "H2 should respond"
+
+	ping_do "$h1.10" 192.0.2.4
+	check_err_fail "$h3_should_fail" $? "H3 responds"
+
+	log_test "VXLAN MC flood $what"
+}
+
+ipv6_do_test_rx()
+{
+	local h3_should_fail=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ping6_do "$h1.20" 2001:db8:1::3
+	check_err $? "H2 should respond"
+
+	ping6_do "$h1.20" 2001:db8:1::4
+	check_err_fail "$h3_should_fail" $? "H3 responds"
+
+	log_test "VXLAN MC flood $what"
+}
+
+ipv4_nomcroute()
+{
+	# Install a misleading (S,G) rule to attempt to trick the system into
+	# pushing the packets elsewhere.
+	adf_install_broken_sg
+	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$swp2"
+	do_test 4 10 0 "IPv4 nomcroute"
+}
+
+ipv6_nomcroute()
+{
+	# Like for IPv4, install a misleading (S,G).
+	adf_install_broken_sg
+	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+	do_test 6 10 0 "IPv6 nomcroute"
+}
+
+ipv4_nomcroute_rx()
+{
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2"
+	ipv4_do_test_rx 1 "IPv4 nomcroute ping"
+}
+
+ipv6_nomcroute_rx()
+{
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+	ipv6_do_test_rx 1 "IPv6 nomcroute ping"
+}
+
+ipv4_mcroute()
+{
+	adf_install_sg
+	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	do_test 4 10 10 "IPv4 mcroute"
+}
+
+ipv6_mcroute()
+{
+	adf_install_sg
+	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	do_test 6 10 10 "IPv6 mcroute"
+}
+
+ipv4_mcroute_rx()
+{
+	adf_install_sg
+	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	ipv4_do_test_rx 0 "IPv4 mcroute ping"
+}
+
+ipv6_mcroute_rx()
+{
+	adf_install_sg
+	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	ipv6_do_test_rx 0 "IPv6 mcroute ping"
+}
+
+ipv4_mcroute_changelink()
+{
+	adf_install_sg
+	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR"
+	ip link set dev vx10 type vxlan mcroute
+	sleep 1
+	do_test 4 10 10 "IPv4 mcroute changelink"
+}
+
+ipv6_mcroute_changelink()
+{
+	adf_install_sg
+	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	ip link set dev vx20 type vxlan mcroute
+	sleep 1
+	do_test 6 10 10 "IPv6 mcroute changelink"
+}
+
+ipv4_mcroute_starg()
+{
+	adf_install_starg
+	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	do_test 4 10 10 "IPv4 mcroute (*,G)"
+}
+
+ipv6_mcroute_starg()
+{
+	adf_install_starg
+	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	do_test 6 10 10 "IPv6 mcroute (*,G)"
+}
+
+ipv4_mcroute_starg_rx()
+{
+	adf_install_starg
+	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping"
+}
+
+ipv6_mcroute_starg_rx()
+{
+	adf_install_starg
+	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping"
+}
+
+ipv4_mcroute_noroute()
+{
+	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	do_test 4 0 0 "IPv4 mcroute, no route"
+}
+
+ipv6_mcroute_noroute()
+{
+	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	do_test 6 0 0 "IPv6 mcroute, no route"
+}
+
+ipv4_mcroute_fdb()
+{
+	adf_install_sg
+	vx10_create_wait local 192.0.2.100 dev "$IPMR" mcroute
+	bridge fdb add dev vx10 \
+		00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR"
+	do_test 4 10 10 "IPv4 mcroute FDB"
+}
+
+ipv6_mcroute_fdb()
+{
+	adf_install_sg
+	vx20_create_wait local 2001:db8:4::1 dev "$IPMR" mcroute
+	bridge -6 fdb add dev vx20 \
+		00:00:00:00:00:00 self static dst "$GROUP6" via "$IPMR"
+	do_test 6 10 10 "IPv6 mcroute FDB"
+}
+
+# Use FDB to configure VXLAN in a way where oif=0 for purposes of FIB lookup.
+ipv4_mcroute_fdb_oif0()
+{
+	adf_install_sg
+	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	bridge fdb del dev vx10 00:00:00:00:00:00
+	bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+	do_test 4 10 10 "IPv4 mcroute oif=0"
+}
+
+ipv6_mcroute_fdb_oif0()
+{
+	# The IPv6 tunnel lookup does not fall back to selection by source
+	# address. Instead it just does a FIB match, and that would find one of
+	# the several ff00::/8 multicast routes -- each device has one. In order
+	# to reliably force the $IPMR device, add a /128 route for the
+	# destination group address.
+	ip -6 route add table local multicast "$GROUP6/128" dev "$IPMR"
+	defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR"
+
+	adf_install_sg
+	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	bridge -6 fdb del dev vx20 00:00:00:00:00:00
+	bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6"
+	do_test 6 10 10 "IPv6 mcroute oif=0"
+}
+
+# In oif=0 test as above, have FIB lookup resolve to loopback instead of IPMR.
+# This doesn't work with IPv6 -- a MC route on lo would be marked as RTF_REJECT.
+ipv4_mcroute_fdb_oif0_sep()
+{
+	adf_install_sg_sep
+
+	ip_addr_add lo 192.0.2.120/28
+	vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+	bridge fdb del dev vx10 00:00:00:00:00:00
+	bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+	do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0"
+}
+
+ipv4_mcroute_fdb_oif0_sep_rx()
+{
+	adf_install_sg_sep_rx lo
+
+	ip_addr_add lo 192.0.2.120/28
+	vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+	bridge fdb del dev vx10 00:00:00:00:00:00
+	bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+	ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping"
+}
+
+ipv4_mcroute_fdb_sep_rx()
+{
+	adf_install_sg_sep_rx lo
+
+	ip_addr_add lo 192.0.2.120/28
+	vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+	bridge fdb del dev vx10 00:00:00:00:00:00
+	bridge fdb add \
+	       dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo
+	ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX ping"
+}
+
+ipv6_mcroute_fdb_sep_rx()
+{
+	adf_install_sg_sep_rx "X$IPMR"
+
+	ip_addr_add "X$IPMR" 2001:db8:5::1/64
+	vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute
+	bridge -6 fdb del dev vx20 00:00:00:00:00:00
+	bridge -6 fdb add dev vx20 00:00:00:00:00:00 \
+			  self static dst "$GROUP6" via "X$IPMR"
+	ipv6_do_test_rx 0 "IPv6 mcroute TX!=RX ping"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait "$NUM_NETIFS"
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 006fdadcc4b9..ff0dbe23e8e0 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -547,13 +547,19 @@ ip_link_set_addr()
 	defer ip link set dev "$name" address "$old_addr"
 }
 
-ip_link_is_up()
+ip_link_has_flag()
 {
 	local name=$1; shift
+	local flag=$1; shift
 
 	local state=$(ip -j link show "$name" |
-		      jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"')
-	[[ $state == "UP" ]]
+		      jq --arg flag "$flag" 'any(.[].flags.[]; . == $flag)')
+	[[ $state == true ]]
+}
+
+ip_link_is_up()
+{
+	ip_link_has_flag "$1" UP
 }
 
 ip_link_set_up()
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index cd8a58097448..1f5227f3d64d 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -385,7 +385,7 @@ static int get_bind_to_device(int sd, char *name, size_t len)
 	name[0] = '\0';
 	rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen);
 	if (rc < 0)
-		log_err_errno("setsockopt(SO_BINDTODEVICE)");
+		log_err_errno("getsockopt(SO_BINDTODEVICE)");
 
 	return rc;
 }
@@ -535,7 +535,7 @@ static int set_freebind(int sd, int version)
 		break;
 	case AF_INET6:
 		if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) {
-			log_err_errno("setsockopt(IPV6_FREEBIND");
+			log_err_errno("setsockopt(IPV6_FREEBIND)");
 			rc = -1;
 		}
 		break;
@@ -812,7 +812,7 @@ static int convert_addr(struct sock_args *args, const char *_str,
 			sep++;
 			if (str_to_uint(sep, 1, pfx_len_max,
 					&args->prefix_len) != 0) {
-				fprintf(stderr, "Invalid port\n");
+				fprintf(stderr, "Invalid prefix length\n");
 				return 1;
 			}
 		} else {
@@ -1272,7 +1272,7 @@ static int msg_loop(int client, int sd, void *addr, socklen_t alen,
 		}
 	}
 
-	nfds = interactive ? MAX(fileno(stdin), sd)  + 1 : sd + 1;
+	nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1;
 	while (1) {
 		FD_ZERO(&rfds);
 		FD_SET(sd, &rfds);
@@ -1492,7 +1492,7 @@ static int lsock_init(struct sock_args *args)
 	sd = socket(args->version, args->type, args->protocol);
 	if (sd < 0) {
 		log_err_errno("Error opening socket");
-		return  -1;
+		return -1;
 	}
 
 	if (set_reuseaddr(sd) != 0)
@@ -1912,7 +1912,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
 	 * waiting to be told when to continue
 	 */
 	if (read(fd, &buf, sizeof(buf)) <= 0) {
-		log_err_errno("Failed to read IPC status from status");
+		log_err_errno("Failed to read IPC status from pipe");
 		return 1;
 	}
 	if (!buf) {
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
index beaee5e4e2aa..c9109627a741 100755
--- a/tools/testing/selftests/net/nl_netdev.py
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -2,8 +2,9 @@
 # SPDX-License-Identifier: GPL-2.0
 
 import time
+from os import system
 from lib.py import ksft_run, ksft_exit, ksft_pr
-from lib.py import ksft_eq, ksft_ge, ksft_busy_wait
+from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait
 from lib.py import NetdevFamily, NetdevSimDev, ip
 
 
@@ -34,6 +35,39 @@ def napi_list_check(nf) -> None:
                 ksft_eq(len(napis), 100,
                         comment=f"queue count after reset queue {q} mode {i}")
 
+def dev_set_threaded(nf) -> None:
+    """
+    Test that verifies various cases of napi threaded
+    set and unset at device level using sysfs.
+    """
+    with NetdevSimDev(queue_count=2) as nsimdev:
+        nsim = nsimdev.nsims[0]
+
+        ip(f"link set dev {nsim.ifname} up")
+
+        napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+        ksft_eq(len(napis), 2)
+
+        napi0_id = napis[0]['id']
+        napi1_id = napis[1]['id']
+
+        # set threaded
+        system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
+
+        # check napi threaded is set for both napis
+        napi0 = nf.napi_get({'id': napi0_id})
+        ksft_ne(napi0.get('pid'), None)
+        napi1 = nf.napi_get({'id': napi1_id})
+        ksft_ne(napi1.get('pid'), None)
+
+        # unset threaded
+        system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+        # check napi threaded is unset for both napis
+        napi0 = nf.napi_get({'id': napi0_id})
+        ksft_eq(napi0.get('pid'), None)
+        napi1 = nf.napi_get({'id': napi1_id})
+        ksft_eq(napi1.get('pid'), None)
 
 def nsim_rxq_reset_down(nf) -> None:
     """
@@ -122,7 +156,7 @@ def page_pool_check(nf) -> None:
 def main() -> None:
     nf = NetdevFamily()
     ksft_run([empty_check, lo_check, page_pool_check, napi_list_check,
-              nsim_rxq_reset_down],
+              dev_set_threaded, nsim_rxq_reset_down],
              args=(nf, ))
     ksft_exit()
 
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
index ef8b25a606d8..c5b01e1bd4c7 100755
--- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -39,11 +39,15 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
 	# xfail tests that are known flaky with dbg config, not fixable.
 	# still run them for coverage (and expect 100% pass without dbg).
 	declare -ar xfail_list=(
+		"tcp_blocking_blocking-connect.pkt"
+		"tcp_blocking_blocking-read.pkt"
 		"tcp_eor_no-coalesce-retrans.pkt"
 		"tcp_fast_recovery_prr-ss.*.pkt"
+		"tcp_sack_sack-route-refresh-ip-tos.pkt"
 		"tcp_slow_start_slow-start-after-win-update.pkt"
 		"tcp_timestamping.*.pkt"
 		"tcp_user_timeout_user-timeout-probe.pkt"
+		"tcp_zerocopy_cl.*.pkt"
 		"tcp_zerocopy_epoll_.*.pkt"
 		"tcp_tcp_info_tcp-info-.*-limited.pkt"
 	)
diff --git a/tools/testing/selftests/net/rtnetlink_notification.sh b/tools/testing/selftests/net/rtnetlink_notification.sh
new file mode 100755
index 000000000000..3f9780232bd6
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink_notification.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking rtnetlink notification callpaths, and get as much
+# coverage as possible.
+#
+# set -e
+
+ALL_TESTS="
+	kci_test_mcast_addr_notification
+	kci_test_anycast_addr_notification
+"
+
+source lib.sh
+test_dev="test-dummy1"
+
+kci_test_mcast_addr_notification()
+{
+	RET=0
+	local tmpfile
+	local monitor_pid
+	local match_result
+
+	tmpfile=$(mktemp)
+	defer rm "$tmpfile"
+
+	ip monitor maddr > $tmpfile &
+	monitor_pid=$!
+	defer kill_process "$monitor_pid"
+
+	sleep 1
+
+	if [ ! -e "/proc/$monitor_pid" ]; then
+		RET=$ksft_skip
+		log_test "mcast addr notification: iproute2 too old"
+		return $RET
+	fi
+
+	ip link add name "$test_dev" type dummy
+	check_err $? "failed to add dummy interface"
+	ip link set "$test_dev" up
+	check_err $? "failed to set dummy interface up"
+	ip link del dev "$test_dev"
+	check_err $? "Failed to delete dummy interface"
+	sleep 1
+
+	# There should be 4 line matches as follows.
+	# 13: test-dummy1    inet6 mcast ff02::1 scope global 
+	# 13: test-dummy1    inet mcast 224.0.0.1 scope global 
+	# Deleted 13: test-dummy1    inet mcast 224.0.0.1 scope global 
+	# Deleted 13: test-dummy1    inet6 mcast ff02::1 scope global 
+	match_result=$(grep -cE "$test_dev.*(224.0.0.1|ff02::1)" "$tmpfile")
+	if [ "$match_result" -ne 4 ]; then
+		RET=$ksft_fail
+	fi
+	log_test "mcast addr notification: Expected 4 matches, got $match_result"
+	return $RET
+}
+
+kci_test_anycast_addr_notification()
+{
+	RET=0
+	local tmpfile
+	local monitor_pid
+	local match_result
+
+	tmpfile=$(mktemp)
+	defer rm "$tmpfile"
+
+	ip monitor acaddress > "$tmpfile" &
+	monitor_pid=$!
+	defer kill_process "$monitor_pid"
+	sleep 1
+
+	if [ ! -e "/proc/$monitor_pid" ]; then
+		RET=$ksft_skip
+		log_test "anycast addr notification: iproute2 too old"
+		return "$RET"
+	fi
+
+	ip link add name "$test_dev" type dummy
+	check_err $? "failed to add dummy interface"
+	ip link set "$test_dev" up
+	check_err $? "failed to set dummy interface up"
+	sysctl -qw net.ipv6.conf."$test_dev".forwarding=1
+	ip link del dev "$test_dev"
+	check_err $? "Failed to delete dummy interface"
+	sleep 1
+
+	# There should be 2 line matches as follows.
+	# 9: dummy2    inet6 any fe80:: scope global
+	# Deleted 9: dummy2    inet6 any fe80:: scope global
+	match_result=$(grep -cE "$test_dev.*(fe80::)" "$tmpfile")
+	if [ "$match_result" -ne 2 ]; then
+		RET=$ksft_fail
+	fi
+	log_test "anycast addr notification: Expected 2 matches, got $match_result"
+	return "$RET"
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+	RET=$ksft_skip
+	log_test "need root privileges"
+	exit $RET
+fi
+
+require_command ip
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
index 4b86040c58c6..bedf0ce885c2 100755
--- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
@@ -72,6 +72,9 @@
 # Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
 # the selftest network.
 #
+# In addition, every router interface connecting rt-x to rt-y is assigned an
+# IPv6 link-local address fe80::x:y/64.
+#
 # Local SID/C-SID table
 # =====================
 #
@@ -521,6 +524,9 @@ setup_rt_networking()
 		ip -netns "${nsname}" addr \
 			add "${net_prefix}::${rt}/64" dev "${devname}" nodad
 
+		ip -netns "${nsname}" addr \
+			add "fe80::${rt}:${neigh}/64" dev "${devname}" nodad
+
 		ip -netns "${nsname}" link set "${devname}" up
 	done
 
@@ -609,6 +615,27 @@ set_end_x_nextcsid()
 		nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
 }
 
+set_end_x_ll_nextcsid()
+{
+	local rt="$1"
+	local adj="$2"
+
+	eval nsname=\${$(get_rtname "${rt}")}
+	lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
+	nh6_ll_addr="fe80::${adj}:${rt}"
+	oifname="veth-rt-${rt}-${adj}"
+
+	# enabled NEXT-C-SID SRv6 End.X behavior via an IPv6 link-local nexthop
+	# address (note that "dev" is the dummy dum0 device chosen for the sake
+	# of simplicity).
+	ip -netns "${nsname}" -6 route \
+		replace "${lcnode_func_prefix}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End.X nh6 "${nh6_ll_addr}" \
+		oif "${oifname}" flavors next-csid lblen "${LCBLOCK_BLEN}" \
+		nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
+}
+
 set_underlay_sids_reachability()
 {
 	local rt="$1"
@@ -1016,6 +1043,27 @@ host_vpn_tests()
 
 	check_and_log_hs_ipv4_connectivity 1 2
 	check_and_log_hs_ipv4_connectivity 2 1
+
+	# Setup the adjacencies in the SRv6 aware routers using IPv6 link-local
+	# addresses.
+	# - rt-3 SRv6 End.X adjacency with rt-4
+	# - rt-4 SRv6 End.X adjacency with rt-1
+	set_end_x_ll_nextcsid 3 4
+	set_end_x_ll_nextcsid 4 1
+
+	log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local"
+
+	check_and_log_hs_ipv6_connectivity 1 2
+	check_and_log_hs_ipv6_connectivity 2 1
+
+	log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local"
+
+	check_and_log_hs_ipv4_connectivity 1 2
+	check_and_log_hs_ipv4_connectivity 2 1
+
+	# Restore the previous adjacencies.
+	set_end_x_nextcsid 3 4
+	set_end_x_nextcsid 4 1
 }
 
 __nextcsid_end_x_behavior_test()
diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c
index f00245263b20..6478da6a71c3 100644
--- a/tools/testing/selftests/net/tcp_ao/seq-ext.c
+++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Check that after SEQ number wrap-around:
  * 1. SEQ-extension has upper bytes set
- * 2. TCP conneciton is alive and no TCPAOBad segments
+ * 2. TCP connection is alive and no TCPAOBad segments
  * In order to test (2), the test doesn't just adjust seq number for a queue
  * on a connected socket, but migrates it to another sk+port number, so
  * that there won't be any delayed packets that will fail to verify
diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
index 6127a78ee988..8deacc565afa 100755
--- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
+++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
@@ -146,18 +146,17 @@ run_cmd()
 }
 
 check_hv_connectivity() {
-	ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
-	sleep 1
-	ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
+	slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
+	slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
 
 	return $?
 }
 
 check_vm_connectivity() {
-	run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
+	slowwait 5 run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
 	log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)"
 
-	run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
+	slowwait 5 run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
 	log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)"
 }
 
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
index e9c2f71da207..ce34cb2e6e0b 100755
--- a/tools/testing/selftests/net/vrf_route_leaking.sh
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -275,7 +275,7 @@ setup_sym()
 
 
 	# Wait for ip config to settle
-	sleep 2
+	slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
 }
 
 setup_asym()
@@ -370,7 +370,7 @@ setup_asym()
 	ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
 
 	# Wait for ip config to settle
-	sleep 2
+	slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
 }
 
 check_connectivity()