From 23c44f6c83257923b179461694edcf62749bedd5 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Tue, 26 Nov 2024 19:13:31 -0800
Subject: perf tools: Fix build-id event recording

The build-id events written at the end of the record session are broken
due to unexpected data.  The write_buildid() writes the fixed length
event first and then variable length filename.

But a recent change made it write more data in the padding area
accidentally.  So readers of the event see zero-filled data for the
next entry and treat it incorrectly.  This resulted in wrong kernel
symbols because the kernel DSO loaded a random vmlinux image in the
path as it didn't have a valid build-id.

Fixes: ae39ba16554e ("perf inject: Fix build ID injection")
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/Z0aRFFW9xMh3mqKB@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/build-id.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 8982f68e7230..e763e8d99a43 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -277,7 +277,7 @@ static int write_buildid(const char *name, size_t name_len, struct build_id *bid
 	struct perf_record_header_build_id b;
 	size_t len;
 
-	len = sizeof(b) + name_len + 1;
+	len = name_len + 1;
 	len = PERF_ALIGN(len, sizeof(u64));
 
 	memset(&b, 0, sizeof(b));
@@ -286,7 +286,7 @@ static int write_buildid(const char *name, size_t name_len, struct build_id *bid
 	misc |= PERF_RECORD_MISC_BUILD_ID_SIZE;
 	b.pid = pid;
 	b.header.misc = misc;
-	b.header.size = len;
+	b.header.size = sizeof(b) + len;
 
 	err = do_write(fd, &b, sizeof(b));
 	if (err < 0)
-- 
cgit v1.2.3


From f54cd8f43f55c0274c5b51509aff39675639c2e1 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@linaro.org>
Date: Fri, 29 Nov 2024 15:19:48 +0000
Subject: perf test: Don't signal all processes on system when interrupting
 tests

This signal handler loops over all tests on ctrl-C, but it's active
while the test list is being constructed. process.pid is 0, then -1,
then finally set to the child pid on fork. If the Ctrl-C is received
during this point a kill(-1, SIGINT) can be sent which affects all
processes.

Make sure the child has forked first before forwarding the signal. This
can be reproduced with ctrl-C immediately after launching perf test
which terminates the ssh connection.

Fixes: 553d5efeb341 ("perf test: Add a signal handler to kill forked child processes")
Signed-off-by: James Clark <james.clark@linaro.org>
Reviewed-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20241129151948.3199732-1-james.clark@linaro.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/tests/builtin-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 8dcf74d3c0a3..4751dd3c6f67 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -508,7 +508,7 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
 		for (size_t x = 0; x < num_tests; x++) {
 			struct child_test *child_test = child_tests[x];
 
-			if (!child_test)
+			if (!child_test || child_test->process.pid <= 0)
 				continue;
 
 			pr_debug3("Killing %d pid %d\n",
-- 
cgit v1.2.3


From 88a6e2f67cc94f751a74409ab4c21e5fc8ea6757 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@kernel.org>
Date: Tue, 26 Nov 2024 11:47:25 -0300
Subject: perf machine: Initialize machine->env to address a segfault

Its used from trace__run(), for the 'perf trace' live mode, i.e. its
strace-like, non-perf.data file processing mode, the most common one.

The trace__run() function will set trace->host using machine__new_host()
that is supposed to give a machine instance representing the running
machine, and since we'll use perf_env__arch_strerrno() to get the right
errno -> string table, we need to use machine->env, so initialize it in
machine__new_host().

Before the patch:

  (gdb) run trace --errno-summary -a sleep 1
  <SNIP>
   Summary of events:

   gvfs-afc-volume (3187), 2 events, 0.0%

     syscall            calls  errors  total       min       avg       max       stddev
                                       (msec)    (msec)    (msec)    (msec)        (%)
     --------------- --------  ------ -------- --------- --------- ---------     ------
     pselect6               1      0     0.000     0.000     0.000     0.000      0.00%

   GUsbEventThread (3519), 2 events, 0.0%

     syscall            calls  errors  total       min       avg       max       stddev
                                       (msec)    (msec)    (msec)    (msec)        (%)
     --------------- --------  ------ -------- --------- --------- ---------     ------
     poll                   1      0     0.000     0.000     0.000     0.000      0.00%
  <SNIP>
  Program received signal SIGSEGV, Segmentation fault.
  0x00000000005caba0 in perf_env__arch_strerrno (env=0x0, err=110) at util/env.c:478
  478		if (env->arch_strerrno == NULL)
  (gdb) bt
  #0  0x00000000005caba0 in perf_env__arch_strerrno (env=0x0, err=110) at util/env.c:478
  #1  0x00000000004b75d2 in thread__dump_stats (ttrace=0x14f58f0, trace=0x7fffffffa5b0, fp=0x7ffff6ff74e0 <_IO_2_1_stderr_>) at builtin-trace.c:4673
  #2  0x00000000004b78bf in trace__fprintf_thread (fp=0x7ffff6ff74e0 <_IO_2_1_stderr_>, thread=0x10fa0b0, trace=0x7fffffffa5b0) at builtin-trace.c:4708
  #3  0x00000000004b7ad9 in trace__fprintf_thread_summary (trace=0x7fffffffa5b0, fp=0x7ffff6ff74e0 <_IO_2_1_stderr_>) at builtin-trace.c:4747
  #4  0x00000000004b656e in trace__run (trace=0x7fffffffa5b0, argc=2, argv=0x7fffffffde60) at builtin-trace.c:4456
  #5  0x00000000004ba43e in cmd_trace (argc=2, argv=0x7fffffffde60) at builtin-trace.c:5487
  #6  0x00000000004c0414 in run_builtin (p=0xec3068 <commands+648>, argc=5, argv=0x7fffffffde60) at perf.c:351
  #7  0x00000000004c06bb in handle_internal_command (argc=5, argv=0x7fffffffde60) at perf.c:404
  #8  0x00000000004c0814 in run_argv (argcp=0x7fffffffdc4c, argv=0x7fffffffdc40) at perf.c:448
  #9  0x00000000004c0b5d in main (argc=5, argv=0x7fffffffde60) at perf.c:560
  (gdb)

After:

  root@number:~# perf trace -a --errno-summary sleep 1
  <SNIP>
     pw-data-loop (2685), 1410 events, 16.0%

     syscall            calls  errors  total       min       avg       max       stddev
                                       (msec)    (msec)    (msec)    (msec)        (%)
     --------------- --------  ------ -------- --------- --------- ---------     ------
     epoll_wait           188      0   983.428     0.000     5.231    15.595      8.68%
     ioctl                 94      0     0.811     0.004     0.009     0.016      2.82%
     read                 188      0     0.322     0.001     0.002     0.006      5.15%
     write                141      0     0.280     0.001     0.002     0.018      8.39%
     timerfd_settime       94      0     0.138     0.001     0.001     0.007      6.47%

   gnome-control-c (179406), 1848 events, 20.9%

     syscall            calls  errors  total       min       avg       max       stddev
                                       (msec)    (msec)    (msec)    (msec)        (%)
     --------------- --------  ------ -------- --------- --------- ---------     ------
     poll                 222      0   959.577     0.000     4.322    21.414     11.40%
     recvmsg              150      0     0.539     0.001     0.004     0.013      5.12%
     write                300      0     0.442     0.001     0.001     0.007      3.29%
     read                 150      0     0.183     0.001     0.001     0.009      5.53%
     getpid               102      0     0.101     0.000     0.001     0.008      7.82%

  root@number:~#

Fixes: 54373b5d53c1f6aa ("perf env: Introduce perf_env__arch_strerrno()")
Reported-by: Veronika Molnarova <vmolnaro@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Veronika Molnarova <vmolnaro@redhat.com>
Acked-by: Michael Petlan <mpetlan@redhat.com>
Tested-by: Michael Petlan <mpetlan@redhat.com>
Link: https://lore.kernel.org/r/Z0XffUgNSv_9OjOi@x1
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/machine.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 4f0ac998b0cc..27d5345d2b30 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -134,6 +134,8 @@ struct machine *machine__new_host(void)
 
 		if (machine__create_kernel_maps(machine) < 0)
 			goto out_delete;
+
+		machine->env = &perf_env;
 	}
 
 	return machine;
-- 
cgit v1.2.3


From 793baff3f24f16dab9061045e23eea67724feae6 Mon Sep 17 00:00:00 2001
From: Honglei Wang <jameshongleiwang@126.com>
Date: Fri, 29 Nov 2024 17:10:03 +0800
Subject: sched_ext: Add __weak to fix the build errors

commit 5cbb302880f5 ("sched_ext: Rename
scx_bpf_dispatch[_vtime]_from_dsq*() -> scx_bpf_dsq_move[_vtime]*()")
introduced several new functions which caused compilation errors when
compiled with clang.

Let's fix this by adding __weak markers.

Signed-off-by: Honglei Wang <jameshongleiwang@126.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: 5cbb302880f5 ("sched_ext: Rename scx_bpf_dispatch[_vtime]_from_dsq*() -> scx_bpf_dsq_move[_vtime]*()")
Acked-by: Andrii Nakryiko <andrii@kernel.org>
---
 tools/sched_ext/include/scx/common.bpf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index 2f36b7b6418d..625f5b046776 100644
--- a/tools/sched_ext/include/scx/common.bpf.h
+++ b/tools/sched_ext/include/scx/common.bpf.h
@@ -40,9 +40,9 @@ void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_fl
 void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak;
 u32 scx_bpf_dispatch_nr_slots(void) __ksym;
 void scx_bpf_dispatch_cancel(void) __ksym;
-bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym;
-void scx_bpf_dsq_move_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym;
-void scx_bpf_dsq_move_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym;
+bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym __weak;
+void scx_bpf_dsq_move_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak;
+void scx_bpf_dsq_move_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
 bool scx_bpf_dsq_move(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
 bool scx_bpf_dsq_move_vtime(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
 u32 scx_bpf_reenqueue_local(void) __ksym;
-- 
cgit v1.2.3


From ef7009decc30eb2515a64253791d61b72229c119 Mon Sep 17 00:00:00 2001
From: Ihor Solodrai <ihor.solodrai@pm.me>
Date: Thu, 21 Nov 2024 21:40:17 +0000
Subject: selftests/sched_ext: fix build after renames in sched_ext API

The selftests are falining to build on current tip of bpf-next and
sched_ext [1]. This has broken BPF CI [2] after merge from upstream.

Use appropriate function names in the selftests according to the
recent changes in the sched_ext API [3].

[1] https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=fc39fb56917bb3cb53e99560ca3612a84456ada2
[2] https://github.com/kernel-patches/bpf/actions/runs/11959327258/job/33340923745
[3] https://lore.kernel.org/all/20241109194853.580310-1-tj@kernel.org/

Signed-off-by: Ihor Solodrai <ihor.solodrai@pm.me>
Acked-by: Andrea Righi <arighi@nvidia.com>
Acked-by: David Vernet <void@manifault.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c       | 2 +-
 tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c      | 4 ++--
 tools/testing/selftests/sched_ext/dsp_local_on.bpf.c              | 2 +-
 tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c      | 2 +-
 tools/testing/selftests/sched_ext/exit.bpf.c                      | 4 ++--
 tools/testing/selftests/sched_ext/maximal.bpf.c                   | 4 ++--
 tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c            | 2 +-
 tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c | 2 +-
 tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c       | 2 +-
 .../testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c | 2 +-
 .../testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c | 4 ++--
 tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c          | 8 ++++----
 12 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c
index 37d9bf6fb745..6f4c3f5a1c5d 100644
--- a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c
+++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c
@@ -20,7 +20,7 @@ s32 BPF_STRUCT_OPS(ddsp_bogus_dsq_fail_select_cpu, struct task_struct *p,
 		 * If we dispatch to a bogus DSQ that will fall back to the
 		 * builtin global DSQ, we fail gracefully.
 		 */
-		scx_bpf_dispatch_vtime(p, 0xcafef00d, SCX_SLICE_DFL,
+		scx_bpf_dsq_insert_vtime(p, 0xcafef00d, SCX_SLICE_DFL,
 				       p->scx.dsq_vtime, 0);
 		return cpu;
 	}
diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c
index dffc97d9cdf1..e4a55027778f 100644
--- a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c
+++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c
@@ -17,8 +17,8 @@ s32 BPF_STRUCT_OPS(ddsp_vtimelocal_fail_select_cpu, struct task_struct *p,
 
 	if (cpu >= 0) {
 		/* Shouldn't be allowed to vtime dispatch to a builtin DSQ. */
-		scx_bpf_dispatch_vtime(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL,
-				       p->scx.dsq_vtime, 0);
+		scx_bpf_dsq_insert_vtime(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL,
+					 p->scx.dsq_vtime, 0);
 		return cpu;
 	}
 
diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
index 6a7db1502c29..6325bf76f47e 100644
--- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
+++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
@@ -45,7 +45,7 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev)
 
 	target = bpf_get_prandom_u32() % nr_cpus;
 
-	scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0);
+	scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0);
 	bpf_task_release(p);
 }
 
diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
index 1efb50d61040..a7cf868d5e31 100644
--- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
+++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
@@ -31,7 +31,7 @@ void BPF_STRUCT_OPS(enq_select_cpu_fails_enqueue, struct task_struct *p,
 	/* Can only call from ops.select_cpu() */
 	scx_bpf_select_cpu_dfl(p, 0, 0, &found);
 
-	scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+	scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
 }
 
 SEC(".struct_ops.link")
diff --git a/tools/testing/selftests/sched_ext/exit.bpf.c b/tools/testing/selftests/sched_ext/exit.bpf.c
index d75d4faf07f6..4bc36182d3ff 100644
--- a/tools/testing/selftests/sched_ext/exit.bpf.c
+++ b/tools/testing/selftests/sched_ext/exit.bpf.c
@@ -33,7 +33,7 @@ void BPF_STRUCT_OPS(exit_enqueue, struct task_struct *p, u64 enq_flags)
 	if (exit_point == EXIT_ENQUEUE)
 		EXIT_CLEANLY();
 
-	scx_bpf_dispatch(p, DSQ_ID, SCX_SLICE_DFL, enq_flags);
+	scx_bpf_dsq_insert(p, DSQ_ID, SCX_SLICE_DFL, enq_flags);
 }
 
 void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p)
@@ -41,7 +41,7 @@ void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p)
 	if (exit_point == EXIT_DISPATCH)
 		EXIT_CLEANLY();
 
-	scx_bpf_consume(DSQ_ID);
+	scx_bpf_dsq_move_to_local(DSQ_ID);
 }
 
 void BPF_STRUCT_OPS(exit_enable, struct task_struct *p)
diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c
index 4d4cd8d966db..4c005fa71810 100644
--- a/tools/testing/selftests/sched_ext/maximal.bpf.c
+++ b/tools/testing/selftests/sched_ext/maximal.bpf.c
@@ -20,7 +20,7 @@ s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu,
 
 void BPF_STRUCT_OPS(maximal_enqueue, struct task_struct *p, u64 enq_flags)
 {
-	scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+	scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
 }
 
 void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags)
@@ -28,7 +28,7 @@ void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags)
 
 void BPF_STRUCT_OPS(maximal_dispatch, s32 cpu, struct task_struct *prev)
 {
-	scx_bpf_consume(SCX_DSQ_GLOBAL);
+	scx_bpf_dsq_move_to_local(SCX_DSQ_GLOBAL);
 }
 
 void BPF_STRUCT_OPS(maximal_runnable, struct task_struct *p, u64 enq_flags)
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c
index f171ac470970..13d0f5be788d 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c
@@ -30,7 +30,7 @@ void BPF_STRUCT_OPS(select_cpu_dfl_enqueue, struct task_struct *p,
 	}
 	scx_bpf_put_idle_cpumask(idle_mask);
 
-	scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+	scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
 }
 
 SEC(".struct_ops.link")
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c
index 9efdbb7da928..815f1d5d61ac 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c
@@ -67,7 +67,7 @@ void BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_enqueue, struct task_struct *p,
 		saw_local = true;
 	}
 
-	scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, enq_flags);
+	scx_bpf_dsq_insert(p, dsq_id, SCX_SLICE_DFL, enq_flags);
 }
 
 s32 BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_init_task,
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c
index 59bfc4f36167..4bb99699e920 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c
@@ -29,7 +29,7 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_select_cpu, struct task_struct *p,
 	cpu = prev_cpu;
 
 dispatch:
-	scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, 0);
+	scx_bpf_dsq_insert(p, dsq_id, SCX_SLICE_DFL, 0);
 	return cpu;
 }
 
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c
index 3bbd5fcdfb18..2a75de11b2cf 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c
@@ -18,7 +18,7 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_bad_dsq_select_cpu, struct task_struct *p
 		   s32 prev_cpu, u64 wake_flags)
 {
 	/* Dispatching to a random DSQ should fail. */
-	scx_bpf_dispatch(p, 0xcafef00d, SCX_SLICE_DFL, 0);
+	scx_bpf_dsq_insert(p, 0xcafef00d, SCX_SLICE_DFL, 0);
 
 	return prev_cpu;
 }
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c
index 0fda57fe0ecf..99d075695c97 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c
@@ -18,8 +18,8 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_dbl_dsp_select_cpu, struct task_struct *p
 		   s32 prev_cpu, u64 wake_flags)
 {
 	/* Dispatching twice in a row is disallowed. */
-	scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
-	scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
+	scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
+	scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
 
 	return prev_cpu;
 }
diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c
index e6c67bcf5e6e..bfcb96cd4954 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c
@@ -2,8 +2,8 @@
 /*
  * A scheduler that validates that enqueue flags are properly stored and
  * applied at dispatch time when a task is directly dispatched from
- * ops.select_cpu(). We validate this by using scx_bpf_dispatch_vtime(), and
- * making the test a very basic vtime scheduler.
+ * ops.select_cpu(). We validate this by using scx_bpf_dsq_insert_vtime(),
+ * and making the test a very basic vtime scheduler.
  *
  * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
  * Copyright (c) 2024 David Vernet <dvernet@meta.com>
@@ -47,13 +47,13 @@ s32 BPF_STRUCT_OPS(select_cpu_vtime_select_cpu, struct task_struct *p,
 	cpu = prev_cpu;
 	scx_bpf_test_and_clear_cpu_idle(cpu);
 ddsp:
-	scx_bpf_dispatch_vtime(p, VTIME_DSQ, SCX_SLICE_DFL, task_vtime(p), 0);
+	scx_bpf_dsq_insert_vtime(p, VTIME_DSQ, SCX_SLICE_DFL, task_vtime(p), 0);
 	return cpu;
 }
 
 void BPF_STRUCT_OPS(select_cpu_vtime_dispatch, s32 cpu, struct task_struct *p)
 {
-	if (scx_bpf_consume(VTIME_DSQ))
+	if (scx_bpf_dsq_move_to_local(VTIME_DSQ))
 		consumed = true;
 }
 
-- 
cgit v1.2.3


From f24d192985cbd6782850fdbb3839039da2f0ee76 Mon Sep 17 00:00:00 2001
From: guanjing <guanjing@cmss.chinamobile.com>
Date: Sun, 17 Nov 2024 10:51:29 +0800
Subject: sched_ext: fix application of sizeof to pointer

sizeof when applied to a pointer typed expression gives the size of
the pointer.

The proper fix in this particular case is to code sizeof(*cpuset)
instead of sizeof(cpuset).

This issue was detected with the help of Coccinelle.

Fixes: 22a920209ab6 ("sched_ext: Implement tickless support")
Signed-off-by: guanjing <guanjing@cmss.chinamobile.com>
Acked-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 tools/sched_ext/scx_central.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c
index 21deea320bd7..e938156ed0a0 100644
--- a/tools/sched_ext/scx_central.c
+++ b/tools/sched_ext/scx_central.c
@@ -97,7 +97,7 @@ restart:
 	SCX_BUG_ON(!cpuset, "Failed to allocate cpuset");
 	CPU_ZERO(cpuset);
 	CPU_SET(skel->rodata->central_cpu, cpuset);
-	SCX_BUG_ON(sched_setaffinity(0, sizeof(cpuset), cpuset),
+	SCX_BUG_ON(sched_setaffinity(0, sizeof(*cpuset), cpuset),
 		   "Failed to affinitize to central CPU %d (max %d)",
 		   skel->rodata->central_cpu, skel->rodata->nr_cpu_ids - 1);
 	CPU_FREE(cpuset);
-- 
cgit v1.2.3


From 5fc3a088ee2de55a6b39b7ee18484e01f377ab8a Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 2 Dec 2024 19:53:39 -0800
Subject: tools headers: Sync uapi/drm/drm.h with the kernel sources

To pick up the changes in this cset:

  56c594d8df64e726 ("drm: add DRM_SET_CLIENT_NAME ioctl")

This addresses these perf build warnings:

  Warning: Kernel ABI header differences:
    diff -u tools/include/uapi/drm/drm.h include/uapi/drm/drm.h

Please see tools/include/uapi/README for further details.

Reviewed-by: James Clark <james.clark@linaro.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Simona Vetter <simona@ffwll.ch>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: dri-devel@lists.freedesktop.org
Link: https://lore.kernel.org/r/20241203035349.1901262-2-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/include/uapi/drm/drm.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 16122819edfe..7fba37b94401 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -1024,6 +1024,13 @@ struct drm_crtc_queue_sequence {
 	__u64 user_data;	/* user data passed to event */
 };
 
+#define DRM_CLIENT_NAME_MAX_LEN		64
+struct drm_set_client_name {
+	__u64 name_len;
+	__u64 name;
+};
+
+
 #if defined(__cplusplus)
 }
 #endif
@@ -1288,6 +1295,16 @@ extern "C" {
  */
 #define DRM_IOCTL_MODE_CLOSEFB		DRM_IOWR(0xD0, struct drm_mode_closefb)
 
+/**
+ * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file
+ *
+ * Having a name allows for easier tracking and debugging.
+ * The length of the name (without null ending char) must be
+ * <= DRM_CLIENT_NAME_MAX_LEN.
+ * The call will fail if the name contains whitespaces or non-printable chars.
+ */
+#define DRM_IOCTL_SET_CLIENT_NAME	DRM_IOWR(0xD1, struct drm_set_client_name)
+
 /*
  * Device specific ioctls should only be in their respective headers
  * The device specific ioctl range is from 0x40 to 0x9f.
-- 
cgit v1.2.3


From 5229df8fb6796ff4aba39b16cf617028fa5d94b7 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 2 Dec 2024 19:53:40 -0800
Subject: tools headers: Sync uapi/linux/perf_event.h with the kernel sources

To pick up the changes in this cset:

  18d92bb57c39504d ("perf/core: Add aux_pause, aux_resume, aux_start_paused")

This addresses these perf build warnings:

  Warning: Kernel ABI header differences:
    diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h

Please see tools/include/uapi/README for further details.

Reviewed-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20241203035349.1901262-3-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/include/uapi/linux/perf_event.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 4842c36fdf80..0524d541d4e3 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -511,7 +511,16 @@ struct perf_event_attr {
 	__u16	sample_max_stack;
 	__u16	__reserved_2;
 	__u32	aux_sample_size;
-	__u32	__reserved_3;
+
+	union {
+		__u32	aux_action;
+		struct {
+			__u32	aux_start_paused :  1, /* start AUX area tracing paused */
+				aux_pause        :  1, /* on overflow, pause AUX area tracing */
+				aux_resume       :  1, /* on overflow, resume AUX area tracing */
+				__reserved_3     : 29;
+		};
+	};
 
 	/*
 	 * User provided data if sigtrap=1, passed back to user via
-- 
cgit v1.2.3


From e2064b7c5d82651a418b86078597e51dea3f3123 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 2 Dec 2024 19:53:41 -0800
Subject: tools headers: Sync uapi/linux/kvm.h with the kernel sources

To pick up the changes in this cset:

  e785dfacf7e7fe94 ("LoongArch: KVM: Add PCHPIC device support")
  2e8b9df82631e714 ("LoongArch: KVM: Add EIOINTC device support")
  c532de5a67a70f85 ("LoongArch: KVM: Add IPI device support")

This addresses these perf build warnings:

  Warning: Kernel ABI header differences:
    diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h

Please see tools/include/uapi/README for further details.

Reviewed-by: James Clark <james.clark@linaro.org>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
Cc: Bibo Mao <maobibo@loongson.cn>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: kvm@vger.kernel.org
Cc: loongarch@lists.linux.dev
Link: https://lore.kernel.org/r/20241203035349.1901262-4-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/include/uapi/linux/kvm.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 637efc055145..502ea63b5d2e 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1158,7 +1158,15 @@ enum kvm_device_type {
 #define KVM_DEV_TYPE_ARM_PV_TIME	KVM_DEV_TYPE_ARM_PV_TIME
 	KVM_DEV_TYPE_RISCV_AIA,
 #define KVM_DEV_TYPE_RISCV_AIA		KVM_DEV_TYPE_RISCV_AIA
+	KVM_DEV_TYPE_LOONGARCH_IPI,
+#define KVM_DEV_TYPE_LOONGARCH_IPI	KVM_DEV_TYPE_LOONGARCH_IPI
+	KVM_DEV_TYPE_LOONGARCH_EIOINTC,
+#define KVM_DEV_TYPE_LOONGARCH_EIOINTC	KVM_DEV_TYPE_LOONGARCH_EIOINTC
+	KVM_DEV_TYPE_LOONGARCH_PCHPIC,
+#define KVM_DEV_TYPE_LOONGARCH_PCHPIC	KVM_DEV_TYPE_LOONGARCH_PCHPIC
+
 	KVM_DEV_TYPE_MAX,
+
 };
 
 struct kvm_vfio_spapr_tce {
-- 
cgit v1.2.3


From e120829dbf927c8b93cd5e06acfec0332cc82e02 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 2 Dec 2024 19:53:42 -0800
Subject: tools headers: Sync x86 kvm and cpufeature headers with the kernel

To pick up the changes in this cset:

  a0423af92cb31e6f ("x86: KVM: Advertise CPUIDs for new instructions in Clearwater Forest")
  0c487010cb4f79e4 ("x86/cpufeatures: Add X86_FEATURE_AMD_WORKLOAD_CLASS feature bit")
  1ad4667066714369 ("x86/cpufeatures: Add X86_FEATURE_AMD_HETEROGENEOUS_CORES")
  104edc6efca62838 ("x86/cpufeatures: Rename X86_FEATURE_FAST_CPPC to have AMD prefix")
  3ea87dfa31a7b0bb ("x86/cpufeatures: Add a IBPB_NO_RET BUG flag")
  ff898623af2ed564 ("x86/cpufeatures: Define X86_FEATURE_AMD_IBPB_RET")
  dcb988cdac85bad1 ("KVM: x86: Quirk initialization of feature MSRs to KVM's max configuration")

This addresses these perf build warnings:

  Warning: Kernel ABI header differences:
    diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h
    diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h

Please see tools/include/uapi/README for further details.

Reviewed-by: James Clark <james.clark@linaro.org>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: x86@kernel.org
Cc: kvm@vger.kernel.org
Link: https://lore.kernel.org/r/20241203035349.1901262-5-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/arch/x86/include/asm/cpufeatures.h | 11 +++++++++--
 tools/arch/x86/include/uapi/asm/kvm.h    |  1 +
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 23698d0f4bb4..17b6590748c0 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -215,7 +215,7 @@
 #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE	( 7*32+23) /* Disable Speculative Store Bypass. */
 #define X86_FEATURE_LS_CFG_SSBD		( 7*32+24)  /* AMD SSBD implementation via LS_CFG MSR */
 #define X86_FEATURE_IBRS		( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */
-#define X86_FEATURE_IBPB		( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier */
+#define X86_FEATURE_IBPB		( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier without a guaranteed RSB flush */
 #define X86_FEATURE_STIBP		( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ZEN			( 7*32+28) /* Generic flag for all Zen and newer */
 #define X86_FEATURE_L1TF_PTEINV		( 7*32+29) /* L1TF workaround PTE inversion */
@@ -317,6 +317,9 @@
 #define X86_FEATURE_ZEN1		(11*32+31) /* CPU based on Zen1 microarchitecture */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+#define X86_FEATURE_SHA512		(12*32+ 0) /* SHA512 instructions */
+#define X86_FEATURE_SM3			(12*32+ 1) /* SM3 instructions */
+#define X86_FEATURE_SM4			(12*32+ 2) /* SM4 instructions */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* "avx_vnni" AVX VNNI instructions */
 #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */
 #define X86_FEATURE_CMPCCXADD           (12*32+ 7) /* CMPccXADD instructions */
@@ -348,6 +351,7 @@
 #define X86_FEATURE_CPPC		(13*32+27) /* "cppc" Collaborative Processor Performance Control */
 #define X86_FEATURE_AMD_PSFD            (13*32+28) /* Predictive Store Forwarding Disable */
 #define X86_FEATURE_BTC_NO		(13*32+29) /* Not vulnerable to Branch Type Confusion */
+#define X86_FEATURE_AMD_IBPB_RET	(13*32+30) /* IBPB clears return address predictor */
 #define X86_FEATURE_BRS			(13*32+31) /* "brs" Branch Sampling available */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
@@ -472,7 +476,9 @@
 #define X86_FEATURE_BHI_CTRL		(21*32+ 2) /* BHI_DIS_S HW control available */
 #define X86_FEATURE_CLEAR_BHB_HW	(21*32+ 3) /* BHI_DIS_S HW control enabled */
 #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
-#define X86_FEATURE_AMD_FAST_CPPC		(21*32 + 5) /* AMD Fast CPPC */
+#define X86_FEATURE_AMD_FAST_CPPC	(21*32 + 5) /* Fast CPPC */
+#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
+#define X86_FEATURE_AMD_WORKLOAD_CLASS	(21*32 + 7) /* Workload Classification */
 
 /*
  * BUG word(s)
@@ -523,4 +529,5 @@
 #define X86_BUG_DIV0			X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */
 #define X86_BUG_RFDS			X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
 #define X86_BUG_BHI			X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */
+#define X86_BUG_IBPB_NO_RET	   	X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index a8debbf2f702..88585c1de416 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -440,6 +440,7 @@ struct kvm_sync_regs {
 #define KVM_X86_QUIRK_FIX_HYPERCALL_INSN	(1 << 5)
 #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS	(1 << 6)
 #define KVM_X86_QUIRK_SLOT_ZAP_ALL		(1 << 7)
+#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS	(1 << 8)
 
 #define KVM_STATE_NESTED_FORMAT_VMX	0
 #define KVM_STATE_NESTED_FORMAT_SVM	1
-- 
cgit v1.2.3


From 76e231997926c8f5707ed5f9c1f91377efe9e484 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 2 Dec 2024 19:53:43 -0800
Subject: tools headers: Sync arm64 kvm header with the kernel sources

To pick up the changes in this cset:

  97413cea1c48cc05 ("KVM: arm64: Add PSCI v1.3 SYSTEM_OFF2 function for hibernation")

This addresses these perf build warnings:

  Warning: Kernel ABI header differences:
    diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h

Please see tools/include/uapi/README for further details.

Reviewed-by: James Clark <james.clark@linaro.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oliver.upton@linux.dev>
Cc: kvmarm@lists.linux.dev
Link: https://lore.kernel.org/r/20241203035349.1901262-6-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/arch/arm64/include/uapi/asm/kvm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools')

diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 964df31da975..66736ff04011 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -484,6 +484,12 @@ enum {
  */
 #define KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2	(1ULL << 0)
 
+/*
+ * Shutdown caused by a PSCI v1.3 SYSTEM_OFF2 call.
+ * Valid only when the system event has a type of KVM_SYSTEM_EVENT_SHUTDOWN.
+ */
+#define KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2	(1ULL << 0)
+
 /* run->fail_entry.hardware_entry_failure_reason codes. */
 #define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED	(1ULL << 0)
 
-- 
cgit v1.2.3


From 81b483f72289257f5c6f4974150d41e8b499e1e9 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 2 Dec 2024 19:53:44 -0800
Subject: tools headers: Sync *xattrat syscall changes with the kernel sources

To pick up the changes in this cset:

  6140be90ec70c39f ("fs/xattr: add *at family syscalls")

This addresses these perf build warnings:

  Warning: Kernel ABI header differences:
    diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h
    diff -u tools/perf/arch/x86/entry/syscalls/syscall_32.tbl arch/x86/entry/syscalls/syscall_32.tbl
    diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
    diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl
    diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl
    diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl

The arm64 changes are not included as it requires more changes in the
tools.  It'll be worked for the later cycle.

Please see tools/include/uapi/README for further details.

Reviewed-by: James Clark <james.clark@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christian Brauner <brauner@kernel.org>
CC: x86@kernel.org
CC: linux-mips@vger.kernel.org
CC: linuxppc-dev@lists.ozlabs.org
CC: linux-s390@vger.kernel.org
Link: https://lore.kernel.org/r/20241203035349.1901262-7-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/include/uapi/asm-generic/unistd.h             | 11 ++++++++++-
 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl |  4 ++++
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl  |  4 ++++
 tools/perf/arch/s390/entry/syscalls/syscall.tbl     |  4 ++++
 tools/perf/arch/x86/entry/syscalls/syscall_32.tbl   |  4 ++++
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl   |  4 ++++
 6 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 5bf6148cac2b..88dc393c2bca 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -841,8 +841,17 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
 #define __NR_mseal 462
 __SYSCALL(__NR_mseal, sys_mseal)
 
+#define __NR_setxattrat 463
+__SYSCALL(__NR_setxattrat, sys_setxattrat)
+#define __NR_getxattrat 464
+__SYSCALL(__NR_getxattrat, sys_getxattrat)
+#define __NR_listxattrat 465
+__SYSCALL(__NR_listxattrat, sys_listxattrat)
+#define __NR_removexattrat 466
+__SYSCALL(__NR_removexattrat, sys_removexattrat)
+
 #undef __NR_syscalls
-#define __NR_syscalls 463
+#define __NR_syscalls 467
 
 /*
  * 32 bit systems traditionally used different
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index 1464c6be6eb3..c844cd5cda62 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -377,3 +377,7 @@
 460	n64	lsm_set_self_attr		sys_lsm_set_self_attr
 461	n64	lsm_list_modules		sys_lsm_list_modules
 462	n64	mseal				sys_mseal
+463	n64	setxattrat			sys_setxattrat
+464	n64	getxattrat			sys_getxattrat
+465	n64	listxattrat			sys_listxattrat
+466	n64	removexattrat			sys_removexattrat
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index ebae8415dfbb..d8b4ab78bef0 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -553,3 +553,7 @@
 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
 461	common	lsm_list_modules		sys_lsm_list_modules
 462	common	mseal				sys_mseal
+463	common	setxattrat			sys_setxattrat
+464	common	getxattrat			sys_getxattrat
+465	common	listxattrat			sys_listxattrat
+466	common	removexattrat			sys_removexattrat
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 01071182763e..e9115b4d8b63 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -465,3 +465,7 @@
 460  common	lsm_set_self_attr	sys_lsm_set_self_attr		sys_lsm_set_self_attr
 461  common	lsm_list_modules	sys_lsm_list_modules		sys_lsm_list_modules
 462  common	mseal			sys_mseal			sys_mseal
+463  common	setxattrat		sys_setxattrat			sys_setxattrat
+464  common	getxattrat		sys_getxattrat			sys_getxattrat
+465  common	listxattrat		sys_listxattrat			sys_listxattrat
+466  common	removexattrat		sys_removexattrat		sys_removexattrat
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
index 534c74b14fab..4d0fb2fba7e2 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
@@ -468,3 +468,7 @@
 460	i386	lsm_set_self_attr	sys_lsm_set_self_attr
 461	i386	lsm_list_modules	sys_lsm_list_modules
 462	i386	mseal 			sys_mseal
+463	i386	setxattrat		sys_setxattrat
+464	i386	getxattrat		sys_getxattrat
+465	i386	listxattrat		sys_listxattrat
+466	i386	removexattrat		sys_removexattrat
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 7093ee21c0d1..5eb708bff1c7 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -386,6 +386,10 @@
 460	common	lsm_set_self_attr	sys_lsm_set_self_attr
 461	common	lsm_list_modules	sys_lsm_list_modules
 462 	common  mseal			sys_mseal
+463	common	setxattrat		sys_setxattrat
+464	common	getxattrat		sys_getxattrat
+465	common	listxattrat		sys_listxattrat
+466	common	removexattrat		sys_removexattrat
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
-- 
cgit v1.2.3


From 3cef7d8b12811dfd737e77e69d9ef72bc55365cf Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 2 Dec 2024 19:53:45 -0800
Subject: tools headers: Sync uapi/asm-generic/mman.h with the kernel sources

To pick up the changes in this cset:

  3630e82ab6bd2642 ("mman: Add map_shadow_stack() flags")

This addresses these perf build warnings:

  Warning: Kernel ABI header differences:
    diff -u tools/include/uapi/asm-generic/mman.h include/uapi/asm-generic/mman.h

Please see tools/include/uapi/README for further details.

Reviewed-by: James Clark <james.clark@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Mark Brown <broonie@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: linux-arch@vger.kernel.org
Link: https://lore.kernel.org/r/20241203035349.1901262-8-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/include/uapi/asm-generic/mman.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h
index 406f7718f9ad..51d2556af54a 100644
--- a/tools/include/uapi/asm-generic/mman.h
+++ b/tools/include/uapi/asm-generic/mman.h
@@ -19,4 +19,8 @@
 #define MCL_FUTURE	2		/* lock all future mappings */
 #define MCL_ONFAULT	4		/* lock all pages that are faulted in */
 
+#define SHADOW_STACK_SET_TOKEN (1ULL << 0)     /* Set up a restore token in the shadow stack */
+#define SHADOW_STACK_SET_MARKER (1ULL << 1)     /* Set up a top of stack marker in the shadow stack */
+
+
 #endif /* __ASM_GENERIC_MMAN_H */
-- 
cgit v1.2.3


From 6d442c69cbe0f8ab47177b8af125e6c88b9e8cd0 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 2 Dec 2024 19:53:46 -0800
Subject: tools headers: Sync uapi/linux/fcntl.h with the kernel sources

To pick up the changes in this cset:

  c374196b2b9f4b80 ("fs: name_to_handle_at() support for "explicit connectable" file handles")
  95f567f81e43a1bc ("fs: Simplify getattr interface function checking AT_GETATTR_NOSEC flag")

This addresses these perf build warnings:

  Warning: Kernel ABI header differences:
    diff -u tools/perf/trace/beauty/include/uapi/linux/fcntl.h include/uapi/linux/fcntl.h

Please see tools/include/uapi/README for further details.

Reviewed-by: James Clark <james.clark@linaro.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Alexander Aring <alex.aring@gmail.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20241203035349.1901262-9-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/trace/beauty/include/uapi/linux/fcntl.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
index 87e2dec79fea..6e6907e63bfc 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
@@ -153,9 +153,6 @@
 					   object identity and may not be
 					   usable with open_by_handle_at(2). */
 #define AT_HANDLE_MNT_ID_UNIQUE	0x001	/* Return the u64 unique mount ID. */
-
-#if defined(__KERNEL__)
-#define AT_GETATTR_NOSEC	0x80000000
-#endif
+#define AT_HANDLE_CONNECTABLE	0x002	/* Request a connectable file handle */
 
 #endif /* _UAPI_LINUX_FCNTL_H */
-- 
cgit v1.2.3


From 02116fcfd827fe17dbdb543f111cdd9b8b32f18e Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 2 Dec 2024 19:53:47 -0800
Subject: tools headers: Sync uapi/linux/mount.h with the kernel sources

To pick up the changes in this cset:

  aefff51e1c2986e1 statmount: retrieve security mount options
  2f4d4503e9e5ab76 statmount: add flag to retrieve unescaped options
  44010543fc8bedad fs: add the ability for statmount() to report the sb_source
  ed9d95f691c29748 fs: add the ability for statmount() to report the fs_subtype

This addresses these perf build warnings:

  Warning: Kernel ABI header differences:
    diff -u tools/perf/trace/beauty/include/uapi/linux/mount.h include/uapi/linux/mount.h

Please see tools/include/uapi/README for further details.

Reviewed-by: James Clark <james.clark@linaro.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20241203035349.1901262-10-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/trace/beauty/include/uapi/linux/mount.h | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h
index 225bc366ffcb..c07008816aca 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/mount.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h
@@ -154,7 +154,7 @@ struct mount_attr {
  */
 struct statmount {
 	__u32 size;		/* Total size, including strings */
-	__u32 mnt_opts;		/* [str] Mount options of the mount */
+	__u32 mnt_opts;		/* [str] Options (comma separated, escaped) */
 	__u64 mask;		/* What results were written */
 	__u32 sb_dev_major;	/* Device ID */
 	__u32 sb_dev_minor;
@@ -173,7 +173,13 @@ struct statmount {
 	__u32 mnt_root;		/* [str] Root of mount relative to root of fs */
 	__u32 mnt_point;	/* [str] Mountpoint relative to current root */
 	__u64 mnt_ns_id;	/* ID of the mount namespace */
-	__u64 __spare2[49];
+	__u32 fs_subtype;	/* [str] Subtype of fs_type (if any) */
+	__u32 sb_source;	/* [str] Source string of the mount */
+	__u32 opt_num;		/* Number of fs options */
+	__u32 opt_array;	/* [str] Array of nul terminated fs options */
+	__u32 opt_sec_num;	/* Number of security options */
+	__u32 opt_sec_array;	/* [str] Array of nul terminated security options */
+	__u64 __spare2[46];
 	char str[];		/* Variable size part containing strings */
 };
 
@@ -207,6 +213,10 @@ struct mnt_id_req {
 #define STATMOUNT_FS_TYPE		0x00000020U	/* Want/got fs_type */
 #define STATMOUNT_MNT_NS_ID		0x00000040U	/* Want/got mnt_ns_id */
 #define STATMOUNT_MNT_OPTS		0x00000080U	/* Want/got mnt_opts */
+#define STATMOUNT_FS_SUBTYPE		0x00000100U	/* Want/got fs_subtype */
+#define STATMOUNT_SB_SOURCE		0x00000200U	/* Want/got sb_source */
+#define STATMOUNT_OPT_ARRAY		0x00000400U	/* Want/got opt_... */
+#define STATMOUNT_OPT_SEC_ARRAY		0x00000800U	/* Want/got opt_sec... */
 
 /*
  * Special @mnt_id values that can be passed to listmount
-- 
cgit v1.2.3


From c994ac74cc3683b1cca45bc425ea1c625da109f3 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 2 Dec 2024 19:53:48 -0800
Subject: tools headers: Sync uapi/linux/prctl.h with the kernel sources

To pick up the changes in this cset:

  09d6775f503b393d riscv: Add support for userspace pointer masking
  91e102e79740ae43 prctl: arch-agnostic prctl for shadow stack

This addresses these perf build warnings:

  Warning: Kernel ABI header differences:
    diff -u tools/perf/trace/beauty/include/uapi/linux/prctl.h include/uapi/linux/prctl.h

Please see tools/include/uapi/README for further details.

Reviewed-by: James Clark <james.clark@linaro.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20241203035349.1901262-11-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/trace/beauty/include/uapi/linux/prctl.h | 27 +++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
index 35791791a879..5c6080680cb2 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
@@ -230,7 +230,7 @@ struct prctl_mm_map {
 # define PR_PAC_APDBKEY			(1UL << 3)
 # define PR_PAC_APGAKEY			(1UL << 4)
 
-/* Tagged user address controls for arm64 */
+/* Tagged user address controls for arm64 and RISC-V */
 #define PR_SET_TAGGED_ADDR_CTRL		55
 #define PR_GET_TAGGED_ADDR_CTRL		56
 # define PR_TAGGED_ADDR_ENABLE		(1UL << 0)
@@ -244,6 +244,9 @@ struct prctl_mm_map {
 # define PR_MTE_TAG_MASK		(0xffffUL << PR_MTE_TAG_SHIFT)
 /* Unused; kept only for source compatibility */
 # define PR_MTE_TCF_SHIFT		1
+/* RISC-V pointer masking tag length */
+# define PR_PMLEN_SHIFT			24
+# define PR_PMLEN_MASK			(0x7fUL << PR_PMLEN_SHIFT)
 
 /* Control reclaim behavior when allocating memory */
 #define PR_SET_IO_FLUSHER		57
@@ -328,4 +331,26 @@ struct prctl_mm_map {
 # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC	0x10 /* Clear the aspect on exec */
 # define PR_PPC_DEXCR_CTRL_MASK		0x1f
 
+/*
+ * Get the current shadow stack configuration for the current thread,
+ * this will be the value configured via PR_SET_SHADOW_STACK_STATUS.
+ */
+#define PR_GET_SHADOW_STACK_STATUS      74
+
+/*
+ * Set the current shadow stack configuration.  Enabling the shadow
+ * stack will cause a shadow stack to be allocated for the thread.
+ */
+#define PR_SET_SHADOW_STACK_STATUS      75
+# define PR_SHADOW_STACK_ENABLE         (1UL << 0)
+# define PR_SHADOW_STACK_WRITE		(1UL << 1)
+# define PR_SHADOW_STACK_PUSH		(1UL << 2)
+
+/*
+ * Prevent further changes to the specified shadow stack
+ * configuration.  All bits may be locked via this call, including
+ * undefined bits.
+ */
+#define PR_LOCK_SHADOW_STACK_STATUS      76
+
 #endif /* _LINUX_PRCTL_H */
-- 
cgit v1.2.3


From 968121f0a649ee2ff5ac7b29aa4d4f6b0798ea9e Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 2 Dec 2024 19:53:49 -0800
Subject: perf tools: Fix build error on generated/fs_at_flags_array.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It should only have generic flags in the array but the recent header
sync brought a new flags to fcntl.h and caused a build error.  Let's
update the shell script to exclude flags specific to name_to_handle_at().

    CC      trace/beauty/fs_at_flags.o
  In file included from trace/beauty/fs_at_flags.c:21:
  tools/perf/trace/beauty/generated/fs_at_flags_array.c:13:30: error: initialized field overwritten [-Werror=override-init]
     13 |         [ilog2(0x002) + 1] = "HANDLE_CONNECTABLE",
        |                              ^~~~~~~~~~~~~~~~~~~~
  tools/perf/trace/beauty/generated/fs_at_flags_array.c:13:30: note: (near initialization for ‘fs_at_flags[2]’)

Reviewed-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20241203035349.1901262-12-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/trace/beauty/fs_at_flags.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/trace/beauty/fs_at_flags.sh b/tools/perf/trace/beauty/fs_at_flags.sh
index e3f13f96a27c..fac4d0c049fc 100755
--- a/tools/perf/trace/beauty/fs_at_flags.sh
+++ b/tools/perf/trace/beauty/fs_at_flags.sh
@@ -13,13 +13,14 @@ printf "static const char *fs_at_flags[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+AT_([^_]+[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
 # AT_EACCESS is only meaningful to faccessat, so we will special case it there...
 # AT_STATX_SYNC_TYPE is not a bit, its a mask of AT_STATX_SYNC_AS_STAT, AT_STATX_FORCE_SYNC and AT_STATX_DONT_SYNC
-# AT_HANDLE_FID and AT_HANDLE_MNT_ID_UNIQUE are reusing values and are valid only for name_to_handle_at()
+# AT_HANDLE_FID, AT_HANDLE_MNT_ID_UNIQUE and AT_HANDLE_CONNECTABLE are reusing values and are valid only for name_to_handle_at()
 # AT_RENAME_NOREPLACE reuses 0x1 and is valid only for renameat2()
 grep -E $regex ${linux_fcntl} | \
 	grep -v AT_EACCESS | \
 	grep -v AT_STATX_SYNC_TYPE | \
 	grep -v AT_HANDLE_FID | \
 	grep -v AT_HANDLE_MNT_ID_UNIQUE | \
+	grep -v AT_HANDLE_CONNECTABLE | \
 	grep -v AT_RENAME_NOREPLACE | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
-- 
cgit v1.2.3


From c33aea446bf555ab2b4e06deb914ba8f87cdb068 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 5 Dec 2024 15:15:29 -0800
Subject: perf tools: Fix precise_ip fallback logic

Sometimes it returns other than EOPNOTSUPP for invalid precise_ip so
it cannot check the error code.  Let's move the fallback after the
missing feature checks so that it can handle EINVAL as well.  This also
aligns well with the existing behavior which blindly turns off the
precise_ip but we check the missing features correctly now.

Fixes: af954f76eea56453 ("perf tools: Check fallback error and order")
Reported-by: kernel test robot <oliver.sang@intel.com>
Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Closes: https://lore.kernel.org/oe-lkp/202411301431.799e5531-lkp@intel.com
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: https://lore.kernel.org/r/Z1DV0lN8qHSysX7f@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/evsel.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f745723d486b..d22c5df1701e 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2571,12 +2571,12 @@ try_fallback:
 	if (err == -EMFILE && rlimit__increase_nofile(&set_rlimit))
 		goto retry_open;
 
-	if (err == -EOPNOTSUPP && evsel__precise_ip_fallback(evsel))
-		goto retry_open;
-
 	if (err == -EINVAL && evsel__detect_missing_features(evsel))
 		goto fallback_missing_features;
 
+	if (evsel__precise_ip_fallback(evsel))
+		goto retry_open;
+
 	if (evsel__handle_error_quirks(evsel, err))
 		goto retry_open;
 
-- 
cgit v1.2.3


From cf3515c556907b4da290967a2a6cbbd9ee0ee723 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Thu, 5 Dec 2024 17:35:59 +0100
Subject: selftests: mlxsw: sharedbuffer: Remove h1 ingress test case

The test is sending only one packet generated with mausezahn from $h1 to
$h2. However, for some reason, it is testing for non-zero maximum occupancy
in both the ingress pool of $h1 and $h2. The former only passes when $h2
happens to send a packet.

Avoid intermittent failures by removing unintentional test case
regarding the ingress pool of $h1.

Fixes: a865ad999603 ("selftests: mlxsw: Add shared buffer traffic test")
Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Link: https://patch.msgid.link/5b7344608d5e06f38209e48d8af8c92fa11b6742.1733414773.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
index 0c47faff9274..a7b3d6cf3185 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
@@ -108,11 +108,6 @@ port_pool_test()
 
 	devlink sb occupancy snapshot $DEVLINK_DEV
 
-	RET=0
-	max_occ=$(sb_occ_pool_check $dl_port1 $SB_POOL_ING $exp_max_occ)
-	check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ"
-	log_test "physical port's($h1) ingress pool"
-
 	RET=0
 	max_occ=$(sb_occ_pool_check $dl_port2 $SB_POOL_ING $exp_max_occ)
 	check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ"
-- 
cgit v1.2.3


From 6c46ad4d1bb2e8ec2265296e53765190f6e32f33 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Thu, 5 Dec 2024 17:36:00 +0100
Subject: selftests: mlxsw: sharedbuffer: Remove duplicate test cases

On both port_tc_ip_test() and port_tc_arp_test(), the max occupancy is
checked on $h2 twice, when only the error message is different and does not
match the check itself.

Remove the two duplicated test cases from the test.

Fixes: a865ad999603 ("selftests: mlxsw: Add shared buffer traffic test")
Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Link: https://patch.msgid.link/d9eb26f6fc16a06a30b5c2c16ad80caf502bc561.1733414773.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
index a7b3d6cf3185..21bebc5726f6 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
@@ -131,11 +131,6 @@ port_tc_ip_test()
 
 	devlink sb occupancy snapshot $DEVLINK_DEV
 
-	RET=0
-	max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
-	check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
-	log_test "physical port's($h1) ingress TC - IP packet"
-
 	RET=0
 	max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
 	check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
@@ -158,11 +153,6 @@ port_tc_arp_test()
 
 	devlink sb occupancy snapshot $DEVLINK_DEV
 
-	RET=0
-	max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
-	check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
-	log_test "physical port's($h1) ingress TC - ARP packet"
-
 	RET=0
 	max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
 	check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
-- 
cgit v1.2.3


From 5f2c7ab15fd806043db1a7d54b5ec36be0bd93b1 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Thu, 5 Dec 2024 17:36:01 +0100
Subject: selftests: mlxsw: sharedbuffer: Ensure no extra packets are counted

The test assumes that the packet it is sending is the only packet being
passed to the device.

However, it is not the case and so other packets are filling the buffers
as well. Therefore, the test sometimes fails because it is reading a
maximum occupancy that is larger than expected.

Add egress filters on $h1 and $h2 that will guarantee the above.

Fixes: a865ad999603 ("selftests: mlxsw: Add shared buffer traffic test")
Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Link: https://patch.msgid.link/64c28bc9b1cc1d78c4a73feda7cedbe9526ccf8b.1733414773.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/sharedbuffer.sh    | 40 ++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
index 21bebc5726f6..c068e6c2a580 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
@@ -22,20 +22,34 @@ SB_ITC=0
 h1_create()
 {
 	simple_if_init $h1 192.0.1.1/24
+	tc qdisc add dev $h1 clsact
+
+	# Add egress filter on $h1 that will guarantee that the packet sent,
+	# will be the only packet being passed to the device.
+	tc filter add dev $h1 egress pref 2 handle 102 matchall action drop
 }
 
 h1_destroy()
 {
+	tc filter del dev $h1 egress pref 2 handle 102 matchall action drop
+	tc qdisc del dev $h1 clsact
 	simple_if_fini $h1 192.0.1.1/24
 }
 
 h2_create()
 {
 	simple_if_init $h2 192.0.1.2/24
+	tc qdisc add dev $h2 clsact
+
+	# Add egress filter on $h2 that will guarantee that the packet sent,
+	# will be the only packet being passed to the device.
+	tc filter add dev $h2 egress pref 1 handle 101 matchall action drop
 }
 
 h2_destroy()
 {
+	tc filter del dev $h2 egress pref 1 handle 101 matchall action drop
+	tc qdisc del dev $h2 clsact
 	simple_if_fini $h2 192.0.1.2/24
 }
 
@@ -101,6 +115,11 @@ port_pool_test()
 	local exp_max_occ=$(devlink_cell_size_get)
 	local max_occ
 
+	tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \
+		src_mac $h1mac dst_mac $h2mac \
+		src_ip 192.0.1.1 dst_ip 192.0.1.2 \
+		action pass
+
 	devlink sb occupancy clearmax $DEVLINK_DEV
 
 	$MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
@@ -117,6 +136,11 @@ port_pool_test()
 	max_occ=$(sb_occ_pool_check $cpu_dl_port $SB_POOL_EGR_CPU $exp_max_occ)
 	check_err $? "Expected ePool($SB_POOL_EGR_CPU) max occupancy to be $exp_max_occ, but got $max_occ"
 	log_test "CPU port's egress pool"
+
+	tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \
+		src_mac $h1mac dst_mac $h2mac \
+		src_ip 192.0.1.1 dst_ip 192.0.1.2 \
+		action pass
 }
 
 port_tc_ip_test()
@@ -124,6 +148,11 @@ port_tc_ip_test()
 	local exp_max_occ=$(devlink_cell_size_get)
 	local max_occ
 
+	tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \
+		src_mac $h1mac dst_mac $h2mac \
+		src_ip 192.0.1.1 dst_ip 192.0.1.2 \
+		action pass
+
 	devlink sb occupancy clearmax $DEVLINK_DEV
 
 	$MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
@@ -140,6 +169,11 @@ port_tc_ip_test()
 	max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_IP $exp_max_occ)
 	check_err $? "Expected egress TC($SB_ITC_CPU_IP) max occupancy to be $exp_max_occ, but got $max_occ"
 	log_test "CPU port's egress TC - IP packet"
+
+	tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \
+		src_mac $h1mac dst_mac $h2mac \
+		src_ip 192.0.1.1 dst_ip 192.0.1.2 \
+		action pass
 }
 
 port_tc_arp_test()
@@ -147,6 +181,9 @@ port_tc_arp_test()
 	local exp_max_occ=$(devlink_cell_size_get)
 	local max_occ
 
+	tc filter add dev $h1 egress protocol arp pref 1 handle 101 flower \
+		src_mac $h1mac action pass
+
 	devlink sb occupancy clearmax $DEVLINK_DEV
 
 	$MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q
@@ -162,6 +199,9 @@ port_tc_arp_test()
 	max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_ARP $exp_max_occ)
 	check_err $? "Expected egress TC($SB_ITC_IP2ME) max occupancy to be $exp_max_occ, but got $max_occ"
 	log_test "CPU port's egress TC - ARP packet"
+
+	tc filter del dev $h1 egress protocol arp pref 1 handle 101 flower \
+		src_mac $h1mac action pass
 }
 
 setup_prepare()
-- 
cgit v1.2.3


From cb1b78f1c726c938bd47497c1ab16b01ce967f37 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Tue, 10 Sep 2024 00:44:32 +0000
Subject: tools: hv: Fix a complier warning in the fcopy uio daemon

hv_fcopy_uio_daemon.c:436:53: warning: '%s' directive output may be truncated
writing up to 14 bytes into a region of size 10 [-Wformat-truncation=]
  436 |  snprintf(uio_dev_path, sizeof(uio_dev_path), "/dev/%s", uio_name);

Also added 'static' for the array 'desc[]'.

Fixes: 82b0945ce2c2 ("tools: hv: Add new fcopy application based on uio driver")
Cc: stable@vger.kernel.org # 6.10+
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Saurabh Sengar <ssengar@linux.microsoft.com>
Link: https://lore.kernel.org/r/20240910004433.50254-1-decui@microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Message-ID: <20240910004433.50254-1-decui@microsoft.com>
---
 tools/hv/hv_fcopy_uio_daemon.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/hv/hv_fcopy_uio_daemon.c b/tools/hv/hv_fcopy_uio_daemon.c
index 7a00f3066a98..12743d7f164f 100644
--- a/tools/hv/hv_fcopy_uio_daemon.c
+++ b/tools/hv/hv_fcopy_uio_daemon.c
@@ -35,8 +35,6 @@
 #define WIN8_SRV_MINOR		1
 #define WIN8_SRV_VERSION	(WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR)
 
-#define MAX_FOLDER_NAME		15
-#define MAX_PATH_LEN		15
 #define FCOPY_UIO		"/sys/bus/vmbus/devices/eb765408-105f-49b6-b4aa-c123b64d17d4/uio"
 
 #define FCOPY_VER_COUNT		1
@@ -51,7 +49,7 @@ static const int fw_versions[] = {
 
 #define HV_RING_SIZE		0x4000 /* 16KB ring buffer size */
 
-unsigned char desc[HV_RING_SIZE];
+static unsigned char desc[HV_RING_SIZE];
 
 static int target_fd;
 static char target_fname[PATH_MAX];
@@ -409,8 +407,8 @@ int main(int argc, char *argv[])
 	struct vmbus_br txbr, rxbr;
 	void *ring;
 	uint32_t len = HV_RING_SIZE;
-	char uio_name[MAX_FOLDER_NAME] = {0};
-	char uio_dev_path[MAX_PATH_LEN] = {0};
+	char uio_name[NAME_MAX] = {0};
+	char uio_dev_path[PATH_MAX] = {0};
 
 	static struct option long_options[] = {
 		{"help",	no_argument,	   0,  'h' },
-- 
cgit v1.2.3


From 91ae69c7ed9e262f24240c425ad1eef2cf6639b7 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Wed, 16 Oct 2024 16:35:10 +0200
Subject: tools: hv: change permissions of NetworkManager configuration file

Align permissions of the resulting .nmconnection file, instead of
the input file from hv_kvp_daemon. To avoid the tiny time frame
where the output file is world-readable, use umask instead of chmod.

Fixes: 42999c904612 ("hv/hv_kvp_daemon:Support for keyfile based connection profile")
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Reviewed-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
Link: https://lore.kernel.org/r/20241016143521.3735-1-olaf@aepfle.de
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Message-ID: <20241016143521.3735-1-olaf@aepfle.de>
---
 tools/hv/hv_set_ifconfig.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh
index 440a91b35823..2f8baed2b8f7 100755
--- a/tools/hv/hv_set_ifconfig.sh
+++ b/tools/hv/hv_set_ifconfig.sh
@@ -81,7 +81,7 @@ echo "ONBOOT=yes" >> $1
 
 cp $1 /etc/sysconfig/network-scripts/
 
-chmod 600 $2
+umask 0177
 interface=$(echo $2 | awk -F - '{ print $2 }')
 filename="${2##*/}"
 
-- 
cgit v1.2.3


From a9640fcdd400463442846677e62b8208b81cb031 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Tue, 5 Nov 2024 09:14:04 +0100
Subject: tools/hv: terminate fcopy daemon if read from uio fails

Terminate endless loop in reading fails, to avoid flooding syslog.

This happens if the state of "Guest services" integration service
is changed from "enabled" to "disabled" at runtime in the VM
settings. In this case pread returns EIO.

Also handle an interrupted system call, and continue in this case.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
Reviewed-by: Saurabh Sengar <ssengar@linux.microsoft.com>
Link: https://lore.kernel.org/r/20241105081437.15689-1-olaf@aepfle.de
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Message-ID: <20241105081437.15689-1-olaf@aepfle.de>
---
 tools/hv/hv_fcopy_uio_daemon.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/hv/hv_fcopy_uio_daemon.c b/tools/hv/hv_fcopy_uio_daemon.c
index 12743d7f164f..0198321d14a2 100644
--- a/tools/hv/hv_fcopy_uio_daemon.c
+++ b/tools/hv/hv_fcopy_uio_daemon.c
@@ -466,8 +466,10 @@ int main(int argc, char *argv[])
 		 */
 		ret = pread(fcopy_fd, &tmp, sizeof(int), 0);
 		if (ret < 0) {
+			if (errno == EINTR || errno == EAGAIN)
+				continue;
 			syslog(LOG_ERR, "pread failed: %s", strerror(errno));
-			continue;
+			goto close;
 		}
 
 		len = HV_RING_SIZE;
-- 
cgit v1.2.3


From 07dfa6e821e1c58cbd0f195173dddbd593721f9b Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 12 Nov 2024 16:04:01 +0100
Subject: hv/hv_kvp_daemon: Pass NIC name to hv_get_dns_info as well

The reference implementation of hv_get_dns_info which is in the tree uses
/etc/resolv.conf to get DNS servers and this does not require to know which
NIC is queried. Distro specific implementations, however, may want to
provide per-NIC, fine grained information. E.g. NetworkManager keeps track
of DNS servers per connection.

Similar to hv_get_dhcp_info, pass NIC name as a parameter to
hv_get_dns_info script.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Link: https://lore.kernel.org/r/20241112150401.217094-1-vkuznets@redhat.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Message-ID: <20241112150401.217094-1-vkuznets@redhat.com>
---
 tools/hv/hv_kvp_daemon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index ae57bf69ad4a..296a7a62c54d 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -725,7 +725,7 @@ static void kvp_get_ipconfig_info(char *if_name,
 	 * .
 	 */
 
-	sprintf(cmd, KVP_SCRIPTS_PATH "%s",  "hv_get_dns_info");
+	sprintf(cmd, KVP_SCRIPTS_PATH "%s %s", "hv_get_dns_info", if_name);
 
 	/*
 	 * Execute the command to gather DNS info.
-- 
cgit v1.2.3


From a4d024fe2e77063069c5f423f2f9be766450f0f9 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Mon, 2 Dec 2024 13:04:10 +0100
Subject: tools/hv: reduce resouce usage in hv_get_dns_info helper

Remove the usage of cat. Replace the shell process with awk with 'exec'.
Also use a generic shell because no bash specific features will be used.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Wei Liu <wei.liu@kernel.org>
Link: https://lore.kernel.org/r/20241202120432.21115-1-olaf@aepfle.de
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Message-ID: <20241202120432.21115-1-olaf@aepfle.de>
---
 tools/hv/hv_get_dns_info.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/hv/hv_get_dns_info.sh b/tools/hv/hv_get_dns_info.sh
index 058c17b46ffc..268521234d4b 100755
--- a/tools/hv/hv_get_dns_info.sh
+++ b/tools/hv/hv_get_dns_info.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
 
 # This example script parses /etc/resolv.conf to retrive DNS information.
 # In the interest of keeping the KVP daemon code free of distro specific
@@ -10,4 +10,4 @@
 # this script can be based on the Network Manager APIs for retrieving DNS
 # entries.
 
-cat /etc/resolv.conf 2>/dev/null | awk '/^nameserver/ { print $2 }'
+exec awk '/^nameserver/ { print $2 }' /etc/resolv.conf 2>/dev/null
-- 
cgit v1.2.3


From becc7fe329c09a7744fa908fca83418fa94a45a0 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Mon, 2 Dec 2024 13:40:52 +0100
Subject: tools/hv: add a .gitignore file

Remove generated files from 'git status' output after 'make -C tools/hv'.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
Link: https://lore.kernel.org/r/20241202124107.28650-1-olaf@aepfle.de
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Message-ID: <20241202124107.28650-1-olaf@aepfle.de>
---
 tools/hv/.gitignore | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 tools/hv/.gitignore

(limited to 'tools')

diff --git a/tools/hv/.gitignore b/tools/hv/.gitignore
new file mode 100644
index 000000000000..0c5bc15d602f
--- /dev/null
+++ b/tools/hv/.gitignore
@@ -0,0 +1,3 @@
+hv_fcopy_uio_daemon
+hv_kvp_daemon
+hv_vss_daemon
-- 
cgit v1.2.3


From 175c71c2aceef173ae6d3dceb41edfc2ac0d5937 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Sun, 8 Dec 2024 23:47:17 +0000
Subject: tools/hv: reduce resource usage in hv_kvp_daemon

hv_kvp_daemon uses popen(3) and system(3) as convinience helper to
launch external helpers. These helpers are invoked via a
temporary shell process. There is no need to keep this temporary
process around while the helper runs. Replace this temporary shell
with the actual helper process via 'exec'.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
Link: https://lore.kernel.org/linux-hyperv/20241202123520.27812-1-olaf@aepfle.de/
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 tools/hv/hv_kvp_daemon.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 296a7a62c54d..04ba035d67e9 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -725,7 +725,7 @@ static void kvp_get_ipconfig_info(char *if_name,
 	 * .
 	 */
 
-	sprintf(cmd, KVP_SCRIPTS_PATH "%s %s", "hv_get_dns_info", if_name);
+	sprintf(cmd, "exec %s %s", KVP_SCRIPTS_PATH "hv_get_dns_info", if_name);
 
 	/*
 	 * Execute the command to gather DNS info.
@@ -742,7 +742,7 @@ static void kvp_get_ipconfig_info(char *if_name,
 	 * Enabled: DHCP enabled.
 	 */
 
-	sprintf(cmd, KVP_SCRIPTS_PATH "%s %s", "hv_get_dhcp_info", if_name);
+	sprintf(cmd, "exec %s %s", KVP_SCRIPTS_PATH "hv_get_dhcp_info", if_name);
 
 	file = popen(cmd, "r");
 	if (file == NULL)
@@ -1606,8 +1606,9 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
 	 * invoke the external script to do its magic.
 	 */
 
-	str_len = snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s %s",
-			   "hv_set_ifconfig", if_filename, nm_filename);
+	str_len = snprintf(cmd, sizeof(cmd), "exec %s %s %s",
+			   KVP_SCRIPTS_PATH "hv_set_ifconfig",
+			   if_filename, nm_filename);
 	/*
 	 * This is a little overcautious, but it's necessary to suppress some
 	 * false warnings from gcc 8.0.1.
-- 
cgit v1.2.3


From 246dfe3dc199246bd64635163115f2691623fc53 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Mon, 9 Dec 2024 21:42:26 +0800
Subject: perf ftrace: Fix undefined behavior in cmp_profile_data()

The comparison function cmp_profile_data() violates the C standard's
requirements for qsort() comparison functions, which mandate symmetry
and transitivity:

* Symmetry: If x < y, then y > x.
* Transitivity: If x < y and y < z, then x < z.

When v1 and v2 are equal, the function incorrectly returns 1, breaking
symmetry and transitivity. This causes undefined behavior, which can
lead to memory corruption in certain versions of glibc [1].

Fix the issue by returning 0 when v1 and v2 are equal, ensuring
compliance with the C standard and preventing undefined behavior.

Link: https://www.qualys.com/2024/01/30/qsort.txt [1]
Fixes: 0f223813edd0 ("perf ftrace: Add 'profile' command")
Fixes: 74ae366c37b7 ("perf ftrace profile: Add -s/--sort option")
Cc: stable@vger.kernel.org
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: jserv@ccns.ncku.edu.tw
Cc: chuang@cs.nycu.edu.tw
Link: https://lore.kernel.org/r/20241209134226.1939163-1-visitorckw@gmail.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/builtin-ftrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 272d3c70810e..a56cf8b0a7d4 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -1151,8 +1151,9 @@ static int cmp_profile_data(const void *a, const void *b)
 
 	if (v1 > v2)
 		return -1;
-	else
+	if (v1 < v2)
 		return 1;
+	return 0;
 }
 
 static void print_profile_result(struct perf_ftrace *ftrace)
-- 
cgit v1.2.3


From 3f61a12b08bd84e6cf705484c2cb9d5b891882fa Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 5 Dec 2024 20:23:05 -0800
Subject: perf hwmon_pmu: Use openat rather than dup to refresh directory

The hwmon PMU test will make a temp directory, open the directory with
O_DIRECTORY then fill it with contents. As the open is before the
filling the contents the later fdopendir may reflect the initial empty
state, meaning no events are seen. Change to re-open the directory,
rather than dup the fd, so the latest contents are seen.

Minor tweaks/additions to debug messages.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Link: https://lore.kernel.org/r/20241206042306.1055913-1-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/hwmon_pmu.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c
index e61429b38ba7..4acb9bb19b84 100644
--- a/tools/perf/util/hwmon_pmu.c
+++ b/tools/perf/util/hwmon_pmu.c
@@ -258,8 +258,12 @@ static int hwmon_pmu__read_events(struct hwmon_pmu *pmu)
 	if (pmu->pmu.sysfs_aliases_loaded)
 		return 0;
 
-	/* Use a dup-ed fd as closedir will close it. */
-	dup_fd = dup(pmu->hwmon_dir_fd);
+	/*
+	 * Use a dup-ed fd as closedir will close it. Use openat so that the
+	 * directory contents are refreshed.
+	 */
+	dup_fd = openat(pmu->hwmon_dir_fd, ".", O_DIRECTORY);
+
 	if (dup_fd == -1)
 		return -ENOMEM;
 
@@ -336,6 +340,9 @@ static int hwmon_pmu__read_events(struct hwmon_pmu *pmu)
 			close(fd);
 		}
 	}
+	if (hashmap__size(&pmu->events) == 0)
+		pr_debug2("hwmon_pmu: %s has no events\n", pmu->pmu.name);
+
 	hashmap__for_each_entry_safe((&pmu->events), cur, tmp, bkt) {
 		union hwmon_pmu_event_key key = {
 			.type_and_num = cur->key,
@@ -343,8 +350,8 @@ static int hwmon_pmu__read_events(struct hwmon_pmu *pmu)
 		struct hwmon_pmu_event_value *value = cur->pvalue;
 
 		if (!test_bit(HWMON_ITEM_INPUT, value->items)) {
-			pr_debug("hwmon_pmu: removing event '%s%d' that has no input file\n",
-				hwmon_type_strs[key.type], key.num);
+			pr_debug("hwmon_pmu: %s removing event '%s%d' that has no input file\n",
+				pmu->pmu.name, hwmon_type_strs[key.type], key.num);
 			hashmap__delete(&pmu->events, key.type_and_num, &key, &value);
 			zfree(&value->label);
 			zfree(&value->name);
-- 
cgit v1.2.3


From d4e17a322a8fc3266798ec8dee1fbe679078b2bc Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 5 Dec 2024 20:23:06 -0800
Subject: perf test hwmon_pmu: Fix event file location

The temp directory is made and a known fake hwmon PMU created within
it. Prior to this fix the events were being incorrectly written to the
temp directory rather than the fake PMU directory. This didn't impact
the test as the directory fd matched the wrong location, but it
doesn't mirror what a hwmon PMU would actually look like.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Link: https://lore.kernel.org/r/20241206042306.1055913-2-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/tests/hwmon_pmu.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/tests/hwmon_pmu.c b/tools/perf/tests/hwmon_pmu.c
index f8bcee9660d5..d2b066a2b557 100644
--- a/tools/perf/tests/hwmon_pmu.c
+++ b/tools/perf/tests/hwmon_pmu.c
@@ -65,7 +65,7 @@ static struct perf_pmu *test_pmu_get(char *dir, size_t sz)
 		{ "temp2_label", "test hwmon event2\n", },
 		{ "temp2_input", "50000\n", },
 	};
-	int dirfd, file;
+	int hwmon_dirfd = -1, test_dirfd = -1, file;
 	struct perf_pmu *hwm = NULL;
 	ssize_t len;
 
@@ -76,19 +76,24 @@ static struct perf_pmu *test_pmu_get(char *dir, size_t sz)
 		dir[0] = '\0';
 		return NULL;
 	}
-	dirfd = open(dir, O_DIRECTORY);
-	if (dirfd < 0) {
+	test_dirfd = open(dir, O_PATH|O_DIRECTORY);
+	if (test_dirfd < 0) {
 		pr_err("Failed to open test directory \"%s\"\n", dir);
 		goto err_out;
 	}
 
 	/* Create the test hwmon directory and give it a name. */
-	if (mkdirat(dirfd, "hwmon1234", 0755) < 0) {
+	if (mkdirat(test_dirfd, "hwmon1234", 0755) < 0) {
 		pr_err("Failed to mkdir hwmon directory\n");
 		goto err_out;
 	}
-	file = openat(dirfd, "hwmon1234/name", O_WRONLY | O_CREAT, 0600);
-	if (!file) {
+	hwmon_dirfd = openat(test_dirfd, "hwmon1234", O_DIRECTORY);
+	if (hwmon_dirfd < 0) {
+		pr_err("Failed to open test hwmon directory \"%s/hwmon1234\"\n", dir);
+		goto err_out;
+	}
+	file = openat(hwmon_dirfd, "name", O_WRONLY | O_CREAT, 0600);
+	if (file < 0) {
 		pr_err("Failed to open for writing file \"name\"\n");
 		goto err_out;
 	}
@@ -104,8 +109,8 @@ static struct perf_pmu *test_pmu_get(char *dir, size_t sz)
 	for (size_t i = 0; i < ARRAY_SIZE(test_items); i++) {
 		const struct test_item *item = &test_items[i];
 
-		file = openat(dirfd, item->name, O_WRONLY | O_CREAT, 0600);
-		if (!file) {
+		file = openat(hwmon_dirfd, item->name, O_WRONLY | O_CREAT, 0600);
+		if (file < 0) {
 			pr_err("Failed to open for writing file \"%s\"\n", item->name);
 			goto err_out;
 		}
@@ -119,16 +124,18 @@ static struct perf_pmu *test_pmu_get(char *dir, size_t sz)
 	}
 
 	/* Make the PMU reading the files created above. */
-	hwm = perf_pmus__add_test_hwmon_pmu(dirfd, "hwmon1234", test_hwmon_name);
+	hwm = perf_pmus__add_test_hwmon_pmu(hwmon_dirfd, "hwmon1234", test_hwmon_name);
 	if (!hwm)
 		pr_err("Test hwmon creation failed\n");
 
 err_out:
 	if (!hwm) {
 		test_pmu_put(dir, hwm);
-		if (dirfd >= 0)
-			close(dirfd);
+		if (hwmon_dirfd >= 0)
+			close(hwmon_dirfd);
 	}
+	if (test_dirfd >= 0)
+		close(test_dirfd);
 	return hwm;
 }
 
-- 
cgit v1.2.3


From 11d5245f608f8ac01c97b93f31497cef7b96e457 Mon Sep 17 00:00:00 2001
From: Michal Luczaj <mhal@rbox.co>
Date: Mon, 2 Dec 2024 12:29:24 +0100
Subject: selftests/bpf: Extend test for sockmap update with same

Verify that the sockmap link was not severed, and socket's entry is indeed
removed from the map when the corresponding descriptor gets closed.

Signed-off-by: Michal Luczaj <mhal@rbox.co>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20241202-sockmap-replace-v1-2-1e88579e7bd5@rbox.co
---
 tools/testing/selftests/bpf/prog_tests/sockmap_basic.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index fdff0652d7ef..248754296d97 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -934,8 +934,10 @@ static void test_sockmap_same_sock(void)
 
 	err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream);
 	ASSERT_OK(err, "socketpair(af_unix, sock_stream)");
-	if (err)
+	if (err) {
+		close(tcp);
 		goto out;
+	}
 
 	for (i = 0; i < 2; i++) {
 		err = bpf_map_update_elem(map, &zero, &stream[0], BPF_ANY);
@@ -954,14 +956,14 @@ static void test_sockmap_same_sock(void)
 		ASSERT_OK(err, "bpf_map_update_elem(tcp)");
 	}
 
+	close(tcp);
 	err = bpf_map_delete_elem(map, &zero);
-	ASSERT_OK(err, "bpf_map_delete_elem(entry)");
+	ASSERT_ERR(err, "bpf_map_delete_elem(entry)");
 
 	close(stream[0]);
 	close(stream[1]);
 out:
 	close(dgram);
-	close(tcp);
 	close(udp);
 	test_sockmap_pass_prog__destroy(skel);
 }
-- 
cgit v1.2.3


From 3f23ee5590d9605dbde9a5e1d4b97637a4803329 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Mon, 9 Dec 2024 20:10:56 -0800
Subject: selftests/bpf: test for changing packet data from global functions

Check if verifier is aware of packet pointers invalidation done in
global functions. Based on a test shared by Nick Zavaritsky in [0].

[0] https://lore.kernel.org/bpf/0498CA22-5779-4767-9C0C-A9515CEA711F@gmail.com/

Suggested-by: Nick Zavaritsky <mejedi@gmail.com>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20241210041100.1898468-5-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/verifier_sock.c | 28 +++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c
index d3e70e38e442..51826379a1aa 100644
--- a/tools/testing/selftests/bpf/progs/verifier_sock.c
+++ b/tools/testing/selftests/bpf/progs/verifier_sock.c
@@ -1037,4 +1037,32 @@ __naked void sock_create_read_src_port(void)
 	: __clobber_all);
 }
 
+__noinline
+long skb_pull_data2(struct __sk_buff *sk, __u32 len)
+{
+	return bpf_skb_pull_data(sk, len);
+}
+
+__noinline
+long skb_pull_data1(struct __sk_buff *sk, __u32 len)
+{
+	return skb_pull_data2(sk, len);
+}
+
+/* global function calls bpf_skb_pull_data(), which invalidates packet
+ * pointers established before global function call.
+ */
+SEC("tc")
+__failure __msg("invalid mem access")
+int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk)
+{
+	int *p = (void *)(long)sk->data;
+
+	if ((void *)(p + 1) > (void *)(long)sk->data_end)
+		return TCX_DROP;
+	skb_pull_data1(sk, 0);
+	*p = 42; /* this is unsafe */
+	return TCX_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 89ff40890d8f12a7d7e93fb602cc27562f3834f0 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Mon, 9 Dec 2024 20:10:58 -0800
Subject: selftests/bpf: freplace tests for tracking of changes_packet_data

Try different combinations of global functions replacement:
- replace function that changes packet data with one that doesn't;
- replace function that changes packet data with one that does;
- replace function that doesn't change packet data with one that does;
- replace function that doesn't change packet data with one that doesn't;

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20241210041100.1898468-7-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/changes_pkt_data.c    | 76 ++++++++++++++++++++++
 .../testing/selftests/bpf/progs/changes_pkt_data.c | 26 ++++++++
 .../bpf/progs/changes_pkt_data_freplace.c          | 18 +++++
 3 files changed, 120 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c
 create mode 100644 tools/testing/selftests/bpf/progs/changes_pkt_data.c
 create mode 100644 tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c
new file mode 100644
index 000000000000..c0c7202f6c5c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bpf/libbpf.h"
+#include "changes_pkt_data_freplace.skel.h"
+#include "changes_pkt_data.skel.h"
+#include <test_progs.h>
+
+static void print_verifier_log(const char *log)
+{
+	if (env.verbosity >= VERBOSE_VERY)
+		fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log);
+}
+
+static void test_aux(const char *main_prog_name, const char *freplace_prog_name, bool expect_load)
+{
+	struct changes_pkt_data_freplace *freplace = NULL;
+	struct bpf_program *freplace_prog = NULL;
+	LIBBPF_OPTS(bpf_object_open_opts, opts);
+	struct changes_pkt_data *main = NULL;
+	char log[16*1024];
+	int err;
+
+	opts.kernel_log_buf = log;
+	opts.kernel_log_size = sizeof(log);
+	if (env.verbosity >= VERBOSE_SUPER)
+		opts.kernel_log_level = 1 | 2 | 4;
+	main = changes_pkt_data__open_opts(&opts);
+	if (!ASSERT_OK_PTR(main, "changes_pkt_data__open"))
+		goto out;
+	err = changes_pkt_data__load(main);
+	print_verifier_log(log);
+	if (!ASSERT_OK(err, "changes_pkt_data__load"))
+		goto out;
+	freplace = changes_pkt_data_freplace__open_opts(&opts);
+	if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open"))
+		goto out;
+	freplace_prog = bpf_object__find_program_by_name(freplace->obj, freplace_prog_name);
+	if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog"))
+		goto out;
+	bpf_program__set_autoload(freplace_prog, true);
+	bpf_program__set_autoattach(freplace_prog, true);
+	bpf_program__set_attach_target(freplace_prog,
+				       bpf_program__fd(main->progs.dummy),
+				       main_prog_name);
+	err = changes_pkt_data_freplace__load(freplace);
+	print_verifier_log(log);
+	if (expect_load) {
+		ASSERT_OK(err, "changes_pkt_data_freplace__load");
+	} else {
+		ASSERT_ERR(err, "changes_pkt_data_freplace__load");
+		ASSERT_HAS_SUBSTR(log, "Extension program changes packet data", "error log");
+	}
+
+out:
+	changes_pkt_data_freplace__destroy(freplace);
+	changes_pkt_data__destroy(main);
+}
+
+/* There are two global subprograms in both changes_pkt_data.skel.h:
+ * - one changes packet data;
+ * - another does not.
+ * It is ok to freplace subprograms that change packet data with those
+ * that either do or do not. It is only ok to freplace subprograms
+ * that do not change packet data with those that do not as well.
+ * The below tests check outcomes for each combination of such freplace.
+ */
+void test_changes_pkt_data_freplace(void)
+{
+	if (test__start_subtest("changes_with_changes"))
+		test_aux("changes_pkt_data", "changes_pkt_data", true);
+	if (test__start_subtest("changes_with_doesnt_change"))
+		test_aux("changes_pkt_data", "does_not_change_pkt_data", true);
+	if (test__start_subtest("doesnt_change_with_changes"))
+		test_aux("does_not_change_pkt_data", "changes_pkt_data", false);
+	if (test__start_subtest("doesnt_change_with_doesnt_change"))
+		test_aux("does_not_change_pkt_data", "does_not_change_pkt_data", true);
+}
diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c
new file mode 100644
index 000000000000..f87da8e9d6b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/changes_pkt_data.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__noinline
+long changes_pkt_data(struct __sk_buff *sk, __u32 len)
+{
+	return bpf_skb_pull_data(sk, len);
+}
+
+__noinline __weak
+long does_not_change_pkt_data(struct __sk_buff *sk, __u32 len)
+{
+	return 0;
+}
+
+SEC("tc")
+int dummy(struct __sk_buff *sk)
+{
+	changes_pkt_data(sk, 0);
+	does_not_change_pkt_data(sk, 0);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c
new file mode 100644
index 000000000000..0e525beb8603
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("?freplace")
+long changes_pkt_data(struct __sk_buff *sk, __u32 len)
+{
+	return bpf_skb_pull_data(sk, len);
+}
+
+SEC("?freplace")
+long does_not_change_pkt_data(struct __sk_buff *sk, __u32 len)
+{
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 1a4607ffba35bf2a630aab299e34dd3f6e658d70 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Mon, 9 Dec 2024 20:10:59 -0800
Subject: bpf: consider that tail calls invalidate packet pointers

Tail-called programs could execute any of the helpers that invalidate
packet pointers. Hence, conservatively assume that each tail call
invalidates packet pointers.

Making the change in bpf_helper_changes_pkt_data() automatically makes
use of check_cfg() logic that computes 'changes_pkt_data' effect for
global sub-programs, such that the following program could be
rejected:

    int tail_call(struct __sk_buff *sk)
    {
    	bpf_tail_call_static(sk, &jmp_table, 0);
    	return 0;
    }

    SEC("tc")
    int not_safe(struct __sk_buff *sk)
    {
    	int *p = (void *)(long)sk->data;
    	... make p valid ...
    	tail_call(sk);
    	*p = 42; /* this is unsafe */
    	...
    }

The tc_bpf2bpf.c:subprog_tc() needs change: mark it as a function that
can invalidate packet pointers. Otherwise, it can't be freplaced with
tailcall_freplace.c:entry_freplace() that does a tail call.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20241210041100.1898468-8-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/tc_bpf2bpf.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c
index d1a57f7d09bd..fe6249d99b31 100644
--- a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c
@@ -11,6 +11,8 @@ int subprog_tc(struct __sk_buff *skb)
 
 	__sink(skb);
 	__sink(ret);
+	/* let verifier know that 'subprog_tc' can change pointers to skb->data */
+	bpf_skb_change_proto(skb, 0, 0);
 	return ret;
 }
 
-- 
cgit v1.2.3


From d9706b56e13b7916461ca6b4b731e169ed44ed09 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Mon, 9 Dec 2024 20:11:00 -0800
Subject: selftests/bpf: validate that tail call invalidates packet pointers

Add a test case with a tail call done from a global sub-program. Such
tails calls should be considered as invalidating packet pointers.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20241210041100.1898468-9-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/verifier_sock.c | 28 +++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c
index 51826379a1aa..0d5e56dffabb 100644
--- a/tools/testing/selftests/bpf/progs/verifier_sock.c
+++ b/tools/testing/selftests/bpf/progs/verifier_sock.c
@@ -50,6 +50,13 @@ struct {
 	__uint(map_flags, BPF_F_NO_PREALLOC);
 } sk_storage_map SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
 SEC("cgroup/skb")
 __description("skb->sk: no NULL check")
 __failure __msg("invalid mem access 'sock_common_or_null'")
@@ -1065,4 +1072,25 @@ int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk)
 	return TCX_PASS;
 }
 
+__noinline
+int tail_call(struct __sk_buff *sk)
+{
+	bpf_tail_call_static(sk, &jmp_table, 0);
+	return 0;
+}
+
+/* Tail calls invalidate packet pointers. */
+SEC("tc")
+__failure __msg("invalid mem access")
+int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk)
+{
+	int *p = (void *)(long)sk->data;
+
+	if ((void *)(p + 1) > (void *)(long)sk->data_end)
+		return TCX_DROP;
+	tail_call(sk);
+	*p = 42; /* this is unsafe */
+	return TCX_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From b8f614207b0d5e4abd6df8d5cb3cc11f009d1d93 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Mon, 9 Dec 2024 09:29:24 -0600
Subject: scx: Fix maximal BPF selftest prog

maximal.bpf.c is still dispatching to and consuming from SCX_DSQ_GLOBAL.
Let's have it use its own DSQ to avoid any runtime errors.

Signed-off-by: David Vernet <void@manifault.com>
Tested-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 tools/testing/selftests/sched_ext/maximal.bpf.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c
index 4c005fa71810..430f5e13bf55 100644
--- a/tools/testing/selftests/sched_ext/maximal.bpf.c
+++ b/tools/testing/selftests/sched_ext/maximal.bpf.c
@@ -12,6 +12,8 @@
 
 char _license[] SEC("license") = "GPL";
 
+#define DSQ_ID 0
+
 s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu,
 		   u64 wake_flags)
 {
@@ -20,7 +22,7 @@ s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu,
 
 void BPF_STRUCT_OPS(maximal_enqueue, struct task_struct *p, u64 enq_flags)
 {
-	scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+	scx_bpf_dsq_insert(p, DSQ_ID, SCX_SLICE_DFL, enq_flags);
 }
 
 void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags)
@@ -28,7 +30,7 @@ void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags)
 
 void BPF_STRUCT_OPS(maximal_dispatch, s32 cpu, struct task_struct *prev)
 {
-	scx_bpf_dsq_move_to_local(SCX_DSQ_GLOBAL);
+	scx_bpf_dsq_move_to_local(DSQ_ID);
 }
 
 void BPF_STRUCT_OPS(maximal_runnable, struct task_struct *p, u64 enq_flags)
@@ -123,7 +125,7 @@ void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight)
 
 s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init)
 {
-	return 0;
+	return scx_bpf_create_dsq(DSQ_ID, -1);
 }
 
 void BPF_STRUCT_OPS(maximal_exit, struct scx_exit_info *info)
-- 
cgit v1.2.3


From 777f290ab328de333b85558bb6807a69a59b36ba Mon Sep 17 00:00:00 2001
From: Hari Bathini <hbathini@linux.ibm.com>
Date: Sat, 30 Nov 2024 01:56:21 +0530
Subject: selftests/ftrace: adjust offset for kprobe syntax error test

In 'NOFENTRY_ARGS' test case for syntax check, any offset X of
`vfs_read+X` except function entry offset (0) fits the criterion,
even if that offset is not at instruction boundary, as the parser
comes before probing. But with "ENDBR64" instruction on x86, offset
4 is treated as function entry. So, X can't be 4 as well. Thus, 8
was used as offset for the test case. On 64-bit powerpc though, any
offset <= 16 can be considered function entry depending on build
configuration (see arch_kprobe_on_func_entry() for implementation
details). So, use `vfs_read+20` to accommodate that scenario too.

Link: https://lore.kernel.org/r/20241129202621.721159-1-hbathini@linux.ibm.com
Fixes: 4231f30fcc34a ("selftests/ftrace: Add BTF arguments test cases")
Suggested-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index a16c6a6f6055..8f1c58f0c239 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -111,7 +111,7 @@ check_error 'p vfs_read $arg* ^$arg*'		# DOUBLE_ARGS
 if !grep -q 'kernel return probes support:' README; then
 check_error 'r vfs_read ^$arg*'			# NOFENTRY_ARGS
 fi
-check_error 'p vfs_read+8 ^$arg*'		# NOFENTRY_ARGS
+check_error 'p vfs_read+20 ^$arg*'		# NOFENTRY_ARGS
 check_error 'p vfs_read ^hoge'			# NO_BTFARG
 check_error 'p kfree ^$arg10'			# NO_BTFARG (exceed the number of parameters)
 check_error 'r kfree ^$retval'			# NO_RETVAL
-- 
cgit v1.2.3


From a93a620c38f3763ec74cb0def9625756966f12d9 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 4 Dec 2024 18:23:05 -0800
Subject: perf test expr: Fix system_tsc_freq for only x86

The refactoring of tool PMU events to have a PMU then adding the expr
literals to the tool PMU made it so that the literal system_tsc_freq
was only supported on x86. Update the test expectations to match -
namely the parsing is x86 specific and only yields a non-zero value on
Intel.

Fixes: 609aa2667f67 ("perf tool_pmu: Switch to standard pmu functions and json descriptions")
Reported-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Closes: https://lore.kernel.org/linux-perf-users/20241022140156.98854-1-atrajeev@linux.vnet.ibm.com/
Co-developed-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Tested-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: James Clark <james.clark@linaro.org>
Cc: akanksha@linux.ibm.com
Cc: hbathini@linux.ibm.com
Cc: kjain@linux.ibm.com
Cc: maddy@linux.ibm.com
Cc: disgoel@linux.vnet.ibm.com
Cc: linuxppc-dev@lists.ozlabs.org
Link: https://lore.kernel.org/r/20241205022305.158202-1-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/tests/expr.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 41ff1affdfcd..726cf8d4da28 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -75,14 +75,12 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
 	double val, num_cpus_online, num_cpus, num_cores, num_dies, num_packages;
 	int ret;
 	struct expr_parse_ctx *ctx;
-	bool is_intel = false;
 	char strcmp_cpuid_buf[256];
 	struct perf_cpu cpu = {-1};
 	char *cpuid = get_cpuid_allow_env_override(cpu);
 	char *escaped_cpuid1, *escaped_cpuid2;
 
 	TEST_ASSERT_VAL("get_cpuid", cpuid);
-	is_intel = strstr(cpuid, "Intel") != NULL;
 
 	TEST_ASSERT_EQUAL("ids_union", test_ids_union(), 0);
 
@@ -245,12 +243,19 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
 	if (num_dies) // Some platforms do not have CPU die support, for example s390
 		TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages);
 
-	TEST_ASSERT_VAL("#system_tsc_freq", expr__parse(&val, ctx, "#system_tsc_freq") == 0);
-	if (is_intel)
-		TEST_ASSERT_VAL("#system_tsc_freq > 0", val > 0);
-	else
-		TEST_ASSERT_VAL("#system_tsc_freq == 0", fpclassify(val) == FP_ZERO);
 
+	if (expr__parse(&val, ctx, "#system_tsc_freq") == 0) {
+		bool is_intel = strstr(cpuid, "Intel") != NULL;
+
+		if (is_intel)
+			TEST_ASSERT_VAL("#system_tsc_freq > 0", val > 0);
+		else
+			TEST_ASSERT_VAL("#system_tsc_freq == 0", fpclassify(val) == FP_ZERO);
+	} else {
+#if defined(__i386__) || defined(__x86_64__)
+		TEST_ASSERT_VAL("#system_tsc_freq unsupported", 0);
+#endif
+	}
 	/*
 	 * Source count returns the number of events aggregating in a leader
 	 * event including the leader. Check parsing yields an id.
-- 
cgit v1.2.3


From f7e36d02d771ee14acae1482091718460cffb321 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@linaro.org>
Date: Thu, 14 Nov 2024 16:04:48 +0000
Subject: libperf: evlist: Fix --cpu argument on hybrid platform

Since the linked fixes: commit, specifying a CPU on hybrid platforms
results in an error because Perf tries to open an extended type event
on "any" CPU which isn't valid. Extended type events can only be opened
on CPUs that match the type.

Before (working):

  $ perf record --cpu 1 -- true
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 2.385 MB perf.data (7 samples) ]

After (not working):

  $ perf record -C 1 -- true
  WARNING: A requested CPU in '1' is not supported by PMU 'cpu_atom' (CPUs 16-27) for event 'cycles:P'
  Error:
  The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (cpu_atom/cycles:P/).
  /bin/dmesg | grep -i perf may provide additional information.

(Ignore the warning message, that's expected and not particularly
relevant to this issue).

This is because perf_cpu_map__intersect() of the user specified CPU (1)
and one of the PMU's CPUs (16-27) correctly results in an empty (NULL)
CPU map. However for the purposes of opening an event, libperf converts
empty CPU maps into an any CPU (-1) which the kernel rejects.

Fix it by deleting evsels with empty CPU maps in the specific case where
user requested CPU maps are evaluated.

Fixes: 251aa040244a ("perf parse-events: Wildcard most "numeric" events")
Reviewed-by: Ian Rogers <irogers@google.com>
Tested-by: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: James Clark <james.clark@linaro.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: https://lore.kernel.org/r/20241114160450.295844-2-james.clark@linaro.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/perf/evlist.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index c6d67fc9e57e..83c43dc13313 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -47,6 +47,20 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
 		 */
 		perf_cpu_map__put(evsel->cpus);
 		evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->own_cpus);
+
+		/*
+		 * Empty cpu lists would eventually get opened as "any" so remove
+		 * genuinely empty ones before they're opened in the wrong place.
+		 */
+		if (perf_cpu_map__is_empty(evsel->cpus)) {
+			struct perf_evsel *next = perf_evlist__next(evlist, evsel);
+
+			perf_evlist__remove(evlist, evsel);
+			/* Keep idx contiguous */
+			if (next)
+				list_for_each_entry_from(next, &evlist->entries, node)
+					next->idx--;
+		}
 	} else if (!evsel->own_cpus || evlist->has_user_cpus ||
 		(!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) {
 		/*
@@ -80,11 +94,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
 
 static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
 {
-	struct perf_evsel *evsel;
+	struct perf_evsel *evsel, *n;
 
 	evlist->needs_map_propagation = true;
 
-	perf_evlist__for_each_evsel(evlist, evsel)
+	list_for_each_entry_safe(evsel, n, &evlist->entries, node)
 		__perf_evlist__propagate_maps(evlist, evsel);
 }
 
-- 
cgit v1.2.3


From d92906fd1b940681b4509f7bb8ae737789fb4695 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Fri, 6 Dec 2024 15:08:40 +0100
Subject: selftests: netfilter: Stabilize rpath.sh

On some systems, neighbor discoveries from ns1 for fec0:42::1 (i.e., the
martian trap address) would happen at the wrong time and cause
false-negative test result.

Problem analysis also discovered that IPv6 martian ping test was broken
in that sent neighbor discoveries, not echo requests were inadvertently
trapped

Avoid the race condition by introducing the neighbors to each other
upfront. Also pin down the firewall rules to matching on echo requests
only.

Fixes: efb056e5f1f0 ("netfilter: ip6t_rpfilter: Fix regression with VRF interfaces")
Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 tools/testing/selftests/net/netfilter/rpath.sh | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh
index 4485fd7675ed..86ec4e68594d 100755
--- a/tools/testing/selftests/net/netfilter/rpath.sh
+++ b/tools/testing/selftests/net/netfilter/rpath.sh
@@ -61,9 +61,20 @@ ip -net "$ns2" a a 192.168.42.1/24 dev d0
 ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad
 ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
 
+# avoid neighbor lookups and enable martian IPv6 pings
+ns2_hwaddr=$(ip -net "$ns2" link show dev v0 | \
+	     sed -n 's, *link/ether \([^ ]*\) .*,\1,p')
+ns1_hwaddr=$(ip -net "$ns1" link show dev v0 | \
+	     sed -n 's, *link/ether \([^ ]*\) .*,\1,p')
+ip -net "$ns1" neigh add fec0:42::1 lladdr "$ns2_hwaddr" nud permanent dev v0
+ip -net "$ns1" neigh add fec0:23::1 lladdr "$ns2_hwaddr" nud permanent dev v0
+ip -net "$ns2" neigh add fec0:42::2 lladdr "$ns1_hwaddr" nud permanent dev d0
+ip -net "$ns2" neigh add fec0:23::2 lladdr "$ns1_hwaddr" nud permanent dev v0
+
 # firewall matches to test
 [ -n "$iptables" ] && {
 	common='-t raw -A PREROUTING -s 192.168.0.0/16'
+	common+=' -p icmp --icmp-type echo-request'
 	if ! ip netns exec "$ns2" "$iptables" $common -m rpfilter;then
 		echo "Cannot add rpfilter rule"
 		exit $ksft_skip
@@ -72,6 +83,7 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
 }
 [ -n "$ip6tables" ] && {
 	common='-t raw -A PREROUTING -s fec0::/16'
+	common+=' -p icmpv6 --icmpv6-type echo-request'
 	if ! ip netns exec "$ns2" "$ip6tables" $common -m rpfilter;then
 		echo "Cannot add rpfilter rule"
 		exit $ksft_skip
@@ -82,8 +94,10 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
 table inet t {
 	chain c {
 		type filter hook prerouting priority raw;
-		ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter
-		ip6 saddr fec0::/16 fib saddr . iif oif exists counter
+		ip saddr 192.168.0.0/16 icmp type echo-request \
+			fib saddr . iif oif exists counter
+		ip6 saddr fec0::/16 icmpv6 type echo-request \
+			fib saddr . iif oif exists counter
 	}
 }
 EOF
-- 
cgit v1.2.3


From 434fffa926b10706f2bde2db22979d68463302fc Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@linaro.org>
Date: Wed, 11 Dec 2024 08:55:23 +0000
Subject: perf probe: Fix uninitialized variable

Since the linked fixes: commit, err is returned uninitialized due to the
removal of "return 0". Initialize err to fix it.

This fixes the following intermittent test failure on release builds:

 $ perf test "testsuite_probe"
 ...
 -- [ FAIL ] -- perf_probe :: test_invalid_options :: mutually exclusive options :: -L foo -V bar (output regexp parsing)
 Regexp not found: \"Error: switch .+ cannot be used with switch .+\"
 ...

Fixes: 080e47b2a237 ("perf probe: Introduce quotation marks support")
Tested-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20241211085525.519458-2-james.clark@linaro.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/probe-event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 6d51a4c98ad7..eaa0318e9b87 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1370,7 +1370,7 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
 {
 	char *buf = strdup(arg);
 	char *p;
-	int err;
+	int err = 0;
 
 	if (!buf)
 		return -ENOMEM;
-- 
cgit v1.2.3


From 04789af756a4a43e72986185f66f148e65b32fed Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Wed, 11 Dec 2024 23:07:11 -0800
Subject: selftests/bpf: extend changes_pkt_data with cases w/o subprograms

Extend changes_pkt_data tests with test cases freplacing the main
program that does not have subprograms. Try four combinations when
both main program and replacement do and do not change packet data.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20241212070711.427443-2-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/changes_pkt_data.c    | 55 +++++++++++++++++-----
 .../testing/selftests/bpf/progs/changes_pkt_data.c | 27 ++++++++---
 .../bpf/progs/changes_pkt_data_freplace.c          |  6 +--
 3 files changed, 66 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c
index c0c7202f6c5c..7526de379081 100644
--- a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c
@@ -10,10 +10,14 @@ static void print_verifier_log(const char *log)
 		fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log);
 }
 
-static void test_aux(const char *main_prog_name, const char *freplace_prog_name, bool expect_load)
+static void test_aux(const char *main_prog_name,
+		     const char *to_be_replaced,
+		     const char *replacement,
+		     bool expect_load)
 {
 	struct changes_pkt_data_freplace *freplace = NULL;
 	struct bpf_program *freplace_prog = NULL;
+	struct bpf_program *main_prog = NULL;
 	LIBBPF_OPTS(bpf_object_open_opts, opts);
 	struct changes_pkt_data *main = NULL;
 	char log[16*1024];
@@ -26,6 +30,10 @@ static void test_aux(const char *main_prog_name, const char *freplace_prog_name,
 	main = changes_pkt_data__open_opts(&opts);
 	if (!ASSERT_OK_PTR(main, "changes_pkt_data__open"))
 		goto out;
+	main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name);
+	if (!ASSERT_OK_PTR(main_prog, "main_prog"))
+		goto out;
+	bpf_program__set_autoload(main_prog, true);
 	err = changes_pkt_data__load(main);
 	print_verifier_log(log);
 	if (!ASSERT_OK(err, "changes_pkt_data__load"))
@@ -33,14 +41,14 @@ static void test_aux(const char *main_prog_name, const char *freplace_prog_name,
 	freplace = changes_pkt_data_freplace__open_opts(&opts);
 	if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open"))
 		goto out;
-	freplace_prog = bpf_object__find_program_by_name(freplace->obj, freplace_prog_name);
+	freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement);
 	if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog"))
 		goto out;
 	bpf_program__set_autoload(freplace_prog, true);
 	bpf_program__set_autoattach(freplace_prog, true);
 	bpf_program__set_attach_target(freplace_prog,
-				       bpf_program__fd(main->progs.dummy),
-				       main_prog_name);
+				       bpf_program__fd(main_prog),
+				       to_be_replaced);
 	err = changes_pkt_data_freplace__load(freplace);
 	print_verifier_log(log);
 	if (expect_load) {
@@ -62,15 +70,38 @@ out:
  * that either do or do not. It is only ok to freplace subprograms
  * that do not change packet data with those that do not as well.
  * The below tests check outcomes for each combination of such freplace.
+ * Also test a case when main subprogram itself is replaced and is a single
+ * subprogram in a program.
  */
 void test_changes_pkt_data_freplace(void)
 {
-	if (test__start_subtest("changes_with_changes"))
-		test_aux("changes_pkt_data", "changes_pkt_data", true);
-	if (test__start_subtest("changes_with_doesnt_change"))
-		test_aux("changes_pkt_data", "does_not_change_pkt_data", true);
-	if (test__start_subtest("doesnt_change_with_changes"))
-		test_aux("does_not_change_pkt_data", "changes_pkt_data", false);
-	if (test__start_subtest("doesnt_change_with_doesnt_change"))
-		test_aux("does_not_change_pkt_data", "does_not_change_pkt_data", true);
+	struct {
+		const char *main;
+		const char *to_be_replaced;
+		bool changes;
+	} mains[] = {
+		{ "main_with_subprogs",   "changes_pkt_data",         true },
+		{ "main_with_subprogs",   "does_not_change_pkt_data", false },
+		{ "main_changes",         "main_changes",             true },
+		{ "main_does_not_change", "main_does_not_change",     false },
+	};
+	struct {
+		const char *func;
+		bool changes;
+	} replacements[] = {
+		{ "changes_pkt_data",         true },
+		{ "does_not_change_pkt_data", false }
+	};
+	char buf[64];
+
+	for (int i = 0; i < ARRAY_SIZE(mains); ++i) {
+		for (int j = 0; j < ARRAY_SIZE(replacements); ++j) {
+			snprintf(buf, sizeof(buf), "%s_with_%s",
+				 mains[i].to_be_replaced, replacements[j].func);
+			if (!test__start_subtest(buf))
+				continue;
+			test_aux(mains[i].main, mains[i].to_be_replaced, replacements[j].func,
+				 mains[i].changes || !replacements[j].changes);
+		}
+	}
 }
diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c
index f87da8e9d6b3..43cada48b28a 100644
--- a/tools/testing/selftests/bpf/progs/changes_pkt_data.c
+++ b/tools/testing/selftests/bpf/progs/changes_pkt_data.c
@@ -4,22 +4,35 @@
 #include <bpf/bpf_helpers.h>
 
 __noinline
-long changes_pkt_data(struct __sk_buff *sk, __u32 len)
+long changes_pkt_data(struct __sk_buff *sk)
 {
-	return bpf_skb_pull_data(sk, len);
+	return bpf_skb_pull_data(sk, 0);
 }
 
 __noinline __weak
-long does_not_change_pkt_data(struct __sk_buff *sk, __u32 len)
+long does_not_change_pkt_data(struct __sk_buff *sk)
 {
 	return 0;
 }
 
-SEC("tc")
-int dummy(struct __sk_buff *sk)
+SEC("?tc")
+int main_with_subprogs(struct __sk_buff *sk)
+{
+	changes_pkt_data(sk);
+	does_not_change_pkt_data(sk);
+	return 0;
+}
+
+SEC("?tc")
+int main_changes(struct __sk_buff *sk)
+{
+	bpf_skb_pull_data(sk, 0);
+	return 0;
+}
+
+SEC("?tc")
+int main_does_not_change(struct __sk_buff *sk)
 {
-	changes_pkt_data(sk, 0);
-	does_not_change_pkt_data(sk, 0);
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c
index 0e525beb8603..f9a622705f1b 100644
--- a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c
+++ b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c
@@ -4,13 +4,13 @@
 #include <bpf/bpf_helpers.h>
 
 SEC("?freplace")
-long changes_pkt_data(struct __sk_buff *sk, __u32 len)
+long changes_pkt_data(struct __sk_buff *sk)
 {
-	return bpf_skb_pull_data(sk, len);
+	return bpf_skb_pull_data(sk, 0);
 }
 
 SEC("?freplace")
-long does_not_change_pkt_data(struct __sk_buff *sk, __u32 len)
+long does_not_change_pkt_data(struct __sk_buff *sk)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 659b9ba7cb2d7adb64618b87ddfaa528a143766e Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Thu, 12 Dec 2024 01:20:49 -0800
Subject: bpf: Check size for BTF-based ctx access of pointer members

Robert Morris reported the following program type which passes the
verifier in [0]:

SEC("struct_ops/bpf_cubic_init")
void BPF_PROG(bpf_cubic_init, struct sock *sk)
{
	asm volatile("r2 = *(u16*)(r1 + 0)");     // verifier should demand u64
	asm volatile("*(u32 *)(r2 +1504) = 0");   // 1280 in some configs
}

The second line may or may not work, but the first instruction shouldn't
pass, as it's a narrow load into the context structure of the struct ops
callback. The code falls back to btf_ctx_access to ensure correctness
and obtaining the types of pointers. Ensure that the size of the access
is correctly checked to be 8 bytes, otherwise the verifier thinks the
narrow load obtained a trusted BTF pointer and will permit loads/stores
as it sees fit.

Perform the check on size after we've verified that the load is for a
pointer field, as for scalar values narrow loads are fine. Access to
structs passed as arguments to a BPF program are also treated as
scalars, therefore no adjustment is needed in their case.

Existing verifier selftests are broken by this change, but because they
were incorrect. Verifier tests for d_path were performing narrow load
into context to obtain path pointer, had this program actually run it
would cause a crash. The same holds for verifier_btf_ctx_access tests.

  [0]: https://lore.kernel.org/bpf/51338.1732985814@localhost

Fixes: 9e15db66136a ("bpf: Implement accurate raw_tp context access via BTF")
Reported-by: Robert Morris <rtm@mit.edu>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20241212092050.3204165-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c | 4 ++--
 tools/testing/selftests/bpf/progs/verifier_d_path.c         | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
index a570e48b917a..bfc3bf18fed4 100644
--- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
@@ -11,7 +11,7 @@ __success __retval(0)
 __naked void btf_ctx_access_accept(void)
 {
 	asm volatile ("					\
-	r2 = *(u32*)(r1 + 8);		/* load 2nd argument value (int pointer) */\
+	r2 = *(u64 *)(r1 + 8);		/* load 2nd argument value (int pointer) */\
 	r0 = 0;						\
 	exit;						\
 "	::: __clobber_all);
@@ -23,7 +23,7 @@ __success __retval(0)
 __naked void ctx_access_u32_pointer_accept(void)
 {
 	asm volatile ("					\
-	r2 = *(u32*)(r1 + 0);		/* load 1nd argument value (u32 pointer) */\
+	r2 = *(u64 *)(r1 + 0);		/* load 1nd argument value (u32 pointer) */\
 	r0 = 0;						\
 	exit;						\
 "	::: __clobber_all);
diff --git a/tools/testing/selftests/bpf/progs/verifier_d_path.c b/tools/testing/selftests/bpf/progs/verifier_d_path.c
index ec79cbcfde91..87e51a215558 100644
--- a/tools/testing/selftests/bpf/progs/verifier_d_path.c
+++ b/tools/testing/selftests/bpf/progs/verifier_d_path.c
@@ -11,7 +11,7 @@ __success __retval(0)
 __naked void d_path_accept(void)
 {
 	asm volatile ("					\
-	r1 = *(u32*)(r1 + 0);				\
+	r1 = *(u64 *)(r1 + 0);				\
 	r2 = r10;					\
 	r2 += -8;					\
 	r6 = 0;						\
@@ -31,7 +31,7 @@ __failure __msg("helper call is not allowed in probe")
 __naked void d_path_reject(void)
 {
 	asm volatile ("					\
-	r1 = *(u32*)(r1 + 0);				\
+	r1 = *(u64 *)(r1 + 0);				\
 	r2 = r10;					\
 	r2 += -8;					\
 	r6 = 0;						\
-- 
cgit v1.2.3


From 8025731c28beb4700dc801a1ca4504d1f78bac27 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Thu, 12 Dec 2024 01:20:50 -0800
Subject: selftests/bpf: Add test for narrow ctx load for pointer args

Ensure that performing narrow ctx loads other than size == 8 are
rejected when the argument is a pointer type.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20241212092050.3204165-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/progs/verifier_btf_ctx_access.c  | 36 ++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
index bfc3bf18fed4..28b939572cda 100644
--- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
@@ -29,4 +29,40 @@ __naked void ctx_access_u32_pointer_accept(void)
 "	::: __clobber_all);
 }
 
+SEC("fentry/bpf_fentry_test9")
+__description("btf_ctx_access u32 pointer reject u32")
+__failure __msg("size 4 must be 8")
+__naked void ctx_access_u32_pointer_reject_32(void)
+{
+	asm volatile ("					\
+	r2 = *(u32 *)(r1 + 0);		/* load 1st argument with narrow load */\
+	r0 = 0;						\
+	exit;						\
+"	::: __clobber_all);
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("btf_ctx_access u32 pointer reject u16")
+__failure __msg("size 2 must be 8")
+__naked void ctx_access_u32_pointer_reject_16(void)
+{
+	asm volatile ("					\
+	r2 = *(u16 *)(r1 + 0);		/* load 1st argument with narrow load */\
+	r0 = 0;						\
+	exit;						\
+"	::: __clobber_all);
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("btf_ctx_access u32 pointer reject u8")
+__failure __msg("size 1 must be 8")
+__naked void ctx_access_u32_pointer_reject_8(void)
+{
+	asm volatile ("					\
+	r2 = *(u8 *)(r1 + 0);		/* load 1st argument with narrow load */\
+	r0 = 0;						\
+	exit;						\
+"	::: __clobber_all);
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From dda014ba59331dee4f3b773a020e109932f4bd24 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Fri, 29 Nov 2024 15:47:49 +0100
Subject: objtool/x86: allow syscall instruction

The syscall instruction is used in Xen PV mode for doing hypercalls.
Allow syscall to be used in the kernel in case it is tagged with an
unwind hint for objtool.

This is part of XSA-466 / CVE-2024-53241.

Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Co-developed-by: Peter Zijlstra <peterz@infradead.org>
---
 tools/objtool/check.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 4ce176ad411f..76060da755b5 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -3820,9 +3820,12 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 			break;
 
 		case INSN_CONTEXT_SWITCH:
-			if (func && (!next_insn || !next_insn->hint)) {
-				WARN_INSN(insn, "unsupported instruction in callable function");
-				return 1;
+			if (func) {
+				if (!next_insn || !next_insn->hint) {
+					WARN_INSN(insn, "unsupported instruction in callable function");
+					return 1;
+				}
+				break;
 			}
 			return 0;
 
-- 
cgit v1.2.3


From ce03573a1917532da06057da9f8e74a2ee9e2ac9 Mon Sep 17 00:00:00 2001
From: Weizhao Ouyang <o451686892@gmail.com>
Date: Wed, 11 Dec 2024 19:16:39 +0800
Subject: kselftest/arm64: abi: fix SVCR detection

When using svcr_in to check ZA and Streaming Mode, we should make sure
that the value in x2 is correct, otherwise it may trigger an Illegal
instruction if FEAT_SVE and !FEAT_SME.

Fixes: 43e3f85523e4 ("kselftest/arm64: Add SME support to syscall ABI test")
Signed-off-by: Weizhao Ouyang <o451686892@gmail.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20241211111639.12344-1-o451686892@gmail.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 .../testing/selftests/arm64/abi/syscall-abi-asm.S  | 32 ++++++++++------------
 1 file changed, 15 insertions(+), 17 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
index df3230fdac39..66ab2e0bae5f 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
+++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
@@ -81,32 +81,31 @@ do_syscall:
 	stp	x27, x28, [sp, #96]
 
 	// Set SVCR if we're doing SME
-	cbz	x1, 1f
+	cbz	x1, load_gpr
 	adrp	x2, svcr_in
 	ldr	x2, [x2, :lo12:svcr_in]
 	msr	S3_3_C4_C2_2, x2
-1:
 
 	// Load ZA and ZT0 if enabled - uses x12 as scratch due to SME LDR
-	tbz	x2, #SVCR_ZA_SHIFT, 1f
+	tbz	x2, #SVCR_ZA_SHIFT, load_gpr
 	mov	w12, #0
 	ldr	x2, =za_in
-2:	_ldr_za 12, 2
+1:	_ldr_za 12, 2
 	add	x2, x2, x1
 	add	x12, x12, #1
 	cmp	x1, x12
-	bne	2b
+	bne	1b
 
 	// ZT0
 	mrs	x2, S3_0_C0_C4_5	// ID_AA64SMFR0_EL1
 	ubfx	x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \
 			 #ID_AA64SMFR0_EL1_SMEver_WIDTH
-	cbz	x2, 1f
+	cbz	x2, load_gpr
 	adrp	x2, zt_in
 	add	x2, x2, :lo12:zt_in
 	_ldr_zt 2
-1:
 
+load_gpr:
 	// Load GPRs x8-x28, and save our SP/FP for later comparison
 	ldr	x2, =gpr_in
 	add	x2, x2, #64
@@ -125,9 +124,9 @@ do_syscall:
 	str	x30, [x2], #8		// LR
 
 	// Load FPRs if we're not doing neither SVE nor streaming SVE
-	cbnz	x0, 1f
+	cbnz	x0, check_sve_in
 	ldr	x2, =svcr_in
-	tbnz	x2, #SVCR_SM_SHIFT, 1f
+	tbnz	x2, #SVCR_SM_SHIFT, check_sve_in
 
 	ldr	x2, =fpr_in
 	ldp	q0, q1, [x2]
@@ -148,8 +147,8 @@ do_syscall:
 	ldp	q30, q31, [x2, #16 * 30]
 
 	b	2f
-1:
 
+check_sve_in:
 	// Load the SVE registers if we're doing SVE/SME
 
 	ldr	x2, =z_in
@@ -256,32 +255,31 @@ do_syscall:
 	stp	q30, q31, [x2, #16 * 30]
 
 	// Save SVCR if we're doing SME
-	cbz	x1, 1f
+	cbz	x1, check_sve_out
 	mrs	x2, S3_3_C4_C2_2
 	adrp	x3, svcr_out
 	str	x2, [x3, :lo12:svcr_out]
-1:
 
 	// Save ZA if it's enabled - uses x12 as scratch due to SME STR
-	tbz	x2, #SVCR_ZA_SHIFT, 1f
+	tbz	x2, #SVCR_ZA_SHIFT, check_sve_out
 	mov	w12, #0
 	ldr	x2, =za_out
-2:	_str_za 12, 2
+1:	_str_za 12, 2
 	add	x2, x2, x1
 	add	x12, x12, #1
 	cmp	x1, x12
-	bne	2b
+	bne	1b
 
 	// ZT0
 	mrs	x2, S3_0_C0_C4_5	// ID_AA64SMFR0_EL1
 	ubfx	x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \
 			#ID_AA64SMFR0_EL1_SMEver_WIDTH
-	cbz	x2, 1f
+	cbz	x2, check_sve_out
 	adrp	x2, zt_out
 	add	x2, x2, :lo12:zt_out
 	_str_zt 2
-1:
 
+check_sve_out:
 	// Save the SVE state if we have some
 	cbz	x0, 1f
 
-- 
cgit v1.2.3


From c00d738e1673ab801e1577e4e3c780ccf88b1a5b Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Fri, 13 Dec 2024 14:19:27 -0800
Subject: bpf: Revert "bpf: Mark raw_tp arguments with PTR_MAYBE_NULL"

This patch reverts commit
cb4158ce8ec8 ("bpf: Mark raw_tp arguments with PTR_MAYBE_NULL"). The
patch was well-intended and meant to be as a stop-gap fixing branch
prediction when the pointer may actually be NULL at runtime. Eventually,
it was supposed to be replaced by an automated script or compiler pass
detecting possibly NULL arguments and marking them accordingly.

However, it caused two main issues observed for production programs and
failed to preserve backwards compatibility. First, programs relied on
the verifier not exploring == NULL branch when pointer is not NULL, thus
they started failing with a 'dereference of scalar' error.  Next,
allowing raw_tp arguments to be modified surfaced the warning in the
verifier that warns against reg->off when PTR_MAYBE_NULL is set.

More information, context, and discusson on both problems is available
in [0]. Overall, this approach had several shortcomings, and the fixes
would further complicate the verifier's logic, and the entire masking
scheme would have to be removed eventually anyway.

Hence, revert the patch in preparation of a better fix avoiding these
issues to replace this commit.

  [0]: https://lore.kernel.org/bpf/20241206161053.809580-1-memxor@gmail.com

Reported-by: Manu Bretelle <chantra@meta.com>
Fixes: cb4158ce8ec8 ("bpf: Mark raw_tp arguments with PTR_MAYBE_NULL")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20241213221929.3495062-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
index 5aaf2b065f86..bba3e37f749b 100644
--- a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
+++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
@@ -7,11 +7,7 @@
 #include "bpf_misc.h"
 
 SEC("tp_btf/bpf_testmod_test_nullable_bare")
-/* This used to be a failure test, but raw_tp nullable arguments can now
- * directly be dereferenced, whether they have nullable annotation or not,
- * and don't need to be explicitly checked.
- */
-__success
+__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
 int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx)
 {
 	return nullable_ctx->len;
-- 
cgit v1.2.3


From 838a10bd2ebfe11a60dd67687533a7cfc220cc86 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Fri, 13 Dec 2024 14:19:28 -0800
Subject: bpf: Augment raw_tp arguments with PTR_MAYBE_NULL

Arguments to a raw tracepoint are tagged as trusted, which carries the
semantics that the pointer will be non-NULL.  However, in certain cases,
a raw tracepoint argument may end up being NULL. More context about this
issue is available in [0].

Thus, there is a discrepancy between the reality, that raw_tp arguments can
actually be NULL, and the verifier's knowledge, that they are never NULL,
causing explicit NULL check branch to be dead code eliminated.

A previous attempt [1], i.e. the second fixed commit, was made to
simulate symbolic execution as if in most accesses, the argument is a
non-NULL raw_tp, except for conditional jumps.  This tried to suppress
branch prediction while preserving compatibility, but surfaced issues
with production programs that were difficult to solve without increasing
verifier complexity. A more complete discussion of issues and fixes is
available at [2].

Fix this by maintaining an explicit list of tracepoints where the
arguments are known to be NULL, and mark the positional arguments as
PTR_MAYBE_NULL. Additionally, capture the tracepoints where arguments
are known to be ERR_PTR, and mark these arguments as scalar values to
prevent potential dereference.

Each hex digit is used to encode NULL-ness (0x1) or ERR_PTR-ness (0x2),
shifted by the zero-indexed argument number x 4. This can be represented
as follows:
1st arg: 0x1
2nd arg: 0x10
3rd arg: 0x100
... and so on (likewise for ERR_PTR case).

In the future, an automated pass will be used to produce such a list, or
insert __nullable annotations automatically for tracepoints. Each
compilation unit will be analyzed and results will be collated to find
whether a tracepoint pointer is definitely not null, maybe null, or an
unknown state where verifier conservatively marks it PTR_MAYBE_NULL.
A proof of concept of this tool from Eduard is available at [3].

Note that in case we don't find a specification in the raw_tp_null_args
array and the tracepoint belongs to a kernel module, we will
conservatively mark the arguments as PTR_MAYBE_NULL. This is because
unlike for in-tree modules, out-of-tree module tracepoints may pass NULL
freely to the tracepoint. We don't protect against such tracepoints
passing ERR_PTR (which is uncommon anyway), lest we mark all such
arguments as SCALAR_VALUE.

While we are it, let's adjust the test raw_tp_null to not perform
dereference of the skb->mark, as that won't be allowed anymore, and make
it more robust by using inline assembly to test the dead code
elimination behavior, which should still stay the same.

  [0]: https://lore.kernel.org/bpf/ZrCZS6nisraEqehw@jlelli-thinkpadt14gen4.remote.csb
  [1]: https://lore.kernel.org/all/20241104171959.2938862-1-memxor@gmail.com
  [2]: https://lore.kernel.org/bpf/20241206161053.809580-1-memxor@gmail.com
  [3]: https://github.com/eddyz87/llvm-project/tree/nullness-for-tracepoint-params

Reported-by: Juri Lelli <juri.lelli@redhat.com> # original bug
Reported-by: Manu Bretelle <chantra@meta.com> # bugs in masking fix
Fixes: 3f00c5239344 ("bpf: Allow trusted pointers to be passed to KF_TRUSTED_ARGS kfuncs")
Fixes: cb4158ce8ec8 ("bpf: Mark raw_tp arguments with PTR_MAYBE_NULL")
Reviewed-by: Eduard Zingerman <eddyz87@gmail.com>
Co-developed-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20241213221929.3495062-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/raw_tp_null.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null.c b/tools/testing/selftests/bpf/progs/raw_tp_null.c
index 457f34c151e3..5927054b6dd9 100644
--- a/tools/testing/selftests/bpf/progs/raw_tp_null.c
+++ b/tools/testing/selftests/bpf/progs/raw_tp_null.c
@@ -3,6 +3,7 @@
 
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -17,16 +18,14 @@ int BPF_PROG(test_raw_tp_null, struct sk_buff *skb)
 	if (task->pid != tid)
 		return 0;
 
-	i = i + skb->mark + 1;
-	/* The compiler may move the NULL check before this deref, which causes
-	 * the load to fail as deref of scalar. Prevent that by using a barrier.
+	/* If dead code elimination kicks in, the increment +=2 will be
+	 * removed. For raw_tp programs attaching to tracepoints in kernel
+	 * modules, we mark input arguments as PTR_MAYBE_NULL, so branch
+	 * prediction should never kick in.
 	 */
-	barrier();
-	/* If dead code elimination kicks in, the increment below will
-	 * be removed. For raw_tp programs, we mark input arguments as
-	 * PTR_MAYBE_NULL, so branch prediction should never kick in.
-	 */
-	if (!skb)
-		i += 2;
+	asm volatile ("%[i] += 1; if %[ctx] != 0 goto +1; %[i] += 2;"
+			: [i]"+r"(i)
+			: [ctx]"r"(skb)
+			: "memory");
 	return 0;
 }
-- 
cgit v1.2.3


From 0da1955b5bd2af3a1c3d13916df06e34ffa6df3d Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Fri, 13 Dec 2024 14:19:29 -0800
Subject: selftests/bpf: Add tests for raw_tp NULL args

Add tests to ensure that arguments are correctly marked based on their
specified positions, and whether they get marked correctly as maybe
null. For modules, all tracepoint parameters should be marked
PTR_MAYBE_NULL by default.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20241213221929.3495062-4-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/prog_tests/raw_tp_null.c |  3 +++
 .../testing/selftests/bpf/progs/raw_tp_null_fail.c | 24 ++++++++++++++++++++++
 2 files changed, 27 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/raw_tp_null_fail.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c
index 6fa19449297e..43676a9922dc 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c
@@ -3,11 +3,14 @@
 
 #include <test_progs.h>
 #include "raw_tp_null.skel.h"
+#include "raw_tp_null_fail.skel.h"
 
 void test_raw_tp_null(void)
 {
 	struct raw_tp_null *skel;
 
+	RUN_TESTS(raw_tp_null_fail);
+
 	skel = raw_tp_null__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "raw_tp_null__open_and_load"))
 		return;
diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c
new file mode 100644
index 000000000000..38d669957bf1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Ensure module parameter has PTR_MAYBE_NULL */
+SEC("tp_btf/bpf_testmod_test_raw_tp_null")
+__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
+int test_raw_tp_null_bpf_testmod_test_raw_tp_null_arg_1(void *ctx) {
+    asm volatile("r1 = *(u64 *)(r1 +0); r1 = *(u64 *)(r1 +0);" ::: __clobber_all);
+    return 0;
+}
+
+/* Check NULL marking */
+SEC("tp_btf/sched_pi_setprio")
+__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
+int test_raw_tp_null_sched_pi_setprio_arg_2(void *ctx) {
+    asm volatile("r1 = *(u64 *)(r1 +8); r1 = *(u64 *)(r1 +0);" ::: __clobber_all);
+    return 0;
+}
-- 
cgit v1.2.3


From 663ad7481f068057f6f692c5368c47150e855370 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Fri, 13 Dec 2024 13:07:11 +0000
Subject: tools/net/ynl: fix sub-message key lookup for nested attributes

Use the correct attribute space for sub-message key lookup in nested
attributes when adding attributes. This fixes rt_link where the "kind"
key and "data" sub-message are nested attributes in "linkinfo".

For example:

./tools/net/ynl/cli.py \
    --create \
    --spec Documentation/netlink/specs/rt_link.yaml \
    --do newlink \
    --json '{"link": 99,
             "linkinfo": { "kind": "vlan", "data": {"id": 4 } }
             }'

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Fixes: ab463c4342d1 ("tools/net/ynl: Add support for encoding sub-messages")
Link: https://patch.msgid.link/20241213130711.40267-1-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/net/ynl/lib/ynl.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index 01ec01a90e76..eea29359a899 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -556,10 +556,10 @@ class YnlFamily(SpecFamily):
         if attr["type"] == 'nest':
             nl_type |= Netlink.NLA_F_NESTED
             attr_payload = b''
-            sub_attrs = SpaceAttrs(self.attr_sets[space], value, search_attrs)
+            sub_space = attr['nested-attributes']
+            sub_attrs = SpaceAttrs(self.attr_sets[sub_space], value, search_attrs)
             for subname, subvalue in value.items():
-                attr_payload += self._add_attr(attr['nested-attributes'],
-                                               subname, subvalue, sub_attrs)
+                attr_payload += self._add_attr(sub_space, subname, subvalue, sub_attrs)
         elif attr["type"] == 'flag':
             if not value:
                 # If value is absent or false then skip attribute creation.
-- 
cgit v1.2.3


From 0518863407b8dcc7070fdbc1c015046d66777e78 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 13 Dec 2024 07:22:42 -0800
Subject: selftests: net: support setting recv_size in YNL

recv_size parameter allows constraining the buffer size for dumps.
It's useful in testing kernel handling of dump continuation,
IOW testing dumps which span multiple skbs.

Let the tests set this parameter when initializing the YNL family.
Keep the normal default, we don't want tests to unintentionally
behave very differently than normal code.

Reviewed-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Link: https://patch.msgid.link/20241213152244.3080955-4-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/lib/py/ynl.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index a0d689d58c57..076a7e8dc3eb 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -32,23 +32,23 @@ except ModuleNotFoundError as e:
 # Set schema='' to avoid jsonschema validation, it's slow
 #
 class EthtoolFamily(YnlFamily):
-    def __init__(self):
+    def __init__(self, recv_size=0):
         super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(),
-                         schema='')
+                         schema='', recv_size=recv_size)
 
 
 class RtnlFamily(YnlFamily):
-    def __init__(self):
+    def __init__(self, recv_size=0):
         super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
-                         schema='')
+                         schema='', recv_size=recv_size)
 
 
 class NetdevFamily(YnlFamily):
-    def __init__(self):
+    def __init__(self, recv_size=0):
         super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(),
-                         schema='')
+                         schema='', recv_size=recv_size)
 
 class NetshaperFamily(YnlFamily):
-    def __init__(self):
+    def __init__(self, recv_size=0):
         super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(),
-                         schema='')
+                         schema='', recv_size=recv_size)
-- 
cgit v1.2.3


From 1234810b1649e9d781aeafd4b23fb1fcfbf95d8f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 13 Dec 2024 07:22:43 -0800
Subject: selftests: net-drv: queues: sanity check netlink dumps

This test already catches a netlink bug fixed by this series,
but only when running on HW with many queues. Make sure the
netdevsim instance created has a lot of queues, and constrain
the size of the recv_buffer used by netlink.

While at it test both rx and tx queues.

Reviewed-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Link: https://patch.msgid.link/20241213152244.3080955-5-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/drivers/net/queues.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
index 30f29096e27c..9c5473abbd78 100755
--- a/tools/testing/selftests/drivers/net/queues.py
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -8,25 +8,28 @@ from lib.py import cmd
 import glob
 
 
-def sys_get_queues(ifname) -> int:
-    folders = glob.glob(f'/sys/class/net/{ifname}/queues/rx-*')
+def sys_get_queues(ifname, qtype='rx') -> int:
+    folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*')
     return len(folders)
 
 
-def nl_get_queues(cfg, nl):
+def nl_get_queues(cfg, nl, qtype='rx'):
     queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
     if queues:
-        return len([q for q in queues if q['type'] == 'rx'])
+        return len([q for q in queues if q['type'] == qtype])
     return None
 
 
 def get_queues(cfg, nl) -> None:
-    queues = nl_get_queues(cfg, nl)
-    if not queues:
-        raise KsftSkipEx('queue-get not supported by device')
+    snl = NetdevFamily(recv_size=4096)
 
-    expected = sys_get_queues(cfg.dev['ifname'])
-    ksft_eq(queues, expected)
+    for qtype in ['rx', 'tx']:
+        queues = nl_get_queues(cfg, snl, qtype)
+        if not queues:
+            raise KsftSkipEx('queue-get not supported by device')
+
+        expected = sys_get_queues(cfg.dev['ifname'], qtype)
+        ksft_eq(queues, expected)
 
 
 def addremove_queues(cfg, nl) -> None:
@@ -57,7 +60,7 @@ def addremove_queues(cfg, nl) -> None:
 
 
 def main() -> None:
-    with NetDrvEnv(__file__, queue_count=3) as cfg:
+    with NetDrvEnv(__file__, queue_count=100) as cfg:
         ksft_run([get_queues, addremove_queues], args=(cfg, NetdevFamily()))
     ksft_exit()
 
-- 
cgit v1.2.3


From 5712e323d4c3ad03bba4d28f83e80593171ac3f1 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 13 Dec 2024 07:22:44 -0800
Subject: selftests: net-drv: stats: sanity check netlink dumps

Sanity check netlink dumps, to make sure dumps don't have
repeated entries or gaps in IDs.

Reviewed-by: Petr Machata <petrm@nvidia.com>
Link: https://patch.msgid.link/20241213152244.3080955-6-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/drivers/net/stats.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
index 63e3c045a3b2..031ac9def6c0 100755
--- a/tools/testing/selftests/drivers/net/stats.py
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -110,6 +110,23 @@ def qstat_by_ifindex(cfg) -> None:
             ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key)
             ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key)
 
+    # Sanity check the dumps
+    queues = NetdevFamily(recv_size=4096).qstats_get({"scope": "queue"}, dump=True)
+    # Reformat the output into {ifindex: {rx: [id, id, ...], tx: [id, id, ...]}}
+    parsed = {}
+    for entry in queues:
+        ifindex = entry["ifindex"]
+        if ifindex not in parsed:
+            parsed[ifindex] = {"rx":[], "tx": []}
+        parsed[ifindex][entry["queue-type"]].append(entry['queue-id'])
+    # Now, validate
+    for ifindex, queues in parsed.items():
+        for qtype in ['rx', 'tx']:
+            ksft_eq(len(queues[qtype]), len(set(queues[qtype])),
+                    comment="repeated queue keys")
+            ksft_eq(len(queues[qtype]), max(queues[qtype]) + 1,
+                    comment="missing queue keys")
+
     # Test invalid dumps
     # 0 is invalid
     with ksft_raises(NlError) as cm:
@@ -158,7 +175,7 @@ def check_down(cfg) -> None:
 
 
 def main() -> None:
-    with NetDrvEnv(__file__) as cfg:
+    with NetDrvEnv(__file__, queue_count=100) as cfg:
         ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex,
                   check_down],
                  args=(cfg, ))
-- 
cgit v1.2.3


From 6a75f19af16ff482cfd6085c77123aa0f464f8dd Mon Sep 17 00:00:00 2001
From: "Isaac J. Manjarres" <isaacmanjarres@google.com>
Date: Thu, 5 Dec 2024 11:29:41 -0800
Subject: selftests/memfd: run sysctl tests when PID namespace support is
 enabled

The sysctl tests for vm.memfd_noexec rely on the kernel to support PID
namespaces (i.e.  the kernel is built with CONFIG_PID_NS=y).  If the
kernel the test runs on does not support PID namespaces, the first sysctl
test will fail when attempting to spawn a new thread in a new PID
namespace, abort the test, preventing the remaining tests from being run.

This is not desirable, as not all kernels need PID namespaces, but can
still use the other features provided by memfd.  Therefore, only run the
sysctl tests if the kernel supports PID namespaces.  Otherwise, skip those
tests and emit an informative message to let the user know why the sysctl
tests are not being run.

Link: https://lkml.kernel.org/r/20241205192943.3228757-1-isaacmanjarres@google.com
Fixes: 11f75a01448f ("selftests/memfd: add tests for MFD_NOEXEC_SEAL MFD_EXEC")
Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com>
Reviewed-by: Jeff Xu <jeffxu@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: <stable@vger.kernel.org>	[6.6+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/memfd/memfd_test.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 95af2d78fd31..0a0b55516028 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -9,6 +9,7 @@
 #include <fcntl.h>
 #include <linux/memfd.h>
 #include <sched.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <signal.h>
@@ -1557,6 +1558,11 @@ static void test_share_fork(char *banner, char *b_suffix)
 	close(fd);
 }
 
+static bool pid_ns_supported(void)
+{
+	return access("/proc/self/ns/pid", F_OK) == 0;
+}
+
 int main(int argc, char **argv)
 {
 	pid_t pid;
@@ -1591,8 +1597,12 @@ int main(int argc, char **argv)
 	test_seal_grow();
 	test_seal_resize();
 
-	test_sysctl_simple();
-	test_sysctl_nested();
+	if (pid_ns_supported()) {
+		test_sysctl_simple();
+		test_sysctl_nested();
+	} else {
+		printf("PID namespaces are not supported; skipping sysctl tests\n");
+	}
 
 	test_share_dup("SHARE-DUP", "");
 	test_share_mmap("SHARE-MMAP", "");
-- 
cgit v1.2.3


From a17975992cc11588767175247ccaae1213a8b582 Mon Sep 17 00:00:00 2001
From: Adrian Moreno <amorenoz@redhat.com>
Date: Tue, 17 Dec 2024 22:16:51 +0100
Subject: selftests: openvswitch: fix tcpdump execution

Fix the way tcpdump is executed by:
- Using the right variable for the namespace. Currently the use of the
  empty "ns" makes the command fail.
- Waiting until it starts to capture to ensure the interesting traffic
  is caught on slow systems.
- Using line-buffered output to ensure logs are available when the test
  is paused with "-p". Otherwise the last chunk of data might only be
  written when tcpdump is killed.

Fixes: 74cc26f416b9 ("selftests: openvswitch: add interface support")
Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Link: https://patch.msgid.link/20241217211652.483016-1-amorenoz@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/openvswitch/openvswitch.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index cc0bfae2bafa..960e1ab4dd04 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -171,8 +171,10 @@ ovs_add_netns_and_veths () {
 		ovs_add_if "$1" "$2" "$4" -u || return 1
 	fi
 
-	[ $TRACING -eq 1 ] && ovs_netns_spawn_daemon "$1" "$ns" \
-			tcpdump -i any -s 65535
+	if [ $TRACING -eq 1 ]; then
+		ovs_netns_spawn_daemon "$1" "$3" tcpdump -l -i any -s 6553
+		ovs_wait grep -q "listening on any" ${ovs_dir}/stderr
+	fi
 
 	return 0
 }
-- 
cgit v1.2.3


From 29d44cce324dab2bd86c447071a596262e7109b6 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Thu, 19 Dec 2024 19:15:06 +0800
Subject: selftests/bpf: Use asm constraint "m" for LoongArch

Currently, LoongArch LLVM does not support the constraint "o" and no plan
to support it, it only supports the similar constraint "m", so change the
constraints from "nor" in the "else" case to arch-specific "nmr" to avoid
the build error such as "unexpected asm memory constraint" for LoongArch.

Fixes: 630301b0d59d ("selftests/bpf: Add basic USDT selftests")
Suggested-by: Weining Lu <luweining@loongson.cn>
Suggested-by: Li Chen <chenli@loongson.cn>
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Huacai Chen <chenhuacai@loongson.cn>
Cc: stable@vger.kernel.org
Link: https://llvm.org/docs/LangRef.html#supported-constraint-code-list
Link: https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp#L172
Link: https://lore.kernel.org/bpf/20241219111506.20643-1-yangtiezhu@loongson.cn
---
 tools/testing/selftests/bpf/sdt.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h
index ca0162b4dc57..1fcfa5160231 100644
--- a/tools/testing/selftests/bpf/sdt.h
+++ b/tools/testing/selftests/bpf/sdt.h
@@ -102,6 +102,8 @@
 # define STAP_SDT_ARG_CONSTRAINT        nZr
 # elif defined __arm__
 # define STAP_SDT_ARG_CONSTRAINT        g
+# elif defined __loongarch__
+# define STAP_SDT_ARG_CONSTRAINT        nmr
 # else
 # define STAP_SDT_ARG_CONSTRAINT        nor
 # endif
-- 
cgit v1.2.3


From 716f2bca1ce93bb95364f1fc0555c1650507b588 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Wed, 18 Dec 2024 18:57:24 +0100
Subject: selftests/bpf: Fix compilation error in get_uprobe_offset()

In get_uprobe_offset(), the call to procmap_query() use the constant
PROCMAP_QUERY_VMA_EXECUTABLE, even if PROCMAP_QUERY is not defined.

Define PROCMAP_QUERY_VMA_EXECUTABLE when PROCMAP_QUERY isn't.

Fixes: 4e9e07603ecd ("selftests/bpf: make use of PROCMAP_QUERY ioctl if available")
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/bpf/20241218175724.578884-1-jmarchan@redhat.com
---
 tools/testing/selftests/bpf/trace_helpers.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 2d742fdac6b9..81943c6254e6 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -293,6 +293,10 @@ static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *st
 	return 0;
 }
 #else
+# ifndef PROCMAP_QUERY_VMA_EXECUTABLE
+#  define PROCMAP_QUERY_VMA_EXECUTABLE 0x04
+# endif
+
 static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags)
 {
 	return -EOPNOTSUPP;
-- 
cgit v1.2.3


From 55853cb829dc707427c3519f6b8686682a204368 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Wed, 18 Dec 2024 10:59:31 +0800
Subject: selftests/alsa: Fix circular dependency involving global-timer

The pattern rule `$(OUTPUT)/%: %.c` inadvertently included a circular
dependency on the global-timer target due to its inclusion in
$(TEST_GEN_PROGS_EXTENDED). This resulted in a circular dependency
warning during the build process.

To resolve this, the dependency on $(TEST_GEN_PROGS_EXTENDED) has been
replaced with an explicit dependency on $(OUTPUT)/libatest.so. This change
ensures that libatest.so is built before any other targets that require it,
without creating a circular dependency.

This fix addresses the following warning:

make[4]: Entering directory 'tools/testing/selftests/alsa'
make[4]: Circular default_modconfig/kselftest/alsa/global-timer <- default_modconfig/kselftest/alsa/global-timer dependency dropped.
make[4]: Nothing to be done for 'all'.
make[4]: Leaving directory 'tools/testing/selftests/alsa'

Cc: Mark Brown <broonie@kernel.org>
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: Takashi Iwai <tiwai@suse.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Link: https://patch.msgid.link/20241218025931.914164-1-lizhijian@fujitsu.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 tools/testing/selftests/alsa/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile
index 944279160fed..8dab90ad22bb 100644
--- a/tools/testing/selftests/alsa/Makefile
+++ b/tools/testing/selftests/alsa/Makefile
@@ -27,5 +27,5 @@ include ../lib.mk
 $(OUTPUT)/libatest.so: conf.c alsa-local.h
 	$(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@
 
-$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) alsa-local.h
+$(OUTPUT)/%: %.c $(OUTPUT)/libatest.so alsa-local.h
 	$(CC) $(CFLAGS) $< $(LDLIBS) -latest -o $@
-- 
cgit v1.2.3


From 724c6ce38bbaeb4b3f109b0e066d6c0ecd15446c Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <aleksander.lobakin@intel.com>
Date: Thu, 19 Dec 2024 14:57:34 +0100
Subject: stddef: make __struct_group() UAPI C++-friendly

For the most part of the C++ history, it couldn't have type
declarations inside anonymous unions for different reasons. At the
same time, __struct_group() relies on the latters, so when the @TAG
argument is not empty, C++ code doesn't want to build (even under
`extern "C"`):

../linux/include/uapi/linux/pkt_cls.h:25:24: error:
'struct tc_u32_sel::<unnamed union>::tc_u32_sel_hdr,' invalid;
an anonymous union may only have public non-static data members
[-fpermissive]

The safest way to fix this without trying to switch standards (which
is impossible in UAPI anyway) etc., is to disable tag declaration
for that language. This won't break anything since for now it's not
buildable at all.
Use a separate definition for __struct_group() when __cplusplus is
defined to mitigate the error, including the version from tools/.

Fixes: 50d7bd38c3aa ("stddef: Introduce struct_group() helper macro")
Reported-by: Christopher Ferris <cferris@google.com>
Closes: https://lore.kernel.org/linux-hardening/Z1HZpe3WE5As8UAz@google.com
Suggested-by: Kees Cook <kees@kernel.org> # __struct_group_tag()
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/r/20241219135734.2130002-1-aleksander.lobakin@intel.com
Signed-off-by: Kees Cook <kees@kernel.org>
---
 tools/include/uapi/linux/stddef.h | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h
index bb6ea517efb5..c53cde425406 100644
--- a/tools/include/uapi/linux/stddef.h
+++ b/tools/include/uapi/linux/stddef.h
@@ -8,6 +8,13 @@
 #define __always_inline __inline__
 #endif
 
+/* Not all C++ standards support type declarations inside an anonymous union */
+#ifndef __cplusplus
+#define __struct_group_tag(TAG)		TAG
+#else
+#define __struct_group_tag(TAG)
+#endif
+
 /**
  * __struct_group() - Create a mirrored named and anonyomous struct
  *
@@ -20,14 +27,14 @@
  * and size: one anonymous and one named. The former's members can be used
  * normally without sub-struct naming, and the latter can be used to
  * reason about the start, end, and size of the group of struct members.
- * The named struct can also be explicitly tagged for layer reuse, as well
- * as both having struct attributes appended.
+ * The named struct can also be explicitly tagged for layer reuse (C only),
+ * as well as both having struct attributes appended.
  */
 #define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
 	union { \
 		struct { MEMBERS } ATTRS; \
-		struct TAG { MEMBERS } ATTRS NAME; \
-	}
+		struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \
+	} ATTRS
 
 /**
  * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
-- 
cgit v1.2.3


From 246068b86b1c36e4590388ab8f278e21f1997dc1 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 19 Dec 2024 17:54:10 +0200
Subject: selftests: net: local_termination: require mausezahn

Since the blamed commit, we require mausezahn because send_raw() uses it.
Remove the "REQUIRE_MZ=no" line, which overwrites the default of requiring it.

Fixes: 237979504264 ("selftests: net: local_termination: add PTP frames to the mix")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20241219155410.1856868-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/local_termination.sh | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
index c35548767756..ecd34f364125 100755
--- a/tools/testing/selftests/net/forwarding/local_termination.sh
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -7,7 +7,6 @@ ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \
 NUM_NETIFS=2
 PING_COUNT=1
 REQUIRE_MTOOLS=yes
-REQUIRE_MZ=no
 
 source lib.sh
 
-- 
cgit v1.2.3


From 30b981796b94b083da8fdded7cb74cb493608760 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 18 Dec 2024 19:28:33 -0800
Subject: selftests: drv-net: test empty queue and NAPI responses in netlink

Make sure kernel doesn't respond to GETs for queues and NAPIs when
link is down. Not with valid data, or with empty message, we want
a ENOENT.

Link: https://patch.msgid.link/20241219032833.1165433-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/drivers/net/queues.py | 28 +++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
index 9c5473abbd78..38303da957ee 100755
--- a/tools/testing/selftests/drivers/net/queues.py
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -1,10 +1,12 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 
-from lib.py import ksft_run, ksft_exit, ksft_eq, KsftSkipEx
-from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import ksft_disruptive, ksft_exit, ksft_run
+from lib.py import ksft_eq, ksft_raises, KsftSkipEx
+from lib.py import EthtoolFamily, NetdevFamily, NlError
 from lib.py import NetDrvEnv
-from lib.py import cmd
+from lib.py import cmd, defer, ip
+import errno
 import glob
 
 
@@ -59,9 +61,27 @@ def addremove_queues(cfg, nl) -> None:
     ksft_eq(queues, expected)
 
 
+@ksft_disruptive
+def check_down(cfg, nl) -> None:
+    # Check the NAPI IDs before interface goes down and hides them
+    napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+
+    ip(f"link set dev {cfg.dev['ifname']} down")
+    defer(ip, f"link set dev {cfg.dev['ifname']} up")
+
+    with ksft_raises(NlError) as cm:
+        nl.queue_get({'ifindex': cfg.ifindex, 'id': 0, 'type': 'rx'})
+    ksft_eq(cm.exception.nl_msg.error, -errno.ENOENT)
+
+    if napis:
+        with ksft_raises(NlError) as cm:
+            nl.napi_get({'id': napis[0]['id']})
+        ksft_eq(cm.exception.nl_msg.error, -errno.ENOENT)
+
+
 def main() -> None:
     with NetDrvEnv(__file__, queue_count=100) as cfg:
-        ksft_run([get_queues, addremove_queues], args=(cfg, NetdevFamily()))
+        ksft_run([get_queues, addremove_queues, check_down], args=(cfg, NetdevFamily()))
     ksft_exit()
 
 
-- 
cgit v1.2.3


From 9ee0c7b8654346d60c823babe4b3747357a30477 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Thu, 12 Dec 2024 19:40:55 -0800
Subject: selftests/bpf: Add a BPF selftest for bpf_skb_change_tail()

As requested by Daniel, we need to add a selftest to cover
bpf_skb_change_tail() cases in skb_verdict. Here we test trimming,
growing and error cases, and validate its expected return values and the
expected sizes of the payload.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20241213034057.246437-3-xiyou.wangcong@gmail.com
---
 .../selftests/bpf/prog_tests/sockmap_basic.c       | 51 ++++++++++++++++++++++
 .../selftests/bpf/progs/test_sockmap_change_tail.c | 40 +++++++++++++++++
 2 files changed, 91 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 248754296d97..884ad87783d5 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -12,6 +12,7 @@
 #include "test_sockmap_progs_query.skel.h"
 #include "test_sockmap_pass_prog.skel.h"
 #include "test_sockmap_drop_prog.skel.h"
+#include "test_sockmap_change_tail.skel.h"
 #include "bpf_iter_sockmap.skel.h"
 
 #include "sockmap_helpers.h"
@@ -643,6 +644,54 @@ out:
 		test_sockmap_drop_prog__destroy(drop);
 }
 
+static void test_sockmap_skb_verdict_change_tail(void)
+{
+	struct test_sockmap_change_tail *skel;
+	int err, map, verdict;
+	int c1, p1, sent, recvd;
+	int zero = 0;
+	char buf[2];
+
+	skel = test_sockmap_change_tail__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+	verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+	map = bpf_map__fd(skel->maps.sock_map_rx);
+
+	err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach"))
+		goto out;
+	err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
+	if (!ASSERT_OK(err, "create_pair()"))
+		goto out;
+	err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+	if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
+		goto out_close;
+	sent = xsend(p1, "Tr", 2, 0);
+	ASSERT_EQ(sent, 2, "xsend(p1)");
+	recvd = recv(c1, buf, 2, 0);
+	ASSERT_EQ(recvd, 1, "recv(c1)");
+	ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+	sent = xsend(p1, "G", 1, 0);
+	ASSERT_EQ(sent, 1, "xsend(p1)");
+	recvd = recv(c1, buf, 2, 0);
+	ASSERT_EQ(recvd, 2, "recv(c1)");
+	ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+	sent = xsend(p1, "E", 1, 0);
+	ASSERT_EQ(sent, 1, "xsend(p1)");
+	recvd = recv(c1, buf, 1, 0);
+	ASSERT_EQ(recvd, 1, "recv(c1)");
+	ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
+
+out_close:
+	close(c1);
+	close(p1);
+out:
+	test_sockmap_change_tail__destroy(skel);
+}
+
 static void test_sockmap_skb_verdict_peek_helper(int map)
 {
 	int err, c1, p1, zero = 0, sent, recvd, avail;
@@ -1058,6 +1107,8 @@ void test_sockmap_basic(void)
 		test_sockmap_skb_verdict_fionread(true);
 	if (test__start_subtest("sockmap skb_verdict fionread on drop"))
 		test_sockmap_skb_verdict_fionread(false);
+	if (test__start_subtest("sockmap skb_verdict change tail"))
+		test_sockmap_skb_verdict_change_tail();
 	if (test__start_subtest("sockmap skb_verdict msg_f_peek"))
 		test_sockmap_skb_verdict_peek();
 	if (test__start_subtest("sockmap skb_verdict msg_f_peek with link"))
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
new file mode 100644
index 000000000000..2796dd8545eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 ByteDance */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sock_map_rx SEC(".maps");
+
+long change_tail_ret = 1;
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+	char *data, *data_end;
+
+	bpf_skb_pull_data(skb, 1);
+	data = (char *)(unsigned long)skb->data;
+	data_end = (char *)(unsigned long)skb->data_end;
+
+	if (data + 1 > data_end)
+		return SK_PASS;
+
+	if (data[0] == 'T') { /* Trim the packet */
+		change_tail_ret = bpf_skb_change_tail(skb, skb->len - 1, 0);
+		return SK_PASS;
+	} else if (data[0] == 'G') { /* Grow the packet */
+		change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0);
+		return SK_PASS;
+	} else if (data[0] == 'E') { /* Error */
+		change_tail_ret = bpf_skb_change_tail(skb, 65535, 0);
+		return SK_PASS;
+	}
+	return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 472759c9f5377912c7483cca5da847888a27cecc Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Thu, 12 Dec 2024 19:40:56 -0800
Subject: selftests/bpf: Introduce socket_helpers.h for TC tests

Pull socket helpers out of sockmap_helpers.h so that they can be reused
for TC tests as well. This prepares for the next patch.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20241213034057.246437-4-xiyou.wangcong@gmail.com
---
 .../selftests/bpf/prog_tests/socket_helpers.h      | 394 +++++++++++++++++++++
 .../selftests/bpf/prog_tests/sockmap_helpers.h     | 385 +-------------------
 2 files changed, 395 insertions(+), 384 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/socket_helpers.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
new file mode 100644
index 000000000000..1bdfb79ef009
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
@@ -0,0 +1,394 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SOCKET_HELPERS__
+#define __SOCKET_HELPERS__
+
+#include <linux/vm_sockets.h>
+
+/* include/linux/net.h */
+#define SOCK_TYPE_MASK 0xf
+
+#define IO_TIMEOUT_SEC 30
+#define MAX_STRERR_LEN 256
+
+/* workaround for older vm_sockets.h */
+#ifndef VMADDR_CID_LOCAL
+#define VMADDR_CID_LOCAL 1
+#endif
+
+/* include/linux/cleanup.h */
+#define __get_and_null(p, nullvalue)                                           \
+	({                                                                     \
+		__auto_type __ptr = &(p);                                      \
+		__auto_type __val = *__ptr;                                    \
+		*__ptr = nullvalue;                                            \
+		__val;                                                         \
+	})
+
+#define take_fd(fd) __get_and_null(fd, -EBADF)
+
+/* Wrappers that fail the test on error and report it. */
+
+#define _FAIL(errnum, fmt...)                                                  \
+	({                                                                     \
+		error_at_line(0, (errnum), __func__, __LINE__, fmt);           \
+		CHECK_FAIL(true);                                              \
+	})
+#define FAIL(fmt...) _FAIL(0, fmt)
+#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
+#define FAIL_LIBBPF(err, msg)                                                  \
+	({                                                                     \
+		char __buf[MAX_STRERR_LEN];                                    \
+		libbpf_strerror((err), __buf, sizeof(__buf));                  \
+		FAIL("%s: %s", (msg), __buf);                                  \
+	})
+
+
+#define xaccept_nonblock(fd, addr, len)                                        \
+	({                                                                     \
+		int __ret =                                                    \
+			accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC);   \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("accept");                                  \
+		__ret;                                                         \
+	})
+
+#define xbind(fd, addr, len)                                                   \
+	({                                                                     \
+		int __ret = bind((fd), (addr), (len));                         \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("bind");                                    \
+		__ret;                                                         \
+	})
+
+#define xclose(fd)                                                             \
+	({                                                                     \
+		int __ret = close((fd));                                       \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("close");                                   \
+		__ret;                                                         \
+	})
+
+#define xconnect(fd, addr, len)                                                \
+	({                                                                     \
+		int __ret = connect((fd), (addr), (len));                      \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("connect");                                 \
+		__ret;                                                         \
+	})
+
+#define xgetsockname(fd, addr, len)                                            \
+	({                                                                     \
+		int __ret = getsockname((fd), (addr), (len));                  \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("getsockname");                             \
+		__ret;                                                         \
+	})
+
+#define xgetsockopt(fd, level, name, val, len)                                 \
+	({                                                                     \
+		int __ret = getsockopt((fd), (level), (name), (val), (len));   \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("getsockopt(" #name ")");                   \
+		__ret;                                                         \
+	})
+
+#define xlisten(fd, backlog)                                                   \
+	({                                                                     \
+		int __ret = listen((fd), (backlog));                           \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("listen");                                  \
+		__ret;                                                         \
+	})
+
+#define xsetsockopt(fd, level, name, val, len)                                 \
+	({                                                                     \
+		int __ret = setsockopt((fd), (level), (name), (val), (len));   \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("setsockopt(" #name ")");                   \
+		__ret;                                                         \
+	})
+
+#define xsend(fd, buf, len, flags)                                             \
+	({                                                                     \
+		ssize_t __ret = send((fd), (buf), (len), (flags));             \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("send");                                    \
+		__ret;                                                         \
+	})
+
+#define xrecv_nonblock(fd, buf, len, flags)                                    \
+	({                                                                     \
+		ssize_t __ret = recv_timeout((fd), (buf), (len), (flags),      \
+					     IO_TIMEOUT_SEC);                  \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("recv");                                    \
+		__ret;                                                         \
+	})
+
+#define xsocket(family, sotype, flags)                                         \
+	({                                                                     \
+		int __ret = socket(family, sotype, flags);                     \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("socket");                                  \
+		__ret;                                                         \
+	})
+
+static inline void close_fd(int *fd)
+{
+	if (*fd >= 0)
+		xclose(*fd);
+}
+
+#define __close_fd __attribute__((cleanup(close_fd)))
+
+static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
+{
+	return (struct sockaddr *)ss;
+}
+
+static inline void init_addr_loopback4(struct sockaddr_storage *ss,
+				       socklen_t *len)
+{
+	struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+	addr4->sin_family = AF_INET;
+	addr4->sin_port = 0;
+	addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	*len = sizeof(*addr4);
+}
+
+static inline void init_addr_loopback6(struct sockaddr_storage *ss,
+				       socklen_t *len)
+{
+	struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
+
+	addr6->sin6_family = AF_INET6;
+	addr6->sin6_port = 0;
+	addr6->sin6_addr = in6addr_loopback;
+	*len = sizeof(*addr6);
+}
+
+static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
+					    socklen_t *len)
+{
+	struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
+
+	addr->svm_family = AF_VSOCK;
+	addr->svm_port = VMADDR_PORT_ANY;
+	addr->svm_cid = VMADDR_CID_LOCAL;
+	*len = sizeof(*addr);
+}
+
+static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
+				      socklen_t *len)
+{
+	switch (family) {
+	case AF_INET:
+		init_addr_loopback4(ss, len);
+		return;
+	case AF_INET6:
+		init_addr_loopback6(ss, len);
+		return;
+	case AF_VSOCK:
+		init_addr_loopback_vsock(ss, len);
+		return;
+	default:
+		FAIL("unsupported address family %d", family);
+	}
+}
+
+static inline int enable_reuseport(int s, int progfd)
+{
+	int err, one = 1;
+
+	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+	if (err)
+		return -1;
+	err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
+			  sizeof(progfd));
+	if (err)
+		return -1;
+
+	return 0;
+}
+
+static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = 0;
+	int err, s;
+
+	init_addr_loopback(family, &addr, &len);
+
+	s = xsocket(family, sotype, 0);
+	if (s == -1)
+		return -1;
+
+	if (progfd >= 0)
+		enable_reuseport(s, progfd);
+
+	err = xbind(s, sockaddr(&addr), len);
+	if (err)
+		goto close;
+
+	if (sotype & SOCK_DGRAM)
+		return s;
+
+	err = xlisten(s, SOMAXCONN);
+	if (err)
+		goto close;
+
+	return s;
+close:
+	xclose(s);
+	return -1;
+}
+
+static inline int socket_loopback(int family, int sotype)
+{
+	return socket_loopback_reuseport(family, sotype, -1);
+}
+
+static inline int poll_connect(int fd, unsigned int timeout_sec)
+{
+	struct timeval timeout = { .tv_sec = timeout_sec };
+	fd_set wfds;
+	int r, eval;
+	socklen_t esize = sizeof(eval);
+
+	FD_ZERO(&wfds);
+	FD_SET(fd, &wfds);
+
+	r = select(fd + 1, NULL, &wfds, NULL, &timeout);
+	if (r == 0)
+		errno = ETIME;
+	if (r != 1)
+		return -1;
+
+	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
+		return -1;
+	if (eval != 0) {
+		errno = eval;
+		return -1;
+	}
+
+	return 0;
+}
+
+static inline int poll_read(int fd, unsigned int timeout_sec)
+{
+	struct timeval timeout = { .tv_sec = timeout_sec };
+	fd_set rfds;
+	int r;
+
+	FD_ZERO(&rfds);
+	FD_SET(fd, &rfds);
+
+	r = select(fd + 1, &rfds, NULL, NULL, &timeout);
+	if (r == 0)
+		errno = ETIME;
+
+	return r == 1 ? 0 : -1;
+}
+
+static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
+				 unsigned int timeout_sec)
+{
+	if (poll_read(fd, timeout_sec))
+		return -1;
+
+	return accept(fd, addr, len);
+}
+
+static inline int recv_timeout(int fd, void *buf, size_t len, int flags,
+			       unsigned int timeout_sec)
+{
+	if (poll_read(fd, timeout_sec))
+		return -1;
+
+	return recv(fd, buf, len, flags);
+}
+
+
+static inline int create_pair(int family, int sotype, int *p0, int *p1)
+{
+	__close_fd int s, c = -1, p = -1;
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int err;
+
+	s = socket_loopback(family, sotype);
+	if (s < 0)
+		return s;
+
+	err = xgetsockname(s, sockaddr(&addr), &len);
+	if (err)
+		return err;
+
+	c = xsocket(family, sotype, 0);
+	if (c < 0)
+		return c;
+
+	err = connect(c, sockaddr(&addr), len);
+	if (err) {
+		if (errno != EINPROGRESS) {
+			FAIL_ERRNO("connect");
+			return err;
+		}
+
+		err = poll_connect(c, IO_TIMEOUT_SEC);
+		if (err) {
+			FAIL_ERRNO("poll_connect");
+			return err;
+		}
+	}
+
+	switch (sotype & SOCK_TYPE_MASK) {
+	case SOCK_DGRAM:
+		err = xgetsockname(c, sockaddr(&addr), &len);
+		if (err)
+			return err;
+
+		err = xconnect(s, sockaddr(&addr), len);
+		if (err)
+			return err;
+
+		*p0 = take_fd(s);
+		break;
+	case SOCK_STREAM:
+	case SOCK_SEQPACKET:
+		p = xaccept_nonblock(s, NULL, NULL);
+		if (p < 0)
+			return p;
+
+		*p0 = take_fd(p);
+		break;
+	default:
+		FAIL("Unsupported socket type %#x", sotype);
+		return -EOPNOTSUPP;
+	}
+
+	*p1 = take_fd(c);
+	return 0;
+}
+
+static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
+				      int *p0, int *p1)
+{
+	int err;
+
+	err = create_pair(family, sotype, c0, p0);
+	if (err)
+		return err;
+
+	err = create_pair(family, sotype, c1, p1);
+	if (err) {
+		close(*c0);
+		close(*p0);
+	}
+
+	return err;
+}
+
+#endif // __SOCKET_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
index 38e35c72bdaa..3e5571dd578d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -1,139 +1,12 @@
 #ifndef __SOCKMAP_HELPERS__
 #define __SOCKMAP_HELPERS__
 
-#include <linux/vm_sockets.h>
+#include "socket_helpers.h"
 
-/* include/linux/net.h */
-#define SOCK_TYPE_MASK 0xf
-
-#define IO_TIMEOUT_SEC 30
-#define MAX_STRERR_LEN 256
 #define MAX_TEST_NAME 80
 
-/* workaround for older vm_sockets.h */
-#ifndef VMADDR_CID_LOCAL
-#define VMADDR_CID_LOCAL 1
-#endif
-
 #define __always_unused	__attribute__((__unused__))
 
-/* include/linux/cleanup.h */
-#define __get_and_null(p, nullvalue)                                           \
-	({                                                                     \
-		__auto_type __ptr = &(p);                                      \
-		__auto_type __val = *__ptr;                                    \
-		*__ptr = nullvalue;                                            \
-		__val;                                                         \
-	})
-
-#define take_fd(fd) __get_and_null(fd, -EBADF)
-
-#define _FAIL(errnum, fmt...)                                                  \
-	({                                                                     \
-		error_at_line(0, (errnum), __func__, __LINE__, fmt);           \
-		CHECK_FAIL(true);                                              \
-	})
-#define FAIL(fmt...) _FAIL(0, fmt)
-#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
-#define FAIL_LIBBPF(err, msg)                                                  \
-	({                                                                     \
-		char __buf[MAX_STRERR_LEN];                                    \
-		libbpf_strerror((err), __buf, sizeof(__buf));                  \
-		FAIL("%s: %s", (msg), __buf);                                  \
-	})
-
-/* Wrappers that fail the test on error and report it. */
-
-#define xaccept_nonblock(fd, addr, len)                                        \
-	({                                                                     \
-		int __ret =                                                    \
-			accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC);   \
-		if (__ret == -1)                                               \
-			FAIL_ERRNO("accept");                                  \
-		__ret;                                                         \
-	})
-
-#define xbind(fd, addr, len)                                                   \
-	({                                                                     \
-		int __ret = bind((fd), (addr), (len));                         \
-		if (__ret == -1)                                               \
-			FAIL_ERRNO("bind");                                    \
-		__ret;                                                         \
-	})
-
-#define xclose(fd)                                                             \
-	({                                                                     \
-		int __ret = close((fd));                                       \
-		if (__ret == -1)                                               \
-			FAIL_ERRNO("close");                                   \
-		__ret;                                                         \
-	})
-
-#define xconnect(fd, addr, len)                                                \
-	({                                                                     \
-		int __ret = connect((fd), (addr), (len));                      \
-		if (__ret == -1)                                               \
-			FAIL_ERRNO("connect");                                 \
-		__ret;                                                         \
-	})
-
-#define xgetsockname(fd, addr, len)                                            \
-	({                                                                     \
-		int __ret = getsockname((fd), (addr), (len));                  \
-		if (__ret == -1)                                               \
-			FAIL_ERRNO("getsockname");                             \
-		__ret;                                                         \
-	})
-
-#define xgetsockopt(fd, level, name, val, len)                                 \
-	({                                                                     \
-		int __ret = getsockopt((fd), (level), (name), (val), (len));   \
-		if (__ret == -1)                                               \
-			FAIL_ERRNO("getsockopt(" #name ")");                   \
-		__ret;                                                         \
-	})
-
-#define xlisten(fd, backlog)                                                   \
-	({                                                                     \
-		int __ret = listen((fd), (backlog));                           \
-		if (__ret == -1)                                               \
-			FAIL_ERRNO("listen");                                  \
-		__ret;                                                         \
-	})
-
-#define xsetsockopt(fd, level, name, val, len)                                 \
-	({                                                                     \
-		int __ret = setsockopt((fd), (level), (name), (val), (len));   \
-		if (__ret == -1)                                               \
-			FAIL_ERRNO("setsockopt(" #name ")");                   \
-		__ret;                                                         \
-	})
-
-#define xsend(fd, buf, len, flags)                                             \
-	({                                                                     \
-		ssize_t __ret = send((fd), (buf), (len), (flags));             \
-		if (__ret == -1)                                               \
-			FAIL_ERRNO("send");                                    \
-		__ret;                                                         \
-	})
-
-#define xrecv_nonblock(fd, buf, len, flags)                                    \
-	({                                                                     \
-		ssize_t __ret = recv_timeout((fd), (buf), (len), (flags),      \
-					     IO_TIMEOUT_SEC);                  \
-		if (__ret == -1)                                               \
-			FAIL_ERRNO("recv");                                    \
-		__ret;                                                         \
-	})
-
-#define xsocket(family, sotype, flags)                                         \
-	({                                                                     \
-		int __ret = socket(family, sotype, flags);                     \
-		if (__ret == -1)                                               \
-			FAIL_ERRNO("socket");                                  \
-		__ret;                                                         \
-	})
-
 #define xbpf_map_delete_elem(fd, key)                                          \
 	({                                                                     \
 		int __ret = bpf_map_delete_elem((fd), (key));                  \
@@ -193,130 +66,6 @@
 		__ret;                                                         \
 	})
 
-static inline void close_fd(int *fd)
-{
-	if (*fd >= 0)
-		xclose(*fd);
-}
-
-#define __close_fd __attribute__((cleanup(close_fd)))
-
-static inline int poll_connect(int fd, unsigned int timeout_sec)
-{
-	struct timeval timeout = { .tv_sec = timeout_sec };
-	fd_set wfds;
-	int r, eval;
-	socklen_t esize = sizeof(eval);
-
-	FD_ZERO(&wfds);
-	FD_SET(fd, &wfds);
-
-	r = select(fd + 1, NULL, &wfds, NULL, &timeout);
-	if (r == 0)
-		errno = ETIME;
-	if (r != 1)
-		return -1;
-
-	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
-		return -1;
-	if (eval != 0) {
-		errno = eval;
-		return -1;
-	}
-
-	return 0;
-}
-
-static inline int poll_read(int fd, unsigned int timeout_sec)
-{
-	struct timeval timeout = { .tv_sec = timeout_sec };
-	fd_set rfds;
-	int r;
-
-	FD_ZERO(&rfds);
-	FD_SET(fd, &rfds);
-
-	r = select(fd + 1, &rfds, NULL, NULL, &timeout);
-	if (r == 0)
-		errno = ETIME;
-
-	return r == 1 ? 0 : -1;
-}
-
-static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
-				 unsigned int timeout_sec)
-{
-	if (poll_read(fd, timeout_sec))
-		return -1;
-
-	return accept(fd, addr, len);
-}
-
-static inline int recv_timeout(int fd, void *buf, size_t len, int flags,
-			       unsigned int timeout_sec)
-{
-	if (poll_read(fd, timeout_sec))
-		return -1;
-
-	return recv(fd, buf, len, flags);
-}
-
-static inline void init_addr_loopback4(struct sockaddr_storage *ss,
-				       socklen_t *len)
-{
-	struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
-
-	addr4->sin_family = AF_INET;
-	addr4->sin_port = 0;
-	addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-	*len = sizeof(*addr4);
-}
-
-static inline void init_addr_loopback6(struct sockaddr_storage *ss,
-				       socklen_t *len)
-{
-	struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
-
-	addr6->sin6_family = AF_INET6;
-	addr6->sin6_port = 0;
-	addr6->sin6_addr = in6addr_loopback;
-	*len = sizeof(*addr6);
-}
-
-static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
-					    socklen_t *len)
-{
-	struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
-
-	addr->svm_family = AF_VSOCK;
-	addr->svm_port = VMADDR_PORT_ANY;
-	addr->svm_cid = VMADDR_CID_LOCAL;
-	*len = sizeof(*addr);
-}
-
-static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
-				      socklen_t *len)
-{
-	switch (family) {
-	case AF_INET:
-		init_addr_loopback4(ss, len);
-		return;
-	case AF_INET6:
-		init_addr_loopback6(ss, len);
-		return;
-	case AF_VSOCK:
-		init_addr_loopback_vsock(ss, len);
-		return;
-	default:
-		FAIL("unsupported address family %d", family);
-	}
-}
-
-static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
-{
-	return (struct sockaddr *)ss;
-}
-
 static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
 {
 	u64 value;
@@ -334,136 +83,4 @@ static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
 	return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
 }
 
-static inline int enable_reuseport(int s, int progfd)
-{
-	int err, one = 1;
-
-	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
-	if (err)
-		return -1;
-	err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
-			  sizeof(progfd));
-	if (err)
-		return -1;
-
-	return 0;
-}
-
-static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
-{
-	struct sockaddr_storage addr;
-	socklen_t len = 0;
-	int err, s;
-
-	init_addr_loopback(family, &addr, &len);
-
-	s = xsocket(family, sotype, 0);
-	if (s == -1)
-		return -1;
-
-	if (progfd >= 0)
-		enable_reuseport(s, progfd);
-
-	err = xbind(s, sockaddr(&addr), len);
-	if (err)
-		goto close;
-
-	if (sotype & SOCK_DGRAM)
-		return s;
-
-	err = xlisten(s, SOMAXCONN);
-	if (err)
-		goto close;
-
-	return s;
-close:
-	xclose(s);
-	return -1;
-}
-
-static inline int socket_loopback(int family, int sotype)
-{
-	return socket_loopback_reuseport(family, sotype, -1);
-}
-
-static inline int create_pair(int family, int sotype, int *p0, int *p1)
-{
-	__close_fd int s, c = -1, p = -1;
-	struct sockaddr_storage addr;
-	socklen_t len = sizeof(addr);
-	int err;
-
-	s = socket_loopback(family, sotype);
-	if (s < 0)
-		return s;
-
-	err = xgetsockname(s, sockaddr(&addr), &len);
-	if (err)
-		return err;
-
-	c = xsocket(family, sotype, 0);
-	if (c < 0)
-		return c;
-
-	err = connect(c, sockaddr(&addr), len);
-	if (err) {
-		if (errno != EINPROGRESS) {
-			FAIL_ERRNO("connect");
-			return err;
-		}
-
-		err = poll_connect(c, IO_TIMEOUT_SEC);
-		if (err) {
-			FAIL_ERRNO("poll_connect");
-			return err;
-		}
-	}
-
-	switch (sotype & SOCK_TYPE_MASK) {
-	case SOCK_DGRAM:
-		err = xgetsockname(c, sockaddr(&addr), &len);
-		if (err)
-			return err;
-
-		err = xconnect(s, sockaddr(&addr), len);
-		if (err)
-			return err;
-
-		*p0 = take_fd(s);
-		break;
-	case SOCK_STREAM:
-	case SOCK_SEQPACKET:
-		p = xaccept_nonblock(s, NULL, NULL);
-		if (p < 0)
-			return p;
-
-		*p0 = take_fd(p);
-		break;
-	default:
-		FAIL("Unsupported socket type %#x", sotype);
-		return -EOPNOTSUPP;
-	}
-
-	*p1 = take_fd(c);
-	return 0;
-}
-
-static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
-				      int *p0, int *p1)
-{
-	int err;
-
-	err = create_pair(family, sotype, c0, p0);
-	if (err)
-		return err;
-
-	err = create_pair(family, sotype, c1, p1);
-	if (err) {
-		close(*c0);
-		close(*p0);
-	}
-
-	return err;
-}
-
 #endif // __SOCKMAP_HELPERS__
-- 
cgit v1.2.3


From 4a58963d10fa3cb654b859e3f9a8aecbcf9f4982 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Thu, 12 Dec 2024 19:40:57 -0800
Subject: selftests/bpf: Test bpf_skb_change_tail() in TC ingress

Similarly to the previous test, we also need a test case to cover
positive offsets as well, TC is an excellent hook for this.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Zijian Zhang <zijianzhang@bytedance.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20241213034057.246437-5-xiyou.wangcong@gmail.com
---
 .../selftests/bpf/prog_tests/tc_change_tail.c      |  62 ++++++++++++
 .../selftests/bpf/progs/test_tc_change_tail.c      | 106 +++++++++++++++++++++
 2 files changed, 168 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/tc_change_tail.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_tc_change_tail.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c
new file mode 100644
index 000000000000..74752233e779
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <test_progs.h>
+#include <linux/pkt_cls.h>
+
+#include "test_tc_change_tail.skel.h"
+#include "socket_helpers.h"
+
+#define LO_IFINDEX 1
+
+void test_tc_change_tail(void)
+{
+	LIBBPF_OPTS(bpf_tcx_opts, tcx_opts);
+	struct test_tc_change_tail *skel = NULL;
+	struct bpf_link *link;
+	int c1, p1;
+	char buf[2];
+	int ret;
+
+	skel = test_tc_change_tail__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_tc_change_tail__open_and_load"))
+		return;
+
+	link = bpf_program__attach_tcx(skel->progs.change_tail, LO_IFINDEX,
+				     &tcx_opts);
+	if (!ASSERT_OK_PTR(link, "bpf_program__attach_tcx"))
+		goto destroy;
+
+	skel->links.change_tail = link;
+	ret = create_pair(AF_INET, SOCK_DGRAM, &c1, &p1);
+	if (!ASSERT_OK(ret, "create_pair"))
+		goto destroy;
+
+	ret = xsend(p1, "Tr", 2, 0);
+	ASSERT_EQ(ret, 2, "xsend(p1)");
+	ret = recv(c1, buf, 2, 0);
+	ASSERT_EQ(ret, 2, "recv(c1)");
+	ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+	ret = xsend(p1, "G", 1, 0);
+	ASSERT_EQ(ret, 1, "xsend(p1)");
+	ret = recv(c1, buf, 2, 0);
+	ASSERT_EQ(ret, 1, "recv(c1)");
+	ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+	ret = xsend(p1, "E", 1, 0);
+	ASSERT_EQ(ret, 1, "xsend(p1)");
+	ret = recv(c1, buf, 1, 0);
+	ASSERT_EQ(ret, 1, "recv(c1)");
+	ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
+
+	ret = xsend(p1, "Z", 1, 0);
+	ASSERT_EQ(ret, 1, "xsend(p1)");
+	ret = recv(c1, buf, 1, 0);
+	ASSERT_EQ(ret, 1, "recv(c1)");
+	ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
+
+	close(c1);
+	close(p1);
+destroy:
+	test_tc_change_tail__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
new file mode 100644
index 000000000000..28edafe803f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/pkt_cls.h>
+
+long change_tail_ret = 1;
+
+static __always_inline struct iphdr *parse_ip_header(struct __sk_buff *skb, int *ip_proto)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	struct ethhdr *eth = data;
+	struct iphdr *iph;
+
+	/* Verify Ethernet header */
+	if ((void *)(data + sizeof(*eth)) > data_end)
+		return NULL;
+
+	/* Skip Ethernet header to get to IP header */
+	iph = (void *)(data + sizeof(struct ethhdr));
+
+	/* Verify IP header */
+	if ((void *)(data + sizeof(struct ethhdr) + sizeof(*iph)) > data_end)
+		return NULL;
+
+	/* Basic IP header validation */
+	if (iph->version != 4)  /* Only support IPv4 */
+		return NULL;
+
+	if (iph->ihl < 5)  /* Minimum IP header length */
+		return NULL;
+
+	*ip_proto = iph->protocol;
+	return iph;
+}
+
+static __always_inline struct udphdr *parse_udp_header(struct __sk_buff *skb, struct iphdr *iph)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *hdr = (void *)iph;
+	struct udphdr *udp;
+
+	/* Calculate UDP header position */
+	udp = hdr + (iph->ihl * 4);
+	hdr = (void *)udp;
+
+	/* Verify UDP header bounds */
+	if ((void *)(hdr + sizeof(*udp)) > data_end)
+		return NULL;
+
+	return udp;
+}
+
+SEC("tc/ingress")
+int change_tail(struct __sk_buff *skb)
+{
+	int len = skb->len;
+	struct udphdr *udp;
+	struct iphdr *iph;
+	void *data_end;
+	char *payload;
+	int ip_proto;
+
+	bpf_skb_pull_data(skb, len);
+
+	data_end = (void *)(long)skb->data_end;
+	iph = parse_ip_header(skb, &ip_proto);
+	if (!iph)
+		return TCX_PASS;
+
+	if (ip_proto != IPPROTO_UDP)
+		return TCX_PASS;
+
+	udp = parse_udp_header(skb, iph);
+	if (!udp)
+		return TCX_PASS;
+
+	payload = (char *)udp + (sizeof(struct udphdr));
+	if (payload + 1 > (char *)data_end)
+		return TCX_PASS;
+
+	if (payload[0] == 'T') { /* Trim the packet */
+		change_tail_ret = bpf_skb_change_tail(skb, len - 1, 0);
+		if (!change_tail_ret)
+			bpf_skb_change_tail(skb, len, 0);
+		return TCX_PASS;
+	} else if (payload[0] == 'G') { /* Grow the packet */
+		change_tail_ret = bpf_skb_change_tail(skb, len + 1, 0);
+		if (!change_tail_ret)
+			bpf_skb_change_tail(skb, len, 0);
+		return TCX_PASS;
+	} else if (payload[0] == 'E') { /* Error */
+		change_tail_ret = bpf_skb_change_tail(skb, 65535, 0);
+		return TCX_PASS;
+	} else if (payload[0] == 'Z') { /* Zero */
+		change_tail_ret = bpf_skb_change_tail(skb, 0, 0);
+		return TCX_PASS;
+	}
+	return TCX_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From ce2b93fc1dfa1c82f2576aa571731c4e5dcc8dd7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 24 Dec 2024 14:09:15 -1000
Subject: sched_ext: Fix dsq_local_on selftest

The dsp_local_on selftest expects the scheduler to fail by trying to
schedule an e.g. CPU-affine task to the wrong CPU. However, this isn't
guaranteed to happen in the 1 second window that the test is running.
Besides, it's odd to have this particular exception path tested when there
are no other tests that verify that the interface is working at all - e.g.
the test would pass if dsp_local_on interface is completely broken and fails
on any attempt.

Flip the test so that it verifies that the feature works. While at it, fix a
typo in the info message.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Ihor Solodrai <ihor.solodrai@pm.me>
Link: http://lkml.kernel.org/r/Z1n9v7Z6iNJ-wKmq@slm.duckdns.org
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 tools/testing/selftests/sched_ext/dsp_local_on.bpf.c | 5 ++++-
 tools/testing/selftests/sched_ext/dsp_local_on.c     | 5 +++--
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
index 6325bf76f47e..fbda6bf54671 100644
--- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
+++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
@@ -43,7 +43,10 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev)
 	if (!p)
 		return;
 
-	target = bpf_get_prandom_u32() % nr_cpus;
+	if (p->nr_cpus_allowed == nr_cpus)
+		target = bpf_get_prandom_u32() % nr_cpus;
+	else
+		target = scx_bpf_task_cpu(p);
 
 	scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0);
 	bpf_task_release(p);
diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.c b/tools/testing/selftests/sched_ext/dsp_local_on.c
index 472851b56854..0ff27e57fe43 100644
--- a/tools/testing/selftests/sched_ext/dsp_local_on.c
+++ b/tools/testing/selftests/sched_ext/dsp_local_on.c
@@ -34,9 +34,10 @@ static enum scx_test_status run(void *ctx)
 	/* Just sleeping is fine, plenty of scheduling events happening */
 	sleep(1);
 
-	SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_ERROR));
 	bpf_link__destroy(link);
 
+	SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_UNREG));
+
 	return SCX_TEST_PASS;
 }
 
@@ -50,7 +51,7 @@ static void cleanup(void *ctx)
 struct scx_test dsp_local_on = {
 	.name = "dsp_local_on",
 	.description = "Verify we can directly dispatch tasks to a local DSQs "
-		       "from osp.dispatch()",
+		       "from ops.dispatch()",
 	.setup = setup,
 	.run = run,
 	.cleanup = cleanup,
-- 
cgit v1.2.3


From 6cc45f8c1f898570916044f606be9890d295e129 Mon Sep 17 00:00:00 2001
From: Tomas Glozar <tglozar@redhat.com>
Date: Wed, 27 Nov 2024 14:41:30 +0100
Subject: rtla/timerlat: Fix histogram ALL for zero samples

rtla timerlat hist currently computers the minimum, maximum and average
latency even in cases when there are zero samples. This leads to
nonsensical values being calculated for maximum and minimum, and to
divide by zero for average.

A similar bug is fixed by 01b05fc0e5f3 ("rtla/timerlat: Fix histogram
report when a cpu count is 0") but the bug still remains for printing
the sum over all CPUs in timerlat_print_stats_all.

The issue can be reproduced with this command:

$ rtla timerlat hist -U -d 1s
Index
over:
count:
min:
avg:
max:
Floating point exception (core dumped)

(There are always no samples with -U unless the user workload is
created.)

Fix the bug by omitting max/min/avg when sample count is zero,
displaying a dash instead, just like we already do for the individual
CPUs. The logic is moved into a new function called
format_summary_value, which is used for both the individual CPUs
and for the overall summary.

Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/20241127134130.51171-1-tglozar@redhat.com
Fixes: 1462501c7a8 ("rtla/timerlat: Add a summary for hist mode")
Signed-off-by: Tomas Glozar <tglozar@redhat.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 tools/tracing/rtla/src/timerlat_hist.c | 177 ++++++++++++++++++---------------
 1 file changed, 96 insertions(+), 81 deletions(-)

(limited to 'tools')

diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
index 8b66387e5f35..4403cc4eba30 100644
--- a/tools/tracing/rtla/src/timerlat_hist.c
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -281,6 +281,21 @@ static void timerlat_hist_header(struct osnoise_tool *tool)
 	trace_seq_reset(s);
 }
 
+/*
+ * format_summary_value - format a line of summary value (min, max or avg)
+ * of hist data
+ */
+static void format_summary_value(struct trace_seq *seq,
+				 int count,
+				 unsigned long long val,
+				 bool avg)
+{
+	if (count)
+		trace_seq_printf(seq, "%9llu ", avg ? val / count : val);
+	else
+		trace_seq_printf(seq, "%9c ", '-');
+}
+
 /*
  * timerlat_print_summary - print the summary of the hist data to the output
  */
@@ -328,29 +343,23 @@ timerlat_print_summary(struct timerlat_hist_params *params,
 		if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
 			continue;
 
-		if (!params->no_irq) {
-			if (data->hist[cpu].irq_count)
-				trace_seq_printf(trace->seq, "%9llu ",
-						data->hist[cpu].min_irq);
-			else
-				trace_seq_printf(trace->seq, "        - ");
-		}
+		if (!params->no_irq)
+			format_summary_value(trace->seq,
+					     data->hist[cpu].irq_count,
+					     data->hist[cpu].min_irq,
+					     false);
 
-		if (!params->no_thread) {
-			if (data->hist[cpu].thread_count)
-				trace_seq_printf(trace->seq, "%9llu ",
-						data->hist[cpu].min_thread);
-			else
-				trace_seq_printf(trace->seq, "        - ");
-		}
+		if (!params->no_thread)
+			format_summary_value(trace->seq,
+					     data->hist[cpu].thread_count,
+					     data->hist[cpu].min_thread,
+					     false);
 
-		if (params->user_hist) {
-			if (data->hist[cpu].user_count)
-				trace_seq_printf(trace->seq, "%9llu ",
-						data->hist[cpu].min_user);
-			else
-				trace_seq_printf(trace->seq, "        - ");
-		}
+		if (params->user_hist)
+			format_summary_value(trace->seq,
+					     data->hist[cpu].user_count,
+					     data->hist[cpu].min_user,
+					     false);
 	}
 	trace_seq_printf(trace->seq, "\n");
 
@@ -364,29 +373,23 @@ timerlat_print_summary(struct timerlat_hist_params *params,
 		if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
 			continue;
 
-		if (!params->no_irq) {
-			if (data->hist[cpu].irq_count)
-				trace_seq_printf(trace->seq, "%9llu ",
-						 data->hist[cpu].sum_irq / data->hist[cpu].irq_count);
-			else
-				trace_seq_printf(trace->seq, "        - ");
-		}
+		if (!params->no_irq)
+			format_summary_value(trace->seq,
+					     data->hist[cpu].irq_count,
+					     data->hist[cpu].sum_irq,
+					     true);
 
-		if (!params->no_thread) {
-			if (data->hist[cpu].thread_count)
-				trace_seq_printf(trace->seq, "%9llu ",
-						 data->hist[cpu].sum_thread / data->hist[cpu].thread_count);
-			else
-				trace_seq_printf(trace->seq, "        - ");
-		}
+		if (!params->no_thread)
+			format_summary_value(trace->seq,
+					     data->hist[cpu].thread_count,
+					     data->hist[cpu].sum_thread,
+					     true);
 
-		if (params->user_hist) {
-			if (data->hist[cpu].user_count)
-				trace_seq_printf(trace->seq, "%9llu ",
-						 data->hist[cpu].sum_user / data->hist[cpu].user_count);
-			else
-				trace_seq_printf(trace->seq, "        - ");
-		}
+		if (params->user_hist)
+			format_summary_value(trace->seq,
+					     data->hist[cpu].user_count,
+					     data->hist[cpu].sum_user,
+					     true);
 	}
 	trace_seq_printf(trace->seq, "\n");
 
@@ -400,29 +403,23 @@ timerlat_print_summary(struct timerlat_hist_params *params,
 		if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
 			continue;
 
-		if (!params->no_irq) {
-			if (data->hist[cpu].irq_count)
-				trace_seq_printf(trace->seq, "%9llu ",
-						 data->hist[cpu].max_irq);
-			else
-				trace_seq_printf(trace->seq, "        - ");
-		}
+		if (!params->no_irq)
+			format_summary_value(trace->seq,
+					     data->hist[cpu].irq_count,
+					     data->hist[cpu].max_irq,
+					     false);
 
-		if (!params->no_thread) {
-			if (data->hist[cpu].thread_count)
-				trace_seq_printf(trace->seq, "%9llu ",
-						data->hist[cpu].max_thread);
-			else
-				trace_seq_printf(trace->seq, "        - ");
-		}
+		if (!params->no_thread)
+			format_summary_value(trace->seq,
+					     data->hist[cpu].thread_count,
+					     data->hist[cpu].max_thread,
+					     false);
 
-		if (params->user_hist) {
-			if (data->hist[cpu].user_count)
-				trace_seq_printf(trace->seq, "%9llu ",
-						data->hist[cpu].max_user);
-			else
-				trace_seq_printf(trace->seq, "        - ");
-		}
+		if (params->user_hist)
+			format_summary_value(trace->seq,
+					     data->hist[cpu].user_count,
+					     data->hist[cpu].max_user,
+					     false);
 	}
 	trace_seq_printf(trace->seq, "\n");
 	trace_seq_do_printf(trace->seq);
@@ -506,16 +503,22 @@ timerlat_print_stats_all(struct timerlat_hist_params *params,
 		trace_seq_printf(trace->seq, "min:  ");
 
 	if (!params->no_irq)
-		trace_seq_printf(trace->seq, "%9llu ",
-				 sum.min_irq);
+		format_summary_value(trace->seq,
+				     sum.irq_count,
+				     sum.min_irq,
+				     false);
 
 	if (!params->no_thread)
-		trace_seq_printf(trace->seq, "%9llu ",
-				 sum.min_thread);
+		format_summary_value(trace->seq,
+				     sum.thread_count,
+				     sum.min_thread,
+				     false);
 
 	if (params->user_hist)
-		trace_seq_printf(trace->seq, "%9llu ",
-				 sum.min_user);
+		format_summary_value(trace->seq,
+				     sum.user_count,
+				     sum.min_user,
+				     false);
 
 	trace_seq_printf(trace->seq, "\n");
 
@@ -523,16 +526,22 @@ timerlat_print_stats_all(struct timerlat_hist_params *params,
 		trace_seq_printf(trace->seq, "avg:  ");
 
 	if (!params->no_irq)
-		trace_seq_printf(trace->seq, "%9llu ",
-				 sum.sum_irq / sum.irq_count);
+		format_summary_value(trace->seq,
+				     sum.irq_count,
+				     sum.sum_irq,
+				     true);
 
 	if (!params->no_thread)
-		trace_seq_printf(trace->seq, "%9llu ",
-				 sum.sum_thread / sum.thread_count);
+		format_summary_value(trace->seq,
+				     sum.thread_count,
+				     sum.sum_thread,
+				     true);
 
 	if (params->user_hist)
-		trace_seq_printf(trace->seq, "%9llu ",
-				 sum.sum_user / sum.user_count);
+		format_summary_value(trace->seq,
+				     sum.user_count,
+				     sum.sum_user,
+				     true);
 
 	trace_seq_printf(trace->seq, "\n");
 
@@ -540,16 +549,22 @@ timerlat_print_stats_all(struct timerlat_hist_params *params,
 		trace_seq_printf(trace->seq, "max:  ");
 
 	if (!params->no_irq)
-		trace_seq_printf(trace->seq, "%9llu ",
-				 sum.max_irq);
+		format_summary_value(trace->seq,
+				     sum.irq_count,
+				     sum.max_irq,
+				     false);
 
 	if (!params->no_thread)
-		trace_seq_printf(trace->seq, "%9llu ",
-				 sum.max_thread);
+		format_summary_value(trace->seq,
+				     sum.thread_count,
+				     sum.max_thread,
+				     false);
 
 	if (params->user_hist)
-		trace_seq_printf(trace->seq, "%9llu ",
-				 sum.max_user);
+		format_summary_value(trace->seq,
+				     sum.user_count,
+				     sum.max_user,
+				     false);
 
 	trace_seq_printf(trace->seq, "\n");
 	trace_seq_do_printf(trace->seq);
-- 
cgit v1.2.3


From 31ad36a271290648e7c2288a03d7b933d20254d6 Mon Sep 17 00:00:00 2001
From: chenchangcheng <ccc194101@163.com>
Date: Fri, 20 Dec 2024 15:48:47 +0800
Subject: objtool: Add bch2_trans_unlocked_error() to bcachefs noreturns

Fix the following objtool warning during build time:

    fs/bcachefs/btree_trans_commit.o: warning: objtool: bch2_trans_commit_write_locked.isra.0() falls through to next function do_bch2_trans_commit.isra.0()
    fs/bcachefs/btree_trans_commit.o: warning: objtool: .text: unexpected end of section
......
    fs/bcachefs/btree_update.o: warning: objtool: bch2_trans_update_get_key_cache() falls through to next function flush_new_cached_update()
    fs/bcachefs/btree_update.o: warning: objtool: flush_new_cached_update() falls through to next function bch2_trans_update_by_path()

bch2_trans_unlocked_error() is an Obviously Correct (tm) panic() wrapper,
add it to the list of known noreturns.

[ mingo: Improved the changelog ]

Fixes: fd104e2967b7 ("bcachefs: bch2_trans_verify_not_unlocked()")
Signed-off-by: chenchangcheng <chenchangcheng@kylinos.cn>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lkml.kernel.org/r/20241220074847.3418134-1-ccc194101@163.com
---
 tools/objtool/noreturns.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
index f37614cc2c1b..b2174894f9f7 100644
--- a/tools/objtool/noreturns.h
+++ b/tools/objtool/noreturns.h
@@ -19,6 +19,7 @@ NORETURN(__x64_sys_exit_group)
 NORETURN(arch_cpu_idle_dead)
 NORETURN(bch2_trans_in_restart_error)
 NORETURN(bch2_trans_restart_error)
+NORETURN(bch2_trans_unlocked_error)
 NORETURN(cpu_bringup_and_idle)
 NORETURN(cpu_startup_entry)
 NORETURN(do_exit)
-- 
cgit v1.2.3


From ea0916e01d0b0f2cce1369ac1494239a79827270 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Date: Thu, 28 Nov 2024 15:06:18 +0000
Subject: selftests/memfd: add test for mapping write-sealed memfd read-only

Now we have reinstated the ability to map F_SEAL_WRITE mappings read-only,
assert that we are able to do this in a test to ensure that we do not
regress this again.

Link: https://lkml.kernel.org/r/a6377ec470b14c0539b4600cf8fa24bf2e4858ae.1732804776.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Jann Horn <jannh@google.com>
Cc: Julian Orth <ju.orth@gmail.com>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/memfd/memfd_test.c | 43 ++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 0a0b55516028..c0c53451a16d 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -282,6 +282,24 @@ static void *mfd_assert_mmap_shared(int fd)
 	return p;
 }
 
+static void *mfd_assert_mmap_read_shared(int fd)
+{
+	void *p;
+
+	p = mmap(NULL,
+		 mfd_def_size,
+		 PROT_READ,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+
+	return p;
+}
+
 static void *mfd_assert_mmap_private(int fd)
 {
 	void *p;
@@ -980,6 +998,30 @@ static void test_seal_future_write(void)
 	close(fd);
 }
 
+static void test_seal_write_map_read_shared(void)
+{
+	int fd;
+	void *p;
+
+	printf("%s SEAL-WRITE-MAP-READ\n", memfd_str);
+
+	fd = mfd_assert_new("kern_memfd_seal_write_map_read",
+			    mfd_def_size,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+	mfd_assert_add_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE);
+
+	p = mfd_assert_mmap_read_shared(fd);
+
+	mfd_assert_read(fd);
+	mfd_assert_read_shared(fd);
+	mfd_fail_write(fd);
+
+	munmap(p, mfd_def_size);
+	close(fd);
+}
+
 /*
  * Test SEAL_SHRINK
  * Test whether SEAL_SHRINK actually prevents shrinking
@@ -1593,6 +1635,7 @@ int main(int argc, char **argv)
 
 	test_seal_write();
 	test_seal_future_write();
+	test_seal_write_map_read_shared();
 	test_seal_shrink();
 	test_seal_grow();
 	test_seal_resize();
-- 
cgit v1.2.3