From 6715df8d5d24655b9fd368e904028112b54c7de1 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Sun, 19 Feb 2023 22:04:26 +0200
Subject: bpf: Allow reads from uninit stack

This commits updates the following functions to allow reads from
uninitialized stack locations when env->allow_uninit_stack option is
enabled:
- check_stack_read_fixed_off()
- check_stack_range_initialized(), called from:
  - check_stack_read_var_off()
  - check_helper_mem_access()

Such change allows to relax logic in stacksafe() to treat STACK_MISC
and STACK_INVALID in a same way and make the following stack slot
configurations equivalent:

  |  Cached state    |  Current state   |
  |   stack slot     |   stack slot     |
  |------------------+------------------|
  | STACK_INVALID or | STACK_INVALID or |
  | STACK_MISC       | STACK_SPILL   or |
  |                  | STACK_MISC    or |
  |                  | STACK_ZERO    or |
  |                  | STACK_DYNPTR     |

This leads to significant verification speed gains (see below).

The idea was suggested by Andrii Nakryiko [1] and initial patch was
created by Alexei Starovoitov [2].

Currently the env->allow_uninit_stack is allowed for programs loaded
by users with CAP_PERFMON or CAP_SYS_ADMIN capabilities.

A number of test cases from verifier/*.c were expecting uninitialized
stack access to be an error. These test cases were updated to execute
in unprivileged mode (thus preserving the tests).

The test progs/test_global_func10.c expected "invalid indirect read
from stack" error message because of the access to uninitialized
memory region. This error is no longer possible in privileged mode.
The test is updated to provoke an error "invalid indirect access to
stack" because of access to invalid stack address (such error is not
verified by progs/test_global_func*.c series of tests).

The following tests had to be removed because these can't be made
unprivileged:
- verifier/sock.c:
  - "sk_storage_get(map, skb->sk, &stack_value, 1): partially init
  stack_value"
  BPF_PROG_TYPE_SCHED_CLS programs are not executed in unprivileged mode.
- verifier/var_off.c:
  - "indirect variable-offset stack access, max_off+size > max_initialized"
  - "indirect variable-offset stack access, uninitialized"
  These tests verify that access to uninitialized stack values is
  detected when stack offset is not a constant. However, variable
  stack access is prohibited in unprivileged mode, thus these tests
  are no longer valid.

 * * *

Here is veristat log comparing this patch with current master on a
set of selftest binaries listed in tools/testing/selftests/bpf/veristat.cfg
and cilium BPF binaries (see [3]):

$ ./veristat -e file,prog,states -C -f 'states_pct<-30' master.log current.log
File                        Program                     States (A)  States (B)  States    (DIFF)
--------------------------  --------------------------  ----------  ----------  ----------------
bpf_host.o                  tail_handle_ipv6_from_host         349         244    -105 (-30.09%)
bpf_host.o                  tail_handle_nat_fwd_ipv4          1320         895    -425 (-32.20%)
bpf_lxc.o                   tail_handle_nat_fwd_ipv4          1320         895    -425 (-32.20%)
bpf_sock.o                  cil_sock4_connect                   70          48     -22 (-31.43%)
bpf_sock.o                  cil_sock4_sendmsg                   68          46     -22 (-32.35%)
bpf_xdp.o                   tail_handle_nat_fwd_ipv4          1554         803    -751 (-48.33%)
bpf_xdp.o                   tail_lb_ipv4                      6457        2473   -3984 (-61.70%)
bpf_xdp.o                   tail_lb_ipv6                      7249        3908   -3341 (-46.09%)
pyperf600_bpf_loop.bpf.o    on_event                           287         145    -142 (-49.48%)
strobemeta.bpf.o            on_event                         15915        4772  -11143 (-70.02%)
strobemeta_nounroll2.bpf.o  on_event                         17087        3820  -13267 (-77.64%)
xdp_synproxy_kern.bpf.o     syncookie_tc                     21271        6635  -14636 (-68.81%)
xdp_synproxy_kern.bpf.o     syncookie_xdp                    23122        6024  -17098 (-73.95%)
--------------------------  --------------------------  ----------  ----------  ----------------

Note: I limited selection by states_pct<-30%.

Inspection of differences in pyperf600_bpf_loop behavior shows that
the following patch for the test removes almost all differences:

    - a/tools/testing/selftests/bpf/progs/pyperf.h
    + b/tools/testing/selftests/bpf/progs/pyperf.h
    @ -266,8 +266,8 @ int __on_event(struct bpf_raw_tracepoint_args *ctx)
            }

            if (event->pthread_match || !pidData->use_tls) {
    -               void* frame_ptr;
    -               FrameData frame;
    +               void* frame_ptr = 0;
    +               FrameData frame = {};
                    Symbol sym = {};
                    int cur_cpu = bpf_get_smp_processor_id();

W/o this patch the difference comes from the following pattern
(for different variables):

    static bool get_frame_data(... FrameData *frame ...)
    {
        ...
        bpf_probe_read_user(&frame->f_code, ...);
        if (!frame->f_code)
            return false;
        ...
        bpf_probe_read_user(&frame->co_name, ...);
        if (frame->co_name)
            ...;
    }

    int __on_event(struct bpf_raw_tracepoint_args *ctx)
    {
        FrameData frame;
        ...
        get_frame_data(... &frame ...) // indirectly via a bpf_loop & callback
        ...
    }

    SEC("raw_tracepoint/kfree_skb")
    int on_event(struct bpf_raw_tracepoint_args* ctx)
    {
        ...
        ret |= __on_event(ctx);
        ret |= __on_event(ctx);
        ...
    }

With regards to value `frame->co_name` the following is important:
- Because of the conditional `if (!frame->f_code)` each call to
  __on_event() produces two states, one with `frame->co_name` marked
  as STACK_MISC, another with it as is (and marked STACK_INVALID on a
  first call).
- The call to bpf_probe_read_user() does not mark stack slots
  corresponding to `&frame->co_name` as REG_LIVE_WRITTEN but it marks
  these slots as BPF_MISC, this happens because of the following loop
  in the check_helper_call():

	for (i = 0; i < meta.access_size; i++) {
		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
				       BPF_WRITE, -1, false);
		if (err)
			return err;
	}

  Note the size of the write, it is a one byte write for each byte
  touched by a helper. The BPF_B write does not lead to write marks
  for the target stack slot.
- Which means that w/o this patch when second __on_event() call is
  verified `if (frame->co_name)` will propagate read marks first to a
  stack slot with STACK_MISC marks and second to a stack slot with
  STACK_INVALID marks and these states would be considered different.

[1] https://lore.kernel.org/bpf/CAEf4BzY3e+ZuC6HUa8dCiUovQRg2SzEk7M-dSkqNZyn=xEmnPA@mail.gmail.com/
[2] https://lore.kernel.org/bpf/CAADnVQKs2i1iuZ5SUGuJtxWVfGYR9kDgYKhq3rNV+kBLQCu7rA@mail.gmail.com/
[3] git@github.com:anakryiko/cilium.git

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Co-developed-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20230219200427.606541-2-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/progs/test_global_func10.c       |   8 +-
 tools/testing/selftests/bpf/verifier/calls.c       |  13 ++-
 .../selftests/bpf/verifier/helper_access_var_len.c | 104 ++++++++++++++-------
 tools/testing/selftests/bpf/verifier/int_ptr.c     |   9 +-
 .../selftests/bpf/verifier/search_pruning.c        |  13 ++-
 tools/testing/selftests/bpf/verifier/sock.c        |  27 ------
 tools/testing/selftests/bpf/verifier/spill_fill.c  |   7 +-
 tools/testing/selftests/bpf/verifier/var_off.c     |  52 -----------
 8 files changed, 98 insertions(+), 135 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c
index 98327bdbbfd2..8fba3f3649e2 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func10.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func10.c
@@ -5,12 +5,12 @@
 #include "bpf_misc.h"
 
 struct Small {
-	int x;
+	long x;
 };
 
 struct Big {
-	int x;
-	int y;
+	long x;
+	long y;
 };
 
 __noinline int foo(const struct Big *big)
@@ -22,7 +22,7 @@ __noinline int foo(const struct Big *big)
 }
 
 SEC("cgroup_skb/ingress")
-__failure __msg("invalid indirect read from stack")
+__failure __msg("invalid indirect access to stack")
 int global_func10(struct __sk_buff *skb)
 {
 	const struct Small small = {.x = skb->len };
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 9d993926bf0e..289ed202ec66 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -2221,19 +2221,22 @@
 	 * that fp-8 stack slot was unused in the fall-through
 	 * branch and will accept the program incorrectly
 	 */
-	BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2),
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 2, 2),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_JMP_IMM(BPF_JA, 0, 0, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_48b = { 6 },
-	.errstr = "invalid indirect read from stack R2 off -8+0 size 8",
-	.result = REJECT,
-	.prog_type = BPF_PROG_TYPE_XDP,
+	.fixup_map_hash_48b = { 7 },
+	.errstr_unpriv = "invalid indirect read from stack R2 off -8+0 size 8",
+	.result_unpriv = REJECT,
+	/* in privileged mode reads from uninitialized stack locations are permitted */
+	.result = ACCEPT,
 },
 {
 	"calls: ctx read at start of subprog",
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
index a6c869a7319c..9c4885885aba 100644
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
@@ -29,19 +29,30 @@
 {
 	"helper access to variable memory: stack, bitwise AND, zero included",
 	.insns = {
-	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
-	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
-	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
-	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
-	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
+	/* set max stack size */
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0),
+	/* set r3 to a random value */
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+	/* use bitwise AND to limit r3 range to [0, 64] */
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 64),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+	BPF_MOV64_IMM(BPF_REG_4, 0),
+	/* Call bpf_ringbuf_output(), it is one of a few helper functions with
+	 * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.
+	 * For unpriv this should signal an error, because memory at &fp[-64] is
+	 * not initialized.
+	 */
+	BPF_EMIT_CALL(BPF_FUNC_ringbuf_output),
 	BPF_EXIT_INSN(),
 	},
-	.errstr = "invalid indirect read from stack R1 off -64+0 size 64",
-	.result = REJECT,
-	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.fixup_map_ringbuf = { 4 },
+	.errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64",
+	.result_unpriv = REJECT,
+	/* in privileged mode reads from uninitialized stack locations are permitted */
+	.result = ACCEPT,
 },
 {
 	"helper access to variable memory: stack, bitwise AND + JMP, wrong max",
@@ -183,20 +194,31 @@
 {
 	"helper access to variable memory: stack, JMP, no min check",
 	.insns = {
-	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
-	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
-	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
-	BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
-	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
+	/* set max stack size */
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0),
+	/* set r3 to a random value */
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+	/* use JMP to limit r3 range to [0, 64] */
+	BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 64, 6),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+	BPF_MOV64_IMM(BPF_REG_4, 0),
+	/* Call bpf_ringbuf_output(), it is one of a few helper functions with
+	 * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.
+	 * For unpriv this should signal an error, because memory at &fp[-64] is
+	 * not initialized.
+	 */
+	BPF_EMIT_CALL(BPF_FUNC_ringbuf_output),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.errstr = "invalid indirect read from stack R1 off -64+0 size 64",
-	.result = REJECT,
-	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.fixup_map_ringbuf = { 4 },
+	.errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64",
+	.result_unpriv = REJECT,
+	/* in privileged mode reads from uninitialized stack locations are permitted */
+	.result = ACCEPT,
 },
 {
 	"helper access to variable memory: stack, JMP (signed), no min check",
@@ -564,29 +586,41 @@
 {
 	"helper access to variable memory: 8 bytes leak",
 	.insns = {
-	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
-	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+	/* set max stack size */
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0),
+	/* set r3 to a random value */
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+	/* Note: fp[-32] left uninitialized */
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
-	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
-	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
-	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
-	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
-	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+	/* Limit r3 range to [1, 64] */
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 63),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 1),
+	BPF_MOV64_IMM(BPF_REG_4, 0),
+	/* Call bpf_ringbuf_output(), it is one of a few helper functions with
+	 * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.
+	 * For unpriv this should signal an error, because memory region [1, 64]
+	 * at &fp[-64] is not fully initialized.
+	 */
+	BPF_EMIT_CALL(BPF_FUNC_ringbuf_output),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.errstr = "invalid indirect read from stack R1 off -64+32 size 64",
-	.result = REJECT,
-	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.fixup_map_ringbuf = { 3 },
+	.errstr_unpriv = "invalid indirect read from stack R2 off -64+32 size 64",
+	.result_unpriv = REJECT,
+	/* in privileged mode reads from uninitialized stack locations are permitted */
+	.result = ACCEPT,
 },
 {
 	"helper access to variable memory: 8 bytes no leak (init memory)",
diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c b/tools/testing/selftests/bpf/verifier/int_ptr.c
index 070893fb2900..02d9e004260b 100644
--- a/tools/testing/selftests/bpf/verifier/int_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/int_ptr.c
@@ -54,12 +54,13 @@
 		/* bpf_strtoul() */
 		BPF_EMIT_CALL(BPF_FUNC_strtoul),
 
-		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
 		BPF_EXIT_INSN(),
 	},
-	.result = REJECT,
-	.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
-	.errstr = "invalid indirect read from stack R4 off -16+4 size 8",
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "invalid indirect read from stack R4 off -16+4 size 8",
+	/* in privileged mode reads from uninitialized stack locations are permitted */
+	.result = ACCEPT,
 },
 {
 	"ARG_PTR_TO_LONG misaligned",
diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c
index d63fd8991b03..745d6b5842fd 100644
--- a/tools/testing/selftests/bpf/verifier/search_pruning.c
+++ b/tools/testing/selftests/bpf/verifier/search_pruning.c
@@ -128,9 +128,10 @@
 		BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
-	.errstr = "invalid read from stack off -16+0 size 8",
-	.result = REJECT,
-	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.errstr_unpriv = "invalid read from stack off -16+0 size 8",
+	.result_unpriv = REJECT,
+	/* in privileged mode reads from uninitialized stack locations are permitted */
+	.result = ACCEPT,
 },
 {
 	"precision tracking for u32 spill/fill",
@@ -258,6 +259,8 @@
 	BPF_EXIT_INSN(),
 	},
 	.flags = BPF_F_TEST_STATE_FREQ,
-	.errstr = "invalid read from stack off -8+1 size 8",
-	.result = REJECT,
+	.errstr_unpriv = "invalid read from stack off -8+1 size 8",
+	.result_unpriv = REJECT,
+	/* in privileged mode reads from uninitialized stack locations are permitted */
+	.result = ACCEPT,
 },
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index d11d0b28be41..108dd3ee1edd 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -530,33 +530,6 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = ACCEPT,
 },
-{
-	"sk_storage_get(map, skb->sk, &stack_value, 1): partially init stack_value",
-	.insns = {
-	BPF_MOV64_IMM(BPF_REG_2, 0),
-	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
-	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
-	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
-	BPF_MOV64_IMM(BPF_REG_0, 0),
-	BPF_EXIT_INSN(),
-	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
-	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
-	BPF_MOV64_IMM(BPF_REG_0, 0),
-	BPF_EXIT_INSN(),
-	BPF_MOV64_IMM(BPF_REG_4, 1),
-	BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8),
-	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
-	BPF_LD_MAP_FD(BPF_REG_1, 0),
-	BPF_EMIT_CALL(BPF_FUNC_sk_storage_get),
-	BPF_MOV64_IMM(BPF_REG_0, 0),
-	BPF_EXIT_INSN(),
-	},
-	.fixup_sk_storage_map = { 14 },
-	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-	.result = REJECT,
-	.errstr = "invalid indirect read from stack",
-},
 {
 	"bpf_map_lookup_elem(smap, &key)",
 	.insns = {
diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c
index 9bb302dade23..d1463bf4949a 100644
--- a/tools/testing/selftests/bpf/verifier/spill_fill.c
+++ b/tools/testing/selftests/bpf/verifier/spill_fill.c
@@ -171,9 +171,10 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.result = REJECT,
-	.errstr = "invalid read from stack off -4+0 size 4",
-	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "invalid read from stack off -4+0 size 4",
+	/* in privileged mode reads from uninitialized stack locations are permitted */
+	.result = ACCEPT,
 },
 {
 	"Spill a u32 const scalar.  Refill as u16.  Offset to skb->data",
diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c
index d37f512fad16..b183e26c03f1 100644
--- a/tools/testing/selftests/bpf/verifier/var_off.c
+++ b/tools/testing/selftests/bpf/verifier/var_off.c
@@ -212,31 +212,6 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_LWT_IN,
 },
-{
-	"indirect variable-offset stack access, max_off+size > max_initialized",
-	.insns = {
-	/* Fill only the second from top 8 bytes of the stack. */
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
-	/* Get an unknown value. */
-	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
-	/* Make it small and 4-byte aligned. */
-	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
-	BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
-	/* Add it to fp.  We now have either fp-12 or fp-16, but we don't know
-	 * which. fp-12 size 8 is partially uninitialized stack.
-	 */
-	BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
-	/* Dereference it indirectly. */
-	BPF_LD_MAP_FD(BPF_REG_1, 0),
-	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
-	BPF_MOV64_IMM(BPF_REG_0, 0),
-	BPF_EXIT_INSN(),
-	},
-	.fixup_map_hash_8b = { 5 },
-	.errstr = "invalid indirect read from stack R2 var_off",
-	.result = REJECT,
-	.prog_type = BPF_PROG_TYPE_LWT_IN,
-},
 {
 	"indirect variable-offset stack access, min_off < min_initialized",
 	.insns = {
@@ -289,33 +264,6 @@
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
 },
-{
-	"indirect variable-offset stack access, uninitialized",
-	.insns = {
-	BPF_MOV64_IMM(BPF_REG_2, 6),
-	BPF_MOV64_IMM(BPF_REG_3, 28),
-	/* Fill the top 16 bytes of the stack. */
-	BPF_ST_MEM(BPF_W, BPF_REG_10, -16, 0),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
-	/* Get an unknown value. */
-	BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 0),
-	/* Make it small and 4-byte aligned. */
-	BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 4),
-	BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16),
-	/* Add it to fp.  We now have either fp-12 or fp-16, we don't know
-	 * which, but either way it points to initialized stack.
-	 */
-	BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10),
-	BPF_MOV64_IMM(BPF_REG_5, 8),
-	/* Dereference it indirectly. */
-	BPF_EMIT_CALL(BPF_FUNC_getsockopt),
-	BPF_MOV64_IMM(BPF_REG_0, 0),
-	BPF_EXIT_INSN(),
-	},
-	.errstr = "invalid indirect read from stack R4 var_off",
-	.result = REJECT,
-	.prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
 {
 	"indirect variable-offset stack access, ok",
 	.insns = {
-- 
cgit v1.2.3


From 6338a94d5ab42a94e96ea36edc5f7df1fe73e68e Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Sun, 19 Feb 2023 22:04:27 +0200
Subject: selftests/bpf: Tests for uninitialized stack reads

Three testcases to make sure that stack reads from uninitialized
locations are accepted by verifier when executed in privileged mode:
- read from a fixed offset;
- read from a variable offset;
- passing a pointer to stack to a helper converts
  STACK_INVALID to STACK_MISC.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20230219200427.606541-3-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/uninit_stack.c        |  9 +++
 tools/testing/selftests/bpf/progs/uninit_stack.c   | 87 ++++++++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/uninit_stack.c
 create mode 100644 tools/testing/selftests/bpf/progs/uninit_stack.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/uninit_stack.c b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c
new file mode 100644
index 000000000000..e64c71948491
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "uninit_stack.skel.h"
+
+void test_uninit_stack(void)
+{
+	RUN_TESTS(uninit_stack);
+}
diff --git a/tools/testing/selftests/bpf/progs/uninit_stack.c b/tools/testing/selftests/bpf/progs/uninit_stack.c
new file mode 100644
index 000000000000..8a403470e557
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uninit_stack.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* Read an uninitialized value from stack at a fixed offset */
+SEC("socket")
+__naked int read_uninit_stack_fixed_off(void *ctx)
+{
+	asm volatile ("					\
+		r0 = 0;					\
+		/* force stack depth to be 128 */	\
+		*(u64*)(r10 - 128) = r1;		\
+		r1 = *(u8 *)(r10 - 8 );			\
+		r0 += r1;				\
+		r1 = *(u8 *)(r10 - 11);			\
+		r1 = *(u8 *)(r10 - 13);			\
+		r1 = *(u8 *)(r10 - 15);			\
+		r1 = *(u16*)(r10 - 16);			\
+		r1 = *(u32*)(r10 - 32);			\
+		r1 = *(u64*)(r10 - 64);			\
+		/* read from a spill of a wrong size, it is a separate	\
+		 * branch in check_stack_read_fixed_off()		\
+		 */					\
+		*(u32*)(r10 - 72) = r1;			\
+		r1 = *(u64*)(r10 - 72);			\
+		r0 = 0;					\
+		exit;					\
+"
+		      ::: __clobber_all);
+}
+
+/* Read an uninitialized value from stack at a variable offset */
+SEC("socket")
+__naked int read_uninit_stack_var_off(void *ctx)
+{
+	asm volatile ("					\
+		call %[bpf_get_prandom_u32];		\
+		/* force stack depth to be 64 */	\
+		*(u64*)(r10 - 64) = r0;			\
+		r0 = -r0;				\
+		/* give r0 a range [-31, -1] */		\
+		if r0 s<= -32 goto exit_%=;		\
+		if r0 s>= 0 goto exit_%=;		\
+		/* access stack using r0 */		\
+		r1 = r10;				\
+		r1 += r0;				\
+		r2 = *(u8*)(r1 + 0);			\
+exit_%=:	r0 = 0;					\
+		exit;					\
+"
+		      :
+		      : __imm(bpf_get_prandom_u32)
+		      : __clobber_all);
+}
+
+static __noinline void dummy(void) {}
+
+/* Pass a pointer to uninitialized stack memory to a helper.
+ * Passed memory block should be marked as STACK_MISC after helper call.
+ */
+SEC("socket")
+__log_level(7) __msg("fp-104=mmmmmmmm")
+__naked int helper_uninit_to_misc(void *ctx)
+{
+	asm volatile ("					\
+		/* force stack depth to be 128 */	\
+		*(u64*)(r10 - 128) = r1;		\
+		r1 = r10;				\
+		r1 += -128;				\
+		r2 = 32;				\
+		call %[bpf_trace_printk];		\
+		/* Call to dummy() forces print_verifier_state(..., true),	\
+		 * thus showing the stack state, matched by __msg().		\
+		 */					\
+		call %[dummy];				\
+		r0 = 0;					\
+		exit;					\
+"
+		      :
+		      : __imm(bpf_trace_printk),
+			__imm(dummy)
+		      : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 32513d40d908b267508d37994753d9bd1600914b Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Fri, 10 Mar 2023 12:41:18 -0800
Subject: selftests/bpf: Fix progs/find_vma_fail1.c build error.

The commit 11e456cae91e ("selftests/bpf: Fix compilation errors: Assign a value to a constant")
fixed the issue cleanly in bpf-next.
This is an alternative fix in bpf tree to avoid merge conflict between bpf and bpf-next.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/find_vma_fail1.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c
index b3b326b8e2d1..6dab9cffda13 100644
--- a/tools/testing/selftests/bpf/progs/find_vma_fail1.c
+++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2021 Facebook */
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
+#define vm_flags vm_start
 
 char _license[] SEC("license") = "GPL";
 
-- 
cgit v1.2.3


From e8c8361cfdbf450f760e8a2bdbd4222d1947366b Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Fri, 10 Mar 2023 12:47:51 -0800
Subject: selftests/bpf: Fix progs/test_deny_namespace.c issues.

The following build error can be seen:
progs/test_deny_namespace.c:22:19: error: call to undeclared function 'BIT_LL'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
        __u64 cap_mask = BIT_LL(CAP_SYS_ADMIN);

The struct kernel_cap_struct no longer exists in the kernel as well.
Adjust bpf prog to fix both issues.

Fixes: f122a08b197d ("capability: just use a 'u64' instead of a 'u32[2]' array")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_deny_namespace.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
index 591104e79812..e96b901a733c 100644
--- a/tools/testing/selftests/bpf/progs/test_deny_namespace.c
+++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
@@ -5,12 +5,10 @@
 #include <errno.h>
 #include <linux/capability.h>
 
-struct kernel_cap_struct {
-	__u64 val;
-} __attribute__((preserve_access_index));
+typedef struct { unsigned long long val; } kernel_cap_t;
 
 struct cred {
-	struct kernel_cap_struct cap_effective;
+	kernel_cap_t cap_effective;
 } __attribute__((preserve_access_index));
 
 char _license[] SEC("license") = "GPL";
@@ -18,8 +16,8 @@ char _license[] SEC("license") = "GPL";
 SEC("lsm.s/userns_create")
 int BPF_PROG(test_userns_create, const struct cred *cred, int ret)
 {
-	struct kernel_cap_struct caps = cred->cap_effective;
-	__u64 cap_mask = BIT_LL(CAP_SYS_ADMIN);
+	kernel_cap_t caps = cred->cap_effective;
+	__u64 cap_mask = 1ULL << CAP_SYS_ADMIN;
 
 	if (ret)
 		return 0;
-- 
cgit v1.2.3


From 05107edc910135d27fe557267dc45be9630bf3dd Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Wed, 8 Mar 2023 11:59:33 -0800
Subject: selftests: sigaltstack: fix -Wuninitialized

Building sigaltstack with clang via:
$ ARCH=x86 make LLVM=1 -C tools/testing/selftests/sigaltstack/

produces the following warning:
  warning: variable 'sp' is uninitialized when used here [-Wuninitialized]
  if (sp < (unsigned long)sstack ||
      ^~

Clang expects these to be declared at global scope; we've fixed this in
the kernel proper by using the macro `current_stack_pointer`. This is
defined in different headers for different target architectures, so just
create a new header that defines the arch-specific register names for
the stack pointer register, and define it for more targets (at least the
ones that support current_stack_pointer/ARCH_HAS_CURRENT_STACK_POINTER).

Reported-by: Linux Kernel Functional Testing <lkft@linaro.org>
Link: https://lore.kernel.org/lkml/CA+G9fYsi3OOu7yCsMutpzKDnBMAzJBCPimBp86LhGBa0eCnEpA@mail.gmail.com/
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Tested-by: Linux Kernel Functional Testing <lkft@linaro.org>
Tested-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 .../selftests/sigaltstack/current_stack_pointer.h  | 23 ++++++++++++++++++++++
 tools/testing/selftests/sigaltstack/sas.c          |  7 +------
 2 files changed, 24 insertions(+), 6 deletions(-)
 create mode 100644 tools/testing/selftests/sigaltstack/current_stack_pointer.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/sigaltstack/current_stack_pointer.h b/tools/testing/selftests/sigaltstack/current_stack_pointer.h
new file mode 100644
index 000000000000..ea9bdf3a90b1
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/current_stack_pointer.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#if __alpha__
+register unsigned long sp asm("$30");
+#elif __arm__ || __aarch64__ || __csky__ || __m68k__ || __mips__ || __riscv
+register unsigned long sp asm("sp");
+#elif __i386__
+register unsigned long sp asm("esp");
+#elif __loongarch64
+register unsigned long sp asm("$sp");
+#elif __ppc__
+register unsigned long sp asm("r1");
+#elif __s390x__
+register unsigned long sp asm("%15");
+#elif __sh__
+register unsigned long sp asm("r15");
+#elif __x86_64__
+register unsigned long sp asm("rsp");
+#elif __XTENSA__
+register unsigned long sp asm("a1");
+#else
+#error "implement current_stack_pointer equivalent"
+#endif
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
index c53b070755b6..98d37cb744fb 100644
--- a/tools/testing/selftests/sigaltstack/sas.c
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -20,6 +20,7 @@
 #include <sys/auxv.h>
 
 #include "../kselftest.h"
+#include "current_stack_pointer.h"
 
 #ifndef SS_AUTODISARM
 #define SS_AUTODISARM  (1U << 31)
@@ -46,12 +47,6 @@ void my_usr1(int sig, siginfo_t *si, void *u)
 	stack_t stk;
 	struct stk_data *p;
 
-#if __s390x__
-	register unsigned long sp asm("%15");
-#else
-	register unsigned long sp asm("sp");
-#endif
-
 	if (sp < (unsigned long)sstack ||
 			sp >= (unsigned long)sstack + stack_size) {
 		ksft_exit_fail_msg("SP is not on sigaltstack\n");
-- 
cgit v1.2.3


From 62faca1ca10cc84e99ae7f38aa28df2bc945369b Mon Sep 17 00:00:00 2001
From: "Chang S. Bae" <chang.seok.bae@intel.com>
Date: Mon, 27 Feb 2023 13:05:04 -0800
Subject: selftests/x86/amx: Add a ptrace test

Include a test case to validate the XTILEDATA injection to the target.

Also, it ensures the kernel's ability to copy states between different
XSAVE formats.

Refactor the memcmp() code to be usable for the state validation.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/20230227210504.18520-3-chang.seok.bae%40intel.com
---
 tools/testing/selftests/x86/amx.c | 108 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 105 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c
index 625e42901237..d884fd69dd51 100644
--- a/tools/testing/selftests/x86/amx.c
+++ b/tools/testing/selftests/x86/amx.c
@@ -14,8 +14,10 @@
 #include <sys/auxv.h>
 #include <sys/mman.h>
 #include <sys/shm.h>
+#include <sys/ptrace.h>
 #include <sys/syscall.h>
 #include <sys/wait.h>
+#include <sys/uio.h>
 
 #include "../kselftest.h" /* For __cpuid_count() */
 
@@ -583,6 +585,13 @@ static void test_dynamic_state(void)
 	_exit(0);
 }
 
+static inline int __compare_tiledata_state(struct xsave_buffer *xbuf1, struct xsave_buffer *xbuf2)
+{
+	return memcmp(&xbuf1->bytes[xtiledata.xbuf_offset],
+		      &xbuf2->bytes[xtiledata.xbuf_offset],
+		      xtiledata.size);
+}
+
 /*
  * Save current register state and compare it to @xbuf1.'
  *
@@ -599,9 +608,7 @@ static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1)
 		fatal_error("failed to allocate XSAVE buffer\n");
 
 	xsave(xbuf2, XFEATURE_MASK_XTILEDATA);
-	ret = memcmp(&xbuf1->bytes[xtiledata.xbuf_offset],
-		     &xbuf2->bytes[xtiledata.xbuf_offset],
-		     xtiledata.size);
+	ret = __compare_tiledata_state(xbuf1, xbuf2);
 
 	free(xbuf2);
 
@@ -826,6 +833,99 @@ static void test_context_switch(void)
 	free(finfo);
 }
 
+/* Ptrace test */
+
+/*
+ * Make sure the ptracee has the expanded kernel buffer on the first
+ * use. Then, initialize the state before performing the state
+ * injection from the ptracer.
+ */
+static inline void ptracee_firstuse_tiledata(void)
+{
+	load_rand_tiledata(stashed_xsave);
+	init_xtiledata();
+}
+
+/*
+ * Ptracer injects the randomized tile data state. It also reads
+ * before and after that, which will execute the kernel's state copy
+ * functions. So, the tester is advised to double-check any emitted
+ * kernel messages.
+ */
+static void ptracer_inject_tiledata(pid_t target)
+{
+	struct xsave_buffer *xbuf;
+	struct iovec iov;
+
+	xbuf = alloc_xbuf();
+	if (!xbuf)
+		fatal_error("unable to allocate XSAVE buffer");
+
+	printf("\tRead the init'ed tiledata via ptrace().\n");
+
+	iov.iov_base = xbuf;
+	iov.iov_len = xbuf_size;
+
+	memset(stashed_xsave, 0, xbuf_size);
+
+	if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+		fatal_error("PTRACE_GETREGSET");
+
+	if (!__compare_tiledata_state(stashed_xsave, xbuf))
+		printf("[OK]\tThe init'ed tiledata was read from ptracee.\n");
+	else
+		printf("[FAIL]\tThe init'ed tiledata was not read from ptracee.\n");
+
+	printf("\tInject tiledata via ptrace().\n");
+
+	load_rand_tiledata(xbuf);
+
+	memcpy(&stashed_xsave->bytes[xtiledata.xbuf_offset],
+	       &xbuf->bytes[xtiledata.xbuf_offset],
+	       xtiledata.size);
+
+	if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+		fatal_error("PTRACE_SETREGSET");
+
+	if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+		fatal_error("PTRACE_GETREGSET");
+
+	if (!__compare_tiledata_state(stashed_xsave, xbuf))
+		printf("[OK]\tTiledata was correctly written to ptracee.\n");
+	else
+		printf("[FAIL]\tTiledata was not correctly written to ptracee.\n");
+}
+
+static void test_ptrace(void)
+{
+	pid_t child;
+	int status;
+
+	child = fork();
+	if (child < 0) {
+		err(1, "fork");
+	} else if (!child) {
+		if (ptrace(PTRACE_TRACEME, 0, NULL, NULL))
+			err(1, "PTRACE_TRACEME");
+
+		ptracee_firstuse_tiledata();
+
+		raise(SIGTRAP);
+		_exit(0);
+	}
+
+	do {
+		wait(&status);
+	} while (WSTOPSIG(status) != SIGTRAP);
+
+	ptracer_inject_tiledata(child);
+
+	ptrace(PTRACE_DETACH, child, NULL, NULL);
+	wait(&status);
+	if (!WIFEXITED(status) || WEXITSTATUS(status))
+		err(1, "ptrace test");
+}
+
 int main(void)
 {
 	/* Check hardware availability at first */
@@ -846,6 +946,8 @@ int main(void)
 	ctxtswtest_config.num_threads = 5;
 	test_context_switch();
 
+	test_ptrace();
+
 	clearhandler(SIGILL);
 	free_stashed_xsave();
 
-- 
cgit v1.2.3


From d035230ec9937a9138921d2a0eeb99496ea7eac0 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 8 Mar 2023 19:04:22 +0000
Subject: kselftest: vm: fix unused variable warning

Remove unused variable from the MDWE test.

[joey.gouly@arm.com: add commit message]
Link: https://lkml.kernel.org/r/20230308190423.46491-4-joey.gouly@arm.com
Fixes: 4cf1fe34fd18 ("kselftest: vm: add tests for memory-deny-write-execute")
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Alexey Izbyshev <izbyshev@ispras.ru>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: nd <nd@arm.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/mm/mdwe_test.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/mm/mdwe_test.c b/tools/testing/selftests/mm/mdwe_test.c
index f466a099f1bf..bc91bef5d254 100644
--- a/tools/testing/selftests/mm/mdwe_test.c
+++ b/tools/testing/selftests/mm/mdwe_test.c
@@ -163,9 +163,8 @@ TEST_F(mdwe, mprotect_WRITE_EXEC)
 
 TEST_F(mdwe, mmap_FIXED)
 {
-	void *p, *p2;
+	void *p;
 
-	p2 = mmap(NULL, self->size, PROT_READ | PROT_EXEC, self->flags, 0, 0);
 	self->p = mmap(NULL, self->size, PROT_READ, self->flags, 0, 0);
 	ASSERT_NE(self->p, MAP_FAILED);
 
-- 
cgit v1.2.3


From 25209a3209ecc44f93300b7ee5287f451be1d6ff Mon Sep 17 00:00:00 2001
From: Arseniy Krasnov <avkrasnov@sberdevices.ru>
Date: Tue, 28 Mar 2023 14:33:07 +0300
Subject: test/vsock: new skbuff appending test

This adds test which checks case when data of newly received skbuff is
appended to the last skbuff in the socket's queue. It looks like simple
test with 'send()' and 'recv()', but internally it triggers logic which
appends one received skbuff to another. Test checks that this feature
works correctly.

This test is actual only for virtio transport.

Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/vsock/vsock_test.c | 90 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 3de10dbb50f5..12b97c92fbb2 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -968,6 +968,91 @@ static void test_seqpacket_inv_buf_server(const struct test_opts *opts)
 	test_inv_buf_server(opts, false);
 }
 
+#define HELLO_STR "HELLO"
+#define WORLD_STR "WORLD"
+
+static void test_stream_virtio_skb_merge_client(const struct test_opts *opts)
+{
+	ssize_t res;
+	int fd;
+
+	fd = vsock_stream_connect(opts->peer_cid, 1234);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Send first skbuff. */
+	res = send(fd, HELLO_STR, strlen(HELLO_STR), 0);
+	if (res != strlen(HELLO_STR)) {
+		fprintf(stderr, "unexpected send(2) result %zi\n", res);
+		exit(EXIT_FAILURE);
+	}
+
+	control_writeln("SEND0");
+	/* Peer reads part of first skbuff. */
+	control_expectln("REPLY0");
+
+	/* Send second skbuff, it will be appended to the first. */
+	res = send(fd, WORLD_STR, strlen(WORLD_STR), 0);
+	if (res != strlen(WORLD_STR)) {
+		fprintf(stderr, "unexpected send(2) result %zi\n", res);
+		exit(EXIT_FAILURE);
+	}
+
+	control_writeln("SEND1");
+	/* Peer reads merged skbuff packet. */
+	control_expectln("REPLY1");
+
+	close(fd);
+}
+
+static void test_stream_virtio_skb_merge_server(const struct test_opts *opts)
+{
+	unsigned char buf[64];
+	ssize_t res;
+	int fd;
+
+	fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	control_expectln("SEND0");
+
+	/* Read skbuff partially. */
+	res = recv(fd, buf, 2, 0);
+	if (res != 2) {
+		fprintf(stderr, "expected recv(2) returns 2 bytes, got %zi\n", res);
+		exit(EXIT_FAILURE);
+	}
+
+	control_writeln("REPLY0");
+	control_expectln("SEND1");
+
+	res = recv(fd, buf + 2, sizeof(buf) - 2, 0);
+	if (res != 8) {
+		fprintf(stderr, "expected recv(2) returns 8 bytes, got %zi\n", res);
+		exit(EXIT_FAILURE);
+	}
+
+	res = recv(fd, buf, sizeof(buf) - 8 - 2, MSG_DONTWAIT);
+	if (res != -1) {
+		fprintf(stderr, "expected recv(2) failure, got %zi\n", res);
+		exit(EXIT_FAILURE);
+	}
+
+	if (memcmp(buf, HELLO_STR WORLD_STR, strlen(HELLO_STR WORLD_STR))) {
+		fprintf(stderr, "pattern mismatch\n");
+		exit(EXIT_FAILURE);
+	}
+
+	control_writeln("REPLY1");
+
+	close(fd);
+}
+
 static struct test_case test_cases[] = {
 	{
 		.name = "SOCK_STREAM connection reset",
@@ -1038,6 +1123,11 @@ static struct test_case test_cases[] = {
 		.run_client = test_seqpacket_inv_buf_client,
 		.run_server = test_seqpacket_inv_buf_server,
 	},
+	{
+		.name = "SOCK_STREAM virtio skb merge",
+		.run_client = test_stream_virtio_skb_merge_client,
+		.run_server = test_stream_virtio_skb_merge_server,
+	},
 	{},
 };
 
-- 
cgit v1.2.3


From f1594bc676579133a3cd906d7d27733289edfb86 Mon Sep 17 00:00:00 2001
From: Anh Tuan Phan <tuananhlfc@gmail.com>
Date: Fri, 24 Mar 2023 09:14:15 +0700
Subject: selftests mount: Fix mount_setattr_test builds failed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When compiling selftests with target mount_setattr I encountered some errors with the below messages:
mount_setattr_test.c: In function ‘mount_setattr_thread’:
mount_setattr_test.c:343:16: error: variable ‘attr’ has initializer but incomplete type
  343 |         struct mount_attr attr = {
      |                ^~~~~~~~~~

These errors might be because of linux/mount.h is not included. This patch resolves that issue.

Signed-off-by: Anh Tuan Phan <tuananhlfc@gmail.com>
Acked-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/mount_setattr/mount_setattr_test.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 582669ca38e9..c6a8c732b802 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -18,6 +18,7 @@
 #include <grp.h>
 #include <stdbool.h>
 #include <stdarg.h>
+#include <linux/mount.h>
 
 #include "../kselftest_harness.h"
 
-- 
cgit v1.2.3


From c13af03de46ba27674dd9fb31a17c0d480081139 Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Date: Mon, 27 Feb 2023 09:36:04 -0800
Subject: maple_tree: fix write memory barrier of nodes once dead for RCU mode

During the development of the maple tree, the strategy of freeing multiple
nodes changed and, in the process, the pivots were reused to store
pointers to dead nodes.  To ensure the readers see accurate pivots, the
writers need to mark the nodes as dead and call smp_wmb() to ensure any
readers can identify the node as dead before using the pivot values.

There were two places where the old method of marking the node as dead
without smp_wmb() were being used, which resulted in RCU readers seeing
the wrong pivot value before seeing the node was dead.  Fix this race
condition by using mte_set_node_dead() which has the smp_wmb() call to
ensure the race is closed.

Add a WARN_ON() to the ma_free_rcu() call to ensure all nodes being freed
are marked as dead to ensure there are no other call paths besides the two
updated paths.

This is necessary for the RCU mode of the maple tree.

Link: https://lkml.kernel.org/r/20230227173632.3292573-6-surenb@google.com
Fixes: 54a611b60590 ("Maple Tree: add new data structure")
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/radix-tree/maple.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index 958ee9bdb316..4c89ff333f6f 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -108,6 +108,7 @@ static noinline void check_new_node(struct maple_tree *mt)
 	MT_BUG_ON(mt, mn->slot[1] != NULL);
 	MT_BUG_ON(mt, mas_allocated(&mas) != 0);
 
+	mn->parent = ma_parent_ptr(mn);
 	ma_free_rcu(mn);
 	mas.node = MAS_START;
 	mas_nomem(&mas, GFP_KERNEL);
@@ -160,6 +161,7 @@ static noinline void check_new_node(struct maple_tree *mt)
 		MT_BUG_ON(mt, mas_allocated(&mas) != i);
 		MT_BUG_ON(mt, !mn);
 		MT_BUG_ON(mt, not_empty(mn));
+		mn->parent = ma_parent_ptr(mn);
 		ma_free_rcu(mn);
 	}
 
@@ -192,6 +194,7 @@ static noinline void check_new_node(struct maple_tree *mt)
 		MT_BUG_ON(mt, not_empty(mn));
 		MT_BUG_ON(mt, mas_allocated(&mas) != i - 1);
 		MT_BUG_ON(mt, !mn);
+		mn->parent = ma_parent_ptr(mn);
 		ma_free_rcu(mn);
 	}
 
@@ -210,6 +213,7 @@ static noinline void check_new_node(struct maple_tree *mt)
 			mn = mas_pop_node(&mas);
 			MT_BUG_ON(mt, not_empty(mn));
 			MT_BUG_ON(mt, mas_allocated(&mas) != j - 1);
+			mn->parent = ma_parent_ptr(mn);
 			ma_free_rcu(mn);
 		}
 		MT_BUG_ON(mt, mas_allocated(&mas) != 0);
@@ -233,6 +237,7 @@ static noinline void check_new_node(struct maple_tree *mt)
 			MT_BUG_ON(mt, mas_allocated(&mas) != i - j);
 			mn = mas_pop_node(&mas);
 			MT_BUG_ON(mt, not_empty(mn));
+			mn->parent = ma_parent_ptr(mn);
 			ma_free_rcu(mn);
 			MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1);
 		}
@@ -269,6 +274,7 @@ static noinline void check_new_node(struct maple_tree *mt)
 			mn = mas_pop_node(&mas); /* get the next node. */
 			MT_BUG_ON(mt, mn == NULL);
 			MT_BUG_ON(mt, not_empty(mn));
+			mn->parent = ma_parent_ptr(mn);
 			ma_free_rcu(mn);
 		}
 		MT_BUG_ON(mt, mas_allocated(&mas) != 0);
@@ -294,6 +300,7 @@ static noinline void check_new_node(struct maple_tree *mt)
 			mn = mas_pop_node(&mas2); /* get the next node. */
 			MT_BUG_ON(mt, mn == NULL);
 			MT_BUG_ON(mt, not_empty(mn));
+			mn->parent = ma_parent_ptr(mn);
 			ma_free_rcu(mn);
 		}
 		MT_BUG_ON(mt, mas_allocated(&mas2) != 0);
@@ -334,10 +341,12 @@ static noinline void check_new_node(struct maple_tree *mt)
 	MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
 	mn = mas_pop_node(&mas);
 	MT_BUG_ON(mt, not_empty(mn));
+	mn->parent = ma_parent_ptr(mn);
 	ma_free_rcu(mn);
 	for (i = 1; i <= MAPLE_ALLOC_SLOTS + 1; i++) {
 		mn = mas_pop_node(&mas);
 		MT_BUG_ON(mt, not_empty(mn));
+		mn->parent = ma_parent_ptr(mn);
 		ma_free_rcu(mn);
 	}
 	MT_BUG_ON(mt, mas_allocated(&mas) != 0);
@@ -375,6 +384,7 @@ static noinline void check_new_node(struct maple_tree *mt)
 		mas_node_count(&mas, i); /* Request */
 		mas_nomem(&mas, GFP_KERNEL); /* Fill request */
 		mn = mas_pop_node(&mas); /* get the next node. */
+		mn->parent = ma_parent_ptr(mn);
 		ma_free_rcu(mn);
 		mas_destroy(&mas);
 
@@ -382,10 +392,13 @@ static noinline void check_new_node(struct maple_tree *mt)
 		mas_node_count(&mas, i); /* Request */
 		mas_nomem(&mas, GFP_KERNEL); /* Fill request */
 		mn = mas_pop_node(&mas); /* get the next node. */
+		mn->parent = ma_parent_ptr(mn);
 		ma_free_rcu(mn);
 		mn = mas_pop_node(&mas); /* get the next node. */
+		mn->parent = ma_parent_ptr(mn);
 		ma_free_rcu(mn);
 		mn = mas_pop_node(&mas); /* get the next node. */
+		mn->parent = ma_parent_ptr(mn);
 		ma_free_rcu(mn);
 		mas_destroy(&mas);
 	}
@@ -35369,6 +35382,7 @@ static noinline void check_prealloc(struct maple_tree *mt)
 	MT_BUG_ON(mt, allocated != 1 + height * 3);
 	mn = mas_pop_node(&mas);
 	MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
+	mn->parent = ma_parent_ptr(mn);
 	ma_free_rcu(mn);
 	MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
 	mas_destroy(&mas);
@@ -35386,6 +35400,7 @@ static noinline void check_prealloc(struct maple_tree *mt)
 	mas_destroy(&mas);
 	allocated = mas_allocated(&mas);
 	MT_BUG_ON(mt, allocated != 0);
+	mn->parent = ma_parent_ptr(mn);
 	ma_free_rcu(mn);
 
 	MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
@@ -35756,6 +35771,7 @@ void farmer_tests(void)
 	tree.ma_root = mt_mk_node(node, maple_leaf_64);
 	mt_dump(&tree);
 
+	node->parent = ma_parent_ptr(node);
 	ma_free_rcu(node);
 
 	/* Check things that will make lockdep angry */
-- 
cgit v1.2.3


From 38e058cc7d245dc8034426415bee8fec16ace1bd Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 4 Apr 2023 15:24:11 +0800
Subject: selftests: net: rps_default_mask.sh: delete veth link specifically

When deleting the netns and recreating a new one while re-adding the
veth interface, there is a small window of time during which the old
veth interface has not yet been removed. This can cause the new addition
to fail. To resolve this issue, we can either wait for a short while to
ensure that the old veth interface is deleted, or we can specifically
remove the veth interface.

Before this patch:
  # ./rps_default_mask.sh
  empty rps_default_mask                                      [ ok ]
  changing rps_default_mask dont affect existing devices      [ ok ]
  changing rps_default_mask dont affect existing netns        [ ok ]
  changing rps_default_mask affect newly created devices      [ ok ]
  changing rps_default_mask don't affect newly child netns[II][ ok ]
  rps_default_mask is 0 by default in child netns             [ ok ]
  RTNETLINK answers: File exists
  changing rps_default_mask in child ns don't affect the main one[ ok ]
  cat: /sys/class/net/vethC11an1/queues/rx-0/rps_cpus: No such file or directory
  changing rps_default_mask in child ns affects new childns devices./rps_default_mask.sh: line 36: [: -eq: unary operator expected
  [fail] expected 1 found
  changing rps_default_mask in child ns don't affect existing devices[ ok ]

After this patch:
  # ./rps_default_mask.sh
  empty rps_default_mask                                      [ ok ]
  changing rps_default_mask dont affect existing devices      [ ok ]
  changing rps_default_mask dont affect existing netns        [ ok ]
  changing rps_default_mask affect newly created devices      [ ok ]
  changing rps_default_mask don't affect newly child netns[II][ ok ]
  rps_default_mask is 0 by default in child netns             [ ok ]
  changing rps_default_mask in child ns don't affect the main one[ ok ]
  changing rps_default_mask in child ns affects new childns devices[ ok ]
  changing rps_default_mask in child ns don't affect existing devices[ ok ]

Fixes: 3a7d84eae03b ("self-tests: more rps self tests")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Link: https://lore.kernel.org/r/20230404072411.879476-1-liuhangbin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/rps_default_mask.sh | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
index 0fd0d2db3abc..a26c5624429f 100755
--- a/tools/testing/selftests/net/rps_default_mask.sh
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -60,6 +60,7 @@ ip link set dev $VETH up
 ip -n $NETNS link set dev $VETH up
 chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3
 chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0
+ip link del dev $VETH
 ip netns del $NETNS
 
 setup
-- 
cgit v1.2.3


From 919e659ed12568b5b8ba6c2ffdd82d8d31fc28af Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 6 Apr 2023 12:40:19 +0200
Subject: selftests/bpf: fix xdp_redirect xdp-features selftest for veth driver

xdp-features supported by veth driver are no more static, but they
depends on veth configuration (e.g. if GRO is enabled/disabled or
TX/RX queue configuration). Take it into account in xdp_redirect
xdp-features selftest for veth driver.

Fixes: fccca038f300 ("veth: take into account device reconfiguration for xdp_features flag")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/bc35455cfbb1d4f7f52536955ded81ad47d8dc54.1680777371.git.lorenzo@kernel.org
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 .../selftests/bpf/prog_tests/xdp_do_redirect.c     | 30 +++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index 7271a18ab3e2..8251a0fc6ee9 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -167,8 +167,7 @@ void test_xdp_do_redirect(void)
 
 	if (!ASSERT_EQ(query_opts.feature_flags,
 		       NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
-		       NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
-		       NETDEV_XDP_ACT_NDO_XMIT_SG,
+		       NETDEV_XDP_ACT_RX_SG,
 		       "veth_src query_opts.feature_flags"))
 		goto out;
 
@@ -176,11 +175,36 @@ void test_xdp_do_redirect(void)
 	if (!ASSERT_OK(err, "veth_dst bpf_xdp_query"))
 		goto out;
 
+	if (!ASSERT_EQ(query_opts.feature_flags,
+		       NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+		       NETDEV_XDP_ACT_RX_SG,
+		       "veth_dst query_opts.feature_flags"))
+		goto out;
+
+	/* Enable GRO */
+	SYS("ethtool -K veth_src gro on");
+	SYS("ethtool -K veth_dst gro on");
+
+	err = bpf_xdp_query(ifindex_src, XDP_FLAGS_DRV_MODE, &query_opts);
+	if (!ASSERT_OK(err, "veth_src bpf_xdp_query gro on"))
+		goto out;
+
 	if (!ASSERT_EQ(query_opts.feature_flags,
 		       NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
 		       NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
 		       NETDEV_XDP_ACT_NDO_XMIT_SG,
-		       "veth_dst query_opts.feature_flags"))
+		       "veth_src query_opts.feature_flags gro on"))
+		goto out;
+
+	err = bpf_xdp_query(ifindex_dst, XDP_FLAGS_DRV_MODE, &query_opts);
+	if (!ASSERT_OK(err, "veth_dst bpf_xdp_query gro on"))
+		goto out;
+
+	if (!ASSERT_EQ(query_opts.feature_flags,
+		       NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+		       NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
+		       NETDEV_XDP_ACT_NDO_XMIT_SG,
+		       "veth_dst query_opts.feature_flags gro on"))
 		goto out;
 
 	memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN);
-- 
cgit v1.2.3


From 481b56e0391ea46d6bf1a2604422a21063615901 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 6 Apr 2023 16:23:51 +0800
Subject: selftests: bonding: re-format bond option tests

To improve the testing process for bond options, A new bond topology lib
is added to our testing setup. The current option_prio.sh file will be
renamed to bond_options.sh so that all bonding options can be tested here.
Specifically, for priority testing, we will run all tests using modes
1, 5, and 6. These changes will help us streamline the testing process
and ensure that our bond options are rigorously evaluated.

Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Jonathan Toppins <jtoppins@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/drivers/net/bonding/Makefile |   3 +-
 .../selftests/drivers/net/bonding/bond_options.sh  | 209 ++++++++++++++++++
 .../drivers/net/bonding/bond_topo_3d1c.sh          | 143 ++++++++++++
 .../selftests/drivers/net/bonding/option_prio.sh   | 245 ---------------------
 4 files changed, 354 insertions(+), 246 deletions(-)
 create mode 100755 tools/testing/selftests/drivers/net/bonding/bond_options.sh
 create mode 100644 tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
 delete mode 100755 tools/testing/selftests/drivers/net/bonding/option_prio.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index a39bb2560d9b..03f92d7aeb19 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -8,11 +8,12 @@ TEST_PROGS := \
 	dev_addr_lists.sh \
 	mode-1-recovery-updelay.sh \
 	mode-2-recovery-updelay.sh \
-	option_prio.sh \
+	bond_options.sh \
 	bond-eth-type-change.sh
 
 TEST_FILES := \
 	lag_lib.sh \
+	bond_topo_3d1c.sh \
 	net_forwarding_lib.sh
 
 include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
new file mode 100755
index 000000000000..7213211d0bde
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -0,0 +1,209 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bonding options with mode 1,5,6
+
+ALL_TESTS="
+	prio
+"
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source ${lib_dir}/net_forwarding_lib.sh
+source ${lib_dir}/bond_topo_3d1c.sh
+
+skip_prio()
+{
+	local skip=1
+
+	# check if iproute support prio option
+	ip -n ${s_ns} link set eth0 type bond_slave prio 10
+	[[ $? -ne 0 ]] && skip=0
+
+	# check if kernel support prio option
+	ip -n ${s_ns} -d link show eth0 | grep -q "prio 10"
+	[[ $? -ne 0 ]] && skip=0
+
+	return $skip
+}
+
+skip_ns()
+{
+	local skip=1
+
+	# check if iproute support ns_ip6_target option
+	ip -n ${s_ns} link add bond1 type bond ns_ip6_target ${g_ip6}
+	[[ $? -ne 0 ]] && skip=0
+
+	# check if kernel support ns_ip6_target option
+	ip -n ${s_ns} -d link show bond1 | grep -q "ns_ip6_target ${g_ip6}"
+	[[ $? -ne 0 ]] && skip=0
+
+	ip -n ${s_ns} link del bond1
+
+	return $skip
+}
+
+active_slave=""
+check_active_slave()
+{
+	local target_active_slave=$1
+	active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+	test "$active_slave" = "$target_active_slave"
+	check_err $? "Current active slave is $active_slave but not $target_active_slave"
+}
+
+
+# Test bonding prio option
+prio_test()
+{
+	local param="$1"
+	RET=0
+
+	# create bond
+	bond_reset "${param}"
+
+	# check bonding member prio value
+	ip -n ${s_ns} link set eth0 type bond_slave prio 0
+	ip -n ${s_ns} link set eth1 type bond_slave prio 10
+	ip -n ${s_ns} link set eth2 type bond_slave prio 11
+	cmd_jq "ip -n ${s_ns} -d -j link show eth0" \
+		".[].linkinfo.info_slave_data | select (.prio == 0)" "-e" &> /dev/null
+	check_err $? "eth0 prio is not 0"
+	cmd_jq "ip -n ${s_ns} -d -j link show eth1" \
+		".[].linkinfo.info_slave_data | select (.prio == 10)" "-e" &> /dev/null
+	check_err $? "eth1 prio is not 10"
+	cmd_jq "ip -n ${s_ns} -d -j link show eth2" \
+		".[].linkinfo.info_slave_data | select (.prio == 11)" "-e" &> /dev/null
+	check_err $? "eth2 prio is not 11"
+
+	bond_check_connection "setup"
+
+	# active slave should be the primary slave
+	check_active_slave eth1
+
+	# active slave should be the higher prio slave
+	ip -n ${s_ns} link set $active_slave down
+	bond_check_connection "fail over"
+	check_active_slave eth2
+
+	# when only 1 slave is up
+	ip -n ${s_ns} link set $active_slave down
+	bond_check_connection "only 1 slave up"
+	check_active_slave eth0
+
+	# when a higher prio slave change to up
+	ip -n ${s_ns} link set eth2 up
+	bond_check_connection "higher prio slave up"
+	case $primary_reselect in
+		"0")
+			check_active_slave "eth2"
+			;;
+		"1")
+			check_active_slave "eth0"
+			;;
+		"2")
+			check_active_slave "eth0"
+			;;
+	esac
+	local pre_active_slave=$active_slave
+
+	# when the primary slave change to up
+	ip -n ${s_ns} link set eth1 up
+	bond_check_connection "primary slave up"
+	case $primary_reselect in
+		"0")
+			check_active_slave "eth1"
+			;;
+		"1")
+			check_active_slave "$pre_active_slave"
+			;;
+		"2")
+			check_active_slave "$pre_active_slave"
+			ip -n ${s_ns} link set $active_slave down
+			bond_check_connection "pre_active slave down"
+			check_active_slave "eth1"
+			;;
+	esac
+
+	# Test changing bond slave prio
+	if [[ "$primary_reselect" == "0" ]];then
+		ip -n ${s_ns} link set eth0 type bond_slave prio 1000000
+		ip -n ${s_ns} link set eth1 type bond_slave prio 0
+		ip -n ${s_ns} link set eth2 type bond_slave prio -50
+		ip -n ${s_ns} -d link show eth0 | grep -q 'prio 1000000'
+		check_err $? "eth0 prio is not 1000000"
+		ip -n ${s_ns} -d link show eth1 | grep -q 'prio 0'
+		check_err $? "eth1 prio is not 0"
+		ip -n ${s_ns} -d link show eth2 | grep -q 'prio -50'
+		check_err $? "eth3 prio is not -50"
+		check_active_slave "eth1"
+
+		ip -n ${s_ns} link set $active_slave down
+		bond_check_connection "change slave prio"
+		check_active_slave "eth0"
+	fi
+}
+
+prio_miimon()
+{
+	local primary_reselect
+	local mode=$1
+
+	for primary_reselect in 0 1 2; do
+		prio_test "mode $mode miimon 100 primary eth1 primary_reselect $primary_reselect"
+		log_test "prio" "$mode miimon primary_reselect $primary_reselect"
+	done
+}
+
+prio_arp()
+{
+	local primary_reselect
+	local mode=$1
+
+	for primary_reselect in 0 1 2; do
+		prio_test "mode active-backup arp_interval 100 arp_ip_target ${g_ip4} primary eth1 primary_reselect $primary_reselect"
+		log_test "prio" "$mode arp_ip_target primary_reselect $primary_reselect"
+	done
+}
+
+prio_ns()
+{
+	local primary_reselect
+	local mode=$1
+
+	if skip_ns; then
+		log_test_skip "prio ns" "Current iproute or kernel doesn't support bond option 'ns_ip6_target'."
+		return 0
+	fi
+
+	for primary_reselect in 0 1 2; do
+		prio_test "mode active-backup arp_interval 100 ns_ip6_target ${g_ip6} primary eth1 primary_reselect $primary_reselect"
+		log_test "prio" "$mode ns_ip6_target primary_reselect $primary_reselect"
+	done
+}
+
+prio()
+{
+	local mode modes="active-backup balance-tlb balance-alb"
+
+	if skip_prio; then
+		log_test_skip "prio" "Current iproute or kernel doesn't support bond option 'prio'."
+		return 0
+	fi
+
+	for mode in $modes; do
+		prio_miimon $mode
+		prio_arp $mode
+		prio_ns $mode
+	done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
new file mode 100644
index 000000000000..4045ca97fb22
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Topology for Bond mode 1,5,6 testing
+#
+#  +-------------------------------------+
+#  |                bond0                |
+#  |                  +                  |  Server
+#  |      eth0        | eth1   eth2      |  192.0.2.1/24
+#  |        +-------------------+        |  2001:db8::1/24
+#  |        |         |         |        |
+#  +-------------------------------------+
+#           |         |         |
+#  +-------------------------------------+
+#  |        |         |         |        |
+#  |    +---+---------+---------+---+    |  Gateway
+#  |    |            br0            |    |  192.0.2.254/24
+#  |    +-------------+-------------+    |  2001:db8::254/24
+#  |                  |                  |
+#  +-------------------------------------+
+#                     |
+#  +-------------------------------------+
+#  |                  |                  |  Client
+#  |                  +                  |  192.0.2.10/24
+#  |                eth0                 |  2001:db8::10/24
+#  +-------------------------------------+
+
+s_ns="s-$(mktemp -u XXXXXX)"
+c_ns="c-$(mktemp -u XXXXXX)"
+g_ns="g-$(mktemp -u XXXXXX)"
+s_ip4="192.0.2.1"
+c_ip4="192.0.2.10"
+g_ip4="192.0.2.254"
+s_ip6="2001:db8::1"
+c_ip6="2001:db8::10"
+g_ip6="2001:db8::254"
+
+gateway_create()
+{
+	ip netns add ${g_ns}
+	ip -n ${g_ns} link add br0 type bridge
+	ip -n ${g_ns} link set br0 up
+	ip -n ${g_ns} addr add ${g_ip4}/24 dev br0
+	ip -n ${g_ns} addr add ${g_ip6}/24 dev br0
+}
+
+gateway_destroy()
+{
+	ip -n ${g_ns} link del br0
+	ip netns del ${g_ns}
+}
+
+server_create()
+{
+	ip netns add ${s_ns}
+	ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100
+
+	for i in $(seq 0 2); do
+		ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+
+		ip -n ${g_ns} link set s${i} up
+		ip -n ${g_ns} link set s${i} master br0
+		ip -n ${s_ns} link set eth${i} master bond0
+	done
+
+	ip -n ${s_ns} link set bond0 up
+	ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+	ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+	sleep 2
+}
+
+# Reset bond with new mode and options
+bond_reset()
+{
+	local param="$1"
+
+	ip -n ${s_ns} link set bond0 down
+	ip -n ${s_ns} link del bond0
+
+	ip -n ${s_ns} link add bond0 type bond $param
+	for i in $(seq 0 2); do
+		ip -n ${s_ns} link set eth$i master bond0
+	done
+
+	ip -n ${s_ns} link set bond0 up
+	ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+	ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+	sleep 2
+}
+
+server_destroy()
+{
+	for i in $(seq 0 2); do
+		ip -n ${s_ns} link del eth${i}
+	done
+	ip netns del ${s_ns}
+}
+
+client_create()
+{
+	ip netns add ${c_ns}
+	ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns}
+
+	ip -n ${g_ns} link set c0 up
+	ip -n ${g_ns} link set c0 master br0
+
+	ip -n ${c_ns} link set eth0 up
+	ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0
+	ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0
+}
+
+client_destroy()
+{
+	ip -n ${c_ns} link del eth0
+	ip netns del ${c_ns}
+}
+
+setup_prepare()
+{
+	gateway_create
+	server_create
+	client_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	client_destroy
+	server_destroy
+	gateway_destroy
+}
+
+bond_check_connection()
+{
+	local msg=${1:-"check connection"}
+
+	sleep 2
+	ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null
+	check_err $? "${msg}: ping failed"
+	ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null
+	check_err $? "${msg}: ping6 failed"
+}
diff --git a/tools/testing/selftests/drivers/net/bonding/option_prio.sh b/tools/testing/selftests/drivers/net/bonding/option_prio.sh
deleted file mode 100755
index c32eebff5005..000000000000
--- a/tools/testing/selftests/drivers/net/bonding/option_prio.sh
+++ /dev/null
@@ -1,245 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Test bonding option prio
-#
-
-ALL_TESTS="
-	prio_arp_ip_target_test
-	prio_miimon_test
-"
-
-REQUIRE_MZ=no
-REQUIRE_JQ=no
-NUM_NETIFS=0
-lib_dir=$(dirname "$0")
-source "$lib_dir"/net_forwarding_lib.sh
-
-destroy()
-{
-	ip link del bond0 &>/dev/null
-	ip link del br0 &>/dev/null
-	ip link del veth0 &>/dev/null
-	ip link del veth1 &>/dev/null
-	ip link del veth2 &>/dev/null
-	ip netns del ns1 &>/dev/null
-	ip link del veth3 &>/dev/null
-}
-
-cleanup()
-{
-	pre_cleanup
-
-	destroy
-}
-
-skip()
-{
-        local skip=1
-	ip link add name bond0 type bond mode 1 miimon 100 &>/dev/null
-	ip link add name veth0 type veth peer name veth0_p
-	ip link set veth0 master bond0
-
-	# check if iproute support prio option
-	ip link set dev veth0 type bond_slave prio 10
-	[[ $? -ne 0 ]] && skip=0
-
-	# check if bonding support prio option
-	ip -d link show veth0 | grep -q "prio 10"
-	[[ $? -ne 0 ]] && skip=0
-
-	ip link del bond0 &>/dev/null
-	ip link del veth0
-
-	return $skip
-}
-
-active_slave=""
-check_active_slave()
-{
-	local target_active_slave=$1
-	active_slave="$(cat /sys/class/net/bond0/bonding/active_slave)"
-	test "$active_slave" = "$target_active_slave"
-	check_err $? "Current active slave is $active_slave but not $target_active_slave"
-}
-
-
-# Test bonding prio option with mode=$mode monitor=$monitor
-# and primary_reselect=$primary_reselect
-prio_test()
-{
-	RET=0
-
-	local monitor=$1
-	local mode=$2
-	local primary_reselect=$3
-
-	local bond_ip4="192.169.1.2"
-	local peer_ip4="192.169.1.1"
-	local bond_ip6="2009:0a:0b::02"
-	local peer_ip6="2009:0a:0b::01"
-
-
-	# create veths
-	ip link add name veth0 type veth peer name veth0_p
-	ip link add name veth1 type veth peer name veth1_p
-	ip link add name veth2 type veth peer name veth2_p
-
-	# create bond
-	if [[ "$monitor" == "miimon" ]];then
-		ip link add name bond0 type bond mode $mode miimon 100 primary veth1 primary_reselect $primary_reselect
-	elif [[ "$monitor" == "arp_ip_target" ]];then
-		ip link add name bond0 type bond mode $mode arp_interval 1000 arp_ip_target $peer_ip4 primary veth1 primary_reselect $primary_reselect
-	elif [[ "$monitor" == "ns_ip6_target" ]];then
-		ip link add name bond0 type bond mode $mode arp_interval 1000 ns_ip6_target $peer_ip6 primary veth1 primary_reselect $primary_reselect
-	fi
-	ip link set bond0 up
-	ip link set veth0 master bond0
-	ip link set veth1 master bond0
-	ip link set veth2 master bond0
-	# check bonding member prio value
-	ip link set dev veth0 type bond_slave prio 0
-	ip link set dev veth1 type bond_slave prio 10
-	ip link set dev veth2 type bond_slave prio 11
-	ip -d link show veth0 | grep -q 'prio 0'
-	check_err $? "veth0 prio is not 0"
-	ip -d link show veth1 | grep -q 'prio 10'
-	check_err $? "veth0 prio is not 10"
-	ip -d link show veth2 | grep -q 'prio 11'
-	check_err $? "veth0 prio is not 11"
-
-	ip link set veth0 up
-	ip link set veth1 up
-	ip link set veth2 up
-	ip link set veth0_p up
-	ip link set veth1_p up
-	ip link set veth2_p up
-
-	# prepare ping target
-	ip link add name br0 type bridge
-	ip link set br0 up
-	ip link set veth0_p master br0
-	ip link set veth1_p master br0
-	ip link set veth2_p master br0
-	ip link add name veth3 type veth peer name veth3_p
-	ip netns add ns1
-	ip link set veth3_p master br0 up
-	ip link set veth3 netns ns1 up
-	ip netns exec ns1 ip addr add $peer_ip4/24 dev veth3
-	ip netns exec ns1 ip addr add $peer_ip6/64 dev veth3
-	ip addr add $bond_ip4/24 dev bond0
-	ip addr add $bond_ip6/64 dev bond0
-	sleep 5
-
-	ping $peer_ip4 -c5 -I bond0 &>/dev/null
-	check_err $? "ping failed 1."
-	ping6 $peer_ip6 -c5 -I bond0 &>/dev/null
-	check_err $? "ping6 failed 1."
-
-	# active salve should be the primary slave
-	check_active_slave veth1
-
-	# active slave should be the higher prio slave
-	ip link set $active_slave down
-	ping $peer_ip4 -c5 -I bond0 &>/dev/null
-	check_err $? "ping failed 2."
-	check_active_slave veth2
-
-	# when only 1 slave is up
-	ip link set $active_slave down
-	ping $peer_ip4 -c5 -I bond0 &>/dev/null
-	check_err $? "ping failed 3."
-	check_active_slave veth0
-
-	# when a higher prio slave change to up
-	ip link set veth2 up
-	ping $peer_ip4 -c5 -I bond0 &>/dev/null
-	check_err $? "ping failed 4."
-	case $primary_reselect in
-		"0")
-			check_active_slave "veth2"
-			;;
-		"1")
-			check_active_slave "veth0"
-			;;
-		"2")
-			check_active_slave "veth0"
-			;;
-	esac
-	local pre_active_slave=$active_slave
-
-	# when the primary slave change to up
-	ip link set veth1 up
-	ping $peer_ip4 -c5 -I bond0 &>/dev/null
-	check_err $? "ping failed 5."
-	case $primary_reselect in
-		"0")
-			check_active_slave "veth1"
-			;;
-		"1")
-			check_active_slave "$pre_active_slave"
-			;;
-		"2")
-			check_active_slave "$pre_active_slave"
-			ip link set $active_slave down
-			ping $peer_ip4 -c5 -I bond0 &>/dev/null
-			check_err $? "ping failed 6."
-			check_active_slave "veth1"
-			;;
-	esac
-
-	# Test changing bond salve prio
-	if [[ "$primary_reselect" == "0" ]];then
-		ip link set dev veth0 type bond_slave prio 1000000
-		ip link set dev veth1 type bond_slave prio 0
-		ip link set dev veth2 type bond_slave prio -50
-		ip -d link show veth0 | grep -q 'prio 1000000'
-		check_err $? "veth0 prio is not 1000000"
-		ip -d link show veth1 | grep -q 'prio 0'
-		check_err $? "veth1 prio is not 0"
-		ip -d link show veth2 | grep -q 'prio -50'
-		check_err $? "veth3 prio is not -50"
-		check_active_slave "veth1"
-
-		ip link set $active_slave down
-		ping $peer_ip4 -c5 -I bond0 &>/dev/null
-		check_err $? "ping failed 7."
-		check_active_slave "veth0"
-	fi
-
-	cleanup
-
-	log_test "prio_test" "Test bonding option 'prio' with mode=$mode monitor=$monitor and primary_reselect=$primary_reselect"
-}
-
-prio_miimon_test()
-{
-	local mode
-	local primary_reselect
-
-	for mode in 1 5 6; do
-		for primary_reselect in 0 1 2; do
-			prio_test "miimon" $mode $primary_reselect
-		done
-	done
-}
-
-prio_arp_ip_target_test()
-{
-	local primary_reselect
-
-	for primary_reselect in 0 1 2; do
-		prio_test "arp_ip_target" 1 $primary_reselect
-	done
-}
-
-if skip;then
-	log_test_skip "option_prio.sh" "Current iproute doesn't support 'prio'."
-	exit 0
-fi
-
-trap cleanup EXIT
-
-tests_run
-
-exit "$EXIT_STATUS"
-- 
cgit v1.2.3


From 2e825f8accb4491466677162cd9893fe77aea2f9 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 6 Apr 2023 16:23:52 +0800
Subject: selftests: bonding: add arp validate test

This patch add bonding arp validate tests with mode active backup,
monitor arp_ip_target and ns_ip6_target. It also checks mii_status
to make sure all slaves are UP.

Acked-by: Jonathan Toppins <jtoppins@redhat.com>
Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/bonding/bond_options.sh  | 55 ++++++++++++++++++++++
 1 file changed, 55 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index 7213211d0bde..db29a3146a86 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -5,6 +5,7 @@
 
 ALL_TESTS="
 	prio
+	arp_validate
 "
 
 REQUIRE_MZ=no
@@ -200,6 +201,60 @@ prio()
 	done
 }
 
+arp_validate_test()
+{
+	local param="$1"
+	RET=0
+
+	# create bond
+	bond_reset "${param}"
+
+	bond_check_connection
+	[ $RET -ne 0 ] && log_test "arp_validate" "$retmsg"
+
+	# wait for a while to make sure the mii status stable
+	sleep 5
+	for i in $(seq 0 2); do
+		mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status")
+		if [ ${mii_status} != "UP" ]; then
+			RET=1
+			log_test "arp_validate" "interface eth$i mii_status $mii_status"
+		fi
+	done
+}
+
+arp_validate_arp()
+{
+	local mode=$1
+	local val
+	for val in $(seq 0 6); do
+		arp_validate_test "mode $mode arp_interval 100 arp_ip_target ${g_ip4} arp_validate $val"
+		log_test "arp_validate" "$mode arp_ip_target arp_validate $val"
+	done
+}
+
+arp_validate_ns()
+{
+	local mode=$1
+	local val
+
+	if skip_ns; then
+		log_test_skip "arp_validate ns" "Current iproute or kernel doesn't support bond option 'ns_ip6_target'."
+		return 0
+	fi
+
+	for val in $(seq 0 6); do
+		arp_validate_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6} arp_validate $val"
+		log_test "arp_validate" "$mode ns_ip6_target arp_validate $val"
+	done
+}
+
+arp_validate()
+{
+	arp_validate_arp "active-backup"
+	arp_validate_ns "active-backup"
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 711ae788cbbb82818531b55e32b09518ee09a11a Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Tue, 11 Apr 2023 22:42:12 +0200
Subject: selftests: mptcp: userspace pm: uniform verify events

Simply adding a "sleep" before checking something is usually not a good
idea because the time that has been picked can not be enough or too
much. The best is to wait for events with a timeout.

In this selftest, 'sleep 0.5' is used more than 40 times. It is always
used before calling a 'verify_*' function except for this
verify_listener_events which has been added later.

At the end, using all these 'sleep 0.5' seems to work: the slow CIs
don't complain so far. Also because it doesn't take too much time, we
can just add two more 'sleep 0.5' to uniform what is done before calling
a 'verify_*' function. For the same reasons, we can also delay a bigger
refactoring to replace all these 'sleep 0.5' by functions waiting for
events instead of waiting for a fix time and hope for the best.

Fixes: 6c73008aa301 ("selftests: mptcp: listener test for userspace PM")
Cc: stable@vger.kernel.org
Suggested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/userspace_pm.sh | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 48e52f995a98..b1eb7bce599d 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -913,6 +913,7 @@ test_listener()
 		$client4_port > /dev/null 2>&1 &
 	local listener_pid=$!
 
+	sleep 0.5
 	verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port
 
 	# ADD_ADDR from client to server machine reusing the subflow port
@@ -928,6 +929,7 @@ test_listener()
 	# Delete the listener from the client ns, if one was created
 	kill_wait $listener_pid
 
+	sleep 0.5
 	verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
 }
 
-- 
cgit v1.2.3


From 306dc21361993f4fe50a15d4db6b1a4de5d0adb0 Mon Sep 17 00:00:00 2001
From: Aaron Conole <aconole@redhat.com>
Date: Wed, 12 Apr 2023 07:58:28 -0400
Subject: selftests: openvswitch: adjust datapath NL message declaration

The netlink message for creating a new datapath takes an array
of ports for the PID creation.  This shouldn't cause much issue
but correct it for future cases where we need to do decode of
datapath information that could include the per-cpu PID map.

Fixes: 25f16c873fb1 ("selftests: add openvswitch selftest suite")
Signed-off-by: Aaron Conole <aconole@redhat.com>
Link: https://lore.kernel.org/r/20230412115828.3991806-1-aconole@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 3243c90d449e..5d467d1993cb 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -62,7 +62,7 @@ class OvsDatapath(GenericNetlinkSocket):
         nla_map = (
             ("OVS_DP_ATTR_UNSPEC", "none"),
             ("OVS_DP_ATTR_NAME", "asciiz"),
-            ("OVS_DP_ATTR_UPCALL_PID", "uint32"),
+            ("OVS_DP_ATTR_UPCALL_PID", "array(uint32)"),
             ("OVS_DP_ATTR_STATS", "dpstats"),
             ("OVS_DP_ATTR_MEGAFLOW_STATS", "megaflowstats"),
             ("OVS_DP_ATTR_USER_FEATURES", "uint32"),
-- 
cgit v1.2.3


From 3a0385be133e7091cc9a9a998c7ec712bb9585db Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 12 Apr 2023 11:13:06 -0400
Subject: selftests: add the missing CONFIG_IP_SCTP in net config

The selftest sctp_vrf needs CONFIG_IP_SCTP set in config
when building the kernel, so add it.

Fixes: a61bd7b9fef3 ("selftests: add a selftest for sctp vrf")
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Link: https://lore.kernel.org/r/61dddebc4d2dd98fe7fb145e24d4b2430e42b572.1681312386.git.lucien.xin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/config | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index cc9fd55ab869..2529226ce87c 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -48,3 +48,4 @@ CONFIG_BAREUDP=m
 CONFIG_IPV6_IOAM6_LWTUNNEL=y
 CONFIG_CRYPTO_SM4_GENERIC=y
 CONFIG_AMT=m
+CONFIG_IP_SCTP=m
-- 
cgit v1.2.3


From e8163b98d96c4d87c870689f560c53be7ccd55c8 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 12 Apr 2023 21:48:35 +0200
Subject: selftests/bpf: xdp_hw_metadata remove bpf_printk and add counters

The tool xdp_hw_metadata can be used by driver developers
implementing XDP-hints metadata kfuncs.

Remove all bpf_printk calls, as the tool already transfers all the
XDP-hints related information via metadata area to AF_XDP
userspace process.

Add counters for providing remaining information about failure and
skipped packet events.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/168132891533.340624.7313781245316405141.stgit@firesoul
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/progs/xdp_hw_metadata.c  | 36 +++++++++++++---------
 tools/testing/selftests/bpf/xdp_hw_metadata.c      |  4 ++-
 2 files changed, 24 insertions(+), 16 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
index 4c55b4d79d3d..0687d11162f6 100644
--- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
@@ -12,6 +12,10 @@ struct {
 	__type(value, __u32);
 } xsk SEC(".maps");
 
+__u64 pkts_skip = 0;
+__u64 pkts_fail = 0;
+__u64 pkts_redir = 0;
+
 extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
 					 __u64 *timestamp) __ksym;
 extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
@@ -26,7 +30,7 @@ int rx(struct xdp_md *ctx)
 	struct udphdr *udp = NULL;
 	struct iphdr *iph = NULL;
 	struct xdp_meta *meta;
-	int ret;
+	int err;
 
 	data = (void *)(long)ctx->data;
 	data_end = (void *)(long)ctx->data_end;
@@ -46,17 +50,20 @@ int rx(struct xdp_md *ctx)
 			udp = NULL;
 	}
 
-	if (!udp)
+	if (!udp) {
+		__sync_add_and_fetch(&pkts_skip, 1);
 		return XDP_PASS;
+	}
 
-	if (udp->dest != bpf_htons(9091))
+	/* Forwarding UDP:9091 to AF_XDP */
+	if (udp->dest != bpf_htons(9091)) {
+		__sync_add_and_fetch(&pkts_skip, 1);
 		return XDP_PASS;
+	}
 
-	bpf_printk("forwarding UDP:9091 to AF_XDP");
-
-	ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta));
-	if (ret != 0) {
-		bpf_printk("bpf_xdp_adjust_meta returned %d", ret);
+	err = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta));
+	if (err) {
+		__sync_add_and_fetch(&pkts_fail, 1);
 		return XDP_PASS;
 	}
 
@@ -65,20 +72,19 @@ int rx(struct xdp_md *ctx)
 	meta = data_meta;
 
 	if (meta + 1 > data) {
-		bpf_printk("bpf_xdp_adjust_meta doesn't appear to work");
+		__sync_add_and_fetch(&pkts_fail, 1);
 		return XDP_PASS;
 	}
 
-	if (!bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp))
-		bpf_printk("populated rx_timestamp with %llu", meta->rx_timestamp);
-	else
+	err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp);
+	if (err)
 		meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */
 
-	if (!bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash))
-		bpf_printk("populated rx_hash with %u", meta->rx_hash);
-	else
+	err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash);
+	if (err)
 		meta->rx_hash = 0; /* Used by AF_XDP as not avail signal */
 
+	__sync_add_and_fetch(&pkts_redir, 1);
 	return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
 }
 
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 1c8acb68b977..3b942ef7297b 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -212,7 +212,9 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd)
 	while (true) {
 		errno = 0;
 		ret = poll(fds, rxq + 1, 1000);
-		printf("poll: %d (%d)\n", ret, errno);
+		printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n",
+		       ret, errno, bpf_obj->bss->pkts_skip,
+		       bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir);
 		if (ret < 0)
 			break;
 		if (ret == 0)
-- 
cgit v1.2.3


From 0f26b74e7d071b0dc18e2c43d79d496c2b144035 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 12 Apr 2023 21:49:00 +0200
Subject: selftests/bpf: Adjust bpf_xdp_metadata_rx_hash for new arg
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update BPF selftests to use the new RSS type argument for kfunc
bpf_xdp_metadata_rx_hash.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/168132894068.340624.8914711185697163690.stgit@firesoul
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/xdp_metadata.c |  2 ++
 tools/testing/selftests/bpf/progs/xdp_hw_metadata.c   | 10 +++++-----
 tools/testing/selftests/bpf/progs/xdp_metadata.c      |  6 +++---
 tools/testing/selftests/bpf/progs/xdp_metadata2.c     |  7 ++++---
 tools/testing/selftests/bpf/xdp_hw_metadata.c         |  6 +++++-
 tools/testing/selftests/bpf/xdp_metadata.h            |  4 ++++
 6 files changed, 23 insertions(+), 12 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index aa4beae99f4f..8c5e98da9ae9 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -273,6 +273,8 @@ static int verify_xsk_metadata(struct xsk *xsk)
 	if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash"))
 		return -1;
 
+	ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type");
+
 	xsk_ring_cons__release(&xsk->rx, 1);
 	refill_rx(xsk, comp_addr);
 
diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
index 0687d11162f6..e1c787815e44 100644
--- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
@@ -18,8 +18,8 @@ __u64 pkts_redir = 0;
 
 extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
 					 __u64 *timestamp) __ksym;
-extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
-				    __u32 *hash) __ksym;
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
+				    enum xdp_rss_hash_type *rss_type) __ksym;
 
 SEC("xdp")
 int rx(struct xdp_md *ctx)
@@ -80,9 +80,9 @@ int rx(struct xdp_md *ctx)
 	if (err)
 		meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */
 
-	err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash);
-	if (err)
-		meta->rx_hash = 0; /* Used by AF_XDP as not avail signal */
+	err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type);
+	if (err < 0)
+		meta->rx_hash_err = err; /* Used by AF_XDP as no hash signal */
 
 	__sync_add_and_fetch(&pkts_redir, 1);
 	return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c
index 77678b034389..d151d406a123 100644
--- a/tools/testing/selftests/bpf/progs/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c
@@ -21,8 +21,8 @@ struct {
 
 extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
 					 __u64 *timestamp) __ksym;
-extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
-				    __u32 *hash) __ksym;
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
+				    enum xdp_rss_hash_type *rss_type) __ksym;
 
 SEC("xdp")
 int rx(struct xdp_md *ctx)
@@ -56,7 +56,7 @@ int rx(struct xdp_md *ctx)
 	if (timestamp == 0)
 		meta->rx_timestamp = 1;
 
-	bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash);
+	bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type);
 
 	return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
 }
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata2.c b/tools/testing/selftests/bpf/progs/xdp_metadata2.c
index cf69d05451c3..85f88d9d7a78 100644
--- a/tools/testing/selftests/bpf/progs/xdp_metadata2.c
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata2.c
@@ -5,17 +5,18 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
-				    __u32 *hash) __ksym;
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
+				    enum xdp_rss_hash_type *rss_type) __ksym;
 
 int called;
 
 SEC("freplace/rx")
 int freplace_rx(struct xdp_md *ctx)
 {
+	enum xdp_rss_hash_type type = 0;
 	u32 hash = 0;
 	/* Call _any_ metadata function to make sure we don't crash. */
-	bpf_xdp_metadata_rx_hash(ctx, &hash);
+	bpf_xdp_metadata_rx_hash(ctx, &hash, &type);
 	called++;
 	return XDP_PASS;
 }
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 3b942ef7297b..987cf0db5ebc 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -141,7 +141,11 @@ static void verify_xdp_metadata(void *data)
 	meta = data - sizeof(*meta);
 
 	printf("rx_timestamp: %llu\n", meta->rx_timestamp);
-	printf("rx_hash: %u\n", meta->rx_hash);
+	if (meta->rx_hash_err < 0)
+		printf("No rx_hash err=%d\n", meta->rx_hash_err);
+	else
+		printf("rx_hash: 0x%X with RSS type:0x%X\n",
+		       meta->rx_hash, meta->rx_hash_type);
 }
 
 static void verify_skb_metadata(int fd)
diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h
index f6780fbb0a21..0c4624dc6f2f 100644
--- a/tools/testing/selftests/bpf/xdp_metadata.h
+++ b/tools/testing/selftests/bpf/xdp_metadata.h
@@ -12,4 +12,8 @@
 struct xdp_meta {
 	__u64 rx_timestamp;
 	__u32 rx_hash;
+	union {
+		__u32 rx_hash_type;
+		__s32 rx_hash_err;
+	};
 };
-- 
cgit v1.2.3