summaryrefslogtreecommitdiff
path: root/tools/testing
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing')
-rw-r--r--tools/testing/cxl/Kbuild7
-rw-r--r--tools/testing/cxl/cxl_core_exports.c22
-rw-r--r--tools/testing/cxl/exports.h13
-rw-r--r--tools/testing/cxl/test/cxl_translate.c30
-rw-r--r--tools/testing/cxl/test/mem.c2
-rw-r--r--tools/testing/cxl/test/mock.c36
-rw-r--r--tools/testing/kunit/kunit-completion.sh34
-rwxr-xr-xtools/testing/kunit/kunit.py37
-rw-r--r--tools/testing/kunit/kunit_parser.py3
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py110
-rw-r--r--tools/testing/kunit/qemu_configs/armeb.py16
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-failure-nested.log10
-rw-r--r--tools/testing/memblock/internal.h3
-rw-r--r--tools/testing/radix-tree/idr-test.c21
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/alsa/utimer-test.c1
-rw-r--r--tools/testing/selftests/arm64/Makefile6
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c49
-rw-r--r--tools/testing/selftests/arm64/abi/tpidr2.c3
-rw-r--r--tools/testing/selftests/arm64/fp/fp-pidbench.S6
-rw-r--r--tools/testing/selftests/arm64/gcs/basic-gcs.c40
-rw-r--r--tools/testing/selftests/arm64/mte/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/.gitignore4
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x1
-rw-r--r--tools/testing/selftests/bpf/Makefile28
-rw-r--r--tools/testing/selftests/bpf/bench.c4
-rw-r--r--tools/testing/selftests/bpf/bench.h1
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c1
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh5
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h85
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h3
-rw-r--r--tools/testing/selftests/bpf/bpftool_helpers.c74
-rw-r--r--tools/testing/selftests/bpf/bpftool_helpers.h11
-rw-r--r--tools/testing/selftests/bpf/cgroup_iter_memcg.h18
-rw-r--r--tools/testing/selftests/bpf/config2
-rw-r--r--tools/testing/selftests/bpf/map_tests/task_storage_map.c128
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_list.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_gotox.c208
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpftool_maps_access.c371
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpftool_metadata.c144
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_permute.c244
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_iter.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c223
-rw-r--r--tools/testing/selftests/bpf/prog_tests/d_path.c89
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/exe_ctx.c59
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fsession_test.c140
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_func_args_test.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/iters.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_implicit_args.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_kptr.c38
-rw-r--r--tools/testing/selftests/bpf/prog_tests/percpu_alloc.c335
-rw-r--r--tools/testing/selftests/bpf/prog_tests/resolve_btfids.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c294
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c120
-rw-r--r--tools/testing/selftests/bpf/prog_tests/string_kfuncs.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tailcalls.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_data.h4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_storage.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_assoc.c191
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_task_local_data.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xsk.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c250
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c33
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_start_delete_race.c137
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_failure.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/wq.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c19
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c16
-rw-r--r--tools/testing/selftests/bpf/progs/arena_list.c11
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c6
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c39
-rw-r--r--tools/testing/selftests/bpf/progs/compute_live_registers.c41
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_failure.c2
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/file_reader.c2
-rw-r--r--tools/testing/selftests/bpf/progs/free_timer.c10
-rw-r--r--tools/testing/selftests/bpf/progs/fsession_test.c179
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_args_test.c84
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_ip_test.c23
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c140
-rw-r--r--tools/testing/selftests/bpf/progs/iters_css.c9
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_implicit_args.c41
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_override.c15
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c15
-rw-r--r--tools/testing/selftests/bpf/progs/local_storage.c19
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr.c18
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr_fail.c4
-rw-r--r--tools/testing/selftests/bpf/progs/percpu_alloc_array.c32
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.h2
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h6
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_fail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c10
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c30
-rw-r--r--tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c38
-rw-r--r--tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c12
-rw-r--r--tools/testing/selftests/bpf/progs/stacktrace_ips.c27
-rw-r--r--tools/testing/selftests/bpf/progs/stream.c53
-rw-r--r--tools/testing/selftests/bpf/progs/stream_fail.c6
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c6
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c1
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_success.c7
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_assoc.c105
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_assoc_in_timer.c77
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_assoc_reuse.c75
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_sleepable.c43
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_data.bpf.h2
-rw-r--r--tools/testing/selftests/bpf/progs/task_ls_recursion.c14
-rw-r--r--tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c7
-rw-r--r--tools/testing/selftests/bpf/progs/task_work.c7
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_fail.c8
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_stress.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c57
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_decl_tag.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_ctx.c48
-rw-r--r--tools/testing/selftests/bpf/progs/test_d_path.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c21
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_meta.c12
-rw-r--r--tools/testing/selftests/bpf/progs/timer.c118
-rw-r--r--tools/testing/selftests/bpf/progs/timer_start_deadlock.c70
-rw-r--r--tools/testing/selftests/bpf/progs/timer_start_delete_race.c66
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c46
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi_session.c7
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c15
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c11
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena.c223
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena_globals1.c87
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena_globals2.c49
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena_large.c50
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_async_cb_context.c8
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bswap.c43
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_default_trusted_ptr.c29
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_div_mod_bounds.c1149
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_helper_restricted.c111
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_jit_inline.c20
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_linked_scalars.c336
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_lsm.c31
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_scalar_ids.c53
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subreg.c153
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_unpriv.c22
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c7
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_xdp.c35
-rw-r--r--tools/testing/selftests/bpf/progs/wq_failures.c4
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_map.sh398
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_metadata.sh85
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h10
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c150
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h13
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h12
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/direct_value_access.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c4
-rw-r--r--tools/testing/selftests/bpf/veristat.c2
-rw-r--r--tools/testing/selftests/cgroup/lib/cgroup_util.c21
-rw-r--r--tools/testing/selftests/cgroup/lib/include/cgroup_util.h5
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh29
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c33
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c20
-rw-r--r--tools/testing/selftests/coredump/coredump_test_helpers.c2
-rw-r--r--tools/testing/selftests/damon/access_memory.c29
-rwxr-xr-xtools/testing/selftests/damon/sysfs_memcg_path_leak.sh26
-rwxr-xr-xtools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py41
-rw-r--r--tools/testing/selftests/dm-verity/Makefile5
-rw-r--r--tools/testing/selftests/dm-verity/config10
-rwxr-xr-xtools/testing/selftests/dm-verity/test-dm-verity-keyring.sh873
-rw-r--r--tools/testing/selftests/drivers/net/Makefile6
-rw-r--r--tools/testing/selftests/drivers/net/gro.c543
-rwxr-xr-xtools/testing/selftests/drivers/net/gro.py166
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile1
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devmem.py19
-rw-r--r--tools/testing/selftests/drivers/net/hw/iou-zcrx.c72
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/iou-zcrx.py157
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/__init__.py4
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c12
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/nic_timestamp.py128
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_drv.py88
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_flow_label.py11
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_input_xfrm.py44
-rw-r--r--tools/testing/selftests/drivers/net/hw/toeplitz.c32
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/toeplitz.py23
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py8
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh39
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh4
-rw-r--r--tools/testing/selftests/drivers/net/netconsole/Makefile19
-rw-r--r--tools/testing/selftests/drivers/net/netconsole/config6
-rwxr-xr-xtools/testing/selftests/drivers/net/netconsole/netcons_basic.sh (renamed from tools/testing/selftests/drivers/net/netcons_basic.sh)2
-rwxr-xr-xtools/testing/selftests/drivers/net/netconsole/netcons_cmdline.sh (renamed from tools/testing/selftests/drivers/net/netcons_cmdline.sh)2
-rwxr-xr-xtools/testing/selftests/drivers/net/netconsole/netcons_fragmented_msg.sh (renamed from tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh)2
-rwxr-xr-xtools/testing/selftests/drivers/net/netconsole/netcons_overflow.sh (renamed from tools/testing/selftests/drivers/net/netcons_overflow.sh)2
-rwxr-xr-xtools/testing/selftests/drivers/net/netconsole/netcons_resume.sh124
-rwxr-xr-xtools/testing/selftests/drivers/net/netconsole/netcons_sysdata.sh (renamed from tools/testing/selftests/drivers/net/netcons_sysdata.sh)2
-rwxr-xr-xtools/testing/selftests/drivers/net/netconsole/netcons_torture.sh (renamed from tools/testing/selftests/drivers/net/netcons_torture.sh)2
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/peer.sh59
-rwxr-xr-xtools/testing/selftests/drivers/net/psp.py12
-rw-r--r--tools/testing/selftests/drivers/net/psp_responder.c50
-rw-r--r--tools/testing/selftests/filesystems/anon_inode_test.c5
-rw-r--r--tools/testing/selftests/filesystems/open_tree_ns/.gitignore1
-rw-r--r--tools/testing/selftests/filesystems/open_tree_ns/Makefile10
-rw-r--r--tools/testing/selftests/filesystems/open_tree_ns/open_tree_ns_test.c1030
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount.h15
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test.c261
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test_ns.c101
-rw-r--r--tools/testing/selftests/filesystems/utils.c26
-rw-r--r--tools/testing/selftests/filesystems/utils.h1
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc18
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc5
-rw-r--r--tools/testing/selftests/hid/Makefile2
-rw-r--r--tools/testing/selftests/hid/progs/hid_bpf_helpers.h8
-rw-r--r--tools/testing/selftests/hid/tests/conftest.py14
-rw-r--r--tools/testing/selftests/hid/tests/test_multitouch.py61
-rw-r--r--tools/testing/selftests/iommu/iommufd.c8
-rw-r--r--tools/testing/selftests/kselftest_harness.h8
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm11
-rw-r--r--tools/testing/selftests/kvm/arm64/idreg-idst.c117
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c1
-rw-r--r--tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h2
-rw-r--r--tools/testing/selftests/kvm/include/arm64/processor.h4
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h44
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h1
-rw-r--r--tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h1
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h2
-rw-r--r--tools/testing/selftests/kvm/include/s390/kvm_util_arch.h1
-rw-r--r--tools/testing/selftests/kvm/include/x86/apic.h7
-rw-r--r--tools/testing/selftests/kvm/include/x86/kvm_util_arch.h22
-rw-r--r--tools/testing/selftests/kvm/include/x86/processor.h65
-rw-r--r--tools/testing/selftests/kvm/include/x86/svm.h3
-rw-r--r--tools/testing/selftests/kvm/include/x86/svm_util.h9
-rw-r--r--tools/testing/selftests/kvm/include/x86/vmx.h16
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/processor.c47
-rw-r--r--tools/testing/selftests/kvm/lib/guest_modes.c41
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c63
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/processor.c28
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c101
-rw-r--r--tools/testing/selftests/kvm/lib/s390/processor.c16
-rw-r--r--tools/testing/selftests/kvm/lib/x86/memstress.c65
-rw-r--r--tools/testing/selftests/kvm/lib/x86/processor.c233
-rw-r--r--tools/testing/selftests/kvm/lib/x86/svm.c27
-rw-r--r--tools/testing/selftests/kvm/lib/x86/vmx.c253
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c12
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c1
-rw-r--r--tools/testing/selftests/kvm/s390/keyop.c299
-rw-r--r--tools/testing/selftests/kvm/steal_time.c96
-rw-r--r--tools/testing/selftests/kvm/x86/amx_test.c144
-rw-r--r--tools/testing/selftests/kvm/x86/cpuid_test.c15
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c2
-rw-r--r--tools/testing/selftests/kvm/x86/nested_dirty_log_test.c293
-rw-r--r--tools/testing/selftests/kvm/x86/nested_set_state_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c)128
-rw-r--r--tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c197
-rw-r--r--tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c155
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c179
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_tpr_test.c276
-rw-r--r--tools/testing/selftests/landlock/.gitignore1
-rw-r--r--tools/testing/selftests/landlock/Makefile1
-rw-r--r--tools/testing/selftests/landlock/base_test.c8
-rw-r--r--tools/testing/selftests/landlock/common.h1
-rw-r--r--tools/testing/selftests/landlock/fs_bench.c214
-rw-r--r--tools/testing/selftests/landlock/fs_test.c34
-rw-r--r--tools/testing/selftests/landlock/net_test.c30
-rw-r--r--tools/testing/selftests/landlock/ptrace_test.c154
-rw-r--r--tools/testing/selftests/landlock/scoped_abstract_unix_test.c23
-rw-r--r--tools/testing/selftests/landlock/scoped_base_variants.h9
-rw-r--r--tools/testing/selftests/landlock/tsync_test.c161
-rw-r--r--tools/testing/selftests/lkdtm/tests.txt4
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c113
-rw-r--r--tools/testing/selftests/mm/.gitignore1
-rw-r--r--tools/testing/selftests/mm/Makefile48
-rwxr-xr-xtools/testing/selftests/mm/charge_reserved_hugetlb.sh55
-rwxr-xr-xtools/testing/selftests/mm/check_config.sh3
-rw-r--r--tools/testing/selftests/mm/config2
-rw-r--r--tools/testing/selftests/mm/cow.c43
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb-madvise.c9
-rwxr-xr-xtools/testing/selftests/mm/ksft_compaction.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_cow.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_gup_test.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_hmm.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_hugetlb.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_hugevm.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_ksm.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_ksm_numa.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_madv_guard.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_madv_populate.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_mdwe.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_memfd_secret.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_memory_failure.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_migration.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_mkdirty.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_mlock.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_mmap.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_mremap.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_page_frag.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_pagemap.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_pfnmap.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_pkey.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_process_madv.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_process_mrelease.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_rmap.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_soft_dirty.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_thp.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_userfaultfd.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_vma_merge.sh4
-rwxr-xr-xtools/testing/selftests/mm/ksft_vmalloc.sh4
-rw-r--r--tools/testing/selftests/mm/memory-failure.c359
-rw-r--r--tools/testing/selftests/mm/merge.c384
-rw-r--r--tools/testing/selftests/mm/page_frag/Makefile2
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c15
-rw-r--r--tools/testing/selftests/mm/pfnmap.c93
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh45
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c6
-rwxr-xr-xtools/testing/selftests/mm/test_vmalloc.sh31
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c2
-rw-r--r--tools/testing/selftests/mm/va_high_addr_switch.c10
-rwxr-xr-xtools/testing/selftests/mm/va_high_addr_switch.sh12
-rw-r--r--tools/testing/selftests/mm/virtual_address_range.c260
-rw-r--r--tools/testing/selftests/mm/vm_util.c41
-rw-r--r--tools/testing/selftests/mm/vm_util.h10
-rw-r--r--tools/testing/selftests/mm/write_to_hugetlbfs.c9
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile19
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile7
-rwxr-xr-xtools/testing/selftests/net/amt.sh7
-rw-r--r--tools/testing/selftests/net/config3
-rwxr-xr-xtools/testing/selftests/net/double_udp_encap.sh393
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh7
-rwxr-xr-xtools/testing/selftests/net/fib-onlink-tests.sh99
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh15
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh70
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb_max.sh90
-rw-r--r--tools/testing/selftests/net/forwarding/config1
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh18
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_dsfield.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_ip.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh26
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh76
-rw-r--r--tools/testing/selftests/net/hsr/Makefile2
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_ping.sh207
-rwxr-xr-xtools/testing/selftests/net/hsr/link_faults.sh378
-rwxr-xr-xtools/testing/selftests/net/hsr/prp_ping.sh147
-rw-r--r--tools/testing/selftests/net/hsr/settings2
-rw-r--r--tools/testing/selftests/net/icmp_rfc4884.c679
-rwxr-xr-xtools/testing/selftests/net/ioam6.sh2
-rw-r--r--tools/testing/selftests/net/ipsec.c11
-rwxr-xr-xtools/testing/selftests/net/ipvtap_test.sh168
-rw-r--r--tools/testing/selftests/net/lib.sh5
-rw-r--r--tools/testing/selftests/net/lib/csum.c2
-rw-r--r--tools/testing/selftests/net/lib/ksft.h6
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py4
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py44
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py32
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile2
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c101
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_splice.sh5
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_diag.c28
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_inq.c5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh188
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c5
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh4
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c11
-rw-r--r--tools/testing/selftests/net/netfilter/config1
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_clash.sh9
-rw-r--r--tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c13
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh2
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh45
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh69
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh142
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt2
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh11
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt19
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt22
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt34
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt12
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt31
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt70
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt12
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt35
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt14
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt16
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt39
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt19
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt19
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt16
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt22
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt15
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt35
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt70
-rw-r--r--tools/testing/selftests/net/tap.c16
-rw-r--r--tools/testing/selftests/net/tfo.c14
-rwxr-xr-xtools/testing/selftests/net/tfo_passive.sh13
-rw-r--r--tools/testing/selftests/net/tls.c18
-rw-r--r--tools/testing/selftests/net/tun.c898
-rw-r--r--tools/testing/selftests/net/tuntap_helpers.h390
-rw-r--r--tools/testing/selftests/net/txtimestamp.c10
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh64
-rw-r--r--tools/testing/selftests/nolibc/Makefile14
-rw-r--r--tools/testing/selftests/nolibc/Makefile.nolibc8
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c86
-rw-r--r--tools/testing/selftests/pci_endpoint/pci_endpoint_test.c17
-rw-r--r--tools/testing/selftests/pidfd/pidfd_info_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore1
-rwxr-xr-xtools/testing/selftests/ptp/phc.sh60
-rw-r--r--tools/testing/selftests/rcutorture/.gitignore1
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/config2csv.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-series.sh184
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh40
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/mktestid.sh2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE011
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE021
-rw-r--r--tools/testing/selftests/resctrl/cat_test.c6
-rw-r--r--tools/testing/selftests/resctrl/resctrl.h8
-rw-r--r--tools/testing/selftests/resctrl/resctrl_tests.c28
-rw-r--r--tools/testing/selftests/resctrl/resctrlfs.c10
-rw-r--r--tools/testing/selftests/riscv/Makefile2
-rw-r--r--tools/testing/selftests/riscv/cfi/.gitignore2
-rw-r--r--tools/testing/selftests/riscv/cfi/Makefile23
-rw-r--r--tools/testing/selftests/riscv/cfi/cfi_rv_test.h82
-rw-r--r--tools/testing/selftests/riscv/cfi/cfitests.c173
-rw-r--r--tools/testing/selftests/riscv/cfi/shadowstack.c385
-rw-r--r--tools/testing/selftests/riscv/cfi/shadowstack.h27
-rw-r--r--tools/testing/selftests/riscv/hwprobe/which-cpus.c18
-rw-r--r--tools/testing/selftests/riscv/vector/.gitignore2
-rw-r--r--tools/testing/selftests/riscv/vector/Makefile10
-rw-r--r--tools/testing/selftests/riscv/vector/v_helpers.c23
-rw-r--r--tools/testing/selftests/riscv/vector/v_helpers.h2
-rw-r--r--tools/testing/selftests/riscv/vector/validate_v_ptrace.c915
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c8
-rw-r--r--tools/testing/selftests/rseq/.gitignore1
-rw-r--r--tools/testing/selftests/rseq/Makefile5
-rw-r--r--tools/testing/selftests/rseq/rseq-abi.h27
-rw-r--r--tools/testing/selftests/rseq/rseq-slice-hist.py132
-rw-r--r--tools/testing/selftests/rseq/slice_test.c219
-rwxr-xr-xtools/testing/selftests/run_kselftest.sh11
-rw-r--r--tools/testing/selftests/sched_ext/Makefile2
-rw-r--r--tools/testing/selftests/sched_ext/init_enable_count.c34
-rw-r--r--tools/testing/selftests/sched_ext/rt_stall.bpf.c23
-rw-r--r--tools/testing/selftests/sched_ext/rt_stall.c240
-rw-r--r--tools/testing/selftests/sched_ext/runner.c8
-rw-r--r--tools/testing/selftests/sched_ext/total_bw.c281
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json93
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json78
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake_mq.json559
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json25
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c74
-rw-r--r--tools/testing/selftests/ublk/.gitignore6
-rw-r--r--tools/testing/selftests/ublk/Makefile77
-rw-r--r--tools/testing/selftests/ublk/batch.c607
-rw-r--r--tools/testing/selftests/ublk/common.c6
-rw-r--r--tools/testing/selftests/ublk/fault_inject.c1
-rw-r--r--tools/testing/selftests/ublk/file_backed.c104
-rw-r--r--tools/testing/selftests/ublk/kublk.c364
-rw-r--r--tools/testing/selftests/ublk/kublk.h259
-rw-r--r--tools/testing/selftests/ublk/metadata_size.c36
-rw-r--r--tools/testing/selftests/ublk/null.c19
-rw-r--r--tools/testing/selftests/ublk/settings1
-rw-r--r--tools/testing/selftests/ublk/stripe.c25
-rwxr-xr-xtools/testing/selftests/ublk/test_batch_01.sh31
-rwxr-xr-xtools/testing/selftests/ublk/test_batch_02.sh29
-rwxr-xr-xtools/testing/selftests/ublk/test_batch_03.sh29
-rwxr-xr-xtools/testing/selftests/ublk/test_common.sh92
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_01.sh48
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_02.sh23
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_03.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_06.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_07.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_08.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_09.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_10.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_12.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_13.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_16.sh56
-rwxr-xr-xtools/testing/selftests/ublk/test_integrity_01.sh105
-rwxr-xr-xtools/testing/selftests/ublk/test_integrity_02.sh141
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_01.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_02.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_03.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_04.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_05.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_06.sh24
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_07.sh20
-rwxr-xr-xtools/testing/selftests/ublk/test_null_01.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_null_02.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_null_03.sh23
-rwxr-xr-xtools/testing/selftests/ublk/test_part_01.sh104
-rwxr-xr-xtools/testing/selftests/ublk/test_part_02.sh67
-rwxr-xr-xtools/testing/selftests/ublk/test_recover_01.sh (renamed from tools/testing/selftests/ublk/test_generic_04.sh)8
-rwxr-xr-xtools/testing/selftests/ublk/test_recover_02.sh (renamed from tools/testing/selftests/ublk/test_generic_05.sh)8
-rwxr-xr-xtools/testing/selftests/ublk/test_recover_03.sh (renamed from tools/testing/selftests/ublk/test_generic_11.sh)3
-rwxr-xr-xtools/testing/selftests/ublk/test_recover_04.sh39
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_01.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_02.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_03.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_04.sh13
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_05.sh11
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_06.sh38
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_07.sh38
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_08.sh44
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_09.sh43
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_01.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_02.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_03.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_04.sh1
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_05.sh25
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_06.sh20
-rw-r--r--tools/testing/selftests/ublk/trace/seq_io.bt48
-rw-r--r--tools/testing/selftests/ublk/utils.h64
-rw-r--r--tools/testing/selftests/vDSO/vdso_config.h4
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_abi.c55
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getcpu.c4
-rw-r--r--tools/testing/selftests/vDSO/vgetrandom-chacha.S4
-rw-r--r--tools/testing/selftests/vfio/Makefile10
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio.h9
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/iommu.h6
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h1
-rw-r--r--tools/testing/selftests/vfio/lib/iommu.c13
-rw-r--r--tools/testing/selftests/vfio/lib/iova_allocator.c1
-rw-r--r--tools/testing/selftests/vfio/lib/libvfio.c25
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_device.c25
-rw-r--r--tools/testing/selftests/vfio/vfio_dma_mapping_mmio_test.c143
-rw-r--r--tools/testing/selftests/vfio/vfio_dma_mapping_test.c9
-rw-r--r--tools/testing/selftests/vfio/vfio_iommufd_setup_test.c1
-rw-r--r--tools/testing/selftests/vsock/settings2
-rwxr-xr-xtools/testing/selftests/vsock/vmtest.sh1055
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config2
-rw-r--r--tools/testing/selftests/x86/Makefile1
-rw-r--r--tools/testing/selftests/x86/sysret_rip.c12
-rw-r--r--tools/testing/shared/linux/kernel.h4
-rw-r--r--tools/testing/vma/Makefile7
-rw-r--r--tools/testing/vma/include/custom.h119
-rw-r--r--tools/testing/vma/include/dup.h1320
-rw-r--r--tools/testing/vma/include/stubs.h428
-rw-r--r--tools/testing/vma/main.c55
-rw-r--r--tools/testing/vma/shared.c131
-rw-r--r--tools/testing/vma/shared.h114
-rw-r--r--tools/testing/vma/tests/merge.c (renamed from tools/testing/vma/vma.c)332
-rw-r--r--tools/testing/vma/tests/mmap.c57
-rw-r--r--tools/testing/vma/tests/vma.c339
-rw-r--r--tools/testing/vma/vma_internal.h1843
-rw-r--r--tools/testing/vsock/util.c12
-rw-r--r--tools/testing/vsock/util.h2
-rw-r--r--tools/testing/vsock/vsock_test.c149
-rw-r--r--tools/testing/vsock/vsock_test_zerocopy.c74
-rw-r--r--tools/testing/vsock/vsock_test_zerocopy.h3
605 files changed, 31502 insertions, 6136 deletions
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 0e151d0572d1..53d84a6874b7 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -7,9 +7,10 @@ ldflags-y += --wrap=nvdimm_bus_register
ldflags-y += --wrap=cxl_await_media_ready
ldflags-y += --wrap=devm_cxl_add_rch_dport
ldflags-y += --wrap=cxl_endpoint_parse_cdat
-ldflags-y += --wrap=cxl_dport_init_ras_reporting
ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup
ldflags-y += --wrap=hmat_get_extended_linear_cache_size
+ldflags-y += --wrap=devm_cxl_add_dport_by_dev
+ldflags-y += --wrap=devm_cxl_switch_port_decoders_setup
DRIVERS := ../../../drivers
CXL_SRC := $(DRIVERS)/cxl
@@ -57,12 +58,14 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o
cxl_core-y += $(CXL_CORE_SRC)/hdm.o
cxl_core-y += $(CXL_CORE_SRC)/pmu.o
cxl_core-y += $(CXL_CORE_SRC)/cdat.o
-cxl_core-y += $(CXL_CORE_SRC)/ras.o
cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o
cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o
+cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras.o
+cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras_rch.o
+cxl_core-$(CONFIG_CXL_ATL) += $(CXL_CORE_SRC)/atl.o
cxl_core-y += config_check.o
cxl_core-y += cxl_core_test.o
cxl_core-y += cxl_core_exports.o
diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c
index 6754de35598d..f088792a8925 100644
--- a/tools/testing/cxl/cxl_core_exports.c
+++ b/tools/testing/cxl/cxl_core_exports.c
@@ -2,28 +2,6 @@
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
#include "cxl.h"
-#include "exports.h"
/* Exporting of cxl_core symbols that are only used by cxl_test */
EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, "CXL");
-
-cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev =
- __devm_cxl_add_dport_by_dev;
-EXPORT_SYMBOL_NS_GPL(_devm_cxl_add_dport_by_dev, "CXL");
-
-struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port,
- struct device *dport_dev)
-{
- return _devm_cxl_add_dport_by_dev(port, dport_dev);
-}
-EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport_by_dev, "CXL");
-
-cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup =
- __devm_cxl_switch_port_decoders_setup;
-EXPORT_SYMBOL_NS_GPL(_devm_cxl_switch_port_decoders_setup, "CXL");
-
-int devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
-{
- return _devm_cxl_switch_port_decoders_setup(port);
-}
-EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL");
diff --git a/tools/testing/cxl/exports.h b/tools/testing/cxl/exports.h
deleted file mode 100644
index 7ebee7c0bd67..000000000000
--- a/tools/testing/cxl/exports.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2025 Intel Corporation */
-#ifndef __MOCK_CXL_EXPORTS_H_
-#define __MOCK_CXL_EXPORTS_H_
-
-typedef struct cxl_dport *(*cxl_add_dport_by_dev_fn)(struct cxl_port *port,
- struct device *dport_dev);
-extern cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev;
-
-typedef int(*cxl_switch_decoders_setup_fn)(struct cxl_port *port);
-extern cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup;
-
-#endif
diff --git a/tools/testing/cxl/test/cxl_translate.c b/tools/testing/cxl/test/cxl_translate.c
index 2200ae21795c..16328b2112b2 100644
--- a/tools/testing/cxl/test/cxl_translate.c
+++ b/tools/testing/cxl/test/cxl_translate.c
@@ -68,6 +68,8 @@ static u64 to_hpa(u64 dpa_offset, int pos, u8 r_eiw, u16 r_eig, u8 hb_ways,
/* Calculate base HPA offset from DPA and position */
hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, r_eiw, r_eig);
+ if (hpa_offset == ULLONG_MAX)
+ return ULLONG_MAX;
if (math == XOR_MATH) {
cximsd->nr_maps = hbiw_to_nr_maps[hb_ways];
@@ -258,19 +260,23 @@ static int test_random_params(void)
pos = get_random_u32() % ways;
dpa = get_random_u64() >> 12;
+ reverse_dpa = ULLONG_MAX;
+ reverse_pos = -1;
+
hpa = cxl_calculate_hpa_offset(dpa, pos, eiw, eig);
- reverse_dpa = cxl_calculate_dpa_offset(hpa, eiw, eig);
- reverse_pos = cxl_calculate_position(hpa, eiw, eig);
-
- if (reverse_dpa != dpa || reverse_pos != pos) {
- pr_err("test random iter %d FAIL hpa=%llu, dpa=%llu reverse_dpa=%llu, pos=%d reverse_pos=%d eiw=%u eig=%u\n",
- i, hpa, dpa, reverse_dpa, pos, reverse_pos, eiw,
- eig);
-
- if (failures++ > 10) {
- pr_err("test random too many failures, stop\n");
- break;
- }
+ if (hpa != ULLONG_MAX) {
+ reverse_dpa = cxl_calculate_dpa_offset(hpa, eiw, eig);
+ reverse_pos = cxl_calculate_position(hpa, eiw, eig);
+ if (reverse_dpa == dpa && reverse_pos == pos)
+ continue;
+ }
+
+ pr_err("test random iter %d FAIL hpa=%llu, dpa=%llu reverse_dpa=%llu, pos=%d reverse_pos=%d eiw=%u eig=%u\n",
+ i, hpa, dpa, reverse_dpa, pos, reverse_pos, eiw, eig);
+
+ if (failures++ > 10) {
+ pr_err("test random too many failures, stop\n");
+ break;
}
}
pr_info("..... test random: PASS %d FAIL %d\n", i - failures, failures);
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 176dcde570cd..cb87e8c0e63c 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -1767,7 +1767,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
cxl_mock_add_event_logs(&mdata->mes);
- cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
+ cxlmd = devm_cxl_add_memdev(cxlds, NULL);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 44bce80ef3ff..b8fcb50c1027 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -10,21 +10,12 @@
#include <cxlmem.h>
#include <cxlpci.h>
#include "mock.h"
-#include "../exports.h"
static LIST_HEAD(mock);
-static struct cxl_dport *
-redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port,
- struct device *dport_dev);
-static int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port);
-
void register_cxl_mock_ops(struct cxl_mock_ops *ops)
{
list_add_rcu(&ops->list, &mock);
- _devm_cxl_add_dport_by_dev = redirect_devm_cxl_add_dport_by_dev;
- _devm_cxl_switch_port_decoders_setup =
- redirect_devm_cxl_switch_port_decoders_setup;
}
EXPORT_SYMBOL_GPL(register_cxl_mock_ops);
@@ -32,9 +23,6 @@ DEFINE_STATIC_SRCU(cxl_mock_srcu);
void unregister_cxl_mock_ops(struct cxl_mock_ops *ops)
{
- _devm_cxl_switch_port_decoders_setup =
- __devm_cxl_switch_port_decoders_setup;
- _devm_cxl_add_dport_by_dev = __devm_cxl_add_dport_by_dev;
list_del_rcu(&ops->list);
synchronize_srcu(&cxl_mock_srcu);
}
@@ -163,7 +151,7 @@ __wrap_nvdimm_bus_register(struct device *dev,
}
EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register);
-int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
+int __wrap_devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
{
int rc, index;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
@@ -171,11 +159,12 @@ int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
if (ops && ops->is_mock_port(port->uport_dev))
rc = ops->devm_cxl_switch_port_decoders_setup(port);
else
- rc = __devm_cxl_switch_port_decoders_setup(port);
+ rc = devm_cxl_switch_port_decoders_setup(port);
put_cxl_mock_ops(index);
return rc;
}
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_switch_port_decoders_setup, "CXL");
int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port)
{
@@ -245,20 +234,8 @@ void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, "CXL");
-void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
-{
- int index;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (!ops || !ops->is_mock_port(dport->dport_dev))
- cxl_dport_init_ras_reporting(dport, host);
-
- put_cxl_mock_ops(index);
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, "CXL");
-
-struct cxl_dport *redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port,
- struct device *dport_dev)
+struct cxl_dport *__wrap_devm_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev)
{
int index;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
@@ -267,11 +244,12 @@ struct cxl_dport *redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port,
if (ops && ops->is_mock_port(port->uport_dev))
dport = ops->devm_cxl_add_dport_by_dev(port, dport_dev);
else
- dport = __devm_cxl_add_dport_by_dev(port, dport_dev);
+ dport = devm_cxl_add_dport_by_dev(port, dport_dev);
put_cxl_mock_ops(index);
return dport;
}
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_dport_by_dev, "CXL");
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("cxl_test: emulation module");
diff --git a/tools/testing/kunit/kunit-completion.sh b/tools/testing/kunit/kunit-completion.sh
new file mode 100644
index 000000000000..f053e7b5d265
--- /dev/null
+++ b/tools/testing/kunit/kunit-completion.sh
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+# bash completion support for KUnit
+
+_kunit_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+_kunit()
+{
+ local cur prev words cword
+ _init_completion || return
+
+ local script="${_kunit_dir}/kunit.py"
+
+ if [[ $cword -eq 1 && "$cur" != -* ]]; then
+ local cmds=$(${script} --list-cmds 2>/dev/null)
+ COMPREPLY=($(compgen -W "${cmds}" -- "$cur"))
+ return 0
+ fi
+
+ if [[ "$cur" == -* ]]; then
+ if [[ -n "${words[1]}" && "${words[1]}" != -* ]]; then
+ local opts=$(${script} ${words[1]} --list-opts 2>/dev/null)
+ COMPREPLY=($(compgen -W "${opts}" -- "$cur"))
+ return 0
+ else
+ local opts=$(${script} --list-opts 2>/dev/null)
+ COMPREPLY=($(compgen -W "${opts}" -- "$cur"))
+ return 0
+ fi
+ fi
+}
+
+complete -o default -F _kunit kunit.py
+complete -o default -F _kunit kunit
+complete -o default -F _kunit ./tools/testing/kunit/kunit.py
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index cd99c1956331..4ec5ecba6d49 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -323,11 +323,27 @@ def get_default_jobs() -> int:
return ncpu
raise RuntimeError("os.cpu_count() returned None")
+def get_default_build_dir() -> str:
+ if 'KBUILD_OUTPUT' in os.environ:
+ return os.path.join(os.environ['KBUILD_OUTPUT'], '.kunit')
+ return '.kunit'
+
+def add_completion_opts(parser: argparse.ArgumentParser) -> None:
+ parser.add_argument('--list-opts',
+ help=argparse.SUPPRESS,
+ action='store_true')
+
+def add_root_opts(parser: argparse.ArgumentParser) -> None:
+ parser.add_argument('--list-cmds',
+ help=argparse.SUPPRESS,
+ action='store_true')
+ add_completion_opts(parser)
+
def add_common_opts(parser: argparse.ArgumentParser) -> None:
parser.add_argument('--build_dir',
help='As in the make command, it specifies the build '
'directory.',
- type=str, default='.kunit', metavar='DIR')
+ type=str, default=get_default_build_dir(), metavar='DIR')
parser.add_argument('--make_options',
help='X=Y make option, can be repeated.',
action='append', metavar='X=Y')
@@ -374,6 +390,8 @@ def add_common_opts(parser: argparse.ArgumentParser) -> None:
help='Additional QEMU arguments, e.g. "-smp 8"',
action='append', metavar='')
+ add_completion_opts(parser)
+
def add_build_opts(parser: argparse.ArgumentParser) -> None:
parser.add_argument('--jobs',
help='As in the make command, "Specifies the number of '
@@ -569,6 +587,7 @@ subcommand_handlers_map = {
def main(argv: Sequence[str]) -> None:
parser = argparse.ArgumentParser(
description='Helps writing and running KUnit tests.')
+ add_root_opts(parser)
subparser = parser.add_subparsers(dest='subcommand')
# The 'run' command will config, build, exec, and parse in one go.
@@ -603,12 +622,28 @@ def main(argv: Sequence[str]) -> None:
parse_parser.add_argument('file',
help='Specifies the file to read results from.',
type=str, nargs='?', metavar='input_file')
+ add_completion_opts(parse_parser)
cli_args = parser.parse_args(massage_argv(argv))
if get_kernel_root_path():
os.chdir(get_kernel_root_path())
+ if cli_args.list_cmds:
+ print(" ".join(subparser.choices.keys()))
+ return
+
+ if cli_args.list_opts:
+ target_parser = subparser.choices.get(cli_args.subcommand)
+ if not target_parser:
+ target_parser = parser
+
+ # Accessing private attribute _option_string_actions to get
+ # the list of options. This is not a public API, but argparse
+ # does not provide a way to inspect options programmatically.
+ print(' '.join(target_parser._option_string_actions.keys()))
+ return
+
subcomand_handler = subcommand_handlers_map.get(cli_args.subcommand, None)
if subcomand_handler is None:
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 333cd3a4a56b..5338489dcbe4 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -689,6 +689,9 @@ def bubble_up_test_results(test: Test) -> None:
elif test.counts.get_status() == TestStatus.TEST_CRASHED:
test.status = TestStatus.TEST_CRASHED
+ if status == TestStatus.FAILURE and test.counts.get_status() == TestStatus.SUCCESS:
+ counts.add_status(status)
+
def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: bool, printer: Printer) -> Test:
"""
Finds next test to parse in LineStream, creates new Test object,
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index bbba921e0eac..b67408147c1f 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -11,11 +11,13 @@ from unittest import mock
import tempfile, shutil # Handling test_tmpdir
+import io
import itertools
import json
import os
import signal
import subprocess
+import sys
from typing import Iterable
import kunit_config
@@ -36,7 +38,7 @@ def setUpModule():
def tearDownModule():
shutil.rmtree(test_tmpdir)
-def test_data_path(path):
+def _test_data_path(path):
return os.path.join(abs_test_data_dir, path)
class KconfigTest(unittest.TestCase):
@@ -52,7 +54,7 @@ class KconfigTest(unittest.TestCase):
self.assertFalse(kconfig1.is_subset_of(kconfig0))
def test_read_from_file(self):
- kconfig_path = test_data_path('test_read_from_file.kconfig')
+ kconfig_path = _test_data_path('test_read_from_file.kconfig')
kconfig = kunit_config.parse_file(kconfig_path)
@@ -98,7 +100,7 @@ class KUnitParserTest(unittest.TestCase):
raise AssertionError(f'"{needle}" not found in {list(backup)}!')
def test_output_isolated_correctly(self):
- log_path = test_data_path('test_output_isolated_correctly.log')
+ log_path = _test_data_path('test_output_isolated_correctly.log')
with open(log_path) as file:
result = kunit_parser.extract_tap_lines(file.readlines())
self.assertContains('TAP version 14', result)
@@ -109,7 +111,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertContains('ok 1 - example', result)
def test_output_with_prefix_isolated_correctly(self):
- log_path = test_data_path('test_pound_sign.log')
+ log_path = _test_data_path('test_pound_sign.log')
with open(log_path) as file:
result = kunit_parser.extract_tap_lines(file.readlines())
self.assertContains('TAP version 14', result)
@@ -138,35 +140,46 @@ class KUnitParserTest(unittest.TestCase):
self.assertContains('ok 3 - string-stream-test', result)
def test_parse_successful_test_log(self):
- all_passed_log = test_data_path('test_is_test_passed-all_passed.log')
+ all_passed_log = _test_data_path('test_is_test_passed-all_passed.log')
with open(all_passed_log) as file:
result = kunit_parser.parse_run_tests(file.readlines(), stdout)
self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
self.assertEqual(result.counts.errors, 0)
def test_parse_successful_nested_tests_log(self):
- all_passed_log = test_data_path('test_is_test_passed-all_passed_nested.log')
+ all_passed_log = _test_data_path('test_is_test_passed-all_passed_nested.log')
with open(all_passed_log) as file:
result = kunit_parser.parse_run_tests(file.readlines(), stdout)
self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
self.assertEqual(result.counts.errors, 0)
def test_kselftest_nested(self):
- kselftest_log = test_data_path('test_is_test_passed-kselftest.log')
+ kselftest_log = _test_data_path('test_is_test_passed-kselftest.log')
with open(kselftest_log) as file:
result = kunit_parser.parse_run_tests(file.readlines(), stdout)
self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
self.assertEqual(result.counts.errors, 0)
def test_parse_failed_test_log(self):
- failed_log = test_data_path('test_is_test_passed-failure.log')
+ failed_log = _test_data_path('test_is_test_passed-failure.log')
with open(failed_log) as file:
result = kunit_parser.parse_run_tests(file.readlines(), stdout)
self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status)
self.assertEqual(result.counts.errors, 0)
+ def test_parse_failed_nested_tests_log(self):
+ nested_log = _test_data_path('test_is_test_passed-failure-nested.log')
+ with open(nested_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
+ self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status)
+ self.assertEqual(result.counts.failed, 2)
+ self.assertEqual(kunit_parser.TestStatus.FAILURE, result.subtests[0].status)
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.subtests[0].subtests[0].status)
+ self.assertEqual(kunit_parser.TestStatus.FAILURE, result.subtests[1].status)
+ self.assertEqual(kunit_parser.TestStatus.FAILURE, result.subtests[1].subtests[0].status)
+
def test_no_header(self):
- empty_log = test_data_path('test_is_test_passed-no_tests_run_no_header.log')
+ empty_log = _test_data_path('test_is_test_passed-no_tests_run_no_header.log')
with open(empty_log) as file:
result = kunit_parser.parse_run_tests(
kunit_parser.extract_tap_lines(file.readlines()), stdout)
@@ -175,7 +188,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(result.counts.errors, 1)
def test_missing_test_plan(self):
- missing_plan_log = test_data_path('test_is_test_passed-'
+ missing_plan_log = _test_data_path('test_is_test_passed-'
'missing_plan.log')
with open(missing_plan_log) as file:
result = kunit_parser.parse_run_tests(
@@ -186,7 +199,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
def test_no_tests(self):
- header_log = test_data_path('test_is_test_passed-no_tests_run_with_header.log')
+ header_log = _test_data_path('test_is_test_passed-no_tests_run_with_header.log')
with open(header_log) as file:
result = kunit_parser.parse_run_tests(
kunit_parser.extract_tap_lines(file.readlines()), stdout)
@@ -195,7 +208,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(result.counts.errors, 1)
def test_no_tests_no_plan(self):
- no_plan_log = test_data_path('test_is_test_passed-no_tests_no_plan.log')
+ no_plan_log = _test_data_path('test_is_test_passed-no_tests_no_plan.log')
with open(no_plan_log) as file:
result = kunit_parser.parse_run_tests(
kunit_parser.extract_tap_lines(file.readlines()), stdout)
@@ -207,7 +220,7 @@ class KUnitParserTest(unittest.TestCase):
def test_no_kunit_output(self):
- crash_log = test_data_path('test_insufficient_memory.log')
+ crash_log = _test_data_path('test_insufficient_memory.log')
print_mock = mock.patch('kunit_printer.Printer.print').start()
with open(crash_log) as file:
result = kunit_parser.parse_run_tests(
@@ -218,7 +231,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(result.counts.errors, 1)
def test_skipped_test(self):
- skipped_log = test_data_path('test_skip_tests.log')
+ skipped_log = _test_data_path('test_skip_tests.log')
with open(skipped_log) as file:
result = kunit_parser.parse_run_tests(file.readlines(), stdout)
@@ -227,7 +240,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(result.counts, kunit_parser.TestCounts(passed=4, skipped=1))
def test_skipped_all_tests(self):
- skipped_log = test_data_path('test_skip_all_tests.log')
+ skipped_log = _test_data_path('test_skip_all_tests.log')
with open(skipped_log) as file:
result = kunit_parser.parse_run_tests(file.readlines(), stdout)
@@ -235,7 +248,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(result.counts, kunit_parser.TestCounts(skipped=5))
def test_ignores_hyphen(self):
- hyphen_log = test_data_path('test_strip_hyphen.log')
+ hyphen_log = _test_data_path('test_strip_hyphen.log')
with open(hyphen_log) as file:
result = kunit_parser.parse_run_tests(file.readlines(), stdout)
@@ -249,7 +262,7 @@ class KUnitParserTest(unittest.TestCase):
result.subtests[1].name)
def test_ignores_prefix_printk_time(self):
- prefix_log = test_data_path('test_config_printk_time.log')
+ prefix_log = _test_data_path('test_config_printk_time.log')
with open(prefix_log) as file:
result = kunit_parser.parse_run_tests(file.readlines(), stdout)
self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
@@ -257,7 +270,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(result.counts.errors, 0)
def test_ignores_multiple_prefixes(self):
- prefix_log = test_data_path('test_multiple_prefixes.log')
+ prefix_log = _test_data_path('test_multiple_prefixes.log')
with open(prefix_log) as file:
result = kunit_parser.parse_run_tests(file.readlines(), stdout)
self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
@@ -265,7 +278,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(result.counts.errors, 0)
def test_prefix_mixed_kernel_output(self):
- mixed_prefix_log = test_data_path('test_interrupted_tap_output.log')
+ mixed_prefix_log = _test_data_path('test_interrupted_tap_output.log')
with open(mixed_prefix_log) as file:
result = kunit_parser.parse_run_tests(file.readlines(), stdout)
self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
@@ -273,7 +286,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(result.counts.errors, 0)
def test_prefix_poundsign(self):
- pound_log = test_data_path('test_pound_sign.log')
+ pound_log = _test_data_path('test_pound_sign.log')
with open(pound_log) as file:
result = kunit_parser.parse_run_tests(file.readlines(), stdout)
self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
@@ -281,7 +294,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual(result.counts.errors, 0)
def test_kernel_panic_end(self):
- panic_log = test_data_path('test_kernel_panic_interrupt.log')
+ panic_log = _test_data_path('test_kernel_panic_interrupt.log')
with open(panic_log) as file:
result = kunit_parser.parse_run_tests(file.readlines(), stdout)
self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status)
@@ -289,7 +302,7 @@ class KUnitParserTest(unittest.TestCase):
self.assertGreaterEqual(result.counts.errors, 1)
def test_pound_no_prefix(self):
- pound_log = test_data_path('test_pound_no_prefix.log')
+ pound_log = _test_data_path('test_pound_no_prefix.log')
with open(pound_log) as file:
result = kunit_parser.parse_run_tests(file.readlines(), stdout)
self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
@@ -318,7 +331,7 @@ class KUnitParserTest(unittest.TestCase):
'Failures: all_failed_suite, some_failed_suite.test2')
def test_ktap_format(self):
- ktap_log = test_data_path('test_parse_ktap_output.log')
+ ktap_log = _test_data_path('test_parse_ktap_output.log')
with open(ktap_log) as file:
result = kunit_parser.parse_run_tests(file.readlines(), stdout)
self.assertEqual(result.counts, kunit_parser.TestCounts(passed=3))
@@ -327,13 +340,13 @@ class KUnitParserTest(unittest.TestCase):
self.assertEqual('case_2', result.subtests[0].subtests[1].name)
def test_parse_subtest_header(self):
- ktap_log = test_data_path('test_parse_subtest_header.log')
+ ktap_log = _test_data_path('test_parse_subtest_header.log')
with open(ktap_log) as file:
kunit_parser.parse_run_tests(file.readlines(), stdout)
self.print_mock.assert_any_call(StrContains('suite (1 subtest)'))
def test_parse_attributes(self):
- ktap_log = test_data_path('test_parse_attributes.log')
+ ktap_log = _test_data_path('test_parse_attributes.log')
with open(ktap_log) as file:
result = kunit_parser.parse_run_tests(file.readlines(), stdout)
@@ -466,7 +479,8 @@ class LinuxSourceTreeTest(unittest.TestCase):
want_kconfig = kunit_config.Kconfig()
want_kconfig.add_entry('NOT_REAL', 'y')
- tree = kunit_kernel.LinuxSourceTree('', kconfig_add=['CONFIG_NOT_REAL=y'])
+ tree = kunit_kernel.LinuxSourceTree('', kunitconfig_paths=[os.devnull],
+ kconfig_add=['CONFIG_NOT_REAL=y'])
self.assertTrue(want_kconfig.is_subset_of(tree._kconfig), msg=tree._kconfig)
def test_invalid_arch(self):
@@ -478,7 +492,7 @@ class LinuxSourceTreeTest(unittest.TestCase):
return subprocess.Popen(['echo "hi\nbye"'], shell=True, text=True, stdout=subprocess.PIPE)
with tempfile.TemporaryDirectory('') as build_dir:
- tree = kunit_kernel.LinuxSourceTree(build_dir)
+ tree = kunit_kernel.LinuxSourceTree(build_dir, kunitconfig_paths=[os.devnull])
mock.patch.object(tree._ops, 'start', side_effect=fake_start).start()
with self.assertRaises(ValueError):
@@ -555,7 +569,7 @@ class KUnitJsonTest(unittest.TestCase):
self.addCleanup(mock.patch.stopall)
def _json_for(self, log_file):
- with open(test_data_path(log_file)) as file:
+ with open(_test_data_path(log_file)) as file:
test_result = kunit_parser.parse_run_tests(file, stdout)
json_obj = kunit_json.get_json_result(
test=test_result,
@@ -596,11 +610,12 @@ class StrContains(str):
class KUnitMainTest(unittest.TestCase):
def setUp(self):
- path = test_data_path('test_is_test_passed-all_passed.log')
+ path = _test_data_path('test_is_test_passed-all_passed.log')
with open(path) as file:
all_passed_log = file.readlines()
self.print_mock = mock.patch('kunit_printer.Printer.print').start()
+ mock.patch.dict(os.environ, clear=True).start()
self.addCleanup(mock.patch.stopall)
self.mock_linux_init = mock.patch.object(kunit_kernel, 'LinuxSourceTree').start()
@@ -723,6 +738,24 @@ class KUnitMainTest(unittest.TestCase):
args=None, build_dir=build_dir, filter_glob='', filter='', filter_action=None, timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
+ @mock.patch.dict(os.environ, {'KBUILD_OUTPUT': '/tmp'})
+ def test_run_builddir_from_env(self):
+ build_dir = '/tmp/.kunit'
+ kunit.main(['run'])
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ args=None, build_dir=build_dir, filter_glob='', filter='', filter_action=None, timeout=300)
+ self.print_mock.assert_any_call(StrContains('Testing complete.'))
+
+ @mock.patch.dict(os.environ, {'KBUILD_OUTPUT': '/tmp'})
+ def test_run_builddir_override(self):
+ build_dir = '.kunit'
+ kunit.main(['run', '--build_dir=.kunit'])
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ args=None, build_dir=build_dir, filter_glob='', filter='', filter_action=None, timeout=300)
+ self.print_mock.assert_any_call(StrContains('Testing complete.'))
+
def test_config_builddir(self):
build_dir = '.kunit'
kunit.main(['config', '--build_dir', build_dir])
@@ -855,5 +888,24 @@ class KUnitMainTest(unittest.TestCase):
mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test1', filter='', filter_action=None, timeout=300),
])
+ @mock.patch.object(sys, 'stdout', new_callable=io.StringIO)
+ def test_list_cmds(self, mock_stdout):
+ kunit.main(['--list-cmds'])
+ output = mock_stdout.getvalue()
+ output_cmds = sorted(output.split())
+ expected_cmds = sorted(['build', 'config', 'exec', 'parse', 'run'])
+ self.assertEqual(output_cmds, expected_cmds)
+
+ @mock.patch.object(sys, 'stdout', new_callable=io.StringIO)
+ def test_run_list_opts(self, mock_stdout):
+ kunit.main(['run', '--list-opts'])
+ output = mock_stdout.getvalue()
+ output_cmds = set(output.split())
+ self.assertIn('--help', output_cmds)
+ self.assertIn('--kunitconfig', output_cmds)
+ self.assertIn('--jobs', output_cmds)
+ self.assertIn('--kernel_args', output_cmds)
+ self.assertIn('--raw_output', output_cmds)
+
if __name__ == '__main__':
unittest.main()
diff --git a/tools/testing/kunit/qemu_configs/armeb.py b/tools/testing/kunit/qemu_configs/armeb.py
new file mode 100644
index 000000000000..86d326651490
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/armeb.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='arm',
+ kconfig='''
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_ARCH_VIRT=y
+CONFIG_SERIAL_AMBA_PL010=y
+CONFIG_SERIAL_AMBA_PL010_CONSOLE=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y''',
+ qemu_arch='arm',
+ kernel_path='arch/arm/boot/zImage',
+ kernel_command_line='console=ttyAMA0',
+ extra_qemu_params=['-machine', 'virt'])
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-failure-nested.log b/tools/testing/kunit/test_data/test_is_test_passed-failure-nested.log
new file mode 100644
index 000000000000..5498dfd0b0db
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_is_test_passed-failure-nested.log
@@ -0,0 +1,10 @@
+KTAP version 1
+1..2
+ KTAP version 1
+ 1..1
+ ok 1 test 1
+not ok 1 subtest 1
+ KTAP version 1
+ 1..1
+ not ok 1 subsubtest 1
+not ok 2 subtest 2
diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h
index 0ab4b53bb4f3..009b97bbdd22 100644
--- a/tools/testing/memblock/internal.h
+++ b/tools/testing/memblock/internal.h
@@ -15,8 +15,7 @@ bool mirrored_kernelcore = false;
struct page {};
-void memblock_free_pages(struct page *page, unsigned long pfn,
- unsigned int order)
+void memblock_free_pages(unsigned long pfn, unsigned int order)
{
}
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 2f830ff8396c..945144e98507 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -57,6 +57,26 @@ void idr_alloc_test(void)
idr_destroy(&idr);
}
+void idr_alloc2_test(void)
+{
+ int id;
+ struct idr idr = IDR_INIT_BASE(idr, 1);
+
+ id = idr_alloc(&idr, idr_alloc2_test, 0, 1, GFP_KERNEL);
+ assert(id == -ENOSPC);
+
+ id = idr_alloc(&idr, idr_alloc2_test, 1, 2, GFP_KERNEL);
+ assert(id == 1);
+
+ id = idr_alloc(&idr, idr_alloc2_test, 0, 1, GFP_KERNEL);
+ assert(id == -ENOSPC);
+
+ id = idr_alloc(&idr, idr_alloc2_test, 0, 2, GFP_KERNEL);
+ assert(id == -ENOSPC);
+
+ idr_destroy(&idr);
+}
+
void idr_replace_test(void)
{
DEFINE_IDR(idr);
@@ -409,6 +429,7 @@ void idr_checks(void)
idr_replace_test();
idr_alloc_test();
+ idr_alloc2_test();
idr_null_test();
idr_nowait_test();
idr_get_next_test(0);
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 56e44a98d6a5..450f13ba4cca 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -22,6 +22,7 @@ TARGETS += drivers/ntsync
TARGETS += drivers/s390x/uvdevice
TARGETS += drivers/net
TARGETS += drivers/net/bonding
+TARGETS += drivers/net/netconsole
TARGETS += drivers/net/team
TARGETS += drivers/net/virtio_net
TARGETS += drivers/platform/x86/intel/ifs
diff --git a/tools/testing/selftests/alsa/utimer-test.c b/tools/testing/selftests/alsa/utimer-test.c
index c45cb226bd8f..d221972cd8fb 100644
--- a/tools/testing/selftests/alsa/utimer-test.c
+++ b/tools/testing/selftests/alsa/utimer-test.c
@@ -141,7 +141,6 @@ TEST_F(timer_f, utimer) {
TEST(wrong_timers_test) {
int timer_dev_fd;
int utimer_fd;
- size_t i;
struct snd_timer_uinfo wrong_timer = {
.resolution = 0,
.id = UTIMER_DEFAULT_ID,
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index c4c72ee2ef55..e456f3b62fa1 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -30,13 +30,15 @@ all:
@for DIR in $(ARM64_SUBTARGETS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
mkdir -p $$BUILD_TARGET; \
- make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@ \
+ $(if $(FORCE_TARGETS),|| exit); \
done
install: all
@for DIR in $(ARM64_SUBTARGETS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
- make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@ \
+ $(if $(FORCE_TARGETS),|| exit); \
done
run_tests: all
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index c41640f18e4e..9d2df1f3e6bb 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -11,6 +11,8 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <linux/auxvec.h>
+#include <linux/compiler.h>
#include <sys/auxv.h>
#include <sys/prctl.h>
#include <asm/hwcap.h>
@@ -595,6 +597,45 @@ static void lrcpc3_sigill(void)
: "=r" (data0), "=r" (data1) : "r" (src) :);
}
+static void ignore_signal(int sig, siginfo_t *info, void *context)
+{
+ ucontext_t *uc = context;
+
+ uc->uc_mcontext.pc += 4;
+}
+
+static void ls64_sigill(void)
+{
+ struct sigaction ign, old;
+ char src[64] __aligned(64) = { 1 };
+
+ /*
+ * LS64 requires target memory to be Device/Non-cacheable (if
+ * FEAT_LS64WB not supported) and the completer supports these
+ * instructions, otherwise we'll receive a SIGBUS. Since we are only
+ * testing the ABI here, so just ignore the SIGBUS and see if we can
+ * execute the instructions without receiving a SIGILL. Restore the
+ * handler of SIGBUS after this test.
+ */
+ ign.sa_sigaction = ignore_signal;
+ ign.sa_flags = SA_SIGINFO | SA_RESTART;
+ sigemptyset(&ign.sa_mask);
+ sigaction(SIGBUS, &ign, &old);
+
+ register void *xn asm ("x8") = src;
+ register u64 xt_1 asm ("x0");
+
+ /* LD64B x0, [x8] */
+ asm volatile(".inst 0xf83fd100" : "=r" (xt_1) : "r" (xn)
+ : "x1", "x2", "x3", "x4", "x5", "x6", "x7");
+
+ /* ST64B x0, [x8] */
+ asm volatile(".inst 0xf83f9100" : : "r" (xt_1), "r" (xn)
+ : "x1", "x2", "x3", "x4", "x5", "x6", "x7");
+
+ sigaction(SIGBUS, &old, NULL);
+}
+
static const struct hwcap_data {
const char *name;
unsigned long at_hwcap;
@@ -1134,6 +1175,14 @@ static const struct hwcap_data {
.hwcap_bit = HWCAP3_MTE_STORE_ONLY,
.cpuinfo = "mtestoreonly",
},
+ {
+ .name = "LS64",
+ .at_hwcap = AT_HWCAP3,
+ .hwcap_bit = HWCAP3_LS64,
+ .cpuinfo = "ls64",
+ .sigill_fn = ls64_sigill,
+ .sigill_reliable = true,
+ },
};
typedef void (*sighandler_fn)(int, siginfo_t *, void *);
diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c
index 1703543fb7c7..ce4550fb7224 100644
--- a/tools/testing/selftests/arm64/abi/tpidr2.c
+++ b/tools/testing/selftests/arm64/abi/tpidr2.c
@@ -128,8 +128,7 @@ static int sys_clone(unsigned long clone_flags, unsigned long newsp,
int *parent_tidptr, unsigned long tls,
int *child_tidptr)
{
- return my_syscall5(__NR_clone, clone_flags, newsp, parent_tidptr, tls,
- child_tidptr);
+ return syscall(__NR_clone, clone_flags, newsp, parent_tidptr, tls, child_tidptr);
}
#define __STACK_SIZE (8 * 1024 * 1024)
diff --git a/tools/testing/selftests/arm64/fp/fp-pidbench.S b/tools/testing/selftests/arm64/fp/fp-pidbench.S
index 73830f6bc99b..881dfa3b342e 100644
--- a/tools/testing/selftests/arm64/fp/fp-pidbench.S
+++ b/tools/testing/selftests/arm64/fp/fp-pidbench.S
@@ -33,7 +33,7 @@
function _start
puts "Iterations per test: "
mov x20, #10000
- lsl x20, x20, #8
+ lsl x20, x20, #12
mov x0, x20
bl putdec
puts "\n"
@@ -63,6 +63,10 @@ function _start
puts "SVE used per syscall: "
test_loop "rdvl x0, #8"
+ // Test non-SVE execution after SVE
+ puts "No SVE after SVE: "
+ test_loop
+
// And we're done
out:
mov x0, #0
diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c
index 250977abc398..ae4cce6afe2b 100644
--- a/tools/testing/selftests/arm64/gcs/basic-gcs.c
+++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c
@@ -22,7 +22,7 @@ static size_t page_size = 65536;
static __attribute__((noinline)) void valid_gcs_function(void)
{
/* Do something the compiler can't optimise out */
- my_syscall1(__NR_prctl, PR_SVE_GET_VL);
+ syscall(__NR_prctl, PR_SVE_GET_VL);
}
static inline int gcs_set_status(unsigned long mode)
@@ -36,12 +36,10 @@ static inline int gcs_set_status(unsigned long mode)
* other 3 values passed in registers to the syscall are zero
* since the kernel validates them.
*/
- ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, mode,
- 0, 0, 0);
+ ret = syscall(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, mode, 0, 0, 0);
if (ret == 0) {
- ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
- &new_mode, 0, 0, 0);
+ ret = syscall(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &new_mode, 0, 0, 0);
if (ret == 0) {
if (new_mode != mode) {
ksft_print_msg("Mode set to %lx not %lx\n",
@@ -49,7 +47,7 @@ static inline int gcs_set_status(unsigned long mode)
ret = -EINVAL;
}
} else {
- ksft_print_msg("Failed to validate mode: %d\n", ret);
+ ksft_print_msg("Failed to validate mode: %d\n", errno);
}
if (enabling != chkfeat_gcs()) {
@@ -69,10 +67,9 @@ static bool read_status(void)
unsigned long state;
int ret;
- ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
- &state, 0, 0, 0);
+ ret = syscall(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &state, 0, 0, 0);
if (ret != 0) {
- ksft_print_msg("Failed to read state: %d\n", ret);
+ ksft_print_msg("Failed to read state: %d\n", errno);
return false;
}
@@ -188,9 +185,8 @@ static bool map_guarded_stack(void)
int elem;
bool pass = true;
- buf = (void *)my_syscall3(__NR_map_shadow_stack, 0, page_size,
- SHADOW_STACK_SET_MARKER |
- SHADOW_STACK_SET_TOKEN);
+ buf = (void *)syscall(__NR_map_shadow_stack, 0, page_size,
+ SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN);
if (buf == MAP_FAILED) {
ksft_print_msg("Failed to map %lu byte GCS: %d\n",
page_size, errno);
@@ -257,8 +253,7 @@ static bool test_fork(void)
valid_gcs_function();
get_gcspr();
- ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
- &child_mode, 0, 0, 0);
+ ret = syscall(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &child_mode, 0, 0, 0);
if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) {
ksft_print_msg("GCS not enabled in child\n");
ret = -EINVAL;
@@ -321,8 +316,7 @@ static bool test_vfork(void)
valid_gcs_function();
get_gcspr();
- ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
- &child_mode, 0, 0, 0);
+ ret = syscall(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &child_mode, 0, 0, 0);
if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) {
ksft_print_msg("GCS not enabled in child\n");
ret = EXIT_FAILURE;
@@ -390,17 +384,15 @@ int main(void)
if (!(getauxval(AT_HWCAP) & HWCAP_GCS))
ksft_exit_skip("SKIP GCS not supported\n");
- ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
- &gcs_mode, 0, 0, 0);
+ ret = syscall(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &gcs_mode, 0, 0, 0);
if (ret != 0)
- ksft_exit_fail_msg("Failed to read GCS state: %d\n", ret);
+ ksft_exit_fail_msg("Failed to read GCS state: %d\n", errno);
if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) {
gcs_mode = PR_SHADOW_STACK_ENABLE;
- ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
- gcs_mode, 0, 0, 0);
+ ret = syscall(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, gcs_mode, 0, 0, 0);
if (ret != 0)
- ksft_exit_fail_msg("Failed to enable GCS: %d\n", ret);
+ ksft_exit_fail_msg("Failed to enable GCS: %d\n", errno);
}
ksft_set_plan(ARRAY_SIZE(tests));
@@ -410,9 +402,9 @@ int main(void)
}
/* One last test: disable GCS, we can do this one time */
- ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0);
+ ret = syscall(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0);
if (ret != 0)
- ksft_print_msg("Failed to disable GCS: %d\n", ret);
+ ksft_print_msg("Failed to disable GCS: %d\n", errno);
ksft_finished();
diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
index 052d0f9f92b3..f6937f890039 100644
--- a/tools/testing/selftests/arm64/mte/.gitignore
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -6,3 +6,4 @@ check_mmap_options
check_prctl
check_ksm_options
check_user_mem
+check_hugetlb_options
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 19c1638e312a..a3ea98211ea6 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -23,7 +23,6 @@ test_tcpnotify_user
test_libbpf
xdping
test_cpp
-test_progs_verification_cert
*.d
*.subskel.h
*.skel.h
@@ -45,3 +44,6 @@ xdp_synproxy
xdp_hw_metadata
xdp_features
verification_cert.h
+*.BTF
+*.BTF_ids
+*.BTF.base
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index a17baf8c6fd7..f7e1e5f5511c 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -1,4 +1,5 @@
# TEMPORARY
# Alphabetical order
+exe_ctx # execution context check (e.g., hardirq, softirq, etc)
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index b7030a6e2e76..6776158f1f3e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -4,6 +4,7 @@ include ../../../scripts/Makefile.arch
include ../../../scripts/Makefile.include
CXX ?= $(CROSS_COMPILE)g++
+OBJCOPY ?= $(CROSS_COMPILE)objcopy
CURDIR := $(abspath .)
TOOLSDIR := $(abspath ../../..)
@@ -107,8 +108,6 @@ TEST_PROGS := test_kmod.sh \
test_xdping.sh \
test_bpftool_build.sh \
test_bpftool.sh \
- test_bpftool_map.sh \
- test_bpftool_metadata.sh \
test_doc_build.sh \
test_xsk.sh \
test_xdp_features.sh
@@ -437,6 +436,8 @@ BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(abspath $(OUTPUT)/../usr/include) \
-std=gnu11 \
-fno-strict-aliasing \
+ -Wno-microsoft-anon-tag \
+ -fms-extensions \
-Wno-compare-distinct-pointer-types \
-Wno-initializer-overrides \
#
@@ -641,6 +642,9 @@ $(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c
) > $$@)
endif
+$(TRUNNER_OUTPUT)/resolve_btfids.test.o: $(RESOLVE_BTFIDS) $(TRUNNER_OUTPUT)/btf_data.bpf.o
+$(TRUNNER_OUTPUT)/resolve_btfids.test.o: private TEST_NEEDS_BTFIDS = 1
+
# compile individual test files
# Note: we cd into output directory to ensure embedded BPF object is found
$(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
@@ -648,6 +652,10 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
| $(TRUNNER_OUTPUT)/%.test.d
$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
$(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -MMD -MT $$@ -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
+ $$(if $$(TEST_NEEDS_BTFIDS), \
+ $$(call msg,BTFIDS,$(TRUNNER_BINARY),$$@) \
+ $(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@; \
+ $(RESOLVE_BTFIDS) --patch_btfids $$@.BTF_ids $$@)
$(TRUNNER_TEST_OBJS:.o=.d): $(TRUNNER_OUTPUT)/%.test.d: \
$(TRUNNER_TESTS_DIR)/%.c \
@@ -693,13 +701,11 @@ $(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS)
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
$(TRUNNER_LIB_OBJS) \
- $(RESOLVE_BTFIDS) \
$(TRUNNER_BPFTOOL) \
$(OUTPUT)/veristat \
| $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
$(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LLVM_LDLIBS) $$(LDFLAGS) $$(LLVM_LDFLAGS) -o $$@
- $(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@
$(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/$(USE_BOOTSTRAP)bpftool \
$(OUTPUT)/$(if $2,$2/)bpftool
@@ -714,9 +720,12 @@ $(VERIFICATION_CERT) $(PRIVATE_KEY): $(VERIFY_SIG_SETUP)
$(Q)mkdir -p $(BUILD_DIR)
$(Q)$(VERIFY_SIG_SETUP) genkey $(BUILD_DIR)
+# Generates a header with C array declaration, containing test_progs_verification_cert bytes
$(VERIFY_SIG_HDR): $(VERIFICATION_CERT)
- $(Q)ln -fs $< test_progs_verification_cert && \
- xxd -i test_progs_verification_cert > $@
+ $(Q)(echo "unsigned char test_progs_verification_cert[] = {"; \
+ od -v -t 'xC' -w12 $< | sed 's/ \(\S\+\)/ 0x\1,/g;s/^\S\+/ /;$$d'; \
+ echo "};"; \
+ echo "unsigned int test_progs_verification_cert_len = $$(wc -c < $<);") > $@
# Define test_progs test runner.
TRUNNER_TESTS_DIR := prog_tests
@@ -739,7 +748,8 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \
json_writer.c \
$(VERIFY_SIG_HDR) \
flow_dissector_load.h \
- ip_check_defrag_frags.h
+ ip_check_defrag_frags.h \
+ bpftool_helpers.c
TRUNNER_LIB_SOURCES := find_bit.c
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(OUTPUT)/liburandom_read.so \
@@ -888,10 +898,10 @@ EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature bpftool $(TEST_KMOD_TARGETS) \
$(addprefix $(OUTPUT)/,*.o *.d *.skel.h *.lskel.h *.subskel.h \
+ *.BTF *.BTF_ids *.BTF.base \
no_alu32 cpuv4 bpf_gcc \
liburandom_read.so) \
- $(OUTPUT)/FEATURE-DUMP.selftests \
- test_progs_verification_cert
+ $(OUTPUT)/FEATURE-DUMP.selftests
.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index bd29bb2e6cb5..8368bd3a0665 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -265,6 +265,7 @@ static const struct argp_option opts[] = {
{ "verbose", 'v', NULL, 0, "Verbose debug output"},
{ "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
{ "quiet", 'q', NULL, 0, "Be more quiet"},
+ { "stacktrace", 's', NULL, 0, "Get stack trace"},
{ "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
"Set of CPUs for producer threads; implies --affinity"},
{ "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
@@ -350,6 +351,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case 'q':
env.quiet = true;
break;
+ case 's':
+ env.stacktrace = true;
+ break;
case ARG_PROD_AFFINITY_SET:
env.affinity = true;
if (parse_num_list(arg, &env.prod_cpus.cpus,
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index bea323820ffb..7cf21936e7ed 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -26,6 +26,7 @@ struct env {
bool list;
bool affinity;
bool quiet;
+ bool stacktrace;
int consumer_cnt;
int producer_cnt;
int nr_cpus;
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 34018fc3927f..aeec9edd3851 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -146,6 +146,7 @@ static void setup_ctx(void)
bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);
ctx.skel->rodata->batch_iters = args.batch_iters;
+ ctx.skel->rodata->stacktrace = env.stacktrace;
}
static void load_ctx(void)
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
index 83e05e837871..123b7feb6935 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -49,6 +49,11 @@ for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
done
+header "Perfbuf, multi-producer"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+ summarize "pb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 --rb-sample-rate 50 pb-libbpf)"
+done
+
header "Ringbuf, multi-producer contention in overwrite mode, no consumer"
for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
summarize "rb-prod nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 --rb-overwrite --rb-bench-producer rb-libbpf)"
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 2cd9165c7348..4b7210c318dd 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -580,11 +580,6 @@ extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;
extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
-extern int bpf_wq_set_callback_impl(struct bpf_wq *wq,
- int (callback_fn)(void *map, int *key, void *value),
- unsigned int flags__k, void *aux__ign) __ksym;
-#define bpf_wq_set_callback(timer, cb, flags) \
- bpf_wq_set_callback_impl(timer, cb, flags, NULL)
struct bpf_iter_kmem_cache;
extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym;
@@ -615,9 +610,17 @@ extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str,
#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
+#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
+
extern bool CONFIG_PREEMPT_RT __kconfig __weak;
#ifdef bpf_target_x86
-extern const int __preempt_count __ksym;
+extern const int __preempt_count __ksym __weak;
+
+struct pcpu_hot___local {
+ int preempt_count;
+} __attribute__((preserve_access_index));
+
+extern struct pcpu_hot___local pcpu_hot __ksym __weak;
#endif
struct task_struct___preempt_rt {
@@ -627,7 +630,19 @@ struct task_struct___preempt_rt {
static inline int get_preempt_count(void)
{
#if defined(bpf_target_x86)
- return *(int *) bpf_this_cpu_ptr(&__preempt_count);
+ /* By default, read the per-CPU __preempt_count. */
+ if (bpf_ksym_exists(&__preempt_count))
+ return *(int *) bpf_this_cpu_ptr(&__preempt_count);
+
+ /*
+ * If __preempt_count does not exist, try to read preempt_count under
+ * struct pcpu_hot. Between v6.1 and v6.14 -- more specifically,
+ * [64701838bf057, 46e8fff6d45fe), preempt_count had been managed
+ * under struct pcpu_hot.
+ */
+ if (bpf_core_field_exists(pcpu_hot.preempt_count))
+ return ((struct pcpu_hot___local *)
+ bpf_this_cpu_ptr(&pcpu_hot))->preempt_count;
#elif defined(bpf_target_arm64)
return bpf_get_current_task_btf()->thread_info.preempt.count;
#endif
@@ -653,4 +668,60 @@ static inline int bpf_in_interrupt(void)
(tsk->softirq_disable_cnt & SOFTIRQ_MASK);
}
+/* Description
+ * Report whether it is in NMI context. Only works on the following archs:
+ * * x86
+ * * arm64
+ */
+static inline int bpf_in_nmi(void)
+{
+ return get_preempt_count() & NMI_MASK;
+}
+
+/* Description
+ * Report whether it is in hard IRQ context. Only works on the following archs:
+ * * x86
+ * * arm64
+ */
+static inline int bpf_in_hardirq(void)
+{
+ return get_preempt_count() & HARDIRQ_MASK;
+}
+
+/* Description
+ * Report whether it is in softirq context. Only works on the following archs:
+ * * x86
+ * * arm64
+ */
+static inline int bpf_in_serving_softirq(void)
+{
+ struct task_struct___preempt_rt *tsk;
+ int pcnt;
+
+ pcnt = get_preempt_count();
+ if (!CONFIG_PREEMPT_RT)
+ return (pcnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET;
+
+ tsk = (void *) bpf_get_current_task_btf();
+ return (tsk->softirq_disable_cnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET;
+}
+
+/* Description
+ * Report whether it is in task context. Only works on the following archs:
+ * * x86
+ * * arm64
+ */
+static inline int bpf_in_task(void)
+{
+ struct task_struct___preempt_rt *tsk;
+ int pcnt;
+
+ pcnt = get_preempt_count();
+ if (!CONFIG_PREEMPT_RT)
+ return !(pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+
+ tsk = (void *) bpf_get_current_task_btf();
+ return !((pcnt & (NMI_MASK | HARDIRQ_MASK)) |
+ ((tsk->softirq_disable_cnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET));
+}
#endif
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index e0189254bb6e..7dad01439391 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -79,9 +79,6 @@ extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
struct bpf_dynptr *sig_ptr,
struct bpf_key *trusted_keyring) __ksym;
-extern bool bpf_session_is_return(void) __ksym __weak;
-extern __u64 *bpf_session_cookie(void) __ksym __weak;
-
struct dentry;
/* Description
* Returns xattr of a dentry
diff --git a/tools/testing/selftests/bpf/bpftool_helpers.c b/tools/testing/selftests/bpf/bpftool_helpers.c
new file mode 100644
index 000000000000..a5824945a4a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpftool_helpers.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "bpftool_helpers.h"
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+
+#define BPFTOOL_PATH_MAX_LEN 64
+#define BPFTOOL_FULL_CMD_MAX_LEN 512
+
+#define BPFTOOL_DEFAULT_PATH "tools/sbin/bpftool"
+
+static int detect_bpftool_path(char *buffer)
+{
+ char tmp[BPFTOOL_PATH_MAX_LEN];
+
+ /* Check default bpftool location (will work if we are running the
+ * default flavor of test_progs)
+ */
+ snprintf(tmp, BPFTOOL_PATH_MAX_LEN, "./%s", BPFTOOL_DEFAULT_PATH);
+ if (access(tmp, X_OK) == 0) {
+ strncpy(buffer, tmp, BPFTOOL_PATH_MAX_LEN);
+ return 0;
+ }
+
+ /* Check alternate bpftool location (will work if we are running a
+ * specific flavor of test_progs, e.g. cpuv4 or no_alu32)
+ */
+ snprintf(tmp, BPFTOOL_PATH_MAX_LEN, "../%s", BPFTOOL_DEFAULT_PATH);
+ if (access(tmp, X_OK) == 0) {
+ strncpy(buffer, tmp, BPFTOOL_PATH_MAX_LEN);
+ return 0;
+ }
+
+ /* Failed to find bpftool binary */
+ return 1;
+}
+
+static int run_command(char *args, char *output_buf, size_t output_max_len)
+{
+ static char bpftool_path[BPFTOOL_PATH_MAX_LEN] = {0};
+ bool suppress_output = !(output_buf && output_max_len);
+ char command[BPFTOOL_FULL_CMD_MAX_LEN];
+ FILE *f;
+ int ret;
+
+ /* Detect and cache bpftool binary location */
+ if (bpftool_path[0] == 0 && detect_bpftool_path(bpftool_path))
+ return 1;
+
+ ret = snprintf(command, BPFTOOL_FULL_CMD_MAX_LEN, "%s %s%s",
+ bpftool_path, args,
+ suppress_output ? " > /dev/null 2>&1" : "");
+
+ f = popen(command, "r");
+ if (!f)
+ return 1;
+
+ if (!suppress_output)
+ fread(output_buf, 1, output_max_len, f);
+ ret = pclose(f);
+
+ return ret;
+}
+
+int run_bpftool_command(char *args)
+{
+ return run_command(args, NULL, 0);
+}
+
+int get_bpftool_command_output(char *args, char *output_buf, size_t output_max_len)
+{
+ return run_command(args, output_buf, output_max_len);
+}
+
diff --git a/tools/testing/selftests/bpf/bpftool_helpers.h b/tools/testing/selftests/bpf/bpftool_helpers.h
new file mode 100644
index 000000000000..dec1ba201410
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpftool_helpers.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#pragma once
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+
+#define MAX_BPFTOOL_CMD_LEN (256)
+
+int run_bpftool_command(char *args);
+int get_bpftool_command_output(char *args, char *output_buf, size_t output_max_len);
diff --git a/tools/testing/selftests/bpf/cgroup_iter_memcg.h b/tools/testing/selftests/bpf/cgroup_iter_memcg.h
new file mode 100644
index 000000000000..3f59b127943b
--- /dev/null
+++ b/tools/testing/selftests/bpf/cgroup_iter_memcg.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#ifndef __CGROUP_ITER_MEMCG_H
+#define __CGROUP_ITER_MEMCG_H
+
+struct memcg_query {
+ /* some node_stat_item's */
+ unsigned long nr_anon_mapped;
+ unsigned long nr_shmem;
+ unsigned long nr_file_pages;
+ unsigned long nr_file_mapped;
+ /* some memcg_stat_item */
+ unsigned long memcg_kmem;
+ /* some vm_event_item */
+ unsigned long pgfault;
+};
+
+#endif /* __CGROUP_ITER_MEMCG_H */
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 558839e3c185..24855381290d 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -1,6 +1,6 @@
CONFIG_BLK_DEV_LOOP=y
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
CONFIG_BPF=y
CONFIG_BPF_EVENTS=y
CONFIG_BPF_JIT=y
diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
deleted file mode 100644
index a4121d2248ac..000000000000
--- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c
+++ /dev/null
@@ -1,128 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
-#define _GNU_SOURCE
-#include <sched.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <errno.h>
-#include <string.h>
-#include <pthread.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_util.h"
-#include "test_maps.h"
-#include "task_local_storage_helpers.h"
-#include "read_bpf_task_storage_busy.skel.h"
-
-struct lookup_ctx {
- bool start;
- bool stop;
- int pid_fd;
- int map_fd;
- int loop;
-};
-
-static void *lookup_fn(void *arg)
-{
- struct lookup_ctx *ctx = arg;
- long value;
- int i = 0;
-
- while (!ctx->start)
- usleep(1);
-
- while (!ctx->stop && i++ < ctx->loop)
- bpf_map_lookup_elem(ctx->map_fd, &ctx->pid_fd, &value);
- return NULL;
-}
-
-static void abort_lookup(struct lookup_ctx *ctx, pthread_t *tids, unsigned int nr)
-{
- unsigned int i;
-
- ctx->stop = true;
- ctx->start = true;
- for (i = 0; i < nr; i++)
- pthread_join(tids[i], NULL);
-}
-
-void test_task_storage_map_stress_lookup(void)
-{
-#define MAX_NR_THREAD 4096
- unsigned int i, nr = 256, loop = 8192, cpu = 0;
- struct read_bpf_task_storage_busy *skel;
- pthread_t tids[MAX_NR_THREAD];
- struct lookup_ctx ctx;
- cpu_set_t old, new;
- const char *cfg;
- int err;
-
- cfg = getenv("TASK_STORAGE_MAP_NR_THREAD");
- if (cfg) {
- nr = atoi(cfg);
- if (nr > MAX_NR_THREAD)
- nr = MAX_NR_THREAD;
- }
- cfg = getenv("TASK_STORAGE_MAP_NR_LOOP");
- if (cfg)
- loop = atoi(cfg);
- cfg = getenv("TASK_STORAGE_MAP_PIN_CPU");
- if (cfg)
- cpu = atoi(cfg);
-
- skel = read_bpf_task_storage_busy__open_and_load();
- err = libbpf_get_error(skel);
- CHECK(err, "open_and_load", "error %d\n", err);
-
- /* Only for a fully preemptible kernel */
- if (!skel->kconfig->CONFIG_PREEMPTION) {
- printf("%s SKIP (no CONFIG_PREEMPTION)\n", __func__);
- read_bpf_task_storage_busy__destroy(skel);
- skips++;
- return;
- }
-
- /* Save the old affinity setting */
- sched_getaffinity(getpid(), sizeof(old), &old);
-
- /* Pinned on a specific CPU */
- CPU_ZERO(&new);
- CPU_SET(cpu, &new);
- sched_setaffinity(getpid(), sizeof(new), &new);
-
- ctx.start = false;
- ctx.stop = false;
- ctx.pid_fd = sys_pidfd_open(getpid(), 0);
- ctx.map_fd = bpf_map__fd(skel->maps.task);
- ctx.loop = loop;
- for (i = 0; i < nr; i++) {
- err = pthread_create(&tids[i], NULL, lookup_fn, &ctx);
- if (err) {
- abort_lookup(&ctx, tids, i);
- CHECK(err, "pthread_create", "error %d\n", err);
- goto out;
- }
- }
-
- ctx.start = true;
- for (i = 0; i < nr; i++)
- pthread_join(tids[i], NULL);
-
- skel->bss->pid = getpid();
- err = read_bpf_task_storage_busy__attach(skel);
- CHECK(err, "attach", "error %d\n", err);
-
- /* Trigger program */
- sys_gettid();
- skel->bss->pid = 0;
-
- CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy);
-out:
- read_bpf_task_storage_busy__destroy(skel);
- /* Restore affinity setting */
- sched_setaffinity(getpid(), sizeof(old), &old);
- printf("%s:PASS\n", __func__);
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_list.c b/tools/testing/selftests/bpf/prog_tests/arena_list.c
index d15867cddde0..4f2866a615ce 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_list.c
@@ -27,17 +27,23 @@ static int list_sum(struct arena_list_head *head)
return sum;
}
-static void test_arena_list_add_del(int cnt)
+static void test_arena_list_add_del(int cnt, bool nonsleepable)
{
LIBBPF_OPTS(bpf_test_run_opts, opts);
struct arena_list *skel;
int expected_sum = (u64)cnt * (cnt - 1) / 2;
int ret, sum;
- skel = arena_list__open_and_load();
- if (!ASSERT_OK_PTR(skel, "arena_list__open_and_load"))
+ skel = arena_list__open();
+ if (!ASSERT_OK_PTR(skel, "arena_list__open"))
return;
+ skel->rodata->nonsleepable = nonsleepable;
+
+ ret = arena_list__load(skel);
+ if (!ASSERT_OK(ret, "arena_list__load"))
+ goto out;
+
skel->bss->cnt = cnt;
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_list_add), &opts);
ASSERT_OK(ret, "ret_add");
@@ -65,7 +71,11 @@ out:
void test_arena_list(void)
{
if (test__start_subtest("arena_list_1"))
- test_arena_list_add_del(1);
+ test_arena_list_add_del(1, false);
if (test__start_subtest("arena_list_1000"))
- test_arena_list_add_del(1000);
+ test_arena_list_add_del(1000, false);
+ if (test__start_subtest("arena_list_1_nonsleepable"))
+ test_arena_list_add_del(1, true);
+ if (test__start_subtest("arena_list_1000_nonsleepable"))
+ test_arena_list_add_del(1000, true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
index d138cc7b1bda..75b0cf2467ab 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
@@ -240,6 +240,208 @@ static void check_nonstatic_global_other_sec(struct bpf_gotox *skel)
bpf_link__destroy(link);
}
+/*
+ * The following subtests do not use the skeleton, other than to check
+ * whether the test should be skipped.
+ */
+
+static int create_jt_map(__u32 max_entries)
+{
+ const char *map_name = "jt";
+ __u32 key_size = 4;
+ __u32 value_size = sizeof(struct bpf_insn_array_value);
+
+ return bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, map_name,
+ key_size, value_size, max_entries, NULL);
+}
+
+static int prog_load(struct bpf_insn *insns, __u32 insn_cnt)
+{
+ return bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
+}
+
+static int __check_ldimm64_off_prog_load(__u32 max_entries, __u32 off)
+{
+ struct bpf_insn insns[] = {
+ BPF_LD_IMM64_RAW(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int map_fd, ret;
+
+ map_fd = create_jt_map(max_entries);
+ if (!ASSERT_GE(map_fd, 0, "create_jt_map"))
+ return -1;
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) {
+ close(map_fd);
+ return -1;
+ }
+
+ insns[0].imm = map_fd;
+ insns[1].imm = off;
+
+ ret = prog_load(insns, ARRAY_SIZE(insns));
+ close(map_fd);
+ return ret;
+}
+
+/*
+ * Check that loads from an instruction array map are only allowed with offsets
+ * which are multiples of 8 and do not point to outside of the map.
+ */
+static void check_ldimm64_off_load(struct bpf_gotox *skel __always_unused)
+{
+ const __u32 max_entries = 10;
+ int prog_fd;
+ __u32 off;
+
+ for (off = 0; off < max_entries; off++) {
+ prog_fd = __check_ldimm64_off_prog_load(max_entries, off * 8);
+ if (!ASSERT_GE(prog_fd, 0, "__check_ldimm64_off_prog_load"))
+ return;
+ close(prog_fd);
+ }
+
+ prog_fd = __check_ldimm64_off_prog_load(max_entries, 7 /* not a multiple of 8 */);
+ if (!ASSERT_EQ(prog_fd, -EACCES, "__check_ldimm64_off_prog_load: should be -EACCES")) {
+ close(prog_fd);
+ return;
+ }
+
+ prog_fd = __check_ldimm64_off_prog_load(max_entries, max_entries * 8 /* too large */);
+ if (!ASSERT_EQ(prog_fd, -EACCES, "__check_ldimm64_off_prog_load: should be -EACCES")) {
+ close(prog_fd);
+ return;
+ }
+}
+
+static int __check_ldimm64_gotox_prog_load(struct bpf_insn *insns,
+ __u32 insn_cnt,
+ __u32 off1, __u32 off2)
+{
+ const __u32 values[] = {5, 7, 9, 11, 13, 15};
+ const __u32 max_entries = ARRAY_SIZE(values);
+ struct bpf_insn_array_value val = {};
+ int map_fd, ret, i;
+
+ map_fd = create_jt_map(max_entries);
+ if (!ASSERT_GE(map_fd, 0, "create_jt_map"))
+ return -1;
+
+ for (i = 0; i < max_entries; i++) {
+ val.orig_off = values[i];
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0,
+ "bpf_map_update_elem")) {
+ close(map_fd);
+ return -1;
+ }
+ }
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) {
+ close(map_fd);
+ return -1;
+ }
+
+ /* r1 = &map + offset1 */
+ insns[0].imm = map_fd;
+ insns[1].imm = off1;
+
+ /* r1 += off2 */
+ insns[2].imm = off2;
+
+ ret = prog_load(insns, insn_cnt);
+ close(map_fd);
+ return ret;
+}
+
+static void reject_offsets(struct bpf_insn *insns, __u32 insn_cnt, __u32 off1, __u32 off2)
+{
+ int prog_fd;
+
+ prog_fd = __check_ldimm64_gotox_prog_load(insns, insn_cnt, off1, off2);
+ if (!ASSERT_EQ(prog_fd, -EACCES, "__check_ldimm64_gotox_prog_load"))
+ close(prog_fd);
+}
+
+/*
+ * Verify slightly more complex programs which include indirect jumps
+ * and jump tables loaded with a non-zero offset
+ */
+static void check_ldimm64_off_gotox(struct bpf_gotox *skel __always_unused)
+{
+ struct bpf_insn insns[] = {
+ /*
+ * The following instructions perform an indirect jump to
+ * labels below. Thus valid offsets in the map are {0,...,5}.
+ * The program rewrites the offsets in the instructions below:
+ * r1 = &map + offset1
+ * r1 += offset2
+ * r1 = *r1
+ * gotox r1
+ */
+ BPF_LD_IMM64_RAW(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_1, 0, 0, 0),
+
+ /* case 0: */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* case 1: */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* case 2: */
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ /* case 3: */
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ /* case 4: */
+ BPF_MOV64_IMM(BPF_REG_0, 4),
+ BPF_EXIT_INSN(),
+ /* default: */
+ BPF_MOV64_IMM(BPF_REG_0, 5),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, err;
+ __u32 off1, off2;
+
+ /* allow all combinations off1 + off2 < 6 */
+ for (off1 = 0; off1 < 6; off1++) {
+ for (off2 = 0; off1 + off2 < 6; off2++) {
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ prog_fd = __check_ldimm64_gotox_prog_load(insns, ARRAY_SIZE(insns),
+ off1 * 8, off2 * 8);
+ if (!ASSERT_GE(prog_fd, 0, "__check_ldimm64_gotox_prog_load"))
+ return;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err")) {
+ close(prog_fd);
+ return;
+ }
+
+ if (!ASSERT_EQ(topts.retval, off1 + off2, "test_run_opts retval")) {
+ close(prog_fd);
+ return;
+ }
+
+ close(prog_fd);
+ }
+ }
+
+ /* reject off1 + off2 >= 6 */
+ reject_offsets(insns, ARRAY_SIZE(insns), 8 * 3, 8 * 3);
+ reject_offsets(insns, ARRAY_SIZE(insns), 8 * 7, 8 * 0);
+ reject_offsets(insns, ARRAY_SIZE(insns), 8 * 0, 8 * 7);
+
+ /* reject (off1 + off2) % 8 != 0 */
+ reject_offsets(insns, ARRAY_SIZE(insns), 3, 3);
+ reject_offsets(insns, ARRAY_SIZE(insns), 7, 0);
+ reject_offsets(insns, ARRAY_SIZE(insns), 0, 7);
+}
+
void test_bpf_gotox(void)
{
struct bpf_gotox *skel;
@@ -288,5 +490,11 @@ void test_bpf_gotox(void)
if (test__start_subtest("one-map-two-jumps"))
__subtest(skel, check_one_map_two_jumps);
+ if (test__start_subtest("check-ldimm64-off"))
+ __subtest(skel, check_ldimm64_off_load);
+
+ if (test__start_subtest("check-ldimm64-off-gotox"))
+ __subtest(skel, check_ldimm64_off_gotox);
+
bpf_gotox__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index dd6512fa652b..215878ea04de 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -19,6 +19,10 @@ struct {
{ "change_timeout_after_alloc", "kernel function bpf_ct_change_timeout args#0 expected pointer to STRUCT nf_conn but" },
{ "change_status_after_alloc", "kernel function bpf_ct_change_status args#0 expected pointer to STRUCT nf_conn but" },
{ "write_not_allowlisted_field", "no write support to nf_conn at off" },
+ { "lookup_null_bpf_tuple", "Possibly NULL pointer passed to trusted arg1" },
+ { "lookup_null_bpf_opts", "Possibly NULL pointer passed to trusted arg3" },
+ { "xdp_lookup_null_bpf_tuple", "Possibly NULL pointer passed to trusted arg1" },
+ { "xdp_lookup_null_bpf_opts", "Possibly NULL pointer passed to trusted arg3" },
};
enum {
@@ -111,7 +115,6 @@ static void test_bpf_nf_ct(int mode)
if (!ASSERT_OK(err, "bpf_prog_test_run"))
goto end;
- ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
ASSERT_EQ(skel->bss->test_einval_reserved_new, -EINVAL, "Test EINVAL for reserved in new struct not set to 0");
ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
diff --git a/tools/testing/selftests/bpf/prog_tests/bpftool_maps_access.c b/tools/testing/selftests/bpf/prog_tests/bpftool_maps_access.c
new file mode 100644
index 000000000000..e0eb869cb1b4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpftool_maps_access.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <sys/stat.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/libbpf.h>
+#include <bpftool_helpers.h>
+#include <test_progs.h>
+#include <bpf/bpf.h>
+#include "security_bpf_map.skel.h"
+
+#define PROTECTED_MAP_NAME "prot_map"
+#define UNPROTECTED_MAP_NAME "not_prot_map"
+#define BPF_ITER_FILE "bpf_iter_map_elem.bpf.o"
+#define BPFFS_PIN_DIR "/sys/fs/bpf/test_bpftool_map"
+#define INNER_MAP_NAME "inner_map_tt"
+#define OUTER_MAP_NAME "outer_map_tt"
+
+#define MAP_NAME_MAX_LEN 64
+#define PATH_MAX_LEN 128
+
+enum map_protection {
+ PROTECTED,
+ UNPROTECTED
+};
+
+struct test_desc {
+ char *name;
+ enum map_protection protection;
+ struct bpf_map *map;
+ char *map_name;
+ bool pinned;
+ char pin_path[PATH_MAX_LEN];
+ bool write_must_fail;
+};
+
+/*
+ * Load and attach the security_bpf_map skeleton, seed keys 0 and 1 of both
+ * prot_map and not_prot_map, set key 0 of prot_status_map to 1 and create
+ * the bpffs pin directory.
+ *
+ * Returns the skeleton on success, or NULL if any step failed. A skeleton
+ * that was successfully loaded is destroyed before NULL is returned.
+ */
+static struct security_bpf_map *general_setup(void)
+{
+	struct security_bpf_map *skel;
+	uint32_t key, value;
+	int ret, i;
+
+	skel = security_bpf_map__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+		goto end;
+
+	struct bpf_map *maps[] = {skel->maps.prot_map, skel->maps.not_prot_map};
+
+	ret = security_bpf_map__attach(skel);
+	if (!ASSERT_OK(ret, "attach maps security programs"))
+		goto end_destroy;
+
+	for (i = 0; i < ARRAY_SIZE(maps); i++) {
+		for (key = 0; key < 2; key++) {
+			/* Each element initially stores its own key as value */
+			ret = bpf_map__update_elem(maps[i], &key,
+						   sizeof(key), &key, sizeof(key),
+						   0);
+			if (!ASSERT_OK(ret, "set initial map value"))
+				goto end_destroy;
+		}
+	}
+
+	key = 0;
+	value = 1;
+	ret = bpf_map__update_elem(skel->maps.prot_status_map, &key,
+				   sizeof(key), &value, sizeof(value), 0);
+	if (!ASSERT_OK(ret, "configure map protection"))
+		goto end_destroy;
+
+	/*
+	 * mkdir() expects permission bits: S_IFDIR is a file-type bit and
+	 * would create the directory with mode 000.
+	 */
+	if (!ASSERT_OK(mkdir(BPFFS_PIN_DIR, 0700), "create bpffs pin dir"))
+		goto end_destroy;
+
+	return skel;
+end_destroy:
+	security_bpf_map__destroy(skel);
+end:
+	return NULL;
+}
+
+static void general_cleanup(struct security_bpf_map *skel)
+{
+ rmdir(BPFFS_PIN_DIR);
+ security_bpf_map__destroy(skel);
+}
+
+static void update_test_desc(struct security_bpf_map *skel,
+ struct test_desc *test)
+{
+ /* Now that the skeleton is loaded, update all missing fields to
+ * have the subtest properly configured
+ */
+ if (test->protection == PROTECTED) {
+ test->map = skel->maps.prot_map;
+ test->map_name = PROTECTED_MAP_NAME;
+ } else {
+ test->map = skel->maps.not_prot_map;
+ test->map_name = UNPROTECTED_MAP_NAME;
+ }
+}
+
+static int test_setup(struct security_bpf_map *skel, struct test_desc *desc)
+{
+ int ret;
+
+ update_test_desc(skel, desc);
+
+ if (desc->pinned) {
+ ret = snprintf(desc->pin_path, PATH_MAX_LEN, "%s/%s", BPFFS_PIN_DIR,
+ desc->name);
+ if (!ASSERT_GT(ret, 0, "format pin path"))
+ return 1;
+ ret = bpf_map__pin(desc->map, desc->pin_path);
+ if (!ASSERT_OK(ret, "pin map"))
+ return 1;
+ }
+
+ return 0;
+}
+
+static void test_cleanup(struct test_desc *desc)
+{
+ if (desc->pinned)
+ bpf_map__unpin(desc->map, NULL);
+}
+
+static int lookup_map_value(char *map_handle)
+{
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+ int ret = 0;
+
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "map lookup %s key 0 0 0 0",
+ map_handle);
+ if (!ASSERT_GT(ret, 0, "format map lookup cmd"))
+ return 1;
+ return run_bpftool_command(cmd);
+}
+
+static int read_map_btf_data(char *map_handle)
+{
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+ int ret = 0;
+
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "btf dump map %s",
+ map_handle);
+ if (!ASSERT_GT(ret, 0, "format map btf dump cmd"))
+ return 1;
+ return run_bpftool_command(cmd);
+}
+
+static int write_map_value(char *map_handle)
+{
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+ int ret = 0;
+
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN,
+ "map update %s key 0 0 0 0 value 1 1 1 1", map_handle);
+ if (!ASSERT_GT(ret, 0, "format value write cmd"))
+ return 1;
+ return run_bpftool_command(cmd);
+}
+
+static int delete_map_value(char *map_handle)
+{
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+ int ret = 0;
+
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN,
+ "map delete %s key 0 0 0 0", map_handle);
+ if (!ASSERT_GT(ret, 0, "format value deletion cmd"))
+ return 1;
+ return run_bpftool_command(cmd);
+}
+
+/*
+ * Pin an iterator for map_handle at iter_pin_path with "bpftool iter pin"
+ * (using the BPF_ITER_FILE object), then dump it through "cat".
+ *
+ * Returns 0 on success, non-zero otherwise. Once the bpftool command has
+ * succeeded, the iterator pin is unlinked before returning.
+ */
+static int iterate_on_map_values(char *map_handle, char *iter_pin_path)
+{
+	char cmd[MAX_BPFTOOL_CMD_LEN];
+	int ret = 0;
+
+	ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "iter pin %s %s map %s",
+		       BPF_ITER_FILE, iter_pin_path, map_handle);
+	if (!ASSERT_GT(ret, 0, "format iterator creation cmd"))
+		return 1;
+	ret = run_bpftool_command(cmd);
+	if (ret)
+		return ret;
+	/* cmd holds MAX_BPFTOOL_CMD_LEN bytes; bound the write by that size */
+	ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "cat %s", iter_pin_path);
+	if (ret < 0)
+		goto cleanup;
+	ret = system(cmd);
+
+cleanup:
+	unlink(iter_pin_path);
+	return ret;
+}
+
+static int create_inner_map(void)
+{
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+ int ret = 0;
+
+ ret = snprintf(
+ cmd, MAX_BPFTOOL_CMD_LEN,
+ "map create %s/%s type array key 4 value 4 entries 4 name %s",
+ BPFFS_PIN_DIR, INNER_MAP_NAME, INNER_MAP_NAME);
+ if (!ASSERT_GT(ret, 0, "format inner map create cmd"))
+ return 1;
+ return run_bpftool_command(cmd);
+}
+
+static int create_outer_map(void)
+{
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+ int ret = 0;
+
+ ret = snprintf(
+ cmd, MAX_BPFTOOL_CMD_LEN,
+ "map create %s/%s type hash_of_maps key 4 value 4 entries 2 name %s inner_map name %s",
+ BPFFS_PIN_DIR, OUTER_MAP_NAME, OUTER_MAP_NAME, INNER_MAP_NAME);
+ if (!ASSERT_GT(ret, 0, "format outer map create cmd"))
+ return 1;
+ return run_bpftool_command(cmd);
+}
+
+static void delete_pinned_map(char *map_name)
+{
+ char pin_path[PATH_MAX_LEN];
+ int ret;
+
+ ret = snprintf(pin_path, PATH_MAX_LEN, "%s/%s", BPFFS_PIN_DIR,
+ map_name);
+ if (ret >= 0)
+ unlink(pin_path);
+}
+
+static int add_outer_map_entry(int key)
+{
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+ int ret = 0;
+
+ ret = snprintf(
+ cmd, MAX_BPFTOOL_CMD_LEN,
+ "map update pinned %s/%s key %d 0 0 0 value name %s",
+ BPFFS_PIN_DIR, OUTER_MAP_NAME, key, INNER_MAP_NAME);
+ if (!ASSERT_GT(ret, 0, "format outer map value addition cmd"))
+ return 1;
+ return run_bpftool_command(cmd);
+}
+
+static void test_basic_access(struct test_desc *desc)
+{
+ char map_handle[MAP_NAME_MAX_LEN];
+ char iter_pin_path[PATH_MAX_LEN];
+ int ret;
+
+ if (desc->pinned)
+ ret = snprintf(map_handle, MAP_NAME_MAX_LEN, "pinned %s",
+ desc->pin_path);
+ else
+ ret = snprintf(map_handle, MAP_NAME_MAX_LEN, "name %s",
+ desc->map_name);
+ if (!ASSERT_GT(ret, 0, "format map handle"))
+ return;
+
+ ret = lookup_map_value(map_handle);
+ ASSERT_OK(ret, "read map value");
+
+ ret = read_map_btf_data(map_handle);
+ ASSERT_OK(ret, "read map btf data");
+
+ ret = write_map_value(map_handle);
+ ASSERT_OK(desc->write_must_fail ? !ret : ret, "write map value");
+
+ ret = delete_map_value(map_handle);
+ ASSERT_OK(desc->write_must_fail ? !ret : ret, "delete map value");
+ /* Restore deleted value */
+ if (!ret)
+ write_map_value(map_handle);
+
+ ret = snprintf(iter_pin_path, PATH_MAX_LEN, "%s/iter", BPFFS_PIN_DIR);
+ if (ASSERT_GT(ret, 0, "format iter pin path")) {
+ ret = iterate_on_map_values(map_handle, iter_pin_path);
+ ASSERT_OK(ret, "iterate on map values");
+ }
+}
+
+static void test_create_nested_maps(void)
+{
+ if (!ASSERT_OK(create_inner_map(), "create inner map"))
+ return;
+ if (!ASSERT_OK(create_outer_map(), "create outer map"))
+ goto end_cleanup_inner;
+ ASSERT_OK(add_outer_map_entry(0), "add a first entry in outer map");
+ ASSERT_OK(add_outer_map_entry(1), "add a second entry in outer map");
+ ASSERT_NEQ(add_outer_map_entry(2), 0, "add a third entry in outer map");
+
+ delete_pinned_map(OUTER_MAP_NAME);
+end_cleanup_inner:
+ delete_pinned_map(INNER_MAP_NAME);
+}
+
+static void test_btf_list(void)
+{
+ ASSERT_OK(run_bpftool_command("btf list"), "list btf data");
+}
+
+/*
+ * One entry per subtest: which map is targeted (protected or not), whether
+ * it is accessed through a bpffs pin, and whether write/delete operations
+ * are expected to fail. The .map and .map_name fields are (re)assigned from
+ * .protection by update_test_desc() once the skeleton is loaded.
+ */
+static struct test_desc tests[] = {
+	{
+		.name = "unprotected_unpinned",
+		.protection = UNPROTECTED,
+		.map_name = UNPROTECTED_MAP_NAME,
+		.pinned = false,
+		.write_must_fail = false,
+	},
+	{
+		.name = "unprotected_pinned",
+		.protection = UNPROTECTED,
+		.map_name = UNPROTECTED_MAP_NAME,
+		.pinned = true,
+		.write_must_fail = false,
+	},
+	{
+		.name = "protected_unpinned",
+		.protection = PROTECTED,
+		.map_name = PROTECTED_MAP_NAME,
+		.pinned = false,
+		.write_must_fail = true,
+	},
+	{
+		.name = "protected_pinned",
+		.protection = PROTECTED,
+		.map_name = PROTECTED_MAP_NAME,
+		.pinned = true,
+		.write_must_fail = true,
+	}
+};
+
+static const size_t tests_count = ARRAY_SIZE(tests);
+
+void test_bpftool_maps_access(void)
+{
+ struct security_bpf_map *skel;
+ struct test_desc *current;
+ int i;
+
+ skel = general_setup();
+ if (!ASSERT_OK_PTR(skel, "prepare programs"))
+ goto cleanup;
+
+ for (i = 0; i < tests_count; i++) {
+ current = &tests[i];
+ if (!test__start_subtest(current->name))
+ continue;
+ if (ASSERT_OK(test_setup(skel, current), "subtest setup")) {
+ test_basic_access(current);
+ test_cleanup(current);
+ }
+ }
+ if (test__start_subtest("nested_maps"))
+ test_create_nested_maps();
+ if (test__start_subtest("btf_list"))
+ test_btf_list();
+
+cleanup:
+ general_cleanup(skel);
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/bpftool_metadata.c b/tools/testing/selftests/bpf/prog_tests/bpftool_metadata.c
new file mode 100644
index 000000000000..408ace90dc7e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpftool_metadata.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <bpftool_helpers.h>
+#include <test_progs.h>
+#include <linux/bpf.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <stdbool.h>
+
+#define BPFFS_DIR "/sys/fs/bpf/test_metadata"
+#define BPFFS_USED BPFFS_DIR "/used"
+#define BPFFS_UNUSED BPFFS_DIR "/unused"
+
+#define BPF_FILE_USED "metadata_used.bpf.o"
+#define BPF_FILE_UNUSED "metadata_unused.bpf.o"
+#define METADATA_MAP_NAME "metadata.rodata"
+
+#define MAX_BPFTOOL_OUTPUT_LEN (64*1024)
+
+#define MAX_TOKENS_TO_CHECK 3
+static char output[MAX_BPFTOOL_OUTPUT_LEN];
+
+struct test_desc {
+ char *name;
+ char *bpf_prog;
+ char *bpffs_path;
+ char *expected_output[MAX_TOKENS_TO_CHECK];
+ char *expected_output_json[MAX_TOKENS_TO_CHECK];
+ char *metadata_map_name;
+};
+
+static int setup(struct test_desc *test)
+{
+ return mkdir(BPFFS_DIR, 0700);
+}
+
+static void cleanup(struct test_desc *test)
+{
+ unlink(test->bpffs_path);
+ rmdir(BPFFS_DIR);
+}
+
+static int check_metadata(char *buf, char * const *tokens, int count)
+{
+ int i;
+
+ for (i = 0; i < count && tokens[i]; i++)
+ if (!strstr(buf, tokens[i]))
+ return 1;
+
+ return 0;
+}
+
+static void run_test(struct test_desc *test)
+{
+ int ret;
+ char cmd[MAX_BPFTOOL_CMD_LEN];
+
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "prog load %s %s",
+ test->bpf_prog, test->bpffs_path);
+ if (!ASSERT_GT(ret, 0, "format prog insert command"))
+ return;
+ ret = run_bpftool_command(cmd);
+ if (!ASSERT_OK(ret, "load program"))
+ return;
+
+ /* Check output with default format */
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "prog show pinned %s",
+ test->bpffs_path);
+ if (!ASSERT_GT(ret, 0, "format pinned prog check command"))
+ return;
+ ret = get_bpftool_command_output(cmd, output,
+ MAX_BPFTOOL_OUTPUT_LEN);
+ if (ASSERT_OK(ret, "get program info")) {
+ ret = check_metadata(output, test->expected_output,
+ ARRAY_SIZE(test->expected_output));
+ ASSERT_OK(ret, "find metadata");
+ }
+
+ /* Check output with json format */
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "prog -j show pinned %s",
+ test->bpffs_path);
+ if (!ASSERT_GT(ret, 0, "format pinned prog check command in json"))
+ return;
+ ret = get_bpftool_command_output(cmd, output,
+ MAX_BPFTOOL_OUTPUT_LEN);
+ if (ASSERT_OK(ret, "get program info in json")) {
+ ret = check_metadata(output, test->expected_output_json,
+ ARRAY_SIZE(test->expected_output_json));
+ ASSERT_OK(ret, "find metadata in json");
+ }
+
+ /* Check that the corresponding map can be found and accessed */
+ ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "map show name %s",
+ test->metadata_map_name);
+ if (!ASSERT_GT(ret, 0, "format map check command"))
+ return;
+ ASSERT_OK(run_bpftool_command(cmd), "access metadata map");
+}
+
+static struct test_desc tests[] = {
+ {
+ .name = "metadata_unused",
+ .bpf_prog = BPF_FILE_UNUSED,
+ .bpffs_path = BPFFS_UNUSED,
+ .expected_output = {
+ "a = \"foo\"",
+ "b = 1"
+ },
+ .expected_output_json = {
+ "\"metadata\":{\"a\":\"foo\",\"b\":1}"
+ },
+ .metadata_map_name = METADATA_MAP_NAME
+ },
+ {
+ .name = "metadata_used",
+ .bpf_prog = BPF_FILE_USED,
+ .bpffs_path = BPFFS_USED,
+ .expected_output = {
+ "a = \"bar\"",
+ "b = 2"
+ },
+ .expected_output_json = {
+ "\"metadata\":{\"a\":\"bar\",\"b\":2}"
+ },
+ .metadata_map_name = METADATA_MAP_NAME
+ }
+};
+static const int tests_count = ARRAY_SIZE(tests);
+
+void test_bpftool_metadata(void)
+{
+ int i;
+
+ for (i = 0; i < tests_count; i++) {
+ if (!test__start_subtest(tests[i].name))
+ continue;
+ if (ASSERT_OK(setup(&tests[i]), "setup bpffs pin dir")) {
+ run_test(&tests[i]);
+ cleanup(&tests[i]);
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 10cba526d3e6..f1642794f70e 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -875,8 +875,8 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT,
"int cpu_number = (int)100", 100);
#endif
- TEST_BTF_DUMP_VAR(btf, d, NULL, str, "bpf_cgrp_storage_busy", int, BTF_F_COMPACT,
- "static int bpf_cgrp_storage_busy = (int)2", 2);
+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "bpf_bprintf_nest_level", int, BTF_F_COMPACT,
+ "static int bpf_bprintf_nest_level = (int)2", 2);
}
struct btf_dump_string_ctx {
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_permute.c b/tools/testing/selftests/bpf/prog_tests/btf_permute.c
new file mode 100644
index 000000000000..04ade5ad77ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_permute.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Xiaomi */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "btf_helpers.h"
+
+static void permute_base_check(struct btf *btf)
+{
+ VALIDATE_RAW_BTF(
+ btf,
+ "[1] STRUCT 's2' size=4 vlen=1\n"
+ "\t'm' type_id=4 bits_offset=0",
+ "[2] FUNC 'f' type_id=6 linkage=static",
+ "[3] PTR '(anon)' type_id=4",
+ "[4] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[5] STRUCT 's1' size=4 vlen=1\n"
+ "\t'm' type_id=4 bits_offset=0",
+ "[6] FUNC_PROTO '(anon)' ret_type_id=4 vlen=1\n"
+ "\t'p' type_id=3");
+}
+
+/* Ensure btf__permute works as expected in the base-BTF scenario */
+static void test_permute_base(void)
+{
+ struct btf *btf;
+ __u32 permute_ids[7];
+ int err;
+
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "empty_main_btf"))
+ return;
+
+ btf__add_int(btf, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_ptr(btf, 1); /* [2] ptr to int */
+ btf__add_struct(btf, "s1", 4); /* [3] struct s1 { */
+ btf__add_field(btf, "m", 1, 0, 0); /* int m; */
+ /* } */
+ btf__add_struct(btf, "s2", 4); /* [4] struct s2 { */
+ btf__add_field(btf, "m", 1, 0, 0); /* int m; */
+ /* } */
+ btf__add_func_proto(btf, 1); /* [5] int (*)(int *p); */
+ btf__add_func_param(btf, "p", 2);
+ btf__add_func(btf, "f", BTF_FUNC_STATIC, 5); /* [6] int f(int *p); */
+
+ VALIDATE_RAW_BTF(
+ btf,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=4 vlen=1\n"
+ "\t'm' type_id=1 bits_offset=0",
+ "[4] STRUCT 's2' size=4 vlen=1\n"
+ "\t'm' type_id=1 bits_offset=0",
+ "[5] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p' type_id=2",
+ "[6] FUNC 'f' type_id=5 linkage=static");
+
+ permute_ids[0] = 0; /* [0] -> [0] */
+ permute_ids[1] = 4; /* [1] -> [4] */
+ permute_ids[2] = 3; /* [2] -> [3] */
+ permute_ids[3] = 5; /* [3] -> [5] */
+ permute_ids[4] = 1; /* [4] -> [1] */
+ permute_ids[5] = 6; /* [5] -> [6] */
+ permute_ids[6] = 2; /* [6] -> [2] */
+ err = btf__permute(btf, permute_ids, ARRAY_SIZE(permute_ids), NULL);
+ if (!ASSERT_OK(err, "btf__permute_base"))
+ goto done;
+ permute_base_check(btf);
+
+ /* ids[0] must be 0 for base BTF */
+ permute_ids[0] = 4; /* [0] -> [4] */
+ permute_ids[1] = 0; /* [1] -> [0] */
+ permute_ids[2] = 3; /* [2] -> [3] */
+ permute_ids[3] = 5; /* [3] -> [5] */
+ permute_ids[4] = 1; /* [4] -> [1] */
+ permute_ids[5] = 6; /* [5] -> [6] */
+ permute_ids[6] = 2; /* [6] -> [2] */
+ err = btf__permute(btf, permute_ids, ARRAY_SIZE(permute_ids), NULL);
+ if (!ASSERT_ERR(err, "btf__permute_base"))
+ goto done;
+ /* BTF is not modified */
+ permute_base_check(btf);
+
+ /* id_map_cnt is invalid */
+ permute_ids[0] = 0; /* [0] -> [0] */
+ permute_ids[1] = 4; /* [1] -> [4] */
+ permute_ids[2] = 3; /* [2] -> [3] */
+ permute_ids[3] = 5; /* [3] -> [5] */
+ permute_ids[4] = 1; /* [4] -> [1] */
+ permute_ids[5] = 6; /* [5] -> [6] */
+ permute_ids[6] = 2; /* [6] -> [2] */
+ err = btf__permute(btf, permute_ids, ARRAY_SIZE(permute_ids) - 1, NULL);
+ if (!ASSERT_ERR(err, "btf__permute_base"))
+ goto done;
+ /* BTF is not modified */
+ permute_base_check(btf);
+
+ /* Multiple types can not be mapped to the same ID */
+ permute_ids[0] = 0;
+ permute_ids[1] = 4;
+ permute_ids[2] = 4;
+ permute_ids[3] = 5;
+ permute_ids[4] = 1;
+ permute_ids[5] = 6;
+ permute_ids[6] = 2;
+ err = btf__permute(btf, permute_ids, ARRAY_SIZE(permute_ids), NULL);
+ if (!ASSERT_ERR(err, "btf__permute_base"))
+ goto done;
+ /* BTF is not modified */
+ permute_base_check(btf);
+
+ /* Type ID must be valid */
+ permute_ids[0] = 0;
+ permute_ids[1] = 4;
+ permute_ids[2] = 3;
+ permute_ids[3] = 5;
+ permute_ids[4] = 1;
+ permute_ids[5] = 7;
+ permute_ids[6] = 2;
+ err = btf__permute(btf, permute_ids, ARRAY_SIZE(permute_ids), NULL);
+ if (!ASSERT_ERR(err, "btf__permute_base"))
+ goto done;
+ /* BTF is not modified */
+ permute_base_check(btf);
+
+done:
+ btf__free(btf);
+}
+
+static void permute_split_check(struct btf *btf)
+{
+ VALIDATE_RAW_BTF(
+ btf,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's2' size=4 vlen=1\n"
+ "\t'm' type_id=1 bits_offset=0",
+ "[4] FUNC 'f' type_id=5 linkage=static",
+ "[5] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p' type_id=2",
+ "[6] STRUCT 's1' size=4 vlen=1\n"
+ "\t'm' type_id=1 bits_offset=0");
+}
+
+/* Ensure btf__permute works as expected in the split-BTF scenario */
+static void test_permute_split(void)
+{
+ struct btf *split_btf = NULL, *base_btf = NULL;
+ __u32 permute_ids[4];
+ int err, start_id;
+
+ base_btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(base_btf, "empty_main_btf"))
+ return;
+
+ btf__add_int(base_btf, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_ptr(base_btf, 1); /* [2] ptr to int */
+ VALIDATE_RAW_BTF(
+ base_btf,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1");
+ split_btf = btf__new_empty_split(base_btf);
+ if (!ASSERT_OK_PTR(split_btf, "empty_split_btf"))
+ goto cleanup;
+ btf__add_struct(split_btf, "s1", 4); /* [3] struct s1 { */
+ btf__add_field(split_btf, "m", 1, 0, 0); /* int m; */
+ /* } */
+ btf__add_struct(split_btf, "s2", 4); /* [4] struct s2 { */
+ btf__add_field(split_btf, "m", 1, 0, 0); /* int m; */
+ /* } */
+ btf__add_func_proto(split_btf, 1); /* [5] int (*)(int *p); */
+ btf__add_func_param(split_btf, "p", 2);
+ btf__add_func(split_btf, "f", BTF_FUNC_STATIC, 5); /* [6] int f(int *p); */
+
+ VALIDATE_RAW_BTF(
+ split_btf,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=4 vlen=1\n"
+ "\t'm' type_id=1 bits_offset=0",
+ "[4] STRUCT 's2' size=4 vlen=1\n"
+ "\t'm' type_id=1 bits_offset=0",
+ "[5] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p' type_id=2",
+ "[6] FUNC 'f' type_id=5 linkage=static");
+
+ start_id = btf__type_cnt(base_btf);
+ permute_ids[3 - start_id] = 6; /* [3] -> [6] */
+ permute_ids[4 - start_id] = 3; /* [4] -> [3] */
+ permute_ids[5 - start_id] = 5; /* [5] -> [5] */
+ permute_ids[6 - start_id] = 4; /* [6] -> [4] */
+ err = btf__permute(split_btf, permute_ids, ARRAY_SIZE(permute_ids), NULL);
+ if (!ASSERT_OK(err, "btf__permute_split"))
+ goto cleanup;
+ permute_split_check(split_btf);
+
+ /*
+ * For split BTF, id_map_cnt must be equal to the number of types
+ * added on top of base BTF
+ */
+ permute_ids[3 - start_id] = 4;
+ permute_ids[4 - start_id] = 3;
+ permute_ids[5 - start_id] = 5;
+ permute_ids[6 - start_id] = 6;
+ err = btf__permute(split_btf, permute_ids, ARRAY_SIZE(permute_ids) - 1, NULL);
+ if (!ASSERT_ERR(err, "btf__permute_split"))
+ goto cleanup;
+ /* BTF is not modified */
+ permute_split_check(split_btf);
+
+ /* Multiple types can not be mapped to the same ID */
+ permute_ids[3 - start_id] = 4;
+ permute_ids[4 - start_id] = 3;
+ permute_ids[5 - start_id] = 3;
+ permute_ids[6 - start_id] = 6;
+ err = btf__permute(split_btf, permute_ids, ARRAY_SIZE(permute_ids), NULL);
+ if (!ASSERT_ERR(err, "btf__permute_split"))
+ goto cleanup;
+ /* BTF is not modified */
+ permute_split_check(split_btf);
+
+ /* Can not map to base ID */
+ permute_ids[3 - start_id] = 4;
+ permute_ids[4 - start_id] = 2;
+ permute_ids[5 - start_id] = 5;
+ permute_ids[6 - start_id] = 6;
+ err = btf__permute(split_btf, permute_ids, ARRAY_SIZE(permute_ids), NULL);
+ if (!ASSERT_ERR(err, "btf__permute_split"))
+ goto cleanup;
+ /* BTF is not modified */
+ permute_split_check(split_btf);
+
+cleanup:
+ btf__free(split_btf);
+ btf__free(base_btf);
+}
+
+void test_btf_permute(void)
+{
+ if (test__start_subtest("permute_base"))
+ test_permute_base();
+ if (test__start_subtest("permute_split"))
+ test_permute_split();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
index 574d9a0cdc8e..0f88a9d00a22 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
@@ -190,6 +190,16 @@ static void test_walk_self_only(struct cgroup_iter *skel)
BPF_CGROUP_ITER_SELF_ONLY, "self_only");
}
+/*
+ * Walk from the PARENT cgroup using BPF_CGROUP_ITER_CHILDREN order.
+ *
+ * expected_output is filled with PROLOGUE, the ids of CHILD1 and CHILD2
+ * (one per line) and EPILOGUE before the iterator is read through
+ * read_from_cgroup_iter() (defined elsewhere in this file; presumably it
+ * compares the iterator output against expected_output).
+ */
+static void test_walk_children(struct cgroup_iter *skel)
+{
+ snprintf(expected_output, sizeof(expected_output),
+ PROLOGUE "%8llu\n%8llu\n" EPILOGUE, cg_id[CHILD1],
+ cg_id[CHILD2]);
+
+ read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+ BPF_CGROUP_ITER_CHILDREN, "children");
+}
+
static void test_walk_dead_self_only(struct cgroup_iter *skel)
{
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
@@ -325,6 +335,8 @@ void test_cgroup_iter(void)
test_walk_dead_self_only(skel);
if (test__start_subtest("cgroup_iter__self_only_css_task"))
test_walk_self_only_css_task();
+ if (test__start_subtest("cgroup_iter__children"))
+ test_walk_children(skel);
out:
cgroup_iter__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c
new file mode 100644
index 000000000000..a5afd16705f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include "cgroup_helpers.h"
+#include "cgroup_iter_memcg.h"
+#include "cgroup_iter_memcg.skel.h"
+
+/*
+ * Run the iterator program behind @link once.
+ *
+ * Returns 0 on success and 1 if the iterator fd could not be created or
+ * the read did not return zero bytes.
+ */
+static int read_stats(struct bpf_link *link)
+{
+	ssize_t nread;
+	int iter_fd;
+	int rc;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+		return 1;
+
+	/*
+	 * Reading from the fd is what invokes the iter program. The program
+	 * is not expected to write any data, so the read must return zero;
+	 * the results are picked up through the custom data section
+	 * (skel->data_query.memcg_query) instead.
+	 */
+	nread = read(iter_fd, NULL, 0);
+	rc = ASSERT_EQ(nread, 0, "read fd") ? 0 : 1;
+
+	close(iter_fd);
+	return rc;
+}
+
+/*
+ * Dirty a fresh anonymous private mapping of 1024 pages, run the iterator
+ * and expect a non-zero nr_anon_mapped in @memcg_query.
+ */
+static void test_anon(struct bpf_link *link, struct memcg_query *memcg_query)
+{
+	size_t region_sz = sysconf(_SC_PAGESIZE) * 1024;
+	void *region;
+
+	/* Mapping and writing a new anon region raises memcg anon usage. */
+	region = mmap(NULL, region_sz, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (!ASSERT_NEQ(region, MAP_FAILED, "mmap anon"))
+		return;
+
+	memset(region, 1, region_sz);
+
+	if (ASSERT_OK(read_stats(link), "read stats"))
+		ASSERT_GT(memcg_query->nr_anon_mapped, 0, "final anon mapped val");
+
+	munmap(region, region_sz);
+}
+
+/*
+ * Create a 1024-page file, dirty it through a shared mapping, run the
+ * iterator and expect non-zero nr_file_pages and nr_file_mapped in
+ * @memcg_query. The file is unlinked on every path once it was opened.
+ */
+static void test_file(struct bpf_link *link, struct memcg_query *memcg_query)
+{
+	const char *path = "/tmp/test_cgroup_iter_memcg";
+	size_t file_sz = sysconf(_SC_PAGESIZE) * 1024;
+	void *mapping;
+	int file_fd;
+
+	/* Creating and writing to a mapped file raises memcg file usage. */
+	file_fd = open(path, O_CREAT | O_RDWR, 0644);
+	if (!ASSERT_OK_FD(file_fd, "open fd"))
+		return;
+
+	if (!ASSERT_OK(ftruncate(file_fd, file_sz), "ftruncate"))
+		goto out_close;
+
+	mapping = mmap(NULL, file_sz, PROT_WRITE, MAP_SHARED, file_fd, 0);
+	if (!ASSERT_NEQ(mapping, MAP_FAILED, "mmap file"))
+		goto out_close;
+
+	memset(mapping, 1, file_sz);
+
+	if (ASSERT_OK(read_stats(link), "read stats")) {
+		ASSERT_GT(memcg_query->nr_file_pages, 0, "final file value");
+		ASSERT_GT(memcg_query->nr_file_mapped, 0, "final file mapped value");
+	}
+
+	munmap(mapping, file_sz);
+out_close:
+	close(file_fd);
+	unlink(path);
+}
+
+/*
+ * Allocate 1024 pages in a POSIX shared memory object, run the iterator
+ * and expect a non-zero nr_shmem in @memcg_query.
+ */
+static void test_shmem(struct bpf_link *link, struct memcg_query *memcg_query)
+{
+	const char *shm_name = "/tmp_shmem";
+	size_t shm_sz = sysconf(_SC_PAGESIZE) * 1024;
+	int shm_fd;
+
+	/* Creating and filling a shmem object raises memcg shmem usage. */
+	shm_fd = shm_open(shm_name, O_CREAT | O_RDWR, 0644);
+	if (!ASSERT_OK_FD(shm_fd, "shm_open"))
+		return;
+
+	if (ASSERT_OK(fallocate(shm_fd, 0, 0, shm_sz), "fallocate") &&
+	    ASSERT_OK(read_stats(link), "read stats"))
+		ASSERT_GT(memcg_query->nr_shmem, 0, "final shmem value");
+
+	close(shm_fd);
+	shm_unlink(shm_name);
+}
+
+#define NR_PIPES 64
+/*
+ * Open NR_PIPES pipes, run the iterator and expect a non-zero memcg_kmem
+ * in @memcg_query. Every pipe that was opened is closed again, last one
+ * first.
+ */
+static void test_kmem(struct bpf_link *link, struct memcg_query *memcg_query)
+{
+	int pipe_fds[NR_PIPES][2];
+	int nr_open;
+
+	/* Each pipe makes the kernel allocate some buffers, raising kmem. */
+	for (nr_open = 0; nr_open < NR_PIPES; nr_open++) {
+		if (!ASSERT_OK(pipe(pipe_fds[nr_open]), "pipe"))
+			goto close_pipes;
+	}
+
+	if (ASSERT_OK(read_stats(link), "read stats"))
+		ASSERT_GT(memcg_query->memcg_kmem, 0, "kmem value");
+
+close_pipes:
+	/* nr_open counts the pipes created so far; a failed pipe() is not included. */
+	while (nr_open-- > 0) {
+		close(pipe_fds[nr_open][0]);
+		close(pipe_fds[nr_open][1]);
+	}
+}
+
+/*
+ * Touch every byte of a fresh 1024-page anonymous mapping to trigger page
+ * faults, run the iterator and expect a non-zero pgfault count in
+ * @memcg_query.
+ */
+static void test_pgfault(struct bpf_link *link, struct memcg_query *memcg_query)
+{
+	size_t region_sz = sysconf(_SC_PAGESIZE) * 1024;
+	void *region;
+
+	/* Region used for triggering the page faults. */
+	region = mmap(NULL, region_sz, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (!ASSERT_NEQ(region, MAP_FAILED, "mmap anon"))
+		return;
+
+	/* First write to the region triggers the faults. */
+	memset(region, 1, region_sz);
+
+	if (ASSERT_OK(read_stats(link), "read stats"))
+		ASSERT_GT(memcg_query->pgfault, 0, "final pgfault val");
+
+	munmap(region, region_sz);
+}
+
+/*
+ * Test entry point: joins a dedicated cgroup, attaches the
+ * cgroup_memcg_query iterator to it in SELF_ONLY order and runs the
+ * anon/shmem/file/kmem/pgfault subtests against the shared memcg_query
+ * data section.
+ */
+void test_cgroup_iter_memcg(void)
+{
+	const char *cg_path = "/cgroup_iter_memcg_test";
+	struct cgroup_iter_memcg *skel;
+	struct memcg_query *query;
+	struct bpf_link *iter_link;
+	int cg_fd;
+
+	cg_fd = cgroup_setup_and_join(cg_path);
+	if (!ASSERT_OK_FD(cg_fd, "cgroup_setup_and_join"))
+		return;
+
+	skel = cgroup_iter_memcg__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "cgroup_iter_memcg__open_and_load"))
+		goto out_close_cgroup;
+
+	union bpf_iter_link_info linfo = {
+		.cgroup = {
+			.cgroup_fd = cg_fd,
+			.order = BPF_CGROUP_ITER_SELF_ONLY,
+		},
+	};
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts,
+		.link_info = &linfo,
+		.link_info_len = sizeof(linfo),
+	);
+
+	iter_link = bpf_program__attach_iter(skel->progs.cgroup_memcg_query, &opts);
+	if (!ASSERT_OK_PTR(iter_link, "bpf_program__attach_iter"))
+		goto out_destroy_skel;
+
+	/* Every subtest reads its results from the same data section. */
+	query = &skel->data_query->memcg_query;
+
+	if (test__start_subtest("cgroup_iter_memcg__anon"))
+		test_anon(iter_link, query);
+	if (test__start_subtest("cgroup_iter_memcg__shmem"))
+		test_shmem(iter_link, query);
+	if (test__start_subtest("cgroup_iter_memcg__file"))
+		test_file(iter_link, query);
+	if (test__start_subtest("cgroup_iter_memcg__kmem"))
+		test_kmem(iter_link, query);
+	if (test__start_subtest("cgroup_iter_memcg__pgfault"))
+		test_pgfault(iter_link, query);
+
+	bpf_link__destroy(iter_link);
+out_destroy_skel:
+	cgroup_iter_memcg__destroy(skel);
+out_close_cgroup:
+	close(cg_fd);
+	cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
index ccc768592e66..1a2a2f1abf03 100644
--- a/tools/testing/selftests/bpf/prog_tests/d_path.c
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -38,6 +38,14 @@ static int set_pathname(int fd, pid_t pid)
return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN);
}
+/*
+ * Close @fd by issuing the close_range syscall on the single-descriptor
+ * range [fd, fd] with flags 0, instead of calling close().
+ *
+ * Returns the value returned by syscall().
+ */
+static inline long syscall_close(int fd)
+{
+ return syscall(__NR_close_range,
+ (unsigned int)fd,
+ (unsigned int)fd,
+ 0u);
+}
+
static int trigger_fstat_events(pid_t pid)
{
int sockfd = -1, procfd = -1, devfd = -1;
@@ -104,18 +112,34 @@ out_close:
/* sys_close no longer triggers filp_close, but we can
* call sys_close_range instead which still does
*/
-#define close(fd) syscall(__NR_close_range, fd, fd, 0)
+ syscall_close(pipefd[0]);
+ syscall_close(pipefd[1]);
+ syscall_close(sockfd);
+ syscall_close(procfd);
+ syscall_close(devfd);
+ syscall_close(localfd);
+ syscall_close(indicatorfd);
+ return ret;
+}
- close(pipefd[0]);
- close(pipefd[1]);
- close(sockfd);
- close(procfd);
- close(devfd);
- close(localfd);
- close(indicatorfd);
+static void attach_and_load(struct test_d_path **skel)
+{
+ int err;
-#undef close
- return ret;
+ *skel = test_d_path__open_and_load();
+ if (CHECK(!*skel, "setup", "d_path skeleton failed\n"))
+ goto cleanup;
+
+ err = test_d_path__attach(*skel);
+ if (CHECK(err, "setup", "attach failed: %d\n", err))
+ goto cleanup;
+
+ (*skel)->bss->my_pid = getpid();
+ return;
+
+cleanup:
+ test_d_path__destroy(*skel);
+ *skel = NULL;
}
static void test_d_path_basic(void)
@@ -124,16 +148,11 @@ static void test_d_path_basic(void)
struct test_d_path *skel;
int err;
- skel = test_d_path__open_and_load();
- if (CHECK(!skel, "setup", "d_path skeleton failed\n"))
- goto cleanup;
-
- err = test_d_path__attach(skel);
- if (CHECK(err, "setup", "attach failed: %d\n", err))
+ attach_and_load(&skel);
+ if (!skel)
goto cleanup;
bss = skel->bss;
- bss->my_pid = getpid();
err = trigger_fstat_events(bss->my_pid);
if (err < 0)
@@ -195,6 +214,39 @@ static void test_d_path_check_types(void)
test_d_path_check_types__destroy(skel);
}
+/* Check if the verifier correctly generates code for
+ * accessing the memory modified by d_path helper.
+ *
+ * Creates a temporary file under /dev/shm, calls fallocate() on it and
+ * then checks that the BPF side set bss->path_match_fallocate. The
+ * temporary file is removed on every path once it has been created.
+ */
+static void test_d_path_mem_access(void)
+{
+	int localfd = -1;
+	char path_template[] = "/dev/shm/d_path_loadgen.XXXXXX";
+	struct test_d_path__bss *bss;
+	struct test_d_path *skel;
+	int err;
+
+	attach_and_load(&skel);
+	if (!skel)
+		return;
+
+	bss = skel->bss;
+
+	localfd = mkstemp(path_template);
+	if (CHECK(localfd < 0, "trigger", "mkstemp failed\n"))
+		goto cleanup;
+
+	/*
+	 * Remove the file right after fallocate(), whether or not the call
+	 * succeeded, so a failure does not leave the file behind.
+	 */
+	err = fallocate(localfd, 0, 0, 1024);
+	remove(path_template);
+	if (CHECK(err < 0, "trigger", "fallocate failed\n"))
+		goto cleanup;
+
+	CHECK(!bss->path_match_fallocate, "check",
+	      "failed to read fallocate path\n");
+
+cleanup:
+	/* Only issue the close when mkstemp() actually returned a descriptor. */
+	if (localfd >= 0)
+		syscall_close(localfd);
+	test_d_path__destroy(skel);
+}
+
void test_d_path(void)
{
if (test__start_subtest("basic"))
@@ -205,4 +257,7 @@ void test_d_path(void)
if (test__start_subtest("check_alloc_mem"))
test_d_path_check_types();
+
+ if (test__start_subtest("check_mem_access"))
+ test_d_path_mem_access();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
index 6c2b0c3dbcd8..fb2cea710db3 100644
--- a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
@@ -73,12 +73,10 @@ close_memfd:
return -1;
}
-static int create_sys_heap_dmabuf(void)
+static int create_sys_heap_dmabuf(size_t bytes)
{
- sysheap_test_buffer_size = 20 * getpagesize();
-
struct dma_heap_allocation_data data = {
- .len = sysheap_test_buffer_size,
+ .len = bytes,
.fd = 0,
.fd_flags = O_RDWR | O_CLOEXEC,
.heap_flags = 0,
@@ -110,7 +108,9 @@ close_sysheap_dmabuf:
static int create_test_buffers(void)
{
udmabuf = create_udmabuf();
- sysheap_dmabuf = create_sys_heap_dmabuf();
+
+ sysheap_test_buffer_size = 20 * getpagesize();
+ sysheap_dmabuf = create_sys_heap_dmabuf(sysheap_test_buffer_size);
if (udmabuf < 0 || sysheap_dmabuf < 0)
return -1;
@@ -219,6 +219,26 @@ close_iter_fd:
close(iter_fd);
}
+/*
+ * Drain the dmabuf_collector iterator in 1024-byte reads and expect more
+ * than 4096 bytes of output in total.
+ */
+static void subtest_dmabuf_iter_check_lots_of_buffers(struct dmabuf_iter *skel)
+{
+	size_t total = 0;
+	char chunk[1024];
+	ssize_t nread;
+	int iter_fd;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
+	if (!ASSERT_OK_FD(iter_fd, "iter_create"))
+		return;
+
+	/* Stop at end of output (0) or on a read error (negative). */
+	for (;;) {
+		nread = read(iter_fd, chunk, sizeof(chunk));
+		if (nread <= 0)
+			break;
+		total += nread;
+	}
+
+	ASSERT_GT(total, 4096, "total_bytes_read");
+
+	close(iter_fd);
+}
+
+
static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd)
{
LIBBPF_OPTS(bpf_test_run_opts, topts);
@@ -275,6 +295,23 @@ void test_dmabuf_iter(void)
subtest_dmabuf_iter_check_no_infinite_reads(skel);
if (test__start_subtest("default_iter"))
subtest_dmabuf_iter_check_default_iter(skel);
+ if (test__start_subtest("lots_of_buffers")) {
+ size_t NUM_BUFS = 100;
+ int buffers[NUM_BUFS];
+ int i;
+
+ for (i = 0; i < NUM_BUFS; ++i) {
+ buffers[i] = create_sys_heap_dmabuf(getpagesize());
+ if (!ASSERT_OK_FD(buffers[i], "dmabuf_fd"))
+ goto cleanup_bufs;
+ }
+
+ subtest_dmabuf_iter_check_lots_of_buffers(skel);
+
+cleanup_bufs:
+ for (--i; i >= 0; --i)
+ close(buffers[i]);
+ }
if (test__start_subtest("open_coded"))
subtest_dmabuf_iter_check_open_coded(skel, map_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/exe_ctx.c b/tools/testing/selftests/bpf/prog_tests/exe_ctx.c
new file mode 100644
index 000000000000..aed6a6ef0876
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/exe_ctx.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Valve Corporation.
+ * Author: Changwoo Min <changwoo@igalia.com>
+ */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "test_ctx.skel.h"
+
+/*
+ * Run the test_ctx skeleton's trigger_all_contexts program with the
+ * calling process pinned to CPU 0, then check that the task, hardirq and
+ * softirq counters in the skeleton's bss all became non-zero.
+ *
+ * The original CPU affinity is restored on exit, but only if it could be
+ * read in the first place.
+ */
+void test_exe_ctx(void)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, opts);
+	cpu_set_t old_cpuset, target_cpuset;
+	bool affinity_saved = false;
+	struct test_ctx *skel;
+	int err, prog_fd;
+
+	/*
+	 * 1. Pin the current process to CPU 0. Record whether the original
+	 * mask was actually read: old_cpuset stays uninitialized otherwise
+	 * and must not be written back at restore_affinity.
+	 */
+	if (sched_getaffinity(0, sizeof(old_cpuset), &old_cpuset) == 0) {
+		affinity_saved = true;
+		CPU_ZERO(&target_cpuset);
+		CPU_SET(0, &target_cpuset);
+		ASSERT_OK(sched_setaffinity(0, sizeof(target_cpuset),
+					    &target_cpuset), "setaffinity");
+	}
+
+	skel = test_ctx__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_load"))
+		goto restore_affinity;
+
+	err = test_ctx__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	/* 2. When we run this, the kernel will execute the BPF prog on CPU 0. */
+	prog_fd = bpf_program__fd(skel->progs.trigger_all_contexts);
+	err = bpf_prog_test_run_opts(prog_fd, &opts);
+	ASSERT_OK(err, "test_run_trigger");
+
+	/* 3. Wait for the local CPU's softirq/tasklet to finish. */
+	for (int i = 0; i < 1000; i++) {
+		if (skel->bss->count_task > 0 &&
+		    skel->bss->count_hardirq > 0 &&
+		    skel->bss->count_softirq > 0)
+			break;
+		usleep(1000); /* Wait 1ms per iteration, up to 1 sec total */
+	}
+
+	/* On CPU 0, these should now all be non-zero. */
+	ASSERT_GT(skel->bss->count_task, 0, "task_ok");
+	ASSERT_GT(skel->bss->count_hardirq, 0, "hardirq_ok");
+	ASSERT_GT(skel->bss->count_softirq, 0, "softirq_ok");
+
+cleanup:
+	test_ctx__destroy(skel);
+
+restore_affinity:
+	if (affinity_saved)
+		ASSERT_OK(sched_setaffinity(0, sizeof(old_cpuset), &old_cpuset),
+			  "restore_affinity");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fsession_test.c b/tools/testing/selftests/bpf/prog_tests/fsession_test.c
new file mode 100644
index 000000000000..a299aeb8cc2e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fsession_test.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 ChinaTelecom */
+#include <test_progs.h>
+#include "fsession_test.skel.h"
+
+/*
+ * Run the skeleton's test1 program through bpf_prog_test_run_opts() and
+ * verify that every __u64-sized slot of the skeleton's bss equals 1.
+ *
+ * Returns 0 on success, the test-run error or program retval if either
+ * is non-zero, or -EINVAL when a bss slot holds an unexpected value.
+ */
+static int check_result(struct fsession_test *skel)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, run_opts);
+	size_t nr_results = sizeof(*skel->bss) / sizeof(__u64);
+	__u64 *results = (__u64 *)skel->bss;
+	size_t idx;
+	int rc;
+
+	/* Trigger test function calls */
+	rc = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test1), &run_opts);
+	if (!ASSERT_OK(rc, "test_run_opts err"))
+		return rc;
+	if (!ASSERT_OK(run_opts.retval, "test_run_opts retval"))
+		return run_opts.retval;
+
+	/* The bss is walked as a flat array of __u64 result slots. */
+	for (idx = 0; idx < nr_results; idx++) {
+		if (!ASSERT_EQ(results[idx], 1, "test_result"))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Open, load and attach the fsession_test skeleton once and check the
+ * results. The subtest is skipped when loading fails with -EOPNOTSUPP.
+ */
+static void test_fsession_basic(void)
+{
+	struct fsession_test *skel;
+	int err;
+
+	skel = fsession_test__open();
+	if (!ASSERT_OK_PTR(skel, "fsession_test__open"))
+		return;
+
+	err = fsession_test__load(skel);
+	if (err == -EOPNOTSUPP)
+		test__skip();
+	else if (ASSERT_OK(err, "fsession_test__load") &&
+		 ASSERT_OK(fsession_test__attach(skel), "fsession_attach"))
+		check_result(skel);
+
+	fsession_test__destroy(skel);
+}
+
+/*
+ * Attach the fsession_test skeleton, check the results, detach, zero the
+ * bss counters, attach again and check the results a second time.
+ * The subtest is skipped when loading fails with -EOPNOTSUPP.
+ */
+static void test_fsession_reattach(void)
+{
+	struct fsession_test *skel;
+	int err;
+
+	skel = fsession_test__open();
+	if (!ASSERT_OK_PTR(skel, "fsession_test__open"))
+		return;
+
+	err = fsession_test__load(skel);
+	if (err == -EOPNOTSUPP) {
+		test__skip();
+		goto out;
+	}
+	if (!ASSERT_OK(err, "fsession_test__load"))
+		goto out;
+
+	/* first round: attach and verify */
+	if (!ASSERT_OK(fsession_test__attach(skel), "fsession_first_attach") ||
+	    check_result(skel))
+		goto out;
+
+	/* detach and start over from zeroed counters */
+	fsession_test__detach(skel);
+	memset(skel->bss, 0, sizeof(*skel->bss));
+
+	/* second round: attach and verify again */
+	if (ASSERT_OK(fsession_test__attach(skel), "fsession_second_attach"))
+		check_result(skel);
+
+out:
+	fsession_test__destroy(skel);
+}
+
+/*
+ * Same flow as the basic subtest, but with the test6 program excluded
+ * from loading. Because test6 never runs, its two result slots are set
+ * to 1 by hand before the results are checked.
+ */
+static void test_fsession_cookie(void)
+{
+	struct fsession_test *skel = fsession_test__open();
+	int err;
+
+	if (!ASSERT_OK_PTR(skel, "fsession_test__open"))
+		goto out;
+
+	/*
+	 * test_fsession_basic() already covers the session cookie together
+	 * with bpf_get_func_ip(), so only the cookie without
+	 * bpf_get_func_ip() needs to be checked here.
+	 */
+	bpf_program__set_autoload(skel->progs.test6, false);
+
+	err = fsession_test__load(skel);
+	if (err == -EOPNOTSUPP) {
+		test__skip();
+	} else if (ASSERT_OK(err, "fsession_test__load") &&
+		   ASSERT_OK(fsession_test__attach(skel), "fsession_attach")) {
+		skel->bss->test6_entry_result = 1;
+		skel->bss->test6_exit_result = 1;
+
+		check_result(skel);
+	}
+
+out:
+	fsession_test__destroy(skel);
+}
+
+/*
+ * Test entry point: runs the basic, reattach and cookie fsession subtests,
+ * each only when test__start_subtest() selects it.
+ */
+void test_fsession_test(void)
+{
+ if (test__start_subtest("fsession_test"))
+ test_fsession_basic();
+ if (test__start_subtest("fsession_reattach"))
+ test_fsession_reattach();
+ if (test__start_subtest("fsession_cookie"))
+ test_fsession_cookie();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
index 64a9c95d4acf..96b27de05524 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
@@ -33,11 +33,15 @@ void test_get_func_args_test(void)
ASSERT_EQ(topts.retval >> 16, 1, "test_run");
ASSERT_EQ(topts.retval & 0xffff, 1234 + 29, "test_run");
+ ASSERT_OK(trigger_module_test_read(1), "trigger_read");
ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
ASSERT_EQ(skel->bss->test3_result, 1, "test3_result");
ASSERT_EQ(skel->bss->test4_result, 1, "test4_result");
+ ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
+ ASSERT_EQ(skel->bss->test6_result, 1, "test6_result");
+ ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
cleanup:
get_func_args_test__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
index c40242dfa8fb..7772a0f288d3 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
@@ -46,6 +46,8 @@ static void test_function_entry(void)
ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
ASSERT_EQ(skel->bss->test8_result, 1, "test8_result");
+ ASSERT_EQ(skel->bss->test9_entry_result, 1, "test9_entry_result");
+ ASSERT_EQ(skel->bss->test9_exit_result, 1, "test9_exit_result");
cleanup:
get_func_ip_test__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c
index 3cea71f9c500..a539980a2fbe 100644
--- a/tools/testing/selftests/bpf/prog_tests/iters.c
+++ b/tools/testing/selftests/bpf/prog_tests/iters.c
@@ -253,6 +253,11 @@ static void subtest_css_iters(void)
{ "/cg1/cg2" },
{ "/cg1/cg2/cg3" },
{ "/cg1/cg2/cg3/cg4" },
+ { "/cg1/cg5" },
+ { "/cg1/cg5/cg6" },
+ { "/cg1/cg7" },
+ { "/cg1/cg7/cg8" },
+ { "/cg1/cg7/cg8/cg9" },
};
int err, cg_nr = ARRAY_SIZE(cgs);
int i;
@@ -284,7 +289,8 @@ static void subtest_css_iters(void)
ASSERT_EQ(skel->bss->post_order_cnt, cg_nr, "post_order_cnt");
ASSERT_EQ(skel->bss->last_cg_id, get_cgroup_id(cgs[0].path), "last_cg_id");
- ASSERT_EQ(skel->bss->tree_high, cg_nr - 1, "tree_high");
+ ASSERT_EQ(skel->bss->children_cnt, 3, "children_cnt");
+ ASSERT_EQ(skel->bss->tree_high, 3, "tree_high");
iters_css__detach(skel);
cleanup:
cleanup_cgroup_environment();
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_implicit_args.c b/tools/testing/selftests/bpf/prog_tests/kfunc_implicit_args.c
new file mode 100644
index 000000000000..5e4793c9c29a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_implicit_args.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "kfunc_implicit_args.skel.h"
+
+/* Test entry point: hands the kfunc_implicit_args skeleton to RUN_TESTS(). */
+void test_kfunc_implicit_args(void)
+{
+ RUN_TESTS(kfunc_implicit_args);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 6cfaa978bc9a..9caef222e528 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -1,4 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <sys/prctl.h>
#include <test_progs.h>
#include "kprobe_multi.skel.h"
#include "trace_helpers.h"
@@ -540,6 +542,46 @@ cleanup:
kprobe_multi_override__destroy(skel);
}
+/*
+ * Check return-value override on the prctl syscall entry point, first via
+ * a kprobe.multi link and then via a plain kprobe link.
+ *
+ * prctl(0xffff, 0) is expected to return -1 while no program is attached
+ * and 123 while either override program is attached.
+ */
+static void test_override(void)
+{
+	struct kprobe_multi_override *skel = NULL;
+	int err;
+
+	skel = kprobe_multi_override__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "kprobe_multi_override__open_and_load"))
+		goto cleanup;
+
+	skel->bss->pid = getpid();
+
+	/* no override */
+	err = prctl(0xffff, 0);
+	ASSERT_EQ(err, -1, "err");
+
+	/* kprobe.multi override */
+	skel->links.test_override = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override,
+						SYS_PREFIX "sys_prctl", NULL);
+	if (!ASSERT_OK_PTR(skel->links.test_override, "bpf_program__attach_kprobe_multi_opts"))
+		goto cleanup;
+
+	err = prctl(0xffff, 0);
+	ASSERT_EQ(err, 123, "err");
+
+	/* Drop the kprobe.multi link before the plain kprobe is attached. */
+	bpf_link__destroy(skel->links.test_override);
+	skel->links.test_override = NULL;
+
+	/* kprobe override */
+	skel->links.test_kprobe_override = bpf_program__attach_kprobe(skel->progs.test_kprobe_override,
+						false, SYS_PREFIX "sys_prctl");
+	if (!ASSERT_OK_PTR(skel->links.test_kprobe_override, "bpf_program__attach_kprobe"))
+		goto cleanup;
+
+	err = prctl(0xffff, 0);
+	ASSERT_EQ(err, 123, "err");
+
+cleanup:
+	kprobe_multi_override__destroy(skel);
+}
+
#ifdef __x86_64__
static void test_attach_write_ctx(void)
{
@@ -597,6 +639,8 @@ void test_kprobe_multi_test(void)
test_attach_api_fails();
if (test__start_subtest("attach_override"))
test_attach_override();
+ if (test__start_subtest("override"))
+ test_override();
if (test__start_subtest("session"))
test_session_skel_api();
if (test__start_subtest("session_cookie"))
diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
index 8743df599567..03b46f17cf53 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
@@ -118,19 +118,39 @@ exit:
static int kern_sync_rcu_tasks_trace(struct rcu_tasks_trace_gp *rcu)
{
- long gp_seq = READ_ONCE(rcu->bss->gp_seq);
LIBBPF_OPTS(bpf_test_run_opts, opts);
+ int ret;
- if (!ASSERT_OK(bpf_prog_test_run_opts(bpf_program__fd(rcu->progs.do_call_rcu_tasks_trace),
- &opts), "do_call_rcu_tasks_trace"))
+ WRITE_ONCE(rcu->bss->done, 0);
+ ret = bpf_prog_test_run_opts(bpf_program__fd(rcu->progs.call_rcu_tasks_trace), &opts);
+ if (!ASSERT_OK(ret, "call_rcu_tasks_trace"))
return -EFAULT;
- if (!ASSERT_OK(opts.retval, "opts.retval == 0"))
+ if (!ASSERT_OK(opts.retval, "call_rcu_tasks_trace retval"))
return -EFAULT;
- while (gp_seq == READ_ONCE(rcu->bss->gp_seq))
+ while (!READ_ONCE(rcu->bss->done))
sched_yield();
return 0;
}
+/*
+ * Load a fresh map_kptr skeleton and keep running its count_ref program
+ * until bss->num_of_refs reads 2.
+ *
+ * The loop stops as soon as a test run or the program itself reports an
+ * error; num_of_refs cannot be trusted then, and retrying would spin
+ * forever instead of failing the test.
+ */
+static void wait_for_map_release(void)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, lopts);
+	struct map_kptr *skel;
+	int ret;
+
+	skel = map_kptr__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load"))
+		return;
+
+	do {
+		ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.count_ref), &lopts);
+		if (!ASSERT_OK(ret, "count_ref ret"))
+			break;
+		if (!ASSERT_OK(lopts.retval, "count_ref retval"))
+			break;
+	} while (skel->bss->num_of_refs != 2);
+
+	map_kptr__destroy(skel);
+}
+
void serial_test_map_kptr(void)
{
struct rcu_tasks_trace_gp *skel;
@@ -140,24 +160,24 @@ void serial_test_map_kptr(void)
skel = rcu_tasks_trace_gp__open_and_load();
if (!ASSERT_OK_PTR(skel, "rcu_tasks_trace_gp__open_and_load"))
return;
- if (!ASSERT_OK(rcu_tasks_trace_gp__attach(skel), "rcu_tasks_trace_gp__attach"))
- goto end;
if (test__start_subtest("success-map")) {
test_map_kptr_success(true);
ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace");
ASSERT_OK(kern_sync_rcu(), "sync rcu");
+ wait_for_map_release();
+
/* Observe refcount dropping to 1 on bpf_map_free_deferred */
test_map_kptr_success(false);
ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace");
ASSERT_OK(kern_sync_rcu(), "sync rcu");
+ wait_for_map_release();
+
/* Observe refcount dropping to 1 on synchronous delete elem */
test_map_kptr_success(true);
}
-end:
rcu_tasks_trace_gp__destroy(skel);
- return;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
index 343da65864d6..a72ae0b29f6e 100644
--- a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "cgroup_helpers.h"
#include "percpu_alloc_array.skel.h"
#include "percpu_alloc_cgrp_local_storage.skel.h"
#include "percpu_alloc_fail.skel.h"
@@ -115,6 +116,328 @@ static void test_failure(void) {
RUN_TESTS(percpu_alloc_fail);
}
+/*
+ * Exercise the BPF_F_CPU / BPF_F_ALL_CPUS element flags on a per-CPU map.
+ *
+ * @map:        map under test
+ * @keys:       array of @entries keys, each @key_sz bytes long
+ * @key_sz:     size of one key in bytes
+ * @entries:    number of keys in @keys
+ * @nr_cpus:    number of CPUs; CPU indexes 0..nr_cpus-1 are exercised and
+ *              index nr_cpus is used as the out-of-range value
+ * @test_batch: also run the batch update/lookup checks when true
+ *
+ * The CPU index is carried in the upper 32 bits of the flags word. The
+ * function first checks that invalid flag combinations and an out-of-range
+ * CPU index are rejected, then for every CPU writes a marker value to that
+ * CPU only and verifies that only that CPU holds it.
+ */
+static void test_percpu_map_op_cpu_flag(struct bpf_map *map, void *keys, size_t key_sz, u32 entries,
+ int nr_cpus, bool test_batch)
+{
+ size_t value_sz = sizeof(u32), value_sz_cpus, value_sz_total;
+ u32 *values = NULL, *values_percpu = NULL;
+ const u32 value = 0xDEADC0DE;
+ int i, j, cpu, map_fd, err;
+ u64 batch = 0, flags;
+ void *values_row;
+ u32 count, v;
+ LIBBPF_OPTS(bpf_map_batch_opts, batch_opts);
+
+ value_sz_cpus = value_sz * nr_cpus;
+ values = calloc(entries, value_sz_cpus);
+ if (!ASSERT_OK_PTR(values, "calloc values"))
+ return;
+
+ /* Buffer for flag-less batch lookups: one 8-byte-rounded slot per CPU per entry. */
+ values_percpu = calloc(entries, roundup(value_sz, 8) * nr_cpus);
+ if (!ASSERT_OK_PTR(values_percpu, "calloc values_percpu")) {
+ free(values);
+ return;
+ }
+
+ value_sz_total = value_sz_cpus * entries;
+ memset(values, 0, value_sz_total);
+
+ /* BPF_F_CPU and BPF_F_ALL_CPUS together must be rejected. */
+ map_fd = bpf_map__fd(map);
+ flags = BPF_F_CPU | BPF_F_ALL_CPUS;
+ err = bpf_map_lookup_elem_flags(map_fd, keys, values, flags);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem_flags cpu|all_cpus"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, keys, values, flags);
+ if (!ASSERT_ERR(err, "bpf_map_update_elem cpu|all_cpus"))
+ goto out;
+
+ /* BPF_F_ALL_CPUS must be rejected on lookup. */
+ flags = BPF_F_ALL_CPUS;
+ err = bpf_map_lookup_elem_flags(map_fd, keys, values, flags);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem_flags all_cpus"))
+ goto out;
+
+ /* BPF_F_LOCK combined with either CPU flag must be rejected. */
+ flags = BPF_F_LOCK | BPF_F_CPU;
+ err = bpf_map_lookup_elem_flags(map_fd, keys, values, flags);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem_flags BPF_F_LOCK"))
+ goto out;
+
+ flags = BPF_F_LOCK | BPF_F_ALL_CPUS;
+ err = bpf_map_update_elem(map_fd, keys, values, flags);
+ if (!ASSERT_ERR(err, "bpf_map_update_elem BPF_F_LOCK"))
+ goto out;
+
+ /* CPU index nr_cpus is one past the last valid index: expect -ERANGE everywhere. */
+ flags = (u64)nr_cpus << 32 | BPF_F_CPU;
+ err = bpf_map_update_elem(map_fd, keys, values, flags);
+ if (!ASSERT_EQ(err, -ERANGE, "bpf_map_update_elem -ERANGE"))
+ goto out;
+
+ err = bpf_map__update_elem(map, keys, key_sz, values, value_sz, flags);
+ if (!ASSERT_EQ(err, -ERANGE, "bpf_map__update_elem -ERANGE"))
+ goto out;
+
+ err = bpf_map_lookup_elem_flags(map_fd, keys, values, flags);
+ if (!ASSERT_EQ(err, -ERANGE, "bpf_map_lookup_elem_flags -ERANGE"))
+ goto out;
+
+ err = bpf_map__lookup_elem(map, keys, key_sz, values, value_sz, flags);
+ if (!ASSERT_EQ(err, -ERANGE, "bpf_map__lookup_elem -ERANGE"))
+ goto out;
+
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ /* clear value on all cpus */
+ values[0] = 0;
+ flags = BPF_F_ALL_CPUS;
+ for (i = 0; i < entries; i++) {
+ err = bpf_map__update_elem(map, keys + i * key_sz, key_sz, values,
+ value_sz, flags);
+ if (!ASSERT_OK(err, "bpf_map__update_elem all_cpus"))
+ goto out;
+ }
+
+ /* update value on specified cpu */
+ for (i = 0; i < entries; i++) {
+ values[0] = value;
+ flags = (u64)cpu << 32 | BPF_F_CPU;
+ err = bpf_map__update_elem(map, keys + i * key_sz, key_sz, values,
+ value_sz, flags);
+ if (!ASSERT_OK(err, "bpf_map__update_elem specified cpu"))
+ goto out;
+
+ /* lookup then check value on CPUs */
+ for (j = 0; j < nr_cpus; j++) {
+ flags = (u64)j << 32 | BPF_F_CPU;
+ err = bpf_map__lookup_elem(map, keys + i * key_sz, key_sz, values,
+ value_sz, flags);
+ if (!ASSERT_OK(err, "bpf_map__lookup_elem specified cpu"))
+ goto out;
+ if (!ASSERT_EQ(values[0], j != cpu ? 0 : value,
+ "bpf_map__lookup_elem value on specified cpu"))
+ goto out;
+ }
+ }
+ }
+
+ if (!test_batch)
+ goto out;
+
+ /* Batch update with the out-of-range CPU index must fail with -ERANGE too. */
+ count = entries;
+ batch_opts.elem_flags = (u64)nr_cpus << 32 | BPF_F_CPU;
+ err = bpf_map_update_batch(map_fd, keys, values, &count, &batch_opts);
+ if (!ASSERT_EQ(err, -ERANGE, "bpf_map_update_batch -ERANGE"))
+ goto out;
+
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ memset(values, 0, value_sz_total);
+
+ /* clear values across all CPUs */
+ count = entries;
+ batch_opts.elem_flags = BPF_F_ALL_CPUS;
+ err = bpf_map_update_batch(map_fd, keys, values, &count, &batch_opts);
+ if (!ASSERT_OK(err, "bpf_map_update_batch all_cpus"))
+ goto out;
+ if (!ASSERT_EQ(count, entries, "bpf_map_update_batch count"))
+ goto out;
+
+ /* update values on specified CPU */
+ for (i = 0; i < entries; i++)
+ values[i] = value;
+
+ count = entries;
+ batch_opts.elem_flags = (u64)cpu << 32 | BPF_F_CPU;
+ err = bpf_map_update_batch(map_fd, keys, values, &count, &batch_opts);
+ if (!ASSERT_OK(err, "bpf_map_update_batch specified cpu"))
+ goto out;
+ if (!ASSERT_EQ(count, entries, "bpf_map_update_batch count"))
+ goto out;
+
+ /* lookup values on specified CPU */
+ batch = 0;
+ count = entries;
+ memset(values, 0, entries * value_sz);
+ err = bpf_map_lookup_batch(map_fd, NULL, &batch, keys, values, &count, &batch_opts);
+ if (!ASSERT_TRUE(!err || err == -ENOENT, "bpf_map_lookup_batch specified cpu"))
+ goto out;
+ if (!ASSERT_EQ(count, entries, "bpf_map_lookup_batch count"))
+ goto out;
+
+ for (i = 0; i < entries; i++)
+ if (!ASSERT_EQ(values[i], value,
+ "bpf_map_lookup_batch value on specified cpu"))
+ goto out;
+
+ /* lookup values from all CPUs */
+ batch = 0;
+ count = entries;
+ batch_opts.elem_flags = 0;
+ memset(values_percpu, 0, roundup(value_sz, 8) * nr_cpus * entries);
+ err = bpf_map_lookup_batch(map_fd, NULL, &batch, keys, values_percpu, &count,
+ &batch_opts);
+ if (!ASSERT_TRUE(!err || err == -ENOENT, "bpf_map_lookup_batch all_cpus"))
+ goto out;
+ if (!ASSERT_EQ(count, entries, "bpf_map_lookup_batch count"))
+ goto out;
+
+ /* Each entry's row holds nr_cpus slots; only slot 'cpu' should carry the marker. */
+ for (i = 0; i < entries; i++) {
+ values_row = (void *) values_percpu +
+ roundup(value_sz, 8) * i * nr_cpus;
+ for (j = 0; j < nr_cpus; j++) {
+ v = *(u32 *) (values_row + roundup(value_sz, 8) * j);
+ if (!ASSERT_EQ(v, j != cpu ? 0 : value,
+ "bpf_map_lookup_batch value all_cpus"))
+ goto out;
+ }
+ }
+ }
+
+out:
+ free(values_percpu);
+ free(values);
+}
+
+/*
+ * Retype the percpu_alloc_array skeleton's "percpu" map to @map_type,
+ * size it to twice the number of possible CPUs, load the skeleton and run
+ * the CPU-flag checks (batch operations included) on the first nr_cpus
+ * keys.
+ */
+static void test_percpu_map_cpu_flag(enum bpf_map_type map_type)
+{
+	struct percpu_alloc_array *skel;
+	struct bpf_map *percpu_map;
+	size_t key_sz = sizeof(int);
+	int nr_cpus, idx, err;
+	u32 nr_keys;
+	int *keys;
+
+	nr_cpus = libbpf_num_possible_cpus();
+	if (!ASSERT_GT(nr_cpus, 0, "libbpf_num_possible_cpus"))
+		return;
+
+	nr_keys = nr_cpus * 2;
+	keys = calloc(nr_keys, key_sz);
+	if (!ASSERT_OK_PTR(keys, "calloc keys"))
+		return;
+
+	/* Keys are simply 0..nr_keys-1. */
+	for (idx = 0; idx < nr_keys; idx++)
+		keys[idx] = idx;
+
+	skel = percpu_alloc_array__open();
+	if (!ASSERT_OK_PTR(skel, "percpu_alloc_array__open"))
+		goto free_keys;
+
+	/* Map type and size have to be set before the skeleton is loaded. */
+	percpu_map = skel->maps.percpu;
+	bpf_map__set_type(percpu_map, map_type);
+	bpf_map__set_max_entries(percpu_map, nr_keys);
+
+	err = percpu_alloc_array__load(skel);
+	if (ASSERT_OK(err, "test_percpu_alloc__load"))
+		test_percpu_map_op_cpu_flag(percpu_map, keys, key_sz, nr_cpus, nr_cpus, true);
+
+	percpu_alloc_array__destroy(skel);
+free_keys:
+	free(keys);
+}
+
+/* Run the per-CPU map CPU-flag checks on a BPF_MAP_TYPE_PERCPU_ARRAY map. */
+static void test_percpu_array_cpu_flag(void)
+{
+ test_percpu_map_cpu_flag(BPF_MAP_TYPE_PERCPU_ARRAY);
+}
+
+/* Run the per-CPU map CPU-flag checks on a BPF_MAP_TYPE_PERCPU_HASH map. */
+static void test_percpu_hash_cpu_flag(void)
+{
+ test_percpu_map_cpu_flag(BPF_MAP_TYPE_PERCPU_HASH);
+}
+
+/* Run the per-CPU map CPU-flag checks on a BPF_MAP_TYPE_LRU_PERCPU_HASH map. */
+static void test_lru_percpu_hash_cpu_flag(void)
+{
+ test_percpu_map_cpu_flag(BPF_MAP_TYPE_LRU_PERCPU_HASH);
+}
+
+/*
+ * Run the CPU-flag checks (without batch operations) on the skeleton's
+ * percpu_cgroup_storage map.
+ *
+ * The test joins a dedicated cgroup, attaches the cgroup_egress program
+ * to it and uses the first key returned by bpf_map_get_next_key() as the
+ * single entry to operate on.
+ */
+static void test_percpu_cgroup_storage_cpu_flag(void)
+{
+	const char *cg_path = "/cg_percpu";
+	struct percpu_alloc_array *skel = NULL;
+	struct bpf_cgroup_storage_key storage_key;
+	struct bpf_map *storage_map;
+	int cg_fd, nr_cpus, err;
+
+	nr_cpus = libbpf_num_possible_cpus();
+	if (!ASSERT_GT(nr_cpus, 0, "libbpf_num_possible_cpus"))
+		return;
+
+	if (!ASSERT_OK(setup_cgroup_environment(), "setup_cgroup_environment"))
+		return;
+
+	cg_fd = create_and_get_cgroup(cg_path);
+	if (!ASSERT_GE(cg_fd, 0, "create_and_get_cgroup")) {
+		cleanup_cgroup_environment();
+		return;
+	}
+
+	if (!ASSERT_OK(join_cgroup(cg_path), "join_cgroup"))
+		goto out;
+
+	skel = percpu_alloc_array__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "percpu_alloc_array__open_and_load"))
+		goto out;
+
+	err = bpf_prog_attach(bpf_program__fd(skel->progs.cgroup_egress), cg_fd,
+			      BPF_CGROUP_INET_EGRESS, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach"))
+		goto out;
+
+	storage_map = skel->maps.percpu_cgroup_storage;
+	err = bpf_map_get_next_key(bpf_map__fd(storage_map), NULL, &storage_key);
+	if (!ASSERT_OK(err, "bpf_map_get_next_key"))
+		goto out;
+
+	test_percpu_map_op_cpu_flag(storage_map, &storage_key, sizeof(storage_key), 1,
+				    nr_cpus, false);
+out:
+	/* Detach is attempted on every path; its result is ignored. */
+	bpf_prog_detach2(-1, cg_fd, BPF_CGROUP_INET_EGRESS);
+	close(cg_fd);
+	cleanup_cgroup_environment();
+	percpu_alloc_array__destroy(skel);
+}
+
+/*
+ * Create a one-entry map of @map_type (int key, u64 value) and check that
+ * the CPU flags are rejected on it: BPF_F_ALL_CPUS on element and batch
+ * update, BPF_F_CPU on element and batch lookup.
+ */
+static void test_map_op_cpu_flag(enum bpf_map_type map_type)
+{
+	LIBBPF_OPTS(bpf_map_batch_opts, batch_opts);
+	u32 max_entries = 1;
+	u32 count = max_entries;
+	u64 batch = 0;
+	u64 val = 0;
+	int key = 0;
+	int map_fd;
+
+	map_fd = bpf_map_create(map_type, "test_cpu_flag", sizeof(int), sizeof(u64), max_entries,
+				NULL);
+	if (!ASSERT_GE(map_fd, 0, "bpf_map_create"))
+		return;
+
+	/* Updates flagged BPF_F_ALL_CPUS must fail. */
+	ASSERT_ERR(bpf_map_update_elem(map_fd, &key, &val, BPF_F_ALL_CPUS),
+		   "bpf_map_update_elem all_cpus");
+
+	batch_opts.elem_flags = BPF_F_ALL_CPUS;
+	ASSERT_ERR(bpf_map_update_batch(map_fd, &key, &val, &count, &batch_opts),
+		   "bpf_map_update_batch all_cpus");
+
+	/* Lookups flagged BPF_F_CPU must fail. */
+	ASSERT_ERR(bpf_map_lookup_elem_flags(map_fd, &key, &val, BPF_F_CPU),
+		   "bpf_map_lookup_elem_flags cpu");
+
+	batch_opts.elem_flags = BPF_F_CPU;
+	ASSERT_ERR(bpf_map_lookup_batch(map_fd, NULL, &batch, &key, &val, &count, &batch_opts),
+		   "bpf_map_lookup_batch cpu");
+
+	close(map_fd);
+}
+
+/* Check that the CPU flags are rejected on a BPF_MAP_TYPE_ARRAY map. */
+static void test_array_cpu_flag(void)
+{
+ test_map_op_cpu_flag(BPF_MAP_TYPE_ARRAY);
+}
+
+/* Check that the CPU flags are rejected on a BPF_MAP_TYPE_HASH map. */
+static void test_hash_cpu_flag(void)
+{
+ test_map_op_cpu_flag(BPF_MAP_TYPE_HASH);
+}
+
void test_percpu_alloc(void)
{
if (test__start_subtest("array"))
@@ -125,4 +448,16 @@ void test_percpu_alloc(void)
test_cgrp_local_storage();
if (test__start_subtest("failure_tests"))
test_failure();
+ if (test__start_subtest("cpu_flag_percpu_array"))
+ test_percpu_array_cpu_flag();
+ if (test__start_subtest("cpu_flag_percpu_hash"))
+ test_percpu_hash_cpu_flag();
+ if (test__start_subtest("cpu_flag_lru_percpu_hash"))
+ test_lru_percpu_hash_cpu_flag();
+ if (test__start_subtest("cpu_flag_percpu_cgroup_storage"))
+ test_percpu_cgroup_storage_cpu_flag();
+ if (test__start_subtest("cpu_flag_array"))
+ test_array_cpu_flag();
+ if (test__start_subtest("cpu_flag_hash"))
+ test_hash_cpu_flag();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index 51544372f52e..41dfaaabb73f 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -101,9 +101,9 @@ static int resolve_symbols(void)
int type_id;
__u32 nr;
- btf = btf__parse_elf("btf_data.bpf.o", NULL);
+ btf = btf__parse_raw("resolve_btfids.test.o.BTF");
if (CHECK(libbpf_get_error(btf), "resolve",
- "Failed to load BTF from btf_data.bpf.o\n"))
+ "Failed to load BTF from resolve_btfids.test.o.BTF\n"))
return -1;
nr = btf__type_cnt(btf);
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
index e4940583924b..e2c867fd5244 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
@@ -5,9 +5,14 @@
#include "sk_bypass_prot_mem.skel.h"
#include "network_helpers.h"
+#ifndef PAGE_SIZE
+#include <unistd.h>
+#define PAGE_SIZE getpagesize()
+#endif
+
#define NR_PAGES 32
#define NR_SOCKETS 2
-#define BUF_TOTAL (NR_PAGES * 4096 / NR_SOCKETS)
+#define BUF_TOTAL (NR_PAGES * PAGE_SIZE / NR_SOCKETS)
#define BUF_SINGLE 1024
#define NR_SEND (BUF_TOTAL / BUF_SINGLE)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 1e3e4392dcca..256707e7d20d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Cloudflare
#include <error.h>
-#include <netinet/tcp.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
#include <sys/epoll.h>
#include "test_progs.h"
@@ -22,6 +23,15 @@
#define TCP_REPAIR_ON 1
#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */
+/**
+ * SOL_TCP is defined in <netinet/tcp.h> (glibc), but the copybuf_address
+ * field of tcp_zerocopy_receive is not yet included in older versions.
+ * This workaround remains necessary until the glibc update propagates.
+ */
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
static int connected_socket_v4(void)
{
struct sockaddr_in addr = {
@@ -536,13 +546,14 @@ out:
}
-static void test_sockmap_skb_verdict_fionread(bool pass_prog)
+static void do_test_sockmap_skb_verdict_fionread(int sotype, bool pass_prog)
{
int err, map, verdict, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
int expected, zero = 0, sent, recvd, avail;
struct test_sockmap_pass_prog *pass = NULL;
struct test_sockmap_drop_prog *drop = NULL;
char buf[256] = "0123456789";
+ int split_len = sizeof(buf) / 2;
if (pass_prog) {
pass = test_sockmap_pass_prog__open_and_load();
@@ -550,7 +561,10 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
return;
verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
map = bpf_map__fd(pass->maps.sock_map_rx);
- expected = sizeof(buf);
+ if (sotype == SOCK_DGRAM)
+ expected = split_len; /* FIONREAD for UDP is different from TCP */
+ else
+ expected = sizeof(buf);
} else {
drop = test_sockmap_drop_prog__open_and_load();
if (!ASSERT_OK_PTR(drop, "open_and_load"))
@@ -566,7 +580,7 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
if (!ASSERT_OK(err, "bpf_prog_attach"))
goto out;
- err = create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
+ err = create_socket_pairs(AF_INET, sotype, &c0, &c1, &p0, &p1);
if (!ASSERT_OK(err, "create_socket_pairs()"))
goto out;
@@ -574,8 +588,9 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
goto out_close;
- sent = xsend(p1, &buf, sizeof(buf), 0);
- ASSERT_EQ(sent, sizeof(buf), "xsend(p0)");
+ sent = xsend(p1, &buf, split_len, 0);
+ sent += xsend(p1, &buf, sizeof(buf) - split_len, 0);
+ ASSERT_EQ(sent, sizeof(buf), "xsend(p1)");
err = ioctl(c1, FIONREAD, &avail);
ASSERT_OK(err, "ioctl(FIONREAD) error");
ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
@@ -597,6 +612,12 @@ out:
test_sockmap_drop_prog__destroy(drop);
}
+/*
+ * Run the skb-verdict FIONREAD check twice for the given @pass_prog mode:
+ * first with SOCK_STREAM socket pairs, then with SOCK_DGRAM ones.
+ */
+static void test_sockmap_skb_verdict_fionread(bool pass_prog)
+{
+	do_test_sockmap_skb_verdict_fionread(SOCK_STREAM, pass_prog);
+	do_test_sockmap_skb_verdict_fionread(SOCK_DGRAM, pass_prog);
+}
+
static void test_sockmap_skb_verdict_change_tail(void)
{
struct test_sockmap_change_tail *skel;
@@ -1042,6 +1063,257 @@ close_map:
xclose(map);
}
+/*
+ * Reproducer for a kernel WARNING.
+ *
+ * p1 first reads data through the sockmap path (advancing its copied_seq),
+ * is then removed from the map, receives more data through the regular TCP
+ * stack and is finally queried with TCP_ZEROCOPY_RECEIVE.
+ */
+static void test_sockmap_zc(void)
+{
+	int map, err, sent, recvd, zero = 0, one = 1, on = 1;
+	char buf[10] = "0123456789", rcv[11], addr[100];
+	struct test_sockmap_pass_prog *skel = NULL;
+	int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+	struct tcp_zerocopy_receive zc;
+	socklen_t zc_len = sizeof(zc);
+	struct bpf_program *prog;
+
+	skel = test_sockmap_pass_prog__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	if (create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1))
+		goto end;
+
+	prog = skel->progs.prog_skb_verdict_ingress;
+	map = bpf_map__fd(skel->maps.sock_map_rx);
+
+	err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach"))
+		goto end;
+
+	err = bpf_map_update_elem(map, &zero, &p0, BPF_ANY);
+	if (!ASSERT_OK(err, "bpf_map_update_elem"))
+		goto end;
+
+	err = bpf_map_update_elem(map, &one, &p1, BPF_ANY);
+	if (!ASSERT_OK(err, "bpf_map_update_elem"))
+		goto end;
+
+	sent = xsend(c0, buf, sizeof(buf), 0);
+	if (!ASSERT_EQ(sent, sizeof(buf), "xsend"))
+		goto end;
+
+	/* trigger tcp_bpf_recvmsg_parser and inc copied_seq of p1 */
+	recvd = recv_timeout(p1, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+	if (!ASSERT_EQ(recvd, sent, "recv_timeout(p1)"))
+		goto end;
+
+	/*
+	 * Uninstall sockmap of p1. The remaining steps only make sense once
+	 * p1 is back on the plain TCP path, so stop if the delete fails.
+	 */
+	err = bpf_map_delete_elem(map, &one);
+	if (!ASSERT_OK(err, "bpf_map_delete_elem(1)"))
+		goto end;
+
+	/* trigger tcp stack and the rcv_nxt of p1 is less than copied_seq */
+	sent = xsend(c1, buf, sizeof(buf) - 1, 0);
+	if (!ASSERT_EQ(sent, sizeof(buf) - 1, "xsend"))
+		goto end;
+
+	err = setsockopt(p1, SOL_SOCKET, SO_ZEROCOPY, &on, sizeof(on));
+	if (!ASSERT_OK(err, "setsockopt"))
+		goto end;
+
+	/* only the copy buffer is provided; every other field stays zero */
+	memset(&zc, 0, sizeof(zc));
+	zc.copybuf_address = (__u64)((unsigned long)addr);
+	zc.copybuf_len = sizeof(addr);
+
+	err = getsockopt(p1, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len);
+	if (!ASSERT_OK(err, "getsockopt"))
+		goto end;
+
+end:
+	if (c0 >= 0)
+		close(c0);
+	if (p0 >= 0)
+		close(p0);
+	if (c1 >= 0)
+		close(c1);
+	if (p1 >= 0)
+		close(p1);
+	test_sockmap_pass_prog__destroy(skel);
+}
+
+/*
+ * Check that copied_seq of a socket stays correct after it leaves sockmap.
+ *
+ * Data first flows through the verdict program (plus a stream parser when
+ * @strp is true) and is read back in two pieces. p1 and p0 are then deleted
+ * from the map and both connections must keep working as plain sockets.
+ */
+static void test_sockmap_copied_seq(bool strp)
+{
+	struct test_sockmap_pass_prog *skel = NULL;
+	char buf[10] = "0123456789", rcv[11];
+	int c0 = -1, c1 = -1, p0 = -1, p1 = -1;
+	int map, err, sent, recvd, round;
+	int zero = 0, one = 1;
+	struct bpf_program *prog;
+
+	skel = test_sockmap_pass_prog__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	if (create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1))
+		goto end;
+
+	map = bpf_map__fd(skel->maps.sock_map_rx);
+	prog = skel->progs.prog_skb_verdict_ingress;
+
+	err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach verdict"))
+		goto end;
+
+	if (strp) {
+		prog = skel->progs.prog_skb_verdict_ingress_strp;
+		err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_PARSER, 0);
+		if (!ASSERT_OK(err, "bpf_prog_attach parser"))
+			goto end;
+	}
+
+	err = bpf_map_update_elem(map, &zero, &p0, BPF_ANY);
+	if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+		goto end;
+
+	err = bpf_map_update_elem(map, &one, &p1, BPF_ANY);
+	if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
+		goto end;
+
+	/* just trigger sockmap: data sent by c0 will be received by p1 */
+	sent = xsend(c0, buf, sizeof(buf), 0);
+	if (!ASSERT_EQ(sent, sizeof(buf), "xsend(c0), bpf"))
+		goto end;
+
+	/* partial read: one byte first, then whatever is left */
+	recvd = recv_timeout(p1, rcv, 1, MSG_DONTWAIT, 1);
+	recvd += recv_timeout(p1, rcv + 1, sizeof(rcv) - 1, MSG_DONTWAIT, 1);
+	if (!ASSERT_EQ(recvd, sent, "recv_timeout(p1), bpf") ||
+	    !ASSERT_OK(memcmp(buf, rcv, recvd), "data mismatch"))
+		goto end;
+
+	/* take p1, then p0, out of the sockmap */
+	err = bpf_map_delete_elem(map, &one);
+	if (!ASSERT_OK(err, "bpf_map_delete_elem(1)"))
+		goto end;
+
+	err = bpf_map_delete_elem(map, &zero);
+	if (!ASSERT_OK(err, "bpf_map_delete_elem(0)"))
+		goto end;
+
+	/* all sockets are plain sockets again and must still work */
+	for (round = 0; round < 5; round++) {
+		/* exercise copied_seq of p1 through the native tcp stack */
+		sent = xsend(c1, buf, sizeof(buf), 0);
+		if (!ASSERT_EQ(sent, sizeof(buf), "xsend(c1), native"))
+			goto end;
+
+		recvd = recv(p1, rcv, sizeof(rcv), MSG_DONTWAIT);
+		if (!ASSERT_EQ(recvd, sent, "recv_timeout(p1), native"))
+			goto end;
+
+		/* p0 used to redirect skbs to p1, so check its copied_seq too */
+		sent = xsend(c0, buf, sizeof(buf), 0);
+		if (!ASSERT_EQ(sent, sizeof(buf), "xsend(c0), native"))
+			goto end;
+
+		recvd = recv(p0, rcv, sizeof(rcv), MSG_DONTWAIT);
+		if (!ASSERT_EQ(recvd, sent, "recv_timeout(p0), native"))
+			goto end;
+	}
+
+end:
+	if (c0 >= 0)
+		close(c0);
+	if (p0 >= 0)
+		close(p0);
+	if (c1 >= 0)
+		close(c1);
+	if (p1 >= 0)
+		close(p1);
+	test_sockmap_pass_prog__destroy(skel);
+}
+
+/*
+ * Poll FIONREAD on @fd until it reports at least @expected bytes.
+ *
+ * Each unsuccessful attempt sleeps for 1000us and counts as one unit of
+ * @timeout_ms. Returns the last byte count read (which may still be below
+ * @expected when the attempts run out), or -errno if the ioctl fails.
+ */
+static int wait_for_fionread(int fd, int expected, unsigned int timeout_ms)
+{
+	unsigned int waited;
+	int avail = 0;
+
+	for (waited = 0; waited < timeout_ms; waited++) {
+		if (ioctl(fd, FIONREAD, &avail) < 0)
+			return -errno;
+		if (avail >= expected)
+			return avail;
+		usleep(1000);
+	}
+
+	return avail;
+}
+
+/*
+ * Send data to p1 over two channels, the native stack (c1) and BPF
+ * redirection (c0), then check what FIONREAD reports after each step and
+ * that the payload read back matches. @sotype selects the socket type used
+ * for the socket pairs.
+ */
+static void test_sockmap_multi_channels(int sotype)
+{
+	int map, err, sent, recvd, zero = 0, one = 1, avail = 0, expected;
+	struct test_sockmap_pass_prog *skel = NULL;
+	int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+	char buf[10] = "0123456789", rcv[11];
+	struct bpf_program *prog;
+
+	skel = test_sockmap_pass_prog__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	err = create_socket_pairs(AF_INET, sotype, &c0, &c1, &p0, &p1);
+	if (err)
+		goto end;
+
+	prog = skel->progs.prog_skb_verdict_ingress;
+	map = bpf_map__fd(skel->maps.sock_map_rx);
+
+	err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach verdict"))
+		goto end;
+
+	err = bpf_map_update_elem(map, &zero, &p0, BPF_ANY);
+	if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+		goto end;
+
+	err = bpf_map_update_elem(map, &one, &p1, BPF_ANY);
+	if (!ASSERT_OK(err, "bpf_map_update_elem"))
+		goto end;
+
+	/* send data to p1 via native stack */
+	sent = xsend(c1, buf, 2, 0);
+	if (!ASSERT_EQ(sent, 2, "xsend(2)"))
+		goto end;
+
+	/* wait_for_fionread() takes milliseconds; IO_TIMEOUT_SEC is in seconds */
+	avail = wait_for_fionread(p1, 2, IO_TIMEOUT_SEC * 1000);
+	ASSERT_EQ(avail, 2, "ioctl(FIONREAD) partial return");
+
+	/* send data to p1 via bpf redirecting */
+	sent = xsend(c0, buf + 2, sizeof(buf) - 2, 0);
+	if (!ASSERT_EQ(sent, sizeof(buf) - 2, "xsend(remain-data)"))
+		goto end;
+
+	/* Poll FIONREAD until expected bytes arrive, poll_read() is unreliable
+	 * here since it may return immediately if prior data is already queued.
+	 */
+	expected = sotype == SOCK_DGRAM ? 2 : sizeof(buf);
+	avail = wait_for_fionread(p1, expected, IO_TIMEOUT_SEC * 1000);
+	ASSERT_EQ(avail, expected, "ioctl(FIONREAD) full return");
+
+	recvd = recv_timeout(p1, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+	if (!ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(p1)") ||
+	    !ASSERT_OK(memcmp(buf, rcv, recvd), "data mismatch"))
+		goto end;
+end:
+	if (c0 >= 0)
+		close(c0);
+	if (p0 >= 0)
+		close(p0);
+	if (c1 >= 0)
+		close(c1);
+	if (p1 >= 0)
+		close(p1);
+	test_sockmap_pass_prog__destroy(skel);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -1108,4 +1380,14 @@ void test_sockmap_basic(void)
test_sockmap_skb_verdict_vsock_poll();
if (test__start_subtest("sockmap vsock unconnected"))
test_sockmap_vsock_unconnected();
+ if (test__start_subtest("sockmap with zc"))
+ test_sockmap_zc();
+ if (test__start_subtest("sockmap recover"))
+ test_sockmap_copied_seq(false);
+ if (test__start_subtest("sockmap recover with strp"))
+ test_sockmap_copied_seq(true);
+ if (test__start_subtest("sockmap tcp multi channels"))
+ test_sockmap_multi_channels(SOCK_STREAM);
+ if (test__start_subtest("sockmap udp multi channels"))
+ test_sockmap_multi_channels(SOCK_DGRAM);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
index c9efdd2a5b18..da42b00e3d1f 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
@@ -74,11 +74,20 @@ static void test_stacktrace_ips_kprobe_multi(bool retprobe)
load_kallsyms();
- check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
- ksym_get_addr("bpf_testmod_stacktrace_test_3"),
- ksym_get_addr("bpf_testmod_stacktrace_test_2"),
- ksym_get_addr("bpf_testmod_stacktrace_test_1"),
- ksym_get_addr("bpf_testmod_test_read"));
+ if (retprobe) {
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+ ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+ ksym_get_addr("bpf_testmod_test_read"));
+ } else {
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 5,
+ ksym_get_addr("bpf_testmod_stacktrace_test"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+ ksym_get_addr("bpf_testmod_test_read"));
+ }
cleanup:
stacktrace_ips__destroy(skel);
@@ -128,6 +137,99 @@ cleanup:
stacktrace_ips__destroy(skel);
}
+/*
+ * Attach kprobe_test to bpf_testmod_stacktrace_test as a kprobe, or as a
+ * kretprobe when @retprobe is true, trigger it via trigger_module_test_read()
+ * and compare the recorded stack with the expected call chain.
+ *
+ * The entry probe expects 5 entries starting with the probed function itself;
+ * the return probe expects the 4 callers only. The test is skipped when
+ * CONFIG_UNWINDER_ORC is not set.
+ */
+static void test_stacktrace_ips_kprobe(bool retprobe)
+{
+	LIBBPF_OPTS(bpf_kprobe_opts, opts,
+		.retprobe = retprobe
+	);
+	struct stacktrace_ips *skel;
+
+	skel = stacktrace_ips__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+		return;
+
+	if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+		test__skip();
+		goto cleanup;
+	}
+
+	skel->links.kprobe_test = bpf_program__attach_kprobe_opts(
+						skel->progs.kprobe_test,
+						"bpf_testmod_stacktrace_test", &opts);
+	if (!ASSERT_OK_PTR(skel->links.kprobe_test, "bpf_program__attach_kprobe_opts"))
+		goto cleanup;
+
+	trigger_module_test_read(1);
+
+	load_kallsyms();
+
+	if (retprobe) {
+		check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+				     ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+				     ksym_get_addr("bpf_testmod_test_read"));
+	} else {
+		check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 5,
+				     ksym_get_addr("bpf_testmod_stacktrace_test"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+				     ksym_get_addr("bpf_testmod_test_read"));
+	}
+
+cleanup:
+	stacktrace_ips__destroy(skel);
+}
+
+/*
+ * Same stack check as the kprobe variant, but through trace attachments:
+ * fexit_test is attached when @retprobe is true, fentry_test otherwise.
+ *
+ * The fentry case expects 5 entries starting with
+ * bpf_testmod_stacktrace_test; the fexit case expects the 4 callers only.
+ * The test is skipped when CONFIG_UNWINDER_ORC is not set.
+ */
+static void test_stacktrace_ips_trampoline(bool retprobe)
+{
+	struct stacktrace_ips *skel;
+
+	skel = stacktrace_ips__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+		return;
+
+	if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+		test__skip();
+		goto cleanup;
+	}
+
+	if (retprobe) {
+		skel->links.fexit_test = bpf_program__attach_trace(skel->progs.fexit_test);
+		if (!ASSERT_OK_PTR(skel->links.fexit_test, "bpf_program__attach_trace"))
+			goto cleanup;
+	} else {
+		skel->links.fentry_test = bpf_program__attach_trace(skel->progs.fentry_test);
+		if (!ASSERT_OK_PTR(skel->links.fentry_test, "bpf_program__attach_trace"))
+			goto cleanup;
+	}
+
+	trigger_module_test_read(1);
+
+	load_kallsyms();
+
+	if (retprobe) {
+		check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+				     ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+				     ksym_get_addr("bpf_testmod_test_read"));
+	} else {
+		check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 5,
+				     ksym_get_addr("bpf_testmod_stacktrace_test"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+				     ksym_get_addr("bpf_testmod_test_read"));
+	}
+
+cleanup:
+	stacktrace_ips__destroy(skel);
+}
+
static void __test_stacktrace_ips(void)
{
if (test__start_subtest("kprobe_multi"))
@@ -136,6 +238,14 @@ static void __test_stacktrace_ips(void)
test_stacktrace_ips_kprobe_multi(true);
if (test__start_subtest("raw_tp"))
test_stacktrace_ips_raw_tp();
+ if (test__start_subtest("kprobe"))
+ test_stacktrace_ips_kprobe(false);
+ if (test__start_subtest("kretprobe"))
+ test_stacktrace_ips_kprobe(true);
+ if (test__start_subtest("fentry"))
+ test_stacktrace_ips_trampoline(false);
+ if (test__start_subtest("fexit"))
+ test_stacktrace_ips_trampoline(true);
}
#else
static void __test_stacktrace_ips(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
index 0f3bf594e7a5..300032a19445 100644
--- a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
+++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
@@ -9,6 +9,7 @@
static const char * const test_cases[] = {
"strcmp",
"strcasecmp",
+ "strncasecmp",
"strchr",
"strchrnul",
"strnchr",
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 0ab36503c3b2..7d534fde0af9 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -8,6 +8,7 @@
#include "tailcall_freplace.skel.h"
#include "tc_bpf2bpf.skel.h"
#include "tailcall_fail.skel.h"
+#include "tailcall_sleepable.skel.h"
/* test_tailcall_1 checks basic functionality by patching multiple locations
* in a single program for a single tail call slot with nop->jmp, jmp->nop
@@ -1653,6 +1654,77 @@ static void test_tailcall_failure()
RUN_TESTS(tailcall_fail);
}
+/*
+ * Empty function that test_tailcall_sleepable() attaches a uprobe to by name
+ * and then calls. NOTE(review): noinline and the empty asm statement are
+ * presumably there to keep the call from being optimized away - confirm.
+ */
+noinline void uprobe_sleepable_trigger(void)
+{
+	asm volatile ("");
+}
+
+/*
+ * Two-phase test of tail calls with sleepable uprobe programs:
+ *
+ *  1. Loading uprobe_normal together with uprobe_sleepable_1 must fail,
+ *     because they share the tail call map.
+ *  2. Loading uprobe_sleepable_1 and uprobe_sleepable_2 must succeed;
+ *     uprobe_sleepable_2 is installed in jmp_table[0], uprobe_sleepable_1 is
+ *     attached to uprobe_sleepable_trigger(), and after one call the BPF
+ *     side is expected to report executed == 1.
+ */
+static void test_tailcall_sleepable(void)
+{
+	LIBBPF_OPTS(bpf_uprobe_opts, opts);
+	struct tailcall_sleepable *skel;
+	int err, map_fd, prog_fd;
+	int key = 0;
+
+	/* Phase 1: normal + sleepable on the same map is rejected at load. */
+	skel = tailcall_sleepable__open();
+	if (!ASSERT_OK_PTR(skel, "tailcall_sleepable__open"))
+		return;
+
+	bpf_program__set_autoload(skel->progs.uprobe_normal, true);
+	bpf_program__set_autoload(skel->progs.uprobe_sleepable_1, true);
+
+	err = tailcall_sleepable__load(skel);
+	if (!ASSERT_ERR(err, "tailcall_sleepable__load"))
+		goto out;
+
+	tailcall_sleepable__destroy(skel);
+
+	/* Phase 2: sleepable to sleepable tail call works. */
+	skel = tailcall_sleepable__open();
+	if (!ASSERT_OK_PTR(skel, "tailcall_sleepable__open"))
+		return;
+
+	bpf_program__set_autoload(skel->progs.uprobe_sleepable_1, true);
+	bpf_program__set_autoload(skel->progs.uprobe_sleepable_2, true);
+
+	err = tailcall_sleepable__load(skel);
+	if (!ASSERT_OK(err, "tailcall_sleepable__load"))
+		goto out;
+
+	/* Put sleepable uprobe_sleepable_2 into jmp_table[0]. */
+	prog_fd = bpf_program__fd(skel->progs.uprobe_sleepable_2);
+	map_fd = bpf_map__fd(skel->maps.jmp_table);
+	err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY);
+	if (!ASSERT_OK(err, "update jmp_table"))
+		goto out;
+
+	skel->bss->my_pid = getpid();
+
+	/* Hook uprobe_sleepable_1 onto uprobe_sleepable_trigger and fire it. */
+	opts.func_name = "uprobe_sleepable_trigger";
+	skel->links.uprobe_sleepable_1 =
+		bpf_program__attach_uprobe_opts(skel->progs.uprobe_sleepable_1,
+						-1,
+						"/proc/self/exe",
+						0 /* offset */,
+						&opts);
+	if (!ASSERT_OK_PTR(skel->links.uprobe_sleepable_1, "bpf_program__attach_uprobe_opts"))
+		goto out;
+
+	uprobe_sleepable_trigger();
+	ASSERT_EQ(skel->bss->executed, 1, "executed");
+
+out:
+	tailcall_sleepable__destroy(skel);
+}
+
void test_tailcalls(void)
{
if (test__start_subtest("tailcall_1"))
@@ -1707,4 +1779,6 @@ void test_tailcalls(void)
test_tailcall_bpf2bpf_freplace();
if (test__start_subtest("tailcall_failure"))
test_tailcall_failure();
+ if (test__start_subtest("tailcall_sleepable"))
+ test_tailcall_sleepable();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
index 2de38776a2d4..0f86b9275cf9 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_data.h
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
@@ -94,7 +94,7 @@ struct tld_metadata {
};
struct tld_meta_u {
- _Atomic __u8 cnt;
+ _Atomic __u16 cnt;
__u16 size;
struct tld_metadata metadata[];
};
@@ -217,7 +217,7 @@ out:
static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data)
{
int err, i, sz, off = 0;
- __u8 cnt;
+ __u16 cnt;
if (!TLD_READ_ONCE(tld_meta_p)) {
err = __tld_init_meta_p();
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
index 42e822ea352f..7bee33797c71 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -112,24 +112,24 @@ static void test_recursion(void)
task_ls_recursion__detach(skel);
/* Refer to the comment in BPF_PROG(on_update) for
- * the explanation on the value 201 and 100.
+ * the explanation on the value 200 and 1.
*/
map_fd = bpf_map__fd(skel->maps.map_a);
err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
ASSERT_OK(err, "lookup map_a");
- ASSERT_EQ(value, 201, "map_a value");
- ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy");
+ ASSERT_EQ(value, 200, "map_a value");
+ ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy");
map_fd = bpf_map__fd(skel->maps.map_b);
err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
ASSERT_OK(err, "lookup map_b");
- ASSERT_EQ(value, 100, "map_b value");
+ ASSERT_EQ(value, 1, "map_b value");
prog_fd = bpf_program__fd(skel->progs.on_update);
memset(&info, 0, sizeof(info));
err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
ASSERT_OK(err, "get prog info");
- ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion");
+ ASSERT_EQ(info.recursion_misses, 2, "on_update prog recursion");
prog_fd = bpf_program__fd(skel->progs.on_enter);
memset(&info, 0, sizeof(info));
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_assoc.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_assoc.c
new file mode 100644
index 000000000000..461ded722351
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_assoc.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "struct_ops_assoc.skel.h"
+#include "struct_ops_assoc_reuse.skel.h"
+#include "struct_ops_assoc_in_timer.skel.h"
+
+/*
+ * Associate syscall and sys_enter programs with two struct_ops maps, check
+ * that invalid associations are rejected (a struct_ops program itself, and a
+ * program that is already associated with another map), then attach and run
+ * the programs and verify that no error was recorded on the BPF side.
+ */
+static void test_st_ops_assoc(void)
+{
+	struct struct_ops_assoc *skel = NULL;
+	int err;
+
+	skel = struct_ops_assoc__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "struct_ops_assoc__open"))
+		goto out;
+
+	/* a struct_ops program cannot be associated explicitly */
+	err = bpf_program__assoc_struct_ops(skel->progs.test_1_a,
+					    skel->maps.st_ops_map_a, NULL);
+	ASSERT_ERR(err, "bpf_program__assoc_struct_ops(test_1_a, st_ops_map_a)");
+
+	/* programs for map_a */
+	err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog_a,
+					    skel->maps.st_ops_map_a, NULL);
+	ASSERT_OK(err, "bpf_program__assoc_struct_ops(syscall_prog_a, st_ops_map_a)");
+
+	err = bpf_program__assoc_struct_ops(skel->progs.sys_enter_prog_a,
+					    skel->maps.st_ops_map_a, NULL);
+	ASSERT_OK(err, "bpf_program__assoc_struct_ops(sys_enter_prog_a, st_ops_map_a)");
+
+	/* programs for map_b */
+	err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog_b,
+					    skel->maps.st_ops_map_b, NULL);
+	ASSERT_OK(err, "bpf_program__assoc_struct_ops(syscall_prog_b, st_ops_map_b)");
+
+	err = bpf_program__assoc_struct_ops(skel->progs.sys_enter_prog_b,
+					    skel->maps.st_ops_map_b, NULL);
+	ASSERT_OK(err, "bpf_program__assoc_struct_ops(sys_enter_prog_b, st_ops_map_b)");
+
+	/* sys_enter_prog_a is already tied to map_a, so map_b must be refused */
+	err = bpf_program__assoc_struct_ops(skel->progs.sys_enter_prog_a,
+					    skel->maps.st_ops_map_b, NULL);
+	ASSERT_ERR(err, "bpf_program__assoc_struct_ops(sys_enter_prog_a, st_ops_map_b)");
+
+	err = struct_ops_assoc__attach(skel);
+	if (!ASSERT_OK(err, "struct_ops_assoc__attach"))
+		goto out;
+
+	/* run tracing prog that calls .test_1 and checks return */
+	skel->bss->test_pid = getpid();
+	sys_gettid();
+	skel->bss->test_pid = 0;
+
+	ASSERT_EQ(skel->bss->test_err_a, 0, "skel->bss->test_err_a");
+	ASSERT_EQ(skel->bss->test_err_b, 0, "skel->bss->test_err_b");
+
+	/* run syscall_prog that calls .test_1 and checks return */
+	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog_a), NULL);
+	ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog_b), NULL);
+	ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+	ASSERT_EQ(skel->bss->test_err_a, 0, "skel->bss->test_err_a");
+	ASSERT_EQ(skel->bss->test_err_b, 0, "skel->bss->test_err_b");
+
+out:
+	struct_ops_assoc__destroy(skel);
+}
+
+/*
+ * Associate syscall_prog_a and syscall_prog_b with st_ops_map_a and
+ * st_ops_map_b of the struct_ops_assoc_reuse skeleton, attach, run both
+ * programs and verify that no error was recorded on the BPF side.
+ */
+static void test_st_ops_assoc_reuse(void)
+{
+	struct struct_ops_assoc_reuse *skel = NULL;
+	int err;
+
+	skel = struct_ops_assoc_reuse__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "struct_ops_assoc_reuse__open"))
+		goto out;
+
+	err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog_a,
+					    skel->maps.st_ops_map_a, NULL);
+	ASSERT_OK(err, "bpf_program__assoc_struct_ops(syscall_prog_a, st_ops_map_a)");
+
+	err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog_b,
+					    skel->maps.st_ops_map_b, NULL);
+	ASSERT_OK(err, "bpf_program__assoc_struct_ops(syscall_prog_b, st_ops_map_b)");
+
+	/* label names the function actually called, so failures are traceable */
+	err = struct_ops_assoc_reuse__attach(skel);
+	if (!ASSERT_OK(err, "struct_ops_assoc_reuse__attach"))
+		goto out;
+
+	/* run syscall_prog that calls .test_1 and checks return */
+	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog_a), NULL);
+	ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog_b), NULL);
+	ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+	ASSERT_EQ(skel->bss->test_err_a, 0, "skel->bss->test_err_a");
+	ASSERT_EQ(skel->bss->test_err_b, 0, "skel->bss->test_err_b");
+
+out:
+	struct_ops_assoc_reuse__destroy(skel);
+}
+
+/*
+ * Associate syscall_prog with st_ops_map, attach, run syscall_prog once and
+ * wait for the timer callback it schedules; the callback's .test_1 result is
+ * expected to be 1234 and no error may be recorded on the BPF side.
+ */
+static void test_st_ops_assoc_in_timer(void)
+{
+	struct struct_ops_assoc_in_timer *skel = NULL;
+	int err;
+
+	skel = struct_ops_assoc_in_timer__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "struct_ops_assoc_in_timer__open"))
+		goto out;
+
+	err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog,
+					    skel->maps.st_ops_map, NULL);
+	ASSERT_OK(err, "bpf_program__assoc_struct_ops");
+
+	err = struct_ops_assoc_in_timer__attach(skel);
+	if (!ASSERT_OK(err, "struct_ops_assoc_in_timer__attach"))
+		goto out;
+
+	/*
+	 * Run .test_1 by calling kfunc bpf_kfunc_multi_st_ops_test_1_prog_arg() and checks
+	 * the return value. .test_1 will also schedule timer_cb that runs .test_1 again
+	 * immediately.
+	 */
+	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog), NULL);
+	ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+	/*
+	 * Check the return of the kfunc after timer_cb runs.
+	 * NOTE(review): this wait has no upper bound; if timer_cb never runs
+	 * the test spins here forever.
+	 */
+	while (!READ_ONCE(skel->bss->timer_cb_run))
+		sched_yield();
+	ASSERT_EQ(skel->bss->timer_test_1_ret, 1234, "skel->bss->timer_test_1_ret");
+	ASSERT_EQ(skel->bss->test_err, 0, "skel->bss->test_err");
+out:
+	struct_ops_assoc_in_timer__destroy(skel);
+}
+
+/*
+ * Variant of the in-timer test where user space drops its references before
+ * the timer fires: the timer is delayed by 500ms, the struct_ops link is
+ * destroyed and the program and map fds are closed right after syscall_prog
+ * runs. The timer callback's .test_1 result is then expected to be -1.
+ */
+static void test_st_ops_assoc_in_timer_no_uref(void)
+{
+	struct struct_ops_assoc_in_timer *skel = NULL;
+	struct bpf_link *link;
+	int err;
+
+	skel = struct_ops_assoc_in_timer__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "struct_ops_assoc_in_timer__open"))
+		goto out;
+
+	err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog,
+					    skel->maps.st_ops_map, NULL);
+	ASSERT_OK(err, "bpf_program__assoc_struct_ops");
+
+	link = bpf_map__attach_struct_ops(skel->maps.st_ops_map);
+	if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops"))
+		goto out;
+
+	/*
+	 * Run .test_1 by calling kfunc bpf_kfunc_multi_st_ops_test_1_prog_arg() and checks
+	 * the return value. .test_1 will also schedule timer_cb that runs .test_1 again.
+	 * timer_cb will run 500ms after syscall_prog runs, when the user space no longer
+	 * holds a reference to st_ops_map.
+	 */
+	skel->bss->timer_ns = 500000000;
+	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog), NULL);
+	ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+	/*
+	 * Detach and close struct_ops map to cause it to be freed.
+	 * NOTE(review): these fds still belong to the skeleton, which is
+	 * destroyed at the end of the function - confirm that closing them
+	 * here is intended.
+	 */
+	bpf_link__destroy(link);
+	close(bpf_program__fd(skel->progs.syscall_prog));
+	close(bpf_map__fd(skel->maps.st_ops_map));
+
+	/*
+	 * Check the return of the kfunc after timer_cb runs.
+	 * NOTE(review): this wait has no upper bound.
+	 */
+	while (!READ_ONCE(skel->bss->timer_cb_run))
+		sched_yield();
+	ASSERT_EQ(skel->bss->timer_test_1_ret, -1, "skel->bss->timer_test_1_ret");
+	ASSERT_EQ(skel->bss->test_err, 0, "skel->bss->test_err");
+out:
+	struct_ops_assoc_in_timer__destroy(skel);
+}
+
+/* Test entry point: runs each struct_ops association case as a subtest. */
+void test_struct_ops_assoc(void)
+{
+	if (test__start_subtest("st_ops_assoc"))
+		test_st_ops_assoc();
+
+	if (test__start_subtest("st_ops_assoc_reuse"))
+		test_st_ops_assoc_reuse();
+
+	if (test__start_subtest("st_ops_assoc_in_timer"))
+		test_st_ops_assoc_in_timer();
+
+	if (test__start_subtest("st_ops_assoc_in_timer_no_uref"))
+		test_st_ops_assoc_in_timer_no_uref();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
index 9fd6306b455c..9556ad3d986f 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
@@ -4,7 +4,7 @@
#include <test_progs.h>
#define TLD_FREE_DATA_ON_THREAD_EXIT
-#define TLD_DYN_DATA_SIZE 4096
+#define TLD_DYN_DATA_SIZE (getpagesize() - 8)
#include "task_local_data.h"
struct test_tld_struct {
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
index 5af28f359cfd..bab4a31621c7 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_xsk.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
@@ -433,7 +433,7 @@ static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pk
}
/* Search for the end of the packet in verbatim mode */
- if (!pkt_continues(pkt->options))
+ if (!pkt_continues(pkt->options) || !pkt->valid)
return nb_frags;
next_frag = pkt_stream->current_pkt_nb;
@@ -1090,6 +1090,8 @@ static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
xsk_ring_prod__cancel(&umem->fq, nb_frags);
}
frags_processed -= nb_frags;
+ pkt_stream_cancel(pkt_stream);
+ pkts_sent--;
}
if (ifobj->use_fill_ring)
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index 34f9ccce2602..09ff21e1ad2f 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -1,12 +1,27 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
+#include <sched.h>
#include <test_progs.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
#include "timer.skel.h"
#include "timer_failure.skel.h"
#include "timer_interrupt.skel.h"
#define NUM_THR 8
+static int perf_event_open(__u32 type, __u64 config, int pid, int cpu)
+{
+ struct perf_event_attr attr = {
+ .type = type,
+ .config = config,
+ .size = sizeof(struct perf_event_attr),
+ .sample_period = 10000,
+ };
+
+ return syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
+}
+
static void *spin_lock_thread(void *arg)
{
int i, err, prog_fd = *(int *)arg;
@@ -22,13 +37,174 @@ static void *spin_lock_thread(void *arg)
pthread_exit(arg);
}
-static int timer(struct timer *timer_skel)
+
+static int timer_stress_runner(struct timer *timer_skel, bool async_cancel)
{
- int i, err, prog_fd;
+ int i, err = 1, prog_fd;
LIBBPF_OPTS(bpf_test_run_opts, topts);
pthread_t thread_id[NUM_THR];
void *ret;
+ timer_skel->bss->async_cancel = async_cancel;
+ prog_fd = bpf_program__fd(timer_skel->progs.race);
+ for (i = 0; i < NUM_THR; i++) {
+ err = pthread_create(&thread_id[i], NULL,
+ &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ break;
+ }
+
+ while (i) {
+ err = pthread_join(thread_id[--i], &ret);
+ if (ASSERT_OK(err, "pthread_join"))
+ ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join");
+ }
+ return err;
+}
+
+static int timer_stress(struct timer *timer_skel)
+{
+ return timer_stress_runner(timer_skel, false);
+}
+
+static int timer_stress_async_cancel(struct timer *timer_skel)
+{
+ return timer_stress_runner(timer_skel, true);
+}
+
+static void *nmi_cpu_worker(void *arg)
+{
+ volatile __u64 num = 1;
+ int i;
+
+ for (i = 0; i < 500000000; ++i)
+ num *= (i % 7) + 1;
+ (void)num;
+
+ return NULL;
+}
+
+static int run_nmi_test(struct timer *timer_skel, struct bpf_program *prog)
+{
+ struct bpf_link *link = NULL;
+ int pe_fd = -1, pipefd[2] = {-1, -1}, pid = 0, status;
+ char buf = 0;
+ int ret = -1;
+
+ if (!ASSERT_OK(pipe(pipefd), "pipe"))
+ goto cleanup;
+
+ pid = fork();
+ if (pid == 0) {
+ /* Child: spawn multiple threads to consume multiple CPUs */
+ pthread_t threads[NUM_THR];
+ int i;
+
+ close(pipefd[1]);
+ read(pipefd[0], &buf, 1);
+ close(pipefd[0]);
+
+ for (i = 0; i < NUM_THR; i++)
+ pthread_create(&threads[i], NULL, nmi_cpu_worker, NULL);
+ for (i = 0; i < NUM_THR; i++)
+ pthread_join(threads[i], NULL);
+ exit(0);
+ }
+
+ if (!ASSERT_GE(pid, 0, "fork"))
+ goto cleanup;
+
+ /* Open perf event for child process across all CPUs */
+ pe_fd = perf_event_open(PERF_TYPE_HARDWARE,
+ PERF_COUNT_HW_CPU_CYCLES,
+ pid, /* measure child process */
+ -1); /* on any CPU */
+ if (pe_fd < 0) {
+ if (errno == ENOENT || errno == EOPNOTSUPP) {
+ printf("SKIP:no PERF_COUNT_HW_CPU_CYCLES\n");
+ test__skip();
+ ret = EOPNOTSUPP;
+ goto cleanup;
+ }
+ ASSERT_GE(pe_fd, 0, "perf_event_open");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach_perf_event(prog, pe_fd);
+ if (!ASSERT_OK_PTR(link, "attach_perf_event"))
+ goto cleanup;
+ pe_fd = -1; /* Ownership transferred to link */
+
+ /* Signal child to start CPU work */
+ close(pipefd[0]);
+ pipefd[0] = -1;
+ write(pipefd[1], &buf, 1);
+ close(pipefd[1]);
+ pipefd[1] = -1;
+
+ waitpid(pid, &status, 0);
+ pid = 0;
+
+ /* Verify NMI context was hit */
+ ASSERT_GT(timer_skel->bss->test_hits, 0, "test_hits");
+ ret = 0;
+
+cleanup:
+ bpf_link__destroy(link);
+ if (pe_fd >= 0)
+ close(pe_fd);
+ if (pid > 0) {
+ write(pipefd[1], &buf, 1);
+ waitpid(pid, &status, 0);
+ }
+ if (pipefd[0] >= 0)
+ close(pipefd[0]);
+ if (pipefd[1] >= 0)
+ close(pipefd[1]);
+ return ret;
+}
+
+static int timer_stress_nmi_race(struct timer *timer_skel)
+{
+ int err;
+
+ err = run_nmi_test(timer_skel, timer_skel->progs.nmi_race);
+ if (err == EOPNOTSUPP)
+ return 0;
+ return err;
+}
+
+static int timer_stress_nmi_update(struct timer *timer_skel)
+{
+ int err;
+
+ err = run_nmi_test(timer_skel, timer_skel->progs.nmi_update);
+ if (err == EOPNOTSUPP)
+ return 0;
+ if (err)
+ return err;
+ ASSERT_GT(timer_skel->bss->update_hits, 0, "update_hits");
+ return 0;
+}
+
+static int timer_stress_nmi_cancel(struct timer *timer_skel)
+{
+ int err;
+
+ err = run_nmi_test(timer_skel, timer_skel->progs.nmi_cancel);
+ if (err == EOPNOTSUPP)
+ return 0;
+ if (err)
+ return err;
+ ASSERT_GT(timer_skel->bss->cancel_hits, 0, "cancel_hits");
+ return 0;
+}
+
+static int timer(struct timer *timer_skel)
+{
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
err = timer__attach(timer_skel);
if (!ASSERT_OK(err, "timer_attach"))
return err;
@@ -63,25 +239,30 @@ static int timer(struct timer *timer_skel)
/* check that code paths completed */
ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok");
- prog_fd = bpf_program__fd(timer_skel->progs.race);
- for (i = 0; i < NUM_THR; i++) {
- err = pthread_create(&thread_id[i], NULL,
- &spin_lock_thread, &prog_fd);
- if (!ASSERT_OK(err, "pthread_create"))
- break;
- }
+ return 0;
+}
- while (i) {
- err = pthread_join(thread_id[--i], &ret);
- if (ASSERT_OK(err, "pthread_join"))
- ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join");
- }
+static int timer_cancel_async(struct timer *timer_skel)
+{
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ prog_fd = bpf_program__fd(timer_skel->progs.test_async_cancel_succeed);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ usleep(500);
+ /* check that there were no errors in timer execution */
+ ASSERT_EQ(timer_skel->bss->err, 0, "err");
+
+ /* check that code paths completed */
+ ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok");
return 0;
}
-/* TODO: use pid filtering */
-void serial_test_timer(void)
+static void test_timer(int (*timer_test_fn)(struct timer *timer_skel))
{
struct timer *timer_skel = NULL;
int err;
@@ -94,13 +275,48 @@ void serial_test_timer(void)
if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
return;
- err = timer(timer_skel);
+ err = timer_test_fn(timer_skel);
ASSERT_OK(err, "timer");
timer__destroy(timer_skel);
+}
+
+void serial_test_timer(void)
+{
+ test_timer(timer);
RUN_TESTS(timer_failure);
}
+void serial_test_timer_stress(void)
+{
+ test_timer(timer_stress);
+}
+
+void serial_test_timer_stress_async_cancel(void)
+{
+ test_timer(timer_stress_async_cancel);
+}
+
+void serial_test_timer_async_cancel(void)
+{
+ test_timer(timer_cancel_async);
+}
+
+void serial_test_timer_stress_nmi_race(void)
+{
+ test_timer(timer_stress_nmi_race);
+}
+
+void serial_test_timer_stress_nmi_update(void)
+{
+ test_timer(timer_stress_nmi_update);
+}
+
+void serial_test_timer_stress_nmi_cancel(void)
+{
+ test_timer(timer_stress_nmi_cancel);
+}
+
void test_timer_interrupt(void)
{
struct timer_interrupt *skel = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c b/tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c
new file mode 100644
index 000000000000..9f1f9aec8888
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "timer_start_deadlock.skel.h"
+
+void test_timer_start_deadlock(void)
+{
+ struct timer_start_deadlock *skel;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ skel = timer_start_deadlock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = timer_start_deadlock__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.start_timer);
+
+ /*
+ * Run the syscall program that attempts to deadlock.
+ * If the kernel deadlocks, this call will never return.
+ */
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(err, "prog_test_run");
+ ASSERT_EQ(opts.retval, 0, "prog_retval");
+
+ ASSERT_EQ(skel->bss->tp_called, 1, "tp_called");
+cleanup:
+ timer_start_deadlock__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_start_delete_race.c b/tools/testing/selftests/bpf/prog_tests/timer_start_delete_race.c
new file mode 100644
index 000000000000..29a46e96f660
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_start_delete_race.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <pthread.h>
+#include <test_progs.h>
+#include "timer_start_delete_race.skel.h"
+
+/*
+ * Test for race between bpf_timer_start() and map element deletion.
+ *
+ * The race scenario:
+ * - CPU 1: bpf_timer_start() proceeds to bpf_async_process() and is about
+ * to call hrtimer_start() but hasn't yet
+ * - CPU 2: map_delete_elem() calls __bpf_async_cancel_and_free(), since
+ * timer is not scheduled yet hrtimer_try_to_cancel() is a nop,
+ * then calls bpf_async_refcount_put() dropping refcnt to zero
+ * and scheduling call_rcu_tasks_trace()
+ * - CPU 1: continues and calls hrtimer_start()
+ * - After RCU tasks trace grace period: memory is freed
+ * - Timer callback fires on freed memory: UAF!
+ *
+ * This test stresses this race by having two threads:
+ * - Thread 1: repeatedly starts timers
+ * - Thread 2: repeatedly deletes map elements
+ *
+ * KASAN should detect use-after-free.
+ */
+
+#define ITERATIONS 1000
+
+struct ctx {
+ struct timer_start_delete_race *skel;
+ volatile bool start;
+ volatile bool stop;
+ int errors;
+};
+
+static void *start_timer_thread(void *arg)
+{
+ struct ctx *ctx = arg;
+ cpu_set_t cpuset;
+ int fd, i;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+
+ while (!ctx->start && !ctx->stop)
+ usleep(1);
+ if (ctx->stop)
+ return NULL;
+
+ fd = bpf_program__fd(ctx->skel->progs.start_timer);
+
+ for (i = 0; i < ITERATIONS && !ctx->stop; i++) {
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ int err;
+
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (err || opts.retval) {
+ ctx->errors++;
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+static void *delete_elem_thread(void *arg)
+{
+ struct ctx *ctx = arg;
+ cpu_set_t cpuset;
+ int fd, i;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(1, &cpuset);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+
+ while (!ctx->start && !ctx->stop)
+ usleep(1);
+ if (ctx->stop)
+ return NULL;
+
+ fd = bpf_program__fd(ctx->skel->progs.delete_elem);
+
+ for (i = 0; i < ITERATIONS && !ctx->stop; i++) {
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ int err;
+
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (err || opts.retval) {
+ ctx->errors++;
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+void test_timer_start_delete_race(void)
+{
+ struct timer_start_delete_race *skel;
+ pthread_t threads[2];
+ struct ctx ctx = {};
+ int err;
+
+ skel = timer_start_delete_race__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ ctx.skel = skel;
+
+ err = pthread_create(&threads[0], NULL, start_timer_thread, &ctx);
+ if (!ASSERT_OK(err, "create start_timer_thread")) {
+ ctx.stop = true;
+ goto cleanup;
+ }
+
+ err = pthread_create(&threads[1], NULL, delete_elem_thread, &ctx);
+ if (!ASSERT_OK(err, "create delete_elem_thread")) {
+ ctx.stop = true;
+ pthread_join(threads[0], NULL);
+ goto cleanup;
+ }
+
+ ctx.start = true;
+
+ pthread_join(threads[0], NULL);
+ pthread_join(threads[1], NULL);
+
+ ASSERT_EQ(ctx.errors, 0, "thread_errors");
+
+ /* Either KASAN will catch UAF or kernel will crash or nothing happens */
+cleanup:
+ timer_start_delete_race__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
index 10e231965589..f9f9e1cb87bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
@@ -73,7 +73,7 @@ static void test_tracing_deny(void)
static void test_fexit_noreturns(void)
{
test_tracing_fail_prog("fexit_noreturns",
- "Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected.");
+ "Attaching fexit/fsession/fmod_ret to __noreturn function 'do_exit' is rejected.");
}
void test_tracing_failure(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 4b4b081b46cc..302286a80154 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -6,6 +6,8 @@
#include "verifier_and.skel.h"
#include "verifier_arena.skel.h"
#include "verifier_arena_large.skel.h"
+#include "verifier_arena_globals1.skel.h"
+#include "verifier_arena_globals2.skel.h"
#include "verifier_array_access.skel.h"
#include "verifier_async_cb_context.skel.h"
#include "verifier_basic_stack.skel.h"
@@ -28,9 +30,11 @@
#include "verifier_ctx.skel.h"
#include "verifier_ctx_sk_msg.skel.h"
#include "verifier_d_path.skel.h"
+#include "verifier_default_trusted_ptr.skel.h"
#include "verifier_direct_packet_access.skel.h"
#include "verifier_direct_stack_access_wraparound.skel.h"
#include "verifier_div0.skel.h"
+#include "verifier_div_mod_bounds.skel.h"
#include "verifier_div_overflow.skel.h"
#include "verifier_global_subprogs.skel.h"
#include "verifier_global_ptr_args.skel.h"
@@ -108,6 +112,7 @@
#include "verifier_xdp_direct_packet_access.skel.h"
#include "verifier_bits_iter.skel.h"
#include "verifier_lsm.skel.h"
+#include "verifier_jit_inline.skel.h"
#include "irq.skel.h"
#define MAX_ENTRIES 11
@@ -147,6 +152,8 @@ static void run_tests_aux(const char *skel_name,
void test_verifier_and(void) { RUN(verifier_and); }
void test_verifier_arena(void) { RUN(verifier_arena); }
void test_verifier_arena_large(void) { RUN(verifier_arena_large); }
+void test_verifier_arena_globals1(void) { RUN(verifier_arena_globals1); }
+void test_verifier_arena_globals2(void) { RUN(verifier_arena_globals2); }
void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); }
void test_verifier_bounds(void) { RUN(verifier_bounds); }
@@ -167,9 +174,11 @@ void test_verifier_const_or(void) { RUN(verifier_const_or); }
void test_verifier_ctx(void) { RUN(verifier_ctx); }
void test_verifier_ctx_sk_msg(void) { RUN(verifier_ctx_sk_msg); }
void test_verifier_d_path(void) { RUN(verifier_d_path); }
+void test_verifier_default_trusted_ptr(void) { RUN_TESTS(verifier_default_trusted_ptr); }
void test_verifier_direct_packet_access(void) { RUN(verifier_direct_packet_access); }
void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_stack_access_wraparound); }
void test_verifier_div0(void) { RUN(verifier_div0); }
+void test_verifier_div_mod_bounds(void) { RUN(verifier_div_mod_bounds); }
void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); }
void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); }
void test_verifier_global_ptr_args(void) { RUN(verifier_global_ptr_args); }
@@ -247,6 +256,7 @@ void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); }
void test_verifier_lsm(void) { RUN(verifier_lsm); }
void test_irq(void) { RUN(irq); }
void test_verifier_mtu(void) { RUN(verifier_mtu); }
+void test_verifier_jit_inline(void) { RUN(verifier_jit_inline); }
static int init_test_val_map(struct bpf_object *obj, char *map_name)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/wq.c b/tools/testing/selftests/bpf/prog_tests/wq.c
index 15c67d23128b..84831eecc935 100644
--- a/tools/testing/selftests/bpf/prog_tests/wq.c
+++ b/tools/testing/selftests/bpf/prog_tests/wq.c
@@ -16,12 +16,12 @@ void serial_test_wq(void)
/* re-run the success test to check if the timer was actually executed */
wq_skel = wq__open_and_load();
- if (!ASSERT_OK_PTR(wq_skel, "wq_skel_load"))
+ if (!ASSERT_OK_PTR(wq_skel, "wq__open_and_load"))
return;
err = wq__attach(wq_skel);
if (!ASSERT_OK(err, "wq_attach"))
- return;
+ goto clean_up;
prog_fd = bpf_program__fd(wq_skel->progs.test_syscall_array_sleepable);
err = bpf_prog_test_run_opts(prog_fd, &topts);
@@ -31,6 +31,7 @@ void serial_test_wq(void)
usleep(50); /* 10 usecs should be enough, but give it extra */
ASSERT_EQ(wq_skel->bss->ok_sleepable, (1 << 1), "ok_sleepable");
+clean_up:
wq__destroy(wq_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index ee94c281888a..26159e0499c7 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -47,6 +47,7 @@ void test_xdp_context_test_run(void)
struct test_xdp_context_test_run *skel = NULL;
char data[sizeof(pkt_v4) + sizeof(__u32)];
char bad_ctx[sizeof(struct xdp_md) + 1];
+ char large_data[256];
struct xdp_md ctx_in, ctx_out;
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
@@ -94,9 +95,6 @@ void test_xdp_context_test_run(void)
test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data),
0, 0, 0);
- /* Meta data must be 255 bytes or smaller */
- test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0);
-
/* Total size of data must be data_end - data_meta or larger */
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
sizeof(data) + 1, 0, 0, 0);
@@ -116,6 +114,16 @@ void test_xdp_context_test_run(void)
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
0, 0, 1);
+ /* Meta data must be 216 bytes or smaller (256 - sizeof(struct
+ * xdp_frame)). Test both nearest invalid size and nearest invalid
+ * 4-byte-aligned size, and make sure data_in is large enough that we
+ * actually hit the check on metadata length
+ */
+ opts.data_in = large_data;
+ opts.data_size_in = sizeof(large_data);
+ test_xdp_context_error(prog_fd, opts, 0, 217, sizeof(large_data), 0, 0, 0);
+ test_xdp_context_error(prog_fd, opts, 0, 220, sizeof(large_data), 0, 0, 0);
+
test_xdp_context_test_run__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index df27535995af..ad56e4370ce3 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -18,7 +18,7 @@ static void test_xdp_with_cpumap_helpers(void)
struct bpf_cpumap_val val = {
.qsize = 192,
};
- int err, prog_fd, prog_redir_fd, map_fd;
+ int err, prog_fd, prog_redir_fd, map_fd, bad_fd;
struct nstoken *nstoken = NULL;
__u32 idx = 0;
@@ -79,7 +79,22 @@ static void test_xdp_with_cpumap_helpers(void)
val.qsize = 192;
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
+ ASSERT_EQ(err, -EINVAL, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
+
+ /* Try to attach non-BPF file descriptor */
+ bad_fd = open("/dev/null", O_RDONLY);
+ ASSERT_GE(bad_fd, 0, "Open /dev/null for non-BPF fd");
+
+ val.bpf_prog.fd = bad_fd;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_EQ(err, -EINVAL, "Add non-BPF fd to cpumap entry");
+
+ /* Try to attach nonexistent file descriptor */
+ err = close(bad_fd);
+ ASSERT_EQ(err, 0, "Close non-BPF fd for nonexistent fd");
+
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_EQ(err, -EBADF, "Add nonexistent fd to cpumap entry");
/* Try to attach BPF_XDP program with frags to cpumap when we have
* already loaded a BPF_XDP program on the map
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
index efa350d04ec5..910dabe95afd 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
@@ -114,12 +114,14 @@ static void test_xdp_pull_data_basic(void)
{
u32 pg_sz, max_meta_len, max_data_len;
struct test_xdp_pull_data *skel;
+ int buff_len;
skel = test_xdp_pull_data__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_xdp_pull_data__open_and_load"))
return;
pg_sz = sysconf(_SC_PAGE_SIZE);
+ buff_len = pg_sz + pg_sz / 2;
if (find_xdp_sizes(skel, pg_sz))
goto out;
@@ -140,13 +142,13 @@ static void test_xdp_pull_data_basic(void)
run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1025, 1025);
/* multi-buf pkt, empty linear data area, pull requires memmove */
- run_test(skel, XDP_PASS, pg_sz, 9000, 0, 0, PULL_MAX);
+ run_test(skel, XDP_PASS, pg_sz, buff_len, 0, 0, PULL_MAX);
/* multi-buf pkt, no headroom */
- run_test(skel, XDP_PASS, pg_sz, 9000, max_meta_len, 1024, PULL_MAX);
+ run_test(skel, XDP_PASS, pg_sz, buff_len, max_meta_len, 1024, PULL_MAX);
/* multi-buf pkt, no tailroom, pull requires memmove */
- run_test(skel, XDP_PASS, pg_sz, 9000, 0, max_data_len, PULL_MAX);
+ run_test(skel, XDP_PASS, pg_sz, buff_len, 0, max_data_len, PULL_MAX);
/* Test cases with invalid pull length */
@@ -154,18 +156,18 @@ static void test_xdp_pull_data_basic(void)
run_test(skel, XDP_DROP, pg_sz, 2048, 0, 2048, 2049);
/* multi-buf pkt with no space left in linear data area */
- run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, max_data_len,
+ run_test(skel, XDP_DROP, pg_sz, buff_len, max_meta_len, max_data_len,
PULL_MAX | PULL_PLUS_ONE);
/* multi-buf pkt, empty linear data area */
- run_test(skel, XDP_DROP, pg_sz, 9000, 0, 0, PULL_MAX | PULL_PLUS_ONE);
+ run_test(skel, XDP_DROP, pg_sz, buff_len, 0, 0, PULL_MAX | PULL_PLUS_ONE);
/* multi-buf pkt, no headroom */
- run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, 1024,
+ run_test(skel, XDP_DROP, pg_sz, buff_len, max_meta_len, 1024,
PULL_MAX | PULL_PLUS_ONE);
/* multi-buf pkt, no tailroom */
- run_test(skel, XDP_DROP, pg_sz, 9000, 0, max_data_len,
+ run_test(skel, XDP_DROP, pg_sz, buff_len, 0, max_data_len,
PULL_MAX | PULL_PLUS_ONE);
out:
diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c
index 3a2ddcacbea6..235d8cc95bdd 100644
--- a/tools/testing/selftests/bpf/progs/arena_list.c
+++ b/tools/testing/selftests/bpf/progs/arena_list.c
@@ -30,6 +30,7 @@ struct arena_list_head __arena *list_head;
int list_sum;
int cnt;
bool skip = false;
+const volatile bool nonsleepable = false;
#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
long __arena arena_sum;
@@ -42,6 +43,9 @@ int test_val SEC(".addr_space.1");
int zero;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
SEC("syscall")
int arena_list_add(void *ctx)
{
@@ -71,6 +75,10 @@ int arena_list_del(void *ctx)
struct elem __arena *n;
int sum = 0;
+ /* Take rcu_read_lock to test non-sleepable context */
+ if (nonsleepable)
+ bpf_rcu_read_lock();
+
arena_sum = 0;
list_for_each_entry(n, list_head, node) {
sum += n->value;
@@ -79,6 +87,9 @@ int arena_list_del(void *ctx)
bpf_free(n);
}
list_sum = sum;
+
+ if (nonsleepable)
+ bpf_rcu_read_unlock();
#else
skip = true;
#endif
diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
index ff189a736ad8..8fc38592a87b 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
@@ -62,9 +62,9 @@ static int create_attach_counter(__u64 cg_id, __u64 state, __u64 pending)
&init, BPF_NOEXIST);
}
-SEC("fentry/cgroup_attach_task")
-int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader,
- bool threadgroup)
+SEC("tp_btf/cgroup_attach_task")
+int BPF_PROG(counter, struct cgroup *dst_cgrp, const char *path,
+ struct task_struct *task, bool threadgroup)
{
__u64 cg_id = cgroup_id(dst_cgrp);
struct percpu_attach_counter *pcpu_counter = bpf_map_lookup_elem(
diff --git a/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c b/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c
new file mode 100644
index 000000000000..59fb70a3cc50
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include "cgroup_iter_memcg.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* The latest values read are stored here. */
+struct memcg_query memcg_query SEC(".data.query");
+
+SEC("iter.s/cgroup")
+int cgroup_memcg_query(struct bpf_iter__cgroup *ctx)
+{
+ struct cgroup *cgrp = ctx->cgroup;
+ struct cgroup_subsys_state *css;
+ struct mem_cgroup *memcg;
+
+ if (!cgrp)
+ return 1;
+
+ css = &cgrp->self;
+ memcg = bpf_get_mem_cgroup(css);
+ if (!memcg)
+ return 1;
+
+ bpf_mem_cgroup_flush_stats(memcg);
+
+ memcg_query.nr_anon_mapped = bpf_mem_cgroup_page_state(memcg, NR_ANON_MAPPED);
+ memcg_query.nr_shmem = bpf_mem_cgroup_page_state(memcg, NR_SHMEM);
+ memcg_query.nr_file_pages = bpf_mem_cgroup_page_state(memcg, NR_FILE_PAGES);
+ memcg_query.nr_file_mapped = bpf_mem_cgroup_page_state(memcg, NR_FILE_MAPPED);
+ memcg_query.memcg_kmem = bpf_mem_cgroup_page_state(memcg, MEMCG_KMEM);
+ memcg_query.pgfault = bpf_mem_cgroup_vm_events(memcg, PGFAULT);
+
+ bpf_put_mem_cgroup(memcg);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c
index 6884ab99a421..f05e120f3450 100644
--- a/tools/testing/selftests/bpf/progs/compute_live_registers.c
+++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c
@@ -431,6 +431,47 @@ __naked void subprog1(void)
::: __clobber_all);
}
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
+
+SEC("socket")
+__log_level(2)
+__msg("2: .1........ (07) r1 += 8")
+__msg("3: .1........ (79) r2 = *(u64 *)(r1 +0)")
+__msg("4: ..2....... (b7) r3 = 1")
+__msg("5: ..23...... (b7) r4 = 2")
+__msg("6: ..234..... (0d) gotox r2")
+__msg("7: ...3...... (bf) r0 = r3")
+__msg("8: 0......... (95) exit")
+__msg("9: ....4..... (bf) r0 = r4")
+__msg("10: 0......... (95) exit")
+__naked
+void gotox(void)
+{
+ asm volatile (
+ ".pushsection .jumptables,\"\",@progbits;"
+"jt0_%=: .quad l0_%= - socket;"
+ ".quad l1_%= - socket;"
+ ".size jt0_%=, 16;"
+ ".global jt0_%=;"
+ ".popsection;"
+
+ "r1 = jt0_%= ll;"
+ "r1 += 8;"
+ "r2 = *(u64 *)(r1 + 0);"
+ "r3 = 1;"
+ "r4 = 2;"
+ ".8byte %[gotox_r2];"
+"l0_%=: r0 = r3;"
+ "exit;"
+"l1_%=: r0 = r4;"
+ "exit;"
+ :
+ : __imm_insn(gotox_r2, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_2, BPF_REG_0, 0, 0))
+ : __clobber_all);
+}
+
+#endif /* __TARGET_ARCH_x86 || __TARGET_ARCH_arm64 */
+
/* to retain debug info for BTF generation */
void kfunc_root(void)
{
diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c
index 8a2fd596c8a3..61c32e91e8c3 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_failure.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c
@@ -110,7 +110,7 @@ SEC("tp_btf/task_newtask")
__failure __msg("NULL pointer passed to trusted arg0")
int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags)
{
- /* NULL passed to KF_TRUSTED_ARGS kfunc. */
+ /* NULL passed to kfunc. */
bpf_cpumask_empty(NULL);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index dda6a8dada82..8f2ae9640886 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -1465,7 +1465,7 @@ int xdp_invalid_data_slice2(struct xdp_md *xdp)
}
/* Only supported prog type can create skb-type dynptrs */
-SEC("?raw_tp")
+SEC("?xdp")
__failure __msg("calling kernel function bpf_dynptr_from_skb is not allowed")
int skb_invalid_ctx(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/file_reader.c b/tools/testing/selftests/bpf/progs/file_reader.c
index 4d756b623557..462712ff3b8a 100644
--- a/tools/testing/selftests/bpf/progs/file_reader.c
+++ b/tools/testing/selftests/bpf/progs/file_reader.c
@@ -77,7 +77,7 @@ int on_open_validate_file_read(void *c)
err = 1;
return 0;
}
- bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, task_work_callback, NULL);
+ bpf_task_work_schedule_signal(task, &work->tw, &arrmap, task_work_callback);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/free_timer.c b/tools/testing/selftests/bpf/progs/free_timer.c
index 4501ae8fc414..eccb2d47db43 100644
--- a/tools/testing/selftests/bpf/progs/free_timer.c
+++ b/tools/testing/selftests/bpf/progs/free_timer.c
@@ -7,6 +7,16 @@
#define MAX_ENTRIES 8
+/* clang considers 'sum += 1' as usage but 'sum++' as non-usage. GCC
+ * is more consistent and considers both 'sum += 1' and 'sum++' as
+ * non-usage. This triggers warnings in the functions below.
+ *
+ * Starting with GCC 16 -Wunused-but-set-variable=2 can be used to
+ * mimic clang's behavior. */
+#if !defined(__clang__) && __GNUC__ > 15
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+
struct map_value {
struct bpf_timer timer;
};
diff --git a/tools/testing/selftests/bpf/progs/fsession_test.c b/tools/testing/selftests/bpf/progs/fsession_test.c
new file mode 100644
index 000000000000..86e8a2fe467e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fsession_test.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 ChinaTelecom */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_entry_result = 0;
+__u64 test1_exit_result = 0;
+
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test1, int a, int ret)
+{
+ bool is_exit = bpf_session_is_return(ctx);
+
+ if (!is_exit) {
+ test1_entry_result = a == 1 && ret == 0;
+ return 0;
+ }
+
+ test1_exit_result = a == 1 && ret == 2;
+ return 0;
+}
+
+__u64 test2_entry_result = 0;
+__u64 test2_exit_result = 0;
+
+SEC("fsession/bpf_fentry_test3")
+int BPF_PROG(test2, char a, int b, __u64 c, int ret)
+{
+ bool is_exit = bpf_session_is_return(ctx);
+
+ if (!is_exit) {
+ test2_entry_result = a == 4 && b == 5 && c == 6 && ret == 0;
+ return 0;
+ }
+
+ test2_exit_result = a == 4 && b == 5 && c == 6 && ret == 15;
+ return 0;
+}
+
+__u64 test3_entry_result = 0;
+__u64 test3_exit_result = 0;
+
+SEC("fsession/bpf_fentry_test4")
+int BPF_PROG(test3, void *a, char b, int c, __u64 d, int ret)
+{
+ bool is_exit = bpf_session_is_return(ctx);
+
+ if (!is_exit) {
+ test3_entry_result = a == (void *)7 && b == 8 && c == 9 && d == 10 && ret == 0;
+ return 0;
+ }
+
+ test3_exit_result = a == (void *)7 && b == 8 && c == 9 && d == 10 && ret == 34;
+ return 0;
+}
+
+__u64 test4_entry_result = 0;
+__u64 test4_exit_result = 0;
+
+SEC("fsession/bpf_fentry_test5")
+int BPF_PROG(test4, __u64 a, void *b, short c, int d, __u64 e, int ret)
+{
+ bool is_exit = bpf_session_is_return(ctx);
+
+ if (!is_exit) {
+ test4_entry_result = a == 11 && b == (void *)12 && c == 13 && d == 14 &&
+ e == 15 && ret == 0;
+ return 0;
+ }
+
+ test4_exit_result = a == 11 && b == (void *)12 && c == 13 && d == 14 &&
+ e == 15 && ret == 65;
+ return 0;
+}
+
+__u64 test5_entry_result = 0;
+__u64 test5_exit_result = 0;
+
+SEC("fsession/bpf_fentry_test7")
+int BPF_PROG(test5, struct bpf_fentry_test_t *arg, int ret)
+{
+ bool is_exit = bpf_session_is_return(ctx);
+
+ if (!is_exit) {
+ if (!arg)
+ test5_entry_result = ret == 0;
+ return 0;
+ }
+
+ if (!arg)
+ test5_exit_result = 1;
+ return 0;
+}
+
+__u64 test6_entry_result = 0;
+__u64 test6_exit_result = 0;
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test6, int a)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ if (bpf_session_is_return(ctx))
+ test6_exit_result = (const void *) addr == &bpf_fentry_test1;
+ else
+ test6_entry_result = (const void *) addr == &bpf_fentry_test1;
+ return 0;
+}
+
+__u64 test7_entry_ok = 0;
+__u64 test7_exit_ok = 0;
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test7, int a)
+{
+ volatile __u64 *cookie = bpf_session_cookie(ctx);
+
+ if (!bpf_session_is_return(ctx)) {
+ *cookie = 0xAAAABBBBCCCCDDDDull;
+ test7_entry_ok = *cookie == 0xAAAABBBBCCCCDDDDull;
+ return 0;
+ }
+
+ test7_exit_ok = *cookie == 0xAAAABBBBCCCCDDDDull;
+ return 0;
+}
+
+__u64 test8_entry_ok = 0;
+__u64 test8_exit_ok = 0;
+
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test8, int a)
+{
+ volatile __u64 *cookie = bpf_session_cookie(ctx);
+
+ if (!bpf_session_is_return(ctx)) {
+ *cookie = 0x1111222233334444ull;
+ test8_entry_ok = *cookie == 0x1111222233334444ull;
+ return 0;
+ }
+
+ test8_exit_ok = *cookie == 0x1111222233334444ull;
+ return 0;
+}
+
+__u64 test9_entry_result = 0;
+__u64 test9_exit_result = 0;
+
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test9, int a, int ret)
+{
+ __u64 *cookie = bpf_session_cookie(ctx);
+
+ if (!bpf_session_is_return(ctx)) {
+ test9_entry_result = a == 1 && ret == 0;
+ *cookie = 0x123456ULL;
+ return 0;
+ }
+
+ test9_exit_result = a == 1 && ret == 2 && *cookie == 0x123456ULL;
+ return 0;
+}
+
+__u64 test10_result = 0;
+SEC("fexit/bpf_fentry_test1")
+int BPF_PROG(test10, int a, int ret)
+{
+ test10_result = a == 1 && ret == 2;
+ return 0;
+}
+
+__u64 test11_result = 0;
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test11, int a)
+{
+ test11_result = a == 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/get_func_args_test.c b/tools/testing/selftests/bpf/progs/get_func_args_test.c
index e0f34a55e697..075a1180ec26 100644
--- a/tools/testing/selftests/bpf/progs/get_func_args_test.c
+++ b/tools/testing/selftests/bpf/progs/get_func_args_test.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <errno.h>
@@ -121,3 +121,85 @@ int BPF_PROG(fexit_test, int _a, int *_b, int _ret)
test4_result &= err == 0 && ret == 1234;
return 0;
}
+/*
+ * tp_test1: tp_btf program on bpf_testmod_fentry_test1_tp.
+ * test5_result stays non-zero only if bpf_get_func_arg_cnt() returns 1,
+ * argument 0 reads back as 1 (compared as int), and asking for
+ * argument index 1 fails with -EINVAL.
+ */
+__u64 test5_result = 0;
+SEC("tp_btf/bpf_testmod_fentry_test1_tp")
+int BPF_PROG(tp_test1)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, z = 0;
+ __s64 err;
+
+ test5_result = cnt == 1;
+
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test5_result &= err == 0 && ((int) a == 1);
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 1, &z);
+ test5_result &= err == -EINVAL;
+
+ return 0;
+}
+/*
+ * tp_test2: tp_btf program on bpf_testmod_fentry_test2_tp.
+ * test6_result stays non-zero only if the argument count is 2,
+ * argument 0 reads back as 2 (compared as int), argument 1 reads
+ * back as 3, and asking for argument index 2 fails with -EINVAL.
+ */
+__u64 test6_result = 0;
+SEC("tp_btf/bpf_testmod_fentry_test2_tp")
+int BPF_PROG(tp_test2)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, b = 0, z = 0;
+ __s64 err;
+
+ test6_result = cnt == 2;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test6_result &= err == 0 && (int) a == 2;
+
+ err = bpf_get_func_arg(ctx, 1, &b);
+ test6_result &= err == 0 && b == 3;
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 2, &z);
+ test6_result &= err == -EINVAL;
+
+ return 0;
+}
+/*
+ * test7: on x86, arm64 and riscv targets this is an fsession program
+ * on bpf_fentry_test1. Each invocation starts test7_result over from
+ * "argument count == 1" and then requires: argument 0 == 1 (as int),
+ * argument index 1 rejected with -EINVAL, and bpf_get_func_ret()
+ * succeeding with 2 on the return invocation and 0 otherwise.
+ * On other targets a plain fentry program sets test7_result to 1
+ * (presumably so the userspace check still passes - confirm).
+ */
+__u64 test7_result = 0;
+#if defined(bpf_target_x86) || defined(bpf_target_arm64) || defined(bpf_target_riscv)
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test7)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, z = 0, ret = 0;
+ __s64 err;
+
+ test7_result = cnt == 1;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test7_result &= err == 0 && ((int) a == 1);
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 1, &z);
+ test7_result &= err == -EINVAL;
+
+ if (bpf_session_is_return(ctx)) {
+ err = bpf_get_func_ret(ctx, &ret);
+ test7_result &= err == 0 && ret == 2;
+ } else {
+ err = bpf_get_func_ret(ctx, &ret);
+ test7_result &= err == 0 && ret == 0;
+ }
+
+ return 0;
+}
+#else
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test7)
+{
+ test7_result = 1;
+ return 0;
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
index 2011cacdeb18..45eaa54d1ac7 100644
--- a/tools/testing/selftests/bpf/progs/get_func_ip_test.c
+++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
@@ -103,3 +103,26 @@ int BPF_URETPROBE(test8, int ret)
test8_result = (const void *) addr == (const void *) uprobe_trigger;
return 0;
}
+/*
+ * test9: on x86, arm64 and riscv targets this is an fsession program
+ * that records, separately for the return and the non-return
+ * invocation, whether bpf_get_func_ip() equals the address of
+ * bpf_fentry_test1. On other targets a plain fentry program sets both
+ * results to 1.
+ */
+__u64 test9_entry_result = 0;
+__u64 test9_exit_result = 0;
+#if defined(bpf_target_x86) || defined(bpf_target_arm64) || defined(bpf_target_riscv)
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test9, int a)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ if (bpf_session_is_return(ctx))
+ test9_exit_result = (const void *) addr == &bpf_fentry_test1;
+ else
+ test9_entry_result = (const void *) addr == &bpf_fentry_test1;
+ return 0;
+}
+#else
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test9, int a)
+{
+ test9_entry_result = test9_exit_result = 1;
+ return 0;
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 7dd92a303bf6..7f27b517d5d5 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -1926,4 +1926,144 @@ static int loop1_wrapper(void)
);
}
+/*
+ * This is similar to a test case absent_mark_in_the_middle_state(),
+ * but adapted for use with bpf_loop().
+ */
+SEC("raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("math between fp pointer and register with unbounded min value is not allowed")
+__naked void absent_mark_in_the_middle_state4(void)
+{
+ /*
+ * Equivalent to a C program below:
+ *
+ * int main(void) {
+ * fp[-8] = bpf_get_prandom_u32();
+ * fp[-16] = -32; // used in a memory access below
+ * bpf_loop(7, loop_cb4, fp, 0);
+ * return 0;
+ * }
+ *
+ * Both conditions in the callback compare a fresh random value
+ * against the value loaded from ctx[-8]; "fp" inside the callback
+ * is the callback's own frame pointer.
+ *
+ * int loop_cb4(int i, void *ctx) {
+ * if (unlikely(bpf_get_prandom_u32() > ctx[-8]))
+ * *(u64 *)(fp + ctx[-16]) = 42; // aligned access expected
+ * if (unlikely(bpf_get_prandom_u32() > ctx[-8]))
+ * ctx[-16] = -31; // makes said access unaligned
+ * return 0;
+ * }
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "*(u64 *)(r10 - 8) = r0;"
+ "*(u64 *)(r10 - 16) = -32;"
+ "r1 = 7;"
+ "r2 = loop_cb4 ll;"
+ "r3 = r10;"
+ "r4 = 0;"
+ "call %[bpf_loop];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_loop),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+/*
+ * Callback handed to bpf_loop() by absent_mark_in_the_middle_state4().
+ * r2 is the callback context, which the caller sets to its own frame
+ * pointer. Two independent random checks follow: the first may store
+ * through (r10 + ctx[-16]), the second may overwrite ctx[-16] with -31.
+ */
+__used __naked
+static void loop_cb4(void)
+{
+ asm volatile (
+ "r9 = r2;" /* r9 = ctx */
+ "r8 = *(u64 *)(r9 - 8);" /* r8 = ctx[-8] */
+ "r6 = *(u64 *)(r9 - 16);" /* r6 = ctx[-16], used as a stack offset below */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 > r8 goto use_fp16_%=;"
+ "1:"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 > r8 goto update_fp16_%=;"
+ "2:"
+ "r0 = 0;"
+ "exit;"
+ "use_fp16_%=:"
+ "r1 = r10;"
+ "r1 += r6;" /* frame pointer plus the offset read from ctx[-16] */
+ "*(u64 *)(r1 + 0) = 42;"
+ "goto 1b;"
+ "update_fp16_%=:"
+ "*(u64 *)(r9 - 16) = -31;"
+ "goto 2b;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ );
+}
+/*
+ * Writes one byte into each of nine 8-byte stack slots (fp[-16] down
+ * to fp[-80]), then runs a bpf_iter_num loop over [0, 10) whose body
+ * may overwrite each of those slots with a full 64-bit value.
+ */
+SEC("raw_tp")
+__success
+__naked int stack_misc_vs_scalar_in_a_loop(void)
+{
+ asm volatile(
+ "*(u8 *)(r10 - 15) = 1;" /* This marks stack slot fp[-16] as STACK_MISC. */
+ "*(u8 *)(r10 - 23) = 1;"
+ "*(u8 *)(r10 - 31) = 1;"
+ "*(u8 *)(r10 - 39) = 1;"
+ "*(u8 *)(r10 - 47) = 1;"
+ "*(u8 *)(r10 - 55) = 1;"
+ "*(u8 *)(r10 - 63) = 1;"
+ "*(u8 *)(r10 - 71) = 1;"
+ "*(u8 *)(r10 - 79) = 1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+/* Store r0 to the given slot only when the random value equals 42; otherwise jump over the store. */
+#define maybe_change_stack_slot(off) \
+ "call %[bpf_get_prandom_u32];" \
+ "if r0 == 42 goto +1;" \
+ "goto +1;" \
+ "*(u64 *)(r10 " #off ") = r0;"
+
+ /*
+ * When comparing verifier states fp[-16] will be
+ * either STACK_MISC or SCALAR. Pruning logic should
+ * consider old STACK_MISC equivalent to current SCALAR
+ * to avoid states explosion.
+ */
+ maybe_change_stack_slot(-16)
+ maybe_change_stack_slot(-24)
+ maybe_change_stack_slot(-32)
+ maybe_change_stack_slot(-40)
+ maybe_change_stack_slot(-48)
+ maybe_change_stack_slot(-56)
+ maybe_change_stack_slot(-64)
+ maybe_change_stack_slot(-72)
+ maybe_change_stack_slot(-80)
+
+#undef maybe_change_stack_slot
+
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm_addr(amap) /* NOTE(review): no %[amap] appears in the template above - confirm this operand is needed */
+ : __clobber_all
+ );
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_css.c b/tools/testing/selftests/bpf/progs/iters_css.c
index ec1f6c2f590b..5a1d87d186a9 100644
--- a/tools/testing/selftests/bpf/progs/iters_css.c
+++ b/tools/testing/selftests/bpf/progs/iters_css.c
@@ -12,8 +12,7 @@ char _license[] SEC("license") = "GPL";
pid_t target_pid;
u64 root_cg_id, leaf_cg_id;
u64 first_cg_id, last_cg_id;
-
-int pre_order_cnt, post_order_cnt, tree_high;
+int pre_order_cnt, post_order_cnt, children_cnt, tree_high;
struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
void bpf_cgroup_release(struct cgroup *p) __ksym;
@@ -43,7 +42,7 @@ int iter_css_for_each(const void *ctx)
}
root_css = &root_cgrp->self;
leaf_css = &leaf_cgrp->self;
- pre_order_cnt = post_order_cnt = tree_high = 0;
+ pre_order_cnt = post_order_cnt = children_cnt = tree_high = 0;
first_cg_id = last_cg_id = 0;
bpf_rcu_read_lock();
@@ -60,6 +59,10 @@ int iter_css_for_each(const void *ctx)
first_cg_id = cur_cgrp->kn->id;
}
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_CHILDREN) {
+ children_cnt++;
+ }
+
bpf_for_each(css, pos, leaf_css, BPF_CGROUP_ITER_ANCESTORS_UP)
tree_high++;
diff --git a/tools/testing/selftests/bpf/progs/kfunc_implicit_args.c b/tools/testing/selftests/bpf/progs/kfunc_implicit_args.c
new file mode 100644
index 000000000000..89b6a47e22dd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_implicit_args.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+extern int bpf_kfunc_implicit_arg(int a) __weak __ksym;
+extern int bpf_kfunc_implicit_arg_impl(int a, struct bpf_prog_aux *aux) __weak __ksym; /* illegal */
+extern int bpf_kfunc_implicit_arg_legacy(int a, int b) __weak __ksym;
+extern int bpf_kfunc_implicit_arg_legacy_impl(int a, int b, struct bpf_prog_aux *aux) __weak __ksym;
+
+char _license[] SEC("license") = "GPL";
+/* Calls bpf_kfunc_implicit_arg() with its single declared argument; annotated to return 5. */
+SEC("syscall")
+__retval(5)
+int test_kfunc_implicit_arg(void *ctx)
+{
+ return bpf_kfunc_implicit_arg(5);
+}
+/*
+ * Calls the _impl variant (marked "illegal" at its declaration) with an
+ * explicit aux argument. Annotated __failure with the message expected
+ * when that function's address cannot be found.
+ */
+SEC("syscall")
+__failure __msg("cannot find address for kernel function bpf_kfunc_implicit_arg_impl")
+int test_kfunc_implicit_arg_impl_illegal(void *ctx)
+{
+ return bpf_kfunc_implicit_arg_impl(5, NULL);
+}
+/* Calls the legacy kfunc through its two-argument declaration with (3, 4); annotated to return 7. */
+SEC("syscall")
+__retval(7)
+int test_kfunc_implicit_arg_legacy(void *ctx)
+{
+ return bpf_kfunc_implicit_arg_legacy(3, 4);
+}
+/* Calls the legacy _impl variant with (5, 6) and an explicit NULL aux pointer; annotated to return 11. */
+SEC("syscall")
+__retval(11)
+int test_kfunc_implicit_arg_legacy_impl(void *ctx)
+{
+ return bpf_kfunc_implicit_arg_legacy_impl(5, 6, NULL);
+}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_override.c b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c
index 28f8487c9059..14f39fa6d515 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_multi_override.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c
@@ -5,9 +5,24 @@
char _license[] SEC("license") = "GPL";
+int pid = 0;
+/*
+ * kprobe.multi program: calls bpf_override_return(ctx, 123), but only
+ * when the upper 32 bits of bpf_get_current_pid_tgid() equal pid.
+ * Any other caller returns early with nothing overridden.
+ */
SEC("kprobe.multi")
int test_override(struct pt_regs *ctx)
{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ bpf_override_return(ctx, 123);
+ return 0;
+}
+/* Plain kprobe program with the same pid filter and the same bpf_override_return(ctx, 123) call as test_override. */
+SEC("kprobe")
+int test_kprobe_override(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
bpf_override_return(ctx, 123);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c
index 0835b5edf685..ad627016e3e5 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c
@@ -1,9 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <stdbool.h>
-#include "bpf_kfuncs.h"
char _license[] SEC("license") = "GPL";
@@ -23,16 +22,16 @@ int BPF_PROG(trigger)
return 0;
}
-static int check_cookie(__u64 val, __u64 *result)
+static int check_cookie(struct pt_regs *ctx, __u64 val, __u64 *result)
{
__u64 *cookie;
if (bpf_get_current_pid_tgid() >> 32 != pid)
return 1;
- cookie = bpf_session_cookie();
+ cookie = bpf_session_cookie(ctx);
- if (bpf_session_is_return())
+ if (bpf_session_is_return(ctx))
*result = *cookie == val ? val : 0;
else
*cookie = val;
@@ -42,17 +41,17 @@ static int check_cookie(__u64 val, __u64 *result)
SEC("kprobe.session/bpf_fentry_test1")
int test_kprobe_1(struct pt_regs *ctx)
{
- return check_cookie(1, &test_kprobe_1_result);
+ return check_cookie(ctx, 1, &test_kprobe_1_result);
}
SEC("kprobe.session/bpf_fentry_test1")
int test_kprobe_2(struct pt_regs *ctx)
{
- return check_cookie(2, &test_kprobe_2_result);
+ return check_cookie(ctx, 2, &test_kprobe_2_result);
}
SEC("kprobe.session/bpf_fentry_test1")
int test_kprobe_3(struct pt_regs *ctx)
{
- return check_cookie(3, &test_kprobe_3_result);
+ return check_cookie(ctx, 3, &test_kprobe_3_result);
}
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index 637e75df2e14..d0be77011a84 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -62,7 +62,6 @@ SEC("lsm/inode_unlink")
int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
- struct bpf_local_storage *local_storage;
struct local_storage *storage;
struct task_struct *task;
bool is_self_unlink;
@@ -88,15 +87,10 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
if (!storage || storage->value)
return 0;
- if (bpf_task_storage_delete(&task_storage_map, task))
+ if (bpf_task_storage_delete(&task_storage_map2, task))
return 0;
- /* Ensure that the task_storage_map is disconnected from the storage.
- * The storage memory should not be freed back to the
- * bpf_mem_alloc.
- */
- local_storage = task->bpf_storage;
- if (!local_storage || local_storage->smap)
+ if (bpf_task_storage_delete(&task_storage_map, task))
return 0;
task_storage_result = 0;
@@ -164,18 +158,9 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
if (bpf_sk_storage_delete(&sk_storage_map2, sk))
return 0;
- storage = bpf_sk_storage_get(&sk_storage_map2, sk, 0,
- BPF_LOCAL_STORAGE_GET_F_CREATE);
- if (!storage)
- return 0;
-
if (bpf_sk_storage_delete(&sk_storage_map, sk))
return 0;
- /* Ensure that the sk_storage_map is disconnected from the storage. */
- if (!sk->sk_bpf_storage || sk->sk_bpf_storage->smap)
- return 0;
-
sk_storage_result = 0;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
index edaba481db9d..e708ffbe1f61 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -487,6 +487,24 @@ int test_map_kptr_ref3(struct __sk_buff *ctx)
return 0;
}
+int num_of_refs;
+/*
+ * Acquire a prog_test_ref_kfunc object, copy its cnt.refs.counter
+ * field into num_of_refs, and release the object again.
+ * Returns 1 if the acquire kfunc returned NULL, otherwise 0.
+ */
+SEC("syscall")
+int count_ref(void *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long arg = 0;
+
+ p = bpf_kfunc_call_test_acquire(&arg);
+ if (!p)
+ return 1;
+
+ num_of_refs = p->cnt.refs.counter;
+
+ bpf_kfunc_call_test_release(p);
+ return 0;
+}
+
SEC("syscall")
int test_ls_map_kptr_ref1(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
index 4c0ff01f1a96..6443b320c732 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -272,7 +272,7 @@ int reject_untrusted_xchg(struct __sk_buff *ctx)
SEC("?tc")
__failure
-__msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member")
+__msg("invalid kptr access, R2 type=trusted_ptr_prog_test_ref_kfunc expected=ptr_prog_test_member")
int reject_bad_type_xchg(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *ref_ptr;
@@ -291,7 +291,7 @@ int reject_bad_type_xchg(struct __sk_buff *ctx)
}
SEC("?tc")
-__failure __msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc")
+__failure __msg("invalid kptr access, R2 type=trusted_ptr_prog_test_ref_kfunc")
int reject_member_of_ref_xchg(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *ref_ptr;
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
index 37c2d2608ec0..ed6a2a93d5a5 100644
--- a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
@@ -187,4 +187,36 @@ out:
return 0;
}
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, u32);
+} percpu SEC(".maps");
+
+/*
+ * Store 0xDEADC0DE into element 0 of the "percpu" array map.
+ *
+ * The local is a u32 to match the value type declared for the map.
+ * With a u64 local only its first four bytes would correspond to the
+ * map value, and on a big-endian target those bytes are zero.
+ */
+SEC("?fentry/bpf_fentry_test1")
+int BPF_PROG(test_percpu_array, int x)
+{
+ u32 value = 0xDEADC0DE;
+ int key = 0;
+
+ bpf_map_update_elem(&percpu, &key, &value, BPF_ANY);
+ return 0;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, u32);
+} percpu_cgroup_storage SEC(".maps");
+/*
+ * cgroup_skb/egress program: obtains the local storage pointer for
+ * percpu_cgroup_storage (flags 0), writes 1 through it and returns 1.
+ * NOTE(review): the pointer is dereferenced without a NULL check;
+ * presumably bpf_get_local_storage() never returns NULL - confirm.
+ */
+SEC("cgroup_skb/egress")
+int cgroup_egress(struct __sk_buff *skb)
+{
+ u32 *val = bpf_get_local_storage(&percpu_cgroup_storage, 0);
+
+ *val = 1;
+ return 1;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/profiler.h b/tools/testing/selftests/bpf/progs/profiler.h
index 3bac4fdd4bdf..637fbf2c2652 100644
--- a/tools/testing/selftests/bpf/progs/profiler.h
+++ b/tools/testing/selftests/bpf/progs/profiler.h
@@ -169,7 +169,7 @@ enum bpf_function_id {
profiler_bpf_sched_process_exec,
profiler_bpf_sched_process_exit,
profiler_bpf_sys_enter_kill,
- profiler_bpf_do_filp_open_ret,
+ profiler_bpf_do_file_open_ret,
profiler_bpf_sched_process_fork,
profiler_bpf_vfs_link,
profiler_bpf_vfs_symlink,
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 813143b4985d..9044dd8aff11 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -751,11 +751,11 @@ out:
return 0;
}
-SEC("kretprobe/do_filp_open")
-int kprobe_ret__do_filp_open(struct pt_regs* ctx)
+SEC("kretprobe/do_file_open")
+int kprobe_ret__do_file_open(struct pt_regs *ctx)
{
struct bpf_func_stats_ctx stats_ctx;
- bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
+ bpf_stats_enter(&stats_ctx, profiler_bpf_do_file_open_ret);
struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
index 4acb6af2dfe3..70b7baf9304b 100644
--- a/tools/testing/selftests/bpf/progs/rbtree_fail.c
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -153,7 +153,7 @@ long rbtree_api_add_to_multiple_trees(void *ctx)
}
SEC("?tc")
-__failure __msg("dereference of modified ptr_or_null_ ptr R2 off=16 disallowed")
+__failure __msg("Possibly NULL pointer passed to trusted arg1")
long rbtree_api_use_unchecked_remove_retval(void *ctx)
{
struct bpf_rb_node *res;
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index d70c28824bbe..b4e073168fb1 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -7,6 +7,16 @@
#include "bpf_tracing_net.h"
#include "bpf_misc.h"
+/* clang considers 'sum += 1' as usage but 'sum++' as non-usage. GCC
+ * is more consistent and considers both 'sum += 1' and 'sum++' as
+ * non-usage. This triggers warnings in the functions below.
+ *
+ * Starting with GCC 16 -Wunused-but-set-variable=2 can be used to
+ * mimic clang's behavior. */
+#if !defined(__clang__) && __GNUC__ > 15
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+
char _license[] SEC("license") = "GPL";
struct {
diff --git a/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c
index df4873558634..189c05c6abcc 100644
--- a/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c
+++ b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c
@@ -1,36 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
+#include "../test_kmods/bpf_testmod_kfunc.h"
-struct task_ls_map {
- __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
- __uint(map_flags, BPF_F_NO_PREALLOC);
- __type(key, int);
- __type(value, int);
-} task_ls_map SEC(".maps");
-
-long gp_seq;
+int done;
SEC("syscall")
-int do_call_rcu_tasks_trace(void *ctx)
-{
- struct task_struct *current;
- int *v;
-
- current = bpf_get_current_task_btf();
- v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
- if (!v)
- return 1;
- /* Invoke call_rcu_tasks_trace */
- return bpf_task_storage_delete(&task_ls_map, current);
-}
-
-SEC("kprobe/rcu_tasks_trace_postgp")
-int rcu_tasks_trace_postgp(void *ctx)
+int call_rcu_tasks_trace(void *ctx)
{
- __sync_add_and_fetch(&gp_seq, 1);
- return 0;
+ return bpf_kfunc_call_test_call_rcu_tasks_trace(&done);
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
deleted file mode 100644
index 69da05bb6c63..000000000000
--- a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
+++ /dev/null
@@ -1,38 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
-#include "vmlinux.h"
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-
-extern bool CONFIG_PREEMPTION __kconfig __weak;
-extern const int bpf_task_storage_busy __ksym;
-
-char _license[] SEC("license") = "GPL";
-
-int pid = 0;
-int busy = 0;
-
-struct {
- __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
- __uint(map_flags, BPF_F_NO_PREALLOC);
- __type(key, int);
- __type(value, long);
-} task SEC(".maps");
-
-SEC("raw_tp/sys_enter")
-int BPF_PROG(read_bpf_task_storage_busy)
-{
- int *value;
-
- if (!CONFIG_PREEMPTION)
- return 0;
-
- if (bpf_get_current_pid_tgid() >> 32 != pid)
- return 0;
-
- value = bpf_this_cpu_ptr(&bpf_task_storage_busy);
- if (value)
- busy = *value;
-
- return 0;
-}
diff --git a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
index 46d6eb2a3b17..c8f4815c8dfb 100644
--- a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
+++ b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
@@ -6,7 +6,6 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
-void *local_storage_ptr = NULL;
void *sk_ptr = NULL;
int cookie_found = 0;
__u64 cookie = 0;
@@ -19,21 +18,17 @@ struct {
__type(value, int);
} sk_storage SEC(".maps");
-SEC("fexit/bpf_local_storage_destroy")
-int BPF_PROG(bpf_local_storage_destroy, struct bpf_local_storage *local_storage)
+SEC("fexit/bpf_sk_storage_free")
+int BPF_PROG(bpf_sk_storage_free, struct sock *sk)
{
- struct sock *sk;
-
- if (local_storage_ptr != local_storage)
+ if (sk_ptr != sk)
return 0;
- sk = bpf_core_cast(sk_ptr, struct sock);
if (sk->sk_cookie.counter != cookie)
return 0;
cookie_found++;
omem = sk->sk_omem_alloc.counter;
- local_storage_ptr = NULL;
return 0;
}
@@ -50,7 +45,6 @@ int BPF_PROG(inet6_sock_destruct, struct sock *sk)
if (value && *value == 0xdeadbeef) {
cookie_found++;
sk_ptr = sk;
- local_storage_ptr = sk->sk_bpf_storage;
}
return 0;
diff --git a/tools/testing/selftests/bpf/progs/stacktrace_ips.c b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
index a96c8150d7f5..6830f2978613 100644
--- a/tools/testing/selftests/bpf/progs/stacktrace_ips.c
+++ b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
@@ -31,6 +31,13 @@ int unused(void)
__u32 stack_key;
+SEC("kprobe")
+int kprobe_test(struct pt_regs *ctx)
+{
+ stack_key = bpf_get_stackid(ctx, &stackmap, 0); /* flags 0, unlike fentry_test/fexit_test which pass 2 */
+ return 0;
+}
+
SEC("kprobe.multi")
int kprobe_multi_test(struct pt_regs *ctx)
{
@@ -46,4 +53,24 @@ int rawtp_test(void *ctx)
return 0;
}
+SEC("fentry/bpf_testmod_stacktrace_test")
+int fentry_test(struct pt_regs *ctx)
+{
+ /*
+ * Skip 2 bpf_program/trampoline stack entries:
+ * - bpf_prog_bd1f7a949f55fb03_fentry_test
+ * - bpf_trampoline_182536277701
+ *
+ * The skip count is the 2 passed as the flags argument; the
+ * resulting stack id is stored in stack_key.
+ * NOTE(review): ctx is declared as struct pt_regs * although the
+ * section is fentry - confirm this is intentional.
+ */
+ stack_key = bpf_get_stackid(ctx, &stackmap, 2);
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_stacktrace_test")
+int fexit_test(struct pt_regs *ctx)
+{
+ /*
+ * Skip 2 bpf_program/trampoline stack entries, check fentry_test.
+ * The stack id is stored in the same stack_key global that the
+ * other programs in this file write.
+ */
+ stack_key = bpf_get_stackid(ctx, &stackmap, 2);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c
index 4a5bd852f10c..6f999ba951a3 100644
--- a/tools/testing/selftests/bpf/progs/stream.c
+++ b/tools/testing/selftests/bpf/progs/stream.c
@@ -42,6 +42,10 @@ int size;
u64 fault_addr;
void *arena_ptr;
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+
+private(STREAM) struct bpf_spin_lock block;
+
SEC("syscall")
__success __retval(0)
int stream_exhaust(void *ctx)
@@ -234,4 +238,53 @@ int stream_arena_callback_fault(void *ctx)
return 0;
}
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_print_stack_kfunc(void *ctx)
+{
+ return bpf_stream_print_stack(BPF_STDERR); /* presumably the output is what the __stderr patterns above are matched against - confirm */
+}
+/* Passes a stream id outside the enum's values to bpf_stream_print_stack(); annotated to return -2. */
+SEC("syscall")
+__success __retval(-2)
+int stream_print_stack_invalid_id(void *ctx)
+{
+ /* Try to pass an invalid stream ID. */
+ return bpf_stream_print_stack((enum bpf_stream_id)0xbadcafe);
+}
+/*
+ * Calls both stream kfuncs while holding the bpf_spin_lock "block":
+ * first bpf_stream_printk() of _STR to BPF_STDOUT, then, if that
+ * returned 0, bpf_stream_print_stack() to BPF_STDERR. The lock is
+ * released on both paths and the last kfunc return value is returned.
+ */
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stdout(_STR)
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_print_kfuncs_locked(void *ctx)
+{
+ int ret;
+
+ bpf_spin_lock(&block);
+
+ ret = bpf_stream_printk(BPF_STDOUT, _STR);
+ if (ret)
+ goto out;
+
+ ret = bpf_stream_print_stack(BPF_STDERR);
+
+out:
+ bpf_spin_unlock(&block);
+
+ return ret;
+}
+
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c
index 3662515f0107..8e8249f3521c 100644
--- a/tools/testing/selftests/bpf/progs/stream_fail.c
+++ b/tools/testing/selftests/bpf/progs/stream_fail.c
@@ -10,7 +10,7 @@ SEC("syscall")
__failure __msg("Possibly NULL pointer passed")
int stream_vprintk_null_arg(void *ctx)
{
- bpf_stream_vprintk_impl(BPF_STDOUT, "", NULL, 0, NULL);
+ bpf_stream_vprintk(BPF_STDOUT, "", NULL, 0);
return 0;
}
@@ -18,7 +18,7 @@ SEC("syscall")
__failure __msg("R3 type=scalar expected=")
int stream_vprintk_scalar_arg(void *ctx)
{
- bpf_stream_vprintk_impl(BPF_STDOUT, "", (void *)46, 0, NULL);
+ bpf_stream_vprintk(BPF_STDOUT, "", (void *)46, 0);
return 0;
}
@@ -26,7 +26,7 @@ SEC("syscall")
__failure __msg("arg#1 doesn't point to a const string")
int stream_vprintk_string_arg(void *ctx)
{
- bpf_stream_vprintk_impl(BPF_STDOUT, ctx, NULL, 0, NULL);
+ bpf_stream_vprintk(BPF_STDOUT, ctx, NULL, 0);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
index 826e6b6aff7e..bddc4e8579d2 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
@@ -33,6 +33,8 @@ SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return
SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_null1(void *ctx) { return bpf_strcasecmp(NULL, "HELLO"); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strcasecmp_null2(void *ctx) { return bpf_strcasecmp("HELLO", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strncasecmp_null1(void *ctx) { return bpf_strncasecmp(NULL, "HELLO", 5); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strncasecmp_null2(void *ctx) { return bpf_strncasecmp("HELLO", NULL, 5); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); }
@@ -57,6 +59,8 @@ SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { ret
SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr1(void *ctx) { return bpf_strcasecmp(user_ptr, "HELLO"); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr2(void *ctx) { return bpf_strcasecmp("HELLO", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strncasecmp_user_ptr1(void *ctx) { return bpf_strncasecmp(user_ptr, "HELLO", 5); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strncasecmp_user_ptr2(void *ctx) { return bpf_strncasecmp("HELLO", user_ptr, 5); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); }
@@ -83,6 +87,8 @@ SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return
SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); }
SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault1(void *ctx) { return bpf_strcasecmp(invalid_kern_ptr, "HELLO"); }
SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault2(void *ctx) { return bpf_strcasecmp("HELLO", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strncasecmp_pagefault1(void *ctx) { return bpf_strncasecmp(invalid_kern_ptr, "HELLO", 5); }
+SEC("syscall") __retval(-EFAULT) int test_strncasecmp_pagefault2(void *ctx) { return bpf_strncasecmp("HELLO", invalid_kern_ptr, 5); }
SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); }
SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); }
SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); }
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
index 05e1da1f250f..412c53b87b18 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
@@ -8,6 +8,7 @@ char long_str[XATTR_SIZE_MAX + 1];
SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); }
SEC("syscall") int test_strcasecmp_too_long(void *ctx) { return bpf_strcasecmp(long_str, long_str); }
+SEC("syscall") int test_strncasecmp_too_long(void *ctx) { return bpf_strncasecmp(long_str, long_str, sizeof(long_str)); }
SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); }
SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); }
SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); }
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
index a8513964516b..f65b1226a81a 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
@@ -17,6 +17,13 @@ __test(0) int test_strcasecmp_eq2(void *ctx) { return bpf_strcasecmp(str, "HELLO
__test(0) int test_strcasecmp_eq3(void *ctx) { return bpf_strcasecmp(str, "HELLO world"); }
__test(1) int test_strcasecmp_neq1(void *ctx) { return bpf_strcasecmp(str, "hello"); }
__test(1) int test_strcasecmp_neq2(void *ctx) { return bpf_strcasecmp(str, "HELLO"); }
+__test(0) int test_strncasecmp_eq1(void *ctx) { return bpf_strncasecmp(str, "hello world", 11); }
+__test(0) int test_strncasecmp_eq2(void *ctx) { return bpf_strncasecmp(str, "HELLO WORLD", 11); }
+__test(0) int test_strncasecmp_eq3(void *ctx) { return bpf_strncasecmp(str, "HELLO world", 11); }
+__test(0) int test_strncasecmp_eq4(void *ctx) { return bpf_strncasecmp(str, "hello", 5); }
+__test(0) int test_strncasecmp_eq5(void *ctx) { return bpf_strncasecmp(str, "hello world!", 11); }
+__test(-1) int test_strncasecmp_neq1(void *ctx) { return bpf_strncasecmp(str, "hello!", 6); }
+__test(1) int test_strncasecmp_neq2(void *ctx) { return bpf_strncasecmp(str, "abc", 3); }
__test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); }
__test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); }
__test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); }
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_assoc.c b/tools/testing/selftests/bpf/progs/struct_ops_assoc.c
new file mode 100644
index 000000000000..68842e3f936b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_assoc.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* pid filter for the sys_enter programs below; 0 disables them entirely. */
+int test_pid;
+
+/* Programs associated with st_ops_map_a */
+
+/* Value returned by test_1_a and expected by the "a" programs. */
+#define MAP_A_MAGIC 1234
+/* Counts kfunc results that differed from MAP_A_MAGIC. */
+int test_err_a;
+
+/*
+ * struct_ops implementation wired into st_ops_map_a as .test_1. It returns
+ * MAP_A_MAGIC unconditionally.
+ */
+SEC("struct_ops")
+int BPF_PROG(test_1_a, struct st_ops_args *args)
+{
+ return MAP_A_MAGIC;
+}
+
+/*
+ * tp_btf/sys_enter program for the "a" group. It does nothing unless test_pid
+ * is set and equals the current task's pid. Otherwise it calls
+ * bpf_kfunc_multi_st_ops_test_1_assoc() with zeroed args and counts any result
+ * other than MAP_A_MAGIC in test_err_a. Always returns 0.
+ * NOTE(review): the kfunc is declared in bpf_testmod_kfunc.h (not shown);
+ * presumably it dispatches to the struct_ops map associated with the calling
+ * program - confirm.
+ */
+SEC("tp_btf/sys_enter")
+int BPF_PROG(sys_enter_prog_a, struct pt_regs *regs, long id)
+{
+ struct st_ops_args args = {};
+ struct task_struct *task;
+ int ret;
+
+ /* Ignore every task except the one selected through test_pid. */
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ if (ret != MAP_A_MAGIC)
+ test_err_a++;
+
+ return 0;
+}
+
+/*
+ * Syscall-type program for the "a" group: calls the assoc kfunc with zeroed
+ * args and bumps test_err_a when the result is not MAP_A_MAGIC. Always
+ * returns 0; the verdict is carried by test_err_a.
+ */
+SEC("syscall")
+int syscall_prog_a(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ if (ret != MAP_A_MAGIC)
+ test_err_a++;
+
+ return 0;
+}
+
+/* struct_ops map placed in .struct_ops.link; its test_1 member is test_1_a. */
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map_a = {
+ .test_1 = (void *)test_1_a,
+};
+
+/* Programs associated with st_ops_map_b */
+
+/* Value returned by test_1_b and expected by the "b" programs. */
+#define MAP_B_MAGIC 5678
+/* Counts kfunc results that differed from MAP_B_MAGIC. */
+int test_err_b;
+
+/*
+ * struct_ops implementation wired into st_ops_map_b as .test_1. It returns
+ * MAP_B_MAGIC unconditionally.
+ */
+SEC("struct_ops")
+int BPF_PROG(test_1_b, struct st_ops_args *args)
+{
+ return MAP_B_MAGIC;
+}
+
+/*
+ * tp_btf/sys_enter program for the "b" group. It does nothing unless test_pid
+ * is set and equals the current task's pid. Otherwise it calls
+ * bpf_kfunc_multi_st_ops_test_1_assoc() with zeroed args and counts any result
+ * other than MAP_B_MAGIC in test_err_b. Always returns 0.
+ * NOTE(review): how the kfunc selects a struct_ops map is not visible in this
+ * file - confirm against bpf_testmod.
+ */
+SEC("tp_btf/sys_enter")
+int BPF_PROG(sys_enter_prog_b, struct pt_regs *regs, long id)
+{
+ struct st_ops_args args = {};
+ struct task_struct *task;
+ int ret;
+
+ /* Ignore every task except the one selected through test_pid. */
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ if (ret != MAP_B_MAGIC)
+ test_err_b++;
+
+ return 0;
+}
+
+/*
+ * Syscall-type program for the "b" group: calls the assoc kfunc with zeroed
+ * args and bumps test_err_b when the result is not MAP_B_MAGIC. Always
+ * returns 0; the verdict is carried by test_err_b.
+ */
+SEC("syscall")
+int syscall_prog_b(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ if (ret != MAP_B_MAGIC)
+ test_err_b++;
+
+ return 0;
+}
+
+/* struct_ops map placed in .struct_ops.link; its test_1 member is test_1_b. */
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map_b = {
+ .test_1 = (void *)test_1_b,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_assoc_in_timer.c b/tools/testing/selftests/bpf/progs/struct_ops_assoc_in_timer.c
new file mode 100644
index 000000000000..0bed49e9f217
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_assoc_in_timer.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Map value type: holds only the bpf_timer that test_1() arms. */
+struct elem {
+ struct bpf_timer timer;
+};
+
+/* Single-entry array map (int key, struct elem value) that stores the timer. */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array_map SEC(".maps");
+
+/* Value returned by test_1() and expected by syscall_prog(). */
+#define MAP_MAGIC 1234
+/* Raised by timer_cb() around its kfunc call; test_1() skips arming while set. */
+int recur;
+/* Bumped by syscall_prog() when the kfunc result is not MAP_MAGIC. */
+int test_err;
+/* Timeout passed to bpf_timer_start() in test_1(); not assigned in this file. */
+int timer_ns;
+/* Result of the kfunc call made from timer_cb(). */
+int timer_test_1_ret;
+/* Number of completed timer_cb() invocations. */
+int timer_cb_run;
+
+/*
+ * Timer callback. Calls the assoc kfunc with zeroed args and stores the result
+ * in timer_test_1_ret. recur is incremented for the duration of the call so
+ * that test_1(), if reached through the kfunc, does not arm the timer again.
+ * timer_cb_run is incremented once per invocation. Always returns 0.
+ */
+__noinline static int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ struct st_ops_args args = {};
+
+ recur++;
+ timer_test_1_ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ recur--;
+
+ timer_cb_run++;
+
+ return 0;
+}
+
+/*
+ * struct_ops implementation installed as st_ops_map.test_1. While recur is
+ * zero it looks up element 0 of array_map, initialises the timer stored there,
+ * installs timer_cb() and starts it with a timeout of timer_ns. Returns
+ * MAP_MAGIC, or 0 if the map lookup fails.
+ */
+SEC("struct_ops")
+int BPF_PROG(test_1, struct st_ops_args *args)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ if (!recur) {
+ /* The timer is the only member of struct elem, so the value pointer is used as the timer. */
+ timer = bpf_map_lookup_elem(&array_map, &key);
+ if (!timer)
+ return 0;
+
+ /* Clock id 1 is presumably CLOCK_MONOTONIC - TODO confirm. */
+ bpf_timer_init(timer, &array_map, 1);
+ bpf_timer_set_callback(timer, timer_cb);
+ bpf_timer_start(timer, timer_ns, 0);
+ }
+
+ return MAP_MAGIC;
+}
+
+/*
+ * Syscall-type program: calls the assoc kfunc with zeroed args and bumps
+ * test_err when the result is not MAP_MAGIC. Always returns 0.
+ */
+SEC("syscall")
+int syscall_prog(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ if (ret != MAP_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+/* struct_ops map placed in .struct_ops.link; its test_1 member is test_1. */
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_assoc_reuse.c b/tools/testing/selftests/bpf/progs/struct_ops_assoc_reuse.c
new file mode 100644
index 000000000000..396b3e58c729
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_assoc_reuse.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Value returned by test_1_a and expected by both syscall programs below. */
+#define MAP_A_MAGIC 1234
+/* Bumped by test_1_a and syscall_prog_a when a kfunc result is unexpected. */
+int test_err_a;
+/* Non-zero while test_1_a is inside its own kfunc call; stops re-entry. */
+int recur;
+
+/*
+ * test_1_a is reused: both st_ops_map_a and st_ops_map_b install it as
+ * .test_1. The kfunc should not be able to get the associated struct_ops
+ * and call test_1 recursively as it is ambiguous, so the nested call is
+ * expected to return -1; any other result is counted in test_err_a.
+ * Always returns MAP_A_MAGIC.
+ */
+SEC("struct_ops")
+int BPF_PROG(test_1_a, struct st_ops_args *args)
+{
+ int ret;
+
+ if (!recur) {
+ recur++;
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(args);
+ if (ret != -1)
+ test_err_a++;
+ recur--;
+ }
+
+ return MAP_A_MAGIC;
+}
+
+/* Programs associated with st_ops_map_a */
+
+/*
+ * Syscall-type program for st_ops_map_a: calls the assoc kfunc with zeroed
+ * args and bumps test_err_a when the result is not MAP_A_MAGIC. Always
+ * returns 0.
+ */
+SEC("syscall")
+int syscall_prog_a(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ if (ret != MAP_A_MAGIC)
+ test_err_a++;
+
+ return 0;
+}
+
+/* First struct_ops map using test_1_a as its test_1 member. */
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map_a = {
+ .test_1 = (void *)test_1_a,
+};
+
+/* Programs associated with st_ops_map_b */
+
+/* Bumped by syscall_prog_b when the kfunc result is unexpected. */
+int test_err_b;
+
+/*
+ * Syscall-type program for st_ops_map_b. st_ops_map_b also uses test_1_a, so
+ * the expected kfunc result here is MAP_A_MAGIC as well; any other value is
+ * counted in test_err_b. Always returns 0.
+ */
+SEC("syscall")
+int syscall_prog_b(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1_assoc(&args);
+ if (ret != MAP_A_MAGIC)
+ test_err_b++;
+
+ return 0;
+}
+
+/* Second struct_ops map; it deliberately reuses test_1_a as its test_1 member. */
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map_b = {
+ .test_1 = (void *)test_1_a,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
index 6a2dd5367802..c8d217e89eea 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
@@ -12,7 +12,7 @@ void bpf_task_release(struct task_struct *p) __ksym;
* reject programs returning a referenced kptr of the wrong type.
*/
SEC("struct_ops/test_return_ref_kptr")
-__failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)")
+__failure __msg("At program exit the register R0 is not a known value (trusted_ptr_or_null_)")
struct task_struct *BPF_PROG(kptr_return_fail__wrong_type, int dummy,
struct task_struct *task, struct cgroup *cgrp)
{
diff --git a/tools/testing/selftests/bpf/progs/tailcall_sleepable.c b/tools/testing/selftests/bpf/progs/tailcall_sleepable.c
new file mode 100644
index 000000000000..d959a9eaaa9c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_sleepable.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_test_utils.h"
+
+/*
+ * One-slot program array used as the tail-call target table by the uprobe
+ * programs below. Nothing in this file populates slot 0.
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __array(values, void (void));
+} jmp_table SEC(".maps");
+
+/*
+ * Uprobe program in the plain "?uprobe" section: tail-calls slot 0 of
+ * jmp_table and returns 0 if execution continues past the call.
+ */
+SEC("?uprobe")
+int uprobe_normal(void *ctx)
+{
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ return 0;
+}
+
+/*
+ * Same body as uprobe_normal but placed in "?uprobe.s" (the .s suffix is
+ * presumably libbpf's sleepable marker, as the function name suggests):
+ * tail-calls slot 0 of jmp_table and returns 0 otherwise.
+ */
+SEC("?uprobe.s")
+int uprobe_sleepable_1(void *ctx)
+{
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ return 0;
+}
+
+/* Incremented each time uprobe_sleepable_2 runs for the selected process. */
+int executed = 0;
+/* Process filter compared against the upper half of pid_tgid. */
+int my_pid = 0;
+
+/*
+ * "?uprobe.s" program that performs no tail call. It increments executed when
+ * the upper 32 bits of bpf_get_current_pid_tgid() equal my_pid, and returns 0
+ * either way.
+ */
+SEC("?uprobe.s")
+int uprobe_sleepable_2(void *ctx)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
index 432fff2af844..fed53d63a7e5 100644
--- a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
+++ b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
@@ -80,7 +80,7 @@ struct tld_metadata {
};
struct tld_meta_u {
- __u8 cnt;
+ __u16 cnt;
__u16 size;
struct tld_metadata metadata[TLD_MAX_DATA_CNT];
};
diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
index f1853c38aada..b37359432692 100644
--- a/tools/testing/selftests/bpf/progs/task_ls_recursion.c
+++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
@@ -36,14 +36,9 @@ int BPF_PROG(on_update)
if (!test_pid || task->pid != test_pid)
return 0;
+ /* This will succeed as there is no real deadlock */
ptr = bpf_task_storage_get(&map_a, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
- /* ptr will not be NULL when it is called from
- * the bpf_task_storage_get(&map_b,...F_CREATE) in
- * the BPF_PROG(on_enter) below. It is because
- * the value can be found in map_a and the kernel
- * does not need to acquire any spin_lock.
- */
if (ptr) {
int err;
@@ -53,12 +48,7 @@ int BPF_PROG(on_update)
nr_del_errs++;
}
- /* This will still fail because map_b is empty and
- * this BPF_PROG(on_update) has failed to acquire
- * the percpu busy lock => meaning potential
- * deadlock is detected and it will fail to create
- * new storage.
- */
+ /* This will succeed as there is no real deadlock */
ptr = bpf_task_storage_get(&map_b, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
diff --git a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
index 986829aaf73a..6ce98fe9f387 100644
--- a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
+++ b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
@@ -1,15 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-#ifndef EBUSY
-#define EBUSY 16
-#endif
-
extern bool CONFIG_PREEMPTION __kconfig __weak;
int nr_get_errs = 0;
int nr_del_errs = 0;
@@ -40,7 +37,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
ret = bpf_task_storage_delete(&task_storage,
bpf_get_current_task_btf());
- if (ret == -EBUSY)
+ if (ret == -EDEADLK || ret == -ETIMEDOUT)
__sync_fetch_and_add(&nr_del_errs, 1);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/task_work.c b/tools/testing/selftests/bpf/progs/task_work.c
index 663a80990f8f..a6009d105158 100644
--- a/tools/testing/selftests/bpf/progs/task_work.c
+++ b/tools/testing/selftests/bpf/progs/task_work.c
@@ -65,8 +65,7 @@ int oncpu_hash_map(struct pt_regs *args)
work = bpf_map_lookup_elem(&hmap, &key);
if (!work)
return 0;
-
- bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL);
+ bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work);
return 0;
}
@@ -80,7 +79,7 @@ int oncpu_array_map(struct pt_regs *args)
work = bpf_map_lookup_elem(&arrmap, &key);
if (!work)
return 0;
- bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, process_work, NULL);
+ bpf_task_work_schedule_signal(task, &work->tw, &arrmap, process_work);
return 0;
}
@@ -102,6 +101,6 @@ int oncpu_lru_map(struct pt_regs *args)
work = bpf_map_lookup_elem(&lrumap, &key);
if (!work || work->data[0])
return 0;
- bpf_task_work_schedule_resume_impl(task, &work->tw, &lrumap, process_work, NULL);
+ bpf_task_work_schedule_resume(task, &work->tw, &lrumap, process_work);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c
index 1270953fd092..82e4b8913333 100644
--- a/tools/testing/selftests/bpf/progs/task_work_fail.c
+++ b/tools/testing/selftests/bpf/progs/task_work_fail.c
@@ -53,7 +53,7 @@ int mismatch_map(struct pt_regs *args)
work = bpf_map_lookup_elem(&arrmap, &key);
if (!work)
return 0;
- bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL);
+ bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work);
return 0;
}
@@ -65,7 +65,7 @@ int no_map_task_work(struct pt_regs *args)
struct bpf_task_work tw;
task = bpf_get_current_task_btf();
- bpf_task_work_schedule_resume_impl(task, &tw, &hmap, process_work, NULL);
+ bpf_task_work_schedule_resume(task, &tw, &hmap, process_work);
return 0;
}
@@ -76,7 +76,7 @@ int task_work_null(struct pt_regs *args)
struct task_struct *task;
task = bpf_get_current_task_btf();
- bpf_task_work_schedule_resume_impl(task, NULL, &hmap, process_work, NULL);
+ bpf_task_work_schedule_resume(task, NULL, &hmap, process_work);
return 0;
}
@@ -91,6 +91,6 @@ int map_null(struct pt_regs *args)
work = bpf_map_lookup_elem(&arrmap, &key);
if (!work)
return 0;
- bpf_task_work_schedule_resume_impl(task, &work->tw, NULL, process_work, NULL);
+ bpf_task_work_schedule_resume(task, &work->tw, NULL, process_work);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/task_work_stress.c b/tools/testing/selftests/bpf/progs/task_work_stress.c
index 55e555f7f41b..1d4378f351ef 100644
--- a/tools/testing/selftests/bpf/progs/task_work_stress.c
+++ b/tools/testing/selftests/bpf/progs/task_work_stress.c
@@ -51,8 +51,8 @@ int schedule_task_work(void *ctx)
if (!work)
return 0;
}
- err = bpf_task_work_schedule_signal_impl(bpf_get_current_task_btf(), &work->tw, &hmap,
- process_work, NULL);
+ err = bpf_task_work_schedule_signal(bpf_get_current_task_btf(), &work->tw, &hmap,
+ process_work);
if (err)
__sync_fetch_and_add(&schedule_error, 1);
else
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index f7b330ddd007..076fbf03a126 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -15,7 +15,6 @@
extern unsigned long CONFIG_HZ __kconfig;
-int test_einval_bpf_tuple = 0;
int test_einval_reserved = 0;
int test_einval_reserved_new = 0;
int test_einval_netns_id = 0;
@@ -99,12 +98,6 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
__builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
- ct = lookup_fn(ctx, NULL, 0, &opts_def, sizeof(opts_def));
- if (ct)
- bpf_ct_release(ct);
- else
- test_einval_bpf_tuple = opts_def.error;
-
opts_def.reserved[0] = 1;
ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
sizeof(opts_def));
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
index a586f087ffeb..2c156cd166af 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
@@ -4,6 +4,7 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
struct nf_conn;
@@ -18,6 +19,10 @@ struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u3
struct bpf_ct_opts___local *, u32) __ksym;
struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym;
void bpf_ct_release(struct nf_conn *) __ksym;
void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym;
@@ -146,4 +151,56 @@ int change_status_after_alloc(struct __sk_buff *ctx)
return 0;
}
+/*
+ * Negative test: passes NULL as the tuple argument of bpf_skb_ct_lookup().
+ * The __failure/__msg annotations presumably tell the test loader that loading
+ * must fail with the quoted verifier message (macros come from bpf_misc.h -
+ * confirm).
+ */
+SEC("?tc")
+__failure __msg("Possibly NULL pointer passed to trusted arg1")
+int lookup_null_bpf_tuple(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_lookup(ctx, NULL, 0, &opts, sizeof(opts));
+ if (ct)
+ bpf_ct_release(ct);
+ return 0;
+}
+
+/*
+ * Negative test: passes NULL as the opts argument of bpf_skb_ct_lookup() while
+ * supplying a valid zeroed tuple. Expected (per __msg) to be rejected with a
+ * message naming arg3.
+ */
+SEC("?tc")
+__failure __msg("Possibly NULL pointer passed to trusted arg3")
+int lookup_null_bpf_opts(struct __sk_buff *ctx)
+{
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), NULL, sizeof(struct bpf_ct_opts___local));
+ if (ct)
+ bpf_ct_release(ct);
+ return 0;
+}
+
+/*
+ * XDP counterpart of lookup_null_bpf_tuple: passes NULL as the tuple argument
+ * of bpf_xdp_ct_lookup(). Expected (per __msg) to be rejected with a message
+ * naming arg1.
+ */
+SEC("?xdp")
+__failure __msg("Possibly NULL pointer passed to trusted arg1")
+int xdp_lookup_null_bpf_tuple(struct xdp_md *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct nf_conn *ct;
+
+ ct = bpf_xdp_ct_lookup(ctx, NULL, 0, &opts, sizeof(opts));
+ if (ct)
+ bpf_ct_release(ct);
+ return 0;
+}
+
+/*
+ * XDP counterpart of lookup_null_bpf_opts: passes NULL as the opts argument of
+ * bpf_xdp_ct_lookup() with a valid zeroed tuple. Expected (per __msg) to be
+ * rejected with a message naming arg3.
+ */
+SEC("?xdp")
+__failure __msg("Possibly NULL pointer passed to trusted arg3")
+int xdp_lookup_null_bpf_opts(struct xdp_md *ctx)
+{
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_xdp_ct_lookup(ctx, &tup, sizeof(tup.ipv4), NULL, sizeof(struct bpf_ct_opts___local));
+ if (ct)
+ bpf_ct_release(ct);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c
index c88ccc53529a..0c3df19626cb 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c
@@ -33,7 +33,7 @@ struct {
} hashmap1 SEC(".maps");
-static __noinline int foo(int x __tag1 __tag2) __tag1 __tag2
+static __noinline __tag1 __tag2 int foo(int x __tag1 __tag2)
{
struct key_t key;
value_t val = {};
diff --git a/tools/testing/selftests/bpf/progs/test_ctx.c b/tools/testing/selftests/bpf/progs/test_ctx.c
new file mode 100644
index 000000000000..7d4995506717
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ctx.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Valve Corporation.
+ * Author: Changwoo Min <changwoo@igalia.com>
+ */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Kernel-side kfunc called by trigger_all_contexts(); resolved via __ksym. */
+extern void bpf_kfunc_trigger_ctx_check(void) __ksym;
+
+/* Hit counters, one per context, bumped by the three programs below. */
+int count_hardirq;
+int count_softirq;
+int count_task;
+
+/*
+ * Triggered via bpf_prog_test_run from user-space.
+ * Counts one hit in count_task when bpf_in_task() reports true, then calls
+ * bpf_kfunc_trigger_ctx_check(). Always returns 0.
+ */
+SEC("syscall")
+int trigger_all_contexts(void *ctx)
+{
+ if (bpf_in_task())
+ __sync_fetch_and_add(&count_task, 1);
+
+ /* Trigger the firing of a hardirq and softirq for test. */
+ bpf_kfunc_trigger_ctx_check();
+ return 0;
+}
+
+/*
+ * Observer for HardIRQ: fentry program on bpf_testmod_test_hardirq_fn that
+ * atomically bumps count_hardirq only when bpf_in_hardirq() reports true.
+ */
+SEC("fentry/bpf_testmod_test_hardirq_fn")
+int BPF_PROG(on_hardirq)
+{
+ if (bpf_in_hardirq())
+ __sync_fetch_and_add(&count_hardirq, 1);
+ return 0;
+}
+
+/*
+ * Observer for SoftIRQ: fentry program on bpf_testmod_test_softirq_fn that
+ * atomically bumps count_softirq only when bpf_in_serving_softirq() reports
+ * true.
+ */
+SEC("fentry/bpf_testmod_test_softirq_fn")
+int BPF_PROG(on_softirq)
+{
+ if (bpf_in_serving_softirq())
+ __sync_fetch_and_add(&count_softirq, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c
index 84e1f883f97b..561b2f861808 100644
--- a/tools/testing/selftests/bpf/progs/test_d_path.c
+++ b/tools/testing/selftests/bpf/progs/test_d_path.c
@@ -17,6 +17,7 @@ int rets_close[MAX_FILES] = {};
int called_stat = 0;
int called_close = 0;
+int path_match_fallocate = 0;
SEC("fentry/security_inode_getattr")
int BPF_PROG(prog_stat, struct path *path, struct kstat *stat,
@@ -62,4 +63,26 @@ int BPF_PROG(prog_close, struct file *file, void *id)
return 0;
}
+/*
+ * fentry program on vfs_fallocate. For the process selected by my_pid
+ * (compared with the upper 32 bits of pid_tgid) it resolves file->f_path with
+ * bpf_d_path() into a zeroed stack buffer and sets path_match_fallocate to 1
+ * when the helper succeeds and the buffer's first byte is non-zero. Always
+ * returns 0.
+ */
+SEC("fentry/vfs_fallocate")
+int BPF_PROG(prog_fallocate, struct file *file, int mode, loff_t offset, loff_t len)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ int ret = 0;
+ char path_fallocate[MAX_PATH_LEN] = {};
+
+ if (pid != my_pid)
+ return 0;
+
+ ret = bpf_d_path(&file->f_path,
+ path_fallocate, MAX_PATH_LEN);
+ if (ret < 0)
+ return 0;
+
+ /* An empty string is not treated as a match. */
+ if (!path_fallocate[0])
+ return 0;
+
+ path_match_fallocate = 1;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
index 061befb004c2..d249113ed657 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@@ -48,10 +48,9 @@ SEC("?lsm.s/bpf")
__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr")
int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
- unsigned long val = 0;
+ static struct bpf_dynptr val;
- return bpf_verify_pkcs7_signature((struct bpf_dynptr *)val,
- (struct bpf_dynptr *)val, NULL);
+ return bpf_verify_pkcs7_signature(&val, &val, NULL);
}
SEC("lsm.s/bpf")
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
index 0ad1bf1ede8d..967081bbcfe1 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
@@ -29,7 +29,7 @@ int kfunc_dynptr_nullable_test2(struct __sk_buff *skb)
}
SEC("tc")
-__failure __msg("expected pointer to stack or const struct bpf_dynptr")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
int kfunc_dynptr_nullable_test3(struct __sk_buff *skb)
{
struct bpf_dynptr data;
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
index 69aacc96db36..ef9edca184ea 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
@@ -44,4 +44,18 @@ int prog_skb_parser(struct __sk_buff *skb)
return SK_PASS;
}
+/*
+ * Stream verdict program: returns the result of redirecting the skb to key 1
+ * of sock_map_rx with BPF_F_INGRESS set. sock_map_rx is defined outside this
+ * hunk.
+ */
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict_ingress(struct __sk_buff *skb)
+{
+ int one = 1;
+
+ return bpf_sk_redirect_map(skb, &sock_map_rx, one, BPF_F_INGRESS);
+}
+
+/* Stream parser program that returns skb->len unchanged. */
+SEC("sk_skb/stream_parser")
+int prog_skb_verdict_ingress_strp(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 7330c61b5730..7376df405a6b 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -23,7 +23,12 @@ static const int cfg_udp_src = 20000;
(((__u64)len & BPF_ADJ_ROOM_ENCAP_L2_MASK) \
<< BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
-#define L2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN)
+/*
+ * File-local header layout of two big-endian 32-bit words (flags, VNI). This
+ * patch switches every sizeof and cast in the file from struct vxlanhdr to
+ * this type.
+ * NOTE(review): presumably added so the file no longer relies on an external
+ * struct vxlanhdr definition - confirm.
+ */
+struct vxlanhdr___local {
+ __be32 vx_flags;
+ __be32 vx_vni;
+};
+
+#define L2_PAD_SZ (sizeof(struct vxlanhdr___local) + ETH_HLEN)
#define UDP_PORT 5555
#define MPLS_OVER_UDP_PORT 6635
@@ -154,7 +159,7 @@ static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
l2_len = ETH_HLEN;
if (ext_proto & EXTPROTO_VXLAN) {
udp_dst = VXLAN_UDP_PORT;
- l2_len += sizeof(struct vxlanhdr);
+ l2_len += sizeof(struct vxlanhdr___local);
} else
udp_dst = ETH_OVER_UDP_PORT;
break;
@@ -195,12 +200,12 @@ static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
if (ext_proto & EXTPROTO_VXLAN) {
- struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+ struct vxlanhdr___local *vxlan_hdr = (struct vxlanhdr___local *)l2_hdr;
vxlan_hdr->vx_flags = VXLAN_FLAGS;
vxlan_hdr->vx_vni = VXLAN_VNI;
- l2_hdr += sizeof(struct vxlanhdr);
+ l2_hdr += sizeof(struct vxlanhdr___local);
}
if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
@@ -285,7 +290,7 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
l2_len = ETH_HLEN;
if (ext_proto & EXTPROTO_VXLAN) {
udp_dst = VXLAN_UDP_PORT;
- l2_len += sizeof(struct vxlanhdr);
+ l2_len += sizeof(struct vxlanhdr___local);
} else
udp_dst = ETH_OVER_UDP_PORT;
break;
@@ -325,12 +330,12 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
if (ext_proto & EXTPROTO_VXLAN) {
- struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+ struct vxlanhdr___local *vxlan_hdr = (struct vxlanhdr___local *)l2_hdr;
vxlan_hdr->vx_flags = VXLAN_FLAGS;
vxlan_hdr->vx_vni = VXLAN_VNI;
- l2_hdr += sizeof(struct vxlanhdr);
+ l2_hdr += sizeof(struct vxlanhdr___local);
}
if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
@@ -639,7 +644,7 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
olen += ETH_HLEN;
break;
case VXLAN_UDP_PORT:
- olen += ETH_HLEN + sizeof(struct vxlanhdr);
+ olen += ETH_HLEN + sizeof(struct vxlanhdr___local);
break;
}
break;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index 0a0f371a2dec..fa73b17cb999 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -1,12 +1,12 @@
-#include <stdbool.h>
-#include <linux/bpf.h>
-#include <linux/errno.h>
-#include <linux/if_ether.h>
-#include <linux/pkt_cls.h>
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
+#include <errno.h>
+
#include "bpf_kfuncs.h"
+#include "bpf_tracing_net.h"
#define META_SIZE 32
@@ -42,7 +42,7 @@ static bool check_metadata(const char *file, int line, __u8 *meta_have)
if (!__builtin_memcmp(meta_have, meta_want, META_SIZE))
return true;
- bpf_stream_printk(BPF_STREAM_STDERR,
+ bpf_stream_printk(BPF_STDERR,
"FAIL:%s:%d: metadata mismatch\n"
" have:\n %pI6\n %pI6\n"
" want:\n %pI6\n %pI6\n",
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
index 4c677c001258..d6d5fefcd9b1 100644
--- a/tools/testing/selftests/bpf/progs/timer.c
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -1,13 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
-#include <linux/bpf.h>
-#include <time.h>
+
+#include <vmlinux.h>
#include <stdbool.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#define CLOCK_MONOTONIC 1
+#define CLOCK_BOOTTIME 7
+
char _license[] SEC("license") = "GPL";
+
struct hmap_elem {
int counter;
struct bpf_timer timer;
@@ -59,10 +63,14 @@ __u64 bss_data;
__u64 abs_data;
__u64 err;
__u64 ok;
+__u64 test_hits;
+__u64 update_hits;
+__u64 cancel_hits;
__u64 callback_check = 52;
__u64 callback2_check = 52;
__u64 pinned_callback_check;
__s32 pinned_cpu;
+bool async_cancel = 0;
#define ARRAY 1
#define HTAB 2
@@ -164,6 +172,29 @@ int BPF_PROG2(test1, int, a)
return 0;
}
+/*
+ * Timer callback that records having run by setting err to 42. It is installed
+ * by test_async_cancel_succeed(), which cancels the timer right after starting
+ * it.
+ */
+static int timer_error(void *map, int *key, struct bpf_timer *timer)
+{
+ err = 42;
+ return 0;
+}
+
+/*
+ * Syscall-type program: arms the timer stored at key ARRAY of the array map
+ * with timer_error() as callback, immediately calls bpf_timer_cancel_async()
+ * on it, then sets ok to 7. Returns 0, also when the lookup fails (in which
+ * case ok is left untouched).
+ * NOTE(review): presumably user space checks that err stayed clear and ok
+ * became 7 - confirm in the prog_tests counterpart.
+ */
+SEC("syscall")
+int test_async_cancel_succeed(void *ctx)
+{
+ struct bpf_timer *arr_timer;
+ int array_key = ARRAY;
+
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(arr_timer, timer_error);
+ bpf_timer_start(arr_timer, 100000 /* 100us */, 0);
+ bpf_timer_cancel_async(arr_timer);
+ ok = 7;
+ return 0;
+}
+
/* callback for prealloc and non-prealloc hashtab timers */
static int timer_cb2(void *map, int *key, struct hmap_elem *val)
{
@@ -399,27 +430,88 @@ static int race_timer_callback(void *race_array, int *race_key, struct bpf_timer
return 0;
}
-SEC("syscall")
-int race(void *ctx)
+/*
+ * Callback that updates its own map element: overwrites the element it was
+ * invoked for with a zeroed struct elem (BPF_ANY) and then atomically bumps
+ * update_hits. The update's return value is ignored.
+ */
+static int update_self_callback(void *map, int *key, struct bpf_timer *timer)
+{
+ struct elem init = {};
+
+ bpf_map_update_elem(map, key, &init, BPF_ANY);
+ __sync_fetch_and_add(&update_hits, 1);
+ return 0;
+}
+
+/*
+ * Callback that cancels itself using async cancel: calls
+ * bpf_timer_cancel_async() on the timer it is running for, then atomically
+ * bumps cancel_hits.
+ */
+static int cancel_self_callback(void *map, int *key, struct bpf_timer *timer)
+{
+ bpf_timer_cancel_async(timer);
+ __sync_fetch_and_add(&cancel_hits, 1);
+ return 0;
+}
+
+/* Scenario selector for test_common(). */
+enum test_mode {
+ TEST_RACE_SYNC, /* race_timer_callback, then bpf_timer_cancel() */
+ TEST_RACE_ASYNC, /* race_timer_callback, then bpf_timer_cancel_async() */
+ TEST_UPDATE, /* update_self_callback, no cancel issued by the caller */
+ TEST_CANCEL, /* cancel_self_callback, no cancel issued by the caller */
+};
+
+static __always_inline int test_common(enum test_mode mode)
{
struct bpf_timer *timer;
- int err, race_key = 0;
struct elem init;
+ int ret, key = 0;
__builtin_memset(&init, 0, sizeof(struct elem));
- bpf_map_update_elem(&race_array, &race_key, &init, BPF_ANY);
- timer = bpf_map_lookup_elem(&race_array, &race_key);
+ bpf_map_update_elem(&race_array, &key, &init, BPF_ANY);
+ timer = bpf_map_lookup_elem(&race_array, &key);
if (!timer)
- return 1;
+ return 0;
- err = bpf_timer_init(timer, &race_array, CLOCK_MONOTONIC);
- if (err && err != -EBUSY)
- return 1;
+ ret = bpf_timer_init(timer, &race_array, CLOCK_MONOTONIC);
+ if (ret && ret != -EBUSY)
+ return 0;
+
+ if (mode == TEST_RACE_SYNC || mode == TEST_RACE_ASYNC)
+ bpf_timer_set_callback(timer, race_timer_callback);
+ else if (mode == TEST_UPDATE)
+ bpf_timer_set_callback(timer, update_self_callback);
+ else
+ bpf_timer_set_callback(timer, cancel_self_callback);
- bpf_timer_set_callback(timer, race_timer_callback);
bpf_timer_start(timer, 0, 0);
- bpf_timer_cancel(timer);
+
+ if (mode == TEST_RACE_ASYNC)
+ bpf_timer_cancel_async(timer);
+ else if (mode == TEST_RACE_SYNC)
+ bpf_timer_cancel(timer);
return 0;
}
+
+/*
+ * Syscall entry point for the race scenario. The async_cancel global selects
+ * between the async and the synchronous cancel variant of test_common().
+ */
+SEC("syscall")
+int race(void *ctx)
+{
+ return test_common(async_cancel ? TEST_RACE_ASYNC : TEST_RACE_SYNC);
+}
+
+/*
+ * perf_event program: counts the invocation in test_hits and runs the
+ * async-cancel race scenario.
+ * NOTE(review): the nmi_ prefix suggests it is attached to an NMI-delivered
+ * perf event - confirm in the user-space test.
+ */
+SEC("perf_event")
+int nmi_race(void *ctx)
+{
+ __sync_fetch_and_add(&test_hits, 1);
+ return test_common(TEST_RACE_ASYNC);
+}
+
+/*
+ * perf_event program: counts the invocation in test_hits and runs the
+ * TEST_UPDATE scenario, whose callback rewrites its own map element.
+ */
+SEC("perf_event")
+int nmi_update(void *ctx)
+{
+ __sync_fetch_and_add(&test_hits, 1);
+ return test_common(TEST_UPDATE);
+}
+
+/*
+ * perf_event program: counts the invocation in test_hits and runs the
+ * TEST_CANCEL scenario, whose callback async-cancels its own timer.
+ */
+SEC("perf_event")
+int nmi_cancel(void *ctx)
+{
+ __sync_fetch_and_add(&test_hits, 1);
+ return test_common(TEST_CANCEL);
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_start_deadlock.c b/tools/testing/selftests/bpf/progs/timer_start_deadlock.c
new file mode 100644
index 000000000000..019518ee18cd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_start_deadlock.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define CLOCK_MONOTONIC 1
+
+char _license[] SEC("license") = "GPL";
+
+/* Map value type: holds only the bpf_timer under test. */
+struct elem {
+ struct bpf_timer timer;
+};
+
+/* Single-entry array map (int key, struct elem value) shared by both programs. */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer_map SEC(".maps");
+
+/* Set by start_timer() around its bpf_timer_start() calls; gates the tracepoint program. */
+volatile int in_timer_start;
+/* Set to 1 by tp_hrtimer_cancel() once it has run with the gate open. */
+volatile int tp_called;
+
+/* No-op timer callback; the test only needs a callback to be installed. */
+static int timer_cb(void *map, int *key, struct elem *value)
+{
+ return 0;
+}
+
+/*
+ * tp_btf/hrtimer_cancel program. It is inert unless in_timer_start is set.
+ * With the gate open it records the hit in tp_called, looks up element 0 of
+ * timer_map and calls bpf_timer_start() on it. The lookup result is used
+ * without a NULL check, and the hrtimer argument is not inspected, so the
+ * program reacts to any hrtimer cancel seen while the gate is open.
+ */
+SEC("tp_btf/hrtimer_cancel")
+int BPF_PROG(tp_hrtimer_cancel, struct hrtimer *hrtimer)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ if (!in_timer_start)
+ return 0;
+
+ tp_called = 1;
+ timer = bpf_map_lookup_elem(&timer_map, &key);
+
+ /*
+ * Call bpf_timer_start() from the tracepoint within hrtimer logic
+ * on the same timer to make sure it doesn't deadlock.
+ */
+ bpf_timer_start(timer, 1000000000, 0);
+ return 0;
+}
+
+/*
+ * Syscall-type program that drives the test: initialises the timer in element
+ * 0 of timer_map with CLOCK_MONOTONIC, installs timer_cb(), and starts it
+ * twice with in_timer_start raised so that tp_hrtimer_cancel() becomes active
+ * for exactly that window. Always returns 0; helper return values are
+ * ignored.
+ */
+SEC("syscall")
+int start_timer(void *ctx)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer_map, &key);
+ /* claude may complain here that there is no NULL check. Ignoring it. */
+ bpf_timer_init(timer, &timer_map, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(timer, timer_cb);
+
+ /*
+ * call hrtimer_start() twice, so that 2nd call does
+ * remove_hrtimer() and trace_hrtimer_cancel() tracepoint.
+ */
+ in_timer_start = 1;
+ bpf_timer_start(timer, 1000000000, 0);
+ bpf_timer_start(timer, 1000000000, 0);
+ in_timer_start = 0;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_start_delete_race.c b/tools/testing/selftests/bpf/progs/timer_start_delete_race.c
new file mode 100644
index 000000000000..2ff6357f33f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_start_delete_race.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <time.h>
+#include <bpf/bpf_helpers.h>
+
+#define ITER_CNT 2000
+
+/* Map value type: holds only the bpf_timer under test. */
+struct map_value {
+ struct bpf_timer timer;
+};
+
+/* Single-entry array map (int key, struct map_value value) used by both programs. */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} timer_map SEC(".maps");
+
+/* Number of times timer_cb() has run. */
+long cb_cnt;
+
+/*
+ * Timer callback. The body only bumps cb_cnt atomically; it receives the map
+ * value pointer but does not dereference it.
+ * If the race bug exists and this runs on freed memory,
+ * KASAN should detect it.
+ */
+static int timer_cb(void *map, int *key, struct map_value *value)
+{
+ __sync_fetch_and_add(&cb_cnt, 1);
+ return 0;
+}
+
+/*
+ * Syscall-type program: ITER_CNT times in a row, looks up element 0 of
+ * timer_map, (re)initialises its timer with CLOCK_MONOTONIC, installs
+ * timer_cb() and starts it with a timeout argument of 100000000. Helper return
+ * values are ignored; always returns 0.
+ */
+SEC("syscall")
+int start_timer(void *ctx)
+{
+ struct map_value *value;
+ int i;
+
+ for (i = 0; i < ITER_CNT; i++) {
+ int key = 0;
+
+ value = bpf_map_lookup_elem(&timer_map, &key);
+ /* hey claude, no if (!value) here. The verifier got smarter */
+
+ bpf_timer_init(&value->timer, &timer_map, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(&value->timer, timer_cb);
+ bpf_timer_start(&value->timer, 100000000, 0);
+ }
+ return 0;
+}
+
+/*
+ * Syscall-type program: issues bpf_map_delete_elem() on key 0 of timer_map
+ * ITER_CNT times, ignoring the result. Always returns 0.
+ * NOTE(review): timer_map is declared BPF_MAP_TYPE_ARRAY above; confirm that
+ * delete on this map type exercises the intended teardown path.
+ */
+SEC("syscall")
+int delete_elem(void *ctx)
+{
+ int i;
+
+ for (i = 0; i < ITER_CNT; i++) {
+ int key = 0;
+
+ bpf_map_delete_elem(&timer_map, &key);
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 2898b3749d07..4ea0422d1042 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -25,6 +25,34 @@ static __always_inline void inc_counter(void)
__sync_add_and_fetch(&hits[cpu & CPU_MASK].value, 1);
}
+/* Read-only flag: when non-zero, do_stacktrace() captures a stack on each call. */
+volatile const int stacktrace;
+
+/* Destination buffer type for bpf_get_stack(): 128 64-bit entries. */
+typedef __u64 stack_trace_t[128];
+
+/* Per-CPU array with a single slot, used as the stack-trace buffer. */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stack_heap SEC(".maps");
+
+/*
+ * Optionally capture a stack trace for the current invocation.
+ *
+ * Does nothing unless the global `stacktrace` is non-zero. When enabled,
+ * slot 0 of the stack_heap map is looked up and, if present, passed to
+ * bpf_get_stack() as the destination buffer; the helper's result is discarded.
+ */
+static __always_inline void do_stacktrace(void *ctx)
+{
+	__u32 slot = 0;
+	__u64 *buf;
+
+	if (stacktrace) {
+		buf = bpf_map_lookup_elem(&stack_heap, &slot);
+		if (buf)
+			bpf_get_stack(ctx, buf, sizeof(stack_trace_t), 0);
+	}
+}
+
+/* Common benchmark body: count the hit, then optionally record a stack trace. */
+static __always_inline void handle(void *ctx)
+{
+ inc_counter();
+ do_stacktrace(ctx);
+}
+
SEC("?uprobe")
int bench_trigger_uprobe(void *ctx)
{
@@ -81,21 +109,21 @@ int trigger_driver_kfunc(void *ctx)
SEC("?kprobe/bpf_get_numa_node_id")
int bench_trigger_kprobe(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
SEC("?kretprobe/bpf_get_numa_node_id")
int bench_trigger_kretprobe(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
SEC("?kprobe.multi/bpf_get_numa_node_id")
int bench_trigger_kprobe_multi(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
@@ -108,7 +136,7 @@ int bench_kprobe_multi_empty(void *ctx)
SEC("?kretprobe.multi/bpf_get_numa_node_id")
int bench_trigger_kretprobe_multi(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
@@ -121,34 +149,34 @@ int bench_kretprobe_multi_empty(void *ctx)
SEC("?fentry/bpf_get_numa_node_id")
int bench_trigger_fentry(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
SEC("?fexit/bpf_get_numa_node_id")
int bench_trigger_fexit(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
SEC("?fmod_ret/bpf_modify_return_test_tp")
int bench_trigger_fmodret(void *ctx)
{
- inc_counter();
+ handle(ctx);
return -22;
}
SEC("?tp/bpf_test_run/bpf_trigger_tp")
int bench_trigger_tp(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
SEC("?raw_tp/bpf_trigger_tp")
int bench_trigger_rawtp(void *ctx)
{
- inc_counter();
+ handle(ctx);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session.c
index 30bff90b68dc..6e46bb00ff58 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_multi_session.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session.c
@@ -1,9 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <stdbool.h>
-#include "bpf_kfuncs.h"
#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -51,7 +50,7 @@ static int uprobe_multi_check(void *ctx, bool is_return)
SEC("uprobe.session//proc/self/exe:uprobe_multi_func_*")
int uprobe(struct pt_regs *ctx)
{
- return uprobe_multi_check(ctx, bpf_session_is_return());
+ return uprobe_multi_check(ctx, bpf_session_is_return(ctx));
}
static __always_inline bool verify_sleepable_user_copy(void)
@@ -67,5 +66,5 @@ int uprobe_sleepable(struct pt_regs *ctx)
{
if (verify_sleepable_user_copy())
uprobe_multi_sleep_result++;
- return uprobe_multi_check(ctx, bpf_session_is_return());
+ return uprobe_multi_check(ctx, bpf_session_is_return(ctx));
}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c
index 5befdf944dc6..b5db196614a9 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c
@@ -1,9 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <stdbool.h>
-#include "bpf_kfuncs.h"
char _license[] SEC("license") = "GPL";
@@ -13,16 +12,16 @@ __u64 test_uprobe_1_result = 0;
__u64 test_uprobe_2_result = 0;
__u64 test_uprobe_3_result = 0;
-static int check_cookie(__u64 val, __u64 *result)
+static int check_cookie(struct pt_regs *ctx, __u64 val, __u64 *result)
{
__u64 *cookie;
if (bpf_get_current_pid_tgid() >> 32 != pid)
return 1;
- cookie = bpf_session_cookie();
+ cookie = bpf_session_cookie(ctx);
- if (bpf_session_is_return())
+ if (bpf_session_is_return(ctx))
*result = *cookie == val ? val : 0;
else
*cookie = val;
@@ -32,17 +31,17 @@ static int check_cookie(__u64 val, __u64 *result)
SEC("uprobe.session//proc/self/exe:uprobe_multi_func_1")
int uprobe_1(struct pt_regs *ctx)
{
- return check_cookie(1, &test_uprobe_1_result);
+ return check_cookie(ctx, 1, &test_uprobe_1_result);
}
SEC("uprobe.session//proc/self/exe:uprobe_multi_func_2")
int uprobe_2(struct pt_regs *ctx)
{
- return check_cookie(2, &test_uprobe_2_result);
+ return check_cookie(ctx, 2, &test_uprobe_2_result);
}
SEC("uprobe.session//proc/self/exe:uprobe_multi_func_3")
int uprobe_3(struct pt_regs *ctx)
{
- return check_cookie(3, &test_uprobe_3_result);
+ return check_cookie(ctx, 3, &test_uprobe_3_result);
}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c
index 8fbcd69fae22..3ce309248a04 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c
@@ -1,9 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <stdbool.h>
-#include "bpf_kfuncs.h"
#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -16,11 +15,11 @@ int idx_return = 0;
__u64 test_uprobe_cookie_entry[6];
__u64 test_uprobe_cookie_return[3];
-static int check_cookie(void)
+static int check_cookie(struct pt_regs *ctx)
{
- __u64 *cookie = bpf_session_cookie();
+ __u64 *cookie = bpf_session_cookie(ctx);
- if (bpf_session_is_return()) {
+ if (bpf_session_is_return(ctx)) {
if (idx_return >= ARRAY_SIZE(test_uprobe_cookie_return))
return 1;
test_uprobe_cookie_return[idx_return++] = *cookie;
@@ -40,5 +39,5 @@ int uprobe_recursive(struct pt_regs *ctx)
if (bpf_get_current_pid_tgid() >> 32 != pid)
return 1;
- return check_cookie();
+ return check_cookie(ctx);
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
index 7f4827eede3c..c4b8daac4388 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -10,6 +10,8 @@
#include "bpf_experimental.h"
#include "bpf_arena_common.h"
+/* Place a variable in its own ".bss.<name>" section, hidden and 8-byte aligned. */
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+
struct {
__uint(type, BPF_MAP_TYPE_ARENA);
__uint(map_flags, BPF_F_MMAPABLE);
@@ -21,6 +23,37 @@ struct {
#endif
} arena SEC(".maps");
+/*
+ * Socket-program counterpart of basic_alloc1 (which is SEC("syscall")).
+ * Allocates two single pages, expects a third allocation to fail, verifies the
+ * stored values, then frees the second page and re-reads both.
+ * A non-zero return value identifies the failing check; without
+ * __BPF_FEATURE_ADDR_SPACE_CAST the body is compiled out and 0 is returned.
+ */
+SEC("socket")
+__success __retval(0)
+int basic_alloc1_nosleep(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile int __arena *page1, *page2, *no_page;
+
+ page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ *page1 = 1;
+ page2 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page2)
+ return 2;
+ *page2 = 2;
+ no_page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (no_page)
+ return 3;
+ if (*page1 != 1)
+ return 4;
+ if (*page2 != 2)
+ return 5;
+ bpf_arena_free_pages(&arena, (void __arena *)page2, 1);
+ if (*page1 != 1)
+ return 6;
+ if (*page2 != 0 && *page2 != 2) /* use-after-free should return 0 or the stored value */
+ return 7;
+#endif
+ return 0;
+}
+
SEC("syscall")
__success __retval(0)
int basic_alloc1(void *ctx)
@@ -60,6 +93,44 @@ int basic_alloc1(void *ctx)
return 0;
}
+/*
+ * Socket-program counterpart of basic_alloc2.
+ * Allocates two pages in one call, then writes one byte to each of them and to
+ * the pages directly after (page3) and before (page4) the allocation. The
+ * checks expect the in-range bytes to read back and the out-of-range bytes to
+ * read 0, both before and after the two pages are freed.
+ * NOTE(review): return value 1 is used both for allocation failure and for the
+ * first read-back check, so the two cannot be told apart from the result.
+ */
+SEC("socket")
+__success __retval(0)
+int basic_alloc2_nosleep(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page1, *page2, *page3, *page4;
+
+ page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ page2 = page1 + __PAGE_SIZE;
+ page3 = page1 + __PAGE_SIZE * 2;
+ page4 = page1 - __PAGE_SIZE;
+ *page1 = 1;
+ *page2 = 2;
+ *page3 = 3;
+ *page4 = 4;
+ if (*page1 != 1)
+ return 1;
+ if (*page2 != 2)
+ return 2;
+ if (*page3 != 0)
+ return 3;
+ if (*page4 != 0)
+ return 4;
+ bpf_arena_free_pages(&arena, (void __arena *)page1, 2);
+ if (*page1 != 0 && *page1 != 1)
+ return 5;
+ if (*page2 != 0 && *page2 != 2)
+ return 6;
+ if (*page3 != 0)
+ return 7;
+ if (*page4 != 0)
+ return 8;
+#endif
+ return 0;
+}
+
SEC("syscall")
__success __retval(0)
int basic_alloc2(void *ctx)
@@ -102,6 +173,19 @@ struct bpf_arena___l {
struct bpf_map map;
} __attribute__((preserve_access_index));
+/*
+ * Socket-program counterpart of basic_alloc3: request max_entries pages in a
+ * single call, reading the count through the struct bpf_arena___l view of the
+ * arena map. Returns 0 when the allocation succeeds and 1 otherwise.
+ */
+SEC("socket")
+__success __retval(0) __log_level(2)
+int basic_alloc3_nosleep(void *ctx)
+{
+	struct bpf_arena___l *arena_view = (struct bpf_arena___l *)&arena;
+	volatile char __arena *region;
+
+	region = bpf_arena_alloc_pages(&arena_view->map, NULL,
+				       arena_view->map.max_entries, NUMA_NO_NODE, 0);
+	return region ? 0 : 1;
+}
+
SEC("syscall")
__success __retval(0) __log_level(2)
int basic_alloc3(void *ctx)
@@ -115,6 +199,38 @@ int basic_alloc3(void *ctx)
return 0;
}
+/*
+ * Socket-program counterpart of basic_reserve1.
+ * Allocates one page, reserves the page that follows it, and then expects both
+ * an explicit allocation at the reserved address and an address-less allocation
+ * to fail. A non-zero return value identifies the failing step.
+ */
+SEC("socket")
+__success __retval(0)
+int basic_reserve1_nosleep(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page)
+ return 1;
+
+ page += __PAGE_SIZE;
+
+ /* Reserve the second page */
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 2;
+
+ /* Try to explicitly allocate the reserved page. */
+ page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
+ if (page)
+ return 3;
+
+ /* Try to implicitly allocate the page (since there's only 2 of them). */
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (page)
+ return 4;
+#endif
+ return 0;
+}
+
SEC("syscall")
__success __retval(0)
int basic_reserve1(void *ctx)
@@ -147,6 +263,26 @@ int basic_reserve1(void *ctx)
return 0;
}
+/*
+ * Socket-program counterpart of basic_reserve2: reserve the page at the arena
+ * base, then check that explicitly allocating that same address fails.
+ * Returns 1 if the reservation fails, 2 if the allocation succeeds, else 0.
+ */
+SEC("socket")
+__success __retval(0)
+int basic_reserve2_nosleep(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+	char __arena *target = arena_base(&arena);
+
+	if (bpf_arena_reserve_pages(&arena, target, 1))
+		return 1;
+
+	target = bpf_arena_alloc_pages(&arena, target, 1, NUMA_NO_NODE, 0);
+	if ((u64)target)
+		return 2;
+#endif
+	return 0;
+}
+
SEC("syscall")
__success __retval(0)
int basic_reserve2(void *ctx)
@@ -168,6 +304,27 @@ int basic_reserve2(void *ctx)
}
/* Reserve the same page twice, should return -EBUSY. */
+SEC("socket")
+__success __retval(0)
+int reserve_twice_nosleep(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+	char __arena *base_page = arena_base(&arena);
+	int err;
+
+	/* The first reservation of the base page has to succeed. */
+	err = bpf_arena_reserve_pages(&arena, base_page, 1);
+	if (err)
+		return 1;
+
+	/* Reserving the very same page again must be refused with -EBUSY. */
+	err = bpf_arena_reserve_pages(&arena, base_page, 1);
+	if (err != -EBUSY)
+		return 2;
+#endif
+	return 0;
+}
+
SEC("syscall")
__success __retval(0)
int reserve_twice(void *ctx)
@@ -190,6 +347,36 @@ int reserve_twice(void *ctx)
}
/* Try to reserve past the end of the arena. */
+/*
+ * Socket-program variant: every reservation below is expected to fail with
+ * -EINVAL - a NULL address, and page counts of 3, 4096 and 2^32 - 1 starting
+ * at the arena base. The return value (1-4) identifies the call that did not.
+ */
+SEC("socket")
+__success __retval(0)
+int reserve_invalid_region_nosleep(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ /* Try a NULL pointer. */
+ ret = bpf_arena_reserve_pages(&arena, NULL, 3);
+ if (ret != -EINVAL)
+ return 1;
+
+ page = arena_base(&arena);
+
+ ret = bpf_arena_reserve_pages(&arena, page, 3);
+ if (ret != -EINVAL)
+ return 2;
+
+ ret = bpf_arena_reserve_pages(&arena, page, 4096);
+ if (ret != -EINVAL)
+ return 3;
+
+ ret = bpf_arena_reserve_pages(&arena, page, (1ULL << 32) - 1);
+ if (ret != -EINVAL)
+ return 4;
+#endif
+ return 0;
+}
+
SEC("syscall")
__success __retval(0)
int reserve_invalid_region(void *ctx)
@@ -254,4 +441,40 @@ int iter_maps3(struct bpf_iter__bpf_map *ctx)
return 0;
}
+/* Spin lock held across the arena kfunc calls in the test below. */
+private(ARENA_TESTS) struct bpf_spin_lock arena_bpf_test_lock;
+
+/*
+ * Use the arena kfunc API while under a BPF lock.
+ * With arena_bpf_test_lock held: reserve the page at the arena base, free it,
+ * allocate one page and free it again. The lock is released on every exit
+ * path. Returns 1 if the reservation fails, 2 if the allocation fails, else 0.
+ */
+SEC("syscall")
+__success __retval(0)
+int arena_kfuncs_under_bpf_lock(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ bpf_spin_lock(&arena_bpf_test_lock);
+
+ /* Get a separate region of the arena. */
+ page = arena_base(&arena);
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret) {
+ bpf_spin_unlock(&arena_bpf_test_lock);
+ return 1;
+ }
+
+ bpf_arena_free_pages(&arena, page, 1);
+
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page) {
+ bpf_spin_unlock(&arena_bpf_test_lock);
+ return 2;
+ }
+
+ bpf_arena_free_pages(&arena, page, 1);
+
+ bpf_spin_unlock(&arena_bpf_test_lock);
+#endif
+
+ return 0;
+}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_globals1.c b/tools/testing/selftests/bpf/progs/verifier_arena_globals1.c
new file mode 100644
index 000000000000..83182ddbfb95
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_globals1.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#define BPF_NO_KFUNC_PROTOTYPES
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+#include "bpf_misc.h"
+
+/*
+ * Number of arena pages: 1 << (32 - log2(__PAGE_SIZE)), given that
+ * __builtin_ffs() returns the 1-based index of the lowest set bit. This
+ * assumes __PAGE_SIZE is a power of two, making the arena 2^32 bytes.
+ */
+#define ARENA_PAGES (1UL<< (32 - __builtin_ffs(__PAGE_SIZE) + 1))
+/* Number of pages occupied by global_data. */
+#define GLOBAL_PAGES (16)
+
+/* mmap-able arena of ARENA_PAGES pages; map_extra differs between arm64 and other targets. */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, ARENA_PAGES);
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, (1ull << 32) | (~0u - __PAGE_SIZE * ARENA_PAGES + 1));
+#else
+ __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * ARENA_PAGES + 1));
+#endif
+} arena SEC(".maps");
+
+/*
+ * Global data, to be placed at the end of the arena.
+ * GLOBAL_PAGES rows of PAGE_SIZE bytes each.
+ */
+volatile char __arena global_data[GLOBAL_PAGES][PAGE_SIZE];
+
+/*
+ * Check that the arena globals sit in the last GLOBAL_PAGES pages.
+ * Reserves the first ARENA_PAGES - GLOBAL_PAGES pages (must succeed), expects
+ * a reservation at the computed globals address to fail, then writes and reads
+ * back one byte in the middle of every global_data page.
+ * Returns 1 or 2 for the reservation checks and i + 3 for a read-back mismatch
+ * on page i; 0 on success.
+ */
+SEC("syscall")
+__success __retval(0)
+int check_reserve1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ const u8 magic = 0x5a;
+ __u8 __arena *guard, *globals;
+ volatile char __arena *ptr;
+ int i;
+ int ret;
+
+ guard = (void __arena *)arena_base(&arena);
+ globals = (void __arena *)(arena_base(&arena) + (ARENA_PAGES - GLOBAL_PAGES) * PAGE_SIZE);
+
+ /* Reserve the region we've offset the globals by. */
+ ret = bpf_arena_reserve_pages(&arena, guard, ARENA_PAGES - GLOBAL_PAGES);
+ if (ret)
+ return 1;
+
+ /* Make sure the globals are in the expected offset. */
+ ret = bpf_arena_reserve_pages(&arena, globals, 1);
+ if (!ret)
+ return 2;
+
+ /* Verify globals are properly mapped in by libbpf. */
+ for (i = 0; i < GLOBAL_PAGES; i++) {
+ ptr = &global_data[i][PAGE_SIZE / 2];
+
+ *ptr = magic;
+ if (*ptr != magic)
+ return i + 3;
+ }
+#endif
+ return 0;
+}
+
+/*
+ * Relocation check by reading directly into the global data w/o using symbols.
+ * A byte is written through the global_data symbol (last page, middle byte) and
+ * read back through a pointer built from the numeric offset
+ * ARENA_PAGES * PAGE_SIZE - PAGE_SIZE / 2. Returns 1 if the two disagree.
+ */
+SEC("syscall")
+__success __retval(0)
+int check_relocation(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ const u8 magic = 0xfa;
+ u8 __arena *ptr;
+
+ global_data[GLOBAL_PAGES - 1][PAGE_SIZE / 2] = magic;
+ ptr = (u8 __arena *)((u64)(ARENA_PAGES * PAGE_SIZE - PAGE_SIZE / 2));
+ if (*ptr != magic)
+ return 1;
+
+#endif
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_globals2.c b/tools/testing/selftests/bpf/progs/verifier_arena_globals2.c
new file mode 100644
index 000000000000..e6bd7b61f9f1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_globals2.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#define BPF_NO_KFUNC_PROTOTYPES
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+
+/* Number of pages in the arena; global_data below has exactly this many rows. */
+#define ARENA_PAGES (32)
+
+/* mmap-able arena of ARENA_PAGES pages; map_extra differs between arm64 and other targets. */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, ARENA_PAGES);
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, (1ull << 32) | (~0u - __PAGE_SIZE * ARENA_PAGES + 1));
+#else
+ __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * ARENA_PAGES + 1));
+#endif
+} arena SEC(".maps");
+
+/*
+ * Fill the entire arena with global data.
+ * The offset into the arena should be 0.
+ */
+char __arena global_data[ARENA_PAGES][PAGE_SIZE];
+
+/*
+ * With global data covering the whole arena, a reservation of the page at
+ * the arena base is expected to be refused. Returns 1 if it is granted.
+ */
+SEC("syscall")
+__success __retval(0)
+int check_reserve2(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+	void __arena *base_page = (void __arena *)arena_base(&arena);
+
+	/* Make sure the data at offset 0 case is properly handled. */
+	if (!bpf_arena_reserve_pages(&arena, base_page, 1))
+		return 1;
+#endif
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
index f19e15400b3e..5f7e7afee169 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
@@ -23,18 +23,31 @@ int big_alloc1(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
volatile char __arena *page1, *page2, *no_page, *page3;
- void __arena *base;
+ u64 base;
- page1 = base = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ base = (u64)arena_base(&arena);
+
+ page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
if (!page1)
return 1;
+
+ if ((u64)page1 != base)
+ return 15;
+
*page1 = 1;
- page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE * 2,
+ page2 = bpf_arena_alloc_pages(&arena, (void __arena *)(ARENA_SIZE - 2 * PAGE_SIZE),
1, NUMA_NO_NODE, 0);
if (!page2)
return 2;
*page2 = 2;
- no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE,
+
+ /* Test for the guard region at the end of the arena. */
+ no_page = bpf_arena_alloc_pages(&arena, (void __arena *)ARENA_SIZE - PAGE_SIZE,
+ 1, NUMA_NO_NODE, 0);
+ if (no_page)
+ return 16;
+
+ no_page = bpf_arena_alloc_pages(&arena, (void __arena *)ARENA_SIZE,
1, NUMA_NO_NODE, 0);
if (no_page)
return 3;
@@ -270,5 +283,34 @@ int big_alloc2(void *ctx)
return 9;
return 0;
}
+
+/*
+ * Allocate, touch and free 2051 pages with a single allocation request.
+ *
+ * Returns 0 on success, or i + 1 when page i does not read back the value
+ * written to it.
+ * NOTE(review): an allocation failure also returns 0, i.e. counts as a pass;
+ * confirm that this is intended.
+ */
+SEC("socket")
+__success __retval(0)
+int big_alloc3(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+	char __arena *pages;
+	u64 i;
+
+	/*
+	 * Allocate 2051 pages in one go to check how kmalloc_nolock() handles large requests.
+	 * Since kmalloc_nolock() can allocate up to 1024 struct page * at a time, this call should
+	 * result in three batches: two batches of 1024 pages each, followed by a final batch of 3
+	 * pages.
+	 */
+	pages = bpf_arena_alloc_pages(&arena, NULL, 2051, NUMA_NO_NODE, 0);
+	if (!pages)
+		return 0;
+
+	bpf_for(i, 0, 2051)
+		pages[i * PAGE_SIZE] = 123;
+	bpf_for(i, 0, 2051) {
+		/* + 1 so a mismatch on page 0 is not reported as 0 (success). */
+		if (pages[i * PAGE_SIZE] != 123)
+			return i + 1;
+	}
+
+	bpf_arena_free_pages(&arena, pages, 2051);
+#endif
+	return 0;
+}
#endif
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
index 7efa9521105e..39aff82549c9 100644
--- a/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
+++ b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
@@ -96,7 +96,7 @@ int wq_non_sleepable_prog(void *ctx)
if (bpf_wq_init(&val->w, &wq_map, 0) != 0)
return 0;
- if (bpf_wq_set_callback_impl(&val->w, wq_cb, 0, NULL) != 0)
+ if (bpf_wq_set_callback(&val->w, wq_cb, 0) != 0)
return 0;
return 0;
}
@@ -114,7 +114,7 @@ int wq_sleepable_prog(void *ctx)
if (bpf_wq_init(&val->w, &wq_map, 0) != 0)
return 0;
- if (bpf_wq_set_callback_impl(&val->w, wq_cb, 0, NULL) != 0)
+ if (bpf_wq_set_callback(&val->w, wq_cb, 0) != 0)
return 0;
return 0;
}
@@ -156,7 +156,7 @@ int task_work_non_sleepable_prog(void *ctx)
if (!task)
return 0;
- bpf_task_work_schedule_resume_impl(task, &val->tw, &task_work_map, task_work_cb, NULL);
+ bpf_task_work_schedule_resume(task, &val->tw, &task_work_map, task_work_cb);
return 0;
}
@@ -176,6 +176,6 @@ int task_work_sleepable_prog(void *ctx)
if (!task)
return 0;
- bpf_task_work_schedule_resume_impl(task, &val->tw, &task_work_map, task_work_cb, NULL);
+ bpf_task_work_schedule_resume(task, &val->tw, &task_work_map, task_work_cb);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index 411a18437d7e..560531404bce 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -1477,7 +1477,7 @@ __naked void sub64_full_overflow(void)
SEC("socket")
__description("64-bit subtraction, partial overflow, result in unbounded reg")
__success __log_level(2)
-__msg("3: (1f) r3 -= r2 {{.*}} R3=scalar()")
+__msg("3: (1f) r3 -= r2 {{.*}} R3=scalar(id=1-1)")
__retval(0)
__naked void sub64_partial_overflow(void)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c
index e61755656e8d..4b779deee767 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bswap.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c
@@ -48,6 +48,49 @@ __naked void bswap_64(void)
: __clobber_all);
}
+/*
+ * Emit one socket-program verifier test named @name.
+ *
+ * The program masks a random value with @in_value, applies the byte-swap
+ * instruction @op, and jumps to a trap (a load through r1 = 42) if the result
+ * is greater than @out_value. The two __msg() patterns expect the verifier log
+ * to show a var_off mask of @in_value after the AND and of @out_value after
+ * @op. @in_value and @out_value are stringified, so pass them as literals.
+ */
+#define BSWAP_RANGE_TEST(name, op, in_value, out_value) \
+ SEC("socket") \
+ __success __log_level(2) \
+ __msg("r0 &= {{.*}}; R0=scalar({{.*}},var_off=(0x0; " #in_value "))") \
+ __msg("r0 = " op " r0 {{.*}}; R0=scalar({{.*}},var_off=(0x0; " #out_value "))") \
+ __naked void name(void) \
+ { \
+ asm volatile ( \
+ "call %[bpf_get_prandom_u32];" \
+ "r0 &= " #in_value ";" \
+ "r0 = " op " r0;" \
+ "r2 = " #out_value " ll;" \
+ "if r0 > r2 goto trap_%=;" \
+ "r0 = 0;" \
+ "exit;" \
+ "trap_%=:" \
+ "r1 = 42;" \
+ "r0 = *(u64 *)(r1 + 0);" \
+ "exit;" \
+ : \
+ : __imm(bpf_get_prandom_u32) \
+ : __clobber_all); \
+ }
+
+/* bswapN always swaps, so its expected masks do not depend on host byte order. */
+BSWAP_RANGE_TEST(bswap16_range, "bswap16", 0x3f00, 0x3f)
+BSWAP_RANGE_TEST(bswap32_range, "bswap32", 0x3f00, 0x3f0000)
+BSWAP_RANGE_TEST(bswap64_range, "bswap64", 0x3f00, 0x3f000000000000)
+/* beN/leN: the expected mask is swapped only for the non-native byte order. */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+BSWAP_RANGE_TEST(be16_range, "be16", 0x3f00, 0x3f)
+BSWAP_RANGE_TEST(be32_range, "be32", 0x3f00, 0x3f0000)
+BSWAP_RANGE_TEST(be64_range, "be64", 0x3f00, 0x3f000000000000)
+BSWAP_RANGE_TEST(le16_range, "le16", 0x3f00, 0x3f00)
+BSWAP_RANGE_TEST(le32_range, "le32", 0x3f00, 0x3f00)
+BSWAP_RANGE_TEST(le64_range, "le64", 0x3f00, 0x3f00)
+#else
+BSWAP_RANGE_TEST(be16_range, "be16", 0x3f00, 0x3f00)
+BSWAP_RANGE_TEST(be32_range, "be32", 0x3f00, 0x3f00)
+BSWAP_RANGE_TEST(be64_range, "be64", 0x3f00, 0x3f00)
+BSWAP_RANGE_TEST(le16_range, "le16", 0x3f00, 0x3f)
+BSWAP_RANGE_TEST(le32_range, "le32", 0x3f00, 0x3f0000)
+BSWAP_RANGE_TEST(le64_range, "le64", 0x3f00, 0x3f000000000000)
+#endif
+
#else
SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_default_trusted_ptr.c b/tools/testing/selftests/bpf/progs/verifier_default_trusted_ptr.c
new file mode 100644
index 000000000000..fa3b656ad4fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_default_trusted_ptr.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2026 Google LLC.
+ */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+/* Fetch a pointer from one test kfunc and hand it straight to another; returns 0. */
+SEC("syscall")
+__success __retval(0)
+int test_default_trusted_ptr(void *ctx)
+{
+ struct prog_test_member *trusted_ptr;
+
+ trusted_ptr = bpf_kfunc_get_default_trusted_ptr_test();
+ /*
+ * Test BPF kfunc bpf_kfunc_get_default_trusted_ptr_test() returns a
+ * PTR_TO_BTF_ID | PTR_TRUSTED, therefore it should be accepted when
+ * passed to a BPF kfunc only accepting KF_TRUSTED_ARGS.
+ */
+ bpf_kfunc_put_default_trusted_ptr_test(trusted_ptr);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_div_mod_bounds.c b/tools/testing/selftests/bpf/progs/verifier_div_mod_bounds.c
new file mode 100644
index 000000000000..4672af0b3268
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_div_mod_bounds.c
@@ -0,0 +1,1149 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <limits.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* This file contains unit tests for signed/unsigned division and modulo
+ * operations (with divisor as a constant), focusing on verifying whether
+ * BPF verifier's range tracking module soundly and precisely computes
+ * the results.
+ */
+
+/*
+ * Dividend: w1 = (rand & 8) | 1. The __msg pattern expects the 32-bit unsigned
+ * divide by 3 to leave R1 within [0, 3]; the load through r1 at l0 is only
+ * reachable via "w1 > 3".
+ */
+SEC("socket")
+__description("UDIV32, positive divisor")
+__success __retval(0) __log_level(2)
+__msg("w1 /= 3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=3,var_off=(0x0; 0x3))")
+__naked void udiv32_pos_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w1 &= 8; \
+ w1 |= 1; \
+ w1 /= 3; \
+ if w1 > 3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * The divisor register w2 holds the constant 0. The __msg pattern expects
+ * R1=0 after the 32-bit unsigned divide; the load through r1 at l0 is only
+ * reachable via "w1 != 0".
+ */
+SEC("socket")
+__description("UDIV32, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("w1 /= w2 {{.*}}; R1=0 R2=0")
+__naked void udiv32_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w1 &= 8; \
+ w1 |= 1; \
+ w2 = 0; \
+ w1 /= w2; \
+ if w1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * 64-bit form of the positive-divisor case: r1 = (rand & 8) | 1, then r1 /= 3.
+ * The __msg pattern expects R1 within [0, 3]; the load through r1 at l0 is
+ * only reachable via "r1 > 3".
+ */
+SEC("socket")
+__description("UDIV64, positive divisor")
+__success __retval(0) __log_level(2)
+__msg("r1 /= 3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=3,var_off=(0x0; 0x3))")
+__naked void udiv64_pos_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r1 &= 8; \
+ r1 |= 1; \
+ r1 /= 3; \
+ if r1 > 3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * 64-bit form of the zero-divisor case: r2 holds the constant 0. The __msg
+ * pattern expects R1=0 after the divide; the load through r1 at l0 is only
+ * reachable via "r1 != 0".
+ */
+SEC("socket")
+__description("UDIV64, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("r1 /= r2 {{.*}}; R1=0 R2=0")
+__naked void udiv64_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r1 &= 8; \
+ r1 |= 1; \
+ r2 = 0; \
+ r1 /= r2; \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * The two guards keep the 32-bit signed dividend in [8, 10]. After "s/= 3" the
+ * __msg pattern expects R1 within [2, 3]; the load through r1 at l1 is only
+ * reachable when w1 falls outside that range.
+ */
+SEC("socket")
+__description("SDIV32, positive divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= 3 {{.*}}; R1=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=3,var_off=(0x2; 0x1))")
+__naked void sdiv32_pos_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< 8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s/= 3; \
+ if w1 s< 2 goto l1_%=; \
+ if w1 s> 3 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * The two guards keep the 32-bit signed dividend in [-10, -8]. After "s/= 3"
+ * the __msg pattern expects smin32=-3 and smax32=-2; the load through r1 at l1
+ * is only reachable when w1 falls outside that range.
+ */
+SEC("socket")
+__description("SDIV32, positive divisor, negative dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= 3 {{.*}}; R1=scalar(smin=umin=umin32=0xfffffffd,smax=umax=umax32=0xfffffffe,smin32=-3,smax32=-2,var_off=(0xfffffffc; 0x3))")
+__naked void sdiv32_pos_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s> -8 goto l0_%=; \
+ if w1 s< -10 goto l0_%=; \
+ w1 s/= 3; \
+ if w1 s< -3 goto l1_%=; \
+ if w1 s> -2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * The two guards keep the 32-bit signed dividend in [-8, 10]. After "s/= 3"
+ * the __msg pattern expects smin32=-2 and smax32=3; the load through r1 at l1
+ * is only reachable when w1 falls outside that range.
+ */
+SEC("socket")
+__description("SDIV32, positive divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= 3 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-2,smax32=3,var_off=(0x0; 0xffffffff))")
+__naked void sdiv32_pos_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< -8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s/= 3; \
+ if w1 s< -2 goto l1_%=; \
+ if w1 s> 3 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * The two guards keep the 32-bit signed dividend in [8, 10]. After "s/= -3"
+ * the __msg pattern expects smin32=-3 and smax32=-2; the load through r1 at l1
+ * is only reachable when w1 falls outside that range.
+ */
+SEC("socket")
+__description("SDIV32, negative divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= -3 {{.*}}; R1=scalar(smin=umin=umin32=0xfffffffd,smax=umax=umax32=0xfffffffe,smin32=-3,smax32=-2,var_off=(0xfffffffc; 0x3))")
+__naked void sdiv32_neg_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< 8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s/= -3; \
+ if w1 s< -3 goto l1_%=; \
+ if w1 s> -2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * The two guards keep the 32-bit signed dividend in [-10, -8], i.e. negative.
+ * After "s/= -3" the __msg pattern expects R1 within [2, 3]; the load through
+ * r1 at l1 is only reachable when w1 falls outside that range.
+ * The description names the dividend as negative so that it matches the code
+ * and no longer duplicates the description of sdiv32_neg_divisor_1.
+ */
+SEC("socket")
+__description("SDIV32, negative divisor, negative dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= -3 {{.*}}; R1=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=3,var_off=(0x2; 0x1))")
+__naked void sdiv32_neg_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s> -8 goto l0_%=; \
+ if w1 s< -10 goto l0_%=; \
+ w1 s/= -3; \
+ if w1 s< 2 goto l1_%=; \
+ if w1 s> 3 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * The two guards keep the 32-bit signed dividend in [-8, 10]. After "s/= -3"
+ * the __msg pattern expects smin32=-3 and smax32=2; the load through r1 at l1
+ * is only reachable when w1 falls outside that range.
+ */
+SEC("socket")
+__description("SDIV32, negative divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= -3 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-3,smax32=2,var_off=(0x0; 0xffffffff))")
+__naked void sdiv32_neg_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< -8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s/= -3; \
+ if w1 s< -3 goto l1_%=; \
+ if w1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Signed 32-bit divide where the divisor register w2 holds the constant 0.
+ * The __msg pattern expects R1=0 afterwards; the load through r1 at l0 is only
+ * reachable via "w1 != 0".
+ */
+SEC("socket")
+__description("SDIV32, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= w2 {{.*}}; R1=0 R2=0")
+__naked void sdiv32_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w1 &= 8; \
+ w1 |= 1; \
+ w2 = 0; \
+ w1 s/= w2; \
+ if w1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * The guard skips the divide unless w1 <= INT_MIN + 10 (signed), so the
+ * dividend range includes INT_MIN when "s/= -1" runs. The __msg pattern
+ * expects no 32-bit bounds afterwards (var_off mask 0xffffffff). There is no
+ * trap path in this program; only the logged register state is checked.
+ */
+SEC("socket")
+__description("SDIV32, overflow (S32_MIN/-1)")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= -1 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__naked void sdiv32_overflow_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w2 = %[int_min]; \
+ w2 += 10; \
+ if w1 s> w2 goto l0_%=; \
+ w1 s/= -1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Constant dividend INT_MIN divided by -1. The __msg pattern expects
+ * R1=0x80000000 afterwards; the load through r1 at l0 is only reachable via
+ * "w1 != INT_MIN".
+ */
+SEC("socket")
+__description("SDIV32, overflow (S32_MIN/-1), constant dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s/= -1 {{.*}}; R1=0x80000000")
+__naked void sdiv32_overflow_2(void)
+{
+ asm volatile (" \
+ w1 = %[int_min]; \
+ w1 s/= -1; \
+ if w1 != %[int_min] goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+/*
+ * The two guards keep the 64-bit signed dividend in [8, 10]. After "s/= 3" the
+ * __msg pattern expects R1 within [2, 3]; the load through r1 at l1 is only
+ * reachable when r1 falls outside that range.
+ */
+SEC("socket")
+__description("SDIV64, positive divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= 3 {{.*}}; R1=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=3,var_off=(0x2; 0x1))")
+__naked void sdiv64_pos_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< 8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s/= 3; \
+ if r1 s< 2 goto l1_%=; \
+ if r1 s> 3 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * The two guards keep the 64-bit signed dividend in [-10, -8]. After "s/= 3"
+ * the __msg pattern expects smin=-3 and smax=-2; the load through r1 at l1 is
+ * only reachable when r1 falls outside that range.
+ */
+SEC("socket")
+__description("SDIV64, positive divisor, negative dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= 3 {{.*}}; R1=scalar(smin=smin32=-3,smax=smax32=-2,umin=0xfffffffffffffffd,umax=0xfffffffffffffffe,umin32=0xfffffffd,umax32=0xfffffffe,var_off=(0xfffffffffffffffc; 0x3))")
+__naked void sdiv64_pos_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s> -8 goto l0_%=; \
+ if r1 s< -10 goto l0_%=; \
+ r1 s/= 3; \
+ if r1 s< -3 goto l1_%=; \
+ if r1 s> -2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, positive divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= 3 {{.*}}; R1=scalar(smin=smin32=-2,smax=smax32=3)")
+__naked void sdiv64_pos_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< -8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s/= 3; \
+ if r1 s< -2 goto l1_%=; \
+ if r1 s> 3 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, negative divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= -3 {{.*}}; R1=scalar(smin=smin32=-3,smax=smax32=-2,umin=0xfffffffffffffffd,umax=0xfffffffffffffffe,umin32=0xfffffffd,umax32=0xfffffffe,var_off=(0xfffffffffffffffc; 0x3))")
+__naked void sdiv64_neg_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< 8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s/= -3; \
+ if r1 s< -3 goto l1_%=; \
+ if r1 s> -2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, negative divisor, negative dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= -3 {{.*}}; R1=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=3,var_off=(0x2; 0x1))")
+__naked void sdiv64_neg_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s> -8 goto l0_%=; \
+ if r1 s< -10 goto l0_%=; \
+ r1 s/= -3; \
+ if r1 s< 2 goto l1_%=; \
+ if r1 s> 3 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, negative divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= -3 {{.*}}; R1=scalar(smin=smin32=-3,smax=smax32=2)")
+__naked void sdiv64_neg_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< -8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s/= -3; \
+ if r1 s< -3 goto l1_%=; \
+ if r1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= r2 {{.*}}; R1=0 R2=0")
+__naked void sdiv64_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r1 &= 8; \
+ r1 |= 1; \
+ r2 = 0; \
+ r1 s/= r2; \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, overflow (S64_MIN/-1)")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= -1 {{.*}}; R1=scalar()")
+__naked void sdiv64_overflow_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ r1 = r0; \
+ r2 = %[llong_min] ll; \
+ r2 += 10; \
+ if r1 s> r2 goto l0_%=; \
+ r1 s/= -1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN),
+ __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, overflow (S64_MIN/-1), constant dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s/= -1 {{.*}}; R1=0x8000000000000000")
+__naked void sdiv64_overflow_2(void)
+{
+ asm volatile (" \
+ r1 = %[llong_min] ll; \
+ r1 s/= -1; \
+ r2 = %[llong_min] ll; \
+ if r1 != r2 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("UMOD32, positive divisor")
+__success __retval(0) __log_level(2)
+__msg("w1 %= 3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=2,var_off=(0x0; 0x3))")
+__naked void umod32_pos_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w1 &= 8; \
+ w1 |= 1; \
+ w1 %%= 3; \
+ if w1 > 3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("UMOD32, positive divisor, small dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 %= 10 {{.*}}; R1=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=9,var_off=(0x1; 0x8))")
+__naked void umod32_pos_divisor_unchanged(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w1 &= 8; \
+ w1 |= 1; \
+ w1 %%= 10; \
+ if w1 < 1 goto l0_%=; \
+ if w1 > 9 goto l0_%=; \
+ if w1 & 1 != 1 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("UMOD32, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("w1 %= w2 {{.*}}; R1=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=9,var_off=(0x1; 0x8)) R2=0")
+__naked void umod32_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w1 &= 8; \
+ w1 |= 1; \
+ w2 = 0; \
+ w1 %%= w2; \
+ if w1 < 1 goto l0_%=; \
+ if w1 > 9 goto l0_%=; \
+ if w1 & 1 != 1 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("UMOD64, positive divisor")
+__success __retval(0) __log_level(2)
+__msg("r1 %= 3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=2,var_off=(0x0; 0x3))")
+__naked void umod64_pos_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r1 &= 8; \
+ r1 |= 1; \
+ r1 %%= 3; \
+ if r1 > 3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("UMOD64, positive divisor, small dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 %= 10 {{.*}}; R1=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=9,var_off=(0x1; 0x8))")
+__naked void umod64_pos_divisor_unchanged(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r1 &= 8; \
+ r1 |= 1; \
+ r1 %%= 10; \
+ if r1 < 1 goto l0_%=; \
+ if r1 > 9 goto l0_%=; \
+ if r1 & 1 != 1 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("UMOD64, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("r1 %= r2 {{.*}}; R1=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=9,var_off=(0x1; 0x8)) R2=0")
+__naked void umod64_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r1 &= 8; \
+ r1 |= 1; \
+ r2 = 0; \
+ r1 %%= r2; \
+ if r1 < 1 goto l0_%=; \
+ if r1 > 9 goto l0_%=; \
+ if r1 & 1 != 1 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, positive divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= 3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=2,var_off=(0x0; 0x3))")
+__naked void smod32_pos_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< 8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s%%= 3; \
+ if w1 s< 0 goto l1_%=; \
+ if w1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, positive divisor, negative dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= 3 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-2,smax32=0,var_off=(0x0; 0xffffffff))")
+__naked void smod32_pos_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s> -8 goto l0_%=; \
+ if w1 s< -10 goto l0_%=; \
+ w1 s%%= 3; \
+ if w1 s< -2 goto l1_%=; \
+ if w1 s> 0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, positive divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= 3 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-2,smax32=2,var_off=(0x0; 0xffffffff))")
+__naked void smod32_pos_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< -8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s%%= 3; \
+ if w1 s< -2 goto l1_%=; \
+ if w1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, positive divisor, small dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= 11 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-8,smax32=10,var_off=(0x0; 0xffffffff))")
+__naked void smod32_pos_divisor_unchanged(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< -8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s%%= 11; \
+ if w1 s< -8 goto l1_%=; \
+ if w1 s> 10 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, negative divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= -3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=2,var_off=(0x0; 0x3))")
+__naked void smod32_neg_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< 8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s%%= -3; \
+ if w1 s< 0 goto l1_%=; \
+ if w1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, negative divisor, negative dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= -3 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-2,smax32=0,var_off=(0x0; 0xffffffff))")
+__naked void smod32_neg_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s> -8 goto l0_%=; \
+ if w1 s< -10 goto l0_%=; \
+ w1 s%%= -3; \
+ if w1 s< -2 goto l1_%=; \
+ if w1 s> 0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, negative divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= -3 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-2,smax32=2,var_off=(0x0; 0xffffffff))")
+__naked void smod32_neg_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< -8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s%%= -3; \
+ if w1 s< -2 goto l1_%=; \
+ if w1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, negative divisor, small dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= -11 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-8,smax32=10,var_off=(0x0; 0xffffffff))")
+__naked void smod32_neg_divisor_unchanged(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< -8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w1 s%%= -11; \
+ if w1 s< -8 goto l1_%=; \
+ if w1 s> 10 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= w2 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-8,smax32=10,var_off=(0x0; 0xffffffff)) R2=0")
+__naked void smod32_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ if w1 s< -8 goto l0_%=; \
+ if w1 s> 10 goto l0_%=; \
+ w2 = 0; \
+ w1 s%%= w2; \
+ if w1 s< -8 goto l1_%=; \
+ if w1 s> 10 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, overflow (S32_MIN%-1)")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= -1 {{.*}}; R1=0")
+__naked void smod32_overflow_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = w0; \
+ w2 = %[int_min]; \
+ w2 += 10; \
+ if w1 s> w2 goto l0_%=; \
+ w1 s%%= -1; \
+ if w1 != 0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, overflow (S32_MIN%-1), constant dividend")
+__success __retval(0) __log_level(2)
+__msg("w1 s%= -1 {{.*}}; R1=0")
+__naked void smod32_overflow_2(void)
+{
+ asm volatile (" \
+ w1 = %[int_min]; \
+ w1 s%%= -1; \
+ if w1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, positive divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= 3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=2,var_off=(0x0; 0x3))")
+__naked void smod64_pos_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< 8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s%%= 3; \
+ if r1 s< 0 goto l1_%=; \
+ if r1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, positive divisor, negative dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= 3 {{.*}}; R1=scalar(smin=smin32=-2,smax=smax32=0)")
+__naked void smod64_pos_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s> -8 goto l0_%=; \
+ if r1 s< -10 goto l0_%=; \
+ r1 s%%= 3; \
+ if r1 s< -2 goto l1_%=; \
+ if r1 s> 0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, positive divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= 3 {{.*}}; R1=scalar(smin=smin32=-2,smax=smax32=2)")
+__naked void smod64_pos_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< -8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s%%= 3; \
+ if r1 s< -2 goto l1_%=; \
+ if r1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, positive divisor, small dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= 11 {{.*}}; R1=scalar(smin=smin32=-8,smax=smax32=10)")
+__naked void smod64_pos_divisor_unchanged(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< -8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s%%= 11; \
+ if r1 s< -8 goto l1_%=; \
+ if r1 s> 10 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, negative divisor, positive dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= -3 {{.*}}; R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=2,var_off=(0x0; 0x3))")
+__naked void smod64_neg_divisor_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< 8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s%%= -3; \
+ if r1 s< 0 goto l1_%=; \
+ if r1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, negative divisor, negative dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= -3 {{.*}}; R1=scalar(smin=smin32=-2,smax=smax32=0)")
+__naked void smod64_neg_divisor_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s> -8 goto l0_%=; \
+ if r1 s< -10 goto l0_%=; \
+ r1 s%%= -3; \
+ if r1 s< -2 goto l1_%=; \
+ if r1 s> 0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, negative divisor, mixed sign dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= -3 {{.*}}; R1=scalar(smin=smin32=-2,smax=smax32=2)")
+__naked void smod64_neg_divisor_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< -8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s%%= -3; \
+ if r1 s< -2 goto l1_%=; \
+ if r1 s> 2 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, negative divisor, small dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= -11 {{.*}}; R1=scalar(smin=smin32=-8,smax=smax32=10)")
+__naked void smod64_neg_divisor_unchanged(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< -8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r1 s%%= -11; \
+ if r1 s< -8 goto l1_%=; \
+ if r1 s> 10 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, zero divisor")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= r2 {{.*}}; R1=scalar(smin=smin32=-8,smax=smax32=10) R2=0")
+__naked void smod64_zero_divisor(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ if r1 s< -8 goto l0_%=; \
+ if r1 s> 10 goto l0_%=; \
+ r2 = 0; \
+ r1 s%%= r2; \
+ if r1 s< -8 goto l1_%=; \
+ if r1 s> 10 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, overflow (S64_MIN%-1)")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= -1 {{.*}}; R1=0")
+__naked void smod64_overflow_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ r1 = r0; \
+ r2 = %[llong_min] ll; \
+ r2 += 10; \
+ if r1 s> r2 goto l0_%=; \
+ r1 s%%= -1; \
+ if r1 != 0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN),
+ __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, overflow (S64_MIN%-1), constant dividend")
+__success __retval(0) __log_level(2)
+__msg("r1 s%= -1 {{.*}}; R1=0")
+__naked void smod64_overflow_2(void)
+{
+ asm volatile (" \
+ r1 = %[llong_min] ll; \
+ r1 s%%= -1; \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u64 *)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
index 1204fbc58178..e7dae0cf9c17 100644
--- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
+++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
@@ -72,7 +72,7 @@ int trusted_task_arg_nonnull_fail1(void *ctx)
SEC("?tp_btf/task_newtask")
__failure __log_level(2)
-__msg("R1 type=ptr_or_null_ expected=ptr_, trusted_ptr_, rcu_ptr_")
+__msg("R1 type=trusted_ptr_or_null_ expected=ptr_, trusted_ptr_, rcu_ptr_")
__msg("Caller passes invalid args into func#1 ('subprog_trusted_task_nonnull')")
int trusted_task_arg_nonnull_fail2(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
index 059aa716e3d0..889c9b78b912 100644
--- a/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
@@ -17,17 +17,6 @@ struct {
__type(value, struct val);
} map_spin_lock SEC(".maps");
-struct timer {
- struct bpf_timer t;
-};
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
- __type(key, int);
- __type(value, struct timer);
-} map_timer SEC(".maps");
-
SEC("kprobe")
__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_KPROBE")
__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns")
@@ -85,106 +74,6 @@ __naked void bpf_prog_type_raw_tracepoint_1(void)
}
SEC("kprobe")
-__description("bpf_timer_init isn restricted in BPF_PROG_TYPE_KPROBE")
-__failure __msg("tracing progs cannot use bpf_timer yet")
-__naked void in_bpf_prog_type_kprobe_2(void)
-{
- asm volatile (" \
- r2 = r10; \
- r2 += -8; \
- r1 = 0; \
- *(u64*)(r2 + 0) = r1; \
- r1 = %[map_timer] ll; \
- call %[bpf_map_lookup_elem]; \
- if r0 == 0 goto l0_%=; \
- r1 = r0; \
- r2 = %[map_timer] ll; \
- r3 = 1; \
-l0_%=: call %[bpf_timer_init]; \
- exit; \
-" :
- : __imm(bpf_map_lookup_elem),
- __imm(bpf_timer_init),
- __imm_addr(map_timer)
- : __clobber_all);
-}
-
-SEC("perf_event")
-__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_PERF_EVENT")
-__failure __msg("tracing progs cannot use bpf_timer yet")
-__naked void bpf_prog_type_perf_event_2(void)
-{
- asm volatile (" \
- r2 = r10; \
- r2 += -8; \
- r1 = 0; \
- *(u64*)(r2 + 0) = r1; \
- r1 = %[map_timer] ll; \
- call %[bpf_map_lookup_elem]; \
- if r0 == 0 goto l0_%=; \
- r1 = r0; \
- r2 = %[map_timer] ll; \
- r3 = 1; \
-l0_%=: call %[bpf_timer_init]; \
- exit; \
-" :
- : __imm(bpf_map_lookup_elem),
- __imm(bpf_timer_init),
- __imm_addr(map_timer)
- : __clobber_all);
-}
-
-SEC("tracepoint")
-__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_TRACEPOINT")
-__failure __msg("tracing progs cannot use bpf_timer yet")
-__naked void in_bpf_prog_type_tracepoint_2(void)
-{
- asm volatile (" \
- r2 = r10; \
- r2 += -8; \
- r1 = 0; \
- *(u64*)(r2 + 0) = r1; \
- r1 = %[map_timer] ll; \
- call %[bpf_map_lookup_elem]; \
- if r0 == 0 goto l0_%=; \
- r1 = r0; \
- r2 = %[map_timer] ll; \
- r3 = 1; \
-l0_%=: call %[bpf_timer_init]; \
- exit; \
-" :
- : __imm(bpf_map_lookup_elem),
- __imm(bpf_timer_init),
- __imm_addr(map_timer)
- : __clobber_all);
-}
-
-SEC("raw_tracepoint")
-__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT")
-__failure __msg("tracing progs cannot use bpf_timer yet")
-__naked void bpf_prog_type_raw_tracepoint_2(void)
-{
- asm volatile (" \
- r2 = r10; \
- r2 += -8; \
- r1 = 0; \
- *(u64*)(r2 + 0) = r1; \
- r1 = %[map_timer] ll; \
- call %[bpf_map_lookup_elem]; \
- if r0 == 0 goto l0_%=; \
- r1 = r0; \
- r2 = %[map_timer] ll; \
- r3 = 1; \
-l0_%=: call %[bpf_timer_init]; \
- exit; \
-" :
- : __imm(bpf_map_lookup_elem),
- __imm(bpf_timer_init),
- __imm_addr(map_timer)
- : __clobber_all);
-}
-
-SEC("kprobe")
__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_KPROBE")
__failure __msg("tracing progs cannot use bpf_spin_lock yet")
__naked void in_bpf_prog_type_kprobe_3(void)
diff --git a/tools/testing/selftests/bpf/progs/verifier_jit_inline.c b/tools/testing/selftests/bpf/progs/verifier_jit_inline.c
new file mode 100644
index 000000000000..4ea254063646
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_jit_inline.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("fentry/bpf_fentry_test1")
+__success __retval(0)
+__arch_x86_64
+__jited(" addq %gs:{{.*}}, %rax")
+__arch_arm64
+__jited(" mrs x7, SP_EL0")
+int inline_bpf_get_current_task(void)
+{
+ bpf_get_current_task();
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c b/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c
index a509cad97e69..1fce7a7e8d03 100644
--- a/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c
+++ b/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c
@@ -32,7 +32,7 @@ static void task_kfunc_load_test(void)
}
SEC("raw_tp")
-__failure __msg("calling kernel function")
+__success
int BPF_PROG(task_kfunc_raw_tp)
{
task_kfunc_load_test();
@@ -86,7 +86,7 @@ static void cgrp_kfunc_load_test(void)
}
SEC("raw_tp")
-__failure __msg("calling kernel function")
+__success
int BPF_PROG(cgrp_kfunc_raw_tp)
{
cgrp_kfunc_load_test();
@@ -138,7 +138,7 @@ static void cpumask_kfunc_load_test(void)
}
SEC("raw_tp")
-__failure __msg("calling kernel function")
+__success
int BPF_PROG(cpumask_kfunc_raw_tp)
{
cpumask_kfunc_load_test();
diff --git a/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c
index 8f755d2464cf..2ef346c827c2 100644
--- a/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c
+++ b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
+#include <limits.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
@@ -18,9 +19,9 @@ __naked void scalars(void)
r4 = r1; \
w2 += 0x7FFFFFFF; \
w4 += 0; \
- if r2 == 0 goto l1; \
+ if r2 == 0 goto l0_%=; \
exit; \
-l1: \
+l0_%=: \
r4 >>= 63; \
r3 = 1; \
r3 -= r4; \
@@ -31,4 +32,335 @@ l1: \
" ::: __clobber_all);
}
+/*
+ * Test that sync_linked_regs() preserves register IDs.
+ *
+ * The sync_linked_regs() function copies bounds from known_reg to linked
+ * registers. When doing so, it must preserve each register's original id
+ * to allow subsequent syncs from the same source to work correctly.
+ *
+ */
+SEC("socket")
+__success
+__naked void sync_linked_regs_preserves_id(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; /* r0 in [0, 255] */ \
+ r1 = r0; /* r0, r1 linked with id 1 */ \
+ r1 += 4; /* r1 has id=1 and off=4 in [4, 259] */ \
+ if r1 < 10 goto l0_%=; \
+ /* r1 in [10, 259], r0 synced to [6, 255] */ \
+ r2 = r0; /* r2 has id=1 and in [6, 255] */ \
+ if r1 < 14 goto l0_%=; \
+ /* r1 in [14, 259], r0 synced to [10, 255] */ \
+ if r0 >= 10 goto l0_%=; \
+ /* Never executed */ \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__success
+__naked void scalars_neg(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ r1 = r0; \
+ r1 += -4; \
+ if r1 s< 0 goto l0_%=; \
+ if r0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* Same test but using BPF_SUB instead of BPF_ADD with negative immediate */
+SEC("socket")
+__success
+__naked void scalars_neg_sub(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ r1 = r0; \
+ r1 -= 4; \
+ if r1 s< 0 goto l0_%=; \
+ if r0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* alu32 with negative offset */
+SEC("socket")
+__success
+__naked void scalars_neg_alu32_add(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 0xff; \
+ w1 = w0; \
+ w1 += -4; \
+ if w1 s< 0 goto l0_%=; \
+ if w0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* alu32 with negative offset using SUB */
+SEC("socket")
+__success
+__naked void scalars_neg_alu32_sub(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 0xff; \
+ w1 = w0; \
+ w1 -= 4; \
+ if w1 s< 0 goto l0_%=; \
+ if w0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* Positive offset: r1 = r0 + 4, then if r1 >= 6, r0 >= 2, so r0 != 0 */
+SEC("socket")
+__success
+__naked void scalars_pos(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ r1 = r0; \
+ r1 += 4; \
+ if r1 < 6 goto l0_%=; \
+ if r0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* SUB with negative immediate: r1 -= -4 is equivalent to r1 += 4 */
+SEC("socket")
+__success
+__naked void scalars_sub_neg_imm(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ r1 = r0; \
+ r1 -= -4; \
+ if r1 < 6 goto l0_%=; \
+ if r0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* Double ADD clears the ID (can't accumulate offsets) */
+SEC("socket")
+__failure
+__msg("div by zero")
+__naked void scalars_double_add(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ r1 = r0; \
+ r1 += 2; \
+ r1 += 2; \
+ if r1 < 6 goto l0_%=; \
+ if r0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Test that sync_linked_regs() correctly handles large offset differences.
+ * r1.off = S32_MIN, r2.off = 1, delta = S32_MIN - 1 requires 64-bit math.
+ */
+SEC("socket")
+__success
+__naked void scalars_sync_delta_overflow(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ r1 = r0; \
+ r2 = r0; \
+ r1 += %[s32_min]; \
+ r2 += 1; \
+ if r2 s< 100 goto l0_%=; \
+ if r1 s< 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ [s32_min]"i"(INT_MIN)
+ : __clobber_all);
+}
+
+/*
+ * Another large delta case: r1.off = S32_MAX, r2.off = -1.
+ * delta = S32_MAX - (-1) = S32_MAX + 1 requires 64-bit math.
+ */
+SEC("socket")
+__success
+__naked void scalars_sync_delta_overflow_large_range(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ r1 = r0; \
+ r2 = r0; \
+ r1 += %[s32_max]; \
+ r2 += -1; \
+ if r2 s< 0 goto l0_%=; \
+ if r1 s>= 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ [s32_max]"i"(INT_MAX)
+ : __clobber_all);
+}
+
+/*
+ * Test linked scalar tracking with alu32 and large positive offset (0x7FFFFFFF).
+ * After w1 += 0x7FFFFFFF, w1 wraps to negative for any r0 >= 1.
+ * If w1 is signed-negative, then r0 >= 1, so r0 != 0.
+ */
+SEC("socket")
+__success
+__naked void scalars_alu32_big_offset(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 0xff; \
+ w1 = w0; \
+ w1 += 0x7FFFFFFF; \
+ if w1 s>= 0 goto l0_%=; \
+ if w0 != 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure
+__msg("div by zero")
+__naked void scalars_alu32_basic(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ w1 += 1; \
+ if r1 > 10 goto 1f; \
+ r0 >>= 32; \
+ if r0 == 0 goto 1f; \
+ r0 /= 0; \
+1: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Test alu32 linked register tracking with wrapping.
+ * R0 is bounded to [0xffffff00, 0xffffffff] (top of the u32 range)
+ * w1 += 0x100 causes R1 to wrap to [0, 0xff]
+ *
+ * After sync_linked_regs, if bounds are computed correctly:
+ * R0 should be [0x00000000_ffffff00, 0x00000000_ffffff80]
+ * R0 >> 32 == 0, so div by zero is unreachable
+ *
+ * If bounds are computed incorrectly (64-bit underflow):
+ * R0 becomes [0xffffffff_ffffff00, 0xffffffff_ffffff80]
+ * R0 >> 32 == 0xffffffff != 0, so div by zero is reachable
+ */
+SEC("socket")
+__success
+__naked void scalars_alu32_wrap(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w0 |= 0xffffff00; \
+ r1 = r0; \
+ w1 += 0x100; \
+ if r1 > 0x80 goto l0_%=; \
+ r2 = r0; \
+ r2 >>= 32; \
+ if r2 == 0 goto l0_%=; \
+ r0 /= 0; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__success
+void alu32_negative_offset(void)
+{
+ volatile char path[5];
+ volatile int offset = bpf_get_prandom_u32();
+ int off = offset;
+
+ if (off >= 5 && off < 10)
+ path[off - 5] = '.';
+
+ /* So compiler doesn't say: error: variable 'path' set but not used */
+ __sink(path[0]);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_lsm.c b/tools/testing/selftests/bpf/progs/verifier_lsm.c
index 6af9100a37ff..38e8e9176862 100644
--- a/tools/testing/selftests/bpf/progs/verifier_lsm.c
+++ b/tools/testing/selftests/bpf/progs/verifier_lsm.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
SEC("lsm/file_permission")
@@ -159,4 +160,32 @@ __naked int disabled_hook_test3(void *ctx)
::: __clobber_all);
}
+SEC("lsm/mmap_file")
+__description("not null checking nullable pointer in bpf_lsm_mmap_file")
+__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
+int BPF_PROG(no_null_check, struct file *file)
+{
+ struct inode *inode;
+
+ inode = file->f_inode;
+ __sink(inode);
+
+ return 0;
+}
+
+SEC("lsm/mmap_file")
+__description("null checking nullable pointer in bpf_lsm_mmap_file")
+__success
+int BPF_PROG(null_check, struct file *file)
+{
+ struct inode *inode;
+
+ if (file) {
+ inode = file->f_inode;
+ __sink(inode);
+ }
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
index c0ce690ddb68..3072fee9a448 100644
--- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
+++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
@@ -715,6 +715,51 @@ __naked void ignore_unique_scalar_ids_old(void)
: __clobber_all);
}
+/* Check that two registers with 0 scalar IDs in a verified state can be mapped
+ * to the same scalar ID in current state.
+ */
+SEC("socket")
+__success __log_level(2)
+/* The states should be equivalent on reaching insn 12.
+ */
+__msg("12: safe")
+__msg("processed 17 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void two_nil_old_ids_one_cur_id(void)
+{
+ asm volatile (
+ /* Give unique scalar IDs to r{6,7} */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r6 = r0;"
+ "r6 *= 1;"
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r7 = r0;"
+ "r7 *= 1;"
+ "r0 = 0;"
+ /* Maybe make r{6,7} IDs identical */
+ "if r6 > r7 goto l0_%=;"
+ "goto l1_%=;"
+"l0_%=:"
+ "r6 = r7;"
+"l1_%=:"
+ /* Mark r{6,7} precise.
+ * Get here in two states:
+ * - first: r6{.id=0}, r7{.id=0} (cached state)
+ * - second: r6{.id=A}, r7{.id=A}
+ * Verifier considers such states equivalent.
+ * Thus "exit;" would be verified only once.
+ */
+ "r2 = r10;"
+ "r2 += r6;"
+ "r2 += r7;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
/* Check that two different scalar IDs in a verified state can't be
* mapped to the same scalar ID in current state.
*/
@@ -723,9 +768,9 @@ __success __log_level(2)
/* The exit instruction should be reachable from two states,
* use two matches and "processed .. insns" to ensure this.
*/
-__msg("13: (95) exit")
-__msg("13: (95) exit")
-__msg("processed 18 insns")
+__msg("15: (95) exit")
+__msg("15: (95) exit")
+__msg("processed 20 insns")
__flag(BPF_F_TEST_STATE_FREQ)
__naked void two_old_ids_one_cur_id(void)
{
@@ -734,9 +779,11 @@ __naked void two_old_ids_one_cur_id(void)
"call %[bpf_ktime_get_ns];"
"r0 &= 0xff;"
"r6 = r0;"
+ "r8 = r0;"
"call %[bpf_ktime_get_ns];"
"r0 &= 0xff;"
"r7 = r0;"
+ "r9 = r0;"
"r0 = 0;"
/* Maybe make r{6,7} IDs identical */
"if r6 > r7 goto l0_%=;"
diff --git a/tools/testing/selftests/bpf/progs/verifier_subreg.c b/tools/testing/selftests/bpf/progs/verifier_subreg.c
index 8613ea160dcd..be328100ba53 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subreg.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subreg.c
@@ -532,6 +532,74 @@ __naked void arsh32_imm_zero_extend_check(void)
}
SEC("socket")
+__description("arsh32 imm sign positive extend check")
+__success __retval(0)
+__log_level(2)
+__msg("2: (57) r6 &= 4095 ; R6=scalar(smin=smin32=0,smax=umax=smax32=umax32=4095,var_off=(0x0; 0xfff))")
+__msg("3: (67) r6 <<= 32 ; R6=scalar(smin=smin32=0,smax=umax=0xfff00000000,smax32=umax32=0,var_off=(0x0; 0xfff00000000))")
+__msg("4: (c7) r6 s>>= 32 ; R6=scalar(smin=smin32=0,smax=umax=smax32=umax32=4095,var_off=(0x0; 0xfff))")
+__naked void arsh32_imm_sign_extend_positive_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ r6 &= 4095; \
+ r6 <<= 32; \
+ r6 s>>= 32; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("arsh32 imm sign negative extend check")
+__success __retval(0)
+__log_level(2)
+__msg("3: (17) r6 -= 4095 ; R6=scalar(smin=smin32=-4095,smax=smax32=0)")
+__msg("4: (67) r6 <<= 32 ; R6=scalar(smin=0xfffff00100000000,smax=smax32=umax32=0,umax=0xffffffff00000000,smin32=0,var_off=(0x0; 0xffffffff00000000))")
+__msg("5: (c7) r6 s>>= 32 ; R6=scalar(smin=smin32=-4095,smax=smax32=0)")
+__naked void arsh32_imm_sign_extend_negative_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ r6 &= 4095; \
+ r6 -= 4095; \
+ r6 <<= 32; \
+ r6 s>>= 32; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("arsh32 imm sign extend check")
+__success __retval(0)
+__log_level(2)
+__msg("3: (17) r6 -= 2047 ; R6=scalar(smin=smin32=-2047,smax=smax32=2048)")
+__msg("4: (67) r6 <<= 32 ; R6=scalar(smin=0xfffff80100000000,smax=0x80000000000,umax=0xffffffff00000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xffffffff00000000))")
+__msg("5: (c7) r6 s>>= 32 ; R6=scalar(smin=smin32=-2047,smax=smax32=2048)")
+__naked void arsh32_imm_sign_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ r6 &= 4095; \
+ r6 -= 2047; \
+ r6 <<= 32; \
+ r6 s>>= 32; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
__description("end16 (to_le) reg zero extend check")
__success __success_unpriv __retval(0)
__naked void le_reg_zero_extend_check_1(void)
@@ -670,4 +738,89 @@ __naked void ldx_w_zero_extend_check(void)
: __clobber_all);
}
+SEC("socket")
+__success __success_unpriv __retval(0)
+__naked void arsh_31_and(void)
+{
+ /* Below is what LLVM generates in cilium's bpf_wireguard.o */
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w2 = w0; \
+ w2 s>>= 31; \
+ w2 &= -134; /* w2 becomes 0 or -134 */ \
+ if w2 s> -1 goto +2; \
+ /* Branch always taken because w2 = -134 */ \
+ if w2 != -136 goto +1; \
+ w0 /= 0; \
+ w0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __success_unpriv __retval(0)
+__naked void arsh_63_and(void)
+{
+ /* Copy of arsh_31_and with s/w/r/; r2 is additionally shifted left by 32 and the arithmetic shift is by 63 */
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ r2 <<= 32; \
+ r2 s>>= 63; \
+ r2 &= -134; \
+ if r2 s> -1 goto +2; \
+ /* Branch always taken because w2 = -134 */ \
+ if r2 != -136 goto +1; \
+ r0 /= 0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __success_unpriv __retval(0)
+__naked void arsh_31_or(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w2 = w0; \
+ w2 s>>= 31; \
+ w2 |= 134; /* w2 becomes -1 or 134 */ \
+ if w2 s> -1 goto +2; \
+ /* Branch always taken because w2 = -1 */ \
+ if w2 == -1 goto +1; \
+ w0 /= 0; \
+ w0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __success_unpriv __retval(0)
+__naked void arsh_63_or(void)
+{
+ /* Copy of arsh_31_or with s/w/r/; r2 is additionally shifted left by 32 and the arithmetic shift is by 63 */
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ r2 <<= 32; \
+ r2 s>>= 63; \
+ r2 |= 134; /* r2 becomes -1 or 134 */ \
+ if r2 s> -1 goto +2; \
+ /* Branch always taken because w2 = -1 */ \
+ if r2 == -1 goto +1; \
+ r0 /= 0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
index 28b4f7035ceb..8ee1243e62a8 100644
--- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
@@ -950,4 +950,26 @@ l3_%=: r0 = 0; \
" ::: __clobber_all);
}
+SEC("socket")
+__description("unpriv: nospec after dead stack write in helper")
+__success __success_unpriv
+__retval(0)
+/* Dead code sanitizer rewrites the call to `goto -1`. */
+__naked void unpriv_dead_helper_stack_write_nospec_result(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 != 1 goto l0_%=; \
+ r2 = 0; \
+ r3 = r10; \
+ r3 += -16; \
+ r4 = 4; \
+ r5 = 0; \
+ call %[bpf_skb_load_bytes_relative]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_skb_load_bytes_relative)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
index 2129e4353fd9..4d8273c258d5 100644
--- a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
+++ b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
@@ -173,14 +173,15 @@ __naked void flow_keys_illegal_variable_offset_alu(void)
asm volatile(" \
r6 = r1; \
r7 = *(u64*)(r6 + %[flow_keys_off]); \
- r8 = 8; \
- r8 /= 1; \
+ call %[bpf_get_prandom_u32]; \
+ r8 = r0; \
r8 &= 8; \
r7 += r8; \
r0 = *(u64*)(r7 + 0); \
exit; \
" :
- : __imm_const(flow_keys_off, offsetof(struct __sk_buff, flow_keys))
+ : __imm_const(flow_keys_off, offsetof(struct __sk_buff, flow_keys)),
+ __imm(bpf_get_prandom_u32)
: __clobber_all);
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_xdp.c b/tools/testing/selftests/bpf/progs/verifier_xdp.c
index 50768ed179b3..7dc9226aeb34 100644
--- a/tools/testing/selftests/bpf/progs/verifier_xdp.c
+++ b/tools/testing/selftests/bpf/progs/verifier_xdp.c
@@ -5,6 +5,14 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, __u64);
+ __uint(map_flags, BPF_F_RDONLY_PROG);
+} map_array_ro SEC(".maps");
+
SEC("xdp")
__description("XDP, using ifindex from netdev")
__success __retval(1)
@@ -21,4 +29,31 @@ l0_%=: exit; \
: __clobber_all);
}
+SEC("xdp")
+__description("XDP, using xdp_store_bytes from RO map")
+__success __retval(0)
+__naked void xdp_store_bytes_from_ro_map(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_ro] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r6; \
+ r2 = 0; \
+ r3 = r0; \
+ r4 = 8; \
+ call %[bpf_xdp_store_bytes]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_xdp_store_bytes),
+ __imm_addr(map_array_ro)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c
index d06f6d40594a..3767f5595bbc 100644
--- a/tools/testing/selftests/bpf/progs/wq_failures.c
+++ b/tools/testing/selftests/bpf/progs/wq_failures.c
@@ -97,7 +97,7 @@ __failure
/* check that the first argument of bpf_wq_set_callback()
* is a correct bpf_wq pointer.
*/
-__msg(": (85) call bpf_wq_set_callback_impl#") /* anchor message */
+__msg(": (85) call bpf_wq_set_callback#") /* anchor message */
__msg("arg#0 doesn't point to a map value")
long test_wrong_wq_pointer(void *ctx)
{
@@ -123,7 +123,7 @@ __failure
/* check that the first argument of bpf_wq_set_callback()
* is a correct bpf_wq pointer.
*/
-__msg(": (85) call bpf_wq_set_callback_impl#") /* anchor message */
+__msg(": (85) call bpf_wq_set_callback#") /* anchor message */
__msg("off 1 doesn't point to 'struct bpf_wq' that is at 0")
long test_wrong_wq_pointer_offset(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/test_bpftool_map.sh b/tools/testing/selftests/bpf/test_bpftool_map.sh
deleted file mode 100755
index 515b1df0501e..000000000000
--- a/tools/testing/selftests/bpf/test_bpftool_map.sh
+++ /dev/null
@@ -1,398 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-TESTNAME="bpftool_map"
-BPF_FILE="security_bpf_map.bpf.o"
-BPF_ITER_FILE="bpf_iter_map_elem.bpf.o"
-PROTECTED_MAP_NAME="prot_map"
-NOT_PROTECTED_MAP_NAME="not_prot_map"
-BPF_FS_TMP_PARENT="/tmp"
-BPF_FS_PARENT=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
-BPF_FS_PARENT=${BPF_FS_PARENT:-$BPF_FS_TMP_PARENT}
-# bpftool will mount bpf file system under BPF_DIR if it is not mounted
-# under BPF_FS_PARENT.
-BPF_DIR="$BPF_FS_PARENT/test_$TESTNAME"
-SCRIPT_DIR=$(dirname $(realpath "$0"))
-BPF_FILE_PATH="$SCRIPT_DIR/$BPF_FILE"
-BPF_ITER_FILE_PATH="$SCRIPT_DIR/$BPF_ITER_FILE"
-BPFTOOL_PATH="bpftool"
-# Assume the script is located under tools/testing/selftests/bpf/
-KDIR_ROOT_DIR=$(realpath "$SCRIPT_DIR"/../../../../)
-
-_cleanup()
-{
- set +eu
-
- # If BPF_DIR is a mount point this will not remove the mount point itself.
- [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2> /dev/null
-
- # Unmount if BPF filesystem was temporarily created.
- if [ "$BPF_FS_PARENT" = "$BPF_FS_TMP_PARENT" ]; then
- # A loop and recursive unmount are required as bpftool might
- # create multiple mounts. For example, a bind mount of the directory
- # to itself. The bind mount is created to change mount propagation
- # flags on an actual mount point.
- max_attempts=3
- attempt=0
- while mountpoint -q "$BPF_DIR" && [ $attempt -lt $max_attempts ]; do
- umount -R "$BPF_DIR" 2>/dev/null
- attempt=$((attempt+1))
- done
-
- # The directory still exists. Remove it now.
- [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2>/dev/null
- fi
-}
-
-cleanup_skip()
-{
- echo "selftests: $TESTNAME [SKIP]"
- _cleanup
-
- exit $ksft_skip
-}
-
-cleanup()
-{
- if [ "$?" = 0 ]; then
- echo "selftests: $TESTNAME [PASS]"
- else
- echo "selftests: $TESTNAME [FAILED]"
- fi
- _cleanup
-}
-
-check_root_privileges() {
- if [ $(id -u) -ne 0 ]; then
- echo "Need root privileges"
- exit $ksft_skip
- fi
-}
-
-# Function to verify bpftool path.
-# Parameters:
-# $1: bpftool path
-verify_bpftool_path() {
- local bpftool_path="$1"
- if ! "$bpftool_path" version > /dev/null 2>&1; then
- echo "Could not run test without bpftool"
- exit $ksft_skip
- fi
-}
-
-# Function to verify BTF support.
-# The test requires BTF support for fmod_ret programs.
-verify_btf_support() {
- if [ ! -f /sys/kernel/btf/vmlinux ]; then
- echo "Could not run test without BTF support"
- exit $ksft_skip
- fi
-}
-
-# Function to initialize map entries with keys [0..2] and values set to 0.
-# Parameters:
-# $1: Map name
-# $2: bpftool path
-initialize_map_entries() {
- local map_name="$1"
- local bpftool_path="$2"
-
- for key in 0 1 2; do
- "$bpftool_path" map update name "$map_name" key $key 0 0 0 value 0 0 0 $key
- done
-}
-
-# Test read access to the map.
-# Parameters:
-# $1: Name command (name/pinned)
-# $2: Map name
-# $3: bpftool path
-# $4: key
-access_for_read() {
- local name_cmd="$1"
- local map_name="$2"
- local bpftool_path="$3"
- local key="$4"
-
- # Test read access to the map.
- if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
- echo " Read access to $key in $map_name failed"
- exit 1
- fi
-
- # Test read access to map's BTF data.
- if ! "$bpftool_path" btf dump map "$name_cmd" "$map_name" 1>/dev/null; then
- echo " Read access to $map_name for BTF data failed"
- exit 1
- fi
-}
-
-# Test write access to the map.
-# Parameters:
-# $1: Name command (name/pinned)
-# $2: Map name
-# $3: bpftool path
-# $4: key
-# $5: Whether write should succeed (true/false)
-access_for_write() {
- local name_cmd="$1"
- local map_name="$2"
- local bpftool_path="$3"
- local key="$4"
- local write_should_succeed="$5"
- local value="1 1 1 1"
-
- if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \
- $value 2>/dev/null; then
- if [ "$write_should_succeed" = "false" ]; then
- echo " Write access to $key in $map_name succeeded but should have failed"
- exit 1
- fi
- else
- if [ "$write_should_succeed" = "true" ]; then
- echo " Write access to $key in $map_name failed but should have succeeded"
- exit 1
- fi
- fi
-}
-
-# Test entry deletion for the map.
-# Parameters:
-# $1: Name command (name/pinned)
-# $2: Map name
-# $3: bpftool path
-# $4: key
-# $5: Whether write should succeed (true/false)
-access_for_deletion() {
- local name_cmd="$1"
- local map_name="$2"
- local bpftool_path="$3"
- local key="$4"
- local write_should_succeed="$5"
- local value="1 1 1 1"
-
- # Test deletion by key for the map.
- # Before deleting, check the key exists.
- if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
- echo " Key $key does not exist in $map_name"
- exit 1
- fi
-
- # Delete by key.
- if "$bpftool_path" map delete "$name_cmd" "$map_name" key $key 2>/dev/null; then
- if [ "$write_should_succeed" = "false" ]; then
- echo " Deletion for $key in $map_name succeeded but should have failed"
- exit 1
- fi
- else
- if [ "$write_should_succeed" = "true" ]; then
- echo " Deletion for $key in $map_name failed but should have succeeded"
- exit 1
- fi
- fi
-
- # After deleting, check the entry existence according to the expected status.
- if "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
- if [ "$write_should_succeed" = "true" ]; then
- echo " Key $key for $map_name was not deleted but should have been deleted"
- exit 1
- fi
- else
- if [ "$write_should_succeed" = "false" ]; then
- echo "Key $key for $map_name was deleted but should have not been deleted"
- exit 1
- fi
- fi
-
- # Test creation of map's deleted entry, if deletion was successful.
- # Otherwise, the entry exists.
- if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \
- $value 2>/dev/null; then
- if [ "$write_should_succeed" = "false" ]; then
- echo " Write access to $key in $map_name succeeded after deletion attempt but should have failed"
- exit 1
- fi
- else
- if [ "$write_should_succeed" = "true" ]; then
- echo " Write access to $key in $map_name failed after deletion attempt but should have succeeded"
- exit 1
- fi
- fi
-}
-
-# Test map elements iterator.
-# Parameters:
-# $1: Name command (name/pinned)
-# $2: Map name
-# $3: bpftool path
-# $4: BPF_DIR
-# $5: bpf iterator object file path
-iterate_map_elem() {
- local name_cmd="$1"
- local map_name="$2"
- local bpftool_path="$3"
- local bpf_dir="$4"
- local bpf_file="$5"
- local pin_path="$bpf_dir/map_iterator"
-
- "$bpftool_path" iter pin "$bpf_file" "$pin_path" map "$name_cmd" "$map_name"
- if [ ! -f "$pin_path" ]; then
- echo " Failed to pin iterator to $pin_path"
- exit 1
- fi
-
- cat "$pin_path" 1>/dev/null
- rm "$pin_path" 2>/dev/null
-}
-
-# Function to test map access with configurable write expectations
-# Parameters:
-# $1: Name command (name/pinned)
-# $2: Map name
-# $3: bpftool path
-# $4: key for rw
-# $5: key to delete
-# $6: Whether write should succeed (true/false)
-# $7: BPF_DIR
-# $8: bpf iterator object file path
-access_map() {
- local name_cmd="$1"
- local map_name="$2"
- local bpftool_path="$3"
- local key_for_rw="$4"
- local key_to_del="$5"
- local write_should_succeed="$6"
- local bpf_dir="$7"
- local bpf_iter_file_path="$8"
-
- access_for_read "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw"
- access_for_write "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw" \
- "$write_should_succeed"
- access_for_deletion "$name_cmd" "$map_name" "$bpftool_path" "$key_to_del" \
- "$write_should_succeed"
- iterate_map_elem "$name_cmd" "$map_name" "$bpftool_path" "$bpf_dir" \
- "$bpf_iter_file_path"
-}
-
-# Function to test map access with configurable write expectations
-# Parameters:
-# $1: Map name
-# $2: bpftool path
-# $3: BPF_DIR
-# $4: Whether write should succeed (true/false)
-# $5: bpf iterator object file path
-test_map_access() {
- local map_name="$1"
- local bpftool_path="$2"
- local bpf_dir="$3"
- local pin_path="$bpf_dir/${map_name}_pinned"
- local write_should_succeed="$4"
- local bpf_iter_file_path="$5"
-
- # Test access to the map by name.
- access_map "name" "$map_name" "$bpftool_path" "0 0 0 0" "1 0 0 0" \
- "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path"
-
- # Pin the map to the BPF filesystem
- "$bpftool_path" map pin name "$map_name" "$pin_path"
- if [ ! -e "$pin_path" ]; then
- echo " Failed to pin $map_name"
- exit 1
- fi
-
- # Test access to the pinned map.
- access_map "pinned" "$pin_path" "$bpftool_path" "0 0 0 0" "2 0 0 0" \
- "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path"
-}
-
-# Function to test map creation and map-of-maps
-# Parameters:
-# $1: bpftool path
-# $2: BPF_DIR
-test_map_creation_and_map_of_maps() {
- local bpftool_path="$1"
- local bpf_dir="$2"
- local outer_map_name="outer_map_tt"
- local inner_map_name="inner_map_tt"
-
- "$bpftool_path" map create "$bpf_dir/$inner_map_name" type array key 4 \
- value 4 entries 4 name "$inner_map_name"
- if [ ! -f "$bpf_dir/$inner_map_name" ]; then
- echo " Failed to create inner map file at $bpf_dir/$outer_map_name"
- return 1
- fi
-
- "$bpftool_path" map create "$bpf_dir/$outer_map_name" type hash_of_maps \
- key 4 value 4 entries 2 name "$outer_map_name" inner_map name "$inner_map_name"
- if [ ! -f "$bpf_dir/$outer_map_name" ]; then
- echo " Failed to create outer map file at $bpf_dir/$outer_map_name"
- return 1
- fi
-
- # Add entries to the outer map by name and by pinned path.
- "$bpftool_path" map update pinned "$bpf_dir/$outer_map_name" key 0 0 0 0 \
- value pinned "$bpf_dir/$inner_map_name"
- "$bpftool_path" map update name "$outer_map_name" key 1 0 0 0 value \
- name "$inner_map_name"
-
- # The outer map should be full by now.
- # The following map update command is expected to fail.
- if "$bpftool_path" map update name "$outer_map_name" key 2 0 0 0 value name \
- "$inner_map_name" 2>/dev/null; then
- echo " Update for $outer_map_name succeeded but should have failed"
- exit 1
- fi
-}
-
-# Function to test map access with the btf list command
-# Parameters:
-# $1: bpftool path
-test_map_access_with_btf_list() {
- local bpftool_path="$1"
-
- # The btf list command iterates over maps for
- # loaded BPF programs.
- if ! "$bpftool_path" btf list 1>/dev/null; then
- echo " Failed to access btf data"
- exit 1
- fi
-}
-
-set -eu
-
-trap cleanup_skip EXIT
-
-check_root_privileges
-
-verify_bpftool_path "$BPFTOOL_PATH"
-
-verify_btf_support
-
-trap cleanup EXIT
-
-# Load and attach the BPF programs to control maps access.
-"$BPFTOOL_PATH" prog loadall "$BPF_FILE_PATH" "$BPF_DIR" autoattach
-
-initialize_map_entries "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH"
-initialize_map_entries "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH"
-
-# Activate the map protection mechanism. Protection status is controlled
-# by a value stored in the prot_status_map at index 0.
-"$BPFTOOL_PATH" map update name prot_status_map key 0 0 0 0 value 1 0 0 0
-
-# Test protected map (write should fail).
-test_map_access "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "false" \
- "$BPF_ITER_FILE_PATH"
-
-# Test not protected map (write should succeed).
-test_map_access "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "true" \
- "$BPF_ITER_FILE_PATH"
-
-test_map_creation_and_map_of_maps "$BPFTOOL_PATH" "$BPF_DIR"
-
-test_map_access_with_btf_list "$BPFTOOL_PATH"
-
-exit 0
diff --git a/tools/testing/selftests/bpf/test_bpftool_metadata.sh b/tools/testing/selftests/bpf/test_bpftool_metadata.sh
deleted file mode 100755
index b5520692f41b..000000000000
--- a/tools/testing/selftests/bpf/test_bpftool_metadata.sh
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-BPF_FILE_USED="metadata_used.bpf.o"
-BPF_FILE_UNUSED="metadata_unused.bpf.o"
-
-TESTNAME=bpftool_metadata
-BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
-BPF_DIR=$BPF_FS/test_$TESTNAME
-
-_cleanup()
-{
- set +e
- rm -rf $BPF_DIR 2> /dev/null
-}
-
-cleanup_skip()
-{
- echo "selftests: $TESTNAME [SKIP]"
- _cleanup
-
- exit $ksft_skip
-}
-
-cleanup()
-{
- if [ "$?" = 0 ]; then
- echo "selftests: $TESTNAME [PASS]"
- else
- echo "selftests: $TESTNAME [FAILED]"
- fi
- _cleanup
-}
-
-if [ $(id -u) -ne 0 ]; then
- echo "selftests: $TESTNAME [SKIP] Need root privileges"
- exit $ksft_skip
-fi
-
-if [ -z "$BPF_FS" ]; then
- echo "selftests: $TESTNAME [SKIP] Could not run test without bpffs mounted"
- exit $ksft_skip
-fi
-
-if ! bpftool version > /dev/null 2>&1; then
- echo "selftests: $TESTNAME [SKIP] Could not run test without bpftool"
- exit $ksft_skip
-fi
-
-set -e
-
-trap cleanup_skip EXIT
-
-mkdir $BPF_DIR
-
-trap cleanup EXIT
-
-bpftool prog load $BPF_FILE_UNUSED $BPF_DIR/unused
-
-METADATA_PLAIN="$(bpftool prog)"
-echo "$METADATA_PLAIN" | grep 'a = "foo"' > /dev/null
-echo "$METADATA_PLAIN" | grep 'b = 1' > /dev/null
-
-bpftool prog --json | grep '"metadata":{"a":"foo","b":1}' > /dev/null
-
-bpftool map | grep 'metadata.rodata' > /dev/null
-
-rm $BPF_DIR/unused
-
-bpftool prog load $BPF_FILE_USED $BPF_DIR/used
-
-METADATA_PLAIN="$(bpftool prog)"
-echo "$METADATA_PLAIN" | grep 'a = "bar"' > /dev/null
-echo "$METADATA_PLAIN" | grep 'b = 2' > /dev/null
-
-bpftool prog --json | grep '"metadata":{"a":"bar","b":2}' > /dev/null
-
-bpftool map | grep 'metadata.rodata' > /dev/null
-
-rm $BPF_DIR/used
-
-exit 0
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h
index aeef86b3da74..45a5e41f3a92 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h
@@ -63,6 +63,16 @@ BPF_TESTMOD_DECLARE_TRACE(bpf_testmod_test_writable_bare,
sizeof(struct bpf_testmod_test_writable_ctx)
);
+DECLARE_TRACE(bpf_testmod_fentry_test1,
+ TP_PROTO(int a),
+ TP_ARGS(a)
+);
+
+DECLARE_TRACE(bpf_testmod_fentry_test2,
+ TP_PROTO(int a, u64 b),
+ TP_ARGS(a, b)
+);
+
#endif /* _BPF_TESTMOD_EVENTS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index 1669a7eeda26..e62c6b78657f 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -18,6 +18,7 @@
#include <linux/in6.h>
#include <linux/un.h>
#include <linux/filter.h>
+#include <linux/rcupdate_trace.h>
#include <net/sock.h>
#include <linux/namei.h>
#include "bpf_testmod.h"
@@ -254,6 +255,22 @@ __bpf_kfunc int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size)
return NULL;
}
+static struct prog_test_member trusted_ptr;
+
+__bpf_kfunc struct prog_test_member *bpf_kfunc_get_default_trusted_ptr_test(void)
+{
+ return &trusted_ptr;
+}
+
+__bpf_kfunc void bpf_kfunc_put_default_trusted_ptr_test(struct prog_test_member *trusted_ptr)
+{
+ /*
+ * Despite the "put" in its name, this BPF kfunc doesn't actually have
+ * any put/KF_RELEASE semantics; the body is intentionally empty.
+ * We're simply wanting to simulate a BPF kfunc that takes a
+ * struct prog_test_member pointer as an argument.
+ */
+}
+
__bpf_kfunc struct bpf_testmod_ctx *
bpf_testmod_ctx_create(int *err)
{
@@ -285,6 +302,12 @@ __bpf_kfunc void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx)
call_rcu(&ctx->rcu, testmod_free_cb);
}
+/*
+ * void *-typed wrapper that forwards to bpf_testmod_ctx_release(). This is
+ * the function listed in bpf_testmod_dtor_ids for struct bpf_testmod_ctx.
+ */
+__bpf_kfunc void bpf_testmod_ctx_release_dtor(void *ctx)
+{
+ bpf_testmod_ctx_release(ctx);
+}
+CFI_NOSEAL(bpf_testmod_ctx_release_dtor);
+
static struct bpf_testmod_ops3 *st_ops3;
static int bpf_testmod_test_3(void)
@@ -390,11 +413,15 @@ __weak noinline struct file *bpf_testmod_return_ptr(int arg)
noinline int bpf_testmod_fentry_test1(int a)
{
+ trace_bpf_testmod_fentry_test1_tp(a);
+
return a + 1;
}
noinline int bpf_testmod_fentry_test2(int a, u64 b)
{
+ trace_bpf_testmod_fentry_test2_tp(a, b);
+
return a + b;
}
@@ -693,9 +720,9 @@ BTF_ID_FLAGS(func, bpf_kfunc_dynptr_test)
BTF_ID_FLAGS(func, bpf_kfunc_nested_acquire_nonzero_offset_test, KF_ACQUIRE)
BTF_ID_FLAGS(func, bpf_kfunc_nested_acquire_zero_offset_test, KF_ACQUIRE)
BTF_ID_FLAGS(func, bpf_kfunc_nested_release_test, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_kfunc_trusted_vma_test, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_kfunc_trusted_task_test, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_trusted_vma_test)
+BTF_ID_FLAGS(func, bpf_kfunc_trusted_task_test)
+BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test)
BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU)
BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test, KF_RET_NULL | KF_RCU_PROTECTED)
BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test_nostruct, KF_RET_NULL | KF_RCU_PROTECTED)
@@ -703,11 +730,13 @@ BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_1)
BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_2)
+BTF_ID_FLAGS(func, bpf_kfunc_get_default_trusted_ptr_test);
+BTF_ID_FLAGS(func, bpf_kfunc_put_default_trusted_ptr_test);
BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids)
BTF_ID_LIST(bpf_testmod_dtor_ids)
BTF_ID(struct, bpf_testmod_ctx)
-BTF_ID(func, bpf_testmod_ctx_release)
+BTF_ID(func, bpf_testmod_ctx_release_dtor)
static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = {
.owner = THIS_MODULE,
@@ -857,6 +886,32 @@ __bpf_kfunc void bpf_kfunc_call_test_sleepable(void)
{
}
+struct bpf_kfunc_rcu_tasks_trace_data {
+ struct rcu_head rcu;
+ int *done;
+};
+
+/*
+ * Callback queued by bpf_kfunc_call_test_call_rcu_tasks_trace(): stores 1
+ * through the saved 'done' pointer and frees the callback record.
+ */
+static void bpf_kfunc_rcu_tasks_trace_cb(struct rcu_head *rhp)
+{
+ struct bpf_kfunc_rcu_tasks_trace_data *data;
+
+ data = container_of(rhp, struct bpf_kfunc_rcu_tasks_trace_data, rcu);
+ WRITE_ONCE(*data->done, 1);
+ kfree(data);
+}
+
+/*
+ * Allocate a callback record with GFP_ATOMIC, remember @done in it and queue
+ * bpf_kfunc_rcu_tasks_trace_cb() via call_rcu_tasks_trace(). The callback
+ * later writes 1 to *@done and frees the record.
+ *
+ * Returns 0 on success, -ENOMEM if the record cannot be allocated.
+ *
+ * NOTE(review): @done is dereferenced from the callback, so it presumably
+ * has to stay valid until the callback has run - confirm against callers.
+ */
+__bpf_kfunc int bpf_kfunc_call_test_call_rcu_tasks_trace(int *done)
+{
+ struct bpf_kfunc_rcu_tasks_trace_data *data;
+
+ data = kmalloc(sizeof(*data), GFP_ATOMIC);
+ if (!data)
+ return -ENOMEM;
+ data->done = done;
+ call_rcu_tasks_trace(&data->rcu, bpf_kfunc_rcu_tasks_trace_cb);
+ return 0;
+}
+
__bpf_kfunc int bpf_kfunc_init_sock(struct init_sock_args *args)
{
int proto;
@@ -1134,6 +1189,38 @@ __bpf_kfunc int bpf_kfunc_st_ops_inc10(struct st_ops_args *args)
}
__bpf_kfunc int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id);
+__bpf_kfunc int bpf_kfunc_multi_st_ops_test_1_assoc(struct st_ops_args *args, struct bpf_prog_aux *aux);
+
+__bpf_kfunc int bpf_kfunc_implicit_arg(int a, struct bpf_prog_aux *aux);
+__bpf_kfunc int bpf_kfunc_implicit_arg_legacy(int a, int b, struct bpf_prog_aux *aux);
+__bpf_kfunc int bpf_kfunc_implicit_arg_legacy_impl(int a, int b, struct bpf_prog_aux *aux);
+
+/* hook targets */
+noinline void bpf_testmod_test_hardirq_fn(void) { barrier(); }
+noinline void bpf_testmod_test_softirq_fn(void) { barrier(); }
+
+/* Tasklet for SoftIRQ context */
+static void ctx_check_tasklet_fn(struct tasklet_struct *t)
+{
+ bpf_testmod_test_softirq_fn();
+}
+
+DECLARE_TASKLET(ctx_check_tasklet, ctx_check_tasklet_fn);
+
+/* IRQ Work for HardIRQ context */
+static void ctx_check_irq_fn(struct irq_work *work)
+{
+ bpf_testmod_test_hardirq_fn();
+ tasklet_schedule(&ctx_check_tasklet);
+}
+
+static struct irq_work ctx_check_irq = IRQ_WORK_INIT_HARD(ctx_check_irq_fn);
+
+/*
+ * The kfunc trigger: queues ctx_check_irq. Its handler, ctx_check_irq_fn(),
+ * calls bpf_testmod_test_hardirq_fn() and schedules ctx_check_tasklet, whose
+ * handler in turn calls bpf_testmod_test_softirq_fn().
+ */
+__bpf_kfunc void bpf_kfunc_trigger_ctx_check(void)
+{
+ irq_work_queue(&ctx_check_irq);
+}
BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
@@ -1157,11 +1244,12 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_RCU)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_sleepable, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_call_rcu_tasks_trace)
BTF_ID_FLAGS(func, bpf_kfunc_init_sock, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_close_sock, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_connect, KF_SLEEPABLE)
@@ -1171,11 +1259,16 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_sendmsg, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_call_sock_sendmsg, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getsockname, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getpeername, KF_SLEEPABLE)
-BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_prologue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
-BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
-BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_pro_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
-BTF_ID_FLAGS(func, bpf_kfunc_st_ops_inc10, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_kfunc_multi_st_ops_test_1, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_prologue, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_epilogue, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_pro_epilogue, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_st_ops_inc10)
+BTF_ID_FLAGS(func, bpf_kfunc_multi_st_ops_test_1)
+BTF_ID_FLAGS(func, bpf_kfunc_multi_st_ops_test_1_assoc, KF_IMPLICIT_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_implicit_arg, KF_IMPLICIT_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_implicit_arg_legacy, KF_IMPLICIT_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_implicit_arg_legacy_impl)
+BTF_ID_FLAGS(func, bpf_kfunc_trigger_ctx_check)
BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids)
static int bpf_testmod_ops_init(struct btf *btf)
@@ -1637,6 +1730,7 @@ static struct bpf_testmod_multi_st_ops *multi_st_ops_find_nolock(u32 id)
return NULL;
}
+/* Call test_1() of the struct_ops map identified by the id */
int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id)
{
struct bpf_testmod_multi_st_ops *st_ops;
@@ -1652,6 +1746,38 @@ int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id)
return ret;
}
+/*
+ * Call test_1() of the struct_ops that bpf_prog_get_assoc_struct_ops()
+ * returns for aux. Returns -1 when no struct_ops is returned, otherwise
+ * whatever test_1() returns.
+ */
+int bpf_kfunc_multi_st_ops_test_1_assoc(struct st_ops_args *args, struct bpf_prog_aux *aux)
+{
+	struct bpf_testmod_multi_st_ops *ops;
+
+	ops = (struct bpf_testmod_multi_st_ops *)bpf_prog_get_assoc_struct_ops(aux);
+	if (!ops)
+		return -1;
+
+	return ops->test_1(args);
+}
+
+/* Returns a when aux is non-NULL and a is positive, -EINVAL otherwise. */
+int bpf_kfunc_implicit_arg(int a, struct bpf_prog_aux *aux)
+{
+	if (!aux || a <= 0)
+		return -EINVAL;
+
+	return a;
+}
+
+/* Returns a + b when aux is non-NULL, -EINVAL otherwise. */
+int bpf_kfunc_implicit_arg_legacy(int a, int b, struct bpf_prog_aux *aux)
+{
+	if (!aux)
+		return -EINVAL;
+
+	return a + b;
+}
+
+/* Forwards a, b and aux unchanged to bpf_kfunc_implicit_arg_legacy(). */
+int bpf_kfunc_implicit_arg_legacy_impl(int a, int b, struct bpf_prog_aux *aux)
+{
+ return bpf_kfunc_implicit_arg_legacy(a, b, aux);
+}
+
static int multi_st_ops_reg(void *kdata, struct bpf_link *link)
{
struct bpf_testmod_multi_st_ops *st_ops =
@@ -1774,6 +1900,10 @@ static void bpf_testmod_exit(void)
while (refcount_read(&prog_test_struct.cnt) > 1)
msleep(20);
+ /* Clean up irqwork and tasklet */
+ irq_work_sync(&ctx_check_irq);
+ tasklet_kill(&ctx_check_tasklet);
+
bpf_kfunc_close_sock();
sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
unregister_bpf_testmod_uprobe();
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
index 4df6fa6a92cb..b393bf771131 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
@@ -118,6 +118,7 @@ void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
void bpf_kfunc_call_test_destructive(void) __ksym;
void bpf_kfunc_call_test_sleepable(void) __ksym;
+int bpf_kfunc_call_test_call_rcu_tasks_trace(int *done) __ksym;
void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p);
struct prog_test_member *bpf_kfunc_call_memb_acquire(void);
@@ -161,6 +162,16 @@ void bpf_kfunc_rcu_task_test(struct task_struct *ptr) __ksym;
struct task_struct *bpf_kfunc_ret_rcu_test(void) __ksym;
int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size) __ksym;
-int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id) __ksym;
+#ifndef __KERNEL__
+extern int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id) __weak __ksym;
+extern int bpf_kfunc_multi_st_ops_test_1_assoc(struct st_ops_args *args) __weak __ksym;
+#endif
+
+struct prog_test_member *bpf_kfunc_get_default_trusted_ptr_test(void) __ksym;
+void bpf_kfunc_put_default_trusted_ptr_test(struct prog_test_member *trusted_ptr) __ksym;
+
+void bpf_testmod_test_hardirq_fn(void);
+void bpf_testmod_test_softirq_fn(void);
+void bpf_kfunc_trigger_ctx_check(void) __ksym;
#endif /* _BPF_TESTMOD_KFUNC_H */
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 9437bdd4afa5..a5576b2dfc26 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -4,6 +4,18 @@
#include <bpf/libbpf.h>
+#ifdef __x86_64__
+#define SYS_PREFIX "__x64_"
+#elif defined(__s390x__)
+#define SYS_PREFIX "__s390x_"
+#elif defined(__aarch64__)
+#define SYS_PREFIX "__arm64_"
+#elif defined(__riscv)
+#define SYS_PREFIX "__riscv_"
+#else
+#define SYS_PREFIX ""
+#endif
+
#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index c8d640802cce..9ca83dce100d 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -220,7 +220,7 @@
},
.result_unpriv = REJECT,
.result = REJECT,
- .errstr = "variable ptr_ access var_off=(0x0; 0x7) disallowed",
+ .errstr = "variable trusted_ptr_ access var_off=(0x0; 0x7) disallowed",
},
{
"calls: invalid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID",
diff --git a/tools/testing/selftests/bpf/verifier/direct_value_access.c b/tools/testing/selftests/bpf/verifier/direct_value_access.c
index c0648dc009b5..e569d119fb60 100644
--- a/tools/testing/selftests/bpf/verifier/direct_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_value_access.c
@@ -81,7 +81,7 @@
},
.fixup_map_array_48b = { 1 },
.result = REJECT,
- .errstr = "direct value offset of 4294967295 is not allowed",
+ .errstr = "invalid access to map value pointer, value_size=48 off=4294967295",
},
{
"direct map access, write test 8",
@@ -141,7 +141,7 @@
},
.fixup_map_array_48b = { 1 },
.result = REJECT,
- .errstr = "direct value offset of 536870912 is not allowed",
+ .errstr = "invalid access to map value pointer, value_size=48 off=536870912",
},
{
"direct map access, write test 13",
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 59a020c35647..061d98f6e9bb 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -229,11 +229,11 @@
{
"precise: program doesn't prematurely prune branches",
.insns = {
- BPF_ALU64_IMM(BPF_MOV, BPF_REG_6, 0x400),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_0),
BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
BPF_ALU64_IMM(BPF_MOV, BPF_REG_8, 0),
BPF_ALU64_IMM(BPF_MOV, BPF_REG_9, 0x80000000),
- BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 0x401),
BPF_JMP_IMM(BPF_JA, 0, 0, 0),
BPF_JMP_REG(BPF_JLE, BPF_REG_6, BPF_REG_9, 2),
BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 1),
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index e962f133250c..1be1e353d40a 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -2580,7 +2580,7 @@ static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last
if (last && fmt == RESFMT_TABLE) {
output_header_underlines();
printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
- env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
+ env.files_processed, env.progs_processed, env.files_skipped, env.progs_skipped);
}
}
diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c
index 44c52f620fda..ce6c2642fd9b 100644
--- a/tools/testing/selftests/cgroup/lib/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c
@@ -168,6 +168,27 @@ long cg_read_key_long(const char *cgroup, const char *control, const char *key)
return atol(ptr + strlen(key));
}
+/*
+ * Poll the "key" entry of cgroup/control until it reads "expected".
+ *
+ * The value is read at most "retries" times, sleeping wait_interval_us
+ * microseconds between two consecutive attempts.
+ *
+ * Returns the last value read: "expected" on success, another non-negative
+ * value when the retries ran out, or a negative value as soon as
+ * cg_read_key_long() returns one. Returns -1 if retries is not positive.
+ */
+long cg_read_key_long_poll(const char *cgroup, const char *control,
+			   const char *key, long expected, int retries,
+			   useconds_t wait_interval_us)
+{
+	long val = -1;
+	int i;
+
+	for (i = 0; i < retries; i++) {
+		val = cg_read_key_long(cgroup, control, key);
+		if (val < 0 || val == expected)
+			break;
+
+		/* Nothing is read after the last attempt, so do not wait for it. */
+		if (i + 1 < retries)
+			usleep(wait_interval_us);
+	}
+
+	return val;
+}
+}
+
long cg_read_lc(const char *cgroup, const char *control)
{
char buf[PAGE_SIZE];
diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
index 7ab2824ed7b5..77f386dab5e8 100644
--- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
@@ -17,6 +17,8 @@
#define CG_NAMED_NAME "selftest"
#define CG_PATH_FORMAT (!cg_test_v1_named ? "0::%s" : (":name=" CG_NAMED_NAME ":%s"))
+#define DEFAULT_WAIT_INTERVAL_US (100 * 1000) /* 100 ms */
+
/*
* Checks if two given values differ by less than err% of their sum.
*/
@@ -64,6 +66,9 @@ extern int cg_read_strstr(const char *cgroup, const char *control,
extern long cg_read_long(const char *cgroup, const char *control);
extern long cg_read_long_fd(int fd);
long cg_read_key_long(const char *cgroup, const char *control, const char *key);
+long cg_read_key_long_poll(const char *cgroup, const char *control,
+ const char *key, long expected, int retries,
+ useconds_t wait_interval_us);
extern long cg_read_lc(const char *cgroup, const char *control);
extern int cg_write(const char *cgroup, const char *control, char *buf);
extern int cg_open(const char *cgroup, const char *control, int flags);
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index a17256d9f88a..5dff3ad53867 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -269,7 +269,7 @@ TEST_MATRIX=(
" C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3"
" C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
" C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3|A2:1-3|A3:2-3|B1:2-3 A1:P0|A3:P0|B1:P-2"
+ " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-1|A2:1|A3:1|B1:2-3 A1:P0|A3:P0|B1:P2"
" C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5"
" C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4"
" C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4"
@@ -318,7 +318,7 @@ TEST_MATRIX=(
# Invalid to valid local partition direct transition tests
" C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3"
" C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3"
- " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:4-6 A1:P-2|B1:P0"
+ " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:5-6 A1:P2|B1:P0"
" C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3"
# Local partition invalidation tests
@@ -388,10 +388,10 @@ TEST_MATRIX=(
" C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2"
" C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1"
- # A non-exclusive cpuset.cpus change will invalidate partition and its siblings
- " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P0"
- " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P-1"
- " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P0|B1:P-1"
+ # A non-exclusive cpuset.cpus change will not invalidate its sibling partitions.
+ " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:3 A1:P1|B1:P0"
+ " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-1|XA1:0-1|B1:2-3 A1:P1|B1:P1"
+ " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-1|B1:2-3 A1:P0|B1:P1"
# cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it
" C0-3 . . C4-5 X5 . . . 0 A1:0-3|B1:4-5"
@@ -417,6 +417,17 @@ TEST_MATRIX=(
" CX1-4:S+ CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \
A1:P0|A2:P2:B1:P-1 2-4"
+ # When multiple partitions with conflicting cpuset.cpus are created, the
+ # later-created ones will only get what is left of the available exclusive
+ # CPUs.
+ " C1-3:P1 . . . . . . C3-5:P1 0 A1:1-3|B1:4-5:XB1:4-5 A1:P1|B1:P1"
+
+ # cpuset.cpus can be set to a subset of sibling's cpuset.cpus.exclusive
+ " C1-3:X1-3 . . C4-5 . . . C1-2 0 A1:1-3|B1:1-2"
+
+ # cpuset.cpus can become empty with task in it as it inherits parent's effective CPUs
+ " C1-3:S+ C2 . . . T:C . . 0 A1:1-3|A2:1-3"
+
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
# ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
# Failure cases:
@@ -427,7 +438,7 @@ TEST_MATRIX=(
# Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected
" C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3|B1:4-5"
- # cpuset.cpus cannot be a subset of sibling cpuset.cpus.exclusive
+ # cpuset.cpus.exclusive cannot be set to a superset of sibling's cpuset.cpus
" C0-3 . . C4-5 X3-5 . . . 1 A1:0-3|B1:4-5"
)
@@ -477,6 +488,10 @@ REMOTE_TEST_MATRIX=(
. . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \
p1:P0|p2:P1|c11:P2|c12:P1|c22:P2 \
1-2,4-6|1-2,5-6"
+ # c12 whose cpuset.cpus CPUs are all granted to c11 will become invalid partition
+ " C1-5:P1:S+ . C1-4:P1 C2-3 . . \
+ . . . P1 . . p1:5|c11:1-4|c12:5 \
+ p1:P1|c11:P1|c12:P-1"
)
#
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index ca38525484e3..eeabd34bf083 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -26,6 +26,7 @@
*/
#define MAX_VMSTAT_ERROR (4096 * 64 * get_nprocs())
+#define KMEM_DEAD_WAIT_RETRIES 80
static int alloc_dcache(const char *cgroup, void *arg)
{
@@ -306,9 +307,7 @@ static int test_kmem_dead_cgroups(const char *root)
{
int ret = KSFT_FAIL;
char *parent;
- long dead;
- int i;
- int max_time = 20;
+ long dead = -1;
parent = cg_name(root, "kmem_dead_cgroups_test");
if (!parent)
@@ -323,21 +322,19 @@ static int test_kmem_dead_cgroups(const char *root)
if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
goto cleanup;
- for (i = 0; i < max_time; i++) {
- dead = cg_read_key_long(parent, "cgroup.stat",
- "nr_dying_descendants ");
- if (dead == 0) {
- ret = KSFT_PASS;
- break;
- }
- /*
- * Reclaiming cgroups might take some time,
- * let's wait a bit and repeat.
- */
- sleep(1);
- if (i > 5)
- printf("Waiting time longer than 5s; wait: %ds (dead: %ld)\n", i, dead);
- }
+ /*
+ * Allow up to ~8s for reclaim of dying descendants to complete.
+ * This is a generous upper bound derived from stress testing, not
+ * from a specific kernel constant, and can be adjusted if reclaim
+ * behavior changes in the future.
+ */
+ dead = cg_read_key_long_poll(parent, "cgroup.stat",
+ "nr_dying_descendants ", 0, KMEM_DEAD_WAIT_RETRIES,
+ DEFAULT_WAIT_INTERVAL_US);
+ if (dead)
+ goto cleanup;
+
+ ret = KSFT_PASS;
cleanup:
cg_destroy(parent);
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 4e1647568c5b..2fb096a2a9f9 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -21,6 +21,8 @@
#include "kselftest.h"
#include "cgroup_util.h"
+#define MEMCG_SOCKSTAT_WAIT_RETRIES 30
+
static bool has_localevents;
static bool has_recursiveprot;
@@ -1384,6 +1386,7 @@ static int test_memcg_sock(const char *root)
int bind_retries = 5, ret = KSFT_FAIL, pid, err;
unsigned short port;
char *memcg;
+ long sock_post = -1;
memcg = cg_name(root, "memcg_test");
if (!memcg)
@@ -1432,7 +1435,22 @@ static int test_memcg_sock(const char *root)
if (cg_read_long(memcg, "memory.current") < 0)
goto cleanup;
- if (cg_read_key_long(memcg, "memory.stat", "sock "))
+ /*
+ * memory.stat is updated asynchronously via the memcg rstat
+ * flushing worker, which runs periodically (every 2 seconds,
+ * see FLUSH_TIME). On a busy system, the "sock " counter may
+ * stay non-zero for a short period of time after the TCP
+ * connection is closed and all socket memory has been
+ * uncharged.
+ *
+ * Poll memory.stat for up to 3 seconds (~FLUSH_TIME plus some
+ * scheduling slack) and require that the "sock " counter
+ * eventually drops to zero.
+ */
+ sock_post = cg_read_key_long_poll(memcg, "memory.stat", "sock ", 0,
+ MEMCG_SOCKSTAT_WAIT_RETRIES,
+ DEFAULT_WAIT_INTERVAL_US);
+ if (sock_post)
goto cleanup;
ret = KSFT_PASS;
diff --git a/tools/testing/selftests/coredump/coredump_test_helpers.c b/tools/testing/selftests/coredump/coredump_test_helpers.c
index a6f6d5f2ae07..5c8adee63641 100644
--- a/tools/testing/selftests/coredump/coredump_test_helpers.c
+++ b/tools/testing/selftests/coredump/coredump_test_helpers.c
@@ -56,7 +56,7 @@ void crashing_child(void)
pthread_create(&thread, NULL, do_nothing, NULL);
/* crash on purpose */
- i = *(int *)NULL;
+ __builtin_trap();
}
int create_detached_tmpfs(void)
diff --git a/tools/testing/selftests/damon/access_memory.c b/tools/testing/selftests/damon/access_memory.c
index 56b17e8fe1be..567793b11107 100644
--- a/tools/testing/selftests/damon/access_memory.c
+++ b/tools/testing/selftests/damon/access_memory.c
@@ -8,6 +8,11 @@
#include <string.h>
#include <time.h>
+enum access_mode {
+ ACCESS_MODE_ONCE,
+ ACCESS_MODE_REPEAT,
+};
+
int main(int argc, char *argv[])
{
char **regions;
@@ -15,10 +20,12 @@ int main(int argc, char *argv[])
int nr_regions;
int sz_region;
int access_time_ms;
+ enum access_mode mode = ACCESS_MODE_ONCE;
+
int i;
- if (argc != 4) {
- printf("Usage: %s <number> <size (bytes)> <time (ms)>\n",
+ if (argc < 4) {
+ printf("Usage: %s <number> <size (bytes)> <time (ms)> [mode]\n",
argv[0]);
return -1;
}
@@ -27,15 +34,21 @@ int main(int argc, char *argv[])
sz_region = atoi(argv[2]);
access_time_ms = atoi(argv[3]);
+ if (argc > 4 && !strcmp(argv[4], "repeat"))
+ mode = ACCESS_MODE_REPEAT;
+
regions = malloc(sizeof(*regions) * nr_regions);
for (i = 0; i < nr_regions; i++)
regions[i] = malloc(sz_region);
- for (i = 0; i < nr_regions; i++) {
- start_clock = clock();
- while ((clock() - start_clock) * 1000 / CLOCKS_PER_SEC <
- access_time_ms)
- memset(regions[i], i, sz_region);
- }
+ do {
+ for (i = 0; i < nr_regions; i++) {
+ start_clock = clock();
+ while ((clock() - start_clock) * 1000 / CLOCKS_PER_SEC
+ < access_time_ms)
+ memset(regions[i], i, sz_region);
+ }
+ } while (mode == ACCESS_MODE_REPEAT);
+
return 0;
}
diff --git a/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh b/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh
index 64c5d8c518a4..33a7ff43ed6c 100755
--- a/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh
+++ b/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh
@@ -14,6 +14,13 @@ then
exit $ksft_skip
fi
+kmemleak="/sys/kernel/debug/kmemleak"
+if [ ! -f "$kmemleak" ]
+then
+ echo "$kmemleak not found"
+ exit $ksft_skip
+fi
+
# ensure filter directory
echo 1 > "$damon_sysfs/kdamonds/nr_kdamonds"
echo 1 > "$damon_sysfs/kdamonds/0/contexts/nr_contexts"
@@ -22,22 +29,17 @@ echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/0/filters/nr_filters"
filter_dir="$damon_sysfs/kdamonds/0/contexts/0/schemes/0/filters/0"
-before_kb=$(grep Slab /proc/meminfo | awk '{print $2}')
-
-# try to leak 3000 KiB
-for i in {1..102400};
+# try to leak 128 times
+for i in {1..128};
do
echo "012345678901234567890123456789" > "$filter_dir/memcg_path"
done
-after_kb=$(grep Slab /proc/meminfo | awk '{print $2}')
-# expect up to 1500 KiB free from other tasks memory
-expected_after_kb_max=$((before_kb + 1500))
-
-if [ "$after_kb" -gt "$expected_after_kb_max" ]
+echo scan > "$kmemleak"
+kmemleak_report=$(cat "$kmemleak")
+if [ "$kmemleak_report" = "" ]
then
- echo "maybe memcg_path are leaking: $before_kb -> $after_kb"
- exit 1
-else
exit 0
fi
+echo "$kmemleak_report"
+exit 1
diff --git a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py
index 90ad7409a7a6..35c724a63f6c 100755
--- a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py
+++ b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py
@@ -6,10 +6,10 @@ import time
import _damon_sysfs
-def main():
- # access two 10 MiB memory regions, 2 second per each
- sz_region = 10 * 1024 * 1024
- proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000'])
+def pass_wss_estimation(sz_region):
+ # access two regions of given size, 2 seconds per each region
+ proc = subprocess.Popen(
+ ['./access_memory', '2', '%d' % sz_region, '2000', 'repeat'])
kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
contexts=[_damon_sysfs.DamonCtx(
ops='vaddr',
@@ -27,7 +27,7 @@ def main():
exit(1)
wss_collected = []
- while proc.poll() == None:
+ while proc.poll() is None and len(wss_collected) < 40:
time.sleep(0.1)
err = kdamonds.kdamonds[0].update_schemes_tried_bytes()
if err != None:
@@ -36,20 +36,43 @@ def main():
wss_collected.append(
kdamonds.kdamonds[0].contexts[0].schemes[0].tried_bytes)
+ proc.terminate()
+ err = kdamonds.stop()
+ if err is not None:
+ print('kdamond stop failed: %s' % err)
+ exit(1)
wss_collected.sort()
acceptable_error_rate = 0.2
for percentile in [50, 75]:
sample = wss_collected[int(len(wss_collected) * percentile / 100)]
error_rate = abs(sample - sz_region) / sz_region
- print('%d-th percentile (%d) error %f' %
- (percentile, sample, error_rate))
+ print('%d-th percentile error %f (expect %d, result %d)' %
+ (percentile, error_rate, sz_region, sample))
if error_rate > acceptable_error_rate:
print('the error rate is not acceptable (> %f)' %
acceptable_error_rate)
print('samples are as below')
- print('\n'.join(['%d' % wss for wss in wss_collected]))
- exit(1)
+ for idx, wss in enumerate(wss_collected):
+ if idx < len(wss_collected) - 1 and \
+ wss_collected[idx + 1] == wss:
+ continue
+ print('%d/%d: %d' % (idx, len(wss_collected), wss))
+ return False
+ return True
+
+def main():
+    # DAMON does not flush the TLB, so on a system whose TLB covers the whole
+    # test working set it cannot see the accesses.  Try region sizes from
+    # 10 MiB, doubling each time up to 160 MiB, and succeed on the first pass.
+    for sz_region_mb in (10, 20, 40, 80, 160):
+        if pass_wss_estimation(sz_region_mb * 1024 * 1024) is True:
+            exit(0)
+    exit(1)
if __name__ == '__main__':
main()
diff --git a/tools/testing/selftests/dm-verity/Makefile b/tools/testing/selftests/dm-verity/Makefile
new file mode 100644
index 000000000000..b75ee08a54af
--- /dev/null
+++ b/tools/testing/selftests/dm-verity/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_PROGS := test-dm-verity-keyring.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/dm-verity/config b/tools/testing/selftests/dm-verity/config
new file mode 100644
index 000000000000..1cd3712fa0a4
--- /dev/null
+++ b/tools/testing/selftests/dm-verity/config
@@ -0,0 +1,10 @@
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_VERITY=m
+CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_KEYS=y
+CONFIG_ASYMMETRIC_KEY_TYPE=y
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y
+CONFIG_X509_CERTIFICATE_PARSER=y
+CONFIG_PKCS7_MESSAGE_PARSER=y
+CONFIG_SYSTEM_DATA_VERIFICATION=y
diff --git a/tools/testing/selftests/dm-verity/test-dm-verity-keyring.sh b/tools/testing/selftests/dm-verity/test-dm-verity-keyring.sh
new file mode 100755
index 000000000000..1f9601ef22f8
--- /dev/null
+++ b/tools/testing/selftests/dm-verity/test-dm-verity-keyring.sh
@@ -0,0 +1,873 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test script for dm-verity keyring functionality
+#
+# This script has two modes depending on kernel configuration:
+#
+# 1. keyring_unsealed=1 AND require_signatures=1:
+# - Upload a test key to the .dm-verity keyring
+# - Seal the keyring
+# - Create a dm-verity device with a signed root hash
+# - Verify signature verification works
+#
+# 2. keyring_unsealed=0 (default) OR require_signatures=0:
+# - Verify the keyring is already sealed (if unsealed=0)
+# - Verify keys cannot be added to a sealed keyring
+# - Verify the keyring is inactive (not used for verification)
+#
+# Requirements:
+# - Root privileges
+# - openssl
+# - veritysetup (cryptsetup)
+# - keyctl (keyutils)
+
+set -e
+
+WORK_DIR=""
+DATA_DEV=""
+HASH_DEV=""
+DM_NAME="verity-test-$$"
+CLEANUP_DONE=0
+
+# Module parameters (detected at runtime)
+KEYRING_UNSEALED=""
+REQUIRE_SIGNATURES=""
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Print the arguments to stdout behind a green [INFO] tag.
+log_info() {
+ echo -e "${GREEN}[INFO]${NC} $*"
+}
+
+# Print the arguments to stdout behind a yellow [WARN] tag.
+log_warn() {
+ echo -e "${YELLOW}[WARN]${NC} $*"
+}
+
+# Print the arguments to stderr behind a red [ERROR] tag.
+log_error() {
+ echo -e "${RED}[ERROR]${NC} $*" >&2
+}
+
+# Print the arguments to stdout behind a green [PASS] tag.
+log_pass() {
+ echo -e "${GREEN}[PASS]${NC} $*"
+}
+
+# Print the arguments to stderr behind a red [FAIL] tag.
+log_fail() {
+ echo -e "${RED}[FAIL]${NC} $*" >&2
+}
+
+# Print the arguments to stdout behind a yellow [SKIP] tag.
+log_skip() {
+ echo -e "${YELLOW}[SKIP]${NC} $*"
+}
+
+# Tear down whatever the test set up: the $DM_NAME device, the loop devices
+# recorded in DATA_DEV/HASH_DEV and the $WORK_DIR directory.
+# CLEANUP_DONE makes a second invocation a no-op, and failures of the
+# individual removal commands are ignored.
+cleanup() {
+ if [ "$CLEANUP_DONE" -eq 1 ]; then
+ return
+ fi
+ CLEANUP_DONE=1
+
+ log_info "Cleaning up..."
+
+ # Remove dm-verity device if it exists
+ if dmsetup info "$DM_NAME" &>/dev/null; then
+ dmsetup remove "$DM_NAME" 2>/dev/null || true
+ fi
+
+ # Detach loop devices
+ # (only values that look like /dev/loop* are passed to losetup -d)
+ if [ -n "$DATA_DEV" ] && [[ "$DATA_DEV" == /dev/loop* ]]; then
+ losetup -d "$DATA_DEV" 2>/dev/null || true
+ fi
+ if [ -n "$HASH_DEV" ] && [[ "$HASH_DEV" == /dev/loop* ]]; then
+ losetup -d "$HASH_DEV" 2>/dev/null || true
+ fi
+
+ # Remove work directory
+ if [ -n "$WORK_DIR" ] && [ -d "$WORK_DIR" ]; then
+ rm -rf "$WORK_DIR"
+ fi
+}
+
+trap cleanup EXIT
+
+# Log the arguments as an error and terminate the script with status 1.
+die() {
+ log_error "$*"
+ exit 1
+}
+
+# Print the serial number of the .dm-verity keyring in decimal.
+# Returns 1 without printing anything when /proc/keys has no matching entry.
+find_dm_verity_keyring() {
+	# The .dm-verity keyring is not linked to user-accessible keyrings,
+	# so we need to find it via /proc/keys
+	local serial_hex
+	# Stop at the first matching line: several matches would put a newline
+	# into the arithmetic expansion below and make it fail.
+	serial_hex=$(awk '/\.dm-verity/ {print $1; exit}' /proc/keys 2>/dev/null)
+
+	if [ -z "$serial_hex" ]; then
+		return 1
+	fi
+
+	# Convert hex to decimal for keyctl
+	echo $((16#$serial_hex))
+}
+
+# Print the value of the dm_verity module parameter named by $1, or an empty
+# line when its file under /sys/module/dm_verity/parameters does not exist.
+get_module_param() {
+	local param_file="/sys/module/dm_verity/parameters/$1"
+
+	if [ ! -f "$param_file" ]; then
+		echo ""
+		return
+	fi
+
+	cat "$param_file"
+}
+
+# Abort via die() unless the script runs as root, every external command it
+# uses is installed, and a dry-run modprobe of dm-verity succeeds.
+check_requirements() {
+ log_info "Checking requirements..."
+
+ # Check for root
+ if [ "$(id -u)" -ne 0 ]; then
+ die "This script must be run as root"
+ fi
+
+ # Check for required tools
+ for cmd in openssl veritysetup keyctl losetup dmsetup dd awk; do
+ if ! command -v "$cmd" &>/dev/null; then
+ die "Required command not found: $cmd"
+ fi
+ done
+
+ # Check for dm-verity module
+ # (modprobe -n is a dry run: nothing is loaded here)
+ if ! modprobe -n dm-verity &>/dev/null; then
+ die "dm-verity module not available"
+ fi
+
+ # NOTE: OpenSSL's signing capability is not probed here; only its presence
+ # was checked above. This line just announces which tool creates the
+ # PKCS#7 signatures.
+ log_info "Using OpenSSL for PKCS#7 signatures"
+}
+
+# (Re)load dm-verity with the given module parameters and locate its keyring.
+#   $1 - value for keyring_unsealed (default 0)
+#   $2 - value for require_signatures (default 0)
+# Stores the keyring ID in $WORK_DIR/keyring_id and the parameter values read
+# back from sysfs in KEYRING_UNSEALED / REQUIRE_SIGNATURES. Calls die() when
+# the module cannot be unloaded or loaded, or the keyring does not show up.
+load_dm_verity_module() {
+ local keyring_unsealed="${1:-0}"
+ local require_signatures="${2:-0}"
+
+ log_info "Loading dm-verity module with keyring_unsealed=$keyring_unsealed require_signatures=$require_signatures"
+
+ # Unload if already loaded
+ if lsmod | grep -q '^dm_verity'; then
+ log_info "Unloading existing dm-verity module..."
+ modprobe -r dm-verity 2>/dev/null || \
+ die "Failed to unload dm-verity module (may be in use)"
+ sleep 1
+ fi
+
+ # Load with specified parameters
+ modprobe dm-verity keyring_unsealed="$keyring_unsealed" require_signatures="$require_signatures" || \
+ die "Failed to load dm-verity module"
+
+ # Wait for keyring to be created (poll with timeout)
+ local keyring_id=""
+ local timeout=50 # 5 seconds (50 * 0.1s)
+ while [ $timeout -gt 0 ]; do
+ # leave the loop as soon as the lookup succeeds
+ keyring_id=$(find_dm_verity_keyring) && break
+ sleep 0.1
+ timeout=$((timeout - 1))
+ done
+
+ if [ -z "$keyring_id" ]; then
+ die "dm-verity keyring not found after module load (timeout)"
+ fi
+
+ log_info "Found .dm-verity keyring: $keyring_id"
+ echo "$keyring_id" > "$WORK_DIR/keyring_id"
+
+ # Read and display module parameters
+ KEYRING_UNSEALED=$(get_module_param "keyring_unsealed")
+ REQUIRE_SIGNATURES=$(get_module_param "require_signatures")
+
+ log_info "Module parameters:"
+ log_info " keyring_unsealed=$KEYRING_UNSEALED"
+ log_info " require_signatures=$REQUIRE_SIGNATURES"
+}
+
+# Remove every device that "dmsetup ls --target verity" lists, then unload
+# the dm-verity module if lsmod shows it. Device removal failures are ignored
+# and a failed unload only produces a warning.
+unload_dm_verity_module() {
+ log_info "Unloading dm-verity module..."
+
+ # Clean up any dm-verity devices first
+ local dm_dev
+ # only the first field of each listed line is used as the device name
+ while read -r dm_dev _; do
+ [ -n "$dm_dev" ] || continue
+ log_info "Removing dm-verity device: $dm_dev"
+ dmsetup remove "$dm_dev" 2>/dev/null || true
+ done < <(dmsetup ls --target verity 2>/dev/null)
+
+ if lsmod | grep -q '^dm_verity'; then
+ modprobe -r dm-verity 2>/dev/null || \
+ log_warn "Failed to unload dm-verity module"
+ sleep 1
+ fi
+}
+
+# Create the signing material in $WORK_DIR: private.pem (RSA key),
+# openssl.cnf (certificate extensions), cert.pem (self-signed certificate)
+# and cert.der (the same certificate in DER form).
+generate_keys() {
+ log_info "Generating signing key pair..."
+
+ # Generate private key (2048-bit for faster test execution)
+ openssl genrsa -out "$WORK_DIR/private.pem" 2048 2>/dev/null
+
+ # Create OpenSSL config for certificate extensions
+ # The kernel requires digitalSignature key usage for signature verification
+ # Both subjectKeyIdentifier and authorityKeyIdentifier are needed for
+ # the kernel to match keys in the keyring (especially for self-signed certs)
+ cat > "$WORK_DIR/openssl.cnf" << 'EOF'
+[req]
+distinguished_name = req_distinguished_name
+x509_extensions = v3_ca
+prompt = no
+
+[req_distinguished_name]
+CN = dm-verity-test-key
+
+[v3_ca]
+basicConstraints = critical,CA:FALSE
+keyUsage = digitalSignature
+subjectKeyIdentifier = hash
+authorityKeyIdentifier = keyid
+EOF
+
+ # Generate self-signed certificate with proper extensions
+ openssl req -new -x509 -key "$WORK_DIR/private.pem" \
+ -out "$WORK_DIR/cert.pem" -days 365 \
+ -config "$WORK_DIR/openssl.cnf" 2>/dev/null
+
+ # Convert certificate to DER format for kernel
+ openssl x509 -in "$WORK_DIR/cert.pem" -outform DER \
+ -out "$WORK_DIR/cert.der"
+
+ # Show certificate info for debugging
+ log_info "Certificate details:"
+ openssl x509 -in "$WORK_DIR/cert.pem" -noout -text 2>/dev/null | \
+ grep -E "Subject:|Issuer:|Key Usage|Extended" | head -10
+
+ log_info "Keys generated successfully"
+}
+
+# Seal the keyring whose ID is stored in $WORK_DIR/keyring_id by running
+# "keyctl restrict_keyring" on it; calls die() if that command fails.
+seal_keyring() {
+ log_info "Sealing the .dm-verity keyring..."
+
+ local keyring_id
+ keyring_id=$(cat "$WORK_DIR/keyring_id")
+
+ keyctl restrict_keyring "$keyring_id" || \
+ die "Failed to seal keyring"
+
+ log_info "Keyring sealed successfully"
+}
+
+# Create data.img and hash.img in $WORK_DIR and attach each to a loop device.
+# The resulting device paths are stored in DATA_DEV and HASH_DEV.
+create_test_device() {
+ log_info "Creating test device images..."
+
+ # Create data image with random content (8MB is sufficient for testing)
+ dd if=/dev/urandom of="$WORK_DIR/data.img" bs=1M count=8 status=none
+
+ # Create hash image (will be populated by veritysetup)
+ dd if=/dev/zero of="$WORK_DIR/hash.img" bs=1M count=1 status=none
+
+ # Setup loop devices
+ DATA_DEV=$(losetup --find --show "$WORK_DIR/data.img")
+ HASH_DEV=$(losetup --find --show "$WORK_DIR/hash.img")
+
+ log_info "Data device: $DATA_DEV"
+ log_info "Hash device: $HASH_DEV"
+}
+
+# Run "veritysetup format" on $DATA_DEV/$HASH_DEV and store the reported root
+# hash in $WORK_DIR/root_hash. Calls die(), after printing the command's
+# output, when no "Root hash:" line can be found in it.
+create_verity_hash() {
+ log_info "Creating dm-verity hash tree..."
+
+ local root_hash output
+ output=$(veritysetup format "$DATA_DEV" "$HASH_DEV" 2>&1)
+ # the hash is the third whitespace-separated field of the "Root hash:" line
+ root_hash=$(echo "$output" | grep "Root hash:" | awk '{print $3}')
+
+ if [ -z "$root_hash" ]; then
+ log_error "veritysetup format output:"
+ echo "$output" | sed 's/^/ /'
+ die "Failed to get root hash from veritysetup format"
+ fi
+
+ echo "$root_hash" > "$WORK_DIR/root_hash"
+ log_info "Root hash: $root_hash"
+}
+
+# Sign a file with "openssl smime" and write the DER-encoded result.
+#   $1 - file to sign
+#   $2 - output signature file
+#   $3 - signer certificate
+#   $4 - private key
+# Returns 0 on success; logs an error and returns 1 if openssl fails.
+create_detached_signature() {
+ local infile="$1"
+ local outfile="$2"
+ local cert="$3"
+ local key="$4"
+
+ # Use openssl smime (not cms) for PKCS#7 signatures compatible with kernel
+ # Flags from working veritysetup example:
+ # -nocerts: don't include certificate in signature
+ # -noattr: no signed attributes
+ # -binary: binary input mode
+ if openssl smime -sign -nocerts -noattr -binary \
+ -in "$infile" \
+ -inkey "$key" \
+ -signer "$cert" \
+ -outform der \
+ -out "$outfile" 2>/dev/null; then
+ return 0
+ fi
+
+ log_error "Failed to create signature"
+ return 1
+}
+
+activate_verity_device() {
+ local with_sig="$1"
+ local root_hash
+ root_hash=$(cat "$WORK_DIR/root_hash")
+
+ # Clear dmesg and capture any kernel messages during activation
+ dmesg -C 2>/dev/null || true
+
+ if [ "$with_sig" = "yes" ]; then
+ log_info "Activating dm-verity device with signature..."
+ veritysetup open "$DATA_DEV" "$DM_NAME" "$HASH_DEV" "$root_hash" \
+ --root-hash-signature="$WORK_DIR/root_hash.p7s" 2>&1
+ local ret=$?
+ else
+ log_info "Activating dm-verity device without signature..."
+ veritysetup open "$DATA_DEV" "$DM_NAME" "$HASH_DEV" "$root_hash" 2>&1
+ local ret=$?
+ fi
+
+ # Show relevant kernel messages
+ local kmsg
+ kmsg=$(dmesg 2>/dev/null | grep -i -E 'verity|pkcs|signature|asymmetric|key' | tail -10)
+ if [ -n "$kmsg" ]; then
+ log_info "Kernel messages:"
+ echo "$kmsg" | while read -r line; do echo " $line"; done
+ fi
+
+ return $ret
+}
+
+deactivate_verity_device() {
+ if dmsetup info "$DM_NAME" &>/dev/null; then
+ dmsetup remove "$DM_NAME" 2>/dev/null || true
+ fi
+}
+
+show_keyring_status() {
+ log_info "Keyring status:"
+
+ local keyring_id
+ keyring_id=$(find_dm_verity_keyring) || true
+
+ if [ -n "$keyring_id" ]; then
+ echo " Keyring ID: $keyring_id"
+ keyctl show "$keyring_id" 2>/dev/null || true
+ grep '\.dm-verity' /proc/keys 2>/dev/null || true
+ fi
+}
+
+# List the keys held by the .dm-verity keyring, followed by a
+# "keyctl describe" of each key.
+#
+# The keyring ID is read from $WORK_DIR/keyring_id, falling back to
+# find_dm_verity_keyring. Logs a warning and returns when neither yields an ID.
+list_keyring_keys() {
+	log_info "Keys in .dm-verity keyring:"
+
+	local keyring_id
+	keyring_id=$(cat "$WORK_DIR/keyring_id" 2>/dev/null) || \
+		keyring_id=$(find_dm_verity_keyring) || true
+
+	if [ -z "$keyring_id" ]; then
+		log_warn "Could not find keyring"
+		return
+	fi
+
+	# Take one snapshot of the listing and use it for both the raw dump and
+	# the per-key details, so the two always describe the same set of keys.
+	local keys
+	keys=$(keyctl list "$keyring_id" 2>/dev/null)
+	if [ -z "$keys" ] || [ "$keys" = "keyring is empty" ]; then
+		echo " (empty)"
+		return
+	fi
+
+	echo "$keys" | while read -r line; do
+		echo " $line"
+	done
+
+	# Show detailed info for each key.
+	# "keyctl list" prints every entry as "<id>: <perms> ..." after a
+	# "<n> keys in keyring:" header. Pick the digits in front of the colon;
+	# taking the bare first field would match the header's count and miss
+	# every real key ID (which carries a trailing colon).
+	log_info "Key details:"
+	echo "$keys" | sed -n 's/^[[:space:]]*\([0-9][0-9]*\):.*/\1/p' | while read -r key_id; do
+		echo " Key $key_id:"
+		keyctl describe "$key_id" 2>/dev/null | sed 's/^/ /'
+	done
+}
+
+generate_named_key() {
+ local name="$1"
+ local key_dir="$WORK_DIR/keys/$name"
+
+ mkdir -p "$key_dir"
+
+ # Log to stderr so it doesn't interfere with return value
+ echo "[INFO] Generating key pair: $name" >&2
+
+ # Generate private key
+ openssl genrsa -out "$key_dir/private.pem" 2048 2>/dev/null
+
+ # Create OpenSSL config for certificate extensions
+ # Both subjectKeyIdentifier and authorityKeyIdentifier are needed for
+ # the kernel to match keys in the keyring (especially for self-signed certs)
+ cat > "$key_dir/openssl.cnf" << EOF
+[req]
+distinguished_name = req_distinguished_name
+x509_extensions = v3_ca
+prompt = no
+
+[req_distinguished_name]
+CN = dm-verity-test-$name
+
+[v3_ca]
+basicConstraints = critical,CA:FALSE
+keyUsage = digitalSignature
+subjectKeyIdentifier = hash
+authorityKeyIdentifier = keyid
+EOF
+
+ # Generate self-signed certificate with proper extensions
+ openssl req -new -x509 -key "$key_dir/private.pem" \
+ -out "$key_dir/cert.pem" -days 365 \
+ -config "$key_dir/openssl.cnf" 2>/dev/null
+
+ # Convert certificate to DER format for kernel
+ openssl x509 -in "$key_dir/cert.pem" -outform DER \
+ -out "$key_dir/cert.der"
+
+ # Return the key directory path (only this goes to stdout)
+ echo "$key_dir"
+}
+
+upload_named_key() {
+ local name="$1"
+ local key_dir="$2"
+
+ local keyring_id
+ keyring_id=$(cat "$WORK_DIR/keyring_id")
+
+ log_info "Uploading key '$name' to keyring..."
+
+ local key_id
+ if key_id=$(keyctl padd asymmetric "$name" "$keyring_id" \
+ < "$key_dir/cert.der" 2>&1); then
+ log_info "Key '$name' uploaded with ID: $key_id"
+ echo "$key_id" > "$key_dir/key_id"
+ return 0
+ else
+ log_error "Failed to upload key '$name': $key_id"
+ return 1
+ fi
+}
+
+#
+# Test: Verify sealed keyring rejects key additions
+#
+test_sealed_keyring_rejects_keys() {
+ log_info "TEST: Verify sealed keyring rejects key additions"
+
+ local keyring_id
+ keyring_id=$(cat "$WORK_DIR/keyring_id")
+
+ generate_keys
+
+ # Try to add a key - should fail
+ if keyctl padd asymmetric "dm-verity-test" "$keyring_id" \
+ < "$WORK_DIR/cert.der" 2>/dev/null; then
+ log_fail "Key addition should have been rejected on sealed keyring"
+ return 1
+ else
+ log_pass "Sealed keyring correctly rejected key addition"
+ return 0
+ fi
+}
+
+#
+# Test: Multiple keys in keyring
+#
+test_multiple_keys() {
+ log_info "TEST: Multiple keys in keyring"
+
+ local key1_dir key2_dir key3_dir
+
+ # Generate three different keys
+ key1_dir=$(generate_named_key "vendor-a")
+ key2_dir=$(generate_named_key "vendor-b")
+ key3_dir=$(generate_named_key "vendor-c")
+
+ # Upload all three keys
+ upload_named_key "vendor-a" "$key1_dir" || return 1
+ upload_named_key "vendor-b" "$key2_dir" || return 1
+ upload_named_key "vendor-c" "$key3_dir" || return 1
+
+ log_info ""
+ log_info "Keys in keyring before sealing:"
+ list_keyring_keys
+ show_keyring_status
+
+ # Seal the keyring
+ log_info ""
+ seal_keyring
+
+ # List keys after sealing
+ log_info ""
+ log_info "Keys in keyring after sealing:"
+ list_keyring_keys
+ show_keyring_status
+
+ log_pass "Key upload and keyring sealing succeeded"
+
+ # Create test device
+ log_info ""
+ create_test_device
+ create_verity_hash
+
+ # Test 1: Sign with key1, should verify successfully
+ log_info ""
+ log_info "Sub-test: Verify with vendor-a key"
+ if ! sign_root_hash_with_key "$key1_dir"; then
+ log_fail "Failed to sign with vendor-a key"
+ return 1
+ fi
+ if activate_verity_device "yes"; then
+ log_pass "Verification with vendor-a key succeeded"
+ deactivate_verity_device
+ else
+ log_fail "Verification with vendor-a key should succeed"
+ return 1
+ fi
+
+ # Test 2: Sign with key2, should also verify successfully
+ log_info ""
+ log_info "Sub-test: Verify with vendor-b key"
+ if ! sign_root_hash_with_key "$key2_dir"; then
+ log_fail "Failed to sign with vendor-b key"
+ return 1
+ fi
+ if activate_verity_device "yes"; then
+ log_pass "Verification with vendor-b key succeeded"
+ deactivate_verity_device
+ else
+ log_fail "Verification with vendor-b key should succeed"
+ return 1
+ fi
+
+ # Test 3: Sign with key3, should also verify successfully
+ log_info ""
+ log_info "Sub-test: Verify with vendor-c key"
+ if ! sign_root_hash_with_key "$key3_dir"; then
+ log_fail "Failed to sign with vendor-c key"
+ return 1
+ fi
+ if activate_verity_device "yes"; then
+ log_pass "Verification with vendor-c key succeeded"
+ deactivate_verity_device
+ else
+ log_fail "Verification with vendor-c key should succeed"
+ return 1
+ fi
+
+ # Test 4: Generate a key NOT in the keyring, should fail
+ log_info ""
+ log_info "Sub-test: Verify with unknown key (should fail)"
+ local unknown_key_dir
+ unknown_key_dir=$(generate_named_key "unknown-vendor")
+ if ! sign_root_hash_with_key "$unknown_key_dir"; then
+ log_fail "Failed to sign with unknown-vendor key"
+ return 1
+ fi
+ if activate_verity_device "yes"; then
+ log_fail "Verification with unknown key should fail"
+ deactivate_verity_device
+ return 1
+ else
+ log_pass "Verification with unknown key correctly rejected"
+ fi
+
+ log_info ""
+ log_pass "Multiple keys test completed successfully"
+ return 0
+}
+
+sign_root_hash_with_key() {
+ local key_dir="$1"
+
+ local root_hash
+ root_hash=$(cat "$WORK_DIR/root_hash")
+
+ # Create the data to sign (hex string, not binary)
+ echo -n "$root_hash" > "$WORK_DIR/root_hash.txt"
+
+ # Debug: show exactly what we're signing
+ log_info "Root hash (hex): $root_hash"
+ log_info "Root hash hex string size: $(wc -c < "$WORK_DIR/root_hash.txt") bytes"
+
+ # Create detached PKCS#7 signature
+ if ! create_detached_signature "$WORK_DIR/root_hash.txt" "$WORK_DIR/root_hash.p7s" \
+ "$key_dir/cert.pem" "$key_dir/private.pem"; then
+ log_error "Failed to sign root hash with key from $key_dir"
+ return 1
+ fi
+
+ # Debug: show signing certificate info
+ log_info "Signed with certificate:"
+ openssl x509 -in "$key_dir/cert.pem" -noout -subject 2>/dev/null | sed 's/^/ /'
+
+ # Debug: verify signature locally
+ # -nointern: cert not in signature, use -certfile
+ # -noverify: skip certificate chain validation (self-signed)
+ if openssl smime -verify -binary -inform der -nointern -noverify \
+ -in "$WORK_DIR/root_hash.p7s" \
+ -content "$WORK_DIR/root_hash.txt" \
+ -certfile "$key_dir/cert.pem" \
+ -out /dev/null 2>/dev/null; then
+ log_info "Local signature verification: PASSED"
+ else
+ log_warn "Local signature verification: FAILED"
+ fi
+ return 0
+}
+
+#
+# Test: Verify corrupted signatures are rejected
+#
+test_corrupted_signature() {
+ log_info "TEST: Verify corrupted signatures are rejected"
+
+ # This test requires a valid setup from test_multiple_keys or similar
+ # It modifies the signature file and verifies rejection
+
+ if [ ! -f "$WORK_DIR/root_hash.p7s" ]; then
+ log_warn "No signature file found, skipping corrupted signature test"
+ return 0
+ fi
+
+ # Save original signature
+ cp "$WORK_DIR/root_hash.p7s" "$WORK_DIR/root_hash.p7s.orig"
+
+ # Test 1: Truncated signature
+ log_info "Sub-test: Truncated signature (should fail)"
+ head -c 100 "$WORK_DIR/root_hash.p7s.orig" > "$WORK_DIR/root_hash.p7s"
+ if activate_verity_device "yes"; then
+ log_fail "Truncated signature should be rejected"
+ deactivate_verity_device
+ cp "$WORK_DIR/root_hash.p7s.orig" "$WORK_DIR/root_hash.p7s"
+ return 1
+ else
+ log_pass "Truncated signature correctly rejected"
+ fi
+
+ # Test 2: Corrupted signature (flip some bytes)
+ log_info "Sub-test: Corrupted signature bytes (should fail)"
+ cp "$WORK_DIR/root_hash.p7s.orig" "$WORK_DIR/root_hash.p7s"
+ # Corrupt bytes in the middle of the signature
+ local sig_size
+ sig_size=$(wc -c < "$WORK_DIR/root_hash.p7s")
+ local corrupt_offset=$((sig_size / 2))
+ printf '\xff\xff\xff\xff' | dd of="$WORK_DIR/root_hash.p7s" bs=1 seek=$corrupt_offset conv=notrunc 2>/dev/null
+ if activate_verity_device "yes"; then
+ log_fail "Corrupted signature should be rejected"
+ deactivate_verity_device
+ cp "$WORK_DIR/root_hash.p7s.orig" "$WORK_DIR/root_hash.p7s"
+ return 1
+ else
+ log_pass "Corrupted signature correctly rejected"
+ fi
+
+ # Test 3: Signature over wrong data (sign different content)
+ log_info "Sub-test: Signature over wrong data (should fail)"
+ # Create a different root hash (all zeros as hex string)
+ printf '%064d' 0 > "$WORK_DIR/wrong_hash.txt"
+ # Get the first key directory that was used
+ local key_dir="$WORK_DIR/keys/vendor-a"
+ if [ -d "$key_dir" ]; then
+ create_detached_signature "$WORK_DIR/wrong_hash.txt" "$WORK_DIR/root_hash.p7s" \
+ "$key_dir/cert.pem" "$key_dir/private.pem"
+ if activate_verity_device "yes"; then
+ log_fail "Signature over wrong data should be rejected"
+ deactivate_verity_device
+ cp "$WORK_DIR/root_hash.p7s.orig" "$WORK_DIR/root_hash.p7s"
+ return 1
+ else
+ log_pass "Signature over wrong data correctly rejected"
+ fi
+ else
+ log_warn "Key directory not found, skipping wrong data test"
+ fi
+
+ # Restore original signature
+ cp "$WORK_DIR/root_hash.p7s.orig" "$WORK_DIR/root_hash.p7s"
+
+ log_pass "Corrupted signature test completed successfully"
+ return 0
+}
+
+#
+# Test: Verify keyring is sealed when keyring_unsealed=0
+#
+# With keyring_unsealed=0 the keyring is expected to refuse new keys.
+# Returns 0 when an add attempt fails, 1 when it unexpectedly succeeds.
+test_keyring_sealed_by_default() {
+	local ring
+
+	log_info "TEST: Verify keyring is sealed by default (keyring_unsealed=0)"
+
+	ring=$(cat "$WORK_DIR/keyring_id")
+
+	log_info "Current keyring state (should be empty and sealed):"
+	list_keyring_keys
+	show_keyring_status
+
+	# Provides $WORK_DIR/cert.der for the upload attempt.
+	generate_keys
+
+	# The add must fail if the keyring is sealed.
+	log_info "Attempting to add key to sealed keyring..."
+	if ! keyctl padd asymmetric "dm-verity-test" "$ring" \
+		< "$WORK_DIR/cert.der" 2>/dev/null; then
+		log_pass "Keyring is correctly sealed when keyring_unsealed=0"
+		log_info "Keyring state after failed add attempt:"
+		list_keyring_keys
+		return 0
+	fi
+
+	log_fail "Keyring should be sealed when keyring_unsealed=0"
+	list_keyring_keys
+	return 1
+}
+
+#
+# Test: Verify dm-verity keyring is inactive when sealed empty
+#
+test_keyring_inactive_when_empty() {
+ log_info "TEST: Verify dm-verity keyring is inactive when sealed empty"
+
+ # When keyring_unsealed=0, the keyring is sealed immediately while empty
+ # This means it should NOT be used for verification (nr_leaves_on_tree=0)
+
+ log_info "Keyring state (should be empty and sealed):"
+ list_keyring_keys
+ show_keyring_status
+
+ create_test_device
+ create_verity_hash
+
+ # Without any keys in the dm-verity keyring, and with it sealed,
+ # verification should fall through to the secondary/platform keyrings
+ # and likely succeed (if require_signatures=0) or fail (if =1)
+
+ log_info "Sub-test: Device activation with sealed empty keyring"
+ if [ "$REQUIRE_SIGNATURES" = "Y" ] || [ "$REQUIRE_SIGNATURES" = "1" ]; then
+ if activate_verity_device "no"; then
+ log_fail "Device should NOT activate without signature when require_signatures=1"
+ deactivate_verity_device
+ return 1
+ else
+ log_pass "Device correctly rejected (require_signatures=1, no valid signature)"
+ fi
+ else
+ if activate_verity_device "no"; then
+ log_pass "Device activated (require_signatures=0, empty dm-verity keyring is inactive)"
+ deactivate_verity_device
+ else
+ log_fail "Device should activate when require_signatures=0"
+ return 1
+ fi
+ fi
+
+ return 0
+}
+
+# Entry point. Creates the work directory, runs the unsealed-keyring tests,
+# detaches the loop devices, then runs the sealed-keyring tests.
+# Returns 0 only if every test function returned success, 1 otherwise.
+# A failing test does not stop the run; later tests are still executed.
+main() {
+ local rc=0
+
+ log_info "=== dm-verity keyring test ==="
+ log_info ""
+
+ # Create work directory
+ # NOTE(review): nothing in main removes it again - presumably a cleanup
+ # handler defined elsewhere in the script does; confirm.
+ WORK_DIR=$(mktemp -d -t dm-verity-test.XXXXXX)
+ log_info "Work directory: $WORK_DIR"
+
+ check_requirements
+
+ #
+ # Test 1: UNSEALED keyring mode (keyring_unsealed=1)
+ #
+ log_info ""
+ log_info "========================================"
+ log_info "=== TEST MODE: UNSEALED KEYRING ==="
+ log_info "========================================"
+ log_info ""
+
+ load_dm_verity_module 1 1 # keyring_unsealed=1, require_signatures=1
+ show_keyring_status
+
+ # Uploads keys, seals the keyring and leaves the test devices and a
+ # signature file behind for the tests that follow.
+ log_info ""
+ if ! test_multiple_keys; then
+ rc=1
+ fi
+
+ # After sealing, verify it rejects new keys
+ log_info ""
+ if ! test_sealed_keyring_rejects_keys; then
+ rc=1
+ fi
+
+ # Test corrupted signatures are rejected
+ log_info ""
+ if ! test_corrupted_signature; then
+ rc=1
+ fi
+
+ # Clean up devices before reloading module
+ # Only paths that look like loop devices are detached; the variables are
+ # cleared afterwards so stale device names are not reused.
+ deactivate_verity_device
+ if [ -n "$DATA_DEV" ] && [[ "$DATA_DEV" == /dev/loop* ]]; then
+ losetup -d "$DATA_DEV" 2>/dev/null || true
+ DATA_DEV=""
+ fi
+ if [ -n "$HASH_DEV" ] && [[ "$HASH_DEV" == /dev/loop* ]]; then
+ losetup -d "$HASH_DEV" 2>/dev/null || true
+ HASH_DEV=""
+ fi
+
+ #
+ # Test 2: SEALED keyring mode (keyring_unsealed=0, default)
+ #
+ log_info ""
+ log_info "========================================"
+ log_info "=== TEST MODE: SEALED KEYRING (default) ==="
+ log_info "========================================"
+ log_info ""
+
+ load_dm_verity_module 0 0 # keyring_unsealed=0, require_signatures=0
+ show_keyring_status
+
+ log_info ""
+ if ! test_keyring_sealed_by_default; then
+ rc=1
+ fi
+
+ log_info ""
+ if ! test_keyring_inactive_when_empty; then
+ rc=1
+ fi
+
+ #
+ # Summary
+ #
+ log_info ""
+ log_info "========================================"
+ if [ $rc -eq 0 ]; then
+ log_info "=== All tests PASSED ==="
+ else
+ log_error "=== Some tests FAILED ==="
+ fi
+ log_info "========================================"
+
+ return $rc
+}
+
+# Run the suite, forwarding the command-line arguments unchanged.
+main "$@"
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index f5c71d993750..8154d6d429d3 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -15,12 +15,6 @@ TEST_PROGS := \
hds.py \
napi_id.py \
napi_threaded.py \
- netcons_basic.sh \
- netcons_cmdline.sh \
- netcons_fragmented_msg.sh \
- netcons_overflow.sh \
- netcons_sysdata.sh \
- netcons_torture.sh \
netpoll_basic.py \
ping.py \
psp.py \
diff --git a/tools/testing/selftests/drivers/net/gro.c b/tools/testing/selftests/drivers/net/gro.c
index e894037d2e3e..3c0745b68bfa 100644
--- a/tools/testing/selftests/drivers/net/gro.c
+++ b/tools/testing/selftests/drivers/net/gro.c
@@ -3,26 +3,45 @@
* This testsuite provides conformance testing for GRO coalescing.
*
* Test cases:
- * 1.data
+ *
+ * data_*:
* Data packets of the same size and same header setup with correct
* sequence numbers coalesce. The one exception being the last data
* packet coalesced: it can be smaller than the rest and coalesced
* as long as it is in the same flow.
- * 2.ack
+ * - data_same: same size packets coalesce
+ * - data_lrg_sml: large then small coalesces
+ * - data_sml_lrg: small then large doesn't coalesce
+ *
+ * ack:
* Pure ACK does not coalesce.
- * 3.flags
- * Specific test cases: no packets with PSH, SYN, URG, RST set will
- * be coalesced.
- * 4.tcp
+ *
+ * flags_*:
+ * No packets with PSH, SYN, URG, RST, CWR set will be coalesced.
+ * - flags_psh, flags_syn, flags_rst, flags_urg, flags_cwr
+ *
+ * tcp_*:
* Packets with incorrect checksum, non-consecutive seqno and
* different TCP header options shouldn't coalesce. Nit: given that
* some extension headers have paddings, such as timestamp, headers
- * that are padding differently would not be coalesced.
- * 5.ip:
- * Packets with different (ECN, TTL, TOS) header, ip options or
- * ip fragments (ipv6) shouldn't coalesce.
- * 6.large:
+ * that are padded differently would not be coalesced.
+ * - tcp_csum: incorrect checksum
+ * - tcp_seq: non-consecutive sequence numbers
+ * - tcp_ts: different timestamps
+ * - tcp_opt: different TCP options
+ *
+ * ip_*:
+ * Packets with different (ECN, TTL, TOS) header, IP options or
+ * IP fragments shouldn't coalesce.
+ * - ip_ecn, ip_tos: shared between IPv4/IPv6
+ * - ip_ttl, ip_opt, ip_frag4: IPv4 only
+ * - ip_id_df*: IPv4 IP ID field coalescing tests
+ * - ip_frag6, ip_v6ext_*: IPv6 only
+ *
+ * large_*:
* Packets larger than GRO_MAX_SIZE packets shouldn't coalesce.
+ * - large_max: exceeding max size
+ * - large_rem: remainder handling
*
* MSS is defined as 4096 - header because if it is too small
* (i.e. 1500 MTU - header), it will result in many packets,
@@ -79,6 +98,15 @@
#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+/* IPv4 DF flag / IP ID combinations handled by send_flush_id_case() */
+enum flush_id_case {
+ FLUSH_ID_DF1_INC, /* DF=1, incrementing IDs - should coalesce */
+ FLUSH_ID_DF1_FIXED, /* DF=1, fixed ID - should coalesce */
+ FLUSH_ID_DF0_INC, /* DF=0, incrementing IDs - should coalesce */
+ FLUSH_ID_DF0_FIXED, /* DF=0, fixed ID - should coalesce */
+ FLUSH_ID_DF1_INC_FIXED, /* DF=1, two incrementing then one fixed - only the first two coalesce */
+ FLUSH_ID_DF1_FIXED_INC, /* DF=1, two fixed then one incrementing - only the first two coalesce */
+};
+
static const char *addr6_src = "fdaa::2";
static const char *addr6_dst = "fdaa::1";
static const char *addr4_src = "192.168.1.200";
@@ -95,7 +123,6 @@ static int tcp_offset = -1;
static int total_hdr_len = -1;
static int ethhdr_proto = -1;
static bool ipip;
-static const int num_flush_id_cases = 6;
static void vlog(const char *fmt, ...)
{
@@ -127,19 +154,19 @@ static void setup_sock_filter(int fd)
/* Overridden later if exthdrs are used: */
opt_ipproto_off = ipproto_off;
- if (strcmp(testname, "ip") == 0) {
- if (proto == PF_INET)
- optlen = sizeof(struct ip_timestamp);
- else {
- BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
- BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
- BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);
-
- /* same size for HBH and Fragment extension header types */
- optlen = MIN_EXTHDR_SIZE;
- opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
- + offsetof(struct ip6_ext, ip6e_nxt);
- }
+ if (strcmp(testname, "ip_opt") == 0) {
+ optlen = sizeof(struct ip_timestamp);
+ } else if (strcmp(testname, "ip_frag6") == 0 ||
+ strcmp(testname, "ip_v6ext_same") == 0 ||
+ strcmp(testname, "ip_v6ext_diff") == 0) {
+ BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
+ BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
+ BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);
+
+ /* same size for HBH and Fragment extension header types */
+ optlen = MIN_EXTHDR_SIZE;
+ opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
+ + offsetof(struct ip6_ext, ip6e_nxt);
}
/* this filter validates the following:
@@ -333,32 +360,58 @@ static void create_packet(void *buf, int seq_offset, int ack_offset,
fill_datalinklayer(buf);
}
-/* send one extra flag, not first and not last pkt */
-static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
- int rst, int urg)
+#ifndef TH_CWR
+#define TH_CWR 0x80
+#endif
+static void set_flags(struct tcphdr *tcph, int payload_len, int psh, int syn,
+ int rst, int urg, int cwr)
{
- static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- int payload_len, pkt_size, flag, i;
- struct tcphdr *tcph;
-
- payload_len = PAYLOAD_LEN * psh;
- pkt_size = total_hdr_len + payload_len;
- flag = NUM_PACKETS / 2;
-
- create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
-
- tcph = (struct tcphdr *)(flag_buf + tcp_offset);
tcph->psh = psh;
tcph->syn = syn;
tcph->rst = rst;
tcph->urg = urg;
+ if (cwr)
+ tcph->th_flags |= TH_CWR;
+ else
+ tcph->th_flags &= ~TH_CWR;
tcph->check = 0;
tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+/* Set the extra flags on packet (NUM_PACKETS / 2) and, for CWR only, also
+ * on packet (NUM_PACKETS / 2 - 1); never on the first or the last packet.
+ */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+ int rst, int urg, int cwr)
+{
+ static char flag_buf[2][MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ int payload_len, pkt_size, i;
+ struct tcphdr *tcph;
+ int flag[2];
+
+ payload_len = PAYLOAD_LEN * (psh || cwr);
+ pkt_size = total_hdr_len + payload_len;
+ flag[0] = NUM_PACKETS / 2;
+ flag[1] = NUM_PACKETS / 2 - 1;
+
+ /* Create and configure packets with flags
+ */
+ for (i = 0; i < 2; i++) {
+ if (flag[i] > 0) {
+ create_packet(flag_buf[i], flag[i] * payload_len, 0,
+ payload_len, 0);
+ tcph = (struct tcphdr *)(flag_buf[i] + tcp_offset);
+ set_flags(tcph, payload_len, psh, syn, rst, urg, cwr);
+ }
+ }
for (i = 0; i < NUM_PACKETS + 1; i++) {
- if (i == flag) {
- write_packet(fd, flag_buf, pkt_size, daddr);
+ if (i == flag[0]) {
+ write_packet(fd, flag_buf[0], pkt_size, daddr);
+ continue;
+ } else if (i == flag[1] && cwr) {
+ write_packet(fd, flag_buf[1], pkt_size, daddr);
continue;
}
create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
@@ -648,7 +701,8 @@ static void fix_ip4_checksum(struct iphdr *iph)
iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
}
-static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
+static void send_flush_id_case(int fd, struct sockaddr_ll *daddr,
+ enum flush_id_case tcase)
{
static char buf1[MAX_HDR_LEN + PAYLOAD_LEN];
static char buf2[MAX_HDR_LEN + PAYLOAD_LEN];
@@ -667,7 +721,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
switch (tcase) {
- case 0: /* DF=1, Incrementing - should coalesce */
+ case FLUSH_ID_DF1_INC: /* DF=1, Incrementing - should coalesce */
iph1->frag_off |= htons(IP_DF);
iph1->id = htons(8);
@@ -675,7 +729,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
iph2->id = htons(9);
break;
- case 1: /* DF=1, Fixed - should coalesce */
+ case FLUSH_ID_DF1_FIXED: /* DF=1, Fixed - should coalesce */
iph1->frag_off |= htons(IP_DF);
iph1->id = htons(8);
@@ -683,7 +737,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
iph2->id = htons(8);
break;
- case 2: /* DF=0, Incrementing - should coalesce */
+ case FLUSH_ID_DF0_INC: /* DF=0, Incrementing - should coalesce */
iph1->frag_off &= ~htons(IP_DF);
iph1->id = htons(8);
@@ -691,7 +745,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
iph2->id = htons(9);
break;
- case 3: /* DF=0, Fixed - should coalesce */
+ case FLUSH_ID_DF0_FIXED: /* DF=0, Fixed - should coalesce */
iph1->frag_off &= ~htons(IP_DF);
iph1->id = htons(8);
@@ -699,9 +753,10 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
iph2->id = htons(8);
break;
- case 4: /* DF=1, two packets incrementing, and one fixed - should
- * coalesce only the first two packets
- */
+ case FLUSH_ID_DF1_INC_FIXED: /* DF=1, two packets incrementing, and
+ * one fixed - should coalesce only the
+ * first two packets
+ */
iph1->frag_off |= htons(IP_DF);
iph1->id = htons(8);
@@ -713,9 +768,10 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
send_three = true;
break;
- case 5: /* DF=1, two packets fixed, and one incrementing - should
- * coalesce only the first two packets
- */
+ case FLUSH_ID_DF1_FIXED_INC: /* DF=1, two packets fixed, and one
+ * incrementing - should coalesce only
+ * the first two packets
+ */
iph1->frag_off |= htons(IP_DF);
iph1->id = htons(8);
@@ -739,16 +795,6 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
}
}
-static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt)
-{
- for (int i = 0; i < num_flush_id_cases; i++) {
- sleep(1);
- send_flush_id_case(fd, daddr, i);
- sleep(1);
- write_packet(fd, fin_pkt, total_hdr_len, daddr);
- }
-}
-
static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
{
static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
@@ -926,6 +972,28 @@ static void set_timeout(int fd)
error(1, errno, "cannot set timeout, setsockopt failed");
}
+static void set_rcvbuf(int fd)
+{
+ int bufsize = 1 * 1024 * 1024; /* 1 MB */
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(bufsize)))
+ error(1, errno, "cannot set rcvbuf size, setsockopt failed");
+}
+
+/* Report a failed recv(): print the packet socket's packet/drop counters to
+ * stderr, then exit through error() with @rcv_errno, the errno saved by the
+ * caller right after the failing recv().
+ */
+static void recv_error(int fd, int rcv_errno)
+{
+	struct tpacket_stats pkt_stats;
+	socklen_t optlen = sizeof(pkt_stats);
+	int ret;
+
+	ret = getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &pkt_stats,
+			 &optlen);
+	if (ret != 0)
+		error(1, errno, "can't get stats");
+
+	fprintf(stderr, "Socket stats: packets=%u, drops=%u\n",
+		pkt_stats.tp_packets, pkt_stats.tp_drops);
+	error(1, rcv_errno, "could not receive");
+}
+
static void check_recv_pkts(int fd, int *correct_payload,
int correct_num_pkts)
{
@@ -950,7 +1018,7 @@ static void check_recv_pkts(int fd, int *correct_payload,
ip_ext_len = 0;
pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
if (pkt_size < 0)
- error(1, errno, "could not receive");
+ recv_error(fd, errno);
if (iph->version == 4)
ip_ext_len = (iph->ihl - 5) * 4;
@@ -1008,108 +1076,131 @@ static void gro_sender(void)
daddr.sll_halen = ETH_ALEN;
create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
- if (strcmp(testname, "data") == 0) {
+ /* data sub-tests */
+ if (strcmp(testname, "data_same") == 0) {
send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+ } else if (strcmp(testname, "data_lrg_sml") == 0) {
send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+ } else if (strcmp(testname, "data_sml_lrg") == 0) {
send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ /* ack test */
} else if (strcmp(testname, "ack") == 0) {
send_ack(txfd, &daddr);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else if (strcmp(testname, "flags") == 0) {
- send_flags(txfd, &daddr, 1, 0, 0, 0);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- send_flags(txfd, &daddr, 0, 1, 0, 0);
+ /* flags sub-tests */
+ } else if (strcmp(testname, "flags_psh") == 0) {
+ send_flags(txfd, &daddr, 1, 0, 0, 0, 0);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_flags(txfd, &daddr, 0, 0, 1, 0);
+ } else if (strcmp(testname, "flags_syn") == 0) {
+ send_flags(txfd, &daddr, 0, 1, 0, 0, 0);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_flags(txfd, &daddr, 0, 0, 0, 1);
+ } else if (strcmp(testname, "flags_rst") == 0) {
+ send_flags(txfd, &daddr, 0, 0, 1, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "flags_urg") == 0) {
+ send_flags(txfd, &daddr, 0, 0, 0, 1, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "flags_cwr") == 0) {
+ send_flags(txfd, &daddr, 0, 0, 0, 0, 1);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else if (strcmp(testname, "tcp") == 0) {
+
+ /* tcp sub-tests */
+ } else if (strcmp(testname, "tcp_csum") == 0) {
send_changed_checksum(txfd, &daddr);
- /* Adding sleep before sending FIN so that it is not
- * received prior to other packets.
- */
usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+ } else if (strcmp(testname, "tcp_seq") == 0) {
send_changed_seq(txfd, &daddr);
usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+ } else if (strcmp(testname, "tcp_ts") == 0) {
send_changed_ts(txfd, &daddr);
usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+ } else if (strcmp(testname, "tcp_opt") == 0) {
send_diff_opt(txfd, &daddr);
usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else if (strcmp(testname, "ip") == 0) {
+
+ /* ip sub-tests - shared between IPv4 and IPv6 */
+ } else if (strcmp(testname, "ip_ecn") == 0) {
send_changed_ECN(txfd, &daddr);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+ } else if (strcmp(testname, "ip_tos") == 0) {
send_changed_tos(txfd, &daddr);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- if (proto == PF_INET) {
- /* Modified packets may be received out of order.
- * Sleep function added to enforce test boundaries
- * so that fin pkts are not received prior to other pkts.
- */
- sleep(1);
- send_changed_ttl(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- send_ip_options(txfd, &daddr);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- send_fragment4(txfd, &daddr);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- test_flush_id(txfd, &daddr, fin_pkt);
- } else if (proto == PF_INET6) {
- sleep(1);
- send_fragment6(txfd, &daddr);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- /* send IPv6 packets with ext header with same payload */
- send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- /* send IPv6 packets with ext header with different payload */
- send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- }
- } else if (strcmp(testname, "large") == 0) {
- /* 20 is the difference between min iphdr size
- * and min ipv6hdr size. Like MAX_HDR_SIZE,
- * MAX_PAYLOAD is defined with the larger header of the two.
- */
+
+ /* ip sub-tests - IPv4 only */
+ } else if (strcmp(testname, "ip_ttl") == 0) {
+ send_changed_ttl(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_opt") == 0) {
+ send_ip_options(txfd, &daddr);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_frag4") == 0) {
+ send_fragment4(txfd, &daddr);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_id_df1_inc") == 0) {
+ send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_INC);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_id_df1_fixed") == 0) {
+ send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_FIXED);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_id_df0_inc") == 0) {
+ send_flush_id_case(txfd, &daddr, FLUSH_ID_DF0_INC);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_id_df0_fixed") == 0) {
+ send_flush_id_case(txfd, &daddr, FLUSH_ID_DF0_FIXED);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_id_df1_inc_fixed") == 0) {
+ send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_INC_FIXED);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_id_df1_fixed_inc") == 0) {
+ send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_FIXED_INC);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ /* ip sub-tests - IPv6 only */
+ } else if (strcmp(testname, "ip_frag6") == 0) {
+ send_fragment6(txfd, &daddr);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_v6ext_same") == 0) {
+ send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_v6ext_diff") == 0) {
+ send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ /* large sub-tests */
+ } else if (strcmp(testname, "large_max") == 0) {
int offset = (proto == PF_INET && !ipip) ? 20 : 0;
int remainder = (MAX_PAYLOAD + offset) % MSS;
send_large(txfd, &daddr, remainder);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "large_rem") == 0) {
+ int offset = (proto == PF_INET && !ipip) ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
send_large(txfd, &daddr, remainder + 1);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
} else {
- error(1, 0, "Unknown testcase");
+ error(1, 0, "Unknown testcase: %s", testname);
}
if (close(txfd))
@@ -1126,132 +1217,166 @@ static void gro_receiver(void)
error(1, 0, "socket creation");
setup_sock_filter(rxfd);
set_timeout(rxfd);
+ set_rcvbuf(rxfd);
bind_packetsocket(rxfd);
ksft_ready();
memset(correct_payload, 0, sizeof(correct_payload));
- if (strcmp(testname, "data") == 0) {
+ /* data sub-tests */
+ if (strcmp(testname, "data_same") == 0) {
printf("pure data packet of same size: ");
correct_payload[0] = PAYLOAD_LEN * 2;
check_recv_pkts(rxfd, correct_payload, 1);
-
+ } else if (strcmp(testname, "data_lrg_sml") == 0) {
printf("large data packets followed by a smaller one: ");
correct_payload[0] = PAYLOAD_LEN * 1.5;
check_recv_pkts(rxfd, correct_payload, 1);
-
+ } else if (strcmp(testname, "data_sml_lrg") == 0) {
printf("small data packets followed by a larger one: ");
correct_payload[0] = PAYLOAD_LEN / 2;
correct_payload[1] = PAYLOAD_LEN;
check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* ack test */
} else if (strcmp(testname, "ack") == 0) {
printf("duplicate ack and pure ack: ");
check_recv_pkts(rxfd, correct_payload, 3);
- } else if (strcmp(testname, "flags") == 0) {
+
+ /* flags sub-tests */
+ } else if (strcmp(testname, "flags_psh") == 0) {
correct_payload[0] = PAYLOAD_LEN * 3;
correct_payload[1] = PAYLOAD_LEN * 2;
-
printf("psh flag ends coalescing: ");
check_recv_pkts(rxfd, correct_payload, 2);
-
+ } else if (strcmp(testname, "flags_syn") == 0) {
correct_payload[0] = PAYLOAD_LEN * 2;
correct_payload[1] = 0;
correct_payload[2] = PAYLOAD_LEN * 2;
printf("syn flag ends coalescing: ");
check_recv_pkts(rxfd, correct_payload, 3);
-
+ } else if (strcmp(testname, "flags_rst") == 0) {
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = 0;
+ correct_payload[2] = PAYLOAD_LEN * 2;
printf("rst flag ends coalescing: ");
check_recv_pkts(rxfd, correct_payload, 3);
-
+ } else if (strcmp(testname, "flags_urg") == 0) {
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = 0;
+ correct_payload[2] = PAYLOAD_LEN * 2;
printf("urg flag ends coalescing: ");
check_recv_pkts(rxfd, correct_payload, 3);
- } else if (strcmp(testname, "tcp") == 0) {
+ } else if (strcmp(testname, "flags_cwr") == 0) {
correct_payload[0] = PAYLOAD_LEN;
- correct_payload[1] = PAYLOAD_LEN;
- correct_payload[2] = PAYLOAD_LEN;
- correct_payload[3] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN * 2;
+ correct_payload[2] = PAYLOAD_LEN * 2;
+ printf("cwr flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ /* tcp sub-tests */
+ } else if (strcmp(testname, "tcp_csum") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
printf("changed checksum does not coalesce: ");
check_recv_pkts(rxfd, correct_payload, 2);
-
+ } else if (strcmp(testname, "tcp_seq") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
printf("Wrong Seq number doesn't coalesce: ");
check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("Different timestamp doesn't coalesce: ");
+ } else if (strcmp(testname, "tcp_ts") == 0) {
correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ correct_payload[3] = PAYLOAD_LEN;
+ printf("Different timestamp doesn't coalesce: ");
check_recv_pkts(rxfd, correct_payload, 4);
-
- printf("Different options doesn't coalesce: ");
+ } else if (strcmp(testname, "tcp_opt") == 0) {
correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ printf("Different options doesn't coalesce: ");
check_recv_pkts(rxfd, correct_payload, 2);
- } else if (strcmp(testname, "ip") == 0) {
+
+ /* ip sub-tests - shared between IPv4 and IPv6 */
+ } else if (strcmp(testname, "ip_ecn") == 0) {
correct_payload[0] = PAYLOAD_LEN;
correct_payload[1] = PAYLOAD_LEN;
-
printf("different ECN doesn't coalesce: ");
check_recv_pkts(rxfd, correct_payload, 2);
-
+ } else if (strcmp(testname, "ip_tos") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
printf("different tos doesn't coalesce: ");
check_recv_pkts(rxfd, correct_payload, 2);
- if (proto == PF_INET) {
- printf("different ttl doesn't coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("ip options doesn't coalesce: ");
- correct_payload[2] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 3);
-
- printf("fragmented ip4 doesn't coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- /* is_atomic checks */
- printf("DF=1, Incrementing - should coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("DF=1, Fixed - should coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("DF=0, Incrementing - should coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("DF=0, Fixed - should coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
- } else if (proto == PF_INET6) {
- /* GRO doesn't check for ipv6 hop limit when flushing.
- * Hence no corresponding test to the ipv4 case.
- */
- printf("fragmented ip6 doesn't coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- correct_payload[1] = PAYLOAD_LEN;
- correct_payload[2] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 3);
-
- printf("ipv6 with ext header does coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("ipv6 with ext header with different payloads doesn't coalesce: ");
- correct_payload[0] = PAYLOAD_LEN;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
- }
- } else if (strcmp(testname, "large") == 0) {
+ /* ip sub-tests - IPv4 only */
+ } else if (strcmp(testname, "ip_ttl") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ printf("different ttl doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ip_opt") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ printf("ip options doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "ip_frag4") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ printf("fragmented ip4 doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ip_id_df1_inc") == 0) {
+ printf("DF=1, Incrementing - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+ } else if (strcmp(testname, "ip_id_df1_fixed") == 0) {
+ printf("DF=1, Fixed - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+ } else if (strcmp(testname, "ip_id_df0_inc") == 0) {
+ printf("DF=0, Incrementing - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+ } else if (strcmp(testname, "ip_id_df0_fixed") == 0) {
+ printf("DF=0, Fixed - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+ } else if (strcmp(testname, "ip_id_df1_inc_fixed") == 0) {
+ printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ip_id_df1_fixed_inc") == 0) {
+ printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* ip sub-tests - IPv6 only */
+ } else if (strcmp(testname, "ip_frag6") == 0) {
+ /* GRO doesn't check for ipv6 hop limit when flushing.
+ * Hence no corresponding test to the ipv4 case.
+ */
+ printf("fragmented ip6 doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "ip_v6ext_same") == 0) {
+ printf("ipv6 with ext header does coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+ } else if (strcmp(testname, "ip_v6ext_diff") == 0) {
+ printf("ipv6 with ext header with different payloads doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* large sub-tests */
+ } else if (strcmp(testname, "large_max") == 0) {
int offset = (proto == PF_INET && !ipip) ? 20 : 0;
int remainder = (MAX_PAYLOAD + offset) % MSS;
@@ -1259,14 +1384,18 @@ static void gro_receiver(void)
correct_payload[1] = remainder;
printf("Shouldn't coalesce if exceed IP max pkt size: ");
check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "large_rem") == 0) {
+ int offset = (proto == PF_INET && !ipip) ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
/* last segment sent individually, doesn't start new segment */
- correct_payload[0] = correct_payload[0] - remainder;
+ correct_payload[0] = (MAX_PAYLOAD + offset) - remainder;
correct_payload[1] = remainder + 1;
correct_payload[2] = remainder + 1;
+ printf("last segment sent individually: ");
check_recv_pkts(rxfd, correct_payload, 3);
} else {
- error(1, 0, "Test case error, should never trigger");
+ error(1, 0, "Test case error: unknown testname %s", testname);
}
if (close(rxfd))
diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py
index ba83713bf7b5..cbc1b19dbc91 100755
--- a/tools/testing/selftests/drivers/net/gro.py
+++ b/tools/testing/selftests/drivers/net/gro.py
@@ -9,18 +9,36 @@ binary in different configurations and checking for correct packet
coalescing behavior.
Test cases:
- - data: Data packets with same size/headers and correct seq numbers coalesce
+ - data_same: Same size data packets coalesce
+ - data_lrg_sml: Large packet followed by smaller one coalesces
+ - data_sml_lrg: Small packet followed by larger one doesn't coalesce
- ack: Pure ACK packets do not coalesce
- - flags: Packets with PSH, SYN, URG, RST flags do not coalesce
- - tcp: Packets with incorrect checksum, non-consecutive seqno don't coalesce
- - ip: Packets with different ECN, TTL, TOS, or IP options don't coalesce
- - large: Packets larger than GRO_MAX_SIZE don't coalesce
+ - flags_psh: Packets with PSH flag don't coalesce
+ - flags_syn: Packets with SYN flag don't coalesce
+ - flags_rst: Packets with RST flag don't coalesce
+ - flags_urg: Packets with URG flag don't coalesce
+ - flags_cwr: Packets with CWR flag don't coalesce
+ - tcp_csum: Packets with incorrect checksum don't coalesce
+ - tcp_seq: Packets with non-consecutive seqno don't coalesce
+ - tcp_ts: Packets with different timestamp options don't coalesce
+ - tcp_opt: Packets with different TCP options don't coalesce
+ - ip_ecn: Packets with different ECN don't coalesce
+ - ip_tos: Packets with different TOS don't coalesce
+ - ip_ttl: (IPv4) Packets with different TTL don't coalesce
+ - ip_opt: (IPv4) Packets with IP options don't coalesce
+ - ip_frag4: (IPv4) IPv4 fragments don't coalesce
+ - ip_id_df*: (IPv4) IP ID field coalescing tests
+ - ip_frag6: (IPv6) IPv6 fragments don't coalesce
+ - ip_v6ext_same: (IPv6) IPv6 ext header with same payload coalesces
+ - ip_v6ext_diff: (IPv6) IPv6 ext header with different payload doesn't coalesce
+ - large_max: Packets exceeding GRO_MAX_SIZE don't coalesce
+ - large_rem: Large packet remainder handling
"""
import os
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import NetDrvEpEnv, KsftXfailEx
-from lib.py import cmd, defer, bkg, ip
+from lib.py import bkg, cmd, defer, ethtool, ip
from lib.py import ksft_variants
@@ -70,49 +88,150 @@ def _set_mtu_restore(dev, mtu, host):
defer(ip, f"link set dev {dev['ifname']} mtu {dev['mtu']}", host=host)
-def _setup(cfg, test_name):
+def _set_ethtool_feat(dev, current, feats, host=None):
+ s2n = {True: "on", False: "off"}
+
+ new = ["-K", dev]
+ old = ["-K", dev]
+ no_change = True
+ for name, state in feats.items():
+ new += [name, s2n[state]]
+ old += [name, s2n[current[name]["active"]]]
+
+ if current[name]["active"] != state:
+ no_change = False
+ if current[name]["fixed"]:
+ raise KsftXfailEx(f"Device does not support {name}")
+ if no_change:
+ return
+
+ eth_cmd = ethtool(" ".join(new), host=host)
+ defer(ethtool, " ".join(old), host=host)
+
+ # If ethtool printed something kernel must have modified some features
+ if eth_cmd.stdout:
+ ksft_pr(eth_cmd)
+
+
+def _setup(cfg, mode, test_name):
""" Setup hardware loopback mode for GRO testing. """
if not hasattr(cfg, "bin_remote"):
cfg.bin_local = cfg.test_dir / "gro"
cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
- # "large" test needs at least 4k MTU
- if test_name == "large":
+ if not hasattr(cfg, "feat"):
+ cfg.feat = ethtool(f"-k {cfg.ifname}", json=True)[0]
+ cfg.remote_feat = ethtool(f"-k {cfg.remote_ifname}",
+ host=cfg.remote, json=True)[0]
+
+ # "large_*" tests need at least 4k MTU
+ if test_name.startswith("large_"):
_set_mtu_restore(cfg.dev, 4096, None)
_set_mtu_restore(cfg.remote_dev, 4096, cfg.remote)
- flush_path = f"/sys/class/net/{cfg.ifname}/gro_flush_timeout"
- irq_path = f"/sys/class/net/{cfg.ifname}/napi_defer_hard_irqs"
-
- _write_defer_restore(cfg, flush_path, "200000", defer_undo=True)
- _write_defer_restore(cfg, irq_path, "10", defer_undo=True)
+ if mode == "sw":
+ flush_path = f"/sys/class/net/{cfg.ifname}/gro_flush_timeout"
+ irq_path = f"/sys/class/net/{cfg.ifname}/napi_defer_hard_irqs"
+
+ _write_defer_restore(cfg, flush_path, "200000", defer_undo=True)
+ _write_defer_restore(cfg, irq_path, "10", defer_undo=True)
+
+ _set_ethtool_feat(cfg.ifname, cfg.feat,
+ {"generic-receive-offload": True,
+ "rx-gro-hw": False,
+ "large-receive-offload": False})
+ elif mode == "hw":
+ _set_ethtool_feat(cfg.ifname, cfg.feat,
+ {"generic-receive-offload": False,
+ "rx-gro-hw": True,
+ "large-receive-offload": False})
+
+ # Some NICs treat HW GRO as a GRO sub-feature so disabling GRO
+ # will also clear HW GRO. Use a hack of installing XDP generic
+ # to skip SW GRO, even when enabled.
+ feat = ethtool(f"-k {cfg.ifname}", json=True)[0]
+ if not feat["rx-gro-hw"]["active"]:
+ ksft_pr("Driver clears HW GRO and SW GRO is cleared, using generic XDP workaround")
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ ip(f"link set dev {cfg.ifname} xdpgeneric obj {prog} sec xdp")
+ defer(ip, f"link set dev {cfg.ifname} xdpgeneric off")
+
+ # Attaching XDP may change features, fetch the latest state
+ feat = ethtool(f"-k {cfg.ifname}", json=True)[0]
+
+ _set_ethtool_feat(cfg.ifname, feat,
+ {"generic-receive-offload": True,
+ "rx-gro-hw": True,
+ "large-receive-offload": False})
+ elif mode == "lro":
+ # netdevsim advertises LRO for feature inheritance testing with
+ # bonding/team tests but it doesn't actually perform the offload
+ cfg.require_nsim(nsim_test=False)
+
+ _set_ethtool_feat(cfg.ifname, cfg.feat,
+ {"generic-receive-offload": False,
+ "rx-gro-hw": False,
+ "large-receive-offload": True})
try:
# Disable TSO for local tests
cfg.require_nsim() # will raise KsftXfailEx if not running on nsim
- cmd(f"ethtool -K {cfg.ifname} gro on tso off")
- cmd(f"ethtool -K {cfg.remote_ifname} gro on tso off", host=cfg.remote)
+ _set_ethtool_feat(cfg.remote_ifname, cfg.remote_feat,
+ {"tcp-segmentation-offload": False},
+ host=cfg.remote)
except KsftXfailEx:
pass
+
def _gro_variants():
"""Generator that yields all combinations of protocol and test types."""
- for protocol in ["ipv4", "ipv6", "ipip"]:
- for test_name in ["data", "ack", "flags", "tcp", "ip", "large"]:
- yield protocol, test_name
+ # Tests that work for all protocols
+ common_tests = [
+ "data_same", "data_lrg_sml", "data_sml_lrg",
+ "ack",
+ "flags_psh", "flags_syn", "flags_rst", "flags_urg", "flags_cwr",
+ "tcp_csum", "tcp_seq", "tcp_ts", "tcp_opt",
+ "ip_ecn", "ip_tos",
+ "large_max", "large_rem",
+ ]
+
+ # Tests specific to IPv4
+ ipv4_tests = [
+ "ip_ttl", "ip_opt", "ip_frag4",
+ "ip_id_df1_inc", "ip_id_df1_fixed",
+ "ip_id_df0_inc", "ip_id_df0_fixed",
+ "ip_id_df1_inc_fixed", "ip_id_df1_fixed_inc",
+ ]
+
+ # Tests specific to IPv6
+ ipv6_tests = [
+ "ip_frag6", "ip_v6ext_same", "ip_v6ext_diff",
+ ]
+
+ for mode in ["sw", "hw", "lro"]:
+ for protocol in ["ipv4", "ipv6", "ipip"]:
+ for test_name in common_tests:
+ yield mode, protocol, test_name
+
+ if protocol in ["ipv4", "ipip"]:
+ for test_name in ipv4_tests:
+ yield mode, protocol, test_name
+ elif protocol == "ipv6":
+ for test_name in ipv6_tests:
+ yield mode, protocol, test_name
@ksft_variants(_gro_variants())
-def test(cfg, protocol, test_name):
+def test(cfg, mode, protocol, test_name):
"""Run a single GRO test with retries."""
ipver = "6" if protocol[-1] == "6" else "4"
cfg.require_ipver(ipver)
- _setup(cfg, test_name)
+ _setup(cfg, mode, test_name)
base_cmd_args = [
f"--{protocol}",
@@ -142,10 +261,9 @@ def test(cfg, protocol, test_name):
if rx_proc.ret == 0:
return
- ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# '))
- ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# '))
+ ksft_pr(rx_proc)
- if test_name == "large" and os.environ.get("KSFT_MACHINE_SLOW"):
+ if test_name.startswith("large_") and os.environ.get("KSFT_MACHINE_SLOW"):
ksft_pr(f"Ignoring {protocol}/{test_name} failure due to slow environment")
return
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index 9c163ba6feee..a64140333a46 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -35,6 +35,7 @@ TEST_PROGS = \
pp_alloc_fail.py \
rss_api.py \
rss_ctx.py \
+ rss_drv.py \
rss_flow_label.py \
rss_input_xfrm.py \
toeplitz.py \
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py
index 45c2d49d55b6..ee863e90d1e0 100755
--- a/tools/testing/selftests/drivers/net/hw/devmem.py
+++ b/tools/testing/selftests/drivers/net/hw/devmem.py
@@ -63,12 +63,29 @@ def check_tx_chunks(cfg) -> None:
ksft_eq(socat.stdout.strip(), "hello\nworld")
+def check_rx_hds(cfg) -> None:
+ """Test HDS splitting across payload sizes."""
+ require_devmem(cfg)
+
+ for size in [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]:
+ port = rand_port()
+ listen_cmd = f"{cfg.bin_local} -L -l -f {cfg.ifname} -s {cfg.addr} -p {port}"
+
+ with bkg(listen_cmd, exit_wait=True) as ncdevmem:
+ wait_port_listen(port)
+ cmd(f"dd if=/dev/zero bs={size} count=1 2>/dev/null | " +
+ f"socat -b {size} -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},nodelay",
+ host=cfg.remote, shell=True)
+
+ ksft_eq(ncdevmem.ret, 0, f"HDS failed for payload size {size}")
+
+
def main() -> None:
with NetDrvEpEnv(__file__) as cfg:
cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem")
cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
- ksft_run([check_rx, check_tx, check_tx_chunks],
+ ksft_run([check_rx, check_tx, check_tx_chunks, check_rx_hds],
args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
index 62456df947bc..240d13dbc54e 100644
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
@@ -12,6 +12,7 @@
#include <unistd.h>
#include <arpa/inet.h>
+#include <linux/mman.h>
#include <linux/errqueue.h>
#include <linux/if_packet.h>
#include <linux/ipv6.h>
@@ -37,6 +38,23 @@
#include <liburing.h>
+#define SKIP_CODE 42
+
+struct t_io_uring_zcrx_ifq_reg {
+ __u32 if_idx;
+ __u32 if_rxq;
+ __u32 rq_entries;
+ __u32 flags;
+
+ __u64 area_ptr; /* pointer to struct io_uring_zcrx_area_reg */
+ __u64 region_ptr; /* struct io_uring_region_desc * */
+
+ struct io_uring_zcrx_offsets offsets;
+ __u32 zcrx_id;
+ __u32 rx_buf_len;
+ __u64 __resv[3];
+};
+
static long page_size;
#define AREA_SIZE (8192 * page_size)
#define SEND_SIZE (512 * 4096)
@@ -65,6 +83,8 @@ static bool cfg_oneshot;
static int cfg_oneshot_recvs;
static int cfg_send_size = SEND_SIZE;
static struct sockaddr_in6 cfg_addr;
+static unsigned int cfg_rx_buf_len;
+static bool cfg_dry_run;
static char *payload;
static void *area_ptr;
@@ -128,14 +148,28 @@ static void setup_zcrx(struct io_uring *ring)
if (!ifindex)
error(1, 0, "bad interface name: %s", cfg_ifname);
- area_ptr = mmap(NULL,
- AREA_SIZE,
- PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE,
- 0,
- 0);
- if (area_ptr == MAP_FAILED)
- error(1, 0, "mmap(): zero copy area");
+ if (cfg_rx_buf_len && cfg_rx_buf_len != page_size) {
+ area_ptr = mmap(NULL,
+ AREA_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE |
+ MAP_HUGETLB | MAP_HUGE_2MB,
+ -1,
+ 0);
+ if (area_ptr == MAP_FAILED) {
+ printf("Can't allocate huge pages\n");
+ exit(SKIP_CODE);
+ }
+ } else {
+ area_ptr = mmap(NULL,
+ AREA_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE,
+ 0,
+ 0);
+ if (area_ptr == MAP_FAILED)
+ error(1, 0, "mmap(): zero copy area");
+ }
ring_size = get_refill_ring_size(rq_entries);
ring_ptr = mmap(NULL,
@@ -157,17 +191,23 @@ static void setup_zcrx(struct io_uring *ring)
.flags = 0,
};
- struct io_uring_zcrx_ifq_reg reg = {
+ struct t_io_uring_zcrx_ifq_reg reg = {
.if_idx = ifindex,
.if_rxq = cfg_queue_id,
.rq_entries = rq_entries,
.area_ptr = (__u64)(unsigned long)&area_reg,
.region_ptr = (__u64)(unsigned long)&region_reg,
+ .rx_buf_len = cfg_rx_buf_len,
};
- ret = io_uring_register_ifq(ring, &reg);
- if (ret)
+ ret = io_uring_register_ifq(ring, (void *)&reg);
+ if (cfg_rx_buf_len && (ret == -EINVAL || ret == -EOPNOTSUPP ||
+ ret == -ERANGE)) {
+ printf("Large chunks are not supported %i\n", ret);
+ exit(SKIP_CODE);
+ } else if (ret) {
error(1, 0, "io_uring_register_ifq(): %d", ret);
+ }
rq_ring.khead = (unsigned int *)((char *)ring_ptr + reg.offsets.head);
rq_ring.ktail = (unsigned int *)((char *)ring_ptr + reg.offsets.tail);
@@ -323,6 +363,8 @@ static void run_server(void)
io_uring_queue_init(512, &ring, flags);
setup_zcrx(&ring);
+ if (cfg_dry_run)
+ return;
add_accept(&ring, fd);
@@ -383,7 +425,7 @@ static void parse_opts(int argc, char **argv)
usage(argv[0]);
cfg_payload_len = max_payload_len;
- while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:")) != -1) {
+ while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:x:d")) != -1) {
switch (c) {
case 's':
if (cfg_client)
@@ -418,6 +460,12 @@ static void parse_opts(int argc, char **argv)
case 'z':
cfg_send_size = strtoul(optarg, NULL, 0);
break;
+ case 'x':
+ cfg_rx_buf_len = page_size * strtoul(optarg, NULL, 0);
+ break;
+ case 'd':
+ cfg_dry_run = true;
+ break;
}
}
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
index 712c806508b5..c63d6d6450d2 100755
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -3,104 +3,121 @@
import re
from os import path
-from lib.py import ksft_run, ksft_exit, KsftSkipEx
+from lib.py import ksft_run, ksft_exit, KsftSkipEx, ksft_variants, KsftNamedVariant
from lib.py import NetDrvEpEnv
from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
+from lib.py import EthtoolFamily
+SKIP_CODE = 42
-def _get_current_settings(cfg):
- output = ethtool(f"-g {cfg.ifname}", json=True)[0]
- return (output['rx'], output['hds-thresh'])
-
-
-def _get_combined_channels(cfg):
- output = ethtool(f"-l {cfg.ifname}").stdout
- values = re.findall(r'Combined:\s+(\d+)', output)
- return int(values[1])
-
-
-def _create_rss_ctx(cfg, chan):
- output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1").stdout
+def create_rss_ctx(cfg):
+ output = ethtool(f"-X {cfg.ifname} context new start {cfg.target} equal 1").stdout
values = re.search(r'New RSS context is (\d+)', output).group(1)
- ctx_id = int(values)
- return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}"))
+ return int(values)
-def _set_flow_rule(cfg, port, chan):
- output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}").stdout
+def set_flow_rule(cfg):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} action {cfg.target}").stdout
values = re.search(r'ID (\d+)', output).group(1)
return int(values)
-def _set_flow_rule_rss(cfg, port, ctx_id):
- output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}").stdout
+def set_flow_rule_rss(cfg, rss_ctx_id):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} context {rss_ctx_id}").stdout
values = re.search(r'ID (\d+)', output).group(1)
return int(values)
-def test_zcrx(cfg) -> None:
- cfg.require_ipver('6')
-
- combined_chans = _get_combined_channels(cfg)
- if combined_chans < 2:
- raise KsftSkipEx('at least 2 combined channels required')
- (rx_ring, hds_thresh) = _get_current_settings(cfg)
- port = rand_port()
-
- ethtool(f"-G {cfg.ifname} tcp-data-split on")
- defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+def single(cfg):
+ channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ channels = channels['combined-count']
+ if channels < 2:
+ raise KsftSkipEx('Test requires NETIF with at least 2 combined channels')
- ethtool(f"-G {cfg.ifname} hds-thresh 0")
- defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+ rings = cfg.ethnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ rx_rings = rings['rx']
+ hds_thresh = rings.get('hds-thresh', 0)
- ethtool(f"-G {cfg.ifname} rx 64")
- defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+ cfg.ethnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled',
+ 'hds-thresh': 0,
+ 'rx': 64})
+ defer(cfg.ethnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown',
+ 'hds-thresh': hds_thresh,
+ 'rx': rx_rings})
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ cfg.target = channels - 1
+ ethtool(f"-X {cfg.ifname} equal {cfg.target}")
defer(ethtool, f"-X {cfg.ifname} default")
- flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+ flow_rule_id = set_flow_rule(cfg)
defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
- rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
- tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
- with bkg(rx_cmd, exit_wait=True):
- wait_port_listen(port, proto="tcp")
- cmd(tx_cmd, host=cfg.remote)
+def rss(cfg):
+ channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ channels = channels['combined-count']
+ if channels < 2:
+ raise KsftSkipEx('Test requires NETIF with at least 2 combined channels')
-def test_zcrx_oneshot(cfg) -> None:
- cfg.require_ipver('6')
+ rings = cfg.ethnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ rx_rings = rings['rx']
+ hds_thresh = rings.get('hds-thresh', 0)
- combined_chans = _get_combined_channels(cfg)
- if combined_chans < 2:
- raise KsftSkipEx('at least 2 combined channels required')
- (rx_ring, hds_thresh) = _get_current_settings(cfg)
- port = rand_port()
+ cfg.ethnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled',
+ 'hds-thresh': 0,
+ 'rx': 64})
+ defer(cfg.ethnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown',
+ 'hds-thresh': hds_thresh,
+ 'rx': rx_rings})
- ethtool(f"-G {cfg.ifname} tcp-data-split on")
- defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+ cfg.target = channels - 1
+ ethtool(f"-X {cfg.ifname} equal {cfg.target}")
+ defer(ethtool, f"-X {cfg.ifname} default")
- ethtool(f"-G {cfg.ifname} hds-thresh 0")
- defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+ rss_ctx_id = create_rss_ctx(cfg)
+ defer(ethtool, f"-X {cfg.ifname} delete context {rss_ctx_id}")
- ethtool(f"-G {cfg.ifname} rx 64")
- defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+ flow_rule_id = set_flow_rule_rss(cfg, rss_ctx_id)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
- defer(ethtool, f"-X {cfg.ifname} default")
- flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
- defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
+@ksft_variants([
+ KsftNamedVariant("single", single),
+ KsftNamedVariant("rss", rss),
+])
+def test_zcrx(cfg, setup) -> None:
+ cfg.require_ipver('6')
- rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
- tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 4096 -z 16384"
+ setup(cfg)
+ rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target}"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 12840"
with bkg(rx_cmd, exit_wait=True):
- wait_port_listen(port, proto="tcp")
+ wait_port_listen(cfg.port, proto="tcp")
cmd(tx_cmd, host=cfg.remote)
-def test_zcrx_rss(cfg) -> None:
+@ksft_variants([
+ KsftNamedVariant("single", single),
+ KsftNamedVariant("rss", rss),
+])
+def test_zcrx_oneshot(cfg, setup) -> None:
+ cfg.require_ipver('6')
+
+ setup(cfg)
+ rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target} -o 4"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 4096 -z 16384"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(cfg.port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
+def test_zcrx_large_chunks(cfg) -> None:
+ """Test zcrx with large buffer chunks."""
+
cfg.require_ipver('6')
combined_chans = _get_combined_channels(cfg)
@@ -121,12 +138,16 @@ def test_zcrx_rss(cfg) -> None:
ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
defer(ethtool, f"-X {cfg.ifname} default")
- (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1)
- flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id)
+ flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
- rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -x 2"
tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+
+ probe = cmd(rx_cmd + " -d", fail=False)
+ if probe.ret == SKIP_CODE:
+ raise KsftSkipEx(probe.stdout)
+
with bkg(rx_cmd, exit_wait=True):
wait_port_listen(port, proto="tcp")
cmd(tx_cmd, host=cfg.remote)
@@ -137,7 +158,9 @@ def main() -> None:
cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx")
cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
- ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ cfg.ethnl = EthtoolFamily()
+ cfg.port = rand_port()
+ ksft_run(globs=globals(), cases=[test_zcrx, test_zcrx_oneshot], args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
index 766bfc4ad842..d5d247eca6b7 100644
--- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -22,7 +22,7 @@ try:
NlError, RtnlFamily, DevlinkFamily, PSPFamily
from net.lib.py import CmdExitFailure
from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
- fd_read_timeout, ip, rand_port, wait_port_listen, wait_file
+ fd_read_timeout, ip, rand_port, wait_port_listen, wait_file, tool
from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
ksft_setup, ksft_variants, KsftNamedVariant
@@ -37,7 +37,7 @@ try:
"CmdExitFailure",
"bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool",
"fd_read_timeout", "ip", "rand_port",
- "wait_port_listen", "wait_file",
+ "wait_port_listen", "wait_file", "tool",
"KsftSkipEx", "KsftFailEx", "KsftXfailEx",
"ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run",
"ksft_setup", "ksft_variants", "KsftNamedVariant",
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 3288ed04ce08..e098d6534c3c 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -48,6 +48,7 @@
#include <errno.h>
#define __iovec_defined
#include <fcntl.h>
+#include <limits.h>
#include <malloc.h>
#include <error.h>
#include <poll.h>
@@ -97,6 +98,7 @@ static unsigned int ifindex;
static unsigned int dmabuf_id;
static uint32_t tx_dmabuf_id;
static int waittime_ms = 500;
+static bool fail_on_linear;
/* System state loaded by current_config_load() */
#define MAX_FLOWS 8
@@ -974,6 +976,11 @@ static int do_server(struct memory_buffer *mem)
"SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n",
dmabuf_cmsg->frag_size);
+ if (fail_on_linear) {
+ pr_err("received SCM_DEVMEM_LINEAR but --fail-on-linear (-L) set");
+ goto err_close_client;
+ }
+
continue;
}
@@ -1397,8 +1404,11 @@ int main(int argc, char *argv[])
int is_server = 0, opt;
int ret, err = 1;
- while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) {
+ while ((opt = getopt(argc, argv, "Lls:c:p:v:q:t:f:z:")) != -1) {
switch (opt) {
+ case 'L':
+ fail_on_linear = true;
+ break;
case 'l':
is_server = 1;
break;
diff --git a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py
index c1e943d53f19..c632b41e7a23 100755
--- a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py
+++ b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py
@@ -1,15 +1,38 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+# pylint: disable=locally-disabled, invalid-name, attribute-defined-outside-init, too-few-public-methods
"""
Tests related to configuration of HW timestamping
"""
import errno
+import ctypes
+import fcntl
+import socket
from lib.py import ksft_run, ksft_exit, ksft_ge, ksft_eq, KsftSkipEx
from lib.py import NetDrvEnv, EthtoolFamily, NlError
+SIOCSHWTSTAMP = 0x89b0
+SIOCGHWTSTAMP = 0x89b1
+class hwtstamp_config(ctypes.Structure):
+ """ Python copy of struct hwtstamp_config """
+ _fields_ = [
+ ("flags", ctypes.c_int),
+ ("tx_type", ctypes.c_int),
+ ("rx_filter", ctypes.c_int),
+ ]
+
+
+class ifreq(ctypes.Structure):
+ """ Python copy of struct ifreq """
+ _fields_ = [
+ ("ifr_name", ctypes.c_char * 16),
+ ("ifr_data", ctypes.POINTER(hwtstamp_config)),
+ ]
+
+
def __get_hwtimestamp_support(cfg):
""" Retrieve supported configuration information """
@@ -31,8 +54,29 @@ def __get_hwtimestamp_support(cfg):
return ctx
+def __get_hwtimestamp_config_ioctl(cfg):
+ """ Retrieve current TS configuration information (via ioctl) """
+
+ config = hwtstamp_config()
+
+ req = ifreq()
+ req.ifr_name = cfg.ifname.encode()
+ req.ifr_data = ctypes.pointer(config)
+
+ try:
+ sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+ fcntl.ioctl(sock.fileno(), SIOCGHWTSTAMP, req)
+ sock.close()
+
+ except OSError as e:
+ if e.errno == errno.EOPNOTSUPP:
+ raise KsftSkipEx("timestamping configuration is not supported via ioctl") from e
+ raise
+ return config
+
+
def __get_hwtimestamp_config(cfg):
- """ Retrieve current TS configuration information """
+ """ Retrieve current TS configuration information (via netlink) """
try:
tscfg = cfg.ethnl.tsconfig_get({'header': {'dev-name': cfg.ifname}})
@@ -43,8 +87,27 @@ def __get_hwtimestamp_config(cfg):
return tscfg
+def __set_hwtimestamp_config_ioctl(cfg, ts):
+ """ Setup new TS configuration information (via ioctl) """
+ config = hwtstamp_config()
+ config.rx_filter = ts['rx-filters']['bits']['bit'][0]['index']
+ config.tx_type = ts['tx-types']['bits']['bit'][0]['index']
+ req = ifreq()
+ req.ifr_name = cfg.ifname.encode()
+ req.ifr_data = ctypes.pointer(config)
+ try:
+ sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+ fcntl.ioctl(sock.fileno(), SIOCSHWTSTAMP, req)
+ sock.close()
+
+ except OSError as e:
+ if e.errno == errno.EOPNOTSUPP:
+ raise KsftSkipEx("timestamping configuration is not supported via ioctl") from e
+ raise
+
+
def __set_hwtimestamp_config(cfg, ts):
- """ Setup new TS configuration information """
+ """ Setup new TS configuration information (via netlink) """
ts['header'] = {'dev-name': cfg.ifname}
try:
@@ -56,9 +119,9 @@ def __set_hwtimestamp_config(cfg, ts):
return res
-def test_hwtstamp_tx(cfg):
+def __perform_hwtstamp_tx(cfg, is_ioctl):
"""
- Test TX timestamp configuration.
+ Test TX timestamp configuration via either netlink or ioctl.
The driver should apply provided config and report back proper state.
"""
@@ -66,16 +129,37 @@ def test_hwtstamp_tx(cfg):
ts = __get_hwtimestamp_support(cfg)
tx = ts['tx']
for t in tx:
+ res = None
tscfg = orig_tscfg
tscfg['tx-types']['bits']['bit'] = [t]
- res = __set_hwtimestamp_config(cfg, tscfg)
+ if is_ioctl:
+ __set_hwtimestamp_config_ioctl(cfg, tscfg)
+ else:
+ res = __set_hwtimestamp_config(cfg, tscfg)
if res is None:
res = __get_hwtimestamp_config(cfg)
+ resioctl = __get_hwtimestamp_config_ioctl(cfg)
ksft_eq(res['tx-types']['bits']['bit'], [t])
+ ksft_eq(resioctl.tx_type, t['index'])
__set_hwtimestamp_config(cfg, orig_tscfg)
+def test_hwtstamp_tx_netlink(cfg):
+ """
+ Test TX timestamp configuration setup via netlink.
+ The driver should apply provided config and report back proper state.
+ """
+ __perform_hwtstamp_tx(cfg, False)
+
+
+def test_hwtstamp_tx_ioctl(cfg):
+ """
+ Test TX timestamp configuration setup via ioctl.
+ The driver should apply provided config and report back proper state.
+ """
+ __perform_hwtstamp_tx(cfg, True)
+
-def test_hwtstamp_rx(cfg):
+def __perform_hwtstamp_rx(cfg, is_ioctl):
"""
Test RX timestamp configuration.
The filter configuration is taken from the list of supported filters.
@@ -87,11 +171,17 @@ def test_hwtstamp_rx(cfg):
ts = __get_hwtimestamp_support(cfg)
rx = ts['rx']
for r in rx:
+ res = None
tscfg = orig_tscfg
tscfg['rx-filters']['bits']['bit'] = [r]
- res = __set_hwtimestamp_config(cfg, tscfg)
+ if is_ioctl:
+ __set_hwtimestamp_config_ioctl(cfg, tscfg)
+ else:
+ res = __set_hwtimestamp_config(cfg, tscfg)
if res is None:
res = __get_hwtimestamp_config(cfg)
+ resioctl = __get_hwtimestamp_config_ioctl(cfg)
+ ksft_eq(resioctl.rx_filter, res['rx-filters']['bits']['bit'][0]['index'])
if r['index'] == 0 or r['index'] == 1:
ksft_eq(res['rx-filters']['bits']['bit'][0]['index'], r['index'])
else:
@@ -100,12 +190,34 @@ def test_hwtstamp_rx(cfg):
__set_hwtimestamp_config(cfg, orig_tscfg)
+def test_hwtstamp_rx_netlink(cfg):
+ """
+ Test RX timestamp configuration via netlink.
+ The filter configuration is taken from the list of supported filters.
+ The driver should apply the config without error and report back proper state.
+ Some extension of the timestamping scope is allowed for PTP filters.
+ """
+ __perform_hwtstamp_rx(cfg, False)
+
+
+def test_hwtstamp_rx_ioctl(cfg):
+ """
+ Test RX timestamp configuration via ioctl.
+ The filter configuration is taken from the list of supported filters.
+ The driver should apply the config without error and report back proper state.
+ Some extension of the timestamping scope is allowed for PTP filters.
+ """
+ __perform_hwtstamp_rx(cfg, True)
+
+
def main() -> None:
""" Ksft boiler plate main """
with NetDrvEnv(__file__, nsim_test=False) as cfg:
cfg.ethnl = EthtoolFamily()
- ksft_run([test_hwtstamp_tx, test_hwtstamp_rx], args=(cfg,))
+ ksft_run([test_hwtstamp_tx_ioctl, test_hwtstamp_tx_netlink,
+ test_hwtstamp_rx_ioctl, test_hwtstamp_rx_netlink],
+ args=(cfg,))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_drv.py b/tools/testing/selftests/drivers/net/hw/rss_drv.py
new file mode 100755
index 000000000000..2d1a33189076
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_drv.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Driver-related behavior tests for RSS.
+"""
+
+from lib.py import ksft_run, ksft_exit, ksft_ge
+from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx
+from lib.py import defer, ethtool
+from lib.py import EthtoolFamily, NlError
+from lib.py import NetDrvEnv
+
+
+def _is_power_of_two(n):
+ return n > 0 and (n & (n - 1)) == 0
+
+
+def _get_rss(cfg, context=0):
+ return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0]
+
+
+def _test_rss_indir_size(cfg, qcnt, context=0):
+ """Test that indirection table size is at least 4x queue count."""
+ ethtool(f"-L {cfg.ifname} combined {qcnt}")
+
+ rss = _get_rss(cfg, context=context)
+ indir = rss['rss-indirection-table']
+ ksft_ge(len(indir), 4 * qcnt, "Table smaller than 4x")
+ return len(indir)
+
+
+def _maybe_create_context(cfg, create_context):
+ """ Either create a context and return its ID or return 0 for main ctx """
+ if not create_context:
+ return 0
+ try:
+ ctx = cfg.ethnl.rss_create_act({'header': {'dev-index': cfg.ifindex}})
+ ctx_id = ctx['context']
+ defer(cfg.ethnl.rss_delete_act,
+ {'header': {'dev-index': cfg.ifindex}, 'context': ctx_id})
+ except NlError:
+ raise KsftSkipEx("Device does not support additional RSS contexts")
+
+ return ctx_id
+
+
+@ksft_variants([
+ KsftNamedVariant("main", False),
+ KsftNamedVariant("ctx", True),
+])
+def indir_size_4x(cfg, create_context):
+ """
+ Test that the indirection table has at least 4 entries per queue.
+ Empirically network-heavy workloads like memcache suffer with the 33%
+ imbalance of a 2x indirection table size.
+ 4x table translates to a 16% imbalance.
+ """
+ channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ ch_max = channels.get('combined-max', 0)
+ qcnt = channels['combined-count']
+
+ if ch_max < 3:
+ raise KsftSkipEx(f"Not enough queues for the test: max={ch_max}")
+
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+ ethtool(f"-L {cfg.ifname} combined 3")
+
+ ctx_id = _maybe_create_context(cfg, create_context)
+
+ indir_sz = _test_rss_indir_size(cfg, 3, context=ctx_id)
+
+ # Test with max queue count (max - 1 if max is a power of two)
+ test_max = ch_max - 1 if _is_power_of_two(ch_max) else ch_max
+ if test_max > 3 and indir_sz < test_max * 4:
+ _test_rss_indir_size(cfg, test_max, context=ctx_id)
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+ with NetDrvEnv(__file__) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ ksft_run([indir_size_4x], args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
index 6fa95fe27c47..7dc80070884a 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
@@ -145,9 +145,14 @@ def test_rss_flow_label_6only(cfg):
# Try to enable Flow Labels and check again, in case it leaks thru
initial = _ethtool_get_cfg(cfg, "udp6")
- changed = initial.replace("l", "") if "l" in initial else initial + "l"
-
- cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}")
+ no_lbl = initial.replace("l", "")
+ if "l" not in initial:
+ try:
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}")
+ except CmdExitFailure as exc:
+ raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc
+ else:
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}")
restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}")
_check_v4_flow_types(cfg)
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
index 72880e388478..503f1a2a2872 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -5,9 +5,9 @@ import multiprocessing
import socket
from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, cmd, fd_read_timeout
from lib.py import NetDrvEpEnv
-from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import EthtoolFamily, NetdevFamily, NlError
from lib.py import KsftSkipEx, KsftFailEx
-from lib.py import rand_port
+from lib.py import defer, ksft_pr, rand_port
def traffic(cfg, local_port, remote_port, ipver):
@@ -21,6 +21,40 @@ def traffic(cfg, local_port, remote_port, ipver):
return sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU)
+def _rss_input_xfrm_try_enable(cfg):
+ """
+ Check if symmetric input-xfrm is already enabled, if not try to enable it
+ and register a cleanup.
+ """
+ rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}})
+ orig_xfrm = rss.get('input-xfrm', set())
+ sym_xfrm = set(filter(lambda x: 'sym' in x, orig_xfrm))
+
+ if sym_xfrm:
+ ksft_pr("Sym input xfrm already enabled:", sym_xfrm)
+ return sym_xfrm
+
+ for xfrm in cfg.ethnl.consts["input-xfrm"].entries:
+ # Skip non-symmetric transforms
+ if "sym" not in xfrm:
+ continue
+
+ try_xfrm = {xfrm} | orig_xfrm
+ try:
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": try_xfrm})
+ except NlError:
+ continue
+
+ ksft_pr("Sym input xfrm configured:", try_xfrm)
+ defer(cfg.ethnl.rss_set,
+ {"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": orig_xfrm})
+ return {xfrm}
+
+ return set()
+
+
def test_rss_input_xfrm(cfg, ipver):
"""
Test symmetric input_xfrm.
@@ -37,12 +71,10 @@ def test_rss_input_xfrm(cfg, ipver):
if not hasattr(socket, "SO_INCOMING_CPU"):
raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
- rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}})
- input_xfrm = set(filter(lambda x: 'sym' in x, rss.get('input-xfrm', {})))
-
# Check for symmetric xor/or-xor
+ input_xfrm = _rss_input_xfrm_try_enable(cfg)
if not input_xfrm:
- raise KsftSkipEx("Symmetric RSS hash not requested")
+ raise KsftSkipEx("Symmetric RSS hash not supported by device")
cpus = set()
successful = 0
diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.c b/tools/testing/selftests/drivers/net/hw/toeplitz.c
index d23b3b0c20a3..035bf908d8d9 100644
--- a/tools/testing/selftests/drivers/net/hw/toeplitz.c
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.c
@@ -59,7 +59,7 @@
#include "../../../net/lib/ksft.h"
#define TOEPLITZ_KEY_MIN_LEN 40
-#define TOEPLITZ_KEY_MAX_LEN 60
+#define TOEPLITZ_KEY_MAX_LEN 256
#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
@@ -72,6 +72,8 @@
#define RPS_MAX_CPUS 16UL /* must be a power of 2 */
+#define MIN_PKT_SAMPLES 40 /* minimum number of packets to receive */
+
/* configuration options (cmdline arguments) */
static uint16_t cfg_dport = 8000;
static int cfg_family = AF_INET6;
@@ -251,15 +253,31 @@ static bool recv_block(struct ring_state *ring)
return true;
}
-/* simple test: sleep once unconditionally and then process all rings */
+/* simple test: process all rings until MIN_PKT_SAMPLES packets are received,
+ * or the test times out.
+ */
static void process_rings(void)
{
+ struct timeval start, now;
+ bool pkts_found = true;
+ long elapsed_usec;
int i;
- usleep(1000 * cfg_timeout_msec);
+ gettimeofday(&start, NULL);
- for (i = 0; i < num_cpus; i++)
- do {} while (recv_block(&rings[i]));
+ do {
+ if (!pkts_found)
+ usleep(100);
+
+ pkts_found = false;
+ for (i = 0; i < num_cpus; i++)
+ pkts_found |= recv_block(&rings[i]);
+
+ gettimeofday(&now, NULL);
+ elapsed_usec = (now.tv_sec - start.tv_sec) * 1000000 +
+ (now.tv_usec - start.tv_usec);
+ } while (frames_received - frames_nohash < MIN_PKT_SAMPLES &&
+ elapsed_usec < cfg_timeout_msec * 1000);
fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
frames_received - frames_nohash - frames_error,
@@ -485,8 +503,8 @@ static void parse_rps_bitmap(const char *arg)
bitmap = strtoul(arg, NULL, 0);
- if (bitmap & ~(RPS_MAX_CPUS - 1))
- error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
+ if (bitmap & ~((1UL << RPS_MAX_CPUS) - 1))
+ error(1, 0, "rps bitmap 0x%lx out of bounds, max cpu %lu",
bitmap, RPS_MAX_CPUS - 1);
for (i = 0; i < RPS_MAX_CPUS; i++)
diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.py b/tools/testing/selftests/drivers/net/hw/toeplitz.py
index d2db5ee9e358..cd7e080e6f84 100755
--- a/tools/testing/selftests/drivers/net/hw/toeplitz.py
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.py
@@ -19,6 +19,8 @@ from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx, KsftFailEx
# "define" for the ID of the Toeplitz hash function
ETH_RSS_HASH_TOP = 1
+# Must match RPS_MAX_CPUS in toeplitz.c
+RPS_MAX_CPUS = 16
def _check_rps_and_rfs_not_configured(cfg):
@@ -67,23 +69,24 @@ def _get_irq_cpus(cfg):
return cpus
-def _get_unused_cpus(cfg, count=2):
+def _get_unused_rps_cpus(cfg, count=2):
"""
- Get CPUs that are not used by Rx queues.
- Returns a list of at least 'count' CPU numbers.
+ Get CPUs that are not used by Rx queues for RPS.
+ Returns a list of at least 'count' CPU numbers within
+ the RPS_MAX_CPUS supported range.
"""
# Get CPUs used by Rx queues
rx_cpus = set(_get_irq_cpus(cfg))
- # Get total number of CPUs
- num_cpus = os.cpu_count()
+ # Get total number of CPUs, capped by RPS_MAX_CPUS
+ num_cpus = min(os.cpu_count(), RPS_MAX_CPUS)
# Find unused CPUs
unused_cpus = [cpu for cpu in range(num_cpus) if cpu not in rx_cpus]
if len(unused_cpus) < count:
- raise KsftSkipEx(f"Need at {count} CPUs not used by Rx queues, found {len(unused_cpus)}")
+ raise KsftSkipEx(f"Need at least {count} CPUs in range 0..{num_cpus - 1} not used by Rx queues, found {len(unused_cpus)}")
return unused_cpus[:count]
@@ -94,12 +97,14 @@ def _configure_rps(cfg, rps_cpus):
mask = 0
for cpu in rps_cpus:
mask |= (1 << cpu)
- mask = hex(mask)[2:]
+
+ mask = hex(mask)
# Set RPS bitmap for all rx queues
for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"):
with open(rps_file, "w", encoding="utf-8") as fp:
- fp.write(mask)
+ # sysfs expects hex without '0x' prefix, toeplitz.c needs the prefix
+ fp.write(mask[2:])
return mask
@@ -179,7 +184,7 @@ def test(cfg, proto_flag, ipver, grp):
ksft_pr(f"RSS using CPUs: {irq_cpus}")
elif grp == "rps":
# Get CPUs not used by Rx queues and configure them for RPS
- rps_cpus = _get_unused_cpus(cfg, count=2)
+ rps_cpus = _get_unused_rps_cpus(cfg, count=2)
rps_mask = _configure_rps(cfg, rps_cpus)
defer(_configure_rps, cfg, [])
rx_cmd += ["-r", rps_mask]
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index 8b644fd84ff2..41cc248ac848 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -170,6 +170,7 @@ class NetDrvEpEnv(NetDrvEnvBase):
self.remote_ifname = self.resolve_remote_ifc()
self.remote_dev = ip("-d link show dev " + self.remote_ifname,
host=self.remote, json=True)[0]
+ self.remote_ifindex = self.remote_dev['ifindex']
self._required_cmd = {}
@@ -247,9 +248,12 @@ class NetDrvEpEnv(NetDrvEnvBase):
if not self.addr_v[ipver] or not self.remote_addr_v[ipver]:
raise KsftSkipEx(f"Test requires IPv{ipver} connectivity")
- def require_nsim(self):
- if self._ns is None:
+ def require_nsim(self, nsim_test=True):
+ """Require or exclude netdevsim for this test"""
+ if nsim_test and self._ns is None:
raise KsftXfailEx("Test only works on netdevsim")
+ if nsim_test is False and self._ns is not None:
+ raise KsftXfailEx("Test does not work on netdevsim")
def _require_cmd(self, comm, key, host=None):
cached = self._required_cmd.get(comm, {})
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index ae8abff4be40..02dcdeb723be 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -203,19 +203,21 @@ function do_cleanup() {
function cleanup_netcons() {
# delete netconsole dynamic reconfiguration
# do not fail if the target is already disabled
- if [[ ! -d "${NETCONS_PATH}" ]]
+ local TARGET_PATH=${1:-${NETCONS_PATH}}
+
+ if [[ ! -d "${TARGET_PATH}" ]]
then
# in some cases this is called before netcons path is created
return
fi
- if [[ $(cat "${NETCONS_PATH}"/enabled) != 0 ]]
+ if [[ $(cat "${TARGET_PATH}"/enabled) != 0 ]]
then
- echo 0 > "${NETCONS_PATH}"/enabled || true
+ echo 0 > "${TARGET_PATH}"/enabled || true
fi
# Remove all the keys that got created during the selftest
- find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
+ find "${TARGET_PATH}/userdata/" -mindepth 1 -type d -delete
# Remove the configfs entry
- rmdir "${NETCONS_PATH}"
+ rmdir "${TARGET_PATH}"
}
function cleanup() {
@@ -247,8 +249,8 @@ function listen_port_and_save_to() {
SOCAT_MODE="UDP6-LISTEN"
fi
- # Just wait for 2 seconds
- timeout 2 ip netns exec "${NAMESPACE}" \
+ # Just wait for 3 seconds
+ timeout 3 ip netns exec "${NAMESPACE}" \
socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" 2> /dev/null
}
@@ -377,6 +379,29 @@ function check_netconsole_module() {
fi
}
+function wait_target_state() {
+ local TARGET=${1}
+ local STATE=${2}
+ local TARGET_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
+ local ENABLED=0
+
+ if [ "${STATE}" == "enabled" ]
+ then
+ ENABLED=1
+ fi
+
+ if [ ! -d "$TARGET_PATH" ]; then
+ echo "FAIL: Target does not exist." >&2
+ exit "${ksft_fail}"
+ fi
+
+ local CHECK_CMD="grep \"$ENABLED\" \"$TARGET_PATH/enabled\""
+ slowwait 2 sh -c "test -n \"\$($CHECK_CMD)\"" || {
+ echo "FAIL: ${TARGET} is not ${STATE}." >&2
+ exit "${ksft_fail}"
+ }
+}
+
# A wrapper to translate protocol version to udp version
function wait_for_port() {
local NAMESPACE=${1}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
index 0441a18f098b..aac8ef490feb 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -317,7 +317,7 @@ police_limits_test()
tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
flower skip_sw \
- action police rate 0.5kbit burst 1m conform-exceed drop/ok
+ action police rate 0.5kbit burst 2k conform-exceed drop/ok
check_fail $? "Incorrect success to add police action with too low rate"
tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
@@ -327,7 +327,7 @@ police_limits_test()
tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
flower skip_sw \
- action police rate 1.5kbit burst 1m conform-exceed drop/ok
+ action police rate 1.5kbit burst 2k conform-exceed drop/ok
check_err $? "Failed to add police action with low rate"
tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
diff --git a/tools/testing/selftests/drivers/net/netconsole/Makefile b/tools/testing/selftests/drivers/net/netconsole/Makefile
new file mode 100644
index 000000000000..b56c70b7e274
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netconsole/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_INCLUDES := \
+ ../../../net/lib.sh \
+ ../lib/sh/lib_netcons.sh \
+# end of TEST_INCLUDES
+
+TEST_PROGS := \
+ netcons_basic.sh \
+ netcons_cmdline.sh \
+ netcons_fragmented_msg.sh \
+ netcons_overflow.sh \
+ netcons_resume.sh \
+ netcons_sysdata.sh \
+ netcons_torture.sh \
+# end of TEST_PROGS
+
+include ../../../lib.mk
+
diff --git a/tools/testing/selftests/drivers/net/netconsole/config b/tools/testing/selftests/drivers/net/netconsole/config
new file mode 100644
index 000000000000..a3f6b0fd44ef
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netconsole/config
@@ -0,0 +1,6 @@
+CONFIG_CONFIGFS_FS=y
+CONFIG_IPV6=y
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETCONSOLE_EXTENDED_LOG=y
+CONFIG_NETDEVSIM=m
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_basic.sh
index 2022f3061738..59cf10013ecd 100755
--- a/tools/testing/selftests/drivers/net/netcons_basic.sh
+++ b/tools/testing/selftests/drivers/net/netconsole/netcons_basic.sh
@@ -18,7 +18,7 @@ set -euo pipefail
SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
modprobe netdevsim 2> /dev/null || true
modprobe netconsole 2> /dev/null || true
diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_cmdline.sh
index d1d23dc67f99..96d704b8d9d9 100755
--- a/tools/testing/selftests/drivers/net/netcons_cmdline.sh
+++ b/tools/testing/selftests/drivers/net/netconsole/netcons_cmdline.sh
@@ -12,7 +12,7 @@ set -euo pipefail
SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
check_netconsole_module
diff --git a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_fragmented_msg.sh
index 4a71e01a230c..0dc7280c3080 100755
--- a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh
+++ b/tools/testing/selftests/drivers/net/netconsole/netcons_fragmented_msg.sh
@@ -16,7 +16,7 @@ set -euo pipefail
SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
modprobe netdevsim 2> /dev/null || true
modprobe netconsole 2> /dev/null || true
diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_overflow.sh
index 06089643b771..a8e43d08c166 100755
--- a/tools/testing/selftests/drivers/net/netcons_overflow.sh
+++ b/tools/testing/selftests/drivers/net/netconsole/netcons_overflow.sh
@@ -13,7 +13,7 @@ set -euo pipefail
SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
# This is coming from netconsole code. Check for it in drivers/net/netconsole.c
MAX_USERDATA_ITEMS=256
diff --git a/tools/testing/selftests/drivers/net/netconsole/netcons_resume.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_resume.sh
new file mode 100755
index 000000000000..cb59cf436dd0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netconsole/netcons_resume.sh
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test validates that netconsole is able to resume a target that was
+# deactivated because its interface was removed, once the interface is
+# brought back up.
+#
+# The test configures a netconsole target and then removes netdevsim module to
+# cause the interface to disappear. Targets are configured via cmdline to ensure
+# targets bound by interface name and mac address can be resumed.
+# The test verifies that the target moved to disabled state before adding
+# netdevsim and the interface back.
+#
+# Finally, the test verifies that the target is re-enabled automatically and
+# the message is received on the destination interface.
+#
+# Author: Andre Carvalho <asantostc@gmail.com>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
+
+SAVED_SRCMAC="" # to be populated later
+SAVED_DSTMAC="" # to be populated later
+
+modprobe netdevsim 2> /dev/null || true
+rmmod netconsole 2> /dev/null || true
+
+check_netconsole_module
+
+function cleanup() {
+ cleanup_netcons "${NETCONS_CONFIGFS}/cmdline0"
+ do_cleanup
+ rmmod netconsole
+}
+
+function trigger_reactivation() {
+ # Add back low level module
+ modprobe netdevsim
+ # Recreate namespace and two interfaces
+ set_network
+ # Restore MACs
+ ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" \
+ address "${SAVED_DSTMAC}"
+ if [ "${BINDMODE}" == "mac" ]; then
+ ip link set dev "${SRCIF}" down
+ ip link set dev "${SRCIF}" address "${SAVED_SRCMAC}"
+ # Rename the device to trigger target resume: when the device
+ # was recreated it did not yet have the correct mac address.
+ ip link set dev "${SRCIF}" name "${TARGET}"
+ fi
+}
+
+function trigger_deactivation() {
+ # Store the mac addresses so they can be restored on reactivation
+ SAVED_DSTMAC=$(ip netns exec "${NAMESPACE}" \
+ cat /sys/class/net/"$DSTIF"/address)
+ SAVED_SRCMAC=$(mac_get "${SRCIF}")
+ # Remove low level module
+ rmmod netdevsim
+}
+
+trap cleanup EXIT
+
+# Run the test twice, with different cmdline parameters
+for BINDMODE in "ifname" "mac"
+do
+ echo "Running with bind mode: ${BINDMODE}" >&2
+ # Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+ echo "6 5" > /proc/sys/kernel/printk
+
+ # Create one namespace and two interfaces
+ set_network
+
+ # Create the command line for netconsole, with the configuration from
+ # the function above
+ CMDLINE=$(create_cmdline_str "${BINDMODE}")
+
+ # The content of kmsg will be saved to the following file
+ OUTPUT_FILE="/tmp/${TARGET}-${BINDMODE}"
+
+ # Load the module, with the cmdline set
+ modprobe netconsole "${CMDLINE}"
+ # Expose cmdline target in configfs
+ mkdir "${NETCONS_CONFIGFS}/cmdline0"
+
+ # Target should be enabled
+ wait_target_state "cmdline0" "enabled"
+
+ # Trigger deactivation by unloading netdevsim module. Target should be
+ # disabled.
+ trigger_deactivation
+ wait_target_state "cmdline0" "disabled"
+
+ # Trigger reactivation by loading netdevsim, recreating the network and
+ # restoring mac addresses. Target should be re-enabled.
+ trigger_reactivation
+ wait_target_state "cmdline0" "enabled"
+
+ # Listen for netconsole port inside the namespace and destination
+ # interface
+ listen_port_and_save_to "${OUTPUT_FILE}" &
+ # Wait for socat to start and listen to the port.
+ wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+ # Send the message
+ echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+ # Make sure the message was received in the dst part
+ # and exit
+ validate_msg "${OUTPUT_FILE}"
+
+ # kill socat in case it is still running
+ pkill_socat
+ # Cleanup & unload the module
+ cleanup
+
+ echo "${BINDMODE} : Test passed" >&2
+done
+
+trap - EXIT
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_sysdata.sh
index baf69031089e..3fb8c4afe3d2 100755
--- a/tools/testing/selftests/drivers/net/netcons_sysdata.sh
+++ b/tools/testing/selftests/drivers/net/netconsole/netcons_sysdata.sh
@@ -18,7 +18,7 @@ set -euo pipefail
SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
# Enable the sysdata cpu_nr feature
function set_cpu_nr() {
diff --git a/tools/testing/selftests/drivers/net/netcons_torture.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_torture.sh
index 2ce9ee3719d1..33a44adb6f8f 100755
--- a/tools/testing/selftests/drivers/net/netcons_torture.sh
+++ b/tools/testing/selftests/drivers/net/netconsole/netcons_torture.sh
@@ -17,7 +17,7 @@ set -euo pipefail
SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
# Number of times the main loop run
ITERATIONS=${1:-150}
diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
index 7f32b5600925..f4721f7636dd 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -52,6 +52,39 @@ cleanup_ns()
ip netns del nssv
}
+is_carrier_up()
+{
+ local netns="$1"
+ local nsim_dev="$2"
+
+ test "$(ip netns exec "$netns" \
+ cat /sys/class/net/"$nsim_dev"/carrier 2>/dev/null)" -eq 1
+}
+
+assert_carrier_up()
+{
+ local netns="$1"
+ local nsim_dev="$2"
+
+ if ! is_carrier_up "$netns" "$nsim_dev"; then
+ echo "$nsim_dev's carrier should be UP, but it isn't"
+ cleanup_ns
+ exit 1
+ fi
+}
+
+assert_carrier_down()
+{
+ local netns="$1"
+ local nsim_dev="$2"
+
+ if is_carrier_up "$netns" "$nsim_dev"; then
+ echo "$nsim_dev's carrier should be DOWN, but it isn't"
+ cleanup_ns
+ exit 1
+ fi
+}
+
###
### Code start
###
@@ -113,6 +146,32 @@ if [ $? -eq 0 ]; then
exit 1
fi
+# netdevsim carrier state consistency checking
+assert_carrier_up nssv "$NSIM_DEV_1_NAME"
+assert_carrier_up nscl "$NSIM_DEV_2_NAME"
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > "$NSIM_DEV_SYS_UNLINK"
+
+assert_carrier_down nssv "$NSIM_DEV_1_NAME"
+assert_carrier_down nscl "$NSIM_DEV_2_NAME"
+
+ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" down
+ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" up
+
+assert_carrier_down nssv "$NSIM_DEV_1_NAME"
+assert_carrier_down nscl "$NSIM_DEV_2_NAME"
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK
+
+assert_carrier_up nssv "$NSIM_DEV_1_NAME"
+assert_carrier_up nscl "$NSIM_DEV_2_NAME"
+
+ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" down
+ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" up
+
+assert_carrier_up nssv "$NSIM_DEV_1_NAME"
+assert_carrier_up nscl "$NSIM_DEV_2_NAME"
+
# send/recv packets
tmp_file=$(mktemp)
diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py
index 06559ef49b9a..864d9fce1094 100755
--- a/tools/testing/selftests/drivers/net/psp.py
+++ b/tools/testing/selftests/drivers/net/psp.py
@@ -266,6 +266,7 @@ def assoc_sk_only_mismatch(cfg):
the_exception = cm.exception
ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id")
ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+ _close_conn(cfg, s)
def assoc_sk_only_mismatch_tx(cfg):
@@ -283,6 +284,7 @@ def assoc_sk_only_mismatch_tx(cfg):
the_exception = cm.exception
ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id")
ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+ _close_conn(cfg, s)
def assoc_sk_only_unconn(cfg):
@@ -573,8 +575,9 @@ def psp_ip_ver_test_builder(name, test_func, psp_ver, ipver):
"""Build test cases for each combo of PSP version and IP version"""
def test_case(cfg):
cfg.require_ipver(ipver)
- test_case.__name__ = f"{name}_v{psp_ver}_ip{ipver}"
test_func(cfg, psp_ver, ipver)
+
+ test_case.__name__ = f"{name}_v{psp_ver}_ip{ipver}"
return test_case
@@ -582,8 +585,9 @@ def ipver_test_builder(name, test_func, ipver):
"""Build test cases for each IP version"""
def test_case(cfg):
cfg.require_ipver(ipver)
- test_case.__name__ = f"{name}_ip{ipver}"
test_func(cfg, ipver)
+
+ test_case.__name__ = f"{name}_ip{ipver}"
return test_case
@@ -599,8 +603,8 @@ def main() -> None:
cfg.comm_port = rand_port()
srv = None
try:
- with bkg(responder + f" -p {cfg.comm_port}", host=cfg.remote,
- exit_wait=True) as srv:
+ with bkg(responder + f" -p {cfg.comm_port} -i {cfg.remote_ifindex}",
+ host=cfg.remote, exit_wait=True) as srv:
wait_port_listen(cfg.comm_port, host=cfg.remote)
cfg.comm_sock = socket.create_connection((cfg.remote_addr,
diff --git a/tools/testing/selftests/drivers/net/psp_responder.c b/tools/testing/selftests/drivers/net/psp_responder.c
index f309e0d73cbf..a26e7628bbb1 100644
--- a/tools/testing/selftests/drivers/net/psp_responder.c
+++ b/tools/testing/selftests/drivers/net/psp_responder.c
@@ -22,7 +22,7 @@ static bool should_quit;
struct opts {
int port;
- int devid;
+ int ifindex;
bool verbose;
};
@@ -360,7 +360,7 @@ static void usage(const char *name, const char *miss)
if (miss)
fprintf(stderr, "Missing argument: %s\n", miss);
- fprintf(stderr, "Usage: %s -p port [-v] [-d psp-dev-id]\n", name);
+ fprintf(stderr, "Usage: %s -p port [-v] [-i ifindex]\n", name);
exit(EXIT_FAILURE);
}
@@ -368,7 +368,7 @@ static void parse_cmd_opts(int argc, char **argv, struct opts *opts)
{
int opt;
- while ((opt = getopt(argc, argv, "vp:d:")) != -1) {
+ while ((opt = getopt(argc, argv, "vp:i:")) != -1) {
switch (opt) {
case 'v':
opts->verbose = 1;
@@ -376,8 +376,8 @@ static void parse_cmd_opts(int argc, char **argv, struct opts *opts)
case 'p':
opts->port = atoi(optarg);
break;
- case 'd':
- opts->devid = atoi(optarg);
+ case 'i':
+ opts->ifindex = atoi(optarg);
break;
default:
usage(argv[0], NULL);
@@ -410,12 +410,11 @@ static int psp_dev_set_ena(struct ynl_sock *ys, __u32 dev_id, __u32 versions)
int main(int argc, char **argv)
{
struct psp_dev_get_list *dev_list;
- bool devid_found = false;
__u32 ver_ena, ver_cap;
struct opts opts = {};
struct ynl_error yerr;
struct ynl_sock *ys;
- int first_id = 0;
+ int devid = -1;
int ret;
parse_cmd_opts(argc, argv, &opts);
@@ -429,20 +428,19 @@ int main(int argc, char **argv)
}
dev_list = psp_dev_get_dump(ys);
- if (ynl_dump_empty(dev_list)) {
- if (ys->err.code)
- goto err_close;
- fprintf(stderr, "No PSP devices\n");
- goto err_close_silent;
- }
+ if (ynl_dump_empty(dev_list) && ys->err.code)
+ goto err_close;
ynl_dump_foreach(dev_list, d) {
- if (opts.devid) {
- devid_found = true;
+ if (opts.ifindex) {
+ if (d->ifindex != opts.ifindex)
+ continue;
+ devid = d->id;
ver_ena = d->psp_versions_ena;
ver_cap = d->psp_versions_cap;
- } else if (!first_id) {
- first_id = d->id;
+ break;
+ } else if (devid < 0) {
+ devid = d->id;
ver_ena = d->psp_versions_ena;
ver_cap = d->psp_versions_cap;
} else {
@@ -452,23 +450,21 @@ int main(int argc, char **argv)
}
psp_dev_get_list_free(dev_list);
- if (opts.devid && !devid_found) {
- fprintf(stderr, "PSP device %d requested on cmdline, not found\n",
- opts.devid);
- goto err_close_silent;
- } else if (!opts.devid) {
- opts.devid = first_id;
- }
+ if (opts.ifindex && devid < 0)
+ fprintf(stderr,
+ "WARN: PSP device with ifindex %d requested on cmdline, not found\n",
+ opts.ifindex);
- if (ver_ena != ver_cap) {
- ret = psp_dev_set_ena(ys, opts.devid, ver_cap);
+ if (devid >= 0 && ver_ena != ver_cap) {
+ ret = psp_dev_set_ena(ys, devid, ver_cap);
if (ret)
goto err_close;
}
ret = run_responder(ys, &opts);
- if (ver_ena != ver_cap && psp_dev_set_ena(ys, opts.devid, ver_ena))
+ if (devid >= 0 && ver_ena != ver_cap &&
+ psp_dev_set_ena(ys, devid, ver_ena))
fprintf(stderr, "WARN: failed to set the PSP versions back\n");
ynl_sock_destroy(ys);
diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c
index 94c6c81c2301..2c4c50500116 100644
--- a/tools/testing/selftests/filesystems/anon_inode_test.c
+++ b/tools/testing/selftests/filesystems/anon_inode_test.c
@@ -42,7 +42,10 @@ TEST(anon_inode_no_exec)
fd_context = sys_fsopen("tmpfs", 0);
ASSERT_GE(fd_context, 0);
- ASSERT_LT(execveat(fd_context, "", NULL, NULL, AT_EMPTY_PATH), 0);
+ char *const empty_argv[] = {NULL};
+ char *const empty_envp[] = {NULL};
+
+ ASSERT_LT(execveat(fd_context, "", empty_argv, empty_envp, AT_EMPTY_PATH), 0);
ASSERT_EQ(errno, EACCES);
EXPECT_EQ(close(fd_context), 0);
diff --git a/tools/testing/selftests/filesystems/open_tree_ns/.gitignore b/tools/testing/selftests/filesystems/open_tree_ns/.gitignore
new file mode 100644
index 000000000000..fb12b93fbcaa
--- /dev/null
+++ b/tools/testing/selftests/filesystems/open_tree_ns/.gitignore
@@ -0,0 +1 @@
+open_tree_ns_test
diff --git a/tools/testing/selftests/filesystems/open_tree_ns/Makefile b/tools/testing/selftests/filesystems/open_tree_ns/Makefile
new file mode 100644
index 000000000000..73c03c4a7ef6
--- /dev/null
+++ b/tools/testing/selftests/filesystems/open_tree_ns/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+# Build rules for the open_tree_ns_test selftest binary.
+TEST_GEN_PROGS := open_tree_ns_test
+
+CFLAGS := -Wall -Werror -g $(KHDR_INCLUDES)
+# The binary is linked against libcap.
+LDLIBS := -lcap
+
+include ../../lib.mk
+
+# Compile the test source together with ../utils.c into a single binary.
+$(OUTPUT)/open_tree_ns_test: open_tree_ns_test.c ../utils.c
+ $(CC) $(CFLAGS) -o $@ $^ $(LDLIBS)
diff --git a/tools/testing/selftests/filesystems/open_tree_ns/open_tree_ns_test.c b/tools/testing/selftests/filesystems/open_tree_ns/open_tree_ns_test.c
new file mode 100644
index 000000000000..9711556280ae
--- /dev/null
+++ b/tools/testing/selftests/filesystems/open_tree_ns/open_tree_ns_test.c
@@ -0,0 +1,1030 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for OPEN_TREE_NAMESPACE flag.
+ *
+ * Test that open_tree() with OPEN_TREE_NAMESPACE creates a new mount
+ * namespace containing the specified mount tree.
+ */
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/nsfs.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../wrappers.h"
+#include "../statmount/statmount.h"
+#include "../utils.h"
+#include "../../kselftest_harness.h"
+
+#ifndef OPEN_TREE_NAMESPACE
+#define OPEN_TREE_NAMESPACE (1 << 1)
+#endif
+
+/*
+ * Query the mount namespace ID behind @fd with the NS_GET_MNTNS_ID ioctl.
+ *
+ * Stores the ID in *@mnt_ns_id and returns 0, or returns -errno when the
+ * ioctl fails.
+ */
+static int get_mnt_ns_id(int fd, uint64_t *mnt_ns_id)
+{
+	int rc = ioctl(fd, NS_GET_MNTNS_ID, mnt_ns_id);
+
+	return rc < 0 ? -errno : 0;
+}
+
+/*
+ * Open @path read-only and fetch the mount namespace ID of the resulting
+ * file descriptor into *@mnt_ns_id.
+ *
+ * Returns 0 on success, -errno if open() fails, or whatever
+ * get_mnt_ns_id() returns.
+ */
+static int get_mnt_ns_id_from_path(const char *path, uint64_t *mnt_ns_id)
+{
+	int nsfd = open(path, O_RDONLY);
+	int rc;
+
+	if (nsfd < 0)
+		return -errno;
+
+	rc = get_mnt_ns_id(nsfd, mnt_ns_id);
+	close(nsfd);
+
+	return rc;
+}
+
+#define STATMOUNT_BUFSIZE (1 << 15)
+
+/*
+ * Allocate a buffer and fill it with statmount() data for mount @mnt_id in
+ * mount namespace @mnt_ns_id, requesting the fields selected by @mask.
+ *
+ * The buffer starts at STATMOUNT_BUFSIZE bytes and is doubled for as long
+ * as statmount() fails with EOVERFLOW.
+ *
+ * Returns the malloc()ed buffer (the caller frees it), or NULL when
+ * malloc() or statmount() fails. errno of the failing call is preserved.
+ */
+static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask)
+{
+	struct statmount *buf;
+	size_t bufsize = STATMOUNT_BUFSIZE;
+	int saved_errno;
+	int ret;
+
+	for (;;) {
+		buf = malloc(bufsize);
+		if (!buf)
+			return NULL;
+
+		/*
+		 * The statmount() wrapper in ../statmount/statmount.h takes a
+		 * mount fd as its third argument; it is only consulted with
+		 * STATMOUNT_BY_FD, so a lookup by mount ID passes 0.
+		 */
+		ret = statmount(mnt_id, mnt_ns_id, 0, mask, buf, bufsize, 0);
+		if (ret == 0)
+			return buf;
+
+		/* free() may modify errno, so latch it before releasing. */
+		saved_errno = errno;
+		free(buf);
+		errno = saved_errno;
+		if (saved_errno != EOVERFLOW)
+			return NULL;
+
+		bufsize <<= 1;
+	}
+}
+
+/*
+ * Log one mount described by @sm through TH_LOG().
+ *
+ * String fields (fs type, root, mount point) are taken from sm->str only
+ * when the matching STATMOUNT_* bit is set in sm->mask; otherwise an empty
+ * string is printed for that field.
+ */
+static void log_mount(struct __test_metadata *_metadata, struct statmount *sm)
+{
+	const char *fs_type, *mnt_root, *mnt_point;
+
+	fs_type = (sm->mask & STATMOUNT_FS_TYPE) ? sm->str + sm->fs_type : "";
+	mnt_root = (sm->mask & STATMOUNT_MNT_ROOT) ? sm->str + sm->mnt_root : "";
+	mnt_point = (sm->mask & STATMOUNT_MNT_POINT) ? sm->str + sm->mnt_point : "";
+
+	TH_LOG(" mnt_id: %llu, parent_id: %llu, fs_type: %s, root: %s, point: %s",
+	       (unsigned long long)sm->mnt_id,
+	       (unsigned long long)sm->mnt_parent_id,
+	       fs_type, mnt_root, mnt_point);
+}
+
+/*
+ * Log every mount that listmount() reports for mount namespace @mnt_ns_id
+ * (at most 256 entries are requested).
+ *
+ * A listmount() failure is logged and ends the dump; a statmount failure
+ * for a single mount is logged and that mount is skipped.
+ */
+static void dump_mounts(struct __test_metadata *_metadata, uint64_t mnt_ns_id)
+{
+	uint64_t ids[256];
+	ssize_t count, idx;
+
+	count = listmount(LSMT_ROOT, mnt_ns_id, 0, ids,
+			  sizeof(ids) / sizeof(ids[0]), 0);
+	if (count < 0) {
+		TH_LOG("listmount failed: %s", strerror(errno));
+		return;
+	}
+
+	TH_LOG("Mount namespace %llu contains %zd mount(s):",
+	       (unsigned long long)mnt_ns_id, count);
+
+	for (idx = 0; idx < count; idx++) {
+		const uint64_t want = STATMOUNT_MNT_BASIC | STATMOUNT_FS_TYPE |
+				      STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT;
+		struct statmount *info = statmount_alloc(ids[idx], mnt_ns_id, want);
+
+		if (info) {
+			log_mount(_metadata, info);
+			free(info);
+		} else {
+			TH_LOG(" [%zd] mnt_id %llu: statmount failed: %s",
+			       idx, (unsigned long long)ids[idx], strerror(errno));
+		}
+	}
+}
+
+/* Per-test state for the open_tree_ns fixture. */
+FIXTURE(open_tree_ns)
+{
+ /* fd returned by open_tree(); -1 while unset, closed in teardown */
+ int fd;
+ /* mount namespace ID read from /proc/self/ns/mnt during setup */
+ uint64_t current_ns_id;
+};
+
+/* Parameters that differ between the open_tree_ns test variants. */
+FIXTURE_VARIANT(open_tree_ns)
+{
+ /* path handed to open_tree() */
+ const char *path;
+ /* flags handed to open_tree() */
+ unsigned int flags;
+ /* false: open_tree() is expected to fail with EINVAL */
+ bool expect_success;
+ /* true: the new namespace ID must differ from the caller's */
+ bool expect_different_ns;
+ /* lower bound on the number of mounts listmount() must report */
+ int min_mounts;
+};
+
+/* "/" with OPEN_TREE_NAMESPACE, non-recursive. */
+FIXTURE_VARIANT_ADD(open_tree_ns, basic_root)
+{
+ .path = "/",
+ .flags = OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC,
+ .expect_success = true,
+ .expect_different_ns = true,
+ /*
+ * The empty rootfs is hidden from listmount()/mountinfo,
+ * so we only see the bind mount on top of it.
+ */
+ .min_mounts = 1,
+};
+
+/* "/" with OPEN_TREE_NAMESPACE | AT_RECURSIVE. */
+FIXTURE_VARIANT_ADD(open_tree_ns, recursive_root)
+{
+ .path = "/",
+ .flags = OPEN_TREE_NAMESPACE | AT_RECURSIVE | OPEN_TREE_CLOEXEC,
+ .expect_success = true,
+ .expect_different_ns = true,
+ .min_mounts = 1,
+};
+
+/* "/tmp" with OPEN_TREE_NAMESPACE, non-recursive. */
+FIXTURE_VARIANT_ADD(open_tree_ns, subdir_tmp)
+{
+ .path = "/tmp",
+ .flags = OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC,
+ .expect_success = true,
+ .expect_different_ns = true,
+ .min_mounts = 1,
+};
+
+/* "/proc" with OPEN_TREE_NAMESPACE, non-recursive. */
+FIXTURE_VARIANT_ADD(open_tree_ns, subdir_proc)
+{
+ .path = "/proc",
+ .flags = OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC,
+ .expect_success = true,
+ .expect_different_ns = true,
+ .min_mounts = 1,
+};
+
+/* "/tmp" with OPEN_TREE_NAMESPACE | AT_RECURSIVE. */
+FIXTURE_VARIANT_ADD(open_tree_ns, recursive_tmp)
+{
+ .path = "/tmp",
+ .flags = OPEN_TREE_NAMESPACE | AT_RECURSIVE | OPEN_TREE_CLOEXEC,
+ .expect_success = true,
+ .expect_different_ns = true,
+ .min_mounts = 1,
+};
+
+/* "/run" with OPEN_TREE_NAMESPACE | AT_RECURSIVE. */
+FIXTURE_VARIANT_ADD(open_tree_ns, recursive_run)
+{
+ .path = "/run",
+ .flags = OPEN_TREE_NAMESPACE | AT_RECURSIVE | OPEN_TREE_CLOEXEC,
+ .expect_success = true,
+ .expect_different_ns = true,
+ .min_mounts = 1,
+};
+
+/* Negative case: AT_RECURSIVE without OPEN_TREE_NAMESPACE; failure expected. */
+FIXTURE_VARIANT_ADD(open_tree_ns, invalid_recursive_alone)
+{
+ .path = "/",
+ .flags = AT_RECURSIVE | OPEN_TREE_CLOEXEC,
+ .expect_success = false,
+ .expect_different_ns = false,
+ .min_mounts = 0,
+};
+
+/*
+ * Probe for the syscalls the tests rely on and record the caller's mount
+ * namespace ID. The test is skipped when open_tree() or statmount() report
+ * ENOSYS, or when the current namespace ID cannot be read.
+ */
+FIXTURE_SETUP(open_tree_ns)
+{
+	int ret;
+
+	self->fd = -1;
+
+	/* Check if open_tree syscall is supported */
+	ret = sys_open_tree(-1, NULL, 0);
+	if (ret == -1 && errno == ENOSYS)
+		SKIP(return, "open_tree() syscall not supported");
+
+	/*
+	 * Check if statmount/listmount are supported. The statmount()
+	 * wrapper takes (mnt_id, mnt_ns_id, fd, mask, buf, bufsize, flags).
+	 */
+	ret = statmount(0, 0, 0, 0, NULL, 0, 0);
+	if (ret == -1 && errno == ENOSYS)
+		SKIP(return, "statmount() syscall not supported");
+
+	/* Get current mount namespace ID for comparison */
+	ret = get_mnt_ns_id_from_path("/proc/self/ns/mnt", &self->current_ns_id);
+	if (ret < 0)
+		SKIP(return, "Failed to get current mount namespace ID");
+}
+
+/* Close the open_tree() fd if a test stored one. */
+FIXTURE_TEARDOWN(open_tree_ns)
+{
+	if (self->fd < 0)
+		return;
+
+	close(self->fd);
+}
+
+/*
+ * Call open_tree() with the variant's path and flags.
+ *
+ * For variants that expect failure, the call must fail with EINVAL. For the
+ * others, the returned fd must yield a mount namespace ID (different from
+ * the caller's when the variant asks for it) and listmount() on that
+ * namespace must report at least variant->min_mounts entries.
+ */
+TEST_F(open_tree_ns, create_namespace)
+{
+ uint64_t new_ns_id;
+ uint64_t list[256];
+ ssize_t nr_mounts;
+ int ret;
+
+ self->fd = sys_open_tree(AT_FDCWD, variant->path, variant->flags);
+
+ if (!variant->expect_success) {
+ ASSERT_LT(self->fd, 0);
+ ASSERT_EQ(errno, EINVAL);
+ return;
+ }
+
+ /* EINVAL on a variant expected to succeed is treated as "flag unknown" */
+ if (self->fd < 0 && errno == EINVAL)
+ SKIP(return, "OPEN_TREE_NAMESPACE not supported");
+
+ ASSERT_GE(self->fd, 0);
+
+ /* Verify we can get the namespace ID */
+ ret = get_mnt_ns_id(self->fd, &new_ns_id);
+ ASSERT_EQ(ret, 0);
+
+ /* Verify it's a different namespace */
+ if (variant->expect_different_ns)
+ ASSERT_NE(new_ns_id, self->current_ns_id);
+
+ /* List mounts in the new namespace */
+ nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0);
+ ASSERT_GE(nr_mounts, 0) {
+ TH_LOG("%m - listmount failed");
+ }
+
+ /* Verify minimum expected mounts */
+ ASSERT_GE(nr_mounts, variant->min_mounts);
+ TH_LOG("Namespace contains %zd mounts", nr_mounts);
+}
+
+/*
+ * Create a namespace with open_tree() using the variant's path and flags,
+ * dump its mounts, then fork a child that must be able to setns() into it.
+ *
+ * Variants whose flags lack OPEN_TREE_NAMESPACE are skipped, as are kernels
+ * where open_tree() rejects the flags with EINVAL.
+ */
+TEST_F(open_tree_ns, setns_into_namespace)
+{
+	uint64_t new_ns_id;
+	pid_t pid;
+	int status;
+	int ret;
+
+	/* Nothing to enter unless the variant asks for a new namespace */
+	if (!(variant->flags & OPEN_TREE_NAMESPACE))
+		SKIP(return, "setns test requires OPEN_TREE_NAMESPACE in the variant flags");
+
+	self->fd = sys_open_tree(AT_FDCWD, variant->path, variant->flags);
+	if (self->fd < 0 && errno == EINVAL)
+		SKIP(return, "OPEN_TREE_NAMESPACE not supported");
+
+	ASSERT_GE(self->fd, 0);
+
+	/* Get namespace ID and dump all mounts */
+	ret = get_mnt_ns_id(self->fd, &new_ns_id);
+	ASSERT_EQ(ret, 0);
+
+	dump_mounts(_metadata, new_ns_id);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child: try to enter the namespace */
+		if (setns(self->fd, CLONE_NEWNS) < 0)
+			_exit(1);
+		_exit(0);
+	}
+
+	/* The child exits 0 only if setns() succeeded */
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+}
+
+/*
+ * For the non-recursive "/" variant only: create the namespace, list its
+ * mounts and check via statmount() that the first listed mount has a
+ * parent ID different from its own ID. All other variants are skipped.
+ */
+TEST_F(open_tree_ns, verify_mount_properties)
+{
+	struct statmount sm;
+	uint64_t new_ns_id;
+	uint64_t list[256];
+	ssize_t nr_mounts;
+	int ret;
+
+	/* Only test with basic flags on root */
+	if (variant->flags != (OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC) ||
+	    strcmp(variant->path, "/") != 0)
+		SKIP(return, "mount properties test only for basic / case");
+
+	self->fd = sys_open_tree(AT_FDCWD, "/", OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC);
+	if (self->fd < 0 && errno == EINVAL)
+		SKIP(return, "OPEN_TREE_NAMESPACE not supported");
+
+	ASSERT_GE(self->fd, 0);
+
+	ret = get_mnt_ns_id(self->fd, &new_ns_id);
+	ASSERT_EQ(ret, 0);
+
+	nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0);
+	ASSERT_GE(nr_mounts, 1);
+
+	/*
+	 * Get info about the root mount (the bind mount, rootfs is hidden).
+	 * The third statmount() argument is the mount fd, which is unused
+	 * for a lookup by mount ID.
+	 */
+	ret = statmount(list[0], new_ns_id, 0, STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0);
+	ASSERT_EQ(ret, 0);
+
+	ASSERT_NE(sm.mnt_id, sm.mnt_parent_id);
+
+	TH_LOG("Root mount id: %llu, parent: %llu",
+	       (unsigned long long)sm.mnt_id,
+	       (unsigned long long)sm.mnt_parent_id);
+}
+
+/* Per-test state for the capability test. */
+FIXTURE(open_tree_ns_caps)
+{
+ /* set in setup to (geteuid() == 0) */
+ bool has_caps;
+};
+
+/*
+ * Skip when open_tree() is not implemented (ENOSYS); otherwise record
+ * whether the test runs with an effective UID of 0.
+ */
+FIXTURE_SETUP(open_tree_ns_caps)
+{
+	/* Check if open_tree syscall is supported */
+	if (sys_open_tree(-1, NULL, 0) == -1 && errno == ENOSYS)
+		SKIP(return, "open_tree() syscall not supported");
+
+	self->has_caps = !geteuid();
+}
+
+/* Nothing to release: the fixture holds no resources. */
+FIXTURE_TEARDOWN(open_tree_ns_caps)
+{
+}
+
+/*
+ * Fork a child that calls enter_userns() and caps_down(), then tries
+ * open_tree() with OPEN_TREE_NAMESPACE on "/". The call is expected to
+ * fail with EPERM.
+ *
+ * Child exit codes: 0 = EPERM as expected, 1 = open_tree() succeeded,
+ * 2 = enter_userns() failed, 3 = caps_down() returned 0, 4 = EINVAL,
+ * 5 = any other errno. The parent maps them to pass, skip or failure.
+ */
+TEST_F(open_tree_ns_caps, requires_cap_sys_admin)
+{
+ pid_t pid;
+ int status;
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int fd;
+
+ /* Child: drop privileges using utils.h helper */
+ if (enter_userns() != 0)
+ _exit(2);
+
+ /* Drop all caps using utils.h helper */
+ if (caps_down() == 0)
+ _exit(3);
+
+ fd = sys_open_tree(AT_FDCWD, "/",
+ OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC);
+ if (fd >= 0) {
+ close(fd);
+ /* Should have failed without caps */
+ _exit(1);
+ }
+
+ if (errno == EPERM)
+ _exit(0);
+
+ /* EINVAL means OPEN_TREE_NAMESPACE not supported */
+ if (errno == EINVAL)
+ _exit(4);
+
+ /* Unexpected error */
+ _exit(5);
+ }
+
+ ASSERT_EQ(waitpid(pid, &status, 0), pid);
+ ASSERT_TRUE(WIFEXITED(status));
+
+ /* Translate the child's exit code into a test result */
+ switch (WEXITSTATUS(status)) {
+ case 0:
+ /* Expected: EPERM without caps */
+ break;
+ case 1:
+ ASSERT_FALSE(true) TH_LOG("OPEN_TREE_NAMESPACE succeeded without caps");
+ break;
+ case 2:
+ SKIP(return, "setup_userns failed");
+ break;
+ case 3:
+ SKIP(return, "caps_down failed");
+ break;
+ case 4:
+ SKIP(return, "OPEN_TREE_NAMESPACE not supported");
+ break;
+ default:
+ ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)",
+ WEXITSTATUS(status));
+ break;
+ }
+}
+
+/* Per-test state for the user namespace tests. */
+FIXTURE(open_tree_ns_userns)
+{
+ /* initialised to -1 in setup; closed in teardown when >= 0 */
+ int fd;
+};
+
+/*
+ * Reset the fixture fd and skip the test when open_tree() or statmount()
+ * report ENOSYS.
+ */
+FIXTURE_SETUP(open_tree_ns_userns)
+{
+	int ret;
+
+	self->fd = -1;
+
+	/* Check if open_tree syscall is supported */
+	ret = sys_open_tree(-1, NULL, 0);
+	if (ret == -1 && errno == ENOSYS)
+		SKIP(return, "open_tree() syscall not supported");
+
+	/*
+	 * Check if statmount/listmount are supported. The statmount()
+	 * wrapper takes (mnt_id, mnt_ns_id, fd, mask, buf, bufsize, flags).
+	 */
+	ret = statmount(0, 0, 0, 0, NULL, 0, 0);
+	if (ret == -1 && errno == ENOSYS)
+		SKIP(return, "statmount() syscall not supported");
+}
+
+/* Close the fixture fd if one was stored. */
+FIXTURE_TEARDOWN(open_tree_ns_userns)
+{
+	if (self->fd < 0)
+		return;
+
+	close(self->fd);
+}
+
+/*
+ * Fork a child that calls enter_userns(), then open_tree() with
+ * OPEN_TREE_NAMESPACE on "/", and checks that the namespace ID can be read
+ * and that listmount() reports at least one mount.
+ *
+ * Child exit codes: 0 = success, 1 = open_tree() failed, 2 = enter_userns()
+ * failed, 4 = open_tree() returned EINVAL, 5 = namespace ID lookup failed,
+ * 6 = listmount() failed, 7 = no mounts listed.
+ */
+TEST_F(open_tree_ns_userns, create_in_userns)
+{
+ pid_t pid;
+ int status;
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ uint64_t new_ns_id;
+ uint64_t list[256];
+ ssize_t nr_mounts;
+ int fd;
+
+ /* Create new user namespace (also creates mount namespace) */
+ if (enter_userns() != 0)
+ _exit(2);
+
+ /* Now we have CAP_SYS_ADMIN in the user namespace */
+ fd = sys_open_tree(AT_FDCWD, "/",
+ OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC);
+ if (fd < 0) {
+ if (errno == EINVAL)
+ _exit(4); /* OPEN_TREE_NAMESPACE not supported */
+ _exit(1);
+ }
+
+ /* Verify we can get the namespace ID */
+ if (get_mnt_ns_id(fd, &new_ns_id) != 0)
+ _exit(5);
+
+ /* Verify we can list mounts in the new namespace */
+ nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0);
+ if (nr_mounts < 0)
+ _exit(6);
+
+ /* Should have at least 1 mount */
+ if (nr_mounts < 1)
+ _exit(7);
+
+ close(fd);
+ _exit(0);
+ }
+
+ ASSERT_EQ(waitpid(pid, &status, 0), pid);
+ ASSERT_TRUE(WIFEXITED(status));
+
+ /* Translate the child's exit code into a test result */
+ switch (WEXITSTATUS(status)) {
+ case 0:
+ /* Success */
+ break;
+ case 1:
+ ASSERT_FALSE(true) TH_LOG("open_tree(OPEN_TREE_NAMESPACE) failed in userns");
+ break;
+ case 2:
+ SKIP(return, "setup_userns failed");
+ break;
+ case 4:
+ SKIP(return, "OPEN_TREE_NAMESPACE not supported");
+ break;
+ case 5:
+ ASSERT_FALSE(true) TH_LOG("Failed to get mount namespace ID");
+ break;
+ case 6:
+ ASSERT_FALSE(true) TH_LOG("listmount failed in new namespace");
+ break;
+ case 7:
+ ASSERT_FALSE(true) TH_LOG("New namespace has no mounts");
+ break;
+ default:
+ ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)",
+ WEXITSTATUS(status));
+ break;
+ }
+}
+
+/*
+ * Fork a child that calls enter_userns(), creates a namespace with
+ * open_tree(OPEN_TREE_NAMESPACE) on "/", and forks once more so that the
+ * grandchild can setns() into the new namespace.
+ *
+ * Child exit codes: 0 = success, 1 = open_tree() failed, 2 = enter_userns()
+ * failed, 4 = open_tree() returned EINVAL, 5 = namespace ID lookup failed,
+ * 8 = inner fork failed, 9 = inner waitpid failed, 10 = the grandchild did
+ * not exit with status 0.
+ */
+TEST_F(open_tree_ns_userns, setns_in_userns)
+{
+ pid_t pid;
+ int status;
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ uint64_t new_ns_id;
+ int fd;
+ pid_t inner_pid;
+ int inner_status;
+
+ /* Create new user namespace */
+ if (enter_userns() != 0)
+ _exit(2);
+
+ fd = sys_open_tree(AT_FDCWD, "/",
+ OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC);
+ if (fd < 0) {
+ if (errno == EINVAL)
+ _exit(4);
+ _exit(1);
+ }
+
+ if (get_mnt_ns_id(fd, &new_ns_id) != 0)
+ _exit(5);
+
+ /* Fork again to test setns into the new namespace */
+ inner_pid = fork();
+ if (inner_pid < 0)
+ _exit(8);
+
+ if (inner_pid == 0) {
+ /* Inner child: enter the new namespace */
+ if (setns(fd, CLONE_NEWNS) < 0)
+ _exit(1);
+ _exit(0);
+ }
+
+ if (waitpid(inner_pid, &inner_status, 0) != inner_pid)
+ _exit(9);
+
+ /* Any non-zero or abnormal exit of the inner child maps to code 10 */
+ if (!WIFEXITED(inner_status) || WEXITSTATUS(inner_status) != 0)
+ _exit(10);
+
+ close(fd);
+ _exit(0);
+ }
+
+ ASSERT_EQ(waitpid(pid, &status, 0), pid);
+ ASSERT_TRUE(WIFEXITED(status));
+
+ /* Translate the child's exit code into a test result */
+ switch (WEXITSTATUS(status)) {
+ case 0:
+ /* Success */
+ break;
+ case 1:
+ ASSERT_FALSE(true) TH_LOG("open_tree or setns failed in userns");
+ break;
+ case 2:
+ SKIP(return, "setup_userns failed");
+ break;
+ case 4:
+ SKIP(return, "OPEN_TREE_NAMESPACE not supported");
+ break;
+ case 5:
+ ASSERT_FALSE(true) TH_LOG("Failed to get mount namespace ID");
+ break;
+ case 8:
+ ASSERT_FALSE(true) TH_LOG("Inner fork failed");
+ break;
+ case 9:
+ ASSERT_FALSE(true) TH_LOG("Inner waitpid failed");
+ break;
+ case 10:
+ ASSERT_FALSE(true) TH_LOG("setns into new namespace failed");
+ break;
+ default:
+ ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)",
+ WEXITSTATUS(status));
+ break;
+ }
+}
+
+/*
+ * Fork a child that calls enter_userns(), then open_tree() with
+ * OPEN_TREE_NAMESPACE | AT_RECURSIVE on "/", and checks that the namespace
+ * ID can be read and that listmount() reports at least one mount.
+ *
+ * Child exit codes: 0 = success, 1 = open_tree() failed, 2 = enter_userns()
+ * failed, 4 = open_tree() returned EINVAL, 5 = namespace ID lookup failed,
+ * 6 = listmount() failed, 7 = no mounts listed.
+ */
+TEST_F(open_tree_ns_userns, recursive_in_userns)
+{
+ pid_t pid;
+ int status;
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ uint64_t new_ns_id;
+ uint64_t list[256];
+ ssize_t nr_mounts;
+ int fd;
+
+ /* Create new user namespace */
+ if (enter_userns() != 0)
+ _exit(2);
+
+ /* Test recursive flag in userns */
+ fd = sys_open_tree(AT_FDCWD, "/",
+ OPEN_TREE_NAMESPACE | AT_RECURSIVE | OPEN_TREE_CLOEXEC);
+ if (fd < 0) {
+ if (errno == EINVAL)
+ _exit(4);
+ _exit(1);
+ }
+
+ if (get_mnt_ns_id(fd, &new_ns_id) != 0)
+ _exit(5);
+
+ nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0);
+ if (nr_mounts < 0)
+ _exit(6);
+
+ /* Recursive should copy submounts too */
+ /* NOTE(review): only "at least one mount" is actually enforced here */
+ if (nr_mounts < 1)
+ _exit(7);
+
+ close(fd);
+ _exit(0);
+ }
+
+ ASSERT_EQ(waitpid(pid, &status, 0), pid);
+ ASSERT_TRUE(WIFEXITED(status));
+
+ /* Translate the child's exit code into a test result */
+ switch (WEXITSTATUS(status)) {
+ case 0:
+ /* Success */
+ break;
+ case 1:
+ ASSERT_FALSE(true) TH_LOG("open_tree(OPEN_TREE_NAMESPACE|AT_RECURSIVE) failed in userns");
+ break;
+ case 2:
+ SKIP(return, "setup_userns failed");
+ break;
+ case 4:
+ SKIP(return, "OPEN_TREE_NAMESPACE not supported");
+ break;
+ case 5:
+ ASSERT_FALSE(true) TH_LOG("Failed to get mount namespace ID");
+ break;
+ case 6:
+ ASSERT_FALSE(true) TH_LOG("listmount failed in new namespace");
+ break;
+ case 7:
+ ASSERT_FALSE(true) TH_LOG("New namespace has no mounts");
+ break;
+ default:
+ ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)",
+ WEXITSTATUS(status));
+ break;
+ }
+}
+
+/*
+ * Fork a child that calls enter_userns(), creates a recursive namespace
+ * with open_tree(), enters it with setns() and then tries umount2() with
+ * MNT_DETACH on every mount point listed for it (listed with
+ * LISTMOUNT_REVERSE). Each umount2() is expected to fail with EINVAL.
+ *
+ * Child exit codes: 0 = success, 1 = open_tree() failed, 2 = enter_userns()
+ * failed, 4 = open_tree() returned EINVAL, 5 = namespace ID lookup failed,
+ * 6 = setns() failed, 7 = an umount2() succeeded, 8 = umount2() failed with
+ * an errno other than EINVAL, 9 = listmount() failed, 10 = no mounts
+ * listed, 11 = statmount_alloc() failed.
+ */
+TEST_F(open_tree_ns_userns, umount_fails_einval)
+{
+ pid_t pid;
+ int status;
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ uint64_t new_ns_id;
+ uint64_t list[256];
+ ssize_t nr_mounts;
+ int fd;
+ ssize_t i;
+
+ /* Create new user namespace */
+ if (enter_userns() != 0)
+ _exit(2);
+
+ fd = sys_open_tree(AT_FDCWD, "/",
+ OPEN_TREE_NAMESPACE | AT_RECURSIVE | OPEN_TREE_CLOEXEC);
+ if (fd < 0) {
+ if (errno == EINVAL)
+ _exit(4);
+ _exit(1);
+ }
+
+ if (get_mnt_ns_id(fd, &new_ns_id) != 0)
+ _exit(5);
+
+ /* Get all mounts in the new namespace */
+ nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, LISTMOUNT_REVERSE);
+ if (nr_mounts < 0)
+ _exit(9);
+
+ if (nr_mounts < 1)
+ _exit(10);
+
+ /* Enter the new namespace */
+ if (setns(fd, CLONE_NEWNS) < 0)
+ _exit(6);
+
+ for (i = 0; i < nr_mounts; i++) {
+ struct statmount *sm;
+ const char *mnt_point;
+
+ sm = statmount_alloc(list[i], new_ns_id,
+ STATMOUNT_MNT_POINT);
+ if (!sm)
+ _exit(11);
+
+ /* mount point string lives at this offset inside sm->str */
+ mnt_point = sm->str + sm->mnt_point;
+
+ TH_LOG("Trying to umount %s", mnt_point);
+ if (umount2(mnt_point, MNT_DETACH) == 0) {
+ free(sm);
+ _exit(7);
+ }
+
+ if (errno != EINVAL) {
+ /* Wrong error */
+ free(sm);
+ _exit(8);
+ }
+
+ free(sm);
+ }
+
+ close(fd);
+ _exit(0);
+ }
+
+ ASSERT_EQ(waitpid(pid, &status, 0), pid);
+ ASSERT_TRUE(WIFEXITED(status));
+
+ /* Translate the child's exit code into a test result */
+ switch (WEXITSTATUS(status)) {
+ case 0:
+ break;
+ case 1:
+ ASSERT_FALSE(true) TH_LOG("open_tree(OPEN_TREE_NAMESPACE) failed");
+ break;
+ case 2:
+ SKIP(return, "setup_userns failed");
+ break;
+ case 4:
+ SKIP(return, "OPEN_TREE_NAMESPACE not supported");
+ break;
+ case 5:
+ ASSERT_FALSE(true) TH_LOG("Failed to get mount namespace ID");
+ break;
+ case 6:
+ ASSERT_FALSE(true) TH_LOG("setns into new namespace failed");
+ break;
+ case 7:
+ ASSERT_FALSE(true) TH_LOG("umount succeeded but should have failed with EINVAL");
+ break;
+ case 8:
+ ASSERT_FALSE(true) TH_LOG("umount failed with wrong error (expected EINVAL)");
+ break;
+ case 9:
+ ASSERT_FALSE(true) TH_LOG("listmount failed");
+ break;
+ case 10:
+ ASSERT_FALSE(true) TH_LOG("No mounts in new namespace");
+ break;
+ case 11:
+ ASSERT_FALSE(true) TH_LOG("statmount_alloc failed");
+ break;
+ default:
+ ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)",
+ WEXITSTATUS(status));
+ break;
+ }
+}
+
+/*
+ * Fork a child that unshares its mount namespace, marks "/" as
+ * MS_SLAVE | MS_REC, creates a recursive namespace with open_tree(),
+ * enters it with setns() and then calls umount2() with MNT_DETACH on every
+ * mount point listed for it (listed with LISTMOUNT_REVERSE). Every
+ * umount2() is expected to succeed.
+ *
+ * Child exit codes: 0 = success, 1 = open_tree() failed, 4 = open_tree()
+ * returned EINVAL, 5 = namespace ID lookup failed, 6 = setns() failed,
+ * 7 = an umount2() failed, 9 = listmount() failed, 10 = no mounts listed,
+ * 11 = statmount_alloc() failed, 12 = unshare() failed, 13 = changing the
+ * propagation of "/" failed.
+ */
+TEST_F(open_tree_ns_userns, umount_succeeds)
+{
+	pid_t pid;
+	int status;
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		uint64_t new_ns_id;
+		uint64_t list[256];
+		ssize_t nr_mounts;
+		int fd;
+		ssize_t i;
+
+		/* Distinct codes keep setup failures apart from open_tree() */
+		if (unshare(CLONE_NEWNS))
+			_exit(12);
+
+		if (sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) != 0)
+			_exit(13);
+
+		fd = sys_open_tree(AT_FDCWD, "/",
+				   OPEN_TREE_NAMESPACE | AT_RECURSIVE | OPEN_TREE_CLOEXEC);
+		if (fd < 0) {
+			if (errno == EINVAL)
+				_exit(4);
+			_exit(1);
+		}
+
+		if (get_mnt_ns_id(fd, &new_ns_id) != 0)
+			_exit(5);
+
+		/* Get all mounts in the new namespace */
+		nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, LISTMOUNT_REVERSE);
+		if (nr_mounts < 0)
+			_exit(9);
+
+		if (nr_mounts < 1)
+			_exit(10);
+
+		/* Enter the new namespace */
+		if (setns(fd, CLONE_NEWNS) < 0)
+			_exit(6);
+
+		for (i = 0; i < nr_mounts; i++) {
+			struct statmount *sm;
+			const char *mnt_point;
+
+			sm = statmount_alloc(list[i], new_ns_id,
+					     STATMOUNT_MNT_POINT);
+			if (!sm)
+				_exit(11);
+
+			/* mount point string lives at this offset inside sm->str */
+			mnt_point = sm->str + sm->mnt_point;
+
+			TH_LOG("Trying to umount %s", mnt_point);
+			if (umount2(mnt_point, MNT_DETACH) != 0) {
+				free(sm);
+				_exit(7);
+			}
+
+			free(sm);
+		}
+
+		close(fd);
+		_exit(0);
+	}
+
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFEXITED(status));
+
+	/* Translate the child's exit code into a test result */
+	switch (WEXITSTATUS(status)) {
+	case 0:
+		break;
+	case 1:
+		ASSERT_FALSE(true) TH_LOG("open_tree(OPEN_TREE_NAMESPACE) failed");
+		break;
+	case 4:
+		SKIP(return, "OPEN_TREE_NAMESPACE not supported");
+		break;
+	case 5:
+		ASSERT_FALSE(true) TH_LOG("Failed to get mount namespace ID");
+		break;
+	case 6:
+		ASSERT_FALSE(true) TH_LOG("setns into new namespace failed");
+		break;
+	case 7:
+		ASSERT_FALSE(true) TH_LOG("umount failed but should have succeeded");
+		break;
+	case 9:
+		ASSERT_FALSE(true) TH_LOG("listmount failed");
+		break;
+	case 10:
+		ASSERT_FALSE(true) TH_LOG("No mounts in new namespace");
+		break;
+	case 11:
+		ASSERT_FALSE(true) TH_LOG("statmount_alloc failed");
+		break;
+	case 12:
+		ASSERT_FALSE(true) TH_LOG("unshare(CLONE_NEWNS) failed");
+		break;
+	case 13:
+		ASSERT_FALSE(true) TH_LOG("Failed to make / MS_SLAVE | MS_REC");
+		break;
+	default:
+		ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)",
+					  WEXITSTATUS(status));
+		break;
+	}
+}
+
+/* Per-test state for the unbindable-mount tests. */
+FIXTURE(open_tree_ns_unbindable)
+{
+ /* directory created with mkdtemp() under /tmp in setup */
+ char tmpdir[PATH_MAX];
+ /* true once a tmpfs has been mounted on tmpdir */
+ bool mounted;
+};
+
+/*
+ * Create a temporary directory, mount a tmpfs on it and mark that mount
+ * MS_UNBINDABLE.
+ *
+ * The test is skipped when open_tree() reports ENOSYS or when either
+ * mount() call fails. Every skip path undoes what setup created so far,
+ * so nothing is left behind regardless of whether teardown runs.
+ */
+FIXTURE_SETUP(open_tree_ns_unbindable)
+{
+	int ret;
+
+	self->mounted = false;
+
+	/* Check if open_tree syscall is supported */
+	ret = sys_open_tree(-1, NULL, 0);
+	if (ret == -1 && errno == ENOSYS)
+		SKIP(return, "open_tree() syscall not supported");
+
+	/* Create a temporary directory for the test mount */
+	snprintf(self->tmpdir, sizeof(self->tmpdir),
+		 "/tmp/open_tree_ns_test.XXXXXX");
+	ASSERT_NE(mkdtemp(self->tmpdir), NULL);
+
+	/* Mount tmpfs there */
+	ret = mount("tmpfs", self->tmpdir, "tmpfs", 0, NULL);
+	if (ret < 0) {
+		rmdir(self->tmpdir);
+		SKIP(return, "Failed to mount tmpfs");
+	}
+	self->mounted = true;
+
+	ret = mount(NULL, self->tmpdir, NULL, MS_UNBINDABLE, NULL);
+	if (ret < 0) {
+		/* rmdir() cannot remove a mount point: detach the tmpfs first */
+		umount2(self->tmpdir, MNT_DETACH);
+		self->mounted = false;
+		rmdir(self->tmpdir);
+		SKIP(return, "Failed to make tmpfs unbindable");
+	}
+}
+
+/* Detach the tmpfs (if setup mounted one) and remove the temp directory. */
+FIXTURE_TEARDOWN(open_tree_ns_unbindable)
+{
+	const char *dir = self->tmpdir;
+
+	if (self->mounted)
+		umount2(dir, MNT_DETACH);
+
+	rmdir(dir);
+}
+
+/*
+ * A non-recursive open_tree(OPEN_TREE_NAMESPACE) on the unbindable tmpfs
+ * mount point must not return a file descriptor.
+ */
+TEST_F(open_tree_ns_unbindable, fails_on_unbindable)
+{
+	const unsigned int flags = OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC;
+	int fd = sys_open_tree(AT_FDCWD, self->tmpdir, flags);
+
+	ASSERT_LT(fd, 0);
+}
+
+/*
+ * A recursive open_tree(OPEN_TREE_NAMESPACE) on "/" must succeed, and the
+ * resulting namespace must not contain a mount whose mount point equals
+ * the fixture's unbindable tmpfs directory.
+ *
+ * The test is skipped when open_tree() rejects the flags with EINVAL.
+ */
+TEST_F(open_tree_ns_unbindable, recursive_skips_on_unbindable)
+{
+	uint64_t new_ns_id;
+	uint64_t list[256];
+	ssize_t nr_mounts;
+	int fd;
+	ssize_t i;
+	bool found_unbindable = false;
+
+	fd = sys_open_tree(AT_FDCWD, "/",
+			   OPEN_TREE_NAMESPACE | AT_RECURSIVE | OPEN_TREE_CLOEXEC);
+	if (fd < 0 && errno == EINVAL)
+		SKIP(return, "OPEN_TREE_NAMESPACE not supported");
+
+	/* 0 is a valid file descriptor, so only negative values are errors */
+	ASSERT_GE(fd, 0);
+
+	ASSERT_EQ(get_mnt_ns_id(fd, &new_ns_id), 0);
+
+	nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0);
+	ASSERT_GE(nr_mounts, 0) {
+		TH_LOG("listmount failed: %m");
+	}
+
+	/*
+	 * Iterate through all mounts in the new namespace and verify
+	 * the unbindable tmpfs mount was silently dropped.
+	 */
+	for (i = 0; i < nr_mounts; i++) {
+		struct statmount *sm;
+		const char *mnt_point;
+
+		sm = statmount_alloc(list[i], new_ns_id, STATMOUNT_MNT_POINT);
+		ASSERT_NE(sm, NULL) {
+			TH_LOG("statmount_alloc failed for mnt_id %llu",
+			       (unsigned long long)list[i]);
+		}
+
+		/* mount point string lives at this offset inside sm->str */
+		mnt_point = sm->str + sm->mnt_point;
+
+		if (strcmp(mnt_point, self->tmpdir) == 0) {
+			TH_LOG("Found unbindable mount at %s (should have been dropped)",
+			       mnt_point);
+			found_unbindable = true;
+		}
+
+		free(sm);
+	}
+
+	ASSERT_FALSE(found_unbindable) {
+		TH_LOG("Unbindable mount at %s was not dropped", self->tmpdir);
+	}
+
+	close(fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h
index 99e5ad082fb1..e1cba4bfd8d9 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount.h
+++ b/tools/testing/selftests/filesystems/statmount/statmount.h
@@ -43,19 +43,24 @@
#endif
#endif
-static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
- struct statmount *buf, size_t bufsize,
+static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint32_t fd,
+ uint64_t mask, struct statmount *buf, size_t bufsize,
unsigned int flags)
{
struct mnt_id_req req = {
.size = MNT_ID_REQ_SIZE_VER0,
- .mnt_id = mnt_id,
.param = mask,
};
- if (mnt_ns_id) {
+ if (flags & STATMOUNT_BY_FD) {
req.size = MNT_ID_REQ_SIZE_VER1;
- req.mnt_ns_id = mnt_ns_id;
+ req.mnt_fd = fd;
+ } else {
+ req.mnt_id = mnt_id;
+ if (mnt_ns_id) {
+ req.size = MNT_ID_REQ_SIZE_VER1;
+ req.mnt_ns_id = mnt_ns_id;
+ }
}
return syscall(__NR_statmount, &req, buf, bufsize, flags);
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c
index 6e53430423d2..a04bcaace126 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c
@@ -33,15 +33,24 @@ static const char *const known_fs[] = {
"sysv", "tmpfs", "tracefs", "ubifs", "udf", "ufs", "v7", "vboxsf",
"vfat", "virtiofs", "vxfs", "xenfs", "xfs", "zonefs", NULL };
-static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags)
+static struct statmount *statmount_alloc(uint64_t mnt_id, int fd, uint64_t mask, unsigned int flags)
{
size_t bufsize = 1 << 15;
- struct statmount *buf = NULL, *tmp = alloca(bufsize);
+ struct statmount *buf = NULL, *tmp = NULL;
int tofree = 0;
int ret;
+ if (flags & STATMOUNT_BY_FD && fd < 0)
+ return NULL;
+
+ tmp = alloca(bufsize);
+
for (;;) {
- ret = statmount(mnt_id, 0, mask, tmp, bufsize, flags);
+ if (flags & STATMOUNT_BY_FD)
+ ret = statmount(0, 0, (uint32_t) fd, mask, tmp, bufsize, flags);
+ else
+ ret = statmount(mnt_id, 0, 0, mask, tmp, bufsize, flags);
+
if (ret != -1)
break;
if (tofree)
@@ -237,7 +246,7 @@ static void test_statmount_zero_mask(void)
struct statmount sm;
int ret;
- ret = statmount(root_id, 0, 0, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, 0, 0, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount zero mask: %s\n",
strerror(errno));
@@ -263,7 +272,7 @@ static void test_statmount_mnt_basic(void)
int ret;
uint64_t mask = STATMOUNT_MNT_BASIC;
- ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount mnt basic: %s\n",
strerror(errno));
@@ -323,7 +332,7 @@ static void test_statmount_sb_basic(void)
struct statx sx;
struct statfs sf;
- ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount sb basic: %s\n",
strerror(errno));
@@ -375,7 +384,7 @@ static void test_statmount_mnt_point(void)
{
struct statmount *sm;
- sm = statmount_alloc(root_id, STATMOUNT_MNT_POINT, 0);
+ sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_POINT, 0);
if (!sm) {
ksft_test_result_fail("statmount mount point: %s\n",
strerror(errno));
@@ -405,7 +414,7 @@ static void test_statmount_mnt_root(void)
assert(last_dir);
last_dir++;
- sm = statmount_alloc(root_id, STATMOUNT_MNT_ROOT, 0);
+ sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_ROOT, 0);
if (!sm) {
ksft_test_result_fail("statmount mount root: %s\n",
strerror(errno));
@@ -438,7 +447,7 @@ static void test_statmount_fs_type(void)
const char *fs_type;
const char *const *s;
- sm = statmount_alloc(root_id, STATMOUNT_FS_TYPE, 0);
+ sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE, 0);
if (!sm) {
ksft_test_result_fail("statmount fs type: %s\n",
strerror(errno));
@@ -467,7 +476,7 @@ static void test_statmount_mnt_opts(void)
char *line = NULL;
size_t len = 0;
- sm = statmount_alloc(root_id, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS,
+ sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS,
0);
if (!sm) {
ksft_test_result_fail("statmount mnt opts: %s\n",
@@ -557,7 +566,7 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name)
uint32_t start, i;
int ret;
- sm = statmount_alloc(root_id, mask, 0);
+ sm = statmount_alloc(root_id, 0, mask, 0);
if (!sm) {
ksft_test_result_fail("statmount %s: %s\n", name,
strerror(errno));
@@ -586,14 +595,14 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name)
exactsize = sm->size;
shortsize = sizeof(*sm) + i;
- ret = statmount(root_id, 0, mask, sm, exactsize, 0);
+ ret = statmount(root_id, 0, 0, mask, sm, exactsize, 0);
if (ret == -1) {
ksft_test_result_fail("statmount exact size: %s\n",
strerror(errno));
goto out;
}
errno = 0;
- ret = statmount(root_id, 0, mask, sm, shortsize, 0);
+ ret = statmount(root_id, 0, 0, mask, sm, shortsize, 0);
if (ret != -1 || errno != EOVERFLOW) {
ksft_test_result_fail("should have failed with EOVERFLOW: %s\n",
strerror(errno));
@@ -658,6 +667,226 @@ static void test_listmount_tree(void)
ksft_test_result_pass("listmount tree\n");
}
+/*
+ * statmount() via STATMOUNT_BY_FD on an O_PATH fd of a bind mount, queried
+ * twice: once after chroot(tmproot) and once after the following chroot(".").
+ *
+ * Expected while chrooted: STATMOUNT_MNT_ROOT set with mnt_root "/test" and
+ * STATMOUNT_MNT_POINT not set.  Expected after chroot("."): both bits set,
+ * mnt_point equal to subdir and mnt_root still "/test".
+ *
+ * Setup failures are only logged with ksft_perror(); no test result is
+ * recorded on those paths.
+ */
+static void test_statmount_by_fd(void)
+{
+ struct statmount *sm = NULL;
+ char tmpdir[] = "/statmount.fd.XXXXXX";
+ const char root[] = "/test";
+ char subdir[PATH_MAX], tmproot[PATH_MAX];
+ int fd;
+
+ if (!mkdtemp(tmpdir)) {
+ ksft_perror("mkdtemp");
+ return;
+ }
+
+ if (mount("statmount.test", tmpdir, "tmpfs", 0, NULL)) {
+ ksft_perror("mount");
+ rmdir(tmpdir);
+ return;
+ }
+
+ /* subdir = <tmpdir>/test (bind-mounted below), tmproot = <tmpdir>/chroot */
+ snprintf(subdir, PATH_MAX, "%s%s", tmpdir, root);
+ snprintf(tmproot, PATH_MAX, "%s/%s", tmpdir, "chroot");
+
+ if (mkdir(subdir, 0755)) {
+ ksft_perror("mkdir");
+ goto err_tmpdir;
+ }
+
+ /* Bind subdir onto itself so that it becomes a mount of its own */
+ if (mount(subdir, subdir, NULL, MS_BIND, 0)) {
+ ksft_perror("mount");
+ goto err_subdir;
+ }
+
+ if (mkdir(tmproot, 0755)) {
+ ksft_perror("mkdir");
+ goto err_subdir;
+ }
+
+ /* Opened before the chroot below; subdir is a sibling of tmproot */
+ fd = open(subdir, O_PATH);
+ if (fd < 0) {
+ ksft_perror("open");
+ goto err_tmproot;
+ }
+
+ if (chroot(tmproot)) {
+ ksft_perror("chroot");
+ goto err_fd;
+ }
+
+ /* First query: issued while chrooted into tmproot */
+ sm = statmount_alloc(0, fd, STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT, STATMOUNT_BY_FD);
+ if (!sm) {
+ ksft_test_result_fail("statmount by fd failed: %s\n", strerror(errno));
+ goto err_chroot;
+ }
+
+ if (sm->size < sizeof(*sm)) {
+ ksft_test_result_fail("unexpected size: %u < %u\n",
+ sm->size, (uint32_t) sizeof(*sm));
+ goto err_chroot;
+ }
+
+ if (sm->mask & STATMOUNT_MNT_POINT) {
+ ksft_test_result_fail("STATMOUNT_MNT_POINT unexpectedly set in statmount\n");
+ goto err_chroot;
+ }
+
+ if (!(sm->mask & STATMOUNT_MNT_ROOT)) {
+ ksft_test_result_fail("STATMOUNT_MNT_ROOT not set in statmount\n");
+ goto err_chroot;
+ }
+
+ if (strcmp(root, sm->str + sm->mnt_root) != 0) {
+ ksft_test_result_fail("statmount returned incorrect mnt_root,"
+ "statmount mnt_root: %s != %s\n",
+ sm->str + sm->mnt_root, root);
+ goto err_chroot;
+ }
+
+ /*
+ * NOTE(review): presumably the working directory is still outside
+ * tmproot, so this puts the root back where it was - confirm.
+ */
+ if (chroot(".")) {
+ ksft_perror("chroot");
+ goto out;
+ }
+
+ /* Second query: same fd, after the chroot(".") above */
+ free(sm);
+ sm = statmount_alloc(0, fd, STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT, STATMOUNT_BY_FD);
+ if (!sm) {
+ ksft_test_result_fail("statmount by fd failed: %s\n", strerror(errno));
+ goto err_fd;
+ }
+
+ if (sm->size < sizeof(*sm)) {
+ ksft_test_result_fail("unexpected size: %u < %u\n",
+ sm->size, (uint32_t) sizeof(*sm));
+ goto out;
+ }
+
+ if (!(sm->mask & STATMOUNT_MNT_POINT)) {
+ ksft_test_result_fail("STATMOUNT_MNT_POINT not set in statmount\n");
+ goto out;
+ }
+
+ if (!(sm->mask & STATMOUNT_MNT_ROOT)) {
+ ksft_test_result_fail("STATMOUNT_MNT_ROOT not set in statmount\n");
+ goto out;
+ }
+
+ if (strcmp(subdir, sm->str + sm->mnt_point) != 0) {
+ ksft_test_result_fail("statmount returned incorrect mnt_point,"
+ "statmount mnt_point: %s != %s\n", sm->str + sm->mnt_point, subdir);
+ goto out;
+ }
+
+ if (strcmp(root, sm->str + sm->mnt_root) != 0) {
+ ksft_test_result_fail("statmount returned incorrect mnt_root,"
+ "statmount mnt_root: %s != %s\n", sm->str + sm->mnt_root, root);
+ goto out;
+ }
+
+ ksft_test_result_pass("statmount by fd\n");
+ goto out;
+ /* Cleanup labels undo the setup steps in reverse order */
+err_chroot:
+ chroot(".");
+out:
+ free(sm);
+err_fd:
+ close(fd);
+err_tmproot:
+ rmdir(tmproot);
+err_subdir:
+ umount2(subdir, MNT_DETACH);
+ rmdir(subdir);
+err_tmpdir:
+ umount2(tmpdir, MNT_DETACH);
+ rmdir(tmpdir);
+}
+
+/*
+ * statmount() via STATMOUNT_BY_FD on an O_PATH fd whose mount has been
+ * lazily unmounted (the enclosing tmpfs is detached with MNT_DETACH while
+ * the fd stays open).
+ *
+ * Expected: the call succeeds, STATMOUNT_MNT_ROOT is set with mnt_root
+ * "/test.unmounted", and STATMOUNT_MNT_POINT is not set.
+ *
+ * Setup failures are only logged with ksft_perror(); no test result is
+ * recorded on those paths.
+ */
+static void test_statmount_by_fd_unmounted(void)
+{
+ const char root[] = "/test.unmounted";
+ char tmpdir[] = "/statmount.fd.XXXXXX";
+ char subdir[PATH_MAX];
+ int fd;
+ struct statmount *sm = NULL;
+
+ if (!mkdtemp(tmpdir)) {
+ ksft_perror("mkdtemp");
+ return;
+ }
+
+ if (mount("statmount.test", tmpdir, "tmpfs", 0, NULL)) {
+ ksft_perror("mount");
+ rmdir(tmpdir);
+ return;
+ }
+
+ /* subdir = <tmpdir>/test.unmounted */
+ snprintf(subdir, PATH_MAX, "%s%s", tmpdir, root);
+
+ if (mkdir(subdir, 0755)) {
+ ksft_perror("mkdir");
+ goto err_tmpdir;
+ }
+
+ /* Bind subdir onto itself so that it becomes a mount of its own */
+ if (mount(subdir, subdir, 0, MS_BIND, NULL)) {
+ ksft_perror("mount");
+ goto err_subdir;
+ }
+
+ fd = open(subdir, O_PATH);
+ if (fd < 0) {
+ ksft_perror("open");
+ goto err_subdir;
+ }
+
+ /* Detach the tmpfs at tmpdir while fd is still open */
+ if (umount2(tmpdir, MNT_DETACH)) {
+ ksft_perror("umount2");
+ goto err_fd;
+ }
+
+ sm = statmount_alloc(0, fd, STATMOUNT_MNT_POINT | STATMOUNT_MNT_ROOT, STATMOUNT_BY_FD);
+ if (!sm) {
+ ksft_test_result_fail("statmount by fd unmounted: %s\n",
+ strerror(errno));
+ goto err_sm;
+ }
+
+ if (sm->size < sizeof(*sm)) {
+ ksft_test_result_fail("unexpected size: %u < %u\n",
+ sm->size, (uint32_t) sizeof(*sm));
+ goto err_sm;
+ }
+
+ if (sm->mask & STATMOUNT_MNT_POINT) {
+ ksft_test_result_fail("STATMOUNT_MNT_POINT unexpectedly set in mask\n");
+ goto err_sm;
+ }
+
+ if (!(sm->mask & STATMOUNT_MNT_ROOT)) {
+ ksft_test_result_fail("STATMOUNT_MNT_ROOT not set in mask\n");
+ goto err_sm;
+ }
+
+ if (strcmp(sm->str + sm->mnt_root, root) != 0) {
+ ksft_test_result_fail("statmount returned incorrect mnt_root,"
+ "statmount mnt_root: %s != %s\n",
+ sm->str + sm->mnt_root, root);
+ goto err_sm;
+ }
+
+ ksft_test_result_pass("statmount by fd on unmounted mount\n");
+ /*
+ * The success path falls through into the cleanup labels.  The umount2()
+ * and rmdir() calls below run on every path, including the one where
+ * tmpdir was already detached above; their return values are ignored.
+ */
+err_sm:
+ free(sm);
+err_fd:
+ close(fd);
+err_subdir:
+ umount2(subdir, MNT_DETACH);
+ rmdir(subdir);
+err_tmpdir:
+ umount2(tmpdir, MNT_DETACH);
+ rmdir(tmpdir);
+}
+
#define str_off(memb) (offsetof(struct statmount, memb) / sizeof(uint32_t))
int main(void)
@@ -669,14 +898,14 @@ int main(void)
ksft_print_header();
- ret = statmount(0, 0, 0, NULL, 0, 0);
+ ret = statmount(0, 0, 0, 0, NULL, 0, 0);
assert(ret == -1);
if (errno == ENOSYS)
ksft_exit_skip("statmount() syscall not supported\n");
setup_namespace();
- ksft_set_plan(15);
+ ksft_set_plan(17);
test_listmount_empty_root();
test_statmount_zero_mask();
test_statmount_mnt_basic();
@@ -693,6 +922,8 @@ int main(void)
test_statmount_string(all_mask, str_off(fs_type), "fs type & all");
test_listmount_tree();
+ test_statmount_by_fd_unmounted();
+ test_statmount_by_fd();
if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
index d56d4103182f..063d9de46431 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
@@ -102,7 +102,7 @@ static int _test_statmount_mnt_ns_id(void)
if (!root_id)
return NSID_ERROR;
- ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
return NSID_ERROR;
@@ -128,6 +128,98 @@ static int _test_statmount_mnt_ns_id(void)
return NSID_PASS;
}
+/*
+ * Check STATMOUNT_MNT_NS_ID when the mount is identified by an O_PATH fd
+ * (STATMOUNT_BY_FD) instead of by mount ID.
+ *
+ * While the bind mount is attached, the reported mnt_ns_id must equal the ID
+ * read from /proc/self/ns/mnt.  After a lazy unmount, statmount() on the same
+ * fd must still succeed but must no longer report STATMOUNT_MNT_NS_ID.
+ *
+ * Returns NSID_PASS, NSID_FAIL, NSID_SKIP or NSID_ERROR.  status starts out
+ * as NSID_ERROR, so setup failures simply jump to the cleanup code.
+ */
+static int _test_statmount_mnt_ns_id_by_fd(void)
+{
+ struct statmount sm;
+ uint64_t mnt_ns_id;
+ int ret, fd = -1, mounted = 0, status = NSID_ERROR;
+ char mnt[] = "/statmount.fd.XXXXXX";
+
+ ret = get_mnt_ns_id("/proc/self/ns/mnt", &mnt_ns_id);
+ if (ret != NSID_PASS)
+ return ret;
+
+ if (!mkdtemp(mnt)) {
+ ksft_print_msg("statmount by fd mnt ns id mkdtemp: %s\n", strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (mount(mnt, mnt, NULL, MS_BIND, 0)) {
+ ksft_print_msg("statmount by fd mnt ns id mount: %s\n", strerror(errno));
+ goto out;
+ }
+ mounted = 1;
+
+ fd = open(mnt, O_PATH);
+ if (fd < 0) {
+ ksft_print_msg("statmount by fd mnt ns id open: %s\n", strerror(errno));
+ /* The bind mount exists at this point: take the full cleanup path */
+ goto out;
+ }
+
+ ret = statmount(0, 0, fd, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), STATMOUNT_BY_FD);
+ if (ret == -1) {
+ ksft_print_msg("statmount mnt ns id statmount: %s\n", strerror(errno));
+ goto out;
+ }
+
+ if (sm.size != sizeof(sm)) {
+ ksft_print_msg("unexpected size: %u != %u\n", sm.size,
+ (uint32_t)sizeof(sm));
+ status = NSID_FAIL;
+ goto out;
+ }
+ if (sm.mask != STATMOUNT_MNT_NS_ID) {
+ ksft_print_msg("statmount mnt ns id unavailable\n");
+ status = NSID_SKIP;
+ goto out;
+ }
+
+ if (sm.mnt_ns_id != mnt_ns_id) {
+ ksft_print_msg("unexpected mnt ns ID: 0x%llx != 0x%llx\n",
+ (unsigned long long)sm.mnt_ns_id,
+ (unsigned long long)mnt_ns_id);
+ status = NSID_FAIL;
+ goto out;
+ }
+
+ if (umount2(mnt, MNT_DETACH)) {
+ ksft_print_msg("statmount by fd mnt ns id umount2: %s\n", strerror(errno));
+ goto out;
+ }
+ /* Only mark the mount as gone once the unmount really succeeded */
+ mounted = 0;
+
+ ret = statmount(0, 0, fd, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), STATMOUNT_BY_FD);
+ if (ret == -1) {
+ ksft_print_msg("statmount mnt ns id statmount: %s\n", strerror(errno));
+ goto out;
+ }
+
+ if (sm.size != sizeof(sm)) {
+ ksft_print_msg("unexpected size: %u != %u\n", sm.size,
+ (uint32_t)sizeof(sm));
+ status = NSID_FAIL;
+ goto out;
+ }
+
+ if (sm.mask == STATMOUNT_MNT_NS_ID) {
+ ksft_print_msg("unexpected STATMOUNT_MNT_NS_ID in mask\n");
+ status = NSID_FAIL;
+ goto out;
+ }
+
+ status = NSID_PASS;
+out:
+ if (fd >= 0)
+ close(fd);
+ if (mounted)
+ umount2(mnt, MNT_DETACH);
+ rmdir(mnt);
+ return status;
+}
+
+
static void test_statmount_mnt_ns_id(void)
{
pid_t pid;
@@ -148,6 +240,9 @@ static void test_statmount_mnt_ns_id(void)
if (ret != NSID_PASS)
exit(ret);
ret = _test_statmount_mnt_ns_id();
+ if (ret != NSID_PASS)
+ exit(ret);
+ ret = _test_statmount_mnt_ns_id_by_fd();
exit(ret);
}
@@ -179,7 +274,7 @@ static int validate_external_listmount(pid_t pid, uint64_t child_nr_mounts)
for (int i = 0; i < nr_mounts; i++) {
struct statmount sm;
- ret = statmount(list[i], mnt_ns_id, STATMOUNT_MNT_NS_ID, &sm,
+ ret = statmount(list[i], mnt_ns_id, 0, STATMOUNT_MNT_NS_ID, &sm,
sizeof(sm), 0);
if (ret < 0) {
ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
@@ -275,7 +370,7 @@ int main(void)
int ret;
ksft_print_header();
- ret = statmount(0, 0, 0, NULL, 0, 0);
+ ret = statmount(0, 0, 0, 0, NULL, 0, 0);
assert(ret == -1);
if (errno == ENOSYS)
ksft_exit_skip("statmount() syscall not supported\n");
diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c
index c9dd5412b37b..d6f26f849053 100644
--- a/tools/testing/selftests/filesystems/utils.c
+++ b/tools/testing/selftests/filesystems/utils.c
@@ -515,6 +515,32 @@ int setup_userns(void)
return 0;
}
+/*
+ * enter_userns - unshare into a new user namespace and map the caller to ID 0
+ *
+ * Writes a one-entry uid_map and gid_map that map ID 0 in the new namespace
+ * to the caller's uid and gid.  "deny" is written to setgroups before gid_map
+ * is written (see user_namespaces(7)).
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing
+ * unshare() or write_file() call.
+ */
+int enter_userns(void)
+{
+ int ret;
+ char buf[32];
+ uid_t uid = getuid();
+ gid_t gid = getgid();
+
+ ret = unshare(CLONE_NEWUSER);
+ if (ret)
+ return ret;
+
+ /* uid_t and gid_t are unsigned: %u keeps IDs above INT_MAX intact */
+ snprintf(buf, sizeof(buf), "0 %u 1", (unsigned int)uid);
+ ret = write_file("/proc/self/uid_map", buf);
+ if (ret)
+ return ret;
+ ret = write_file("/proc/self/setgroups", "deny");
+ if (ret)
+ return ret;
+ snprintf(buf, sizeof(buf), "0 %u 1", (unsigned int)gid);
+ ret = write_file("/proc/self/gid_map", buf);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
/* caps_down - lower all effective caps */
int caps_down(void)
{
diff --git a/tools/testing/selftests/filesystems/utils.h b/tools/testing/selftests/filesystems/utils.h
index 70f7ccc607f4..0bccfed666a9 100644
--- a/tools/testing/selftests/filesystems/utils.h
+++ b/tools/testing/selftests/filesystems/utils.h
@@ -28,6 +28,7 @@ extern int cap_down(cap_value_t down);
extern bool switch_ids(uid_t uid, gid_t gid);
extern int setup_userns(void);
+extern int enter_userns(void);
static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
{
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc
index 7daf7292209e..a2c42e13f614 100644
--- a/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc
@@ -89,6 +89,7 @@ test_buffer() {
# The id must be four bytes, test that 3 bytes fails a write
if echo -n abc > ./trace_marker_raw ; then
echo "Too small of write expected to fail but did not"
+ echo ${ORIG} > buffer_size_kb
exit_fail
fi
@@ -99,9 +100,24 @@ test_buffer() {
if write_buffer 0xdeadbeef $size ; then
echo "Too big of write expected to fail but did not"
+ echo ${ORIG} > buffer_size_kb
exit_fail
fi
}
+ORIG=`cat buffer_size_kb`
+
+# test_multiple_writes test needs at least 12KB buffer
+NEW_SIZE=12
+
+if [ ${ORIG} -lt ${NEW_SIZE} ]; then
+ echo ${NEW_SIZE} > buffer_size_kb
+fi
+
test_buffer
-test_multiple_writes
+if ! test_multiple_writes; then
+ echo ${ORIG} > buffer_size_kb
+ exit_fail
+fi
+
+echo ${ORIG} > buffer_size_kb
diff --git a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
index 93c10ea42a68..8b8e1aea985b 100644
--- a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
@@ -1,7 +1,8 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event tracing - enable/disable with top level files
-# requires: available_events set_event events/enable
+# requires: set_event events/enable
+# flags: instance
do_reset() {
echo > set_event
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
index aee22289536b..1b57771dbfdf 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
@@ -90,9 +90,10 @@ if [ $on != "0" ]; then
fail "Tracing is not off"
fi
-csum1=`md5sum trace`
+# Cannot rely on names being around as they are only cached, strip them
+csum1=`cat trace | sed -e 's/^ *[^ ]*\(-[0-9][0-9]*\)/\1/' | md5sum`
sleep $SLEEP_TIME
-csum2=`md5sum trace`
+csum2=`cat trace | sed -e 's/^ *[^ ]*\(-[0-9][0-9]*\)/\1/' | md5sum`
if [ "$csum1" != "$csum2" ]; then
fail "Tracing file is still changing"
diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile
index 2839d2612ce3..50ec9e0406ab 100644
--- a/tools/testing/selftests/hid/Makefile
+++ b/tools/testing/selftests/hid/Makefile
@@ -184,6 +184,8 @@ MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
+ -Wno-microsoft-anon-tag \
+ -fms-extensions \
-I$(INCLUDE_DIR)
CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
index 531228b849da..80ab60905865 100644
--- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
+++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
@@ -116,10 +116,8 @@ extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx,
/* bpf_wq implementation */
extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
-extern int bpf_wq_set_callback_impl(struct bpf_wq *wq,
- int (callback_fn)(void *map, int *key, void *wq),
- unsigned int flags__k, void *aux__ign) __weak __ksym;
-#define bpf_wq_set_callback(timer, cb, flags) \
- bpf_wq_set_callback_impl(timer, cb, flags, NULL)
+extern int bpf_wq_set_callback(struct bpf_wq *wq,
+ int (*callback_fn)(void *, int *, void *),
+ unsigned int flags) __weak __ksym;
#endif /* __HID_BPF_HELPERS_H */
diff --git a/tools/testing/selftests/hid/tests/conftest.py b/tools/testing/selftests/hid/tests/conftest.py
index 1361ec981db6..985a535324b2 100644
--- a/tools/testing/selftests/hid/tests/conftest.py
+++ b/tools/testing/selftests/hid/tests/conftest.py
@@ -5,6 +5,7 @@
# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
# Copyright (c) 2017 Red Hat, Inc.
+from packaging.version import Version
import platform
import pytest
import re
@@ -14,6 +15,19 @@ from .base import HIDTestUdevRule
from pathlib import Path
+@pytest.fixture(autouse=True)
+def hidtools_version_check():
+ # Autouse fixture: skip the test unless hidtools can be imported and its
+ # __version__ is at least HIDTOOLS_VERSION.
+ HIDTOOLS_VERSION = "0.12"
+ try:
+ import hidtools
+
+ version = hidtools.__version__ # type: ignore
+ if Version(version) < Version(HIDTOOLS_VERSION):
+ pytest.skip(reason=f"have hidtools {version}, require >={HIDTOOLS_VERSION}")
+ except Exception:
+ # Any Exception from the import or the version comparison is turned
+ # into a skip as well.
+ pytest.skip(reason=f"hidtools >={HIDTOOLS_VERSION} required")
+
+
# See the comment in HIDTestUdevRule, this doesn't set up but it will clean
# up once the last test exited.
@pytest.fixture(autouse=True, scope="session")
diff --git a/tools/testing/selftests/hid/tests/test_multitouch.py b/tools/testing/selftests/hid/tests/test_multitouch.py
index ece0ba8e7d34..fa4fb2054bd4 100644
--- a/tools/testing/selftests/hid/tests/test_multitouch.py
+++ b/tools/testing/selftests/hid/tests/test_multitouch.py
@@ -9,6 +9,7 @@
from . import base
from hidtools.hut import HUT
from hidtools.util import BusType
+import enum
import libevdev
import logging
import pytest
@@ -232,11 +233,17 @@ class Digitizer(base.UHIDTestDevice):
return 0
+# Button type of a PTP test device; stored in PTP.buttontype and checked by
+# the TestPTP button tests.
+# NOTE(review): the numeric values presumably mirror the HID "Button Type"
+# feature value reported by the device - confirm.
+class HIDButtonType(enum.IntEnum):
+ CLICKPAD = 0
+ PRESSUREPAD = 1
+ DISCRETE_BUTTONS = 2
+
+
class PTP(Digitizer):
def __init__(
self,
name,
- type="Click Pad",
+ buttontype=HIDButtonType.CLICKPAD,
rdesc_str=None,
rdesc=None,
application="Touch Pad",
@@ -244,11 +251,8 @@ class PTP(Digitizer):
max_contacts=None,
input_info=None,
):
- self.type = type.lower().replace(" ", "")
- if self.type == "clickpad":
- self.buttontype = 0
- else: # pressurepad
- self.buttontype = 1
+ self.buttontype = buttontype
+
self.clickpad_state = False
self.left_state = False
self.right_state = False
@@ -975,15 +979,36 @@ class BaseTest:
assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_ORIENTATION, 90) in events
class TestPTP(TestWin8Multitouch):
+ def test_buttontype(self):
+ """Check for the right ButtonType."""
+ uhdev = self.uhdev
+ assert uhdev is not None
+ evdev = uhdev.get_evdev()
+
+ # If libevdev.so is not yet compiled with INPUT_PROP_PRESSUREPAD
+ # python-libevdev won't have it either, let's fake it
+ if not getattr(libevdev, "INPUT_PROP_PRESSUREPAD", None):
+ prop = libevdev.InputProperty(name="INPUT_PROP_PRESSUREPAD", value=0x7)
+ libevdev.INPUT_PROP_PRESSUREPAD = prop
+ libevdev.props.append(prop)
+
+ # CLICKPAD -> BUTTONPAD property, PRESSUREPAD -> PRESSUREPAD property,
+ # any other button type -> neither property may be present.
+ if uhdev.buttontype == HIDButtonType.CLICKPAD:
+ assert libevdev.INPUT_PROP_BUTTONPAD in evdev.properties
+ elif uhdev.buttontype == HIDButtonType.PRESSUREPAD:
+ assert libevdev.INPUT_PROP_PRESSUREPAD in evdev.properties
+ else:
+ assert libevdev.INPUT_PROP_PRESSUREPAD not in evdev.properties
+ assert libevdev.INPUT_PROP_BUTTONPAD not in evdev.properties
+
def test_ptp_buttons(self):
"""check for button reliability.
- There are 2 types of touchpads: the click pads and the pressure pads.
- Each should reliably report the BTN_LEFT events.
+ There are 3 types of touchpads: click pads + pressure pads and
+ those with discrete buttons. Each should reliably report the BTN_LEFT events.
"""
uhdev = self.uhdev
evdev = uhdev.get_evdev()
- if uhdev.type == "clickpad":
+ if uhdev.buttontype in [HIDButtonType.CLICKPAD, HIDButtonType.PRESSUREPAD]:
r = uhdev.event(click=True)
events = uhdev.next_sync_events()
self.debug_reports(r, uhdev, events)
@@ -995,7 +1020,7 @@ class BaseTest:
self.debug_reports(r, uhdev, events)
assert libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 0) in events
assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 0
- else:
+ elif uhdev.buttontype == HIDButtonType.DISCRETE_BUTTONS:
r = uhdev.event(left=True)
events = uhdev.next_sync_events()
self.debug_reports(r, uhdev, events)
@@ -1918,7 +1943,7 @@ class Testdell_044e_1220(BaseTest.TestPTP):
def create_device(self):
return PTP(
"uhid test dell_044e_1220",
- type="pressurepad",
+ buttontype=HIDButtonType.DISCRETE_BUTTONS,
rdesc="05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 75 01 95 03 81 02 95 05 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 09 38 95 01 81 06 05 0c 0a 38 02 81 06 c0 c0 05 0d 09 05 a1 01 85 08 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 af 04 75 10 55 0e 65 11 09 30 35 00 46 e8 03 95 01 81 02 26 7b 02 46 12 02 09 31 81 02 c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 05 0d 09 56 81 02 09 54 25 05 95 01 75 08 81 02 05 09 19 01 29 03 25 01 75 01 95 03 81 02 95 05 81 03 05 0d 85 09 09 55 75 08 95 01 25 05 b1 02 06 00 ff 85 0a 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 01 ff 09 01 a1 01 85 03 09 01 15 00 26 ff 00 95 1b 81 02 85 04 09 02 95 50 81 02 85 05 09 03 95 07 b1 02 85 06 09 04 81 02 c0 06 02 ff 09 01 a1 01 85 07 09 02 95 86 75 08 b1 02 c0 05 0d 09 0e a1 01 85 0b 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 0c 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0",
)
@@ -2018,7 +2043,7 @@ class Testelan_04f3_313a(BaseTest.TestPTP):
def create_device(self):
return PTP(
"uhid test elan_04f3_313a",
- type="touchpad",
+ buttontype=HIDButtonType.DISCRETE_BUTTONS,
input_info=(BusType.I2C, 0x04F3, 0x313A),
rdesc="05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 75 01 95 03 81 02 95 05 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 75 08 95 05 81 03 c0 06 00 ff 09 01 85 0e 09 c5 15 00 26 ff 00 75 08 95 04 b1 02 85 0a 09 c6 15 00 26 ff 00 75 08 95 04 b1 02 c0 06 00 ff 09 01 a1 01 85 5c 09 01 95 0b 75 08 81 06 85 0d 09 c5 15 00 26 ff 00 75 08 95 04 b1 02 85 0c 09 c6 96 80 03 75 08 b1 02 85 0b 09 c7 95 82 75 08 b1 02 c0 05 0d 09 05 a1 01 85 04 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 05 09 09 02 09 03 15 00 25 01 75 01 95 02 81 02 05 0d 95 01 75 04 25 0f 09 51 81 02 05 01 15 00 26 d7 0e 75 10 55 0d 65 11 09 30 35 00 46 44 2f 95 01 81 02 46 12 16 26 eb 06 26 eb 06 09 31 81 02 05 0d 15 00 25 64 95 03 c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 25 01 75 01 95 08 81 03 09 c5 75 08 95 02 81 03 05 0d 85 02 09 55 09 59 75 04 95 02 25 0f b1 02 85 07 09 60 75 01 95 01 15 00 25 01 b1 02 95 0f b1 03 06 00 ff 06 00 ff 85 06 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 03 09 22 a1 00 09 52 15 00 25 0a 75 10 95 01 b1 02 c0 09 22 a1 00 85 05 09 57 09 58 75 01 95 02 25 01 b1 02 95 0e b1 03 c0 c0 05 01 09 02 a1 01 85 2a 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 75 01 95 03 81 02 95 05 81 03 05 01 09 30 09 31 15 81 25 7f 35 81 45 7f 55 00 65 13 75 08 95 02 81 06 75 08 95 05 81 03 c0 c0",
)
@@ -2058,6 +2083,16 @@ class Testite_06cb_2968(BaseTest.TestPTP):
)
+class Testven_0488_108c(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test ven_0488_108c",
+ rdesc="05 01 09 02 a1 01 85 06 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 01 75 05 81 03 05 01 09 30 09 31 09 38 15 81 25 7f 75 08 95 03 81 06 c0 c0 05 0d 09 05 a1 01 85 01 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 81 03 05 01 15 00 26 ba 0d 75 10 55 0e 65 11 09 30 35 00 46 d0 05 95 01 81 02 26 d0 06 46 bb 02 09 31 81 02 05 0d 95 01 75 10 26 ff 7f 46 ff 7f 09 30 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 81 03 05 01 15 00 26 ba 0d 75 10 55 0e 65 11 09 30 35 00 46 d0 05 95 01 81 02 26 d0 06 46 bb 02 09 31 81 02 05 0d 95 01 75 10 26 ff 7f 46 ff 7f 09 30 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 81 03 05 01 15 00 26 ba 0d 75 10 55 0e 65 11 09 30 35 00 46 d0 05 95 01 81 02 26 d0 06 46 bb 02 09 31 81 02 05 0d 95 01 75 10 26 ff 7f 46 ff 7f 09 30 81 02 c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 05 0d 09 56 81 02 09 54 25 05 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 02 09 55 75 08 95 01 25 05 b1 02 09 59 b1 02 06 00 ff 85 03 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 05 0e 09 01 a1 02 85 13 09 23 15 00 25 64 75 08 95 01 b1 02 c0 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 05 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 01 ff 09 02 a1 01 09 00 85 07 15 00 26 ff 00 75 08 96 12 02 b1 02 c0 06 00 ff 09 01 a1 01 85 0d 15 00 26 ff 00 75 08 95 11 09 01 81 02 09 01 91 02 c0 05 0e 09 01 a1 01 85 11 09 35 15 00 26 ff 00 75 08 95 17 b1 02 c0 06 81 ff 09 01 a1 01 09 20 85 17 15 00 26 ff 00 75 08 95 3f 09 01 81 02 09 01 91 02 c0",
+ input_info=(0x18, 0x0488, 0x108C),
+ buttontype=HIDButtonType.PRESSUREPAD,
+ )
+
+
class Testn_trig_1b96_0c01(BaseTest.TestWin8Multitouch):
def create_device(self):
return Digitizer(
@@ -2110,7 +2145,7 @@ class Testsipodev_0603_0002(BaseTest.TestPTP):
def create_device(self):
return PTP(
"uhid test sipodev_0603_0002",
- type="clickpad",
+ buttontype=HIDButtonType.CLICKPAD,
rdesc="05 01 09 02 a1 01 85 03 09 01 a1 00 05 09 19 01 29 02 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 80 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 04 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 75 01 95 02 81 03 95 01 75 04 25 05 09 51 81 02 05 01 15 00 26 44 0a 75 0c 55 0e 65 11 09 30 35 00 46 ac 03 95 01 81 02 46 fe 01 26 34 05 75 0c 09 31 81 02 05 0d c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 0a 95 01 75 04 81 02 75 01 95 03 81 03 05 09 09 01 25 01 75 01 95 01 81 02 05 0d 85 0a 09 55 09 59 75 04 95 02 25 0f b1 02 85 0b 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 09 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 06 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 07 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 05 01 09 0c a1 01 85 08 15 00 25 01 09 c6 75 01 95 01 81 06 75 07 81 03 c0 05 01 09 80 a1 01 85 01 15 00 25 01 75 01 0a 81 00 0a 82 00 0a 83 00 95 03 81 06 95 05 81 01 c0 06 0c 00 09 01 a1 01 85 02 25 01 15 00 75 01 0a b5 00 0a b6 00 0a b7 00 0a cd 00 0a e2 00 0a a2 00 0a e9 00 0a ea 00 95 08 81 02 0a 83 01 0a 6f 00 0a 70 00 0a 88 01 0a 8a 01 0a 92 01 0a a8 02 0a 24 02 95 08 81 02 0a 21 02 0a 23 02 0a 96 01 0a 25 02 0a 26 02 0a 27 02 0a 23 02 0a b1 02 95 08 81 02 c0 06 00 ff 09 01 a1 01 85 05 15 00 26 ff 00 19 01 29 02 75 08 95 05 b1 02 c0",
)
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 10e051b6f592..dadad277f4eb 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -755,9 +755,6 @@ TEST_F(iommufd_ioas, get_hw_info)
struct iommu_test_hw_info info;
uint64_t trailing_bytes;
} buffer_larger;
- struct iommu_test_hw_info_buffer_smaller {
- __u32 flags;
- } buffer_smaller;
if (self->device_id) {
uint8_t max_pasid = 0;
@@ -789,8 +786,9 @@ TEST_F(iommufd_ioas, get_hw_info)
* the fields within the size range still gets updated.
*/
test_cmd_get_hw_info(self->device_id,
- IOMMU_HW_INFO_TYPE_DEFAULT,
- &buffer_smaller, sizeof(buffer_smaller));
+ IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_exact,
+ offsetofend(struct iommu_test_hw_info,
+ flags));
test_cmd_get_hw_info_pasid(self->device_id, &max_pasid);
ASSERT_EQ(0, max_pasid);
if (variant->pasid_capable) {
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index baae6b7ded41..16a119a4656c 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -70,6 +70,12 @@
#include "kselftest.h"
+/*
+ * memset() wrapper that does nothing when n is 0; s is not touched in that
+ * case.
+ * NOTE(review): presumably exists for fixtures whose self_private object is
+ * zero-sized - confirm.
+ */
+static inline void __kselftest_memset_safe(void *s, int c, size_t n)
+{
+ if (n > 0)
+ memset(s, c, n);
+}
+
#define TEST_TIMEOUT_DEFAULT 30
/* Utilities exposed to the test definitions */
@@ -416,7 +422,7 @@
self = mmap(NULL, sizeof(*self), PROT_READ | PROT_WRITE, \
MAP_SHARED | MAP_ANONYMOUS, -1, 0); \
} else { \
- memset(&self_private, 0, sizeof(self_private)); \
+ __kselftest_memset_safe(&self_private, 0, sizeof(self_private)); \
self = &self_private; \
} \
} \
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index ba5c2b643efa..fdec90e85467 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -89,11 +89,14 @@ TEST_GEN_PROGS_x86 += x86/kvm_buslock_test
TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
TEST_GEN_PROGS_x86 += x86/msrs_test
TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test
+TEST_GEN_PROGS_x86 += x86/nested_dirty_log_test
TEST_GEN_PROGS_x86 += x86/nested_emulation_test
TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test
+TEST_GEN_PROGS_x86 += x86/nested_set_state_test
TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test
TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test
+TEST_GEN_PROGS_x86 += x86/nested_vmsave_vmload_test
TEST_GEN_PROGS_x86 += x86/platform_info_test
TEST_GEN_PROGS_x86 += x86/pmu_counters_test
TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test
@@ -115,15 +118,15 @@ TEST_GEN_PROGS_x86 += x86/ucna_injection_test
TEST_GEN_PROGS_x86 += x86/userspace_io_test
TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test
TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test
-TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test
+TEST_GEN_PROGS_x86 += x86/vmx_apicv_updates_test
TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
TEST_GEN_PROGS_x86 += x86/vmx_nested_la57_state_test
-TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test
TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
TEST_GEN_PROGS_x86 += x86/xapic_ipi_test
TEST_GEN_PROGS_x86 += x86/xapic_state_test
+TEST_GEN_PROGS_x86 += x86/xapic_tpr_test
TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test
TEST_GEN_PROGS_x86 += x86/xss_msr_test
TEST_GEN_PROGS_x86 += x86/debug_regs
@@ -175,6 +178,7 @@ TEST_GEN_PROGS_arm64 += arm64/vgic_irq
TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress
TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access
TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
+TEST_GEN_PROGS_arm64 += arm64/idreg-idst
TEST_GEN_PROGS_arm64 += arm64/kvm-uuid
TEST_GEN_PROGS_arm64 += access_tracking_perf_test
TEST_GEN_PROGS_arm64 += arch_timer
@@ -199,6 +203,7 @@ TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
TEST_GEN_PROGS_s390 += s390/ucontrol_test
TEST_GEN_PROGS_s390 += s390/user_operexec
+TEST_GEN_PROGS_s390 += s390/keyop
TEST_GEN_PROGS_s390 += rseq_test
TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)
@@ -228,6 +233,7 @@ TEST_GEN_PROGS_loongarch += kvm_page_table_test
TEST_GEN_PROGS_loongarch += memslot_modification_stress_test
TEST_GEN_PROGS_loongarch += memslot_perf_test
TEST_GEN_PROGS_loongarch += set_memory_region_test
+TEST_GEN_PROGS_loongarch += steal_time
SPLIT_TESTS += arch_timer
SPLIT_TESTS += get-reg-list
@@ -251,6 +257,7 @@ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
+ -U_FORTIFY_SOURCE \
-fno-builtin-memcmp -fno-builtin-memcpy \
-fno-builtin-memset -fno-builtin-strnlen \
-fno-stack-protector -fno-PIE -fno-strict-aliasing \
diff --git a/tools/testing/selftests/kvm/arm64/idreg-idst.c b/tools/testing/selftests/kvm/arm64/idreg-idst.c
new file mode 100644
index 000000000000..9ca9f125abdb
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/idreg-idst.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Access all FEAT_IDST-handled registers that depend on more than
+ * just FEAT_AA64, and fail if we don't get a trap with an 0x18 EC.
+ */
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+static volatile bool sys64, undef;
+
+#define __check_sr_read(r) \
+ ({ \
+ uint64_t val; \
+ \
+ sys64 = false; \
+ undef = false; \
+ dsb(sy); \
+ val = read_sysreg_s(SYS_ ## r); \
+ val; \
+ })
+
+/* Fatal checks */
+#define check_sr_read(r) \
+ do { \
+ __check_sr_read(r); \
+ __GUEST_ASSERT(!undef, #r " unexpected UNDEF"); \
+ __GUEST_ASSERT(sys64, #r " didn't trap"); \
+ } while(0)
+
+
+static void guest_code(void)
+{
+ check_sr_read(CCSIDR2_EL1);
+ check_sr_read(SMIDR_EL1);
+ check_sr_read(GMID_EL1);
+
+ GUEST_DONE();
+}
+
+static void guest_sys64_handler(struct ex_regs *regs)
+{
+ sys64 = true;
+ undef = false;
+ regs->pc += 4;
+}
+
+static void guest_undef_handler(struct ex_regs *regs)
+{
+ sys64 = false;
+ undef = true;
+ regs->pc += 4;
+}
+
+static void test_run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ do {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ printf("%s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ } while (uc.cmd != UCALL_DONE);
+}
+
+static void test_guest_feat_idst(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* This VM has no MTE, no SME, no CCIDX */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_SYS64, guest_sys64_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_UNKNOWN, guest_undef_handler);
+
+ test_run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t mmfr2;
+
+ test_disable_default_vgic();
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ mmfr2 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR2_EL1));
+ __TEST_REQUIRE(FIELD_GET(ID_AA64MMFR2_EL1_IDS, mmfr2) > 0,
+ "FEAT_IDST not supported");
+ kvm_vm_free(vm);
+
+ test_guest_feat_idst();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index c4815d365816..73de5be58bab 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -91,7 +91,6 @@ static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0),
diff --git a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
index b973bb2c64a6..4a2033708227 100644
--- a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
@@ -2,6 +2,8 @@
#ifndef SELFTEST_KVM_UTIL_ARCH_H
#define SELFTEST_KVM_UTIL_ARCH_H
+struct kvm_mmu_arch {};
+
struct kvm_vm_arch {
bool has_gic;
int gic_fd;
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index ff928716574d..ac97a1c436fc 100644
--- a/tools/testing/selftests/kvm/include/arm64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
@@ -90,6 +90,9 @@
#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT)
#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT)
+#define TCR_EPD1_SHIFT 23
+#define TCR_EPD1_MASK (UL(1) << TCR_EPD1_SHIFT)
+
#define TCR_IPS_SHIFT 32
#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT)
#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT)
@@ -97,6 +100,7 @@
#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT)
#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT)
+#define TCR_TBI1 (UL(1) << 38)
#define TCR_HA (UL(1) << 39)
#define TCR_DS (UL(1) << 59)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 81f4355ff28a..8b39cb919f4f 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -88,12 +88,19 @@ enum kvm_mem_region_type {
NR_MEM_REGIONS,
};
+struct kvm_mmu {
+ bool pgd_created;
+ uint64_t pgd;
+ int pgtable_levels;
+
+ struct kvm_mmu_arch arch;
+};
+
struct kvm_vm {
int mode;
unsigned long type;
int kvm_fd;
int fd;
- unsigned int pgtable_levels;
unsigned int page_size;
unsigned int page_shift;
unsigned int pa_bits;
@@ -104,13 +111,18 @@ struct kvm_vm {
struct sparsebit *vpages_valid;
struct sparsebit *vpages_mapped;
bool has_irqchip;
- bool pgd_created;
vm_paddr_t ucall_mmio_addr;
- vm_paddr_t pgd;
vm_vaddr_t handlers;
uint32_t dirty_ring_size;
uint64_t gpa_tag_mask;
+ /*
+ * "mmu" is the guest's stage-1, with a short name because the vast
+ * majority of tests only care about the stage-1 MMU.
+ */
+ struct kvm_mmu mmu;
+ struct kvm_mmu stage2_mmu;
+
struct kvm_vm_arch arch;
struct kvm_binary_stats stats;
@@ -186,6 +198,17 @@ enum vm_guest_mode {
VM_MODE_P36V48_64K,
VM_MODE_P47V47_16K,
VM_MODE_P36V47_16K,
+
+ VM_MODE_P56V57_4K, /* For riscv64 */
+ VM_MODE_P56V48_4K,
+ VM_MODE_P56V39_4K,
+ VM_MODE_P50V57_4K,
+ VM_MODE_P50V48_4K,
+ VM_MODE_P50V39_4K,
+ VM_MODE_P41V57_4K,
+ VM_MODE_P41V48_4K,
+ VM_MODE_P41V39_4K,
+
NUM_VM_MODES,
};
@@ -210,10 +233,10 @@ kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t));
shape; \
})
-#if defined(__aarch64__)
-
extern enum vm_guest_mode vm_mode_default;
+#if defined(__aarch64__)
+
#define VM_MODE_DEFAULT vm_mode_default
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
@@ -236,7 +259,7 @@ extern enum vm_guest_mode vm_mode_default;
#error "RISC-V 32-bit kvm selftests not supported"
#endif
-#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#define VM_MODE_DEFAULT vm_mode_default
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
@@ -939,7 +962,7 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu);
* VM VCPU Args Set
*
* Input Args:
- * vm - Virtual Machine
+ * vcpu - vCPU
* num - number of arguments
* ... - arguments, each of type uint64_t
*
@@ -1258,8 +1281,13 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm)
return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0);
}
+static inline uint64_t vm_page_align(struct kvm_vm *vm, uint64_t v)
+{
+ return (v + vm->page_size - 1) & ~(vm->page_size - 1);
+}
+
/*
- * Arch hook that is invoked via a constructor, i.e. before exeucting main(),
+ * Arch hook that is invoked via a constructor, i.e. before executing main(),
* to allow for arch-specific setup that is common to all tests, e.g. computing
* the default guest "mode".
*/
diff --git a/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
index e43a57d99b56..d5095900e442 100644
--- a/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
@@ -2,6 +2,7 @@
#ifndef SELFTEST_KVM_UTIL_ARCH_H
#define SELFTEST_KVM_UTIL_ARCH_H
+struct kvm_mmu_arch {};
struct kvm_vm_arch {};
#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
index e43a57d99b56..d5095900e442 100644
--- a/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
@@ -2,6 +2,7 @@
#ifndef SELFTEST_KVM_UTIL_ARCH_H
#define SELFTEST_KVM_UTIL_ARCH_H
+struct kvm_mmu_arch {};
struct kvm_vm_arch {};
#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
index e58282488beb..4dade8c4d18e 100644
--- a/tools/testing/selftests/kvm/include/riscv/processor.h
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -192,4 +192,6 @@ static inline void local_irq_disable(void)
csr_clear(CSR_SSTATUS, SR_SIE);
}
+unsigned long riscv64_get_satp_mode(void);
+
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
index e43a57d99b56..d5095900e442 100644
--- a/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
@@ -2,6 +2,7 @@
#ifndef SELFTEST_KVM_UTIL_ARCH_H
#define SELFTEST_KVM_UTIL_ARCH_H
+struct kvm_mmu_arch {};
struct kvm_vm_arch {};
#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/x86/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h
index 80fe9f69b38d..5ca6bacbd70e 100644
--- a/tools/testing/selftests/kvm/include/x86/apic.h
+++ b/tools/testing/selftests/kvm/include/x86/apic.h
@@ -28,10 +28,13 @@
#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
#define APIC_TASKPRI 0x80
#define APIC_PROCPRI 0xA0
+#define GET_APIC_PRI(x) (((x) & GENMASK(7, 4)) >> 4)
+#define SET_APIC_PRI(x, y) (((x) & ~GENMASK(7, 4)) | (y << 4))
#define APIC_EOI 0xB0
#define APIC_SPIV 0xF0
#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
#define APIC_SPIV_APIC_ENABLED (1 << 8)
+#define APIC_ISR 0x100
#define APIC_IRR 0x200
#define APIC_ICR 0x300
#define APIC_LVTCMCI 0x2f0
@@ -67,6 +70,10 @@
#define APIC_TMICT 0x380
#define APIC_TMCCT 0x390
#define APIC_TDCR 0x3E0
+#define APIC_SELF_IPI 0x3F0
+
+#define APIC_VECTOR_TO_BIT_NUMBER(v) ((unsigned int)(v) % 32)
+#define APIC_VECTOR_TO_REG_OFFSET(v) ((unsigned int)(v) / 32 * 0x10)
void apic_disable(void);
void xapic_enable(void);
diff --git a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
index 972bb1c4ab4c..be35d26bb320 100644
--- a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
@@ -10,6 +10,28 @@
extern bool is_forced_emulation_enabled;
+struct pte_masks {
+ uint64_t present;
+ uint64_t writable;
+ uint64_t user;
+ uint64_t readable;
+ uint64_t executable;
+ uint64_t accessed;
+ uint64_t dirty;
+ uint64_t huge;
+ uint64_t nx;
+ uint64_t c;
+ uint64_t s;
+
+ uint64_t always_set;
+};
+
+struct kvm_mmu_arch {
+ struct pte_masks pte_masks;
+};
+
+struct kvm_mmu;
+
struct kvm_vm_arch {
vm_vaddr_t gdt;
vm_vaddr_t tss;
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 57d62a425109..4ebae4269e68 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -201,6 +201,7 @@ struct kvm_x86_cpu_feature {
#define X86_FEATURE_TSCRATEMSR KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4)
#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
+#define X86_FEATURE_V_VMSAVE_VMLOAD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 15)
#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30)
#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
@@ -362,16 +363,6 @@ static inline unsigned int x86_model(unsigned int eax)
return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
}
-/* Page table bitfield declarations */
-#define PTE_PRESENT_MASK BIT_ULL(0)
-#define PTE_WRITABLE_MASK BIT_ULL(1)
-#define PTE_USER_MASK BIT_ULL(2)
-#define PTE_ACCESSED_MASK BIT_ULL(5)
-#define PTE_DIRTY_MASK BIT_ULL(6)
-#define PTE_LARGE_MASK BIT_ULL(7)
-#define PTE_GLOBAL_MASK BIT_ULL(8)
-#define PTE_NX_MASK BIT_ULL(63)
-
#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12)
#define PAGE_SHIFT 12
@@ -436,8 +427,10 @@ struct kvm_x86_state {
static inline uint64_t get_desc64_base(const struct desc64 *desc)
{
- return ((uint64_t)desc->base3 << 32) |
- (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+ return (uint64_t)desc->base3 << 32 |
+ (uint64_t)desc->base2 << 24 |
+ (uint64_t)desc->base1 << 16 |
+ (uint64_t)desc->base0;
}
static inline uint64_t rdtsc(void)
@@ -1367,9 +1360,7 @@ static inline bool kvm_is_ignore_msrs(void)
return get_kvm_param_bool("ignore_msrs");
}
-uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
- int *level);
-uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
+uint64_t *vm_get_pte(struct kvm_vm *vm, uint64_t vaddr);
uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
uint64_t a3);
@@ -1451,10 +1442,52 @@ enum pg_level {
#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
+#define PTE_PRESENT_MASK(mmu) ((mmu)->arch.pte_masks.present)
+#define PTE_WRITABLE_MASK(mmu) ((mmu)->arch.pte_masks.writable)
+#define PTE_USER_MASK(mmu) ((mmu)->arch.pte_masks.user)
+#define PTE_READABLE_MASK(mmu) ((mmu)->arch.pte_masks.readable)
+#define PTE_EXECUTABLE_MASK(mmu) ((mmu)->arch.pte_masks.executable)
+#define PTE_ACCESSED_MASK(mmu) ((mmu)->arch.pte_masks.accessed)
+#define PTE_DIRTY_MASK(mmu) ((mmu)->arch.pte_masks.dirty)
+#define PTE_HUGE_MASK(mmu) ((mmu)->arch.pte_masks.huge)
+#define PTE_NX_MASK(mmu) ((mmu)->arch.pte_masks.nx)
+#define PTE_C_BIT_MASK(mmu) ((mmu)->arch.pte_masks.c)
+#define PTE_S_BIT_MASK(mmu) ((mmu)->arch.pte_masks.s)
+#define PTE_ALWAYS_SET_MASK(mmu) ((mmu)->arch.pte_masks.always_set)
+
+/*
+ * For PTEs without a PRESENT bit (i.e. EPT entries), treat the PTE as present
+ * if it's executable or readable, as EPT supports execute-only PTEs, but not
+ * write-only PTEs.
+ */
+#define is_present_pte(mmu, pte) \
+ (PTE_PRESENT_MASK(mmu) ? \
+ !!(*(pte) & PTE_PRESENT_MASK(mmu)) : \
+ !!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu))))
+#define is_executable_pte(mmu, pte) \
+ ((*(pte) & (PTE_EXECUTABLE_MASK(mmu) | PTE_NX_MASK(mmu))) == PTE_EXECUTABLE_MASK(mmu))
+#define is_writable_pte(mmu, pte) (!!(*(pte) & PTE_WRITABLE_MASK(mmu)))
+#define is_user_pte(mmu, pte) (!!(*(pte) & PTE_USER_MASK(mmu)))
+#define is_accessed_pte(mmu, pte) (!!(*(pte) & PTE_ACCESSED_MASK(mmu)))
+#define is_dirty_pte(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu)))
+#define is_huge_pte(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu)))
+#define is_nx_pte(mmu, pte) (!is_executable_pte(mmu, pte))
+
+void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
+ struct pte_masks *pte_masks);
+
+void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr,
+ uint64_t paddr, int level);
void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
uint64_t nr_bytes, int level);
+void vm_enable_tdp(struct kvm_vm *vm);
+bool kvm_cpu_has_tdp(void);
+void tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, uint64_t size);
+void tdp_identity_map_default_memslots(struct kvm_vm *vm);
+void tdp_identity_map_1g(struct kvm_vm *vm, uint64_t addr, uint64_t size);
+uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa);
+
/*
* Basic CPU control in CR0
*/
diff --git a/tools/testing/selftests/kvm/include/x86/svm.h b/tools/testing/selftests/kvm/include/x86/svm.h
index 29cffd0a9181..10b30b38bb3f 100644
--- a/tools/testing/selftests/kvm/include/x86/svm.h
+++ b/tools/testing/selftests/kvm/include/x86/svm.h
@@ -92,8 +92,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u32 int_vector;
u32 int_state;
u8 reserved_3[4];
- u32 exit_code;
- u32 exit_code_hi;
+ u64 exit_code;
u64 exit_info_1;
u64 exit_info_2;
u32 exit_int_info;
diff --git a/tools/testing/selftests/kvm/include/x86/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h
index b74c6dcddcbd..5d7c42534bc4 100644
--- a/tools/testing/selftests/kvm/include/x86/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86/svm_util.h
@@ -27,6 +27,9 @@ struct svm_test_data {
void *msr; /* gva */
void *msr_hva;
uint64_t msr_gpa;
+
+ /* NPT */
+ uint64_t ncr3_gpa;
};
static inline void vmmcall(void)
@@ -57,6 +60,12 @@ struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
+static inline bool kvm_cpu_has_npt(void)
+{
+ return kvm_cpu_has(X86_FEATURE_NPT);
+}
+void vm_enable_npt(struct kvm_vm *vm);
+
int open_sev_dev_path_or_exit(void);
#endif /* SELFTEST_KVM_SVM_UTILS_H */
diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h
index 96e2b4c630a9..92b918700d24 100644
--- a/tools/testing/selftests/kvm/include/x86/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86/vmx.h
@@ -520,13 +520,11 @@ struct vmx_pages {
uint64_t vmwrite_gpa;
void *vmwrite;
- void *eptp_hva;
- uint64_t eptp_gpa;
- void *eptp;
-
void *apic_access_hva;
uint64_t apic_access_gpa;
void *apic_access;
+
+ uint64_t eptp_gpa;
};
union vmx_basic {
@@ -559,16 +557,8 @@ bool load_vmcs(struct vmx_pages *vmx);
bool ept_1g_pages_supported(void);
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr);
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size);
-void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot);
-void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t addr, uint64_t size);
bool kvm_cpu_has_ept(void);
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm);
+void vm_enable_ept(struct kvm_vm *vm);
void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index d46e4b13b92c..43ea40edc533 100644
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -21,14 +21,9 @@
static vm_vaddr_t exception_handlers;
-static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
-{
- return (v + vm->page_size) & ~(vm->page_size - 1);
-}
-
static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
{
- unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+ unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;
return (gva >> shift) & mask;
@@ -39,7 +34,7 @@ static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
- TEST_ASSERT(vm->pgtable_levels == 4,
+ TEST_ASSERT(vm->mmu.pgtable_levels == 4,
"Mode %d does not have 4 page table levels", vm->mode);
return (gva >> shift) & mask;
@@ -50,7 +45,7 @@ static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
- TEST_ASSERT(vm->pgtable_levels >= 3,
+ TEST_ASSERT(vm->mmu.pgtable_levels >= 3,
"Mode %d does not have >= 3 page table levels", vm->mode);
return (gva >> shift) & mask;
@@ -104,7 +99,7 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
{
- unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+ unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
return 1 << (vm->va_bits - shift);
}
@@ -115,15 +110,15 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
- size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
+ size_t nr_pages = vm_page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
- if (vm->pgd_created)
+ if (vm->mmu.pgd_created)
return;
- vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR,
- vm->memslots[MEM_REGION_PT]);
- vm->pgd_created = true;
+ vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+ vm->mmu.pgd_created = true;
}
static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
@@ -147,12 +142,12 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
" paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
+ ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, vaddr) * 8;
if (!*ptep)
*ptep = addr_pte(vm, vm_alloc_page_table(vm),
PGD_TYPE_TABLE | PTE_VALID);
- switch (vm->pgtable_levels) {
+ switch (vm->mmu.pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
if (!*ptep)
@@ -190,16 +185,16 @@ uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level
{
uint64_t *ptep;
- if (!vm->pgd_created)
+ if (!vm->mmu.pgd_created)
goto unmapped_gva;
- ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
+ ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
if (level == 0)
return ptep;
- switch (vm->pgtable_levels) {
+ switch (vm->mmu.pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
if (!ptep)
@@ -263,13 +258,13 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t p
void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
- int level = 4 - (vm->pgtable_levels - 1);
+ int level = 4 - (vm->mmu.pgtable_levels - 1);
uint64_t pgd, *ptep;
- if (!vm->pgd_created)
+ if (!vm->mmu.pgd_created)
return;
- for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
+ for (pgd = vm->mmu.pgd; pgd < vm->mmu.pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
ptep = addr_gpa2hva(vm, pgd);
if (!*ptep)
continue;
@@ -350,7 +345,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
- ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
+ ttbr0_el1 = vm->mmu.pgd & GENMASK(47, vm->page_shift);
/* Configure output size */
switch (vm->mode) {
@@ -358,7 +353,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
case VM_MODE_P52V48_16K:
case VM_MODE_P52V48_64K:
tcr_el1 |= TCR_IPS_52_BITS;
- ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
+ ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->mmu.pgd) << 2;
break;
case VM_MODE_P48V48_4K:
case VM_MODE_P48V48_16K:
@@ -384,6 +379,8 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER;
tcr_el1 |= TCR_T0SZ(vm->va_bits);
+ tcr_el1 |= TCR_TBI1;
+ tcr_el1 |= TCR_EPD1_MASK;
if (use_lpa2_pte_format(vm))
tcr_el1 |= TCR_DS;
diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c
index b04901e55138..ce3099630397 100644
--- a/tools/testing/selftests/kvm/lib/guest_modes.c
+++ b/tools/testing/selftests/kvm/lib/guest_modes.c
@@ -4,7 +4,7 @@
*/
#include "guest_modes.h"
-#ifdef __aarch64__
+#if defined(__aarch64__) || defined(__riscv)
#include "processor.h"
enum vm_guest_mode vm_mode_default;
#endif
@@ -13,9 +13,11 @@ struct guest_mode guest_modes[NUM_VM_MODES];
void guest_modes_append_default(void)
{
-#ifndef __aarch64__
+#if !defined(__aarch64__) && !defined(__riscv)
guest_mode_append(VM_MODE_DEFAULT, true);
-#else
+#endif
+
+#ifdef __aarch64__
{
unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
uint32_t ipa4k, ipa16k, ipa64k;
@@ -74,11 +76,36 @@ void guest_modes_append_default(void)
#ifdef __riscv
{
unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS);
+ unsigned long satp_mode = riscv64_get_satp_mode() << SATP_MODE_SHIFT;
+ int i;
- if (sz >= 52)
- guest_mode_append(VM_MODE_P52V48_4K, true);
- if (sz >= 48)
- guest_mode_append(VM_MODE_P48V48_4K, true);
+ switch (sz) {
+ case 59:
+ guest_mode_append(VM_MODE_P56V57_4K, satp_mode >= SATP_MODE_57);
+ guest_mode_append(VM_MODE_P56V48_4K, satp_mode >= SATP_MODE_48);
+ guest_mode_append(VM_MODE_P56V39_4K, satp_mode >= SATP_MODE_39);
+ break;
+ case 50:
+ guest_mode_append(VM_MODE_P50V57_4K, satp_mode >= SATP_MODE_57);
+ guest_mode_append(VM_MODE_P50V48_4K, satp_mode >= SATP_MODE_48);
+ guest_mode_append(VM_MODE_P50V39_4K, satp_mode >= SATP_MODE_39);
+ break;
+ case 41:
+ guest_mode_append(VM_MODE_P41V57_4K, satp_mode >= SATP_MODE_57);
+ guest_mode_append(VM_MODE_P41V48_4K, satp_mode >= SATP_MODE_48);
+ guest_mode_append(VM_MODE_P41V39_4K, satp_mode >= SATP_MODE_39);
+ break;
+ default:
+ break;
+ }
+
+ /* set the first supported mode as default */
+ vm_mode_default = NUM_VM_MODES;
+ for (i = 0; vm_mode_default == NUM_VM_MODES && i < NUM_VM_MODES; i++) {
+ if (guest_modes[i].supported && guest_modes[i].enabled)
+ vm_mode_default = i;
+ }
+ TEST_ASSERT(vm_mode_default != NUM_VM_MODES, "No supported mode!");
}
#endif
}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 8279b6ced8d2..1959bf556e88 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -209,6 +209,15 @@ const char *vm_guest_mode_string(uint32_t i)
[VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages",
[VM_MODE_P47V47_16K] = "PA-bits:47, VA-bits:47, 16K pages",
[VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages",
+ [VM_MODE_P56V57_4K] = "PA-bits:56, VA-bits:57, 4K pages",
+ [VM_MODE_P56V48_4K] = "PA-bits:56, VA-bits:48, 4K pages",
+ [VM_MODE_P56V39_4K] = "PA-bits:56, VA-bits:39, 4K pages",
+ [VM_MODE_P50V57_4K] = "PA-bits:50, VA-bits:57, 4K pages",
+ [VM_MODE_P50V48_4K] = "PA-bits:50, VA-bits:48, 4K pages",
+ [VM_MODE_P50V39_4K] = "PA-bits:50, VA-bits:39, 4K pages",
+ [VM_MODE_P41V57_4K] = "PA-bits:41, VA-bits:57, 4K pages",
+ [VM_MODE_P41V48_4K] = "PA-bits:41, VA-bits:48, 4K pages",
+ [VM_MODE_P41V39_4K] = "PA-bits:41, VA-bits:39, 4K pages",
};
_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
"Missing new mode strings?");
@@ -236,6 +245,15 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
[VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 },
[VM_MODE_P47V47_16K] = { 47, 47, 0x4000, 14 },
[VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 },
+ [VM_MODE_P56V57_4K] = { 56, 57, 0x1000, 12 },
+ [VM_MODE_P56V48_4K] = { 56, 48, 0x1000, 12 },
+ [VM_MODE_P56V39_4K] = { 56, 39, 0x1000, 12 },
+ [VM_MODE_P50V57_4K] = { 50, 57, 0x1000, 12 },
+ [VM_MODE_P50V48_4K] = { 50, 48, 0x1000, 12 },
+ [VM_MODE_P50V39_4K] = { 50, 39, 0x1000, 12 },
+ [VM_MODE_P41V57_4K] = { 41, 57, 0x1000, 12 },
+ [VM_MODE_P41V48_4K] = { 41, 48, 0x1000, 12 },
+ [VM_MODE_P41V39_4K] = { 41, 39, 0x1000, 12 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
"Missing new mode params?");
@@ -281,34 +299,34 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
/* Setup mode specific traits. */
switch (vm->mode) {
case VM_MODE_P52V48_4K:
- vm->pgtable_levels = 4;
+ vm->mmu.pgtable_levels = 4;
break;
case VM_MODE_P52V48_64K:
- vm->pgtable_levels = 3;
+ vm->mmu.pgtable_levels = 3;
break;
case VM_MODE_P48V48_4K:
- vm->pgtable_levels = 4;
+ vm->mmu.pgtable_levels = 4;
break;
case VM_MODE_P48V48_64K:
- vm->pgtable_levels = 3;
+ vm->mmu.pgtable_levels = 3;
break;
case VM_MODE_P40V48_4K:
case VM_MODE_P36V48_4K:
- vm->pgtable_levels = 4;
+ vm->mmu.pgtable_levels = 4;
break;
case VM_MODE_P40V48_64K:
case VM_MODE_P36V48_64K:
- vm->pgtable_levels = 3;
+ vm->mmu.pgtable_levels = 3;
break;
case VM_MODE_P52V48_16K:
case VM_MODE_P48V48_16K:
case VM_MODE_P40V48_16K:
case VM_MODE_P36V48_16K:
- vm->pgtable_levels = 4;
+ vm->mmu.pgtable_levels = 4;
break;
case VM_MODE_P47V47_16K:
case VM_MODE_P36V47_16K:
- vm->pgtable_levels = 3;
+ vm->mmu.pgtable_levels = 3;
break;
case VM_MODE_PXXVYY_4K:
#ifdef __x86_64__
@@ -321,22 +339,37 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
vm->va_bits);
if (vm->va_bits == 57) {
- vm->pgtable_levels = 5;
+ vm->mmu.pgtable_levels = 5;
} else {
TEST_ASSERT(vm->va_bits == 48,
"Unexpected guest virtual address width: %d",
vm->va_bits);
- vm->pgtable_levels = 4;
+ vm->mmu.pgtable_levels = 4;
}
#else
TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms");
#endif
break;
case VM_MODE_P47V64_4K:
- vm->pgtable_levels = 5;
+ vm->mmu.pgtable_levels = 5;
break;
case VM_MODE_P44V64_4K:
- vm->pgtable_levels = 5;
+ vm->mmu.pgtable_levels = 5;
+ break;
+ case VM_MODE_P56V57_4K:
+ case VM_MODE_P50V57_4K:
+ case VM_MODE_P41V57_4K:
+ vm->mmu.pgtable_levels = 5;
+ break;
+ case VM_MODE_P56V48_4K:
+ case VM_MODE_P50V48_4K:
+ case VM_MODE_P41V48_4K:
+ vm->mmu.pgtable_levels = 4;
+ break;
+ case VM_MODE_P56V39_4K:
+ case VM_MODE_P50V39_4K:
+ case VM_MODE_P41V39_4K:
+ vm->mmu.pgtable_levels = 3;
break;
default:
TEST_FAIL("Unknown guest mode: 0x%x", vm->mode);
@@ -1351,7 +1384,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
* Output Args: None
*
* Return:
- * Lowest virtual address at or below vaddr_min, with at least
+ * Lowest virtual address at or above vaddr_min, with at least
* sz unused bytes. TEST_ASSERT failure if no area of at least
* size sz is available.
*
@@ -1956,8 +1989,8 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
fprintf(stream, "%*spgd_created: %u\n", indent, "",
- vm->pgd_created);
- if (vm->pgd_created) {
+ vm->mmu.pgd_created);
+ if (vm->mmu.pgd_created) {
fprintf(stream, "%*sVirtual Translation Tables:\n",
indent + 2, "");
virt_dump(stream, vm, indent + 4);
diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c
index 07c103369ddb..17aa55a2047a 100644
--- a/tools/testing/selftests/kvm/lib/loongarch/processor.c
+++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c
@@ -50,11 +50,11 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
int i;
vm_paddr_t child, table;
- if (vm->pgd_created)
+ if (vm->mmu.pgd_created)
return;
child = table = 0;
- for (i = 0; i < vm->pgtable_levels; i++) {
+ for (i = 0; i < vm->mmu.pgtable_levels; i++) {
invalid_pgtable[i] = child;
table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN,
vm->memslots[MEM_REGION_PT]);
@@ -62,8 +62,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
virt_set_pgtable(vm, table, child);
child = table;
}
- vm->pgd = table;
- vm->pgd_created = true;
+ vm->mmu.pgd = table;
+ vm->mmu.pgd_created = true;
}
static int virt_pte_none(uint64_t *ptep, int level)
@@ -77,11 +77,11 @@ static uint64_t *virt_populate_pte(struct kvm_vm *vm, vm_vaddr_t gva, int alloc)
uint64_t *ptep;
vm_paddr_t child;
- if (!vm->pgd_created)
+ if (!vm->mmu.pgd_created)
goto unmapped_gva;
- child = vm->pgd;
- level = vm->pgtable_levels - 1;
+ child = vm->mmu.pgd;
+ level = vm->mmu.pgtable_levels - 1;
while (level > 0) {
ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8;
if (virt_pte_none(ptep, level)) {
@@ -161,11 +161,11 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
int level;
- if (!vm->pgd_created)
+ if (!vm->mmu.pgd_created)
return;
- level = vm->pgtable_levels - 1;
- pte_dump(stream, vm, indent, vm->pgd, level);
+ level = vm->mmu.pgtable_levels - 1;
+ pte_dump(stream, vm, indent, vm->mmu.pgd, level);
}
void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
@@ -297,7 +297,7 @@ static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu)
width = vm->page_shift - 3;
- switch (vm->pgtable_levels) {
+ switch (vm->mmu.pgtable_levels) {
case 4:
/* pud page shift and width */
val = (vm->page_shift + width * 2) << 20 | (width << 25);
@@ -309,15 +309,15 @@ static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu)
val |= vm->page_shift | width << 5;
break;
default:
- TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->pgtable_levels);
+ TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->mmu.pgtable_levels);
}
loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val);
/* PGD page shift and width */
- val = (vm->page_shift + width * (vm->pgtable_levels - 1)) | width << 6;
+ val = (vm->page_shift + width * (vm->mmu.pgtable_levels - 1)) | width << 6;
loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val);
- loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->pgd);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->mmu.pgd);
/*
* Refill exception runs on real mode
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index 2eac7d4b59e9..51dd455ff52c 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -8,6 +8,7 @@
#include <linux/compiler.h>
#include <assert.h>
+#include "guest_modes.h"
#include "kvm_util.h"
#include "processor.h"
#include "ucall_common.h"
@@ -26,11 +27,6 @@ bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext)
return !ret && !!value;
}
-static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
-{
- return (v + vm->page_size) & ~(vm->page_size - 1);
-}
-
static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
{
return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) <<
@@ -60,7 +56,7 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
{
TEST_ASSERT(level > -1,
"Negative page table level (%d) not possible", level);
- TEST_ASSERT(level < vm->pgtable_levels,
+ TEST_ASSERT(level < vm->mmu.pgtable_levels,
"Invalid page table level (%d)", level);
return (gva & pte_index_mask[level]) >> pte_index_shift[level];
@@ -68,21 +64,21 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
- size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
+ size_t nr_pages = vm_page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
- if (vm->pgd_created)
+ if (vm->mmu.pgd_created)
return;
- vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR,
- vm->memslots[MEM_REGION_PT]);
- vm->pgd_created = true;
+ vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+ vm->mmu.pgd_created = true;
}
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
uint64_t *ptep, next_ppn;
- int level = vm->pgtable_levels - 1;
+ int level = vm->mmu.pgtable_levels - 1;
TEST_ASSERT((vaddr % vm->page_size) == 0,
"Virtual address not on page boundary,\n"
@@ -98,7 +94,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
" paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8;
+ ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, vaddr, level) * 8;
if (!*ptep) {
next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT;
*ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
@@ -126,12 +122,12 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint64_t *ptep;
- int level = vm->pgtable_levels - 1;
+ int level = vm->mmu.pgtable_levels - 1;
- if (!vm->pgd_created)
+ if (!vm->mmu.pgd_created)
goto unmapped_gva;
- ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8;
+ ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, gva, level) * 8;
if (!ptep)
goto unmapped_gva;
level--;
@@ -176,13 +172,14 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent,
void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
- int level = vm->pgtable_levels - 1;
+ struct kvm_mmu *mmu = &vm->mmu;
+ int level = mmu->pgtable_levels - 1;
uint64_t pgd, *ptep;
- if (!vm->pgd_created)
+ if (!mmu->pgd_created)
return;
- for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
+ for (pgd = mmu->pgd; pgd < mmu->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
ptep = addr_gpa2hva(vm, pgd);
if (!*ptep)
continue;
@@ -197,22 +194,41 @@ void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu)
{
struct kvm_vm *vm = vcpu->vm;
unsigned long satp;
+ unsigned long satp_mode;
+ unsigned long max_satp_mode;
/*
* The RISC-V Sv48 MMU mode supports 56-bit physical address
* for 48-bit virtual address with 4KB last level page size.
*/
switch (vm->mode) {
- case VM_MODE_P52V48_4K:
- case VM_MODE_P48V48_4K:
- case VM_MODE_P40V48_4K:
+ case VM_MODE_P56V57_4K:
+ case VM_MODE_P50V57_4K:
+ case VM_MODE_P41V57_4K:
+ satp_mode = SATP_MODE_57;
+ break;
+ case VM_MODE_P56V48_4K:
+ case VM_MODE_P50V48_4K:
+ case VM_MODE_P41V48_4K:
+ satp_mode = SATP_MODE_48;
+ break;
+ case VM_MODE_P56V39_4K:
+ case VM_MODE_P50V39_4K:
+ case VM_MODE_P41V39_4K:
+ satp_mode = SATP_MODE_39;
break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
- satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
- satp |= SATP_MODE_48;
+ max_satp_mode = vcpu_get_reg(vcpu, RISCV_CONFIG_REG(satp_mode));
+
+ if ((satp_mode >> SATP_MODE_SHIFT) > max_satp_mode)
+ TEST_FAIL("Unable to set satp mode 0x%lx, max mode 0x%lx\n",
+ satp_mode >> SATP_MODE_SHIFT, max_satp_mode);
+
+ satp = (vm->mmu.pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
+ satp |= satp_mode;
vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp);
}
@@ -515,3 +531,38 @@ unsigned long get_host_sbi_spec_version(void)
return ret.value;
}
+
+void kvm_selftest_arch_init(void)
+{
+ /*
+ * riscv64 doesn't have a true default mode, so start by detecting the
+ * supported vm mode.
+ */
+ guest_modes_append_default();
+}
+
+unsigned long riscv64_get_satp_mode(void)
+{
+ int kvm_fd, vm_fd, vcpu_fd, err;
+ uint64_t val;
+ struct kvm_one_reg reg = {
+ .id = RISCV_CONFIG_REG(satp_mode),
+ .addr = (uint64_t)&val,
+ };
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, NULL);
+ TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd));
+
+ vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+ TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd));
+
+ err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
+
+ close(vcpu_fd);
+ close(vm_fd);
+ close(kvm_fd);
+
+ return val;
+}
diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c
index 8ceeb17c819a..6a9a660413a7 100644
--- a/tools/testing/selftests/kvm/lib/s390/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390/processor.c
@@ -17,7 +17,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
vm->page_size);
- if (vm->pgd_created)
+ if (vm->mmu.pgd_created)
return;
paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
@@ -25,8 +25,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
vm->memslots[MEM_REGION_PT]);
memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
- vm->pgd = paddr;
- vm->pgd_created = true;
+ vm->mmu.pgd = paddr;
+ vm->mmu.pgd_created = true;
}
/*
@@ -70,7 +70,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
gva, vm->max_gfn, vm->page_size);
/* Walk through region and segment tables */
- entry = addr_gpa2hva(vm, vm->pgd);
+ entry = addr_gpa2hva(vm, vm->mmu.pgd);
for (ri = 1; ri <= 4; ri++) {
idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
if (entry[idx] & REGION_ENTRY_INVALID)
@@ -94,7 +94,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
vm->page_size);
- entry = addr_gpa2hva(vm, vm->pgd);
+ entry = addr_gpa2hva(vm, vm->mmu.pgd);
for (ri = 1; ri <= 4; ri++) {
idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID),
@@ -149,10 +149,10 @@ static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
- if (!vm->pgd_created)
+ if (!vm->mmu.pgd_created)
return;
- virt_dump_region(stream, vm, indent, vm->pgd);
+ virt_dump_region(stream, vm, indent, vm->mmu.pgd);
}
void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
@@ -184,7 +184,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
vcpu_sregs_get(vcpu, &sregs);
sregs.crs[0] |= 0x00040000; /* Enable floating point regs */
- sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */
+ sregs.crs[1] = vm->mmu.pgd | 0xf; /* Primary region table */
vcpu_sregs_set(vcpu, &sregs);
vcpu->run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */
diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c
index 0b1f288ad556..f53414ba7103 100644
--- a/tools/testing/selftests/kvm/lib/x86/memstress.c
+++ b/tools/testing/selftests/kvm/lib/x86/memstress.c
@@ -13,6 +13,7 @@
#include "kvm_util.h"
#include "memstress.h"
#include "processor.h"
+#include "svm_util.h"
#include "vmx.h"
void memstress_l2_guest_code(uint64_t vcpu_id)
@@ -29,9 +30,10 @@ __asm__(
" ud2;"
);
-static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
-{
#define L2_GUEST_STACK_SIZE 64
+
+static void l1_vmx_code(struct vmx_pages *vmx, uint64_t vcpu_id)
+{
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
unsigned long *rsp;
@@ -45,10 +47,34 @@ static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
prepare_vmcs(vmx, memstress_l2_guest_entry, rsp);
GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
GUEST_DONE();
}
+static void l1_svm_code(struct svm_test_data *svm, uint64_t vcpu_id)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ unsigned long *rsp;
+
+
+ rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
+ *rsp = vcpu_id;
+ generic_svm_setup(svm, memstress_l2_guest_entry, rsp);
+
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+
+static void memstress_l1_guest_code(void *data, uint64_t vcpu_id)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data, vcpu_id);
+ else
+ l1_svm_code(data, vcpu_id);
+}
+
uint64_t memstress_nested_pages(int nr_vcpus)
{
/*
@@ -59,46 +85,37 @@ uint64_t memstress_nested_pages(int nr_vcpus)
return 513 + 10 * nr_vcpus;
}
-void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
+static void memstress_setup_ept_mappings(struct kvm_vm *vm)
{
uint64_t start, end;
- prepare_eptp(vmx, vm);
-
/*
* Identity map the first 4G and the test region with 1G pages so that
* KVM can shadow the EPT12 with the maximum huge page size supported
* by the backing source.
*/
- nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
+ tdp_identity_map_1g(vm, 0, 0x100000000ULL);
start = align_down(memstress_args.gpa, PG_SIZE_1G);
end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G);
- nested_identity_map_1g(vmx, vm, start, end - start);
+ tdp_identity_map_1g(vm, start, end - start);
}
void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[])
{
- struct vmx_pages *vmx, *vmx0 = NULL;
struct kvm_regs regs;
- vm_vaddr_t vmx_gva;
+ vm_vaddr_t nested_gva;
int vcpu_id;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- TEST_REQUIRE(kvm_cpu_has_ept());
+ TEST_REQUIRE(kvm_cpu_has_tdp());
+ vm_enable_tdp(vm);
+ memstress_setup_ept_mappings(vm);
for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- vmx = vcpu_alloc_vmx(vm, &vmx_gva);
-
- if (vcpu_id == 0) {
- memstress_setup_ept(vmx, vm);
- vmx0 = vmx;
- } else {
- /* Share the same EPT table across all vCPUs. */
- vmx->eptp = vmx0->eptp;
- vmx->eptp_hva = vmx0->eptp_hva;
- vmx->eptp_gpa = vmx0->eptp_gpa;
- }
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &nested_gva);
+ else
+ vcpu_alloc_svm(vm, &nested_gva);
/*
* Override the vCPU to run memstress_l1_guest_code() which will
@@ -107,6 +124,6 @@ void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc
vcpu_regs_get(vcpus[vcpu_id], &regs);
regs.rip = (unsigned long) memstress_l1_guest_code;
vcpu_regs_set(vcpus[vcpu_id], &regs);
- vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id);
+ vcpu_args_set(vcpus[vcpu_id], 2, nested_gva, vcpu_id);
}
}
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index 36104d27f3d9..fab18e9be66c 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -8,7 +8,9 @@
#include "kvm_util.h"
#include "pmu.h"
#include "processor.h"
+#include "svm_util.h"
#include "sev.h"
+#include "vmx.h"
#ifndef NUM_INTERRUPTS
#define NUM_INTERRUPTS 256
@@ -156,26 +158,59 @@ bool kvm_is_tdp_enabled(void)
return get_kvm_amd_param_bool("npt");
}
+static void virt_mmu_init(struct kvm_vm *vm, struct kvm_mmu *mmu,
+ struct pte_masks *pte_masks)
+{
+ /* If needed, create the top-level page table. */
+ if (!mmu->pgd_created) {
+ mmu->pgd = vm_alloc_page_table(vm);
+ mmu->pgd_created = true;
+ mmu->arch.pte_masks = *pte_masks;
+ }
+
+ TEST_ASSERT(mmu->pgtable_levels == 4 || mmu->pgtable_levels == 5,
+ "Selftests MMU only supports 4-level and 5-level paging, not %u-level paging",
+ mmu->pgtable_levels);
+}
+
void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
"Unknown or unsupported guest mode: 0x%x", vm->mode);
- /* If needed, create the top-level page table. */
- if (!vm->pgd_created) {
- vm->pgd = vm_alloc_page_table(vm);
- vm->pgd_created = true;
- }
+ struct pte_masks pte_masks = (struct pte_masks){
+ .present = BIT_ULL(0),
+ .writable = BIT_ULL(1),
+ .user = BIT_ULL(2),
+ .accessed = BIT_ULL(5),
+ .dirty = BIT_ULL(6),
+ .huge = BIT_ULL(7),
+ .nx = BIT_ULL(63),
+ .executable = 0,
+ .c = vm->arch.c_bit,
+ .s = vm->arch.s_bit,
+ };
+
+ virt_mmu_init(vm, &vm->mmu, &pte_masks);
+}
+
+void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
+ struct pte_masks *pte_masks)
+{
+ TEST_ASSERT(!vm->stage2_mmu.pgtable_levels, "TDP MMU already initialized");
+
+ vm->stage2_mmu.pgtable_levels = pgtable_levels;
+ virt_mmu_init(vm, &vm->stage2_mmu, pte_masks);
}
-static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
- uint64_t vaddr, int level)
+static void *virt_get_pte(struct kvm_vm *vm, struct kvm_mmu *mmu,
+ uint64_t *parent_pte, uint64_t vaddr, int level)
{
uint64_t pt_gpa = PTE_GET_PA(*parent_pte);
uint64_t *page_table = addr_gpa2hva(vm, pt_gpa);
int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
- TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd,
+ TEST_ASSERT((*parent_pte == mmu->pgd) || is_present_pte(mmu, parent_pte),
"Parent PTE (level %d) not PRESENT for gva: 0x%08lx",
level + 1, vaddr);
@@ -183,20 +218,23 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
}
static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
+ struct kvm_mmu *mmu,
uint64_t *parent_pte,
uint64_t vaddr,
uint64_t paddr,
int current_level,
int target_level)
{
- uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level);
+ uint64_t *pte = virt_get_pte(vm, mmu, parent_pte, vaddr, current_level);
paddr = vm_untag_gpa(vm, paddr);
- if (!(*pte & PTE_PRESENT_MASK)) {
- *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
+ if (!is_present_pte(mmu, pte)) {
+ *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) |
+ PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) |
+ PTE_ALWAYS_SET_MASK(mmu);
if (current_level == target_level)
- *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+ *pte |= PTE_HUGE_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK);
else
*pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
} else {
@@ -208,17 +246,18 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
TEST_ASSERT(current_level != target_level,
"Cannot create hugepage at level: %u, vaddr: 0x%lx",
current_level, vaddr);
- TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
+ TEST_ASSERT(!is_huge_pte(mmu, pte),
"Cannot create page table at level: %u, vaddr: 0x%lx",
current_level, vaddr);
}
return pte;
}
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
+void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr,
+ uint64_t paddr, int level)
{
const uint64_t pg_size = PG_LEVEL_SIZE(level);
- uint64_t *pte = &vm->pgd;
+ uint64_t *pte = &mmu->pgd;
int current_level;
TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
@@ -239,38 +278,43 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
"Unexpected bits in paddr: %lx", paddr);
+ TEST_ASSERT(!PTE_EXECUTABLE_MASK(mmu) || !PTE_NX_MASK(mmu),
+ "X and NX bit masks cannot be used simultaneously");
+
/*
* Allocate upper level page tables, if not already present. Return
* early if a hugepage was created.
*/
- for (current_level = vm->pgtable_levels;
+ for (current_level = mmu->pgtable_levels;
current_level > PG_LEVEL_4K;
current_level--) {
- pte = virt_create_upper_pte(vm, pte, vaddr, paddr,
+ pte = virt_create_upper_pte(vm, mmu, pte, vaddr, paddr,
current_level, level);
- if (*pte & PTE_LARGE_MASK)
+ if (is_huge_pte(mmu, pte))
return;
}
/* Fill in page table entry. */
- pte = virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K);
- TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
+ pte = virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K);
+ TEST_ASSERT(!is_present_pte(mmu, pte),
"PTE already present for 4k page at vaddr: 0x%lx", vaddr);
- *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+ *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) |
+ PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) |
+ PTE_ALWAYS_SET_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK);
/*
* Neither SEV nor TDX supports shared page tables, so only the final
* leaf PTE needs manually set the C/S-bit.
*/
if (vm_is_gpa_protected(vm, paddr))
- *pte |= vm->arch.c_bit;
+ *pte |= PTE_C_BIT_MASK(mmu);
else
- *pte |= vm->arch.s_bit;
+ *pte |= PTE_S_BIT_MASK(mmu);
}
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
- __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
+ __virt_pg_map(vm, &vm->mmu, vaddr, paddr, PG_LEVEL_4K);
}
void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
@@ -285,7 +329,7 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
nr_bytes, pg_size);
for (i = 0; i < nr_pages; i++) {
- __virt_pg_map(vm, vaddr, paddr, level);
+ __virt_pg_map(vm, &vm->mmu, vaddr, paddr, level);
sparsebit_set_num(vm->vpages_mapped, vaddr >> vm->page_shift,
nr_bytes / PAGE_SIZE);
@@ -294,9 +338,10 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
}
}
-static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
+static bool vm_is_target_pte(struct kvm_mmu *mmu, uint64_t *pte,
+ int *level, int current_level)
{
- if (*pte & PTE_LARGE_MASK) {
+ if (is_huge_pte(mmu, pte)) {
TEST_ASSERT(*level == PG_LEVEL_NONE ||
*level == current_level,
"Unexpected hugepage at level %d", current_level);
@@ -306,17 +351,19 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
return *level == current_level;
}
-uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
- int *level)
+static uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm,
+ struct kvm_mmu *mmu,
+ uint64_t vaddr,
+ int *level)
{
- int va_width = 12 + (vm->pgtable_levels) * 9;
- uint64_t *pte = &vm->pgd;
+ int va_width = 12 + (mmu->pgtable_levels) * 9;
+ uint64_t *pte = &mmu->pgd;
int current_level;
TEST_ASSERT(!vm->arch.is_pt_protected,
"Walking page tables of protected guests is impossible");
- TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= vm->pgtable_levels,
+ TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= mmu->pgtable_levels,
"Invalid PG_LEVEL_* '%d'", *level);
TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
@@ -332,32 +379,40 @@ uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
(((int64_t)vaddr << (64 - va_width) >> (64 - va_width))),
"Canonical check failed. The virtual address is invalid.");
- for (current_level = vm->pgtable_levels;
+ for (current_level = mmu->pgtable_levels;
current_level > PG_LEVEL_4K;
current_level--) {
- pte = virt_get_pte(vm, pte, vaddr, current_level);
- if (vm_is_target_pte(pte, level, current_level))
+ pte = virt_get_pte(vm, mmu, pte, vaddr, current_level);
+ if (vm_is_target_pte(mmu, pte, level, current_level))
return pte;
}
- return virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K);
+ return virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K);
+}
+
+uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa)
+{
+ int level = PG_LEVEL_4K;
+
+ return __vm_get_page_table_entry(vm, &vm->stage2_mmu, l2_gpa, &level);
}
-uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
+uint64_t *vm_get_pte(struct kvm_vm *vm, uint64_t vaddr)
{
int level = PG_LEVEL_4K;
- return __vm_get_page_table_entry(vm, vaddr, &level);
+ return __vm_get_page_table_entry(vm, &vm->mmu, vaddr, &level);
}
void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
+ struct kvm_mmu *mmu = &vm->mmu;
uint64_t *pml4e, *pml4e_start;
uint64_t *pdpe, *pdpe_start;
uint64_t *pde, *pde_start;
uint64_t *pte, *pte_start;
- if (!vm->pgd_created)
+ if (!mmu->pgd_created)
return;
fprintf(stream, "%*s "
@@ -365,47 +420,47 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*s index hvaddr gpaddr "
"addr w exec dirty\n",
indent, "");
- pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
+ pml4e_start = (uint64_t *) addr_gpa2hva(vm, mmu->pgd);
for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
pml4e = &pml4e_start[n1];
- if (!(*pml4e & PTE_PRESENT_MASK))
+ if (!is_present_pte(mmu, pml4e))
continue;
fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
" %u\n",
indent, "",
pml4e - pml4e_start, pml4e,
addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
- !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
+ is_writable_pte(mmu, pml4e), is_nx_pte(mmu, pml4e));
pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
pdpe = &pdpe_start[n2];
- if (!(*pdpe & PTE_PRESENT_MASK))
+ if (!is_present_pte(mmu, pdpe))
continue;
fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx "
"%u %u\n",
indent, "",
pdpe - pdpe_start, pdpe,
addr_hva2gpa(vm, pdpe),
- PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
- !!(*pdpe & PTE_NX_MASK));
+ PTE_GET_PFN(*pdpe), is_writable_pte(mmu, pdpe),
+ is_nx_pte(mmu, pdpe));
pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
pde = &pde_start[n3];
- if (!(*pde & PTE_PRESENT_MASK))
+ if (!is_present_pte(mmu, pde))
continue;
fprintf(stream, "%*spde 0x%-3zx %p "
"0x%-12lx 0x%-10llx %u %u\n",
indent, "", pde - pde_start, pde,
addr_hva2gpa(vm, pde),
- PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
- !!(*pde & PTE_NX_MASK));
+ PTE_GET_PFN(*pde), is_writable_pte(mmu, pde),
+ is_nx_pte(mmu, pde));
pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
pte = &pte_start[n4];
- if (!(*pte & PTE_PRESENT_MASK))
+ if (!is_present_pte(mmu, pte))
continue;
fprintf(stream, "%*spte 0x%-3zx %p "
"0x%-12lx 0x%-10llx %u %u "
@@ -414,9 +469,9 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
pte - pte_start, pte,
addr_hva2gpa(vm, pte),
PTE_GET_PFN(*pte),
- !!(*pte & PTE_WRITABLE_MASK),
- !!(*pte & PTE_NX_MASK),
- !!(*pte & PTE_DIRTY_MASK),
+ is_writable_pte(mmu, pte),
+ is_nx_pte(mmu, pte),
+ is_dirty_pte(mmu, pte),
((uint64_t) n1 << 27)
| ((uint64_t) n2 << 18)
| ((uint64_t) n3 << 9)
@@ -427,6 +482,72 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
}
}
+void vm_enable_tdp(struct kvm_vm *vm)
+{
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vm_enable_ept(vm);
+ else
+ vm_enable_npt(vm);
+}
+
+bool kvm_cpu_has_tdp(void)
+{
+ return kvm_cpu_has_ept() || kvm_cpu_has_npt();
+}
+
+void __tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
+ uint64_t size, int level)
+{
+ size_t page_size = PG_LEVEL_SIZE(level);
+ size_t npages = size / page_size;
+
+ TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
+ TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
+
+ while (npages--) {
+ __virt_pg_map(vm, &vm->stage2_mmu, nested_paddr, paddr, level);
+ nested_paddr += page_size;
+ paddr += page_size;
+ }
+}
+
+void tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
+ uint64_t size)
+{
+ __tdp_map(vm, nested_paddr, paddr, size, PG_LEVEL_4K);
+}
+
+/* Identity map, in the stage-2 (TDP) page tables, every page of
+ * memslot 0 that is not marked unused; other memslots are not mapped.
+ */
+void tdp_identity_map_default_memslots(struct kvm_vm *vm)
+{
+ uint32_t s, memslot = 0;
+ sparsebit_idx_t i, last;
+ struct userspace_mem_region *region = memslot2region(vm, memslot);
+
+ /* Only memslot 0 is mapped here, ensure it's the only one being used */
+ for (s = 0; s < NR_MEM_REGIONS; s++)
+ TEST_ASSERT_EQ(vm->memslots[s], 0);
+
+ i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
+ last = i + (region->region.memory_size >> vm->page_shift);
+ for (;;) {
+ i = sparsebit_next_clear(region->unused_phy_pages, i);
+ if (i > last)
+ break;
+
+ tdp_map(vm, (uint64_t)i << vm->page_shift,
+ (uint64_t)i << vm->page_shift, 1 << vm->page_shift);
+ }
+}
+
+/* Identity map a region with 1GiB Pages. */
+void tdp_identity_map_1g(struct kvm_vm *vm, uint64_t addr, uint64_t size)
+{
+ __tdp_map(vm, addr, addr, size, PG_LEVEL_1G);
+}
+
/*
* Set Unusable Segment
*
@@ -497,9 +618,9 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp)
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
int level = PG_LEVEL_NONE;
- uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level);
+ uint64_t *pte = __vm_get_page_table_entry(vm, &vm->mmu, gva, &level);
- TEST_ASSERT(*pte & PTE_PRESENT_MASK,
+ TEST_ASSERT(is_present_pte(&vm->mmu, pte),
"Leaf PTE not PRESENT for gva: 0x%08lx", gva);
/*
@@ -538,7 +659,7 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
if (kvm_cpu_has(X86_FEATURE_XSAVE))
sregs.cr4 |= X86_CR4_OSXSAVE;
- if (vm->pgtable_levels == 5)
+ if (vm->mmu.pgtable_levels == 5)
sregs.cr4 |= X86_CR4_LA57;
sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
@@ -549,7 +670,7 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
kvm_seg_set_kernel_data_64bit(&sregs.gs);
kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr);
- sregs.cr3 = vm->pgd;
+ sregs.cr3 = vm->mmu.pgd;
vcpu_sregs_set(vcpu, &sregs);
}
diff --git a/tools/testing/selftests/kvm/lib/x86/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c
index d239c2097391..2e5c480c9afd 100644
--- a/tools/testing/selftests/kvm/lib/x86/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86/svm.c
@@ -46,6 +46,9 @@ vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr);
memset(svm->msr_hva, 0, getpagesize());
+ if (vm->stage2_mmu.pgd_created)
+ svm->ncr3_gpa = vm->stage2_mmu.pgd;
+
*p_svm_gva = svm_gva;
return svm;
}
@@ -59,6 +62,25 @@ static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
seg->base = base;
}
+void vm_enable_npt(struct kvm_vm *vm)
+{
+ struct pte_masks pte_masks;
+
+ TEST_ASSERT(kvm_cpu_has_npt(), "KVM doesn't support nested NPT");
+
+ /*
+ * NPTs use the same PTE format, but deliberately drop the C-bit as the
+ * per-VM shared vs. private information is only meant for stage-1.
+ */
+ pte_masks = vm->mmu.arch.pte_masks;
+ pte_masks.c = 0;
+
+ /* NPT walks are treated as user accesses, so set the 'user' bit. */
+ pte_masks.always_set = pte_masks.user;
+
+ tdp_mmu_init(vm, vm->mmu.pgtable_levels, &pte_masks);
+}
+
void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
{
struct vmcb *vmcb = svm->vmcb;
@@ -102,6 +124,11 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
vmcb->save.rip = (u64)guest_rip;
vmcb->save.rsp = (u64)guest_rsp;
guest_regs.rdi = (u64)svm;
+
+ if (svm->ncr3_gpa) {
+ ctrl->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
+ ctrl->nested_cr3 = svm->ncr3_gpa;
+ }
}
/*
diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c
index 29b082a58daa..c87b340362a9 100644
--- a/tools/testing/selftests/kvm/lib/x86/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86/vmx.c
@@ -10,38 +10,21 @@
#include "processor.h"
#include "vmx.h"
-#define PAGE_SHIFT_4K 12
-
#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000
+#define EPTP_MT_SHIFT 0 /* EPTP memtype bits 2:0 */
+#define EPTP_PWL_SHIFT 3 /* EPTP page walk length bits 5:3 */
+#define EPTP_AD_ENABLED_SHIFT 6 /* EPTP AD enabled bit 6 */
+
+#define EPTP_WB (X86_MEMTYPE_WB << EPTP_MT_SHIFT)
+#define EPTP_PWL_4 (3ULL << EPTP_PWL_SHIFT) /* PWL is (levels - 1) */
+#define EPTP_AD_ENABLED (1ULL << EPTP_AD_ENABLED_SHIFT)
+
bool enable_evmcs;
struct hv_enlightened_vmcs *current_evmcs;
struct hv_vp_assist_page *current_vp_assist;
-struct eptPageTableEntry {
- uint64_t readable:1;
- uint64_t writable:1;
- uint64_t executable:1;
- uint64_t memory_type:3;
- uint64_t ignore_pat:1;
- uint64_t page_size:1;
- uint64_t accessed:1;
- uint64_t dirty:1;
- uint64_t ignored_11_10:2;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t suppress_ve:1;
-};
-
-struct eptPageTablePointer {
- uint64_t memory_type:3;
- uint64_t page_walk_length:3;
- uint64_t ad_enabled:1;
- uint64_t reserved_11_07:5;
- uint64_t address:40;
- uint64_t reserved_63_52:12;
-};
int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
{
uint16_t evmcs_ver;
@@ -58,6 +41,32 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
return evmcs_ver;
}
+void vm_enable_ept(struct kvm_vm *vm)
+{
+ struct pte_masks pte_masks;
+
+ TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
+
+ /*
+ * EPTs do not have 'present' or 'user' bits; instead, bit 0 is the
+ * 'readable' bit.
+ */
+ pte_masks = (struct pte_masks) {
+ .present = 0,
+ .user = 0,
+ .readable = BIT_ULL(0),
+ .writable = BIT_ULL(1),
+ .executable = BIT_ULL(2),
+ .huge = BIT_ULL(7),
+ .accessed = BIT_ULL(8),
+ .dirty = BIT_ULL(9),
+ .nx = 0,
+ };
+
+ /* TODO: Add support for 5-level EPT. */
+ tdp_mmu_init(vm, 4, &pte_masks);
+}
+
/* Allocate memory regions for nested VMX tests.
*
* Input Args:
@@ -107,6 +116,9 @@ vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
memset(vmx->vmwrite_hva, 0, getpagesize());
+ if (vm->stage2_mmu.pgd_created)
+ vmx->eptp_gpa = vm->stage2_mmu.pgd;
+
*p_vmx_gva = vmx_gva;
return vmx;
}
@@ -196,16 +208,15 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
if (vmx->eptp_gpa) {
- uint64_t ept_paddr;
- struct eptPageTablePointer eptp = {
- .memory_type = X86_MEMTYPE_WB,
- .page_walk_length = 3, /* + 1 */
- .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
- .address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
- };
-
- memcpy(&ept_paddr, &eptp, sizeof(ept_paddr));
- vmwrite(EPT_POINTER, ept_paddr);
+ uint64_t eptp = vmx->eptp_gpa | EPTP_WB | EPTP_PWL_4;
+
+ TEST_ASSERT((vmx->eptp_gpa & ~PHYSICAL_PAGE_MASK) == 0,
+ "Illegal bits set in vmx->eptp_gpa");
+
+ if (ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS))
+ eptp |= EPTP_AD_ENABLED;
+
+ vmwrite(EPT_POINTER, eptp);
sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT;
}
@@ -362,170 +373,13 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
init_vmcs_guest_state(guest_rip, guest_rsp);
}
-static void nested_create_pte(struct kvm_vm *vm,
- struct eptPageTableEntry *pte,
- uint64_t nested_paddr,
- uint64_t paddr,
- int current_level,
- int target_level)
-{
- if (!pte->readable) {
- pte->writable = true;
- pte->readable = true;
- pte->executable = true;
- pte->page_size = (current_level == target_level);
- if (pte->page_size)
- pte->address = paddr >> vm->page_shift;
- else
- pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
- } else {
- /*
- * Entry already present. Assert that the caller doesn't want
- * a hugepage at this level, and that there isn't a hugepage at
- * this level.
- */
- TEST_ASSERT(current_level != target_level,
- "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
- current_level, nested_paddr);
- TEST_ASSERT(!pte->page_size,
- "Cannot create page table at level: %u, nested_paddr: 0x%lx",
- current_level, nested_paddr);
- }
-}
-
-
-void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, int target_level)
-{
- const uint64_t page_size = PG_LEVEL_SIZE(target_level);
- struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
- uint16_t index;
-
- TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
- "Unknown or unsupported guest mode: 0x%x", vm->mode);
-
- TEST_ASSERT((nested_paddr >> 48) == 0,
- "Nested physical address 0x%lx is > 48-bits and requires 5-level EPT",
- nested_paddr);
- TEST_ASSERT((nested_paddr % page_size) == 0,
- "Nested physical address not on page boundary,\n"
- " nested_paddr: 0x%lx page_size: 0x%lx",
- nested_paddr, page_size);
- TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
- TEST_ASSERT((paddr % page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx page_size: 0x%lx",
- paddr, page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
-
- for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
- index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
- pte = &pt[index];
-
- nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
-
- if (pte->page_size)
- break;
-
- pt = addr_gpa2hva(vm, pte->address * vm->page_size);
- }
-
- /*
- * For now mark these as accessed and dirty because the only
- * testcase we have needs that. Can be reconsidered later.
- */
- pte->accessed = true;
- pte->dirty = true;
-
-}
-
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr)
-{
- __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
-}
-
-/*
- * Map a range of EPT guest physical addresses to the VM's physical address
- *
- * Input Args:
- * vm - Virtual Machine
- * nested_paddr - Nested guest physical address to map
- * paddr - VM Physical Address
- * size - The size of the range to map
- * level - The level at which to map the range
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within the VM given by vm, creates a nested guest translation for the
- * page range starting at nested_paddr to the page range starting at paddr.
- */
-void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size,
- int level)
-{
- size_t page_size = PG_LEVEL_SIZE(level);
- size_t npages = size / page_size;
-
- TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
- TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
-
- while (npages--) {
- __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
- nested_paddr += page_size;
- paddr += page_size;
- }
-}
-
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size)
-{
- __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
-}
-
-/* Prepare an identity extended page table that maps all the
- * physical pages in VM.
- */
-void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot)
-{
- sparsebit_idx_t i, last;
- struct userspace_mem_region *region =
- memslot2region(vm, memslot);
-
- i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
- last = i + (region->region.memory_size >> vm->page_shift);
- for (;;) {
- i = sparsebit_next_clear(region->unused_phy_pages, i);
- if (i > last)
- break;
-
- nested_map(vmx, vm,
- (uint64_t)i << vm->page_shift,
- (uint64_t)i << vm->page_shift,
- 1 << vm->page_shift);
- }
-}
-
-/* Identity map a region with 1GiB Pages. */
-void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t addr, uint64_t size)
-{
- __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
-}
-
bool kvm_cpu_has_ept(void)
{
uint64_t ctrl;
+ if (!kvm_cpu_has(X86_FEATURE_VMX))
+ return false;
+
ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
return false;
@@ -534,15 +388,6 @@ bool kvm_cpu_has_ept(void)
return ctrl & SECONDARY_EXEC_ENABLE_EPT;
}
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm)
-{
- TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
-
- vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
- vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
- vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
-}
-
void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
{
vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index cb54a56990a0..8d6b951434eb 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -65,6 +65,7 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAAMO:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALASR:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALRSC:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA:
@@ -78,6 +79,7 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCB:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCD:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCF:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCLSD:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCMOP:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFBFMIN:
@@ -94,6 +96,7 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTNTL:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZILSD:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIMOP:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNE:
@@ -525,6 +528,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(ZAAMO),
KVM_ISA_EXT_ARR(ZABHA),
KVM_ISA_EXT_ARR(ZACAS),
+ KVM_ISA_EXT_ARR(ZALASR),
KVM_ISA_EXT_ARR(ZALRSC),
KVM_ISA_EXT_ARR(ZAWRS),
KVM_ISA_EXT_ARR(ZBA),
@@ -538,6 +542,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(ZCB),
KVM_ISA_EXT_ARR(ZCD),
KVM_ISA_EXT_ARR(ZCF),
+ KVM_ISA_EXT_ARR(ZCLSD),
KVM_ISA_EXT_ARR(ZCMOP),
KVM_ISA_EXT_ARR(ZFA),
KVM_ISA_EXT_ARR(ZFBFMIN),
@@ -554,6 +559,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(ZIHINTNTL),
KVM_ISA_EXT_ARR(ZIHINTPAUSE),
KVM_ISA_EXT_ARR(ZIHPM),
+ KVM_ISA_EXT_ARR(ZILSD),
KVM_ISA_EXT_ARR(ZIMOP),
KVM_ISA_EXT_ARR(ZKND),
KVM_ISA_EXT_ARR(ZKNE),
@@ -1166,6 +1172,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC);
KVM_ISA_EXT_SIMPLE_CONFIG(zaamo, ZAAMO);
KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA);
KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS);
+KVM_ISA_EXT_SIMPLE_CONFIG(zalasr, ZALASR);
KVM_ISA_EXT_SIMPLE_CONFIG(zalrsc, ZALRSC);
KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS);
KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA);
@@ -1179,6 +1186,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA);
KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB);
KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD);
KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF);
+KVM_ISA_EXT_SIMPLE_CONFIG(zclsd, ZCLSD);
KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP);
KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA);
KVM_ISA_EXT_SIMPLE_CONFIG(zfbfmin, ZFBFMIN);
@@ -1195,6 +1203,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI);
KVM_ISA_EXT_SIMPLE_CONFIG(zihintntl, ZIHINTNTL);
KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE);
KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, ZIHPM);
+KVM_ISA_EXT_SIMPLE_CONFIG(zilsd, ZILSD);
KVM_ISA_EXT_SIMPLE_CONFIG(zimop, ZIMOP);
KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND);
KVM_ISA_EXT_SIMPLE_CONFIG(zkne, ZKNE);
@@ -1247,6 +1256,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_zabha,
&config_zacas,
&config_zalrsc,
+ &config_zalasr,
&config_zawrs,
&config_zba,
&config_zbb,
@@ -1259,6 +1269,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_zcb,
&config_zcd,
&config_zcf,
+ &config_zclsd,
&config_zcmop,
&config_zfa,
&config_zfbfmin,
@@ -1275,6 +1286,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_zihintntl,
&config_zihintpause,
&config_zihpm,
+ &config_zilsd,
&config_zimop,
&config_zknd,
&config_zkne,
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index 1375fca80bcd..f80ad6b47d16 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -215,6 +215,7 @@ int main(int argc, char *argv[])
switch (opt) {
case 'u':
skip_sanity_check = true;
+ break;
case 'l':
latency = atoi_paranoid(optarg);
break;
diff --git a/tools/testing/selftests/kvm/s390/keyop.c b/tools/testing/selftests/kvm/s390/keyop.c
new file mode 100644
index 000000000000..c7805e87d12c
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/keyop.c
@@ -0,0 +1,299 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x KVM_S390_KEYOP
+ *
+ * Copyright IBM Corp. 2026
+ *
+ * Authors:
+ * Claudio Imbrenda <imbrenda@linux.ibm.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <linux/bits.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "processor.h"
+
+#define BUF_PAGES 128UL
+#define GUEST_PAGES 256UL
+
+#define BUF_START_GFN (GUEST_PAGES - BUF_PAGES)
+#define BUF_START_ADDR (BUF_START_GFN << PAGE_SHIFT)
+
+#define KEY_BITS_ACC 0xf0
+#define KEY_BIT_F 0x08
+#define KEY_BIT_R 0x04
+#define KEY_BIT_C 0x02
+
+#define KEY_BITS_RC (KEY_BIT_R | KEY_BIT_C)
+#define KEY_BITS_ALL (KEY_BITS_ACC | KEY_BIT_F | KEY_BITS_RC)
+
+static unsigned char tmp[BUF_PAGES];
+static unsigned char old[BUF_PAGES];
+static unsigned char expected[BUF_PAGES];
+
+static int _get_skeys(struct kvm_vcpu *vcpu, unsigned char skeys[])
+{
+ struct kvm_s390_skeys skeys_ioctl = {
+ .start_gfn = BUF_START_GFN,
+ .count = BUF_PAGES,
+ .skeydata_addr = (unsigned long)skeys,
+ };
+
+ return __vm_ioctl(vcpu->vm, KVM_S390_GET_SKEYS, &skeys_ioctl);
+}
+
+static void get_skeys(struct kvm_vcpu *vcpu, unsigned char skeys[])
+{
+ int r = _get_skeys(vcpu, skeys);
+
+ TEST_ASSERT(!r, "Failed to get storage keys, r=%d", r);
+}
+
+static void set_skeys(struct kvm_vcpu *vcpu, unsigned char skeys[])
+{
+ struct kvm_s390_skeys skeys_ioctl = {
+ .start_gfn = BUF_START_GFN,
+ .count = BUF_PAGES,
+ .skeydata_addr = (unsigned long)skeys,
+ };
+ int r;
+
+ r = __vm_ioctl(vcpu->vm, KVM_S390_SET_SKEYS, &skeys_ioctl);
+ TEST_ASSERT(!r, "Failed to set storage keys, r=%d", r);
+}
+
+static int do_keyop(struct kvm_vcpu *vcpu, int op, unsigned long page_idx, unsigned char skey)
+{
+ struct kvm_s390_keyop keyop = {
+ .guest_addr = BUF_START_ADDR + page_idx * PAGE_SIZE,
+ .key = skey,
+ .operation = op,
+ };
+ int r;
+
+ r = __vm_ioctl(vcpu->vm, KVM_S390_KEYOP, &keyop);
+ TEST_ASSERT(!r, "Failed to perform keyop, r=%d", r);
+ TEST_ASSERT((keyop.key & 1) == 0,
+ "Last bit of key is 1, should be 0! page %lu, new key=%#x, old key=%#x",
+ page_idx, skey, keyop.key);
+
+ return keyop.key;
+}
+
+static void fault_in_buffer(struct kvm_vcpu *vcpu, int where, int cur_loc)
+{
+ unsigned long i;
+ int r;
+
+ if (where != cur_loc)
+ return;
+
+ for (i = 0; i < BUF_PAGES; i++) {
+ r = ioctl(vcpu->fd, KVM_S390_VCPU_FAULT, BUF_START_ADDR + i * PAGE_SIZE);
+ TEST_ASSERT(!r, "Faulting in buffer page %lu, r=%d", i, r);
+ }
+}
+
+static inline void set_pattern(unsigned char skeys[])
+{
+ int i;
+
+ for (i = 0; i < BUF_PAGES; i++)
+ skeys[i] = i << 1;
+}
+
+static void dump_sk(const unsigned char skeys[], const char *descr)
+{
+ int i, j;
+
+ fprintf(stderr, "# %s:\n", descr);
+ for (i = 0; i < BUF_PAGES; i += 32) {
+ fprintf(stderr, "# %3d: ", i);
+ for (j = 0; j < 32; j++)
+ fprintf(stderr, "%02x ", skeys[i + j]);
+ fprintf(stderr, "\n");
+ }
+}
+
+static inline void compare(const unsigned char what[], const unsigned char expected[],
+ const char *descr, int fault_in_loc)
+{
+ int i;
+
+ for (i = 0; i < BUF_PAGES; i++) {
+ if (expected[i] != what[i]) {
+ dump_sk(expected, "Expected");
+ dump_sk(what, "Got");
+ }
+ TEST_ASSERT(expected[i] == what[i],
+ "%s! fault-in location %d, page %d, expected %#x, got %#x",
+ descr, fault_in_loc, i, expected[i], what[i]);
+ }
+}
+
+static inline void clear_all(void)
+{
+ memset(tmp, 0, BUF_PAGES);
+ memset(old, 0, BUF_PAGES);
+ memset(expected, 0, BUF_PAGES);
+}
+
+static void test_init(struct kvm_vcpu *vcpu, int fault_in)
+{
+ /* Set all storage keys to zero */
+ fault_in_buffer(vcpu, fault_in, 1);
+ set_skeys(vcpu, expected);
+
+ fault_in_buffer(vcpu, fault_in, 2);
+ get_skeys(vcpu, tmp);
+ compare(tmp, expected, "Setting keys not zero", fault_in);
+
+ /* Set storage keys to a sequential pattern */
+ fault_in_buffer(vcpu, fault_in, 3);
+ set_pattern(expected);
+ set_skeys(vcpu, expected);
+
+ fault_in_buffer(vcpu, fault_in, 4);
+ get_skeys(vcpu, tmp);
+ compare(tmp, expected, "Setting storage keys failed", fault_in);
+}
+
+static void test_rrbe(struct kvm_vcpu *vcpu, int fault_in)
+{
+ unsigned char k;
+ int i;
+
+ /* Set storage keys to a sequential pattern */
+ fault_in_buffer(vcpu, fault_in, 1);
+ set_pattern(expected);
+ set_skeys(vcpu, expected);
+
+ /* Call the RRBE KEYOP ioctl on each page and verify the result */
+ fault_in_buffer(vcpu, fault_in, 2);
+ for (i = 0; i < BUF_PAGES; i++) {
+ k = do_keyop(vcpu, KVM_S390_KEYOP_RRBE, i, 0xff);
+ TEST_ASSERT((expected[i] & KEY_BITS_RC) == k,
+ "Old R or C value mismatch! expected: %#x, got %#x",
+ expected[i] & KEY_BITS_RC, k);
+ if (i == BUF_PAGES / 2)
+ fault_in_buffer(vcpu, fault_in, 3);
+ }
+
+ for (i = 0; i < BUF_PAGES; i++)
+ expected[i] &= ~KEY_BIT_R;
+
+ /* Verify that only the R bit has been cleared */
+ fault_in_buffer(vcpu, fault_in, 4);
+ get_skeys(vcpu, tmp);
+ compare(tmp, expected, "New value mismatch", fault_in);
+}
+
+static void test_iske(struct kvm_vcpu *vcpu, int fault_in)
+{
+ int i;
+
+ /* Set storage keys to a sequential pattern */
+ fault_in_buffer(vcpu, fault_in, 1);
+ set_pattern(expected);
+ set_skeys(vcpu, expected);
+
+ /* Call the ISKE KEYOP ioctl on each page and verify the result */
+ fault_in_buffer(vcpu, fault_in, 2);
+ for (i = 0; i < BUF_PAGES; i++) {
+ tmp[i] = do_keyop(vcpu, KVM_S390_KEYOP_ISKE, i, 0xff);
+ if (i == BUF_PAGES / 2)
+ fault_in_buffer(vcpu, fault_in, 3);
+ }
+ compare(tmp, expected, "Old value mismatch", fault_in);
+
+ /* Check storage keys have not changed */
+ fault_in_buffer(vcpu, fault_in, 4);
+ get_skeys(vcpu, tmp);
+ compare(tmp, expected, "Storage keys values changed", fault_in);
+}
+
+static void test_sske(struct kvm_vcpu *vcpu, int fault_in)
+{
+ int i;
+
+ /* Set storage keys to a sequential pattern */
+ fault_in_buffer(vcpu, fault_in, 1);
+ set_pattern(tmp);
+ set_skeys(vcpu, tmp);
+
+ /* Call the SSKE KEYOP ioctl on each page and verify the result */
+ fault_in_buffer(vcpu, fault_in, 2);
+ for (i = 0; i < BUF_PAGES; i++) {
+ expected[i] = ~tmp[i] & KEY_BITS_ALL;
+ /* Set the new storage keys to be the bit-inversion of the previous ones */
+ old[i] = do_keyop(vcpu, KVM_S390_KEYOP_SSKE, i, expected[i] | 1);
+ if (i == BUF_PAGES / 2)
+ fault_in_buffer(vcpu, fault_in, 3);
+ }
+ compare(old, tmp, "Old value mismatch", fault_in);
+
+ /* Verify that the storage keys have been set correctly */
+ fault_in_buffer(vcpu, fault_in, 4);
+ get_skeys(vcpu, tmp);
+ compare(tmp, expected, "New value mismatch", fault_in);
+}
+
+static struct testdef {
+ const char *name;
+ void (*test)(struct kvm_vcpu *vcpu, int fault_in_location);
+ int n_fault_in_locations;
+} testplan[] = {
+ { "Initialization", test_init, 5 },
+ { "RRBE", test_rrbe, 5 },
+ { "ISKE", test_iske, 5 },
+ { "SSKE", test_sske, 5 },
+};
+
+static void run_test(void (*the_test)(struct kvm_vcpu *, int), int fault_in_location)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int r;
+
+ vm = vm_create_barebones();
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, GUEST_PAGES, 0);
+ vcpu = __vm_vcpu_add(vm, 0);
+
+ r = _get_skeys(vcpu, tmp);
+ TEST_ASSERT(r == KVM_S390_GET_SKEYS_NONE,
+ "Storage keys are not disabled initially, r=%d", r);
+
+ clear_all();
+
+ the_test(vcpu, fault_in_location);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ int i, f;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_KEYOP));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL));
+
+ ksft_print_header();
+ for (i = 0, f = 0; i < ARRAY_SIZE(testplan); i++)
+ f += testplan[i].n_fault_in_locations;
+ ksft_set_plan(f);
+
+ for (i = 0; i < ARRAY_SIZE(testplan); i++) {
+ for (f = 0; f < testplan[i].n_fault_in_locations; f++) {
+ run_test(testplan[i].test, f);
+ ksft_test_result_pass("%s (fault-in location %d)\n", testplan[i].name, f);
+ }
+ }
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index 8edc1fca345b..7be8adfe5dd3 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -301,6 +301,102 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
pr_info("\n");
}
+#elif defined(__loongarch__)
+
+/* steal_time must have 64-byte alignment */
+#define STEAL_TIME_SIZE ((sizeof(struct kvm_steal_time) + 63) & ~63)
+#define KVM_STEAL_PHYS_VALID BIT_ULL(0)
+
+struct kvm_steal_time {
+ __u64 steal;
+ __u32 version;
+ __u32 flags;
+ __u8 preempted;
+ __u8 pad[47];
+};
+
+static void check_status(struct kvm_steal_time *st)
+{
+ GUEST_ASSERT(!(READ_ONCE(st->version) & 1));
+ GUEST_ASSERT_EQ(READ_ONCE(st->flags), 0);
+ GUEST_ASSERT_EQ(READ_ONCE(st->preempted), 0);
+}
+
+static void guest_code(int cpu)
+{
+ uint32_t version;
+ struct kvm_steal_time *st = st_gva[cpu];
+
+ memset(st, 0, sizeof(*st));
+ GUEST_SYNC(0);
+
+ check_status(st);
+ WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+ version = READ_ONCE(st->version);
+ check_status(st);
+ GUEST_SYNC(1);
+
+ check_status(st);
+ GUEST_ASSERT(version < READ_ONCE(st->version));
+ WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+ check_status(st);
+ GUEST_DONE();
+}
+
+static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
+{
+ int err;
+ uint64_t val;
+ struct kvm_device_attr attr = {
+ .group = KVM_LOONGARCH_VCPU_CPUCFG,
+ .attr = CPUCFG_KVM_FEATURE,
+ .addr = (uint64_t)&val,
+ };
+
+ err = __vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &attr);
+ if (err)
+ return false;
+
+ err = __vcpu_ioctl(vcpu, KVM_GET_DEVICE_ATTR, &attr);
+ if (err)
+ return false;
+
+ return val & BIT(KVM_FEATURE_STEAL_TIME);
+}
+
+static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
+{
+ int err;
+ uint64_t st_gpa;
+ struct kvm_vm *vm = vcpu->vm;
+ struct kvm_device_attr attr = {
+ .group = KVM_LOONGARCH_VCPU_PVTIME_CTRL,
+ .attr = KVM_LOONGARCH_VCPU_PVTIME_GPA,
+ .addr = (uint64_t)&st_gpa,
+ };
+
+ /* ST_GPA_BASE is identity mapped */
+ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+ sync_global_to_guest(vm, st_gva[i]);
+
+ err = __vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &attr);
+ TEST_ASSERT(err == 0, "No PV stealtime Feature");
+
+ st_gpa = (unsigned long)st_gva[i] | KVM_STEAL_PHYS_VALID;
+ err = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &attr);
+ TEST_ASSERT(err == 0, "Fail to set PV stealtime GPA");
+}
+
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
+{
+ struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
+
+ ksft_print_msg("VCPU%d:\n", vcpu_idx);
+ ksft_print_msg(" steal: %lld\n", st->steal);
+ ksft_print_msg(" flags: %d\n", st->flags);
+ ksft_print_msg(" version: %d\n", st->version);
+ ksft_print_msg(" preempted: %d\n", st->preempted);
+}
#endif
static void *do_steal_time(void *arg)
diff --git a/tools/testing/selftests/kvm/x86/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c
index f4ce5a185a7d..37b166260ee3 100644
--- a/tools/testing/selftests/kvm/x86/amx_test.c
+++ b/tools/testing/selftests/kvm/x86/amx_test.c
@@ -69,6 +69,12 @@ static inline void __tileloadd(void *tile)
: : "a"(tile), "d"(0));
}
+static inline int tileloadd_safe(void *tile)
+{
+ return kvm_asm_safe(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10",
+ "a"(tile), "d"(0));
+}
+
static inline void __tilerelease(void)
{
asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
@@ -124,27 +130,52 @@ static void set_tilecfg(struct tile_config *cfg)
}
}
+enum {
+ /* Retrieve TMM0 from guest, stash it for TEST_RESTORE_TILEDATA */
+ TEST_SAVE_TILEDATA = 1,
+
+ /* Check TMM0 against tiledata */
+ TEST_COMPARE_TILEDATA = 2,
+
+ /* Restore TMM0 from earlier save */
+ TEST_RESTORE_TILEDATA = 4,
+
+ /* Full VM save/restore */
+ TEST_SAVE_RESTORE = 8,
+};
+
static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
struct tile_data *tiledata,
struct xstate *xstate)
{
+ int vector;
+
GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) &&
this_cpu_has(X86_FEATURE_OSXSAVE));
check_xtile_info();
- GUEST_SYNC(1);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
/* xfd=0, enable amx */
wrmsr(MSR_IA32_XFD, 0);
- GUEST_SYNC(2);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
set_tilecfg(amx_cfg);
__ldtilecfg(amx_cfg);
- GUEST_SYNC(3);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
/* Check save/restore when trap to userspace */
__tileloadd(tiledata);
- GUEST_SYNC(4);
+ GUEST_SYNC(TEST_SAVE_TILEDATA | TEST_COMPARE_TILEDATA | TEST_SAVE_RESTORE);
+
+ /* xfd=0x40000, disable amx tiledata */
+ wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
+
+ /* host tries setting tiledata while guest XFD is set */
+ GUEST_SYNC(TEST_RESTORE_TILEDATA);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
+
+ wrmsr(MSR_IA32_XFD, 0);
__tilerelease();
- GUEST_SYNC(5);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
/*
* After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
* the xcomp_bv.
@@ -154,6 +185,8 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA);
+ /* #NM test */
+
/* xfd=0x40000, disable amx tiledata */
wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
@@ -166,32 +199,33 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA));
- GUEST_SYNC(6);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
set_tilecfg(amx_cfg);
__ldtilecfg(amx_cfg);
- /* Trigger #NM exception */
- __tileloadd(tiledata);
- GUEST_SYNC(10);
- GUEST_DONE();
-}
+ /* Trigger #NM exception */
+ vector = tileloadd_safe(tiledata);
+ __GUEST_ASSERT(vector == NM_VECTOR,
+ "Wanted #NM on tileloadd with XFD[18]=1, got %s",
+ ex_str(vector));
-void guest_nm_handler(struct ex_regs *regs)
-{
- /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */
- GUEST_SYNC(7);
GUEST_ASSERT(!(get_cr0() & X86_CR0_TS));
GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
- GUEST_SYNC(8);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
/* Clear xfd_err */
wrmsr(MSR_IA32_XFD_ERR, 0);
/* xfd=0, enable amx */
wrmsr(MSR_IA32_XFD, 0);
- GUEST_SYNC(9);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
+
+ __tileloadd(tiledata);
+ GUEST_SYNC(TEST_COMPARE_TILEDATA | TEST_SAVE_RESTORE);
+
+ GUEST_DONE();
}
int main(int argc, char *argv[])
@@ -200,10 +234,10 @@ int main(int argc, char *argv[])
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_x86_state *state;
+ struct kvm_x86_state *tile_state = NULL;
int xsave_restore_size;
vm_vaddr_t amx_cfg, tiledata, xstate;
struct ucall uc;
- u32 amx_offset;
int ret;
/*
@@ -228,9 +262,6 @@ int main(int argc, char *argv[])
vcpu_regs_get(vcpu, &regs1);
- /* Register #NM handler */
- vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
-
/* amx cfg for guest_code */
amx_cfg = vm_vaddr_alloc_page(vm);
memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
@@ -244,6 +275,7 @@ int main(int argc, char *argv[])
memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
+ int iter = 0;
for (;;) {
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
@@ -253,37 +285,47 @@ int main(int argc, char *argv[])
REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
- switch (uc.args[1]) {
- case 1:
- case 2:
- case 3:
- case 5:
- case 6:
- case 7:
- case 8:
- fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
- break;
- case 4:
- case 10:
- fprintf(stderr,
- "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
+ ++iter;
+ if (uc.args[1] & TEST_SAVE_TILEDATA) {
+ fprintf(stderr, "GUEST_SYNC #%d, save tiledata\n", iter);
+ tile_state = vcpu_save_state(vcpu);
+ }
+ if (uc.args[1] & TEST_COMPARE_TILEDATA) {
+ fprintf(stderr, "GUEST_SYNC #%d, check TMM0 contents\n", iter);
/* Compacted mode, get amx offset by xsave area
* size subtract 8K amx size.
*/
- amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
- state = vcpu_save_state(vcpu);
- void *amx_start = (void *)state->xsave + amx_offset;
+ u32 amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
+ void *amx_start = (void *)tile_state->xsave + amx_offset;
void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
/* Only check TMM0 register, 1 tile */
ret = memcmp(amx_start, tiles_data, TILE_SIZE);
TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
+ }
+ if (uc.args[1] & TEST_RESTORE_TILEDATA) {
+ fprintf(stderr, "GUEST_SYNC #%d, before KVM_SET_XSAVE\n", iter);
+ vcpu_xsave_set(vcpu, tile_state->xsave);
+ fprintf(stderr, "GUEST_SYNC #%d, after KVM_SET_XSAVE\n", iter);
+ }
+ if (uc.args[1] & TEST_SAVE_RESTORE) {
+ fprintf(stderr, "GUEST_SYNC #%d, save/restore VM state\n", iter);
+ state = vcpu_save_state(vcpu);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vcpu, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
kvm_x86_state_cleanup(state);
- break;
- case 9:
- fprintf(stderr,
- "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
- break;
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vcpu, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
}
break;
case UCALL_DONE:
@@ -293,22 +335,6 @@ int main(int argc, char *argv[])
TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
- state = vcpu_save_state(vcpu);
- memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vcpu, &regs1);
-
- kvm_vm_release(vm);
-
- /* Restore state in a new VM. */
- vcpu = vm_recreate_with_one_vcpu(vm);
- vcpu_load_state(vcpu, state);
- kvm_x86_state_cleanup(state);
-
- memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vcpu, &regs2);
- TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
- "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
- (ulong) regs2.rdi, (ulong) regs2.rsi);
}
done:
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c
index 7b3fda6842bc..f9ed14996977 100644
--- a/tools/testing/selftests/kvm/x86/cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86/cpuid_test.c
@@ -155,6 +155,7 @@ struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct
static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *ent;
+ struct kvm_sregs sregs;
int rc;
u32 eax, ebx, x;
@@ -162,6 +163,20 @@ static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
rc = __vcpu_set_cpuid(vcpu);
TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+ /*
+ * Toggle CR4 bits that affect dynamic CPUID feature flags to verify
+ * setting unmodified CPUID succeeds with runtime CPUID updates.
+ */
+ vcpu_sregs_get(vcpu, &sregs);
+ if (kvm_cpu_has(X86_FEATURE_XSAVE))
+ sregs.cr4 ^= X86_CR4_OSXSAVE;
+ if (kvm_cpu_has(X86_FEATURE_PKU))
+ sregs.cr4 ^= X86_CR4_PKE;
+ vcpu_sregs_set(vcpu, &sregs);
+
+ rc = __vcpu_set_cpuid(vcpu);
+ TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
/* Changing CPU features is forbidden */
ent = vcpu_get_cpuid_entry(vcpu, 0x7);
ebx = ent->ebx;
diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
index a3b7ce155981..c542cc4762b1 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
@@ -619,7 +619,7 @@ int main(int argc, char *argv[])
*/
gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
for (i = 0; i < NTEST_PAGES; i++) {
- pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
+ pte = vm_get_pte(vm, data->test_pages + i * PAGE_SIZE);
gpa = addr_hva2gpa(vm, pte);
virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK);
data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
diff --git a/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c
new file mode 100644
index 000000000000..619229bbd693
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "vmx.h"
+
+/* The memory slot index to track dirty pages */
+#define TEST_MEM_SLOT_INDEX 1
+
+/*
+ * Allocate four pages total. Two pages are used to verify that KVM marks the
+ * accessed page/GFN as dirty, but not the "other" page. Times two
+ * so that each "normal" page can be accessed from L2 via an aliased L2 GVA+GPA
+ * (when TDP is enabled), to verify KVM marks _L1's_ page/GFN as dirty (to
+ * detect failures, L2 => L1 GPAs can't be identity mapped in the TDP page
+ * tables, as marking L2's GPA dirty would get a false pass if L1 == L2).
+ */
+#define TEST_MEM_PAGES 4
+
+#define TEST_MEM_BASE 0xc0000000
+#define TEST_MEM_ALIAS_BASE 0xc0002000
+
+#define TEST_GUEST_ADDR(base, idx) ((base) + (idx) * PAGE_SIZE)
+
+#define TEST_GVA(idx) TEST_GUEST_ADDR(TEST_MEM_BASE, idx)
+#define TEST_GPA(idx) TEST_GUEST_ADDR(TEST_MEM_BASE, idx)
+
+#define TEST_ALIAS_GPA(idx) TEST_GUEST_ADDR(TEST_MEM_ALIAS_BASE, idx)
+
+#define TEST_HVA(vm, idx) addr_gpa2hva(vm, TEST_GPA(idx))
+
+#define L2_GUEST_STACK_SIZE 64
+
+/* Use the page offset bits to communicate the access+fault type. */
+#define TEST_SYNC_READ_FAULT BIT(0)
+#define TEST_SYNC_WRITE_FAULT BIT(1)
+#define TEST_SYNC_NO_FAULT BIT(2)
+
+static void l2_guest_code(vm_vaddr_t base)
+{
+ vm_vaddr_t page0 = TEST_GUEST_ADDR(base, 0);
+ vm_vaddr_t page1 = TEST_GUEST_ADDR(base, 1);
+
+ READ_ONCE(*(u64 *)page0);
+ GUEST_SYNC(page0 | TEST_SYNC_READ_FAULT);
+ WRITE_ONCE(*(u64 *)page0, 1);
+ GUEST_SYNC(page0 | TEST_SYNC_WRITE_FAULT);
+ READ_ONCE(*(u64 *)page0);
+ GUEST_SYNC(page0 | TEST_SYNC_NO_FAULT);
+
+ WRITE_ONCE(*(u64 *)page1, 1);
+ GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT);
+ WRITE_ONCE(*(u64 *)page1, 1);
+ GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT);
+ READ_ONCE(*(u64 *)page1);
+ GUEST_SYNC(page1 | TEST_SYNC_NO_FAULT);
+
+ /* Exit to L1 and never come back. */
+ vmcall();
+}
+
+static void l2_guest_code_tdp_enabled(void)
+{
+ /*
+ * Use the aliased virtual addresses when running with TDP to verify
+ * that KVM correctly handles the case where a page is dirtied via a
+ * different GPA than would be used by L1.
+ */
+ l2_guest_code(TEST_MEM_ALIAS_BASE);
+}
+
+static void l2_guest_code_tdp_disabled(void)
+{
+ /*
+ * Use the "normal" virtual addresses when running without TDP enabled,
+ * in which case L2 will use the same page tables as L1, and thus needs
+ * to use the same virtual addresses that are mapped into L1.
+ */
+ l2_guest_code(TEST_MEM_BASE);
+}
+
+void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ void *l2_rip;
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+
+ if (vmx->eptp_gpa)
+ l2_rip = l2_guest_code_tdp_enabled;
+ else
+ l2_rip = l2_guest_code_tdp_disabled;
+
+ prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(TEST_SYNC_NO_FAULT);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_SYNC(TEST_SYNC_NO_FAULT);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
+ GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ void *l2_rip;
+
+ if (svm->ncr3_gpa)
+ l2_rip = l2_guest_code_tdp_enabled;
+ else
+ l2_rip = l2_guest_code_tdp_disabled;
+
+ generic_svm_setup(svm, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(TEST_SYNC_NO_FAULT);
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_SYNC(TEST_SYNC_NO_FAULT);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
+static void test_handle_ucall_sync(struct kvm_vm *vm, u64 arg,
+ unsigned long *bmap)
+{
+ vm_vaddr_t gva = arg & ~(PAGE_SIZE - 1);
+ int page_nr, i;
+
+ /*
+ * Extract the page number of the underlying physical page, which is also
+ * the _L1_ page number. The dirty bitmap _must_ be updated based on
+ * the L1 GPA, not L2 GPA, i.e. whether or not L2 used an aliased GPA
+ * (i.e. if TDP enabled for L2) is irrelevant with respect to the dirty
+ * bitmap and which underlying physical page is accessed.
+ *
+ * Note, gva will be '0' if there was no access, i.e. if the purpose of
+ * the sync is to verify all pages are clean.
+ */
+ if (!gva)
+ page_nr = 0;
+ else if (gva >= TEST_MEM_ALIAS_BASE)
+ page_nr = (gva - TEST_MEM_ALIAS_BASE) >> PAGE_SHIFT;
+ else
+ page_nr = (gva - TEST_MEM_BASE) >> PAGE_SHIFT;
+ TEST_ASSERT(page_nr == 0 || page_nr == 1,
+ "Test bug, unexpected frame number '%u' for arg = %lx", page_nr, arg);
+ TEST_ASSERT(gva || (arg & TEST_SYNC_NO_FAULT),
+ "Test bug, gva must be valid if a fault is expected");
+
+ kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+
+ /*
+ * Check all pages to verify the correct physical page was modified (or
+ * not), and that all pages are clean/dirty as expected.
+ *
+ * If a fault of any kind is expected, the target page should be dirty
+ * as the Dirty bit is set in the gPTE. KVM should create a writable
+ * SPTE even on a read fault, *and* KVM must mark the GFN as dirty
+ * when doing so.
+ */
+ for (i = 0; i < TEST_MEM_PAGES; i++) {
+ if (i == page_nr && (arg & TEST_SYNC_WRITE_FAULT))
+ TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 1,
+ "Page %u incorrectly not written by guest", i);
+ else
+ TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 0xaaaaaaaaaaaaaaaaULL,
+ "Page %u incorrectly written by guest", i);
+
+ if (i == page_nr && !(arg & TEST_SYNC_NO_FAULT))
+ TEST_ASSERT(test_bit(i, bmap),
+ "Page %u incorrectly reported clean on %s fault",
+ i, arg & TEST_SYNC_READ_FAULT ? "read" : "write");
+ else
+ TEST_ASSERT(!test_bit(i, bmap),
+ "Page %u incorrectly reported dirty", i);
+ }
+}
+
+static void test_dirty_log(bool nested_tdp)
+{
+ vm_vaddr_t nested_gva = 0;
+ unsigned long *bmap;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool done = false;
+
+ pr_info("Nested TDP: %s\n", nested_tdp ? "enabled" : "disabled");
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ if (nested_tdp)
+ vm_enable_tdp(vm);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &nested_gva);
+ else
+ vcpu_alloc_svm(vm, &nested_gva);
+
+ vcpu_args_set(vcpu, 1, nested_gva);
+
+ /* Add an extra memory slot for testing dirty logging */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ TEST_MEM_BASE,
+ TEST_MEM_SLOT_INDEX,
+ TEST_MEM_PAGES,
+ KVM_MEM_LOG_DIRTY_PAGES);
+
+ /*
+ * Add an identity map for GVA range [0xc0000000, 0xc0004000). This
+ * affects both L1 and L2. However...
+ */
+ virt_map(vm, TEST_MEM_BASE, TEST_MEM_BASE, TEST_MEM_PAGES);
+
+ /*
+ * ... pages in the L2 GPA address range [0xc0002000, 0xc0004000) will
+ * map to [0xc0000000, 0xc0002000) when TDP is enabled (for L2).
+ *
+ * When TDP is disabled, the L2 guest code will still access the same L1
+ * GPAs as the TDP enabled case.
+ *
+ * Set the Dirty bit in the PTEs used by L2 so that KVM will create
+ * writable SPTEs when handling read faults (if the Dirty bit isn't
+ * set, KVM must intercept the next write to emulate the Dirty bit
+ * update).
+ */
+ if (nested_tdp) {
+ tdp_identity_map_default_memslots(vm);
+ tdp_map(vm, TEST_ALIAS_GPA(0), TEST_GPA(0), PAGE_SIZE);
+ tdp_map(vm, TEST_ALIAS_GPA(1), TEST_GPA(1), PAGE_SIZE);
+
+ *tdp_get_pte(vm, TEST_ALIAS_GPA(0)) |= PTE_DIRTY_MASK(&vm->stage2_mmu);
+ *tdp_get_pte(vm, TEST_ALIAS_GPA(1)) |= PTE_DIRTY_MASK(&vm->stage2_mmu);
+ } else {
+ *vm_get_pte(vm, TEST_GVA(0)) |= PTE_DIRTY_MASK(&vm->mmu);
+ *vm_get_pte(vm, TEST_GVA(1)) |= PTE_DIRTY_MASK(&vm->mmu);
+ }
+
+ bmap = bitmap_zalloc(TEST_MEM_PAGES);
+
+ while (!done) {
+ memset(TEST_HVA(vm, 0), 0xaa, TEST_MEM_PAGES * PAGE_SIZE);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ test_handle_ucall_sync(vm, uc.args[1], bmap);
+ break;
+ case UCALL_DONE:
+ done = true;
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || kvm_cpu_has(X86_FEATURE_SVM));
+
+ test_dirty_log(/*nested_tdp=*/false);
+
+ if (kvm_cpu_has_tdp())
+ test_dirty_log(/*nested_tdp=*/true);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/nested_set_state_test.c
index 67a62a5a8895..0f2102b43629 100644
--- a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_set_state_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vmx_set_nested_state_test
- *
* Copyright (C) 2019, Google LLC.
*
* This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE.
@@ -11,6 +9,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <errno.h>
#include <linux/kvm.h>
@@ -241,8 +240,108 @@ void test_vmx_nested_state(struct kvm_vcpu *vcpu)
TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
"Size must be between %ld and %d. The size returned was %d.",
sizeof(*state), state_sz, state->size);
- TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
- TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
+
+ TEST_ASSERT_EQ(state->hdr.vmx.vmxon_pa, -1ull);
+ TEST_ASSERT_EQ(state->hdr.vmx.vmcs12_pa, -1ull);
+ TEST_ASSERT_EQ(state->flags, 0);
+
+ free(state);
+}
+
+static void vcpu_efer_enable_svm(struct kvm_vcpu *vcpu)
+{
+ uint64_t old_efer = vcpu_get_msr(vcpu, MSR_EFER);
+
+ vcpu_set_msr(vcpu, MSR_EFER, old_efer | EFER_SVME);
+}
+
+static void vcpu_efer_disable_svm(struct kvm_vcpu *vcpu)
+{
+ uint64_t old_efer = vcpu_get_msr(vcpu, MSR_EFER);
+
+ vcpu_set_msr(vcpu, MSR_EFER, old_efer & ~EFER_SVME);
+}
+
+void set_default_svm_state(struct kvm_nested_state *state, int size)
+{
+ memset(state, 0, size);
+ state->format = 1;
+ state->size = size;
+ state->hdr.svm.vmcb_pa = 0x3000;
+}
+
+void test_svm_nested_state(struct kvm_vcpu *vcpu)
+{
+ /* Add a page for VMCB. */
+ const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
+ struct kvm_nested_state *state =
+ (struct kvm_nested_state *)malloc(state_sz);
+
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_SVM);
+
+ /* The format must be set to 1. 0 for VMX, 1 for SVM. */
+ set_default_svm_state(state, state_sz);
+ state->format = 0;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /* Invalid flags are rejected, KVM_STATE_NESTED_EVMCS is VMX-only */
+ set_default_svm_state(state, state_sz);
+ state->flags = KVM_STATE_NESTED_EVMCS;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * If EFER.SVME is clear, guest mode is disallowed and GIF can be set or
+ * cleared.
+ */
+ vcpu_efer_disable_svm(vcpu);
+
+ set_default_svm_state(state, state_sz);
+ state->flags = KVM_STATE_NESTED_GUEST_MODE;
+ test_nested_state_expect_einval(vcpu, state);
+
+ state->flags = 0;
+ test_nested_state(vcpu, state);
+
+ state->flags = KVM_STATE_NESTED_GIF_SET;
+ test_nested_state(vcpu, state);
+
+ /* Enable SVM in the guest EFER. */
+ vcpu_efer_enable_svm(vcpu);
+
+ /* Setting vmcb_pa to a non-aligned address is only fine when not entering guest mode */
+ set_default_svm_state(state, state_sz);
+ state->hdr.svm.vmcb_pa = -1ull;
+ state->flags = 0;
+ test_nested_state(vcpu, state);
+ state->flags = KVM_STATE_NESTED_GUEST_MODE;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * Size must be large enough to fit kvm_nested_state and VMCB
+ * only when entering guest mode.
+ */
+ set_default_svm_state(state, state_sz/2);
+ state->flags = 0;
+ test_nested_state(vcpu, state);
+ state->flags = KVM_STATE_NESTED_GUEST_MODE;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * Test that if we leave nesting the state reflects that when we get it
+ * again, except for vmcb_pa, which is always returned as 0 when not in
+ * guest mode.
+ */
+ set_default_svm_state(state, state_sz);
+ state->hdr.svm.vmcb_pa = -1ull;
+ state->flags = KVM_STATE_NESTED_GIF_SET;
+ test_nested_state(vcpu, state);
+ vcpu_nested_state_get(vcpu, state);
+ TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
+ "Size must be between %ld and %d. The size returned was %d.",
+ sizeof(*state), state_sz, state->size);
+
+ TEST_ASSERT_EQ(state->hdr.svm.vmcb_pa, 0);
+ TEST_ASSERT_EQ(state->flags, KVM_STATE_NESTED_GIF_SET);
free(state);
}
@@ -255,20 +354,20 @@ int main(int argc, char *argv[])
have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
- /*
- * AMD currently does not implement set_nested_state, so for now we
- * just early out.
- */
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
vm = vm_create_with_one_vcpu(&vcpu, NULL);
/*
- * First run tests with VMX disabled to check error handling.
+ * First run tests with VMX/SVM disabled to check error handling.
+ * test_{vmx/svm}_nested_state() will re-enable as needed.
*/
- vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
+ else
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_SVM);
/* Passing a NULL kvm_nested_state causes a EFAULT. */
test_nested_state_expect_efault(vcpu, NULL);
@@ -297,7 +396,10 @@ int main(int argc, char *argv[])
state.flags = KVM_STATE_NESTED_RUN_PENDING;
test_nested_state_expect_einval(vcpu, &state);
- test_vmx_nested_state(vcpu);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ test_vmx_nested_state(vcpu);
+ else
+ test_svm_nested_state(vcpu);
kvm_vm_free(vm);
return 0;
diff --git a/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c b/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c
new file mode 100644
index 000000000000..6764a48f9d4d
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Google LLC.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+/*
+ * Allocate two VMCB pages for testing. Both pages have different GVAs (shared
+ * by both L1 and L2) and L1 GPAs. A single L2 GPA is used such that:
+ * - L2 GPA == L1 GPA for VMCB0.
+ * - L2 GPA is mapped to L1 GPA for VMCB1 using NPT in L1.
+ *
+ * This allows testing whether the GPA used by VMSAVE/VMLOAD in L2 is
+ * interpreted as a direct L1 GPA or translated using NPT as an L2 GPA, based
+ * on which VMCB is accessed.
+ */
+#define TEST_MEM_SLOT_INDEX 1
+#define TEST_MEM_PAGES 2
+#define TEST_MEM_BASE 0xc0000000
+
+#define TEST_GUEST_ADDR(idx) (TEST_MEM_BASE + (idx) * PAGE_SIZE)
+
+#define TEST_VMCB_L1_GPA(idx) TEST_GUEST_ADDR(idx)
+#define TEST_VMCB_GVA(idx) TEST_GUEST_ADDR(idx)
+
+#define TEST_VMCB_L2_GPA TEST_VMCB_L1_GPA(0)
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code_vmsave(void)
+{
+ asm volatile("vmsave %0" : : "a"(TEST_VMCB_L2_GPA) : "memory");
+}
+
+static void l2_guest_code_vmload(void)
+{
+ asm volatile("vmload %0" : : "a"(TEST_VMCB_L2_GPA) : "memory");
+}
+
+static void l2_guest_code_vmcb(int vmcb_idx)
+{
+ wrmsr(MSR_KERNEL_GS_BASE, 0xaaaa);
+ l2_guest_code_vmsave();
+
+ /* Verify the VMCB used by VMSAVE and update KERNEL_GS_BASE to 0xbbbb */
+ GUEST_SYNC(vmcb_idx);
+
+ l2_guest_code_vmload();
+ GUEST_ASSERT_EQ(rdmsr(MSR_KERNEL_GS_BASE), 0xbbbb);
+
+ /* Reset MSR_KERNEL_GS_BASE */
+ wrmsr(MSR_KERNEL_GS_BASE, 0);
+ l2_guest_code_vmsave();
+
+ vmmcall();
+}
+
+static void l2_guest_code_vmcb0(void)
+{
+ l2_guest_code_vmcb(0);
+}
+
+static void l2_guest_code_vmcb1(void)
+{
+ l2_guest_code_vmcb(1);
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* Each test case initializes the guest RIP below */
+ generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Set VMSAVE/VMLOAD intercepts and make sure they work with.. */
+ svm->vmcb->control.intercept |= (BIT_ULL(INTERCEPT_VMSAVE) |
+ BIT_ULL(INTERCEPT_VMLOAD));
+
+ /* ..VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK cleared.. */
+ svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmsave;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMSAVE);
+
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmload;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMLOAD);
+
+ /* ..and VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK set */
+ svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmsave;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMSAVE);
+
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmload;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMLOAD);
+
+ /* Now clear the intercepts to test VMSAVE/VMLOAD behavior */
+ svm->vmcb->control.intercept &= ~(BIT_ULL(INTERCEPT_VMSAVE) |
+ BIT_ULL(INTERCEPT_VMLOAD));
+
+ /*
+ * Without VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK, the GPA will be
+ * interpreted as an L1 GPA, so VMCB0 should be used.
+ */
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmcb0;
+ svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+
+ /*
+ * With VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK, the GPA will be interpreted as
+ * an L2 GPA, and translated through the NPT to VMCB1.
+ */
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmcb1;
+ svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_gva = 0;
+ struct vmcb *test_vmcb[2];
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int i;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_NPT));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vm_enable_tdp(vm);
+
+ vcpu_alloc_svm(vm, &nested_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ TEST_MEM_BASE, TEST_MEM_SLOT_INDEX,
+ TEST_MEM_PAGES, 0);
+
+ for (i = 0; i <= 1; i++) {
+ virt_map(vm, TEST_VMCB_GVA(i), TEST_VMCB_L1_GPA(i), 1);
+ test_vmcb[i] = (struct vmcb *)addr_gva2hva(vm, TEST_VMCB_GVA(i));
+ }
+
+ tdp_identity_map_default_memslots(vm);
+
+ /*
+ * L2 GPA == L1_GPA(0), but map it to L1_GPA(1), to allow testing
+ * whether the L2 GPA is interpreted as an L1 GPA or translated through
+ * the NPT.
+ */
+ TEST_ASSERT_EQ(TEST_VMCB_L2_GPA, TEST_VMCB_L1_GPA(0));
+ tdp_map(vm, TEST_VMCB_L2_GPA, TEST_VMCB_L1_GPA(1), PAGE_SIZE);
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ i = uc.args[1];
+ TEST_ASSERT(i == 0 || i == 1, "Unexpected VMCB idx: %d", i);
+
+ /*
+ * Check that only the expected VMCB has KERNEL_GS_BASE
+ * set to 0xaaaa, and update it to 0xbbbb.
+ */
+ TEST_ASSERT_EQ(test_vmcb[i]->save.kernel_gs_base, 0xaaaa);
+ TEST_ASSERT_EQ(test_vmcb[1-i]->save.kernel_gs_base, 0);
+ test_vmcb[i]->save.kernel_gs_base = 0xbbbb;
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
index fabeeaddfb3a..0e8aec568010 100644
--- a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
+++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
@@ -47,7 +47,6 @@ int main(int argc, char *argv[])
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
- uint64_t *pte;
uint64_t *hva;
uint64_t gpa;
int rc;
@@ -73,8 +72,7 @@ int main(int argc, char *argv[])
hva = addr_gpa2hva(vm, MEM_REGION_GPA);
memset(hva, 0, PAGE_SIZE);
- pte = vm_get_page_table_entry(vm, MEM_REGION_GVA);
- *pte |= BIT_ULL(MAXPHYADDR);
+ *vm_get_pte(vm, MEM_REGION_GVA) |= BIT_ULL(MAXPHYADDR);
vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
index 7b6481d6c0d3..4bd1655f9e6d 100644
--- a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
@@ -103,7 +103,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
run_guest(vmcb, svm->vmcb_gpa);
__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
- "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+ "Expected VMMCAL #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'",
vmcb->control.exit_code,
vmcb->control.exit_info_1, vmcb->control.exit_info_2);
@@ -133,7 +133,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
run_guest(vmcb, svm->vmcb_gpa);
__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
- "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+ "Expected HLT #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'",
vmcb->control.exit_code,
vmcb->control.exit_info_1, vmcb->control.exit_info_2);
diff --git a/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c
new file mode 100644
index 000000000000..337c53fddeff
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define GOOD_IPI_VECTOR 0xe0
+#define BAD_IPI_VECTOR 0xf0
+
+static volatile int good_ipis_received;
+
+static void good_ipi_handler(struct ex_regs *regs)
+{
+ good_ipis_received++;
+}
+
+static void bad_ipi_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Received \"bad\" IPI; ICR MMIO write should have been ignored");
+}
+
+static void l2_guest_code(void)
+{
+ x2apic_enable();
+ vmcall();
+
+ xapic_enable();
+ xapic_write_reg(APIC_ID, 1 << 24);
+ vmcall();
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+ /* Modify APIC ID to coerce KVM into inhibiting APICv. */
+ xapic_enable();
+ xapic_write_reg(APIC_ID, 1 << 24);
+
+ /*
+ * Generate+receive an IRQ without doing EOI to get an IRQ set in vISR
+ * but not SVI. APICv should be inhibited due to running with a
+ * modified APIC ID.
+ */
+ xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR);
+ GUEST_ASSERT_EQ(xapic_read_reg(APIC_ID), 1 << 24);
+
+ /* Enable IRQs and verify the IRQ was received. */
+ sti_nop();
+ GUEST_ASSERT_EQ(good_ipis_received, 1);
+
+ /*
+ * Run L2 to switch to x2APIC mode, which in turn will uninhibit APICv,
+ * as KVM should force the APIC ID back to its default.
+ */
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
+ GUEST_ASSERT(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD);
+
+ /*
+ * Scribble the APIC access page to verify KVM disabled xAPIC
+ * virtualization in vmcs01, and to verify that KVM flushes L1's TLB
+ * when L2 switches back to accelerated xAPIC mode.
+ */
+ xapic_write_reg(APIC_ICR2, 0xdeadbeefu);
+ xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | BAD_IPI_VECTOR);
+
+ /*
+ * Verify the IRQ is still in-service and emit an EOI to verify KVM
+ * propagates the highest vISR vector to SVI when APICv is activated
+ * (and does so even if APICv was uninhibited while L2 was active).
+ */
+ GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)),
+ BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR)));
+ x2apic_write_reg(APIC_EOI, 0);
+ GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0);
+
+ /*
+ * Run L2 one more time to switch back to xAPIC mode to verify that KVM
+ * handles the x2APIC => xAPIC transition and inhibits APICv while L2
+ * is active.
+ */
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_ASSERT(!(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD));
+
+ xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR);
+ /* Re-enable IRQs, as VM-Exit clears RFLAGS.IF. */
+ sti_nop();
+ GUEST_ASSERT_EQ(good_ipis_received, 2);
+
+ GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)),
+ BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR)));
+ xapic_write_reg(APIC_EOI, 0);
+ GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva;
+ struct vmx_pages *vmx;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ prepare_virtualize_apic_accesses(vmx, vm);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+ vm_install_exception_handler(vm, BAD_IPI_VECTOR, bad_ipi_handler);
+ vm_install_exception_handler(vm, GOOD_IPI_VECTOR, good_ipi_handler);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall %lu", uc.cmd);
+ }
+
+ /*
+ * Verify at least two IRQs were injected. Unfortunately, KVM counts
+ * re-injected IRQs (e.g. if delivering the IRQ hits an EPT violation),
+ * so being more precise isn't possible given the current stats.
+ */
+ TEST_ASSERT(vcpu_get_stat(vcpu, irq_injections) >= 2,
+ "Wanted at least 2 IRQ injections, got %lu\n",
+ vcpu_get_stat(vcpu, irq_injections));
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
deleted file mode 100644
index 98cb6bdab3e6..000000000000
--- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
+++ /dev/null
@@ -1,179 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * KVM dirty page logging test
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-/* The memory slot index to track dirty pages */
-#define TEST_MEM_SLOT_INDEX 1
-#define TEST_MEM_PAGES 3
-
-/* L1 guest test virtual memory offset */
-#define GUEST_TEST_MEM 0xc0000000
-
-/* L2 guest test virtual memory offset */
-#define NESTED_TEST_MEM1 0xc0001000
-#define NESTED_TEST_MEM2 0xc0002000
-
-static void l2_guest_code(u64 *a, u64 *b)
-{
- READ_ONCE(*a);
- WRITE_ONCE(*a, 1);
- GUEST_SYNC(true);
- GUEST_SYNC(false);
-
- WRITE_ONCE(*b, 1);
- GUEST_SYNC(true);
- WRITE_ONCE(*b, 1);
- GUEST_SYNC(true);
- GUEST_SYNC(false);
-
- /* Exit to L1 and never come back. */
- vmcall();
-}
-
-static void l2_guest_code_ept_enabled(void)
-{
- l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2);
-}
-
-static void l2_guest_code_ept_disabled(void)
-{
- /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */
- l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
-}
-
-void l1_guest_code(struct vmx_pages *vmx)
-{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- void *l2_rip;
-
- GUEST_ASSERT(vmx->vmcs_gpa);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx));
- GUEST_ASSERT(load_vmcs(vmx));
-
- if (vmx->eptp_gpa)
- l2_rip = l2_guest_code_ept_enabled;
- else
- l2_rip = l2_guest_code_ept_disabled;
-
- prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- GUEST_SYNC(false);
- GUEST_ASSERT(!vmlaunch());
- GUEST_SYNC(false);
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
- GUEST_DONE();
-}
-
-static void test_vmx_dirty_log(bool enable_ept)
-{
- vm_vaddr_t vmx_pages_gva = 0;
- struct vmx_pages *vmx;
- unsigned long *bmap;
- uint64_t *host_test_mem;
-
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
- bool done = false;
-
- pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled");
-
- /* Create VM */
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
- /* Add an extra memory slot for testing dirty logging */
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- GUEST_TEST_MEM,
- TEST_MEM_SLOT_INDEX,
- TEST_MEM_PAGES,
- KVM_MEM_LOG_DIRTY_PAGES);
-
- /*
- * Add an identity map for GVA range [0xc0000000, 0xc0002000). This
- * affects both L1 and L2. However...
- */
- virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
-
- /*
- * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
- * 0xc0000000.
- *
- * Note that prepare_eptp should be called only L1's GPA map is done,
- * meaning after the last call to virt_map.
- *
- * When EPT is disabled, the L2 guest code will still access the same L1
- * GPAs as the EPT enabled case.
- */
- if (enable_ept) {
- prepare_eptp(vmx, vm);
- nested_map_memslot(vmx, vm, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE);
- nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE);
- }
-
- bmap = bitmap_zalloc(TEST_MEM_PAGES);
- host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
-
- while (!done) {
- memset(host_test_mem, 0xaa, TEST_MEM_PAGES * PAGE_SIZE);
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_SYNC:
- /*
- * The nested guest wrote at offset 0x1000 in the memslot, but the
- * dirty bitmap must be filled in according to L1 GPA, not L2.
- */
- kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
- if (uc.args[1]) {
- TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
- TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
- } else {
- TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
- }
-
- TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[PAGE_SIZE / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
- TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[PAGE_SIZE*2 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
- break;
- case UCALL_DONE:
- done = true;
- break;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- }
-}
-
-int main(int argc, char *argv[])
-{
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
- test_vmx_dirty_log(/*enable_ept=*/false);
-
- if (kvm_cpu_has_ept())
- test_vmx_dirty_log(/*enable_ept=*/true);
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
index cf1d2d1f2a8f..915c42001dba 100644
--- a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
@@ -90,7 +90,7 @@ int main(int argc, char *argv[])
* L1 needs to read its own PML5 table to set up L2. Identity map
* the PML5 table to facilitate this.
*/
- virt_map(vm, vm->pgd, vm->pgd, 1);
+ virt_map(vm, vm->mmu.pgd, vm->mmu.pgd, 1);
vcpu_alloc_vmx(vm, &vmx_pages_gva);
vcpu_args_set(vcpu, 1, vmx_pages_gva);
diff --git a/tools/testing/selftests/kvm/x86/xapic_tpr_test.c b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c
new file mode 100644
index 000000000000..3862134d9d40
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdatomic.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+/*
+ * Selects the x2APIC (true) or xAPIC (false) register accessors. Set by
+ * test_tpr() on the host and copied into the guest before it runs.
+ */
+static bool is_x2apic;
+
+/* Vector used for the self-IPI that the guest queues. */
+#define IRQ_VECTOR 0x20
+
+/* See also the comment at the similar assertion in memslot_perf_test.c */
+static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
+
+/* Incremented by the guest IRQ handlers, polled by tpr_guest_code(). */
+static atomic_uint tpr_guest_irq_sync_val;
+
+/* Zero the IRQ counter, using release ordering for the store. */
+static void tpr_guest_irq_sync_flag_reset(void)
+{
+	atomic_store_explicit(&tpr_guest_irq_sync_val, 0, memory_order_release);
+}
+
+/* Return the current IRQ counter, loaded with acquire ordering. */
+static unsigned int tpr_guest_irq_sync_val_get(void)
+{
+	unsigned int count;
+
+	count = atomic_load_explicit(&tpr_guest_irq_sync_val,
+				     memory_order_acquire);
+	return count;
+}
+
+/* Add one to the IRQ counter (acq_rel); the previous value is discarded. */
+static void tpr_guest_irq_sync_val_inc(void)
+{
+	(void)atomic_fetch_add_explicit(&tpr_guest_irq_sync_val, 1,
+					memory_order_acq_rel);
+}
+
+/*
+ * IRQ_VECTOR handler installed for the xAPIC variant of the test: count the
+ * interrupt, then write EOI through the xAPIC register accessor.
+ */
+static void tpr_guest_irq_handler_xapic(struct ex_regs *regs)
+{
+ tpr_guest_irq_sync_val_inc();
+
+ xapic_write_reg(APIC_EOI, 0);
+}
+
+/*
+ * IRQ_VECTOR handler installed for the x2APIC variant of the test: count the
+ * interrupt, then write EOI through the x2APIC register accessor.
+ */
+static void tpr_guest_irq_handler_x2apic(struct ex_regs *regs)
+{
+ tpr_guest_irq_sync_val_inc();
+
+ x2apic_write_reg(APIC_EOI, 0);
+}
+
+/* Queue IRQ_VECTOR on the local APIC as a self-directed IPI. */
+static void tpr_guest_irq_queue(void)
+{
+	uint32_t icr_lo;
+
+	if (is_x2apic) {
+		x2apic_write_reg(APIC_SELF_IPI, IRQ_VECTOR);
+		return;
+	}
+
+	icr_lo = APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED |
+		 IRQ_VECTOR;
+
+	/* ICR2 is written first, the low ICR half last. */
+	xapic_write_reg(APIC_ICR2, 0);
+	xapic_write_reg(APIC_ICR, icr_lo);
+}
+
+/* Read APIC_TASKPRI in the active APIC mode and return its priority field. */
+static uint8_t tpr_guest_tpr_get(void)
+{
+	uint32_t raw = is_x2apic ? x2apic_read_reg(APIC_TASKPRI) :
+				   xapic_read_reg(APIC_TASKPRI);
+
+	return GET_APIC_PRI(raw);
+}
+
+/* Read APIC_PROCPRI in the active APIC mode and return its priority field. */
+static uint8_t tpr_guest_ppr_get(void)
+{
+	uint32_t raw = is_x2apic ? x2apic_read_reg(APIC_PROCPRI) :
+				   xapic_read_reg(APIC_PROCPRI);
+
+	return GET_APIC_PRI(raw);
+}
+
+/* Read CR8 with a MOV instruction and return its low four bits. */
+static uint8_t tpr_guest_cr8_get(void)
+{
+ uint64_t cr8;
+
+ asm volatile ("mov %%cr8, %[cr8]\n\t" : [cr8] "=r"(cr8));
+
+ return cr8 & GENMASK(3, 0);
+}
+
+/*
+ * Guest-side consistency check: the priority fields of APIC_TASKPRI and
+ * APIC_PROCPRI and the low four bits of CR8 must all be equal.
+ */
+static void tpr_guest_check_tpr_ppr_cr8_equal(void)
+{
+ uint8_t tpr;
+
+ tpr = tpr_guest_tpr_get();
+
+ GUEST_ASSERT_EQ(tpr_guest_ppr_get(), tpr);
+ GUEST_ASSERT_EQ(tpr_guest_cr8_get(), tpr);
+}
+
+/*
+ * Guest entry point. Queues self-IPIs and checks, via the counter bumped by
+ * the IRQ handler, when they are delivered relative to the interrupt flag
+ * and the TPR. The guest never writes the TPR itself; test_tpr() on the host
+ * changes it while handling each GUEST_SYNC().
+ */
+static void tpr_guest_code(void)
+{
+ cli();
+
+ if (is_x2apic)
+ x2apic_enable();
+ else
+ xapic_enable();
+
+ GUEST_ASSERT_EQ(tpr_guest_tpr_get(), 0);
+ tpr_guest_check_tpr_ppr_cr8_equal();
+
+ tpr_guest_irq_queue();
+
+ /* TPR = 0 but IRQ masked by IF=0, should not fire */
+ udelay(1000);
+ GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 0);
+
+ sti();
+
+ /* IF=1 now, IRQ should fire */
+ while (tpr_guest_irq_sync_val_get() == 0)
+ cpu_relax();
+ GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1);
+
+ /* NOTE(review): the argument presumably reaches the host as uc.args[1] ("mask") - confirm */
+ GUEST_SYNC(true);
+ tpr_guest_check_tpr_ppr_cr8_equal();
+
+ tpr_guest_irq_queue();
+
+ /* IRQ masked by barely high enough TPR now, should not fire */
+ udelay(1000);
+ GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1);
+
+ GUEST_SYNC(false);
+ tpr_guest_check_tpr_ppr_cr8_equal();
+
+ /* TPR barely low enough now to unmask IRQ, should fire */
+ while (tpr_guest_irq_sync_val_get() == 1)
+ cpu_relax();
+ GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 2);
+
+ GUEST_DONE();
+}
+
+/* Return the priority field of APIC_TASKPRI from a saved LAPIC register page. */
+static uint8_t lapic_tpr_get(struct kvm_lapic_state *xapic)
+{
+	const u32 *taskpri = (u32 *)&xapic->regs[APIC_TASKPRI];
+
+	return GET_APIC_PRI(*taskpri);
+}
+
+/* Replace the priority field of APIC_TASKPRI in a saved LAPIC register page. */
+static void lapic_tpr_set(struct kvm_lapic_state *xapic, uint8_t val)
+{
+	u32 *reg = (u32 *)&xapic->regs[APIC_TASKPRI];
+	u32 updated = SET_APIC_PRI(*reg, val);
+
+	*reg = updated;
+}
+
+/* Return the low four bits of CR8 from a saved special-register set. */
+static uint8_t sregs_tpr(struct kvm_sregs *sregs)
+{
+	const uint64_t low_nibble = GENMASK(3, 0);
+
+	return sregs->cr8 & low_nibble;
+}
+
+/* Host side: fetch the vCPU's LAPIC state and assert its TPR priority is 0. */
+static void test_tpr_check_tpr_zero(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic_state xapic;
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+
+ TEST_ASSERT_EQ(lapic_tpr_get(&xapic), 0);
+}
+
+/*
+ * Host side: assert that the low four bits of CR8 (from the vCPU's sregs)
+ * equal the TPR priority stored in its LAPIC state.
+ */
+static void test_tpr_check_tpr_cr8_equal(struct kvm_vcpu *vcpu)
+{
+ struct kvm_sregs sregs;
+ struct kvm_lapic_state xapic;
+
+ vcpu_sregs_get(vcpu, &sregs);
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+
+ TEST_ASSERT_EQ(sregs_tpr(&sregs), lapic_tpr_get(&xapic));
+}
+
+/*
+ * Rewrite the vCPU's TPR through KVM_GET_LAPIC/KVM_SET_LAPIC. With @mask set
+ * the TPR becomes IRQ_VECTOR / 16; otherwise it becomes one less than that.
+ */
+static void test_tpr_set_tpr_for_irq(struct kvm_vcpu *vcpu, bool mask)
+{
+	struct kvm_lapic_state lapic;
+	uint8_t prio_class;
+
+	static_assert(IRQ_VECTOR >= 16, "invalid IRQ vector number");
+	prio_class = IRQ_VECTOR / 16;
+
+	vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic);
+	lapic_tpr_set(&lapic, mask ? prio_class : prio_class - 1);
+	vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
+}
+
+/*
+ * Run the TPR test in a fresh single-vCPU VM, using the x2APIC accessors when
+ * @__is_x2apic is true and the xAPIC accessors otherwise. Each vcpu_run() is
+ * bracketed by alarm(2)/alarm(0).
+ */
+static void test_tpr(bool __is_x2apic)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct ucall uc;
+	bool finished = false;
+
+	is_x2apic = __is_x2apic;
+
+	vm = vm_create_with_one_vcpu(&vcpu, tpr_guest_code);
+	vm_install_exception_handler(vm, IRQ_VECTOR,
+				     is_x2apic ? tpr_guest_irq_handler_x2apic :
+						 tpr_guest_irq_handler_xapic);
+	if (!is_x2apic) {
+		/* Clear the x2APIC CPUID bit and map the APIC page 1:1. */
+		vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_X2APIC);
+		virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+	}
+
+	sync_global_to_guest(vcpu->vm, is_x2apic);
+
+	/* According to the SDM/APM the TPR value at reset is 0 */
+	test_tpr_check_tpr_zero(vcpu);
+	test_tpr_check_tpr_cr8_equal(vcpu);
+
+	tpr_guest_irq_sync_flag_reset();
+	sync_global_to_guest(vcpu->vm, tpr_guest_irq_sync_val);
+
+	do {
+		alarm(2);
+		vcpu_run(vcpu);
+		alarm(0);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_SYNC:
+			/* Verify the current state, then set the TPR the guest asked for. */
+			test_tpr_check_tpr_cr8_equal(vcpu);
+			test_tpr_set_tpr_for_irq(vcpu, uc.args[1]);
+			break;
+		case UCALL_DONE:
+			test_tpr_check_tpr_cr8_equal(vcpu);
+			finished = true;
+			break;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			break;
+		default:
+			TEST_FAIL("Unknown ucall result 0x%lx", uc.cmd);
+			break;
+		}
+	} while (!finished);
+
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+	/*
+	 * Use separate VMs for the xAPIC and x2APIC tests so that x2APIC can
+	 * be fully hidden from the guest. KVM disallows changing CPUID after
+	 * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
+	 */
+	test_tpr(/*__is_x2apic=*/false);
+	test_tpr(/*__is_x2apic=*/true);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore
index a820329cae0d..1974e17a2611 100644
--- a/tools/testing/selftests/landlock/.gitignore
+++ b/tools/testing/selftests/landlock/.gitignore
@@ -1,4 +1,5 @@
/*_test
+/fs_bench
/sandbox-and-launch
/true
/wait-pipe
diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile
index 044b83bde16e..fc43225d319a 100644
--- a/tools/testing/selftests/landlock/Makefile
+++ b/tools/testing/selftests/landlock/Makefile
@@ -9,6 +9,7 @@ LOCAL_HDRS += $(wildcard *.h)
src_test := $(wildcard *_test.c)
TEST_GEN_PROGS := $(src_test:.c=)
+TEST_GEN_PROGS += fs_bench
TEST_GEN_PROGS_EXTENDED := \
true \
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 7b69002239d7..0fea236ef4bd 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -76,7 +76,7 @@ TEST(abi_version)
const struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
};
- ASSERT_EQ(7, landlock_create_ruleset(NULL, 0,
+ ASSERT_EQ(8, landlock_create_ruleset(NULL, 0,
LANDLOCK_CREATE_RULESET_VERSION));
ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
@@ -288,7 +288,7 @@ TEST(restrict_self_fd)
EXPECT_EQ(EBADFD, errno);
}
-TEST(restrict_self_fd_flags)
+TEST(restrict_self_fd_logging_flags)
{
int fd;
@@ -304,9 +304,9 @@ TEST(restrict_self_fd_flags)
EXPECT_EQ(EBADFD, errno);
}
-TEST(restrict_self_flags)
+TEST(restrict_self_logging_flags)
{
- const __u32 last_flag = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF;
+ const __u32 last_flag = LANDLOCK_RESTRICT_SELF_TSYNC;
/* Tests invalid flag combinations. */
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index 230b75f6015b..90551650299c 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -237,6 +237,7 @@ struct service_fixture {
struct sockaddr_un unix_addr;
socklen_t unix_addr_len;
};
+ struct sockaddr_storage _largest;
};
};
diff --git a/tools/testing/selftests/landlock/fs_bench.c b/tools/testing/selftests/landlock/fs_bench.c
new file mode 100644
index 000000000000..d13a88dcd1ed
--- /dev/null
+++ b/tools/testing/selftests/landlock/fs_bench.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock filesystem benchmark
+ *
+ * This program benchmarks the time required for file access checks. We use a
+ * large number (-d flag) of nested directories where each directory inode has
+ * an associated Landlock rule, and we repeatedly (-n flag) exercise a file
+ * access for which Landlock has to walk the path all the way up to the root.
+ *
+ * With an increasing number of nested subdirectories, Landlock's portion of the
+ * overall system call time increases, which makes the effects of Landlock
+ * refactorings more measurable.
+ *
+ * This benchmark does *not* measure the building of the Landlock ruleset. The
+ * time required to add all these rules is not large enough to be easily
+ * measurable. A separate benchmark tool would be better to test that, and that
+ * tool could then also use a simpler file system layout.
+ *
+ * Copyright © 2026 Google LLC
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/landlock.h>
+#include <linux/prctl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/times.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "wrappers.h"
+
+/* Print the command line help to stdout; @argv0 is shown as the program name. */
+static void usage(const char *const argv0)
+{
+ printf("Usage:\n");
+ printf(" %s [OPTIONS]\n", argv0);
+ printf("\n");
+ printf(" Benchmark expensive Landlock checks for D nested dirs\n");
+ printf("\n");
+ printf("Options:\n");
+ printf(" -h help\n");
+ printf(" -L disable Landlock (as a baseline)\n");
+ printf(" -d D set directory depth to D\n");
+ printf(" -n N set number of benchmark iterations to N\n");
+}
+
+/*
+ * Build a deep directory, enforce Landlock and return the FD to the
+ * deepest dir. On any failure, exit the process with an error.
+ *
+ * @depth: number of nested "d" directories to create below the CWD.
+ * @use_landlock: when true, add a path-beneath rule for every directory that
+ *		  gets a child and restrict the caller before returning.
+ *
+ * Returns an O_PATH file descriptor for the deepest directory (the CWD
+ * itself when @depth is 0). The caller owns the descriptor.
+ */
+static int build_directory(size_t depth, const bool use_landlock)
+{
+	const char *path = "d"; /* directory name */
+	int ruleset_fd = -1;
+	int curr, prev;
+
+	if (use_landlock) {
+		struct landlock_ruleset_attr attr = {
+			.handled_access_fs = LANDLOCK_ACCESS_FS_IOCTL_DEV |
+					     LANDLOCK_ACCESS_FS_WRITE_FILE |
+					     LANDLOCK_ACCESS_FS_MAKE_REG,
+		};
+		int abi;
+
+		abi = landlock_create_ruleset(NULL, 0,
+					      LANDLOCK_CREATE_RULESET_VERSION);
+		if (abi < 0)
+			err(1, "landlock_create_ruleset(version)");
+		/* The call succeeded, so errno is stale: do not print it. */
+		if (abi < 7)
+			errx(1, "Landlock ABI too low: got %d, wanted 7+", abi);
+
+		ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0U);
+		if (ruleset_fd < 0)
+			err(1, "landlock_create_ruleset");
+	}
+
+	curr = open(".", O_PATH);
+	if (curr < 0)
+		err(1, "open(.)");
+
+	while (depth--) {
+		if (use_landlock) {
+			struct landlock_path_beneath_attr attr = {
+				.allowed_access = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+				.parent_fd = curr,
+			};
+			if (landlock_add_rule(ruleset_fd,
+					      LANDLOCK_RULE_PATH_BENEATH, &attr,
+					      0) < 0)
+				err(1, "landlock_add_rule");
+		}
+
+		if (mkdirat(curr, path, 0700) < 0)
+			err(1, "mkdirat(%s)", path);
+
+		prev = curr;
+		curr = openat(curr, path, O_PATH);
+		if (curr < 0)
+			err(1, "openat(%s)", path);
+
+		close(prev);
+	}
+
+	if (use_landlock) {
+		if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0)
+			err(1, "prctl");
+
+		if (landlock_restrict_self(ruleset_fd, 0) < 0)
+			err(1, "landlock_restrict_self");
+
+		/* Only close the ruleset when one was actually created. */
+		close(ruleset_fd);
+	}
+
+	return curr;
+}
+
+/*
+ * Remove the chain of @depth nested "d" directories below the CWD, deepest
+ * first. Exits the process with an error on any failure.
+ */
+static void remove_recursively(const size_t depth)
+{
+	const char *path = "d"; /* directory name */
+	int fd;
+
+	/* Nothing to remove; also keeps "depth - 1" from wrapping around. */
+	if (depth == 0)
+		return;
+
+	fd = openat(AT_FDCWD, ".", O_PATH);
+	if (fd < 0)
+		err(1, "openat(.)");
+
+	/* Walk down to the parent of the deepest directory. */
+	for (size_t i = 1; i < depth; i++) {
+		int oldfd = fd;
+
+		fd = openat(fd, path, O_PATH);
+		if (fd < 0)
+			err(1, "openat(%s)", path);
+		close(oldfd);
+	}
+
+	/* Remove the child directory, then step up one level. */
+	for (size_t i = 0; i < depth; i++) {
+		int parent;
+
+		if (unlinkat(fd, path, AT_REMOVEDIR) < 0)
+			err(1, "unlinkat(%s)", path);
+
+		parent = openat(fd, "..", O_PATH);
+		if (parent < 0)
+			err(1, "openat(..)");
+
+		close(fd);
+		fd = parent;
+	}
+	close(fd);
+}
+
+/*
+ * Parse a non-negative decimal count from @arg. @what names the option in
+ * the error message. Exits the process on malformed or out-of-range input.
+ */
+static size_t parse_count(const char *const arg, const char *const what)
+{
+	char *end;
+	unsigned long val;
+
+	errno = 0;
+	val = strtoul(arg, &end, 10);
+	/* strtoul() silently accepts a leading '-', so reject it explicitly. */
+	if (errno || end == arg || *end != '\0' || strchr(arg, '-'))
+		errx(1, "invalid %s: '%s'", what, arg);
+	return val;
+}
+
+/*
+ * Benchmark driver: build the nested directories (optionally under
+ * Landlock), repeat the openat() call, print the CPU times reported by
+ * times(), then remove everything that was created.
+ */
+int main(int argc, char *argv[])
+{
+	bool use_landlock = true;
+	size_t num_iterations = 100000;
+	size_t num_subdirs = 10000;
+	int c, curr, fd;
+	struct tms start_time, end_time;
+
+	setbuf(stdout, NULL);
+	while ((c = getopt(argc, argv, "hLd:n:")) != -1) {
+		switch (c) {
+		case 'h':
+			usage(argv[0]);
+			return EXIT_SUCCESS;
+		case 'L':
+			use_landlock = false;
+			break;
+		case 'd':
+			num_subdirs = parse_count(optarg, "directory depth");
+			break;
+		case 'n':
+			num_iterations = parse_count(optarg, "iteration count");
+			break;
+		default:
+			usage(argv[0]);
+			return EXIT_FAILURE;
+		}
+	}
+
+	printf("*** Benchmark ***\n");
+	printf("%zu dirs, %zu iterations, %s Landlock\n", num_subdirs,
+	       num_iterations, use_landlock ? "with" : "without");
+
+	if (times(&start_time) == -1)
+		err(1, "times");
+
+	curr = build_directory(num_subdirs, use_landlock);
+
+	for (size_t i = 0; i < num_iterations; i++) {
+		fd = openat(curr, "file.txt", O_CREAT | O_TRUNC | O_WRONLY,
+			    0600);
+		if (use_landlock) {
+			/* Any valid descriptor means the access was allowed. */
+			if (fd >= 0)
+				errx(1, "openat succeeded, expected EACCES");
+			if (errno != EACCES)
+				err(1, "openat expected EACCES, but got");
+		}
+		if (fd >= 0)
+			close(fd);
+	}
+
+	if (times(&end_time) == -1)
+		err(1, "times");
+
+	printf("*** Benchmark concluded ***\n");
+	printf("System: %ld clocks\n",
+	       (long)(end_time.tms_stime - start_time.tms_stime));
+	printf("User : %ld clocks\n",
+	       (long)(end_time.tms_utime - start_time.tms_utime));
+	/* times() counts in sysconf(_SC_CLK_TCK) ticks, not CLOCKS_PER_SEC. */
+	printf("Clocks per second: %ld\n", sysconf(_SC_CLK_TCK));
+
+	/*
+	 * Without Landlock the loop above created file.txt; remove it so the
+	 * deepest directory is empty and can be deleted.
+	 */
+	if (unlinkat(curr, "file.txt", 0) < 0 && errno != ENOENT)
+		err(1, "unlinkat(file.txt)");
+
+	close(curr);
+
+	remove_recursively(num_subdirs);
+
+	return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index eee814e09dd7..968a91c927a4 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -4362,22 +4362,24 @@ TEST_F_FORK(layout1, named_unix_domain_socket_ioctl)
{
const char *const path = file1_s1d1;
int srv_fd, cli_fd, ruleset_fd;
- socklen_t size;
- struct sockaddr_un srv_un, cli_un;
+ struct sockaddr_un srv_un = {
+ .sun_family = AF_UNIX,
+ };
+ struct sockaddr_un cli_un = {
+ .sun_family = AF_UNIX,
+ };
const struct landlock_ruleset_attr attr = {
.handled_access_fs = LANDLOCK_ACCESS_FS_IOCTL_DEV,
};
/* Sets up a server */
- srv_un.sun_family = AF_UNIX;
- strncpy(srv_un.sun_path, path, sizeof(srv_un.sun_path));
-
ASSERT_EQ(0, unlink(path));
srv_fd = socket(AF_UNIX, SOCK_STREAM, 0);
ASSERT_LE(0, srv_fd);
- size = offsetof(struct sockaddr_un, sun_path) + strlen(srv_un.sun_path);
- ASSERT_EQ(0, bind(srv_fd, (struct sockaddr *)&srv_un, size));
+ strncpy(srv_un.sun_path, path, sizeof(srv_un.sun_path));
+ ASSERT_EQ(0, bind(srv_fd, (struct sockaddr *)&srv_un, sizeof(srv_un)));
+
ASSERT_EQ(0, listen(srv_fd, 10 /* qlen */));
/* Enables Landlock. */
@@ -4387,24 +4389,18 @@ TEST_F_FORK(layout1, named_unix_domain_socket_ioctl)
ASSERT_EQ(0, close(ruleset_fd));
/* Sets up a client connection to it */
- cli_un.sun_family = AF_UNIX;
cli_fd = socket(AF_UNIX, SOCK_STREAM, 0);
ASSERT_LE(0, cli_fd);
- size = offsetof(struct sockaddr_un, sun_path) + strlen(cli_un.sun_path);
- ASSERT_EQ(0, bind(cli_fd, (struct sockaddr *)&cli_un, size));
-
- bzero(&cli_un, sizeof(cli_un));
- cli_un.sun_family = AF_UNIX;
strncpy(cli_un.sun_path, path, sizeof(cli_un.sun_path));
- size = offsetof(struct sockaddr_un, sun_path) + strlen(cli_un.sun_path);
-
- ASSERT_EQ(0, connect(cli_fd, (struct sockaddr *)&cli_un, size));
+ ASSERT_EQ(0,
+ connect(cli_fd, (struct sockaddr *)&cli_un, sizeof(cli_un)));
/* FIONREAD and other IOCTLs should not be forbidden. */
EXPECT_EQ(0, test_fionread_ioctl(cli_fd));
- ASSERT_EQ(0, close(cli_fd));
+ EXPECT_EQ(0, close(cli_fd));
+ EXPECT_EQ(0, close(srv_fd));
}
/* clang-format off */
@@ -7074,8 +7070,8 @@ static int matches_log_fs_extra(struct __test_metadata *const _metadata,
return -E2BIG;
/*
- * It is assume that absolute_path does not contain control characters nor
- * spaces, see audit_string_contains_control().
+ * It is assumed that absolute_path does not contain control
+ * characters nor spaces, see audit_string_contains_control().
*/
absolute_path = realpath(path, NULL);
if (!absolute_path)
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
index 2a45208551e6..b34b139b3f89 100644
--- a/tools/testing/selftests/landlock/net_test.c
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -121,6 +121,10 @@ static socklen_t get_addrlen(const struct service_fixture *const srv,
{
switch (srv->protocol.domain) {
case AF_UNSPEC:
+ if (minimal)
+ return sizeof(sa_family_t);
+ return sizeof(struct sockaddr_storage);
+
case AF_INET:
return sizeof(srv->ipv4_addr);
@@ -758,6 +762,11 @@ TEST_F(protocol, bind_unspec)
bind_fd = socket_variant(&self->srv0);
ASSERT_LE(0, bind_fd);
+ /* Tries to bind with too small addrlen. */
+ EXPECT_EQ(-EINVAL, bind_variant_addrlen(
+ bind_fd, &self->unspec_any0,
+ get_addrlen(&self->unspec_any0, true) - 1));
+
/* Allowed bind on AF_UNSPEC/INADDR_ANY. */
ret = bind_variant(bind_fd, &self->unspec_any0);
if (variant->prot.domain == AF_INET) {
@@ -766,6 +775,8 @@ TEST_F(protocol, bind_unspec)
TH_LOG("Failed to bind to unspec/any socket: %s",
strerror(errno));
}
+ } else if (variant->prot.domain == AF_INET6) {
+ EXPECT_EQ(-EAFNOSUPPORT, ret);
} else {
EXPECT_EQ(-EINVAL, ret);
}
@@ -792,6 +803,8 @@ TEST_F(protocol, bind_unspec)
} else {
EXPECT_EQ(0, ret);
}
+ } else if (variant->prot.domain == AF_INET6) {
+ EXPECT_EQ(-EAFNOSUPPORT, ret);
} else {
EXPECT_EQ(-EINVAL, ret);
}
@@ -801,7 +814,8 @@ TEST_F(protocol, bind_unspec)
bind_fd = socket_variant(&self->srv0);
ASSERT_LE(0, bind_fd);
ret = bind_variant(bind_fd, &self->unspec_srv0);
- if (variant->prot.domain == AF_INET) {
+ if (variant->prot.domain == AF_INET ||
+ variant->prot.domain == AF_INET6) {
EXPECT_EQ(-EAFNOSUPPORT, ret);
} else {
EXPECT_EQ(-EINVAL, ret)
@@ -892,7 +906,19 @@ TEST_F(protocol, connect_unspec)
EXPECT_EQ(0, close(ruleset_fd));
}
- ret = connect_variant(connect_fd, &self->unspec_any0);
+ /* Try to re-disconnect with a truncated address struct. */
+ EXPECT_EQ(-EINVAL,
+ connect_variant_addrlen(
+ connect_fd, &self->unspec_any0,
+ get_addrlen(&self->unspec_any0, true) - 1));
+
+ /*
+ * Re-disconnect, with a minimal sockaddr struct (just a
+ * bare af_family=AF_UNSPEC field).
+ */
+ ret = connect_variant_addrlen(connect_fd, &self->unspec_any0,
+ get_addrlen(&self->unspec_any0,
+ true));
if (self->srv0.protocol.domain == AF_UNIX &&
self->srv0.protocol.type == SOCK_STREAM) {
EXPECT_EQ(-EINVAL, ret);
diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
index 4e356334ecb7..4f64c90583cd 100644
--- a/tools/testing/selftests/landlock/ptrace_test.c
+++ b/tools/testing/selftests/landlock/ptrace_test.c
@@ -86,16 +86,9 @@ static int get_yama_ptrace_scope(void)
}
/* clang-format off */
-FIXTURE(hierarchy) {};
+FIXTURE(scoped_domains) {};
/* clang-format on */
-FIXTURE_VARIANT(hierarchy)
-{
- const bool domain_both;
- const bool domain_parent;
- const bool domain_child;
-};
-
/*
* Test multiple tracing combinations between a parent process P1 and a child
* process P2.
@@ -104,155 +97,18 @@ FIXTURE_VARIANT(hierarchy)
* restriction is enforced in addition to any Landlock check, which means that
* all P2 requests to trace P1 would be denied.
*/
+#include "scoped_base_variants.h"
-/*
- * No domain
- *
- * P1-. P1 -> P2 : allow
- * \ P2 -> P1 : allow
- * 'P2
- */
-/* clang-format off */
-FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
- /* clang-format on */
- .domain_both = false,
- .domain_parent = false,
- .domain_child = false,
-};
-
-/*
- * Child domain
- *
- * P1--. P1 -> P2 : allow
- * \ P2 -> P1 : deny
- * .'-----.
- * | P2 |
- * '------'
- */
-/* clang-format off */
-FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
- /* clang-format on */
- .domain_both = false,
- .domain_parent = false,
- .domain_child = true,
-};
-
-/*
- * Parent domain
- * .------.
- * | P1 --. P1 -> P2 : deny
- * '------' \ P2 -> P1 : allow
- * '
- * P2
- */
-/* clang-format off */
-FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
- /* clang-format on */
- .domain_both = false,
- .domain_parent = true,
- .domain_child = false,
-};
-
-/*
- * Parent + child domain (siblings)
- * .------.
- * | P1 ---. P1 -> P2 : deny
- * '------' \ P2 -> P1 : deny
- * .---'--.
- * | P2 |
- * '------'
- */
-/* clang-format off */
-FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
- /* clang-format on */
- .domain_both = false,
- .domain_parent = true,
- .domain_child = true,
-};
-
-/*
- * Same domain (inherited)
- * .-------------.
- * | P1----. | P1 -> P2 : allow
- * | \ | P2 -> P1 : allow
- * | ' |
- * | P2 |
- * '-------------'
- */
-/* clang-format off */
-FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
- /* clang-format on */
- .domain_both = true,
- .domain_parent = false,
- .domain_child = false,
-};
-
-/*
- * Inherited + child domain
- * .-----------------.
- * | P1----. | P1 -> P2 : allow
- * | \ | P2 -> P1 : deny
- * | .-'----. |
- * | | P2 | |
- * | '------' |
- * '-----------------'
- */
-/* clang-format off */
-FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
- /* clang-format on */
- .domain_both = true,
- .domain_parent = false,
- .domain_child = true,
-};
-
-/*
- * Inherited + parent domain
- * .-----------------.
- * |.------. | P1 -> P2 : deny
- * || P1 ----. | P2 -> P1 : allow
- * |'------' \ |
- * | ' |
- * | P2 |
- * '-----------------'
- */
-/* clang-format off */
-FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
- /* clang-format on */
- .domain_both = true,
- .domain_parent = true,
- .domain_child = false,
-};
-
-/*
- * Inherited + parent and child domain (siblings)
- * .-----------------.
- * | .------. | P1 -> P2 : deny
- * | | P1 . | P2 -> P1 : deny
- * | '------'\ |
- * | \ |
- * | .--'---. |
- * | | P2 | |
- * | '------' |
- * '-----------------'
- */
-/* clang-format off */
-FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) {
- /* clang-format on */
- .domain_both = true,
- .domain_parent = true,
- .domain_child = true,
-};
-
-FIXTURE_SETUP(hierarchy)
+FIXTURE_SETUP(scoped_domains)
{
}
-FIXTURE_TEARDOWN(hierarchy)
+FIXTURE_TEARDOWN(scoped_domains)
{
}
/* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */
-TEST_F(hierarchy, trace)
+TEST_F(scoped_domains, trace)
{
pid_t child, parent;
int status, err_proc_read;
diff --git a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
index 6825082c079c..72f97648d4a7 100644
--- a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
+++ b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
@@ -543,7 +543,7 @@ TEST_F(scoped_vs_unscoped, unix_scoping)
ASSERT_EQ(1, write(pipe_child[1], ".", 1));
ASSERT_EQ(grand_child, waitpid(grand_child, &status, 0));
- EXPECT_EQ(0, close(stream_server_child))
+ EXPECT_EQ(0, close(stream_server_child));
EXPECT_EQ(0, close(dgram_server_child));
return;
}
@@ -779,7 +779,6 @@ FIXTURE_TEARDOWN(various_address_sockets)
TEST_F(various_address_sockets, scoped_pathname_sockets)
{
- socklen_t size_stream, size_dgram;
pid_t child;
int status;
char buf_child, buf_parent;
@@ -798,12 +797,8 @@ TEST_F(various_address_sockets, scoped_pathname_sockets)
/* Pathname address. */
snprintf(stream_pathname_addr.sun_path,
sizeof(stream_pathname_addr.sun_path), "%s", stream_path);
- size_stream = offsetof(struct sockaddr_un, sun_path) +
- strlen(stream_pathname_addr.sun_path);
snprintf(dgram_pathname_addr.sun_path,
sizeof(dgram_pathname_addr.sun_path), "%s", dgram_path);
- size_dgram = offsetof(struct sockaddr_un, sun_path) +
- strlen(dgram_pathname_addr.sun_path);
/* Abstract address. */
memset(&stream_abstract_addr, 0, sizeof(stream_abstract_addr));
@@ -841,8 +836,9 @@ TEST_F(various_address_sockets, scoped_pathname_sockets)
/* Connects with pathname sockets. */
stream_pathname_socket = socket(AF_UNIX, SOCK_STREAM, 0);
ASSERT_LE(0, stream_pathname_socket);
- ASSERT_EQ(0, connect(stream_pathname_socket,
- &stream_pathname_addr, size_stream));
+ ASSERT_EQ(0,
+ connect(stream_pathname_socket, &stream_pathname_addr,
+ sizeof(stream_pathname_addr)));
ASSERT_EQ(1, write(stream_pathname_socket, "b", 1));
EXPECT_EQ(0, close(stream_pathname_socket));
@@ -850,12 +846,13 @@ TEST_F(various_address_sockets, scoped_pathname_sockets)
dgram_pathname_socket = socket(AF_UNIX, SOCK_DGRAM, 0);
ASSERT_LE(0, dgram_pathname_socket);
err = sendto(dgram_pathname_socket, "c", 1, 0,
- &dgram_pathname_addr, size_dgram);
+ &dgram_pathname_addr, sizeof(dgram_pathname_addr));
EXPECT_EQ(1, err);
/* Sends with connection. */
- ASSERT_EQ(0, connect(dgram_pathname_socket,
- &dgram_pathname_addr, size_dgram));
+ ASSERT_EQ(0,
+ connect(dgram_pathname_socket, &dgram_pathname_addr,
+ sizeof(dgram_pathname_addr)));
ASSERT_EQ(1, write(dgram_pathname_socket, "d", 1));
EXPECT_EQ(0, close(dgram_pathname_socket));
@@ -910,13 +907,13 @@ TEST_F(various_address_sockets, scoped_pathname_sockets)
stream_pathname_socket = socket(AF_UNIX, SOCK_STREAM, 0);
ASSERT_LE(0, stream_pathname_socket);
ASSERT_EQ(0, bind(stream_pathname_socket, &stream_pathname_addr,
- size_stream));
+ sizeof(stream_pathname_addr)));
ASSERT_EQ(0, listen(stream_pathname_socket, backlog));
dgram_pathname_socket = socket(AF_UNIX, SOCK_DGRAM, 0);
ASSERT_LE(0, dgram_pathname_socket);
ASSERT_EQ(0, bind(dgram_pathname_socket, &dgram_pathname_addr,
- size_dgram));
+ sizeof(dgram_pathname_addr)));
/* Sets up abstract servers. */
stream_abstract_socket = socket(AF_UNIX, SOCK_STREAM, 0);
diff --git a/tools/testing/selftests/landlock/scoped_base_variants.h b/tools/testing/selftests/landlock/scoped_base_variants.h
index d3b1fa8a584e..7116728ebc68 100644
--- a/tools/testing/selftests/landlock/scoped_base_variants.h
+++ b/tools/testing/selftests/landlock/scoped_base_variants.h
@@ -1,8 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Landlock scoped_domains variants
+ * Landlock scoped_domains test variant definition.
*
- * See the hierarchy variants from ptrace_test.c
+ * This file defines a fixture variant "scoped_domains" that has all
+ * permutations of parent/child process being in separate or shared
+ * Landlock domain, or not being in a Landlock domain at all.
+ *
+ * Scoped access tests can include this file to avoid repeating these
+ * combinations.
*
* Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
* Copyright © 2019-2020 ANSSI
diff --git a/tools/testing/selftests/landlock/tsync_test.c b/tools/testing/selftests/landlock/tsync_test.c
new file mode 100644
index 000000000000..37ef0d2270db
--- /dev/null
+++ b/tools/testing/selftests/landlock/tsync_test.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - Enforcing the same restrictions across multiple threads
+ *
+ * Copyright © 2025 Günther Noack <gnoack3000@gmail.com>
+ */
+
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sys/prctl.h>
+#include <linux/landlock.h>
+
+#include "common.h"
+
+/*
+ * create_ruleset - Create a simple ruleset FD common to all tests
+ *
+ * The ruleset handles LANDLOCK_ACCESS_FS_WRITE_FILE and
+ * LANDLOCK_ACCESS_FS_TRUNCATE and has no rules added. The test is aborted
+ * through ASSERT_LE() if the ruleset cannot be created. The caller closes
+ * the returned FD.
+ */
+static int create_ruleset(struct __test_metadata *const _metadata)
+{
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = (LANDLOCK_ACCESS_FS_WRITE_FILE |
+ LANDLOCK_ACCESS_FS_TRUNCATE),
+ };
+ const int ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+
+ ASSERT_LE(0, ruleset_fd)
+ {
+ TH_LOG("landlock_create_ruleset: %s", strerror(errno));
+ }
+ return ruleset_fd;
+}
+
+/* LANDLOCK_RESTRICT_SELF_TSYNC must succeed when no other threads were created. */
+TEST(single_threaded_success)
+{
+ const int ruleset_fd = create_ruleset(_metadata);
+
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd,
+ LANDLOCK_RESTRICT_SELF_TSYNC));
+
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+/*
+ * Cleanup handler: store the calling thread's PR_GET_NO_NEW_PRIVS result in
+ * the bool that @data points to. Does nothing when @data is NULL.
+ */
+static void store_no_new_privs(void *data)
+{
+	bool *const out = data;
+
+	if (out)
+		*out = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+}
+
+/*
+ * Thread body that sleeps in a loop until it is cancelled. On cancellation
+ * the cleanup handler store_no_new_privs() runs with @data.
+ */
+static void *idle(void *data)
+{
+	pthread_cleanup_push(store_no_new_privs, data);
+
+	for (;;)
+		sleep(1);
+
+	/* Never reached; needed to pair with pthread_cleanup_push() above. */
+	pthread_cleanup_pop(1);
+}
+
+/*
+ * TSYNC with two idle sibling threads. The threads are started before the
+ * main thread sets no_new_privs; each one records its own no_new_privs state
+ * from its cleanup handler when it is cancelled.
+ */
+TEST(multi_threaded_success)
+{
+ pthread_t t1, t2;
+ bool no_new_privs1, no_new_privs2;
+ const int ruleset_fd = create_ruleset(_metadata);
+
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, pthread_create(&t1, NULL, idle, &no_new_privs1));
+ ASSERT_EQ(0, pthread_create(&t2, NULL, idle, &no_new_privs2));
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+ EXPECT_EQ(0, landlock_restrict_self(ruleset_fd,
+ LANDLOCK_RESTRICT_SELF_TSYNC));
+
+ ASSERT_EQ(0, pthread_cancel(t1));
+ ASSERT_EQ(0, pthread_cancel(t2));
+ ASSERT_EQ(0, pthread_join(t1, NULL));
+ ASSERT_EQ(0, pthread_join(t2, NULL));
+
+ /* The no_new_privs flag was implicitly enabled on all threads. */
+ EXPECT_TRUE(no_new_privs1);
+ EXPECT_TRUE(no_new_privs2);
+
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+/*
+ * TSYNC must also succeed after the main thread alone has enforced a
+ * ruleset. The idle threads get NULL as data, so their cleanup handler
+ * records nothing.
+ */
+TEST(multi_threaded_success_despite_diverging_domains)
+{
+ pthread_t t1, t2;
+ const int ruleset_fd = create_ruleset(_metadata);
+
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+ ASSERT_EQ(0, pthread_create(&t1, NULL, idle, NULL));
+ ASSERT_EQ(0, pthread_create(&t2, NULL, idle, NULL));
+
+ /*
+ * The main thread enforces a ruleset,
+ * thereby bringing the threads' Landlock domains out of sync.
+ */
+ EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+
+ /* Still, TSYNC succeeds, bringing the threads in sync again. */
+ EXPECT_EQ(0, landlock_restrict_self(ruleset_fd,
+ LANDLOCK_RESTRICT_SELF_TSYNC));
+
+ ASSERT_EQ(0, pthread_cancel(t1));
+ ASSERT_EQ(0, pthread_cancel(t2));
+ ASSERT_EQ(0, pthread_join(t1, NULL));
+ ASSERT_EQ(0, pthread_join(t2, NULL));
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+/* Per-thread argument and result slot for thread_restrict(). */
+struct thread_restrict_data {
+ /* Thread handle, filled in by pthread_create(). */
+ pthread_t t;
+ /* Ruleset to enforce. */
+ int ruleset_fd;
+ /* Return value of landlock_restrict_self(), written by the thread. */
+ int result;
+};
+
+/*
+ * Thread body: enforce the ruleset from @data (a struct thread_restrict_data)
+ * with LANDLOCK_RESTRICT_SELF_TSYNC and store the return value in ->result.
+ */
+static void *thread_restrict(void *data)
+{
+	struct thread_restrict_data *const self = data;
+	const int ret = landlock_restrict_self(self->ruleset_fd,
+					       LANDLOCK_RESTRICT_SELF_TSYNC);
+
+	self->result = ret;
+	return NULL;
+}
+
+/*
+ * Two threads call landlock_restrict_self() with TSYNC on the same ruleset
+ * without any ordering between them; both calls are expected to return 0.
+ */
+TEST(competing_enablement)
+{
+ const int ruleset_fd = create_ruleset(_metadata);
+ struct thread_restrict_data d[] = {
+ { .ruleset_fd = ruleset_fd },
+ { .ruleset_fd = ruleset_fd },
+ };
+
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, pthread_create(&d[0].t, NULL, thread_restrict, &d[0]));
+ ASSERT_EQ(0, pthread_create(&d[1].t, NULL, thread_restrict, &d[1]));
+
+ /* Wait for threads to finish. */
+ ASSERT_EQ(0, pthread_join(d[0].t, NULL));
+ ASSERT_EQ(0, pthread_join(d[1].t, NULL));
+
+ /* Expect that both succeeded. */
+ EXPECT_EQ(0, d[0].result);
+ EXPECT_EQ(0, d[1].result);
+
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index cff124c1eddd..e62b85b591be 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -1,6 +1,8 @@
#PANIC
#PANIC_STOP_IRQOFF Crashes entire system
+#PANIC_IN_HARDIRQ Crashes entire system
BUG kernel BUG at
+#BUG_IN_HARDIRQ Crashes entire system
WARNING WARNING:
WARNING_MESSAGE message trigger
EXCEPTION
@@ -9,6 +11,8 @@ EXCEPTION
#CORRUPT_STACK Crashes entire system on success
#CORRUPT_STACK_STRONG Crashes entire system on success
ARRAY_BOUNDS call trace:|UBSAN: array-index-out-of-bounds
+FAM_BOUNDS call trace:|UBSAN: array-index-out-of-bounds
+PTR_BOUNDS call trace:|UBSAN: array-index-out-of-bounds
CORRUPT_LIST_ADD list_add corruption
CORRUPT_LIST_DEL list_del corruption
STACK_GUARD_PAGE_LEADING
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 5b993924cc3f..2ca07ea7202a 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -18,6 +18,9 @@
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/ipc.h>
+#include <sys/sem.h>
#include <unistd.h>
#include <ctype.h>
@@ -39,6 +42,20 @@
F_SEAL_EXEC)
#define MFD_NOEXEC_SEAL 0x0008U
+union semun {
+ int val;
+ struct semid_ds *buf;
+ unsigned short int *array;
+ struct seminfo *__buf;
+};
+
+/*
+ * We use semaphores for the nested wait tasks due to the use of
+ * CLONE_NEWPID: the child will be PID 1 and can't send SIGSTOP to itself
+ * due to the special treatment of the init task, so the SIGSTOP/SIGCONT
+ * synchronization approach can't be used here.
+ */
+#define SEM_KEY 0xdeadbeef
/*
* Default is not to test hugetlbfs
@@ -1333,8 +1350,22 @@ static int sysctl_nested(void *arg)
static int sysctl_nested_wait(void *arg)
{
- /* Wait for a SIGCONT. */
- kill(getpid(), SIGSTOP);
+ int sem = semget(SEM_KEY, 1, 0600);
+ struct sembuf sembuf;
+
+ if (sem < 0) {
+ perror("semget:");
+ abort();
+ }
+ sembuf.sem_num = 0;
+ sembuf.sem_flg = 0;
+ sembuf.sem_op = 0;
+
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ abort();
+ }
+
return sysctl_nested(arg);
}
@@ -1355,7 +1386,9 @@ static void test_sysctl_sysctl2_failset(void)
static int sysctl_nested_child(void *arg)
{
- int pid;
+ int pid, sem;
+ union semun semun;
+ struct sembuf sembuf;
printf("%s nested sysctl 0\n", memfd_str);
sysctl_assert_write("0");
@@ -1389,23 +1422,53 @@ static int sysctl_nested_child(void *arg)
test_sysctl_sysctl2_failset);
join_thread(pid);
+ sem = semget(SEM_KEY, 1, IPC_CREAT | 0600);
+ if (sem < 0) {
+ perror("semget:");
+ return 1;
+ }
+ semun.val = 1;
+ sembuf.sem_op = -1;
+ sembuf.sem_flg = 0;
+ sembuf.sem_num = 0;
+
/* Verify that the rules are actually inherited after fork. */
printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str);
sysctl_assert_write("0");
+ if (semctl(sem, 0, SETVAL, semun) < 0) {
+ perror("semctl:");
+ return 1;
+ }
+
pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
test_sysctl_sysctl1_failset);
sysctl_assert_write("1");
- kill(pid, SIGCONT);
+
+ /* Allow child to continue */
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ return 1;
+ }
join_thread(pid);
printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str);
sysctl_assert_write("0");
+ if (semctl(sem, 0, SETVAL, semun) < 0) {
+ perror("semctl:");
+ return 1;
+ }
+
pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
test_sysctl_sysctl2_failset);
sysctl_assert_write("2");
- kill(pid, SIGCONT);
+
+ /* Allow child to continue */
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ return 1;
+ }
join_thread(pid);
/*
@@ -1415,28 +1478,62 @@ static int sysctl_nested_child(void *arg)
*/
printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str);
sysctl_assert_write("2");
+
+ if (semctl(sem, 0, SETVAL, semun) < 0) {
+ perror("semctl:");
+ return 1;
+ }
+
pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
test_sysctl_sysctl2);
sysctl_assert_write("1");
- kill(pid, SIGCONT);
+
+ /* Allow child to continue */
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ return 1;
+ }
join_thread(pid);
printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str);
sysctl_assert_write("2");
+
+ if (semctl(sem, 0, SETVAL, semun) < 0) {
+ perror("semctl:");
+ return 1;
+ }
+
pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
test_sysctl_sysctl2);
sysctl_assert_write("0");
- kill(pid, SIGCONT);
+
+ /* Allow child to continue */
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ return 1;
+ }
join_thread(pid);
printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str);
sysctl_assert_write("1");
+
+ if (semctl(sem, 0, SETVAL, semun) < 0) {
+ perror("semctl:");
+ return 1;
+ }
+
pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
test_sysctl_sysctl1);
sysctl_assert_write("0");
- kill(pid, SIGCONT);
+ /* Allow child to continue */
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ return 1;
+ }
join_thread(pid);
+ semctl(sem, 0, IPC_RMID);
+
return 0;
}
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index c2a8586e51a1..83ad9454dd9d 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -12,6 +12,7 @@ map_hugetlb
map_populate
thuge-gen
compaction_test
+memory-failure
migration
mlock2-tests
mrelease_test
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index eaf9312097f7..7a5de4e9bf52 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -1,6 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for mm selftests
+# IMPORTANT: If you add a new test CATEGORY please add a simple wrapper
+# script so kunit knows to run it, and add it to the list below.
+# If you do not YOUR TESTS WILL NOT RUN IN THE CI.
+
LOCAL_HDRS += $(selfdir)/mm/local_config.h $(top_srcdir)/mm/gup_test.h
LOCAL_HDRS += $(selfdir)/mm/mseal_helpers.h
@@ -44,14 +48,10 @@ LDLIBS = -lrt -lpthread -lm
# warnings.
CFLAGS += -U_FORTIFY_SOURCE
-KDIR ?= /lib/modules/$(shell uname -r)/build
+KDIR ?= $(if $(O),$(O),$(realpath ../../../..))
ifneq (,$(wildcard $(KDIR)/Module.symvers))
-ifneq (,$(wildcard $(KDIR)/include/linux/page_frag_cache.h))
TEST_GEN_MODS_DIR := page_frag
else
-PAGE_FRAG_WARNING = "missing page_frag_cache.h, please use a newer kernel"
-endif
-else
PAGE_FRAG_WARNING = "missing Module.symvers, please have the kernel built first"
endif
@@ -72,9 +72,10 @@ TEST_GEN_FILES += madv_populate
TEST_GEN_FILES += map_fixed_noreplace
TEST_GEN_FILES += map_hugetlb
TEST_GEN_FILES += map_populate
-ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64))
+ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64 loongarch32 loongarch64))
TEST_GEN_FILES += memfd_secret
endif
+TEST_GEN_FILES += memory-failure
TEST_GEN_FILES += migration
TEST_GEN_FILES += mkdirty
TEST_GEN_FILES += mlock-random-test
@@ -140,13 +141,37 @@ endif
ifneq (,$(filter $(ARCH),arm64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390))
TEST_GEN_FILES += va_high_addr_switch
-ifneq ($(ARCH),riscv64)
-TEST_GEN_FILES += virtual_address_range
-endif
TEST_GEN_FILES += write_to_hugetlbfs
endif
-TEST_PROGS := run_vmtests.sh
+# Per-category wrappers around run_vmtests.sh. Every ksft_*.sh wrapper
+# must be listed here, otherwise its category is never run.
+TEST_PROGS += ksft_compaction.sh
+TEST_PROGS += ksft_cow.sh
+TEST_PROGS += ksft_gup_test.sh
+TEST_PROGS += ksft_hmm.sh
+TEST_PROGS += ksft_hugetlb.sh
+TEST_PROGS += ksft_hugevm.sh
+TEST_PROGS += ksft_ksm.sh
+TEST_PROGS += ksft_ksm_numa.sh
+TEST_PROGS += ksft_madv_guard.sh
+TEST_PROGS += ksft_madv_populate.sh
+TEST_PROGS += ksft_mdwe.sh
+TEST_PROGS += ksft_memfd_secret.sh
+TEST_PROGS += ksft_memory_failure.sh
+TEST_PROGS += ksft_migration.sh
+TEST_PROGS += ksft_mkdirty.sh
+TEST_PROGS += ksft_mlock.sh
+TEST_PROGS += ksft_mmap.sh
+TEST_PROGS += ksft_mremap.sh
+TEST_PROGS += ksft_page_frag.sh
+TEST_PROGS += ksft_pagemap.sh
+TEST_PROGS += ksft_pfnmap.sh
+TEST_PROGS += ksft_pkey.sh
+TEST_PROGS += ksft_process_madv.sh
+TEST_PROGS += ksft_process_mrelease.sh
+TEST_PROGS += ksft_rmap.sh
+TEST_PROGS += ksft_soft_dirty.sh
+TEST_PROGS += ksft_thp.sh
+TEST_PROGS += ksft_userfaultfd.sh
+TEST_PROGS += ksft_vma_merge.sh
+TEST_PROGS += ksft_vmalloc.sh
TEST_FILES := test_vmalloc.sh
TEST_FILES += test_hmm.sh
@@ -154,6 +179,7 @@ TEST_FILES += va_high_addr_switch.sh
TEST_FILES += charge_reserved_hugetlb.sh
TEST_FILES += hugetlb_reparenting_test.sh
TEST_FILES += test_page_frag.sh
+TEST_FILES += run_vmtests.sh
# required by charge_reserved_hugetlb.sh
TEST_FILES += write_hugetlb_memory.sh
@@ -234,7 +260,7 @@ $(OUTPUT)/migration: LDLIBS += -lnuma
$(OUTPUT)/rmap: LDLIBS += -lnuma
local_config.mk local_config.h: check_config.sh
- /bin/sh ./check_config.sh $(CC)
+ CC="$(CC)" CFLAGS="$(CFLAGS)" ./check_config.sh
EXTRA_CLEAN += local_config.mk local_config.h
diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
index e1fe16bcbbe8..447769657634 100755
--- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
@@ -100,7 +100,7 @@ function setup_cgroup() {
echo writing cgroup limit: "$cgroup_limit"
echo "$cgroup_limit" >$cgroup_path/$name/hugetlb.${MB}MB.$fault_limit_file
- echo writing reseravation limit: "$reservation_limit"
+ echo writing reservation limit: "$reservation_limit"
echo "$reservation_limit" > \
$cgroup_path/$name/hugetlb.${MB}MB.$reservation_limit_file
@@ -112,41 +112,50 @@ function setup_cgroup() {
fi
}
+# Poll a file until its content equals an expected string.
+#   $1: path of the file to read
+#   $2: expected content
+# Returns 0 as soon as the content matches, 1 if the file is not readable
+# or the value was not reached after max_tries polls one second apart.
+function wait_for_file_value() {
+	local path="$1"
+	local expect="$2"
+	local max_tries=60
+	# Keep the loop counter and the sampled value local so that variables
+	# of the same name in callers are not clobbered.
+	local i cur
+
+	if [[ ! -r "$path" ]]; then
+		echo "ERROR: cannot read '$path', missing or permission denied"
+		return 1
+	fi
+
+	for ((i=1; i<=max_tries; i++)); do
+		cur="$(cat "$path")"
+		if [[ "$cur" == "$expect" ]]; then
+			return 0
+		fi
+		echo "Waiting for $path to become '$expect' (current: '$cur') (try $i/$max_tries)"
+		sleep 1
+	done
+
+	echo "ERROR: timeout waiting for $path to become '$expect'"
+	return 1
+}
+
function wait_for_hugetlb_memory_to_get_depleted() {
local cgroup="$1"
local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
- # Wait for hugetlbfs memory to get depleted.
- while [ $(cat $path) != 0 ]; do
- echo Waiting for hugetlb memory to get depleted.
- cat $path
- sleep 0.5
- done
+
+ wait_for_file_value "$path" "0"
}
function wait_for_hugetlb_memory_to_get_reserved() {
local cgroup="$1"
local size="$2"
-
local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
- # Wait for hugetlbfs memory to get written.
- while [ $(cat $path) != $size ]; do
- echo Waiting for hugetlb memory reservation to reach size $size.
- cat $path
- sleep 0.5
- done
+
+ wait_for_file_value "$path" "$size"
}
function wait_for_hugetlb_memory_to_get_written() {
local cgroup="$1"
local size="$2"
-
local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
- # Wait for hugetlbfs memory to get written.
- while [ $(cat $path) != $size ]; do
- echo Waiting for hugetlb memory to reach size $size.
- cat $path
- sleep 0.5
- done
+
+ wait_for_file_value "$path" "$size"
}
function write_hugetlbfs_and_get_usage() {
@@ -290,7 +299,7 @@ function run_test() {
setup_cgroup "hugetlb_cgroup_test" "$cgroup_limit" "$reservation_limit"
mkdir -p /mnt/huge
- mount -t hugetlbfs -o pagesize=${MB}M,size=256M none /mnt/huge
+ mount -t hugetlbfs -o pagesize=${MB}M none /mnt/huge
write_hugetlbfs_and_get_usage "hugetlb_cgroup_test" "$size" "$populate" \
"$write" "/mnt/huge/test" "$method" "$private" "$expect_failure" \
@@ -344,7 +353,7 @@ function run_multiple_cgroup_test() {
setup_cgroup "hugetlb_cgroup_test2" "$cgroup_limit2" "$reservation_limit2"
mkdir -p /mnt/huge
- mount -t hugetlbfs -o pagesize=${MB}M,size=256M none /mnt/huge
+ mount -t hugetlbfs -o pagesize=${MB}M none /mnt/huge
write_hugetlbfs_and_get_usage "hugetlb_cgroup_test1" "$size1" \
"$populate1" "$write1" "/mnt/huge/test1" "$method" "$private" \
diff --git a/tools/testing/selftests/mm/check_config.sh b/tools/testing/selftests/mm/check_config.sh
index 3954f4746161..b84c82bbf875 100755
--- a/tools/testing/selftests/mm/check_config.sh
+++ b/tools/testing/selftests/mm/check_config.sh
@@ -16,8 +16,7 @@ echo "#include <sys/types.h>" > $tmpfile_c
echo "#include <liburing.h>" >> $tmpfile_c
echo "int func(void) { return 0; }" >> $tmpfile_c
-CC=${1:?"Usage: $0 <compiler> # example compiler: gcc"}
-$CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1
+$CC $CFLAGS -c $tmpfile_c -o $tmpfile_o
if [ -f $tmpfile_o ]; then
echo "#define LOCAL_CONFIG_HAVE_LIBURING 1" > $OUTPUT_H_FILE
diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config
index deba93379c80..1dbe2b4558ab 100644
--- a/tools/testing/selftests/mm/config
+++ b/tools/testing/selftests/mm/config
@@ -11,3 +11,5 @@ CONFIG_ANON_VMA_NAME=y
CONFIG_FTRACE=y
CONFIG_PROFILING=y
CONFIG_UPROBES=y
+CONFIG_MEMORY_FAILURE=y
+CONFIG_HWPOISON_INJECT=m
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index accfd198dbda..d9c69c04b67d 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -75,6 +75,18 @@ static bool range_is_swapped(void *addr, size_t size)
return true;
}
+/*
+ * Read one byte at @addr, then ask the pagemap whether the page is now
+ * populated. Prints a message and returns false when it is not.
+ */
+static bool populate_page_checked(char *addr)
+{
+	FORCE_READ(*addr);
+
+	if (pagemap_is_populated(pagemap_fd, addr))
+		return true;
+
+	ksft_print_msg("Failed to populate page\n");
+	return false;
+}
+
struct comm_pipes {
int child_ready[2];
int parent_ready[2];
@@ -1549,8 +1561,10 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
}
/* Read from the page to populate the shared zeropage. */
- FORCE_READ(*mem);
- FORCE_READ(*smem);
+ if (!populate_page_checked(mem) || !populate_page_checked(smem)) {
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
fn(mem, smem, pagesize);
munmap:
@@ -1612,8 +1626,11 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
* the first sub-page and test if we get another sub-page populated
* automatically.
*/
- FORCE_READ(mem);
- FORCE_READ(smem);
+ if (!populate_page_checked(mem) || !populate_page_checked(smem)) {
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+
if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
!pagemap_is_populated(pagemap_fd, smem + pagesize)) {
ksft_test_result_skip("Did not get THPs populated\n");
@@ -1663,8 +1680,10 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc)
}
/* Fault the page in. */
- FORCE_READ(mem);
- FORCE_READ(smem);
+ if (!populate_page_checked(mem) || !populate_page_checked(smem)) {
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
fn(mem, smem, pagesize);
munmap:
@@ -1719,8 +1738,10 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
}
/* Fault the page in. */
- FORCE_READ(mem);
- FORCE_READ(smem);
+ if (!populate_page_checked(mem) || !populate_page_checked(smem)) {
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
fn(mem, smem, pagesize);
munmap:
@@ -1773,8 +1794,10 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
}
/* Fault the page in. */
- FORCE_READ(mem);
- FORCE_READ(smem);
+ if (!populate_page_checked(mem) || !populate_page_checked(smem)) {
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
fn(mem, smem, hugetlbsize);
munmap:
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index 6279893a0adc..f61150d28eb2 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -179,7 +179,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
if (rw && shared && fs_is_unknown(fs_type)) {
ksft_print_msg("Unknown filesystem\n");
result = KSFT_SKIP;
- return;
+ break;
}
/*
* R/O pinning or pinning in a private mapping is always
diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c
index 05d9d2805ae4..5b12041fa310 100644
--- a/tools/testing/selftests/mm/hugetlb-madvise.c
+++ b/tools/testing/selftests/mm/hugetlb-madvise.c
@@ -47,14 +47,7 @@ void write_fault_pages(void *addr, unsigned long nr_pages)
void read_fault_pages(void *addr, unsigned long nr_pages)
{
- unsigned long i;
-
- for (i = 0; i < nr_pages; i++) {
- unsigned long *addr2 =
- ((unsigned long *)(addr + (i * huge_page_size)));
- /* Prevent the compiler from optimizing out the entire loop: */
- FORCE_READ(*addr2);
- }
+ force_read_pages(addr, nr_pages, huge_page_size);
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/mm/ksft_compaction.sh b/tools/testing/selftests/mm/ksft_compaction.sh
new file mode 100755
index 000000000000..1f38f4228a34
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_compaction.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t compaction
diff --git a/tools/testing/selftests/mm/ksft_cow.sh b/tools/testing/selftests/mm/ksft_cow.sh
new file mode 100755
index 000000000000..1e03a95fd5f6
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_cow.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t cow
diff --git a/tools/testing/selftests/mm/ksft_gup_test.sh b/tools/testing/selftests/mm/ksft_gup_test.sh
new file mode 100755
index 000000000000..09e586d2f446
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_gup_test.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t gup_test
diff --git a/tools/testing/selftests/mm/ksft_hmm.sh b/tools/testing/selftests/mm/ksft_hmm.sh
new file mode 100755
index 000000000000..0a7b04f454d5
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_hmm.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t hmm
diff --git a/tools/testing/selftests/mm/ksft_hugetlb.sh b/tools/testing/selftests/mm/ksft_hugetlb.sh
new file mode 100755
index 000000000000..4f92974a4eb5
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_hugetlb.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t hugetlb
diff --git a/tools/testing/selftests/mm/ksft_hugevm.sh b/tools/testing/selftests/mm/ksft_hugevm.sh
new file mode 100755
index 000000000000..377967fe9c91
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_hugevm.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t hugevm
diff --git a/tools/testing/selftests/mm/ksft_ksm.sh b/tools/testing/selftests/mm/ksft_ksm.sh
new file mode 100755
index 000000000000..f6a6fe13a3b0
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_ksm.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t ksm
diff --git a/tools/testing/selftests/mm/ksft_ksm_numa.sh b/tools/testing/selftests/mm/ksft_ksm_numa.sh
new file mode 100755
index 000000000000..144b41a5e3bb
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_ksm_numa.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t ksm_numa
diff --git a/tools/testing/selftests/mm/ksft_madv_guard.sh b/tools/testing/selftests/mm/ksft_madv_guard.sh
new file mode 100755
index 000000000000..2d810c049182
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_madv_guard.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t madv_guard
diff --git a/tools/testing/selftests/mm/ksft_madv_populate.sh b/tools/testing/selftests/mm/ksft_madv_populate.sh
new file mode 100755
index 000000000000..127e22ed02c4
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_madv_populate.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t madv_populate
diff --git a/tools/testing/selftests/mm/ksft_mdwe.sh b/tools/testing/selftests/mm/ksft_mdwe.sh
new file mode 100755
index 000000000000..3dcae95ddabc
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_mdwe.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t mdwe
diff --git a/tools/testing/selftests/mm/ksft_memfd_secret.sh b/tools/testing/selftests/mm/ksft_memfd_secret.sh
new file mode 100755
index 000000000000..56e82dd648a7
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_memfd_secret.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t memfd_secret
diff --git a/tools/testing/selftests/mm/ksft_memory_failure.sh b/tools/testing/selftests/mm/ksft_memory_failure.sh
new file mode 100755
index 000000000000..ae1614d4d49b
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_memory_failure.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t memory-failure
diff --git a/tools/testing/selftests/mm/ksft_migration.sh b/tools/testing/selftests/mm/ksft_migration.sh
new file mode 100755
index 000000000000..7cf37c72d26e
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_migration.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t migration
diff --git a/tools/testing/selftests/mm/ksft_mkdirty.sh b/tools/testing/selftests/mm/ksft_mkdirty.sh
new file mode 100755
index 000000000000..dd6332df3204
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_mkdirty.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t mkdirty
diff --git a/tools/testing/selftests/mm/ksft_mlock.sh b/tools/testing/selftests/mm/ksft_mlock.sh
new file mode 100755
index 000000000000..1e25ab9fdc8b
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_mlock.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t mlock
diff --git a/tools/testing/selftests/mm/ksft_mmap.sh b/tools/testing/selftests/mm/ksft_mmap.sh
new file mode 100755
index 000000000000..2c3137ae8bc8
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_mmap.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t mmap
diff --git a/tools/testing/selftests/mm/ksft_mremap.sh b/tools/testing/selftests/mm/ksft_mremap.sh
new file mode 100755
index 000000000000..4101670d0e19
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_mremap.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t mremap
diff --git a/tools/testing/selftests/mm/ksft_page_frag.sh b/tools/testing/selftests/mm/ksft_page_frag.sh
new file mode 100755
index 000000000000..216e20ffe390
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_page_frag.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t page_frag
diff --git a/tools/testing/selftests/mm/ksft_pagemap.sh b/tools/testing/selftests/mm/ksft_pagemap.sh
new file mode 100755
index 000000000000..b8d270fdd43e
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_pagemap.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t pagemap
diff --git a/tools/testing/selftests/mm/ksft_pfnmap.sh b/tools/testing/selftests/mm/ksft_pfnmap.sh
new file mode 100755
index 000000000000..75758de968bb
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_pfnmap.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t pfnmap
diff --git a/tools/testing/selftests/mm/ksft_pkey.sh b/tools/testing/selftests/mm/ksft_pkey.sh
new file mode 100755
index 000000000000..ac944233b7f7
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_pkey.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t pkey
diff --git a/tools/testing/selftests/mm/ksft_process_madv.sh b/tools/testing/selftests/mm/ksft_process_madv.sh
new file mode 100755
index 000000000000..2c3137ae8bc8
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_process_madv.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t process_madv
diff --git a/tools/testing/selftests/mm/ksft_process_mrelease.sh b/tools/testing/selftests/mm/ksft_process_mrelease.sh
new file mode 100755
index 000000000000..f560aa5e4218
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_process_mrelease.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t process_mrelease
diff --git a/tools/testing/selftests/mm/ksft_rmap.sh b/tools/testing/selftests/mm/ksft_rmap.sh
new file mode 100755
index 000000000000..974742b9b02f
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_rmap.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t rmap
diff --git a/tools/testing/selftests/mm/ksft_soft_dirty.sh b/tools/testing/selftests/mm/ksft_soft_dirty.sh
new file mode 100755
index 000000000000..d160d7fea0a9
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_soft_dirty.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t soft_dirty
diff --git a/tools/testing/selftests/mm/ksft_thp.sh b/tools/testing/selftests/mm/ksft_thp.sh
new file mode 100755
index 000000000000..95321aecabdb
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_thp.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t thp
diff --git a/tools/testing/selftests/mm/ksft_userfaultfd.sh b/tools/testing/selftests/mm/ksft_userfaultfd.sh
new file mode 100755
index 000000000000..92667abde6c6
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_userfaultfd.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t userfaultfd
diff --git a/tools/testing/selftests/mm/ksft_vma_merge.sh b/tools/testing/selftests/mm/ksft_vma_merge.sh
new file mode 100755
index 000000000000..68449d840680
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_vma_merge.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t vma_merge
diff --git a/tools/testing/selftests/mm/ksft_vmalloc.sh b/tools/testing/selftests/mm/ksft_vmalloc.sh
new file mode 100755
index 000000000000..0b5019a76612
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_vmalloc.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0
+
+./run_vmtests.sh -t vmalloc
diff --git a/tools/testing/selftests/mm/memory-failure.c b/tools/testing/selftests/mm/memory-failure.c
new file mode 100644
index 000000000000..3d9e0b9ffb41
--- /dev/null
+++ b/tools/testing/selftests/mm/memory-failure.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Memory-failure functional tests.
+ *
+ * Author(s): Miaohe Lin <linmiaohe@huawei.com>
+ */
+
+#include "../kselftest_harness.h"
+
+#include <sys/mman.h>
+#include <linux/mman.h>
+#include <linux/string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/vfs.h>
+#include <linux/magic.h>
+#include <errno.h>
+
+#include "vm_util.h"
+
+enum inject_type {
+ MADV_HARD,
+ MADV_SOFT,
+};
+
+enum result_type {
+ MADV_HARD_ANON,
+ MADV_HARD_CLEAN_PAGECACHE,
+ MADV_HARD_DIRTY_PAGECACHE,
+ MADV_SOFT_ANON,
+ MADV_SOFT_CLEAN_PAGECACHE,
+ MADV_SOFT_DIRTY_PAGECACHE,
+};
+
+static jmp_buf signal_jmp_buf;
+static siginfo_t siginfo;
+const char *pagemap_proc = "/proc/self/pagemap";
+const char *kpageflags_proc = "/proc/kpageflags";
+
+FIXTURE(memory_failure)
+{
+ unsigned long page_size;
+ unsigned long corrupted_size;
+ unsigned long pfn;
+ int pagemap_fd;
+ int kpageflags_fd;
+ bool triggered;
+};
+
+FIXTURE_VARIANT(memory_failure)
+{
+ enum inject_type type;
+ int (*inject)(FIXTURE_DATA(memory_failure) * self, void *vaddr);
+};
+
+static int madv_hard_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr)
+{
+ return madvise(vaddr, self->page_size, MADV_HWPOISON);
+}
+
+FIXTURE_VARIANT_ADD(memory_failure, madv_hard)
+{
+ .type = MADV_HARD,
+ .inject = madv_hard_inject,
+};
+
+static int madv_soft_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr)
+{
+ return madvise(vaddr, self->page_size, MADV_SOFT_OFFLINE);
+}
+
+FIXTURE_VARIANT_ADD(memory_failure, madv_soft)
+{
+ .type = MADV_SOFT,
+ .inject = madv_soft_inject,
+};
+
+static void sigbus_action(int signo, siginfo_t *si, void *args)
+{
+ memcpy(&siginfo, si, sizeof(siginfo_t));
+ siglongjmp(signal_jmp_buf, 1);
+}
+
+/*
+ * Install sigbus_action() as the SA_SIGINFO handler for SIGBUS.
+ * Returns the result of sigaction().
+ */
+static int setup_sighandler(void)
+{
+	struct sigaction act;
+
+	memset(&act, 0, sizeof(act));
+	act.sa_sigaction = sigbus_action;
+	act.sa_flags = SA_SIGINFO;
+
+	return sigaction(SIGBUS, &act, NULL);
+}
+
+FIXTURE_SETUP(memory_failure)
+{
+ memset(self, 0, sizeof(*self));
+
+ self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
+
+ memset(&siginfo, 0, sizeof(siginfo));
+ if (setup_sighandler())
+ SKIP(return, "setup sighandler failed.\n");
+
+ self->pagemap_fd = open(pagemap_proc, O_RDONLY);
+ if (self->pagemap_fd == -1)
+ SKIP(return, "open %s failed.\n", pagemap_proc);
+
+ self->kpageflags_fd = open(kpageflags_proc, O_RDONLY);
+ if (self->kpageflags_fd == -1)
+ SKIP(return, "open %s failed.\n", kpageflags_proc);
+}
+
+/* Put SIGBUS back to its default disposition. */
+static void teardown_sighandler(void)
+{
+	/*
+	 * SIG_DFL is a sa_handler value. SA_SIGINFO selects sa_sigaction
+	 * instead, so it must not be set here.
+	 */
+	struct sigaction sa = {
+		.sa_handler = SIG_DFL,
+	};
+
+	sigaction(SIGBUS, &sa, NULL);
+}
+
+FIXTURE_TEARDOWN(memory_failure)
+{
+ close(self->kpageflags_fd);
+ close(self->pagemap_fd);
+ teardown_sighandler();
+}
+
+static void prepare(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self,
+ void *vaddr)
+{
+ self->pfn = pagemap_get_pfn(self->pagemap_fd, vaddr);
+ ASSERT_NE(self->pfn, -1UL);
+
+ ASSERT_EQ(get_hardware_corrupted_size(&self->corrupted_size), 0);
+}
+
+/*
+ * Verify that all @size bytes starting at @vaddr still hold the 0xce fill
+ * pattern written by the tests.
+ *
+ * Returns true when every byte matches (including for @size == 0), false
+ * on the first mismatch.
+ */
+static bool check_memory(void *vaddr, unsigned long size)
+{
+	/* Byte pointer: arithmetic on void * is a GNU extension. */
+	const unsigned char *p = vaddr;
+	unsigned long i;
+
+	/* Compare byte-wise so a tail shorter than a chunk is not skipped. */
+	for (i = 0; i < size; i++) {
+		if (p[i] != 0xce)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Validate the outcome of an injection on @vaddr.
+ *
+ * @type:    expected outcome class for this injection/mapping combination.
+ * @jmp_ret: value returned by sigsetjmp() in the caller: 0 when no SIGBUS
+ *           was delivered, 1 when sigbus_action() jumped back.
+ *
+ * The parameter is deliberately not called "setjmp": that name is reserved
+ * as a macro once <setjmp.h> is included.
+ */
+static void check(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self,
+		  void *vaddr, enum result_type type, int jmp_ret)
+{
+	unsigned long size;
+	uint64_t pfn_flags;
+
+	switch (type) {
+	case MADV_SOFT_ANON:
+	case MADV_HARD_CLEAN_PAGECACHE:
+	case MADV_SOFT_CLEAN_PAGECACHE:
+	case MADV_SOFT_DIRTY_PAGECACHE:
+		/* It is not expected to receive a SIGBUS signal. */
+		ASSERT_EQ(jmp_ret, 0);
+
+		/* The page content should remain unchanged. */
+		ASSERT_TRUE(check_memory(vaddr, self->page_size));
+
+		/* The backing pfn of addr should have changed. */
+		ASSERT_NE(pagemap_get_pfn(self->pagemap_fd, vaddr), self->pfn);
+		break;
+	case MADV_HARD_ANON:
+	case MADV_HARD_DIRTY_PAGECACHE:
+		/* The SIGBUS signal should have been received. */
+		ASSERT_EQ(jmp_ret, 1);
+
+		/* Check if siginfo contains correct SIGBUS context. */
+		ASSERT_EQ(siginfo.si_signo, SIGBUS);
+		ASSERT_EQ(siginfo.si_code, BUS_MCEERR_AR);
+		ASSERT_EQ(1UL << siginfo.si_addr_lsb, self->page_size);
+		ASSERT_EQ(siginfo.si_addr, vaddr);
+
+		/* XXX Check backing pte is hwpoison entry when supported. */
+		ASSERT_TRUE(pagemap_is_swapped(self->pagemap_fd, vaddr));
+		break;
+	default:
+		SKIP(return, "unexpected inject type %d.\n", type);
+	}
+
+	/* Check if the value of HardwareCorrupted has increased. */
+	ASSERT_EQ(get_hardware_corrupted_size(&size), 0);
+	ASSERT_EQ(size, self->corrupted_size + self->page_size / 1024);
+
+	/* Check if HWPoison flag is set. */
+	ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0);
+	ASSERT_EQ(pfn_flags & KPF_HWPOISON, KPF_HWPOISON);
+}
+
+static void cleanup(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self,
+ void *vaddr)
+{
+ unsigned long size;
+ uint64_t pfn_flags;
+
+ ASSERT_EQ(unpoison_memory(self->pfn), 0);
+
+ /* Check if HWPoison flag is cleared. */
+ ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0);
+ ASSERT_NE(pfn_flags & KPF_HWPOISON, KPF_HWPOISON);
+
+ /* Check if the value of HardwareCorrupted has decreased. */
+ ASSERT_EQ(get_hardware_corrupted_size(&size), 0);
+ ASSERT_EQ(size, self->corrupted_size);
+}
+
+TEST_F(memory_failure, anon)
+{
+ char *addr;
+ int ret;
+
+ addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (addr == MAP_FAILED)
+ SKIP(return, "mmap failed, not enough memory.\n");
+ memset(addr, 0xce, self->page_size);
+
+ prepare(_metadata, self, addr);
+
+ ret = sigsetjmp(signal_jmp_buf, 1);
+ if (!self->triggered) {
+ self->triggered = true;
+ ASSERT_EQ(variant->inject(self, addr), 0);
+ FORCE_READ(*addr);
+ }
+
+ if (variant->type == MADV_HARD)
+ check(_metadata, self, addr, MADV_HARD_ANON, ret);
+ else
+ check(_metadata, self, addr, MADV_SOFT_ANON, ret);
+
+ cleanup(_metadata, self, addr);
+
+ ASSERT_EQ(munmap(addr, self->page_size), 0);
+}
+
+/*
+ * Create @fname, unlink it right away so that only the returned descriptor
+ * keeps it alive, and size it to @size bytes.
+ *
+ * Returns the open file descriptor, or a negative value on failure.
+ */
+static int prepare_file(const char *fname, unsigned long size)
+{
+	int fd;
+
+	fd = open(fname, O_RDWR | O_CREAT, 0664);
+	if (fd < 0)
+		return fd;
+
+	unlink(fname);
+
+	/*
+	 * The callers map and write @size bytes of this file. If it could
+	 * not be extended, that access would raise a SIGBUS unrelated to
+	 * the injection under test, so report the failure instead.
+	 */
+	if (ftruncate(fd, size)) {
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+/*
+ * Return the filesystem magic (f_type) of the filesystem backing @fd, or 0
+ * if fstatfs() fails. The call is retried while it is interrupted (EINTR).
+ *
+ * Borrowed from mm/gup_longterm.c.
+ */
+static int get_fs_type(int fd)
+{
+	struct statfs st;
+
+	while (fstatfs(fd, &st)) {
+		if (errno != EINTR)
+			return 0;
+	}
+
+	return (int)st.f_type;
+}
+
+TEST_F(memory_failure, clean_pagecache)
+{
+ int fd;
+ char *addr;
+ int ret;
+ int fs_type;
+
+ fd = prepare_file("./clean-page-cache-test-file", self->page_size);
+ if (fd < 0)
+ SKIP(return, "failed to open test file.\n");
+ fs_type = get_fs_type(fd);
+ if (!fs_type || fs_type == TMPFS_MAGIC)
+ SKIP(return, "unsupported filesystem :%x\n", fs_type);
+
+ addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (addr == MAP_FAILED)
+ SKIP(return, "mmap failed, not enough memory.\n");
+ memset(addr, 0xce, self->page_size);
+ fsync(fd);
+
+ prepare(_metadata, self, addr);
+
+ ret = sigsetjmp(signal_jmp_buf, 1);
+ if (!self->triggered) {
+ self->triggered = true;
+ ASSERT_EQ(variant->inject(self, addr), 0);
+ FORCE_READ(*addr);
+ }
+
+ if (variant->type == MADV_HARD)
+ check(_metadata, self, addr, MADV_HARD_CLEAN_PAGECACHE, ret);
+ else
+ check(_metadata, self, addr, MADV_SOFT_CLEAN_PAGECACHE, ret);
+
+ cleanup(_metadata, self, addr);
+
+ ASSERT_EQ(munmap(addr, self->page_size), 0);
+
+ ASSERT_EQ(close(fd), 0);
+}
+
+TEST_F(memory_failure, dirty_pagecache)
+{
+ int fd;
+ char *addr;
+ int ret;
+ int fs_type;
+
+ fd = prepare_file("./dirty-page-cache-test-file", self->page_size);
+ if (fd < 0)
+ SKIP(return, "failed to open test file.\n");
+ fs_type = get_fs_type(fd);
+ if (!fs_type || fs_type == TMPFS_MAGIC)
+ SKIP(return, "unsupported filesystem :%x\n", fs_type);
+
+ addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (addr == MAP_FAILED)
+ SKIP(return, "mmap failed, not enough memory.\n");
+ memset(addr, 0xce, self->page_size);
+
+ prepare(_metadata, self, addr);
+
+ ret = sigsetjmp(signal_jmp_buf, 1);
+ if (!self->triggered) {
+ self->triggered = true;
+ ASSERT_EQ(variant->inject(self, addr), 0);
+ FORCE_READ(*addr);
+ }
+
+ if (variant->type == MADV_HARD)
+ check(_metadata, self, addr, MADV_HARD_DIRTY_PAGECACHE, ret);
+ else
+ check(_metadata, self, addr, MADV_SOFT_DIRTY_PAGECACHE, ret);
+
+ cleanup(_metadata, self, addr);
+
+ ASSERT_EQ(munmap(addr, self->page_size), 0);
+
+ ASSERT_EQ(close(fd), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c
index 363c1033cc7d..10b686102b79 100644
--- a/tools/testing/selftests/mm/merge.c
+++ b/tools/testing/selftests/mm/merge.c
@@ -22,12 +22,37 @@ FIXTURE(merge)
struct procmap_fd procmap;
};
+static char *map_carveout(unsigned int page_size)
+{
+ return mmap(NULL, 30 * page_size, PROT_NONE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+}
+
+static pid_t do_fork(struct procmap_fd *procmap)
+{
+ pid_t pid = fork();
+
+ if (pid == -1)
+ return -1;
+ if (pid != 0) {
+ wait(NULL);
+ return pid;
+ }
+
+ /* Reopen for child. */
+ if (close_procmap(procmap))
+ return -1;
+ if (open_self_procmap(procmap))
+ return -1;
+
+ return 0;
+}
+
FIXTURE_SETUP(merge)
{
self->page_size = psize();
/* Carve out PROT_NONE region to map over. */
- self->carveout = mmap(NULL, 30 * self->page_size, PROT_NONE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ self->carveout = map_carveout(self->page_size);
ASSERT_NE(self->carveout, MAP_FAILED);
/* Setup PROCMAP_QUERY interface. */
ASSERT_EQ(open_self_procmap(&self->procmap), 0);
@@ -36,7 +61,8 @@ FIXTURE_SETUP(merge)
FIXTURE_TEARDOWN(merge)
{
ASSERT_EQ(munmap(self->carveout, 30 * self->page_size), 0);
- ASSERT_EQ(close_procmap(&self->procmap), 0);
+ /* May fail for parent of forked process. */
+ close_procmap(&self->procmap);
/*
* Clear unconditionally, as some tests set this. It is no issue if this
* fails (KSM may be disabled for instance).
@@ -44,6 +70,44 @@ FIXTURE_TEARDOWN(merge)
prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0);
}
+FIXTURE(merge_with_fork)
+{
+ unsigned int page_size;
+ char *carveout;
+ struct procmap_fd procmap;
+};
+
+FIXTURE_VARIANT(merge_with_fork)
+{
+ bool forked;
+};
+
+FIXTURE_VARIANT_ADD(merge_with_fork, forked)
+{
+ .forked = true,
+};
+
+FIXTURE_VARIANT_ADD(merge_with_fork, unforked)
+{
+ .forked = false,
+};
+
+FIXTURE_SETUP(merge_with_fork)
+{
+ self->page_size = psize();
+ self->carveout = map_carveout(self->page_size);
+ ASSERT_NE(self->carveout, MAP_FAILED);
+ ASSERT_EQ(open_self_procmap(&self->procmap), 0);
+}
+
+FIXTURE_TEARDOWN(merge_with_fork)
+{
+ ASSERT_EQ(munmap(self->carveout, 30 * self->page_size), 0);
+ ASSERT_EQ(close_procmap(&self->procmap), 0);
+ /* See above. */
+ prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0);
+}
+
TEST_F(merge, mprotect_unfaulted_left)
{
unsigned int page_size = self->page_size;
@@ -322,8 +386,8 @@ TEST_F(merge, forked_target_vma)
unsigned int page_size = self->page_size;
char *carveout = self->carveout;
struct procmap_fd *procmap = &self->procmap;
- pid_t pid;
char *ptr, *ptr2;
+ pid_t pid;
int i;
/*
@@ -344,19 +408,10 @@ TEST_F(merge, forked_target_vma)
*/
ptr[0] = 'x';
- pid = fork();
+ pid = do_fork(&self->procmap);
ASSERT_NE(pid, -1);
-
- if (pid != 0) {
- wait(NULL);
+ if (pid != 0)
return;
- }
-
- /* Child process below: */
-
- /* Reopen for child. */
- ASSERT_EQ(close_procmap(&self->procmap), 0);
- ASSERT_EQ(open_self_procmap(&self->procmap), 0);
/* unCOWing everything does not cause the AVC to go away. */
for (i = 0; i < 5 * page_size; i += page_size)
@@ -386,8 +441,8 @@ TEST_F(merge, forked_source_vma)
unsigned int page_size = self->page_size;
char *carveout = self->carveout;
struct procmap_fd *procmap = &self->procmap;
- pid_t pid;
char *ptr, *ptr2;
+ pid_t pid;
int i;
/*
@@ -408,19 +463,10 @@ TEST_F(merge, forked_source_vma)
*/
ptr[0] = 'x';
- pid = fork();
+ pid = do_fork(&self->procmap);
ASSERT_NE(pid, -1);
-
- if (pid != 0) {
- wait(NULL);
+ if (pid != 0)
return;
- }
-
- /* Child process below: */
-
- /* Reopen for child. */
- ASSERT_EQ(close_procmap(&self->procmap), 0);
- ASSERT_EQ(open_self_procmap(&self->procmap), 0);
/* unCOWing everything does not cause the AVC to go away. */
for (i = 0; i < 5 * page_size; i += page_size)
@@ -1171,4 +1217,288 @@ TEST_F(merge, mremap_correct_placed_faulted)
ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size);
}
+TEST_F(merge_with_fork, mremap_faulted_to_unfaulted_prev)
+{
+ struct procmap_fd *procmap = &self->procmap;
+ unsigned int page_size = self->page_size;
+ unsigned long offset;
+ char *ptr_a, *ptr_b;
+
+ /*
+ * mremap() such that A and B merge:
+ *
+ * |------------|
+ * | \ |
+ * |-----------| | / |---------|
+ * | unfaulted | v \ | faulted |
+ * |-----------| / |---------|
+ * B \ A
+ */
+
+ /* Map VMA A into place. */
+ ptr_a = mmap(&self->carveout[page_size + 3 * page_size],
+ 3 * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr_a, MAP_FAILED);
+ /* Fault it in. */
+ ptr_a[0] = 'x';
+
+ if (variant->forked) {
+ pid_t pid = do_fork(&self->procmap);
+
+ ASSERT_NE(pid, -1);
+ if (pid != 0)
+ return;
+ }
+
+ /*
+ * Now move it out of the way so we can place VMA B in position,
+ * unfaulted.
+ */
+ ptr_a = mremap(ptr_a, 3 * page_size, 3 * page_size,
+ MREMAP_FIXED | MREMAP_MAYMOVE, &self->carveout[20 * page_size]);
+ ASSERT_NE(ptr_a, MAP_FAILED);
+
+ /* Map VMA B into place. */
+ ptr_b = mmap(&self->carveout[page_size], 3 * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr_b, MAP_FAILED);
+
+ /*
+ * Now move VMA A into position with MREMAP_DONTUNMAP to catch incorrect
+ * anon_vma propagation.
+ */
+ ptr_a = mremap(ptr_a, 3 * page_size, 3 * page_size,
+ MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP,
+ &self->carveout[page_size + 3 * page_size]);
+ ASSERT_NE(ptr_a, MAP_FAILED);
+
+ /* The VMAs should have merged, if not forked. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr_b));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_b);
+
+ offset = variant->forked ? 3 * page_size : 6 * page_size;
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_b + offset);
+}
+
+TEST_F(merge_with_fork, mremap_faulted_to_unfaulted_next)
+{
+ struct procmap_fd *procmap = &self->procmap;
+ unsigned int page_size = self->page_size;
+ unsigned long offset;
+ char *ptr_a, *ptr_b;
+
+ /*
+ * mremap() such that A and B merge:
+ *
+ * |---------------------------|
+ * | \ |
+ * | |-----------| / |---------|
+ * v | unfaulted | \ | faulted |
+ * |-----------| / |---------|
+ * B \ A
+ *
+ * Then unmap VMA A to trigger the bug.
+ */
+
+ /* Map VMA A into place. */
+ ptr_a = mmap(&self->carveout[page_size], 3 * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr_a, MAP_FAILED);
+ /* Fault it in. */
+ ptr_a[0] = 'x';
+
+ if (variant->forked) {
+ pid_t pid = do_fork(&self->procmap);
+
+ ASSERT_NE(pid, -1);
+ if (pid != 0)
+ return;
+ }
+
+ /*
+ * Now move it out of the way so we can place VMA B in position,
+ * unfaulted.
+ */
+ ptr_a = mremap(ptr_a, 3 * page_size, 3 * page_size,
+ MREMAP_FIXED | MREMAP_MAYMOVE, &self->carveout[20 * page_size]);
+ ASSERT_NE(ptr_a, MAP_FAILED);
+
+ /* Map VMA B into place. */
+ ptr_b = mmap(&self->carveout[page_size + 3 * page_size], 3 * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr_b, MAP_FAILED);
+
+ /*
+ * Now move VMA A into position with MREMAP_DONTUNMAP to catch incorrect
+ * anon_vma propagation.
+ */
+ ptr_a = mremap(ptr_a, 3 * page_size, 3 * page_size,
+ MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP,
+ &self->carveout[page_size]);
+ ASSERT_NE(ptr_a, MAP_FAILED);
+
+ /* The VMAs should have merged, if not forked. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr_a));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_a);
+ offset = variant->forked ? 3 * page_size : 6 * page_size;
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_a + offset);
+}
+
+TEST_F(merge_with_fork, mremap_faulted_to_unfaulted_prev_unfaulted_next)
+{
+ struct procmap_fd *procmap = &self->procmap;
+ unsigned int page_size = self->page_size;
+ unsigned long offset;
+ char *ptr_a, *ptr_b, *ptr_c;
+
+ /*
+ * mremap() with MREMAP_DONTUNMAP such that A, B and C merge:
+ *
+ * |---------------------------|
+ * | \ |
+ * |-----------| | |-----------| / |---------|
+ * | unfaulted | v | unfaulted | \ | faulted |
+ * |-----------| |-----------| / |---------|
+ * A C \ B
+ */
+
+ /* Map VMA B into place. */
+ ptr_b = mmap(&self->carveout[page_size + 3 * page_size], 3 * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr_b, MAP_FAILED);
+ /* Fault it in. */
+ ptr_b[0] = 'x';
+
+ if (variant->forked) {
+ pid_t pid = do_fork(&self->procmap);
+
+ ASSERT_NE(pid, -1);
+ if (pid != 0)
+ return;
+ }
+
+ /*
+ * Now move it out of the way so we can place VMAs A, C in position,
+ * unfaulted.
+ */
+ ptr_b = mremap(ptr_b, 3 * page_size, 3 * page_size,
+ MREMAP_FIXED | MREMAP_MAYMOVE, &self->carveout[20 * page_size]);
+ ASSERT_NE(ptr_b, MAP_FAILED);
+
+ /* Map VMA A into place. */
+
+ ptr_a = mmap(&self->carveout[page_size], 3 * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr_a, MAP_FAILED);
+
+ /* Map VMA C into place. */
+ ptr_c = mmap(&self->carveout[page_size + 3 * page_size + 3 * page_size],
+ 3 * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr_c, MAP_FAILED);
+
+ /*
+ * Now move VMA B into position with MREMAP_DONTUNMAP to catch incorrect
+ * anon_vma propagation.
+ */
+ ptr_b = mremap(ptr_b, 3 * page_size, 3 * page_size,
+ MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP,
+ &self->carveout[page_size + 3 * page_size]);
+ ASSERT_NE(ptr_b, MAP_FAILED);
+
+ /* The VMAs should have merged, if not forked. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr_a));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_a);
+ offset = variant->forked ? 3 * page_size : 9 * page_size;
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_a + offset);
+
+ /* If forked, B and C should also not have merged. */
+ if (variant->forked) {
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr_b));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_b);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_b + 3 * page_size);
+ }
+}
+
+TEST_F(merge_with_fork, mremap_faulted_to_unfaulted_prev_faulted_next)
+{
+ struct procmap_fd *procmap = &self->procmap;
+ unsigned int page_size = self->page_size;
+ char *ptr_a, *ptr_b, *ptr_bc;
+
+ /*
+ * mremap() with MREMAP_DONTUNMAP such that A, B and C merge:
+ *
+ * |---------------------------|
+ * | \ |
+ * |-----------| | |-----------| / |---------|
+ * | unfaulted | v | faulted | \ | faulted |
+ * |-----------| |-----------| / |---------|
+ * A C \ B
+ */
+
+ /*
+ * Map VMA B and C into place. We have to map them together so their
+ * anon_vma is the same and the vma->vm_pgoff's are correctly aligned.
+ */
+ ptr_bc = mmap(&self->carveout[page_size + 3 * page_size],
+ 3 * page_size + 3 * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr_bc, MAP_FAILED);
+
+ /* Fault it in. */
+ ptr_bc[0] = 'x';
+
+ if (variant->forked) {
+ pid_t pid = do_fork(&self->procmap);
+
+ ASSERT_NE(pid, -1);
+ if (pid != 0)
+ return;
+ }
+
+ /*
+ * Now move VMA B out of the way (splitting VMA BC) so we can place VMA A
+ * in position, unfaulted, and leave the remainder of the VMA we just
+ * moved in place, faulted, as VMA C.
+ */
+ ptr_b = mremap(ptr_bc, 3 * page_size, 3 * page_size,
+ MREMAP_FIXED | MREMAP_MAYMOVE, &self->carveout[20 * page_size]);
+ ASSERT_NE(ptr_b, MAP_FAILED);
+
+ /* Map VMA A into place. */
+ ptr_a = mmap(&self->carveout[page_size], 3 * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr_a, MAP_FAILED);
+
+ /*
+ * Now move VMA B into position with MREMAP_DONTUNMAP to catch incorrect
+ * anon_vma propagation.
+ */
+ ptr_b = mremap(ptr_b, 3 * page_size, 3 * page_size,
+ MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP,
+ &self->carveout[page_size + 3 * page_size]);
+ ASSERT_NE(ptr_b, MAP_FAILED);
+
+ /* The VMAs should have merged: A, B, C if unforked; B, C if forked. */
+ if (variant->forked) {
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr_b));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_b);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_b + 6 * page_size);
+ } else {
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr_a));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_a);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_a + 9 * page_size);
+ }
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/page_frag/Makefile b/tools/testing/selftests/mm/page_frag/Makefile
index 8c8bb39ffa28..96e5f646e69b 100644
--- a/tools/testing/selftests/mm/page_frag/Makefile
+++ b/tools/testing/selftests/mm/page_frag/Makefile
@@ -1,5 +1,5 @@
PAGE_FRAG_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
-KDIR ?= /lib/modules/$(shell uname -r)/build
+KDIR ?= $(if $(O),$(O),$(realpath ../../../../..))
ifeq ($(V),1)
Q =
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
index 2cb5441f29c7..2ca8a7e3c27e 100644
--- a/tools/testing/selftests/mm/pagemap_ioctl.c
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -1052,11 +1052,10 @@ static void test_simple(void)
int sanity_tests(void)
{
unsigned long long mem_size, vec_size;
- long ret, fd, i, buf_size;
+ long ret, fd, i, buf_size, nr_pages;
struct page_region *vec;
char *mem, *fmem;
struct stat sbuf;
- char *tmp_buf;
/* 1. wrong operation */
mem_size = 10 * page_size;
@@ -1167,14 +1166,14 @@ int sanity_tests(void)
if (fmem == MAP_FAILED)
ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
- tmp_buf = malloc(sbuf.st_size);
- memcpy(tmp_buf, fmem, sbuf.st_size);
+ nr_pages = (sbuf.st_size + page_size - 1) / page_size;
+ force_read_pages(fmem, nr_pages, page_size);
ret = pagemap_ioctl(fmem, sbuf.st_size, vec, vec_size, 0, 0,
0, PAGEMAP_NON_WRITTEN_BITS, 0, PAGEMAP_NON_WRITTEN_BITS);
ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)fmem &&
- LEN(vec[0]) == ceilf((float)sbuf.st_size/page_size) &&
+ LEN(vec[0]) == nr_pages &&
(vec[0].categories & PAGE_IS_FILE),
"%s Memory mapped file\n", __func__);
@@ -1553,7 +1552,7 @@ int main(int __attribute__((unused)) argc, char *argv[])
ksft_print_header();
if (init_uffd())
- ksft_exit_pass();
+ ksft_exit_skip("Failed to initialize userfaultfd\n");
ksft_set_plan(117);
@@ -1562,7 +1561,7 @@ int main(int __attribute__((unused)) argc, char *argv[])
pagemap_fd = open(PAGEMAP, O_RDONLY);
if (pagemap_fd < 0)
- return -EINVAL;
+ ksft_exit_fail_msg("Failed to open " PAGEMAP "\n");
/* 1. Sanity testing */
sanity_tests_sd();
@@ -1734,5 +1733,5 @@ int main(int __attribute__((unused)) argc, char *argv[])
zeropfn_tests();
close(pagemap_fd);
- ksft_exit_pass();
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c
index f546dfb10cae..4f550822385a 100644
--- a/tools/testing/selftests/mm/pfnmap.c
+++ b/tools/testing/selftests/mm/pfnmap.c
@@ -25,8 +25,12 @@
#include "kselftest_harness.h"
#include "vm_util.h"
+#define DEV_MEM_NPAGES 2
+
static sigjmp_buf sigjmp_buf_env;
static char *file = "/dev/mem";
+static off_t file_offset;
+static int fd;
static void signal_handler(int sig)
{
@@ -35,18 +39,15 @@ static void signal_handler(int sig)
static int test_read_access(char *addr, size_t size, size_t pagesize)
{
- size_t offs;
int ret;
if (signal(SIGSEGV, signal_handler) == SIG_ERR)
return -EINVAL;
ret = sigsetjmp(sigjmp_buf_env, 1);
- if (!ret) {
- for (offs = 0; offs < size; offs += pagesize)
- /* Force a read that the compiler cannot optimize out. */
- *((volatile char *)(addr + offs));
- }
+ if (!ret)
+ force_read_pages(addr, size/pagesize, pagesize);
+
if (signal(SIGSEGV, SIG_DFL) == SIG_ERR)
return -EINVAL;
@@ -91,7 +92,7 @@ static int find_ram_target(off_t *offset,
break;
/* We need two pages. */
- if (end > start + 2 * pagesize) {
+ if (end > start + DEV_MEM_NPAGES * pagesize) {
fclose(file);
*offset = start;
return 0;
@@ -100,11 +101,48 @@ static int find_ram_target(off_t *offset,
return -ENOENT;
}
+static void pfnmap_init(void)
+{
+ size_t pagesize = getpagesize();
+ size_t size = DEV_MEM_NPAGES * pagesize;
+ void *addr;
+
+ if (strncmp(file, "/dev/mem", strlen("/dev/mem")) == 0) {
+ int err = find_ram_target(&file_offset, pagesize);
+
+ if (err)
+ ksft_exit_skip("Cannot find ram target in '/proc/iomem': %s\n",
+ strerror(-err));
+ } else {
+ file_offset = 0;
+ }
+
+ fd = open(file, O_RDONLY);
+ if (fd < 0)
+ ksft_exit_skip("Cannot open '%s': %s\n", file, strerror(errno));
+
+ /*
+ * Make sure we can map the file, and perform some basic checks; skip
+ * the whole suite if anything goes wrong.
+ * A fresh mapping is then created for every test case by
+ * FIXTURE_SETUP(pfnmap).
+ */
+ addr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, file_offset);
+ if (addr == MAP_FAILED)
+ ksft_exit_skip("Cannot mmap '%s': %s\n", file, strerror(errno));
+
+ if (!check_vmflag_pfnmap(addr))
+ ksft_exit_skip("Invalid file: '%s'. Not pfnmap'ed\n", file);
+
+ if (test_read_access(addr, size, pagesize))
+ ksft_exit_skip("Cannot read-access mmap'ed '%s'\n", file);
+
+ munmap(addr, size);
+}
+
FIXTURE(pfnmap)
{
- off_t offset;
size_t pagesize;
- int dev_mem_fd;
char *addr1;
size_t size1;
char *addr2;
@@ -115,31 +153,10 @@ FIXTURE_SETUP(pfnmap)
{
self->pagesize = getpagesize();
- if (strncmp(file, "/dev/mem", strlen("/dev/mem")) == 0) {
- /* We'll require two physical pages throughout our tests ... */
- if (find_ram_target(&self->offset, self->pagesize))
- SKIP(return,
- "Cannot find ram target in '/proc/iomem'\n");
- } else {
- self->offset = 0;
- }
-
- self->dev_mem_fd = open(file, O_RDONLY);
- if (self->dev_mem_fd < 0)
- SKIP(return, "Cannot open '%s'\n", file);
-
- self->size1 = self->pagesize * 2;
+ self->size1 = DEV_MEM_NPAGES * self->pagesize;
self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED,
- self->dev_mem_fd, self->offset);
- if (self->addr1 == MAP_FAILED)
- SKIP(return, "Cannot mmap '%s'\n", file);
-
- if (!check_vmflag_pfnmap(self->addr1))
- SKIP(return, "Invalid file: '%s'. Not pfnmap'ed\n", file);
-
- /* ... and want to be able to read from them. */
- if (test_read_access(self->addr1, self->size1, self->pagesize))
- SKIP(return, "Cannot read-access mmap'ed '%s'\n", file);
+ fd, file_offset);
+ ASSERT_NE(self->addr1, MAP_FAILED);
self->size2 = 0;
self->addr2 = MAP_FAILED;
@@ -151,8 +168,6 @@ FIXTURE_TEARDOWN(pfnmap)
munmap(self->addr2, self->size2);
if (self->addr1 != MAP_FAILED)
munmap(self->addr1, self->size1);
- if (self->dev_mem_fd >= 0)
- close(self->dev_mem_fd);
}
TEST_F(pfnmap, madvise_disallowed)
@@ -192,7 +207,7 @@ TEST_F(pfnmap, munmap_split)
*/
self->size2 = self->pagesize;
self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED,
- self->dev_mem_fd, self->offset);
+ fd, file_offset);
ASSERT_NE(self->addr2, MAP_FAILED);
}
@@ -262,8 +277,12 @@ int main(int argc, char **argv)
if (strcmp(argv[i], "--") == 0) {
if (i + 1 < argc && strlen(argv[i + 1]) > 0)
file = argv[i + 1];
- return test_harness_run(i, argv);
+ argc = i;
+ break;
}
}
+
+ pfnmap_init();
+
return test_harness_run(argc, argv);
}
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index d9173f2312b7..afdcfd0d7cef 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -2,6 +2,10 @@
# SPDX-License-Identifier: GPL-2.0
# Please run as root
+# IMPORTANT: If you add a new test CATEGORY please add a simple wrapper
+# script so kunit knows to run it, and add it to the list below.
+# If you do not, YOUR TESTS WILL NOT RUN IN THE CI.
+
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
@@ -87,6 +91,8 @@ separated by spaces:
test VMA merge cases behave as expected
- rmap
test rmap behaves as expected
+- memory-failure
+ test memory-failure behaves as expected
example: ./run_vmtests.sh -t "hmm mmap ksm"
EOF
@@ -399,28 +405,8 @@ CATEGORY="hugetlb" run_test ./hugetlb-read-hwpoison
fi
if [ $VADDR64 -ne 0 ]; then
-
- # set overcommit_policy as OVERCOMMIT_ALWAYS so that kernel
- # allows high virtual address allocation requests independent
- # of platform's physical memory.
-
- if [ -x ./virtual_address_range ]; then
- prev_policy=$(cat /proc/sys/vm/overcommit_memory)
- echo 1 > /proc/sys/vm/overcommit_memory
- CATEGORY="hugevm" run_test ./virtual_address_range
- echo $prev_policy > /proc/sys/vm/overcommit_memory
- fi
-
# va high address boundary switch test
- ARCH_ARM64="arm64"
- prev_nr_hugepages=$(cat /proc/sys/vm/nr_hugepages)
- if [ "$ARCH" == "$ARCH_ARM64" ]; then
- echo 6 > /proc/sys/vm/nr_hugepages
- fi
CATEGORY="hugevm" run_test bash ./va_high_addr_switch.sh
- if [ "$ARCH" == "$ARCH_ARM64" ]; then
- echo $prev_nr_hugepages > /proc/sys/vm/nr_hugepages
- fi
fi # VADDR64
# vmalloc stability smoke test
@@ -543,6 +529,25 @@ CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned
CATEGORY="rmap" run_test ./rmap
+# Try to load hwpoison_inject if not present.
+HWPOISON_DIR=/sys/kernel/debug/hwpoison/
+if [ ! -d "$HWPOISON_DIR" ]; then
+ if ! modprobe -q -R hwpoison_inject; then
+ echo "Module hwpoison_inject not found, skipping..."
+ else
+ modprobe hwpoison_inject > /dev/null 2>&1
+ LOADED_MOD=1
+ fi
+fi
+
+if [ -d "$HWPOISON_DIR" ]; then
+ CATEGORY="memory-failure" run_test ./memory-failure
+fi
+
+if [ -n "${LOADED_MOD}" ]; then
+ modprobe -r hwpoison_inject > /dev/null 2>&1
+fi
+
if [ "${HAVE_HUGEPAGES}" = 1 ]; then
echo "$orig_nr_hugepgs" > /proc/sys/vm/nr_hugepages
fi
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index 40799f3f0213..e0167111bdd1 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -652,11 +652,7 @@ static int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size,
}
madvise(*addr, fd_size, MADV_HUGEPAGE);
- for (size_t i = 0; i < fd_size; i++) {
- char *addr2 = *addr + i;
-
- FORCE_READ(*addr2);
- }
+ force_read_pages(*addr, fd_size / pmd_pagesize, pmd_pagesize);
if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) {
ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n");
diff --git a/tools/testing/selftests/mm/test_vmalloc.sh b/tools/testing/selftests/mm/test_vmalloc.sh
index d39096723fca..b23d705bf570 100755
--- a/tools/testing/selftests/mm/test_vmalloc.sh
+++ b/tools/testing/selftests/mm/test_vmalloc.sh
@@ -13,6 +13,9 @@ TEST_NAME="vmalloc"
DRIVER="test_${TEST_NAME}"
NUM_CPUS=`grep -c ^processor /proc/cpuinfo`
+# Default number of times we allocate percpu objects:
+NR_PCPU_OBJECTS=35000
+
# 1 if fails
exitcode=1
@@ -27,6 +30,8 @@ PERF_PARAM="sequential_test_order=1 test_repeat_count=3"
SMOKE_PARAM="test_loop_count=10000 test_repeat_count=10"
STRESS_PARAM="nr_threads=$NUM_CPUS test_repeat_count=20"
+PCPU_OBJ_PARAM="nr_pcpu_objects=$NR_PCPU_OBJECTS"
+
check_test_requirements()
{
uid=$(id -u)
@@ -47,12 +52,30 @@ check_test_requirements()
fi
}
+check_memory_requirement()
+{
+ # The pcpu_alloc_test allocates nr_pcpu_objects per cpu. If the
+ # PAGE_SIZE is on the larger side it is easier to set a value
+ # that can cause oom events during testing. Since we are
+ # testing the functionality of vmalloc and not the oom-killer,
+ # calculate what is 90% of available memory and divide it by
+ # the number of online CPUs.
+ pages=$(($(getconf _AVPHYS_PAGES) * 90 / 100 / $NUM_CPUS))
+
+ if (($pages < $NR_PCPU_OBJECTS)); then
+ echo "Updated nr_pcpu_objects to 90% of available memory."
+ echo "nr_pcpu_objects is now set to: $pages."
+ PCPU_OBJ_PARAM="nr_pcpu_objects=$pages"
+ fi
+}
+
run_performance_check()
{
echo "Run performance tests to evaluate how fast vmalloc allocation is."
echo "It runs all test cases on one single CPU with sequential order."
- modprobe $DRIVER $PERF_PARAM > /dev/null 2>&1
+ check_memory_requirement
+ modprobe $DRIVER $PERF_PARAM $PCPU_OBJ_PARAM > /dev/null 2>&1
echo "Done."
echo "Check the kernel message buffer to see the summary."
}
@@ -63,7 +86,8 @@ run_stability_check()
echo "available test cases are run by NUM_CPUS workers simultaneously."
echo "It will take time, so be patient."
- modprobe $DRIVER $STRESS_PARAM > /dev/null 2>&1
+ check_memory_requirement
+ modprobe $DRIVER $STRESS_PARAM $PCPU_OBJ_PARAM > /dev/null 2>&1
echo "Done."
echo "Check the kernel ring buffer to see the summary."
}
@@ -74,7 +98,8 @@ run_smoke_check()
echo "Please check $0 output how it can be used"
echo "for deep performance analysis as well as stress testing."
- modprobe $DRIVER $SMOKE_PARAM > /dev/null 2>&1
+ check_memory_requirement
+ modprobe $DRIVER $SMOKE_PARAM $PCPU_OBJ_PARAM > /dev/null 2>&1
echo "Done."
echo "Check the kernel ring buffer to see the summary."
}
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index f4807242c5b2..6f5e404a446c 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -1317,7 +1317,7 @@ static thread_state thread_state_get(pid_t tid)
p = strstr(tmp, header);
if (p) {
/* For example, "State:\tD (disk sleep)" */
- c = *(p + sizeof(header) - 1);
+ c = *(p + strlen(header));
return c == 'D' ?
THR_STATE_UNINTERRUPTIBLE : THR_STATE_UNKNOWN;
}
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c
index 02f290a69132..51401e081b20 100644
--- a/tools/testing/selftests/mm/va_high_addr_switch.c
+++ b/tools/testing/selftests/mm/va_high_addr_switch.c
@@ -322,7 +322,7 @@ static int supported_arch(void)
int main(int argc, char **argv)
{
- int ret;
+ int ret, hugetlb_ret = KSFT_PASS;
if (!supported_arch())
return KSFT_SKIP;
@@ -331,6 +331,10 @@ int main(int argc, char **argv)
ret = run_test(testcases, sz_testcases);
if (argc == 2 && !strcmp(argv[1], "--run-hugetlb"))
- ret = run_test(hugetlb_testcases, sz_hugetlb_testcases);
- return ret;
+ hugetlb_ret = run_test(hugetlb_testcases, sz_hugetlb_testcases);
+
+ if (ret == KSFT_PASS && hugetlb_ret == KSFT_PASS)
+ return KSFT_PASS;
+ else
+ return KSFT_FAIL;
}
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh
index a7d4b02b21dd..9492c2d72634 100755
--- a/tools/testing/selftests/mm/va_high_addr_switch.sh
+++ b/tools/testing/selftests/mm/va_high_addr_switch.sh
@@ -61,9 +61,9 @@ check_supported_ppc64()
check_test_requirements()
{
- # The test supports x86_64 and powerpc64. We currently have no useful
- # eligibility check for powerpc64, and the test itself will reject other
- # architectures.
+ # The test supports x86_64, powerpc64 and arm64. There's a check for arm64
+ # in va_high_addr_switch.c. The test itself will reject other architectures.
+
case `uname -m` in
"x86_64")
check_supported_x86_64
@@ -111,7 +111,9 @@ setup_nr_hugepages()
check_test_requirements
save_nr_hugepages
-# 4 keep_mapped pages, and one for tmp usage
-setup_nr_hugepages 5
+# The HugeTLB tests require 6 pages
+setup_nr_hugepages 6
./va_high_addr_switch --run-hugetlb
+retcode=$?
restore_nr_hugepages
+exit $retcode
diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c
deleted file mode 100644
index 4f0923825ed7..000000000000
--- a/tools/testing/selftests/mm/virtual_address_range.c
+++ /dev/null
@@ -1,260 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright 2017, Anshuman Khandual, IBM Corp.
- *
- * Works on architectures which support 128TB virtual
- * address range and beyond.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/prctl.h>
-#include <sys/mman.h>
-#include <sys/time.h>
-#include <fcntl.h>
-
-#include "vm_util.h"
-#include "kselftest.h"
-
-/*
- * Maximum address range mapped with a single mmap()
- * call is little bit more than 1GB. Hence 1GB is
- * chosen as the single chunk size for address space
- * mapping.
- */
-
-#define SZ_1GB (1024 * 1024 * 1024UL)
-#define SZ_1TB (1024 * 1024 * 1024 * 1024UL)
-
-#define MAP_CHUNK_SIZE SZ_1GB
-
-/*
- * Address space till 128TB is mapped without any hint
- * and is enabled by default. Address space beyond 128TB
- * till 512TB is obtained by passing hint address as the
- * first argument into mmap() system call.
- *
- * The process heap address space is divided into two
- * different areas one below 128TB and one above 128TB
- * till it reaches 512TB. One with size 128TB and the
- * other being 384TB.
- *
- * On Arm64 the address space is 256TB and support for
- * high mappings up to 4PB virtual address space has
- * been added.
- *
- * On PowerPC64, the address space up to 128TB can be
- * mapped without a hint. Addresses beyond 128TB, up to
- * 4PB, can be mapped with a hint.
- *
- */
-
-#define NR_CHUNKS_128TB ((128 * SZ_1TB) / MAP_CHUNK_SIZE) /* Number of chunks for 128TB */
-#define NR_CHUNKS_256TB (NR_CHUNKS_128TB * 2UL)
-#define NR_CHUNKS_384TB (NR_CHUNKS_128TB * 3UL)
-#define NR_CHUNKS_3840TB (NR_CHUNKS_128TB * 30UL)
-#define NR_CHUNKS_3968TB (NR_CHUNKS_128TB * 31UL)
-
-#define ADDR_MARK_128TB (1UL << 47) /* First address beyond 128TB */
-#define ADDR_MARK_256TB (1UL << 48) /* First address beyond 256TB */
-
-#ifdef __aarch64__
-#define HIGH_ADDR_MARK ADDR_MARK_256TB
-#define HIGH_ADDR_SHIFT 49
-#define NR_CHUNKS_LOW NR_CHUNKS_256TB
-#define NR_CHUNKS_HIGH NR_CHUNKS_3840TB
-#elif defined(__PPC64__)
-#define HIGH_ADDR_MARK ADDR_MARK_128TB
-#define HIGH_ADDR_SHIFT 48
-#define NR_CHUNKS_LOW NR_CHUNKS_128TB
-#define NR_CHUNKS_HIGH NR_CHUNKS_3968TB
-#else
-#define HIGH_ADDR_MARK ADDR_MARK_128TB
-#define HIGH_ADDR_SHIFT 48
-#define NR_CHUNKS_LOW NR_CHUNKS_128TB
-#define NR_CHUNKS_HIGH NR_CHUNKS_384TB
-#endif
-
-static char *hint_addr(void)
-{
- int bits = HIGH_ADDR_SHIFT + rand() % (63 - HIGH_ADDR_SHIFT);
-
- return (char *) (1UL << bits);
-}
-
-static void validate_addr(char *ptr, int high_addr)
-{
- unsigned long addr = (unsigned long) ptr;
-
- if (high_addr) {
- if (addr < HIGH_ADDR_MARK)
- ksft_exit_fail_msg("Bad address %lx\n", addr);
- return;
- }
-
- if (addr > HIGH_ADDR_MARK)
- ksft_exit_fail_msg("Bad address %lx\n", addr);
-}
-
-static void mark_range(char *ptr, size_t size)
-{
- if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ptr, size, "virtual_address_range") == -1) {
- if (errno == EINVAL) {
- /* Depends on CONFIG_ANON_VMA_NAME */
- ksft_test_result_skip("prctl(PR_SET_VMA_ANON_NAME) not supported\n");
- ksft_finished();
- } else {
- ksft_exit_fail_perror("prctl(PR_SET_VMA_ANON_NAME) failed\n");
- }
- }
-}
-
-static int is_marked_vma(const char *vma_name)
-{
- return vma_name && !strcmp(vma_name, "[anon:virtual_address_range]\n");
-}
-
-static int validate_lower_address_hint(void)
-{
- char *ptr;
-
- ptr = mmap((void *) (1UL << 45), MAP_CHUNK_SIZE, PROT_READ |
- PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-
- if (ptr == MAP_FAILED)
- return 0;
-
- return 1;
-}
-
-static int validate_complete_va_space(void)
-{
- unsigned long start_addr, end_addr, prev_end_addr;
- char line[400];
- char prot[6];
- FILE *file;
- int fd;
-
- fd = open("va_dump", O_CREAT | O_WRONLY, 0600);
- unlink("va_dump");
- if (fd < 0) {
- ksft_test_result_skip("cannot create or open dump file\n");
- ksft_finished();
- }
-
- file = fopen("/proc/self/maps", "r");
- if (file == NULL)
- ksft_exit_fail_msg("cannot open /proc/self/maps\n");
-
- prev_end_addr = 0;
- while (fgets(line, sizeof(line), file)) {
- const char *vma_name = NULL;
- int vma_name_start = 0;
- unsigned long hop;
-
- if (sscanf(line, "%lx-%lx %4s %*s %*s %*s %n",
- &start_addr, &end_addr, prot, &vma_name_start) != 3)
- ksft_exit_fail_msg("cannot parse /proc/self/maps\n");
-
- if (vma_name_start)
- vma_name = line + vma_name_start;
-
- /* end of userspace mappings; ignore vsyscall mapping */
- if (start_addr & (1UL << 63))
- return 0;
-
- /* /proc/self/maps must have gaps less than MAP_CHUNK_SIZE */
- if (start_addr - prev_end_addr >= MAP_CHUNK_SIZE)
- return 1;
-
- prev_end_addr = end_addr;
-
- if (prot[0] != 'r')
- continue;
-
- if (check_vmflag_io((void *)start_addr))
- continue;
-
- /*
- * Confirm whether MAP_CHUNK_SIZE chunk can be found or not.
- * If write succeeds, no need to check MAP_CHUNK_SIZE - 1
- * addresses after that. If the address was not held by this
- * process, write would fail with errno set to EFAULT.
- * Anyways, if write returns anything apart from 1, exit the
- * program since that would mean a bug in /proc/self/maps.
- */
- hop = 0;
- while (start_addr + hop < end_addr) {
- if (write(fd, (void *)(start_addr + hop), 1) != 1)
- return 1;
- lseek(fd, 0, SEEK_SET);
-
- if (is_marked_vma(vma_name))
- munmap((char *)(start_addr + hop), MAP_CHUNK_SIZE);
-
- hop += MAP_CHUNK_SIZE;
- }
- }
- return 0;
-}
-
-int main(int argc, char *argv[])
-{
- char *ptr[NR_CHUNKS_LOW];
- char **hptr;
- char *hint;
- unsigned long i, lchunks, hchunks;
-
- ksft_print_header();
- ksft_set_plan(1);
-
- for (i = 0; i < NR_CHUNKS_LOW; i++) {
- ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-
- if (ptr[i] == MAP_FAILED) {
- if (validate_lower_address_hint())
- ksft_exit_fail_msg("mmap unexpectedly succeeded with hint\n");
- break;
- }
-
- mark_range(ptr[i], MAP_CHUNK_SIZE);
- validate_addr(ptr[i], 0);
- }
- lchunks = i;
- hptr = (char **) calloc(NR_CHUNKS_HIGH, sizeof(char *));
- if (hptr == NULL) {
- ksft_test_result_skip("Memory constraint not fulfilled\n");
- ksft_finished();
- }
-
- for (i = 0; i < NR_CHUNKS_HIGH; i++) {
- hint = hint_addr();
- hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-
- if (hptr[i] == MAP_FAILED)
- break;
-
- mark_range(hptr[i], MAP_CHUNK_SIZE);
- validate_addr(hptr[i], 1);
- }
- hchunks = i;
- if (validate_complete_va_space()) {
- ksft_test_result_fail("BUG in mmap() or /proc/self/maps\n");
- ksft_finished();
- }
-
- for (i = 0; i < lchunks; i++)
- munmap(ptr[i], MAP_CHUNK_SIZE);
-
- for (i = 0; i < hchunks; i++)
- munmap(hptr[i], MAP_CHUNK_SIZE);
-
- free(hptr);
-
- ksft_test_result_pass("Test\n");
- ksft_finished();
-}
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index d954bf91afd5..a6d4ff7dfdc0 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -723,3 +723,44 @@ int ksm_stop(void)
close(ksm_fd);
return ret == 1 ? 0 : -errno;
}
+
+int get_hardware_corrupted_size(unsigned long *val)
+{
+ unsigned long size;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+ int ret = -1;
+
+ if (!f)
+ return ret;
+
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "HardwareCorrupted: %12lu kB", &size) == 1) {
+ *val = size;
+ ret = 0;
+ break;
+ }
+ }
+
+ free(line);
+ fclose(f);
+ return ret;
+}
+
+/* Write @pfn to the hwpoison debugfs unpoison-pfn file; returns 0 or -errno. */
+int unpoison_memory(unsigned long pfn)
+{
+ int unpoison_fd, len, err = 0;
+ char buf[32];
+
+ unpoison_fd = open("/sys/kernel/debug/hwpoison/unpoison-pfn", O_WRONLY);
+ if (unpoison_fd < 0)
+ return -errno;
+
+ len = snprintf(buf, sizeof(buf), "0x%lx\n", pfn);
+ if (write(unpoison_fd, buf, len) <= 0)
+ err = -errno; /* latch errno before close() can overwrite it */
+ close(unpoison_fd);
+ return err;
+}
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index 6ad32b1830f1..e9c4e24769c1 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -20,6 +20,7 @@
#define KPF_COMPOUND_HEAD BIT_ULL(15)
#define KPF_COMPOUND_TAIL BIT_ULL(16)
+#define KPF_HWPOISON BIT_ULL(19)
#define KPF_THP BIT_ULL(22)
/*
* Ignore the checkpatch warning, we must read from x but don't want to do
@@ -54,6 +55,13 @@ static inline unsigned int pshift(void)
return __page_shift;
}
+static inline void force_read_pages(char *addr, unsigned int nr_pages,
+ size_t pagesize)
+{
+ for (unsigned int i = 0; i < nr_pages; i++)
+ FORCE_READ(addr[i * pagesize]);
+}
+
bool detect_huge_zeropage(void);
/*
@@ -147,6 +155,8 @@ long ksm_get_full_scans(void);
int ksm_use_zero_pages(void);
int ksm_start(void);
int ksm_stop(void);
+int get_hardware_corrupted_size(unsigned long *val);
+int unpoison_memory(unsigned long pfn);
/*
* On ppc64 this will only work with radix 2M hugepage size
diff --git a/tools/testing/selftests/mm/write_to_hugetlbfs.c b/tools/testing/selftests/mm/write_to_hugetlbfs.c
index 34c91f7e6128..ecb5f7619960 100644
--- a/tools/testing/selftests/mm/write_to_hugetlbfs.c
+++ b/tools/testing/selftests/mm/write_to_hugetlbfs.c
@@ -68,7 +68,7 @@ int main(int argc, char **argv)
int key = 0;
int *ptr = NULL;
int c = 0;
- int size = 0;
+ size_t size = 0;
char path[256] = "";
enum method method = MAX_METHOD;
int want_sleep = 0, private = 0;
@@ -86,7 +86,10 @@ int main(int argc, char **argv)
while ((c = getopt(argc, argv, "s:p:m:owlrn")) != -1) {
switch (c) {
case 's':
- size = atoi(optarg);
+ if (sscanf(optarg, "%zu", &size) != 1) {
+ perror("Invalid -s.");
+ exit_usage();
+ }
break;
case 'p':
strncpy(path, optarg, sizeof(path) - 1);
@@ -131,7 +134,7 @@ int main(int argc, char **argv)
}
if (size != 0) {
- printf("Writing this size: %d\n", size);
+ printf("Writing this size: %zu\n", size);
} else {
errno = EINVAL;
perror("size not found");
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 6930fe926c58..97ad4d551d44 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -7,6 +7,7 @@ cmsg_sender
epoll_busy_poll
fin_ack_lat
hwtstamp_config
+icmp_rfc4884
io_uring_zerocopy_tx
ioam6_parser
ip_defrag
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index b66ba04f19d9..afdea6d95bde 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -22,6 +22,7 @@ TEST_PROGS := \
cmsg_so_mark.sh \
cmsg_so_priority.sh \
cmsg_time.sh \
+ double_udp_encap.sh \
drop_monitor_tests.sh \
fcnal-ipv4.sh \
fcnal-ipv6.sh \
@@ -48,6 +49,7 @@ TEST_PROGS := \
ipv6_flowlabel.sh \
ipv6_force_forwarding.sh \
ipv6_route_update_soft_lockup.sh \
+ ipvtap_test.sh \
l2_tos_ttl_inherit.sh \
l2tp.sh \
link_netns.py \
@@ -166,6 +168,7 @@ TEST_GEN_PROGS := \
bind_timewait \
bind_wildcard \
epoll_busy_poll \
+ icmp_rfc4884 \
ipv6_fragmentation \
proc_net_pktgen \
reuseaddr_conflict \
@@ -180,7 +183,6 @@ TEST_GEN_PROGS := \
tap \
tcp_port_share \
tls \
- tun \
# end of TEST_GEN_PROGS
TEST_FILES := \
@@ -192,7 +194,11 @@ TEST_FILES := \
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller
-YNL_GEN_PROGS := netlink-dumps
+YNL_GEN_PROGS := \
+ netlink-dumps \
+ tun \
+# end of YNL_GEN_PROGS
+
TEST_GEN_FILES += $(YNL_GEN_FILES)
TEST_GEN_PROGS += $(YNL_GEN_PROGS)
@@ -203,7 +209,14 @@ TEST_INCLUDES := forwarding/lib.sh
include ../lib.mk
# YNL build
-YNL_GENS := netdev
+YNL_GENS := \
+ netdev \
+ rt-addr \
+ rt-link \
+ rt-neigh \
+ rt-route \
+# end of YNL_GENS
+
include ynl.mk
$(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 3cd677b72072..4c0375e28bbe 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,9 @@
-CFLAGS += $(KHDR_INCLUDES) -Wall -Wflex-array-member-not-at-end
+top_srcdir := ../../../../..
+include $(top_srcdir)/scripts/Makefile.compiler
+
+cc-option = $(call __cc-option, $(CC),,$(1),$(2))
+
+CFLAGS += $(KHDR_INCLUDES) -Wall $(call cc-option,-Wflex-array-member-not-at-end)
TEST_GEN_PROGS := \
diag_uid \
diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh
index 3ef209cacb8e..663744305e52 100755
--- a/tools/testing/selftests/net/amt.sh
+++ b/tools/testing/selftests/net/amt.sh
@@ -73,6 +73,8 @@
# +------------------------+
#==============================================================================
+source lib.sh
+
readonly LISTENER=$(mktemp -u listener-XXXXXXXX)
readonly GATEWAY=$(mktemp -u gateway-XXXXXXXX)
readonly RELAY=$(mktemp -u relay-XXXXXXXX)
@@ -246,14 +248,15 @@ test_ipv6_forward()
send_mcast4()
{
- sleep 2
+ sleep 5
+ wait_local_port_listen ${LISTENER} 4000 udp
ip netns exec "${SOURCE}" bash -c \
'printf "%s %128s" 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' &
}
send_mcast6()
{
- sleep 2
+ wait_local_port_listen ${LISTENER} 6000 udp
ip netns exec "${SOURCE}" bash -c \
'printf "%s %128s" 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' &
}
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 1e1f253118f5..cd49b7dfe216 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -48,6 +48,7 @@ CONFIG_IPV6_SEG6_LWTUNNEL=y
CONFIG_IPV6_SIT=y
CONFIG_IPV6_VTI=y
CONFIG_IPVLAN=m
+CONFIG_IPVTAP=m
CONFIG_KALLSYMS=y
CONFIG_L2TP=m
CONFIG_L2TP_ETH=m
@@ -76,6 +77,7 @@ CONFIG_NET_DROP_MONITOR=m
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_NETFILTER_XT_MATCH_BPF=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NETFILTER_XT_MATCH_POLICY=m
CONFIG_NETFILTER_XT_NAT=m
@@ -116,6 +118,7 @@ CONFIG_PROC_SYSCTL=y
CONFIG_PSAMPLE=m
CONFIG_RPS=y
CONFIG_SYSFS=y
+CONFIG_TAP=m
CONFIG_TCP_MD5SIG=y
CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_TEST_BPF=m
diff --git a/tools/testing/selftests/net/double_udp_encap.sh b/tools/testing/selftests/net/double_udp_encap.sh
new file mode 100755
index 000000000000..9aaf97cdf141
--- /dev/null
+++ b/tools/testing/selftests/net/double_udp_encap.sh
@@ -0,0 +1,393 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+# shellcheck disable=SC2155 # prefer RO variable over return value from cmd
+readonly CLI="$(dirname "$(readlink -f "$0")")/../../../net/ynl/pyynl/cli.py"
+
+readonly SRC=1
+readonly DST=2
+
+readonly NET_V4=192.168.1.
+readonly NET_V6=2001:db8::
+readonly OL1_NET_V4=172.16.1.
+readonly OL1_NET_V6=2001:db8:1::
+readonly OL2_NET_V4=172.16.2.
+readonly OL2_NET_V6=2001:db8:2::
+
+trap cleanup_all_ns EXIT
+
+# shellcheck disable=SC2329 # can't figure out usage through a variable
+is_ipv6() {
+ if [[ $1 =~ .*:.* ]]; then
+ return 0
+ fi
+ return 1
+}
+
+# shellcheck disable=SC2329 # can't figure out usage through a variable
+create_gnv_endpoint() {
+ local -r netns=$1
+ local -r bm_rem_addr=$2
+ local -r gnv_dev=$3
+ local -r gnv_id=$4
+ local opts=$5
+ local gnv_json
+ local rem
+
+ if is_ipv6 "$bm_rem_addr"; then
+ rem=remote6
+ else
+ rem=remote
+ fi
+
+ # add ynl opt separator, if needed
+ [ -n "$opts" ] && opts=", $opts"
+
+ gnv_json="{ \"id\": $gnv_id, \"$rem\": \"$bm_rem_addr\"$opts }"
+ ip netns exec "$netns" "$CLI" --family rt-link --create --excl \
+ --do newlink --json "{\"ifname\": \"$gnv_dev\",
+ \"linkinfo\": {\"kind\":\"geneve\",
+ \"data\": $gnv_json } }" > /dev/null
+ ip -n "$netns" link set dev "$gnv_dev" up
+}
+
+# shellcheck disable=SC2329 # can't figure out usage through a variable
+create_vxlan_endpoint() {
+ local -r netns=$1
+ local -r bm_rem_addr=$2
+ local -r vxlan_dev=$3
+ local -r vxlan_id=$4
+ local -r opts_str=$5
+ local oldifs
+ local -a opts
+ local opt
+
+ # convert the arguments from yaml format
+ oldifs=$IFS
+ IFS=','
+ for opt in $opts_str; do
+ local pattern='"port":'
+
+ [ -n "$opt" ] || continue
+
+ opts+=("${opt/$pattern*/dstport}" "${opt/$pattern/}")
+ done
+ IFS=$oldifs
+ [ ${#opts[@]} -gt 0 ] || opts+=("dstport" "4789")
+
+ ip -n "$netns" link add "$vxlan_dev" type vxlan id "$vxlan_id" \
+ remote "$bm_rem_addr" "${opts[@]}"
+ ip -n "$netns" link set dev "$vxlan_dev" up
+}
+
+create_ns() {
+ local nested_opt='"port":6082'
+ local create_endpoint
+ local options="$1"
+ local feature
+ local dev
+ local id
+ local ns
+
+ RET=0
+
+ # +-------------+ +-------------+
+ # | NS_SRC | | NS_NST_DST |
+ # | | | |
+ # | gnv_nst1 | | gnv_nst2 |
+ # | + | | + |
+ # | | | | | |
+ # | + | | + |
+ # | gnv1 | | gnv2 |
+ # | + | | + |
+ # | | | | | |
+ # | + veth1 +--------+ veth2 + |
+ # | | | |
+ # +-------------+ +-------------+
+
+ setup_ns NS_SRC NS_DST
+
+ # concatenate caller provided options and default one
+ [ -n "$2" ] && nested_opt="$nested_opt,$2"
+
+ ip link add name "veth$SRC" netns "$NS_SRC" type veth \
+ peer name "veth$DST" netns "$NS_DST"
+ case "$ENCAP" in
+ vxlan)
+ create_endpoint=create_vxlan_endpoint
+ dev=vx
+ ;;
+ geneve)
+ create_endpoint=create_gnv_endpoint
+ dev=gnv
+ ;;
+ esac
+
+ id=1
+ for ns in "${NS_LIST[@]}"; do
+ ip -n "$ns" link set dev "veth$id" up
+
+ # ensure the sender can do large write just after 3whs
+ ip netns exec "$ns" \
+ sysctl -qw net.ipv4.tcp_wmem="4096 4194304 4194304"
+
+ # note that 3 - $SRC == $DST and 3 - $DST == $SRC
+ if [ $FAMILY = "4" ]; then
+ ip -n "$ns" addr add dev "veth$id" "$NET_V4$id/24"
+ $create_endpoint "$ns" "$NET_V4$((3 - id))" \
+ "$dev$id" 4 "$options"
+ ip -n "$ns" addr add dev "$dev$id" "$OL1_NET_V4$id/24"
+
+ # nested tunnel devices
+ # pmtu can't be propagated to upper layer devices;
+ # need manual adjust
+ $create_endpoint "$ns" "$OL1_NET_V4$((3 - id))" \
+ "$dev"_nst"$id" 40 "$nested_opt"
+ ip -n "$ns" addr add dev "$dev"_nst"$id" \
+ "$OL2_NET_V4$id/24"
+ ip -n "$ns" link set dev "$dev"_nst"$id" mtu 1392
+ else
+ ip -n "$ns" addr add dev "veth$id" "$NET_V6$id/64" \
+ nodad
+ $create_endpoint "$ns" "$NET_V6$((3 - id))" \
+ "$dev"6"$id" 6 "$options"
+ ip -n "$ns" addr add dev "$dev"6"$id" \
+ "$OL1_NET_V6$id/64" nodad
+
+ $create_endpoint "$ns" "$OL1_NET_V6$((3 - id))" \
+ "$dev"6_nst"$id" 60 "$nested_opt"
+ ip -n "$ns" addr add dev "$dev"6_nst"$id" \
+ "$OL2_NET_V6$id/64" nodad
+ ip -n "$ns" link set dev "$dev"6_nst"$id" mtu 1352
+ fi
+ id=$((id+1))
+ done
+
+ # enable GRO heuristic on the veth peer and ensure UDP L4 over tunnel is
+ # actually segmented
+ for feature in tso tx-udp_tnl-segmentation; do
+ ip netns exec "$NS_SRC" ethtool -K "veth$SRC" \
+ "$feature" off 2>/dev/null
+ done
+}
+
+create_ns_gso() {
+ local dev
+
+ create_ns "$@"
+ if [ "$ENCAP" = "geneve" ]; then
+ dev=gnv
+ else
+ dev=vx
+ fi
+ [ "$FAMILY" = "6" ] && dev="$dev"6
+ ip netns exec "$NS_SRC" ethtool -K "$dev$SRC" \
+ tx-gso-partial on \
+ tx-udp_tnl-segmentation on \
+ tx-udp_tnl-csum-segmentation on
+}
+
+create_ns_gso_gro() {
+ create_ns_gso "$@"
+ ip netns exec "$NS_DST" ethtool -K "veth$DST" gro on
+ ip netns exec "$NS_SRC" ethtool -K "veth$SRC" tx off >/dev/null 2>&1
+}
+
+run_test() {
+ local -r dst=$NET$DST
+ local -r msg=$1
+ local -r total_size=$2
+ local -r encappkts=$3
+ local inner_proto_offset=0
+ local inner_maclen=14
+ local rx_family="-4"
+ local ipt=iptables
+ local bpf_filter
+ local -a rx_args
+ local wire_pkts
+ local rcvpkts
+ local encl=8
+ local dport
+ local pkts
+ local snd
+
+ if [ $FAMILY = "6" ]; then
+ ipt=ip6tables
+ else
+ # rx program does not support '-6' and implies ipv6 usage by
+ # default
+ rx_args=("$rx_family")
+ fi
+
+ # The receiver can only check fixed-size packets
+ pkts=$((total_size / GSO_SIZE))
+ if [ -n "$4" ]; then
+ wire_pkts=$4
+ elif [ $((total_size % GSO_SIZE)) -eq 0 ]; then
+ wire_pkts=1
+ rx_args+=("-l" "$GSO_SIZE")
+ else
+ wire_pkts=2
+ pkts=$((pkts + 1))
+ fi
+
+ if [ "$ENCAP" = "geneve" ]; then
+ dport=6081
+ else
+ dport=4789
+ fi
+
+ # Either:
+ # - IPv4, nested tunnel carries UDP over IPv4, with dport 6082,
+ # innermost is TCP over IPv4 on port 8000
+ # - IPv6, nested tunnel carries UDP over IPv6, with dport 6082,
+ # innermost is TCP over IPv6 on port 8000
+ # The nested tunnel port is 6082 and the nested encap len is 8
+ # regardless of the encap type (no geneve opts).
+ # In inherit protocol mode there is no nested mac hdr and the nested
+ # l3 protocol type field belongs to the geneve hdr.
+ [ "$USE_HINT" = true ] && encl=16
+ [ "$INHERIT" = true ] && inner_maclen=0
+ [ "$INHERIT" = true ] && inner_proto_offset=-4
+ local inner=$((inner_maclen+encl))
+ local proto=$((inner_maclen+encl+inner_proto_offset))
+ bpf_filter=$(nfbpf_compile "(ip &&
+ ip[$((40+encl))] == 0x08 && ip[$((41+encl))] == 0x00 &&
+ ip[$((51+encl))] == 0x11 &&
+ ip[$((64+encl))] == 0x17 && ip[$((65+encl))] == 0xc2 &&
+ ip[$((76+proto))] == 0x08 && ip[$((77+proto))] == 0x00 &&
+ ip[$((87+inner))] == 0x6 &&
+ ip[$((100+inner))] == 0x1f && ip[$((101+inner))] == 0x40) ||
+ (ip6 &&
+ ip6[$((60+encl))] == 0x86 && ip6[$((61+encl))] == 0xdd &&
+ ip6[$((68+encl))] == 0x11 &&
+ ip6[$((104+encl))] == 0x17 && ip6[$((105+encl))] == 0xc2 &&
+ ip6[$((116+proto))] == 0x86 && ip6[$((117+proto))] == 0xdd &&
+ ip6[$((124+inner))] == 0x6 &&
+ ip6[$((160+inner))] == 0x1f && ip6[$((161+inner))] == 0x40)")
+
+ # ignore short packets, to avoid arp/mld induced noise
+ ip netns exec "$NS_SRC" "$ipt" -A OUTPUT -p udp --dport "$dport" \
+ -m length --length 600:65535 -m bpf --bytecode "$bpf_filter"
+ ip netns exec "$NS_DST" "$ipt" -A INPUT -p udp --dport "$dport" \
+ -m length --length 600:65535 -m bpf --bytecode "$bpf_filter"
+ ip netns exec "$NS_DST" ./udpgso_bench_rx -C 2000 -t -R 100 \
+ -n "$pkts" "${rx_args[@]}" &
+ local pid=$!
+ wait_local_port_listen "$NS_DST" 8000 tcp
+ ip netns exec "$NS_SRC" ./udpgso_bench_tx -"$FAMILY" -t -M 1 \
+ -s "$total_size" -D "$dst"
+ local ret=$?
+ check_err "$ret" "client failure exit code $ret"
+ wait "$pid"
+ ret=$?
+ check_err "$ret" "sever failure exit code $ret"
+
+ snd=$(ip netns exec "$NS_SRC" "$ipt"-save -c |
+ grep "dport $dport" | sed -e 's/\[//' -e 's/:.*//')
+
+ [ "$snd" = "$wire_pkts" ]
+ # shellcheck disable=SC2319 # known false positive
+ check_err $? "send $snd packets on the lowest link, expected $wire_pkts"
+
+ rcvpkts=$(ip netns exec "$NS_DST" "$ipt"-save -c | \
+ grep "dport $dport" | sed -e 's/\[//' -e 's/:.*//')
+
+ [ "$rcvpkts" = "$encappkts" ]
+ check_err $? "received $rcvpkts $ENCAP packets, expected $encappkts"
+ log_test "$msg"
+}
+
+run_tests() {
+ for FAMILY in 4 6; do
+ NET=$OL2_NET_V4
+ GSO_SIZE=1340 # 1392 - 20 - 32
+
+ if [ $FAMILY = 6 ]; then
+ NET=$OL2_NET_V6
+ GSO_SIZE=1280 # 1352 - 40 - 32
+ fi
+
+ echo "IPv$FAMILY"
+
+ unset USE_HINT
+ unset INHERIT
+
+ # "geneve" must be last encap in list, so that later
+ # test cases will run on it
+ for ENCAP in "vxlan" "geneve"; do
+ create_ns
+ run_test "No GSO - $ENCAP" $((GSO_SIZE * 4)) 4 4
+ cleanup_all_ns
+
+ create_ns_gso
+ run_test "GSO without GRO - $ENCAP" $((GSO_SIZE * 4)) \
+ 4 1
+ cleanup_all_ns
+
+ # IPv4 only test
+ [ $FAMILY = "4" ] || continue
+ create_ns_gso
+ ip netns exec "$NS_SRC" \
+ sysctl -qw net.ipv4.ip_no_pmtu_disc=1
+ run_test "GSO disable due to no fixedid - $ENCAP" \
+ $((GSO_SIZE * 4)) 4 4
+ cleanup_all_ns
+ done
+
+ # GRO tests imply/require geneve encap, the only one providing
+ # GRO hints
+ create_ns_gso_gro
+ run_test "double tunnel GRO, no hints" $((GSO_SIZE * 4)) 4
+ cleanup_all_ns
+
+ # hint option is expected for all the following tests in the RX
+ # path
+ USE_HINT=true
+ create_ns_gso_gro \
+ '"gro-hint":1,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' \
+ '"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1'
+ run_test "double tunnel GRO" $((GSO_SIZE * 4)) 1
+ cleanup_all_ns
+
+ create_ns_gso_gro '"gro-hint":1,"udp-csum":1' '"udp-csum":1'
+ run_test "double tunnel GRO - csum complete" $((GSO_SIZE * 4))\
+ 1
+ cleanup_all_ns
+
+ create_ns_gso_gro '"gro-hint":1' \
+ '"udp-csum":0,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1'
+ run_test "double tunnel GRO - no nested csum" \
+ $((GSO_SIZE * 4)) 1
+ cleanup_all_ns
+
+ create_ns_gso_gro \
+ '"gro-hint":1,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' \
+ '"udp-csum":1'
+ run_test "double tunnel GRO - nested csum, outer 0-csum, skip"\
+ $((GSO_SIZE * 4)) 4
+ cleanup_all_ns
+
+ INHERIT=true
+ create_ns_gso_gro '"gro-hint":1,"udp-csum":1' \
+ '"udp-csum":1,"inner-proto-inherit":1'
+ run_test "double tunnel GRO - nested inherit proto" \
+ $((GSO_SIZE * 4)) 1
+ cleanup_all_ns
+ unset INHERIT
+
+ create_ns_gso_gro '"gro-hint":1'
+ run_test "double tunnel GRO - short last pkt" \
+ $((GSO_SIZE * 4 + GSO_SIZE / 2)) 2
+ cleanup_all_ns
+ done
+}
+
+require_command nfbpf_compile
+require_command jq
+
+# tcp retransmissions will break the accounting
+xfail_on_slow run_tests
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 844a580ae74e..890c3f8e51bb 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -2327,6 +2327,13 @@ ipv6_ping_novrf()
log_test_addr ${a} $? 2 "ping local, device bind"
done
+ for a in ${NSA_LO_IP6} ${NSA_LINKIP6}%${NSA_DEV} ${NSA_IP6}
+ do
+ log_start
+ run_cmd ${ping6} -c1 -w1 -I ::1 ${a}
+ log_test_addr ${a} $? 0 "ping local, from localhost"
+ done
+
#
# ip rule blocks address
#
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
index ec2d6ceb1f08..e0d45292a298 100755
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -72,7 +72,8 @@ declare -A TEST_NET4IN6IN6
TEST_NET4IN6[1]=10.1.1.254
TEST_NET4IN6[2]=10.2.1.254
-# mcast address
+# mcast addresses
+MCAST4=233.252.0.1
MCAST6=ff02::1
VRF=lisa
@@ -120,7 +121,7 @@ log_subsection()
run_cmd()
{
- local cmd="$*"
+ local cmd="$1"
local out
local rc
@@ -145,7 +146,7 @@ get_linklocal()
local pfx
local addr
- addr=$(${pfx} ip -6 -br addr show dev ${dev} | \
+ addr=$(${pfx} ${IP} -6 -br addr show dev ${dev} | \
awk '{
for (i = 3; i <= NF; ++i) {
if ($i ~ /^fe80/)
@@ -173,58 +174,48 @@ setup()
set -e
- # create namespace
- setup_ns PEER_NS
+ # create namespaces
+ setup_ns ns1
+ IP="ip -netns $ns1"
+ setup_ns ns2
# add vrf table
- ip li add ${VRF} type vrf table ${VRF_TABLE}
- ip li set ${VRF} up
- ip ro add table ${VRF_TABLE} unreachable default metric 8192
- ip -6 ro add table ${VRF_TABLE} unreachable default metric 8192
+ ${IP} li add ${VRF} type vrf table ${VRF_TABLE}
+ ${IP} li set ${VRF} up
+ ${IP} ro add table ${VRF_TABLE} unreachable default metric 8192
+ ${IP} -6 ro add table ${VRF_TABLE} unreachable default metric 8192
# create test interfaces
- ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
- ip li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]}
- ip li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]}
- ip li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]}
+ ${IP} li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
+ ${IP} li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]}
+ ${IP} li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]}
+ ${IP} li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]}
# enslave vrf interfaces
for n in 5 7; do
- ip li set ${NETIFS[p${n}]} vrf ${VRF}
+ ${IP} li set ${NETIFS[p${n}]} vrf ${VRF}
done
# add addresses
for n in 1 3 5 7; do
- ip li set ${NETIFS[p${n}]} up
- ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
- ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
+ ${IP} li set ${NETIFS[p${n}]} up
+ ${IP} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+ ${IP} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
done
# move peer interfaces to namespace and add addresses
for n in 2 4 6 8; do
- ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
- ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
- ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
+ ${IP} li set ${NETIFS[p${n}]} netns ${ns2} up
+ ip -netns $ns2 addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+ ip -netns $ns2 addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
done
- ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64}
- ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64}
+ ${IP} -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64}
+ ${IP} -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64}
set +e
}
-cleanup()
-{
- # make sure we start from a clean slate
- cleanup_ns ${PEER_NS} 2>/dev/null
- for n in 1 3 5 7; do
- ip link del ${NETIFS[p${n}]} 2>/dev/null
- done
- ip link del ${VRF} 2>/dev/null
- ip ro flush table ${VRF_TABLE}
- ip -6 ro flush table ${VRF_TABLE}
-}
-
################################################################################
# IPv4 tests
#
@@ -241,7 +232,7 @@ run_ip()
# dev arg may be empty
[ -n "${dev}" ] && dev="dev ${dev}"
- run_cmd ip ro add table "${table}" "${prefix}"/32 via "${gw}" "${dev}" onlink
+ run_cmd "${IP} ro add table ${table} ${prefix}/32 via ${gw} ${dev} onlink"
log_test $? ${exp_rc} "${desc}"
}
@@ -257,8 +248,8 @@ run_ip_mpath()
# dev arg may be empty
[ -n "${dev}" ] && dev="dev ${dev}"
- run_cmd ip ro add table "${table}" "${prefix}"/32 \
- nexthop via ${nh1} nexthop via ${nh2}
+ run_cmd "${IP} ro add table ${table} ${prefix}/32 \
+ nexthop via ${nh1} nexthop via ${nh2}"
log_test $? ${exp_rc} "${desc}"
}
@@ -270,11 +261,15 @@ valid_onlink_ipv4()
run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected"
run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive"
+ run_ip 254 ${TEST_NET4[1]}.9 ${CONGW[1]} ${NETIFS[p3]} 0 \
+ "nexthop device mismatch"
log_subsection "VRF ${VRF}"
run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.10 ${CONGW[3]} ${NETIFS[p7]} 0 \
+ "nexthop device mismatch"
log_subsection "VRF device, PBR table"
@@ -310,17 +305,15 @@ invalid_onlink_ipv4()
{
run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \
"Invalid gw - local unicast address"
+ run_ip 254 ${TEST_NET4[1]}.12 ${MCAST4} ${NETIFS[p1]} 2 \
+ "Invalid gw - multicast address"
run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \
"Invalid gw - local unicast address, VRF"
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.12 ${MCAST4} ${NETIFS[p5]} 2 \
+ "Invalid gw - multicast address, VRF"
run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given"
-
- run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \
- "Gateway resolves to wrong nexthop device"
-
- run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \
- "Gateway resolves to wrong nexthop device - VRF"
}
################################################################################
@@ -339,7 +332,7 @@ run_ip6()
# dev arg may be empty
[ -n "${dev}" ] && dev="dev ${dev}"
- run_cmd ip -6 ro add table "${table}" "${prefix}"/128 via "${gw}" "${dev}" onlink
+ run_cmd "${IP} -6 ro add table ${table} ${prefix}/128 via ${gw} ${dev} onlink"
log_test $? ${exp_rc} "${desc}"
}
@@ -353,8 +346,8 @@ run_ip6_mpath()
local exp_rc="$6"
local desc="$7"
- run_cmd ip -6 ro add table "${table}" "${prefix}"/128 "${opts}" \
- nexthop via ${nh1} nexthop via ${nh2}
+ run_cmd "${IP} -6 ro add table ${table} ${prefix}/128 ${opts} \
+ nexthop via ${nh1} nexthop via ${nh2}"
log_test $? ${exp_rc} "${desc}"
}
@@ -367,12 +360,16 @@ valid_onlink_ipv6()
run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected"
run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive"
run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped"
+ run_ip6 254 ${TEST_NET6[1]}::a ${V6ADDRS[p1]/::*}::64 ${NETIFS[p3]} 0 \
+ "nexthop device mismatch"
log_subsection "VRF ${VRF}"
run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::b ${V6ADDRS[p5]/::*}::64 \
+ ${NETIFS[p7]} 0 "nexthop device mismatch"
log_subsection "VRF device, PBR table"
@@ -438,13 +435,6 @@ invalid_onlink_ipv6()
run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \
"No nexthop device given"
-
- # default VRF validation is done against LOCAL table
- # run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \
- # "Gateway resolves to wrong nexthop device"
-
- run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \
- "Gateway resolves to wrong nexthop device - VRF"
}
run_onlink_tests()
@@ -491,10 +481,9 @@ do
esac
done
-cleanup
setup
run_onlink_tests
-cleanup
+cleanup_ns ${ns1} ${ns2}
if [ "$TESTS" != "none" ]; then
printf "\nTests passed: %3d\n" ${nsuccess}
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 2b0a90581e2f..21026b667667 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -800,6 +800,14 @@ ipv6_fcnal()
set +e
check_nexthop "dev veth1" ""
log_test $? 0 "Nexthops removed on admin down"
+
+ # error routes should be deleted when their nexthop is deleted
+ run_cmd "$IP li set dev veth1 up"
+ run_cmd "$IP -6 nexthop add id 58 dev veth1"
+ run_cmd "$IP ro add blackhole 2001:db8:101::1/128 nhid 58"
+ run_cmd "$IP nexthop del id 58"
+ check_route6 "2001:db8:101::1" ""
+ log_test $? 0 "Error route removed on nexthop deletion"
}
ipv6_grp_refs()
@@ -1459,6 +1467,13 @@ ipv4_fcnal()
run_cmd "$IP ro del 172.16.102.0/24"
log_test $? 0 "Delete route when not specifying nexthop attributes"
+
+ # error routes should be deleted when their nexthop is deleted
+ run_cmd "$IP nexthop add id 23 dev veth1"
+ run_cmd "$IP ro add blackhole 172.16.102.100/32 nhid 23"
+ run_cmd "$IP nexthop del id 23"
+ check_route "172.16.102.100" ""
+ log_test $? 0 "Error route removed on nexthop deletion"
}
ipv4_grp_fcnal()
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index a88f797c549a..c5694cc4ddd2 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -12,7 +12,7 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance \
- fib6_ra_to_static"
+ ipv4_mpath_balance_preferred fib6_ra_to_static"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -2751,6 +2751,73 @@ ipv4_mpath_balance_test()
forwarding_cleanup
}
+get_route_dev_src()
+{
+ local pfx="$1"
+ local src="$2"
+ local out
+
+ if out=$($IP -j route get "$pfx" from "$src" | jq -re ".[0].dev"); then
+ echo "$out"
+ fi
+}
+
+ipv4_mpath_preferred()
+{
+ local src_ip=$1
+ local pref_dev=$2
+ local dev routes
+ local route0=0
+ local route1=0
+ local pref_route=0
+ num_routes=254
+
+ for i in $(seq 1 $num_routes) ; do
+ dev=$(get_route_dev_src 172.16.105.$i $src_ip)
+ if [ "$dev" = "$pref_dev" ]; then
+ pref_route=$((pref_route+1))
+ elif [ "$dev" = "veth1" ]; then
+ route0=$((route0+1))
+ elif [ "$dev" = "veth3" ]; then
+ route1=$((route1+1))
+ fi
+ done
+
+ routes=$((route0+route1))
+
+ [ "$VERBOSE" = "1" ] && echo "multipath: routes seen: ($route0,$route1,$pref_route)"
+
+ if [ x"$pref_dev" = x"" ]; then
+ [[ $routes -ge $num_routes ]] && [[ $route0 -gt 0 ]] && [[ $route1 -gt 0 ]]
+ else
+ [[ $pref_route -ge $num_routes ]]
+ fi
+
+}
+
+ipv4_mpath_balance_preferred_test()
+{
+ echo
+ echo "IPv4 multipath load balance preferred route"
+
+ forwarding_setup
+
+ $IP route add 172.16.105.0/24 \
+ nexthop via 172.16.101.2 \
+ nexthop via 172.16.103.2
+
+ ipv4_mpath_preferred 172.16.101.1 veth1
+ log_test $? 0 "IPv4 multipath loadbalance from veth1"
+
+ ipv4_mpath_preferred 172.16.103.1 veth3
+ log_test $? 0 "IPv4 multipath loadbalance from veth3"
+
+ ipv4_mpath_preferred 198.51.100.1
+ log_test $? 0 "IPv4 multipath loadbalance from dummy"
+
+ forwarding_cleanup
+}
+
ipv6_mpath_balance_test()
{
echo
@@ -2861,6 +2928,7 @@ do
ipv6_mpath_list) ipv6_mpath_list_test;;
ipv4_mpath_balance) ipv4_mpath_balance_test;;
ipv6_mpath_balance) ipv6_mpath_balance_test;;
+ ipv4_mpath_balance_preferred) ipv4_mpath_balance_preferred_test;;
fib6_ra_to_static) fib6_ra_to_static;;
help) echo "Test names: $TESTS"; exit 0;;
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
index 3da9d93ab36f..625162fd7e8b 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
@@ -28,6 +28,7 @@ ALL_TESTS="
test_8021d
test_8021q
test_8021qvs
+ test_mdb_count_warning
"
NUM_NETIFS=4
@@ -83,8 +84,6 @@ switch_create_8021q()
{
local br_flags=$1; shift
- log_info "802.1q $br_flags${br_flags:+ }tests"
-
ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \
mcast_snooping 1 $br_flags \
mcast_igmp_version 3 mcast_mld_version 2
@@ -106,6 +105,7 @@ switch_create_8021q()
switch_create_8021qvs()
{
+ log_info "802.1q mcast_vlan_snooping 1 tests"
switch_create_8021q "mcast_vlan_snooping 1"
bridge vlan global set dev br0 vid 10 mcast_igmp_version 3
bridge vlan global set dev br0 vid 10 mcast_mld_version 2
@@ -1272,6 +1272,76 @@ test_8021qvs_toggle_vlan_snooping()
test_toggle_vlan_snooping_permanent
}
+mdb_count_check_warn()
+{
+ local msg=$1; shift
+
+ dmesg | grep -q "WARNING:.*br_multicast_port_ngroups_dec.*"
+ check_fail $? "$msg"
+}
+
+test_mdb_count_mcast_vlan_snooping_flush()
+{
+ RET=0
+
+ # check if we already have a warning
+ mdb_count_check_warn "Check MDB entries count warning before test"
+
+ bridge mdb add dev br0 port "$swp1" grp 239.0.0.1 permanent vid 10
+ ip link set dev br0 down
+ ip link set dev br0 type bridge mcast_vlan_snooping 1
+ bridge mdb flush dev br0
+
+ mdb_count_check_warn "Check MDB entries count warning after test"
+
+ ip link set dev br0 type bridge mcast_vlan_snooping 0
+ ip link set dev br0 up
+
+ log_test "MDB count warning: mcast_vlan_snooping and MDB flush"
+}
+
+test_mdb_count_mcast_snooping_flush()
+{
+ RET=0
+
+ # check if we already have a warning
+ mdb_count_check_warn "Check MDB entries count warning before test"
+
+ bridge mdb add dev br0 port "$swp1" grp 239.0.0.1 permanent vid 10
+ ip link set dev br0 type bridge mcast_snooping 0
+ ip link set dev br0 type bridge mcast_vlan_snooping 1
+ bridge mdb flush dev br0
+
+ mdb_count_check_warn "Check MDB entries count warning after test"
+
+ ip link set dev br0 type bridge mcast_vlan_snooping 0
+ ip link set dev br0 type bridge mcast_snooping 1
+
+ log_test "MDB count warning: mcast_snooping and MDB flush"
+}
+
+test_mdb_count_vlan_state_flush()
+{
+ RET=0
+
+ # check if we already have a warning
+ mdb_count_check_warn "Check MDB entries count warning before test"
+
+ bridge mdb add dev br0 port "$swp1" grp 239.0.0.1 permanent vid 10
+ ip link set dev br0 down
+ bridge vlan set vid 10 dev "$swp1" state blocking
+ ip link set dev br0 type bridge mcast_vlan_snooping 1
+ ip link set dev br0 up
+ bridge mdb flush dev br0
+
+ mdb_count_check_warn "Check MDB entries count warning after test"
+
+ bridge vlan set vid 10 dev "$swp1" state forwarding
+ ip link set dev br0 type bridge mcast_vlan_snooping 0
+
+ log_test "MDB count warning: disabled vlan state and MDB flush"
+}
+
# test groups
test_8021d()
@@ -1297,6 +1367,7 @@ test_8021q()
{
# Tests for vlan_filtering 1 mcast_vlan_snooping 0.
+ log_info "802.1q tests"
switch_create_8021q
setup_wait
@@ -1334,6 +1405,21 @@ test_8021qvs()
switch_destroy
}
+# Test group: create the 802.1q switch, run the three MDB count warning tests
+# in sequence, then destroy the switch again.
+test_mdb_count_warning()
+{
+ # Tests for mdb_n_entries warning
+
+ log_info "MDB count warning tests"
+ switch_create_8021q
+ setup_wait
+
+ test_mdb_count_mcast_vlan_snooping_flush
+ test_mdb_count_mcast_snooping_flush
+ test_mdb_count_vlan_state_flush
+
+ switch_destroy
+}
+
if ! bridge link help 2>&1 | grep -q "mcast_max_groups"; then
echo "SKIP: iproute2 too old, missing bridge \"mcast_max_groups\" support"
exit $ksft_skip
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index ce64518aaa11..75a6c3d3c1da 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -29,6 +29,7 @@ CONFIG_NET_ACT_VLAN=m
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_CLS_U32=m
CONFIG_NET_EMATCH=y
CONFIG_NET_EMATCH_META=m
CONFIG_NETFILTER=y
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
index 892895659c7e..1f2bf6e81847 100755
--- a/tools/testing/selftests/net/forwarding/local_termination.sh
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -306,39 +306,39 @@ run_test()
if [ $skip_ptp = false ]; then
check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \
- "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type *: sync msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \
- "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type *: follow up msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \
- "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type *: peer delay req msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \
- "ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type *: sync msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \
- "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type *: follow up msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \
- "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type *: peer delay req msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \
- "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type *: sync msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \
- "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type *: follow up msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \
- "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type *: peer delay req msg" \
true "$test_name"
fi
diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
index af008fbf2725..eb2d8034de9c 100755
--- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -98,12 +98,20 @@ setup_prepare()
h1_create
h2_create
switch_create
+
+ if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then
+ sysctl_set net.bridge.bridge-nf-call-iptables 0
+ fi
}
cleanup()
{
pre_cleanup
+ if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then
+ sysctl_restore net.bridge.bridge-nf-call-iptables
+ fi
+
switch_destroy
h2_destroy
h1_destroy
diff --git a/tools/testing/selftests/net/forwarding/pedit_ip.sh b/tools/testing/selftests/net/forwarding/pedit_ip.sh
index d14efb2d23b2..9235674627ab 100755
--- a/tools/testing/selftests/net/forwarding/pedit_ip.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_ip.sh
@@ -91,12 +91,20 @@ setup_prepare()
h1_create
h2_create
switch_create
+
+ if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then
+ sysctl_set net.bridge.bridge-nf-call-iptables 0
+ fi
}
cleanup()
{
pre_cleanup
+ if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then
+ sysctl_restore net.bridge.bridge-nf-call-iptables
+ fi
+
switch_destroy
h2_destroy
h1_destroy
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index ea89e558672d..86edbc7e2489 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -223,7 +223,7 @@ mirred_egress_to_ingress_tcp_test()
ip_proto icmp \
action drop
- ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 &
+ ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 > $mirred_e2i_tf2 &
local rpid=$!
ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
wait -n $rpid
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index b43816dd998c..457f41d5e584 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -567,6 +567,21 @@ vxlan_encapped_ping_do()
local inner_tos=$1; shift
local outer_tos=$1; shift
+ local ipv4hdr=$(:
+ )"45:"$( : IP version + IHL
+ )"$inner_tos:"$( : IP TOS
+ )"00:54:"$( : IP total length
+ )"99:83:"$( : IP identification
+ )"40:00:"$( : IP flags + frag off
+ )"40:"$( : IP TTL
+ )"01:"$( : IP proto
+ )"CHECKSUM:"$( : IP header csum
+ )"c0:00:02:03:"$( : IP saddr: 192.0.2.3
+ )"c0:00:02:01"$( : IP daddr: 192.0.2.1
+ )
+ local checksum=$(payload_template_calc_checksum "$ipv4hdr")
+ ipv4hdr=$(payload_template_expand_checksum "$ipv4hdr" $checksum)
+
$MZ $dev -c $count -d 100msec -q \
-b $next_hop_mac -B $dest_ip \
-t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(:
@@ -577,16 +592,7 @@ vxlan_encapped_ping_do()
)"$dest_mac:"$( : ETH daddr
)"$(mac_get w2):"$( : ETH saddr
)"08:00:"$( : ETH type
- )"45:"$( : IP version + IHL
- )"$inner_tos:"$( : IP TOS
- )"00:54:"$( : IP total length
- )"99:83:"$( : IP identification
- )"40:00:"$( : IP flags + frag off
- )"40:"$( : IP TTL
- )"01:"$( : IP proto
- )"00:00:"$( : IP header csum
- )"c0:00:02:03:"$( : IP saddr: 192.0.2.3
- )"c0:00:02:01:"$( : IP daddr: 192.0.2.1
+ )"$ipv4hdr:"$( : IPv4 header
)"08:"$( : ICMP type
)"00:"$( : ICMP code
)"8b:f2:"$( : ICMP csum
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
index a603f7b0a08f..e642feeada0e 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
@@ -695,7 +695,7 @@ vxlan_encapped_ping_do()
)"6"$( : IP version
)"$inner_tos"$( : Traffic class
)"0:00:00:"$( : Flow label
- )"00:08:"$( : Payload length
+ )"00:03:"$( : Payload length
)"3a:"$( : Next header
)"04:"$( : Hop limit
)"$saddr:"$( : IP saddr
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
index 6a570d256e07..2cf4c6d9245b 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
@@ -138,13 +138,18 @@ install_capture()
defer tc qdisc del dev "$dev" clsact
tc filter add dev "$dev" ingress proto ip pref 104 \
- flower skip_hw ip_proto udp dst_port "$VXPORT" \
- action pass
+ u32 match ip protocol 0x11 0xff \
+ match u16 "$VXPORT" 0xffff at 0x16 \
+ match u16 0x0800 0xffff at 0x30 \
+ action pass
defer tc filter del dev "$dev" ingress proto ip pref 104
tc filter add dev "$dev" ingress proto ipv6 pref 106 \
- flower skip_hw ip_proto udp dst_port "$VXPORT" \
- action pass
+ u32 match ip6 protocol 0x11 0xff \
+ match u16 "$VXPORT" 0xffff at 0x2a \
+ match u16 0x86dd 0xffff at 0x44 \
+ match u8 0x11 0xff at 0x4c \
+ action pass
defer tc filter del dev "$dev" ingress proto ipv6 pref 106
}
@@ -248,13 +253,6 @@ vx_create()
}
export -f vx_create
-vx_wait()
-{
- # Wait for all the ARP, IGMP etc. noise to settle down so that the
- # tunnel is clear for measurements.
- sleep 10
-}
-
vx10_create()
{
vx_create vx10 10 id 1000 "$@"
@@ -267,18 +265,6 @@ vx20_create()
}
export -f vx20_create
-vx10_create_wait()
-{
- vx10_create "$@"
- vx_wait
-}
-
-vx20_create_wait()
-{
- vx20_create "$@"
- vx_wait
-}
-
ns_init_common()
{
local ns=$1; shift
@@ -554,7 +540,7 @@ ipv4_nomcroute()
# Install a misleading (S,G) rule to attempt to trick the system into
# pushing the packets elsewhere.
adf_install_broken_sg
- vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$swp2"
+ vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2"
do_test 4 10 0 "IPv4 nomcroute"
}
@@ -562,7 +548,7 @@ ipv6_nomcroute()
{
# Like for IPv4, install a misleading (S,G).
adf_install_broken_sg
- vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+ vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
do_test 6 10 0 "IPv6 nomcroute"
}
@@ -581,35 +567,35 @@ ipv6_nomcroute_rx()
ipv4_mcroute()
{
adf_install_sg
- vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
do_test 4 10 10 "IPv4 mcroute"
}
ipv6_mcroute()
{
adf_install_sg
- vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
do_test 6 10 10 "IPv6 mcroute"
}
ipv4_mcroute_rx()
{
adf_install_sg
- vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
ipv4_do_test_rx 0 "IPv4 mcroute ping"
}
ipv6_mcroute_rx()
{
adf_install_sg
- vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
ipv6_do_test_rx 0 "IPv6 mcroute ping"
}
ipv4_mcroute_changelink()
{
adf_install_sg
- vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR"
+ vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR"
ip link set dev vx10 type vxlan mcroute
sleep 1
do_test 4 10 10 "IPv4 mcroute changelink"
@@ -618,7 +604,7 @@ ipv4_mcroute_changelink()
ipv6_mcroute_changelink()
{
adf_install_sg
- vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
ip link set dev vx20 type vxlan mcroute
sleep 1
do_test 6 10 10 "IPv6 mcroute changelink"
@@ -627,47 +613,47 @@ ipv6_mcroute_changelink()
ipv4_mcroute_starg()
{
adf_install_starg
- vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
do_test 4 10 10 "IPv4 mcroute (*,G)"
}
ipv6_mcroute_starg()
{
adf_install_starg
- vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
do_test 6 10 10 "IPv6 mcroute (*,G)"
}
ipv4_mcroute_starg_rx()
{
adf_install_starg
- vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping"
}
ipv6_mcroute_starg_rx()
{
adf_install_starg
- vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping"
}
ipv4_mcroute_noroute()
{
- vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
do_test 4 0 0 "IPv4 mcroute, no route"
}
ipv6_mcroute_noroute()
{
- vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
do_test 6 0 0 "IPv6 mcroute, no route"
}
ipv4_mcroute_fdb()
{
adf_install_sg
- vx10_create_wait local 192.0.2.100 dev "$IPMR" mcroute
+ vx10_create local 192.0.2.100 dev "$IPMR" mcroute
bridge fdb add dev vx10 \
00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR"
do_test 4 10 10 "IPv4 mcroute FDB"
@@ -676,7 +662,7 @@ ipv4_mcroute_fdb()
ipv6_mcroute_fdb()
{
adf_install_sg
- vx20_create_wait local 2001:db8:4::1 dev "$IPMR" mcroute
+ vx20_create local 2001:db8:4::1 dev "$IPMR" mcroute
bridge -6 fdb add dev vx20 \
00:00:00:00:00:00 self static dst "$GROUP6" via "$IPMR"
do_test 6 10 10 "IPv6 mcroute FDB"
@@ -686,7 +672,7 @@ ipv6_mcroute_fdb()
ipv4_mcroute_fdb_oif0()
{
adf_install_sg
- vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
bridge fdb del dev vx10 00:00:00:00:00:00
bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
do_test 4 10 10 "IPv4 mcroute oif=0"
@@ -703,7 +689,7 @@ ipv6_mcroute_fdb_oif0()
defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR"
adf_install_sg
- vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
bridge -6 fdb del dev vx20 00:00:00:00:00:00
bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6"
do_test 6 10 10 "IPv6 mcroute oif=0"
@@ -716,7 +702,7 @@ ipv4_mcroute_fdb_oif0_sep()
adf_install_sg_sep
adf_ip_addr_add lo 192.0.2.120/28
- vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
bridge fdb del dev vx10 00:00:00:00:00:00
bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0"
@@ -727,7 +713,7 @@ ipv4_mcroute_fdb_oif0_sep_rx()
adf_install_sg_sep_rx lo
adf_ip_addr_add lo 192.0.2.120/28
- vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
bridge fdb del dev vx10 00:00:00:00:00:00
bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping"
@@ -738,7 +724,7 @@ ipv4_mcroute_fdb_sep_rx()
adf_install_sg_sep_rx lo
adf_ip_addr_add lo 192.0.2.120/28
- vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
bridge fdb del dev vx10 00:00:00:00:00:00
bridge fdb add \
dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo
@@ -750,7 +736,7 @@ ipv6_mcroute_fdb_sep_rx()
adf_install_sg_sep_rx "X$IPMR"
adf_ip_addr_add "X$IPMR" 2001:db8:5::1/64
- vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute
+ vx20_create local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute
bridge -6 fdb del dev vx20 00:00:00:00:00:00
bridge -6 fdb add dev vx20 00:00:00:00:00:00 \
self static dst "$GROUP6" via "X$IPMR"
diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile
index 4b6afc0fe9f8..31fb9326cf53 100644
--- a/tools/testing/selftests/net/hsr/Makefile
+++ b/tools/testing/selftests/net/hsr/Makefile
@@ -5,6 +5,8 @@ top_srcdir = ../../../../..
TEST_PROGS := \
hsr_ping.sh \
hsr_redbox.sh \
+ link_faults.sh \
+ prp_ping.sh \
# end of TEST_PROGS
TEST_FILES += hsr_common.sh
diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
index 5a65f4f836be..f4d685df4345 100755
--- a/tools/testing/selftests/net/hsr/hsr_ping.sh
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -27,31 +27,34 @@ while getopts "$optstring" option;do
esac
done
-do_complete_ping_test()
+do_ping_tests()
{
- echo "INFO: Initial validation ping."
- # Each node has to be able each one.
- do_ping "$ns1" 100.64.0.2
- do_ping "$ns2" 100.64.0.1
- do_ping "$ns3" 100.64.0.1
- stop_if_error "Initial validation failed."
-
- do_ping "$ns1" 100.64.0.3
- do_ping "$ns2" 100.64.0.3
- do_ping "$ns3" 100.64.0.2
+ local netid="$1"
- do_ping "$ns1" dead:beef:1::2
- do_ping "$ns1" dead:beef:1::3
- do_ping "$ns2" dead:beef:1::1
- do_ping "$ns2" dead:beef:1::2
- do_ping "$ns3" dead:beef:1::1
- do_ping "$ns3" dead:beef:1::2
+ echo "INFO: Running ping tests."
- stop_if_error "Initial validation failed."
+ echo "INFO: Initial validation ping."
+ # Each node has to be able to reach each one.
+ do_ping "$ns1" "100.64.$netid.2"
+ do_ping "$ns1" "100.64.$netid.3"
+ do_ping "$ns2" "100.64.$netid.1"
+ do_ping "$ns2" "100.64.$netid.3"
+ do_ping "$ns3" "100.64.$netid.1"
+ do_ping "$ns3" "100.64.$netid.2"
+ stop_if_error "Initial validation failed on IPv4."
+
+ # Same matrix as the IPv4 pings above: every node pings both of its peers.
+ # (ns2 owns ::2, so its second target has to be ::3, not itself.)
+ do_ping "$ns1" "dead:beef:$netid::2"
+ do_ping "$ns1" "dead:beef:$netid::3"
+ do_ping "$ns2" "dead:beef:$netid::1"
+ do_ping "$ns2" "dead:beef:$netid::3"
+ do_ping "$ns3" "dead:beef:$netid::1"
+ do_ping "$ns3" "dead:beef:$netid::2"
+ stop_if_error "Initial validation failed on IPv6."
# Wait until all supervision frames have been processed and the node
# entries have been merged. Otherwise duplicate frames will be observed, which
# is valid at this stage.
+ echo "INFO: Wait for node table entries to be merged."
WAIT=5
while [ ${WAIT} -gt 0 ]
do
@@ -68,62 +71,30 @@ do_complete_ping_test()
sleep 1
echo "INFO: Longer ping test."
- do_ping_long "$ns1" 100.64.0.2
- do_ping_long "$ns1" dead:beef:1::2
- do_ping_long "$ns1" 100.64.0.3
- do_ping_long "$ns1" dead:beef:1::3
-
- stop_if_error "Longer ping test failed."
-
- do_ping_long "$ns2" 100.64.0.1
- do_ping_long "$ns2" dead:beef:1::1
- do_ping_long "$ns2" 100.64.0.3
- do_ping_long "$ns2" dead:beef:1::2
- stop_if_error "Longer ping test failed."
-
- do_ping_long "$ns3" 100.64.0.1
- do_ping_long "$ns3" dead:beef:1::1
- do_ping_long "$ns3" 100.64.0.2
- do_ping_long "$ns3" dead:beef:1::2
- stop_if_error "Longer ping test failed."
-
- echo "INFO: Cutting one link."
- do_ping_long "$ns1" 100.64.0.3 &
-
- sleep 3
- ip -net "$ns3" link set ns3eth1 down
- wait
-
- ip -net "$ns3" link set ns3eth1 up
-
- stop_if_error "Failed with one link down."
-
- echo "INFO: Delay the link and drop a few packages."
- tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms
- tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25%
-
- do_ping_long "$ns1" 100.64.0.2
- do_ping_long "$ns1" 100.64.0.3
-
- stop_if_error "Failed with delay and packetloss."
-
- do_ping_long "$ns2" 100.64.0.1
- do_ping_long "$ns2" 100.64.0.3
-
- stop_if_error "Failed with delay and packetloss."
-
- do_ping_long "$ns3" 100.64.0.1
- do_ping_long "$ns3" 100.64.0.2
- stop_if_error "Failed with delay and packetloss."
-
- echo "INFO: All good."
+ do_ping_long "$ns1" "100.64.$netid.2"
+ do_ping_long "$ns1" "dead:beef:$netid::2"
+ do_ping_long "$ns1" "100.64.$netid.3"
+ do_ping_long "$ns1" "dead:beef:$netid::3"
+ stop_if_error "Longer ping test failed (ns1)."
+
+ do_ping_long "$ns2" "100.64.$netid.1"
+ do_ping_long "$ns2" "dead:beef:$netid::1"
+ do_ping_long "$ns2" "100.64.$netid.3"
+ do_ping_long "$ns2" "dead:beef:$netid::3"
+ stop_if_error "Longer ping test failed (ns2)."
+
+ do_ping_long "$ns3" "100.64.$netid.1"
+ do_ping_long "$ns3" "dead:beef:$netid::1"
+ do_ping_long "$ns3" "100.64.$netid.2"
+ do_ping_long "$ns3" "dead:beef:$netid::2"
+ stop_if_error "Longer ping test failed (ns3)."
}
setup_hsr_interfaces()
{
local HSRv="$1"
- echo "INFO: preparing interfaces for HSRv${HSRv}."
+ echo "INFO: Preparing interfaces for HSRv${HSRv}."
# Three HSR nodes. Each node has one link to each of its neighbour, two links in total.
#
# ns1eth1 ----- ns2eth1
@@ -140,17 +111,20 @@ setup_hsr_interfaces()
ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2"
# HSRv0/1
- ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version $HSRv proto 0
- ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version $HSRv proto 0
- ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version $HSRv proto 0
+ ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 \
+ slave2 ns1eth2 supervision 45 version "$HSRv" proto 0
+ ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 \
+ slave2 ns2eth2 supervision 45 version "$HSRv" proto 0
+ ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 \
+ slave2 ns3eth2 supervision 45 version "$HSRv" proto 0
# IP for HSR
ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1
- ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad
+ ip -net "$ns1" addr add dead:beef:0::1/64 dev hsr1 nodad
ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2
- ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad
+ ip -net "$ns2" addr add dead:beef:0::2/64 dev hsr2 nodad
ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3
- ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad
+ ip -net "$ns3" addr add dead:beef:0::3/64 dev hsr3 nodad
ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1
ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2
@@ -177,113 +151,56 @@ setup_hsr_interfaces()
setup_vlan_interfaces() {
ip -net "$ns1" link add link hsr1 name hsr1.2 type vlan id 2
- ip -net "$ns1" link add link hsr1 name hsr1.3 type vlan id 3
- ip -net "$ns1" link add link hsr1 name hsr1.4 type vlan id 4
- ip -net "$ns1" link add link hsr1 name hsr1.5 type vlan id 5
-
ip -net "$ns2" link add link hsr2 name hsr2.2 type vlan id 2
- ip -net "$ns2" link add link hsr2 name hsr2.3 type vlan id 3
- ip -net "$ns2" link add link hsr2 name hsr2.4 type vlan id 4
- ip -net "$ns2" link add link hsr2 name hsr2.5 type vlan id 5
-
ip -net "$ns3" link add link hsr3 name hsr3.2 type vlan id 2
- ip -net "$ns3" link add link hsr3 name hsr3.3 type vlan id 3
- ip -net "$ns3" link add link hsr3 name hsr3.4 type vlan id 4
- ip -net "$ns3" link add link hsr3 name hsr3.5 type vlan id 5
ip -net "$ns1" addr add 100.64.2.1/24 dev hsr1.2
- ip -net "$ns1" addr add 100.64.3.1/24 dev hsr1.3
- ip -net "$ns1" addr add 100.64.4.1/24 dev hsr1.4
- ip -net "$ns1" addr add 100.64.5.1/24 dev hsr1.5
+ ip -net "$ns1" addr add dead:beef:2::1/64 dev hsr1.2 nodad
ip -net "$ns2" addr add 100.64.2.2/24 dev hsr2.2
- ip -net "$ns2" addr add 100.64.3.2/24 dev hsr2.3
- ip -net "$ns2" addr add 100.64.4.2/24 dev hsr2.4
- ip -net "$ns2" addr add 100.64.5.2/24 dev hsr2.5
+ ip -net "$ns2" addr add dead:beef:2::2/64 dev hsr2.2 nodad
ip -net "$ns3" addr add 100.64.2.3/24 dev hsr3.2
- ip -net "$ns3" addr add 100.64.3.3/24 dev hsr3.3
- ip -net "$ns3" addr add 100.64.4.3/24 dev hsr3.4
- ip -net "$ns3" addr add 100.64.5.3/24 dev hsr3.5
+ ip -net "$ns3" addr add dead:beef:2::3/64 dev hsr3.2 nodad
ip -net "$ns1" link set dev hsr1.2 up
- ip -net "$ns1" link set dev hsr1.3 up
- ip -net "$ns1" link set dev hsr1.4 up
- ip -net "$ns1" link set dev hsr1.5 up
-
ip -net "$ns2" link set dev hsr2.2 up
- ip -net "$ns2" link set dev hsr2.3 up
- ip -net "$ns2" link set dev hsr2.4 up
- ip -net "$ns2" link set dev hsr2.5 up
-
ip -net "$ns3" link set dev hsr3.2 up
- ip -net "$ns3" link set dev hsr3.3 up
- ip -net "$ns3" link set dev hsr3.4 up
- ip -net "$ns3" link set dev hsr3.5 up
}
-hsr_vlan_ping() {
- do_ping "$ns1" 100.64.2.2
- do_ping "$ns1" 100.64.3.2
- do_ping "$ns1" 100.64.4.2
- do_ping "$ns1" 100.64.5.2
-
- do_ping "$ns1" 100.64.2.3
- do_ping "$ns1" 100.64.3.3
- do_ping "$ns1" 100.64.4.3
- do_ping "$ns1" 100.64.5.3
-
- do_ping "$ns2" 100.64.2.1
- do_ping "$ns2" 100.64.3.1
- do_ping "$ns2" 100.64.4.1
- do_ping "$ns2" 100.64.5.1
-
- do_ping "$ns2" 100.64.2.3
- do_ping "$ns2" 100.64.3.3
- do_ping "$ns2" 100.64.4.3
- do_ping "$ns2" 100.64.5.3
-
- do_ping "$ns3" 100.64.2.1
- do_ping "$ns3" 100.64.3.1
- do_ping "$ns3" 100.64.4.1
- do_ping "$ns3" 100.64.5.1
-
- do_ping "$ns3" 100.64.2.2
- do_ping "$ns3" 100.64.3.2
- do_ping "$ns3" 100.64.4.2
- do_ping "$ns3" 100.64.5.2
+run_ping_tests()
+{
+ # Ping tests on the untagged HSR network (netid 0). do_ping_tests prints
+ # the "Running ping tests." banner itself, so it is not repeated here.
+ do_ping_tests 0
}
-run_vlan_tests() {
+run_vlan_tests()
+{
vlan_challenged_hsr1=$(ip net exec "$ns1" ethtool -k hsr1 | grep "vlan-challenged" | awk '{print $2}')
vlan_challenged_hsr2=$(ip net exec "$ns2" ethtool -k hsr2 | grep "vlan-challenged" | awk '{print $2}')
vlan_challenged_hsr3=$(ip net exec "$ns3" ethtool -k hsr3 | grep "vlan-challenged" | awk '{print $2}')
if [[ "$vlan_challenged_hsr1" = "off" || "$vlan_challenged_hsr2" = "off" || "$vlan_challenged_hsr3" = "off" ]]; then
- echo "INFO: Running VLAN tests"
+ echo "INFO: Running VLAN ping tests"
setup_vlan_interfaces
- hsr_vlan_ping
+ do_ping_tests 2
else
echo "INFO: Not Running VLAN tests as the device does not support VLAN"
fi
}
check_prerequisites
-setup_ns ns1 ns2 ns3
-
trap cleanup_all_ns EXIT
+setup_ns ns1 ns2 ns3
setup_hsr_interfaces 0
-do_complete_ping_test
-
+run_ping_tests
run_vlan_tests
setup_ns ns1 ns2 ns3
-
setup_hsr_interfaces 1
-do_complete_ping_test
-
+run_ping_tests
run_vlan_tests
exit $ret
diff --git a/tools/testing/selftests/net/hsr/link_faults.sh b/tools/testing/selftests/net/hsr/link_faults.sh
new file mode 100755
index 000000000000..be526281571c
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/link_faults.sh
@@ -0,0 +1,378 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# shellcheck disable=SC2329
+
+source ../lib.sh
+
+ALL_TESTS="
+ test_clean_hsrv0
+ test_cut_link_hsrv0
+ test_packet_loss_hsrv0
+ test_high_packet_loss_hsrv0
+ test_reordering_hsrv0
+
+ test_clean_hsrv1
+ test_cut_link_hsrv1
+ test_packet_loss_hsrv1
+ test_high_packet_loss_hsrv1
+ test_reordering_hsrv1
+
+ test_clean_prp
+ test_cut_link_prp
+ test_packet_loss_prp
+ test_high_packet_loss_prp
+ test_reordering_prp
+"
+
+# The tests are running ping for 5sec with a relatively short interval in
+# different scenarios with faulty links (cut links, packet loss, delay,
+# reordering) that should be recoverable by HSR/PRP. The ping interval (10ms)
+# is short enough that the base delay (50ms) leads to a queue in the netem
+# qdiscs which is needed for reordering.
+
+# Create namespaces node1..node3, connect them with veth pairs in a ring and
+# add one HSR device per namespace.
+# $1: value passed as "version" to "ip link add ... type hsr".
+setup_hsr_topo()
+{
+ # Three HSR nodes in a ring, every node has a LAN A interface connected
+ # to the LAN B interface of the next node.
+ #
+ # node1 node2
+ #
+ # vethA -------- vethB
+ # hsr1 hsr2
+ # vethB vethA
+ # \ /
+ # vethA vethB
+ # hsr3
+ #
+ # node3
+
+ local ver="$1"
+
+ setup_ns node1 node2 node3
+
+ # veth links
+ # shellcheck disable=SC2154 # variables assigned by setup_ns
+ ip link add vethA netns "$node1" type veth peer name vethB netns "$node2"
+ # shellcheck disable=SC2154 # variables assigned by setup_ns
+ ip link add vethA netns "$node2" type veth peer name vethB netns "$node3"
+ ip link add vethA netns "$node3" type veth peer name vethB netns "$node1"
+
+ # MAC addresses (not needed for HSR operation, but helps with debugging)
+ ip -net "$node1" link set address 00:11:22:00:01:01 dev vethA
+ ip -net "$node1" link set address 00:11:22:00:01:02 dev vethB
+
+ ip -net "$node2" link set address 00:11:22:00:02:01 dev vethA
+ ip -net "$node2" link set address 00:11:22:00:02:02 dev vethB
+
+ ip -net "$node3" link set address 00:11:22:00:03:01 dev vethA
+ ip -net "$node3" link set address 00:11:22:00:03:02 dev vethB
+
+ # HSR interfaces
+ ip -net "$node1" link add name hsr1 type hsr proto 0 version "$ver" \
+ slave1 vethA slave2 vethB supervision 45
+ ip -net "$node2" link add name hsr2 type hsr proto 0 version "$ver" \
+ slave1 vethA slave2 vethB supervision 45
+ ip -net "$node3" link add name hsr3 type hsr proto 0 version "$ver" \
+ slave1 vethA slave2 vethB supervision 45
+
+ # IP addresses
+ ip -net "$node1" addr add 100.64.0.1/24 dev hsr1
+ ip -net "$node2" addr add 100.64.0.2/24 dev hsr2
+ ip -net "$node3" addr add 100.64.0.3/24 dev hsr3
+
+ # Set all links up
+ ip -net "$node1" link set vethA up
+ ip -net "$node1" link set vethB up
+ ip -net "$node1" link set hsr1 up
+
+ ip -net "$node2" link set vethA up
+ ip -net "$node2" link set vethB up
+ ip -net "$node2" link set hsr2 up
+
+ ip -net "$node3" link set vethA up
+ ip -net "$node3" link set vethB up
+ ip -net "$node3" link set hsr3 up
+}
+
+# Create namespaces node1 and node2, connect them with two parallel veth
+# pairs and add one "type hsr ... proto 1" device (prp1/prp2) per namespace.
+setup_prp_topo()
+{
+ # Two PRP nodes, connected by two links (treated as LAN A and LAN B).
+ #
+ # vethA ----- vethA
+ # prp1 prp2
+ # vethB ----- vethB
+ #
+ # node1 node2
+
+ setup_ns node1 node2
+
+ # veth links
+ ip link add vethA netns "$node1" type veth peer name vethA netns "$node2"
+ ip link add vethB netns "$node1" type veth peer name vethB netns "$node2"
+
+ # MAC addresses will be copied from LAN A interface
+ ip -net "$node1" link set address 00:11:22:00:00:01 dev vethA
+ ip -net "$node2" link set address 00:11:22:00:00:02 dev vethA
+
+ # PRP interfaces
+ ip -net "$node1" link add name prp1 type hsr \
+ slave1 vethA slave2 vethB supervision 45 proto 1
+ ip -net "$node2" link add name prp2 type hsr \
+ slave1 vethA slave2 vethB supervision 45 proto 1
+
+ # IP addresses
+ ip -net "$node1" addr add 100.64.0.1/24 dev prp1
+ ip -net "$node2" addr add 100.64.0.2/24 dev prp2
+
+ # All links up
+ ip -net "$node1" link set vethA up
+ ip -net "$node1" link set vethB up
+ ip -net "$node1" link set prp1 up
+
+ ip -net "$node2" link set vethA up
+ ip -net "$node2" link set vethB up
+ ip -net "$node2" link set prp2 up
+}
+
+# Poll the HSR node tables in debugfs, once per second for at most five
+# attempts, until they contain at least one line starting with a MAC address
+# and no all-zero MAC address. Calls check_err if that state is never reached.
+wait_for_hsr_node_table()
+{
+ # Loop state is kept local so it cannot leak into (or clobber) callers.
+ local attempts=5
+ local nts
+
+ log_info "Wait for node table entries to be merged."
+ while [ "$attempts" -gt 0 ]; do
+ nts=$(cat /sys/kernel/debug/hsr/hsr*/node_table)
+
+ # We need entries in the node tables, and they need to be merged
+ if (echo "$nts" | grep -qE "^([0-9a-f]{2}:){5}") && \
+ ! (echo "$nts" | grep -q "00:00:00:00:00:00"); then
+ return
+ fi
+
+ sleep 1
+ attempts=$((attempts - 1))
+ done
+ check_err 1 "Failed to wait for merged node table entries"
+}
+
+# Build the topology for the protocol named in $1 ("HSRv0", "HSRv1" or "PRP").
+# For the HSR variants the node tables are waited for as well; any other name
+# is reported through check_err.
+setup_topo()
+{
+ local proto="$1"
+
+ case "$proto" in
+ HSRv0|HSRv1)
+ # the trailing digit of the name is the HSR version
+ setup_hsr_topo "${proto#HSRv}"
+ wait_for_hsr_node_table
+ ;;
+ PRP)
+ setup_prp_topo
+ ;;
+ *)
+ check_err 1 "Unknown protocol (${proto})"
+ ;;
+ esac
+}
+
+# Run a quiet 400-packet ping with a 10ms interval and evaluate its summary.
+# $1: network namespace to run ping in
+# $2: destination address
+# $3: highest number of duplicate replies that is still accepted
+# Calls check_err when the summary is missing, when more duplicates than $3
+# are reported, or when the reported packet loss is not "0%".
+check_ping()
+{
+ local node="$1"
+ local dst="$2"
+ local accepted_dups="$3"
+ local ping_args="-q -i 0.01 -c 400"
+ # Result variables are local: callers such as test_packet_loss have their
+ # own "loss" variable, which a plain assignment here would overwrite.
+ local output
+ local dups=0
+ local loss="unknown"
+
+ log_info "Running ping $node -> $dst"
+ # shellcheck disable=SC2086
+ output=$(ip netns exec "$node" ping $ping_args "$dst" | \
+ grep "packets transmitted")
+ log_info "$output"
+
+ # Without a summary line there is nothing to evaluate; say so explicitly
+ # instead of reporting a confusing packet loss value.
+ if [ -z "$output" ]; then
+ check_err 1 "No ping summary for $node -> $dst"
+ return
+ fi
+
+ if [[ "$output" =~ \+([0-9]+)" duplicates" ]]; then
+ dups="${BASH_REMATCH[1]}"
+ fi
+ if [[ "$output" =~ ([0-9\.]+\%)" packet loss" ]]; then
+ loss="${BASH_REMATCH[1]}"
+ fi
+
+ if [ "$dups" -gt "$accepted_dups" ]; then
+ check_err 1 "Unexpected duplicate packets (${dups})"
+ fi
+ if [ "$loss" != "0%" ]; then
+ check_err 1 "Unexpected packet loss (${loss})"
+ fi
+}
+
+# Baseline test without any injected fault: set up the topology for protocol
+# $1 and ping 100.64.0.2 from node1, accepting no duplicates.
+test_clean()
+{
+ local proto="$1"
+
+ RET=0
+ tname="${FUNCNAME[0]} - ${proto}"
+
+ setup_topo "$proto"
+ # RET no longer equal to ksft_pass means setup already recorded a problem:
+ # log that as the "setup" result and skip the ping.
+ if ((RET != ksft_pass)); then
+ log_test "${tname} setup"
+ return
+ fi
+
+ check_ping "$node1" "100.64.0.2" 0
+
+ log_test "${tname}"
+}
+
+# Per-protocol entry points for test_clean, as listed in ALL_TESTS.
+test_clean_hsrv0()
+{
+ test_clean "HSRv0"
+}
+
+test_clean_hsrv1()
+{
+ test_clean "HSRv1"
+}
+
+test_clean_prp()
+{
+ test_clean "PRP"
+}
+
+# Set up the topology for protocol $1, then ping 100.64.0.2 from node1 while a
+# background subshell takes node1's vethB down two seconds into the run.
+# No duplicates are accepted.
+test_cut_link()
+{
+ local proto="$1"
+
+ RET=0
+ tname="${FUNCNAME[0]} - ${proto}"
+
+ setup_topo "$proto"
+ # RET no longer equal to ksft_pass means setup already recorded a problem:
+ # log that as the "setup" result and skip the ping.
+ if ((RET != ksft_pass)); then
+ log_test "${tname} setup"
+ return
+ fi
+
+ # Cutting link from subshell, so check_ping can run in the normal shell
+ # with access to global variables from the test harness.
+ (
+ sleep 2
+ log_info "Cutting link"
+ ip -net "$node1" link set vethB down
+ ) &
+ check_ping "$node1" "100.64.0.2" 0
+
+ # let the background subshell finish before the result is logged
+ wait
+ log_test "${tname}"
+}
+
+
+# Per-protocol entry points for test_cut_link, as listed in ALL_TESTS.
+test_cut_link_hsrv0()
+{
+ test_cut_link "HSRv0"
+}
+
+test_cut_link_hsrv1()
+{
+ test_cut_link "HSRv1"
+}
+
+test_cut_link_prp()
+{
+ test_cut_link "PRP"
+}
+
+# Set up the topology for protocol $1, add netem qdiscs on node1 (50ms delay on
+# vethA; 20ms delay plus loss $2 on vethB) and ping 100.64.0.2 from node1,
+# accepting up to 40 duplicates.
+# $1: protocol name understood by setup_topo
+# $2: value for the netem "loss" parameter, e.g. "20%"
+test_packet_loss()
+{
+ local proto="$1"
+ local loss="$2"
+
+ RET=0
+ tname="${FUNCNAME[0]} - ${proto}, ${loss}"
+
+ setup_topo "$proto"
+ # RET no longer equal to ksft_pass means setup already recorded a problem:
+ # log that as the "setup" result and skip the ping.
+ if ((RET != ksft_pass)); then
+ log_test "${tname} setup"
+ return
+ fi
+
+ # Packet loss with lower delay makes sure the packets on the lossy link
+ # arrive first.
+ tc -net "$node1" qdisc add dev vethA root netem delay 50ms
+ tc -net "$node1" qdisc add dev vethB root netem delay 20ms loss "$loss"
+
+ check_ping "$node1" "100.64.0.2" 40
+
+ log_test "${tname}"
+}
+
+# Per-protocol entry points for test_packet_loss, as listed in ALL_TESTS:
+# the plain variants use 20% loss, the "high" variants 80%.
+test_packet_loss_hsrv0()
+{
+ test_packet_loss "HSRv0" "20%"
+}
+
+test_packet_loss_hsrv1()
+{
+ test_packet_loss "HSRv1" "20%"
+}
+
+test_packet_loss_prp()
+{
+ test_packet_loss "PRP" "20%"
+}
+
+test_high_packet_loss_hsrv0()
+{
+ test_packet_loss "HSRv0" "80%"
+}
+
+test_high_packet_loss_hsrv1()
+{
+ test_packet_loss "HSRv1" "80%"
+}
+
+test_high_packet_loss_prp()
+{
+ test_packet_loss "PRP" "80%"
+}
+
+# Set up the topology for protocol $1, add netem qdiscs on node1 (50ms delay on
+# vethA; 50ms delay plus "reorder 20%" on vethB) and ping 100.64.0.2 from
+# node1, accepting up to 40 duplicates.
+test_reordering()
+{
+ local proto="$1"
+
+ RET=0
+ tname="${FUNCNAME[0]} - ${proto}"
+
+ setup_topo "$proto"
+ # RET no longer equal to ksft_pass means setup already recorded a problem:
+ # log that as the "setup" result and skip the ping.
+ if ((RET != ksft_pass)); then
+ log_test "${tname} setup"
+ return
+ fi
+
+ tc -net "$node1" qdisc add dev vethA root netem delay 50ms
+ tc -net "$node1" qdisc add dev vethB root netem delay 50ms reorder 20%
+
+ check_ping "$node1" "100.64.0.2" 40
+
+ log_test "${tname}"
+}
+
+# Per-protocol entry points for test_reordering, as listed in ALL_TESTS.
+test_reordering_hsrv0()
+{
+ test_reordering "HSRv0"
+}
+
+test_reordering_hsrv1()
+{
+ test_reordering "HSRv1"
+}
+
+test_reordering_prp()
+{
+ test_reordering "PRP"
+}
+
+# Installed as the EXIT trap handler below; delegates to cleanup_all_ns
+# (defined outside this file, presumably removes the namespaces from setup_ns).
+cleanup()
+{
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/hsr/prp_ping.sh b/tools/testing/selftests/net/hsr/prp_ping.sh
new file mode 100755
index 000000000000..fd2ba9f05d4c
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/prp_ping.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ipv6=true
+
+source ./hsr_common.sh
+
+optstring="h4"
+# Print the command-line help. Every option accepted by $optstring is listed,
+# including -h itself.
+usage() {
+	echo "Usage: $0 [OPTION]"
+	echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
+	echo -e "\t-h: show this help and exit"
+}
+
+# Parse the command line: -h prints the help and exits 0, -4 sets ipv6=false,
+# and an unknown option prints the help and exits 1.
+while getopts "$optstring" option;do
+ case "$option" in
+ "h")
+ usage "$0"
+ exit 0
+ ;;
+ "4")
+ ipv6=false
+ ;;
+ "?")
+ usage "$0"
+ exit 1
+ ;;
+esac
+done
+
+# Create the two veth links between node1 and node2, stack one "hsr" type
+# device with "proto 1" on top of them in each namespace (prp1 / prp2), assign
+# the IPv4 and IPv6 test addresses and bring every interface up.
+# Expects $node1 and $node2 to hold the namespace names.
+setup_prp_interfaces()
+{
+ echo "INFO: Preparing interfaces for PRP"
+# Two PRP nodes, connected by two links (treated as LAN A and LAN B).
+#
+# vethA ----- vethA
+# prp1 prp2
+# vethB ----- vethB
+#
+# node1 node2
+
+ # Interfaces
+ # shellcheck disable=SC2154 # variables assigned by setup_ns
+ ip link add vethA netns "$node1" type veth peer name vethA netns "$node2"
+ ip link add vethB netns "$node1" type veth peer name vethB netns "$node2"
+
+ # MAC addresses will be copied from LAN A interface
+ ip -net "$node1" link set address 00:11:22:00:00:01 dev vethA
+ ip -net "$node2" link set address 00:11:22:00:00:02 dev vethA
+
+ # PRP
+ ip -net "$node1" link add name prp1 type hsr \
+ slave1 vethA slave2 vethB supervision 45 proto 1
+ ip -net "$node2" link add name prp2 type hsr \
+ slave1 vethA slave2 vethB supervision 45 proto 1
+
+ # IP addresses
+ ip -net "$node1" addr add 100.64.0.1/24 dev prp1
+ ip -net "$node1" addr add dead:beef:0::1/64 dev prp1 nodad
+ ip -net "$node2" addr add 100.64.0.2/24 dev prp2
+ ip -net "$node2" addr add dead:beef:0::2/64 dev prp2 nodad
+
+ # All links up
+ ip -net "$node1" link set vethA up
+ ip -net "$node1" link set vethB up
+ ip -net "$node1" link set prp1 up
+
+ ip -net "$node2" link set vethA up
+ ip -net "$node2" link set vethB up
+ ip -net "$node2" link set prp2 up
+}
+
+# Add a VLAN (id 2) sub-interface on top of prp1 and prp2, give each one an
+# IPv4 and an IPv6 address in the "2" networks and bring them up.
+# Requires the prp1/prp2 devices created by setup_prp_interfaces.
+setup_vlan_interfaces()
+{
+ # Interfaces
+ ip -net "$node1" link add link prp1 name prp1.2 type vlan id 2
+ ip -net "$node2" link add link prp2 name prp2.2 type vlan id 2
+
+ # IP addresses
+ ip -net "$node1" addr add 100.64.2.1/24 dev prp1.2
+ ip -net "$node1" addr add dead:beef:2::1/64 dev prp1.2 nodad
+
+ ip -net "$node2" addr add 100.64.2.2/24 dev prp2.2
+ ip -net "$node2" addr add dead:beef:2::2/64 dev prp2.2 nodad
+
+ # All links up
+ ip -net "$node1" link set prp1.2 up
+ ip -net "$node2" link set prp2.2 up
+}
+
+# Ping in both directions between node1 and node2, first with do_ping and then
+# with do_ping_long, over IPv4 and IPv6.
+#   $1: network id inserted into the addresses (100.64.<id>.x, dead:beef:<id>::x)
+# do_ping, do_ping_long and stop_if_error are defined outside this chunk;
+# NOTE(review): whether the IPv6 pings honour $ipv6 is decided there.
+do_ping_tests()
+{
+ local netid="$1"
+
+ echo "INFO: Initial validation ping"
+
+ do_ping "$node1" "100.64.$netid.2"
+ do_ping "$node2" "100.64.$netid.1"
+ stop_if_error "Initial validation failed on IPv4"
+
+ do_ping "$node1" "dead:beef:$netid::2"
+ do_ping "$node2" "dead:beef:$netid::1"
+ stop_if_error "Initial validation failed on IPv6"
+
+ echo "INFO: Longer ping test."
+
+ do_ping_long "$node1" "100.64.$netid.2"
+ do_ping_long "$node2" "100.64.$netid.1"
+ stop_if_error "Longer ping test failed on IPv4."
+
+ do_ping_long "$node1" "dead:beef:$netid::2"
+ do_ping_long "$node2" "dead:beef:$netid::1"
+ stop_if_error "Longer ping test failed on IPv6."
+}
+
+# Run the ping tests on the untagged PRP devices (network id 0).
+run_ping_tests()
+{
+ echo "INFO: Running ping tests"
+ do_ping_tests 0
+}
+
+# Run the ping tests over VLAN 2, unless neither PRP device reports its
+# "vlan-challenged" feature as "off" in the ethtool -k output.
+run_vlan_ping_tests()
+{
+	vlan_challenged_prp1=$(ip net exec "$node1" ethtool -k prp1 |
+		awk '/vlan-challenged/ {print $2}')
+	vlan_challenged_prp2=$(ip net exec "$node2" ethtool -k prp2 |
+		awk '/vlan-challenged/ {print $2}')
+
+	# Guard clause: nothing to do when no device accepts VLANs.
+	if [[ "$vlan_challenged_prp1" != "off" && \
+	      "$vlan_challenged_prp2" != "off" ]]; then
+		echo "INFO: Not Running VLAN tests as the device does not support VLAN"
+		return
+	fi
+
+	echo "INFO: Running VLAN ping tests"
+	setup_vlan_interfaces
+	do_ping_tests 2
+}
+
+check_prerequisites
+trap cleanup_all_ns EXIT
+
+setup_ns node1 node2
+setup_prp_interfaces
+
+run_ping_tests
+run_vlan_ping_tests
+
+exit $ret
diff --git a/tools/testing/selftests/net/hsr/settings b/tools/testing/selftests/net/hsr/settings
index 0fbc037f2aa8..a953c96aa16e 100644
--- a/tools/testing/selftests/net/hsr/settings
+++ b/tools/testing/selftests/net/hsr/settings
@@ -1 +1 @@
-timeout=50
+timeout=180
diff --git a/tools/testing/selftests/net/icmp_rfc4884.c b/tools/testing/selftests/net/icmp_rfc4884.c
new file mode 100644
index 000000000000..cd826b913557
--- /dev/null
+++ b/tools/testing/selftests/net/icmp_rfc4884.c
@@ -0,0 +1,679 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <linux/errqueue.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <netinet/in.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "../kselftest_harness.h"
+
+/* UDP ports written into the quoted original datagram; src_port is also the
+ * port the datagram socket is bound to.
+ */
+static const unsigned short src_port = 44444;
+static const unsigned short dst_port = 55555;
+/* Fill byte for the payload of the quoted original datagram */
+static const uint8_t orig_payload_byte = 0xAA;
+
+/* The sizes are enum constants rather than const-qualified objects so that
+ * they are integer constant expressions: they are used in file-scope
+ * initializers, where ISO C does not accept the value of a const object.
+ */
+enum {
+	/* Minimal original datagram length per RFC 4884, in bytes */
+	min_orig_dgram_len = 128,
+	/* UDP payload length at which an IPv4 datagram reaches that minimum */
+	min_payload_len_v4 = min_orig_dgram_len -
+		(int)(sizeof(struct iphdr) + sizeof(struct udphdr)),
+	/* UDP payload length at which an IPv6 datagram reaches that minimum */
+	min_payload_len_v6 = min_orig_dgram_len -
+		(int)(sizeof(struct ipv6hdr) + sizeof(struct udphdr)),
+};
+
+/* An IPv4 or IPv6 socket address together with the size of the member in
+ * use, so that it can be handed to bind()/sendto() as (&sa, len).
+ */
+struct sockaddr_inet {
+ union {
+ struct sockaddr_in6 v6;
+ struct sockaddr_in v4;
+ struct sockaddr sa;
+ };
+ /* size of the active address member, filled in by set_addr() */
+ socklen_t len;
+};
+
+/* Address family specific parameters shared by the test variants */
+struct ip_case_info {
+ /* socket domain: AF_INET or AF_INET6 */
+ int domain;
+ /* setsockopt() level; also the cmsg level expected on the error queue */
+ int level;
+ /* first socket option to enable; also the expected cmsg type */
+ int opt1;
+ /* second socket option to enable */
+ int opt2;
+ /* protocol of the raw socket used to send the ICMP error */
+ int proto;
+ /* writes the ICMP error into buf; returns its length or a negative errno */
+ int (*build_func)(uint8_t *buf, ssize_t buflen, bool with_ext,
+ int payload_len, bool bad_csum, bool bad_len,
+ bool smaller_len);
+ /* smallest UDP payload length for which a non-zero offset is expected */
+ int min_payload;
+};
+
+/* Set the IFF_UP flag on the interface named "lo".
+ *
+ * Returns 0 on success, -1 if the helper socket cannot be created or if
+ * reading or writing the interface flags fails.
+ */
+static int bringup_loopback(void)
+{
+	struct ifreq req = { .ifr_name = "lo" };
+	int rc = -1;
+	int sk;
+
+	sk = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sk < 0)
+		return -1;
+
+	/* Read-modify-write so that the other flags are preserved */
+	if (ioctl(sk, SIOCGIFFLAGS, &req) >= 0) {
+		req.ifr_flags |= IFF_UP;
+		if (ioctl(sk, SIOCSIFFLAGS, &req) >= 0)
+			rc = 0;
+	}
+
+	close(sk);
+	return rc;
+}
+
+/* One's complement checksum over len bytes at buf.
+ *
+ * The bytes are summed as big-endian 16-bit words (an odd trailing byte is
+ * used as the high byte of a final word), carries are folded back in and the
+ * complement of the result is returned in host byte order.
+ */
+static uint16_t csum(const void *buf, size_t len)
+{
+	const uint8_t *bytes = buf;
+	uint32_t acc = 0;
+	size_t i;
+
+	for (i = 0; i + 1 < len; i += 2)
+		acc += (bytes[i] << 8) | bytes[i + 1];
+
+	/* odd length: the last byte has no partner */
+	if (len & 1)
+		acc += bytes[len - 1] << 8;
+
+	while (acc > 0xFFFF)
+		acc = (acc >> 16) + (acc & 0xFFFF);
+
+	return ~acc & 0xFFFF;
+}
+
+static int poll_err(int fd)
+{
+ struct pollfd pfd;
+
+ memset(&pfd, 0, sizeof(pfd));
+ pfd.fd = fd;
+
+ if (poll(&pfd, 1, 5000) != 1 || pfd.revents != POLLERR)
+ return -1;
+
+ return 0;
+}
+
+/* Fill *addr with the loopback address of the given domain and the given
+ * port (host byte order), and record the size of the member that was set.
+ * For any domain other than AF_INET / AF_INET6 the structure is left zeroed.
+ */
+static void set_addr(struct sockaddr_inet *addr, int domain,
+		     unsigned short port)
+{
+	memset(addr, 0, sizeof(*addr));
+
+	if (domain == AF_INET) {
+		struct sockaddr_in *sin = &addr->v4;
+
+		sin->sin_family = AF_INET;
+		sin->sin_port = htons(port);
+		sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		addr->len = sizeof(*sin);
+	} else if (domain == AF_INET6) {
+		struct sockaddr_in6 *sin6 = &addr->v6;
+
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = htons(port);
+		sin6->sin6_addr = in6addr_loopback;
+		addr->len = sizeof(*sin6);
+	}
+}
+
+/* Enable both socket options listed in info on fd and bind it to the
+ * loopback address of info->domain on src_port.
+ *
+ * Returns -1 as soon as one setsockopt() fails, otherwise the result of
+ * bind().
+ */
+static int bind_and_setsockopt(int fd, const struct ip_case_info *info)
+{
+	const int optnames[] = { info->opt1, info->opt2 };
+	struct sockaddr_inet local;
+	int on = 1;
+	size_t i;
+
+	for (i = 0; i < sizeof(optnames) / sizeof(optnames[0]); i++) {
+		if (setsockopt(fd, info->level, optnames[i], &on,
+			       sizeof(on)) < 0)
+			return -1;
+	}
+
+	set_addr(&local, info->domain, src_port);
+
+	return bind(fd, &local.sa, local.len);
+}
+
+/* Write an ICMP extension structure (extension header followed by a single
+ * object header) at buf.
+ *
+ * bad_csum:    store a checksum that is off by one
+ * bad_len:     store an invalid object length; smaller_len then selects
+ *              between a too small and a too large value
+ *
+ * Only the version, object length and checksum fields are written; every
+ * other byte keeps the value already present in buf. Returns the number of
+ * bytes the structure occupies, or -EINVAL if it does not fit in buflen.
+ */
+static int build_rfc4884_ext(uint8_t *buf, size_t buflen, bool bad_csum,
+ bool bad_len, bool smaller_len)
+{
+ struct icmp_extobj_hdr *objh;
+ struct icmp_ext_hdr *exthdr;
+ size_t obj_len, ext_len;
+ uint16_t sum;
+
+ /* Use an object payload of 4 bytes */
+ obj_len = sizeof(*objh) + sizeof(uint32_t);
+ ext_len = sizeof(*exthdr) + obj_len;
+
+ if (ext_len > buflen)
+ return -EINVAL;
+
+ exthdr = (struct icmp_ext_hdr *)buf;
+ objh = (struct icmp_extobj_hdr *)(buf + sizeof(*exthdr));
+
+ exthdr->version = 2;
+ /* When encoding a bad object length, either encode a length too small
+ * to fit the object header or too big to fit in the packet.
+ */
+ if (bad_len)
+ obj_len = smaller_len ? sizeof(*objh) - 1 : obj_len * 2;
+ objh->length = htons(obj_len);
+
+ /* ext_len was fixed before obj_len could be falsified, so the checksum
+ * always covers the bytes really emitted. It is computed last, with the
+ * checksum field as found in buf (the callers zero the buffer first).
+ */
+ sum = csum(buf, ext_len);
+ exthdr->checksum = htons(bad_csum ? sum - 1 : sum);
+
+ return ext_len;
+}
+
+static int build_orig_dgram_v4(uint8_t *buf, ssize_t buflen, int payload_len)
+{
+ struct udphdr *udph;
+ struct iphdr *iph;
+ size_t len = 0;
+
+ len = sizeof(*iph) + sizeof(*udph) + payload_len;
+ if (len > buflen)
+ return -EINVAL;
+
+ iph = (struct iphdr *)buf;
+ udph = (struct udphdr *)(buf + sizeof(*iph));
+
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->protocol = IPPROTO_UDP;
+ iph->saddr = htonl(INADDR_LOOPBACK);
+ iph->daddr = htonl(INADDR_LOOPBACK);
+ iph->tot_len = htons(len);
+ iph->check = htons(csum(iph, sizeof(*iph)));
+
+ udph->source = htons(src_port);
+ udph->dest = htons(dst_port);
+ udph->len = htons(sizeof(*udph) + payload_len);
+
+ memset(buf + sizeof(*iph) + sizeof(*udph), orig_payload_byte,
+ payload_len);
+
+ return len;
+}
+
+static int build_orig_dgram_v6(uint8_t *buf, ssize_t buflen, int payload_len)
+{
+ struct udphdr *udph;
+ struct ipv6hdr *iph;
+ size_t len = 0;
+
+ len = sizeof(*iph) + sizeof(*udph) + payload_len;
+ if (len > buflen)
+ return -EINVAL;
+
+ iph = (struct ipv6hdr *)buf;
+ udph = (struct udphdr *)(buf + sizeof(*iph));
+
+ iph->version = 6;
+ iph->payload_len = htons(sizeof(*udph) + payload_len);
+ iph->nexthdr = IPPROTO_UDP;
+ iph->saddr = in6addr_loopback;
+ iph->daddr = in6addr_loopback;
+
+ udph->source = htons(src_port);
+ udph->dest = htons(dst_port);
+ udph->len = htons(sizeof(*udph) + payload_len);
+
+ memset(buf + sizeof(*iph) + sizeof(*udph), orig_payload_byte,
+ payload_len);
+
+ return len;
+}
+
+/* Build an ICMPv4 destination unreachable (port unreachable) message in buf:
+ * ICMP header, the quoted original IPv4/UDP datagram and, if with_ext is
+ * set, an ICMP extension structure right behind it.
+ *
+ * payload_len is the UDP payload length of the quoted datagram; bad_csum,
+ * bad_len and smaller_len are passed on to build_rfc4884_ext().
+ *
+ * The whole buffer is zeroed first. Returns the message length, or the
+ * negative error returned by one of the helpers.
+ */
+static int build_icmpv4_pkt(uint8_t *buf, ssize_t buflen, bool with_ext,
+ int payload_len, bool bad_csum, bool bad_len,
+ bool smaller_len)
+{
+ struct icmphdr *icmph;
+ int len, ret;
+
+ len = sizeof(*icmph);
+ memset(buf, 0, buflen);
+
+ icmph = (struct icmphdr *)buf;
+ icmph->type = ICMP_DEST_UNREACH;
+ icmph->code = ICMP_PORT_UNREACH;
+ icmph->checksum = 0;
+
+ ret = build_orig_dgram_v4(buf + len, buflen - len, payload_len);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+
+ /* length of the quoted datagram, counted in 32-bit words */
+ icmph->un.reserved[1] = (len - sizeof(*icmph)) / sizeof(uint32_t);
+
+ if (with_ext) {
+ ret = build_rfc4884_ext(buf + len, buflen - len,
+ bad_csum, bad_len, smaller_len);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+ }
+
+ /* checksum over the complete message, computed while the field is 0 */
+ icmph->checksum = htons(csum(icmph, len));
+ return len;
+}
+
+/* Build an ICMPv6 destination unreachable (port unreachable) message in buf:
+ * ICMPv6 header, the quoted original IPv6/UDP datagram and, if with_ext is
+ * set, an ICMP extension structure right behind it.
+ *
+ * payload_len is the UDP payload length of the quoted datagram; bad_csum,
+ * bad_len and smaller_len are passed on to build_rfc4884_ext().
+ *
+ * The whole buffer is zeroed first. Returns the message length, or the
+ * negative error returned by one of the helpers.
+ */
+static int build_icmpv6_pkt(uint8_t *buf, ssize_t buflen, bool with_ext,
+ int payload_len, bool bad_csum, bool bad_len,
+ bool smaller_len)
+{
+ struct icmp6hdr *icmph;
+ int len, ret;
+
+ len = sizeof(*icmph);
+ memset(buf, 0, buflen);
+
+ icmph = (struct icmp6hdr *)buf;
+ icmph->icmp6_type = ICMPV6_DEST_UNREACH;
+ icmph->icmp6_code = ICMPV6_PORT_UNREACH;
+ icmph->icmp6_cksum = 0;
+
+ ret = build_orig_dgram_v6(buf + len, buflen - len, payload_len);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+
+ /* length of the quoted datagram, counted in 64-bit words */
+ icmph->icmp6_datagram_len = (len - sizeof(*icmph)) / sizeof(uint64_t);
+
+ if (with_ext) {
+ ret = build_rfc4884_ext(buf + len, buflen - len,
+ bad_csum, bad_len, smaller_len);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+ }
+
+ /* checksum over the message bytes only, computed while the field is 0 */
+ icmph->icmp6_cksum = htons(csum(icmph, len));
+ return len;
+}
+
+/* The fixture carries no per-test state */
+FIXTURE(rfc4884) {};
+
+/* Move the test into a network namespace of its own and bring its loopback
+ * interface up; either step failing aborts the test.
+ */
+FIXTURE_SETUP(rfc4884)
+{
+ int ret;
+
+ ret = unshare(CLONE_NEWNET);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("unshare(CLONE_NEWNET) failed: %s", strerror(errno));
+ }
+
+ ret = bringup_loopback();
+ ASSERT_EQ(ret, 0) TH_LOG("Failed to bring up loopback interface");
+}
+
+/* Nothing to undo explicitly */
+FIXTURE_TEARDOWN(rfc4884)
+{
+}
+
+/* Socket options, raw protocol and packet builder used by the IPv4 variants.
+ * File-local: only referenced by the variant tables in this file.
+ */
+static const struct ip_case_info ipv4_info = {
+	.domain = AF_INET,
+	.level = SOL_IP,
+	.opt1 = IP_RECVERR,
+	.opt2 = IP_RECVERR_RFC4884,
+	.proto = IPPROTO_ICMP,
+	.build_func = build_icmpv4_pkt,
+	.min_payload = min_payload_len_v4,
+};
+
+/* Socket options, raw protocol and packet builder used by the IPv6 variants.
+ * File-local: only referenced by the variant tables in this file.
+ */
+static const struct ip_case_info ipv6_info = {
+	.domain = AF_INET6,
+	.level = SOL_IPV6,
+	.opt1 = IPV6_RECVERR,
+	.opt2 = IPV6_RECVERR_RFC4884,
+	.proto = IPPROTO_ICMPV6,
+	.build_func = build_icmpv6_pkt,
+	.min_payload = min_payload_len_v6,
+};
+
+FIXTURE_VARIANT(rfc4884) {
+ /* IPv4/v6 related information */
+ struct ip_case_info info;
+ /* Whether to append an ICMP extension or not */
+ bool with_ext;
+ /* UDP payload length */
+ int payload_len;
+ /* Whether to generate a bad checksum in the ICMP extension structure */
+ bool bad_csum;
+ /* Whether to generate a bad length in the ICMP object header */
+ bool bad_len;
+ /* Whether it is too small to fit the object header or too big to fit
+ * in the packet
+ */
+ bool smaller_len;
+};
+
+/* Tests that a valid ICMPv4 error message with extension and the original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext_small_payload) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = 64,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv4 error message with extension and 128 bytes original
+ * datagram, generates an error with the expected offset, and does not raise the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v4,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv4 error message with extension and the original
+ * datagram is larger than 128 bytes, generates an error with the expected
+ * offset, and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext_large_payload) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = 256,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv4 error message without extension and the original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_small_payload) {
+ .info = ipv4_info,
+ .with_ext = false,
+ .payload_len = 64,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv4 error message without extension and 128 bytes
+ * original datagram, generates an error with zero offset, and does not raise
+ * the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_min_payload) {
+ .info = ipv4_info,
+ .with_ext = false,
+ .payload_len = min_payload_len_v4,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv4 error message without extension and the original
+ * datagram is larger than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_large_payload) {
+ .info = ipv4_info,
+ .with_ext = false,
+ .payload_len = 256,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that an ICMPv4 error message with extension and an invalid checksum,
+ * generates an error with the expected offset, and raises the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_checksum) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v4,
+ .bad_csum = true,
+ .bad_len = false,
+};
+
+/* Tests that an ICMPv4 error message with extension and an object length
+ * smaller than the object header, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_length_small) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v4,
+ .bad_csum = false,
+ .bad_len = true,
+ .smaller_len = true,
+};
+
+/* Tests that an ICMPv4 error message with extension and an object length that
+ * is too big to fit in the packet, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_length_large) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v4,
+ .bad_csum = false,
+ .bad_len = true,
+ .smaller_len = false,
+};
+
+/* Tests that a valid ICMPv6 error message with extension and the original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext_small_payload) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = 64,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv6 error message with extension and 128 bytes original
+ * datagram, generates an error with the expected offset, and does not raise the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v6,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv6 error message with extension and the original
+ * datagram is larger than 128 bytes, generates an error with the expected
+ * offset, and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext_large_payload) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = 256,
+ .bad_csum = false,
+ .bad_len = false,
+};
+/* Tests that a valid ICMPv6 error message without extension and the original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_small_payload) {
+ .info = ipv6_info,
+ .with_ext = false,
+ .payload_len = 64,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv6 error message without extension and 128 bytes
+ * original datagram, generates an error with zero offset, and does not
+ * raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_min_payload) {
+ .info = ipv6_info,
+ .with_ext = false,
+ .payload_len = min_payload_len_v6,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv6 error message without extension and the original
+ * datagram is larger than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_large_payload) {
+ .info = ipv6_info,
+ .with_ext = false,
+ .payload_len = 256,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that an ICMPv6 error message with extension and an invalid checksum,
+ * generates an error with the expected offset, and raises the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_checksum) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v6,
+ .bad_csum = true,
+ .bad_len = false,
+};
+
+/* Tests that an ICMPv6 error message with extension and an object length
+ * smaller than the object header, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_length_small) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v6,
+ .bad_csum = false,
+ .bad_len = true,
+ .smaller_len = true,
+};
+
+/* Tests that an ICMPv6 error message with extension and an object length that
+ * is too big to fit in the packet, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_length_large) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v6,
+ .bad_csum = false,
+ .bad_len = true,
+ .smaller_len = false,
+};
+
+static void
+check_rfc4884_offset(struct __test_metadata *_metadata, int sock,
+ const FIXTURE_VARIANT(rfc4884) *v)
+{
+ char rxbuf[1024];
+ char ctrl[1024];
+ struct iovec iov = {
+ .iov_base = rxbuf,
+ .iov_len = sizeof(rxbuf)
+ };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = ctrl,
+ .msg_controllen = sizeof(ctrl),
+ };
+ struct cmsghdr *cmsg;
+ int recv;
+
+ ASSERT_EQ(poll_err(sock), 0);
+
+ recv = recvmsg(sock, &msg, MSG_ERRQUEUE);
+ ASSERT_GE(recv, 0) TH_LOG("recvmsg(MSG_ERRQUEUE) failed");
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ bool is_invalid, expected_invalid;
+ struct sock_extended_err *ee;
+ int expected_off;
+ uint16_t off;
+
+ if (cmsg->cmsg_level != v->info.level ||
+ cmsg->cmsg_type != v->info.opt1) {
+ TH_LOG("Unrelated cmsgs were encountered in recvmsg()");
+ continue;
+ }
+
+ ee = (struct sock_extended_err *)CMSG_DATA(cmsg);
+ off = ee->ee_rfc4884.len;
+ is_invalid = ee->ee_rfc4884.flags & SO_EE_RFC4884_FLAG_INVALID;
+
+ expected_invalid = v->bad_csum || v->bad_len;
+ ASSERT_EQ(is_invalid, expected_invalid) {
+ TH_LOG("Expected invalidity flag to be %d, but got %d",
+ expected_invalid, is_invalid);
+ }
+
+ expected_off =
+ (v->with_ext && v->payload_len >= v->info.min_payload) ?
+ v->payload_len : 0;
+ ASSERT_EQ(off, expected_off) {
+ TH_LOG("Expected RFC4884 offset %u, got %u",
+ expected_off, off);
+ }
+ break;
+ }
+}
+
+/* For the current variant: bind a datagram socket with the error queue
+ * options enabled, send the crafted ICMP error to the loopback address
+ * through a raw socket, and check what the datagram socket reports on its
+ * error queue.
+ */
+TEST_F(rfc4884, rfc4884)
+{
+ const typeof(variant) v = variant;
+ struct sockaddr_inet addr;
+ uint8_t pkt[1024];
+ int dgram, raw;
+ int len, sent;
+ int err;
+
+ dgram = socket(v->info.domain, SOCK_DGRAM, 0);
+ ASSERT_GE(dgram, 0) TH_LOG("Opening datagram socket failed");
+
+ /* covers both setsockopt() calls and the bind() */
+ err = bind_and_setsockopt(dgram, &v->info);
+ ASSERT_EQ(err, 0) TH_LOG("Bind failed");
+
+ raw = socket(v->info.domain, SOCK_RAW, v->info.proto);
+ ASSERT_GE(raw, 0) TH_LOG("Opening raw socket failed");
+
+ len = v->info.build_func(pkt, sizeof(pkt), v->with_ext, v->payload_len,
+ v->bad_csum, v->bad_len, v->smaller_len);
+ ASSERT_GT(len, 0) TH_LOG("Building packet failed");
+
+ /* destination is the loopback address, with port 0 */
+ set_addr(&addr, v->info.domain, 0);
+ sent = sendto(raw, pkt, len, 0, &addr.sa, addr.len);
+ ASSERT_EQ(len, sent) TH_LOG("Sending packet failed");
+
+ check_rfc4884_offset(_metadata, dgram, v);
+
+ close(dgram);
+ close(raw);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
index 845c26dd01a9..b2b99889942f 100755
--- a/tools/testing/selftests/net/ioam6.sh
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -273,8 +273,8 @@ setup()
ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 &>/dev/null
ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 &>/dev/null
- ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null
ip -netns $ioam_node_alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null
+ ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null
ip -netns $ioam_node_alpha link set veth0 up &>/dev/null
ip -netns $ioam_node_alpha link set lo up &>/dev/null
ip -netns $ioam_node_alpha route add 2001:db8:2::/64 \
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index 0ccf484b1d9d..f4afef51b930 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -43,6 +43,10 @@
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
+#endif
+
#define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */
#define MAX_PAYLOAD 2048
#define XFRM_ALGO_KEY_BUF_SIZE 512
@@ -827,13 +831,16 @@ static int xfrm_fill_key(char *name, char *buf,
static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz,
struct xfrm_desc *desc)
{
- struct {
+ union {
union {
struct xfrm_algo alg;
struct xfrm_algo_aead aead;
struct xfrm_algo_auth auth;
} u;
- char buf[XFRM_ALGO_KEY_BUF_SIZE];
+ struct {
+ unsigned char __offset_to_FAM[offsetof(struct xfrm_algo_auth, alg_key)];
+ char buf[XFRM_ALGO_KEY_BUF_SIZE];
+ };
} alg = {};
size_t alen, elen, clen, aelen;
unsigned short type;
diff --git a/tools/testing/selftests/net/ipvtap_test.sh b/tools/testing/selftests/net/ipvtap_test.sh
new file mode 100755
index 000000000000..354ca7ce8584
--- /dev/null
+++ b/tools/testing/selftests/net/ipvtap_test.sh
@@ -0,0 +1,168 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Simple tests for ipvtap
+
+
+#
+# The testing environment looks this way:
+#
+# |------HNS-------| |------PHY-------|
+# | veth<----------------->veth |
+# |------|--|------| |----------------|
+# | |
+# | | |-----TST0-------|
+# | |------------|----ipvlan |
+# | |----------------|
+# |
+# | |-----TST1-------|
+# |---------------|----ipvlan |
+# |----------------|
+#
+
+ALL_TESTS="
+ test_ip_set
+"
+
+source lib.sh
+
+DEBUG=0
+
+VETH_HOST=vethtst.h
+VETH_PHY=vethtst.p
+
+NS_COUNT=32
+IP_ITERATIONS=1024
+IPSET_TIMEOUT="60s"
+
+# Run a command in a network namespace, discarding its stdout.
+#   $1:   namespace name, or "global" to run without entering a namespace
+#   $2..: the command and its arguments
+# Returns the exit status of the command.
+ns_run() {
+	# Keep "ns" local: the callers use a global of the same name for their
+	# own loop state, and it must not be overwritten from here.
+	local ns=$1
+	shift
+	if [[ "$ns" == "global" ]]; then
+		"$@" >/dev/null
+	else
+		ip netns exec "$ns" "$@" >/dev/null
+	fi
+}
+
+# Build the test topology: a veth pair between the HST_NS and NS_PHY
+# namespaces, plus NS_COUNT further namespaces that each receive an ipvtap
+# device named ipvlan0 on top of the host side of the veth pair.
+# Exits the script through exit_error if an ipvtap link cannot be created.
+test_ip_setup_env() {
+ # setup_ns comes from lib.sh; as used here it is expected to store the
+ # name of the created namespace in the variable whose name it is given.
+ setup_ns NS_PHY
+ setup_ns HST_NS
+
+ # setup simulated other-host (phy) and host itself
+ ns_run "$HST_NS" ip link add $VETH_HOST type veth peer name $VETH_PHY \
+ netns "$NS_PHY" >/dev/null
+ ns_run "$HST_NS" ip link set $VETH_HOST up
+ ns_run "$NS_PHY" ip link set $VETH_PHY up
+
+ for ((i=0; i<NS_COUNT; i++)); do
+ setup_ns ipvlan_ns_$i
+ # ns holds the variable name; ${!ns} expands to the namespace name
+ ns="ipvlan_ns_$i"
+ if [ "$DEBUG" = "1" ]; then
+ echo "created NS ${!ns}"
+ fi
+ if ! ns_run "$HST_NS" ip link add netns ${!ns} ipvlan0 \
+ link $VETH_HOST \
+ type ipvtap mode l2 bridge; then
+ exit_error "FAIL: Failed to configure ipvlan link."
+ fi
+ done
+}
+
+# Undo test_ip_setup_env: delete the host side veth device, then let
+# cleanup_all_ns (from lib.sh) take care of the namespaces.
+test_ip_cleanup_env() {
+ ns_run "$HST_NS" ip link del $VETH_HOST
+ cleanup_all_ns
+}
+
+# Print the message in $1 and terminate the script with $ksft_fail.
+exit_error() {
+ echo "$1"
+ exit $ksft_fail
+}
+
+# Print a pseudo-random integer in the range 16..47.
+rnd() {
+ echo $(( RANDOM % 32 + 16 ))
+}
+
+# Worker body, run once per namespace: brings ipvlan0 up and then, for
+# IP_ITERATIONS rounds, adds one randomly chosen IPv4/IPv6 address pair and
+# removes another randomly chosen pair. Errors from "ip" are discarded.
+test_ip_set_thread() {
+ # Here we are trying to create some IP conflicts between namespaces.
+ # If just add/remove IP, nothing interesting will happen.
+ # But if add random IP and then remove random IP,
+ # eventually conflicts start to appear.
+ ip link set ipvlan0 up
+ for ((i=0; i<IP_ITERATIONS; i++)); do
+ v=$(rnd)
+ ip a a "172.25.0.$v/24" dev ipvlan0 2>/dev/null
+ ip a a "fc00::$v/64" dev ipvlan0 2>/dev/null
+ v=$(rnd)
+ ip a d "172.25.0.$v/24" dev ipvlan0 2>/dev/null
+ ip a d "fc00::$v/64" dev ipvlan0 2>/dev/null
+ done
+}
+
+# Run test_ip_set_thread concurrently in every ipvlan namespace, then verify
+# that no address ended up configured in more than one namespace.
+# Reports through log_test; on a conflict RET is set to $ksft_fail and the
+# function returns early.
+test_ip_set() {
+ RET=0
+
+ # Make sure the environment is torn down even if the script exits early.
+ trap test_ip_cleanup_env EXIT
+
+ test_ip_setup_env
+
+ # One background worker per namespace: this script is re-executed with
+ # the test_ip_set_thread argument, bounded by IPSET_TIMEOUT.
+ declare -A ns_pids
+ for ((i=0; i<NS_COUNT; i++)); do
+ ns="ipvlan_ns_$i"
+ ns_run ${!ns} timeout "$IPSET_TIMEOUT" \
+ bash -c "$0 test_ip_set_thread"&
+ ns_pids[$i]=$!
+ done
+
+ for ((i=0; i<NS_COUNT; i++)); do
+ wait "${ns_pids[$i]}"
+ done
+
+ # Map every address still configured (second field of each "inet" line)
+ # to the index of the namespace it was found in.
+ declare -A all_ips
+ for ((i=0; i<NS_COUNT; i++)); do
+ ns="ipvlan_ns_$i"
+ ip_output=$(ip netns exec ${!ns} ip a l dev ipvlan0 | grep inet)
+ while IFS= read -r nsip_out; do
+ if [[ -z $nsip_out ]]; then
+ continue;
+ fi
+ nsip=$(awk '{print $2}' <<< "$nsip_out")
+ if [[ -v all_ips[$nsip] ]]; then
+ # Seen before in another namespace. The EXIT trap is still armed
+ # on this path, so cleanup happens when the script exits.
+ RET=$ksft_fail
+ log_test "conflict for $nsip"
+ return "$RET"
+ else
+ all_ips[$nsip]=$i
+ fi
+ done <<< "$ip_output"
+ done
+
+ if [ "$DEBUG" = "1" ]; then
+ for key in "${!all_ips[@]}"; do
+ echo "$key: ${all_ips[$key]}"
+ done
+ fi
+
+ # Success path: disarm the trap and clean up right away.
+ trap - EXIT
+ test_ip_cleanup_env
+
+ log_test "test multithreaded ip set"
+}
+
+# Optional leading -d enables the debug output.
+if [[ "$1" == "-d" ]]; then
+ DEBUG=1
+ shift
+fi
+
+# Optional -t: the remaining arguments become TESTS.
+# NOTE(review): TESTS is not read in this chunk; presumably tests_run from
+# lib.sh uses it to select tests -- confirm there.
+if [[ "$1" == "-t" ]]; then
+ shift
+ TESTS="$*"
+fi
+
+# Worker mode: test_ip_set re-executes this script with this argument inside
+# each namespace. Otherwise run the regular test list.
+if [[ "$1" == "test_ip_set_thread" ]]; then
+ test_ip_set_thread
+else
+ require_command ip
+
+ tests_run
+fi
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index f448bafb3f20..b40694573f4c 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -280,7 +280,8 @@ tc_rule_stats_get()
local selector=${1:-.packets}; shift
tc -j -s filter show dev $dev $dir pref $pref \
- | jq ".[1].options.actions[].stats$selector"
+ | jq ".[] | select(.options.actions) |
+ .options.actions[].stats$selector"
}
tc_rule_handle_stats_get()
@@ -576,7 +577,7 @@ ip_link_has_flag()
local flag=$1; shift
local state=$(ip -j link show "$name" |
- jq --arg flag "$flag" 'any(.[].flags.[]; . == $flag)')
+ jq --arg flag "$flag" 'any(.[].flags[]; . == $flag)')
[[ $state == true ]]
}
diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c
index 27437590eeb5..e28884ce3ab3 100644
--- a/tools/testing/selftests/net/lib/csum.c
+++ b/tools/testing/selftests/net/lib/csum.c
@@ -707,7 +707,7 @@ static uint32_t recv_get_packet_csum_status(struct msghdr *msg)
cm->cmsg_level, cm->cmsg_type);
if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata)))
- error(1, 0, "cmsg: len=%lu expected=%lu",
+ error(1, 0, "cmsg: len=%zu expected=%zu",
cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata)));
aux = (void *)CMSG_DATA(cm);
diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h
index 17dc34a612c6..03912902a6d3 100644
--- a/tools/testing/selftests/net/lib/ksft.h
+++ b/tools/testing/selftests/net/lib/ksft.h
@@ -24,7 +24,8 @@ static inline void ksft_ready(void)
fd = STDOUT_FILENO;
}
- write(fd, msg, sizeof(msg));
+ if (write(fd, msg, sizeof(msg)) < 0)
+ perror("write()");
if (fd != STDOUT_FILENO)
close(fd);
}
@@ -48,7 +49,8 @@ static inline void ksft_wait(void)
fd = STDIN_FILENO;
}
- read(fd, &byte, sizeof(byte));
+ if (read(fd, &byte, sizeof(byte)) < 0)
+ perror("read()");
if (fd != STDIN_FILENO)
close(fd);
}
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
index 40f9ce307dd1..f528b67639de 100644
--- a/tools/testing/selftests/net/lib/py/__init__.py
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -13,7 +13,7 @@ from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \
from .netns import NetNS, NetNSEnter
from .nsim import NetdevSim, NetdevSimDev
from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \
- bpftool, ip, ethtool, bpftrace, rand_port, wait_port_listen, wait_file
+ bpftool, ip, ethtool, bpftrace, rand_port, wait_port_listen, wait_file, tool
from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily
from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily
@@ -26,7 +26,7 @@ __all__ = ["KSRC",
"NetNS", "NetNSEnter",
"CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer",
"bpftool", "ip", "ethtool", "bpftrace", "rand_port",
- "wait_port_listen", "wait_file",
+ "wait_port_listen", "wait_file", "tool",
"NetdevSim", "NetdevSimDev",
"NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError",
"YnlFamily", "EthtoolFamily", "NetdevFamily", "RtnlFamily",
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 531e7fa1b3ea..6cdfb8afccb5 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -8,7 +8,7 @@ import time
import traceback
from collections import namedtuple
from .consts import KSFT_MAIN_NAME
-from .utils import global_defer_queue
+from . import utils
KSFT_RESULT = None
KSFT_RESULT_ALL = True
@@ -32,8 +32,23 @@ class KsftTerminate(KeyboardInterrupt):
def ksft_pr(*objs, **kwargs):
+ """
+ Print logs to stdout.
+
+ Behaves like print() but log lines will be prefixed
+ with # to prevent breaking the TAP output formatting.
+
+ Extra arguments (on top of what print() supports):
+ line_pfx - add extra string before each line
+ """
+ sep = kwargs.pop("sep", " ")
+ pfx = kwargs.pop("line_pfx", "")
+ pfx = "#" + (" " + pfx if pfx else "")
kwargs["flush"] = True
- print("#", *objs, **kwargs)
+
+ text = sep.join(str(obj) for obj in objs)
+ prefixed = f"\n{pfx} ".join(text.split('\n'))
+ print(pfx, prefixed, **kwargs)
def _fail(*args):
@@ -153,21 +168,24 @@ def ktap_result(ok, cnt=1, case_name="", comment=""):
print(res, flush=True)
+def _ksft_defer_arm(state):
+ """ Allow or disallow the use of defer() """
+ utils.GLOBAL_DEFER_ARMED = state
+
+
def ksft_flush_defer():
global KSFT_RESULT
i = 0
- qlen_start = len(global_defer_queue)
- while global_defer_queue:
+ qlen_start = len(utils.GLOBAL_DEFER_QUEUE)
+ while utils.GLOBAL_DEFER_QUEUE:
i += 1
- entry = global_defer_queue.pop()
+ entry = utils.GLOBAL_DEFER_QUEUE.pop()
try:
entry.exec_only()
except Exception:
ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!")
- tb = traceback.format_exc()
- for line in tb.strip().split('\n'):
- ksft_pr("Defer Exception|", line)
+ ksft_pr(traceback.format_exc(), line_pfx="Defer Exception|")
KSFT_RESULT = False
@@ -315,6 +333,7 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
comment = ""
cnt_key = ""
+ _ksft_defer_arm(True)
try:
func(*args)
except KsftSkipEx as e:
@@ -325,20 +344,17 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
cnt_key = 'xfail'
except BaseException as e:
stop |= isinstance(e, KeyboardInterrupt)
- tb = traceback.format_exc()
- for line in tb.strip().split('\n'):
- ksft_pr("Exception|", line)
+ ksft_pr(traceback.format_exc(), line_pfx="Exception|")
if stop:
ksft_pr(f"Stopping tests due to {type(e).__name__}.")
KSFT_RESULT = False
cnt_key = 'fail'
+ _ksft_defer_arm(False)
try:
ksft_flush_defer()
except BaseException as e:
- tb = traceback.format_exc()
- for line in tb.strip().split('\n'):
- ksft_pr("Exception|", line)
+ ksft_pr(traceback.format_exc(), line_pfx="Exception|")
if isinstance(e, KeyboardInterrupt):
ksft_pr()
ksft_pr("WARN: defer() interrupted, cleanup may be incomplete.")
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index 106ee1f2df86..85884f3e827b 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -41,7 +41,9 @@ class cmd:
self.ret = None
self.ksft_term_fd = None
+ self.host = host
self.comm = comm
+
if host:
self.proc = host.cmd(comm)
else:
@@ -99,6 +101,27 @@ class cmd:
raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" %
(self.proc.args, stdout, stderr), self)
+ def __repr__(self):
+ """ Multi-line summary: the command, its outcome and any captured output. """
+ def str_fmt(name, s):
+ # Prefix the stripped text with "name: " and indent every continuation
+ # line by the width of that prefix.
+ name += ': '
+ return (name + s.strip().replace('\n', '\n' + ' ' * len(name)))
+
+ ret = "CMD"
+ if self.host:
+ ret += "[remote]"
+ # The header line depends on self.ret: None, 0 or any other value.
+ if self.ret is None:
+ ret += f" (unterminated): {self.comm}\n"
+ elif self.ret == 0:
+ ret += f" (success): {self.comm}\n"
+ else:
+ ret += f": {self.comm}\n"
+ # NOTE(review): nesting is not visible in this view; the EXIT line
+ # presumably belongs to the else branch only - confirm.
+ ret += f" EXIT: {self.ret}\n"
+ # Output sections are appended only when the attribute is truthy.
+ if self.stdout:
+ ret += str_fmt(" STDOUT", self.stdout) + "\n"
+ if self.stderr:
+ ret += str_fmt(" STDERR", self.stderr) + "\n"
+ return ret.strip()
+
class bkg(cmd):
"""
@@ -137,11 +160,12 @@ class bkg(cmd):
def __exit__(self, ex_type, ex_value, ex_tb):
# Force termination on exception
- terminate = self.terminate or (self._exit_wait and ex_type)
+ terminate = self.terminate or (self._exit_wait and ex_type is not None)
return self.process(terminate=terminate, fail=self.check_fail)
-global_defer_queue = []
+GLOBAL_DEFER_QUEUE = []
+GLOBAL_DEFER_ARMED = False
class defer:
@@ -153,7 +177,9 @@ class defer:
self.args = args
self.kwargs = kwargs
- self._queue = global_defer_queue
+ if not GLOBAL_DEFER_ARMED:
+ raise Exception("defer queue not armed, did you use defer() outside of a test case?")
+ self._queue = GLOBAL_DEFER_QUEUE
self._queue.append(self)
def __enter__(self):
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 15d144a25d82..22ba0da2adb8 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -3,6 +3,7 @@
top_srcdir = ../../../../..
CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+CFLAGS += -I$(top_srcdir)/tools/include
TEST_PROGS := \
diag.sh \
@@ -10,6 +11,7 @@ TEST_PROGS := \
mptcp_connect_checksum.sh \
mptcp_connect_mmap.sh \
mptcp_connect_sendfile.sh \
+ mptcp_connect_splice.sh \
mptcp_join.sh \
mptcp_sockopt.sh \
pm_netlink.sh \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 404a77bf366a..cbe573c4ab3a 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -33,6 +33,7 @@
#include <linux/tcp.h>
#include <linux/time_types.h>
#include <linux/sockios.h>
+#include <linux/compiler.h>
extern int optind;
@@ -51,6 +52,7 @@ enum cfg_mode {
CFG_MODE_POLL,
CFG_MODE_MMAP,
CFG_MODE_SENDFILE,
+ CFG_MODE_SPLICE,
};
enum cfg_peek {
@@ -123,7 +125,7 @@ static void die_usage(void)
fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down "
"-- for MPJ tests\n");
fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n");
- fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
+ fprintf(stderr, "\t-m [poll|mmap|sendfile|splice] -- use poll(default)/mmap+write/sendfile/splice\n");
fprintf(stderr, "\t-M mark -- set socket packet mark\n");
fprintf(stderr, "\t-o option -- test sockopt <option>\n");
fprintf(stderr, "\t-p num -- use port num\n");
@@ -140,7 +142,7 @@ static void die_usage(void)
exit(1);
}
-static void xerror(const char *fmt, ...)
+static void __noreturn xerror(const char *fmt, ...)
{
va_list ap;
@@ -257,7 +259,7 @@ static void set_transparent(int fd, int pf)
}
}
-static void set_mptfo(int fd, int pf)
+static void set_mptfo(int fd)
{
int qlen = 25;
@@ -334,7 +336,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
set_transparent(sock, pf);
if (cfg_sockopt_types.mptfo)
- set_mptfo(sock, pf);
+ set_mptfo(sock);
if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
break; /* success */
@@ -405,21 +407,18 @@ static int sock_connect_mptcp(const char * const remoteaddr,
*peer = a;
break; /* success */
}
+ perror("sendto()");
} else {
if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) {
*peer = a;
break; /* success */
}
- }
- if (cfg_sockopt_types.mptfo) {
- perror("sendto()");
- close(sock);
- sock = -1;
- } else {
perror("connect()");
- close(sock);
- sock = -1;
}
+
+ /* error */
+ close(sock);
+ sock = -1;
}
freeaddrinfo(addr);
@@ -934,6 +933,71 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
return err;
}
+/*
+ * Move data from infd to outfd through a temporary pipe using splice().
+ *
+ * Each round splices up to (len - winfo->total_len) bytes from infd into the
+ * pipe, then splices the amount just received out of the pipe into outfd.
+ * The loop ends when the input side reports 0 bytes or on the first error.
+ *
+ * Returns 0 on success, 2 if the pipe cannot be created, 3 if splicing in
+ * fails, 4 if splicing out fails and 5 if the pipe content was not fully
+ * written to outfd by a single splice() call.
+ */
+static int do_splice(const int infd, const int outfd, const size_t len,
+		     struct wstate *winfo)
+{
+	ssize_t in_bytes, out_bytes;
+	int pipefd[2];
+	int err;
+
+	err = pipe(pipefd);
+	if (err) {
+		perror("pipe");
+		return 2;
+	}
+
+again:
+	in_bytes = splice(infd, NULL, pipefd[1], NULL, len - winfo->total_len,
+			  SPLICE_F_MOVE | SPLICE_F_MORE);
+	if (in_bytes < 0) {
+		perror("splice in");
+		err = 3;
+	} else if (in_bytes > 0) {
+		out_bytes = splice(pipefd[0], NULL, outfd, NULL, in_bytes,
+				   SPLICE_F_MOVE | SPLICE_F_MORE);
+		if (out_bytes < 0) {
+			perror("splice out");
+			err = 4;
+		} else if (in_bytes != out_bytes) {
+			/* both counters are ssize_t, hence %zd rather than %zu */
+			fprintf(stderr, "Unexpected transfer: %zd vs %zd\n",
+				in_bytes, out_bytes);
+			err = 5;
+		} else {
+			goto again;
+		}
+	}
+	/* in_bytes == 0: fall through with err still 0 from pipe() */
+
+	close(pipefd[0]);
+	close(pipefd[1]);
+
+	return err;
+}
+
+/*
+ * Splice-based transfer between the input file, the peer socket and the
+ * output file.
+ *
+ * In listen mode the peer's data is received into outfd first and infd is
+ * sent afterwards. Otherwise infd is sent first, the write side of peerfd is
+ * shut down, the reply is received into outfd and *in_closed_after_out is
+ * set to true.
+ *
+ * Returns 0 on success or the first non-zero do_splice() result.
+ */
+static int copyfd_io_splice(int infd, int peerfd, int outfd, unsigned int size,
+			    bool *in_closed_after_out, struct wstate *winfo)
+{
+	int ret;
+
+	if (!listen_mode) {
+		/* connecting side: send, half-close, then read the reply */
+		ret = do_splice(infd, peerfd, size, winfo);
+		if (ret)
+			return ret;
+
+		shut_wr(peerfd);
+
+		ret = do_splice(peerfd, outfd, size, winfo);
+		*in_closed_after_out = true;
+		return ret;
+	}
+
+	/* listening side: read the peer's data first, then answer */
+	ret = do_splice(peerfd, outfd, size, winfo);
+	if (ret)
+		return ret;
+
+	return do_splice(infd, peerfd, size, winfo);
+}
+
static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo)
{
bool in_closed_after_out = false;
@@ -966,6 +1030,14 @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct
&in_closed_after_out, winfo);
break;
+ case CFG_MODE_SPLICE:
+ file_size = get_infd_size(infd);
+ if (file_size < 0)
+ return file_size;
+ ret = copyfd_io_splice(infd, peerfd, outfd, file_size,
+ &in_closed_after_out, winfo);
+ break;
+
default:
fprintf(stderr, "Invalid mode %d\n", cfg_mode);
@@ -1295,8 +1367,8 @@ void xdisconnect(int fd)
int main_loop(void)
{
+ struct addrinfo *peer = NULL;
int fd = 0, ret, fd_in = 0;
- struct addrinfo *peer;
struct wstate winfo;
if (cfg_input && cfg_sockopt_types.mptfo) {
@@ -1379,12 +1451,15 @@ int parse_mode(const char *mode)
return CFG_MODE_MMAP;
if (!strcasecmp(mode, "sendfile"))
return CFG_MODE_SENDFILE;
+ if (!strcasecmp(mode, "splice"))
+ return CFG_MODE_SPLICE;
fprintf(stderr, "Unknown test mode: %s\n", mode);
fprintf(stderr, "Supported modes are:\n");
fprintf(stderr, "\t\t\"poll\" - interleaved read/write using poll()\n");
fprintf(stderr, "\t\t\"mmap\" - send entire input file (mmap+write), then read response (-l will read input first)\n");
fprintf(stderr, "\t\t\"sendfile\" - send entire input file (sendfile), then read response (-l will read input first)\n");
+ fprintf(stderr, "\t\t\"splice\" - send entire input file (splice), then read response (-l will read input first)\n");
die_usage();
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh
new file mode 100755
index 000000000000..241254a966c9
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -m splice "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c
index e084796e804d..5e222ba977e4 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_diag.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c
@@ -1,20 +1,24 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2025, Kylin Software */
-#include <linux/sock_diag.h>
-#include <linux/rtnetlink.h>
-#include <linux/inet_diag.h>
-#include <linux/netlink.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
#include <sys/socket.h>
-#include <netinet/in.h>
-#include <linux/tcp.h>
+
#include <arpa/inet.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <stdio.h>
+#include <netinet/in.h>
+
+#include <linux/compiler.h>
+#include <linux/inet_diag.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sock_diag.h>
+#include <linux/tcp.h>
#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
@@ -87,7 +91,7 @@ enum {
#define rta_getattr(type, value) (*(type *)RTA_DATA(value))
-static void die_perror(const char *msg)
+static void __noreturn die_perror(const char *msg)
{
perror(msg);
exit(1);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
index 8e8f6441ad8b..5716998da192 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_inq.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -28,6 +28,7 @@
#include <linux/tcp.h>
#include <linux/sockios.h>
+#include <linux/compiler.h>
#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
@@ -40,7 +41,7 @@ static int pf = AF_INET;
static int proto_tx = IPPROTO_MPTCP;
static int proto_rx = IPPROTO_MPTCP;
-static void die_perror(const char *msg)
+static void __noreturn die_perror(const char *msg)
{
perror(msg);
exit(1);
@@ -52,7 +53,7 @@ static void die_usage(int r)
exit(r);
}
-static void xerror(const char *fmt, ...)
+static void __noreturn xerror(const char *fmt, ...)
{
va_list ap;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index b2e6e548f796..dc1f200aaa81 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -603,8 +603,7 @@ wait_rm_addr()
local old_cnt="${2}"
local cnt
- local i
- for i in $(seq 10); do
+ for _ in $(seq 10); do
cnt=$(rm_addr_count ${ns})
[ "$cnt" = "${old_cnt}" ] || break
sleep 0.1
@@ -623,25 +622,22 @@ wait_rm_sf()
local old_cnt="${2}"
local cnt
- local i
- for i in $(seq 10); do
+ for _ in $(seq 10); do
cnt=$(rm_sf_count ${ns})
[ "$cnt" = "${old_cnt}" ] || break
sleep 0.1
done
}
+# $1: expected MPJ ACK Rx counter in $ns1
wait_mpj()
{
- local ns="${1}"
- local cnt old_cnt
-
- old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
+ local exp_cnt="${1}"
+ local cnt
- local i
- for i in $(seq 10); do
- cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
- [ "$cnt" = "${old_cnt}" ] || break
+ for _ in $(seq 10); do
+ cnt=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
+ [ "${cnt}" = "${exp_cnt}" ] && break
sleep 0.1
done
}
@@ -650,8 +646,7 @@ wait_ll_ready()
{
local ns="${1}"
- local i
- for i in $(seq 50); do
+ for _ in $(seq 50); do
ip -n "${ns}" -6 addr show scope link | grep "inet6 fe80" |
grep -qw "tentative" || break
sleep 0.1
@@ -1407,7 +1402,7 @@ chk_join_tx_nr()
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxCreatSkErr")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$create" ]; then
rc=${KSFT_FAIL}
print_check "syn tx create socket error"
@@ -1416,7 +1411,7 @@ chk_join_tx_nr()
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxBindErr")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$bind" ]; then
rc=${KSFT_FAIL}
print_check "syn tx bind error"
@@ -1425,7 +1420,7 @@ chk_join_tx_nr()
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxConnectErr")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$connect" ]; then
rc=${KSFT_FAIL}
print_check "syn tx connect error"
@@ -1451,7 +1446,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtInfiniteMapTx")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$infinite_map_tx" ]; then
rc=${KSFT_FAIL}
print_check "$ns infinite map tx fallback"
@@ -1460,7 +1455,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDSSCorruptionFallback")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$dss_corruption" ]; then
rc=${KSFT_FAIL}
print_check "$ns dss corruption fallback"
@@ -1469,7 +1464,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtSimultConnectFallback")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$simult_conn" ]; then
rc=${KSFT_FAIL}
print_check "$ns simult conn fallback"
@@ -1478,7 +1473,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackACK")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$mpc_passive" ]; then
rc=${KSFT_FAIL}
print_check "$ns mpc passive fallback"
@@ -1487,7 +1482,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackSYNACK")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$mpc_active" ]; then
rc=${KSFT_FAIL}
print_check "$ns mpc active fallback"
@@ -1496,7 +1491,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableDataFallback")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$mpc_data" ]; then
rc=${KSFT_FAIL}
print_check "$ns mpc data fallback"
@@ -1505,7 +1500,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMD5SigFallback")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$md5_sig" ]; then
rc=${KSFT_FAIL}
print_check "$ns MD5 Sig fallback"
@@ -1514,7 +1509,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDssFallback")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$dss" ]; then
rc=${KSFT_FAIL}
print_check "$ns dss fallback"
@@ -1590,7 +1585,7 @@ chk_join_nr()
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "0" ]; then
rc=${KSFT_FAIL}
print_check "synack HMAC"
@@ -1599,7 +1594,7 @@ chk_join_nr()
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$ack_nr" ]; then
rc=${KSFT_FAIL}
print_check "ack rx"
@@ -1608,7 +1603,7 @@ chk_join_nr()
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "0" ]; then
rc=${KSFT_FAIL}
print_check "ack HMAC"
@@ -1617,7 +1612,7 @@ chk_join_nr()
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$syn_rej" ]; then
rc=${KSFT_FAIL}
print_check "syn rejected"
@@ -1650,7 +1645,6 @@ chk_stale_nr()
local stale_min=$2
local stale_max=$3
local stale_delta=$4
- local dump_stats
local stale_nr
local recover_nr
@@ -1666,16 +1660,11 @@ chk_stale_nr()
fail_test "got $stale_nr stale[s] $recover_nr recover[s], " \
" expected stale in range [$stale_min..$stale_max]," \
" stale-recover delta $stale_delta"
- dump_stats=1
+ echo $ns stats
+ ip -n $ns -s link show
else
print_ok
fi
-
- if [ "${dump_stats}" = 1 ]; then
- echo $ns stats
- ip netns exec $ns ip -s link show
- ip netns exec $ns nstat -as | grep MPTcp
- fi
}
chk_add_nr()
@@ -2329,17 +2318,16 @@ signal_address_tests()
ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 3 3 3
# It is not directly linked to the commit introducing this
# symbol but for the parent one which is linked anyway.
- if ! mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
- chk_join_nr 3 3 2
- chk_add_nr 4 4
- else
- chk_join_nr 3 3 3
+ if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
# the server will not signal the address terminating
# the MPC subflow
chk_add_nr 3 3
+ else
+ chk_add_nr 4 4
fi
fi
}
@@ -3719,7 +3707,6 @@ userspace_pm_add_addr()
tk=$(mptcp_lib_evts_get_info token "$evts")
ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3
- sleep 1
}
# $1: ns ; $2: id
@@ -3750,7 +3737,6 @@ userspace_pm_add_sf()
ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \
rip $da rport $dp token $tk
- sleep 1
}
# $1: ns ; $2: addr $3: event type
@@ -3847,21 +3833,28 @@ userspace_pm_chk_get_addr()
fi
}
-# $1: ns ; $2: event type ; $3: count
+# $1: ns ; $2: event type ; $3: count ; [ $4: attr ; $5: attr count ]
chk_evt_nr()
{
local ns=${1}
local evt_name="${2}"
local exp="${3}"
+ local attr="${4}"
+ local attr_exp="${5}"
local evts="${evts_ns1}"
local evt="${!evt_name}"
+ local attr_name
local count
+ if [ -n "${attr}" ]; then
+ attr_name=", ${attr}: ${attr_exp}"
+ fi
+
evt_name="${evt_name:16}" # without MPTCP_LIB_EVENT_
[ "${ns}" == "ns2" ] && evts="${evts_ns2}"
- print_check "event ${ns} ${evt_name} (${exp})"
+ print_check "event ${ns} ${evt_name} (${exp}${attr_name})"
if [[ "${evt_name}" = "LISTENER_"* ]] &&
! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then
@@ -3872,11 +3865,42 @@ chk_evt_nr()
count=$(grep -cw "type:${evt}" "${evts}")
if [ "${count}" != "${exp}" ]; then
fail_test "got ${count} events, expected ${exp}"
+ cat "${evts}"
+ return
+ elif [ -z "${attr}" ]; then
+ print_ok
+ return
+ fi
+
+ count=$(grep -w "type:${evt}" "${evts}" | grep -c ",${attr}:")
+ if [ "${count}" != "${attr_exp}" ]; then
+ fail_test "got ${count} event attributes, expected ${attr_exp}"
+ grep -w "type:${evt}" "${evts}"
else
print_ok
fi
}
+# $1: ns ; $2: event type ; $3: expected count
+wait_event()
+{
+ local ns="${1}"
+ local evt_name="${2}"
+ local exp="${3}"
+
+ local evt="${!evt_name}"
+ local evts="${evts_ns1}"
+ local count
+
+ [ "${ns}" == "ns2" ] && evts="${evts_ns2}"
+
+ for _ in $(seq 100); do
+ count=$(grep -cw "type:${evt}" "${evts}")
+ [ "${count}" -ge "${exp}" ] && break
+ sleep 0.1
+ done
+}
+
userspace_tests()
{
# userspace pm type prevents add_addr
@@ -3962,9 +3986,11 @@ userspace_tests()
{ timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns1
+ wait_event ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
userspace_pm_add_addr $ns1 10.0.2.1 10
+ wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 1
userspace_pm_add_addr $ns1 10.0.3.1 20
+ wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 2
chk_join_nr 2 2 2
chk_add_nr 2 2
chk_mptcp_info subflows 2 subflows 2
@@ -3995,8 +4021,9 @@ userspace_tests()
{ timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns2
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
userspace_pm_add_sf $ns2 10.0.3.2 20
+ wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
chk_join_nr 1 1 1
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 2 2
@@ -4023,10 +4050,11 @@ userspace_tests()
{ timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns2
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
userspace_pm_add_sf $ns2 10.0.3.2 0
+ wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
userspace_pm_chk_dump_addr "${ns2}" \
"id 0 flags subflow 10.0.3.2" "id 0 subflow"
chk_join_nr 1 1 1
@@ -4044,8 +4072,9 @@ userspace_tests()
{ timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns2
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
userspace_pm_add_sf $ns2 10.0.3.2 20
+ wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
chk_join_nr 1 1 1
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 2 2
@@ -4068,8 +4097,9 @@ userspace_tests()
{ timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns1
+ wait_event ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
userspace_pm_add_addr $ns1 10.0.2.1 10
+ wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 1
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_mptcp_info subflows 1 subflows 1
@@ -4085,13 +4115,44 @@ userspace_tests()
kill_events_pids
mptcp_lib_kill_group_wait $tests_pid
fi
+
+ # userspace pm no duplicated spurious close events after an error
+ if reset_with_events "userspace pm no dup close events after error" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns2
+ pm_nl_set_limits $ns1 0 2
+ { timeout_test=120 test_linkfail=128 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
+ local tests_pid=$!
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
+ userspace_pm_add_sf $ns2 10.0.3.2 20
+ wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 2 2
+
+ # force quick loss
+ ip netns exec $ns2 sysctl -q net.ipv4.tcp_syn_retries=1
+ if ip netns exec "${ns1}" ${iptables} -A INPUT -s "10.0.1.2" \
+ -p tcp --tcp-option 30 -j REJECT --reject-with tcp-reset &&
+ ip netns exec "${ns2}" ${iptables} -A INPUT -d "10.0.1.2" \
+ -p tcp --tcp-option 30 -j REJECT --reject-with tcp-reset; then
+ wait_event ns2 MPTCP_LIB_EVENT_SUB_CLOSED 1
+ wait_event ns1 MPTCP_LIB_EVENT_SUB_CLOSED 1
+ chk_subflows_total 1 1
+ userspace_pm_add_sf $ns2 10.0.1.2 0
+ wait_event ns2 MPTCP_LIB_EVENT_SUB_CLOSED 2
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 2 error 2
+ fi
+ kill_events_pids
+ mptcp_lib_kill_group_wait $tests_pid
+ fi
}
endpoint_tests()
{
# subflow_rebuild_header is needed to support the implicit flag
# userspace pm type prevents add_addr
- if reset "implicit EP" &&
+ if reset_with_events "implicit EP" &&
continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 2 2
pm_nl_set_limits $ns2 2 2
@@ -4100,7 +4161,7 @@ endpoint_tests()
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns1
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
pm_nl_check_endpoint "creation" \
$ns2 10.0.2.2 id 1 flags implicit
chk_mptcp_info subflows 1 subflows 1
@@ -4114,6 +4175,7 @@ endpoint_tests()
pm_nl_check_endpoint "modif is allowed" \
$ns2 10.0.2.2 id 1 flags signal
mptcp_lib_kill_group_wait $tests_pid
+ kill_events_pids
fi
if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT &&
@@ -4127,7 +4189,7 @@ endpoint_tests()
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns2
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
pm_nl_check_endpoint "creation" \
$ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
chk_subflow_nr "before delete id 2" 2
@@ -4139,7 +4201,7 @@ endpoint_tests()
chk_mptcp_info subflows 0 subflows 0
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
- wait_mpj $ns2
+ wait_mpj 2
chk_subflow_nr "after re-add id 2" 2
chk_mptcp_info subflows 1 subflows 1
@@ -4151,7 +4213,7 @@ endpoint_tests()
ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
pm_nl_del_endpoint $ns2 3 10.0.3.2
pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
- wait_mpj $ns2
+ wait_mpj 3
chk_subflow_nr "after no reject" 3
chk_mptcp_info subflows 2 subflows 2
@@ -4163,7 +4225,7 @@ endpoint_tests()
chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf
pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
- wait_mpj $ns2
+ wait_mpj $((3 + i))
chk_subflow_nr "after re-add id 0 ($i)" 3
chk_mptcp_info subflows 3 subflows 3
done
@@ -4205,7 +4267,7 @@ endpoint_tests()
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns2
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
pm_nl_check_endpoint "creation" \
$ns1 10.0.2.1 id 1 flags signal
chk_subflow_nr "before delete" 2
@@ -4221,7 +4283,7 @@ endpoint_tests()
pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
- wait_mpj $ns2
+ wait_mpj 3
chk_subflow_nr "after re-add" 3
chk_mptcp_info subflows 2 subflows 2
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
@@ -4233,7 +4295,7 @@ endpoint_tests()
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
- wait_mpj $ns2
+ wait_mpj 4
chk_subflow_nr "after re-add ID 0" 3
chk_mptcp_info subflows 3 subflows 3
chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
@@ -4245,7 +4307,7 @@ endpoint_tests()
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal
- wait_mpj $ns2
+ wait_mpj 5
chk_subflow_nr "after re-re-add ID 0" 3
chk_mptcp_info subflows 3 subflows 3
chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
@@ -4294,9 +4356,9 @@ endpoint_tests()
wait_rm_addr $ns2 0
ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
- wait_mpj $ns2
+ wait_mpj 1
pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
- wait_mpj $ns2
+ wait_mpj 2
mptcp_lib_kill_group_wait $tests_pid
join_syn_tx=3 join_connect_err=1 \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index 286164f7246e..b6e58d936ebe 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -25,6 +25,7 @@
#include <netinet/in.h>
#include <linux/tcp.h>
+#include <linux/compiler.h>
static int pf = AF_INET;
@@ -127,7 +128,7 @@ struct so_state {
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
-static void die_perror(const char *msg)
+static void __noreturn die_perror(const char *msg)
{
perror(msg);
exit(1);
@@ -139,7 +140,7 @@ static void die_usage(int r)
exit(r);
}
-static void xerror(const char *fmt, ...)
+static void __noreturn xerror(const char *fmt, ...)
{
va_list ap;
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index ec6a87588191..123d9d7a0278 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -192,6 +192,10 @@ check "show_endpoints" \
flush_endpoint
check "show_endpoints" "" "flush addrs"
+add_endpoint 10.0.1.1 flags unknown
+check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags"
+flush_endpoint
+
set_limits 9 1 2>/dev/null
check "get_limits" "${default_limits}" "rcv addrs above hard limit"
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 65b374232ff5..99eecccbf0c8 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -24,6 +24,8 @@
#define IPPROTO_MPTCP 262
#endif
+#define MPTCP_PM_ADDR_FLAG_UNKNOWN _BITUL(7)
+
static void syntax(char *argv[])
{
fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept [<args>]\n", argv[0]);
@@ -836,6 +838,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
else if (!strcmp(tok, "fullmesh"))
flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
+ else if (!strcmp(tok, "unknown"))
+ flags |= MPTCP_PM_ADDR_FLAG_UNKNOWN;
else
error(1, errno,
"unknown flag %s", argv[arg]);
@@ -1048,6 +1052,13 @@ static void print_addr(struct rtattr *attrs, int len)
printf(",");
}
+ if (flags & MPTCP_PM_ADDR_FLAG_UNKNOWN) {
+ printf("unknown");
+ flags &= ~MPTCP_PM_ADDR_FLAG_UNKNOWN;
+ if (flags)
+ printf(",");
+ }
+
/* bump unknown flags, if any */
if (flags)
printf("0x%x", flags);
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 12ce61fa15a8..979cff56e1f5 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -29,6 +29,7 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_SCTP=m
CONFIG_IPV6=y
CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_TUNNEL=m
CONFIG_IP_VS=m
CONFIG_IP_VS_PROTO_TCP=y
CONFIG_IP_VS_RR=m
diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
index 7fc6c5dbd551..84b8eb12143a 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_clash.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
@@ -116,7 +116,7 @@ run_one_clash_test()
# not a failure: clash resolution logic did not trigger.
# With right timing, xmit completed sequentially and
# no parallel insertion occurs.
- return $ksft_skip
+ return $ksft_xfail
}
run_clash_test()
@@ -133,12 +133,12 @@ run_clash_test()
if [ $rv -eq 0 ];then
echo "PASS: clash resolution test for $daddr:$dport on attempt $i"
return 0
- elif [ $rv -eq $ksft_skip ]; then
+ elif [ $rv -eq $ksft_xfail ]; then
softerr=1
fi
done
- [ $softerr -eq 1 ] && echo "SKIP: clash resolution for $daddr:$dport did not trigger"
+ [ $softerr -eq 1 ] && echo "XFAIL: clash resolution for $daddr:$dport did not trigger"
}
ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter"
@@ -167,8 +167,7 @@ load_simple_ruleset "$nsclient2"
run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001
if [ $clash_resolution_active -eq 0 ];then
- [ "$ret" -eq 0 ] && ret=$ksft_skip
- echo "SKIP: Clash resolution did not trigger"
+ [ "$ret" -eq 0 ] && ret=$ksft_xfail
fi
exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c
index 507930cee8cb..462d628cc3bd 100644
--- a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c
+++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c
@@ -33,9 +33,14 @@ static void die(const char *e)
exit(111);
}
-static void die_port(uint16_t got, uint16_t want)
+static void die_port(const struct sockaddr_in *sin, uint16_t want)
{
- fprintf(stderr, "Port number changed, wanted %d got %d\n", want, ntohs(got));
+ uint16_t got = ntohs(sin->sin_port);
+ char str[INET_ADDRSTRLEN];
+
+ inet_ntop(AF_INET, &sin->sin_addr, str, sizeof(str));
+
+ fprintf(stderr, "Port number changed, wanted %d got %d from %s\n", want, got, str);
exit(1);
}
@@ -100,7 +105,7 @@ int main(int argc, char *argv[])
die("child recvfrom");
if (peer.sin_port != htons(PORT))
- die_port(peer.sin_port, PORT);
+ die_port(&peer, PORT);
} else {
if (sendto(s2, buf, LEN, 0, (struct sockaddr *)&sa1, sizeof(sa1)) != LEN)
continue;
@@ -109,7 +114,7 @@ int main(int argc, char *argv[])
die("parent recvfrom");
if (peer.sin_port != htons((PORT + 1)))
- die_port(peer.sin_port, PORT + 1);
+ die_port(&peer, PORT + 1);
}
}
diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh
index a24c896347a8..dc7e9d6da062 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh
@@ -45,6 +45,8 @@ if ip netns exec "$ns0" ./conntrack_reverse_clash; then
echo "PASS: No SNAT performed for null bindings"
else
echo "ERROR: SNAT performed without any matching snat rule"
+ ip netns exec "$ns0" conntrack -L
+ ip netns exec "$ns0" conntrack -S
exit 1
fi
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index ad97c6227f35..394166f224a4 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -29,7 +29,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate"
+BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate insert_overlap"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
@@ -420,6 +420,18 @@ race_repeat 0
perf_duration 0
"
+TYPE_insert_overlap="
+display reject overlapping range on add
+type_spec ipv4_addr . ipv4_addr
+chain_spec ip saddr . ip daddr
+dst addr4
+proto icmp
+
+race_repeat 0
+
+perf_duration 0
+"
+
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -1954,6 +1966,37 @@ EOF
return 0
}
+add_fail()
+{
+ if nft add element inet filter test "$1" 2>/dev/null ; then
+ err "Returned success for add ${1} given set:"
+ err "$(nft -a list set inet filter test )"
+ return 1
+ fi
+
+ return 0
+}
+
+test_bug_insert_overlap()
+{
+ local elements="1.2.3.4 . 1.2.4.1"
+
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ add "{ $elements }" || return 1
+
+ elements="1.2.3.0-1.2.3.4 . 1.2.4.1"
+ add_fail "{ $elements }" || return 1
+
+ elements="1.2.3.0-1.2.3.4 . 1.2.4.2"
+ add "{ $elements }" || return 1
+
+ elements="1.2.3.4 . 1.2.4.1-1.2.4.2"
+ add_fail "{ $elements }" || return 1
+
+ return 0
+}
+
test_reported_issues() {
eval test_bug_"${subtest}"
}
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index a68bc882fa4e..7a34ef468975 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -592,16 +592,33 @@ ip -net "$nsr1" link set tun0 up
ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2
+ip -net "$nsr1" link set tun6 up
+ip -net "$nsr1" addr add fee1:3::1/64 dev tun6 nodad
+
ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
ip -net "$nsr2" link set tun0 up
ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1 || ret=1
+ip -net "$nsr2" link set tun6 up
+ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad
+
ip -net "$nsr1" route change default via 192.168.100.2
ip -net "$nsr2" route change default via 192.168.100.1
+
+# do not use "route change" and delete old default so
+# socat fails to connect in case new default can't be added.
+ip -6 -net "$nsr1" route delete default
+ip -6 -net "$nsr1" route add default via fee1:3::2
+ip -6 -net "$nsr2" route delete default
+ip -6 -net "$nsr2" route add default via fee1:3::1
ip -net "$ns2" route add default via 10.0.2.1
+ip -6 -net "$ns2" route add default via dead:2::1
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6 accept'
ip netns exec "$nsr1" nft -a insert rule inet filter forward \
'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'
@@ -611,28 +628,53 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
ret=1
fi
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+ echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel"
+else
+ echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
# Create vlan tagged devices for IPIP traffic.
ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
ip -net "$nsr1" link set veth1.10 up
ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
+ip -net "$nsr1" addr add fee1:4::1/64 dev veth1.10 nodad
ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
-ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2
-ip -net "$nsr1" link set tun1 up
-ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1
+
+ip -net "$nsr1" link add name tun0.10 type ipip local 192.168.20.1 remote 192.168.20.2
+ip -net "$nsr1" link set tun0.10 up
+ip -net "$nsr1" addr add 192.168.200.1/24 dev tun0.10
ip -net "$nsr1" route change default via 192.168.200.2
-ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
-ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept'
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 accept'
+
+ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2
+ip -net "$nsr1" link set tun6.10 up
+ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad
+ip -6 -net "$nsr1" route delete default
+ip -6 -net "$nsr1" route add default via fee1:5::2
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6.10 accept'
ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
ip -net "$nsr2" link set veth0.10 up
ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
+ip -net "$nsr2" addr add fee1:4::2/64 dev veth0.10 nodad
ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
-ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1
-ip -net "$nsr2" link set tun1 up
-ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1
+
+ip -net "$nsr2" link add name tun0.10 type ipip local 192.168.20.2 remote 192.168.20.1
+ip -net "$nsr2" link set tun0.10 up
+ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10
ip -net "$nsr2" route change default via 192.168.200.1
-ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+
+ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1 || ret=1
+ip -net "$nsr2" link set tun6.10 up
+ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad
+ip -6 -net "$nsr2" route delete default
+ip -6 -net "$nsr2" route add default via fee1:5::1
if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
@@ -640,10 +682,19 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
ret=1
fi
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+ echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel over vlan"
+else
+ echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel over vlan" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
# Restore the previous configuration
ip -net "$nsr1" route change default via 192.168.10.2
ip -net "$nsr2" route change default via 192.168.10.1
ip -net "$ns2" route del default via 10.0.2.1
+ip -6 -net "$ns2" route del default via dead:2::1
}
# Another test:
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index 6136ceec45e0..139bc1211878 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -510,7 +510,7 @@ EOF
udp_listener_ready()
{
- ss -S -N "$1" -uln -o "sport = :12345" | grep -q 12345
+ ss -S -N "$1" -uln -o "sport = :$2" | grep -q "$2"
}
output_files_written()
@@ -518,7 +518,7 @@ output_files_written()
test -s "$1" && test -s "$2"
}
-test_udp_ct_race()
+test_udp_nat_race()
{
ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
flush ruleset
@@ -545,8 +545,8 @@ EOF
ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 &
local nfqpid=$!
- busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2"
- busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3"
+ busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12345
+ busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" 12345
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 12
# Send two packets, one should end up in ns1, other in ns2.
@@ -557,7 +557,7 @@ EOF
busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2"
- kill "$nfqpid"
+ kill "$nfqpid" "$rpid1" "$rpid2"
if ! ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12345 2>/dev/null | wc -l | grep -q "^1"'; then
echo "FAIL: Expected One udp conntrack entry"
@@ -585,6 +585,135 @@ EOF
echo "PASS: both udp receivers got one packet each"
}
+# Make sure UDPGRO aggregated packets don't lose
+# their skb->nfct entry when nfqueue passes the
+# skb to userspace with software gso segmentation on.
+test_udp_gro_ct()
+{
+ local errprefix="FAIL: test_udp_gro_ct:"
+
+ ip netns exec "$nsrouter" conntrack -F 2>/dev/null
+
+ ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet udpq {
+ # Number of packets/bytes queued to userspace
+ counter toqueue { }
+ # Number of packets/bytes reinjected from userspace with 'ct new' intact
+ counter fromqueue { }
+ # Neither counter may be 0; after software segmentation fromqueue counts more packets than toqueue.
+
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+
+ # userspace sends small packets, if < 1000, UDPGRO did
+ # not kick in, but test needs a 'new' conntrack with udpgro skb.
+ meta iifname veth0 meta l4proto udp meta length > 1000 accept
+
+ # don't pick up non-gso packets and don't queue them to
+ # userspace.
+ notrack
+ }
+
+ chain postrouting {
+ type filter hook postrouting priority 0; policy accept;
+
+ # Only queue unconfirmed fraglist gro skbs to userspace.
+ udp dport 12346 ct status ! confirmed counter name "toqueue" mark set 1 queue num 1
+ }
+
+ chain validate {
+ type filter hook postrouting priority 1; policy accept;
+ # ... and only count those that were reinjected with the
+ # skb->nfct intact.
+ mark 1 counter name "fromqueue"
+ }
+}
+EOF
+ timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12346,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc &
+ local rpid=$!
+
+ ip netns exec "$nsrouter" ./nf_queue -G -c -q 1 -t 2 > "$TMPFILE2" &
+ local nfqpid=$!
+
+ ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding on rx-gro-list on generic-receive-offload on
+
+ busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12346
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1
+
+ local bs=512
+ local count=$(((32 * 1024 * 1024) / bs))
+ dd if=/dev/zero bs="$bs" count="$count" 2>/dev/null | for i in $(seq 1 16); do
+ timeout 5 ip netns exec "$ns1" \
+ socat -u -b 512 STDIN UDP-DATAGRAM:10.0.2.99:12346,reuseport,bind=0.0.0.0:55221 &
+ done
+
+ busywait 10000 test -s "$TMPFILE1"
+
+ kill "$rpid"
+
+ wait
+
+ local p
+ local b
+ local pqueued
+ local bqueued
+
+ c=$(ip netns exec "$nsrouter" nft list counter inet udpq "toqueue" | grep packets)
+ read p pqueued b bqueued <<EOF
+$c
+EOF
+ local preinject
+ local breinject
+ c=$(ip netns exec "$nsrouter" nft list counter inet udpq "fromqueue" | grep packets)
+ read p preinject b breinject <<EOF
+$c
+EOF
+ ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding off
+ ip netns exec "$nsrouter" ethtool -K "veth1" rx-udp-gro-forwarding off
+
+ if [ "$pqueued" -eq 0 ];then
+ # happens when gro did not build at least one aggregate
+ echo "SKIP: No packets were queued"
+ return
+ fi
+
+ local saw_ct_entry=0
+ if ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12346 2>/dev/null | wc -l | grep -q "^1"'; then
+ saw_ct_entry=1
+ else
+ echo "$errprefix Expected udp conntrack entry"
+ ip netns exec "$nsrouter" conntrack -L
+ ret=1
+ fi
+
+ if [ "$pqueued" -ge "$preinject" ] ;then
+ echo "$errprefix Expected software segmentation to occur, had $pqueued and $preinject"
+ ret=1
+ return
+ fi
+
+ # sw segmentation adds extra udp and ip headers.
+ local breinject_expect=$((preinject * (512 + 20 + 8)))
+
+ if [ "$breinject" -eq "$breinject_expect" ]; then
+ if [ "$saw_ct_entry" -eq 1 ];then
+ echo "PASS: fraglist gro skb passed with conntrack entry"
+ else
+ echo "$errprefix fraglist gro skb passed without conntrack entry"
+ ret=1
+ fi
+ else
+ echo "$errprefix Counter mismatch, conntrack entry dropped by nfqueue? Queued: $pqueued, $bqueued. Post-queue: $preinject, $breinject. Expected $breinject_expect"
+ ret=1
+ fi
+
+ if ! ip netns exec "$nsrouter" nft delete table inet udpq; then
+ echo "$errprefix: Could not delete udpq table"
+ ret=1
+ fi
+}
+
test_queue_removal()
{
read tainted_then < /proc/sys/kernel/tainted
@@ -663,7 +792,8 @@ test_tcp_localhost_connectclose
test_tcp_localhost_requeue
test_sctp_forward
test_sctp_output
-test_udp_ct_race
+test_udp_nat_race
+test_udp_gro_ct
# should be last, adds vrf device in ns1 and changes routes
test_icmp_vrf
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
index 3442cd29bc93..cdb3910af95b 100644
--- a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
@@ -26,7 +26,7 @@
+0.01 > R 643160523:643160523(0) win 0
-+0.01 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
++0.1 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
// Must go through.
+0.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
index b34e5cf0112e..0a97d5ae3469 100755
--- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -13,6 +13,15 @@ declare -A ip_args=(
-D TFO_COOKIE_ZERO=b7c12350a90dc8f5
-D CMSG_LEVEL_IP=SOL_IP
-D CMSG_TYPE_RECVERR=IP_RECVERR"
+ [ipv4-mapped-ipv6]="--ip_version=ipv4-mapped-ipv6
+ --local_ip=192.168.0.1
+ --gateway_ip=192.168.0.1
+ --netmask_ip=255.255.0.0
+ --remote_ip=192.0.2.1
+ -D TFO_COOKIE=3021b9d889017eeb
+ -D TFO_COOKIE_ZERO=b7c12350a90dc8f5
+ -D CMSG_LEVEL_IP=SOL_IPV6
+ -D CMSG_TYPE_RECVERR=IPV6_RECVERR"
[ipv6]="--ip_version=ipv6
--mtu=1520
--local_ip=fd3d:0a0b:17d6::1
@@ -45,7 +54,7 @@ fi
ip_versions=$(grep -E '^--ip_version=' $script | cut -d '=' -f 2)
if [[ -z $ip_versions ]]; then
- ip_versions="ipv4 ipv6"
+ ip_versions="ipv4 ipv6 ipv4-mapped-ipv6"
elif [[ ! "$ip_versions" =~ ^ipv[46]$ ]]; then
ktap_exit_fail_msg "Too many or unsupported --ip_version: $ip_versions"
exit "$KSFT_FAIL"
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt
new file mode 100644
index 000000000000..07e9936e70e6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt
@@ -0,0 +1,24 @@
+// 3rd ACK + 1st data segment lost, 2nd data segment with CE
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// 3rd ACK lost
+// 1st data segment lost
++0.05 < [ce] EAP. 1001:2001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1 ceb 1000 e0b 1,nop,nop,nop,sack 1001:2001>
++.002 accept(3, ..., ...) = 4
+
++0.2 < [ce] EAP. 1:1001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.001 > [ect0] EWA. 1:1(0) ack 2001 <ECN e1b 1 ceb 2000 e0b 1,nop>
+
++0.05 < [ce] EAP. 2001:3001(1000) ack 1 win 264
++.001 > [ect0] . 1:1(0) ack 3001 <ECN e1b 1 ceb 3000 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt
new file mode 100644
index 000000000000..76b8422b34dc
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt
@@ -0,0 +1,30 @@
+// 3rd ACK + 1st data segment lost, 2nd data segment with CE
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1016,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// 3rd ACK lost
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 2000) = 2000
+// 1st data segment lost + 2nd gets CE
++.002 > [ect0] .5 1:1005(1004) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.000 > [ect0] P.5 1005:2001(996) ack 1 <ECN e1b 1 ceb 0 e0b 1, nop>
++0.05 < [ect0] .6 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 996 e1b 1,nop,nop,nop,sack 1005:2001>
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
+
++0.002~+0.1 > [ect0] .5 1:1005(1004) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.05 < [ect0] .6 1:1(0) ack 2001 win 264 <ECN e0b 1005 ceb 996 e1b 1,nop>
+
++0.01 write(4, ..., 1000) = 1000
++0~+0.002 > [ect0] P.5 2001:3001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.1 < [ect0] .5 1:1001(1000) ack 3001 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++0~+0.01 > [ect0] .5 3001:3001(0) ack 1001 <ECN e1b 1 ceb 0 e0b 1001,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt
new file mode 100644
index 000000000000..84060e490589
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt
@@ -0,0 +1,19 @@
+// Test 3rd ACK flags when SYN-ACK is rexmitted
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.1 < [ect0] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Our code currently sends a challenge ACK
+// when it receives a SYN in ESTABLISHED state
+// based on the latest SYN
++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt
new file mode 100644
index 000000000000..d3fe09d0606f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt
@@ -0,0 +1,18 @@
+// Third ACK CE increases r.cep
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ce] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] WAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt
new file mode 100644
index 000000000000..d28722db42b1
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt
@@ -0,0 +1,22 @@
+// 3rd ACK lost, CE for the first data segment
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// 3rd ACK lost
++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.05 < [ce] EAP. 1001:2001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.001 > [ect0] EWA. 1:1(0) ack 2001 <ECN e1b 1 ceb 2000 e0b 1 ,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt
new file mode 100644
index 000000000000..a4d808116e34
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt
@@ -0,0 +1,26 @@
+// Test SYN/ACK rexmit triggered 3rd ACK duplicate + CE on first data seg
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// SYN/ACK rexmitted => two 3rd ACKs in-flight
++1.0~+1.1 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// Delivered 1st 3rd ACK
++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// Duplicate 3rd ACK delivered
++1.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
+
++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop>
+ +0 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt
new file mode 100644
index 000000000000..410a303c6d49
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt
@@ -0,0 +1,13 @@
+// Test that when accurate ECN is disabled,
+// client uses RFC3168 ECN for SYN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,nop,nop,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt
new file mode 100644
index 000000000000..10728114b11b
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt
@@ -0,0 +1,28 @@
+// Test that SYN-ACK with ACE flags and without
+// ACE flags got dropped. Although we disable ECN,
+// we shouldn't consider this as blackholed as
+// these are dropped due to congestion
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [ect0] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN
++0.1 < [noecn] S 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// Write with AccECN option but with ip-noecn since we received one SYN with ACE=0
++0.01 write(4, ..., 100) = 100
++.002 > [noecn] P5. 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt
new file mode 100644
index 000000000000..04d928f0d44d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt
@@ -0,0 +1,18 @@
+// Test AccECN -> RFC3168 fallback when sysctl asks for RFC3168 ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt
new file mode 100644
index 000000000000..788af6bea69c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt
@@ -0,0 +1,34 @@
+// Client negotiates AccECN and starts sending
+// AccECN option in last ACK and data segments
+// Middlebox drops AccECN option and client
+// reverts to ACE flags only
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+sysctl -q net.ipv4.tcp_ecn_option_beacon=1
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 1001 <ECN e1b 1 ceb 0 e0b 1001,nop>
+ +0 read(4, ..., 1000) = 1000
+
++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 1001 <ECN e1b 1 ceb 0 e0b 2001,nop,nop,nop,sack 1:1001>
+
++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 1001 <nop,nop,sack 1:1001>
+
++0.05 < [ect0] EAP. 1001:2001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 2001
+ +0 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt
new file mode 100644
index 000000000000..f5839c2e682d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt
@@ -0,0 +1,38 @@
+// Client negotiates AccECN and starts sending
+// AccECN option in last ACK and data segments
+// Middlebox accepts AccECN option but some packets
+// are lost due to congestion. Client should
+// continue to send AccECN option
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.102 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.1 < [ect0] SW. 0:0(0) ack 1 win 32767 <mss 1024,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
+// Send
++0.01 write(4, ..., 3000) = 3000
++.002 > [ect0] .5 1:1013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.002 > [ect0] P.5 1013:2025(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.002 > [ect0] P.5 2025:3001(976) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
+// First two segments were lost due to congestion as SACK was
+// received acknowledging 3rd segment
++0.1 < [ect0] .5 1:1(0) ack 1 win 264 <ECN e1b 1 ceb 0 e0b 977,nop,nop,nop,sack 2025:3001>
+
+// Since data with option was SACKed, we can
+// continue to use AccECN option for the rest of
+// the connection. This one is a rexmt
++.02~+0.5 > [ect0] .5 1:1013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.1 < [ect0] .5 1:1(0) ack 3001 win 264 <ECN e1b 1 ceb 0 e0b 3000,nop>
+
+// Send new data, it should contain AccECN option
++0.01 write(4, ..., 2000) = 2000
++.002 > [ect0] .5 3001:4013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.002 > [ect0] P.5 4013:5001(988) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt
new file mode 100644
index 000000000000..c00b36d6a833
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt
@@ -0,0 +1,12 @@
+// AccECN sysctl server-side only, no ECN/AccECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=5
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,nop,nop,nop,wscale 8>
++.002 > . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt
new file mode 100644
index 000000000000..f9c27f39f354
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt
@@ -0,0 +1,25 @@
+// Test basic connection teardown where local process closes first:
+// the local process calls close() first, so we send a FIN, and receive an ACK.
+// Then we receive a FIN and ACK it.
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +.01...0.011 connect(3, ..., ...) = 0
+ +0 > [noecn] SEWA 0:0(0) <...>
+ +0 < [ect1] SW. 0:0(0) ack 1 win 32768 <mss 1000,nop,wscale 6,nop,nop,sackOK>
+ +0 > [ect0] EW. 1:1(0) ack 1
+
+ +0 write(3, ..., 1000) = 1000
+ +0 > [ect0] P5. 1:1001(1000) ack 1
+ +0 < [ect0] .5 1:1(0) ack 1001 win 257
+
+ +0 close(3) = 0
+ +0 > [ect0] F5. 1001:1001(0) ack 1
+ +0 < [ect0] .5 1:1(0) ack 1002 win 257
+
+ +0 < [ect0] F5. 1:1(0) ack 1002 win 257
+ +0 > [ect0] . 1002:1002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt
new file mode 100644
index 000000000000..6d771234124a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt
@@ -0,0 +1,25 @@
+// Test a large ACK (> ACE field max)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
++0.05 < [ect0] .5 1:1(0) ack 1461 win 264
++0.05 < [ect0] .5 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 8, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt
new file mode 100644
index 000000000000..76384f52b021
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt
@@ -0,0 +1,31 @@
+// Test false overflow detection with option used to rule out overflow
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
+// Stop sending option to allow easier testing
++0 `sysctl -q net.ipv4.tcp_ecn_option=0`
+
++0.002 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
+
++0.05 < [ect0] .5 1:1(0) ack 1460 win 264 <ECN e0b 1461 ceb 0 e1b 1,nop>
++0.05 < [ect0] .5 1:1(0) ack 14601 win 264 <ECN e0b 14601 ceb 0 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 0, tcpi_delivered_ce
+assert tcpi_delivered_e0_bytes == 14600, tcpi_delivered_e0_bytes
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt
new file mode 100644
index 000000000000..8bce5dce35a2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt
@@ -0,0 +1,24 @@
+// Test a large ACK (> ACE field max)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
++0.05 < [ect0] .5 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt
new file mode 100644
index 000000000000..5f2b147214f4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt
@@ -0,0 +1,25 @@
+// Test a large ACK (> ACE field max)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
+ // Fake CE
++0.05 < [ect0] .6 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt
new file mode 100644
index 000000000000..fd07bdc14f37
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt
@@ -0,0 +1,25 @@
+// Test a large ACK (at ACE field max delta)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
+ // Fake CE
++0.05 < [ect0] .4 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 7, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt
new file mode 100644
index 000000000000..cb1e70ff2d26
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt
@@ -0,0 +1,70 @@
+// Test basic AccECN CEP/CEB/E0B/E1B functionality & CEP wrapping
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{
+assert tcpi_delivered_ce == 0, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 0, tcpi_delivered_ce_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake CE
++0.05 < [ect0] WA. 1:1(0) ack 1001 win 264 <ECN e0b 1 ceb 1000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 1, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 1000, tcpi_delivered_ce_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake ect0
++0.05 < [ect0] WA. 1:1(0) ack 2001 win 264 <ECN e0b 1001 ceb 1000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 1, tcpi_delivered_ce
+assert tcpi_delivered_e0_bytes == 1000, tcpi_delivered_e0_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 2001:3001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake ce
++0.05 < [ect0] EWA. 1:1(0) ack 3001 win 264 <ECN e0b 1001 ceb 2000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 2, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 2000, tcpi_delivered_ce_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 3001:4001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake ect1
++0.05 < [ect0] EWA. 1:1(0) ack 4001 win 264 <ECN e0b 1001 ceb 2000 e1b 1001,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 2, tcpi_delivered_ce
+assert tcpi_delivered_e1_bytes == 1000, tcpi_delivered_e1_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 4001:5001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake ce
++0.05 < [ect0] . 1:1(0) ack 5001 win 264 <ECN e0b 1001 ceb 3000 e1b 1001,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 3, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 3000, tcpi_delivered_ce_bytes
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt
new file mode 100644
index 000000000000..6627c7bb2d26
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt
@@ -0,0 +1,12 @@
+// Test that tcp_ecn=4 uses RFC3168 ECN for SYN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=4
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.05 connect(4, ..., ...) = 0
+
++.002 > SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt
new file mode 100644
index 000000000000..51879477bb50
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt
@@ -0,0 +1,35 @@
+// Test that received IP-ECN codepoints update the ACE field and AccECN option counters (CEB/E0B/E1B) in outgoing ACKs, incl. ACE wrapping
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop>
+ +0 read(4, ..., 1000) = 1000
+
++0.05 < [ect0] EAP. 1001:2001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 2001 <ECN e1b 1 ceb 1000 e0b 1001,nop>
+ +0 read(4, ..., 1000) = 1000
+
++0.05 < [ce] EAP. 2001:3001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EWA. 1:1(0) ack 3001 <ECN e1b 1 ceb 2000 e0b 1001,nop>
+ +0 read(4, ..., 1000) = 1000
+
++0.05 < [ect1] EAP. 3001:4001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EWA. 1:1(0) ack 4001 <ECN e1b 1001 ceb 2000 e0b 1001,nop>
+ +0 read(4, ..., 1000) = 1000
+
++0.05 < [ce] EAP. 4001:5001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] . 1:1(0) ack 5001 <ECN e1b 1001 ceb 3000 e0b 1001,nop>
+ +0 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt
new file mode 100644
index 000000000000..0c72fa4a1251
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt
@@ -0,0 +1,14 @@
+// Test IP flags drop
+--tolerance_usecs=50000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 1.1 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02 ~ +1.1 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt
new file mode 100644
index 000000000000..171f9433e55f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt
@@ -0,0 +1,16 @@
+// SYN/ACK option drop test
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.02 ~+2 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.02 ~+5 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.02 ~+8 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt
new file mode 100644
index 000000000000..0f65cf56cd2b
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt
@@ -0,0 +1,28 @@
+// Test that SYN-ACK with ACE flags and without
+// ACE flags got dropped. Although we disable ECN,
+// we shouldn't consider this as blackholed as
+// these are dropped due to congestion
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// SYN-ACK may be getting blackholed, disable ECN
++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++4~+4.4 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Received an ACK after sending 3rd retransmission, not a blackhole
++0.1 < [noecn] . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt
new file mode 100644
index 000000000000..343181633980
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt
@@ -0,0 +1,18 @@
+// Test that SYN with ACE flags and without
+// ACE flags got dropped. Although we disable
+// ECN, we shouldn't consider this as blackholed
+// as these are dropped due to congestion
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 3.1 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0~+0.01 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt
new file mode 100644
index 000000000000..37dabc4603c8
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt
@@ -0,0 +1,23 @@
+// Test AccECN flags bleach
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] . 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [noecn] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt
new file mode 100644
index 000000000000..5b14892fda51
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt
@@ -0,0 +1,23 @@
+// Test basic AccECN negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 <ECN e0b 1001 ceb 0 e1b 0,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt
new file mode 100644
index 000000000000..25f7cb2feb25
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt
@@ -0,0 +1,26 @@
+// Test basic AccECN negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt
new file mode 100644
index 000000000000..50e08c492a69
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt
@@ -0,0 +1,23 @@
+// Test basic AccECN negotiation without option
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1
++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt
new file mode 100644
index 000000000000..2904f1ba9975
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt
@@ -0,0 +1,23 @@
+// Test basic AccECN negotiation, late option enable
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1
++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 <ECN e0b 1001 ceb 0 e1b 1,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt
new file mode 100644
index 000000000000..64e0fc1c1f14
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt
@@ -0,0 +1,20 @@
+// Test client behavior on receiving a non ECN SYN-ACK
+// after receiving an AccECN SYN-ACK and moving to
+// ESTABLISHED state
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+// Receive an AccECN SYN-ACK and move to ESTABLISHED
++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
+// Receive a non ECN SYN-ACK and send a challenge ACK with ACE feedback
++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt
new file mode 100644
index 000000000000..f407c629a3f7
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt
@@ -0,0 +1,27 @@
+// Test basic AccECN negotiation with option off using sysctl
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1
++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt
new file mode 100644
index 000000000000..32454e7187f9
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt
@@ -0,0 +1,27 @@
+// Test no progress filtering
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake CE and claim no progress
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 1000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 0, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 0, tcpi_delivered_ce_bytes
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt
new file mode 100644
index 000000000000..6597d5f2d778
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt
@@ -0,0 +1,28 @@
+// Test that a non-ECN SYN followed by a retransmitted
+// SYN carrying ACE flags is still answered with a
+// non-ECN SYN-ACK, and that data is then sent as
+// not-ECT without ACE flags on the connection
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] S 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN
++0.1 < [ect0] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
++0.1 < [noecn] . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
+// Write without AccECN option and with ip-noecn since we received one SYN with ACE=0
++0.01 write(4, ..., 100) = 100
++.002 > [noecn] P. 1:101(100) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt
new file mode 100644
index 000000000000..0f97dfcfa82d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt
@@ -0,0 +1,18 @@
+// Test RFC3168 fallback when sysctl asks for AccECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEW 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt
new file mode 100644
index 000000000000..9baffdd66fe5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt
@@ -0,0 +1,18 @@
+// Test RFC3168 ECN when sysctl asks for RFC3168 ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEW 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt
new file mode 100644
index 000000000000..3fc56f9c6a6f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt
@@ -0,0 +1,28 @@
+// Test SACK space grab to fit AccECN option
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++.01 < [ect1] EAP. 1001:2001(1000) ack 1 win 264
++0.002 > [ect0] EA. 1:1(0) ack 1 <ECN e1b 1001 ceb 0 e0b 1,nop,nop,nop,sack 1001:2001>
++.01 < [ect0] EAP. 3001:4001(1000) ack 1 win 264
++0.002 > [ect0] EA. 1:1(0) ack 1 <ECN e1b 1001 ceb 0 e0b 1001,nop,nop,nop,sack 3001:4001 1001:2001>
++.01 < [ce] EAP. 5001:6001(1000) ack 1 win 264
++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1001 ceb 1000 e0b 1001,nop,nop,nop,sack 5001:6001 3001:4001 1001:2001>
+// DSACK works?
++.01 < [ect0] EAP. 5001:6001(1000) ack 1 win 264
++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1001 ceb 1000 e0b 2001,nop,nop,nop,sack 5001:6001 5001:6001 3001:4001>
++.01 < [ect1] EAP. 6001:7001(1000) ack 1 win 264
++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 2001 ceb 1000 e0b 2001,nop,nop,nop,sack 5001:7001 3001:4001 1001:2001>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt
new file mode 100644
index 000000000000..1c075b5d81ae
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt
@@ -0,0 +1,39 @@
+// Test SACK space grab to fit AccECN option
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 100,ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// One SACK block should allow all 3 AccECN fields:
++.01 < [ect1] EAP. 1001:2001(1000) ack 1 win 264 <nop,nop,TS val 3 ecr 100>
++0.002 > [ect0] EA. 1:1(0) ack 1 <nop,nop,TS val 160 ecr 2,ECN e1b 1001 ceb 0 e0b 1,nop,nop,nop,sack 1001:2001>
+
+// Two SACK blocks should fit w/ AccECN if we only need to use 2 AccECN fields: check ect1 arriving.
++.01 < [ect1] EAP. 3001:4001(1000) ack 1 win 264 <nop,nop,TS val 4 ecr 100>
++0.002 > [ect0] EA. 1:1(0) ack 1 <nop,nop,TS val 172 ecr 2,ECN e1b 2001 ceb 0,nop,nop,sack 3001:4001 1001:2001>
+
+// Two SACK blocks should fit w/ AccECN if we only need to use 2 AccECN fields: check CE arriving.
++.01 < [ce] EAP. 5001:6001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100>
++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 184 ecr 2,ECN e1b 2001 ceb 1000,nop,nop,sack 5001:6001 3001:4001>
+
+// Check that DSACK works, using 2 SACK blocks in total, if we only need to use 2 AccECN fields: check ect1 arriving.
++.01 < [ect1] EAP. 5001:6001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100>
++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 196 ecr 2,ECN e1b 3001 ceb 1000,nop,nop,sack 5001:6001 5001:6001>
+
+// Check the case where the AccECN option doesn't fit, because sending ect0
+// with order 1 would require 3 AccECN fields,
+// and TS (12 bytes) + 2 SACK blocks (20 bytes) + 3 AccECN fields (2 + 3*3 bytes) > 40 bytes.
+// That's OK; Linux TCP AccECN is optimized for the ECT1 case, not ECT0.
++.01 < [ect0] EAP. 6001:7001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100>
++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 204 ecr 2,nop,nop,sack 5001:7001 3001:4001 1001:2001>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt
new file mode 100644
index 000000000000..6b88ab78bfce
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt
@@ -0,0 +1,20 @@
+// Test against classic ECN server
+// Not-ECT on SYN and server sets 1|0|1 (AE is unused for classic ECN)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SEA. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [ect0] P.5 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++.002 > [ect0] F.5 101:101(0) ack 1 <nop,nop,TS val 400 ecr 700>
++0.1 < [noecn] R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt
new file mode 100644
index 000000000000..d24ada008ece
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt
@@ -0,0 +1,20 @@
+// Test against classic ECN server
+// Not-ECT on SYN and server sets 0|0|1
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SE. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [ect0] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++0 > [noecn] F. 101:101(0) ack 1 <...>
++0.1 < R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt
new file mode 100644
index 000000000000..a20d7e890ee1
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt
@@ -0,0 +1,19 @@
+// Test against broken server (1|1|1)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SEWA. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [noecn] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++.002 > [noecn] F. 101:101(0) ack 1 <...>
++0.1 < [noecn] R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt
new file mode 100644
index 000000000000..428255bedab7
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt
@@ -0,0 +1,19 @@
+// Test against Non ECN server (0|0|0)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [noecn] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++.002 > [noecn] F. 101:101(0) ack 1 <nop,nop,TS val 400 ecr 700>
++0.1 < [noecn] R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt
new file mode 100644
index 000000000000..e9a5a0d3677c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt
@@ -0,0 +1,18 @@
+// Test AccECN with sysctl set to server-side only
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=5
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt
new file mode 100644
index 000000000000..412fa903105c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt
@@ -0,0 +1,18 @@
+// Test that SYN with ACE flags was Acked
+// after 2nd retransmission. In this case,
+// since we got SYN-ACK that supports Accurate
+// ECN, we consider this as successful negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 2.1 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+
++0.1 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1016,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0~+0.01 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt
new file mode 100644
index 000000000000..4622754a2270
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt
@@ -0,0 +1,16 @@
+// Test that SYN with ACE flags got dropped
+// We retry one more time with ACE and then
+// fall back to disabled ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 2.1 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0~+0.01 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt
new file mode 100644
index 000000000000..ee15f108cafe
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt
@@ -0,0 +1,27 @@
+// Test that SYN-ACK with ACE flags was Acked
+// after 2nd retransmission. In this case,
+// since we got the last ACK that supports Accurate
+// ECN, we consider this as successful negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// SYN-ACK may be getting blackholed, disable ECN
++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Received an ACK with ACE flags, state should be set to negotiation succeeded
++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt
new file mode 100644
index 000000000000..ccfe353a8ee4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt
@@ -0,0 +1,26 @@
+// Test that SYN-ACK with ACE flags got dropped
+// We retry one more time with ACE and then
+// fall back to disabled ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// SYN-ACK may be getting blackholed, disable ECN
++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Received an ACK with no ACE flags, state should be set to blackholed
++0.1 < [noecn] . 1:1(0) ack 1 win 320
++0 accept(3, ..., ...) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt
new file mode 100644
index 000000000000..dc83f7a18180
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt
@@ -0,0 +1,13 @@
+// Test AccECN ECN field reflector in SYNACK
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < [ce] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SWA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt
new file mode 100644
index 000000000000..e63a8d018c37
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt
@@ -0,0 +1,13 @@
+// Test AccECN ECN field reflector in SYNACK
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < [ect0] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt
new file mode 100644
index 000000000000..23c0e43b3dbe
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt
@@ -0,0 +1,13 @@
+// Test AccECN ECN field reflector in SYNACK
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < [ect1] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SEW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt
new file mode 100644
index 000000000000..c3497738f680
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt
@@ -0,0 +1,27 @@
+// Test SYNACK CE & received_ce update
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [ce] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.6 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] P.5 1:101(100) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
++.002 > [ect0] .6 101:101(0) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.6 101:201(100) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop>
+
++0.1 < [ect1] P.5 201:301(100) ack 201 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
++.002 > [ect0] .6 201:201(0) ack 101 <ECN e1b 101 ceb 0 e0b 101,nop,nop,nop,sack 201:301>
+
++0.01 < [ce] .6 401:501(100) ack 201 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
++.002 > [ect0] .7 201:201(0) ack 101 <ECN e1b 101 ceb 100 e0b 101,nop,nop,nop,sack 401:501 201:301>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt
new file mode 100644
index 000000000000..5fd77f466572
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt
@@ -0,0 +1,22 @@
+// Reflected SYNACK CE mark increases delivered_ce
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_fallback=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// Fake ce for prev, ECT validator must be disabled for this to work
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt
new file mode 100644
index 000000000000..f6ad1ea5c0c4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt
@@ -0,0 +1,24 @@
+// Test SYN=0 reflector
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [ect0] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.5 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] P.5 1:1(0) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
+
++0.01 < [ect0] P.5 1:101(100) ack 101 win 256 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] .5 101:101(0) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop>
++0 read(4, ..., 100) = 100
+
++0 close(4) = 0
++0 > F.5 101:101(0) ack 101 <...>
++0.1 < R. 101:101(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt
new file mode 100644
index 000000000000..7ecfc5fb9dbb
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt
@@ -0,0 +1,24 @@
+// Test SYN=0 reflector
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [ect1] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] EW. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.5 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect1] P.5 1:1(0) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
+
++0.01 < [ect1] P.5 1:101(100) ack 101 win 256 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] .5 101:101(0) ack 101 <ECN e1b 101 ceb 0 e0b 1,nop>
++0 read(4, ..., 100) = 100
+
++0 close(4) = 0
++0 > F5. 101:101(0) ack 101 <...>
++0.1 < R. 101:101(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt
new file mode 100644
index 000000000000..9e0959782ef5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt
@@ -0,0 +1,15 @@
+// Test 3rd ACK flags when SYN-ACK is rexmitted
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt
new file mode 100644
index 000000000000..a5a41633af07
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt
@@ -0,0 +1,25 @@
+// Test that we retransmit SYN-ACK with ACE and without
+// AccECN options after
+// SYN-ACK was lost and TCP moved to TCPS_SYN_RECEIVED
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// We try to write with AccECN option
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P5. 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt
new file mode 100644
index 000000000000..f3fe2f098966
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt
@@ -0,0 +1,26 @@
+// Test TS progress filtering
+--tcp_ts_tick_usecs=1000
+--tolerance_usecs=7000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 10 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 10>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <nop,nop,TS val 83 ecr 2>
+ // Fake CE and claim no progress
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 83>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt
new file mode 100644
index 000000000000..1446799d2481
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt
@@ -0,0 +1,25 @@
+// Test TS progress filtering
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 10 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 10>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <nop,nop,TS val 83 ecr 2>
+ // Fake CE and claim progress (TS val advances from 2 to 3)
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <nop,nop,TS val 3 ecr 83>
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt
new file mode 100644
index 000000000000..319f81dd717d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Minimal active open.
+// First to close connection.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+
+ // Connect to server: active open: three-way handshake
+ +0...0 connect(4, ..., ...) = 0
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8>
+ +0 < S. 0:0(0) ack 1 win 65535 <mss 1460,sackOK,nop,nop,nop,wscale 7>
+ +0 > . 1:1(0) ack 1
+
+ // Send data
+ +0 send(4, ..., 1000, 0) = 1000
+ +0 > P. 1:1001(1000) ack 1
+ +0 < . 1:1(0) ack 1001 win 257
+
+ +0 close(4) = 0
+ +0 > F. 1001:1001(0) ack 1
+ +0 < F. 1:1(0) ack 1002 win 257
+ +0 > . 1002:1002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt
new file mode 100644
index 000000000000..e72a291b666e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Minimal passive open.
+// Peer is first to close.
+
+`./defaults.sh`
+
+ // Open listener socket
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ // Incoming connection: passive open: three-way handshake
+ +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 8>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ // Open connection socket and close listener socket
+ +0 accept(3, ..., ...) = 4
+ +0 close(3) = 0
+
+ // Peer sends data: acknowledge and receive
+ +0 < P. 1:1001(1000) ack 1 win 257
+ +0 > . 1:1(0) ack 1001
+ +0 recv(4, ..., 1000, 0) = 1000
+
+ // Peer initiates connection close
+ +0 < F. 1001:1001(0) ack 1 win 257
+ +.04 > . 1:1(0) ack 1002
+
+ // Local socket also closes its side
+ +0 close(4) = 0
+ +0 > F. 1:1(0) ack 1002
+ +0 < . 1002:1002(0) ack 2 win 257
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt
new file mode 100644
index 000000000000..95a1957a2cf9
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test after "tcp: tcp_tx_timestamp() must look at the rtx queue"
+
+// This test is about receiving the SCM_TSTAMP_ACK,
+// we do not care about its SCM_TIMESTAMPING precision.
+--tolerance_usecs=1000000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_min_tso_segs=70
+`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Establish connection and verify that there was no error.
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.010 < S. 0:0(0) ack 1 win 65535 <mss 1000,sackOK,TS val 700 ecr 100,nop,wscale 7>
+ +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700>
+ +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [30000], 4) = 0
+
+ +0 write(3, ..., 9880) = 9880
+ +0 > P. 1:9881(9880) ack 1 <nop,nop,TS val 200 ecr 700>
++.010 < . 1:1(0) ack 9881 win 10000 <nop,nop,TS val 701 ecr 200>
+
+ +0 write(3, ..., 19760) = 19760
+ +0 > P. 9881:29641(19760) ack 1 <nop,nop,TS val 201 ecr 701>
++.010 < . 1:1(0) ack 29641 win 10000 <nop,nop,TS val 702 ecr 201>
+
+ +0 write(3, ..., 39520) = 39520
+ +0 > P. 29641:69161(39520) ack 1 <nop,nop,TS val 202 ecr 702>
++.010 < . 1:1(0) ack 69161 win 10000 <nop,nop,TS val 703 ecr 202>
+
+// One more write to increase cwnd
+ +0 write(3, ..., 79040) = 79040
+ +0 > P. 69161:108681(39520) ack 1 <nop,nop,TS val 203 ecr 703>
+ +0 > P. 108681:148201(39520) ack 1 <nop,nop,TS val 203 ecr 703>
++.010 < . 1:1(0) ack 148201 win 1000 <nop,nop,TS val 704 ecr 203>
+
+ +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING,
+ [SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+// We have one write filling one skb
+// last byte can not be stored because of our small SO_SNDBUF
+ +0 write(3, ..., 65209) = 65208
+ +0 > P. 148201:213409(65208) ack 1 <nop,nop,TS val 204 ecr 704>
++.010 < . 1:1(0) ack 213409 win 1000 <nop,nop,TS val 705 ecr 204>
+
+// SCM_TSTAMP_ACK should be received after the last ack at
+// t=60ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=60000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_ACK,
+ ee_data=65207}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/tap.c b/tools/testing/selftests/net/tap.c
index 9ec1c9b50e77..a0c9418132c8 100644
--- a/tools/testing/selftests/net/tap.c
+++ b/tools/testing/selftests/net/tap.c
@@ -56,18 +56,12 @@ static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type,
const char *s)
{
- struct rtattr *rta = rtattr_add(nh, type, strlen(s));
+ unsigned int strsz = strlen(s) + 1;
+ struct rtattr *rta;
- memcpy(RTA_DATA(rta), s, strlen(s));
- return rta;
-}
-
-static struct rtattr *rtattr_add_strsz(struct nlmsghdr *nh, unsigned short type,
- const char *s)
-{
- struct rtattr *rta = rtattr_add(nh, type, strlen(s) + 1);
+ rta = rtattr_add(nh, type, strsz);
- strcpy(RTA_DATA(rta), s);
+ memcpy(RTA_DATA(rta), s, strsz);
return rta;
}
@@ -119,7 +113,7 @@ static int dev_create(const char *dev, const char *link_type,
link_info = rtattr_begin(&req.nh, IFLA_LINKINFO);
- rtattr_add_strsz(&req.nh, IFLA_INFO_KIND, link_type);
+ rtattr_add_str(&req.nh, IFLA_INFO_KIND, link_type);
if (fill_info_data) {
info_data = rtattr_begin(&req.nh, IFLA_INFO_DATA);
diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c
index eb3cac5e583c..3b1ee2d3d417 100644
--- a/tools/testing/selftests/net/tfo.c
+++ b/tools/testing/selftests/net/tfo.c
@@ -81,8 +81,11 @@ static void run_server(void)
if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0)
error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)");
- read(connfd, buf, 64);
- fprintf(outfile, "%d\n", opt);
+ if (read(connfd, buf, 64) < 0)
+ error(1, errno, "read()");
+
+ if (fprintf(outfile, "%d\n", opt) < 0)
+ error(1, errno, "fprintf()");
fclose(outfile);
close(connfd);
@@ -91,14 +94,17 @@ static void run_server(void)
static void run_client(void)
{
- int fd;
+ int fd, ret;
char *msg = "Hello, world!";
fd = socket(AF_INET6, SOCK_STREAM, 0);
if (fd == -1)
error(1, errno, "socket()");
- sendto(fd, msg, strlen(msg), MSG_FASTOPEN, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+ ret = sendto(fd, msg, strlen(msg), MSG_FASTOPEN,
+ (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+ if (ret < 0)
+ error(1, errno, "sendto()");
close(fd);
}
diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh
index a4550511830a..f116f888b794 100755
--- a/tools/testing/selftests/net/tfo_passive.sh
+++ b/tools/testing/selftests/net/tfo_passive.sh
@@ -85,12 +85,15 @@ timeout -k 1s 30s ip netns exec nssv ./tfo \
-s \
-p ${SERVER_PORT} \
-o ${out_file}&
+server_pid="$!"
wait_local_port_listen nssv ${SERVER_PORT} tcp
ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT}
+client_exit_status="$?"
-wait
+wait "$server_pid"
+server_exit_status="$?"
res=$(cat $out_file)
rm $out_file
@@ -101,6 +104,14 @@ if [ "$res" = "0" ]; then
exit 1
fi
+if [ "$client_exit_status" -ne 0 ] || [ "$server_exit_status" -ne 0 ]; then
+ # Note: timeout(1) exits with 124 if it timed out
+ echo "client exited with ${client_exit_status}"
+ echo "server exited with ${server_exit_status}"
+ cleanup_ns
+ exit 1
+fi
+
echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index a3ef4b57eb5f..9e2ccea13d70 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -2786,10 +2786,10 @@ TEST_F(tls_err, epoll_partial_rec)
TEST_F(tls_err, poll_partial_rec_async)
{
struct pollfd pfd = { };
+ char token = '\0';
ssize_t rec_len;
char rec[256];
char buf[128];
- char token;
int p[2];
int ret;
@@ -3260,17 +3260,25 @@ TEST(data_steal) {
ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0);
/* Spawn a child and get it into the read wait path of the underlying
- * TCP socket.
+ * TCP socket (before kernel .recvmsg is replaced with the TLS one).
*/
pid = fork();
ASSERT_GE(pid, 0);
if (!pid) {
- EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2, MSG_WAITALL),
- sizeof(buf) / 2);
+ EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2 + 1, MSG_WAITALL),
+ sizeof(buf) / 2 + 1);
exit(!__test_passed(_metadata));
}
- usleep(10000);
+ /* Send a sync byte and poll until it's consumed to ensure
+ * the child is in recv() before we proceed to install TLS.
+ */
+ ASSERT_EQ(send(fd, buf, 1, 0), 1);
+ do {
+ usleep(500);
+ } while (recv(cfd, buf, 1, MSG_PEEK | MSG_DONTWAIT) == 1);
+ EXPECT_EQ(errno, EAGAIN);
+
ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0);
ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0);
diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c
index 0efc67b0357a..8a5cd5cb5472 100644
--- a/tools/testing/selftests/net/tun.c
+++ b/tools/testing/selftests/net/tun.c
@@ -8,14 +8,119 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#include <linux/if.h>
#include <linux/if_tun.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include "kselftest_harness.h"
+#include "tuntap_helpers.h"
+
+static const char param_dev_geneve_name[] = "geneve1";
+static unsigned char param_hwaddr_outer_dst[] = { 0x00, 0xfe, 0x98,
+ 0x14, 0x22, 0x42 };
+static unsigned char param_hwaddr_outer_src[] = { 0x00, 0xfe, 0x98,
+ 0x94, 0xd2, 0x43 };
+static unsigned char param_hwaddr_inner_dst[] = { 0x00, 0xfe, 0x98,
+ 0x94, 0x22, 0xcc };
+static unsigned char param_hwaddr_inner_src[] = { 0x00, 0xfe, 0x98,
+ 0x94, 0xd2, 0xdd };
+
+static struct in_addr param_ipaddr4_outer_dst = {
+ __constant_htonl(0xac100001),
+};
+
+static struct in_addr param_ipaddr4_outer_src = {
+ __constant_htonl(0xac100002),
+};
+
+static struct in_addr param_ipaddr4_inner_dst = {
+ __constant_htonl(0xac100101),
+};
+
+static struct in_addr param_ipaddr4_inner_src = {
+ __constant_htonl(0xac100102),
+};
+
+static struct in6_addr param_ipaddr6_outer_dst = {
+ { { 0x20, 0x02, 0x0d, 0xb8, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } },
+};
+
+static struct in6_addr param_ipaddr6_outer_src = {
+ { { 0x20, 0x02, 0x0d, 0xb8, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } },
+};
+
+static struct in6_addr param_ipaddr6_inner_dst = {
+ { { 0x20, 0x02, 0x0d, 0xb8, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } },
+};
+
+static struct in6_addr param_ipaddr6_inner_src = {
+ { { 0x20, 0x02, 0x0d, 0xb8, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } },
+};
+
+#ifndef BIT
+#define BIT(nr) (1UL << (nr))
+#endif
+
+#define VN_ID 1
+#define VN_PORT 4789
+#define UDP_SRC_PORT 22
+#define UDP_DST_PORT 48878
+#define IPPREFIX_LEN 24
+#define IP6PREFIX_LEN 64
+#define TIMEOUT_SEC 10
+#define TIMEOUT_USEC 100000
+#define MAX_RETRIES 20
+
+#define UDP_TUNNEL_GENEVE_4IN4 0x01
+#define UDP_TUNNEL_GENEVE_6IN4 0x02
+#define UDP_TUNNEL_GENEVE_4IN6 0x04
+#define UDP_TUNNEL_GENEVE_6IN6 0x08
+
+#define UDP_TUNNEL_MAX_SEGMENTS BIT(7)
+
+#define UDP_TUNNEL_OUTER_IPV4 (UDP_TUNNEL_GENEVE_4IN4 | UDP_TUNNEL_GENEVE_6IN4)
+#define UDP_TUNNEL_INNER_IPV4 (UDP_TUNNEL_GENEVE_4IN4 | UDP_TUNNEL_GENEVE_4IN6)
+
+#define UDP_TUNNEL_GENEVE_4IN4_HDRLEN \
+ (ETH_HLEN + 2 * sizeof(struct iphdr) + GENEVE_HLEN + \
+ 2 * sizeof(struct udphdr))
+#define UDP_TUNNEL_GENEVE_6IN6_HDRLEN \
+ (ETH_HLEN + 2 * sizeof(struct ipv6hdr) + GENEVE_HLEN + \
+ 2 * sizeof(struct udphdr))
+#define UDP_TUNNEL_GENEVE_4IN6_HDRLEN \
+ (ETH_HLEN + sizeof(struct iphdr) + sizeof(struct ipv6hdr) + \
+ GENEVE_HLEN + 2 * sizeof(struct udphdr))
+#define UDP_TUNNEL_GENEVE_6IN4_HDRLEN \
+ (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct iphdr) + \
+ GENEVE_HLEN + 2 * sizeof(struct udphdr))
+
+#define UDP_TUNNEL_HDRLEN(type) \
+ ((type) == UDP_TUNNEL_GENEVE_4IN4 ? UDP_TUNNEL_GENEVE_4IN4_HDRLEN : \
+ (type) == UDP_TUNNEL_GENEVE_6IN6 ? UDP_TUNNEL_GENEVE_6IN6_HDRLEN : \
+ (type) == UDP_TUNNEL_GENEVE_4IN6 ? UDP_TUNNEL_GENEVE_4IN6_HDRLEN : \
+ (type) == UDP_TUNNEL_GENEVE_6IN4 ? UDP_TUNNEL_GENEVE_6IN4_HDRLEN : \
+ 0)
+
+#define UDP_TUNNEL_MSS(type) (ETH_DATA_LEN - UDP_TUNNEL_HDRLEN(type))
+#define UDP_TUNNEL_MAX(type, is_tap) \
+ (ETH_MAX_MTU - UDP_TUNNEL_HDRLEN(type) - ((is_tap) ? ETH_HLEN : 0))
+
+#define TUN_VNET_TNL_SIZE sizeof(struct virtio_net_hdr_v1_hash_tunnel)
+#define MAX_VNET_TUNNEL_PACKET_SZ \
+ (TUN_VNET_TNL_SIZE + ETH_HLEN + UDP_TUNNEL_GENEVE_6IN6_HDRLEN + \
+ ETH_MAX_MTU)
+
+struct geneve_setup_config {
+ int family;
+ union {
+ struct in_addr r4;
+ struct in6_addr r6;
+ } remote;
+ __be32 vnid;
+ __be16 vnport;
+ unsigned char hwaddr[6];
+ uint8_t csum;
+};
static int tun_attach(int fd, char *dev)
{
@@ -25,7 +130,7 @@ static int tun_attach(int fd, char *dev)
strcpy(ifr.ifr_name, dev);
ifr.ifr_flags = IFF_ATTACH_QUEUE;
- return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+ return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
}
static int tun_detach(int fd, char *dev)
@@ -36,7 +141,7 @@ static int tun_detach(int fd, char *dev)
strcpy(ifr.ifr_name, dev);
ifr.ifr_flags = IFF_DETACH_QUEUE;
- return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+ return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
}
static int tun_alloc(char *dev)
@@ -54,7 +159,7 @@ static int tun_alloc(char *dev)
strcpy(ifr.ifr_name, dev);
ifr.ifr_flags = IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE;
- err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+ err = ioctl(fd, TUNSETIFF, (void *)&ifr);
if (err < 0) {
fprintf(stderr, "can't TUNSETIFF: %s\n", strerror(errno));
close(fd);
@@ -66,42 +171,315 @@ static int tun_alloc(char *dev)
static int tun_delete(char *dev)
{
- struct {
- struct nlmsghdr nh;
- struct ifinfomsg ifm;
- unsigned char data[64];
- } req;
- struct rtattr *rta;
- int ret, rtnl;
+ return ip_link_del(dev);
+}
+
+/*
+ * Create a tun/tap device through /dev/net/tun and bring it up.
+ *
+ * @dev:      output buffer; receives the interface name reported back by
+ *            TUNSETIFF (assumed large enough for ifr_name)
+ * @flags:    IFF_* flags passed to TUNSETIFF
+ * @hdrlen:   vnet header size, applied with TUNSETVNETHDRSZ when > 0
+ * @features: offload flags, applied with TUNSETOFFLOAD when non-zero
+ * @mac_addr: hardware address to set; only used for IFF_TAP devices and
+ *            only when non-NULL
+ *
+ * Returns the tun file descriptor on success, -1 on failure.
+ */
+static int tun_open(char *dev, const int flags, const int hdrlen,
+		    const int features, const unsigned char *mac_addr)
+{
+	struct ifreq ifr = { 0 };
+	int ctl_sk = -1;
+	int tun_fd;
+
+	tun_fd = open("/dev/net/tun", O_RDWR);
+	if (tun_fd < 0) {
+		perror("open");
+		return -1;
+	}
+
+	ifr.ifr_flags = flags;
+	if (ioctl(tun_fd, TUNSETIFF, (void *)&ifr) < 0) {
+		perror("ioctl(TUNSETIFF)");
+		goto fail;
+	}
+	strcpy(dev, ifr.ifr_name);
+
+	if (hdrlen > 0 && ioctl(tun_fd, TUNSETVNETHDRSZ, &hdrlen) < 0) {
+		perror("ioctl(TUNSETVNETHDRSZ)");
+		goto fail;
+	}
+
+	if (features && ioctl(tun_fd, TUNSETOFFLOAD, features) < 0) {
+		perror("ioctl(TUNSETOFFLOAD)");
+		goto fail;
+	}
+
+	/* A plain datagram socket is only needed as a handle for SIOC* ioctls */
+	ctl_sk = socket(PF_INET, SOCK_DGRAM, 0);
+	if (ctl_sk < 0) {
+		perror("socket");
+		goto fail;
+	}
+
+	if (ioctl(ctl_sk, SIOCGIFFLAGS, &ifr) < 0) {
+		perror("ioctl(SIOCGIFFLAGS)");
+		goto fail;
+	}
+
+	ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
+	if (ioctl(ctl_sk, SIOCSIFFLAGS, &ifr) < 0) {
+		perror("ioctl(SIOCSIFFLAGS)");
+		goto fail;
+	}
+
+	if (mac_addr && (flags & IFF_TAP)) {
+		ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
+		memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, ETH_ALEN);
+
+		if (ioctl(ctl_sk, SIOCSIFHWADDR, &ifr) < 0) {
+			perror("ioctl(SIOCSIFHWADDR)");
+			goto fail;
+		}
+	}
+
+	close(ctl_sk);
+	return tun_fd;
+
+fail:
+	close(tun_fd);
+	if (ctl_sk >= 0)
+		close(ctl_sk);
+	return -1;
+}
+
+/*
+ * Size of the socket address structure for @family: struct sockaddr_in for
+ * AF_INET, struct sockaddr_in6 for every other value.
+ */
+static size_t sockaddr_len(int family)
+{
+	if (family == AF_INET)
+		return sizeof(struct sockaddr_in);
+
+	return sizeof(struct sockaddr_in6);
+}
+
+/*
+ * ip_link_add() callback: copy the geneve settings from @data (a
+ * struct geneve_setup_config) into the newlink request @req.
+ *
+ * The remote endpoint is set as an IPv4 attribute when the configured family
+ * is AF_INET and as an IPv6 attribute otherwise. Always returns 0.
+ */
+static int geneve_fill_newlink(struct rt_link_newlink_req *req, void *data)
+{
+	struct geneve_setup_config *conf = data;
+
+#define SET_GENEVE_REMOTE rt_link_newlink_req_set_linkinfo_data_geneve_remote
+#define SET_GENEVE_REMOTE6 rt_link_newlink_req_set_linkinfo_data_geneve_remote6
+
+	rt_link_newlink_req_set_address(req, conf->hwaddr, ETH_ALEN);
+	rt_link_newlink_req_set_linkinfo_data_geneve_id(req, conf->vnid);
+	rt_link_newlink_req_set_linkinfo_data_geneve_port(req, conf->vnport);
+	rt_link_newlink_req_set_linkinfo_data_geneve_udp_csum(req, conf->csum);
+
+	if (conf->family != AF_INET) {
+		SET_GENEVE_REMOTE6(req, &conf->remote.r6,
+				   sizeof(conf->remote.r6));
+		return 0;
+	}
+
+	SET_GENEVE_REMOTE(req, conf->remote.r4.s_addr);
+	return 0;
+}
+
+/*
+ * Create a geneve link named @dev.
+ *
+ * @family:  AF_INET if @remote points to a struct in_addr; any other value
+ *           means @remote points to a struct in6_addr
+ * @remote:  address of the tunnel's remote endpoint
+ * @hwaddr:  ETH_ALEN-byte link address for the new device
+ *
+ * The device uses VN_ID, port VN_PORT and has UDP checksums enabled.
+ * Returns the result of ip_link_add().
+ */
+static int geneve_create(const char *dev, int family, void *remote,
+			 void *hwaddr)
+{
+	struct geneve_setup_config cfg;
+	size_t addr_len;
+
+	addr_len = (family == AF_INET) ? sizeof(struct in_addr) :
+					 sizeof(struct in6_addr);
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.family = family;
+	cfg.vnid = VN_ID;
+	cfg.vnport = htons(VN_PORT);
+	cfg.csum = 1;
+	memcpy(cfg.hwaddr, hwaddr, ETH_ALEN);
+	/* r4 and r6 share the union's storage, so one copy serves both */
+	memcpy(&cfg.remote, remote, addr_len);
+
+	return ip_link_add(dev, "geneve", geneve_fill_newlink, (void *)&cfg);
+}
+
+/*
+ * Set the path MTU discovery mode of socket @fd to the "DO" policy, using
+ * the IPv4 option when @is_ipv4 is true and the IPv6 option otherwise.
+ *
+ * Returns the setsockopt() result.
+ */
+static int set_pmtu_discover(int fd, bool is_ipv4)
+{
+	int mode;
+
+	if (!is_ipv4) {
+		mode = IPV6_PMTUDISC_DO;
+		return setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &mode,
+				  sizeof(mode));
+	}
+
+	mode = IP_PMTUDISC_DO;
+	return setsockopt(fd, SOL_IP, IP_MTU_DISCOVER, &mode, sizeof(mode));
+}
+
+/*
+ * Open a UDP socket bound to @ssa.
+ *
+ * @ssa:        local address; its family selects the socket family
+ * @do_frag:    when false, path MTU discovery is set via set_pmtu_discover()
+ * @do_connect: when true, the socket is connected to @dsa
+ * @dsa:        peer address, only used if @do_connect is true
+ *
+ * A receive timeout of TIMEOUT_SEC seconds is installed on the socket.
+ * Returns the socket descriptor, or -1 on failure.
+ */
+static int udp_socket_open(struct sockaddr_storage *ssa, bool do_frag,
+			   bool do_connect, struct sockaddr_storage *dsa)
+{
+	struct timeval rcv_to = { .tv_sec = TIMEOUT_SEC };
+	int af = ssa->ss_family;
+	int addrlen = sockaddr_len(af);
+	int sk;
+
+	sk = socket(af, SOCK_DGRAM, 0);
+	if (sk < 0)
+		return -1;
+
+	if (bind(sk, (struct sockaddr *)ssa, addrlen) < 0) {
+		perror("bind");
+		goto fail;
+	}
+
+	if (do_connect) {
+		if (connect(sk, (struct sockaddr *)dsa, addrlen) < 0) {
+			perror("connect");
+			goto fail;
+		}
+	}
+
+	if (setsockopt(sk, SOL_SOCKET, SO_RCVTIMEO, &rcv_to,
+		       sizeof(rcv_to)) < 0) {
+		perror("setsockopt(SO_RCVTIMEO)");
+		goto fail;
+	}
+
+	if (!do_frag) {
+		if (set_pmtu_discover(sk, af == AF_INET) < 0) {
+			perror("set_pmtu_discover");
+			goto fail;
+		}
+	}
+
+	return sk;
+
+fail:
+	close(sk);
+	return -1;
+}
+
+/*
+ * ip_route_get() callback: store the route type from the response header
+ * into the uint8_t that @rtm_type points to.
+ */
+static void parse_route_rsp(struct rt_route_getroute_rsp *rsp, void *rtm_type)
+{
+	uint8_t *type_out = rtm_type;
+
+	*type_out = rsp->_hdr.rtm_type;
+}
+
+static int ip_route_check(const char *intf, int family, void *addr)
+{
+ uint8_t rtm_type, table = RT_TABLE_LOCAL;
+ int retries = MAX_RETRIES;
- rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
- if (rtnl < 0) {
- fprintf(stderr, "can't open rtnl: %s\n", strerror(errno));
- return 1;
+ while (retries-- > 0) {
+ if (ip_route_get(intf, family, table, addr, parse_route_rsp,
+ &rtm_type) == 0 &&
+ rtm_type == RTN_LOCAL)
+ break;
+
+ usleep(TIMEOUT_USEC);
}
- memset(&req, 0, sizeof(req));
- req.nh.nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(req.ifm)));
- req.nh.nlmsg_flags = NLM_F_REQUEST;
- req.nh.nlmsg_type = RTM_DELLINK;
+ if (retries < 0)
+ return -1;
+
+ return 0;
+}
+
+static int send_gso_udp_msg(int socket, struct sockaddr_storage *addr,
+ uint8_t *send_buf, int send_len, int gso_size)
+{
+ char control[CMSG_SPACE(sizeof(uint16_t))] = { 0 };
+ int alen = sockaddr_len(addr->ss_family);
+ struct msghdr msg = { 0 };
+ struct iovec iov = { 0 };
+ int ret;
+
+ iov.iov_base = send_buf;
+ iov.iov_len = send_len;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_name = addr;
+ msg.msg_namelen = alen;
- req.ifm.ifi_family = AF_UNSPEC;
+ if (gso_size > 0) {
+ struct cmsghdr *cmsg;
- rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len));
- rta->rta_type = IFLA_IFNAME;
- rta->rta_len = RTA_LENGTH(IFNAMSIZ);
- req.nh.nlmsg_len += rta->rta_len;
- memcpy(RTA_DATA(rta), dev, IFNAMSIZ);
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
- ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_UDP;
+ cmsg->cmsg_type = UDP_SEGMENT;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(uint16_t));
+ *(uint16_t *)CMSG_DATA(cmsg) = gso_size;
+ }
+
+ ret = sendmsg(socket, &msg, 0);
if (ret < 0)
- fprintf(stderr, "can't send: %s\n", strerror(errno));
- ret = (unsigned int)ret != req.nh.nlmsg_len;
+ perror("sendmsg");
- close(rtnl);
return ret;
}
+/*
+ * Consume @x bytes from a parse cursor.
+ *
+ * If at least @x bytes remain (*len), advance *cur by @x, reduce *len by @x
+ * and return 0. Otherwise leave both untouched and return -1.
+ */
+static int validate_hdrlen(uint8_t **cur, int *len, int x)
+{
+	int remaining = *len;
+
+	if (x > remaining)
+		return -1;
+
+	*len = remaining - x;
+	*cur = *cur + x;
+	return 0;
+}
+
+/*
+ * Step over one IP header followed by one UDP header.
+ *
+ * The IP header must have the version implied by @is_ipv4 and carry UDP; the
+ * UDP destination port must equal @dport. Returns 0 on a match and -1 on any
+ * mismatch or if the remaining data is too short.
+ */
+static int skip_ip_udp_hdrs(uint8_t **cur, int *len, bool is_ipv4, int dport)
+{
+	struct udphdr *uh;
+
+	if (is_ipv4) {
+		struct iphdr *ip4 = (struct iphdr *)*cur;
+
+		if (validate_hdrlen(cur, len, sizeof(struct iphdr)))
+			return -1;
+		if (ip4->version != 4 || ip4->protocol != IPPROTO_UDP)
+			return -1;
+	} else {
+		struct ipv6hdr *ip6 = (struct ipv6hdr *)*cur;
+
+		if (validate_hdrlen(cur, len, sizeof(struct ipv6hdr)))
+			return -1;
+		if (ip6->version != 6 || ip6->nexthdr != IPPROTO_UDP)
+			return -1;
+	}
+
+	uh = (struct udphdr *)*cur;
+	if (validate_hdrlen(cur, len, sizeof(struct udphdr)))
+		return -1;
+	if (ntohs(uh->dest) != dport)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Check that @buf (of @len bytes, as read from the tun fd) holds a geneve
+ * encapsulated UDP packet of the given @tunnel_type.
+ *
+ * Expected layout: vnet tunnel header, Ethernet header (only if @is_tap),
+ * outer IP + UDP to VN_PORT, geneve header, inner Ethernet header, inner
+ * IP + UDP to UDP_DST_PORT.
+ *
+ * Returns the number of bytes left after the inner UDP header, or -1 if the
+ * packet is too short or any header does not match.
+ */
+static int parse_udp_tunnel_vnet_packet(uint8_t *buf, int len, int tunnel_type,
+					bool is_tap)
+{
+	bool outer_v4 = tunnel_type & UDP_TUNNEL_OUTER_IPV4;
+	bool inner_v4 = tunnel_type & UDP_TUNNEL_INNER_IPV4;
+	uint8_t *pos = buf;
+
+	if (validate_hdrlen(&pos, &len, TUN_VNET_TNL_SIZE))
+		return -1;
+
+	if (is_tap && validate_hdrlen(&pos, &len, ETH_HLEN))
+		return -1;
+
+	if (skip_ip_udp_hdrs(&pos, &len, outer_v4, VN_PORT))
+		return -1;
+
+	if (validate_hdrlen(&pos, &len, GENEVE_HLEN))
+		return -1;
+	if (validate_hdrlen(&pos, &len, ETH_HLEN))
+		return -1;
+
+	if (skip_ip_udp_hdrs(&pos, &len, inner_v4, UDP_DST_PORT))
+		return -1;
+
+	return len;
+}
+
FIXTURE(tun)
{
char ifname[IFNAMSIZ];
@@ -127,31 +505,36 @@ FIXTURE_TEARDOWN(tun)
close(self->fd2);
}
-TEST_F(tun, delete_detach_close) {
+TEST_F(tun, delete_detach_close)
+{
EXPECT_EQ(tun_delete(self->ifname), 0);
EXPECT_EQ(tun_detach(self->fd, self->ifname), -1);
EXPECT_EQ(errno, 22);
}
-TEST_F(tun, detach_delete_close) {
+TEST_F(tun, detach_delete_close)
+{
EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
EXPECT_EQ(tun_delete(self->ifname), 0);
}
-TEST_F(tun, detach_close_delete) {
+TEST_F(tun, detach_close_delete)
+{
EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
close(self->fd);
self->fd = -1;
EXPECT_EQ(tun_delete(self->ifname), 0);
}
-TEST_F(tun, reattach_delete_close) {
+TEST_F(tun, reattach_delete_close)
+{
EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
EXPECT_EQ(tun_delete(self->ifname), 0);
}
-TEST_F(tun, reattach_close_delete) {
+TEST_F(tun, reattach_close_delete)
+{
EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
close(self->fd);
@@ -159,4 +542,447 @@ TEST_F(tun, reattach_close_delete) {
EXPECT_EQ(tun_delete(self->ifname), 0);
}
+FIXTURE(tun_vnet_udptnl)
+{
+ char ifname[IFNAMSIZ]; /* name of the tun/tap device, filled in by tun_open() */
+ int fd, sock; /* tun device fd and the UDP socket opened in the fixture setup */
+};
+
+FIXTURE_VARIANT(tun_vnet_udptnl)
+{
+ int tunnel_type; /* one of the UDP_TUNNEL_GENEVE_* values */
+ int gso_size; /* GSO segment size; 0 when the variant does not set one */
+ int data_size; /* UDP payload bytes sent by the test */
+ int r_num_mss; /* number of datagrams send_gso_packet expects to receive */
+ bool is_tap, no_gso; /* tap vs tun device; no_gso skips the GSO header checks in recv_gso_packet */
+};
+
+/* clang-format off */
+#define TUN_VNET_UDPTNL_VARIANT_ADD(type, desc) \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_1byte) { \
+ /* no GSO: send a single byte */ \
+ .tunnel_type = type, \
+ .data_size = 1, \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_1mss) { \
+ /* no GSO: send a single MSS, fall back to no GSO */ \
+ .tunnel_type = type, \
+ .data_size = UDP_TUNNEL_MSS(type), \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_gtmss) { \
+ /* no GSO: send a single MSS + 1B: fail */ \
+ .tunnel_type = type, \
+ .data_size = UDP_TUNNEL_MSS(type) + 1, \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_1byte) { \
+ /* GSO: send 1 byte, gso 1 byte, fall back to no GSO */ \
+ .tunnel_type = type, \
+ .gso_size = 1, \
+ .data_size = 1, \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_1mss) { \
+ /* send a single MSS: fall back to no GSO */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type), \
+ .data_size = UDP_TUNNEL_MSS(type), \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_ltgso) { \
+ /* data <= MSS < gso: will fall back to no GSO */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type) + 1, \
+ .data_size = UDP_TUNNEL_MSS(type), \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_gtgso) { \
+ /* GSO: a single MSS + 1B */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type), \
+ .data_size = UDP_TUNNEL_MSS(type) + 1, \
+ .r_num_mss = 2, \
+ .is_tap = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_2mss) { \
+ /* GSO: send exactly 2 MSS */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type), \
+ .data_size = UDP_TUNNEL_MSS(type) * 2, \
+ .r_num_mss = 2, \
+ .is_tap = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_maxbytes) { \
+ /* GSO: send max bytes */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type), \
+ .data_size = UDP_TUNNEL_MAX(type, true), \
+ .r_num_mss = UDP_TUNNEL_MAX(type, true) / \
+ UDP_TUNNEL_MSS(type) + 1, \
+ .is_tap = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_over_maxbytes) { \
+ /* GSO: send oversize max bytes: fail */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type), \
+ .data_size = ETH_MAX_MTU, \
+ .r_num_mss = ETH_MAX_MTU / UDP_TUNNEL_MSS(type) + 1, \
+ .is_tap = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_maxsegs) { \
+ /* GSO: send max number of min sized segments */ \
+ .tunnel_type = type, \
+ .gso_size = 1, \
+ .data_size = UDP_TUNNEL_MAX_SEGMENTS, \
+ .r_num_mss = UDP_TUNNEL_MAX_SEGMENTS, \
+ .is_tap = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_5byte) { \
+ /* GSO: send 5 bytes, gso 2 bytes */ \
+ .tunnel_type = type, \
+ .gso_size = 2, \
+ .data_size = 5, \
+ .r_num_mss = 3, \
+ .is_tap = true, \
+ } /* clang-format on */
+
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_4IN4, 4in4);
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_6IN4, 6in4);
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_4IN6, 4in6);
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_6IN6, 6in6);
+
+/*
+ * Point the output arguments at the test's address parameters.
+ *
+ * @family:   AF_INET selects the IPv4 parameters; any other value selects
+ *            the IPv6 ones
+ * @is_outer: non-zero selects the outer (tunnel underlay) addresses, zero
+ *            selects the inner ones
+ * @srcip, @dstip:   receive pointers to the source/destination IP address
+ * @srcmac, @dstmac: receive pointers to the source/destination MAC address
+ */
+static void assign_ifaddr_vars(int family, int is_outer, void **srcip,
+			       void **dstip, void **srcmac, void **dstmac)
+{
+	bool v4 = (family == AF_INET);
+
+	if (!is_outer) {
+		*srcip = v4 ? (void *)&param_ipaddr4_inner_src :
+			      (void *)&param_ipaddr6_inner_src;
+		*dstip = v4 ? (void *)&param_ipaddr4_inner_dst :
+			      (void *)&param_ipaddr6_inner_dst;
+		*srcmac = param_hwaddr_inner_src;
+		*dstmac = param_hwaddr_inner_dst;
+		return;
+	}
+
+	*srcip = v4 ? (void *)&param_ipaddr4_outer_src :
+		      (void *)&param_ipaddr6_outer_src;
+	*dstip = v4 ? (void *)&param_ipaddr4_outer_dst :
+		      (void *)&param_ipaddr6_outer_dst;
+	*srcmac = param_hwaddr_outer_src;
+	*dstmac = param_hwaddr_outer_dst;
+}
+
+/*
+ * Fill @src and @dst with the test's source/destination socket addresses.
+ *
+ * @family:   AF_INET fills them as struct sockaddr_in; any other value fills
+ *            them as struct sockaddr_in6
+ * @is_outer: non-zero selects the outer addresses and leaves the ports at 0;
+ *            zero selects the inner addresses with UDP_SRC_PORT/UDP_DST_PORT
+ *
+ * Both structures are zeroed first: callers pass uninitialized stack storage,
+ * and fields not set below (e.g. sin6_flowinfo, sin6_scope_id, padding) would
+ * otherwise reach bind()/connect()/sendmsg() with indeterminate contents.
+ */
+static void assign_sockaddr_vars(int family, int is_outer,
+				 struct sockaddr_storage *src,
+				 struct sockaddr_storage *dst)
+{
+	memset(src, 0, sizeof(*src));
+	memset(dst, 0, sizeof(*dst));
+
+	src->ss_family = family;
+	dst->ss_family = family;
+
+	if (family == AF_INET) {
+		struct sockaddr_in *s4 = (struct sockaddr_in *)src;
+		struct sockaddr_in *d4 = (struct sockaddr_in *)dst;
+
+		s4->sin_addr = is_outer ? param_ipaddr4_outer_src :
+					  param_ipaddr4_inner_src;
+		d4->sin_addr = is_outer ? param_ipaddr4_outer_dst :
+					  param_ipaddr4_inner_dst;
+		if (!is_outer) {
+			s4->sin_port = htons(UDP_SRC_PORT);
+			d4->sin_port = htons(UDP_DST_PORT);
+		}
+	} else {
+		struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)src;
+		struct sockaddr_in6 *d6 = (struct sockaddr_in6 *)dst;
+
+		s6->sin6_addr = is_outer ? param_ipaddr6_outer_src :
+					   param_ipaddr6_inner_src;
+		d6->sin6_addr = is_outer ? param_ipaddr6_outer_dst :
+					   param_ipaddr6_inner_dst;
+		if (!is_outer) {
+			s6->sin6_port = htons(UDP_SRC_PORT);
+			d6->sin6_port = htons(UDP_DST_PORT);
+		}
+	}
+}
+
+FIXTURE_SETUP(tun_vnet_udptnl)
+{
+ int ret, family, prefix, flags, features;
+ int tunnel_type = variant->tunnel_type;
+ struct sockaddr_storage ssa, dsa;
+ void *sip, *dip, *smac, *dmac;
+
+ flags = (variant->is_tap ? IFF_TAP : IFF_TUN) | IFF_VNET_HDR |
+ IFF_MULTI_QUEUE | IFF_NO_PI;
+ features = TUN_F_CSUM | TUN_F_UDP_TUNNEL_GSO |
+ TUN_F_UDP_TUNNEL_GSO_CSUM | TUN_F_USO4 | TUN_F_USO6;
+ self->fd = tun_open(self->ifname, flags, TUN_VNET_TNL_SIZE, features,
+ param_hwaddr_outer_src); /* tun/tap device that carries the outer packets */
+ ASSERT_GE(self->fd, 0);
+
+ family = (tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? AF_INET : AF_INET6; /* outer address family */
+ prefix = (family == AF_INET) ? IPPREFIX_LEN : IP6PREFIX_LEN;
+ assign_ifaddr_vars(family, 1, &sip, &dip, &smac, &dmac);
+
+ ret = ip_addr_add(self->ifname, family, sip, prefix);
+ ASSERT_EQ(ret, 0);
+ ret = ip_neigh_add(self->ifname, family, dip, dmac); /* permanent neighbour entry for the outer peer */
+ ASSERT_EQ(ret, 0);
+ ret = ip_route_check(self->ifname, family, sip); /* retry until the route to the new address is RTN_LOCAL */
+ ASSERT_EQ(ret, 0);
+
+ ret = geneve_create(param_dev_geneve_name, family, dip,
+ param_hwaddr_inner_src); /* geneve device whose remote is the outer peer */
+ ASSERT_EQ(ret, 0);
+
+ family = (tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET : AF_INET6; /* inner address family */
+ prefix = (family == AF_INET) ? IPPREFIX_LEN : IP6PREFIX_LEN;
+ assign_ifaddr_vars(family, 0, &sip, &dip, &smac, &dmac);
+
+ ret = ip_addr_add(param_dev_geneve_name, family, sip, prefix);
+ ASSERT_EQ(ret, 0);
+ ret = ip_neigh_add(param_dev_geneve_name, family, dip, dmac);
+ ASSERT_EQ(ret, 0);
+ ret = ip_route_check(param_dev_geneve_name, family, sip);
+ ASSERT_EQ(ret, 0);
+
+ assign_sockaddr_vars(family, 0, &ssa, &dsa);
+ self->sock = udp_socket_open(&ssa, false, true, &dsa); /* inner UDP socket: bound to ssa, connected to dsa, do_frag off */
+ ASSERT_GE(self->sock, 0);
+}
+
+FIXTURE_TEARDOWN(tun_vnet_udptnl)
+{
+ int ret;
+
+ if (self->sock != -1) /* udp_socket_open() returns -1 on failure */
+ close(self->sock);
+
+ ret = ip_link_del(param_dev_geneve_name); /* remove the geneve device first */
+ EXPECT_EQ(ret, 0);
+
+ ret = tun_delete(self->ifname); /* then remove the tun/tap device itself */
+ EXPECT_EQ(ret, 0);
+}
+
+static int build_gso_packet_into_tun(const FIXTURE_VARIANT(tun_vnet_udptnl) *
+ variant,
+ uint8_t *buf) /* builds a vnet header plus geneve-encapsulated UDP packet in buf; returns the bytes written */
+{
+ int pktlen, hlen, proto, inner_family, outer_family;
+ int tunnel_type = variant->tunnel_type;
+ int payload_len = variant->data_size;
+ int gso_size = variant->gso_size;
+ uint8_t *outer_udph, *cur = buf;
+ void *sip, *dip, *smac, *dmac;
+ bool is_tap = variant->is_tap;
+
+ hlen = (is_tap ? ETH_HLEN : 0) + UDP_TUNNEL_HDRLEN(tunnel_type); /* all headers in front of the inner UDP payload */
+ inner_family = (tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET :
+ AF_INET6;
+ outer_family = (tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? AF_INET :
+ AF_INET6;
+
+ cur += build_virtio_net_hdr_v1_hash_tunnel(cur, is_tap, hlen, gso_size,
+ outer_family, inner_family);
+
+ pktlen = hlen + payload_len; /* pktlen tracks the bytes that follow the header being built */
+ assign_ifaddr_vars(outer_family, 1, &sip, &dip, &smac, &dmac);
+
+ if (is_tap) {
+ proto = outer_family == AF_INET ? ETH_P_IP : ETH_P_IPV6;
+ pktlen -= ETH_HLEN;
+ cur += build_eth(cur, proto, dmac, smac); /* dst params are passed as source: the packet is addressed from the peer to the local side */
+ }
+
+ if (outer_family == AF_INET) {
+ pktlen = pktlen - sizeof(struct iphdr);
+ cur += build_ipv4_header(cur, IPPROTO_UDP, pktlen, dip, sip); /* source = dip, destination = sip */
+ } else {
+ pktlen = pktlen - sizeof(struct ipv6hdr);
+ cur += build_ipv6_header(cur, IPPROTO_UDP, 0, pktlen, dip, sip);
+ }
+
+ outer_udph = cur; /* remembered so the outer checksum can be filled in last */
+ assign_ifaddr_vars(inner_family, 0, &sip, &dip, &smac, &dmac);
+
+ pktlen -= sizeof(struct udphdr);
+ proto = inner_family == AF_INET ? ETH_P_IP : ETH_P_IPV6;
+ cur += build_udp_header(cur, UDP_SRC_PORT, VN_PORT, pktlen);
+ cur += build_geneve_header(cur, VN_ID);
+ cur += build_eth(cur, proto, dmac, smac);
+
+ pktlen = sizeof(struct udphdr) + payload_len; /* length carried by the inner IP header */
+ if (inner_family == AF_INET)
+ cur += build_ipv4_header(cur, IPPROTO_UDP, pktlen, dip, sip);
+ else
+ cur += build_ipv6_header(cur, IPPROTO_UDP, 0, pktlen, dip, sip);
+
+ cur += build_udp_packet(cur, UDP_DST_PORT, UDP_SRC_PORT, payload_len,
+ inner_family, false); /* inner ports are swapped as well: source port is UDP_DST_PORT */
+
+ build_udp_packet_csum(outer_udph, outer_family, false); /* outer checksum covers everything built after outer_udph */
+
+ return cur - buf;
+}
+
+/*
+ * Read datagrams from the fixture's UDP socket until at least
+ * variant->data_size bytes have arrived or recv() stops delivering data.
+ *
+ * *r_num_mss is incremented once per received datagram. recv() errors other
+ * than EAGAIN/EWOULDBLOCK are reported with perror().
+ *
+ * Returns the total number of bytes received.
+ */
+static int
+receive_gso_packet_from_tunnel(FIXTURE_DATA(tun_vnet_udptnl) * self,
+			       const FIXTURE_VARIANT(tun_vnet_udptnl) * variant,
+			       int *r_num_mss)
+{
+	uint8_t rx_buf[MAX_VNET_TUNNEL_PACKET_SZ];
+	int wanted = variant->data_size;
+	int sk = self->sock;
+	int received, n;
+
+	for (received = 0; received < wanted; received += n) {
+		n = recv(sk, rx_buf, sizeof(rx_buf), 0);
+		if (n > 0) {
+			(*r_num_mss)++;
+			continue;
+		}
+
+		if (n < 0 && errno != EAGAIN && errno != EWOULDBLOCK)
+			perror("recv");
+		break;
+	}
+
+	return received;
+}
+
+/*
+ * Send variant->data_size zero bytes from the fixture's UDP socket to the
+ * inner destination address, requesting segmentation with
+ * variant->gso_size.
+ *
+ * Returns the result of send_gso_udp_msg().
+ */
+static int send_gso_packet_into_tunnel(FIXTURE_DATA(tun_vnet_udptnl) * self,
+				       const FIXTURE_VARIANT(tun_vnet_udptnl) *
+					       variant)
+{
+	uint8_t payload[MAX_VNET_TUNNEL_PACKET_SZ] = { 0 };
+	struct sockaddr_storage local, peer;
+	int inner_family;
+
+	if (variant->tunnel_type & UDP_TUNNEL_INNER_IPV4)
+		inner_family = AF_INET;
+	else
+		inner_family = AF_INET6;
+
+	assign_sockaddr_vars(inner_family, 0, &local, &peer);
+	return send_gso_udp_msg(self->sock, &peer, payload, variant->data_size,
+				variant->gso_size);
+}
+
+/*
+ * Read packets from the tun fd until variant->data_size payload bytes of
+ * matching tunnel traffic have been collected.
+ *
+ * Each packet is checked with parse_udp_tunnel_vnet_packet(); packets that do
+ * not match are skipped. The vnet header of the first matching packet is
+ * copied to @vnet_hdr.
+ *
+ * The loop stops early on a select()/read() failure or when select() times
+ * out. The same struct timeval is reused for every select() call; on Linux
+ * select() may update it with the time left, in which case TIMEOUT_SEC bounds
+ * the whole loop rather than each wait.
+ *
+ * Returns the number of payload bytes collected.
+ */
+static int
+receive_gso_packet_from_tun(FIXTURE_DATA(tun_vnet_udptnl) * self,
+			    const FIXTURE_VARIANT(tun_vnet_udptnl) * variant,
+			    struct virtio_net_hdr_v1_hash_tunnel *vnet_hdr)
+{
+	struct timeval timeout = { .tv_sec = TIMEOUT_SEC };
+	uint8_t buf[MAX_VNET_TUNNEL_PACKET_SZ];
+	int tunnel_type = variant->tunnel_type;
+	int payload_len = variant->data_size;
+	bool is_tap = variant->is_tap;
+	int ret, len, total_len = 0;
+	int tun_fd = self->fd;
+	fd_set fdset;
+
+	while (total_len < payload_len) {
+		FD_ZERO(&fdset);
+		FD_SET(tun_fd, &fdset);
+
+		ret = select(tun_fd + 1, &fdset, NULL, NULL, &timeout);
+		if (ret < 0) {
+			perror("select");
+			break;
+		}
+		if (ret == 0) {
+			/* a timeout does not set errno, so perror() would mislead */
+			fprintf(stderr, "select: timed out\n");
+			break;
+		}
+		if (!FD_ISSET(tun_fd, &fdset))
+			continue;
+
+		len = read(tun_fd, buf, sizeof(buf));
+		if (len <= 0) {
+			if (len < 0 && errno != EAGAIN && errno != EWOULDBLOCK)
+				perror("read");
+			break;
+		}
+
+		len = parse_udp_tunnel_vnet_packet(buf, len, tunnel_type,
+						   is_tap);
+		if (len < 0)
+			continue;
+
+		if (total_len == 0)
+			memcpy(vnet_hdr, buf, TUN_VNET_TNL_SIZE);
+
+		total_len += len;
+	}
+
+	return total_len;
+}
+
+TEST_F(tun_vnet_udptnl, send_gso_packet)
+{
+ uint8_t pkt[MAX_VNET_TUNNEL_PACKET_SZ];
+ int r_num_mss = 0;
+ int ret, off;
+
+ memset(pkt, 0, sizeof(pkt));
+ off = build_gso_packet_into_tun(variant, pkt); /* off is the number of bytes built into pkt */
+ ret = write(self->fd, pkt, off); /* inject the encapsulated packet through the tun fd */
+ ASSERT_EQ(ret, off);
+
+ ret = receive_gso_packet_from_tunnel(self, variant, &r_num_mss); /* read it back from the UDP socket */
+ ASSERT_EQ(ret, variant->data_size);
+ ASSERT_EQ(r_num_mss, variant->r_num_mss); /* one count per datagram received */
+}
+
+TEST_F(tun_vnet_udptnl, recv_gso_packet)
+{
+ struct virtio_net_hdr_v1_hash_tunnel vnet_hdr = { 0 };
+ struct virtio_net_hdr_v1 *vh = &vnet_hdr.hash_hdr.hdr;
+ int ret, gso_type = VIRTIO_NET_HDR_GSO_UDP_L4;
+
+ ret = send_gso_packet_into_tunnel(self, variant); /* send the payload from the UDP socket */
+ ASSERT_EQ(ret, variant->data_size);
+
+ memset(&vnet_hdr, 0, sizeof(vnet_hdr)); /* vnet_hdr was already zero-initialized above */
+ ret = receive_gso_packet_from_tun(self, variant, &vnet_hdr); /* collect the encapsulated packets from the tun fd */
+ ASSERT_EQ(ret, variant->data_size);
+
+ if (!variant->no_gso) { /* GSO header fields are only checked when no_gso is not set */
+ ASSERT_EQ(vh->gso_size, variant->gso_size);
+ gso_type |= (variant->tunnel_type & UDP_TUNNEL_OUTER_IPV4) ?
+ (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4) :
+ (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6);
+ ASSERT_EQ(vh->gso_type, gso_type);
+ }
+}
+
+XFAIL_ADD(tun_vnet_udptnl, 4in4_nogsosz_gtmss, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in4_nogsosz_gtmss, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 4in6_nogsosz_gtmss, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in6_nogsosz_gtmss, recv_gso_packet);
+
+XFAIL_ADD(tun_vnet_udptnl, 4in4_over_maxbytes, send_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in4_over_maxbytes, send_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 4in6_over_maxbytes, send_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in6_over_maxbytes, send_gso_packet);
+
+XFAIL_ADD(tun_vnet_udptnl, 4in4_over_maxbytes, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in4_over_maxbytes, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 4in6_over_maxbytes, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in6_over_maxbytes, recv_gso_packet);
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/tuntap_helpers.h b/tools/testing/selftests/net/tuntap_helpers.h
new file mode 100644
index 000000000000..d6c0437136ec
--- /dev/null
+++ b/tools/testing/selftests/net/tuntap_helpers.h
@@ -0,0 +1,390 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _TUNTAP_HELPERS_H
+#define _TUNTAP_HELPERS_H
+
+#include <errno.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/virtio_net.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/udp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ynl.h>
+
+#include "rt-route-user.h"
+#include "rt-addr-user.h"
+#include "rt-neigh-user.h"
+#include "rt-link-user.h"
+
+#define GENEVE_HLEN 8
+#define PKT_DATA 0xCB
+#define TUNTAP_DEFAULT_TTL 8
+#define TUNTAP_DEFAULT_IPID 1337
+
+unsigned int if_nametoindex(const char *ifname);
+
+/*
+ * Size in bytes of an IP address of @family: struct in_addr for AF_INET,
+ * struct in6_addr for every other value.
+ */
+static inline int ip_addr_len(int family)
+{
+	if (family == AF_INET)
+		return sizeof(struct in_addr);
+
+	return sizeof(struct in6_addr);
+}
+
+/*
+ * Fill the address message header @ifam for device @dev with the given
+ * @family, @prefix length and @flags; the scope is RT_SCOPE_UNIVERSE.
+ *
+ * The interface index is looked up with if_nametoindex().
+ */
+static inline void fill_ifaddr_msg(struct ifaddrmsg *ifam, int family,
+				   int prefix, int flags, const char *dev)
+{
+	unsigned int ifindex = if_nametoindex(dev);
+
+	ifam->ifa_index = ifindex;
+	ifam->ifa_family = family;
+	ifam->ifa_prefixlen = prefix;
+	ifam->ifa_scope = RT_SCOPE_UNIVERSE;
+	ifam->ifa_flags = flags;
+}
+
+/*
+ * Add the address @addr/@prefix of @family to device @dev over a YNL rt-addr
+ * socket. The address is added with IFA_F_PERMANENT | IFA_F_NODAD and the
+ * request uses NLM_F_CREATE | NLM_F_EXCL.
+ *
+ * Returns the rt_addr_newaddr() result, or -1 if the socket or the request
+ * could not be allocated.
+ */
+static inline int ip_addr_add(const char *dev, int family, void *addr,
+			      uint8_t prefix)
+{
+	struct rt_addr_newaddr_req *req;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	ys = ynl_sock_create(&ynl_rt_addr_family, NULL);
+	if (!ys)
+		return -1;
+
+	req = rt_addr_newaddr_req_alloc();
+	if (req) {
+		fill_ifaddr_msg(&req->_hdr, family, prefix,
+				IFA_F_PERMANENT | IFA_F_NODAD, dev);
+		rt_addr_newaddr_req_set_nlflags(req, NLM_F_REQUEST |
+						     NLM_F_CREATE |
+						     NLM_F_EXCL);
+		rt_addr_newaddr_req_set_local(req, addr, ip_addr_len(family));
+
+		ret = rt_addr_newaddr(ys, req);
+		rt_addr_newaddr_req_free(req);
+	}
+
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/*
+ * Fill the neighbour message header @ndm for device @dev with the given
+ * @family and @state. Flags are cleared and the type is RTN_UNICAST.
+ *
+ * The interface index is looked up with if_nametoindex().
+ */
+static inline void fill_neigh_req_header(struct ndmsg *ndm, int family,
+					 int state, const char *dev)
+{
+	unsigned int ifindex = if_nametoindex(dev);
+
+	ndm->ndm_ifindex = ifindex;
+	ndm->ndm_family = family;
+	ndm->ndm_type = RTN_UNICAST;
+	ndm->ndm_flags = 0;
+	ndm->ndm_state = state;
+}
+
+/*
+ * Add a NUD_PERMANENT neighbour entry on @dev mapping the @family address
+ * @addr to the ETH_ALEN-byte link-layer address @lladdr, over a YNL rt-neigh
+ * socket. The request uses NLM_F_CREATE | NLM_F_EXCL.
+ *
+ * Returns the rt_neigh_newneigh() result, or -1 if the socket or the request
+ * could not be allocated.
+ */
+static inline int ip_neigh_add(const char *dev, int family, void *addr,
+			       unsigned char *lladdr)
+{
+	struct rt_neigh_newneigh_req *req;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	ys = ynl_sock_create(&ynl_rt_neigh_family, NULL);
+	if (!ys)
+		return -1;
+
+	req = rt_neigh_newneigh_req_alloc();
+	if (req) {
+		fill_neigh_req_header(&req->_hdr, family, NUD_PERMANENT, dev);
+		rt_neigh_newneigh_req_set_nlflags(req, NLM_F_REQUEST |
+						       NLM_F_CREATE |
+						       NLM_F_EXCL);
+		rt_neigh_newneigh_req_set_dst(req, addr, ip_addr_len(family));
+		rt_neigh_newneigh_req_set_lladdr(req, lladdr, ETH_ALEN);
+		rt_neigh_newneigh_req_set_ifindex(req, if_nametoindex(dev));
+
+		ret = rt_neigh_newneigh(ys, req);
+		rt_neigh_newneigh_req_free(req);
+	}
+
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/* Set the address family and routing table of the route message header. */
+static inline void fill_route_req_header(struct rtmsg *rtm, int family,
+					 int table)
+{
+	rtm->rtm_table = table;
+	rtm->rtm_family = family;
+}
+
+/*
+ * Query the route towards @dst (an address of @family) in @table with output
+ * interface @dev, over a YNL rt-route socket.
+ *
+ * On a successful query @parse_rsp, if not NULL, is called with the response
+ * and @out before the response is freed.
+ *
+ * Returns 0 if a response was obtained, -1 otherwise.
+ */
+static inline int
+ip_route_get(const char *dev, int family, int table, void *dst,
+	     void (*parse_rsp)(struct rt_route_getroute_rsp *rsp, void *out),
+	     void *out)
+{
+	struct rt_route_getroute_req *req;
+	struct rt_route_getroute_rsp *rsp;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	ys = ynl_sock_create(&ynl_rt_route_family, NULL);
+	if (!ys)
+		return -1;
+
+	req = rt_route_getroute_req_alloc();
+	if (req) {
+		fill_route_req_header(&req->_hdr, family, table);
+		rt_route_getroute_req_set_nlflags(req, NLM_F_REQUEST);
+		rt_route_getroute_req_set_dst(req, dst, ip_addr_len(family));
+		rt_route_getroute_req_set_oif(req, if_nametoindex(dev));
+
+		rsp = rt_route_getroute(ys, req);
+		if (rsp) {
+			ret = 0;
+			if (parse_rsp)
+				parse_rsp(rsp, out);
+			rt_route_getroute_rsp_free(rsp);
+		}
+
+		rt_route_getroute_req_free(req);
+	}
+
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/*
+ * Create a link named @dev of kind @link_type over a YNL rt-link socket.
+ * The link is created with IFF_UP set and the request uses
+ * NLM_F_CREATE | NLM_F_EXCL.
+ *
+ * @fill_link_attr, if not NULL, is called with the request and @data to add
+ * kind-specific attributes; a negative return value from it aborts the
+ * request.
+ *
+ * Returns the rt_link_newlink() result, or -1 if the request was aborted or
+ * the socket or request could not be allocated.
+ */
+static inline int
+ip_link_add(const char *dev, char *link_type,
+	    int (*fill_link_attr)(struct rt_link_newlink_req *req, void *data),
+	    void *data)
+{
+	struct rt_link_newlink_req *req;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	ys = ynl_sock_create(&ynl_rt_link_family, NULL);
+	if (!ys)
+		return -1;
+
+	req = rt_link_newlink_req_alloc();
+	if (req) {
+		req->_hdr.ifi_flags = IFF_UP;
+		rt_link_newlink_req_set_nlflags(req, NLM_F_REQUEST |
+						     NLM_F_CREATE |
+						     NLM_F_EXCL);
+		rt_link_newlink_req_set_ifname(req, dev);
+		rt_link_newlink_req_set_linkinfo_kind(req, link_type);
+
+		if (!fill_link_attr || fill_link_attr(req, data) >= 0)
+			ret = rt_link_newlink(ys, req);
+
+		rt_link_newlink_req_free(req);
+	}
+
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/*
+ * Delete the link named @dev over a YNL rt-link socket.
+ *
+ * Returns the rt_link_dellink() result, or -1 if the socket or the request
+ * could not be allocated.
+ */
+static inline int ip_link_del(const char *dev)
+{
+	struct rt_link_dellink_req *req;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	ys = ynl_sock_create(&ynl_rt_link_family, NULL);
+	if (!ys)
+		return -1;
+
+	req = rt_link_dellink_req_alloc();
+	if (req) {
+		rt_link_dellink_req_set_nlflags(req, NLM_F_REQUEST);
+		rt_link_dellink_req_set_ifname(req, dev);
+
+		ret = rt_link_dellink(ys, req);
+		rt_link_dellink_req_free(req);
+	}
+
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/*
+ * Write an Ethernet header at @buf with EtherType @proto (host byte order),
+ * source address @src and destination address @dest (ETH_ALEN bytes each).
+ *
+ * Returns ETH_HLEN.
+ */
+static inline size_t build_eth(uint8_t *buf, uint16_t proto, unsigned char *src,
+			       unsigned char *dest)
+{
+	struct ethhdr *hdr = (struct ethhdr *)buf;
+
+	memcpy(hdr->h_dest, dest, ETH_ALEN);
+	memcpy(hdr->h_source, src, ETH_ALEN);
+	hdr->h_proto = htons(proto);
+
+	return ETH_HLEN;
+}
+
+/*
+ * Sum @len bytes at @buf as 16-bit words, for use in an Internet checksum.
+ *
+ * Words are loaded exactly as they lie in memory, so the result is in the
+ * same byte order as the data and can be folded and stored without swapping.
+ * An odd trailing byte is treated as the first byte of a word whose second
+ * byte is zero, which gives the right result on both little- and big-endian
+ * hosts. Loads go through memcpy() so @buf need not be 2-byte aligned.
+ *
+ * A @len of zero or less yields 0. Returns the unfolded 32-bit sum.
+ */
+static inline uint32_t add_csum(const uint8_t *buf, int len)
+{
+	uint32_t sum = 0;
+	uint16_t word;
+
+	for (; len > 1; len -= 2, buf += 2) {
+		memcpy(&word, buf, sizeof(word));
+		sum += word;
+	}
+
+	if (len > 0) {
+		word = 0;
+		memcpy(&word, buf, 1);
+		sum += word;
+	}
+
+	return sum;
+}
+
+/*
+ * Fold the 32-bit partial sum @sum into 16 bits by repeatedly adding the
+ * carries back in, and return its one's complement.
+ */
+static inline uint16_t finish_ip_csum(uint32_t sum)
+{
+	uint32_t folded = sum;
+
+	while (folded > 0xffff)
+		folded = (folded >> 16) + (folded & 0xffff);
+
+	return (uint16_t)~folded;
+}
+
+/*
+ * Checksum @len bytes at @buf on top of the initial partial sum @sum and
+ * return the folded, complemented 16-bit result.
+ */
+static inline uint16_t build_ip_csum(const uint8_t *buf, int len, uint32_t sum)
+{
+	uint32_t total = sum + add_csum(buf, len);
+
+	return finish_ip_csum(total);
+}
+
+/*
+ * Write a 20-byte IPv4 header (no options) at @buf.
+ *
+ * @proto:       value for the protocol field
+ * @payload_len: number of bytes that follow the header; tot_len is set to
+ *               header size + @payload_len
+ * @src, @dst:   source and destination addresses
+ *
+ * TTL is TUNTAP_DEFAULT_TTL and the IP id is TUNTAP_DEFAULT_IPID. Fields not
+ * listed here (e.g. tos, frag_off) are left as found in @buf, so callers are
+ * expected to pass a zeroed buffer. The checksum field is cleared before the
+ * header checksum is computed, so it is correct whatever @buf held there.
+ *
+ * Returns the header length in bytes.
+ */
+static inline int build_ipv4_header(uint8_t *buf, uint8_t proto,
+				    int payload_len, struct in_addr *src,
+				    struct in_addr *dst)
+{
+	struct iphdr *iph = (struct iphdr *)buf;
+
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->ttl = TUNTAP_DEFAULT_TTL;
+	iph->tot_len = htons(sizeof(*iph) + payload_len);
+	iph->id = htons(TUNTAP_DEFAULT_IPID);
+	iph->protocol = proto;
+	iph->saddr = src->s_addr;
+	iph->daddr = dst->s_addr;
+	/* the checksum is defined over the header with this field as zero */
+	iph->check = 0;
+	iph->check = build_ip_csum(buf, iph->ihl << 2, 0);
+
+	return iph->ihl << 2;
+}
+
+/*
+ * Store @dsfield in the 8 bits that follow the 4-bit version field of the
+ * IPv6 header, leaving the other bits of the first 16-bit word untouched.
+ */
+static inline void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
+{
+	uint16_t *first_word = (uint16_t *)ip6h;
+	uint16_t host = ntohs(*first_word);
+
+	/* keep the version nibble and the low nibble, replace the middle byte */
+	host = (host & 0xF00F) | (((uint16_t)dsfield) << 4);
+	*first_word = htons(host);
+}
+
+/*
+ * Write an IPv6 header at @buf.
+ *
+ * @proto:       value for the next-header field
+ * @dsfield:     value stored with ipv6_set_dsfield()
+ * @payload_len: value for the payload length field
+ * @src, @dst:   source and destination addresses
+ *
+ * The hop limit is TUNTAP_DEFAULT_TTL. Returns sizeof(struct ipv6hdr).
+ */
+static inline int build_ipv6_header(uint8_t *buf, uint8_t proto,
+				    uint8_t dsfield, int payload_len,
+				    struct in6_addr *src, struct in6_addr *dst)
+{
+	struct ipv6hdr *hdr = (struct ipv6hdr *)buf;
+
+	hdr->version = 6;
+	hdr->nexthdr = proto;
+	hdr->hop_limit = TUNTAP_DEFAULT_TTL;
+	hdr->payload_len = htons(payload_len);
+	ipv6_set_dsfield(hdr, dsfield);
+	memcpy(&hdr->saddr, src, sizeof(hdr->saddr));
+	memcpy(&hdr->daddr, dst, sizeof(hdr->daddr));
+
+	return sizeof(*hdr);
+}
+
+/*
+ * Write the protocol type and VNI of a geneve header at @buf.
+ *
+ * Bytes 2-3 receive ETH_P_TEB and bytes 4-7 receive the low 24 bits of @vni
+ * shifted into the upper three bytes, both in network byte order. Bytes 0-1
+ * are not written, so they keep whatever @buf already holds.
+ *
+ * Returns GENEVE_HLEN.
+ */
+static inline int build_geneve_header(uint8_t *buf, uint32_t vni)
+{
+	uint32_t vni_be = htonl((vni << 8) & 0xffffff00);
+	uint16_t proto_be = htons(ETH_P_TEB);
+
+	memcpy(&buf[2], &proto_be, sizeof(proto_be));
+	memcpy(&buf[4], &vni_be, sizeof(vni_be));
+
+	return GENEVE_HLEN;
+}
+
+/*
+ * Write the ports and length of a UDP header at @buf.
+ *
+ * @sport, @dport: source and destination port, host byte order
+ * @payload_len:   bytes following the header; the length field is set to
+ *                 header size + @payload_len
+ *
+ * The checksum field is not written. Returns the UDP header size.
+ */
+static inline int build_udp_header(uint8_t *buf, uint16_t sport, uint16_t dport,
+				   int payload_len)
+{
+	struct udphdr *hdr = (struct udphdr *)buf;
+	int hdr_size = sizeof(*hdr);
+
+	hdr->len = htons(sizeof(*hdr) + payload_len);
+	hdr->dest = htons(dport);
+	hdr->source = htons(sport);
+
+	return hdr_size;
+}
+
+/*
+ * Compute and store the checksum of the UDP header at @buf.
+ *
+ * The pseudo-header addresses are read from the 2 * address-length bytes
+ * directly in front of @buf. That is only valid when the IP header has no
+ * options or extension headers, because the source and destination addresses
+ * are then the last fields of both the IPv4 and the IPv6 header.
+ *
+ * @family:   address family of the preceding IP header
+ * @csum_off: when true only the pseudo-header is summed; otherwise the UDP
+ *            header and payload (udph->len bytes) are summed as well
+ */
+static inline void build_udp_packet_csum(uint8_t *buf, int family,
+					 bool csum_off)
+{
+	struct udphdr *udph = (struct udphdr *)buf;
+	size_t ipalen = ip_addr_len(family);
+	uint32_t sum;
+
+	/* the checksum field counts as zero while the sum is taken */
+	udph->check = 0;
+
+	sum = add_csum(buf - 2 * ipalen, 2 * ipalen);
+	/* added in network byte order, matching the words add_csum() loads */
+	sum += htons(IPPROTO_UDP) + udph->len;
+
+	/* udph->len is big-endian: convert it before using it as a byte count */
+	if (!csum_off)
+		sum += add_csum(buf, ntohs(udph->len));
+
+	udph->check = finish_ip_csum(sum);
+}
+
+/*
+ * Write a complete UDP datagram at @buf: header, @payload_len bytes of
+ * PKT_DATA filler, and the checksum computed by build_udp_packet_csum()
+ * with the given @family and @csum_off.
+ *
+ * Returns the header size plus @payload_len.
+ */
+static inline int build_udp_packet(uint8_t *buf, uint16_t sport, uint16_t dport,
+				   int payload_len, int family, bool csum_off)
+{
+	int hdr_size = build_udp_header(buf, sport, dport, payload_len);
+
+	memset(buf + hdr_size, PKT_DATA, payload_len);
+	build_udp_packet_csum(buf, family, csum_off);
+
+	return hdr_size + payload_len;
+}
+
+/*
+ * Fill the virtio-net tunnel header at @buf for a geneve-encapsulated UDP
+ * packet.
+ *
+ * @is_tap:       true if the packet starts with an Ethernet header
+ * @hdr_len:      value stored in hdr_len
+ * @gso_size:     value stored in gso_size; when non-zero, gso_type is set to
+ *                UDP L4 GSO plus the tunnel flag matching @outer_family
+ * @outer_family: address family of the outer IP header
+ * @inner_family: address family of the inner IP header
+ *
+ * outer_th_offset points at the outer UDP header, inner_nh_offset at the
+ * inner IP header and csum_start at the inner UDP header. gso_type is not
+ * written when @gso_size is zero.
+ *
+ * Returns sizeof(struct virtio_net_hdr_v1_hash_tunnel).
+ */
+static inline int build_virtio_net_hdr_v1_hash_tunnel(uint8_t *buf, bool is_tap,
+						      int hdr_len, int gso_size,
+						      int outer_family,
+						      int inner_family)
+{
+	struct virtio_net_hdr_v1_hash_tunnel *tnl = (void *)buf;
+	struct virtio_net_hdr_v1 *hdr = &tnl->hash_hdr.hdr;
+	int outer_ip_len, inner_ip_len;
+	int outer_th, inner_nh;
+
+	if (outer_family == AF_INET)
+		outer_ip_len = sizeof(struct iphdr);
+	else
+		outer_ip_len = sizeof(struct ipv6hdr);
+
+	if (inner_family == AF_INET)
+		inner_ip_len = sizeof(struct iphdr);
+	else
+		inner_ip_len = sizeof(struct ipv6hdr);
+
+	outer_th = (is_tap ? ETH_HLEN : 0) + outer_ip_len;
+	/* outer UDP, geneve and the inner Ethernet header precede the inner IP */
+	inner_nh = outer_th + ETH_HLEN + GENEVE_HLEN + sizeof(struct udphdr);
+
+	tnl->outer_th_offset = outer_th;
+	tnl->inner_nh_offset = inner_nh;
+
+	hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+	hdr->csum_start = inner_nh + inner_ip_len;
+	hdr->csum_offset = __builtin_offsetof(struct udphdr, check);
+	hdr->hdr_len = hdr_len;
+	hdr->gso_size = gso_size;
+
+	if (gso_size) {
+		int tnl_flag = (outer_family == AF_INET) ?
+				       VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4 :
+				       VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6;
+
+		hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP_L4 | tnl_flag;
+	}
+
+	return sizeof(struct virtio_net_hdr_v1_hash_tunnel);
+}
+
+#endif /* _TUNTAP_HELPERS_H */
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index bcc14688661d..170be192f5c7 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -206,12 +206,10 @@ static void __print_timestamp(const char *name, struct timespec *cur,
fprintf(stderr, "\n");
}
-static void print_timestamp_usr(void)
+static void record_timestamp_usr(void)
{
if (clock_gettime(CLOCK_REALTIME, &ts_usr))
error(1, errno, "clock_gettime");
-
- __print_timestamp(" USR", &ts_usr, 0, 0);
}
static void print_timestamp(struct scm_timestamping *tss, int tstype,
@@ -599,8 +597,6 @@ static void do_test(int family, unsigned int report_opt)
fill_header_udp(buf + off, family == PF_INET);
}
- print_timestamp_usr();
-
iov.iov_base = buf;
iov.iov_len = total_len;
@@ -655,10 +651,14 @@ static void do_test(int family, unsigned int report_opt)
}
+ record_timestamp_usr();
+
val = sendmsg(fd, &msg, 0);
if (val != total_len)
error(1, errno, "send");
+ __print_timestamp(" USR", &ts_usr, 0, 0);
+
/* wait for all errors to be queued, else ACKs arrive OOO */
if (cfg_sleep_usec)
usleep(cfg_sleep_usec);
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index a39fdc4aa2ff..9b722c1e4b0f 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -162,6 +162,39 @@ run_test() {
echo " ok"
}
+run_test_csum() {
+ local -r msg="$1"
+ local -r dst="$2"
+ local csum_error_filter=UdpInCsumErrors
+ local csum_errors
+
+ printf "%-40s" "$msg"
+
+ is_ipv6 "$dst" && csum_error_filter=Udp6InCsumErrors
+
+ ip netns exec "$NS_DST" iperf3 -s -1 >/dev/null &
+ wait_local_port_listen "$NS_DST" 5201 tcp
+ local spid="$!"
+ ip netns exec "$NS_SRC" iperf3 -c "$dst" -t 2 >/dev/null
+ local retc="$?"
+ wait "$spid"
+ local rets="$?"
+ if [ "$rets" -ne 0 ] || [ "$retc" -ne 0 ]; then
+ echo " fail client exit code $retc, server $rets"
+ ret=1
+ return
+ fi
+
+ csum_errors=$(ip netns exec "$NS_DST" nstat -as "$csum_error_filter" |
+ grep "$csum_error_filter" | awk '{print $2}')
+ if [ -n "$csum_errors" ] && [ "$csum_errors" -gt 0 ]; then
+ echo " fail - csum error on receive $csum_errors, expected 0"
+ ret=1
+ return
+ fi
+ echo " ok"
+}
+
run_bench() {
local -r msg=$1
local -r dst=$2
@@ -260,6 +293,37 @@ for family in 4 6; do
ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST
cleanup
+
+ # force segmentation and re-aggregation
+ create_vxlan_pair
+ ip netns exec "$NS_DST" ethtool -K veth"$DST" generic-receive-offload on
+ ip netns exec "$NS_SRC" ethtool -K veth"$SRC" tso off
+ ip -n "$NS_SRC" link set dev veth"$SRC" mtu 1430
+
+ # forward to a 2nd veth pair
+ ip -n "$NS_DST" link add br0 type bridge
+ ip -n "$NS_DST" link set dev veth"$DST" master br0
+
+ # segment the aggregated TSO packet, without csum offload
+ ip -n "$NS_DST" link add veth_segment type veth peer veth_rx
+ for FEATURE in tso tx-udp-segmentation tx-checksumming; do
+ ip netns exec "$NS_DST" ethtool -K veth_segment "$FEATURE" off
+ done
+ ip -n "$NS_DST" link set dev veth_segment master br0 up
+ ip -n "$NS_DST" link set dev br0 up
+ ip -n "$NS_DST" link set dev veth_rx up
+
+ # move the lower layer IP in the last added veth
+ for ADDR in "$BM_NET_V4$DST/24" "$BM_NET_V6$DST/64"; do
+ # the nodad argument makes iproute emit a harmless warning
+ # for IPv4 addresses
+ ip -n "$NS_DST" addr del dev veth"$DST" "$ADDR"
+ ip -n "$NS_DST" addr add dev veth_rx "$ADDR" \
+ nodad 2>/dev/null
+ done
+
+ run_test_csum "GSO after GRO" "$OL_NET$DST"
+ cleanup
done
exit $ret
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 40f5c2908dda..0370489d938b 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := nolibc-test
+TEST_GEN_PROGS := nolibc-test libc-test
include ../lib.mk
include $(top_srcdir)/scripts/Makefile.compiler
@@ -9,16 +9,16 @@ cc-option = $(call __cc-option, $(CC),,$(1),$(2))
include Makefile.include
-CFLAGS = -nostdlib -nostdinc -static \
+$(OUTPUT)/nolibc-test: CFLAGS = -nostdlib -nostdinc -static \
-isystem $(top_srcdir)/tools/include/nolibc -isystem $(top_srcdir)/usr/include \
$(CFLAGS_NOLIBC_TEST)
-
-ifeq ($(LLVM),)
-LDLIBS := -lgcc
-endif
-
+$(OUTPUT)/nolibc-test: LDLIBS = $(if $(LLVM),,-lgcc)
$(OUTPUT)/nolibc-test: nolibc-test.c nolibc-test-linkage.c | headers
+$(OUTPUT)/libc-test: nolibc-test.c nolibc-test-linkage.c
+ $(call msg,CC,,$@)
+ $(Q)$(LINK.c) $^ -o $@
+
help:
@echo "For the custom nolibc testsuite use '$(MAKE) -f Makefile.nolibc'; available targets:"
@$(MAKE) -f Makefile.nolibc help
diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc
index f9d43cbdc894..f5704193038f 100644
--- a/tools/testing/selftests/nolibc/Makefile.nolibc
+++ b/tools/testing/selftests/nolibc/Makefile.nolibc
@@ -226,7 +226,7 @@ CFLAGS_mipsn32be = -EB -mabi=n32 -march=mips64r6
CFLAGS_mips64le = -EL -mabi=64 -march=mips64r6
CFLAGS_mips64be = -EB -mabi=64 -march=mips64r2
CFLAGS_loongarch = $(if $(LLVM),-fuse-ld=lld)
-CFLAGS_sparc32 = $(call cc-option,-m32)
+CFLAGS_sparc32 = $(call cc-option,-m32) -mcpu=v8
CFLAGS_sh4 = -ml -m4
ifeq ($(origin XARCH),command line)
CFLAGS_XARCH = $(CFLAGS_$(XARCH))
@@ -302,15 +302,9 @@ sysroot/$(ARCH)/include:
$(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone headers_check
$(Q)mv sysroot/sysroot sysroot/$(ARCH)
-ifneq ($(NOLIBC_SYSROOT),0)
nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include
$(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
-nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c $(LIBGCC)
-else
-nolibc-test: nolibc-test.c nolibc-test-linkage.c
- $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
- -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c $(LIBGCC)
-endif
libc-test: nolibc-test.c nolibc-test-linkage.c
$(QUIET_CC)$(HOSTCC) -o $@ nolibc-test.c nolibc-test-linkage.c
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 3c5a226dad3a..1b9d3b2e2491 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -17,6 +17,7 @@
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/prctl.h>
+#include <sys/ptrace.h>
#include <sys/random.h>
#include <sys/reboot.h>
#include <sys/resource.h>
@@ -877,6 +878,58 @@ int test_file_stream(void)
return 0;
}
+/*
+ * Write/seek/read round trip on a stdio stream backed by an O_TMPFILE
+ * file created under /tmp.
+ *
+ * Checks that a read issued right after the write returns no items, that
+ * rewinding with fseek() succeeds, that an oversized fread() returns exactly
+ * the number of bytes written, and that the data read back matches.
+ *
+ * Returns 0 on success and -1 on any failure.  The stream (or the bare
+ * descriptor when fdopen() fails) is released on every path.
+ */
+int test_file_stream_wsr(void)
+{
+	const char dataout[] = "foo";
+	const size_t datasz = sizeof(dataout);
+	/* One spare byte keeps the deliberately oversized fread() in bounds. */
+	char datain[sizeof(dataout) + 1];
+	int ret = -1;
+	size_t n;
+	FILE *f;
+	int fd;
+
+	fd = open("/tmp", O_TMPFILE | O_RDWR, 0644);
+	if (fd == -1)
+		return -1;
+
+	f = fdopen(fd, "w+");
+	if (!f) {
+		/* No stream owns the descriptor yet, so close it here. */
+		close(fd);
+		return -1;
+	}
+
+	errno = 0;
+	n = fwrite(dataout, 1, datasz, f);
+	if (n != datasz)
+		goto out;
+
+	/* Attempt to read from the file without rewinding,
+	 * we should read 0 items.
+	 */
+	n = fread(datain, 1, datasz, f);
+	if (n)
+		goto out;
+
+	/* Rewind the file to the start */
+	if (fseek(f, 0, SEEK_SET))
+		goto out;
+
+	/* Attempt to read back more than was written to
+	 * make sure we handle short reads properly.
+	 * fread() should return the number of complete items.
+	 */
+	n = fread(datain, 1, datasz + 1, f);
+	if (n != datasz)
+		goto out;
+
+	/* Data we read should match the data we just wrote */
+	if (memcmp(datain, dataout, datasz) != 0)
+		goto out;
+
+	ret = 0;
+out:
+	/* fclose() also closes fd; a failed close fails the test. */
+	if (fclose(f))
+		ret = -1;
+
+	return ret;
+}
+
+
enum fork_type {
FORK_STANDARD,
FORK_VFORK,
@@ -1351,6 +1404,7 @@ int run_syscall(int min, int max)
CASE_TEST(fchdir_stdin); EXPECT_SYSER(1, fchdir(STDIN_FILENO), -1, ENOTDIR); break;
CASE_TEST(fchdir_badfd); EXPECT_SYSER(1, fchdir(-1), -1, EBADF); break;
CASE_TEST(file_stream); EXPECT_SYSZR(1, test_file_stream()); break;
+ CASE_TEST(file_stream_wsr); EXPECT_SYSZR(1, test_file_stream_wsr()); break;
CASE_TEST(fork); EXPECT_SYSZR(1, test_fork(FORK_STANDARD)); break;
CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break;
CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break;
@@ -1403,9 +1457,10 @@ int run_syscall(int min, int max)
CASE_TEST(write_badf); EXPECT_SYSER(1, write(-1, &tmp, 1), -1, EBADF); break;
CASE_TEST(write_zero); EXPECT_SYSZR(1, write(1, &tmp, 0)); break;
CASE_TEST(readv_badf); EXPECT_SYSER(1, readv(-1, &iov_one, 1), -1, EBADF); break;
- CASE_TEST(readv_zero); EXPECT_SYSZR(1, readv(1, NULL, 0)); break;
+ CASE_TEST(readv_zero); EXPECT_SYSZR(1, readv(0, NULL, 0)); break;
CASE_TEST(writev_badf); EXPECT_SYSER(1, writev(-1, &iov_one, 1), -1, EBADF); break;
CASE_TEST(writev_zero); EXPECT_SYSZR(1, writev(1, NULL, 0)); break;
+ CASE_TEST(ptrace); EXPECT_SYSER(1, ptrace(PTRACE_CONT, getpid(), NULL, NULL), -1, ESRCH); break;
CASE_TEST(syscall_noargs); EXPECT_SYSEQ(1, syscall(__NR_getpid), getpid()); break;
CASE_TEST(syscall_args); EXPECT_SYSER(1, syscall(__NR_statx, 0, NULL, 0, 0, NULL), -1, EFAULT); break;
CASE_TEST(namespace); EXPECT_SYSZR(euid0 && proc, test_namespace()); break;
@@ -1428,6 +1483,34 @@ int test_difftime(void)
return 0;
}
+/*
+ * Compare the time_t / struct timespec definitions with the kernel's
+ * __kernel_time64_t / struct __kernel_timespec: same types, same overall
+ * size, same member types and same member offsets.
+ *
+ * The comparison is only compiled when NOLIBC is defined; otherwise the
+ * function reports success unconditionally.
+ *
+ * Returns 0 when everything matches, 1 on the first kind of mismatch.
+ */
+int test_time_types(void)
+{
+	int mismatch = 0;
+
+#ifdef NOLIBC
+	struct __kernel_timespec kts;
+	struct timespec ts;
+
+	mismatch =
+		!__builtin_types_compatible_p(time_t, __kernel_time64_t) ||
+		sizeof(ts) != sizeof(kts) ||
+		!__builtin_types_compatible_p(__typeof__(ts.tv_sec),
+					      __typeof__(kts.tv_sec)) ||
+		!__builtin_types_compatible_p(__typeof__(ts.tv_nsec),
+					      __typeof__(kts.tv_nsec)) ||
+		offsetof(__typeof__(ts), tv_sec) !=
+			offsetof(__typeof__(kts), tv_sec) ||
+		offsetof(__typeof__(ts), tv_nsec) !=
+			offsetof(__typeof__(kts), tv_nsec);
+#endif /* NOLIBC */
+
+	return mismatch;
+}
+
+
int run_stdlib(int min, int max)
{
int test;
@@ -1553,6 +1636,7 @@ int run_stdlib(int min, int max)
CASE_TEST(difftime); EXPECT_ZR(1, test_difftime()); break;
CASE_TEST(memchr_foobar6_o); EXPECT_STREQ(1, memchr("foobar", 'o', 6), "oobar"); break;
CASE_TEST(memchr_foobar3_b); EXPECT_STRZR(1, memchr("foobar", 'b', 3)); break;
+ CASE_TEST(time_types); EXPECT_ZR(is_nolibc, test_time_types()); break;
case __LINE__:
return ret; /* must be last */
diff --git a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c
index 23aac6f97061..eecb776c33af 100644
--- a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c
+++ b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c
@@ -70,6 +70,23 @@ TEST_F(pci_ep_bar, BAR_TEST)
EXPECT_FALSE(ret) TH_LOG("Test failed for BAR%d", variant->barno);
}
+TEST_F(pci_ep_bar, BAR_SUBRANGE_TEST)
+{
+ int ret;
+
+ pci_ep_ioctl(PCITEST_SET_IRQTYPE, PCITEST_IRQ_TYPE_AUTO);
+ ASSERT_EQ(0, ret) TH_LOG("Can't set AUTO IRQ type");
+
+ pci_ep_ioctl(PCITEST_BAR_SUBRANGE, variant->barno);
+ if (ret == -ENODATA)
+ SKIP(return, "BAR is disabled");
+ if (ret == -EBUSY)
+ SKIP(return, "BAR is test register space");
+ if (ret == -EOPNOTSUPP)
+ SKIP(return, "Subrange map is not supported");
+ EXPECT_FALSE(ret) TH_LOG("Test failed for BAR%d", variant->barno);
+}
+
FIXTURE(pci_ep_basic)
{
int fd;
diff --git a/tools/testing/selftests/pidfd/pidfd_info_test.c b/tools/testing/selftests/pidfd/pidfd_info_test.c
index 6571e04acd88..8bed951e06a0 100644
--- a/tools/testing/selftests/pidfd/pidfd_info_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_info_test.c
@@ -229,7 +229,7 @@ static void *pidfd_info_pause_thread(void *arg)
close(ipc_socket);
- /* Sleep untill we're killed. */
+ /* Sleep until we're killed. */
pause();
return NULL;
}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
index f93b4c7c3a8a..ea29228334e8 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
@@ -1,5 +1,6 @@
bhrb_filter_map_test
bhrb_no_crash_wo_pmu_test
+check_extended_reg_test
intr_regs_no_crash_wo_pmu_test
mmcr0_cc56run_test
mmcr0_exceptionbits_test
diff --git a/tools/testing/selftests/ptp/phc.sh b/tools/testing/selftests/ptp/phc.sh
index ac6e5a6e1d3a..9f61c1579edf 100755
--- a/tools/testing/selftests/ptp/phc.sh
+++ b/tools/testing/selftests/ptp/phc.sh
@@ -8,17 +8,20 @@ ALL_TESTS="
"
DEV=$1
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
##############################################################################
# Sanity checks
if [[ "$(id -u)" -ne 0 ]]; then
echo "SKIP: need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [[ "$DEV" == "" ]]; then
echo "SKIP: PTP device not provided"
- exit 0
+ exit $ksft_skip
fi
require_command()
@@ -27,7 +30,7 @@ require_command()
if [[ ! -x "$(command -v "$cmd")" ]]; then
echo "SKIP: $cmd not installed"
- exit 1
+ exit $ksft_skip
fi
}
@@ -37,7 +40,7 @@ phc_sanity()
if [ $? != 0 ]; then
echo "SKIP: unknown clock $DEV: No such device"
- exit 1
+ exit $ksft_skip
fi
}
@@ -49,6 +52,7 @@ phc_sanity
# Exit status to return at the end. Set in case one of the tests fails.
EXIT_STATUS=0
+PASS_COUNT=0
# Per-test return value. Clear at the beginning of each test.
RET=0
@@ -65,12 +69,18 @@ log_test()
{
local test_name=$1
+ if [[ $RET -eq $ksft_skip ]]; then
+ printf "TEST: %-60s [SKIP]\n" "$test_name"
+ return 0
+ fi
+
if [[ $RET -ne 0 ]]; then
EXIT_STATUS=1
printf "TEST: %-60s [FAIL]\n" "$test_name"
return 1
fi
+ ((PASS_COUNT++))
printf "TEST: %-60s [ OK ]\n" "$test_name"
return 0
}
@@ -89,34 +99,49 @@ tests_run()
settime_do()
{
- local res
+ local res out
- res=$(phc_ctl $DEV set 0 wait 120.5 get 2> /dev/null \
- | awk '/clock time is/{print $5}' \
- | awk -F. '{print $1}')
+ out=$(LC_ALL=C phc_ctl $DEV set 0 wait 120.5 get 2>&1)
+ if [[ $? -ne 0 ]]; then
+ if echo "$out" | grep -qi "Operation not supported"; then
+ return $ksft_skip
+ fi
+ return 1
+ fi
+ res=$(echo "$out" | awk '/clock time is/{print $5}' | awk -F. '{print $1}')
(( res == 120 ))
}
adjtime_do()
{
- local res
+ local res out
- res=$(phc_ctl $DEV set 0 adj 10 get 2> /dev/null \
- | awk '/clock time is/{print $5}' \
- | awk -F. '{print $1}')
+ out=$(LC_ALL=C phc_ctl $DEV set 0 adj 10 get 2>&1)
+ if [[ $? -ne 0 ]]; then
+ if echo "$out" | grep -qi "Operation not supported"; then
+ return $ksft_skip
+ fi
+ return 1
+ fi
+ res=$(echo "$out" | awk '/clock time is/{print $5}' | awk -F. '{print $1}')
(( res == 10 ))
}
adjfreq_do()
{
- local res
+ local res out
# Set the clock to be 1% faster
- res=$(phc_ctl $DEV freq 10000000 set 0 wait 100.5 get 2> /dev/null \
- | awk '/clock time is/{print $5}' \
- | awk -F. '{print $1}')
+ out=$(LC_ALL=C phc_ctl $DEV freq 10000000 set 0 wait 100.5 get 2>&1)
+ if [[ $? -ne 0 ]]; then
+ if echo "$out" | grep -qi "Operation not supported"; then
+ return $ksft_skip
+ fi
+ return 1
+ fi
+ res=$(echo "$out" | awk '/clock time is/{print $5}' | awk -F. '{print $1}')
(( res == 101 ))
}
@@ -163,4 +188,7 @@ trap cleanup EXIT
tests_run
+if [[ $EXIT_STATUS -eq 0 && $PASS_COUNT -eq 0 ]]; then
+ exit $ksft_skip
+fi
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/rcutorture/.gitignore b/tools/testing/selftests/rcutorture/.gitignore
index f6cbce77460b..b8fd42547a6e 100644
--- a/tools/testing/selftests/rcutorture/.gitignore
+++ b/tools/testing/selftests/rcutorture/.gitignore
@@ -3,3 +3,4 @@ initrd
b[0-9]*
res
*.swp
+.kvm.sh.lock
diff --git a/tools/testing/selftests/rcutorture/bin/config2csv.sh b/tools/testing/selftests/rcutorture/bin/config2csv.sh
index 0cf55f1bf654..aeab4d6f11ad 100755
--- a/tools/testing/selftests/rcutorture/bin/config2csv.sh
+++ b/tools/testing/selftests/rcutorture/bin/config2csv.sh
@@ -42,7 +42,7 @@ do
grep -v '^#' < $i | grep -v '^ *$' > $T/p
if test -r $i.boot
then
- tr -s ' ' '\012' < $i.boot | grep -v '^#' >> $T/p
+ sed -e 's/#.*$//' < $i.boot | tr -s ' ' '\012' >> $T/p
fi
sed -e 's/^[^=]*$/&=?/' < $T/p |
sed -e 's/^\([^=]*\)=\(.*\)$/\tp["\1:'"$i"'"] = "\2";\n\tc["\1"] = 1;/' >> $T/p.awk
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-series.sh b/tools/testing/selftests/rcutorture/bin/kvm-series.sh
index 2ff905a1853b..c4ee5f910931 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-series.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-series.sh
@@ -15,7 +15,7 @@
# This script is intended to replace kvm-check-branches.sh by providing
# ease of use and faster execution.
-T="`mktemp -d ${TMPDIR-/tmp}/kvm-series.sh.XXXXXX`"
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-series.sh.XXXXXX`"; export T
trap 'rm -rf $T' 0
scriptname=$0
@@ -32,6 +32,7 @@ then
echo "$0: Repetition ('*') not allowed in config list."
exit 1
fi
+config_list_len="`echo ${config_list} | wc -w | awk '{ print $1; }'`"
commit_list="${2}"
if test -z "${commit_list}"
@@ -47,70 +48,209 @@ then
exit 2
fi
sha1_list=`cat $T/commits`
+sha1_list_len="`echo ${sha1_list} | wc -w | awk '{ print $1; }'`"
shift
shift
RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
PATH=${RCUTORTURE}/bin:$PATH; export PATH
+RES="${RCUTORTURE}/res"; export RES
. functions.sh
ret=0
-nfail=0
+nbuildfail=0
+nrunfail=0
nsuccess=0
-faillist=
+ncpus=0
+buildfaillist=
+runfaillist=
successlist=
cursha1="`git rev-parse --abbrev-ref HEAD`"
ds="`date +%Y.%m.%d-%H.%M.%S`-series"
+DS="${RES}/${ds}"; export DS
startdate="`date`"
starttime="`get_starttime`"
echo " --- " $scriptname $args | tee -a $T/log
echo " --- Results directory: " $ds | tee -a $T/log
+# Do all builds. Iterate through commits within a given scenario
+# because builds normally go faster from one commit to the next within a
+# given scenario. In contrast, switching scenarios on each rebuild will
+# often force a full rebuild due to Kconfig differences, for example,
+# turning preemption on and off. Defer actual runs in order to run
+# lots of them concurrently on large systems.
+touch $T/torunlist
+n2build="$((config_list_len*sha1_list_len))"
+nbuilt=0
for config in ${config_list}
do
sha_n=0
for sha in ${sha1_list}
do
sha1=${sha_n}.${sha} # Enable "sort -k1nr" to list commits in order.
- echo Starting ${config}/${sha1} at `date` | tee -a $T/log
- git checkout "${sha}"
- time tools/testing/selftests/rcutorture/bin/kvm.sh --configs "$config" --datestamp "$ds/${config}/${sha1}" --duration 1 "$@"
+ echo
+ echo Starting ${config}/${sha1} "($((nbuilt+1)) of ${n2build})" at `date` | tee -a $T/log
+ git checkout --detach "${sha}"
+ tools/testing/selftests/rcutorture/bin/kvm.sh --configs "$config" --datestamp "$ds/${config}/${sha1}" --duration 1 --build-only --trust-make "$@"
curret=$?
if test "${curret}" -ne 0
then
- nfail=$((nfail+1))
- faillist="$faillist ${config}/${sha1}(${curret})"
+ nbuildfail=$((nbuildfail+1))
+ buildfaillist="$buildfaillist ${config}/${sha1}(${curret})"
else
- nsuccess=$((nsuccess+1))
- successlist="$successlist ${config}/${sha1}"
- # Successful run, so remove large files.
- rm -f ${RCUTORTURE}/$ds/${config}/${sha1}/{vmlinux,bzImage,System.map,Module.symvers}
+ batchncpus="`grep -v "^# cpus=" "${DS}/${config}/${sha1}/batches" | awk '{ sum += $3 } END { print sum }'`"
+ echo run_one_qemu ${sha_n} ${config}/${sha1} ${batchncpus} >> $T/torunlist
+			if test "${ncpus}" -eq 0
+			then
+				# Take the CPU count from the "# cpus=" line of this
+				# build's batches file.
+				ncpus="`grep "^# cpus=" "${DS}/${config}/${sha1}/batches" | sed -e 's/^# cpus=//'`"
+				# case patterns are shell globs, not regular
+				# expressions: fall back to 0 when the value is empty
+				# or contains anything other than decimal digits, and
+				# keep it otherwise.
+				case "${ncpus}" in
+				''|*[!0-9]*)
+					ncpus=0
+					;;
+				esac
+			fi
fi
if test "${ret}" -eq 0
then
ret=${curret}
fi
sha_n=$((sha_n+1))
+ nbuilt=$((nbuilt+1))
done
done
+
+# If the user did not specify the number of CPUs, use them all.
+if test "${ncpus}" -eq 0
+then
+ ncpus="`identify_qemu_vcpus`"
+fi
+
+cpusused=0
+touch $T/successlistfile
+touch $T/faillistfile
+n2run="`wc -l $T/torunlist | awk '{ print $1; }'`"
+nrun=0
+
+# do_run_one_qemu ds resultsdir qemu_curout
+#
+# Start the specified qemu run and record its success or failure.
+do_run_one_qemu () {
+ local ret
+ local ds="$1"
+ local resultsdir="$2"
+ local qemu_curout="$3"
+
+ tools/testing/selftests/rcutorture/bin/kvm-again.sh "${DS}/${resultsdir}" --link inplace-force > ${qemu_curout} 2>&1
+ ret=$?
+ if test "${ret}" -eq 0
+ then
+ echo ${resultsdir} >> $T/successlistfile
+ # Successful run, so remove large files.
+ rm -f ${DS}/${resultsdir}/{vmlinux,bzImage,System.map,Module.symvers}
+ else
+ echo "${resultsdir}(${ret})" >> $T/faillistfile
+ fi
+}
+
+# cleanup_qemu_batch batchncpus
+#
+# Update success and failure lists, files, and counts at the end of
+# a batch.
+cleanup_qemu_batch () {
+ local batchncpus="$1"
+
+ echo Waiting, cpusused=${cpusused}, ncpus=${ncpus} `date` | tee -a $T/log
+ wait
+ cpusused="${batchncpus}"
+ nsuccessbatch="`wc -l $T/successlistfile | awk '{ print $1 }'`"
+ nsuccess=$((nsuccess+nsuccessbatch))
+ successlist="$successlist `cat $T/successlistfile`"
+ rm $T/successlistfile
+ touch $T/successlistfile
+ nfailbatch="`wc -l $T/faillistfile | awk '{ print $1 }'`"
+ nrunfail=$((nrunfail+nfailbatch))
+ runfaillist="$runfaillist `cat $T/faillistfile`"
+ rm $T/faillistfile
+ touch $T/faillistfile
+}
+
+# run_one_qemu sha_n config/sha1 batchncpus
+#
+# Launch into the background the sha_n-th qemu job whose results directory
+# is config/sha1 and which uses batchncpus CPUs. Once we reach a job that
+# would overflow the number of available CPUs, wait for the previous jobs
+# to complete and record their results.
+run_one_qemu () {
+ local sha_n="$1"
+ local config_sha1="$2"
+ local batchncpus="$3"
+ local qemu_curout
+
+ cpusused=$((cpusused+batchncpus))
+ if test "${cpusused}" -gt $ncpus
+ then
+ cleanup_qemu_batch "${batchncpus}"
+ fi
+ echo Starting ${config_sha1} using ${batchncpus} CPUs "($((nrun+1)) of ${n2run})" `date`
+ qemu_curout="${DS}/${config_sha1}/qemu-series"
+ do_run_one_qemu "$ds" "${config_sha1}" ${qemu_curout} &
+ nrun="$((nrun+1))"
+}
+
+# Re-ordering the runs will mess up the affinity chosen at build time
+# (among other things, over-using CPU 0), so suppress it.
+TORTURE_NO_AFFINITY="no-affinity"; export TORTURE_NO_AFFINITY
+
+# Run the kernels (if any) that built correctly.
+echo | tee -a $T/log # Put a blank line between build and run messages.
+. $T/torunlist
+cleanup_qemu_batch "${batchncpus}"
+
+# Get back to initial checkout/SHA-1.
git checkout "${cursha1}"
-echo ${nsuccess} SUCCESSES: | tee -a $T/log
-echo ${successlist} | fmt | tee -a $T/log
-echo | tee -a $T/log
-echo ${nfail} FAILURES: | tee -a $T/log
-echo ${faillist} | fmt | tee -a $T/log
-if test -n "${faillist}"
+# Throw away leading and trailing space characters for fmt.
+successlist="`echo ${successlist} | sed -e 's/^ *//' -e 's/ *$//'`"
+buildfaillist="`echo ${buildfaillist} | sed -e 's/^ *//' -e 's/ *$//'`"
+runfaillist="`echo ${runfaillist} | sed -e 's/^ *//' -e 's/ *$//'`"
+
+# Print lists of successes, build failures, and run failures, if any.
+if test "${nsuccess}" -gt 0
+then
+ echo | tee -a $T/log
+ echo ${nsuccess} SUCCESSES: | tee -a $T/log
+ echo ${successlist} | fmt | tee -a $T/log
+fi
+if test "${nbuildfail}" -gt 0
then
echo | tee -a $T/log
- echo Failures across commits: | tee -a $T/log
- echo ${faillist} | tr ' ' '\012' | sed -e 's,^[^/]*/,,' -e 's/([0-9]*)//' |
+ echo ${nbuildfail} BUILD FAILURES: | tee -a $T/log
+ echo ${buildfaillist} | fmt | tee -a $T/log
+fi
+if test "${nrunfail}" -gt 0
+then
+ echo | tee -a $T/log
+ echo ${nrunfail} RUN FAILURES: | tee -a $T/log
+ echo ${runfaillist} | fmt | tee -a $T/log
+fi
+
+# If there were build or runtime failures, map them to commits.
+if test "${nbuildfail}" -gt 0 || test "${nrunfail}" -gt 0
+then
+ echo | tee -a $T/log
+ echo Build failures across commits: | tee -a $T/log
+ echo ${buildfaillist} | tr ' ' '\012' | sed -e 's,^[^/]*/,,' -e 's/([0-9]*)//' |
sort | uniq -c | sort -k2n | tee -a $T/log
fi
+
+# Print run summary.
+echo | tee -a $T/log
echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log
-echo Summary: Successes: ${nsuccess} Failures: ${nfail} | tee -a $T/log
-cp $T/log tools/testing/selftests/rcutorture/res/${ds}
+echo Summary: Successes: ${nsuccess} " "Build Failures: ${nbuildfail} " "Runtime Failures: ${nrunfail}| tee -a $T/log
+cp $T/log ${DS}
exit "${ret}"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index fff15821c44c..65b04b832733 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -80,6 +80,7 @@ usage () {
echo " --kasan"
echo " --kconfig Kconfig-options"
echo " --kcsan"
+ echo " --kill-previous"
echo " --kmake-arg kernel-make-arguments"
echo " --mac nn:nn:nn:nn:nn:nn"
echo " --memory megabytes|nnnG"
@@ -206,6 +207,9 @@ do
--kcsan)
TORTURE_KCONFIG_KCSAN_ARG="$debuginfo CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG
;;
+ --kill-previous)
+ TORTURE_KILL_PREVIOUS=1
+ ;;
--kmake-arg|--kmake-args)
checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
TORTURE_KMAKE_ARG="`echo "$TORTURE_KMAKE_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`"
@@ -275,6 +279,42 @@ do
shift
done
+# Prevent concurrent kvm.sh runs on the same source tree. The flock
+# is automatically released when the script exits, even if killed.
+TORTURE_LOCK="$RCUTORTURE/.kvm.sh.lock"
+
+# Terminate any processes holding the lock file, if requested.
+if test -n "$TORTURE_KILL_PREVIOUS"
+then
+ if test -e "$TORTURE_LOCK"
+ then
+ echo "Killing processes holding $TORTURE_LOCK..."
+ if fuser -k "$TORTURE_LOCK" >/dev/null 2>&1
+ then
+ sleep 2
+ echo "Previous kvm.sh processes killed."
+ else
+ echo "No processes were holding the lock."
+ fi
+ else
+ echo "No lock file exists, nothing to kill."
+ fi
+fi
+
+if test -z "$dryrun"
+then
+ # Create a file descriptor and flock it, so that when kvm.sh (and its
+ # children) exit, the flock is released by the kernel automatically.
+ exec 9>"$TORTURE_LOCK"
+ if ! flock -n 9
+ then
+ echo "ERROR: Another kvm.sh instance is already running on this tree."
+ echo " Lock file: $TORTURE_LOCK"
+ echo " To run kvm.sh, kill all existing kvm.sh runs first (--kill-previous)."
+ exit 1
+ fi
+fi
+
if test -n "$dryrun" || test -z "$TORTURE_INITRD" || tools/testing/selftests/rcutorture/bin/mkinitrd.sh
then
:
diff --git a/tools/testing/selftests/rcutorture/bin/mktestid.sh b/tools/testing/selftests/rcutorture/bin/mktestid.sh
index 16f9907a4dae..24f6261dab6a 100755
--- a/tools/testing/selftests/rcutorture/bin/mktestid.sh
+++ b/tools/testing/selftests/rcutorture/bin/mktestid.sh
@@ -18,7 +18,7 @@ fi
echo Build directory: `pwd` > ${resdir}/testid.txt
if test -d .git
then
- echo Current commit: `git rev-parse HEAD` >> ${resdir}/testid.txt
+ echo Current commit: `git show --oneline --no-patch HEAD` >> ${resdir}/testid.txt
echo >> ${resdir}/testid.txt
echo ' ---' Output of "'"git status"'": >> ${resdir}/testid.txt
git status >> ${resdir}/testid.txt
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
index 85b407467454..18efab346381 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -10,5 +10,4 @@ CONFIG_PROVE_LOCKING=n
#CHECK#CONFIG_PROVE_RCU=n
CONFIG_FORCE_TASKS_TRACE_RCU=y
#CHECK#CONFIG_TASKS_TRACE_RCU=y
-CONFIG_TASKS_TRACE_RCU_READ_MB=y
CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
index 9003c56cd764..8da390e82829 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
@@ -9,6 +9,5 @@ CONFIG_PROVE_LOCKING=y
#CHECK#CONFIG_PROVE_RCU=y
CONFIG_FORCE_TASKS_TRACE_RCU=y
#CHECK#CONFIG_TASKS_TRACE_RCU=y
-CONFIG_TASKS_TRACE_RCU_READ_MB=n
CONFIG_RCU_EXPERT=y
CONFIG_DEBUG_OBJECTS=y
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index 94cfdba5308d..f00b622c1460 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -290,8 +290,10 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param
static bool arch_supports_noncont_cat(const struct resctrl_test *test)
{
- /* AMD always supports non-contiguous CBM. */
- if (get_vendor() == ARCH_AMD)
+ unsigned int vendor_id = get_vendor();
+
+ /* AMD and Hygon always support non-contiguous CBM. */
+ if (vendor_id == ARCH_AMD || vendor_id == ARCH_HYGON)
return true;
#if defined(__i386__) || defined(__x86_64__) /* arch */
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 3c51bdac2dfa..afe635b6e48d 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -23,6 +23,7 @@
#include <asm/unistd.h>
#include <linux/perf_event.h>
#include <linux/compiler.h>
+#include <linux/bits.h>
#include "kselftest.h"
#define MB (1024 * 1024)
@@ -36,8 +37,9 @@
* Define as bits because they're used for vendor_specific bitmask in
* the struct resctrl_test.
*/
-#define ARCH_INTEL 1
-#define ARCH_AMD 2
+#define ARCH_INTEL BIT(0)
+#define ARCH_AMD BIT(1)
+#define ARCH_HYGON BIT(2)
#define END_OF_TESTS 1
@@ -163,7 +165,7 @@ extern int snc_unreliable;
extern char llc_occup_path[1024];
int snc_nodes_per_l3_cache(void);
-int get_vendor(void);
+unsigned int get_vendor(void);
bool check_resctrlfs_support(void);
int filter_dmesg(void);
int get_domain_id(const char *resource, int cpu_no, int *domain_id);
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 5154ffd821c4..dbcd5eea9fbc 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -23,16 +23,24 @@ static struct resctrl_test *resctrl_tests[] = {
&l2_noncont_cat_test,
};
-static int detect_vendor(void)
+static unsigned int detect_vendor(void)
{
- FILE *inf = fopen("/proc/cpuinfo", "r");
- int vendor_id = 0;
+ static unsigned int vendor_id;
+ static bool initialized;
char *s = NULL;
+ FILE *inf;
char *res;
- if (!inf)
+ if (initialized)
return vendor_id;
+ inf = fopen("/proc/cpuinfo", "r");
+ if (!inf) {
+ vendor_id = 0;
+ initialized = true;
+ return vendor_id;
+ }
+
res = fgrep(inf, "vendor_id");
if (res)
@@ -42,18 +50,22 @@ static int detect_vendor(void)
vendor_id = ARCH_INTEL;
else if (s && !strcmp(s, ": AuthenticAMD\n"))
vendor_id = ARCH_AMD;
+ else if (s && !strcmp(s, ": HygonGenuine\n"))
+ vendor_id = ARCH_HYGON;
fclose(inf);
free(res);
+
+ initialized = true;
return vendor_id;
}
-int get_vendor(void)
+unsigned int get_vendor(void)
{
- static int vendor = -1;
+ unsigned int vendor;
+
+ vendor = detect_vendor();
- if (vendor == -1)
- vendor = detect_vendor();
if (vendor == 0)
ksft_print_msg("Can not get vendor info...\n");
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index 195f04c4d158..b9c1bfb6cc02 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -243,6 +243,16 @@ int snc_nodes_per_l3_cache(void)
}
snc_mode = cache_cpus / node_cpus;
+ /*
+ * On some platforms (e.g. Hygon), cache_cpus < node_cpus, so
+ * the calculated snc_mode is 0.
+ *
+ * Set snc_mode = 1 to indicate that SNC mode is not
+ * supported on the platform.
+ */
+ if (!snc_mode)
+ snc_mode = 1;
+
if (snc_mode > 1)
ksft_print_msg("SNC-%d mode discovered.\n", snc_mode);
}
diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile
index 099b8c1f46f8..5671b4405a12 100644
--- a/tools/testing/selftests/riscv/Makefile
+++ b/tools/testing/selftests/riscv/Makefile
@@ -5,7 +5,7 @@
ARCH ?= $(shell uname -m 2>/dev/null || echo not)
ifneq (,$(filter $(ARCH),riscv))
-RISCV_SUBTARGETS ?= abi hwprobe mm sigreturn vector
+RISCV_SUBTARGETS ?= abi hwprobe mm sigreturn vector cfi
else
RISCV_SUBTARGETS :=
endif
diff --git a/tools/testing/selftests/riscv/cfi/.gitignore b/tools/testing/selftests/riscv/cfi/.gitignore
new file mode 100644
index 000000000000..c1faf7ca4346
--- /dev/null
+++ b/tools/testing/selftests/riscv/cfi/.gitignore
@@ -0,0 +1,2 @@
+cfitests
+shadowstack
diff --git a/tools/testing/selftests/riscv/cfi/Makefile b/tools/testing/selftests/riscv/cfi/Makefile
new file mode 100644
index 000000000000..96a4dc4b69c3
--- /dev/null
+++ b/tools/testing/selftests/riscv/cfi/Makefile
@@ -0,0 +1,23 @@
+CFLAGS += $(KHDR_INCLUDES)
+CFLAGS += -I$(top_srcdir)/tools/include
+
+CFLAGS += -march=rv64gc_zicfilp_zicfiss -fcf-protection=full
+
+# Check for zicfi* extensions needs cross compiler
+# which is not set until lib.mk is included
+ifeq ($(LLVM)$(CC),cc)
+CC := $(CROSS_COMPILE)gcc
+endif
+
+
+ifeq ($(shell $(CC) $(CFLAGS) -nostdlib -xc /dev/null -o /dev/null > /dev/null 2>&1; echo $$?),0)
+TEST_GEN_PROGS := cfitests
+
+$(OUTPUT)/cfitests: cfitests.c shadowstack.c
+ $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^
+else
+
+$(shell echo "Toolchain doesn't support CFI, skipping CFI kselftest." >&2)
+endif
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/riscv/cfi/cfi_rv_test.h b/tools/testing/selftests/riscv/cfi/cfi_rv_test.h
new file mode 100644
index 000000000000..1c8043f2b778
--- /dev/null
+++ b/tools/testing/selftests/riscv/cfi/cfi_rv_test.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef SELFTEST_RISCV_CFI_H
+#define SELFTEST_RISCV_CFI_H
+#include <stddef.h>
+#include <sys/types.h>
+#include "shadowstack.h"
+
+#define CHILD_EXIT_CODE_SSWRITE 10
+#define CHILD_EXIT_CODE_SIG_TEST 11
+
+/*
+ * Issue a raw system call with five arguments.
+ *
+ * The syscall number is placed in a7 and the arguments in a0-a4. After the
+ * "ecall" the macro evaluates to the value left in a0. No errno handling is
+ * done here: callers see the register value as-is.
+ */
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ register long _arg5 __asm__ ("a4") = (long)(arg5); \
+ \
+ __asm__ volatile( \
+ "ecall\n" \
+ : "+r" \
+ (_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+/*
+ * Issue a raw system call with three arguments.
+ *
+ * The syscall number is placed in a7 and the arguments in a0-a2. After the
+ * "ecall" the macro evaluates to the value left in a0; no errno handling is
+ * done here.
+ */
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ \
+ __asm__ volatile( \
+ "ecall\n" \
+ : "+r" (_arg1) \
+ : "r"(_arg2), "r"(_arg3), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#ifndef __NR_prctl
+#define __NR_prctl 167
+#endif
+
+#ifndef __NR_map_shadow_stack
+#define __NR_map_shadow_stack 453
+#endif
+
+#define CSR_SSP 0x011
+
+#ifdef __ASSEMBLY__
+#define __ASM_STR(x) x
+#else
+#define __ASM_STR(x) #x
+#endif
+
+/*
+ * Read a CSR with "csrr" and evaluate to its value as unsigned long.
+ * @csr is pasted into the instruction text through __ASM_STR(), so it must
+ * be something the assembler accepts as a CSR operand (e.g. CSR_SSP).
+ */
+#define csr_read(csr) \
+({ \
+ register unsigned long __v; \
+ __asm__ __volatile__ ("csrr %0, " __ASM_STR(csr) \
+ : "=r" (__v) : \
+ : "memory"); \
+ __v; \
+})
+
+/*
+ * Write @val, converted to unsigned long, to a CSR with "csrw".
+ * @csr is pasted into the instruction text through __ASM_STR(), like in
+ * csr_read(). The macro does not yield a value.
+ */
+#define csr_write(csr, val) \
+({ \
+ unsigned long __v = (unsigned long)(val); \
+ __asm__ __volatile__ ("csrw " __ASM_STR(csr) ", %0" \
+ : : "rK" (__v) \
+ : "memory"); \
+})
+
+#endif
diff --git a/tools/testing/selftests/riscv/cfi/cfitests.c b/tools/testing/selftests/riscv/cfi/cfitests.c
new file mode 100644
index 000000000000..298544854415
--- /dev/null
+++ b/tools/testing/selftests/riscv/cfi/cfitests.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "../../kselftest.h"
+#include <sys/signal.h>
+#include <asm/ucontext.h>
+#include <linux/prctl.h>
+#include <errno.h>
+#include <linux/ptrace.h>
+#include <sys/wait.h>
+#include <linux/elf.h>
+#include <sys/uio.h>
+#include <asm-generic/unistd.h>
+
+#include "cfi_rv_test.h"
+
+/* do not optimize cfi related test functions */
+#pragma GCC push_options
+#pragma GCC optimize("O0")
+
+/*
+ * SIGSEGV handler installed by register_signal_handler().
+ *
+ * A fault whose si_code is SEGV_CPERR is reported as a control flow
+ * violation, together with gregs[0] of the interrupted context (printed as
+ * the faulting PC), and the process exits with -1. Every other SIGSEGV is
+ * assumed to come from a store to shadow stack memory and makes the process
+ * exit with CHILD_EXIT_CODE_SSWRITE.
+ */
+void sigsegv_handler(int signum, siginfo_t *si, void *uc)
+{
+ struct ucontext *ctx = (struct ucontext *)uc;
+
+ if (si->si_code == SEGV_CPERR) {
+ ksft_print_msg("Control flow violation happened somewhere\n");
+ ksft_print_msg("PC where violation happened %lx\n", ctx->uc_mcontext.gregs[0]);
+ exit(-1);
+ }
+
+ /* all other cases are expected to be of shadow stack write case */
+ exit(CHILD_EXIT_CODE_SSWRITE);
+}
+
+/*
+ * Install sigsegv_handler() as the SA_SIGINFO handler for SIGSEGV.
+ * Returns true on success; logs a message and returns false when
+ * sigaction() fails.
+ */
+bool register_signal_handler(void)
+{
+	struct sigaction segv_action = {
+		.sa_sigaction = sigsegv_handler,
+		.sa_flags = SA_SIGINFO,
+	};
+
+	if (sigaction(SIGSEGV, &segv_action, NULL) == 0)
+		return true;
+
+	ksft_print_msg("Registering signal handler for landing pad violation failed\n");
+	return false;
+}
+
+long ptrace(int request, pid_t pid, void *addr, void *data);
+
+/*
+ * Exercise the NT_RISCV_USER_CFI regset through ptrace.
+ *
+ * A child is forked, marks itself traceable and is observed at two stops:
+ *   stop 0: the explicit raise(SIGSTOP); the parent checks that the landing
+ *           pad / shadow stack enable bits are reported and that the shadow
+ *           stack pointer is not NULL.
+ *   stop 1: presumably the fault raised when the indirect jump below lands
+ *           on a plain nop (TODO confirm); the parent expects the ELP state
+ *           to be set, clears it with PTRACE_SETREGSET and resumes the
+ *           child so that it can reach exit(11).
+ *
+ * Regset failures abort the run through ksft_exit_fail_msg(). Returns true
+ * when the child finally exits with code 11, false otherwise.
+ */
+bool cfi_ptrace_test(void)
+{
+	pid_t pid;
+	int status = 0, ret = 0;
+	unsigned long ptrace_test_num = 0, total_ptrace_tests = 2;
+
+	struct user_cfi_state cfi_reg;
+	struct iovec iov;
+
+	pid = fork();
+
+	if (pid == -1) {
+		ksft_exit_fail_msg("%s: fork failed\n", __func__);
+		exit(1);
+	}
+
+	if (pid == 0) {
+		/* allow to be traced */
+		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
+		raise(SIGSTOP);
+		/* "jalr a5" links through ra, so ra is clobbered as well */
+		asm volatile ("la a5, 1f\n"
+			      "jalr a5\n"
+			      "nop\n"
+			      "nop\n"
+			      "1: nop\n"
+			      : : : "a5", "ra");
+		exit(11);
+		/* child shouldn't go beyond here */
+	}
+
+	/* parent's code goes here */
+	while (ptrace_test_num < total_ptrace_tests) {
+		/* GETREGSET may shrink iov_len, so start from a full buffer */
+		memset(&cfi_reg, 0, sizeof(cfi_reg));
+		iov.iov_base = &cfi_reg;
+		iov.iov_len = sizeof(cfi_reg);
+
+		if (waitpid(pid, &status, 0) != pid)
+			ksft_exit_fail_msg("%s: waitpid failed\n", __func__);
+
+		if (WIFSTOPPED(status)) {
+			errno = 0;
+			ret = ptrace(PTRACE_GETREGSET, pid, (void *)NT_RISCV_USER_CFI, &iov);
+			if (ret == -1 && errno)
+				ksft_exit_fail_msg("%s: PTRACE_GETREGSET failed\n", __func__);
+		} else {
+			ksft_exit_fail_msg("%s: child didn't stop, failed\n", __func__);
+		}
+
+		switch (ptrace_test_num) {
+#define CFI_ENABLE_MASK (PTRACE_CFI_LP_EN_STATE | \
+			 PTRACE_CFI_SS_EN_STATE | \
+			 PTRACE_CFI_SS_PTR_STATE)
+		case 0:
+			if ((cfi_reg.cfi_status.cfi_state & CFI_ENABLE_MASK) != CFI_ENABLE_MASK)
+				ksft_exit_fail_msg("%s: ptrace_getregset failed, %llu\n", __func__,
+						   cfi_reg.cfi_status.cfi_state);
+			if (!cfi_reg.shstk_ptr)
+				ksft_exit_fail_msg("%s: NULL shadow stack pointer, test failed\n",
+						   __func__);
+			break;
+		case 1:
+			if (!(cfi_reg.cfi_status.cfi_state & PTRACE_CFI_ELP_STATE))
+				ksft_exit_fail_msg("%s: elp must have been set\n", __func__);
+			/* clear elp state. not interested in anything else */
+			cfi_reg.cfi_status.cfi_state = 0;
+
+			/* do not let a stale errno turn into a bogus failure */
+			errno = 0;
+			ret = ptrace(PTRACE_SETREGSET, pid, (void *)NT_RISCV_USER_CFI, &iov);
+			if (ret == -1 && errno)
+				ksft_exit_fail_msg("%s: PTRACE_SETREGSET failed\n", __func__);
+			break;
+		default:
+			ksft_exit_fail_msg("%s: unreachable switch case\n", __func__);
+			break;
+		}
+		ptrace(PTRACE_CONT, pid, NULL, NULL);
+		ptrace_test_num++;
+	}
+
+	/* the child must have run to its exit(11) for the test to pass */
+	if (waitpid(pid, &status, 0) != pid ||
+	    !WIFEXITED(status) || WEXITSTATUS(status) != 11) {
+		ksft_print_msg("%s, bad return code from child\n", __func__);
+		return false;
+	}
+
+	ksft_print_msg("%s, ptrace test succeeded\n", __func__);
+	return true;
+}
+
+/*
+ * Entry point of the RISC-V CFI selftest.
+ *
+ * Verifies through prctl() that landing pads and the shadow stack are
+ * already enabled for this process, installs the SIGSEGV handler, runs the
+ * ptrace regset test and then hands over to the shadow stack tests. Any
+ * failed precondition ends the run through ksft_exit_fail_msg().
+ */
+int main(int argc, char *argv[])
+{
+	int ret = 0;
+	unsigned long lpad_status = 0, ss_status = 0;
+
+	ksft_print_header();
+
+	ksft_print_msg("Starting risc-v tests\n");
+
+	/*
+	 * Landing pad test. Not a lot of kernel changes to support landing
+	 * pads for user mode except lighting up a bit in senvcfg via a prctl.
+	 * Enable landing pad support throughout the execution of the test binary.
+	 */
+	ret = my_syscall5(__NR_prctl, PR_GET_INDIR_BR_LP_STATUS, &lpad_status, 0, 0, 0);
+	if (ret)
+		ksft_exit_fail_msg("Get landing pad status failed with %d\n", ret);
+
+	if (!(lpad_status & PR_INDIR_BR_LP_ENABLE))
+		ksft_exit_fail_msg("Landing pad is not enabled, should be enabled via glibc\n");
+
+	ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &ss_status, 0, 0, 0);
+	if (ret)
+		ksft_exit_fail_msg("Get shadow stack failed with %d\n", ret);
+
+	if (!(ss_status & PR_SHADOW_STACK_ENABLE))
+		ksft_exit_fail_msg("Shadow stack is not enabled, should be enabled via glibc\n");
+
+	if (!register_signal_handler())
+		ksft_exit_fail_msg("Registering signal handler for SIGSEGV failed\n");
+
+	ksft_print_msg("Landing pad and shadow stack are enabled for binary\n");
+
+	/* do not silently drop the verdict of the ptrace test */
+	if (!cfi_ptrace_test())
+		ksft_exit_fail_msg("ptrace CFI regset test failed\n");
+
+	execute_shadow_stack_tests();
+
+	return 0;
+}
+
+#pragma GCC pop_options
diff --git a/tools/testing/selftests/riscv/cfi/shadowstack.c b/tools/testing/selftests/riscv/cfi/shadowstack.c
new file mode 100644
index 000000000000..f8eed8260a12
--- /dev/null
+++ b/tools/testing/selftests/riscv/cfi/shadowstack.c
@@ -0,0 +1,385 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "../../kselftest.h"
+#include <sys/wait.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <asm-generic/unistd.h>
+#include <sys/mman.h>
+#include "shadowstack.h"
+#include "cfi_rv_test.h"
+
+static struct shadow_stack_tests shstk_tests[] = {
+ { "shstk fork test\n", shadow_stack_fork_test },
+ { "map shadow stack syscall\n", shadow_stack_map_test },
+ { "shadow stack gup tests\n", shadow_stack_gup_tests },
+ { "shadow stack signal tests\n", shadow_stack_signal_test},
+ { "memory protections of shadow stack memory\n", shadow_stack_protection_test }
+};
+
+#define RISCV_SHADOW_STACK_TESTS ARRAY_SIZE(shstk_tests)
+
+/* do not optimize shadow stack related test functions */
+#pragma GCC push_options
+#pragma GCC optimize("O0")
+
+void zar(void)
+{
+ unsigned long ssp = 0;
+
+ ssp = csr_read(CSR_SSP);
+ ksft_print_msg("Spewing out shadow stack ptr: %lx\n"
+ " This is to ensure shadow stack is indeed enabled and working\n",
+ ssp);
+}
+
+void bar(void)
+{
+ zar();
+}
+
+void foo(void)
+{
+ bar();
+}
+
+void zar_child(void)
+{
+ unsigned long ssp = 0;
+
+ ssp = csr_read(CSR_SSP);
+ ksft_print_msg("Spewing out shadow stack ptr: %lx\n"
+ " This is to ensure shadow stack is indeed enabled and working\n",
+ ssp);
+}
+
+void bar_child(void)
+{
+ zar_child();
+}
+
+void foo_child(void)
+{
+ bar_child();
+}
+
+typedef void (call_func_ptr)(void);
+/*
+ * call couple of functions to test push/pop.
+ */
+int shadow_stack_call_tests(call_func_ptr fn_ptr, bool parent)
+{
+ ksft_print_msg("dummy calls for sspush and sspopchk in context of %s\n",
+ parent ? "parent" : "child");
+
+ (fn_ptr)();
+
+ return 0;
+}
+
+/*
+ * Fork a child and let parent and child each run a short call chain
+ * (foo() / foo_child()) so that both exercise their shadow stack after the
+ * fork.
+ *
+ * The run is skipped through ksft_exit_skip() when the shadow stack status
+ * cannot be read or the shadow stack is not enabled. Returns false when
+ * fork() fails or the child was terminated by a signal, true otherwise.
+ */
+bool shadow_stack_fork_test(unsigned long test_num, void *ctx)
+{
+	int pid = 0, child_status = 0, parent_pid = 0, ret = 0;
+	unsigned long ss_status = 0;
+
+	ksft_print_msg("Exercising shadow stack fork test\n");
+
+	ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &ss_status, 0, 0, 0);
+	if (ret) {
+		ksft_exit_skip("Shadow stack get status prctl failed with errorcode %d\n", ret);
+		return false;
+	}
+
+	if (!(ss_status & PR_SHADOW_STACK_ENABLE))
+		ksft_exit_skip("Shadow stack is not enabled, should be enabled via glibc\n");
+
+	parent_pid = getpid();
+	pid = fork();
+
+	/* without a child nothing was tested, so do not report success */
+	if (pid == -1) {
+		ksft_print_msg("Fork test: fork failed\n");
+		return false;
+	}
+
+	if (pid == 0) {
+		shadow_stack_call_tests(&foo_child, false);
+		/* exit child gracefully */
+		exit(0);
+	}
+
+	ksft_print_msg("Parent pid %d and child pid %d\n", parent_pid, pid);
+	shadow_stack_call_tests(&foo, true);
+
+	ksft_print_msg("Waiting on child to finish\n");
+	wait(&child_status);
+
+	if (WIFSIGNALED(child_status)) {
+		ksft_print_msg("Child faulted, fork test failed\n");
+		return false;
+	}
+
+	return true;
+}
+
+/* exercise 'map_shadow_stack': map a shadow stack and unmap it again */
+#define SHADOW_STACK_ALLOC_SIZE 4096
+bool shadow_stack_map_test(unsigned long test_num, void *ctx)
+{
+	unsigned long ss_base;
+	int unmap_rc;
+
+	ksft_print_msg("Exercising shadow stack map test\n");
+
+	/* zero or a negative value in the return register means no mapping */
+	ss_base = my_syscall3(__NR_map_shadow_stack, NULL, SHADOW_STACK_ALLOC_SIZE, 0);
+	if ((long)ss_base <= 0) {
+		ksft_print_msg("map_shadow_stack failed with error code %d\n",
+			       (int)ss_base);
+		return false;
+	}
+
+	/* the mapping only has to be releasable again for this test */
+	unmap_rc = munmap((void *)ss_base, SHADOW_STACK_ALLOC_SIZE);
+	if (unmap_rc != 0) {
+		ksft_print_msg("munmap failed with error code %d\n", unmap_rc);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * shadow stack protection tests. Map a shadow stack and check that a
+ * regular store to it, issued from a forked child, does not go through.
+ *
+ * The child is expected to die on the store. If it exits normally, its
+ * exit code must be CHILD_EXIT_CODE_SSWRITE, the code sigsegv_handler() in
+ * cfitests.c uses for this case. A child that survives the store exits
+ * immediately with a different code instead of returning into the test
+ * runner. The mapping is released on every path after it was created.
+ *
+ * Returns true when the store was blocked, false otherwise.
+ */
+bool shadow_stack_protection_test(unsigned long test_num, void *ctx)
+{
+	unsigned long shdw_addr;
+	unsigned long *write_addr = NULL;
+	int ret = 0, pid = 0, child_status = 0;
+	bool pass = false;
+
+	ksft_print_msg("Exercising shadow stack protection test (WPT)\n");
+
+	shdw_addr = my_syscall3(__NR_map_shadow_stack, NULL, SHADOW_STACK_ALLOC_SIZE, 0);
+
+	if (((long)shdw_addr) <= 0) {
+		ksft_print_msg("map_shadow_stack failed with error code %d\n",
+			       (int)shdw_addr);
+		return false;
+	}
+
+	write_addr = (unsigned long *)shdw_addr;
+	pid = fork();
+
+	/* no child was created, fail but still release the mapping */
+	if (pid == -1)
+		goto out;
+
+	/*
+	 * try to perform a store from child on shadow stack memory
+	 * it should result in SIGSEGV
+	 */
+	if (!pid) {
+		/* below write must lead to SIGSEGV */
+		*write_addr = 0xdeadbeef;
+
+		/*
+		 * The store went through. Leave right here with a code other
+		 * than CHILD_EXIT_CODE_SSWRITE so the parent flags the failure
+		 * and the child does not carry on as a second test runner.
+		 */
+		ksft_print_msg("Shadow stack WPT failed: child reached unreachable state\n");
+		exit(1);
+	}
+
+	wait(&child_status);
+
+	/* test fail, if 0xdeadbeef present on shadow stack address */
+	if (*write_addr == 0xdeadbeef) {
+		ksft_print_msg("Shadow stack WPT failed\n");
+		goto out;
+	}
+
+	/* if child exited via signal handler but not for write on ss */
+	if (WIFEXITED(child_status) &&
+	    WEXITSTATUS(child_status) != CHILD_EXIT_CODE_SSWRITE) {
+		ksft_print_msg("Shadow stack WPT failed: child wasn't signaled for write\n");
+		goto out;
+	}
+
+	pass = true;
+out:
+	ret = munmap(write_addr, SHADOW_STACK_ALLOC_SIZE);
+	if (ret) {
+		ksft_print_msg("Shadow stack WPT failed: munmap failed, error code %d\n",
+			       ret);
+		pass = false;
+	}
+
+	return pass;
+}
+
+#define SS_MAGIC_WRITE_VAL 0xbeefdead
+
+/*
+ * Access one word of shadow stack memory through the file descriptor
+ * @mem_fd (the caller opens "/proc/self/mem"): read the word at @shdw_addr,
+ * overwrite it with SS_MAGIC_WRITE_VAL and confirm with a direct load that
+ * the new value is visible.
+ *
+ * A failed seek or a transfer of fewer than sizeof(unsigned long) bytes
+ * counts as a failure. Returns 0 on success and 1 on any failure.
+ */
+int gup_tests(int mem_fd, unsigned long *shdw_addr)
+{
+	unsigned long val = 0;
+	off_t target = (off_t)(unsigned long)shdw_addr;
+
+	if (lseek(mem_fd, target, SEEK_SET) == (off_t)-1 ||
+	    read(mem_fd, &val, sizeof(val)) != (ssize_t)sizeof(val)) {
+		ksft_print_msg("Reading shadow stack mem via gup failed\n");
+		return 1;
+	}
+
+	val = SS_MAGIC_WRITE_VAL;
+	if (lseek(mem_fd, target, SEEK_SET) == (off_t)-1 ||
+	    write(mem_fd, &val, sizeof(val)) != (ssize_t)sizeof(val)) {
+		ksft_print_msg("Writing shadow stack mem via gup failed\n");
+		return 1;
+	}
+
+	if (*shdw_addr != SS_MAGIC_WRITE_VAL) {
+		ksft_print_msg("GUP write to shadow stack memory failed\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Map a shadow stack and run gup_tests() on it through "/proc/self/mem".
+ *
+ * The file descriptor and the mapping are released on every path once they
+ * exist. Returns true when gup_tests() succeeded and the mapping could be
+ * unmapped again, false otherwise.
+ */
+bool shadow_stack_gup_tests(unsigned long test_num, void *ctx)
+{
+	unsigned long shdw_addr = 0;
+	unsigned long *write_addr = NULL;
+	int fd = -1, unmap_ret = 0;
+	bool ret = false;
+
+	ksft_print_msg("Exercising shadow stack gup tests\n");
+	shdw_addr = my_syscall3(__NR_map_shadow_stack, NULL, SHADOW_STACK_ALLOC_SIZE, 0);
+
+	if (((long)shdw_addr) <= 0) {
+		ksft_print_msg("map_shadow_stack failed with error code %d\n", (int)shdw_addr);
+		return false;
+	}
+
+	write_addr = (unsigned long *)shdw_addr;
+
+	fd = open("/proc/self/mem", O_RDWR);
+	if (fd == -1) {
+		ksft_print_msg("Opening /proc/self/mem failed\n");
+		goto out;
+	}
+
+	if (gup_tests(fd, write_addr)) {
+		ksft_print_msg("gup tests failed\n");
+		goto out;
+	}
+
+	ret = true;
+out:
+	if (fd != -1)
+		close(fd);
+
+	/* report munmap's own return value, not the bool test verdict */
+	unmap_ret = munmap(write_addr, SHADOW_STACK_ALLOC_SIZE);
+	if (unmap_ret) {
+		ksft_print_msg("munmap failed with error code %d\n", unmap_ret);
+		ret = false;
+	}
+
+	return ret;
+}
+
+volatile bool break_loop;
+
+void sigusr1_handler(int signo)
+{
+ break_loop = true;
+}
+
+/*
+ * Install sigusr1_handler() for SIGUSR1 with an empty signal mask and no
+ * flags. Returns true on success; logs a message and returns false when
+ * sigaction() fails.
+ */
+bool sigusr1_signal_test(void)
+{
+	struct sigaction usr1_action = {};
+
+	sigemptyset(&usr1_action.sa_mask);
+	usr1_action.sa_flags = 0;
+	usr1_action.sa_handler = sigusr1_handler;
+
+	if (sigaction(SIGUSR1, &usr1_action, NULL) == 0)
+		return true;
+
+	ksft_print_msg("Registering signal handler for SIGUSR1 failed\n");
+	return false;
+}
+
+/*
+ * shadow stack signal test. shadow stack must be enabled.
+ * Register a SIGUSR1 handler, fork a child which waits for the handler to
+ * set break_loop, and send SIGUSR1 from the parent. The child exits with
+ * CHILD_EXIT_CODE_SIG_TEST once it has seen the signal.
+ *
+ * Returns true when the child exited with that code. Returns false when
+ * the shadow stack is not enabled, when the handler cannot be registered,
+ * when fork() fails, or when the child ended any other way.
+ */
+bool shadow_stack_signal_test(unsigned long test_num, void *ctx)
+{
+	int pid = 0, child_status = 0, ret = 0;
+	unsigned long ss_status = 0;
+
+	ksft_print_msg("Exercising shadow stack signal test\n");
+
+	ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &ss_status, 0, 0, 0);
+	if (ret) {
+		ksft_print_msg("Shadow stack get status prctl failed with errorcode %d\n", ret);
+		return false;
+	}
+
+	/* nothing about shadow stacks would be tested without it */
+	if (!(ss_status & PR_SHADOW_STACK_ENABLE)) {
+		ksft_print_msg("Shadow stack is not enabled, should be enabled via glibc\n");
+		return false;
+	}
+
+	/* fail this test only, the remaining tests can still run */
+	if (!sigusr1_signal_test()) {
+		ksft_print_msg("Registering sigusr1 handler failed\n");
+		return false;
+	}
+
+	pid = fork();
+
+	if (pid == -1) {
+		ksft_print_msg("Signal test: fork failed\n");
+		return false;
+	}
+
+	if (pid == 0) {
+		while (!break_loop)
+			sleep(1);
+
+		exit(CHILD_EXIT_CODE_SIG_TEST);
+		/* child shouldn't go beyond here */
+	}
+
+	/* send SIGUSR1 to child */
+	kill(pid, SIGUSR1);
+	wait(&child_status);
+
+	return (WIFEXITED(child_status) &&
+		WEXITSTATUS(child_status) == CHILD_EXIT_CODE_SIG_TEST);
+}
+
+/*
+ * Run every entry of shstk_tests[] and report each verdict through
+ * kselftest.
+ *
+ * The plan is set to the number of entries. The run is aborted through
+ * ksft_exit_fail_msg() when the shadow stack status prctl fails. After the
+ * last test ksft_finished() is called; the trailing "return 0" only
+ * satisfies the prototype.
+ */
+int execute_shadow_stack_tests(void)
+{
+	int ret = 0;
+	unsigned long test_count = 0;
+	unsigned long shstk_status = 0;
+	bool test_pass = false;
+
+	ksft_print_msg("Executing RISC-V shadow stack self tests\n");
+	ksft_set_plan(RISCV_SHADOW_STACK_TESTS);
+
+	ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &shstk_status, 0, 0, 0);
+
+	if (ret != 0)
+		ksft_exit_fail_msg("Get shadow stack status failed with %d\n", ret);
+
+	/*
+	 * If we are here that means get shadow stack status succeeded and
+	 * thus shadow stack support is baked in the kernel.
+	 */
+	for (test_count = 0; test_count < RISCV_SHADOW_STACK_TESTS; test_count++) {
+		test_pass = (*shstk_tests[test_count].t_func)(test_count, NULL);
+		/* the name is data, never hand it over as the format string */
+		ksft_test_result(test_pass, "%s", shstk_tests[test_count].name);
+	}
+
+	ksft_finished();
+
+	return 0;
+}
+
+#pragma GCC pop_options
diff --git a/tools/testing/selftests/riscv/cfi/shadowstack.h b/tools/testing/selftests/riscv/cfi/shadowstack.h
new file mode 100644
index 000000000000..943a3685905f
--- /dev/null
+++ b/tools/testing/selftests/riscv/cfi/shadowstack.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef SELFTEST_SHADOWSTACK_TEST_H
+#define SELFTEST_SHADOWSTACK_TEST_H
+#include <stddef.h>
+#include <linux/prctl.h>
+
+/*
+ * A CFI test returns true for success or false for fail.
+ * Takes a test number to index into array, and a void pointer.
+ */
+typedef bool (*shstk_test_func)(unsigned long test_num, void *);
+
+/*
+ * One entry of the shadow stack test table: the name reported for the test
+ * and the function implementing it. The name points at a string literal,
+ * hence const.
+ */
+struct shadow_stack_tests {
+	const char *name;
+	shstk_test_func t_func;
+};
+
+bool shadow_stack_fork_test(unsigned long test_num, void *ctx);
+bool shadow_stack_map_test(unsigned long test_num, void *ctx);
+bool shadow_stack_protection_test(unsigned long test_num, void *ctx);
+bool shadow_stack_gup_tests(unsigned long test_num, void *ctx);
+bool shadow_stack_signal_test(unsigned long test_num, void *ctx);
+
+int execute_shadow_stack_tests(void);
+
+#endif
diff --git a/tools/testing/selftests/riscv/hwprobe/which-cpus.c b/tools/testing/selftests/riscv/hwprobe/which-cpus.c
index 3ab53067e8dd..587feb198c04 100644
--- a/tools/testing/selftests/riscv/hwprobe/which-cpus.c
+++ b/tools/testing/selftests/riscv/hwprobe/which-cpus.c
@@ -83,9 +83,9 @@ static void do_which_cpus(int argc, char **argv, cpu_set_t *cpus)
int main(int argc, char **argv)
{
- struct riscv_hwprobe pairs[2];
+ struct riscv_hwprobe pairs[3];
cpu_set_t cpus_aff, cpus;
- __u64 ext0_all;
+ __u64 ext0_all, ext1_all;
long rc;
rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus_aff);
@@ -112,6 +112,11 @@ int main(int argc, char **argv)
assert(rc == 0 && pairs[0].key == RISCV_HWPROBE_KEY_IMA_EXT_0);
ext0_all = pairs[0].value;
+ pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_IMA_EXT_1, };
+ rc = riscv_hwprobe(pairs, 1, 0, NULL, 0);
+ assert(rc == 0 && pairs[0].key == RISCV_HWPROBE_KEY_IMA_EXT_1);
+ ext1_all = pairs[0].value;
+
pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, };
CPU_ZERO(&cpus);
rc = riscv_hwprobe(pairs, 1, 0, (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS);
@@ -134,20 +139,23 @@ int main(int argc, char **argv)
pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, };
pairs[1] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_IMA_EXT_0, .value = ext0_all, };
+ pairs[2] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_IMA_EXT_1, .value = ext1_all, };
CPU_ZERO(&cpus);
- rc = riscv_hwprobe(pairs, 2, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS);
+ rc = riscv_hwprobe(pairs, 3, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS);
ksft_test_result(rc == 0 && CPU_COUNT(&cpus) == sysconf(_SC_NPROCESSORS_ONLN), "set all cpus\n");
pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, };
pairs[1] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_IMA_EXT_0, .value = ext0_all, };
+ pairs[2] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_IMA_EXT_1, .value = ext1_all, };
memcpy(&cpus, &cpus_aff, sizeof(cpu_set_t));
- rc = riscv_hwprobe(pairs, 2, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS);
+ rc = riscv_hwprobe(pairs, 3, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS);
ksft_test_result(rc == 0 && CPU_EQUAL(&cpus, &cpus_aff), "set all affinity cpus\n");
pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, };
pairs[1] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_IMA_EXT_0, .value = ~ext0_all, };
+ pairs[2] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_IMA_EXT_1, .value = ~ext1_all, };
memcpy(&cpus, &cpus_aff, sizeof(cpu_set_t));
- rc = riscv_hwprobe(pairs, 2, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS);
+ rc = riscv_hwprobe(pairs, 3, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS);
ksft_test_result(rc == 0 && CPU_COUNT(&cpus) == 0, "clear all cpus\n");
ksft_finished();
diff --git a/tools/testing/selftests/riscv/vector/.gitignore b/tools/testing/selftests/riscv/vector/.gitignore
index 7d9c87cd0649..40a82baf364f 100644
--- a/tools/testing/selftests/riscv/vector/.gitignore
+++ b/tools/testing/selftests/riscv/vector/.gitignore
@@ -2,3 +2,5 @@ vstate_exec_nolibc
vstate_prctl
v_initval
v_exec_initval_nolibc
+vstate_ptrace
+validate_v_ptrace
diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile
index 2c2a33fc083e..326dafd739bf 100644
--- a/tools/testing/selftests/riscv/vector/Makefile
+++ b/tools/testing/selftests/riscv/vector/Makefile
@@ -2,11 +2,14 @@
# Copyright (C) 2021 ARM Limited
# Originally tools/testing/arm64/abi/Makefile
-TEST_GEN_PROGS := v_initval vstate_prctl vstate_ptrace
+TEST_GEN_PROGS := v_initval vstate_prctl vstate_ptrace validate_v_ptrace
TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc v_exec_initval_nolibc
+TEST_GEN_LIBS := v_helpers.c sys_hwprobe.c
include ../../lib.mk
+TEST_GEN_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(TEST_GEN_LIBS))
+
$(OUTPUT)/sys_hwprobe.o: ../hwprobe/sys_hwprobe.S
$(CC) -static -c -o$@ $(CFLAGS) $^
@@ -29,3 +32,8 @@ $(OUTPUT)/v_exec_initval_nolibc: v_exec_initval_nolibc.c
$(OUTPUT)/vstate_ptrace: vstate_ptrace.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
$(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+
+$(OUTPUT)/validate_v_ptrace: validate_v_ptrace.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
+ $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+
+EXTRA_CLEAN += $(TEST_GEN_OBJ)
diff --git a/tools/testing/selftests/riscv/vector/v_helpers.c b/tools/testing/selftests/riscv/vector/v_helpers.c
index 01a8799dcb78..de6da7c8d2f1 100644
--- a/tools/testing/selftests/riscv/vector/v_helpers.c
+++ b/tools/testing/selftests/riscv/vector/v_helpers.c
@@ -26,6 +26,29 @@ bool is_vector_supported(void)
return pair.value & RISCV_HWPROBE_EXT_ZVE32X;
}
+/*
+ * Return the vector register length in bytes, or 0 when neither vector
+ * flavour is available.
+ *
+ * With is_vector_supported() the vlenb CSR is read directly. With
+ * is_xtheadvector_supported() a hand-encoded vsetvli (see the encoding
+ * comment below) is executed and the value it leaves in t4 is returned.
+ * NOTE(review): for e8/m1 that value is presumably equal to vlenb, confirm
+ * against the XTheadVector spec. If neither is supported, a warning is
+ * printed and 0 is returned.
+ */
+unsigned long get_vr_len(void)
+{
+ unsigned long vlenb;
+
+ if (is_vector_supported()) {
+ asm volatile("csrr %[vlenb], vlenb" : [vlenb] "=r"(vlenb));
+ return vlenb;
+ }
+
+ if (is_xtheadvector_supported()) {
+ asm volatile (
+ // 0 | zimm[10:0] | rs1 | 1 1 1 | rd | 1010111 | vsetvli
+ // vsetvli t4, x0, e8, m1, d1
+ ".4byte 0b00000000000000000111111011010111\n\t"
+ "mv %[vlenb], t4\n\t"
+ : [vlenb] "=r"(vlenb) : : "memory", "t4");
+ return vlenb;
+ }
+
+ printf("WARNING: vector not supported\n");
+ return 0;
+}
+
int launch_test(char *next_program, int test_inherit, int xtheadvector)
{
char *exec_argv[4], *exec_envp[1];
diff --git a/tools/testing/selftests/riscv/vector/v_helpers.h b/tools/testing/selftests/riscv/vector/v_helpers.h
index 763cddfe26da..c538077f1195 100644
--- a/tools/testing/selftests/riscv/vector/v_helpers.h
+++ b/tools/testing/selftests/riscv/vector/v_helpers.h
@@ -5,4 +5,6 @@ bool is_xtheadvector_supported(void);
bool is_vector_supported(void);
+unsigned long get_vr_len(void);
+
int launch_test(char *next_program, int test_inherit, int xtheadvector);
diff --git a/tools/testing/selftests/riscv/vector/validate_v_ptrace.c b/tools/testing/selftests/riscv/vector/validate_v_ptrace.c
new file mode 100644
index 000000000000..3589549f7228
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/validate_v_ptrace.c
@@ -0,0 +1,915 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <linux/ptrace.h>
+#include <linux/elf.h>
+
+#include "kselftest_harness.h"
+#include "v_helpers.h"
+
+#define SR_FS_DIRTY 0x00006000UL
+#define CSR_VXRM_SHIFT 1
+
+volatile unsigned long chld_lock;
+
+/*
+ * A tracee that never executed a vector instruction must not expose vector
+ * state: PTRACE_GETREGSET for NT_RISCV_VECTOR is expected to fail with
+ * ENODATA.
+ *
+ * The child spins on chld_lock until the tracer clears it with
+ * PTRACE_POKEDATA and then stops on an ebreak, where the tracer issues the
+ * regset read.
+ */
+TEST(ptrace_v_not_enabled)
+{
+	pid_t pid;
+
+	if (!(is_vector_supported() || is_xtheadvector_supported()))
+		SKIP(return, "Vector not supported");
+
+	chld_lock = 1;
+	pid = fork();
+	ASSERT_LE(0, pid)
+		TH_LOG("fork: %m");
+
+	if (pid == 0) {
+		while (chld_lock == 1)
+			asm volatile("" : : "g"(chld_lock) : "memory");
+
+		asm volatile ("ebreak" : : : );
+	} else {
+		struct __riscv_v_regset_state *regset_data;
+		unsigned long vlenb = get_vr_len();
+		size_t regset_size;
+		struct iovec iov;
+		int status;
+		int ret;
+
+		/* attach */
+
+		ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid, NULL, NULL));
+		ASSERT_EQ(pid, waitpid(pid, &status, 0));
+		ASSERT_TRUE(WIFSTOPPED(status));
+
+		/* unlock */
+
+		ASSERT_EQ(0, ptrace(PTRACE_POKEDATA, pid, &chld_lock, 0));
+
+		/* resume and wait for ebreak */
+
+		ASSERT_EQ(0, ptrace(PTRACE_CONT, pid, NULL, NULL));
+		ASSERT_EQ(pid, waitpid(pid, &status, 0));
+		ASSERT_TRUE(WIFSTOPPED(status));
+
+		/* try to read vector registers from the tracee */
+
+		regset_size = sizeof(*regset_data) + vlenb * 32;
+		regset_data = calloc(1, regset_size);
+		ASSERT_NE(NULL, regset_data);
+
+		iov.iov_base = regset_data;
+		iov.iov_len = regset_size;
+
+		/* V extension is available, but not yet enabled for the tracee */
+
+		errno = 0;
+		ret = ptrace(PTRACE_GETREGSET, pid, NT_RISCV_VECTOR, &iov);
+		ASSERT_EQ(ENODATA, errno);
+		ASSERT_EQ(-1, ret);
+
+		free(regset_data);
+
+		/* cleanup */
+
+		ASSERT_EQ(0, kill(pid, SIGKILL));
+	}
+}
+
+TEST(ptrace_v_early_debug)
+{
+ static volatile unsigned long vstart;
+ static volatile unsigned long vtype;
+ static volatile unsigned long vlenb;
+ static volatile unsigned long vcsr;
+ static volatile unsigned long vl;
+ bool xtheadvector;
+ pid_t pid;
+
+ if (!(is_vector_supported() || is_xtheadvector_supported()))
+ SKIP(return, "Vector not supported");
+
+ xtheadvector = is_xtheadvector_supported();
+
+ chld_lock = 1;
+ pid = fork();
+ ASSERT_LE(0, pid)
+ TH_LOG("fork: %m");
+
+ if (pid == 0) {
+ unsigned long vxsat, vxrm;
+
+ vlenb = get_vr_len();
+
+ while (chld_lock == 1)
+ asm volatile ("" : : "g"(chld_lock) : "memory");
+
+ asm volatile (
+ "csrr %[vstart], vstart\n"
+ "csrr %[vtype], vtype\n"
+ "csrr %[vl], vl\n"
+ : [vtype] "=r"(vtype), [vstart] "=r"(vstart), [vl] "=r"(vl)
+ :
+ : "memory");
+
+ /* no 'is_xtheadvector_supported()' here to avoid clobbering v-state by syscall */
+ if (xtheadvector) {
+ asm volatile (
+ "csrs sstatus, %[bit]\n"
+ "csrr %[vxsat], vxsat\n"
+ "csrr %[vxrm], vxrm\n"
+ : [vxsat] "=r"(vxsat), [vxrm] "=r"(vxrm)
+ : [bit] "r" (SR_FS_DIRTY)
+ : "memory");
+ vcsr = vxsat | vxrm << CSR_VXRM_SHIFT;
+ } else {
+ asm volatile (
+ "csrr %[vcsr], vcsr\n"
+ : [vcsr] "=r"(vcsr)
+ :
+ : "memory");
+ }
+
+ asm volatile (
+ ".option push\n"
+ ".option norvc\n"
+ "ebreak\n"
+ ".option pop\n");
+ } else {
+ struct __riscv_v_regset_state *regset_data;
+ unsigned long vstart_csr;
+ unsigned long vlenb_csr;
+ unsigned long vtype_csr;
+ unsigned long vcsr_csr;
+ unsigned long vl_csr;
+ size_t regset_size;
+ struct iovec iov;
+ int status;
+
+ /* attach */
+
+ ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid, NULL, NULL));
+ ASSERT_EQ(pid, waitpid(pid, &status, 0));
+ ASSERT_TRUE(WIFSTOPPED(status));
+
+ /* unlock */
+
+ ASSERT_EQ(0, ptrace(PTRACE_POKEDATA, pid, &chld_lock, 0));
+
+ /* resume and wait for ebreak */
+
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, pid, NULL, NULL));
+ ASSERT_EQ(pid, waitpid(pid, &status, 0));
+ ASSERT_TRUE(WIFSTOPPED(status));
+
+ /* read tracee vector csr regs using ptrace PEEKDATA */
+
+ errno = 0;
+ vstart_csr = ptrace(PTRACE_PEEKDATA, pid, &vstart, NULL);
+ ASSERT_FALSE((errno != 0) && (vstart_csr == -1));
+
+ errno = 0;
+ vl_csr = ptrace(PTRACE_PEEKDATA, pid, &vl, NULL);
+ ASSERT_FALSE((errno != 0) && (vl_csr == -1));
+
+ errno = 0;
+ vtype_csr = ptrace(PTRACE_PEEKDATA, pid, &vtype, NULL);
+ ASSERT_FALSE((errno != 0) && (vtype_csr == -1));
+
+ errno = 0;
+ vcsr_csr = ptrace(PTRACE_PEEKDATA, pid, &vcsr, NULL);
+ ASSERT_FALSE((errno != 0) && (vcsr_csr == -1));
+
+ errno = 0;
+ vlenb_csr = ptrace(PTRACE_PEEKDATA, pid, &vlenb, NULL);
+ ASSERT_FALSE((errno != 0) && (vlenb_csr == -1));
+
+ /* read tracee csr regs using ptrace GETREGSET */
+
+ regset_size = sizeof(*regset_data) + vlenb_csr * 32;
+ regset_data = calloc(1, regset_size);
+
+ iov.iov_base = regset_data;
+ iov.iov_len = regset_size;
+
+ ASSERT_EQ(0, ptrace(PTRACE_GETREGSET, pid, NT_RISCV_VECTOR, &iov));
+
+ /* compare */
+
+ EXPECT_EQ(vstart_csr, regset_data->vstart);
+ EXPECT_EQ(vtype_csr, regset_data->vtype);
+ EXPECT_EQ(vlenb_csr, regset_data->vlenb);
+ EXPECT_EQ(vcsr_csr, regset_data->vcsr);
+ EXPECT_EQ(vl_csr, regset_data->vl);
+
+ /* cleanup */
+
+ ASSERT_EQ(0, kill(pid, SIGKILL));
+ }
+}
+
+TEST(ptrace_v_syscall_clobbering)
+{
+ pid_t pid;
+
+ if (!is_vector_supported() && !is_xtheadvector_supported())
+ SKIP(return, "Vector not supported");
+
+ chld_lock = 1;
+ pid = fork();
+ ASSERT_LE(0, pid)
+ TH_LOG("fork: %m");
+
+ if (pid == 0) {
+ unsigned long vl;
+
+ while (chld_lock == 1)
+ asm volatile("" : : "g"(chld_lock) : "memory");
+
+ if (is_xtheadvector_supported()) {
+ asm volatile (
+ // 0 | zimm[10:0] | rs1 | 1 1 1 | rd |1010111| vsetvli
+ // vsetvli t4, x0, e16, m2, d1
+ ".4byte 0b00000000010100000111111011010111\n"
+ "mv %[new_vl], t4\n"
+ : [new_vl] "=r" (vl) : : "t4");
+ } else {
+ asm volatile (
+ ".option push\n"
+ ".option arch, +zve32x\n"
+ "vsetvli %[new_vl], x0, e16, m2, tu, mu\n"
+ ".option pop\n"
+ : [new_vl] "=r"(vl) : : );
+ }
+
+ while (1) {
+ asm volatile (
+ ".option push\n"
+ ".option norvc\n"
+ "ebreak\n"
+ ".option pop\n");
+
+ sleep(0);
+ }
+ } else {
+ struct __riscv_v_regset_state *regset_data;
+ unsigned long vlenb = get_vr_len();
+ struct user_regs_struct regs;
+ size_t regset_size;
+ struct iovec iov;
+ int status;
+
+ /* attach */
+
+ ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid, NULL, NULL));
+ ASSERT_EQ(pid, waitpid(pid, &status, 0));
+ ASSERT_TRUE(WIFSTOPPED(status));
+
+ /* unlock */
+
+ ASSERT_EQ(0, ptrace(PTRACE_POKEDATA, pid, &chld_lock, 0));
+
+ /* resume and wait for the 1st ebreak */
+
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, pid, NULL, NULL));
+ ASSERT_EQ(pid, waitpid(pid, &status, 0));
+ ASSERT_TRUE(WIFSTOPPED(status));
+
+ /* read tracee vector csr regs using ptrace GETREGSET */
+
+ regset_size = sizeof(*regset_data) + vlenb * 32;
+ regset_data = calloc(1, regset_size);
+
+ iov.iov_base = regset_data;
+ iov.iov_len = regset_size;
+
+ ASSERT_EQ(0, ptrace(PTRACE_GETREGSET, pid, NT_RISCV_VECTOR, &iov));
+
+ /* verify initial vsetvli settings */
+
+ if (is_xtheadvector_supported())
+ EXPECT_EQ(5UL, regset_data->vtype);
+ else
+ EXPECT_EQ(9UL, regset_data->vtype);
+
+ EXPECT_EQ(regset_data->vlenb, regset_data->vl);
+ EXPECT_EQ(vlenb, regset_data->vlenb);
+ EXPECT_EQ(0UL, regset_data->vstart);
+ EXPECT_EQ(0UL, regset_data->vcsr);
+
+ /* skip 1st ebreak, then resume and wait for the 2nd ebreak */
+
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+
+ ASSERT_EQ(0, ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov));
+ regs.pc += 4;
+ ASSERT_EQ(0, ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov));
+
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, pid, NULL, NULL));
+ ASSERT_EQ(pid, waitpid(pid, &status, 0));
+ ASSERT_TRUE(WIFSTOPPED(status));
+
+ /* read tracee vtype using ptrace GETREGSET */
+
+ iov.iov_base = regset_data;
+ iov.iov_len = regset_size;
+
+ ASSERT_EQ(0, ptrace(PTRACE_GETREGSET, pid, NT_RISCV_VECTOR, &iov));
+
+ /* verify that V state is illegal after syscall */
+
+ EXPECT_EQ((1UL << (__riscv_xlen - 1)), regset_data->vtype);
+ EXPECT_EQ(vlenb, regset_data->vlenb);
+ EXPECT_EQ(0UL, regset_data->vstart);
+ EXPECT_EQ(0UL, regset_data->vcsr);
+ EXPECT_EQ(0UL, regset_data->vl);
+
+ /* cleanup */
+
+ ASSERT_EQ(0, kill(pid, SIGKILL));
+ }
+}
+
+FIXTURE(v_csr_invalid)
+{
+};
+
+FIXTURE_SETUP(v_csr_invalid)
+{
+}
+
+FIXTURE_TEARDOWN(v_csr_invalid)
+{
+}
+
+#define VECTOR_1_0 BIT(0)
+#define XTHEAD_VECTOR_0_7 BIT(1)
+
+#define vector_test(x) ((x) & VECTOR_1_0)
+#define xthead_test(x) ((x) & XTHEAD_VECTOR_0_7)
+
+/* modifications of the initial vsetvli settings */
+FIXTURE_VARIANT(v_csr_invalid)
+{
+ unsigned long vstart;
+ unsigned long vl;
+ unsigned long vtype;
+ unsigned long vcsr;
+ unsigned long vlenb_mul;
+ unsigned long vlenb_min;
+ unsigned long vlenb_max;
+ unsigned long spec;
+};
+
+/* unexpected vlenb value */
+FIXTURE_VARIANT_ADD(v_csr_invalid, new_vlenb)
+{
+ .vstart = 0x0,
+ .vl = 0x0,
+ .vtype = 0x3,
+ .vcsr = 0x0,
+ .vlenb_mul = 0x2,
+ .vlenb_min = 0x0,
+ .vlenb_max = 0x0,
+ .spec = VECTOR_1_0 | XTHEAD_VECTOR_0_7,
+};
+
+/* invalid reserved bits in vcsr */
+FIXTURE_VARIANT_ADD(v_csr_invalid, vcsr_invalid_reserved_bits)
+{
+ .vstart = 0x0,
+ .vl = 0x0,
+ .vtype = 0x3,
+ .vcsr = 0x1UL << 8,
+ .vlenb_mul = 0x1,
+ .vlenb_min = 0x0,
+ .vlenb_max = 0x0,
+ .spec = VECTOR_1_0 | XTHEAD_VECTOR_0_7,
+};
+
+/* invalid reserved bits in vtype */
+FIXTURE_VARIANT_ADD(v_csr_invalid, vtype_invalid_reserved_bits)
+{
+ .vstart = 0x0,
+ .vl = 0x0,
+ .vtype = (0x1UL << 8) | 0x3,
+ .vcsr = 0x0,
+ .vlenb_mul = 0x1,
+ .vlenb_min = 0x0,
+ .vlenb_max = 0x0,
+ .spec = VECTOR_1_0 | XTHEAD_VECTOR_0_7,
+};
+
+/* set vill bit */
+FIXTURE_VARIANT_ADD(v_csr_invalid, invalid_vill_bit)
+{
+ .vstart = 0x0,
+ .vl = 0x0,
+ .vtype = (0x1UL << (__riscv_xlen - 1)) | 0x3,
+ .vcsr = 0x0,
+ .vlenb_mul = 0x1,
+ .vlenb_min = 0x0,
+ .vlenb_max = 0x0,
+ .spec = VECTOR_1_0 | XTHEAD_VECTOR_0_7,
+};
+
+/* reserved vsew value: vsew > 3 */
+FIXTURE_VARIANT_ADD(v_csr_invalid, reserved_vsew)
+{
+ .vstart = 0x0,
+ .vl = 0x0,
+ .vtype = 0x4UL << 3,
+ .vcsr = 0x0,
+ .vlenb_mul = 0x1,
+ .vlenb_min = 0x0,
+ .vlenb_max = 0x0,
+ .spec = VECTOR_1_0,
+};
+
+/* XTheadVector: unsupported non-zero VEDIV value */
+FIXTURE_VARIANT_ADD(v_csr_invalid, reserved_vediv)
+{
+ .vstart = 0x0,
+ .vl = 0x0,
+ .vtype = 0x3UL << 5,
+ .vcsr = 0x0,
+ .vlenb_mul = 0x1,
+ .vlenb_min = 0x0,
+ .vlenb_max = 0x0,
+ .spec = XTHEAD_VECTOR_0_7,
+};
+
+/* reserved vlmul value: vlmul == 4 */
+FIXTURE_VARIANT_ADD(v_csr_invalid, reserved_vlmul)
+{
+ .vstart = 0x0,
+ .vl = 0x0,
+ .vtype = 0x4,
+ .vcsr = 0x0,
+ .vlenb_mul = 0x1,
+ .vlenb_min = 0x0,
+ .vlenb_max = 0x0,
+ .spec = VECTOR_1_0,
+};
+
+/* invalid fractional LMUL for VLEN <= 256: LMUL= 1/8, SEW = 64 */
+FIXTURE_VARIANT_ADD(v_csr_invalid, frac_lmul1)
+{
+ .vstart = 0x0,
+ .vl = 0x0,
+ .vtype = 0x1d,
+ .vcsr = 0x0,
+ .vlenb_mul = 0x1,
+ .vlenb_min = 0x0,
+ .vlenb_max = 0x20,
+ .spec = VECTOR_1_0,
+};
+
+/* invalid integral LMUL for VLEN <= 16: LMUL= 2, SEW = 64 */
+FIXTURE_VARIANT_ADD(v_csr_invalid, int_lmul1)
+{
+ .vstart = 0x0,
+ .vl = 0x0,
+ .vtype = 0x19,
+ .vcsr = 0x0,
+ .vlenb_mul = 0x1,
+ .vlenb_min = 0x0,
+ .vlenb_max = 0x2,
+ .spec = VECTOR_1_0,
+};
+
+/* XTheadVector: invalid integral LMUL for VLEN <= 16: LMUL= 2, SEW = 64 */
+FIXTURE_VARIANT_ADD(v_csr_invalid, int_lmul2)
+{
+ .vstart = 0x0,
+ .vl = 0x0,
+ .vtype = 0xd,
+ .vcsr = 0x0,
+ .vlenb_mul = 0x1,
+ .vlenb_min = 0x0,
+ .vlenb_max = 0x2,
+ .spec = XTHEAD_VECTOR_0_7,
+};
+
+/* invalid VL for VLEN <= 128: LMUL= 2, SEW = 64, VL = 8 */
+FIXTURE_VARIANT_ADD(v_csr_invalid, vl1)
+{
+ .vstart = 0x0,
+ .vl = 0x8,
+ .vtype = 0x19,
+ .vcsr = 0x0,
+ .vlenb_mul = 0x1,
+ .vlenb_min = 0x0,
+ .vlenb_max = 0x10,
+ .spec = VECTOR_1_0,
+};
+
+/* XTheadVector: invalid VL for VLEN <= 128: LMUL= 2, SEW = 64, VL = 8 */
+FIXTURE_VARIANT_ADD(v_csr_invalid, vl2)
+{
+ .vstart = 0x0,
+ .vl = 0x8,
+ .vtype = 0xd,
+ .vcsr = 0x0,
+ .vlenb_mul = 0x1,
+ .vlenb_min = 0x0,
+ .vlenb_max = 0x10,
+ .spec = XTHEAD_VECTOR_0_7,
+};
+
+TEST_F(v_csr_invalid, ptrace_v_invalid_values)
+{
+ unsigned long vlenb;
+ pid_t pid;
+
+ if (!is_vector_supported() && !is_xtheadvector_supported())
+ SKIP(return, "Vectors not supported");
+
+ if (is_vector_supported() && !vector_test(variant->spec))
+ SKIP(return, "Test not supported for Vector");
+
+ if (is_xtheadvector_supported() && !xthead_test(variant->spec))
+ SKIP(return, "Test not supported for XTheadVector");
+
+ vlenb = get_vr_len();
+
+ if (variant->vlenb_min) {
+ if (vlenb < variant->vlenb_min)
+ SKIP(return, "This test does not support VLEN < %lu\n",
+ variant->vlenb_min * 8);
+ }
+
+ if (variant->vlenb_max) {
+ if (vlenb > variant->vlenb_max)
+ SKIP(return, "This test does not support VLEN > %lu\n",
+ variant->vlenb_max * 8);
+ }
+
+ chld_lock = 1;
+ pid = fork();
+ ASSERT_LE(0, pid)
+ TH_LOG("fork: %m");
+
+ if (pid == 0) {
+ unsigned long vl;
+
+ while (chld_lock == 1)
+ asm volatile("" : : "g"(chld_lock) : "memory");
+
+ if (is_xtheadvector_supported()) {
+ asm volatile (
+ // 0 | zimm[10:0] | rs1 | 1 1 1 | rd |1010111| vsetvli
+ // vsetvli t4, x0, e16, m2, d1
+ ".4byte 0b00000000010100000111111011010111\n"
+ "mv %[new_vl], t4\n"
+ : [new_vl] "=r" (vl) : : "t4");
+ } else {
+ asm volatile (
+ ".option push\n"
+ ".option arch, +zve32x\n"
+ "vsetvli %[new_vl], x0, e16, m2, tu, mu\n"
+ ".option pop\n"
+ : [new_vl] "=r"(vl) : : );
+ }
+
+ while (1) {
+ asm volatile (
+ ".option push\n"
+ ".option norvc\n"
+ "ebreak\n"
+ "nop\n"
+ ".option pop\n");
+ }
+ } else {
+ struct __riscv_v_regset_state *regset_data;
+ size_t regset_size;
+ struct iovec iov;
+ int status;
+ int ret;
+
+ /* attach */
+
+ ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid, NULL, NULL));
+ ASSERT_EQ(pid, waitpid(pid, &status, 0));
+ ASSERT_TRUE(WIFSTOPPED(status));
+
+ /* unlock */
+
+ ASSERT_EQ(0, ptrace(PTRACE_POKEDATA, pid, &chld_lock, 0));
+
+ /* resume and wait for the 1st ebreak */
+
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, pid, NULL, NULL));
+ ASSERT_EQ(pid, waitpid(pid, &status, 0));
+ ASSERT_TRUE(WIFSTOPPED(status));
+
+ /* read tracee vector csr regs using ptrace GETREGSET */
+
+ regset_size = sizeof(*regset_data) + vlenb * 32;
+ regset_data = calloc(1, regset_size);
+
+ iov.iov_base = regset_data;
+ iov.iov_len = regset_size;
+
+ ASSERT_EQ(0, ptrace(PTRACE_GETREGSET, pid, NT_RISCV_VECTOR, &iov));
+
+ /* verify initial vsetvli settings */
+
+ if (is_xtheadvector_supported())
+ EXPECT_EQ(5UL, regset_data->vtype);
+ else
+ EXPECT_EQ(9UL, regset_data->vtype);
+
+ EXPECT_EQ(regset_data->vlenb, regset_data->vl);
+ EXPECT_EQ(vlenb, regset_data->vlenb);
+ EXPECT_EQ(0UL, regset_data->vstart);
+ EXPECT_EQ(0UL, regset_data->vcsr);
+
+ /* apply invalid settings from fixture variants */
+
+ regset_data->vlenb *= variant->vlenb_mul;
+ regset_data->vstart = variant->vstart;
+ regset_data->vtype = variant->vtype;
+ regset_data->vcsr = variant->vcsr;
+ regset_data->vl = variant->vl;
+
+ iov.iov_base = regset_data;
+ iov.iov_len = regset_size;
+
+ errno = 0;
+ ret = ptrace(PTRACE_SETREGSET, pid, NT_RISCV_VECTOR, &iov);
+ ASSERT_EQ(errno, EINVAL);
+ ASSERT_EQ(ret, -1);
+
+ /* cleanup */
+
+ ASSERT_EQ(0, kill(pid, SIGKILL));
+ }
+}
+
+FIXTURE(v_csr_valid)
+{
+};
+
+FIXTURE_SETUP(v_csr_valid)
+{
+}
+
+FIXTURE_TEARDOWN(v_csr_valid)
+{
+}
+
+/* modifications of the initial vsetvli settings */
+FIXTURE_VARIANT(v_csr_valid)
+{
+ unsigned long vstart;
+ unsigned long vl;
+ unsigned long vtype;
+ unsigned long vcsr;
+ unsigned long vlenb_mul;
+ unsigned long vlenb_min;
+ unsigned long vlenb_max;
+ unsigned long spec;
+};
+
+/* valid for VLEN >= 128: LMUL= 1/4, SEW = 32 */
+FIXTURE_VARIANT_ADD(v_csr_valid, frac_lmul1)
+{
+ .vstart = 0x0,
+ .vl = 0x0,
+ .vtype = 0x16,
+ .vcsr = 0x0,
+ .vlenb_mul = 0x1,
+ .vlenb_min = 0x10,
+ .vlenb_max = 0x0,
+ .spec = VECTOR_1_0,
+};
+
+/* valid for VLEN >= 16: LMUL= 2, SEW = 32 */
+FIXTURE_VARIANT_ADD(v_csr_valid, int_lmul1)
+{
+ .vstart = 0x0,
+ .vl = 0x0,
+ .vtype = 0x11,
+ .vcsr = 0x0,
+ .vlenb_mul = 0x1,
+ .vlenb_min = 0x2,
+ .vlenb_max = 0x0,
+ .spec = VECTOR_1_0,
+};
+
+/* valid for XTheadVector VLEN >= 16: LMUL= 2, SEW = 32 */
+FIXTURE_VARIANT_ADD(v_csr_valid, int_lmul2)
+{
+ .vstart = 0x0,
+ .vl = 0x0,
+ .vtype = 0x9,
+ .vcsr = 0x0,
+ .vlenb_mul = 0x1,
+ .vlenb_min = 0x2,
+ .vlenb_max = 0x0,
+ .spec = XTHEAD_VECTOR_0_7,
+};
+
+/* valid for VLEN >= 32: LMUL= 2, SEW = 32, VL = 2 */
+FIXTURE_VARIANT_ADD(v_csr_valid, int_lmul3)
+{
+ .vstart = 0x0,
+ .vl = 0x2,
+ .vtype = 0x11,
+ .vcsr = 0x0,
+ .vlenb_mul = 0x1,
+ .vlenb_min = 0x4,
+ .vlenb_max = 0x0,
+ .spec = VECTOR_1_0,
+};
+
+TEST_F(v_csr_valid, ptrace_v_valid_values)
+{
+ unsigned long vlenb;
+ pid_t pid;
+
+ if (!is_vector_supported() && !is_xtheadvector_supported())
+ SKIP(return, "Vectors not supported");
+
+ if (is_vector_supported() && !vector_test(variant->spec))
+ SKIP(return, "Test not supported for Vector");
+
+ if (is_xtheadvector_supported() && !xthead_test(variant->spec))
+ SKIP(return, "Test not supported for XTheadVector");
+
+ vlenb = get_vr_len();
+
+ if (variant->vlenb_min) {
+ if (vlenb < variant->vlenb_min)
+ SKIP(return, "This test does not support VLEN < %lu\n",
+ variant->vlenb_min * 8);
+ }
+ if (variant->vlenb_max) {
+ if (vlenb > variant->vlenb_max)
+ SKIP(return, "This test does not support VLEN > %lu\n",
+ variant->vlenb_max * 8);
+ }
+
+ chld_lock = 1;
+ pid = fork();
+ ASSERT_LE(0, pid)
+ TH_LOG("fork: %m");
+
+ if (pid == 0) {
+ unsigned long vl;
+
+ while (chld_lock == 1)
+ asm volatile("" : : "g"(chld_lock) : "memory");
+
+ if (is_xtheadvector_supported()) {
+ asm volatile (
+ // 0 | zimm[10:0] | rs1 | 1 1 1 | rd |1010111| vsetvli
+ // vsetvli t4, x0, e16, m2, d1
+ ".4byte 0b00000000010100000111111011010111\n"
+ "mv %[new_vl], t4\n"
+ : [new_vl] "=r" (vl) : : "t4");
+ } else {
+ asm volatile (
+ ".option push\n"
+ ".option arch, +zve32x\n"
+ "vsetvli %[new_vl], x0, e16, m2, tu, mu\n"
+ ".option pop\n"
+ : [new_vl] "=r"(vl) : : );
+ }
+
+ asm volatile (
+ ".option push\n"
+ ".option norvc\n"
+ ".option arch, +zve32x\n"
+ "ebreak\n" /* breakpoint 1: apply new V state using ptrace */
+ "nop\n"
+ "ebreak\n" /* breakpoint 2: V state clean - context will not be saved */
+ "vmv.v.i v0, -1\n"
+ "ebreak\n" /* breakpoint 3: V state dirty - context will be saved */
+ ".option pop\n");
+ } else {
+ struct __riscv_v_regset_state *regset_data;
+ struct user_regs_struct regs;
+ size_t regset_size;
+ struct iovec iov;
+ int status;
+
+ /* attach */
+
+ ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid, NULL, NULL));
+ ASSERT_EQ(pid, waitpid(pid, &status, 0));
+ ASSERT_TRUE(WIFSTOPPED(status));
+
+ /* unlock */
+
+ ASSERT_EQ(0, ptrace(PTRACE_POKEDATA, pid, &chld_lock, 0));
+
+ /* resume and wait for the 1st ebreak */
+
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, pid, NULL, NULL));
+ ASSERT_EQ(pid, waitpid(pid, &status, 0));
+ ASSERT_TRUE(WIFSTOPPED(status));
+
+ /* read tracee vector csr regs using ptrace GETREGSET */
+
+ regset_size = sizeof(*regset_data) + vlenb * 32;
+ regset_data = calloc(1, regset_size);
+
+ iov.iov_base = regset_data;
+ iov.iov_len = regset_size;
+
+ ASSERT_EQ(0, ptrace(PTRACE_GETREGSET, pid, NT_RISCV_VECTOR, &iov));
+
+ /* verify initial vsetvli settings */
+
+ if (is_xtheadvector_supported())
+ EXPECT_EQ(5UL, regset_data->vtype);
+ else
+ EXPECT_EQ(9UL, regset_data->vtype);
+
+ EXPECT_EQ(regset_data->vlenb, regset_data->vl);
+ EXPECT_EQ(vlenb, regset_data->vlenb);
+ EXPECT_EQ(0UL, regset_data->vstart);
+ EXPECT_EQ(0UL, regset_data->vcsr);
+
+ /* apply valid settings from fixture variants */
+
+ regset_data->vlenb *= variant->vlenb_mul;
+ regset_data->vstart = variant->vstart;
+ regset_data->vtype = variant->vtype;
+ regset_data->vcsr = variant->vcsr;
+ regset_data->vl = variant->vl;
+
+ iov.iov_base = regset_data;
+ iov.iov_len = regset_size;
+
+ ASSERT_EQ(0, ptrace(PTRACE_SETREGSET, pid, NT_RISCV_VECTOR, &iov));
+
+ /* skip 1st ebreak, then resume and wait for the 2nd ebreak */
+
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+
+ ASSERT_EQ(0, ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov));
+ regs.pc += 4;
+ ASSERT_EQ(0, ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov));
+
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, pid, NULL, NULL));
+ ASSERT_EQ(pid, waitpid(pid, &status, 0));
+ ASSERT_TRUE(WIFSTOPPED(status));
+
+ /* read tracee vector csr regs using ptrace GETREGSET */
+
+ iov.iov_base = regset_data;
+ iov.iov_len = regset_size;
+
+ ASSERT_EQ(0, ptrace(PTRACE_GETREGSET, pid, NT_RISCV_VECTOR, &iov));
+
+ /* verify vector csr regs from tracee context */
+
+ EXPECT_EQ(regset_data->vstart, variant->vstart);
+ EXPECT_EQ(regset_data->vtype, variant->vtype);
+ EXPECT_EQ(regset_data->vcsr, variant->vcsr);
+ EXPECT_EQ(regset_data->vl, variant->vl);
+ EXPECT_EQ(regset_data->vlenb, vlenb);
+
+ /* skip 2nd ebreak, then resume and wait for the 3rd ebreak */
+
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+
+ ASSERT_EQ(0, ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov));
+ regs.pc += 4;
+ ASSERT_EQ(0, ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov));
+
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, pid, NULL, NULL));
+ ASSERT_EQ(pid, waitpid(pid, &status, 0));
+ ASSERT_TRUE(WIFSTOPPED(status));
+
+ /* read tracee vector csr regs using ptrace GETREGSET */
+
+ iov.iov_base = regset_data;
+ iov.iov_len = regset_size;
+
+ ASSERT_EQ(0, ptrace(PTRACE_GETREGSET, pid, NT_RISCV_VECTOR, &iov));
+
+ /* verify vector csr regs from tracee context */
+
+ EXPECT_EQ(regset_data->vstart, variant->vstart);
+ EXPECT_EQ(regset_data->vtype, variant->vtype);
+ EXPECT_EQ(regset_data->vcsr, variant->vcsr);
+ EXPECT_EQ(regset_data->vl, variant->vl);
+ EXPECT_EQ(regset_data->vlenb, vlenb);
+
+ /* cleanup */
+
+ ASSERT_EQ(0, kill(pid, SIGKILL));
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
index 7b7d6f21acb4..12f1b1b1c7aa 100644
--- a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
+++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
@@ -16,10 +16,10 @@ int main(int argc, char **argv)
if (argc > 2 && strcmp(argv[2], "x"))
xtheadvector = 1;
- ctrl = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL);
- if (ctrl < 0) {
+ ctrl = prctl(PR_RISCV_V_GET_CONTROL, 0, 0, 0, 0);
+ if (ctrl == -1) {
puts("PR_RISCV_V_GET_CONTROL is not supported\n");
- return ctrl;
+ exit(-1);
}
if (test_inherit) {
@@ -51,7 +51,7 @@ int main(int argc, char **argv)
}
if (!pid) {
- rc = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL);
+ rc = prctl(PR_RISCV_V_GET_CONTROL, 0, 0, 0, 0);
if (rc != ctrl) {
puts("child's vstate_ctrl not equal to parent's\n");
exit(-1);
diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
index 0fda241fa62b..ec01d164c1f0 100644
--- a/tools/testing/selftests/rseq/.gitignore
+++ b/tools/testing/selftests/rseq/.gitignore
@@ -10,3 +10,4 @@ param_test_mm_cid
param_test_mm_cid_benchmark
param_test_mm_cid_compare_twice
syscall_errors_test
+slice_test
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index 0d0a5fae5954..4ef90823b652 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -17,7 +17,7 @@ OVERRIDE_TARGETS = 1
TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \
param_test_benchmark param_test_compare_twice param_test_mm_cid \
param_test_mm_cid_benchmark param_test_mm_cid_compare_twice \
- syscall_errors_test
+ syscall_errors_test slice_test
TEST_GEN_PROGS_EXTENDED = librseq.so
@@ -59,3 +59,6 @@ $(OUTPUT)/param_test_mm_cid_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDE
$(OUTPUT)/syscall_errors_test: syscall_errors_test.c $(TEST_GEN_PROGS_EXTENDED) \
rseq.h rseq-*.h
$(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@
+
+$(OUTPUT)/slice_test: slice_test.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h
+ $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@
diff --git a/tools/testing/selftests/rseq/rseq-abi.h b/tools/testing/selftests/rseq/rseq-abi.h
index fb4ec8a75dd4..ecef315204b2 100644
--- a/tools/testing/selftests/rseq/rseq-abi.h
+++ b/tools/testing/selftests/rseq/rseq-abi.h
@@ -53,6 +53,27 @@ struct rseq_abi_cs {
__u64 abort_ip;
} __attribute__((aligned(4 * sizeof(__u64))));
+/**
+ * struct rseq_abi_slice_ctrl - Time slice extension control structure
+ * @all: Compound value
+ * @request: Request for a time slice extension
+ * @granted: Granted time slice extension
+ *
+ * @request is set by user space and can be cleared by user space or kernel
+ * space. @granted is set and cleared by the kernel and must only be read
+ * by user space.
+ */
+struct rseq_abi_slice_ctrl {
+ union {
+ __u32 all;
+ struct {
+ __u8 request;
+ __u8 granted;
+ __u16 __reserved;
+ };
+ };
+};
+
/*
* struct rseq_abi is aligned on 4 * 8 bytes to ensure it is always
* contained within a single cache-line.
@@ -165,6 +186,12 @@ struct rseq_abi {
__u32 mm_cid;
/*
+ * Time slice extension control structure. CPU local updates from
+ * kernel and user space.
+ */
+ struct rseq_abi_slice_ctrl slice_ctrl;
+
+ /*
* Flexible array member at end of structure, after last feature field.
*/
char end[];
diff --git a/tools/testing/selftests/rseq/rseq-slice-hist.py b/tools/testing/selftests/rseq/rseq-slice-hist.py
new file mode 100644
index 000000000000..b7933eeaefb9
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-slice-hist.py
@@ -0,0 +1,132 @@
+#!/usr/bin/python3
+
+#
+# trace-cmd record -e hrtimer_start -e hrtimer_cancel -e hrtimer_expire_entry -- $cmd
+#
+
+from tracecmd import *
+
+def load_kallsyms(file_path='/proc/kallsyms'):
+ """
+ Parses /proc/kallsyms into a dictionary.
+ Returns: { address_int: symbol_name }
+ """
+ kallsyms_map = {}
+
+ try:
+ with open(file_path, 'r') as f:
+ for line in f:
+ # The format is: [address] [type] [name] [module]
+ parts = line.split()
+ if len(parts) < 3:
+ continue
+
+ addr = int(parts[0], 16)
+ name = parts[2]
+
+ kallsyms_map[addr] = name
+
+ except PermissionError:
+ print(f"Error: Permission denied reading {file_path}. Try running with sudo.")
+ except FileNotFoundError:
+ print(f"Error: {file_path} not found.")
+
+ return kallsyms_map
+
+ksyms = load_kallsyms()
+
+# pending[timer_ptr] = {'ts': timestamp, 'comm': comm}
+pending = {}
+
+# histograms[comm][bucket] = count
+histograms = {}
+
+class OnlineHarmonicMean:
+ def __init__(self):
+ self.n = 0 # Count of elements
+ self.S = 0.0 # Cumulative sum of reciprocals
+
+ def update(self, x):
+ if x == 0:
+ raise ValueError("Harmonic mean is undefined for zero.")
+
+ self.n += 1
+ self.S += 1.0 / x
+ return self.n / self.S
+
+ @property
+ def mean(self):
+ return self.n / self.S if self.n > 0 else 0
+
+ohms = {}
+
+def handle_start(record):  # hrtimer_start: remember armed rseq slice timers
+    func_name = ksyms.get(record.num_field("function"), "")  # unresolved address: no match
+    if "rseq_slice_expired" in func_name:
+        timer_ptr = record.num_field("hrtimer")
+        pending[timer_ptr] = {
+            'ts': record.ts,
+            'comm': record.comm
+        }
+    return None
+
+def handle_cancel(record):
+ timer_ptr = record.num_field("hrtimer")
+
+ if timer_ptr in pending:
+ start_data = pending.pop(timer_ptr)
+ duration_ns = record.ts - start_data['ts']
+ duration_us = duration_ns // 1000
+
+ comm = start_data['comm']
+
+ if comm not in ohms:
+ ohms[comm] = OnlineHarmonicMean()
+
+ ohms[comm].update(duration_ns)
+
+ if comm not in histograms:
+ histograms[comm] = {}
+
+ histograms[comm][duration_us] = histograms[comm].get(duration_us, 0) + 1
+ return None
+
+def handle_expire(record):
+ timer_ptr = record.num_field("hrtimer")
+
+ if timer_ptr in pending:
+ start_data = pending.pop(timer_ptr)
+ comm = start_data['comm']
+
+ if comm not in histograms:
+ histograms[comm] = {}
+
+ # Record -1 bucket for expired (failed to cancel)
+ histograms[comm][-1] = histograms[comm].get(-1, 0) + 1
+ return None
+
+if __name__ == "__main__":
+    t = Trace("trace.dat")
+    for cpu in range(0, t.cpus):
+        ev = t.read_event(cpu)
+        while ev:
+            if "hrtimer_start" in ev.name:
+                handle_start(ev)
+            if "hrtimer_cancel" in ev.name:
+                handle_cancel(ev)
+            if "hrtimer_expire_entry" in ev.name:
+                handle_expire(ev)
+
+            ev = t.read_event(cpu)
+
+    print("\n" + "="*40)
+    print("RSEQ SLICE HISTOGRAM (us)")
+    print("="*40)
+    for comm, buckets in histograms.items():  # ohms has no entry for tasks whose timers only expired
+        print(f"\nTask: {comm} Mean: {(ohms[comm].mean if comm in ohms else 0):.3f} ns")
+        print(f" {'Latency (us)':<15} | {'Count'}")
+        print(f" {'-'*30}")
+        # Sort buckets numerically, putting -1 at the top
+        for bucket in sorted(buckets.keys()):
+            label = "EXPIRED" if bucket == -1 else f"{bucket} us"
+            print(f" {label:<15} | {buckets[bucket]}")
diff --git a/tools/testing/selftests/rseq/slice_test.c b/tools/testing/selftests/rseq/slice_test.c
new file mode 100644
index 000000000000..357122dcb487
--- /dev/null
+++ b/tools/testing/selftests/rseq/slice_test.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: LGPL-2.1
+#define _GNU_SOURCE
+#include <assert.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include <linux/prctl.h>
+#include <sys/prctl.h>
+#include <sys/time.h>
+
+#include "rseq.h"
+
+#include "../kselftest_harness.h"
+
+#ifndef __NR_rseq_slice_yield
+# define __NR_rseq_slice_yield 471
+#endif
+
+#define BITS_PER_INT 32
+#define BITS_PER_BYTE 8
+
+#ifndef PR_RSEQ_SLICE_EXTENSION
+# define PR_RSEQ_SLICE_EXTENSION 79
+# define PR_RSEQ_SLICE_EXTENSION_GET 1
+# define PR_RSEQ_SLICE_EXTENSION_SET 2
+# define PR_RSEQ_SLICE_EXT_ENABLE 0x01
+#endif
+
+#ifndef RSEQ_SLICE_EXT_REQUEST_BIT
+# define RSEQ_SLICE_EXT_REQUEST_BIT 0
+# define RSEQ_SLICE_EXT_GRANTED_BIT 1
+#endif
+
+#ifndef asm_inline
+# define asm_inline asm __inline
+#endif
+
+#define NSEC_PER_SEC 1000000000L
+#define NSEC_PER_USEC 1000L
+
+struct noise_params {
+ int64_t noise_nsecs;
+ int64_t sleep_nsecs;
+ int64_t run;
+};
+
+FIXTURE(slice_ext)
+{
+ pthread_t noise_thread;
+ struct noise_params noise_params;
+};
+
+FIXTURE_VARIANT(slice_ext)
+{
+ int64_t total_nsecs;
+ int64_t slice_nsecs;
+ int64_t noise_nsecs;
+ int64_t sleep_nsecs;
+ bool no_yield;
+};
+
+FIXTURE_VARIANT_ADD(slice_ext, n2_2_50)
+{
+ .total_nsecs = 5LL * NSEC_PER_SEC,
+ .slice_nsecs = 2LL * NSEC_PER_USEC,
+ .noise_nsecs = 2LL * NSEC_PER_USEC,
+ .sleep_nsecs = 50LL * NSEC_PER_USEC,
+};
+
+FIXTURE_VARIANT_ADD(slice_ext, n50_2_50)
+{
+ .total_nsecs = 5LL * NSEC_PER_SEC,
+ .slice_nsecs = 50LL * NSEC_PER_USEC,
+ .noise_nsecs = 2LL * NSEC_PER_USEC,
+ .sleep_nsecs = 50LL * NSEC_PER_USEC,
+};
+
+FIXTURE_VARIANT_ADD(slice_ext, n2_2_50_no_yield)
+{
+ .total_nsecs = 5LL * NSEC_PER_SEC,
+ .slice_nsecs = 2LL * NSEC_PER_USEC,
+ .noise_nsecs = 2LL * NSEC_PER_USEC,
+ .sleep_nsecs = 50LL * NSEC_PER_USEC,
+ .no_yield = true,
+};
+
+
+static inline bool elapsed(struct timespec *start, struct timespec *now,
+ int64_t span)
+{
+ int64_t delta = now->tv_sec - start->tv_sec;
+
+ delta *= NSEC_PER_SEC;
+ delta += now->tv_nsec - start->tv_nsec;
+ return delta >= span;
+}
+
+static void *noise_thread(void *arg)
+{
+ struct noise_params *p = arg;
+
+ while (RSEQ_READ_ONCE(p->run)) {
+ struct timespec ts_start, ts_now;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+ do {
+ clock_gettime(CLOCK_MONOTONIC, &ts_now);
+ } while (!elapsed(&ts_start, &ts_now, p->noise_nsecs));
+
+ ts_start.tv_sec = 0;
+ ts_start.tv_nsec = p->sleep_nsecs;
+ clock_nanosleep(CLOCK_MONOTONIC, 0, &ts_start, NULL);
+ }
+ return NULL;
+}
+
+FIXTURE_SETUP(slice_ext)
+{
+ cpu_set_t affinity;
+
+ ASSERT_EQ(sched_getaffinity(0, sizeof(affinity), &affinity), 0);
+
+ /* Pin it on a single CPU. Avoid CPU 0 */
+ for (int i = 1; i < CPU_SETSIZE; i++) {
+ if (!CPU_ISSET(i, &affinity))
+ continue;
+
+ CPU_ZERO(&affinity);
+ CPU_SET(i, &affinity);
+ ASSERT_EQ(sched_setaffinity(0, sizeof(affinity), &affinity), 0);
+ break;
+ }
+
+ ASSERT_EQ(rseq_register_current_thread(), 0);
+
+ ASSERT_EQ(prctl(PR_RSEQ_SLICE_EXTENSION, PR_RSEQ_SLICE_EXTENSION_SET,
+ PR_RSEQ_SLICE_EXT_ENABLE, 0, 0), 0);
+
+ self->noise_params.noise_nsecs = variant->noise_nsecs;
+ self->noise_params.sleep_nsecs = variant->sleep_nsecs;
+ self->noise_params.run = 1;
+
+ ASSERT_EQ(pthread_create(&self->noise_thread, NULL, noise_thread, &self->noise_params), 0);
+}
+
+FIXTURE_TEARDOWN(slice_ext)
+{
+	RSEQ_WRITE_ONCE(self->noise_params.run, 0);	/* pairs with RSEQ_READ_ONCE() in noise_thread() */
+	pthread_join(self->noise_thread, NULL);
+}
+
+TEST_F(slice_ext, slice_test)
+{
+	unsigned long success = 0, yielded = 0, scheduled = 0, raced = 0;
+	unsigned long total = 0, aborted = 0;
+	struct rseq_abi *rs = rseq_get_abi();
+	struct timespec ts_start, ts_now;
+
+	ASSERT_NE(rs, NULL);
+
+	clock_gettime(CLOCK_MONOTONIC, &ts_start);
+	do {
+		struct timespec ts_cs;
+		bool req = false;
+
+		clock_gettime(CLOCK_MONOTONIC, &ts_cs);
+
+		total++;
+		RSEQ_WRITE_ONCE(rs->slice_ctrl.request, 1);
+		do {
+			clock_gettime(CLOCK_MONOTONIC, &ts_now);
+		} while (!elapsed(&ts_cs, &ts_now, variant->slice_nsecs));
+
+		/*
+		 * The request could be cleared unconditionally; it is only
+		 * checked first so that the statistics below can be kept.
+		 */
+		if (RSEQ_READ_ONCE(rs->slice_ctrl.request)) {
+			RSEQ_WRITE_ONCE(rs->slice_ctrl.request, 0);
+			/* Race between check and clear! */
+			req = true;
+			success++;
+		}
+
+		if (RSEQ_READ_ONCE(rs->slice_ctrl.granted)) {
+			/* The above raced against a late grant */
+			if (req)
+				success--;
+			if (variant->no_yield) {
+				syscall(__NR_getpid);
+				aborted++;
+			} else {
+				yielded++;
+				if (!syscall(__NR_rseq_slice_yield))
+					raced++;
+			}
+		} else {
+			if (!req)
+				scheduled++;
+		}
+
+		clock_gettime(CLOCK_MONOTONIC, &ts_now);
+	} while (!elapsed(&ts_start, &ts_now, variant->total_nsecs));
+	/* The counters are unsigned long, so they are printed with %lu */
+	printf("# Total %12lu\n", total);
+	printf("# Success %12lu\n", success);
+	printf("# Yielded %12lu\n", yielded);
+	printf("# Aborted %12lu\n", aborted);
+	printf("# Scheduled %12lu\n", scheduled);
+	printf("# Raced %12lu\n", raced);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh
index d4be97498b32..84d45254675c 100755
--- a/tools/testing/selftests/run_kselftest.sh
+++ b/tools/testing/selftests/run_kselftest.sh
@@ -30,6 +30,7 @@ Usage: $0 [OPTIONS]
-s | --summary Print summary with detailed log in output.log (conflict with -p)
-p | --per-test-log Print test log in /tmp with each test name (conflict with -s)
-t | --test COLLECTION:TEST Run TEST from COLLECTION
+ -S | --skip COLLECTION:TEST Skip TEST from COLLECTION
-c | --collection COLLECTION Run all tests from COLLECTION
-l | --list List the available collection:test entries
-d | --dry-run Don't actually run any tests
@@ -43,6 +44,7 @@ EOF
COLLECTIONS=""
TESTS=""
+SKIP=""
dryrun=""
kselftest_override_timeout=""
ERROR_ON_FAIL=true
@@ -58,6 +60,9 @@ while true; do
-t | --test)
TESTS="$TESTS $2"
shift 2 ;;
+ -S | --skip)
+ SKIP="$SKIP $2"
+ shift 2 ;;
-c | --collection)
COLLECTIONS="$COLLECTIONS $2"
shift 2 ;;
@@ -109,6 +114,12 @@ if [ -n "$TESTS" ]; then
done
available="$(echo "$valid" | sed -e 's/ /\n/g')"
fi
+# Remove tests to be skipped from available list
+if [ -n "$SKIP" ]; then
+ for skipped in $SKIP ; do
+ available="$(echo "$available" | grep -v "^${skipped}$")"
+ done
+fi
kselftest_failures_file="$(mktemp --tmpdir kselftest-failures-XXXXXX)"
export kselftest_failures_file
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile
index 5fe45f9c5f8f..2c601a7eaff5 100644
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -183,7 +183,9 @@ auto-test-targets := \
select_cpu_dispatch_bad_dsq \
select_cpu_dispatch_dbl_dsp \
select_cpu_vtime \
+ rt_stall \
test_example \
+ total_bw \
testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets)))
diff --git a/tools/testing/selftests/sched_ext/init_enable_count.c b/tools/testing/selftests/sched_ext/init_enable_count.c
index eddf9e0e26e7..82c71653977b 100644
--- a/tools/testing/selftests/sched_ext/init_enable_count.c
+++ b/tools/testing/selftests/sched_ext/init_enable_count.c
@@ -4,6 +4,7 @@
* Copyright (c) 2023 David Vernet <dvernet@meta.com>
* Copyright (c) 2023 Tejun Heo <tj@kernel.org>
*/
+#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <sched.h>
@@ -23,6 +24,9 @@ static enum scx_test_status run_test(bool global)
int ret, i, status;
struct sched_param param = {};
pid_t pids[num_pre_forks];
+ int pipe_fds[2];
+
+ SCX_FAIL_IF(pipe(pipe_fds) < 0, "Failed to create pipe");
skel = init_enable_count__open();
SCX_FAIL_IF(!skel, "Failed to open");
@@ -38,26 +42,34 @@ static enum scx_test_status run_test(bool global)
* ensure (at least in practical terms) that there are more tasks that
* transition from SCHED_OTHER -> SCHED_EXT than there are tasks that
* take the fork() path either below or in other processes.
+ *
+ * All children will block on read() on the pipe until the parent closes
+ * the write end after attaching the scheduler, which signals all of
+ * them to exit simultaneously. Auto-reap so we don't have to wait on
+ * them.
*/
+ signal(SIGCHLD, SIG_IGN);
for (i = 0; i < num_pre_forks; i++) {
- pids[i] = fork();
- SCX_FAIL_IF(pids[i] < 0, "Failed to fork child");
- if (pids[i] == 0) {
- sleep(1);
+ pid_t pid = fork();
+
+ SCX_FAIL_IF(pid < 0, "Failed to fork child");
+ if (pid == 0) {
+ char buf;
+
+ close(pipe_fds[1]);
+ read(pipe_fds[0], &buf, 1);
+ close(pipe_fds[0]);
exit(0);
}
}
+ close(pipe_fds[0]);
link = bpf_map__attach_struct_ops(skel->maps.init_enable_count_ops);
SCX_FAIL_IF(!link, "Failed to attach struct_ops");
- for (i = 0; i < num_pre_forks; i++) {
- SCX_FAIL_IF(waitpid(pids[i], &status, 0) != pids[i],
- "Failed to wait for pre-forked child\n");
-
- SCX_FAIL_IF(status != 0, "Pre-forked child %d exited with status %d\n", i,
- status);
- }
+ /* Signal all pre-forked children to exit. */
+ close(pipe_fds[1]);
+ signal(SIGCHLD, SIG_DFL);
bpf_link__destroy(link);
SCX_GE(skel->bss->init_task_cnt, num_pre_forks);
diff --git a/tools/testing/selftests/sched_ext/rt_stall.bpf.c b/tools/testing/selftests/sched_ext/rt_stall.bpf.c
new file mode 100644
index 000000000000..80086779dd1e
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/rt_stall.bpf.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A scheduler that verifies if RT tasks can stall SCHED_EXT tasks.
+ *
+ * Copyright (c) 2025 NVIDIA Corporation.
+ */
+
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+UEI_DEFINE(uei);
+
+void BPF_STRUCT_OPS(rt_stall_exit, struct scx_exit_info *ei)
+{
+ UEI_RECORD(uei, ei);
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops rt_stall_ops = {
+ .exit = (void *)rt_stall_exit,
+ .name = "rt_stall",
+};
diff --git a/tools/testing/selftests/sched_ext/rt_stall.c b/tools/testing/selftests/sched_ext/rt_stall.c
new file mode 100644
index 000000000000..015200f80f6e
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/rt_stall.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 NVIDIA Corporation.
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sched.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <linux/sched.h>
+#include <signal.h>
+#include <bpf/bpf.h>
+#include <scx/common.h>
+#include <unistd.h>
+#include "rt_stall.bpf.skel.h"
+#include "scx_test.h"
+#include "../kselftest.h"
+
+#define CORE_ID 0 /* CPU to pin tasks to */
+#define RUN_TIME 5 /* How long to run the test in seconds */
+
+/* Burn CPU forever; this function never returns */
+static void process_func(void)
+{
+	volatile unsigned long spin;
+
+	for (;;) {
+		/* The volatile counter keeps the compiler from dropping the loop */
+		spin = 0;
+		while (spin < 10000000UL)
+			spin++;
+	}
+}
+
+/* Pin the calling task to @cpu; terminates the process on failure */
+static void set_affinity(int cpu)
+{
+	cpu_set_t cpus;
+
+	CPU_ZERO(&cpus);
+	CPU_SET(cpu, &cpus);
+
+	if (!sched_setaffinity(0, sizeof(cpus), &cpus))
+		return;
+
+	perror("sched_setaffinity");
+	exit(EXIT_FAILURE);
+}
+
+/* Switch the calling task to @policy at @priority; exits on failure */
+static void set_sched(int policy, int priority)
+{
+	struct sched_param sp;
+
+	sp.sched_priority = priority;
+
+	if (!sched_setscheduler(0, policy, &sp))
+		return;
+
+	perror("sched_setscheduler");
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Return the CPU time (user + system) consumed so far by @pid, in seconds,
+ * computed from fields 14 (utime) and 15 (stime) of /proc/<pid>/stat.
+ *
+ * Returns -1 if the stat file cannot be opened or parsed, or if the number
+ * of clock ticks per second cannot be determined.
+ */
+static float get_process_runtime(int pid)
+{
+	unsigned long utime, stime;
+	long ticks_per_second;
+	char path[256];
+	FILE *file;
+	int fields;
+
+	snprintf(path, sizeof(path), "/proc/%d/stat", pid);
+	file = fopen(path, "r");
+	if (file == NULL) {
+		perror("Failed to open stat file");
+		return -1;
+	}
+
+	/*
+	 * Skip the first 13 fields and read the 14th and 15th. The %lu
+	 * conversions require unsigned long destinations.
+	 *
+	 * NOTE: %*s stops at the first blank, so this only parses correctly
+	 * when the task's comm (field 2) contains no whitespace.
+	 */
+	fields = fscanf(file,
+			"%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu %lu",
+			&utime, &stime);
+	fclose(file);
+
+	if (fields != 2) {
+		fprintf(stderr, "Failed to read stat file\n");
+		return -1;
+	}
+
+	/* sysconf() returns -1 on error; never divide by a bogus tick rate */
+	ticks_per_second = sysconf(_SC_CLK_TCK);
+	if (ticks_per_second <= 0) {
+		fprintf(stderr, "Failed to get clock ticks per second\n");
+		return -1;
+	}
+
+	/* Convert the total number of clock ticks to seconds */
+	return (utime + stime) * 1.0 / ticks_per_second;
+}
+
+/*
+ * Test setup: open and load the rt_stall BPF skeleton and store it in *ctx,
+ * from where run() and cleanup() pick it up. The scheduler is not attached
+ * here; run() attaches and detaches it as needed.
+ *
+ * NOTE(review): SCX_FAIL_IF() presumably returns a failure status from this
+ * function when its condition is true; if so, a failed load leaves the
+ * opened skeleton un-destroyed - confirm against scx_test.h.
+ */
+static enum scx_test_status setup(void **ctx)
+{
+ struct rt_stall *skel;
+
+ skel = rt_stall__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(rt_stall__load(skel), "Failed to load skel");
+
+ *ctx = skel;
+
+ return SCX_TEST_PASS;
+}
+
+/*
+ * SIGKILL and reap the forked test tasks. Both signals are sent before
+ * waiting so that neither task keeps competing for the CPU while the other
+ * one is being reaped. A non-positive pid is skipped.
+ */
+static void stop_test_tasks(int ext_pid, int rt_pid)
+{
+	if (ext_pid > 0)
+		kill(ext_pid, SIGKILL);
+	if (rt_pid > 0)
+		kill(rt_pid, SIGKILL);
+	if (ext_pid > 0)
+		waitpid(ext_pid, NULL, 0);
+	if (rt_pid > 0)
+		waitpid(rt_pid, NULL, 0);
+}
+
+/*
+ * Run one CPU-bound task that keeps its inherited scheduling policy against
+ * one CPU-bound SCHED_FIFO task, both pinned to CORE_ID, for RUN_TIME
+ * seconds, then compare the CPU time each of them received.
+ *
+ * @is_ext: selects the label ("EXT" or "FAIR") used in the messages.
+ *
+ * Returns true if the non-RT task received at least 4% of the combined
+ * runtime, false otherwise. If a task cannot be created or its runtime
+ * cannot be read, the process exits through ksft_exit_fail*(); the tasks
+ * created so far are killed first so that no busy-looping child (in
+ * particular the SCHED_FIFO one) outlives the test.
+ */
+static bool sched_stress_test(bool is_ext)
+{
+	/*
+	 * We're expecting the EXT task to get around 5% of CPU time when
+	 * competing with the RT task (small 1% fluctuations are expected).
+	 *
+	 * However, the EXT task should get at least 4% of the CPU to prove
+	 * that the EXT deadline server is working correctly. A percentage
+	 * less than 4% indicates a bug where RT tasks can potentially
+	 * stall SCHED_EXT tasks, causing the test to fail.
+	 */
+	const float expected_min_ratio = 0.04; /* 4% */
+	const char *class_str = is_ext ? "EXT" : "FAIR";
+
+	float ext_runtime, rt_runtime, total_runtime, actual_ratio;
+	int ext_pid, rt_pid;
+
+	ksft_print_header();
+	ksft_set_plan(1);
+
+	/* Create and set up an EXT task */
+	ext_pid = fork();
+	if (ext_pid == 0) {
+		set_affinity(CORE_ID);
+		process_func();
+		exit(0);
+	} else if (ext_pid < 0) {
+		perror("fork task");
+		ksft_exit_fail();
+	}
+
+	/* Create an RT task */
+	rt_pid = fork();
+	if (rt_pid == 0) {
+		set_affinity(CORE_ID);
+		set_sched(SCHED_FIFO, 50);
+		process_func();
+		exit(0);
+	} else if (rt_pid < 0) {
+		perror("fork for RT task");
+		/* Don't leave the first task spinning behind us */
+		stop_test_tasks(ext_pid, -1);
+		ksft_exit_fail();
+	}
+
+	/* Let the processes run for the specified time */
+	sleep(RUN_TIME);
+
+	/* Get runtime for the EXT task */
+	ext_runtime = get_process_runtime(ext_pid);
+	if (ext_runtime == -1) {
+		stop_test_tasks(ext_pid, rt_pid);
+		ksft_exit_fail_msg("Error getting runtime for %s task (PID %d)\n",
+				   class_str, ext_pid);
+	}
+	ksft_print_msg("Runtime of %s task (PID %d) is %f seconds\n",
+		       class_str, ext_pid, ext_runtime);
+
+	/* Get runtime for the RT task */
+	rt_runtime = get_process_runtime(rt_pid);
+	if (rt_runtime == -1) {
+		stop_test_tasks(ext_pid, rt_pid);
+		ksft_exit_fail_msg("Error getting runtime for RT task (PID %d)\n", rt_pid);
+	}
+	ksft_print_msg("Runtime of RT task (PID %d) is %f seconds\n", rt_pid, rt_runtime);
+
+	/* Kill the processes */
+	stop_test_tasks(ext_pid, rt_pid);
+
+	/*
+	 * Verify that the scx task got enough runtime. A zero combined
+	 * runtime is treated as a 0% share instead of computing 0/0.
+	 */
+	total_runtime = ext_runtime + rt_runtime;
+	actual_ratio = total_runtime > 0 ? ext_runtime / total_runtime : 0;
+	ksft_print_msg("%s task got %.2f%% of total runtime\n",
+		       class_str, actual_ratio * 100);
+
+	if (actual_ratio >= expected_min_ratio) {
+		ksft_test_result_pass("PASS: %s task got more than %.2f%% of runtime\n",
+				      class_str, expected_min_ratio * 100);
+		return true;
+	}
+	ksft_test_result_fail("FAIL: %s task got less than %.2f%% of runtime\n",
+			      class_str, expected_min_ratio * 100);
+	return false;
+}
+
+/*
+ * Run the stress test four times, alternating between no scheduler attached
+ * by this test (even iterations) and rt_stall_ops attached (odd iterations).
+ *
+ * Returns SCX_TEST_PASS when all four iterations succeed. A failing stress
+ * test terminates the process through ksft_exit_fail().
+ */
+static enum scx_test_status run(void *ctx)
+{
+ struct rt_stall *skel = ctx;
+ struct bpf_link *link = NULL;
+ bool res;
+ int i;
+
+ /*
+ * Test if the dl_server is working both with and without the
+ * sched_ext scheduler attached.
+ *
+ * This ensures all the scenarios are covered:
+ * - fair_server stop -> ext_server start
+ * - ext_server stop -> fair_server start
+ */
+ for (i = 0; i < 4; i++) {
+ bool is_ext = i % 2;
+
+ if (is_ext) {
+ /* Clear exit info left over from a previous attach */
+ memset(&skel->data->uei, 0, sizeof(skel->data->uei));
+ link = bpf_map__attach_struct_ops(skel->maps.rt_stall_ops);
+ SCX_FAIL_IF(!link, "Failed to attach scheduler");
+ }
+ res = sched_stress_test(is_ext);
+ if (is_ext) {
+ /*
+ * The recorded exit kind must still be SCX_EXIT_NONE.
+ * NOTE(review): if SCX_EQ() returns on mismatch, the
+ * link is not destroyed on that path - confirm.
+ */
+ SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE));
+ bpf_link__destroy(link);
+ }
+
+ if (!res)
+ ksft_exit_fail();
+ }
+
+ return SCX_TEST_PASS;
+}
+
+/* Destroy the BPF skeleton that setup() stored in @ctx */
+static void cleanup(void *ctx)
+{
+	rt_stall__destroy(ctx);
+}
+
+/* Test descriptor: name, description and callbacks registered with the runner */
+struct scx_test rt_stall = {
+ .name = "rt_stall",
+ .description = "Verify that RT tasks cannot stall SCHED_EXT tasks",
+ .setup = setup,
+ .run = run,
+ .cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&rt_stall)
diff --git a/tools/testing/selftests/sched_ext/runner.c b/tools/testing/selftests/sched_ext/runner.c
index aa2d7d32dda9..5748d2c69903 100644
--- a/tools/testing/selftests/sched_ext/runner.c
+++ b/tools/testing/selftests/sched_ext/runner.c
@@ -46,6 +46,14 @@ static void print_test_preamble(const struct scx_test *test, bool quiet)
if (!quiet)
printf("DESCRIPTION: %s\n", test->description);
printf("OUTPUT:\n");
+
+ /*
+ * The tests may fork with the preamble buffered
+ * in the children's stdout. Flush before the test
+ * to avoid printing the message multiple times.
+ */
+ fflush(stdout);
+ fflush(stderr);
}
static const char *status_to_result(enum scx_test_status status)
diff --git a/tools/testing/selftests/sched_ext/total_bw.c b/tools/testing/selftests/sched_ext/total_bw.c
new file mode 100644
index 000000000000..5b0a619bab86
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/total_bw.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test to verify that total_bw value remains consistent across all CPUs
+ * in different BPF program states.
+ *
+ * Copyright (C) 2025 NVIDIA Corporation.
+ */
+#include <bpf/bpf.h>
+#include <errno.h>
+#include <pthread.h>
+#include <scx/common.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "minimal.bpf.skel.h"
+#include "scx_test.h"
+
+#define MAX_CPUS 512
+#define STRESS_DURATION_SEC 5
+
+/* State allocated in setup() and shared with run() and cleanup() */
+struct total_bw_ctx {
+ /* BPF skeleton, opened and loaded in setup() */
+ struct minimal *skel;
+ /* total_bw values read in setup(), before the scheduler is attached */
+ long baseline_bw[MAX_CPUS];
+ /* Number of CPUs checked: online CPU count, capped at MAX_CPUS */
+ int nr_cpus;
+};
+
+/*
+ * Thread body: busy-loop until STRESS_DURATION_SEC seconds have passed.
+ * @arg is unused; always returns NULL.
+ */
+static void *cpu_stress_thread(void *arg)
+{
+	const time_t deadline = time(NULL) + STRESS_DURATION_SEC;
+	volatile int spin;
+
+	for (;;) {
+		if (time(NULL) >= deadline)
+			break;
+
+		/* Short spin between two clock checks */
+		for (spin = 0; spin < 1000000; spin++)
+			;
+	}
+
+	return NULL;
+}
+
+/*
+ * The first enqueue on a CPU causes the DL server to start, for that
+ * reason run stressor threads in the hopes it schedules on all CPUs.
+ *
+ * Spawns @nr_cpus busy-looping threads (not pinned to any CPU) and waits
+ * for them to finish.
+ *
+ * Returns 0 on success, -ENOMEM if the thread array cannot be allocated, or
+ * the negated pthread_create() error code if a thread cannot be created.
+ */
+static int run_cpu_stress(int nr_cpus)
+{
+	pthread_t *threads;
+	int nr_created = 0;
+	int i, err, ret = 0;
+
+	threads = calloc(nr_cpus, sizeof(*threads));
+	if (!threads)
+		return -ENOMEM;
+
+	/* Create one stressor thread per CPU */
+	for (i = 0; i < nr_cpus; i++) {
+		/* pthread_create() returns the error number; errno is not set */
+		err = pthread_create(&threads[i], NULL, cpu_stress_thread, NULL);
+		if (err) {
+			ret = -err;
+			fprintf(stderr, "Failed to create thread %d: %s\n", i, strerror(err));
+			break;
+		}
+		nr_created++;
+	}
+
+	/* Wait for the threads that were actually started */
+	for (i = 0; i < nr_created; i++)
+		pthread_join(threads[i], NULL);
+
+	free(threads);
+	return ret;
+}
+
+/*
+ * Scan /sys/kernel/debug/sched/debug for lines containing "total_bw" and
+ * store the number following the next ':' into @bw_values, in file order.
+ * At most @max_cpus entries are written.
+ *
+ * Returns the number of matching lines found (which may exceed @max_cpus),
+ * or -1 if the file cannot be opened.
+ */
+static int read_total_bw_values(long *bw_values, int max_cpus)
+{
+	char buf[256];
+	int nr_found = 0;
+	FILE *dbg;
+
+	dbg = fopen("/sys/kernel/debug/sched/debug", "r");
+	if (!dbg) {
+		SCX_ERR("Failed to open debug file");
+		return -1;
+	}
+
+	while (fgets(buf, sizeof(buf), dbg)) {
+		char *val = strstr(buf, "total_bw");
+
+		if (!val)
+			continue;
+
+		val = strchr(val, ':');
+		if (!val)
+			continue;
+
+		/* Count every match, but only store up to max_cpus values */
+		if (nr_found < max_cpus)
+			bw_values[nr_found] = atol(val + 1);
+		nr_found++;
+	}
+
+	fclose(dbg);
+	return nr_found;
+}
+
+/*
+ * Check that the first @count entries of @bw_values all hold the same value.
+ *
+ * Returns false when @count is not positive or on the first entry that
+ * differs from entry 0 (the mismatch is reported through SCX_ERR()),
+ * true otherwise.
+ */
+static bool verify_total_bw_consistency(long *bw_values, int count)
+{
+	int cpu;
+
+	if (count <= 0)
+		return false;
+
+	for (cpu = 1; cpu < count; cpu++) {
+		if (bw_values[cpu] == bw_values[0])
+			continue;
+
+		SCX_ERR("Inconsistent total_bw: CPU0=%ld, CPU%d=%ld",
+			bw_values[0], cpu, bw_values[cpu]);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Stress the CPUs once, then read the total_bw values into @bw_values and
+ * check that the first @nr_cpus of them agree. The read and check are
+ * retried up to 10 times, one second apart.
+ *
+ * Returns 0 once a consistent set has been read, -1 if the stress run fails
+ * or no attempt yields at least @nr_cpus consistent values.
+ */
+static int fetch_verify_total_bw(long *bw_values, int nr_cpus)
+{
+	const int max_attempts = 10;
+	int attempt, nr_read;
+
+	/*
+	 * The first enqueue on a CPU causes the DL server to start, for that
+	 * reason run stressor threads in the hopes it schedules on all CPUs.
+	 */
+	if (run_cpu_stress(nr_cpus) < 0) {
+		SCX_ERR("Failed to run CPU stress");
+		return -1;
+	}
+
+	/* Try multiple times to get stable values */
+	for (attempt = 0; attempt < max_attempts; attempt++) {
+		nr_read = read_total_bw_values(bw_values, nr_cpus);
+		fprintf(stderr, "Read %d total_bw values (testing %d CPUs)\n", nr_read, nr_cpus);
+
+		if (nr_read < nr_cpus) {
+			/* If system has more CPUs than we're testing, that's OK */
+			SCX_ERR("Expected at least %d CPUs, got %d", nr_cpus, nr_read);
+		} else if (verify_total_bw_consistency(bw_values, nr_cpus)) {
+			/* Only the CPUs we're testing are verified */
+			fprintf(stderr, "Values are consistent: %ld\n", bw_values[0]);
+			return 0;
+		}
+
+		sleep(1);
+	}
+
+	return -1;
+}
+
+/*
+ * Test setup: record the baseline total_bw values (scenario 1, scheduler not
+ * attached) and open + load the minimal BPF skeleton.
+ *
+ * Returns SCX_TEST_SKIP when the sched debugfs file is not readable,
+ * SCX_TEST_FAIL on any error (nothing stays allocated in that case), and
+ * SCX_TEST_PASS with the new context stored in *ctx otherwise.
+ */
+static enum scx_test_status setup(void **ctx)
+{
+	struct total_bw_ctx *tctx;
+
+	if (access("/sys/kernel/debug/sched/debug", R_OK) != 0) {
+		fprintf(stderr, "Skipping test: debugfs sched/debug not accessible\n");
+		return SCX_TEST_SKIP;
+	}
+
+	tctx = calloc(1, sizeof(*tctx));
+	if (!tctx)
+		return SCX_TEST_FAIL;
+
+	tctx->nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	if (tctx->nr_cpus <= 0)
+		goto err_free;
+
+	/* If system has more CPUs than MAX_CPUS, just test the first MAX_CPUS */
+	if (tctx->nr_cpus > MAX_CPUS)
+		tctx->nr_cpus = MAX_CPUS;
+
+	/* Test scenario 1: BPF program not loaded */
+	/* Read and verify baseline total_bw before loading BPF program */
+	fprintf(stderr, "BPF prog initially not loaded, reading total_bw values\n");
+	if (fetch_verify_total_bw(tctx->baseline_bw, tctx->nr_cpus) < 0) {
+		SCX_ERR("Failed to get stable baseline values");
+		goto err_free;
+	}
+
+	/* Load the BPF skeleton */
+	tctx->skel = minimal__open();
+	if (!tctx->skel)
+		goto err_free;
+
+	SCX_ENUM_INIT(tctx->skel);
+	if (minimal__load(tctx->skel))
+		goto err_destroy;
+
+	*ctx = tctx;
+	return SCX_TEST_PASS;
+
+err_destroy:
+	minimal__destroy(tctx->skel);
+err_free:
+	free(tctx);
+	return SCX_TEST_FAIL;
+}
+
+/*
+ * Read the total_bw values with the scheduler attached (scenario 2) and
+ * again after detaching it (scenario 3), then compare both sets, CPU by CPU,
+ * against the baseline recorded in setup().
+ *
+ * Returns SCX_TEST_PASS when all three sets match, SCX_TEST_FAIL otherwise.
+ */
+static enum scx_test_status run(void *ctx)
+{
+	struct total_bw_ctx *tctx = ctx;
+	long attached_bw[MAX_CPUS];
+	long detached_bw[MAX_CPUS];
+	struct bpf_link *ops_link;
+	int cpu;
+
+	/* Test scenario 2: BPF program loaded */
+	ops_link = bpf_map__attach_struct_ops(tctx->skel->maps.minimal_ops);
+	if (!ops_link) {
+		SCX_ERR("Failed to attach scheduler");
+		return SCX_TEST_FAIL;
+	}
+
+	fprintf(stderr, "BPF program loaded, reading total_bw values\n");
+	if (fetch_verify_total_bw(attached_bw, tctx->nr_cpus) < 0) {
+		SCX_ERR("Failed to get stable values with BPF loaded");
+		bpf_link__destroy(ops_link);
+		return SCX_TEST_FAIL;
+	}
+	bpf_link__destroy(ops_link);
+
+	/* Test scenario 3: BPF program unloaded */
+	fprintf(stderr, "BPF program unloaded, reading total_bw values\n");
+	if (fetch_verify_total_bw(detached_bw, tctx->nr_cpus) < 0) {
+		SCX_ERR("Failed to get stable values after BPF unload");
+		return SCX_TEST_FAIL;
+	}
+
+	/* Verify all three scenarios have the same total_bw values */
+	for (cpu = 0; cpu < tctx->nr_cpus; cpu++) {
+		const long baseline = tctx->baseline_bw[cpu];
+
+		if (baseline != attached_bw[cpu]) {
+			SCX_ERR("CPU%d: baseline_bw=%ld != loaded_bw=%ld",
+				cpu, baseline, attached_bw[cpu]);
+			return SCX_TEST_FAIL;
+		}
+
+		if (baseline != detached_bw[cpu]) {
+			SCX_ERR("CPU%d: baseline_bw=%ld != unloaded_bw=%ld",
+				cpu, baseline, detached_bw[cpu]);
+			return SCX_TEST_FAIL;
+		}
+	}
+
+	fprintf(stderr, "All total_bw values are consistent across all scenarios\n");
+	return SCX_TEST_PASS;
+}
+
+/* Destroy the skeleton (if any) and free the context; NULL @ctx is a no-op */
+static void cleanup(void *ctx)
+{
+	struct total_bw_ctx *tctx = ctx;
+
+	if (!tctx)
+		return;
+
+	if (tctx->skel)
+		minimal__destroy(tctx->skel);
+	free(tctx);
+}
+
+/* Test descriptor: name, description and callbacks registered with the runner */
+struct scx_test total_bw = {
+ .name = "total_bw",
+ .description = "Verify total_bw consistency across BPF program states",
+ .setup = setup,
+ .run = run,
+ .cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&total_bw)
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
index b73bd255ea36..b056eb966871 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -1052,5 +1052,98 @@
"$TC qdisc del dev $DEV1 ingress_block 21 clsact",
"$TC actions flush action mirred"
]
+ },
+ {
+ "id": "7eba",
+ "name": "Redirect multiport: dummy egress -> dummy egress (Loop)",
+ "category": [
+ "filter",
+ "mirred"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin"
+ ]
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1: root drr",
+ "$TC filter add dev $DUMMY parent 1: protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1"
+ ],
+ "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC -j -s actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "redirect",
+ "direction": "egress",
+ "index": 1,
+ "stats": {
+ "packets": 1,
+ "overlimits": 1
+ },
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root"
+ ]
+ },
+ {
+ "id": "4ed9",
+ "name": "Try to redirect to self on egress with clsact",
+ "category": [
+ "filter",
+ "mirred"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin"
+ ]
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY clsact",
+ "$TC filter add dev $DUMMY egress protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1"
+ ],
+ "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC -j -s actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "redirect",
+ "direction": "egress",
+ "index": 1,
+ "stats": {
+ "packets": 1,
+ "overlimits": 1
+ },
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY clsact"
+ ]
}
+
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index 47de27fd4f90..6a39640aa2a8 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -1033,5 +1033,83 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "6e4f",
+ "name": "Try to delete ets drr class' qdisc while still keeping it in the active list",
+ "category": [
+ "qdisc",
+ "ets",
+ "tbf"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin",
+ "scapyPlugin"
+ ]
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: ets bands 2 strict 1",
+ "$TC qdisc add dev $DUMMY parent 1:2 handle 20: tbf rate 8bit burst 100b latency 1s",
+ "$TC filter add dev $DUMMY parent 1: basic classid 1:2",
+ "ping -c2 -W0.01 -s 56 -I $DUMMY 10.10.11.11 || true",
+ "$TC qdisc change dev $DUMMY root handle 1: ets bands 2 strict 2",
+ "$TC qdisc change dev $DUMMY root handle 1: ets bands 1 strict 1"
+ ],
+ "cmdUnderTest": "ping -c1 -W0.01 -s 56 -I $DUMMY 10.10.11.11",
+ "expExitCode": "1",
+ "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY root",
+ "matchJSON": [
+ {
+ "kind": "ets",
+ "handle": "1:",
+ "bytes": 196,
+ "packets": 2
+ }
+ ],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root handle 1:"
+ ]
+ },
+ {
+ "id": "0b8f",
+ "name": "Try to add ets class to the active list twice",
+ "category": [
+ "qdisc",
+ "ets",
+ "tbf"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin",
+ "scapyPlugin"
+ ]
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: ets bands 2 strict 1",
+ "$TC qdisc add dev $DUMMY parent 1:2 handle 20: tbf rate 8bit burst 100b latency 1s",
+ "$TC filter add dev $DUMMY parent 1: basic classid 1:2",
+ "ping -c2 -W0.01 -s 56 -I $DUMMY 10.10.11.11 || true",
+ "$TC qdisc change dev $DUMMY root handle 1: ets bands 2 strict 2",
+ "$TC qdisc change dev $DUMMY root handle 1: ets bands 2 strict 1"
+ ],
+ "cmdUnderTest": "ping -c1 -W0.01 -s 56 -I $DUMMY 10.10.11.11",
+ "expExitCode": "1",
+ "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY root",
+ "matchJSON": [
+ {
+ "kind": "ets",
+ "handle": "1:",
+ "bytes": 98,
+ "packets": 1
+ }
+ ],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root handle 1:"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake_mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake_mq.json
new file mode 100644
index 000000000000..0efe229fb86e
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake_mq.json
@@ -0,0 +1,559 @@
+[
+ {
+ "id": "684b",
+ "name": "Create CAKE_MQ with default setting (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device || true",
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "7ee8",
+ "name": "Create CAKE_MQ with bandwidth limit (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq bandwidth 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth 1Kbit diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "1f87",
+ "name": "Create CAKE_MQ with rtt time (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq rtt 200",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 200us raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "e9cf",
+ "name": "Create CAKE_MQ with besteffort flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq besteffort",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited besteffort triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "7c05",
+ "name": "Create CAKE_MQ with diffserv8 flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq diffserv8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv8 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "5a77",
+ "name": "Create CAKE_MQ with diffserv4 flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq diffserv4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv4 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "8f7a",
+ "name": "Create CAKE_MQ with flowblind flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq flowblind",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 flowblind nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "7ef7",
+ "name": "Create CAKE_MQ with dsthost and nat flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq dsthost nat",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 dsthost nat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "2e4d",
+ "name": "Create CAKE_MQ with wash flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq hosts wash",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 hosts nonat wash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "b3e6",
+ "name": "Create CAKE_MQ with flowblind and no-split-gso flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq flowblind no-split-gso",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 flowblind nonat nowash no-ack-filter no-split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "62cd",
+ "name": "Create CAKE_MQ with dual-srchost and ack-filter flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq dual-srchost ack-filter",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 dual-srchost nonat nowash ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "0df3",
+ "name": "Create CAKE_MQ with dual-dsthost and ack-filter-aggressive flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq dual-dsthost ack-filter-aggressive",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 dual-dsthost nonat nowash ack-filter-aggressive split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "9a75",
+ "name": "Create CAKE_MQ with memlimit and ptm flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq memlimit 10000 ptm",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw ptm overhead 0 memlimit 10000b ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "cdef",
+ "name": "Create CAKE_MQ with fwmark and atm flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq fwmark 8 atm",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw atm overhead 0 fwmark 0x8 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "93dd",
+ "name": "Create CAKE_MQ with overhead 128 and mpu (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq overhead 128 mpu 256",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms noatm overhead 128 mpu 256 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "1475",
+ "name": "Create CAKE_MQ with conservative and ingress flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq conservative ingress",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash ingress no-ack-filter split-gso rtt 100ms atm overhead 48 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "7bf1",
+ "name": "Delete CAKE_MQ with conservative and ingress flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH handle 1: root cake_mq conservative ingress"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash ingress no-ack-filter split-gso rtt 100ms atm overhead 48 ",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "ee55",
+ "name": "Replace CAKE_MQ with mpu (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH handle 1: root cake_mq overhead 128 mpu 256"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $ETH handle 1: root cake_mq mpu 128",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms noatm overhead 128 mpu 128 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "6df9",
+ "name": "Change CAKE_MQ with mpu (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH handle 1: root cake_mq overhead 128 mpu 256"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $ETH handle 1: root cake_mq mpu 128",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms noatm overhead 128 mpu 128 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "67e2",
+ "name": "Show CAKE_MQ class (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $ETH",
+ "matchPattern": "class cake_mq",
+ "matchCount": "4",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "2de4",
+ "name": "Change bandwidth of CAKE_MQ (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH handle 1: root cake_mq"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $ETH handle 1: root cake_mq bandwidth 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth 1Kbit diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "5f62",
+ "name": "Fail to create CAKE_MQ with autorate-ingress flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq autorate-ingress",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited autorate-ingress diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "038e",
+ "name": "Fail to change setting of sub-qdisc under CAKE_MQ",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH handle 1: root cake_mq"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH parent 1:1 cake besteffort flows",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "7bdc",
+ "name": "Fail to replace sub-qdisc under CAKE_MQ",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH handle 1: root cake_mq"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH parent 1:1 fq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "18e0",
+ "name": "Fail to install CAKE_MQ on single queue device",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 1\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json
index e5cc31f265f8..0179c57104ad 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json
@@ -81,5 +81,30 @@
"$TC qdisc del dev $DUMMY handle 1: root",
"$IP link del dev $DUMMY"
]
+ },
+ {
+ "id": "124e",
+ "name": "Try to add teql as a child qdisc",
+ "category": [
+ "qdisc",
+ "ets",
+ "tbf"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin"
+ ]
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle 1: qfq",
+ "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 15 maxpkt 16384"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1:1 handle 2:1 teql0",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:1",
+ "matchJSON": [],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root handle 1:"
+ ]
}
]
diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
index ca2bd03154e4..569d44f22835 100644
--- a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
+++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
@@ -12,6 +12,7 @@
#define WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/notification_delay_ms"
#define WORKLOAD_ENABLE_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/workload_hint_enable"
+#define WORKLOAD_SLOW_ENABLE_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/workload_slow_hint_enable"
#define WORKLOAD_TYPE_INDEX_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/workload_type_index"
static const char * const workload_types[] = {
@@ -22,6 +23,9 @@ static const char * const workload_types[] = {
NULL
};
+static int wlt_slow;
+static char *wlt_enable_attr;
+
#define WORKLOAD_TYPE_MAX_INDEX 3
void workload_hint_exit(int signum)
@@ -30,7 +34,7 @@ void workload_hint_exit(int signum)
/* Disable feature via sysfs knob */
- fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR);
+ fd = open(wlt_enable_attr, O_RDWR);
if (fd < 0) {
perror("Unable to open workload type feature enable file");
exit(1);
@@ -46,6 +50,26 @@ void workload_hint_exit(int signum)
close(fd);
}
+/*
+ * Write @delay_str to the workload hint notification delay sysfs attribute.
+ *
+ * The string is written as-is. The process exits with status 1 if the
+ * attribute cannot be opened or the write fails.
+ */
+static void update_delay(char *delay_str)
+{
+	ssize_t written;
+	int attr_fd;
+
+	printf("Setting notification delay in ms to %s\n", delay_str);
+
+	attr_fd = open(WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE, O_RDWR);
+	if (attr_fd < 0) {
+		perror("Unable to open workload notification delay");
+		exit(1);
+	}
+
+	written = write(attr_fd, delay_str, strlen(delay_str));
+	if (written < 0) {
+		perror("Can't set delay");
+		exit(1);
+	}
+
+	close(attr_fd);
+}
+
int main(int argc, char **argv)
{
struct pollfd ufd;
@@ -54,32 +78,26 @@ int main(int argc, char **argv)
char delay_str[64];
int delay = 0;
- printf("Usage: workload_hint_test [notification delay in milli seconds]\n");
+ printf("Usage: workload_hint_test [notification delay in milli seconds][slow]\n");
if (argc > 1) {
- ret = sscanf(argv[1], "%d", &delay);
- if (ret < 0) {
- printf("Invalid delay\n");
- exit(1);
- }
+ int i;
- printf("Setting notification delay to %d ms\n", delay);
- if (delay < 0)
- exit(1);
+ for (i = 1; i < argc; ++i) {
+ if (!strcmp(argv[i], "slow")) {
+ wlt_slow = 1;
+ continue;
+ }
- sprintf(delay_str, "%s\n", argv[1]);
- fd = open(WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE, O_RDWR);
- if (fd < 0) {
- perror("Unable to open workload notification delay");
- exit(1);
- }
+ /* parse the argument the loop is currently looking at, not always argv[1] */
+ ret = sscanf(argv[i], "%d", &delay);
+ /* sscanf() returns 0, not a negative value, when the input is not a number */
+ if (ret != 1) {
+ printf("Invalid delay\n");
+ exit(1);
+ }
- if (write(fd, delay_str, strlen(delay_str)) < 0) {
- perror("Can't set delay");
- exit(1);
+ /* bounded copy: delay_str is a fixed-size buffer and argv[i] is user input */
+ snprintf(delay_str, sizeof(delay_str), "%s\n", argv[i]);
+ update_delay(delay_str);
}
-
- close(fd);
}
if (signal(SIGINT, workload_hint_exit) == SIG_IGN)
@@ -89,8 +107,13 @@ int main(int argc, char **argv)
if (signal(SIGTERM, workload_hint_exit) == SIG_IGN)
signal(SIGTERM, SIG_IGN);
+ if (wlt_slow)
+ wlt_enable_attr = WORKLOAD_SLOW_ENABLE_ATTRIBUTE;
+ else
+ wlt_enable_attr = WORKLOAD_ENABLE_ATTRIBUTE;
+
/* Enable feature via sysfs knob */
- fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR);
+ fd = open(wlt_enable_attr, O_RDWR);
if (fd < 0) {
perror("Unable to open workload type feature enable file");
exit(1);
@@ -145,6 +168,13 @@ int main(int argc, char **argv)
if (ret < 0)
break;
+ if (wlt_slow) {
+ if (index & 0x10)
+ printf("workload type slow:%s\n", "power");
+ else
+ printf("workload type slow:%s\n", "performance");
+ }
+
index &= 0x0f;
if (index > WORKLOAD_TYPE_MAX_INDEX)
printf("Invalid workload type index\n");
diff --git a/tools/testing/selftests/ublk/.gitignore b/tools/testing/selftests/ublk/.gitignore
index 8b2871ea7751..e17bd28f27e0 100644
--- a/tools/testing/selftests/ublk/.gitignore
+++ b/tools/testing/selftests/ublk/.gitignore
@@ -1,3 +1,5 @@
-kublk
-/tools
+# SPDX-License-Identifier: GPL-2.0
*-verify.state
+/tools
+kublk
+metadata_size
diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile
index 770269efe42a..8ac2d4a682a1 100644
--- a/tools/testing/selftests/ublk/Makefile
+++ b/tools/testing/selftests/ublk/Makefile
@@ -7,45 +7,106 @@ endif
LDLIBS += -lpthread -lm -luring
-TEST_PROGS := test_generic_01.sh
-TEST_PROGS += test_generic_02.sh
+TEST_PROGS := test_generic_02.sh
TEST_PROGS += test_generic_03.sh
-TEST_PROGS += test_generic_04.sh
-TEST_PROGS += test_generic_05.sh
TEST_PROGS += test_generic_06.sh
TEST_PROGS += test_generic_07.sh
TEST_PROGS += test_generic_08.sh
TEST_PROGS += test_generic_09.sh
TEST_PROGS += test_generic_10.sh
-TEST_PROGS += test_generic_11.sh
TEST_PROGS += test_generic_12.sh
TEST_PROGS += test_generic_13.sh
+TEST_PROGS += test_generic_16.sh
+
+TEST_PROGS += test_batch_01.sh
+TEST_PROGS += test_batch_02.sh
+TEST_PROGS += test_batch_03.sh
TEST_PROGS += test_null_01.sh
TEST_PROGS += test_null_02.sh
+TEST_PROGS += test_null_03.sh
TEST_PROGS += test_loop_01.sh
TEST_PROGS += test_loop_02.sh
TEST_PROGS += test_loop_03.sh
TEST_PROGS += test_loop_04.sh
TEST_PROGS += test_loop_05.sh
+TEST_PROGS += test_loop_06.sh
+TEST_PROGS += test_loop_07.sh
+
+TEST_PROGS += test_integrity_01.sh
+TEST_PROGS += test_integrity_02.sh
+
+TEST_PROGS += test_recover_01.sh
+TEST_PROGS += test_recover_02.sh
+TEST_PROGS += test_recover_03.sh
+TEST_PROGS += test_recover_04.sh
TEST_PROGS += test_stripe_01.sh
TEST_PROGS += test_stripe_02.sh
TEST_PROGS += test_stripe_03.sh
TEST_PROGS += test_stripe_04.sh
+TEST_PROGS += test_stripe_05.sh
+TEST_PROGS += test_stripe_06.sh
+
+TEST_PROGS += test_part_01.sh
+TEST_PROGS += test_part_02.sh
TEST_PROGS += test_stress_01.sh
TEST_PROGS += test_stress_02.sh
TEST_PROGS += test_stress_03.sh
TEST_PROGS += test_stress_04.sh
TEST_PROGS += test_stress_05.sh
+TEST_PROGS += test_stress_06.sh
+TEST_PROGS += test_stress_07.sh
+TEST_PROGS += test_stress_08.sh
+TEST_PROGS += test_stress_09.sh
+
+TEST_FILES := settings
-TEST_GEN_PROGS_EXTENDED = kublk
+TEST_GEN_PROGS_EXTENDED = kublk metadata_size
+STANDALONE_UTILS := metadata_size.c
+LOCAL_HDRS += $(wildcard *.h)
include ../lib.mk
-$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c \
- fault_inject.c
+$(OUTPUT)/kublk: $(filter-out $(STANDALONE_UTILS),$(wildcard *.c))
check:
shellcheck -x -f gcc *.sh
+
+# Test groups for running subsets of tests
+# JOBS=1 (default): sequential with kselftest TAP output
+# JOBS>1: parallel execution with xargs -P
+# Usage: make run_null JOBS=4
+JOBS ?= 1
+export JOBS
+
+# Auto-detect test groups from TEST_PROGS (test_<group>_<num>.sh -> group)
+TEST_GROUPS := $(shell echo "$(TEST_PROGS)" | tr ' ' '\n' | \
+ sed 's/test_\([^_]*\)_.*/\1/' | sort -u)
+
+# Template for group test targets
+# $(1) = group name (e.g., null, generic, stress)
+define RUN_GROUP
+run_$(1): all
+ @if [ $$(JOBS) -gt 1 ]; then \
+ echo $$(filter test_$(1)_%.sh,$$(TEST_PROGS)) | tr ' ' '\n' | \
+ xargs -P $$(JOBS) -n1 sh -c './"$$$$0"' || true; \
+ else \
+ $$(call RUN_TESTS, $$(filter test_$(1)_%.sh,$$(TEST_PROGS))); \
+ fi
+.PHONY: run_$(1)
+endef
+
+# Generate targets for each discovered test group
+$(foreach group,$(TEST_GROUPS),$(eval $(call RUN_GROUP,$(group))))
+
+# Run all tests (parallel when JOBS>1)
+run_all: all
+ @if [ $(JOBS) -gt 1 ]; then \
+ echo $(TEST_PROGS) | tr ' ' '\n' | \
+ xargs -P $(JOBS) -n1 sh -c './"$$0"' || true; \
+ else \
+ $(call RUN_TESTS, $(TEST_PROGS)); \
+ fi
+.PHONY: run_all
diff --git a/tools/testing/selftests/ublk/batch.c b/tools/testing/selftests/ublk/batch.c
new file mode 100644
index 000000000000..a54025b00917
--- /dev/null
+++ b/tools/testing/selftests/ublk/batch.c
@@ -0,0 +1,607 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: UBLK_F_BATCH_IO buffer management
+ */
+
+#include "kublk.h"
+
+/*
+ * Translate a thread-wide buffer index into the address of a commit buffer.
+ *
+ * Returns NULL when @buf_idx lies outside
+ * [commit_buf_start, commit_buf_start + nr_commit_buf).
+ */
+static inline void *ublk_get_commit_buf(struct ublk_thread *t,
+					unsigned short buf_idx)
+{
+	if (buf_idx >= t->commit_buf_start &&
+	    buf_idx < t->commit_buf_start + t->nr_commit_buf) {
+		/* position of the buffer inside the commit buffer area */
+		unsigned slot = buf_idx - t->commit_buf_start;
+
+		return t->commit_buf + slot * t->commit_buf_size;
+	}
+
+	return NULL;
+}
+
+/*
+ * Grab one commit buffer for UBLK_U_IO_PREP_IO_CMDS or
+ * UBLK_U_IO_COMMIT_IO_CMDS.
+ *
+ * Returns the thread-wide buffer index (allocator slot plus
+ * commit_buf_start), or UBLKS_T_COMMIT_BUF_INV_IDX when the allocator
+ * returns a negative value.
+ */
+static inline unsigned short ublk_alloc_commit_buf(struct ublk_thread *t)
+{
+	int slot = allocator_get(&t->commit_buf_alloc);
+
+	if (slot < 0)
+		return UBLKS_T_COMMIT_BUF_INV_IDX;
+
+	return slot + t->commit_buf_start;
+}
+
+/*
+ * Return a commit buffer used by UBLK_U_IO_PREP_IO_CMDS or
+ * UBLK_U_IO_COMMIT_IO_CMDS to the per-thread allocator.
+ *
+ * @i is the thread-wide buffer index as handed out by
+ * ublk_alloc_commit_buf().
+ */
+static inline void ublk_free_commit_buf(struct ublk_thread *t,
+					 unsigned short i)
+{
+	/* convert back to the allocator's own slot number */
+	unsigned short slot = i - t->commit_buf_start;
+
+	ublk_assert(slot < t->nr_commit_buf);
+	/* the allocator must report a non-zero value for a slot being freed */
+	ublk_assert(allocator_get_val(&t->commit_buf_alloc, slot) != 0);
+
+	allocator_put(&t->commit_buf_alloc, slot);
+}
+
+/*
+ * Size in bytes of one element in a commit buffer.
+ *
+ * Devices with zero copy, user copy or auto buffer registration use 8-byte
+ * elements; all others need one extra 8 bytes for carrying the buffer
+ * address.
+ */
+static unsigned char ublk_commit_elem_buf_size(struct ublk_dev *dev)
+{
+	int no_buf_addr = !!(dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY |
+						    UBLK_F_USER_COPY |
+						    UBLK_F_AUTO_BUF_REG));
+
+	return no_buf_addr ? 8 : 16;
+}
+
+/*
+ * Size in bytes of one commit buffer: room for queue_depth elements,
+ * rounded up to the page size.
+ */
+static unsigned ublk_commit_buf_size(struct ublk_thread *t)
+{
+	unsigned per_elem = ublk_commit_elem_buf_size(t->dev);
+	unsigned int bytes = per_elem * t->dev->dev_info.queue_depth;
+	unsigned int page_bytes = getpagesize();
+
+	return round_up(bytes, page_bytes);
+}
+
+/*
+ * Release the commit buffer area, its allocator and the per-queue commit
+ * state of @t.
+ *
+ * This runs both from the failure path of alloc_batch_commit_buf() and from
+ * ublk_batch_free_buf(), so the freed pointers are reset to NULL to keep a
+ * second call from freeing them again.
+ */
+static void free_batch_commit_buf(struct ublk_thread *t)
+{
+	if (t->commit_buf) {
+		unsigned buf_size = ublk_commit_buf_size(t);
+		unsigned int total = buf_size * t->nr_commit_buf;
+
+		munlock(t->commit_buf, total);
+		free(t->commit_buf);
+		t->commit_buf = NULL;
+	}
+	allocator_deinit(&t->commit_buf_alloc);
+	free(t->commit);
+	t->commit = NULL;
+}
+
+/*
+ * Allocate the per-queue commit state and the page-aligned commit buffer
+ * area (nr_commit_buf buffers of ublk_commit_buf_size() bytes each) of @t.
+ *
+ * Returns 0 on success or a negative errno value on failure; on failure
+ * everything set up here is released again via free_batch_commit_buf().
+ * Failing to mlock() the buffer area is only logged.
+ */
+static int alloc_batch_commit_buf(struct ublk_thread *t)
+{
+	unsigned buf_size = ublk_commit_buf_size(t);
+	unsigned int total = buf_size * t->nr_commit_buf;
+	unsigned int page_sz = getpagesize();
+	void *buf = NULL;
+	int i, ret, j = 0;
+
+	/* set up everything the failure path looks at before anything can fail */
+	t->commit_buf = NULL;
+	allocator_init(&t->commit_buf_alloc, t->nr_commit_buf);
+
+	t->commit = calloc(t->nr_queues, sizeof(*t->commit));
+	if (!t->commit) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	/* record, in thread-local order, the id of each queue this thread handles */
+	for (i = 0; i < t->dev->dev_info.nr_hw_queues; i++) {
+		if (t->q_map[i])
+			t->commit[j++].q_id = i;
+	}
+
+	ret = posix_memalign(&buf, page_sz, total);
+	if (ret || !buf) {
+		/*
+		 * posix_memalign() reports errors as positive errno values;
+		 * never leave this path with ret == 0.
+		 */
+		ret = ret ? -ret : -ENOMEM;
+		goto fail;
+	}
+
+	t->commit_buf = buf;
+
+	/* lock commit buffer pages for fast access */
+	if (mlock(t->commit_buf, total))
+		ublk_err("%s: can't lock commit buffer %s\n", __func__,
+			strerror(errno));
+
+	return 0;
+
+fail:
+	free_batch_commit_buf(t);
+	return ret;
+}
+
+/* Count the hardware queues that have a non-zero entry in @t's q_map. */
+static unsigned int ublk_thread_nr_queues(const struct ublk_thread *t)
+{
+	int nr = 0;
+	int q_id;
+
+	for (q_id = 0; q_id < t->dev->dev_info.nr_hw_queues; q_id++) {
+		if (t->q_map[q_id])
+			nr++;
+	}
+
+	return nr;
+}
+
+/*
+ * Compute the batch-I/O parameters of @t: number of handled queues, commit
+ * element and buffer sizes, the range of buffer indexes reserved for commit
+ * buffers (appended after the current t->nr_bufs), and the uring_cmd flags.
+ * Marks the thread with UBLKS_T_BATCH_IO.
+ */
+void ublk_batch_prepare(struct ublk_thread *t)
+{
+	/*
+	 * We only handle a single device in this thread context.
+	 *
+	 * All queues have the same feature flags, so queue 0 is used to
+	 * calculate the uring_cmd flags.
+	 *
+	 * Not elegant, but it works so far.
+	 */
+	struct ublk_queue *q = &t->dev->q[0];
+
+	/* cache nr_queues because we don't support dynamic load-balance yet */
+	t->nr_queues = ublk_thread_nr_queues(t);
+
+	t->commit_buf_elem_size = ublk_commit_elem_buf_size(t->dev);
+	t->commit_buf_size = ublk_commit_buf_size(t);
+	/* commit buffers take the indexes right after the existing buffers */
+	t->commit_buf_start = t->nr_bufs;
+	t->nr_commit_buf = 2 * t->nr_queues;
+	t->nr_bufs += t->nr_commit_buf;
+
+	t->cmd_flags = 0;
+	if (ublk_queue_use_auto_zc(q)) {
+		if (ublk_queue_auto_zc_fallback(q))
+			t->cmd_flags |= UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK;
+	} else if (!ublk_queue_no_buf(q))
+		t->cmd_flags |= UBLK_BATCH_F_HAS_BUF_ADDR;
+
+	t->state |= UBLKS_T_BATCH_IO;
+
+	/* report the first commit buffer index, not the already advanced nr_bufs */
+	ublk_log("%s: thread %d commit(nr_bufs %u, buf_size %u, start %u)\n",
+			__func__, t->idx,
+			t->nr_commit_buf, t->commit_buf_size,
+			t->commit_buf_start);
+}
+
+/*
+ * Tear down every fetch buffer of @t: free its buffer ring (buffer group id
+ * equals the fetch buffer index), unlock and free the memory, then free the
+ * descriptor array itself.
+ */
+static void free_batch_fetch_buf(struct ublk_thread *t)
+{
+	int idx = 0;
+
+	while (idx < t->nr_fetch_bufs) {
+		io_uring_free_buf_ring(&t->ring, t->fetch[idx].br, 1, idx);
+		munlock(t->fetch[idx].fetch_buf, t->fetch[idx].fetch_buf_size);
+		free(t->fetch[idx].fetch_buf);
+		idx++;
+	}
+
+	free(t->fetch);
+}
+
+/*
+ * Allocate the fetch buffers of @t: two per handled queue, each page aligned,
+ * sized for queue_depth 2-byte entries rounded up to the page size, and
+ * registered as a one-entry buffer ring whose group id is the fetch buffer
+ * index.
+ *
+ * Returns 0 on success, -ENOMEM when memory allocation fails, or the error
+ * reported by io_uring_setup_buf_ring(). Buffers set up before a failure are
+ * left in place for ublk_batch_free_buf(). Failing to mlock() a buffer is
+ * only logged.
+ */
+static int alloc_batch_fetch_buf(struct ublk_thread *t)
+{
+	/* page aligned fetch buffer, and it is mlocked for speedup delivery */
+	unsigned pg_sz = getpagesize();
+	unsigned buf_size = round_up(t->dev->dev_info.queue_depth * 2, pg_sz);
+	int ret = 0;
+	int i = 0;
+
+	/* double fetch buffer for each queue */
+	t->nr_fetch_bufs = t->nr_queues * 2;
+	t->fetch = calloc(t->nr_fetch_bufs, sizeof(*t->fetch));
+	if (!t->fetch) {
+		/* keep free_batch_fetch_buf() from walking a NULL array */
+		t->nr_fetch_bufs = 0;
+		return -ENOMEM;
+	}
+
+	/* allocate, lock and register every fetch buffer */
+	for (i = 0; i < t->nr_fetch_bufs; i++) {
+		t->fetch[i].fetch_buf_size = buf_size;
+
+		if (posix_memalign((void **)&t->fetch[i].fetch_buf, pg_sz,
+					t->fetch[i].fetch_buf_size))
+			return -ENOMEM;
+
+		/* lock fetch buffer page for fast fetching */
+		if (mlock(t->fetch[i].fetch_buf, t->fetch[i].fetch_buf_size))
+			ublk_err("%s: can't lock fetch buffer %s\n", __func__,
+				strerror(errno));
+		t->fetch[i].br = io_uring_setup_buf_ring(&t->ring, 1,
+			i, IOU_PBUF_RING_INC, &ret);
+		if (!t->fetch[i].br) {
+			ublk_err("Buffer ring register failed %d\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate the commit buffers and then the fetch buffers of @t.
+ *
+ * Returns 0 on success, otherwise the error of the first step that failed;
+ * the fetch buffers are not attempted when the commit buffers fail.
+ */
+int ublk_batch_alloc_buf(struct ublk_thread *t)
+{
+	int err;
+
+	ublk_assert(t->nr_commit_buf < 2 * UBLK_MAX_QUEUES);
+
+	err = alloc_batch_commit_buf(t);
+	if (!err)
+		err = alloc_batch_fetch_buf(t);
+
+	return err;
+}
+
+/* Release the commit buffers and then the fetch buffers of @t. */
+void ublk_batch_free_buf(struct ublk_thread *t)
+{
+ free_batch_commit_buf(t);
+ free_batch_fetch_buf(t);
+}
+
+/*
+ * Fill in @sqe as an IORING_OP_URING_CMD that carries a ublk batch command.
+ *
+ * @t: issuing thread; its cmd_inflight counter is incremented
+ * @q_id: queue id, stored in the command and encoded into user_data
+ * @sqe: submission queue entry to set up
+ * @op: command opcode, applied with ublk_set_sqe_cmd_op()
+ * @elem_bytes: stored in cmd->elem_bytes
+ * @nr_elem: stored in cmd->nr_elem and encoded into user_data
+ * @buf_idx: encoded into user_data
+ *
+ * cmd->flags is reset to 0 here, so flags have to be ORed in after this call.
+ */
+static void ublk_init_batch_cmd(struct ublk_thread *t, __u16 q_id,
+ struct io_uring_sqe *sqe, unsigned op,
+ unsigned short elem_bytes,
+ unsigned short nr_elem,
+ unsigned short buf_idx)
+{
+ struct ublk_batch_io *cmd;
+ __u64 user_data;
+
+ cmd = (struct ublk_batch_io *)ublk_get_sqe_cmd(sqe);
+
+ ublk_set_sqe_cmd_op(sqe, op);
+
+ sqe->fd = 0; /* dev->fds[0] */
+ sqe->opcode = IORING_OP_URING_CMD;
+ sqe->flags = IOSQE_FIXED_FILE;
+
+ cmd->q_id = q_id;
+ cmd->flags = 0;
+ cmd->reserved = 0;
+ cmd->elem_bytes = elem_bytes;
+ cmd->nr_elem = nr_elem;
+
+ /* buf_idx, the opcode number, nr_elem and q_id all travel in user_data */
+ user_data = build_user_data(buf_idx, _IOC_NR(op), nr_elem, q_id, 0);
+ io_uring_sqe_set_data64(sqe, user_data);
+
+ t->cmd_inflight += 1;
+
+ ublk_dbg(UBLK_DBG_IO_CMD, "%s: thread %u qid %d cmd_op %x data %lx "
+ "nr_elem %u elem_bytes %u buf_size %u buf_idx %d "
+ "cmd_inflight %u\n",
+ __func__, t->idx, q_id, op, user_data,
+ cmd->nr_elem, cmd->elem_bytes,
+ nr_elem * elem_bytes, buf_idx, t->cmd_inflight);
+}
+
+/*
+ * OR the thread's cached batch flags (t->cmd_flags) into the batch command
+ * embedded in @sqe.
+ *
+ * @buf_idx is not used by this function.
+ */
+static void ublk_setup_commit_sqe(struct ublk_thread *t,
+ struct io_uring_sqe *sqe,
+ unsigned short buf_idx)
+{
+ struct ublk_batch_io *cmd;
+
+ cmd = (struct ublk_batch_io *)ublk_get_sqe_cmd(sqe);
+
+ /* Use plain user buffer instead of fixed buffer */
+ cmd->flags |= t->cmd_flags;
+}
+
+/*
+ * Queue one multishot UBLK_U_IO_FETCH_IO_CMDS command for @q that uses fetch
+ * buffer @buf_idx.
+ *
+ * The whole fetch buffer is added to its buffer ring, the SQE selects its
+ * buffer from group @buf_idx, and the recorded fetch offset is reset to 0.
+ */
+static void ublk_batch_queue_fetch(struct ublk_thread *t,
+ struct ublk_queue *q,
+ unsigned short buf_idx)
+{
+ /* each element is 2 bytes, matching the elem_bytes passed below */
+ unsigned short nr_elem = t->fetch[buf_idx].fetch_buf_size / 2;
+ struct io_uring_sqe *sqe;
+
+ io_uring_buf_ring_add(t->fetch[buf_idx].br, t->fetch[buf_idx].fetch_buf,
+ t->fetch[buf_idx].fetch_buf_size,
+ 0, 0, 0);
+ io_uring_buf_ring_advance(t->fetch[buf_idx].br, 1);
+
+ ublk_io_alloc_sqes(t, &sqe, 1);
+
+ ublk_init_batch_cmd(t, q->q_id, sqe, UBLK_U_IO_FETCH_IO_CMDS, 2, nr_elem,
+ buf_idx);
+
+ sqe->rw_flags= IORING_URING_CMD_MULTISHOT;
+ sqe->buf_group = buf_idx;
+ sqe->flags |= IOSQE_BUFFER_SELECT;
+
+ t->fetch[buf_idx].fetch_buf_off = 0;
+}
+
+/*
+ * Queue the initial fetch commands of @t: two per handled queue, using
+ * consecutive fetch buffer indexes starting at 0.
+ */
+void ublk_batch_start_fetch(struct ublk_thread *t)
+{
+	int q_id;
+	int fetch_idx = 0;
+
+	for (q_id = 0; q_id < t->dev->dev_info.nr_hw_queues; q_id++) {
+		struct ublk_queue *q;
+
+		/* queue not handled by this thread */
+		if (!t->q_map[q_id])
+			continue;
+
+		q = &t->dev->q[q_id];
+
+		/* submit two fetch commands for each queue */
+		ublk_batch_queue_fetch(t, q, fetch_idx++);
+		ublk_batch_queue_fetch(t, q, fetch_idx++);
+	}
+}
+
+/*
+ * Process one FETCH_IO_CMDS completion for @q.
+ *
+ * cqe->res bytes of 2-byte tags were appended to the fetch buffer at the
+ * recorded offset; each valid tag is handed to the target's queue_io()
+ * callback and the offset is advanced. A tag outside the queue depth is
+ * logged and skipped instead of being passed on.
+ *
+ * Returns the fetch buffer index recovered from the completion's user_data;
+ * a completion with a negative result is not processed any further.
+ */
+static unsigned short ublk_compl_batch_fetch(struct ublk_thread *t,
+				       struct ublk_queue *q,
+				       const struct io_uring_cqe *cqe)
+{
+	unsigned short buf_idx = user_data_to_tag(cqe->user_data);
+	unsigned start = t->fetch[buf_idx].fetch_buf_off;
+	unsigned end = start + cqe->res;
+	void *buf = t->fetch[buf_idx].fetch_buf;
+	int i;
+
+	if (cqe->res < 0)
+		return buf_idx;
+
+	/* more tags than the queue has slots: dump them for debugging */
+	if ((end - start) / 2 > q->q_depth) {
+		ublk_err("%s: fetch duplicated ios offset %u count %u\n", __func__, start, cqe->res);
+
+		for (i = start; i < end; i += 2) {
+			unsigned short tag = *(unsigned short *)(buf + i);
+
+			ublk_err("%u ", tag);
+		}
+		ublk_err("\n");
+	}
+
+	for (i = start; i < end; i += 2) {
+		unsigned short tag = *(unsigned short *)(buf + i);
+
+		if (tag >= q->q_depth) {
+			ublk_err("%s: bad tag %u\n", __func__, tag);
+			/* never hand an out-of-range tag to the target */
+			continue;
+		}
+
+		if (q->tgt_ops->queue_io)
+			q->tgt_ops->queue_io(t, q, tag);
+	}
+	t->fetch[buf_idx].fetch_buf_off = end;
+	return buf_idx;
+}
+
+/*
+ * Build and queue one UBLK_U_IO_PREP_IO_CMDS command covering every tag of
+ * @q.
+ *
+ * A commit buffer is allocated and filled with one element per tag; the
+ * element carries either the auto-registered buffer index or the I/O buffer
+ * address, depending on the queue's buffer mode. Always returns 0.
+ */
+static int __ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q)
+{
+	unsigned short nr_elem = q->q_depth;
+	unsigned short buf_idx = ublk_alloc_commit_buf(t);
+	struct io_uring_sqe *sqe;
+	void *buf;
+	int i;
+
+	ublk_assert(buf_idx != UBLKS_T_COMMIT_BUF_INV_IDX);
+
+	ublk_io_alloc_sqes(t, &sqe, 1);
+
+	buf = ublk_get_commit_buf(t, buf_idx);
+	for (i = 0; i < nr_elem; i++) {
+		struct ublk_batch_elem *elem = (struct ublk_batch_elem *)(
+				buf + i * t->commit_buf_elem_size);
+		struct ublk_io *io = &q->ios[i];
+
+		elem->tag = i;
+		elem->result = 0;
+
+		/* go through uintptr_t so the pointer is never sign-extended */
+		if (ublk_queue_use_auto_zc(q))
+			elem->buf_index = ublk_batch_io_buf_idx(t, q, i);
+		else if (!ublk_queue_no_buf(q))
+			elem->buf_addr = (__u64)(uintptr_t)io->buf_addr;
+	}
+
+	sqe->addr = (__u64)(uintptr_t)buf;
+	sqe->len = t->commit_buf_elem_size * nr_elem;
+
+	ublk_init_batch_cmd(t, q->q_id, sqe, UBLK_U_IO_PREP_IO_CMDS,
+			t->commit_buf_elem_size, nr_elem, buf_idx);
+	ublk_setup_commit_sqe(t, sqe, buf_idx);
+	return 0;
+}
+
+/*
+ * Queue the PREP_IO_CMDS command for @q at most once.
+ *
+ * Under q->lock: if UBLKS_Q_PREPARED is already set nothing is done,
+ * otherwise the command is queued and the flag is set on success.
+ * Returns 0 or the error of __ublk_batch_queue_prep_io_cmds().
+ */
+int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q)
+{
+	int err = 0;
+
+	pthread_spin_lock(&q->lock);
+	if (!(q->flags & UBLKS_Q_PREPARED)) {
+		err = __ublk_batch_queue_prep_io_cmds(t, q);
+		if (err == 0)
+			q->flags |= UBLKS_Q_PREPARED;
+	}
+	pthread_spin_unlock(&q->lock);
+
+	return err;
+}
+
+/*
+ * Check the result of a completed PREP_IO_CMDS or COMMIT_IO_CMDS command and
+ * release the commit buffer it used.
+ *
+ * @op is the opcode number taken from user_data. PREP_IO_CMDS must complete
+ * with 0; COMMIT_IO_CMDS must complete with the number of bytes submitted
+ * (element size times the element count stored in user_data). Any other
+ * opcode trips an assertion.
+ */
+static void ublk_batch_compl_commit_cmd(struct ublk_thread *t,
+ const struct io_uring_cqe *cqe,
+ unsigned op)
+{
+ /* the tag field of user_data carries the commit buffer index */
+ unsigned short buf_idx = user_data_to_tag(cqe->user_data);
+
+ if (op == _IOC_NR(UBLK_U_IO_PREP_IO_CMDS))
+ ublk_assert(cqe->res == 0);
+ else if (op == _IOC_NR(UBLK_U_IO_COMMIT_IO_CMDS)) {
+ int nr_elem = user_data_to_tgt_data(cqe->user_data);
+
+ ublk_assert(cqe->res == t->commit_buf_elem_size * nr_elem);
+ } else
+ ublk_assert(0);
+
+ ublk_free_commit_buf(t, buf_idx);
+}
+
+/*
+ * Handle one completion of a batch command issued by thread @t.
+ *
+ * PREP_IO_CMDS and COMMIT_IO_CMDS completions drop cmd_inflight and are
+ * passed to ublk_batch_compl_commit_cmd(). Every other completion is treated
+ * as FETCH_IO_CMDS for the queue encoded in user_data and is processed by
+ * ublk_compl_batch_fetch().
+ */
+void ublk_batch_compl_cmd(struct ublk_thread *t,
+ const struct io_uring_cqe *cqe)
+{
+ unsigned op = user_data_to_op(cqe->user_data);
+ struct ublk_queue *q;
+ unsigned buf_idx;
+ unsigned q_id;
+
+ if (op == _IOC_NR(UBLK_U_IO_PREP_IO_CMDS) ||
+ op == _IOC_NR(UBLK_U_IO_COMMIT_IO_CMDS)) {
+ t->cmd_inflight--;
+ ublk_batch_compl_commit_cmd(t, cqe, op);
+ return;
+ }
+
+ /* FETCH command is per queue */
+ q_id = user_data_to_q_id(cqe->user_data);
+ q = &t->dev->q[q_id];
+ buf_idx = ublk_compl_batch_fetch(t, q, cqe);
+
+ if (cqe->res < 0 && cqe->res != -ENOBUFS) {
+ /* any error other than -ENOBUFS: mark the thread as stopping */
+ t->cmd_inflight--;
+ t->state |= UBLKS_T_STOPPING;
+ } else if (!(cqe->flags & IORING_CQE_F_MORE) || cqe->res == -ENOBUFS) {
+ /*
+ * IORING_CQE_F_MORE is clear or the result is -ENOBUFS: count the
+ * fetch command as done and queue it again on the same buffer
+ */
+ t->cmd_inflight--;
+ ublk_batch_queue_fetch(t, q, buf_idx);
+ }
+}
+
+/*
+ * Submit the elements collected in @cb as one UBLK_U_IO_COMMIT_IO_CMDS
+ * command issued with the queue id recorded in @cb.
+ *
+ * When nothing was collected (cb->done == 0) the commit buffer is handed
+ * back to the allocator and no command is queued.
+ */
+static void __ublk_batch_commit_io_cmds(struct ublk_thread *t,
+					struct batch_commit_buf *cb)
+{
+	struct io_uring_sqe *sqe;
+	unsigned short buf_idx;
+	unsigned short nr_elem = cb->done;
+
+	/* nothing to commit */
+	if (!nr_elem) {
+		ublk_free_commit_buf(t, cb->buf_idx);
+		return;
+	}
+
+	ublk_io_alloc_sqes(t, &sqe, 1);
+	buf_idx = cb->buf_idx;
+	/* go through uintptr_t so the pointer is never sign-extended */
+	sqe->addr = (__u64)(uintptr_t)cb->elem;
+	sqe->len = nr_elem * t->commit_buf_elem_size;
+
+	ublk_init_batch_cmd(t, cb->q_id, sqe, UBLK_U_IO_COMMIT_IO_CMDS,
+			t->commit_buf_elem_size, nr_elem, buf_idx);
+	ublk_setup_commit_sqe(t, sqe, buf_idx);
+}
+
+/*
+ * Walk the per-queue commit state of @t and submit every entry that
+ * currently owns a commit buffer.
+ */
+void ublk_batch_commit_io_cmds(struct ublk_thread *t)
+{
+	int q_t_idx;
+
+	for (q_t_idx = 0; q_t_idx < t->nr_queues; q_t_idx++) {
+		struct batch_commit_buf *cb = &t->commit[q_t_idx];
+
+		/* no commit buffer was set up for this queue */
+		if (cb->buf_idx == UBLKS_T_COMMIT_BUF_INV_IDX)
+			continue;
+
+		__ublk_batch_commit_io_cmds(t, cb);
+	}
+}
+
+/*
+ * Attach commit buffer @buf_idx to @cb and reset its fill state: no element
+ * is done yet and the capacity is the number of elements that fit in one
+ * commit buffer.
+ */
+static void __ublk_batch_init_commit(struct ublk_thread *t,
+				     struct batch_commit_buf *cb,
+				     unsigned short buf_idx)
+{
+	/* so far only support 1:1 queue/thread mapping */
+	cb->done = 0;
+	cb->count = t->commit_buf_size / t->commit_buf_elem_size;
+	cb->buf_idx = buf_idx;
+	cb->elem = ublk_get_commit_buf(t, buf_idx);
+}
+
+/*
+ * COMMIT_IO_CMDS is a per-queue command, so every queue gets its own commit
+ * buffer: allocate one and attach it to @cb, which must not be prepared yet.
+ */
+static void ublk_batch_init_commit(struct ublk_thread *t,
+				   struct batch_commit_buf *cb)
+{
+	unsigned short new_idx = ublk_alloc_commit_buf(t);
+
+	ublk_assert(new_idx != UBLKS_T_COMMIT_BUF_INV_IDX);
+	ublk_assert(!ublk_batch_commit_prepared(cb));
+
+	__ublk_batch_init_commit(t, cb, new_idx);
+}
+
+/*
+ * Mark the commit state of every queue handled by @t as owning no commit
+ * buffer.
+ */
+void ublk_batch_prep_commit(struct ublk_thread *t)
+{
+	int q_t_idx = 0;
+
+	while (q_t_idx < t->nr_queues) {
+		t->commit[q_t_idx].buf_idx = UBLKS_T_COMMIT_BUF_INV_IDX;
+		q_t_idx++;
+	}
+}
+
+/*
+ * Record the completion of I/O @tag on @q with result @res in the queue's
+ * commit buffer, setting the commit buffer up first if this is the first
+ * completion since the last commit.
+ *
+ * The element is only written after asserting that the buffer still has
+ * room for it.
+ */
+void ublk_batch_complete_io(struct ublk_thread *t, struct ublk_queue *q,
+			    unsigned tag, int res)
+{
+	unsigned q_t_idx = ublk_queue_idx_in_thread(t, q);
+	struct batch_commit_buf *cb = &t->commit[q_t_idx];
+	struct ublk_batch_elem *elem;
+	struct ublk_io *io = &q->ios[tag];
+
+	if (!ublk_batch_commit_prepared(cb))
+		ublk_batch_init_commit(t, cb);
+
+	ublk_assert(q->q_id == cb->q_id);
+	/* check the capacity before writing, not after */
+	ublk_assert(cb->done < cb->count);
+
+	elem = (struct ublk_batch_elem *)(cb->elem + cb->done * t->commit_buf_elem_size);
+	elem->tag = tag;
+	elem->buf_index = ublk_batch_io_buf_idx(t, q, tag);
+	elem->result = res;
+
+	if (!ublk_queue_no_buf(q))
+		elem->buf_addr = (__u64) (uintptr_t) io->buf_addr;
+
+	cb->done += 1;
+}
+
+/*
+ * Fill @q_thread_map with a round-robin queue-to-thread assignment.
+ *
+ * On return q_thread_map[thread][queue] is 0 when the thread does not handle
+ * the queue, otherwise the 1-based position of the queue among the queues
+ * handled by that thread. Entries are only ever set here, never cleared.
+ */
+void ublk_batch_setup_map(unsigned char (*q_thread_map)[UBLK_MAX_QUEUES],
+			   int nthreads, int queues)
+{
+	int i, j;
+	int steps = queues > nthreads ? queues : nthreads;
+	int k;
+
+	/*
+	 * Phase 1: mark which queues each thread handles (boolean mapping).
+	 *
+	 * Walking max(queues, nthreads) steps and wrapping both indexes spreads
+	 * queues over threads (and threads over queues) evenly for any N:M
+	 * combination.
+	 *
+	 * Examples:
+	 * - 2 threads, 4 queues: T0=[Q0,Q2], T1=[Q1,Q3]
+	 * - 4 threads, 2 queues: T0=[Q0], T1=[Q1], T2=[Q0], T3=[Q1]
+	 * - 3 threads, 3 queues: T0=[Q0], T1=[Q1], T2=[Q2] (1:1 mapping)
+	 */
+	for (k = 0; k < steps; k++)
+		q_thread_map[k % nthreads][k % queues] = 1;
+
+	/*
+	 * Phase 2: turn the boolean mapping into per-thread sequence numbers.
+	 *
+	 * From: q_thread_map[thread][queue] = 1 (handles queue)
+	 * To:   q_thread_map[thread][queue] = N (queue index within thread)
+	 *
+	 * Each thread thereby knows the local index of every queue it handles,
+	 * which buffer allocation and management rely on. For example:
+	 * - Thread 0 handling queues [0,2]: q_thread_map[0][0]=1, q_thread_map[0][2]=2
+	 * - Thread 1 handling queues [1,3]: q_thread_map[1][1]=1, q_thread_map[1][3]=2
+	 */
+	for (j = 0; j < nthreads; j++) {
+		unsigned char next = 1;
+
+		for (i = 0; i < queues; i++) {
+			if (!q_thread_map[j][i])
+				continue;
+			q_thread_map[j][i] = next++;
+		}
+	}
+
+#if 0
+	for (j = 0; j < nthreads; j++) {
+		printf("thread %0d: ", j);
+		for (i = 0; i < queues; i++) {
+			if (q_thread_map[j][i])
+				printf("%03u ", i);
+		}
+		printf("\n");
+	}
+	printf("\n");
+	for (j = 0; j < nthreads; j++) {
+		for (i = 0; i < queues; i++) {
+			printf("%03u ", q_thread_map[j][i]);
+		}
+		printf("\n");
+	}
+#endif
+}
diff --git a/tools/testing/selftests/ublk/common.c b/tools/testing/selftests/ublk/common.c
index 01580a6f8519..530f9877c9dd 100644
--- a/tools/testing/selftests/ublk/common.c
+++ b/tools/testing/selftests/ublk/common.c
@@ -12,11 +12,11 @@ void backing_file_tgt_deinit(struct ublk_dev *dev)
}
}
-int backing_file_tgt_init(struct ublk_dev *dev)
+int backing_file_tgt_init(struct ublk_dev *dev, unsigned int nr_direct)
{
int fd, i;
- assert(dev->nr_fds == 1);
+ ublk_assert(dev->nr_fds == 1);
for (i = 0; i < dev->tgt.nr_backing_files; i++) {
char *file = dev->tgt.backing_file[i];
@@ -25,7 +25,7 @@ int backing_file_tgt_init(struct ublk_dev *dev)
ublk_dbg(UBLK_DBG_DEV, "%s: file %d: %s\n", __func__, i, file);
- fd = open(file, O_RDWR | O_DIRECT);
+ fd = open(file, O_RDWR | (i < nr_direct ? O_DIRECT : 0));
if (fd < 0) {
ublk_err("%s: backing file %s can't be opened: %s\n",
__func__, file, strerror(errno));
diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c
index b227bd78b252..3b897f69c014 100644
--- a/tools/testing/selftests/ublk/fault_inject.c
+++ b/tools/testing/selftests/ublk/fault_inject.c
@@ -33,6 +33,7 @@ static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
.dev_sectors = dev_size >> 9,
},
};
+ ublk_set_integrity_params(ctx, &dev->tgt.params);
dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us * 1000);
return 0;
diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c
index cd9fe69ecce2..228af2580ac6 100644
--- a/tools/testing/selftests/ublk/file_backed.c
+++ b/tools/testing/selftests/ublk/file_backed.c
@@ -10,7 +10,7 @@ static enum io_uring_op ublk_to_uring_op(const struct ublksrv_io_desc *iod, int
return zc ? IORING_OP_READ_FIXED : IORING_OP_READ;
else if (ublk_op == UBLK_IO_OP_WRITE)
return zc ? IORING_OP_WRITE_FIXED : IORING_OP_WRITE;
- assert(0);
+ ublk_assert(0);
}
static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q,
@@ -34,8 +34,24 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
unsigned zc = ublk_queue_use_zc(q);
unsigned auto_zc = ublk_queue_use_auto_zc(q);
enum io_uring_op op = ublk_to_uring_op(iod, zc | auto_zc);
+ struct ublk_io *io = ublk_get_io(q, tag);
+ __u64 offset = iod->start_sector << 9;
+ __u32 len = iod->nr_sectors << 9;
struct io_uring_sqe *sqe[3];
- void *addr = (zc | auto_zc) ? NULL : (void *)iod->addr;
+ void *addr = io->buf_addr;
+ unsigned short buf_index = ublk_io_buf_idx(t, q, tag);
+
+ if (iod->op_flags & UBLK_IO_F_INTEGRITY) {
+ ublk_io_alloc_sqes(t, sqe, 1);
+ /* Use second backing file for integrity data */
+ io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 2),
+ io->integrity_buf,
+ ublk_integrity_len(q, len),
+ ublk_integrity_len(q, offset));
+ sqe[0]->flags = IOSQE_FIXED_FILE;
+ /* tgt_data = 1 indicates integrity I/O */
+ sqe[0]->user_data = build_user_data(tag, ublk_op, 1, q->q_id, 1);
+ }
if (!zc || auto_zc) {
ublk_io_alloc_sqes(t, sqe, 1);
@@ -44,34 +60,34 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/,
addr,
- iod->nr_sectors << 9,
- iod->start_sector << 9);
+ len,
+ offset);
if (auto_zc)
- sqe[0]->buf_index = tag;
+ sqe[0]->buf_index = buf_index;
io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
/* bit63 marks us as tgt io */
sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
- return 1;
+ return !!(iod->op_flags & UBLK_IO_F_INTEGRITY) + 1;
}
ublk_io_alloc_sqes(t, sqe, 3);
- io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
+ io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, buf_index);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
sqe[0]->user_data = build_user_data(tag,
ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
io_uring_prep_rw(op, sqe[1], ublk_get_registered_fd(q, 1) /*fds[1]*/, 0,
- iod->nr_sectors << 9,
- iod->start_sector << 9);
- sqe[1]->buf_index = tag;
+ len,
+ offset);
+ sqe[1]->buf_index = buf_index;
sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
- io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
+ io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, buf_index);
sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1);
- return 2;
+ return !!(iod->op_flags & UBLK_IO_F_INTEGRITY) + 2;
}
static int loop_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, int tag)
@@ -118,12 +134,17 @@ static void ublk_loop_io_done(struct ublk_thread *t, struct ublk_queue *q,
unsigned op = user_data_to_op(cqe->user_data);
struct ublk_io *io = ublk_get_io(q, tag);
- if (cqe->res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) {
- if (!io->result)
- io->result = cqe->res;
- if (cqe->res < 0)
- ublk_err("%s: io failed op %x user_data %lx\n",
- __func__, op, cqe->user_data);
+ if (cqe->res < 0) {
+ io->result = cqe->res;
+ ublk_err("%s: io failed op %x user_data %lx\n",
+ __func__, op, cqe->user_data);
+ } else if (op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) {
+ __s32 data_len = user_data_to_tgt_data(cqe->user_data)
+ ? ublk_integrity_data_len(q, cqe->res)
+ : cqe->res;
+
+ if (!io->result || data_len < io->result)
+ io->result = data_len;
}
/* buffer register op is IOSQE_CQE_SKIP_SUCCESS */
@@ -134,9 +155,30 @@ static void ublk_loop_io_done(struct ublk_thread *t, struct ublk_queue *q,
ublk_complete_io(t, q, tag, io->result);
}
+/*
+ * Overwrite the first @len bytes of the file behind @fd with @byte, writing
+ * from offset 0 in chunks of at most 4096 bytes.
+ *
+ * Returns 0 on success, -errno when pwrite() fails, or -EIO when pwrite()
+ * makes no progress.
+ */
+static int ublk_loop_memset_file(int fd, __u8 byte, size_t len)
+{
+	__u8 fill[4096];
+	off_t pos;
+
+	memset(fill, byte, sizeof(fill));
+
+	for (pos = 0; len; ) {
+		int written = pwrite(fd, fill, min(len, sizeof(fill)), pos);
+
+		if (written < 0)
+			return -errno;
+		if (written == 0)
+			return -EIO;
+
+		pos += written;
+		len -= written;
+	}
+
+	return 0;
+}
+
static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
unsigned long long bytes;
+ unsigned long blocks;
int ret;
struct ublk_params p = {
.types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN,
@@ -153,19 +195,39 @@ static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
},
};
+ ublk_set_integrity_params(ctx, &p);
if (ctx->auto_zc_fallback) {
ublk_err("%s: not support auto_zc_fallback\n", __func__);
return -EINVAL;
}
- ret = backing_file_tgt_init(dev);
+ /* Use O_DIRECT only for data file */
+ ret = backing_file_tgt_init(dev, 1);
if (ret)
return ret;
- if (dev->tgt.nr_backing_files != 1)
+ /* Expect a second file for integrity data */
+ if (dev->tgt.nr_backing_files != 1 + !!ctx->metadata_size)
return -EINVAL;
- bytes = dev->tgt.backing_file_size[0];
+ blocks = dev->tgt.backing_file_size[0] >> p.basic.logical_bs_shift;
+ if (ctx->metadata_size) {
+ unsigned long metadata_blocks =
+ dev->tgt.backing_file_size[1] / ctx->metadata_size;
+ unsigned long integrity_len;
+
+ /* Ensure both data and integrity data fit in backing files */
+ blocks = min(blocks, metadata_blocks);
+ integrity_len = blocks * ctx->metadata_size;
+ /*
+ * Initialize PI app tag and ref tag to 0xFF
+ * to disable bio-integrity-auto checks
+ */
+ ret = ublk_loop_memset_file(dev->fds[2], 0xFF, integrity_len);
+ if (ret)
+ return ret;
+ }
+ bytes = blocks << p.basic.logical_bs_shift;
dev->tgt.dev_size = bytes;
p.basic.dev_sectors = bytes >> 9;
dev->tgt.params = p;
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index f8fa102a627f..e1c3b3c55e56 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -3,6 +3,7 @@
* Description: uring_cmd based ublk
*/
+#include <linux/fs.h>
#include "kublk.h"
#define MAX_NR_TGT_ARG 64
@@ -107,6 +108,15 @@ static int ublk_ctrl_stop_dev(struct ublk_dev *dev)
return __ublk_ctrl_cmd(dev, &data);
}
+/*
+ * Send the UBLK_U_CMD_TRY_STOP_DEV control command for @dev through
+ * __ublk_ctrl_cmd() and return that call's result unchanged.
+ */
+static int ublk_ctrl_try_stop_dev(struct ublk_dev *dev)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_TRY_STOP_DEV,
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
static int ublk_ctrl_start_dev(struct ublk_dev *dev,
int daemon_pid)
{
@@ -415,14 +425,18 @@ static void ublk_queue_deinit(struct ublk_queue *q)
if (q->io_cmd_buf)
munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
- for (i = 0; i < nr_ios; i++)
+ for (i = 0; i < nr_ios; i++) {
free(q->ios[i].buf_addr);
+ free(q->ios[i].integrity_buf);
+ }
}
static void ublk_thread_deinit(struct ublk_thread *t)
{
io_uring_unregister_buffers(&t->ring);
+ ublk_batch_free_buf(t);
+
io_uring_unregister_ring_fd(&t->ring);
if (t->ring.ring_fd > 0) {
@@ -432,19 +446,22 @@ static void ublk_thread_deinit(struct ublk_thread *t)
}
}
-static int ublk_queue_init(struct ublk_queue *q, unsigned long long extra_flags)
+static int ublk_queue_init(struct ublk_queue *q, unsigned long long extra_flags,
+ __u8 metadata_size)
{
struct ublk_dev *dev = q->dev;
int depth = dev->dev_info.queue_depth;
int i;
- int cmd_buf_size, io_buf_size;
+ int cmd_buf_size, io_buf_size, integrity_size;
unsigned long off;
+ pthread_spin_init(&q->lock, PTHREAD_PROCESS_PRIVATE);
q->tgt_ops = dev->tgt.ops;
q->flags = 0;
q->q_depth = depth;
q->flags = dev->dev_info.flags;
q->flags |= extra_flags;
+ q->metadata_size = metadata_size;
/* Cache fd in queue for fast path access */
q->ublk_fd = dev->fds[0];
@@ -460,11 +477,23 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned long long extra_flags)
}
io_buf_size = dev->dev_info.max_io_buf_bytes;
+ integrity_size = ublk_integrity_len(q, io_buf_size);
for (i = 0; i < q->q_depth; i++) {
q->ios[i].buf_addr = NULL;
q->ios[i].flags = UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_FREE;
q->ios[i].tag = i;
+ if (integrity_size) {
+ q->ios[i].integrity_buf = malloc(integrity_size);
+ if (!q->ios[i].integrity_buf) {
+ ublk_err("ublk dev %d queue %d io %d malloc(%d) failed: %m\n",
+ dev->dev_info.dev_id, q->q_id, i,
+ integrity_size);
+ goto fail;
+ }
+ }
+
+
if (ublk_queue_no_buf(q))
continue;
@@ -491,6 +520,10 @@ static int ublk_thread_init(struct ublk_thread *t, unsigned long long extra_flag
int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;
int ret;
+ /* FETCH_IO_CMDS is multishot, so increase cq depth for BATCH_IO */
+ if (ublk_dev_batch_io(dev))
+ cq_depth += dev->dev_info.queue_depth * 2;
+
ret = ublk_setup_ring(&t->ring, ring_depth, cq_depth,
IORING_SETUP_COOP_TASKRUN |
IORING_SETUP_SINGLE_ISSUER |
@@ -505,15 +538,33 @@ static int ublk_thread_init(struct ublk_thread *t, unsigned long long extra_flag
unsigned nr_ios = dev->dev_info.queue_depth * dev->dev_info.nr_hw_queues;
unsigned max_nr_ios_per_thread = nr_ios / dev->nthreads;
max_nr_ios_per_thread += !!(nr_ios % dev->nthreads);
- ret = io_uring_register_buffers_sparse(
- &t->ring, max_nr_ios_per_thread);
+
+ t->nr_bufs = max_nr_ios_per_thread;
+ } else {
+ t->nr_bufs = 0;
+ }
+
+ if (ublk_dev_batch_io(dev))
+ ublk_batch_prepare(t);
+
+ if (t->nr_bufs) {
+ ret = io_uring_register_buffers_sparse(&t->ring, t->nr_bufs);
if (ret) {
- ublk_err("ublk dev %d thread %d register spare buffers failed %d",
+ ublk_err("ublk dev %d thread %d register spare buffers failed %d\n",
dev->dev_info.dev_id, t->idx, ret);
goto fail;
}
}
+ if (ublk_dev_batch_io(dev)) {
+ ret = ublk_batch_alloc_buf(t);
+ if (ret) {
+ ublk_err("ublk dev %d thread %d alloc batch buf failed %d\n",
+ dev->dev_info.dev_id, t->idx, ret);
+ goto fail;
+ }
+ }
+
io_uring_register_ring_fd(&t->ring);
if (flags & UBLKS_Q_NO_UBLK_FIXED_FD) {
@@ -579,16 +630,17 @@ static void ublk_dev_unprep(struct ublk_dev *dev)
close(dev->fds[0]);
}
-static void ublk_set_auto_buf_reg(const struct ublk_queue *q,
+static void ublk_set_auto_buf_reg(const struct ublk_thread *t,
+ const struct ublk_queue *q,
struct io_uring_sqe *sqe,
unsigned short tag)
{
struct ublk_auto_buf_reg buf = {};
if (q->tgt_ops->buf_index)
- buf.index = q->tgt_ops->buf_index(q, tag);
+ buf.index = q->tgt_ops->buf_index(t, q, tag);
else
- buf.index = q->ios[tag].buf_index;
+ buf.index = ublk_io_buf_idx(t, q, tag);
if (ublk_queue_auto_zc_fallback(q))
buf.flags = UBLK_AUTO_BUF_REG_FALLBACK;
@@ -596,6 +648,52 @@ static void ublk_set_auto_buf_reg(const struct ublk_queue *q,
sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf);
}
+/* Copy in pieces to test the buffer offset logic */
+#define UBLK_USER_COPY_LEN 2048
+
+/*
+ * Transfer the payload of @io between its local buffers and the ublk char
+ * device fd cached in the queue, but only when the request's op equals
+ * @match_ublk_op; any other op returns without doing anything.
+ *
+ * UBLK_IO_OP_WRITE: pread() from q->ublk_fd into the local buffer.
+ * UBLK_IO_OP_READ: pwrite() from the local buffer to q->ublk_fd.
+ *
+ * The data buffer is moved in chunks of at most UBLK_USER_COPY_LEN bytes,
+ * starting at the offset returned by ublk_user_copy_offset(). If the request
+ * has UBLK_IO_F_INTEGRITY set, io->integrity_buf is then moved with a single
+ * call at the same base offset ORed with UBLKSRV_IO_INTEGRITY_FLAG.
+ *
+ * A short or failed transfer trips an assert(); no error is returned.
+ * NOTE(review): if assert() is compiled out, the assert(0) branches leave
+ * 'copied' unset - confirm the selftests are never built with NDEBUG.
+ */
+static void ublk_user_copy(const struct ublk_io *io, __u8 match_ublk_op)
+{
+ const struct ublk_queue *q = ublk_io_to_queue(io);
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, io->tag);
+ __u64 off = ublk_user_copy_offset(q->q_id, io->tag);
+ __u8 ublk_op = ublksrv_get_op(iod);
+ /* request length in bytes: nr_sectors counts 512-byte units */
+ __u32 len = iod->nr_sectors << 9;
+ void *addr = io->buf_addr;
+ ssize_t copied;
+
+ if (ublk_op != match_ublk_op)
+ return;
+
+ while (len) {
+ __u32 copy_len = min(len, UBLK_USER_COPY_LEN);
+
+ if (ublk_op == UBLK_IO_OP_WRITE)
+ copied = pread(q->ublk_fd, addr, copy_len, off);
+ else if (ublk_op == UBLK_IO_OP_READ)
+ copied = pwrite(q->ublk_fd, addr, copy_len, off);
+ else
+ assert(0);
+ assert(copied == (ssize_t)copy_len);
+ /* arithmetic on void * relies on the GNU C extension */
+ addr += copy_len;
+ off += copy_len;
+ len -= copy_len;
+ }
+
+ if (!(iod->op_flags & UBLK_IO_F_INTEGRITY))
+ return;
+
+ /* integrity payload: recompute length and restart from the base offset */
+ len = ublk_integrity_len(q, iod->nr_sectors << 9);
+ off = ublk_user_copy_offset(q->q_id, io->tag);
+ off |= UBLKSRV_IO_INTEGRITY_FLAG;
+ if (ublk_op == UBLK_IO_OP_WRITE)
+ copied = pread(q->ublk_fd, io->integrity_buf, len, off);
+ else if (ublk_op == UBLK_IO_OP_READ)
+ copied = pwrite(q->ublk_fd, io->integrity_buf, len, off);
+ else
+ assert(0);
+ assert(copied == (ssize_t)len);
+}
+
int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io)
{
struct ublk_queue *q = ublk_io_to_queue(io);
@@ -618,9 +716,12 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io)
if (io->flags & UBLKS_IO_NEED_GET_DATA)
cmd_op = UBLK_U_IO_NEED_GET_DATA;
- else if (io->flags & UBLKS_IO_NEED_COMMIT_RQ_COMP)
+ else if (io->flags & UBLKS_IO_NEED_COMMIT_RQ_COMP) {
+ if (ublk_queue_use_user_copy(q))
+ ublk_user_copy(io, UBLK_IO_OP_READ);
+
cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
- else if (io->flags & UBLKS_IO_NEED_FETCH_RQ)
+ } else if (io->flags & UBLKS_IO_NEED_FETCH_RQ)
cmd_op = UBLK_U_IO_FETCH_REQ;
if (io_uring_sq_space_left(&t->ring) < 1)
@@ -649,13 +750,13 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io)
sqe[0]->rw_flags = 0;
cmd->tag = io->tag;
cmd->q_id = q->q_id;
- if (!ublk_queue_no_buf(q))
+ if (!ublk_queue_no_buf(q) && !ublk_queue_use_user_copy(q))
cmd->addr = (__u64) (uintptr_t) io->buf_addr;
else
cmd->addr = 0;
if (ublk_queue_use_auto_zc(q))
- ublk_set_auto_buf_reg(q, sqe[0], io->tag);
+ ublk_set_auto_buf_reg(t, q, sqe[0], io->tag);
user_data = build_user_data(io->tag, _IOC_NR(cmd_op), 0, q->q_id, 0);
io_uring_sqe_set_data64(sqe[0], user_data);
@@ -718,7 +819,7 @@ static int ublk_thread_is_idle(struct ublk_thread *t)
static int ublk_thread_is_done(struct ublk_thread *t)
{
- return (t->state & UBLKS_T_STOPPING) && ublk_thread_is_idle(t);
+ return (t->state & UBLKS_T_STOPPING) && ublk_thread_is_idle(t) && !t->cmd_inflight;
}
static inline void ublksrv_handle_tgt_cqe(struct ublk_thread *t,
@@ -744,13 +845,19 @@ static void ublk_handle_uring_cmd(struct ublk_thread *t,
unsigned tag = user_data_to_tag(cqe->user_data);
struct ublk_io *io = &q->ios[tag];
+ t->cmd_inflight--;
+
if (!fetch) {
t->state |= UBLKS_T_STOPPING;
io->flags &= ~UBLKS_IO_NEED_FETCH_RQ;
}
if (cqe->res == UBLK_IO_RES_OK) {
- assert(tag < q->q_depth);
+ ublk_assert(tag < q->q_depth);
+
+ if (ublk_queue_use_user_copy(q))
+ ublk_user_copy(io, UBLK_IO_OP_WRITE);
+
if (q->tgt_ops->queue_io)
q->tgt_ops->queue_io(t, q, tag);
} else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
@@ -774,28 +881,30 @@ static void ublk_handle_cqe(struct ublk_thread *t,
{
struct ublk_dev *dev = t->dev;
unsigned q_id = user_data_to_q_id(cqe->user_data);
- struct ublk_queue *q = &dev->q[q_id];
unsigned cmd_op = user_data_to_op(cqe->user_data);
- if (cqe->res < 0 && cqe->res != -ENODEV)
- ublk_err("%s: res %d userdata %llx queue state %x\n", __func__,
- cqe->res, cqe->user_data, q->flags);
+ if (cqe->res < 0 && cqe->res != -ENODEV && cqe->res != -ENOBUFS)
+ ublk_err("%s: res %d userdata %llx thread state %x\n", __func__,
+ cqe->res, cqe->user_data, t->state);
- ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n",
- __func__, cqe->res, q->q_id, user_data_to_tag(cqe->user_data),
- cmd_op, is_target_io(cqe->user_data),
+ ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (thread %d qid %d tag %u cmd_op %x "
+ "data %lx target %d/%d) stopping %d\n",
+ __func__, cqe->res, t->idx, q_id,
+ user_data_to_tag(cqe->user_data),
+ cmd_op, cqe->user_data, is_target_io(cqe->user_data),
user_data_to_tgt_data(cqe->user_data),
(t->state & UBLKS_T_STOPPING));
/* Don't retrieve io in case of target io */
if (is_target_io(cqe->user_data)) {
- ublksrv_handle_tgt_cqe(t, q, cqe);
+ ublksrv_handle_tgt_cqe(t, &dev->q[q_id], cqe);
return;
}
- t->cmd_inflight--;
-
- ublk_handle_uring_cmd(t, q, cqe);
+ if (ublk_thread_batch_io(t))
+ ublk_batch_compl_cmd(t, cqe);
+ else
+ ublk_handle_uring_cmd(t, &dev->q[q_id], cqe);
}
static int ublk_reap_events_uring(struct ublk_thread *t)
@@ -827,7 +936,13 @@ static int ublk_process_io(struct ublk_thread *t)
return -ENODEV;
ret = io_uring_submit_and_wait(&t->ring, 1);
- reapped = ublk_reap_events_uring(t);
+ if (ublk_thread_batch_io(t)) {
+ ublk_batch_prep_commit(t);
+ reapped = ublk_reap_events_uring(t);
+ ublk_batch_commit_io_cmds(t);
+ } else {
+ reapped = ublk_reap_events_uring(t);
+ }
ublk_dbg(UBLK_DBG_THREAD, "submit result %d, reapped %d stop %d idle %d\n",
ret, reapped, (t->state & UBLKS_T_STOPPING),
@@ -843,6 +958,7 @@ struct ublk_thread_info {
sem_t *ready;
cpu_set_t *affinity;
unsigned long long extra_flags;
+ unsigned char (*q_thread_map)[UBLK_MAX_QUEUES];
};
static void ublk_thread_set_sched_affinity(const struct ublk_thread_info *info)
@@ -852,6 +968,26 @@ static void ublk_thread_set_sched_affinity(const struct ublk_thread_info *info)
info->dev->dev_info.dev_id, info->idx);
}
+/*
+ * Walk every hardware queue of the thread's device and call
+ * ublk_batch_queue_prep_io_cmds() for each queue whose entry in t->q_map
+ * is non-zero. A negative return from that call trips ublk_assert().
+ */
+static void ublk_batch_setup_queues(struct ublk_thread *t)
+{
+ int i;
+
+ for (i = 0; i < t->dev->dev_info.nr_hw_queues; i++) {
+ struct ublk_queue *q = &t->dev->q[i];
+ int ret;
+
+ /*
+ * Only prepare io commands in the mapped thread context,
+ * otherwise io command buffer index may not work as expected
+ */
+ if (t->q_map[i] == 0)
+ continue;
+
+ ret = ublk_batch_queue_prep_io_cmds(t, q);
+ ublk_assert(ret >= 0);
+ }
+}
+
static __attribute__((noinline)) int __ublk_io_handler_fn(struct ublk_thread_info *info)
{
struct ublk_thread t = {
@@ -861,6 +997,10 @@ static __attribute__((noinline)) int __ublk_io_handler_fn(struct ublk_thread_inf
int dev_id = info->dev->dev_info.dev_id;
int ret;
+ /* Copy per-thread queue mapping into thread-local variable */
+ if (info->q_thread_map)
+ memcpy(t.q_map, info->q_thread_map[info->idx], sizeof(t.q_map));
+
ret = ublk_thread_init(&t, info->extra_flags);
if (ret) {
ublk_err("ublk dev %d thread %u init failed\n",
@@ -872,8 +1012,14 @@ static __attribute__((noinline)) int __ublk_io_handler_fn(struct ublk_thread_inf
ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u started\n",
gettid(), dev_id, t.idx);
- /* submit all io commands to ublk driver */
- ublk_submit_fetch_commands(&t);
+ if (!ublk_thread_batch_io(&t)) {
+ /* submit all io commands to ublk driver */
+ ublk_submit_fetch_commands(&t);
+ } else {
+ ublk_batch_setup_queues(&t);
+ ublk_batch_start_fetch(&t);
+ }
+
do {
if (ublk_process_io(&t) < 0)
break;
@@ -945,6 +1091,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
struct ublk_thread_info *tinfo;
unsigned long long extra_flags = 0;
cpu_set_t *affinity_buf;
+ unsigned char (*q_thread_map)[UBLK_MAX_QUEUES] = NULL;
void *thread_ret;
sem_t ready;
int ret, i;
@@ -964,6 +1111,16 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
if (ret)
return ret;
+ if (ublk_dev_batch_io(dev)) {
+ q_thread_map = calloc(dev->nthreads, sizeof(*q_thread_map));
+ if (!q_thread_map) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ ublk_batch_setup_map(q_thread_map, dev->nthreads,
+ dinfo->nr_hw_queues);
+ }
+
if (ctx->auto_zc_fallback)
extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK;
if (ctx->no_ublk_fixed_fd)
@@ -973,7 +1130,8 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
dev->q[i].dev = dev;
dev->q[i].q_id = i;
- ret = ublk_queue_init(&dev->q[i], extra_flags);
+ ret = ublk_queue_init(&dev->q[i], extra_flags,
+ ctx->metadata_size);
if (ret) {
ublk_err("ublk dev %d queue %d init queue failed\n",
dinfo->dev_id, i);
@@ -986,6 +1144,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
tinfo[i].idx = i;
tinfo[i].ready = &ready;
tinfo[i].extra_flags = extra_flags;
+ tinfo[i].q_thread_map = q_thread_map;
/*
* If threads are not tied 1:1 to queues, setting thread
@@ -1005,6 +1164,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
for (i = 0; i < dev->nthreads; i++)
sem_wait(&ready);
free(affinity_buf);
+ free(q_thread_map);
/* everything is fine now, start us */
if (ctx->recovery)
@@ -1015,7 +1175,9 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
}
if (ret < 0) {
ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
- goto fail;
+ /* stop device so that inflight uring_cmd can be cancelled */
+ ublk_ctrl_stop_dev(dev);
+ goto fail_start;
}
ublk_ctrl_get_info(dev);
@@ -1023,7 +1185,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
ublk_ctrl_dump(dev);
else
ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);
-
+fail_start:
/* wait until we are terminated */
for (i = 0; i < dev->nthreads; i++)
pthread_join(tinfo[i].thread, &thread_ret);
@@ -1173,7 +1335,8 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
goto fail;
}
- if (nthreads != nr_queues && !ctx->per_io_tasks) {
+ if (nthreads != nr_queues && (!ctx->per_io_tasks &&
+ !(ctx->flags & UBLK_F_BATCH_IO))) {
ublk_err("%s: threads %u must be same as queues %u if "
"not using per_io_tasks\n",
__func__, nthreads, nr_queues);
@@ -1233,7 +1396,7 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
}
ret = ublk_start_daemon(ctx, dev);
- ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\b", ret);
+ ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret);
if (ret < 0)
ublk_ctrl_del_dev(dev);
@@ -1353,6 +1516,42 @@ static int cmd_dev_del(struct dev_ctx *ctx)
return 0;
}
+/*
+ * Handler for the "stop" sub-command.
+ *
+ * Requires an explicit device id (ctx->dev_id >= 0), otherwise returns
+ * -EINVAL. Fetches the device info, then issues either the try-stop command
+ * (when ctx->safe_stop is set) or the plain stop command, logging any
+ * failure. The control device is released on every path after it has been
+ * initialised.
+ *
+ * Returns the result of the last control command issued.
+ *
+ * NOTE(review): the pointer returned by ublk_ctrl_init() is dereferenced
+ * without a NULL check - confirm it cannot fail, or add a check.
+ */
+static int cmd_dev_stop(struct dev_ctx *ctx)
+{
+ int number = ctx->dev_id;
+ struct ublk_dev *dev;
+ int ret;
+
+ if (number < 0) {
+ ublk_err("%s: device id is required\n", __func__);
+ return -EINVAL;
+ }
+
+ dev = ublk_ctrl_init();
+ dev->dev_info.dev_id = number;
+
+ ret = ublk_ctrl_get_info(dev);
+ if (ret < 0)
+ goto fail;
+
+ if (ctx->safe_stop) {
+ ret = ublk_ctrl_try_stop_dev(dev);
+ if (ret < 0)
+ ublk_err("%s: try_stop dev %d failed ret %d\n",
+ __func__, number, ret);
+ } else {
+ ret = ublk_ctrl_stop_dev(dev);
+ if (ret < 0)
+ ublk_err("%s: stop dev %d failed ret %d\n",
+ __func__, number, ret);
+ }
+
+fail:
+ ublk_ctrl_deinit(dev);
+
+ return ret;
+}
+
static int __cmd_dev_list(struct dev_ctx *ctx)
{
struct ublk_dev *dev = ublk_ctrl_init();
@@ -1415,6 +1614,10 @@ static int cmd_dev_get_features(void)
FEAT_NAME(UBLK_F_QUIESCE),
FEAT_NAME(UBLK_F_PER_IO_DAEMON),
FEAT_NAME(UBLK_F_BUF_REG_OFF_DAEMON),
+ FEAT_NAME(UBLK_F_INTEGRITY),
+ FEAT_NAME(UBLK_F_SAFE_STOP_DEV),
+ FEAT_NAME(UBLK_F_BATCH_IO),
+ FEAT_NAME(UBLK_F_NO_AUTO_PART_SCAN),
};
struct ublk_dev *dev;
__u64 features = 0;
@@ -1507,9 +1710,12 @@ static void __cmd_create_help(char *exe, bool recovery)
printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
exe, recovery ? "recover" : "add");
- printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n");
+ printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1] [-g] [-u]\n");
printf("\t[-e 0|1 ] [-i 0|1] [--no_ublk_fixed_fd]\n");
printf("\t[--nthreads threads] [--per_io_tasks]\n");
+ printf("\t[--integrity_capable] [--integrity_reftag] [--metadata_size SIZE] "
+ "[--pi_offset OFFSET] [--csum_type ip|t10dif|nvme] [--tag_size SIZE]\n");
+ printf("\t[--batch|-b] [--no_auto_part_scan]\n");
printf("\t[target options] [backfile1] [backfile2] ...\n");
printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
printf("\tdefault: nthreads=nr_queues");
@@ -1542,6 +1748,8 @@ static int cmd_dev_help(char *exe)
printf("%s del [-n dev_id] -a \n", exe);
printf("\t -a delete all devices -n delete specified device\n\n");
+ printf("%s stop -n dev_id [--safe]\n", exe);
+ printf("\t --safe only stop if device has no active openers\n\n");
printf("%s list [-n dev_id] -a \n", exe);
printf("\t -a list all devices, -n list specified device, default -a \n\n");
printf("%s features\n", exe);
@@ -1568,20 +1776,32 @@ int main(int argc, char *argv[])
{ "get_data", 1, NULL, 'g'},
{ "auto_zc", 0, NULL, 0 },
{ "auto_zc_fallback", 0, NULL, 0 },
+ { "user_copy", 0, NULL, 'u'},
{ "size", 1, NULL, 's'},
{ "nthreads", 1, NULL, 0 },
{ "per_io_tasks", 0, NULL, 0 },
{ "no_ublk_fixed_fd", 0, NULL, 0 },
+ { "integrity_capable", 0, NULL, 0 },
+ { "integrity_reftag", 0, NULL, 0 },
+ { "metadata_size", 1, NULL, 0 },
+ { "pi_offset", 1, NULL, 0 },
+ { "csum_type", 1, NULL, 0 },
+ { "tag_size", 1, NULL, 0 },
+ { "safe", 0, NULL, 0 },
+ { "batch", 0, NULL, 'b'},
+ { "no_auto_part_scan", 0, NULL, 0 },
{ 0, 0, 0, 0 }
};
const struct ublk_tgt_ops *ops = NULL;
int option_idx, opt;
const char *cmd = argv[1];
struct dev_ctx ctx = {
+ ._evtfd = -1,
.queue_depth = 128,
.nr_hw_queues = 2,
.dev_id = -1,
.tgt_type = "unknown",
+ .csum_type = LBMD_PI_CSUM_NONE,
};
int ret = -EINVAL, i;
int tgt_argc = 1;
@@ -1593,12 +1813,15 @@ int main(int argc, char *argv[])
opterr = 0;
optind = 2;
- while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gaz",
+ while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gazub",
longopts, &option_idx)) != -1) {
switch (opt) {
case 'a':
ctx.all = 1;
break;
+ case 'b':
+ ctx.flags |= UBLK_F_BATCH_IO;
+ break;
case 'n':
ctx.dev_id = strtol(optarg, NULL, 10);
break;
@@ -1613,7 +1836,7 @@ int main(int argc, char *argv[])
ctx.queue_depth = strtol(optarg, NULL, 10);
break;
case 'z':
- ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
+ ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY;
break;
case 'r':
value = strtol(optarg, NULL, 10);
@@ -1633,6 +1856,9 @@ int main(int argc, char *argv[])
case 'g':
ctx.flags |= UBLK_F_NEED_GET_DATA;
break;
+ case 'u':
+ ctx.flags |= UBLK_F_USER_COPY;
+ break;
case 's':
ctx.size = strtoull(optarg, NULL, 10);
break;
@@ -1653,6 +1879,32 @@ int main(int argc, char *argv[])
ctx.per_io_tasks = 1;
if (!strcmp(longopts[option_idx].name, "no_ublk_fixed_fd"))
ctx.no_ublk_fixed_fd = 1;
+ if (!strcmp(longopts[option_idx].name, "integrity_capable"))
+ ctx.integrity_flags |= LBMD_PI_CAP_INTEGRITY;
+ if (!strcmp(longopts[option_idx].name, "integrity_reftag"))
+ ctx.integrity_flags |= LBMD_PI_CAP_REFTAG;
+ if (!strcmp(longopts[option_idx].name, "metadata_size"))
+ ctx.metadata_size = strtoul(optarg, NULL, 0);
+ if (!strcmp(longopts[option_idx].name, "pi_offset"))
+ ctx.pi_offset = strtoul(optarg, NULL, 0);
+ if (!strcmp(longopts[option_idx].name, "csum_type")) {
+ if (!strcmp(optarg, "ip")) {
+ ctx.csum_type = LBMD_PI_CSUM_IP;
+ } else if (!strcmp(optarg, "t10dif")) {
+ ctx.csum_type = LBMD_PI_CSUM_CRC16_T10DIF;
+ } else if (!strcmp(optarg, "nvme")) {
+ ctx.csum_type = LBMD_PI_CSUM_CRC64_NVME;
+ } else {
+ ublk_err("invalid csum_type: %s\n", optarg);
+ return -EINVAL;
+ }
+ }
+ if (!strcmp(longopts[option_idx].name, "tag_size"))
+ ctx.tag_size = strtoul(optarg, NULL, 0);
+ if (!strcmp(longopts[option_idx].name, "safe"))
+ ctx.safe_stop = 1;
+ if (!strcmp(longopts[option_idx].name, "no_auto_part_scan"))
+ ctx.flags |= UBLK_F_NO_AUTO_PART_SCAN;
break;
case '?':
/*
@@ -1676,6 +1928,11 @@ int main(int argc, char *argv[])
}
}
+ if (ctx.per_io_tasks && (ctx.flags & UBLK_F_BATCH_IO)) {
+ ublk_err("per_io_task and F_BATCH_IO conflict\n");
+ return -EINVAL;
+ }
+
/* auto_zc_fallback depends on F_AUTO_BUF_REG & F_SUPPORT_ZERO_COPY */
if (ctx.auto_zc_fallback &&
!((ctx.flags & UBLK_F_AUTO_BUF_REG) &&
@@ -1686,6 +1943,37 @@ int main(int argc, char *argv[])
return -EINVAL;
}
+ if (!!(ctx.flags & UBLK_F_NEED_GET_DATA) +
+ !!(ctx.flags & UBLK_F_USER_COPY) +
+ (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY && !ctx.auto_zc_fallback) +
+ (ctx.flags & UBLK_F_AUTO_BUF_REG && !ctx.auto_zc_fallback) +
+ ctx.auto_zc_fallback > 1) {
+ fprintf(stderr, "too many data copy modes specified\n");
+ return -EINVAL;
+ }
+
+ if (ctx.metadata_size) {
+ if (!(ctx.flags & UBLK_F_USER_COPY)) {
+ ublk_err("integrity requires user_copy\n");
+ return -EINVAL;
+ }
+
+ ctx.flags |= UBLK_F_INTEGRITY;
+ } else if (ctx.integrity_flags ||
+ ctx.pi_offset ||
+ ctx.csum_type != LBMD_PI_CSUM_NONE ||
+ ctx.tag_size) {
+ ublk_err("integrity parameters require metadata_size\n");
+ return -EINVAL;
+ }
+
+ if ((ctx.flags & UBLK_F_AUTO_BUF_REG) &&
+ (ctx.flags & UBLK_F_BATCH_IO) &&
+ (ctx.nthreads > ctx.nr_hw_queues)) {
+ ublk_err("too many threads for F_AUTO_BUF_REG & F_BATCH_IO\n");
+ return -EINVAL;
+ }
+
i = optind;
while (i < argc && ctx.nr_files < MAX_BACK_FILES) {
ctx.files[ctx.nr_files++] = argv[i++];
@@ -1711,6 +1999,8 @@ int main(int argc, char *argv[])
}
} else if (!strcmp(cmd, "del"))
ret = cmd_dev_del(&ctx);
+ else if (!strcmp(cmd, "stop"))
+ ret = cmd_dev_stop(&ctx);
else if (!strcmp(cmd, "list")) {
ctx.all = 1;
ret = cmd_dev_list(&ctx);
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index fe42705c6d42..02f0c55d006b 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -78,6 +78,13 @@ struct dev_ctx {
unsigned int auto_zc_fallback:1;
unsigned int per_io_tasks:1;
unsigned int no_ublk_fixed_fd:1;
+ unsigned int safe_stop:1;
+ unsigned int no_auto_part_scan:1;
+ __u32 integrity_flags;
+ __u8 metadata_size;
+ __u8 pi_offset;
+ __u8 csum_type;
+ __u8 tag_size;
int _evtfd;
int _shmid;
@@ -107,6 +114,7 @@ struct ublk_ctrl_cmd_data {
struct ublk_io {
char *buf_addr;
+ void *integrity_buf;
#define UBLKS_IO_NEED_FETCH_RQ (1UL << 0)
#define UBLKS_IO_NEED_COMMIT_RQ_COMP (1UL << 1)
@@ -143,7 +151,8 @@ struct ublk_tgt_ops {
void (*usage)(const struct ublk_tgt_ops *ops);
/* return buffer index for UBLK_F_AUTO_BUF_REG */
- unsigned short (*buf_index)(const struct ublk_queue *, int tag);
+ unsigned short (*buf_index)(const struct ublk_thread *t,
+ const struct ublk_queue *, int tag);
};
struct ublk_tgt {
@@ -165,23 +174,76 @@ struct ublk_queue {
const struct ublk_tgt_ops *tgt_ops;
struct ublksrv_io_desc *io_cmd_buf;
-/* borrow one bit of ublk uapi flags, which may never be used */
+/* borrow three bits of ublk uapi flags, which may never be used */
#define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63)
#define UBLKS_Q_NO_UBLK_FIXED_FD (1ULL << 62)
+#define UBLKS_Q_PREPARED (1ULL << 61)
__u64 flags;
int ublk_fd; /* cached ublk char device fd */
+ __u8 metadata_size;
struct ublk_io ios[UBLK_QUEUE_DEPTH];
+
+ /* used for prep io commands */
+ pthread_spinlock_t lock;
+};
+
+/*
+ * One per-I/O element used by the batch commands.
+ * Align with `ublk_elem_header`.
+ * NOTE(review): `ublk_elem_header` is not visible here - confirm field order
+ * and sizes against the UAPI header.
+ */
+struct ublk_batch_elem {
+ __u16 tag;
+ __u16 buf_index;
+ __s32 result;
+ __u64 buf_addr;
+};
+
+/*
+ * Bookkeeping for one commit buffer.
+ * buf_idx equal to UBLKS_T_COMMIT_BUF_INV_IDX marks the buffer as not
+ * prepared (see ublk_batch_commit_prepared()).
+ * NOTE(review): presumably 'done'/'count' track how many elements have been
+ * filled out of the buffer's capacity - confirm against the batch code.
+ */
+struct batch_commit_buf {
+ unsigned short q_id;
+ unsigned short buf_idx;
+ void *elem;
+ unsigned short done;
+ unsigned short count;
+};
+
+/*
+ * State for one fetch buffer: an io_uring buffer ring plus the backing
+ * memory, its size and a current offset.
+ * NOTE(review): presumably used for the multishot FETCH_IO_CMDS command -
+ * confirm against the batch implementation.
+ */
+struct batch_fetch_buf {
+ struct io_uring_buf_ring *br;
+ void *fetch_buf;
+ unsigned int fetch_buf_size;
+ unsigned int fetch_buf_off;
 };
struct ublk_thread {
+ /* Thread-local copy of queue-to-thread mapping for this thread */
+ unsigned char q_map[UBLK_MAX_QUEUES];
+
struct ublk_dev *dev;
- unsigned idx;
+ unsigned short idx;
+ unsigned short nr_queues;
#define UBLKS_T_STOPPING (1U << 0)
#define UBLKS_T_IDLE (1U << 1)
+#define UBLKS_T_BATCH_IO (1U << 31) /* readonly */
unsigned state;
unsigned int cmd_inflight;
unsigned int io_inflight;
+
+ unsigned short nr_bufs;
+
+ /* the following fields are for BATCH_IO */
+ unsigned short commit_buf_start;
+ unsigned char commit_buf_elem_size;
+ /*
+ * We just support single device, so pre-calculate commit/prep flags
+ */
+ unsigned short cmd_flags;
+ unsigned int nr_commit_buf;
+ unsigned int commit_buf_size;
+ void *commit_buf;
+#define UBLKS_T_COMMIT_BUF_INV_IDX ((unsigned short)-1)
+ struct allocator commit_buf_alloc;
+ struct batch_commit_buf *commit;
+ /* FETCH_IO_CMDS buffer */
+ unsigned short nr_fetch_bufs;
+ struct batch_fetch_buf *fetch;
+
struct io_uring ring;
};
@@ -202,12 +264,67 @@ struct ublk_dev {
extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io);
+/*
+ * Return non-zero when UBLK_F_BATCH_IO is set in @flags.
+ * NOTE(review): the __u64 mask result is narrowed to int, which only stays
+ * non-zero if UBLK_F_BATCH_IO lies within the bits an int keeps - confirm
+ * against the UAPI definition.
+ */
+static inline int __ublk_use_batch_io(__u64 flags)
+{
+ return flags & UBLK_F_BATCH_IO;
+}
+
+/* Non-zero when the queue's flags have UBLK_F_BATCH_IO set */
+static inline int ublk_queue_batch_io(const struct ublk_queue *q)
+{
+ return __ublk_use_batch_io(q->flags);
+}
+
+/* Non-zero when the device's dev_info.flags have UBLK_F_BATCH_IO set */
+static inline int ublk_dev_batch_io(const struct ublk_dev *dev)
+{
+ return __ublk_use_batch_io(dev->dev_info.flags);
+}
+
+/*
+ * Non-zero when UBLKS_T_BATCH_IO is set in the thread state.
+ * Only works when this pthread context handles a single device.
+ */
+static inline int ublk_thread_batch_io(const struct ublk_thread *t)
+{
+ return t->state & UBLKS_T_BATCH_IO;
+}
+
+/*
+ * Fill the integrity section of @params from the command-line context.
+ *
+ * Does nothing when ctx->metadata_size is zero. Otherwise it sets
+ * UBLK_PARAM_TYPE_INTEGRITY in params->types and overwrites
+ * params->integrity with the values held in @ctx.
+ *
+ * interval_exp is read from params->basic.logical_bs_shift at call time, so
+ * the basic parameters must already be filled in by the caller.
+ */
+static inline void ublk_set_integrity_params(const struct dev_ctx *ctx,
+ struct ublk_params *params)
+{
+ if (!ctx->metadata_size)
+ return;
+
+ params->types |= UBLK_PARAM_TYPE_INTEGRITY;
+ params->integrity = (struct ublk_param_integrity) {
+ .flags = ctx->integrity_flags,
+ .interval_exp = params->basic.logical_bs_shift,
+ .metadata_size = ctx->metadata_size,
+ .pi_offset = ctx->pi_offset,
+ .csum_type = ctx->csum_type,
+ .tag_size = ctx->tag_size,
+ };
+}
+
+/*
+ * Integrity bytes that accompany @len data bytes: one q->metadata_size
+ * chunk per 512-byte unit of data. Returns 0 when q->metadata_size is 0.
+ */
+static inline size_t ublk_integrity_len(const struct ublk_queue *q, size_t len)
+{
+ /* All targets currently use interval_exp = logical_bs_shift = 9 */
+ return (len >> 9) * q->metadata_size;
+}
+
+/*
+ * Inverse of ublk_integrity_len(): data bytes covered by @integrity_len
+ * integrity bytes, at 512 data bytes per q->metadata_size chunk.
+ * NOTE(review): divides by q->metadata_size - callers must only use this
+ * when metadata_size is non-zero.
+ */
+static inline size_t
+ublk_integrity_data_len(const struct ublk_queue *q, size_t integrity_len)
+{
+ return (integrity_len / q->metadata_size) << 9;
+}
static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
{
return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF);
}
+/*
+ * Build the file offset used for user-copy pread()/pwrite() on the ublk
+ * char device: UBLKSRV_IO_BUF_OFFSET plus @q_id and @tag shifted into place
+ * by UBLK_QID_OFF and UBLK_TAG_OFF. Both are widened to __u64 before the
+ * shift so the shifted value is not truncated.
+ */
+static inline __u64 ublk_user_copy_offset(unsigned q_id, unsigned tag)
+{
+ return UBLKSRV_IO_BUF_OFFSET +
+ ((__u64)q_id << UBLK_QID_OFF | (__u64)tag << UBLK_TAG_OFF);
+}
+
static inline int is_target_io(__u64 user_data)
{
return (user_data & (1ULL << 63)) != 0;
@@ -217,10 +334,10 @@ static inline __u64 build_user_data(unsigned tag, unsigned op,
unsigned tgt_data, unsigned q_id, unsigned is_target_io)
{
/* we only have 7 bits to encode q_id */
- _Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7);
- assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7));
+ _Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7, "UBLK_MAX_QUEUES_SHIFT must be <= 7");
+ ublk_assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7));
- return tag | (op << 16) | (tgt_data << 24) |
+ return tag | ((__u64)op << 16) | ((__u64)tgt_data << 24) |
(__u64)q_id << 56 | (__u64)is_target_io << 63;
}
@@ -351,33 +468,22 @@ static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
addr[1] = 0;
}
-static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
-{
- return &q->ios[tag];
-}
+static inline unsigned short ublk_batch_io_buf_idx(
+ const struct ublk_thread *t, const struct ublk_queue *q,
+ unsigned tag);
-static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q,
- unsigned tag, int res)
+static inline unsigned short ublk_io_buf_idx(const struct ublk_thread *t,
+ const struct ublk_queue *q,
+ unsigned tag)
{
- struct ublk_io *io = &q->ios[tag];
-
- ublk_mark_io_done(io, res);
-
- return ublk_queue_io_cmd(t, io);
+ if (ublk_queue_batch_io(q))
+ return ublk_batch_io_buf_idx(t, q, tag);
+ return q->ios[tag].buf_index;
}
-static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
- unsigned tag, int queued)
+static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
{
- if (queued < 0)
- ublk_complete_io(t, q, tag, queued);
- else {
- struct ublk_io *io = ublk_get_io(q, tag);
-
- t->io_inflight += queued;
- io->tgt_ios = queued;
- io->result = 0;
- }
+ return &q->ios[tag];
}
static inline int ublk_completed_tgt_io(struct ublk_thread *t,
@@ -390,19 +496,24 @@ static inline int ublk_completed_tgt_io(struct ublk_thread *t,
return --io->tgt_ios == 0;
}
-static inline int ublk_queue_use_zc(const struct ublk_queue *q)
+static inline bool ublk_queue_use_zc(const struct ublk_queue *q)
+{
+ return !!(q->flags & UBLK_F_SUPPORT_ZERO_COPY);
+}
+
+static inline bool ublk_queue_use_auto_zc(const struct ublk_queue *q)
{
- return q->flags & UBLK_F_SUPPORT_ZERO_COPY;
+ return !!(q->flags & UBLK_F_AUTO_BUF_REG);
}
-static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q)
+static inline bool ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
{
- return q->flags & UBLK_F_AUTO_BUF_REG;
+ return !!(q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK);
}
-static inline int ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
+static inline bool ublk_queue_use_user_copy(const struct ublk_queue *q)
{
- return q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK;
+ return !!(q->flags & UBLK_F_USER_COPY);
}
static inline int ublk_queue_no_buf(const struct ublk_queue *q)
@@ -410,12 +521,90 @@ static inline int ublk_queue_no_buf(const struct ublk_queue *q)
return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q);
}
+/*
+ * Non-zero when @cb holds a valid buffer index, i.e. buf_idx is not the
+ * UBLKS_T_COMMIT_BUF_INV_IDX sentinel.
+ */
+static inline int ublk_batch_commit_prepared(struct batch_commit_buf *cb)
+{
+ return cb->buf_idx != UBLKS_T_COMMIT_BUF_INV_IDX;
+}
+
+/*
+ * Return the zero-based index of @q within thread @t.
+ * t->q_map[] stores that index plus one, keyed by queue id; a zero entry
+ * is rejected with ublk_assert().
+ */
+static inline unsigned ublk_queue_idx_in_thread(const struct ublk_thread *t,
+ const struct ublk_queue *q)
+{
+ unsigned char idx;
+
+ idx = t->q_map[q->q_id];
+ ublk_assert(idx != 0);
+ return idx - 1;
+}
+
+/*
+ * Each IO's buffer index has to be calculated by this helper for
+ * UBLKS_T_BATCH_IO
+ *
+ * The index is (queue's index within the thread) * q->q_depth + @tag, so
+ * each queue handled by the thread gets its own run of q_depth indices.
+ */
+static inline unsigned short ublk_batch_io_buf_idx(
+ const struct ublk_thread *t, const struct ublk_queue *q,
+ unsigned tag)
+{
+ return ublk_queue_idx_in_thread(t, q) * q->q_depth + tag;
+}
+
+/* Queue UBLK_U_IO_PREP_IO_CMDS for a specific queue with batch elements */
+int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q);
+/* Start fetching I/O commands using multishot UBLK_U_IO_FETCH_IO_CMDS */
+void ublk_batch_start_fetch(struct ublk_thread *t);
+/* Handle completion of batch I/O commands (prep/commit) */
+void ublk_batch_compl_cmd(struct ublk_thread *t,
+ const struct io_uring_cqe *cqe);
+/* Initialize batch I/O state and calculate buffer parameters */
+void ublk_batch_prepare(struct ublk_thread *t);
+/* Allocate and register commit buffers for batch operations */
+int ublk_batch_alloc_buf(struct ublk_thread *t);
+/* Free commit buffers and cleanup batch allocator */
+void ublk_batch_free_buf(struct ublk_thread *t);
+
+/* Prepare a new commit buffer for batching completed I/O operations */
+void ublk_batch_prep_commit(struct ublk_thread *t);
+/* Submit UBLK_U_IO_COMMIT_IO_CMDS with batched completed I/O operations */
+void ublk_batch_commit_io_cmds(struct ublk_thread *t);
+/* Add a completed I/O operation to the current batch commit buffer */
+void ublk_batch_complete_io(struct ublk_thread *t, struct ublk_queue *q,
+ unsigned tag, int res);
+void ublk_batch_setup_map(unsigned char (*q_thread_map)[UBLK_MAX_QUEUES],
+ int nthreads, int queues);
+
+/*
+ * Complete the I/O identified by @tag on queue @q with result @res.
+ *
+ * Batch I/O queues hand the completion to ublk_batch_complete_io() and
+ * return 0. Other queues mark the io done and return the result of
+ * ublk_queue_io_cmd().
+ */
+static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q,
+ unsigned tag, int res)
+{
+ if (ublk_queue_batch_io(q)) {
+ ublk_batch_complete_io(t, q, tag, res);
+ return 0;
+ } else {
+ struct ublk_io *io = &q->ios[tag];
+
+ ublk_mark_io_done(io, res);
+ return ublk_queue_io_cmd(t, io);
+ }
+}
+
+/*
+ * Record the outcome of queueing target I/O for @tag.
+ *
+ * A negative @queued is treated as an error and the io is completed
+ * immediately with that value as its result. Otherwise @queued is added to
+ * the thread's io_inflight counter, stored in io->tgt_ios, and io->result
+ * is reset to 0.
+ */
+static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
+ unsigned tag, int queued)
+{
+ if (queued < 0)
+ ublk_complete_io(t, q, tag, queued);
+ else {
+ struct ublk_io *io = ublk_get_io(q, tag);
+
+ t->io_inflight += queued;
+ io->tgt_ios = queued;
+ io->result = 0;
+ }
+}
+
extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
extern const struct ublk_tgt_ops fault_inject_tgt_ops;
void backing_file_tgt_deinit(struct ublk_dev *dev);
-int backing_file_tgt_init(struct ublk_dev *dev);
+int backing_file_tgt_init(struct ublk_dev *dev, unsigned int nr_direct);
#endif
diff --git a/tools/testing/selftests/ublk/metadata_size.c b/tools/testing/selftests/ublk/metadata_size.c
new file mode 100644
index 000000000000..76ecddf04d25
--- /dev/null
+++ b/tools/testing/selftests/ublk/metadata_size.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+
+/*
+ * Print the logical block metadata capabilities of a block device.
+ *
+ * Takes exactly one argument, the path to open. The file is queried with
+ * the FS_IOC_GETLBMD_CAP ioctl and the lbmd_size, lbmd_pi_offset and
+ * lbmd_pi_size fields of the result are printed on stdout as the
+ * "metadata_size", "pi_offset" and "pi_tuple_size" lines.
+ *
+ * Returns 0 on success, 1 on a usage error or when open()/ioctl() fails.
+ */
+int main(int argc, char **argv)
+{
+ struct logical_block_metadata_cap cap = {};
+ const char *filename;
+ int fd;
+ int result;
+
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s BLOCK_DEVICE\n", argv[0]);
+ return 1;
+ }
+
+ filename = argv[1];
+ /* fd is never closed explicitly; every path after open() returns from main */
+ fd = open(filename, O_RDONLY);
+ if (fd < 0) {
+ perror(filename);
+ return 1;
+ }
+
+ result = ioctl(fd, FS_IOC_GETLBMD_CAP, &cap);
+ if (result < 0) {
+ perror("ioctl");
+ return 1;
+ }
+
+ /* One "name: value" line per field, in this fixed order */
+ printf("metadata_size: %u\n", cap.lbmd_size);
+ printf("pi_offset: %u\n", cap.lbmd_pi_offset);
+ printf("pi_tuple_size: %u\n", cap.lbmd_pi_size);
+ return 0;
+}
diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c
index 280043f6b689..7656888f4149 100644
--- a/tools/testing/selftests/ublk/null.c
+++ b/tools/testing/selftests/ublk/null.c
@@ -36,6 +36,7 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
.max_segments = 32,
},
};
+ ublk_set_integrity_params(ctx, &dev->tgt.params);
if (info->flags & UBLK_F_SUPPORT_ZERO_COPY)
dev->tgt.sq_depth = dev->tgt.cq_depth = 2 * info->queue_depth;
@@ -43,12 +44,12 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
}
static void __setup_nop_io(int tag, const struct ublksrv_io_desc *iod,
- struct io_uring_sqe *sqe, int q_id)
+ struct io_uring_sqe *sqe, int q_id, unsigned buf_idx)
{
unsigned ublk_op = ublksrv_get_op(iod);
io_uring_prep_nop(sqe);
- sqe->buf_index = tag;
+ sqe->buf_index = buf_idx;
sqe->flags |= IOSQE_FIXED_FILE;
sqe->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT;
sqe->len = iod->nr_sectors << 9; /* injected result */
@@ -60,18 +61,19 @@ static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q,
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
struct io_uring_sqe *sqe[3];
+ unsigned short buf_idx = ublk_io_buf_idx(t, q, tag);
ublk_io_alloc_sqes(t, sqe, 3);
- io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
+ io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, buf_idx);
sqe[0]->user_data = build_user_data(tag,
ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
- __setup_nop_io(tag, iod, sqe[1], q->q_id);
+ __setup_nop_io(tag, iod, sqe[1], q->q_id, buf_idx);
sqe[1]->flags |= IOSQE_IO_HARDLINK;
- io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
+ io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, buf_idx);
sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1);
// buf register is marked as IOSQE_CQE_SKIP_SUCCESS
@@ -85,7 +87,7 @@ static int null_queue_auto_zc_io(struct ublk_thread *t, struct ublk_queue *q,
struct io_uring_sqe *sqe[1];
ublk_io_alloc_sqes(t, sqe, 1);
- __setup_nop_io(tag, iod, sqe[0], q->q_id);
+ __setup_nop_io(tag, iod, sqe[0], q->q_id, ublk_io_buf_idx(t, q, tag));
return 1;
}
@@ -136,11 +138,12 @@ static int ublk_null_queue_io(struct ublk_thread *t, struct ublk_queue *q,
* return invalid buffer index for triggering auto buffer register failure,
* then UBLK_IO_RES_NEED_REG_BUF handling is covered
*/
-static unsigned short ublk_null_buf_index(const struct ublk_queue *q, int tag)
+static unsigned short ublk_null_buf_index(const struct ublk_thread *t,
+ const struct ublk_queue *q, int tag)
{
if (ublk_queue_auto_zc_fallback(q))
return (unsigned short)-1;
- return q->ios[tag].buf_index;
+ return ublk_io_buf_idx(t, q, tag);
}
const struct ublk_tgt_ops null_tgt_ops = {
diff --git a/tools/testing/selftests/ublk/settings b/tools/testing/selftests/ublk/settings
new file mode 100644
index 000000000000..682a40f1c8e6
--- /dev/null
+++ b/tools/testing/selftests/ublk/settings
@@ -0,0 +1 @@
+timeout=150
diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c
index 791fa8dc1651..dca819f5366e 100644
--- a/tools/testing/selftests/ublk/stripe.c
+++ b/tools/testing/selftests/ublk/stripe.c
@@ -96,12 +96,12 @@ static void calculate_stripe_array(const struct stripe_conf *conf,
this->seq = seq;
s->nr += 1;
} else {
- assert(seq == this->seq);
- assert(this->start + this->nr_sects == stripe_off);
+ ublk_assert(seq == this->seq);
+ ublk_assert(this->start + this->nr_sects == stripe_off);
this->nr_sects += nr_sects;
}
- assert(this->nr_vec < this->cap);
+ ublk_assert(this->nr_vec < this->cap);
this->vec[this->nr_vec].iov_base = (void *)(base + done);
this->vec[this->nr_vec++].iov_len = nr_sects << 9;
@@ -120,7 +120,7 @@ static inline enum io_uring_op stripe_to_uring_op(
return zc ? IORING_OP_READV_FIXED : IORING_OP_READV;
else if (ublk_op == UBLK_IO_OP_WRITE)
return zc ? IORING_OP_WRITEV_FIXED : IORING_OP_WRITEV;
- assert(0);
+ ublk_assert(0);
}
static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
@@ -134,7 +134,8 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
struct stripe_array *s = alloc_stripe_array(conf, iod);
struct ublk_io *io = ublk_get_io(q, tag);
int i, extra = zc ? 2 : 0;
- void *base = (zc | auto_zc) ? NULL : (void *)iod->addr;
+ void *base = io->buf_addr;
+ unsigned short buf_idx = ublk_io_buf_idx(t, q, tag);
io->private_data = s;
calculate_stripe_array(conf, iod, s, base);
@@ -142,7 +143,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
ublk_io_alloc_sqes(t, sqe, s->nr + extra);
if (zc) {
- io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, io->buf_index);
+ io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, buf_idx);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
sqe[0]->user_data = build_user_data(tag,
ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
@@ -158,7 +159,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
t->start << 9);
io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
if (auto_zc || zc) {
- sqe[i]->buf_index = tag;
+ sqe[i]->buf_index = buf_idx;
if (zc)
sqe[i]->flags |= IOSQE_IO_HARDLINK;
}
@@ -168,7 +169,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
if (zc) {
struct io_uring_sqe *unreg = sqe[s->nr + 1];
- io_uring_prep_buf_unregister(unreg, q, tag, q->q_id, io->buf_index);
+ io_uring_prep_buf_unregister(unreg, q, tag, q->q_id, buf_idx);
unreg->user_data = build_user_data(
tag, ublk_cmd_op_nr(unreg->cmd_op), 0, q->q_id, 1);
}
@@ -298,6 +299,10 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
ublk_err("%s: not support auto_zc_fallback\n", __func__);
return -EINVAL;
}
+ if (ctx->metadata_size) {
+ ublk_err("%s: integrity not supported\n", __func__);
+ return -EINVAL;
+ }
if ((chunk_size & (chunk_size - 1)) || !chunk_size) {
ublk_err("invalid chunk size %u\n", chunk_size);
@@ -311,14 +316,14 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
chunk_shift = ilog2(chunk_size);
- ret = backing_file_tgt_init(dev);
+ ret = backing_file_tgt_init(dev, dev->tgt.nr_backing_files);
if (ret)
return ret;
if (!dev->tgt.nr_backing_files || dev->tgt.nr_backing_files > NR_STRIPE)
return -EINVAL;
- assert(dev->nr_fds == dev->tgt.nr_backing_files + 1);
+ ublk_assert(dev->nr_fds == dev->tgt.nr_backing_files + 1);
for (i = 0; i < dev->tgt.nr_backing_files; i++)
dev->tgt.backing_file_size[i] &= ~((1 << chunk_shift) - 1);
diff --git a/tools/testing/selftests/ublk/test_batch_01.sh b/tools/testing/selftests/ublk/test_batch_01.sh
new file mode 100755
index 000000000000..a18fb39af8be
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_batch_01.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Basic UBLK_F_BATCH_IO test: run _mkfs_mount_test first on a loop device,
+# then on a stripe device, both created with the -b option.
+# NOTE(review): -b presumably selects batch I/O in kublk; confirm against
+# the kublk option table.
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+# Skip when the BATCH_IO feature is not reported
+if ! _have_feature "BATCH_IO"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "generic" "test basic function of UBLK_F_BATCH_IO"
+
+# Two 256M backing files: the loop device uses only the first, the stripe
+# device uses both
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+# Step 1: loop target with 2 queues
+dev_id=$(_add_ublk_dev -t loop -q 2 -b "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
+
+# A failure on the loop device is reported as 255 after cleaning up
+if ! _mkfs_mount_test /dev/ublkb"${dev_id}"; then
+ _cleanup_test "generic"
+ _show_result $TID 255
+fi
+
+# Step 2: stripe target over both backing files with --auto_zc; its
+# _mkfs_mount_test status becomes the test result
+dev_id=$(_add_ublk_dev -t stripe -b --auto_zc "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
+_mkfs_mount_test /dev/ublkb"${dev_id}"
+ERR_CODE=$?
+
+_cleanup_test "generic"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_batch_02.sh b/tools/testing/selftests/ublk/test_batch_02.sh
new file mode 100755
index 000000000000..7ca384d11987
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_batch_02.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+if ! _have_feature "BATCH_IO"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "generic" "test UBLK_F_BATCH_IO with 4_threads vs. 1_queues"
+
+_create_backfile 0 512M
+
+dev_id=$(_add_ublk_dev -t loop -q 1 --nthreads 4 -b "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
+
+# run fio over the ublk disk
+fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite \
+ --iodepth=32 --size=100M --numjobs=4 > /dev/null 2>&1
+ERR_CODE=$?
+
+_cleanup_test "generic"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_batch_03.sh b/tools/testing/selftests/ublk/test_batch_03.sh
new file mode 100755
index 000000000000..aca9cf144b55
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_batch_03.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+if ! _have_feature "BATCH_IO"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "generic" "test UBLK_F_BATCH_IO with 1_threads vs. 4_queues"
+
+_create_backfile 0 512M
+
+dev_id=$(_add_ublk_dev -t loop -q 4 --nthreads 1 -b "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
+
+# run fio over the ublk disk
+fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite \
+ --iodepth=32 --size=100M --numjobs=4 > /dev/null 2>&1
+ERR_CODE=$?
+
+_cleanup_test "generic"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh
index 8a4dbd09feb0..163a40007910 100755
--- a/tools/testing/selftests/ublk/test_common.sh
+++ b/tools/testing/selftests/ublk/test_common.sh
@@ -1,6 +1,11 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Derive TID from script name: test_<type>_<num>.sh -> <type>_<num>
+# Can be overridden in test script after sourcing this file
+TID=$(basename "$0" .sh)
+TID=${TID#test_}
+
UBLK_SKIP_CODE=4
_have_program() {
@@ -10,6 +15,16 @@ _have_program() {
return 1
}
+# Sleep for a duration that depends on whether tests run in parallel.
+# Usage: _ublk_sleep <normal_secs> <parallel_secs>
+# <parallel_secs> is used when $JOBS is greater than 1; otherwise (including
+# when JOBS is unset or empty) <normal_secs> is used.
+_ublk_sleep() {
+	local secs=$1
+
+	[ "${JOBS:-1}" -gt 1 ] && secs=$2
+	sleep "$secs"
+}
+
_get_disk_dev_t() {
local dev_id=$1
local dev
@@ -43,7 +58,7 @@ _create_backfile() {
old_file="${UBLK_BACKFILES[$index]}"
[ -f "$old_file" ] && rm -f "$old_file"
- new_file=$(mktemp ublk_file_"${new_size}"_XXXXX)
+ new_file=$(mktemp ${UBLK_TEST_DIR}/ublk_file_"${new_size}"_XXXXX)
truncate -s "${new_size}" "${new_file}"
UBLK_BACKFILES["$index"]="$new_file"
}
@@ -60,7 +75,7 @@ _remove_files() {
_create_tmp_dir() {
local my_file;
- my_file=$(mktemp -d ublk_dir_XXXXX)
+ my_file=$(mktemp -d ${UBLK_TEST_DIR}/ublk_dir_XXXXX)
echo "$my_file"
}
@@ -101,11 +116,6 @@ _check_root() {
fi
}
-_remove_ublk_devices() {
- ${UBLK_PROG} del -a
- modprobe -r ublk_drv > /dev/null 2>&1
-}
-
_get_ublk_dev_state() {
${UBLK_PROG} list -n "$1" | grep "state" | awk '{print $11}'
}
@@ -119,8 +129,12 @@ _prep_test() {
local type=$1
shift 1
modprobe ublk_drv > /dev/null 2>&1
- UBLK_TMP=$(mktemp ublk_test_XXXXX)
+ local base_dir=${TMPDIR:-./ublktest-dir}
+ mkdir -p "$base_dir"
+ UBLK_TEST_DIR=$(mktemp -d ${base_dir}/${TID}.XXXXXX)
+ UBLK_TMP=$(mktemp ${UBLK_TEST_DIR}/ublk_test_XXXXX)
[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*"
+ echo "ublk selftest: $TID starting at $(date '+%F %T')" | tee /dev/kmsg
}
_remove_test_files()
@@ -162,9 +176,16 @@ _check_add_dev()
}
+# Tear down one test: delete every device id listed in the per-test
+# .ublk_devs tracking file, call _remove_files, remove the per-test
+# directory and log completion to stdout and /dev/kmsg.
 _cleanup_test() {
-	"${UBLK_PROG}" del -a
+	# Keep the loop variable local so the caller's global dev_id is not
+	# overwritten while the tracking file is read
+	local dev_id
+
+	if [ -f "${UBLK_TEST_DIR}/.ublk_devs" ]; then
+		while read -r dev_id; do
+			"${UBLK_PROG}" del -n "${dev_id}"
+		done < "${UBLK_TEST_DIR}/.ublk_devs"
+		rm -f "${UBLK_TEST_DIR}/.ublk_devs"
+	fi
 	_remove_files
+	# Quoted: the path derives from $TMPDIR and must not be word-split or globbed
+	rmdir "${UBLK_TEST_DIR}"
+	echo "ublk selftest: $TID done at $(date '+%F %T')" | tee /dev/kmsg
 }
_have_feature()
@@ -178,8 +199,9 @@ _have_feature()
_create_ublk_dev() {
local dev_id;
local cmd=$1
+ local settle=$2
- shift 1
+ shift 2
if [ ! -c /dev/ublk-control ]; then
return ${UBLK_SKIP_CODE}
@@ -194,9 +216,13 @@ _create_ublk_dev() {
echo "fail to add ublk dev $*"
return 255
fi
- udevadm settle
+
+ if [ "$settle" = "yes" ]; then
+ udevadm settle --timeout=20
+ fi
if [[ "$dev_id" =~ ^[0-9]+$ ]]; then
+ echo "$dev_id" >> "${UBLK_TEST_DIR}/.ublk_devs"
echo "${dev_id}"
else
return 255
@@ -204,15 +230,19 @@ _create_ublk_dev() {
}
+# Add a ublk device and let _create_ublk_dev wait for udev ("yes" is its
+# settle argument). All remaining arguments are forwarded unchanged.
 _add_ublk_dev() {
- _create_ublk_dev "add" "$@"
+ _create_ublk_dev "add" "yes" "$@"
+}
+
+# Same as _add_ublk_dev, but passes "no" so _create_ublk_dev skips its
+# "udevadm settle" step.
+_add_ublk_dev_no_settle() {
+ _create_ublk_dev "add" "no" "$@"
 }
_recover_ublk_dev() {
local dev_id
local state
- dev_id=$(_create_ublk_dev "recover" "$@")
- for ((j=0;j<20;j++)); do
+ dev_id=$(_create_ublk_dev "recover" "yes" "$@")
+ for ((j=0;j<100;j++)); do
state=$(_get_ublk_dev_state "${dev_id}")
[ "$state" == "LIVE" ] && break
sleep 1
@@ -232,7 +262,7 @@ __ublk_quiesce_dev()
return "$state"
fi
- for ((j=0;j<50;j++)); do
+ for ((j=0;j<100;j++)); do
state=$(_get_ublk_dev_state "${dev_id}")
[ "$state" == "$exp_state" ] && break
sleep 1
@@ -251,7 +281,7 @@ __ublk_kill_daemon()
daemon_pid=$(_get_ublk_daemon_pid "${dev_id}")
state=$(_get_ublk_dev_state "${dev_id}")
- for ((j=0;j<50;j++)); do
+ for ((j=0;j<100;j++)); do
[ "$state" == "$exp_state" ] && break
kill -9 "$daemon_pid" > /dev/null 2>&1
sleep 1
@@ -260,12 +290,23 @@ __ublk_kill_daemon()
echo "$state"
}
-__remove_ublk_dev_return() {
+# Delete ublk device <dev_id> and drop it from the per-test tracking file.
+# Returns the exit status of "${UBLK_PROG} del", not of the bookkeeping that
+# follows, so callers such as __remove_ublk_dev_return see delete failures.
+_ublk_del_dev() {
 	local dev_id=$1
+	local res
 	${UBLK_PROG} del -n "${dev_id}"
+	# Capture the status now; the if/sed below would otherwise replace it
+	res=$?
+
+	# Remove from tracking file
+	if [ -f "${UBLK_TEST_DIR}/.ublk_devs" ]; then
+		sed -i "/^${dev_id}$/d" "${UBLK_TEST_DIR}/.ublk_devs"
+	fi
+
+	return "$res"
+}
+
+# Delete device <dev_id> via _ublk_del_dev, wait up to 20 seconds for udev
+# to settle, and return the exit status that _ublk_del_dev produced.
+__remove_ublk_dev_return() {
+ local dev_id=$1
+
+ _ublk_del_dev "${dev_id}"
  local res=$?
- udevadm settle
+ udevadm settle --timeout=20
  return ${res}
 }
@@ -333,11 +374,12 @@ run_io_and_kill_daemon()
run_io_and_recover()
{
- local action=$1
+ local size=$1
+ local action=$2
local state
local dev_id
- shift 1
+ shift 2
dev_id=$(_add_ublk_dev "$@")
_check_add_dev "$TID" $?
@@ -375,6 +417,16 @@ _ublk_test_top_dir()
cd "$(dirname "$0")" && pwd
}
+METADATA_SIZE_PROG="$(_ublk_test_top_dir)/metadata_size"
+
+# Print the numeric value of one field reported by $METADATA_SIZE_PROG for
+# /dev/ublkb<dev_id>.
+# Usage: _get_metadata_size <dev_id> <field>
+# <field> is used as a grep pattern to select the output line(s); only the
+# digit runs found on the selected line(s) are printed.
+_get_metadata_size()
+{
+ local dev_id=$1
+ local field=$2
+
+ "$METADATA_SIZE_PROG" "/dev/ublkb$dev_id" | grep "$field" | grep -o "[0-9]*"
+}
+
UBLK_PROG=$(_ublk_test_top_dir)/kublk
UBLK_TEST_QUIET=1
UBLK_TEST_SHOW_RESULT=1
diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh
deleted file mode 100755
index 21a31cd5491a..000000000000
--- a/tools/testing/selftests/ublk/test_generic_01.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-
-TID="generic_01"
-ERR_CODE=0
-
-if ! _have_program bpftrace; then
- exit "$UBLK_SKIP_CODE"
-fi
-
-if ! _have_program fio; then
- exit "$UBLK_SKIP_CODE"
-fi
-
-_prep_test "null" "sequential io order"
-
-dev_id=$(_add_ublk_dev -t null)
-_check_add_dev $TID $?
-
-dev_t=$(_get_disk_dev_t "$dev_id")
-bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 &
-btrace_pid=$!
-sleep 2
-
-if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then
- _cleanup_test "null"
- exit "$UBLK_SKIP_CODE"
-fi
-
-# run fio over this ublk disk
-fio --name=write_seq \
- --filename=/dev/ublkb"${dev_id}" \
- --ioengine=libaio --iodepth=16 \
- --rw=write \
- --size=512M \
- --direct=1 \
- --bs=4k > /dev/null 2>&1
-ERR_CODE=$?
-kill "$btrace_pid"
-wait
-if grep -q "io_out_of_order" "$UBLK_TMP"; then
- cat "$UBLK_TMP"
- ERR_CODE=255
-fi
-_cleanup_test "null"
-_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_02.sh b/tools/testing/selftests/ublk/test_generic_02.sh
index 12920768b1a0..46b657143fd6 100755
--- a/tools/testing/selftests/ublk/test_generic_02.sh
+++ b/tools/testing/selftests/ublk/test_generic_02.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="generic_02"
ERR_CODE=0
if ! _have_program bpftrace; then
@@ -14,7 +13,7 @@ if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi
-_prep_test "null" "sequential io order for MQ"
+_prep_test "null" "ublk dispatch won't reorder IO for MQ"
dev_id=$(_add_ublk_dev -t null -q 2)
_check_add_dev $TID $?
@@ -22,15 +21,20 @@ _check_add_dev $TID $?
dev_t=$(_get_disk_dev_t "$dev_id")
bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 &
btrace_pid=$!
-sleep 2
-if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then
+# Wait for bpftrace probes to be attached (BEGIN block prints BPFTRACE_READY)
+for _ in $(seq 100); do
+ grep -q "BPFTRACE_READY" "$UBLK_TMP" 2>/dev/null && break
+ sleep 0.1
+done
+
+if ! kill -0 "$btrace_pid" 2>/dev/null; then
_cleanup_test "null"
exit "$UBLK_SKIP_CODE"
fi
-# run fio over this ublk disk
-fio --name=write_seq \
+# run fio over this ublk disk (pinned to CPU 0)
+taskset -c 0 fio --name=write_seq \
--filename=/dev/ublkb"${dev_id}" \
--ioengine=libaio --iodepth=16 \
--rw=write \
@@ -40,8 +44,11 @@ fio --name=write_seq \
ERR_CODE=$?
kill "$btrace_pid"
wait
-if grep -q "io_out_of_order" "$UBLK_TMP"; then
- cat "$UBLK_TMP"
+
+# Check for out-of-order completions detected by bpftrace
+if grep -q "^out_of_order:" "$UBLK_TMP"; then
+ echo "I/O reordering detected:"
+ grep "^out_of_order:" "$UBLK_TMP"
ERR_CODE=255
fi
_cleanup_test "null"
diff --git a/tools/testing/selftests/ublk/test_generic_03.sh b/tools/testing/selftests/ublk/test_generic_03.sh
index b551aa76cb0d..8934ea926762 100755
--- a/tools/testing/selftests/ublk/test_generic_03.sh
+++ b/tools/testing/selftests/ublk/test_generic_03.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="generic_03"
ERR_CODE=0
_prep_test "null" "check dma & segment limits for zero copy"
diff --git a/tools/testing/selftests/ublk/test_generic_06.sh b/tools/testing/selftests/ublk/test_generic_06.sh
index fd42062b7b76..14a05054fcd8 100755
--- a/tools/testing/selftests/ublk/test_generic_06.sh
+++ b/tools/testing/selftests/ublk/test_generic_06.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="generic_06"
ERR_CODE=0
_prep_test "fault_inject" "fast cleanup when all I/Os of one hctx are in server"
diff --git a/tools/testing/selftests/ublk/test_generic_07.sh b/tools/testing/selftests/ublk/test_generic_07.sh
index cba86451fa5e..8dcfd8978f50 100755
--- a/tools/testing/selftests/ublk/test_generic_07.sh
+++ b/tools/testing/selftests/ublk/test_generic_07.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="generic_07"
ERR_CODE=0
if ! _have_program fio; then
diff --git a/tools/testing/selftests/ublk/test_generic_08.sh b/tools/testing/selftests/ublk/test_generic_08.sh
index b222f3a77e12..ce88c31d6b9c 100755
--- a/tools/testing/selftests/ublk/test_generic_08.sh
+++ b/tools/testing/selftests/ublk/test_generic_08.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="generic_08"
ERR_CODE=0
if ! _have_feature "AUTO_BUF_REG"; then
diff --git a/tools/testing/selftests/ublk/test_generic_09.sh b/tools/testing/selftests/ublk/test_generic_09.sh
index bb6f77ca5522..744d0cdaa242 100755
--- a/tools/testing/selftests/ublk/test_generic_09.sh
+++ b/tools/testing/selftests/ublk/test_generic_09.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="generic_09"
ERR_CODE=0
if ! _have_feature "AUTO_BUF_REG"; then
diff --git a/tools/testing/selftests/ublk/test_generic_10.sh b/tools/testing/selftests/ublk/test_generic_10.sh
index abc11c3d416b..4b4293b9081f 100755
--- a/tools/testing/selftests/ublk/test_generic_10.sh
+++ b/tools/testing/selftests/ublk/test_generic_10.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="generic_10"
ERR_CODE=0
if ! _have_feature "UPDATE_SIZE"; then
diff --git a/tools/testing/selftests/ublk/test_generic_12.sh b/tools/testing/selftests/ublk/test_generic_12.sh
index b4046201b4d9..54b81ddfe9f9 100755
--- a/tools/testing/selftests/ublk/test_generic_12.sh
+++ b/tools/testing/selftests/ublk/test_generic_12.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="generic_12"
ERR_CODE=0
if ! _have_program bpftrace; then
diff --git a/tools/testing/selftests/ublk/test_generic_13.sh b/tools/testing/selftests/ublk/test_generic_13.sh
index b7aa90b1cb74..922115aa14f4 100755
--- a/tools/testing/selftests/ublk/test_generic_13.sh
+++ b/tools/testing/selftests/ublk/test_generic_13.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="generic_13"
ERR_CODE=0
_prep_test "null" "check that feature list is complete"
diff --git a/tools/testing/selftests/ublk/test_generic_16.sh b/tools/testing/selftests/ublk/test_generic_16.sh
new file mode 100755
index 000000000000..3ef367836ac5
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_16.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+_prep_test "null" "stop --safe command"
+
+# Check if SAFE_STOP_DEV feature is supported
+if ! _have_feature "SAFE_STOP_DEV"; then
+ _cleanup_test "null"
+ exit "$UBLK_SKIP_CODE"
+fi
+
+# Test 1: stop --safe on idle device should succeed
+dev_id=$(_add_ublk_dev -t null -q 2 -d 32)
+_check_add_dev $TID $?
+
+# Device is idle (no openers), stop --safe should succeed
+if ! ${UBLK_PROG} stop -n "${dev_id}" --safe; then
+ echo "stop --safe on idle device failed unexpectedly!"
+ ERR_CODE=255
+fi
+
+# Clean up device
+_ublk_del_dev "${dev_id}" > /dev/null 2>&1
+udevadm settle
+
+# Test 2: stop --safe on device with active opener should fail
+dev_id=$(_add_ublk_dev -t null -q 2 -d 32)
+_check_add_dev $TID $?
+
+# Open device in background (dd reads indefinitely)
+dd if=/dev/ublkb${dev_id} of=/dev/null bs=4k iflag=direct > /dev/null 2>&1 &
+dd_pid=$!
+
+# Give dd time to start
+sleep 0.2
+
+# Device has active opener, stop --safe should fail with -EBUSY
+if ${UBLK_PROG} stop -n "${dev_id}" --safe 2>/dev/null; then
+ echo "stop --safe on busy device succeeded unexpectedly!"
+ ERR_CODE=255
+fi
+
+# Kill dd and clean up
+kill $dd_pid 2>/dev/null
+wait $dd_pid 2>/dev/null
+
+# Now device should be idle, regular delete should work
+_ublk_del_dev "${dev_id}"
+udevadm settle
+
+_cleanup_test "null"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_integrity_01.sh b/tools/testing/selftests/ublk/test_integrity_01.sh
new file mode 100755
index 000000000000..6713b280a6ff
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_integrity_01.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+# Compare an observed value with the expected one (string comparison).
+# Usage: _check_value <name> <actual> <expected>
+# Returns 0 on a match. On a mismatch prints "<name> <actual> != <expected>",
+# sets the global ERR_CODE to 255 and returns 1.
+_check_value() {
+	local name=$1
+	local actual=$2
+	local expected=$3
+
+	[ "$actual" = "$expected" ] && return 0
+
+	echo "$name $actual != $expected"
+	ERR_CODE=255
+	return 1
+}
+
+_test_metadata_only() {
+ local dev_id
+
+ dev_id=$(_add_ublk_dev -t null -u --no_auto_part_scan --metadata_size 8)
+ _check_add_dev "$TID" $?
+
+ _check_value "metadata_size" "$(_get_metadata_size "$dev_id" metadata_size)" 8 &&
+ _check_value "pi_offset" "$(_get_metadata_size "$dev_id" pi_offset)" 0 &&
+ _check_value "pi_tuple_size" "$(_get_metadata_size "$dev_id" pi_tuple_size)" 0 &&
+ _check_value "device_is_integrity_capable" \
+ "$(cat "/sys/block/ublkb$dev_id/integrity/device_is_integrity_capable")" 0 &&
+ _check_value "format" "$(cat "/sys/block/ublkb$dev_id/integrity/format")" nop &&
+ _check_value "protection_interval_bytes" \
+ "$(cat "/sys/block/ublkb$dev_id/integrity/protection_interval_bytes")" 512 &&
+ _check_value "tag_size" "$(cat "/sys/block/ublkb$dev_id/integrity/tag_size")" 0
+
+ _ublk_del_dev "${dev_id}"
+}
+
+_test_integrity_capable_ip() {
+ local dev_id
+
+ dev_id=$(_add_ublk_dev -t null -u --no_auto_part_scan --integrity_capable --metadata_size 64 --pi_offset 56 --csum_type ip)
+ _check_add_dev "$TID" $?
+
+ _check_value "metadata_size" "$(_get_metadata_size "$dev_id" metadata_size)" 64 &&
+ _check_value "pi_offset" "$(_get_metadata_size "$dev_id" pi_offset)" 56 &&
+ _check_value "pi_tuple_size" "$(_get_metadata_size "$dev_id" pi_tuple_size)" 8 &&
+ _check_value "device_is_integrity_capable" \
+ "$(cat "/sys/block/ublkb$dev_id/integrity/device_is_integrity_capable")" 1 &&
+ _check_value "format" "$(cat "/sys/block/ublkb$dev_id/integrity/format")" T10-DIF-TYPE3-IP &&
+ _check_value "protection_interval_bytes" \
+ "$(cat "/sys/block/ublkb$dev_id/integrity/protection_interval_bytes")" 512 &&
+ _check_value "tag_size" "$(cat "/sys/block/ublkb$dev_id/integrity/tag_size")" 0
+
+ _ublk_del_dev "${dev_id}"
+}
+
+_test_integrity_reftag_t10dif() {
+ local dev_id
+
+ dev_id=$(_add_ublk_dev -t null -u --no_auto_part_scan --integrity_reftag --metadata_size 8 --csum_type t10dif)
+ _check_add_dev "$TID" $?
+
+ _check_value "metadata_size" "$(_get_metadata_size "$dev_id" metadata_size)" 8 &&
+ _check_value "pi_offset" "$(_get_metadata_size "$dev_id" pi_offset)" 0 &&
+ _check_value "pi_tuple_size" "$(_get_metadata_size "$dev_id" pi_tuple_size)" 8 &&
+ _check_value "device_is_integrity_capable" \
+ "$(cat "/sys/block/ublkb$dev_id/integrity/device_is_integrity_capable")" 0 &&
+ _check_value "format" "$(cat "/sys/block/ublkb$dev_id/integrity/format")" T10-DIF-TYPE1-CRC &&
+ _check_value "protection_interval_bytes" \
+ "$(cat "/sys/block/ublkb$dev_id/integrity/protection_interval_bytes")" 512 &&
+ _check_value "tag_size" "$(cat "/sys/block/ublkb$dev_id/integrity/tag_size")" 0
+
+ _ublk_del_dev "${dev_id}"
+}
+
+_test_nvme_csum() {
+ local dev_id
+
+ dev_id=$(_add_ublk_dev -t null -u --no_auto_part_scan --metadata_size 16 --csum_type nvme --tag_size 8)
+ _check_add_dev "$TID" $?
+
+ _check_value "metadata_size" "$(_get_metadata_size "$dev_id" metadata_size)" 16 &&
+ _check_value "pi_offset" "$(_get_metadata_size "$dev_id" pi_offset)" 0 &&
+ _check_value "pi_tuple_size" "$(_get_metadata_size "$dev_id" pi_tuple_size)" 16 &&
+ _check_value "device_is_integrity_capable" \
+ "$(cat "/sys/block/ublkb$dev_id/integrity/device_is_integrity_capable")" 0 &&
+ _check_value "format" "$(cat "/sys/block/ublkb$dev_id/integrity/format")" EXT-DIF-TYPE3-CRC64 &&
+ _check_value "protection_interval_bytes" \
+ "$(cat "/sys/block/ublkb$dev_id/integrity/protection_interval_bytes")" 512 &&
+ _check_value "tag_size" "$(cat "/sys/block/ublkb$dev_id/integrity/tag_size")" 8
+
+ _ublk_del_dev "${dev_id}"
+}
+
+_prep_test "null" "integrity params"
+
+_test_metadata_only
+_test_integrity_capable_ip
+_test_integrity_reftag_t10dif
+_test_nvme_csum
+
+_cleanup_test
+_show_result "$TID" $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_integrity_02.sh b/tools/testing/selftests/ublk/test_integrity_02.sh
new file mode 100755
index 000000000000..aaf1f52da559
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_integrity_02.sh
@@ -0,0 +1,141 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+if ! _have_program fio; then
+ exit $UBLK_SKIP_CODE
+fi
+
+fio_version=$(fio --version)
+if [[ "$fio_version" =~ fio-[0-9]+\.[0-9]+$ ]]; then
+ echo "Requires development fio version with https://github.com/axboe/fio/pull/1992"
+ exit $UBLK_SKIP_CODE
+fi
+
+ERR_CODE=0
+
+# Global variables set during device setup
+dev_id=""
+fio_args=""
+fio_err=""
+
+_setup_device() {
+ _create_backfile 0 256M
+ _create_backfile 1 32M # 256M * (64 integrity bytes / 512 data bytes)
+
+ local integrity_params="--integrity_capable --integrity_reftag
+ --metadata_size 64 --pi_offset 56 --csum_type t10dif"
+ dev_id=$(_add_ublk_dev -t loop -u $integrity_params "${UBLK_BACKFILES[@]}")
+ _check_add_dev "$TID" $?
+
+ # 1M * (64 integrity bytes / 512 data bytes) = 128K
+ fio_args="--ioengine io_uring --direct 1 --bsrange 512-1M --iodepth 32
+ --md_per_io_size 128K --pi_act 0 --pi_chk GUARD,REFTAG,APPTAG
+ --filename /dev/ublkb$dev_id"
+
+ fio_err=$(mktemp "${UBLK_TEST_DIR}"/fio_err_XXXXX)
+}
+
+# Run a fio randwrite job ("fill") followed by a fio randread job ("verify"),
+# both with the shared $fio_args. If either fio run fails, print a message,
+# set the global ERR_CODE to 255 and return 1.
+_test_fill_and_verify() {
+	# $fio_args is left unquoted on purpose: it holds several options
+	if ! fio --name fill --rw randwrite $fio_args > /dev/null; then
+		echo "fio fill failed"
+		ERR_CODE=255
+		return 1
+	fi
+
+	if ! fio --name verify --rw randread $fio_args > /dev/null; then
+		echo "fio verify failed"
+		ERR_CODE=255
+		return 1
+	fi
+}
+
+_test_corrupted_reftag() {
+ local dd_reftag_args="bs=1 seek=60 count=4 oflag=dsync conv=notrunc status=none"
+ local expected_err="REFTAG compare error: LBA: 0 Expected=0, Actual="
+
+ # Overwrite 4-byte reftag at offset 56 + 4 = 60
+ dd if=/dev/urandom "of=${UBLK_BACKFILES[1]}" $dd_reftag_args
+ if [ $? != 0 ]; then
+ echo "dd corrupted_reftag failed"
+ ERR_CODE=255
+ return 1
+ fi
+
+ if fio --name corrupted_reftag --rw randread $fio_args > /dev/null 2> "$fio_err"; then
+ echo "fio corrupted_reftag unexpectedly succeeded"
+ ERR_CODE=255
+ return 1
+ fi
+
+ if ! grep -q "$expected_err" "$fio_err"; then
+ echo "fio corrupted_reftag message not found: $expected_err"
+ ERR_CODE=255
+ return 1
+ fi
+
+ # Reset to 0
+ dd if=/dev/zero "of=${UBLK_BACKFILES[1]}" $dd_reftag_args
+ if [ $? != 0 ]; then
+ echo "dd restore corrupted_reftag failed"
+ ERR_CODE=255
+ return 1
+ fi
+}
+
+_test_corrupted_data() {
+ local dd_data_args="bs=512 count=1 oflag=direct,dsync conv=notrunc status=none"
+ local expected_err="Guard compare error: LBA: 0 Expected=0, Actual="
+
+ dd if=/dev/zero "of=${UBLK_BACKFILES[0]}" $dd_data_args
+ if [ $? != 0 ]; then
+ echo "dd corrupted_data failed"
+ ERR_CODE=255
+ return 1
+ fi
+
+ if fio --name corrupted_data --rw randread $fio_args > /dev/null 2> "$fio_err"; then
+ echo "fio corrupted_data unexpectedly succeeded"
+ ERR_CODE=255
+ return 1
+ fi
+
+ if ! grep -q "$expected_err" "$fio_err"; then
+ echo "fio corrupted_data message not found: $expected_err"
+ ERR_CODE=255
+ return 1
+ fi
+}
+
+_test_bad_apptag() {
+ local expected_err="APPTAG compare error: LBA: [0-9]* Expected=4321, Actual=1234"
+
+ if fio --name bad_apptag --rw randread $fio_args --apptag 0x4321 > /dev/null 2> "$fio_err"; then
+ echo "fio bad_apptag unexpectedly succeeded"
+ ERR_CODE=255
+ return 1
+ fi
+
+ if ! grep -q "$expected_err" "$fio_err"; then
+ echo "fio bad_apptag message not found: $expected_err"
+ ERR_CODE=255
+ return 1
+ fi
+}
+
+_prep_test "loop" "end-to-end integrity"
+
+_setup_device
+
+_test_fill_and_verify && \
+_test_corrupted_reftag && \
+_test_corrupted_data && \
+_test_bad_apptag
+
+rm -f "$fio_err"
+
+_cleanup_test
+_show_result "$TID" $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_01.sh b/tools/testing/selftests/ublk/test_loop_01.sh
index 833fa0dbc700..338a235fd82a 100755
--- a/tools/testing/selftests/ublk/test_loop_01.sh
+++ b/tools/testing/selftests/ublk/test_loop_01.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="loop_01"
ERR_CODE=0
if ! _have_program fio; then
diff --git a/tools/testing/selftests/ublk/test_loop_02.sh b/tools/testing/selftests/ublk/test_loop_02.sh
index 874568b3646b..04c52454e2ec 100755
--- a/tools/testing/selftests/ublk/test_loop_02.sh
+++ b/tools/testing/selftests/ublk/test_loop_02.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="loop_02"
ERR_CODE=0
_prep_test "loop" "mkfs & mount & umount"
diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh
index c30f797c6429..6e8f649fe93d 100755
--- a/tools/testing/selftests/ublk/test_loop_03.sh
+++ b/tools/testing/selftests/ublk/test_loop_03.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="loop_03"
ERR_CODE=0
if ! _have_program fio; then
diff --git a/tools/testing/selftests/ublk/test_loop_04.sh b/tools/testing/selftests/ublk/test_loop_04.sh
index b01d75b3214d..9f6774ec0de6 100755
--- a/tools/testing/selftests/ublk/test_loop_04.sh
+++ b/tools/testing/selftests/ublk/test_loop_04.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="loop_04"
ERR_CODE=0
_prep_test "loop" "mkfs & mount & umount with zero copy"
diff --git a/tools/testing/selftests/ublk/test_loop_05.sh b/tools/testing/selftests/ublk/test_loop_05.sh
index de2141533074..2b8d99e007be 100755
--- a/tools/testing/selftests/ublk/test_loop_05.sh
+++ b/tools/testing/selftests/ublk/test_loop_05.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="loop_05"
ERR_CODE=0
if ! _have_program fio; then
diff --git a/tools/testing/selftests/ublk/test_loop_06.sh b/tools/testing/selftests/ublk/test_loop_06.sh
new file mode 100755
index 000000000000..e73f6f4844db
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_loop_06.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "loop" "write and verify over user copy"
+
+_create_backfile 0 256M
+dev_id=$(_add_ublk_dev -t loop -u "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
+
+# run fio over the ublk disk
+_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
+ERR_CODE=$?
+
+_cleanup_test "loop"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_07.sh b/tools/testing/selftests/ublk/test_loop_07.sh
new file mode 100755
index 000000000000..264d20e7c530
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_loop_07.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+_prep_test "loop" "mkfs & mount & umount with user copy"
+
+_create_backfile 0 256M
+
+dev_id=$(_add_ublk_dev -t loop -u "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
+
+_mkfs_mount_test /dev/ublkb"${dev_id}"
+ERR_CODE=$?
+
+_cleanup_test "loop"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_null_01.sh b/tools/testing/selftests/ublk/test_null_01.sh
index c2cb8f7a09fe..eebce8076530 100755
--- a/tools/testing/selftests/ublk/test_null_01.sh
+++ b/tools/testing/selftests/ublk/test_null_01.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="null_01"
ERR_CODE=0
if ! _have_program fio; then
diff --git a/tools/testing/selftests/ublk/test_null_02.sh b/tools/testing/selftests/ublk/test_null_02.sh
index 8accd35beb55..654bdff39664 100755
--- a/tools/testing/selftests/ublk/test_null_02.sh
+++ b/tools/testing/selftests/ublk/test_null_02.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="null_02"
ERR_CODE=0
if ! _have_program fio; then
diff --git a/tools/testing/selftests/ublk/test_null_03.sh b/tools/testing/selftests/ublk/test_null_03.sh
new file mode 100755
index 000000000000..29cd09f06672
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_null_03.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "null" "basic IO test with user copy"
+
+dev_id=$(_add_ublk_dev -t null -u)
+_check_add_dev $TID $?
+
+# run fio over the ublk disk
+fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1
+ERR_CODE=$?
+
+_cleanup_test "null"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_part_01.sh b/tools/testing/selftests/ublk/test_part_01.sh
new file mode 100755
index 000000000000..8028f6e4b3a5
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_part_01.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+format_backing_file()
+{
+ local backing_file=$1
+ local tmp_dev ret
+
+ # Expose the backing file as a temporary ublk loop device. The assignment
+ # is kept apart from 'local' so a failed add is not masked by local's status.
+ tmp_dev=$(_add_ublk_dev -t loop "${backing_file}") || return 1
+
+ # Write a DOS partition table with two 100MiB partitions via sfdisk
+ sfdisk /dev/ublkb"${tmp_dev}" > /dev/null 2>&1 <<EOF
+label: dos
+start=2048, size=100MiB, type=83
+start=206848, size=100MiB, type=83
+EOF
+ ret=$?
+
+ "${UBLK_PROG}" del -n "${tmp_dev}"
+ return $ret
+}
+
+test_auto_part_scan()
+{
+ local backing_file=$1
+ local dev_id
+
+ # Create device WITHOUT --no_auto_part_scan; assigned apart from 'local'
+ # so that a failed add is not masked by the exit status of 'local'
+ dev_id=$(_add_ublk_dev -t loop "${backing_file}") || return 1
+ udevadm settle
+
+ # Partitions should be auto-detected
+ if [ ! -e /dev/ublkb"${dev_id}"p1 ] || [ ! -e /dev/ublkb"${dev_id}"p2 ]; then
+ "${UBLK_PROG}" del -n "${dev_id}"
+ return 1
+ fi
+
+ "${UBLK_PROG}" del -n "${dev_id}"
+ return 0
+}
+
+test_no_auto_part_scan()
+{
+ local backing_file=$1
+ local dev_id
+
+ # Create device WITH --no_auto_part_scan; assigned apart from 'local' so
+ # that a failed add is not masked by the exit status of 'local'
+ dev_id=$(_add_ublk_dev -t loop --no_auto_part_scan "${backing_file}") || return 1
+ udevadm settle
+
+ # Partitions should NOT be auto-detected
+ if [ -e /dev/ublkb"${dev_id}"p1 ]; then
+ "${UBLK_PROG}" del -n "${dev_id}"
+ return 1
+ fi
+
+ # Manual scan should work
+ blockdev --rereadpt /dev/ublkb"${dev_id}" > /dev/null 2>&1
+ udevadm settle
+
+ if [ ! -e /dev/ublkb"${dev_id}"p1 ] || [ ! -e /dev/ublkb"${dev_id}"p2 ]; then
+ "${UBLK_PROG}" del -n "${dev_id}"
+ return 1
+ fi
+
+ "${UBLK_PROG}" del -n "${dev_id}"
+ return 0
+}
+
+if ! _have_program sfdisk || ! _have_program blockdev; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "generic" "test UBLK_F_NO_AUTO_PART_SCAN"
+
+if ! _have_feature "UBLK_F_NO_AUTO_PART_SCAN"; then
+ _cleanup_test "generic"
+ exit "$UBLK_SKIP_CODE"
+fi
+
+
+# Create and format backing file with partition table
+_create_backfile 0 256M
+format_backing_file "${UBLK_BACKFILES[0]}"
+[ $? -ne 0 ] && ERR_CODE=255
+
+# Test normal auto partition scan
+[ "$ERR_CODE" -eq 0 ] && test_auto_part_scan "${UBLK_BACKFILES[0]}"
+[ $? -ne 0 ] && ERR_CODE=255
+
+# Test no auto partition scan with manual scan
+[ "$ERR_CODE" -eq 0 ] && test_no_auto_part_scan "${UBLK_BACKFILES[0]}"
+[ $? -ne 0 ] && ERR_CODE=255
+
+_cleanup_test "generic"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_part_02.sh b/tools/testing/selftests/ublk/test_part_02.sh
new file mode 100755
index 000000000000..7d42ab4d6e83
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_part_02.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+_test_partition_scan_no_hang()
+{
+ local recovery_flag=$1
+ local expected_state=$2
+ local dev_id
+ local state
+ local daemon_pid
+ local start_time
+ local elapsed
+
+ # Create ublk device with fault_inject target and very large delay
+ # to simulate hang during partition table read
+ # --delay_us 60000000 = 60 seconds delay
+ # Use _add_ublk_dev_no_settle to avoid udevadm settle hang waiting
+ # for partition scan events to complete
+ if [ "$recovery_flag" = "yes" ]; then
+ echo "Testing partition scan with recovery support..."
+ dev_id=$(_add_ublk_dev_no_settle -t fault_inject -q 1 -d 1 --delay_us 60000000 -r 1)
+ else
+ echo "Testing partition scan without recovery..."
+ dev_id=$(_add_ublk_dev_no_settle -t fault_inject -q 1 -d 1 --delay_us 60000000)
+ fi
+
+ _check_add_dev "$TID" $?
+
+ # The add command should return quickly because partition scan is async.
+ # Now sleep briefly to let the async partition scan work start and hit
+ # the delay in the fault_inject handler.
+ _ublk_sleep 1 5
+
+ # Kill the ublk daemon while partition scan is potentially blocked
+ # And check state transitions properly
+ start_time=${SECONDS}
+ daemon_pid=$(_get_ublk_daemon_pid "${dev_id}")
+ state=$(__ublk_kill_daemon "${dev_id}" "${expected_state}")
+ elapsed=$((SECONDS - start_time))
+
+ # Verify the device transitioned to expected state
+ if [ "$state" != "${expected_state}" ]; then
+ echo "FAIL: Device state is $state, expected ${expected_state}"
+ ERR_CODE=255
+ _ublk_del_dev "${dev_id}" > /dev/null 2>&1
+ return
+ fi
+ echo "PASS: Device transitioned to ${expected_state} in ${elapsed}s without hanging"
+
+ # Clean up the device
+ _ublk_del_dev "${dev_id}" > /dev/null 2>&1
+}
+
+_prep_test "partition_scan" "verify async partition scan prevents IO hang"
+
+# Test 1: Without recovery support - should transition to DEAD
+_test_partition_scan_no_hang "no" "DEAD"
+
+# Test 2: With recovery support - should transition to QUIESCED
+_test_partition_scan_no_hang "yes" "QUIESCED"
+
+_cleanup_test "partition_scan"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_04.sh b/tools/testing/selftests/ublk/test_recover_01.sh
index 8b533217d4a1..2672f9c40fa8 100755
--- a/tools/testing/selftests/ublk/test_generic_04.sh
+++ b/tools/testing/selftests/ublk/test_recover_01.sh
@@ -3,12 +3,11 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="generic_04"
ERR_CODE=0
ublk_run_recover_test()
{
- run_io_and_recover "kill_daemon" "$@"
+ run_io_and_recover 256M "kill_daemon" "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
@@ -26,6 +25,11 @@ _create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M
+ublk_run_recover_test -t null -q 2 -r 1 -b &
+ublk_run_recover_test -t loop -q 2 -r 1 -b "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 -b "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
ublk_run_recover_test -t null -q 2 -r 1 &
ublk_run_recover_test -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" &
ublk_run_recover_test -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_recover_02.sh
index 398e9e2b58e1..bda5064bc31f 100755
--- a/tools/testing/selftests/ublk/test_generic_05.sh
+++ b/tools/testing/selftests/ublk/test_recover_02.sh
@@ -3,12 +3,11 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="generic_05"
ERR_CODE=0
ublk_run_recover_test()
{
- run_io_and_recover "kill_daemon" "$@"
+ run_io_and_recover 256M "kill_daemon" "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
@@ -30,6 +29,11 @@ _create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M
+ublk_run_recover_test -t null -q 2 -r 1 -z -b &
+ublk_run_recover_test -t loop -q 2 -r 1 -z -b "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 -z -b "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
ublk_run_recover_test -t null -q 2 -r 1 -z &
ublk_run_recover_test -t loop -q 2 -r 1 -z "${UBLK_BACKFILES[0]}" &
ublk_run_recover_test -t stripe -q 2 -r 1 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
diff --git a/tools/testing/selftests/ublk/test_generic_11.sh b/tools/testing/selftests/ublk/test_recover_03.sh
index a00357a5ec6b..e0dc0b8fe5d6 100755
--- a/tools/testing/selftests/ublk/test_generic_11.sh
+++ b/tools/testing/selftests/ublk/test_recover_03.sh
@@ -3,12 +3,11 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="generic_11"
ERR_CODE=0
ublk_run_quiesce_recover()
{
- run_io_and_recover "quiesce_dev" "$@"
+ run_io_and_recover 256M "quiesce_dev" "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
diff --git a/tools/testing/selftests/ublk/test_recover_04.sh b/tools/testing/selftests/ublk/test_recover_04.sh
new file mode 100755
index 000000000000..178443394ca5
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_recover_04.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+ublk_run_recover_test()
+{
+ run_io_and_recover 256M "kill_daemon" "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "recover" "basic recover function verification (user copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_run_recover_test -t null -q 2 -r 1 -u &
+ublk_run_recover_test -t loop -q 2 -r 1 -u "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 -u "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+ublk_run_recover_test -t null -q 2 -r 1 -u -i 1 &
+ublk_run_recover_test -t loop -q 2 -r 1 -u -i 1 "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 -u -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "recover"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh
index 7d3150f057d4..a9322ce496e9 100755
--- a/tools/testing/selftests/ublk/test_stress_01.sh
+++ b/tools/testing/selftests/ublk/test_stress_01.sh
@@ -2,7 +2,6 @@
# SPDX-License-Identifier: GPL-2.0
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="stress_01"
ERR_CODE=0
ublk_io_and_remove()
diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh
index 4bdd921081e5..6c114194f9c9 100755
--- a/tools/testing/selftests/ublk/test_stress_02.sh
+++ b/tools/testing/selftests/ublk/test_stress_02.sh
@@ -2,7 +2,6 @@
# SPDX-License-Identifier: GPL-2.0
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="stress_02"
ERR_CODE=0
if ! _have_program fio; then
diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh
index 3ed4c9b2d8c0..4e81ca0db758 100755
--- a/tools/testing/selftests/ublk/test_stress_03.sh
+++ b/tools/testing/selftests/ublk/test_stress_03.sh
@@ -2,7 +2,6 @@
# SPDX-License-Identifier: GPL-2.0
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="stress_03"
ERR_CODE=0
ublk_io_and_remove()
diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh
index 3f901db4d09d..6c6f44b172bc 100755
--- a/tools/testing/selftests/ublk/test_stress_04.sh
+++ b/tools/testing/selftests/ublk/test_stress_04.sh
@@ -2,7 +2,6 @@
# SPDX-License-Identifier: GPL-2.0
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="stress_04"
ERR_CODE=0
ublk_io_and_kill_daemon()
@@ -31,21 +30,23 @@ _create_backfile 2 128M
ublk_io_and_kill_daemon 8G -t null -q 4 -z --no_ublk_fixed_fd &
ublk_io_and_kill_daemon 256M -t loop -q 4 -z --no_ublk_fixed_fd "${UBLK_BACKFILES[0]}" &
ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
if _have_feature "AUTO_BUF_REG"; then
ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc &
ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" &
ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc --no_ublk_fixed_fd "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback &
+ wait
fi
if _have_feature "PER_IO_DAEMON"; then
- ublk_io_and_kill_daemon 8G -t null -q 4 --nthreads 8 --per_io_tasks &
- ublk_io_and_kill_daemon 256M -t loop -q 4 --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" &
- ublk_io_and_kill_daemon 256M -t stripe -q 4 --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
- ublk_io_and_kill_daemon 8G -t null -q 4 --nthreads 8 --per_io_tasks &
+ ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc --nthreads 8 --per_io_tasks &
+ ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" &
+ ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+ ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback --nthreads 8 --per_io_tasks &
+ wait
fi
-wait
_cleanup_test "stress"
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh
index 274295061042..7e9324de2030 100755
--- a/tools/testing/selftests/ublk/test_stress_05.sh
+++ b/tools/testing/selftests/ublk/test_stress_05.sh
@@ -2,7 +2,6 @@
# SPDX-License-Identifier: GPL-2.0
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="stress_05"
ERR_CODE=0
if ! _have_program fio; then
@@ -58,17 +57,17 @@ done
if _have_feature "ZERO_COPY"; then
for reissue in $(seq 0 1); do
- ublk_io_and_remove 8G -t null -q 4 -g -z -r 1 -i "$reissue" &
- ublk_io_and_remove 256M -t loop -q 4 -g -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
+ ublk_io_and_remove 8G -t null -q 4 -z -r 1 -i "$reissue" &
+ ublk_io_and_remove 256M -t loop -q 4 -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
wait
done
fi
if _have_feature "AUTO_BUF_REG"; then
for reissue in $(seq 0 1); do
- ublk_io_and_remove 8G -t null -q 4 -g --auto_zc -r 1 -i "$reissue" &
- ublk_io_and_remove 256M -t loop -q 4 -g --auto_zc -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
- ublk_io_and_remove 8G -t null -q 4 -g -z --auto_zc --auto_zc_fallback -r 1 -i "$reissue" &
+ ublk_io_and_remove 8G -t null -q 4 --auto_zc -r 1 -i "$reissue" &
+ ublk_io_and_remove 256M -t loop -q 4 --auto_zc -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
+ ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback -r 1 -i "$reissue" &
wait
done
fi
diff --git a/tools/testing/selftests/ublk/test_stress_06.sh b/tools/testing/selftests/ublk/test_stress_06.sh
new file mode 100755
index 000000000000..c72e5d0b14be
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_06.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+ERR_CODE=0
+
+ublk_io_and_remove()
+{
+ run_io_and_remove "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "stress" "run IO and remove device (user copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_io_and_remove 8G -t null -q 4 -u &
+ublk_io_and_remove 256M -t loop -q 4 -u "${UBLK_BACKFILES[0]}" &
+ublk_io_and_remove 256M -t stripe -q 4 -u "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+ublk_io_and_remove 8G -t null -q 4 -u --nthreads 8 --per_io_tasks &
+ublk_io_and_remove 256M -t loop -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" &
+ublk_io_and_remove 256M -t stripe -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "stress"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_07.sh b/tools/testing/selftests/ublk/test_stress_07.sh
new file mode 100755
index 000000000000..04c2764d5238
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_07.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+ERR_CODE=0
+
+ublk_io_and_kill_daemon()
+{
+ run_io_and_kill_daemon "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "stress" "run IO and kill ublk server (user copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_io_and_kill_daemon 8G -t null -q 4 -u --no_ublk_fixed_fd &
+ublk_io_and_kill_daemon 256M -t loop -q 4 -u --no_ublk_fixed_fd "${UBLK_BACKFILES[0]}" &
+ublk_io_and_kill_daemon 256M -t stripe -q 4 -u "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+ublk_io_and_kill_daemon 8G -t null -q 4 -u --nthreads 8 --per_io_tasks &
+ublk_io_and_kill_daemon 256M -t loop -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" &
+ublk_io_and_kill_daemon 256M -t stripe -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "stress"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_08.sh b/tools/testing/selftests/ublk/test_stress_08.sh
new file mode 100755
index 000000000000..37f7d204879a
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_08.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+ERR_CODE=0
+
+ublk_io_and_remove()
+{
+ run_io_and_remove "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+if ! _have_feature "ZERO_COPY"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+if ! _have_feature "AUTO_BUF_REG"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+if ! _have_feature "BATCH_IO"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "stress" "run IO and remove device(zero copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_io_and_remove 8G -t null -q 4 -b &
+ublk_io_and_remove 256M -t loop -q 4 --auto_zc -b "${UBLK_BACKFILES[0]}" &
+ublk_io_and_remove 256M -t stripe -q 4 --auto_zc -b "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback -b &
+wait
+
+_cleanup_test "stress"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_09.sh b/tools/testing/selftests/ublk/test_stress_09.sh
new file mode 100755
index 000000000000..53c1e3b2ab30
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_09.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+ERR_CODE=0
+
+ublk_io_and_kill_daemon()
+{
+ run_io_and_kill_daemon "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+if ! _have_feature "ZERO_COPY"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+if ! _have_feature "AUTO_BUF_REG"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+if ! _have_feature "BATCH_IO"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "stress" "run IO and kill ublk server(zero copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_io_and_kill_daemon 8G -t null -q 4 -z -b &
+ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc -b "${UBLK_BACKFILES[0]}" &
+ublk_io_and_kill_daemon 256M -t stripe -q 4 -b "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback -b &
+wait
+
+_cleanup_test "stress"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh
index 4e4f0fdf3c9b..3bc821aadad8 100755
--- a/tools/testing/selftests/ublk/test_stripe_01.sh
+++ b/tools/testing/selftests/ublk/test_stripe_01.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="stripe_01"
ERR_CODE=0
if ! _have_program fio; then
diff --git a/tools/testing/selftests/ublk/test_stripe_02.sh b/tools/testing/selftests/ublk/test_stripe_02.sh
index 5820ab2efba4..4a7d2b21a6bf 100755
--- a/tools/testing/selftests/ublk/test_stripe_02.sh
+++ b/tools/testing/selftests/ublk/test_stripe_02.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="stripe_02"
ERR_CODE=0
_prep_test "stripe" "mkfs & mount & umount"
diff --git a/tools/testing/selftests/ublk/test_stripe_03.sh b/tools/testing/selftests/ublk/test_stripe_03.sh
index 20b977e27814..a1c159d54e53 100755
--- a/tools/testing/selftests/ublk/test_stripe_03.sh
+++ b/tools/testing/selftests/ublk/test_stripe_03.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="stripe_03"
ERR_CODE=0
if ! _have_program fio; then
diff --git a/tools/testing/selftests/ublk/test_stripe_04.sh b/tools/testing/selftests/ublk/test_stripe_04.sh
index 1b51ed2f1d84..0c30bd6c2b3b 100755
--- a/tools/testing/selftests/ublk/test_stripe_04.sh
+++ b/tools/testing/selftests/ublk/test_stripe_04.sh
@@ -3,7 +3,6 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
-TID="stripe_04"
ERR_CODE=0
_prep_test "stripe" "mkfs & mount & umount on zero copy"
diff --git a/tools/testing/selftests/ublk/test_stripe_05.sh b/tools/testing/selftests/ublk/test_stripe_05.sh
new file mode 100755
index 000000000000..6ddfa88ad226
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stripe_05.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "stripe" "write and verify test on user copy"
+
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+dev_id=$(_add_ublk_dev -t stripe -q 2 -u "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
+
+# run fio over the ublk disk
+_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M
+ERR_CODE=$?
+
+_cleanup_test "stripe"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_06.sh b/tools/testing/selftests/ublk/test_stripe_06.sh
new file mode 100755
index 000000000000..a2c7bf4cc613
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stripe_06.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+_prep_test "stripe" "mkfs & mount & umount on user copy"
+
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+dev_id=$(_add_ublk_dev -t stripe -u -q 2 "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
+
+_mkfs_mount_test /dev/ublkb"${dev_id}"
+ERR_CODE=$?
+
+_cleanup_test "stripe"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt
index 272ac54c9d5f..9d36ba35468f 100644
--- a/tools/testing/selftests/ublk/trace/seq_io.bt
+++ b/tools/testing/selftests/ublk/trace/seq_io.bt
@@ -2,24 +2,52 @@
$1: dev_t
$2: RWBS
$3: strlen($2)
+
+ Track request order between block_io_start and block_rq_complete.
+ Sequence starts at 1 so 0 means "never seen". On first valid
+ completion, sync complete_seq to handle probe attachment races.
+ block_rq_complete listed first to reduce missed completion window.
*/
+
BEGIN {
- @last_rw[$1, str($2)] = 0;
+ @start_seq = (uint64)1;
+ @complete_seq = (uint64)0;
+ @out_of_order = (uint64)0;
+ @start_order[0] = (uint64)0;
+ delete(@start_order[0]);
+ printf("BPFTRACE_READY\n");
}
+
tracepoint:block:block_rq_complete
+/(int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)/
{
- $dev = $1;
- if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
- $last = @last_rw[$dev, str($2)];
- if ((uint64)args.sector != $last) {
- printf("io_out_of_order: exp %llu actual %llu\n",
- args.sector, $last);
+ $expected = @start_order[args.sector];
+ if ($expected > 0) {
+ if (@complete_seq == 0) {
+ @complete_seq = $expected;
+ }
+ if ($expected != @complete_seq) {
+ printf("out_of_order: sector %llu started at seq %llu but completed at seq %llu\n",
+ args.sector, $expected, @complete_seq);
+ @out_of_order = @out_of_order + 1;
}
- @last_rw[$dev, str($2)] = (args.sector + args.nr_sector);
+ delete(@start_order[args.sector]);
+ @complete_seq = @complete_seq + 1;
}
- @ios = count();
+}
+
+tracepoint:block:block_io_start
+/(int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)/
+{
+ @start_order[args.sector] = @start_seq;
+ @start_seq = @start_seq + 1;
}
END {
- clear(@last_rw);
+ printf("total_start: %llu total_complete: %llu out_of_order: %llu\n",
+ @start_seq - 1, @complete_seq, @out_of_order);
+ clear(@start_order);
+ clear(@start_seq);
+ clear(@complete_seq);
+ clear(@out_of_order);
}
diff --git a/tools/testing/selftests/ublk/utils.h b/tools/testing/selftests/ublk/utils.h
index a852e0b7153e..aab522f26167 100644
--- a/tools/testing/selftests/ublk/utils.h
+++ b/tools/testing/selftests/ublk/utils.h
@@ -21,6 +21,60 @@
#define round_up(val, rnd) \
(((val) + ((rnd) - 1)) & ~((rnd) - 1))
+/* small sized & per-thread allocator */
+struct allocator {
+ unsigned int size;
+ cpu_set_t *set;
+};
+
+static inline int allocator_init(struct allocator *a, unsigned size)
+{
+ a->set = CPU_ALLOC(size);
+ a->size = size;
+ if (!a->set)
+ return -ENOMEM;
+ CPU_ZERO_S(CPU_ALLOC_SIZE(size), a->set); /* CPU_ALLOC() need not clear the set */
+ return 0;
+}
+
+static inline void allocator_deinit(struct allocator *a)
+{
+ a->size = 0;
+ CPU_FREE(a->set);
+ a->set = NULL; /* do not keep a pointer to the freed set */
+}
+
+static inline int allocator_get(struct allocator *a)
+{
+ size_t set_size = CPU_ALLOC_SIZE(a->size);
+ int i;
+
+ /* first fit: claim and return the lowest index whose bit is clear */
+ for (i = 0; i < a->size; i++) {
+ if (CPU_ISSET_S(i, set_size, a->set))
+ continue;
+ CPU_SET_S(i, set_size, a->set);
+ return i;
+ }
+
+ return -1;
+}
+
+static inline void allocator_put(struct allocator *a, int i)
+{
+ /* indices outside [0, size) are silently ignored */
+ if (i < 0 || i >= a->size)
+ return;
+ CPU_CLR_S(i, CPU_ALLOC_SIZE(a->size), a->set);
+}
+
+static inline int allocator_get_val(struct allocator *a, int i)
+{
+ /* report whether bit i is set in the allocator's bitmap; */
+ /* the result is non-zero when it is and 0 otherwise */
+ return CPU_ISSET_S(i, CPU_ALLOC_SIZE(a->size), a->set);
+}
+
static inline unsigned int ilog2(unsigned int x)
{
if (x == 0)
@@ -43,6 +97,7 @@ static inline void ublk_err(const char *fmt, ...)
va_start(ap, fmt);
vfprintf(stderr, fmt, ap);
+ va_end(ap);
}
static inline void ublk_log(const char *fmt, ...)
@@ -52,6 +107,7 @@ static inline void ublk_log(const char *fmt, ...)
va_start(ap, fmt);
vfprintf(stdout, fmt, ap);
+ va_end(ap);
}
}
@@ -62,7 +118,15 @@ static inline void ublk_dbg(int level, const char *fmt, ...)
va_start(ap, fmt);
vfprintf(stdout, fmt, ap);
+ va_end(ap);
}
}
+#define ublk_assert(x) do { \
+ if (!(x)) { \
+ ublk_err("%s %d: assert!\n", __func__, __LINE__); \
+ assert(x); \
+ } \
+} while (0)
+
#endif
diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h
index 50c261005111..5da223731b81 100644
--- a/tools/testing/selftests/vDSO/vdso_config.h
+++ b/tools/testing/selftests/vDSO/vdso_config.h
@@ -66,7 +66,7 @@ static const char *versions[7] = {
};
__attribute__((unused))
-static const char *names[2][7] = {
+static const char *names[2][8] = {
{
"__kernel_gettimeofday",
"__kernel_clock_gettime",
@@ -75,6 +75,7 @@ static const char *names[2][7] = {
"__kernel_getcpu",
"__kernel_clock_gettime64",
"__kernel_getrandom",
+ "__kernel_clock_getres_time64",
},
{
"__vdso_gettimeofday",
@@ -84,6 +85,7 @@ static const char *names[2][7] = {
"__vdso_getcpu",
"__vdso_clock_gettime64",
"__vdso_getrandom",
+ "__vdso_clock_getres_time64",
},
};
diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c
index c620317eaeea..b162a4ba9c4f 100644
--- a/tools/testing/selftests/vDSO/vdso_test_abi.c
+++ b/tools/testing/selftests/vDSO/vdso_test_abi.c
@@ -36,6 +36,7 @@ typedef long (*vdso_gettimeofday_t)(struct timeval *tv, struct timezone *tz);
typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts);
typedef long (*vdso_clock_gettime64_t)(clockid_t clk_id, struct vdso_timespec64 *ts);
typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts);
+typedef long (*vdso_clock_getres_time64_t)(clockid_t clk_id, struct vdso_timespec64 *ts);
typedef time_t (*vdso_time_t)(time_t *t);
static const char * const vdso_clock_name[] = {
@@ -179,7 +180,7 @@ static void vdso_test_clock_getres(clockid_t clk_id)
clock_getres_fail++;
}
- ret = syscall(SYS_clock_getres, clk_id, &sys_ts);
+ ret = syscall(__NR_clock_getres, clk_id, &sys_ts);
ksft_print_msg("The syscall resolution is %lld %lld\n",
(long long)sys_ts.tv_sec, (long long)sys_ts.tv_nsec);
@@ -196,6 +197,55 @@ static void vdso_test_clock_getres(clockid_t clk_id)
}
}
+#ifdef __NR_clock_getres_time64
+static void vdso_test_clock_getres_time64(clockid_t clk_id)
+{
+ int clock_getres_fail = 0;
+
+ /* Find clock_getres_time64. */
+ vdso_clock_getres_time64_t vdso_clock_getres_time64 =
+ (vdso_clock_getres_time64_t)vdso_sym(version, name[7]);
+
+ if (!vdso_clock_getres_time64) {
+ ksft_print_msg("Couldn't find %s\n", name[7]);
+ ksft_test_result_skip("%s %s\n", name[7],
+ vdso_clock_name[clk_id]);
+ return;
+ }
+
+ struct vdso_timespec64 ts, sys_ts;
+ long ret = VDSO_CALL(vdso_clock_getres_time64, 2, clk_id, &ts);
+
+ if (ret == 0) {
+ ksft_print_msg("The vdso resolution is %lld %lld\n",
+ (long long)ts.tv_sec, (long long)ts.tv_nsec);
+ } else {
+ clock_getres_fail++;
+ }
+
+ ret = syscall(__NR_clock_getres_time64, clk_id, &sys_ts);
+
+ ksft_print_msg("The syscall resolution is %lld %lld\n",
+ (long long)sys_ts.tv_sec, (long long)sys_ts.tv_nsec);
+
+ if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec))
+ clock_getres_fail++;
+
+ if (clock_getres_fail > 0) {
+ ksft_test_result_fail("%s %s\n", name[7],
+ vdso_clock_name[clk_id]);
+ } else {
+ ksft_test_result_pass("%s %s\n", name[7],
+ vdso_clock_name[clk_id]);
+ }
+}
+#else /* !__NR_clock_getres_time64 */
+static void vdso_test_clock_getres_time64(clockid_t clk_id)
+{
+ ksft_test_result_skip("%s %s\n", name[7], vdso_clock_name[clk_id]);
+}
+#endif /* __NR_clock_getres_time64 */
+
/*
* This function calls vdso_test_clock_gettime and vdso_test_clock_getres
* with different values for clock_id.
@@ -208,9 +258,10 @@ static inline void vdso_test_clock(clockid_t clock_id)
vdso_test_clock_gettime64(clock_id);
vdso_test_clock_getres(clock_id);
+ vdso_test_clock_getres_time64(clock_id);
}
-#define VDSO_TEST_PLAN 29
+#define VDSO_TEST_PLAN 38
int main(int argc, char **argv)
{
diff --git a/tools/testing/selftests/vDSO/vdso_test_getcpu.c b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
index bea8ad54da11..3fe49cbdae98 100644
--- a/tools/testing/selftests/vDSO/vdso_test_getcpu.c
+++ b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
@@ -16,9 +16,7 @@
#include "vdso_config.h"
#include "vdso_call.h"
-struct getcpu_cache;
-typedef long (*getcpu_t)(unsigned int *, unsigned int *,
- struct getcpu_cache *);
+typedef long (*getcpu_t)(unsigned int *, unsigned int *, void *);
int main(int argc, char **argv)
{
diff --git a/tools/testing/selftests/vDSO/vgetrandom-chacha.S b/tools/testing/selftests/vDSO/vgetrandom-chacha.S
index a4a82e1c28a9..16f985b089d4 100644
--- a/tools/testing/selftests/vDSO/vgetrandom-chacha.S
+++ b/tools/testing/selftests/vDSO/vgetrandom-chacha.S
@@ -14,7 +14,7 @@
#elif defined(__riscv) && __riscv_xlen == 64
#include "../../../../arch/riscv/kernel/vdso/vgetrandom-chacha.S"
#elif defined(__s390x__)
-#include "../../../../arch/s390/kernel/vdso64/vgetrandom-chacha.S"
+#include "../../../../arch/s390/kernel/vdso/vgetrandom-chacha.S"
#elif defined(__x86_64__)
-#include "../../../../arch/x86/entry/vdso/vgetrandom-chacha.S"
+#include "../../../../arch/x86/entry/vdso/vdso64/vgetrandom-chacha.S"
#endif
diff --git a/tools/testing/selftests/vfio/Makefile b/tools/testing/selftests/vfio/Makefile
index 3c796ca99a50..8e90e409e91d 100644
--- a/tools/testing/selftests/vfio/Makefile
+++ b/tools/testing/selftests/vfio/Makefile
@@ -1,5 +1,13 @@
+ARCH ?= $(shell uname -m)
+
+ifeq (,$(filter $(ARCH),arm64 x86_64))
+# Do nothing on unsupported architectures
+include ../lib.mk
+else
+
CFLAGS = $(KHDR_INCLUDES)
TEST_GEN_PROGS += vfio_dma_mapping_test
+TEST_GEN_PROGS += vfio_dma_mapping_mmio_test
TEST_GEN_PROGS += vfio_iommufd_setup_test
TEST_GEN_PROGS += vfio_pci_device_test
TEST_GEN_PROGS += vfio_pci_device_init_perf_test
@@ -27,3 +35,5 @@ TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_PROGS_O) $(LIBVFIO_O))
-include $(TEST_DEP_FILES)
EXTRA_CLEAN += $(TEST_GEN_PROGS_O) $(TEST_DEP_FILES)
+
+endif
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio.h b/tools/testing/selftests/vfio/lib/include/libvfio.h
index 279ddcd70194..1b6da54cc2cb 100644
--- a/tools/testing/selftests/vfio/lib/include/libvfio.h
+++ b/tools/testing/selftests/vfio/lib/include/libvfio.h
@@ -23,4 +23,13 @@
const char *vfio_selftests_get_bdf(int *argc, char *argv[]);
char **vfio_selftests_get_bdfs(int *argc, char *argv[], int *nr_bdfs);
+/*
+ * Reserve size bytes of virtual address space at an address satisfying
+ * (vaddr % align) == offset.
+ *
+ * Returns the reserved vaddr. The caller is responsible for unmapping
+ * the returned region.
+ */
+void *mmap_reserve(size_t size, size_t align, size_t offset);
+
#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
index 5c9b9dc6d993..e9a3386a4719 100644
--- a/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
@@ -61,6 +61,12 @@ iova_t iommu_hva2iova(struct iommu *iommu, void *vaddr);
struct iommu_iova_range *iommu_iova_ranges(struct iommu *iommu, u32 *nranges);
+#define MODE_VFIO_TYPE1_IOMMU "vfio_type1_iommu"
+#define MODE_VFIO_TYPE1V2_IOMMU "vfio_type1v2_iommu"
+#define MODE_IOMMUFD_COMPAT_TYPE1 "iommufd_compat_type1"
+#define MODE_IOMMUFD_COMPAT_TYPE1V2 "iommufd_compat_type1v2"
+#define MODE_IOMMUFD "iommufd"
+
/*
* Generator for VFIO selftests fixture variants that replicate across all
* possible IOMMU modes. Tests must define FIXTURE_VARIANT_ADD_IOMMU_MODE()
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h b/tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h
index 8f1d994e9ea2..c7c0796a757f 100644
--- a/tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h
@@ -2,7 +2,6 @@
#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOVA_ALLOCATOR_H
#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOVA_ALLOCATOR_H
-#include <uapi/linux/types.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/iommufd.h>
diff --git a/tools/testing/selftests/vfio/lib/iommu.c b/tools/testing/selftests/vfio/lib/iommu.c
index 8079d43523f3..035dac069d60 100644
--- a/tools/testing/selftests/vfio/lib/iommu.c
+++ b/tools/testing/selftests/vfio/lib/iommu.c
@@ -11,7 +11,6 @@
#include <sys/ioctl.h>
#include <sys/mman.h>
-#include <uapi/linux/types.h>
#include <linux/limits.h>
#include <linux/mman.h>
#include <linux/types.h>
@@ -21,32 +20,32 @@
#include "../../../kselftest.h"
#include <libvfio.h>
-const char *default_iommu_mode = "iommufd";
+const char *default_iommu_mode = MODE_IOMMUFD;
/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */
static const struct iommu_mode iommu_modes[] = {
{
- .name = "vfio_type1_iommu",
+ .name = MODE_VFIO_TYPE1_IOMMU,
.container_path = "/dev/vfio/vfio",
.iommu_type = VFIO_TYPE1_IOMMU,
},
{
- .name = "vfio_type1v2_iommu",
+ .name = MODE_VFIO_TYPE1V2_IOMMU,
.container_path = "/dev/vfio/vfio",
.iommu_type = VFIO_TYPE1v2_IOMMU,
},
{
- .name = "iommufd_compat_type1",
+ .name = MODE_IOMMUFD_COMPAT_TYPE1,
.container_path = "/dev/iommu",
.iommu_type = VFIO_TYPE1_IOMMU,
},
{
- .name = "iommufd_compat_type1v2",
+ .name = MODE_IOMMUFD_COMPAT_TYPE1V2,
.container_path = "/dev/iommu",
.iommu_type = VFIO_TYPE1v2_IOMMU,
},
{
- .name = "iommufd",
+ .name = MODE_IOMMUFD,
},
};
diff --git a/tools/testing/selftests/vfio/lib/iova_allocator.c b/tools/testing/selftests/vfio/lib/iova_allocator.c
index a12b0a51e9e6..8c1cc86b70cd 100644
--- a/tools/testing/selftests/vfio/lib/iova_allocator.c
+++ b/tools/testing/selftests/vfio/lib/iova_allocator.c
@@ -11,7 +11,6 @@
#include <sys/ioctl.h>
#include <sys/mman.h>
-#include <uapi/linux/types.h>
#include <linux/iommufd.h>
#include <linux/limits.h>
#include <linux/mman.h>
diff --git a/tools/testing/selftests/vfio/lib/libvfio.c b/tools/testing/selftests/vfio/lib/libvfio.c
index a23a3cc5be69..3a3d1ed635c1 100644
--- a/tools/testing/selftests/vfio/lib/libvfio.c
+++ b/tools/testing/selftests/vfio/lib/libvfio.c
@@ -2,6 +2,9 @@
#include <stdio.h>
#include <stdlib.h>
+#include <sys/mman.h>
+
+#include <linux/align.h>
#include "../../../kselftest.h"
#include <libvfio.h>
@@ -76,3 +79,25 @@ const char *vfio_selftests_get_bdf(int *argc, char *argv[])
return vfio_selftests_get_bdfs(argc, argv, &nr_bdfs)[0];
}
+
+void *mmap_reserve(size_t size, size_t align, size_t offset)
+{
+ void *map_base, *map_align;
+ size_t delta;
+
+ VFIO_ASSERT_GT(align, offset);
+ delta = align - offset;
+
+ map_base = mmap(NULL, size + align, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ VFIO_ASSERT_NE(map_base, MAP_FAILED);
+
+ map_align = (void *)(ALIGN((uintptr_t)map_base + delta, align) - delta);
+
+ if (map_align > map_base)
+ VFIO_ASSERT_EQ(munmap(map_base, map_align - map_base), 0);
+
+ VFIO_ASSERT_EQ(munmap(map_align + size, map_base + align - map_align), 0);
+
+ return map_align;
+}
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
index 8e34b9bfc96b..4e5871f1ebc3 100644
--- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -11,11 +11,14 @@
#include <sys/ioctl.h>
#include <sys/mman.h>
-#include <uapi/linux/types.h>
+#include <linux/align.h>
#include <linux/iommufd.h>
+#include <linux/kernel.h>
#include <linux/limits.h>
+#include <linux/log2.h>
#include <linux/mman.h>
#include <linux/overflow.h>
+#include <linux/sizes.h>
#include <linux/types.h>
#include <linux/vfio.h>
@@ -124,20 +127,38 @@ static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
static void vfio_pci_bar_map(struct vfio_pci_device *device, int index)
{
struct vfio_pci_bar *bar = &device->bars[index];
+ size_t align, size;
int prot = 0;
+ void *vaddr;
VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
VFIO_ASSERT_NULL(bar->vaddr);
VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP);
+ VFIO_ASSERT_TRUE(is_power_of_2(bar->info.size));
if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ)
prot |= PROT_READ;
if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
prot |= PROT_WRITE;
- bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED,
+ size = bar->info.size;
+
+ /*
+ * Align BAR mmaps to improve page fault granularity during potential
+ * subsequent IOMMU mapping of these BAR vaddr. 1G for x86 is the
+ * largest hugepage size across any architecture, so no benefit from
+ * larger alignment. BARs smaller than 1G will be aligned by their
+ * power-of-two size, guaranteeing sufficient alignment for smaller
+ * hugepages, if present.
+ */
+ align = min_t(size_t, size, SZ_1G);
+
+ vaddr = mmap_reserve(size, align, 0);
+ bar->vaddr = mmap(vaddr, size, prot, MAP_SHARED | MAP_FIXED,
device->fd, bar->info.offset);
VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED);
+
+ madvise(bar->vaddr, size, MADV_HUGEPAGE);
}
static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_mmio_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_mmio_test.c
new file mode 100644
index 000000000000..957a89ce7b3a
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_mmio_test.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <uapi/linux/types.h>
+#include <linux/pci_regs.h>
+#include <linux/sizes.h>
+#include <linux/vfio.h>
+
+#include <libvfio.h>
+
+#include "../kselftest_harness.h"
+
+static const char *device_bdf;
+
+static struct vfio_pci_bar *largest_mapped_bar(struct vfio_pci_device *device)
+{
+ u32 flags = VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE;
+ struct vfio_pci_bar *largest = NULL;
+ u64 bar_size = 0;
+
+ for (int i = 0; i < PCI_STD_NUM_BARS; i++) {
+ struct vfio_pci_bar *bar = &device->bars[i];
+
+ if (!bar->vaddr)
+ continue;
+
+ /*
+ * iommu_map() maps with READ|WRITE, so require the same
+ * abilities for the underlying VFIO region.
+ */
+ if ((bar->info.flags & flags) != flags)
+ continue;
+
+ if (bar->info.size > bar_size) {
+ bar_size = bar->info.size;
+ largest = bar;
+ }
+ }
+
+ return largest;
+}
+
+FIXTURE(vfio_dma_mapping_mmio_test) {
+ struct iommu *iommu;
+ struct vfio_pci_device *device;
+ struct iova_allocator *iova_allocator;
+ struct vfio_pci_bar *bar;
+};
+
+FIXTURE_VARIANT(vfio_dma_mapping_mmio_test) {
+ const char *iommu_mode;
+};
+
+#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode) \
+FIXTURE_VARIANT_ADD(vfio_dma_mapping_mmio_test, _iommu_mode) { \
+ .iommu_mode = #_iommu_mode, \
+}
+
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES();
+
+#undef FIXTURE_VARIANT_ADD_IOMMU_MODE
+
+FIXTURE_SETUP(vfio_dma_mapping_mmio_test)
+{
+ self->iommu = iommu_init(variant->iommu_mode);
+ self->device = vfio_pci_device_init(device_bdf, self->iommu);
+ self->iova_allocator = iova_allocator_init(self->iommu);
+ self->bar = largest_mapped_bar(self->device);
+
+ if (!self->bar)
+ SKIP(return, "No mappable BAR found on device %s", device_bdf);
+}
+
+FIXTURE_TEARDOWN(vfio_dma_mapping_mmio_test)
+{
+ iova_allocator_cleanup(self->iova_allocator);
+ vfio_pci_device_cleanup(self->device);
+ iommu_cleanup(self->iommu);
+}
+
+static void do_mmio_map_test(struct iommu *iommu,
+ struct iova_allocator *iova_allocator,
+ void *vaddr, size_t size)
+{
+ struct dma_region region = {
+ .vaddr = vaddr,
+ .size = size,
+ .iova = iova_allocator_alloc(iova_allocator, size),
+ };
+
+ /*
+ * NOTE: Check for iommufd compat success once it lands. Native iommufd
+ * will never support this.
+ */
+ if (!strcmp(iommu->mode->name, MODE_VFIO_TYPE1V2_IOMMU) ||
+ !strcmp(iommu->mode->name, MODE_VFIO_TYPE1_IOMMU)) {
+ iommu_map(iommu, &region);
+ iommu_unmap(iommu, &region);
+ } else {
+ VFIO_ASSERT_NE(__iommu_map(iommu, &region), 0);
+ VFIO_ASSERT_NE(__iommu_unmap(iommu, &region, NULL), 0);
+ }
+}
+
+TEST_F(vfio_dma_mapping_mmio_test, map_full_bar)
+{
+ do_mmio_map_test(self->iommu, self->iova_allocator,
+ self->bar->vaddr, self->bar->info.size);
+}
+
+TEST_F(vfio_dma_mapping_mmio_test, map_partial_bar)
+{
+ if (self->bar->info.size < 2 * getpagesize())
+ SKIP(return, "BAR too small (size=0x%llx)", self->bar->info.size);
+
+ do_mmio_map_test(self->iommu, self->iova_allocator,
+ self->bar->vaddr, getpagesize());
+}
+
+/* Test IOMMU mapping of BAR mmap with intentionally poor vaddr alignment. */
+TEST_F(vfio_dma_mapping_mmio_test, map_bar_misaligned)
+{
+ /* Limit size to bound test time for large BARs */
+ size_t size = min_t(size_t, self->bar->info.size, SZ_1G);
+ void *vaddr;
+
+ vaddr = mmap_reserve(size, SZ_1G, getpagesize());
+ vaddr = mmap(vaddr, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED,
+ self->device->fd, self->bar->info.offset);
+ VFIO_ASSERT_NE(vaddr, MAP_FAILED);
+
+ do_mmio_map_test(self->iommu, self->iova_allocator, vaddr, size);
+
+ VFIO_ASSERT_EQ(munmap(vaddr, size), 0);
+}
+
+int main(int argc, char *argv[])
+{
+ device_bdf = vfio_selftests_get_bdf(&argc, argv);
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
index 16eba2ecca47..abb170bdcef7 100644
--- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
@@ -3,7 +3,6 @@
#include <sys/mman.h>
#include <unistd.h>
-#include <uapi/linux/types.h>
#include <linux/iommufd.h>
#include <linux/limits.h>
#include <linux/mman.h>
@@ -162,12 +161,8 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap)
if (rc == -EOPNOTSUPP)
goto unmap;
- /*
- * IOMMUFD compatibility-mode does not support huge mappings when
- * using VFIO_TYPE1_IOMMU.
- */
- if (!strcmp(variant->iommu_mode, "iommufd_compat_type1"))
- mapping_size = SZ_4K;
+ if (self->iommu->mode->iommu_type == VFIO_TYPE1_IOMMU)
+ goto unmap;
ASSERT_EQ(0, rc);
printf("Found IOMMU mappings for IOVA 0x%lx:\n", region.iova);
diff --git a/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c b/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c
index 17017ed3beac..ec1e5633e080 100644
--- a/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c
+++ b/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c
@@ -1,5 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-#include <uapi/linux/types.h>
#include <linux/limits.h>
#include <linux/sizes.h>
#include <linux/vfio.h>
diff --git a/tools/testing/selftests/vsock/settings b/tools/testing/selftests/vsock/settings
index 694d70710ff0..79b65bdf05db 100644
--- a/tools/testing/selftests/vsock/settings
+++ b/tools/testing/selftests/vsock/settings
@@ -1 +1 @@
-timeout=300
+timeout=1200
diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh
index c7b270dd77a9..dc8dbe74a6d0 100755
--- a/tools/testing/selftests/vsock/vmtest.sh
+++ b/tools/testing/selftests/vsock/vmtest.sh
@@ -7,6 +7,7 @@
# * virtme-ng
# * busybox-static (used by virtme-ng)
# * qemu (used by virtme-ng)
+# * socat
#
# shellcheck disable=SC2317,SC2119
@@ -41,14 +42,119 @@ readonly KERNEL_CMDLINE="\
virtme.ssh virtme_ssh_channel=tcp virtme_ssh_user=$USER \
"
readonly LOG=$(mktemp /tmp/vsock_vmtest_XXXX.log)
-readonly TEST_NAMES=(vm_server_host_client vm_client_host_server vm_loopback)
+
+# Namespace tests must use the ns_ prefix. This is checked in check_netns() and
+# is used to determine if a test needs namespace setup before test execution.
+readonly TEST_NAMES=(
+ vm_server_host_client
+ vm_client_host_server
+ vm_loopback
+ ns_host_vsock_ns_mode_ok
+ ns_host_vsock_child_ns_mode_ok
+ ns_global_same_cid_fails
+ ns_local_same_cid_ok
+ ns_global_local_same_cid_ok
+ ns_local_global_same_cid_ok
+ ns_diff_global_host_connect_to_global_vm_ok
+ ns_diff_global_host_connect_to_local_vm_fails
+ ns_diff_global_vm_connect_to_global_host_ok
+ ns_diff_global_vm_connect_to_local_host_fails
+ ns_diff_local_host_connect_to_local_vm_fails
+ ns_diff_local_vm_connect_to_local_host_fails
+ ns_diff_global_to_local_loopback_local_fails
+ ns_diff_local_to_global_loopback_fails
+ ns_diff_local_to_local_loopback_fails
+ ns_diff_global_to_global_loopback_ok
+ ns_same_local_loopback_ok
+ ns_same_local_host_connect_to_local_vm_ok
+ ns_same_local_vm_connect_to_local_host_ok
+ ns_delete_vm_ok
+ ns_delete_host_ok
+ ns_delete_both_ok
+)
readonly TEST_DESCS=(
+ # vm_server_host_client
"Run vsock_test in server mode on the VM and in client mode on the host."
+
+ # vm_client_host_server
"Run vsock_test in client mode on the VM and in server mode on the host."
+
+ # vm_loopback
"Run vsock_test using the loopback transport in the VM."
+
+ # ns_host_vsock_ns_mode_ok
+ "Check /proc/sys/net/vsock/ns_mode strings on the host."
+
+ # ns_host_vsock_child_ns_mode_ok
+ "Check /proc/sys/net/vsock/ns_mode is read-only and child_ns_mode is writable."
+
+ # ns_global_same_cid_fails
+ "Check QEMU fails to start two VMs with same CID in two different global namespaces."
+
+ # ns_local_same_cid_ok
+ "Check QEMU successfully starts two VMs with same CID in two different local namespaces."
+
+ # ns_global_local_same_cid_ok
+ "Check QEMU successfully starts one VM in a global ns and then another VM in a local ns with the same CID."
+
+ # ns_local_global_same_cid_ok
+ "Check QEMU successfully starts one VM in a local ns and then another VM in a global ns with the same CID."
+
+ # ns_diff_global_host_connect_to_global_vm_ok
+ "Run vsock_test client in global ns with server in VM in another global ns."
+
+ # ns_diff_global_host_connect_to_local_vm_fails
+ "Run socat to test a process in a global ns fails to connect to a VM in a local ns."
+
+ # ns_diff_global_vm_connect_to_global_host_ok
+ "Run vsock_test client in VM in a global ns with server in another global ns."
+
+ # ns_diff_global_vm_connect_to_local_host_fails
+ "Run socat to test a VM in a global ns fails to connect to a host process in a local ns."
+
+ # ns_diff_local_host_connect_to_local_vm_fails
+ "Run socat to test a host process in a local ns fails to connect to a VM in another local ns."
+
+ # ns_diff_local_vm_connect_to_local_host_fails
+ "Run socat to test a VM in a local ns fails to connect to a host process in another local ns."
+
+ # ns_diff_global_to_local_loopback_local_fails
+ "Run socat to test a loopback vsock in a global ns fails to connect to a vsock in a local ns."
+
+ # ns_diff_local_to_global_loopback_fails
+ "Run socat to test a loopback vsock in a local ns fails to connect to a vsock in a global ns."
+
+ # ns_diff_local_to_local_loopback_fails
+ "Run socat to test a loopback vsock in a local ns fails to connect to a vsock in another local ns."
+
+ # ns_diff_global_to_global_loopback_ok
+ "Run socat to test a loopback vsock in a global ns successfully connects to a vsock in another global ns."
+
+ # ns_same_local_loopback_ok
+ "Run socat to test a loopback vsock in a local ns successfully connects to a vsock in the same ns."
+
+ # ns_same_local_host_connect_to_local_vm_ok
+ "Run vsock_test client in a local ns with server in VM in same ns."
+
+ # ns_same_local_vm_connect_to_local_host_ok
+ "Run vsock_test client in VM in a local ns with server in same ns."
+
+ # ns_delete_vm_ok
+ "Check that deleting the VM's namespace does not break the socket connection"
+
+ # ns_delete_host_ok
+ "Check that deleting the host's namespace does not break the socket connection"
+
+ # ns_delete_both_ok
+ "Check that deleting the VM and host's namespaces does not break the socket connection"
)
-readonly USE_SHARED_VM=(vm_server_host_client vm_client_host_server vm_loopback)
+readonly USE_SHARED_VM=(
+ vm_server_host_client
+ vm_client_host_server
+ vm_loopback
+)
+readonly NS_MODES=("local" "global")
VERBOSE=0
@@ -71,7 +177,7 @@ usage() {
for ((i = 0; i < ${#TEST_NAMES[@]}; i++)); do
name=${TEST_NAMES[${i}]}
desc=${TEST_DESCS[${i}]}
- printf "\t%-35s%-35s\n" "${name}" "${desc}"
+ printf "\t%-55s%-35s\n" "${name}" "${desc}"
done
echo
@@ -103,13 +209,55 @@ check_result() {
fi
}
+add_namespaces() {
+ local orig_mode
+ orig_mode=$(cat /proc/sys/net/vsock/child_ns_mode)
+
+ for mode in "${NS_MODES[@]}"; do
+ echo "${mode}" > /proc/sys/net/vsock/child_ns_mode
+ ip netns add "${mode}0" 2>/dev/null
+ ip netns add "${mode}1" 2>/dev/null
+ done
+
+ echo "${orig_mode}" > /proc/sys/net/vsock/child_ns_mode
+}
+
+init_namespaces() {
+ for mode in "${NS_MODES[@]}"; do
+ # we need lo for qemu port forwarding
+ ip netns exec "${mode}0" ip link set dev lo up
+ ip netns exec "${mode}1" ip link set dev lo up
+ done
+}
+
+del_namespaces() {
+ for mode in "${NS_MODES[@]}"; do
+ ip netns del "${mode}0" &>/dev/null
+ ip netns del "${mode}1" &>/dev/null
+ log_host "removed ns ${mode}0"
+ log_host "removed ns ${mode}1"
+ done
+}
+
vm_ssh() {
- ssh -q -o UserKnownHostsFile=/dev/null -p ${SSH_HOST_PORT} localhost "$@"
+ local ns_exec
+
+ if [[ "${1}" == init_ns ]]; then
+ ns_exec=""
+ else
+ ns_exec="ip netns exec ${1}"
+ fi
+
+ shift
+
+ ${ns_exec} ssh -q -o UserKnownHostsFile=/dev/null -p "${SSH_HOST_PORT}" localhost "$@"
+
return $?
}
cleanup() {
terminate_pidfiles "${!PIDFILES[@]}"
+ del_namespaces
}
check_args() {
@@ -139,7 +287,7 @@ check_args() {
}
check_deps() {
- for dep in vng ${QEMU} busybox pkill ssh; do
+ for dep in vng ${QEMU} busybox pkill ssh ss socat; do
if [[ ! -x $(command -v "${dep}") ]]; then
echo -e "skip: dependency ${dep} not found!\n"
exit "${KSFT_SKIP}"
@@ -153,6 +301,20 @@ check_deps() {
fi
}
+check_netns() {
+ local tname=$1
+
+ # If the test requires NS support, check if NS support exists
+ # using /proc/self/ns
+ if [[ "${tname}" =~ ^ns_ ]] &&
+ [[ ! -e /proc/self/ns ]]; then
+ log_host "No NS support detected for test ${tname}"
+ return 1
+ fi
+
+ return 0
+}
+
check_vng() {
local tested_versions
local version
@@ -176,6 +338,20 @@ check_vng() {
fi
}
+check_socat() {
+ local support_string
+
+ support_string="$(socat -V)"
+
+ if [[ "${support_string}" != *"WITH_VSOCK 1"* ]]; then
+ die "err: socat is missing vsock support"
+ fi
+
+ if [[ "${support_string}" != *"WITH_UNIX 1"* ]]; then
+ die "err: socat is missing unix support"
+ fi
+}
+
handle_build() {
if [[ ! "${BUILD}" -eq 1 ]]; then
return
@@ -224,12 +400,22 @@ terminate_pidfiles() {
done
}
+terminate_pids() {
+ local pid
+
+ for pid in "$@"; do
+ kill -SIGTERM "${pid}" &>/dev/null || :
+ done
+}
+
vm_start() {
local pidfile=$1
+ local ns=$2
local logfile=/dev/null
local verbose_opt=""
local kernel_opt=""
local qemu_opts=""
+ local ns_exec=""
local qemu
qemu=$(command -v "${QEMU}")
@@ -250,7 +436,11 @@ vm_start() {
kernel_opt="${KERNEL_CHECKOUT}"
fi
- vng \
+ if [[ "${ns}" != "init_ns" ]]; then
+ ns_exec="ip netns exec ${ns}"
+ fi
+
+ ${ns_exec} vng \
--run \
${kernel_opt} \
${verbose_opt} \
@@ -265,6 +455,7 @@ vm_start() {
}
vm_wait_for_ssh() {
+ local ns=$1
local i
i=0
@@ -272,7 +463,8 @@ vm_wait_for_ssh() {
if [[ ${i} -gt ${WAIT_PERIOD_MAX} ]]; then
die "Timed out waiting for guest ssh"
fi
- if vm_ssh -- true; then
+
+ if vm_ssh "${ns}" -- true; then
break
fi
i=$(( i + 1 ))
@@ -286,50 +478,107 @@ wait_for_listener()
local port=$1
local interval=$2
local max_intervals=$3
- local protocol=tcp
- local pattern
+ local protocol=$4
local i
- pattern=":$(printf "%04X" "${port}") "
-
- # for tcp protocol additionally check the socket state
- [ "${protocol}" = "tcp" ] && pattern="${pattern}0A"
-
for i in $(seq "${max_intervals}"); do
- if awk -v pattern="${pattern}" \
- 'BEGIN {rc=1} $2" "$4 ~ pattern {rc=0} END {exit rc}' \
- /proc/net/"${protocol}"*; then
+ case "${protocol}" in
+ tcp)
+ if ss --listening --tcp --numeric | grep -q ":${port} "; then
+ break
+ fi
+ ;;
+ vsock)
+ if ss --listening --vsock --numeric | grep -q ":${port} "; then
+ break
+ fi
+ ;;
+ unix)
+ # For unix sockets, port is actually the socket path
+ if ss --listening --unix | grep -q "${port}"; then
+ break
+ fi
+ ;;
+ *)
+ echo "Unknown protocol: ${protocol}" >&2
break
- fi
+ ;;
+ esac
sleep "${interval}"
done
}
vm_wait_for_listener() {
- local port=$1
+ local ns=$1
+ local port=$2
+ local protocol=$3
- vm_ssh <<EOF
+ vm_ssh "${ns}" <<EOF
$(declare -f wait_for_listener)
-wait_for_listener ${port} ${WAIT_PERIOD} ${WAIT_PERIOD_MAX}
+wait_for_listener ${port} ${WAIT_PERIOD} ${WAIT_PERIOD_MAX} ${protocol}
EOF
}
host_wait_for_listener() {
- local port=$1
+ local ns=$1
+ local port=$2
+ local protocol=$3
+
+ if [[ "${ns}" == "init_ns" ]]; then
+ wait_for_listener "${port}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}" "${protocol}"
+ else
+ ip netns exec "${ns}" bash <<-EOF
+ $(declare -f wait_for_listener)
+ wait_for_listener ${port} ${WAIT_PERIOD} ${WAIT_PERIOD_MAX} ${protocol}
+ EOF
+ fi
+}
+
+vm_dmesg_oops_count() {
+ local ns=$1
+
+ vm_ssh "${ns}" -- dmesg 2>/dev/null | grep -c -i 'Oops'
+}
+
+vm_dmesg_warn_count() {
+ local ns=$1
+
+ vm_ssh "${ns}" -- dmesg --level=warn 2>/dev/null | grep -c -i 'vsock'
+}
+
+vm_dmesg_check() {
+ local pidfile=$1
+ local ns=$2
+ local oops_before=$3
+ local warn_before=$4
+ local oops_after warn_after
+
+ oops_after=$(vm_dmesg_oops_count "${ns}")
+ if [[ "${oops_after}" -gt "${oops_before}" ]]; then
+ echo "FAIL: kernel oops detected on vm in ns ${ns}" | log_host
+ return 1
+ fi
+
+ warn_after=$(vm_dmesg_warn_count "${ns}")
+ if [[ "${warn_after}" -gt "${warn_before}" ]]; then
+ echo "FAIL: kernel warning detected on vm in ns ${ns}" | log_host
+ return 1
+ fi
- wait_for_listener "${port}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}"
+ return 0
}
vm_vsock_test() {
- local host=$1
- local cid=$2
- local port=$3
+ local ns=$1
+ local host=$2
+ local cid=$3
+ local port=$4
local rc
# log output and use pipefail to respect vsock_test errors
set -o pipefail
if [[ "${host}" != server ]]; then
- vm_ssh -- "${VSOCK_TEST}" \
+ vm_ssh "${ns}" -- "${VSOCK_TEST}" \
--mode=client \
--control-host="${host}" \
--peer-cid="${cid}" \
@@ -337,7 +586,7 @@ vm_vsock_test() {
2>&1 | log_guest
rc=$?
else
- vm_ssh -- "${VSOCK_TEST}" \
+ vm_ssh "${ns}" -- "${VSOCK_TEST}" \
--mode=server \
--peer-cid="${cid}" \
--control-port="${port}" \
@@ -349,7 +598,7 @@ vm_vsock_test() {
return $rc
fi
- vm_wait_for_listener "${port}"
+ vm_wait_for_listener "${ns}" "${port}" "tcp"
rc=$?
fi
set +o pipefail
@@ -358,25 +607,35 @@ vm_vsock_test() {
}
host_vsock_test() {
- local host=$1
- local cid=$2
- local port=$3
+ local ns=$1
+ local host=$2
+ local cid=$3
+ local port=$4
+ shift 4
+ local extra_args=("$@")
local rc
+ local cmd="${VSOCK_TEST}"
+ if [[ "${ns}" != "init_ns" ]]; then
+ cmd="ip netns exec ${ns} ${cmd}"
+ fi
+
# log output and use pipefail to respect vsock_test errors
set -o pipefail
if [[ "${host}" != server ]]; then
- ${VSOCK_TEST} \
+ ${cmd} \
--mode=client \
--peer-cid="${cid}" \
--control-host="${host}" \
- --control-port="${port}" 2>&1 | log_host
+ --control-port="${port}" \
+ "${extra_args[@]}" 2>&1 | log_host
rc=$?
else
- ${VSOCK_TEST} \
+ ${cmd} \
--mode=server \
--peer-cid="${cid}" \
- --control-port="${port}" 2>&1 | log_host &
+ --control-port="${port}" \
+ "${extra_args[@]}" 2>&1 | log_host &
rc=$?
if [[ $rc -ne 0 ]]; then
@@ -384,7 +643,7 @@ host_vsock_test() {
return $rc
fi
- host_wait_for_listener "${port}"
+ host_wait_for_listener "${ns}" "${port}" "tcp"
rc=$?
fi
set +o pipefail
@@ -427,12 +686,584 @@ log_guest() {
LOG_PREFIX=guest log "$@"
}
+# Print the vsock namespace mode of network namespace $1.
+#
+# The value is read from /proc/sys/net/vsock/ns_mode from inside the
+# namespace. Errors from cat are discarded, so on failure nothing is printed
+# and the exit status of "ip netns exec" is returned.
+ns_get_mode() {
+ local ns=$1
+
+ ip netns exec "${ns}" cat /proc/sys/net/vsock/ns_mode 2>/dev/null
+}
+
+# Check that, for every mode in NS_MODES, the namespace named "<mode>0"
+# reports that same mode through ns_get_mode.
+#
+# Returns KSFT_FAIL on the first mismatch, KSFT_PASS otherwise.
+test_ns_host_vsock_ns_mode_ok() {
+	local actual
+	# Keep the loop variable local so it does not leak into the caller.
+	local mode
+
+	for mode in "${NS_MODES[@]}"; do
+		actual=$(ns_get_mode "${mode}0")
+		if [[ "${actual}" != "${mode}" ]]; then
+			log_host "expected mode ${mode}, got ${actual}"
+			return "${KSFT_FAIL}"
+		fi
+	done
+
+	return "${KSFT_PASS}"
+}
+
+test_ns_diff_global_host_connect_to_global_vm_ok() {
+ local oops_before warn_before
+ local pids pid pidfile
+ local ns0 ns1 port
+ declare -a pids
+ local unixfile
+ ns0="global0"
+ ns1="global1"
+ port=1234
+ local rc
+
+ init_namespaces
+
+ pidfile="$(create_pidfile)"
+
+ if ! vm_start "${pidfile}" "${ns0}"; then
+ return "${KSFT_FAIL}"
+ fi
+
+ vm_wait_for_ssh "${ns0}"
+ oops_before=$(vm_dmesg_oops_count "${ns0}")
+ warn_before=$(vm_dmesg_warn_count "${ns0}")
+
+ unixfile=$(mktemp -u /tmp/XXXX.sock)
+ ip netns exec "${ns1}" \
+ socat TCP-LISTEN:"${TEST_HOST_PORT}",fork \
+ UNIX-CONNECT:"${unixfile}" &
+ pids+=($!)
+ host_wait_for_listener "${ns1}" "${TEST_HOST_PORT}" "tcp"
+
+ ip netns exec "${ns0}" socat UNIX-LISTEN:"${unixfile}",fork \
+ TCP-CONNECT:localhost:"${TEST_HOST_PORT}" &
+ pids+=($!)
+ host_wait_for_listener "${ns0}" "${unixfile}" "unix"
+
+ vm_vsock_test "${ns0}" "server" 2 "${TEST_GUEST_PORT}"
+ vm_wait_for_listener "${ns0}" "${TEST_GUEST_PORT}" "tcp"
+ host_vsock_test "${ns1}" "127.0.0.1" "${VSOCK_CID}" "${TEST_HOST_PORT}"
+ rc=$?
+
+ vm_dmesg_check "${pidfile}" "${ns0}" "${oops_before}" "${warn_before}"
+ dmesg_rc=$?
+
+ terminate_pids "${pids[@]}"
+ terminate_pidfiles "${pidfile}"
+
+ if [[ "${rc}" -ne 0 ]] || [[ "${dmesg_rc}" -ne 0 ]]; then
+ return "${KSFT_FAIL}"
+ fi
+
+ return "${KSFT_PASS}"
+}
+
+# A host process in netns global0 must NOT be able to reach a VM running in
+# netns local0.
+#
+# socat inside the VM listens on vsock port ${port} and writes what it
+# receives to ${outfile}; the host then tries to send "TEST" to
+# VSOCK_CID:${port} from global0.
+#
+# Returns KSFT_FAIL if "TEST" arrived in the guest or vm_dmesg_check failed,
+# KSFT_PASS otherwise.
+test_ns_diff_global_host_connect_to_local_vm_fails() {
+	local oops_before warn_before
+	local ns0="global0"
+	local ns1="local0"
+	local port=12345
+	local dmesg_rc
+	local pidfile
+	local outfile
+	local result
+
+	init_namespaces
+
+	outfile=$(mktemp)
+
+	pidfile="$(create_pidfile)"
+	if ! vm_start "${pidfile}" "${ns1}"; then
+		# The VM lives in ns1, so that is the namespace to report.
+		log_host "failed to start vm (cid=${VSOCK_CID}, ns=${ns1})"
+		rm -f "${outfile}"
+		return "${KSFT_FAIL}"
+	fi
+
+	vm_wait_for_ssh "${ns1}"
+	oops_before=$(vm_dmesg_oops_count "${ns1}")
+	warn_before=$(vm_dmesg_warn_count "${ns1}")
+
+	vm_ssh "${ns1}" -- socat VSOCK-LISTEN:"${port}" STDOUT > "${outfile}" &
+	vm_wait_for_listener "${ns1}" "${port}" "vsock"
+	# The connect is expected to fail; its error output is not interesting.
+	echo TEST | ip netns exec "${ns0}" \
+		socat STDIN VSOCK-CONNECT:"${VSOCK_CID}":"${port}" 2>/dev/null
+
+	vm_dmesg_check "${pidfile}" "${ns1}" "${oops_before}" "${warn_before}"
+	dmesg_rc=$?
+
+	terminate_pidfiles "${pidfile}"
+	result=$(cat "${outfile}")
+	rm -f "${outfile}"
+
+	if [[ "${result}" == "TEST" ]] || [[ "${dmesg_rc}" -ne 0 ]]; then
+		return "${KSFT_FAIL}"
+	fi
+
+	return "${KSFT_PASS}"
+}
+
+# Run vsock_test with the host (server) in netns global1 and the VM client in
+# netns global0, and expect it to succeed.
+#
+# A socat pair joined by a unix socket file bridges TCP port ${port} from ns0
+# to ns1 before the host-side vsock_test server is launched.
+#
+# Returns KSFT_PASS when the guest-side vsock_test exits 0 and vm_dmesg_check
+# reports no new oops/warning, KSFT_FAIL otherwise.
+test_ns_diff_global_vm_connect_to_global_host_ok() {
+	local oops_before warn_before
+	local ns0="global0"
+	local ns1="global1"
+	local port=12345
+	local unixfile
+	local dmesg_rc
+	local pidfile
+	local pids=()
+	local rc
+
+	init_namespaces
+
+	log_host "Setup socat bridge from ns ${ns0} to ns ${ns1} over port ${port}"
+
+	# -u: only a unique name is needed, socat creates the socket itself.
+	unixfile=$(mktemp -u /tmp/XXXX.sock)
+
+	ip netns exec "${ns0}" \
+		socat TCP-LISTEN:"${port}" UNIX-CONNECT:"${unixfile}" &
+	pids+=($!)
+	host_wait_for_listener "${ns0}" "${port}" "tcp"
+
+	ip netns exec "${ns1}" \
+		socat UNIX-LISTEN:"${unixfile}" TCP-CONNECT:127.0.0.1:"${port}" &
+	pids+=($!)
+	host_wait_for_listener "${ns1}" "${unixfile}" "unix"
+
+	log_host "Launching ${VSOCK_TEST} in ns ${ns1}"
+	host_vsock_test "${ns1}" "server" "${VSOCK_CID}" "${port}"
+
+	pidfile="$(create_pidfile)"
+	if ! vm_start "${pidfile}" "${ns0}"; then
+		# No local "cid" exists in this function; report VSOCK_CID.
+		log_host "failed to start vm (cid=${VSOCK_CID}, ns=${ns0})"
+		terminate_pids "${pids[@]}"
+		rm -f "${unixfile}"
+		return "${KSFT_FAIL}"
+	fi
+
+	vm_wait_for_ssh "${ns0}"
+
+	oops_before=$(vm_dmesg_oops_count "${ns0}")
+	warn_before=$(vm_dmesg_warn_count "${ns0}")
+
+	vm_vsock_test "${ns0}" "10.0.2.2" 2 "${port}"
+	rc=$?
+
+	vm_dmesg_check "${pidfile}" "${ns0}" "${oops_before}" "${warn_before}"
+	dmesg_rc=$?
+
+	terminate_pidfiles "${pidfile}"
+	terminate_pids "${pids[@]}"
+	rm -f "${unixfile}"
+
+	if [[ "${rc}" -ne 0 ]] || [[ "${dmesg_rc}" -ne 0 ]]; then
+		return "${KSFT_FAIL}"
+	fi
+
+	return "${KSFT_PASS}"
+}
+
+# A VM running in netns global0 must NOT be able to reach a host listener in
+# netns local0.
+#
+# socat in ns1 listens on vsock port ${port} and writes what it receives to
+# ${outfile}; the guest then tries to send "TEST" to CID 2 on that port.
+#
+# Returns KSFT_PASS only if "TEST" did not arrive and vm_dmesg_check
+# succeeded, KSFT_FAIL otherwise.
+test_ns_diff_global_vm_connect_to_local_host_fails() {
+	local ns0="global0"
+	local ns1="local0"
+	local port=12345
+	local oops_before warn_before
+	local dmesg_rc
+	local pidfile
+	local outfile
+	local result
+	local pid
+
+	init_namespaces
+
+	log_host "Launching socat in ns ${ns1}"
+	outfile=$(mktemp)
+
+	ip netns exec "${ns1}" socat VSOCK-LISTEN:"${port}" STDOUT &> "${outfile}" &
+	pid=$!
+	host_wait_for_listener "${ns1}" "${port}" "vsock"
+
+	pidfile="$(create_pidfile)"
+	if ! vm_start "${pidfile}" "${ns0}"; then
+		# No local "cid" exists in this function; report VSOCK_CID.
+		log_host "failed to start vm (cid=${VSOCK_CID}, ns=${ns0})"
+		terminate_pids "${pid}"
+		rm -f "${outfile}"
+		return "${KSFT_FAIL}"
+	fi
+
+	vm_wait_for_ssh "${ns0}"
+
+	oops_before=$(vm_dmesg_oops_count "${ns0}")
+	warn_before=$(vm_dmesg_warn_count "${ns0}")
+
+	vm_ssh "${ns0}" -- \
+		bash -c "echo TEST | socat STDIN VSOCK-CONNECT:2:${port}" 2>&1 | log_guest
+
+	vm_dmesg_check "${pidfile}" "${ns0}" "${oops_before}" "${warn_before}"
+	dmesg_rc=$?
+
+	terminate_pidfiles "${pidfile}"
+	terminate_pids "${pid}"
+
+	result=$(cat "${outfile}")
+	rm -f "${outfile}"
+
+	if [[ "${result}" != TEST ]] && [[ "${dmesg_rc}" -eq 0 ]]; then
+		return "${KSFT_PASS}"
+	fi
+
+	return "${KSFT_FAIL}"
+}
+
+# A host process in netns local0 must NOT be able to reach a VM running in
+# netns local1.
+#
+# socat inside the VM listens on vsock port ${port} and writes what it
+# receives to ${outfile}; the host then tries to send "TEST" to
+# VSOCK_CID:${port} from ns0.
+#
+# Returns KSFT_PASS only if "TEST" did not arrive and vm_dmesg_check
+# succeeded, KSFT_FAIL otherwise.
+test_ns_diff_local_host_connect_to_local_vm_fails() {
+	local ns0="local0"
+	local ns1="local1"
+	local port=12345
+	local oops_before warn_before
+	local dmesg_rc
+	local pidfile
+	local outfile
+	local result
+
+	init_namespaces
+
+	outfile=$(mktemp)
+
+	pidfile="$(create_pidfile)"
+	if ! vm_start "${pidfile}" "${ns1}"; then
+		# The VM lives in ns1 and no local "cid" exists here.
+		log_host "failed to start vm (cid=${VSOCK_CID}, ns=${ns1})"
+		rm -f "${outfile}"
+		return "${KSFT_FAIL}"
+	fi
+
+	vm_wait_for_ssh "${ns1}"
+	oops_before=$(vm_dmesg_oops_count "${ns1}")
+	warn_before=$(vm_dmesg_warn_count "${ns1}")
+
+	vm_ssh "${ns1}" -- socat VSOCK-LISTEN:"${port}" STDOUT > "${outfile}" &
+	vm_wait_for_listener "${ns1}" "${port}" "vsock"
+
+	# The connect is expected to fail; its error output is not interesting.
+	echo TEST | ip netns exec "${ns0}" \
+		socat STDIN VSOCK-CONNECT:"${VSOCK_CID}":"${port}" 2>/dev/null
+
+	vm_dmesg_check "${pidfile}" "${ns1}" "${oops_before}" "${warn_before}"
+	dmesg_rc=$?
+
+	terminate_pidfiles "${pidfile}"
+
+	result=$(cat "${outfile}")
+	rm -f "${outfile}"
+
+	if [[ "${result}" != TEST ]] && [[ "${dmesg_rc}" -eq 0 ]]; then
+		return "${KSFT_PASS}"
+	fi
+
+	return "${KSFT_FAIL}"
+}
+
+# A VM running in netns local0 must NOT be able to reach a host listener in
+# netns local1.
+#
+# socat in ns1 listens on vsock port ${port} and writes what it receives to
+# ${outfile}; the guest then tries to send "TEST" to CID 2 on that port.
+#
+# Returns KSFT_PASS only if "TEST" did not arrive and vm_dmesg_check
+# succeeded, KSFT_FAIL otherwise.
+test_ns_diff_local_vm_connect_to_local_host_fails() {
+	local oops_before warn_before
+	local ns0="local0"
+	local ns1="local1"
+	local port=12345
+	local dmesg_rc
+	local pidfile
+	local outfile
+	local result
+	local pid
+
+	init_namespaces
+
+	log_host "Launching socat in ns ${ns1}"
+	outfile=$(mktemp)
+	ip netns exec "${ns1}" socat VSOCK-LISTEN:"${port}" STDOUT &> "${outfile}" &
+	pid=$!
+	host_wait_for_listener "${ns1}" "${port}" "vsock"
+
+	pidfile="$(create_pidfile)"
+	if ! vm_start "${pidfile}" "${ns0}"; then
+		# No local "cid" exists in this function; report VSOCK_CID.
+		log_host "failed to start vm (cid=${VSOCK_CID}, ns=${ns0})"
+		# Stop the listener started above so it does not outlive the test.
+		terminate_pids "${pid}"
+		rm -f "${outfile}"
+		return "${KSFT_FAIL}"
+	fi
+
+	vm_wait_for_ssh "${ns0}"
+	oops_before=$(vm_dmesg_oops_count "${ns0}")
+	warn_before=$(vm_dmesg_warn_count "${ns0}")
+
+	vm_ssh "${ns0}" -- \
+		bash -c "echo TEST | socat STDIN VSOCK-CONNECT:2:${port}" 2>&1 | log_guest
+
+	vm_dmesg_check "${pidfile}" "${ns0}" "${oops_before}" "${warn_before}"
+	dmesg_rc=$?
+
+	terminate_pidfiles "${pidfile}"
+	terminate_pids "${pid}"
+
+	result=$(cat "${outfile}")
+	rm -f "${outfile}"
+
+	if [[ "${result}" != TEST ]] && [[ "${dmesg_rc}" -eq 0 ]]; then
+		return "${KSFT_PASS}"
+	fi
+
+	return "${KSFT_FAIL}"
+}
+
+__test_loopback_two_netns() {
+ local ns0=$1
+ local ns1=$2
+ local port=12345
+ local result
+ local pid
+
+ modprobe vsock_loopback &> /dev/null || :
+
+ log_host "Launching socat in ns ${ns1}"
+ outfile=$(mktemp)
+
+ ip netns exec "${ns1}" socat VSOCK-LISTEN:"${port}" STDOUT > "${outfile}" 2>/dev/null &
+ pid=$!
+ host_wait_for_listener "${ns1}" "${port}" "vsock"
+
+ log_host "Launching socat in ns ${ns0}"
+ echo TEST | ip netns exec "${ns0}" socat STDIN VSOCK-CONNECT:1:"${port}" 2>/dev/null
+ terminate_pids "${pid}"
+
+ result=$(cat "${outfile}")
+ rm -f "${outfile}"
+
+ if [[ "${result}" == TEST ]]; then
+ return 0
+ fi
+
+ return 1
+}
+
+test_ns_diff_global_to_local_loopback_local_fails() {
+ init_namespaces
+
+ if ! __test_loopback_two_netns "global0" "local0"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+test_ns_diff_local_to_global_loopback_fails() {
+ init_namespaces
+
+ if ! __test_loopback_two_netns "local0" "global0"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+test_ns_diff_local_to_local_loopback_fails() {
+ init_namespaces
+
+ if ! __test_loopback_two_netns "local0" "local1"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+test_ns_diff_global_to_global_loopback_ok() {
+ init_namespaces
+
+ if __test_loopback_two_netns "global0" "global1"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+test_ns_same_local_loopback_ok() {
+ init_namespaces
+
+ if __test_loopback_two_netns "local0" "local0"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+test_ns_same_local_host_connect_to_local_vm_ok() {
+ local oops_before warn_before
+ local ns="local0"
+ local port=1234
+ local dmesg_rc
+ local pidfile
+ local rc
+
+ init_namespaces
+
+ pidfile="$(create_pidfile)"
+
+ if ! vm_start "${pidfile}" "${ns}"; then
+ return "${KSFT_FAIL}"
+ fi
+
+ vm_wait_for_ssh "${ns}"
+ oops_before=$(vm_dmesg_oops_count "${ns}")
+ warn_before=$(vm_dmesg_warn_count "${ns}")
+
+ vm_vsock_test "${ns}" "server" 2 "${TEST_GUEST_PORT}"
+
+ # Skip test 29 (transport release use-after-free): This test attempts
+ # binding both G2H and H2G CIDs. Because virtio-vsock (G2H) doesn't
+ # support local namespaces the test will fail when
+ # transport_g2h->stream_allow() returns false. This edge case only
+ # happens for vsock_test in client mode on the host in a local
+ # namespace. This is a false positive.
+ host_vsock_test "${ns}" "127.0.0.1" "${VSOCK_CID}" "${TEST_HOST_PORT}" --skip=29
+ rc=$?
+
+ vm_dmesg_check "${pidfile}" "${ns}" "${oops_before}" "${warn_before}"
+ dmesg_rc=$?
+
+ terminate_pidfiles "${pidfile}"
+
+ if [[ "${rc}" -ne 0 ]] || [[ "${dmesg_rc}" -ne 0 ]]; then
+ return "${KSFT_FAIL}"
+ fi
+
+ return "${KSFT_PASS}"
+}
+
+test_ns_same_local_vm_connect_to_local_host_ok() {
+ local oops_before warn_before
+ local ns="local0"
+ local port=1234
+ local dmesg_rc
+ local pidfile
+ local rc
+
+ init_namespaces
+
+ pidfile="$(create_pidfile)"
+
+ if ! vm_start "${pidfile}" "${ns}"; then
+ return "${KSFT_FAIL}"
+ fi
+
+ vm_wait_for_ssh "${ns}"
+ oops_before=$(vm_dmesg_oops_count "${ns}")
+ warn_before=$(vm_dmesg_warn_count "${ns}")
+
+ host_vsock_test "${ns}" "server" "${VSOCK_CID}" "${port}"
+ vm_vsock_test "${ns}" "10.0.2.2" 2 "${port}"
+ rc=$?
+
+ vm_dmesg_check "${pidfile}" "${ns}" "${oops_before}" "${warn_before}"
+ dmesg_rc=$?
+
+ terminate_pidfiles "${pidfile}"
+
+ if [[ "${rc}" -ne 0 ]] || [[ "${dmesg_rc}" -ne 0 ]]; then
+ return "${KSFT_FAIL}"
+ fi
+
+ return "${KSFT_PASS}"
+}
+
+# Start one VM in namespace $1, then try to start a second VM in namespace $2
+# while the first is still running (presumably both use the same CID, as the
+# function name suggests -- confirm against vm_start).
+#
+# Returns 1 if the first VM cannot be started; otherwise returns the exit
+# status of the second vm_start. Both pidfiles are terminated before
+# returning.
+namespaces_can_boot_same_cid() {
+ local ns0=$1
+ local ns1=$2
+ local pidfile1 pidfile2
+ local rc
+
+ pidfile1="$(create_pidfile)"
+
+ # The first VM should be able to start. If it can't then we have
+ # problems and need to return non-zero.
+ if ! vm_start "${pidfile1}" "${ns0}"; then
+ return 1
+ fi
+
+ pidfile2="$(create_pidfile)"
+ vm_start "${pidfile2}" "${ns1}"
+ rc=$?
+ terminate_pidfiles "${pidfile1}" "${pidfile2}"
+
+ return "${rc}"
+}
+
+test_ns_global_same_cid_fails() {
+ init_namespaces
+
+ if namespaces_can_boot_same_cid "global0" "global1"; then
+ return "${KSFT_FAIL}"
+ fi
+
+ return "${KSFT_PASS}"
+}
+
+test_ns_local_global_same_cid_ok() {
+ init_namespaces
+
+ if namespaces_can_boot_same_cid "local0" "global0"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+test_ns_global_local_same_cid_ok() {
+ init_namespaces
+
+ if namespaces_can_boot_same_cid "global0" "local0"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+test_ns_local_same_cid_ok() {
+ init_namespaces
+
+ if namespaces_can_boot_same_cid "local0" "local1"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+# Check the writability of the vsock namespace sysctls on the host.
+#
+# For every mode in NS_MODES:
+#   - writing it to /proc/sys/net/vsock/ns_mode must fail
+#   - writing it to /proc/sys/net/vsock/child_ns_mode must succeed
+#
+# The original child_ns_mode value is restored before returning.
+# Returns KSFT_PASS if every check held, KSFT_FAIL otherwise.
+test_ns_host_vsock_child_ns_mode_ok() {
+	local orig_mode
+	# Keep the loop variable local so it does not leak into the caller.
+	local mode
+	local rc
+
+	orig_mode=$(cat /proc/sys/net/vsock/child_ns_mode)
+
+	rc="${KSFT_PASS}"
+	for mode in "${NS_MODES[@]}"; do
+		if echo "${mode}" 2>/dev/null > /proc/sys/net/vsock/ns_mode; then
+			log_host "ns_mode should be read-only but write succeeded"
+			rc="${KSFT_FAIL}"
+			continue
+		fi
+
+		if ! echo "${mode}" > /proc/sys/net/vsock/child_ns_mode; then
+			log_host "child_ns_mode should be writable to ${mode}"
+			rc="${KSFT_FAIL}"
+			continue
+		fi
+	done
+
+	echo "${orig_mode}" > /proc/sys/net/vsock/child_ns_mode
+
+	return "${rc}"
+}
+
test_vm_server_host_client() {
- if ! vm_vsock_test "server" 2 "${TEST_GUEST_PORT}"; then
+ if ! vm_vsock_test "init_ns" "server" 2 "${TEST_GUEST_PORT}"; then
return "${KSFT_FAIL}"
fi
- if ! host_vsock_test "127.0.0.1" "${VSOCK_CID}" "${TEST_HOST_PORT}"; then
+ if ! host_vsock_test "init_ns" "127.0.0.1" "${VSOCK_CID}" "${TEST_HOST_PORT}"; then
return "${KSFT_FAIL}"
fi
@@ -440,11 +1271,11 @@ test_vm_server_host_client() {
}
test_vm_client_host_server() {
- if ! host_vsock_test "server" "${VSOCK_CID}" "${TEST_HOST_PORT_LISTENER}"; then
+ if ! host_vsock_test "init_ns" "server" "${VSOCK_CID}" "${TEST_HOST_PORT_LISTENER}"; then
return "${KSFT_FAIL}"
fi
- if ! vm_vsock_test "10.0.2.2" 2 "${TEST_HOST_PORT_LISTENER}"; then
+ if ! vm_vsock_test "init_ns" "10.0.2.2" 2 "${TEST_HOST_PORT_LISTENER}"; then
return "${KSFT_FAIL}"
fi
@@ -454,19 +1285,92 @@ test_vm_client_host_server() {
test_vm_loopback() {
local port=60000 # non-forwarded local port
- vm_ssh -- modprobe vsock_loopback &> /dev/null || :
+ vm_ssh "init_ns" -- modprobe vsock_loopback &> /dev/null || :
- if ! vm_vsock_test "server" 1 "${port}"; then
+ if ! vm_vsock_test "init_ns" "server" 1 "${port}"; then
return "${KSFT_FAIL}"
fi
- if ! vm_vsock_test "127.0.0.1" 1 "${port}"; then
+
+ if ! vm_vsock_test "init_ns" "127.0.0.1" 1 "${port}"; then
return "${KSFT_FAIL}"
fi
return "${KSFT_PASS}"
}
+# Check that data still flows over a vsock connection between a host process
+# in netns global1 and a VM in netns global0 after namespaces are deleted.
+#
+# $1 selects what is deleted once the connection has been set up:
+#   "vm"   - delete ns0 (the VM's namespace)
+#   "host" - delete ns1 (the host socat's namespace)
+#   "both" - delete both
+# Any other value deletes nothing.
+#
+# Returns KSFT_PASS if "TEST" written into the pipe shows up in the guest
+# listener's output file, KSFT_FAIL otherwise (including vm_start failure).
+check_ns_delete_doesnt_break_connection() {
+ local pipefile pidfile outfile
+ local ns0="global0"
+ local ns1="global1"
+ local port=12345
+ local pids=()
+ local rc=0
+
+ init_namespaces
+
+ pidfile="$(create_pidfile)"
+ if ! vm_start "${pidfile}" "${ns0}"; then
+ return "${KSFT_FAIL}"
+ fi
+ vm_wait_for_ssh "${ns0}"
+
+ outfile=$(mktemp)
+ vm_ssh "${ns0}" -- \
+ socat VSOCK-LISTEN:"${port}",fork STDOUT > "${outfile}" 2>/dev/null &
+ pids+=($!)
+ vm_wait_for_listener "${ns0}" "${port}" "vsock"
+
+ # We use a pipe here so that we can echo into the pipe instead of using
+ # socat and a unix socket file. We just need a name for the pipe (not a
+ # regular file) so use -u.
+ pipefile=$(mktemp -u /tmp/vmtest_pipe_XXXX)
+ ip netns exec "${ns1}" \
+ socat PIPE:"${pipefile}" VSOCK-CONNECT:"${VSOCK_CID}":"${port}" &
+ pids+=($!)
+
+ # Wait (up to WAIT_PERIOD) for the pipe to appear. The timeout's exit
+ # status is not checked.
+ timeout "${WAIT_PERIOD}" \
+ bash -c 'while [[ ! -e '"${pipefile}"' ]]; do sleep 1; done; exit 0'
+
+ if [[ "$1" == "vm" ]]; then
+ ip netns del "${ns0}"
+ elif [[ "$1" == "host" ]]; then
+ ip netns del "${ns1}"
+ elif [[ "$1" == "both" ]]; then
+ ip netns del "${ns0}"
+ ip netns del "${ns1}"
+ fi
+
+ echo "TEST" > "${pipefile}"
+
+ # Wait (up to WAIT_PERIOD) for the guest listener to write something.
+ timeout "${WAIT_PERIOD}" \
+ bash -c 'while [[ ! -s '"${outfile}"' ]]; do sleep 1; done; exit 0'
+
+ if grep -q "TEST" "${outfile}"; then
+ rc="${KSFT_PASS}"
+ else
+ rc="${KSFT_FAIL}"
+ fi
+
+ terminate_pidfiles "${pidfile}"
+ terminate_pids "${pids[@]}"
+ rm -f "${outfile}" "${pipefile}"
+
+ return "${rc}"
+}
+
+test_ns_delete_vm_ok() {
+ check_ns_delete_doesnt_break_connection "vm"
+}
+
+test_ns_delete_host_ok() {
+ check_ns_delete_doesnt_break_connection "host"
+}
+
+test_ns_delete_both_ok() {
+ check_ns_delete_doesnt_break_connection "both"
+}
+
shared_vm_test() {
local tname
@@ -499,6 +1403,11 @@ run_shared_vm_tests() {
continue
fi
+ if ! check_netns "${arg}"; then
+ check_result "${KSFT_SKIP}" "${arg}"
+ continue
+ fi
+
run_shared_vm_test "${arg}"
check_result "$?" "${arg}"
done
@@ -518,8 +1427,8 @@ run_shared_vm_test() {
host_oops_cnt_before=$(dmesg | grep -c -i 'Oops')
host_warn_cnt_before=$(dmesg --level=warn | grep -c -i 'vsock')
- vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops')
- vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock')
+ vm_oops_cnt_before=$(vm_dmesg_oops_count "init_ns")
+ vm_warn_cnt_before=$(vm_dmesg_warn_count "init_ns")
name=$(echo "${1}" | awk '{ print $1 }')
eval test_"${name}"
@@ -537,13 +1446,13 @@ run_shared_vm_test() {
rc=$KSFT_FAIL
fi
- vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -i 'Oops' | wc -l)
+ vm_oops_cnt_after=$(vm_dmesg_oops_count "init_ns")
if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then
echo "FAIL: kernel oops detected on vm" | log_host
rc=$KSFT_FAIL
fi
- vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock')
+ vm_warn_cnt_after=$(vm_dmesg_warn_count "init_ns")
if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then
echo "FAIL: kernel warning detected on vm" | log_host
rc=$KSFT_FAIL
@@ -552,6 +1461,49 @@ run_shared_vm_test() {
return "${rc}"
}
+# Run every requested test in ARGS that is not a shared-VM test.
+#
+# For each such entry: report KSFT_SKIP if check_netns fails; otherwise create
+# the namespaces, run test_<name>, and report KSFT_FAIL if the host dmesg
+# gained an 'Oops' line or a warn-level 'vsock' line during the test, else
+# report the test's own return code. Namespaces are deleted after each test.
+run_ns_tests() {
+ for arg in "${ARGS[@]}"; do
+ # Shared-VM tests are handled by run_shared_vm_tests.
+ if shared_vm_test "${arg}"; then
+ continue
+ fi
+
+ if ! check_netns "${arg}"; then
+ check_result "${KSFT_SKIP}" "${arg}"
+ continue
+ fi
+
+ add_namespaces
+
+ # The test name is the first whitespace-separated word of the entry.
+ name=$(echo "${arg}" | awk '{ print $1 }')
+ log_host "Executing test_${name}"
+
+ host_oops_before=$(dmesg 2>/dev/null | grep -c -i 'Oops')
+ host_warn_before=$(dmesg --level=warn 2>/dev/null | grep -c -i 'vsock')
+ eval test_"${name}"
+ rc=$?
+
+ host_oops_after=$(dmesg 2>/dev/null | grep -c -i 'Oops')
+ if [[ "${host_oops_after}" -gt "${host_oops_before}" ]]; then
+ echo "FAIL: kernel oops detected on host" | log_host
+ check_result "${KSFT_FAIL}" "${name}"
+ del_namespaces
+ continue
+ fi
+
+ host_warn_after=$(dmesg --level=warn 2>/dev/null | grep -c -i 'vsock')
+ if [[ "${host_warn_after}" -gt "${host_warn_before}" ]]; then
+ echo "FAIL: kernel warning detected on host" | log_host
+ check_result "${KSFT_FAIL}" "${name}"
+ del_namespaces
+ continue
+ fi
+
+ check_result "${rc}" "${name}"
+
+ del_namespaces
+ done
+}
+
BUILD=0
QEMU="qemu-system-$(uname -m)"
@@ -577,6 +1529,7 @@ fi
check_args "${ARGS[@]}"
check_deps
check_vng
+check_socat
handle_build
echo "1..${#ARGS[@]}"
@@ -589,14 +1542,16 @@ cnt_total=0
if shared_vm_tests_requested "${ARGS[@]}"; then
log_host "Booting up VM"
pidfile="$(create_pidfile)"
- vm_start "${pidfile}"
- vm_wait_for_ssh
+ vm_start "${pidfile}" "init_ns"
+ vm_wait_for_ssh "init_ns"
log_host "VM booted up"
run_shared_vm_tests "${ARGS[@]}"
terminate_pidfiles "${pidfile}"
fi
+run_ns_tests "${ARGS[@]}"
+
echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}"
echo "Log: ${LOG}"
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index 0504c11c2de6..bb89d2dfaa2a 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -80,7 +80,7 @@ CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_WQ_WATCHDOG=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1
CONFIG_BOOTPARAM_HUNG_TASK_PANIC=1
CONFIG_PANIC_TIMEOUT=-1
CONFIG_STACKTRACE=y
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 83148875a12c..434065215d12 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -36,6 +36,7 @@ BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
CFLAGS := -O2 -g -std=gnu99 -pthread -Wall $(KHDR_INCLUDES)
+CFLAGS += -I $(top_srcdir)/tools/testing/selftests/
# call32_from_64 in thunks.S uses absolute addresses.
ifeq ($(CAN_BUILD_WITH_NOPIE),1)
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c
index 5fb531e3ad7c..2e423a335e1c 100644
--- a/tools/testing/selftests/x86/sysret_rip.c
+++ b/tools/testing/selftests/x86/sysret_rip.c
@@ -31,7 +31,7 @@
void test_syscall_ins(void);
extern const char test_page[];
-static void const *current_test_page_addr = test_page;
+static const void *current_test_page_addr = test_page;
/* State used by our signal handlers. */
static gregset_t initial_regs;
@@ -40,7 +40,7 @@ static volatile unsigned long rip;
static void sigsegv_for_sigreturn_test(int sig, siginfo_t *info, void *ctx_void)
{
- ucontext_t *ctx = (ucontext_t*)ctx_void;
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
if (rip != ctx->uc_mcontext.gregs[REG_RIP]) {
printf("[FAIL]\tRequested RIP=0x%lx but got RIP=0x%lx\n",
@@ -56,7 +56,7 @@ static void sigsegv_for_sigreturn_test(int sig, siginfo_t *info, void *ctx_void)
static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
{
- ucontext_t *ctx = (ucontext_t*)ctx_void;
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
@@ -69,8 +69,6 @@ static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
ctx->uc_mcontext.gregs[REG_R11]);
sethandler(SIGSEGV, sigsegv_for_sigreturn_test, SA_RESETHAND);
-
- return;
}
static void test_sigreturn_to(unsigned long ip)
@@ -84,7 +82,7 @@ static jmp_buf jmpbuf;
static void sigsegv_for_fallthrough(int sig, siginfo_t *info, void *ctx_void)
{
- ucontext_t *ctx = (ucontext_t*)ctx_void;
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
if (rip != ctx->uc_mcontext.gregs[REG_RIP]) {
printf("[FAIL]\tExpected SIGSEGV at 0x%lx but got RIP=0x%lx\n",
@@ -130,7 +128,7 @@ static void test_syscall_fallthrough_to(unsigned long ip)
printf("[OK]\tWe survived\n");
}
-int main()
+int main(void)
{
/*
* When the kernel returns from a slow-path syscall, it will
diff --git a/tools/testing/shared/linux/kernel.h b/tools/testing/shared/linux/kernel.h
index c0a2bb785b92..dc2b4ccfb185 100644
--- a/tools/testing/shared/linux/kernel.h
+++ b/tools/testing/shared/linux/kernel.h
@@ -21,9 +21,5 @@
#define schedule()
#define PAGE_SHIFT 12
-#define __acquires(x)
-#define __releases(x)
-#define __must_hold(x)
-
#define EXPORT_PER_CPU_SYMBOL_GPL(x)
#endif /* _KERNEL_H */
diff --git a/tools/testing/vma/Makefile b/tools/testing/vma/Makefile
index 66f3831a668f..e72b45dedda5 100644
--- a/tools/testing/vma/Makefile
+++ b/tools/testing/vma/Makefile
@@ -6,10 +6,13 @@ default: vma
include ../shared/shared.mk
-OFILES = $(SHARED_OFILES) vma.o maple-shim.o
+OFILES = $(SHARED_OFILES) main.o shared.o maple-shim.o
TARGETS = vma
-vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h
+# These can be varied to test different sizes.
+CFLAGS += -DNUM_VMA_FLAG_BITS=128 -DNUM_MM_FLAG_BITS=128
+
+main.o: main.c shared.c shared.h vma_internal.h tests/merge.c tests/mmap.c tests/vma.c ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h include/custom.h include/dup.h include/stubs.h
vma: $(OFILES)
$(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS)
diff --git a/tools/testing/vma/include/custom.h b/tools/testing/vma/include/custom.h
new file mode 100644
index 000000000000..802a76317245
--- /dev/null
+++ b/tools/testing/vma/include/custom.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#pragma once
+
+/*
+ * Contains declarations that exist in the kernel which have been CUSTOMISED for
+ * testing purposes to facilitate userland VMA testing.
+ */
+
+#ifdef CONFIG_MMU
+extern unsigned long mmap_min_addr;
+extern unsigned long dac_mmap_min_addr;
+#else
+#define mmap_min_addr 0UL
+#define dac_mmap_min_addr 0UL
+#endif
+
+#define VM_WARN_ON(_expr) (WARN_ON(_expr))
+#define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr))
+#define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr))
+#define VM_BUG_ON(_expr) (BUG_ON(_expr))
+#define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr))
+
+/* We hardcode this for now. */
+#define sysctl_max_map_count 0x1000000UL
+
+#define TASK_SIZE ((1ul << 47)-PAGE_SIZE)
+
+/*
+ * The shared stubs do not implement this, it amounts to an fprintf(STDERR,...)
+ * either way :)
+ */
+#define pr_warn_once pr_err
+
+#define pgtable_supports_soft_dirty() 1
+
+struct anon_vma {
+ struct anon_vma *root;
+ struct rb_root_cached rb_root;
+
+ /* Test fields. */
+ bool was_cloned;
+ bool was_unlinked;
+};
+
+static inline void unlink_anon_vmas(struct vm_area_struct *vma)
+{
+ /* For testing purposes, indicate that the anon_vma was unlinked. */
+ vma->anon_vma->was_unlinked = true;
+}
+
+static inline void vma_start_write(struct vm_area_struct *vma)
+{
+ /* Used to indicate to tests that a write operation has begun. */
+ vma->vm_lock_seq++;
+}
+
+static inline __must_check
+int vma_start_write_killable(struct vm_area_struct *vma)
+{
+ /* Used to indicate to tests that a write operation has begun. */
+ vma->vm_lock_seq++;
+ return 0;
+}
+
+static inline int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src,
+ enum vma_operation operation)
+{
+ /* For testing purposes. We indicate that an anon_vma has been cloned. */
+ if (src->anon_vma != NULL) {
+ dst->anon_vma = src->anon_vma;
+ dst->anon_vma->was_cloned = true;
+ }
+
+ return 0;
+}
+
+static inline int __anon_vma_prepare(struct vm_area_struct *vma)
+{
+ struct anon_vma *anon_vma = calloc(1, sizeof(struct anon_vma));
+
+ if (!anon_vma)
+ return -ENOMEM;
+
+ anon_vma->root = anon_vma;
+ vma->anon_vma = anon_vma;
+
+ return 0;
+}
+
+static inline int anon_vma_prepare(struct vm_area_struct *vma)
+{
+ if (likely(vma->anon_vma))
+ return 0;
+
+ return __anon_vma_prepare(vma);
+}
+
+static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
+{
+ if (reset_refcnt)
+ refcount_set(&vma->vm_refcnt, 0);
+}
+
+/*
+ * Build a vma_flags_t with the bits listed in @bits set.
+ *
+ * @count: number of entries in @bits.
+ * @bits:  array of flag bit numbers.
+ *
+ * Entries that are >= NUM_VMA_FLAG_BITS are skipped rather than rejected.
+ * Returns the resulting flag set by value.
+ */
+static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits)
+{
+	vma_flags_t flags;
+	size_t i;	/* same type as count: no signed/unsigned comparison */
+
+	/*
+	 * For testing purposes: allow invalid bit specification so we can
+	 * easily test.
+	 */
+	vma_flags_clear_all(&flags);
+	for (i = 0; i < count; i++)
+		if (bits[i] < NUM_VMA_FLAG_BITS)
+			vma_flag_set(&flags, bits[i]);
+	return flags;
+}
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
new file mode 100644
index 000000000000..3078ff1487d3
--- /dev/null
+++ b/tools/testing/vma/include/dup.h
@@ -0,0 +1,1320 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#pragma once
+
+/* Forward declarations to avoid header cycle. */
+struct vm_area_struct;
+static inline void vma_start_write(struct vm_area_struct *vma);
+
+/* Objects and functions declared here for use by this header. */
+extern const struct vm_operations_struct vma_dummy_vm_ops;
+extern unsigned long stack_guard_gap;
+extern unsigned long rlimit(unsigned int limit);
+struct task_struct *get_current(void);
+
+#define MMF_HAS_MDWE 28
+/* 'current' resolves to a call to get_current(). */
+#define current get_current()
+
+/*
+ * Define the task command name length as an enum so that it is visible to
+ * BPF programs. It sizes the comm[] array in struct task_struct.
+ */
+enum {
+ TASK_COMM_LEN = 16,
+};
+
+/* PARTIALLY implemented types. */
+/*
+ * Partial mm_struct: a maple tree, a VMA count, the page accounting counters
+ * updated by vm_stat_account(), default flags and the mm flags bitmap.
+ */
+struct mm_struct {
+ struct maple_tree mm_mt;
+ int map_count; /* number of VMAs */
+ unsigned long total_vm; /* Total pages mapped */
+ unsigned long locked_vm; /* Pages that have PG_mlocked set */
+ unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
+ unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
+ unsigned long stack_vm; /* VM_STACK */
+
+ unsigned long def_flags;
+
+ mm_flags_t flags; /* Must use mm_flags_* helpers to access */
+};
+/*
+ * Partial address_space. i_mmap_writable is the counter incremented by
+ * mapping_allow_writable().
+ */
+struct address_space {
+ struct rb_root_cached i_mmap;
+ unsigned long flags;
+ atomic_t i_mmap_writable;
+};
+/* Partial file_operations: only the two mmap-related hooks are declared. */
+struct file_operations {
+ int (*mmap)(struct file *, struct vm_area_struct *);
+ int (*mmap_prepare)(struct vm_area_desc *);
+};
+/* Partial file: just its address_space and file_operations pointers. */
+struct file {
+ struct address_space *f_mapping;
+ const struct file_operations *f_op;
+};
+/* Partial anon_vma_chain: an anon_vma pointer plus a list linkage. */
+struct anon_vma_chain {
+ struct anon_vma *anon_vma;
+ struct list_head same_vma;
+};
+/*
+ * Partial task_struct, as returned by get_current(). The personality field
+ * is what TASK_EXEC tests against READ_IMPLIES_EXEC.
+ */
+struct task_struct {
+ char comm[TASK_COMM_LEN];
+ pid_t pid;
+ struct mm_struct *mm;
+
+ /* Used for emulating ABI behavior of previous Linux versions: */
+ unsigned int personality;
+};
+
+/* A kref here is simply a wrapper around a refcount_t. */
+struct kref {
+ refcount_t refcount;
+};
+
+/* A reference-counted name, stored inline as a flexible array member. */
+struct anon_vma_name {
+ struct kref kref;
+ /* The name needs to be at the end because it is dynamically sized. */
+ char name[];
+};
+
+/*
+ * Contains declarations that are DUPLICATED from kernel source in order to
+ * facilitate userland VMA testing.
+ *
+ * These must be kept in sync with kernel source.
+ */
+
+/* The vm_refcnt bit referred to by the locking rules in vm_area_struct. */
+#define VMA_LOCK_OFFSET 0x40000000
+
+/* Type of vm_area_struct::vm_freeptr. */
+typedef struct { unsigned long v; } freeptr_t;
+
+/* The empty flag mask; also what compiled-out VM_* flags evaluate to. */
+#define VM_NONE 0x00000000
+
+/* A VMA flag BIT NUMBER (see the VMA_*_BIT enum), as opposed to a mask. */
+typedef int __bitwise vma_flag_t;
+
+/* In this header, private member access is a plain member access. */
+#define ACCESS_PRIVATE(p, member) ((p)->member)
+
+/*
+ * DECLARE_VMA_BIT() defines the enumerator VMA_<name>_BIT with the given bit
+ * number; DECLARE_VMA_BIT_ALIAS() defines VMA_<name>_BIT as another name for
+ * an already-declared bit.
+ */
+#define DECLARE_VMA_BIT(name, bitnum) \
+ VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum)
+#define DECLARE_VMA_BIT_ALIAS(name, aliased) \
+ VMA_ ## name ## _BIT = VMA_ ## aliased ## _BIT
+/*
+ * VMA flag bit numbers. The matching single-bit masks are the VM_* macros
+ * built with INIT_VM_FLAG().
+ */
+enum {
+ DECLARE_VMA_BIT(READ, 0),
+ DECLARE_VMA_BIT(WRITE, 1),
+ DECLARE_VMA_BIT(EXEC, 2),
+ DECLARE_VMA_BIT(SHARED, 3),
+ /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
+ DECLARE_VMA_BIT(MAYREAD, 4), /* limits for mprotect() etc. */
+ DECLARE_VMA_BIT(MAYWRITE, 5),
+ DECLARE_VMA_BIT(MAYEXEC, 6),
+ DECLARE_VMA_BIT(MAYSHARE, 7),
+ DECLARE_VMA_BIT(GROWSDOWN, 8), /* general info on the segment */
+#ifdef CONFIG_MMU
+ DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */
+#else
+ /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
+ DECLARE_VMA_BIT(MAYOVERLAY, 9),
+#endif /* CONFIG_MMU */
+ /* Page-ranges managed without "struct page", just pure PFN */
+ DECLARE_VMA_BIT(PFNMAP, 10),
+ DECLARE_VMA_BIT(MAYBE_GUARD, 11),
+ DECLARE_VMA_BIT(UFFD_WP, 12), /* wrprotect pages tracking */
+ DECLARE_VMA_BIT(LOCKED, 13),
+ DECLARE_VMA_BIT(IO, 14), /* Memory mapped I/O or similar */
+ DECLARE_VMA_BIT(SEQ_READ, 15), /* App will access data sequentially */
+ DECLARE_VMA_BIT(RAND_READ, 16), /* App will not benefit from clustered reads */
+ DECLARE_VMA_BIT(DONTCOPY, 17), /* Do not copy this vma on fork */
+ DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */
+ DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */
+ DECLARE_VMA_BIT(ACCOUNT, 20), /* Is a VM accounted object */
+ DECLARE_VMA_BIT(NORESERVE, 21), /* should the VM suppress accounting */
+ DECLARE_VMA_BIT(HUGETLB, 22), /* Huge TLB Page VM */
+ DECLARE_VMA_BIT(SYNC, 23), /* Synchronous page faults */
+ DECLARE_VMA_BIT(ARCH_1, 24), /* Architecture-specific flag */
+ DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. */
+ DECLARE_VMA_BIT(DONTDUMP, 26), /* Do not include in the core dump */
+ DECLARE_VMA_BIT(SOFTDIRTY, 27), /* NOT soft dirty clean area */
+ DECLARE_VMA_BIT(MIXEDMAP, 28), /* Can contain struct page and pure PFN pages */
+ DECLARE_VMA_BIT(HUGEPAGE, 29), /* MADV_HUGEPAGE marked this vma */
+ DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */
+ DECLARE_VMA_BIT(MERGEABLE, 31), /* KSM may merge identical pages */
+ /* These bits are reused, we define specific uses below. */
+ DECLARE_VMA_BIT(HIGH_ARCH_0, 32),
+ DECLARE_VMA_BIT(HIGH_ARCH_1, 33),
+ DECLARE_VMA_BIT(HIGH_ARCH_2, 34),
+ DECLARE_VMA_BIT(HIGH_ARCH_3, 35),
+ DECLARE_VMA_BIT(HIGH_ARCH_4, 36),
+ DECLARE_VMA_BIT(HIGH_ARCH_5, 37),
+ DECLARE_VMA_BIT(HIGH_ARCH_6, 38),
+ /*
+ * This flag is used to connect VFIO to arch specific KVM code. It
+ * indicates that the memory under this VMA is safe for use with any
+ * non-cachable memory type inside KVM. Some VFIO devices, on some
+ * platforms, are thought to be unsafe and can cause machine crashes
+ * if KVM does not lock down the memory type.
+ */
+ DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39),
+#ifdef CONFIG_PPC32
+ DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1),
+#else
+ DECLARE_VMA_BIT(DROPPABLE, 40),
+#endif
+ DECLARE_VMA_BIT(UFFD_MINOR, 41),
+ DECLARE_VMA_BIT(SEALED, 42),
+ /* Flags that reuse flags above. */
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4),
+#if defined(CONFIG_X86_USER_SHADOW_STACK)
+ /*
+ * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
+ * support core mm.
+ *
+ * These VMAs will get a single end guard page. This helps userspace
+ * protect itself from attacks. A single page is enough for current
+ * shadow stack archs (x86). See the comments near alloc_shstk() in
+ * arch/x86/kernel/shstk.c for more details on the guard size.
+ */
+ DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5),
+#elif defined(CONFIG_ARM64_GCS)
+ /*
+ * arm64's Guarded Control Stack implements similar functionality and
+ * has similar constraints to shadow stacks.
+ */
+ DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6),
+#endif
+ DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1), /* Strong Access Ordering (powerpc) */
+ DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1), /* parisc */
+ DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1), /* sparc64 */
+ DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1), /* arm64 */
+ DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1), /* sparc64, arm64 */
+ DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1), /* !CONFIG_MMU */
+ DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4), /* arm64 */
+ DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */
+#ifdef CONFIG_STACK_GROWSUP
+ DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP),
+ DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN),
+#else
+ DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN),
+#endif
+};
+
+/*
+ * INIT_VM_FLAG(name) turns the bit number VMA_<name>_BIT into its single-bit
+ * mask. Flags that are compiled out are defined as VM_NONE instead.
+ */
+#define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT)
+#define VM_READ INIT_VM_FLAG(READ)
+#define VM_WRITE INIT_VM_FLAG(WRITE)
+#define VM_EXEC INIT_VM_FLAG(EXEC)
+#define VM_SHARED INIT_VM_FLAG(SHARED)
+#define VM_MAYREAD INIT_VM_FLAG(MAYREAD)
+#define VM_MAYWRITE INIT_VM_FLAG(MAYWRITE)
+#define VM_MAYEXEC INIT_VM_FLAG(MAYEXEC)
+#define VM_MAYSHARE INIT_VM_FLAG(MAYSHARE)
+#define VM_GROWSDOWN INIT_VM_FLAG(GROWSDOWN)
+#ifdef CONFIG_MMU
+#define VM_UFFD_MISSING INIT_VM_FLAG(UFFD_MISSING)
+#else
+#define VM_UFFD_MISSING VM_NONE
+#define VM_MAYOVERLAY INIT_VM_FLAG(MAYOVERLAY)
+#endif
+#define VM_PFNMAP INIT_VM_FLAG(PFNMAP)
+#define VM_MAYBE_GUARD INIT_VM_FLAG(MAYBE_GUARD)
+#define VM_UFFD_WP INIT_VM_FLAG(UFFD_WP)
+#define VM_LOCKED INIT_VM_FLAG(LOCKED)
+#define VM_IO INIT_VM_FLAG(IO)
+#define VM_SEQ_READ INIT_VM_FLAG(SEQ_READ)
+#define VM_RAND_READ INIT_VM_FLAG(RAND_READ)
+#define VM_DONTCOPY INIT_VM_FLAG(DONTCOPY)
+#define VM_DONTEXPAND INIT_VM_FLAG(DONTEXPAND)
+#define VM_LOCKONFAULT INIT_VM_FLAG(LOCKONFAULT)
+#define VM_ACCOUNT INIT_VM_FLAG(ACCOUNT)
+#define VM_NORESERVE INIT_VM_FLAG(NORESERVE)
+#define VM_HUGETLB INIT_VM_FLAG(HUGETLB)
+#define VM_SYNC INIT_VM_FLAG(SYNC)
+#define VM_ARCH_1 INIT_VM_FLAG(ARCH_1)
+#define VM_WIPEONFORK INIT_VM_FLAG(WIPEONFORK)
+#define VM_DONTDUMP INIT_VM_FLAG(DONTDUMP)
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define VM_SOFTDIRTY INIT_VM_FLAG(SOFTDIRTY)
+#else
+#define VM_SOFTDIRTY VM_NONE
+#endif
+#define VM_MIXEDMAP INIT_VM_FLAG(MIXEDMAP)
+#define VM_HUGEPAGE INIT_VM_FLAG(HUGEPAGE)
+#define VM_NOHUGEPAGE INIT_VM_FLAG(NOHUGEPAGE)
+#define VM_MERGEABLE INIT_VM_FLAG(MERGEABLE)
+#define VM_STACK INIT_VM_FLAG(STACK)
+/*
+ * VMA_STACK_EARLY_BIT is only declared under CONFIG_STACK_GROWSUP, so this
+ * must test exactly that symbol; otherwise VM_STACK_EARLY is VM_NONE.
+ */
+#ifdef CONFIG_STACK_GROWSUP
+#define VM_STACK_EARLY INIT_VM_FLAG(STACK_EARLY)
+#else
+#define VM_STACK_EARLY VM_NONE
+#endif
+/*
+ * Configuration- and architecture-dependent flag masks. Each is either the
+ * single-bit mask for its bit or, where a fallback is provided, VM_NONE.
+ */
+#ifdef CONFIG_ARCH_HAS_PKEYS
+#define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT)
+/* Despite the naming, these are FLAGS not bits. */
+#define VM_PKEY_BIT0 INIT_VM_FLAG(PKEY_BIT0)
+#define VM_PKEY_BIT1 INIT_VM_FLAG(PKEY_BIT1)
+#define VM_PKEY_BIT2 INIT_VM_FLAG(PKEY_BIT2)
+#if CONFIG_ARCH_PKEY_BITS > 3
+#define VM_PKEY_BIT3 INIT_VM_FLAG(PKEY_BIT3)
+#else
+#define VM_PKEY_BIT3 VM_NONE
+#endif /* CONFIG_ARCH_PKEY_BITS > 3 */
+#if CONFIG_ARCH_PKEY_BITS > 4
+#define VM_PKEY_BIT4 INIT_VM_FLAG(PKEY_BIT4)
+#else
+#define VM_PKEY_BIT4 VM_NONE
+#endif /* CONFIG_ARCH_PKEY_BITS > 4 */
+#endif /* CONFIG_ARCH_HAS_PKEYS */
+#if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS)
+#define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK)
+#else
+#define VM_SHADOW_STACK VM_NONE
+#endif
+/* Only one branch of this chain is taken; the others stay undefined. */
+#if defined(CONFIG_PPC64)
+#define VM_SAO INIT_VM_FLAG(SAO)
+#elif defined(CONFIG_PARISC)
+#define VM_GROWSUP INIT_VM_FLAG(GROWSUP)
+#elif defined(CONFIG_SPARC64)
+#define VM_SPARC_ADI INIT_VM_FLAG(SPARC_ADI)
+#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
+#elif defined(CONFIG_ARM64)
+#define VM_ARM64_BTI INIT_VM_FLAG(ARM64_BTI)
+#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
+#elif !defined(CONFIG_MMU)
+#define VM_MAPPED_COPY INIT_VM_FLAG(MAPPED_COPY)
+#endif
+#ifndef VM_GROWSUP
+#define VM_GROWSUP VM_NONE
+#endif
+#ifdef CONFIG_ARM64_MTE
+#define VM_MTE INIT_VM_FLAG(MTE)
+#define VM_MTE_ALLOWED INIT_VM_FLAG(MTE_ALLOWED)
+#else
+#define VM_MTE VM_NONE
+#define VM_MTE_ALLOWED VM_NONE
+#endif
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+#define VM_UFFD_MINOR INIT_VM_FLAG(UFFD_MINOR)
+#else
+#define VM_UFFD_MINOR VM_NONE
+#endif
+#ifdef CONFIG_64BIT
+#define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED)
+#define VM_SEALED INIT_VM_FLAG(SEALED)
+#else
+#define VM_ALLOW_ANY_UNCACHED VM_NONE
+#define VM_SEALED VM_NONE
+#endif
+#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32)
+#define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE)
+#else
+#define VM_DROPPABLE VM_NONE
+#endif
+
+/* Bits set in the VMA until the stack is in its final location */
+#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
+
+/* VM_EXEC if the current task's personality has READ_IMPLIES_EXEC, else 0. */
+#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
+
+/* Common data flag combinations */
+#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \
+ VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */
+#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC
+#endif
+
+#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
+#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
+#endif
+
+#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
+
+#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+
+/* VMA basic access permission flags */
+#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
+
+/*
+ * Special vmas that are non-mergable, non-mlock()able.
+ */
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
+
+/* Fixed address-space limits; the low/max and stack-top values all coincide. */
+#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
+#define TASK_SIZE_LOW DEFAULT_MAP_WINDOW
+#define TASK_SIZE_MAX DEFAULT_MAP_WINDOW
+#define STACK_TOP TASK_SIZE_LOW
+#define STACK_TOP_MAX TASK_SIZE_MAX
+
+/* This mask represents all the VMA flag bits used by mlock */
+#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT)
+
+#define RLIMIT_STACK 3 /* max stack size */
+#define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */
+
+#define CAP_IPC_LOCK 14
+
+#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD)
+
+#define VM_IGNORE_MERGE VM_STICKY
+
+#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)
+
+/* pgprot_t wraps a single 'pgprot' member; these unwrap and wrap it. */
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) } )
+
+/* Loop header: repeatedly fetch vma_next() until it returns NULL. */
+#define for_each_vma(__vmi, __vma) \
+ while (((__vma) = vma_next(&(__vmi))) != NULL)
+
+/* The MM code likes to work with exclusive end addresses */
+#define for_each_vma_range(__vmi, __vma, __end) \
+ while (((__vma) = vma_find(&(__vmi), (__end))) != NULL)
+
+#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
+
+#define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT))
+
+/* Mapped onto the double-underscore variants of the same bit operations. */
+#define test_and_set_bit(nr, addr) __test_and_set_bit(nr, addr)
+#define test_and_clear_bit(nr, addr) __test_and_clear_bit(nr, addr)
+
+#define AS_MM_ALL_LOCKS 2
+
+/* Exchange a and b through a temporary of a's type. Evaluates each twice. */
+#define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+/*
+ * Flags for bug emulation.
+ *
+ * These occupy the top three bytes.
+ *
+ * READ_IMPLIES_EXEC is tested against task_struct::personality by TASK_EXEC.
+ */
+enum {
+ READ_IMPLIES_EXEC = 0x0400000,
+};
+
+/* A VMA iterator is a thin wrapper around a maple tree state. */
+struct vma_iterator {
+ struct ma_state mas;
+};
+
+/*
+ * Define a vma_iterator called 'name' over __mm's mm_mt tree, with its index
+ * set to __addr and its state set to ma_start.
+ */
+#define VMA_ITERATOR(name, __mm, __addr) \
+ struct vma_iterator name = { \
+ .mas = { \
+ .tree = &(__mm)->mm_mt, \
+ .index = __addr, \
+ .node = NULL, \
+ .status = ma_start, \
+ }, \
+ }
+
+/* Define an empty-initialised struct mutex. */
+#define DEFINE_MUTEX(mutexname) \
+ struct mutex mutexname = {}
+
+/* Declare an unsigned long array with BITS_TO_LONGS(bits) elements. */
+#define DECLARE_BITMAP(name, bits) \
+ unsigned long name[BITS_TO_LONGS(bits)]
+
+/* An empty-initialised vma_flags_t value. */
+#define EMPTY_VMA_FLAGS ((vma_flags_t){ })
+
+/*
+ * What action should be taken after an .mmap_prepare call is complete?
+ * Stored in mmap_action::type.
+ */
+enum mmap_action_type {
+ MMAP_NOTHING, /* Mapping is complete, no further action. */
+ MMAP_REMAP_PFN, /* Remap PFN range. */
+ MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */
+};
+
+/*
+ * Describes an action an mmap_prepare hook can instruct to be taken to complete
+ * the mapping of a VMA. Specified in vm_area_desc.
+ */
+struct mmap_action {
+ union {
+ /* Remap range. */
+ struct {
+ unsigned long start;
+ unsigned long start_pfn;
+ unsigned long size;
+ pgprot_t pgprot;
+ } remap;
+ };
+ enum mmap_action_type type;
+
+ /*
+ * If specified, this hook is invoked after the selected action has been
+ * successfully completed. Note that the VMA write lock is still held.
+ *
+ * The absolute minimum ought to be done here.
+ *
+ * Returns 0 on success, or an error code.
+ */
+ int (*success_hook)(const struct vm_area_struct *vma);
+
+ /*
+ * If specified, this hook is invoked when an error occurred when
+ * attempting the selected action.
+ *
+ * The hook can return an error code in order to filter the error, but
+ * it is not valid to clear the error here.
+ */
+ int (*error_hook)(int err);
+
+ /*
+ * This should be set in rare instances where the operation required
+ * that the rmap should not be able to access the VMA until
+ * completely set up.
+ */
+ bool hide_from_rmap_until_complete :1;
+};
+
+/*
+ * Operations which modify VMAs. This is the type of anon_vma_clone()'s
+ * 'operation' argument.
+ */
+enum vma_operation {
+ VMA_OP_SPLIT,
+ VMA_OP_MERGE_UNFAULTED,
+ VMA_OP_REMAP,
+ VMA_OP_FORK,
+};
+
+/*
+ * Describes a VMA that is about to be mmap()'ed. Drivers may choose to
+ * manipulate mutable fields which will cause those fields to be updated in the
+ * resultant VMA.
+ *
+ * Helper functions are not required for manipulating any field.
+ */
+struct vm_area_desc {
+ /* Immutable state. */
+ const struct mm_struct *const mm;
+ struct file *const file; /* May vary from vm_file in stacked callers. */
+ unsigned long start;
+ unsigned long end;
+
+ /* Mutable fields. Populated with initial state. */
+ pgoff_t pgoff;
+ struct file *vm_file;
+ /* Two views of the same storage: legacy word and vma_flags_t bitmap. */
+ union {
+ vm_flags_t vm_flags;
+ vma_flags_t vma_flags;
+ };
+ pgprot_t page_prot;
+
+ /* Write-only fields. */
+ const struct vm_operations_struct *vm_ops;
+ void *private_data;
+
+ /* Take further action? */
+ struct mmap_action action;
+};
+
+/*
+ * Describes a single VMA covering [vm_start, vm_end) within vm_mm. Fields
+ * guarded by CONFIG_* symbols are only present when that option is defined.
+ */
+struct vm_area_struct {
+ /* The first cache line has the info for VMA tree walking. */
+
+ union {
+ struct {
+ /* VMA covers [vm_start; vm_end) addresses within mm */
+ unsigned long vm_start;
+ unsigned long vm_end;
+ };
+ freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
+ };
+
+ struct mm_struct *vm_mm; /* The address space we belong to. */
+ pgprot_t vm_page_prot; /* Access permissions of this VMA. */
+
+ /*
+ * Flags, see mm.h.
+ * To modify use vm_flags_{init|reset|set|clear|mod} functions.
+ */
+ union {
+ const vm_flags_t vm_flags;
+ vma_flags_t flags;
+ };
+
+#ifdef CONFIG_PER_VMA_LOCK
+ /*
+ * Can only be written (using WRITE_ONCE()) while holding both:
+ * - mmap_lock (in write mode)
+ * - vm_refcnt bit at VMA_LOCK_OFFSET is set
+ * Can be read reliably while holding one of:
+ * - mmap_lock (in read or write mode)
+ * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1
+ * Can be read unreliably (using READ_ONCE()) for pessimistic bailout
+ * while holding nothing (except RCU to keep the VMA struct allocated).
+ *
+ * This sequence counter is explicitly allowed to overflow; sequence
+ * counter reuse can only lead to occasional unnecessary use of the
+ * slowpath.
+ */
+ unsigned int vm_lock_seq;
+#endif
+
+ /*
+ * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
+ * list, after a COW of one of the file pages. A MAP_SHARED vma
+ * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
+ * or brk vma (with NULL file) can only be in an anon_vma list.
+ */
+ struct list_head anon_vma_chain; /* Serialized by mmap_lock &
+ * page_table_lock */
+ struct anon_vma *anon_vma; /* Serialized by page_table_lock */
+
+ /* Function pointers to deal with this struct. */
+ const struct vm_operations_struct *vm_ops;
+
+ /* Information about our backing store: */
+ unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
+ units */
+ struct file * vm_file; /* File we map to (can be NULL). */
+ void * vm_private_data; /* was vm_pte (shared mem) */
+
+#ifdef CONFIG_SWAP
+ atomic_long_t swap_readahead_info;
+#endif
+#ifndef CONFIG_MMU
+ struct vm_region *vm_region; /* NOMMU mapping region */
+#endif
+#ifdef CONFIG_NUMA
+ struct mempolicy *vm_policy; /* NUMA policy for the VMA */
+#endif
+#ifdef CONFIG_NUMA_BALANCING
+ struct vma_numab_state *numab_state; /* NUMA Balancing state */
+#endif
+#ifdef CONFIG_PER_VMA_LOCK
+ /* Unstable RCU readers are allowed to read this. */
+ refcount_t vm_refcnt;
+#endif
+ /*
+ * For areas with an address space and backing store,
+ * linkage into the address_space->i_mmap interval tree.
+ *
+ */
+ struct {
+ struct rb_node rb;
+ unsigned long rb_subtree_last;
+ } shared;
+#ifdef CONFIG_ANON_VMA_NAME
+ /*
+ * For private and shared anonymous mappings, a pointer to a null
+ * terminated string containing the name given to the vma, or NULL if
+ * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
+ */
+ struct anon_vma_name *anon_name;
+#endif
+ struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
+} __randomize_layout;
+
+/*
+ * Table of hooks attached to a VMA through vm_area_struct::vm_ops. vma_init()
+ * points new VMAs at vma_dummy_vm_ops.
+ */
+struct vm_operations_struct {
+ void (*open)(struct vm_area_struct * area);
+ /**
+ * @close: Called when the VMA is being removed from the MM.
+ * Context: User context. May sleep. Caller holds mmap_lock.
+ */
+ void (*close)(struct vm_area_struct * area);
+ /* Called any time before splitting to check if it's allowed */
+ int (*may_split)(struct vm_area_struct *area, unsigned long addr);
+ int (*mremap)(struct vm_area_struct *area);
+ /*
+ * Called by mprotect() to make driver-specific permission
+ * checks before mprotect() is finalised. The VMA must not
+ * be modified. Returns 0 if mprotect() can proceed.
+ */
+ int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, unsigned long newflags);
+ vm_fault_t (*fault)(struct vm_fault *vmf);
+ vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
+ vm_fault_t (*map_pages)(struct vm_fault *vmf,
+ pgoff_t start_pgoff, pgoff_t end_pgoff);
+ unsigned long (*pagesize)(struct vm_area_struct * area);
+
+ /* notification that a previously read-only page is about to become
+ * writable, if an error is returned it will cause a SIGBUS */
+ vm_fault_t (*page_mkwrite)(struct vm_fault *vmf);
+
+ /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
+ vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf);
+
+ /* called by access_process_vm when get_user_pages() fails, typically
+ * for use by special VMAs. See also generic_access_phys() for a generic
+ * implementation useful for any iomem mapping.
+ */
+ int (*access)(struct vm_area_struct *vma, unsigned long addr,
+ void *buf, int len, int write);
+
+ /* Called by the /proc/PID/maps code to ask the vma whether it
+ * has a special name. Returning non-NULL will also cause this
+ * vma to be dumped unconditionally. */
+ const char *(*name)(struct vm_area_struct *vma);
+
+#ifdef CONFIG_NUMA
+ /*
+ * set_policy() op must add a reference to any non-NULL @new mempolicy
+ * to hold the policy upon return. Caller should pass NULL @new to
+ * remove a policy and fall back to surrounding context--i.e. do not
+ * install a MPOL_DEFAULT policy, nor the task or system default
+ * mempolicy.
+ */
+ int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
+
+ /*
+ * get_policy() op must add reference [mpol_get()] to any policy at
+ * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure
+ * in mm/mempolicy.c will do this automatically.
+ * get_policy() must NOT add a ref if the policy at (vma,addr) is not
+ * marked as MPOL_SHARED. vma policies are protected by the mmap_lock.
+ * If no [shared/vma] mempolicy exists at the addr, get_policy() op
+ * must return NULL--i.e., do not "fallback" to task or system default
+ * policy.
+ */
+ struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
+ unsigned long addr, pgoff_t *ilx);
+#endif
+#ifdef CONFIG_FIND_NORMAL_PAGE
+ /*
+ * Called by vm_normal_page() for special PTEs in @vma at @addr. This
+ * allows for returning a "normal" page from vm_normal_page() even
+ * though the PTE indicates that the "struct page" either does not exist
+ * or should not be touched: "special".
+ *
+ * Do not add new users: this really only works when a "normal" page
+ * was mapped, but then the PTE got changed to something weird (+
+ * marked special) that would not make pte_pfn() identify the originally
+ * inserted page.
+ */
+ struct page *(*find_normal_page)(struct vm_area_struct *vma,
+ unsigned long addr);
+#endif /* CONFIG_FIND_NORMAL_PAGE */
+};
+
+/*
+ * Parameters describing a search for an unmapped area.
+ * NOTE(review): the meaning of each field is not established in this header;
+ * confirm against the code that consumes it.
+ */
+struct vm_unmapped_area_info {
+/* Value for the 'flags' field (presumably selects a top-down search). */
+#define VM_UNMAPPED_AREA_TOPDOWN 1
+ unsigned long flags;
+ unsigned long length;
+ unsigned long low_limit;
+ unsigned long high_limit;
+ unsigned long align_mask;
+ unsigned long align_offset;
+ unsigned long start_gap;
+};
+
+/* State describing a page table move; see PAGETABLE_MOVE() to define one. */
+struct pagetable_move_control {
+ struct vm_area_struct *old; /* Source VMA. */
+ struct vm_area_struct *new; /* Destination VMA. */
+ unsigned long old_addr; /* Address from which the move begins. */
+ unsigned long old_end; /* Exclusive address at which old range ends. */
+ unsigned long new_addr; /* Address to move page tables to. */
+ unsigned long len_in; /* Bytes to remap specified by user. */
+
+ bool need_rmap_locks; /* Do rmap locks need to be taken? */
+ bool for_stack; /* Is this an early temp stack being moved? */
+};
+
+/*
+ * Define a pagetable_move_control called 'name'. old_end is computed as
+ * old_addr_ + len_; fields not listed (the two bools) are zero-initialised.
+ */
+#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \
+ struct pagetable_move_control name = { \
+ .old = old_, \
+ .new = new_, \
+ .old_addr = old_addr_, \
+ .old_end = (old_addr_) + (len_), \
+ .new_addr = new_addr_, \
+ .len_in = len_, \
+ }
+
+/* Invalidate the iterator by pausing its underlying maple tree state. */
+static inline void vma_iter_invalidate(struct vma_iterator *vmi)
+{
+	struct ma_state *mas = &vmi->mas;
+
+	mas_pause(mas);
+}
+
+/* Return a pgprot_t whose value is the bitwise OR of the two inputs. */
+static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
+{
+	pgprot_t combined = __pgprot(pgprot_val(oldprot) | pgprot_val(newprot));
+
+	return combined;
+}
+
+/* Test stub: the page protection is simply the VMA flags value, wrapped. */
+static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
+{
+	pgprot_t prot = __pgprot(vm_flags);
+
+	return prot;
+}
+
+/* Return whether bit @flag is set in @mm's flags bitmap. */
+static inline bool mm_flags_test(int flag, const struct mm_struct *mm)
+{
+ return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
+}
+
+/*
+ * Store @value into the first system word of the VMA flags, non-atomically.
+ *
+ * IMPORTANT: words beyond the first are left untouched. The caller must
+ * account for this.
+ */
+static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value)
+{
+	unsigned long *first_word = ACCESS_PRIVATE(flags, __vma_flags);
+
+	first_word[0] = value;
+}
+
+/*
+ * Store @value into the first system word of the VMA flags with WRITE_ONCE(),
+ * non-atomically.
+ *
+ * IMPORTANT: words beyond the first are left untouched. The caller must
+ * account for this.
+ */
+static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value)
+{
+	unsigned long *first_word = ACCESS_PRIVATE(flags, __vma_flags);
+
+	WRITE_ONCE(first_word[0], value);
+}
+
+/* OR @value into the first system word of the VMA flags, non-atomically. */
+static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
+{
+	unsigned long *first_word = ACCESS_PRIVATE(flags, __vma_flags);
+
+	first_word[0] = first_word[0] | value;
+}
+
+/* Clear the bits of @value in the first VMA flags word, non-atomically. */
+static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
+{
+	unsigned long *first_word = ACCESS_PRIVATE(flags, __vma_flags);
+	const unsigned long keep = ~value;
+
+	first_word[0] = first_word[0] & keep;
+}
+
+/* Zero all NUM_VMA_FLAG_BITS bits of the VMA flags bitmap. */
+static inline void vma_flags_clear_all(vma_flags_t *flags)
+{
+	unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
+
+	bitmap_zero(bitmap, NUM_VMA_FLAG_BITS);
+}
+
+/* Set the single bit number @bit in the VMA flags bitmap, non-atomically. */
+static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit)
+{
+	const int nr = (__force int)bit;
+
+	__set_bit(nr, ACCESS_PRIVATE(flags, __vma_flags));
+}
+
+/*
+ * Use when VMA is not part of the VMA tree and needs no locking: clears every
+ * flag bit, then stores @flags into the first word.
+ */
+static inline void vm_flags_init(struct vm_area_struct *vma,
+				 vm_flags_t flags)
+{
+	vma_flags_t *vma_flags = &vma->flags;
+
+	vma_flags_clear_all(vma_flags);
+	vma_flags_overwrite_word(vma_flags, flags);
+}
+
+/*
+ * Use when VMA is part of the VMA tree and modifications need coordination.
+ * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma; it
+ * should be locked explicitly beforehand.
+ */
+static inline void vm_flags_reset(struct vm_area_struct *vma,
+				  vm_flags_t flags)
+{
+	vma_assert_write_locked(vma);
+
+	/* Same steps as vm_flags_init(): wipe everything, then set word 0. */
+	vma_flags_clear_all(&vma->flags);
+	vma_flags_overwrite_word(&vma->flags, flags);
+}
+
+/*
+ * As vm_flags_reset(), but the store to the first flags word is made with
+ * vma_flags_overwrite_word_once().
+ */
+static inline void vm_flags_reset_once(struct vm_area_struct *vma,
+				       vm_flags_t flags)
+{
+	vma_flags_t *vma_flags = &vma->flags;
+
+	vma_assert_write_locked(vma);
+	/*
+	 * The user should only be interested in avoiding reordering of
+	 * assignment to the first word.
+	 */
+	vma_flags_clear_all(vma_flags);
+	vma_flags_overwrite_word_once(vma_flags, flags);
+}
+
+/* Begin a write on @vma, then OR @flags into its first flags word. */
+static inline void vm_flags_set(struct vm_area_struct *vma,
+				vm_flags_t flags)
+{
+	vma_flags_t *vma_flags = &vma->flags;
+
+	vma_start_write(vma);
+	vma_flags_set_word(vma_flags, flags);
+}
+
+/* Begin a write on @vma, then clear @flags from its first flags word. */
+static inline void vm_flags_clear(struct vm_area_struct *vma,
+				  vm_flags_t flags)
+{
+	vma_flags_t *vma_flags = &vma->flags;
+
+	vma_start_write(vma);
+	vma_flags_clear_word(vma_flags, flags);
+}
+
+/* Declared here only; the definition is provided elsewhere. */
+static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits);
+
+/*
+ * Build a vma_flags_t from a list of VMA_*_BIT bit numbers: the arguments are
+ * counted with COUNT_ARGS() and passed as an array to __mk_vma_flags().
+ */
+#define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \
+ (const vma_flag_t []){__VA_ARGS__})
+
+/* True if @flags and @to_test have at least one bit in common. */
+static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags,
+						vma_flags_t to_test)
+{
+	const unsigned long *wanted = to_test.__vma_flags;
+	const unsigned long *have = flags->__vma_flags;
+
+	return bitmap_intersects(wanted, have, NUM_VMA_FLAG_BITS);
+}
+
+/* As vma_flags_test_mask(), taking a list of VMA_*_BIT bit numbers. */
+#define vma_flags_test(flags, ...) \
+	vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__))
+
+/* True if every bit set in @to_test is also set in @flags. */
+static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags,
+						    vma_flags_t to_test)
+{
+	const unsigned long *required = to_test.__vma_flags;
+	const unsigned long *have = flags->__vma_flags;
+
+	return bitmap_subset(required, have, NUM_VMA_FLAG_BITS);
+}
+
+/* As vma_flags_test_all_mask(), taking a list of VMA_*_BIT bit numbers. */
+#define vma_flags_test_all(flags, ...) \
+	vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__))
+
+/* OR every bit of @to_set into @flags. */
+static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set)
+{
+	unsigned long *dst = flags->__vma_flags;
+	const unsigned long *extra = to_set.__vma_flags;
+
+	bitmap_or(dst, dst, extra, NUM_VMA_FLAG_BITS);
+}
+
+/* As vma_flags_set_mask(), taking a list of VMA_*_BIT bit numbers. */
+#define vma_flags_set(flags, ...) \
+	vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__))
+
+/* Clear from @flags every bit that is set in @to_clear. */
+static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear)
+{
+	unsigned long *dst = flags->__vma_flags;
+	const unsigned long *drop = to_clear.__vma_flags;
+
+	bitmap_andnot(dst, dst, drop, NUM_VMA_FLAG_BITS);
+}
+
+/* As vma_flags_clear_mask(), taking a list of VMA_*_BIT bit numbers. */
+#define vma_flags_clear(flags, ...) \
+	vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__))
+
+/* True if @vma has every flag in @flags set. */
+static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma,
+					   vma_flags_t flags)
+{
+	const vma_flags_t *vma_flags = &vma->flags;
+
+	return vma_flags_test_all_mask(vma_flags, flags);
+}
+
+/* As vma_test_all_flags_mask(), taking a list of VMA_*_BIT bit numbers. */
+#define vma_test_all_flags(vma, ...) \
+	vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
+
+/* True only if both VM_SHARED and VM_MAYWRITE are set in @vm_flags. */
+static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags)
+{
+	const vm_flags_t both = VM_SHARED | VM_MAYWRITE;
+
+	return (vm_flags & both) == both;
+}
+
+/* Set every flag in @flags on @vma. */
+static inline void vma_set_flags_mask(struct vm_area_struct *vma,
+				      vma_flags_t flags)
+{
+	vma_flags_t *vma_flags = &vma->flags;
+
+	vma_flags_set_mask(vma_flags, flags);
+}
+
+/* As vma_set_flags_mask(), taking a list of VMA_*_BIT bit numbers. */
+#define vma_set_flags(vma, ...) \
+	vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
+
+/* True if @desc has at least one of the flags in @flags set. */
+static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc,
+					    vma_flags_t flags)
+{
+	const vma_flags_t *desc_flags = &desc->vma_flags;
+
+	return vma_flags_test_mask(desc_flags, flags);
+}
+
+/* As vma_desc_test_flags_mask(), taking a list of VMA_*_BIT bit numbers. */
+#define vma_desc_test_flags(desc, ...) \
+	vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
+
+/* Set every flag in @flags on @desc. */
+static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
+					   vma_flags_t flags)
+{
+	vma_flags_t *desc_flags = &desc->vma_flags;
+
+	vma_flags_set_mask(desc_flags, flags);
+}
+
+/* As vma_desc_set_flags_mask(), taking a list of VMA_*_BIT bit numbers. */
+#define vma_desc_set_flags(desc, ...) \
+	vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
+
+/* Clear every flag in @flags from @desc. */
+static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
+					     vma_flags_t flags)
+{
+	vma_flags_t *desc_flags = &desc->vma_flags;
+
+	vma_flags_clear_mask(desc_flags, flags);
+}
+
+/* As vma_desc_clear_flags_mask(), taking a list of VMA_*_BIT bit numbers. */
+#define vma_desc_clear_flags(desc, ...) \
+	vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
+
+/* True only if both the SHARED and MAYWRITE bits are set in @flags. */
+static inline bool is_shared_maywrite(const vma_flags_t *flags)
+{
+	const vma_flags_t required = mk_vma_flags(VMA_SHARED_BIT, VMA_MAYWRITE_BIT);
+
+	return vma_flags_test_all_mask(flags, required);
+}
+
+/* True only if @vma has both the SHARED and MAYWRITE flags set. */
+static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma)
+{
+	const vma_flags_t *vma_flags = &vma->flags;
+
+	return is_shared_maywrite(vma_flags);
+}
+
+/* Return the next VMA from the iterator, or NULL when none remains. */
+static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
+{
+	struct ma_state *mas = &vmi->mas;
+
+	/*
+	 * mas_find() is used rather than mas_next() so that the first VMA is
+	 * returned when the iterator starts; mas_next() could skip it.
+	 */
+	return mas_find(mas, ULONG_MAX);
+}
+
+/*
+ * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
+ * assertions should be made either under mmap_write_lock or when the object
+ * has been isolated under mmap_write_lock, ensuring no competing writers.
+ */
+/* Warn if @vma's vm_refcnt reads as zero, i.e. it is not marked attached. */
+static inline void vma_assert_attached(struct vm_area_struct *vma)
+{
+ WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
+}
+
+/* Warn if @vma's vm_refcnt reads as non-zero, i.e. it is still attached. */
+static inline void vma_assert_detached(struct vm_area_struct *vma)
+{
+ WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
+}
+
+/* Forward declaration: used below before its definition is visible. */
+static inline void vma_assert_write_locked(struct vm_area_struct *);
+/*
+ * Mark @vma as attached by setting vm_refcnt to 1 via refcount_set_release().
+ * Asserts first that the VMA is write-locked and currently detached.
+ */
+static inline void vma_mark_attached(struct vm_area_struct *vma)
+{
+ vma_assert_write_locked(vma);
+ vma_assert_detached(vma);
+ refcount_set_release(&vma->vm_refcnt, 1);
+}
+
+/*
+ * Mark @vma as detached by dropping one reference from vm_refcnt. Asserts
+ * first that the VMA is write-locked and currently attached. Nothing further
+ * is done here if the count does not reach zero.
+ */
+static inline void vma_mark_detached(struct vm_area_struct *vma)
+{
+ vma_assert_write_locked(vma);
+ vma_assert_attached(vma);
+ /* We are the only writer, so no need to use vma_refcount_put(). */
+ if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
+ /*
+ * Reader must have temporarily raised vm_refcnt but it will
+ * drop it without using the vma since vma is write-locked.
+ */
+ }
+}
+
+static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
+{
+ memset(vma, 0, sizeof(*vma));
+ vma->vm_mm = mm;
+ vma->vm_ops = &vma_dummy_vm_ops;
+ INIT_LIST_HEAD(&vma->anon_vma_chain);
+ vma->vm_lock_seq = UINT_MAX;
+}
+
+/*
+ * These are defined in vma.h, but sadly vm_stat_account() is referenced by
+ * kernel/fork.c, so we have to make these broadly available there, and temporarily
+ * define them here to resolve the dependency cycle.
+ */
+#define is_exec_mapping(flags) \
+ ((flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC)
+
+#define is_stack_mapping(flags) \
+ (((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK))
+
+#define is_data_mapping(flags) \
+ ((flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE)
+
+static inline void vm_stat_account(struct mm_struct *mm, vm_flags_t flags,
+ long npages)
+{
+ WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages);
+
+ if (is_exec_mapping(flags))
+ mm->exec_vm += npages;
+ else if (is_stack_mapping(flags))
+ mm->stack_vm += npages;
+ else if (is_data_mapping(flags))
+ mm->data_vm += npages;
+}
+
+#undef is_exec_mapping
+#undef is_stack_mapping
+#undef is_data_mapping
+
+static inline void vm_unacct_memory(long pages)
+{
+ vm_acct_memory(-pages);
+}
+
+static inline void mapping_allow_writable(struct address_space *mapping)
+{
+ atomic_inc(&mapping->i_mmap_writable);
+}
+
+static inline
+struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
+{
+ return mas_find(&vmi->mas, max - 1);
+}
+
+static inline int vma_iter_clear_gfp(struct vma_iterator *vmi,
+ unsigned long start, unsigned long end, gfp_t gfp)
+{
+ __mas_set_range(&vmi->mas, start, end - 1);
+ mas_store_gfp(&vmi->mas, NULL, gfp);
+ if (unlikely(mas_is_err(&vmi->mas)))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static inline void vma_set_anonymous(struct vm_area_struct *vma)
+{
+ vma->vm_ops = NULL;
+}
+
+/* Declared in vma.h. */
+static inline void set_vma_from_desc(struct vm_area_struct *vma,
+ struct vm_area_desc *desc);
+
+static inline int __compat_vma_mmap(const struct file_operations *f_op,
+ struct file *file, struct vm_area_struct *vma)
+{
+ struct vm_area_desc desc = {
+ .mm = vma->vm_mm,
+ .file = file,
+ .start = vma->vm_start,
+ .end = vma->vm_end,
+
+ .pgoff = vma->vm_pgoff,
+ .vm_file = vma->vm_file,
+ .vm_flags = vma->vm_flags,
+ .page_prot = vma->vm_page_prot,
+
+ .action.type = MMAP_NOTHING, /* Default */
+ };
+ int err;
+
+ err = f_op->mmap_prepare(&desc);
+ if (err)
+ return err;
+
+ mmap_action_prepare(&desc.action, &desc);
+ set_vma_from_desc(vma, &desc);
+ return mmap_action_complete(&desc.action, vma);
+}
+
+static inline int compat_vma_mmap(struct file *file,
+ struct vm_area_struct *vma)
+{
+ return __compat_vma_mmap(file->f_op, file, vma);
+}
+
+
+static inline void vma_iter_init(struct vma_iterator *vmi,
+ struct mm_struct *mm, unsigned long addr)
+{
+ mas_init(&vmi->mas, &mm->mm_mt, addr);
+}
+
+static inline unsigned long vma_pages(struct vm_area_struct *vma)
+{
+ return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+}
+
+static inline void mmap_assert_locked(struct mm_struct *);
+static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
+ unsigned long start_addr,
+ unsigned long end_addr)
+{
+ unsigned long index = start_addr;
+
+ mmap_assert_locked(mm);
+ return mt_find(&mm->mm_mt, &index, end_addr - 1);
+}
+
+static inline
+struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
+{
+ return mtree_load(&mm->mm_mt, addr);
+}
+
+static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
+{
+ return mas_prev(&vmi->mas, 0);
+}
+
+static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr)
+{
+ mas_set(&vmi->mas, addr);
+}
+
+static inline bool vma_is_anonymous(struct vm_area_struct *vma)
+{
+ return !vma->vm_ops;
+}
+
+/* Defined in vma.h, so temporarily define here to avoid circular dependency. */
+#define vma_iter_load(vmi) \
+ mas_walk(&(vmi)->mas)
+
+static inline struct vm_area_struct *
+find_vma_prev(struct mm_struct *mm, unsigned long addr,
+ struct vm_area_struct **pprev)
+{
+ struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, addr);
+
+ vma = vma_iter_load(&vmi);
+ *pprev = vma_prev(&vmi);
+ if (!vma)
+ vma = vma_next(&vmi);
+ return vma;
+}
+
+#undef vma_iter_load
+
+static inline void vma_iter_free(struct vma_iterator *vmi)
+{
+ mas_destroy(&vmi->mas);
+}
+
+static inline
+struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi)
+{
+ return mas_next_range(&vmi->mas, ULONG_MAX);
+}
+
+bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
+
+/* Update vma->vm_page_prot to reflect vma->vm_flags. */
+static inline void vma_set_page_prot(struct vm_area_struct *vma)
+{
+ vm_flags_t vm_flags = vma->vm_flags;
+ pgprot_t vm_page_prot;
+
+ /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
+ vm_page_prot = pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vm_flags));
+
+ if (vma_wants_writenotify(vma, vm_page_prot)) {
+ vm_flags &= ~VM_SHARED;
+ /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
+ vm_page_prot = pgprot_modify(vm_page_prot, vm_get_page_prot(vm_flags));
+ }
+ /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
+ WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
+}
+
+static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma)
+{
+ if (vma->vm_flags & VM_GROWSDOWN)
+ return stack_guard_gap;
+
+ /* See reasoning around the VM_SHADOW_STACK definition */
+ if (vma->vm_flags & VM_SHADOW_STACK)
+ return PAGE_SIZE;
+
+ return 0;
+}
+
+static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
+{
+ unsigned long gap = stack_guard_start_gap(vma);
+ unsigned long vm_start = vma->vm_start;
+
+ vm_start -= gap;
+ if (vm_start > vma->vm_start)
+ vm_start = 0;
+ return vm_start;
+}
+
+static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
+{
+ unsigned long vm_end = vma->vm_end;
+
+ if (vma->vm_flags & VM_GROWSUP) {
+ vm_end += stack_guard_gap;
+ if (vm_end < vma->vm_end)
+ vm_end = -PAGE_SIZE;
+ }
+ return vm_end;
+}
+
+static inline bool vma_is_accessible(struct vm_area_struct *vma)
+{
+ return vma->vm_flags & VM_ACCESS_FLAGS;
+}
+
+static inline bool mlock_future_ok(const struct mm_struct *mm,
+ vm_flags_t vm_flags, unsigned long bytes)
+{
+ unsigned long locked_pages, limit_pages;
+
+ if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK))
+ return true;
+
+ locked_pages = bytes >> PAGE_SHIFT;
+ locked_pages += mm->locked_vm;
+
+ limit_pages = rlimit(RLIMIT_MEMLOCK);
+ limit_pages >>= PAGE_SHIFT;
+
+ return locked_pages <= limit_pages;
+}
+
+static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
+{
+ /* If MDWE is disabled (flag not set on the mm), we have nothing to deny. */
+ if (!mm_flags_test(MMF_HAS_MDWE, current->mm))
+ return false;
+
+ /* If the new VMA is not executable, we have nothing to deny. */
+ if (!(new & VM_EXEC))
+ return false;
+
+ /* Under MDWE we do not accept newly writably executable VMAs... */
+ if (new & VM_WRITE)
+ return true;
+
+ /* ...nor previously non-executable VMAs becoming executable. */
+ if (!(old & VM_EXEC))
+ return true;
+
+ return false;
+}
+
+static inline int mapping_map_writable(struct address_space *mapping)
+{
+ return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
+ 0 : -EPERM;
+}
+
+/* Did the driver provide valid mmap hook configuration? */
+static inline bool can_mmap_file(struct file *file)
+{
+ bool has_mmap = file->f_op->mmap;
+ bool has_mmap_prepare = file->f_op->mmap_prepare;
+
+ /* Hooks are mutually exclusive. */
+ if (WARN_ON_ONCE(has_mmap && has_mmap_prepare))
+ return false;
+ if (!has_mmap && !has_mmap_prepare)
+ return false;
+
+ return true;
+}
+
+static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ if (file->f_op->mmap_prepare)
+ return compat_vma_mmap(file, vma);
+
+ return file->f_op->mmap(file, vma);
+}
+
+static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
+{
+ return file->f_op->mmap_prepare(desc);
+}
+
+static inline void vma_set_file(struct vm_area_struct *vma, struct file *file)
+{
+ /* Changing an anonymous vma with this is illegal */
+ get_file(file);
+ swap(vma->vm_file, file);
+ fput(file);
+}
diff --git a/tools/testing/vma/include/stubs.h b/tools/testing/vma/include/stubs.h
new file mode 100644
index 000000000000..947a3a0c2566
--- /dev/null
+++ b/tools/testing/vma/include/stubs.h
@@ -0,0 +1,428 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#pragma once
+
+/*
+ * Contains declarations that are STUBBED, that is that are rendered no-ops, in
+ * order to facilitate userland VMA testing.
+ */
+
+/* Forward declarations. */
+struct mm_struct;
+struct vm_area_struct;
+struct vm_area_desc;
+struct pagetable_move_control;
+struct mmap_action;
+struct file;
+struct anon_vma;
+struct anon_vma_chain;
+struct address_space;
+struct unmap_desc;
+
+#define __bitwise
+#define __randomize_layout
+
+#define FIRST_USER_ADDRESS 0UL
+#define USER_PGTABLES_CEILING 0UL
+
+#define vma_policy(vma) NULL
+
+#define down_write_nest_lock(sem, nest_lock)
+
+#define data_race(expr) expr
+
+#define ASSERT_EXCLUSIVE_WRITER(x)
+
+struct vm_userfaultfd_ctx {};
+struct mempolicy {};
+struct mmu_gather {};
+struct mutex {};
+struct vm_fault {};
+
+static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
+ struct list_head *uf)
+{
+}
+
+static inline unsigned long move_page_tables(struct pagetable_move_control *pmc)
+{
+ return 0;
+}
+
+static inline void free_pgd_range(struct mmu_gather *tlb,
+ unsigned long addr, unsigned long end,
+ unsigned long floor, unsigned long ceiling)
+{
+}
+
+static inline int ksm_execve(struct mm_struct *mm)
+{
+ return 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm)
+{
+}
+
+static inline void vma_numab_state_init(struct vm_area_struct *vma)
+{
+}
+
+static inline void vma_numab_state_free(struct vm_area_struct *vma)
+{
+}
+
+static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
+ struct vm_area_struct *new_vma)
+{
+}
+
+static inline void free_anon_vma_name(struct vm_area_struct *vma)
+{
+}
+
+static inline void mmap_action_prepare(struct mmap_action *action,
+ struct vm_area_desc *desc)
+{
+}
+
+static inline int mmap_action_complete(struct mmap_action *action,
+ struct vm_area_struct *vma)
+{
+ return 0;
+}
+
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+}
+
+static inline bool shmem_file(struct file *file)
+{
+ return false;
+}
+
+static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
+ const struct file *file, vm_flags_t vm_flags)
+{
+ return vm_flags;
+}
+
+static inline void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn)
+{
+}
+
+static inline int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn, unsigned long size, pgprot_t pgprot)
+{
+ return 0;
+}
+
+static inline int do_munmap(struct mm_struct *, unsigned long, size_t,
+ struct list_head *uf)
+{
+ return 0;
+}
+
+/* Currently stubbed but we may later wish to un-stub. */
+static inline void vm_acct_memory(long pages);
+
+static inline void mmap_assert_locked(struct mm_struct *mm)
+{
+}
+
+
+static inline void anon_vma_unlock_write(struct anon_vma *anon_vma)
+{
+}
+
+static inline void i_mmap_unlock_write(struct address_space *mapping)
+{
+}
+
+static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end,
+ struct list_head *unmaps)
+{
+ return 0;
+}
+
+static inline void mmap_write_downgrade(struct mm_struct *mm)
+{
+}
+
+static inline void mmap_read_unlock(struct mm_struct *mm)
+{
+}
+
+static inline void mmap_write_unlock(struct mm_struct *mm)
+{
+}
+
+static inline int mmap_write_lock_killable(struct mm_struct *mm)
+{
+ return 0;
+}
+
+static inline bool can_modify_mm(struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ return true;
+}
+
+static inline void arch_unmap(struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+}
+
+static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
+{
+ return true;
+}
+
+static inline void khugepaged_enter_vma(struct vm_area_struct *vma,
+ vm_flags_t vm_flags)
+{
+}
+
+static inline bool mapping_can_writeback(struct address_space *mapping)
+{
+ return true;
+}
+
+static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
+{
+ return false;
+}
+
+static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
+{
+ return false;
+}
+
+static inline bool userfaultfd_wp(struct vm_area_struct *vma)
+{
+ return false;
+}
+
+static inline void mmap_assert_write_locked(struct mm_struct *mm)
+{
+}
+
+static inline void mutex_lock(struct mutex *lock)
+{
+}
+
+static inline void mutex_unlock(struct mutex *lock)
+{
+}
+
+static inline bool mutex_is_locked(struct mutex *lock)
+{
+ return true;
+}
+
+static inline bool signal_pending(void *p)
+{
+ return false;
+}
+
+static inline bool is_file_hugepages(struct file *file)
+{
+ return false;
+}
+
+static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
+{
+ return 0;
+}
+
+static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags,
+ unsigned long npages)
+{
+ return true;
+}
+
+static inline int shmem_zero_setup(struct vm_area_struct *vma)
+{
+ return 0;
+}
+
+
+static inline void vm_acct_memory(long pages)
+{
+}
+
+static inline void vma_interval_tree_insert(struct vm_area_struct *vma,
+ struct rb_root_cached *rb)
+{
+}
+
+static inline void vma_interval_tree_remove(struct vm_area_struct *vma,
+ struct rb_root_cached *rb)
+{
+}
+
+static inline void flush_dcache_mmap_unlock(struct address_space *mapping)
+{
+}
+
+static inline void anon_vma_interval_tree_insert(struct anon_vma_chain *avc,
+ struct rb_root_cached *rb)
+{
+}
+
+static inline void anon_vma_interval_tree_remove(struct anon_vma_chain *avc,
+ struct rb_root_cached *rb)
+{
+}
+
+static inline void uprobe_mmap(struct vm_area_struct *vma)
+{
+}
+
+static inline void uprobe_munmap(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+}
+
+static inline void i_mmap_lock_write(struct address_space *mapping)
+{
+}
+
+static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
+{
+}
+
+static inline void vma_assert_write_locked(struct vm_area_struct *vma)
+{
+}
+
+static inline void ksm_add_vma(struct vm_area_struct *vma)
+{
+}
+
+static inline void perf_event_mmap(struct vm_area_struct *vma)
+{
+}
+
+static inline bool vma_is_dax(struct vm_area_struct *vma)
+{
+ return false;
+}
+
+static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+ return NULL;
+}
+
+static inline bool arch_validate_flags(vm_flags_t flags)
+{
+ return true;
+}
+
+static inline void vma_close(struct vm_area_struct *vma)
+{
+}
+
+static inline int mmap_file(struct file *file, struct vm_area_struct *vma)
+{
+ return 0;
+}
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+ unsigned long addr, unsigned long len)
+{
+ return 0;
+}
+
+static inline bool capable(int cap)
+{
+ return true;
+}
+
+static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
+{
+ return NULL;
+}
+
+static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
+ struct vm_userfaultfd_ctx vm_ctx)
+{
+ return true;
+}
+
+static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
+ struct anon_vma_name *anon_name2)
+{
+ return true;
+}
+
+static inline void might_sleep(void)
+{
+}
+
+static inline void fput(struct file *file)
+{
+}
+
+static inline void mpol_put(struct mempolicy *pol)
+{
+}
+
+static inline void lru_add_drain(void)
+{
+}
+
+static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
+{
+}
+
+static inline void update_hiwater_rss(struct mm_struct *mm)
+{
+}
+
+static inline void update_hiwater_vm(struct mm_struct *mm)
+{
+}
+
+static inline void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap)
+{
+}
+
+static inline void free_pgtables(struct mmu_gather *tlb, struct unmap_desc *unmap)
+{
+}
+
+static inline void mapping_unmap_writable(struct address_space *mapping)
+{
+}
+
+static inline void flush_dcache_mmap_lock(struct address_space *mapping)
+{
+}
+
+static inline void tlb_finish_mmu(struct mmu_gather *tlb)
+{
+}
+
+static inline struct file *get_file(struct file *f)
+{
+ return f;
+}
+
+static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
+{
+ return 0;
+}
+
+static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end,
+ struct vm_area_struct *next)
+{
+}
+
+static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {}
diff --git a/tools/testing/vma/main.c b/tools/testing/vma/main.c
new file mode 100644
index 000000000000..49b09e97a51f
--- /dev/null
+++ b/tools/testing/vma/main.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shared.h"
+/*
+ * Directly import the VMA implementation here. Our vma_internal.h wrapper
+ * provides userland-equivalent functionality for everything vma.c uses.
+ */
+#include "../../../mm/vma_init.c"
+#include "../../../mm/vma_exec.c"
+#include "../../../mm/vma.c"
+
+/* Tests are included directly so they can test static functions in mm/vma.c. */
+#include "tests/merge.c"
+#include "tests/mmap.c"
+#include "tests/vma.c"
+
+/* Helper functions which utilise static kernel functions. */
+
+struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg)
+{
+ struct vm_area_struct *vma;
+
+ vma = vma_merge_existing_range(vmg);
+ if (vma)
+ vma_assert_attached(vma);
+ return vma;
+}
+
+int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+ int res;
+
+ res = vma_link(mm, vma);
+ if (!res)
+ vma_assert_attached(vma);
+ return res;
+}
+
+/* Main test runner which invokes the tests/ *.c runners. */
+int main(void)
+{
+ int num_tests = 0, num_fail = 0;
+
+ maple_tree_init();
+ vma_state_init();
+
+ run_merge_tests(&num_tests, &num_fail);
+ run_mmap_tests(&num_tests, &num_fail);
+ run_vma_tests(&num_tests, &num_fail);
+
+ printf("%d tests run, %d passed, %d failed.\n",
+ num_tests, num_tests - num_fail, num_fail);
+
+ return num_fail == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/tools/testing/vma/shared.c b/tools/testing/vma/shared.c
new file mode 100644
index 000000000000..bda578cc3304
--- /dev/null
+++ b/tools/testing/vma/shared.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shared.h"
+
+
+bool fail_prealloc;
+unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
+unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
+unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+
+const struct vm_operations_struct vma_dummy_vm_ops;
+struct anon_vma dummy_anon_vma;
+struct task_struct __current;
+
+struct vm_area_struct *alloc_vma(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ pgoff_t pgoff, vm_flags_t vm_flags)
+{
+ struct vm_area_struct *vma = vm_area_alloc(mm);
+
+ if (vma == NULL)
+ return NULL;
+
+ vma->vm_start = start;
+ vma->vm_end = end;
+ vma->vm_pgoff = pgoff;
+ vm_flags_reset(vma, vm_flags);
+ vma_assert_detached(vma);
+
+ return vma;
+}
+
+void detach_free_vma(struct vm_area_struct *vma)
+{
+ vma_mark_detached(vma);
+ vm_area_free(vma);
+}
+
+struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ pgoff_t pgoff, vm_flags_t vm_flags)
+{
+ struct vm_area_struct *vma = alloc_vma(mm, start, end, pgoff, vm_flags);
+
+ if (vma == NULL)
+ return NULL;
+
+ if (attach_vma(mm, vma)) {
+ detach_free_vma(vma);
+ return NULL;
+ }
+
+ /*
+ * Reset this counter which we use to track whether writes have
+ * begun. Linking to the tree will have caused this to be incremented,
+ * which means we will get a false positive otherwise.
+ */
+ vma->vm_lock_seq = UINT_MAX;
+
+ return vma;
+}
+
+void reset_dummy_anon_vma(void)
+{
+ dummy_anon_vma.was_cloned = false;
+ dummy_anon_vma.was_unlinked = false;
+}
+
+int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi)
+{
+ struct vm_area_struct *vma;
+ int count = 0;
+
+ fail_prealloc = false;
+ reset_dummy_anon_vma();
+
+ vma_iter_set(vmi, 0);
+ for_each_vma(*vmi, vma) {
+ detach_free_vma(vma);
+ count++;
+ }
+
+ mtree_destroy(&mm->mm_mt);
+ mm->map_count = 0;
+ return count;
+}
+
+bool vma_write_started(struct vm_area_struct *vma)
+{
+ int seq = vma->vm_lock_seq;
+
+ /* We reset after each check. */
+ vma->vm_lock_seq = UINT_MAX;
+
+ /* The vma_start_write() stub simply increments this value. */
+ return seq > -1;
+}
+
+void __vma_set_dummy_anon_vma(struct vm_area_struct *vma,
+ struct anon_vma_chain *avc, struct anon_vma *anon_vma)
+{
+ vma->anon_vma = anon_vma;
+ INIT_LIST_HEAD(&vma->anon_vma_chain);
+ list_add(&avc->same_vma, &vma->anon_vma_chain);
+ avc->anon_vma = vma->anon_vma;
+}
+
+void vma_set_dummy_anon_vma(struct vm_area_struct *vma,
+ struct anon_vma_chain *avc)
+{
+ __vma_set_dummy_anon_vma(vma, avc, &dummy_anon_vma);
+}
+
+struct task_struct *get_current(void)
+{
+ return &__current;
+}
+
+unsigned long rlimit(unsigned int limit)
+{
+ return (unsigned long)-1;
+}
+
+void vma_set_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ pgoff_t pgoff)
+{
+ vma->vm_start = start;
+ vma->vm_end = end;
+ vma->vm_pgoff = pgoff;
+}
diff --git a/tools/testing/vma/shared.h b/tools/testing/vma/shared.h
new file mode 100644
index 000000000000..6c64211cfa22
--- /dev/null
+++ b/tools/testing/vma/shared.h
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "generated/bit-length.h"
+#include "maple-shared.h"
+#include "vma_internal.h"
+#include "../../../mm/vma.h"
+
+/* Simple test runner. Assumes local num_[fail, tests] counters. */
+#define TEST(name) \
+ do { \
+ (*num_tests)++; \
+ if (!test_##name()) { \
+ (*num_fail)++; \
+ fprintf(stderr, "Test " #name " FAILED\n"); \
+ } \
+ } while (0)
+
+#define ASSERT_TRUE(_expr) \
+ do { \
+ if (!(_expr)) { \
+ fprintf(stderr, \
+ "Assert FAILED at %s:%d:%s(): %s is FALSE.\n", \
+ __FILE__, __LINE__, __FUNCTION__, #_expr); \
+ return false; \
+ } \
+ } while (0)
+
+#define ASSERT_FALSE(_expr) ASSERT_TRUE(!(_expr))
+#define ASSERT_EQ(_val1, _val2) ASSERT_TRUE((_val1) == (_val2))
+#define ASSERT_NE(_val1, _val2) ASSERT_TRUE((_val1) != (_val2))
+
+#define IS_SET(_val, _flags) ((_val & _flags) == _flags)
+
+extern bool fail_prealloc;
+
+/* Override vma_iter_prealloc() so we can choose to fail it. */
+#define vma_iter_prealloc(vmi, vma) \
+ (fail_prealloc ? -ENOMEM : mas_preallocate(&(vmi)->mas, (vma), GFP_KERNEL))
+
+#define CONFIG_DEFAULT_MMAP_MIN_ADDR 65536
+
+extern unsigned long mmap_min_addr;
+extern unsigned long dac_mmap_min_addr;
+extern unsigned long stack_guard_gap;
+
+extern const struct vm_operations_struct vma_dummy_vm_ops;
+extern struct anon_vma dummy_anon_vma;
+extern struct task_struct __current;
+
+/*
+ * Helper function which provides a wrapper around a merge existing VMA
+ * operation.
+ *
+ * Defined in main.c as it uses a static VMA function.
+ */
+struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg);
+
+/*
+ * Helper function to link an already-allocated VMA to the tree.
+ *
+ * Defined in main.c as it uses a static VMA function.
+ */
+int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma);
+
+/* Helper function providing a dummy vm_ops->close() method.*/
+static inline void dummy_close(struct vm_area_struct *)
+{
+}
+
+/* Helper function to simply allocate a VMA. */
+struct vm_area_struct *alloc_vma(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ pgoff_t pgoff, vm_flags_t vm_flags);
+
+/* Helper function to detach and free a VMA. */
+void detach_free_vma(struct vm_area_struct *vma);
+
+/* Helper function to allocate a VMA and link it to the tree. */
+struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ pgoff_t pgoff, vm_flags_t vm_flags);
+
+/*
+ * Helper function to reset the dummy anon_vma to indicate it has not been
+ * duplicated.
+ */
+void reset_dummy_anon_vma(void);
+
+/*
+ * Helper function to remove all VMAs and destroy the maple tree associated with
+ * a virtual address space. Returns a count of VMAs in the tree.
+ */
+int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi);
+
+/* Helper function to determine if VMA has had vma_start_write() performed. */
+bool vma_write_started(struct vm_area_struct *vma);
+
+void __vma_set_dummy_anon_vma(struct vm_area_struct *vma,
+ struct anon_vma_chain *avc, struct anon_vma *anon_vma);
+
+/* Provide a simple dummy VMA/anon_vma setup for testing. */
+void vma_set_dummy_anon_vma(struct vm_area_struct *vma,
+ struct anon_vma_chain *avc);
+
+/* Helper function to specify a VMA's range. */
+void vma_set_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ pgoff_t pgoff);
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/tests/merge.c
index 93d21bc7e112..3708dc6945b0 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/tests/merge.c
@@ -1,132 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "generated/bit-length.h"
-
-#include "maple-shared.h"
-#include "vma_internal.h"
-
-/* Include so header guard set. */
-#include "../../../mm/vma.h"
-
-static bool fail_prealloc;
-
-/* Then override vma_iter_prealloc() so we can choose to fail it. */
-#define vma_iter_prealloc(vmi, vma) \
- (fail_prealloc ? -ENOMEM : mas_preallocate(&(vmi)->mas, (vma), GFP_KERNEL))
-
-#define CONFIG_DEFAULT_MMAP_MIN_ADDR 65536
-
-unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
-unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
-unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
-
-/*
- * Directly import the VMA implementation here. Our vma_internal.h wrapper
- * provides userland-equivalent functionality for everything vma.c uses.
- */
-#include "../../../mm/vma_init.c"
-#include "../../../mm/vma_exec.c"
-#include "../../../mm/vma.c"
-
-const struct vm_operations_struct vma_dummy_vm_ops;
-static struct anon_vma dummy_anon_vma;
-
-#define ASSERT_TRUE(_expr) \
- do { \
- if (!(_expr)) { \
- fprintf(stderr, \
- "Assert FAILED at %s:%d:%s(): %s is FALSE.\n", \
- __FILE__, __LINE__, __FUNCTION__, #_expr); \
- return false; \
- } \
- } while (0)
-#define ASSERT_FALSE(_expr) ASSERT_TRUE(!(_expr))
-#define ASSERT_EQ(_val1, _val2) ASSERT_TRUE((_val1) == (_val2))
-#define ASSERT_NE(_val1, _val2) ASSERT_TRUE((_val1) != (_val2))
-
-#define IS_SET(_val, _flags) ((_val & _flags) == _flags)
-
-static struct task_struct __current;
-
-struct task_struct *get_current(void)
-{
- return &__current;
-}
-
-unsigned long rlimit(unsigned int limit)
-{
- return (unsigned long)-1;
-}
-
-/* Helper function to simply allocate a VMA. */
-static struct vm_area_struct *alloc_vma(struct mm_struct *mm,
- unsigned long start,
- unsigned long end,
- pgoff_t pgoff,
- vm_flags_t vm_flags)
-{
- struct vm_area_struct *vma = vm_area_alloc(mm);
-
- if (vma == NULL)
- return NULL;
-
- vma->vm_start = start;
- vma->vm_end = end;
- vma->vm_pgoff = pgoff;
- vm_flags_reset(vma, vm_flags);
- vma_assert_detached(vma);
-
- return vma;
-}
-
-/* Helper function to allocate a VMA and link it to the tree. */
-static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma)
-{
- int res;
-
- res = vma_link(mm, vma);
- if (!res)
- vma_assert_attached(vma);
- return res;
-}
-
-static void detach_free_vma(struct vm_area_struct *vma)
-{
- vma_mark_detached(vma);
- vm_area_free(vma);
-}
-
-/* Helper function to allocate a VMA and link it to the tree. */
-static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
- unsigned long start,
- unsigned long end,
- pgoff_t pgoff,
- vm_flags_t vm_flags)
-{
- struct vm_area_struct *vma = alloc_vma(mm, start, end, pgoff, vm_flags);
-
- if (vma == NULL)
- return NULL;
-
- if (attach_vma(mm, vma)) {
- detach_free_vma(vma);
- return NULL;
- }
-
- /*
- * Reset this counter which we use to track whether writes have
- * begun. Linking to the tree will have caused this to be incremented,
- * which means we will get a false positive otherwise.
- */
- vma->vm_lock_seq = UINT_MAX;
-
- return vma;
-}
-
/* Helper function which provides a wrapper around a merge new VMA operation. */
static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg)
{
@@ -147,20 +20,6 @@ static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg)
}
/*
- * Helper function which provides a wrapper around a merge existing VMA
- * operation.
- */
-static struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg)
-{
- struct vm_area_struct *vma;
-
- vma = vma_merge_existing_range(vmg);
- if (vma)
- vma_assert_attached(vma);
- return vma;
-}
-
-/*
* Helper function which provides a wrapper around the expansion of an existing
* VMA.
*/
@@ -173,8 +32,8 @@ static int expand_existing(struct vma_merge_struct *vmg)
* Helper function to reset merge state the associated VMA iterator to a
* specified new range.
*/
-static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start,
- unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags)
+void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start,
+ unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags)
{
vma_iter_set(vmg->vmi, start);
@@ -197,8 +56,8 @@ static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start,
/* Helper function to set both the VMG range and its anon_vma. */
static void vmg_set_range_anon_vma(struct vma_merge_struct *vmg, unsigned long start,
- unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags,
- struct anon_vma *anon_vma)
+ unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags,
+ struct anon_vma *anon_vma)
{
vmg_set_range(vmg, start, end, pgoff, vm_flags);
vmg->anon_vma = anon_vma;
@@ -211,10 +70,9 @@ static void vmg_set_range_anon_vma(struct vma_merge_struct *vmg, unsigned long s
* VMA, link it to the maple tree and return it.
*/
static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm,
- struct vma_merge_struct *vmg,
- unsigned long start, unsigned long end,
- pgoff_t pgoff, vm_flags_t vm_flags,
- bool *was_merged)
+ struct vma_merge_struct *vmg, unsigned long start,
+ unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags,
+ bool *was_merged)
{
struct vm_area_struct *merged;
@@ -234,72 +92,6 @@ static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm,
return alloc_and_link_vma(mm, start, end, pgoff, vm_flags);
}
-/*
- * Helper function to reset the dummy anon_vma to indicate it has not been
- * duplicated.
- */
-static void reset_dummy_anon_vma(void)
-{
- dummy_anon_vma.was_cloned = false;
- dummy_anon_vma.was_unlinked = false;
-}
-
-/*
- * Helper function to remove all VMAs and destroy the maple tree associated with
- * a virtual address space. Returns a count of VMAs in the tree.
- */
-static int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi)
-{
- struct vm_area_struct *vma;
- int count = 0;
-
- fail_prealloc = false;
- reset_dummy_anon_vma();
-
- vma_iter_set(vmi, 0);
- for_each_vma(*vmi, vma) {
- detach_free_vma(vma);
- count++;
- }
-
- mtree_destroy(&mm->mm_mt);
- mm->map_count = 0;
- return count;
-}
-
-/* Helper function to determine if VMA has had vma_start_write() performed. */
-static bool vma_write_started(struct vm_area_struct *vma)
-{
- int seq = vma->vm_lock_seq;
-
- /* We reset after each check. */
- vma->vm_lock_seq = UINT_MAX;
-
- /* The vma_start_write() stub simply increments this value. */
- return seq > -1;
-}
-
-/* Helper function providing a dummy vm_ops->close() method.*/
-static void dummy_close(struct vm_area_struct *)
-{
-}
-
-static void __vma_set_dummy_anon_vma(struct vm_area_struct *vma,
- struct anon_vma_chain *avc,
- struct anon_vma *anon_vma)
-{
- vma->anon_vma = anon_vma;
- INIT_LIST_HEAD(&vma->anon_vma_chain);
- list_add(&avc->same_vma, &vma->anon_vma_chain);
- avc->anon_vma = vma->anon_vma;
-}
-
-static void vma_set_dummy_anon_vma(struct vm_area_struct *vma,
- struct anon_vma_chain *avc)
-{
- __vma_set_dummy_anon_vma(vma, avc, &dummy_anon_vma);
-}
-
static bool test_simple_merge(void)
{
struct vm_area_struct *vma;
@@ -1616,39 +1408,6 @@ static bool test_merge_extend(void)
return true;
}
-static bool test_copy_vma(void)
-{
- vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
- struct mm_struct mm = {};
- bool need_locks = false;
- VMA_ITERATOR(vmi, &mm, 0);
- struct vm_area_struct *vma, *vma_new, *vma_next;
-
- /* Move backwards and do not merge. */
-
- vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags);
- vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks);
- ASSERT_NE(vma_new, vma);
- ASSERT_EQ(vma_new->vm_start, 0);
- ASSERT_EQ(vma_new->vm_end, 0x2000);
- ASSERT_EQ(vma_new->vm_pgoff, 0);
- vma_assert_attached(vma_new);
-
- cleanup_mm(&mm, &vmi);
-
- /* Move a VMA into position next to another and merge the two. */
-
- vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags);
- vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, vm_flags);
- vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks);
- vma_assert_attached(vma_new);
-
- ASSERT_EQ(vma_new, vma_next);
-
- cleanup_mm(&mm, &vmi);
- return true;
-}
-
static bool test_expand_only_mode(void)
{
vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
@@ -1689,73 +1448,8 @@ static bool test_expand_only_mode(void)
return true;
}
-static bool test_mmap_region_basic(void)
-{
- struct mm_struct mm = {};
- unsigned long addr;
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, &mm, 0);
-
- current->mm = &mm;
-
- /* Map at 0x300000, length 0x3000. */
- addr = __mmap_region(NULL, 0x300000, 0x3000,
- VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
- 0x300, NULL);
- ASSERT_EQ(addr, 0x300000);
-
- /* Map at 0x250000, length 0x3000. */
- addr = __mmap_region(NULL, 0x250000, 0x3000,
- VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
- 0x250, NULL);
- ASSERT_EQ(addr, 0x250000);
-
- /* Map at 0x303000, merging to 0x300000 of length 0x6000. */
- addr = __mmap_region(NULL, 0x303000, 0x3000,
- VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
- 0x303, NULL);
- ASSERT_EQ(addr, 0x303000);
-
- /* Map at 0x24d000, merging to 0x250000 of length 0x6000. */
- addr = __mmap_region(NULL, 0x24d000, 0x3000,
- VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
- 0x24d, NULL);
- ASSERT_EQ(addr, 0x24d000);
-
- ASSERT_EQ(mm.map_count, 2);
-
- for_each_vma(vmi, vma) {
- if (vma->vm_start == 0x300000) {
- ASSERT_EQ(vma->vm_end, 0x306000);
- ASSERT_EQ(vma->vm_pgoff, 0x300);
- } else if (vma->vm_start == 0x24d000) {
- ASSERT_EQ(vma->vm_end, 0x253000);
- ASSERT_EQ(vma->vm_pgoff, 0x24d);
- } else {
- ASSERT_FALSE(true);
- }
- }
-
- cleanup_mm(&mm, &vmi);
- return true;
-}
-
-int main(void)
+static void run_merge_tests(int *num_tests, int *num_fail)
{
- int num_tests = 0, num_fail = 0;
-
- maple_tree_init();
- vma_state_init();
-
-#define TEST(name) \
- do { \
- num_tests++; \
- if (!test_##name()) { \
- num_fail++; \
- fprintf(stderr, "Test " #name " FAILED\n"); \
- } \
- } while (0)
-
/* Very simple tests to kick the tyres. */
TEST(simple_merge);
TEST(simple_modify);
@@ -1771,15 +1465,5 @@ int main(void)
TEST(dup_anon_vma);
TEST(vmi_prealloc_fail);
TEST(merge_extend);
- TEST(copy_vma);
TEST(expand_only_mode);
-
- TEST(mmap_region_basic);
-
-#undef TEST
-
- printf("%d tests run, %d passed, %d failed.\n",
- num_tests, num_tests - num_fail, num_fail);
-
- return num_fail == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}
diff --git a/tools/testing/vma/tests/mmap.c b/tools/testing/vma/tests/mmap.c
new file mode 100644
index 000000000000..bded4ecbe5db
--- /dev/null
+++ b/tools/testing/vma/tests/mmap.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+static bool test_mmap_region_basic(void)
+{
+ struct mm_struct mm = {};
+ unsigned long addr;
+ struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, &mm, 0);
+
+ current->mm = &mm;
+
+ /* Map at 0x300000, length 0x3000. */
+ addr = __mmap_region(NULL, 0x300000, 0x3000,
+ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
+ 0x300, NULL);
+ ASSERT_EQ(addr, 0x300000);
+
+ /* Map at 0x250000, length 0x3000. */
+ addr = __mmap_region(NULL, 0x250000, 0x3000,
+ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
+ 0x250, NULL);
+ ASSERT_EQ(addr, 0x250000);
+
+ /* Map at 0x303000, merging to 0x300000 of length 0x6000. */
+ addr = __mmap_region(NULL, 0x303000, 0x3000,
+ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
+ 0x303, NULL);
+ ASSERT_EQ(addr, 0x303000);
+
+ /* Map at 0x24d000, merging to 0x250000 of length 0x6000. */
+ addr = __mmap_region(NULL, 0x24d000, 0x3000,
+ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
+ 0x24d, NULL);
+ ASSERT_EQ(addr, 0x24d000);
+
+ ASSERT_EQ(mm.map_count, 2);
+
+ for_each_vma(vmi, vma) {
+ if (vma->vm_start == 0x300000) {
+ ASSERT_EQ(vma->vm_end, 0x306000);
+ ASSERT_EQ(vma->vm_pgoff, 0x300);
+ } else if (vma->vm_start == 0x24d000) {
+ ASSERT_EQ(vma->vm_end, 0x253000);
+ ASSERT_EQ(vma->vm_pgoff, 0x24d);
+ } else {
+ ASSERT_FALSE(true);
+ }
+ }
+
+ cleanup_mm(&mm, &vmi);
+ return true;
+}
+
+static void run_mmap_tests(int *num_tests, int *num_fail)
+{
+ TEST(mmap_region_basic);
+}
diff --git a/tools/testing/vma/tests/vma.c b/tools/testing/vma/tests/vma.c
new file mode 100644
index 000000000000..c54ffc954f11
--- /dev/null
+++ b/tools/testing/vma/tests/vma.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/*
+ * Check that a legacy vm_flags_t value and a vma_flags_t bitmap describe the
+ * same set of flags: the first bitmap word must equal the legacy value and
+ * every higher bitmap word must be zero.
+ *
+ * Returns true if the two representations agree, false otherwise.
+ */
+static bool compare_legacy_flags(vm_flags_t legacy_flags, vma_flags_t flags)
+{
+	const unsigned long legacy_val = legacy_flags;
+	/* The lower word should contain the precise same value. */
+	const unsigned long flags_lower = flags.__vma_flags[0];
+	/*
+	 * Key the guard off NUM_VMA_FLAG_BITS, the size used to declare the
+	 * bitmap - an undefined macro name evaluates to 0 in #if and would
+	 * silently compile the high-word check out.
+	 */
+#if NUM_VMA_FLAG_BITS > BITS_PER_LONG
+	int i;
+
+	/* All bits in higher flag values should be zero. */
+	for (i = 1; i < BITS_TO_LONGS(NUM_VMA_FLAG_BITS); i++) {
+		if (flags.__vma_flags[i] != 0)
+			return false;
+	}
+#endif
+
+	/* The first-word comparison only makes sense if the widths match. */
+	static_assert(sizeof(legacy_flags) == sizeof(unsigned long));
+
+	return legacy_val == flags_lower;
+}
+
+static bool test_copy_vma(void)
+{
+ vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
+ struct mm_struct mm = {};
+ bool need_locks = false;
+ VMA_ITERATOR(vmi, &mm, 0);
+ struct vm_area_struct *vma, *vma_new, *vma_next;
+
+ /* Move backwards and do not merge. */
+
+ vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags);
+ vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks);
+ ASSERT_NE(vma_new, vma);
+ ASSERT_EQ(vma_new->vm_start, 0);
+ ASSERT_EQ(vma_new->vm_end, 0x2000);
+ ASSERT_EQ(vma_new->vm_pgoff, 0);
+ vma_assert_attached(vma_new);
+
+ cleanup_mm(&mm, &vmi);
+
+ /* Move a VMA into position next to another and merge the two. */
+
+ vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags);
+ vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, vm_flags);
+ vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks);
+ vma_assert_attached(vma_new);
+
+ ASSERT_EQ(vma_new, vma_next);
+
+ cleanup_mm(&mm, &vmi);
+ return true;
+}
+
+static bool test_vma_flags_unchanged(void)
+{
+ vma_flags_t flags = EMPTY_VMA_FLAGS;
+ vm_flags_t legacy_flags = 0;
+ int bit;
+ struct vm_area_struct vma;
+ struct vm_area_desc desc;
+
+
+ vma.flags = EMPTY_VMA_FLAGS;
+ desc.vma_flags = EMPTY_VMA_FLAGS;
+
+ for (bit = 0; bit < BITS_PER_LONG; bit++) {
+ vma_flags_t mask = mk_vma_flags(bit);
+
+ legacy_flags |= (1UL << bit);
+
+ /* Individual flags. */
+ vma_flags_set(&flags, bit);
+ ASSERT_TRUE(compare_legacy_flags(legacy_flags, flags));
+
+ /* Via mask. */
+ vma_flags_set_mask(&flags, mask);
+ ASSERT_TRUE(compare_legacy_flags(legacy_flags, flags));
+
+ /* Same for VMA. */
+ vma_set_flags(&vma, bit);
+ ASSERT_TRUE(compare_legacy_flags(legacy_flags, vma.flags));
+ vma_set_flags_mask(&vma, mask);
+ ASSERT_TRUE(compare_legacy_flags(legacy_flags, vma.flags));
+
+ /* Same for VMA descriptor. */
+ vma_desc_set_flags(&desc, bit);
+ ASSERT_TRUE(compare_legacy_flags(legacy_flags, desc.vma_flags));
+ vma_desc_set_flags_mask(&desc, mask);
+ ASSERT_TRUE(compare_legacy_flags(legacy_flags, desc.vma_flags));
+ }
+
+ return true;
+}
+
+/*
+ * Assert that vma_flags_clear_all() zeroes every word of a vma_flags_t,
+ * starting from a bitmap in which every bit is set.
+ *
+ * Returns true on success; the ASSERT_*() macros bail out on failure.
+ */
+static bool test_vma_flags_cleared(void)
+{
+	const vma_flags_t empty = EMPTY_VMA_FLAGS;
+	vma_flags_t flags;
+	int i;
+
+	/* Set all bits high - memset() fills bytes, so 0xff sets every bit. */
+	memset(&flags, 0xff, sizeof(flags));
+	/* Try to clear. */
+	vma_flags_clear_all(&flags);
+	/* Equal to EMPTY_VMA_FLAGS? */
+	ASSERT_EQ(memcmp(&empty, &flags, sizeof(flags)), 0);
+	/*
+	 * Make sure every unsigned long entry in bitmap array zero. The word
+	 * count is the byte size divided by the size of one word in bytes.
+	 */
+	for (i = 0; i < sizeof(flags) / sizeof(unsigned long); i++) {
+		const unsigned long val = flags.__vma_flags[i];
+
+		ASSERT_EQ(val, 0);
+	}
+
+	return true;
+}
+
+/*
+ * Assert that VMA flag functions that operate at the system word level function
+ * correctly.
+ */
+static bool test_vma_flags_word(void)
+{
+ vma_flags_t flags = EMPTY_VMA_FLAGS;
+ const vma_flags_t comparison =
+ mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, 64, 65);
+
+ /* Set some custom high flags. */
+ vma_flags_set(&flags, 64, 65);
+ /* Now overwrite the first word. */
+ vma_flags_overwrite_word(&flags, VM_READ | VM_WRITE);
+ /* Ensure they are equal. */
+ ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0);
+
+ flags = EMPTY_VMA_FLAGS;
+ vma_flags_set(&flags, 64, 65);
+
+ /* Do the same with the _once() equivalent. */
+ vma_flags_overwrite_word_once(&flags, VM_READ | VM_WRITE);
+ ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0);
+
+ flags = EMPTY_VMA_FLAGS;
+ vma_flags_set(&flags, 64, 65);
+
+ /* Make sure we can set a word without disturbing other bits. */
+ vma_flags_set(&flags, VMA_WRITE_BIT);
+ vma_flags_set_word(&flags, VM_READ);
+ ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0);
+
+ flags = EMPTY_VMA_FLAGS;
+ vma_flags_set(&flags, 64, 65);
+
+ /* Make sure we can clear a word without disturbing other bits. */
+ vma_flags_set(&flags, VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT);
+ vma_flags_clear_word(&flags, VM_EXEC);
+ ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0);
+
+ return true;
+}
+
+/* Ensure that vma_flags_test() and friends works correctly. */
+static bool test_vma_flags_test(void)
+{
+ const vma_flags_t flags = mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT,
+ VMA_EXEC_BIT, 64, 65);
+ struct vm_area_struct vma;
+ struct vm_area_desc desc;
+
+ vma.flags = flags;
+ desc.vma_flags = flags;
+
+#define do_test(...) \
+ ASSERT_TRUE(vma_flags_test(&flags, __VA_ARGS__)); \
+ ASSERT_TRUE(vma_desc_test_flags(&desc, __VA_ARGS__))
+
+#define do_test_all_true(...) \
+ ASSERT_TRUE(vma_flags_test_all(&flags, __VA_ARGS__)); \
+ ASSERT_TRUE(vma_test_all_flags(&vma, __VA_ARGS__))
+
+#define do_test_all_false(...) \
+ ASSERT_FALSE(vma_flags_test_all(&flags, __VA_ARGS__)); \
+ ASSERT_FALSE(vma_test_all_flags(&vma, __VA_ARGS__))
+
+ /*
+ * Testing for some flags that are present, some that are not - should
+ * pass. ANY flags matching should work.
+ */
+ do_test(VMA_READ_BIT, VMA_MAYREAD_BIT, VMA_SEQ_READ_BIT);
+ /* However, the ...test_all() variant should NOT pass. */
+ do_test_all_false(VMA_READ_BIT, VMA_MAYREAD_BIT, VMA_SEQ_READ_BIT);
+ /* But should pass for flags present. */
+ do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64, 65);
+ /* Also subsets... */
+ do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64);
+ do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT);
+ do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT);
+ do_test_all_true(VMA_READ_BIT);
+ /*
+ * Check _mask variant. We don't need to test extensively as macro
+ * helper is the equivalent.
+ */
+ ASSERT_TRUE(vma_flags_test_mask(&flags, flags));
+ ASSERT_TRUE(vma_flags_test_all_mask(&flags, flags));
+
+ /* Single bits. */
+ do_test(VMA_READ_BIT);
+ do_test(VMA_WRITE_BIT);
+ do_test(VMA_EXEC_BIT);
+#if NUM_VMA_FLAG_BITS > 64
+ do_test(64);
+ do_test(65);
+#endif
+
+ /* Two bits. */
+ do_test(VMA_READ_BIT, VMA_WRITE_BIT);
+ do_test(VMA_READ_BIT, VMA_EXEC_BIT);
+ do_test(VMA_WRITE_BIT, VMA_EXEC_BIT);
+ /* Ordering shouldn't matter. */
+ do_test(VMA_WRITE_BIT, VMA_READ_BIT);
+ do_test(VMA_EXEC_BIT, VMA_READ_BIT);
+ do_test(VMA_EXEC_BIT, VMA_WRITE_BIT);
+#if NUM_VMA_FLAG_BITS > 64
+ do_test(VMA_READ_BIT, 64);
+ do_test(VMA_WRITE_BIT, 64);
+ do_test(64, VMA_READ_BIT);
+ do_test(64, VMA_WRITE_BIT);
+ do_test(VMA_READ_BIT, 65);
+ do_test(VMA_WRITE_BIT, 65);
+ do_test(65, VMA_READ_BIT);
+ do_test(65, VMA_WRITE_BIT);
+#endif
+ /* Three bits. */
+ do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT);
+#if NUM_VMA_FLAG_BITS > 64
+ /* No need to consider every single permutation. */
+ do_test(VMA_READ_BIT, VMA_WRITE_BIT, 64);
+ do_test(VMA_READ_BIT, VMA_WRITE_BIT, 65);
+
+ /* Four bits. */
+ do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64);
+ do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 65);
+
+ /* Five bits. */
+ do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64, 65);
+#endif
+
+#undef do_test
+#undef do_test_all_true
+#undef do_test_all_false
+
+ return true;
+}
+
+/* Ensure that vma_flags_clear() and friends works correctly. */
+static bool test_vma_flags_clear(void)
+{
+ vma_flags_t flags = mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT,
+ VMA_EXEC_BIT, 64, 65);
+ vma_flags_t mask = mk_vma_flags(VMA_EXEC_BIT, 64);
+ struct vm_area_struct vma;
+ struct vm_area_desc desc;
+
+ vma.flags = flags;
+ desc.vma_flags = flags;
+
+ /* Cursory check of _mask() variant, as the helper macros imply. */
+ vma_flags_clear_mask(&flags, mask);
+ vma_flags_clear_mask(&vma.flags, mask);
+ vma_desc_clear_flags_mask(&desc, mask);
+ ASSERT_FALSE(vma_flags_test(&flags, VMA_EXEC_BIT, 64));
+ ASSERT_FALSE(vma_flags_test(&vma.flags, VMA_EXEC_BIT, 64));
+ ASSERT_FALSE(vma_desc_test_flags(&desc, VMA_EXEC_BIT, 64));
+ /* Reset. */
+ vma_flags_set(&flags, VMA_EXEC_BIT, 64);
+ vma_set_flags(&vma, VMA_EXEC_BIT, 64);
+ vma_desc_set_flags(&desc, VMA_EXEC_BIT, 64);
+
+ /*
+ * Clear the flags and assert clear worked, then reset flags back to
+ * include specified flags.
+ */
+#define do_test_and_reset(...) \
+ vma_flags_clear(&flags, __VA_ARGS__); \
+ vma_flags_clear(&vma.flags, __VA_ARGS__); \
+ vma_desc_clear_flags(&desc, __VA_ARGS__); \
+ ASSERT_FALSE(vma_flags_test(&flags, __VA_ARGS__)); \
+ ASSERT_FALSE(vma_flags_test(&vma.flags, __VA_ARGS__)); \
+ ASSERT_FALSE(vma_desc_test_flags(&desc, __VA_ARGS__)); \
+ vma_flags_set(&flags, __VA_ARGS__); \
+ vma_set_flags(&vma, __VA_ARGS__); \
+ vma_desc_set_flags(&desc, __VA_ARGS__)
+
+ /* Single flags. */
+ do_test_and_reset(VMA_READ_BIT);
+ do_test_and_reset(VMA_WRITE_BIT);
+ do_test_and_reset(VMA_EXEC_BIT);
+ do_test_and_reset(64);
+ do_test_and_reset(65);
+
+ /* Two flags, in different orders. */
+ do_test_and_reset(VMA_READ_BIT, VMA_WRITE_BIT);
+ do_test_and_reset(VMA_READ_BIT, VMA_EXEC_BIT);
+ do_test_and_reset(VMA_READ_BIT, 64);
+ do_test_and_reset(VMA_READ_BIT, 65);
+ do_test_and_reset(VMA_WRITE_BIT, VMA_READ_BIT);
+ do_test_and_reset(VMA_WRITE_BIT, VMA_EXEC_BIT);
+ do_test_and_reset(VMA_WRITE_BIT, 64);
+ do_test_and_reset(VMA_WRITE_BIT, 65);
+ do_test_and_reset(VMA_EXEC_BIT, VMA_READ_BIT);
+ do_test_and_reset(VMA_EXEC_BIT, VMA_WRITE_BIT);
+ do_test_and_reset(VMA_EXEC_BIT, 64);
+ do_test_and_reset(VMA_EXEC_BIT, 65);
+ do_test_and_reset(64, VMA_READ_BIT);
+ do_test_and_reset(64, VMA_WRITE_BIT);
+ do_test_and_reset(64, VMA_EXEC_BIT);
+ do_test_and_reset(64, 65);
+ do_test_and_reset(65, VMA_READ_BIT);
+ do_test_and_reset(65, VMA_WRITE_BIT);
+ do_test_and_reset(65, VMA_EXEC_BIT);
+ do_test_and_reset(65, 64);
+
+ /* Three flags. */
+
+#undef do_test_some_missing
+#undef do_test_and_reset
+
+ return true;
+}
+
+static void run_vma_tests(int *num_tests, int *num_fail)
+{
+ TEST(copy_vma);
+ TEST(vma_flags_unchanged);
+ TEST(vma_flags_cleared);
+ TEST(vma_flags_word);
+ TEST(vma_flags_test);
+ TEST(vma_flags_clear);
+}
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 9f0a9f5ed0fe..0e1121e2ef23 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -12,16 +12,18 @@
#ifndef __MM_VMA_INTERNAL_H
#define __MM_VMA_INTERNAL_H
-#define __private
-#define __bitwise
-#define __randomize_layout
+#include <stdlib.h>
#define CONFIG_MMU
#define CONFIG_PER_VMA_LOCK
-#include <stdlib.h>
+#ifdef __CONCAT
+#undef __CONCAT
+#endif
+#include <linux/args.h>
#include <linux/atomic.h>
+#include <linux/bitmap.h>
#include <linux/list.h>
#include <linux/maple_tree.h>
#include <linux/mm.h>
@@ -29,1835 +31,28 @@
#include <linux/refcount.h>
#include <linux/slab.h>
-extern unsigned long stack_guard_gap;
-#ifdef CONFIG_MMU
-extern unsigned long mmap_min_addr;
-extern unsigned long dac_mmap_min_addr;
-#else
-#define mmap_min_addr 0UL
-#define dac_mmap_min_addr 0UL
-#endif
-
-#define VM_WARN_ON(_expr) (WARN_ON(_expr))
-#define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr))
-#define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr))
-#define VM_BUG_ON(_expr) (BUG_ON(_expr))
-#define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr))
-
-#define MMF_HAS_MDWE 28
-
-/*
- * vm_flags in vm_area_struct, see mm_types.h.
- * When changing, update also include/trace/events/mmflags.h
- */
-
-#define VM_NONE 0x00000000
-
-/**
- * typedef vma_flag_t - specifies an individual VMA flag by bit number.
- *
- * This value is made type safe by sparse to avoid passing invalid flag values
- * around.
- */
-typedef int __bitwise vma_flag_t;
-
-#define DECLARE_VMA_BIT(name, bitnum) \
- VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum)
-#define DECLARE_VMA_BIT_ALIAS(name, aliased) \
- VMA_ ## name ## _BIT = VMA_ ## aliased ## _BIT
-enum {
- DECLARE_VMA_BIT(READ, 0),
- DECLARE_VMA_BIT(WRITE, 1),
- DECLARE_VMA_BIT(EXEC, 2),
- DECLARE_VMA_BIT(SHARED, 3),
- /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
- DECLARE_VMA_BIT(MAYREAD, 4), /* limits for mprotect() etc. */
- DECLARE_VMA_BIT(MAYWRITE, 5),
- DECLARE_VMA_BIT(MAYEXEC, 6),
- DECLARE_VMA_BIT(MAYSHARE, 7),
- DECLARE_VMA_BIT(GROWSDOWN, 8), /* general info on the segment */
-#ifdef CONFIG_MMU
- DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */
-#else
- /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
- DECLARE_VMA_BIT(MAYOVERLAY, 9),
-#endif /* CONFIG_MMU */
- /* Page-ranges managed without "struct page", just pure PFN */
- DECLARE_VMA_BIT(PFNMAP, 10),
- DECLARE_VMA_BIT(MAYBE_GUARD, 11),
- DECLARE_VMA_BIT(UFFD_WP, 12), /* wrprotect pages tracking */
- DECLARE_VMA_BIT(LOCKED, 13),
- DECLARE_VMA_BIT(IO, 14), /* Memory mapped I/O or similar */
- DECLARE_VMA_BIT(SEQ_READ, 15), /* App will access data sequentially */
- DECLARE_VMA_BIT(RAND_READ, 16), /* App will not benefit from clustered reads */
- DECLARE_VMA_BIT(DONTCOPY, 17), /* Do not copy this vma on fork */
- DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */
- DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */
- DECLARE_VMA_BIT(ACCOUNT, 20), /* Is a VM accounted object */
- DECLARE_VMA_BIT(NORESERVE, 21), /* should the VM suppress accounting */
- DECLARE_VMA_BIT(HUGETLB, 22), /* Huge TLB Page VM */
- DECLARE_VMA_BIT(SYNC, 23), /* Synchronous page faults */
- DECLARE_VMA_BIT(ARCH_1, 24), /* Architecture-specific flag */
- DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. */
- DECLARE_VMA_BIT(DONTDUMP, 26), /* Do not include in the core dump */
- DECLARE_VMA_BIT(SOFTDIRTY, 27), /* NOT soft dirty clean area */
- DECLARE_VMA_BIT(MIXEDMAP, 28), /* Can contain struct page and pure PFN pages */
- DECLARE_VMA_BIT(HUGEPAGE, 29), /* MADV_HUGEPAGE marked this vma */
- DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */
- DECLARE_VMA_BIT(MERGEABLE, 31), /* KSM may merge identical pages */
- /* These bits are reused, we define specific uses below. */
- DECLARE_VMA_BIT(HIGH_ARCH_0, 32),
- DECLARE_VMA_BIT(HIGH_ARCH_1, 33),
- DECLARE_VMA_BIT(HIGH_ARCH_2, 34),
- DECLARE_VMA_BIT(HIGH_ARCH_3, 35),
- DECLARE_VMA_BIT(HIGH_ARCH_4, 36),
- DECLARE_VMA_BIT(HIGH_ARCH_5, 37),
- DECLARE_VMA_BIT(HIGH_ARCH_6, 38),
- /*
- * This flag is used to connect VFIO to arch specific KVM code. It
- * indicates that the memory under this VMA is safe for use with any
- * non-cachable memory type inside KVM. Some VFIO devices, on some
- * platforms, are thought to be unsafe and can cause machine crashes
- * if KVM does not lock down the memory type.
- */
- DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39),
-#ifdef CONFIG_PPC32
- DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1),
-#else
- DECLARE_VMA_BIT(DROPPABLE, 40),
-#endif
- DECLARE_VMA_BIT(UFFD_MINOR, 41),
- DECLARE_VMA_BIT(SEALED, 42),
- /* Flags that reuse flags above. */
- DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0),
- DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1),
- DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2),
- DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3),
- DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4),
-#if defined(CONFIG_X86_USER_SHADOW_STACK)
- /*
- * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
- * support core mm.
- *
- * These VMAs will get a single end guard page. This helps userspace
- * protect itself from attacks. A single page is enough for current
- * shadow stack archs (x86). See the comments near alloc_shstk() in
- * arch/x86/kernel/shstk.c for more details on the guard size.
- */
- DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5),
-#elif defined(CONFIG_ARM64_GCS)
- /*
- * arm64's Guarded Control Stack implements similar functionality and
- * has similar constraints to shadow stacks.
- */
- DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6),
-#endif
- DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1), /* Strong Access Ordering (powerpc) */
- DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1), /* parisc */
- DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1), /* sparc64 */
- DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1), /* arm64 */
- DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1), /* sparc64, arm64 */
- DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1), /* !CONFIG_MMU */
- DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4), /* arm64 */
- DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */
-#ifdef CONFIG_STACK_GROWSUP
- DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP),
- DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN),
-#else
- DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN),
-#endif
-};
-
-#define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT)
-#define VM_READ INIT_VM_FLAG(READ)
-#define VM_WRITE INIT_VM_FLAG(WRITE)
-#define VM_EXEC INIT_VM_FLAG(EXEC)
-#define VM_SHARED INIT_VM_FLAG(SHARED)
-#define VM_MAYREAD INIT_VM_FLAG(MAYREAD)
-#define VM_MAYWRITE INIT_VM_FLAG(MAYWRITE)
-#define VM_MAYEXEC INIT_VM_FLAG(MAYEXEC)
-#define VM_MAYSHARE INIT_VM_FLAG(MAYSHARE)
-#define VM_GROWSDOWN INIT_VM_FLAG(GROWSDOWN)
-#ifdef CONFIG_MMU
-#define VM_UFFD_MISSING INIT_VM_FLAG(UFFD_MISSING)
-#else
-#define VM_UFFD_MISSING VM_NONE
-#define VM_MAYOVERLAY INIT_VM_FLAG(MAYOVERLAY)
-#endif
-#define VM_PFNMAP INIT_VM_FLAG(PFNMAP)
-#define VM_MAYBE_GUARD INIT_VM_FLAG(MAYBE_GUARD)
-#define VM_UFFD_WP INIT_VM_FLAG(UFFD_WP)
-#define VM_LOCKED INIT_VM_FLAG(LOCKED)
-#define VM_IO INIT_VM_FLAG(IO)
-#define VM_SEQ_READ INIT_VM_FLAG(SEQ_READ)
-#define VM_RAND_READ INIT_VM_FLAG(RAND_READ)
-#define VM_DONTCOPY INIT_VM_FLAG(DONTCOPY)
-#define VM_DONTEXPAND INIT_VM_FLAG(DONTEXPAND)
-#define VM_LOCKONFAULT INIT_VM_FLAG(LOCKONFAULT)
-#define VM_ACCOUNT INIT_VM_FLAG(ACCOUNT)
-#define VM_NORESERVE INIT_VM_FLAG(NORESERVE)
-#define VM_HUGETLB INIT_VM_FLAG(HUGETLB)
-#define VM_SYNC INIT_VM_FLAG(SYNC)
-#define VM_ARCH_1 INIT_VM_FLAG(ARCH_1)
-#define VM_WIPEONFORK INIT_VM_FLAG(WIPEONFORK)
-#define VM_DONTDUMP INIT_VM_FLAG(DONTDUMP)
-#ifdef CONFIG_MEM_SOFT_DIRTY
-#define VM_SOFTDIRTY INIT_VM_FLAG(SOFTDIRTY)
-#else
-#define VM_SOFTDIRTY VM_NONE
-#endif
-#define VM_MIXEDMAP INIT_VM_FLAG(MIXEDMAP)
-#define VM_HUGEPAGE INIT_VM_FLAG(HUGEPAGE)
-#define VM_NOHUGEPAGE INIT_VM_FLAG(NOHUGEPAGE)
-#define VM_MERGEABLE INIT_VM_FLAG(MERGEABLE)
-#define VM_STACK INIT_VM_FLAG(STACK)
-#ifdef CONFIG_STACK_GROWS_UP
-#define VM_STACK_EARLY INIT_VM_FLAG(STACK_EARLY)
-#else
-#define VM_STACK_EARLY VM_NONE
-#endif
-#ifdef CONFIG_ARCH_HAS_PKEYS
-#define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT)
-/* Despite the naming, these are FLAGS not bits. */
-#define VM_PKEY_BIT0 INIT_VM_FLAG(PKEY_BIT0)
-#define VM_PKEY_BIT1 INIT_VM_FLAG(PKEY_BIT1)
-#define VM_PKEY_BIT2 INIT_VM_FLAG(PKEY_BIT2)
-#if CONFIG_ARCH_PKEY_BITS > 3
-#define VM_PKEY_BIT3 INIT_VM_FLAG(PKEY_BIT3)
-#else
-#define VM_PKEY_BIT3 VM_NONE
-#endif /* CONFIG_ARCH_PKEY_BITS > 3 */
-#if CONFIG_ARCH_PKEY_BITS > 4
-#define VM_PKEY_BIT4 INIT_VM_FLAG(PKEY_BIT4)
-#else
-#define VM_PKEY_BIT4 VM_NONE
-#endif /* CONFIG_ARCH_PKEY_BITS > 4 */
-#endif /* CONFIG_ARCH_HAS_PKEYS */
-#if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS)
-#define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK)
-#else
-#define VM_SHADOW_STACK VM_NONE
-#endif
-#if defined(CONFIG_PPC64)
-#define VM_SAO INIT_VM_FLAG(SAO)
-#elif defined(CONFIG_PARISC)
-#define VM_GROWSUP INIT_VM_FLAG(GROWSUP)
-#elif defined(CONFIG_SPARC64)
-#define VM_SPARC_ADI INIT_VM_FLAG(SPARC_ADI)
-#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
-#elif defined(CONFIG_ARM64)
-#define VM_ARM64_BTI INIT_VM_FLAG(ARM64_BTI)
-#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
-#elif !defined(CONFIG_MMU)
-#define VM_MAPPED_COPY INIT_VM_FLAG(MAPPED_COPY)
-#endif
-#ifndef VM_GROWSUP
-#define VM_GROWSUP VM_NONE
-#endif
-#ifdef CONFIG_ARM64_MTE
-#define VM_MTE INIT_VM_FLAG(MTE)
-#define VM_MTE_ALLOWED INIT_VM_FLAG(MTE_ALLOWED)
-#else
-#define VM_MTE VM_NONE
-#define VM_MTE_ALLOWED VM_NONE
-#endif
-#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
-#define VM_UFFD_MINOR INIT_VM_FLAG(UFFD_MINOR)
-#else
-#define VM_UFFD_MINOR VM_NONE
-#endif
-#ifdef CONFIG_64BIT
-#define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED)
-#define VM_SEALED INIT_VM_FLAG(SEALED)
-#else
-#define VM_ALLOW_ANY_UNCACHED VM_NONE
-#define VM_SEALED VM_NONE
-#endif
-#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32)
-#define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE)
-#else
-#define VM_DROPPABLE VM_NONE
-#endif
-
-/* Bits set in the VMA until the stack is in its final location */
-#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
-
-#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
-
-/* Common data flag combinations */
-#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \
- VM_MAYWRITE | VM_MAYEXEC)
-#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */
-#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC
-#endif
-
-#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
-#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
-#endif
-
-#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
-
-#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
-
-/* VMA basic access permission flags */
-#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
-
-/*
- * Special vmas that are non-mergable, non-mlock()able.
- */
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
-
-#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
-#define TASK_SIZE_LOW DEFAULT_MAP_WINDOW
-#define TASK_SIZE_MAX DEFAULT_MAP_WINDOW
-#define STACK_TOP TASK_SIZE_LOW
-#define STACK_TOP_MAX TASK_SIZE_MAX
-
-/* This mask represents all the VMA flag bits used by mlock */
-#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT)
-
-#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
-
-#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#define RLIMIT_STACK 3 /* max stack size */
-#define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */
-
-#define CAP_IPC_LOCK 14
-
-/*
- * Flags which should be 'sticky' on merge - that is, flags which, when one VMA
- * possesses it but the other does not, the merged VMA should nonetheless have
- * applied to it:
- *
- * VM_SOFTDIRTY - if a VMA is marked soft-dirty, that is has not had its
- * references cleared via /proc/$pid/clear_refs, any merged VMA
- * should be considered soft-dirty also as it operates at a VMA
- * granularity.
- */
-#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD)
-
-/*
- * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one
- * of these flags and the other not does not preclude a merge.
- *
- * VM_STICKY - When merging VMAs, VMA flags must match, unless they are
- * 'sticky'. If any sticky flags exist in either VMA, we simply
- * set all of them on the merged VMA.
- */
-#define VM_IGNORE_MERGE VM_STICKY
-
-/*
- * Flags which should result in page tables being copied on fork. These are
- * flags which indicate that the VMA maps page tables which cannot be
- * reconsistuted upon page fault, so necessitate page table copying upon
- *
- * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be
- * reasonably reconstructed on page fault.
- *
- * VM_UFFD_WP - Encodes metadata about an installed uffd
- * write protect handler, which cannot be
- * reconstructed on page fault.
- *
- * We always copy pgtables when dst_vma has uffd-wp
- * enabled even if it's file-backed
- * (e.g. shmem). Because when uffd-wp is enabled,
- * pgtable contains uffd-wp protection information,
- * that's something we can't retrieve from page cache,
- * and skip copying will lose those info.
- *
- * VM_MAYBE_GUARD - Could contain page guard region markers which
- * by design are a property of the page tables
- * only and thus cannot be reconstructed on page
- * fault.
- */
-#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)
-
-#define FIRST_USER_ADDRESS 0UL
-#define USER_PGTABLES_CEILING 0UL
-
-#define vma_policy(vma) NULL
-
-#define down_write_nest_lock(sem, nest_lock)
-
-#define pgprot_val(x) ((x).pgprot)
-#define __pgprot(x) ((pgprot_t) { (x) } )
-
-#define for_each_vma(__vmi, __vma) \
- while (((__vma) = vma_next(&(__vmi))) != NULL)
-
-/* The MM code likes to work with exclusive end addresses */
-#define for_each_vma_range(__vmi, __vma, __end) \
- while (((__vma) = vma_find(&(__vmi), (__end))) != NULL)
-
-#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
-
-#define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT))
-
-#define test_and_set_bit(nr, addr) __test_and_set_bit(nr, addr)
-#define test_and_clear_bit(nr, addr) __test_and_clear_bit(nr, addr)
-
-#define TASK_SIZE ((1ul << 47)-PAGE_SIZE)
-
-#define AS_MM_ALL_LOCKS 2
-
-/* We hardcode this for now. */
-#define sysctl_max_map_count 0x1000000UL
-
-#define pgoff_t unsigned long
-typedef unsigned long pgprotval_t;
-typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
-typedef unsigned long vm_flags_t;
-typedef __bitwise unsigned int vm_fault_t;
-
-/*
- * The shared stubs do not implement this, it amounts to an fprintf(STDERR,...)
- * either way :)
- */
-#define pr_warn_once pr_err
-
-#define data_race(expr) expr
-
-#define ASSERT_EXCLUSIVE_WRITER(x)
-
-#define pgtable_supports_soft_dirty() 1
-
-/**
- * swap - swap values of @a and @b
- * @a: first value
- * @b: second value
- */
-#define swap(a, b) \
- do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
-
-struct kref {
- refcount_t refcount;
-};
-
-/*
- * Define the task command name length as enum, then it can be visible to
- * BPF programs.
- */
-enum {
- TASK_COMM_LEN = 16,
-};
-
/*
- * Flags for bug emulation.
- *
- * These occupy the top three bytes.
+ * DUPLICATE typedef definitions from kernel source that have to be declared
+ * ahead of all other headers.
*/
-enum {
- READ_IMPLIES_EXEC = 0x0400000,
-};
-
-struct task_struct {
- char comm[TASK_COMM_LEN];
- pid_t pid;
- struct mm_struct *mm;
-
- /* Used for emulating ABI behavior of previous Linux versions: */
- unsigned int personality;
-};
-
-struct task_struct *get_current(void);
-#define current get_current()
-
-struct anon_vma {
- struct anon_vma *root;
- struct rb_root_cached rb_root;
-
- /* Test fields. */
- bool was_cloned;
- bool was_unlinked;
-};
-
-struct anon_vma_chain {
- struct anon_vma *anon_vma;
- struct list_head same_vma;
-};
-
-struct anon_vma_name {
- struct kref kref;
- /* The name needs to be at the end because it is dynamically sized. */
- char name[];
-};
-
-struct vma_iterator {
- struct ma_state mas;
-};
-
-#define VMA_ITERATOR(name, __mm, __addr) \
- struct vma_iterator name = { \
- .mas = { \
- .tree = &(__mm)->mm_mt, \
- .index = __addr, \
- .node = NULL, \
- .status = ma_start, \
- }, \
- }
-
-struct address_space {
- struct rb_root_cached i_mmap;
- unsigned long flags;
- atomic_t i_mmap_writable;
-};
-
-struct vm_userfaultfd_ctx {};
-struct mempolicy {};
-struct mmu_gather {};
-struct mutex {};
-#define DEFINE_MUTEX(mutexname) \
- struct mutex mutexname = {}
-
-#define DECLARE_BITMAP(name, bits) \
- unsigned long name[BITS_TO_LONGS(bits)]
-
-#define NUM_MM_FLAG_BITS (64)
+#define __private
+/* NUM_MM_FLAG_BITS defined by test code. */
typedef struct {
__private DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS);
} mm_flags_t;
-
-/*
- * Opaque type representing current VMA (vm_area_struct) flag state. Must be
- * accessed via vma_flags_xxx() helper functions.
- */
-#define NUM_VMA_FLAG_BITS BITS_PER_LONG
+/* NUM_VMA_FLAG_BITS defined by test code. */
typedef struct {
DECLARE_BITMAP(__vma_flags, NUM_VMA_FLAG_BITS);
} __private vma_flags_t;
-struct mm_struct {
- struct maple_tree mm_mt;
- int map_count; /* number of VMAs */
- unsigned long total_vm; /* Total pages mapped */
- unsigned long locked_vm; /* Pages that have PG_mlocked set */
- unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
- unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
- unsigned long stack_vm; /* VM_STACK */
-
- unsigned long def_flags;
-
- mm_flags_t flags; /* Must use mm_flags_* helpers to access */
-};
-
-struct vm_area_struct;
-
-
-/* What action should be taken after an .mmap_prepare call is complete? */
-enum mmap_action_type {
- MMAP_NOTHING, /* Mapping is complete, no further action. */
- MMAP_REMAP_PFN, /* Remap PFN range. */
- MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */
-};
-
-/*
- * Describes an action an mmap_prepare hook can instruct to be taken to complete
- * the mapping of a VMA. Specified in vm_area_desc.
- */
-struct mmap_action {
- union {
- /* Remap range. */
- struct {
- unsigned long start;
- unsigned long start_pfn;
- unsigned long size;
- pgprot_t pgprot;
- } remap;
- };
- enum mmap_action_type type;
-
- /*
- * If specified, this hook is invoked after the selected action has been
- * successfully completed. Note that the VMA write lock is still held.
- *
- * The absolute minimum ought to be done here.
- *
- * Returns 0 on success, or an error code.
- */
- int (*success_hook)(const struct vm_area_struct *vma);
-
- /*
- * If specified, this hook is invoked when an error occurred when
- * attempting the selected action.
- *
- * The hook can return an error code in order to filter the error, but
- * it is not valid to clear the error here.
- */
- int (*error_hook)(int err);
-
- /*
- * This should be set in rare instances where the operation required
- * that the rmap should not be able to access the VMA until
- * completely set up.
- */
- bool hide_from_rmap_until_complete :1;
-};
-
-/*
- * Describes a VMA that is about to be mmap()'ed. Drivers may choose to
- * manipulate mutable fields which will cause those fields to be updated in the
- * resultant VMA.
- *
- * Helper functions are not required for manipulating any field.
- */
-struct vm_area_desc {
- /* Immutable state. */
- const struct mm_struct *const mm;
- struct file *const file; /* May vary from vm_file in stacked callers. */
- unsigned long start;
- unsigned long end;
-
- /* Mutable fields. Populated with initial state. */
- pgoff_t pgoff;
- struct file *vm_file;
- union {
- vm_flags_t vm_flags;
- vma_flags_t vma_flags;
- };
- pgprot_t page_prot;
-
- /* Write-only fields. */
- const struct vm_operations_struct *vm_ops;
- void *private_data;
-
- /* Take further action? */
- struct mmap_action action;
-};
-
-struct file_operations {
- int (*mmap)(struct file *, struct vm_area_struct *);
- int (*mmap_prepare)(struct vm_area_desc *);
-};
-
-struct file {
- struct address_space *f_mapping;
- const struct file_operations *f_op;
-};
-
-#define VMA_LOCK_OFFSET 0x40000000
-
-typedef struct { unsigned long v; } freeptr_t;
-
-struct vm_area_struct {
- /* The first cache line has the info for VMA tree walking. */
-
- union {
- struct {
- /* VMA covers [vm_start; vm_end) addresses within mm */
- unsigned long vm_start;
- unsigned long vm_end;
- };
- freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
- };
-
- struct mm_struct *vm_mm; /* The address space we belong to. */
- pgprot_t vm_page_prot; /* Access permissions of this VMA. */
-
- /*
- * Flags, see mm.h.
- * To modify use vm_flags_{init|reset|set|clear|mod} functions.
- */
- union {
- const vm_flags_t vm_flags;
- vma_flags_t flags;
- };
-
-#ifdef CONFIG_PER_VMA_LOCK
- /*
- * Can only be written (using WRITE_ONCE()) while holding both:
- * - mmap_lock (in write mode)
- * - vm_refcnt bit at VMA_LOCK_OFFSET is set
- * Can be read reliably while holding one of:
- * - mmap_lock (in read or write mode)
- * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1
- * Can be read unreliably (using READ_ONCE()) for pessimistic bailout
- * while holding nothing (except RCU to keep the VMA struct allocated).
- *
- * This sequence counter is explicitly allowed to overflow; sequence
- * counter reuse can only lead to occasional unnecessary use of the
- * slowpath.
- */
- unsigned int vm_lock_seq;
-#endif
-
- /*
- * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
- * list, after a COW of one of the file pages. A MAP_SHARED vma
- * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
- * or brk vma (with NULL file) can only be in an anon_vma list.
- */
- struct list_head anon_vma_chain; /* Serialized by mmap_lock &
- * page_table_lock */
- struct anon_vma *anon_vma; /* Serialized by page_table_lock */
-
- /* Function pointers to deal with this struct. */
- const struct vm_operations_struct *vm_ops;
-
- /* Information about our backing store: */
- unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
- units */
- struct file * vm_file; /* File we map to (can be NULL). */
- void * vm_private_data; /* was vm_pte (shared mem) */
-
-#ifdef CONFIG_SWAP
- atomic_long_t swap_readahead_info;
-#endif
-#ifndef CONFIG_MMU
- struct vm_region *vm_region; /* NOMMU mapping region */
-#endif
-#ifdef CONFIG_NUMA
- struct mempolicy *vm_policy; /* NUMA policy for the VMA */
-#endif
-#ifdef CONFIG_NUMA_BALANCING
- struct vma_numab_state *numab_state; /* NUMA Balancing state */
-#endif
-#ifdef CONFIG_PER_VMA_LOCK
- /* Unstable RCU readers are allowed to read this. */
- refcount_t vm_refcnt;
-#endif
- /*
- * For areas with an address space and backing store,
- * linkage into the address_space->i_mmap interval tree.
- *
- */
- struct {
- struct rb_node rb;
- unsigned long rb_subtree_last;
- } shared;
-#ifdef CONFIG_ANON_VMA_NAME
- /*
- * For private and shared anonymous mappings, a pointer to a null
- * terminated string containing the name given to the vma, or NULL if
- * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
- */
- struct anon_vma_name *anon_name;
-#endif
- struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
-} __randomize_layout;
-
-struct vm_fault {};
-
-struct vm_operations_struct {
- void (*open)(struct vm_area_struct * area);
- /**
- * @close: Called when the VMA is being removed from the MM.
- * Context: User context. May sleep. Caller holds mmap_lock.
- */
- void (*close)(struct vm_area_struct * area);
- /* Called any time before splitting to check if it's allowed */
- int (*may_split)(struct vm_area_struct *area, unsigned long addr);
- int (*mremap)(struct vm_area_struct *area);
- /*
- * Called by mprotect() to make driver-specific permission
- * checks before mprotect() is finalised. The VMA must not
- * be modified. Returns 0 if mprotect() can proceed.
- */
- int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, unsigned long newflags);
- vm_fault_t (*fault)(struct vm_fault *vmf);
- vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
- vm_fault_t (*map_pages)(struct vm_fault *vmf,
- pgoff_t start_pgoff, pgoff_t end_pgoff);
- unsigned long (*pagesize)(struct vm_area_struct * area);
-
- /* notification that a previously read-only page is about to become
- * writable, if an error is returned it will cause a SIGBUS */
- vm_fault_t (*page_mkwrite)(struct vm_fault *vmf);
-
- /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
- vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf);
-
- /* called by access_process_vm when get_user_pages() fails, typically
- * for use by special VMAs. See also generic_access_phys() for a generic
- * implementation useful for any iomem mapping.
- */
- int (*access)(struct vm_area_struct *vma, unsigned long addr,
- void *buf, int len, int write);
-
- /* Called by the /proc/PID/maps code to ask the vma whether it
- * has a special name. Returning non-NULL will also cause this
- * vma to be dumped unconditionally. */
- const char *(*name)(struct vm_area_struct *vma);
-
-#ifdef CONFIG_NUMA
- /*
- * set_policy() op must add a reference to any non-NULL @new mempolicy
- * to hold the policy upon return. Caller should pass NULL @new to
- * remove a policy and fall back to surrounding context--i.e. do not
- * install a MPOL_DEFAULT policy, nor the task or system default
- * mempolicy.
- */
- int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
-
- /*
- * get_policy() op must add reference [mpol_get()] to any policy at
- * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure
- * in mm/mempolicy.c will do this automatically.
- * get_policy() must NOT add a ref if the policy at (vma,addr) is not
- * marked as MPOL_SHARED. vma policies are protected by the mmap_lock.
- * If no [shared/vma] mempolicy exists at the addr, get_policy() op
- * must return NULL--i.e., do not "fallback" to task or system default
- * policy.
- */
- struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
- unsigned long addr, pgoff_t *ilx);
-#endif
-#ifdef CONFIG_FIND_NORMAL_PAGE
- /*
- * Called by vm_normal_page() for special PTEs in @vma at @addr. This
- * allows for returning a "normal" page from vm_normal_page() even
- * though the PTE indicates that the "struct page" either does not exist
- * or should not be touched: "special".
- *
- * Do not add new users: this really only works when a "normal" page
- * was mapped, but then the PTE got changed to something weird (+
- * marked special) that would not make pte_pfn() identify the originally
- * inserted page.
- */
- struct page *(*find_normal_page)(struct vm_area_struct *vma,
- unsigned long addr);
-#endif /* CONFIG_FIND_NORMAL_PAGE */
-};
-
-struct vm_unmapped_area_info {
-#define VM_UNMAPPED_AREA_TOPDOWN 1
- unsigned long flags;
- unsigned long length;
- unsigned long low_limit;
- unsigned long high_limit;
- unsigned long align_mask;
- unsigned long align_offset;
- unsigned long start_gap;
-};
-
-struct pagetable_move_control {
- struct vm_area_struct *old; /* Source VMA. */
- struct vm_area_struct *new; /* Destination VMA. */
- unsigned long old_addr; /* Address from which the move begins. */
- unsigned long old_end; /* Exclusive address at which old range ends. */
- unsigned long new_addr; /* Address to move page tables to. */
- unsigned long len_in; /* Bytes to remap specified by user. */
-
- bool need_rmap_locks; /* Do rmap locks need to be taken? */
- bool for_stack; /* Is this an early temp stack being moved? */
-};
-
-#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \
- struct pagetable_move_control name = { \
- .old = old_, \
- .new = new_, \
- .old_addr = old_addr_, \
- .old_end = (old_addr_) + (len_), \
- .new_addr = new_addr_, \
- .len_in = len_, \
- }
-
-static inline void vma_iter_invalidate(struct vma_iterator *vmi)
-{
- mas_pause(&vmi->mas);
-}
-
-static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
-{
- return __pgprot(pgprot_val(oldprot) | pgprot_val(newprot));
-}
-
-static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
-{
- return __pgprot(vm_flags);
-}
-
-static inline bool is_shared_maywrite(vm_flags_t vm_flags)
-{
- return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
- (VM_SHARED | VM_MAYWRITE);
-}
-
-static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma)
-{
- return is_shared_maywrite(vma->vm_flags);
-}
-
-static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
-{
- /*
- * Uses mas_find() to get the first VMA when the iterator starts.
- * Calling mas_next() could skip the first entry.
- */
- return mas_find(&vmi->mas, ULONG_MAX);
-}
-
-/*
- * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
- * assertions should be made either under mmap_write_lock or when the object
- * has been isolated under mmap_write_lock, ensuring no competing writers.
- */
-static inline void vma_assert_attached(struct vm_area_struct *vma)
-{
- WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
-}
-
-static inline void vma_assert_detached(struct vm_area_struct *vma)
-{
- WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
-}
-
-static inline void vma_assert_write_locked(struct vm_area_struct *);
-static inline void vma_mark_attached(struct vm_area_struct *vma)
-{
- vma_assert_write_locked(vma);
- vma_assert_detached(vma);
- refcount_set_release(&vma->vm_refcnt, 1);
-}
-
-static inline void vma_mark_detached(struct vm_area_struct *vma)
-{
- vma_assert_write_locked(vma);
- vma_assert_attached(vma);
- /* We are the only writer, so no need to use vma_refcount_put(). */
- if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
- /*
- * Reader must have temporarily raised vm_refcnt but it will
- * drop it without using the vma since vma is write-locked.
- */
- }
-}
-
-extern const struct vm_operations_struct vma_dummy_vm_ops;
-
-extern unsigned long rlimit(unsigned int limit);
-
-static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
-{
- memset(vma, 0, sizeof(*vma));
- vma->vm_mm = mm;
- vma->vm_ops = &vma_dummy_vm_ops;
- INIT_LIST_HEAD(&vma->anon_vma_chain);
- vma->vm_lock_seq = UINT_MAX;
-}
-
-/*
- * These are defined in vma.h, but sadly vm_stat_account() is referenced by
- * kernel/fork.c, so we have to make these broadly available there, and temporarily
- * define them here to resolve the dependency cycle.
- */
-
-#define is_exec_mapping(flags) \
- ((flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC)
-
-#define is_stack_mapping(flags) \
- (((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK))
-
-#define is_data_mapping(flags) \
- ((flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE)
-
-static inline void vm_stat_account(struct mm_struct *mm, vm_flags_t flags,
- long npages)
-{
- WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages);
-
- if (is_exec_mapping(flags))
- mm->exec_vm += npages;
- else if (is_stack_mapping(flags))
- mm->stack_vm += npages;
- else if (is_data_mapping(flags))
- mm->data_vm += npages;
-}
-
-#undef is_exec_mapping
-#undef is_stack_mapping
-#undef is_data_mapping
-
-/* Currently stubbed but we may later wish to un-stub. */
-static inline void vm_acct_memory(long pages);
-static inline void vm_unacct_memory(long pages)
-{
- vm_acct_memory(-pages);
-}
-
-static inline void mapping_allow_writable(struct address_space *mapping)
-{
- atomic_inc(&mapping->i_mmap_writable);
-}
-
-static inline void vma_set_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- pgoff_t pgoff)
-{
- vma->vm_start = start;
- vma->vm_end = end;
- vma->vm_pgoff = pgoff;
-}
-
-static inline
-struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
-{
- return mas_find(&vmi->mas, max - 1);
-}
-
-static inline int vma_iter_clear_gfp(struct vma_iterator *vmi,
- unsigned long start, unsigned long end, gfp_t gfp)
-{
- __mas_set_range(&vmi->mas, start, end - 1);
- mas_store_gfp(&vmi->mas, NULL, gfp);
- if (unlikely(mas_is_err(&vmi->mas)))
- return -ENOMEM;
-
- return 0;
-}
-
-static inline void mmap_assert_locked(struct mm_struct *);
-static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
- unsigned long start_addr,
- unsigned long end_addr)
-{
- unsigned long index = start_addr;
-
- mmap_assert_locked(mm);
- return mt_find(&mm->mm_mt, &index, end_addr - 1);
-}
-
-static inline
-struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
-{
- return mtree_load(&mm->mm_mt, addr);
-}
-
-static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
-{
- return mas_prev(&vmi->mas, 0);
-}
-
-static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr)
-{
- mas_set(&vmi->mas, addr);
-}
-
-static inline bool vma_is_anonymous(struct vm_area_struct *vma)
-{
- return !vma->vm_ops;
-}
-
-/* Defined in vma.h, so temporarily define here to avoid circular dependency. */
-#define vma_iter_load(vmi) \
- mas_walk(&(vmi)->mas)
-
-static inline struct vm_area_struct *
-find_vma_prev(struct mm_struct *mm, unsigned long addr,
- struct vm_area_struct **pprev)
-{
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, mm, addr);
-
- vma = vma_iter_load(&vmi);
- *pprev = vma_prev(&vmi);
- if (!vma)
- vma = vma_next(&vmi);
- return vma;
-}
-
-#undef vma_iter_load
-
-static inline void vma_iter_init(struct vma_iterator *vmi,
- struct mm_struct *mm, unsigned long addr)
-{
- mas_init(&vmi->mas, &mm->mm_mt, addr);
-}
-
-/* Stubbed functions. */
-
-static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
-{
- return NULL;
-}
-
-static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
- struct vm_userfaultfd_ctx vm_ctx)
-{
- return true;
-}
-
-static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
- struct anon_vma_name *anon_name2)
-{
- return true;
-}
-
-static inline void might_sleep(void)
-{
-}
-
-static inline unsigned long vma_pages(struct vm_area_struct *vma)
-{
- return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
-}
-
-static inline void fput(struct file *file)
-{
-}
-
-static inline void mpol_put(struct mempolicy *pol)
-{
-}
-
-static inline void lru_add_drain(void)
-{
-}
-
-static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
-{
-}
-
-static inline void update_hiwater_rss(struct mm_struct *mm)
-{
-}
-
-static inline void update_hiwater_vm(struct mm_struct *mm)
-{
-}
-
-static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
- struct vm_area_struct *vma, unsigned long start_addr,
- unsigned long end_addr, unsigned long tree_end)
-{
-}
-
-static inline void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
- struct vm_area_struct *vma, unsigned long floor,
- unsigned long ceiling, bool mm_wr_locked)
-{
-}
-
-static inline void mapping_unmap_writable(struct address_space *mapping)
-{
-}
-
-static inline void flush_dcache_mmap_lock(struct address_space *mapping)
-{
-}
-
-static inline void tlb_finish_mmu(struct mmu_gather *tlb)
-{
-}
-
-static inline struct file *get_file(struct file *f)
-{
- return f;
-}
-
-static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
-{
- return 0;
-}
-
-static inline int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
-{
- /* For testing purposes. We indicate that an anon_vma has been cloned. */
- if (src->anon_vma != NULL) {
- dst->anon_vma = src->anon_vma;
- dst->anon_vma->was_cloned = true;
- }
-
- return 0;
-}
-
-static inline void vma_start_write(struct vm_area_struct *vma)
-{
- /* Used to indicate to tests that a write operation has begun. */
- vma->vm_lock_seq++;
-}
-
-static inline __must_check
-int vma_start_write_killable(struct vm_area_struct *vma)
-{
- /* Used to indicate to tests that a write operation has begun. */
- vma->vm_lock_seq++;
- return 0;
-}
-
-static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
- unsigned long start,
- unsigned long end,
- struct vm_area_struct *next)
-{
-}
-
-static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {}
-
-static inline void vma_iter_free(struct vma_iterator *vmi)
-{
- mas_destroy(&vmi->mas);
-}
-
-static inline
-struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi)
-{
- return mas_next_range(&vmi->mas, ULONG_MAX);
-}
-
-static inline void vm_acct_memory(long pages)
-{
-}
-
-static inline void vma_interval_tree_insert(struct vm_area_struct *vma,
- struct rb_root_cached *rb)
-{
-}
-
-static inline void vma_interval_tree_remove(struct vm_area_struct *vma,
- struct rb_root_cached *rb)
-{
-}
-
-static inline void flush_dcache_mmap_unlock(struct address_space *mapping)
-{
-}
-
-static inline void anon_vma_interval_tree_insert(struct anon_vma_chain *avc,
- struct rb_root_cached *rb)
-{
-}
-
-static inline void anon_vma_interval_tree_remove(struct anon_vma_chain *avc,
- struct rb_root_cached *rb)
-{
-}
-
-static inline void uprobe_mmap(struct vm_area_struct *vma)
-{
-}
-
-static inline void uprobe_munmap(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
-}
-
-static inline void i_mmap_lock_write(struct address_space *mapping)
-{
-}
-
-static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
-{
-}
-
-static inline void vma_assert_write_locked(struct vm_area_struct *vma)
-{
-}
-
-static inline void unlink_anon_vmas(struct vm_area_struct *vma)
-{
- /* For testing purposes, indicate that the anon_vma was unlinked. */
- vma->anon_vma->was_unlinked = true;
-}
-
-static inline void anon_vma_unlock_write(struct anon_vma *anon_vma)
-{
-}
-
-static inline void i_mmap_unlock_write(struct address_space *mapping)
-{
-}
-
-static inline void anon_vma_merge(struct vm_area_struct *vma,
- struct vm_area_struct *next)
-{
-}
-
-static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
- unsigned long start,
- unsigned long end,
- struct list_head *unmaps)
-{
- return 0;
-}
-
-static inline void mmap_write_downgrade(struct mm_struct *mm)
-{
-}
-
-static inline void mmap_read_unlock(struct mm_struct *mm)
-{
-}
-
-static inline void mmap_write_unlock(struct mm_struct *mm)
-{
-}
-
-static inline int mmap_write_lock_killable(struct mm_struct *mm)
-{
- return 0;
-}
-
-static inline bool can_modify_mm(struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
-{
- return true;
-}
-
-static inline void arch_unmap(struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
-{
-}
-
-static inline void mmap_assert_locked(struct mm_struct *mm)
-{
-}
-
-static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
-{
- return true;
-}
-
-static inline void khugepaged_enter_vma(struct vm_area_struct *vma,
- vm_flags_t vm_flags)
-{
-}
-
-static inline bool mapping_can_writeback(struct address_space *mapping)
-{
- return true;
-}
-
-static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
-{
- return false;
-}
-
-static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
-{
- return false;
-}
-
-static inline bool userfaultfd_wp(struct vm_area_struct *vma)
-{
- return false;
-}
-
-static inline void mmap_assert_write_locked(struct mm_struct *mm)
-{
-}
-
-static inline void mutex_lock(struct mutex *lock)
-{
-}
-
-static inline void mutex_unlock(struct mutex *lock)
-{
-}
-
-static inline bool mutex_is_locked(struct mutex *lock)
-{
- return true;
-}
-
-static inline bool signal_pending(void *p)
-{
- return false;
-}
-
-static inline bool is_file_hugepages(struct file *file)
-{
- return false;
-}
-
-static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
-{
- return 0;
-}
-
-static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags,
- unsigned long npages)
-{
- return true;
-}
-
-static inline int shmem_zero_setup(struct vm_area_struct *vma)
-{
- return 0;
-}
-
-static inline void vma_set_anonymous(struct vm_area_struct *vma)
-{
- vma->vm_ops = NULL;
-}
-
-static inline void ksm_add_vma(struct vm_area_struct *vma)
-{
-}
-
-static inline void perf_event_mmap(struct vm_area_struct *vma)
-{
-}
-
-static inline bool vma_is_dax(struct vm_area_struct *vma)
-{
- return false;
-}
-
-static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
- return NULL;
-}
-
-bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
-
-/* Update vma->vm_page_prot to reflect vma->vm_flags. */
-static inline void vma_set_page_prot(struct vm_area_struct *vma)
-{
- vm_flags_t vm_flags = vma->vm_flags;
- pgprot_t vm_page_prot;
-
- /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
- vm_page_prot = pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vm_flags));
-
- if (vma_wants_writenotify(vma, vm_page_prot)) {
- vm_flags &= ~VM_SHARED;
- /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
- vm_page_prot = pgprot_modify(vm_page_prot, vm_get_page_prot(vm_flags));
- }
- /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
- WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
-}
-
-static inline bool arch_validate_flags(vm_flags_t flags)
-{
- return true;
-}
-
-static inline void vma_close(struct vm_area_struct *vma)
-{
-}
-
-static inline int mmap_file(struct file *file, struct vm_area_struct *vma)
-{
- return 0;
-}
-
-static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma)
-{
- if (vma->vm_flags & VM_GROWSDOWN)
- return stack_guard_gap;
-
- /* See reasoning around the VM_SHADOW_STACK definition */
- if (vma->vm_flags & VM_SHADOW_STACK)
- return PAGE_SIZE;
-
- return 0;
-}
-
-static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
-{
- unsigned long gap = stack_guard_start_gap(vma);
- unsigned long vm_start = vma->vm_start;
-
- vm_start -= gap;
- if (vm_start > vma->vm_start)
- vm_start = 0;
- return vm_start;
-}
-
-static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
-{
- unsigned long vm_end = vma->vm_end;
-
- if (vma->vm_flags & VM_GROWSUP) {
- vm_end += stack_guard_gap;
- if (vm_end < vma->vm_end)
- vm_end = -PAGE_SIZE;
- }
- return vm_end;
-}
-
-static inline int is_hugepage_only_range(struct mm_struct *mm,
- unsigned long addr, unsigned long len)
-{
- return 0;
-}
-
-static inline bool vma_is_accessible(struct vm_area_struct *vma)
-{
- return vma->vm_flags & VM_ACCESS_FLAGS;
-}
-
-static inline bool capable(int cap)
-{
- return true;
-}
-
-static inline bool mlock_future_ok(const struct mm_struct *mm,
- vm_flags_t vm_flags, unsigned long bytes)
-{
- unsigned long locked_pages, limit_pages;
-
- if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK))
- return true;
-
- locked_pages = bytes >> PAGE_SHIFT;
- locked_pages += mm->locked_vm;
-
- limit_pages = rlimit(RLIMIT_MEMLOCK);
- limit_pages >>= PAGE_SHIFT;
-
- return locked_pages <= limit_pages;
-}
-
-static inline int __anon_vma_prepare(struct vm_area_struct *vma)
-{
- struct anon_vma *anon_vma = calloc(1, sizeof(struct anon_vma));
-
- if (!anon_vma)
- return -ENOMEM;
-
- anon_vma->root = anon_vma;
- vma->anon_vma = anon_vma;
-
- return 0;
-}
-
-static inline int anon_vma_prepare(struct vm_area_struct *vma)
-{
- if (likely(vma->anon_vma))
- return 0;
-
- return __anon_vma_prepare(vma);
-}
-
-static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
- struct list_head *uf)
-{
-}
-
-#define ACCESS_PRIVATE(p, member) ((p)->member)
-
-#define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE)
-
-static __always_inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
-{
- unsigned int len = bitmap_size(nbits);
-
- if (small_const_nbits(nbits))
- *dst = 0;
- else
- memset(dst, 0, len);
-}
-
-static inline bool mm_flags_test(int flag, const struct mm_struct *mm)
-{
- return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
-}
-
-/* Clears all bits in the VMA flags bitmap, non-atomically. */
-static inline void vma_flags_clear_all(vma_flags_t *flags)
-{
- bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS);
-}
-
-/*
- * Copy value to the first system word of VMA flags, non-atomically.
- *
- * IMPORTANT: This does not overwrite bytes past the first system word. The
- * caller must account for this.
- */
-static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value)
-{
- *ACCESS_PRIVATE(flags, __vma_flags) = value;
-}
-
-/*
- * Copy value to the first system word of VMA flags ONCE, non-atomically.
- *
- * IMPORTANT: This does not overwrite bytes past the first system word. The
- * caller must account for this.
- */
-static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value)
-{
- unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
-
- WRITE_ONCE(*bitmap, value);
-}
-
-/* Update the first system word of VMA flags setting bits, non-atomically. */
-static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
-{
- unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
-
- *bitmap |= value;
-}
-
-/* Update the first system word of VMA flags clearing bits, non-atomically. */
-static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
-{
- unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
-
- *bitmap &= ~value;
-}
-
-
-/* Use when VMA is not part of the VMA tree and needs no locking */
-static inline void vm_flags_init(struct vm_area_struct *vma,
- vm_flags_t flags)
-{
- vma_flags_clear_all(&vma->flags);
- vma_flags_overwrite_word(&vma->flags, flags);
-}
-
-/*
- * Use when VMA is part of the VMA tree and modifications need coordination
- * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and
- * it should be locked explicitly beforehand.
- */
-static inline void vm_flags_reset(struct vm_area_struct *vma,
- vm_flags_t flags)
-{
- vma_assert_write_locked(vma);
- vm_flags_init(vma, flags);
-}
-
-static inline void vm_flags_reset_once(struct vm_area_struct *vma,
- vm_flags_t flags)
-{
- vma_assert_write_locked(vma);
- /*
- * The user should only be interested in avoiding reordering of
- * assignment to the first word.
- */
- vma_flags_clear_all(&vma->flags);
- vma_flags_overwrite_word_once(&vma->flags, flags);
-}
-
-static inline void vm_flags_set(struct vm_area_struct *vma,
- vm_flags_t flags)
-{
- vma_start_write(vma);
- vma_flags_set_word(&vma->flags, flags);
-}
-
-static inline void vm_flags_clear(struct vm_area_struct *vma,
- vm_flags_t flags)
-{
- vma_start_write(vma);
- vma_flags_clear_word(&vma->flags, flags);
-}
-
-/*
- * Denies creating a writable executable mapping or gaining executable permissions.
- *
- * This denies the following:
- *
- * a) mmap(PROT_WRITE | PROT_EXEC)
- *
- * b) mmap(PROT_WRITE)
- * mprotect(PROT_EXEC)
- *
- * c) mmap(PROT_WRITE)
- * mprotect(PROT_READ)
- * mprotect(PROT_EXEC)
- *
- * But allows the following:
- *
- * d) mmap(PROT_READ | PROT_EXEC)
- * mmap(PROT_READ | PROT_EXEC | PROT_BTI)
- *
- * This is only applicable if the user has set the Memory-Deny-Write-Execute
- * (MDWE) protection mask for the current process.
- *
- * @old specifies the VMA flags the VMA originally possessed, and @new the ones
- * we propose to set.
- *
- * Return: false if proposed change is OK, true if not ok and should be denied.
- */
-static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
-{
- /* If MDWE is disabled, we have nothing to deny. */
- if (mm_flags_test(MMF_HAS_MDWE, current->mm))
- return false;
-
- /* If the new VMA is not executable, we have nothing to deny. */
- if (!(new & VM_EXEC))
- return false;
-
- /* Under MDWE we do not accept newly writably executable VMAs... */
- if (new & VM_WRITE)
- return true;
-
- /* ...nor previously non-executable VMAs becoming executable. */
- if (!(old & VM_EXEC))
- return true;
-
- return false;
-}
-
-static inline int mapping_map_writable(struct address_space *mapping)
-{
- return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
- 0 : -EPERM;
-}
-
-static inline unsigned long move_page_tables(struct pagetable_move_control *pmc)
-{
- return 0;
-}
-
-static inline void free_pgd_range(struct mmu_gather *tlb,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
-}
-
-static inline int ksm_execve(struct mm_struct *mm)
-{
- return 0;
-}
-
-static inline void ksm_exit(struct mm_struct *mm)
-{
-}
-
-static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
-{
- if (reset_refcnt)
- refcount_set(&vma->vm_refcnt, 0);
-}
-
-static inline void vma_numab_state_init(struct vm_area_struct *vma)
-{
-}
-
-static inline void vma_numab_state_free(struct vm_area_struct *vma)
-{
-}
-
-static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
- struct vm_area_struct *new_vma)
-{
-}
-
-static inline void free_anon_vma_name(struct vm_area_struct *vma)
-{
-}
-
-/* Declared in vma.h. */
-static inline void set_vma_from_desc(struct vm_area_struct *vma,
- struct vm_area_desc *desc);
-
-static inline void mmap_action_prepare(struct mmap_action *action,
- struct vm_area_desc *desc)
-{
-}
-
-static inline int mmap_action_complete(struct mmap_action *action,
- struct vm_area_struct *vma)
-{
- return 0;
-}
-
-static inline int __compat_vma_mmap(const struct file_operations *f_op,
- struct file *file, struct vm_area_struct *vma)
-{
- struct vm_area_desc desc = {
- .mm = vma->vm_mm,
- .file = file,
- .start = vma->vm_start,
- .end = vma->vm_end,
-
- .pgoff = vma->vm_pgoff,
- .vm_file = vma->vm_file,
- .vm_flags = vma->vm_flags,
- .page_prot = vma->vm_page_prot,
-
- .action.type = MMAP_NOTHING, /* Default */
- };
- int err;
-
- err = f_op->mmap_prepare(&desc);
- if (err)
- return err;
-
- mmap_action_prepare(&desc.action, &desc);
- set_vma_from_desc(vma, &desc);
- return mmap_action_complete(&desc.action, vma);
-}
-
-static inline int compat_vma_mmap(struct file *file,
- struct vm_area_struct *vma)
-{
- return __compat_vma_mmap(file->f_op, file, vma);
-}
-
-/* Did the driver provide valid mmap hook configuration? */
-static inline bool can_mmap_file(struct file *file)
-{
- bool has_mmap = file->f_op->mmap;
- bool has_mmap_prepare = file->f_op->mmap_prepare;
-
- /* Hooks are mutually exclusive. */
- if (WARN_ON_ONCE(has_mmap && has_mmap_prepare))
- return false;
- if (!has_mmap && !has_mmap_prepare)
- return false;
-
- return true;
-}
-
-static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
-{
- if (file->f_op->mmap_prepare)
- return compat_vma_mmap(file, vma);
-
- return file->f_op->mmap(file, vma);
-}
-
-static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
-{
- return file->f_op->mmap_prepare(desc);
-}
-
-static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
-{
-}
-
-static inline void vma_set_file(struct vm_area_struct *vma, struct file *file)
-{
- /* Changing an anonymous vma with this is illegal */
- get_file(file);
- swap(vma->vm_file, file);
- fput(file);
-}
-
-static inline bool shmem_file(struct file *file)
-{
- return false;
-}
-
-static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
- const struct file *file, vm_flags_t vm_flags)
-{
- return vm_flags;
-}
-
-static inline void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn)
-{
-}
-
-static inline int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
- unsigned long pfn, unsigned long size, pgprot_t pgprot)
-{
- return 0;
-}
+typedef unsigned long vm_flags_t;
+#define pgoff_t unsigned long
+typedef unsigned long pgprotval_t;
+typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
+typedef __bitwise unsigned int vm_fault_t;
-static inline int do_munmap(struct mm_struct *, unsigned long, size_t,
- struct list_head *uf)
-{
- return 0;
-}
+#include "include/stubs.h"
+#include "include/dup.h"
+#include "include/custom.h"
#endif /* __MM_VMA_INTERNAL_H */
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index d843643ced6b..9430ef5b8bc3 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -511,6 +511,18 @@ void run_tests(const struct test_case *test_cases,
printf("ok\n");
}
+
+ printf("All tests have been executed. Waiting other peer...");
+ fflush(stdout);
+
+ /*
+ * Final full barrier, to ensure that all tests have been run and
+ * that even the last one has been successful on both sides.
+ */
+ control_writeln("COMPLETED");
+ control_expectln("COMPLETED");
+
+ printf("ok\n");
}
void list_tests(const struct test_case *test_cases)
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index 142c02a6834a..bf633cde82b0 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -25,7 +25,7 @@ enum transport {
};
static const char * const transport_ksyms[] = {
- #define x(name, symbol) "d " symbol "_transport",
+ #define x(name, symbol) " " symbol "_transport",
KNOWN_TRANSPORTS(x)
#undef x
};
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 9e1250790f33..5bd20ccd9335 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -347,10 +347,12 @@ static void test_stream_msg_peek_server(const struct test_opts *opts)
}
#define SOCK_BUF_SIZE (2 * 1024 * 1024)
+#define SOCK_BUF_SIZE_SMALL (64 * 1024)
#define MAX_MSG_PAGES 4
static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
{
+ unsigned long long sock_buf_size;
unsigned long curr_hash;
size_t max_msg_size;
int page_size;
@@ -363,6 +365,16 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
exit(EXIT_FAILURE);
}
+ sock_buf_size = SOCK_BUF_SIZE;
+
+ setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
+ sock_buf_size,
+ "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)");
+
+ setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
+ sock_buf_size,
+ "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)");
+
/* Wait, until receiver sets buffer size. */
control_expectln("SRVREADY");
@@ -2192,6 +2204,128 @@ static void test_stream_nolinger_server(const struct test_opts *opts)
close(fd);
}
+/* Client half of the "accept()ed socket custom setsockopt()" test: open a
+ * stream connection to the peer and close it again without sending data.
+ */
+static void test_stream_accepted_setsockopt_client(const struct test_opts *opts)
+{
+	int sock = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+
+	if (sock >= 0) {
+		close(sock);
+		return;
+	}
+
+	perror("connect");
+	exit(EXIT_FAILURE);
+}
+
+/* Server half of the "accept()ed socket custom setsockopt()" test: accept
+ * one stream connection, run enable_so_zerocopy_check() on the accepted
+ * socket, then close it.
+ */
+static void test_stream_accepted_setsockopt_server(const struct test_opts *opts)
+{
+	int conn = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+
+	if (conn < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	/* The check is applied to the accept()ed socket, not the listener. */
+	enable_so_zerocopy_check(conn);
+	close(conn);
+}
+
+/* Client half of the "TX credit bounds" test.
+ *
+ * Limits the local socket buffer to SOCK_BUF_SIZE_SMALL, switches the
+ * socket to non-blocking mode and, once the server has announced SRVREADY,
+ * keeps sending 4096-byte chunks until send() reports EAGAIN/EWOULDBLOCK.
+ * The test fails if more bytes were accepted than the local buffer size.
+ */
+static void test_stream_tx_credit_bounds_client(const struct test_opts *opts)
+{
+	unsigned long long sock_buf_size;
+	size_t total = 0;
+	char buf[4096];
+	int flags;
+	int fd;
+
+	memset(buf, 'A', sizeof(buf));
+
+	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	sock_buf_size = SOCK_BUF_SIZE_SMALL;
+
+	setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
+			     sock_buf_size,
+			     "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)");
+
+	setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
+			     sock_buf_size,
+			     "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)");
+
+	/* Query the flags on their own: a failed F_GETFL returns -1, which
+	 * must not be OR-ed into the F_SETFL argument.
+	 */
+	flags = fcntl(fd, F_GETFL, 0);
+	if (flags < 0) {
+		perror("fcntl(F_GETFL)");
+		exit(EXIT_FAILURE);
+	}
+
+	if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) {
+		perror("fcntl(F_SETFL)");
+		exit(EXIT_FAILURE);
+	}
+
+	control_expectln("SRVREADY");
+
+	/* Fill the TX path until the socket refuses to take more data. */
+	for (;;) {
+		ssize_t sent = send(fd, buf, sizeof(buf), 0);
+
+		if (sent == 0) {
+			fprintf(stderr, "unexpected EOF while sending bytes\n");
+			exit(EXIT_FAILURE);
+		}
+
+		if (sent < 0) {
+			if (errno == EINTR)
+				continue;
+
+			if (errno == EAGAIN || errno == EWOULDBLOCK)
+				break;
+
+			perror("send");
+			exit(EXIT_FAILURE);
+		}
+
+		total += sent;
+	}
+
+	control_writeln("CLIDONE");
+	close(fd);
+
+	/* We should not be able to send more bytes than the value set as
+	 * local buffer size.
+	 */
+	if (total > sock_buf_size) {
+		fprintf(stderr,
+			"TX credit too large: queued %zu bytes (expected <= %llu)\n",
+			total, sock_buf_size);
+		exit(EXIT_FAILURE);
+	}
+}
+
+/* Server half of the "TX credit bounds" test: accept the connection, set
+ * the socket buffer (and its maximum) to SOCK_BUF_SIZE, announce SRVREADY
+ * and wait for the client's CLIDONE before closing.  This side never reads
+ * from the socket.
+ */
+static void test_stream_tx_credit_bounds_server(const struct test_opts *opts)
+{
+	unsigned long long buf_bytes = SOCK_BUF_SIZE;
+	int conn;
+
+	conn = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+	if (conn < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Raise the ceiling first, then the buffer size itself. */
+	setsockopt_ull_check(conn, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
+			     buf_bytes,
+			     "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)");
+
+	setsockopt_ull_check(conn, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
+			     buf_bytes,
+			     "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)");
+
+	/* Handshake over the control channel: release the client, then wait
+	 * until it has finished sending.
+	 */
+	control_writeln("SRVREADY");
+	control_expectln("CLIDONE");
+
+	close(conn);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -2371,6 +2505,21 @@ static struct test_case test_cases[] = {
.run_client = test_seqpacket_unread_bytes_client,
.run_server = test_seqpacket_unread_bytes_server,
},
+ {
+ .name = "SOCK_STREAM accept()ed socket custom setsockopt()",
+ .run_client = test_stream_accepted_setsockopt_client,
+ .run_server = test_stream_accepted_setsockopt_server,
+ },
+ {
+ .name = "SOCK_STREAM virtio MSG_ZEROCOPY coalescence corruption",
+ .run_client = test_stream_msgzcopy_mangle_client,
+ .run_server = test_stream_msgzcopy_mangle_server,
+ },
+ {
+ .name = "SOCK_STREAM TX credit bounds",
+ .run_client = test_stream_tx_credit_bounds_client,
+ .run_server = test_stream_tx_credit_bounds_server,
+ },
{},
};
diff --git a/tools/testing/vsock/vsock_test_zerocopy.c b/tools/testing/vsock/vsock_test_zerocopy.c
index 9d9a6cb9614a..a31ddfc1cd0c 100644
--- a/tools/testing/vsock/vsock_test_zerocopy.c
+++ b/tools/testing/vsock/vsock_test_zerocopy.c
@@ -9,14 +9,18 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <poll.h>
#include <linux/errqueue.h>
#include <linux/kernel.h>
+#include <linux/sockios.h>
+#include <linux/time64.h>
#include <errno.h>
#include "control.h"
+#include "timeout.h"
#include "vsock_test_zerocopy.h"
#include "msg_zerocopy_common.h"
@@ -356,3 +360,73 @@ void test_stream_msgzcopy_empty_errq_server(const struct test_opts *opts)
control_expectln("DONE");
close(fd);
}
+
+#define GOOD_COPY_LEN 128 /* net/vmw_vsock/virtio_transport_common.c */
+
+/* Client half of the "virtio MSG_ZEROCOPY coalescence corruption" test.
+ *
+ * Sends PAGE_SIZE + 1 bytes with a plain send, then GOOD_COPY_LEN random
+ * bytes with MSG_ZEROCOPY, and passes the djb2 hash of the second buffer
+ * to the server over the control channel.  Finally waits (up to TIMEOUT
+ * seconds) for POLLERR on the socket before closing it.
+ */
+void test_stream_msgzcopy_mangle_client(const struct test_opts *opts)
+{
+	char sbuf1[PAGE_SIZE + 1], sbuf2[GOOD_COPY_LEN];
+	unsigned long hash;
+	struct pollfd fds;
+	size_t i;
+	int ret;
+	int fd;
+
+	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	enable_so_zerocopy_check(fd);
+
+	memset(sbuf1, 'x', sizeof(sbuf1));
+	send_buf(fd, sbuf1, sizeof(sbuf1), 0, sizeof(sbuf1));
+
+	for (i = 0; i < sizeof(sbuf2); i++)
+		sbuf2[i] = rand() & 0xff;
+
+	send_buf(fd, sbuf2, sizeof(sbuf2), MSG_ZEROCOPY, sizeof(sbuf2));
+
+	hash = hash_djb2(sbuf2, sizeof(sbuf2));
+	control_writeulong(hash);
+
+	/* With no requested events, poll() can only report error-class
+	 * conditions.  NOTE(review): POLLERR is presumably raised by the
+	 * MSG_ZEROCOPY completion on the error queue - confirm.
+	 */
+	fds.fd = fd;
+	fds.events = 0;
+
+	ret = poll(&fds, 1, TIMEOUT * MSEC_PER_SEC);
+	if (ret < 0) {
+		perror("poll");
+		exit(EXIT_FAILURE);
+	}
+
+	/* A timeout or a wake-up without POLLERR leaves errno stale, so
+	 * report what poll() actually returned instead of using perror().
+	 */
+	if (ret != 1 || !(fds.revents & POLLERR)) {
+		fprintf(stderr,
+			"poll: expected POLLERR, got ret=%d revents=%#x\n",
+			ret, (unsigned int)fds.revents);
+		exit(EXIT_FAILURE);
+	}
+
+	close(fd);
+}
+
+/* Server half of the "virtio MSG_ZEROCOPY coalescence corruption" test.
+ *
+ * Receives and drops the first PAGE_SIZE + 1 bytes, then receives the
+ * GOOD_COPY_LEN-byte payload and compares its djb2 hash with the value the
+ * client sent over the control channel.
+ */
+void test_stream_msgzcopy_mangle_server(const struct test_opts *opts)
+{
+	unsigned long expected, actual;
+	char payload[PAGE_SIZE + 1];
+	int conn;
+
+	conn = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+	if (conn < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Do not start receiving before both sends are queued, otherwise we
+	 * would race the (buggy) skb coalescing under test.
+	 * NOTE(review): presumably blocks until SIOCINQ reports this many
+	 * bytes - confirm against vsock_ioctl_int().
+	 */
+	vsock_ioctl_int(conn, SIOCINQ, PAGE_SIZE + 1 + GOOD_COPY_LEN);
+
+	/* The leading packet carries no information for this test. */
+	recv_buf(conn, payload, PAGE_SIZE + 1, 0, PAGE_SIZE + 1);
+
+	/* This is the payload whose integrity is being verified. */
+	recv_buf(conn, payload, GOOD_COPY_LEN, 0, GOOD_COPY_LEN);
+	actual = hash_djb2(payload, GOOD_COPY_LEN);
+	expected = control_readulong();
+
+	if (actual == expected) {
+		close(conn);
+		return;
+	}
+
+	fprintf(stderr, "Data received corrupted\n");
+	exit(EXIT_FAILURE);
+}
diff --git a/tools/testing/vsock/vsock_test_zerocopy.h b/tools/testing/vsock/vsock_test_zerocopy.h
index 3ef2579e024d..d46c91a69f16 100644
--- a/tools/testing/vsock/vsock_test_zerocopy.h
+++ b/tools/testing/vsock/vsock_test_zerocopy.h
@@ -12,4 +12,7 @@ void test_seqpacket_msgzcopy_server(const struct test_opts *opts);
void test_stream_msgzcopy_empty_errq_client(const struct test_opts *opts);
void test_stream_msgzcopy_empty_errq_server(const struct test_opts *opts);
+void test_stream_msgzcopy_mangle_client(const struct test_opts *opts);
+void test_stream_msgzcopy_mangle_server(const struct test_opts *opts);
+
#endif /* VSOCK_TEST_ZEROCOPY_H */