From f62a5d39368e34a966c8df63e1f05eed7fe9c5de Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 30 Mar 2025 17:52:42 -0400 Subject: cgroup/cpuset: Remove remote_partition_check() & make update_cpumasks_hier() handle remote partition Currently, changes in exclusive CPUs are being handled in remote_partition_check() by disabling conflicting remote partitions. However, that may lead to results unexpected by the users. Fix this problem by removing remote_partition_check() and making update_cpumasks_hier() handle changes in descendant remote partitions properly. The compute_effective_exclusive_cpumask() function is enhanced to check the exclusive_cpus and effective_xcpus from siblings and exclude them in its effective exclusive CPUs computation and return a value to show if there are any sibling conflicts. This is somewhat like the cpu_exclusive flag check in validate_change(). This is the initial step to enable us to retire the use of cpu_exclusive flag in cgroup v2 in the future. One of the tests in the TEST_MATRIX of the test_cpuset_prs.sh script has to be updated due to changes in the way a child remote partition root is being handled (updated instead of invalidated) in update_cpumasks_hier(). Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpuset_prs.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 400a696a0d21..4e3fabed52da 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -326,8 +326,8 @@ TEST_MATRIX=( . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \ A1:P0,A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \ - A1:P0,A3:P-2" + . . X3 . . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3,XA3:3 \ + A1:P0,A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . X4 . . . 
0 A1:0-3,A2:1-3,A3:2-3,XA1:4,XA2:,XA3 \ A1:P0,A3:P-2" -- cgit v1.2.3 From 65046b5e0ad71990b5a0256710cf050d2d2ab3dd Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 30 Mar 2025 17:52:46 -0400 Subject: selftest/cgroup: Update test_cpuset_prs.sh to use | as effective CPUs and state separator Currently, ',' is used as the cgroup separator of the expected effective CPUs and partition root states in the test matrix. However, ',' can be part of the output of the cpuset.cpus*.effective and cpuset.cpus.isolated files. Change the separator to '|' so that ',' can appear as part of the expected values. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpuset_prs.sh | 236 +++++++++++----------- 1 file changed, 118 insertions(+), 118 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 4e3fabed52da..f11f347129d8 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -207,130 +207,130 @@ TEST_MATRIX=( " C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5" " C0-1 . . C2-3:P1 . . . C2 0 " " C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5" - "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3" - "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0" - "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2" - "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1" - "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3" + "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3" + "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0" + "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2" + "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1" + "$SETUP_A123_PARTITIONS . 
C2-3 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # CPU offlining cases: - " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1,B1:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1,A2:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1,A2:2-3" - "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2,A2: A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1" - "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2,A2:2 A1:P1,A2:P-1" - "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" - "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1" - "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1" - "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" + " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1|B1:3" + "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3" + "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3" + "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3" + "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3" + "C2-3:P1:S+ C3:P1 . . 
O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" + "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" + "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1" + "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1" + "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1" + "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1|A2:2|A3: A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" + "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1|A2:3|A3:3 A1:P1|A2:P1|A3:P-1" + "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1|A2:2|A3:2 A1:P1|A2:P1|A3:P-1" + "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # # Remote partition and cpuset.cpus.exclusive tests # - " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 
0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3|A2:1-3|A3:2-3|B1:2-3 A1:P0|A3:P0|B1:P-2" " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1,A3:2-3 A2:P2,A3:P2 1-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5" - " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4,A2:1-3,A3:1-3 A2:P2 1-3" - " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4,A2:4,A3:2-3 A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5" + " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3" + " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3" # Nested remote/local partition tests - " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . 
P1 0 A1:0-1,A2:2-3,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P0,B1:P1" - " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4,3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \ - A1:P0,A2:P2,A3:P1 2-4,2-3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1,A2:2,A3:3-4 \ - A1:P0,A2:P2,A3:P1 2" + " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-4|2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \ + A1:P0|A2:P1|A3:P0|B1:P1" + " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-4|3" + " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1|A2:2-3|A3:4 \ + A1:P0|A2:P2|A3:P1 2-4|2-3" + " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \ + A1:P0|A2:P2|A3:P1 2" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ - . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \ - A1:P0,A2:P-2,A3:P-1" + . . X5 . . 0 A1:0-4|A2:1-4|A3:2-4 \ + A1:P0|A2:P-2|A3:P-1" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ - . . . X1 . 0 A1:0-1,A2:2-4,A3:2-4 \ - A1:P0,A2:P2,A3:P-1 2-4" + . . . X1 . 0 A1:0-1|A2:2-4|A3:2-4 \ + A1:P0|A2:P2|A3:P-1 2-4" # Remote partition offline tests - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3," + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3" + " C0-3:S+ C1-3:S+ C3 . 
X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|" # An invalidated remote partition cannot self-recover from hotplug - " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2" + " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2" # cpus.exclusive.effective clearing test - " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3,A2:1-3,A3:2,XA1:" + " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:" # Invalid to valid remote partition transition test - " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3,A2:1-3,XA2: A2:P-2" + " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2" " C0-3:S+ C1-3:X3:P2 - . . X2-3 P2 . . 0 A1:0-2,A2:3,XA2:3 A2:P2 3" + . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3" # Invalid to valid local partition direct transition tests - " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:1-3:XA2: A1:P2,A2:P-2 1-3" - " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3" - " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4,B1:4-6 A1:P-2,B1:P0" - " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3" - " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3" + " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3" + " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3" + " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:4-6 A1:P-2|B1:P0" + " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3" + " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3|B1:4-5 A1:P2|B1:P0 0-3" # Local partition invalidation tests " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . . . . 0 A1:1,A2:2,A3:3 A1:P2,A2:P2,A3:P2 1-3" + . . . . . 0 A1:1|A2:2|A3:3 A1:P2|A2:P2|A3:P2 1-3" " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . X4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" + . . X4 . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . C4:X . . 
0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" + . . C4:X . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" # Local partition CPU change tests - " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2" - " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3" + " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2" + " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3" # cpus_allowed/exclusive_cpus update tests " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . X:C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \ - A1:P0,A3:P-2" + . X:C4 . P2 . 0 A1:4|A2:4|XA2:|XA3:|A3:4 \ + A1:P0|A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \ - A1:P0,A3:P-2" + . X1 . P2 . 0 A1:0-3|A2:1-3|XA1:1|XA2:|XA3:|A3:2-3 \ + A1:P0|A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \ - A1:P0,A3:P2 3" + . . X3 P2 . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3 \ + A1:P0|A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . . X3 . . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3,XA3:3 \ - A1:P0,A3:P2 3" + . . X3 . . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3|XA3:3 \ + A1:P0|A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . X4 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:4,XA2:,XA3 \ - A1:P0,A3:P-2" + . X4 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:4|XA2:|XA3 \ + A1:P0|A3:P-2" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- @@ -339,68 +339,68 @@ TEST_MATRIX=( # # Adding CPUs to partition root that are not in parent's # cpuset.cpus is allowed, but those extra CPUs are ignored. - "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1" # Taking away all CPUs from parent or itself if there are tasks # will make the partition invalid. - "C2-3:P1:S+ C3:P1 . . T C2-3 . . 
0 A1:2-3,A2:2-3 A1:P1,A2:P-1" - " C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1" - "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" - "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1" + " C3:P1:S+ C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1" + "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" + "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # Changing a partition root to member makes child partitions invalid - "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1" - "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1" + "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1" + "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P0|A3:P-1" # cpuset.cpus can contains cpus not in parent's cpuset.cpus as long # as they overlap. - "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1" # Deletion of CPUs distributed to child cgroup is allowed. - "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5" + "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5" # To become a valid partition root, cpuset.cpus must overlap parent's # cpuset.cpus. - " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1" + " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1" # Enabling partition with child cpusets is allowed - " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1" + " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1" - # A partition root with non-partition root parent is invalid, but it + # A partition root with non-partition root parent is invalid| but it # can be made valid if its parent becomes a partition root too. - " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2" - " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2" + " C0-1:S+ C1 . C2-3 . P2 . . 
0 A1:0-1|A2:1 A1:P0|A2:P-2" + " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2" # A non-exclusive cpuset.cpus change will invalidate partition and its siblings - " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0" - " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1" - " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1" + " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P0" + " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P-1" + " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P0|B1:P-1" # cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it - " C0-3 . . C4-5 X5 . . . 0 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X5 . . . 0 A1:0-3|B1:4-5" # Child partition root that try to take all CPUs from parent partition # with tasks will remain invalid. - " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1" - " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1,A2:1-4 A1:P1,A2:P1" - " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1" + " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" + " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1" + " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" # Clearing of cpuset.cpus with a preset cpuset.cpus.exclusive shouldn't # affect cpuset.cpus.exclusive.effective. - " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4,XA2:3" + " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4|XA2:3" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # Failure cases: # A task cannot be added to a partition with no cpu - "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1" # Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected - " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X0-3 . . 
X3-5 1 A1:0-3|B1:4-5" # cpuset.cpus cannot be a subset of sibling cpuset.cpus.exclusive - " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3|B1:4-5" ) # @@ -567,12 +567,12 @@ dump_states() # # Check effective cpus -# $1 - check string, format: :[,:]* +# $1 - check string, format: :[|:]* # check_effective_cpus() { CHK_STR=$1 - for CHK in $(echo $CHK_STR | sed -e "s/,/ /g") + for CHK in $(echo $CHK_STR | sed -e "s/|/ /g") do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 @@ -593,12 +593,12 @@ check_effective_cpus() # # Check cgroup states -# $1 - check string, format: :[,:]* +# $1 - check string, format: :[|:]* # check_cgroup_states() { CHK_STR=$1 - for CHK in $(echo $CHK_STR | sed -e "s/,/ /g") + for CHK in $(echo $CHK_STR | sed -e "s/|/ /g") do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 @@ -674,9 +674,9 @@ check_isolcpus() then EXPECT_VAL= EXPECT_VAL2= - elif [[ $(expr $EXPECT_VAL : ".*,.*") > 0 ]] + elif [[ $(expr $EXPECT_VAL : ".*|.*") > 0 ]] then - set -- $(echo $EXPECT_VAL | sed -e "s/,/ /g") + set -- $(echo $EXPECT_VAL | sed -e "s/|/ /g") EXPECT_VAL=$1 EXPECT_VAL2=$2 else -- cgit v1.2.3 From b2b2b4d058b776be0168b4ea46ed84cfb0f884e9 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 30 Mar 2025 17:52:47 -0400 Subject: selftest/cgroup: Clean up and restructure test_cpuset_prs.sh Cleaning up the test_cpuset_prs.sh script and restructure some of the functions so that a new test matrix with a different cgroup directory structure can be added in the next patch. 
Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpuset_prs.sh | 257 +++++++++++++--------- 1 file changed, 156 insertions(+), 101 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index f11f347129d8..d99412e7d196 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -88,22 +88,30 @@ echo "" > test/cpuset.cpus # If isolated CPUs have been reserved at boot time (as shown in # cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8 # that will be used by this script for testing purpose. If not, some of -# the tests may fail incorrectly. These pre-isolated CPUs should stay in -# an isolated state throughout the testing process for now. +# the tests may fail incorrectly. Wait a bit and retry again just in case +# these isolated CPUs are leftover from previous run and have just been +# cleaned up earlier in this script. +# +# These pre-isolated CPUs should stay in an isolated state throughout the +# testing process for now. 
# BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) +[[ -n "$BOOT_ISOLCPUS" ]] && { + sleep 0.5 + BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) +} if [[ -n "$BOOT_ISOLCPUS" ]] then [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] && skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested" echo "Pre-isolated CPUs: $BOOT_ISOLCPUS" fi + cleanup() { online_cpus cd $CGROUP2 - rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 - rmdir test > /dev/null 2>&1 + rmdir A1/A2/A3 A1/A2 A1 B1 test/A1 test/B1 test > /dev/null 2>&1 [[ -n "$SCHED_DEBUG" ]] && echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose } @@ -173,14 +181,22 @@ test_add_proc() # # Cgroup test hierarchy # -# root -- A1 -- A2 -- A3 -# +- B1 +# root +# | +# +------+------+ +# | | +# A1 B1 +# | +# A2 +# | +# A3 # # P = set cpus.partition (0:member, 1:root, 2:isolated) # C = add cpu-list to cpuset.cpus # X = add cpu-list to cpuset.cpus.exclusive # S

= use prefix in subtree_control # T = put a task into cgroup +# CX = add cpu-list to both cpuset.cpus and cpuset.cpus.exclusive # O= = Write to CPU online file of # # ECPUs - effective CPUs of cpusets @@ -453,25 +469,26 @@ set_ctrl_state() PFILE=$CGRP/cpuset.cpus.partition CFILE=$CGRP/cpuset.cpus XFILE=$CGRP/cpuset.cpus.exclusive - S=$(expr substr $CMD 1 1) - if [[ $S = S ]] - then - PREFIX=${CMD#?} + case $CMD in + S*) PREFIX=${CMD#?} COMM="echo ${PREFIX}${CTRL} > $SFILE" eval $COMM $REDIRECT - elif [[ $S = X ]] - then + ;; + X*) CPUS=${CMD#?} COMM="echo $CPUS > $XFILE" eval $COMM $REDIRECT - elif [[ $S = C ]] - then - CPUS=${CMD#?} + ;; + CX*) + CPUS=${CMD#??} + COMM="echo $CPUS > $CFILE; echo $CPUS > $XFILE" + eval $COMM $REDIRECT + ;; + C*) CPUS=${CMD#?} COMM="echo $CPUS > $CFILE" eval $COMM $REDIRECT - elif [[ $S = P ]] - then - VAL=${CMD#?} + ;; + P*) VAL=${CMD#?} case $VAL in 0) VAL=member ;; @@ -486,15 +503,17 @@ set_ctrl_state() esac COMM="echo $VAL > $PFILE" eval $COMM $REDIRECT - elif [[ $S = O ]] - then - VAL=${CMD#?} + ;; + O*) VAL=${CMD#?} write_cpu_online $VAL - elif [[ $S = T ]] - then - COMM="echo 0 > $TFILE" + ;; + T*) COMM="echo 0 > $TFILE" eval $COMM $REDIRECT - fi + ;; + *) echo "Unknown command: $CMD" + exit 1 + ;; + esac RET=$? [[ $RET -ne 0 ]] && { [[ -n "$SHOWERR" ]] && { @@ -532,21 +551,18 @@ online_cpus() } # -# Return 1 if the list of effective cpus isn't the same as the initial list. +# Remove all the test cgroup directories # reset_cgroup_states() { echo 0 > $CGROUP2/cgroup.procs online_cpus - rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 - pause 0.02 - set_ctrl_state . R- - pause 0.01 + rmdir $RESET_LIST > /dev/null 2>&1 } dump_states() { - for DIR in . 
A1 A1/A2 A1/A2/A3 B1 + for DIR in $CGROUP_LIST do CPUS=$DIR/cpuset.cpus ECPUS=$DIR/cpuset.cpus.effective @@ -565,6 +581,21 @@ dump_states() done } +# +# Set the actual cgroup directory into $CGRP_DIR +# $1 - cgroup name +# +set_cgroup_dir() +{ + CGRP_DIR=$1 + [[ $CGRP_DIR = A2 ]] && CGRP_DIR=A1/A2 + [[ $CGRP_DIR = A3 ]] && CGRP_DIR=A1/A2/A3 + [[ $CGRP_DIR = c11 ]] && CGRP_DIR=p1/c11 + [[ $CGRP_DIR = c12 ]] && CGRP_DIR=p1/c12 + [[ $CGRP_DIR = c21 ]] && CGRP_DIR=p2/c21 + [[ $CGRP_DIR = c22 ]] && CGRP_DIR=p2/c22 +} + # # Check effective cpus # $1 - check string, format: :[|:]* @@ -576,7 +607,8 @@ check_effective_cpus() do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 - CPUS=$2 + EXPECTED_CPUS=$2 + ACTUAL_CPUS= if [[ $CGRP = X* ]] then CGRP=${CGRP#X} @@ -584,10 +616,10 @@ check_effective_cpus() else FILE=cpuset.cpus.effective fi - [[ $CGRP = A2 ]] && CGRP=A1/A2 - [[ $CGRP = A3 ]] && CGRP=A1/A2/A3 - [[ -e $CGRP/$FILE ]] || return 1 - [[ $CPUS = $(cat $CGRP/$FILE) ]] || return 1 + set_cgroup_dir $CGRP + [[ -e $CGRP_DIR/$FILE ]] || return 1 + ACTUAL_CPUS=$(cat $CGRP_DIR/$FILE) + [[ $EXPECTED_CPUS = $ACTUAL_CPUS ]] || return 1 done } @@ -602,23 +634,21 @@ check_cgroup_states() do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 - CGRP_DIR=$CGRP - STATE=$2 + EXPECTED_STATE=$2 FILE= - EVAL=$(expr substr $STATE 2 2) - [[ $CGRP = A2 ]] && CGRP_DIR=A1/A2 - [[ $CGRP = A3 ]] && CGRP_DIR=A1/A2/A3 + EVAL=$(expr substr $EXPECTED_STATE 2 2) - case $STATE in + set_cgroup_dir $CGRP + case $EXPECTED_STATE in P*) FILE=$CGRP_DIR/cpuset.cpus.partition ;; - *) echo "Unknown state: $STATE!" + *) echo "Unknown state: $EXPECTED_STATE!" 
exit 1 ;; esac - VAL=$(cat $FILE) + ACTUAL_STATE=$(cat $FILE) - case "$VAL" in + case "$ACTUAL_STATE" in member) VAL=0 ;; root) VAL=1 @@ -642,7 +672,7 @@ check_cgroup_states() [[ $VAL -eq 1 && $VERBOSE -gt 0 ]] && { DOMS=$(cat $CGRP_DIR/cpuset.cpus.effective) [[ -n "$DOMS" ]] && - echo " [$CGRP] sched-domain: $DOMS" > $CONSOLE + echo " [$CGRP_DIR] sched-domain: $DOMS" > $CONSOLE } done return 0 @@ -665,22 +695,22 @@ check_cgroup_states() # check_isolcpus() { - EXPECT_VAL=$1 - ISOLCPUS= + EXPECTED_ISOLCPUS=$1 + ISCPUS=${CGROUP2}/cpuset.cpus.isolated + ISOLCPUS=$(cat $ISCPUS) LASTISOLCPU= SCHED_DOMAINS=/sys/kernel/debug/sched/domains - ISCPUS=${CGROUP2}/cpuset.cpus.isolated - if [[ $EXPECT_VAL = . ]] + if [[ $EXPECTED_ISOLCPUS = . ]] then - EXPECT_VAL= - EXPECT_VAL2= - elif [[ $(expr $EXPECT_VAL : ".*|.*") > 0 ]] + EXPECTED_ISOLCPUS= + EXPECTED_SDOMAIN= + elif [[ $(expr $EXPECTED_ISOLCPUS : ".*|.*") > 0 ]] then - set -- $(echo $EXPECT_VAL | sed -e "s/|/ /g") - EXPECT_VAL=$1 - EXPECT_VAL2=$2 + set -- $(echo $EXPECTED_ISOLCPUS | sed -e "s/|/ /g") + EXPECTED_ISOLCPUS=$2 + EXPECTED_SDOMAIN=$1 else - EXPECT_VAL2=$EXPECT_VAL + EXPECTED_SDOMAIN=$EXPECTED_ISOLCPUS fi # @@ -689,20 +719,21 @@ check_isolcpus() # to make appending those CPUs easier. 
# [[ -n "$BOOT_ISOLCPUS" ]] && { - EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS} - EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS} + EXPECTED_ISOLCPUS=${EXPECTED_ISOLCPUS:+${EXPECTED_ISOLCPUS},}${BOOT_ISOLCPUS} + EXPECTED_SDOMAIN=${EXPECTED_SDOMAIN:+${EXPECTED_SDOMAIN},}${BOOT_ISOLCPUS} } # # Check cpuset.cpus.isolated cpumask # - [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { + [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && { # Take a 50ms pause and try again pause 0.05 ISOLCPUS=$(cat $ISCPUS) } - [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1 + [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && return 1 ISOLCPUS= + EXPECTED_ISOLCPUS=$EXPECTED_SDOMAIN # # Use the sched domain in debugfs to check isolated CPUs, if available @@ -736,7 +767,7 @@ check_isolcpus() done [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU - [[ "$EXPECT_VAL" = "$ISOLCPUS" ]] + [[ "$EXPECTED_SDOMAIN" = "$ISOLCPUS" ]] } test_fail() @@ -773,6 +804,63 @@ null_isolcpus_check() exit 1 } +# +# Check state transition test result +# $1 - Test number +# $2 - Expected effective CPU values +# $3 - Expected partition states +# $4 - Expected isolated CPUs +# +check_test_results() +{ + _NR=$1 + _ECPUS="$2" + _PSTATES="$3" + _ISOLCPUS="$4" + + [[ -n "$_ECPUS" && "$_ECPUS" != . ]] && { + check_effective_cpus $_ECPUS + [[ $? -ne 0 ]] && test_fail $_NR "effective CPU" \ + "Cgroup $CGRP: expected $EXPECTED_CPUS, got $ACTUAL_CPUS" + } + + [[ -n "$_PSTATES" && "$_PSTATES" != . ]] && { + check_cgroup_states $_PSTATES + [[ $? -ne 0 ]] && test_fail $_NR states \ + "Cgroup $CGRP: expected $EXPECTED_STATE, got $ACTUAL_STATE" + } + + # Compare the expected isolated CPUs with the actual ones, + # if available + [[ -n "$_ISOLCPUS" ]] && { + check_isolcpus $_ISOLCPUS + [[ $? 
-ne 0 ]] && { + [[ -n "$BOOT_ISOLCPUS" ]] && _ISOLCPUS=${_ISOLCPUS},${BOOT_ISOLCPUS} + test_fail $_NR "isolated CPU" \ + "Expect $_ISOLCPUS, get $ISOLCPUS instead" + } + } + reset_cgroup_states + # + # Check to see if effective cpu list changes + # + _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective) + RETRY=0 + while [[ $_NEWLIST != $CPULIST && $RETRY -lt 8 ]] + do + # Wait a bit longer & recheck a few times + pause 0.02 + ((RETRY++)) + _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective) + done + [[ $_NEWLIST != $CPULIST ]] && { + echo "Effective cpus changed to $_NEWLIST after test $_NR!" + exit 1 + } + null_isolcpus_check + [[ $VERBOSE -gt 0 ]] && echo "Test $I done." +} + # # Run cpuset state transition test # $1 - test matrix name @@ -785,6 +873,8 @@ run_state_test() { TEST=$1 CONTROLLER=cpuset + CGROUP_LIST=". A1 A1/A2 A1/A2/A3 B1" + RESET_LIST="A1/A2/A3 A1/A2 A1 B1" I=0 eval CNT="\${#$TEST[@]}" @@ -824,45 +914,7 @@ run_state_test() [[ $RETVAL -ne $RESULT ]] && test_fail $I result - [[ -n "$ECPUS" && "$ECPUS" != . ]] && { - check_effective_cpus $ECPUS - [[ $? -ne 0 ]] && test_fail $I "effective CPU" - } - - [[ -n "$STATES" && "$STATES" != . ]] && { - check_cgroup_states $STATES - [[ $? -ne 0 ]] && test_fail $I states - } - - # Compare the expected isolated CPUs with the actual ones, - # if available - [[ -n "$ICPUS" ]] && { - check_isolcpus $ICPUS - [[ $? -ne 0 ]] && { - [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS} - test_fail $I "isolated CPU" \ - "Expect $ICPUS, get $ISOLCPUS instead" - } - } - reset_cgroup_states - # - # Check to see if effective cpu list changes - # - NEWLIST=$(cat cpuset.cpus.effective) - RETRY=0 - while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]] - do - # Wait a bit longer & recheck a few times - pause 0.02 - ((RETRY++)) - NEWLIST=$(cat cpuset.cpus.effective) - done - [[ $NEWLIST != $CPULIST ]] && { - echo "Effective cpus changed to $NEWLIST after test $I!" 
- exit 1 - } - null_isolcpus_check - [[ $VERBOSE -gt 0 ]] && echo "Test $I done." + check_test_results $I "$ECPUS" "$STATES" "$ICPUS" ((I++)) done echo "All $I tests of $TEST PASSED." @@ -932,6 +984,7 @@ test_isolated() echo $$ > $CGROUP2/cgroup.procs [[ -d A1 ]] && rmdir A1 null_isolcpus_check + pause 0.05 } # @@ -997,6 +1050,8 @@ test_inotify() else echo "Inotify test PASSED" fi + echo member > cpuset.cpus.partition + echo "" > cpuset.cpus } trap cleanup 0 2 3 6 -- cgit v1.2.3 From e8a457b73569d7096ff46c307c37dbba55dd7a9c Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 30 Mar 2025 17:52:48 -0400 Subject: selftest/cgroup: Add a remote partition transition test to test_cpuset_prs.sh The current cgroup directory layout for running the partition state transition tests is mainly suitable for testing local partitions as well as with a mix of local and remote partitions. It is not that suitable for doing extensive remote partition and nested remote/local partition testing. Add a new set of remote partition tests REMOTE_TEST_MATRIX with another cgroup directory structure more tailored for remote partition testing to provide better code coverage. Also add a few new test cases as well as adjusting existing ones for the original TEST_MATRIX. 
Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpuset_prs.sh | 154 ++++++++++++++++++++-- 1 file changed, 143 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index d99412e7d196..a17256d9f88a 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -112,6 +112,8 @@ cleanup() online_cpus cd $CGROUP2 rmdir A1/A2/A3 A1/A2 A1 B1 test/A1 test/B1 test > /dev/null 2>&1 + rmdir rtest/p1/c11 rtest/p1/c12 rtest/p2/c21 \ + rtest/p2/c22 rtest/p1 rtest/p2 rtest > /dev/null 2>&1 [[ -n "$SCHED_DEBUG" ]] && echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose } @@ -223,9 +225,9 @@ TEST_MATRIX=( " C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5" " C0-1 . . C2-3:P1 . . . C2 0 " " C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5" - "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3" - "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3 A1:P1|A2:P1" + "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3" + "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3" + "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1" "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0" "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2" "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1" @@ -291,7 +293,7 @@ TEST_MATRIX=( A1:P0|A2:P2|A3:P1 2" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . X5 . . 0 A1:0-4|A2:1-4|A3:2-4 \ - A1:P0|A2:P-2|A3:P-1" + A1:P0|A2:P-2|A3:P-1 ." " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . . X1 . 0 A1:0-1|A2:2-4|A3:2-4 \ A1:P0|A2:P2|A3:P-1 2-4" @@ -303,13 +305,13 @@ TEST_MATRIX=( " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|" # An invalidated remote partition cannot self-recover from hotplug - " C0-3:S+ C1-3:S+ C2 . 
X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2" + " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ." # cpus.exclusive.effective clearing test " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:" # Invalid to valid remote partition transition test - " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2" + " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2 ." " C0-3:S+ C1-3:X3:P2 . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3" @@ -318,7 +320,6 @@ TEST_MATRIX=( " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3" " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:4-6 A1:P-2|B1:P0" " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3" - " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3|B1:4-5 A1:P2|B1:P0 0-3" # Local partition invalidation tests " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ @@ -334,10 +335,10 @@ TEST_MATRIX=( # cpus_allowed/exclusive_cpus update tests " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . X:C4 . P2 . 0 A1:4|A2:4|XA2:|XA3:|A3:4 \ - A1:P0|A3:P-2" + A1:P0|A3:P-2 ." " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . X1 . P2 . 0 A1:0-3|A2:1-3|XA1:1|XA2:|XA3:|A3:2-3 \ - A1:P0|A3:P-2" + A1:P0|A3:P-2 ." " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . . X3 P2 . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3 \ A1:P0|A3:P2 3" @@ -385,7 +386,7 @@ TEST_MATRIX=( # A partition root with non-partition root parent is invalid| but it # can be made valid if its parent becomes a partition root too. " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2" - " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2" + " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1" # A non-exclusive cpuset.cpus change will invalidate partition and its siblings " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P0" @@ -405,6 +406,17 @@ TEST_MATRIX=( # affect cpuset.cpus.exclusive.effective. " C1-4:X3:S+ C1:X3 . . . C . . 
0 A2:1-4|XA2:3" + # cpuset.cpus can contain CPUs that overlap a sibling cpuset with cpus.exclusive + # but creating a local partition out of it is not allowed. Similarly and change + # in cpuset.cpus of a local partition that overlaps sibling exclusive CPUs will + # invalidate it. + " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \ + A1:P0|A2:P2:B1:P1 2-4" + " CX1-4:S+ CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \ + A1:P0|A2:P2:B1:P-1 2-4" + " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \ + A1:P0|A2:P2:B1:P-1 2-4" + # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # Failure cases: @@ -419,6 +431,54 @@ TEST_MATRIX=( " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3|B1:4-5" ) +# +# Cpuset controller remote partition test matrix. +# +# Cgroup test hierarchy +# +# root +# | +# rtest (cpuset.cpus.exclusive=1-7) +# | +# +------+------+ +# | | +# p1 p2 +# +--+--+ +--+--+ +# | | | | +# c11 c12 c21 c22 +# +# REMOTE_TEST_MATRIX uses the same notational convention as TEST_MATRIX. +# Only CPUs 1-7 should be used. +# +REMOTE_TEST_MATRIX=( + # old-p1 old-p2 old-c11 old-c12 old-c21 old-c22 + # new-p1 new-p2 new-c11 new-c12 new-c21 new-c22 ECPUs Pstate ISOLCPUS + # ------ ------ ------- ------- ------- ------- ----- ------ -------- + " X1-3:S+ X4-6:S+ X1-2 X3 X4-5 X6 \ + . . P2 P2 P2 P2 c11:1-2|c12:3|c21:4-5|c22:6 \ + c11:P2|c12:P2|c21:P2|c22:P2 1-6" + " CX1-4:S+ . X1-2:P2 C3 . . \ + . . . C3-4 . . p1:3-4|c11:1-2|c12:3-4 \ + p1:P0|c11:P2|c12:P0 1-2" + " CX1-4:S+ . X1-2:P2 . . . \ + X2-4 . . . . . p1:1,3-4|c11:2 \ + p1:P0|c11:P2 2" + " CX1-5:S+ . X1-2:P2 X3-5:P1 . . \ + X2-4 . . . . . p1:1,5|c11:2|c12:3-4 \ + p1:P0|c11:P2|c12:P1 2" + " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \ + . . X2 . . . p1:1|c11:2|c12:3-4 \ + p1:P0|c11:P2|c12:P1 2" + # p1 as member, will get its effective CPUs from its parent rtest + " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \ + . . 
X1 CX2-4 . . p1:5-7|c11:1|c12:2-4 \ + p1:P0|c11:P2|c12:P1 1" + " CX1-4:S+ X5-6:P1:S+ . . . . \ + . . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \ + p1:P0|p2:P1|c11:P2|c12:P1|c22:P2 \ + 1-2,4-6|1-2,5-6" +) + # # Write to the cpu online file # $1 - = where = cpu number, value to be written @@ -902,10 +962,11 @@ run_state_test() STATES=${11} ICPUS=${12} - set_ctrl_state_noerr B1 $OLD_B1 set_ctrl_state_noerr A1 $OLD_A1 set_ctrl_state_noerr A1/A2 $OLD_A2 set_ctrl_state_noerr A1/A2/A3 $OLD_A3 + set_ctrl_state_noerr B1 $OLD_B1 + RETVAL=0 set_ctrl_state A1 $NEW_A1; ((RETVAL += $?)) set_ctrl_state A1/A2 $NEW_A2; ((RETVAL += $?)) @@ -920,6 +981,76 @@ run_state_test() echo "All $I tests of $TEST PASSED." } +# +# Run cpuset remote partition state transition test +# $1 - test matrix name +# +run_remote_state_test() +{ + TEST=$1 + CONTROLLER=cpuset + [[ -d rtest ]] || mkdir rtest + cd rtest + echo +cpuset > cgroup.subtree_control + echo "1-7" > cpuset.cpus + echo "1-7" > cpuset.cpus.exclusive + CGROUP_LIST=".. . p1 p2 p1/c11 p1/c12 p2/c21 p2/c22" + RESET_LIST="p1/c11 p1/c12 p2/c21 p2/c22 p1 p2" + I=0 + eval CNT="\${#$TEST[@]}" + + reset_cgroup_states + console_msg "Running remote partition state transition test ..." + + while [[ $I -lt $CNT ]] + do + echo "Running test $I ..." 
> $CONSOLE + [[ $VERBOSE -gt 1 ]] && { + echo "" + eval echo \${$TEST[$I]} + } + eval set -- "\${$TEST[$I]}" + OLD_p1=$1 + OLD_p2=$2 + OLD_c11=$3 + OLD_c12=$4 + OLD_c21=$5 + OLD_c22=$6 + NEW_p1=$7 + NEW_p2=$8 + NEW_c11=$9 + NEW_c12=${10} + NEW_c21=${11} + NEW_c22=${12} + ECPUS=${13} + STATES=${14} + ICPUS=${15} + + set_ctrl_state_noerr p1 $OLD_p1 + set_ctrl_state_noerr p2 $OLD_p2 + set_ctrl_state_noerr p1/c11 $OLD_c11 + set_ctrl_state_noerr p1/c12 $OLD_c12 + set_ctrl_state_noerr p2/c21 $OLD_c21 + set_ctrl_state_noerr p2/c22 $OLD_c22 + + RETVAL=0 + set_ctrl_state p1 $NEW_p1 ; ((RETVAL += $?)) + set_ctrl_state p2 $NEW_p2 ; ((RETVAL += $?)) + set_ctrl_state p1/c11 $NEW_c11; ((RETVAL += $?)) + set_ctrl_state p1/c12 $NEW_c12; ((RETVAL += $?)) + set_ctrl_state p2/c21 $NEW_c21; ((RETVAL += $?)) + set_ctrl_state p2/c22 $NEW_c22; ((RETVAL += $?)) + + [[ $RETVAL -ne 0 ]] && test_fail $I result + + check_test_results $I "$ECPUS" "$STATES" "$ICPUS" + ((I++)) + done + cd .. + rmdir rtest + echo "All $I tests of $TEST PASSED." +} + # # Testing the new "isolated" partition root type # @@ -1056,6 +1187,7 @@ test_inotify() trap cleanup 0 2 3 6 run_state_test TEST_MATRIX +run_remote_state_test REMOTE_TEST_MATRIX test_isolated test_inotify echo "All tests PASSED." -- cgit v1.2.3 From 72070e57b0a518ec8e562a2b68fdfc796ef5c040 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 4 Apr 2025 08:18:49 +0800 Subject: selftests: ublk: fix test_stripe_04 Commit 57ed58c13256 ("selftests: ublk: enable zero copy for stripe target") added test entry of test_stripe_04, but forgot to add the test script. So fix the test by adding the script file. 
Reported-by: Uday Shankar Signed-off-by: Ming Lei Reviewed-by: Uday Shankar Link: https://lore.kernel.org/r/20250404001849.1443064-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/test_stripe_04.sh | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100755 tools/testing/selftests/ublk/test_stripe_04.sh (limited to 'tools') diff --git a/tools/testing/selftests/ublk/test_stripe_04.sh b/tools/testing/selftests/ublk/test_stripe_04.sh new file mode 100755 index 000000000000..1f2b642381d1 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_04.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="stripe_04" +ERR_CODE=0 + +_prep_test "stripe" "mkfs & mount & umount on zero copy" + +backfile_0=$(_create_backfile 256M) +backfile_1=$(_create_backfile 256M) +dev_id=$(_add_ublk_dev -t stripe -z -q 2 "$backfile_0" "$backfile_1") +_check_add_dev $TID $? "$backfile_0" "$backfile_1" + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test "stripe" + +_remove_backfile "$backfile_0" +_remove_backfile "$backfile_1" + +_show_result $TID $ERR_CODE -- cgit v1.2.3 From 80fd663590cf4c6a7baaa405cd65060469c95eca Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 20 Mar 2025 11:42:55 -0400 Subject: selftests: kvm: revamp MONITOR/MWAIT tests Run each testcase in a separate VM to cover more possibilities; move WRMSR close to MONITOR/MWAIT to test updating CPUID bits while in the VM. 
Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/x86/monitor_mwait_test.c | 108 +++++++++++---------- 1 file changed, 57 insertions(+), 51 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 2b550eff35f1..390ae2d87493 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -7,6 +7,7 @@ #include "kvm_util.h" #include "processor.h" +#include "kselftest.h" #define CPUID_MWAIT (1u << 3) @@ -14,6 +15,8 @@ enum monitor_mwait_testcases { MWAIT_QUIRK_DISABLED = BIT(0), MISC_ENABLES_QUIRK_DISABLED = BIT(1), MWAIT_DISABLED = BIT(2), + CPUID_DISABLED = BIT(3), + TEST_MAX = CPUID_DISABLED * 2 - 1, }; /* @@ -35,11 +38,19 @@ do { \ testcase, vector); \ } while (0) -static void guest_monitor_wait(int testcase) +static void guest_monitor_wait(void *arg) { + int testcase = (int) (long) arg; u8 vector; - GUEST_SYNC(testcase); + u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT; + if (!(testcase & MWAIT_DISABLED)) + val |= MSR_IA32_MISC_ENABLE_MWAIT; + wrmsr(MSR_IA32_MISC_ENABLE, val); + + __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED), + "Expected CPUID.MWAIT %s\n", + (testcase & MWAIT_DISABLED) ? 
"cleared" : "set"); /* * Arbitrarily MONITOR this function, SVM performs fault checks before @@ -50,19 +61,6 @@ static void guest_monitor_wait(int testcase) vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0)); GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector); -} - -static void guest_code(void) -{ - guest_monitor_wait(MWAIT_DISABLED); - - guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED); GUEST_DONE(); } @@ -74,56 +72,64 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct ucall uc; int testcase; + char test[80]; - TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + ksft_print_header(); + ksft_set_plan(12); + for (testcase = 0; testcase <= TEST_MAX; testcase++) { + vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait); + vcpu_args_set(vcpu, 1, (void *)(long)testcase); + + disabled_quirks = 0; + if (testcase & MWAIT_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; + strcpy(test, "MWAIT can fault"); + } else { + strcpy(test, "MWAIT never faults"); + } + if (testcase & MISC_ENABLES_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; + strcat(test, ", MISC_ENABLE updates CPUID"); + } else { + strcat(test, ", no CPUID updates"); + } + + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); + + if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) && + (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED))) + continue; + + if (testcase & CPUID_DISABLED) { + strcat(test, ", CPUID clear"); + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); 
+ } else { + strcat(test, ", CPUID set"); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + } + + if (testcase & MWAIT_DISABLED) + strcat(test, ", MWAIT disabled"); - while (1) { vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - testcase = uc.args[1]; - break; case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - goto done; + /* Detected in vcpu_run */ + break; case UCALL_DONE: - goto done; + ksft_test_result_pass("%s\n", test); + break; default: TEST_FAIL("Unknown ucall %lu", uc.cmd); - goto done; - } - - disabled_quirks = 0; - if (testcase & MWAIT_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; - if (testcase & MISC_ENABLES_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; - vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); - - /* - * If the MISC_ENABLES quirk (KVM neglects to update CPUID to - * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT - * bit in MISC_ENABLES accordingly. If the quirk is enabled, - * the only valid configuration is MWAIT disabled, as CPUID - * can't be manually changed after running the vCPU. - */ - if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) { - TEST_ASSERT(testcase & MWAIT_DISABLED, - "Can't toggle CPUID features after running vCPU"); - continue; + break; } - - vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE, - (testcase & MWAIT_DISABLED) ? 
0 : MSR_IA32_MISC_ENABLE_MWAIT); + kvm_vm_free(vm); } + ksft_finished(); -done: - kvm_vm_free(vm); return 0; } -- cgit v1.2.3 From 11934771e7e79dcf4528803f9e3299b214c36f30 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Apr 2025 00:18:51 +0200 Subject: selftests: kvm: bring list of exit reasons up to date Signed-off-by: Paolo Bonzini Message-ID: <20250331221851.614582-1-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/kvm_util.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 279ad8946040..815bc45dd8dc 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -2019,9 +2019,8 @@ static struct exit_reason { KVM_EXIT_STRING(RISCV_SBI), KVM_EXIT_STRING(RISCV_CSR), KVM_EXIT_STRING(NOTIFY), -#ifdef KVM_EXIT_MEMORY_NOT_PRESENT - KVM_EXIT_STRING(MEMORY_NOT_PRESENT), -#endif + KVM_EXIT_STRING(LOONGARCH_IOCSR), + KVM_EXIT_STRING(MEMORY_FAULT), }; /* -- cgit v1.2.3 From c57047f6f37906cc4f6a4fec1683f87731f25248 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Apr 2025 16:13:27 +0200 Subject: selftests: kvm: list once tests that are valid on all architectures Several tests cover infrastructure from virt/kvm/ and userspace APIs that have only minimal requirements from architecture-specific code. As such, they are available on all architectures that have libkvm support, and this presumably will apply also in the future (for example if loongarch gets selftests support). Put them in a separate variable and list them only once. 
Signed-off-by: Paolo Bonzini Message-ID: <20250401141327.785520-1-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile.kvm | 45 +++++++++++--------------------- 1 file changed, 15 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index f773f8f99249..f62b0a5aba35 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -50,8 +50,18 @@ LIBKVM_riscv += lib/riscv/ucall.c # Non-compiled test targets TEST_PROGS_x86 += x86/nx_huge_pages_test.sh +# Compiled test targets valid on all architectures with libkvm support +TEST_GEN_PROGS_COMMON = demand_paging_test +TEST_GEN_PROGS_COMMON += dirty_log_test +TEST_GEN_PROGS_COMMON += guest_print_test +TEST_GEN_PROGS_COMMON += kvm_binary_stats_test +TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus +TEST_GEN_PROGS_COMMON += kvm_page_table_test +TEST_GEN_PROGS_COMMON += set_memory_region_test + # Compiled test targets -TEST_GEN_PROGS_x86 = x86/cpuid_test +TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_x86 += x86/cpuid_test TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test TEST_GEN_PROGS_x86 += x86/feature_msrs_test @@ -119,27 +129,21 @@ TEST_GEN_PROGS_x86 += x86/triple_fault_event_test TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test TEST_GEN_PROGS_x86 += access_tracking_perf_test TEST_GEN_PROGS_x86 += coalesced_io_test -TEST_GEN_PROGS_x86 += demand_paging_test -TEST_GEN_PROGS_x86 += dirty_log_test TEST_GEN_PROGS_x86 += dirty_log_perf_test TEST_GEN_PROGS_x86 += guest_memfd_test -TEST_GEN_PROGS_x86 += guest_print_test TEST_GEN_PROGS_x86 += hardware_disable_test -TEST_GEN_PROGS_x86 += kvm_create_max_vcpus -TEST_GEN_PROGS_x86 += kvm_page_table_test TEST_GEN_PROGS_x86 += memslot_modification_stress_test TEST_GEN_PROGS_x86 += memslot_perf_test TEST_GEN_PROGS_x86 += mmu_stress_test 
TEST_GEN_PROGS_x86 += rseq_test -TEST_GEN_PROGS_x86 += set_memory_region_test TEST_GEN_PROGS_x86 += steal_time -TEST_GEN_PROGS_x86 += kvm_binary_stats_test TEST_GEN_PROGS_x86 += system_counter_offset_test TEST_GEN_PROGS_x86 += pre_fault_memory_test # Compiled outputs used by test targets TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test +TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases TEST_GEN_PROGS_arm64 += arm64/debug-exceptions @@ -158,22 +162,16 @@ TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3 TEST_GEN_PROGS_arm64 += access_tracking_perf_test TEST_GEN_PROGS_arm64 += arch_timer TEST_GEN_PROGS_arm64 += coalesced_io_test -TEST_GEN_PROGS_arm64 += demand_paging_test -TEST_GEN_PROGS_arm64 += dirty_log_test TEST_GEN_PROGS_arm64 += dirty_log_perf_test -TEST_GEN_PROGS_arm64 += guest_print_test TEST_GEN_PROGS_arm64 += get-reg-list -TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus -TEST_GEN_PROGS_arm64 += kvm_page_table_test TEST_GEN_PROGS_arm64 += memslot_modification_stress_test TEST_GEN_PROGS_arm64 += memslot_perf_test TEST_GEN_PROGS_arm64 += mmu_stress_test TEST_GEN_PROGS_arm64 += rseq_test -TEST_GEN_PROGS_arm64 += set_memory_region_test TEST_GEN_PROGS_arm64 += steal_time -TEST_GEN_PROGS_arm64 += kvm_binary_stats_test -TEST_GEN_PROGS_s390 = s390/memop +TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_s390 += s390/memop TEST_GEN_PROGS_s390 += s390/resets TEST_GEN_PROGS_s390 += s390/sync_regs_test TEST_GEN_PROGS_s390 += s390/tprot @@ -182,27 +180,14 @@ TEST_GEN_PROGS_s390 += s390/debug_test TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test TEST_GEN_PROGS_s390 += s390/shared_zeropage_test TEST_GEN_PROGS_s390 += s390/ucontrol_test -TEST_GEN_PROGS_s390 += demand_paging_test -TEST_GEN_PROGS_s390 += dirty_log_test -TEST_GEN_PROGS_s390 += guest_print_test -TEST_GEN_PROGS_s390 += kvm_create_max_vcpus -TEST_GEN_PROGS_s390 += kvm_page_table_test TEST_GEN_PROGS_s390 += 
rseq_test -TEST_GEN_PROGS_s390 += set_memory_region_test -TEST_GEN_PROGS_s390 += kvm_binary_stats_test +TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test TEST_GEN_PROGS_riscv += riscv/ebreak_test TEST_GEN_PROGS_riscv += arch_timer TEST_GEN_PROGS_riscv += coalesced_io_test -TEST_GEN_PROGS_riscv += demand_paging_test -TEST_GEN_PROGS_riscv += dirty_log_test TEST_GEN_PROGS_riscv += get-reg-list -TEST_GEN_PROGS_riscv += guest_print_test -TEST_GEN_PROGS_riscv += kvm_binary_stats_test -TEST_GEN_PROGS_riscv += kvm_create_max_vcpus -TEST_GEN_PROGS_riscv += kvm_page_table_test -TEST_GEN_PROGS_riscv += set_memory_region_test TEST_GEN_PROGS_riscv += steal_time SPLIT_TESTS += arch_timer -- cgit v1.2.3 From 0297cdc12a87629ad904ac8c0630f7702f9a2d48 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 1 Apr 2025 07:22:38 -0700 Subject: KVM: selftests: Add option to rseq test to override /dev/cpu_dma_latency Add a "-l <latency>" param to the rseq test so that the user can override /dev/cpu_dma_latency, as described by the test's suggested workaround for not being able to complete enough migrations. cpu_dma_latency is not a normal file, even as far as procfs files go. Writes to cpu_dma_latency only persist so long as the file is open, e.g. so that the kernel automatically reverts back to a power-optimized state once the sensitive workload completes. Provide the necessary functionality instead of effectively forcing the user to write a non-obvious wrapper. 
Cc: Dongsheng Zhang Cc: Zide Chen Signed-off-by: Sean Christopherson Message-ID: <20250401142238.819487-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/rseq_test.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index e5898678bfab..1375fca80bcd 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -196,25 +196,27 @@ static void calc_min_max_cpu(void) static void help(const char *name) { puts(""); - printf("usage: %s [-h] [-u]\n", name); + printf("usage: %s [-h] [-u] [-l latency]\n", name); printf(" -u: Don't sanity check the number of successful KVM_RUNs\n"); + printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n"); puts(""); exit(0); } int main(int argc, char *argv[]) { + int r, i, snapshot, opt, fd = -1, latency = -1; bool skip_sanity_check = false; - int r, i, snapshot; struct kvm_vm *vm; struct kvm_vcpu *vcpu; u32 cpu, rseq_cpu; - int opt; - while ((opt = getopt(argc, argv, "hu")) != -1) { + while ((opt = getopt(argc, argv, "hl:u")) != -1) { switch (opt) { case 'u': skip_sanity_check = true; + case 'l': + latency = atoi_paranoid(optarg); break; case 'h': default: @@ -243,6 +245,20 @@ int main(int argc, char *argv[]) pthread_create(&migration_thread, NULL, migration_worker, (void *)(unsigned long)syscall(SYS_gettid)); + if (latency >= 0) { + /* + * Writes to cpu_dma_latency persist only while the file is + * open, i.e. it allows userspace to provide guaranteed latency + * while running a workload. Keep the file open until the test + * completes, otherwise writing cpu_dma_latency is meaningless. 
+ */ + fd = open("/dev/cpu_dma_latency", O_RDWR); + TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd)); + + r = write(fd, &latency, 4); + TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency"); + } + for (i = 0; !done; i++) { vcpu_run(vcpu); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, @@ -278,6 +294,9 @@ int main(int argc, char *argv[]) "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu); } + if (fd > 0) + close(fd); + /* * Sanity check that the test was able to enter the guest a reasonable * number of times, e.g. didn't get stalled too often/long waiting for @@ -293,8 +312,8 @@ int main(int argc, char *argv[]) TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2), "Only performed %d KVM_RUNs, task stalled too much?\n\n" " Try disabling deep sleep states to reduce CPU wakeup latency,\n" - " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n" - " or run with -u to disable this sanity check.", i); + " e.g. via cpuidle.off=1 or via -l , or run with -u to\n" + " disable this sanity check.", i); pthread_join(migration_thread, NULL); -- cgit v1.2.3 From a6d0dbba950880e269d433222ca6d516ebe8a6ae Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 1 Apr 2025 15:15:57 -0700 Subject: lib/crc: remove unnecessary prompt for CONFIG_CRC_T10DIF All modules that need CONFIG_CRC_T10DIF already select it, so there is no need to bother users about the option. Reviewed-by: Christoph Hellwig Reviewed-by: "Martin K. 
Petersen" Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250401221600.24878-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- tools/testing/selftests/bpf/config.x86_64 | 1 - tools/testing/selftests/hid/config.common | 1 - 2 files changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 5680befae8c6..5e713ef7caa3 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -39,7 +39,6 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common index 45b5570441ce..b1f40857307d 100644 --- a/tools/testing/selftests/hid/config.common +++ b/tools/testing/selftests/hid/config.common @@ -39,7 +39,6 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_CRYPTO_SEQIV=y -- cgit v1.2.3 From 9bae8f4f21689b96a4b4fc505740dd97b9142c41 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 3 Apr 2025 15:08:41 -0700 Subject: selftests/bpf: Make res_spin_lock test less verbose Currently, the res_spin_lock test is too chatty as it constantly prints the test_run results for each iteration in each thread, so in case verbose output is requested or things go wrong, it will flood the logs of CI and other systems with repeated messages that offer no valuable insight. Reduce this by doing assertions when the condition actually flips, and proceed to break out and exit the threads. We still assert to mark the test as failed and print the expected and reported values. 
Suggested-by: Alexei Starovoitov Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250403220841.66654-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/res_spin_lock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c index 115287ba441b..0703e987df89 100644 --- a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c @@ -25,8 +25,11 @@ static void *spin_lock_thread(void *arg) while (!READ_ONCE(skip)) { err = bpf_prog_test_run_opts(prog_fd, &topts); - ASSERT_OK(err, "test_run"); - ASSERT_OK(topts.retval, "test_run retval"); + if (err || topts.retval) { + ASSERT_OK(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); + break; + } } pthread_exit(arg); } -- cgit v1.2.3 From d8d78398e550039295e0237eafb703e2d21f7d57 Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Sat, 5 Apr 2025 00:10:41 +0000 Subject: KVM: arm64: selftests: Introduce and use hardware-definition macros The kvm selftest library for arm64 currently configures the hardware fields, such as shift and mask in the page-table entries and registers, directly with numbers. While it adds comments in places, it's better to rewrite them with appropriate macros to improve the readability and reduce the risk of errors. Hence, introduce macros to define the hardware fields and use them in the arm64 processor library. Most of the definitions are primarily copied from Linux's header, arch/arm64/include/asm/pgtable-hwdef.h. No functional change intended. 
Suggested-by: Oliver Upton Signed-off-by: Raghavendra Rao Ananta Link: https://lore.kernel.org/r/20250405001042.1470552-2-rananta@google.com Signed-off-by: Oliver Upton --- .../testing/selftests/kvm/arm64/page_fault_test.c | 2 +- .../selftests/kvm/include/arm64/processor.h | 66 ++++++++++++++++++++-- tools/testing/selftests/kvm/lib/arm64/processor.c | 57 ++++++++++--------- 3 files changed, 92 insertions(+), 33 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index ec33a8f9c908..dc6559dad9d8 100644 --- a/tools/testing/selftests/kvm/arm64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -199,7 +199,7 @@ static bool guest_set_ha(void) if (hadbs == 0) return false; - tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; + tcr = read_sysreg(tcr_el1) | TCR_HA; write_sysreg(tcr, tcr_el1); isb(); diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 1e8d0d531fbd..7d88ff22013a 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -62,6 +62,66 @@ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) +/* TCR_EL1 specific flags */ +#define TCR_T0SZ_OFFSET 0 +#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET) + +#define TCR_IRGN0_SHIFT 8 +#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT) + +#define TCR_ORGN0_SHIFT 10 +#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT) + 
+#define TCR_SH0_SHIFT 12 +#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT) +#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT) + +#define TCR_TG0_SHIFT 14 +#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT) +#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT) +#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT) +#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT) + +#define TCR_IPS_SHIFT 32 +#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT) +#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT) +#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT) +#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT) +#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT) + +#define TCR_HA (UL(1) << 39) +#define TCR_DS (UL(1) << 59) + +/* + * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). + */ +#define PTE_ATTRINDX(t) ((t) << 2) +#define PTE_ATTRINDX_MASK GENMASK(4, 2) +#define PTE_ATTRINDX_SHIFT 2 + +#define PTE_VALID BIT(0) +#define PGD_TYPE_TABLE BIT(1) +#define PUD_TYPE_TABLE BIT(1) +#define PMD_TYPE_TABLE BIT(1) +#define PTE_TYPE_PAGE BIT(1) + +#define PTE_AF BIT(10) + +#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift)) +#define PTE_ADDR_51_48 GENMASK(15, 12) +#define PTE_ADDR_51_48_SHIFT 12 +#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift)) +#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8) +#define PTE_ADDR_51_50_LPA2_SHIFT 8 + void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init); struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_vcpu_init *init, void *guest_code); @@ -102,12 +162,6 @@ enum { (v) == VECTOR_SYNC_LOWER_64 || \ (v) == VECTOR_SYNC_LOWER_32) -/* Access flag */ -#define PTE_AF (1ULL << 10) - -/* Access flag update enable/disable */ -#define TCR_EL1_HA (1ULL << 39) - void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, uint32_t *ipa16k, uint32_t *ipa64k); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 7ba3aa3755f3..da5802c8a59c 100644 
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -72,13 +72,13 @@ static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) uint64_t pte; if (use_lpa2_pte_format(vm)) { - pte = pa & GENMASK(49, vm->page_shift); - pte |= FIELD_GET(GENMASK(51, 50), pa) << 8; - attrs &= ~GENMASK(9, 8); + pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift); + pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT; + attrs &= ~PTE_ADDR_51_50_LPA2; } else { - pte = pa & GENMASK(47, vm->page_shift); + pte = pa & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT; } pte |= attrs; @@ -90,12 +90,12 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) uint64_t pa; if (use_lpa2_pte_format(vm)) { - pa = pte & GENMASK(49, vm->page_shift); - pa |= FIELD_GET(GENMASK(9, 8), pte) << 50; + pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift); + pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50; } else { - pa = pte & GENMASK(47, vm->page_shift); + pa = pte & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48; } return pa; @@ -128,7 +128,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint64_t flags) { - uint8_t attr_idx = flags & 7; + uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT); + uint64_t pg_attr; uint64_t *ptep; TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -147,18 +148,21 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PGD_TYPE_TABLE | PTE_VALID); switch (vm->pgtable_levels) { case 4: ptep = 
addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PUD_TYPE_TABLE | PTE_VALID); /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PMD_TYPE_TABLE | PTE_VALID); /* fall through */ case 2: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; @@ -167,7 +171,8 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, TEST_FAIL("Page table levels must be 2, 3, or 4"); } - *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */ + pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID; + *ptep = addr_pte(vm, paddr, pg_attr); } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) @@ -293,20 +298,20 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P48V48_64K: case VM_MODE_P40V48_64K: case VM_MODE_P36V48_64K: - tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= TCR_TG0_64K; break; case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: case VM_MODE_P36V47_16K: - tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ + tcr_el1 |= TCR_TG0_16K; break; case VM_MODE_P52V48_4K: case VM_MODE_P48V48_4K: case VM_MODE_P40V48_4K: case VM_MODE_P36V48_4K: - tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + tcr_el1 |= TCR_TG0_4K; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); @@ -319,35 +324,35 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P52V48_4K: case VM_MODE_P52V48_16K: case VM_MODE_P52V48_64K: - tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + tcr_el1 |= TCR_IPS_52_BITS; ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; break; case VM_MODE_P48V48_4K: case VM_MODE_P48V48_16K: case 
VM_MODE_P48V48_64K: - tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ + tcr_el1 |= TCR_IPS_48_BITS; break; case VM_MODE_P40V48_4K: case VM_MODE_P40V48_16K: case VM_MODE_P40V48_64K: - tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ + tcr_el1 |= TCR_IPS_40_BITS; break; case VM_MODE_P36V48_4K: case VM_MODE_P36V48_16K: case VM_MODE_P36V48_64K: case VM_MODE_P36V47_16K: - tcr_el1 |= 1ul << 32; /* IPS = 36 bits */ + tcr_el1 |= TCR_IPS_36_BITS; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */; - /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; - tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); - tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; + sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I; + + tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER; + tcr_el1 |= TCR_T0SZ(vm->va_bits); if (use_lpa2_pte_format(vm)) - tcr_el1 |= (1ul << 59) /* DS */; + tcr_el1 |= TCR_DS; vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); -- cgit v1.2.3 From c8631ea59b6523035ffb607634eef7bacc8947fe Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Sat, 5 Apr 2025 00:10:42 +0000 Subject: KVM: arm64: selftests: Explicitly set the page attrs to Inner-Shareable Atomic instructions such as 'ldset' in the guest have been observed to cause an EL1 data abort with FSC 0x35 (IMPLEMENTATION DEFINED fault (Unsupported Exclusive or Atomic access)) on Neoverse-N3. Per DDI0487L.a B2.2.6, atomic instructions are only architecturally guaranteed for Inner/Outer Shareable Normal Write-Back memory. For anything else the behavior is IMPLEMENTATION DEFINED and can lose atomicity, or, in this case, generate an abort. It would appear that selftests sets up the stage-1 mappings as Non Shareable, leading to the observed abort. Explicitly set the Shareability field to Inner Shareable for non-LPA2 page tables. 
Note that for the LPA2 page table format, translations for cacheable memory inherit the shareability attribute of the PTW, i.e. TCR_ELx.SH{0,1}. Suggested-by: Oliver Upton Signed-off-by: Raghavendra Rao Ananta Link: https://lore.kernel.org/r/20250405001042.1470552-3-rananta@google.com [oliver: Rephrase changelog] Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/include/arm64/processor.h | 1 + tools/testing/selftests/kvm/lib/arm64/processor.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 7d88ff22013a..b0fc0f945766 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -113,6 +113,7 @@ #define PMD_TYPE_TABLE BIT(1) #define PTE_TYPE_PAGE BIT(1) +#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */ #define PTE_AF BIT(10) #define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift)) diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index da5802c8a59c..9d69904cb608 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -172,6 +172,9 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, } pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID; + if (!use_lpa2_pte_format(vm)) + pg_attr |= PTE_SHARED; + *ptep = addr_pte(vm, paddr, pg_attr); } -- cgit v1.2.3 From ed471e1984939a500eea179bc16e1c2aadf00db5 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 7 Apr 2025 10:43:51 +0900 Subject: memblock tests: Fix mutex related build error Fix mutex and free_reserved_area() related build errors which have been introduced by commit 74e2498ccf7b ("mm/memblock: Add reserved memory release function"). 
Fixes: 74e2498ccf7b ("mm/memblock: Add reserved memory release function") Reported-by: Wei Yang Closes: https://lore.kernel.org/all/20250405023018.g2ae52nrz2757b3n@master/ Signed-off-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/174399023133.47537.7375975856054461445.stgit@devnote2 Signed-off-by: Mike Rapoport (Microsoft) --- tools/testing/memblock/internal.h | 6 ++++++ tools/testing/memblock/linux/mutex.h | 14 ++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 tools/testing/memblock/linux/mutex.h (limited to 'tools') diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index 1cf82acb2a3e..0ab4b53bb4f3 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -24,4 +24,10 @@ static inline void accept_memory(phys_addr_t start, unsigned long size) { } +static inline unsigned long free_reserved_area(void *start, void *end, + int poison, const char *s) +{ + return 0; +} + #endif diff --git a/tools/testing/memblock/linux/mutex.h b/tools/testing/memblock/linux/mutex.h new file mode 100644 index 000000000000..ae3f497165d6 --- /dev/null +++ b/tools/testing/memblock/linux/mutex.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MUTEX_H +#define _MUTEX_H + +#define DEFINE_MUTEX(name) int name + +static inline void dummy_mutex_guard(int *name) +{ +} + +#define guard(mutex) \ + dummy_##mutex##_guard + +#endif /* _MUTEX_H */ \ No newline at end of file -- cgit v1.2.3 From 3b394dff15e14550a26b133fc7b556b5b526f6a5 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 18 Mar 2025 07:19:48 +0000 Subject: memblock tests: add test for memblock_set_node Add a test to check memblock_set_node() behavior. And create a corner case in which the memblock.reserved array is doubled during memblock_set_node(). And finally make sure all regions in memblock.reserved are with valid node id. 
Signed-off-by: Wei Yang CC: Mike Rapoport CC: Yajun Deng Link: https://lore.kernel.org/r/20250318071948.23854-4-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- tools/testing/memblock/tests/basic_api.c | 102 +++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) (limited to 'tools') diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index 67503089e6a0..01e836fba488 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -2434,6 +2434,107 @@ static int memblock_overlaps_region_checks(void) return 0; } +#ifdef CONFIG_NUMA +static int memblock_set_node_check(void) +{ + unsigned long i, max_reserved; + struct memblock_region *rgn; + void *orig_region; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_allow_resize(); + + dummy_physical_memory_init(); + memblock_add(dummy_physical_memory_base(), MEM_SIZE); + orig_region = memblock.reserved.regions; + + /* Equally Split range to node 0 and 1*/ + memblock_set_node(memblock_start_of_DRAM(), + memblock_phys_mem_size() / 2, &memblock.memory, 0); + memblock_set_node(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2, + memblock_phys_mem_size() / 2, &memblock.memory, 1); + + ASSERT_EQ(memblock.memory.cnt, 2); + rgn = &memblock.memory.regions[0]; + ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); + ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2); + ASSERT_EQ(memblock_get_region_node(rgn), 0); + rgn = &memblock.memory.regions[1]; + ASSERT_EQ(rgn->base, memblock_start_of_DRAM() + memblock_phys_mem_size() / 2); + ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2); + ASSERT_EQ(memblock_get_region_node(rgn), 1); + + /* Reserve 126 regions with the last one across node boundary */ + for (i = 0; i < 125; i++) + memblock_reserve(memblock_start_of_DRAM() + SZ_16 * i, SZ_8); + + memblock_reserve(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2 - SZ_8, + SZ_16); + + /* + * Commit 
61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") + * do following process to set nid to each memblock.reserved region. + * But it may miss some region if memblock_set_node() double the + * array. + * + * By checking 'max', we make sure all region nid is set properly. + */ +repeat: + max_reserved = memblock.reserved.max; + for_each_mem_region(rgn) { + int nid = memblock_get_region_node(rgn); + + memblock_set_node(rgn->base, rgn->size, &memblock.reserved, nid); + } + if (max_reserved != memblock.reserved.max) + goto repeat; + + /* Confirm each region has valid node set */ + for_each_reserved_mem_region(rgn) { + ASSERT_TRUE(numa_valid_node(memblock_get_region_node(rgn))); + if (rgn == (memblock.reserved.regions + memblock.reserved.cnt - 1)) + ASSERT_EQ(1, memblock_get_region_node(rgn)); + else + ASSERT_EQ(0, memblock_get_region_node(rgn)); + } + + dummy_physical_memory_cleanup(); + + /* + * The current reserved.regions is occupying a range of memory that + * allocated from dummy_physical_memory_init(). After free the memory, + * we must not use it. So restore the origin memory region to make sure + * the tests can run as normal and not affected by the double array. 
+ */ + memblock.reserved.regions = orig_region; + memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS; + + test_pass_pop(); + + return 0; +} + +static int memblock_set_node_checks(void) +{ + prefix_reset(); + prefix_push("memblock_set_node"); + test_print("Running memblock_set_node tests...\n"); + + memblock_set_node_check(); + + prefix_pop(); + + return 0; +} +#else +static int memblock_set_node_checks(void) +{ + return 0; +} +#endif + int memblock_basic_checks(void) { memblock_initialization_check(); @@ -2444,6 +2545,7 @@ int memblock_basic_checks(void) memblock_bottom_up_checks(); memblock_trim_memory_checks(); memblock_overlaps_region_checks(); + memblock_set_node_checks(); return 0; } -- cgit v1.2.3 From 22d3a63d5321326cb05a6dff7d2c488236cf56f2 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 4 Apr 2025 12:21:26 +0000 Subject: selftests: drv-net: test random value for hds-thresh hds.py has been testing 0(set_hds_thresh_zero()), MAX(set_hds_thresh_max()), GT(set_hds_thresh_gt()) values for hds-thresh. However if a hds-thresh value was already 0, set_hds_thresh_zero() can't test properly. So, it tests random value first and then tests 0, MAX, GT values. 
Testing bnxt: TAP version 13 1..13 ok 1 hds.get_hds ok 2 hds.get_hds_thresh ok 3 hds.set_hds_disable # SKIP disabling of HDS not supported by the device ok 4 hds.set_hds_enable ok 5 hds.set_hds_thresh_random ok 6 hds.set_hds_thresh_zero ok 7 hds.set_hds_thresh_max ok 8 hds.set_hds_thresh_gt ok 9 hds.set_xdp ok 10 hds.enabled_set_xdp ok 11 hds.ioctl ok 12 hds.ioctl_set_xdp ok 13 hds.ioctl_enabled_set_xdp # Totals: pass:12 fail:0 xfail:0 xpass:0 skip:1 error:0 Testing lo: TAP version 13 1..13 ok 1 hds.get_hds # SKIP tcp-data-split not supported by device ok 2 hds.get_hds_thresh # SKIP hds-thresh not supported by device ok 3 hds.set_hds_disable # SKIP ring-set not supported by the device ok 4 hds.set_hds_enable # SKIP ring-set not supported by the device ok 5 hds.set_hds_thresh_random # SKIP hds-thresh not supported by device ok 6 hds.set_hds_thresh_zero # SKIP ring-set not supported by the device ok 7 hds.set_hds_thresh_max # SKIP hds-thresh not supported by device ok 8 hds.set_hds_thresh_gt # SKIP hds-thresh not supported by device ok 9 hds.set_xdp # SKIP tcp-data-split not supported by device ok 10 hds.enabled_set_xdp # SKIP tcp-data-split not supported by device ok 11 hds.ioctl # SKIP tcp-data-split not supported by device ok 12 hds.ioctl_set_xdp # SKIP tcp-data-split not supported by device ok 13 hds.ioctl_enabled_set_xdp # SKIP tcp-data-split not supported by device # Totals: pass:0 fail:0 xfail:0 xpass:0 skip:13 error:0 Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250404122126.1555648-3-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hds.py | 33 +++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 8b7f6acad15f..7c90a040ce45 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -6,7 +6,7 @@ import os 
from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx from lib.py import CmdExitFailure, EthtoolFamily, NlError from lib.py import NetDrvEnv -from lib.py import defer, ethtool, ip +from lib.py import defer, ethtool, ip, random def _get_hds_mode(cfg, netnl) -> str: @@ -109,6 +109,36 @@ def set_hds_thresh_zero(cfg, netnl) -> None: ksft_eq(0, rings['hds-thresh']) +def set_hds_thresh_random(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + if 'hds-thresh-max' not in rings: + raise KsftSkipEx('hds-thresh-max not defined by device') + + if rings['hds-thresh-max'] < 2: + raise KsftSkipEx('hds-thresh-max is too small') + elif rings['hds-thresh-max'] == 2: + hds_thresh = 1 + else: + while True: + hds_thresh = random.randint(1, rings['hds-thresh-max'] - 1) + if hds_thresh != rings['hds-thresh']: + break + + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_thresh}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + ksft_eq(hds_thresh, rings['hds-thresh']) + def set_hds_thresh_max(cfg, netnl) -> None: try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) @@ -243,6 +273,7 @@ def main() -> None: get_hds_thresh, set_hds_disable, set_hds_enable, + set_hds_thresh_random, set_hds_thresh_zero, set_hds_thresh_max, set_hds_thresh_gt, -- cgit v1.2.3 From a8df7d0ef92eca28c610206c6748daf537ac0586 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 00:02:13 -0700 Subject: objtool: Fix INSN_CONTEXT_SWITCH handling in validate_unret() The !CONFIG_IA32_EMULATION version of 
xen_entry_SYSCALL_compat() ends with a SYSCALL instruction which is classified by objtool as INSN_CONTEXT_SWITCH. Unlike validate_branch(), validate_unret() doesn't consider INSN_CONTEXT_SWITCH in a non-function to be a dead end, so it keeps going past the end of xen_entry_SYSCALL_compat(), resulting in the following warning: vmlinux.o: warning: objtool: xen_reschedule_interrupt+0x2a: RET before UNTRAIN Fix that by adding INSN_CONTEXT_SWITCH handling to validate_unret() to match what validate_branch() is already doing. Fixes: a09a6e2399ba ("objtool: Add entry UNRET validation") Reported-by: Andrew Cooper Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/f5eda46fd09f15b1f5cde3d9ae3b92b958342add.1744095216.git.jpoimboe@kernel.org --- tools/objtool/check.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4a1f6c3169b3..c81b070ca495 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3886,6 +3886,11 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) WARN_INSN(insn, "RET before UNTRAIN"); return 1; + case INSN_CONTEXT_SWITCH: + if (insn_func(insn)) + break; + return 0; + case INSN_NOP: if (insn->retpoline_safe) return 0; -- cgit v1.2.3 From fe1042b1ef79e4d5df33d5c0f0ce936493714eec Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 00:02:14 -0700 Subject: objtool: Split INSN_CONTEXT_SWITCH into INSN_SYSCALL and INSN_SYSRET INSN_CONTEXT_SWITCH is ambiguous. It can represent both call semantics (SYSCALL, SYSENTER) and return semantics (SYSRET, IRET, RETS, RETU). Those differ significantly: calls preserve control flow whereas returns terminate it. Objtool uses an arbitrary rule for INSN_CONTEXT_SWITCH that almost works by accident: if in a function, keep going; otherwise stop. It should instead be based on the semantics of the underlying instruction. 
In preparation for improving that, split INSN_CONTEXT_SWITCH into INSN_SYSCALL and INSN_SYSRET. No functional change. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/19a76c74d2c051d3bc9a775823cafc65ad267a7a.1744095216.git.jpoimboe@kernel.org --- tools/objtool/arch/x86/decode.c | 18 +++++++++++------- tools/objtool/check.c | 6 ++++-- tools/objtool/include/objtool/arch.h | 3 ++- 3 files changed, 17 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 33d861c04ebd..3ce7b54003c2 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -522,7 +522,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec case INAT_PFX_REPNE: if (modrm == 0xca) /* eretu/erets */ - insn->type = INSN_CONTEXT_SWITCH; + insn->type = INSN_SYSRET; break; default: if (modrm == 0xca) @@ -535,11 +535,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec insn->type = INSN_JUMP_CONDITIONAL; - } else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 || - op2 == 0x35) { + } else if (op2 == 0x05 || op2 == 0x34) { - /* sysenter, sysret */ - insn->type = INSN_CONTEXT_SWITCH; + /* syscall, sysenter */ + insn->type = INSN_SYSCALL; + + } else if (op2 == 0x07 || op2 == 0x35) { + + /* sysret, sysexit */ + insn->type = INSN_SYSRET; } else if (op2 == 0x0b || op2 == 0xb9) { @@ -676,7 +680,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec case 0xca: /* retf */ case 0xcb: /* retf */ - insn->type = INSN_CONTEXT_SWITCH; + insn->type = INSN_SYSRET; break; case 0xe0: /* loopne */ @@ -721,7 +725,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec } else if (modrm_reg == 5) { /* jmpf */ - insn->type = INSN_CONTEXT_SWITCH; + insn->type = INSN_SYSRET; } else if (modrm_reg == 6) { diff --git a/tools/objtool/check.c 
b/tools/objtool/check.c index c81b070ca495..2c703b960420 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3684,7 +3684,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; - case INSN_CONTEXT_SWITCH: + case INSN_SYSCALL: + case INSN_SYSRET: if (func) { if (!next_insn || !next_insn->hint) { WARN_INSN(insn, "unsupported instruction in callable function"); @@ -3886,7 +3887,8 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) WARN_INSN(insn, "RET before UNTRAIN"); return 1; - case INSN_CONTEXT_SWITCH: + case INSN_SYSCALL: + case INSN_SYSRET: if (insn_func(insn)) break; return 0; diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 089a1acc48a8..01ef6f415adf 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -19,7 +19,8 @@ enum insn_type { INSN_CALL, INSN_CALL_DYNAMIC, INSN_RETURN, - INSN_CONTEXT_SWITCH, + INSN_SYSCALL, + INSN_SYSRET, INSN_BUG, INSN_NOP, INSN_STAC, -- cgit v1.2.3 From 9f9cc012c2cbac4833746a0182e06a8eec940d19 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 00:02:15 -0700 Subject: objtool: Stop UNRET validation on UD2 In preparation for simplifying INSN_SYSCALL, make validate_unret() terminate control flow on UD2 just like validate_branch() already does. 
Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/ce841269e7e28c8b7f32064464a9821034d724ff.1744095216.git.jpoimboe@kernel.org --- tools/objtool/check.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 2c703b960420..2dd89b0f4d5e 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3902,6 +3902,9 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) break; } + if (insn->dead_end) + return 0; + if (!next) { WARN_INSN(insn, "teh end!"); return 1; -- cgit v1.2.3 From 2dbbca9be4e5ed68d0972a2bcf4561d9cb85b7b7 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 00:02:16 -0700 Subject: objtool, xen: Fix INSN_SYSCALL / INSN_SYSRET semantics Objtool uses an arbitrary rule for INSN_SYSCALL and INSN_SYSRET that almost works by accident: if it's in a function, control flow continues after the instruction, otherwise it terminates. That behavior should instead be based on the semantics of the underlying instruction. Change INSN_SYSCALL to always preserve control flow and INSN_SYSRET to always terminate it. The changed semantic for INSN_SYSCALL requires a tweak to the !CONFIG_IA32_EMULATION version of xen_entry_SYSCALL_compat(). In Xen, SYSCALL is a hypercall which usually returns. But in this case it's a hypercall to IRET which doesn't return. Add UD2 to tell objtool to terminate control flow, and to prevent undefined behavior at runtime. 
Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Reviewed-by: Juergen Gross # for the Xen part Cc: Linus Torvalds Link: https://lore.kernel.org/r/19453dfe9a0431b7f016e9dc16d031cad3812a50.1744095216.git.jpoimboe@kernel.org --- tools/objtool/check.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 2dd89b0f4d5e..69f94bc47499 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3685,14 +3685,19 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_SYSCALL: + if (func && (!next_insn || !next_insn->hint)) { + WARN_INSN(insn, "unsupported instruction in callable function"); + return 1; + } + + break; + case INSN_SYSRET: - if (func) { - if (!next_insn || !next_insn->hint) { - WARN_INSN(insn, "unsupported instruction in callable function"); - return 1; - } - break; + if (func && (!next_insn || !next_insn->hint)) { + WARN_INSN(insn, "unsupported instruction in callable function"); + return 1; } + return 0; case INSN_STAC: @@ -3888,9 +3893,9 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) return 1; case INSN_SYSCALL: + break; + case INSN_SYSRET: - if (insn_func(insn)) - break; return 0; case INSN_NOP: -- cgit v1.2.3 From cbe9588b12d058cbb16735fd468c82ec4b3d1256 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:16:32 -0700 Subject: selftests/tc-testing: Add a test case for FQ_CODEL with HTB parent Add a test case for FQ_CODEL with HTB parent to verify packet drop behavior when the queue becomes empty. This helps ensure proper notification mechanisms between qdiscs. Note this is best-effort, it is hard to play with those parameters perfectly to always trigger ->qlen_notify(). 
Cc: Pedro Tammela Signed-off-by: Cong Wang Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20250403211636.166257-2-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/qdiscs.json | 31 ++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 25454fd95537..545966b6adc6 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -158,5 +158,36 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "a4bb", + "name": "Test FQ_CODEL with HTB parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "htb" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root htb default 10", + "$TC class add dev $DUMMY parent 1: classid 1:10 htb rate 1kbit", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] -- cgit v1.2.3 From 4cb1837ac5375b9b271cb83b2a43a3f942f4c36e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:16:33 -0700 Subject: selftests/tc-testing: Add a 
test case for FQ_CODEL with QFQ parent Add a test case for FQ_CODEL with QFQ parent to verify packet drop behavior when the queue becomes empty. This helps ensure proper notification mechanisms between qdiscs. Note this is best-effort, it is hard to play with those parameters perfectly to always trigger ->qlen_notify(). Cc: Pedro Tammela Signed-off-by: Cong Wang Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20250403211636.166257-3-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/qdiscs.json | 31 ++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 545966b6adc6..695522b00a3c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -189,5 +189,36 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "a4be", + "name": "Test FQ_CODEL with QFQ parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root qfq", + "$TC class add dev $DUMMY parent 1: classid 1:10 qfq weight 1 maxpkt 1000", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 10 -s 1000 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped 
[1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] -- cgit v1.2.3 From 72b05c1bf7ea799bfce1164d6605b27f060191ac Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:16:34 -0700 Subject: selftests/tc-testing: Add a test case for FQ_CODEL with HFSC parent Add a test case for FQ_CODEL with HFSC parent to verify packet drop behavior when the queue becomes empty. This helps ensure proper notification mechanisms between qdiscs. Note this is best-effort, it is hard to play with those parameters perfectly to always trigger ->qlen_notify(). Cc: Pedro Tammela Signed-off-by: Cong Wang Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20250403211636.166257-4-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/qdiscs.json | 31 ++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 695522b00a3c..0347b207fe6d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -220,5 +220,36 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "a4bf", + "name": "Test FQ_CODEL with HFSC parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root hfsc default 10", + "$TC class add dev $DUMMY parent 1: classid 1:10 hfsc sc rate 1kbit ul rate 1kbit", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC 
filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] -- cgit v1.2.3 From 0d5c27ecb60c6cc4e394035aa04696d7eb39f072 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:16:35 -0700 Subject: selftests/tc-testing: Add a test case for FQ_CODEL with DRR parent Add a test case for FQ_CODEL with DRR parent to verify packet drop behavior when the queue becomes empty. This helps ensure proper notification mechanisms between qdiscs. Note this is best-effort, it is hard to play with those parameters perfectly to always trigger ->qlen_notify(). Cc: Pedro Tammela Signed-off-by: Cong Wang Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20250403211636.166257-5-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/qdiscs.json | 31 ++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 0347b207fe6d..4a45fedad876 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -251,5 +251,36 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "a4c0", + "name": "Test FQ_CODEL with DRR parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set 
dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root drr", + "$TC class add dev $DUMMY parent 1: classid 1:10 drr quantum 1500", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] -- cgit v1.2.3 From ce94507f5fe04eb7fe1eecfe32a2b29233341ff0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:16:36 -0700 Subject: selftests/tc-testing: Add a test case for FQ_CODEL with ETS parent Add a test case for FQ_CODEL with ETS parent to verify packet drop behavior when the queue becomes empty. This helps ensure proper notification mechanisms between qdiscs. Note this is best-effort, it is hard to play with those parameters perfectly to always trigger ->qlen_notify(). 
Cc: Pedro Tammela Signed-off-by: Cong Wang Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20250403211636.166257-6-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/qdiscs.json | 31 ++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 4a45fedad876..d4ea9cd845a3 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -282,5 +282,36 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "a4c1", + "name": "Test FQ_CODEL with ETS parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "ets" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 strict 1", + "$TC class change dev $DUMMY parent 1: classid 1:1 ets", + "$TC qdisc add dev $DUMMY parent 1:1 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] -- cgit v1.2.3 From a1328a671e1c93a3513c286a05ff0abe6698d891 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 4 Apr 2025 11:03:34 -0700 Subject: selftests: tls: check that 
disconnect does nothing "Inspired" by syzbot test, pre-queue some data, disconnect() and try to receive(). This used to trigger a warning in TLS's strp. Now we expect the disconnect() to have almost no effect. Link: https://lore.kernel.org/67e6be74.050a0220.2f068f.007e.GAE@google.com Signed-off-by: Jakub Kicinski Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250404180334.3224206-2-kuba@kernel.org Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/tls.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 9a85f93c33d8..5ded3b3a7538 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1753,6 +1753,42 @@ TEST_F(tls_basic, rekey_tx) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +TEST_F(tls_basic, disconnect) +{ + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + struct tls_crypto_info_keys key; + struct sockaddr_in addr; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &key, 0); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &key, key.len); + ASSERT_EQ(ret, 0); + + /* Pre-queue the data so that setsockopt parses it but doesn't + * dequeue it from the TCP socket. recvmsg would dequeue. 
+ */ + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &key, key.len); + ASSERT_EQ(ret, 0); + + addr.sin_family = AF_UNSPEC; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + ret = connect(self->cfd, &addr, sizeof(addr)); + EXPECT_EQ(ret, -1); + EXPECT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); +} + TEST_F(tls, rekey) { char const *test_str_1 = "test_message_before_rekey"; -- cgit v1.2.3 From bc08b15b54b8aadbc8a8f413271c07a3f4bead87 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Apr 2025 08:31:12 -1000 Subject: sched_ext: Mark SCX_OPS_HAS_CGROUP_WEIGHT for deprecation SCX_OPS_HAS_CGROUP_WEIGHT was only used to suppress the missing cgroup weight support warnings. Now that the warnings are removed, the flag doesn't do anything. Mark it for deprecation and remove its usage from scx_flatcg. v2: Actually include the scx_flatcg update. Signed-off-by: Tejun Heo Suggested-and-reviewed-by: Andrea Righi --- tools/sched_ext/scx_flatcg.bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c index 2c720e3ecad5..fdc7170639e6 100644 --- a/tools/sched_ext/scx_flatcg.bpf.c +++ b/tools/sched_ext/scx_flatcg.bpf.c @@ -950,5 +950,5 @@ SCX_OPS_DEFINE(flatcg_ops, .cgroup_move = (void *)fcg_cgroup_move, .init = (void *)fcg_init, .exit = (void *)fcg_exit, - .flags = SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING, + .flags = SCX_OPS_ENQ_EXITING, .name = "flatcg"); -- cgit v1.2.3 From 2d12c6fb78753925f494ca9079e2383529e8ae0e Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 01:21:14 -0700 Subject: objtool: Remove ANNOTATE_IGNORE_ALTERNATIVE from CLAC/STAC ANNOTATE_IGNORE_ALTERNATIVE adds additional noise to the code generated by CLAC/STAC alternatives, hurting readability for those who read uaccess-related code generation on a regular basis.
Remove the annotation specifically for the "NOP patched with CLAC/STAC" case in favor of a manual check. Leave the other uses of that annotation in place as they're less common and more difficult to detect. Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Acked-by: Linus Torvalds Link: https://lore.kernel.org/r/fc972ba4995d826fcfb8d02733a14be8d670900b.1744098446.git.jpoimboe@kernel.org --- tools/objtool/check.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 69f94bc47499..b649049b6a11 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3505,6 +3505,34 @@ next_orig: return next_insn_same_sec(file, alt_group->orig_group->last_insn); } +static bool skip_alt_group(struct instruction *insn) +{ + struct instruction *alt_insn = insn->alts ? insn->alts->insn : NULL; + + /* ANNOTATE_IGNORE_ALTERNATIVE */ + if (insn->alt_group && insn->alt_group->ignore) + return true; + + /* + * For NOP patched with CLAC/STAC, only follow the latter to avoid + * impossible code paths combining patched CLAC with unpatched STAC + * or vice versa. + * + * ANNOTATE_IGNORE_ALTERNATIVE could have been used here, but Linus + * requested not to do that to avoid hurting .s file readability + * around CLAC/STAC alternative sites. + */ + + if (!alt_insn) + return false; + + /* Don't override ASM_{CLAC,STAC}_UNSAFE */ + if (alt_insn->alt_group && alt_insn->alt_group->ignore) + return false; + + return alt_insn->type == INSN_CLAC || alt_insn->type == INSN_STAC; +} + /* * Follow the branch starting at the given instruction, and recursively follow * any other branches (jumps). 
Meanwhile, track the frame pointer state at @@ -3625,7 +3653,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, } } - if (insn->alt_group && insn->alt_group->ignore) + if (skip_alt_group(insn)) return 0; if (handle_insn_ops(insn, next_insn, &state)) -- cgit v1.2.3 From 5cd2950359ef31c9e273588c33f372ff8e546e2e Mon Sep 17 00:00:00 2001 From: Khaled Elnaggar Date: Sun, 26 Jan 2025 21:51:33 +0200 Subject: selftests: tpm2: create a dedicated .gitignore The tpm2 selftests produce two logs: SpaceTest.log and AsyncTest.log. Only SpaceTest.log was listed in selftests/.gitignore, while AsyncTest.log remained untracked. This change creates a dedicated .gitignore in the tpm2/ directory to manage these entries, keeping tpm2-specific patterns isolated from parent .gitignore. Fixed white-space errors during commit Shuah Khan Link: https://lore.kernel.org/r/20250126195147.902608-1-khaledelnaggarlinux@gmail.com Signed-off-by: Khaled Elnaggar Reviewed-by: Jarkko Sakkinen Signed-off-by: Shuah Khan --- tools/testing/selftests/.gitignore | 1 - tools/testing/selftests/tpm2/.gitignore | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/tpm2/.gitignore (limited to 'tools') diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore index cb24124ac5b9..674aaa02e396 100644 --- a/tools/testing/selftests/.gitignore +++ b/tools/testing/selftests/.gitignore @@ -4,7 +4,6 @@ gpiogpio-hammer gpioinclude/ gpiolsgpio kselftest_install/ -tpm2/SpaceTest.log # Python bytecode and cache __pycache__/ diff --git a/tools/testing/selftests/tpm2/.gitignore b/tools/testing/selftests/tpm2/.gitignore new file mode 100644 index 000000000000..6d6165c5e35d --- /dev/null +++ b/tools/testing/selftests/tpm2/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +AsyncTest.log +SpaceTest.log -- cgit v1.2.3 From 170ec11935de4acced0ae001562cca3a144acd67 Mon Sep 17 00:00:00 2001 From: Ahmed Salem Date: Wed, 
12 Feb 2025 01:16:17 +0200 Subject: selftests: tpm2: test_smoke: use POSIX-conformant expression operator Use POSIX-conformant expression operator symbol '='. The use of the non POSIX-conformant symbol '==' would work in bash, but not in sh where the unexpected operator error would result in test_smoke.sh being skipped. Instead of changing the shebang to use bash, which may not be available on all systems, use the POSIX-conformant expression symbol '=' to test for equality. Without this patch: =================== # make -j8 TARGETS=tpm2 kselftest # selftests: tpm2: test_smoke.sh # ./test_smoke.sh: 9: [: 2: unexpected operator ok 1 selftests: tpm2: test_smoke.sh # SKIP With this patch: ================ # make -j8 TARGETS=tpm2 kselftest # selftests: tpm2: test_smoke.sh # Ran 9 tests in 9.236s ok 1 selftests: tpm2: test_smoke.sh Link: https://lore.kernel.org/r/37ztyakgrrtgvec344mg7mspchwjpxxtsprtjidso3pwkmm4f4@awsa5mzgqmtb Signed-off-by: Ahmed Salem Reviewed-by: Jarkko Sakkinen Signed-off-by: Shuah Khan --- tools/testing/selftests/tpm2/test_smoke.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh index 168f4b166234..3a60e6c6f5c9 100755 --- a/tools/testing/selftests/tpm2/test_smoke.sh +++ b/tools/testing/selftests/tpm2/test_smoke.sh @@ -6,6 +6,6 @@ ksft_skip=4 [ -e /dev/tpm0 ] || exit $ksft_skip read tpm_version < /sys/class/tpm/tpm0/tpm_version_major -[ "$tpm_version" == 2 ] || exit $ksft_skip +[ "$tpm_version" = 2 ] || exit $ksft_skip python3 -m unittest -v tpm2_tests.SmokeTest 2>&1 -- cgit v1.2.3 From 14e594a1fc8b879734f8057a870d28c86a889c5f Mon Sep 17 00:00:00 2001 From: Rae Moar Date: Wed, 19 Mar 2025 22:33:51 +0000 Subject: kunit: tool: fix count of tests if late test plan Fix test count with late test plan. 
For example, TAP version 13 ok 1 test1 1..4 Returns a count of 1 passed, 1 crashed (because it expects tests after the test plan): returning the total count of 2 tests Change this to be 1 passed, 1 error: total count of 1 test Link: https://lore.kernel.org/r/20250319223351.1517262-1-rmoar@google.com Signed-off-by: Rae Moar Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 4 ++++ tools/testing/kunit/kunit_tool_test.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index da53a709773a..c176487356e6 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -809,6 +809,10 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: test.log.extend(parse_diagnostic(lines)) if test.name != "" and not peek_test_name_match(lines, test): test.add_error(printer, 'missing subtest result line!') + elif not lines: + print_log(test.log, printer) + test.status = TestStatus.NO_TESTS + test.add_error(printer, 'No more test results!') else: parse_test_result(lines, test, expected_num, printer) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 5ff4f6ffd873..bbba921e0eac 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -371,8 +371,8 @@ class KUnitParserTest(unittest.TestCase): """ result = kunit_parser.parse_run_tests(output.splitlines(), stdout) # Missing test results after test plan should alert a suspected test crash. 
- self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status) - self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, crashed=1, errors=1)) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=2)) def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream: return kunit_parser.LineStream(enumerate(strs, start=1)) -- cgit v1.2.3 From 7d50e00fef2832e98d7e06bbfc85c1d66ee110ca Mon Sep 17 00:00:00 2001 From: Edward Liaw Date: Fri, 4 Apr 2025 22:12:20 +0000 Subject: selftests/futex: futex_waitv wouldblock test should fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Testcase should fail if -EWOULDBLOCK is not returned when expected value differs from actual value from the waiter. Link: https://lore.kernel.org/r/20250404221225.1596324-1-edliaw@google.com Fixes: 9d57f7c79748920636f8293d2f01192d702fe390 ("selftests: futex: Test sys_futex_waitv() wouldblock") Signed-off-by: Edward Liaw Reviewed-by: Thomas Gleixner Reviewed-by: André Almeida Signed-off-by: Shuah Khan --- tools/testing/selftests/futex/functional/futex_wait_wouldblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c index 7d7a6a06cdb7..2d8230da9064 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -98,7 +98,7 @@ int main(int argc, char *argv[]) info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC); if (!res || errno != EWOULDBLOCK) { - ksft_test_result_pass("futex_waitv returned: %d %s\n", + ksft_test_result_fail("futex_waitv returned: %d %s\n", res ? errno : res, res ? 
strerror(errno) : ""); ret = RET_FAIL; -- cgit v1.2.3 From 197c1eaa7ba633a482ed7588eea6fd4aa57e08d4 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Tue, 11 Mar 2025 16:09:40 +0800 Subject: selftests/mincore: Allow read-ahead pages to reach the end of the file When running the mincore_selftest on a system with an XFS file system, it failed the "check_file_mmap" test case due to the read-ahead pages reaching the end of the file. The failure log is as below: RUN global.check_file_mmap ... mincore_selftest.c:264:check_file_mmap:Expected i (1024) < vec_size (1024) mincore_selftest.c:265:check_file_mmap:Read-ahead pages reached the end of the file check_file_mmap: Test failed FAIL global.check_file_mmap This is because the read-ahead window size of the XFS file system on this machine is 4 MB, which is larger than the size from the #PF address to the end of the file. As a result, all the pages for this file are populated. blockdev --getra /dev/nvme0n1p5 8192 blockdev --getbsz /dev/nvme0n1p5 512 This issue can be fixed by extending the current FILE_SIZE 4MB to a larger number, but it will still fail if the read-ahead window size of the file system is large enough. Additionally, in the real world, read-ahead pages reaching the end of the file can happen and is an expected behavior. Therefore, allowing read-ahead pages to reach the end of the file is a better choice for the "check_file_mmap" test case.
Link: https://lore.kernel.org/r/20250311080940.21413-1-qiuxu.zhuo@intel.com Reported-by: Yi Lai Signed-off-by: Qiuxu Zhuo Signed-off-by: Shuah Khan --- tools/testing/selftests/mincore/mincore_selftest.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index e949a43a6145..efabfcbe0b49 100644 --- a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -261,9 +261,6 @@ TEST(check_file_mmap) TH_LOG("No read-ahead pages found in memory"); } - EXPECT_LT(i, vec_size) { - TH_LOG("Read-ahead pages reached the end of the file"); - } /* * End of the readahead window. The rest of the pages shouldn't * be in memory. -- cgit v1.2.3 From 6767698cf9c144c8d4c72925e9a1cd2cbc031d25 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 7 Apr 2025 20:26:33 +0200 Subject: selftests: mptcp: validate MPJoin HMacFailure counters The parent commit fixes an issue around these counters where one of them -- MPJoinAckHMacFailure -- was wrongly incremented in some cases. This makes sure the counter is always 0. It should be incremented only in case of corruption, or a wrong implementation, which should not be the case in these selftests. 
Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250407-net-mptcp-hmac-failure-mib-v1-2-3c9ecd0a3a50@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 13a3b68181ee..befa66f5a366 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1441,6 +1441,15 @@ chk_join_nr() fi fi + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "0" ]; then + rc=${KSFT_FAIL} + print_check "synack HMAC" + fail_test "got $count JOIN[s] synack HMAC failure expected 0" + fi + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") if [ -z "$count" ]; then rc=${KSFT_SKIP} @@ -1450,6 +1459,15 @@ chk_join_nr() fail_test "got $count JOIN[s] ack rx expected $ack_nr" fi + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "0" ]; then + rc=${KSFT_FAIL} + print_check "ack HMAC" + fail_test "got $count JOIN[s] ack HMAC failure expected 0" + fi + print_results "join Rx" ${rc} join_syn_tx="${join_syn_tx:-${syn_nr}}" \ -- cgit v1.2.3 From 26e705184e7a67bdcded69b4b86b583fc81971ce Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Mon, 7 Apr 2025 13:24:09 -0700 Subject: selftests/tc-testing: sfq: check that a derived limit of 1 is rejected Because the limit is updated indirectly when other parameters are updated, there are cases where even though the user requests a limit of 2 it can actually be set to 1. 
Add the following test cases to check that the kernel rejects them: - limit 2 depth 1 flows 1 - limit 2 depth 1 divisor 1 Signed-off-by: Octavian Purdila Acked-by: Cong Wang Signed-off-by: David S. Miller --- .../selftests/tc-testing/tc-tests/qdiscs/sfq.json | 36 ++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json index 50e8d72781cb..28c6ce6da7db 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json @@ -228,5 +228,41 @@ "matchCount": "0", "teardown": [ ] + }, + { + "id": "7f8f", + "name": "Check that a derived limit of 1 is rejected (limit 2 depth 1 flows 1)", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq limit 2 depth 1 flows 1", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [] + }, + { + "id": "5168", + "name": "Check that a derived limit of 1 is rejected (limit 2 depth 1 divisor 1)", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq limit 2 depth 1 divisor 1", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [] } ] -- cgit v1.2.3 From fcd7132cb1f93e4d4594ecb19b8dcecdf0497d9e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 8 Apr 2025 09:27:49 -0400 Subject: selftests/net: test sk_filter support for SKF_NET_OFF on frags Verify that a classic BPF linux socket filter correctly matches packet contents. Including when accessing contents in an skb_frag. 1. Open a SOCK_RAW socket with a classic BPF filter on UDP dport 8000. 2. 
Open a tap device with IFF_NAPI_FRAGS to inject skbs with frags. 3. Send a packet for which the UDP header is in frag[0]. 4. Receive this packet to demonstrate that the socket accepted it. Acked-by: Stanislav Fomichev Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20250408132833.195491-3-willemdebruijn.kernel@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 2 + tools/testing/selftests/net/skf_net_off.c | 244 +++++++++++++++++++++++++++++ tools/testing/selftests/net/skf_net_off.sh | 30 ++++ 4 files changed, 277 insertions(+) create mode 100644 tools/testing/selftests/net/skf_net_off.c create mode 100755 tools/testing/selftests/net/skf_net_off.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 679542f565a4..532bb732bc6d 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -39,6 +39,7 @@ scm_rights sk_bind_sendto_listen sk_connect_zero_addr sk_so_peek_off +skf_net_off socket so_incoming_cpu so_netns_cookie diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 6d718b478ed8..124078b56fa4 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -106,6 +106,8 @@ TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_PROGS += busy_poll_test.sh TEST_GEN_PROGS += proc_net_pktgen TEST_PROGS += lwt_dst_cache_ref_loop.sh +TEST_PROGS += skf_net_off.sh +TEST_GEN_FILES += skf_net_off # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/skf_net_off.c b/tools/testing/selftests/net/skf_net_off.c new file mode 100644 index 000000000000..1fdf61d6cd7f --- /dev/null +++ b/tools/testing/selftests/net/skf_net_off.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Open a tun device. 
+ * + * [modifications: use IFF_NAPI_FRAGS, add sk filter] + * + * Expects the device to have been configured previously, e.g.: + * sudo ip tuntap add name tap1 mode tap + * sudo ip link set tap1 up + * sudo ip link set dev tap1 addr 02:00:00:00:00:01 + * sudo ip -6 addr add fdab::1 peer fdab::2 dev tap1 nodad + * + * And to avoid premature pskb_may_pull: + * + * sudo ethtool -K tap1 gro off + * sudo bash -c 'echo 0 > /proc/sys/net/ipv4/ip_early_demux' + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static bool cfg_do_filter; +static bool cfg_do_frags; +static int cfg_dst_port = 8000; +static char *cfg_ifname; + +static int tun_open(const char *tun_name) +{ + struct ifreq ifr = {0}; + int fd, ret; + + fd = open("/dev/net/tun", O_RDWR); + if (fd == -1) + error(1, errno, "open /dev/net/tun"); + + ifr.ifr_flags = IFF_TAP; + if (cfg_do_frags) + ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS; + + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ - 1); + + ret = ioctl(fd, TUNSETIFF, &ifr); + if (ret) + error(1, ret, "ioctl TUNSETIFF"); + + return fd; +} + +static void sk_set_filter(int fd) +{ + const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt); + const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest); + + /* Filter UDP packets with destination port cfg_dst_port */ + struct sock_filter filter_code[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_NET_OFF + offset_proto), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, SKF_NET_OFF + offset_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dst_port, 1, 0), + BPF_STMT(BPF_RET + 
BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + + struct sock_fprog filter = { + sizeof(filter_code) / sizeof(filter_code[0]), + filter_code, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter))) + error(1, errno, "setsockopt attach filter"); +} + +static int raw_open(void) +{ + int fd; + + fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP); + if (fd == -1) + error(1, errno, "socket raw (udp)"); + + if (cfg_do_filter) + sk_set_filter(fd); + + return fd; +} + +static void tun_write(int fd) +{ + const char eth_src[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 }; + const char eth_dst[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }; + struct tun_pi pi = {0}; + struct ipv6hdr ip6h = {0}; + struct udphdr uh = {0}; + struct ethhdr eth = {0}; + uint32_t payload; + struct iovec iov[5]; + int ret; + + pi.proto = htons(ETH_P_IPV6); + + memcpy(eth.h_source, eth_src, sizeof(eth_src)); + memcpy(eth.h_dest, eth_dst, sizeof(eth_dst)); + eth.h_proto = htons(ETH_P_IPV6); + + ip6h.version = 6; + ip6h.payload_len = htons(sizeof(uh) + sizeof(uint32_t)); + ip6h.nexthdr = IPPROTO_UDP; + ip6h.hop_limit = 8; + if (inet_pton(AF_INET6, "fdab::2", &ip6h.saddr) != 1) + error(1, errno, "inet_pton src"); + if (inet_pton(AF_INET6, "fdab::1", &ip6h.daddr) != 1) + error(1, errno, "inet_pton src"); + + uh.source = htons(8000); + uh.dest = htons(cfg_dst_port); + uh.len = ip6h.payload_len; + uh.check = 0; + + payload = htonl(0xABABABAB); /* Covered in IPv6 length */ + + iov[0].iov_base = &pi; + iov[0].iov_len = sizeof(pi); + iov[1].iov_base = &eth; + iov[1].iov_len = sizeof(eth); + iov[2].iov_base = &ip6h; + iov[2].iov_len = sizeof(ip6h); + iov[3].iov_base = &uh; + iov[3].iov_len = sizeof(uh); + iov[4].iov_base = &payload; + iov[4].iov_len = sizeof(payload); + + ret = writev(fd, iov, sizeof(iov) / sizeof(iov[0])); + if (ret <= 0) + error(1, errno, "writev"); +} + +static void raw_read(int fd) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + struct msghdr msg = {0}; + struct iovec
iov[2]; + struct udphdr uh; + uint32_t payload[2]; + int ret; + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcvtimeo udp"); + + iov[0].iov_base = &uh; + iov[0].iov_len = sizeof(uh); + + iov[1].iov_base = payload; + iov[1].iov_len = sizeof(payload); + + msg.msg_iov = iov; + msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]); + + ret = recvmsg(fd, &msg, 0); + if (ret <= 0) + error(1, errno, "read raw"); + if (ret != sizeof(uh) + sizeof(payload[0])) + error(1, errno, "read raw: len=%d\n", ret); + + fprintf(stderr, "raw recv: 0x%x\n", payload[0]); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "fFi:")) != -1) { + switch (c) { + case 'f': + cfg_do_filter = true; + printf("bpf filter enabled\n"); + break; + case 'F': + cfg_do_frags = true; + printf("napi frags mode enabled\n"); + break; + case 'i': + cfg_ifname = optarg; + break; + default: + error(1, 0, "unknown option %c", optopt); + break; + } + } + + if (!cfg_ifname) + error(1, 0, "must specify tap interface name (-i)"); +} + +int main(int argc, char **argv) +{ + int fdt, fdr; + + parse_opts(argc, argv); + + fdr = raw_open(); + fdt = tun_open(cfg_ifname); + + tun_write(fdt); + raw_read(fdr); + + if (close(fdt)) + error(1, errno, "close tun"); + if (close(fdr)) + error(1, errno, "close udp"); + + fprintf(stderr, "OK\n"); + return 0; +} + diff --git a/tools/testing/selftests/net/skf_net_off.sh b/tools/testing/selftests/net/skf_net_off.sh new file mode 100755 index 000000000000..5da5066fb465 --- /dev/null +++ b/tools/testing/selftests/net/skf_net_off.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +readonly NS="ns-$(mktemp -u XXXXXX)" + +cleanup() { + ip netns del $NS +} + +ip netns add $NS +trap cleanup EXIT + +ip -netns $NS link set lo up +ip -netns $NS tuntap add name tap1 mode tap +ip -netns $NS link set tap1 up +ip -netns $NS link set dev tap1 addr 02:00:00:00:00:01 +ip -netns $NS -6 addr add 
fdab::1 peer fdab::2 dev tap1 nodad +ip netns exec $NS ethtool -K tap1 gro off + +# disable early demux, else udp_v6_early_demux pulls udp header into linear +ip netns exec $NS sysctl -w net.ipv4.ip_early_demux=0 + +echo "no filter" +ip netns exec $NS ./skf_net_off -i tap1 + +echo "filter, linear skb (-f)" +ip netns exec $NS ./skf_net_off -i tap1 -f + +echo "filter, fragmented skb (-f) (-F)" +ip netns exec $NS ./skf_net_off -i tap1 -f -F -- cgit v1.2.3 From 27eb86e22f1067a39f05e8878fd83f00e3311dc3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 7 Apr 2025 19:40:19 +0200 Subject: selftests: netfilter: add test case for recent mismatch bug Without 'nft_set_pipapo: fix incorrect avx2 match of 5th field octet" this fails: TEST: reported issues Add two elements, flush, re-add 1s [ OK ] net,mac with reload 0s [ OK ] net,port,proto 3s [ OK ] avx2 false match 0s [FAIL] False match for fe80:dead:01fe:0a02:0b03:6007:8009:a001 Other tests do not detect the kernel bug as they only alter parts in the /64 netmask. 
Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- .../selftests/net/netfilter/nft_concat_range.sh | 39 +++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index 47088b005390..1f5979c1510c 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -27,7 +27,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload net_port_proto_match" +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -387,6 +387,25 @@ race_repeat 0 perf_duration 0 " + +TYPE_avx2_mismatch=" +display avx2 false match +type_spec inet_proto . ipv6_addr +chain_spec meta l4proto . ip6 daddr +dst proto addr6 +src +start 1 +count 1 +src_delta 1 +tools ping +proto icmp6 + +race_repeat 0 + +perf_duration 0 +" + + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1629,6 +1648,24 @@ test_bug_net_port_proto_match() { nft flush ruleset } +test_bug_avx2_mismatch() +{ + setup veth send_"${proto}" set || return ${ksft_skip} + + local a1="fe80:dead:01ff:0a02:0b03:6007:8009:a001" + local a2="fe80:dead:01fe:0a02:0b03:6007:8009:a001" + + nft "add element inet filter test { icmpv6 . 
$a1 }" + + dst_addr6="$a2" + send_icmp6 + + if [ "$(count_packets)" -gt "0" ]; then + err "False match for $a2" + return 1 + fi +} + test_reported_issues() { eval test_bug_"${subtest}" } -- cgit v1.2.3 From ddc592972ff4f1350f456edc3047fc5fb01777aa Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:16 -0700 Subject: tools headers: Update the KVM headers with the kernel sources To pick up the changes in: af5366bea2cb9dfb KVM: x86: Drop the now unused KVM_X86_DISABLE_VALID_EXITS 915d2f0718a42ee0 KVM: Move KVM_REG_SIZE() definition to common uAPI header 5c17848134ab1ffb KVM: x86/xen: Restrict hypercall MSR to unofficial synthetic range 9364789567f9b492 KVM: x86: Add a VM type define for TDX fa662c9080732b1f KVM: SVM: Add Idle HLT intercept support 3adaee78306148da KVM: arm64: Allow userspace to change the implementation ID registers faf7714a47a25c62 KVM: arm64: nv: Allow userland to set VGIC maintenance IRQ c0000e58c74eed07 KVM: arm64: Introduce KVM_REG_ARM_VENDOR_HYP_BMAP_2 f83c41fb3dddbf47 KVM: arm64: Allow userspace to limit NV support to nVHE Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h Please see tools/include/uapi/README for further details. 
Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: kvm@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-2-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/arch/arm64/include/uapi/asm/kvm.h | 5 ++--- tools/arch/x86/include/uapi/asm/kvm.h | 4 ++++ tools/arch/x86/include/uapi/asm/svm.h | 2 ++ tools/include/uapi/linux/kvm.h | 9 +++++---- 4 files changed, 13 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 6d44f8c8a18f..af9d9acaf997 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -43,9 +43,6 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_DIRTY_LOG_PAGE_OFFSET 64 -#define KVM_REG_SIZE(id) \ - (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) - struct kvm_regs { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -108,6 +105,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ +#define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */ struct kvm_vcpu_init { __u32 target; @@ -418,6 +416,7 @@ enum { #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 +#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 88585c1de416..460306b35a4b 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -559,6 +559,9 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) #define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA 
(1 << 8) +#define KVM_XEN_MSR_MIN_INDEX 0x40000000u +#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; @@ -925,5 +928,6 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SEV_VM 2 #define KVM_X86_SEV_ES_VM 3 #define KVM_X86_SNP_VM 4 +#define KVM_X86_TDX_VM 5 #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 1814b413fd57..ec1321248dac 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -95,6 +95,7 @@ #define SVM_EXIT_CR14_WRITE_TRAP 0x09e #define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_IDLE_HLT 0x0a6 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 @@ -224,6 +225,7 @@ { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ + { SVM_EXIT_IDLE_HLT, "idle-halt" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 502ea63b5d2e..b6ae8ad8934b 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -617,10 +617,6 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) -#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE | \ - KVM_X86_DISABLE_EXITS_CSTATE) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { @@ -933,6 +929,7 @@ struct kvm_enable_cap { #define KVM_CAP_PRE_FAULT_MEMORY 236 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 +#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 struct kvm_irq_routing_irqchip 
{ __u32 irqchip; @@ -1070,6 +1067,10 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + #define KVM_REG_SIZE_U8 0x0000000000000000ULL #define KVM_REG_SIZE_U16 0x0010000000000000ULL #define KVM_REG_SIZE_U32 0x0020000000000000ULL -- cgit v1.2.3 From 9dbe66640f43a3530a0e7897557f4ea41c3abe85 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:17 -0700 Subject: tools headers: Update the socket headers with the kernel sources To pick up the changes in: 64e844505bc08cde include: uapi: protocol number and packet structs for AGGFRAG in ESP 18912c520674ec4d tcp: devmem: don't write truncated dmabuf CMSGs to userspace Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/in.h include/uapi/linux/in.h diff -u tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h Please see tools/include/uapi/README for further details. 
Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-3-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/in.h | 2 ++ tools/perf/trace/beauty/include/linux/socket.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 5d32d53508d9..ced0fc3c3aa5 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -79,6 +79,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ #define IPPROTO_ETHERNET IPPROTO_ETHERNET + IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */ +#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_SMC = 256, /* Shared Memory Communications */ diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index d18cc47e89bd..c3322eb3d686 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -392,6 +392,8 @@ struct ucred { extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); +extern int put_cmsg_notrunc(struct msghdr *msg, int level, int type, int len, + void *data); struct timespec64; struct __kernel_timespec; -- cgit v1.2.3 From ae62977331fcbf5c9a4260c88d9f94450db2d99a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:18 -0700 Subject: tools headers: Update the uapi/linux/perf_event.h copy with the kernel sources To pick up the changes in: c53e14f1ea4a8f8d perf: Extend per event callchain limit to branch stack Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/perf_event.h 
include/uapi/linux/perf_event.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Link: https://lore.kernel.org/r/20250410001125.391820-4-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 0524d541d4e3..5fc753c23734 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -385,6 +385,8 @@ enum perf_event_read_format { * * @sample_max_stack: Max number of frame pointers in a callchain, * should be < /proc/sys/kernel/perf_event_max_stack + * Max number of entries of branch stack + * should be < hardware limit */ struct perf_event_attr { -- cgit v1.2.3 From af74e5fe7453c1cb2b86c601426cfc4ad9ea9753 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:19 -0700 Subject: tools headers: Update the VFS headers with the kernel sources To pick up the changes in: 7ed6cbe0f8caa6ee fs: add STATX_DIO_READ_ALIGN 8fc7e23a9bd851e6 fs: reformat the statx definition a5874fde3c0884a3 exec: Add a new AT_EXECVE_CHECK flag to execveat(2) 1ebd4a3c095cd538 blk-crypto: add ioctls to create and prepare hardware-wrapped keys af6505e5745b9f3a fs: add RWF_DONTCACHE iocb and FOP_DONTCACHE file_operations flag 10783d0ba0d7731e fs, iov_iter: define meta io descriptor 8f6116b5b77b0536 statmount: add a new supported_mask field 37c4a9590e1efcae statmount: allow to retrieve idmappings Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/stat.h include/uapi/linux/stat.h diff -u tools/perf/trace/beauty/include/uapi/linux/stat.h include/uapi/linux/stat.h diff -u tools/perf/trace/beauty/include/uapi/linux/fcntl.h include/uapi/linux/fcntl.h diff -u tools/perf/trace/beauty/include/uapi/linux/fs.h include/uapi/linux/fs.h diff -u 
tools/perf/trace/beauty/include/uapi/linux/mount.h include/uapi/linux/mount.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-5-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/stat.h | 99 ++++++++++++++++------ tools/perf/trace/beauty/include/uapi/linux/fcntl.h | 4 + tools/perf/trace/beauty/include/uapi/linux/fs.h | 21 +++-- tools/perf/trace/beauty/include/uapi/linux/mount.h | 10 ++- tools/perf/trace/beauty/include/uapi/linux/stat.h | 99 ++++++++++++++++------ 5 files changed, 179 insertions(+), 54 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 887a25286441..f78ee3670dd5 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -98,43 +98,93 @@ struct statx_timestamp { */ struct statx { /* 0x00 */ - __u32 stx_mask; /* What results were written [uncond] */ - __u32 stx_blksize; /* Preferred general I/O size [uncond] */ - __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* What results were written [uncond] */ + __u32 stx_mask; + + /* Preferred general I/O size [uncond] */ + __u32 stx_blksize; + + /* Flags conveying information about the file [uncond] */ + __u64 stx_attributes; + /* 0x10 */ - __u32 stx_nlink; /* Number of hard links */ - __u32 stx_uid; /* User ID of owner */ - __u32 stx_gid; /* Group ID of owner */ - __u16 stx_mode; /* File mode */ + /* Number of hard links */ + __u32 stx_nlink; + + /* User ID of owner */ + __u32 stx_uid; + + /* Group ID of owner */ + __u32 stx_gid; + + /* File mode */ + __u16 stx_mode; __u16 __spare0[1]; + /* 0x20 */ - __u64 stx_ino; /* Inode number */ - __u64 stx_size; /* File size */ - __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes 
*/ + /* Inode number */ + __u64 stx_ino; + + /* File size */ + __u64 stx_size; + + /* Number of 512-byte blocks allocated */ + __u64 stx_blocks; + + /* Mask to show what's supported in stx_attributes */ + __u64 stx_attributes_mask; + /* 0x40 */ - struct statx_timestamp stx_atime; /* Last access time */ - struct statx_timestamp stx_btime; /* File creation time */ - struct statx_timestamp stx_ctime; /* Last attribute change time */ - struct statx_timestamp stx_mtime; /* Last data modification time */ + /* Last access time */ + struct statx_timestamp stx_atime; + + /* File creation time */ + struct statx_timestamp stx_btime; + + /* Last attribute change time */ + struct statx_timestamp stx_ctime; + + /* Last data modification time */ + struct statx_timestamp stx_mtime; + /* 0x80 */ - __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_major; __u32 stx_rdev_minor; - __u32 stx_dev_major; /* ID of device containing file [uncond] */ + + /* ID of device containing file [uncond] */ + __u32 stx_dev_major; __u32 stx_dev_minor; + /* 0x90 */ __u64 stx_mnt_id; - __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ - __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ + + /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_mem_align; + + /* File offset alignment for direct I/O */ + __u32 stx_dio_offset_align; + /* 0xa0 */ - __u64 stx_subvol; /* Subvolume identifier */ - __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ - __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* Subvolume identifier */ + __u64 stx_subvol; + + /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_min; + + /* Max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; + /* 0xb0 */ - __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ - __u32 __spare1[1]; + /* Max atomic write segment count */ + 
__u32 stx_atomic_write_segments_max; + + /* File offset alignment for direct I/O reads */ + __u32 stx_dio_read_offset_align; + /* 0xb8 */ __u64 __spare3[9]; /* Spare space for future expansion */ + /* 0x100 */ }; @@ -164,6 +214,7 @@ struct statx { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h index 6e6907e63bfc..a15ac2fa4b20 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h @@ -155,4 +155,8 @@ #define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ #define AT_HANDLE_CONNECTABLE 0x002 /* Request a connectable file handle */ +/* Flags for execveat2(2). */ +#define AT_EXECVE_CHECK 0x10000 /* Only perform a check if execution + would be allowed. */ + #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 753971770733..e762e1af650c 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -40,6 +40,15 @@ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1< Date: Wed, 9 Apr 2025 17:11:20 -0700 Subject: tools headers: Update the syscall table with the kernel sources To pick up the changes in: c4a16820d9019940 fs: add open_tree_attr() 2df1ad0d25803399 x86/arch_prctl: Simplify sys_arch_prctl() e632bca07c8eef1d arm64: generate 64-bit syscall.tbl This is basically to support the new open_tree_attr syscall. But it also needs to update asm-generic unistd.h header to get the new syscall number. 
And arm64 unistd.h header was converted to use the generic 64-bit header. Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/scripts/syscall.tbl scripts/syscall.tbl diff -u tools/perf/arch/x86/entry/syscalls/syscall_32.tbl arch/x86/entry/syscalls/syscall_32.tbl diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl diff -u tools/perf/arch/arm/entry/syscalls/syscall.tbl arch/arm/tools/syscall.tbl diff -u tools/perf/arch/sh/entry/syscalls/syscall.tbl arch/sh/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/sparc/entry/syscalls/syscall.tbl arch/sparc/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/xtensa/entry/syscalls/syscall.tbl arch/xtensa/kernel/syscalls/syscall.tbl diff -u tools/arch/arm64/include/uapi/asm/unistd.h arch/arm64/include/uapi/asm/unistd.h diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Please see tools/include/uapi/README for further details. 
Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: linux-arch@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-6-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/arch/arm64/include/uapi/asm/unistd.h | 24 +--------------------- tools/include/uapi/asm-generic/unistd.h | 4 +++- tools/perf/arch/arm/entry/syscalls/syscall.tbl | 1 + .../perf/arch/mips/entry/syscalls/syscall_n64.tbl | 1 + tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 1 + tools/perf/arch/s390/entry/syscalls/syscall.tbl | 1 + tools/perf/arch/sh/entry/syscalls/syscall.tbl | 1 + tools/perf/arch/sparc/entry/syscalls/syscall.tbl | 1 + tools/perf/arch/x86/entry/syscalls/syscall_32.tbl | 3 ++- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 1 + tools/perf/arch/xtensa/entry/syscalls/syscall.tbl | 1 + tools/scripts/syscall.tbl | 1 + 12 files changed, 15 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h index 9306726337fe..df36f23876e8 100644 --- a/tools/arch/arm64/include/uapi/asm/unistd.h +++ b/tools/arch/arm64/include/uapi/asm/unistd.h @@ -1,24 +1,2 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */ - -#define __ARCH_WANT_RENAMEAT -#define __ARCH_WANT_NEW_STAT -#define __ARCH_WANT_SET_GET_RLIMIT -#define __ARCH_WANT_TIME32_SYSCALLS -#define __ARCH_WANT_MEMFD_SECRET - -#include <asm-generic/unistd.h> +#include <asm/unistd_64.h> diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 88dc393c2bca..2892a45023af 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -849,9 +849,11 @@ __SYSCALL(__NR_getxattrat, sys_getxattrat) __SYSCALL(__NR_listxattrat, sys_listxattrat) #define __NR_removexattrat 466 __SYSCALL(__NR_removexattrat, sys_removexattrat) +#define __NR_open_tree_attr 467 +__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) #undef __NR_syscalls -#define __NR_syscalls 467 +#define __NR_syscalls 468 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl index 49eeb2ad8dbd..27c1d5ebcd91 100644 --- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl @@ -481,3 +481,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index c844cd5cda62..1e8c44c7b614 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -381,3 +381,4 @@ 464 n64 getxattrat sys_getxattrat 465 n64 listxattrat sys_listxattrat 466 n64 removexattrat sys_removexattrat +467 n64 open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index d8b4ab78bef0..9a084bdb8926 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -557,3 +557,4 @@
464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index e9115b4d8b63..a4569b96ef06 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 464 common getxattrat sys_getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl index c8cad33bf250..52a7652fcff6 100644 --- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl @@ -470,3 +470,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl index 727f99d333b3..83e45eb6c095 100644 --- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl @@ -512,3 +512,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl index 4d0fb2fba7e2..ac007ea00979 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl @@ -396,7 +396,7 @@ 381 i386 pkey_alloc sys_pkey_alloc 382 i386 pkey_free sys_pkey_free 383 i386 statx sys_statx -384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl 
+384 i386 arch_prctl sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents 386 i386 rseq sys_rseq 393 i386 semget sys_semget @@ -472,3 +472,4 @@ 464 i386 getxattrat sys_getxattrat 465 i386 listxattrat sys_listxattrat 466 i386 removexattrat sys_removexattrat +467 i386 open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 5eb708bff1c7..cfb5ca41e30d 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -390,6 +390,7 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl index 37effc1b134e..f657a77314f8 100644 --- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl @@ -437,3 +437,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl index ebbdb3c42e9f..580b4e246aec 100644 --- a/tools/scripts/syscall.tbl +++ b/tools/scripts/syscall.tbl @@ -407,3 +407,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr -- cgit v1.2.3 From df4bd8c76d49cf5948d63987f4a795c544155906 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:21 -0700 Subject: tools headers: Update the uapi/linux/prctl.h copy with the kernel sources To pick up the changes in: ec2d0c04624b3c8a posix-timers: Provide a mechanism to allocate a given 
timer ID Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/linux/prctl.h include/uapi/linux/prctl.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: Thomas Gleixner Link: https://lore.kernel.org/r/20250410001125.391820-7-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/trace/beauty/include/uapi/linux/prctl.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 5c6080680cb2..15c18ef4eb11 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -353,4 +353,15 @@ struct prctl_mm_map { */ #define PR_LOCK_SHADOW_STACK_STATUS 76 +/* + * Controls the mode of timer_create() for CRIU restore operations. + * Enabling this allows CRIU to restore timers with explicit IDs. + * + * Don't use for normal operations as the result might be undefined. + */ +#define PR_TIMER_CREATE_RESTORE_IDS 77 +# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0 +# define PR_TIMER_CREATE_RESTORE_IDS_ON 1 +# define PR_TIMER_CREATE_RESTORE_IDS_GET 2 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 4056cf407253ac81fc960088acfe9579496a871f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:22 -0700 Subject: tools headers: Update the uapi/asm-generic/mman-common.h copy with the kernel sources To pick up the changes in: 6d61527d931ba07b mm/pkey: Add PKEY_UNRESTRICTED macro Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/asm-generic/mman-common.h include/uapi/asm-generic/mman-common.h Please see tools/include/uapi/README for further details. 
Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: linux-arch@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-8-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/asm-generic/mman-common.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 1ea2c4c33b86..ef1c27fa3c57 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -85,6 +85,7 @@ /* compatibility flags */ #define MAP_FILE 0 +#define PKEY_UNRESTRICTED 0x0 #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ -- cgit v1.2.3 From 74709981873d2baa5573735b1f24108206d0197e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:23 -0700 Subject: tools headers: Update the linux/unaligned.h copy with the kernel sources To pick up the changes in: 3846699217798061 ALSA: rawmidi: Make tied_device=0 as default / unknown 7bb49d2e8b52adac ALSA: rawmidi: Bump protocol version to 2.0.5 b8fefed73a952a33 ALSA: rawmidi: Show substream activity in info ioctl bdf46443f350dd5d ALSA: rawmidi: Expose the tied device number in info ioctl Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/sound/asound.h include/uapi/sound/asound.h Please see tools/include/uapi/README for further details. 
Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: linux-sound@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-9-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/trace/beauty/include/uapi/sound/asound.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h index 4cd513215bcd..5a049eeaecce 100644 --- a/tools/perf/trace/beauty/include/uapi/sound/asound.h +++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h @@ -716,7 +716,7 @@ enum { * Raw MIDI section - /dev/snd/midi?? */ -#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 4) +#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 5) enum { SNDRV_RAWMIDI_STREAM_OUTPUT = 0, @@ -728,6 +728,9 @@ enum { #define SNDRV_RAWMIDI_INFO_INPUT 0x00000002 #define SNDRV_RAWMIDI_INFO_DUPLEX 0x00000004 #define SNDRV_RAWMIDI_INFO_UMP 0x00000008 +#define SNDRV_RAWMIDI_INFO_STREAM_INACTIVE 0x00000010 + +#define SNDRV_RAWMIDI_DEVICE_UNKNOWN 0 struct snd_rawmidi_info { unsigned int device; /* RO/WR (control): device number */ @@ -740,7 +743,8 @@ struct snd_rawmidi_info { unsigned char subname[32]; /* name of active or selected subdevice */ unsigned int subdevices_count; unsigned int subdevices_avail; - unsigned char reserved[64]; /* reserved for future use */ + int tied_device; /* R: tied rawmidi device (UMP/legacy) */ + unsigned char reserved[60]; /* reserved for future use */ }; #define SNDRV_RAWMIDI_MODE_FRAMING_MASK (7<<0) -- cgit v1.2.3 From 847f1403d3ee51278dfbece84ec7f199de43daa5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:24 -0700 Subject: tools headers: Update the x86 headers with the kernel sources To pick up the changes in: 841326332bcb13ae x86/cpufeatures: Generate the header based on build config 440a65b7d25fb06f x86/mm: Enable AMD translation cache extensions 767ae437a32d6447 x86/mm: Add 
INVLPGB feature and Kconfig entry b4cc466b97359011 cpufreq/amd-pstate: Replace all AMD_CPPC_* macros with masks 98c7a713db91c5a9 x86/bugs: Add X86_BUG_SPECTRE_V2_USER 8f64eee70cdd3bb8 x86/bugs: Remove X86_FEATURE_USE_IBPB 8442df2b49ed9bcd x86/bugs: KVM: Add support for SRSO_MSR_FIX 70792aed14551e31 x86/cpufeatures: Add CPUID feature bit for Idle HLT intercept 968e9bc4cef87054 x86: move ZMM exclusion list into CPU feature flag c631a2de7ae48d50 perf/x86/intel: Ensure LBRs are disabled when a CPU is starting 38cc6495cdec18a4 x86/sev: Prevent GUEST_TSC_FREQ MSR interception for Secure TSC enabled guests 288bba2f4c8be1e1 x86/cpufeatures: Remove "AMD" from the comments to the AMD-specific leaf 877818802c3e970f x86/bugs: Add SRSO_USER_KERNEL_NO support 8ae3291f773befee x86/sev: Add full support for a segmented RMP table 0cbc0258415814c8 x86/sev: Add support for the RMPREAD instruction 7a470e826d7521be x86/cpufeatures: Free up unused feature bits Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Please see tools/include/uapi/README for further details. 
Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: x86@kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-10-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/arch/x86/include/asm/cpufeatures.h | 28 ++++++++++++++++++++-------- tools/arch/x86/include/asm/msr-index.h | 31 +++++++++++++++++++------------ 2 files changed, 39 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 9e3fa7942e7d..6c2c152d8a67 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -75,8 +75,8 @@ #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ #define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ #define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */ +/* Free ( 3*32+ 6) */ +/* Free ( 3*32+ 7) */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ #define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ #define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */ @@ -329,6 +329,7 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */ #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ @@ -377,6 +378,7 @@ #define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ #define X86_FEATURE_VNMI (15*32+25) /* 
"vnmi" Virtual NMI */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ +#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/ @@ -434,15 +436,18 @@ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ -#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */ -#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_SME (19*32+ 0) /* "sme" Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* "sev" Secure Encrypted Virtualization */ #define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ -#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ -#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ -#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ +#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */ +#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */ #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ 
+#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ @@ -455,6 +460,11 @@ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ +#define X86_FEATURE_SRSO_USER_KERNEL_NO (20*32+30) /* CPU is not affected by SRSO across user/kernel boundaries */ +#define X86_FEATURE_SRSO_BP_SPEC_REDUCE (20*32+31) /* + * BP_CFG[BpSpecReduce] can be used to mitigate SRSO for VMs. + * (SRSO_MSR_FIX in the official doc). + */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -470,6 +480,7 @@ #define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ +#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */ /* * BUG word(s) @@ -521,4 +532,5 @@ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ #define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ #define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index dc1c1057f26e..e6134ef2263d 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -397,7 +397,8 @@ 
#define MSR_IA32_PASID_VALID BIT_ULL(31) /* DEBUGCTLMSR bits (others vary by model): */ -#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */ +#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT) #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ #define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) @@ -610,6 +611,7 @@ #define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_AMD_PERF_STATUS 0xc0010063 #define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD64_GUEST_TSC_FREQ 0xc0010134 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 @@ -646,6 +648,7 @@ #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ #define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b #define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 @@ -684,11 +687,12 @@ #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) #define MSR_AMD64_SNP_RESV_BIT 18 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) - -#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f - #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_AMD64_RMP_CFG 0xc0010136 +#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0 +#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT) +#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8) #define MSR_SVSM_CAA 0xc001f000 @@ -699,15 +703,17 @@ #define MSR_AMD_CPPC_REQ 0xc00102b3 #define MSR_AMD_CPPC_STATUS 0xc00102b4 -#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff) -#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff) -#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff) -#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff) +/* Masks for use with MSR_AMD_CPPC_CAP1 */ 
+#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24) -#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0) -#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8) -#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) -#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) +/* Masks for use with MSR_AMD_CPPC_REQ */ +#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) /* AMD Performance Counter Global Status and Control MSRs */ #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 @@ -719,6 +725,7 @@ /* Zen4 */ #define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4 #define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 /* Fam 19h MSRs */ -- cgit v1.2.3 From 7f56978e5876521eaa90fda0e63630fa64f69bce Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:25 -0700 Subject: tools headers: Update the arch/x86/lib/memset_64.S copy with the kernel sources To pick up the changes in: 2981557cb0408e14 x86,kcfi: Fix EXPORT_SYMBOL vs kCFI That required adding a copy of include/linux/cfi_types.h and its checking in tools/perf/check-headers.sh. Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S Please see tools/include/uapi/README for further details.
Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: x86@kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-11-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/arch/x86/lib/memset_64.S | 3 ++- tools/include/linux/cfi_types.h | 45 +++++++++++++++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 3 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 tools/include/linux/cfi_types.h (limited to 'tools') diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 0199d56cb479..d66b710d628f 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -28,7 +29,7 @@ * only for the return value that is the same as the source input, * which the compiler could/should do much better anyway. */ -SYM_FUNC_START(__memset) +SYM_TYPED_FUNC_START(__memset) ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS movq %rdi,%r9 diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h new file mode 100644 index 000000000000..6b8713675765 --- /dev/null +++ b/tools/include/linux/cfi_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Clang Control Flow Integrity (CFI) type definitions. + */ +#ifndef _LINUX_CFI_TYPES_H +#define _LINUX_CFI_TYPES_H + +#ifdef __ASSEMBLY__ +#include + +#ifdef CONFIG_CFI_CLANG +/* + * Use the __kcfi_typeid_ type identifier symbol to + * annotate indirectly called assembly functions. The compiler emits + * these symbols for all address-taken function declarations in C + * code. + */ +#ifndef __CFI_TYPE +#define __CFI_TYPE(name) \ + .4byte __kcfi_typeid_##name +#endif + +#define SYM_TYPED_ENTRY(name, linkage, align...) \ + linkage(name) ASM_NL \ + align ASM_NL \ + __CFI_TYPE(name) ASM_NL \ + name: + +#define SYM_TYPED_START(name, linkage, align...) 
\ + SYM_TYPED_ENTRY(name, linkage, align) + +#else /* CONFIG_CFI_CLANG */ + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_START(name, linkage, align) + +#endif /* CONFIG_CFI_CLANG */ + +#ifndef SYM_TYPED_FUNC_START +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _LINUX_CFI_TYPES_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index a4499e5a6f9c..857f6646cc23 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -20,6 +20,7 @@ FILES=( "include/uapi/linux/stat.h" "include/linux/bits.h" "include/vdso/bits.h" + "include/linux/cfi_types.h" "include/linux/const.h" "include/vdso/const.h" "include/vdso/unaligned.h" -- cgit v1.2.3 From 1ddb9ad2ac6e527f220d5821ad54d37d3f9d122a Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 10 Apr 2025 10:00:23 -0700 Subject: selftests/bpf: Make res_spin_lock AA test condition stronger Let's make sure that we see a EDEADLK and ETIMEDOUT whenever checking for the AA tests (in case of simple AA and AA after exhausting 31 entries). 
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250410170023.2670683-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/res_spin_lock.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock.c b/tools/testing/selftests/bpf/progs/res_spin_lock.c index b33385dfbd35..22c4fb8b9266 100644 --- a/tools/testing/selftests/bpf/progs/res_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/res_spin_lock.c @@ -38,13 +38,14 @@ int res_spin_lock_test(struct __sk_buff *ctx) r = bpf_res_spin_lock(&elem1->lock); if (r) return r; - if (!bpf_res_spin_lock(&elem2->lock)) { + r = bpf_res_spin_lock(&elem2->lock); + if (!r) { bpf_res_spin_unlock(&elem2->lock); bpf_res_spin_unlock(&elem1->lock); return -1; } bpf_res_spin_unlock(&elem1->lock); - return 0; + return r != -EDEADLK; } SEC("tc") @@ -124,12 +125,15 @@ int res_spin_lock_test_held_lock_max(struct __sk_buff *ctx) /* Trigger AA, after exhausting entries in the held lock table. This * time, only the timeout can save us, as AA detection won't succeed. */ - if (!bpf_res_spin_lock(locks[34])) { + ret = bpf_res_spin_lock(locks[34]); + if (!ret) { bpf_res_spin_unlock(locks[34]); ret = 1; goto end; } + ret = ret != -ETIMEDOUT ? 2 : 0; + end: for (i = i - 1; i >= 0; i--) bpf_res_spin_unlock(locks[i]); -- cgit v1.2.3 From 87cb582d2f55d379ce95b5bcc4ec596e29b0a65e Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 9 Apr 2025 15:49:36 -0700 Subject: objtool: Fix false-positive "ignoring unreachables" warning There's no need to try to automatically disable unreachable warnings if they've already been manually disabled due to CONFIG_KCOV quirks. 
This avoids a spurious warning with a KCOV kernel: fs/smb/client/cifs_unicode.o: warning: objtool: cifsConvertToUTF16.part.0+0xce5: ignoring unreachables due to jump table quirk Fixes: eeff7ac61526 ("objtool: Warn when disabling unreachable warnings") Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/5eb28eeb6a724b7d945a961cfdcf8d41e6edf3dc.1744238814.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/r/202504090910.QkvTAR36-lkp@intel.com/ --- tools/objtool/arch/x86/special.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index 403e587676f1..06ca4a2659a4 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -126,7 +126,7 @@ struct reloc *arch_find_switch_table(struct objtool_file *file, * indicates a rare GCC quirk/bug which can leave dead * code behind. */ - if (reloc_type(text_reloc) == R_X86_64_PC32) { + if (!file->ignore_unreachables && reloc_type(text_reloc) == R_X86_64_PC32) { WARN_INSN(insn, "ignoring unreachables due to jump table quirk"); file->ignore_unreachables = true; } -- cgit v1.2.3 From 6afd0a3c7ecb5049d75801a3efda0ada70483bd0 Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 9 Apr 2025 09:31:53 -0700 Subject: io_uring/zcrx: enable tcp-data-split in selftest For bnxt when the agg ring is used then tcp-data-split is automatically reported to be enabled, but __net_mp_open_rxq() requires tcp-data-split to be explicitly enabled by the user. Enable tcp-data-split explicitly in io_uring zc rx selftest. 
Signed-off-by: David Wei Link: https://patch.msgid.link/20250409163153.2747918-1-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/iou-zcrx.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index 9f271ab6ec04..6a0378e06cab 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -35,6 +35,7 @@ def test_zcrx(cfg) -> None: rx_ring = _get_rx_ring_entries(cfg) try: + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) @@ -48,6 +49,7 @@ def test_zcrx(cfg) -> None: ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) ethtool(f"-X {cfg.ifname} default", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) def test_zcrx_oneshot(cfg) -> None: @@ -59,6 +61,7 @@ def test_zcrx_oneshot(cfg) -> None: rx_ring = _get_rx_ring_entries(cfg) try: + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) @@ -72,6 +75,7 @@ def test_zcrx_oneshot(cfg) -> None: ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) ethtool(f"-X {cfg.ifname} default", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) def main() -> None: -- cgit v1.2.3 From 1293dacbbd43ab9848ac4655f6f2ba1dcc5a96ad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 9 Apr 2025 10:35:31 -0300 Subject: perf 
libunwind arm64: Fix missing close parens in an if statement While testing building with libunwind (using LIBUNWIND=1) in various arches I noticed a problem on arm64, on an rpi5 system, a missing close parens in a change related to dso__data_get_fd() usage, fix it. Fixes: 5ac22c35aa8519f1 ("perf dso: Use lock annotations to fix asan deadlock") Signed-off-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/Z_Z3o8KvB2i5c6ab@x1 Signed-off-by: Namhyung Kim --- tools/perf/util/unwind-libunwind-local.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 9fb2c1343c7f..0b037e7389a0 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -371,7 +371,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso, * has to be pointed by symsrc_filename */ if (ofs == 0) { - if (dso__data_get_fd(dso, machine, &fd) { + if (dso__data_get_fd(dso, machine, &fd)) { ofs = elf_section_offset(fd, ".debug_frame"); dso__data_put_fd(dso); } -- cgit v1.2.3 From e4a0f9e0cacd93094b619616426a273e0bc9107e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 10 Apr 2025 19:17:22 +0200 Subject: selftests/landlock: Factor out audit fixture in audit_test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The audit fixture needlessly stores and manages domain_stack. Move it to the audit.layers tests. This will be useful to reuse the audit fixture with the next patch. 
Cc: Günther Noack Link: https://lore.kernel.org/r/20250410171725.1265860-2-mic@digikod.net Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/audit_test.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c index a0643070c403..815c0f03e1fb 100644 --- a/tools/testing/selftests/landlock/audit_test.c +++ b/tools/testing/selftests/landlock/audit_test.c @@ -40,7 +40,6 @@ FIXTURE(audit) { struct audit_filter audit_filter; int audit_fd; - __u64(*domain_stack)[16]; }; FIXTURE_SETUP(audit) @@ -60,18 +59,10 @@ FIXTURE_SETUP(audit) TH_LOG("Failed to initialize audit: %s", error_msg); } clear_cap(_metadata, CAP_AUDIT_CONTROL); - - self->domain_stack = mmap(NULL, sizeof(*self->domain_stack), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, -1, 0); - ASSERT_NE(MAP_FAILED, self->domain_stack); - memset(self->domain_stack, 0, sizeof(*self->domain_stack)); } FIXTURE_TEARDOWN(audit) { - EXPECT_EQ(0, munmap(self->domain_stack, sizeof(*self->domain_stack))); - set_cap(_metadata, CAP_AUDIT_CONTROL); EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter)); clear_cap(_metadata, CAP_AUDIT_CONTROL); @@ -83,9 +74,15 @@ TEST_F(audit, layers) .scoped = LANDLOCK_SCOPE_SIGNAL, }; int status, ruleset_fd, i; + __u64(*domain_stack)[16]; __u64 prev_dom = 3; pid_t child; + domain_stack = mmap(NULL, sizeof(*domain_stack), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, domain_stack); + memset(domain_stack, 0, sizeof(*domain_stack)); + ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ASSERT_LE(0, ruleset_fd); @@ -94,7 +91,7 @@ TEST_F(audit, layers) child = fork(); ASSERT_LE(0, child); if (child == 0) { - for (i = 0; i < ARRAY_SIZE(*self->domain_stack); i++) { + for (i = 0; i < ARRAY_SIZE(*domain_stack); i++) { __u64 denial_dom = 1; __u64 
allocated_dom = 2; @@ -115,7 +112,7 @@ TEST_F(audit, layers) /* Checks that the new domain is younger than the previous one. */ EXPECT_GT(allocated_dom, prev_dom); prev_dom = allocated_dom; - (*self->domain_stack)[i] = allocated_dom; + (*domain_stack)[i] = allocated_dom; } /* Checks that we reached the maximum number of layers. */ @@ -142,20 +139,20 @@ TEST_F(audit, layers) /* Purges log from deallocated domains. */ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); - for (i = ARRAY_SIZE(*self->domain_stack) - 1; i >= 0; i--) { + for (i = ARRAY_SIZE(*domain_stack) - 1; i >= 0; i--) { __u64 deallocated_dom = 2; EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, &deallocated_dom)); - EXPECT_EQ((*self->domain_stack)[i], deallocated_dom) + EXPECT_EQ((*domain_stack)[i], deallocated_dom) { TH_LOG("Failed to match domain %llx (#%d)", - (*self->domain_stack)[i], i); + (*domain_stack)[i], i); } } + EXPECT_EQ(0, munmap(domain_stack, sizeof(*domain_stack))); EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default, sizeof(audit_tv_default))); - EXPECT_EQ(0, close(ruleset_fd)); } -- cgit v1.2.3 From 6b4566400a2919e6c1137404c53d7cf1ada559aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 10 Apr 2025 19:17:23 +0200 Subject: selftests/landlock: Add PID tests for audit records MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add audit.thread tests to check that the PID tied to a domain is not a thread ID but the thread group ID. These new tests would not pass without the previous TGID fix. Extend matches_log_domain_allocated() to check against the PID that created the domain. Test coverage for security/landlock is 93.6% of 1524 lines according to gcc/gcov-14. 
Cc: Christian Brauner Cc: Günther Noack Cc: Paul Moore Link: https://lore.kernel.org/r/20250410171725.1265860-3-mic@digikod.net Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/audit.h | 21 +++-- tools/testing/selftests/landlock/audit_test.c | 127 +++++++++++++++++++++++++- tools/testing/selftests/landlock/fs_test.c | 3 +- 3 files changed, 141 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h index b9054086a0c9..18a6014920b5 100644 --- a/tools/testing/selftests/landlock/audit.h +++ b/tools/testing/selftests/landlock/audit.h @@ -300,15 +300,22 @@ out: return err; } -static int __maybe_unused matches_log_domain_allocated(int audit_fd, +static int __maybe_unused matches_log_domain_allocated(int audit_fd, pid_t pid, __u64 *domain_id) { - return audit_match_record( - audit_fd, AUDIT_LANDLOCK_DOMAIN, - REGEX_LANDLOCK_PREFIX - " status=allocated mode=enforcing pid=[0-9]\\+ uid=[0-9]\\+" - " exe=\"[^\"]\\+\" comm=\".*_test\"$", - domain_id); + static const char log_template[] = REGEX_LANDLOCK_PREFIX + " status=allocated mode=enforcing pid=%d uid=[0-9]\\+" + " exe=\"[^\"]\\+\" comm=\".*_test\"$"; + char log_match[sizeof(log_template) + 10]; + int log_match_len; + + log_match_len = + snprintf(log_match, sizeof(log_match), log_template, pid); + if (log_match_len > sizeof(log_match)) + return -E2BIG; + + return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match, + domain_id); } static int __maybe_unused matches_log_domain_deallocated( diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c index 815c0f03e1fb..cfc571afd0eb 100644 --- a/tools/testing/selftests/landlock/audit_test.c +++ b/tools/testing/selftests/landlock/audit_test.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -104,7 +105,8 @@ TEST_F(audit, layers) matches_log_signal(_metadata, self->audit_fd, 
getppid(), &denial_dom)); EXPECT_EQ(0, matches_log_domain_allocated( - self->audit_fd, &allocated_dom)); + self->audit_fd, getpid(), + &allocated_dom)); EXPECT_NE(denial_dom, 1); EXPECT_NE(denial_dom, 0); EXPECT_EQ(denial_dom, allocated_dom); @@ -156,6 +158,126 @@ TEST_F(audit, layers) EXPECT_EQ(0, close(ruleset_fd)); } +struct thread_data { + pid_t parent_pid; + int ruleset_fd, pipe_child, pipe_parent; +}; + +static void *thread_audit_test(void *arg) +{ + const struct thread_data *data = (struct thread_data *)arg; + uintptr_t err = 0; + char buffer; + + /* TGID and TID are different for a second thread. */ + if (getpid() == gettid()) { + err = 1; + goto out; + } + + if (landlock_restrict_self(data->ruleset_fd, 0)) { + err = 2; + goto out; + } + + if (close(data->ruleset_fd)) { + err = 3; + goto out; + } + + /* Creates a denial to get the domain ID. */ + if (kill(data->parent_pid, 0) != -1) { + err = 4; + goto out; + } + + if (EPERM != errno) { + err = 5; + goto out; + } + + /* Signals the parent to read denial logs. */ + if (write(data->pipe_child, ".", 1) != 1) { + err = 6; + goto out; + } + + /* Waits for the parent to update audit filters. */ + if (read(data->pipe_parent, &buffer, 1) != 1) { + err = 7; + goto out; + } + +out: + close(data->pipe_child); + close(data->pipe_parent); + return (void *)err; +} + +/* Checks that the PID tied to a domain is not a TID but the TGID. 
*/ +TEST_F(audit, thread) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .scoped = LANDLOCK_SCOPE_SIGNAL, + }; + __u64 denial_dom = 1; + __u64 allocated_dom = 2; + __u64 deallocated_dom = 3; + pthread_t thread; + int pipe_child[2], pipe_parent[2]; + char buffer; + struct thread_data child_data; + + child_data.parent_pid = getppid(); + ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC)); + child_data.pipe_child = pipe_child[1]; + ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); + child_data.pipe_parent = pipe_parent[0]; + child_data.ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, child_data.ruleset_fd); + + /* TGID and TID are the same for the initial thread . */ + EXPECT_EQ(getpid(), gettid()); + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, pthread_create(&thread, NULL, thread_audit_test, + &child_data)); + + /* Waits for the child to generate a denial. */ + ASSERT_EQ(1, read(pipe_child[0], &buffer, 1)); + EXPECT_EQ(0, close(pipe_child[0])); + + /* Matches the signal log to get the domain ID. */ + EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, + child_data.parent_pid, &denial_dom)); + EXPECT_NE(denial_dom, 1); + EXPECT_NE(denial_dom, 0); + + EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, getpid(), + &allocated_dom)); + EXPECT_EQ(denial_dom, allocated_dom); + + /* Updates filter rules to match the drop record. */ + set_cap(_metadata, CAP_AUDIT_CONTROL); + EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE)); + EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter, + AUDIT_DEL_RULE)); + clear_cap(_metadata, CAP_AUDIT_CONTROL); + + /* Signals the thread to exit, which will generate a domain deallocation. 
*/ + ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); + EXPECT_EQ(0, close(pipe_parent[1])); + ASSERT_EQ(0, pthread_join(thread, NULL)); + + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); + EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, + &deallocated_dom)); + EXPECT_EQ(denial_dom, deallocated_dom); + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_default, sizeof(audit_tv_default))); +} + FIXTURE(audit_flags) { struct audit_filter audit_filter; @@ -270,7 +392,8 @@ TEST_F(audit_flags, signal) /* Checks domain information records. */ EXPECT_EQ(0, matches_log_domain_allocated( - self->audit_fd, &allocated_dom)); + self->audit_fd, getpid(), + &allocated_dom)); EXPECT_NE(*self->domain_id, 1); EXPECT_NE(*self->domain_id, 0); EXPECT_EQ(*self->domain_id, allocated_dom); diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index f819011a8798..73729382d40f 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -5964,7 +5964,8 @@ TEST_F(audit_layout1, refer_handled) EXPECT_EQ(EXDEV, errno); EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", dir_s1d1)); - EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, NULL)); + EXPECT_EQ(0, + matches_log_domain_allocated(self->audit_fd, getpid(), NULL)); EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", dir_s1d3)); -- cgit v1.2.3 From 2b70702917337a8d6d07f03eed961e0119091647 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 18:02:52 -0700 Subject: perf tools: Remove evsel__handle_error_quirks() The evsel__handle_error_quirks() is to fixup invalid event attributes on some architecture based on the error code. Currently it's only used for AMD to disable precise_ip not to use IBS which has more restrictions. 
But the commit c33aea446bf555ab changed the code to call evsel__precise_ip_fallback() for any errors so there's no difference with the above function. To make matters worse, it caused a problem with branch stack on Zen3. The IBS doesn't support branch stack so it should use a regular core PMU event. The default event has precise_max set and it starts with 3. And evsel__precise_ip_fallback() tries with it and reduces the level one by one. At last it tries with 0 but it also failed on Zen3 since the branch stack is not supported for the cycles event. At this point, evsel__precise_ip_fallback() restores the original precise_ip value (3) in the hope that it can succeed with another modifier (like exclude_kernel). Then evsel__handle_error_quirks() sees it has precise_ip != 0 and makes it retry with 0. This created an infinite loop. Before: $ perf record -b -vv |& grep removing removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD ... After: $ perf record -b true Error: Failure to open event 'cycles:P' on PMU 'cpu' which will be removed. Invalid event (cycles:P) in per-thread mode, enable system wide with '-a'. Error: Failure to open any events for recording.
Fixes: c33aea446bf555ab ("perf tools: Fix precise_ip fallback logic") Tested-by: Chun-Tse Shao Cc: Ravi Bangoria Link: https://lore.kernel.org/r/20250410010252.402221-1-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/util/evsel.c | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1974395492d7..3c030da2e477 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2566,25 +2566,6 @@ check: return false; } -static bool evsel__handle_error_quirks(struct evsel *evsel, int error) -{ - /* - * AMD core PMU tries to forward events with precise_ip to IBS PMU - * implicitly. But IBS PMU has more restrictions so it can fail with - * supported event attributes. Let's forward it back to the core PMU - * by clearing precise_ip only if it's from precise_max (:P). - */ - if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() && - evsel->core.attr.precise_ip && evsel->precise_max) { - evsel->core.attr.precise_ip = 0; - pr_debug2_peo("removing precise_ip on AMD\n"); - display_attr(&evsel->core.attr); - return true; - } - - return false; -} - static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads, int start_cpu_map_idx, int end_cpu_map_idx) @@ -2730,9 +2711,6 @@ try_fallback: if (evsel__precise_ip_fallback(evsel)) goto retry_open; - if (evsel__handle_error_quirks(evsel, err)) - goto retry_open; - out_close: if (err) threads->err_thread = thread; -- cgit v1.2.3 From a1fc89d409d8fd927622c238b7c7d719e9ecab3d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 9 Apr 2025 11:15:51 -0400 Subject: tracing/selftest: Add test to better test subops filtering of function graph A bug was discovered that showed the accounting of the subops of the ftrace_ops filtering was incorrect. Add a new test to better test the filtering. 
This test creates two instances, where it will add various filters to both the set_ftrace_filter and the set_ftrace_notrace files and enable function_graph. Then it looks into the enabled_functions file to make sure that the filters are behaving correctly. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Shuah Khan Cc: Andy Chiu Link: https://lore.kernel.org/20250409152720.380778379@goodmis.org Signed-off-by: Steven Rostedt (Google) --- .../ftrace/test.d/ftrace/fgraph-multi-filter.tc | 177 +++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc new file mode 100644 index 000000000000..b6d6a312ead5 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc @@ -0,0 +1,177 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function graph filters +# requires: set_ftrace_filter function_graph:tracer + +# Make sure that function graph filtering works + +INSTANCE1="instances/test1_$$" +INSTANCE2="instances/test2_$$" + +WD=`pwd` + +do_reset() { + cd $WD + if [ -d $INSTANCE1 ]; then + echo nop > $INSTANCE1/current_tracer + rmdir $INSTANCE1 + fi + if [ -d $INSTANCE2 ]; then + echo nop > $INSTANCE2/current_tracer + rmdir $INSTANCE2 + fi +} + +mkdir $INSTANCE1 +if ! 
grep -q function_graph $INSTANCE1/available_tracers; then + echo "function_graph not allowed with instances" + rmdir $INSTANCE1 + exit_unsupported +fi + +mkdir $INSTANCE2 + +fail() { # msg + do_reset + echo $1 + exit_fail +} + +disable_tracing +clear_trace + +function_count() { + search=$1 + vsearch=$2 + + if [ -z "$search" ]; then + cat enabled_functions | wc -l + elif [ -z "$vsearch" ]; then + grep $search enabled_functions | wc -l + else + grep $search enabled_functions | grep $vsearch| wc -l + fi +} + +set_fgraph() { + instance=$1 + filter="$2" + notrace="$3" + + echo "$filter" > $instance/set_ftrace_filter + echo "$notrace" > $instance/set_ftrace_notrace + echo function_graph > $instance/current_tracer +} + +check_functions() { + orig_cnt=$1 + test=$2 + + cnt=`function_count $test` + if [ $cnt -gt $orig_cnt ]; then + fail + fi +} + +check_cnt() { + orig_cnt=$1 + search=$2 + vsearch=$3 + + cnt=`function_count $search $vsearch` + if [ $cnt -gt $orig_cnt ]; then + fail + fi +} + +reset_graph() { + instance=$1 + echo nop > $instance/current_tracer +} + +# get any functions that were enabled before the test +total_cnt=`function_count` +sched_cnt=`function_count sched` +lock_cnt=`function_count lock` +time_cnt=`function_count time` +clock_cnt=`function_count clock` +locks_clock_cnt=`function_count locks clock` +clock_locks_cnt=`function_count clock locks` + +# Trace functions with "sched" but not "time" +set_fgraph $INSTANCE1 '*sched*' '*time*' + +# Make sure "time" isn't listed +check_functions $time_cnt 'time' +instance1_cnt=`function_count` + +# Trace functions with "lock" but not "clock" +set_fgraph $INSTANCE2 '*lock*' '*clock*' +instance1_2_cnt=`function_count` + +# Turn off the first instance +reset_graph $INSTANCE1 + +# The second instance doesn't trace "clock" functions +check_functions $clock_cnt 'clock' +instance2_cnt=`function_count` + +# Start from a clean slate +reset_graph $INSTANCE2 +check_functions $total_cnt + +# Trace functions with "lock" but not 
"clock" +set_fgraph $INSTANCE2 '*lock*' '*clock*' + +# This should match the last time instance 2 was by itself +cnt=`function_count` +if [ $instance2_cnt -ne $cnt ]; then + fail +fi + +# And it should not be tracing "clock" functions +check_functions $clock_cnt 'clock' + +# Trace functions with "sched" but not "time" +set_fgraph $INSTANCE1 '*sched*' '*time*' + +# This should match the last time both instances were enabled +cnt=`function_count` +if [ $instance1_2_cnt -ne $cnt ]; then + fail +fi + +# Turn off the second instance +reset_graph $INSTANCE2 + +# This should match the last time instance 1 was by itself +cnt=`function_count` +if [ $instance1_cnt -ne $cnt ]; then + fail +fi + +# And it should not be tracing "time" functions +check_functions $time_cnt 'time' + +# Start from a clean slate +reset_graph $INSTANCE1 +check_functions $total_cnt + +# Enable all functions but those that have "locks" +set_fgraph $INSTANCE1 '' '*locks*' + +# Enable all functions but those that have "clock" +set_fgraph $INSTANCE2 '' '*clock*' + +# If a function has "locks" it should not have "clock" +check_cnt $locks_clock_cnt locks clock + +# If a function has "clock" it should not have "locks" +check_cnt $clock_locks_cnt clock locks + +reset_graph $INSTANCE1 +reset_graph $INSTANCE2 + +do_reset + +exit 0 -- cgit v1.2.3 From 18c889a9a419dc1662548777b7122d980bccfdad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 10 Apr 2025 12:43:21 +0200 Subject: selftests/tc-testing: Add test for echo of big TC filters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a selftest that checks whether the kernel can successfully echo a big tc filter, to test the fix introduced in commit: 369609fc6272 ("tc: Ensure we have enough buffer space when sending filter netlink notifications") Signed-off-by: Toke Høiland-Jørgensen Tested-by: Victor Nogueira Link: https://patch.msgid.link/20250410104322.214620-1-toke@redhat.com 
Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/actions.json | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json index 1ba96c467754..d9fc62ab476c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json @@ -412,5 +412,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY ingress" ] + }, + { + "id": "33f4", + "name": "Check echo of big filter command", + "category": [ + "infra", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY parent root handle 10: fq_codel" + ], + "cmdUnderTest": "bash -c '$TC -echo filter add dev $DUMMY parent 10: u32 match u32 0 0 $(for i in $(seq 32); do echo action pedit munge ip dport set 22; done) | grep \"added filter\"'", + "verifyCmd": "", + "expExitCode": "0", + "matchCount": "0", + "matchPattern": "", + "teardown": [ + "$TC qdisc del dev $DUMMY parent root fq_codel" + ] } ] -- cgit v1.2.3 From be8254f694469e60252e35c467ac9a878d7797bf Mon Sep 17 00:00:00 2001 From: Daniel Gomez Date: Fri, 21 Mar 2025 20:24:33 +0000 Subject: radix-tree: add missing cleanup.h Add shared cleanup.h header for radix-tree testing tools. Fixes build error found with kdevops [1]: cc -I../shared -I. -I../../include -I../../../lib -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address -fsanitize=undefined -c -o radix-tree.o radix-tree.c In file included from ../shared/linux/idr.h:1, from radix-tree.c:18: ../shared/linux/../../../../include/linux/idr.h:18:10: fatal error: linux/cleanup.h: No such file or directory 18 | #include | ^~~~~~~~~~~~~~~~~ compilation terminated. 
make: *** [: radix-tree.o] Error 1 [1] https://github.com/linux-kdevops/kdevops https://github.com/linux-kdevops/linux-mm-kpd/ actions/runs/13971648496/job/39114756401 [akpm@linux-foundation.org: remove unneeded header guards, per Sidhartha] Link: https://lkml.kernel.org/r/20250321-fix-radix-tree-build-v1-1-838a1e6540e2@samsung.com Fixes: 6c8b0b835f00 ("perf/core: Simplify perf_pmu_register()") Signed-off-by: Daniel Gomez Cc: Daniel Gomez Cc: Ingo Molnar Cc: Liam Howlett Cc: Luis Chamberalin Cc: Matthew Wilcox (Oracle) Cc: Ravi Bangoria Cc: Peter Zijlstra Cc: Sidhartha Kumar Signed-off-by: Andrew Morton --- tools/testing/shared/linux/cleanup.h | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tools/testing/shared/linux/cleanup.h (limited to 'tools') diff --git a/tools/testing/shared/linux/cleanup.h b/tools/testing/shared/linux/cleanup.h new file mode 100644 index 000000000000..ea3081426ee9 --- /dev/null +++ b/tools/testing/shared/linux/cleanup.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "../../../../include/linux/cleanup.h" -- cgit v1.2.3 From a30951d09c33c899f0e4aca80eb87fad5f10ecfa Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 20:33:11 -0400 Subject: test suite: use %zu to print size_t On 32-bit, we can't use %lu to print a size_t variable and gcc warns us about it. Shame it doesn't warn about it on 64-bit. Link: https://lkml.kernel.org/r/20250403003311.359917-1-Liam.Howlett@oracle.com Fixes: cc86e0c2f306 ("radix tree test suite: add support for slab bulk APIs") Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Liam R. 
Howlett Signed-off-by: Andrew Morton --- tools/testing/shared/linux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c index 66dbb362385f..0f97fb0d19e1 100644 --- a/tools/testing/shared/linux.c +++ b/tools/testing/shared/linux.c @@ -150,7 +150,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list) { if (kmalloc_verbose) - pr_debug("Bulk free %p[0-%lu]\n", list, size - 1); + pr_debug("Bulk free %p[0-%zu]\n", list, size - 1); pthread_mutex_lock(&cachep->lock); for (int i = 0; i < size; i++) @@ -168,7 +168,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, size_t i; if (kmalloc_verbose) - pr_debug("Bulk alloc %lu\n", size); + pr_debug("Bulk alloc %zu\n", size); pthread_mutex_lock(&cachep->lock); if (cachep->nr_objs >= size) { -- cgit v1.2.3 From 9c02223e2d9df5cb37c51aedb78f3960294e09b5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 4 Apr 2025 17:42:32 +0100 Subject: selftests/mm: generate a temporary mountpoint for cgroup filesystem Currently if the filesystem for the cgroups version it wants to use is not mounted charge_reserved_hugetlb.sh and hugetlb_reparenting_test.sh tests will attempt to mount it on the hard coded path /dev/cgroup/memory, deleting that directory when the test finishes. This will fail if there is not a preexisting directory at that path, and since the directory is deleted subsequent runs of the test will fail. Instead of relying on this hard coded directory name use mktemp to generate a temporary directory to use as a mountpoint, fixing both the assumption and the disruption caused by deleting a preexisting directory. This means that if the relevant cgroup filesystem is not already mounted then we rely on having coreutils (which provides mktemp) installed. 
I suspect that many current users are relying on having things automounted by default, and given that the script relies on bash it's probably not an unreasonable requirement. Link: https://lkml.kernel.org/r/20250404-kselftest-mm-cgroup2-detection-v1-1-3dba6d32ba8c@kernel.org Fixes: 209376ed2a84 ("selftests/vm: make charge_reserved_hugetlb.sh work with existing cgroup setting") Signed-off-by: Mark Brown Cc: Aishwarya TCV Cc: Mark Brown Cc: Mina Almasry Cc: Shuah Khan Cc: Waiman Long Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/charge_reserved_hugetlb.sh | 4 ++-- tools/testing/selftests/mm/hugetlb_reparenting_test.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index 67df7b47087f..e1fe16bcbbe8 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -29,7 +29,7 @@ fi if [[ $cgroup2 ]]; then cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then - cgroup_path=/dev/cgroup/memory + cgroup_path=$(mktemp -d) mount -t cgroup2 none $cgroup_path do_umount=1 fi @@ -37,7 +37,7 @@ if [[ $cgroup2 ]]; then else cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then - cgroup_path=/dev/cgroup/memory + cgroup_path=$(mktemp -d) mount -t cgroup memory,hugetlb $cgroup_path do_umount=1 fi diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh index 11f9bbe7dc22..0b0d4ba1af27 100755 --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh @@ -23,7 +23,7 @@ fi if [[ $cgroup2 ]]; then CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$CGROUP_ROOT" ]]; then - CGROUP_ROOT=/dev/cgroup/memory + CGROUP_ROOT=$(mktemp 
-d) mount -t cgroup2 none $CGROUP_ROOT do_umount=1 fi -- cgit v1.2.3 From 8c583e538aa681ecb293d5606054de70f44b5558 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 7 Apr 2025 19:31:35 +0800 Subject: selftests: mincore: fix tmpfs mincore test failure When running mincore test cases, I encountered the following failures: " mincore_selftest.c:359:check_tmpfs_mmap:Expected ra_pages (511) == 0 (0) mincore_selftest.c:360:check_tmpfs_mmap:Read-ahead pages found in memory check_tmpfs_mmap: Test terminated by assertion FAIL global.check_tmpfs_mmap not ok 5 global.check_tmpfs_mmap FAILED: 4 / 5 tests passed " The reason for the test case failure is that my system automatically enabled tmpfs large folio allocation by adding the 'transparent_hugepage_tmpfs=always' cmdline. However, the test case still expects the tmpfs mounted on /dev/shm to allocate small folios, which leads to assertion failures when verifying readahead pages. As discussed with David, there's no reason to continue checking the readahead logic for tmpfs. Drop it to fix this issue. Link: https://lkml.kernel.org/r/9a00856cc6a8b4e46f4ab8b1af11ce5fc1a31851.1744025467.git.baolin.wang@linux.alibaba.com Fixes: d635ccdb435c ("mm: shmem: add a kernel command line to change the default huge policy for tmpfs") Signed-off-by: Baolin Wang Acked-by: Zi Yan Acked-by: David Hildenbrand Cc: Barry Song <21cnbao@gmail.com> Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Cc: Ryan Roberts Signed-off-by: Andrew Morton --- tools/testing/selftests/mincore/mincore_selftest.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index e949a43a6145..0fd4b00bd345 100644 --- a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -286,8 +286,7 @@ out_free: /* * Test mincore() behavior on a page backed by a tmpfs file. 
This test - * performs the same steps as the previous one. However, we don't expect - * any readahead in this case. + * performs the same steps as the previous one. */ TEST(check_tmpfs_mmap) { @@ -298,7 +297,6 @@ TEST(check_tmpfs_mmap) int page_size; int fd; int i; - int ra_pages = 0; page_size = sysconf(_SC_PAGESIZE); vec_size = FILE_SIZE / page_size; @@ -341,8 +339,7 @@ TEST(check_tmpfs_mmap) } /* - * Touch a page in the middle of the mapping. We expect only - * that page to be fetched into memory. + * Touch a page in the middle of the mapping. */ addr[FILE_SIZE / 2] = 1; retval = mincore(addr, FILE_SIZE, vec); @@ -351,15 +348,6 @@ TEST(check_tmpfs_mmap) TH_LOG("Page not found in memory after use"); } - i = FILE_SIZE / 2 / page_size + 1; - while (i < vec_size && vec[i]) { - ra_pages++; - i++; - } - ASSERT_EQ(ra_pages, 0) { - TH_LOG("Read-ahead pages found in memory"); - } - munmap(addr, FILE_SIZE); close(fd); free(vec); -- cgit v1.2.3 From 92868577d05ff75f9f38c6345ed275203827faba Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 9 Apr 2025 15:20:06 +0530 Subject: selftests/mm: fix compiler -Wmaybe-uninitialized warning Following build warning comes up for cow test as 'transferred' variable has not been initialized. Fix the warning via zero init for the variable. 
CC cow cow.c: In function `do_test_vmsplice_in_parent': cow.c:365:61: warning: `transferred' may be used uninitialized [-Wmaybe-uninitialized] 365 | cur = read(fds[0], new + total, transferred - total); | ~~~~~~~~~~~~^~~~~~~ cow.c:296:29: note: `transferred' was declared here 296 | ssize_t cur, total, transferred; | ^~~~~~~~~~~ CC compaction_test CC gup_longterm Link: https://lkml.kernel.org/r/20250409095006.1422620-1-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Cc: Shuah Khan Cc: Anshuman Khandual Cc: David Hildenbrand Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index f0cb14ea8608..b6cfe0a4b7df 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -293,7 +293,7 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, .iov_base = mem, .iov_len = size, }; - ssize_t cur, total, transferred; + ssize_t cur, total, transferred = 0; struct comm_pipes comm_pipes; char *old, *new; int ret, fds[2]; -- cgit v1.2.3 From 1aa495a6572f8641da4ec4cd32210deca61bed64 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 11 Apr 2025 13:36:06 +0100 Subject: kunit: configs: Add some Cirrus Logic modules to all_tests Add CONFIG_I2C and CONFIG_SND_SOC_CS35L56_I2C to all_tests.config so that Cirrus Logic modules with KUnit tests will be built. The CS35L56 driver doesn't currently have any KUnit tests itself, but it enables two other libraries that have KUnit tests: cs_dsp and cs-amp-lib. 
Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250411123608.1676462-2-rf@opensource.cirrus.com Reviewed-by: David Gow Signed-off-by: Mark Brown --- tools/testing/kunit/configs/all_tests.config | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index cdd9782f9646..43d3c31ab53f 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -20,6 +20,7 @@ CONFIG_VFAT_FS=y CONFIG_PCI=y CONFIG_USB4=y +CONFIG_I2C=y CONFIG_NET=y CONFIG_MCTP=y @@ -51,3 +52,4 @@ CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SOC=y CONFIG_SND_SOC_TOPOLOGY_BUILD=y +CONFIG_SND_SOC_CS35L56_I2C=y -- cgit v1.2.3 From a3cd5f507b72c0532c3345b6913557efab34f405 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 13 Apr 2025 02:23:38 +0200 Subject: objtool/rust: add one more `noreturn` Rust function for Rust 1.86.0 Starting with Rust 1.86.0 (see upstream commit b151b513ba2b ("Insert null checks for pointer dereferences when debug assertions are enabled") [1]), under some kernel configurations with `CONFIG_RUST_DEBUG_ASSERTIONS=y`, one may trigger a new `objtool` warning: rust/kernel.o: warning: objtool: _R..._6kernel9workqueue6system() falls through to next function _R...9workqueue14system_highpri() due to a call to the `noreturn` symbol: core::panicking::panic_null_pointer_dereference Thus add it to the list so that `objtool` knows it is actually `noreturn`. See commit 56d680dd23c3 ("objtool/rust: list `noreturn` Rust functions") for more details. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). 
Fixes: 56d680dd23c3 ("objtool/rust: list `noreturn` Rust functions") Link: https://github.com/rust-lang/rust/commit/b151b513ba2b65c7506ec1a80f2712bbd09154d1 [1] Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250413002338.1741593-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- tools/objtool/check.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4a1f6c3169b3..67006eeb30c8 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -225,6 +225,7 @@ static bool is_rust_noreturn(const struct symbol *func) str_ends_with(func->name, "_4core9panicking14panic_nounwind") || str_ends_with(func->name, "_4core9panicking18panic_bounds_check") || str_ends_with(func->name, "_4core9panicking19assert_failed_inner") || + str_ends_with(func->name, "_4core9panicking30panic_null_pointer_dereference") || str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") || strstr(func->name, "_4core9panicking13assert_failed") || strstr(func->name, "_4core9panicking11panic_const24panic_const_") || -- cgit v1.2.3 From b26c1a85f3fc3cc749380ff94199377fc2d0c203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 7 Apr 2025 10:58:03 +0200 Subject: kunit: qemu_configs: SH: Respect kunit cmdline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default SH kunit configuration sets CONFIG_CMDLINE_OVERWRITE which completely disregards the cmdline passed from the bootloader/QEMU in favor of the builtin CONFIG_CMDLINE. However the kunit tool needs to pass arguments to the in-kernel kunit core, for filters and other runtime parameters. Enable CONFIG_CMDLINE_EXTEND instead, so kunit arguments are respected. 
Link: https://lore.kernel.org/r/20250407-kunit-sh-v1-1-f5432a54cf2f@linutronix.de Fixes: 8110a3cab05e ("kunit: tool: Add support for SH under QEMU") Signed-off-by: Thomas Weißschuh Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/qemu_configs/sh.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/kunit/qemu_configs/sh.py b/tools/testing/kunit/qemu_configs/sh.py index 78a474a5b95f..f00cb89fdef6 100644 --- a/tools/testing/kunit/qemu_configs/sh.py +++ b/tools/testing/kunit/qemu_configs/sh.py @@ -7,7 +7,9 @@ CONFIG_CPU_SUBTYPE_SH7751R=y CONFIG_MEMORY_START=0x0c000000 CONFIG_SH_RTS7751R2D=y CONFIG_RTS7751R2D_PLUS=y -CONFIG_SERIAL_SH_SCI=y''', +CONFIG_SERIAL_SH_SCI=y +CONFIG_CMDLINE_EXTEND=y +''', qemu_arch='sh4', kernel_path='arch/sh/boot/zImage', kernel_command_line='console=ttySC1', -- cgit v1.2.3 From 3f2925174f8bd811f9399cb4049f6b75fd2fba91 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 14 Apr 2025 16:35:00 +0200 Subject: lib/prime_numbers: KUnit test should not select PRIME_NUMBERS Enabling a (modular) test should not silently enable additional kernel functionality, as that may increase the attack vector of a product. Fix this by making PRIME_NUMBERS_KUNIT_TEST depend on PRIME_NUMBERS instead of selecting it. After this, one can safely enable CONFIG_KUNIT_ALL_TESTS=m to build modules for all appropriate tests for ones system, without pulling in extra unwanted functionality, while still allowing a tester to manually enable PRIME_NUMBERS and this test suite on a system where PRIME_NUMBERS is not enabled by default. Resurrect CONFIG_PRIME_NUMBERS=m in tools/testing/selftests/lib/config for the latter use case. 
Fixes: 313b38a6ecb46db4 ("lib/prime_numbers: convert self-test to KUnit") Signed-off-by: Geert Uytterhoeven Acked-by: Tamir Duberstein Link: https://lore.kernel.org/r/40f8a40eef4930d3ac9febd205bc171eb04e171c.1744641237.git.geert@linux-m68k.org Signed-off-by: Kees Cook --- tools/testing/selftests/lib/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config index 81a1f64a22e8..377b3699ff31 100644 --- a/tools/testing/selftests/lib/config +++ b/tools/testing/selftests/lib/config @@ -1,2 +1,3 @@ CONFIG_TEST_BITMAP=m +CONFIG_PRIME_NUMBERS=m CONFIG_TEST_BITOPS=m -- cgit v1.2.3 From f9c87590ed6ab78b69042ea31b7b8e37302d53f3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 14 Apr 2025 20:20:22 +0300 Subject: selftests: fib_rule_tests: Add VRF match tests Add tests for FIB rules that match on iif / oif being a VRF device. Test both good and bad flows. With previous patch ("net: fib_rules: Fix iif / oif matching on L3 master device"): # ./fib_rule_tests.sh [...] Tests passed: 328 Tests failed: 0 Without it: # ./fib_rule_tests.sh [...] Tests passed: 324 Tests failed: 4 Signed-off-by: Ido Schimmel Acked-by: David Ahern Link: https://patch.msgid.link/20250414172022.242991-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_rule_tests.sh | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index b866bab1d92a..c7cea556b416 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -359,6 +359,23 @@ fib_rule6_test() "$getnomatch" "iif flowlabel masked redirect to table" \ "iif flowlabel masked no redirect to table" fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? 
-eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP6 iif vrf0" + getmatch="from $SRC_IP6 iif $DEV" + getnomatch="from $SRC_IP6 iif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" + fi } fib_rule6_vrf_test() @@ -635,6 +652,23 @@ fib_rule4_test() "$getnomatch" "iif dscp masked redirect to table" \ "iif dscp masked no redirect to table" fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? -eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP iif vrf0" + getmatch="from $SRC_IP iif $DEV" + getnomatch="from $SRC_IP iif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" + fi } fib_rule4_vrf_test() -- cgit v1.2.3 From 07be53cfa81afe94b14fb4bfee8243f2e0125d5e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 14 Apr 2025 21:09:00 -0400 Subject: selftests/ftrace: Differentiate bash and dash in dynevent_limitations.tc bash and dash evaluate variables differently. dash will evaluate '\\' every time it is read whereas bash does not. TEST_STRING="$TEST_STRING \\$i" echo $TEST_STRING With i=123 On bash, that will print "\123" but on dash, that will print the escape sequence of \123 as the \ will be interpreted again in the echo. The dynevent_limitations.tc test created a very large list of arguments to test the maximum number of arguments to pass to the dynamic events file. 
It had a loop of: TEST_STRING=$1 # Acceptable for i in `seq 1 $MAX_ARGS`; do TEST_STRING="$TEST_STRING \\$i" done echo "$TEST_STRING" >> dynamic_events This worked fine on bash, but when run on dash it failed. This was due to dash interpreting the "\\$i" twice. Once when it was assigned to TEST_STRING and a second time with the echo $TEST_STRING. bash does not process the backslash more than the first time. To solve this, assign a double backslash to a variable "bs" and then echo it to "ts". If "ts" changes, it is dash, if not, it is bash. Then update "bs" accordingly, and use that to assign TEST_STRING. Now this could possibly just check if "$BASH" is defined or not, but this is testing if the issue exists and not just which shell is being used. Link: https://lore.kernel.org/r/20250414210900.4de5e8b9@gandalf.local.home Fixes: 581a7b26ab364 ("selftests/ftrace: Add dynamic events argument limitation test case") Reported-by: Mark Brown Closes: https://lore.kernel.org/all/350786cc-9e40-4396-ab95-4f10d69122fb@sirena.org.uk/ Signed-off-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) Signed-off-by: Shuah Khan --- .../ftrace/test.d/dynevent/dynevent_limitations.tc | 23 +++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc index 6b94b678741a..f656bccb1a14 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc @@ -7,11 +7,32 @@ MAX_ARGS=128 EXCEED_ARGS=$((MAX_ARGS + 1)) +# bash and dash evaluate variables differently. +# dash will evaluate '\\' every time it is read whereas bash does not. 
+# +# TEST_STRING="$TEST_STRING \\$i" +# echo $TEST_STRING +# +# With i=123 +# On bash, that will print "\123" +# but on dash, that will print the escape sequence of \123 as the \ will +# be interpreted again in the echo. +# +# Set a variable "bs" to save a double backslash, then echo that +# to "ts" to see if $ts changed or not. If it changed, it's dash, +# if not, it's bash, and then bs can equal a single backslash. +bs='\\' +ts=`echo $bs` +if [ "$ts" = '\\' ]; then + # this is bash + bs='\' +fi + check_max_args() { # event_header TEST_STRING=$1 # Acceptable for i in `seq 1 $MAX_ARGS`; do - TEST_STRING="$TEST_STRING \\$i" + TEST_STRING="$TEST_STRING $bs$i" done echo "$TEST_STRING" >> dynamic_events echo > dynamic_events -- cgit v1.2.3 From 4d07bbf2d45683841e578a2f255e4c174534bf38 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:44 -0700 Subject: tools: ynl-gen: don't declare loop iterator in place The codegen tries to follow the "old" C style and declare loop iterators at the start of the block / function. Only nested request handling breaks this style, so adjust it. 
Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index a1427c537030..305f5696bc4f 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -654,10 +654,10 @@ class TypeMultiAttr(Type): def attr_put(self, ri, var): if self.attr['type'] in scalars: put_type = self.type - ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)") + ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)") ri.cw.p(f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);") elif 'type' not in self.attr or self.attr['type'] == 'nest': - ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)") + ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)") self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " + f"{self.enum_name}, &{var}->{self.c_name}[i])") else: @@ -1644,11 +1644,23 @@ def put_req_nested_prototype(ri, struct, suffix=';'): def put_req_nested(ri, struct): + local_vars = [] + init_lines = [] + + local_vars.append('struct nlattr *nest;') + init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);") + + for _, arg in struct.member_list(): + if arg.presence_type() == 'count': + local_vars.append('unsigned int i;') + break + put_req_nested_prototype(ri, struct, suffix='') ri.cw.block_start() - ri.cw.write_func_lvar('struct nlattr *nest;') + ri.cw.write_func_lvar(local_vars) - ri.cw.p("nest = ynl_attr_nest_start(nlh, attr_type);") + for line in init_lines: + ri.cw.p(line) for _, arg in struct.member_list(): arg.attr_put(ri, "obj") @@ -1850,6 +1862,11 @@ def print_req(ri): local_vars += ['size_t hdr_len;', 'void *hdr;'] + for _, attr in ri.struct["request"].member_list(): + if 
attr.presence_type() == 'count': + local_vars += ['unsigned int i;'] + break + print_prototype(ri, direction, terminate=False) ri.cw.block_start() ri.cw.write_func_lvar(local_vars) -- cgit v1.2.3 From dfa464b4a603984d648a9beb9bce72df5858c1e2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:45 -0700 Subject: tools: ynl-gen: move local vars after the opening bracket The "function writing helper" tries to put local variables between prototype and the opening bracket. Clearly wrong, but up until now nothing actually uses it to write local vars so it wasn't noticed. Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 305f5696bc4f..662a925bd9e1 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -1399,9 +1399,9 @@ class CodeWriter: def write_func(self, qual_ret, name, body, args=None, local_vars=None): self.write_func_prot(qual_ret=qual_ret, name=name, args=args) + self.block_start() self.write_func_lvar(local_vars=local_vars) - self.block_start() for line in body: self.p(line) self.block_end() -- cgit v1.2.3 From ce6cb8113c842b94e77364b247c4f85c7b34e0c2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:46 -0700 Subject: tools: ynl-gen: individually free previous values on double set When user calls request_attrA_set() multiple times (for the same attribute), and attrA is of type which allocates memory - we try to free the previously associated values. For array types (including multi-attr) we have only freed the array, but the array may have contained pointers. Refactor the code generation for free attr and reuse the generated lines in setters to flush out the previous state. 
Since setters are static inlines in the header we need to add forward declarations for the free helpers of pure nested structs. Track which types get used by arrays and include the right forward declarations. At least ethtool string set and bit set would not be freed without this. Though, admittedly, overriding an already set attribute twice is likely a very very rare thing to do. Fixes: be5bea1cc0bf ("net: add basic C code generators for Netlink") Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 62 +++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 662a925bd9e1..2d856ccc88f4 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -162,9 +162,15 @@ class Type(SpecAttr): def free_needs_iter(self): return False - def free(self, ri, var, ref): + def _free_lines(self, ri, var, ref): if self.is_multi_val() or self.presence_type() == 'len': - ri.cw.p(f'free({var}->{ref}{self.c_name});') + return [f'free({var}->{ref}{self.c_name});'] + return [] + + def free(self, ri, var, ref): + lines = self._free_lines(ri, var, ref) + for line in lines: + ri.cw.p(line) def arg_member(self, ri): member = self._complex_member_type(ri) @@ -263,6 +269,10 @@ class Type(SpecAttr): var = "req" member = f"{var}->{'.'.join(ref)}" + local_vars = [] + if self.free_needs_iter(): + local_vars += ['unsigned int i;'] + code = [] presence = '' for i in range(0, len(ref)): @@ -272,6 +282,10 @@ class Type(SpecAttr): if i == len(ref) - 1 and self.presence_type() != 'bit': continue code.append(presence + ' = 1;') + ref_path = '.'.join(ref[:-1]) + if ref_path: + ref_path += '.'
+ code += self._free_lines(ri, var, ref_path) code += self._setter_lines(ri, member, presence) func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}" @@ -279,7 +293,8 @@ class Type(SpecAttr): alloc = bool([x for x in code if 'alloc(' in x]) if free and not alloc: func_name = '__' + func_name - ri.cw.write_func('static inline void', func_name, body=code, + ri.cw.write_func('static inline void', func_name, local_vars=local_vars, + body=code, args=[f'{type_name(ri, direction, deref=deref)} *{var}'] + self.arg_member(ri)) @@ -482,8 +497,7 @@ class TypeString(Type): ['unsigned int len;'] def _setter_lines(self, ri, member, presence): - return [f"free({member});", - f"{presence}_len = strlen({self.c_name});", + return [f"{presence}_len = strlen({self.c_name});", f"{member} = malloc({presence}_len + 1);", f'memcpy({member}, {self.c_name}, {presence}_len);', f'{member}[{presence}_len] = 0;'] @@ -536,8 +550,7 @@ class TypeBinary(Type): ['unsigned int len;'] def _setter_lines(self, ri, member, presence): - return [f"free({member});", - f"{presence}_len = len;", + return [f"{presence}_len = len;", f"{member} = malloc({presence}_len);", f'memcpy({member}, {self.c_name}, {presence}_len);'] @@ -574,12 +587,14 @@ class TypeNest(Type): def _complex_member_type(self, ri): return self.nested_struct_type - def free(self, ri, var, ref): + def _free_lines(self, ri, var, ref): + lines = [] at = '&' if self.is_recursive_for_op(ri): at = '' - ri.cw.p(f'if ({var}->{ref}{self.c_name})') - ri.cw.p(f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});') + lines += [f'if ({var}->{ref}{self.c_name})'] + lines += [f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});'] + return lines def _attr_typol(self): return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' @@ -632,15 +647,19 @@ class TypeMultiAttr(Type): def free_needs_iter(self): return 'type' not in self.attr or self.attr['type'] == 'nest' - def free(self, ri, var, ref): + def 
_free_lines(self, ri, var, ref): + lines = [] if self.attr['type'] in scalars: - ri.cw.p(f"free({var}->{ref}{self.c_name});") + lines += [f"free({var}->{ref}{self.c_name});"] elif 'type' not in self.attr or self.attr['type'] == 'nest': - ri.cw.p(f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)") - ri.cw.p(f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);') - ri.cw.p(f"free({var}->{ref}{self.c_name});") + lines += [ + f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)", + f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);', + f"free({var}->{ref}{self.c_name});", + ] else: raise Exception(f"Free of MultiAttr sub-type {self.attr['type']} not supported yet") + return lines def _attr_policy(self, policy): return self.base_type._attr_policy(policy) @@ -666,8 +685,7 @@ class TypeMultiAttr(Type): def _setter_lines(self, ri, member, presence): # For multi-attr we have a count, not presence, hack up the presence presence = presence[:-(len('_present.') + len(self.c_name))] + "n_" + self.c_name - return [f"free({member});", - f"{member} = {self.c_name};", + return [f"{member} = {self.c_name};", f"{presence} = n_{self.c_name};"] @@ -755,6 +773,7 @@ class Struct: self.request = False self.reply = False self.recursive = False + self.in_multi_val = False # used by a MultiAttr or and legacy arrays self.attr_list = [] self.attrs = dict() @@ -1122,6 +1141,10 @@ class Family(SpecFamily): if attr in rs_members['reply']: self.pure_nested_structs[nested].reply = True + if spec.is_multi_val(): + child = self.pure_nested_structs.get(nested) + child.in_multi_val = True + self._sort_pure_types() # Propagate the request / reply / recursive @@ -1136,6 +1159,8 @@ class Family(SpecFamily): struct.child_nests.update(child.child_nests) child.request |= struct.request child.reply |= struct.reply + if spec.is_multi_val(): + child.in_multi_val = True if attr_set in struct.child_nests: struct.recursive = True @@ -2958,6 +2983,9 @@ def main(): for attr_set, struct in 
parsed.pure_nested_structs.items(): ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set) print_type_full(ri, struct) + if struct.request and struct.in_multi_val: + free_rsp_nested_prototype(ri) + cw.nl() for op_name, op in parsed.ops.items(): cw.p(f"/* ============== {op.enum_name} ============== */") -- cgit v1.2.3 From 57e7dedf2b8c72caa6f04b9e08b19e4f370562fa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:47 -0700 Subject: tools: ynl-gen: make sure we validate subtype of array-nest ArrayNest AKA indexed-array support currently skips inner type validation. We count the attributes and then we parse them; make sure we call validate, too. Otherwise a buggy / unexpected kernel response may lead to crashes. Fixes: be5bea1cc0bf ("net: add basic C code generators for Netlink") Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 2d856ccc88f4..30c0a34b2784 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -714,8 +714,11 @@ class TypeArrayNest(Type): def _attr_get(self, ri, var): local_vars = ['const struct nlattr *attr2;'] get_lines = [f'attr_{self.c_name} = attr;', - 'ynl_attr_for_each_nested(attr2, attr)', - f'\t{var}->n_{self.c_name}++;'] + 'ynl_attr_for_each_nested(attr2, attr) {', + '\tif (ynl_attr_validate(yarg, attr2))', + '\t\treturn YNL_PARSE_CB_ERROR;', + f'\t{var}->n_{self.c_name}++;', + '}'] return get_lines, None, local_vars -- cgit v1.2.3 From ec120093180b9d92b0c84cb89a205876f9a4cb40 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:17 +0800 Subject: selftests: ublk: fix ublk_find_tgt() The bounds check for iterator variable `i` is missing, so add it and fix ublk_find_tgt().
Cc: Johannes Thumshirn Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 3 +-- tools/testing/selftests/ublk/kublk.h | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 91c282bc7674..74cf70b2f28e 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -14,13 +14,12 @@ static const struct ublk_tgt_ops *tgt_ops_list[] = { static const struct ublk_tgt_ops *ublk_find_tgt(const char *name) { - const struct ublk_tgt_ops *ops; int i; if (name == NULL) return NULL; - for (i = 0; sizeof(tgt_ops_list) / sizeof(ops); i++) + for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) if (strcmp(tgt_ops_list[i]->name, name) == 0) return tgt_ops_list[i]; return NULL; diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 760ff8ffb810..73294f6e3e49 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -30,6 +30,8 @@ #define min(a, b) ((a) < (b) ? (a) : (b)) #endif +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + /****************** part 1: libublk ********************/ #define CTRL_DEV "/dev/ublk-control" -- cgit v1.2.3 From 9cad26d66b7a6306fa1e3cf64e30941afdadf6c8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:18 +0800 Subject: selftests: ublk: add io_uring uapi header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add io_uring UAPI header so that ublk can work with latest uapi definition. Fix the following build failure: stripe.c: In function ‘stripe_to_uring_op’: stripe.c:120:29: error: ‘IORING_OP_READV_FIXED’ undeclared (first use in this function); did you mean ‘IORING_OP_READ_FIXED’? 120 | return zc ? 
IORING_OP_READV_FIXED : IORING_OP_READV; | ^~~~~~~~~~~~~~~~~~~~~ | IORING_OP_READ_FIXED Reviewed-by: Johannes Thumshirn Fixes: 57ed58c13256 ("selftests: ublk: enable zero copy for stripe target") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 73294f6e3e49..eccf12360a14 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include "ublk_dep.h" -- cgit v1.2.3 From 8d31a7e505340a69528cbccb0894ef530f123cbb Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:19 +0800 Subject: selftests: ublk: cleanup backfile automatically Use global array of $UBLK_BACKFILES for storing all backfile names, then clean them up automatically.
Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/test_common.sh | 36 +++++++++++++++++--------- tools/testing/selftests/ublk/test_loop_01.sh | 8 +++--- tools/testing/selftests/ublk/test_loop_02.sh | 8 +++--- tools/testing/selftests/ublk/test_loop_03.sh | 8 +++--- tools/testing/selftests/ublk/test_loop_04.sh | 9 +++---- tools/testing/selftests/ublk/test_loop_05.sh | 8 +++--- tools/testing/selftests/ublk/test_stress_01.sh | 16 +++++------- tools/testing/selftests/ublk/test_stress_02.sh | 16 +++++------- tools/testing/selftests/ublk/test_stripe_01.sh | 12 +++------ tools/testing/selftests/ublk/test_stripe_02.sh | 13 ++++------ tools/testing/selftests/ublk/test_stripe_03.sh | 12 +++------ tools/testing/selftests/ublk/test_stripe_04.sh | 13 ++++------ 12 files changed, 70 insertions(+), 89 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index a88b35943227..c7d04da7235a 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -30,18 +30,26 @@ _run_fio_verify_io() { } _create_backfile() { - local my_size=$1 - local my_file + local index=$1 + local new_size=$2 + local old_file + local new_file - my_file=$(mktemp ublk_file_"${my_size}"_XXXXX) - truncate -s "${my_size}" "${my_file}" - echo "$my_file" + old_file="${UBLK_BACKFILES[$index]}" + [ -f "$old_file" ] && rm -f "$old_file" + + new_file=$(mktemp ublk_file_"${new_size}"_XXXXX) + truncate -s "${new_size}" "${new_file}" + UBLK_BACKFILES["$index"]="$new_file" } -_remove_backfile() { - local file=$1 +_remove_files() { + local file - [ -f "$file" ] && rm -f "$file" + for file in "${UBLK_BACKFILES[@]}"; do + [ -f "$file" ] && rm -f "$file" + done + [ -f "$UBLK_TMP" ] && rm -f "$UBLK_TMP" } _create_tmp_dir() { @@ -129,7 +137,10 @@ _show_result() echo "$1 : [FAIL]" fi fi - [ "$2" 
-ne 0 ] && exit "$2" + if [ "$2" -ne 0 ]; then + _remove_files + exit "$2" + fi return 0 } @@ -138,16 +149,16 @@ _check_add_dev() { local tid=$1 local code=$2 - shift 2 + if [ "${code}" -ne 0 ]; then - _remove_test_files "$@" _show_result "${tid}" "${code}" fi } _cleanup_test() { "${UBLK_PROG}" del -a - rm -f "$UBLK_TMP" + + _remove_files } _have_feature() @@ -247,6 +258,7 @@ UBLK_TMP=$(mktemp ublk_test_XXXXX) UBLK_PROG=$(_ublk_test_top_dir)/kublk UBLK_TEST_QUIET=1 UBLK_TEST_SHOW_RESULT=1 +UBLK_BACKFILES=() export UBLK_PROG export UBLK_TEST_QUIET export UBLK_TEST_SHOW_RESULT diff --git a/tools/testing/selftests/ublk/test_loop_01.sh b/tools/testing/selftests/ublk/test_loop_01.sh index 1ef8b6044777..833fa0dbc700 100755 --- a/tools/testing/selftests/ublk/test_loop_01.sh +++ b/tools/testing/selftests/ublk/test_loop_01.sh @@ -12,10 +12,10 @@ fi _prep_test "loop" "write and verify test" -backfile_0=$(_create_backfile 256M) +_create_backfile 0 256M -dev_id=$(_add_ublk_dev -t loop "$backfile_0") -_check_add_dev $TID $? "${backfile_0}" +dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M @@ -23,6 +23,4 @@ ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_02.sh b/tools/testing/selftests/ublk/test_loop_02.sh index 03863d825e07..874568b3646b 100755 --- a/tools/testing/selftests/ublk/test_loop_02.sh +++ b/tools/testing/selftests/ublk/test_loop_02.sh @@ -8,15 +8,13 @@ ERR_CODE=0 _prep_test "loop" "mkfs & mount & umount" -backfile_0=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t loop "$backfile_0") -_check_add_dev $TID $? "$backfile_0" +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? _mkfs_mount_test /dev/ublkb"${dev_id}" ERR_CODE=$? 
_cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh index e9ca744de8b1..c30f797c6429 100755 --- a/tools/testing/selftests/ublk/test_loop_03.sh +++ b/tools/testing/selftests/ublk/test_loop_03.sh @@ -12,9 +12,9 @@ fi _prep_test "loop" "write and verify over zero copy" -backfile_0=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t loop -z "$backfile_0") -_check_add_dev $TID $? "$backfile_0" +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M @@ -22,6 +22,4 @@ ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_04.sh b/tools/testing/selftests/ublk/test_loop_04.sh index 1435422c38ec..b01d75b3214d 100755 --- a/tools/testing/selftests/ublk/test_loop_04.sh +++ b/tools/testing/selftests/ublk/test_loop_04.sh @@ -8,15 +8,14 @@ ERR_CODE=0 _prep_test "loop" "mkfs & mount & umount with zero copy" -backfile_0=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t loop -z "$backfile_0") -_check_add_dev $TID $? "$backfile_0" +_create_backfile 0 256M + +dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? _mkfs_mount_test /dev/ublkb"${dev_id}" ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_05.sh b/tools/testing/selftests/ublk/test_loop_05.sh index 2e6e2e6978fc..de2141533074 100755 --- a/tools/testing/selftests/ublk/test_loop_05.sh +++ b/tools/testing/selftests/ublk/test_loop_05.sh @@ -12,10 +12,10 @@ fi _prep_test "loop" "write and verify test" -backfile_0=$(_create_backfile 256M) +_create_backfile 0 256M -dev_id=$(_add_ublk_dev -q 2 -t loop "$backfile_0") -_check_add_dev $TID $? 
"${backfile_0}" +dev_id=$(_add_ublk_dev -q 2 -t loop "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M @@ -23,6 +23,4 @@ ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh index a8be24532b24..4c37a2cf13a3 100755 --- a/tools/testing/selftests/ublk/test_stress_01.sh +++ b/tools/testing/selftests/ublk/test_stress_01.sh @@ -10,17 +10,13 @@ ublk_io_and_remove() { local size=$1 shift 1 - local backfile="" - if echo "$@" | grep -q "loop"; then - backfile=${*: -1} - fi + DEV_ID=$(_add_ublk_dev "$@") - _check_add_dev $TID $? "${backfile}" + _check_add_dev $TID $? [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" if ! __run_io_and_remove "${DEV_ID}" "${size}" "no"; then echo "/dev/ublkc${DEV_ID} isn't removed" - _remove_backfile "${backfile}" exit 255 fi } @@ -33,15 +29,15 @@ if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi -BACK_FILE=$(_create_backfile 256M) -ublk_io_and_remove 256M -t loop -q 4 "${BACK_FILE}" +_create_backfile 0 256M + +ublk_io_and_remove 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi -ublk_io_and_remove 256M -t loop -q 4 -z "${BACK_FILE}" +ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" ERR_CODE=$? 
_cleanup_test "stress" -_remove_backfile "${BACK_FILE}" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh index 2159e4cc8140..4b6ad441d500 100755 --- a/tools/testing/selftests/ublk/test_stress_02.sh +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -10,17 +10,13 @@ ublk_io_and_kill_daemon() { local size=$1 shift 1 - local backfile="" - if echo "$@" | grep -q "loop"; then - backfile=${*: -1} - fi + DEV_ID=$(_add_ublk_dev "$@") - _check_add_dev $TID $? "${backfile}" + _check_add_dev $TID $? [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" if ! __run_io_and_remove "${DEV_ID}" "${size}" "yes"; then echo "/dev/ublkc${DEV_ID} isn't removed res ${res}" - _remove_backfile "${backfile}" exit 255 fi } @@ -33,15 +29,15 @@ if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi -BACK_FILE=$(_create_backfile 256M) -ublk_io_and_kill_daemon 256M -t loop -q 4 "${BACK_FILE}" +_create_backfile 0 256M + +ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi -ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${BACK_FILE}" +ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" ERR_CODE=$? _cleanup_test "stress" -_remove_backfile "${BACK_FILE}" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh index 7e387ef656ea..4e4f0fdf3c9b 100755 --- a/tools/testing/selftests/ublk/test_stripe_01.sh +++ b/tools/testing/selftests/ublk/test_stripe_01.sh @@ -12,19 +12,15 @@ fi _prep_test "stripe" "write and verify test" -backfile_0=$(_create_backfile 256M) -backfile_1=$(_create_backfile 256M) +_create_backfile 0 256M +_create_backfile 1 256M -dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1") -_check_add_dev $TID $? 
"${backfile_0}" +dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M ERR_CODE=$? _cleanup_test "stripe" - -_remove_backfile "$backfile_0" -_remove_backfile "$backfile_1" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_02.sh b/tools/testing/selftests/ublk/test_stripe_02.sh index e8a45fa82dde..5820ab2efba4 100755 --- a/tools/testing/selftests/ublk/test_stripe_02.sh +++ b/tools/testing/selftests/ublk/test_stripe_02.sh @@ -8,17 +8,14 @@ ERR_CODE=0 _prep_test "stripe" "mkfs & mount & umount" -backfile_0=$(_create_backfile 256M) -backfile_1=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1") -_check_add_dev $TID $? "$backfile_0" "$backfile_1" +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? _mkfs_mount_test /dev/ublkb"${dev_id}" ERR_CODE=$? _cleanup_test "stripe" - -_remove_backfile "$backfile_0" -_remove_backfile "$backfile_1" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_03.sh b/tools/testing/selftests/ublk/test_stripe_03.sh index c1b34af36145..20b977e27814 100755 --- a/tools/testing/selftests/ublk/test_stripe_03.sh +++ b/tools/testing/selftests/ublk/test_stripe_03.sh @@ -12,19 +12,15 @@ fi _prep_test "stripe" "write and verify test" -backfile_0=$(_create_backfile 256M) -backfile_1=$(_create_backfile 256M) +_create_backfile 0 256M +_create_backfile 1 256M -dev_id=$(_add_ublk_dev -q 2 -t stripe "$backfile_0" "$backfile_1") -_check_add_dev $TID $? "${backfile_0}" +dev_id=$(_add_ublk_dev -q 2 -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M ERR_CODE=$? 
_cleanup_test "stripe" - -_remove_backfile "$backfile_0" -_remove_backfile "$backfile_1" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_04.sh b/tools/testing/selftests/ublk/test_stripe_04.sh index 1f2b642381d1..1b51ed2f1d84 100755 --- a/tools/testing/selftests/ublk/test_stripe_04.sh +++ b/tools/testing/selftests/ublk/test_stripe_04.sh @@ -8,17 +8,14 @@ ERR_CODE=0 _prep_test "stripe" "mkfs & mount & umount on zero copy" -backfile_0=$(_create_backfile 256M) -backfile_1=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t stripe -z -q 2 "$backfile_0" "$backfile_1") -_check_add_dev $TID $? "$backfile_0" "$backfile_1" +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t stripe -z -q 2 "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? _mkfs_mount_test /dev/ublkb"${dev_id}" ERR_CODE=$? _cleanup_test "stripe" - -_remove_backfile "$backfile_0" -_remove_backfile "$backfile_1" - _show_result $TID $ERR_CODE -- cgit v1.2.3 From 573840ab90ad5bfc8711f0252cf88db028ad473e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:20 +0800 Subject: selftests: ublk: make sure _add_ublk_dev can return in sub-shell Detach ublk daemon from the starting process completely by double-fork and clearing its process group, so that `_add_ublk_dev` can return from sub-shell. Then it is more friendly for writing shell test script for adding/recovering ublk device. Prepare for running ublk test in parallel. 
Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 30 ++++++++++++++++++++------ tools/testing/selftests/ublk/test_common.sh | 15 ++++++------- tools/testing/selftests/ublk/test_stress_01.sh | 8 +++---- tools/testing/selftests/ublk/test_stress_02.sh | 8 +++---- 4 files changed, 39 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 74cf70b2f28e..381e31acaad9 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -654,6 +654,8 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id) if (write(evtfd, &id, sizeof(id)) != sizeof(id)) return -EINVAL; + close(evtfd); + return 0; } @@ -889,24 +891,40 @@ static int cmd_dev_add(struct dev_ctx *ctx) exit(-1); } - setsid(); res = fork(); if (res == 0) { + int res2; + + setsid(); + res2 = fork(); + if (res2 == 0) { + /* prepare for detaching */ + close(STDIN_FILENO); + close(STDOUT_FILENO); + close(STDERR_FILENO); run: - res = __cmd_dev_add(ctx); - return res; + res = __cmd_dev_add(ctx); + return res; + } else { + /* detached from the foreground task */ + exit(EXIT_SUCCESS); + } } else if (res > 0) { uint64_t id; + int exit_code = EXIT_FAILURE; res = read(ctx->_evtfd, &id, sizeof(id)); close(ctx->_evtfd); if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) { ctx->dev_id = id - 1; - return __cmd_dev_list(ctx); + if (__cmd_dev_list(ctx) >= 0) + exit_code = EXIT_SUCCESS; } - exit(EXIT_FAILURE); + /* wait for child and detach from it */ + wait(NULL); + exit(exit_code); } else { - return res; + exit(EXIT_FAILURE); } } diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index c7d04da7235a..c43bd1d5c9c0 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -170,7 
+170,6 @@ _have_feature() } _add_ublk_dev() { - local kublk_temp; local dev_id; if [ ! -c /dev/ublk-control ]; then @@ -182,17 +181,17 @@ _add_ublk_dev() { fi fi - kublk_temp=$(mktemp /tmp/kublk-XXXXXX) - if ! "${UBLK_PROG}" add "$@" > "${kublk_temp}" 2>&1; then + if ! dev_id=$("${UBLK_PROG}" add "$@" | grep "dev id" | awk -F '[ :]' '{print $3}'); then echo "fail to add ublk dev $*" - rm -f "${kublk_temp}" return 255 fi - - dev_id=$(grep "dev id" "${kublk_temp}" | awk -F '[ :]' '{print $3}') udevadm settle - rm -f "${kublk_temp}" - echo "${dev_id}" + + if [[ "$dev_id" =~ ^[0-9]+$ ]]; then + echo "${dev_id}" + else + return 255 + fi } # kill the ublk daemon and return ublk device state diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh index 4c37a2cf13a3..61fdbdfe70bc 100755 --- a/tools/testing/selftests/ublk/test_stress_01.sh +++ b/tools/testing/selftests/ublk/test_stress_01.sh @@ -4,19 +4,19 @@ . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh TID="stress_01" ERR_CODE=0 -DEV_ID=-1 ublk_io_and_remove() { local size=$1 + local dev_id shift 1 - DEV_ID=$(_add_ublk_dev "$@") + dev_id=$(_add_ublk_dev "$@") _check_add_dev $TID $? [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" - if ! __run_io_and_remove "${DEV_ID}" "${size}" "no"; then - echo "/dev/ublkc${DEV_ID} isn't removed" + if ! __run_io_and_remove "$dev_id" "${size}" "no"; then + echo "/dev/ublkc$dev_id isn't removed" exit 255 fi } diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh index 4b6ad441d500..7643e58637c8 100755 --- a/tools/testing/selftests/ublk/test_stress_02.sh +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -4,19 +4,19 @@ . 
"$(cd "$(dirname "$0")" && pwd)"/test_common.sh TID="stress_02" ERR_CODE=0 -DEV_ID=-1 ublk_io_and_kill_daemon() { local size=$1 + local dev_id shift 1 - DEV_ID=$(_add_ublk_dev "$@") + dev_id=$(_add_ublk_dev "$@") _check_add_dev $TID $? [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" - if ! __run_io_and_remove "${DEV_ID}" "${size}" "yes"; then - echo "/dev/ublkc${DEV_ID} isn't removed res ${res}" + if ! __run_io_and_remove "$dev_id" "${size}" "yes"; then + echo "/dev/ublkc$dev_id isn't removed res ${res}" exit 255 fi } -- cgit v1.2.3 From bb2cabf23568d74407a3881e81f43777f490299b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:21 +0800 Subject: selftests: ublk: run stress tests in parallel Run stress tests in parallel, meantime add shell local function to simplify the two stress tests. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-6-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/test_common.sh | 34 +++++++++++++++++++++- tools/testing/selftests/ublk/test_stress_01.sh | 39 ++++++++++---------------- tools/testing/selftests/ublk/test_stress_02.sh | 39 ++++++++++---------------- 3 files changed, 63 insertions(+), 49 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index c43bd1d5c9c0..87fd0c824b77 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -230,7 +230,7 @@ __run_io_and_remove() local kill_server=$3 fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ - --rw=readwrite --iodepth=64 --size="${size}" --numjobs=4 \ + --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \ --runtime=20 --time_based > /dev/null 2>&1 & sleep 2 if [ "${kill_server}" = "yes" ]; then @@ -248,6 +248,38 @@ __run_io_and_remove() wait } +run_io_and_remove() +{ + local size=$1 + local dev_id + shift 1 + + 
dev_id=$(_add_ublk_dev "$@") + _check_add_dev "$TID" $? + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" + if ! __run_io_and_remove "$dev_id" "${size}" "no"; then + echo "/dev/ublkc$dev_id isn't removed" + exit 255 + fi +} + +run_io_and_kill_daemon() +{ + local size=$1 + local dev_id + shift 1 + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev "$TID" $? + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" + if ! __run_io_and_remove "$dev_id" "${size}" "yes"; then + echo "/dev/ublkc$dev_id isn't removed res ${res}" + exit 255 + fi +} + _ublk_test_top_dir() { cd "$(dirname "$0")" && pwd diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh index 61fdbdfe70bc..7d3150f057d4 100755 --- a/tools/testing/selftests/ublk/test_stress_01.sh +++ b/tools/testing/selftests/ublk/test_stress_01.sh @@ -7,37 +7,28 @@ ERR_CODE=0 ublk_io_and_remove() { - local size=$1 - local dev_id - shift 1 - - dev_id=$(_add_ublk_dev "$@") - _check_add_dev $TID $? - - [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" - if ! __run_io_and_remove "$dev_id" "${size}" "no"; then - echo "/dev/ublkc$dev_id isn't removed" - exit 255 + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE fi } -_prep_test "stress" "run IO and remove device" - -ublk_io_and_remove 8G -t null -q 4 -ERR_CODE=$? -if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" fi +_prep_test "stress" "run IO and remove device" + _create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M -ublk_io_and_remove 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" -ERR_CODE=$? 
-if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE -fi +ublk_io_and_remove 8G -t null -q 4 & +ublk_io_and_remove 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" & +ublk_io_and_remove 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait -ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" -ERR_CODE=$? _cleanup_test "stress" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh index 7643e58637c8..1a9065125ae1 100755 --- a/tools/testing/selftests/ublk/test_stress_02.sh +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -5,39 +5,30 @@ TID="stress_02" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + ublk_io_and_kill_daemon() { - local size=$1 - local dev_id - shift 1 - - dev_id=$(_add_ublk_dev "$@") - _check_add_dev $TID $? - - [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" - if ! __run_io_and_remove "$dev_id" "${size}" "yes"; then - echo "/dev/ublkc$dev_id isn't removed res ${res}" - exit 255 + run_io_and_kill_daemon "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE fi } _prep_test "stress" "run IO and kill ublk server" -ublk_io_and_kill_daemon 8G -t null -q 4 -ERR_CODE=$? -if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE -fi - _create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M -ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" -ERR_CODE=$? -if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE -fi +ublk_io_and_kill_daemon 8G -t null -q 4 & +ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait -ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" -ERR_CODE=$? 
_cleanup_test "stress" _show_result $TID $ERR_CODE -- cgit v1.2.3 From d836590d9a9e1d822667e2720ef0d5e69a566aef Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:22 +0800 Subject: selftests: ublk: add two stress tests for zero copy feature Add stress_03 & stress_04 for covering zero copy feature. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-7-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 2 ++ tools/testing/selftests/ublk/test_stress_03.sh | 38 ++++++++++++++++++++++++++ tools/testing/selftests/ublk/test_stress_04.sh | 37 +++++++++++++++++++++++++ 3 files changed, 77 insertions(+) create mode 100755 tools/testing/selftests/ublk/test_stress_03.sh create mode 100755 tools/testing/selftests/ublk/test_stress_04.sh (limited to 'tools') diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index c7781efea0f3..7311e8f6bee7 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -21,6 +21,8 @@ TEST_PROGS += test_stripe_04.sh TEST_PROGS += test_stress_01.sh TEST_PROGS += test_stress_02.sh +TEST_PROGS += test_stress_03.sh +TEST_PROGS += test_stress_04.sh TEST_GEN_PROGS_EXTENDED = kublk diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh new file mode 100755 index 000000000000..e0854f71d35b --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_03.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_03" +ERR_CODE=0 + +ublk_io_and_remove() +{ + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +if ! 
_have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and remove device(zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_remove 8G -t null -q 4 -z & +ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & +ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "stress" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh new file mode 100755 index 000000000000..1798a98387e8 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_04.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_04" +ERR_CODE=0 + +ublk_io_and_kill_daemon() +{ + run_io_and_kill_daemon "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi +if ! _have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and kill ublk server(zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_kill_daemon 8G -t null -q 4 -z & +ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "stress" +_show_result $TID $ERR_CODE -- cgit v1.2.3 From 62867a046a223e6eb771e23d2048e839c1d949d7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:23 +0800 Subject: selftests: ublk: setup ring with IORING_SETUP_SINGLE_ISSUER/IORING_SETUP_DEFER_TASKRUN It is observed that this way is more efficient for fast nvme backing file. 
Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-8-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 381e31acaad9..c2acd874f9af 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -346,7 +346,9 @@ static int ublk_queue_init(struct ublk_queue *q) } ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth, - IORING_SETUP_COOP_TASKRUN); + IORING_SETUP_COOP_TASKRUN | + IORING_SETUP_SINGLE_ISSUER | + IORING_SETUP_DEFER_TASKRUN); if (ret < 0) { ublk_err("ublk dev %d queue %d setup io_uring failed %d\n", q->dev->dev_info.dev_id, q->q_id, ret); -- cgit v1.2.3 From 2f0a692a93a585ead9ccffd0642694946d74411f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:24 +0800 Subject: selftests: ublk: set queue pthread's cpu affinity On NUMA machines, ublk IO performance is very sensitive to the queue pthread's affinity setting. Retrieve each queue's affinity and select the 1st cpu as the queue thread's sched affinity; it is observed that single-cpu task affinity gives stable & good performance if the client application is put on a proper cpu. Dump this info when adding one ublk device. Use shmem to communicate each queue's tid between parent and daemon. 
Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-9-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 156 +++++++++++++++++++++++++++++++++-- tools/testing/selftests/ublk/kublk.h | 11 ++- 2 files changed, 159 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index c2acd874f9af..1e21e1401a08 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -206,10 +206,73 @@ static const char *ublk_dev_state_desc(struct ublk_dev *dev) }; } +static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len) +{ + unsigned done = 0; + int i; + + for (i = 0; i < CPU_SETSIZE; i++) { + if (CPU_ISSET(i, set)) + done += snprintf(&buf[done], len - done, "%d ", i); + } +} + +static void ublk_adjust_affinity(cpu_set_t *set) +{ + int j, updated = 0; + + /* + * Just keep the 1st CPU now. + * + * In future, auto affinity selection can be tried. 
+ */ + for (j = 0; j < CPU_SETSIZE; j++) { + if (CPU_ISSET(j, set)) { + if (!updated) { + updated = 1; + continue; + } + CPU_CLR(j, set); + } + } +} + +/* Caller must free the allocated buffer */ +static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY, + .flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF, + }; + cpu_set_t *buf; + int i, ret; + + buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues); + if (!buf) + return -ENOMEM; + + for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) { + data.data[0] = i; + data.len = sizeof(cpu_set_t); + data.addr = (__u64)&buf[i]; + + ret = __ublk_ctrl_cmd(ctrl_dev, &data); + if (ret < 0) { + free(buf); + return ret; + } + ublk_adjust_affinity(&buf[i]); + } + + *ptr_buf = buf; + return 0; +} + static void ublk_ctrl_dump(struct ublk_dev *dev) { struct ublksrv_ctrl_dev_info *info = &dev->dev_info; struct ublk_params p; + cpu_set_t *affinity; int ret; ret = ublk_ctrl_get_params(dev, &p); @@ -218,12 +281,31 @@ static void ublk_ctrl_dump(struct ublk_dev *dev) return; } + ret = ublk_ctrl_get_affinity(dev, &affinity); + if (ret < 0) { + ublk_err("failed to get affinity %m\n"); + return; + } + ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n", info->dev_id, info->nr_hw_queues, info->queue_depth, 1 << p.basic.logical_bs_shift, p.basic.dev_sectors); ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n", info->max_io_buf_bytes, info->ublksrv_pid, info->flags, ublk_dev_state_desc(dev)); + + if (affinity) { + char buf[512]; + int i; + + for (i = 0; i < info->nr_hw_queues; i++) { + ublk_print_cpu_set(&affinity[i], buf, sizeof(buf)); + printf("\tqueue %u: tid %d affinity(%s)\n", + i, dev->q[i].tid, buf); + } + free(affinity); + } + fflush(stdout); } @@ -603,9 +685,24 @@ static int ublk_process_io(struct ublk_queue *q) return reapped; } +static void 
ublk_queue_set_sched_affinity(const struct ublk_queue *q, + cpu_set_t *cpuset) +{ + if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0) + ublk_err("ublk dev %u queue %u set affinity failed", + q->dev->dev_info.dev_id, q->q_id); +} + +struct ublk_queue_info { + struct ublk_queue *q; + sem_t *queue_sem; + cpu_set_t *affinity; +}; + static void *ublk_io_handler_fn(void *data) { - struct ublk_queue *q = data; + struct ublk_queue_info *info = data; + struct ublk_queue *q = info->q; int dev_id = q->dev->dev_info.dev_id; int ret; @@ -615,6 +712,10 @@ static void *ublk_io_handler_fn(void *data) dev_id, q->q_id); return NULL; } + /* IO perf is sensitive with queue pthread affinity on NUMA machine*/ + ublk_queue_set_sched_affinity(q, info->affinity); + sem_post(info->queue_sem); + ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n", q->tid, dev_id, q->q_id); @@ -640,7 +741,7 @@ static void ublk_set_parameters(struct ublk_dev *dev) dev->dev_info.dev_id, ret); } -static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id) +static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id) { uint64_t id; int evtfd = ctx->_evtfd; @@ -653,10 +754,14 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id) else id = ERROR_EVTFD_DEVID; + if (dev && ctx->shadow_dev) + memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q)); + if (write(evtfd, &id, sizeof(id)) != sizeof(id)) return -EINVAL; close(evtfd); + shmdt(ctx->shadow_dev); return 0; } @@ -664,24 +769,46 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id) static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) { - int ret, i; - void *thread_ret; const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info; + struct ublk_queue_info *qinfo; + cpu_set_t *affinity_buf; + void *thread_ret; + sem_t queue_sem; + int ret, i; ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__); + qinfo = (struct ublk_queue_info *)calloc(sizeof(struct 
ublk_queue_info), + dinfo->nr_hw_queues); + if (!qinfo) + return -ENOMEM; + + sem_init(&queue_sem, 0, 0); ret = ublk_dev_prep(ctx, dev); if (ret) return ret; + ret = ublk_ctrl_get_affinity(dev, &affinity_buf); + if (ret) + return ret; + for (i = 0; i < dinfo->nr_hw_queues; i++) { dev->q[i].dev = dev; dev->q[i].q_id = i; + + qinfo[i].q = &dev->q[i]; + qinfo[i].queue_sem = &queue_sem; + qinfo[i].affinity = &affinity_buf[i]; pthread_create(&dev->q[i].thread, NULL, ublk_io_handler_fn, - &dev->q[i]); + &qinfo[i]); } + for (i = 0; i < dinfo->nr_hw_queues; i++) + sem_wait(&queue_sem); + free(qinfo); + free(affinity_buf); + /* everything is fine now, start us */ ublk_set_parameters(dev); ret = ublk_ctrl_start_dev(dev, getpid()); @@ -694,7 +821,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) if (ctx->fg) ublk_ctrl_dump(dev); else - ublk_send_dev_event(ctx, dev->dev_info.dev_id); + ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id); /* wait until we are terminated */ for (i = 0; i < dinfo->nr_hw_queues; i++) @@ -873,7 +1000,7 @@ static int __cmd_dev_add(const struct dev_ctx *ctx) fail: if (ret < 0) - ublk_send_dev_event(ctx, -1); + ublk_send_dev_event(ctx, dev, -1); ublk_ctrl_deinit(dev); return ret; } @@ -887,6 +1014,16 @@ static int cmd_dev_add(struct dev_ctx *ctx) if (ctx->fg) goto run; + ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666); + if (ctx->_shmid < 0) { + ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno)); + exit(-1); + } + ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0); + if (ctx->shadow_dev == (struct ublk_dev *)-1) { + ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno)); + exit(-1); + } ctx->_evtfd = eventfd(0, 0); if (ctx->_evtfd < 0) { ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno)); @@ -922,6 +1059,8 @@ run: if (__cmd_dev_list(ctx) >= 0) exit_code = EXIT_SUCCESS; } + shmdt(ctx->shadow_dev); + shmctl(ctx->_shmid, IPC_RMID, 
NULL); /* wait for child and detach from it */ wait(NULL); exit(exit_code); @@ -988,6 +1127,9 @@ static int __cmd_dev_list(struct dev_ctx *ctx) ublk_err("%s: can't get dev info from %d: %d\n", __func__, ctx->dev_id, ret); } else { + if (ctx->shadow_dev) + memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q)); + ublk_ctrl_dump(dev); } diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index eccf12360a14..85295d3e36cb 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -20,10 +20,15 @@ #include #include #include +#include +#include #include #include -#include +#include + +/* allow ublk_dep.h to override ublk_cmd.h */ #include "ublk_dep.h" +#include #define __maybe_unused __attribute__((unused)) #define MAX_BACK_FILES 4 @@ -74,6 +79,10 @@ struct dev_ctx { unsigned int chunk_size; int _evtfd; + int _shmid; + + /* built from shmem, only for ublk_dump_dev() */ + struct ublk_dev *shadow_dev; }; struct ublk_ctrl_cmd_data { -- cgit v1.2.3 From 6c62fd04e8bfc06f37ccda0d12fd367591445954 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:25 +0800 Subject: selftests: ublk: increase max nr_queues and queue depth Increase max nr_queues to 32, and queue depth to 1024. 
Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-10-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 2 +- tools/testing/selftests/ublk/kublk.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 1e21e1401a08..5e805d358739 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1203,7 +1203,7 @@ static int cmd_dev_get_features(void) static int cmd_dev_help(char *exe) { printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe); - printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n"); + printf("\t default: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); printf("%s del [-n dev_id] -a \n", exe); printf("\t -a delete all devices -n delete specified device\n"); printf("%s list [-n dev_id] -a \n", exe); diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 85295d3e36cb..9b77137b8700 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -50,8 +50,8 @@ #define UBLKSRV_IO_IDLE_SECS 20 #define UBLK_IO_MAX_BYTES (1 << 20) -#define UBLK_MAX_QUEUES 4 -#define UBLK_QUEUE_DEPTH 128 +#define UBLK_MAX_QUEUES 32 +#define UBLK_QUEUE_DEPTH 1024 #define UBLK_DBG_DEV (1U << 0) #define UBLK_DBG_QUEUE (1U << 1) -- cgit v1.2.3 From 810b88f3dcb6d04e274b37d05f421330e20a3714 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:26 +0800 Subject: selftests: ublk: support target specific command line Support target-specific command line options so that the related command-line handling code is more readable & clean. This also helps when adding new features. 
Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-11-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 59 ++++++++++++++++++++++++++++++----- tools/testing/selftests/ublk/kublk.h | 20 ++++++++++-- tools/testing/selftests/ublk/stripe.c | 28 ++++++++++++++++- 3 files changed, 95 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 5e805d358739..03b3d6427775 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -5,6 +5,8 @@ #include "kublk.h" +#define MAX_NR_TGT_ARG 64 + unsigned int ublk_dbg_mask = UBLK_LOG; static const struct ublk_tgt_ops *tgt_ops_list[] = { &null_tgt_ops, @@ -1202,12 +1204,25 @@ static int cmd_dev_get_features(void) static int cmd_dev_help(char *exe) { - printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe); - printf("\t default: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); + int i; + + printf("%s add -t [null|loop|stripe] [-q nr_queues] [-d depth] [-n dev_id]\n", exe); + printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask]\n"); + printf("\t[target options] [backfile1] [backfile2] ...\n"); + printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); + + for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) { + const struct ublk_tgt_ops *ops = tgt_ops_list[i]; + + if (ops->usage) + ops->usage(ops); + } + printf("\n"); + printf("%s del [-n dev_id] -a \n", exe); - printf("\t -a delete all devices -n delete specified device\n"); + printf("\t -a delete all devices -n delete specified device\n\n"); printf("%s list [-n dev_id] -a \n", exe); - printf("\t -a list all devices, -n list specified device, default -a \n"); + printf("\t -a list all devices, -n list specified device, default -a \n\n"); printf("%s features\n", 
exe); return 0; } @@ -1224,9 +1239,9 @@ int main(int argc, char *argv[]) { "quiet", 0, NULL, 0 }, { "zero_copy", 0, NULL, 'z' }, { "foreground", 0, NULL, 0 }, - { "chunk_size", 1, NULL, 0 }, { 0, 0, 0, 0 } }; + const struct ublk_tgt_ops *ops = NULL; int option_idx, opt; const char *cmd = argv[1]; struct dev_ctx ctx = { @@ -1234,13 +1249,15 @@ int main(int argc, char *argv[]) .nr_hw_queues = 2, .dev_id = -1, .tgt_type = "unknown", - .chunk_size = 65536, /* def chunk size is 64K */ }; int ret = -EINVAL, i; + int tgt_argc = 1; + char *tgt_argv[MAX_NR_TGT_ARG] = { NULL }; if (argc == 1) return ret; + opterr = 0; optind = 2; while ((opt = getopt_long(argc, argv, "t:n:d:q:az", longopts, &option_idx)) != -1) { @@ -1271,8 +1288,26 @@ int main(int argc, char *argv[]) ublk_dbg_mask = 0; if (!strcmp(longopts[option_idx].name, "foreground")) ctx.fg = 1; - if (!strcmp(longopts[option_idx].name, "chunk_size")) - ctx.chunk_size = strtol(optarg, NULL, 10); + break; + case '?': + /* + * target requires every option must have argument + */ + if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') { + fprintf(stderr, "every target option requires argument: %s %s\n", + argv[optind - 1], argv[optind]); + exit(EXIT_FAILURE); + } + + if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) { + tgt_argv[tgt_argc++] = argv[optind - 1]; + tgt_argv[tgt_argc++] = argv[optind]; + } else { + fprintf(stderr, "too many target options\n"); + exit(EXIT_FAILURE); + } + optind += 1; + break; } } @@ -1281,6 +1316,14 @@ int main(int argc, char *argv[]) ctx.files[ctx.nr_files++] = argv[i++]; } + ops = ublk_find_tgt(ctx.tgt_type); + if (ops && ops->parse_cmd_line) { + optind = 0; + + tgt_argv[0] = ctx.tgt_type; + ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv); + } + if (!strcmp(cmd, "add")) ret = cmd_dev_add(&ctx); else if (!strcmp(cmd, "del")) diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 9b77137b8700..7b7446359c8f 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ 
b/tools/testing/selftests/ublk/kublk.h @@ -63,6 +63,11 @@ struct ublk_dev; struct ublk_queue; +struct stripe_ctx { + /* stripe */ + unsigned int chunk_size; +}; + struct dev_ctx { char tgt_type[16]; unsigned long flags; @@ -75,14 +80,15 @@ struct dev_ctx { unsigned int all:1; unsigned int fg:1; - /* stripe */ - unsigned int chunk_size; - int _evtfd; int _shmid; /* built from shmem, only for ublk_dump_dev() */ struct ublk_dev *shadow_dev; + + union { + struct stripe_ctx stripe; + }; }; struct ublk_ctrl_cmd_data { @@ -119,6 +125,14 @@ struct ublk_tgt_ops { int (*queue_io)(struct ublk_queue *, int tag); void (*tgt_io_done)(struct ublk_queue *, int tag, const struct io_uring_cqe *); + + /* + * Target specific command line handling + * + * each option requires argument for target command line + */ + void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]); + void (*usage)(const struct ublk_tgt_ops *ops); }; struct ublk_tgt { diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 179731c3dd6f..5dbd6392d83d 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -281,7 +281,7 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) .max_sectors = dev->dev_info.max_io_buf_bytes >> 9, }, }; - unsigned chunk_size = ctx->chunk_size; + unsigned chunk_size = ctx->stripe.chunk_size; struct stripe_conf *conf; unsigned chunk_shift; loff_t bytes = 0; @@ -344,10 +344,36 @@ static void ublk_stripe_tgt_deinit(struct ublk_dev *dev) backing_file_tgt_deinit(dev); } +static void ublk_stripe_cmd_line(struct dev_ctx *ctx, int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "chunk_size", 1, NULL, 0 }, + { 0, 0, 0, 0 } + }; + int option_idx, opt; + + ctx->stripe.chunk_size = 65536; + while ((opt = getopt_long(argc, argv, "", + longopts, &option_idx)) != -1) { + switch (opt) { + case 0: + if (!strcmp(longopts[option_idx].name, "chunk_size")) + 
ctx->stripe.chunk_size = strtol(optarg, NULL, 10); + } + } +} + +static void ublk_stripe_usage(const struct ublk_tgt_ops *ops) +{ + printf("\tstripe: [--chunk_size chunk_size (default 65536)]\n"); +} + const struct ublk_tgt_ops stripe_tgt_ops = { .name = "stripe", .init_tgt = ublk_stripe_tgt_init, .deinit_tgt = ublk_stripe_tgt_deinit, .queue_io = ublk_stripe_queue_io, .tgt_io_done = ublk_stripe_io_done, + .parse_cmd_line = ublk_stripe_cmd_line, + .usage = ublk_stripe_usage, }; -- cgit v1.2.3 From 57e13a2e8cd208db254968631820fc1353da9db0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:27 +0800 Subject: selftests: ublk: support user recovery Add user recovery feature. Meantime add user recovery test: generic_04 and generic_05(zero copy) Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-12-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 2 + tools/testing/selftests/ublk/kublk.c | 96 ++++++++++++++++++++++--- tools/testing/selftests/ublk/kublk.h | 1 + tools/testing/selftests/ublk/test_common.sh | 57 ++++++++++++++- tools/testing/selftests/ublk/test_generic_04.sh | 40 +++++++++++ tools/testing/selftests/ublk/test_generic_05.sh | 44 ++++++++++++ 6 files changed, 230 insertions(+), 10 deletions(-) create mode 100755 tools/testing/selftests/ublk/test_generic_04.sh create mode 100755 tools/testing/selftests/ublk/test_generic_05.sh (limited to 'tools') diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index 7311e8f6bee7..d93373384e93 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -6,6 +6,8 @@ LDLIBS += -lpthread -lm -luring TEST_PROGS := test_generic_01.sh TEST_PROGS += test_generic_02.sh TEST_PROGS += test_generic_03.sh +TEST_PROGS += test_generic_04.sh +TEST_PROGS += test_generic_05.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh diff --git a/tools/testing/selftests/ublk/kublk.c 
b/tools/testing/selftests/ublk/kublk.c index 03b3d6427775..0cd6dce3f303 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -119,6 +119,27 @@ static int ublk_ctrl_start_dev(struct ublk_dev *dev, return __ublk_ctrl_cmd(dev, &data); } +static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_START_USER_RECOVERY, + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_END_USER_RECOVERY, + .flags = CTRL_CMD_HAS_DATA, + }; + + dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid; + + return __ublk_ctrl_cmd(dev, &data); +} + static int ublk_ctrl_add_dev(struct ublk_dev *dev) { struct ublk_ctrl_cmd_data data = { @@ -812,8 +833,12 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) free(affinity_buf); /* everything is fine now, start us */ - ublk_set_parameters(dev); - ret = ublk_ctrl_start_dev(dev, getpid()); + if (ctx->recovery) + ret = ublk_ctrl_end_user_recovery(dev, getpid()); + else { + ublk_set_parameters(dev); + ret = ublk_ctrl_start_dev(dev, getpid()); + } if (ret < 0) { ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret); goto fail; @@ -988,7 +1013,10 @@ static int __cmd_dev_add(const struct dev_ctx *ctx) } } - ret = ublk_ctrl_add_dev(dev); + if (ctx->recovery) + ret = ublk_ctrl_start_user_recovery(dev); + else + ret = ublk_ctrl_add_dev(dev); if (ret < 0) { ublk_err("%s: can't add dev id %d, type %s ret %d\n", __func__, dev_id, tgt_type, ret); @@ -1202,12 +1230,14 @@ static int cmd_dev_get_features(void) return ret; } -static int cmd_dev_help(char *exe) +static void __cmd_create_help(char *exe, bool recovery) { int i; - printf("%s add -t [null|loop|stripe] [-q nr_queues] [-d depth] [-n dev_id]\n", exe); - printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask]\n"); + 
printf("%s %s -t [null|loop|stripe] [-q nr_queues] [-d depth] [-n dev_id]\n", + exe, recovery ? "recover" : "add"); + printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g 0|1]\n"); + printf("\t[-e 0|1 ] [-i 0|1]\n"); printf("\t[target options] [backfile1] [backfile2] ...\n"); printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); @@ -1217,7 +1247,25 @@ static int cmd_dev_help(char *exe) if (ops->usage) ops->usage(ops); } +} + +static void cmd_add_help(char *exe) +{ + __cmd_create_help(exe, false); + printf("\n"); +} + +static void cmd_recover_help(char *exe) +{ + __cmd_create_help(exe, true); + printf("\tPlease provide exact command line for creating this device with real dev_id\n"); printf("\n"); +} + +static int cmd_dev_help(char *exe) +{ + cmd_add_help(exe); + cmd_recover_help(exe); printf("%s del [-n dev_id] -a \n", exe); printf("\t -a delete all devices -n delete specified device\n\n"); @@ -1239,6 +1287,10 @@ int main(int argc, char *argv[]) { "quiet", 0, NULL, 0 }, { "zero_copy", 0, NULL, 'z' }, { "foreground", 0, NULL, 0 }, + { "recovery", 1, NULL, 'r' }, + { "recovery_fail_io", 1, NULL, 'e'}, + { "recovery_reissue", 1, NULL, 'i'}, + { "get_data", 1, NULL, 'g'}, { 0, 0, 0, 0 } }; const struct ublk_tgt_ops *ops = NULL; @@ -1253,13 +1305,14 @@ int main(int argc, char *argv[]) int ret = -EINVAL, i; int tgt_argc = 1; char *tgt_argv[MAX_NR_TGT_ARG] = { NULL }; + int value; if (argc == 1) return ret; opterr = 0; optind = 2; - while ((opt = getopt_long(argc, argv, "t:n:d:q:az", + while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:az", longopts, &option_idx)) != -1) { switch (opt) { case 'a': @@ -1281,6 +1334,25 @@ int main(int argc, char *argv[]) case 'z': ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY; break; + case 'r': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_USER_RECOVERY; + break; + case 'e': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= 
UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO; + break; + case 'i': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE; + break; + case 'g': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_NEED_GET_DATA; case 0: if (!strcmp(longopts[option_idx].name, "debug_mask")) ublk_dbg_mask = strtol(optarg, NULL, 16); @@ -1326,7 +1398,15 @@ int main(int argc, char *argv[]) if (!strcmp(cmd, "add")) ret = cmd_dev_add(&ctx); - else if (!strcmp(cmd, "del")) + else if (!strcmp(cmd, "recover")) { + if (ctx.dev_id < 0) { + fprintf(stderr, "device id isn't provided for recovering\n"); + ret = -EINVAL; + } else { + ctx.recovery = 1; + ret = cmd_dev_add(&ctx); + } + } else if (!strcmp(cmd, "del")) ret = cmd_dev_del(&ctx); else if (!strcmp(cmd, "list")) { ctx.all = 1; diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 7b7446359c8f..3d2b9f14491c 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -79,6 +79,7 @@ struct dev_ctx { unsigned int logging:1; unsigned int all:1; unsigned int fg:1; + unsigned int recovery:1; int _evtfd; int _shmid; diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index 87fd0c824b77..e822b2a2729a 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -169,8 +169,11 @@ _have_feature() return 1 } -_add_ublk_dev() { +_create_ublk_dev() { local dev_id; + local cmd=$1 + + shift 1 if [ ! -c /dev/ublk-control ]; then return ${UBLK_SKIP_CODE} @@ -181,7 +184,7 @@ _add_ublk_dev() { fi fi - if ! dev_id=$("${UBLK_PROG}" add "$@" | grep "dev id" | awk -F '[ :]' '{print $3}'); then + if ! 
dev_id=$("${UBLK_PROG}" "$cmd" "$@" | grep "dev id" | awk -F '[ :]' '{print $3}'); then echo "fail to add ublk dev $*" return 255 fi @@ -194,6 +197,23 @@ _add_ublk_dev() { fi } +_add_ublk_dev() { + _create_ublk_dev "add" "$@" +} + +_recover_ublk_dev() { + local dev_id + local state + + dev_id=$(_create_ublk_dev "recover" "$@") + for ((j=0;j<20;j++)); do + state=$(_get_ublk_dev_state "${dev_id}") + [ "$state" == "LIVE" ] && break + sleep 1 + done + echo "$state" +} + # kill the ublk daemon and return ublk device state __ublk_kill_daemon() { @@ -280,6 +300,39 @@ run_io_and_kill_daemon() fi } +run_io_and_recover() +{ + local state + local dev_id + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev "$TID" $? + + fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ + --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \ + --runtime=20 --time_based > /dev/null 2>&1 & + sleep 4 + + state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED") + if [ "$state" != "QUIESCED" ]; then + echo "device isn't quiesced($state) after killing daemon" + return 255 + fi + + state=$(_recover_ublk_dev -n "$dev_id" "$@") + if [ "$state" != "LIVE" ]; then + echo "faile to recover to LIVE($state)" + return 255 + fi + + if ! __remove_ublk_dev_return "${dev_id}"; then + echo "delete dev ${dev_id} failed" + return 255 + fi + wait +} + + _ublk_test_top_dir() { cd "$(dirname "$0")" && pwd diff --git a/tools/testing/selftests/ublk/test_generic_04.sh b/tools/testing/selftests/ublk/test_generic_04.sh new file mode 100755 index 000000000000..8a3bc080c577 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_04.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_04" +ERR_CODE=0 + +ublk_run_recover_test() +{ + run_io_and_recover "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! 
_have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "recover" "basic recover function verification" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_recover_test -t null -q 2 -r 1 & +ublk_run_recover_test -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_recover_test -t null -q 2 -r 1 -i 1 & +ublk_run_recover_test -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "recover" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh new file mode 100755 index 000000000000..714630b4b329 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_05.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_04" +ERR_CODE=0 + +ublk_run_recover_test() +{ + run_io_and_recover "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +if ! 
_have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "recover" "basic recover function verification (zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_recover_test -t null -q 2 -r 1 -z & +ublk_run_recover_test -t loop -q 2 -r 1 -z "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_recover_test -t null -q 2 -r 1 -z -i 1 & +ublk_run_recover_test -t loop -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "recover" +_show_result $TID $ERR_CODE -- cgit v1.2.3 From 2f9a30bd16643d842da0921dc37bf00c750b0a8b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:28 +0800 Subject: selftests: ublk: add test_stress_05.sh Add test_stress_05.sh for covering removing device with recovery enabled. io-hang has been observed with the following patch: https://lore.kernel.org/linux-block/20250403-ublk_timeout-v3-1-aa09f76c7451@purestorage.com/ Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-13-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 1 + tools/testing/selftests/ublk/test_stress_05.sh | 64 ++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100755 tools/testing/selftests/ublk/test_stress_05.sh (limited to 'tools') diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index d93373384e93..dddc64036aa1 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -25,6 +25,7 @@ TEST_PROGS += test_stress_01.sh TEST_PROGS += test_stress_02.sh TEST_PROGS += test_stress_03.sh TEST_PROGS += test_stress_04.sh +TEST_PROGS += test_stress_05.sh TEST_GEN_PROGS_EXTENDED = kublk diff --git a/tools/testing/selftests/ublk/test_stress_05.sh 
b/tools/testing/selftests/ublk/test_stress_05.sh new file mode 100755 index 000000000000..a7071b10224d --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_05" +ERR_CODE=0 + +run_io_and_remove() +{ + local size=$1 + local dev_id + local dev_pid + shift 1 + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev $TID $? + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" + + fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ + --rw=readwrite --iodepth=128 --size="${size}" --numjobs=4 \ + --runtime=40 --time_based > /dev/null 2>&1 & + sleep 4 + + dev_pid=$(_get_ublk_daemon_pid "$dev_id") + kill -9 "$dev_pid" + + if ! __remove_ublk_dev_return "${dev_id}"; then + echo "delete dev ${dev_id} failed" + return 255 + fi +} + +ublk_io_and_remove() +{ + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +_prep_test "stress" "run IO and remove device with recovery enabled" + +_create_backfile 0 256M +_create_backfile 1 256M + +for reissue in $(seq 0 1); do + ublk_io_and_remove 8G -t null -q 4 -g 1 -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g 1 -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & + wait +done + +if _have_feature "ZERO_COPY"; then + for reissue in $(seq 0 1); do + ublk_io_and_remove 8G -t null -q 4 -g 1 -z -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g 1 -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + wait + done +fi + +_cleanup_test "stress" +_show_result $TID $ERR_CODE -- cgit v1.2.3 From 3bf540609cab0402a7c3e40c1425532f3376318a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:29 +0800 Subject: selftests: ublk: move creating UBLK_TMP into _prep_test() test may exit early because of missing program or not having required feature before calling 
_prep_test(), then $UBLK_TMP isn't cleaned. Fix it by moving creating $UBLK_TMP into _prep_test(), any resources created since _prep_test() will be cleaned by _cleanup_test(). Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-14-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/test_common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index e822b2a2729a..9fc111f64576 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -114,6 +114,7 @@ _prep_test() { local type=$1 shift 1 modprobe ublk_drv > /dev/null 2>&1 + UBLK_TMP=$(mktemp ublk_test_XXXXX) [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*" } @@ -338,7 +339,6 @@ _ublk_test_top_dir() cd "$(dirname "$0")" && pwd } -UBLK_TMP=$(mktemp ublk_test_XXXXX) UBLK_PROG=$(_ublk_test_top_dir)/kublk UBLK_TEST_QUIET=1 UBLK_TEST_SHOW_RESULT=1 -- cgit v1.2.3 From 81586652bb1f6c797159161db8d59c18d66b9eb3 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Wed, 16 Apr 2025 11:54:42 +0800 Subject: selftests: ublk: add generic_06 for covering fault inject Add one simple fault inject target, and verify if an application using ublk device sees an I/O error quickly after the ublk server dies. 
Signed-off-by: Uday Shankar Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250416035444.99569-9-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 4 +- tools/testing/selftests/ublk/fault_inject.c | 98 +++++++++++++++++++++++++ tools/testing/selftests/ublk/kublk.c | 3 +- tools/testing/selftests/ublk/kublk.h | 12 ++- tools/testing/selftests/ublk/test_generic_06.sh | 41 +++++++++++ 5 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/ublk/fault_inject.c create mode 100755 tools/testing/selftests/ublk/test_generic_06.sh (limited to 'tools') diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index dddc64036aa1..ec4624a283bc 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -8,6 +8,7 @@ TEST_PROGS += test_generic_02.sh TEST_PROGS += test_generic_03.sh TEST_PROGS += test_generic_04.sh TEST_PROGS += test_generic_05.sh +TEST_PROGS += test_generic_06.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh @@ -31,7 +32,8 @@ TEST_GEN_PROGS_EXTENDED = kublk include ../lib.mk -$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c +$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c \ + fault_inject.c check: shellcheck -x -f gcc *.sh diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c new file mode 100644 index 000000000000..94a8e729ba4c --- /dev/null +++ b/tools/testing/selftests/ublk/fault_inject.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Fault injection ublk target. Hack this up however you like for + * testing specific behaviors of ublk_drv. Currently is a null target + * with a configurable delay before completing each I/O. This delay can + * be used to test ublk_drv's handling of I/O outstanding to the ublk + * server when it dies. 
+ */ + +#include "kublk.h" + +static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx, + struct ublk_dev *dev) +{ + const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + unsigned long dev_size = 250UL << 30; + + dev->tgt.dev_size = dev_size; + dev->tgt.params = (struct ublk_params) { + .types = UBLK_PARAM_TYPE_BASIC, + .basic = { + .logical_bs_shift = 9, + .physical_bs_shift = 12, + .io_opt_shift = 12, + .io_min_shift = 9, + .max_sectors = info->max_io_buf_bytes >> 9, + .dev_sectors = dev_size >> 9, + }, + }; + + dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us * 1000); + return 0; +} + +static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + struct io_uring_sqe *sqe; + struct __kernel_timespec ts = { + .tv_nsec = (long long)q->dev->private_data, + }; + + ublk_queue_alloc_sqes(q, &sqe, 1); + io_uring_prep_timeout(sqe, &ts, 1, 0); + sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, 1); + + ublk_queued_tgt_io(q, tag, 1); + + return 0; +} + +static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, int tag, + const struct io_uring_cqe *cqe) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + + if (cqe->res != -ETIME) + ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res); + + if (ublk_completed_tgt_io(q, tag)) + ublk_complete_io(q, tag, iod->nr_sectors << 9); + else + ublk_err("%s: io not complete after 1 cqe\n", __func__); +} + +static void ublk_fault_inject_cmd_line(struct dev_ctx *ctx, int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "delay_us", 1, NULL, 0 }, + { 0, 0, 0, 0 } + }; + int option_idx, opt; + + ctx->fault_inject.delay_us = 0; + while ((opt = getopt_long(argc, argv, "", + longopts, &option_idx)) != -1) { + switch (opt) { + case 0: + if (!strcmp(longopts[option_idx].name, "delay_us")) + ctx->fault_inject.delay_us = strtoll(optarg, NULL, 10); + } + } +} + +static void 
ublk_fault_inject_usage(const struct ublk_tgt_ops *ops) +{ + printf("\tfault_inject: [--delay_us us (default 0)]\n"); +} + +const struct ublk_tgt_ops fault_inject_tgt_ops = { + .name = "fault_inject", + .init_tgt = ublk_fault_inject_tgt_init, + .queue_io = ublk_fault_inject_queue_io, + .tgt_io_done = ublk_fault_inject_tgt_io_done, + .parse_cmd_line = ublk_fault_inject_cmd_line, + .usage = ublk_fault_inject_usage, +}; diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 0cd6dce3f303..759f06637146 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -12,6 +12,7 @@ static const struct ublk_tgt_ops *tgt_ops_list[] = { &null_tgt_ops, &loop_tgt_ops, &stripe_tgt_ops, + &fault_inject_tgt_ops, }; static const struct ublk_tgt_ops *ublk_find_tgt(const char *name) @@ -1234,7 +1235,7 @@ static void __cmd_create_help(char *exe, bool recovery) { int i; - printf("%s %s -t [null|loop|stripe] [-q nr_queues] [-d depth] [-n dev_id]\n", + printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", exe, recovery ? 
"recover" : "add"); printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g 0|1]\n"); printf("\t[-e 0|1 ] [-i 0|1]\n"); diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 3d2b9f14491c..29571eb296f1 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -68,6 +68,11 @@ struct stripe_ctx { unsigned int chunk_size; }; +struct fault_inject_ctx { + /* fault_inject */ + unsigned long delay_us; +}; + struct dev_ctx { char tgt_type[16]; unsigned long flags; @@ -81,6 +86,9 @@ struct dev_ctx { unsigned int fg:1; unsigned int recovery:1; + /* fault_inject */ + long long delay_us; + int _evtfd; int _shmid; @@ -88,7 +96,8 @@ struct dev_ctx { struct ublk_dev *shadow_dev; union { - struct stripe_ctx stripe; + struct stripe_ctx stripe; + struct fault_inject_ctx fault_inject; }; }; @@ -384,6 +393,7 @@ static inline int ublk_queue_use_zc(const struct ublk_queue *q) extern const struct ublk_tgt_ops null_tgt_ops; extern const struct ublk_tgt_ops loop_tgt_ops; extern const struct ublk_tgt_ops stripe_tgt_ops; +extern const struct ublk_tgt_ops fault_inject_tgt_ops; void backing_file_tgt_deinit(struct ublk_dev *dev); int backing_file_tgt_init(struct ublk_dev *dev); diff --git a/tools/testing/selftests/ublk/test_generic_06.sh b/tools/testing/selftests/ublk/test_generic_06.sh new file mode 100755 index 000000000000..b67230c42c84 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_06.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_06" +ERR_CODE=0 + +_prep_test "fault_inject" "fast cleanup when all I/Os of one hctx are in server" + +# configure ublk server to sleep 2s before completing each I/O +dev_id=$(_add_ublk_dev -t fault_inject -q 2 -d 1 --delay_us 2000000) +_check_add_dev $TID $? 
+ +STARTTIME=${SECONDS} + +dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 status=none > /dev/null 2>&1 & +dd_pid=$! + +__ublk_kill_daemon ${dev_id} "DEAD" + +wait $dd_pid +dd_exitcode=$? + +ENDTIME=${SECONDS} +ELAPSED=$(($ENDTIME - $STARTTIME)) + +# assert that dd sees an error and exits quickly after ublk server is +# killed. previously this relied on seeing an I/O timeout and so would +# take ~30s +if [ $dd_exitcode -eq 0 ]; then + echo "dd unexpectedly exited successfully!" + ERR_CODE=255 +fi +if [ $ELAPSED -ge 5 ]; then + echo "dd took $ELAPSED seconds to exit (>= 5s tolerance)!" + ERR_CODE=255 +fi + +_cleanup_test "fault_inject" +_show_result $TID $ERR_CODE -- cgit v1.2.3 From 39e703ed3b48c4262be141072d4f42a8b89a10cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 17 Apr 2025 15:45:29 +0300 Subject: selftests/pcie_bwctrl: Fix test progs list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit df6f8c4d72ae ("selftests/pcie_bwctrl: Add 'set_pcie_speed.sh' to TEST_PROGS") added set_pcie_speed.sh into TEST_PROGS but that script is a helper that is only being called by set_pcie_cooling_state.sh, not a test case itself. When set_pcie_speed.sh is in TEST_PROGS, selftest harness will execute also it leading to bwctrl selftest errors: # selftests: pcie_bwctrl: set_pcie_speed.sh # cat: /cur_state: No such file or directory not ok 2 selftests: pcie_bwctrl: set_pcie_speed.sh # exit=1 Place set_pcie_speed.sh into TEST_FILES instead to have it included into installed test files but not execute it from the test harness. 
Fixes: df6f8c4d72ae ("selftests/pcie_bwctrl: Add 'set_pcie_speed.sh' to TEST_PROGS") Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250417124529.11391-1-ilpo.jarvinen@linux.intel.com --- tools/testing/selftests/pcie_bwctrl/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/pcie_bwctrl/Makefile b/tools/testing/selftests/pcie_bwctrl/Makefile index 48ec048f47af..277f92f9d753 100644 --- a/tools/testing/selftests/pcie_bwctrl/Makefile +++ b/tools/testing/selftests/pcie_bwctrl/Makefile @@ -1,2 +1,3 @@ -TEST_PROGS = set_pcie_cooling_state.sh set_pcie_speed.sh +TEST_PROGS = set_pcie_cooling_state.sh +TEST_FILES = set_pcie_speed.sh include ../lib.mk -- cgit v1.2.3 From d481ee35247d2a01764667a25f6f512c292ba42d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 18 Apr 2025 10:12:08 -0400 Subject: tracing: selftests: Add testing a user string to filters Running the following commands was broken: # cd /sys/kernel/tracing # echo "filename.ustring ~ \"/proc*\"" > events/syscalls/sys_enter_openat/filter # echo 1 > events/syscalls/sys_enter_openat/enable # ls /proc/$$/maps # cat trace And would produce nothing when it should have produced something like: ls-1192 [007] ..... 8169.828333: sys_openat(dfd: ffffffffffffff9c, filename: 7efc18359904, flags: 80000, mode: 0) Add a test to check this case so that it will be caught if it breaks again. 
Link: https://lore.kernel.org/linux-trace-kernel/20250417183003.505835fb@gandalf.local.home/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Shuah Khan Link: https://lore.kernel.org/20250418101208.38dc81f5@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- .../ftrace/test.d/filter/event-filter-function.tc | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc index 118247b8dd84..c62165fabd0c 100644 --- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -80,6 +80,26 @@ if [ $misscnt -gt 0 ]; then exit_fail fi +# Check strings too +if [ -f events/syscalls/sys_enter_openat/filter ]; then + DIRNAME=`basename $TMPDIR` + echo "filename.ustring ~ \"*$DIRNAME*\"" > events/syscalls/sys_enter_openat/filter + echo 1 > events/syscalls/sys_enter_openat/enable + echo 1 > tracing_on + ls /bin/sh + nocnt=`grep openat trace | wc -l` + ls $TMPDIR + echo 0 > tracing_on + hitcnt=`grep openat trace | wc -l`; + echo 0 > events/syscalls/sys_enter_openat/enable + if [ $nocnt -gt 0 ]; then + exit_fail + fi + if [ $hitcnt -eq 0 ]; then + exit_fail + fi +fi + reset_events_filter exit 0 -- cgit v1.2.3 From dc915672f9176799e48ac23a155f48742b15ec6c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 17 Apr 2025 17:29:33 -0700 Subject: cxl: Fix devm host device for CXL fwctl initialization Testing revealed the following error message for a CXL memdev that has Feature support: [ 56.690430] cxl mem0: Resources present before probing Attach the allocation of cxl_fwctl to the parent device of cxl_memdev. devm_add_* calls for cxl_memdev should not happen before the memdev probe function or outside the scope of the memdev driver. 
cxl_test missed this bug because cxl_test always arranges for the cxl_mem driver to be loaded before cxl_mock_mem runs. So the driver core always finds the devres list idle in that case. [DJ: Updated subject title and added commit log suggestion from djbw] Fixes: 858ce2f56b52 ("cxl: Add FWCTL support to CXL") Reviewed-by: Dan Williams Reviewed-by: Alison Schofield Link: https://lore.kernel.org/linux-cxl/6801aea053466_71fe2944c@dwillia2-xfh.jf.intel.com.notmuch/ Link: https://patch.msgid.link/20250418002933.406439-1-dave.jiang@intel.com Signed-off-by: Dave Jiang --- tools/testing/cxl/test/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index f2957a3e36fe..bf9caa908f89 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1780,7 +1780,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - rc = devm_cxl_setup_fwctl(cxlmd); + rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd); if (rc) dev_dbg(dev, "No CXL FWCTL setup\n"); -- cgit v1.2.3 From 4ea404fdbc39971814cd3eb36b43c11fb6f32e17 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 22 Apr 2025 16:43:29 +0100 Subject: lib: Ensure prime numbers tests are included in KUnit test runs When the select of PRIME_NUMBERS was removed from its KUnit test Kconfig nothing was added to the KUnit configs, meaning that when run via the KUnit runner the tests are neither built nor run. Add PRIME_NUMBERS to all_tests.config so they are enabled when the KUnit runner builds the kernel. 
Fixes: 3f2925174f8b ("lib/prime_numbers: KUnit test should not select PRIME_NUMBERS") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250422-lib-fix-prime-numbers-kunit-v1-1-4278c1d4a4ae@kernel.org Signed-off-by: Kees Cook --- tools/testing/kunit/configs/all_tests.config | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index cdd9782f9646..7bb885b0c32d 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -43,6 +43,8 @@ CONFIG_REGMAP_BUILD=y CONFIG_AUDIT=y +CONFIG_PRIME_NUMBERS=y + CONFIG_SECURITY=y CONFIG_SECURITY_APPARMOR=y CONFIG_SECURITY_LANDLOCK=y -- cgit v1.2.3 From bfb713ea53c746b07ae69fe97fa9b5388e4f34f9 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 17 Apr 2025 14:55:50 +0100 Subject: perf tools: Fix arm64 build by generating unistd_64.h Since pulling in the kernel changes in commit 22f72088ffe6 ("tools headers: Update the syscall table with the kernel sources"), arm64 is no longer using a generic syscall header and generates one from the syscall table. Therefore we must also generate the syscall header for arm64 before building Perf. Add it as a dependency to libperf which uses one syscall number. Perf uses more, but as libperf is a dependency of Perf it will be generated for both. Future platforms that need this will have to add their own syscall-y targets in libperf manually. Unfortunately the arch specific files that do this (e.g. arch/arm64/include/asm/Kbuild) can't easily be imported into the Perf build. But Perf only needs a subset of the generated files anyway, so redefining them is probably the correct thing to do. 
Fixes: 22f72088ffe6 ("tools headers: Update the syscall table with the kernel sources") Signed-off-by: James Clark Tested-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20250417-james-perf-fix-gen-syscall-v1-1-1d268c923901@linaro.org Signed-off-by: Namhyung Kim --- tools/lib/perf/Makefile | 12 +++++++++++- tools/perf/Makefile.config | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index ffcfd777c451..1a19b5013f45 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -42,6 +42,7 @@ libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) TEST_ARGS := $(if $(V),-v) INCLUDES = \ +-I$(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/tools/lib/perf/include \ -I$(srctree)/tools/lib/ \ -I$(srctree)/tools/include \ @@ -99,7 +100,16 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -$(LIBPERF_IN): FORCE +uapi-asm := $(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi/asm +ifeq ($(SRCARCH),arm64) + syscall-y := $(uapi-asm)/unistd_64.h +endif +uapi-asm-generic: + $(if $(syscall-y),\ + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-headers obj=$(uapi-asm) \ + generic=include/uapi/asm-generic $(syscall-y),) + +$(LIBPERF_IN): uapi-asm-generic FORCE $(Q)$(MAKE) $(build)=libperf $(LIBPERF_A): $(LIBPERF_IN) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index eea95c6c0c71..a52482654d4b 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -29,6 +29,7 @@ include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,SRCARCH) CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated +CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) -- cgit v1.2.3 From f2858f308131a09e33afb766cd70119b5b900569 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Wed, 16 Apr 2025 10:02:46 -0700 Subject: 
selftests/bpf: Mitigate sockmap_ktls disconnect_after_delete failure "sockmap_ktls disconnect_after_delete" test has been failing on BPF CI after recent merges from netdev: * https://github.com/kernel-patches/bpf/actions/runs/14458537639 * https://github.com/kernel-patches/bpf/actions/runs/14457178732 It happens because disconnect has been disabled for TLS [1], and it renders the test case invalid. Removing all the test code creates a conflict between bpf and bpf-next, so for now only remove the offending assert [2]. The test will be removed later on bpf-next. [1] https://lore.kernel.org/netdev/20250404180334.3224206-1-kuba@kernel.org/ [2] https://lore.kernel.org/bpf/cfc371285323e1a3f3b006bfcf74e6cf7ad65258@linux.dev/ Signed-off-by: Ihor Solodrai Signed-off-by: Andrii Nakryiko Reviewed-by: Jiayuan Chen Link: https://lore.kernel.org/bpf/20250416170246.2438524-1-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 2d0796314862..0a99fd404f6d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -68,7 +68,6 @@ static void test_sockmap_ktls_disconnect_after_delete(int family, int map) goto close_cli; err = disconnect(cli); - ASSERT_OK(err, "disconnect"); close_cli: close(cli); -- cgit v1.2.3 From 5533bc70aedc7c9872841ac8649344f8cbc6bc4c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 22 Apr 2025 07:59:41 +0800 Subject: selftests: ublk: fix recover test When adding recovery test: - 'break' is missed for handling '-g' argument - test name of test_generic_05.sh is wrong So fix the two. 
Fixes: 57e13a2e8cd2 ("selftests: ublk: support user recovery") Signed-off-by: Ming Lei Reviewed-by: Uday Shankar Link: https://lore.kernel.org/r/20250421235947.715272-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 1 + tools/testing/selftests/ublk/test_generic_05.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 759f06637146..e57a1486bb48 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1354,6 +1354,7 @@ int main(int argc, char *argv[]) value = strtol(optarg, NULL, 10); if (value) ctx.flags |= UBLK_F_NEED_GET_DATA; + break; case 0: if (!strcmp(longopts[option_idx].name, "debug_mask")) ublk_dbg_mask = strtol(optarg, NULL, 16); diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh index 714630b4b329..3bb00a347402 100755 --- a/tools/testing/selftests/ublk/test_generic_05.sh +++ b/tools/testing/selftests/ublk/test_generic_05.sh @@ -3,7 +3,7 @@ . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh -TID="generic_04" +TID="generic_05" ERR_CODE=0 ublk_run_recover_test() -- cgit v1.2.3 From 8f503637898313c048bf21e386e09be90e30cc31 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 22 Apr 2025 07:59:42 +0800 Subject: selftests: ublk: remove useless 'delay_us' from 'struct dev_ctx' 'delay_us' shouldn't be added to 'struct dev_ctx' since now it is handled by per-target command line & 'struct fault_inject_ctx'. So remove it. 
Fixes: 81586652bb1f ("selftests: ublk: add generic_06 for covering fault inject") Signed-off-by: Ming Lei Reviewed-by: Uday Shankar Link: https://lore.kernel.org/r/20250421235947.715272-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 29571eb296f1..918db5cd633f 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -86,9 +86,6 @@ struct dev_ctx { unsigned int fg:1; unsigned int recovery:1; - /* fault_inject */ - long long delay_us; - int _evtfd; int _shmid; -- cgit v1.2.3 From ce72fea219c13c6485503928181c547d0e26756b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 21 Apr 2025 19:07:14 +0200 Subject: selftests: mptcp: diag: use mptcp_lib_get_info_value When running diag.sh in a loop, chk_dump_one will report the following "grep: write error": 13 ....chk 2 cestab [ OK ] grep: write error 14 ....chk dump_one [ OK ] 15 ....chk 2->0 msk in use after flush [ OK ] 16 ....chk 2->0 cestab after flush [ OK ] This error is caused by a broken pipe. When the output of 'ss' is processed by grep, 'head -n 1' will exit immediately after getting the first line, causing the subsequent pipe to close. At this time, if 'grep' is still trying to write data to the closed pipe, it will trigger a SIGPIPE signal, causing a write error. One solution is not to use this problematic "head -n 1" command, but to use mptcp_lib_get_info_value() helper defined in mptcp_lib.sh to get the value of 'token'. 
Fixes: ba2400166570 ("selftests: mptcp: add a test for mptcp_diag_dump_one") Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Tested-by: Gang Yan Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250421-net-mptcp-pm-defer-freeing-v1-2-e731dc6e86b9@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 4f55477ffe08..e7a75341f0f3 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -206,9 +206,8 @@ chk_dump_one() local token local msg - ss_token="$(ss -inmHMN $ns | grep 'token:' |\ - head -n 1 |\ - sed 's/.*token:\([0-9a-f]*\).*/\1/')" + ss_token="$(ss -inmHMN $ns | + mptcp_lib_get_info_value "token" "token")" token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\ awk -F':[ \t]+' '/^token/ {print $2}')" -- cgit v1.2.3 From 7629d1a04ad2e76709401b655263040486972c2c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 17 Apr 2025 11:47:32 -0700 Subject: selftests/tc-testing: Add test for HFSC queue emptying during peek operation Add a selftest to exercise the condition where qdisc implementations like netem or codel might empty the queue during a peek operation. This tests the defensive code path in HFSC that checks the queue length again after peeking to handle this case. Based on the reproducer from Gerrard, improved by Jamal. 
Reported-by: Gerrard Tai Signed-off-by: Cong Wang Tested-by: Victor Nogueira Reviewed-by: Jamal Hadi Salim Link: https://patch.msgid.link/20250417184732.943057-4-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 39 ++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index d4ea9cd845a3..e26bbc169783 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -313,5 +313,44 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "a4c3", + "name": "Test HFSC with netem/blackhole - queue emptying during peek operation", + "category": [ + "qdisc", + "hfsc", + "netem", + "blackhole" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root drr", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr", + "$TC class add dev $DUMMY parent 1:0 classid 1:2 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 plug limit 1024", + "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 hfsc default 1", + "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 3:1 handle 4:0 netem delay 1ms", + "$TC qdisc add dev $DUMMY parent 4:1 handle 5:0 blackhole", + "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 2>&1 || true", + "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit", + "$TC class del dev $DUMMY parent 3:0 classid 3:1", + "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit", + "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 
2>&1 || true" + ], + "cmdUnderTest": "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc hfsc 3:.*parent 1:2.*default 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] -- cgit v1.2.3 From cd188e9ef80fd005fd8c8de34ed649bd653d00e5 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 18 Apr 2025 21:39:03 +0200 Subject: selftests/fs/mount-notify: test also remove/flush of mntns marks Regression test for FAN_MARK_MNTFS | FAN_MARK_FLUSH bug. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250418193903.2607617-3-amir73il@gmail.com --- .../filesystems/mount-notify/mount-notify_test.c | 57 +++++++++++++++++----- 1 file changed, 46 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c index 4a2d5c454fd1..59a71f22fb11 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c @@ -48,8 +48,16 @@ static uint64_t get_mnt_id(struct __test_metadata *const _metadata, static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; +static const int mark_cmds[] = { + FAN_MARK_ADD, + FAN_MARK_REMOVE, + FAN_MARK_FLUSH +}; + +#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds) + FIXTURE(fanotify) { - int fan_fd; + int fan_fd[NUM_FAN_FDS]; char buf[256]; unsigned int rem; void *next; @@ -61,7 +69,7 @@ FIXTURE(fanotify) { FIXTURE_SETUP(fanotify) { - int ret; + int i, ret; ASSERT_EQ(unshare(CLONE_NEWNS), 0); @@ -89,20 +97,34 @@ FIXTURE_SETUP(fanotify) self->root_id = get_mnt_id(_metadata, "/"); ASSERT_NE(self->root_id, 0); - self->fan_fd = fanotify_init(FAN_REPORT_MNT, 0); - 
ASSERT_GE(self->fan_fd, 0); - - ret = fanotify_mark(self->fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS, - FAN_MNT_ATTACH | FAN_MNT_DETACH, self->ns_fd, NULL); - ASSERT_EQ(ret, 0); + for (i = 0; i < NUM_FAN_FDS; i++) { + self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK, + 0); + ASSERT_GE(self->fan_fd[i], 0); + ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + // On fd[0] we do an extra ADD that changes nothing. + // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark. + ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + } self->rem = 0; } FIXTURE_TEARDOWN(fanotify) { + int i; + ASSERT_EQ(self->rem, 0); - close(self->fan_fd); + for (i = 0; i < NUM_FAN_FDS; i++) + close(self->fan_fd[i]); ASSERT_EQ(fchdir(self->orig_root), 0); @@ -123,8 +145,21 @@ static uint64_t expect_notify(struct __test_metadata *const _metadata, unsigned int thislen; if (!self->rem) { - ssize_t len = read(self->fan_fd, self->buf, sizeof(self->buf)); - ASSERT_GT(len, 0); + ssize_t len; + int i; + + for (i = NUM_FAN_FDS - 1; i >= 0; i--) { + len = read(self->fan_fd[i], self->buf, + sizeof(self->buf)); + if (i > 0) { + // Groups 1,2 should get EAGAIN + ASSERT_EQ(len, -1); + ASSERT_EQ(errno, EAGAIN); + } else { + // Group 0 should get events + ASSERT_GT(len, 0); + } + } self->rem = len; self->next = (void *) self->buf; -- cgit v1.2.3 From 1d019736b6f812bebf3ef89d6e887d06e2a822fc Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Wed, 23 Apr 2025 15:29:03 -0600 Subject: selftests: ublk: common: fix _get_disk_dev_t for pre-9.0 coreutils Some distributions, such as centos stream 9, still have a version of coreutils which does not yet support the %Hr and %Lr formats for stat(1) [1, 2]. 
Running ublk selftests on these distributions results in the following error in tests that use the _get_disk_dev_t helper: line 23: ?r: syntax error: operand expected (error token is "?r") To better accommodate older distributions, rewrite _get_disk_dev_t to use the much older %t and %T formats for stat instead. [1] https://github.com/coreutils/coreutils/blob/v9.0/NEWS#L114 [2] https://pkgs.org/download/coreutils Signed-off-by: Uday Shankar Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250423-ublk_selftests-v1-2-7d060e260e76@purestorage.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/test_common.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index 9fc111f64576..a81210ca3e99 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -17,8 +17,8 @@ _get_disk_dev_t() { local minor dev=/dev/ublkb"${dev_id}" - major=$(stat -c '%Hr' "$dev") - minor=$(stat -c '%Lr' "$dev") + major="0x"$(stat -c '%t' "$dev") + minor="0x"$(stat -c '%T' "$dev") echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) )) } -- cgit v1.2.3 From 85fd85bc025a525354acb2241beb3c5387c551ec Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 23 Apr 2025 09:58:15 +0300 Subject: x86/insn: Fix CTEST instruction decoding insn_decoder_test found a problem with decoding APX CTEST instructions: Found an x86 instruction decoder bug, please report this. ffffffff810021df 62 54 94 05 85 ff ctestneq objdump says 6 bytes, but insn_get_length() says 5 It happens because x86-opcode-map.txt doesn't specify arguments for the instruction and the decoder doesn't expect to see ModRM byte. Fixes: 690ca3a3067f ("x86/insn: Add support for APX EVEX instructions to the opcode map") Signed-off-by: Kirill A. Shutemov Signed-off-by: Ingo Molnar Cc: H. 
Peter Anvin Cc: Adrian Hunter Cc: stable@vger.kernel.org # v6.10+ Link: https://lore.kernel.org/r/20250423065815.2003231-1-kirill.shutemov@linux.intel.com --- tools/arch/x86/lib/x86-opcode-map.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index caedb3ef6688..f5dd84eb55dc 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -996,8 +996,8 @@ AVXcode: 4 83: Grp1 Ev,Ib (1A),(es) # CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL, # CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ -84: CTESTSCC (ev) -85: CTESTSCC (es) | CTESTSCC (66),(es) +84: CTESTSCC Eb,Gb (ev) +85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es) 88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es) 8f: POP2 Bq,Rq (000),(11B),(ev) a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es) -- cgit v1.2.3 From 9bbb8a07fd65fca0f29a869ec3f2435761a6c676 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Mon, 2 Dec 2024 11:19:55 +0100 Subject: tools/hv: update route parsing in kvp daemon After recent changes in the VM network stack, the host fails to display the IP addresses of the VM. As a result the "IP Addresses" column in the "Networking" tab in the Windows Hyper-V Manager is empty. This is caused by a change in the expected output of the "ip route show" command. Previously the gateway address was shown in the third row. Now the gateway addresses might be split into several lines of output. As a result, the string "ra" instead of an IP address is sent to the host. To be more specific, a VM with the well-known wicked network managing tool still shows the expected output in recent openSUSE Tumbleweed snapshots: ip a show dev uplink;ip -4 route show;ip -6 route show 2: uplink: mtu 1500 qdisc mq state ...
link/ether 00:15:5d:d0:93:08 brd ff:ff:ff:ff:ff:ff inet 1.2.3.4/22 brd 1.2.3.255 scope global uplink valid_lft forever preferred_lft forever inet6 fe80::215:5dff:fed0:9308/64 scope link proto kernel_ll valid_lft forever preferred_lft forever default via 1.2.3.254 dev uplink proto dhcp 1.2.3.0/22 dev uplink proto kernel scope link src 1.2.3.4 fe80::/64 dev uplink proto kernel metric 256 pref medium default via fe80::26fc:4e00:3b:74 dev uplink proto ra metric 1024 exp... default via fe80::6a22:8e00:fb:14f8 dev uplink proto ra metric 1024 e... A similar VM, but with NetworkManager as network managing tool: ip a show dev eth0;ip -4 route show;ip -6 route show 2: eth0: mtu 1500 qdisc mq state UP... link/ether 00:15:5d:d0:93:0b brd ff:ff:ff:ff:ff:ff inet 1.2.3.8/22 brd 1.2.3.255 scope global dynamic noprefixroute ... valid_lft 1022sec preferred_lft 1022sec inet6 fe80::215:5dff:fed0:930b/64 scope link noprefixroute valid_lft forever preferred_lft forever default via 1.2.3.254 dev eth0 proto dhcp src 1.2.3.8 metric 100 1.2.3.0/22 dev eth0 proto kernel scope link src 1.2.3.8 metric 100 fe80::/64 dev eth0 proto kernel metric 1024 pref medium default proto ra metric 20100 pref medium nexthop via fe80::6a22:8e00:fb:14f8 dev eth0 weight 1 nexthop via fe80::26fc:4e00:3b:74 dev eth0 weight 1 Adjust the route parsing to use a single line for each line of output. Also use a single shell invocation to retrieve both IPv4 and IPv6 information. The actual IP addresses are expected after the "via" keyword. 
Signed-off-by: Olaf Hering Reviewed-by: Shradha Gupta Link: https://lore.kernel.org/r/20241202102235.9701-1-olaf@aepfle.de Signed-off-by: Wei Liu Message-ID: <20241202102235.9701-1-olaf@aepfle.de> --- tools/hv/hv_kvp_daemon.c | 108 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 84 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 04ba035d67e9..b9ce3aab15fe 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -677,6 +678,88 @@ static void kvp_process_ipconfig_file(char *cmd, pclose(file); } +static bool kvp_verify_ip_address(const void *address_string) +{ + char verify_buf[sizeof(struct in6_addr)]; + + if (inet_pton(AF_INET, address_string, verify_buf) == 1) + return true; + if (inet_pton(AF_INET6, address_string, verify_buf) == 1) + return true; + return false; +} + +static void kvp_extract_routes(const char *line, void **output, size_t *remaining) +{ + static const char needle[] = "via "; + const char *match, *haystack = line; + + while ((match = strstr(haystack, needle))) { + const char *address, *next_char; + + /* Address starts after needle. */ + address = match + strlen(needle); + + /* The char following address is a space or end of line. */ + next_char = strpbrk(address, " \t\\"); + if (!next_char) + next_char = address + strlen(address) + 1; + + /* Enough room for address and semicolon. */ + if (*remaining >= (next_char - address) + 1) { + memcpy(*output, address, next_char - address); + /* Terminate string for verification. */ + memcpy(*output + (next_char - address), "", 1); + if (kvp_verify_ip_address(*output)) { + /* Advance output buffer. */ + *output += next_char - address; + *remaining -= next_char - address; + + /* Each address needs a trailing semicolon. 
*/ + memcpy(*output, ";", 1); + *output += 1; + *remaining -= 1; + } + } + haystack = next_char; + } +} + +static void kvp_get_gateway(void *buffer, size_t buffer_len) +{ + static const char needle[] = "default "; + FILE *f; + void *output = buffer; + char *line = NULL; + size_t alloc_size = 0, remaining = buffer_len - 1; + ssize_t num_chars; + + /* Show route information in a single line, for each address family */ + f = popen("ip --oneline -4 route show;ip --oneline -6 route show", "r"); + if (!f) { + /* Convert buffer into C-String. */ + memcpy(output, "", 1); + return; + } + while ((num_chars = getline(&line, &alloc_size, f)) > 0) { + /* Skip short lines. */ + if (num_chars <= strlen(needle)) + continue; + /* Skip lines without default route. */ + if (memcmp(line, needle, strlen(needle))) + continue; + /* Remove trailing newline to simplify further parsing. */ + if (line[num_chars - 1] == '\n') + line[num_chars - 1] = '\0'; + /* Search routes after match. */ + kvp_extract_routes(line + strlen(needle), &output, &remaining); + } + /* Convert buffer into C-String. */ + memcpy(output, "", 1); + free(line); + pclose(f); +} + static void kvp_get_ipconfig_info(char *if_name, struct hv_kvp_ipaddr_value *buffer) { @@ -685,30 +768,7 @@ static void kvp_get_ipconfig_info(char *if_name, char *p; FILE *file; - /* - * Get the address of default gateway (ipv4). - */ - sprintf(cmd, "%s %s", "ip route show dev", if_name); - strcat(cmd, " | awk '/default/ {print $3 }'"); - - /* - * Execute the command to gather gateway info. - */ - kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, - (MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0); - - /* - * Get the address of default gateway (ipv6). - */ - sprintf(cmd, "%s %s", "ip -f inet6 route show dev", if_name); - strcat(cmd, " | awk '/default/ {print $3 }'"); - - /* - * Execute the command to gather gateway info (ipv6). 
- */ - kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, - (MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1); - + kvp_get_gateway(buffer->gate_way, sizeof(buffer->gate_way)); /* * Gather the DNS state. -- cgit v1.2.3 From 3d9c463f959f41cd6616ebf8a5d15e9d3ef04f16 Mon Sep 17 00:00:00 2001 From: Brandon Kammerdiener Date: Thu, 24 Apr 2025 11:32:55 -0400 Subject: selftests/bpf: add test for softlock when modifying hashmap while iterating Add test that modifies the map while it's being iterated in such a way that hangs the kernel thread unless the _safe fix is applied to bpf_for_each_hash_elem. Signed-off-by: Brandon Kammerdiener Link: https://lore.kernel.org/r/20250424153246.141677-3-brandon.kammerdiener@intel.com Signed-off-by: Alexei Starovoitov Acked-by: Hou Tao --- tools/testing/selftests/bpf/prog_tests/for_each.c | 37 ++++++++++++++++++++++ .../selftests/bpf/progs/for_each_hash_modify.c | 30 ++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/for_each_hash_modify.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c index 09f6487f58b9..5fea3209566e 100644 --- a/tools/testing/selftests/bpf/prog_tests/for_each.c +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c @@ -6,6 +6,7 @@ #include "for_each_array_map_elem.skel.h" #include "for_each_map_elem_write_key.skel.h" #include "for_each_multi_maps.skel.h" +#include "for_each_hash_modify.skel.h" static unsigned int duration; @@ -203,6 +204,40 @@ out: for_each_multi_maps__destroy(skel); } +static void test_hash_modify(void) +{ + struct for_each_hash_modify *skel; + int max_entries, i, err; + __u64 key, val; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1 + ); + + skel = for_each_hash_modify__open_and_load(); + if (!ASSERT_OK_PTR(skel, "for_each_hash_modify__open_and_load")) + return; + + max_entries = 
bpf_map__max_entries(skel->maps.hashmap); + for (i = 0; i < max_entries; i++) { + key = i; + val = i; + err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key), + &val, sizeof(val), BPF_ANY); + if (!ASSERT_OK(err, "map_update")) + goto out; + } + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + ASSERT_OK(topts.retval, "retval"); + +out: + for_each_hash_modify__destroy(skel); +} + void test_for_each(void) { if (test__start_subtest("hash_map")) @@ -213,4 +248,6 @@ void test_for_each(void) test_write_map_key(); if (test__start_subtest("multi_maps")) test_multi_maps(); + if (test__start_subtest("hash_modify")) + test_hash_modify(); } diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_modify.c b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c new file mode 100644 index 000000000000..82307166f789 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Intel Corporation */ +#include "vmlinux.h" +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 128); + __type(key, __u64); + __type(value, __u64); +} hashmap SEC(".maps"); + +static int cb(struct bpf_map *map, __u64 *key, __u64 *val, void *arg) +{ + bpf_map_delete_elem(map, key); + bpf_map_update_elem(map, key, val, 0); + return 0; +} + +SEC("tc") +int test_pkt_access(struct __sk_buff *skb) +{ + (void)skb; + + bpf_for_each_map_elem(&hashmap, cb, NULL, 0); + + return 0; +} -- cgit v1.2.3 From f0007910784a61556e94c42b401a38116a899c73 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Fri, 25 Apr 2025 21:37:10 +0000 Subject: selftests/bpf: Correct typo in __clang_major__ macro Make sure that CAN_USE_BPF_ST test (compute_live_registers/store) is enabled when __clang_major__ >= 18. 
Fixes: 2ea8f6a1cda7 ("selftests/bpf: test cases for compute_live_registers()") Signed-off-by: Peilin Ye Link: https://lore.kernel.org/r/20250425213712.1542077-1-yepeilin@google.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/bpf_misc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 13a2e22f5465..863df7c0fdd0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -221,7 +221,7 @@ #define CAN_USE_GOTOL #endif -#if _clang_major__ >= 18 +#if __clang_major__ >= 18 #define CAN_USE_BPF_ST #endif -- cgit v1.2.3 From bf9de1dcd0eecd16020a677c900a70ea9b0a9714 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 25 Apr 2025 01:37:34 +0300 Subject: selftests: net: bridge_vlan_aware: test untagged/8021p-tagged with and without PVID Recent discussions around commit ad1afb003939 ("vlan_dev: VLAN 0 should be treated as "no vlan tag" (802.1p packet)") have sparked the question what happens with the DSA (and possibly other switchdev) data path when the bridge says that ports should have no PVID VLAN, but the 8021q module, as the result of a NETDEV_UP event, decides it should add VID 0 to the RX filter of those bridge ports. Do those bridge ports receive packets tagged with VID 0 or not, now? We don't know, there is no test. In the veth realm, this passes trivially, because veth is not VLAN filtering and thus the 8021q module lacks the instinct to add VID 0 in the first place. In the realm of VLAN filtering NICs with no switchdev offload, this should also pass, because the VLAN groups of the software bridge are consulted, where it can clearly be seen that a PVID is missing, even though the packet was initially accepted by the NIC.
The test only poses a challenge for switchdev drivers, which usually have to program to hardware both VLANs from RX filtering, as well as from switchdev. Especially when a switchdev port joins a VLAN-aware bridge, it is unavoidable that it gains the NETIF_F_HW_VLAN_CTAG_FILTER feature, i.e. any 8021q uppers that the bridge port may have must also be committed to the RX filtering table of the interface. When a VLAN-tagged packet is physically received by the port, it is initially indistinguishable whether it will reach the bridge data path or the 8021q upper data path. That is rather the final step of the new tests that we introduce. We need to build context up to that stage, which means the following: - we need to test that 802.1p (VID 0) tagged traffic is received in the first place (on bridge ports with a valid PVID). This is the "8021p" test. - we need to test that the usual paths of reaching a configuration with no PVID on a bridge port are all covered and they all reach the same state. 
Signed-off-by: Vladimir Oltean Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/20250424223734.3096202-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_vlan_aware.sh | 96 +++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 90f8a244ea90..e59fba366a0a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid 8021p drop_untagged" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -194,6 +194,100 @@ other_tpid() tc qdisc del dev $h2 clsact } +8021p_do() +{ + local should_fail=$1; shift + local mac=de:ad:be:ef:13:37 + + tc filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + + $MZ -q $h1 -c 1 -b $mac -a own "81:00 00:00 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err_fail $should_fail $? "802.1p-tagged reception" + + tc filter del dev $h2 ingress pref 1 +} + +8021p() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with the default_pvid, 1, packets tagged with VID 0 are + # accepted. + 8021p_do 0 + + # Test that packets tagged with VID 0 are still accepted after changing + # the default_pvid. 
+ ip link set br0 type bridge vlan_default_pvid 10 + 8021p_do 0 + + log_test "Reception of 802.1p-tagged traffic" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + +send_untagged_and_8021p() +{ + ping_do $h1 192.0.2.2 + check_fail $? + + 8021p_do 1 +} + +drop_untagged() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with no PVID, untagged and 802.1p-tagged traffic is + # dropped. + ip link set br0 type bridge vlan_default_pvid 1 + + # First we reconfigure the default_pvid, 1, as a non-PVID VLAN. + bridge vlan add dev $swp1 vid 1 untagged + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Next we try to delete VID 1 altogether + bridge vlan del dev $swp1 vid 1 + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Set up the bridge without a default_pvid, then check that the 8021q + # module, when the bridge port goes down and then up again, does not + # accidentally re-enable untagged packet reception. + ip link set br0 type bridge vlan_default_pvid 0 + ip link set $swp1 down + ip link set $swp1 up + setup_wait + send_untagged_and_8021p + + # Remove swp1 as a bridge port and let it rejoin the bridge while it + # has no default_pvid. 
+ ip link set $swp1 nomaster + ip link set $swp1 master br0 + send_untagged_and_8021p + + # Restore settings + ip link set br0 type bridge vlan_default_pvid 1 + + log_test "Dropping of untagged and 802.1p-tagged traffic with no PVID" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + trap cleanup EXIT setup_prepare -- cgit v1.2.3 From a6e1c5aa16dd5d351603c9d3ae259a069eabdcc2 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:09 -0300 Subject: selftests: tc-testing: Add TDC tests that exercise reentrant enqueue behaviour Add 5 TDC tests that exercise the reentrant enqueue behaviour in drr, ets, qfq, and hfsc: - Test DRR's enqueue reentrant behaviour with netem (which caused a double list add) - Test ETS's enqueue reentrant behaviour with netem (which caused a double list add) - Test QFQ's enqueue reentrant behaviour with netem (which caused a double list add) - Test HFSC's enqueue reentrant behaviour with netem (which caused a UAF) - Test nested DRR's enqueue reentrant behaviour with netem (which caused a double list add) Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-6-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 186 +++++++++++++++++++++ 1 file changed, 186 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index e26bbc169783..0843f6d37e9c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -352,5 +352,191 @@ "$TC qdisc del dev $DUMMY handle 1:0 root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "90ec", + "name": "Test DRR's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP 
link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root drr", + "$TC class replace dev $DUMMY parent 1:0 classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "drr", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "1f1f", + "name": "Test ETS's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "ets" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root ets bands 2", + "$TC class replace dev $DUMMY parent 1:0 classid 1:1 ets quantum 1500", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s class show dev $DUMMY", + "matchJSON": [ + { + "class": "ets", + "handle": "1:1", + "stats": { + "bytes": 196, + "packets": 2 + } + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "5e6d", + "name": "Test QFQ's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 
10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root qfq", + "$TC class replace dev $DUMMY parent 1:0 classid 1:1 qfq weight 100 maxpkt 1500", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "qfq", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "bf1d", + "name": "Test HFSC's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root hfsc", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc ls m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1", + "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2", + "ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true", + "$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1", + "$TC class del dev $DUMMY classid 1:1" + ], + "cmdUnderTest": "ping -c 1 10.10.10.2 -I$DUMMY > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "hfsc", + "handle": "1:", + "bytes": 392, + "packets": 4 + } + ], + "matchCount": "1", + 
"teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "7c3b", + "name": "Test nested DRR's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root drr", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1", + "$TC qdisc add dev $DUMMY handle 2:0 parent 1:1 drr", + "$TC class add dev $DUMMY classid 2:1 parent 2:0 drr", + "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 2:1", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3:0 netem duplicate 100%" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "drr", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] -- cgit v1.2.3 From 730d837979bac203c786f2c5b0707f5426275c0d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 29 Apr 2025 10:29:36 +0800 Subject: selftests: ublk: fix UBLK_F_NEED_GET_DATA Commit 57e13a2e8cd2 ("selftests: ublk: support user recovery") starts to support UBLK_F_NEED_GET_DATA for covering recovery feature, however the ublk utility implementation isn't done correctly. Fix it by supporting UBLK_F_NEED_GET_DATA correctly. Also add test generic_07 for covering UBLK_F_NEED_GET_DATA. 
Reviewed-by: Caleb Sander Mateos Fixes: 57e13a2e8cd2 ("selftests: ublk: support user recovery") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250429022941.1718671-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 1 + tools/testing/selftests/ublk/kublk.c | 22 ++++++++++++------- tools/testing/selftests/ublk/kublk.h | 1 + tools/testing/selftests/ublk/test_generic_07.sh | 28 +++++++++++++++++++++++++ tools/testing/selftests/ublk/test_stress_05.sh | 8 +++---- 5 files changed, 48 insertions(+), 12 deletions(-) create mode 100755 tools/testing/selftests/ublk/test_generic_07.sh (limited to 'tools') diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index ec4624a283bc..f34ac0bac696 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -9,6 +9,7 @@ TEST_PROGS += test_generic_03.sh TEST_PROGS += test_generic_04.sh TEST_PROGS += test_generic_05.sh TEST_PROGS += test_generic_06.sh +TEST_PROGS += test_generic_07.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index e57a1486bb48..842b40736a9b 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -536,12 +536,17 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag) if (!(io->flags & UBLKSRV_IO_FREE)) return 0; - /* we issue because we need either fetching or committing */ + /* + * we issue because we need either fetching or committing or + * getting data + */ if (!(io->flags & - (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP))) + (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA))) return 0; - if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) + if (io->flags & UBLKSRV_NEED_GET_DATA) + cmd_op = UBLK_U_IO_NEED_GET_DATA; + else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) cmd_op = 
UBLK_U_IO_COMMIT_AND_FETCH_REQ; else if (io->flags & UBLKSRV_NEED_FETCH_RQ) cmd_op = UBLK_U_IO_FETCH_REQ; @@ -658,6 +663,9 @@ static void ublk_handle_cqe(struct io_uring *r, assert(tag < q->q_depth); if (q->tgt_ops->queue_io) q->tgt_ops->queue_io(q, tag); + } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { + io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE; + ublk_queue_io_cmd(q, io, tag); } else { /* * COMMIT_REQ will be completed immediately since no fetching @@ -1237,7 +1245,7 @@ static void __cmd_create_help(char *exe, bool recovery) printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", exe, recovery ? "recover" : "add"); - printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g 0|1]\n"); + printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g]\n"); printf("\t[-e 0|1 ] [-i 0|1]\n"); printf("\t[target options] [backfile1] [backfile2] ...\n"); printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); @@ -1313,7 +1321,7 @@ int main(int argc, char *argv[]) opterr = 0; optind = 2; - while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:az", + while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:gaz", longopts, &option_idx)) != -1) { switch (opt) { case 'a': @@ -1351,9 +1359,7 @@ int main(int argc, char *argv[]) ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE; break; case 'g': - value = strtol(optarg, NULL, 10); - if (value) - ctx.flags |= UBLK_F_NEED_GET_DATA; + ctx.flags |= UBLK_F_NEED_GET_DATA; break; case 0: if (!strcmp(longopts[option_idx].name, "debug_mask")) diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 918db5cd633f..44ee1e4ac55b 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -115,6 +115,7 @@ struct ublk_io { #define UBLKSRV_NEED_FETCH_RQ (1UL << 0) #define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) #define UBLKSRV_IO_FREE (1UL << 2) 
+#define UBLKSRV_NEED_GET_DATA (1UL << 3) unsigned short flags; unsigned short refs; /* used by target code only */ diff --git a/tools/testing/selftests/ublk/test_generic_07.sh b/tools/testing/selftests/ublk/test_generic_07.sh new file mode 100755 index 000000000000..cba86451fa5e --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_07.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_07" +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test UBLK_F_NEED_GET_DATA" + +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop -q 2 -g "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? +if [ "$ERR_CODE" -eq 0 ]; then + _mkfs_mount_test /dev/ublkb"${dev_id}" + ERR_CODE=$? +fi + +_cleanup_test "generic" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh index a7071b10224d..88601b48f1cd 100755 --- a/tools/testing/selftests/ublk/test_stress_05.sh +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -47,15 +47,15 @@ _create_backfile 0 256M _create_backfile 1 256M for reissue in $(seq 0 1); do - ublk_io_and_remove 8G -t null -q 4 -g 1 -r 1 -i "$reissue" & - ublk_io_and_remove 256M -t loop -q 4 -g 1 -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & + ublk_io_and_remove 8G -t null -q 4 -g -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & wait done if _have_feature "ZERO_COPY"; then for reissue in $(seq 0 1); do - ublk_io_and_remove 8G -t null -q 4 -g 1 -z -r 1 -i "$reissue" & - ublk_io_and_remove 256M -t loop -q 4 -g 1 -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + ublk_io_and_remove 8G -t null -q 4 -g -z -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g -z -r 1 -i "$reissue" 
"${UBLK_BACKFILES[1]}" & wait done fi -- cgit v1.2.3 From 8988c4b91945173a6b5505764915d470f0238fdc Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 29 Apr 2025 15:22:18 +0100 Subject: perf tools: Fix in-source libperf build When libperf is built alone in-source, $(OUTPUT) isn't set. This causes the generated uapi path to resolve to '/../arch' which results in a permissions error: mkdir: cannot create directory '/../arch': Permission denied Fix it by removing the preceding '/..' which means that it gets generated either in the tools/lib/perf part of the tree or the OUTPUT folder. Some other rules that rely on OUTPUT further refine this conditionally depending on whether it's an in-source or out-of-source build, but I don't think we need the extra complexity here. And this rule is slightly different to others because the header is needed by both libperf and Perf. This is further complicated by the fact that Perf always passes O=... to libperf even for in source builds, meaning that OUTPUT isn't set consistently between projects. Because we're no longer going one level up to try to generate the file in the tools/ folder, Perf's include rule needs to descend into libperf. Also fix the clean rule while we're here. 
Reported-by: Thorsten Leemhuis Closes: https://lore.kernel.org/linux-perf-users/7703f88e-ccb7-4c98-9da4-8aad224e780f@leemhuis.info/ Fixes: bfb713ea53c7 ("perf tools: Fix arm64 build by generating unistd_64.h") Signed-off-by: James Clark Tested-by: Thorsten Leemhuis Link: https://lore.kernel.org/r/20250429-james-perf-fix-libperf-in-source-build-v1-1-a1a827ac15e5@linaro.org Signed-off-by: Namhyung Kim --- tools/lib/perf/Makefile | 6 +++--- tools/perf/Makefile.config | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 1a19b5013f45..7fbb50b74c00 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -42,7 +42,7 @@ libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) TEST_ARGS := $(if $(V),-v) INCLUDES = \ --I$(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi \ +-I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/tools/lib/perf/include \ -I$(srctree)/tools/lib/ \ -I$(srctree)/tools/include \ @@ -100,7 +100,7 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -uapi-asm := $(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi/asm +uapi-asm := $(OUTPUT)arch/$(SRCARCH)/include/generated/uapi/asm ifeq ($(SRCARCH),arm64) syscall-y := $(uapi-asm)/unistd_64.h endif @@ -130,7 +130,7 @@ all: fixdep clean: $(LIBAPI)-clean $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd tests/*.o LIBPERF-CFLAGS $(LIBPERF_PC) \ - $(TESTS_STATIC) $(TESTS_SHARED) + $(TESTS_STATIC) $(TESTS_SHARED) $(syscall-y) TESTS_IN = tests-in.o diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a52482654d4b..b7769a22fe1a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -29,7 +29,7 @@ include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,SRCARCH) CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated -CFLAGS += 
-I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi +CFLAGS += -I$(OUTPUT)libperf/arch/$(SRCARCH)/include/generated/uapi # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) -- cgit v1.2.3 From efa6eb7d77aaf5b05eed25c0ecbf7754cc325c83 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 26 Apr 2025 17:48:56 +0300 Subject: selftests: net: tsn_lib: create common helper for counting received packets This snippet will be necessary for a future isochron-based test, so provide a simpler high-level interface for counting the received packets. Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250426144859.3128352-3-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ocelot/psfp.sh | 7 +------ tools/testing/selftests/net/forwarding/tsn_lib.sh | 11 +++++++++++ 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh index bed748dde4b0..f96a4bc7120f 100755 --- a/tools/testing/selftests/drivers/net/ocelot/psfp.sh +++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh @@ -272,12 +272,7 @@ run_test() "" \ "${isochron_dat}" - # Count all received packets by looking at the non-zero RX timestamps - received=$(isochron report \ - --input-file "${isochron_dat}" \ - --printf-format "%u\n" --printf-args "R" | \ - grep -w -v '0' | wc -l) - + received=$(isochron_report_num_received "${isochron_dat}") if [ "${received}" = "${expected}" ]; then RET=0 else diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh index b91bcd8008a9..19da1ccceac8 100644 --- a/tools/testing/selftests/net/forwarding/tsn_lib.sh +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -247,3 +247,14 @@ isochron_do() cpufreq_restore ${ISOCHRON_CPU} } + +isochron_report_num_received() +{ + local isochron_dat=$1; shift + + # Count all received packets by 
looking at the non-zero RX timestamps + isochron report \ + --input-file "${isochron_dat}" \ + --printf-format "%u\n" --printf-args "R" | \ + grep -w -v '0' | wc -l +} -- cgit v1.2.3 From f52fe6efd61f54c5cb0e19ef1fde96cf23048a70 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 26 Apr 2025 17:48:57 +0300 Subject: selftests: net: tsn_lib: add window_size argument to isochron_do() Make out-of-band testing (send a packet when its traffic class gate is closed, expecting it to be delayed) more predictable by allowing the window size to be customized by isochron_do(). From man isochron-send, the window size alters the advance time (the delta between the transmission time of the packet, and its expected TX time when using SO_TXTIME or tc-taprio on the sender). In absence of the argument, isochron-send defaults to maximizing the advance time (making it equal to the cycle length). The default behavior is exactly what is problematic. An advance time that is too large will make packets intended to be out-of-band still be potentially in-band with an open gate from the schedule's previous cycle. We need to allow that advance time to be reduced. Perhaps a bit confusingly, isochron_do() has a shift_time argument currently, but that does not help here. The shift time shifts both the user space wakeup time and the expected TX time by equal amounts, it is incapable of bringing them closer to one another. Set the window size properly for the Ocelot PSFP selftest as well. That used to work due to a very carefully chosen SHIFT_TIME_NS. I've re-tested that the test still works properly.
Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250426144859.3128352-4-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ocelot/psfp.sh | 1 + tools/testing/selftests/net/forwarding/tsn_lib.sh | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh index f96a4bc7120f..8972f42dfe03 100755 --- a/tools/testing/selftests/drivers/net/ocelot/psfp.sh +++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh @@ -266,6 +266,7 @@ run_test() "${base_time}" \ "${CYCLE_TIME_NS}" \ "${SHIFT_TIME_NS}" \ + "${GATE_DURATION_NS}" \ "${NUM_PKTS}" \ "${STREAM_VID}" \ "${STREAM_PRIO}" \ diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh index 19da1ccceac8..bcee7960a39f 100644 --- a/tools/testing/selftests/net/forwarding/tsn_lib.sh +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -182,6 +182,7 @@ isochron_do() local base_time=$1; shift local cycle_time=$1; shift local shift_time=$1; shift + local window_size=$1; shift local num_pkts=$1; shift local vid=$1; shift local priority=$1; shift @@ -212,6 +213,10 @@ isochron_do() extra_args="${extra_args} --shift-time=${shift_time}" fi + if ! [ -z "${window_size}" ]; then + extra_args="${extra_args} --window-size=${window_size}" + fi + if [ "${use_l2}" = "true" ]; then extra_args="${extra_args} --l2 --etype=0xdead ${vid}" receiver_extra_args="--l2 --etype=0xdead" -- cgit v1.2.3 From 4eb9da050f005fbbb7d301e8e99cfdb6e4771a0d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 26 Apr 2025 17:48:58 +0300 Subject: selftests: net: tc_taprio: new test Add a forwarding path test for tc-taprio, based on isochron. This is specifically intended for NICs with an offloaded data path (switchdev/DSA) and requires taprio 'flags 2'. 
Also, $h1 and $h2 must support hardware timestamping, and $h1 tc-etf offload, for isochron to work. Packets received by a switch while the egress port has a taprio schedule with an open gate for the traffic class must be sent right away. Packets received by the switch while the traffic class gate is closed must be delayed until it opens. Packets received by the switch must be dropped if the gate for the traffic class never opens. Packets should pass if the maximum SDU for the traffic class allows it, and should be dropped otherwise. The schedule should auto-update itself if clock jumps take place while taprio is installed. Repeat most of the above tests after forcing two clock jumps, one backwards (in Jan 1970) and one back into the present. Symlink it from tools/testing/selftests/drivers/net/dsa, because usually DSA ports have the same MAC address, and we need STABLE_MAC_ADDRS=yes from its forwarding.config for the test to run successfully. Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250426144859.3128352-5-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/dsa/tc_taprio.sh | 1 + .../testing/selftests/net/forwarding/tc_taprio.sh | 421 +++++++++++++++++++++ tools/testing/selftests/net/forwarding/tsn_lib.sh | 10 + 3 files changed, 432 insertions(+) create mode 120000 tools/testing/selftests/drivers/net/dsa/tc_taprio.sh create mode 100755 tools/testing/selftests/net/forwarding/tc_taprio.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh new file mode 120000 index 000000000000..d16a65e7595d --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh \ No newline at end of file diff --git a/tools/testing/selftests/net/forwarding/tc_taprio.sh b/tools/testing/selftests/net/forwarding/tc_taprio.sh new file mode 100755 index 000000000000..8992aeabfe0b --- /dev/null +++
b/tools/testing/selftests/net/forwarding/tc_taprio.sh @@ -0,0 +1,421 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" \ + test_clock_jump_backward \ + test_taprio_after_ptp \ + test_max_sdu \ + test_clock_jump_backward_forward \ +" +NUM_NETIFS=4 +source tc_common.sh +source lib.sh +source tsn_lib.sh + +require_command python3 + +# The test assumes the usual topology from the README, where h1 is connected to +# swp1, h2 to swp2, and swp1 and swp2 are together in a bridge. +# Additional assumption: h1 and h2 use the same PHC, and so do swp1 and swp2. +# By synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized to +# swp1 (and both to CLOCK_REALTIME). +h1=${NETIFS[p1]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p3]} +h2=${NETIFS[p4]} + +UDS_ADDRESS_H1="/var/run/ptp4l_h1" +UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1" + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +# Tunables +NUM_PKTS=100 +STREAM_VID=10 +STREAM_PRIO_1=6 +STREAM_PRIO_2=5 +STREAM_PRIO_3=4 +# PTP uses TC 0 +ALL_GATES=$((1 << 0 | 1 << STREAM_PRIO_1 | 1 << STREAM_PRIO_2)) +# Use a conservative cycle of 10 ms to allow the test to still pass when the +# kernel has some extra overhead like lockdep etc +CYCLE_TIME_NS=10000000 +# Create two Gate Control List entries, one OPEN and one CLOSE, of equal +# durations +GATE_DURATION_NS=$((CYCLE_TIME_NS / 2)) +# Give 2/3 of the cycle time to user space and 1/3 to the kernel +FUDGE_FACTOR=$((CYCLE_TIME_NS / 3)) +# Shift the isochron base time by half the gate time, so that packets are +# always received by swp1 close to the middle of the time slot, to minimize +# inaccuracies due to network sync +SHIFT_TIME_NS=$((GATE_DURATION_NS / 2)) + +path_delay= + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 
$H2_IPV6/64 +} + +switch_create() +{ + local h2_mac_addr=$(mac_get $h2) + + ip link set $swp1 up + ip link set $swp2 up + + ip link add br0 type bridge vlan_filtering 1 + ip link set $swp1 master br0 + ip link set $swp2 master br0 + ip link set br0 up + + bridge vlan add dev $swp2 vid $STREAM_VID + bridge vlan add dev $swp1 vid $STREAM_VID + bridge fdb add dev $swp2 \ + $h2_mac_addr vlan $STREAM_VID static master +} + +switch_destroy() +{ + ip link del br0 +} + +ptp_setup() +{ + # Set up swp1 as a master PHC for h1, synchronized to the local + # CLOCK_REALTIME. + phc2sys_start $UDS_ADDRESS_SWP1 + ptp4l_start $h1 true $UDS_ADDRESS_H1 + ptp4l_start $swp1 false $UDS_ADDRESS_SWP1 +} + +ptp_cleanup() +{ + ptp4l_stop $swp1 + ptp4l_stop $h1 + phc2sys_stop +} + +txtime_setup() +{ + local if_name=$1 + + tc qdisc add dev $if_name clsact + # Classify PTP on TC 7 and isochron on TC 6 + tc filter add dev $if_name egress protocol 0x88f7 \ + flower action skbedit priority 7 + tc filter add dev $if_name egress protocol 802.1Q \ + flower vlan_ethtype 0xdead action skbedit priority 6 + tc qdisc add dev $if_name handle 100: parent root mqprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + hw 1 + # Set up TC 5, 6, 7 for SO_TXTIME. tc-mqprio queues count from 1. + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_1 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_2 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_3 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR +} + +txtime_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name clsact + tc qdisc del dev $if_name root +} + +taprio_replace() +{ + local if_name="$1"; shift + local extra_args="$1"; shift + + # STREAM_PRIO_1 always has an open gate. 
+ # STREAM_PRIO_2 has a gate open for GATE_DURATION_NS (half the cycle time) + # STREAM_PRIO_3 always has a closed gate. + tc qdisc replace dev $if_name root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + sched-entry S $(printf "%x" $ALL_GATES) $GATE_DURATION_NS \ + sched-entry S $(printf "%x" $((ALL_GATES & ~(1 << STREAM_PRIO_2)))) $GATE_DURATION_NS \ + base-time 0 flags 0x2 $extra_args + taprio_wait_for_admin $if_name +} + +taprio_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name root +} + +probe_path_delay() +{ + local isochron_dat="$(mktemp)" + local received + + log_info "Probing path delay" + + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" 0 \ + "$CYCLE_TIME_NS" "" "" "$NUM_PKTS" \ + "$STREAM_VID" "$STREAM_PRIO_1" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + if [ "$received" != "$NUM_PKTS" ]; then + echo "Cannot establish basic data path between $h1 and $h2" + exit $ksft_fail + fi + + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(np.max(w))) + EOF + path_delay=$(python3 ./isochron_postprocess.py) + + log_info "Path delay from $h1 to $h2 estimated at $path_delay ns" + + if [ "$path_delay" -gt "$GATE_DURATION_NS" ]; then + echo "Path delay larger than gate duration, aborting" + exit $ksft_fail + fi + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + switch_create + + txtime_setup $h1 + + # Temporarily set up PTP just to probe the end-to-end path delay. 
+ ptp_setup + probe_path_delay + ptp_cleanup +} + +cleanup() +{ + pre_cleanup + + isochron_recv_stop + txtime_cleanup $h1 + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +run_test() +{ + local base_time=$1; shift + local stream_prio=$1; shift + local expected_delay=$1; shift + local should_fail=$1; shift + local test_name=$1; shift + local isochron_dat="$(mktemp)" + local received + local median_delay + + RET=0 + + # Set the shift time equal to the cycle time, which effectively + # cancels the default advance time. Packets won't be sent early in + # software, which ensures that they won't prematurely enter through + # the open gate in __test_out_of_band(). Also, the gate is open for + # long enough that this won't cause a problem in __test_in_band(). + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" "$base_time" \ + "$CYCLE_TIME_NS" "$SHIFT_TIME_NS" "$GATE_DURATION_NS" \ + "$NUM_PKTS" "$STREAM_VID" "$stream_prio" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + [ "$received" = "$NUM_PKTS" ] + check_err_fail $should_fail $? "Reception of $NUM_PKTS packets" + + if [ $should_fail = 0 ] && [ "$received" = "$NUM_PKTS" ]; then + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(int(np.median(w)))) + EOF + median_delay=$(python3 ./isochron_postprocess.py) + + # If the condition below is true, packets were delayed by a closed gate + [ "$median_delay" -gt $((path_delay + expected_delay)) ] + check_fail $? 
"Median delay $median_delay is greater than expected delay $expected_delay plus path delay $path_delay" + + # If the condition below is true, packets were sent expecting them to + # hit a closed gate in the switch, but were not delayed + [ "$expected_delay" -gt 0 ] && [ "$median_delay" -lt "$expected_delay" ] + check_fail $? "Median delay $median_delay is less than expected delay $expected_delay" + fi + + log_test "$test_name" + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +__test_always_open() +{ + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Gate always open" +} + +__test_always_closed() +{ + run_test 0.000000000 $STREAM_PRIO_3 0 1 "Gate always closed" +} + +__test_in_band() +{ + # Send packets in-band with the OPEN gate entry + run_test 0.000000000 $STREAM_PRIO_2 0 0 "In band with gate" +} + +__test_out_of_band() +{ + # Send packets in-band with the CLOSE gate entry + run_test 0.005000000 $STREAM_PRIO_2 \ + $((GATE_DURATION_NS - SHIFT_TIME_NS)) 0 \ + "Out of band with gate" +} + +run_subtests() +{ + __test_always_open + __test_always_closed + __test_in_band + __test_out_of_band +} + +test_taprio_after_ptp() +{ + log_info "Setting up taprio after PTP" + ptp_setup + taprio_replace $swp2 + run_subtests + taprio_cleanup $swp2 + ptp_cleanup +} + +__test_under_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 100 0" + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Under maximum SDU" +} + +__test_over_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 20 0" + run_test 0.000000000 $STREAM_PRIO_1 0 1 "Over maximum SDU" +} + +test_max_sdu() +{ + ptp_setup + __test_under_max_sdu + __test_over_max_sdu + taprio_cleanup $swp2 + ptp_cleanup +} + +# Perform a clock jump in the past without synchronization running, so that the +# time base remains where it was set by phc_ctl. 
+test_clock_jump_backward() +{ + # This is a more complex schedule specifically crafted in a way that + # has been problematic on NXP LS1028A. Not much to test with it other + # than the fact that it passes traffic. + tc qdisc replace dev $swp2 root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \ + base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \ + sched-entry S 20 300000 sched-entry S 48 200000 \ + sched-entry S 20 300000 sched-entry S 83 200000 \ + sched-entry S 40 300000 sched-entry S 00 200000 flags 2 + + log_info "Forcing a backward clock jump" + phc_ctl $swp1 set 0 + + ping_test $h1 192.0.2.2 + taprio_cleanup $swp2 +} + +# Test that taprio tolerates clock jumps. +# Since ptp4l and phc2sys are running, it is expected for the time to +# eventually recover (through yet another clock jump). Isochron waits +# until that is the case. +test_clock_jump_backward_forward() +{ + log_info "Forcing a backward and a forward clock jump" + taprio_replace $swp2 + phc_ctl $swp1 set 0 + ptp_setup + ping_test $h1 192.0.2.2 + run_subtests + ptp_cleanup + taprio_cleanup $swp2 +} + +tc_offload_check +if [[ $? 
-ne 0 ]]; then + log_test_skip "Could not test offloaded functionality" + exit $EXIT_STATUS +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh index bcee7960a39f..08c044ff6689 100644 --- a/tools/testing/selftests/net/forwarding/tsn_lib.sh +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -2,6 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright 2021-2022 NXP +tc_testing_scripts_dir=$(dirname $0)/../../tc-testing/scripts + REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes} REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes} @@ -18,6 +20,7 @@ fi if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then require_command phc2sys require_command ptp4l + require_command phc_ctl fi phc2sys_start() @@ -263,3 +266,10 @@ isochron_report_num_received() --printf-format "%u\n" --printf-args "R" | \ grep -w -v '0' | wc -l } + +taprio_wait_for_admin() +{ + local if_name="$1"; shift + + "$tc_testing_scripts_dir/taprio_wait_for_admin.sh" "$(which tc)" "$if_name" +} -- cgit v1.2.3