diff options
Diffstat (limited to 'tools/testing/selftests')
20 files changed, 1601 insertions, 193 deletions
| diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 16f5d74ae762..d6534d7301a2 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -380,10 +380,11 @@ static bool reclaim_until(const char *memcg, long goal);   *   * Then it checks actual memory usages and expects that:   * A/B    memory.current ~= 50M - * A/B/C  memory.current ~= 29M - * A/B/D  memory.current ~= 21M - * A/B/E  memory.current ~= 0 - * A/B/F  memory.current  = 0 + * A/B/C  memory.current ~= 29M [memory.events:low > 0] + * A/B/D  memory.current ~= 21M [memory.events:low > 0] + * A/B/E  memory.current ~= 0   [memory.events:low == 0 if !memory_recursiveprot, + *				 undefined otherwise] + * A/B/F  memory.current  = 0   [memory.events:low == 0]   * (for origin of the numbers, see model in memcg_protection.m.)   *   * After that it tries to allocate more than there is @@ -495,10 +496,10 @@ static int test_memcg_protection(const char *root, bool min)  	for (i = 0; i < ARRAY_SIZE(children); i++)  		c[i] = cg_read_long(children[i], "memory.current"); -	if (!values_close(c[0], MB(29), 10)) +	if (!values_close(c[0], MB(29), 15))  		goto cleanup; -	if (!values_close(c[1], MB(21), 10)) +	if (!values_close(c[1], MB(21), 20))  		goto cleanup;  	if (c[3] != 0) @@ -525,7 +526,14 @@ static int test_memcg_protection(const char *root, bool min)  		goto cleanup;  	} +	/* +	 * Child 2 has memory.low=0, but some low protection may still be +	 * distributed down from its parent with memory.low=50M if cgroup2 +	 * memory_recursiveprot mount option is enabled. Ignore the low +	 * event count in this case. +	 */  	for (i = 0; i < ARRAY_SIZE(children); i++) { +		int ignore_low_events_index = has_recursiveprot ? 
2 : -1;  		int no_low_events_index = 1;  		long low, oom; @@ -534,6 +542,8 @@ static int test_memcg_protection(const char *root, bool min)  		if (oom)  			goto cleanup; +		if (i == ignore_low_events_index) +			continue;  		if (i <= no_low_events_index && low <= 0)  			goto cleanup;  		if (i > no_low_events_index && low) diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index ecbf07afc6dd..ff21524be458 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -3,7 +3,7 @@  TEST_GEN_FILES += access_memory access_memory_even -TEST_FILES = _chk_dependency.sh _damon_sysfs.py +TEST_FILES = _damon_sysfs.py  # functionality tests  TEST_PROGS += sysfs.sh diff --git a/tools/testing/selftests/damon/_chk_dependency.sh b/tools/testing/selftests/damon/_chk_dependency.sh deleted file mode 100644 index dda3a87dc00a..000000000000 --- a/tools/testing/selftests/damon/_chk_dependency.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - -DBGFS=$(grep debugfs /proc/mounts --max-count 1 | awk '{print $2}') -if [ "$DBGFS" = "" ] -then -	echo "debugfs not mounted" -	exit $ksft_skip -fi - -DBGFS+="/damon" - -if [ $EUID -ne 0 ]; -then -	echo "Run as root" -	exit $ksft_skip -fi - -if [ ! -d "$DBGFS" ] -then -	echo "$DBGFS not found" -	exit $ksft_skip -fi - -if [ -f "$DBGFS/monitor_on_DEPRECATED" ] -then -	monitor_on_file="monitor_on_DEPRECATED" -else -	monitor_on_file="monitor_on" -fi - -for f in attrs target_ids "$monitor_on_file" -do -	if [ ! -f "$DBGFS/$f" ] -	then -		echo "$f not found" -		exit 1 -	fi -done - -permission_error="Operation not permitted" -for f in attrs target_ids "$monitor_on_file" -do -	status=$( cat "$DBGFS/$f" 2>&1 ) -	if [ "${status#*$permission_error}" != "$status" ]; then -		echo "Permission for reading $DBGFS/$f denied; maybe secureboot enabled?" 
-		exit $ksft_skip -	fi -done diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 6e136dc3df19..1e587e0b1a39 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -420,11 +420,16 @@ class Kdamond:                  tried_regions = []                  tried_regions_dir = os.path.join(                          scheme.sysfs_dir(), 'tried_regions') +                region_indices = []                  for filename in os.listdir(                          os.path.join(scheme.sysfs_dir(), 'tried_regions')):                      tried_region_dir = os.path.join(tried_regions_dir, filename)                      if not os.path.isdir(tried_region_dir):                          continue +                    region_indices.append(int(filename)) +                for region_idx in sorted(region_indices): +                    tried_region_dir = os.path.join(tried_regions_dir, +                                                    '%d' % region_idx)                      region_values = []                      for f in ['start', 'end', 'nr_accesses', 'age']:                          content, err = read_file( diff --git a/tools/testing/selftests/damon/_debugfs_common.sh b/tools/testing/selftests/damon/_debugfs_common.sh deleted file mode 100644 index 54d45791b0d9..000000000000 --- a/tools/testing/selftests/damon/_debugfs_common.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -test_write_result() { -	file=$1 -	content=$2 -	orig_content=$3 -	expect_reason=$4 -	expected=$5 - -	if [ "$expected" = "0" ] -	then -		echo "$content" > "$file" -	else -		echo "$content" > "$file" 2> /dev/null -	fi -	if [ $? 
-ne "$expected" ] -	then -		echo "writing $content to $file doesn't return $expected" -		echo "expected because: $expect_reason" -		echo "$orig_content" > "$file" -		exit 1 -	fi -} - -test_write_succ() { -	test_write_result "$1" "$2" "$3" "$4" 0 -} - -test_write_fail() { -	test_write_result "$1" "$2" "$3" "$4" 1 -} - -test_content() { -	file=$1 -	orig_content=$2 -	expected=$3 -	expect_reason=$4 - -	content=$(cat "$file") -	if [ "$content" != "$expected" ] -	then -		echo "reading $file expected $expected but $content" -		echo "expected because: $expect_reason" -		echo "$orig_content" > "$file" -		exit 1 -	fi -} - -source ./_chk_dependency.sh - -damon_onoff="$DBGFS/monitor_on" -if [ -f "$DBGFS/monitor_on_DEPRECATED" ] -then -	damon_onoff="$DBGFS/monitor_on_DEPRECATED" -else -	damon_onoff="$DBGFS/monitor_on" -fi - -if [ $(cat "$damon_onoff") = "on" ] -then -	echo "monitoring is on" -	exit $ksft_skip -fi diff --git a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c index 85acb4e3ef00..72d51ad0ee0e 100644 --- a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c +++ b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c @@ -50,7 +50,7 @@ TEST(eventfd_check_flag_rdwr)  	ASSERT_GE(fd, 0);  	flags = fcntl(fd, F_GETFL); -	// since the kernel automatically added O_RDWR. +	// The kernel automatically adds the O_RDWR flag.  	EXPECT_EQ(flags, O_RDWR);  	close(fd); @@ -85,7 +85,7 @@ TEST(eventfd_check_flag_nonblock)  	close(fd);  } -TEST(eventfd_chek_flag_cloexec_and_nonblock) +TEST(eventfd_check_flag_cloexec_and_nonblock)  {  	int fd, flags; @@ -178,8 +178,7 @@ TEST(eventfd_check_flag_semaphore)  	// The semaphore could only be obtained from fdinfo.  	
ret = verify_fdinfo(fd, &err, "eventfd-semaphore: ", 19, "1\n");  	if (ret != 0) -		ksft_print_msg("eventfd-semaphore check failed, msg: %s\n", -				err.msg); +		ksft_print_msg("eventfd semaphore flag check failed: %s\n", err.msg);  	EXPECT_EQ(ret, 0);  	close(fd); diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index c5241b193db8..824266982aa3 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -20,6 +20,7 @@ mremap_test  on-fault-limit  transhuge-stress  pagemap_ioctl +pfnmap  *.tmp*  protection_keys  protection_keys_32 @@ -58,3 +59,4 @@ hugetlb_dio  pkey_sighandler_tests_32  pkey_sighandler_tests_64  guard-regions +merge diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 8270895039d1..ae6f994d3add 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -84,6 +84,7 @@ TEST_GEN_FILES += mremap_test  TEST_GEN_FILES += mseal_test  TEST_GEN_FILES += on-fault-limit  TEST_GEN_FILES += pagemap_ioctl +TEST_GEN_FILES += pfnmap  TEST_GEN_FILES += thuge-gen  TEST_GEN_FILES += transhuge-stress  TEST_GEN_FILES += uffd-stress @@ -98,6 +99,7 @@ TEST_GEN_FILES += hugetlb_madv_vs_map  TEST_GEN_FILES += hugetlb_dio  TEST_GEN_FILES += droppable  TEST_GEN_FILES += guard-regions +TEST_GEN_FILES += merge  ifneq ($(ARCH),arm64)  TEST_GEN_FILES += soft-dirty diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index eba43ead13ae..0cd9d236649d 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -8,6 +8,7 @@  #include <fcntl.h>  #include <linux/limits.h>  #include <linux/userfaultfd.h> +#include <linux/fs.h>  #include <setjmp.h>  #include <signal.h>  #include <stdbool.h> @@ -2075,4 +2076,60 @@ TEST_F(guard_regions, pagemap)  	ASSERT_EQ(munmap(ptr, 10 * page_size), 0);  } +/* + * Assert that PAGEMAP_SCAN correctly reports 
guard region ranges. + */ +TEST_F(guard_regions, pagemap_scan) +{ +	const unsigned long page_size = self->page_size; +	struct page_region pm_regs[10]; +	struct pm_scan_arg pm_scan_args = { +		.size = sizeof(struct pm_scan_arg), +		.category_anyof_mask = PAGE_IS_GUARD, +		.return_mask = PAGE_IS_GUARD, +		.vec = (long)&pm_regs, +		.vec_len = ARRAY_SIZE(pm_regs), +	}; +	int proc_fd, i; +	char *ptr; + +	proc_fd = open("/proc/self/pagemap", O_RDONLY); +	ASSERT_NE(proc_fd, -1); + +	ptr = mmap_(self, variant, NULL, 10 * page_size, +		    PROT_READ | PROT_WRITE, 0, 0); +	ASSERT_NE(ptr, MAP_FAILED); + +	pm_scan_args.start = (long)ptr; +	pm_scan_args.end = (long)ptr + 10 * page_size; +	ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 0); +	ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size); + +	/* Install a guard region in every other page. */ +	for (i = 0; i < 10; i += 2) { +		char *ptr_p = &ptr[i * page_size]; + +		ASSERT_EQ(syscall(__NR_madvise, ptr_p, page_size, MADV_GUARD_INSTALL), 0); +	} + +	/* +	 * Assert ioctl() returns the count of located regions, where each +	 * region spans every other page within the range of 10 pages. +	 */ +	ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 5); +	ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size); + +	/* Re-read from pagemap, and assert guard regions are detected. 
*/ +	for (i = 0; i < 5; i++) { +		long ptr_p = (long)&ptr[2 * i * page_size]; + +		ASSERT_EQ(pm_regs[i].start, ptr_p); +		ASSERT_EQ(pm_regs[i].end, ptr_p + page_size); +		ASSERT_EQ(pm_regs[i].categories, PAGE_IS_GUARD); +	} + +	ASSERT_EQ(close(proc_fd), 0); +	ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} +  TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh index 0b0d4ba1af27..0dd31892ff67 100755 --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh @@ -36,7 +36,7 @@ else      do_umount=1    fi  fi -MNT='/mnt/huge/' +MNT='/mnt/huge'  function get_machine_hugepage_size() {    hpz=$(grep -i hugepagesize /proc/meminfo) @@ -56,10 +56,45 @@ function cleanup() {    rmdir "$CGROUP_ROOT"/a/b 2>/dev/null    rmdir "$CGROUP_ROOT"/a 2>/dev/null    rmdir "$CGROUP_ROOT"/test1 2>/dev/null -  echo 0 >/proc/sys/vm/nr_hugepages +  echo $nr_hugepgs >/proc/sys/vm/nr_hugepages    set -e  } +function assert_with_retry() { +  local actual_path="$1" +  local expected="$2" +  local tolerance=$((7 * 1024 * 1024)) +  local timeout=20 +  local interval=1 +  local start_time +  local now +  local elapsed +  local actual + +  start_time=$(date +%s) + +  while true; do +    actual="$(cat "$actual_path")" + +    if [[ $actual -ge $(($expected - $tolerance)) ]] && +        [[ $actual -le $(($expected + $tolerance)) ]]; then +      return 0 +    fi + +    now=$(date +%s) +    elapsed=$((now - start_time)) + +    if [[ $elapsed -ge $timeout ]]; then +      echo "actual = $((${actual%% *} / 1024 / 1024)) MB" +      echo "expected = $((${expected%% *} / 1024 / 1024)) MB" +      cleanup +      exit 1 +    fi + +    sleep $interval +  done +} +  function assert_state() {    local expected_a="$1"    local expected_a_hugetlb="$2" @@ -70,58 +105,13 @@ function assert_state() {      expected_b="$3"      expected_b_hugetlb="$4"    fi -  local 
tolerance=$((5 * 1024 * 1024)) - -  local actual_a -  actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)" -  if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] || -    [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then -    echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB -    echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB -    echo fail - -    cleanup -    exit 1 -  fi - -  local actual_a_hugetlb -  actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)" -  if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] || -    [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then -    echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB -    echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB -    echo fail - -    cleanup -    exit 1 -  fi - -  if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then -    return -  fi - -  local actual_b -  actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)" -  if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] || -    [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then -    echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB -    echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB -    echo fail - -    cleanup -    exit 1 -  fi -  local actual_b_hugetlb -  actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)" -  if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] || -    [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then -    echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB -    echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB -    echo fail +  assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a" +  assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb" -    cleanup -    exit 1 +  if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then +    assert_with_retry 
"$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b" +    assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb"    fi  } @@ -175,7 +165,6 @@ size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages.  cleanup  echo -echo  echo Test charge, rmdir, uncharge  setup  echo mkdir @@ -195,7 +184,6 @@ cleanup  echo done  echo -echo  if [[ ! $cgroup2 ]]; then    echo "Test parent and child hugetlb usage"    setup @@ -212,7 +200,6 @@ if [[ ! $cgroup2 ]]; then    assert_state 0 $(($size * 2)) 0 $size    rmdir "$CGROUP_ROOT"/a/b -  sleep 5    echo Assert memory reparent correctly.    assert_state 0 $(($size * 2)) @@ -225,7 +212,6 @@ if [[ ! $cgroup2 ]]; then  fi  echo -echo  echo "Test child only hugetlb usage"  echo setup  setup diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c index d53de2486080..1e9980b8993c 100644 --- a/tools/testing/selftests/mm/map_fixed_noreplace.c +++ b/tools/testing/selftests/mm/map_fixed_noreplace.c @@ -96,7 +96,7 @@ int main(void)  		ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n");  	}  	ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); -	ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n"); +	ksft_test_result_pass("Second mmap() 5*PAGE_SIZE at base\n");  	/*  	 * Second mapping contained within first: diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c new file mode 100644 index 000000000000..c76646cdf6e6 --- /dev/null +++ b/tools/testing/selftests/mm/merge.c @@ -0,0 +1,455 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE +#include "../kselftest_harness.h" +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/wait.h> +#include "vm_util.h" + +FIXTURE(merge) +{ +	unsigned int page_size; +	char *carveout; +	struct procmap_fd procmap; +}; + +FIXTURE_SETUP(merge) +{ +	self->page_size = psize(); +	/* 
Carve out PROT_NONE region to map over. */ +	self->carveout = mmap(NULL, 12 * self->page_size, PROT_NONE, +			      MAP_ANON | MAP_PRIVATE, -1, 0); +	ASSERT_NE(self->carveout, MAP_FAILED); +	/* Setup PROCMAP_QUERY interface. */ +	ASSERT_EQ(open_self_procmap(&self->procmap), 0); +} + +FIXTURE_TEARDOWN(merge) +{ +	ASSERT_EQ(munmap(self->carveout, 12 * self->page_size), 0); +	ASSERT_EQ(close_procmap(&self->procmap), 0); +} + +TEST_F(merge, mprotect_unfaulted_left) +{ +	unsigned int page_size = self->page_size; +	char *carveout = self->carveout; +	struct procmap_fd *procmap = &self->procmap; +	char *ptr; + +	/* +	 * Map 10 pages of R/W memory within. MAP_NORESERVE so we don't hit +	 * merge failure due to lack of VM_ACCOUNT flag by mistake. +	 * +	 * |-----------------------| +	 * |       unfaulted       | +	 * |-----------------------| +	 */ +	ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE, +		   MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); +	ASSERT_NE(ptr, MAP_FAILED); +	/* +	 * Now make the first 5 pages read-only, splitting the VMA: +	 * +	 *      RO          RW +	 * |-----------|-----------| +	 * | unfaulted | unfaulted | +	 * |-----------|-----------| +	 */ +	ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0); +	/* +	 * Fault in the first of the last 5 pages so it gets an anon_vma and +	 * thus the whole VMA becomes 'faulted': +	 * +	 *      RO          RW +	 * |-----------|-----------| +	 * | unfaulted |  faulted  | +	 * |-----------|-----------| +	 */ +	ptr[5 * page_size] = 'x'; +	/* +	 * Now mprotect() the RW region read-only, we should merge (though for +	 * ~15 years we did not! :): +	 * +	 *             RO +	 * |-----------------------| +	 * |        faulted        | +	 * |-----------------------| +	 */ +	ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0); + +	/* Assert that the merge succeeded using PROCMAP_QUERY. 
*/ +	ASSERT_TRUE(find_vma_procmap(procmap, ptr)); +	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); +	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); +} + +TEST_F(merge, mprotect_unfaulted_right) +{ +	unsigned int page_size = self->page_size; +	char *carveout = self->carveout; +	struct procmap_fd *procmap = &self->procmap; +	char *ptr; + +	/* +	 * |-----------------------| +	 * |       unfaulted       | +	 * |-----------------------| +	 */ +	ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE, +		   MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); +	ASSERT_NE(ptr, MAP_FAILED); +	/* +	 * Now make the last 5 pages read-only, splitting the VMA: +	 * +	 *      RW          RO +	 * |-----------|-----------| +	 * | unfaulted | unfaulted | +	 * |-----------|-----------| +	 */ +	ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0); +	/* +	 * Fault in the first of the first 5 pages so it gets an anon_vma and +	 * thus the whole VMA becomes 'faulted': +	 * +	 *      RW          RO +	 * |-----------|-----------| +	 * |  faulted  | unfaulted | +	 * |-----------|-----------| +	 */ +	ptr[0] = 'x'; +	/* +	 * Now mprotect() the RW region read-only, we should merge: +	 * +	 *             RO +	 * |-----------------------| +	 * |        faulted        | +	 * |-----------------------| +	 */ +	ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0); + +	/* Assert that the merge succeeded using PROCMAP_QUERY. 
*/ +	ASSERT_TRUE(find_vma_procmap(procmap, ptr)); +	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); +	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); +} + +TEST_F(merge, mprotect_unfaulted_both) +{ +	unsigned int page_size = self->page_size; +	char *carveout = self->carveout; +	struct procmap_fd *procmap = &self->procmap; +	char *ptr; + +	/* +	 * |-----------------------| +	 * |       unfaulted       | +	 * |-----------------------| +	 */ +	ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE, +		   MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); +	ASSERT_NE(ptr, MAP_FAILED); +	/* +	 * Now make the first and last 3 pages read-only, splitting the VMA: +	 * +	 *      RO          RW          RO +	 * |-----------|-----------|-----------| +	 * | unfaulted | unfaulted | unfaulted | +	 * |-----------|-----------|-----------| +	 */ +	ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); +	ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0); +	/* +	 * Fault in the first of the middle 3 pages so it gets an anon_vma and +	 * thus the whole VMA becomes 'faulted': +	 * +	 *      RO          RW          RO +	 * |-----------|-----------|-----------| +	 * | unfaulted |  faulted  | unfaulted | +	 * |-----------|-----------|-----------| +	 */ +	ptr[3 * page_size] = 'x'; +	/* +	 * Now mprotect() the RW region read-only, we should merge: +	 * +	 *             RO +	 * |-----------------------| +	 * |        faulted        | +	 * |-----------------------| +	 */ +	ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0); + +	/* Assert that the merge succeeded using PROCMAP_QUERY. 
*/ +	ASSERT_TRUE(find_vma_procmap(procmap, ptr)); +	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); +	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size); +} + +TEST_F(merge, mprotect_faulted_left_unfaulted_right) +{ +	unsigned int page_size = self->page_size; +	char *carveout = self->carveout; +	struct procmap_fd *procmap = &self->procmap; +	char *ptr; + +	/* +	 * |-----------------------| +	 * |       unfaulted       | +	 * |-----------------------| +	 */ +	ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE, +		   MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); +	ASSERT_NE(ptr, MAP_FAILED); +	/* +	 * Now make the last 3 pages read-only, splitting the VMA: +	 * +	 *             RW               RO +	 * |-----------------------|-----------| +	 * |       unfaulted       | unfaulted | +	 * |-----------------------|-----------| +	 */ +	ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0); +	/* +	 * Fault in the first of the first 6 pages so it gets an anon_vma and +	 * thus the whole VMA becomes 'faulted': +	 * +	 *             RW               RO +	 * |-----------------------|-----------| +	 * |       unfaulted       | unfaulted | +	 * |-----------------------|-----------| +	 */ +	ptr[0] = 'x'; +	/* +	 * Now make the first 3 pages read-only, splitting the VMA: +	 * +	 *      RO          RW          RO +	 * |-----------|-----------|-----------| +	 * |  faulted  |  faulted  | unfaulted | +	 * |-----------|-----------|-----------| +	 */ +	ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); +	/* +	 * Now mprotect() the RW region read-only, we should merge: +	 * +	 *             RO +	 * |-----------------------| +	 * |        faulted        | +	 * |-----------------------| +	 */ +	ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0); + +	/* Assert that the merge succeeded using PROCMAP_QUERY. 
*/ +	ASSERT_TRUE(find_vma_procmap(procmap, ptr)); +	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); +	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size); +} + +TEST_F(merge, mprotect_unfaulted_left_faulted_right) +{ +	unsigned int page_size = self->page_size; +	char *carveout = self->carveout; +	struct procmap_fd *procmap = &self->procmap; +	char *ptr; + +	/* +	 * |-----------------------| +	 * |       unfaulted       | +	 * |-----------------------| +	 */ +	ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE, +		   MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); +	ASSERT_NE(ptr, MAP_FAILED); +	/* +	 * Now make the first 3 pages read-only, splitting the VMA: +	 * +	 *      RO                RW +	 * |-----------|-----------------------| +	 * | unfaulted |       unfaulted       | +	 * |-----------|-----------------------| +	 */ +	ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); +	/* +	 * Fault in the first of the last 6 pages so it gets an anon_vma and +	 * thus the whole VMA becomes 'faulted': +	 * +	 *      RO                RW +	 * |-----------|-----------------------| +	 * | unfaulted |        faulted        | +	 * |-----------|-----------------------| +	 */ +	ptr[3 * page_size] = 'x'; +	/* +	 * Now make the last 3 pages read-only, splitting the VMA: +	 * +	 *      RO          RW          RO +	 * |-----------|-----------|-----------| +	 * | unfaulted |  faulted  |  faulted  | +	 * |-----------|-----------|-----------| +	 */ +	ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0); +	/* +	 * Now mprotect() the RW region read-only, we should merge: +	 * +	 *             RO +	 * |-----------------------| +	 * |        faulted        | +	 * |-----------------------| +	 */ +	ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0); + +	/* Assert that the merge succeeded using PROCMAP_QUERY. 
*/ +	ASSERT_TRUE(find_vma_procmap(procmap, ptr)); +	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); +	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size); +} + +TEST_F(merge, forked_target_vma) +{ +	unsigned int page_size = self->page_size; +	char *carveout = self->carveout; +	struct procmap_fd *procmap = &self->procmap; +	pid_t pid; +	char *ptr, *ptr2; +	int i; + +	/* +	 * |-----------| +	 * | unfaulted | +	 * |-----------| +	 */ +	ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, +		   MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); +	ASSERT_NE(ptr, MAP_FAILED); + +	/* +	 * Fault in process. +	 * +	 * |-----------| +	 * |  faulted  | +	 * |-----------| +	 */ +	ptr[0] = 'x'; + +	pid = fork(); +	ASSERT_NE(pid, -1); + +	if (pid != 0) { +		wait(NULL); +		return; +	} + +	/* Child process below: */ + +	/* Reopen for child. */ +	ASSERT_EQ(close_procmap(&self->procmap), 0); +	ASSERT_EQ(open_self_procmap(&self->procmap), 0); + +	/* unCOWing everything does not cause the AVC to go away. */ +	for (i = 0; i < 5 * page_size; i += page_size) +		ptr[i] = 'x'; + +	/* +	 * Map in adjacent VMA in child. +	 * +	 *     forked +	 * |-----------|-----------| +	 * |  faulted  | unfaulted | +	 * |-----------|-----------| +	 *      ptr         ptr2 +	 */ +	ptr2 = mmap(&ptr[5 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, +		   MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); +	ASSERT_NE(ptr2, MAP_FAILED); + +	/* Make sure not merged. 
*/ +	ASSERT_TRUE(find_vma_procmap(procmap, ptr)); +	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); +	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 5 * page_size); +} + +TEST_F(merge, forked_source_vma) +{ +	unsigned int page_size = self->page_size; +	char *carveout = self->carveout; +	struct procmap_fd *procmap = &self->procmap; +	pid_t pid; +	char *ptr, *ptr2; +	int i; + +	/* +	 * |-----------|------------| +	 * | unfaulted | <unmapped> | +	 * |-----------|------------| +	 */ +	ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, +		   MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); +	ASSERT_NE(ptr, MAP_FAILED); + +	/* +	 * Fault in process. +	 * +	 * |-----------|------------| +	 * |  faulted  | <unmapped> | +	 * |-----------|------------| +	 */ +	ptr[0] = 'x'; + +	pid = fork(); +	ASSERT_NE(pid, -1); + +	if (pid != 0) { +		wait(NULL); +		return; +	} + +	/* Child process below: */ + +	/* Reopen for child. */ +	ASSERT_EQ(close_procmap(&self->procmap), 0); +	ASSERT_EQ(open_self_procmap(&self->procmap), 0); + +	/* unCOWing everything does not cause the AVC to go away. */ +	for (i = 0; i < 5 * page_size; i += page_size) +		ptr[i] = 'x'; + +	/* +	 * Map in adjacent VMA in child, ptr2 after ptr, but incompatible. +	 * +	 *   forked RW      RWX +	 * |-----------|-----------| +	 * |  faulted  | unfaulted | +	 * |-----------|-----------| +	 *      ptr        ptr2 +	 */ +	ptr2 = mmap(&carveout[6 * page_size], 5 * page_size, PROT_READ | PROT_WRITE | PROT_EXEC, +		   MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); +	ASSERT_NE(ptr2, MAP_FAILED); + +	/* Make sure not merged. 
*/ +	ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); +	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); +	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size); + +	/* +	 * Now mprotect forked region to RWX so it becomes the source for the +	 * merge to unfaulted region: +	 * +	 *  forked RWX      RWX +	 * |-----------|-----------| +	 * |  faulted  | unfaulted | +	 * |-----------|-----------| +	 *      ptr         ptr2 +	 * +	 * This should NOT result in a merge, as ptr was forked. +	 */ +	ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ | PROT_WRITE | PROT_EXEC), 0); +	/* Again, make sure not merged. */ +	ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); +	ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); +	ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index 57b4bba2b45f..b07acc86f4f0 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -34,7 +34,7 @@  #define PAGEMAP "/proc/self/pagemap"  int pagemap_fd;  int uffd; -unsigned int page_size; +unsigned long page_size;  unsigned int hpage_size;  const char *progname; @@ -112,7 +112,7 @@ int init_uffd(void)  	return 0;  } -int wp_init(void *lpBaseAddress, int dwRegionSize) +int wp_init(void *lpBaseAddress, long dwRegionSize)  {  	struct uffdio_register uffdio_register;  	struct uffdio_writeprotect wp; @@ -136,7 +136,7 @@ int wp_init(void *lpBaseAddress, int dwRegionSize)  	return 0;  } -int wp_free(void *lpBaseAddress, int dwRegionSize) +int wp_free(void *lpBaseAddress, long dwRegionSize)  {  	struct uffdio_register uffdio_register; @@ -184,7 +184,7 @@ void *gethugetlb_mem(int size, int *shmid)  int userfaultfd_tests(void)  { -	int mem_size, vec_size, written, num_pages = 16; +	long mem_size, vec_size, written, num_pages = 16;  	char *mem, *vec;  	mem_size = num_pages * page_size; @@ 
-213,7 +213,7 @@ int userfaultfd_tests(void)  	written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,  				vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);  	if (written < 0) -		ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); +		ksft_exit_fail_msg("error %ld %d %s\n", written, errno, strerror(errno));  	ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", __func__); @@ -995,7 +995,7 @@ int unmapped_region_tests(void)  {  	void *start = (void *)0x10000000;  	int written, len = 0x00040000; -	int vec_size = len / page_size; +	long vec_size = len / page_size;  	struct page_region *vec = malloc(sizeof(struct page_region) * vec_size);  	/* 1. Get written pages */ @@ -1051,7 +1051,7 @@ static void test_simple(void)  int sanity_tests(void)  {  	unsigned long long mem_size, vec_size; -	int ret, fd, i, buf_size; +	long ret, fd, i, buf_size;  	struct page_region *vec;  	char *mem, *fmem;  	struct stat sbuf; @@ -1160,7 +1160,7 @@ int sanity_tests(void)  	ret = stat(progname, &sbuf);  	if (ret < 0) -		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); +		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));  	fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);  	if (fmem == MAP_FAILED) diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c new file mode 100644 index 000000000000..8a9d19b6020c --- /dev/null +++ b/tools/testing/selftests/mm/pfnmap.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Basic VM_PFNMAP tests relying on mmap() of '/dev/mem' + * + * Copyright 2025, Red Hat, Inc. 
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#include "../kselftest_harness.h"
+#include "vm_util.h"
+
+/* Jump target used to recover from a SIGSEGV raised by test_read_access(). */
+static sigjmp_buf sigjmp_buf_env;
+
+/* SIGSEGV handler: unwind into test_read_access() and make it see -EFAULT. */
+static void signal_handler(int sig)
+{
+	siglongjmp(sigjmp_buf_env, -EFAULT);
+}
+
+/*
+ * Read one byte at every @pagesize step of [@addr, @addr + @size).
+ *
+ * Returns 0 if all reads succeeded, -EFAULT if a read raised SIGSEGV, or
+ * -EINVAL if the SIGSEGV disposition could not be changed.
+ */
+static int test_read_access(char *addr, size_t size, size_t pagesize)
+{
+	size_t offs;
+	int ret;
+
+	if (signal(SIGSEGV, signal_handler) == SIG_ERR)
+		return -EINVAL;
+
+	ret = sigsetjmp(sigjmp_buf_env, 1);
+	if (!ret) {
+		for (offs = 0; offs < size; offs += pagesize)
+			/* Force a read that the compiler cannot optimize out. */
+			*((volatile char *)(addr + offs));
+	}
+	/*
+	 * Restore the default disposition: sigjmp_buf_env is stale once we
+	 * return, so a later SIGSEGV must not jump through it.
+	 */
+	if (signal(SIGSEGV, SIG_DFL) == SIG_ERR)
+		return -EINVAL;
+
+	return ret;
+}
+
+FIXTURE(pfnmap)
+{
+	size_t pagesize;
+	int dev_mem_fd;
+	char *addr1;
+	size_t size1;
+	char *addr2;
+	size_t size2;
+};
+
+FIXTURE_SETUP(pfnmap)
+{
+	self->pagesize = getpagesize();
+
+	self->dev_mem_fd = open("/dev/mem", O_RDONLY);
+	if (self->dev_mem_fd < 0)
+		SKIP(return, "Cannot open '/dev/mem'\n");
+
+	/* We'll require the first two pages throughout our tests ... */
+	self->size1 = self->pagesize * 2;
+	self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED,
+			   self->dev_mem_fd, 0);
+	if (self->addr1 == MAP_FAILED)
+		SKIP(return, "Cannot mmap '/dev/mem'\n");
+
+	/* ... and want to be able to read from them.
*/ +	if (test_read_access(self->addr1, self->size1, self->pagesize)) +		SKIP(return, "Cannot read-access mmap'ed '/dev/mem'\n"); + +	self->size2 = 0; +	self->addr2 = MAP_FAILED; +} + +FIXTURE_TEARDOWN(pfnmap) +{ +	if (self->addr2 != MAP_FAILED) +		munmap(self->addr2, self->size2); +	if (self->addr1 != MAP_FAILED) +		munmap(self->addr1, self->size1); +	if (self->dev_mem_fd >= 0) +		close(self->dev_mem_fd); +} + +TEST_F(pfnmap, madvise_disallowed) +{ +	int advices[] = { +		MADV_DONTNEED, +		MADV_DONTNEED_LOCKED, +		MADV_FREE, +		MADV_WIPEONFORK, +		MADV_COLD, +		MADV_PAGEOUT, +		MADV_POPULATE_READ, +		MADV_POPULATE_WRITE, +	}; +	int i; + +	/* All these advices must be rejected. */ +	for (i = 0; i < ARRAY_SIZE(advices); i++) { +		EXPECT_LT(madvise(self->addr1, self->pagesize, advices[i]), 0); +		EXPECT_EQ(errno, EINVAL); +	} +} + +TEST_F(pfnmap, munmap_split) +{ +	/* +	 * Unmap the first page. This munmap() call is not really expected to +	 * fail, but we might be able to trigger other internal issues. +	 */ +	ASSERT_EQ(munmap(self->addr1, self->pagesize), 0); + +	/* +	 * Remap the first page while the second page is still mapped. This +	 * makes sure that any PAT tracking on x86 will allow for mmap()'ing +	 * a page again while some parts of the first mmap() are still +	 * around. +	 */ +	self->size2 = self->pagesize; +	self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED, +			   self->dev_mem_fd, 0); +	ASSERT_NE(self->addr2, MAP_FAILED); +} + +TEST_F(pfnmap, mremap_fixed) +{ +	char *ret; + +	/* Reserve a destination area. */ +	self->size2 = self->size1; +	self->addr2 = mmap(NULL, self->size2, PROT_READ, MAP_ANON | MAP_PRIVATE, +			   -1, 0); +	ASSERT_NE(self->addr2, MAP_FAILED); + +	/* mremap() over our destination. */ +	ret = mremap(self->addr1, self->size1, self->size2, +		     MREMAP_FIXED | MREMAP_MAYMOVE, self->addr2); +	ASSERT_NE(ret, MAP_FAILED); +} + +TEST_F(pfnmap, mremap_shrink) +{ +	char *ret; + +	/* Shrinking is expected to work. 
*/ +	ret = mremap(self->addr1, self->size1, self->size1 - self->pagesize, 0); +	ASSERT_NE(ret, MAP_FAILED); +} + +TEST_F(pfnmap, mremap_expand) +{ +	/* +	 * Growing is not expected to work, and getting it right would +	 * be challenging. So this test primarily serves as an early warning +	 * that something that probably should never work suddenly works. +	 */ +	self->size2 = self->size1 + self->pagesize; +	self->addr2 = mremap(self->addr1, self->size1, self->size2, MREMAP_MAYMOVE); +	ASSERT_EQ(self->addr2, MAP_FAILED); +} + +TEST_F(pfnmap, fork) +{ +	pid_t pid; +	int ret; + +	/* fork() a child and test if the child can access the pages. */ +	pid = fork(); +	ASSERT_GE(pid, 0); + +	if (!pid) { +		EXPECT_EQ(test_read_access(self->addr1, self->size1, +					   self->pagesize), 0); +		exit(0); +	} + +	wait(&ret); +	if (WIFEXITED(ret)) +		ret = WEXITSTATUS(ret); +	else +		ret = -EINVAL; +	ASSERT_EQ(ret, 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 9aff33b10999..dddd1dd8af14 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -63,6 +63,8 @@ separated by spaces:  	test soft dirty page bit semantics  - pagemap  	test pagemap_scan IOCTL +- pfnmap +	tests for VM_PFNMAP handling  - cow  	test copy-on-write semantics  - thp @@ -79,6 +81,8 @@ separated by spaces:  	test prctl(PR_SET_MDWE, ...)  
- page_frag  	test handling of page fragment allocation and freeing +- vma_merge +	test VMA merge cases behave as expected  example: ./run_vmtests.sh -t "hmm mmap ksm"  EOF @@ -421,6 +425,8 @@ CATEGORY="madv_guard" run_test ./guard-regions  # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests  CATEGORY="madv_populate" run_test ./madv_populate +CATEGORY="vma_merge" run_test ./merge +  if [ -x ./memfd_secret ]  then  (echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix @@ -468,6 +474,8 @@ fi  CATEGORY="pagemap" run_test ./pagemap_ioctl +CATEGORY="pfnmap" run_test ./pfnmap +  # COW tests  CATEGORY="cow" run_test ./cow diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index e8fd9011c2a3..c73fd5d455c8 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -1231,6 +1231,182 @@ static void uffd_move_pmd_split_test(uffd_test_args_t *targs)  			      uffd_move_pmd_handle_fault);  } +static bool +uffdio_verify_results(const char *name, int ret, int error, long result) +{ +	/* +	 * Should always return -1 with errno=EAGAIN, with corresponding +	 * result field updated in ioctl() args to be -EAGAIN too +	 * (e.g. copy.copy field for UFFDIO_COPY). +	 */ +	if (ret != -1) { +		uffd_test_fail("%s should have returned -1", name); +		return false; +	} + +	if (error != EAGAIN) { +		uffd_test_fail("%s should have errno==EAGAIN", name); +		return false; +	} + +	if (result != -EAGAIN) { +		uffd_test_fail("%s should have been updated for -EAGAIN", +			       name); +		return false; +	} + +	return true; +} + +/* + * This defines a function to test one ioctl.  Note that here "field" can + * be 1 or anything not -EAGAIN.  With that initial value set, we can + * verify later that it should be updated by kernel (when -EAGAIN + * returned), by checking whether it is also updated to -EAGAIN. 
+ */
+#define DEFINE_MMAP_CHANGING_TEST(name, ioctl_name, field)		\
+	static bool uffdio_mmap_changing_test_##name(int fd)		\
+	{								\
+		int ret;						\
+		struct uffdio_##name args = {				\
+			.field = 1,					\
+		};							\
+		ret = ioctl(fd, ioctl_name, &args);			\
+		return uffdio_verify_results(#ioctl_name, ret, errno, args.field); \
+	}
+
+DEFINE_MMAP_CHANGING_TEST(zeropage, UFFDIO_ZEROPAGE, zeropage)
+DEFINE_MMAP_CHANGING_TEST(copy, UFFDIO_COPY, copy)
+DEFINE_MMAP_CHANGING_TEST(move, UFFDIO_MOVE, move)
+DEFINE_MMAP_CHANGING_TEST(poison, UFFDIO_POISON, updated)
+DEFINE_MMAP_CHANGING_TEST(continue, UFFDIO_CONTINUE, mapped)
+
+typedef enum {
+	/* We actually do not care about any state except UNINTERRUPTIBLE.. */
+	THR_STATE_UNKNOWN = 0,
+	THR_STATE_UNINTERRUPTIBLE,
+} thread_state;
+
+/* Back off for 1ms between polls. */
+static void sleep_short(void)
+{
+	usleep(1000);
+}
+
+/*
+ * Parse the "State:" line of /proc/<tid>/status.
+ *
+ * Returns THR_STATE_UNINTERRUPTIBLE if the state letter is 'D', and
+ * THR_STATE_UNKNOWN for any other state, if the file cannot be opened, or
+ * if no "State:" line is found.
+ */
+static thread_state thread_state_get(pid_t tid)
+{
+	/* An array, so that sizeof() yields the string length (+ NUL). */
+	static const char header[] = "State:\t";
+	thread_state state = THR_STATE_UNKNOWN;
+	char tmp[256], *p, c;
+	FILE *fp;
+
+	snprintf(tmp, sizeof(tmp), "/proc/%d/status", tid);
+	fp = fopen(tmp, "r");
+
+	if (!fp)
+		return THR_STATE_UNKNOWN;
+
+	while (fgets(tmp, sizeof(tmp), fp)) {
+		p = strstr(tmp, header);
+		if (p) {
+			/* For example, "State:\tD (disk sleep)" */
+			c = *(p + sizeof(header) - 1);
+			if (c == 'D')
+				state = THR_STATE_UNINTERRUPTIBLE;
+			break;
+		}
+	}
+
+	/* We get polled in a loop, do not leak one stream per call. */
+	fclose(fp);
+
+	return state;
+}
+
+/* Poll (with a short sleep in between) until @tid reports @state. */
+static void thread_state_until(pid_t tid, thread_state state)
+{
+	thread_state s;
+
+	do {
+		s = thread_state_get(tid);
+		sleep_short();
+	} while (s != state);
+}
+
+static void *uffd_mmap_changing_thread(void *opaque)
+{
+	volatile pid_t *pid = opaque;
+	int ret;
+
+	/* Unfortunately, it's only fetch-able from the thread itself..
*/ +	assert(*pid == 0); +	*pid = syscall(SYS_gettid); + +	/* Inject an event, this will hang solid until the event read */ +	ret = madvise(area_dst, page_size, MADV_REMOVE); +	if (ret) +		err("madvise(MADV_REMOVE) failed"); + +	return NULL; +} + +static void uffd_consume_message(int fd) +{ +	struct uffd_msg msg = { 0 }; + +	while (uffd_read_msg(fd, &msg)); +} + +static void uffd_mmap_changing_test(uffd_test_args_t *targs) +{ +	/* +	 * This stores the real PID (which can be different from how tid is +	 * defined..) for the child thread, 0 means not initialized. +	 */ +	pid_t pid = 0; +	pthread_t tid; +	int ret; + +	if (uffd_register(uffd, area_dst, nr_pages * page_size, +			  true, false, false)) +		err("uffd_register() failed"); + +	/* Create a thread to generate the racy event */ +	ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &pid); +	if (ret) +		err("pthread_create() failed"); + +	/* +	 * Wait until the thread setup the pid.  Use volatile to make sure +	 * it reads from RAM not regs. +	 */ +	while (!(volatile pid_t)pid) +		sleep_short(); + +	/* Wait until the thread hangs at REMOVE event */ +	thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE); + +	if (!uffdio_mmap_changing_test_copy(uffd)) +		return; + +	if (!uffdio_mmap_changing_test_zeropage(uffd)) +		return; + +	if (!uffdio_mmap_changing_test_move(uffd)) +		return; + +	if (!uffdio_mmap_changing_test_poison(uffd)) +		return; + +	if (!uffdio_mmap_changing_test_continue(uffd)) +		return; + +	/* +	 * All succeeded above!  Recycle everything.  Start by reading the +	 * event so as to kick the thread roll again.. 
+	 */ +	uffd_consume_message(uffd); + +	ret = pthread_join(tid, NULL); +	assert(ret == 0); + +	uffd_test_pass(); +} +  static int prevent_hugepages(const char **errmsg)  {  	/* This should be done before source area is populated */ @@ -1470,6 +1646,32 @@ uffd_test_case_t uffd_tests[] = {  		.mem_targets = MEM_ALL,  		.uffd_feature_required = UFFD_FEATURE_POISON,  	}, +	{ +		.name = "mmap-changing", +		.uffd_fn = uffd_mmap_changing_test, +		/* +		 * There's no point running this test over all mem types as +		 * they share the same code paths. +		 * +		 * Choose shmem for simplicity, because (1) shmem supports +		 * MINOR mode to cover UFFDIO_CONTINUE, and (2) shmem is +		 * almost always available (unlike hugetlb).  Here we +		 * abused SHMEM for UFFDIO_MOVE, but the test we want to +		 * cover doesn't yet need the correct memory type.. +		 */ +		.mem_targets = MEM_SHMEM, +		/* +		 * Any UFFD_FEATURE_EVENT_* should work to trigger the +		 * race logically, but choose the simplest (REMOVE). +		 * +		 * Meanwhile, since we'll cover quite a few new ioctl()s +		 * (CONTINUE, POISON, MOVE), skip this test for old kernels +		 * by choosing all of them. +		 */ +		.uffd_feature_required = UFFD_FEATURE_EVENT_REMOVE | +		UFFD_FEATURE_MOVE | UFFD_FEATURE_POISON | +		UFFD_FEATURE_MINOR_SHMEM, +	},  };  static void usage(const char *prog) diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index a36734fb62f3..1357e2d6a7b6 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -1,5 +1,6 @@  // SPDX-License-Identifier: GPL-2.0  #include <string.h> +#include <errno.h>  #include <fcntl.h>  #include <dirent.h>  #include <inttypes.h> @@ -424,3 +425,64 @@ bool check_vmflag_io(void *addr)  		flags += flaglen;  	}  } + +/* + * Open an fd at /proc/$pid/maps and configure procmap_out ready for + * PROCMAP_QUERY query. Returns 0 on success, or an error code otherwise. 
+ */
+int open_procmap(pid_t pid, struct procmap_fd *procmap_out)
+{
+	char path[256];
+	int ret = 0;
+
+	memset(procmap_out, '\0', sizeof(*procmap_out));
+	snprintf(path, sizeof(path), "/proc/%d/maps", pid);
+	procmap_out->query.size = sizeof(procmap_out->query);
+	procmap_out->fd = open(path, O_RDONLY);
+	/* Check the fd that open() returned, not the struct pointer. */
+	if (procmap_out->fd < 0)
+		ret = -errno;
+
+	return ret;
+}
+
+/* Perform PROCMAP_QUERY. Returns 0 on success, or an error code otherwise. */
+int query_procmap(struct procmap_fd *procmap)
+{
+	int ret = 0;
+
+	if (ioctl(procmap->fd, PROCMAP_QUERY, &procmap->query) == -1)
+		ret = -errno;
+
+	return ret;
+}
+
+/*
+ * Try to find the VMA at specified address, returns true if found, false if not
+ * found, and the test is failed if any other error occurs.
+ *
+ * On success, procmap->query is populated with the results.
+ */
+bool find_vma_procmap(struct procmap_fd *procmap, void *address)
+{
+	int err;
+
+	procmap->query.query_flags = 0;
+	procmap->query.query_addr = (unsigned long)address;
+	err = query_procmap(procmap);
+	if (!err)
+		return true;
+
+	/* -ENOENT is the only error treated as "not found". */
+	if (err != -ENOENT)
+		ksft_exit_fail_msg("%s: Error %d on ioctl(PROCMAP_QUERY)\n",
+				   __func__, err);
+	return false;
+}
+
+/*
+ * Close fd used by PROCMAP_QUERY mechanism. Returns 0 on success, or an error
+ * code otherwise.
+ */ +int close_procmap(struct procmap_fd *procmap) +{ +	return close(procmap->fd); +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 6effafdc4d8a..9211ba640d9c 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -6,6 +6,7 @@  #include <strings.h> /* ffsl() */  #include <unistd.h> /* _SC_PAGESIZE */  #include "../kselftest.h" +#include <linux/fs.h>  #define BIT_ULL(nr)                   (1ULL << (nr))  #define PM_SOFT_DIRTY                 BIT_ULL(55) @@ -19,6 +20,15 @@  extern unsigned int __page_size;  extern unsigned int __page_shift; +/* + * Represents an open fd and PROCMAP_QUERY state for binary (via ioctl) + * /proc/$pid/[s]maps lookup. + */ +struct procmap_fd { +	int fd; +	struct procmap_query query; +}; +  static inline unsigned int psize(void)  {  	if (!__page_size) @@ -73,6 +83,17 @@ int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,  			      bool miss, bool wp, bool minor, uint64_t *ioctls);  unsigned long get_free_hugepages(void);  bool check_vmflag_io(void *addr); +int open_procmap(pid_t pid, struct procmap_fd *procmap_out); +int query_procmap(struct procmap_fd *procmap); +bool find_vma_procmap(struct procmap_fd *procmap, void *address); +int close_procmap(struct procmap_fd *procmap); + +static inline int open_self_procmap(struct procmap_fd *procmap_out) +{ +	pid_t pid = getpid(); + +	return open_procmap(pid, procmap_out); +}  /*   * On ppc64 this will only work with radix 2M hugepage size diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile index 1c631740a730..c5e0b76ba6ac 100644 --- a/tools/testing/selftests/ptrace/Makefile +++ b/tools/testing/selftests/ptrace/Makefile @@ -1,6 +1,6 @@  # SPDX-License-Identifier: GPL-2.0-only  CFLAGS += -std=c99 -pthread -Wall $(KHDR_INCLUDES) -TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess get_set_sud +TEST_GEN_PROGS := get_syscall_info set_syscall_info 
peeksiginfo vmaccess get_set_sud  include ../lib.mk diff --git a/tools/testing/selftests/ptrace/set_syscall_info.c b/tools/testing/selftests/ptrace/set_syscall_info.c new file mode 100644 index 000000000000..4198248ef874 --- /dev/null +++ b/tools/testing/selftests/ptrace/set_syscall_info.c @@ -0,0 +1,519 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2018-2025 Dmitry V. Levin <ldv@strace.io> + * All rights reserved. + * + * Check whether PTRACE_SET_SYSCALL_INFO semantics implemented in the kernel + * matches userspace expectations. + */ + +#include "../kselftest_harness.h" +#include <err.h> +#include <fcntl.h> +#include <signal.h> +#include <asm/unistd.h> +#include <linux/types.h> +#include <linux/ptrace.h> + +#if defined(_MIPS_SIM) && _MIPS_SIM == _MIPS_SIM_NABI32 +/* + * MIPS N32 is the only architecture where __kernel_ulong_t + * does not match the bitness of syscall arguments. + */ +typedef unsigned long long kernel_ulong_t; +#else +typedef __kernel_ulong_t kernel_ulong_t; +#endif + +struct si_entry { +	int nr; +	kernel_ulong_t args[6]; +}; +struct si_exit { +	unsigned int is_error; +	int rval; +}; + +static unsigned int ptrace_stop; +static pid_t tracee_pid; + +static int +kill_tracee(pid_t pid) +{ +	if (!pid) +		return 0; + +	int saved_errno = errno; + +	int rc = kill(pid, SIGKILL); + +	errno = saved_errno; +	return rc; +} + +static long +sys_ptrace(int request, pid_t pid, unsigned long addr, unsigned long data) +{ +	return syscall(__NR_ptrace, request, pid, addr, data); +} + +#define LOG_KILL_TRACEE(fmt, ...)				
\ +	do {							\ +		kill_tracee(tracee_pid);			\ +		TH_LOG("wait #%d: " fmt,			\ +		       ptrace_stop, ##__VA_ARGS__);		\ +	} while (0) + +static void +check_psi_entry(struct __test_metadata *_metadata, +		const struct ptrace_syscall_info *info, +		const struct si_entry *exp_entry, +		const char *text) +{ +	unsigned int i; +	int exp_nr = exp_entry->nr; +#if defined __s390__ || defined __s390x__ +	/* s390 is the only architecture that has 16-bit syscall numbers */ +	exp_nr &= 0xffff; +#endif + +	ASSERT_EQ(PTRACE_SYSCALL_INFO_ENTRY, info->op) { +		LOG_KILL_TRACEE("%s: entry stop mismatch", text); +	} +	ASSERT_TRUE(info->arch) { +		LOG_KILL_TRACEE("%s: entry stop mismatch", text); +	} +	ASSERT_TRUE(info->instruction_pointer) { +		LOG_KILL_TRACEE("%s: entry stop mismatch", text); +	} +	ASSERT_TRUE(info->stack_pointer) { +		LOG_KILL_TRACEE("%s: entry stop mismatch", text); +	} +	ASSERT_EQ(exp_nr, info->entry.nr) { +		LOG_KILL_TRACEE("%s: syscall nr mismatch", text); +	} +	for (i = 0; i < ARRAY_SIZE(exp_entry->args); ++i) { +		ASSERT_EQ(exp_entry->args[i], info->entry.args[i]) { +			LOG_KILL_TRACEE("%s: syscall arg #%u mismatch", +					text, i); +		} +	} +} + +static void +check_psi_exit(struct __test_metadata *_metadata, +	       const struct ptrace_syscall_info *info, +	       const struct si_exit *exp_exit, +	       const char *text) +{ +	ASSERT_EQ(PTRACE_SYSCALL_INFO_EXIT, info->op) { +		LOG_KILL_TRACEE("%s: exit stop mismatch", text); +	} +	ASSERT_TRUE(info->arch) { +		LOG_KILL_TRACEE("%s: exit stop mismatch", text); +	} +	ASSERT_TRUE(info->instruction_pointer) { +		LOG_KILL_TRACEE("%s: exit stop mismatch", text); +	} +	ASSERT_TRUE(info->stack_pointer) { +		LOG_KILL_TRACEE("%s: exit stop mismatch", text); +	} +	ASSERT_EQ(exp_exit->is_error, info->exit.is_error) { +		LOG_KILL_TRACEE("%s: exit stop mismatch", text); +	} +	ASSERT_EQ(exp_exit->rval, info->exit.rval) { +		LOG_KILL_TRACEE("%s: exit stop mismatch", text); +	} +} + +TEST(set_syscall_info) +{ +	const pid_t 
tracer_pid = getpid(); +	const kernel_ulong_t dummy[] = { +		(kernel_ulong_t) 0xdad0bef0bad0fed0ULL, +		(kernel_ulong_t) 0xdad1bef1bad1fed1ULL, +		(kernel_ulong_t) 0xdad2bef2bad2fed2ULL, +		(kernel_ulong_t) 0xdad3bef3bad3fed3ULL, +		(kernel_ulong_t) 0xdad4bef4bad4fed4ULL, +		(kernel_ulong_t) 0xdad5bef5bad5fed5ULL, +	}; +	int splice_in[2], splice_out[2]; + +	ASSERT_EQ(0, pipe(splice_in)); +	ASSERT_EQ(0, pipe(splice_out)); +	ASSERT_EQ(sizeof(dummy), write(splice_in[1], dummy, sizeof(dummy))); + +	const struct { +		struct si_entry entry[2]; +		struct si_exit exit[2]; +	} si[] = { +		/* change scno, keep non-error rval */ +		{ +			{ +				{ +					__NR_gettid, +					{ +						dummy[0], dummy[1], dummy[2], +						dummy[3], dummy[4], dummy[5] +					} +				}, { +					__NR_getppid, +					{ +						dummy[0], dummy[1], dummy[2], +						dummy[3], dummy[4], dummy[5] +					} +				} +			}, { +				{ 0, tracer_pid }, { 0, tracer_pid } +			} +		}, + +		/* set scno to -1, keep error rval */ +		{ +			{ +				{ +					__NR_chdir, +					{ +						(uintptr_t) ".", +						dummy[1], dummy[2], +						dummy[3], dummy[4], dummy[5] +					} +				}, { +					-1, +					{ +						(uintptr_t) ".", +						dummy[1], dummy[2], +						dummy[3], dummy[4], dummy[5] +					} +				} +			}, { +				{ 1, -ENOSYS }, { 1, -ENOSYS } +			} +		}, + +		/* keep scno, change non-error rval */ +		{ +			{ +				{ +					__NR_getppid, +					{ +						dummy[0], dummy[1], dummy[2], +						dummy[3], dummy[4], dummy[5] +					} +				}, { +					__NR_getppid, +					{ +						dummy[0], dummy[1], dummy[2], +						dummy[3], dummy[4], dummy[5] +					} +				} +			}, { +				{ 0, tracer_pid }, { 0, tracer_pid + 1 } +			} +		}, + +		/* change arg1, keep non-error rval */ +		{ +			{ +				{ +					__NR_chdir, +					{ +						(uintptr_t) "", +						dummy[1], dummy[2], +						dummy[3], dummy[4], dummy[5] +					} +				}, { +					__NR_chdir, +					{ +						(uintptr_t) ".", +						dummy[1], dummy[2], +						dummy[3], dummy[4], dummy[5] +					} +				} +			
}, { +				{ 0, 0 }, { 0, 0 } +			} +		}, + +		/* set scno to -1, change error rval to non-error */ +		{ +			{ +				{ +					__NR_gettid, +					{ +						dummy[0], dummy[1], dummy[2], +						dummy[3], dummy[4], dummy[5] +					} +				}, { +					-1, +					{ +						dummy[0], dummy[1], dummy[2], +						dummy[3], dummy[4], dummy[5] +					} +				} +			}, { +				{ 1, -ENOSYS }, { 0, tracer_pid } +			} +		}, + +		/* change scno, change non-error rval to error */ +		{ +			{ +				{ +					__NR_chdir, +					{ +						dummy[0], dummy[1], dummy[2], +						dummy[3], dummy[4], dummy[5] +					} +				}, { +					__NR_getppid, +					{ +						dummy[0], dummy[1], dummy[2], +						dummy[3], dummy[4], dummy[5] +					} +				} +			}, { +				{ 0, tracer_pid }, { 1, -EISDIR } +			} +		}, + +		/* change scno and all args, change non-error rval */ +		{ +			{ +				{ +					__NR_gettid, +					{ +						dummy[0], dummy[1], dummy[2], +						dummy[3], dummy[4], dummy[5] +					} +				}, { +					__NR_splice, +					{ +						splice_in[0], 0, splice_out[1], 0, +						sizeof(dummy), SPLICE_F_NONBLOCK +					} +				} +			}, { +				{ 0, sizeof(dummy) }, { 0, sizeof(dummy) + 1 } +			} +		}, + +		/* change arg1, no exit stop */ +		{ +			{ +				{ +					__NR_exit_group, +					{ +						dummy[0], dummy[1], dummy[2], +						dummy[3], dummy[4], dummy[5] +					} +				}, { +					__NR_exit_group, +					{ +						0, dummy[1], dummy[2], +						dummy[3], dummy[4], dummy[5] +					} +				} +			}, { +				{ 0, 0 }, { 0, 0 } +			} +		}, +	}; + +	long rc; +	unsigned int i; + +	tracee_pid = fork(); + +	ASSERT_LE(0, tracee_pid) { +		TH_LOG("fork: %m"); +	} + +	if (tracee_pid == 0) { +		/* get the pid before PTRACE_TRACEME */ +		tracee_pid = getpid(); +		ASSERT_EQ(0, sys_ptrace(PTRACE_TRACEME, 0, 0, 0)) { +			TH_LOG("PTRACE_TRACEME: %m"); +		} +		ASSERT_EQ(0, kill(tracee_pid, SIGSTOP)) { +			/* cannot happen */ +			TH_LOG("kill SIGSTOP: %m"); +		} +		for (i = 0; i < ARRAY_SIZE(si); ++i) { +			rc = syscall(si[i].entry[0].nr, +				     
si[i].entry[0].args[0], +				     si[i].entry[0].args[1], +				     si[i].entry[0].args[2], +				     si[i].entry[0].args[3], +				     si[i].entry[0].args[4], +				     si[i].entry[0].args[5]); +			if (si[i].exit[1].is_error) { +				if (rc != -1 || errno != -si[i].exit[1].rval) +					break; +			} else { +				if (rc != si[i].exit[1].rval) +					break; +			} +		} +		/* +		 * Something went wrong, but in this state tracee +		 * cannot reliably issue syscalls, so just crash. +		 */ +		*(volatile unsigned char *) (uintptr_t) i = 42; +		/* unreachable */ +		_exit(i + 1); +	} + +	for (ptrace_stop = 0; ; ++ptrace_stop) { +		struct ptrace_syscall_info info = { +			.op = 0xff	/* invalid PTRACE_SYSCALL_INFO_* op */ +		}; +		const size_t size = sizeof(info); +		const int expected_entry_size = +			(void *) &info.entry.args[6] - (void *) &info; +		const int expected_exit_size = +			(void *) (&info.exit.is_error + 1) - +			(void *) &info; +		int status; + +		ASSERT_EQ(tracee_pid, wait(&status)) { +			/* cannot happen */ +			LOG_KILL_TRACEE("wait: %m"); +		} +		if (WIFEXITED(status)) { +			tracee_pid = 0;	/* the tracee is no more */ +			ASSERT_EQ(0, WEXITSTATUS(status)) { +				LOG_KILL_TRACEE("unexpected exit status %u", +						WEXITSTATUS(status)); +			} +			break; +		} +		ASSERT_FALSE(WIFSIGNALED(status)) { +			tracee_pid = 0;	/* the tracee is no more */ +			LOG_KILL_TRACEE("unexpected signal %u", +					WTERMSIG(status)); +		} +		ASSERT_TRUE(WIFSTOPPED(status)) { +			/* cannot happen */ +			LOG_KILL_TRACEE("unexpected wait status %#x", status); +		} + +		ASSERT_LT(ptrace_stop, ARRAY_SIZE(si) * 2) { +			LOG_KILL_TRACEE("ptrace stop overflow"); +		} + +		switch (WSTOPSIG(status)) { +		case SIGSTOP: +			ASSERT_EQ(0, ptrace_stop) { +				LOG_KILL_TRACEE("unexpected signal stop"); +			} +			ASSERT_EQ(0, sys_ptrace(PTRACE_SETOPTIONS, tracee_pid, +						0, PTRACE_O_TRACESYSGOOD)) { +				LOG_KILL_TRACEE("PTRACE_SETOPTIONS: %m"); +			} +			break; + +		case SIGTRAP | 0x80: +			
ASSERT_LT(0, ptrace_stop) { +				LOG_KILL_TRACEE("unexpected syscall stop"); +			} +			ASSERT_LT(0, (rc = sys_ptrace(PTRACE_GET_SYSCALL_INFO, +						      tracee_pid, size, +						      (uintptr_t) &info))) { +				LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #1: %m"); +			} +			if (ptrace_stop & 1) { +				/* entering syscall */ +				const struct si_entry *exp_entry = +					&si[ptrace_stop / 2].entry[0]; +				const struct si_entry *set_entry = +					&si[ptrace_stop / 2].entry[1]; + +				/* check ptrace_syscall_info before the changes */ +				ASSERT_EQ(expected_entry_size, rc) { +					LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #1" +							": entry stop mismatch"); +				} +				check_psi_entry(_metadata, &info, exp_entry, +						"PTRACE_GET_SYSCALL_INFO #1"); + +				/* apply the changes */ +				info.entry.nr = set_entry->nr; +				for (i = 0; i < ARRAY_SIZE(set_entry->args); ++i) +					info.entry.args[i] = set_entry->args[i]; +				ASSERT_EQ(0, sys_ptrace(PTRACE_SET_SYSCALL_INFO, +							tracee_pid, size, +							(uintptr_t) &info)) { +					LOG_KILL_TRACEE("PTRACE_SET_SYSCALL_INFO: %m"); +				} + +				/* check ptrace_syscall_info after the changes */ +				memset(&info, 0, sizeof(info)); +				info.op = 0xff; +				ASSERT_LT(0, (rc = sys_ptrace(PTRACE_GET_SYSCALL_INFO, +							      tracee_pid, size, +							      (uintptr_t) &info))) { +					LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO: %m"); +				} +				ASSERT_EQ(expected_entry_size, rc) { +					LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #2" +							": entry stop mismatch"); +				} +				check_psi_entry(_metadata, &info, set_entry, +						"PTRACE_GET_SYSCALL_INFO #2"); +			} else { +				/* exiting syscall */ +				const struct si_exit *exp_exit = +					&si[ptrace_stop / 2 - 1].exit[0]; +				const struct si_exit *set_exit = +					&si[ptrace_stop / 2 - 1].exit[1]; + +				/* check ptrace_syscall_info before the changes */ +				ASSERT_EQ(expected_exit_size, rc) { +					LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #1" +							": exit stop 
mismatch"); +				} +				check_psi_exit(_metadata, &info, exp_exit, +						"PTRACE_GET_SYSCALL_INFO #1"); + +				/* apply the changes */ +				info.exit.is_error = set_exit->is_error; +				info.exit.rval = set_exit->rval; +				ASSERT_EQ(0, sys_ptrace(PTRACE_SET_SYSCALL_INFO, +							tracee_pid, size, +							(uintptr_t) &info)) { +					LOG_KILL_TRACEE("PTRACE_SET_SYSCALL_INFO: %m"); +				} + +				/* check ptrace_syscall_info after the changes */ +				memset(&info, 0, sizeof(info)); +				info.op = 0xff; +				ASSERT_LT(0, (rc = sys_ptrace(PTRACE_GET_SYSCALL_INFO, +							      tracee_pid, size, +							      (uintptr_t) &info))) { +					LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #2: %m"); +				} +				ASSERT_EQ(expected_exit_size, rc) { +					LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #2" +							": exit stop mismatch"); +				} +				check_psi_exit(_metadata, &info, set_exit, +						"PTRACE_GET_SYSCALL_INFO #2"); +			} +			break; + +		default: +			LOG_KILL_TRACEE("unexpected stop signal %u", +					WSTOPSIG(status)); +			abort(); +		} + +		ASSERT_EQ(0, sys_ptrace(PTRACE_SYSCALL, tracee_pid, 0, 0)) { +			LOG_KILL_TRACEE("PTRACE_SYSCALL: %m"); +		} +	} + +	ASSERT_EQ(ptrace_stop, ARRAY_SIZE(si) * 2); +} + +TEST_HARNESS_MAIN | 
